Your dedication to porting this is impressive; I feel like I’m entering the Matrix reading the code.
Yeah tons of bit shifting and arrays lol. Also plenty of things happen in every line, put this (that jumps to a sub function) there and add this there etc.
Would you be able to optimize it by not looping through every line on every run? That is, only doing the line of the current pixel/texel (or maybe a sliding window)?
I was also looking for any loops that might be offload-able to a feedback+alpha-channel ticker but I’m not sure if that would work/help.
Need to rethink implementation, instead of filling arrays do what it does on a single pixel. Meaning: read the code well, understand what it does and do it again in a different way.
For the meantime a hack of that PAL shader in retroarch, to almost match my real Amiga on composite. Fixed for GLES too. That’s really close
#version 130
#pragma parameter FIR_GAIN "FIR Gain" 1.62 0.0 3.0 0.01
#pragma parameter FIR_INVGAIN "FIR Inv Gain" 1.0 0.0 3.0 0.01
#pragma parameter ihue "I Hue" 0.2 -1.0 1.0 0.01
#pragma parameter qhue "Q Hue" 0.1 -1.0 1.0 0.01
#pragma parameter sat "Saturation" 1.0 0.0 2.0 0.01
#pragma parameter crawl "Dot Crawl" 1.0 0.0 1.0 1.0
#pragma parameter blur "Blur Size" 0.75 0.0 2.0 0.01
#if defined(VERTEX)
#if __VERSION__ >= 130
#define COMPAT_VARYING out
#define COMPAT_ATTRIBUTE in
#define COMPAT_TEXTURE texture
#else
#define COMPAT_VARYING varying
#define COMPAT_ATTRIBUTE attribute
#define COMPAT_TEXTURE texture2D
#endif
#ifdef GL_ES
#define COMPAT_PRECISION mediump
#else
#define COMPAT_PRECISION
#endif
COMPAT_ATTRIBUTE vec4 VertexCoord;
COMPAT_ATTRIBUTE vec4 COLOR;
COMPAT_ATTRIBUTE vec4 TexCoord;
COMPAT_VARYING vec4 COL0;
COMPAT_VARYING vec4 TEX0;
vec4 _oPosition1;
uniform mat4 MVPMatrix;
uniform COMPAT_PRECISION int FrameDirection;
uniform COMPAT_PRECISION int FrameCount;
uniform COMPAT_PRECISION vec2 OutputSize;
uniform COMPAT_PRECISION vec2 TextureSize;
uniform COMPAT_PRECISION vec2 InputSize;
// compatibility #defines
#define vTexCoord TEX0.xy
#define SourceSize vec4(TextureSize, 1.0 / TextureSize) //either TextureSize or InputSize
#define OutSize vec4(OutputSize, 1.0 / OutputSize)
#ifdef PARAMETER_UNIFORM
uniform COMPAT_PRECISION float WHATEVER;
#else
#define WHATEVER 0.0
#endif
// Vertex stage: hand the texture coordinate to the fragment stage and
// transform the vertex with the frontend-supplied MVP matrix.
void main()
{
    vTexCoord = TexCoord.xy;
    gl_Position = MVPMatrix * VertexCoord;
}
#elif defined(FRAGMENT)
#if __VERSION__ >= 130
#define COMPAT_VARYING in
#define COMPAT_TEXTURE texture
out vec4 FragColor;
#else
#define COMPAT_VARYING varying
#define FragColor gl_FragColor
#define COMPAT_TEXTURE texture2D
#endif
#ifdef GL_ES
#ifdef GL_FRAGMENT_PRECISION_HIGH
precision highp float;
#else
precision mediump float;
#endif
#define COMPAT_PRECISION mediump
#else
#define COMPAT_PRECISION
#endif
uniform COMPAT_PRECISION int FrameDirection;
uniform COMPAT_PRECISION int FrameCount;
uniform COMPAT_PRECISION vec2 OutputSize;
uniform COMPAT_PRECISION vec2 TextureSize;
uniform COMPAT_PRECISION vec2 InputSize;
uniform sampler2D Texture;
COMPAT_VARYING vec4 TEX0;
// compatibility #defines
#define Source Texture
#define vTexCoord TEX0.xy
#define SourceSize vec4(TextureSize, 1.0 / TextureSize) //either TextureSize or InputSize
#define OutSize vec4(OutputSize, 1.0 / OutputSize)
#ifdef PARAMETER_UNIFORM
uniform COMPAT_PRECISION float FIR_GAIN;
uniform COMPAT_PRECISION float FIR_INVGAIN;
uniform COMPAT_PRECISION float ihue;
uniform COMPAT_PRECISION float qhue;
uniform COMPAT_PRECISION float sat;
uniform COMPAT_PRECISION float crawl;
uniform COMPAT_PRECISION float blur;
#else
#define FIR_GAIN 2.0
#define FIR_INVGAIN 1.0
#define ihue 0.0
#define qhue 0.0
#define sat 1.0
#define crawl 1.0
#define blur 1.0
#endif
#define PI 3.14159265358
#define FSC 21477272.73
#define FLINE 15625.0
#define VISIBLELINES 312.0
#define RGB_to_YIQ mat3( 0.299 , 0.595716 , 0.211456 , 0.587 , -0.274453 , -0.522591 , 0.114 , -0.321263 , 0.311135 )
#define YIQ_to_RGB mat3( 1.0 , 1.0 , 1.0 , 0.9563 , -0.2721 , -1.1070 , 0.6210 , -0.6474 , 1.7046 )
#define RGB_to_YUV mat3( 0.299 , -0.14713 , 0.615 , 0.587 , -0.28886 , -0.514991 , 0.114 , 0.436 , -0.10001 )
#define YUV_to_RGB mat3( 1.0 , 1.0 , 1.0 , 0.0 , -0.39465 , 2.03211 , 1.13983 , -0.58060 , 0.0 )
#define FIRTAPS 20
const float FIR[20] = float[20] (-0.008030271,0.003107906,0.016841352,0.032545161,0.049360136,
0.066256720,0.082120150,0.095848433,0.106453014,0.113151423,
0.115441842,0.113151423,0.106453014,0.095848433,0.082120150,
0.066256720,0.049360136,0.032545161,0.016841352,0.003107906);
//#define FIR_GAIN 2.0
//#define FIR_INVGAIN 1.02
float width_ratio;
float height_ratio;
float altv;
float invx;
vec2 dx;
#define time float(FrameCount)
#define fetch(offset, pos, invx) COMPAT_TEXTURE(Source, vec2(pos.xy + vec2(offset*invx ,0.0)))
// Builds the clamped composite value (Y plus U/V modulated by the supplied
// carrier sine/cosine) at `xy`, from a horizontally blended RGB sample.
float mod_luma(vec2 xy, float sinwt, float coswt) {
    // Three horizontal taps weighted 0.5 / 0.3 / 0.2: centre, +dx, -dx.
    vec3 tap_centre = fetch(0.0, xy, invx).xyz;
    vec3 tap_plus   = fetch(0.0, xy+dx, invx).xyz;
    vec3 tap_minus  = fetch(0.0, xy-dx, invx).xyz;
    vec3 blended = tap_centre*0.5 + tap_plus*0.3 + tap_minus*0.2;

    vec3 yuv = RGB_to_YUV * blended;
    float composite = yuv.x + yuv.y*sinwt + yuv.z*coswt;
    return clamp(composite, 0.0, 1.0);
}
// Modulates the blended colour at `pos` onto the carrier evaluated at the
// horizontal offset `ofs` (in units of invx), then multiplies the clamped
// composite value by the same carrier again. Returns (signal*sin, signal*cos).
vec2 modem_UV(vec2 pos, float ofs) {
    // Carrier phase at the offset sample position, plus the per-line term altv.
    float t = (pos.x + ofs*invx) * OutputSize.x*SourceSize.x/InputSize.x;
    float phase = t * 2.0 * PI/2.0 + altv;
    vec2 carrier = vec2(sin(phase), cos(phase));

    // Only the centre tap is shifted by `ofs`; the two side taps are read at
    // pos+dx and pos-dx with no offset applied.
    vec3 blended = fetch(ofs, pos, invx).xyz*0.5
                 + fetch(0.0, pos+dx, invx).xyz*0.3
                 + fetch(0.0, pos-dx, invx).xyz*0.2;

    vec3 yuv = RGB_to_YUV * blended;
    float signal = clamp(yuv.x + yuv.y*carrier.x + yuv.z*carrier.y, 0.0, 1.0);
    return vec2(signal * carrier.x, signal * carrier.y);
}
// Fragment entry point. Low-pass filters the demodulated chroma (U/V) around
// the current position with the FIR kernel, subtracts the re-modulated chroma
// from the composite value to obtain luma, applies the hue/saturation mix and
// converts the result back to RGB.
void main() {
    // Hue/saturation mix. Built inside main() instead of as a global
    // initializer: ihue/qhue/sat are uniforms when PARAMETER_UNIFORM is
    // defined, and GLSL requires global initializers to be constant
    // expressions (strict and GLES compilers reject the global form).
    mat3 mix_mat = mat3(
        1.0, 0.0, 0.0,
        ihue, sat, 0.0,
        qhue, 0.0, sat
    );

    // Move the sampling row 80% of the way towards the centre of its texel.
    vec2 cent = floor(vTexCoord*SourceSize.xy)+0.5;
    vec2 near = cent*SourceSize.zw;
    vec2 pos = vTexCoord;
    pos.y = mix(vTexCoord.y, near.y,0.8);

    // Horizontal blur step used by mod_luma() and modem_UV().
    dx = vec2(SourceSize.z*blur,0.0);

    // Per-line carrier phase: PI/2 per texture row, plus a frame-dependent
    // term (repeating every 30 frames) when dot crawl is enabled.
    float crawler = crawl == 1.0? 2.0*mod(time,30.0): 0.0;
    altv = pos.y*SourceSize.y*PI/2.0 + crawler;
    invx = 0.05 / OutputSize.x; // equals 5 samples per Fsc period

    // lowpass U/V at baseband
    vec2 UV = vec2(0.0);
    for (int i = 0; i < FIRTAPS; i++) {
        vec2 uv = modem_UV(pos, 2.0*float(i) - float(FIRTAPS)); // floats for GLES, or else, bang!
        UV += FIR_GAIN* uv * FIR[i];
    }

    float wt = pos.x*SourceSize.x*PI/2.0;
    //float sinwt = sin(wt + altv+ 60.0*floor(mod(time,3.0)+1.0));
    //float coswt = cos(wt + altv+ 60.0*floor(mod(time,3.0)+1.0));
    float sinwt = sin(wt + altv);
    float coswt = cos(wt + altv);

    // Luma = composite value minus the filtered chroma put back on the carrier.
    float luma = mod_luma(pos, sinwt, coswt) - FIR_INVGAIN*(UV.x*sinwt + UV.y*coswt);
    vec3 yuv_result = vec3(luma, UV.x, UV.y);
    yuv_result *= mix_mat;
    FragColor = vec4(YUV_to_RGB * yuv_result, 1.0);
}
#endif
After some talk with him, the 1st pass should look something like this in GLSL. I still have to check whether the numbers (LFREQ etc.) are correct.
#version 110
#pragma parameter FIR_GAIN "FIR Gain" 1.62 0.0 3.0 0.01
#if defined(VERTEX)
#if __VERSION__ >= 130
#define COMPAT_VARYING out
#define COMPAT_ATTRIBUTE in
#define COMPAT_TEXTURE texture
#else
#define COMPAT_VARYING varying
#define COMPAT_ATTRIBUTE attribute
#define COMPAT_TEXTURE texture2D
#endif
#ifdef GL_ES
#define COMPAT_PRECISION mediump
#else
#define COMPAT_PRECISION
#endif
COMPAT_ATTRIBUTE vec4 VertexCoord;
COMPAT_ATTRIBUTE vec4 COLOR;
COMPAT_ATTRIBUTE vec4 TexCoord;
COMPAT_VARYING vec4 COL0;
COMPAT_VARYING vec4 TEX0;
vec4 _oPosition1;
uniform mat4 MVPMatrix;
uniform COMPAT_PRECISION int FrameDirection;
uniform COMPAT_PRECISION int FrameCount;
uniform COMPAT_PRECISION vec2 OutputSize;
uniform COMPAT_PRECISION vec2 TextureSize;
uniform COMPAT_PRECISION vec2 InputSize;
// compatibility #defines
#define vTexCoord TEX0.xy
#define SourceSize vec4(TextureSize, 1.0 / TextureSize) //either TextureSize or InputSize
#define OutSize vec4(OutputSize, 1.0 / OutputSize)
#ifdef PARAMETER_UNIFORM
uniform COMPAT_PRECISION float WHATEVER;
#else
#define WHATEVER 0.0
#endif
// Vertex stage: pass the texture coordinate through and project the vertex
// with the MVP matrix supplied by the frontend.
void main()
{
    vTexCoord = TexCoord.xy;
    gl_Position = MVPMatrix * VertexCoord;
}
#elif defined(FRAGMENT)
#if __VERSION__ >= 130
#define COMPAT_VARYING in
#define COMPAT_TEXTURE texture
out vec4 FragColor;
#else
#define COMPAT_VARYING varying
#define FragColor gl_FragColor
#define COMPAT_TEXTURE texture2D
#endif
#ifdef GL_ES
#ifdef GL_FRAGMENT_PRECISION_HIGH
precision highp float;
#else
precision mediump float;
#endif
#define COMPAT_PRECISION mediump
#else
#define COMPAT_PRECISION
#endif
uniform COMPAT_PRECISION int FrameDirection;
uniform COMPAT_PRECISION int FrameCount;
uniform COMPAT_PRECISION vec2 OutputSize;
uniform COMPAT_PRECISION vec2 TextureSize;
uniform COMPAT_PRECISION vec2 InputSize;
uniform sampler2D Texture;
COMPAT_VARYING vec4 TEX0;
// compatibility #defines
#define Source Texture
#define vTexCoord TEX0.xy
#define SourceSize vec4(TextureSize, 1.0 / TextureSize) //either TextureSize or InputSize
#define OutSize vec4(OutputSize, 1.0 / OutputSize)
#ifdef PARAMETER_UNIFORM
uniform COMPAT_PRECISION float FIR_GAIN;
#else
#define FIR_GAIN 2.0
#endif
#define PI 3.14159265358
#define CRT_CC_SAMPLES 4.0
#define CRT_CC_VPER 2.0
#define BLACK_LVL 7.0/140.0
#define BLACK_PT 0.0
#define Q_OFFSET 90.0
const float hue = 0.0; // 0-359
const float dot_crawl_offset = 0.0; // 0-5
const float LINE_FREQ = 14.31818; // full line
const float Y_FREQ = 2.2; // Luma (Y) 4.2 MHz
const float I_FREQ = 1.5; // Chroma (I) 1.5 MHz
const float Q_FREQ = 0.55; // Chroma (Q) 0.55 MHz
#define RGB_to_YIQ mat3( 0.299, 0.595716, 0.211456,0.587, -0.274453, -0.522591, 0.114, -0.321263, 0.311135 )
#define YIQ_to_RGB mat3( 1.0, 1.0, 1.0, 0.9563, -0.2721, -1.1070,0.6210, -0.6474, 1.7046)
// Computes a filter coefficient from the cutoff `limit` and LINE_FREQ.
// The arithmetic is unchanged from the draft; only the stray ';' that followed
// the function body was removed (an empty declaration at global scope is not
// part of the GLSL 1.10 grammar and strict compilers reject it).
float init_iir (float limit) {
    float rate = 1000.0*limit/LINE_FREQ; // cycles / pixel rate
    return 2.048 - exp(-(6.434*0.512 / rate)); // 2048 - expx(-6434 * 512/rate)
}
// Returns exp(s - h) for coefficient `h` and sample `s`.
// No state is kept between calls. The trailing ';' after the body was removed
// because an empty global declaration is invalid in GLSL 1.10.
float iirf (float h, float s) {
    return exp(s - h);
}
// Fragment entry point of the encoding pass: converts the source colour to
// YIQ, weights I and Q by a position-dependent cosine/sine, sums everything
// with the black level and writes the result as a grey value.
void main() {
    vec3 rgb = COMPAT_TEXTURE(Source, vTexCoord).rgb;
    // RGB_to_YIQ is written column by column (the first column 0.299, 0.595716,
    // 0.211456 is the red contribution to Y, I and Q), so it must be applied
    // as matrix * vector; vector * matrix would multiply by the transpose.
    vec3 yiq = RGB_to_YIQ * rgb;
    float ire = BLACK_LVL+BLACK_PT;
    float xo = 156.0;
    float yo = 23.0;
    // full output resolution x
    vec2 pos = vTexCoord*OutputSize.xy*SourceSize.xy/InputSize.xy;
    // Snap the horizontal offset down to a multiple of 4.
    xo = xo - mod(xo, 4.0);
    float xoff = mod((pos.x + xo), CRT_CC_SAMPLES);
    float ph = mod((pos.y + yo + dot_crawl_offset), CRT_CC_VPER);
    float phase = ph*xoff + hue;
    float sn = sin(phase/2.0*PI/2.0);
    float cs = cos(phase/2.0*PI/2.0);
    // Initiate IIR values
    float iirY = init_iir(Y_FREQ);
    float iirI = init_iir(I_FREQ);
    float iirQ = init_iir(Q_FREQ);
    // IIR pass before sending as 1 signal
    float fY = yiq.r*iirf(iirY, yiq.r);
    float fI = yiq.g*iirf(iirI, yiq.g)*cs;
    float fQ = yiq.b*iirf(iirQ, yiq.b)*sn;
    ire += fY + fI + fQ;
    ire = clamp(ire,0.0,1.1);
    FragColor = vec4(vec3(ire), 1.0);
}
#endif
That looks a lot more manageable!
Yeah, I just need to understand what the code does and do it properly for GLSL. Right now I am not in the mood to jump into the second-pass rabbit hole; maybe after a few days lol.
All numbers revised to correct values
#version 110
#pragma parameter FIR_GAIN "FIR Gain" 1.62 0.0 3.0 0.01
#if defined(VERTEX)
#if __VERSION__ >= 130
#define COMPAT_VARYING out
#define COMPAT_ATTRIBUTE in
#define COMPAT_TEXTURE texture
#else
#define COMPAT_VARYING varying
#define COMPAT_ATTRIBUTE attribute
#define COMPAT_TEXTURE texture2D
#endif
#ifdef GL_ES
#define COMPAT_PRECISION mediump
#else
#define COMPAT_PRECISION
#endif
COMPAT_ATTRIBUTE vec4 VertexCoord;
COMPAT_ATTRIBUTE vec4 COLOR;
COMPAT_ATTRIBUTE vec4 TexCoord;
COMPAT_VARYING vec4 COL0;
COMPAT_VARYING vec4 TEX0;
vec4 _oPosition1;
uniform mat4 MVPMatrix;
uniform COMPAT_PRECISION int FrameDirection;
uniform COMPAT_PRECISION int FrameCount;
uniform COMPAT_PRECISION vec2 OutputSize;
uniform COMPAT_PRECISION vec2 TextureSize;
uniform COMPAT_PRECISION vec2 InputSize;
// compatibility #defines
#define vTexCoord TEX0.xy
#define SourceSize vec4(TextureSize, 1.0 / TextureSize) //either TextureSize or InputSize
#define OutSize vec4(OutputSize, 1.0 / OutputSize)
#ifdef PARAMETER_UNIFORM
uniform COMPAT_PRECISION float WHATEVER;
#else
#define WHATEVER 0.0
#endif
// Vertex stage: forward the texture coordinate unchanged and apply the
// frontend-supplied MVP matrix to the vertex position.
void main()
{
    vTexCoord = TexCoord.xy;
    gl_Position = MVPMatrix * VertexCoord;
}
#elif defined(FRAGMENT)
#if __VERSION__ >= 130
#define COMPAT_VARYING in
#define COMPAT_TEXTURE texture
out vec4 FragColor;
#else
#define COMPAT_VARYING varying
#define FragColor gl_FragColor
#define COMPAT_TEXTURE texture2D
#endif
#ifdef GL_ES
#ifdef GL_FRAGMENT_PRECISION_HIGH
precision highp float;
#else
precision mediump float;
#endif
#define COMPAT_PRECISION mediump
#else
#define COMPAT_PRECISION
#endif
uniform COMPAT_PRECISION int FrameDirection;
uniform COMPAT_PRECISION int FrameCount;
uniform COMPAT_PRECISION vec2 OutputSize;
uniform COMPAT_PRECISION vec2 TextureSize;
uniform COMPAT_PRECISION vec2 InputSize;
uniform sampler2D Texture;
COMPAT_VARYING vec4 TEX0;
// compatibility #defines
#define Source Texture
#define vTexCoord TEX0.xy
#define SourceSize vec4(TextureSize, 1.0 / TextureSize) //either TextureSize or InputSize
#define OutSize vec4(OutputSize, 1.0 / OutputSize)
#ifdef PARAMETER_UNIFORM
uniform COMPAT_PRECISION float FIR_GAIN;
#else
#define FIR_GAIN 2.0
#endif
#define PI 3.14159265358
#define CRT_CC_SAMPLES 4.0
#define CRT_CC_VPER 2.0
#define BLACK_LVL 7.0/140.0
#define BLACK_PT 0.0
#define Q_OFFSET 90.0
const float hue = 0.0; // 0-359
const float dot_crawl_offset = 0.0; // 0-5
const float LINE_FREQ = 14.31818; // full line
const float Y_FREQ = 4.2; // Luma (Y) 4.2 MHz
const float I_FREQ = 1.5; // Chroma (I) 1.5 MHz
const float Q_FREQ = 0.55; // Chroma (Q) 0.55 MHz
#define RGB_to_YIQ mat3( 0.299, 0.595716, 0.211456,0.587, -0.274453, -0.522591, 0.114, -0.321263, 0.311135 )
#define YIQ_to_RGB mat3( 1.0, 1.0, 1.0, 0.9563, -0.2721, -1.1070,0.6210, -0.6474, 1.7046)
// Low-pass coefficient for a cutoff `limit` (MHz) relative to LINE_FREQ:
// c = 1 - exp(-PI * limit / LINE_FREQ).
// The ratio was previously inverted (limit / LINE_FREQ, a value below 1), which
// drove exp(-PI / rate) to ~0 and made every coefficient ~1.0 regardless of
// the Y/I/Q cutoff. The stray ';' after the body (invalid at global scope in
// GLSL 1.10) is removed as well.
float init_iir (float limit) {
    float rate = LINE_FREQ/limit; // line rate relative to the cutoff
    return 1.0 - exp(-PI / rate); // 2048 - expx(-6434 * 512/rate)
}
// Returns exp(s - h) for coefficient `h` and sample `s`; stateless.
// The trailing ';' after the body was dropped: an empty declaration at global
// scope is not valid GLSL 1.10.
float iirf (float h, float s) {
    return exp(s - h);
}
// Fragment entry point of the encoding pass: converts the source colour to
// YIQ, weights I and Q by a position-dependent cosine/sine, adds the black
// level and outputs the clamped sum as a grey value.
void main() {
    vec3 rgb = COMPAT_TEXTURE(Source, vTexCoord).rgb;
    // RGB_to_YIQ is written column by column (the first column 0.299, 0.595716,
    // 0.211456 is the red contribution to Y, I and Q), so it must be applied
    // as matrix * vector; vector * matrix would multiply by the transpose.
    vec3 yiq = RGB_to_YIQ * rgb;
    float ire = BLACK_LVL+BLACK_PT;
    float xo = 156.0;
    float yo = 23.0;
    // full output resolution x
    vec2 pos = vTexCoord*OutputSize.xy*SourceSize.xy/InputSize.xy;
    // Snap the horizontal offset down to a multiple of 4.
    xo = xo - mod(xo, 4.0);
    float xoff = mod((pos.x + xo), CRT_CC_SAMPLES);
    float ph = mod((pos.y + yo + dot_crawl_offset), CRT_CC_VPER);
    float phase = ph*xoff + hue;
    float sn = sin(phase/2.0*PI/2.0);
    float cs = cos(phase/2.0*PI/2.0);
    // Initiate IIR values
    float iirY = init_iir(Y_FREQ);
    float iirI = init_iir(I_FREQ);
    float iirQ = init_iir(Q_FREQ);
    // IIR pass before sending as 1 signal
    float fY = yiq.r*iirf(iirY, yiq.r);
    float fI = yiq.g*iirf(iirI, yiq.g)*cs;
    float fQ = yiq.b*iirf(iirQ, yiq.b)*sn;
    ire += fY + fI + fQ;
    ire = clamp(ire,0.0,1.0);
    FragColor = vec4(vec3(ire), 1.0);
}
#endif
Ok so added 2 GLSL shaders, in “pal” folder named A520 mimic my Amiga on Composite modulator, this is almost 1:1 replication (!). Did so much study on the matter i can write an ntsc on my cellphone now lol
and a replacement for “ntsc-simple”
@Cyber since you like blargg’s ntsc, check the new “ntsc-simple-hd” i uploaded to slang/glsl. Tell me what you think It’s a bit sharper since it uses 20 passes “blurring” left and right and blargg uses 33.
There is a kernel array of size 66 in blargg, the 2nd part is filled with luma filter values related to “resolution” and “sharpness” values
#define bleed 0.2
#define LUMA_CUTOFF 0.2
// generate luma (y) filter using sinc kernel
// sinc with rolloff (dsf)
float rolloff = 1.0 + sharpness * 0.032;
float maxh = 32.0;
float pow_a_n = pow( rolloff, maxh );
float sum = 0.0;
int i;
// quadratic mapping to reduce negative (blurring) range
float to_angle = resolution + 1.0;
to_angle = PI / maxh * LUMA_CUTOFF * (to_angle * to_angle + 1.0);
kernels [49] = maxh; // default center value
for ( i = 0; i < 33; i++ )
{
int x = i - 16;
float angle = x * to_angle;
// instability occurs at center point with rolloff very close to 1.0
if ( x || pow_a_n > 1.056 || pow_a_n < 0.981 )
{
float rolloff_cos_a = rolloff * cos( angle );
float num = 1.0 - rolloff_cos_a - pow_a_n * cos( maxh * angle ) +
pow_a_n * rolloff * cos( (maxh - 1.0) * angle );
float den = 1.0 - rolloff_cos_a - rolloff_cos_a + rolloff * rolloff;
float dsf = num / den;
kernels [33 + i] = dsf - 0.5;
}
}
// apply blackman window and find sum
for ( i = 0; i < 33; i++ )
{
float x = PI * 2 / 32 * i;
float blackman = 0.42 - 0.5*cos( x ) + 0.08*cos( x * 2 );
sum += kernels [33 + i] * blackman;
}
And the 1st part, the first 33 numbers, is filled with chroma values related to “bleed”
// generate chroma (iq) filter using gaussian kernel
float cutoff_factor = -0.03125;
if ( bleed < 0.0 )
{
// keep extreme value accessible only near upper end of scale (1.0)
bleed *= bleed;
bleed *= bleed;
bleed *= bleed;
bleed *= -30.0 / 0.65;
}
bleed = 0.35 * cutoff_factor * bleed;
for ( i = -16; i <= 16; i++ ){
kernels [16 + i] = exp( float(i) * float(i) * bleed );
}
// normalize even and odd phases separately
for ( i = 0; i < 2; i++ )
{
sum = 0.0;
int x;
for ( x = i; x < 33; x += 2 )
sum += kernels [x];
sum = 1.0 / sum;
for ( x = i; x < 33; x += 2 )
{
kernels[x] *= sum;
}
}
These arrays could be pre-calculated for given bleed and resolution values in steps, let’s say -1.0, -0.5, 0.0, 0.5, 1.0, so you don’t need to run this whole huge loop in real time and drag performance to the bottom.
In the end, luma and chroma samples (the 33-1=32 passes) are filtered multiplied by these arrays. If you look carefully luma is half blurred than chroma (edit: actually 4 times since it runs only 0… to 16 and chroma runs -32… to 32… So it’s a 4:1)
int x = i - 16;
// later on
if ( x ...... // meaning if x >0 so half passes of 32
Thanks for asking, it would be an honour as soon as I get my latest presets pack out the door. I had to overhaul at least 3 times already since the initial release. First one was to add Grade (the old one), second was to tweak filters mainly. The third one was to calibrate using 240p Test Suite, then retweak settings and filters. While I was working on that, a couple of users complained that the presets weren’t loading. It was because they don’t have the old Grade, because the Online Updater doesn’t pull it anymore and, in addition to that, the new Grade is in a different folder. So I had to overhaul again just to redo all the colour and brightness settings to be able to use this new Grade, and I was actually quite satisfied after maybe the third overhaul.
This one’s looking good though.
So in other words as soon as I get a chance. Lol
It’s cool that you’ve gotten your hands dirty in the Blargg code and can understand and visualize what’s been going on under the hood for all these years.
As we can see, it’s highly optimized.
One thing I would like to get some more information on would be the exact ranges and steps of all of the settings.
I know for a fact that most of the settings can be incremented in steps of ±0.1.
Most seem to have a range of between -1 to +1
I think I found one where the max was 0.5 though (gamma) can’t say for sure. All I use are my eyes and trial and error.
One interesting setting is the colour bleed function. Each 0.1 step seems to only increase the bleed marginally. So you can almost go overboard without going overboard.
With many of the other settings a ±0.1 adjustment is quite noticeable.
One thing I don’t get and I wish could be resolved or worked around is why the filter causes the Aspect Ratio to change slightly.
“Lab” here
Direct comparison and 1:1 Trinitron PAL colors, Amiga at least, preset
shaders = "6"
feedback_pass = "0"
shader0 = "../misc/shaders/simple_color_controls.glsl"
filter_linear0 = "false"
shader1 = "../misc/shaders/chromaticity.glsl"
filter_linear1 = "false"
shader2 = "../crt/shaders/crt-consumer/linearize.glsl"
filter_linear2 = "false"
shader3 = "../crt/shaders/crt-consumer/glow_x.glsl"
filter_linear3 = "false"
shader4 = "../crt/shaders/crt-consumer/glow_y.glsl"
filter_linear4 = "false"
shader5 = "../crt/shaders/crt-geom.glsl"
filter_linear5 = "false"
TEMP = "5423.000000"
BLACK = "-0.020000"
gamma_in = "1.000000"
gamma_out_red = "1.000000"
gamma_out_green = "1.000000"
gamma_out_blue = "1.000000"
R = "2.000000"
COLOR_MODE = "-1.000000"
Dx = "-1.000000"
CRTgamma = "1.000000"
SATURATION = "1.200000"
DOTMASK = "0.5"
Default RGB colors
SNES Flashback… yeah that looks like Trinitron
Another day, another project, today’s was “Make crt-geom run on an old 2016 cellphone around 120 gflops gpu” so here it is…
Copy and save as ‘crt-geom-mini.glsl’ drop to shaders_glsl/crt/shaders
#version 110
// This program is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 2 of the License, or (at your option)
// any later version.
#pragma parameter CURV "CRT-Geom Curvature" 1.0 0.0 1.0 1.0
#pragma parameter SCAN "CRT-Geom Scanline Weight" 0.3 0.2 0.6 0.05
#pragma parameter MASK "CRT-Geom Dotmask Strength" 0.25 0.0 1.0 0.05
#pragma parameter LUM "CRT-Geom Luminance" 0.05 0.0 0.5 0.01
#pragma parameter INTERL "CRT-Geom Interlacing Simulation" 1.0 0.0 1.0 1.0
#pragma parameter SAT "CRT-Geom Saturation" 1.1 0.0 2.0 0.01
#define PI 3.1415926535897932384626433
#if defined(VERTEX)
#if __VERSION__ >= 130
#define COMPAT_VARYING out
#define COMPAT_ATTRIBUTE in
#define COMPAT_TEXTURE texture
#else
#define COMPAT_VARYING varying
#define COMPAT_ATTRIBUTE attribute
#define COMPAT_TEXTURE texture2D
#endif
#ifdef GL_ES
#define COMPAT_PRECISION mediump
#else
#define COMPAT_PRECISION
#endif
COMPAT_ATTRIBUTE vec4 VertexCoord;
COMPAT_ATTRIBUTE vec4 COLOR;
COMPAT_ATTRIBUTE vec4 TexCoord;
COMPAT_VARYING vec4 COL0;
COMPAT_VARYING vec4 TEX0;
COMPAT_VARYING vec2 scale;
COMPAT_VARYING vec2 warpp;
COMPAT_VARYING vec2 warppm;
COMPAT_VARYING vec2 warp;
COMPAT_VARYING float fragpos;
COMPAT_VARYING float omega;
vec4 _oPosition1;
uniform mat4 MVPMatrix;
uniform COMPAT_PRECISION int FrameDirection;
uniform COMPAT_PRECISION int FrameCount;
uniform COMPAT_PRECISION vec2 OutputSize;
uniform COMPAT_PRECISION vec2 TextureSize;
uniform COMPAT_PRECISION vec2 InputSize;
// compatibility #defines
#define vTexCoord TEX0.xy
#define SourceSize vec4(TextureSize, 1.0 / TextureSize) //either TextureSize or InputSize
#define OutSize vec4(OutputSize, 1.0 / OutputSize)
#ifdef PARAMETER_UNIFORM
uniform COMPAT_PRECISION float SIZE;
#else
#define SIZE 1.0
#endif
// Vertex stage: besides the usual position/texcoord pass-through, this
// precomputes values the fragment stage would otherwise derive per pixel.
void main()
{
    vec2 uv = TexCoord.xy;

    gl_Position = MVPMatrix * VertexCoord;
    TEX0.xy = uv;

    // Ratio of texture size to input (content) size.
    scale = SourceSize.xy/InputSize.xy;

    // Texture coordinate rescaled by that ratio, and the same value remapped
    // as warpp*2 - 1 for the curvature function.
    warpp = uv*scale;
    warp = warpp*2.0-1.0;

    // Argument of the sin() used for the dot mask in the fragment stage.
    fragpos = uv.x*OutputSize.x*scale.x*PI;
}
#elif defined(FRAGMENT)
#if __VERSION__ >= 130
#define COMPAT_VARYING in
#define COMPAT_TEXTURE texture
out vec4 FragColor;
#else
#define COMPAT_VARYING varying
#define FragColor gl_FragColor
#define COMPAT_TEXTURE texture2D
#endif
#ifdef GL_ES
#ifdef GL_FRAGMENT_PRECISION_HIGH
precision highp float;
#else
precision mediump float;
#endif
#define COMPAT_PRECISION mediump
#else
#define COMPAT_PRECISION
#endif
uniform COMPAT_PRECISION int FrameDirection;
uniform COMPAT_PRECISION int FrameCount;
uniform COMPAT_PRECISION vec2 OutputSize;
uniform COMPAT_PRECISION vec2 TextureSize;
uniform COMPAT_PRECISION vec2 InputSize;
uniform sampler2D Texture;
COMPAT_VARYING vec4 TEX0;
COMPAT_VARYING vec2 scale;
COMPAT_VARYING float fragpos;
COMPAT_VARYING vec2 warpp;
COMPAT_VARYING vec2 warppm;
COMPAT_VARYING vec2 warp;
COMPAT_VARYING float omega;
// compatibility #defines
#define Source Texture
#define vTexCoord TEX0.xy
#define SourceSize vec4(TextureSize, 1.0 / TextureSize) //either TextureSize or InputSize
#define OutSize vec4(OutputSize, 1.0 / OutputSize)
#ifdef PARAMETER_UNIFORM
uniform COMPAT_PRECISION float SCAN;
uniform COMPAT_PRECISION float MASK;
uniform COMPAT_PRECISION float CURV;
uniform COMPAT_PRECISION float LUM;
uniform COMPAT_PRECISION float SAT;
uniform COMPAT_PRECISION float INTERL;
#else
#define SCAN 0.3
#define MASK 0.6
#define CURV 1.0
#define LUM 0.0
#define SAT 1.0
#define INTERL 1.0
#endif
// Scanline weight at distance `pos` from the line centre: a Gaussian-shaped
// falloff whose width grows with the brightness of `color`, offset by LUM.
float scan(float pos, vec3 color)
{
    // Beam width: SCAN plus a small brightness-dependent widening.
    float beam = SCAN + 0.1 * dot(color, vec3(0.333))*0.8;
    float d = pos / beam;
    float falloff = exp(-d * d );
    return LUM + (0.1 + SCAN) * falloff / beam;
}
// Screen curvature. Note that the `pos` argument is not read: the function
// starts from the `warp` varying (computed in the vertex stage as
// warpp*2.0-1.0) and returns the distorted coordinate mapped back with
// *0.5+0.5.
vec2 Warp(vec2 pos)
{
    vec2 centred = warp;
    // Each axis is stretched in proportion to the square of the other axis.
    vec2 stretch = vec2(1.0+centred.y*centred.y*0.031, 1.0+centred.x*centred.x*0.05);
    return (centred*stretch)*0.5+0.5;
}
// Fragment entry point of crt-geom-mini: optional curvature, a 5-read
// Lanczos2 approximation for the source colour, scanlines (with optional
// interlacing for tall inputs), a sine dot mask, a small brightness/saturation
// adjustment and, when curvature is on, a corner/border mask.
void main()
{
vec2 pos;
// With curvature enabled, start from the warped coordinate; otherwise use the
// plain texture coordinate.
if (CURV == 1.0) pos = Warp(warpp);
else pos = vTexCoord;
vec2 corn = min(pos,1.0-pos); // This is used to mask the rounded
corn.x = 0.0001/corn.x; // corners later on
// Undo the scale that the vertex stage applied to warpp.
if (CURV == 1.0) pos /= scale;
// Lanczos 2
// Source position in fractions of a texel
vec2 src_pos = pos*SourceSize.xy;
// Source bottom left texel centre
vec2 src_centre = floor(src_pos - 0.5) + 0.5;
// f is position. f.x runs left to right, y bottom to top, z right to left, w top to bottom
vec4 f;
f.xy = src_pos - src_centre;
f.zw = 1.0 - f.xy;
// Calculate weights in x and y in parallel.
// These polynomials are piecewise approximation of Lanczos kernel
// Calculator here: https://gist.github.com/going-digital/752271db735a07da7617079482394543
vec4 l2_w0_o3 = (( 1.5672 * f - 2.6445) * f + 0.0837) * f + 0.9976;
vec4 l2_w1_o3 = ((-0.7389 * f + 1.3652) * f - 0.6295) * f - 0.0004;
vec4 w1_2 = l2_w0_o3;
vec2 w12 = w1_2.xy + w1_2.zw;
vec4 wedge = l2_w1_o3 * vec4 (w12.yx, w12.yx);
// Calculate texture read positions. tc12 uses bilinear interpolation to do 4 reads in 1.
vec2 tc12 = SourceSize.zw * (src_centre + w1_2.zw / w12);
vec2 tc0 = SourceSize.zw * (src_centre - 1.0);
vec2 tc3 = SourceSize.zw * (src_centre + 2.0);
// Sharpening adjustment
float sum = wedge.x + wedge.y + wedge.z + wedge.w + w12.x * w12.y;
wedge /= sum;
// Weighted sum of the centre read and the four edge reads.
vec3 res = vec3(
COMPAT_TEXTURE(Source, vec2(tc12.x, tc0.y)).rgb * wedge.y +
COMPAT_TEXTURE(Source, vec2(tc0.x, tc12.y)).rgb * wedge.x +
COMPAT_TEXTURE(Source, tc12.xy).rgb * (w12.x * w12.y) +
COMPAT_TEXTURE(Source, vec2(tc3.x, tc12.y)).rgb * wedge.z +
COMPAT_TEXTURE(Source, vec2(tc12.x, tc3.y)).rgb * wedge.w
);
// Vertical position within the scanline period: one source row normally,
// two source rows when the input is taller than 400 lines.
float fp = fract(pos.y*SourceSize.y-0.5);
if (InputSize.y > 400.0) fp = fract(pos.y*SourceSize.y/2.0-0.5);
if (INTERL == 1.0 && InputSize.y > 400.0)
{
// On even frame counts the scanline position is shifted by half a period.
fp = mod(float(FrameCount),2.0) <1.0 ? 0.5+fp:fp;
}
// Square the colour before the scanline and mask multiplies and take the
// square root afterwards.
res *= res;
res *= scan(fp,res) + scan(1.0-fp,res);
res *= MASK*sin(fragpos)+1.0-MASK;
res = sqrt(res);
// Weighted brightness of the result, used for a boost of up to 10% and as the
// grey reference for the saturation mix.
float l = dot(vec3(0.3,0.6,0.1), res);
res *= mix(1.0,1.1,l);
res = mix(vec3(l), res, SAT);
// Corner/border mask; only applied when curvature is enabled.
if (corn.y <= corn.x && CURV == 1.0 || corn.x < 0.0001 && CURV == 1.0 )res = vec3(0.0);
FragColor = vec4(res,1.0);
}
#endif
Copy and save to shaders_glsl/crt as ‘crt-geom-mini.glslp’
shaders = 1
shader0 = shaders/crt-geom-mini.glsl
filter_linear0 = true
Runs solid 60 fps on my old Note 3 Pro while the original runs 14 fps. The image is almost the same.
For the record, crt-geom-mini was written to push the HTC One M7, that’s around ~95 gflops. Crt-cyclon pushes the Xiaomi Note 3 Pro, ~180 gflops. It would need more than that actually; I have to switch to potato mode to run at ~59.5 fps. Crt-sines runs pretty well on both, as it uses a very simple filter with 1 texture read + 1 for Convergence and many things passed to the vertex stage. Crt-m7 was canned so as not to clutter the crt folder with a trillion shaders. That looked really good too, with the quilez filter and fast scanlines etc.
I believe as long as anyone wants to make a fast shader that quilez filter is the best option for performance and good look. Then lanczos2 that I uploaded to “windowed” folder looks very good and is very fast.
A small snippet of code emulating a slot mask without a mask
// Slot-mask look without a mask texture: two sine-shaped darkening patterns
// along y, the second one shifted up or down in alternating 3-pixel-wide
// column groups (column position measured in OutputSize*SourceSize/InputSize
// units).
void main()
{
    vec3 res = COMPAT_TEXTURE(Source,vTexCoord).rgb;
    // First pattern: one sine period per texture row, amplitude 0.3 around 0.7.
    res *= 0.3*sin(vTexCoord.y*SourceSize.y*pi*2.0)+0.7;
    float y = vTexCoord.y*SourceSize.y*2.0;
    // Shift the second pattern by +0.5 or -0.5 depending on the column group.
    if (mod((vTexCoord.x*OutputSize.x*SourceSize.x/InputSize.x),6.0) < 3.0) y = y+0.5; else y = y-0.5;
    res *= 0.3*sin(y*pi)+0.7;
    // Write all four components; assigning only .rgb left alpha undefined.
    FragColor = vec4(res, 1.0);
}
“cheap.glsl”
Haha lol yes should pass this as crt-cheap-ultra
Speaking of cheapness, I tend to avoid mod,floor,fract if possible; since sin() is often super speedy, maybe this is faster:
float sin_check_offset = sin(frequency); float is_even = step(sin_check_offset, 0.0); y += is_even * 0.5;
Agree, mod() wrecks performance massively, like it stops executing and checks what you ordered at a snail’s pace; also multiple texture reads — that’s probably the worst — and a close second is pow(). Also sign() etc. When you use an iGPU like Intel’s at 200-300 gflops you can tell the difference.