A new little shader i did (glsl)

HyperspaceMadness · 11 August 2023 11:51

Your dedication to porting this is impressive. I feel like I’m entering the Matrix reading the code.

DariusG · 11 August 2023 12:12

Yeah tons of bit shifting and arrays lol. Also plenty of things happen in every line, put this (that jumps to a sub function) there and add this there etc.

hunterk · 11 August 2023 13:02

Would you be able to optimize it by not looping through every line on every run? That is, only doing the line of the current pixel/texel (or maybe a sliding window)?

I was also looking for any loops that might be offload-able to a feedback+alpha-channel ticker but I’m not sure if that would work/help.

DariusG · 11 August 2023 15:44

Need to rethink implementation, instead of filling arrays do what it does on a single pixel. Meaning: read the code well, understand what it does and do it again in a different way.

DariusG · 12 August 2023 17:29

For the meantime a hack of that PAL shader in retroarch, to almost match my real Amiga on composite. Fixed for GLES too. That’s really close

#version 130

#pragma parameter FIR_GAIN "FIR Gain" 1.62 0.0 3.0 0.01
#pragma parameter FIR_INVGAIN "FIR Inv Gain" 1.0 0.0 3.0 0.01
#pragma parameter ihue "I Hue" 0.2 -1.0 1.0 0.01
#pragma parameter qhue "Q Hue" 0.1 -1.0 1.0 0.01
#pragma parameter sat "Saturation" 1.0 0.0 2.0 0.01
#pragma parameter crawl "Dot Crawl" 1.0 0.0 1.0 1.0
#pragma parameter blur "Blur Size" 0.75 0.0 2.0 0.01

#if defined(VERTEX)

#if __VERSION__ >= 130
#define COMPAT_VARYING out
#define COMPAT_ATTRIBUTE in
#define COMPAT_TEXTURE texture
#else
#define COMPAT_VARYING varying 
#define COMPAT_ATTRIBUTE attribute 
#define COMPAT_TEXTURE texture2D
#endif

#ifdef GL_ES
#define COMPAT_PRECISION mediump
#else
#define COMPAT_PRECISION
#endif

COMPAT_ATTRIBUTE vec4 VertexCoord;
COMPAT_ATTRIBUTE vec4 COLOR;
COMPAT_ATTRIBUTE vec4 TexCoord;
COMPAT_VARYING vec4 COL0;
COMPAT_VARYING vec4 TEX0;

vec4 _oPosition1; 
uniform mat4 MVPMatrix;
uniform COMPAT_PRECISION int FrameDirection;
uniform COMPAT_PRECISION int FrameCount;
uniform COMPAT_PRECISION vec2 OutputSize;
uniform COMPAT_PRECISION vec2 TextureSize;
uniform COMPAT_PRECISION vec2 InputSize;

// compatibility #defines
#define vTexCoord TEX0.xy
#define SourceSize vec4(TextureSize, 1.0 / TextureSize) //either TextureSize or InputSize
#define OutSize vec4(OutputSize, 1.0 / OutputSize)

#ifdef PARAMETER_UNIFORM
uniform COMPAT_PRECISION float WHATEVER;
#else
#define WHATEVER 0.0
#endif

// Vertex stage: forwards the texture coordinate to the fragment stage and
// projects the vertex with the frontend-supplied MVP matrix.
void main()
{
    TEX0.xy = TexCoord.xy;                  // only .xy is written; .zw is left unset
    gl_Position = MVPMatrix * VertexCoord;
}

#elif defined(FRAGMENT)

#if __VERSION__ >= 130
#define COMPAT_VARYING in
#define COMPAT_TEXTURE texture
out vec4 FragColor;
#else
#define COMPAT_VARYING varying
#define FragColor gl_FragColor
#define COMPAT_TEXTURE texture2D
#endif

#ifdef GL_ES
#ifdef GL_FRAGMENT_PRECISION_HIGH
precision highp float;
#else
precision mediump float;
#endif
#define COMPAT_PRECISION mediump
#else
#define COMPAT_PRECISION
#endif

uniform COMPAT_PRECISION int FrameDirection;
uniform COMPAT_PRECISION int FrameCount;
uniform COMPAT_PRECISION vec2 OutputSize;
uniform COMPAT_PRECISION vec2 TextureSize;
uniform COMPAT_PRECISION vec2 InputSize;
uniform sampler2D Texture;
COMPAT_VARYING vec4 TEX0;

// compatibility #defines
#define Source Texture
#define vTexCoord TEX0.xy

#define SourceSize vec4(TextureSize, 1.0 / TextureSize) //either TextureSize or InputSize
#define OutSize vec4(OutputSize, 1.0 / OutputSize)

// Runtime parameters. With PARAMETER_UNIFORM defined the values arrive as
// uniforms; otherwise the shader is compiled with the constants below, which
// mirror the defaults given on the "#pragma parameter" lines so that both
// builds produce the same picture.
#ifdef PARAMETER_UNIFORM
uniform COMPAT_PRECISION float FIR_GAIN;
uniform COMPAT_PRECISION float FIR_INVGAIN;
uniform COMPAT_PRECISION float ihue;
uniform COMPAT_PRECISION float qhue;
uniform COMPAT_PRECISION float sat;
uniform COMPAT_PRECISION float crawl;
uniform COMPAT_PRECISION float blur;

#else
#define FIR_GAIN 1.62
#define FIR_INVGAIN 1.0
#define ihue 0.2
#define qhue 0.1
#define sat 1.0
#define crawl 1.0
#define blur 0.75

#endif



#define PI          3.14159265358
#define FSC         21477272.73
#define FLINE       15625.0
#define VISIBLELINES 312.0

#define RGB_to_YIQ  mat3( 0.299 , 0.595716 , 0.211456 ,   0.587    , -0.274453 , -0.522591 ,      0.114    , -0.321263 , 0.311135 )
#define YIQ_to_RGB  mat3( 1.0   , 1.0      , 1.0      ,   0.9563   , -0.2721   , -1.1070   ,      0.6210   , -0.6474   , 1.7046   )

#define RGB_to_YUV  mat3( 0.299 , -0.14713 , 0.615    ,   0.587    , -0.28886  , -0.514991 ,      0.114    , 0.436     , -0.10001 )
#define YUV_to_RGB  mat3( 1.0   , 1.0      , 1.0      ,   0.0      , -0.39465  , 2.03211   ,      1.13983  , -0.58060  , 0.0      )


#define FIRTAPS 20
const float FIR[20] = float[20] (-0.008030271,0.003107906,0.016841352,0.032545161,0.049360136,
											0.066256720,0.082120150,0.095848433,0.106453014,0.113151423,
											0.115441842,0.113151423,0.106453014,0.095848433,0.082120150,
											0.066256720,0.049360136,0.032545161,0.016841352,0.003107906);

//#define FIR_GAIN 2.0
//#define FIR_INVGAIN 1.02

float width_ratio;
float height_ratio;
float altv;
float invx;
vec2 dx;

#define time float(FrameCount)
#define fetch(offset, pos, invx) COMPAT_TEXTURE(Source, vec2(pos.xy + vec2(offset*invx ,0.0)))

// Builds the modulated signal at `xy`: a 0.5/0.3/0.2 blend of the texel and
// its neighbours at +dx / -dx is converted with RGB_to_YUV, then the second
// and third components are weighted by the supplied sine / cosine and added
// to the first. The result is clamped to [0, 1].
float mod_luma(vec2 xy, float sinwt, float coswt) {
    vec3 centre = fetch(0.0, xy, invx).xyz;
    vec3 ahead  = fetch(0.0, xy+dx, invx).xyz;
    vec3 behind = fetch(0.0, xy-dx, invx).xyz;

    vec3 blended = centre*0.5;
    blended += ahead*0.3;
    blended += behind*0.2;

    vec3 yuv = RGB_to_YUV * blended;
    float composite = yuv.r + yuv.g*sinwt + yuv.b*coswt;
    return clamp(composite, 0.0, 1.0);
}

// Modulates the picture at horizontal tap offset `ofs` (in units of invx) and
// multiplies the clamped signal by the sine / cosine of the same phase.
// Returns vec2(signal*sin(phase), signal*cos(phase)); the caller sums these
// over the FIR taps.
vec2 modem_UV(vec2 pos, float ofs) {
    // Phase at the tap position, plus the per-line / per-frame offset altv.
    float t = (pos.x + ofs*invx) * OutputSize.x*SourceSize.x/InputSize.x;
    float phase = t * 2.0 * PI/2.0 + altv;
    vec2 carrier = vec2(sin(phase), cos(phase));

    // NOTE(review): only the 0.5-weighted tap follows `ofs`; the two blur taps
    // are always read at pos+dx / pos-dx. Confirm whether that is intended.
    vec3 blended = fetch(ofs, pos, invx).xyz*0.5;
    blended += fetch(0.0, pos+dx, invx).xyz*0.3;
    blended += fetch(0.0, pos-dx, invx).xyz*0.2;

    vec3 yuv = RGB_to_YUV * blended;
    float signal = clamp(yuv.x + yuv.y*carrier.x + yuv.z*carrier.y, 0.0, 1.0);

    return signal * carrier;
}

// Fragment stage: modulates the source line, low-passes the demodulated
// chroma with the FIR kernel, subtracts the re-modulated chroma from the
// signal to obtain luma, applies the hue/saturation matrix and converts the
// result back to RGB.
void main() {

    // Hue / saturation matrix. ihue, qhue and sat are uniforms when
    // PARAMETER_UNIFORM is defined, so the matrix is built here instead of in
    // a global initialiser (those must be constant expressions on GLSL ES).
    // Used as "vector * matrix", which yields
    //   (y, ihue*y + sat*u, qhue*y + sat*v).
    mat3 mix_mat = mat3(
        1.0, 0.0, 0.0,
        ihue, sat, 0.0,
        qhue, 0.0, sat
    );

    // Pull the sampling row 80% of the way towards the texel centre.
    vec2 cent = floor(vTexCoord*SourceSize.xy)+0.5;
    vec2 near = cent*SourceSize.zw;
    vec2 pos = vTexCoord;
    pos.y = mix(vTexCoord.y, near.y, 0.8);

    // Globals read by mod_luma() / modem_UV().
    dx = vec2(SourceSize.z*blur, 0.0);
    float crawler = crawl == 1.0 ? 2.0*mod(time,30.0) : 0.0;
    altv = pos.y*SourceSize.y*PI/2.0 + crawler;
    invx = 0.05 / OutputSize.x; // equals 5 samples per Fsc period (author's note)

    // lowpass U/V at baseband
    vec2 UV = vec2(0.0);
    for (int i = 0; i < FIRTAPS; i++) {
        // float() conversions are required on GLES (no implicit int -> float)
        UV += modem_UV(pos, 2.0*float(i) - float(FIRTAPS)) * FIR[i];
    }
    UV *= FIR_GAIN; // gain applied once instead of once per tap

    // NOTE(review): this phase is derived from SourceSize only, while
    // modem_UV() also scales by OutputSize/InputSize — confirm both are meant
    // to describe the same carrier.
    float wt = pos.x*SourceSize.x*PI/2.0;
    float sinwt = sin(wt + altv);
    float coswt = cos(wt + altv);

    float luma = mod_luma(pos, sinwt, coswt) - FIR_INVGAIN*(UV.x*sinwt + UV.y*coswt);
    vec3 yuv_result = vec3(luma, UV.x, UV.y);
    yuv_result *= mix_mat;
    FragColor = vec4(YUV_to_RGB * yuv_result, 1.0);
}
#endif

DariusG · 13 August 2023 11:58

After some talk with him the 1st pass should look something like this on GLSL. I have to check if the numbers LFREQ etc are correct.

#version 110

#pragma parameter FIR_GAIN "FIR Gain" 1.62 0.0 3.0 0.01


#if defined(VERTEX)

#if __VERSION__ >= 130
#define COMPAT_VARYING out
#define COMPAT_ATTRIBUTE in
#define COMPAT_TEXTURE texture
#else
#define COMPAT_VARYING varying 
#define COMPAT_ATTRIBUTE attribute 
#define COMPAT_TEXTURE texture2D
#endif

#ifdef GL_ES
#define COMPAT_PRECISION mediump
#else
#define COMPAT_PRECISION
#endif

COMPAT_ATTRIBUTE vec4 VertexCoord;
COMPAT_ATTRIBUTE vec4 COLOR;
COMPAT_ATTRIBUTE vec4 TexCoord;
COMPAT_VARYING vec4 COL0;
COMPAT_VARYING vec4 TEX0;

vec4 _oPosition1; 
uniform mat4 MVPMatrix;
uniform COMPAT_PRECISION int FrameDirection;
uniform COMPAT_PRECISION int FrameCount;
uniform COMPAT_PRECISION vec2 OutputSize;
uniform COMPAT_PRECISION vec2 TextureSize;
uniform COMPAT_PRECISION vec2 InputSize;

// compatibility #defines
#define vTexCoord TEX0.xy
#define SourceSize vec4(TextureSize, 1.0 / TextureSize) //either TextureSize or InputSize
#define OutSize vec4(OutputSize, 1.0 / OutputSize)

#ifdef PARAMETER_UNIFORM
uniform COMPAT_PRECISION float WHATEVER;
#else
#define WHATEVER 0.0
#endif

// Vertex stage: hands the texture coordinate to the fragment stage and
// transforms the vertex by MVPMatrix.
void main()
{
    TEX0.xy = TexCoord.xy;                  // TEX0.zw is not written
    gl_Position = MVPMatrix * VertexCoord;
}

#elif defined(FRAGMENT)

#if __VERSION__ >= 130
#define COMPAT_VARYING in
#define COMPAT_TEXTURE texture
out vec4 FragColor;
#else
#define COMPAT_VARYING varying
#define FragColor gl_FragColor
#define COMPAT_TEXTURE texture2D
#endif

#ifdef GL_ES
#ifdef GL_FRAGMENT_PRECISION_HIGH
precision highp float;
#else
precision mediump float;
#endif
#define COMPAT_PRECISION mediump
#else
#define COMPAT_PRECISION
#endif

uniform COMPAT_PRECISION int FrameDirection;
uniform COMPAT_PRECISION int FrameCount;
uniform COMPAT_PRECISION vec2 OutputSize;
uniform COMPAT_PRECISION vec2 TextureSize;
uniform COMPAT_PRECISION vec2 InputSize;
uniform sampler2D Texture;
COMPAT_VARYING vec4 TEX0;

// compatibility #defines
#define Source Texture
#define vTexCoord TEX0.xy

#define SourceSize vec4(TextureSize, 1.0 / TextureSize) //either TextureSize or InputSize
#define OutSize vec4(OutputSize, 1.0 / OutputSize)

#ifdef PARAMETER_UNIFORM
uniform COMPAT_PRECISION float FIR_GAIN;


#else
#define FIR_GAIN 2.0

#endif

#define PI 3.14159265358
#define CRT_CC_SAMPLES 4.0
#define CRT_CC_VPER 2.0
// Parenthesised so the macro stays a single value inside larger expressions.
#define BLACK_LVL (7.0/140.0)
#define BLACK_PT 0.0
#define Q_OFFSET 90.0

const float hue = 0.0;              // 0-359
const float dot_crawl_offset = 0.0; // 0-5
const float LINE_FREQ = 14.31818;   // full line
const float Y_FREQ = 2.2;           // Luma   (Y) limit used here (nominal 4.2 MHz)
const float I_FREQ = 1.5;           // Chroma (I) 1.5  MHz
const float Q_FREQ = 0.55;          // Chroma (Q) 0.55 MHz
// mat3() fills column by column: the first three values of RGB_to_YIQ are the
// red coefficients of Y, I and Q, the next three the green ones, then blue.
#define RGB_to_YIQ  mat3( 0.299, 0.595716, 0.211456,0.587, -0.274453, -0.522591, 0.114, -0.321263, 0.311135 )
#define YIQ_to_RGB  mat3( 1.0, 1.0, 1.0, 0.9563, -0.2721, -1.1070,0.6210, -0.6474, 1.7046)

// Derives a filter coefficient from the band limit `limit`, relative to
// LINE_FREQ. Returns 2.048 - exp(-(6.434*0.512) / rate), a scaled-down form
// of the fixed-point expression quoted in the trailing comment.
// (The stray ';' after the closing brace was removed: an empty declaration at
// global scope is not accepted by GLSL compilers before 4.60.)
float init_iir (float limit) {
    float rate = 1000.0*limit/LINE_FREQ; // cycles / pixel rate
    return  2.048 - exp(-(6.434*0.512 / rate));  // 2048 - expx(-6434 * 512/rate)
}

// Returns exp(s - h), where `h` is the coefficient produced by init_iir() and
// `s` the sample; main() multiplies the sample by this factor.
// (The stray ';' after the closing brace was removed: an empty declaration at
// global scope is not accepted by GLSL compilers before 4.60.)
float iirf (float h, float s) {
    return exp(s - h);
}


// Fragment stage (first pass): converts the texel to YIQ, derives the
// sub-carrier phase from the output pixel position, scales Y/I/Q by their
// filter factors and writes the summed signal level as a grey value.
void main() {

    vec3 rgb = COMPAT_TEXTURE(Source, vTexCoord).rgb;
    // RGB_to_YIQ is laid out column-major, so it has to be applied as
    // matrix * vector; "rgb * RGB_to_YIQ" would use the transposed matrix.
    vec3 yiq = RGB_to_YIQ * rgb;

    float ire = BLACK_LVL+BLACK_PT;
    float xo = 156.0;
    float yo = 23.0;

    //  full output resolution x
    vec2 pos = vTexCoord*OutputSize.xy*SourceSize.xy/InputSize.xy;

    // Align the horizontal offset to a multiple of 4 before taking the phase.
    xo = xo - mod(xo, 4.0);
    float xoff = mod((pos.x + xo), CRT_CC_SAMPLES);
    float ph   = mod((pos.y + yo + dot_crawl_offset), CRT_CC_VPER);
    float phase = ph*xoff + hue;
    float sn = sin(phase/2.0*PI/2.0);
    float cs = cos(phase/2.0*PI/2.0);

    // Initiate IIR values
    float iirY = init_iir(Y_FREQ);
    float iirI = init_iir(I_FREQ);
    float iirQ = init_iir(Q_FREQ);

    // IIR pass before sending as 1 signal
    float fY = yiq.r*iirf(iirY, yiq.r);
    float fI = yiq.g*iirf(iirI, yiq.g)*cs;
    float fQ = yiq.b*iirf(iirQ, yiq.b)*sn;

    ire += fY + fI + fQ;
    ire = clamp(ire,0.0,1.1);

    // The former "rgb = yiq*YIQ_to_RGB" store was never read and is gone.
    FragColor = vec4(vec3(ire), 1.0);
}
#endif

hunterk · 13 August 2023 13:29

That looks a lot more manageable!

DariusG · 13 August 2023 14:47

Yeah just need to understand what the code does and do it properly for GLSL. Now i am not in the mood to jump in to the second pass rabbit hole maybe after some days lol.

DariusG · 13 August 2023 19:29

All numbers revised to correct values

#version 110

#pragma parameter FIR_GAIN "FIR Gain" 1.62 0.0 3.0 0.01


#if defined(VERTEX)

#if __VERSION__ >= 130
#define COMPAT_VARYING out
#define COMPAT_ATTRIBUTE in
#define COMPAT_TEXTURE texture
#else
#define COMPAT_VARYING varying 
#define COMPAT_ATTRIBUTE attribute 
#define COMPAT_TEXTURE texture2D
#endif

#ifdef GL_ES
#define COMPAT_PRECISION mediump
#else
#define COMPAT_PRECISION
#endif

COMPAT_ATTRIBUTE vec4 VertexCoord;
COMPAT_ATTRIBUTE vec4 COLOR;
COMPAT_ATTRIBUTE vec4 TexCoord;
COMPAT_VARYING vec4 COL0;
COMPAT_VARYING vec4 TEX0;

vec4 _oPosition1; 
uniform mat4 MVPMatrix;
uniform COMPAT_PRECISION int FrameDirection;
uniform COMPAT_PRECISION int FrameCount;
uniform COMPAT_PRECISION vec2 OutputSize;
uniform COMPAT_PRECISION vec2 TextureSize;
uniform COMPAT_PRECISION vec2 InputSize;

// compatibility #defines
#define vTexCoord TEX0.xy
#define SourceSize vec4(TextureSize, 1.0 / TextureSize) //either TextureSize or InputSize
#define OutSize vec4(OutputSize, 1.0 / OutputSize)

#ifdef PARAMETER_UNIFORM
uniform COMPAT_PRECISION float WHATEVER;
#else
#define WHATEVER 0.0
#endif

// Vertex stage: copies the texture coordinate into the TEX0 varying and
// applies the MVP transform to the vertex position.
void main()
{
    TEX0.xy = TexCoord.xy;                  // .zw of TEX0 stays unwritten
    gl_Position = MVPMatrix * VertexCoord;
}

#elif defined(FRAGMENT)

#if __VERSION__ >= 130
#define COMPAT_VARYING in
#define COMPAT_TEXTURE texture
out vec4 FragColor;
#else
#define COMPAT_VARYING varying
#define FragColor gl_FragColor
#define COMPAT_TEXTURE texture2D
#endif

#ifdef GL_ES
#ifdef GL_FRAGMENT_PRECISION_HIGH
precision highp float;
#else
precision mediump float;
#endif
#define COMPAT_PRECISION mediump
#else
#define COMPAT_PRECISION
#endif

uniform COMPAT_PRECISION int FrameDirection;
uniform COMPAT_PRECISION int FrameCount;
uniform COMPAT_PRECISION vec2 OutputSize;
uniform COMPAT_PRECISION vec2 TextureSize;
uniform COMPAT_PRECISION vec2 InputSize;
uniform sampler2D Texture;
COMPAT_VARYING vec4 TEX0;

// compatibility #defines
#define Source Texture
#define vTexCoord TEX0.xy

#define SourceSize vec4(TextureSize, 1.0 / TextureSize) //either TextureSize or InputSize
#define OutSize vec4(OutputSize, 1.0 / OutputSize)

#ifdef PARAMETER_UNIFORM
uniform COMPAT_PRECISION float FIR_GAIN;


#else
#define FIR_GAIN 2.0

#endif

#define PI 3.14159265358
#define CRT_CC_SAMPLES 4.0
#define CRT_CC_VPER 2.0
// Parenthesised so the macro stays a single value inside larger expressions.
#define BLACK_LVL (7.0/140.0)
#define BLACK_PT 0.0
#define Q_OFFSET 90.0

const float hue = 0.0;              // 0-359
const float dot_crawl_offset = 0.0; // 0-5
const float LINE_FREQ = 14.31818;   // full line
const float Y_FREQ = 4.2;           // Luma   (Y) 4.2  MHz
const float I_FREQ = 1.5;           // Chroma (I) 1.5  MHz
const float Q_FREQ = 0.55;          // Chroma (Q) 0.55 MHz
// mat3() fills column by column: the first three values of RGB_to_YIQ are the
// red coefficients of Y, I and Q, the next three the green ones, then blue.
#define RGB_to_YIQ  mat3( 0.299, 0.595716, 0.211456,0.587, -0.274453, -0.522591, 0.114, -0.321263, 0.311135 )
#define YIQ_to_RGB  mat3( 1.0, 1.0, 1.0, 0.9563, -0.2721, -1.1070,0.6210, -0.6474, 1.7046)

// Derives a filter coefficient from the band limit `limit`:
// rate = limit / LINE_FREQ, result = 1.0 - exp(-PI / rate).
// The trailing comment keeps the fixed-point expression the author ported from.
// (The stray ';' after the closing brace was removed: an empty declaration at
// global scope is not accepted by GLSL compilers before 4.60.)
float init_iir (float limit) {
    float rate = limit/LINE_FREQ; // cycles / pixel rate
    return  1.0 - exp(-PI / rate);  // 2048 - expx(-6434 * 512/rate)
}

// Returns exp(s - h), where `h` is the coefficient produced by init_iir() and
// `s` the sample; main() multiplies the sample by this factor.
// (The stray ';' after the closing brace was removed: an empty declaration at
// global scope is not accepted by GLSL compilers before 4.60.)
float iirf (float h, float s) {
    return exp(s - h);
}


// Fragment stage (first pass): converts the texel to YIQ, derives the
// sub-carrier phase from the output pixel position, scales Y/I/Q by their
// filter factors and writes the summed signal level as a grey value.
void main() {

    vec3 rgb = COMPAT_TEXTURE(Source, vTexCoord).rgb;
    // RGB_to_YIQ is laid out column-major, so it has to be applied as
    // matrix * vector; "rgb * RGB_to_YIQ" would use the transposed matrix.
    vec3 yiq = RGB_to_YIQ * rgb;

    float ire = BLACK_LVL+BLACK_PT;
    float xo = 156.0;
    float yo = 23.0;

    // position in output pixels
    vec2 pos = vTexCoord*OutputSize.xy*SourceSize.xy/InputSize.xy;

    // Align the horizontal offset to a multiple of 4 before taking the phase.
    xo = xo - mod(xo, 4.0);
    float xoff = mod((pos.x + xo), CRT_CC_SAMPLES);
    float ph   = mod((pos.y + yo + dot_crawl_offset), CRT_CC_VPER);
    float phase = ph*xoff + hue;
    float sn = sin(phase/2.0*PI/2.0);
    float cs = cos(phase/2.0*PI/2.0);

    // Initiate IIR values
    float iirY = init_iir(Y_FREQ);
    float iirI = init_iir(I_FREQ);
    float iirQ = init_iir(Q_FREQ);

    // IIR pass before sending as 1 signal
    float fY = yiq.r*iirf(iirY, yiq.r);
    float fI = yiq.g*iirf(iirI, yiq.g)*cs;
    float fQ = yiq.b*iirf(iirQ, yiq.b)*sn;

    ire += fY + fI + fQ;
    ire = clamp(ire,0.0,1.0);

    FragColor = vec4(vec3(ire), 1.0);
}
#endif

DariusG · 15 August 2023 18:34

Ok so added 2 GLSL shaders, in “pal” folder named A520 mimic my Amiga on Composite modulator, this is almost 1:1 replication (!). Did so much study on the matter i can write an ntsc on my cellphone now lol

and a replacement for “ntsc-simple”

DariusG · 20 August 2023 07:42

@Cyber since you like blargg’s ntsc, check the new “ntsc-simple-hd” i uploaded to slang/glsl. Tell me what you think It’s a bit sharper since it uses 20 passes “blurring” left and right and blargg uses 33.

There is a kernel array of size 66 in blargg, the 2nd part is filled with luma filter values related to “resolution” and “sharpness” values

#define  bleed 0.2
#define LUMA_CUTOFF 0.2

// generate luma (y) filter using sinc kernel
// Excerpt: fills kernels[33..65] (33 taps) and accumulates their windowed sum.
// `sharpness`, `resolution`, `kernels` and PI come from code not shown here.
        // sinc with rolloff (dsf) 
        float rolloff = 1.0 + sharpness * 0.032;
        float maxh = 32.0;
        float pow_a_n = pow( rolloff, maxh );
        float sum = 0.0;
        int i;
        // quadratic mapping to reduce negative (blurring) range 
        float to_angle = resolution + 1.0;
        to_angle = PI / maxh * LUMA_CUTOFF * (to_angle * to_angle + 1.0);
        
        // index 49 = 33 + 16, i.e. the tap where x == 0 in the loop below
        kernels [49] = maxh; // default center value 

    for ( i = 0; i < 33; i++ )
        {
            // x runs from -16 to +16
            int x = i - 16;
            float angle = x * to_angle;
            // instability occurs at center point with rolloff very close to 1.0 
            // true for every x != 0; the centre tap is only recomputed when
            // pow_a_n lies outside [0.981, 1.056], otherwise the default stays
            if ( x || pow_a_n > 1.056 || pow_a_n < 0.981 )
            {
                float rolloff_cos_a = rolloff * cos( angle );
                float num = 1.0 - rolloff_cos_a - pow_a_n * cos( maxh * angle ) +
                            pow_a_n * rolloff * cos( (maxh - 1.0) * angle );
                float den = 1.0 - rolloff_cos_a - rolloff_cos_a + rolloff * rolloff;
                float dsf = num / den;
                kernels [33 + i] = dsf - 0.5;
            }
    }
        
        // apply blackman window and find sum 
        // only `sum` is updated here; the kernel entries are not modified
        for ( i = 0; i < 33; i++ )
        {
            float x = PI * 2 / 32 * i;
            float blackman = 0.42 - 0.5*cos( x ) + 0.08*cos( x * 2 );
            sum += kernels [33 + i] * blackman;
        }

And the 1st part, the first 33 numbers, is filled with chroma values related to “bleed”

// generate chroma (iq) filter using gaussian kernel
// Excerpt: fills kernels[0..32] with exp(i*i*bleed) for i = -16..16 and then
// normalises the even-indexed and odd-indexed taps separately.
// NOTE(review): `bleed` is assigned below, so it has to be a variable in the
// real code rather than the "#define bleed 0.2" shown earlier in the thread.
        float cutoff_factor = -0.03125;
        
    if ( bleed < 0.0 )
        {
            // keep extreme value accessible only near upper end of scale (1.0) 
            // three squarings raise bleed to the 8th power before rescaling
            bleed *= bleed;
            bleed *= bleed;
            bleed *= bleed;
            bleed *= -30.0 / 0.65;
        }
        bleed =  0.35 * cutoff_factor * bleed;
           
        for ( i = -16; i <= 16; i++ ){
            kernels [16 + i] = exp( float(i) * float(i) * bleed );
        }
       
        // normalize even and odd phases separately 
        for ( i = 0; i < 2; i++ )
        {
            sum = 0.0;
            int x;
            // first pass: total of every second tap starting at index i
            for ( x = i; x < 33; x += 2 )
                sum += kernels [x];
            
            // second pass: scale those taps so that they add up to 1
            sum = 1.0 / sum;
            for ( x = i; x < 33; x += 2 )
            {
                kernels[x] *= sum;
            }
        }

These arrays could be pre-calculated for given bleed and resolution values in steps, let’s say -1.0, -0.5, 0.0, 0.5, 1.0, so you don’t need to calculate this whole huge loop in real time and drag performance to the bottom.

In the end, luma and chroma samples (the 33-1=32 passes) are filtered multiplied by these arrays. If you look carefully luma is half blurred than chroma (edit: actually 4 times since it runs only 0… to 16 and chroma runs -32… to 32… So it’s a 4:1)

        int x = i - 16;

// later on

if ( x ...... // meaning if x >0 so half passes of 32

Cyber · 20 August 2023 15:59

Thanks for asking, it would be an honour as soon as I get my latest presets pack out the door. I had to overhaul it at least 3 times already since the initial release. The first one was to add Grade (the old one), the second was mainly to tweak filters. The third one was to calibrate using 240p Test Suite, then retweak settings and filters. While I was working on that a couple of users complained that the presets weren’t loading. It was because they don’t have the old Grade, since the Online Updater doesn’t pull it anymore, and in addition to that the new Grade is in a different folder. So I had to overhaul again just to redo all the colour and brightness settings to be able to use this new Grade, and I was actually quite satisfied after maybe the third overhaul.

This one’s looking good though.

So in other words as soon as I get a chance. Lol

It’s cool that you’ve gotten your hands dirty in the Blargg code and can understand and visualize what’s been going on under the hood for all these years.

As we can see, it’s highly optimized.

One thing I would like to get some more information on would be the exact ranges and steps of all of the settings.

I know for a fact that most of the settings can be incremented in steps of ±0.1.

Most seem to have a range of between -1 to +1

I think I found one where the max was 0.5 though (gamma) can’t say for sure. All I use are my eyes and trial and error.

One interesting setting is the colour bleed function. Each 0.1 step seems to only increase the bleed marginally. So you can almost go overboard without going overboard.

With many of the other settings a ±0.1 adjustment is quite noticeable.

One thing I don’t get and I wish could be resolved or worked around is why the filter causes the Aspect Ratio to change slightly.

DariusG · 1 September 2023 17:20

“Lab” here

Direct comparison and 1:1 Trinitron PAL colors, Amiga at least, preset

shaders = "6"
feedback_pass = "0"
shader0 = "../misc/shaders/simple_color_controls.glsl"
filter_linear0 = "false"
shader1 = "../misc/shaders/chromaticity.glsl"
filter_linear1 = "false"
shader2 = "../crt/shaders/crt-consumer/linearize.glsl"
filter_linear2 = "false"
shader3 = "../crt/shaders/crt-consumer/glow_x.glsl"
filter_linear3 = "false"
shader4 = "../crt/shaders/crt-consumer/glow_y.glsl"
filter_linear4 = "false"
shader5 = "../crt/shaders/crt-geom.glsl"
filter_linear5 = "false"
TEMP = "5423.000000"
BLACK = "-0.020000"
gamma_in = "1.000000"
gamma_out_red = "1.000000"
gamma_out_green = "1.000000"
gamma_out_blue = "1.000000"
R = "2.000000"
COLOR_MODE = "-1.000000"
Dx = "-1.000000"
CRTgamma = "1.000000"
SATURATION = "1.200000"
DOTMASK = "0.5"

Default RGB colors

SNES Flashback… yeah that looks like Trinitron

DariusG · 11 September 2023 12:07

Another day, another project, today’s was “Make crt-geom run on an old 2016 cellphone around 120 gflops gpu” so here it is…

Copy and save as ‘crt-geom-mini.glsl’ drop to shaders_glsl/crt/shaders

#version 110

// This program is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 2 of the License, or (at your option)
// any later version.

#pragma parameter CURV "CRT-Geom Curvature" 1.0 0.0 1.0 1.0
#pragma parameter SCAN "CRT-Geom Scanline Weight" 0.3 0.2 0.6 0.05
#pragma parameter MASK "CRT-Geom Dotmask Strength" 0.25 0.0 1.0 0.05
#pragma parameter LUM "CRT-Geom Luminance" 0.05 0.0 0.5 0.01
#pragma parameter INTERL "CRT-Geom Interlacing Simulation" 1.0 0.0 1.0 1.0
#pragma parameter SAT "CRT-Geom Saturation" 1.1 0.0 2.0 0.01

#define PI 3.1415926535897932384626433

#if defined(VERTEX)

#if __VERSION__ >= 130
#define COMPAT_VARYING out
#define COMPAT_ATTRIBUTE in
#define COMPAT_TEXTURE texture
#else
#define COMPAT_VARYING varying 
#define COMPAT_ATTRIBUTE attribute 
#define COMPAT_TEXTURE texture2D
#endif

#ifdef GL_ES
#define COMPAT_PRECISION mediump
#else
#define COMPAT_PRECISION
#endif

COMPAT_ATTRIBUTE vec4 VertexCoord;
COMPAT_ATTRIBUTE vec4 COLOR;
COMPAT_ATTRIBUTE vec4 TexCoord;
COMPAT_VARYING vec4 COL0;
COMPAT_VARYING vec4 TEX0;
COMPAT_VARYING vec2 scale;
COMPAT_VARYING vec2 warpp;
COMPAT_VARYING vec2 warppm;
COMPAT_VARYING vec2 warp;
COMPAT_VARYING float fragpos;
COMPAT_VARYING float omega;

vec4 _oPosition1; 
uniform mat4 MVPMatrix;
uniform COMPAT_PRECISION int FrameDirection;
uniform COMPAT_PRECISION int FrameCount;
uniform COMPAT_PRECISION vec2 OutputSize;
uniform COMPAT_PRECISION vec2 TextureSize;
uniform COMPAT_PRECISION vec2 InputSize;

// compatibility #defines
#define vTexCoord TEX0.xy
#define SourceSize vec4(TextureSize, 1.0 / TextureSize) //either TextureSize or InputSize
#define OutSize vec4(OutputSize, 1.0 / OutputSize)

#ifdef PARAMETER_UNIFORM
uniform COMPAT_PRECISION float SIZE;

#else
#define SIZE     1.0      
   
#endif

// Vertex stage: besides the usual position / texcoord pass-through it
// precomputes per-vertex values that the fragment stage reads as varyings
// (scale, warpp, warp, fragpos).
void main()
{
    vec2 uv = TexCoord.xy;
    TEX0.xy = uv;

    // Ratio of texture size to input size.
    scale = SourceSize.xy/InputSize.xy;

    // Texture coordinate multiplied by scale, then remapped by *2-1 for Warp().
    warpp = uv*scale;
    warp = warpp*2.0-1.0;

    // Argument of the sin() used for the dot mask in the fragment stage.
    fragpos = uv.x*OutputSize.x*scale.x*PI;

    gl_Position = MVPMatrix * VertexCoord;
}

#elif defined(FRAGMENT)

#if __VERSION__ >= 130
#define COMPAT_VARYING in
#define COMPAT_TEXTURE texture
out vec4 FragColor;
#else
#define COMPAT_VARYING varying
#define FragColor gl_FragColor
#define COMPAT_TEXTURE texture2D
#endif

#ifdef GL_ES
#ifdef GL_FRAGMENT_PRECISION_HIGH
precision highp float;
#else
precision mediump float;
#endif
#define COMPAT_PRECISION mediump
#else
#define COMPAT_PRECISION
#endif

uniform COMPAT_PRECISION int FrameDirection;
uniform COMPAT_PRECISION int FrameCount;
uniform COMPAT_PRECISION vec2 OutputSize;
uniform COMPAT_PRECISION vec2 TextureSize;
uniform COMPAT_PRECISION vec2 InputSize;
uniform sampler2D Texture;
COMPAT_VARYING vec4 TEX0;
COMPAT_VARYING vec2 scale;
COMPAT_VARYING float fragpos;
COMPAT_VARYING vec2 warpp;
COMPAT_VARYING vec2 warppm;
COMPAT_VARYING vec2 warp;
COMPAT_VARYING float omega;

// compatibility #defines
#define Source Texture
#define vTexCoord TEX0.xy

#define SourceSize vec4(TextureSize, 1.0 / TextureSize) //either TextureSize or InputSize
#define OutSize vec4(OutputSize, 1.0 / OutputSize)

// Runtime parameters. With PARAMETER_UNIFORM defined the values arrive as
// uniforms; otherwise the constants below are compiled in. They mirror the
// defaults given on the "#pragma parameter" lines so that both builds produce
// the same picture.
#ifdef PARAMETER_UNIFORM
uniform COMPAT_PRECISION float SCAN;
uniform COMPAT_PRECISION float MASK;
uniform COMPAT_PRECISION float CURV;
uniform COMPAT_PRECISION float LUM;
uniform COMPAT_PRECISION float SAT;
uniform COMPAT_PRECISION float INTERL;

#else
#define SCAN  0.3
#define MASK  0.25
#define CURV  1.0
#define LUM 0.05
#define SAT 1.1
#define INTERL 1.0

#endif


// Scanline weight at distance `pos` from the line centre. The beam width is
// SCAN plus a small term proportional to the colour's summed channels, and
// the result is LUM plus a Gaussian of pos/width scaled by (0.1 + SCAN)/width.
float scan(float pos, vec3 color)
{
    float brightness = dot(color, vec3(0.333));
    float wid = SCAN + 0.1 * brightness*0.8;
    float weight = pos / wid;
    float bell = exp(-weight * weight );
    return LUM + (0.1 + SCAN) * bell / wid;
}

// Applies the screen-curvature distortion. The `pos` argument is not used:
// the function starts from the `warp` varying computed in the vertex stage,
// stretches each axis by a quadratic function of the other axis, and maps the
// result back with *0.5+0.5.
vec2 Warp(vec2 pos)
{
    vec2 p = warp;
    vec2 stretch = vec2(1.0+p.y*p.y*0.031, 1.0+p.x*p.x*0.05);
    p *= stretch;
    return p*0.5+0.5;
}

// Fragment stage: optional curvature, 5-tap Lanczos-2 resampling of the
// source, scanlines, dot mask, a brightness-dependent boost, saturation and
// corner masking.
void main()
{
    // Curved or flat sampling position, selected by the CURV parameter.
    vec2 pos;
    if (CURV == 1.0) pos = Warp(warpp);
    else pos = vTexCoord;
    vec2 corn   = min(pos,1.0-pos); // This is used to mask the rounded
         corn.x = 0.0001/corn.x;  // corners later on
    // Undo the scale factor that the vertex stage multiplied into warpp.
    if (CURV == 1.0) pos /= scale;


// Lanczos 2
    // Source position in fractions of a texel
    vec2 src_pos = pos*SourceSize.xy;
    // Source bottom left texel centre
    vec2 src_centre = floor(src_pos - 0.5) + 0.5;
    // f is position. f.x runs left to right, y bottom to top, z right to left, w top to bottom
    vec4 f; 
    f.xy = src_pos - src_centre;
    f.zw = 1.0 - f.xy;
    // Calculate weights in x and y in parallel.
    // These polynomials are piecewise approximation of Lanczos kernel
    // Calculator here: https://gist.github.com/going-digital/752271db735a07da7617079482394543
    vec4 l2_w0_o3 = (( 1.5672 * f - 2.6445) * f + 0.0837) * f + 0.9976;
    vec4 l2_w1_o3 = ((-0.7389 * f + 1.3652) * f - 0.6295) * f - 0.0004;

    // w12 is the combined weight of the two middle taps per axis; wedge holds
    // the weights of the four outer taps.
    vec4 w1_2  = l2_w0_o3;
    vec2 w12   = w1_2.xy + w1_2.zw;
    vec4 wedge = l2_w1_o3 * vec4 (w12.yx, w12.yx);

    // Calculate texture read positions. tc12 uses bilinear interpolation to do 4 reads in 1.
    vec2 tc12 = SourceSize.zw * (src_centre + w1_2.zw / w12);
    vec2 tc0  = SourceSize.zw * (src_centre - 1.0);
    vec2 tc3  = SourceSize.zw * (src_centre + 2.0);
    
    // Sharpening adjustment
    // NOTE(review): only the four outer weights are divided by sum; the
    // centre weight (w12.x * w12.y) is used as is below — confirm intended.
    float sum = wedge.x + wedge.y + wedge.z + wedge.w + w12.x * w12.y;    
    wedge /= sum;

    // Five texture reads: centre plus one neighbour in each direction.
    vec3 res = vec3(
        COMPAT_TEXTURE(Source, vec2(tc12.x, tc0.y)).rgb * wedge.y +
        COMPAT_TEXTURE(Source, vec2(tc0.x, tc12.y)).rgb * wedge.x +
        COMPAT_TEXTURE(Source, tc12.xy).rgb * (w12.x * w12.y) +
        COMPAT_TEXTURE(Source, vec2(tc3.x, tc12.y)).rgb * wedge.z +
        COMPAT_TEXTURE(Source, vec2(tc12.x, tc3.y)).rgb * wedge.w
    );


    // Vertical position inside the current scanline; inputs taller than 400
    // lines use half the line frequency.
    float fp = fract(pos.y*SourceSize.y-0.5);
    if (InputSize.y > 400.0) fp = fract(pos.y*SourceSize.y/2.0-0.5);

    // Interlacing: shift the scanline by half a line on every other frame.
    if (INTERL == 1.0 && InputSize.y > 400.0) 
    {
    fp = mod(float(FrameCount),2.0) <1.0 ? 0.5+fp:fp;
    }
    // Square before the scanline / mask maths and take the root afterwards.
    res *= res;
    res *= scan(fp,res) + scan(1.0-fp,res);
    res *= MASK*sin(fragpos)+1.0-MASK;
    res = sqrt(res);
    // Boost by up to 10% depending on l, then mix towards grey by SAT.
    float l = dot(vec3(0.3,0.6,0.1), res);
    res *= mix(1.0,1.1,l);
    res = mix(vec3(l), res, SAT);
// Black out the corners / outside area, only when curvature is enabled.
if (corn.y <= corn.x && CURV == 1.0 || corn.x < 0.0001 && CURV == 1.0 )res = vec3(0.0);

    FragColor = vec4(res,1.0);
}

#endif

Copy and save to shaders_glsl/crt as ‘crt-geom-mini.glslp’

shaders = 1

shader0 = shaders/crt-geom-mini.glsl
filter_linear0 = true

Runs solid 60 fps on my old Note 3 Pro while the original runs 14 fps. The image is almost the same.

DariusG · 16 October 2023 14:36

For the record, crt-geom-mini was written to push htc one m7, that’s around ~95 gflops. Crt-cyclon pushes xiaomi note 3 pro ~180 gflops. It would need more than that actually, I have to switch to potato mode to run ~59.5 fps. Crt-sines runs pretty well on both as it uses a very simple filter with 1 texture read + 1 for Convergence and many things passed to vertex. Crt-m7 canned not to clutter the crt folder with a trillion of shaders. That looked really good too with quilez filter and fast scanlines etc.

I believe as long as anyone wants to make a fast shader that quilez filter is the best option for performance and good look. Then lanczos2 that I uploaded to “windowed” folder looks very good and is very fast.

DariusG · 23 October 2023 17:29

A small snippet of code emulating a slot mask without a mask

// Slot-mask look without a mask texture: one sine darkens along the source
// lines, and a second sine at half that frequency is shifted by +0.5 or -0.5
// depending on which 3-pixel half of every 6 output pixels the fragment is in.
// NOTE(review): `pi` is not defined inside this snippet; presumably the host
// shader provides it (the other shaders in this thread define `PI`).
void main()
{
    vec3 res = COMPAT_TEXTURE(Source,vTexCoord).rgb;

    res *= 0.3*sin(vTexCoord.y*SourceSize.y*pi*2.0)+0.7;

    float y = vTexCoord.y*SourceSize.y*2.0;

    // Alternate the vertical offset every 3 output pixels.
    if (mod((vTexCoord.x*OutputSize.x*SourceSize.x/InputSize.x),6.0) < 3.0) y = y+0.5; else y = y-0.5;
    res *= 0.3*sin(y*pi)+0.7;

    // Write alpha as well; assigning only .rgb leaves the alpha undefined.
    FragColor = vec4(res, 1.0);
}

kokoko3k · 23 October 2023 18:08

“cheap.glsl”

DariusG · 23 October 2023 18:10

Haha lol yes should pass this as crt-cheap-ultra

kokoko3k · 24 October 2023 14:33

Speaking of cheapness, I tend to avoid mod,floor,fract if possible; since sin() is often super speedy, maybe this is faster:

// Branch-free column test: the sign of a sine replaces the mod() comparison.
// `frequency` and `y` are expected to exist in the surrounding shader.
float sin_check_offset = sin(frequency);
// step(edge, x) is 1.0 when x >= edge, so this is 1.0 wherever the sine is <= 0.
float is_even = step(sin_check_offset, 0.0);
// Adds 0.5 or 0.0.
// NOTE(review): the snippet this replaces used +0.5 / -0.5 — confirm the difference is acceptable.
y +=  is_even * 0.5;

DariusG · 24 October 2023 15:32

Agreed, mod() wrecks performance massively, as if it stops executing and checks what you ordered at a snail’s pace. Multiple texture reads are probably the worst, and a close second is pow(). Also sign() etc. When you use an iGPU like Intel’s at 200-300 gflops you can tell the difference.