A new little shader i did (glsl)

DariusG · 9 August 2023 16:03

Here is a WebGL version; I think you need to drag and drop a PPM image onto it.

https://binji.github.io/NTSC-CRT/

Cyber · 9 August 2023 16:07

This is like those Shaolin movies where a young apprentice wants to learn Kung-Fu and the former Grand Master sends him to go and pick coconuts, plant rice paddies and carry bamboo logs on his back for three years.

He doesn’t understand why and might be a little belligerent at first but at the end of those three years he can beat anyone in the village.

DariusG · 11 August 2023 09:39

I got through the whole 1st-pass loop, and here is the performance result: around 10 fps. The GPU is brought to its knees, even at 1x scale!

code:

#version 130


#if defined(VERTEX)

#if __VERSION__ >= 130
#define COMPAT_VARYING out
#define COMPAT_ATTRIBUTE in
#define COMPAT_TEXTURE texture
#else
#define COMPAT_VARYING varying 
#define COMPAT_ATTRIBUTE attribute 
#define COMPAT_TEXTURE texture2D
#endif

#ifdef GL_ES
#define COMPAT_PRECISION mediump
#else
#define COMPAT_PRECISION
#endif

COMPAT_ATTRIBUTE vec4 VertexCoord;
COMPAT_ATTRIBUTE vec4 COLOR;
COMPAT_ATTRIBUTE vec4 TexCoord;
COMPAT_VARYING vec4 COL0;
COMPAT_VARYING vec4 TEX0;

vec4 _oPosition1; 
uniform mat4 MVPMatrix;
uniform COMPAT_PRECISION int FrameDirection;
uniform COMPAT_PRECISION int FrameCount;
uniform COMPAT_PRECISION vec2 OutputSize;
uniform COMPAT_PRECISION vec2 TextureSize;
uniform COMPAT_PRECISION vec2 InputSize;

// compatibility #defines
#define vTexCoord TEX0.xy
#define SourceSize vec4(TextureSize, 1.0 / TextureSize) //either TextureSize or InputSize
#define OutSize vec4(OutputSize, 1.0 / OutputSize)

#ifdef PARAMETER_UNIFORM
uniform COMPAT_PRECISION float WHATEVER;
#else
#define WHATEVER 0.0
#endif

// Vertex stage: forwards the texture coordinate (scaled by 1.0001) to the
// fragment stage and projects the vertex with the MVP matrix.
void main()
{
    vec2 scaledCoord = 1.0001 * TexCoord.xy;
    TEX0.xy = scaledCoord;
    gl_Position = MVPMatrix * VertexCoord;
}

#elif defined(FRAGMENT)

#if __VERSION__ >= 130
#define COMPAT_VARYING in
#define COMPAT_TEXTURE texture
out vec4 FragColor;
#else
#define COMPAT_VARYING varying
#define FragColor gl_FragColor
#define COMPAT_TEXTURE texture2D
#endif

#ifdef GL_ES
#ifdef GL_FRAGMENT_PRECISION_HIGH
precision highp float;
#else
precision mediump float;
#endif
#define COMPAT_PRECISION mediump
#else
#define COMPAT_PRECISION
#endif

uniform COMPAT_PRECISION int FrameDirection;
uniform COMPAT_PRECISION int FrameCount;
uniform COMPAT_PRECISION vec2 OutputSize;
uniform COMPAT_PRECISION vec2 TextureSize;
uniform COMPAT_PRECISION vec2 InputSize;
uniform sampler2D Texture;
COMPAT_VARYING vec4 TEX0;

// compatibility #defines
#define Source Texture
#define vTexCoord TEX0.xy

#define SourceSize vec4(TextureSize, 1.0 / TextureSize) //either TextureSize or InputSize
#define OutSize vec4(OutputSize, 1.0 / OutputSize)

#ifdef PARAMETER_UNIFORM
uniform COMPAT_PRECISION float SCANLINE_BASE_BRIGHTNESS;

#else
#define SCANLINE_BASE_BRIGHTNESS 0.95
#endif

// One-shot guard: crt_modulate() checks this before calling init_iir().
int iirs_initialized = 0; // internal state 
// frequencies for bandlimiting 
const int L_FREQ = 1431818; // full line 
const int Y_FREQ = 420000;  // Luma   (Y) 4.2  MHz 
const int I_FREQ = 150000;  // Chroma (I) 1.5  MHz 
const int Q_FREQ = 55000;   // Chroma (Q) 0.55 MHz 
// Fixed-point format used by EXP_MUL / EXP_DIV / expx: EXP_P fractional bits.
const int EXP_P = 11;
const int EXP_ONE = (1 << EXP_P);  // 2048 (1*2^11)
// presumably pi in the same format (6434 / 2048 ~= 3.1416) - TODO confirm
const int EXP_PI = 6434;
const int EXP_MASK = (EXP_ONE - 1);  // 2047, selects the fractional bits
   const int dot_crawl_offset = 0; // 0-5  
   const int hue = 0;        // 0-359 
const int Q_OFFSET = -90;   // in degrees 
// burst hue offset 
const int HUE_OFFSET = -60;  // in degrees 
// 14-bit angle units: T14_2PI steps make one full turn (see crt_sincos14).
const int T14_2PI = 16384;
const int T14_MASK = (T14_2PI - 1);
const int T14_PI = (T14_2PI / 2);
const int CRT_CC_SAMPLES = 4; // samples per chroma period (samples per 360 deg)
const int CRT_CC_VPER = 2; // vertical period in which the artifacts repeat
const int CRT_PIX_FORMAT_RGB = 0;  // 3 bytes per pixel [R,G,B, R,G,B, R,G,B..] 
const int CRT_PIX_FORMAT_BGR = 1;  // 3 bytes per pixel [B,G,R, B,G,R, B,G,R..] 
const int CRT_PIX_FORMAT_ARGB = 2;  // 4 bytes per pixel [A,R,G,B, A,R,G,B...]   
const int CRT_PIX_FORMAT_RGBA = 3;  // 4 bytes per pixel [R,G,B,A, R,G,B,A...]   
const int CRT_PIX_FORMAT_ABGR = 4;  // 4 bytes per pixel [A,B,G,R, A,B,G,R...]   
const int CRT_PIX_FORMAT_BGRA = 5;  // 4 bytes per pixel [B,G,R,A, B,G,R,A...]  
    int format = 0;     // pix format (one of the CRT_PIX_FORMATs in crt_core.h) 
// field and frame are masked to their lowest bit inside crt_modulate().
int field = 0;
int frame;      // 0 = even, 1 = odd

#define CRT_VRES        262     // vertical resolution 
const int CRT_CC_LINE = 2275;
const int CRT_CB_FREQ = 4; // carrier frequency relative to sample rate 
const int CRT_HRES = CRT_CC_LINE * CRT_CB_FREQ / 10; // horizontal res, 910 total
// 910 * 262 = 238420, the size of the sample arrays declared in crt_modulate().
const int CRT_INPUT_SIZE = CRT_HRES * CRT_VRES;

// define line ranges in which sync is generated
// the numbers are inclusive
// Make sure these numbers fit in (0, CRT_VRES)
const int SYNC_REGION_LO = 3;
const int SYNC_REGION_HI = 6;
// same as above but for equalizing pulses 
const int EQU_REGION_A_LO = 0;
const int EQU_REGION_A_HI = 2;
const int EQU_REGION_B_LO = 7;
const int EQU_REGION_B_HI = 9;
// Signal levels written into the line/analog buffers by crt_modulate()
// (presumably IRE units, going by the IRE_MIN / IRE_MAX names).
const int BLANK_LEVEL = 0;
const int SYNC_LEVEL = -40;
const int LINE_BEG = 0;   // first sample index of a scanline
const int black_point = 0;  // added to BLACK_LEVEL as the signal floor
// Percentage gain applied to the modulated Y+I+Q sum via
// (WHITE_LEVEL * white_point / 100). It must be non-zero: a value of 0
// multiplies the whole picture signal away and leaves only the black level.
const int white_point = 100;
#define WHITE_LEVEL      100
#define BURST_LEVEL      20
#define BLACK_LEVEL      7
bool CRT_DO_BANDLIMITING = true;    // enable/disable bandlimiting when encoding 
const int IRE_MAX = 110;   // upper clamp for the composite sample
const int IRE_MIN = 0;     // lower clamp for the composite sample
// IIR lowpass filter for bandlimiting YIQ 
// Each array holds one filter's state: element 0 is c and element 1 is h
// (the history); crt_modulate() passes both to iirf().
 int[2] iirY = int[](0,0); // c,h (history) 
 int[2] iirI = int[](0,0); // c,h (history) 
 int[2] iirQ = int[](0,0); // c,h (history) 

// Fixed-point multiply: the raw product carries 2*EXP_P fractional bits, so
// it is shifted right by EXP_P to return to the working format.
int EXP_MUL(int x, int y) {
    int rawProduct = x * y;
    return rawProduct >> EXP_P;
}
// Fixed-point divide: the numerator is pre-scaled by EXP_P bits so the
// integer quotient stays in the working format.
int EXP_DIV(int x, int y) {
    int scaledNumerator = x << EXP_P;
    return scaledNumerator / y;
}

// Powers of e scaled by 2048 (11 fractional bits), indexed by exponent 0..4.
// expx() uses this table for the integer part of its argument.
int[5] e11 = int[](
    2048,        // e^0 = 1.0 -> 2048
    5567,  // e   
    15133, // e^2 
    41135, // e^3 
    111817 // e^4 
); 


// Fixed-point e^x. Both n and the result use 11 fractional bits (2048 == 1.0).
//
// The integer part of |n| is handled by repeated multiplication with the e11
// table (e^4 per step, then the 0..3 remainder). The fractional part is
// evaluated with a Taylor series sum(frac^k / k!). Negative arguments are
// computed as 1 / e^|n|.
//
// Returns e^n in the same fixed-point format.
int expx(int n){
    if (n == 0) {
        return EXP_ONE;
    }

    bool neg = n < 0;
    if (neg) {
        n = -n;  // work on the magnitude, invert at the end
    }

    // Integer part of the exponent (n >> 11 drops the fractional bits).
    int whole = n >> 11;
    int res = EXP_ONE;
    for (int i = 0; i < whole / 4; i++) {
        res = EXP_MUL(res, e11[4]);
    }
    whole &= 3;
    if (whole > 0) {
        res = EXP_MUL(res, e11[whole]);
    }

    // Fractional part: Taylor series, nxt = frac^k and del = k!.
    n &= EXP_MASK;
    int nxt = EXP_ONE;
    int acc = 0;
    int del = 1;
    for (int i = 1; i < 17; i++) {
        acc += nxt / del;
        nxt = EXP_MUL(nxt, n);
        del *= i;
        // Stop as soon as the next term would contribute nothing. Every later
        // term is also 0 (nxt only shrinks, del only grows), so the result is
        // the same as running all 16 terms, but del never reaches 13!, which
        // would overflow a 32-bit signed int.
        if (del > nxt || nxt <= 0 || del <= 0) {
            break;
        }
    }
    res = EXP_MUL(res, acc);

    if (neg) {
        res = EXP_DIV(EXP_ONE, res);
    }
    return res;
}

// Computes the smoothing coefficient of a one-pole low-pass filter in
// 11-bit fixed point (2048 == 1.0):
//
//     rate = (freq << 9) / limit         sample rate / cutoff, 9 extra bits
//     c    = 2048 - expx(-(6434 << 9) / rate)
//
// where 6434 is pi in the same fixed-point format.
//
//   f     - unused; kept so existing call sites (which pass the filter's
//           history value here) still compile. The rate must not depend on it.
//   freq  - sample/line frequency (e.g. L_FREQ)
//   limit - cutoff frequency in the same unit (e.g. Y_FREQ)
//
// Returns the coefficient. The caller has to store it in the filter's c slot;
// this function cannot write it back itself.
int init_iir (int f, int freq, int limit) {
    if (limit <= 0) {
        return 0;      // no valid cutoff: avoid dividing by zero
    }
    int rate = (freq << 9) / limit;
    if (rate <= 0) {
        return 2048;   // cutoff far above the sample rate: no smoothing
    }
    return 2048 - expx(-((6434 << 9) / rate));
}

// Sine lookup table read by sintabil8(): entries rise from 0 to the peak
// 0x8000 at index 16. Index 17 repeats the value of index 15 so that
// sintabil8() can read entry i + 1 when i is 16.
const int[18] sigpsin15 = int[]( // significant points on sine wave (15-bit) 
    0x0000,
    0x0c88,0x18f8,0x2528,0x30f8,0x3c50,0x4718,0x5130,0x5a80,
    0x62f0,0x6a68,0x70e0,0x7640,0x7a78,0x7d88,0x7f60,0x8000,
    0x7f60
);


// Linearly interpolated lookup into sigpsin15.
// Bits 8..15 of n select the table slot, bits 0..7 are the fraction (in
// 1/256 steps) between that slot and the next one.
// The slot index is not range-checked here; callers must keep n small enough
// that slot + 1 stays inside the 18-entry table.
int sintabil8(int n)
{
    int frac = n & 0xff;
    int slot = (n >> 8) & 0xff;

    int lower = sigpsin15[slot];
    int upper = sigpsin15[slot + 1];

    int step = ((upper - lower) * frac) >> 8;
    return lower + step;
}

// BIT SHIFTING
// X >> 1   EQUALS TO X/2
// X >> 2   EQUALS TO X/4
// X << 1   EQUALS TO X*2
// X << 2   EQUALS TO X*4

// Sine and cosine of a 14-bit angle (T14_2PI units per full turn), built
// from the quarter-wave table behind sintabil8().
//   s, c - ignored on input (overwritten before being read)
//   n    - angle; wrapped to one turn with T14_MASK
// Returns ivec2(sin, cos).
ivec2 crt_sincos14(int s, int c, int n)
{
    int halfTurn    = T14_2PI / 2;
    int quarterTurn = T14_2PI / 4;

    int angle  = n & T14_MASK;            // wrap to [0, one turn)
    int folded = angle & (halfTurn - 1);  // position inside the half turn

    int sinVal;
    int cosVal;
    if (folded < quarterTurn) {
        // first quarter: direct lookup
        sinVal = sintabil8(folded);
        cosVal = sintabil8(quarterTurn - folded);
    } else {
        // second quarter: mirror the sine, negate the cosine
        sinVal = sintabil8(halfTurn - folded);
        cosVal = -sintabil8(folded - quarterTurn);
    }

    // second half of the turn: both values change sign
    if (angle >= halfTurn) {
        sinVal = -sinVal;
        cosVal = -cosVal;
    }
    return ivec2(sinVal, cosVal);
}

// Bytes per pixel for a CRT_PIX_FORMAT_* value.
//   0 (RGB), 1 (BGR)                       -> 3
//   2 (ARGB), 3 (RGBA), 4 (ABGR), 5 (BGRA) -> 4
//   anything else                          -> 0 (unknown format)
// Every path returns a value; falling off the end of a non-void function
// leaves the result undefined.
int crt_bpp4fmt(int format)
{
    if (format == 0 || format == 1) {
        return 3;
    }
    if (format >= 2 && format <= 5) {
        return 4;
    }
    return 0;
}

// Returns the cleared filter history value (always 0), whatever the current
// value f is. Callers assign the result back to the history slot.
int reset_iir (int f) {
    int clearedHistory = 0;
    return clearedHistory;
}

// One step of a one-pole IIR low-pass filter in fixed point.
//   c - filter coefficient (see init_iir)
//   h - filter history, i.e. the previous output
//   s - new input sample
// With bandlimiting enabled the history is moved toward the sample by the
// fraction c: h + (s - h) * c. The coefficient itself is never modified.
// With bandlimiting disabled the sample is passed through unchanged.
//
// Parameters are passed by value, so the new history is only returned.
// A caller that wants the filter to carry state from sample to sample must
// store the result back into its h slot.
int iirf (int c, int h, int s) {
    if (!CRT_DO_BANDLIMITING) {
        return s;
    }
    return h + EXP_MUL(s - h, c);
}



// Builds the sync/blank/burst samples of every scanline into `line`, then
// converts the source colour to Y/I/Q, band-limits and modulates it into
// `analog`. Only the last computed luma value (fy) is returned, replicated
// into all three channels; the `line` and `analog` buffers are never read
// back or output.
vec3 crt_modulate() {
    int x, y, xo, yo=0;
    // sn and cs are only passed to crt_sincos14(), which overwrites them
    // before reading; the results come back through its ivec2 return value.
    int sn, cs, n, ph=0;
    int bpp=0;            
    int cb = 0;
    int destw = 753;  //AV_LEN
    int desth = 236; //((CRT_LINES * 64500) >> 16)
    int[8] iccf    ;  // 4x2
    int[8] ccmodI  ;
    int[8] ccmodQ  ;
    int[8] ccburst ;
    //int[CRT_INPUT_SIZE] analog; // new Array(CRT_INPUT_SIZE).fill(0);
    // 910 * 262 (HRES*VRES) .....actually screen pixels
    // NOTE(review): two 238420-element int arrays are declared per
    // invocation of this function - likely a major cost; confirm.
    int[238420] analog;   
    int[238420] line;
    int ire=0; // composite signal 
    int fy, fi, fq=0;

// NOTE(review): the values returned by init_iir() are discarded here, so
// the c slots of iirY/iirI/iirQ keep their initial value of 0.
if (iirs_initialized == 0) {
        init_iir(iirY[1], L_FREQ, Y_FREQ);
        init_iir(iirI[1], L_FREQ, I_FREQ);
        init_iir(iirQ[1], L_FREQ, Q_FREQ);
        
        iirs_initialized = 1;
    }

    // No effect as written: desth is initialised to 236 above.
    if (desth > 236) desth = 236;


 // Precompute burst and I/Q carrier tables; entries are stored at
 // x + y*CRT_CC_SAMPLES (4 samples for each of the 2 rows).
 // y 0 to 2
    for (y = 0; y < 2; y++) {     //CRT_CC_VPER
 // y * 180'        
        int vert = (y + dot_crawl_offset)*180; // 360/CRT_CC_VPER
// x 0 to 4            
            for (x = 0; x < CRT_CC_SAMPLES; x++) {   // CRT_CC_SAMPLES
// step is 90'                
                int step = 360/CRT_CC_SAMPLES;  // 90
                n = vert + hue + x * step;
            
            // degrees -> 14-bit angle units: 8192 units per 180 degrees
            //calculate sin and cos and store them in sn,cs    
             ivec2 snc = crt_sincos14(sn, cs, (n - step + HUE_OFFSET) * 8192 / 180);                                
            ccburst[x+y*CRT_CC_SAMPLES] = snc.x >> 10;
                
             snc =   crt_sincos14(sn, cs, n * 8192 / 180);
            ccmodI[x+y*CRT_CC_SAMPLES] = snc.x >> 10;                 
                
              snc =  crt_sincos14(sn, cs, (n + Q_OFFSET) * 8192 / 180);
            ccmodQ[x+y*CRT_CC_SAMPLES] = snc.x >> 10;                 
            }
        }
    // bpp is assigned here but not read again in this function.
    bpp = crt_bpp4fmt(format);

    //AV_BEG  + xoffset + (AV_LEN    - destw) / 2
    xo = 156;  
    //CRT_TOP + yoffset + (CRT_LINES - desth) / 2
    yo = 23;  


// use field and frame variables as you'd like here
// to determine starting phases, etc.
    field &= 1;
    frame &= 1;
    
// align signal 
// xo mod CRT_CC_SAMPLES
    xo = xo - (xo % 4); 


// Per-scanline sync generation. t restarts at LINE_BEG for every n and
// `line` is indexed by t alone, so each scanline writes the same entries.
for (n=0; n<262; n++) {     // CRT_VRES

        // filling line with pixels vertically
        // line[238420] = analog[n*910]; // 910*262  QUESTION REMAINS

        int t = LINE_BEG;  //TIME, LINE_BEG  
  
if ((n >= EQU_REGION_A_LO && n <= EQU_REGION_A_HI) || (n >= EQU_REGION_B_LO && n <= EQU_REGION_B_HI)) {
            // equalizing pulses - small blips of sync, mostly blank 
            while (t < (4   * 910 / 100)) line[t++] = SYNC_LEVEL;
            while (t < (50  * 910 / 100)) line[t++] = BLANK_LEVEL;
            while (t < (54  * 910 / 100)) line[t++] = SYNC_LEVEL;
            while (t < (100 * 910 / 100)) line[t++] = BLANK_LEVEL;

        } 
 else if (n >= SYNC_REGION_LO && n <= SYNC_REGION_HI) {
            // pulse boundaries in percent of the line, chosen by field parity
            int[4] even = int[]( 46, 50, 96, 100 );
            int[4] odd  = int[](  4, 50, 96, 100 );
            int[4] offs = even;
                if (field == 1) {
                offs = odd;
                }
  // vertical sync pulse - small blips of blank, mostly sync 
            while (t < (offs[0] * CRT_HRES / 100)) line[t++] = SYNC_LEVEL;
            while (t < (offs[1] * CRT_HRES / 100)) line[t++] = BLANK_LEVEL;
            while (t < (offs[2] * CRT_HRES / 100)) line[t++] = SYNC_LEVEL;
            while (t < (offs[3] * CRT_HRES / 100)) line[t++] = BLANK_LEVEL;
        }

        else {
            // video line 
            // SYNC_BEG 
            while (t < 21) line[t++] = BLANK_LEVEL; // FP 
            // BW_BEG
            while (t < 88) line[t++] = SYNC_LEVEL;  // SYNC 
            // AV_BEG
            while (t < 156)line[t++] = BLANK_LEVEL; // BW + CB + BP 
            
            // CRT_TOP
            if (n < 21) {
                while (t < 910) line[t++] = BLANK_LEVEL;
            }
            // CB_CYCLES of color burst
            // CB_BEG = 97, CB_CYCLES = 10 * CRT_CB_FREQ = 4 
            for (t = 97; t < 137; t++) {
                // NOTE(review): this index is a product, (n % 4) * (t % 2),
                // while the table above was filled at x + y*CRT_CC_SAMPLES -
                // confirm the intended row/column mapping.
                cb = ccburst[(n % 4) * (t % 2)];
               //BLANK_LEVEL = 0, BURST_LEVEL = 20
                line[t] = (0 + (cb * 20)) >> 5;
                iccf[((n + 3) % 4)*(t % 2)] = line[t];
               
                }
            }         
}  

// Picture area: desth (236) rows of destw (753) samples each.
for (y = 0; y < 236; y++) {

        int h = int(SourceSize.y);  // CHECK?? height of image
        int w = int(SourceSize.x);  // CHECK?? width   --//--
        int field_offset;
        int sy;
//desth 236
        field_offset = (field * h + 236) / 236 / 2;
        sy = (y * h) / desth;
        sy += field_offset;

        if (sy >= h) sy = h;
        // sy is not used after this point: the texture fetch below always
        // samples at vTexCoord.
        sy *= w;
       iirY[1] = reset_iir(iirY[1]);  
       iirI[1] = reset_iir(iirI[1]);  
       iirQ[1] = reset_iir(iirQ[1]);    
//CRT_CC_VPER 2
        ph = (y + yo) % 2;
    for (x = 0; x < destw; x++) {
            int rA, gA, bA=0;
            int[3] pix;
            int xoff=0;
            // RGB to YIQ matrix in 16.16 fixed point format 
            int[9] yiqmat = int[](
                19595,  38470,  7471,   // Y 
                39059, -18022, -21103,  // I 
                13894, -34275,  20382);  // Q 
            // Same coordinate on every iteration, so every (x, y) reads the
            // same texel.
            vec3 data = COMPAT_TEXTURE(Source,vTexCoord).rgb;
            // NOTE(review): a channel at 1.0 becomes 65536, and
            // 65536 * 39059 already exceeds the signed 32-bit range -
            // confirm the intended input scale.
            rA = int((data.r*65536));
            gA = int((data.g*65536));
            bA = int((data.b*65536));
            fy = (yiqmat[0] * rA + yiqmat[1] * gA + yiqmat[2] * bA) >> 14;
            fi = (yiqmat[3] * rA + yiqmat[4] * gA + yiqmat[5] * bA) >> 14;
            fq = (yiqmat[6] * rA + yiqmat[7] * gA + yiqmat[8] * bA) >> 14;

            ire = BLACK_LEVEL + black_point;
            xoff = (x + xo) % CRT_CC_SAMPLES;

            // iirf() takes its arguments by value, so iirY/iirI/iirQ are not
            // updated by these calls.
            // bandlimit Y,I,Q 
            fy = iirf(iirY[0], iirY[1], fy);
            fi = iirf(iirI[0], iirI[1], fi) * ccmodI[ph*xoff] >> 4;
            fq = iirf(iirQ[0], iirQ[1], fq) * ccmodQ[ph*xoff] >> 4;

            // The sum is scaled by WHITE_LEVEL * white_point / 100; a
            // white_point of 0 zeroes the whole term.
            // modulate as (Y + sin(x) * I + cos(x) * Q) 
            ire += (fy + fi + fq) * (WHITE_LEVEL * white_point / 100) >> 10;
            if (ire < IRE_MIN) ire = IRE_MIN;
            if (ire > IRE_MAX) ire = IRE_MAX;
            analog[(x + xo) + (y + yo) * CRT_HRES] = ire;
    } //temporary end


} //temporary end

// Returns the filtered luma of the last sample only, not the composite ire.
return vec3(float(fy));

}

// Fragment entry point: outputs the value returned by crt_modulate(), scaled
// by 0.3, as an opaque colour.
void main()
{
    vec3 signal = crt_modulate() * 0.3;
    FragColor = vec4(signal, 1.0);
}
#endif

fY/fI/fQ would still need to be converted back to a color format that GLSL can output.

Probably would need the best GPU available at the moment to run at good speed.

HyperspaceMadness · 11 August 2023 11:51

Your dedication to port this is impressive , I feel like I’m entering the matrix reading the code

DariusG · 11 August 2023 12:12

Yeah tons of bit shifting and arrays lol. Also plenty of things happen in every line, put this (that jumps to a sub function) there and add this there etc.

hunterk · 11 August 2023 13:02

Would you be able to optimize it by not looping through every line on every run? That is, only doing the line of the current pixel/texel (or maybe a sliding window)?

I was also looking for any loops that might be offload-able to a feedback+alpha-channel ticker but I’m not sure if that would work/help.

DariusG · 11 August 2023 15:44

Need to rethink implementation, instead of filling arrays do what it does on a single pixel. Meaning: read the code well, understand what it does and do it again in a different way.

DariusG · 12 August 2023 17:29

In the meantime, here is a hack of the PAL shader in RetroArch that almost matches my real Amiga on composite. It is fixed for GLES too. The result is really close.

#version 130

#pragma parameter FIR_GAIN "FIR Gain" 1.62 0.0 3.0 0.01
#pragma parameter FIR_INVGAIN "FIR Inv Gain" 1.0 0.0 3.0 0.01
#pragma parameter ihue "I Hue" 0.2 -1.0 1.0 0.01
#pragma parameter qhue "Q Hue" 0.1 -1.0 1.0 0.01
#pragma parameter sat "Saturation" 1.0 0.0 2.0 0.01
#pragma parameter crawl "Dot Crawl" 1.0 0.0 1.0 1.0
#pragma parameter blur "Blur Size" 0.75 0.0 2.0 0.01

#if defined(VERTEX)

#if __VERSION__ >= 130
#define COMPAT_VARYING out
#define COMPAT_ATTRIBUTE in
#define COMPAT_TEXTURE texture
#else
#define COMPAT_VARYING varying 
#define COMPAT_ATTRIBUTE attribute 
#define COMPAT_TEXTURE texture2D
#endif

#ifdef GL_ES
#define COMPAT_PRECISION mediump
#else
#define COMPAT_PRECISION
#endif

COMPAT_ATTRIBUTE vec4 VertexCoord;
COMPAT_ATTRIBUTE vec4 COLOR;
COMPAT_ATTRIBUTE vec4 TexCoord;
COMPAT_VARYING vec4 COL0;
COMPAT_VARYING vec4 TEX0;

vec4 _oPosition1; 
uniform mat4 MVPMatrix;
uniform COMPAT_PRECISION int FrameDirection;
uniform COMPAT_PRECISION int FrameCount;
uniform COMPAT_PRECISION vec2 OutputSize;
uniform COMPAT_PRECISION vec2 TextureSize;
uniform COMPAT_PRECISION vec2 InputSize;

// compatibility #defines
#define vTexCoord TEX0.xy
#define SourceSize vec4(TextureSize, 1.0 / TextureSize) //either TextureSize or InputSize
#define OutSize vec4(OutputSize, 1.0 / OutputSize)

#ifdef PARAMETER_UNIFORM
uniform COMPAT_PRECISION float WHATEVER;
#else
#define WHATEVER 0.0
#endif

// Vertex stage: passes the texture coordinate through unchanged and projects
// the vertex with the MVP matrix.
void main()
{
    vec2 passCoord = TexCoord.xy;
    TEX0.xy = passCoord;
    gl_Position = MVPMatrix * VertexCoord;
}

#elif defined(FRAGMENT)

#if __VERSION__ >= 130
#define COMPAT_VARYING in
#define COMPAT_TEXTURE texture
out vec4 FragColor;
#else
#define COMPAT_VARYING varying
#define FragColor gl_FragColor
#define COMPAT_TEXTURE texture2D
#endif

#ifdef GL_ES
#ifdef GL_FRAGMENT_PRECISION_HIGH
precision highp float;
#else
precision mediump float;
#endif
#define COMPAT_PRECISION mediump
#else
#define COMPAT_PRECISION
#endif

uniform COMPAT_PRECISION int FrameDirection;
uniform COMPAT_PRECISION int FrameCount;
uniform COMPAT_PRECISION vec2 OutputSize;
uniform COMPAT_PRECISION vec2 TextureSize;
uniform COMPAT_PRECISION vec2 InputSize;
uniform sampler2D Texture;
COMPAT_VARYING vec4 TEX0;

// compatibility #defines
#define Source Texture
#define vTexCoord TEX0.xy

#define SourceSize vec4(TextureSize, 1.0 / TextureSize) //either TextureSize or InputSize
#define OutSize vec4(OutputSize, 1.0 / OutputSize)

#ifdef PARAMETER_UNIFORM
uniform COMPAT_PRECISION float FIR_GAIN;
uniform COMPAT_PRECISION float FIR_INVGAIN;
uniform COMPAT_PRECISION float ihue;
uniform COMPAT_PRECISION float qhue;
uniform COMPAT_PRECISION float sat;
uniform COMPAT_PRECISION float crawl;
uniform COMPAT_PRECISION float blur;

#else
#define FIR_GAIN 2.0
#define FIR_INVGAIN 1.0
#define ihue 0.0
#define qhue 0.0
#define sat 1.0
#define crawl 1.0
#define blur 1.0

#endif



#define PI          3.14159265358
// FSC, FLINE and VISIBLELINES are not referenced by the functions in this
// shader.
#define FSC         21477272.73
#define FLINE       15625.0
#define VISIBLELINES 312.0

// mat3 constructors take their arguments column by column, so the first three
// values of each matrix below form its first column. Used as `M * v`, the
// first RGB_to_YUV column (0.299, -0.14713, 0.615) scales the red channel.
#define RGB_to_YIQ  mat3( 0.299 , 0.595716 , 0.211456 ,   0.587    , -0.274453 , -0.522591 ,      0.114    , -0.321263 , 0.311135 )
#define YIQ_to_RGB  mat3( 1.0   , 1.0      , 1.0      ,   0.9563   , -0.2721   , -1.1070   ,      0.6210   , -0.6474   , 1.7046   )

#define RGB_to_YUV  mat3( 0.299 , -0.14713 , 0.615    ,   0.587    , -0.28886  , -0.514991 ,      0.114    , 0.436     , -0.10001 )
#define YUV_to_RGB  mat3( 1.0   , 1.0      , 1.0      ,   0.0      , -0.39465  , 2.03211   ,      1.13983  , -0.58060  , 0.0      )


// Low-pass taps applied to the demodulated U/V in main(). The largest tap is
// index 10, which main() pairs with a sample offset of 0
// (2*10 - FIRTAPS = 0).
#define FIRTAPS 20
const float FIR[20] = float[20] (-0.008030271,0.003107906,0.016841352,0.032545161,0.049360136,
											0.066256720,0.082120150,0.095848433,0.106453014,0.113151423,
											0.115441842,0.113151423,0.106453014,0.095848433,0.082120150,
											0.066256720,0.049360136,0.032545161,0.016841352,0.003107906);

//#define FIR_GAIN 2.0
//#define FIR_INVGAIN 1.02

// Per-fragment scratch globals, written by main() before it calls the helper
// functions. mod_luma() and modem_UV() read altv, invx and dx.
float width_ratio;
float height_ratio;
float altv;
float invx;
vec2 dx;

// Frame counter as a float.
#define time float(FrameCount)
// Samples Source at pos shifted horizontally by offset*invx. The arguments
// are pasted in without parentheses, so pass simple expressions only.
#define fetch(offset, pos, invx) COMPAT_TEXTURE(Source, vec2(pos.xy + vec2(offset*invx ,0.0)))

// Composite signal at xy: a 3-tap horizontal blend (weights 0.5 / 0.3 / 0.2
// at xy, xy+dx and xy-dx) is converted to YUV and modulated with the supplied
// carrier as Y + U*sinwt + V*coswt, then clamped to [0, 1].
float mod_luma(vec2 xy, float sinwt, float coswt) {
    vec3 centreTap = fetch(0.0, xy, invx).xyz;
    vec3 plusTap   = fetch(0.0, xy+dx, invx).xyz;
    vec3 minusTap  = fetch(0.0, xy-dx, invx).xyz;

    vec3 blended = centreTap*0.5 + plusTap*0.3 + minusTap*0.2;
    vec3 yuv = RGB_to_YUV * blended;

    float composite = yuv.r + yuv.g*sinwt + yuv.b*coswt;
    return clamp(composite, 0.0, 1.0);
}

// Modulates the picture at pos (centre tap shifted by ofs*invx) onto the
// colour carrier and immediately demodulates it again: the clamped composite
// value is multiplied by the carrier's sine and cosine.
// Returns vec2(signal*sin, signal*cos); main() low-pass filters these.
// Only the centre tap uses ofs; the two side taps are taken at pos +/- dx.
vec2 modem_UV(vec2 pos, float ofs) {

    float t = (pos.x + ofs*invx) * OutputSize.x*SourceSize.x/InputSize.x;
    float phase = t * 2.0 * PI/2.0 + altv;
    vec2 carrier = vec2(sin(phase), cos(phase));

    vec3 centreTap = fetch(ofs, pos, invx).xyz;
    vec3 plusTap   = fetch(0.0, pos+dx, invx).xyz;
    vec3 minusTap  = fetch(0.0, pos-dx, invx).xyz;
    vec3 blended = centreTap*0.5 + plusTap*0.3 + minusTap*0.2;

    vec3 yuv = RGB_to_YUV * blended;
    float composite = yuv.x + yuv.y*carrier.x + yuv.z*carrier.y;
    float signal = clamp(composite, 0.0, 1.0);

    return vec2(signal * carrier.x, signal * carrier.y);
}

// Luma/chroma mixing matrix, used as `yuv *= mix_mat` (row vector times
// matrix): Y is kept, U becomes Y*ihue + U*sat and V becomes Y*qhue + V*sat.
//
// This is a macro rather than an initialised global because ihue, qhue and
// sat are uniforms when PARAMETER_UNIFORM is defined. GLSL 1.30 and GLSL ES
// require global initialisers to be constant expressions, so a global built
// from uniforms is rejected by strict compilers. The macro expands at the use
// site inside main(), where uniforms are allowed.
#define mix_mat mat3(1.0, 0.0, 0.0,   ihue, sat, 0.0,   qhue, 0.0, sat)


// Fragment entry point: sets up the shared globals (dx, altv, invx) that the
// helper functions read, low-pass filters the demodulated U/V with the FIR
// taps, subtracts the re-modulated chroma from the composite value to get
// luma, and converts the result to RGB.
void main() {

// Blend the row coordinate 80% of the way toward the texel centre.
vec2 cent = floor(vTexCoord*SourceSize.xy)+0.5;
vec2 near = cent*SourceSize.zw;
vec2 pos = vTexCoord; 
   pos.y = mix(vTexCoord.y, near.y,0.8);
   // horizontal tap distance: one source texel scaled by the blur parameter
   dx = vec2(SourceSize.z*blur,0.0);
   // width_ratio and height_ratio are written here but not read by any
   // function in this shader.
   width_ratio = SourceSize.x / 341.0;
   height_ratio = SourceSize.y / 625.0;
// Optional per-frame phase offset (dot crawl), repeating every 30 frames.
float crawler = crawl == 1.0? 2.0*mod(time,30.0): 0.0;
   // carrier phase offset: advances by PI/2 per source row, plus the crawl
   altv = pos.y*SourceSize.y*PI/2.0 + crawler;
   invx = 0.05 /  OutputSize.x; // equals 5 samples per Fsc period

    // lowpass U/V at baseband
    vec2 UV = vec2(0.0);
    for (int i = 0; i < FIRTAPS; i++) {
        // tap offsets run from -FIRTAPS to FIRTAPS-2 in steps of 2
        vec2 uv = modem_UV(pos, 2.0*float(i) - float(FIRTAPS)); // floats for GLES, or else, bang!
        UV += FIR_GAIN* uv * FIR[i];
        }

   float wt = pos.x*SourceSize.x*PI/2.0;

   //float sinwt = sin(wt + altv+ 60.0*floor(mod(time,3.0)+1.0));
   //float coswt = cos(wt + altv+ 60.0*floor(mod(time,3.0)+1.0));

    float sinwt = sin(wt + altv);
    float coswt = cos(wt + altv);


   // composite value minus the re-modulated, filtered chroma
   float luma = mod_luma(pos, sinwt, coswt) - FIR_INVGAIN*(UV.x*sinwt + UV.y*coswt);
   vec3 yuv_result = vec3(luma, UV.x, UV.y);
// hue / saturation adjustment
yuv_result *= mix_mat;
    FragColor = vec4(YUV_to_RGB * yuv_result, 1.0);
}
#endif

DariusG · 13 August 2023 11:58

After some talk with him the 1st pass should look something like this on GLSL. I have to check if the numbers LFREQ etc are correct.

#version 110

#pragma parameter FIR_GAIN "FIR Gain" 1.62 0.0 3.0 0.01


#if defined(VERTEX)

#if __VERSION__ >= 130
#define COMPAT_VARYING out
#define COMPAT_ATTRIBUTE in
#define COMPAT_TEXTURE texture
#else
#define COMPAT_VARYING varying 
#define COMPAT_ATTRIBUTE attribute 
#define COMPAT_TEXTURE texture2D
#endif

#ifdef GL_ES
#define COMPAT_PRECISION mediump
#else
#define COMPAT_PRECISION
#endif

COMPAT_ATTRIBUTE vec4 VertexCoord;
COMPAT_ATTRIBUTE vec4 COLOR;
COMPAT_ATTRIBUTE vec4 TexCoord;
COMPAT_VARYING vec4 COL0;
COMPAT_VARYING vec4 TEX0;

vec4 _oPosition1; 
uniform mat4 MVPMatrix;
uniform COMPAT_PRECISION int FrameDirection;
uniform COMPAT_PRECISION int FrameCount;
uniform COMPAT_PRECISION vec2 OutputSize;
uniform COMPAT_PRECISION vec2 TextureSize;
uniform COMPAT_PRECISION vec2 InputSize;

// compatibility #defines
#define vTexCoord TEX0.xy
#define SourceSize vec4(TextureSize, 1.0 / TextureSize) //either TextureSize or InputSize
#define OutSize vec4(OutputSize, 1.0 / OutputSize)

#ifdef PARAMETER_UNIFORM
uniform COMPAT_PRECISION float WHATEVER;
#else
#define WHATEVER 0.0
#endif

// Vertex stage: passes the texture coordinate through unchanged and projects
// the vertex with the MVP matrix.
void main()
{
    vec2 passCoord = TexCoord.xy;
    TEX0.xy = passCoord;
    gl_Position = MVPMatrix * VertexCoord;
}

#elif defined(FRAGMENT)

#if __VERSION__ >= 130
#define COMPAT_VARYING in
#define COMPAT_TEXTURE texture
out vec4 FragColor;
#else
#define COMPAT_VARYING varying
#define FragColor gl_FragColor
#define COMPAT_TEXTURE texture2D
#endif

#ifdef GL_ES
#ifdef GL_FRAGMENT_PRECISION_HIGH
precision highp float;
#else
precision mediump float;
#endif
#define COMPAT_PRECISION mediump
#else
#define COMPAT_PRECISION
#endif

uniform COMPAT_PRECISION int FrameDirection;
uniform COMPAT_PRECISION int FrameCount;
uniform COMPAT_PRECISION vec2 OutputSize;
uniform COMPAT_PRECISION vec2 TextureSize;
uniform COMPAT_PRECISION vec2 InputSize;
uniform sampler2D Texture;
COMPAT_VARYING vec4 TEX0;

// compatibility #defines
#define Source Texture
#define vTexCoord TEX0.xy

#define SourceSize vec4(TextureSize, 1.0 / TextureSize) //either TextureSize or InputSize
#define OutSize vec4(OutputSize, 1.0 / OutputSize)

#ifdef PARAMETER_UNIFORM
uniform COMPAT_PRECISION float FIR_GAIN;


#else
#define FIR_GAIN 2.0

#endif

#define PI 3.14159265358
#define CRT_CC_SAMPLES 4.0   // samples per chroma period
#define CRT_CC_VPER 2.0      // vertical period of the chroma phase pattern
// Parenthesised so the macro stays a single value inside larger expressions
// (for example `x / BLACK_LVL`).
#define BLACK_LVL (7.0/140.0)
#define BLACK_PT 0.0
#define Q_OFFSET 90.0
   
const float hue = 0.0;        // 0-359 
const float dot_crawl_offset = 0.0; // 0-5  
// Frequencies in MHz.
const float LINE_FREQ = 14.31818; // full line 
const float Y_FREQ = 4.2;  // Luma   (Y) 4.2  MHz 
const float I_FREQ = 1.5;  // Chroma (I) 1.5  MHz 
const float Q_FREQ = 0.55;   // Chroma (Q) 0.55 MHz 
// mat3 constructors take their arguments column by column: the first three
// values are the first column, which scales the red channel in `M * rgb`.
#define RGB_to_YIQ  mat3( 0.299, 0.595716, 0.211456,0.587, -0.274453, -0.522591, 0.114, -0.321263, 0.311135 )
#define YIQ_to_RGB  mat3( 1.0, 1.0, 1.0, 0.9563, -0.2721, -1.1070,0.6210, -0.6474, 1.7046)

// Smoothing coefficient of a one-pole low-pass filter for a cutoff of
// `limit` (same unit as LINE_FREQ).
//
// Float form of the fixed-point formula `2048 - expx(-6434 * 512 / rate)`
// with `rate = (freq * 512) / limit`: 2048 stands for 1.0, 6434 for pi, and
// the two factors of 512 cancel. That leaves
//     c = 1 - exp(-PI / (LINE_FREQ / limit))
// Returns a value in (0, 1); a higher cutoff gives a coefficient closer to 1.
float init_iir (float limit) {
    float rate = LINE_FREQ / limit; // sample rate relative to the cutoff
    return 1.0 - exp(-PI / rate);
}

// Returns e raised to the difference between the sample s and the value h.
float iirf (float h, float s) {
    return exp(s - h);
}


// First-pass encoder: converts the source texel to YIQ, modulates I and Q
// with a per-pixel carrier phase and outputs the resulting composite value
// as a grey level.
void main() {

    vec3 rgb = COMPAT_TEXTURE(Source, vTexCoord).rgb;
    // RGB_to_YIQ is written column by column, so it has to be applied as
    // matrix * vector. `rgb * RGB_to_YIQ` multiplies by the transpose and
    // yields Y = 0.299 r + 0.5957 g + 0.2115 b instead of the luma weights
    // 0.299 / 0.587 / 0.114.
    vec3 yiq = RGB_to_YIQ * rgb;

    float ire = BLACK_LVL+BLACK_PT;
    float xo = 156.0;  
    float yo = 23.0;

    //  full output resolution x 
    vec2 pos = vTexCoord*OutputSize.xy*SourceSize.xy/InputSize.xy;    

    // align the horizontal offset to a whole chroma period
    xo = xo - mod(xo, 4.0); 
    float xoff = mod((pos.x + xo), CRT_CC_SAMPLES);
    float ph   = mod((pos.y + yo + dot_crawl_offset), CRT_CC_VPER);
    float phase = ph*xoff + hue;
    float sn = sin(phase/2.0*PI/2.0);
    float cs = cos(phase/2.0*PI/2.0);
// Initiate IIR values
    float iirY = init_iir(Y_FREQ);
    float iirI = init_iir(I_FREQ);
    float iirQ = init_iir(Q_FREQ);
// IIR pass before sending as 1 signal
    float fY = yiq.r*iirf(iirY, yiq.r);
    float fI = yiq.g*iirf(iirI, yiq.g)*cs;
    float fQ = yiq.b*iirf(iirQ, yiq.b)*sn;
    
    ire += fY + fI + fQ;
    ire = clamp(ire,0.0,1.1);

    FragColor = vec4(vec3(ire), 1.0);
}
#endif

hunterk · 13 August 2023 13:29

That looks a lot more manageable!

DariusG · 13 August 2023 14:47

Yeah just need to understand what the code does and do it properly for GLSL. Now i am not in the mood to jump in to the second pass rabbit hole maybe after some days lol.

DariusG · 13 August 2023 19:29

All numbers revised to correct values

#version 110

#pragma parameter FIR_GAIN "FIR Gain" 1.62 0.0 3.0 0.01


#if defined(VERTEX)

#if __VERSION__ >= 130
#define COMPAT_VARYING out
#define COMPAT_ATTRIBUTE in
#define COMPAT_TEXTURE texture
#else
#define COMPAT_VARYING varying 
#define COMPAT_ATTRIBUTE attribute 
#define COMPAT_TEXTURE texture2D
#endif

#ifdef GL_ES
#define COMPAT_PRECISION mediump
#else
#define COMPAT_PRECISION
#endif

COMPAT_ATTRIBUTE vec4 VertexCoord;
COMPAT_ATTRIBUTE vec4 COLOR;
COMPAT_ATTRIBUTE vec4 TexCoord;
COMPAT_VARYING vec4 COL0;
COMPAT_VARYING vec4 TEX0;

vec4 _oPosition1; 
uniform mat4 MVPMatrix;
uniform COMPAT_PRECISION int FrameDirection;
uniform COMPAT_PRECISION int FrameCount;
uniform COMPAT_PRECISION vec2 OutputSize;
uniform COMPAT_PRECISION vec2 TextureSize;
uniform COMPAT_PRECISION vec2 InputSize;

// compatibility #defines
#define vTexCoord TEX0.xy
#define SourceSize vec4(TextureSize, 1.0 / TextureSize) //either TextureSize or InputSize
#define OutSize vec4(OutputSize, 1.0 / OutputSize)

#ifdef PARAMETER_UNIFORM
uniform COMPAT_PRECISION float WHATEVER;
#else
#define WHATEVER 0.0
#endif

// Vertex stage: passes the texture coordinate through unchanged and projects
// the vertex with the MVP matrix.
void main()
{
    vec2 passCoord = TexCoord.xy;
    TEX0.xy = passCoord;
    gl_Position = MVPMatrix * VertexCoord;
}

#elif defined(FRAGMENT)

#if __VERSION__ >= 130
#define COMPAT_VARYING in
#define COMPAT_TEXTURE texture
out vec4 FragColor;
#else
#define COMPAT_VARYING varying
#define FragColor gl_FragColor
#define COMPAT_TEXTURE texture2D
#endif

#ifdef GL_ES
#ifdef GL_FRAGMENT_PRECISION_HIGH
precision highp float;
#else
precision mediump float;
#endif
#define COMPAT_PRECISION mediump
#else
#define COMPAT_PRECISION
#endif

uniform COMPAT_PRECISION int FrameDirection;
uniform COMPAT_PRECISION int FrameCount;
uniform COMPAT_PRECISION vec2 OutputSize;
uniform COMPAT_PRECISION vec2 TextureSize;
uniform COMPAT_PRECISION vec2 InputSize;
uniform sampler2D Texture;
COMPAT_VARYING vec4 TEX0;

// compatibility #defines
#define Source Texture
#define vTexCoord TEX0.xy

#define SourceSize vec4(TextureSize, 1.0 / TextureSize) //either TextureSize or InputSize
#define OutSize vec4(OutputSize, 1.0 / OutputSize)

#ifdef PARAMETER_UNIFORM
uniform COMPAT_PRECISION float FIR_GAIN;


#else
#define FIR_GAIN 2.0

#endif

#define PI 3.14159265358
#define CRT_CC_SAMPLES 4.0   // samples per chroma period
#define CRT_CC_VPER 2.0      // vertical period of the chroma phase pattern
// Parenthesised so the macro stays a single value inside larger expressions
// (for example `x / BLACK_LVL`).
#define BLACK_LVL (7.0/140.0)
#define BLACK_PT 0.0
#define Q_OFFSET 90.0
   
const float hue = 0.0;        // 0-359 
const float dot_crawl_offset = 0.0; // 0-5  
// Frequencies in MHz.
const float LINE_FREQ = 14.31818; // full line 
const float Y_FREQ = 4.2;  // Luma   (Y) 4.2  MHz 
const float I_FREQ = 1.5;  // Chroma (I) 1.5  MHz 
const float Q_FREQ = 0.55;   // Chroma (Q) 0.55 MHz 
// mat3 constructors take their arguments column by column: the first three
// values are the first column, which scales the red channel in `M * rgb`.
#define RGB_to_YIQ  mat3( 0.299, 0.595716, 0.211456,0.587, -0.274453, -0.522591, 0.114, -0.321263, 0.311135 )
#define YIQ_to_RGB  mat3( 1.0, 1.0, 1.0, 0.9563, -0.2721, -1.1070,0.6210, -0.6474, 1.7046)

// Smoothing coefficient of a one-pole low-pass filter for a cutoff of
// `limit` (same unit as LINE_FREQ).
//
// Float form of the fixed-point formula `2048 - expx(-6434 * 512 / rate)`
// with `rate = (freq * 512) / limit`, i.e. the sample rate divided by the
// cutoff: c = 1 - exp(-PI / (LINE_FREQ / limit)).
// Dividing the other way round (limit / LINE_FREQ) makes the exponent so
// large that every coefficient comes out at about 1.0 and the three filters
// stop differing.
// Returns a value in (0, 1); a higher cutoff gives a coefficient closer to 1.
float init_iir (float limit) {
    float rate = LINE_FREQ / limit; // sample rate relative to the cutoff
    return 1.0 - exp(-PI / rate);
}

// Returns e raised to the difference between the sample s and the value h.
float iirf (float h, float s) {
    return exp(s - h);
}


// First-pass encoder: converts the source texel to YIQ, modulates I and Q
// with a per-pixel carrier phase and outputs the resulting composite value
// as a grey level.
void main() {

    vec3 rgb = COMPAT_TEXTURE(Source, vTexCoord).rgb;
    // RGB_to_YIQ is written column by column, so it has to be applied as
    // matrix * vector. `rgb * RGB_to_YIQ` multiplies by the transpose and
    // yields Y = 0.299 r + 0.5957 g + 0.2115 b instead of the luma weights
    // 0.299 / 0.587 / 0.114.
    vec3 yiq = RGB_to_YIQ * rgb;

    float ire = BLACK_LVL+BLACK_PT;
    float xo = 156.0;  
    float yo = 23.0;

    // position in output pixels
    vec2 pos = vTexCoord*OutputSize.xy*SourceSize.xy/InputSize.xy;    
    
    // align the horizontal offset to a whole chroma period
    xo = xo - mod(xo, 4.0); 
    float xoff = mod((pos.x + xo), CRT_CC_SAMPLES);
    float ph   = mod((pos.y + yo + dot_crawl_offset), CRT_CC_VPER);
    float phase = ph*xoff + hue;
    float sn = sin(phase/2.0*PI/2.0);
    float cs = cos(phase/2.0*PI/2.0);
// Initiate IIR values
    float iirY = init_iir(Y_FREQ);
    float iirI = init_iir(I_FREQ);
    float iirQ = init_iir(Q_FREQ);
// IIR pass before sending as 1 signal
    float fY = yiq.r*iirf(iirY, yiq.r);
    float fI = yiq.g*iirf(iirI, yiq.g)*cs;
    float fQ = yiq.b*iirf(iirQ, yiq.b)*sn;
    
    ire += fY + fI + fQ;
    ire = clamp(ire,0.0,1.0);


    FragColor = vec4(vec3(ire), 1.0);
}
#endif

DariusG · 15 August 2023 18:34

OK, so I added 2 GLSL shaders. The one in the “pal” folder, named A520, mimics my Amiga’s composite modulator; it is almost a 1:1 replication (!). I did so much study on the matter that I could write an NTSC shader on my cellphone now, lol

and a replacement for “ntsc-simple”

DariusG · 20 August 2023 07:42

@Cyber since you like blargg’s ntsc, check the new “ntsc-simple-hd” I uploaded to slang/glsl and tell me what you think. It’s a bit sharper, since it uses 20 passes “blurring” left and right while blargg uses 33.

There is a kernel array of size 66 in blargg, the 2nd part is filled with luma filter values related to “resolution” and “sharpness” values

#define  bleed 0.2
#define LUMA_CUTOFF 0.2

// generate luma (y) filter using sinc kernel
        // sinc with rolloff (dsf) 
        float rolloff = 1.0 + sharpness * 0.032;
        float maxh = 32.0;
        float pow_a_n = pow( rolloff, maxh );
        float sum = 0.0;
        int i;
        // quadratic mapping to reduce negative (blurring) range 
        float to_angle = resolution + 1.0;
        to_angle = PI / maxh * LUMA_CUTOFF * (to_angle * to_angle + 1.0);
        
        kernels [49] = maxh; // default center value 

    for ( i = 0; i < 33; i++ )
        {
            int x = i - 16;
            float angle = x * to_angle;
            // instability occurs at center point with rolloff very close to 1.0 
            if ( x || pow_a_n > 1.056 || pow_a_n < 0.981 )
            {
                float rolloff_cos_a = rolloff * cos( angle );
                float num = 1.0 - rolloff_cos_a - pow_a_n * cos( maxh * angle ) +
                            pow_a_n * rolloff * cos( (maxh - 1.0) * angle );
                float den = 1.0 - rolloff_cos_a - rolloff_cos_a + rolloff * rolloff;
                float dsf = num / den;
                kernels [33 + i] = dsf - 0.5;
            }
    }
        
        // apply blackman window and find sum 
        for ( i = 0; i < 33; i++ )
        {
            float x = PI * 2 / 32 * i;
            float blackman = 0.42 - 0.5*cos( x ) + 0.08*cos( x * 2 );
            sum += kernels [33 + i] * blackman;
        }

And the 1st part, the first 33 numbers, is filled with chroma values related to “bleed”

// Fills the 33 chroma taps kernels[0..32] with a gaussian whose width is
// derived from `bleed`, then normalises the even-indexed and odd-indexed taps
// independently so that each set sums to 1.
// NOTE(review): `bleed` is introduced earlier in the post as
// `#define bleed 0.2`, but it is assigned to below; as written that would not
// compile, so it presumably needs to be a float variable - confirm.
// NOTE(review): `i`, `sum` and `kernels` are reused from the luma snippet.
// generate chroma (iq) filter using gaussian kernel
        float cutoff_factor = -0.03125;
        
    // Only negative bleed values take this branch: bleed is raised to the 8th
    // power (three successive squarings) and then scaled by -30.0 / 0.65.
    if ( bleed < 0.0 )
        {
            // keep extreme value accessible only near upper end of scale (1.0) 
            bleed *= bleed;
            bleed *= bleed;
            bleed *= bleed;
            bleed *= -30.0 / 0.65;
        }
        // cutoff_factor is negative, so a positive bleed yields a negative
        // exponent coefficient, i.e. a decaying gaussian in the loop below.
        bleed =  0.35 * cutoff_factor * bleed;
           
        // Tap offset i = -16 .. 16 is stored at index 16 + i (0 .. 32).
        for ( i = -16; i <= 16; i++ ){
            kernels [16 + i] = exp( float(i) * float(i) * bleed );
        }
       
        // normalize even and odd phases separately 
        for ( i = 0; i < 2; i++ )
        {
            sum = 0.0;
            int x;
            // First pass: total of every second tap starting at phase i.
            for ( x = i; x < 33; x += 2 )
                sum += kernels [x];
            
            // Second pass: scale those same taps by the reciprocal of the total.
            sum = 1.0 / sum;
            for ( x = i; x < 33; x += 2 )
            {
                kernels[x] *= sum;
            }
        }

These arrays could be pre-calculated for given bleed and resolution values in steps, let’s say -1.0, -0.5, 0.0, 0.5, 1.0, so you don’t need to calculate this whole huge loop in real time and drag performance to the bottom.

In the end, luma and chroma samples (the 33-1=32 passes) are filtered multiplied by these arrays. If you look carefully luma is half blurred than chroma (edit: actually 4 times since it runs only 0… to 16 and chroma runs -32… to 32… So it’s a 4:1)

        int x = i - 16;

// later on

if ( x ...... // meaning if x >0 so half passes of 32

Cyber · 20 August 2023 15:59

Thanks for asking, it would be an honour as soon as I get my latest presets pack out the door. I have had to overhaul it at least 3 times already since the initial release. The first one was to add Grade (the old one), the second was mainly to tweak filters. The third one was to calibrate using the 240p Test Suite, then retweak settings and filters. While I was working on that, a couple of users complained that the presets weren’t loading. It was because they don’t have the old Grade, since the Online Updater doesn’t pull it anymore, and in addition to that the new Grade is in a different folder. So I had to overhaul again just to redo all the colour and brightness settings to be able to use this new Grade, and I was actually quite satisfied after maybe the third overhaul.

This one’s looking good though.

So in other words as soon as I get a chance. Lol

It’s cool that you’ve gotten your hands dirty in the Blargg code and can understand and visualize what’s been going on under the hood for all these years.

As we can see, it’s highly optimized.

One thing I would like to get some more information on would be the exact ranges and steps of all of the settings.

I know for a fact that most of the settings can be incremented in steps of ±0.1.

Most seem to have a range of between -1 to +1

I think I found one where the max was 0.5 though (gamma) can’t say for sure. All I use are my eyes and trial and error.

One interesting setting is the colour bleed function. Each 0.1 step seems to only increase the bleed marginally. So you can almost go overboard without going overboard.

With many of the other settings a ±0.1 adjustment is quite noticeable.

One thing I don’t get and I wish could be resolved or worked around is why the filter causes the Aspect Ratio to change slightly.

DariusG · 1 September 2023 17:20

“Lab” here

Direct comparison and 1:1 Trinitron PAL colors, Amiga at least, preset

shaders = "6"
feedback_pass = "0"
shader0 = "../misc/shaders/simple_color_controls.glsl"
filter_linear0 = "false"
shader1 = "../misc/shaders/chromaticity.glsl"
filter_linear1 = "false"
shader2 = "../crt/shaders/crt-consumer/linearize.glsl"
filter_linear2 = "false"
shader3 = "../crt/shaders/crt-consumer/glow_x.glsl"
filter_linear3 = "false"
shader4 = "../crt/shaders/crt-consumer/glow_y.glsl"
filter_linear4 = "false"
shader5 = "../crt/shaders/crt-geom.glsl"
filter_linear5 = "false"
TEMP = "5423.000000"
BLACK = "-0.020000"
gamma_in = "1.000000"
gamma_out_red = "1.000000"
gamma_out_green = "1.000000"
gamma_out_blue = "1.000000"
R = "2.000000"
COLOR_MODE = "-1.000000"
Dx = "-1.000000"
CRTgamma = "1.000000"
SATURATION = "1.200000"
DOTMASK = "0.5"

Default RGB colors

SNES Flashback… yeah that looks like Trinitron

DariusG · 11 September 2023 12:07

Another day, another project, today’s was “Make crt-geom run on an old 2016 cellphone around 120 gflops gpu” so here it is…

Copy and save as ‘crt-geom-mini.glsl’ drop to shaders_glsl/crt/shaders

#version 110

// This program is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 2 of the License, or (at your option)
// any later version.

#pragma parameter CURV "CRT-Geom Curvature" 1.0 0.0 1.0 1.0
#pragma parameter SCAN "CRT-Geom Scanline Weight" 0.3 0.2 0.6 0.05
#pragma parameter MASK "CRT-Geom Dotmask Strength" 0.25 0.0 1.0 0.05
#pragma parameter LUM "CRT-Geom Luminance" 0.05 0.0 0.5 0.01
#pragma parameter INTERL "CRT-Geom Interlacing Simulation" 1.0 0.0 1.0 1.0
#pragma parameter SAT "CRT-Geom Saturation" 1.1 0.0 2.0 0.01

#define PI 3.1415926535897932384626433

#if defined(VERTEX)

#if __VERSION__ >= 130
#define COMPAT_VARYING out
#define COMPAT_ATTRIBUTE in
#define COMPAT_TEXTURE texture
#else
#define COMPAT_VARYING varying 
#define COMPAT_ATTRIBUTE attribute 
#define COMPAT_TEXTURE texture2D
#endif

#ifdef GL_ES
#define COMPAT_PRECISION mediump
#else
#define COMPAT_PRECISION
#endif

COMPAT_ATTRIBUTE vec4 VertexCoord;
COMPAT_ATTRIBUTE vec4 COLOR;
COMPAT_ATTRIBUTE vec4 TexCoord;
COMPAT_VARYING vec4 COL0;
COMPAT_VARYING vec4 TEX0;
COMPAT_VARYING vec2 scale;
COMPAT_VARYING vec2 warpp;
COMPAT_VARYING vec2 warppm;
COMPAT_VARYING vec2 warp;
COMPAT_VARYING float fragpos;
COMPAT_VARYING float omega;

vec4 _oPosition1; 
uniform mat4 MVPMatrix;
uniform COMPAT_PRECISION int FrameDirection;
uniform COMPAT_PRECISION int FrameCount;
uniform COMPAT_PRECISION vec2 OutputSize;
uniform COMPAT_PRECISION vec2 TextureSize;
uniform COMPAT_PRECISION vec2 InputSize;

// compatibility #defines
#define vTexCoord TEX0.xy
#define SourceSize vec4(TextureSize, 1.0 / TextureSize) //either TextureSize or InputSize
#define OutSize vec4(OutputSize, 1.0 / OutputSize)

#ifdef PARAMETER_UNIFORM
uniform COMPAT_PRECISION float SIZE;

#else
#define SIZE     1.0      
   
#endif

void main()
{
    // Project the quad and forward the unmodified texture coordinate.
    vec2 uv = TexCoord.xy;
    gl_Position = MVPMatrix * VertexCoord;
    TEX0.xy = uv;

    // Texture size relative to the input size, per axis
    // (SourceSize.xy is TextureSize).
    scale = TextureSize / InputSize;

    // Phase that the fragment stage passes to sin() for the dot mask.
    fragpos = uv.x * OutputSize.x * scale.x * PI;

    // Texture coordinate rescaled by `scale`, and the same value remapped
    // with *2.0-1.0 for the curvature function in the fragment stage.
    warpp = uv * scale;
    warp = warpp * 2.0 - 1.0;
}

#elif defined(FRAGMENT)

#if __VERSION__ >= 130
#define COMPAT_VARYING in
#define COMPAT_TEXTURE texture
out vec4 FragColor;
#else
#define COMPAT_VARYING varying
#define FragColor gl_FragColor
#define COMPAT_TEXTURE texture2D
#endif

#ifdef GL_ES
#ifdef GL_FRAGMENT_PRECISION_HIGH
precision highp float;
#else
precision mediump float;
#endif
#define COMPAT_PRECISION mediump
#else
#define COMPAT_PRECISION
#endif

uniform COMPAT_PRECISION int FrameDirection;
uniform COMPAT_PRECISION int FrameCount;
uniform COMPAT_PRECISION vec2 OutputSize;
uniform COMPAT_PRECISION vec2 TextureSize;
uniform COMPAT_PRECISION vec2 InputSize;
uniform sampler2D Texture;
COMPAT_VARYING vec4 TEX0;
COMPAT_VARYING vec2 scale;
COMPAT_VARYING float fragpos;
COMPAT_VARYING vec2 warpp;
COMPAT_VARYING vec2 warppm;
COMPAT_VARYING vec2 warp;
COMPAT_VARYING float omega;

// compatibility #defines
#define Source Texture
#define vTexCoord TEX0.xy

#define SourceSize vec4(TextureSize, 1.0 / TextureSize) //either TextureSize or InputSize
#define OutSize vec4(OutputSize, 1.0 / OutputSize)

// User-tunable parameters. When the frontend supplies them as uniforms
// (PARAMETER_UNIFORM defined) they are declared here; otherwise they fall back
// to compile-time constants. The fallbacks mirror the defaults of the
// `#pragma parameter` lines at the top of the shader so that the picture is
// the same whether or not runtime parameters are available.
#ifdef PARAMETER_UNIFORM
uniform COMPAT_PRECISION float SCAN;
uniform COMPAT_PRECISION float MASK;
uniform COMPAT_PRECISION float CURV;
uniform COMPAT_PRECISION float LUM;
uniform COMPAT_PRECISION float SAT;
uniform COMPAT_PRECISION float INTERL;

#else
#define SCAN 0.3
#define MASK 0.25
#define CURV 1.0
#define LUM 0.05
#define SAT 1.1
#define INTERL 1.0

#endif


// Scanline intensity at distance `pos` from a scanline centre.
//   pos   - distance from the scanline centre, in source lines
//   color - pixel colour; brighter pixels get a slightly wider beam
// Returns LUM plus a gaussian of pos, scaled by (0.1 + SCAN) / width.
float scan(float pos, vec3 color)
{
    // Beam width: SCAN plus a small term proportional to the channel average.
    float beam = SCAN + 0.1 * dot(color, vec3(0.333)) * 0.8;
    float t = pos / beam;
    float gauss = exp(-t * t);
    return LUM + (0.1 + SCAN) * gauss / beam;
}

// Applies the screen-curvature distortion and returns the warped coordinate.
// The `pos` argument is not used: the function works from the `warp` varying
// (set in the vertex stage to warpp*2.0-1.0) instead.
vec2 Warp(vec2 pos)
{
    vec2 centred = warp;
    // Each axis is stretched in proportion to the squared distance along the
    // other axis.
    vec2 bulge = vec2(1.0 + centred.y * centred.y * 0.031,
                      1.0 + centred.x * centred.x * 0.05);
    // Undo the *2.0-1.0 remapping applied in the vertex stage.
    return (centred * bulge) * 0.5 + 0.5;
}

// Fragment entry point: optional curvature, a 5-tap Lanczos2 approximation of
// the source, scanlines, a sine-based dot mask, a brightness/saturation tweak
// and a black mask outside the curved screen area.
void main()
{
    // Sampling position: curved when CURV is 1.0, plain texture coordinate
    // otherwise.
    vec2 pos;
    if (CURV == 1.0) pos = Warp(warpp);
    else pos = vTexCoord;
    // corn.x is replaced by 0.0001 / (distance to the nearest left/right edge);
    // it is compared against corn.y at the end of main() to blank the corners.
    vec2 corn   = min(pos,1.0-pos); // This is used to mask the rounded
         corn.x = 0.0001/corn.x;  // corners later on
    // Warp() works on coordinates multiplied by `scale`; divide to get back to
    // texture coordinates.
    if (CURV == 1.0) pos /= scale;


// Lanczos 2
    // Source position in fractions of a texel
    vec2 src_pos = pos*SourceSize.xy;
    // Source bottom left texel centre
    vec2 src_centre = floor(src_pos - 0.5) + 0.5;
    // f is position. f.x runs left to right, y bottom to top, z right to left, w top to bottom
    vec4 f; 
    f.xy = src_pos - src_centre;
    f.zw = 1.0 - f.xy;
    // Calculate weights in x and y in parallel.
    // These polynomials are piecewise approximation of Lanczos kernel
    // Calculator here: https://gist.github.com/going-digital/752271db735a07da7617079482394543
    vec4 l2_w0_o3 = (( 1.5672 * f - 2.6445) * f + 0.0837) * f + 0.9976;
    vec4 l2_w1_o3 = ((-0.7389 * f + 1.3652) * f - 0.6295) * f - 0.0004;

    // w12 is the combined weight of the two inner taps per axis; wedge holds
    // the weights of the four outer taps.
    vec4 w1_2  = l2_w0_o3;
    vec2 w12   = w1_2.xy + w1_2.zw;
    vec4 wedge = l2_w1_o3 * vec4 (w12.yx, w12.yx);

    // Calculate texture read positions. tc12 uses bilinear interpolation to do 4 reads in 1.
    vec2 tc12 = SourceSize.zw * (src_centre + w1_2.zw / w12);
    vec2 tc0  = SourceSize.zw * (src_centre - 1.0);
    vec2 tc3  = SourceSize.zw * (src_centre + 2.0);
    
    // Sharpening adjustment
    // Only `wedge` is divided by the total weight; the centre tap below keeps
    // its unnormalised weight w12.x * w12.y.
    float sum = wedge.x + wedge.y + wedge.z + wedge.w + w12.x * w12.y;    
    wedge /= sum;

    // Five texture reads: the centre plus one tap in each direction.
    vec3 res = vec3(
        COMPAT_TEXTURE(Source, vec2(tc12.x, tc0.y)).rgb * wedge.y +
        COMPAT_TEXTURE(Source, vec2(tc0.x, tc12.y)).rgb * wedge.x +
        COMPAT_TEXTURE(Source, tc12.xy).rgb * (w12.x * w12.y) +
        COMPAT_TEXTURE(Source, vec2(tc3.x, tc12.y)).rgb * wedge.z +
        COMPAT_TEXTURE(Source, vec2(tc12.x, tc3.y)).rgb * wedge.w
    );


    // Vertical position inside the current scanline. Inputs taller than 400
    // lines use half the line rate, i.e. one scanline per two source lines.
    float fp = fract(pos.y*SourceSize.y-0.5);
    if (InputSize.y > 400.0) fp = fract(pos.y*SourceSize.y/2.0-0.5);

    // Interlacing: on even frame counts the scanline position is shifted by
    // half a line. Note that fp can then reach values in [0.5, 1.5).
    if (INTERL == 1.0 && InputSize.y > 400.0) 
    {
    fp = mod(float(FrameCount),2.0) <1.0 ? 0.5+fp:fp;
    }
    // The colour is squared here and square-rooted below, so the scanline and
    // mask factors are applied to the squared value.
    res *= res;
    // Contributions from the scanline above and the one below.
    res *= scan(fp,res) + scan(1.0-fp,res);
    // Dot mask: sine of the per-fragment phase, oscillating between
    // 1.0 - 2.0*MASK and 1.0.
    res *= MASK*sin(fragpos)+1.0-MASK;
    res = sqrt(res);
    // Luminance-dependent boost (up to 1.1x), then saturation around the
    // luminance.
    float l = dot(vec3(0.3,0.6,0.1), res);
    res *= mix(1.0,1.1,l);
    res = mix(vec3(l), res, SAT);
    // With curvature on, blank the rounded corners and anything outside the
    // horizontal range (where corn.x drops below 0.0001).
if (corn.y <= corn.x && CURV == 1.0 || corn.x < 0.0001 && CURV == 1.0 )res = vec3(0.0);

    FragColor = vec4(res,1.0);
}

#endif

Copy and save to shaders_glsl/crt as ‘crt-geom-mini.glslp’

shaders = 1

shader0 = shaders/crt-geom-mini.glsl
filter_linear0 = true

Runs solid 60 fps on my old Note 3 Pro while the original runs 14 fps. The image is almost the same.

DariusG · 16 October 2023 14:36

For the record, crt-geom-mini was written to push the HTC One M7, which is around ~95 gflops. Crt-cyclon pushes the Xiaomi Note 3 Pro, ~180 gflops. It would need more than that actually; I have to switch to potato mode to run at ~59.5 fps. Crt-sines runs pretty well on both, as it uses a very simple filter with 1 texture read + 1 for Convergence, and many things are passed to the vertex shader. Crt-m7 was canned so as not to clutter the crt folder with a trillion shaders. That looked really good too, with the quilez filter and fast scanlines etc.

I believe as long as anyone wants to make a fast shader that quilez filter is the best option for performance and good look. Then lanczos2 that I uploaded to “windowed” folder looks very good and is very fast.

DariusG · 23 October 2023 17:29

A small snippet of code emulating a slot mask without a mask

// Emulates a slot mask without a mask texture: two sine modulations at the
// source-line rate are multiplied together, the second one phase-shifted in
// opposite directions for alternating groups of three horizontal units.
// NOTE(review): `pi`, `Source`, `SourceSize`, `OutputSize` and `InputSize` are
// assumed to be provided by the surrounding shader.
void main()
{
    vec3 res = COMPAT_TEXTURE(Source,vTexCoord).rgb;

    // Vertical position in source lines.
    float line = vTexCoord.y*SourceSize.y;

    // First modulation: one sine period per source line, range 0.4 .. 1.0.
    res *= 0.3*sin(line*pi*2.0)+0.7;

    float y = line*2.0;

    // Horizontal position modulo 6: the first three units shift the phase by
    // +0.5, the other three by -0.5, producing the staggered slot pattern.
    if (mod((vTexCoord.x*OutputSize.x*SourceSize.x/InputSize.x),6.0) < 3.0) y = y+0.5; else y = y-0.5;
    res *= 0.3*sin(y*pi)+0.7;

    // Write all four components: assigning only .rgb leaves alpha undefined.
    FragColor = vec4(res, 1.0);
}

kokoko3k · 23 October 2023 18:08

“cheap.glsl”