Here is a WebGL version; I think you need to drag and drop a PPM image onto it.
This is like those Shaolin movies where a young apprentice wants to learn Kung-Fu and the former Grand Master sends him to go and pick coconuts, plant rice paddies and carry bamboo logs on his back for three years.
He doesn't understand why and might be a little belligerent at first, but at the end of those three years he can beat anyone in the village.
The whole 1st-pass loop now runs, and here is the performance result: around 10 fps. It brings the GPU to its knees, even at 1x scale!
code:
#version 130
// Vertex stage: everything up to the matching #elif is compiled only when
// VERTEX is defined.
#if defined(VERTEX)
// Pick the in/out/texture spellings that match the declared GLSL version.
#if __VERSION__ >= 130
#define COMPAT_VARYING out
#define COMPAT_ATTRIBUTE in
#define COMPAT_TEXTURE texture
#else
#define COMPAT_VARYING varying
#define COMPAT_ATTRIBUTE attribute
#define COMPAT_TEXTURE texture2D
#endif
// Precision qualifiers are only emitted on GL ES; elsewhere the macro is empty.
#ifdef GL_ES
#define COMPAT_PRECISION mediump
#else
#define COMPAT_PRECISION
#endif
// Per-vertex inputs.
COMPAT_ATTRIBUTE vec4 VertexCoord;
COMPAT_ATTRIBUTE vec4 COLOR;
COMPAT_ATTRIBUTE vec4 TexCoord;
// Outputs to the fragment stage. COL0 is declared but main() below never writes it.
COMPAT_VARYING vec4 COL0;
COMPAT_VARYING vec4 TEX0;
// Not referenced anywhere in the visible shader.
vec4 _oPosition1;
uniform mat4 MVPMatrix;
uniform COMPAT_PRECISION int FrameDirection;
uniform COMPAT_PRECISION int FrameCount;
uniform COMPAT_PRECISION vec2 OutputSize;
uniform COMPAT_PRECISION vec2 TextureSize;
uniform COMPAT_PRECISION vec2 InputSize;
// compatibility #defines
#define vTexCoord TEX0.xy
// xy = texture size, zw = 1 / texture size.
#define SourceSize vec4(TextureSize, 1.0 / TextureSize) //either TextureSize or InputSize
// xy = output size, zw = 1 / output size.
#define OutSize vec4(OutputSize, 1.0 / OutputSize)
// Placeholder parameter: a uniform when PARAMETER_UNIFORM is defined, else the constant 0.0.
#ifdef PARAMETER_UNIFORM
uniform COMPAT_PRECISION float WHATEVER;
#else
#define WHATEVER 0.0
#endif
// Vertex entry point: forwards the (slightly scaled) texture coordinate and
// transforms the vertex by MVPMatrix.
void main()
{
    // The 1.0001 scale factor is carried over unchanged from the original.
    vec2 scaledCoord = TexCoord.xy * 1.0001;
    TEX0.xy = scaledCoord;

    vec4 clipPosition = MVPMatrix * VertexCoord;
    gl_Position = clipPosition;
}
// Fragment stage: compiled only when FRAGMENT is defined.
#elif defined(FRAGMENT)
// GLSL >= 1.30 needs an explicit output variable; older versions write gl_FragColor.
#if __VERSION__ >= 130
#define COMPAT_VARYING in
#define COMPAT_TEXTURE texture
out vec4 FragColor;
#else
#define COMPAT_VARYING varying
#define FragColor gl_FragColor
#define COMPAT_TEXTURE texture2D
#endif
// On GL ES set the default float precision: highp when the implementation
// advertises it for fragment shaders, mediump otherwise.
#ifdef GL_ES
#ifdef GL_FRAGMENT_PRECISION_HIGH
precision highp float;
#else
precision mediump float;
#endif
#define COMPAT_PRECISION mediump
#else
#define COMPAT_PRECISION
#endif
uniform COMPAT_PRECISION int FrameDirection;
uniform COMPAT_PRECISION int FrameCount;
uniform COMPAT_PRECISION vec2 OutputSize;
uniform COMPAT_PRECISION vec2 TextureSize;
uniform COMPAT_PRECISION vec2 InputSize;
uniform sampler2D Texture;
// Texture coordinate written by the vertex stage.
COMPAT_VARYING vec4 TEX0;
// compatibility #defines
#define Source Texture
#define vTexCoord TEX0.xy
// xy = texture size, zw = 1 / texture size.
#define SourceSize vec4(TextureSize, 1.0 / TextureSize) //either TextureSize or InputSize
// xy = output size, zw = 1 / output size.
#define OutSize vec4(OutputSize, 1.0 / OutputSize)
// SCANLINE_BASE_BRIGHTNESS is declared here but not referenced by the code below.
#ifdef PARAMETER_UNIFORM
uniform COMPAT_PRECISION float SCANLINE_BASE_BRIGHTNESS;
#else
#define SCANLINE_BASE_BRIGHTNESS 0.95
#endif
// Flag checked by crt_modulate() before it calls init_iir().
// NOTE(review): this is a plain shader global, so it presumably starts at 0
// again for every fragment - confirm that "initialize once" is really intended.
int iirs_initialized = 0; // internal state
// frequencies for bandlimiting
const int L_FREQ = 1431818; // full line
const int Y_FREQ = 420000; // Luma (Y) 4.2 MHz
const int I_FREQ = 150000; // Chroma (I) 1.5 MHz
const int Q_FREQ = 55000; // Chroma (Q) 0.55 MHz
// Fixed-point format used by EXP_MUL / EXP_DIV / expx: values are scaled by 2^EXP_P.
const int EXP_P = 11;
const int EXP_ONE = (1 << EXP_P); // 2048 (1*2^11)
const int EXP_PI = 6434; // pi * 2048, rounded
const int EXP_MASK = (EXP_ONE - 1); // 2047, mask for the fractional bits
const int dot_crawl_offset = 0; // 0-5
const int hue = 0; // 0-359
const int Q_OFFSET = -90; // in degrees
// burst hue offset
const int HUE_OFFSET = -60; // in degrees
// Angle format used by crt_sincos14: a full turn is T14_2PI units.
const int T14_2PI = 16384;
const int T14_MASK = (T14_2PI - 1);
const int T14_PI = (T14_2PI / 2);
const int CRT_CC_SAMPLES = 4; // samples per chroma period (samples per 360 deg)
const int CRT_CC_VPER = 2; // vertical period in which the artifacts repeat
// Pixel format identifiers; crt_bpp4fmt() maps them to bytes per pixel.
const int CRT_PIX_FORMAT_RGB = 0; // 3 bytes per pixel [R,G,B, R,G,B, R,G,B..]
const int CRT_PIX_FORMAT_BGR = 1; // 3 bytes per pixel [B,G,R, B,G,R, B,G,R..]
const int CRT_PIX_FORMAT_ARGB = 2; // 4 bytes per pixel [A,R,G,B, A,R,G,B...]
const int CRT_PIX_FORMAT_RGBA = 3; // 4 bytes per pixel [R,G,B,A, R,G,B,A...]
const int CRT_PIX_FORMAT_ABGR = 4; // 4 bytes per pixel [A,B,G,R, A,B,G,R...]
const int CRT_PIX_FORMAT_BGRA = 5; // 4 bytes per pixel [B,G,R,A, B,G,R,A...]
int format = 0; // pix format (one of the CRT_PIX_FORMATs in crt_core.h)
int field = 0;
// NOTE(review): frame has no initializer; crt_modulate() masks it with 1 but
// nothing visible assigns it first.
int frame; // 0 = even, 1 = odd
#define CRT_VRES 262 // vertical resolution
const int CRT_CC_LINE = 2275;
const int CRT_CB_FREQ = 4; // carrier frequency relative to sample rate
const int CRT_HRES = CRT_CC_LINE * CRT_CB_FREQ / 10; // horizontal res, 910 total
const int CRT_INPUT_SIZE = CRT_HRES * CRT_VRES; // 910 * 262 = 238420
// define line ranges in which sync is generated
// the numbers are inclusive
// Make sure these numbers fit in (0, CRT_VRES)
const int SYNC_REGION_LO = 3;
const int SYNC_REGION_HI = 6;
// same as above but for equalizing pulses
const int EQU_REGION_A_LO = 0;
const int EQU_REGION_A_HI = 2;
const int EQU_REGION_B_LO = 7;
const int EQU_REGION_B_HI = 9;
// Signal levels written into the line buffer by crt_modulate().
const int BLANK_LEVEL = 0;
const int SYNC_LEVEL = -40;
const int LINE_BEG = 0;
const int black_point = 0;
// NOTE(review): crt_modulate() scales the modulated signal by
// (WHITE_LEVEL * white_point / 100); with white_point = 0 that factor is 0.
// Confirm the intended value.
const int white_point = 0;
#define WHITE_LEVEL 100
#define BURST_LEVEL 20
#define BLACK_LEVEL 7
bool CRT_DO_BANDLIMITING = true; // enable/disable bandlimiting when encoding
// Clamp range applied to the composite value in crt_modulate().
// (The values are 110 and 0, not the limits of a signed char.)
const int IRE_MAX = 110; // upper clamp for the composite value
const int IRE_MIN = 0; // lower clamp for the composite value
// IIR lowpass filter for bandlimiting YIQ
// Element 0 is the coefficient (c), element 1 the history (h).
int[2] iirY = int[](0,0); // c,h (history)
int[2] iirI = int[](0,0); // c,h (history)
int[2] iirQ = int[](0,0); // c,h (history)
// Fixed-point multiply: both operands and the result are scaled by 2^EXP_P.
int EXP_MUL(int x, int y) {
    int product = x * y;
    return product >> EXP_P;
}
// Fixed-point divide: both operands and the result are scaled by 2^EXP_P.
// The caller must pass a non-zero y.
int EXP_DIV(int x, int y) {
    int scaled = x << EXP_P;
    return scaled / y;
}
// Powers of e in the 2^11 fixed-point format (value * 2048), indexed by the
// exponent 0..4. expx() uses entry 4 for whole multiples of four and entries
// 1..3 for the remainder.
int[5] e11 = int[](
2048, // e^0 = 1.0 -> 2048
5567, // e
15133, // e^2
41135, // e^3
111817 // e^4
);
// Fixed-point exponential: returns e^n, where n and the result are both
// scaled by EXP_ONE (2^11 = 2048).
//
// The integer part of |n| is handled with the e11 table (repeated e^4, then
// e^1..e^3); the fractional part is a Taylor series sum(frac^k / k!). For
// negative n the reciprocal is returned.
//
// Large |n| overflow the 32-bit intermediate products; callers are expected
// to stay within a few units of EXP_ONE.
int expx(int n) {
    if (n == 0) {
        return EXP_ONE; // e^0 == 1.0
    }

    bool neg = n < 0;
    if (neg) {
        n = -n; // work on |n|, invert at the end
    }

    // Integer part of the exponent.
    int idx = n >> 11;
    int res = EXP_ONE;
    for (int i = 0; i < idx / 4; i++) {
        res = EXP_MUL(res, e11[4]);
    }
    idx &= 3;
    if (idx > 0) {
        res = EXP_MUL(res, e11[idx]);
    }

    // Fractional part: accumulate frac^k / k! until the next term is zero.
    n &= EXP_MASK;
    int nxt = EXP_ONE; // frac^k
    int acc = 0;       // running series sum
    int del = 1;       // k!
    for (int i = 1; i < 17; i++) {
        acc += nxt / del;
        nxt = EXP_MUL(nxt, n);
        del *= i;
        // Stop once the next term would be zero (del > nxt) or a value has
        // degenerated. Without this exit del keeps growing and overflows
        // 32 bits at 13!.
        if (del > nxt || nxt <= 0 || del <= 0) {
            break;
        }
    }
    res = EXP_MUL(res, acc);

    if (neg) {
        res = EXP_DIV(EXP_ONE, res); // e^-x == 1 / e^x
    }
    return res;
}
// Computes the coefficient of the one-pole IIR low-pass used for bandlimiting.
//
//   f     - unused; kept so existing call sites keep compiling
//   freq  - sample (line) frequency, in the same unit as limit
//   limit - cutoff frequency
//
// Returns c = 1 - exp(-pi * limit / freq) in the 2^11 fixed-point format
// (2048 == 1.0). The caller must store the result as the filter's coefficient.
//
// freq << 9 must fit in 32 bits, i.e. freq must be below ~4.19 million
// (L_FREQ = 1431818 is fine).
int init_iir (int f, int freq, int limit) {
    if (limit <= 0) {
        return 0; // limit -> 0 gives 1 - exp(0) = 0
    }
    if (freq <= 0) {
        return 2048; // degenerate sample rate: coefficient saturates at 1.0
    }
    // cycles / pixel rate, pre-scaled by 512 so the integer ratio keeps precision
    int rate = (freq << 9) / limit;
    if (rate <= 0) {
        return 2048; // limit far above freq: exp() term vanishes
    }
    return 2048 - expx(-(6434*512 / rate)); // EXP_ONE - expx(-(EXP_PI << 9) / rate)
}
// Sine lookup table read by sintabil8(): 18 entries rising from 0 to the peak
// 0x8000 at index 16, followed by one extra entry (0x7f60) so that reading
// index i + 1 is still in range when i == 16.
const int[18] sigpsin15 = int[]( // significant points on sine wave (15-bit)
0x0000,
0x0c88,0x18f8,0x2528,0x30f8,0x3c50,0x4718,0x5130,0x5a80,
0x62f0,0x6a68,0x70e0,0x7640,0x7a78,0x7d88,0x7f60,0x8000,
0x7f60
);
// Linearly interpolated lookup into sigpsin15.
// Bits 0..7 of n are the interpolation fraction, bits 8..15 the table index.
// The table has 18 entries, so the index must not exceed 16; the callers in
// crt_sincos14 pass at most T14_2PI / 4, which satisfies that as long as
// T14_2PI is left unchanged.
int sintabil8(int n)
{
    int frac = n & 0xff;
    int idx = (n >> 8) & 0xff;

    int lower = sigpsin15[idx];
    int upper = sigpsin15[idx + 1];

    int delta = ((upper - lower) * frac) >> 8;
    return lower + delta;
}
// BIT SHIFTING
// X >> 1 EQUALS TO X/2
// X >> 2 EQUALS TO X/4
// X << 1 EQUALS TO X*2
// X << 2 EQUALS TO X*4
// Sine and cosine of an angle given in T14 units (T14_2PI per full turn).
// s and c are used only as local scratch variables (GLSL passes them by
// value); the result is returned as ivec2(sin, cos).
ivec2 crt_sincos14(int s, int c, int n)
{
    int halfTurn = T14_2PI / 2;
    int quarterTurn = T14_2PI / 4;

    n &= T14_MASK;                  // wrap into one full turn
    int h = n & (halfTurn - 1);     // position inside the current half turn

    if (h <= quarterTurn - 1) {
        // first quadrant of the half turn
        c = sintabil8(quarterTurn - h);
        s = sintabil8(h);
    } else {
        // second quadrant: mirror the table and negate the cosine
        c = -sintabil8(h - quarterTurn);
        s = sintabil8(halfTurn - h);
    }

    // second half of the turn: both values change sign
    if (n > halfTurn - 1) {
        c = -c;
        s = -s;
    }
    return ivec2(s, c);
}
// Bytes per pixel for a CRT_PIX_FORMAT_* value.
//   0, 1 (RGB, BGR)                -> 3
//   2..5 (ARGB, RGBA, ABGR, BGRA)  -> 4
//   anything else                  -> 0 (unknown format)
// Every path returns a value, so unknown formats no longer yield an
// undefined result.
int crt_bpp4fmt(int format)
{
    if (format == 0 || format == 1) {
        return 3;
    }
    if (format >= 2 && format <= 5) {
        return 4;
    }
    return 0;
}
// Returns the cleared filter history (always 0); the argument is ignored.
int reset_iir (int f) {
    int cleared = 0;
    return cleared;
}
// One step of the one-pole IIR low-pass used for bandlimiting.
//   c - filter coefficient in 2^11 fixed point (as returned by init_iir)
//   h - filter history, i.e. the previous output
//   s - new input sample
// Returns the new output h + (s - h) * c. GLSL passes arguments by value, so
// the caller must store the result back as the new history to keep filtering
// across samples. When CRT_DO_BANDLIMITING is false the sample is returned
// unchanged.
int iirf (int c, int h, int s) {
    if (!CRT_DO_BANDLIMITING) {
        return s;
    }
    // Move the history towards the sample; the coefficient itself is left alone.
    return h + EXP_MUL(s - h, c);
}
// Port of the encoder's "modulate" step. It builds the colour-carrier tables,
// writes sync/blank/burst levels into a line buffer, then converts the sampled
// texel to YIQ and to a clamped composite (IRE) value.
//
// Returns vec3(float(fy)), i.e. the last computed Y value replicated to all
// three channels. The composite value (ire) and the analog buffer are written
// but are not part of the return value.
vec3 crt_modulate() {
// Only the last name of each comma-separated declaration has an initializer.
int x, y, xo, yo=0;
int sn, cs, n, ph=0;
int bpp=0;
int cb = 0;
int destw = 753; //AV_LEN
int desth = 236; //((CRT_LINES * 64500) >> 16)
int[8] iccf ; // 4x2
int[8] ccmodI ;
int[8] ccmodQ ;
int[8] ccburst ;
//int[CRT_INPUT_SIZE] analog; // new Array(CRT_INPUT_SIZE).fill(0);
// 910 * 262 (HRES*VRES) .....actually screen pixels
// Two local arrays of 238420 ints each, declared per call.
int[238420] analog;
int[238420] line;
int ire=0; // composite signal
int fy, fi, fq=0;
if (iirs_initialized == 0) {
// NOTE(review): init_iir returns the coefficient, but the return value is
// discarded here, so iirY[0] / iirI[0] / iirQ[0] keep their initial 0.
init_iir(iirY[1], L_FREQ, Y_FREQ);
init_iir(iirI[1], L_FREQ, I_FREQ);
init_iir(iirQ[1], L_FREQ, Q_FREQ);
iirs_initialized = 1;
}
if (desth > 236) desth = 236;
// Build the burst and I/Q carrier tables: 2 rows (vertical phase) of 4 samples.
// y 0 to 2
for (y = 0; y < 2; y++) { //CRT_CC_VPER
// y * 180'
int vert = (y + dot_crawl_offset)*180; // 360/CRT_CC_VPER
// x 0 to 4
for (x = 0; x < CRT_CC_SAMPLES; x++) { // CRT_CC_SAMPLES
// step is 90'
int step = 360/CRT_CC_SAMPLES; // 90
n = vert + hue + x * step;
// crt_sincos14 returns ivec2(sin, cos); only the sine (.x) is used below.
// sn and cs are passed by value and are not updated by the call.
// Degrees are converted to T14 units with * 8192 / 180.
ivec2 snc = crt_sincos14(sn, cs, (n - step + HUE_OFFSET) * 8192 / 180);
ccburst[x+y*CRT_CC_SAMPLES] = snc.x >> 10;
snc = crt_sincos14(sn, cs, n * 8192 / 180);
ccmodI[x+y*CRT_CC_SAMPLES] = snc.x >> 10;
snc = crt_sincos14(sn, cs, (n + Q_OFFSET) * 8192 / 180);
ccmodQ[x+y*CRT_CC_SAMPLES] = snc.x >> 10;
}
}
// bpp is computed but not read again in this function.
bpp = crt_bpp4fmt(format);
//AV_BEG + xoffset + (AV_LEN - destw) / 2
xo = 156;
//CRT_TOP + yoffset + (CRT_LINES - desth) / 2
yo = 23;
// use field and frame variables as you'd like here
// to determine starting phases, etc.
field &= 1;
frame &= 1;
// align signal
// xo mod CRT_CC_SAMPLES
xo = xo - (xo % 4);
// Generate sync / blanking / burst levels for each of the 262 scanlines.
// line[] is indexed by t only (0..909), so every scanline overwrites the
// same 910 entries.
for (n=0; n<262; n++) { // CRT_VRES
// filling line with pixels vertically
// line[238420] = analog[n*910]; // 910*262 QUESTION REMAINS
int t = LINE_BEG; //TIME, LINE_BEG
if ((n >= EQU_REGION_A_LO && n <= EQU_REGION_A_HI) || (n >= EQU_REGION_B_LO && n <= EQU_REGION_B_HI)) {
// equalizing pulses - small blips of sync, mostly blank
// Boundaries are percentages of the 910-sample line.
while (t < (4 * 910 / 100)) line[t++] = SYNC_LEVEL;
while (t < (50 * 910 / 100)) line[t++] = BLANK_LEVEL;
while (t < (54 * 910 / 100)) line[t++] = SYNC_LEVEL;
while (t < (100 * 910 / 100)) line[t++] = BLANK_LEVEL;
}
else if (n >= SYNC_REGION_LO && n <= SYNC_REGION_HI) {
// Pulse boundaries (percent of the line) for even and odd fields.
int[4] even = int[]( 46, 50, 96, 100 );
int[4] odd = int[]( 4, 50, 96, 100 );
int[4] offs = even;
if (field == 1) {
offs = odd;
}
// vertical sync pulse - small blips of blank, mostly sync
while (t < (offs[0] * CRT_HRES / 100)) line[t++] = SYNC_LEVEL;
while (t < (offs[1] * CRT_HRES / 100)) line[t++] = BLANK_LEVEL;
while (t < (offs[2] * CRT_HRES / 100)) line[t++] = SYNC_LEVEL;
while (t < (offs[3] * CRT_HRES / 100)) line[t++] = BLANK_LEVEL;
}
else {
// video line
// SYNC_BEG
while (t < 21) line[t++] = BLANK_LEVEL; // FP
// BW_BEG
while (t < 88) line[t++] = SYNC_LEVEL; // SYNC
// AV_BEG
while (t < 156)line[t++] = BLANK_LEVEL; // BW + CB + BP
// CRT_TOP
if (n < 21) {
while (t < 910) line[t++] = BLANK_LEVEL;
}
// CB_CYCLES of color burst
// CB_BEG = 97, CB_CYCLES = 10 * CRT_CB_FREQ = 4
// NOTE(review): the tables above were filled at x + y*CRT_CC_SAMPLES, but
// they are indexed here with a product, (n % 4) * (t % 2) - confirm.
for (t = 97; t < 137; t++) {
cb = ccburst[(n % 4) * (t % 2)];
//BLANK_LEVEL = 0, BURST_LEVEL = 20
line[t] = (0 + (cb * 20)) >> 5;
iccf[((n + 3) % 4)*(t % 2)] = line[t];
}
}
}
// Encode the active picture area: 236 lines of destw (753) samples.
for (y = 0; y < 236; y++) {
int h = int(SourceSize.y); // CHECK?? height of image
int w = int(SourceSize.x); // CHECK?? width --//--
int field_offset;
int sy;
//desth 236
field_offset = (field * h + 236) / 236 / 2;
sy = (y * h) / desth;
sy += field_offset;
// NOTE(review): sy is clamped to h (not h - 1) and is never read after the
// multiplication below.
if (sy >= h) sy = h;
sy *= w;
// Clear the filter histories at the start of each line.
iirY[1] = reset_iir(iirY[1]);
iirI[1] = reset_iir(iirI[1]);
iirQ[1] = reset_iir(iirQ[1]);
//CRT_CC_VPER 2
ph = (y + yo) % 2;
for (x = 0; x < destw; x++) {
int rA, gA, bA=0;
int[3] pix;
int xoff=0;
// RGB to YIQ matrix in 16.16 fixed point format
int[9] yiqmat = int[](
19595, 38470, 7471, // Y
39059, -18022, -21103, // I
13894, -34275, 20382); // Q
// The sample position does not depend on x or y, so every iteration reads
// the same texel (the one at vTexCoord).
vec3 data = COMPAT_TEXTURE(Source,vTexCoord).rgb;
// NOTE(review): with the channels scaled to 0..65536, products such as
// 38470 * gA can exceed the 32-bit int range - confirm the intended scale.
rA = int((data.r*65536));
gA = int((data.g*65536));
bA = int((data.b*65536));
fy = (yiqmat[0] * rA + yiqmat[1] * gA + yiqmat[2] * bA) >> 14;
fi = (yiqmat[3] * rA + yiqmat[4] * gA + yiqmat[5] * bA) >> 14;
fq = (yiqmat[6] * rA + yiqmat[7] * gA + yiqmat[8] * bA) >> 14;
ire = BLACK_LEVEL + black_point;
xoff = (x + xo) % CRT_CC_SAMPLES;
// bandlimit Y,I,Q
// iirf takes (coefficient, history, sample); its result is not stored back
// into the history element.
fy = iirf(iirY[0], iirY[1], fy);
fi = iirf(iirI[0], iirI[1], fi) * ccmodI[ph*xoff] >> 4;
fq = iirf(iirQ[0], iirQ[1], fq) * ccmodQ[ph*xoff] >> 4;
// modulate as (Y + sin(x) * I + cos(x) * Q)
// white_point is declared as 0, so the scale factor here evaluates to 0.
ire += (fy + fi + fq) * (WHITE_LEVEL * white_point / 100) >> 10;
if (ire < IRE_MIN) ire = IRE_MIN;
if (ire > IRE_MAX) ire = IRE_MAX;
analog[(x + xo) + (y + yo) * CRT_HRES] = ire;
} //temporary end
} //temporary end
// Only the last luma value is returned; analog[] is not read.
return vec3(float(fy));
}
// Fragment entry point: outputs the crt_modulate() result scaled by 0.3,
// with alpha 1.0.
void main()
{
    vec3 encoded = crt_modulate() * 0.3;
    FragColor = vec4(encoded, 1.0);
}
#endif
fY/fI/fQ would need to be converted back to a color format that GLSL can output.
Probably would need the best GPU available at the moment to run at good speed.
Your dedication to porting this is impressive. I feel like I'm entering the Matrix reading the code.
Yeah tons of bit shifting and arrays lol. Also plenty of things happen in every line, put this (that jumps to a sub function) there and add this there etc.
Would you be able to optimize it by not looping through every line on every run? That is, only doing the line of the current pixel/texel (or maybe a sliding window)?
I was also looking for any loops that might be offloadable to a feedback + alpha-channel ticker, but I'm not sure if that would work or help.
Need to rethink implementation, instead of filling arrays do what it does on a single pixel. Meaning: read the code well, understand what it does and do it again in a different way.
In the meantime, here is a hack of that PAL shader in RetroArch that almost matches my real Amiga on composite. Fixed for GLES too. That's really close.
#version 130
// Tunable parameters. Each line gives the name, a display label and four
// numbers (presumably default, minimum, maximum and step - confirm against
// the RetroArch shader documentation).
#pragma parameter FIR_GAIN "FIR Gain" 1.62 0.0 3.0 0.01
#pragma parameter FIR_INVGAIN "FIR Inv Gain" 1.0 0.0 3.0 0.01
#pragma parameter ihue "I Hue" 0.2 -1.0 1.0 0.01
#pragma parameter qhue "Q Hue" 0.1 -1.0 1.0 0.01
#pragma parameter sat "Saturation" 1.0 0.0 2.0 0.01
#pragma parameter crawl "Dot Crawl" 1.0 0.0 1.0 1.0
#pragma parameter blur "Blur Size" 0.75 0.0 2.0 0.01
// Vertex stage: compiled only when VERTEX is defined.
#if defined(VERTEX)
// Pick the in/out/texture spellings that match the declared GLSL version.
#if __VERSION__ >= 130
#define COMPAT_VARYING out
#define COMPAT_ATTRIBUTE in
#define COMPAT_TEXTURE texture
#else
#define COMPAT_VARYING varying
#define COMPAT_ATTRIBUTE attribute
#define COMPAT_TEXTURE texture2D
#endif
// Precision qualifiers are only emitted on GL ES.
#ifdef GL_ES
#define COMPAT_PRECISION mediump
#else
#define COMPAT_PRECISION
#endif
// Per-vertex inputs.
COMPAT_ATTRIBUTE vec4 VertexCoord;
COMPAT_ATTRIBUTE vec4 COLOR;
COMPAT_ATTRIBUTE vec4 TexCoord;
// Outputs to the fragment stage. COL0 is declared but main() never writes it.
COMPAT_VARYING vec4 COL0;
COMPAT_VARYING vec4 TEX0;
// Not referenced anywhere in the visible shader.
vec4 _oPosition1;
uniform mat4 MVPMatrix;
uniform COMPAT_PRECISION int FrameDirection;
uniform COMPAT_PRECISION int FrameCount;
uniform COMPAT_PRECISION vec2 OutputSize;
uniform COMPAT_PRECISION vec2 TextureSize;
uniform COMPAT_PRECISION vec2 InputSize;
// compatibility #defines
#define vTexCoord TEX0.xy
// xy = texture size, zw = 1 / texture size.
#define SourceSize vec4(TextureSize, 1.0 / TextureSize) //either TextureSize or InputSize
// xy = output size, zw = 1 / output size.
#define OutSize vec4(OutputSize, 1.0 / OutputSize)
// Placeholder parameter: a uniform when PARAMETER_UNIFORM is defined, else 0.0.
#ifdef PARAMETER_UNIFORM
uniform COMPAT_PRECISION float WHATEVER;
#else
#define WHATEVER 0.0
#endif
// Vertex entry point: passes the texture coordinate through unchanged and
// transforms the vertex by MVPMatrix.
void main()
{
    TEX0.xy = TexCoord.xy;

    vec4 clipPosition = MVPMatrix * VertexCoord;
    gl_Position = clipPosition;
}
// Fragment stage: compiled only when FRAGMENT is defined.
#elif defined(FRAGMENT)
// GLSL >= 1.30 needs an explicit output variable; older versions write gl_FragColor.
#if __VERSION__ >= 130
#define COMPAT_VARYING in
#define COMPAT_TEXTURE texture
out vec4 FragColor;
#else
#define COMPAT_VARYING varying
#define FragColor gl_FragColor
#define COMPAT_TEXTURE texture2D
#endif
// On GL ES set the default float precision: highp when available, else mediump.
#ifdef GL_ES
#ifdef GL_FRAGMENT_PRECISION_HIGH
precision highp float;
#else
precision mediump float;
#endif
#define COMPAT_PRECISION mediump
#else
#define COMPAT_PRECISION
#endif
uniform COMPAT_PRECISION int FrameDirection;
uniform COMPAT_PRECISION int FrameCount;
uniform COMPAT_PRECISION vec2 OutputSize;
uniform COMPAT_PRECISION vec2 TextureSize;
uniform COMPAT_PRECISION vec2 InputSize;
uniform sampler2D Texture;
// Texture coordinate written by the vertex stage.
COMPAT_VARYING vec4 TEX0;
// compatibility #defines
#define Source Texture
#define vTexCoord TEX0.xy
// xy = texture size, zw = 1 / texture size.
#define SourceSize vec4(TextureSize, 1.0 / TextureSize) //either TextureSize or InputSize
// xy = output size, zw = 1 / output size.
#define OutSize vec4(OutputSize, 1.0 / OutputSize)
// Parameters are uniforms when PARAMETER_UNIFORM is defined, otherwise
// compile-time constants. The fallback constants below differ from the
// defaults in the #pragma parameter lines (e.g. FIR_GAIN 2.0 vs 1.62,
// blur 1.0 vs 0.75).
#ifdef PARAMETER_UNIFORM
uniform COMPAT_PRECISION float FIR_GAIN;
uniform COMPAT_PRECISION float FIR_INVGAIN;
uniform COMPAT_PRECISION float ihue;
uniform COMPAT_PRECISION float qhue;
uniform COMPAT_PRECISION float sat;
uniform COMPAT_PRECISION float crawl;
uniform COMPAT_PRECISION float blur;
#else
#define FIR_GAIN 2.0
#define FIR_INVGAIN 1.0
#define ihue 0.0
#define qhue 0.0
#define sat 1.0
#define crawl 1.0
#define blur 1.0
#endif
#define PI 3.14159265358
// FSC, FLINE and VISIBLELINES are defined but not referenced by the code below.
#define FSC 21477272.73
#define FLINE 15625.0
#define VISIBLELINES 312.0
// Colour-space matrices. mat3 constructors take their arguments column by
// column, so these are meant to be used as (matrix * vector).
#define RGB_to_YIQ mat3( 0.299 , 0.595716 , 0.211456 , 0.587 , -0.274453 , -0.522591 , 0.114 , -0.321263 , 0.311135 )
#define YIQ_to_RGB mat3( 1.0 , 1.0 , 1.0 , 0.9563 , -0.2721 , -1.1070 , 0.6210 , -0.6474 , 1.7046 )
#define RGB_to_YUV mat3( 0.299 , -0.14713 , 0.615 , 0.587 , -0.28886 , -0.514991 , 0.114 , 0.436 , -0.10001 )
#define YUV_to_RGB mat3( 1.0 , 1.0 , 1.0 , 0.0 , -0.39465 , 2.03211 , 1.13983 , -0.58060 , 0.0 )
// Filter taps used by main() to low-pass the demodulated chroma.
#define FIRTAPS 20
const float FIR[20] = float[20] (-0.008030271,0.003107906,0.016841352,0.032545161,0.049360136,
0.066256720,0.082120150,0.095848433,0.106453014,0.113151423,
0.115441842,0.113151423,0.106453014,0.095848433,0.082120150,
0.066256720,0.049360136,0.032545161,0.016841352,0.003107906);
//#define FIR_GAIN 2.0
//#define FIR_INVGAIN 1.02
// Per-fragment state set in main() and read by the helper functions.
// width_ratio and height_ratio are assigned in main() but never read.
float width_ratio;
float height_ratio;
float altv; // per-line phase term added to the carrier phase
float invx; // horizontal step between FIR samples
vec2 dx; // horizontal offset of the blur taps
#define time float(FrameCount)
// Samples Source at pos shifted horizontally by offset * invx.
#define fetch(offset, pos, invx) COMPAT_TEXTURE(Source, vec2(pos.xy + vec2(offset*invx ,0.0)))
// Composite signal at xy: a three-tap horizontal blur (weights 0.5 / 0.3 / 0.2
// for the centre, +dx and -dx taps) is converted to YUV and the chroma is
// modulated onto the given carrier. The result is clamped to [0, 1].
float mod_luma(vec2 xy, float sinwt, float coswt) {
    vec3 centreTap = fetch(0.0, xy, invx).xyz;
    vec3 plusTap = fetch(0.0, xy+dx, invx).xyz;
    vec3 minusTap = fetch(0.0, xy-dx, invx).xyz;
    vec3 blurred = centreTap*0.5 + plusTap*0.3 + minusTap*0.2;

    vec3 yuv = RGB_to_YUV * blurred;
    float composite = yuv.x + yuv.y*sinwt + yuv.z*coswt;
    return clamp(composite, 0.0, 1.0);
}
// Modulates the blurred colour at pos (centre tap shifted by ofs * invx) onto
// the carrier and immediately demodulates it again, returning the composite
// signal multiplied by the carrier's sine and cosine. main() low-passes these
// products with the FIR kernel to recover U and V.
vec2 modem_UV(vec2 pos, float ofs) {
    // Carrier phase at the shifted sample position.
    float t = (pos.x + ofs*invx) * OutputSize.x*SourceSize.x/InputSize.x;
    float phase = t * 2.0 * PI/2.0 + altv;
    vec2 carrier = vec2(sin(phase), cos(phase));

    // Only the centre tap is shifted by ofs; the +/-dx taps are taken at pos.
    vec3 blurred = fetch(ofs, pos, invx).xyz*0.5
                 + fetch(0.0, pos+dx, invx).xyz*0.3
                 + fetch(0.0, pos-dx, invx).xyz*0.2;

    vec3 yuv = RGB_to_YUV * blurred;
    float signal = clamp(yuv.x + yuv.y*carrier.x + yuv.z*carrier.y, 0.0, 1.0);
    return signal * carrier;
}
// Hue / saturation mixing matrix applied to (Y, U, V) in main().
// It is filled in inside main(): ihue, qhue and sat are uniforms when
// PARAMETER_UNIFORM is defined, and GLSL 1.30 / GLSL ES require the
// initializer of a global to be a constant expression.
mat3 mix_mat;

// Fragment entry point: encodes the source into a composite signal,
// demodulates U/V with a FIR low-pass, subtracts the re-modulated chroma from
// the composite to get luma, and converts the result back to RGB.
void main() {
    mix_mat = mat3(
        1.0, 0.0, 0.0,
        ihue, sat, 0.0,
        qhue, 0.0, sat
    );

    // Pull the sampling row most of the way (0.8) towards the texel centre.
    vec2 cent = floor(vTexCoord*SourceSize.xy)+0.5;
    vec2 near = cent*SourceSize.zw;
    vec2 pos = vTexCoord;
    pos.y = mix(vTexCoord.y, near.y,0.8);

    // Shared state read by mod_luma() and modem_UV().
    dx = vec2(SourceSize.z*blur,0.0);
    width_ratio = SourceSize.x / 341.0;
    height_ratio = SourceSize.y / 625.0;
    // Optional dot crawl: the phase advances with the frame counter.
    float crawler = crawl == 1.0? 2.0*mod(time,30.0): 0.0;
    altv = pos.y*SourceSize.y*PI/2.0 + crawler;
    invx = 0.05 / OutputSize.x; // equals 5 samples per Fsc period

    // lowpass U/V at baseband
    vec2 UV = vec2(0.0);
    for (int i = 0; i < FIRTAPS; i++) {
        vec2 uv = modem_UV(pos, 2.0*float(i) - float(FIRTAPS)); // floats for GLES, or else, bang!
        UV += FIR_GAIN* uv * FIR[i];
    }

    // Carrier at this pixel, used to remove the chroma from the composite signal.
    float wt = pos.x*SourceSize.x*PI/2.0;
    float sinwt = sin(wt + altv);
    float coswt = cos(wt + altv);
    float luma = mod_luma(pos, sinwt, coswt) - FIR_INVGAIN*(UV.x*sinwt + UV.y*coswt);

    vec3 yuv_result = vec3(luma, UV.x, UV.y);
    yuv_result *= mix_mat;
    FragColor = vec4(YUV_to_RGB * yuv_result, 1.0);
}
#endif
After some talk with him, the 1st pass should look something like this in GLSL. I still have to check whether the numbers (LINE_FREQ etc.) are correct.
// With version 110 the __VERSION__ >= 130 branches below are not taken, so
// the attribute / varying / texture2D spellings are used.
#version 110
// NOTE(review): FIR_GAIN is declared as a parameter but is not used by this shader.
#pragma parameter FIR_GAIN "FIR Gain" 1.62 0.0 3.0 0.01
// Vertex stage: compiled only when VERTEX is defined.
#if defined(VERTEX)
#if __VERSION__ >= 130
#define COMPAT_VARYING out
#define COMPAT_ATTRIBUTE in
#define COMPAT_TEXTURE texture
#else
#define COMPAT_VARYING varying
#define COMPAT_ATTRIBUTE attribute
#define COMPAT_TEXTURE texture2D
#endif
// Precision qualifiers are only emitted on GL ES.
#ifdef GL_ES
#define COMPAT_PRECISION mediump
#else
#define COMPAT_PRECISION
#endif
// Per-vertex inputs.
COMPAT_ATTRIBUTE vec4 VertexCoord;
COMPAT_ATTRIBUTE vec4 COLOR;
COMPAT_ATTRIBUTE vec4 TexCoord;
// Outputs to the fragment stage. COL0 is declared but main() never writes it.
COMPAT_VARYING vec4 COL0;
COMPAT_VARYING vec4 TEX0;
// Not referenced anywhere in the visible shader.
vec4 _oPosition1;
uniform mat4 MVPMatrix;
uniform COMPAT_PRECISION int FrameDirection;
uniform COMPAT_PRECISION int FrameCount;
uniform COMPAT_PRECISION vec2 OutputSize;
uniform COMPAT_PRECISION vec2 TextureSize;
uniform COMPAT_PRECISION vec2 InputSize;
// compatibility #defines
#define vTexCoord TEX0.xy
// xy = texture size, zw = 1 / texture size.
#define SourceSize vec4(TextureSize, 1.0 / TextureSize) //either TextureSize or InputSize
// xy = output size, zw = 1 / output size.
#define OutSize vec4(OutputSize, 1.0 / OutputSize)
// Placeholder parameter: a uniform when PARAMETER_UNIFORM is defined, else 0.0.
#ifdef PARAMETER_UNIFORM
uniform COMPAT_PRECISION float WHATEVER;
#else
#define WHATEVER 0.0
#endif
// Vertex entry point: passes the texture coordinate through unchanged and
// transforms the vertex by MVPMatrix.
void main()
{
    TEX0.xy = TexCoord.xy;

    vec4 clipPosition = MVPMatrix * VertexCoord;
    gl_Position = clipPosition;
}
// Fragment stage: compiled only when FRAGMENT is defined.
#elif defined(FRAGMENT)
// GLSL >= 1.30 needs an explicit output variable; older versions write gl_FragColor.
#if __VERSION__ >= 130
#define COMPAT_VARYING in
#define COMPAT_TEXTURE texture
out vec4 FragColor;
#else
#define COMPAT_VARYING varying
#define FragColor gl_FragColor
#define COMPAT_TEXTURE texture2D
#endif
// On GL ES set the default float precision: highp when available, else mediump.
#ifdef GL_ES
#ifdef GL_FRAGMENT_PRECISION_HIGH
precision highp float;
#else
precision mediump float;
#endif
#define COMPAT_PRECISION mediump
#else
#define COMPAT_PRECISION
#endif
uniform COMPAT_PRECISION int FrameDirection;
uniform COMPAT_PRECISION int FrameCount;
uniform COMPAT_PRECISION vec2 OutputSize;
uniform COMPAT_PRECISION vec2 TextureSize;
uniform COMPAT_PRECISION vec2 InputSize;
uniform sampler2D Texture;
// Texture coordinate written by the vertex stage.
COMPAT_VARYING vec4 TEX0;
// compatibility #defines
#define Source Texture
#define vTexCoord TEX0.xy
// xy = texture size, zw = 1 / texture size.
#define SourceSize vec4(TextureSize, 1.0 / TextureSize) //either TextureSize or InputSize
// xy = output size, zw = 1 / output size.
#define OutSize vec4(OutputSize, 1.0 / OutputSize)
// FIR_GAIN is declared here but not referenced by the code below.
#ifdef PARAMETER_UNIFORM
uniform COMPAT_PRECISION float FIR_GAIN;
#else
#define FIR_GAIN 2.0
#endif
#define PI 3.14159265358
// Samples per chroma period and vertical repeat period, as floats.
#define CRT_CC_SAMPLES 4.0
#define CRT_CC_VPER 2.0
// Black level as a fraction. The macro is not parenthesized, so it is only
// safe in additive contexts such as BLACK_LVL+BLACK_PT.
#define BLACK_LVL 7.0/140.0
#define BLACK_PT 0.0
// Q_OFFSET is defined but not referenced by the code below.
#define Q_OFFSET 90.0
const float hue = 0.0; // 0-359
const float dot_crawl_offset = 0.0; // 0-5
// Frequencies in MHz.
const float LINE_FREQ = 14.31818; // full line
// NOTE(review): the value is 2.2 although the comment says 4.2 MHz.
const float Y_FREQ = 2.2; // Luma (Y) 4.2 MHz
const float I_FREQ = 1.5; // Chroma (I) 1.5 MHz
const float Q_FREQ = 0.55; // Chroma (Q) 0.55 MHz
// Colour-space matrices. mat3 constructors take their arguments column by
// column, so these are meant to be used as (matrix * vector).
#define RGB_to_YIQ mat3( 0.299, 0.595716, 0.211456,0.587, -0.274453, -0.522591, 0.114, -0.321263, 0.311135 )
#define YIQ_to_RGB mat3( 1.0, 1.0, 1.0, 0.9563, -0.2721, -1.1070,0.6210, -0.6474, 1.7046)
// Coefficient of a one-pole low-pass filter with cutoff `limit`, sampled at
// LINE_FREQ (both in MHz): c = 1 - exp(-pi * limit / LINE_FREQ).
// This is the floating-point form of the fixed-point expression
// 2048 - expx(-6434 * 512 / rate) with rate = 512 * freq / limit, where 2048
// represents 1.0 and 6434 represents pi. The result lies in [0, 1) for
// limit >= 0.
float init_iir (float limit) {
    return 1.0 - exp(-PI * limit / LINE_FREQ);
}
// Returns exp(s - h), where h is the value produced by init_iir and s is the
// sample.
float iirf (float h, float s) {
    return exp(s - h);
}
// Fragment entry point: converts the sampled texel to YIQ, weights each
// component with iirf(), modulates I and Q onto the colour carrier and outputs
// the clamped composite value as a grey level.
void main() {
    vec3 rgb = COMPAT_TEXTURE(Source, vTexCoord).rgb;
    // RGB_to_YIQ is laid out column by column (first column = what R
    // contributes to Y, I and Q), so the matrix has to be on the left;
    // rgb * matrix would apply the transposed matrix.
    vec3 yiq = RGB_to_YIQ * rgb;

    float ire = BLACK_LVL+BLACK_PT;
    float xo = 156.0;
    float yo = 23.0;

    // full output resolution x
    vec2 pos = vTexCoord*OutputSize.xy*SourceSize.xy/InputSize.xy;

    // Align the horizontal offset to a multiple of the chroma period.
    xo = xo - mod(xo, 4.0);
    float xoff = mod((pos.x + xo), CRT_CC_SAMPLES);
    float ph = mod((pos.y + yo + dot_crawl_offset), CRT_CC_VPER);
    float phase = ph*xoff + hue;
    float sn = sin(phase/2.0*PI/2.0);
    float cs = cos(phase/2.0*PI/2.0);

    // Initiate IIR values
    float iirY = init_iir(Y_FREQ);
    float iirI = init_iir(I_FREQ);
    float iirQ = init_iir(Q_FREQ);

    // IIR pass before sending as 1 signal
    float fY = yiq.r*iirf(iirY, yiq.r);
    float fI = yiq.g*iirf(iirI, yiq.g)*cs;
    float fQ = yiq.b*iirf(iirQ, yiq.b)*sn;

    ire += fY + fI + fQ;
    ire = clamp(ire,0.0,1.1);
    FragColor = vec4(vec3(ire), 1.0);
}
#endif
That looks a lot more manageable!
Yeah just need to understand what the code does and do it properly for GLSL. Now i am not in the mood to jump in to the second pass rabbit hole maybe after some days lol.
All numbers revised to correct values
// With version 110 the __VERSION__ >= 130 branches below are not taken, so
// the attribute / varying / texture2D spellings are used.
#version 110
// NOTE(review): FIR_GAIN is declared as a parameter but is not used by this shader.
#pragma parameter FIR_GAIN "FIR Gain" 1.62 0.0 3.0 0.01
// Vertex stage: compiled only when VERTEX is defined.
#if defined(VERTEX)
#if __VERSION__ >= 130
#define COMPAT_VARYING out
#define COMPAT_ATTRIBUTE in
#define COMPAT_TEXTURE texture
#else
#define COMPAT_VARYING varying
#define COMPAT_ATTRIBUTE attribute
#define COMPAT_TEXTURE texture2D
#endif
// Precision qualifiers are only emitted on GL ES.
#ifdef GL_ES
#define COMPAT_PRECISION mediump
#else
#define COMPAT_PRECISION
#endif
// Per-vertex inputs.
COMPAT_ATTRIBUTE vec4 VertexCoord;
COMPAT_ATTRIBUTE vec4 COLOR;
COMPAT_ATTRIBUTE vec4 TexCoord;
// Outputs to the fragment stage. COL0 is declared but main() never writes it.
COMPAT_VARYING vec4 COL0;
COMPAT_VARYING vec4 TEX0;
// Not referenced anywhere in the visible shader.
vec4 _oPosition1;
uniform mat4 MVPMatrix;
uniform COMPAT_PRECISION int FrameDirection;
uniform COMPAT_PRECISION int FrameCount;
uniform COMPAT_PRECISION vec2 OutputSize;
uniform COMPAT_PRECISION vec2 TextureSize;
uniform COMPAT_PRECISION vec2 InputSize;
// compatibility #defines
#define vTexCoord TEX0.xy
// xy = texture size, zw = 1 / texture size.
#define SourceSize vec4(TextureSize, 1.0 / TextureSize) //either TextureSize or InputSize
// xy = output size, zw = 1 / output size.
#define OutSize vec4(OutputSize, 1.0 / OutputSize)
// Placeholder parameter: a uniform when PARAMETER_UNIFORM is defined, else 0.0.
#ifdef PARAMETER_UNIFORM
uniform COMPAT_PRECISION float WHATEVER;
#else
#define WHATEVER 0.0
#endif
// Vertex entry point: passes the texture coordinate through unchanged and
// transforms the vertex by MVPMatrix.
void main()
{
    TEX0.xy = TexCoord.xy;

    vec4 clipPosition = MVPMatrix * VertexCoord;
    gl_Position = clipPosition;
}
// Fragment stage: compiled only when FRAGMENT is defined.
#elif defined(FRAGMENT)
// GLSL >= 1.30 needs an explicit output variable; older versions write gl_FragColor.
#if __VERSION__ >= 130
#define COMPAT_VARYING in
#define COMPAT_TEXTURE texture
out vec4 FragColor;
#else
#define COMPAT_VARYING varying
#define FragColor gl_FragColor
#define COMPAT_TEXTURE texture2D
#endif
// On GL ES set the default float precision: highp when available, else mediump.
#ifdef GL_ES
#ifdef GL_FRAGMENT_PRECISION_HIGH
precision highp float;
#else
precision mediump float;
#endif
#define COMPAT_PRECISION mediump
#else
#define COMPAT_PRECISION
#endif
uniform COMPAT_PRECISION int FrameDirection;
uniform COMPAT_PRECISION int FrameCount;
uniform COMPAT_PRECISION vec2 OutputSize;
uniform COMPAT_PRECISION vec2 TextureSize;
uniform COMPAT_PRECISION vec2 InputSize;
uniform sampler2D Texture;
// Texture coordinate written by the vertex stage.
COMPAT_VARYING vec4 TEX0;
// compatibility #defines
#define Source Texture
#define vTexCoord TEX0.xy
// xy = texture size, zw = 1 / texture size.
#define SourceSize vec4(TextureSize, 1.0 / TextureSize) //either TextureSize or InputSize
// xy = output size, zw = 1 / output size.
#define OutSize vec4(OutputSize, 1.0 / OutputSize)
// FIR_GAIN is declared here but not referenced by the code below.
#ifdef PARAMETER_UNIFORM
uniform COMPAT_PRECISION float FIR_GAIN;
#else
#define FIR_GAIN 2.0
#endif
#define PI 3.14159265358
// Samples per chroma period and vertical repeat period, as floats.
#define CRT_CC_SAMPLES 4.0
#define CRT_CC_VPER 2.0
// Black level as a fraction. The macro is not parenthesized, so it is only
// safe in additive contexts such as BLACK_LVL+BLACK_PT.
#define BLACK_LVL 7.0/140.0
#define BLACK_PT 0.0
// Q_OFFSET is defined but not referenced by the code below.
#define Q_OFFSET 90.0
const float hue = 0.0; // 0-359
const float dot_crawl_offset = 0.0; // 0-5
// Frequencies in MHz.
const float LINE_FREQ = 14.31818; // full line
const float Y_FREQ = 4.2; // Luma (Y) 4.2 MHz
const float I_FREQ = 1.5; // Chroma (I) 1.5 MHz
const float Q_FREQ = 0.55; // Chroma (Q) 0.55 MHz
// Colour-space matrices. mat3 constructors take their arguments column by
// column, so these are meant to be used as (matrix * vector).
#define RGB_to_YIQ mat3( 0.299, 0.595716, 0.211456,0.587, -0.274453, -0.522591, 0.114, -0.321263, 0.311135 )
#define YIQ_to_RGB mat3( 1.0, 1.0, 1.0, 0.9563, -0.2721, -1.1070,0.6210, -0.6474, 1.7046)
// Coefficient of a one-pole low-pass filter with cutoff `limit`, sampled at
// LINE_FREQ (both in MHz): c = 1 - exp(-pi * limit / LINE_FREQ).
// In the fixed-point expression 2048 - expx(-6434 * 512 / rate) the rate is
// 512 * freq / limit, i.e. the line frequency divided by the cutoff, so a
// lower cutoff gives a smaller coefficient (stronger filtering).
float init_iir (float limit) {
    float rate = LINE_FREQ / limit; // samples per cutoff cycle
    return 1.0 - exp(-PI / rate);
}
// Returns exp(s - h), where h is the value produced by init_iir and s is the
// sample.
float iirf (float h, float s) {
    return exp(s - h);
}
// Fragment entry point: converts the sampled texel to YIQ, weights each
// component with iirf(), modulates I and Q onto the colour carrier and outputs
// the clamped composite value as a grey level.
void main() {
    vec3 rgb = COMPAT_TEXTURE(Source, vTexCoord).rgb;
    // RGB_to_YIQ is laid out column by column (first column = what R
    // contributes to Y, I and Q), so the matrix has to be on the left;
    // rgb * matrix would apply the transposed matrix.
    vec3 yiq = RGB_to_YIQ * rgb;

    float ire = BLACK_LVL+BLACK_PT;
    float xo = 156.0;
    float yo = 23.0;

    // Position in output pixels.
    vec2 pos = vTexCoord*OutputSize.xy*SourceSize.xy/InputSize.xy;

    // Align the horizontal offset to a multiple of the chroma period.
    xo = xo - mod(xo, 4.0);
    float xoff = mod((pos.x + xo), CRT_CC_SAMPLES);
    float ph = mod((pos.y + yo + dot_crawl_offset), CRT_CC_VPER);
    float phase = ph*xoff + hue;
    float sn = sin(phase/2.0*PI/2.0);
    float cs = cos(phase/2.0*PI/2.0);

    // Initiate IIR values
    float iirY = init_iir(Y_FREQ);
    float iirI = init_iir(I_FREQ);
    float iirQ = init_iir(Q_FREQ);

    // IIR pass before sending as 1 signal
    float fY = yiq.r*iirf(iirY, yiq.r);
    float fI = yiq.g*iirf(iirI, yiq.g)*cs;
    float fQ = yiq.b*iirf(iirQ, yiq.b)*sn;

    ire += fY + fI + fQ;
    ire = clamp(ire,0.0,1.0);
    FragColor = vec4(vec3(ire), 1.0);
}
#endif
OK, so I added 2 GLSL shaders. The one in the "pal" folder, named A520, mimics my Amiga on its composite modulator; this is almost a 1:1 replication (!). I did so much study on the matter that I could write an NTSC shader on my cellphone now, lol,
and a replacement for "ntsc-simple".
@Cyber, since you like blargg's NTSC, check the new "ntsc-simple-hd" I uploaded to slang/glsl and tell me what you think. It's a bit sharper, since it uses 20 passes "blurring" left and right while blargg uses 33.
There is a kernel array of size 66 in blargg; the 2nd part is filled with luma filter values related to the "resolution" and "sharpness" values.
// Excerpt: fills kernels[33..65] with the luma filter. The names sharpness,
// resolution, kernels and PI are not declared in this excerpt.
#define bleed 0.2
#define LUMA_CUTOFF 0.2
// generate luma (y) filter using sinc kernel
// sinc with rolloff (dsf)
float rolloff = 1.0 + sharpness * 0.032;
float maxh = 32.0;
float pow_a_n = pow( rolloff, maxh );
float sum = 0.0;
int i;
// quadratic mapping to reduce negative (blurring) range
float to_angle = resolution + 1.0;
to_angle = PI / maxh * LUMA_CUTOFF * (to_angle * to_angle + 1.0);
// Index 49 is 33 + 16, the centre tap; the loop below skips it when x == 0
// and pow_a_n is within [0.981, 1.056].
kernels [49] = maxh; // default center value
for ( i = 0; i < 33; i++ )
{
// x runs from -16 to 16.
int x = i - 16;
float angle = x * to_angle;
// instability occurs at center point with rolloff very close to 1.0
// The test is true for every non-zero x, not only for positive x.
if ( x || pow_a_n > 1.056 || pow_a_n < 0.981 )
{
float rolloff_cos_a = rolloff * cos( angle );
float num = 1.0 - rolloff_cos_a - pow_a_n * cos( maxh * angle ) +
pow_a_n * rolloff * cos( (maxh - 1.0) * angle );
float den = 1.0 - rolloff_cos_a - rolloff_cos_a + rolloff * rolloff;
float dsf = num / den;
kernels [33 + i] = dsf - 0.5;
}
}
// apply blackman window and find sum
// Only sum is updated here; the kernel entries themselves are left unchanged
// in this excerpt.
for ( i = 0; i < 33; i++ )
{
float x = PI * 2 / 32 * i;
float blackman = 0.42 - 0.5*cos( x ) + 0.08*cos( x * 2 );
sum += kernels [33 + i] * blackman;
}
And the 1st part, the first 33 numbers, is filled with chroma values related to "bleed".
// Excerpt: fills kernels[0..32] with the chroma filter and normalizes it.
// NOTE(review): this code assigns to bleed, so bleed has to be a variable
// here; it cannot be combined with a "#define bleed 0.2".
// generate chroma (iq) filter using gaussian kernel
float cutoff_factor = -0.03125;
if ( bleed < 0.0 )
{
// keep extreme value accessible only near upper end of scale (1.0)
// Three squarings raise bleed to the 8th power before rescaling.
bleed *= bleed;
bleed *= bleed;
bleed *= bleed;
bleed *= -30.0 / 0.65;
}
bleed = 0.35 * cutoff_factor * bleed;
// Gaussian exp(i^2 * bleed) centred on index 16.
for ( i = -16; i <= 16; i++ ){
kernels [16 + i] = exp( float(i) * float(i) * bleed );
}
// normalize even and odd phases separately
// For each parity, the entries with that parity are scaled to sum to 1.
for ( i = 0; i < 2; i++ )
{
sum = 0.0;
int x;
for ( x = i; x < 33; x += 2 )
sum += kernels [x];
sum = 1.0 / sum;
for ( x = i; x < 33; x += 2 )
{
kernels[x] *= sum;
}
}
These arrays could be pre-calculated for given bleed and resolution values in steps, let's say -1.0, -0.5, 0.0, 0.5, 1.0, so you don't need to run this whole huge loop in real time and drag performance to the bottom.
In the end, the luma and chroma samples (the 33 - 1 = 32 passes) are filtered by multiplying them by these arrays. If you look carefully, luma is blurred half as much as chroma (edit: actually a quarter as much, since it only runs from 0 to 16 while chroma runs from -32 to 32, so it's 4:1).
int x = i - 16;
// later on
if ( x ...... // meaning if x >0 so half passes of 32
Thanks for asking, it would be an honour as soon as I get my latest presets pack out the door. I have had to overhaul it at least 3 times already since the initial release. The first one was to add Grade (the old one), the second was mainly to tweak filters. The third one was to calibrate using 240p Test Suite, then retweak settings and filters. While I was working on that, a couple of users complained that the presets weren't loading. It was because they don't have the old Grade, since the Online Updater doesn't pull it anymore, and in addition to that the new Grade is in a different folder. So I had to overhaul again just to redo all the colour and brightness settings to be able to use this new Grade, and I was actually quite satisfied after maybe the third overhaul.
This one's looking good though.
So in other words as soon as I get a chance. Lol
It's cool that you've gotten your hands dirty in the Blargg code and can understand and visualize what's been going on under the hood for all these years.
As we can see, it's highly optimized.
One thing I would like to get some more information on would be the exact ranges and steps of all of the settings.
I know for a fact that most of the settings can be incremented in steps of ±0.1.
Most seem to have a range of between -1 to +1
I think I found one where the max was 0.5 though (gamma), but I can't say for sure. All I use are my eyes and trial and error.
One interesting setting is the colour bleed function. Each 0.1 step seems to only increase the bleed marginally. So you can almost go overboard without going overboard.
With many of the other settings a ±0.1 adjustment is quite noticeable.
One thing I don't get, and wish could be resolved or worked around, is why the filter causes the Aspect Ratio to change slightly.
"Lab" here
Direct comparison and 1:1 Trinitron PAL colors, Amiga at least, preset
shaders = "6"
feedback_pass = "0"
shader0 = "../misc/shaders/simple_color_controls.glsl"
filter_linear0 = "false"
shader1 = "../misc/shaders/chromaticity.glsl"
filter_linear1 = "false"
shader2 = "../crt/shaders/crt-consumer/linearize.glsl"
filter_linear2 = "false"
shader3 = "../crt/shaders/crt-consumer/glow_x.glsl"
filter_linear3 = "false"
shader4 = "../crt/shaders/crt-consumer/glow_y.glsl"
filter_linear4 = "false"
shader5 = "../crt/shaders/crt-geom.glsl"
filter_linear5 = "false"
TEMP = "5423.000000"
BLACK = "-0.020000"
gamma_in = "1.000000"
gamma_out_red = "1.000000"
gamma_out_green = "1.000000"
gamma_out_blue = "1.000000"
R = "2.000000"
COLOR_MODE = "-1.000000"
Dx = "-1.000000"
CRTgamma = "1.000000"
SATURATION = "1.200000"
DOTMASK = "0.5"
Default RGB colors
SNES Flashback... yeah that looks like Trinitron
Another day, another project; today's was "Make crt-geom run on an old 2016 cellphone with a GPU of around 120 GFLOPS", so here it is...
Copy and save as "crt-geom-mini.glsl", then drop it into shaders_glsl/crt/shaders
#version 110
// This program is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 2 of the License, or (at your option)
// any later version.
#pragma parameter CURV "CRT-Geom Curvature" 1.0 0.0 1.0 1.0
#pragma parameter SCAN "CRT-Geom Scanline Weight" 0.3 0.2 0.6 0.05
#pragma parameter MASK "CRT-Geom Dotmask Strength" 0.25 0.0 1.0 0.05
#pragma parameter LUM "CRT-Geom Luminance" 0.05 0.0 0.5 0.01
#pragma parameter INTERL "CRT-Geom Interlacing Simulation" 1.0 0.0 1.0 1.0
#pragma parameter SAT "CRT-Geom Saturation" 1.1 0.0 2.0 0.01
#define PI 3.1415926535897932384626433
#if defined(VERTEX)
#if __VERSION__ >= 130
#define COMPAT_VARYING out
#define COMPAT_ATTRIBUTE in
#define COMPAT_TEXTURE texture
#else
#define COMPAT_VARYING varying
#define COMPAT_ATTRIBUTE attribute
#define COMPAT_TEXTURE texture2D
#endif
#ifdef GL_ES
#define COMPAT_PRECISION mediump
#else
#define COMPAT_PRECISION
#endif
// Vertex-stage interface: per-vertex inputs, values handed to the fragment
// stage, and uniforms.
// Inputs. COLOR is declared but not read by this stage's main().
COMPAT_ATTRIBUTE vec4 VertexCoord;
COMPAT_ATTRIBUTE vec4 COLOR;
COMPAT_ATTRIBUTE vec4 TexCoord;
// Outputs to the fragment stage. main() below writes TEX0.xy, scale, warpp,
// warp and fragpos; COL0, warppm and omega are declared but never written.
COMPAT_VARYING vec4 COL0;
COMPAT_VARYING vec4 TEX0;
COMPAT_VARYING vec2 scale;
COMPAT_VARYING vec2 warpp;
COMPAT_VARYING vec2 warppm;
COMPAT_VARYING vec2 warp;
COMPAT_VARYING float fragpos;
COMPAT_VARYING float omega;
// Not referenced by this stage's main().
vec4 _oPosition1;
// Uniforms. main() reads MVPMatrix, OutputSize, TextureSize (through
// SourceSize) and InputSize; the frame counters are unused in this stage.
uniform mat4 MVPMatrix;
uniform COMPAT_PRECISION int FrameDirection;
uniform COMPAT_PRECISION int FrameCount;
uniform COMPAT_PRECISION vec2 OutputSize;
uniform COMPAT_PRECISION vec2 TextureSize;
uniform COMPAT_PRECISION vec2 InputSize;
// compatibility #defines
// SourceSize / OutSize pack a size in .xy and its reciprocal in .zw.
#define vTexCoord TEX0.xy
#define SourceSize vec4(TextureSize, 1.0 / TextureSize) //either TextureSize or InputSize
#define OutSize vec4(OutputSize, 1.0 / OutputSize)
// SIZE is a uniform when PARAMETER_UNIFORM is defined and the constant 1.0
// otherwise; it is not referenced by this stage's main().
#ifdef PARAMETER_UNIFORM
uniform COMPAT_PRECISION float SIZE;
#else
#define SIZE 1.0
#endif
void main()
{
    // Clip-space position comes straight from the supplied MVP matrix.
    gl_Position = MVPMatrix * VertexCoord;

    // Raw texture coordinate and the texture-to-input size ratio.
    vec2 uv = TexCoord.xy;
    vec2 texOverInput = TextureSize / InputSize;

    TEX0.xy = uv;
    scale = texOverInput;

    // Phase fed to sin() for the dot mask in the fragment stage.
    fragpos = uv.x * OutputSize.x * texOverInput.x * PI;

    // Texture coordinate rescaled by the size ratio, and the same value
    // remapped with *2 - 1 for the curvature function.
    warpp = uv * texOverInput;
    warp = warpp * 2.0 - 1.0;
}
#elif defined(FRAGMENT)
#if __VERSION__ >= 130
#define COMPAT_VARYING in
#define COMPAT_TEXTURE texture
out vec4 FragColor;
#else
#define COMPAT_VARYING varying
#define FragColor gl_FragColor
#define COMPAT_TEXTURE texture2D
#endif
#ifdef GL_ES
#ifdef GL_FRAGMENT_PRECISION_HIGH
precision highp float;
#else
precision mediump float;
#endif
#define COMPAT_PRECISION mediump
#else
#define COMPAT_PRECISION
#endif
// Fragment-stage interface: uniforms, the source texture, and the values
// interpolated from the vertex stage.
// FrameCount drives the interlacing toggle in main(); FrameDirection is not
// read by the fragment code.
uniform COMPAT_PRECISION int FrameDirection;
uniform COMPAT_PRECISION int FrameCount;
uniform COMPAT_PRECISION vec2 OutputSize;
uniform COMPAT_PRECISION vec2 TextureSize;
uniform COMPAT_PRECISION vec2 InputSize;
uniform sampler2D Texture;
// Inputs from the vertex stage. warppm and omega are declared here but are
// not read by any fragment function below.
COMPAT_VARYING vec4 TEX0;
COMPAT_VARYING vec2 scale;
COMPAT_VARYING float fragpos;
COMPAT_VARYING vec2 warpp;
COMPAT_VARYING vec2 warppm;
COMPAT_VARYING vec2 warp;
COMPAT_VARYING float omega;
// compatibility #defines
// SourceSize / OutSize pack a size in .xy and its reciprocal in .zw.
#define Source Texture
#define vTexCoord TEX0.xy
#define SourceSize vec4(TextureSize, 1.0 / TextureSize) //either TextureSize or InputSize
#define OutSize vec4(OutputSize, 1.0 / OutputSize)
// User parameters. With PARAMETER_UNIFORM defined they arrive as uniforms;
// otherwise they fall back to compile-time constants.
#ifdef PARAMETER_UNIFORM
uniform COMPAT_PRECISION float SCAN;
uniform COMPAT_PRECISION float MASK;
uniform COMPAT_PRECISION float CURV;
uniform COMPAT_PRECISION float LUM;
uniform COMPAT_PRECISION float SAT;
uniform COMPAT_PRECISION float INTERL;
#else
// Fallback constants, kept equal to the defaults declared in the
// "#pragma parameter" lines at the top of this shader so both paths render
// the same image.
#define SCAN 0.3
#define MASK 0.25
#define CURV 1.0
#define LUM 0.05
#define SAT 1.1
#define INTERL 1.0
#endif
// Scanline intensity at distance `pos` from a line centre.
// The Gaussian's width starts at SCAN and widens with the average of the
// colour channels; LUM is added as a constant floor.
float scan(float pos, vec3 color)
{
    float brightness = dot(color, vec3(0.333));
    float beamWidth = SCAN + 0.1 * brightness * 0.8;
    float t = pos / beamWidth;
    float falloff = exp(-t * t);
    return LUM + (0.1 + SCAN) * falloff / beamWidth;
}
// Screen curvature. The `pos` argument is ignored: the function starts from
// the `warp` varying (already remapped with *2 - 1 in the vertex stage),
// stretches each axis by a factor that grows with the square of the other
// axis, and maps the result back with *0.5 + 0.5.
vec2 Warp(vec2 pos)
{
    vec2 centred = warp;
    vec2 bulge = vec2(1.0 + centred.y * centred.y * 0.031,
                      1.0 + centred.x * centred.x * 0.05);
    return (centred * bulge) * 0.5 + 0.5;
}
// Fragment entry point: optional curvature, 5-tap Lanczos-2 resampling,
// scanlines, sine dot mask, brightness/saturation tweak and border masking.
void main()
{
    // Curvature is a 0/1 switch; only the exact value 1.0 enables it.
    bool curved = (CURV == 1.0);

    // Sampling position: curved when enabled, otherwise the plain coordinate.
    vec2 pos = curved ? Warp(warpp) : vTexCoord;

    // Per-axis distance to the nearest edge, used for the border mask at the
    // end; the x term is turned into a reciprocal threshold.
    vec2 corn = min(pos, 1.0 - pos);
    corn.x = 0.0001 / corn.x;

    // The curved position carries the `scale` factor applied in the vertex
    // stage; divide it out to get back to texture coordinates.
    if (curved) pos /= scale;

    // Lanczos 2
    // Source position in fractions of a texel
    vec2 texelPos = pos * SourceSize.xy;
    // Source bottom left texel centre
    vec2 base = floor(texelPos - 0.5) + 0.5;

    // f is position. f.x runs left to right, y bottom to top, z right to left, w top to bottom
    vec4 f;
    f.xy = texelPos - base;
    f.zw = 1.0 - f.xy;

    // Weights for x and y are evaluated together.
    // These polynomials are a piecewise approximation of the Lanczos kernel.
    // Calculator here: https://gist.github.com/going-digital/752271db735a07da7617079482394543
    vec4 wInner = (( 1.5672 * f - 2.6445) * f + 0.0837) * f + 0.9976;
    vec4 wOuter = ((-0.7389 * f + 1.3652) * f - 0.6295) * f - 0.0004;
    vec2 w12 = wInner.xy + wInner.zw;
    vec4 wedge = wOuter * w12.yxyx;

    // Texture read positions. tc12 relies on bilinear filtering to fold the
    // four central texels into a single read.
    vec2 tc12 = SourceSize.zw * (base + wInner.zw / w12);
    vec2 tc0 = SourceSize.zw * (base - 1.0);
    vec2 tc3 = SourceSize.zw * (base + 2.0);

    // Sharpening adjustment: only the four edge weights are divided by the
    // total; the centre weight is used as computed.
    float centre = w12.x * w12.y;
    float sum = wedge.x + wedge.y + wedge.z + wedge.w + centre;
    wedge /= sum;

    vec3 res =
        COMPAT_TEXTURE(Source, vec2(tc12.x, tc0.y)).rgb * wedge.y +
        COMPAT_TEXTURE(Source, vec2(tc0.x, tc12.y)).rgb * wedge.x +
        COMPAT_TEXTURE(Source, tc12.xy).rgb * centre +
        COMPAT_TEXTURE(Source, vec2(tc3.x, tc12.y)).rgb * wedge.z +
        COMPAT_TEXTURE(Source, vec2(tc12.x, tc3.y)).rgb * wedge.w;

    // Vertical position inside the current scanline. Inputs taller than 400
    // lines use half the line rate, and with INTERL enabled the phase is
    // shifted by half a line on frames where FrameCount is even.
    bool tall = InputSize.y > 400.0;
    float line = pos.y * SourceSize.y;
    float fp = tall ? fract(line / 2.0 - 0.5) : fract(line - 0.5);
    if (tall && INTERL == 1.0 && mod(float(FrameCount), 2.0) < 1.0)
    {
        fp = 0.5 + fp;
    }

    // Scanlines and dot mask are applied on the squared colour, which is then
    // taken back with sqrt().
    res *= res;
    res *= scan(fp, res) + scan(1.0 - fp, res);
    res *= MASK * sin(fragpos) + 1.0 - MASK;
    res = sqrt(res);

    // Boost by up to 10% in proportion to the weighted brightness, then
    // blend between grey and colour by SAT.
    float l = dot(vec3(0.3, 0.6, 0.1), res);
    res *= mix(1.0, 1.1, l);
    res = mix(vec3(l), res, SAT);

    // Black out the border region when curvature is on.
    if (curved && (corn.y <= corn.x || corn.x < 0.0001)) res = vec3(0.0);

    FragColor = vec4(res, 1.0);
}
#endif
Copy and save to shaders_glsl/crt as "crt-geom-mini.glslp"
shaders = 1
shader0 = shaders/crt-geom-mini.glsl
filter_linear0 = true
Runs solid 60 fps on my old Note 3 Pro while the original runs 14 fps. The image is almost the same.
For the record, crt-geom-mini was written to push htc one m7, that's around ~95 gflops. Crt-cyclon pushes xiaomi note 3 pro ~180 gflops. It would need more than that actually, I have to switch to potato mode to run ~59.5 fps. Crt-sines runs pretty well on both as it uses a very simple filter with 1 texture read + 1 for Convergence and many things passed to vertex. Crt-m7 was canned so as not to clutter the crt folder with a trillion shaders. That looked really good too with quilez filter and fast scanlines etc.
I believe that for anyone who wants to make a fast shader, the quilez filter is the best option for performance and a good look. Then the lanczos2 that I uploaded to the "windowed" folder looks very good and is very fast.
A small snippet of code emulating a slot mask without a mask
// Slot-mask look without a mask texture: two sine modulations, the second one
// phase-shifted in alternating 3-unit-wide column groups.
// `pi` and the compatibility macros are expected from the enclosing shader.
void main()
{
    vec3 res = COMPAT_TEXTURE(Source,vTexCoord).rgb;

    // First modulation: one sine period per source line, factor 0.4 .. 1.0.
    res *= 0.3*sin(vTexCoord.y*SourceSize.y*pi*2.0)+0.7;

    // Second modulation: same vertical frequency, but its phase is moved by
    // +0.5 or -0.5 depending on which half of a 6-unit horizontal period the
    // fragment falls in, which staggers neighbouring column groups.
    float y = vTexCoord.y*SourceSize.y*2.0;
    if (mod((vTexCoord.x*OutputSize.x*SourceSize.x/InputSize.x),6.0) < 3.0) y = y+0.5; else y = y-0.5;
    res *= 0.3*sin(y*pi)+0.7;

    // Write all four components; assigning only .rgb would leave the alpha
    // of the fragment output undefined.
    FragColor = vec4(res, 1.0);
}
"cheap.glsl"