I was never able to get my phosphor shader to work with it because I didn’t know how to tie in the LUT texture with the modern vertex coordinate stuff (I tried replacing gl_MultiTexCoord1 with rubyTexCoord1, but it just gave a black screen…), but I was able to get it working with cgwg’s CRT shader:
<?xml version="1.0" encoding="UTF-8"?>
<!--
NTSC shader
Author: Themaister
CRT shader and Gaussian Blur shader
Author: cgwg
License: GPLv3
-->
<shader language="GLSL" style="GLES2">
<vertex><![CDATA[
#version 120
// Pass 1 vertex stage: positions the quad and hands the fragment stage
// the texture coordinate plus a scaled pixel index.

// Values supplied by the host application.
uniform mat4 rubyMVPMatrix;
uniform vec2 rubyTextureSize;
uniform vec2 rubyInputSize;
uniform vec2 rubyOutputSize;
attribute vec2 rubyVertexCoord;
attribute vec2 rubyTexCoord;

// Values interpolated for the fragment stage.
varying vec2 tex_coord;
varying vec2 pix_no;

void main()
{
    // Per-axis ratio of this pass's output size to its input size.
    vec2 out_per_in = rubyOutputSize / rubyInputSize;

    gl_Position = rubyMVPMatrix * vec4(rubyVertexCoord, 0.0, 1.0);
    tex_coord = rubyTexCoord;

    // Texel position (texture coordinate times texture size), rescaled by
    // the output/input ratio.
    pix_no = rubyTexCoord * rubyTextureSize * out_per_in;
}
]]></vertex>
<fragment filter="nearest" scale_x="4.0" scale_y="1.0" frame_count_mod="3" float_framebuffer="true"><![CDATA[
#version 120
// Pass 1 fragment stage, part 1: inputs, tunables and the RGB -> YIQ helper.

uniform sampler2D rubyTexture;
uniform int rubyFrameCount;
varying vec2 tex_coord;
varying vec2 pix_no;

#define PI 3.14159265
// Phase advance of the chroma carrier per unit of pix_no.x.
#define CHROMA_MOD_FREQ (0.4 * PI)
// Amplitude applied to the modulated I and Q components.
#define CHROMA_AMP 1.0
// Exponent applied to the source colour before conversion to YIQ.
#define ENCODE_GAMMA (1.0 / 2.2)

// RGB -> YIQ matrix. GLSL matrices are column-major, so each row of numbers
// below is one column: the Y, I and Q weights of R, then of G, then of B.
const mat3 yiq_mat = mat3(
    0.2989,  0.5959,  0.2115,
    0.5870, -0.2744, -0.5229,
    0.1140, -0.3216,  0.3114);

// Converts an RGB triple to YIQ by multiplying it with yiq_mat.
vec3 rgb2yiq(vec3 col)
{
    vec3 yiq = yiq_mat * col;
    return yiq;
}
// Pass 1 fragment entry point: gamma-encodes the source colour, converts it
// to YIQ and multiplies I and Q by a cosine/sine carrier. Luma is passed
// through unchanged.
void main()
{
    vec3 rgb_in = texture2D(rubyTexture, tex_coord).rgb;
    vec3 encoded = rgb2yiq(pow(rgb_in, vec3(ENCODE_GAMMA)));

    // Carrier phase: an offset that cycles with period 3 over
    // (line index + frame count), plus a term that advances along x.
    float line_phase = 0.6667 * PI * mod(pix_no.y + float(rubyFrameCount), 3.0);
    float carrier = line_phase + pix_no.x * CHROMA_MOD_FREQ;

    float i_carrier = CHROMA_AMP * cos(carrier);
    float q_carrier = CHROMA_AMP * sin(carrier);

    gl_FragColor = vec4(encoded.x, encoded.y * i_carrier, encoded.z * q_carrier, 1.0);
}
]]></fragment>
<!-- 2nd pass - Create composite signal,
low-pass and demodulate separately -->
<vertex><![CDATA[
#version 120
// Pass 2 vertex stage: positions the quad and forwards the texture
// coordinate together with the corresponding texel position.

uniform mat4 rubyMVPMatrix;
uniform vec2 rubyTextureSize;
uniform vec2 rubyOutputSize;   // declared but not read by this stage
attribute vec2 rubyVertexCoord;
attribute vec2 rubyTexCoord;

varying vec2 tex_coord;
varying vec2 pix_no;

void main()
{
    tex_coord = rubyTexCoord;
    // Texture coordinate expressed in texels of the input texture.
    pix_no = rubyTexCoord * rubyTextureSize;
    gl_Position = rubyMVPMatrix * vec4(rubyVertexCoord, 0.0, 1.0);
}
]]></vertex>
<fragment filter="nearest" scale="1.0" frame_count_mod="3" float_framebuffer="true"><![CDATA[
#version 120
// Pass 2 fragment stage: sums the three channels written by the previous
// pass into a single signal, filters it horizontally, and multiplies the
// result by the carrier again to produce (luma, I, Q) for the next pass.

uniform sampler2D rubyTexture;
uniform vec2 rubyTextureSize;
uniform int rubyFrameCount;
varying vec2 tex_coord;
varying vec2 pix_no;

#define PI 3.14159265
#define CHROMA_MOD_FREQ (0.4 * PI)
#define CHROMA_AMP 1.0
#define SATURATION 1.0
#define BRIGHTNESS 1.0
// Gain applied to the demodulated I and Q components.
#define chroma_mod (2.0 * SATURATION / CHROMA_AMP)

// Half of a symmetric 17-tap kernel: entry i weights the two samples at
// -(8 - i) and +(8 - i) texels, entry 8 weights the centre sample.
// Deliberately not called `filter`: that identifier is a reserved word in
// later GLSL versions and some compilers reject it.
const float composite_filter[9] = float[9](
    0.0019, 0.0031, -0.0108, 0.0, 0.0407,
    -0.0445, -0.0807, 0.2913, 0.5982
);

// Samples the input texture `offset` texels to the right of the current
// fragment (negative values sample to the left). `one_x` is the width of
// one texel in texture coordinates.
vec3 fetch_offset(float offset, float one_x)
{
    return texture2D(rubyTexture, tex_coord + vec2(offset * one_x, 0.0)).xyz;
}

void main()
{
    float one_x = 1.0 / rubyTextureSize.x;

    // Same carrier phase formula as the modulating pass.
    float chroma_phase = 0.6667 * PI * mod(pix_no.y + float(rubyFrameCount), 3.0);
    float mod_phase = chroma_phase + pix_no.x * CHROMA_MOD_FREQ;

    // Filter the channel sum (x + y + z of each sample) with the symmetric
    // kernel: eight mirrored pairs, then the centre tap.
    float signal = 0.0;
    for (int i = 0; i < 8; i++)
    {
        float offset = float(i);
        float sums =
            dot(fetch_offset(offset - 8.0, one_x), vec3(1.0)) +
            dot(fetch_offset(8.0 - offset, one_x), vec3(1.0));
        signal += sums * composite_filter[i];
    }
    signal += dot(texture2D(rubyTexture, tex_coord).xyz, vec3(1.0)) * composite_filter[8];

    // Multiply by the carrier again; the following pass low-pass filters
    // each channel.
    float i_mod = chroma_mod * cos(mod_phase);
    float q_mod = chroma_mod * sin(mod_phase);
    vec3 out_color = vec3(signal) * vec3(BRIGHTNESS, i_mod, q_mod);
    gl_FragColor = vec4(out_color, 1.0);
}
]]></fragment>
<vertex><![CDATA[
#version 120
// Pass 3 vertex stage: plain pass-through of position and texture coordinate.

uniform mat4 rubyMVPMatrix;
attribute vec2 rubyVertexCoord;
attribute vec2 rubyTexCoord;

varying vec2 tex_coord;

void main()
{
    tex_coord = rubyTexCoord;
    gl_Position = rubyMVPMatrix * vec4(rubyVertexCoord, 0.0, 1.0);
}
]]></vertex>
<fragment scale="1.0" filter="nearest"><![CDATA[
#version 120
// Pass 3 fragment stage, part 1: inputs, filter kernels and the
// YIQ -> RGB helper.

uniform sampler2D rubyTexture;
uniform vec2 rubyTextureSize;
varying vec2 tex_coord;

// Exponent applied to the decoded RGB colour.
#define NTSC_GAMMA 2.2

// Half-kernels of two symmetric 17-tap filters. Entry 8 is the centre tap;
// entry i is shared by the two samples (8 - i) texels either side of it.
// luma_filter is applied to the first channel, chroma_filter to the other two.
const float luma_filter[9] = float[9](
    0.0019, 0.0052, 0.0035, -0.0163, -0.0407,
    -0.0118, 0.1111, 0.2729, 0.3489
);
const float chroma_filter[9] = float[9](
    0.0025, 0.0057, 0.0147, 0.0315, 0.0555,
    0.0834, 0.1099, 0.1289, 0.1358
);

// YIQ -> RGB matrix. Column-major: each row of numbers below is one column,
// i.e. the R, G and B contributions of Y, then of I, then of Q.
const mat3 yiq2rgb_mat = mat3(
    1.0,     1.0,     1.0,
    0.956,  -0.2720, -1.1060,
    0.6210, -0.6474,  1.7046);

// Converts a YIQ triple to RGB by multiplying it with yiq2rgb_mat.
vec3 yiq2rgb(vec3 yiq)
{
    vec3 rgb = yiq2rgb_mat * yiq;
    return rgb;
}
// Reads the input texture `offset` texels horizontally away from the current
// fragment. `one_x` is the width of a single texel in texture coordinates.
vec3 fetch_offset(float offset, float one_x)
{
    vec2 shifted = tex_coord + vec2(offset * one_x, 0.0);
    return texture2D(rubyTexture, shifted).xyz;
}
// Pass 3 fragment entry point: filters each channel horizontally (luma
// kernel on the first channel, chroma kernel on the other two), converts the
// result from YIQ to RGB and applies NTSC_GAMMA.
void main()
{
    float one_x = 1.0 / rubyTextureSize.x;

    // Symmetric 17-tap filter: eight mirrored pairs, then the centre tap.
    vec3 signal = vec3(0.0);
    for (int i = 0; i < 8; i++)
    {
        float offset = float(i);
        vec3 sums = fetch_offset(offset - 8.0, one_x) +
                    fetch_offset(8.0 - offset, one_x);
        signal += sums * vec3(luma_filter[i], chroma_filter[i], chroma_filter[i]);
    }
    signal += texture2D(rubyTexture, tex_coord).xyz *
              vec3(luma_filter[8], chroma_filter[8], chroma_filter[8]);

    // The decoded colour can have negative components (the kernels and the
    // conversion matrix both contain negative coefficients). pow() is
    // undefined for a negative base, so clamp at zero before the gamma step.
    vec3 decoded = max(yiq2rgb(signal), vec3(0.0));
    vec3 rgb = pow(decoded, vec3(NTSC_GAMMA));
    gl_FragColor = vec4(rgb, 1.0);
}
]]></fragment>
<vertex><![CDATA[
// CRT pass vertex stage, part 1: declarations.

// Parameters and precomputed values; all are assigned in main() and read by
// the fragment stage.
varying float CRTgamma;
varying float monitorgamma;
varying vec2 overscan;
varying vec2 aspect;
varying float d;
varying float R;
varying float cornersize;
varying float cornersmooth;
varying vec3 stretch;
varying vec2 sinangle;
varying vec2 cosangle;

uniform vec2 rubyInputSize;
uniform vec2 rubyTextureSize;
uniform vec2 rubyOutputSize;

varying vec2 texCoord;
varying vec2 one;
varying float mod_factor;
varying vec2 ilfac;

// Attribute/uniform inputs that replace the legacy fixed-function built-ins.
attribute vec2 rubyTexCoord;
attribute vec2 rubyVertexCoord;
varying vec2 TexCoord;   // not written or read in this stage
uniform mat4 rubyMVPMatrix;

// Returns |c|, but never less than 1e-5, so the result is safe to divide by.
// No trailing semicolon: the macro expands to a plain expression and every
// use site supplies its own statement terminator.
#define FIX(c) max(abs(c), 1e-5)
// Solves qa*t^2 + qb*t + qc = 0, with coefficients built from xy, the viewing
// distance d, the radius R and the tilt sines/cosines, and returns the root
// taken with the negative square root.
// NOTE(review): presumably the ray/sphere intersection for the curved
// screen - confirm against the original CRT shader notes.
float intersect(vec2 xy)
{
    float qa = dot(xy,xy)+d*d;
    float qb = 2.0*(R*(dot(xy,sinangle)-d*cosangle.x*cosangle.y)-d*d);
    float qc = d*d + 2.0*R*d*cosangle.x*cosangle.y;
    float disc = qb*qb-4.0*qa*qc;
    return (-qb-sqrt(disc))/(2.0*qa);
}
// Maps a point xy through intersect() and a second quadratic to a pair of
// coordinates scaled by r / sin(r / R).
// NOTE(review): presumably the backward (screen -> texture) half of the
// curvature mapping, as the name suggests - confirm.
//
// r is passed through FIX(), as the fragment-stage copy of this function
// does. Without the guard r is exactly 0 when a == 1 (this happens for the
// point maxscale() passes in when the tilt angle is zero) and the return
// expression becomes 0/0.
vec2 bkwtrans(vec2 xy)
{
    float c = intersect(xy);
    vec2 point = vec2(c)*xy;
    point -= vec2(-R)*sinangle;
    point /= vec2(R);
    vec2 tang = sinangle/cosangle;
    vec2 poc = point/cosangle;
    // Quadratic A*a^2 + B*a + C = 0; the root with the positive square root
    // is used.
    float A = dot(tang,tang)+1.0;
    float B = -2.0*dot(poc,tang);
    float C = dot(poc,poc)-1.0;
    float a = (-B+sqrt(B*B-4.0*A*C))/(2.0*A);
    vec2 uv = (point-a*sinangle)/cosangle;
    // Keep r strictly positive so sin(r/R) below cannot be zero.
    float r = FIX(R*acos(a));
    return uv*r/sin(r/R);
}
// Maps uv through sin/cos of (|uv| / R) and the tilt terms, then scales the
// result by d over a denominator built from d/R and those terms.
// NOTE(review): presumably the forward counterpart of bkwtrans() - confirm.
vec2 fwtrans(vec2 uv)
{
    // Length of uv, kept away from zero so it is safe to divide by.
    float radius = FIX(sqrt(dot(uv,uv)));
    vec2 scaled = uv * (sin(radius/R)/radius);
    float drop = 1.0-cos(radius/R);
    float denom = d/R + drop*cosangle.x*cosangle.y+dot(scaled,sinangle);
    return d*(scaled*cosangle-drop*sinangle)/denom;
}
// Returns a vec3 whose xy is the midpoint of the fwtrans() extents of the
// half-aspect rectangle (re-multiplied by aspect) and whose z is the larger
// of the horizontal and vertical extents.
vec3 maxscale()
{
    vec2 centre = bkwtrans(-R * sinangle / (1.0 + R/d*cosangle.x*cosangle.y));
    vec2 half_size = vec2(0.5,0.5)*aspect;

    // Lower and upper extents along each axis, measured through `centre`.
    float lo_x = fwtrans(vec2(-half_size.x,centre.y)).x;
    float lo_y = fwtrans(vec2(centre.x,-half_size.y)).y;
    float hi_x = fwtrans(vec2(+half_size.x,centre.y)).x;
    float hi_y = fwtrans(vec2(centre.x,+half_size.y)).y;

    vec2 lo = vec2(lo_x, lo_y)/aspect;
    vec2 hi = vec2(hi_x, hi_y)/aspect;

    return vec3((hi+lo)*aspect*0.5,max(hi.x-lo.x,hi.y-lo.y));
}
// CRT pass vertex entry point: sets the user-tunable parameters, transforms
// the vertex and precomputes values that the fragment stage reads.
void main()
{
    // START of parameters

    // gamma of simulated CRT
    CRTgamma = 2.4;
    // gamma of display monitor (typically 2.2 is correct)
    monitorgamma = 2.2;
    // overscan (e.g. 1.02 for 2% overscan)
    overscan = vec2(1.00,1.00);
    // aspect ratio
    aspect = vec2(1.0, 0.75);
    // lengths are measured in units of (approximately) the width of the monitor
    // simulated distance from viewer to monitor
    d = 2.0;
    // radius of curvature
    R = 1.5;
    // tilt angle in radians
    // (behavior might be a bit wrong if both components are nonzero)
    const vec2 angle = vec2(0.0,-0.15);
    // size of curved corners
    cornersize = 0.001;
    // border smoothness parameter
    // decrease if borders are too aliased
    cornersmooth = 1000.0;

    // END of parameters

    // Standard vertex processing. The legacy form was
    //   gl_Position = gl_ModelViewProjectionMatrix * gl_Vertex;
    gl_Position = rubyMVPMatrix * vec4(rubyVertexCoord, 0.0, 1.0);

    // Precalculate a bunch of useful values we'll need in the fragment
    // shader. maxscale() reads sinangle/cosangle, so they are set first.
    sinangle = sin(angle);
    cosangle = cos(angle);
    stretch = maxscale();

    // Texture coords, taken from the rubyTexCoord attribute.
    texCoord = rubyTexCoord;

    // Vertical line factor: floor(input height / 200), limited to 1..2.
    // The fragment stage divides by ilfac, so the unclamped value of 0
    // (inputs shorter than 200 lines) caused a division by zero, and its
    // frame alternation (frame count mod 2) only handles two fields, so
    // factors above 2 are not meaningful either.
    ilfac = vec2(1.0,clamp(floor(rubyInputSize.y/200.0), 1.0, 2.0));

    // The size of one texel, in texture-coordinates (scaled by ilfac).
    one = ilfac / rubyTextureSize;

    // Resulting X pixel-coordinate of the pixel we're drawing.
    mod_factor = texCoord.x * rubyTextureSize.x * rubyOutputSize.x / rubyInputSize.x;
}
]]></vertex>
<fragment><![CDATA[
// CRT pass fragment stage, part 1: compile-time options, macros and
// declarations.

// Uncomment the next line to interpolate in linear gamma (slower); it is
// currently disabled.
//#define LINEAR_PROCESSING
// Enable screen curvature.
//#define CURVATURE
// Enable 3x oversampling of the beam profile
#define OVERSAMPLE
// Use the older, purely gaussian beam profile
//#define USEGAUSSIAN

// Macros.
// Returns |c|, but never less than 1e-5, so the result is safe to divide by.
// No trailing semicolon: the macro expands to a plain expression and every
// use site supplies its own statement terminator.
#define FIX(c) max(abs(c), 1e-5)
#define PI 3.141592653589
// Texture fetch; with LINEAR_PROCESSING the texel is raised to CRTgamma
// immediately, otherwise that happens after interpolation in main().
#ifdef LINEAR_PROCESSING
# define TEX2D(c) pow(texture2D(rubyTexture, (c)), vec4(CRTgamma))
#else
# define TEX2D(c) texture2D(rubyTexture, (c))
#endif

uniform sampler2D rubyTexture;
uniform vec2 rubyInputSize;
uniform vec2 rubyTextureSize;
uniform int rubyFrameCount;

// Values written by the vertex stage.
varying vec2 texCoord;
varying vec2 one;
varying float mod_factor;
varying vec2 ilfac;
varying float CRTgamma;
varying float monitorgamma;
varying vec2 overscan;
varying vec2 aspect;
varying float d;
varying float R;
varying float cornersize;
varying float cornersmooth;
varying vec3 stretch;
varying vec2 sinangle;
varying vec2 cosangle;
// Solves qa*t^2 + qb*t + qc = 0, with coefficients built from xy, the viewing
// distance d, the radius R and the tilt sines/cosines, and returns the root
// taken with the negative square root.
// NOTE(review): presumably the ray/sphere intersection for the curved
// screen - confirm.
float intersect(vec2 xy)
{
    float qa = dot(xy,xy)+d*d;
    float qb = 2.0*(R*(dot(xy,sinangle)-d*cosangle.x*cosangle.y)-d*d);
    float qc = d*d + 2.0*R*d*cosangle.x*cosangle.y;
    float disc = qb*qb-4.0*qa*qc;
    return (-qb-sqrt(disc))/(2.0*qa);
}
// Maps a point xy through intersect() and a second quadratic to a pair of
// coordinates scaled by r / sin(r / R), with r kept away from zero.
// NOTE(review): presumably the backward (screen -> texture) half of the
// curvature mapping, as the name suggests - confirm.
vec2 bkwtrans(vec2 xy)
{
    float t = intersect(xy);
    vec2 hit = vec2(t)*xy;
    hit -= vec2(-R)*sinangle;
    hit /= vec2(R);

    vec2 tilt_tan = sinangle/cosangle;
    vec2 hit_over_cos = hit/cosangle;

    // Quadratic qa*a^2 + qb*a + qc = 0; the root with the positive square
    // root is used.
    float qa = dot(tilt_tan,tilt_tan)+1.0;
    float qb = -2.0*dot(hit_over_cos,tilt_tan);
    float qc = dot(hit_over_cos,hit_over_cos)-1.0;
    float a = (-qb+sqrt(qb*qb-4.0*qa*qc))/(2.0*qa);

    vec2 uv = (hit-a*sinangle)/cosangle;
    // Guarded so that sin(r/R) below cannot be zero.
    float r = FIX(R*acos(a));
    return uv*r/sin(r/R);
}
// Applies bkwtrans() to a texture coordinate: rescales it by
// rubyTextureSize / rubyInputSize, centres and stretches it, runs it through
// bkwtrans(), then undoes overscan, aspect and the rescaling.
vec2 transform(vec2 coord)
{
    vec2 unit = coord * (rubyTextureSize / rubyInputSize);
    vec2 screen = (unit-vec2(0.5))*aspect*stretch.z+stretch.xy;
    vec2 mapped = bkwtrans(screen)/overscan/aspect+vec2(0.5);
    return mapped * rubyInputSize / rubyTextureSize;
}
// Returns a factor in [0, 1] used to darken the image near its corners:
// 1.0 away from the corners, falling to 0.0 over a band whose sharpness is
// set by cornersmooth.
float corner(vec2 coord)
{
    vec2 unit = coord * (rubyTextureSize / rubyInputSize);
    unit = (unit - vec2(0.5)) * overscan + vec2(0.5);

    // Distance to the nearest edge on each axis, in aspect-corrected units.
    vec2 edge = min(unit, vec2(1.0)-unit) * aspect;

    // How far the point lies inside the cornersize-sized corner square.
    vec2 radius = vec2(cornersize);
    vec2 inset = (radius - min(edge,radius));
    float dist = sqrt(dot(inset,inset));

    return clamp((radius.x-dist)*cornersmooth,0.0, 1.0);
}
// Calculate the influence of a scanline on the current pixel.
//
// 'distance' is the distance in texture coordinates from the current
// pixel to the scanline in question.
// 'color' is the colour of the scanline at the horizontal location of
// the current pixel.
//
// The beam width ("beam" below) is computed per colour channel from 'color'.
// The returned weights give the beam profile, i.e. the intensity as a
// function of distance from the vertical centre of the scanline. In the
// default branch the profile is gaussian when the width is 2 and becomes
// non-gaussian for larger widths. Ideally the profile would be normalised so
// that the integral across the beam is independent of its width, i.e. a
// narrower beam should peak higher at the centre of the scanline than a
// wider one.
vec4 scanlineWeights(float distance, vec4 color)
{
#ifdef USEGAUSSIAN
    // Purely gaussian profile with a colour-dependent width.
    vec4 beam = 0.3 + 0.1 * pow(color, vec4(3.0));
    vec4 scaled = vec4(distance / beam);
    return 0.4 * exp(-scaled * scaled) / beam;
#else
    // Generalised profile: the exponent itself grows with the colour.
    vec4 beam = 2.0 + 2.0 * pow(color, vec4(4.0));
    vec4 scaled = vec4(distance / 0.3);
    vec4 falloff = exp(-pow(scaled * inversesqrt(0.5 * beam), beam));
    return 1.4 * falloff / (0.6 + 0.2 * beam);
#endif
}
// CRT pass fragment entry point: resamples the input horizontally with a
// Lanczos2 kernel, blends the current and next scanline using the beam
// profile from scanlineWeights(), applies the corner mask and dot mask, and
// converts the result to the display gamma.
void main()
{
    // Here's a helpful diagram to keep in mind while trying to
    // understand the code:
    //
    //  |      |      |      |      |
    // -------------------------------
    //  |      |      |      |      |
    //  |  01  |  11  |  21  |  31  | <-- current scanline
    //  |      | @    |      |      |
    // -------------------------------
    //  |      |      |      |      |
    //  |  02  |  12  |  22  |  32  | <-- next scanline
    //  |      |      |      |      |
    // -------------------------------
    //  |      |      |      |      |
    //
    // Each character-cell represents a pixel on the output
    // surface, "@" represents the current pixel (always somewhere
    // in the bottom half of the current scan-line, or the top-half
    // of the next scanline). The grid of lines represents the
    // edges of the texels of the underlying texture.

    // Texture coordinates of the texel containing the active pixel.
#ifdef CURVATURE
    vec2 xy = transform(texCoord);
#else
    vec2 xy = texCoord;
#endif
    float cval = corner(xy);

    // Of all the pixels that are mapped onto the texel we are
    // currently rendering, which pixel are we currently rendering?
    // When ilfac.y exceeds 1.5 the vertical offset alternates between 0 and 1
    // with the frame count.
    vec2 ilvec = vec2(0.0,ilfac.y > 1.5 ? mod(float(rubyFrameCount),2.0) : 0.0);
    vec2 ratio_scale = (xy * rubyTextureSize - vec2(0.5) + ilvec)/ilfac;
#ifdef OVERSAMPLE
    // Screen-space rate of change of the scanline coordinate, used as the
    // oversampling step. Not named `filter`: that identifier is a reserved
    // word in later GLSL versions and some compilers reject it.
    float filter_width = fwidth(ratio_scale.y);
#endif
    vec2 uv_ratio = fract(ratio_scale);

    // Snap to the center of the underlying texel.
    xy = (floor(ratio_scale)*ilfac + vec2(0.5) - ilvec) / rubyTextureSize;

    // Calculate Lanczos scaling coefficients describing the effect
    // of various neighbour texels in a scanline on the current
    // pixel.
    vec4 coeffs = PI * vec4(1.0 + uv_ratio.x, uv_ratio.x, 1.0 - uv_ratio.x, 2.0 - uv_ratio.x);

    // Prevent division by zero.
    coeffs = FIX(coeffs);

    // Lanczos2 kernel.
    coeffs = 2.0 * sin(coeffs) * sin(coeffs / 2.0) / (coeffs * coeffs);

    // Normalize.
    coeffs /= dot(coeffs, vec4(1.0));

    // Calculate the effective colour of the current and next
    // scanlines at the horizontal location of the current pixel,
    // using the Lanczos coefficients above.
    vec4 col = clamp(mat4(
        TEX2D(xy + vec2(-one.x, 0.0)),
        TEX2D(xy),
        TEX2D(xy + vec2(one.x, 0.0)),
        TEX2D(xy + vec2(2.0 * one.x, 0.0))) * coeffs,
        0.0, 1.0);
    vec4 col2 = clamp(mat4(
        TEX2D(xy + vec2(-one.x, one.y)),
        TEX2D(xy + vec2(0.0, one.y)),
        TEX2D(xy + one),
        TEX2D(xy + vec2(2.0 * one.x, one.y))) * coeffs,
        0.0, 1.0);

#ifndef LINEAR_PROCESSING
    // TEX2D did not apply the CRT gamma in this configuration; do it now.
    col = pow(col , vec4(CRTgamma));
    col2 = pow(col2, vec4(CRTgamma));
#endif

    // Calculate the influence of the current and next scanlines on
    // the current pixel.
    vec4 weights = scanlineWeights(uv_ratio.y, col);
    vec4 weights2 = scanlineWeights(1.0 - uv_ratio.y, col2);
#ifdef OVERSAMPLE
    // Average the beam profile over three vertical positions: the pixel
    // centre and one third of filter_width either side of it.
    uv_ratio.y =uv_ratio.y+1.0/3.0*filter_width;
    weights = (weights+scanlineWeights(uv_ratio.y, col))/3.0;
    weights2=(weights2+scanlineWeights(abs(1.0-uv_ratio.y), col2))/3.0;
    uv_ratio.y =uv_ratio.y-2.0/3.0*filter_width;
    weights=weights+scanlineWeights(abs(uv_ratio.y), col)/3.0;
    weights2=weights2+scanlineWeights(abs(1.0-uv_ratio.y), col2)/3.0;
#endif
    vec3 mul_res = (col * weights + col2 * weights2).rgb * vec3(cval);

    // dot-mask emulation:
    // Output pixels are alternately tinted green and magenta.
    vec3 dotMaskWeights = mix(
        vec3(1.0, 0.7, 1.0),
        vec3(0.7, 1.0, 0.7),
        floor(mod(mod_factor, 2.0))
        );
    mul_res *= dotMaskWeights;

    // Convert the image gamma for display on our output device.
    mul_res = pow(mul_res, vec3(1.0 / monitorgamma));

    // Color the texel.
    gl_FragColor = vec4(mul_res, 1.0);
}
]]></fragment>
<vertex><![CDATA[
#version 120
// Blur pass vertex stage: positions the quad and forwards the texture
// coordinate plus a scaled pixel index.

// Values supplied by the host application.
uniform mat4 rubyMVPMatrix;
uniform vec2 rubyTextureSize;
uniform vec2 rubyInputSize;
uniform vec2 rubyOutputSize;
attribute vec2 rubyVertexCoord;
attribute vec2 rubyTexCoord;

// Values interpolated for the fragment stage.
varying vec2 tex_coord;
varying vec2 pix_no;

void main()
{
    // Per-axis ratio of this pass's output size to its input size.
    vec2 out_per_in = rubyOutputSize / rubyInputSize;

    gl_Position = rubyMVPMatrix * vec4(rubyVertexCoord, 0.0, 1.0);
    tex_coord = rubyTexCoord;

    // Texel position rescaled by the output/input ratio.
    pix_no = rubyTexCoord * rubyTextureSize * out_per_in;
}
]]></vertex>
<fragment filter="linear" outscale="1.0"><![CDATA[
// Blur pass fragment stage: a 9-tap gaussian blur along the Y axis, computed
// on gamma-decoded colour and re-encoded afterwards.

uniform sampler2D rubyTexture;
uniform vec2 rubyTextureSize;
uniform vec2 rubyInputSize;    // declared but not read by this stage
uniform vec2 rubyOutputSize;   // declared but not read by this stage
varying vec2 tex_coord;

// Exponent applied to every sample before blurring, and its inverse applied
// to the blurred result.
#define CRTgamma 2.0
#define display_gamma 2.0
#define TEX2D(c) pow(texture2D(rubyTexture,(c)),vec4(CRTgamma))

void main()
{
    vec2 xy = tex_coord.st;

    // Height of one texel in texture coordinates. The samples below are
    // offset along Y, so the step has to come from the texture's height; it
    // was previously taken from the width, which gives the wrong spacing
    // whenever the texture is not square.
    float oney = 1.0/rubyTextureSize.y;

    // Gaussian weights exp(-(k/wid)^2) for taps k = 1..4; the centre tap has
    // weight 1. norm makes the nine weights sum to one.
    float wid = 2.0;
    float c1 = exp(-1.0/wid/wid);
    float c2 = exp(-4.0/wid/wid);
    float c3 = exp(-9.0/wid/wid);
    float c4 = exp(-16.0/wid/wid);
    float norm = 1.0 / (1.0 + 2.0*(c1+c2+c3+c4));

    vec4 sum = vec4(0.0);
    sum += TEX2D(xy + vec2(0.0, -4.0 * oney)) * vec4(c4);
    sum += TEX2D(xy + vec2(0.0, -3.0 * oney)) * vec4(c3);
    sum += TEX2D(xy + vec2(0.0, -2.0 * oney)) * vec4(c2);
    sum += TEX2D(xy + vec2(0.0, -1.0 * oney)) * vec4(c1);
    sum += TEX2D(xy);
    sum += TEX2D(xy + vec2(0.0, +1.0 * oney)) * vec4(c1);
    sum += TEX2D(xy + vec2(0.0, +2.0 * oney)) * vec4(c2);
    sum += TEX2D(xy + vec2(0.0, +3.0 * oney)) * vec4(c3);
    sum += TEX2D(xy + vec2(0.0, +4.0 * oney)) * vec4(c4);

    gl_FragColor = pow(sum*vec4(norm),vec4(1.0/display_gamma));
}
]]></fragment>
</shader>
I added a slight gaussian blur pass at the end (it samples along the vertical axis, i.e. across the scanlines), which evens out some of the grid pattern that forms from the interaction of the NTSC shader’s angled gradients with the scanlines from the CRT shader. It makes the scanlines pretty much invisible, but I think the result is better anyway. If anyone wants to try it without the blur pass, just delete the last vertex and fragment.
For the color differences, I believe that’s entirely related to gamma, and I was able to make this shader look exactly like blargg’s filter, color-wise, by adjusting the second number in the ENCODE_GAMMA define in the first pass.