Optimal shaders, AR and visual settings

I see, thanks for your reply.

@GPDP That shot does look really good, and that’s just with scanlines plus gaussian blur, eh? I’m pleasantly surprised.

I think I got it working with cgwg’s CRT shader pretty well, but it looks like garbage at low scale factors, as expected (these screenshots are at 7x, where it looks decent):

<?xml version="1.0" encoding="UTF-8"?>
<!--
    CRT shader with phosphorLUT

    Copyright (C) 2010-2012 cgwg, Themaister and DOLLS (phosphorLUT modification by hunterk)

    This program is free software; you can redistribute it and/or modify it
    under the terms of the GNU General Public License as published by the Free
    Software Foundation; either version 2 of the License, or (at your option)
    any later version.
    -->
<shader language="GLSL">
   <texture id="phosphorLUT" file="240phoriz.png" filter="linear"/>
    <vertex><![CDATA[
varying float CRTgamma;
varying float monitorgamma;
varying vec2 overscan;
varying vec2 aspect;
varying float d;
varying float R;
varying float cornersize;
varying float cornersmooth;

varying vec3 stretch;
varying vec2 sinangle;
varying vec2 cosangle;

uniform vec2 rubyInputSize;
uniform vec2 rubyTextureSize;
uniform vec2 rubyOutputSize;

varying vec2 texCoord;
varying vec2 one;
varying float mod_factor;

// Clamp |c| away from zero so the result is safe to divide by. The macro has
// no trailing semicolon, so it can also be used inside larger expressions.
#define FIX(c) max(abs(c), 1e-5)

// Solves qa*t^2 + qb*t + qc = 0 for the given point and returns the root
// (-qb - sqrt(discriminant)) / (2*qa). The coefficients are built from the
// viewing distance d, the curvature radius R and the tilt sines/cosines.
// NOTE(review): presumably the parameter at which the view ray through xy
// meets the curved screen - confirm against the original CRT-geom notes.
float intersect(vec2 xy)
{
  float dd = d*d;
  float qa = dot(xy,xy)+dd;
  float qb = 2.0*(R*(dot(xy,sinangle)-d*cosangle.x*cosangle.y)-dd);
  float qc = dd + 2.0*R*d*cosangle.x*cosangle.y;
  float disc = qb*qb-4.0*qa*qc;
  return (-qb-sqrt(disc))/(2.0*qa);
}

// Backward transform used by maxscale(): takes a point xy, scales it by the
// root returned from intersect(), and converts the result into coordinates
// measured along the curved surface (arc length r times a unit direction).
// NOTE(review): presumably the inverse of fwtrans() - confirm.
vec2 bkwtrans(vec2 xy)
{
  float c = intersect(xy);
  vec2 point = vec2(c)*xy;
  point -= vec2(-R)*sinangle;
  point /= vec2(R);
  vec2 tang = sinangle/cosangle;
  vec2 poc = point/cosangle;
  // Second quadratic; the larger root is taken here.
  float A = dot(tang,tang)+1.0;
  float B = -2.0*dot(poc,tang);
  float C = dot(poc,poc)-1.0;
  float a = (-B+sqrt(B*B-4.0*A*C))/(2.0*A);
  vec2 uv = (point-a*sinangle)/cosangle;
  // Guard against r == 0 (a == 1, e.g. zero tilt), where r/sin(r/R) would
  // evaluate 0/0. This matches the fragment-shader copy of this function.
  float r = FIX(R*acos(a));
  return uv*r/sin(r/R);
}

// Forward transform: takes surface coordinates uv and returns the projected
// 2D position, using the viewing distance d, radius R and tilt angles.
vec2 fwtrans(vec2 uv)
{
  // Length of uv, kept away from zero so the division below is safe.
  float radius = FIX(sqrt(dot(uv,uv)));
  float scale = sin(radius/R)/radius;
  uv *= scale;
  float versine = 1.0-cos(radius/R);
  float denom = d/R + versine*cosangle.x*cosangle.y+dot(uv,sinangle);
  return d*(uv*cosangle-versine*sinangle)/denom;
}

// Returns (offset.x, offset.y, scale): xy is the midpoint of the projected
// extents (scaled back by aspect) and z is the larger of the projected
// width and height. Consumed as stretch.xy / stretch.z by the fragment shader.
vec3 maxscale()
{
  vec2 centre = bkwtrans(-R * sinangle / (1.0 + R/d*cosangle.x*cosangle.y));
  vec2 halfSize = vec2(0.5)*aspect;

  // Project the four edge midpoints and keep the relevant component of each.
  float loX = fwtrans(vec2(-halfSize.x,centre.y)).x;
  float loY = fwtrans(vec2(centre.x,-halfSize.y)).y;
  float hiX = fwtrans(vec2(+halfSize.x,centre.y)).x;
  float hiY = fwtrans(vec2(centre.x,+halfSize.y)).y;

  vec2 lo = vec2(loX,loY)/aspect;
  vec2 hi = vec2(hiX,hiY)/aspect;
  return vec3((hi+lo)*aspect*0.5,max(hi.x-lo.x,hi.y-lo.y));
}


void main()
{
  // ---- Tunable parameters ------------------------------------------

  // Gamma of the CRT being simulated.
  CRTgamma = 2.4;
  // Gamma of the physical display (2.2 is the usual value).
  monitorgamma = 2.2;
  // Overscan factor per axis (1.02 would mean 2% overscan).
  overscan = vec2(1.00,1.00);
  // Aspect ratio of the simulated screen.
  aspect = vec2(1.0, 0.75);
  // Lengths below are in units of roughly one monitor width.
  // Simulated viewer-to-monitor distance.
  d = 2.0;
  // Radius of curvature of the screen.
  R = 1.5;
  // Tilt angle in radians (results may be slightly off when both
  // components are nonzero).
  const vec2 angle = vec2(0.0,-0.15);
  // Size of the rounded corners.
  cornersize = 0.001;
  // Border smoothness; lower it if the borders look too aliased.
  cornersmooth = 1000.0;

  // ---- End of parameters -------------------------------------------

  // Standard fixed-function vertex transform.
  gl_Position = gl_ModelViewProjectionMatrix * gl_Vertex;

  // Values precomputed for the fragment shader. maxscale() reads
  // sinangle/cosangle, so those must be assigned before it is called.
  sinangle = sin(angle);
  cosangle = cos(angle);
  stretch = maxscale();

  // Source-texture coordinates and LUT coordinates.
  texCoord = gl_MultiTexCoord0.xy;
  gl_TexCoord[1].xy = gl_MultiTexCoord1.xy;

  // Size of one texel, in texture coordinates.
  one = 1.0 / rubyTextureSize;

  // X pixel-coordinate of the output pixel being drawn.
  mod_factor = texCoord.x * rubyTextureSize.x * rubyOutputSize.x / rubyInputSize.x;
}
    ]]></vertex>
    <fragment outscale="2.0"><![CDATA[
// Uncomment the next line to enable interpolation in linear gamma (slower).
//#define LINEAR_PROCESSING

// Enable screen curvature.
//#define CURVATURE

// Enable 3x oversampling of the beam profile
#define OVERSAMPLE

// Use the older, purely gaussian beam profile
//#define USEGAUSSIAN

// Macros.
// FIX clamps |c| away from zero so the result is safe to divide by. It has no
// trailing semicolon, so it can also be used inside larger expressions.
#define FIX(c) max(abs(c), 1e-5)
#define PI 3.141592653589

// TEX2D samples the source texture; when LINEAR_PROCESSING is defined the
// sample is raised to CRTgamma at fetch time.
#ifdef LINEAR_PROCESSING
#       define TEX2D(c) pow(texture2D(rubyTexture, (c)), vec4(CRTgamma))
#else
#       define TEX2D(c) texture2D(rubyTexture, (c))
#endif

uniform sampler2D rubyTexture;
uniform vec2 rubyInputSize;
uniform vec2 rubyTextureSize;
uniform float brightness;

// Identify LUT texture
      uniform sampler2D phosphorLUT;

varying vec2 texCoord;
varying vec2 one;
varying float mod_factor;

varying float CRTgamma;
varying float monitorgamma;

varying vec2 overscan;
varying vec2 aspect;

varying float d;
varying float R;

varying float cornersize;
varying float cornersmooth;

varying vec3 stretch;
varying vec2 sinangle;
varying vec2 cosangle;

// Solves qa*t^2 + qb*t + qc = 0 for the given point and returns the root
// (-qb - sqrt(discriminant)) / (2*qa). Same formula as the vertex-shader copy.
float intersect(vec2 xy)
{
  float dd = d*d;
  float qa = dot(xy,xy)+dd;
  float qb = 2.0*(R*(dot(xy,sinangle)-d*cosangle.x*cosangle.y)-dd);
  float qc = dd + 2.0*R*d*cosangle.x*cosangle.y;
  float disc = qb*qb-4.0*qa*qc;
  return (-qb-sqrt(disc))/(2.0*qa);
}

// Backward transform used by transform(): scales xy by the root from
// intersect(), then converts the result into coordinates measured along the
// curved surface (arc length times a unit direction).
vec2 bkwtrans(vec2 xy)
{
  float t = intersect(xy);
  vec2 pt = vec2(t)*xy;
  pt -= vec2(-R)*sinangle;
  pt /= vec2(R);

  vec2 tanAngle = sinangle/cosangle;
  vec2 ptOverCos = pt/cosangle;

  // Second quadratic; the larger root is taken here.
  float qa = dot(tanAngle,tanAngle)+1.0;
  float qb = -2.0*dot(ptOverCos,tanAngle);
  float qc = dot(ptOverCos,ptOverCos)-1.0;
  float root = (-qb+sqrt(qb*qb-4.0*qa*qc))/(2.0*qa);

  vec2 uv = (pt-root*sinangle)/cosangle;
  // Arc length, kept away from zero so arc/sin(arc/R) stays finite.
  float arc = FIX(R*acos(root));
  return uv*arc/sin(arc/R);
}

// Applies the curvature mapping to a texture coordinate: rescales to the
// used part of the texture, applies aspect/stretch, runs bkwtrans(), then
// undoes overscan/aspect and converts back to texture coordinates.
vec2 transform(vec2 coord)
{
  vec2 pos = coord * (rubyTextureSize / rubyInputSize);
  pos = (pos-vec2(0.5))*aspect*stretch.z+stretch.xy;
  vec2 unwarped = bkwtrans(pos)/overscan/aspect+vec2(0.5);
  return unwarped * rubyInputSize / rubyTextureSize;
}

// Returns a 0..1 factor that fades the image out near the rounded corners.
// cornersize sets the corner radius and cornersmooth the steepness of the
// fade.
float corner(vec2 coord)
{
  vec2 pos = coord * (rubyTextureSize / rubyInputSize);
  pos = (pos - vec2(0.5)) * overscan + vec2(0.5);

  // Distance to the nearest edge on each axis, aspect-corrected.
  vec2 edgeDist = min(pos, vec2(1.0)-pos) * aspect;

  vec2 radius = vec2(cornersize);
  vec2 inset = radius - min(edgeDist,radius);
  float dist = sqrt(dot(inset,inset));
  return clamp((radius.x-dist)*cornersmooth,0.0, 1.0);
}

// Weight with which one scanline contributes to the current pixel.
//
// 'distance' - distance in texture coordinates from the current pixel to
//              the scanline.
// 'color'    - colour of that scanline at the current pixel's horizontal
//              position; it modulates the beam profile per RGB channel.
vec4 scanlineWeights(float distance, vec4 color)
{
  // The returned expression is the beam profile: intensity as a function
  // of the distance from the vertical centre of the scanline. In the
  // default branch the profile is gaussian when the exponent is 2 and
  // becomes non-gaussian for larger exponents. Ideally the profile would
  // be normalised so that its integral does not depend on the beam width,
  // i.e. a narrower beam would have a higher peak than a wider one.
#ifdef USEGAUSSIAN
  vec4 beamWidth = 0.3 + 0.1 * pow(color, vec4(3.0));
  vec4 scaled = vec4(distance / beamWidth);
  return 0.4 * exp(-scaled * scaled) / beamWidth;
#else
  vec4 exponent = 2.0 + 2.0 * pow(color, vec4(4.0));
  vec4 scaled = vec4(distance / 0.3);
  return 1.4 * exp(-pow(scaled * inversesqrt(0.5 * exponent), exponent)) / (0.6 + 0.2 * exponent);
#endif
}

// Fragment entry point: reconstructs the current and next scanline with a
// Lanczos2 horizontal filter, weights them with the beam profile, converts
// the result to display gamma and combines it with the phosphor LUT.
void main()
{
  // Here's a helpful diagram to keep in mind while trying to
  // understand the code:
  //
  //  |      |      |      |      |
  // -------------------------------
  //  |      |      |      |      |
  //  |  01  |  11  |  21  |  31  | <-- current scanline
  //  |      | @    |      |      |
  // -------------------------------
  //  |      |      |      |      |
  //  |  02  |  12  |  22  |  32  | <-- next scanline
  //  |      |      |      |      |
  // -------------------------------
  //  |      |      |      |      |
  //
  // Each character-cell represents a pixel on the output
  // surface, "@" represents the current pixel (always somewhere
  // in the bottom half of the current scan-line, or the top-half
  // of the next scanline). The grid of lines represents the
  // edges of the texels of the underlying texture.

  // Texture coordinates of the texel containing the active pixel.
#ifdef CURVATURE
  vec2 xy = transform(texCoord);
#else
  vec2 xy = texCoord;
#endif
  float cval = corner(xy);

  // Of all the pixels that are mapped onto the texel we are
  // currently rendering, which pixel are we currently rendering?
  vec2 ratio_scale = xy * rubyTextureSize - vec2(0.5);
#ifdef OVERSAMPLE
  // Not called "filter": that identifier is reserved in newer GLSL versions.
  float beamFilter = fwidth(ratio_scale.y);
#endif
  vec2 uv_ratio = fract(ratio_scale);

  // Snap to the center of the underlying texel.
  xy = (floor(ratio_scale) + vec2(0.5)) / rubyTextureSize;

  // Calculate Lanczos scaling coefficients describing the effect
  // of various neighbour texels in a scanline on the current
  // pixel.
  vec4 coeffs = PI * vec4(1.0 + uv_ratio.x, uv_ratio.x, 1.0 - uv_ratio.x, 2.0 - uv_ratio.x);

  // Prevent division by zero.
  coeffs = FIX(coeffs);

  // Lanczos2 kernel.
  coeffs = 2.0 * sin(coeffs) * sin(coeffs / 2.0) / (coeffs * coeffs);

  // Normalize.
  coeffs /= dot(coeffs, vec4(1.0));

  // Calculate the effective colour of the current and next
  // scanlines at the horizontal location of the current pixel,
  // using the Lanczos coefficients above.
  vec4 col  = clamp(mat4(
             TEX2D(xy + vec2(-one.x, 0.0)),
             TEX2D(xy),
             TEX2D(xy + vec2(one.x, 0.0)),
             TEX2D(xy + vec2(2.0 * one.x, 0.0))) * coeffs,
            0.0, 1.0);
  vec4 col2 = clamp(mat4(
             TEX2D(xy + vec2(-one.x, one.y)),
             TEX2D(xy + vec2(0.0, one.y)),
             TEX2D(xy + one),
             TEX2D(xy + vec2(2.0 * one.x, one.y))) * coeffs,
            0.0, 1.0);

#ifndef LINEAR_PROCESSING
  col  = pow(col , vec4(CRTgamma));
  col2 = pow(col2, vec4(CRTgamma));
#endif

  // Calculate the influence of the current and next scanlines on
  // the current pixel.
  vec4 weights  = scanlineWeights(uv_ratio.y, col);
  vec4 weights2 = scanlineWeights(1.0 - uv_ratio.y, col2);
#ifdef OVERSAMPLE
  // Average three samples of the beam profile, spaced a third of a
  // pixel footprint apart.
  uv_ratio.y =uv_ratio.y+1.0/3.0*beamFilter;
  weights = (weights+scanlineWeights(uv_ratio.y, col))/3.0;
  weights2=(weights2+scanlineWeights(abs(1.0-uv_ratio.y), col2))/3.0;
  uv_ratio.y =uv_ratio.y-2.0/3.0*beamFilter;
  weights=weights+scanlineWeights(abs(uv_ratio.y), col)/3.0;
  weights2=weights2+scanlineWeights(abs(1.0-uv_ratio.y), col2)/3.0;
#endif
  vec3 mul_res  = (col * weights + col2 * weights2).rgb * vec3(cval);

  // Convert the image gamma for display on our output device.
  mul_res = pow(mul_res, vec3(1.0 / monitorgamma));

  // Brightness multiplier applied to the CRT image before the LUT is
  // combined with it; raise it if the LUT makes the picture too dark.
  // This local hides the same-named uniform declared at file scope.
  float brightness = 1.6;

  // Phosphor mask sample for this output pixel.
  vec4 screen = texture2D(phosphorLUT, gl_TexCoord[1].xy);

  // Color the texel: subtract the inverted, brightened CRT image from the
  // LUT. (The former unused "inverse"/"final" temporaries were removed:
  // they cost an extra texture fetch and used the int literal 1 with a
  // vec4, which un-versioned GLSL does not convert implicitly.)
  gl_FragColor = screen - (1.0 - (brightness * vec4(mul_res, 1.0)));
}
    ]]></fragment>
</shader>

And here’s another version using aliaspider’s GTU shader instead of the 2 gaussian blur passes. You can uncomment and adjust the ‘KERNEL_HEIGHT’ variable to add faint scanlines:

<?xml version="1.0" encoding="UTF-8"?>
<!-- 
     PhosphorLUT-GTU v1.1
    This shader uses an external lookup texture (LUT) to create a shadow mask with individual RGB phosphor lenses.
    You can swap out the LUTs by changing the 'file' referenced in Line 11.
    This version uses aliaspider's GTU shader code to create the phosphor bloom, rather than 2 passes of gaussian blur.
    Author: hunterk and aliaspider
     License: GPL (contains code from other GPL shaders).
-->
<shader language="GLSL">
   <texture id="phosphorLUT" file="480pvert.png" filter="linear"/>
   <vertex><![CDATA[
      // Pass-through vertex stage: forwards the frame and LUT texture
      // coordinates and applies the fixed-function transform.
      void main()
      {
         gl_TexCoord[0].xy = gl_MultiTexCoord0.xy;   // source frame
         gl_TexCoord[1].xy = gl_MultiTexCoord1.xy;   // phosphor LUT
         gl_Position = gl_ModelViewProjectionMatrix * gl_Vertex;
      }
   ]]></vertex>
   <fragment filter="linear" outscale="2.0"><![CDATA[
      uniform sampler2D rubyTexture;
      uniform sampler2D phosphorLUT;

      // Combines the source frame with the phosphor LUT: the inverted frame
      // is subtracted from the LUT sample.
      void main()
      {
         // Sample the frame once and reuse it for the inversion.
         vec4 frame = texture2D(rubyTexture, gl_TexCoord[0].xy);
         // Float literal: un-versioned GLSL does not convert int to float.
         vec4 inverse = 1.0 - frame;
         vec4 screen = texture2D(phosphorLUT, gl_TexCoord[1].xy);

         gl_FragColor = screen - inverse;
      }
   ]]></fragment>
  <vertex><![CDATA[
  
  
//--------------------------------------------------------------------------//
// CONFIG :
// uncomment next line to zoom the picture removing 
// horizontal and vertical overscan

// #define CROP_OVERSCAN

// CONFIG END.
//--------------------------------------------------------------------------//

  

    uniform vec2        rubyTextureSize;
    uniform vec2         rubyInputSize ;
    uniform vec2         rubyOutputSize;
    
    // Vertex stage of the GTU pass: forwards the texture coordinate and,
    // when CROP_OVERSCAN is defined, overrides w to zoom the picture.
    void main() {
        gl_TexCoord[0] = gl_MultiTexCoord0;

        vec4 clipPos = ftransform();
#ifdef CROP_OVERSCAN
        // NOTE(review): assumes ftransform() yields w == 1.0, so a smaller
        // w enlarges the image after the perspective divide - confirm.
        clipPos.w = 0.94;
#endif
        gl_Position = clipPos;
    }
    

  ]]></vertex>

  <fragment filter="nearest"><![CDATA[
  
  

//--------------------------------------------------------------------------//
// CONFIG :
  
// a dirty fix for higan-accuracy because it handles 
// changes in game resolution differently
//
// #define HIGAN_ACCURACY


  
  
// select scaling Kernel
// 0 - Gaussian ( default )
// 1 - Spline ( almost like Gaussian with a KERNEL_WIDTH = 1.88561808316 
//                but without the ( barely visible) checkerboard effect.  slower)
// 2 - Bilinear
//
#define KERNEL 0


// for gaussian kernel only :
//    define Kernel width and height ( how big an original pixel becomes)
//    higher values cause more blur, might be beneficial for some games.
//    lower values cause the gap between the original pixels to become visible
//    reducing this value for the height only produces a scanline effect 
//    valid range : [ 0.5 : 4.0 ] 
//    default is 1.88561808316  = (4.0f*sqrt(2.0f)/3.0f) 
//
#define KERNEL_WIDTH 1.88561808316

//    you can set a different value for the kernel height if needed 
//    otherwise KERNEL_WIDTH will be used for both
// 
 //#define KERNEL_HEIGHT 1.0


// desired gamma value for the emulated CRT-TV
// default 2.2
//
#define GAMMA_IN 2.2

// the gamma value of the current display device
// default 2.2
//
#define GAMMA_OUT 2.2

// CONFIG END.
//--------------------------------------------------------------------------//
 

    uniform sampler2D     rubyTexture;
    uniform vec2          rubyTextureSize;
    uniform vec2         rubyInputSize ;
    uniform vec2         rubyOutputSize;
    
    
#define pi            3.14159265358
//uncommenting this line might give some speed-up on Nvidia GPUs
// #pragma optionNV(unroll all)
// Fall back to the horizontal kernel width when no separate height is set.
#ifndef KERNEL_HEIGHT
#define KERNEL_HEIGHT KERNEL_WIDTH
#endif



// KERNEL_X / KERNEL_Y give the filter weight for a horizontal / vertical
// distance in source texels. MIN_* / MAX_* are the bounds of the sampling
// loops in main(), which run "for (i = MIN; i < MAX; i++)".
#if (KERNEL==1)
// Spline: 0.75 - x^2 for |x| < 0.5, 0.5*(1.5 - |x|)^2 for |x| < 1.5, else 0.
#define KERNEL_X(x) ((abs(x)<0.5)?(0.75-(abs(x)*abs(x))):((abs(x)<1.5)?(0.5*(1.5-abs(x))*(1.5-abs(x))):0.0))
#define KERNEL_Y(x) KERNEL_X(x)
#define MIN_X -1.0
#define MAX_X 3.0
#define MIN_Y -1.0
#define MAX_Y 3.0
#elif (KERNEL==2)
// Bilinear: 1 - |x|, clamped to [0, 1].
#define KERNEL_X(x) (min(max((1.0-abs(x)),0.0),1.0))
#define KERNEL_Y(x) KERNEL_X(x)
#define MIN_X -1.0
#define MAX_X 2.0
#define MIN_Y -1.0
#define MAX_Y 2.0
#else
// Gaussian (default): width and height are controlled independently by
// KERNEL_WIDTH and KERNEL_HEIGHT; the loop bounds grow with the kernel size
// but are limited to the range [-3, 5).
#define KERNEL_X(x) ((sqrt(2.0)/KERNEL_WIDTH)*(exp(-2.0*pi*(x)*(x)/(KERNEL_WIDTH*KERNEL_WIDTH))))
#define KERNEL_Y(y) ((sqrt(2.0)/KERNEL_HEIGHT)*(exp(-2.0*pi*(y)*(y)/(KERNEL_HEIGHT*KERNEL_HEIGHT))))
#define MIN_X max((floor(-KERNEL_WIDTH)+1.0),-3.0)
#define MAX_X min((ceil(KERNEL_WIDTH)+1.0),5.0)
#define MIN_Y max((floor(-KERNEL_HEIGHT)+1.0),-3.0)
#define MAX_Y min((ceil(KERNEL_HEIGHT)+1.0),5.0)
#endif
// Remaps the 16..235 (of 255) range to 0..1 and clamps the result.
#define STUDIOSWING(c) min(max((c - ( 16.0 / 255.0 ) )* ( 255.0 / 219.0 ), 0.0),1.0)
// Gamma decode on input (GAMMA_IN) and encode on output (GAMMA_OUT).
#define DECODE_GAMMA(c0) pow(c0,GAMMA_IN)
#define ENCODE_GAMMA(c0) pow(c0,(1.0/GAMMA_OUT))


// GTU pass: convolves the source texture with the selected kernel in
// decoded (linear) colour and writes the re-encoded result.
void main() {
    // Size of one source texel in texture coordinates.
    vec2 texel = 1.0 / rubyTextureSize ;
    // Fragment position in source-texel units.
    vec2 subPos = gl_TexCoord[0].xy * rubyTextureSize;
#ifdef HIGAN_ACCURACY
    subPos.x /= 2.0;
    texel.x *= 2.0;
#endif
    // Keep only the fractional part, measured from texel centres.
    subPos -= vec2(0.5,0.5);
    subPos -= floor(subPos);

    vec3 accum = vec3(0.0, 0.0, 0.0);
    for (float i = MIN_X; i < MAX_X; i++) {
        float dx = subPos.x - i;
        float wx = KERNEL_X(dx);
        float xShift = dx * texel.x;
        for (float j = MIN_Y; j < MAX_Y; j++) {
            float dy = subPos.y - j;
            vec2 sourceCoord = gl_TexCoord[0].xy - vec2(xShift, dy * texel.y);
            vec3 c = texture2D(rubyTexture, sourceCoord).xyz;
            // Expand studio range and decode gamma per channel.
            c.x = DECODE_GAMMA(STUDIOSWING(c.x));
            c.y = DECODE_GAMMA(STUDIOSWING(c.y));
            c.z = DECODE_GAMMA(STUDIOSWING(c.z));
            accum += c * wx * KERNEL_Y(dy);
        }
    }

    // Re-encode for the display, per channel.
    accum.x = ENCODE_GAMMA(accum.x);
    accum.y = ENCODE_GAMMA(accum.y);
    accum.z = ENCODE_GAMMA(accum.z);

    gl_FragColor = vec4(accum, 1.0);
}


    ]]></fragment>
   <vertex><![CDATA[
attribute vec2 rubyOrigTexCoord;
varying vec2 orig_tex;

      // Forwards the original-frame and LUT texture coordinates and applies
      // the fixed-function transform.
      void main()
      {
         orig_tex = rubyOrigTexCoord;
         gl_TexCoord[1].xy = gl_MultiTexCoord1.xy;
         gl_Position = gl_ModelViewProjectionMatrix * gl_Vertex;
      }
   ]]></vertex>
   <fragment filter="linear" outscale="2.0"><![CDATA[
      uniform sampler2D rubyOrigTexture;
      uniform sampler2D phosphorLUT;
     varying vec2 orig_tex;
     uniform float brightness;

      // Combines the original frame with the phosphor LUT: the inverted,
      // brightened frame is subtracted from the LUT sample.
      void main()
      {
         // Brightness multiplier; raise it if the LUT makes the picture too
         // dark. This local hides the same-named uniform at file scope.
         float brightness = 1.2;
         // Sample the frame once and reuse it for the inversion.
         vec4 frame = texture2D(rubyOrigTexture, orig_tex);
         // Float literal: un-versioned GLSL does not convert int to float.
         vec4 inverse = 1.0 - (brightness * frame);
         vec4 screen = texture2D(phosphorLUT, gl_TexCoord[1].xy);

         gl_FragColor = screen - inverse;
      }
   ]]></fragment>
   
   <vertex><![CDATA[
    attribute vec2 rubyPass1TexCoord;
    attribute vec2 rubyPass2TexCoord;
    varying vec2 pass1_tex;
    varying vec2 pass2_tex;

    // Forwards the texture coordinates of the previous pass and of passes
    // 1 and 2 to the fragment stage.
    void main() {
       pass1_tex = rubyPass1TexCoord;
       pass2_tex = rubyPass2TexCoord;
       gl_TexCoord[0] = gl_MultiTexCoord0;
       gl_Position = gl_ModelViewProjectionMatrix * gl_Vertex;
    }
  ]]></vertex>
    <fragment filter="linear"><![CDATA[
    uniform sampler2D rubyPass1Texture; // Result from Pass 1.
     uniform sampler2D rubyPass2Texture; // Result from Pass 2.
     uniform sampler2D rubyTexture; // Result from Pass 3 (previous pass).
     varying vec2 pass1_tex;
     varying vec2 pass2_tex;
    
    //#define LOWRES

     // Screen-blends the outputs of passes 1, 2 and 3:
     // result = 1 - product of the inverted inputs. With LOWRES defined,
     // pass 2 is multiplied in a second time.
     void main() {
        vec4 inv1 = 1.0 - texture2D(rubyPass1Texture, pass1_tex);
        vec4 inv2 = 1.0 - texture2D(rubyPass2Texture, pass2_tex);
        vec4 inv3 = 1.0 - texture2D(rubyTexture, gl_TexCoord[0].xy);

        vec4 product = inv1 * inv2 * inv3;
   #ifdef LOWRES
        product *= inv2;
   #endif
        gl_FragColor = 1.0 - product;
     }
  ]]></fragment>
   </shader>

I believe you can pretty reliably deal with the brightness by adding a multiplication factor to the ‘inverse’ part that gets subtracted from the LUT. I’ve done so and added a new variable called ‘brightness’ where you can change it as you see fit. Usually values between 1.2 and 1.5 are good. You can see it in the two variants above.

Incidentally, the CRT-phosphorLUT shader is single-pass, so it should play nicely with the FBO / 2-pass shader options for adding other effects on top, such as the hires-blend shader that fixes the SNES’ pseudo-transparency.

Those look nice, but I’m still of the opinion that cgwg’s shader doesn’t scale as well as yours, and I couldn’t get the GTU variant to show scanlines (do you think its implementation of blur is better than yours?), so instead I grabbed the brightness code from those new variants and slapped it into your old code. Here’s the result with your scanline variant:

And here’s without scanlines:

As usual, ignore the tiling issue at the top of the screenshots. They only show up on the shots, not the actual emulator.

The brightness variable helps a lot, although I’m hesitant to raise it much more, as I’m sure on a real 4K display, it’ll get blended in and look much better and not as dark. I really like how the scanlines look on the first shot. Pretty subtle, yet very dynamic as on a real shadow mask CRT. I also took a shot with the aperture LUT, and the scanlines are a little more visible, just as expected from a Trinitron.

Edit: Oops, just realized the first shot was the one I took with the aperture LUT. Using the 240pvert LUT, the scanlines barely appear at all, so it’s kinda meh. Well, it hardly matters, since few people associate shadow mask CRTs with noticeable scanlines the way they do with aperture grille Trinitrons anyway.

Those shots look pretty great. That’s interesting that the scanline variant is so much bloomier than the non-scanlined one. Would you mind pastebinning both of those so I can compare them? I’ve got so many versions floating around on my machine that I’m having trouble keeping straight which versions are which.

As for the GTU shader, the scanlines it adds are not totally black like the ones I added (from maister’s scanline shader), so they may be getting perceptually lost in the shuffle somewhere. The blur in his shader is actually the exact same as mine, theoretically (they both use gaussian with variable weights), but I think his looks better at lower scales than mine. He also does some more adjustment, though, beyond just the blur, so that might be interfering at large scales.

Overall, I’ve been pretty pleased with these results. Now I just need a display big enough and a video card fast enough to do it justice :stuck_out_tongue:

No scanlines:

http://pastebin.com/qf6HBBmf

Scanlines:

http://pastebin.com/4D9mX4xj

My thinking is that, unless we can get more natural-looking scanlines to work with the shadow mask xxxpvert.png LUTs, the best approach is to use the scanlines variant with the aperture LUTs only, and the regular, non-scanlines variant with the shadow mask LUTs.

In any case, here’s a shot of the regular shader with a slightly modified CRT-Geom LUT so the whites weren’t as overpowering.

As far as I can see, this is as good as it’s gonna get for 4x, at least for those who don’t like scanlines. Only problem is the searing whites. While the other hi-res LUTs suffer from being too dark, this one at this scale suffers from being too bright around the whites. Scaling down brightness doesn’t do much about this. Any way to fix this?

All this time, I had a silly error in the gaussian passes that was making it only blur vertically and not horizontally. >.<

Anyway… here’s one that incorporates the phosphor LUTs, scanlines and maister’s NTSC shader:

<?xml version="1.0" encoding="UTF-8"?>
<!-- 
     NTSC and scanline code
     Author: Themaister
     Gaussian Blur code
     Author: cgwg
     PhosphorLUT code
     Author: hunterk
    
    This shader uses an external lookup texture (LUT) to create a shadow mask with individual RGB phosphor lenses.
    You can swap out the LUTs by changing the 'file' referenced in Line 16.
    Adjust the 'brightness' variable toward the end of the file if the LUT makes things too dark.
     License: GPLv3 (contains code from other GPL shaders).
-->
<shader language="GLSL" style="GLES2">
   <texture id="phosphorLUT" file="480pvert.png" filter="linear"/>
   
   <vertex><![CDATA[
      #version 120
      uniform mat4 rubyMVPMatrix;
      attribute vec2 rubyVertexCoord;
      attribute vec2 rubyTexCoord;
      varying vec2 tex_coord;

      varying vec2 pix_no;
      uniform vec2 rubyTextureSize;
      uniform vec2 rubyInputSize;
      uniform vec2 rubyOutputSize;

      // NTSC pass 1 vertex stage: outputs the texture coordinate and the
      // position of the vertex in output pixels (pix_no).
      void main()
      {
         vec2 out_per_in = rubyOutputSize / rubyInputSize;

         tex_coord = rubyTexCoord;
         pix_no = rubyTexCoord * rubyTextureSize * out_per_in;
         gl_Position = rubyMVPMatrix * vec4(rubyVertexCoord, 0.0, 1.0);
      }
   ]]></vertex>
   <fragment filter="nearest" scale_x="4.0" scale_y="1.0" frame_count_mod="2" float_framebuffer="true"><![CDATA[
      #version 120
      varying vec2 tex_coord;
      uniform sampler2D rubyTexture;
      uniform int rubyFrameCount;
      varying vec2 pix_no;

#define PI 3.14159265
#define CHROMA_MOD_FREQ (0.4 * PI)
#define CHROMA_AMP 1.0
#define ENCODE_GAMMA (1.0 / 2.2)

      const mat3 yiq_mat = mat3(
         0.2989, 0.5959, 0.2115,
         0.5870, -0.2744, -0.5229,
         0.1140, -0.3216, 0.3114);

      // Converts an RGB colour to YIQ by multiplying with yiq_mat.
      vec3 rgb2yiq(vec3 col)
      {
         vec3 yiq = yiq_mat * col;
         return yiq;
      }

      // NTSC pass 1: converts the gamma-encoded source colour to YIQ and
      // multiplies I and Q by the chroma carrier for this pixel and frame.
      void main()
      {
         vec3 rgb = texture2D(rubyTexture, tex_coord).rgb;
         vec3 yiq = rgb2yiq(pow(rgb, vec3(ENCODE_GAMMA)));

         // The carrier phase advances per scanline (period 3) and per frame.
         float line_phase = PI * 0.6667 * (mod(pix_no.y, 3.0) + float(rubyFrameCount));
         float carrier_phase = line_phase + pix_no.x * CHROMA_MOD_FREQ;

         float i_carrier = CHROMA_AMP * cos(carrier_phase);
         float q_carrier = CHROMA_AMP * sin(carrier_phase);

         gl_FragColor = vec4(yiq.x, yiq.y * i_carrier, yiq.z * q_carrier, 1.0);
      }
   ]]></fragment>

   <!-- 2nd pass - Create composite signal,
        low-pass and demodulate separately -->
   <vertex><![CDATA[
      #version 120
      uniform mat4 rubyMVPMatrix;
      attribute vec2 rubyVertexCoord;
      attribute vec2 rubyTexCoord;
      uniform vec2 rubyTextureSize;
      uniform vec2 rubyOutputSize;

      varying vec2 tex_coord;

      varying vec2 pix_no;

      // NTSC pass 2 vertex stage: outputs the texture coordinate and the
      // position in texels of the input texture (pix_no).
      void main()
      {
         tex_coord = rubyTexCoord;
         pix_no = rubyTexCoord * rubyTextureSize;
         gl_Position = rubyMVPMatrix * vec4(rubyVertexCoord, 0.0, 1.0);
      }
   ]]></vertex>
   <fragment filter="nearest" scale="1.0" frame_count_mod="2" float_framebuffer="true"><![CDATA[
      #version 120
      uniform sampler2D rubyTexture;
      uniform vec2 rubyTextureSize;
      uniform int rubyFrameCount;
      varying vec2 tex_coord;

      varying vec2 pix_no;

#define PI 3.14159265
#define CHROMA_MOD_FREQ (0.4 * PI)

#define CHROMA_AMP 1.0
#define SATURATION 1.0
#define BRIGHTNESS 1.0
#define chroma_mod (2.0 * SATURATION / CHROMA_AMP)

      const float filter[9] = float[9](
         0.0019, 0.0031, -0.0108, 0.0, 0.0407,
         -0.0445, -0.0807, 0.2913, 0.5982
      );

      // Samples the input texture 'offset' texels to the right of tex_coord
      // (negative offsets go left); one_x is the width of one texel.
      vec3 fetch_offset(float offset, float one_x)
      {
         vec2 shifted = tex_coord + vec2(offset * one_x, 0.0);
         return texture2D(rubyTexture, shifted).xyz;
      }

      // NTSC pass 2: sums Y, I and Q into one composite value, filters it
      // with the symmetric 17-tap filter, and multiplies the result by the
      // carrier again to produce the output Y/I/Q channels.
      void main()
      {
         float texel_w = 1.0 / rubyTextureSize.x;
         float line_phase = PI * 0.6667 * (mod(pix_no.y, 3.0) + float(rubyFrameCount));
         float carrier_phase = line_phase + pix_no.x * CHROMA_MOD_FREQ;

         // Taps 0..7 are mirrored around the centre tap (index 8).
         float composite = 0.0;
         for (int tap = 0; tap < 8; tap++)
         {
            float k = float(tap);
            float left = dot(fetch_offset(k - 8.0, texel_w), vec3(1.0));
            float right = dot(fetch_offset(8.0 - k, texel_w), vec3(1.0));

            composite += (left + right) * filter[tap];
         }
         composite += dot(texture2D(rubyTexture, tex_coord).xyz, vec3(1.0)) * filter[8];

         float i_carrier = chroma_mod * cos(carrier_phase);
         float q_carrier = chroma_mod * sin(carrier_phase);

         vec3 demodulated = vec3(composite) * vec3(BRIGHTNESS, i_carrier, q_carrier);
         gl_FragColor = vec4(demodulated, 1.0);
      }
   ]]></fragment>

   
   <vertex><![CDATA[
      #version 120
      uniform mat4 rubyMVPMatrix;
      attribute vec2 rubyVertexCoord;
      attribute vec2 rubyTexCoord;
      varying vec2 tex_coord;

      // NTSC pass 3 vertex stage: plain pass-through of the texture
      // coordinate.
      void main()
      {
         tex_coord = rubyTexCoord;
         gl_Position = rubyMVPMatrix * vec4(rubyVertexCoord, 0.0, 1.0);
      }
   ]]></vertex>
   <fragment scale="1.0" filter="nearest"><![CDATA[
      #version 120
      varying vec2 tex_coord;
      uniform sampler2D rubyTexture;
      uniform vec2 rubyTextureSize;

#define NTSC_GAMMA 2.2

      const float luma_filter[9] = float[9](
         -0.0020, -0.0009, 0.0038, 0.0178, 0.0445,
         0.0817, 0.1214, 0.1519, 0.1634
      );

      const float chroma_filter[9] = float[9](
         0.0046, 0.0082, 0.0182, 0.0353, 0.0501,
         0.0832, 0.1062, 0.1222, 0.1280
      );

      const mat3 yiq2rgb_mat = mat3(
         1.0, 1.0, 1.0,
         0.956, -0.2720, -1.1060,
         0.6210, -0.6474, 1.7046);

      // Converts a YIQ colour to RGB by multiplying with yiq2rgb_mat.
      vec3 yiq2rgb(vec3 yiq)
      {
         vec3 rgb = yiq2rgb_mat * yiq;
         return rgb;
      }

      // Samples the input texture 'offset' texels to the right of tex_coord
      // (negative offsets go left); one_x is the width of one texel.
      vec3 fetch_offset(float offset, float one_x)
      {
         vec2 shifted = tex_coord + vec2(offset * one_x, 0.0);
         return texture2D(rubyTexture, shifted).xyz;
      }

      // NTSC pass 3: low-pass filters luma and chroma with separate
      // symmetric 17-tap filters, converts YIQ to RGB and applies NTSC_GAMMA.
      void main()
      {
         float texel_w = 1.0 / rubyTextureSize.x;

         // Taps 0..7 are mirrored around the centre tap (index 8).
         vec3 filtered = vec3(0.0);
         for (int tap = 0; tap < 8; tap++)
         {
            float k = float(tap);
            vec3 pair = fetch_offset(k - 8.0, texel_w) +
               fetch_offset(8.0 - k, texel_w);
            vec3 tap_weight = vec3(luma_filter[tap], chroma_filter[tap], chroma_filter[tap]);

            filtered += pair * tap_weight;
         }
         filtered += texture2D(rubyTexture, tex_coord).xyz *
            vec3(luma_filter[8], chroma_filter[8], chroma_filter[8]);

         vec3 rgb = pow(yiq2rgb(filtered), vec3(NTSC_GAMMA));
         gl_FragColor = vec4(rgb, 1.0);
      }
   ]]></fragment>
 <vertex><![CDATA[
      uniform mat4 rubyMVPMatrix;
      uniform vec2 rubyTextureSize;
      uniform vec2 rubyInputSize;
      uniform vec2 rubyOutputSize;
      attribute vec2 rubyVertexCoord;
      attribute vec2 rubyTexCoord;
      attribute vec2 rubyLUTTexCoord;
      varying vec2 tex_coord;
      varying vec2 lut_coord;
      varying vec2 omega;

      // Vertex stage of the LUT + scanline pass: forwards the frame and LUT
      // coordinates and precomputes the angular frequencies (omega) used by
      // the sine terms in the fragment stage.
      void main()
      {
         tex_coord = rubyTexCoord;
         lut_coord = rubyLUTTexCoord;

         float omega_x = 3.1415 * rubyOutputSize.x * rubyTextureSize.x / rubyInputSize.x;
         float omega_y = 2.0 * 3.1415 * rubyTextureSize.y;
         omega = vec2(omega_x, omega_y);

         gl_Position = rubyMVPMatrix * vec4(rubyVertexCoord, 0.0, 1.0);
      }
   ]]></vertex>
      <fragment filter="linear" outscale="2.0"><![CDATA[
     uniform sampler2D rubyTexture;
     uniform sampler2D phosphorLUT;
     uniform vec2 rubyTextureSize;
     uniform vec2 rubyInputSize;
     uniform vec2 rubyOutputSize;
     varying vec2 tex_coord;
     varying vec2 lut_coord;
     varying vec2 omega;

     const float base_brightness = 0.95;
     const vec2 sine_comp = vec2(0.05, 0.85);

     // Combines the frame with the phosphor LUT (LUT minus inverted frame)
     // and modulates the result with sine-based scanlines.
     void main()
     {
         // Sample the frame once and reuse it for the inversion.
         vec4 frame = texture2D(rubyTexture, tex_coord.xy);
         // Float literal: this shader is declared style="GLES2" and carries
         // no #version, so an int literal is not converted to float.
         vec4 inverse = 1.0 - frame;
         vec4 screen = texture2D(phosphorLUT, lut_coord.xy);
         vec4 masked = screen - inverse;

         // Brightness varies sinusoidally across x and y around
         // base_brightness, with amplitudes taken from sine_comp.
         vec4 scanline = masked * (base_brightness + dot(sine_comp * sin(tex_coord.xy * omega), vec2(1.0)));

         gl_FragColor = clamp(scanline, 0.0, 1.0);
     }
   ]]></fragment>
         <vertex><![CDATA[
      #version 120
      uniform mat4 rubyMVPMatrix;
      attribute vec2 rubyVertexCoord;
      attribute vec2 rubyTexCoord;
      varying vec2 tex_coord;

      varying vec2 pix_no;
      uniform vec2 rubyTextureSize;
      uniform vec2 rubyInputSize;
      uniform vec2 rubyOutputSize;

      // Vertex stage of the first gaussian pass: outputs the texture
      // coordinate and the position in output pixels (pix_no).
      void main()
      {
         vec2 out_per_in = rubyOutputSize / rubyInputSize;

         tex_coord = rubyTexCoord;
         pix_no = rubyTexCoord * rubyTextureSize * out_per_in;
         gl_Position = rubyMVPMatrix * vec4(rubyVertexCoord, 0.0, 1.0);
      }
   ]]></vertex>
   <fragment filter="linear" outscale="1.0"><![CDATA[
// Fragment stage: 9-tap vertical blur with Gaussian-shaped weights, applied
// to gamma-decoded samples and re-encoded for output.
uniform sampler2D rubyTexture;
uniform vec2 rubyTextureSize;
uniform vec2 rubyInputSize;
uniform vec2 rubyOutputSize;
varying vec2 tex_coord;

#define CRTgamma 2.5
#define display_gamma 2.0
// Sample the input and raise it to CRTgamma.
#define TEX2D(c) pow(texture2D(rubyTexture,(c)),vec4(CRTgamma))

// Decoded sample `rows` texel rows away from `centre`, scaled by `weight`.
vec4 weighted_row(vec2 centre, float rows, float weight)
{
  float texel_h = 1.0/rubyTextureSize.y;
  return TEX2D(centre + vec2(0.0, rows * texel_h)) * vec4(weight);
}

void main()
{
  vec2 centre = tex_coord.st;

  // Kernel width parameter: tap k gets weight exp(-k*k / (beam*beam)).
  float beam = 2.0;

  float w1 = exp(-1.0/beam/beam);
  float w2 = exp(-4.0/beam/beam);
  float w3 = exp(-9.0/beam/beam);
  float w4 = exp(-16.0/beam/beam);
  // Reciprocal of the total weight (centre tap has weight 1).
  float inv_total = 1.0 / (1.0 + 2.0*(w1+w2+w3+w4));

  vec4 acc = vec4(0.0);

  acc += weighted_row(centre, -4.0, w4);
  acc += weighted_row(centre, -3.0, w3);
  acc += weighted_row(centre, -2.0, w2);
  acc += weighted_row(centre, -1.0, w1);
  acc += TEX2D(centre);
  acc += weighted_row(centre, +1.0, w1);
  acc += weighted_row(centre, +2.0, w2);
  acc += weighted_row(centre, +3.0, w3);
  acc += weighted_row(centre, +4.0, w4);

  // Normalise, then encode with the display gamma.
  gl_FragColor = pow(acc*vec4(inv_total),vec4(1.0/display_gamma));
}
]]></fragment>
   <vertex><![CDATA[
      #version 120
      // Vertex stage: transforms the quad and forwards the texture coordinate.
      uniform mat4 rubyMVPMatrix;
      uniform vec2 rubyTextureSize;
      uniform vec2 rubyInputSize;
      uniform vec2 rubyOutputSize;

      attribute vec2 rubyVertexCoord;
      attribute vec2 rubyTexCoord;

      varying vec2 tex_coord;
      // Texture coordinate scaled by the texture size and the output/input ratio.
      varying vec2 pix_no;

      void main()
      {
         vec2 upscale = rubyOutputSize / rubyInputSize;

         tex_coord = rubyTexCoord;
         pix_no = rubyTexCoord * rubyTextureSize * upscale;
         gl_Position = rubyMVPMatrix * vec4(rubyVertexCoord, 0.0, 1.0);
      }
   ]]></vertex>
   <fragment filter="linear" outscale="1.0"><![CDATA[
// Fragment stage: 9-tap HORIZONTAL blur with Gaussian-shaped weights, the
// companion of the vertical blur pass that precedes it.
//
// The original copy of this pass switched the step size to
// 1/rubyTextureSize.x but still offset the samples along Y, so it repeated
// the vertical blur with the wrong texel size. The offsets now go along X.
uniform sampler2D     rubyTexture;
uniform vec2 rubyTextureSize;
uniform vec2 rubyInputSize;
uniform vec2 rubyOutputSize;
varying vec2 tex_coord;

#define CRTgamma 2.5
#define display_gamma 2.0
// Sample the input and raise it to CRTgamma.
// NOTE(review): the preceding pass encodes with 1/display_gamma (2.0) while
// this decodes with CRTgamma (2.5); left unchanged, confirm whether the
// resulting darkening is intended.
#define TEX2D(c) pow(texture2D(rubyTexture,(c)),vec4(CRTgamma))

void main()
{
  vec2 xy = tex_coord.st;
  // Width of one texel in texture coordinates.
  float onex = 1.0/rubyTextureSize.x;

  // Kernel width parameter: tap k gets weight exp(-k*k / (wid*wid)).
  float wid = 2.0;

  float c1 = exp(-1.0/wid/wid);
  float c2 = exp(-4.0/wid/wid);
  float c3 = exp(-9.0/wid/wid);
  float c4 = exp(-16.0/wid/wid);
  // Reciprocal of the total weight (centre tap has weight 1).
  float norm = 1.0 / (1.0 + 2.0*(c1+c2+c3+c4));

  vec4 sum = vec4(0.0);

  sum += TEX2D(xy + vec2(-4.0 * onex, 0.0)) * vec4(c4);
  sum += TEX2D(xy + vec2(-3.0 * onex, 0.0)) * vec4(c3);
  sum += TEX2D(xy + vec2(-2.0 * onex, 0.0)) * vec4(c2);
  sum += TEX2D(xy + vec2(-1.0 * onex, 0.0)) * vec4(c1);
  sum += TEX2D(xy);
  sum += TEX2D(xy + vec2(+1.0 * onex, 0.0)) * vec4(c1);
  sum += TEX2D(xy + vec2(+2.0 * onex, 0.0)) * vec4(c2);
  sum += TEX2D(xy + vec2(+3.0 * onex, 0.0)) * vec4(c3);
  sum += TEX2D(xy + vec2(+4.0 * onex, 0.0)) * vec4(c4);

  // Normalise, then encode with the display gamma.
  gl_FragColor = pow(sum*vec4(norm),vec4(1.0/display_gamma));
}
]]></fragment>
<vertex><![CDATA[
// Vertex stage: forwards the original-frame and LUT texture coordinates.
uniform mat4 rubyMVPMatrix;

attribute vec2 rubyVertexCoord;
attribute vec2 rubyTexCoord;
attribute vec2 rubyOrigTexCoord;
attribute vec2 rubyLUTTexCoord;

// Declared but not written by this stage.
varying vec2 tex_coord;
varying vec2 orig_tex;
varying vec2 lut_coord;

void main()
{
   orig_tex = rubyOrigTexCoord;
   lut_coord = rubyLUTTexCoord;
   gl_Position = rubyMVPMatrix * vec4(rubyVertexCoord, 0.0, 1.0);
}
   ]]></vertex>
   <fragment filter="linear" outscale="1.0"><![CDATA[
      // Fragment stage: combines the original (unblurred) frame with the
      // phosphor LUT.
      uniform sampler2D rubyOrigTexture;
      uniform sampler2D phosphorLUT;
      varying vec2 orig_tex;
      // Kept declared for interface compatibility. The original shadowed this
      // uniform with a local of the same name, so it never affected the output.
      uniform float brightness;
      varying vec2 lut_coord;

      void main()
      {
         // Fixed gain applied to the frame before inversion.
         const float brightness_gain = 1.1;

         // Sample once and reuse; the original fetched the same texel twice.
         vec4 frame = texture2D(rubyOrigTexture, orig_tex);

         // Float literal: no #version directive means GLSL 1.10, which has no
         // implicit int -> float conversion for `1 - vec4`.
         vec4 inverse = 1.0 - (brightness_gain * frame);

         vec4 screen = texture2D(phosphorLUT, lut_coord);

         gl_FragColor = screen - inverse;
      }
   ]]></fragment>
      <vertex><![CDATA[
         // Vertex stage: forwards the coordinates needed to read the outputs
         // of passes 4 and 5 as well as the previous pass.
         uniform mat4 rubyMVPMatrix;

         attribute vec2 rubyVertexCoord;
         attribute vec2 rubyTexCoord;
         attribute vec2 rubyPass4TexCoord;
         attribute vec2 rubyPass5TexCoord;

         varying vec2 tex_coord;
         varying vec2 pass4_tex;
         varying vec2 pass5_tex;

         void main()
         {
            pass4_tex = rubyPass4TexCoord;
            pass5_tex = rubyPass5TexCoord;
            tex_coord = rubyTexCoord;
            gl_Position = rubyMVPMatrix * vec4(rubyVertexCoord, 0.0, 1.0);
         }
  ]]></vertex>
    <fragment filter="linear"><![CDATA[
    // Fragment stage: blends the outputs of passes 4 and 5.
    uniform sampler2D rubyPass4Texture; // Result from Pass 4.
    uniform sampler2D rubyPass5Texture; // Result from Pass 5.
    uniform sampler2D rubyTexture;      // Previous pass; declared but not sampled.
    varying vec2 pass4_tex;
    varying vec2 pass5_tex;
    varying vec2 tex_coord;

    void main() {
       vec4 pass4 = texture2D(rubyPass4Texture, pass4_tex);
       vec4 pass5 = texture2D(rubyPass5Texture, pass5_tex);

       // The original also fetched rubyTexture into an unused `pass6`; that
       // dead texture read has been removed.

       // 1 - (1 - a) * (1 - b): the result is never darker than either input
       // when both lie in [0, 1].
       gl_FragColor = 1.0 - (1.0 - pass4) * (1.0 - pass5);
    }
  ]]></fragment>
      </shader>

This is using his old scanline code. I’ll post another version using his newer NTSC+scanline code whenever I get a chance.

Pretty cool stuff.

I have a 1360x768 monitor, and I want to use fullscreen and stretching. Is there any hope for me? ._.

What was the issue with the horizontal blur? And can you post a shader that doesn’t have the NTSC filter built in? I’d like to test a few things.

this line:

  float oney = 1.0/rubyTextureSize.y;

should have the .y changed to .x in one of them. I lifted those passes from another shader and never looked closely enough to notice -_-

EDIT: looks like it needs more than just that change. I’ll have to keep working on it.

Has there been any more work on this? I actually really like the concept of how the SMB3 shot I posted looks, but the lines could be blended better (especially around edges), and I still wonder what could be done about the bright colors being so overpowering.

Any chance of getting the halation effect from “crt-geom-halation-*.shader” as a standalone Cg implementation? I’d love to combine it with some other Cg files.

I believe the halation is just a regular ol’ gaussian blur in two passes. If that is indeed the case, you can grab the gaussian passes from the Cg version of my phosphorLUT shader.

hmm, not really. gaussian blur smears everything, that’s not really the effect I’m looking for. to use an image from your blog:

aside from the scanlines there is a nice halo effect of bright pixels without blurring the edges too much. I took a look into the crt-geom shader but I couldn’t find a specific halation part. maybe it’s a secondary effect of the specific scanline calculation.

Here’s the code to the halation shader (the halation part comes from the first two fragment shaders):

<?xml version="1.0" encoding="UTF-8"?>
<!--
    CRT shader

    Copyright (C) 2010-2012 cgwg, Themaister and DOLLS

    This program is free software; you can redistribute it and/or modify it
    under the terms of the GNU General Public License as published by the Free
    Software Foundation; either version 2 of the License, or (at your option)
    any later version.
    -->
<shader language="GLSL">
<fragment filter="nearest" scale="1.0"><![CDATA[
// Fragment stage: 9-tap vertical blur with Gaussian-shaped weights. Samples
// are decoded with CRTgamma and the result is encoded with display_gamma.
uniform sampler2D rubyTexture;
uniform vec2 rubyTextureSize;
uniform vec2 rubyInputSize;
uniform vec2 rubyOutputSize;

#define CRTgamma 2.5
#define display_gamma 2.2
// Sample the input and raise it to CRTgamma.
#define TEX2D(c) pow(texture2D(rubyTexture,(c)),vec4(CRTgamma))

// Decoded sample `rows` texel rows away from `centre`, scaled by `weight`.
vec4 weighted_row(vec2 centre, float rows, float weight)
{
  float texel_h = 1.0/rubyTextureSize.y;
  return TEX2D(centre + vec2(0.0, rows * texel_h)) * vec4(weight);
}

void main()
{
  vec2 centre = gl_TexCoord[0].st;

  // Kernel width parameter: tap k gets weight exp(-k*k / (beam*beam)).
  float beam = 2.0;

  float w1 = exp(-1.0/beam/beam);
  float w2 = exp(-4.0/beam/beam);
  float w3 = exp(-9.0/beam/beam);
  float w4 = exp(-16.0/beam/beam);
  // Reciprocal of the total weight (centre tap has weight 1).
  float inv_total = 1.0 / (1.0 + 2.0*(w1+w2+w3+w4));

  vec4 acc = vec4(0.0);

  acc += weighted_row(centre, -4.0, w4);
  acc += weighted_row(centre, -3.0, w3);
  acc += weighted_row(centre, -2.0, w2);
  acc += weighted_row(centre, -1.0, w1);
  acc += TEX2D(centre);
  acc += weighted_row(centre, +1.0, w1);
  acc += weighted_row(centre, +2.0, w2);
  acc += weighted_row(centre, +3.0, w3);
  acc += weighted_row(centre, +4.0, w4);

  gl_FragColor = pow(acc*vec4(inv_total),vec4(1.0/display_gamma));
}
]]></fragment>
<fragment filter="nearest" scale="1.0"><![CDATA[
// Fragment stage: 9-tap horizontal blur with Gaussian-shaped weights. Samples
// are decoded with display_gamma and re-encoded with the same gamma.
uniform sampler2D rubyTexture;
uniform vec2 rubyTextureSize;
uniform vec2 rubyInputSize;
uniform vec2 rubyOutputSize;

#define display_gamma 2.2
// Sample the input and raise it to display_gamma.
#define TEX2D(c) pow(texture2D(rubyTexture,(c)),vec4(display_gamma))

// Decoded sample `cols` texel columns away from `centre`, scaled by `weight`.
vec4 weighted_col(vec2 centre, float cols, float weight)
{
  float texel_w = 1.0/rubyTextureSize.x;
  return TEX2D(centre + vec2(cols * texel_w, 0.0)) * vec4(weight);
}

void main()
{
  vec2 centre = gl_TexCoord[0].st;

  // Kernel width parameter: tap k gets weight exp(-k*k / (beam*beam)).
  float beam = 2.0;

  float w1 = exp(-1.0/beam/beam);
  float w2 = exp(-4.0/beam/beam);
  float w3 = exp(-9.0/beam/beam);
  float w4 = exp(-16.0/beam/beam);
  // Reciprocal of the total weight (centre tap has weight 1).
  float inv_total = 1.0 / (1.0 + 2.0*(w1+w2+w3+w4));

  vec4 acc = vec4(0.0);

  acc += weighted_col(centre, -4.0, w4);
  acc += weighted_col(centre, -3.0, w3);
  acc += weighted_col(centre, -2.0, w2);
  acc += weighted_col(centre, -1.0, w1);
  acc += TEX2D(centre);
  acc += weighted_col(centre, +1.0, w1);
  acc += weighted_col(centre, +2.0, w2);
  acc += weighted_col(centre, +3.0, w3);
  acc += weighted_col(centre, +4.0, w4);

  gl_FragColor = pow(acc*vec4(inv_total),vec4(1.0/display_gamma));
}
]]></fragment>
<vertex><![CDATA[
// Parameters and precomputed values written in main() and passed to the
// fragment stage.
varying float CRTgamma;
varying float monitorgamma;
varying vec2 overscan;
varying vec2 aspect;
varying float d;
varying float R;
varying float cornersize;
varying float cornersmooth;

varying vec3 stretch;
varying vec2 sinangle;
varying vec2 cosangle;

uniform vec2 rubyOrigInputSize;
uniform vec2 rubyOrigTextureSize;
uniform vec2 rubyOutputSize;
uniform vec2 rubyInputSize;

varying vec2 texCoord;
varying vec2 one;
varying float mod_factor;
varying vec2 ilfac;

// |c| clamped to at least 1e-5, used to keep divisors away from zero.
// Defined without a trailing semicolon so it can also appear inside a larger
// expression; existing `FIX(x);` statements are unaffected.
#define FIX(c) max(abs(c), 1e-5)

// Builds a quadratic from xy, the viewing distance d, the radius R and the
// tilt sines/cosines, and returns its (-b - sqrt(disc)) / (2a) root.
// NOTE(review): presumably the parameter at which the view ray meets the
// curved screen - confirm against the crt-geom derivation.
float intersect(vec2 xy)
{
  float qa = dot(xy,xy)+d*d;
  float qb = 2.0*(R*(dot(xy,sinangle)-d*cosangle.x*cosangle.y)-d*d);
  float qc = d*d + 2.0*R*d*cosangle.x*cosangle.y;
  float disc = qb*qb-4.0*qa*qc;
  return (-qb-sqrt(disc))/(2.0*qa);
}

// Maps a 2D point through the curved-screen model, using intersect() and the
// tilt terms sinangle/cosangle.
// NOTE(review): presumably the inverse of fwtrans() ("backward transform") -
// confirm.
//
// The arc length r is passed through FIX(), as the fragment-stage copy of
// this function already does. Without the guard, a zero tilt angle makes
// maxscale() feed in xy = (0,0); then a = 1, r = 0, and the return value
// becomes 0/0 (NaN).
vec2 bkwtrans(vec2 xy)
{
  float c = intersect(xy);
  vec2 point = vec2(c)*xy;
  point -= vec2(-R)*sinangle;
  point /= vec2(R);
  vec2 tang = sinangle/cosangle;
  vec2 poc = point/cosangle;
  // Second quadratic; the (-B + sqrt(disc)) / (2A) root is used.
  float A = dot(tang,tang)+1.0;
  float B = -2.0*dot(poc,tang);
  float C = dot(poc,poc)-1.0;
  float a = (-B+sqrt(B*B-4.0*A*C))/(2.0*A);
  vec2 uv = (point-a*sinangle)/cosangle;
  // Clamp away from zero so the division by sin(r/R) stays finite.
  float r = FIX(R*acos(a));
  return uv*r/sin(r/R);
}

// Maps uv through the curved-screen model in the direction opposite to
// bkwtrans() (NOTE(review): inferred from the names - confirm).
vec2 fwtrans(vec2 uv)
{
  // Length of uv, kept away from zero because it is used as a divisor.
  float radius = FIX(sqrt(dot(uv,uv)));
  uv *= sin(radius/R)/radius;
  float versine = 1.0-cos(radius/R);
  float denom = d/R + versine*cosangle.x*cosangle.y+dot(uv,sinangle);
  return d*(uv*cosangle-versine*sinangle)/denom;
}

// Computes the value stored in `stretch`: xy is the midpoint of the
// transformed screen extents (scaled by aspect), z is the larger of the
// transformed width and height.
vec3 maxscale()
{
  vec2 centre = bkwtrans(-R * sinangle / (1.0 + R/d*cosangle.x*cosangle.y));
  vec2 half_size = vec2(0.5,0.5)*aspect;

  // Forward-transform the screen edges measured through `centre`.
  float left   = fwtrans(vec2(-half_size.x,centre.y)).x;
  float bottom = fwtrans(vec2(centre.x,-half_size.y)).y;
  float right  = fwtrans(vec2(+half_size.x,centre.y)).x;
  float top    = fwtrans(vec2(centre.x,+half_size.y)).y;

  vec2 lo = vec2(left, bottom)/aspect;
  vec2 hi = vec2(right, top)/aspect;

  return vec3((hi+lo)*aspect*0.5,max(hi.x-lo.x,hi.y-lo.y));
}


// Vertex entry point: sets the tunable CRT parameters, transforms the vertex,
// and precomputes per-vertex values for the fragment stage.
void main()
{

  // START of parameters

  // gamma of simulated CRT
  CRTgamma = 2.4;
  // gamma of display monitor (typically 2.2 is correct)
  monitorgamma = 2.2;
  // overscan (e.g. 1.02 for 2% overscan)
  overscan = vec2(1.01,1.01);
  // aspect ratio
  aspect = vec2(1.0, 0.75);
  // lengths are measured in units of (approximately) the width of the monitor
  // simulated distance from viewer to monitor
  d = 1.5;
  // radius of curvature
  R = 1.5;
  // tilt angle in radians
  // (behavior might be a bit wrong if both components are nonzero)
  const vec2 angle = vec2(0.0,-0.15);
  // size of curved corners
  cornersize = 0.03;
  // border smoothness parameter
  // decrease if borders are too aliased
  cornersmooth = 80.0;

  // END of parameters

  // Do the standard vertex processing.
  gl_Position = gl_ModelViewProjectionMatrix * gl_Vertex;

  // Precalculate a bunch of useful values we'll need in the fragment
  // shader.
  sinangle = sin(angle);
  cosangle = cos(angle);
  stretch = maxscale();

  // Texture coords.
  texCoord = gl_MultiTexCoord0.xy;

  // Vertical line factor: number of 200-line multiples in the input height.
  // Clamped to at least 1.0 because floor(height/200) is 0 for inputs shorter
  // than 200 lines, which zeroed one.y below and made the fragment stage
  // divide by zero when it divides by ilfac.
  ilfac = vec2(1.0,max(floor(rubyInputSize.y/200.0), 1.0));

  // The size of one texel, in texture-coordinates.
  one = ilfac / rubyOrigTextureSize;

  // Resulting X pixel-coordinate of the pixel we're drawing.
  mod_factor = texCoord.x * rubyOrigTextureSize.x * rubyOutputSize.x / rubyOrigInputSize.x;
}
]]></vertex>
<fragment filter="linear" outscale="1.0"><![CDATA[
// Frame counter supplied by the host; used in main() to alternate fields.
uniform int rubyFrameCount;

// Uncomment the next line to interpolate in linear gamma (slower).
//#define LINEAR_PROCESSING

// Enable screen curvature.
#define CURVATURE

// Enable 3x oversampling of the beam profile
#define OVERSAMPLE

// Use the older, purely gaussian beam profile
//#define USEGAUSSIAN

// Macros.
// |c| clamped to at least 1e-5, used to keep divisors away from zero.
// Defined without a trailing semicolon so it can also appear inside a larger
// expression; existing `FIX(x);` statements are unaffected.
#define FIX(c) max(abs(c), 1e-5)
#define PI 3.141592653589

// TEX2D reads the ORIGINAL frame; with LINEAR_PROCESSING the sample is
// gamma-decoded before interpolation.
#ifdef LINEAR_PROCESSING
#       define TEX2D(c) pow(texture2D(rubyOrigTexture, (c)), vec4(CRTgamma))
#else
#       define TEX2D(c) texture2D(rubyOrigTexture, (c))
#endif

// Original (unprocessed) frame.
uniform sampler2D rubyOrigTexture;
uniform vec2 rubyOrigInputSize;
uniform vec2 rubyOrigTextureSize;

// Output of the previous pass (the blurred image).
uniform sampler2D rubyTexture;
uniform vec2 rubyInputSize;
uniform vec2 rubyTextureSize;

// Values written by the vertex stage.
varying vec2 texCoord;
varying vec2 one;
varying float mod_factor;
varying vec2 ilfac;

varying float CRTgamma;
varying float monitorgamma;

varying vec2 overscan;
varying vec2 aspect;

varying float d;
varying float R;

varying float cornersize;
varying float cornersmooth;

varying vec3 stretch;
varying vec2 sinangle;
varying vec2 cosangle;

// Builds a quadratic from xy, the viewing distance d, the radius R and the
// tilt sines/cosines, and returns its (-b - sqrt(disc)) / (2a) root.
// NOTE(review): presumably the parameter at which the view ray meets the
// curved screen - confirm against the crt-geom derivation.
float intersect(vec2 xy)
{
  float qa = dot(xy,xy)+d*d;
  float qb = 2.0*(R*(dot(xy,sinangle)-d*cosangle.x*cosangle.y)-d*d);
  float qc = d*d + 2.0*R*d*cosangle.x*cosangle.y;
  float disc = qb*qb-4.0*qa*qc;
  return (-qb-sqrt(disc))/(2.0*qa);
}

// Maps a 2D point through the curved-screen model, using intersect() and the
// tilt terms sinangle/cosangle.
// NOTE(review): presumably maps a flat output position back onto the curved
// surface ("backward transform") - confirm.
vec2 bkwtrans(vec2 xy)
{
  float scale = intersect(xy);
  vec2 pt = vec2(scale)*xy;
  pt -= vec2(-R)*sinangle;
  pt /= vec2(R);

  vec2 tan_tilt = sinangle/cosangle;
  vec2 pt_over_cos = pt/cosangle;

  // Second quadratic; the (-b + sqrt(disc)) / (2a) root is used.
  float qa = dot(tan_tilt,tan_tilt)+1.0;
  float qb = -2.0*dot(pt_over_cos,tan_tilt);
  float qc = dot(pt_over_cos,pt_over_cos)-1.0;
  float disc = qb*qb-4.0*qa*qc;
  float root = (-qb+sqrt(disc))/(2.0*qa);

  vec2 uv = (pt-root*sinangle)/cosangle;
  // Kept away from zero so the division by sin(arc/R) stays finite.
  float arc = FIX(R*acos(root));
  return uv*arc/sin(arc/R);
}

// Applies the curvature mapping to a texture coordinate: rescales it by
// texture/input size, centres and stretches it, runs it through bkwtrans(),
// then undoes overscan/aspect and rescales by input/texture size.
vec2 transform(vec2 coord)
{
  vec2 tex_over_input = rubyOrigTextureSize / rubyOrigInputSize;
  coord = coord * tex_over_input;

  vec2 centred = coord-vec2(0.5);
  coord = vec2(1.0,1.0)*centred*aspect*stretch.z+stretch.xy;

  vec2 curved = bkwtrans(coord)/overscan/aspect+vec2(0.5);
  return curved * rubyOrigInputSize / rubyOrigTextureSize;
}

// Returns a factor in [0, 1] that fades to 0 inside the rounded corners.
// main() multiplies the final colour by it.
float corner(vec2 coord)
{
  vec2 tex_over_input = rubyOrigTextureSize / rubyOrigInputSize;
  coord = coord * tex_over_input;
  coord = (coord - vec2(0.5)) * overscan + vec2(0.5);

  // Distance to the nearest edge on each axis, scaled by aspect.
  coord = min(coord, vec2(1.0)-coord) * aspect;

  vec2 radius = vec2(cornersize);
  // How far the point lies inside the corner square on each axis.
  coord = (radius - min(coord,radius));
  float dist = sqrt(dot(coord,coord));

  return clamp((radius.x-dist)*cornersmooth,0.0, 1.0);
}

// Per-channel contribution of one scanline to the current pixel.
//
// 'distance' is how far the current pixel is from that scanline, in
// texture coordinates.
// 'color' is the scanline's colour at the current pixel's horizontal
// position; brighter channels get a wider beam.
vec4 scanlineWeights(float distance, vec4 color)
{
  // The beam profile is the intensity as a function of vertical distance
  // from the centre of the scanline. In the default branch the exponent
  // grows from 2 (gaussian) towards 4 as the colour gets brighter.
  // Ideally the profile would be normalised so that its integral does not
  // depend on the width, i.e. a narrower beam would peak higher at the
  // centre than a wider one.
#ifdef USEGAUSSIAN
  vec4 beam = 0.3 + 0.1 * pow(color, vec4(3.0));
  vec4 scaled = vec4(distance / beam);
  return 0.4 * exp(-scaled * scaled) / beam;
#else
  vec4 beam = 2.0 + 2.0 * pow(color, vec4(4.0));
  vec4 scaled = vec4(distance / 0.3);
  vec4 falloff = exp(-pow(scaled * inversesqrt(0.5 * beam), beam));
  return 1.4 * falloff / (0.6 + 0.2 * beam);
#endif
}

// Fragment entry point: applies curvature, Lanczos-filters the original frame
// horizontally, weights the two nearest scanlines by the beam profile, adds
// 10% of the previous pass's image, then applies the corner mask, the dot
// mask and output gamma.
void main()
{
  // Here's a helpful diagram to keep in mind while trying to
  // understand the code:
  //
  //  |      |      |      |      |
  // -------------------------------
  //  |      |      |      |      |
  //  |  01  |  11  |  21  |  31  | <-- current scanline
  //  |      | @    |      |      |
  // -------------------------------
  //  |      |      |      |      |
  //  |  02  |  12  |  22  |  32  | <-- next scanline
  //  |      |      |      |      |
  // -------------------------------
  //  |      |      |      |      |
  //
  // Each character-cell represents a pixel on the output
  // surface, "@" represents the current pixel (always somewhere
  // in the bottom half of the current scan-line, or the top-half
  // of the next scanline). The grid of lines represents the
  // edges of the texels of the underlying texture.

  // Texture coordinates of the texel containing the active pixel.
#ifdef CURVATURE
  vec2 xy = transform(texCoord);
#else
  vec2 xy = texCoord;
#endif
  float cval = corner(xy);

  // Same position re-expressed in the previous pass's texture space; used
  // below to sample rubyTexture.
  vec2 xy2 = ((xy*rubyOrigTextureSize/rubyOrigInputSize-vec2(0.5))*vec2(1.0,1.0)+vec2(0.5))*rubyInputSize/rubyTextureSize;
  // Of all the pixels that are mapped onto the texel we are
  // currently rendering, which pixel are we currently rendering?
  // When ilfac.y exceeds 1.5 the row offset alternates between 0 and 1 on
  // successive frames.
  vec2 ilvec = vec2(0.0,ilfac.y > 1.5 ? mod(float(rubyFrameCount),2.0) : 0.0);
  vec2 ratio_scale = (xy * rubyTextureSize - vec2(0.5) + ilvec)/ilfac;
//  vec2 ratio_scale = xy * rubyOrigTextureSize - vec2(0.5);
#ifdef OVERSAMPLE
  // Named filter_width rather than `filter`: `filter` is a reserved word in
  // GLSL 1.30+ and GLSL ES, which breaks compilation on those targets.
  float filter_width = fwidth(ratio_scale.y);
#endif
  vec2 uv_ratio = fract(ratio_scale);

  // Snap to the center of the underlying texel.
  xy = (floor(ratio_scale)*ilfac + vec2(0.5) - ilvec) / rubyTextureSize;
//  xy = (floor(ratio_scale) + vec2(0.5)) / rubyOrigTextureSize;

  // Calculate Lanczos scaling coefficients describing the effect
  // of various neighbour texels in a scanline on the current
  // pixel.
  vec4 coeffs = PI * vec4(1.0 + uv_ratio.x, uv_ratio.x, 1.0 - uv_ratio.x, 2.0 - uv_ratio.x);

  // Prevent division by zero.
  coeffs = FIX(coeffs);

  // Lanczos2 kernel.
  coeffs = 2.0 * sin(coeffs) * sin(coeffs / 2.0) / (coeffs * coeffs);

  // Normalize.
  coeffs /= dot(coeffs, vec4(1.0));

  // Calculate the effective colour of the current and next
  // scanlines at the horizontal location of the current pixel,
  // using the Lanczos coefficients above.
  vec4 col  = clamp(mat4(
             TEX2D(xy + vec2(-one.x, 0.0)),
             TEX2D(xy),
             TEX2D(xy + vec2(one.x, 0.0)),
             TEX2D(xy + vec2(2.0 * one.x, 0.0))) * coeffs,
            0.0, 1.0);
  vec4 col2 = clamp(mat4(
             TEX2D(xy + vec2(-one.x, one.y)),
             TEX2D(xy + vec2(0.0, one.y)),
             TEX2D(xy + one),
             TEX2D(xy + vec2(2.0 * one.x, one.y))) * coeffs,
            0.0, 1.0);

#ifndef LINEAR_PROCESSING
  col  = pow(col , vec4(CRTgamma));
  col2 = pow(col2, vec4(CRTgamma));
#endif

  // Calculate the influence of the current and next scanlines on
  // the current pixel.
  vec4 weights  = scanlineWeights(uv_ratio.y, col);
  vec4 weights2 = scanlineWeights(1.0 - uv_ratio.y, col2);
#ifdef OVERSAMPLE
  // Average the beam profile over three vertical positions:
  // y, y + filter_width/3 and y - filter_width/3.
  uv_ratio.y =uv_ratio.y+1.0/3.0*filter_width;
  weights = (weights+scanlineWeights(uv_ratio.y, col))/3.0;
  weights2=(weights2+scanlineWeights(abs(1.0-uv_ratio.y), col2))/3.0;
  uv_ratio.y =uv_ratio.y-2.0/3.0*filter_width;
  weights=weights+scanlineWeights(abs(uv_ratio.y), col)/3.0;
  weights2=weights2+scanlineWeights(abs(1.0-uv_ratio.y), col2)/3.0;
#endif
  vec3 mul_res  = (col * weights + col2 * weights2).rgb;
  // Add 10% of the previous pass's (blurred) image, decoded with the monitor
  // gamma.
  mul_res += pow(texture2D(rubyTexture, xy2).rgb, vec3(monitorgamma))*0.1;
  mul_res *= vec3(cval);

  // dot-mask emulation:
  // Output pixels are alternately tinted green and magenta.
  vec3 dotMaskWeights = mix(
          vec3(1.0, 0.7, 1.0),
          vec3(0.7, 1.0, 0.7),
          floor(mod(mod_factor, 2.0))
      );

  mul_res *= dotMaskWeights;

  // Convert the image gamma for display on our output device.
  mul_res = pow(mul_res, vec3(1.0 / monitorgamma));

  // Color the texel.
  gl_FragColor = vec4(mul_res, 1.0);
}
]]></fragment>
</shader>

Now, here’s that same shot with just those first two fragments applied:

The reason it’s not super-blurry in the shot you posted is because cgwg took the gaussian passes and then mixed the output of his CRT pass with them. That’s where I got the idea for my shader, which uses the same trick.

Thanks for the explanation. I can’t quite reproduce this effect from crt-geom, though. For example, I want to add a halation effect to a scaling shader like xBR. It would be nice if you or someone else could post an example cgp (and cg if necessary). I worked on mdapt (which is very elementary), but I’m still very new to shader programming, so I don’t comprehend everything.

In the cgp, you would want to have the xBR shader first, followed by the two gaussian passes to get a halation effect applied to the smoothed image, then you would need another pass that applies xBR again to the original image and combines it with results of the first xBR+gaussian passes. If your halation/blur effect is strong enough, you could probably skip the initial xBR and apply the blurs to the raw image, since it will blur out the pixels anyway.

Either way, the key is applying the blur for halation and then applying xBR to the raw, original image instead of the blurred image. I know how to do that in GLSL (just add ‘uniform sampler2D rubyOrigTexture’ to xBR’s uniforms and replace xBR’s ‘texture2D(rubyTexture…’ with ‘texture2D(rubyOrigTexture…’) but not in Cg, unfortunately, and I don’t think any existing Cg shaders do such a thing for reference…

Perhaps maister can help.

No problem, thanks for the explanation though. There are several cgp shaders in “common shaders” which reference different passes. I’ll check out how they work :wink:

@Sp00kyFox Ok, I think I’ve mostly got a handle on referencing the original image. It’s similar to referencing previous passes, except you need a struct and TEXUNIT assignment for “ORIG” instead of PREV*.

Here’s an example of a standalone Cg/cgp halation shader: http://www.mediafire.com/download/vecac7jzldmbvn4/Halation.zip NOTE: my texCoords are screwed up somehow on the Orig part. I threw in some hacky values that work for SNES. Genesis needs 2.0 , 4.0 instead… Additionally, you could opt for a simpler combine than the ‘screen’ method I used. Something like ‘(pass1 + pass2) / 1.5’ works well, too.

Here’s how it looks:

So, I think if you modify the xBR third pass to work from the original image and then combine with the blurred image similar to what I did in that example, you should be set.

nice explanation, thank you. I’ll check it out.