I’ve just started learning Cg and I’ve been practicing by trying to port XML/GLSL shaders with varying success. One example is the gaussian scanlines part of maister’s NTSC+gaussian scanlines shader.
Isolating that part was easy for GLSL, and it leaves this code:
<?xml version="1.0" encoding="UTF-8"?>
<!-- 
   Gaussian Scanline
   Author: Themaister
   License: GPLv3
-->
<shader language="GLSL" style="GLES2">
   <vertex><![CDATA[
      uniform mat4 rubyMVPMatrix;
      uniform vec2 rubyTextureSize;
      attribute vec2 rubyVertexCoord;
      attribute vec2 rubyTexCoord;
      varying vec2 tex_coord;
      varying vec2 one;
      varying vec2 pix_no;
      void main()
      {
         gl_Position = rubyMVPMatrix * vec4(rubyVertexCoord, 0.0, 1.0);
         tex_coord = rubyTexCoord;
         one = 1.0 / rubyTextureSize;
         pix_no = rubyTexCoord * rubyTextureSize;
      }
   ]]></vertex>
   <fragment filter="nearest" scale_x="1.0" outscale_y="1.0"><![CDATA[
      uniform sampler2D rubyTexture;
      uniform vec2 rubyTextureSize;
      varying vec2 tex_coord;
      varying vec2 one;
      varying vec2 pix_no;
#define DISPLAY_GAMMA 2.1
#define CRT_GAMMA 2.5
#define TEX(off) pow(texture2D(rubyTexture, tex_coord + vec2(0.0, (off) * one.y)).rgb, vec3(CRT_GAMMA))
      void main()
      {
         vec3 frame0 = TEX(-2.0);
         vec3 frame1 = TEX(-1.0);
         vec3 frame2 = TEX(0.0);
         vec3 frame3 = TEX(1.0);
         vec3 frame4 = TEX(2.0);
         float offset_dist = fract(pix_no.y) - 0.5;
         float dist0 =  2.0 + offset_dist;
         float dist1 =  1.0 + offset_dist;
         float dist2 =  0.0 + offset_dist;
         float dist3 = -1.0 + offset_dist;
         float dist4 = -2.0 + offset_dist;
         vec3 scanline = frame0 * exp(-5.0 * dist0 * dist0);
         scanline += frame1 * exp(-5.0 * dist1 * dist1);
         scanline += frame2 * exp(-5.0 * dist2 * dist2);
         scanline += frame3 * exp(-5.0 * dist3 * dist3);
         scanline += frame4 * exp(-5.0 * dist4 * dist4);
         gl_FragColor = vec4(pow(1.15 * scanline, vec3(1.0 / DISPLAY_GAMMA)), 1.0);
      }
   ]]></fragment>
   </shader>
And it looks like this (ignore the weird black voids; that’s just my video card failing…):

When I tried to port it to Cg, I ended up with this code:
/* COMPATIBILITY 
   - HLSL compilers
   - Cg   compilers
*/
#define DISPLAY_GAMMA 2.1
#define CRT_GAMMA 2.5
#define TEX(off) pow(tex2D(IN.texture, texCoord + float2(0.0, (off) * one.y)).rgb, float3(CRT_GAMMA))
// VERTEX SHADER //
void main_vertex
(
    float4 position    : POSITION,
    float2 texCoord : TEXCOORD0,
    uniform float4x4 modelViewProj,
    out float4 oPosition : POSITION,
    out float2 otexCoord : TEXCOORD
)
{
    oPosition = mul(modelViewProj, position);
    otexCoord = texCoord;
}
struct input
{
  float2 video_size;
  float2 texCoord_size;
  float2 output_size;
  float frame_count;
  float frame_direction;
  float frame_rotation;
  sampler2D texture : TEXUNIT0;
};
struct output 
{
  float4 col    : COLOR;
};
// FRAGMENT SHADER //
output main_fragment(in float2 texCoord : TEXCOORD0,
uniform input IN
)
{
float2 pix_no = texCoord * IN.video_size;
float2 one = 1.0 / IN.video_size;
         float3 frame0 = TEX(-2.0);
         float3 frame1 = TEX(-1.0);
         float3 frame2 = TEX(0.0);
         float3 frame3 = TEX(1.0);
         float3 frame4 = TEX(2.0);
         float offset_dist = fract(pix_no.y) - 0.5;
         float dist0 =  2.0 + offset_dist;
         float dist1 =  1.0 + offset_dist;
         float dist2 =  0.0 + offset_dist;
         float dist3 = -1.0 + offset_dist;
         float dist4 = -2.0 + offset_dist;
         float3 scanline = frame0 * exp(-5.0 * dist0 * dist0);
         scanline += frame1 * exp(-5.0 * dist1 * dist1);
         scanline += frame2 * exp(-5.0 * dist2 * dist2);
         scanline += frame3 * exp(-5.0 * dist3 * dist3);
         scanline += frame4 * exp(-5.0 * dist4 * dist4);
         float4 final = float4(pow(1.15 * scanline, float3(1.0 / DISPLAY_GAMMA)), 1.0);
output OUT;
   OUT.col = final;
   return OUT;
}
And it sort of works, but not really:
 Can anyone suggest what I might be doing wrong? I assume it’s something fairly simple.
Can anyone suggest what I might be doing wrong? I assume it’s something fairly simple.
