HQX shader is giving me some problems

altmoola · 15 January 2017 03:47

I’m having an issue implementing an HQX filter that I found in the LibRetro GitHub repository.

LUT texture was obtained here
Shader file was obtained here (I attempted to convert it to compile in MonoGame with SM 5)

This is what I’m getting:

And this is what I’m expecting:

This is the relevant code:

    // Load the HQX effect and its lookup-table (LUT) texture from the content pipeline.
    var hqxEffect = base.Content.Load<Effect>(@"Effects\hqx");
    var hqxLut = base.Content.Load<Texture2D>(@"Effects\hq4x");

    // Orthographic projection covering the 720x480 output, preceded by a
    // half-pixel translation.
    var halfPixelShift = Matrix.CreateTranslation(0.5f, 0.5f, 0);
    var orthographic = Matrix.CreateOrthographicOffCenter(0, 720, 480, 0, 0, 1);

    // Feed the shader its transform, the 240x160 source size, and the LUT.
    hqxEffect.Parameters["modelViewProj"].SetValue(halfPixelShift * orthographic);
    hqxEffect.Parameters["texture_size"].SetValue(new Vector2(240f, 160f));
    hqxEffect.Parameters["LutTexture"].SetValue(hqxLut);

    this.effect = hqxEffect;

Here is how I’ve configured the required LUT image:

Here is where I draw a pre-rendered texture using the effect:

    // renderTarget is a 240x160 texture that has the image drawn to it first.
    // The scaling happens here: renderTarget is drawn into a 720x480 rectangle
    // with the HQX effect applied.
    spriteBatch.Begin(
        SpriteSortMode.Deferred,
        BlendState.NonPremultiplied,
        SamplerState.PointClamp,
        depthStencilState: null,
        rasterizerState: null,
        effect: this.Effect);
    spriteBatch.Draw(
        this.renderTarget,
        new Rectangle(0, 0, 720, 480),
        this.renderTarget.Bounds,
        Color.White);
    spriteBatch.End();

This is the contents of the hqx.fx file:

    // Upscaling factor of the filter; it sizes the sub-texel grid used to
    // address the lookup table in main_fragment.
    #define SCALE 4


    // Per-channel difference thresholds, expressed on a 0-255 scale.
    #define trY 48.0
    #define trU 7.0
    #define trV 6.0


    // The same thresholds rescaled by 1/255 for the comparison in diff().
    static float3 yuv_threshold = float3(trY/255.0, trU/255.0, trV/255.0);


    // yuv: matrix applied to sampled RGB values in main_fragment to produce
    // the vectors that diff() compares.
    // yuv_offset: offset that diff() adds to both of its operands.
    const static float3x3 yuv = float3x3(0.299, 0.587, 0.114, -0.169, -0.331, 0.5, 0.5, -0.419, -0.081);
    const static float3 yuv_offset = float3(0, 0.5, 0.5);


    // Returns true when the two converted colours differ by more than
    // yuv_threshold in at least one channel.
    bool diff(float3 yuv1, float3 yuv2) {
        float3 delta = abs((yuv1 + yuv_offset) - (yuv2 + yuv_offset));
        return any(delta > yuv_threshold);
    }


    // Values produced by main_vertex and consumed by main_fragment.
    struct out_vertex {
        float4 position : SV_POSITION; // input position multiplied by modelViewProj
        float4 color    : COLOR;       // vertex colour, passed through unchanged
        float2 texCoord : TEXCOORD0;   // texture coordinate of the centre sample
        float4 t1       : TEXCOORD1;   // xyz = U at -dx, 0, +dx; w = V at -dy
        float4 t2       : TEXCOORD2;   // xyz = U at -dx, 0, +dx; w = V unchanged
        float4 t3       : TEXCOORD3;   // xyz = U at -dx, 0, +dx; w = V at +dy
    };


    // decal: the source image being upscaled (sampler slot 0).
    // LUT:   the lookup table of blend weights (sampler slot 1), bound to the
    //        "LutTexture" effect parameter, with linear filtering and clamped
    //        addressing.
    // NOTE: main_fragment uses all four LUT channels as independent weights,
    // so the LUT image has to be imported without premultiplied alpha and
    // without mipmaps; otherwise the weights read back are wrong.
    sampler2D decal : register(s0);
    sampler2D LUT : register(s1)
    { 
        Texture = (LutTexture);
        Filter = Linear;  
        AddressU = clamp;
        AddressV = clamp;
    };


    // Size of the source texture in texels; supplied by the host application.
    float2 texture_size;
    // Transform applied to incoming vertex positions in main_vertex.
    float4x4 modelViewProj;


    // Vertex stage: transforms the position, forwards colour and texture
    // coordinate, and precomputes the coordinates of the 3x3 texel
    // neighbourhood so the fragment stage can sample it directly.
    out_vertex main_vertex
    (
        float4 position    : SV_POSITION,
        float4 color    : COLOR,
        float2 texCoord : TEXCOORD0
    )
    {
        // Size of one source texel in texture-coordinate units.
        float2 texel = 1.0/texture_size;

        out_vertex result;
        result.position = mul(position, modelViewProj);
        result.color    = color;
        result.texCoord = texCoord;

        // Each row packs three U values (left, centre, right) in xyz and the
        // row's V value in w.
        result.t1 = texCoord.xxxy + float4(-texel.x, 0, texel.x, -texel.y); //  w1 | w2 | w3
        result.t2 = texCoord.xxxy + float4(-texel.x, 0, texel.x,        0); //  w4 | w5 | w6
        result.t3 = texCoord.xxxy + float4(-texel.x, 0, texel.x,  texel.y); //  w7 | w8 | w9

        return result;
    }
    // Fragment stage: classifies the 3x3 neighbourhood around the current
    // source texel, looks up four blend weights for that classification in
    // the LUT, and returns the weighted mix of the centre texel and three of
    // its neighbours. The output alpha is always 1.0.
    float4 main_fragment(in out_vertex VAR) : COLOR
    {
        // fp: position inside the current source texel, each axis in [0, 1).
        // quad: per-axis sign of (fp - 0.5), i.e. which half of the texel the
        // fragment falls in.
        float2 fp = frac(VAR.texCoord*texture_size);
        float2 quad = sign(-0.5 + fp);
        float2 ps = 1.0/texture_size;


        // p1 is the centre texel; p2, p3 and p4 are the diagonal, horizontal
        // and vertical neighbours on the side selected by quad.
        float dx = ps.x;
        float dy = ps.y;
        float3 p1 = tex2D(decal, VAR.texCoord).rgb;
        float3 p2 = tex2D(decal, VAR.texCoord + float2(dx, dy) * quad).rgb;
        float3 p3 = tex2D(decal, VAR.texCoord + float2(dx, 0) * quad).rgb;
        float3 p4 = tex2D(decal, VAR.texCoord + float2(0, dy) * quad).rgb;
        float4x3 pixels = float4x3(p1, p2, p3, p4);


        // w1..w9: the 3x3 neighbourhood (row by row, w5 = centre) after
        // conversion with the yuv matrix.
        float3 w1  = mul(yuv, tex2D(decal, VAR.t1.xw).rgb);
        float3 w2  = mul(yuv, tex2D(decal, VAR.t1.yw).rgb);
        float3 w3  = mul(yuv, tex2D(decal, VAR.t1.zw).rgb);


        float3 w4  = mul(yuv, tex2D(decal, VAR.t2.xw).rgb);
        float3 w5  = mul(yuv, p1);
        float3 w6  = mul(yuv, tex2D(decal, VAR.t2.zw).rgb);


        float3 w7  = mul(yuv, tex2D(decal, VAR.t3.xw).rgb);
        float3 w8  = mul(yuv, tex2D(decal, VAR.t3.yw).rgb);
        float3 w9  = mul(yuv, tex2D(decal, VAR.t3.zw).rgb);


        // pattern: for each neighbour, whether it differs from the centre.
        // cross: whether adjacent edge neighbours differ from each other.
        bool3x3 pattern = bool3x3(diff(w5, w1), diff(w5, w2), diff(w5, w3),
                                  diff(w5, w4), false       , diff(w5, w6),
                                  diff(w5, w7), diff(w5, w8), diff(w5, w9));
        bool4 cross = bool4(diff(w4, w2), diff(w2, w6), diff(w8, w4), diff(w6, w8));
        
        // index.x packs the eight pattern flags into one value (0..255); the
        // centre entry carries weight 0.
        // index.y combines the four cross flags (0..15), scaled by
        // SCALE*SCALE, with the index of the sub-texel cell containing fp.
        float2 index;
        index.x = dot(pattern[0], float3(1, 2, 4)) +
                  dot(pattern[1], float3(8, 0, 16)) +
                  dot(pattern[2], float3(32, 64, 128));
        index.y = dot(cross, float4(1, 2, 4, 8)) * (SCALE * SCALE) +
                  dot(floor(fp * SCALE), float2(1, SCALE));


        // step is the size of one LUT texel for a 256 x (16*SCALE*SCALE)
        // table; offset moves the lookup to the texel centre. The four
        // weights are normalised by their sum before blending p1..p4.
        float2 step = 1.0 / float2(256.0, 16.0 * (SCALE * SCALE));
        float2 offset = step / 2.0;
        float4 weights = tex2D(LUT, index * step + offset);
        float sum = dot(weights, float4(1,1,1,1));
        float3 res = mul(transpose(pixels), weights / sum);


        return float4(res, 1.0);
    }


    // Single technique with one pass: main_vertex and main_fragment compiled
    // for the vs_5_0 / ps_5_0 profiles.
    technique T0
    {
        pass P0
        {
            VertexShader = compile vs_5_0 main_vertex();
            PixelShader = compile ps_5_0 main_fragment();
        }
    }

Can anyone help me figure out what is going on here? Any help is greatly appreciated.

hunterk · 15 January 2017 06:28

Hmm. Maybe the LUT’s getting read differently than expected? Have you tried flipping it vertically/horizontally?

Other than that, you might try raising the issue with Armada, the guy who wrote it, over in his repo for it:

EDIT: I’m also curious as to why you’re going with hqx instead of xbr? performance concerns?

altmoola · 15 January 2017 06:28

I feel like you are correct about the LUT texture being an issue. However, rotating or flipping it hasn’t seemed to help. Is it safe to rotate that image in an image editor? I’ve been playing around with the weights that the shader gets from the LUT texture (by simply ignoring them or overriding them), and doing so basically just makes the texture a bit blurry.

For example, if I completely ignore the LUT weights and just override it like so:

// Equal weights: the shader divides them by their sum, so each of the four
// sampled pixels ends up contributing 25% of the result.
float4 weights = float4(0.1,0.1,0.1,0.1);

I’ll get something like this:

Any idea how I could figure out what kind of weights the LUT texture should be pulling? Should I be generating mipmaps on the texture when it gets compiled (I currently am)?

I actually have already gotten the xbr shader working. I’m just trying to get the hqx one to work as well.

I did open up an issue located here: https://github.com/Armada651/hqx-shader/issues/1

But if you have any other ideas let me know.

hunterk · 15 January 2017 06:28

If it’s reading from the mipmap, that’s definitely going to be bad, so yeah, turning off mipmapping could be a good thing.

If you’re interested, you might also take a look at how the LUTs are generated:

altmoola · 15 January 2017 06:28

So the issue was that the PremultiplyAlpha setting should have been false in the Pipeline configuration, and mipmap generation needed to be turned off as well.

Thanks for the help, hunter.

hunterk · 15 January 2017 06:28

Oh nice. Easy solutions! That’s the best kind

np