[REQUEST] Convert shader to GLSL

bootsector · 15 January 2017 03:33

Hi there!

Could someone convert the following shader to the GLSL format so I can use it on RetroArch for the OUYA?

https://github.com/libretro/common-shad … hyllian.cg

Thanks!

bootsector

hunterk · 15 January 2017 05:04

Here you go. I’m curious to hear whether you get 60 fps with it. I know it’s quite fast, so it wouldn’t surprise me!

// GLSL shader autogenerated by cg2glsl.py.
// Vertex stage: everything up to the matching #elif is compiled only when
// VERTEX is defined.
#if defined(VERTEX)

// Map the COMPAT_* macros onto the keywords of the GLSL version in use:
// out/in/texture when __VERSION__ >= 130, varying/attribute/texture2D otherwise.
#if __VERSION__ >= 130
#define COMPAT_VARYING out
#define COMPAT_ATTRIBUTE in
#define COMPAT_TEXTURE texture
#else
#define COMPAT_VARYING varying 
#define COMPAT_ATTRIBUTE attribute 
#define COMPAT_TEXTURE texture2D
#endif

// COMPAT_PRECISION expands to mediump on GL ES and to nothing elsewhere.
#ifdef GL_ES
#define COMPAT_PRECISION mediump
#else
#define COMPAT_PRECISION
#endif
// Vertex-stage interface and converter-generated globals.
// VARtexCoord is the texture coordinate written by main() for the fragment stage.
COMPAT_VARYING     vec2 VARtexCoord;
// Declared by the converter; main() in this stage does not reference it.
COMPAT_VARYING     float _frame_rotation;
// Generated aggregate types; the only instances (_ret_0, _IN1 below) are not
// referenced by main() in this stage.
struct input_dummy {
    vec2 _video_size;
    vec2 _texture_size;
    vec2 _output_dummy_size;
    float _frame_count;
    float _frame_direction;
    float _frame_rotation;
};
struct out_vertex {
    vec2 VARtexCoord;
};
// File-scope scratch variables emitted by the Cg-to-GLSL converter.
vec4 _oPosition1;
out_vertex _ret_0;
input_dummy _IN1;
vec4 _r0008;
// Per-vertex inputs: main() reads all four components of VertexCoord and
// TexCoord.xy.
COMPAT_ATTRIBUTE vec4 VertexCoord;
COMPAT_ATTRIBUTE vec4 TexCoord;
 
// Uniforms. main() in this stage reads MVPMatrix and TextureSize only; the
// remaining uniforms are declared but not referenced here.
uniform mat4 MVPMatrix;
uniform int FrameDirection;
uniform int FrameCount;
uniform COMPAT_PRECISION vec2 OutputSize;
uniform COMPAT_PRECISION vec2 TextureSize;
uniform COMPAT_PRECISION vec2 InputSize;
// Vertex entry point: transforms the vertex by MVPMatrix and forwards a
// slightly left-shifted texture coordinate to the fragment stage.
void main()
{
    // MVPMatrix * VertexCoord, expanded column by column and accumulated in
    // the same order as the generated code.
    vec4 clipPos = VertexCoord.x*MVPMatrix[0];
    clipPos = clipPos + VertexCoord.y*MVPMatrix[1];
    clipPos = clipPos + VertexCoord.z*MVPMatrix[2];
    clipPos = clipPos + VertexCoord.w*MVPMatrix[3];

    // Size of one texel of the source texture in texture coordinates.
    vec2 texelSize = 1.0/TextureSize;

    // Offset the lookup by -0.49 texel horizontally; vertical is untouched.
    VARtexCoord = TexCoord.xy + texelSize*vec2(-4.90000010E-01, 0.0);
    gl_Position = clipPos;
}
// Fragment stage: compiled only when FRAGMENT is defined and VERTEX is not.
#elif defined(FRAGMENT)

// For __VERSION__ >= 130 the colour output is the user-declared FragColor and
// sampling uses texture(); otherwise FragColor aliases gl_FragColor and
// sampling uses texture2D().
#if __VERSION__ >= 130
#define COMPAT_VARYING in
#define COMPAT_TEXTURE texture
out vec4 FragColor;
#else
#define COMPAT_VARYING varying
#define FragColor gl_FragColor
#define COMPAT_TEXTURE texture2D
#endif

// On GL ES, set the default float precision: highp when the implementation
// advertises GL_FRAGMENT_PRECISION_HIGH, mediump otherwise. COMPAT_PRECISION
// expands to mediump on GL ES and to nothing elsewhere.
#ifdef GL_ES
#ifdef GL_FRAGMENT_PRECISION_HIGH
precision highp float;
#else
precision mediump float;
#endif
#define COMPAT_PRECISION mediump
#else
#define COMPAT_PRECISION
#endif
// Fragment-stage interface and converter-generated globals.
// VARtexCoord is the interpolated texture coordinate read by main().
COMPAT_VARYING     vec2 VARtexCoord;
// Declared by the converter; main() in this stage does not reference it.
COMPAT_VARYING     float _frame_rotation;
// Generated aggregate types; their instances (_VAR1, _IN1 below) are not
// referenced by main() in this stage.
struct input_dummy {
    vec2 _video_size;
    vec2 _texture_size;
    vec2 _output_dummy_size;
    float _frame_count;
    float _frame_direction;
    float _frame_rotation;
};
struct out_vertex {
    vec2 VARtexCoord;
};
// File-scope scratch variables emitted by the Cg-to-GLSL converter. They hold
// no state between invocations that main() depends on.
vec4 _ret_0;
float _TMP9;
float _TMP8;
float _TMP7;
float _TMP6;
float _TMP14;
float _TMP13;
float _TMP12;
float _TMP11;
vec3 _TMP10;
vec3 _TMP15;
vec4 _TMP4;
vec4 _TMP3;
vec4 _TMP2;
vec4 _TMP1;
vec2 _TMP0;
out_vertex _VAR1;
// Source image sampled by main().
uniform sampler2D Texture;
input_dummy _IN1;
vec2 _x0024;
vec2 _x0026;
vec2 _c0028;
vec2 _c0032;
vec2 _c0034;
vec4 _r0036;
vec4 _v0036;
vec3 _r0046;
vec2 _a0048;
vec3 _TMP53;
vec3 _TMP55;
vec3 _TMP63;
vec3 _TMP65;
vec3 _t0074;
vec3 _TMP75;
vec3 _x0076;
vec3 _x0082;
vec3 _TMP85;
vec3 _TMP91;
vec3 _x0092;
float _c0098;
float _a0100;
 
// Uniforms. main() in this stage reads OutputSize, TextureSize and InputSize;
// FrameDirection and FrameCount are declared but not referenced here.
uniform int FrameDirection;
uniform int FrameCount;
uniform COMPAT_PRECISION vec2 OutputSize;
uniform COMPAT_PRECISION vec2 TextureSize;
uniform COMPAT_PRECISION vec2 InputSize;
// Fragment entry point: horizontal 4-tap cubic filter, a brightness-dependent
// vertical scanline profile, an alternating dot mask, and a gamma round trip.
// Literals that are not exactly representable are kept exactly as generated.
void main()
{
    // One source texel to the right, in texture coordinates.
    vec2 texelStepX = vec2(1.0/TextureSize.x, 0.0);

    // Position in texel units: the integer part picks the texel, the
    // fractional part drives the filter (x) and the scanline profile (y).
    vec2 texelPos = VARtexCoord*TextureSize;
    vec2 texelCenter = (floor(texelPos) + vec2(0.5, 0.5))/TextureSize;
    vec2 subTexel = fract(texelPos);

    // Four horizontally adjacent taps: one to the left, the current texel,
    // and two to the right.
    vec3 tapLeft = COMPAT_TEXTURE(Texture, texelCenter - texelStepX).xyz;
    vec3 tapHere = COMPAT_TEXTURE(Texture, texelCenter).xyz;
    vec3 tapRight = COMPAT_TEXTURE(Texture, texelCenter + texelStepX).xyz;
    vec3 tapFarRight = COMPAT_TEXTURE(Texture, texelCenter + 2.0*texelStepX).xyz;

    // Cubic weights: each is a polynomial in the horizontal sub-texel offset.
    float fx = subTexel.x;
    vec4 powers = vec4(fx*fx*fx, fx*fx, fx, 1.0);
    vec4 weights;
    weights.x = dot(vec4(-1.66666672E-01, 0.5, -3.33333343E-01, 0.0), powers);
    weights.y = dot(vec4(0.5, -1.0, -0.5, 1.0), powers);
    weights.z = dot(vec4(-0.5, 0.5, 1.0, 0.0), powers);
    weights.w = dot(vec4(1.66666672E-01, 0.0, -1.66666672E-01, 0.0), powers);

    vec3 filtered = weights.x*tapLeft;
    filtered = filtered + weights.y*tapHere;
    filtered = filtered + weights.z*tapRight;
    filtered = filtered + weights.w*tapFarRight;

    // Vertical distance from the texel centre (0 at centre, 0.5 at the edge).
    float distY = abs(subTexel - vec2(0.5, 0.5)).y;

    // Beam width terms from the current and right-hand texels; the square
    // root is kept as 1/inversesqrt, exactly as generated.
    vec3 biasedHere = vec3(1.00000001E-01, 1.00000001E-01, 1.00000001E-01)
                    + tapHere*vec3(5.99999964E-01, 5.99999964E-01, 5.99999964E-01);
    vec3 biasedRight = vec3(1.00000001E-01, 1.00000001E-01, 1.00000001E-01)
                     + tapRight*vec3(5.99999964E-01, 5.99999964E-01, 5.99999964E-01);
    vec3 rootHere = 1.0/inversesqrt(biasedHere);
    vec3 rootRight = 1.0/inversesqrt(biasedRight);

    // Blend from rootHere towards rootRight by the factor (1 - rootHere).
    vec3 blend = 1.0 - rootHere;
    vec3 beamWidth = rootHere + blend*(rootRight - rootHere);

    // Scanline profile: Hermite curve t*t*(3 - 2t) of the clamped, inverted
    // distance, then compressed into the range [0.3, 1.0].
    vec3 falloff = clamp(distY/beamWidth, 0.0, 1.0);
    vec3 t = clamp(1.0 - falloff, 0.0, 1.0);
    vec3 scan = t*t*(vec3(3.0, 3.0, 3.0) - 2.0*t);
    scan = 6.99999988E-01*(scan - 1.0) + 1.0;

    // Boost the blue channel, apply the scanline profile, clamp to [0, 1].
    vec3 color = filtered;
    color.z = filtered.z*1.10000002E+00;
    vec3 lit = clamp(color*scan, 0.0, 1.0);

    // Column parity: remainder of modFactor / 2 with the sign of modFactor,
    // floored. It selects one of two alternating mask weight sets.
    float modFactor = (VARtexCoord.x*OutputSize.x*TextureSize.x)/InputSize.x;
    float remainder = fract(abs(modFactor/2.0))*abs(2.0);
    if (modFactor < 0.0) {
        remainder = -remainder;
    }
    float parity = floor(remainder);
    vec3 maskWeights = vec3(1.0, 6.99999988E-01, 1.0)
                     + parity*vec3(-3.00000012E-01, 3.00000012E-01, -3.00000012E-01);

    // Decode with gamma 2.4, apply mask and 1.5x gain, re-encode.
    vec3 linear = vec3(pow(lit.x, 2.40000010E+00),
                       pow(lit.y, 2.40000010E+00),
                       pow(lit.z, 2.40000010E+00));
    linear = linear*maskWeights;
    linear = linear*1.5;
    vec3 encoded = vec3(pow(linear.x, 4.54545438E-01),
                        pow(linear.y, 4.54545438E-01),
                        pow(linear.z, 4.54545438E-01));

    FragColor = vec4(encoded.x, encoded.y, encoded.z, 1.0);
}
#endif

bootsector · 15 January 2017 05:04

Thanks!

Tried to convert it under Windows using cg2glsl.py but got some errors. Did you have to modify the original file in order to convert it?

Unfortunately I’ve got only 20fps on the OUYA, so I think I need a better machine for running RetroArch now…

hunterk · 15 January 2017 05:04

Ah, too bad

I didn’t have to do anything to it, but I used the old Makefile.shaders (which is missing from the RetroArch codebase now for some reason) and it converted just fine.

bootsector · 15 January 2017 05:05

hyllian has just released a low-end/lightweight version of this shader:

http://pastebin.com/5hfEXz8g

I would love to test this again on the OUYA if you help me to convert it again to GLSL! :rolleyes:

hunterk · 15 January 2017 05:05

Try this one:

// GLSL shader autogenerated by cg2glsl.py.
// Vertex stage: everything up to the matching #elif is compiled only when
// VERTEX is defined.
#if defined(VERTEX)

// Map the COMPAT_* macros onto the keywords of the GLSL version in use:
// out/in/texture when __VERSION__ >= 130, varying/attribute/texture2D otherwise.
#if __VERSION__ >= 130
#define COMPAT_VARYING out
#define COMPAT_ATTRIBUTE in
#define COMPAT_TEXTURE texture
#else
#define COMPAT_VARYING varying 
#define COMPAT_ATTRIBUTE attribute 
#define COMPAT_TEXTURE texture2D
#endif

// COMPAT_PRECISION expands to mediump on GL ES and to nothing elsewhere.
#ifdef GL_ES
#define COMPAT_PRECISION mediump
#else
#define COMPAT_PRECISION
#endif
// Vertex-stage interface and converter-generated globals.
// VARtexCoord is the texture coordinate written by main() for the fragment stage.
COMPAT_VARYING     vec2 VARtexCoord;
// Declared by the converter; main() in this stage does not reference it.
COMPAT_VARYING     float _frame_rotation;
// Generated aggregate types; the only instances (_ret_0, _IN1 below) are not
// referenced by main() in this stage.
struct input_dummy {
    vec2 _video_size;
    vec2 _texture_size;
    vec2 _output_dummy_size;
    float _frame_count;
    float _frame_direction;
    float _frame_rotation;
};
struct out_vertex {
    vec2 VARtexCoord;
};
// File-scope scratch variables emitted by the Cg-to-GLSL converter.
vec4 _oPosition1;
out_vertex _ret_0;
input_dummy _IN1;
vec4 _r0008;
// Per-vertex inputs: main() reads all four components of VertexCoord and
// TexCoord.xy.
COMPAT_ATTRIBUTE vec4 VertexCoord;
COMPAT_ATTRIBUTE vec4 TexCoord;
 
// Uniforms. main() in this stage reads MVPMatrix and TextureSize only; the
// remaining uniforms are declared but not referenced here.
uniform mat4 MVPMatrix;
uniform int FrameDirection;
uniform int FrameCount;
uniform COMPAT_PRECISION vec2 OutputSize;
uniform COMPAT_PRECISION vec2 TextureSize;
uniform COMPAT_PRECISION vec2 InputSize;
// Vertex entry point: transforms the vertex by MVPMatrix and forwards a
// slightly left-shifted texture coordinate to the fragment stage.
void main()
{
    // MVPMatrix * VertexCoord, expanded column by column and accumulated in
    // the same order as the generated code.
    vec4 clipPos = VertexCoord.x*MVPMatrix[0];
    clipPos = clipPos + VertexCoord.y*MVPMatrix[1];
    clipPos = clipPos + VertexCoord.z*MVPMatrix[2];
    clipPos = clipPos + VertexCoord.w*MVPMatrix[3];

    // Size of one texel of the source texture in texture coordinates.
    vec2 texelSize = 1.0/TextureSize;

    // Offset the lookup by -0.49 texel horizontally; vertical is untouched.
    VARtexCoord = TexCoord.xy + texelSize*vec2(-4.90000010E-01, 0.0);
    gl_Position = clipPos;
}
// Fragment stage: compiled only when FRAGMENT is defined and VERTEX is not.
#elif defined(FRAGMENT)

// For __VERSION__ >= 130 the colour output is the user-declared FragColor and
// sampling uses texture(); otherwise FragColor aliases gl_FragColor and
// sampling uses texture2D().
#if __VERSION__ >= 130
#define COMPAT_VARYING in
#define COMPAT_TEXTURE texture
out vec4 FragColor;
#else
#define COMPAT_VARYING varying
#define FragColor gl_FragColor
#define COMPAT_TEXTURE texture2D
#endif

// On GL ES, set the default float precision: highp when the implementation
// advertises GL_FRAGMENT_PRECISION_HIGH, mediump otherwise. COMPAT_PRECISION
// expands to mediump on GL ES and to nothing elsewhere.
#ifdef GL_ES
#ifdef GL_FRAGMENT_PRECISION_HIGH
precision highp float;
#else
precision mediump float;
#endif
#define COMPAT_PRECISION mediump
#else
#define COMPAT_PRECISION
#endif
// Fragment-stage interface and converter-generated globals.
// VARtexCoord is the interpolated texture coordinate read by main().
COMPAT_VARYING     vec2 VARtexCoord;
// Declared by the converter; main() in this stage does not reference it.
COMPAT_VARYING     float _frame_rotation;
// Generated aggregate types; their instances (_VAR1, _IN1 below) are not
// referenced by main() in this stage.
struct input_dummy {
    vec2 _video_size;
    vec2 _texture_size;
    vec2 _output_dummy_size;
    float _frame_count;
    float _frame_direction;
    float _frame_rotation;
};
struct out_vertex {
    vec2 VARtexCoord;
};
// File-scope scratch variables emitted by the Cg-to-GLSL converter. They hold
// no state between invocations that main() depends on.
vec4 _ret_0;
vec3 _TMP5;
float _TMP8;
float _TMP7;
float _TMP6;
float _TMP9;
vec4 _TMP4;
vec4 _TMP3;
vec4 _TMP2;
vec4 _TMP1;
vec2 _TMP0;
out_vertex _VAR1;
// Source image sampled by main().
uniform sampler2D Texture;
input_dummy _IN1;
vec2 _x0018;
vec2 _x0020;
vec2 _c0022;
vec2 _c0026;
vec2 _c0028;
vec4 _r0030;
vec4 _v0030;
vec3 _r0040;
float _a0042;
float _x0044;
float _TMP47;
vec3 _a0054;
 
// Uniforms. main() in this stage reads TextureSize only; the other uniforms
// are declared but not referenced here.
uniform int FrameDirection;
uniform int FrameCount;
uniform COMPAT_PRECISION vec2 OutputSize;
uniform COMPAT_PRECISION vec2 TextureSize;
uniform COMPAT_PRECISION vec2 InputSize;
// Fragment entry point of the lightweight variant: horizontal 4-tap cubic
// filter, a fixed vertical scanline profile, and a gamma round trip
// (decode with 2.4, encode with 1/2.2).
//
// Fix over the generated code: the two outer filter weights are <= 0 for a
// sub-texel offset in [0, 1), so the filtered colour can drop below zero
// next to sharp edges. The generated code fed that value straight into
// pow(x, 2.4), whose result GLSL leaves undefined for x < 0. The value is
// now clamped to zero first; non-negative inputs are unaffected.
void main()
{
    // One source texel to the right, in texture coordinates.
    vec2 texelStepX = vec2(1.0/TextureSize.x, 0.0);

    // Position in texel units: the integer part picks the texel, the
    // fractional part drives the filter (x) and the scanline profile (y).
    vec2 texelPos = VARtexCoord*TextureSize;
    vec2 texelCenter = (floor(texelPos) + vec2(0.5, 0.5))/TextureSize;
    vec2 subTexel = fract(texelPos);

    // Four horizontally adjacent taps: one to the left, the current texel,
    // and two to the right.
    vec3 tapLeft = COMPAT_TEXTURE(Texture, texelCenter - texelStepX).xyz;
    vec3 tapHere = COMPAT_TEXTURE(Texture, texelCenter).xyz;
    vec3 tapRight = COMPAT_TEXTURE(Texture, texelCenter + texelStepX).xyz;
    vec3 tapFarRight = COMPAT_TEXTURE(Texture, texelCenter + 2.0*texelStepX).xyz;

    // Cubic weights: each is a polynomial in the horizontal sub-texel offset.
    float fx = subTexel.x;
    vec4 powers = vec4(fx*fx*fx, fx*fx, fx, 1.0);
    vec4 weights;
    weights.x = dot(vec4(-0.5, 1.0, -0.5, 0.0), powers);
    weights.y = dot(vec4(1.5, -2.5, 0.0, 1.0), powers);
    weights.z = dot(vec4(-1.5, 2.0, 0.5, 0.0), powers);
    weights.w = dot(vec4(0.5, -0.5, 0.0, 0.0), powers);

    vec3 filtered = weights.x*tapLeft;
    filtered = filtered + weights.y*tapHere;
    filtered = filtered + weights.z*tapRight;
    filtered = filtered + weights.w*tapFarRight;

    // Scanline profile: Hermite curve t*t*(3 - 2t) of one minus the vertical
    // distance from the texel centre.
    float distY = abs(subTexel.y - 0.5);
    float t = clamp(1.0 - distY, 0.0, 1.0);
    float scan = t*t*(3.0 - 2.0*t);

    // Clamp away filter undershoot so pow() never sees a negative base.
    vec3 lit = max(filtered*scan, 0.0);

    // Gamma round trip, literals kept exactly as generated.
    vec3 linear = pow(lit, vec3(2.40000010E+00));
    vec3 encoded = pow(linear, vec3(4.54545438E-01));

    FragColor = vec4(encoded, 1.0);
}
#endif

bootsector · 15 January 2017 05:05

Thanks again! But I’ve got pretty much the same performance on the OUYA!

Tatsuya79 · 15 January 2017 05:05

What’s the culprit for bad shader performance on mobile platforms? Memory bandwidth? Is there any architecture that can do 60fps with a CRT shader at the moment?

hunterk · 15 January 2017 05:05

I think Nvidia Shield / Tegra 4 can almost get full speed with cgwg’s CRT, which means Hyllian’s should likely be fine on it.

Tatsuya79 · 15 January 2017 05:05

Just tried your conversions Hunterk on my Galaxy S 3: -hyllian standard 35fps -hyllian fast 58~60fps!

Barely there! I wonder if it could be fast enough with a small overclock, I’m at standard speed.

There are still some small bugs: you can see about three different regions of the picture where the scanlines differ slightly in thickness. That’s with integer scale on. It could be the PenTile AMOLED screen, though…

Tatsuya79 · 15 January 2017 05:05

Tried a GPU overclock = no difference. Pushed the CPU from 1400MHz to 1600MHz = min framerate now at ~59.5fps.

So that’s almost it. A phone slightly faster than a Galaxy S 3 should run OK with Hyllian-LQ shader.

bootsector · 15 January 2017 05:12

Hi there!

It looks like this shader has been updated a lot since the last GLSL! Could someone please post the updated GLSL for it? Thanks!