Request for shader optimized for Android

The shaders in RetroArch are beautiful but, let’s face it, they are slow on Android.

I understand that the reason for this is because the shaders were originally intended for powerful gpus, but that does not mean that all shaders(CRT etc) will always be slow on android.

PPSSPP for Android also has shaders, many of which do not cause a single FPS drop even though each of them is beautiful, works properly, and is not downgraded.

I am requesting shaders that are well optimized for Android without being downgraded.

There’s not really any such thing as optimizing for Android. The GPUs in mobile devices simply aren’t very powerful right now but they’re improving all the time. The shaders for PPSSPP are just super old/simple (most are ports of old ePSXe shaders from circa 2008-9) and that’s why they run so fast.

*sigh* If only I could (a) port the said shaders to RetroArch, or (b) create new shaders that are in the same vein as the mentioned ones but otherwise work fine on RetroArch.

ALAS,I’M HOPELESS

THANKS ANYWAYS

Which ones do you want?

Just two, both of which are from PPSSPP:

1. CRT (the one that gives an old TV look) 2. Scanlines (the one that draws scanlines)

I know they are already in retroarch,but they are different from the one found in PPSSPP

Can you post a screenshot of each of those shaders? I have ported them to Cg successfully but they both look odd at the moment. For example, the scanline one looks very yellow and I suspect the vignetting isn’t quite right. PPSSPP implements their frame counter differently from us, so I probably won’t be able to get it exactly the same, but I can probably get closer with a screenshot.

Can you state your hardware?

Here are the pics

my hardware is : 1.2 ghz dual core A9 768 mb RAM

13/32 and 15/32 mean 13fps out of 32 and 15fps out of 32, so it’s running at 100% speed WITH frameskip. It drops 2fps with the shaders (again, with frameskip); that means a 6% impact, and it was already running at half framerate to begin with.

^you are wrong.The drop in the FPS is caused by taking screenshots.same thing applies to drastic and reicast…

Plus frameskip is enabled explaining why the fps is halved.

The two shaders do not cause any fps drop.Other shaders like the FXAA one slows emulation though

Ok… I guess

Ok, here’s what I have so far in Cg. I’ll try to convert them to GLSL this evening.

It seems the scanline one is actually correct, it just looks bad on things like Super Mario World…

/* COMPATIBILITY 
   - HLSL compilers
   - Cg   compilers
*/

/*
Scanline shader
ported from PPSSPP
*/

// Type of the extra `coords` output that main_vertex declares on TEXCOORD2.
// NOTE(review): main_fragment never reads `omega`; this looks like leftover
// scaffolding — confirm before removing.
struct sine_coord
{
   float2 omega;
};

// Uniform bundle received by both entry points as `IN`.
// This shader only reads `texture_size.y` and `texture`; the other fields
// are not referenced by main_vertex or main_fragment.
// NOTE(review): the per-field notes are inferred from the field names and
// are presumably filled in by the host application — confirm against it.
struct input
{
   float2 video_size;      // presumably the size of the source frame
   float2 texture_size;    // .y scales tex.y into rows for the scanline phase
   float2 output_size;     // presumably the size of the render target
   float  frame_count;     // presumably a running frame counter
   float  frame_direction; // presumably forward/rewind indicator
   float frame_rotation;   // presumably the display rotation
   sampler2D texture;      // source image sampled by main_fragment
};

/*
 * Pass-through vertex stage for the scanline shader.
 *
 * Transforms the vertex by `modelViewProj` and forwards the vertex color and
 * texture coordinate unchanged. `IN` is accepted but not read.
 *
 * `coords` is part of the declared output interface, so it is given a defined
 * value here; the fragment stage does not consume it.
 */
void main_vertex
(
   float4 position : POSITION,
   out float4 oPosition : POSITION,
   uniform float4x4 modelViewProj,

   float4 color : COLOR,
   out float4 oColor : COLOR,

   float2 tex : TEXCOORD,
   out float2 oTex : TEXCOORD,

   uniform input IN,
   out sine_coord coords : TEXCOORD2
)
{
   oPosition = mul(modelViewProj, position);
   oColor = color;
   oTex = tex;

   // Every declared `out` must be written; leaving this one unassigned hands
   // an undefined value to the TEXCOORD2 interpolator.
   coords.omega = float2(0.0, 0.0);
}

/*
 * Tunables for the scanline pattern, read by main_fragment.
 * Declared `static` so they are compile-time constants: a non-static global
 * is exposed as an external uniform by HLSL-family compilers, in which case
 * the initialiser is only a default the host may never apply.
 */
static const float amount = 1.0; // suitable range = 0.0 - 1.0
static const float intensity = 0.5; // suitable range = 0.0 - 1.0

/*
 * Fragment stage of the scanline shader (ported from PPSSPP).
 *
 * Samples the source texture, applies a contrast curve, a color tint and a
 * vignette, then modulates the result with a cosine scanline profile.
 *
 * tex : texture coordinate from the vertex stage.
 * IN  : uniform bundle; only `texture_size.y` and `texture` are read.
 * Returns the shaded color with alpha fixed at 1.0.
 */
float4 main_fragment (in float2 tex : TEXCOORD, uniform input IN) : COLOR
{
   // Position measured in texture rows; frac(pos0) is the offset inside the
   // current row, which drives the cosine brightness profile below.
   float pos0 = ((tex.y + 1.0) * IN.texture_size.y * amount);
   float pos1 = cos((frac(pos0) - 0.5) * 3.1415926 * intensity) * 1.5;
   float3 rgb = tex2D(IN.texture, tex).rgb;

   // slight contrast curve
   float3 color = rgb * 0.5 + 0.5 * rgb * rgb * 1.2;

   // color tint (RGB only: tinting alpha by 0.0 made the output fully
   // transparent wherever the target's alpha channel is honoured)
   color *= float3(0.9, 1.0, 0.7);

   // vignette
   color *= 1.1 - 0.6 * (dot(tex - 0.5, tex - 0.5) * 2.0);

   // Full-arity constructor: a single-argument float3()/float4() is rejected
   // by HLSL compilers.
   return float4(lerp(float3(0.0, 0.0, 0.0), color, pos1), 1.0);
}

And here’s the CRT one:

/* COMPATIBILITY 
   - HLSL compilers
   - Cg   compilers
*/

/*
CRT shader
ported from PPSSPP
*/

// NOTE(review): not referenced by main_vertex or main_fragment in this
// shader; looks like leftover scaffolding — confirm before removing.
struct sine_coord
{
   float2 omega;
};

// Uniform bundle received by both entry points as `IN`.
// This shader reads `frame_count`, `texture_size.y` and `texture`; the other
// fields are not referenced by main_vertex or main_fragment.
// NOTE(review): the per-field notes are inferred from the field names and
// are presumably filled in by the host application — confirm against it.
struct input
{
   float2 video_size;      // presumably the size of the source frame
   float2 texture_size;    // .y converts tex.y into a row index
   float2 output_size;     // presumably the size of the render target
   float  frame_count;     // drives the row offset and the rollbar phase
   float  frame_direction; // presumably forward/rewind indicator
   float frame_rotation;   // presumably the display rotation
   sampler2D texture;      // source image sampled by main_fragment
};

/*
 * Pass-through vertex stage for the CRT shader.
 *
 * Outputs the vertex position multiplied by `modelViewProj` and copies the
 * incoming color and texture coordinate to their outputs. `IN` is accepted
 * but not read.
 */
void main_vertex
(
   float4           position      : POSITION,
   out float4       oPosition     : POSITION,
   uniform float4x4 modelViewProj,

   float4           color         : COLOR,
   out float4       oColor        : COLOR,

   float2           tex           : TEXCOORD,
   out float2       oTex          : TEXCOORD,

   uniform input    IN
)
{
   // Forward the per-vertex attributes untouched.
   oTex = tex;
   oColor = color;

   // Clip-space position.
   oPosition = mul(modelViewProj, position);
}

/*
 * Fragment stage of the CRT shader (ported from PPSSPP).
 *
 * Dims alternate rows, offsets the red and green samples horizontally to
 * imitate color bleed, and adds a sine "rollbar" term driven by the frame
 * counter.
 *
 * tex : texture coordinate from the vertex stage.
 * IN  : uniform bundle; `frame_count`, `texture_size.y` and `texture` are read.
 * Returns the shaded color; the rollbar term is added to all four channels.
 */
float4 main_fragment (in float2 tex : TEXCOORD, uniform input IN) : COLOR
{
   // Row index. IN.texture_size.y is a hardcoded 272.0 in ppsspp, but I
   // assumed that was meant to be the native vertical resolution.
   int vPos = int((tex.y + IN.frame_count * 1.0) * IN.texture_size.y);

   // 0.0 on even rows, 1.0 on odd rows. The modulus has to be 2.0: vPos is a
   // whole number, so fmod(..., 1.0) is always 0.0 and the scanline/shift
   // terms below would collapse to constants.
   float line_intensity = fmod(float(vPos), 2.0);

   // color shift: odd rows are sampled slightly to the right
   float off = line_intensity * 0.0005;
   float2 shift = float2(off, 0.0);

   // shift R and G channels to simulate color bleed
   float2 colorShift = float2(0.001, 0.0);
   float r = tex2D(IN.texture, tex + colorShift + shift).r;
   float g = tex2D(IN.texture, tex - colorShift + shift).g;
   float b = tex2D(IN.texture, tex).b;

   // Even rows are scaled by 0.85, odd rows by 1.0.
   float4 c = float4(r, g * 0.99, b, 1.0) * clamp(line_intensity, 0.85, 1.0);
   float rollbar = sin((tex.y + IN.frame_count) * 4.0);

   return c + (rollbar * 0.02);
}

Let me know if the flickering stuff doesn’t look right in the CRT one and I can keep playing with the values. I couldn’t find any videos of how it’s supposed to look.

EDIT: Hmm, they don’t seem to want to convert programmatically… I may try to convert them by hand.

wow,thanks for my request

Have you converted them to glsl?After all,my request was for android.

TIA

Ok, here’s the scanline one:

#if defined(VERTEX)

// Keyword shims: GLSL >= 1.30 uses in/out/texture, older versions use
// attribute/varying/texture2D.
#if __VERSION__ >= 130
#define COMPAT_VARYING out
#define COMPAT_ATTRIBUTE in
#define COMPAT_TEXTURE texture
#else
#define COMPAT_VARYING varying
#define COMPAT_ATTRIBUTE attribute
#define COMPAT_TEXTURE texture2D
#endif

// Explicit precision for uniforms shared with the fragment stage; expands to
// nothing outside GL ES.
#ifdef GL_ES
#define COMPAT_PRECISION mediump
#else
#define COMPAT_PRECISION
#endif
COMPAT_VARYING     vec2 VARtexCoord;
COMPAT_VARYING     float _frame_rotation;
struct input_dummy {
    vec2 _video_size;
    vec2 _texture_size;
    vec2 _output_dummy_size;
    float _frame_count;
    float _frame_direction;
    float _frame_rotation;
};
struct out_vertex {
    vec2 VARtexCoord;
};
// File-scope scratch variables (converter-style names).
vec4 _oPosition1;
out_vertex _ret_0;
input_dummy _IN1;
vec4 _r0008;
COMPAT_ATTRIBUTE vec4 VertexCoord;
COMPAT_ATTRIBUTE vec4 TexCoord;

uniform mat4 MVPMatrix;
// The int uniforms carry COMPAT_PRECISION as well: on GL ES the vertex stage
// defaults ints to highp while the fragment stage defaults them to mediump,
// and a uniform declared in both stages must have matching precision.
uniform COMPAT_PRECISION int FrameDirection;
uniform COMPAT_PRECISION int FrameCount;
uniform COMPAT_PRECISION vec2 OutputSize;
uniform COMPAT_PRECISION vec2 TextureSize;
uniform COMPAT_PRECISION vec2 InputSize;
// Vertex stage of the scanline shader: transforms the vertex by MVPMatrix and
// forwards the texture coordinate unchanged, matching the Cg source
// (`oTex = tex`). The previous -0.49 texel horizontal offset is not part of
// the Cg shader and shifted every sample sideways.
void main()
{
    gl_Position = MVPMatrix * VertexCoord;
    VARtexCoord = TexCoord.xy;
}
#elif defined(FRAGMENT)

// Keyword shims: GLSL >= 1.30 uses `in`, `texture` and a user-declared
// output; older versions use `varying`, `texture2D` and gl_FragColor.
#if __VERSION__ >= 130
#define COMPAT_VARYING in
#define COMPAT_TEXTURE texture
out vec4 FragColor;
#else
#define COMPAT_VARYING varying
#define FragColor gl_FragColor
#define COMPAT_TEXTURE texture2D
#endif

// On GL ES: default float precision is highp when the implementation
// advertises it, mediump otherwise. COMPAT_PRECISION is the explicit
// qualifier applied to the vec2 uniforms below; it is empty outside GL ES.
#ifdef GL_ES
#ifdef GL_FRAGMENT_PRECISION_HIGH
precision highp float;
#else
precision mediump float;
#endif
#define COMPAT_PRECISION mediump
#else
#define COMPAT_PRECISION
#endif
// Inputs interpolated from the vertex stage.
COMPAT_VARYING     vec2 VARtexCoord;
COMPAT_VARYING     float _frame_rotation;
struct input_dummy {
    vec2 _video_size;
    vec2 _texture_size;
    vec2 _output_dummy_size;
    float _frame_count;
    float _frame_direction;
    float _frame_rotation;
};
struct out_vertex {
    vec2 VARtexCoord;
};
// File-scope scratch variables (converter-style names).
vec4 _ret_0;
out_vertex _VAR1;
// Source image.
uniform sampler2D Texture;
input_dummy _IN1;
 
// NOTE(review): presumably supplied by the frontend; meanings inferred from
// the names — confirm against the host's GLSL shader spec.
uniform int FrameDirection;
uniform int FrameCount;
uniform COMPAT_PRECISION vec2 OutputSize;
uniform COMPAT_PRECISION vec2 TextureSize;
uniform COMPAT_PRECISION vec2 InputSize;

// Tunables for the scanline pattern.
float amount = 1.0; // suitable range = 0.0 - 1.0
float intensity = 0.5; // suitable range = 0.0 - 1.0

// Fragment stage of the scanline shader (ported from PPSSPP).
// Samples the source texture, applies a contrast curve, a color tint and a
// vignette, then modulates the result with a cosine scanline profile.
// Output alpha is fixed at 1.0.
void main()
{
    // Position measured in texture rows; fract(pos0) is the offset inside the
    // current row, which drives the cosine brightness profile.
    float pos0 = ((VARtexCoord.y + 1.0) * TextureSize.y * amount);
    float pos1 = cos((fract(pos0) - 0.5) * 3.1415926 * intensity) * 1.5;
    vec3 rgb = COMPAT_TEXTURE(Texture, VARtexCoord).rgb;

    // slight contrast curve
    vec3 color = rgb * 0.5 + 0.5 * rgb * rgb * 1.2;

    // color tint (RGB only: tinting alpha by 0.0 made the output fully
    // transparent wherever the target's alpha channel is honoured)
    color *= vec3(0.9, 1.0, 0.7);

    // vignette
    color *= 1.1 - 0.6 * (dot(VARtexCoord - 0.5, VARtexCoord - 0.5) * 2.0);

    FragColor = vec4(mix(vec3(0.0), color, pos1), 1.0);
}
#endif

I got the CRT one converted, as well, except it seems the framecount uniform is completely broken in glsl :open_mouth:

I’ll talk to maister about it and see if he can fix it. Here it is in its broken state for if/when it gets fixed:

#if defined(VERTEX)

// Keyword shims: GLSL >= 1.30 uses in/out/texture, older versions use
// attribute/varying/texture2D.
#if __VERSION__ >= 130
#define COMPAT_VARYING out
#define COMPAT_ATTRIBUTE in
#define COMPAT_TEXTURE texture
#else
#define COMPAT_VARYING varying
#define COMPAT_ATTRIBUTE attribute
#define COMPAT_TEXTURE texture2D
#endif

// Explicit precision for uniforms shared with the fragment stage; expands to
// nothing outside GL ES.
#ifdef GL_ES
#define COMPAT_PRECISION mediump
#else
#define COMPAT_PRECISION
#endif
COMPAT_VARYING     vec2 VARtexCoord;
COMPAT_VARYING     float _frame_rotation;
struct input_dummy {
    vec2 _video_size;
    vec2 _texture_size;
    vec2 _output_dummy_size;
    float _frame_count;
    float _frame_direction;
    float _frame_rotation;
};
struct out_vertex {
    vec2 VARtexCoord;
};
// File-scope scratch variables (converter-style names).
vec4 _oPosition1;
out_vertex _ret_0;
input_dummy _IN1;
vec4 _r0008;
COMPAT_ATTRIBUTE vec4 VertexCoord;
COMPAT_ATTRIBUTE vec4 TexCoord;

uniform mat4 MVPMatrix;
// The int uniforms carry COMPAT_PRECISION as well: on GL ES the vertex stage
// defaults ints to highp while the fragment stage defaults them to mediump,
// and a uniform declared in both stages must have matching precision.
uniform COMPAT_PRECISION int FrameDirection;
uniform COMPAT_PRECISION int FrameCount;
uniform COMPAT_PRECISION vec2 OutputSize;
uniform COMPAT_PRECISION vec2 TextureSize;
uniform COMPAT_PRECISION vec2 InputSize;
// Vertex stage of the CRT shader: transforms the vertex by MVPMatrix and
// forwards the texture coordinate unchanged, matching the Cg source
// (`oTex = tex`). The previous -0.49 texel horizontal offset is not part of
// the Cg shader and shifted every sample sideways.
void main()
{
    gl_Position = MVPMatrix * VertexCoord;
    VARtexCoord = TexCoord.xy;
}
#elif defined(FRAGMENT)

// Keyword shims: GLSL >= 1.30 uses `in`, `texture` and a user-declared
// output; older versions use `varying`, `texture2D` and gl_FragColor.
#if __VERSION__ >= 130
#define COMPAT_VARYING in
#define COMPAT_TEXTURE texture
out vec4 FragColor;
#else
#define COMPAT_VARYING varying
#define FragColor gl_FragColor
#define COMPAT_TEXTURE texture2D
#endif

// On GL ES: default float precision is highp when the implementation
// advertises it, mediump otherwise. COMPAT_PRECISION is the explicit
// qualifier applied to the vec2 uniforms below; it is empty outside GL ES.
#ifdef GL_ES
#ifdef GL_FRAGMENT_PRECISION_HIGH
precision highp float;
#else
precision mediump float;
#endif
#define COMPAT_PRECISION mediump
#else
#define COMPAT_PRECISION
#endif
// Inputs interpolated from the vertex stage.
COMPAT_VARYING     vec2 VARtexCoord;
COMPAT_VARYING     float _frame_rotation;
struct input_dummy {
    vec2 _video_size;
    vec2 _texture_size;
    vec2 _output_dummy_size;
    float _frame_count;
    float _frame_direction;
    float _frame_rotation;
};
struct out_vertex {
    vec2 VARtexCoord;
};
// File-scope scratch variables (converter-style names).
vec4 _ret_0;
out_vertex _VAR1;
// Source image.
uniform sampler2D Texture;
input_dummy _IN1;

// NOTE(review): presumably supplied by the frontend; meanings inferred from
// the names — confirm against the host's GLSL shader spec.
// FrameCount is an int: convert it explicitly before mixing with floats.
uniform int FrameDirection;
uniform int FrameCount;
uniform COMPAT_PRECISION vec2 OutputSize;
uniform COMPAT_PRECISION vec2 TextureSize;
uniform COMPAT_PRECISION vec2 InputSize;

// Fragment stage of the CRT shader (ported from PPSSPP).
// Dims alternate rows, offsets the red and green samples horizontally to
// imitate NTSC color bleed, and adds a sine "rollbar" term driven by the
// frame counter. The rollbar term is added to all four output channels.
void main()
{
    // FrameCount is declared `uniform int`. GLSL ES 1.00 and GLSL 1.10 have
    // no implicit int -> float conversion, so it must be converted explicitly
    // before it is combined with float operands.
    float frame = float(FrameCount);

    // scanlines: row index, then 0.0 on even rows / 1.0 on odd rows
    int vPos = int( ( VARtexCoord.y + frame * 0.5 ) * TextureSize.y );
    float line_intensity = mod( float(vPos), 2.0 );

    // color shift: odd rows are sampled slightly to the right
    float off = line_intensity * 0.0005;
    vec2 shift = vec2( off, 0.0 );

    // shift R and G channels to simulate NTSC color bleed
    vec2 colorShift = vec2( 0.001, 0.0 );
    float r = COMPAT_TEXTURE(Texture, VARtexCoord + colorShift + shift ).r;
    float g = COMPAT_TEXTURE(Texture, VARtexCoord - colorShift + shift ).g;
    float b = COMPAT_TEXTURE(Texture, VARtexCoord ).b;

    // Even rows are scaled by 0.85, odd rows by 1.0.
    vec4 c = vec4( r, g * 0.99, b, 1.0 ) * clamp( line_intensity, 0.85, 1.0 );

    float rollbar = sin( ( VARtexCoord.y + frame ) * 4.0 );
    FragColor = c + (rollbar * 0.02);
}
#endif

I tried the scanlines one. It’s fast but why did they make it that yellowish? And indeed, it worked great with sf2ce on fba but not with Batman on Nestopia (no scanlines, just a darker picture).

Wouldn’t this one be better (if it’s fast enough)?

Yeah, no idea why they want it yellow like that :confused:

It works on Nestopia (just checked it), the scanlines are just very faint and Batman is quite dark to begin with. Try it on something brighter, like SMB3.

Forgot the link above!

He could try it. Mobile GPUs tend to have problems with gaussian stuff for whatever reason, but I agree that it would probably suit most things much better. Here it is in GLSL:

// GLSL shader autogenerated by cg2glsl.py.
#if defined(VERTEX)

// Keyword shims: GLSL >= 1.30 uses in/out/texture, older versions use
// attribute/varying/texture2D.
#if __VERSION__ >= 130
#define COMPAT_VARYING out
#define COMPAT_ATTRIBUTE in
#define COMPAT_TEXTURE texture
#else
#define COMPAT_VARYING varying
#define COMPAT_ATTRIBUTE attribute
#define COMPAT_TEXTURE texture2D
#endif

// Explicit precision for uniforms shared with the fragment stage; expands to
// nothing outside GL ES.
#ifdef GL_ES
#define COMPAT_PRECISION mediump
#else
#define COMPAT_PRECISION
#endif
// Outputs to the fragment stage.
COMPAT_VARYING     vec2 VARone;
COMPAT_VARYING     vec2 VARpix_no;
COMPAT_VARYING     vec2 VARtex;
struct data {
    vec2 VARtex;
    vec2 VARpix_no;
    vec2 VARone;
};
struct input_dummy {
    vec2 _video_size;
    vec2 _texture_size;
    vec2 _output_dummy_size;
    float _frame_count;
};
// File-scope scratch variables (converter-style names).
vec4 _oPosition1;
data _oData1;
input_dummy _IN1;
vec4 _r0006;
COMPAT_ATTRIBUTE vec4 VertexCoord;
COMPAT_ATTRIBUTE vec4 TexCoord;

uniform mat4 MVPMatrix;
// The int uniforms carry COMPAT_PRECISION as well: on GL ES the vertex stage
// defaults ints to highp while the fragment stage defaults them to mediump,
// and a uniform declared in both stages must have matching precision.
uniform COMPAT_PRECISION int FrameDirection;
uniform COMPAT_PRECISION int FrameCount;
uniform COMPAT_PRECISION vec2 OutputSize;
uniform COMPAT_PRECISION vec2 TextureSize;
uniform COMPAT_PRECISION vec2 InputSize;
// Vertex stage of the gaussian scanline shader.
// Emits the clip-space position and three values for the fragment stage:
// the raw texture coordinate, that coordinate scaled by TextureSize, and the
// reciprocal of TextureSize.
void main()
{
    // Column-by-column MVPMatrix * VertexCoord, summed in x, y, z, w order.
    vec4 clipPos = VertexCoord.x*MVPMatrix[0]
                 + VertexCoord.y*MVPMatrix[1]
                 + VertexCoord.z*MVPMatrix[2]
                 + VertexCoord.w*MVPMatrix[3];

    VARone    = 1.00000000E+00/TextureSize;
    VARpix_no = TexCoord.xy*TextureSize;
    VARtex    = TexCoord.xy;

    gl_Position = clipPos;
}
#elif defined(FRAGMENT)

// Keyword shims: GLSL >= 1.30 uses `in`, `texture` and a user-declared
// output; older versions use `varying`, `texture2D` and gl_FragColor.
#if __VERSION__ >= 130
#define COMPAT_VARYING in
#define COMPAT_TEXTURE texture
out vec4 FragColor;
#else
#define COMPAT_VARYING varying
#define FragColor gl_FragColor
#define COMPAT_TEXTURE texture2D
#endif

// On GL ES: default float precision is highp when the implementation
// advertises it, mediump otherwise. COMPAT_PRECISION is the explicit
// qualifier applied to the vec2 uniforms below; it is empty outside GL ES.
#ifdef GL_ES
#ifdef GL_FRAGMENT_PRECISION_HIGH
precision highp float;
#else
precision mediump float;
#endif
#define COMPAT_PRECISION mediump
#else
#define COMPAT_PRECISION
#endif
// Inputs interpolated from the vertex stage.
COMPAT_VARYING     vec2 VARone;
COMPAT_VARYING     vec2 VARpix_no;
COMPAT_VARYING     vec2 VARtex;
struct data {
    vec2 VARtex;
    vec2 VARpix_no;
    vec2 VARone;
};
struct input_dummy {
    vec2 _video_size;
    vec2 VARtexture_size;
    vec2 _output_dummy_size;
    float _frame_count;
};
// File-scope scratch variables (converter-generated names).
vec4 _ret_0;
vec3 _TMP11;
float _TMP14;
float _TMP13;
float _TMP12;
float _TMP5;
vec4 _TMP4;
vec4 _TMP3;
vec4 _TMP2;
vec4 _TMP1;
vec4 _TMP0;
data _vertex1;
// Source image.
uniform sampler2D Texture;
vec2 _c0020;
vec2 _c0030;
vec2 _c0050;
vec2 _c0060;
float _TMP71;
float _x0072;
float _TMP75;
float _x0076;
float _TMP79;
float _x0080;
float _TMP83;
float _x0084;
float _TMP87;
float _x0088;
vec3 _a0092;
 
// NOTE(review): presumably supplied by the frontend; meanings inferred from
// the names — confirm against the host's GLSL shader spec.
uniform int FrameDirection;
uniform int FrameCount;
uniform COMPAT_PRECISION vec2 OutputSize;
uniform COMPAT_PRECISION vec2 TextureSize;
uniform COMPAT_PRECISION vec2 InputSize;
// Samples the source image at `coord` and raises each color channel to the
// power 2.5.
vec3 fetchPow25(vec2 coord)
{
    return pow(COMPAT_TEXTURE(Texture, coord).xyz, vec3(2.50000000E+00));
}

// Row weight pow(2.71828198, -5 * d * d) for a row at signed distance `d`.
float rowWeight(float d)
{
    return pow(2.71828198E+00, -5.00000000E+00*d*d);
}

// Fragment stage of the gaussian scanline shader.
// Blends five vertically adjacent texels (two rows above through two rows
// below), each weighted by rowWeight() of its distance from the current
// position within the row, then scales the sum by 1.15 and raises it to the
// power 1/2.1. Output alpha is 1.0.
void main()
{
    // Signed offset of this fragment from the middle of its texture row.
    float centre = fract(VARpix_no.y) - 5.00000000E-01;

    // Accumulate rows in the order -2, -1, 0, +1, +2.
    vec3 sum = fetchPow25(VARtex + vec2(0.00000000E+00, -2.00000000E+00*VARone.y))
             * rowWeight(2.00000000E+00 + centre);
    sum = sum + fetchPow25(VARtex + vec2(0.00000000E+00, -VARone.y))
              * rowWeight(1.00000000E+00 + centre);
    sum = sum + fetchPow25(VARtex)
              * rowWeight(centre);
    sum = sum + fetchPow25(VARtex + vec2(0.00000000E+00, VARone.y))
              * rowWeight(-1.00000000E+00 + centre);
    sum = sum + fetchPow25(VARtex + vec2(0.00000000E+00, 2.00000000E+00*VARone.y))
              * rowWeight(-2.00000000E+00 + centre);

    vec3 shaded = pow(1.14999998E+00*sum, vec3(4.76190507E-01));
    FragColor = vec4(shaded.x, shaded.y, shaded.z, 1.00000000E+00);
}
#endif

Thanks and you’re right, that’s slower. ~52fps on SGS3, even slower than Hyllian-LQ. But it’s working better on NES games than PPSSPP scanlines. I see the same problem I described here (the 3 parts bug). Perhaps a GPU / driver bug after all.

after trying out the scanline shader,i have to say that i am a bit disappointed because it is nowhere as close as how beautiful the ppsspp shader looks like.

Then again,i only asked for speed therefore everything is very good

Thank you