Oh, guest.r, that xBR-enhanced looks very nice! Is it a similar idea?
Hyllian has a deposterized version of xBR in the repo but it doesn’t seem to work on my Intel IGP.
The multipass slang version is much faster, I think, but here’s a single-pass Cg version of sabr-hybrid-deposterize you can try, papermanzero:
// Runtime-tunable edge-detection thresholds. Each "#pragma parameter" line
// lists: name, label, default, lower bound, upper bound, step.
// IsEdge() passes both values to thresh2(): an edge measure below `minimum`
// is forced to 0.0 and one above `maximum` is forced to 1.0.
#pragma parameter minimum "Edge Thresh Min" 0.05 0.0 1.0 0.01
#pragma parameter maximum "Edge Thresh Max" 0.35 0.0 1.0 0.01
#ifdef PARAMETER_UNIFORM
// Parameters are supplied as uniforms when PARAMETER_UNIFORM is defined.
uniform float minimum;
uniform float maximum;
#else
// Otherwise fall back to compile-time constants equal to the defaults above.
#define minimum 0.05
#define maximum 0.35
#endif
///////////////////////////////// MIT LICENSE ////////////////////////////////
// Blur3x3Resize Copyright (C) 2014 TroggleMonkey
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal in the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
// IN THE SOFTWARE.
/*
SABR v3.0 Shader
Joshua Street
Portions of this algorithm were taken from Hyllian's 5xBR v3.7c
shader.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
///////////////////////////// SETTINGS MANAGEMENT ////////////////////////////
// PASS SETTINGS:
// gamma-management.h needs to know what kind of pipeline we're using and
// what pass this is in that pipeline. This will become obsolete if/when we
// can #define things like this in the .cgp preset file.
//#define GAMMA_ENCODE_EVERY_FBO
//#define FIRST_PASS
//#define LAST_PASS
//#define SIMULATE_CRT_ON_LCD
//#define SIMULATE_GBA_ON_LCD
//#define SIMULATE_LCD_ON_CRT
//#define SIMULATE_GBA_ON_CRT
////////////////////////////////// INCLUDES //////////////////////////////////
#include "../../include/gamma-management.h"
#include "../../include/blur-functions.h"
///////////////////////////////// STRUCTURES /////////////////////////////////
// Per-pass values received as `uniform input IN` by both entry points.
// Only video_size, texture_size and output_size are read in this file.
struct input
{
// Used together with output_size to scale the blur sampling step.
float2 video_size;
// Its reciprocal is used as the size of one texel in texture coordinates.
float2 texture_size;
// Used together with video_size to scale the blur sampling step.
float2 output_size;
// The three fields below are not referenced by this shader.
float frame_count;
float frame_direction;
float frame_rotation;
};
// Values interpolated from main_vertex to main_fragment.
//
// Every varying carries an explicit binding semantic so the interpolator
// assignment does not depend on the compiler's automatic allocation.
// TEXCOORD1 is free because the blur sampling step is computed in
// main_fragment rather than passed down as a varying.
//
// Packing of the xyp_* members (numbers refer to the 5x5 mask drawn in
// main_fragment):
//   row vectors    (1_2_3, 6_7_8, 11_12_13, 16_17_18, 21_22_23):
//       .xyz = x coordinate of the three taps, .w = their shared y.
//   column vectors (5_10_15, 9_14_9):
//       .x = shared x coordinate, .yzw = y coordinate of the three taps.
struct out_vertex
{
    float4 position     : POSITION;
    float2 tc           : TEXCOORD0;
    float4 xyp_1_2_3    : TEXCOORD1;
    float4 xyp_5_10_15  : TEXCOORD2;
    float4 xyp_6_7_8    : TEXCOORD3;
    // Holds taps 9, 14 and 19 despite the trailing "9" in the name; the
    // name is kept because both entry points reference it.
    float4 xyp_9_14_9   : TEXCOORD4;
    float4 xyp_11_12_13 : TEXCOORD5;
    float4 xyp_16_17_18 : TEXCOORD6;
    float4 xyp_21_22_23 : TEXCOORD7;
};
//////////////////////////////// CONSTANTS ///////////////////////////////
// Coefficients of the four edge expressions evaluated in main_fragment as
//     Ai * fp.y + B* * fp.x
// and compared against C* through smoothstep(C* - M*, C* + M*, ...).
// The 45 / 30 / 60 suffixes name the three edge slopes being handled.
const static float4 Ai = float4( 1.0, -1.0, -1.0, 1.0);
const static float4 B45 = float4( 1.0, 1.0, -1.0, -1.0);
const static float4 C45 = float4( 1.5, 0.5, -0.5, 0.5);
const static float4 B30 = float4( 0.5, 2.0, -0.5, -2.0);
const static float4 C30 = float4( 1.0, 1.0, -0.5, 0.0);
const static float4 B60 = float4( 2.0, 0.5, -2.0, -0.5);
const static float4 C60 = float4( 2.0, 0.0, -1.0, 0.5);
// Half-widths of the smoothstep transition around C45 / C30 / C60.
const static float4 M45 = float4(0.4, 0.4, 0.4, 0.4);
const static float4 M30 = float4(0.2, 0.4, 0.2, 0.4);
const static float4 M60 = M30.yxwz;
// Offset added to both smoothstep bounds of the 45-degree mask to build `marn`.
const static float4 Mshift = float4(0.2, 0.2, 0.2, 0.2);
// Factor applied to one of e30 / e60 before comparing it with the other.
const static float coef = 2.0;
// Luminance difference below which lum_eq() treats two values as equal.
const static float4 threshold = float4(0.32, 0.32, 0.32, 0.32);
// RGB weights used by lum_to() to reduce a colour to one luminance value.
const static float3 lum = float3(0.21, 0.72, 0.07);
//////////////////////////////// SABR HELPER FUNCTIONS ///////////////////////////////
// Reduces four RGB colours to their luminances (weights from `lum`) and
// packs them, in argument order, into the x, y, z and w components.
float4 lum_to(float3 v0, float3 v1, float3 v2, float3 v3) {
    float4 packed;
    packed.x = dot(lum, v0);
    packed.y = dot(lum, v1);
    packed.z = dot(lum, v2);
    packed.w = dot(lum, v3);
    return packed;
}
// Component-wise absolute difference of two packed luminance vectors.
float4 lum_df(float4 A, float4 B) {
    float4 diff = A - B;
    return abs(diff);
}
// Per-component test: true where the two luminances differ by strictly
// less than `threshold`.
bool4 lum_eq(float4 A, float4 B) {
    float4 distance = lum_df(A, B);
    return distance < threshold;
}
// Weighted sum of luminance differences:
//     |a-b| + |a-c| + |d-e| + |d-f| + 4 * |g-h|
// evaluated per component. Terms are accumulated in that order.
float4 lum_wd(float4 a, float4 b, float4 c, float4 d, float4 e, float4 f, float4 g, float4 h) {
    float4 total = lum_df(a, b) + lum_df(a, c);
    total = total + lum_df(d, e);
    total = total + lum_df(d, f);
    total = total + 4.0 * lum_df(g, h);
    return total;
}
// Sum of the absolute per-channel differences between two RGB colours.
float c_df(float3 c1, float3 c2) {
    float3 delta = abs(c1 - c2);
    float total = delta.r + delta.g;
    total = total + delta.b;
    return total;
}
////////////////////////// EDGE-DETECT HELPER FUNCTIONS /////////////////////////
// Two-sided threshold: a value below thr1 becomes 0.0, then a value above
// thr2 becomes 1.0; anything else is returned unchanged. The two checks
// run in sequence, so the second one sees the result of the first.
float thresh2(float thr1, float thr2 , float val) {
    if (val < thr1) {
        val = 0.0;
    }
    if (val > thr2) {
        val = 1.0;
    }
    return val;
}
// Weighted intensity of a pixel: dot product of its RGB channels with the
// weights (0.2126, 0.7152, 0.0722). The alpha channel is ignored.
float avg_intensity(float4 pix) {
    const float3 channel_weights = float3(0.2126, 0.7152, 0.0722);
    return dot(pix.rgb, channel_weights);
}
// Samples `tex` at `coords` displaced by (dx, dy), both given in texture
// coordinates. Declared with Cg's float4 / float2 like the rest of the file
// instead of the GLSL spellings vec4 / vec2.
float4 get_pixel(sampler2D tex, float2 coords, float dx, float dy) {
    return tex2D(tex, coords + float2(dx, dy));
}
// Returns an edge-strength value in [0, 1] for the texel at `coords`
// (a scalar measure, not a colour).
//
// The intensities of the eight texels surrounding `coords` are sampled and
// the absolute differences across the four opposing pairs (horizontal,
// vertical and both diagonals) are averaged. The result is clamped to
// [0, 1] and passed through thresh2(minimum, maximum, ...).
float IsEdge(sampler2D tex, float2 coords, input IN){
    // Size of one texel in texture coordinates.
    float dxtex = 1.0 / IN.texture_size.x;
    float dytex = 1.0 / IN.texture_size.y;

    // Neighbour intensities, named by offset: m = -1 texel, 0 = none,
    // p = +1 texel; first letter is the x offset, second the y offset.
    // The centre texel is not needed by the measure, so it is not sampled.
    float i_mm = avg_intensity(get_pixel(tex, coords, -dxtex, -dytex));
    float i_m0 = avg_intensity(get_pixel(tex, coords, -dxtex,    0.0));
    float i_mp = avg_intensity(get_pixel(tex, coords, -dxtex,  dytex));
    float i_0m = avg_intensity(get_pixel(tex, coords,    0.0, -dytex));
    float i_0p = avg_intensity(get_pixel(tex, coords,    0.0,  dytex));
    float i_pm = avg_intensity(get_pixel(tex, coords,  dxtex, -dytex));
    float i_p0 = avg_intensity(get_pixel(tex, coords,  dxtex,    0.0));
    float i_pp = avg_intensity(get_pixel(tex, coords,  dxtex,  dytex));

    // Mean absolute difference across the four opposing neighbour pairs.
    float delta = (abs(i_m0 - i_p0) +
                   abs(i_0p - i_0m) +
                   abs(i_mm - i_pp) +
                   abs(i_mp - i_pm)
                  ) / 4.;

    return thresh2(minimum, maximum, clamp(delta, 0.0, 1.0));
}
//////////////////////////////// VERTEX SHADER ///////////////////////////////
// Vertex entry point: transforms the vertex and precomputes the texture
// coordinates of the 21 SABR taps so the fragment shader only has to
// swizzle them. `color` is accepted but not used.
out_vertex main_vertex
(
    float4 position : POSITION,
    float4 color : COLOR,
    float2 tc : TEXCOORD0,
    uniform float4x4 modelViewProj,
    uniform input IN
)
{
    out_vertex OUT;

    OUT.position = mul(modelViewProj, position);
    // Tiny offset applied to the incoming texture coordinate.
    OUT.tc = tc + float2(0.0000001, 0.0000001);

    // The blur sampling step is derived in main_fragment instead of being
    // passed down as a varying. Blurs are not generic Gaussian resizers,
    // and correct blurs require:
    // 1.) IN.output_size == IN.video_size * 2^m, where m is an integer <= 0.
    // 2.) mipmap_inputN = "true" for this pass in .cgp preset if m != 0
    // 3.) filter_linearN = "true" except for 1x scale nearest neighbor blurs
    // Arbitrary upsizing will be acceptable if filter_linearN = "true", and
    // arbitrary downsizing will be acceptable if mipmap_inputN = "true" too.
    // Only the "resize" blurs can manage this though, and they still aren't
    // proper Gaussian resizers (because they sample between texels and always
    // blur after resizing, not during).

    // Size of one texel in texture coordinates.
    float2 texel = 1.0 / IN.texture_size;

    // x coordinates of the left / centre / right taps of each mask row,
    // and y coordinates of the upper / middle / lower taps of each column.
    float3 col_x = OUT.tc.x + float3(-texel.x, 0.0, texel.x);
    float3 row_y = OUT.tc.y + float3(-texel.y, 0.0, texel.y);

    // Row vectors: .xyz = tap x coordinates, .w = shared y coordinate.
    OUT.xyp_1_2_3    = float4(col_x, OUT.tc.y - 2.0 * texel.y);
    OUT.xyp_6_7_8    = float4(col_x, row_y.x);
    OUT.xyp_11_12_13 = float4(col_x, row_y.y);
    OUT.xyp_16_17_18 = float4(col_x, row_y.z);
    OUT.xyp_21_22_23 = float4(col_x, OUT.tc.y + 2.0 * texel.y);

    // Column vectors: .x = shared x coordinate, .yzw = tap y coordinates.
    OUT.xyp_5_10_15  = float4(OUT.tc.x - 2.0 * texel.x, row_y);
    OUT.xyp_9_14_9   = float4(OUT.tc.x + 2.0 * texel.x, row_y);

    return OUT;
}
/////////////////////////////// FRAGMENT SHADER //////////////////////////////
// Fragment entry point. Computes two candidate colours for every fragment:
//   Smooth - a blurred sample of the source (tex2Dblur5x5 + encode_output,
//            both provided by the #included headers), and
//   Sharp  - the SABR edge-interpolated colour,
// then returns Sharp where IsEdge() reports a value above 0.01 and Smooth
// everywhere else. Both candidates are always evaluated.
float4 main_fragment(in out_vertex VAR,
uniform sampler2D decal : TEXUNIT0, uniform input IN) : COLOR
{
//////////////////////////// BLUR FRAGMENT START /////////////////////////////
// Blur sampling step: twice the video/output size ratio, expressed in
// texture coordinates.
const float2 dxdy_scale = IN.video_size / IN.output_size * float2(2.0);
float2 blur_dxdy = dxdy_scale / IN.texture_size;
float3 color = tex2Dblur5x5(decal, VAR.tc, blur_dxdy);
// Encode and output the blurred image:
float4 Smooth = encode_output(float4(color, 1.0));
//////////////////////////// BLUR FRAGMENT END ///////////////////////////////
//////////////////////////// SABR FRAGMENT START /////////////////////////////
/*
Mask for algorithm
+-----+-----+-----+-----+-----+
| | 1 | 2 | 3 | |
+-----+-----+-----+-----+-----+
| 5 | 6 | 7 | 8 | 9 |
+-----+-----+-----+-----+-----+
| 10 | 11 | 12 | 13 | 14 |
+-----+-----+-----+-----+-----+
| 15 | 16 | 17 | 18 | 19 |
+-----+-----+-----+-----+-----+
| | 21 | 22 | 23 | |
+-----+-----+-----+-----+-----+
*/
// Store mask values
// Row taps: x from .x/.y/.z, shared y from .w.
float3 P1 = tex2D(decal, VAR.xyp_1_2_3.xw ).rgb;
float3 P2 = tex2D(decal, VAR.xyp_1_2_3.yw ).rgb;
float3 P3 = tex2D(decal, VAR.xyp_1_2_3.zw ).rgb;
float3 P6 = tex2D(decal, VAR.xyp_6_7_8.xw ).rgb;
float3 P7 = tex2D(decal, VAR.xyp_6_7_8.yw ).rgb;
float3 P8 = tex2D(decal, VAR.xyp_6_7_8.zw ).rgb;
float3 P11 = tex2D(decal, VAR.xyp_11_12_13.xw).rgb;
float3 P12 = tex2D(decal, VAR.xyp_11_12_13.yw).rgb;
float3 P13 = tex2D(decal, VAR.xyp_11_12_13.zw).rgb;
float3 P16 = tex2D(decal, VAR.xyp_16_17_18.xw).rgb;
float3 P17 = tex2D(decal, VAR.xyp_16_17_18.yw).rgb;
float3 P18 = tex2D(decal, VAR.xyp_16_17_18.zw).rgb;
float3 P21 = tex2D(decal, VAR.xyp_21_22_23.xw).rgb;
float3 P22 = tex2D(decal, VAR.xyp_21_22_23.yw).rgb;
float3 P23 = tex2D(decal, VAR.xyp_21_22_23.zw).rgb;
// Column taps: shared x from .x, y from .y/.z/.w.
float3 P5 = tex2D(decal, VAR.xyp_5_10_15.xy ).rgb;
float3 P10 = tex2D(decal, VAR.xyp_5_10_15.xz ).rgb;
float3 P15 = tex2D(decal, VAR.xyp_5_10_15.xw ).rgb;
float3 P9 = tex2D(decal, VAR.xyp_9_14_9.xy ).rgb;
float3 P14 = tex2D(decal, VAR.xyp_9_14_9.xz ).rgb;
float3 P19 = tex2D(decal, VAR.xyp_9_14_9.xw ).rgb;
// Store luminance values of each point
// Each float4 packs one mask position together with the three positions it
// maps to when the mask is turned by successive quarter turns, so every
// vector expression below evaluates four orientations at once.
float4 p7 = lum_to(P7, P11, P17, P13);
float4 p8 = lum_to(P8, P6, P16, P18);
float4 p11 = p7.yzwx; // P11, P17, P13, P7
float4 p12 = lum_to(P12, P12, P12, P12);
float4 p13 = p7.wxyz; // P13, P7, P11, P17
float4 p14 = lum_to(P14, P2, P10, P22);
float4 p16 = p8.zwxy; // P16, P18, P8, P6
float4 p17 = p7.zwxy; // P17, P13, P7, P11
float4 p18 = p8.wxyz; // P18, P8, P6, P16
float4 p19 = lum_to(P19, P3, P5, P21);
float4 p22 = p14.wxyz; // P22, P14, P2, P10
float4 p23 = lum_to(P23, P9, P1, P15);
// Position of the fragment inside the current source texel, in [0, 1).
float2 fp = frac(VAR.tc * IN.texture_size);
// Blend masks for the three edge slopes; `marn` is the 45-degree mask with
// both smoothstep bounds moved by Mshift.
float4 ma45 = smoothstep(C45 - M45, C45 + M45, Ai * fp.y + B45 * fp.x);
float4 ma30 = smoothstep(C30 - M30, C30 + M30, Ai * fp.y + B30 * fp.x);
float4 ma60 = smoothstep(C60 - M60, C60 + M60, Ai * fp.y + B60 * fp.x);
float4 marn = smoothstep(C45 - M45 + Mshift, C45 + M45 + Mshift, Ai * fp.y + B45 * fp.x);
// Weighted luminance differences compared below to decide whether an edge
// rule fires for each orientation.
float4 e45 = lum_wd(p12, p8, p16, p18, p22, p14, p17, p13);
float4 econt = lum_wd(p17, p11, p23, p13, p7, p19, p12, p18);
float4 e30 = lum_df(p13, p16);
float4 e60 = lum_df(p8, p17);
// Per-orientation conditions gating the 45 / 30 / 60 edge rules.
bool4 r45 = p12 != p13 && p12 != p17 && (
!lum_eq(p13, p7) && !lum_eq(p13, p8) ||
!lum_eq(p17, p11) && !lum_eq(p17, p16) ||
lum_eq(p12, p18) && (
!lum_eq(p13, p14) && !lum_eq(p13, p19) ||
!lum_eq(p17, p22) && !lum_eq(p17, p23)) ||
lum_eq(p12, p16) ||
lum_eq(p12, p8));
bool4 r30 = p12 != p16 && p11 != p16;
bool4 r60 = p12 != p8 && p7 != p8;
bool4 edr45 = e45 < econt && r45;
bool4 edrrn = e45 <= econt;
bool4 edr30 = e30 * coef <= e60 && r30;
bool4 edr60 = e60 * coef <= e30 && r60;
// Classification as 0.0 / 1.0 weights; for any one component at most one of
// the five is 1.0.
float4 final45 = float4(!edr30 && !edr60 && edr45);
float4 final30 = float4(edr45 && edr30 && !edr60);
float4 final60 = float4(edr45 && edr60 && !edr30);
float4 final36 = float4(edr45 && edr30 && edr60);
float4 finalrn = float4(!edr45 && edrrn);
// px is 1.0 where |p12 - p17| <= |p12 - p13|; it selects the second colour
// of each neighbour pair in the lerps below.
float4 px = step(lum_df(p12, p17), lum_df(p12, p13));
// Blend amount per orientation, taken from the mask of whichever rule fired.
float4 mac = final36 * max(ma30, ma60) + final30 * ma30 + final60 * ma60 + final45 * ma45 + finalrn * marn;
// The four orientations are blended into the centre colour in one order
// (res1) and in the reverse order (res2).
float3 res1 = P12;
res1 = lerp(res1, lerp(P13, P17, px.x), mac.x);
res1 = lerp(res1, lerp(P7 , P13, px.y), mac.y);
res1 = lerp(res1, lerp(P11, P7 , px.z), mac.z);
res1 = lerp(res1, lerp(P17, P11, px.w), mac.w);
float3 res2 = P12;
res2 = lerp(res2, lerp(P17, P11, px.w), mac.w);
res2 = lerp(res2, lerp(P11, P7 , px.z), mac.z);
res2 = lerp(res2, lerp(P7 , P13, px.y), mac.y);
res2 = lerp(res2, lerp(P13, P17, px.x), mac.x);
// Keep whichever result differs more from the centre colour (res2 on a tie).
float4 Sharp = float4(lerp(res1, res2, step(c_df(P12, res1), c_df(P12, res2))), 1.0);
//////////////////////////////// SABR FRAGMENT END ////////////////////////////////
////////////////////////// EDGE-DETECT FRAGMENT START /////////////////////////////
// Edge strength at this fragment; see IsEdge().
float test = IsEdge(decal, VAR.tc, IN);
float4 hybrid = float4(0.0);
// Sharp result where an edge was detected, blurred result elsewhere.
hybrid = (test > 0.01) ? Sharp : Smooth;
return hybrid;
}
I merged it into a single pass with the intention of maybe integrating it into beetle-psx-HW, though that core still needs some uniforms added before it can use it. I’ll also need to bring the #included functions into the main shader to make it self-contained, and I will probably do so for this one before committing it to the repo.