e4s-game/gamedata/shaders/r2/shadow.hlsli
2026-06-18 01:18:29 +03:00

252 lines
7.3 KiB
HLSL

#ifndef SHADOW_H
#define SHADOW_H
#include "common.hlsli"
// Shadow-map sampler bound to pixel-shader sampler register s0. Every sampling
// helper in this file reads depth (or a filtered depth comparison) through it.
uniform sampler s_smap : register(ps, s0); // 2D/cube shadowmap
// Tap-spread factor: the helpers below multiply their per-tap shift by
// KERNEL / SMAP_size before adding it to the texture coordinate.
// SMAP_size is not defined in this file (presumably the shadow-map resolution
// in texels, supplied by the engine or common.hlsli - confirm).
#define KERNEL .6f
//////////////////////////////////////////////////////////////////////////////////////////
// software
//////////////////////////////////////////////////////////////////////////////////////////
// Software-filtered shadow tap.
//   tc        - shadow-map texture coordinate (callers in this file pass xy / w)
//   shift     - tap offset; scaled by KERNEL / SMAP_size before being added to tc
//   depth_cmp - depth value tested against the depth stored in the map
// Fetches the four samples half a texel around the shifted coordinate, turns
// each into 1 (depth_cmp <= stored depth) or 0, and blends the four results
// with bilinear weights derived from the fractional texel position.
float sample_sw(float2 tc, float2 shift, float depth_cmp)
{
    static const float tap_scale = KERNEL / float(SMAP_size);
    float map_dim = SMAP_size;
    float half_texel = 0.5f / map_dim;

    tc += shift * tap_scale;

    // Coordinates of the 2x2 neighbourhood around the shifted position.
    float2 uv00 = tc + float2(-half_texel, -half_texel);
    float2 uv01 = tc + float2(-half_texel, half_texel);
    float2 uv10 = tc + float2(half_texel, -half_texel);
    float2 uv11 = tc + float2(half_texel, half_texel);

    float4 stored = float4(
        tex2D(s_smap, uv00).x,
        tex2D(s_smap, uv01).x,
        tex2D(s_smap, uv10).x,
        tex2D(s_smap, uv11).x);

    // step(d, 0) yields 1 where d <= 0, i.e. where depth_cmp <= stored depth.
    float4 lit = step(depth_cmp - stored, 0.0f);

    // Bilinear weights, ordered to match the 00 / 01 / 10 / 11 samples above.
    float2 w_hi = frac(uv00 * map_dim);
    float2 w_lo = float2(1.0f, 1.0f) - w_hi;
    float4 weights = float4(w_lo.x * w_lo.y, w_lo.x * w_hi.y, w_hi.x * w_lo.y, w_hi.x * w_hi.y);

    return dot(lit, weights);
}
// Four-tap software shadow term: averages sample_sw() over the four diagonal
// offsets (+/-1, +/-1) around the projected coordinate tc.xy / tc.w.
// NOTE(review): the comparison depth is tc.z as-is, not tc.z / tc.w (which is
// what sample_hw_f4 compares against) - confirm this is intended.
float shadow_sw(float4 tc)
{
    float2 uv = tc.xy / tc.w;
    float4 taps = float4(
        sample_sw(uv, float2(-1, -1), tc.z),
        sample_sw(uv, float2(+1, -1), tc.z),
        sample_sw(uv, float2(-1, +1), tc.z),
        sample_sw(uv, float2(+1, +1), tc.z));
    // The scalar 1/4 is broadcast to all four components: plain average.
    return dot(taps, 1.h / 4.h);
}
//////////////////////////////////////////////////////////////////////////////////////////
// hardware + PCF
//////////////////////////////////////////////////////////////////////////////////////////
// Single shadow-map tap for the "hardware + PCF" path.
//   tc    - projective shadow-map coordinate (xyzw)
//   shift - tap offset; scaled by KERNEL / SMAP_size before use
// Returns the x channel of the fetch from s_smap. Per the section banner the
// map is presumably a hardware-PCF depth surface, so the fetched value would
// already be the filtered comparison result - confirm against the engine setup.
float sample_hw_pcf(float4 tc, float4 shift)
{
    static const float ts = KERNEL / float(SMAP_size);
#ifndef SUNSHAFTS_DYNAMIC
    // Projective fetch: the offset is pre-multiplied by tc.w so that it
    // survives the divide by w performed inside tex2Dproj.
    return tex2Dproj(s_smap, tc + tc.w * shift * ts).x;
#else // SUNSHAFTS_DYNAMIC
    // Explicit divide by w, then an explicit-LOD fetch; tex2Dlod reads the
    // LOD from the w component, so it is forced to 0.
    float4 tc2 = tc / tc.w + shift * ts;
    tc2.w = 0;
    // Select .x explicitly instead of relying on implicit float4 -> float
    // truncation; this matches the non-SUNSHAFTS_DYNAMIC branch.
    return tex2Dlod(s_smap, tc2).x;
#endif // SUNSHAFTS_DYNAMIC
}
// Four-tap shadow term for the hardware PCF path: sums sample_hw_pcf() over
// the four diagonal offsets (+/-1, +/-1) and divides by four.
float shadow_hw(float4 tc)
{
    float sum = sample_hw_pcf(tc, float4(-1.0f, -1.0f, 0.0f, 0.0f));
    sum += sample_hw_pcf(tc, float4(+1.0f, -1.0f, 0.0f, 0.0f));
    sum += sample_hw_pcf(tc, float4(-1.0f, +1.0f, 0.0f, 0.0f));
    sum += sample_hw_pcf(tc, float4(+1.0f, +1.0f, 0.0f, 0.0f));
    return sum / (4.h);
}
//////////////////////////////////////////////////////////////////////////////////////////
// hardware (ATI) + DF24/Fetch4
//////////////////////////////////////////////////////////////////////////////////////////
/*
float sample_hw_f4 (float4 tc,float4 shift){
static const float ts = KERNEL / float(SMAP_size);
float4 D4 = tex2Dproj (s_smap,tc + tc.w*shift*ts);
float4 dcmp = tc.z/tc.w ;
float4 cmp = dcmp<D4 ;
return dot (cmp,1.h/4.h);
}
*/
// Single filtered tap for the DF24/Fetch4 path.
//   tc    - projective shadow-map coordinate (xyzw)
//   shift - tap offset; only shift.xy is used, scaled by KERNEL / SMAP_size
// Divides tc by tc.w, offsets xy, reads one float4 from s_smap and compares
// the projected depth against each of its four components (1 where
// depth < fetched value). The four results are blended with bilinear weights.
// NOTE(review): with Fetch4 the four components are presumably the 2x2
// neighbouring depths; the weight order below reproduces the original
// fr4.zywx swizzle - confirm it matches the hardware's component order.
// An unweighted alternative, dot(compare, 1.h/4.h), was left commented out
// in the original.
float sample_hw_f4(float4 tc, float4 shift)
{
    static const float tap_scale = KERNEL / float(SMAP_size);
    float map_dim = SMAP_size;

    float4 proj = tc / tc.w;
    proj.xy += shift.xy * tap_scale;

    float4 fetched = tex2D(s_smap, proj);
    float4 lit = proj.z < fetched;

    float2 w_hi = frac(proj.xy * map_dim);
    float2 w_lo = float2(1.0f, 1.0f) - w_hi;
    // Equivalent to (lo.x*lo.y, lo.x*hi.y, hi.x*lo.y, hi.x*hi.y).zywx.
    float4 weights = float4(w_hi.x * w_lo.y, w_lo.x * w_hi.y, w_hi.x * w_hi.y, w_lo.x * w_lo.y);

    return dot(lit, weights);
}
// Four-tap shadow term for the DF24/Fetch4 path: sums sample_hw_f4() over the
// four diagonal offsets (+/-1, +/-1) and divides by four.
float shadow_hw_f4(float4 tc)
{
    float sum = sample_hw_f4(tc, float4(-1.0f, -1.0f, 0.0f, 0.0f));
    sum += sample_hw_f4(tc, float4(+1.0f, -1.0f, 0.0f, 0.0f));
    sum += sample_hw_f4(tc, float4(-1.0f, +1.0f, 0.0f, 0.0f));
    sum += sample_hw_f4(tc, float4(+1.0f, +1.0f, 0.0f, 0.0f));
    return sum / 4.h;
}
//////////////////////////////////////////////////////////////////////////////////////////
// testbed
// Jitter textures used to perturb shadow-map tap positions.
// In the code visible here: jitter0 is sampled by shadowtest, shadowtest_sun
// and shadow_high; jitter1 only by shadowtest; jitter2 is not sampled at all
// (it may be used by files that include this one).
uniform sampler2D jitter0;
uniform sampler2D jitter1;
uniform sampler2D jitter2;
// Projective shadow-map fetch at tc displaced by `offset`.
// The offset is multiplied by tc.w before being added to tc.xy so that it is
// still `offset` after the divide by w inside tex2Dproj. Returns the full
// float4 fetched from s_smap; callers pick the channel they need.
float4 test(float4 tc, float2 offset)
{
    float4 shifted = tc;
    shifted.xy += tc.w * offset;
    return tex2Dproj(s_smap, shifted);
}
// Jittered four-tap shadow term.
//   tc  - projective shadow-map coordinate
//   tcJ - projective coordinate used to look up the jitter textures
// Reads two jitter vectors (jitter0, jitter1), scales them by 2.7 / SMAP_size
// and uses their .xy and .wz pairs as the four tap offsets; the taps are
// averaged.
// NOTE(review): the taps read channels x, y, z and then x again from the
// fetched float4 - kept exactly as in the original; confirm the last one is
// not meant to be .w.
float shadowtest(float4 tc, float4 tcJ) // jittered sampling
{
    const float jitter_scale = (2.7f / float(SMAP_size));
    float4 jit_a = tex2Dproj(jitter0, tcJ) * jitter_scale;
    float4 jit_b = tex2Dproj(jitter1, tcJ) * jitter_scale;

    float4 taps = float4(
        test(tc, jit_a.xy).x,
        test(tc, jit_a.wz).y,
        test(tc, jit_b.xy).z,
        test(tc, jit_b.wz).x);

    return dot(taps, 1.h / 4.h);
}
// Jittered four-tap shadow term with a fixed diagonal spread.
//   tc  - projective shadow-map coordinate
//   tcJ - not used by this function (kept for signature compatibility)
// The jitter lookup coordinate is derived from tc itself: the projected
// position scaled by SMAP_size / 4, wrapped with frac() and halved. A single
// jitter0 sample, scaled by 0.7 / SMAP_size, perturbs four taps placed at
// (+/-k, +/-k) with k = 0.5 / SMAP_size; the second pair of taps uses the
// negated jitter. The four taps are averaged.
// The original also carried a commented-out 2.0 / SMAP_size scale and a
// commented-out jitter1 lookup.
float shadowtest_sun(float4 tc, float4 tcJ) // jittered sampling
{
    const float jitter_scale = (0.7f / float(SMAP_size));
    const float spread = .5f / float(SMAP_size);

    float2 jitter_uv = frac(tc.xy / tc.w * SMAP_size / 4.0f) * .5f;
    float4 jit = tex2D(jitter0, jitter_uv) * jitter_scale;

    float4 taps = float4(
        test(tc, jit.xy + float2(-spread, -spread)).x,
        test(tc, jit.wz + float2(spread, -spread)).y,
        test(tc, -jit.xy + float2(-spread, spread)).z,
        test(tc, -jit.wz + float2(spread, spread)).x);

    return dot(taps, 1.h / 4.h);
}
// Jittered eight-tap shadow term.
// One jitter0 sample (looked up from the projected position scaled by
// SMAP_size / 4, wrapped with frac() and halved; scaled by 0.5 / SMAP_size)
// perturbs two groups of taps:
//   - four diagonal taps at (+/-k, +/-k), k = 1.0 / SMAP_size, using +jitter
//   - four axis taps at distance k1 = 1.3 / SMAP_size, using -jitter for the
//     first two and -2 * jitter for the last two
// The eight tap results are summed and divided by eight.
float shadow_high(float4 tc)
{
    const float jitter_scale = (0.5f / float(SMAP_size));
    const float diag_step = 1.0f / float(SMAP_size);
    const float axis_step = 1.3f / float(SMAP_size);

    float2 jitter_uv = frac(tc.xy / tc.w * SMAP_size / 4.0f) * .5f;
    float4 jit = tex2D(jitter0, jitter_uv) * jitter_scale;

    // Diagonal group.
    float4 diag;
    diag.x = test(tc, jit.xy + float2(-diag_step, -diag_step)).x;
    diag.y = test(tc, jit.wz + float2(diag_step, -diag_step)).y;
    diag.z = test(tc, jit.xy + float2(-diag_step, diag_step)).z;
    diag.w = test(tc, jit.wz + float2(diag_step, diag_step)).x;

    // Axis-aligned group.
    float4 axis;
    axis.x = test(tc, -jit.xy + float2(-axis_step, 0.0f)).x;
    axis.y = test(tc, -jit.wz + float2(0.0f, -axis_step)).y;
    axis.z = test(tc, -2 * jit.xy + float2(axis_step, 0.0f)).z;
    axis.w = test(tc, -2 * jit.wz + float2(0.0f, axis_step)).x;

    return (diag.x + diag.y + diag.z + diag.w + axis.x + axis.y + axis.z + axis.w) * 1.0f / 8.0f;
}
//////////////////////////////////////////////////////////////////////////////////////////
// select hardware or software shadowmaps
//////////////////////////////////////////////////////////////////////////////////////////
// Main shadow term; the implementation is chosen at compile time:
//   USE_HWSMAP_PCF defined -> shadow_hw     (D24X8 + PCF)
//   USE_FETCH4 defined     -> shadow_hw_f4  (DF24 + Fetch4)
//   neither                -> shadow_sw     (FP32)
// USE_HWSMAP_PCF takes precedence when both macros are defined.
float shadow(float4 tc)
{
#if defined(USE_HWSMAP_PCF)
    return shadow_hw(tc);
#elif defined(USE_FETCH4)
    return shadow_hw_f4(tc);
#else
    return shadow_sw(tc);
#endif
}
// Single-tap (zero offset) shadow lookup; the sampler is chosen at compile
// time:
//   USE_HWSMAP_PCF defined -> sample_hw_pcf  (D24X8 + PCF)
//   USE_FETCH4 defined     -> sample_hw_f4   (DF24 + Fetch4)
//   neither                -> sample_sw      (FP32), fed tc.xy / tc.w and tc.z
// USE_HWSMAP_PCF takes precedence when both macros are defined.
float shadow_volumetric(float4 tc)
{
#if defined(USE_HWSMAP_PCF)
    return sample_hw_pcf(tc, float4(0, 0, 0, 0));
#elif defined(USE_FETCH4)
    return sample_hw_f4(tc, float4(0, 0, 0, 0));
#else
    return sample_sw(tc.xy / tc.w, float2(0, 0), tc.z);
#endif
}
//////////////////////////////////////////////////////////////////////////////////////////
#ifdef USE_SUNMASK
uniform float3x4 m_sunmask; // ortho-projection
#endif
// Sun mask factor for position P.
// With USE_SUNMASK: P is transformed by m_sunmask, the first two components of
// the result are used as texture coordinates, and the w channel of s_lmap is
// read at mip level 0 (the original comment tags that channel "A8").
// s_lmap is not declared in this file (presumably it comes from common.hlsli).
// Without USE_SUNMASK: always 1, and P is ignored.
float sunmask(float4 P)
{
#ifdef USE_SUNMASK
    float2 mask_uv = mul(m_sunmask, P);
    return tex2Dlod(s_lmap, float4(mask_uv.xy, 0.0f, 0.0f)).w; // A8
#else
    return 1.h;
#endif
}
//////////////////////////////////////////////////////////////////////////////////////////
// 4x4 matrix uniform; not referenced by any function in this file.
// Presumably the transform into shadow-map space used by shaders that
// include this header - confirm against the callers.
uniform float4x4 m_shadow;
#endif // SHADOW_H