e4s-sdk/gamedata/shaders/r3/shadow.h
2026-06-17 23:06:51 +03:00

852 lines
No EOL
31 KiB
C

#ifndef SHADOW_H
#define SHADOW_H
#include "common.h"
//uniform sampler s_smap : register(ps,s0); // 2D/cube shadowmap
//Texture2D<float> s_smap; // 2D/cube shadowmap
// Used for RGBA texture too ?!
Texture2D s_smap : register(ps,t0); // 2D/cube shadowmap
Texture2D<float> s_smap_minmax; // 2D/cube shadowmap
#include "gather.ps"
SamplerComparisonState smp_smap; // Special comare sampler
sampler smp_jitter;
Texture2D jitter0;
Texture2D jitter1;
//uniform sampler2D jitter2;
//uniform sampler2D jitter3;
//uniform float4 jitterS;
Texture2D jitterMipped;
#ifndef USE_ULTRA_SHADOWS
#define KERNEL 0.6f
#else
#define KERNEL 1.0f
#endif
float modify_light( float light )
{
return ( light > 0.7 ? 1.0 : lerp( 0.0, 1.0, saturate( light / 0.7 ) ) );
}
//////////////////////////////////////////////////////////////////////////////////////////
// hardware + PCF
//////////////////////////////////////////////////////////////////////////////////////////
float sample_hw_pcf (float4 tc,float4 shift)
{
static const float ts = KERNEL / float(SMAP_size);
tc.xyz /= tc.w;
tc.xy += shift.xy * ts;
return s_smap.SampleCmpLevelZero( smp_smap, tc.xy, tc.z).x;
}
#define GS2 3
float shadow_hw( float4 tc )
{
float s0 = sample_hw_pcf( tc, float4( -1, -1, 0, 0) );
float s1 = sample_hw_pcf( tc, float4( +1, -1, 0, 0) );
float s2 = sample_hw_pcf( tc, float4( -1, +1, 0, 0) );
float s3 = sample_hw_pcf( tc, float4( +1, +1, 0, 0) );
return (s0+s1+s2+s3)/4.h;
}
#if SUN_QUALITY>=4
#define FILTER_SIZE 11
#define FS FILTER_SIZE
#define FS2 ( FILTER_SIZE / 2 )
static const float W2[11][11] =
{ { 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 },
{ 0.0,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.0 },
{ 0.0,0.2,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.2,0.0 },
{ 0.0,0.2,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.2,0.0 },
{ 0.0,0.2,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.2,0.0 },
{ 0.0,0.2,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.2,0.0 },
{ 0.0,0.2,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.2,0.0 },
{ 0.0,0.2,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.2,0.0 },
{ 0.0,0.2,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.2,0.0 },
{ 0.0,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.0 },
{ 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 },
};
static const float W1[11][11] =
{ { 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 },
{ 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 },
{ 0.0,0.0,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.0,0.0 },
{ 0.0,0.0,0.2,1.0,1.0,1.0,1.0,1.0,0.2,0.0,0.0 },
{ 0.0,0.0,0.2,1.0,1.0,1.0,1.0,1.0,0.2,0.0,0.0 },
{ 0.0,0.0,0.2,1.0,1.0,1.0,1.0,1.0,0.2,0.0,0.0 },
{ 0.0,0.0,0.2,1.0,1.0,1.0,1.0,1.0,0.2,0.0,0.0 },
{ 0.0,0.0,0.2,1.0,1.0,1.0,1.0,1.0,0.2,0.0,0.0 },
{ 0.0,0.0,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.0,0.0 },
{ 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 },
{ 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 },
};
static const float W0[11][11] =
{ { 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 },
{ 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 },
{ 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 },
{ 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 },
{ 0.0,0.0,0.0,0.0,0.1,0.1,0.1,0.0,0.0,0.0,0.0 },
{ 0.0,0.0,0.0,0.0,0.1,1.0,0.1,0.0,0.0,0.0,0.0 },
{ 0.0,0.0,0.0,0.0,0.1,0.1,0.1,0.0,0.0,0.0,0.0 },
{ 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 },
{ 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 },
{ 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 },
{ 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 },
};
float Fw( int r, int c, float fL )
{
return (1.0-fL) * (1.0-fL) * (1.0-fL) * W0[r][c] +
3.0f * (1.0-fL) * (1.0-fL) * fL * W1[r][c] +
3.0f * fL * fL * (1.0-fL) * W2[r][c] +
fL * fL * fL * 1.0f;
}
#define BLOCKER_FILTER_SIZE 11
#define BFS BLOCKER_FILTER_SIZE
#define BFS2 ( BLOCKER_FILTER_SIZE / 2 )
#define SUN_WIDTH 300.0f
// uses gather for DX11/10.1 and visibilty encoding for DX10.0
float shadow_extreme_quality( float3 tc )
{
float s = 0.0f;
float2 stc = ( SMAP_size * tc.xy ) + float2( 0.5, 0.5 );
float2 tcs = floor( stc );
float2 fc;
int row;
int col;
float w = 0.0;
float avgBlockerDepth = 0;
float blockerCount = 0;
float fRatio;
float4 v1[ FS2 + 1 ];
float2 v0[ FS2 + 1 ];
float2 off;
fc = stc - tcs;
tc.xy = tc.xy - ( (1.0f/SMAP_size) * fc );
tc.z -= 0.0001f;
#if defined(SM_4_1) || defined( SM_5)
// find number of blockers and sum up blocker depth
for( row = -BFS2; row <= BFS2; row += 2 )
{
for( col = -BFS2; col <= BFS2; col += 2 )
{
float4 d4 = s_smap.Gather( smp_nofilter, tc.xy, int2( col, row ) );
float4 b4 = ( tc.zzzz <= d4 ) ? (0.0f).xxxx : (1.0f).xxxx;
blockerCount += dot( b4, (1.0f).xxxx );
avgBlockerDepth += dot( d4, b4 );
}
}
#else // SM_4_0
uint vmask[ FS + 1 ];
[unroll]for( col = 0; col <= FS; ++col )
vmask[ col ] = uint(0);
[unroll(11)]for( row = -FS2; row <= FS2; row +=2 )
{
[unroll]for( int col = -FS2; col <= FS2; col +=2 )
{
float4 d4;
float b;
d4.w = s_smap.SampleLevel( smp_nofilter, tc.xy, 0, int2( col, row ) ).x;
b = ( tc.z <= d4.w ) ? (0.0f) : (1.0f);
vmask[ col + FS2 + 0 ] += ( ( tc.z <= d4.w ) ? ( uint(1) << uint( row + FS2 + 0 ) ) : uint(0) );
blockerCount += b;
avgBlockerDepth += d4.w * b;
d4.z = s_smap.SampleLevel( smp_nofilter, tc.xy, 0, int2( col+1, row ) ).x;
b = ( tc.z <= d4.z ) ? (0.0f) : (1.0f);
vmask[ col + FS2 + 1 ] += ( ( tc.z <= d4.z ) ? ( uint(1) << uint( row + FS2 + 0 ) ) : uint(0) );
blockerCount += b;
avgBlockerDepth += d4.z * b;
d4.x = s_smap.SampleLevel( smp_nofilter, tc.xy, 0, int2( col, row+1 ) ).x;
vmask[ col + FS2 + 0 ] += ( ( tc.z <= d4.x ) ? ( uint(1) << uint( row + FS2 + 1 ) ) : uint(0) );
b = ( tc.z <= d4.x ) ? (0.0f) : (1.0f);
blockerCount += b;
avgBlockerDepth += d4.x * b;
d4.y = s_smap.SampleLevel( smp_nofilter, tc.xy, 0, int2( col+1, row+1 ) ).x;
vmask[ col + FS2 + 1 ] += ( ( tc.z <= d4.y ) ? ( uint(1) << uint( row + FS2 + 1 ) ) : uint(0) );
b = ( tc.z <= d4.y ) ? (0.0f) : (1.0f);
blockerCount += b;
avgBlockerDepth += d4.y * b;
}
}
#endif
// compute ratio average blocker depth vs. pixel depth
if( blockerCount > 0.0 )
{
avgBlockerDepth /= blockerCount;
fRatio = saturate( ( ( tc.z - avgBlockerDepth ) * SUN_WIDTH ) / avgBlockerDepth );
fRatio *= fRatio;
}
else
{
fRatio = 0.0;
}
for( row = 0; row < FS; ++row )
{
for( col = 0; col < FS; ++col )
w += Fw(row,col,fRatio);
}
// filter shadow map samples using the dynamic weights
[unroll(11)]for( row = -FS2; row <= FS2; row += 2 )
{
[unroll]for( int col = -FS2; col <= FS2; col += 2 )
{
#if ( defined(SM_5) ) || ( defined(SM_4_1) )
#ifdef SM_5
v1[(col+FS2)/2] = s_smap.GatherCmpRed( smp_smap, tc.xy, tc.z,
int2( col, row ) );
#else // SM_4_1
float4 d4 = s_smap.Gather( smp_linear, tc.xy, int2( col, row ) );
v1[(col+FS2)/2] = ( tc.zzzz <= d4 ) ? (1.0f).xxxx : (0.0f).xxxx;
#endif
#else
v1[(col+FS2)/2].w = ( ( vmask[ col + FS2 + 0 ] & ( uint(1) << uint( row + FS2 + 0 ) ) ) ? 1.0f : 0.0f );
v1[(col+FS2)/2].z = ( ( vmask[ col + FS2 + 1 ] & ( uint(1) << uint( row + FS2 + 0 ) ) ) ? 1.0f : 0.0f );
v1[(col+FS2)/2].x = ( ( vmask[ col + FS2 + 0 ] & ( uint(1) << uint( row + FS2 + 1 ) ) ) ? 1.0f : 0.0f );
v1[(col+FS2)/2].y = ( ( vmask[ col + FS2 + 1 ] & ( uint(1) << uint( row + FS2 + 1 ) ) ) ? 1.0f : 0.0f );
#endif
if( col == -FS2 )
{
s += ( 1 - fc.y ) * ( v1[0].w * ( Fw(row+FS2,0,fRatio) - Fw(row+FS2,0,fRatio) * fc.x ) + v1[0].z * ( fc.x * ( Fw(row+FS2,0,fRatio) - Fw(row+FS2,1,fRatio) ) + Fw(row+FS2,1,fRatio) ) );
s += ( fc.y ) * ( v1[0].x * ( Fw(row+FS2,0,fRatio) - Fw(row+FS2,0,fRatio) * fc.x ) + v1[0].y * ( fc.x * ( Fw(row+FS2,0,fRatio) - Fw(row+FS2,1,fRatio) ) + Fw(row+FS2,1,fRatio) ) );
if( row > -FS2 )
{
s += ( 1 - fc.y ) * ( v0[0].x * ( Fw(row+FS2-1,0,fRatio) - Fw(row+FS2-1,0,fRatio) * fc.x ) + v0[0].y * ( fc.x * ( Fw(row+FS2-1,0,fRatio) - Fw(row+FS2-1,1,fRatio) ) + Fw(row+FS2-1,1,fRatio) ) );
s += ( fc.y ) * ( v1[0].w * ( Fw(row+FS2-1,0,fRatio) - Fw(row+FS2-1,0,fRatio) * fc.x ) + v1[0].z * ( fc.x * ( Fw(row+FS2-1,0,fRatio) - Fw(row+FS2-1,1,fRatio) ) + Fw(row+FS2-1,1,fRatio) ) );
}
}
else if( col == FS2 )
{
s += ( 1 - fc.y ) * ( v1[FS2].w * ( fc.x * ( Fw(row+FS2,FS-2,fRatio) - Fw(row+FS2,FS-1,fRatio) ) + Fw(row+FS2,FS-1,fRatio) ) + v1[FS2].z * fc.x * Fw(row+FS2,FS-1,fRatio) );
s += ( fc.y ) * ( v1[FS2].x * ( fc.x * ( Fw(row+FS2,FS-2,fRatio) - Fw(row+FS2,FS-1,fRatio) ) + Fw(row+FS2,FS-1,fRatio) ) + v1[FS2].y * fc.x * Fw(row+FS2,FS-1,fRatio) );
if( row > -FS2 )
{
s += ( 1 - fc.y ) * ( v0[FS2].x * ( fc.x * ( Fw(row+FS2-1,FS-2,fRatio) - Fw(row+FS2-1,FS-1,fRatio) ) + Fw(row+FS2-1,FS-1,fRatio) ) + v0[FS2].y * fc.x * Fw(row+FS2-1,FS-1,fRatio) );
s += ( fc.y ) * ( v1[FS2].w * ( fc.x * ( Fw(row+FS2-1,FS-2,fRatio) - Fw(row+FS2-1,FS-1,fRatio) ) + Fw(row+FS2-1,FS-1,fRatio) ) + v1[FS2].z * fc.x * Fw(row+FS2-1,FS-1,fRatio) );
}
}
else
{
s += ( 1 - fc.y ) * ( v1[(col+FS2)/2].w * ( fc.x * ( Fw(row+FS2,col+FS2-1,fRatio) - Fw(row+FS2,col+FS2+0,fRatio) ) + Fw(row+FS2,col+FS2+0,fRatio) ) +
v1[(col+FS2)/2].z * ( fc.x * ( Fw(row+FS2,col+FS2-0,fRatio) - Fw(row+FS2,col+FS2+1,fRatio) ) + Fw(row+FS2,col+FS2+1,fRatio) ) );
s += ( fc.y ) * ( v1[(col+FS2)/2].x * ( fc.x * ( Fw(row+FS2,col+FS2-1,fRatio) - Fw(row+FS2,col+FS2+0,fRatio) ) + Fw(row+FS2,col+FS2+0,fRatio) ) +
v1[(col+FS2)/2].y * ( fc.x * ( Fw(row+FS2,col+FS2-0,fRatio) - Fw(row+FS2,col+FS2+1,fRatio) ) + Fw(row+FS2,col+FS2+1,fRatio) ) );
if( row > -FS2 )
{
s += ( 1 - fc.y ) * ( v0[(col+FS2)/2].x * ( fc.x * ( Fw(row+FS2-1,col+FS2-1,fRatio) - Fw(row+FS2-1,col+FS2+0,fRatio) ) + Fw(row+FS2-1,col+FS2+0,fRatio) ) +
v0[(col+FS2)/2].y * ( fc.x * ( Fw(row+FS2-1,col+FS2-0,fRatio) - Fw(row+FS2-1,col+FS2+1,fRatio) ) + Fw(row+FS2-1,col+FS2+1,fRatio) ) );
s += ( fc.y ) * ( v1[(col+FS2)/2].w * ( fc.x * ( Fw(row+FS2-1,col+FS2-1,fRatio) - Fw(row+FS2-1,col+FS2+0,fRatio) ) + Fw(row+FS2-1,col+FS2+0,fRatio) ) +
v1[(col+FS2)/2].z * ( fc.x * ( Fw(row+FS2-1,col+FS2-0,fRatio) - Fw(row+FS2-1,col+FS2+1,fRatio) ) + Fw(row+FS2-1,col+FS2+1,fRatio) ) );
}
}
if( row != FS2 )
v0[(col+FS2)/2] = v1[(col+FS2)/2].xy;
}
}
return s/w;
}
float4 Fw( int r, int c )
{
return float4( W0[r][c], W1[r][c], W2[r][c], 1.0f );
}
//======================================================================================
// This shader computes the contact hardening shadow filter
//======================================================================================
float shadow_extreme_quality_fused( float3 tc )
{
float4 s = (0.0f).xxxx;
float2 stc = ( SMAP_size * tc.xy ) + float2( 0.5, 0.5 );
float2 tcs = floor( stc );
float2 fc;
int row;
int col;
float w = 0.0;
float avgBlockerDepth = 0;
float blockerCount = 0;
float fRatio;
float4 v1[ FS2 + 1 ];
float2 v0[ FS2 + 1 ];
float2 off;
fc = stc - tcs;
tc.xy = tc.xy - ( fc * (1.0f/SMAP_size) );
// filter shadow map samples using the dynamic weights
[unroll(FS)]for( row = -FS2; row <= FS2; row += 2 )
{
for( col = -FS2; col <= FS2; col += 2 )
{
float4 d4;
#ifndef PS_4
d4 = s_smap.Gather( smp_nofilter, tc.xy + (1.0f/SMAP_size) * float2( col, row ) );
#else
d4.w = s_smap.SampleLevel( smp_nofilter, tc.xy + (1.0f/SMAP_size) * float2( col, row ), 0 ).x;
d4.z = s_smap.SampleLevel( smp_nofilter, tc.xy + (1.0f/SMAP_size) * float2( col+1, row ) , 0 ).x;
d4.y = s_smap.SampleLevel( smp_nofilter, tc.xy + (1.0f/SMAP_size) * float2( col+1, row+1 ), 0 ).x;
d4.x = s_smap.SampleLevel( smp_nofilter, tc.xy + (1.0f/SMAP_size) * float2( col, row+1 ), 0 ).x;
#endif
float4 b4 = ( tc.zzzz <= d4 ) ? (0.0f).xxxx : (1.0f).xxxx;
v1[(col+FS2)/2] = ( tc.zzzz <= d4 ) ? (1.0f).xxxx : (0.0f).xxxx;
blockerCount += dot( b4, (1.0).xxxx );
avgBlockerDepth += dot( d4, b4 );
if( col == -FS2 )
{
s += ( 1 - fc.y ) * ( v1[0].w * ( Fw(row+FS2,0) -
Fw(row+FS2,0) * fc.x ) + v1[0].z *
( fc.x * ( Fw(row+FS2,0) -
Fw(row+FS2,1) ) +
Fw(row+FS2,1) ) );
s += ( fc.y ) * ( v1[0].x * ( Fw(row+FS2,0) -
Fw(row+FS2,0) * fc.x ) +
v1[0].y * ( fc.x * ( Fw(row+FS2,0) -
Fw(row+FS2,1) ) +
Fw(row+FS2,1) ) );
if( row > -FS2 )
{
s += ( 1 - fc.y ) * ( v0[0].x * ( Fw(row+FS2-1,0) -
Fw(row+FS2-1,0) * fc.x ) + v0[0].y *
( fc.x * ( Fw(row+FS2-1,0) -
Fw(row+FS2-1,1) ) +
Fw(row+FS2-1,1) ) );
s += ( fc.y ) * ( v1[0].w * ( Fw(row+FS2-1,0) -
Fw(row+FS2-1,0) * fc.x ) + v1[0].z *
( fc.x * ( Fw(row+FS2-1,0) -
Fw(row+FS2-1,1) ) +
Fw(row+FS2-1,1) ) );
}
}
else if( col == FS2 )
{
s += ( 1 - fc.y ) * ( v1[FS2].w * ( fc.x * ( Fw(row+FS2,FS-2) -
Fw(row+FS2,FS-1) ) +
Fw(row+FS2,FS-1) ) + v1[FS2].z * fc.x *
Fw(row+FS2,FS-1) );
s += ( fc.y ) * ( v1[FS2].x * ( fc.x * ( Fw(row+FS2,FS-2) -
Fw(row+FS2,FS-1) ) +
Fw(row+FS2,FS-1) ) + v1[FS2].y * fc.x *
Fw(row+FS2,FS-1) );
if( row > -FS2 )
{
s += ( 1 - fc.y ) * ( v0[FS2].x * ( fc.x *
( Fw(row+FS2-1,FS-2) -
Fw(row+FS2-1,FS-1) ) +
Fw(row+FS2-1,FS-1) ) +
v0[FS2].y * fc.x * Fw(row+FS2-1,FS-1) );
s += ( fc.y ) * ( v1[FS2].w * ( fc.x *
( Fw(row+FS2-1,FS-2) -
Fw(row+FS2-1,FS-1) ) +
Fw(row+FS2-1,FS-1) ) +
v1[FS2].z * fc.x * Fw(row+FS2-1,FS-1) );
}
}
else
{
s += ( 1 - fc.y ) * ( v1[(col+FS2)/2].w * ( fc.x *
( Fw(row+FS2,col+FS2-1) -
Fw(row+FS2,col+FS2+0) ) +
Fw(row+FS2,col+FS2+0) ) +
v1[(col+FS2)/2].z * ( fc.x *
( Fw(row+FS2,col+FS2-0) -
Fw(row+FS2,col+FS2+1) ) +
Fw(row+FS2,col+FS2+1) ) );
s += ( fc.y ) * ( v1[(col+FS2)/2].x * ( fc.x *
( Fw(row+FS2,col+FS2-1) -
Fw(row+FS2,col+FS2+0) ) +
Fw(row+FS2,col+FS2+0) ) +
v1[(col+FS2)/2].y * ( fc.x *
( Fw(row+FS2,col+FS2-0) -
Fw(row+FS2,col+FS2+1) ) +
Fw(row+FS2,col+FS2+1) ) );
if( row > -FS2 )
{
s += ( 1 - fc.y ) * ( v0[(col+FS2)/2].x * ( fc.x *
( Fw(row+FS2-1,col+FS2-1) -
Fw(row+FS2-1,col+FS2+0) ) +
Fw(row+FS2-1,col+FS2+0) ) +
v0[(col+FS2)/2].y * ( fc.x *
( Fw(row+FS2-1,col+FS2-0) -
Fw(row+FS2-1,col+FS2+1) ) +
Fw(row+FS2-1,col+FS2+1) ) );
s += ( fc.y ) * ( v1[(col+FS2)/2].w * ( fc.x *
( Fw(row+FS2-1,col+FS2-1) -
Fw(row+FS2-1,col+FS2+0) ) +
Fw(row+FS2-1,col+FS2+0) ) +
v1[(col+FS2)/2].z * ( fc.x *
( Fw(row+FS2-1,col+FS2-0) -
Fw(row+FS2-1,col+FS2+1) ) +
Fw(row+FS2-1,col+FS2+1) ) );
}
}
if( row != FS2 )
{
v0[(col+FS2)/2] = v1[(col+FS2)/2].xy;
}
}
}
// compute ratio using formulas from PCSS
if( blockerCount > 0.0 )
{
avgBlockerDepth /= blockerCount;
fRatio = saturate( ( ( tc.z - avgBlockerDepth ) * SUN_WIDTH ) / avgBlockerDepth );
fRatio *= fRatio;
}
else
{
fRatio = 0.0;
}
// sum up weights of dynamic filter matrix
for( row = 0; row < FS; ++row )
{
for( col = 0; col < FS; ++col )
{
w += Fw(row,col,fRatio);
}
}
return dot(s, float4((1.0f-fRatio)*(1.0f-fRatio)*(1.0f-fRatio),
3.0f * (1.0-fRatio)*(1.0-fRatio)*fRatio,
3.0f * fRatio*fRatio*(1.0-fRatio),
fRatio*fRatio*fRatio ) )/w;
}
#endif
#ifdef SM_4_1
float dx10_1_hw_hq_7x7( float3 tc )
{
float s = 0.0f;
float2 stc = ( SMAP_size * tc.xy ) + float2( 0.5, 0.5 );
float2 tcs = floor( stc );
float2 fc;
int row;
int col;
fc.xy = stc - tcs;
tc.xy = tcs * ( 1.0 / SMAP_size );
// loop over the rows
for( row = -GS2; row <= GS2; row += 2 )
{
[unroll]for( col = -GS2; col <= GS2; col += 2 )
{
float4 v = ( tc.zzzz <= s_smap.Gather( smp_nofilter, tc.xy, int2( col, row ) ) ) ? (1.0).xxxx : (0.0).xxxx;
if( row == -GS2 ) // top row
{
if( col == -GS2 ) // left
s += dot( float4( 1.0-fc.x, 1.0, 1.0-fc.y, (1.0-fc.x)*(1.0-fc.y) ), v );
else if( col == GS2 ) // right
s += dot( float4( 1.0f, fc.x, fc.x*(1.0-fc.y), 1.0-fc.y ), v );
else // center
s += dot( float4( 1.0, 1.0, 1.0-fc.y, 1.0-fc.y ), v );
}
else if( row == GS2 ) // bottom row
{
if( col == -GS2 ) // left
s += dot( float4( (1.0-fc.x)*fc.y, fc.y, 1.0, (1.0-fc.x) ), v );
else if( col == GS2 ) // right
s += dot( float4( fc.y, fc.x*fc.y, fc.x, 1.0 ), v );
else // center
s += dot( float4(fc.yy,1.0,1.0), v );
}
else // center rows
{
if( col == -GS2 ) // left
s += dot( float4( (1.0-fc.x), 1.0, 1.0, (1.0-fc.x) ), v );
else if( col == GS2 ) // right
s += dot( float4( 1.0, fc.x, fc.x, 1.0 ), v );
else // center
s += dot( (1.0).xxxx, v );
}
}
}
return s*(1.0/49.0);
}
#endif
float dx10_0_hw_hq_7x7( float4 tc )
{
tc.xyz /= tc.w;
float s = 0.0;
float2 stc = ( SMAP_size * tc.xy ) + float2( 0.5, 0.5 );
float2 tcs = floor( stc );
float2 fc;
fc = stc - tcs;
tc.xy = tc.xy - ( fc * ( 1.0/SMAP_size ) );
float2 pwAB = ( ( 2.0 ).xx - fc );
float2 tcAB = ( 1.0/SMAP_size ).xx / pwAB;
float2 tcM = (0.5/SMAP_size ).xx;
float2 pwGH = ( ( 1.0 ).xx + fc );
float2 tcGH = (1.0/SMAP_size) * ( fc / pwGH );
for( int row = -GS2; row <= GS2; row += 2 )
{
for( int col = -GS2; col <= GS2; col += 2 )
{
if( row == -GS2 ) // top row
{
if( col == -GS2 ) // left
s += ( pwAB.x * pwAB.y ) * s_smap.SampleCmpLevelZero( smp_smap, tc.xy + tcAB, tc.z, int2( col, row ) ).x;
else if( col == GS2 ) // right
s += ( pwGH.x * pwAB.y ) * s_smap.SampleCmpLevelZero( smp_smap, tc.xy + float2( tcGH.x, tcAB.y), tc.z, int2( col, row ) ).x;
else // center
s += ( 2.0 * pwAB.y ) * s_smap.SampleCmpLevelZero( smp_smap, tc.xy + float2( tcM.x, tcAB.y), tc.z, int2( col, row ) ).x;
}
else if( row == GS2 ) // bottom row
{
if( col == -GS2 ) // left
s += ( pwAB.x * pwGH.y ) * s_smap.SampleCmpLevelZero( smp_smap, tc.xy + float2( tcAB.x, tcGH.y ), tc.z, int2( col, row ) ).x;
else if( col == GS2 ) // right
s += ( pwGH.x * pwGH.y ) * s_smap.SampleCmpLevelZero( smp_smap, tc.xy + tcGH, tc.z, int2( col, row ) ).x;
else // center
s += ( 2.0 * pwGH.y ) * s_smap.SampleCmpLevelZero( smp_smap, tc.xy + float2( tcM.x, tcGH.y ), tc.z, int2( col, row ) ).x;
}
else // center rows
{
if( col == -GS2 ) // left
s += ( pwAB.x * 2.0 ) * s_smap.SampleCmpLevelZero( smp_smap, tc.xy + float2( tcAB.x, tcM.y ), tc.z, int2( col, row ) ).x;
else if( col == GS2 ) // right
s += ( pwGH.x * 2.0 ) * s_smap.SampleCmpLevelZero( smp_smap, tc.xy + float2( tcGH.x, tcM.y), tc.z, int2( col, row ) ).x;
else // center
s += ( 2.0 * 2.0 ) * s_smap.SampleCmpLevelZero( smp_smap, tc.xy + tcM, tc.z, int2( col, row ) ).x;
}
}
}
return s/49.0;
}
#ifdef SM_MINMAX
bool cheap_reject( float3 tc, inout bool full_light )
{
float4 plane0 = sm_minmax_gather( tc.xy, int2( -1,-1 ) );
float4 plane1 = sm_minmax_gather( tc.xy, int2( 1,-1 ) );
float4 plane2 = sm_minmax_gather( tc.xy, int2( -1, 1 ) );
float4 plane3 = sm_minmax_gather( tc.xy, int2( 1, 1 ) );
bool plane = all( ( plane0 >= (0).xxxx ) * ( plane1 >= (0).xxxx ) * ( plane2 >= (0).xxxx ) * ( plane3 >= (0).xxxx ) );
[flatten] if( !plane ) // if there are no proper plane equations in the support region
{
bool no_plane = all( ( plane0 < (0).xxxx ) * ( plane1 < (0).xxxx ) * ( plane2 < (0).xxxx ) * ( plane3 < (0).xxxx ) );
float4 z = ( tc.z - 0.0005 ).xxxx;
bool reject = all( ( z > -plane0 ) * ( z > -plane1 ) * ( z > -plane2 ) * ( z > -plane3 ) );
[flatten] if( no_plane && reject )
{
full_light = false;
return true;
}
else
{
return false;
}
}
else // plane equation detected
{
// compute corrected z for texel pos
static const float scale = float( SMAP_size / 4 );
float2 fc = frac( tc.xy * scale );
float z = lerp( lerp( plane0.y, plane1.x, fc.x ), lerp( plane2.z, plane3.w, fc.x ), fc.y );
// do minmax test with new z
full_light = ( ( tc.z - 0.0001 ) <= z );
return true;
}
}
#endif // SM_MINMAX
float shadow_hw_hq( float4 tc )
{
#ifdef SM_MINMAX
bool full_light = false;
bool cheap_path = cheap_reject( tc.xyz / tc.w, full_light );
[branch] if( cheap_path )
{
[branch] if( full_light == true )
return 1.0;
else
return sample_hw_pcf( tc, (0).xxxx );
}
else
{
#if SUN_QUALITY>=4 // extreme quality
return shadow_extreme_quality( tc.xyz / tc.w );
#else // SUN_QUALITY<4
#ifdef SM_4_1
return dx10_1_hw_hq_7x7( tc.xyz / tc.w );
#else // SM_4_1
return dx10_0_hw_hq_7x7( tc );
#endif // SM_4_1
#endif //SUN_QUALITY==4
}
#else // SM_MINMAX
#if SUN_QUALITY>=4 // extreme quality
return shadow_extreme_quality( tc.xyz / tc.w );
#else // SUN_QUALITY<4
#ifdef SM_4_1
return dx10_1_hw_hq_7x7( tc.xyz / tc.w );
#else // SM_4_1
return dx10_0_hw_hq_7x7( tc );
#endif // SM_4_1
#endif //SUN_QUALITY==4
#endif // SM_MINMAX
}
//////////////////////////////////////////////////////////////////////////////////////////
// D24X8+PCF
//////////////////////////////////////////////////////////////////////////////////////////
float4 test (float4 tc, float2 offset)
{
// float4 tcx = float4 (tc.xy + tc.w*offset, tc.zw);
// return tex2Dproj (s_smap,tcx);
tc.xyz /= tc.w;
tc.xy += offset;
return s_smap.SampleCmpLevelZero( smp_smap, tc.xy, tc.z).x;
}
/*half shadowtest_sun (float4 tc, float4 tcJ) // jittered sampling
{
half4 r;
const float scale = (0.5f/float(SMAP_size));
float texsize = 2*SMAP_size;
float2 tc_J = tc.xy/tc.w*texsize/8.0f;
float2 fr = frac(tc_J)*.5f;
// half4 J0 = tex2D (jitter0,fr)*scale;
// half4 J1 = tex2D (jitter1,fr)*scale*2;
float4 J0 = jitter0.Sample( smp_jitter, fr )*scale;
// float4 J1 = jitter1.Sample( smp_jitter, fr )*scale;
float k = 0.99f/float(SMAP_size);
r.x = test (tc,J0.xy+float2(-k,-k)).x;
r.y = test (tc,J0.wz+float2( k,-k)).y;
r.z = test (tc,J0.xy+float2(-k, k)).z;
r.w = test (tc,J0.wz+float2( k, k)).x;
half4 f;
float k1 = 1.5f/float(SMAP_size);
f.x = test (tc,-J0.xy+float2(-k1,0)).x;
f.y = test (tc,-J0.wz+float2( 0,-k1)).y;
f.z = test (tc,-J0.xy+float2( k1, 0)).z;
f.w = test (tc,-J0.wz+float2( 0, k1)).x;
half res = ( r.x + r.y + r.z + r.w + f.x + f.y + f.z + f.w )*1.h/(4.h + 4.h );
return res;
}*/
half shadowtest_sun (float4 tc, float4 tcJ) // jittered sampling
{
half4 r;
// const float scale = (2.0f/float(SMAP_size));
const float scale = (0.7f/float(SMAP_size));
float2 tc_J = frac(tc.xy/tc.w*SMAP_size/4.0f )*.5f;
float4 J0 = jitter0.Sample(smp_jitter,tc_J)*scale;
//half4 J1 = tex2D (jitter1,tc_J)*scale;
const float k = .5f/float(SMAP_size);
r.x = test (tc, J0.xy+half2(-k,-k)).x;
r.y = test (tc, J0.wz+half2( k,-k)).y;
r.z = test (tc,-J0.xy+half2(-k, k)).z;
r.w = test (tc,-J0.wz+half2( k, k)).x;
return dot(r,1.h/4.h);
}
half shadow_high (float4 tc) // jittered sampling
{
const float scale = (0.5f/float(SMAP_size));
float2 tc_J = frac(tc.xy/tc.w*SMAP_size/4.0f )*.5f;
float4 J0 = jitter0.Sample (smp_jitter,tc_J)*scale;
const float k = 1.f/float(SMAP_size);
half4 r;
r.x = test (tc,J0.xy+half2(-k,-k)).x;
r.y = test (tc,J0.wz+half2( k,-k)).y;
r.z = test (tc,J0.xy+half2(-k, k)).z;
r.w = test (tc,J0.wz+half2( k, k)).x;
const float k1 = 1.3f/float(SMAP_size);
half4 r1;
r1.x = test (tc,-J0.xy+half2(-k1,0)).x;
r1.y = test (tc,-J0.wz+half2( 0,-k1)).y;
r1.z = test (tc,-2*J0.xy+half2( k1, 0)).z;
r1.w = test (tc,-2*J0.wz+half2( 0, k1)).x;
return ( r.x + r.y + r.z + r.w + r1.x + r1.y + r1.z + r1.w )*1.h/8.h;
}
float shadow( float4 tc )
{
#ifdef USE_ULTRA_SHADOWS
# ifdef SM_MINMAX
return modify_light( shadow_hw_hq( tc ) );
# else
return shadow_hw_hq( tc );
# endif
#else
# if SUN_QUALITY>=2 // Hight quality
//return shadowtest_sun ( tc, float4(0,0,0,0) ); // jittered sampling;
return shadow_hw (tc);
# else
return shadow_hw (tc);
# endif
#endif
}
float shadow_volumetric( float4 tc )
{
return sample_hw_pcf (tc,float4(-1,-1,0,0));
}
#ifdef SM_MINMAX
//////////////////////////////////////////////////////////////////////////////////////////
// hardware + PCF
//////////////////////////////////////////////////////////////////////////////////////////
float shadow_dx10_1( float4 tc, float2 tcJ, float2 pos2d )
{
return shadow( tc );
}
float shadow_dx10_1_sunshafts( float4 tc, float2 pos2d )
{
float3 t = tc.xyz / tc.w;
float minmax = s_smap_minmax.SampleLevel( smp_nofilter, t, 0 ).x;
bool umbra = ( ( minmax.x < 0 ) && ( t.z > -minmax.x ) );
[branch] if( umbra )
{
return 0.0;
}
else
{
return shadow_hw( tc );
}
}
#endif
//////////////////////////////////////////////////////////////////////////////////////////
// testbed
//uniform sampler2D jitter0;
//uniform sampler2D jitter1;
float shadowtest (float4 tc, float4 tcJ) // jittered sampling
{
float4 r;
const float scale = (2.7f/float(SMAP_size));
// float4 J0 = tex2Dproj (jitter0,tcJ)*scale;
// float4 J1 = tex2Dproj (jitter1,tcJ)*scale;
tcJ.xy /= tcJ.w;
float4 J0 = jitter0.Sample( smp_jitter, tcJ )*scale;
float4 J1 = jitter1.Sample( smp_jitter, tcJ )*scale;
r.x = test (tc,J0.xy).x;
r.y = test (tc,J0.wz).y;
r.z = test (tc,J1.xy).z;
r.w = test (tc,J1.wz).x;
return dot(r,1.h/4.h);
}
float shadow_rain (float4 tc, float2 tcJ) // jittered sampling
{
float4 r;
const float scale = (4.0f/float(SMAP_size));
// float4 J0 = jitter0.Sample( smp_jitter, tcJ )*scale;
// float4 J1 = jitter1.Sample( smp_jitter, tcJ )*scale;
float4 J0 = jitter0.Sample( smp_linear, tcJ )*scale;
float4 J1 = jitter1.Sample( smp_linear, tcJ )*scale;
r.x = test (tc,J0.xy).x;
r.y = test (tc,J0.wz).y;
r.z = test (tc,J1.xy).z;
r.w = test (tc,J1.wz).x;
// float4 J0 = jitterMipped.Sample( smp_base, tcJ )*scale;
// r.x = test (tc,J0.xy).x;
// r.y = test (tc,J0.wz).y;
// r.z = test (tc,J0.yz).z;
// r.w = test (tc,J0.xw).x;
return dot(r,1.h/4.h);
}
//////////////////////////////////////////////////////////////////////////////////////////
#ifdef USE_SUNMASK
float3x4 m_sunmask; // ortho-projection
float sunmask( float4 P )
{
float2 tc = mul( m_sunmask, P ); //
// return tex2D( s_lmap, tc ).w; // A8
return s_lmap.Sample( smp_linear, tc ).w; // A8
}
#else
float sunmask( float4 P ) { return 1.h; } //
#endif
//////////////////////////////////////////////////////////////////////////////////////////
uniform float4x4 m_shadow;
#endif