// NOTE: file-viewer metadata ("852 lines, No EOL, 31 KiB, C") was captured here; it is not part of the shader source.
#ifndef SHADOW_H
|
|
#define SHADOW_H
|
|
|
|
#include "common.h"
|
|
|
|
// ---------------------------------------------------------------------------
// Shadow-map resources.
// (The legacy declarations "uniform sampler s_smap : register(ps,s0)" and
//  "Texture2D<float> s_smap" were already disabled in favour of the untyped
//  texture below; the original note says it is "used for RGBA texture too ?!".)
// ---------------------------------------------------------------------------
Texture2D s_smap : register(ps,t0); // 2D/cube shadowmap

Texture2D<float> s_smap_minmax; // companion map sampled by shadow_dx10_1_sunshafts
#include "gather.ps"

SamplerComparisonState smp_smap; // special compare sampler (used by SampleCmpLevelZero / GatherCmpRed)
sampler smp_jitter;

// Jitter lookup textures.
// (jitter2, jitter3 and jitterS were already commented out in the original.)
Texture2D jitter0;
Texture2D jitter1;

Texture2D jitterMipped;
|
|
|
|
// Tap spacing, in shadow-map texels, applied by sample_hw_pcf():
// 1.0 with USE_ULTRA_SHADOWS, 0.6 otherwise.
#ifdef USE_ULTRA_SHADOWS
#define KERNEL 1.0f
#else
#define KERNEL 0.6f
#endif
|
|
|
|
// Remaps a light/shadow factor: anything above 0.7 is fully lit (1.0),
// values in [0, 0.7] ramp linearly from 0 to 1, negatives clamp to 0.
float modify_light( float light )
{
	if( light > 0.7 )
	{
		return 1.0;
	}

	const float ramp = saturate( light / 0.7 );
	return lerp( 0.0, 1.0, ramp );
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////////////////////
|
|
// hardware + PCF
|
|
//////////////////////////////////////////////////////////////////////////////////////////
|
|
// One hardware comparison tap of the shadow map.
//   tc    : homogeneous shadow-map coordinate (xyz is divided by w here)
//   shift : tap offset; only shift.xy is used, scaled by KERNEL / SMAP_size
// Returns the result of the comparison sample against tc.z.
float sample_hw_pcf (float4 tc,float4 shift)
{
	static const float tap_step = KERNEL / float(SMAP_size);

	const float3 proj = tc.xyz / tc.w;
	const float2 uv = proj.xy + shift.xy * tap_step;

	return s_smap.SampleCmpLevelZero( smp_smap, uv, proj.z ).x;
}
|
|
|
|
// Half-extent of the 7x7 filters below: their row/col loops run from -GS2 to +GS2 in steps of 2.
#define GS2 3
|
|
|
|
// Basic 4-tap shadow: averages sample_hw_pcf() at the four diagonal
// offsets (-1,-1), (+1,-1), (-1,+1), (+1,+1).
float shadow_hw( float4 tc )
{
	// Accumulate in the same order as the taps are listed above.
	float lit = sample_hw_pcf( tc, float4( -1, -1, 0, 0) );
	lit += sample_hw_pcf( tc, float4( +1, -1, 0, 0) );
	lit += sample_hw_pcf( tc, float4( -1, +1, 0, 0) );
	lit += sample_hw_pcf( tc, float4( +1, +1, 0, 0) );

	return lit/4.h;
}
|
|
|
|
#if SUN_QUALITY>=4
|
|
// Footprint of the extreme-quality filter: FS x FS texels, FS2 is the half-extent.
#define FILTER_SIZE 11
#define FS  FILTER_SIZE
#define FS2 ( FILTER_SIZE / 2 )
|
|
|
|
// Weight matrix blended in by Fw() with the 3*t^2*(1-t) term:
// a 7x7 core of 1.0 with a 0.2 border ring, zero on the outermost ring.
static const float W2[FS][FS] =
{
	{ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 },
	{ 0.0, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.0 },
	{ 0.0, 0.2, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.0 },
	{ 0.0, 0.2, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.0 },
	{ 0.0, 0.2, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.0 },
	{ 0.0, 0.2, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.0 },
	{ 0.0, 0.2, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.0 },
	{ 0.0, 0.2, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.0 },
	{ 0.0, 0.2, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.0 },
	{ 0.0, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.0 },
	{ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 },
};
|
|
|
|
// Weight matrix blended in by Fw() with the 3*(1-t)^2*t term:
// a 5x5 core of 1.0 with a 0.2 border ring, zero elsewhere.
static const float W1[FS][FS] =
{
	{ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 },
	{ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 },
	{ 0.0, 0.0, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.0, 0.0 },
	{ 0.0, 0.0, 0.2, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.0, 0.0 },
	{ 0.0, 0.0, 0.2, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.0, 0.0 },
	{ 0.0, 0.0, 0.2, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.0, 0.0 },
	{ 0.0, 0.0, 0.2, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.0, 0.0 },
	{ 0.0, 0.0, 0.2, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.0, 0.0 },
	{ 0.0, 0.0, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.0, 0.0 },
	{ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 },
	{ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 },
};
|
|
|
|
// Weight matrix blended in by Fw() with the (1-t)^3 term:
// a single centre texel of 1.0 with a 0.1 ring around it, zero elsewhere.
static const float W0[FS][FS] =
{
	{ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 },
	{ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 },
	{ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 },
	{ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 },
	{ 0.0, 0.0, 0.0, 0.0, 0.1, 0.1, 0.1, 0.0, 0.0, 0.0, 0.0 },
	{ 0.0, 0.0, 0.0, 0.0, 0.1, 1.0, 0.1, 0.0, 0.0, 0.0, 0.0 },
	{ 0.0, 0.0, 0.0, 0.0, 0.1, 0.1, 0.1, 0.0, 0.0, 0.0, 0.0 },
	{ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 },
	{ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 },
	{ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 },
	{ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 },
};
|
|
|
|
// Dynamic filter weight for matrix cell (r, c).
// Blends W0, W1, W2 and a constant 1.0 using the cubic Bernstein basis in fL:
// fL = 0 selects W0, fL = 1 selects the uniform weight 1.0.
float Fw( int r, int c, float fL )
{
	const float inv = (1.0-fL);

	const float b0 = inv * inv * inv;
	const float b1 = 3.0f * inv * inv * fL;
	const float b2 = 3.0f * fL * fL * inv;
	const float b3 = fL * fL * fL;

	return b0 * W0[r][c] +
	       b1 * W1[r][c] +
	       b2 * W2[r][c] +
	       b3 * 1.0f;
}
|
|
|
|
// Footprint of the blocker search (BFS x BFS texels, BFS2 is the half-extent).
#define BLOCKER_FILTER_SIZE 11
#define BFS  BLOCKER_FILTER_SIZE
#define BFS2 ( BLOCKER_FILTER_SIZE / 2 )

// Scale applied to the relative blocker/receiver depth difference when
// deriving the filter blend ratio.
#define SUN_WIDTH 300.0f
|
|
|
|
// Horizontal bilinear weights for one 2x2 texel quad of the dynamic filter.
//   r      : row of the weight matrix (0 .. FS-1)
//   col    : quad column offset used by the sampling loops (-FS2 .. FS2, step 2)
//   fx     : horizontal sub-texel fraction
//   fRatio : blend ratio forwarded to Fw()
// Returns (weight of the quad's first texel column, weight of its second column).
// The first and last quads only see one neighbouring weight column, hence the
// special cases.
float2 seq_quad_weights( int r, int col, float fx, float fRatio )
{
	if( col == -FS2 )
	{
		const float w0 = Fw( r, 0, fRatio );
		const float w1 = Fw( r, 1, fRatio );
		return float2( w0 - w0 * fx, fx * ( w0 - w1 ) + w1 );
	}
	else if( col == FS2 )
	{
		const float wa = Fw( r, FS-2, fRatio );
		const float wb = Fw( r, FS-1, fRatio );
		// The visibility factor multiplied with this weight is exactly 0 or 1.
		return float2( fx * ( wa - wb ) + wb, fx * wb );
	}
	else
	{
		const int c = col + FS2;
		const float wl = Fw( r, c-1, fRatio );
		const float wc = Fw( r, c+0, fRatio );
		const float wr = Fw( r, c+1, fRatio );
		return float2( fx * ( wl - wc ) + wc, fx * ( wc - wr ) + wr );
	}
}

// Extreme-quality sun shadow with a depth-dependent filter.
// uses gather for DX11/10.1 and visibility encoding for DX10.0
//   tc : projected shadow-map coordinate (xy = position, z = receiver depth)
// Steps:
//   1. blocker search over the BFS x BFS footprint (count + summed depth),
//   2. blend ratio from average blocker depth vs. receiver depth,
//   3. sum of all dynamic weights (normalisation),
//   4. bilinear-weighted visibility filter over the FS x FS footprint.
// Returns the filtered visibility divided by the weight sum.
float shadow_extreme_quality( float3 tc )
{
	float s = 0.0f;               // weighted visibility accumulator
	float w = 0.0;                // sum of all filter weights
	float avgBlockerDepth = 0;
	float blockerCount = 0;
	float fRatio = 0.0;
	float4 v1[ FS2 + 1 ];         // visibility of the current quad row
	float2 v0[ FS2 + 1 ];         // second texel row (.xy) of the previous quad row
	int row;
	int col;

	// Snap to the texel grid; fc keeps the sub-texel fraction for bilinear weighting.
	const float2 stc = ( SMAP_size * tc.xy ) + float2( 0.5, 0.5 );
	const float2 fc = stc - floor( stc );
	tc.xy = tc.xy - ( (1.0f/SMAP_size) * fc );
	tc.z -= 0.0001f;

#if defined(SM_4_1) || defined( SM_5)
	// find number of blockers and sum up blocker depth
	for( row = -BFS2; row <= BFS2; row += 2 )
	{
		for( col = -BFS2; col <= BFS2; col += 2 )
		{
			const float4 depth4 = s_smap.Gather( smp_nofilter, tc.xy, int2( col, row ) );
			const float4 blocker4 = ( tc.zzzz <= depth4 ) ? (0.0f).xxxx : (1.0f).xxxx;

			blockerCount += dot( blocker4, (1.0f).xxxx );
			avgBlockerDepth += dot( depth4, blocker4 );
		}
	}
#else // SM_4_0
	// One bit mask per texel column; bit (row + FS2) is set when that texel is lit.
	uint vmask[ FS + 1 ];

	[unroll]for( col = 0; col <= FS; ++col )
		vmask[ col ] = uint(0);

	[unroll(11)]for( row = -FS2; row <= FS2; row +=2 )
	{
		[unroll]for( col = -FS2; col <= FS2; col +=2 )
		{
			// Visit the 2x2 quad in the order (0,0), (1,0), (0,1), (1,1).
			[unroll]for( int dy = 0; dy <= 1; ++dy )
			{
				[unroll]for( int dx = 0; dx <= 1; ++dx )
				{
					const float depth = s_smap.SampleLevel( smp_nofilter, tc.xy, 0, int2( col+dx, row+dy ) ).x;
					const bool lit = ( tc.z <= depth );
					const float b = lit ? (0.0f) : (1.0f);

					vmask[ col + FS2 + dx ] += ( lit ? ( uint(1) << uint( row + FS2 + dy ) ) : uint(0) );
					blockerCount += b;
					avgBlockerDepth += depth * b;
				}
			}
		}
	}
#endif

	// compute ratio average blocker depth vs. pixel depth
	if( blockerCount > 0.0 )
	{
		avgBlockerDepth /= blockerCount;
		fRatio = saturate( ( ( tc.z - avgBlockerDepth ) * SUN_WIDTH ) / avgBlockerDepth );
		fRatio *= fRatio;
	}

	// sum up weights of dynamic filter matrix
	for( row = 0; row < FS; ++row )
	{
		for( col = 0; col < FS; ++col )
			w += Fw(row,col,fRatio);
	}

	// filter shadow map samples using the dynamic weights
	[unroll(11)]for( row = -FS2; row <= FS2; row += 2 )
	{
		[unroll]for( col = -FS2; col <= FS2; col += 2 )
		{
			const int q = (col+FS2)/2;   // quad index within the row
			const int r = row + FS2;     // weight-matrix row of the quad's first texel row

#if ( defined(SM_5) ) || ( defined(SM_4_1) )
#ifdef SM_5
			v1[q] = s_smap.GatherCmpRed( smp_smap, tc.xy, tc.z, int2( col, row ) );
#else // SM_4_1
			float4 d4 = s_smap.Gather( smp_linear, tc.xy, int2( col, row ) );
			v1[q] = ( tc.zzzz <= d4 ) ? (1.0f).xxxx : (0.0f).xxxx;
#endif
#else
			// Decode the visibility bits recorded during the blocker search.
			v1[q].w = ( ( vmask[ col + FS2 + 0 ] & ( uint(1) << uint( row + FS2 + 0 ) ) ) ? 1.0f : 0.0f );
			v1[q].z = ( ( vmask[ col + FS2 + 1 ] & ( uint(1) << uint( row + FS2 + 0 ) ) ) ? 1.0f : 0.0f );
			v1[q].x = ( ( vmask[ col + FS2 + 0 ] & ( uint(1) << uint( row + FS2 + 1 ) ) ) ? 1.0f : 0.0f );
			v1[q].y = ( ( vmask[ col + FS2 + 1 ] & ( uint(1) << uint( row + FS2 + 1 ) ) ) ? 1.0f : 0.0f );
#endif

			// Weight row r: first texel row (w,z) with (1-fc.y), second (x,y) with fc.y.
			const float2 cur = seq_quad_weights( r, col, fc.x, fRatio );
			s += ( 1 - fc.y ) * ( v1[q].w * cur.x + v1[q].z * cur.y );
			s += ( fc.y ) * ( v1[q].x * cur.x + v1[q].y * cur.y );

			// Weight row r-1 lies between the previous quad's second texel row (v0)
			// and this quad's first texel row.
			if( row > -FS2 )
			{
				const float2 prev = seq_quad_weights( r - 1, col, fc.x, fRatio );
				s += ( 1 - fc.y ) * ( v0[q].x * prev.x + v0[q].y * prev.y );
				s += ( fc.y ) * ( v1[q].w * prev.x + v1[q].z * prev.y );
			}

			// Keep this quad's second texel row for the next row iteration.
			if( row != FS2 )
				v0[q] = v1[q].xy;
		}
	}

	return s/w;
}
|
|
|
|
// Unblended filter weights for matrix cell (r, c):
// x = W0, y = W1, z = W2, w = the constant 1.0.
// The blend by the filter ratio is applied later by the caller.
float4 Fw( int r, int c )
{
	float4 weights;
	weights.x = W0[r][c];
	weights.y = W1[r][c];
	weights.z = W2[r][c];
	weights.w = 1.0f;
	return weights;
}
|
|
|
|
// Horizontal bilinear weights for one 2x2 texel quad, for all four weight
// layers at once (see float4 Fw(r, c)).
//   r       : row of the weight matrix (0 .. FS-1)
//   col     : quad column offset used by the sampling loop (-FS2 .. FS2, step 2)
//   fx      : horizontal sub-texel fraction
//   w_first : weight of the quad's first texel column
//   w_second: weight of the quad's second texel column
void seq_fused_quad_weights( int r, int col, float fx, out float4 w_first, out float4 w_second )
{
	if( col == -FS2 )
	{
		const float4 w0 = Fw( r, 0 );
		const float4 w1 = Fw( r, 1 );
		w_first = w0 - w0 * fx;
		w_second = fx * ( w0 - w1 ) + w1;
	}
	else if( col == FS2 )
	{
		const float4 wa = Fw( r, FS-2 );
		const float4 wb = Fw( r, FS-1 );
		w_first = fx * ( wa - wb ) + wb;
		// The visibility factor multiplied with this weight is exactly 0 or 1.
		w_second = fx * wb;
	}
	else
	{
		const int c = col + FS2;
		const float4 wl = Fw( r, c-1 );
		const float4 wc = Fw( r, c+0 );
		const float4 wr = Fw( r, c+1 );
		w_first = fx * ( wl - wc ) + wc;
		w_second = fx * ( wc - wr ) + wr;
	}
}

//======================================================================================
// This shader computes the contact hardening shadow filter.
// Fused variant: the blocker search and the visibility filter share one pass
// over the footprint. Visibility is accumulated separately per weight layer
// (W0, W1, W2, 1.0) in a float4, and the layers are blended with the cubic
// Bernstein basis of fRatio once the blocker statistics are known.
//   tc : projected shadow-map coordinate (xy = position, z = receiver depth)
//======================================================================================
float shadow_extreme_quality_fused( float3 tc )
{
	float4 s = (0.0f).xxxx;       // per-layer weighted visibility
	float w = 0.0;                // sum of all blended filter weights
	float avgBlockerDepth = 0;
	float blockerCount = 0;
	float fRatio = 0.0;
	float4 v1[ FS2 + 1 ];         // visibility of the current quad row
	float2 v0[ FS2 + 1 ];         // second texel row (.xy) of the previous quad row
	int row;
	int col;

	// Snap to the texel grid; fc keeps the sub-texel fraction for bilinear weighting.
	const float2 stc = ( SMAP_size * tc.xy ) + float2( 0.5, 0.5 );
	const float2 fc = stc - floor( stc );
	tc.xy = tc.xy - ( fc * (1.0f/SMAP_size) );

	// filter shadow map samples using the dynamic weights
	[unroll(FS)]for( row = -FS2; row <= FS2; row += 2 )
	{
		for( col = -FS2; col <= FS2; col += 2 )
		{
			const int q = (col+FS2)/2;   // quad index within the row
			const int r = row + FS2;     // weight-matrix row of the quad's first texel row
			float4 d4;

#ifndef PS_4
			d4 = s_smap.Gather( smp_nofilter, tc.xy + (1.0f/SMAP_size) * float2( col, row ) );
#else
			// Four point samples filling the same components the Gather path provides.
			d4.w = s_smap.SampleLevel( smp_nofilter, tc.xy + (1.0f/SMAP_size) * float2( col, row ), 0 ).x;
			d4.z = s_smap.SampleLevel( smp_nofilter, tc.xy + (1.0f/SMAP_size) * float2( col+1, row ) , 0 ).x;
			d4.y = s_smap.SampleLevel( smp_nofilter, tc.xy + (1.0f/SMAP_size) * float2( col+1, row+1 ), 0 ).x;
			d4.x = s_smap.SampleLevel( smp_nofilter, tc.xy + (1.0f/SMAP_size) * float2( col, row+1 ), 0 ).x;
#endif
			// b4 marks blockers (1 = occluding texel), v1 marks lit texels.
			const float4 b4 = ( tc.zzzz <= d4 ) ? (0.0f).xxxx : (1.0f).xxxx;

			v1[q] = ( tc.zzzz <= d4 ) ? (1.0f).xxxx : (0.0f).xxxx;
			blockerCount += dot( b4, (1.0).xxxx );
			avgBlockerDepth += dot( d4, b4 );

			// Weight row r: first texel row (w,z) with (1-fc.y), second (x,y) with fc.y.
			float4 cur_a;
			float4 cur_b;
			seq_fused_quad_weights( r, col, fc.x, cur_a, cur_b );
			s += ( 1 - fc.y ) * ( v1[q].w * cur_a + v1[q].z * cur_b );
			s += ( fc.y ) * ( v1[q].x * cur_a + v1[q].y * cur_b );

			// Weight row r-1 lies between the previous quad's second texel row (v0)
			// and this quad's first texel row.
			if( row > -FS2 )
			{
				float4 prev_a;
				float4 prev_b;
				seq_fused_quad_weights( r - 1, col, fc.x, prev_a, prev_b );
				s += ( 1 - fc.y ) * ( v0[q].x * prev_a + v0[q].y * prev_b );
				s += ( fc.y ) * ( v1[q].w * prev_a + v1[q].z * prev_b );
			}

			// Keep this quad's second texel row for the next row iteration.
			if( row != FS2 )
			{
				v0[q] = v1[q].xy;
			}
		}
	}

	// compute ratio using formulas from PCSS
	if( blockerCount > 0.0 )
	{
		avgBlockerDepth /= blockerCount;
		fRatio = saturate( ( ( tc.z - avgBlockerDepth ) * SUN_WIDTH ) / avgBlockerDepth );
		fRatio *= fRatio;
	}

	// sum up weights of dynamic filter matrix
	for( row = 0; row < FS; ++row )
	{
		for( col = 0; col < FS; ++col )
		{
			w += Fw(row,col,fRatio);
		}
	}

	// Blend the four accumulated layers with the cubic Bernstein basis of fRatio.
	const float4 basis = float4( (1.0f-fRatio)*(1.0f-fRatio)*(1.0f-fRatio),
	                             3.0f * (1.0-fRatio)*(1.0-fRatio)*fRatio,
	                             3.0f * fRatio*fRatio*(1.0-fRatio),
	                             fRatio*fRatio*fRatio );
	return dot(s, basis)/w;
}
|
|
#endif
|
|
|
|
#ifdef SM_4_1
|
|
|
|
// 7x7 shadow filter built from 4x4 Gather fetches (SM 4.1 path).
//   tc : projected shadow-map coordinate (xy = position, z = receiver depth)
// Each gathered 2x2 quad is compared against tc.z and weighted per component;
// quads on the footprint border get partial weights derived from the
// sub-texel fraction fc. The sum is normalised by 49.
float dx10_1_hw_hq_7x7( float3 tc )
{
	float s = 0.0f;
	int row;
	int col;

	const float2 stc = ( SMAP_size * tc.xy ) + float2( 0.5, 0.5 );
	const float2 tcs = floor( stc );
	float2 fc;
	fc.xy = stc - tcs;
	tc.xy = tcs * ( 1.0 / SMAP_size );

	// loop over the rows
	for( row = -GS2; row <= GS2; row += 2 )
	{
		[unroll]for( col = -GS2; col <= GS2; col += 2 )
		{
			const float4 v = ( tc.zzzz <= s_smap.Gather( smp_nofilter, tc.xy, int2( col, row ) ) ) ? (1.0).xxxx : (0.0).xxxx;

			// Pick the per-component weights for this quad, then apply them once.
			float4 k;
			if( row == -GS2 ) // top row
			{
				if( col == -GS2 ) // left
					k = float4( 1.0-fc.x, 1.0, 1.0-fc.y, (1.0-fc.x)*(1.0-fc.y) );
				else if( col == GS2 ) // right
					k = float4( 1.0f, fc.x, fc.x*(1.0-fc.y), 1.0-fc.y );
				else // center
					k = float4( 1.0, 1.0, 1.0-fc.y, 1.0-fc.y );
			}
			else if( row == GS2 ) // bottom row
			{
				if( col == -GS2 ) // left
					k = float4( (1.0-fc.x)*fc.y, fc.y, 1.0, (1.0-fc.x) );
				else if( col == GS2 ) // right
					k = float4( fc.y, fc.x*fc.y, fc.x, 1.0 );
				else // center
					k = float4(fc.yy,1.0,1.0);
			}
			else // center rows
			{
				if( col == -GS2 ) // left
					k = float4( (1.0-fc.x), 1.0, 1.0, (1.0-fc.x) );
				else if( col == GS2 ) // right
					k = float4( 1.0, fc.x, fc.x, 1.0 );
				else // center
					k = (1.0).xxxx;
			}

			s += dot( k, v );
		}
	}

	return s*(1.0/49.0);
}
|
|
|
|
#endif
|
|
|
|
// 7x7 shadow filter built from 4x4 hardware comparison samples (SM 4.0 path).
//   tc : homogeneous shadow-map coordinate (xyz is divided by w here)
// Every sample carries a weight and a sub-texel offset; both are separable,
// so they are chosen per row (y) and per column (x) and then combined.
// The sum is normalised by 49.
float dx10_0_hw_hq_7x7( float4 tc )
{
	tc.xyz /= tc.w;

	float s = 0.0;

	// Snap to the texel grid, keeping the sub-texel fraction in fc.
	const float2 stc = ( SMAP_size * tc.xy ) + float2( 0.5, 0.5 );
	const float2 fc = stc - floor( stc );
	tc.xy = tc.xy - ( fc * ( 1.0/SMAP_size ) );

	// Weights (pw*) and matching sample offsets (tc*) for the first (AB),
	// middle (M) and last (GH) sample along each axis.
	const float2 pwAB = ( ( 2.0 ).xx - fc );
	const float2 tcAB = ( 1.0/SMAP_size ).xx / pwAB;
	const float2 tcM = (0.5/SMAP_size ).xx;
	const float2 pwGH = ( ( 1.0 ).xx + fc );
	const float2 tcGH = (1.0/SMAP_size) * ( fc / pwGH );

	for( int row = -GS2; row <= GS2; row += 2 )
	{
		float wy;
		float oy;
		if( row == -GS2 ) // top row
		{
			wy = pwAB.y;
			oy = tcAB.y;
		}
		else if( row == GS2 ) // bottom row
		{
			wy = pwGH.y;
			oy = tcGH.y;
		}
		else // center rows
		{
			wy = 2.0;
			oy = tcM.y;
		}

		for( int col = -GS2; col <= GS2; col += 2 )
		{
			float wx;
			float ox;
			if( col == -GS2 ) // left
			{
				wx = pwAB.x;
				ox = tcAB.x;
			}
			else if( col == GS2 ) // right
			{
				wx = pwGH.x;
				ox = tcGH.x;
			}
			else // center
			{
				wx = 2.0;
				ox = tcM.x;
			}

			s += ( wx * wy ) * s_smap.SampleCmpLevelZero( smp_smap, tc.xy + float2( ox, oy ), tc.z, int2( col, row ) ).x;
		}
	}

	return s/49.0;
}
|
|
|
|
#ifdef SM_MINMAX
|
|
// Early-out test based on four sm_minmax_gather() fetches around tc.xy.
//   tc         : projected shadow-map coordinate (xy = position, z = receiver depth)
//   full_light : written only when the function returns true
// Returns true when the cheap result is usable (full_light then says whether
// the pixel is fully lit), false when the caller must run the full filter.
// Non-negative gathered values are treated as plane data; negative values
// are compared against the biased receiver depth after negation.
bool cheap_reject( float3 tc, inout bool full_light )
{
	const float4 plane0 = sm_minmax_gather( tc.xy, int2( -1,-1 ) );
	const float4 plane1 = sm_minmax_gather( tc.xy, int2( 1,-1 ) );
	const float4 plane2 = sm_minmax_gather( tc.xy, int2( -1, 1 ) );
	const float4 plane3 = sm_minmax_gather( tc.xy, int2( 1, 1 ) );
	const bool plane = all( ( plane0 >= (0).xxxx ) * ( plane1 >= (0).xxxx ) * ( plane2 >= (0).xxxx ) * ( plane3 >= (0).xxxx ) );

	[flatten] if( plane ) // plane equation detected
	{
		// compute corrected z for texel pos
		static const float scale = float( SMAP_size / 4 );
		const float2 fc = frac( tc.xy * scale );
		const float plane_z = lerp( lerp( plane0.y, plane1.x, fc.x ), lerp( plane2.z, plane3.w, fc.x ), fc.y );

		// do minmax test with new z
		full_light = ( ( tc.z - 0.0001 ) <= plane_z );

		return true;
	}
	else // if there are no proper plane equations in the support region
	{
		const bool no_plane = all( ( plane0 < (0).xxxx ) * ( plane1 < (0).xxxx ) * ( plane2 < (0).xxxx ) * ( plane3 < (0).xxxx ) );
		const float4 biased_z = ( tc.z - 0.0005 ).xxxx;
		const bool reject = all( ( biased_z > -plane0 ) * ( biased_z > -plane1 ) * ( biased_z > -plane2 ) * ( biased_z > -plane3 ) );

		[flatten] if( no_plane && reject )
		{
			// Receiver is behind every stored value: fully shadowed.
			full_light = false;
			return true;
		}
		else
		{
			return false;
		}
	}
}
|
|
|
|
#endif // SM_MINMAX
|
|
|
|
// High-quality sun shadow entry point.
//   tc : homogeneous shadow-map coordinate
// With SM_MINMAX, cheap_reject() is tried first: a usable cheap result
// returns either 1.0 (fully lit) or a single centred PCF tap. Otherwise
// the filter is chosen at compile time:
//   SUN_QUALITY >= 4 -> shadow_extreme_quality
//   SM_4_1           -> dx10_1_hw_hq_7x7
//   else             -> dx10_0_hw_hq_7x7
float shadow_hw_hq( float4 tc )
{
#ifdef SM_MINMAX
	bool full_light = false;
	const bool cheap_path = cheap_reject( tc.xyz / tc.w, full_light );

	[branch] if( cheap_path )
	{
		[branch] if( full_light == true )
			return 1.0;
		else
			return sample_hw_pcf( tc, (0).xxxx );
	}
#endif // SM_MINMAX

#if SUN_QUALITY>=4 // extreme quality
	return shadow_extreme_quality( tc.xyz / tc.w );
#elif defined(SM_4_1)
	return dx10_1_hw_hq_7x7( tc.xyz / tc.w );
#else
	return dx10_0_hw_hq_7x7( tc );
#endif
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////////////////////
|
|
// D24X8+PCF
|
|
//////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
// Single comparison sample of the shadow map at a projected position plus an
// explicit texture-space offset.
//   tc     : homogeneous shadow-map coordinate (xyz is divided by w here)
//   offset : added to the projected xy before sampling
// The scalar comparison result is returned replicated into a float4.
// (Replaces the legacy tex2Dproj(s_smap, float4(tc.xy + tc.w*offset, tc.zw)) form.)
float4 test (float4 tc, float2 offset)
{
	const float3 proj = tc.xyz / tc.w;
	const float2 uv = proj.xy + offset;

	return s_smap.SampleCmpLevelZero( smp_smap, uv, proj.z ).x;
}
|
|
|
|
/*half shadowtest_sun (float4 tc, float4 tcJ) // jittered sampling
|
|
{
|
|
half4 r;
|
|
|
|
const float scale = (0.5f/float(SMAP_size));
|
|
|
|
float texsize = 2*SMAP_size;
|
|
float2 tc_J = tc.xy/tc.w*texsize/8.0f;
|
|
float2 fr = frac(tc_J)*.5f;
|
|
|
|
// half4 J0 = tex2D (jitter0,fr)*scale;
|
|
// half4 J1 = tex2D (jitter1,fr)*scale*2;
|
|
float4 J0 = jitter0.Sample( smp_jitter, fr )*scale;
|
|
// float4 J1 = jitter1.Sample( smp_jitter, fr )*scale;
|
|
|
|
float k = 0.99f/float(SMAP_size);
|
|
r.x = test (tc,J0.xy+float2(-k,-k)).x;
|
|
r.y = test (tc,J0.wz+float2( k,-k)).y;
|
|
|
|
r.z = test (tc,J0.xy+float2(-k, k)).z;
|
|
r.w = test (tc,J0.wz+float2( k, k)).x;
|
|
|
|
half4 f;
|
|
float k1 = 1.5f/float(SMAP_size);
|
|
f.x = test (tc,-J0.xy+float2(-k1,0)).x;
|
|
f.y = test (tc,-J0.wz+float2( 0,-k1)).y;
|
|
|
|
f.z = test (tc,-J0.xy+float2( k1, 0)).z;
|
|
f.w = test (tc,-J0.wz+float2( 0, k1)).x;
|
|
|
|
half res = ( r.x + r.y + r.z + r.w + f.x + f.y + f.z + f.w )*1.h/(4.h + 4.h );
|
|
return res;
|
|
}*/
|
|
// Jittered 4-tap sun shadow.
//   tc  : homogeneous shadow-map coordinate
//   tcJ : not used by this implementation
// A jitter vector is fetched from jitter0 at a coordinate derived from the
// shadow-map position, scaled by 0.7 texel, and added (or subtracted) to four
// diagonal half-texel offsets. The four taps are averaged.
half shadowtest_sun (float4 tc, float4 tcJ) // jittered sampling
{
	// (an earlier, disabled variant used a scale of 2.0f/float(SMAP_size))
	const float jitter_scale = (0.7f/float(SMAP_size));
	const float k = .5f/float(SMAP_size);

	const float2 jitter_uv = frac(tc.xy/tc.w*SMAP_size/4.0f )*.5f;
	const float4 J0 = jitter0.Sample(smp_jitter,jitter_uv)*jitter_scale;

	half4 taps;
	taps.x = test (tc, J0.xy+half2(-k,-k)).x;
	taps.y = test (tc, J0.wz+half2( k,-k)).y;
	taps.z = test (tc,-J0.xy+half2(-k, k)).z;
	taps.w = test (tc,-J0.wz+half2( k, k)).x;

	return dot(taps,1.h/4.h);
}
|
|
|
|
// Jittered 8-tap shadow.
//   tc : homogeneous shadow-map coordinate
// Four diagonal taps at 1 texel plus four axis-aligned taps at 1.3 texels,
// each displaced by a jitter vector from jitter0 (scaled by 0.5 texel; the
// last two axis taps use twice the negated jitter). The eight taps are averaged.
half shadow_high (float4 tc) // jittered sampling
{
	const float jitter_scale = (0.5f/float(SMAP_size));
	const float k = 1.f/float(SMAP_size);
	const float k1 = 1.3f/float(SMAP_size);

	const float2 jitter_uv = frac(tc.xy/tc.w*SMAP_size/4.0f )*.5f;
	const float4 J0 = jitter0.Sample (smp_jitter,jitter_uv)*jitter_scale;

	// Diagonal ring.
	half4 diag;
	diag.x = test (tc,J0.xy+half2(-k,-k)).x;
	diag.y = test (tc,J0.wz+half2( k,-k)).y;
	diag.z = test (tc,J0.xy+half2(-k, k)).z;
	diag.w = test (tc,J0.wz+half2( k, k)).x;

	// Axis-aligned ring.
	half4 axis;
	axis.x = test (tc,-J0.xy+half2(-k1,0)).x;
	axis.y = test (tc,-J0.wz+half2( 0,-k1)).y;
	axis.z = test (tc,-2*J0.xy+half2( k1, 0)).z;
	axis.w = test (tc,-2*J0.wz+half2( 0, k1)).x;

	return ( diag.x + diag.y + diag.z + diag.w + axis.x + axis.y + axis.z + axis.w )*1.h/8.h;
}
|
|
|
|
// Main sun-shadow lookup; the filter is selected at compile time:
//   USE_ULTRA_SHADOWS + SM_MINMAX -> modify_light( shadow_hw_hq )
//   USE_ULTRA_SHADOWS             -> shadow_hw_hq
//   otherwise                     -> shadow_hw
// Without ultra shadows every SUN_QUALITY level uses shadow_hw; the jittered
// shadowtest_sun path for SUN_QUALITY >= 2 (high quality) was already disabled.
float shadow( float4 tc )
{
#if defined(USE_ULTRA_SHADOWS) && defined(SM_MINMAX)
	return modify_light( shadow_hw_hq( tc ) );
#elif defined(USE_ULTRA_SHADOWS)
	return shadow_hw_hq( tc );
#else
	return shadow_hw (tc);
#endif
}
|
|
|
|
// Shadow term for volumetric lighting: a single hardware PCF tap at the
// (-1,-1) offset.
float shadow_volumetric( float4 tc )
{
	const float4 tap_shift = float4(-1,-1,0,0);
	return sample_hw_pcf (tc,tap_shift);
}
|
|
|
|
|
|
#ifdef SM_MINMAX
|
|
|
|
//////////////////////////////////////////////////////////////////////////////////////////
|
|
// hardware + PCF
|
|
//////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
// DX10.1 entry point kept for callers that pass jitter / screen coordinates.
// tcJ and pos2d are ignored; the call forwards straight to shadow().
float shadow_dx10_1( float4 tc, float2 tcJ, float2 pos2d )
{
	const float lit = shadow( tc );
	return lit;
}
|
|
|
|
// Sun-shaft shadow lookup with a min/max early-out.
//   tc    : homogeneous shadow-map coordinate
//   pos2d : not used by this implementation
// A negative min/max value whose negation lies in front of the receiver depth
// means the pixel is in full shadow (umbra) and 0.0 is returned without
// filtering; otherwise the 4-tap shadow_hw() result is returned.
float shadow_dx10_1_sunshafts( float4 tc, float2 pos2d )
{
	const float3 t = tc.xyz / tc.w;

	// s_smap_minmax is a Texture2D, so only t.xy addresses it. The original
	// passed the whole float3 and relied on implicit truncation.
	const float minmax = s_smap_minmax.SampleLevel( smp_nofilter, t.xy, 0 );
	const bool umbra = ( ( minmax < 0 ) && ( t.z > -minmax ) );

	[branch] if( umbra )
	{
		return 0.0;
	}
	else
	{
		return shadow_hw( tc );
	}
}
|
|
|
|
#endif
|
|
|
|
|
|
//////////////////////////////////////////////////////////////////////////////////////////
|
|
// testbed
|
|
|
|
//uniform sampler2D jitter0;
|
|
//uniform sampler2D jitter1;
|
|
// Testbed: jittered 4-tap shadow.
//   tc  : homogeneous shadow-map coordinate
//   tcJ : homogeneous jitter-texture coordinate (xy is divided by w here)
// Two jitter vectors (jitter0, jitter1), scaled by 2.7 texels, provide four
// tap offsets; the four taps are averaged.
float shadowtest (float4 tc, float4 tcJ) // jittered sampling
{
	float4 r;

	const float scale = (2.7f/float(SMAP_size));

	// Manual projection replaces the legacy tex2Dproj(jitterN, tcJ) lookups.
	tcJ.xy /= tcJ.w;

	// The jitter maps are Texture2D, so only tcJ.xy addresses them. The
	// original passed the whole float4 and relied on implicit truncation.
	float4 J0 = jitter0.Sample( smp_jitter, tcJ.xy )*scale;
	float4 J1 = jitter1.Sample( smp_jitter, tcJ.xy )*scale;

	r.x = test (tc,J0.xy).x;
	r.y = test (tc,J0.wz).y;
	r.z = test (tc,J1.xy).z;
	r.w = test (tc,J1.wz).x;

	return dot(r,1.h/4.h);
}
|
|
|
|
// Jittered 4-tap shadow used for rain.
//   tc  : homogeneous shadow-map coordinate
//   tcJ : jitter-texture coordinate
// Two jitter vectors (jitter0, jitter1), fetched with smp_linear and scaled
// by 4 texels, provide four tap offsets; the four taps are averaged.
// (Earlier, disabled variants sampled with smp_jitter or took all four
//  offsets from jitterMipped.)
float shadow_rain (float4 tc, float2 tcJ) // jittered sampling
{
	const float jitter_scale = (4.0f/float(SMAP_size));

	const float4 J0 = jitter0.Sample( smp_linear, tcJ )*jitter_scale;
	const float4 J1 = jitter1.Sample( smp_linear, tcJ )*jitter_scale;

	float4 taps;
	taps.x = test (tc,J0.xy).x;
	taps.y = test (tc,J0.wz).y;
	taps.z = test (tc,J1.xy).z;
	taps.w = test (tc,J1.wz).x;

	return dot(taps,1.h/4.h);
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////////////////////
|
|
#ifdef USE_SUNMASK
float3x4 m_sunmask; // ortho-projection

// Sun-mask lookup: projects P with m_sunmask and returns the alpha channel
// of s_lmap at the resulting coordinate.
float sunmask( float4 P )
{
	// mul() of a float3x4 with a float4 yields a float3; only xy addresses
	// the mask. The original relied on implicit truncation to float2.
	float2 tc = mul( m_sunmask, P ).xy;
	// Replaces the legacy tex2D( s_lmap, tc ).w lookup.
	return s_lmap.Sample( smp_linear, tc ).w; // A8
}
#else
// Sun mask disabled: everything is unmasked.
float sunmask( float4 P ) { return 1.h; }
#endif
|
|
//////////////////////////////////////////////////////////////////////////////////////////
|
|
// Not referenced anywhere in this header; presumably the world-to-shadow-map
// transform consumed by shaders that include it. TODO confirm against callers.
uniform float4x4 m_shadow;
|
|
|
|
#endif |