#ifndef common_functions_h_included
#define common_functions_h_included

// Piecewise contrast function.
// Inputs above 0.5 are mirrored (1-Input), the doubled value is clamped to [0,1],
// raised to ContrastPower and halved; the result is mirrored back for the upper half.
float Contrast( float Input, float ContrastPower )
{
	bool IsAboveHalf = Input > 0.5;
	float ToRaise = saturate( 2*( IsAboveHalf ? 1-Input : Input ) );
	float Output = 0.5*pow( ToRaise, ContrastPower );
	Output = IsAboveHalf ? 1-Output : Output;
	return Output;
}

// Splits a scaled colour into a tone-mapped "low" part and a "high" part.
//   low  = rgb*(1 + rgb/W^2) / (rgb + 1), with W = fWhiteIntensity
//   high = rgb / def_hdr
// Both outputs replicate the blue channel into .w (swizzle .xyzz).
void tonemap( out float4 low, out float4 high, float3 rgb, float scale )
{
	rgb = rgb*scale;

	const float fWhiteIntensity = 1.7;
	const float fWhiteIntensitySQR = fWhiteIntensity*fWhiteIntensity;

	// low = (rgb/(rgb + 1)).xyzz;
	low = ( (rgb*(1+rgb/fWhiteIntensitySQR)) / (rgb+1) ).xyzz;

	high = rgb.xyzz/def_hdr; // 8x dynamic range

/*
	rgb = rgb*scale;

	low = rgb.xyzz;
	high = low/def_hdr; // 8x dynamic range
*/
}

// Adds the high part, weighted by its own alpha, onto the low part; alpha of the result is 1.
// The .rgb swizzle makes explicit the truncation that `float3 + float4` performs implicitly.
float4 combine_bloom( float3 low, float4 high )
{
	return float4( low + high.rgb*high.a, 1.h );
}

// Fog factor: dot product of the position with the fog_plane constant.
float calc_fogging( float4 w_pos )
{
	return dot( w_pos, fog_plane );
}

// Adds the (du,dv) offset to the packed base texture coordinate and rescales by 32/32768.
float2 unpack_tc_base( float2 tc, float du, float dv )
{
	return (tc.xy + float2(du,dv))*(32.f/32768.f); //!Increase from 32bit to 64bit floating point
}

// Sun term: sun colour times the clamped N.L against the reversed sun direction.
float3 calc_sun_r1( float3 norm_w )
{
	return L_sun_color*saturate( dot( (norm_w), -L_sun_dir_w ) );
}

// Hemisphere term: hemi colour times the non-negative Y component of the normal.
float3 calc_model_hemi_r1( float3 norm_w )
{
	return max( 0, norm_w.y )*L_hemi_color;
}

// Low-quality model lighting: material-weighted hemi + ambient + material-weighted sun.
float3 calc_model_lq_lighting( float3 norm_w )
{
	return L_material.x*calc_model_hemi_r1( norm_w ) + L_ambient + L_material.y*calc_sun_r1( norm_w );
}

// Range expansion helpers.
float3 unpack_normal( float3 v )	{ return 2*v-1; }	// v -> 2v-1
float3 unpack_bx2( float3 v )		{ return 2*v-1; }	// v -> 2v-1
float3 unpack_bx4( float3 v )		{ return 4*v-2; }	//!reduce the amount of stretching from 4*v-2 and increase precision
float2 unpack_tc_lmap( float2 tc )	{ return tc*(1.f/32768.f); }	// [-1 .. +1 ]

// Channel-order helpers: swap red and blue.
float4 unpack_color( float4 c )		{ return c.bgra; }
float4 unpack_D3DCOLOR( float4 c )	{ return c.bgra; }
float3 unpack_D3DCOLOR( float3 c )	{ return c.bgr; }

// Samples s_hemi at tc and returns its alpha channel (broadcast to float3 by the return type).
float3 p_hemi( float2 tc )
{
	// float3 t_lmh = tex2D (s_hemi, tc);
	// float3 t_lmh = s_hemi.Sample( smp_rtlinear, tc);
	// return dot(t_lmh,1.h/4.h);
	float4 t_lmh = s_hemi.Sample( smp_rtlinear, tc );
	return t_lmh.a;
}

// Accessors into a packed light-map sample: hemi is stored in .a, sun in .g.
float get_hemi( float4 lmh )	{ return lmh.a; }
float get_sun( float4 lmh )	{ return lmh.g; }

// Vertex hemisphere term: hemi colour scaled by 0.5 + 0.5*n.y.
float3 v_hemi( float3 n )
{
	return L_hemi_color*(.5f + .5f*n.y);
}

// Vertex sun term: sun colour times N.L (not clamped, unlike calc_sun_r1).
float3 v_sun( float3 n )
{
	return L_sun_color*dot( n, -L_sun_dir_w );
}

// Reflects the normalized eye-to-point vector about norm_w.
float3 calc_reflection( float3 pos_w, float3 norm_w )
{
	return reflect( normalize( pos_w-eye_position ), norm_w );
}

// Bit flags that can be stored inside the bit pattern of a float.
#define USABLE_BIT_1	uint(0x00002000)
#define USABLE_BIT_2	uint(0x00004000)
#define USABLE_BIT_3	uint(0x00008000)
#define USABLE_BIT_4	uint(0x00010000)
#define USABLE_BIT_5	uint(0x00020000)
#define USABLE_BIT_6	uint(0x00040000)
#define USABLE_BIT_7	uint(0x00080000)
#define USABLE_BIT_8	uint(0x00100000)
#define USABLE_BIT_9	uint(0x00200000)
#define USABLE_BIT_10	uint(0x00400000)
// USABLE_BIT_11 .. USABLE_BIT_14:
// At least two of those four bit flags must be mutually exclusive (i.e. all 4 bits must not be set together)
// This is because setting 0x47800000 sets all 5 FP16 exponent bits to 1 which means infinity
// This will be translated to a +/-MAX_FLOAT in the FP16 render target (0xFBFF/0x7BFF), overwriting the
// mantissa bits where other bit flags are stored.
#define USABLE_BIT_11	uint(0x00800000)
#define USABLE_BIT_12	uint(0x01000000)
#define USABLE_BIT_13	uint(0x02000000)
#define USABLE_BIT_14	uint(0x04000000)
#define USABLE_BIT_15	uint(0x80000000)
// This flag *must* be stored in the floating-point representation of the bit flag to store
// (the sentence above is truncated in the original source)
#define MUST_BE_SET	uint(0x40000000)

/*
float2 gbuf_pack_normal( float3 norm )
{
	float2 res;

	res = 0.5 * ( norm.xy + float2( 1, 1 ) ) ;
	res.x *= ( norm.z < 0 ? -1.0 : 1.0 );

	return res;
}

float3 gbuf_unpack_normal( float2 norm )
{
	float3 res;

	res.xy = ( 2.0 * abs( norm ) ) - float2(1,1);
	res.z = ( norm.x < 0 ? -1.0 : 1.0 ) * sqrt( abs( 1 - res.x * res.x - res.y * res.y ) );

	return res;
}
*/

// Holger Gruen AMD - I change normal packing and unpacking to make sure N.z is accessible without ALU cost
// this help the HDAO compute shader to run more efficiently

// Packs a normal into two floats:
//   .x = N.z (stored as is)
//   .y = 0.5*(N.x + 1), negated when N.y < 0 (the sign of .y carries the sign of N.y)
float2 gbuf_pack_normal( float3 norm )
{
	float2 res;

	res.x = norm.z;
	res.y = 0.5f * ( norm.x + 1.0f );
	res.y *= ( norm.y < 0.0f ? -1.0f : 1.0f );

	return res;
}

// Inverse of gbuf_pack_normal.
// N.y is rebuilt as +/- sqrt(|1 - x^2 - z^2|); the reconstruction presumes a unit-length normal.
float3 gbuf_unpack_normal( float2 norm )
{
	float3 res;

	res.z = norm.x;
	res.x = ( 2.0f * abs( norm.y ) ) - 1.0f;
	res.y = ( norm.y < 0 ? -1.0 : 1.0 ) * sqrt( abs( 1 - res.x * res.x - res.z * res.z ) );

	return res;
}

// Packs hemi and material into the bit pattern of one float.
//   hemi : clamped to [0,1], quantized to 8 bits, stored from bit 13 upwards
//   mtl  : mtl/1.333333333 quantized to 5 bits, stored from bit 21 upwards;
//          bit 4 of the material is additionally mirrored into the sign bit (USABLE_BIT_15),
//          which is where gbuf_unpack_mtl reads it back from
// MUST_BE_SET is always included. USABLE_BIT_14 is set whenever USABLE_BIT_13 is clear,
// so those two bits are never set together (see the note on USABLE_BIT_11..14).
float gbuf_pack_hemi_mtl( float hemi, float mtl )
{
	uint packed_mtl = uint( ( mtl / 1.333333333 ) * 31.0 );
	// uint packed = ( MUST_BE_SET + ( uint( hemi * 255.0 ) << 13 ) + ( ( packed_mtl & uint( 31 ) ) << 21 ) );
	// Clamp hemi max value
	uint packed = ( MUST_BE_SET + ( uint( saturate(hemi) * 255.9 ) << 13 ) + ( ( packed_mtl & uint( 31 ) ) << 21 ) );

	if( ( packed & USABLE_BIT_13 ) == 0 )
		packed |= USABLE_BIT_14;

	if( packed_mtl & uint( 16 ) )
		packed |= USABLE_BIT_15;

	return asfloat( packed );
}

// Extracts the 8-bit hemi value written by gbuf_pack_hemi_mtl and rescales it by 1/254.8.
float gbuf_unpack_hemi( float mtl_hemi )
{
	// return float( ( asuint( mtl_hemi ) >> 13 ) & uint(255) ) * (1.0/255.0);
	return float( ( asuint( mtl_hemi ) >> 13 ) & uint(255) ) * (1.0/254.8);
}

// Extracts the 5-bit material index written by gbuf_pack_hemi_mtl:
// low 4 bits come from bits 21..24, bit 4 comes from the sign bit (USABLE_BIT_15).
// The index is rescaled back to the [0 .. 1.333333333] range.
float gbuf_unpack_mtl( float mtl_hemi )
{
	uint packed = asuint( mtl_hemi );
	uint packed_mtl = ( ( packed >> 21 ) & uint(15) ) + ( ( packed & USABLE_BIT_15 ) == 0 ? 0 : 16 );
	return float( packed_mtl ) * (1.0/31.0) * 1.333333333;
}

// Fills the deferred output structure.
//   norm : xyz = normal, w = hemi
//   pos  : xyz = position, w = material
//   col  : written to res.C unchanged
// With GBUFFER_OPTIMIZATION everything except colour is packed into res.position as
// ( packed normal.xy, pos.z, packed hemi/mtl ). With EXTEND_F_DEFFER the mask is stored as well.
#ifndef EXTEND_F_DEFFER
f_deffer pack_gbuffer( float4 norm, float4 pos, float4 col )
#else
f_deffer pack_gbuffer( float4 norm, float4 pos, float4 col, uint imask )
#endif
{
	f_deffer res;

#ifndef GBUFFER_OPTIMIZATION
	res.position = pos;
	res.Ne = norm;
	res.C = col;
#else
	// norm.xyz: explicit form of the float4 -> float3 truncation at the call.
	res.position = float4( gbuf_pack_normal( norm.xyz ), pos.z, gbuf_pack_hemi_mtl( norm.w, pos.w ) );
	res.C = col;
#endif

#ifdef EXTEND_F_DEFFER
	res.mask = imask;
#endif

	return res;
}

#ifdef GBUFFER_OPTIMIZATION

// Loads and decodes one G-buffer sample (packed layout written by pack_gbuffer).
//   tc      : texture coordinate, used for Sample() when USE_MSAA is not defined
//   pos2d   : pixel position, used for Load() under USE_MSAA and for position reconstruction
//   iSample : MSAA sample index (only used under USE_MSAA)
// NOTE(review): iSample is `int` here but `uint` in the other overloads; left as is to keep the interface.
gbuffer_data gbuffer_load_data( float2 tc : TEXCOORD, float2 pos2d, int iSample )
{
	gbuffer_data gbd;

	gbd.P = float3(0,0,0);
	gbd.hemi = 0;
	gbd.mtl = 0;
	gbd.C = 0;
	gbd.N = float3(0,0,0);

#ifndef USE_MSAA
	float4 P = s_position.Sample( smp_nofilter, tc );
#else
	float4 P = s_position.Load( int3( pos2d, 0 ), iSample );
#endif

	// 3d view space pos reconstruction math
	// center of the plane (0,0) or (0.5,0.5) at distance 1 is eyepoint(0,0,0) + lookat (assuming |lookat| == 1)
	// left/right  = (0,0,1) -/+ tan(fHorzFOV/2) * (1,0,0 )
	// top/bottom  = (0,0,1) +/- tan(fVertFOV/2) * (0,1,0 )
	// lefttop     = ( -tan(fHorzFOV/2),  tan(fVertFOV/2), 1 )
	// righttop    = (  tan(fHorzFOV/2),  tan(fVertFOV/2), 1 )
	// leftbottom  = ( -tan(fHorzFOV/2), -tan(fVertFOV/2), 1 )
	// rightbottom = (  tan(fHorzFOV/2), -tan(fVertFOV/2), 1 )
	gbd.P = float3( P.z * ( pos2d * pos_decompression_params.zw - pos_decompression_params.xy ), P.z );

	// reconstruct N
	gbd.N = gbuf_unpack_normal( P.xy );

	// reconstruct material
	gbd.mtl = gbuf_unpack_mtl( P.w );

	// reconstruct hemi
	gbd.hemi = gbuf_unpack_hemi( P.w );

#ifndef USE_MSAA
	float4 C = s_diffuse.Sample( smp_nofilter, tc );
#else
	float4 C = s_diffuse.Load( int3( pos2d, 0 ), iSample );
#endif

	gbd.C = C.xyz;
	gbd.gloss = C.w;

	return gbd;
}

// Convenience overload: sample index 0.
gbuffer_data gbuffer_load_data( float2 tc : TEXCOORD, float2 pos2d )
{
	return gbuffer_load_data( tc, pos2d, 0 );
}

// Loads the G-buffer at OffsetTC. The pixel position is shifted by the texture-coordinate
// difference scaled by pos_decompression_params2.xy. Uses sample index 0.
gbuffer_data gbuffer_load_data_offset( float2 tc : TEXCOORD, float2 OffsetTC : TEXCOORD, float2 pos2d )
{
	float2 delta = ( ( OffsetTC - tc ) * pos_decompression_params2.xy );

	return gbuffer_load_data( OffsetTC, pos2d + delta, 0 );
}

// Same as above for an explicit MSAA sample index.
gbuffer_data gbuffer_load_data_offset( float2 tc : TEXCOORD, float2 OffsetTC : TEXCOORD, float2 pos2d, uint iSample )
{
	float2 delta = ( ( OffsetTC - tc ) * pos_decompression_params2.xy );

	return gbuffer_load_data( OffsetTC, pos2d + delta, iSample );
}

#else // GBUFFER_OPTIMIZATION

// Loads one G-buffer sample from the unpacked layout:
//   s_position : xyz = position, w = material
//   s_normal   : xyz = normal,   w = hemi
//   s_diffuse  : xyz = colour,   w = gloss
// Under USE_MSAA the texel coordinate for Load() is tc * pos_decompression_params2.xy.
gbuffer_data gbuffer_load_data( float2 tc : TEXCOORD, uint iSample )
{
	gbuffer_data gbd;

#ifndef USE_MSAA
	float4 P = s_position.Sample( smp_nofilter, tc );
#else
	float4 P = s_position.Load( int3( tc * pos_decompression_params2.xy, 0 ), iSample );
#endif

	gbd.P = P.xyz;
	gbd.mtl = P.w;

#ifndef USE_MSAA
	float4 N = s_normal.Sample( smp_nofilter, tc );
#else
	float4 N = s_normal.Load( int3( tc * pos_decompression_params2.xy, 0 ), iSample );
#endif

	gbd.N = N.xyz;
	gbd.hemi = N.w;

#ifndef USE_MSAA
	float4 C = s_diffuse.Sample( smp_nofilter, tc );
#else
	float4 C = s_diffuse.Load( int3( tc * pos_decompression_params2.xy, 0 ), iSample );
#endif

	gbd.C = C.xyz;
	gbd.gloss = C.w;

	return gbd;
}

// Convenience overload: sample index 0.
gbuffer_data gbuffer_load_data( float2 tc : TEXCOORD )
{
	return gbuffer_load_data( tc, 0 );
}

// Loads the G-buffer at OffsetTC; tc is unused in this variant.
gbuffer_data gbuffer_load_data_offset( float2 tc : TEXCOORD, float2 OffsetTC : TEXCOORD, uint iSample )
{
	return gbuffer_load_data( OffsetTC, iSample );
}

#endif // GBUFFER_OPTIMIZATION

//////////////////////////////////////////////////////////////////////////
// Alpha to coverage code
//
// Each variant maps an alpha value to an MSAA coverage mask with more bits set for
// higher alpha. pos2d is the pixel position and is used to vary the result per pixel.
// NOTE(review): the per-pixel index is built with `x | y`, not `x ^ y` or `x + y`; kept as is.
#if ( defined( MSAA_ALPHATEST_DX10_1_ATOC ) || defined( MSAA_ALPHATEST_DX10_1 ) )

#if MSAA_SAMPLES == 2
// 2 samples: alpha < 0.3333 -> no sample, < 0.6666 -> one sample (chosen by pixel), else both.
uint alpha_to_coverage ( float alpha, float2 pos2d )
{
	uint mask;
	uint pos = uint(pos2d.x) | uint( pos2d.y);

	if( alpha < 0.3333 )
		mask = 0;
	else if( alpha < 0.6666 )
		mask = 1 << ( pos & 1 );
	else
		mask = 3;

	return mask;
}
#endif

#if MSAA_SAMPLES == 4
// 4 samples: alpha is lowered by a small per-pixel offset, then thresholds
// 0.2 / 0.4 / 0.6 / 0.8 select 0, 1, 2, 3 or 4 low bits.
uint alpha_to_coverage ( float alpha, float2 pos2d )
{
	uint mask;

	float off = float( ( uint(pos2d.x) | uint( pos2d.y) ) & 3 );
	alpha = saturate( alpha - off * ( ( 0.2 / 4.0 ) / 3.0 ) );

	if( alpha < 0.40 )
	{
		if( alpha < 0.20 )
			mask = 0;
		else
			// only one bit set
			// (plain else: alpha < 0.40 already holds here, and mask is now assigned on every path)
			mask = 1;
	}
	else
	{
		if( alpha < 0.60 )
		{
			// 2 bits set => 1100 0110 0011 1001 1010 0101
			mask = 3;
		}
		else if( alpha < 0.8 )
			// 3 bits set => 1110 0111 1011 1101
			mask = 7;
		else
			mask = 0xf;
	}

	return mask;
}
#endif

#if MSAA_SAMPLES == 8
// 8 samples: alpha is lowered by a small per-pixel offset, then thresholds at multiples
// of 0.1111 select 0..8 low bits. The bit-pattern tables in the comments are the original
// author's notes; only the low-bits mask of each group is actually assigned.
uint alpha_to_coverage ( float alpha, float2 pos2d )
{
	uint mask;

	float off = float( ( uint(pos2d.x) | uint( pos2d.y) ) & 3 );
	alpha = saturate( alpha - off * ( ( 0.1111 / 8.0 ) / 3.0 ) );

	if( alpha < 0.4444 )
	{
		if( alpha < 0.2222 )
		{
			if( alpha < 0.1111 )
				mask = 0;
			else
				// only one bit set 0.2222
				mask = 1;
		}
		else
		{
			if( alpha < 0.3333 )
			{
				// 2 bits set0=> 10000001 + 11000000 .. 00000011 : 8 // 0.2222
				//        set1=> 10100000 .. 00000101 + 10000010 + 01000001 : 8
				//        set2=> 10010000 .. 00001001 + 10000100 + 01000010 + 00100001 : 8
				//        set3=> 10001000 .. 00010001 + 10001000 + 01000100 + 00100010 + 00010001 : 8
				mask = 3;
			}
			else
			{
				// 3 bits set0 => 11100000 .. 00000111 + 10000011 + 11000001 : 8 ? 0.4444 // 0.3333
				//        set1 => 10110000 .. 00001011 + 10000101 + 11000010 + 01100001: 8
				//        set2 => 11010000 .. 00001101 + 10000110 + 01000011 + 10100001: 8
				//        set3 => 10011000 .. 00010011 + 10001001 + 11000100 + 01100010 + 00110001 : 8
				//        set4 => 11001000 .. 00011001 + 10001100 + 01000110 + 00100011 + 10010001 : 8
				mask = 0x7;
			}
		}
	}
	else
	{
		if( alpha < 0.6666 )
		{
			if( alpha < 0.5555 )
			{
				// 4 bits set0 => 11110000 .. 00001111 + 10000111 + 11000011 + 11100001 : 8 // 0.5555
				//        set1 => 11011000 .. 00011011 + 10001101 + 11000110 + 01100011 + 10110001 : 8
				//        set2 => 11001100 .. 00110011 + 10011001 : 4 make 8
				//        set3 => 11000110 + 01100011 + 10110001 + 11011000 + 01101100 + 00110110 + 00011011 + 10001101 : 8
				//        set4 => 10111000 .. 00010111 + 10001011 + 11000101 + 11100010 + 01110001 : 8
				//        set5 => 10011100 .. 00100111 + 10010011 + 11001001 + 11100100 + 01110010 + 00111001 : 8
				//        set6 => 10101010 .. 01010101 : 2 make 8
				//        set7 => 10110100 + 01011010 + 00101101 + 10010110 + 01001011 + 10100101 + 11010010 + 01101001 : 8
				//        set8 => 10011010 + 01001101 + 10100110 + 01010011 + 10101001 + 11010100 + 01101010 + 00110101 : 8
				mask = 0xf;
			}
			else
			{
				// 5 bits set0 => 11111000 01111100 00111110 00011111 10001111 11000111 11100011 11110001 : 8 // 0.6666
				//        set1 => 10111100 : 8
				//        set2 => 10011110 : 8
				//        set3 => 11011100 : 8
				//        set4 => 11001110 : 8
				//        set5 => 11011010 : 8
				//        set6 => 10110110 : 8
				mask = 0x1F;
			}
		}
		else
		{
			if( alpha < 0.7777 )
			{
				// 6 bits set0 => 11111100 01111110 00111111 10011111 11001111 11100111 11110011 11111001 : 8
				//        set1 => 10111110 : 8
				//        set2 => 11011110 : 8
				mask = 0x3F;
			}
			else if( alpha < 0.8888 )
			{
				// 7 bits set0 => 11111110 :8
				mask = 0x7F;
			}
			else
				// all 8 bits set
				mask = 0xFF;
		}
	}

	return mask;
}
#endif

#endif

#endif // common_functions_h_included