add game&rawdata

This commit is contained in:
Vasily Petrov 2026-06-17 23:06:51 +03:00
parent 0133cd976c
commit 49b34b5546
45731 changed files with 709831 additions and 0 deletions

View file

@ -0,0 +1,100 @@
//
// Generated by Microsoft (R) D3DX9 Shader Compiler 9.07.239.0000
//
// fxc /nologo /Tps_2_0 /Emain /Zpr /Fctest\p20_accum_sun_far.ps
// accum_sun_far.ps
//
//
// Parameters:
//
// float4 Ldynamic_color;
// float4 Ldynamic_dir;
// row_major float4x4 m_shadow;
// row_major float3x4 m_sunmask;
// sampler2D s_accumulator;
// sampler2D s_lmap;
// sampler3D s_material;
// sampler2D s_normal;
// sampler2D s_position;
// sampler2D s_smap;
//
//
// Registers:
//
// Name Reg Size
// -------------- ----- ----
// m_shadow c0 4
// m_sunmask c4 2
// Ldynamic_color c6 1
// Ldynamic_dir c7 1
// s_smap s0 1
// s_position s1 1
// s_normal s2 1
// s_lmap s3 1
// s_material s4 1
// s_accumulator s5 1
//
// Pixel shader accum_sun_far (ps_2_0 build).
// Register roles follow the "Registers" table in the header above:
//   c0-c3 m_shadow, c4-c5 m_sunmask, c6 Ldynamic_color, c7 Ldynamic_dir,
//   s0 s_smap, s1 s_position, s2 s_normal, s3 s_lmap, s4 s_material,
//   s5 s_accumulator.
// Result: oC0 = s_accumulator(t0) + s_material(...) * Ldynamic_color * shade,
// where shade combines s_lmap.w with a bilinearly weighted 4-tap s_smap
// depth comparison.
ps_2_0
// c8/c9 hold the +/-0.000244140625 tap offsets; c9.z = 2048 scales the tap
// coordinate before frc; c9.w scales position.z for the fade term below.
// NOTE(review): 2048 is presumably the s_smap resolution - confirm.
def c8, 1, -0.000244140625, 0.000244140625, 0
def c9, -0.000244140625, 0.000244140625, 2048, 0.00555555569
def c10, 0.333000004, 0, 0, 0
dcl t0.xy
dcl_2d s0
dcl_2d s1
dcl_2d s2
dcl_2d s3
dcl_volume s4
dcl_2d s5
// r0 = s_position(t0), r1 = s_normal(t0).
texld r0, t0, s1
texld_pp r1, t0, s2
// r3 = (position.xyz, 1), the homogeneous input for the matrix rows below.
mov r3.w, c8.x
mov r3.xyz, r0
// Project through m_shadow: r0.xy = rows 0/1, r1.w = 1 / row 3.
dp4 r1.w, c3, r3
dp4 r0.x, c0, r3
rcp r1.w, r1.w
dp4 r0.y, c1, r3
// Four tap coordinates = projected xy / w plus diagonal offsets:
// r7 = (-o,+o), r6 = (+o,-o), r5 = (+o,+o), r8 = (-o,-o), o = 0.000244140625.
mad r7.xy, r0, r1.w, c9
mad r6.xy, r0, r1.w, -c9
mad r5.xy, r0, r1.w, c8.z
mad r8.xy, r0, r1.w, c8.y
// r9.xy = position transformed by the two m_sunmask rows (s_lmap coordinate).
dp4 r9.x, c4, r3
dp3_pp r0.x, r3, r3
dp4 r9.y, c5, r3
// r4 = -normalize(position) - Ldynamic_dir, then r2 = normalize(r4).
rsq_pp r1.w, r0.x
mad_pp r4.xyz, r3, -r1.w, -c7
// r0 becomes the s_material volume coordinate:
//   x = dot(-Ldynamic_dir, normal), y = dot(r2, normal), z = s_position.w.
mov_pp r0.z, r0.w
nrm_pp r2.xyz, r4
dp3_pp r0.y, r2, r1
dp3_pp r0.x, -c7, r1
// Four s_smap taps, then s_lmap, s_material and s_accumulator.
texld r7, r7, s0
texld r6, r6, s0
texld r5, r5, s0
texld r4, r8, s0
texld_pp r2, r9, s3
texld_pp r0, r0, s4
texld_pp r1, t0, s5
// r3.w = position through m_shadow row 2 (r3.w is still 1 when read here).
dp4 r3.w, c2, r3
// r4 = r3.w - tap.x for each of the four taps.
add r4.y, -r7.x, r3.w
add r4.z, -r6.x, r3.w
mul r2.xy, r8, c9.z
add r4.w, -r5.x, r3.w
add r4.x, -r4.x, r3.w
// cmp selects 1 where -r4 >= 0 (i.e. r3.w <= tap.x), otherwise 0.
cmp_pp r4, -r4, c8.x, c8.w
// f = frac(r8.xy * 2048); r5 = bilinear weights
//   x = (1-fx)(1-fy), y = fy(1-fx), z = fx(1-fy), w = fx*fy.
frc r2.xy, r2
add_pp r3.xy, -r2, c8.x
mul_pp r5.y, r2.y, r3.x
mul_pp r5.z, r2.x, r3.y
mul_pp r5.x, r3.y, r3.x
mul_pp r5.w, r2.y, r2.x
// r3.w = weighted sum of the four comparison results.
dp4_pp r3.w, r4, r5
// r5.w = 0.333 - s_lmap.w * r3.w.
mad_pp r5.w, r2.w, -r3.w, c10.x
// r4.w = saturate(position.z * c9.w)^2, used to blend r5.w in.
// NOTE(review): looks like a distance fade toward 0.333 - confirm intent.
mul_sat r4.w, r3.z, c9.w
mul r4.w, r4.w, r4.w
mul r4.w, r5.w, r4.w
// r2.w = s_lmap.w * r3.w + r5.w * r4.w (final shade factor).
mad_pp r2.w, r2.w, r3.w, r4.w
// oC0 = s_material * Ldynamic_color * shade + s_accumulator.
mul_pp r0, r0, c6
mad_pp r0, r0, r2.w, r1
mov_pp oC0, r0
// approximately 52 instruction slots used (9 texture, 43 arithmetic)

View file

@ -0,0 +1,41 @@
//
// Generated by Microsoft (R) D3DX9 Shader Compiler 9.07.239.0000
//
// fxc /nologo /Tps_2_0 /Emain /Zpr /Fctest\p20_clouds.ps clouds.ps
//
//
// Parameters:
//
// sampler2D s_clouds0;
// sampler2D s_clouds1;
// sampler2D s_tonemap;
//
//
// Registers:
//
// Name Reg Size
// ------------ ----- ----
// s_clouds0 s0 1
// s_clouds1 s1 1
// s_tonemap s2 1
//
// Pixel shader clouds (ps_2_0 build).
// Samplers per the header table: s0 s_clouds0, s1 s_clouds1, s2 s_tonemap.
// Result: oC0.xyz = (s_clouds0(t0) + s_clouds1(t1)) * v0 * s_tonemap(0.5, 0.5).x,
//         oC0.w   = v0.w.
ps_2_0
def c0, 0.5, 0, 0, 0
dcl v0
dcl_pp t0.xy
dcl_pp t1.xy
dcl_2d s0
dcl_2d s1
dcl_2d s2
texld_pp r0, t0, s0
texld_pp r1, t1, s1
// Sum the two cloud layers and modulate by the interpolated colour v0.
add_pp r0.xyz, r0, r1
mul_pp r1.xyz, r0, v0
// Sample s_tonemap at the fixed coordinate (0.5, 0.5); only .x is used.
mov_pp r0.xy, c0.x
texld_pp r0, r0, s2
mul_pp r0.xyz, r1, r0.x
// Alpha is passed through from v0.w.
mov_pp r0.w, v0.w
mov_pp oC0, r0
// approximately 9 instruction slots used (3 texture, 6 arithmetic)

View file

@ -0,0 +1,38 @@
//
// Generated by Microsoft (R) D3DX9 Shader Compiler 9.07.239.0000
//
// fxc /nologo /Tps_2_0 /Emain /Zpr /Fctest\p20_deffer_impl_flat.ps
// deffer_impl_flat.ps
//
//
// Parameters:
//
// float4 L_material;
// sampler2D s_base;
//
//
// Registers:
//
// Name Reg Size
// ------------ ----- ----
// L_material c0 1
// s_base s0 1
//
// Pixel shader deffer_impl_flat (ps_2_0 build). Writes three render targets.
// Per the header table: c0 = L_material, s0 = s_base.
//   oC0 = (t1 + normalize(t2) * 0.025, L_material.w)
//   oC1 = (normalize(t2), s_base.w)
//   oC2 = (s_base.xyz, 0.00784313772)
ps_2_0
// c1.x = 0.025 is the offset applied along the normal;
// c1.y = 0.00784313772 (numerically 2/255) is the constant oC2 alpha.
def c1, 0.0250000004, 0.00784313772, 0, 0
dcl t0.xy
dcl t1.xyz
dcl_pp t2.xyz
dcl_2d s0
texld_pp r0, t0, s0
// r1.xyz = normalize(t2).
nrm_pp r1.xyz, t2
// r2 = (t1 + r1 * 0.025, L_material.w) -> oC0.
mov_pp r2.w, c0.w
mad_pp r2.xyz, r1, c1.x, t1
mov_pp oC0, r2
// oC1 = (normalized t2, s_base.w).
mov_pp r1.w, r0.w
mov_pp oC1, r1
// oC2 = s_base colour with .w replaced by the constant c1.y.
mov r0.w, c1.y
mov_pp oC2, r0
// approximately 11 instruction slots used (1 texture, 10 arithmetic)

View file

@ -0,0 +1,102 @@
//
// Generated by Microsoft (R) D3DX9 Shader Compiler 9.07.239.0000
//
// fxc /nologo /Tps_2_0 /Emain /Zpr /Fctest\p20_deffer_impl_flat_d.ps
// deffer_impl_flat_d.ps
//
//
// Parameters:
//
// float4 L_material;
// sampler2D s_base;
// sampler2D s_dn_a;
// sampler2D s_dn_b;
// sampler2D s_dn_g;
// sampler2D s_dn_r;
// sampler2D s_dt_a;
// sampler2D s_dt_b;
// sampler2D s_dt_g;
// sampler2D s_dt_r;
// sampler2D s_mask;
//
//
// Registers:
//
// Name Reg Size
// ------------ ----- ----
// L_material c0 1
// s_base s0 1
// s_mask s1 1
// s_dt_r s2 1
// s_dt_g s3 1
// s_dt_b s4 1
// s_dt_a s5 1
// s_dn_r s6 1
// s_dn_g s7 1
// s_dn_b s8 1
// s_dn_a s9 1
//
// Pixel shader deffer_impl_flat_d (ps_2_0 build). Writes three render targets.
// Per the header table: c0 L_material, s0 s_base, s1 s_mask,
//   s2-s5 s_dt_r/g/b/a, s6-s9 s_dn_r/g/b/a.
// The four s_dn_* and four s_dt_* samples (all at t5) are blended using the
// s_mask channels normalised to sum to 1.
//   oC0 = (t1 + N * 0.025, L_material.w), N = normalised transformed blend of s_dn_*
//   oC1 = (N, s_base.w)
//   oC2 = (2 * s_base.xyz * blended s_dt_*, 0.00784313772)
ps_2_0
def c1, 1, -0.5, 0.5, 0.0250000004
def c2, 1, 1, 0.5, 0.00784313772
dcl t0.xy
dcl t1.xyz
dcl_pp t2.xyz
dcl_pp t3.xyz
dcl_pp t4.xyz
dcl t5.xy
dcl_2d s0
dcl_2d s1
dcl_2d s2
dcl_2d s3
dcl_2d s4
dcl_2d s5
dcl_2d s6
dcl_2d s7
dcl_2d s8
dcl_2d s9
// r9/r8/r7/r6 = s_dn_a / s_dn_b / s_dn_r / s_dn_g at t5.
texld_pp r9, t5, s9
texld_pp r8, t5, s8
texld_pp r7, t5, s6
texld_pp r6, t5, s7
// r2 = s_mask(t0), r0 = s_base(t0).
texld_pp r2, t0, s1
texld_pp r0, t0, s0
// r5/r4/r3/r1 = s_dt_g / s_dt_r / s_dt_b / s_dt_a at t5.
texld_pp r5, t5, s3
texld_pp r4, t5, s2
texld_pp r3, t5, s4
texld_pp r1, t5, s5
// Unpack each s_dn_* sample: xyz = sample.wzy - 0.5.
add_pp r9.xyz, r9.wzyx, c1.y
add_pp r8.xyz, r8.wzyx, c1.y
add_pp r7.xyz, r7.wzyx, c1.y
// r1.w = sum of the four s_mask channels; r2 = s_mask / sum.
dp4_pp r1.w, r2, c1.x
add_pp r6.xyz, r6.wzyx, c1.y
rcp_pp r1.w, r1.w
mul_pp r2, r2, r1.w
// r6.xyz = mask-weighted blend: g*mask.y + r*mask.x + b*mask.z + a*mask.w.
mul_pp r6.xyz, r6, r2.y
mad_pp r6.xyz, r7, r2.x, r6
mad_pp r6.xyz, r8, r2.z, r6
mad_pp r6.xyz, r9, r2.w, r6
// r7 = (dot(t2, v), dot(t3, v), dot(t4, v)) with v = (r6.x, r6.y, 0.5 * r6.z);
// the x component is expanded into mul/mad, y and z use dp3 after scaling by c2.
// NOTE(review): t2/t3/t4 look like rows of a tangent-space basis - confirm.
mul_pp r6.w, r6.z, c1.z
mul r1.w, r6.x, t2.x
mad r1.w, t2.y, r6.y, r1.w
mad_pp r7.x, t2.z, r6.w, r1.w
mul_pp r6.xyz, r6, c2
dp3_pp r7.y, t3, r6
dp3_pp r7.z, t4, r6
// r6.xyz = normalize(r7); oC0 = (t1 + r6 * 0.025, L_material.w).
nrm_pp r6.xyz, r7
mad_pp r7.xyz, r6, c1.w, t1
mov_pp r7.w, c0.w
mov_pp oC0, r7
// oC1 = (normalised vector, s_base.w).
mov_pp r6.w, r0.w
mov_pp oC1, r6
// r1.xyz = mask-weighted blend of the four s_dt_* samples.
mul_pp r5.xyz, r5, r2.y
mad_pp r4.xyz, r4, r2.x, r5
mad_pp r2.xyz, r3, r2.z, r4
mad_pp r1.xyz, r1, r2.w, r2
// oC2.xyz = 2 * s_base.xyz * blend; oC2.w = constant c2.w.
mul_pp r0.xyz, r0, r1
add_pp r0.xyz, r0, r0
mov r0.w, c2.w
mov_pp oC2, r0
// approximately 44 instruction slots used (10 texture, 34 arithmetic)

View file

@ -0,0 +1,45 @@
//
// Generated by Microsoft (R) D3DX9 Shader Compiler 9.07.239.0000
//
// fxc /nologo /Tps_2_0 /Emain /Zpr /Fctest\p20_sky2.ps sky2.ps
//
//
// Parameters:
//
// samplerCUBE s_sky0;
// samplerCUBE s_sky1;
// sampler2D s_tonemap;
//
//
// Registers:
//
// Name Reg Size
// ------------ ----- ----
// s_sky0 s0 1
// s_sky1 s1 1
// s_tonemap s2 1
//
// Pixel shader sky2 (ps_2_0 build). Writes two render targets.
// Samplers per the header table: s0 s_sky0 (cube), s1 s_sky1 (cube), s2 s_tonemap.
//   sky      = lerp(s_sky0(t0), s_sky1(t1), v0.w) * v0.xyz
//   oC0      = (sky * 2 * s_tonemap(0.5, 0.5).x, 0)
//   oC1      = oC0 * (1/3, 1/3, 1/3, 1)
ps_2_0
def c0, 0.5, 0, 0, 0
def c1, 0.333333343, 0.333333343, 0.333333343, 1
dcl v0
dcl_pp t0.xyz
dcl_pp t1.xyz
dcl_cube s0
dcl_cube s1
dcl_2d s2
texld_pp r1, t0, s0
texld_pp r0, t1, s1
// lrp: r2 = v0.w * r0 + (1 - v0.w) * r1, i.e. blend from s_sky0 to s_sky1.
lrp_pp r2.xyz, v0.w, r0, r1
mul_pp r1.xyz, r2, v0
// Sample s_tonemap at the fixed coordinate (0.5, 0.5); scale = 2 * sample.x.
mov_pp r0.xy, c0.x
texld_pp r0, r0, s2
add_pp r0.w, r0.x, r0.x
mul_pp r0.xyz, r1, r0.w
// Alpha is forced to 0 (c0.y) before the first output.
mov r0.w, c0.y
mov_pp oC0, r0
// Second target receives the colour scaled by one third (alpha unchanged).
mul r0, r0, c1
mov_pp oC1, r0
// approximately 13 instruction slots used (3 texture, 10 arithmetic)

View file

@ -0,0 +1,35 @@
//
// Generated by Microsoft (R) D3DX9 Shader Compiler 9.07.239.0000
//
// fxc /nologo /Tps_2_0 /Emain /Zpr /Fctest\p20_yuv2rgb.ps yuv2rgb.ps
//
//
// Parameters:
//
// sampler2D s_base;
//
//
// Registers:
//
// Name Reg Size
// ------------ ----- ----
// s_base s1 1
//
// Pixel shader yuv2rgb (ps_2_0 build). s_base is bound to s1 per the header table.
// With (a, b, c) = s_base(t0).xyz the output is
//   oC0.x = 1.16406*a + 1.59765*c - 0.86961
//   oC0.y = 1.16406*a - 0.390625*b - 0.8125*c + 0.53076
//   oC0.z = 1.16406*a + 2.01562*b - 1.0786
//   oC0.w = 1
// NOTE(review): per the shader name, (a, b, c) are presumably Y, U, V - confirm
// the channel order against the code that fills s_base.
ps_2_0
def c0, -0.869610012, 0.53075999, -1.07860005, 0
def c1, 1.16406, 1, 0, 0
def c2, 1.59765005, -0.8125, 0, 0
def c3, -0.390625, 2.01561999, 0, 0
dcl t0.xy
dcl_2d s1
texld_pp r0, t0, s1
// r1.x = 1.16406 * a (shared by all three output channels).
mul_pp r1.x, r0.x, c1.x
// The .zxyw swizzle places c3.x in lane y and c3.y in lane z:
//   r1.y = -0.390625 * b + r1.x,  r1.z = 2.01562 * b + r1.x.
mad_pp r1.yz, r0.y, c3.zxyw, r1.x
//   r0.x = 1.59765 * c + r1.x,    r0.y = -0.8125 * c + r1.y.
mad_pp r0.xy, r0.z, c2, r1
mov_pp r0.z, r1.z
// Add the per-channel bias and force alpha to 1.
add_pp r0.xyz, r0, c0
mov r0.w, c1.y
mov_pp oC0, r0
// approximately 8 instruction slots used (1 texture, 7 arithmetic)

View file

@ -0,0 +1,100 @@
//
// Generated by Microsoft (R) D3DX9 Shader Compiler 9.07.239.0000
//
// fxc /nologo /Tps_2_b /Emain /Zpr /Fctest\p2b_accum_sun_far.ps
// accum_sun_far.ps
//
//
// Parameters:
//
// float4 Ldynamic_color;
// float4 Ldynamic_dir;
// row_major float4x4 m_shadow;
// row_major float3x4 m_sunmask;
// sampler2D s_accumulator;
// sampler2D s_lmap;
// sampler3D s_material;
// sampler2D s_normal;
// sampler2D s_position;
// sampler2D s_smap;
//
//
// Registers:
//
// Name Reg Size
// -------------- ----- ----
// m_shadow c0 4
// m_sunmask c4 2
// Ldynamic_color c6 1
// Ldynamic_dir c7 1
// s_smap s0 1
// s_position s1 1
// s_normal s2 1
// s_lmap s3 1
// s_material s4 1
// s_accumulator s5 1
//
// Pixel shader accum_sun_far (compiled with /Tps_2_b, emitted as ps_2_x).
// Register roles follow the "Registers" table in the header above:
//   c0-c3 m_shadow, c4-c5 m_sunmask, c6 Ldynamic_color, c7 Ldynamic_dir,
//   s0 s_smap, s1 s_position, s2 s_normal, s3 s_lmap, s4 s_material,
//   s5 s_accumulator.
// Result: oC0 = s_accumulator(t0) + s_material(...) * Ldynamic_color * shade,
// where shade combines s_lmap.w with a bilinearly weighted 4-tap s_smap
// depth comparison.
ps_2_x
// c8/c9 hold the +/-0.000244140625 tap offsets; c9.z = 2048 scales the tap
// coordinate before frc; c9.w scales position.z for the fade term below.
// NOTE(review): 2048 is presumably the s_smap resolution - confirm.
def c8, 1, -0.000244140625, 0.000244140625, 0
def c9, -0.000244140625, 0.000244140625, 2048, 0.00555555569
def c10, 0.333000004, 0, 0, 0
dcl t0.xy
dcl_2d s0
dcl_2d s1
dcl_2d s2
dcl_2d s3
dcl_volume s4
dcl_2d s5
// r0 = s_position(t0), r1 = s_normal(t0).
texld r0, t0, s1
texld_pp r1, t0, s2
// r3 = (position.xyz, 1), the homogeneous input for the matrix rows below.
mov r3.w, c8.x
mov r3.xyz, r0
// Project through m_shadow: r0.xy = rows 0/1, r1.w = 1 / row 3.
dp4 r1.w, c3, r3
dp4 r0.x, c0, r3
rcp r1.w, r1.w
dp4 r0.y, c1, r3
// Four tap coordinates = projected xy / w plus diagonal offsets:
// r7 = (-o,+o), r6 = (+o,-o), r5 = (+o,+o), r8 = (-o,-o), o = 0.000244140625.
mad r7.xy, r0, r1.w, c9
mad r6.xy, r0, r1.w, -c9
mad r5.xy, r0, r1.w, c8.z
mad r8.xy, r0, r1.w, c8.y
// r9.xy = position transformed by the two m_sunmask rows (s_lmap coordinate).
dp4 r9.x, c4, r3
dp3_pp r0.x, r3, r3
dp4 r9.y, c5, r3
// r4 = -normalize(position) - Ldynamic_dir, then r2 = normalize(r4).
rsq_pp r1.w, r0.x
mad_pp r4.xyz, r3, -r1.w, -c7
// r0 becomes the s_material volume coordinate:
//   x = dot(-Ldynamic_dir, normal), y = dot(r2, normal), z = s_position.w.
mov_pp r0.z, r0.w
nrm_pp r2.xyz, r4
dp3_pp r0.y, r2, r1
dp3_pp r0.x, -c7, r1
// Four s_smap taps, then s_lmap, s_material and s_accumulator.
texld r7, r7, s0
texld r6, r6, s0
texld r5, r5, s0
texld r4, r8, s0
texld_pp r2, r9, s3
texld_pp r0, r0, s4
texld_pp r1, t0, s5
// r3.w = position through m_shadow row 2 (r3.w is still 1 when read here).
dp4 r3.w, c2, r3
// r4 = r3.w - tap.x for each of the four taps.
add r4.y, -r7.x, r3.w
add r4.z, -r6.x, r3.w
mul r2.xy, r8, c9.z
add r4.w, -r5.x, r3.w
add r4.x, -r4.x, r3.w
// cmp selects 1 where -r4 >= 0 (i.e. r3.w <= tap.x), otherwise 0.
cmp_pp r4, -r4, c8.x, c8.w
// f = frac(r8.xy * 2048); r5 = bilinear weights
//   x = (1-fx)(1-fy), y = fy(1-fx), z = fx(1-fy), w = fx*fy.
frc r2.xy, r2
add_pp r3.xy, -r2, c8.x
mul_pp r5.y, r2.y, r3.x
mul_pp r5.z, r2.x, r3.y
mul_pp r5.x, r3.y, r3.x
mul_pp r5.w, r2.y, r2.x
// r3.w = weighted sum of the four comparison results.
dp4_pp r3.w, r4, r5
// r5.w = 0.333 - s_lmap.w * r3.w.
mad_pp r5.w, r2.w, -r3.w, c10.x
// r4.w = saturate(position.z * c9.w)^2, used to blend r5.w in.
// NOTE(review): looks like a distance fade toward 0.333 - confirm intent.
mul_sat r4.w, r3.z, c9.w
mul r4.w, r4.w, r4.w
mul r4.w, r5.w, r4.w
// r2.w = s_lmap.w * r3.w + r5.w * r4.w (final shade factor).
mad_pp r2.w, r2.w, r3.w, r4.w
// oC0 = s_material * Ldynamic_color * shade + s_accumulator.
mul_pp r0, r0, c6
mad_pp r0, r0, r2.w, r1
mov_pp oC0, r0
// approximately 52 instruction slots used (9 texture, 43 arithmetic)

View file

@ -0,0 +1,174 @@
--------------------------------------------------------------------------------
Running performance on file test\p2B_accum_sun_far.ps
-------------------- NV40 --------------------
Target: GeForce 6800 Ultra (NV40) :: Unified Compiler: v65.04
IPU0 ------ Simplified schedule: --------
Pass | Unit | uOp | PC: Op
-----+--------+------+-------------------------
1 | SCT0 | div | 0: TEXr r0, f[TEX0], TEX1;
| TEX | tex | 0: TEXr r0, f[TEX0], TEX1;
| SCB0 | mov | 1: MOVr r3.xyz, r0;
| SCB1 | mov | 2: MOVr r3.w, const.---x;
| | |
2 | SCB0 | dp4 | 4: DP4r r1.x, const, r3;
| SCB1 | nop | 4: DP4r r1.x, const, r3;
| | |
3 | SCT1 | div | 6: RCPr r1.y, r1;
| SCB0 | dp4 | 7: DP4r r2.z, const, r3;
| SCB1 | nop | 7: DP4r r2.z, const, r3;
| | |
4 | SCB0 | dp4 | 9: DP4r r2.w, const, r3;
| SCB1 | nop | 9: DP4r r2.w, const, r3;
| | |
5 | SCB0 | mad | 13: MADr r2.xy, r2.zw--, r1.yy--, const.xy--;
| SCB1 | mad | 11: MADr r1.zw, r2, r1.--yy, const.--xx;
| | |
6 | SCT0 | mov | 15: TEXr r1.x, r1.zwzz, TEX0;
| TEX | tex | 15: TEXr r1.x, r1.zwzz, TEX0;
| SCB1 | mad | 16: MADr r1.zw, r2, r1.--yy, const.--xy;
| | |
7 | SCT0 | mov | 18: TEXr r4.x, r1.zwzz, TEX0;
| TEX | tex | 18: TEXr r4.x, r1.zwzz, TEX0;
| SCB0 | dp4 | 19: DP4r r1.z, const, r3;
| SCB1 | nop | 19: DP4r r1.z, const, r3;
| | |
8 | SCT0 | mov | 21: TEXr r2.x, r2, TEX0;
| TEX | tex | 21: TEXr r2.x, r2, TEX0;
| SCB0 | dp4 | 22: DP4r r1.w, const, r3;
| SCB1 | nop | 22: DP4r r1.w, const, r3;
| | |
9 | SCB0 | dp4 | 24: DP4r r4.z, const, r3;
| SCB1 | nop | 24: DP4r r4.z, const, r3;
| | |
10 | SCT1 | mov | 26: MOVr r3.z, r0.--w-;
| SCB0 | add | 27: ADDr h6.y,-r4.-x--, r4.-z--;
| SCB1 | add | 28: ADDr h6.w,-r1.---x, r4.---z;
| | |
11 | SCT0 | mov | 29: TEXh h1.w, r1.zwzz, TEX3;
| TEX | tex | 29: TEXh h1.w, r1.zwzz, TEX3;
| SCB0 | dp3 | 30: DP3h h4.z, r0, r0;
| | |
12 | SCB0 | mad | 31: MADr r4.xy, r2.zw--, r1.yy--, const.xx--;
| SCB1 | lg2 | 33: LG2h/2 h4.w, |h4.zzzz|;
| | |
13 | SCB0 | add | 35: ADDr h6.z,-r2.--x-, r4;
| SCB1 | ex2 | 34: EX2h h4.w,-h4.wwww;
| | |
14 | SCT0 | mov | 36: TEXr r1.x, r4, TEX0;
| TEX | tex | 36: TEXr r1.x, r4, TEX0;
| SCB0 | add | 37: ADDr h6.x,-r1, r4.z---;
| | |
15 | SCT0 | div | 38: TEXh h4.xyz, f[TEX0], TEX2;
| TEX | tex | 38: TEXh h4.xyz, f[TEX0], TEX2;
| SCB0/1 | mul | 39: MOVrc0 hc,-h6;
| | |
16 | SCT0 | mul | 40: MULr r1.xy, r4, const.xx--;
| SCB0 | dp3 | 42: DP3h r3.x,-const, h4;
| | |
17 | SCB0 | frc | 44: FRCr h5.xy, r1;
| | |
18 | SCT0/1 | mul | 45: MOVh h2, const.xxxx;
| SCB0 | mad | 47: MADh h0.xyz, r0,-h4.www-,-const;
| | |
19 | SCT1 | mov | 49: NRMh h0.xyz, h0;
| SRB | nrm | 49: NRMh h0.xyz, h0;
| SCB0 | dp3 | 50: DP3h r3.y, h0, h4;
| | |
20 | SCT0/1 | mul | 51: MOVh h2(LT0.xyzw), const.xxxx;
| SCB1 | add | 53: ADDh h5.zw,-h5.--xy, const.--xx;
| | |
21 | SCT0 | mul | 56: MULh h4.yz, h5.-yx-, h5.-zw-;
| SCT1 | mul | 55: MULh h4.w, h5.---y, h5.---x;
| SCB0 | mul | 57: MULh h4.x, h5.w---, h5.z---;
| SCB1 | mul | 58: MULr_s r2.w, r0.---z, const.---x;
| | |
22 | SCB0 | dp4 | 60: DP4h h1.z, h2, h4;
| SCB1 | nop | 60: DP4h h1.z, h2, h4;
| | |
23 | SCT0 | mul | 61: MULr r2.x, r2.w---, r2.w---;
| SCB1 | mad | 62: MADh h4.w, h1,-h1.---z, const.---x;
| | |
24 | SCT0 | mul | 64: MULr h1.x, h4.w---, r2;
| SCB0 | mad | 65: MADh h2.x, h1.w---, h1.z---, h1;
| | |
25 | SCT0 | mov | 66: TEXh h0, r3, TEX4;
| TEX | tex | 66: TEXh h0, r3, TEX4;
| SCB0/1 | mul | 67: MULh h0, h0, const;
| | |
26 | SCT0 | div | 69: TEXh h1, f[TEX0], TEX5;
| TEX | tex | 69: TEXh h1, f[TEX0], TEX5;
| SCB0/1 | mad | 70: MADh h0, h0, h2.xxxx, h1;
Pass SCT TEX SCB
1: 50% 100% 100%
2: 0% 0% 100%
3: 25% 0% 100%
4: 0% 0% 100%
5: 0% 0% 100%
6: 0% 100% 50%
7: 0% 100% 100%
8: 0% 100% 100%
9: 0% 0% 100%
10: 25% 0% 50%
11: 0% 100% 75%
12: 0% 0% 75%
13: 0% 0% 50%
14: 0% 100% 25%
15: 50% 100% 100%
16: 50% 0% 75%
17: 0% 0% 50%
18: 100% 0% 75%
19: 0% 0% 75%
20: 100% 0% 50%
21: 75% 0% 50%
22: 0% 0% 100%
23: 25% 0% 25%
24: 25% 0% 25%
25: 0% 100% 100%
26: 50% 100% 100%
MEAN: 22% 34% 75%
Pass SCT0 SCT1 TEX SCB0 SCB1
1: 100% 0% 100% 100% 100%
2: 0% 0% 0% 100% 100%
3: 0% 100% 0% 100% 100%
4: 0% 0% 0% 100% 100%
5: 0% 0% 0% 100% 100%
6: 0% 0% 100% 0% 100%
7: 0% 0% 100% 100% 100%
8: 0% 0% 100% 100% 100%
9: 0% 0% 0% 100% 100%
10: 0% 100% 0% 100% 100%
11: 0% 0% 100% 100% 0%
12: 0% 0% 0% 100% 100%
13: 0% 0% 0% 100% 100%
14: 0% 0% 100% 100% 0%
15: 100% 0% 100% 100% 100%
16: 100% 0% 0% 100% 0%
17: 0% 0% 0% 100% 0%
18: 100% 100% 0% 100% 0%
19: 0% 0% 0% 100% 0%
20: 100% 100% 0% 0% 100%
21: 100% 100% 0% 100% 100%
22: 0% 0% 0% 100% 100%
23: 100% 0% 0% 0% 100%
24: 100% 0% 0% 100% 0%
25: 0% 0% 100% 100% 100%
26: 100% 0% 100% 100% 100%
MEAN: 34% 19% 34% 88% 73%
Cycles: 28.25 :: R Regs Used: 5 :: R Regs Max Index (0 based): 4
--------------------------------------------------------------------------------
Running performance on file test\p2b_accum_sun_far.ps
-------------------- NV40 --------------------
Target: GeForce 6800 Ultra (NV40) :: Unified Compiler: v81.95
Cycles: 23.00 :: R Regs Used: 4 :: R Regs Max Index (0 based): 3
Pixel throughput (assuming 1 cycle texture lookup) 278.26 MP/s
--------------------------------------------------------------------------------
Running performance on file test\p2b_accum_sun_far.ps
-------------------- G70 --------------------
Target: GeForce 7800 GT (G70) :: Unified Compiler: v81.95
Cycles: 21.00 :: R Regs Used: 4 :: R Regs Max Index (0 based): 3
Pixel throughput (assuming 1 cycle texture lookup) 457.14 MP/s

View file

@ -0,0 +1,164 @@
//
// Generated by Microsoft (R) D3DX9 Shader Compiler 9.07.239.0000
//
// fxc /nologo /Tps_2_b /Emain /Zpr /Fctest\p2b_accum_sun_near.ps
// accum_sun_near.ps
//
//
// Parameters:
//
// float4 Ldynamic_color;
// float4 Ldynamic_dir;
// row_major float4x4 m_shadow;
// row_major float3x4 m_sunmask;
// sampler2D s_accumulator;
// sampler2D s_lmap;
// sampler3D s_material;
// sampler2D s_normal;
// sampler2D s_position;
// sampler2D s_smap;
//
//
// Registers:
//
// Name Reg Size
// -------------- ----- ----
// m_shadow c0 4
// m_sunmask c4 2
// Ldynamic_color c6 1
// Ldynamic_dir c7 1
// s_position s0 1
// s_normal s1 1
// s_smap s2 1
// s_lmap s3 1
// s_material s4 1
// s_accumulator s5 1
//
// Pixel shader accum_sun_near (compiled with /Tps_2_b, emitted as ps_2_x).
// Register roles follow the "Registers" table in the header above:
//   c0-c3 m_shadow, c4-c5 m_sunmask, c6 Ldynamic_color, c7 Ldynamic_dir,
//   s0 s_position, s1 s_normal, s2 s_smap, s3 s_lmap, s4 s_material,
//   s5 s_accumulator.
// Note the sampler order differs from accum_sun_far (s_smap is s2 here).
// Sixteen s_smap taps are taken as four groups of four; each group is depth
// compared and bilinearly weighted, and the four group results are averaged.
// Result: oC0 = s_accumulator(t0) + s_material(...) * Ldynamic_color
//               * s_lmap.w * averaged shadow term.
ps_2_x
// c8-c14 hold combinations of the two offset magnitudes 0.000537109387 and
// 4.88281257e-005; c9.z = 2048 scales a tap coordinate before frc;
// c9.w = 0.25 averages the four group results.
// NOTE(review): 2048 is presumably the s_smap resolution - confirm.
def c8, 1, -0.000537109387, -4.88281257e-005, 0
def c9, -0.000537109387, -4.88281257e-005, 2048, 0.25
def c10, 4.88281257e-005, -0.000537109387, 0, 0
def c11, 0.000537109387, -0.000537109387, 0, 0
def c12, -4.88281257e-005, -0.000537109387, 0, 0
def c13, 4.88281257e-005, -4.88281257e-005, 0, 0
def c14, 0.000537109387, -4.88281257e-005, 0, 0
dcl t0.xy
dcl_2d s0
dcl_2d s1
dcl_2d s2
dcl_2d s3
dcl_volume s4
dcl_2d s5
// r0 = s_position(t0), r1 = s_normal(t0).
texld r0, t0, s0
texld_pp r1, t0, s1
// r19 = (position.xyz, 1), the homogeneous input for the matrix rows below.
mov r19.xyz, r0
mov r19.w, c8.x
// Project through m_shadow: r0.xy = rows 0/1, r1.w = 1 / row 3.
dp4 r1.w, c3, r19
dp4 r0.x, c0, r19
rcp r1.w, r1.w
dp4 r0.y, c1, r19
// Sixteen tap coordinates = projected xy / w + offset. The first eight use
// c9, c12, c8.z, c8.y, c13, c11, c14, c10; the last eight use the negated
// forms of the same constants.
mad r14.xy, r0, r1.w, c9
mad r13.xy, r0, r1.w, c12
mad r3.xy, r0, r1.w, c8.z
mad r23.xy, r0, r1.w, c8.y
mad r12.xy, r0, r1.w, c13
mad r11.xy, r0, r1.w, c11
mad r10.xy, r0, r1.w, c14
mad r22.xy, r0, r1.w, c10
mad r9.xy, r0, r1.w, -c11
mad r8.xy, r0, r1.w, -c13
mad r7.xy, r0, r1.w, -c10
mad r21.xy, r0, r1.w, -c14
mad r6.xy, r0, r1.w, -c12
mad r5.xy, r0, r1.w, -c9
mad r4.xy, r0, r1.w, -c8.y
mad r20.xy, r0, r1.w, -c8.z
// r24.xy = position transformed by the two m_sunmask rows (s_lmap coordinate).
dp4 r24.x, c4, r19
dp4 r24.y, c5, r19
// r2 = normalize(position); then r2 = r2 - 2 * dot(r2, normal) * normal,
// i.e. the normalised position vector reflected about the normal.
nrm_pp r2.xyz, r19
dp3_pp r0.x, r2, r1
mov_pp r0.z, r0.w
add_pp r0.w, r0.x, r0.x
mad_pp r2.xyz, r1, -r0.w, r2
// r0 becomes the s_material volume coordinate:
//   x = dot(-Ldynamic_dir, normal), y = dot(-Ldynamic_dir, reflected vector),
//   z = s_position.w.
dp3_pp r0.x, -c7, r1
dp3_pp r0.y, -c7, r2
// Sixteen s_smap taps. Destination registers are reused as later sources, so
// the order of these texld instructions matters.
texld r18, r14, s2
texld r17, r13, s2
texld r16, r3, s2
texld r3, r23, s2
texld r15, r12, s2
texld r14, r11, s2
texld r13, r10, s2
texld r12, r22, s2
texld r11, r9, s2
texld r10, r8, s2
texld r9, r7, s2
texld r8, r21, s2
texld r7, r6, s2
texld r6, r5, s2
texld r5, r4, s2
texld r4, r20, s2
// s_lmap, s_material and s_accumulator.
texld_pp r2, r24, s3
texld_pp r0, r0, s4
texld_pp r1, t0, s5
// r5.w = position through m_shadow row 2 (the depth compared against each tap).
dp4 r5.w, c2, r19
// Group 1 (taps r18, r17, r16, r3; weights from frac(r23.xy * 2048)) -> r3.x.
// cmp yields 1 where depth <= tap.x, otherwise 0.
add r3.y, -r18.x, r5.w
add r3.z, -r17.x, r5.w
mul r2.xy, r23, c9.z
add r3.w, -r16.x, r5.w
add r3.x, -r3.x, r5.w
cmp_pp r3, -r3, c8.x, c8.w
frc r17.xy, r2
add_pp r2.xy, -r17, c8.x
mul_pp r16.y, r17.y, r2.x
mul_pp r16.z, r17.x, r2.y
mul_pp r16.w, r17.y, r17.x
mul_pp r16.x, r2.y, r2.x
dp4_pp r3.x, r3, r16
// Group 2 (taps r15, r14, r13, r12; weights from frac(r22.xy * 2048)) -> r3.y.
add r12.y, -r15.x, r5.w
add r12.z, -r14.x, r5.w
mul r2.xy, r22, c9.z
add r12.w, -r13.x, r5.w
add r12.x, -r12.x, r5.w
cmp_pp r12, -r12, c8.x, c8.w
frc r14.xy, r2
add_pp r2.xy, -r14, c8.x
mul_pp r13.y, r14.y, r2.x
mul_pp r13.z, r14.x, r2.y
mul_pp r13.w, r14.y, r14.x
mul_pp r13.x, r2.y, r2.x
dp4_pp r3.y, r12, r13
// Group 3 (taps r11, r10, r9, r8; weights from frac(r21.xy * 2048)) -> r3.z.
add r8.y, -r11.x, r5.w
add r8.z, -r10.x, r5.w
mul r2.xy, r21, c9.z
add r8.w, -r9.x, r5.w
add r8.x, -r8.x, r5.w
cmp_pp r8, -r8, c8.x, c8.w
frc r2.xy, r2
add_pp r10.xy, -r2, c8.x
mul_pp r9.y, r2.y, r10.x
mul_pp r9.z, r2.x, r10.y
mul_pp r9.x, r10.y, r10.x
mul_pp r9.w, r2.y, r2.x
dp4_pp r3.z, r8, r9
// Group 4 (taps r7, r6, r5, r4; weights from frac(r20.xy * 2048)) -> r3.w.
// r5.w (depth) is read for the last time before r5 is overwritten with weights.
add r4.y, -r7.x, r5.w
add r4.z, -r6.x, r5.w
mul r2.xy, r20, c9.z
add r4.w, -r5.x, r5.w
add r4.x, -r4.x, r5.w
cmp_pp r4, -r4, c8.x, c8.w
frc r2.xy, r2
add_pp r6.xy, -r2, c8.x
mul_pp r5.y, r2.y, r6.x
mul_pp r5.z, r2.x, r6.y
mul_pp r5.x, r6.y, r6.x
mul_pp r5.w, r2.y, r2.x
dp4_pp r3.w, r4, r5
// r3.w = 0.25 * (group1 + group2 + group3 + group4).
dp4_pp r3.w, r3, c9.w
// r2.w = s_lmap.w * averaged shadow term.
mul_pp r2.w, r2.w, r3.w
// oC0 = s_material * Ldynamic_color * r2.w + s_accumulator.
mul_pp r0, r0, c6
mad_pp r0, r0, r2.w, r1
mov_pp oC0, r0
// approximately 112 instruction slots used (21 texture, 91 arithmetic)

View file

@ -0,0 +1,361 @@
--------------------------------------------------------------------------------
Running performance on file test\p2B_accum_sun_near.ps
-------------------- NV40 --------------------
Target: GeForce 6800 Ultra (NV40) :: Unified Compiler: v65.04
IPU0 ------ Simplified schedule: --------
Pass | Unit | uOp | PC: Op
-----+--------+------+-------------------------
1 | SCT0 | div | 0: TEXr r0, f[TEX0], TEX0;
| TEX | tex | 0: TEXr r0, f[TEX0], TEX0;
| SCB0 | mov | 1: MOVr r1.xyz, r0;
| SCB1 | mov | 2: MOVr r1.w, const.---x;
| | |
2 | SCT1 | mov | 4: NRMh h0.xyz, r0;
| SRB | nrm | 4: NRMh h0.xyz, r0;
| SCB0 | dp4 | 5: DP4r r2.x, const, r1;
| SCB1 | nop | 5: DP4r r2.x, const, r1;
| | |
3 | SCB0 | dp4 | 7: DP4r r4.x, const, r1;
| SCB1 | nop | 7: DP4r r4.x, const, r1;
| | |
4 | SCB0 | dp4 | 9: DP4r r4.y, const, r1;
| SCB1 | nop | 9: DP4r r4.y, const, r1;
| | |
5 | SCT1 | div | 11: RCPr r6.z, r2;
| SCB0 | dp4 | 12: DP4r r6.y, const, r1;
| SCB1 | nop | 12: DP4r r6.y, const, r1;
| | |
6 | SCB0 | mad | 14: MADr r2.xy, r4, r6.zz--, const.xy--;
| SCB1 | mad | 16: MADr r4.zw, r4.--xy, r6.--zz, const.--yx;
| | |
8 | SCT0 | mov | 18: TEXr r6.x, r2, TEX2;
| TEX | tex | 18: TEXr r6.x, r2, TEX2;
| SCB0 | add | 19: ADDr h6.y,-r6.-x--, r6;
| SCB1 | mad | 20: MADr r3.zw, r4.--xy, r6.--zz, const.--xx;
| | |
9 | SCT0 | mov | 22: TEXr r2.x, r4.zwzz, TEX2;
| TEX | tex | 22: TEXr r2.x, r4.zwzz, TEX2;
| SCB1 | mad | 23: MADr r2.zw, r4.--xy, r6.--zz, const.--xx;
| | |
10 | SCB1 | add | 25: ADDr h6.z,-r2.--x-, r6.--y-;
| | |
11 | SCT0 | mov | 26: TEXr r2.x, r2.zwzz, TEX2;
| TEX | tex | 26: TEXr r2.x, r2.zwzz, TEX2;
| SCB1 | add | 27: ADDr h6.w,-r2.---x, r6.---y;
| | |
12 | SCT0 | mov | 28: TEXr r2.x, r3.zwzz, TEX2;
| TEX | tex | 28: TEXr r2.x, r3.zwzz, TEX2;
| SCB0 | add | 29: ADDr h6.x,-r2, r6.y---;
| | |
13 | SCT0/1 | mul | 30: MOVrc0 hc,-h6;
| SCB1 | mad | 31: MADr r4.zw, r4.--xy, r6.--zz, const.--xy;
| | |
14 | SCT0 | mov | 33: TEXr r3.x, r4.zwzz, TEX2;
| TEX | tex | 33: TEXr r3.x, r4.zwzz, TEX2;
| SCB1 | mad | 34: MADr r4.zw, r4.--xy, r6.--zz, const.--xy;
| | |
15 | SCT0/1 | mul | 36: MOVh h4, const.xxxx;
| SCB0 | add | 38: ADDr h5.y,-r3.-x--, r6;
| | |
16 | SCT0/1 | mul | 39: MOVh h4(LT0.xyzw), const.xxxx;
| SCB1 | mad | 41: MADr r5.zw, r4.--xy, r6.--zz, const.--xy;
| | |
17 | SCT0 | mov | 43: TEXr r3.x, r4.zwzz, TEX2;
| TEX | tex | 43: TEXr r3.x, r4.zwzz, TEX2;
| SCB1 | mad | 44: MADr r4.zw, r4.--xy, r6.--zz, const.--xy;
| | |
18 | SCB1 | add | 46: ADDr h5.z,-r3.--x-, r6.--y-;
| | |
19 | SCB0/1 | mad | 47: MADr r6.xw, r4.x--y, r6.z--z, const.x--y;
| | |
20 | SCT0 | mov | 49: TEXr r3.x, r4.zwzz, TEX2;
| TEX | tex | 49: TEXr r3.x, r4.zwzz, TEX2;
| SCB1 | add | 50: ADDr h5.w,-r3.---x, r6.---y;
| | |
21 | SCT0 | mov | 51: TEXr r6.x, r6.xwyy, TEX2;
| TEX | tex | 51: TEXr r6.x, r6.xwyy, TEX2;
| SCB1 | mad | 52: MADr r4.zw, r4.--xy, r6.--zz, const.--xy;
| | |
22 | SCT0 | mov | 54: TEXr r3.x, r4.zwzz, TEX2;
| TEX | tex | 54: TEXr r3.x, r4.zwzz, TEX2;
| SCB0 | add | 55: ADDr h5.x,-r3, r6.y---;
| | |
23 | SCT0/1 | mul | 56: MOVrc0 hc,-h5;
| SCB0 | mad | 57: MADr r3.xy, r4, r6.zz--, const.xy--;
| | |
24 | SCT0/1 | mul | 59: MOVh h5, const.xxxx;
| SCB0/1 | mul | 61: MOVh h5(LT0.xyzw), const.xxxx;
| | |
25 | SCT0 | mov | 63: TEXr r3.x, r3, TEX2;
| TEX | tex | 63: TEXr r3.x, r3, TEX2;
| SCB0 | add | 64: ADDr h6.y,-r3.-x--, r6;
| | |
26 | SCT0 | mov | 65: TEXr r5.x, r5.zwzz, TEX2;
| TEX | tex | 65: TEXr r5.x, r5.zwzz, TEX2;
| SCB1 | mad | 66: MADr r5.zw, r4.--xy, r6.--zz, const.--xy;
| | |
27 | SCB1 | add | 68: ADDr h6.z,-r5.--x-, r6.--y-;
| | |
28 | SCT0 | mov | 69: TEXr r5.x, r5.zwzz, TEX2;
| TEX | tex | 69: TEXr r5.x, r5.zwzz, TEX2;
| SCB1 | mad | 70: MADr r5.zw, r4.--xy, r6.--zz, const.--xy;
| | |
29 | SCB1 | add | 72: ADDr h6.w,-r5.---x, r6.---y;
| | |
30 | SCT0 | mov | 73: TEXr r5.x, r5.zwzz, TEX2;
| TEX | tex | 73: TEXr r5.x, r5.zwzz, TEX2;
| SCB0 | add | 74: ADDr h6.x,-r5, r6.y---;
| | |
31 | SCT0/1 | mul | 75: MOVrc0 hc,-h6;
| SCB0 | mad | 76: MADr r5.xy, r4, r6.zz--, const.xy--;
| | |
32 | SCT0/1 | mul | 78: MOVh h6, const.xxxx;
| SCB0/1 | mul | 80: MOVh h6(LT0.xyzw), const.xxxx;
| | |
33 | SCT0 | mov | 82: TEXr r5.x, r5, TEX2;
| TEX | tex | 82: TEXr r5.x, r5, TEX2;
| SCB0 | add | 84: ADDr h10.y,-r5.-x--, r6;
| SCB1 | add | 83: ADDr h10.z,-r6.--x-, r6.--y-;
| | |
34 | SCB0/1 | mad | 85: MADr r6.xw, r4.x--y, r6.z--z, const.x--x;
| | |
35 | SCT0 | mov | 87: TEXr r6.x, r6.xwyy, TEX2;
| TEX | tex | 87: TEXr r6.x, r6.xwyy, TEX2;
| SCB1 | mad | 88: MADr r6.zw, r4.--xy, r6.--zz, const.--xx;
| | |
36 | SCT0 | mov | 90: TEXr r4.x, r6.zwzz, TEX2;
| TEX | tex | 90: TEXr r4.x, r6.zwzz, TEX2;
| SCB0 | add | 91: ADDr h10.x,-r4, r6.y---;
| SCB1 | add | 92: ADDr h10.w,-r6.---x, r6.---y;
| | |
37 | SCT0/1 | mul | 93: MOVrc0 hc,-h10;
| SCB0 | mul | 94: MULr r5.xy, r6.zw--, const.xx--;
| | |
38 | SCT0/1 | mul | 96: MOVh h8, const.xxxx;
| SCB0 | frc | 98: FRCr h10.xy, r5;
| | |
39 | SCT1 | mul | 99: MULh h10.w, h10.---y, h10.---x;
| SCB0 | add | 100: ADDh h12.xy,-h10, const.xx--;
| | |
40 | SCT0 | mul | 102: MULh h10.yz, h10.-yx-, h12.-xy-;
| SCB0/1 | mul | 103: MOVh h8(LT0.xyzw), const.xxxx;
| | |
41 | SCT0 | mul | 105: MULh h10.x, h12.y---, h12;
| SCB0 | dp4 | 106: DP4h h8.x, h8, h10;
| SCB1 | nop | 106: DP4h h8.x, h8, h10;
| | |
42 | SCT0 | mul | 107: MULr r5.xy, r5.zw--, const.xx--;
| SCB0 | frc | 109: FRCr h10.xy, r5;
| | |
43 | SCT1 | mul | 110: MULh h10.w, h10.---y, h10.---x;
| SCB1 | add | 111: ADDh h8.zw,-h10.--xy, const.--xx;
| | |
44 | SCT0 | mul | 113: MULh h10.yz, h10.-yx-, h8.-zw-;
| SCB0 | mul | 114: MULh h10.x, h8.w---, h8.z---;
| | |
45 | SCB0 | dp4 | 115: DP4h h8.y, h6, h10;
| SCB1 | nop | 115: DP4h h8.y, h6, h10;
| | |
46 | SCB0 | mul | 116: MULr r3.xy, r4.zw--, const.xx--;
| | |
47 | SCB0 | frc | 118: FRCr h6.xy, r3;
| | |
48 | SCT1 | mul | 119: MULh h6.w, h6.---y, h6.---x;
| SCB1 | add | 120: ADDh h8.zw,-h6.--xy, const.--xx;
| | |
49 | SCT0 | mul | 122: MULh h6.yz, h6.-yx-, h8.-zw-;
| SCB0 | mul | 123: MULh h6.x, h8.w---, h8.z---;
| | |
50 | SCB0 | dp4 | 124: DP4h h8.z, h5, h6;
| SCB1 | nop | 124: DP4h h8.z, h5, h6;
| | |
51 | SCT1 | mul | 125: MULr r2.zw, r3, const.--xx;
| SCB0 | frc | 127: FRCr h5.xy, r2.zw--;
| SCB1 | mov | 128: MOVr r3.z, r0.--w-;
| | |
52 | SCT1 | mul | 129: MULh h5.w, h5.---y, h5.---x;
| SCB0 | add | 130: ADDh h6.xy,-h5, const.xx--;
| | |
53 | SCT0 | mul | 132: MULh h5.yz, h5.-yx-, h6.-xy-;
| SCB0 | mul | 133: MULh h5.x, h6.y---, h6;
| | |
54 | SCB0 | dp4 | 134: DP4h h8.w, h4, h5;
| SCB1 | nop | 134: DP4h h8.w, h4, h5;
| | |
55 | SCT0 | div | 135: TEXh h4.xyz, f[TEX0], TEX1;
| TEX | tex | 135: TEXh h4.xyz, f[TEX0], TEX1;
| SCB0 | dp3 | 136: DP3h*2 h0.w, h0, h4;
| | |
56 | SCB0 | mad | 137: MADh h0.xyz, h4,-h0.www-, h0;
| | |
57 | SCB0 | dp4 | 138: DP4r r2.z, const, r1;
| SCB1 | nop | 138: DP4r r2.z, const, r1;
| | |
58 | SCB0 | dp4 | 140: DP4r r2.w, const, r1;
| SCB1 | nop | 140: DP4r r2.w, const, r1;
| | |
59 | SCT0 | mov | 142: TEXh h0.w, r2.zwzz, TEX3;
| TEX | tex | 142: TEXh h0.w, r2.zwzz, TEX3;
| SCB0 | dp4 | 143: DP4h h4.w, h8, const.xxxx;
| SCB1 | nop | 143: DP4h h4.w, h8, const.xxxx;
| | |
60 | SCB0 | dp3 | 145: DP3h r3.x,-const, h4;
| SCB1 | mul | 147: MULh h2.w, h0, h4;
| | |
61 | SCB0 | dp3 | 148: DP3h r3.y,-const, h0;
| | |
62 | SCT0 | mov | 150: TEXh h1, r3, TEX4;
| TEX | tex | 150: TEXh h1, r3, TEX4;
| SCB0/1 | mul | 151: MULh h1, h1, const;
| | |
63 | SCT0 | div | 153: TEXh h0, f[TEX0], TEX5;
| TEX | tex | 153: TEXh h0, f[TEX0], TEX5;
| SCB0/1 | mad | 154: MADh h0, h1, h2.wwww, h0;
Pass SCT TEX SCB
1: 50% 100% 100%
2: 0% 0% 100%
3: 0% 0% 100%
4: 0% 0% 100%
5: 25% 0% 100%
6: 0% 0% 100%
7: 0% 0% 0%
8: 0% 100% 75%
9: 0% 100% 50%
10: 0% 0% 25%
11: 0% 100% 25%
12: 0% 100% 25%
13: 100% 0% 50%
14: 0% 100% 50%
15: 100% 0% 25%
16: 100% 0% 50%
17: 0% 100% 50%
18: 0% 0% 25%
19: 0% 0% 50%
20: 0% 100% 25%
21: 0% 100% 50%
22: 0% 100% 25%
23: 100% 0% 50%
24: 100% 0% 100%
25: 0% 100% 25%
26: 0% 100% 50%
27: 0% 0% 25%
28: 0% 100% 50%
29: 0% 0% 25%
30: 0% 100% 25%
31: 100% 0% 50%
32: 100% 0% 100%
33: 0% 100% 50%
34: 0% 0% 50%
35: 0% 100% 50%
36: 0% 100% 50%
37: 100% 0% 50%
38: 100% 0% 50%
39: 25% 0% 50%
40: 50% 0% 100%
41: 25% 0% 100%
42: 50% 0% 50%
43: 25% 0% 50%
44: 50% 0% 25%
45: 0% 0% 100%
46: 0% 0% 50%
47: 0% 0% 50%
48: 25% 0% 50%
49: 50% 0% 25%
50: 0% 0% 100%
51: 50% 0% 75%
52: 25% 0% 50%
53: 50% 0% 25%
54: 0% 0% 100%
55: 50% 100% 75%
56: 0% 0% 75%
57: 0% 0% 100%
58: 0% 0% 100%
59: 0% 100% 100%
60: 0% 0% 75%
61: 0% 0% 75%
62: 0% 100% 100%
63: 50% 100% 100%
MEAN: 23% 33% 60%
Pass SCT0 SCT1 TEX SCB0 SCB1
1: 100% 0% 100% 100% 100%
2: 0% 0% 0% 100% 100%
3: 0% 0% 0% 100% 100%
4: 0% 0% 0% 100% 100%
5: 0% 100% 0% 100% 100%
6: 0% 0% 0% 100% 100%
7: 0% 0% 0% 0% 0%
8: 0% 0% 100% 100% 100%
9: 0% 0% 100% 0% 100%
10: 0% 0% 0% 0% 100%
11: 0% 0% 100% 0% 100%
12: 0% 0% 100% 100% 0%
13: 100% 100% 0% 0% 100%
14: 0% 0% 100% 0% 100%
15: 100% 100% 0% 100% 0%
16: 100% 100% 0% 0% 100%
17: 0% 0% 100% 0% 100%
18: 0% 0% 0% 0% 100%
19: 0% 0% 0% 100% 100%
20: 0% 0% 100% 0% 100%
21: 0% 0% 100% 0% 100%
22: 0% 0% 100% 100% 0%
23: 100% 100% 0% 100% 0%
24: 100% 100% 0% 100% 100%
25: 0% 0% 100% 100% 0%
26: 0% 0% 100% 0% 100%
27: 0% 0% 0% 0% 100%
28: 0% 0% 100% 0% 100%
29: 0% 0% 0% 0% 100%
30: 0% 0% 100% 100% 0%
31: 100% 100% 0% 100% 0%
32: 100% 100% 0% 100% 100%
33: 0% 0% 100% 100% 100%
34: 0% 0% 0% 100% 100%
35: 0% 0% 100% 0% 100%
36: 0% 0% 100% 100% 100%
37: 100% 100% 0% 100% 0%
38: 100% 100% 0% 100% 0%
39: 0% 100% 0% 100% 0%
40: 100% 0% 0% 100% 100%
41: 100% 0% 0% 100% 100%
42: 100% 0% 0% 100% 0%
43: 0% 100% 0% 0% 100%
44: 100% 0% 0% 100% 0%
45: 0% 0% 0% 100% 100%
46: 0% 0% 0% 100% 0%
47: 0% 0% 0% 100% 0%
48: 0% 100% 0% 0% 100%
49: 100% 0% 0% 100% 0%
50: 0% 0% 0% 100% 100%
51: 0% 100% 0% 100% 100%
52: 0% 100% 0% 100% 0%
53: 100% 0% 0% 100% 0%
54: 0% 0% 0% 100% 100%
55: 100% 0% 100% 100% 0%
56: 0% 0% 0% 100% 0%
57: 0% 0% 0% 100% 100%
58: 0% 0% 0% 100% 100%
59: 0% 0% 100% 100% 100%
60: 0% 0% 0% 100% 0%
61: 0% 0% 0% 100% 0%
62: 0% 0% 100% 100% 100%
63: 100% 0% 100% 100% 100%
MEAN: 28% 23% 33% 71% 65%
Cycles: 78.75 :: R Regs Used: 7 :: R Regs Max Index (0 based): 6
--------------------------------------------------------------------------------
Running performance on file test\p2b_accum_sun_near.ps
-------------------- NV40 --------------------
Target: GeForce 6800 Ultra (NV40) :: Unified Compiler: v81.95
Cycles: 56.25 :: R Regs Used: 5 :: R Regs Max Index (0 based): 4
Pixel throughput (assuming 1 cycle texture lookup) 114.29 MP/s
--------------------------------------------------------------------------------
Running performance on file test\p2b_accum_sun_near.ps
-------------------- G70 --------------------
Target: GeForce 7800 GT (G70) :: Unified Compiler: v81.95
Cycles: 50.00 :: R Regs Used: 4 :: R Regs Max Index (0 based): 3
Pixel throughput (assuming 1 cycle texture lookup) 192.00 MP/s

View file

@ -0,0 +1,41 @@
//
// Generated by Microsoft (R) D3DX9 Shader Compiler 9.07.239.0000
//
// fxc /nologo /Tps_2_b /Emain /Zpr /Fctest\p2b_clouds.ps clouds.ps
//
//
// Parameters:
//
// sampler2D s_clouds0;
// sampler2D s_clouds1;
// sampler2D s_tonemap;
//
//
// Registers:
//
// Name Reg Size
// ------------ ----- ----
// s_clouds0 s0 1
// s_clouds1 s1 1
// s_tonemap s2 1
//
// Pixel shader clouds (compiled with /Tps_2_b, emitted as ps_2_x).
// Samplers per the header table: s0 s_clouds0, s1 s_clouds1, s2 s_tonemap.
// Result: oC0.xyz = (s_clouds0(t0) + s_clouds1(t1)) * v0 * s_tonemap(0.5, 0.5).x,
//         oC0.w   = v0.w.
ps_2_x
def c0, 0.5, 0, 0, 0
dcl v0
dcl_pp t0.xy
dcl_pp t1.xy
dcl_2d s0
dcl_2d s1
dcl_2d s2
texld_pp r0, t0, s0
texld_pp r1, t1, s1
// Sum the two cloud layers and modulate by the interpolated colour v0.
add_pp r0.xyz, r0, r1
mul_pp r1.xyz, r0, v0
// Sample s_tonemap at the fixed coordinate (0.5, 0.5); only .x is used.
mov_pp r0.xy, c0.x
texld_pp r0, r0, s2
mul_pp r0.xyz, r1, r0.x
// Alpha is passed through from v0.w.
mov_pp r0.w, v0.w
mov_pp oC0, r0
// approximately 9 instruction slots used (3 texture, 6 arithmetic)

View file

@ -0,0 +1,52 @@
--------------------------------------------------------------------------------
Running performance on file test\p2B_clouds.ps
-------------------- NV40 --------------------
Target: GeForce 6800 Ultra (NV40) :: Unified Compiler: v65.04
IPU0 ------ Simplified schedule: --------
Pass | Unit | uOp | PC: Op
-----+--------+------+-------------------------
1 | SCT0 | div | 0: TEXh h1.xyz, f[TEX0], TEX0;
| SCT1 | div | 1: MOVh h0, f[COL0];
| TEX | tex | 0: TEXh h1.xyz, f[TEX0], TEX0;
| SRB | col | 2: MOVh h1.w, const.---x;
| SCB1 | mul | 2: MOVh h1.w, const.---x;
| | |
2 | SCT0 | div | 4: TEXh h2.xyz, f[TEX1], TEX1;
| TEX | tex | 4: TEXh h2.xyz, f[TEX1], TEX1;
| SCB0 | add | 5: ADDh h1.xyz, h1, h2;
| | |
3 | SCT0 | mul | 6: MULh h0.xyz, h1, h0;
| | |
4 | SCT0 | mov | 7: TEXh h1.x, h1.wwww, TEX2;
| TEX | tex | 7: TEXh h1.x, h1.wwww, TEX2;
| SCB0 | mul | 8: MULh h0.xyz, h0, h1.xxx-;
Pass SCT TEX SCB
1: 50% 100% 25%
2: 50% 100% 75%
3: 75% 0% 0%
4: 0% 100% 75%
MEAN: 43% 75% 43%
Pass SCT0 SCT1 TEX SCB0 SCB1
1: 100% 0% 100% 0% 100%
2: 100% 0% 100% 100% 0%
3: 100% 0% 0% 0% 0%
4: 0% 0% 100% 100% 0%
MEAN: 75% 0% 75% 50% 25%
Cycles: 4.00 :: R Regs Used: 2 :: R Regs Max Index (0 based): 1
--------------------------------------------------------------------------------
Running performance on file test\p2b_clouds.ps
-------------------- NV40 --------------------
Target: GeForce 6800 Ultra (NV40) :: Unified Compiler: v81.95
Cycles: 4.00 :: R Regs Used: 2 :: R Regs Max Index (0 based): 1
Pixel throughput (assuming 1 cycle texture lookup) 1.60 GP/s
--------------------------------------------------------------------------------
Running performance on file test\p2b_clouds.ps
-------------------- G70 --------------------
Target: GeForce 7800 GT (G70) :: Unified Compiler: v81.95
Cycles: 4.00 :: R Regs Used: 2 :: R Regs Max Index (0 based): 1
Pixel throughput (assuming 1 cycle texture lookup) 2.40 GP/s

View file

@ -0,0 +1,38 @@
//
// Generated by Microsoft (R) D3DX9 Shader Compiler 9.07.239.0000
//
// fxc /nologo /Tps_2_b /Emain /Zpr /Fctest\p2b_deffer_impl_flat.ps
// deffer_impl_flat.ps
//
//
// Parameters:
//
// float4 L_material;
// sampler2D s_base;
//
//
// Registers:
//
// Name Reg Size
// ------------ ----- ----
// L_material c0 1
// s_base s0 1
//
ps_2_x
// Body of deffer_impl_flat.ps: reads s_base (s0), the interpolants t0..t2 and
// L_material (c0), and writes three render targets oC0, oC1 and oC2.
// NOTE(review): the three outputs look like a deferred-shading G-buffer
// (position / normal / albedo) - confirm against the HLSL source.
//
// c1.x = 0.025          scale applied to the normalized t2 vector
// c1.y = 0.00784313772  (~= 2/255) constant written to oC2.w
def c1, 0.0250000004, 0.00784313772, 0, 0
dcl t0.xy
dcl t1.xyz
dcl_pp t2.xyz
dcl_2d s0
// r0 = s_base sampled at t0.xy
texld_pp r0, t0, s0
// r1.xyz = normalize(t2)
nrm_pp r1.xyz, t2
// oC0 = (t1 + 0.025 * normalize(t2), L_material.w)
mov_pp r2.w, c0.w
mad_pp r2.xyz, r1, c1.x, t1
mov_pp oC0, r2
// oC1 = (normalize(t2), s_base.w)
mov_pp r1.w, r0.w
mov_pp oC1, r1
// oC2 = (s_base.xyz, c1.y); the texture alpha in r0.w is replaced by the constant
mov r0.w, c1.y
mov_pp oC2, r0
// approximately 11 instruction slots used (1 texture, 10 arithmetic)

View file

@ -0,0 +1,52 @@
--------------------------------------------------------------------------------
Running performance on file test\p2B_deffer_impl_flat.ps
-------------------- NV40 --------------------
Target: GeForce 6800 Ultra (NV40) :: Unified Compiler: v65.04
IPU0 ------ Simplified schedule: --------
Pass | Unit | uOp | PC: Op
-----+--------+------+-------------------------
1 | SCT0 | div | 0: TEXh h1, f[TEX0], TEX0;
| SCT1 | mov | 1: NRMh h4.xyz, f[TEX2];
| TEX | tex | 0: TEXh h1, f[TEX0], TEX0;
| SRB | nrm | 1: NRMh h4.xyz, f[TEX2];
| SCB1 | mul | 2: MOVh h0.w, const;
| | |
2 | SCT0 | mul | 6: MOVh h6.xyz, h1;
| SCT1 | mul | 4: MOVh h6.w, const.---x;
| SCB1 | mul | 7: MOVh h4.w, h1;
| | |
3 | SCT0 | div | 8: MADh h0.xyz, h4, const.xxx-, f[TEX1];
| SCB0 | mad | 8: MADh h0.xyz, h4, const.xxx-, f[TEX1];
Pass SCT TEX SCB
1: 50% 100% 25%
2: 100% 0% 25%
3: 75% 0% 75%
4: 0% 0% 0%
MEAN: 56% 25% 31%
Pass SCT0 SCT1 TEX SCB0 SCB1
1: 100% 0% 100% 0% 100%
2: 100% 100% 0% 0% 100%
3: 100% 0% 0% 100% 0%
4: 0% 0% 0% 0% 0%
MEAN: 75% 25% 25% 25% 50%
Cycles: 4.00 :: R Regs Used: 3 :: R Regs Max Index (0 based): 3
Max register used is > number of registers used, registers are not being used efficiently
--------------------------------------------------------------------------------
Running performance on file test\p2b_deffer_impl_flat.ps
-------------------- NV40 --------------------
Target: GeForce 6800 Ultra (NV40) :: Unified Compiler: v81.95
Cycles: 2.00 :: R Regs Used: 3 :: R Regs Max Index (0 based): 3
Max register used is > number of registers used, registers are not being used efficiently
Pixel throughput (assuming 1 cycle texture lookup) 3.20 GP/s
--------------------------------------------------------------------------------
Running performance on file test\p2b_deffer_impl_flat.ps
-------------------- G70 --------------------
Target: GeForce 7800 GT (G70) :: Unified Compiler: v81.95
Cycles: 2.00 :: R Regs Used: 3 :: R Regs Max Index (0 based): 3
Max register used is > number of registers used, registers are not being used efficiently
Pixel throughput (assuming 1 cycle texture lookup) 4.80 GP/s

View file

@ -0,0 +1,102 @@
//
// Generated by Microsoft (R) D3DX9 Shader Compiler 9.07.239.0000
//
// fxc /nologo /Tps_2_b /Emain /Zpr /Fctest\p2b_deffer_impl_flat_d.ps
// deffer_impl_flat_d.ps
//
//
// Parameters:
//
// float4 L_material;
// sampler2D s_base;
// sampler2D s_dn_a;
// sampler2D s_dn_b;
// sampler2D s_dn_g;
// sampler2D s_dn_r;
// sampler2D s_dt_a;
// sampler2D s_dt_b;
// sampler2D s_dt_g;
// sampler2D s_dt_r;
// sampler2D s_mask;
//
//
// Registers:
//
// Name Reg Size
// ------------ ----- ----
// L_material c0 1
// s_base s0 1
// s_mask s1 1
// s_dt_r s2 1
// s_dt_g s3 1
// s_dt_b s4 1
// s_dt_a s5 1
// s_dn_r s6 1
// s_dn_g s7 1
// s_dn_b s8 1
// s_dn_a s9 1
//
ps_2_x
// Body of deffer_impl_flat_d.ps: like the flat variant it writes oC0/oC1/oC2,
// but the vector stored in oC1 and the colour stored in oC2 are built from four
// "dn" textures (s6..s9) and four "dt" textures (s2..s5), blended with the
// per-channel weights read from s_mask (s1).
// NOTE(review): the names suggest detail-normal / detail-texture layers and a
// G-buffer output layout - confirm against the HLSL source.
//
// c1 = (1, -0.5, 0.5, 0.025): c1.x sums the mask, c1.y re-centres the dn
//      samples, c1.z halves the blended z, c1.w scales the oC0 offset
// c2 = (1, 1, 0.5, 0.00784313772): xyz scale the blended vector, w (~= 2/255)
//      is written to oC2.w
def c1, 1, -0.5, 0.5, 0.0250000004
def c2, 1, 1, 0.5, 0.00784313772
dcl t0.xy
dcl t1.xyz
dcl_pp t2.xyz
dcl_pp t3.xyz
dcl_pp t4.xyz
dcl t5.xy
dcl_2d s0
dcl_2d s1
dcl_2d s2
dcl_2d s3
dcl_2d s4
dcl_2d s5
dcl_2d s6
dcl_2d s7
dcl_2d s8
dcl_2d s9
// Texture fetches. The dn/dt layers use t5.xy; s_mask and s_base use t0.xy.
// r9 = s_dn_a, r8 = s_dn_b, r7 = s_dn_r, r6 = s_dn_g
texld_pp r9, t5, s9
texld_pp r8, t5, s8
texld_pp r7, t5, s6
texld_pp r6, t5, s7
// r2 = s_mask, r0 = s_base
texld_pp r2, t0, s1
texld_pp r0, t0, s0
// r5 = s_dt_g, r4 = s_dt_r, r3 = s_dt_b, r1 = s_dt_a
texld_pp r5, t5, s3
texld_pp r4, t5, s2
texld_pp r3, t5, s4
texld_pp r1, t5, s5
// Re-centre each dn sample: rN.xyz = rN.wzy - 0.5 (note the .wzyx swizzle)
add_pp r9.xyz, r9.wzyx, c1.y
add_pp r8.xyz, r8.wzyx, c1.y
add_pp r7.xyz, r7.wzyx, c1.y
// r1.w = mask.x + mask.y + mask.z + mask.w (dp4 against replicated 1)
dp4_pp r1.w, r2, c1.x
add_pp r6.xyz, r6.wzyx, c1.y
// Normalize the mask so its four weights sum to 1
rcp_pp r1.w, r1.w
mul_pp r2, r2, r1.w
// r6.xyz = dn_g*mask.y + dn_r*mask.x + dn_b*mask.z + dn_a*mask.w
mul_pp r6.xyz, r6, r2.y
mad_pp r6.xyz, r7, r2.x, r6
mad_pp r6.xyz, r8, r2.z, r6
mad_pp r6.xyz, r9, r2.w, r6
// r7.x = t2.x*r6.x + t2.y*r6.y + t2.z*(0.5*r6.z)
mul_pp r6.w, r6.z, c1.z
mul r1.w, r6.x, t2.x
mad r1.w, t2.y, r6.y, r1.w
mad_pp r7.x, t2.z, r6.w, r1.w
// Scale r6 by (1, 1, 0.5), then r7.y = dot(t3, r6), r7.z = dot(t4, r6).
// Together with r7.x this is the 3x3 matrix with rows t2, t3, t4 applied to
// the scaled blend.
mul_pp r6.xyz, r6, c2
dp3_pp r7.y, t3, r6
dp3_pp r7.z, t4, r6
// r6.xyz = normalize(r7)
nrm_pp r6.xyz, r7
// oC0 = (t1 + 0.025 * r6.xyz, L_material.w)
mad_pp r7.xyz, r6, c1.w, t1
mov_pp r7.w, c0.w
mov_pp oC0, r7
// oC1 = (r6.xyz, s_base.w)
mov_pp r6.w, r0.w
mov_pp oC1, r6
// r1.xyz = dt_g*mask.y + dt_r*mask.x + dt_b*mask.z + dt_a*mask.w
mul_pp r5.xyz, r5, r2.y
mad_pp r4.xyz, r4, r2.x, r5
mad_pp r2.xyz, r3, r2.z, r4
mad_pp r1.xyz, r1, r2.w, r2
// oC2 = (2 * s_base.xyz * r1.xyz, c2.w)
mul_pp r0.xyz, r0, r1
add_pp r0.xyz, r0, r0
mov r0.w, c2.w
mov_pp oC2, r0
// approximately 44 instruction slots used (10 texture, 34 arithmetic)

View file

@ -0,0 +1,141 @@
--------------------------------------------------------------------------------
Running performance on file test\p2B_deffer_impl_flat_d.ps
-------------------- NV40 --------------------
Target: GeForce 6800 Ultra (NV40) :: Unified Compiler: v65.04
IPU0 ------ Simplified schedule: --------
Pass | Unit | uOp | PC: Op
-----+--------+------+-------------------------
1 | SCT0 | div | 0: TEXh h0, f[TEX0], TEX1;
| TEX | tex | 0: TEXh h0, f[TEX0], TEX1;
| SCB0 | dp4 | 1: DP4h h1.x, h0, const.xxxx;
| SCB1 | nop | 1: DP4h h1.x, h0, const.xxxx;
| | |
2 | SCT0/1 | div | 3: DIVh h1, h0, h1;
| SCB1 | mul | 4: MOVh h6.w, const.---x;
| | |
3 | SCT0 | div | 6: TEXh h0.yzw, f[TEX5], TEX6;
| TEX | tex | 6: TEXh h0.yzw, f[TEX5], TEX6;
| SCB0 | add | 7: ADDh h2.xyz, h0.wzy-, const.xxx-;
| | |
4 | SCT0 | div | 9: TEXh h0.yzw, f[TEX5], TEX7;
| TEX | tex | 9: TEXh h0.yzw, f[TEX5], TEX7;
| SCB0 | add | 10: ADDh h0.xyz, h0.wzy-, const.xxx-;
| | |
5 | SCT0 | mul | 12: MULh h0.xyz, h0, h1.yyy-;
| SCB0 | mad | 13: MADh h4.xyz, h2, h1.xxx-, h0;
| | |
6 | SCT0 | div | 14: TEXh h0.yzw, f[TEX5], TEX9;
| TEX | tex | 14: TEXh h0.yzw, f[TEX5], TEX9;
| SCB0 | add | 15: ADDh h2.xyz, h0.wzy-, const.xxx-;
| | |
7 | SCT0 | div | 17: TEXh h0.yzw, f[TEX5], TEX8;
| TEX | tex | 17: TEXh h0.yzw, f[TEX5], TEX8;
| SCB0 | add | 18: ADDh h3.xyz, h0.wzy-, const.xxx-;
| SCB1 | mul | 20: MOVh h0.w, const;
| | |
8 | SCT0 | mul | 22: MADh h3.xyz, h3, h1.zzz-, h4;
| SCB0 | mad | 22: MADh h3.xyz, h3, h1.zzz-, h4;
| | |
9 | SCT0 | div | 23: TEXh h0.xyz, f[TEX5], TEX3;
| TEX | tex | 23: TEXh h0.xyz, f[TEX5], TEX3;
| SCB0 | mad | 24: MADh h2.xyz, h2, h1.www-, h3;
| | |
10 | SCT0 | div | 25: TEXh h3.xyz, f[TEX5], TEX2;
| TEX | tex | 25: TEXh h3.xyz, f[TEX5], TEX2;
| SCB0 | mul | 26: MULh h0.xyz, h0, h1.yyy-;
| | |
11 | SCT0/1 | div | 27: MOVr r3.xzw, f[TEX2].x-yz;
| SCB0 | mad | 29: MADh h3.xyz, h3, h1.xxx-, h0;
| SCB1 | mul | 28: MULh/2 h2.w, h2.---z, r3;
| | |
12 | SCT0 | div | 30: TEXh h0.xyz, f[TEX5], TEX4;
| TEX | tex | 30: TEXh h0.xyz, f[TEX5], TEX4;
| SCB0 | mad | 31: MADh h1.xyz, h0, h1.zzz-, h3;
| | |
13 | SCT0 | mul | 32: MULh h3.xyz, h2, const.xxy-;
| SCB1 | mul | 34: MULr r3.w, h2.---x, r3.---x;
| | |
14 | SCT0 | div | 35: TEXh h0.xyz, f[TEX5], TEX5;
| TEX | tex | 35: TEXh h0.xyz, f[TEX5], TEX5;
| SCB0 | mad | 36: MADh h0.xyz, h0, h1.www-, h1;
| | |
15 | SCT0 | div | 37: DP3h h4.y, f[TEX3], h3;
| SCB0 | dp3 | 37: DP3h h4.y, f[TEX3], h3;
| SCB1 | mad | 38: MADr h3.w, r3.---z, h2.---y, r3;
| | |
16 | SCT0 | div | 39: DP3h h4.z, f[TEX4], h3;
| SCB0 | dp3 | 39: DP3h h4.z, f[TEX4], h3;
| | |
17 | SCT0 | div | 40: TEXh h1, f[TEX0], TEX0;
| TEX | tex | 40: TEXh h1, f[TEX0], TEX0;
| SCB0 | add | 41: ADDh h4.x, h2.w---, h3.w---;
| SCB1 | mul | 42: MOVh h4.w, h1;
| | |
18 | SCT1 | mov | 43: NRMh h4.xyz, h4;
| SRB | nrm | 43: NRMh h4.xyz, h4;
| SCB0 | mul | 44: MULh*2 h6.xyz, h1, h0;
| | |
19 | SCT0 | div | 45: MADh h0.xyz, h4, const.xxx-, f[TEX1];
| SCB0 | mad | 45: MADh h0.xyz, h4, const.xxx-, f[TEX1];
Pass SCT TEX SCB
1: 50% 100% 100%
2: 100% 0% 25%
3: 50% 100% 75%
4: 50% 100% 75%
5: 75% 0% 75%
6: 50% 100% 75%
7: 50% 100% 100%
8: 75% 0% 75%
9: 50% 100% 75%
10: 50% 100% 75%
11: 75% 0% 100%
12: 50% 100% 75%
13: 75% 0% 25%
14: 50% 100% 75%
15: 75% 0% 75%
16: 75% 0% 75%
17: 50% 100% 50%
18: 0% 0% 75%
19: 75% 0% 75%
20: 0% 0% 0%
MEAN: 56% 50% 68%
Pass SCT0 SCT1 TEX SCB0 SCB1
1: 100% 0% 100% 100% 100%
2: 100% 100% 0% 0% 100%
3: 100% 0% 100% 100% 0%
4: 100% 0% 100% 100% 0%
5: 100% 0% 0% 100% 0%
6: 100% 0% 100% 100% 0%
7: 100% 0% 100% 100% 100%
8: 100% 0% 0% 100% 0%
9: 100% 0% 100% 100% 0%
10: 100% 0% 100% 100% 0%
11: 100% 100% 0% 100% 100%
12: 100% 0% 100% 100% 0%
13: 100% 0% 0% 0% 100%
14: 100% 0% 100% 100% 0%
15: 100% 0% 0% 100% 0%
16: 100% 0% 0% 100% 0%
17: 100% 0% 100% 100% 100%
18: 0% 0% 0% 100% 0%
19: 100% 0% 0% 100% 0%
20: 0% 0% 0% 0% 0%
MEAN: 90% 10% 50% 85% 30%
Cycles: 20.00 :: R Regs Used: 4 :: R Regs Max Index (0 based): 3
--------------------------------------------------------------------------------
Running performance on file test\p2b_deffer_impl_flat_d.ps
-------------------- NV40 --------------------
Target: GeForce 6800 Ultra (NV40) :: Unified Compiler: v81.95
Cycles: 19.00 :: R Regs Used: 4 :: R Regs Max Index (0 based): 3
Pixel throughput (assuming 1 cycle texture lookup) 336.84 MP/s
--------------------------------------------------------------------------------
Running performance on file test\p2b_deffer_impl_flat_d.ps
-------------------- G70 --------------------
Target: GeForce 7800 GT (G70) :: Unified Compiler: v81.95
Cycles: 18.00 :: R Regs Used: 4 :: R Regs Max Index (0 based): 3
Pixel throughput (assuming 1 cycle texture lookup) 533.33 MP/s

View file

@ -0,0 +1,45 @@
//
// Generated by Microsoft (R) D3DX9 Shader Compiler 9.07.239.0000
//
// fxc /nologo /Tps_2_b /Emain /Zpr /Fctest\p2b_sky2.ps sky2.ps
//
//
// Parameters:
//
// samplerCUBE s_sky0;
// samplerCUBE s_sky1;
// sampler2D s_tonemap;
//
//
// Registers:
//
// Name Reg Size
// ------------ ----- ----
// s_sky0 s0 1
// s_sky1 s1 1
// s_tonemap s2 1
//
ps_2_x
// Body of sky2.ps: blends two cube-map samples (s_sky0, s_sky1) by v0.w,
// multiplies by v0.xyz and by twice the s_tonemap value read at (0.5, 0.5),
// then writes the result to oC0 and one third of its xyz to oC1.
//
// c0.x = 0.5  fixed s_tonemap lookup coordinate; c0.y = 0 goes to the output w
// c1   = (1/3, 1/3, 1/3, 1) scale applied to the oC0 value to produce oC1
def c0, 0.5, 0, 0, 0
def c1, 0.333333343, 0.333333343, 0.333333343, 1
dcl v0
dcl_pp t0.xyz
dcl_pp t1.xyz
dcl_cube s0
dcl_cube s1
dcl_2d s2
// r1 = s_sky0 sampled along t0, r0 = s_sky1 sampled along t1
texld_pp r1, t0, s0
texld_pp r0, t1, s1
// r2.xyz = v0.w * sky1 + (1 - v0.w) * sky0
lrp_pp r2.xyz, v0.w, r0, r1
// r1.xyz = blended sky * v0.xyz
mul_pp r1.xyz, r2, v0
// Sample s_tonemap at the constant coordinate (0.5, 0.5)
mov_pp r0.xy, c0.x
texld_pp r0, r0, s2
// r0.w = 2 * tonemap.x, used as the scale factor
add_pp r0.w, r0.x, r0.x
mul_pp r0.xyz, r1, r0.w
// oC0 = (scaled colour, 0)
mov r0.w, c0.y
mov_pp oC0, r0
// oC1 = (oC0.xyz / 3, oC0.w)
mul r0, r0, c1
mov_pp oC1, r0
// approximately 13 instruction slots used (3 texture, 10 arithmetic)

View file

@ -0,0 +1,58 @@
--------------------------------------------------------------------------------
Running performance on file test\p2B_sky2.ps
-------------------- NV40 --------------------
Target: GeForce 6800 Ultra (NV40) :: Unified Compiler: v65.04
IPU0 ------ Simplified schedule: --------
Pass | Unit | uOp | PC: Op
-----+--------+------+-------------------------
1 | SCT0 | div | 0: TEXh h0.xyz, f[TEX0], TEX0;
| TEX | tex | 0: TEXh h0.xyz, f[TEX0], TEX0;
| SCB1 | mov | 1: MOVr r1.w, const.---x;
| | |
2 | SCT1 | div | 3: MOVh h1, f[COL0];
| SRB | col | 5: MOVr h0.w, const.---x;
| SCB0 | mad | 4: MADh h2.xyz,-h1.www-, h0, h0;
| SCB1 | mul | 5: MOVr h0.w, const.---x;
| | |
3 | SCT0 | div | 7: TEXh h0.xyz, f[TEX1], TEX1;
| TEX | tex | 7: TEXh h0.xyz, f[TEX1], TEX1;
| SCB0 | mad | 8: MADh h0.xyz, h1.www-, h0, h2;
| | |
4 | SCT0 | mov | 9: TEXh h2.x, r1.wwww, TEX2;
| TEX | tex | 9: TEXh h2.x, r1.wwww, TEX2;
| SCB0 | mul | 10: MULh*2 h0.xyz, h0, h1;
| | |
5 | SCT0 | mul | 11: MULh h0.xyz, h0, h2.xxx-;
| SCB0/1 | mul | 12: MULr h4, h0, const.xxxy;
Pass SCT TEX SCB
1: 50% 100% 25%
2: 0% 0% 100%
3: 50% 100% 75%
4: 0% 100% 75%
5: 75% 0% 100%
MEAN: 35% 60% 75%
Pass SCT0 SCT1 TEX SCB0 SCB1
1: 100% 0% 100% 0% 100%
2: 0% 0% 0% 100% 100%
3: 100% 0% 100% 100% 0%
4: 0% 0% 100% 100% 0%
5: 100% 0% 0% 100% 100%
MEAN: 60% 0% 60% 80% 60%
Cycles: 5.00 :: R Regs Used: 3 :: R Regs Max Index (0 based): 2
--------------------------------------------------------------------------------
Running performance on file test\p2b_sky2.ps
-------------------- NV40 --------------------
Target: GeForce 6800 Ultra (NV40) :: Unified Compiler: v81.95
Cycles: 5.00 :: R Regs Used: 3 :: R Regs Max Index (0 based): 2
Pixel throughput (assuming 1 cycle texture lookup) 1.28 GP/s
--------------------------------------------------------------------------------
Running performance on file test\p2b_sky2.ps
-------------------- G70 --------------------
Target: GeForce 7800 GT (G70) :: Unified Compiler: v81.95
Cycles: 5.00 :: R Regs Used: 3 :: R Regs Max Index (0 based): 2
Pixel throughput (assuming 1 cycle texture lookup) 1.92 GP/s

View file

@ -0,0 +1,35 @@
//
// Generated by Microsoft (R) D3DX9 Shader Compiler 9.07.239.0000
//
// fxc /nologo /Tps_2_b /Emain /Zpr /Fctest\p2b_yuv2rgb.ps yuv2rgb.ps
//
//
// Parameters:
//
// sampler2D s_base;
//
//
// Registers:
//
// Name Reg Size
// ------------ ----- ----
// s_base s1 1
//
ps_2_x
// Body of yuv2rgb.ps: applies a fixed affine transform to the s_base (s1)
// sample and writes it to oC0 with w = 1. With T = s_base sampled at t0.xy:
//   oC0.x = 1.16406*T.x                 + 1.59765*T.z - 0.86961
//   oC0.y = 1.16406*T.x - 0.390625*T.y  - 0.8125*T.z  + 0.53076
//   oC0.z = 1.16406*T.x + 2.01562*T.y                 - 1.0786
// NOTE(review): this matches a YUV -> RGB conversion with T = (Y, U, V), as
// the file name suggests - confirm the channel layout of the source texture.
//
// c0 = per-channel bias, c1.x = weight of T.x, c1.y = 1 (output w),
// c2.xy = T.z weights for x and y, c3.xy = T.y weights for y and z
def c0, -0.869610012, 0.53075999, -1.07860005, 0
def c1, 1.16406, 1, 0, 0
def c2, 1.59765005, -0.8125, 0, 0
def c3, -0.390625, 2.01561999, 0, 0
dcl t0.xy
dcl_2d s1
texld_pp r0, t0, s1
// r1.x = 1.16406 * T.x
mul_pp r1.x, r0.x, c1.x
// The .zxyw swizzle lines c3.x up with the y lane and c3.y with the z lane:
// r1.y = T.y * c3.x + r1.x,  r1.z = T.y * c3.y + r1.x
mad_pp r1.yz, r0.y, c3.zxyw, r1.x
// r0.x = T.z * c2.x + r1.x,  r0.y = T.z * c2.y + r1.y
mad_pp r0.xy, r0.z, c2, r1
mov_pp r0.z, r1.z
// Add the per-channel bias
add_pp r0.xyz, r0, c0
// Output w is forced to 1
mov r0.w, c1.y
mov_pp oC0, r0
// approximately 8 instruction slots used (1 texture, 7 arithmetic)

View file

@ -0,0 +1,49 @@
--------------------------------------------------------------------------------
Running performance on file test\p2B_yuv2rgb.ps
-------------------- NV40 --------------------
Target: GeForce 6800 Ultra (NV40) :: Unified Compiler: v65.04
IPU0 ------ Simplified schedule: --------
Pass | Unit | uOp | PC: Op
-----+--------+------+-------------------------
1 | SCT0 | div | 2: TEXh h0.xyz, f[TEX0], TEX1;
| SCT1 | mul | 0: MOVh h0.w, const.---x;
| TEX | tex | 2: TEXh h0.xyz, f[TEX0], TEX1;
| SCB0 | mul | 3: MULh h0.x, h0, const.x---;
| | |
2 | SCB0 | mad | 5: MADh h1.xy, h0.yy--, const.xy--, h0.xx--;
| | |
3 | SCT0 | mul | 7: MOVh h0.y, h1.-x--;
| SCB0 | mad | 8: MADh h0.xy, h0.zz--, const.xy--, h0;
| SCB1 | mul | 10: MOVh h0.z, h1.--y-;
| | |
4 | SCB0 | add | 11: ADDh h0.xyz, h0, const;
Pass SCT TEX SCB
1: 75% 100% 25%
2: 0% 0% 50%
3: 25% 0% 75%
4: 0% 0% 75%
MEAN: 25% 25% 56%
Pass SCT0 SCT1 TEX SCB0 SCB1
1: 100% 100% 100% 100% 0%
2: 0% 0% 0% 100% 0%
3: 100% 0% 0% 100% 100%
4: 0% 0% 0% 100% 0%
MEAN: 50% 25% 25% 100% 25%
Cycles: 4.00 :: R Regs Used: 1 :: R Regs Max Index (0 based): 0
--------------------------------------------------------------------------------
Running performance on file test\p2b_yuv2rgb.ps
-------------------- NV40 --------------------
Target: GeForce 6800 Ultra (NV40) :: Unified Compiler: v81.95
Cycles: 4.00 :: R Regs Used: 1 :: R Regs Max Index (0 based): 0
Pixel throughput (assuming 1 cycle texture lookup) 1.60 GP/s
--------------------------------------------------------------------------------
Running performance on file test\p2b_yuv2rgb.ps
-------------------- G70 --------------------
Target: GeForce 7800 GT (G70) :: Unified Compiler: v81.95
Cycles: 3.00 :: R Regs Used: 1 :: R Regs Max Index (0 based): 0
Pixel throughput (assuming 1 cycle texture lookup) 3.20 GP/s

View file

@ -0,0 +1,96 @@
//
// Generated by Microsoft (R) D3DX9 Shader Compiler 9.07.239.0000
//
// fxc /DUSE_SHADER3=1 /nologo /Tps_3_0 /Emain /Zpr
// /Fctest\p30_accum_sun_far.ps accum_sun_far.ps
//
//
// Parameters:
//
// float4 Ldynamic_color;
// float4 Ldynamic_dir;
// row_major float4x4 m_shadow;
// row_major float3x4 m_sunmask;
// sampler2D s_accumulator;
// sampler2D s_lmap;
// sampler3D s_material;
// sampler2D s_normal;
// sampler2D s_position;
// sampler2D s_smap;
//
//
// Registers:
//
// Name Reg Size
// -------------- ----- ----
// m_shadow c0 4
// m_sunmask c4 2
// Ldynamic_color c6 1
// Ldynamic_dir c7 1
// s_smap s0 1
// s_position s1 1
// s_normal s2 1
// s_lmap s3 1
// s_material s4 1
// s_accumulator s5 1
//
ps_3_0
// Body of accum_sun_far.ps (ps_3_0 build). Steps, in order:
//   1. read s_position and project it with m_shadow (c0..c3),
//   2. take four s_smap samples at diagonal offsets of +-1/4096 and compare
//      each with the projected depth, combining the 0/1 results with bilinear
//      weights taken from the fractional texel position,
//   3. multiply by s_lmap.w (looked up through m_sunmask, c4..c5) and pull the
//      result towards 0.333 by saturate(position.z / 180)^2,
//   4. look up s_material with two dot products and position.w, multiply by
//      Ldynamic_color, scale by the factor from step 3 and add s_accumulator.
// NOTE(review): the 2048 / 4096 constants presumably encode a 2048-texel
// shadow map with half-texel offsets - confirm against the engine settings.
//
// c8 = (1, 0, -1/4096, +1/4096)
// c9 = (2048, 0.00555555569 (~= 1/180), 0.333, 0)
def c8, 1, 0, -0.000244140625, 0.000244140625
def c9, 2048, 0.00555555569, 0.333000004, 0
dcl_texcoord v0.xy
dcl_2d s0
dcl_2d s1
dcl_2d s2
dcl_2d s3
dcl_volume s4
dcl_2d s5
// r0 = s_position sample
texld r0, v0, s1
// r1 = (r0.x, r0.y, r0.z, 1): xyz*1 + 0 in the first three lanes, x*0 + 1 in w
mad r1, r0.xyzx, c8.xxxy, c8.yyyx
// Project by m_shadow: r4.z = 1 / (row3 . r1), r4.xy = (row0 . r1, row1 . r1)
dp4 r2.w, c3, r1
rcp r4.z, r2.w
dp4 r4.x, c0, r1
dp4 r4.y, c1, r1
// Sample at uv + (-d, +d), d = 1/4096; r2.y = depth - sample
mad r2.xy, r4, r4.z, c8.zwzw
texld r2, r2, s0
// r4.w = row2 . r1, the depth compared against every s_smap sample
dp4 r4.w, c2, r1
add r2.y, -r2.x, r4.w
// Sample at uv + (+d, -d); r2.z = depth - sample
mad r3.xy, r4, r4.z, c8.wzzw
texld r3, r3, s0
add r2.z, r4.w, -r3.x
// r3.xy = uv + (+d, +d); r4.xy is then overwritten with uv + (-d, -d)
mad r3.xy, r4, r4.z, c8.w
mad r4.xy, r4, r4.z, c8.z
texld r3, r3, s0
add r2.w, r4.w, -r3.x
texld r3, r4, s0
// r5.xy = frac((uv + (-d, -d)) * 2048): position inside the texel
mul r4.xy, r4, c9.x
frc r5.xy, r4
add r2.x, r4.w, -r3.x
// r4.xy = 1 - frac
add_pp r4.xy, -r5, c8.x
// Each lane of r2 becomes 1 when depth <= sample, otherwise 0
cmp_pp r2, -r2, c8.x, c8.y
// Bilinear weights: r3 = ((1-fx)(1-fy), fy(1-fx), fx(1-fy), fx*fy)
mul_pp r3.yz, r5.xyxw, r4.xxyw
mul_pp r3.w, r5.y, r5.x
mul_pp r3.x, r4.y, r4.x
// r2.w = weighted sum of the four depth tests
dp4_pp r2.w, r2, r3
// r1 = s_lmap sampled at (row c4 . r1, row c5 . r1) of m_sunmask
dp4 r2.x, c4, r1
dp4 r2.y, c5, r1
texld_pp r1, r2, s3
// r1.x = 0.333 - lmap.w * r2.w
mad_pp r1.x, r1.w, -r2.w, c9.z
// r1.y = saturate(r0.z / 180)^2
mul_sat r1.z, r0.z, c9.y
mul r1.y, r1.z, r1.z
dp3_pp r1.z, r0, r0
mul r1.y, r1.x, r1.y
// r1.z = 1 / length(r0.xyz)
rsq_pp r1.z, r1.z
// r2.w = lmap.w*r2.w + (0.333 - lmap.w*r2.w) * saturate(r0.z / 180)^2
mad_pp r2.w, r1.w, r2.w, r1.y
// r1.xyz = -normalize(r0.xyz) - Ldynamic_dir; normalized into r2.xyz below
mad_pp r1.xyz, r0, -r1.z, -c7
// The s_material lookup uses r0.w of the position sample as its third coordinate
mov_pp r0.z, r0.w
nrm_pp r2.xyz, r1
// r1 = s_normal sample
texld_pp r1, v0, s2
// r0.y = dot(r2.xyz, normal), r0.x = dot(-Ldynamic_dir, normal)
dp3_pp r0.y, r2, r1
dp3_pp r0.x, -c7, r1
texld_pp r0, r0, s4
// oC0 = s_material * Ldynamic_color * r2.w + s_accumulator
mul_pp r0, r0, c6
texld_pp r1, v0, s5
mad_pp oC0, r0, r2.w, r1
// approximately 49 instruction slots used (9 texture, 40 arithmetic)

View file

@ -0,0 +1,173 @@
--------------------------------------------------------------------------------
Running performance on file test\p30_accum_sun_far.ps
-------------------- NV40 --------------------
Target: GeForce 6800 Ultra (NV40) :: Unified Compiler: v65.04
IPU0 ------ Simplified schedule: --------
Pass | Unit | uOp | PC: Op
-----+--------+------+-------------------------
1 | SCT0 | div | 0: TEXr r0, f[TEX0], TEX1;
| TEX | tex | 0: TEXr r0, f[TEX0], TEX1;
| SCB0/1 | mad | 1: MADr r3, r0.xyzx, const.xxxy, const.yyyx;
| | |
2 | SCB0 | dp4 | 3: DP4r r1.x, const, r3;
| SCB1 | nop | 3: DP4r r1.x, const, r3;
| | |
3 | SCT1 | div | 5: RCPr r1.y, r1;
| SCB0 | dp4 | 6: DP4r r2.z, const, r3;
| SCB1 | nop | 6: DP4r r2.z, const, r3;
| | |
4 | SCB0 | dp4 | 8: DP4r r2.w, const, r3;
| SCB1 | nop | 8: DP4r r2.w, const, r3;
| | |
5 | SCB0 | mad | 12: MADr r2.xy, r2.zw--, r1.yy--, const.xy--;
| SCB1 | mad | 10: MADr r1.zw, r2, r1.--yy, const.--xx;
| | |
6 | SCT0 | mov | 14: TEXr r1.x, r1.zwzz, TEX0;
| TEX | tex | 14: TEXr r1.x, r1.zwzz, TEX0;
| SCB1 | mad | 15: MADr r1.zw, r2, r1.--yy, const.--xy;
| | |
7 | SCT0 | mov | 17: TEXr r4.x, r1.zwzz, TEX0;
| TEX | tex | 17: TEXr r4.x, r1.zwzz, TEX0;
| SCB0 | dp4 | 18: DP4r r1.z, const, r3;
| SCB1 | nop | 18: DP4r r1.z, const, r3;
| | |
8 | SCT0 | mov | 20: TEXr r2.x, r2, TEX0;
| TEX | tex | 20: TEXr r2.x, r2, TEX0;
| SCB0 | dp4 | 21: DP4r r1.w, const, r3;
| SCB1 | nop | 21: DP4r r1.w, const, r3;
| | |
9 | SCB0 | dp4 | 23: DP4r r4.z, const, r3;
| SCB1 | nop | 23: DP4r r4.z, const, r3;
| | |
10 | SCT1 | mov | 25: MOVr r3.z, r0.--w-;
| SCB0 | add | 26: ADDr h6.y,-r4.-x--, r4.-z--;
| SCB1 | add | 27: ADDr h6.w, r4.---z,-r1.---x;
| | |
11 | SCB0 | dp3 | 28: DP3h h4.z, r0, r0;
| | |
12 | SCB0 | mad | 29: MADr r4.xy, r2.zw--, r1.yy--, const.xx--;
| SCB1 | lg2 | 31: LG2h/2 h4.w, |h4.zzzz|;
| | |
13 | SCB0 | add | 33: ADDr h6.z, r4,-r2.--x-;
| SCB1 | ex2 | 32: EX2h h4.w,-h4.wwww;
| | |
14 | SCT0 | mov | 36: TEXr r1.x, r4, TEX0;
| SCT1 | mul | 34: MULr r2.zw, r4.--xy, const.--xx;
| TEX | tex | 36: TEXr r1.x, r4, TEX0;
| | |
15 | SCT0 | div | 37: TEXh h4.xyz, f[TEX0], TEX2;
| TEX | tex | 37: TEXh h4.xyz, f[TEX0], TEX2;
| SCB0 | add | 38: ADDr h6.x, r4.z---,-r1;
| | |
16 | SCT0/1 | mul | 39: MOVh h2, const.xxxx;
| SCB1 | frc | 41: FRCr h7.zw, r2;
| | |
17 | SCT0/1 | mul | 42: MOVrc0 hc,-h6;
| SCB0 | dp3 | 43: DP3h r3.x,-const, h4;
| SCB1 | mul | 45: MULr_s r2.w, r0.---z, const;
| | |
18 | SCT0/1 | mul | 47: MOVh h2(LT0.xyzw), const.xxxx;
| SCB0 | mad | 49: MADh h0.xyz, r0,-h4.www-,-const;
| | |
19 | SCT1 | mov | 51: NRMh h0.xyz, h0;
| SRB | nrm | 51: NRMh h0.xyz, h0;
| SCB0 | dp3 | 52: DP3h r3.y, h0, h4;
| | |
20 | SCT0 | mov | 54: TEXh h0, r3, TEX4;
| SCT1 | mul | 53: MULh h4.w, h7, h7.---z;
| TEX | tex | 54: TEXh h0, r3, TEX4;
| SCB1 | add | 55: ADDh h1.zw,-h7, const.--xx;
| | |
21 | SCT0 | mul | 57: MULh h4.yz, h7.-wz-, h1.-zw-;
| SCB0 | mul | 58: MULh h4.x, h1.w---, h1.z---;
| | |
22 | SCT0 | mov | 59: TEXh h1.w, r1.zwzz, TEX3;
| TEX | tex | 59: TEXh h1.w, r1.zwzz, TEX3;
| SCB0 | dp4 | 60: DP4h h1.z, h2, h4;
| SCB1 | nop | 60: DP4h h1.z, h2, h4;
| | |
23 | SCT0 | mul | 61: MULr r2.x, r2.w---, r2.w---;
| SCB1 | mad | 62: MADh h4.w, h1,-h1.---z, const.---x;
| | |
24 | SCT0 | mul | 64: MULr h1.x, h4.w---, r2;
| SCB0 | mad | 65: MADh h2.x, h1.w---, h1.z---, h1;
| | |
25 | SCT0 | div | 66: TEXh h1, f[TEX0], TEX5;
| TEX | tex | 66: TEXh h1, f[TEX0], TEX5;
| SCB0/1 | mul | 67: MULh h0, h0, const;
| | |
26 | SCB0/1 | mad | 69: MADh h0, h0, h2.xxxx, h1;
Pass SCT TEX SCB
1: 50% 100% 100%
2: 0% 0% 100%
3: 25% 0% 100%
4: 0% 0% 100%
5: 0% 0% 100%
6: 0% 100% 50%
7: 0% 100% 100%
8: 0% 100% 100%
9: 0% 0% 100%
10: 25% 0% 50%
11: 0% 0% 75%
12: 0% 0% 75%
13: 0% 0% 50%
14: 50% 100% 0%
15: 50% 100% 25%
16: 100% 0% 50%
17: 100% 0% 75%
18: 100% 0% 75%
19: 0% 0% 75%
20: 25% 100% 50%
21: 50% 0% 25%
22: 0% 100% 100%
23: 25% 0% 25%
24: 25% 0% 25%
25: 50% 100% 100%
26: 0% 0% 100%
MEAN: 25% 34% 70%
Pass SCT0 SCT1 TEX SCB0 SCB1
1: 100% 0% 100% 100% 100%
2: 0% 0% 0% 100% 100%
3: 0% 100% 0% 100% 100%
4: 0% 0% 0% 100% 100%
5: 0% 0% 0% 100% 100%
6: 0% 0% 100% 0% 100%
7: 0% 0% 100% 100% 100%
8: 0% 0% 100% 100% 100%
9: 0% 0% 0% 100% 100%
10: 0% 100% 0% 100% 100%
11: 0% 0% 0% 100% 0%
12: 0% 0% 0% 100% 100%
13: 0% 0% 0% 100% 100%
14: 0% 100% 100% 0% 0%
15: 100% 0% 100% 100% 0%
16: 100% 100% 0% 0% 100%
17: 100% 100% 0% 100% 0%
18: 100% 100% 0% 100% 0%
19: 0% 0% 0% 100% 0%
20: 0% 100% 100% 0% 100%
21: 100% 0% 0% 100% 0%
22: 0% 0% 100% 100% 100%
23: 100% 0% 0% 0% 100%
24: 100% 0% 0% 100% 0%
25: 100% 0% 100% 100% 100%
26: 0% 0% 0% 100% 100%
MEAN: 34% 26% 34% 80% 69%
Cycles: 28.25 :: R Regs Used: 5 :: R Regs Max Index (0 based): 4
--------------------------------------------------------------------------------
Running performance on file test\p30_accum_sun_far.ps
-------------------- NV40 --------------------
Target: GeForce 6800 Ultra (NV40) :: Unified Compiler: v81.95
Cycles: 27.25 :: R Regs Used: 5 :: R Regs Max Index (0 based): 4
Pixel throughput (assuming 1 cycle texture lookup) 237.04 MP/s
--------------------------------------------------------------------------------
Running performance on file test\p30_accum_sun_far.ps
-------------------- G70 --------------------
Target: GeForce 7800 GT (G70) :: Unified Compiler: v81.95
Cycles: 21.00 :: R Regs Used: 4 :: R Regs Max Index (0 based): 3
Pixel throughput (assuming 1 cycle texture lookup) 457.14 MP/s

View file

@ -0,0 +1,154 @@
//
// Generated by Microsoft (R) D3DX9 Shader Compiler 9.07.239.0000
//
// fxc /DUSE_SHADER3=1 /nologo /Tps_3_0 /Emain /Zpr
// /Fctest\p30_accum_sun_near.ps accum_sun_near.ps
//
//
// Parameters:
//
// float4 Ldynamic_color;
// float4 Ldynamic_dir;
// row_major float4x4 m_shadow;
// row_major float3x4 m_sunmask;
// sampler2D s_accumulator;
// sampler2D s_lmap;
// sampler3D s_material;
// sampler2D s_normal;
// sampler2D s_position;
// sampler2D s_smap;
//
//
// Registers:
//
// Name Reg Size
// -------------- ----- ----
// m_shadow c0 4
// m_sunmask c4 2
// Ldynamic_color c6 1
// Ldynamic_dir c7 1
// s_position s0 1
// s_normal s1 1
// s_smap s2 1
// s_lmap s3 1
// s_material s4 1
// s_accumulator s5 1
//
ps_3_0
// Body of accum_sun_near.ps (ps_3_0 build). Steps, in order:
//   1. read s_position and project it with m_shadow (c0..c3),
//   2. run four groups of four s_smap depth tests; each group combines its
//      0/1 results with bilinear weights from the fractional texel position,
//      and the four group results are averaged (weight 0.25 each),
//   3. multiply the average by s_lmap.w (looked up through m_sunmask, c4..c5),
//   4. look up s_material with two dot products and position.w, multiply by
//      Ldynamic_color, scale by the factor from step 3 and add s_accumulator.
// Note that the sampler assignment differs from the "far" variant: here
// s_position = s0, s_normal = s1, s_smap = s2.
//
// With a = 0.000537109387 (~= 1.1/2048) and b = 4.88281257e-005 (~= 0.1/2048):
// c8  = (1, 0, -a, -b)
// c9  = (2048, +b, -a, -b)
// c10 = (+a, -a, 0.25, 0)
// NOTE(review): a - b ~= 1/2048, so each group's four taps presumably cover
// one 2x2 texel quad of a 2048-texel shadow map - confirm against the engine.
def c8, 1, 0, -0.000537109387, -4.88281257e-005
def c9, 2048, 4.88281257e-005, -0.000537109387, -4.88281257e-005
def c10, 0.000537109387, -0.000537109387, 0.25, 0
dcl_texcoord v0.xy
dcl_2d s0
dcl_2d s1
dcl_2d s2
dcl_2d s3
dcl_volume s4
dcl_2d s5
// r0 = s_position sample
texld r0, v0, s0
// r1 = (r0.x, r0.y, r0.z, 1): xyz*1 + 0 in the first three lanes, x*0 + 1 in w
mad r1, r0.xyzx, c8.xxxy, c8.yyyx
// Project by m_shadow: r7.z = 1 / (row3 . r1), r7.xy = (row0 . r1, row1 . r1)
dp4 r2.w, c3, r1
rcp r7.z, r2.w
dp4 r7.x, c0, r1
dp4 r7.y, c1, r1
// ---- Group 1: taps at uv + (-a,-b), (-b,-a), (-b,-b), (-a,-a) ----
mad r2.xy, r7, r7.z, c8.zwzw
texld r2, r2, s2
// r7.w = row2 . r1, the depth compared against every s_smap sample
dp4 r7.w, c2, r1
add r2.y, -r2.x, r7.w
mad r3.xy, r7, r7.z, c8.wzzw
texld r3, r3, s2
add r2.z, r7.w, -r3.x
mad r3.xy, r7, r7.z, c8.w
texld r3, r3, s2
add r2.w, r7.w, -r3.x
mad r4.xy, r7, r7.z, c8.z
texld r3, r4, s2
// r4.xy = frac((uv + (-a,-a)) * 2048), r5.xy = 1 - frac
mul r4.xy, r4, c9.x
frc r4.xy, r4
add r2.x, r7.w, -r3.x
add_pp r5.xy, -r4, c8.x
// Each lane of r2 becomes 1 when depth <= sample, otherwise 0
cmp_pp r2, -r2, c8.x, c8.y
// Bilinear weights: r3 = ((1-fx)(1-fy), fy(1-fx), fx(1-fy), fx*fy)
mul_pp r3.yz, r4.xyxw, r5.xxyw
mul_pp r3.x, r5.y, r5.x
mul_pp r3.w, r4.y, r4.x
// r2.x = group 1 result
dp4_pp r2.x, r2, r3
// ---- Group 2: taps at uv + (+b,-b), (+a,-a), (+a,-b), (+b,-a) ----
mad r3.xy, r7, r7.z, c9.ywzw
texld r3, r3, s2
add r3.y, r7.w, -r3.x
mad r4.xy, r7, r7.z, c10
texld r4, r4, s2
add r3.z, r7.w, -r4.x
mad r4.xy, r7, r7.z, -c9.zyzw
texld r4, r4, s2
add r3.w, r7.w, -r4.x
mad r5.xy, r7, r7.z, c9.yzzw
texld r4, r5, s2
mul r5.xy, r5, c9.x
frc r5.xy, r5
add r3.x, r7.w, -r4.x
add_pp r6.xy, -r5, c8.x
cmp_pp r3, -r3, c8.x, c8.y
mul_pp r4.yz, r5.xyxw, r6.xxyw
mul_pp r4.x, r6.y, r6.x
mul_pp r4.w, r5.y, r5.x
// r2.y = group 2 result
dp4_pp r2.y, r3, r4
// ---- Group 3: taps at uv + (-a,+a), (-b,+b), (-b,+a), (-a,+b) ----
mad r3.xy, r7, r7.z, c10.yxzw
texld r3, r3, s2
add r3.y, r7.w, -r3.x
mad r4.xy, r7, r7.z, c9.wyzw
texld r4, r4, s2
add r3.z, r7.w, -r4.x
mad r4.xy, r7, r7.z, -c9.yzzw
texld r4, r4, s2
add r3.w, r7.w, -r4.x
mad r5.xy, r7, r7.z, c9.zyzw
texld r4, r5, s2
mul r5.xy, r5, c9.x
add r3.x, r7.w, -r4.x
frc r4.xy, r5
// Group 3 keeps its tests in r5 and its weights in r6; they are combined
// into r2.z only after group 4, because r3/r4 are reused below
cmp_pp r5, -r3, c8.x, c8.y
add_pp r3.xy, -r4, c8.x
mul_pp r6.yz, r4.xyxw, r3.xxyw
mul_pp r6.x, r3.y, r3.x
mul_pp r6.w, r4.y, r4.x
// ---- Group 4: taps at uv + (+b,+a), (+a,+b), (+a,+a), (+b,+b) ----
mad r3.xy, r7, r7.z, -c8.wzzw
texld r3, r3, s2
add r3.y, r7.w, -r3.x
mad r4.xy, r7, r7.z, -c8.zwzw
texld r4, r4, s2
add r3.z, r7.w, -r4.x
mad r4.xy, r7, r7.z, -c8.z
// r7.xy is overwritten with the last tap position; the projected xy is not
// needed after this point
mad r7.xy, r7, r7.z, -c8.w
texld r4, r4, s2
add r3.w, r7.w, -r4.x
texld r4, r7, s2
mul r7.xy, r7, c9.x
frc r8.xy, r7
add r3.x, r7.w, -r4.x
add_pp r7.xy, -r8, c8.x
cmp_pp r3, -r3, c8.x, c8.y
mul_pp r4.yz, r8.xyxw, r7.xxyw
mul_pp r4.w, r8.y, r8.x
mul_pp r4.x, r7.y, r7.x
// r2.z = group 3 result, r2.w = group 4 result
dp4_pp r2.z, r5, r6
dp4_pp r2.w, r3, r4
// r3.w = 0.25 * (group1 + group2 + group3 + group4)
dp4_pp r3.w, r2, c10.z
// r2 = s_lmap sampled at (row c4 . r1, row c5 . r1) of m_sunmask
dp4 r2.x, c4, r1
dp4 r2.y, c5, r1
texld_pp r2, r2, s3
// r2.xyz = normalize(r0.xyz); r2.w still holds lmap.w
nrm_pp r2.xyz, r0
// r1 = s_normal sample
texld_pp r1, v0, s1
dp3_pp r0.z, r2, r1
// r2.w = averaged depth-test result * lmap.w
mul_pp r2.w, r3.w, r2.w
add_pp r0.y, r0.z, r0.z
// The s_material lookup uses r0.w of the position sample as its third coordinate
mov_pp r0.z, r0.w
// r2.xyz = normalize(r0.xyz) - 2 * dot(normalize(r0.xyz), normal) * normal,
// i.e. the normalized position vector reflected about the normal
mad_pp r2.xyz, r1, -r0.y, r2
// r0.x = dot(-Ldynamic_dir, normal), r0.y = dot(-Ldynamic_dir, reflected vector)
dp3_pp r0.x, -c7, r1
dp3_pp r0.y, -c7, r2
texld_pp r0, r0, s4
// oC0 = s_material * Ldynamic_color * r2.w + s_accumulator
mul_pp r0, r0, c6
texld_pp r1, v0, s5
mad_pp oC0, r0, r2.w, r1
// approximately 106 instruction slots used (21 texture, 85 arithmetic)

View file

@ -0,0 +1,360 @@
--------------------------------------------------------------------------------
Running performance on file test\p30_accum_sun_near.ps
-------------------- NV40 --------------------
Target: GeForce 6800 Ultra (NV40) :: Unified Compiler: v65.04
IPU0 ------ Simplified schedule: --------
Pass | Unit | uOp | PC: Op
-----+--------+------+-------------------------
1 | SCT0 | div | 0: TEXr r0, f[TEX0], TEX0;
| TEX | tex | 0: TEXr r0, f[TEX0], TEX0;
| SCB0/1 | mad | 1: MADr r1, r0.xyzx, const.xxxy, const.yyyx;
| | |
2 | SCT1 | mov | 3: NRMh h0.xyz, r0;
| SRB | nrm | 3: NRMh h0.xyz, r0;
| SCB0 | dp4 | 4: DP4r r2.x, const, r1;
| SCB1 | nop | 4: DP4r r2.x, const, r1;
| | |
3 | SCB0 | dp4 | 6: DP4r r4.x, const, r1;
| SCB1 | nop | 6: DP4r r4.x, const, r1;
| | |
4 | SCB0 | dp4 | 8: DP4r r4.y, const, r1;
| SCB1 | nop | 8: DP4r r4.y, const, r1;
| | |
5 | SCT1 | div | 10: RCPr r6.z, r2;
| SCB0 | dp4 | 11: DP4r r6.y, const, r1;
| SCB1 | nop | 11: DP4r r6.y, const, r1;
| | |
6 | SCB0 | mad | 13: MADr r2.xy, r4, r6.zz--, const.xy--;
| SCB1 | mad | 15: MADr r4.zw, r4.--xy, r6.--zz, const.--yx;
| | |
8 | SCT0 | mov | 17: TEXr r6.x, r2, TEX2;
| TEX | tex | 17: TEXr r6.x, r2, TEX2;
| SCB0 | add | 18: ADDr h6.y, r6,-r6.-x--;
| SCB1 | mad | 19: MADr r3.zw, r4.--xy, r6.--zz, const.--xx;
| | |
9 | SCT0 | mov | 21: TEXr r2.x, r4.zwzz, TEX2;
| TEX | tex | 21: TEXr r2.x, r4.zwzz, TEX2;
| SCB1 | mad | 22: MADr r2.zw, r4.--xy, r6.--zz, const.--xx;
| | |
10 | SCB1 | add | 24: ADDr h6.z, r6.--y-,-r2.--x-;
| | |
11 | SCT0 | mov | 25: TEXr r2.x, r2.zwzz, TEX2;
| TEX | tex | 25: TEXr r2.x, r2.zwzz, TEX2;
| SCB1 | add | 26: ADDr h6.w, r6.---y,-r2.---x;
| | |
12 | SCT0 | mov | 27: TEXr r2.x, r3.zwzz, TEX2;
| TEX | tex | 27: TEXr r2.x, r3.zwzz, TEX2;
| SCB0 | add | 28: ADDr h6.x, r6.y---,-r2;
| | |
13 | SCT0/1 | mul | 29: MOVrc0 hc,-h6;
| SCB1 | mad | 30: MADr r4.zw, r4.--xy, r6.--zz, const.--xy;
| | |
14 | SCT0 | mov | 32: TEXr r3.x, r4.zwzz, TEX2;
| TEX | tex | 32: TEXr r3.x, r4.zwzz, TEX2;
| SCB1 | mad | 33: MADr r4.zw, r4.--xy, r6.--zz, const.--xy;
| | |
15 | SCT0/1 | mul | 35: MOVh h4, const.xxxx;
| SCB0 | add | 37: ADDr h5.y, r6,-r3.-x--;
| | |
16 | SCT0/1 | mul | 38: MOVh h4(LT0.xyzw), const.xxxx;
| SCB1 | mad | 40: MADr r5.zw, r4.--xy, r6.--zz, const.--xy;
| | |
17 | SCT0 | mov | 42: TEXr r3.x, r4.zwzz, TEX2;
| TEX | tex | 42: TEXr r3.x, r4.zwzz, TEX2;
| SCB1 | mad | 43: MADr r4.zw, r4.--xy, r6.--zz, const.--xy;
| | |
18 | SCB1 | add | 45: ADDr h5.z, r6.--y-,-r3.--x-;
| | |
19 | SCB0/1 | mad | 46: MADr r6.xw, r4.x--y, r6.z--z, const.x--y;
| | |
20 | SCT0 | mov | 48: TEXr r3.x, r4.zwzz, TEX2;
| TEX | tex | 48: TEXr r3.x, r4.zwzz, TEX2;
| SCB1 | add | 49: ADDr h5.w, r6.---y,-r3.---x;
| | |
21 | SCT0 | mov | 50: TEXr r6.x, r6.xwyy, TEX2;
| TEX | tex | 50: TEXr r6.x, r6.xwyy, TEX2;
| SCB1 | mad | 51: MADr r4.zw, r4.--xy, r6.--zz, const.--xy;
| | |
22 | SCT0 | mov | 53: TEXr r3.x, r4.zwzz, TEX2;
| TEX | tex | 53: TEXr r3.x, r4.zwzz, TEX2;
| SCB0 | add | 54: ADDr h5.x, r6.y---,-r3;
| | |
23 | SCT0/1 | mul | 55: MOVrc0 hc,-h5;
| SCB0 | mad | 56: MADr r3.xy, r4, r6.zz--, const.xy--;
| | |
24 | SCT0/1 | mul | 58: MOVh h5, const.xxxx;
| SCB0/1 | mul | 60: MOVh h5(LT0.xyzw), const.xxxx;
| | |
25 | SCT0 | mov | 62: TEXr r3.x, r3, TEX2;
| TEX | tex | 62: TEXr r3.x, r3, TEX2;
| SCB0 | add | 63: ADDr h6.y, r6,-r3.-x--;
| | |
26 | SCT0 | mov | 64: TEXr r5.x, r5.zwzz, TEX2;
| TEX | tex | 64: TEXr r5.x, r5.zwzz, TEX2;
| SCB1 | mad | 65: MADr r5.zw, r4.--xy, r6.--zz, const.--xy;
| | |
27 | SCB1 | add | 67: ADDr h6.z, r6.--y-,-r5.--x-;
| | |
28 | SCT0 | mov | 68: TEXr r5.x, r5.zwzz, TEX2;
| TEX | tex | 68: TEXr r5.x, r5.zwzz, TEX2;
| SCB1 | mad | 69: MADr r5.zw, r4.--xy, r6.--zz, const.--xy;
| | |
29 | SCB1 | add | 71: ADDr h6.w, r6.---y,-r5.---x;
| | |
30 | SCT0 | mov | 72: TEXr r5.x, r5.zwzz, TEX2;
| TEX | tex | 72: TEXr r5.x, r5.zwzz, TEX2;
| SCB0 | add | 73: ADDr h6.x, r6.y---,-r5;
| | |
31 | SCT0/1 | mul | 74: MOVrc0 hc,-h6;
| SCB0 | mad | 75: MADr r5.xy, r4, r6.zz--, const.xy--;
| | |
32 | SCT0/1 | mul | 77: MOVh h6, const.xxxx;
| SCB0/1 | mul | 79: MOVh h6(LT0.xyzw), const.xxxx;
| | |
33 | SCT0 | mov | 81: TEXr r5.x, r5, TEX2;
| TEX | tex | 81: TEXr r5.x, r5, TEX2;
| SCB0 | add | 83: ADDr h10.y,-r5.-x--, r6;
| SCB1 | add | 82: ADDr h10.z, r6.--y-,-r6.--x-;
| | |
34 | SCB0/1 | mad | 84: MADr r6.xw, r4.x--y, r6.z--z, const.x--x;
| | |
35 | SCT0 | mov | 86: TEXr r6.x, r6.xwzz, TEX2;
| TEX | tex | 86: TEXr r6.x, r6.xwzz, TEX2;
| SCB1 | mad | 87: MADr r6.zw, r4.--xy, r6.--zz, const.--xx;
| | |
36 | SCT0 | mov | 89: TEXr r4.x, r6.zwzz, TEX2;
| TEX | tex | 89: TEXr r4.x, r6.zwzz, TEX2;
| SCB0 | add | 90: ADDr h10.x, r6.y---,-r4;
| SCB1 | add | 91: ADDr h10.w, r6.---y,-r6.---x;
| | |
37 | SCT0/1 | mul | 92: MOVrc0 hc,-h10;
| SCB0 | mul | 93: MULr r5.xy, r6.zw--, const.xx--;
| | |
38 | SCT0/1 | mul | 95: MOVh h8, const.xxxx;
| SCB0 | frc | 97: FRCr h10.xy, r5;
| | |
39 | SCT1 | mul | 98: MULh h10.w, h10.---y, h10.---x;
| SCB0 | add | 99: ADDh h12.xy,-h10, const.xx--;
| | |
40 | SCT0 | mul | 101: MULh h10.yz, h10.-yx-, h12.-xy-;
| SCB0/1 | mul | 102: MOVh h8(LT0.xyzw), const.xxxx;
| | |
41 | SCT0 | mul | 104: MULh h10.x, h12.y---, h12;
| SCB0 | dp4 | 105: DP4h h8.x, h8, h10;
| SCB1 | nop | 105: DP4h h8.x, h8, h10;
| | |
42 | SCT0 | mul | 106: MULr r5.xy, r5.zw--, const.xx--;
| SCB0 | frc | 108: FRCr h10.xy, r5;
| | |
43 | SCT1 | mul | 109: MULh h10.w, h10.---y, h10.---x;
| SCB1 | add | 110: ADDh h8.zw,-h10.--xy, const.--xx;
| | |
44 | SCT0 | mul | 112: MULh h10.yz, h10.-yx-, h8.-zw-;
| SCB0 | mul | 113: MULh h10.x, h8.w---, h8.z---;
| | |
45 | SCB0 | dp4 | 114: DP4h h8.y, h6, h10;
| SCB1 | nop | 114: DP4h h8.y, h6, h10;
| | |
46 | SCB0 | mul | 115: MULr r3.xy, r4.zw--, const.xx--;
| | |
47 | SCB0 | frc | 117: FRCr h6.xy, r3;
| | |
48 | SCT1 | mul | 118: MULh h6.w, h6.---y, h6.---x;
| SCB1 | add | 119: ADDh h8.zw,-h6.--xy, const.--xx;
| | |
49 | SCT0 | mul | 121: MULh h6.yz, h6.-yx-, h8.-zw-;
| SCB0 | mul | 122: MULh h6.x, h8.w---, h8.z---;
| | |
50 | SCB0 | dp4 | 123: DP4h h8.z, h5, h6;
| SCB1 | nop | 123: DP4h h8.z, h5, h6;
| | |
51 | SCT1 | mul | 124: MULr r2.zw, r3, const.--xx;
| SCB0 | frc | 126: FRCr h5.xy, r2.zw--;
| SCB1 | mov | 127: MOVr r3.z, r0.--w-;
| | |
52 | SCT1 | mul | 128: MULh h5.w, h5.---y, h5.---x;
| SCB0 | add | 129: ADDh h6.xy,-h5, const.xx--;
| | |
53 | SCT0 | mul | 131: MULh h5.yz, h5.-yx-, h6.-xy-;
| SCB0 | mul | 132: MULh h5.x, h6.y---, h6;
| | |
54 | SCB0 | dp4 | 133: DP4h h8.w, h4, h5;
| SCB1 | nop | 133: DP4h h8.w, h4, h5;
| | |
55 | SCT0 | div | 134: TEXh h4.xyz, f[TEX0], TEX1;
| TEX | tex | 134: TEXh h4.xyz, f[TEX0], TEX1;
| SCB0 | dp3 | 135: DP3h*2 h0.w, h0, h4;
| | |
56 | SCB0 | mad | 136: MADh h0.xyz, h4,-h0.www-, h0;
| | |
57 | SCB0 | dp4 | 137: DP4r r2.z, const, r1;
| SCB1 | nop | 137: DP4r r2.z, const, r1;
| | |
58 | SCB0 | dp4 | 139: DP4r r2.w, const, r1;
| SCB1 | nop | 139: DP4r r2.w, const, r1;
| | |
59 | SCT0 | mov | 141: TEXh h0.w, r2.zwzz, TEX3;
| TEX | tex | 141: TEXh h0.w, r2.zwzz, TEX3;
| SCB0 | dp4 | 142: DP4h h4.w, h8, const.xxxx;
| SCB1 | nop | 142: DP4h h4.w, h8, const.xxxx;
| | |
60 | SCB0 | dp3 | 144: DP3h r3.x,-const, h4;
| SCB1 | mul | 146: MULh h2.w, h4, h0;
| | |
61 | SCB0 | dp3 | 147: DP3h r3.y,-const, h0;
| | |
62 | SCT0 | mov | 149: TEXh h1, r3, TEX4;
| TEX | tex | 149: TEXh h1, r3, TEX4;
| SCB0/1 | mul | 150: MULh h1, h1, const;
| | |
63 | SCT0 | div | 152: TEXh h0, f[TEX0], TEX5;
| TEX | tex | 152: TEXh h0, f[TEX0], TEX5;
| SCB0/1 | mad | 153: MADh h0, h1, h2.wwww, h0;
Pass SCT TEX SCB
1: 50% 100% 100%
2: 0% 0% 100%
3: 0% 0% 100%
4: 0% 0% 100%
5: 25% 0% 100%
6: 0% 0% 100%
7: 0% 0% 0%
8: 0% 100% 75%
9: 0% 100% 50%
10: 0% 0% 25%
11: 0% 100% 25%
12: 0% 100% 25%
13: 100% 0% 50%
14: 0% 100% 50%
15: 100% 0% 25%
16: 100% 0% 50%
17: 0% 100% 50%
18: 0% 0% 25%
19: 0% 0% 50%
20: 0% 100% 25%
21: 0% 100% 50%
22: 0% 100% 25%
23: 100% 0% 50%
24: 100% 0% 100%
25: 0% 100% 25%
26: 0% 100% 50%
27: 0% 0% 25%
28: 0% 100% 50%
29: 0% 0% 25%
30: 0% 100% 25%
31: 100% 0% 50%
32: 100% 0% 100%
33: 0% 100% 50%
34: 0% 0% 50%
35: 0% 100% 50%
36: 0% 100% 50%
37: 100% 0% 50%
38: 100% 0% 50%
39: 25% 0% 50%
40: 50% 0% 100%
41: 25% 0% 100%
42: 50% 0% 50%
43: 25% 0% 50%
44: 50% 0% 25%
45: 0% 0% 100%
46: 0% 0% 50%
47: 0% 0% 50%
48: 25% 0% 50%
49: 50% 0% 25%
50: 0% 0% 100%
51: 50% 0% 75%
52: 25% 0% 50%
53: 50% 0% 25%
54: 0% 0% 100%
55: 50% 100% 75%
56: 0% 0% 75%
57: 0% 0% 100%
58: 0% 0% 100%
59: 0% 100% 100%
60: 0% 0% 75%
61: 0% 0% 75%
62: 0% 100% 100%
63: 50% 100% 100%
MEAN: 23% 33% 60%
Pass SCT0 SCT1 TEX SCB0 SCB1
1: 100% 0% 100% 100% 100%
2: 0% 0% 0% 100% 100%
3: 0% 0% 0% 100% 100%
4: 0% 0% 0% 100% 100%
5: 0% 100% 0% 100% 100%
6: 0% 0% 0% 100% 100%
7: 0% 0% 0% 0% 0%
8: 0% 0% 100% 100% 100%
9: 0% 0% 100% 0% 100%
10: 0% 0% 0% 0% 100%
11: 0% 0% 100% 0% 100%
12: 0% 0% 100% 100% 0%
13: 100% 100% 0% 0% 100%
14: 0% 0% 100% 0% 100%
15: 100% 100% 0% 100% 0%
16: 100% 100% 0% 0% 100%
17: 0% 0% 100% 0% 100%
18: 0% 0% 0% 0% 100%
19: 0% 0% 0% 100% 100%
20: 0% 0% 100% 0% 100%
21: 0% 0% 100% 0% 100%
22: 0% 0% 100% 100% 0%
23: 100% 100% 0% 100% 0%
24: 100% 100% 0% 100% 100%
25: 0% 0% 100% 100% 0%
26: 0% 0% 100% 0% 100%
27: 0% 0% 0% 0% 100%
28: 0% 0% 100% 0% 100%
29: 0% 0% 0% 0% 100%
30: 0% 0% 100% 100% 0%
31: 100% 100% 0% 100% 0%
32: 100% 100% 0% 100% 100%
33: 0% 0% 100% 100% 100%
34: 0% 0% 0% 100% 100%
35: 0% 0% 100% 0% 100%
36: 0% 0% 100% 100% 100%
37: 100% 100% 0% 100% 0%
38: 100% 100% 0% 100% 0%
39: 0% 100% 0% 100% 0%
40: 100% 0% 0% 100% 100%
41: 100% 0% 0% 100% 100%
42: 100% 0% 0% 100% 0%
43: 0% 100% 0% 0% 100%
44: 100% 0% 0% 100% 0%
45: 0% 0% 0% 100% 100%
46: 0% 0% 0% 100% 0%
47: 0% 0% 0% 100% 0%
48: 0% 100% 0% 0% 100%
49: 100% 0% 0% 100% 0%
50: 0% 0% 0% 100% 100%
51: 0% 100% 0% 100% 100%
52: 0% 100% 0% 100% 0%
53: 100% 0% 0% 100% 0%
54: 0% 0% 0% 100% 100%
55: 100% 0% 100% 100% 0%
56: 0% 0% 0% 100% 0%
57: 0% 0% 0% 100% 100%
58: 0% 0% 0% 100% 100%
59: 0% 0% 100% 100% 100%
60: 0% 0% 0% 100% 0%
61: 0% 0% 0% 100% 0%
62: 0% 0% 100% 100% 100%
63: 100% 0% 100% 100% 100%
MEAN: 28% 23% 33% 71% 65%
Cycles: 78.75 :: R Regs Used: 7 :: R Regs Max Index (0 based): 6
--------------------------------------------------------------------------------
Running performance on file test\p30_accum_sun_near.ps
-------------------- NV40 --------------------
Target: GeForce 6800 Ultra (NV40) :: Unified Compiler: v81.95
Cycles: 58.25 :: R Regs Used: 5 :: R Regs Max Index (0 based): 4
Pixel throughput (assuming 1 cycle texture lookup) 110.34 MP/s
--------------------------------------------------------------------------------
Running performance on file test\p30_accum_sun_near.ps
-------------------- G70 --------------------
Target: GeForce 7800 GT (G70) :: Unified Compiler: v81.95
Cycles: 50.00 :: R Regs Used: 4 :: R Regs Max Index (0 based): 3
Pixel throughput (assuming 1 cycle texture lookup) 192.00 MP/s

View file

@ -0,0 +1,40 @@
//
// Generated by Microsoft (R) D3DX9 Shader Compiler 9.07.239.0000
//
// fxc /DUSE_SHADER3=1 /nologo /Tps_3_0 /Emain /Zpr /Fctest\p30_clouds.ps
// clouds.ps
//
//
// Parameters:
//
// sampler2D s_clouds0;
// sampler2D s_clouds1;
// sampler2D s_tonemap;
//
//
// Registers:
//
// Name Reg Size
// ------------ ----- ----
// s_clouds0 s0 1
// s_clouds1 s1 1
// s_tonemap s2 1
//
ps_3_0
// Cloud pixel shader: adds two cloud texture samples, multiplies the sum by
// the color input and scales the result by a value read from the tonemap
// texture.
// Sampler bindings (register table in the header): s0 = s_clouds0,
// s1 = s_clouds1, s2 = s_tonemap.
// Only c0.x (0.5) is read; it is used as the tonemap lookup coordinate.
def c0, 0.5, 0, 0, 0
// Inputs: v0 = color, v1.xy = texcoord0, v2.xy = texcoord1.
dcl_color_pp v0
dcl_texcoord_pp v1.xy
dcl_texcoord1_pp v2.xy
dcl_2d s0
dcl_2d s1
dcl_2d s2
// r0 = s_clouds0 sampled at texcoord0, r1 = s_clouds1 sampled at texcoord1.
texld_pp r0, v1, s0
texld_pp r1, v2, s1
// r0.xyz = sum of the two cloud samples (rgb only).
add_pp r0.xyz, r0, r1
// r1.xyz = summed clouds * input color rgb.
mul_pp r1.xyz, r0, v0
// Sample s_tonemap at the constant coordinate (0.5, 0.5); r0 is reused here,
// the cloud sum has already been consumed into r1.
texld_pp r0, c0.x, s2
// Output rgb = tinted clouds * tonemap.x.
mul_pp oC0.xyz, r1, r0.x
// Output alpha is taken unchanged from the input color alpha.
mov_pp oC0.w, v0.w
// approximately 7 instruction slots used (3 texture, 4 arithmetic)

View file

@ -0,0 +1,51 @@
--------------------------------------------------------------------------------
Running performance on file test\p30_clouds.ps
-------------------- NV40 --------------------
Target: GeForce 6800 Ultra (NV40) :: Unified Compiler: v65.04
IPU0 ------ Simplified schedule: --------
Pass | Unit | uOp | PC: Op
-----+--------+------+-------------------------
1 | SCT0 | div | 2: TEXh h0.xyz, f[TEX1], TEX0;
| SCT1 | mov | 0: MOVr r1.w, const.---x;
| TEX | tex | 2: TEXh h0.xyz, f[TEX1], TEX0;
| | |
2 | SCT0 | div | 3: TEXh h1.xyz, f[TEX2], TEX1;
| TEX | tex | 3: TEXh h1.xyz, f[TEX2], TEX1;
| SCB0 | add | 4: ADDh h1.xyz, h0, h1;
| | |
3 | SCT0/1 | div | 5: MOVh h0, f[TEX0];
| SCB0 | mul | 6: MULh h0.xyz, h1, h0;
| | |
4 | SCT0 | mov | 7: TEXh h1.x, r1.wwww, TEX2;
| TEX | tex | 7: TEXh h1.x, r1.wwww, TEX2;
| SCB0 | mul | 8: MULh h0.xyz, h0, h1.xxx-;
Pass SCT TEX SCB
1: 75% 100% 0%
2: 50% 100% 75%
3: 100% 0% 75%
4: 0% 100% 75%
MEAN: 56% 75% 56%
Pass SCT0 SCT1 TEX SCB0 SCB1
1: 100% 100% 100% 0% 0%
2: 100% 0% 100% 100% 0%
3: 100% 100% 0% 100% 0%
4: 0% 0% 100% 100% 0%
MEAN: 75% 50% 75% 75% 0%
Cycles: 4.00 :: R Regs Used: 2 :: R Regs Max Index (0 based): 1
--------------------------------------------------------------------------------
Running performance on file test\p30_clouds.ps
-------------------- NV40 --------------------
Target: GeForce 6800 Ultra (NV40) :: Unified Compiler: v81.95
Cycles: 4.00 :: R Regs Used: 1 :: R Regs Max Index (0 based): 0
Pixel throughput (assuming 1 cycle texture lookup) 1.60 GP/s
--------------------------------------------------------------------------------
Running performance on file test\p30_clouds.ps
-------------------- G70 --------------------
Target: GeForce 7800 GT (G70) :: Unified Compiler: v81.95
Cycles: 4.00 :: R Regs Used: 1 :: R Regs Max Index (0 based): 0
Pixel throughput (assuming 1 cycle texture lookup) 2.40 GP/s

View file

@ -0,0 +1,36 @@
//
// Generated by Microsoft (R) D3DX9 Shader Compiler 9.07.239.0000
//
// fxc /DUSE_SHADER3=1 /nologo /Tps_3_0 /Emain /Zpr
// /Fctest\p30_deffer_impl_flat.ps deffer_impl_flat.ps
//
//
// Parameters:
//
// float4 L_material;
// sampler2D s_base;
//
//
// Registers:
//
// Name Reg Size
// ------------ ----- ----
// L_material c0 1
// s_base s0 1
//
ps_3_0
// Pixel shader writing three render targets (oC0, oC1, oC2) from the
// interpolated inputs v2/v3 and one sample of s_base (s0).
// NOTE(review): the outputs look like a deferred G-buffer
// (position / normal / base color) - inferred from the file name and the
// values written; confirm against the HLSL source.
// c1 = (0.025, 1, 0, 0.00784313772); the last value equals 2/255.
def c1, 0.0250000004, 1, 0, 0.00784313772
// Inputs: v1.xy = texcoord0, v2.xyz = texcoord1, v3.xyz = texcoord2.
dcl_texcoord v1.xy
dcl_texcoord1 v2.xyz
dcl_texcoord2_pp v3.xyz
dcl_2d s0
// r0.xyz = normalize(v3).
nrm_pp r0.xyz, v3
// oC0.xyz = v2 + 0.025 * normalize(v3).
mad_pp oC0.xyz, r0, c1.x, v2
// oC1.xyz = normalize(v3).
mov_pp oC1.xyz, r0
// oC0.w = L_material.w (L_material is bound to c0 per the register table).
mov_pp oC0.w, c0.w
// r0 is reused for the s_base sample; the normalized vector was already
// written to oC0/oC1 above.
texld_pp r0, v1, s0
// oC1.w = s_base alpha.
mov_pp oC1.w, r0.w
// oC2 = (base.r, base.g, base.b, 0.00784313772):
// xyz is multiplied by 1 and offset by 0; w is multiplied by 0 and offset by c1.w.
mad_pp oC2, r0.xyzx, c1.yyyz, c1.zzzw
// approximately 9 instruction slots used (1 texture, 8 arithmetic)

View file

@ -0,0 +1,51 @@
--------------------------------------------------------------------------------
Running performance on file test\p30_deffer_impl_flat.ps
-------------------- NV40 --------------------
Target: GeForce 6800 Ultra (NV40) :: Unified Compiler: v65.04
IPU0 ------ Simplified schedule: --------
Pass | Unit | uOp | PC: Op
-----+--------+------+-------------------------
1 | SCT0 | div | 0: TEXh h1, f[TEX1], TEX0;
| SCT1 | mov | 1: NRMh h4.xyz, f[TEX3];
| TEX | tex | 0: TEXh h1, f[TEX1], TEX0;
| SRB | nrm | 1: NRMh h4.xyz, f[TEX3];
| SCB1 | mul | 2: MOVh h0.w, const;
| | |
2 | SCT1 | mul | 4: MOVh h4.w, h1;
| SCB0/1 | mad | 5: MADh h6, h1.xyzx, const.xxxy, const.yyyz;
| | |
3 | SCT0 | div | 7: MADh h0.xyz, h4, const.xxx-, f[TEX2];
| SCB0 | mad | 7: MADh h0.xyz, h4, const.xxx-, f[TEX2];
Pass SCT TEX SCB
1: 50% 100% 25%
2: 25% 0% 100%
3: 75% 0% 75%
4: 0% 0% 0%
MEAN: 37% 25% 50%
Pass SCT0 SCT1 TEX SCB0 SCB1
1: 100% 0% 100% 0% 100%
2: 0% 100% 0% 100% 100%
3: 100% 0% 0% 100% 0%
4: 0% 0% 0% 0% 0%
MEAN: 50% 25% 25% 50% 50%
Cycles: 4.00 :: R Regs Used: 3 :: R Regs Max Index (0 based): 3
Max register used is > number of registers used, registers are not being used efficiently
--------------------------------------------------------------------------------
Running performance on file test\p30_deffer_impl_flat.ps
-------------------- NV40 --------------------
Target: GeForce 6800 Ultra (NV40) :: Unified Compiler: v81.95
Cycles: 2.00 :: R Regs Used: 3 :: R Regs Max Index (0 based): 3
Max register used is > number of registers used, registers are not being used efficiently
Pixel throughput (assuming 1 cycle texture lookup) 3.20 GP/s
--------------------------------------------------------------------------------
Running performance on file test\p30_deffer_impl_flat.ps
-------------------- G70 --------------------
Target: GeForce 7800 GT (G70) :: Unified Compiler: v81.95
Cycles: 2.00 :: R Regs Used: 3 :: R Regs Max Index (0 based): 3
Max register used is > number of registers used, registers are not being used efficiently
Pixel throughput (assuming 1 cycle texture lookup) 4.80 GP/s

View file

@ -0,0 +1,98 @@
//
// Generated by Microsoft (R) D3DX9 Shader Compiler 9.07.239.0000
//
// fxc /DUSE_SHADER3=1 /nologo /Tps_3_0 /Emain /Zpr
// /Fctest\p30_deffer_impl_flat_d.ps deffer_impl_flat_d.ps
//
//
// Parameters:
//
// float4 L_material;
// sampler2D s_base;
// sampler2D s_dn_a;
// sampler2D s_dn_b;
// sampler2D s_dn_g;
// sampler2D s_dn_r;
// sampler2D s_dt_a;
// sampler2D s_dt_b;
// sampler2D s_dt_g;
// sampler2D s_dt_r;
// sampler2D s_mask;
//
//
// Registers:
//
// Name Reg Size
// ------------ ----- ----
// L_material c0 1
// s_base s0 1
// s_mask s1 1
// s_dt_r s2 1
// s_dt_g s3 1
// s_dt_b s4 1
// s_dt_a s5 1
// s_dn_r s6 1
// s_dn_g s7 1
// s_dn_b s8 1
// s_dn_a s9 1
//
ps_3_0
// Variant of the flat deferred shader that blends four "dt" textures and
// four "dn" textures using the four channels of s_mask as weights, then
// writes three render targets (oC0, oC1, oC2).
// Sampler bindings (register table in the header): s0 = s_base, s1 = s_mask,
// s2..s5 = s_dt_r/g/b/a, s6..s9 = s_dn_r/g/b/a.
// NOTE(review): "dt"/"dn" presumably mean detail texture / detail normal -
// inferred from the sampler names only; confirm against the HLSL source.
// c1 = (1, -0.5, 0.5, 0.025); c2.x = 0.00784313772 (equals 2/255).
def c1, 1, -0.5, 0.5, 0.0250000004
def c2, 0.00784313772, 0, 0, 0
// Inputs: v1.xy = texcoord0 (used for s_base and s_mask),
// v2.xyz = texcoord1, v3/v4/v5.xyz = texcoord2..4 (the three vectors the
// blended dn value is dotted with), v6.xy = texcoord5 (used for all dt/dn
// lookups).
dcl_texcoord v1.xy
dcl_texcoord1 v2.xyz
dcl_texcoord2_pp v3.xyz
dcl_texcoord3_pp v4.xyz
dcl_texcoord4_pp v5.xyz
dcl_texcoord5 v6.xy
dcl_2d s0
dcl_2d s1
dcl_2d s2
dcl_2d s3
dcl_2d s4
dcl_2d s5
dcl_2d s6
dcl_2d s7
dcl_2d s8
dcl_2d s9
// --- Blend the four dt textures by the normalized mask ------------------
// r0 = s_dt_a, r2 = s_dt_b, r3 = s_dt_r, r4 = s_dt_g, all sampled at v6.
texld_pp r0, v6, s5
texld_pp r2, v6, s4
texld_pp r3, v6, s2
texld_pp r4, v6, s3
// r1 = s_mask sampled at v1.
texld_pp r1, v1, s1
// r0.w = mask.x + mask.y + mask.z + mask.w (dot with (1,1,1,1)).
// This overwrites the alpha of the s_dt_a sample; only r0.xyz is used later.
dp4_pp r0.w, r1, c1.x
// r1 = mask / sum(mask), i.e. the four weights are rescaled to sum to 1.
// No guard is visible here for a mask sum of zero.
rcp_pp r0.w, r0.w
mul_pp r1, r1, r0.w
// r4.xyz = dt_r*w.x + dt_g*w.y + dt_b*w.z + dt_a*w.w, accumulated stepwise.
mul_pp r4.xyz, r4, r1.y
mad_pp r3.xyz, r3, r1.x, r4
mad_pp r2.xyz, r2, r1.z, r3
mad_pp r4.xyz, r0, r1.w, r2
// --- Blend the four dn textures by the same weights ---------------------
// r0 is reused for the s_base sample at v1.
texld_pp r0, v1, s0
// r2 = s_dn_r, r3 = s_dn_g at v6.
texld_pp r2, v6, s6
texld_pp r3, v6, s7
// Each dn sample is read as (w, z, y) and offset by -0.5.
add_pp r3.xyz, r3.wzyw, c1.y
add_pp r2.xyz, r2.wzyw, c1.y
// r3.xyz = dn_r*w.x + dn_g*w.y.
mul_pp r3.xyz, r1.y, r3
mad_pp r3.xyz, r2, r1.x, r3
// Add the s_dn_b contribution weighted by w.z.
texld_pp r2, v6, s8
add_pp r2.xyz, r2.wzyw, c1.y
mad_pp r3.xyz, r2, r1.z, r3
// s_dn_a: its offset value is written into r1.xyz, overwriting w.x/w.y/w.z
// (no longer needed); r1.w still holds the fourth weight.
texld_pp r2, v6, s9
add_pp r1.xyz, r2.wzyw, c1.y
// r2.xyz = blended dt color * base rgb (consumes r4 and r0.xyz).
mul_pp r2.xyz, r4, r0
// r0.xyz = blended dn vector (adds the s_dn_a term weighted by w.w);
// r0.w is untouched and still holds the s_base alpha.
mad_pp r0.xyz, r1, r1.w, r3
// oC1.w = s_base alpha; must be written before r0.w is overwritten below.
mov_pp oC1.w, r0.w
// --- Transform the blended dn vector by v3/v4/v5 ------------------------
// r0.w = 0.5 * dn.z, so r0.xyww reads as (dn.x, dn.y, 0.5*dn.z).
mul_pp r0.w, r0.z, c1.z
dp3_pp r1.x, v3, r0.xyww
// Apply the same 0.5 scale to z in place ((1, 1, 0.5) multiplier) so the
// next two dot products use the identical vector.
mul_pp r0.xyz, r0, c1.xxzw
dp3_pp r1.y, v4, r0
dp3_pp r1.z, v5, r0
// --- Outputs -------------------------------------------------------------
// oC2.xyz = 2 * (blended dt color * base rgb).
add_pp oC2.xyz, r2, r2
// r0.xyz = normalize of the transformed vector.
nrm_pp r0.xyz, r1
// oC0.xyz = v2 + 0.025 * normalized vector.
mad_pp oC0.xyz, r0, c1.w, v2
// oC1.xyz = normalized vector.
mov_pp oC1.xyz, r0
// oC0.w = L_material.w (c0 per the register table).
mov_pp oC0.w, c0.w
// oC2.w = constant 0.00784313772.
mov_pp oC2.w, c2.x
// approximately 40 instruction slots used (10 texture, 30 arithmetic)

View file

@ -0,0 +1,140 @@
--------------------------------------------------------------------------------
Running performance on file test\p30_deffer_impl_flat_d.ps
-------------------- NV40 --------------------
Target: GeForce 6800 Ultra (NV40) :: Unified Compiler: v65.04
IPU0 ------ Simplified schedule: --------
Pass | Unit | uOp | PC: Op
-----+--------+------+-------------------------
1 | SCT0 | div | 2: TEXh h0, f[TEX1], TEX1;
| SCT1 | mul | 0: MOVh h6.w, const.---x;
| TEX | tex | 2: TEXh h0, f[TEX1], TEX1;
| SCB0 | dp4 | 3: DP4h h1.x, h0, const.xxxx;
| SCB1 | nop | 3: DP4h h1.x, h0, const.xxxx;
| | |
2 | SCT0/1 | div | 5: DIVh h1, h0, h1;
| | |
3 | SCT0 | div | 6: TEXh h0.yzw, f[TEX6], TEX6;
| TEX | tex | 6: TEXh h0.yzw, f[TEX6], TEX6;
| SCB0 | add | 7: ADDh h2.xyz, h0.wzy-, const.xxx-;
| | |
4 | SCT0 | div | 9: TEXh h0.yzw, f[TEX6], TEX7;
| TEX | tex | 9: TEXh h0.yzw, f[TEX6], TEX7;
| SCB0 | add | 10: ADDh h0.xyz, h0.wzy-, const.xxx-;
| | |
5 | SCT0 | mul | 12: MULh h0.xyz, h1.yyy-, h0;
| SCB0 | mad | 13: MADh h4.xyz, h2, h1.xxx-, h0;
| | |
6 | SCT0 | div | 14: TEXh h0.yzw, f[TEX6], TEX9;
| TEX | tex | 14: TEXh h0.yzw, f[TEX6], TEX9;
| SCB0 | add | 15: ADDh h2.xyz, h0.wzy-, const.xxx-;
| | |
7 | SCT0 | div | 17: TEXh h0.yzw, f[TEX6], TEX8;
| TEX | tex | 17: TEXh h0.yzw, f[TEX6], TEX8;
| SCB0 | add | 18: ADDh h3.xyz, h0.wzy-, const.xxx-;
| SCB1 | mul | 20: MOVh h0.w, const;
| | |
8 | SCT0 | mul | 22: MADh h3.xyz, h3, h1.zzz-, h4;
| SCB0 | mad | 22: MADh h3.xyz, h3, h1.zzz-, h4;
| | |
9 | SCT0 | div | 23: TEXh h0.xyz, f[TEX6], TEX3;
| TEX | tex | 23: TEXh h0.xyz, f[TEX6], TEX3;
| SCB0 | mul | 24: MULh h4.xyz, h0, h1.yyy-;
| | |
10 | SCT0 | div | 25: TEXh h6.xyz, f[TEX6], TEX5;
| TEX | tex | 25: TEXh h6.xyz, f[TEX6], TEX5;
| SCB0 | mad | 26: MADh h0.xyz, h2, h1.www-, h3;
| | |
11 | SCT0 | div | 27: TEXh h2.xyz, f[TEX6], TEX2;
| TEX | tex | 27: TEXh h2.xyz, f[TEX6], TEX2;
| SCB0 | mad | 28: MADh h3.xyz, h2, h1.xxx-, h4;
| | |
12 | SCT0 | div | 29: TEXh h2.xyz, f[TEX6], TEX4;
| TEX | tex | 29: TEXh h2.xyz, f[TEX6], TEX4;
| SCB0 | mad | 30: MADh h1.xyz, h2, h1.zzz-, h3;
| | |
13 | SCT0 | mul | 31: MOVh h3.xy, h0;
| SCT1 | mul | 32: MOVh/2 h3.z, h0;
| SCB0 | mad | 33: MADh h6.xyz, h6, h1.www-, h1;
| | |
14 | SCT0 | div | 34: TEXh h2, f[TEX1], TEX0;
| TEX | tex | 34: TEXh h2, f[TEX1], TEX0;
| SCB0 | mul | 35: MULh h4.xyz, h0, const.xxy-;
| SCB1 | mul | 37: MOVh h4.w, h2;
| | |
15 | SCT0 | div | 38: DP3h h0.x, f[TEX3], h3;
| SCB0 | dp3 | 38: DP3h h0.x, f[TEX3], h3;
| | |
16 | SCT0 | div | 39: DP3h h0.y, f[TEX4], h4;
| SCB0 | dp3 | 39: DP3h h0.y, f[TEX4], h4;
| | |
17 | SCT0 | div | 40: DP3h h0.z, f[TEX5], h4;
| SCB0 | dp3 | 40: DP3h h0.z, f[TEX5], h4;
| | |
18 | SCT1 | mov | 41: NRMh h4.xyz, h0;
| SRB | nrm | 41: NRMh h4.xyz, h0;
| SCB0 | mul | 42: MULh*2 h6.xyz, h6, h2;
| | |
19 | SCT0 | div | 43: MADh h0.xyz, h4, const.xxx-, f[TEX2];
| SCB0 | mad | 43: MADh h0.xyz, h4, const.xxx-, f[TEX2];
Pass SCT TEX SCB
1: 75% 100% 100%
2: 100% 0% 0%
3: 50% 100% 75%
4: 50% 100% 75%
5: 75% 0% 75%
6: 50% 100% 75%
7: 50% 100% 100%
8: 75% 0% 75%
9: 50% 100% 75%
10: 50% 100% 75%
11: 50% 100% 75%
12: 50% 100% 75%
13: 75% 0% 75%
14: 50% 100% 100%
15: 75% 0% 75%
16: 75% 0% 75%
17: 75% 0% 75%
18: 0% 0% 75%
19: 75% 0% 75%
20: 0% 0% 0%
MEAN: 57% 50% 71%
Pass SCT0 SCT1 TEX SCB0 SCB1
1: 100% 100% 100% 100% 100%
2: 100% 100% 0% 0% 0%
3: 100% 0% 100% 100% 0%
4: 100% 0% 100% 100% 0%
5: 100% 0% 0% 100% 0%
6: 100% 0% 100% 100% 0%
7: 100% 0% 100% 100% 100%
8: 100% 0% 0% 100% 0%
9: 100% 0% 100% 100% 0%
10: 100% 0% 100% 100% 0%
11: 100% 0% 100% 100% 0%
12: 100% 0% 100% 100% 0%
13: 100% 100% 0% 100% 0%
14: 100% 0% 100% 100% 100%
15: 100% 0% 0% 100% 0%
16: 100% 0% 0% 100% 0%
17: 100% 0% 0% 100% 0%
18: 0% 0% 0% 100% 0%
19: 100% 0% 0% 100% 0%
20: 0% 0% 0% 0% 0%
MEAN: 90% 15% 50% 90% 15%
Cycles: 20.00 :: R Regs Used: 4 :: R Regs Max Index (0 based): 3
--------------------------------------------------------------------------------
Running performance on file test\p30_deffer_impl_flat_d.ps
-------------------- NV40 --------------------
Target: GeForce 6800 Ultra (NV40) :: Unified Compiler: v81.95
Cycles: 19.00 :: R Regs Used: 4 :: R Regs Max Index (0 based): 3
Pixel throughput (assuming 1 cycle texture lookup) 336.84 MP/s
--------------------------------------------------------------------------------
Running performance on file test\p30_deffer_impl_flat_d.ps
-------------------- G70 --------------------
Target: GeForce 7800 GT (G70) :: Unified Compiler: v81.95
Cycles: 18.00 :: R Regs Used: 4 :: R Regs Max Index (0 based): 3
Pixel throughput (assuming 1 cycle texture lookup) 533.33 MP/s

View file

@ -0,0 +1,43 @@
//
// Generated by Microsoft (R) D3DX9 Shader Compiler 9.07.239.0000
//
// fxc /DUSE_SHADER3=1 /nologo /Tps_3_0 /Emain /Zpr /Fctest\p30_sky2.ps
// sky2.ps
//
//
// Parameters:
//
// samplerCUBE s_sky0;
// samplerCUBE s_sky1;
// sampler2D s_tonemap;
//
//
// Registers:
//
// Name Reg Size
// ------------ ----- ----
// s_sky0 s0 1
// s_sky1 s1 1
// s_tonemap s2 1
//
ps_3_0
// Sky pixel shader: blends two cube-map samples by the input color alpha,
// tints the result with the input color rgb, scales it by twice a tonemap
// sample and writes it to two render targets.
// Sampler bindings (register table in the header): s0 = s_sky0,
// s1 = s_sky1, s2 = s_tonemap.
// c0 = (0.5, 0, 1/3, 1): x = tonemap lookup coordinate, y = output alpha,
// (z, z, z, w) = per-channel scale applied to the second render target.
def c0, 0.5, 0, 0.333333343, 1
// Inputs: v0 = color, v1.xyz = texcoord0, v2.xyz = texcoord1.
dcl_color_pp v0
dcl_texcoord_pp v1.xyz
dcl_texcoord1_pp v2.xyz
dcl_cube s0
dcl_cube s1
dcl_2d s2
// r1 = s_sky0 sampled with v1, r0 = s_sky1 sampled with v2.
texld_pp r1, v1, s0
texld_pp r0, v2, s1
// r2.xyz = v0.w * sky1 + (1 - v0.w) * sky0.
lrp_pp r2.xyz, v0.w, r0, r1
// r1.xyz = blended sky * input color rgb.
mul_pp r1.xyz, r2, v0
// Sample s_tonemap at the constant coordinate (0.5, 0.5).
texld_pp r0, c0.x, s2
// r0.w = 2 * tonemap.x.
add_pp r0.w, r0.x, r0.x
// r0.xyz = tinted sky * (2 * tonemap.x).
mul_pp r0.xyz, r1, r0.w
// Alpha is forced to 0 for both outputs.
mov r0.w, c0.y
// oC0 = (rgb, 0).
mov_pp oC0, r0
// oC1 = (rgb / 3, 0): rgb is scaled by c0.z, w by c0.w = 1.
mul_pp oC1, r0, c0.zzzw
// approximately 11 instruction slots used (3 texture, 8 arithmetic)

View file

@ -0,0 +1,57 @@
--------------------------------------------------------------------------------
Running performance on file test\p30_sky2.ps
-------------------- NV40 --------------------
Target: GeForce 6800 Ultra (NV40) :: Unified Compiler: v65.04
IPU0 ------ Simplified schedule: --------
Pass | Unit | uOp | PC: Op
-----+--------+------+-------------------------
1 | SCT0 | div | 0: TEXh h0.xyz, f[TEX1], TEX0;
| TEX | tex | 0: TEXh h0.xyz, f[TEX1], TEX0;
| SCB1 | mov | 1: MOVr r1.w, const.---x;
| | |
2 | SCT0/1 | div | 3: MOVh h1, f[TEX0];
| SCB0 | mad | 4: MADh h2.xyz,-h1.www-, h0, h0;
| SCB1 | mul | 5: MOVh h0.w, const.---x;
| | |
3 | SCT0 | div | 7: TEXh h0.xyz, f[TEX2], TEX1;
| TEX | tex | 7: TEXh h0.xyz, f[TEX2], TEX1;
| SCB0 | mad | 8: MADh h0.xyz, h1.www-, h0, h2;
| | |
4 | SCT0 | mov | 9: TEXh h2.x, r1.wwww, TEX2;
| TEX | tex | 9: TEXh h2.x, r1.wwww, TEX2;
| SCB0 | mul | 10: MULh*2 h0.xyz, h0, h1;
| | |
5 | SCT0 | mul | 11: MULh h0.xyz, h0, h2.xxx-;
| SCB0/1 | mul | 12: MULh h4, h0, const.xxxy;
Pass SCT TEX SCB
1: 50% 100% 25%
2: 100% 0% 100%
3: 50% 100% 75%
4: 0% 100% 75%
5: 75% 0% 100%
MEAN: 55% 60% 75%
Pass SCT0 SCT1 TEX SCB0 SCB1
1: 100% 0% 100% 0% 100%
2: 100% 100% 0% 100% 100%
3: 100% 0% 100% 100% 0%
4: 0% 0% 100% 100% 0%
5: 100% 0% 0% 100% 100%
MEAN: 80% 20% 60% 80% 60%
Cycles: 5.00 :: R Regs Used: 3 :: R Regs Max Index (0 based): 2
--------------------------------------------------------------------------------
Running performance on file test\p30_sky2.ps
-------------------- NV40 --------------------
Target: GeForce 6800 Ultra (NV40) :: Unified Compiler: v81.95
Cycles: 5.00 :: R Regs Used: 3 :: R Regs Max Index (0 based): 2
Pixel throughput (assuming 1 cycle texture lookup) 1.28 GP/s
--------------------------------------------------------------------------------
Running performance on file test\p30_sky2.ps
-------------------- G70 --------------------
Target: GeForce 7800 GT (G70) :: Unified Compiler: v81.95
Cycles: 5.00 :: R Regs Used: 3 :: R Regs Max Index (0 based): 2
Pixel throughput (assuming 1 cycle texture lookup) 1.92 GP/s

View file

@ -0,0 +1,34 @@
//
// Generated by Microsoft (R) D3DX9 Shader Compiler 9.07.239.0000
//
// fxc /DUSE_SHADER3=1 /nologo /Tps_3_0 /Emain /Zpr /Fctest\p30_yuv2rgb.ps
// yuv2rgb.ps
//
//
// Parameters:
//
// sampler2D s_base;
//
//
// Registers:
//
// Name Reg Size
// ------------ ----- ----
// s_base s1 1
//
ps_3_0
// Color-conversion pixel shader (source file yuv2rgb.ps): applies a fixed
// affine transform to the xyz channels of the s_base sample and outputs
// the result with alpha = 1.
// NOTE(review): the sample's x/y/z presumably carry Y/U/V and the output is
// RGB - inferred from the file name and the coefficients; confirm against
// the HLSL source.
// c0 = per-channel offset added at the end.
// c1 = (scale for x, y-channel coefficients for output y and z, output alpha).
// c2.xy = z-channel coefficients for output x and y.
def c0, -0.869610012, 0.53075999, -1.07860005, 0
def c1, 1.16406, -0.390625, 2.01561999, 1
def c2, 1.59765005, -0.8125, 0, 0
dcl_texcoord v0.xy
// s_base is bound to s1 (not s0) per the register table in the header.
dcl_2d s1
texld_pp r0, v0, s1
// r1.x = 1.16406 * in.x.
mul_pp r1.x, r0.x, c1.x
// r1.y = r1.x - 0.390625 * in.y ; r1.z = r1.x + 2.01562 * in.y.
mad_pp r1.yz, r0.y, c1, r1.x
// r0.x = r1.x + 1.59765 * in.z ; r0.y = r1.y - 0.8125 * in.z.
// in.z is read from r0.z, which this instruction does not write.
mad_pp r0.xy, r0.z, c2, r1
// r0.z = r1.z (in.z is no longer needed).
mov_pp r0.z, r1.z
// Add the constant per-channel offset and write the color.
add_pp oC0.xyz, r0, c0
// Output alpha = 1.
mov_pp oC0.w, c1.w
// approximately 7 instruction slots used (1 texture, 6 arithmetic)

View file

@ -0,0 +1,49 @@
--------------------------------------------------------------------------------
Running performance on file test\p30_yuv2rgb.ps
-------------------- NV40 --------------------
Target: GeForce 6800 Ultra (NV40) :: Unified Compiler: v65.04
IPU0 ------ Simplified schedule: --------
Pass | Unit | uOp | PC: Op
-----+--------+------+-------------------------
1 | SCT0 | div | 2: TEXh h0.xyz, f[TEX0], TEX1;
| SCT1 | mul | 0: MOVh h0.w, const.---x;
| TEX | tex | 2: TEXh h0.xyz, f[TEX0], TEX1;
| SCB0 | mul | 3: MULh h0.x, h0, const.x---;
| | |
2 | SCB0 | mad | 5: MADh h1.xy, h0.yy--, const.xy--, h0.xx--;
| | |
3 | SCT0 | mul | 7: MOVh h0.y, h1.-x--;
| SCB0 | mad | 8: MADh h0.xy, h0.zz--, const.xy--, h0;
| SCB1 | mul | 10: MOVh h0.z, h1.--y-;
| | |
4 | SCB0 | add | 11: ADDh h0.xyz, h0, const;
Pass SCT TEX SCB
1: 75% 100% 25%
2: 0% 0% 50%
3: 25% 0% 75%
4: 0% 0% 75%
MEAN: 25% 25% 56%
Pass SCT0 SCT1 TEX SCB0 SCB1
1: 100% 100% 100% 100% 0%
2: 0% 0% 0% 100% 0%
3: 100% 0% 0% 100% 100%
4: 0% 0% 0% 100% 0%
MEAN: 50% 25% 25% 100% 25%
Cycles: 4.00 :: R Regs Used: 1 :: R Regs Max Index (0 based): 0
--------------------------------------------------------------------------------
Running performance on file test\p30_yuv2rgb.ps
-------------------- NV40 --------------------
Target: GeForce 6800 Ultra (NV40) :: Unified Compiler: v81.95
Cycles: 4.00 :: R Regs Used: 1 :: R Regs Max Index (0 based): 0
Pixel throughput (assuming 1 cycle texture lookup) 1.60 GP/s
--------------------------------------------------------------------------------
Running performance on file test\p30_yuv2rgb.ps
-------------------- G70 --------------------
Target: GeForce 7800 GT (G70) :: Unified Compiler: v81.95
Cycles: 3.00 :: R Regs Used: 1 :: R Regs Max Index (0 based): 0
Pixel throughput (assuming 1 cycle texture lookup) 3.20 GP/s

View file

@ -0,0 +1,32 @@
//
// Generated by Microsoft (R) D3DX9 Shader Compiler 9.07.239.0000
//
// fxc /nologo /Tvs_2_0 /Emain /Zpr /Fctest\v20_sky2.vs sky2.vs
//
//
// Parameters:
//
// row_major float4x4 m_WVP;
//
//
// Registers:
//
// Name Reg Size
// ------------ ----- ----
// m_WVP c0 4
//
vs_2_0
// Sky vertex shader (vs_2_0 build): transforms the position by m_WVP and
// passes color and two texture coordinates through unchanged.
// Inputs: v0 = position, v1 = color, v2 = texcoord0, v3 = texcoord1.
dcl_position v0
dcl_color v1
dcl_texcoord v2
dcl_texcoord1 v3
// oPos = m_WVP * v0: m_WVP is row_major and occupies c0..c3 (register table
// in the header), so each output component is one row dotted with v0.
dp4 oPos.x, c0, v0
dp4 oPos.y, c1, v0
dp4 oPos.z, c2, v0
dp4 oPos.w, c3, v0
// Pass-through: color to oD0, xyz of both texture coordinates to oT0/oT1.
mov oD0, v1
mov oT0.xyz, v2
mov oT1.xyz, v3
// approximately 7 instruction slots used

View file

@ -0,0 +1,37 @@
//
// Generated by Microsoft (R) D3DX9 Shader Compiler 9.07.239.0000
//
// fxc /DUSE_SHADER3=1 /nologo /Tvs_3_0 /Emain /Zpr /Fctest\v30_sky2.vs
// sky2.vs
//
//
// Parameters:
//
// row_major float4x4 m_WVP;
//
//
// Registers:
//
// Name Reg Size
// ------------ ----- ----
// m_WVP c0 4
//
vs_3_0
// Sky vertex shader (vs_3_0 build): transforms the position by m_WVP and
// passes color and two texture coordinates through unchanged.
// Inputs: v0 = position, v1 = color, v2 = texcoord0, v3 = texcoord1.
dcl_position v0
dcl_color v1
dcl_texcoord v2
dcl_texcoord1 v3
// Outputs: o0 = position, o1 = color, o2.xyz = texcoord0, o3.xyz = texcoord1.
dcl_position o0
dcl_color o1
dcl_texcoord o2.xyz
dcl_texcoord1 o3.xyz
// o0 = m_WVP * v0: m_WVP is row_major and occupies c0..c3 (register table
// in the header), so each output component is one row dotted with v0.
dp4 o0.x, c0, v0
dp4 o0.y, c1, v0
dp4 o0.z, c2, v0
dp4 o0.w, c3, v0
// Pass-through of color and the xyz of both texture coordinates.
mov o1, v1
mov o2.xyz, v2
mov o3.xyz, v3
// approximately 7 instruction slots used

View file

@ -0,0 +1,37 @@
//
// Generated by Microsoft (R) D3DX9 Shader Compiler 9.07.239.0000
//
// fxc /DUSE_SHADER3=1 /Gfp /nologo /Tvs_3_0 /Emain /Zpr /Fctest\v3F_sky2.vs
// sky2.vs
//
//
// Parameters:
//
// row_major float4x4 m_WVP;
//
//
// Registers:
//
// Name Reg Size
// ------------ ----- ----
// m_WVP c0 4
//
vs_3_0
// Sky vertex shader (vs_3_0 build compiled with /Gfp, see the command line
// in the header): transforms the position by m_WVP and passes color and two
// texture coordinates through unchanged. The instruction stream below is
// straight-line; it contains no flow-control instructions.
// Inputs: v0 = position, v1 = color, v2 = texcoord0, v3 = texcoord1.
dcl_position v0
dcl_color v1
dcl_texcoord v2
dcl_texcoord1 v3
// Outputs: o0 = position, o1 = color, o2.xyz = texcoord0, o3.xyz = texcoord1.
dcl_position o0
dcl_color o1
dcl_texcoord o2.xyz
dcl_texcoord1 o3.xyz
// o0 = m_WVP * v0: m_WVP is row_major and occupies c0..c3 (register table
// in the header), so each output component is one row dotted with v0.
dp4 o0.x, c0, v0
dp4 o0.y, c1, v0
dp4 o0.z, c2, v0
dp4 o0.w, c3, v0
// Pass-through of color and the xyz of both texture coordinates.
mov o1, v1
mov o2.xyz, v2
mov o3.xyz, v3
// approximately 7 instruction slots used