#include "common.h" struct v2p { float4 tc0: TEXCOORD0; // Central float4 tc1: TEXCOORD1; // -1,+1 float4 tc2: TEXCOORD2; // -2,+2 float4 tc3: TEXCOORD3; // -3,+3 float4 tc4: TEXCOORD4; // -4,+4 float4 tc5: TEXCOORD5; // -5,+5 float4 tc6: TEXCOORD6; // -6,+6 float4 tc7: TEXCOORD7; // -7,+7 }; ////////////////////////////////////////////////////////////////////////////////////////// // perform 4x4 bilinear, 8x8p, the step (B) // b): 64x64p => 8x8p #ifdef FP16_FILTER // native bilinear half sample (half2 tc) { return dot (tex2D(s_image,tc), 1/4.h); // sum components } #else // emulate bilinear half sample (half2 tc) { half phalf = .5h/64.h; half4 res; res.x = dot (tex2D(s_image,tc+half2(-phalf,-phalf)), 1/4.h); // sum components res.y = dot (tex2D(s_image,tc+half2(+phalf,-phalf)), 1/4.h); // sum components res.z = dot (tex2D(s_image,tc+half2(-phalf,+phalf)), 1/4.h); // sum components res.w = dot (tex2D(s_image,tc+half2(+phalf,+phalf)), 1/4.h); // sum components return dot (res, 1/4.h); // sum components } #endif half4 main ( v2p I ) : COLOR { // sample half4 accum0; accum0.x = sample(I.tc0); accum0.y = sample(I.tc1); accum0.z = sample(I.tc2); accum0.w = sample(I.tc3); half4 accum1; accum1.x = sample(I.tc4); accum1.y = sample(I.tc5); accum1.z = sample(I.tc6); accum1.w = sample(I.tc7); half4 accum2; accum2.x = sample(I.tc0.wz); accum2.y = sample(I.tc1.wz); accum2.z = sample(I.tc2.wz); accum2.w = sample(I.tc3.wz); half4 accum3; accum3.x = sample(I.tc4.wz); accum3.y = sample(I.tc5.wz); accum3.z = sample(I.tc6.wz); accum3.w = sample(I.tc7.wz); // perform accumulation half4 final; final.x = dot(accum0,1/4.h); final.y = dot(accum1,1/4.h); final.z = dot(accum2,1/4.h); final.w = dot(accum3,1/4.h); // OK return final; }