// (c) 2008 Steven Gratton
// Guided in part by samples from the AMD CAL SDK 

std::string newtest=
"il_ps_2_0\n"
  "dcl_input_position_interp(linear_noperspective) vWinCoord0.xy\n"
  "mov r10,vWinCoord0.xy00\n"
"ftoi r11,r10\n"
  "mov g[0],r11\n"
"ret_dyn\n"
  "end\n"

;



// ---------------------------------------------------------------------------
// fastmult: AMD IL pixel-shader source computing mat2 += mat0 * mat1.
//
// Register map used by the generated IL (taken from the IL's own comments):
//   r0..r7      eight float4 samples of mat0 (resource 0), one per output row
//   r1<k><c>    4x4 float4 samples of mat1 (resource 1); k = row offset in the
//               current strip, c = column offset
//   r2<r><c>    8x4 float4 accumulators for mat2; r = row, c = column
//   cb0[0].x    "k4max as float"  (loop bound compared against r10.z)
//   cb0[0].y    "pitch of mat2 as int" (added to the g[] index per row)
//   r10.xy      position of the top-left element of this fragment's output
//   r10.z/.w    loop coordinates, stepped by +1.0 / +4.0 per iteration
//
// The accumulators are loaded from the global buffer g[], updated inside a
// whileloop whose exit test sits at the TOP of the loop body, and written
// back to the same g[] locations afterwards.
//
// History kept from the original hand-written listing:
//   * Zero-initialising the accumulators from a literal (instead of loading
//     them from g[]) was tried and left disabled; noted as "seems v. slow,
//     though not since I moved the test to the start".
//   * Driving the loop from a dedicated register (r16.x) "didn't make any
//     difference"; moving the break test to the start of the loop "certainly
//     has made things better".  The end-of-loop break variants are disabled.
//   * Repeating the whole MAD section up to three more times per iteration
//     ("doubling the math") was tried and left disabled.
//
// The text is assembled programmatically because every section is a regular
// pattern over (row, column, k); the resulting string is identical to the
// original literal.
// ---------------------------------------------------------------------------

// Single decimal digit as a character; only meaningful for 0..9.
static char fastmult_digit(int value)
{
  return static_cast<char>('0' + value);
}

// Register name "r<bank><major><minor>", e.g. (2, 7, 3) -> "r273".
static std::string fastmult_reg(int bank, int major, int minor)
{
  std::string name(1, 'r');
  name += fastmult_digit(bank);
  name += fastmult_digit(major);
  name += fastmult_digit(minor);
  return name;
}

// Global-buffer operand for column `col`: "g[r12.x]" or "g[r12.x+<col>]".
static std::string fastmult_slot(int col)
{
  std::string operand("g[r12.x");
  if (col != 0) {
    operand += '+';
    operand += fastmult_digit(col);
  }
  operand += ']';
  return operand;
}

// IL comment line naming the four registers of one row of a bank,
// e.g. (1, 0) -> "; r100 r101 r102 r103 \n".
static std::string fastmult_layout_row(int bank, int major)
{
  std::string line(1, ';');
  for (int minor = 0; minor < 4; ++minor) {
    line += ' ';
    line += fastmult_reg(bank, major, minor);
  }
  line += " \n";
  return line;
}

// Moves the 8x4 accumulator block between registers and g[].
// loadFromGlobal == true  emits "mov r2rc,g[...]" (initialisation);
// loadFromGlobal == false emits "mov g[...],r2rc" (write-back).
// r12.x is advanced by the pitch (cb0[0].y) between consecutive rows.
static std::string fastmult_block_io(bool loadFromGlobal)
{
  std::string text;
  for (int row = 0; row < 8; ++row) {
    if (row > 0) {
      text += "iadd r12.x,r12.x,cb0[0].y\n";
    }
    for (int col = 0; col < 4; ++col) {
      const std::string acc = fastmult_reg(2, row, col);
      const std::string slot = fastmult_slot(col);
      text += "mov ";
      text += loadFromGlobal ? acc : slot;
      text += ',';
      text += loadFromGlobal ? slot : acc;
      text += '\n';
    }
  }
  return text;
}

// Assembles the complete IL program text.
static std::string fastmult_build_il()
{
  static const char lanes[] = "xyzw";
  std::string il;

  // Declarations and per-fragment set-up.
  il += "il_ps_2_0\n";
  il += "dcl_input_position_interp(linear_noperspective) vWinCoord0.xy\n";
  il += "dcl_resource_id(0)_type(2d,unnorm)_fmtx(float)_fmty(float)_fmtz(float)_fmtw(float)\n";
  il += "dcl_resource_id(1)_type(2d,unnorm)_fmtx(float)_fmty(float)_fmtz(float)_fmtw(float)\n";
  il += "dcl_cb cb0[1] ; k4max as float, pitch of mat2 as int \n";
  il += "dcl_literal l0,4.0,8.0,0.5,0.5\n";
  il += "flr r10,vWinCoord0.xyxx\n";
  il += "mad r10.xyzw,r10.xy00,l0.xy00,l0.zzzz ;r10.xy contains position of topleft element of output \n";

  // Explanatory comments embedded in the IL itself.
  il += "; r10.z starts with 0.5...\n";
  il += "; r10.z is going to increment by +1.0 from 0.5 to k4-.5 inclusive as a float\n";
  il += "; r10.w is going to increment by +4.0 from 0.5 to k4-3.5 inclusive as a float\n";
  il += "; mat0 looks like: \n";
  for (int row = 0; row < 8; ++row) {
    il += "; r";
    il += fastmult_digit(row);
    il += " \n";
  }
  il += "; mat1 is: \n";
  for (int k = 0; k < 4; ++k) {
    il += fastmult_layout_row(1, k);
  }
  il += "; mat2 is: \n";
  for (int row = 0; row < 8; ++row) {
    il += fastmult_layout_row(2, row);
  }
  il += "; We are calculating mat2+=mat0*mat1. \n";
  il += "; Initializing mat2...\n";

  // Integer g[] index of the block's first element; r14.x keeps a copy so the
  // write-back can restart from the same place.
  il += "ftoi r11,r10 \n";
  il += "imad r12.x,r11.y,cb0[0].y,r11.x\n";
  il += "mov r14.x,r12.x\n";
  il += fastmult_block_io(true);

  // Main loop; the exit test is deliberately the first instruction.
  il += "whileloop\n";
  il += "breakc_relop(gt) r10.z,cb0[0].x \n";
  il += "; read in new blocks and multiply-add\n";

  // mat0: rows 0..7 at (r10.z, r10.y + row).  Row 0 has no offset and uses
  // the original's ".zy11" swizzle.
  for (int row = 0; row < 8; ++row) {
    il += "sample_resource(0)_sampler(0)";
    if (row != 0) {
      il += "_aoffimmi(0.0,";
      il += fastmult_digit(row);
      il += ".0,0.0)";
    }
    il += " r";
    il += fastmult_digit(row);
    il += (row == 0) ? ", r10.zy11\n" : ", r10.zy\n";
  }

  // mat1: 4x4 block at (r10.x + c, r10.w + k); the (0,0) sample has no offset.
  for (int k = 0; k < 4; ++k) {
    for (int c = 0; c < 4; ++c) {
      il += "sample_resource(1)_sampler(0)";
      if (k != 0 || c != 0) {
        il += "_aoffimmi(";
        il += fastmult_digit(c);
        il += ".0,";
        il += fastmult_digit(k);
        il += ".0,0.0)";
      }
      il += ' ';
      il += fastmult_reg(1, k, c);
      il += ", r10.xw\n";
    }
  }

  // Multiply-accumulate, column-major over the output block:
  //   r2<row><c> += r<row>.<lane k> * r1<k><c>   for k = 0..3
  for (int c = 0; c < 4; ++c) {
    for (int row = 0; row < 8; ++row) {
      const std::string acc = fastmult_reg(2, row, c);
      for (int k = 0; k < 4; ++k) {
        il += "mad ";
        il += acc;
        il += ",r";
        il += fastmult_digit(row);
        il += '.';
        il.append(4, lanes[k]);
        il += ',';
        il += fastmult_reg(1, k, c);
        il += ',';
        il += acc;
        il += " \n";
      }
    }
  }

  // Advance the loop coordinates: r10.z += 1.0, r10.w += 4.0.
  il += "add r10.__zw,r10.00zw,l0.001x\n";
  il += "endloop\n";

  // Write the accumulators back where they were loaded from.
  il += "; outputting the results... \n";
  il += "mov r12.x,r14.x\n";
  il += fastmult_block_io(false);

  il += "ret_dyn\n";
  il += "end\n";
  return il;
}

std::string fastmult = fastmult_build_il();

