; ---------------------------------------------------------------------------
; Macro 0 : four multiply-adds that share the vector operand in1.
;
;   out0 = in0.x * in1 + in2
;   out1 = in0.y * in1 + in3
;   out2 = in0.z * in1 + in4
;   out3 = in0.w * in1 + in5
;
; Inputs : in0      - supplies the four scalar factors (x, y, z, w)
;          in1      - vector factor common to all four results
;          in2..in5 - addends, one per result
; Outputs: out0..out3
; ---------------------------------------------------------------------------
mdef(0)_out(4)_in(6)
    mad out0, in0.x, in1, in2
    mad out1, in0.y, in1, in3
    mad out2, in0.z, in1, in4
    mad out3, in0.w, in1, in5
mend
il_ps_2_0
;
; 8x8 outer product version
; 20100707 N.Nakasato (University of Aizu, Japan)
; Provided "as is"
;
; Every invocation keeps sixteen float4 accumulators (r50-r57 and r60-r67,
; i.e. 8 x 8 floats).  Each loop iteration covers two k steps: per step it
; samples two float4 from resource 0 (A) and two from resource 1 (B), then
; folds them into the accumulators with macro 0.  After the loop the
; accumulators are written to the global buffer g[].
;
; Register roles
;   r100.x    2 * floor(v0.x) : x coordinate for the B samples, and the
;                               offset added to the output index
;   r100.y    2 * floor(v0.y) : x coordinate for the A samples
;                               (multiplied by 4 after the loop for the output index)
;   r100.z    k counter       : starts at -2, +2 at the top of each iteration,
;                               used as the y coordinate of every sample
;   r30, r31  current pair of A vectors
;   r32, r33  current pair of B vectors
;   r999      loop-exit predicate
;   r3        four g[] indices being written, r4.x index stride
;
; The k counter only takes the values 0, 2, 4, ... so n (cb0[0].y) is
; expected to be even; an odd n is processed as if rounded up to even.
;
dcl_input_interp(linear) v0.xy
dcl_resource_id(0)_type(2d,unnorm)_fmtx(float)_fmty(float)_fmtz(float)_fmtw(float) ;A
dcl_resource_id(1)_type(2d,unnorm)_fmtx(float)_fmty(float)_fmtz(float)_fmtw(float) ;B
dcl_literal l0, 0.0, 1.0, 2.0, 3.0
dcl_literal l1, 4.0, 1.0, 2.0, 3.0
dcl_literal l2, -1.0, 1.0, -2.0, 3.0
dcl_literal l4, 1, 1, 1, 2
; cb0[0] = {(float)n/4, (float)n, 0.0, 0.0} where n is matrix size.
; cb0[0].z also acts as a switch: when it equals 1.0 the loop is left right
; after the second group of loads, before those values are accumulated
; (see breakc_relop below).  Leave it at 0.0 to run all k steps.
dcl_cb cb0[1]

; ---- per-invocation coordinates -------------------------------------------
flr r100.xy, v0.xy
mul r100.x, r100.x, l0.z                ; x2
mul r100.y, r100.y, l0.z                ; x2
mov r100.z, l2.z                        ; -2 (first "add" in the loop makes it 0)

; ---- clear accumulators ---------------------------------------------------
mov r50, l0.xxxx
mov r51, l0.xxxx
mov r52, l0.xxxx
mov r53, l0.xxxx
mov r54, l0.xxxx
mov r55, l0.xxxx
mov r56, l0.xxxx
mov r57, l0.xxxx

mov r60, l0.xxxx
mov r61, l0.xxxx
mov r62, l0.xxxx
mov r63, l0.xxxx
mov r64, l0.xxxx
mov r65, l0.xxxx
mov r66, l0.xxxx
mov r67, l0.xxxx

; ---- k loop, two k steps per iteration ------------------------------------
whileloop
    add r100.z, r100.z, l0.z            ; +2
    ; ">=" rather than "==": identical exit point for every even n, and the
    ; loop still terminates if n is odd or not an integer.
    ge r999, r100.z, cb0[0].y
    break_logicalnz r999.x

    ; read A (transposed), step k
    sample_resource(0)_sampler(0)_aoffimmi(0.0,0.0,0.0) r30, r100.yz
    sample_resource(0)_sampler(1)_aoffimmi(1.0,0.0,0.0) r31, r100.yz

    ; read B, step k
    sample_resource(1)_sampler(2)_aoffimmi(0.0,0.0,0.0) r32, r100.xz
    sample_resource(1)_sampler(3)_aoffimmi(1.0,0.0,0.0) r33, r100.xz

    ; r5x += r30.{x,y,z,w} * r32 ; r6x += r30.{x,y,z,w} * r33   (x = 0..3)
    mcall(0)(r50,r51,r52,r53),(r30,r32,r50,r51,r52,r53)
    mcall(0)(r60,r61,r62,r63),(r30,r33,r60,r61,r62,r63)

    ; r5x += r31.{x,y,z,w} * r32 ; r6x += r31.{x,y,z,w} * r33   (x = 4..7)
    mcall(0)(r54,r55,r56,r57),(r31,r32,r54,r55,r56,r57)
    mcall(0)(r64,r65,r66,r67),(r31,r33,r64,r65,r66,r67)

    ; read A (transposed), step k+1 (y offset 1)
    sample_resource(0)_sampler(4)_aoffimmi(0.0,1.0,0.0) r30, r100.yz
    sample_resource(0)_sampler(5)_aoffimmi(1.0,1.0,0.0) r31, r100.yz

    ; read B, step k+1 (y offset 1)
    sample_resource(1)_sampler(6)_aoffimmi(0.0,1.0,0.0) r32, r100.xz
    sample_resource(1)_sampler(7)_aoffimmi(1.0,1.0,0.0) r33, r100.xz

    ; optional early exit controlled by cb0[0].z (1.0 = leave the loop here)
    breakc_relop(eq) cb0[0].z, l0.y
    mcall(0)(r50,r51,r52,r53),(r30,r32,r50,r51,r52,r53)
    mcall(0)(r60,r61,r62,r63),(r30,r33,r60,r61,r62,r63)

    mcall(0)(r54,r55,r56,r57),(r31,r32,r54,r55,r56,r57)
    mcall(0)(r64,r65,r66,r67),(r31,r33,r64,r65,r66,r67)
endloop

; ---- output index ---------------------------------------------------------
; base = 8*floor(v0.y) * (n/4) + 2*floor(v0.x), converted to an integer.
mul r100.y, r100.y, l1.x                ; x2 x4 = x8
mov r1, r100.xy
mul r2.x, r1.y, cb0[0].x
add r2.x, r2.x, r1.x
ftoi r3.x, r2.x
ftoi r4.x, cb0[0].x                     ; stride = (int)(n/4)

; r3 = {base, base+1, base+stride, base+1+stride}
iadd r3.y, r3.x, l4.x
iadd r3.__zw, r3.xyxy, r4.xxxx

; ---- store: two float4 per stride step, r5k then r6k ----------------------
mov g[r3.x], r50
mov g[r3.y], r60
mov g[r3.z], r51
mov g[r3.w], r61

imul r4.x, r4.x, l4.w                   ; stride *= 2: each group below covers two strides
iadd r3.xyzw, r3.xyzw, r4.xxxx
mov g[r3.x], r52
mov g[r3.y], r62
mov g[r3.z], r53
mov g[r3.w], r63

iadd r3.xyzw, r3.xyzw, r4.xxxx
mov g[r3.x], r54
mov g[r3.y], r64
mov g[r3.z], r55
mov g[r3.w], r65

iadd r3.xyzw, r3.xyzw, r4.xxxx
mov g[r3.x], r56
mov g[r3.y], r66
mov g[r3.z], r57
mov g[r3.w], r67

ret_dyn
endmain

end