1 ;****************************************************************************************/
\r
4 ;* Author: Ken Baird */
\r
5 ;* Description: 3dnow render assembly */
\r
7 ;* The contents of this file are subject to the Genesis3D Public License */
\r
8 ;* Version 1.01 (the "License"); you may not use this file except in */
\r
9 ;* compliance with the License. You may obtain a copy of the License at */
\r
10 ;* http://www.genesis3d.com */
\r
12 ;* Software distributed under the License is distributed on an "AS IS" */
\r
13 ;* basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See */
\r
14 ;* the License for the specific language governing rights and limitations */
\r
15 ;* under the License. */
\r
17 ;* The Original Code is Genesis3D, released March 25, 1999. */
\r
18 ;* Copyright (C) 1999 WildTangent Inc All Rights Reserved */
\r
20 ;****************************************************************************************/
\r
27 assume ds:FLAT,es:FLAT,ss:FLAT
\r
28 assume fs:nothing,gs:nothing
\r
33 EXTERNDEF GBitPtr:DWORD
\r
34 EXTERNDEF ABitPtr:DWORD
\r
35 EXTERNDEF SolidColor:DWORD
\r
36 EXTERNDEF pTex:DWORD
\r
37 EXTERNDEF ClientWindow:DWORD
\r
38 EXTERNDEF Dest:DWORD
\r
39 EXTERNDEF NumASpans:DWORD
\r
40 EXTERNDEF RemainingCount:DWORD
\r
41 EXTERNDEF UDivZStepX:DWORD
\r
42 EXTERNDEF VDivZStepX:DWORD
\r
43 EXTERNDEF ZiStepX:DWORD
\r
44 EXTERNDEF UDivZStepY:DWORD
\r
45 EXTERNDEF VDivZStepY:DWORD
\r
46 EXTERNDEF ZiStepY:DWORD
\r
47 EXTERNDEF UDivZ16StepX:DWORD
\r
48 EXTERNDEF VDivZ16StepX:DWORD
\r
49 EXTERNDEF Zi16StepX:DWORD
\r
50 EXTERNDEF ZiOrigin:DWORD
\r
51 EXTERNDEF FloatTemp:DWORD
\r
52 EXTERNDEF GLMapMulU:DWORD
\r
53 EXTERNDEF UAdjust:DWORD
\r
54 EXTERNDEF UAdjustL:DWORD
\r
55 EXTERNDEF TexPal:DWORD
\r
56 EXTERNDEF ATexPal:DWORD
\r
58 EXTERNDEF UFixed:DWORD
\r
59 EXTERNDEF MaxU:DWORD
\r
60 EXTERNDEF MaxV:DWORD
\r
61 EXTERNDEF QFixedScaleLUT:DWORD
\r
62 EXTERNDEF GMipLevel4_8:DWORD
\r
63 EXTERNDEF GMipLevel20:DWORD
\r
64 EXTERNDEF GLMapAdd:DWORD
\r
65 EXTERNDEF GLightWidth:DWORD
\r
66 EXTERNDEF GLightData:DWORD
\r
67 EXTERNDEF ZBuffer:DWORD
\r
68 EXTERNDEF Zero:QWORD
\r
69 EXTERNDEF UV16:QWORD
\r
70 EXTERNDEF UVLeft:QWORD
\r
71 EXTERNDEF UVLeft2:QWORD
\r
72 EXTERNDEF UVLeftW:QWORD
\r
73 EXTERNDEF UVDivZ16StepX:QWORD
\r
74 EXTERNDEF UVDivZStepX:QWORD
\r
75 EXTERNDEF UVDivZStepY:QWORD
\r
78 EXTERNDEF Q128:QWORD
\r
79 EXTERNDEF WrapMask:QWORD
\r
80 EXTERNDEF QFixedScale16:QWORD
\r
81 EXTERNDEF QFixedScale:QWORD
\r
82 EXTERNDEF UVDivZOrigin:QWORD
\r
86 EXTERNDEF GLMapMulUV:QWORD
\r
87 EXTERNDEF UVL16:QWORD
\r
88 EXTERNDEF UV162:QWORD
\r
89 EXTERNDEF UV16V:QWORD
\r
90 EXTERNDEF QShiftV:QWORD
\r
91 EXTERNDEF LMapMask8:QWORD
\r
92 EXTERNDEF UVAdjustL:QWORD
\r
93 EXTERNDEF UVAdjust:QWORD
\r
94 EXTERNDEF UVAdjust2:QWORD
\r
95 EXTERNDEF QGMip20:QWORD
\r
96 EXTERNDEF QGMip4_8:QWORD
\r
97 EXTERNDEF QDibCan:QWORD
\r
98 EXTERNDEF QZCan:QWORD
\r
99 EXTERNDEF QDibOrCan:QWORD
\r
100 EXTERNDEF QZOrCan:QWORD
\r
101 EXTERNDEF QZVal:QWORD
\r
102 EXTERNDEF QZDelta:QWORD
\r
103 EXTERNDEF QZOut:QWORD
\r
104 EXTERNDEF QDibOut:QWORD
\r
105 EXTERNDEF SCan:QWORD
\r
106 EXTERNDEF QZVal32_0:QWORD
\r
107 EXTERNDEF QZVal32_1:QWORD
\r
108 EXTERNDEF QZBufferPrec:QWORD
\r
109 EXTERNDEF pZBufferPtr:DWORD
\r
110 EXTERNDEF QNegAlpha:QWORD
\r
111 EXTERNDEF RGBADelta:QWORD
\r
112 EXTERNDEF VertAlpha:QWORD
\r
152 ;include listing.inc
\r
154 include stdcall.inc
\r
158 _TEXT$01 SEGMENT PARA USE32 PUBLIC 'CODE'
\r
159 ASSUME DS:FLAT, ES:FLAT, SS:NOTHING, FS:NOTHING, GS:NOTHING
\r
163 ;here's the standard 32 bit pal based combine with lightmap polydraw
\r
164 ;it's your basic subdivided affine mapper with an mmx combine
\r
165 ;no z operations in this loop
\r
166 ;prefetching is useless
\r
167 ;you might think doing pmulhw is a better idea rather than the
\r
168 ;additional shifts it takes to use pmullw...
\r
169 ;trouble is the lack of unsigned word ops
\r
;-----------------------------------------------------------------------
; DrawSpan32_AsmLit3DNow(x1, x2, y)
; Purpose: draws one horizontal span of a texture combined with a
;          lightmap (see the notes above) into the buffer whose
;          base is read from ClientWindow.Buffer.
; Args   : three dwords; the 12 passed to cProc is presumably their
;          total byte size (TODO confirm against stdcall.inc).
;          x1, y - loaded into mm0/mm1 and multiplied by the X/Y step
;                  globals to position the interpolators
;          x2    - not referenced in the lines visible here
; Writes : NumASpans, RemainingCount, UVL16, UV16 and ZIR (globals).
; Uses   : eax, ebx, ecx, edx, esi, edi and mm0-mm7 in the visible code.
; NOTE(review): this listing has gaps. Several branch targets (e.g.
; LeftoverLoopLit), the flag-setting instructions ahead of the
; conditional jumps, and the pixel stores are not visible here, so the
; comments below describe only what the visible lines do.
;-----------------------------------------------------------------------
171 cProc DrawSpan32_AsmLit3DNow, 12,<x1 : dword, x2 : dword, y : dword>
\r
; ebx -> ClientWindow; its PixelPitch and Buffer fields are read below
187 mov ebx,offset ClientWindow
\r
; scale eax by the row pitch (eax presumably holds y here - TODO confirm)
190 imul eax, [ebx].PixelPitch
\r
; edi = base address of the destination buffer
192 mov edi,[ebx].Buffer
\r
; NumASpans is decremented once per 64-byte advance of edi further down;
; RemainingCount drives the leftover loop at the end of the procedure
206 mov [NumASpans],ecx
\r
207 mov [RemainingCount],eax
\r
209 ; prefetch [GBitPtr]
\r
; ---- interpolator setup ----
; Evaluates origin + x*StepX + y*StepY for the packed (U/Z, V/Z) pair
; and for 1/Z. The trailing comments track the contents of mm0..mm7
; from left to right. x1 and y feed pfmul, so they must be floats by
; that point; any conversion is not visible in this listing.
210 ; mm0 mm1 mm2 mm3 mm4 mm5 mm6 mm7
\r
211 movd mm0,x1 ; |x | | | | | | |
\r
212 movq mm2,[UVDivZStepX] ; |x | |UZdX | | | | |
\r
214 movd mm1,y ; |x |y |UZdX | | | | |
\r
215 movq mm3,[UVDivZStepY] ; |x |y |UZdX |UZdY | | | |
\r
; duplicate x and y into both halves so one pfmul scales U and V together
217 punpckldq mm0,mm0 ; x|x |y |UZdX |UZdY | | | |
\r
218 punpckldq mm1,mm1 ; x|x y|y UZdX|VZdX |UZdY | | | |
\r
220 ; prefetch [GBitPtr+32]
\r
225 pfmul mm2,mm0 ; x|x y|y UZX|VZX UZdY|VZdY | | | |
\r
226 movd mm4,[ZiStepX] ; x|x y|y UZX|VZX UZdY|VZdY |ZdX | | |
\r
228 pfmul mm3,mm1 ; x|x y|y UZX|VZX UZY|VZY |ZdX | | |
\r
229 movq mm6,[UVDivZOrigin] ; x|x y|y UZX|VZX UZY|VZY |ZdX | |UZO |
\r
231 ; prefetch [GBitPtr+64]
\r
233 movd mm5,[ZiStepY] ; x|x y|y UZX|VZX UZY|VZY |ZdX |ZdY |UZO |
\r
234 pfmul mm4,mm0 ; x|x y|y UZX|VZX UZY|VZY |ZX |ZdY |UZO |
\r
236 pfmul mm5,mm1 ; x|x y|y UZX|VZX UZY|VZY |ZX |ZY UZO|VZO |
\r
237 movd mm7,[ZiOrigin]
\r
239 pfadd mm6,mm2 ; x|x y|y UZX|VZX UZY|VZY |ZX |ZY UZXS|VZXS |
\r
240 pfadd mm4,mm7 ; x|x y|y UZX|VZX UZY|VZY |ZXS |ZY UZXS|VZXS |
\r
242 ; prefetch [GBitPtr+96]
\r
244 pfadd mm6,mm3 ; x|x y|y UZX|VZX UZY|VZY |ZXS |ZY UZ|VZ |
\r
245 pfadd mm4,mm5 ; x|x y|y UZX|VZX UZY|VZY |Zi |ZY UZ|VZ |
\r
247 movd mm7,[Zi16StepX]
\r
; pfrcp yields a reciprocal estimate of 1/Z; no refinement step
; (pfrcpit1/pfrcpit2) is visible in this listing
249 pfrcp mm0,mm4 ; ZL|ZL y|y UZX|VZX UZY|VZY |Zi |ZY UZ|VZ |
\r
250 movq mm1,mm6 ; ZL|ZL UZ|VZ UZX|VZX UZY|VZY |Zi |ZY UZ|VZ |
\r
; step 1/Z by Zi16StepX to get the value at the right end of the subspan
252 pfadd mm4,mm7 ; ZL|ZL UZ|VZ UZX|VZX UZY|VZY |ZRi |ZY UZ|VZ |
\r
; left-end U,V = (U/Z, V/Z) * Z
253 pfmul mm1,mm0 ; ZL|ZL UL|VL UZX|VZX UZY|VZY |ZRi |ZY UZ|VZ |
\r
255 ; prefetch [GBitPtr+128]
\r
257 pfrcp mm2,mm4 ; ZL|ZL UL|VL ZR|ZR UZY|VZY |ZRi |ZY UZ|VZ |
\r
258 pfadd mm6,[UVDivZ16StepX] ; ZL|ZL UL|VL ZR|ZR UZY|VZY |ZRi |ZY UZR|VZR |
\r
; right-end U,V
260 pfmul mm2,mm6 ; ZL|ZL UL|VL UR|VR UZY|VZY |ZRi |ZY UZR|VZR |
\r
261 movq mm0,mm1 ; UL|VL UL|VL UR|VR UZY|VZY |ZRi |ZY UZR|VZR |
\r
263 movq mm3,mm2 ; ULi|VLi UL|VL UR|UR UR|UR |ZRi |ZY UZR|VZR |
\r
; scale both ends by QFixedScale, then pf2id converts the floats to
; integers (truncating), giving fixed-point texture coordinates
264 pfmul mm0,[QFixedScale] ; UL6|VL6 UL|VL UR|UR UR|UR |ZRi |ZY UZR|VZR |
\r
266 pfmul mm3,[QFixedScale] ; UL6|VL6 UL|VL UR6|UR6 UR6|UR6 |ZRi |ZY UZR|VZR |
\r
267 pf2id mm0,mm0 ;UL6i|VL6i UL|VL UR6|VR6 UR6|VR6 |ZRi |ZY UZR|VZR |
\r
269 pf2id mm3,mm3 ;UL6i|VL6i UL|VL UR6i|VR6i URi|VRi |ZRi |ZY UZR|VZR |
\r
270 ; prefetch [GBitPtr+160]
\r
; NOTE(review): the instruction that sets ZF for this jump is not
; visible here (the MMX ops above do not touch the flags); presumably it
; tests the full-span count so short spans go straight to the leftovers
273 jz HandleLeftoverPixelsLit
\r
; ---- lightmap coordinates for the left end ----
; (U,V) * GLMapMulUV * QFixedScale -> integer, then biased by UVAdjustL
275 movq mm7,[GLMapMulUV] ;UL6i|VL6i UL|VL UR6i|VR6i URi|VRi |ZRi |ZY UZR|VZR LU|LV
\r
276 pfmul mm7,mm1 ;UL6i|VL6i UL|VL UR6i|VR6i URi|VRi |ZRi |ZY UZR|VZR ULL|VLL
\r
277 pfmul mm7,[QFixedScale]
\r
278 pf2id mm7,mm7 ; ULw|VLw DU|DV UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
279 paddd mm7,[UVAdjustL]
\r
; Both dwords of UVL16 are reloaded and then overwritten with ecx or 0.
; The compares/branches that choose between these stores are not visible
; here; this looks like a clamp to the range [0, ecx] - TODO confirm.
283 mov ebx,dword ptr[UVL16+4]
\r
285 ; prefetch [GBitPtr+192]
\r
290 mov dword ptr[UVL16+4],ecx
\r
296 mov dword ptr[UVL16+4],0
\r
298 mov eax,dword ptr[UVL16]
\r
302 mov dword ptr[UVL16],ecx
\r
308 mov dword ptr[UVL16],0
\r
312 ; prefetch [GBitPtr+224]
\r
; shift counts and mask come from globals set up elsewhere
318 psrad mm7,[QGMip4_8]
\r
320 psrld mm5,[QGMip20]
\r
321 pand mm7,[LMapMask8]
\r
; ---- lightmap address ----
; eax = ([UVL16] * GLightWidth + [UVL16+4]) * 3 + GLightData,
; i.e. entries are addressed 3 bytes apart (presumably RGB triples)
327 mov eax,dword ptr[UVL16]
\r
329 ; prefetch [GLightData]
\r
333 imul eax,[GLightWidth]
\r
338 add eax,dword ptr[UVL16+4]
\r
; eax *= 3 without a multiply
341 lea eax,[2*eax+eax]
\r
344 add eax,[GLightData]
\r
346 ;bilinearly interpolate to get good color
\r
; [eax] and [eax+3] are two lightmap entries 3 bytes apart;
; punpcklbw interleaves their bytes into the destination register
347 punpcklbw mm6,[eax+3]
\r
348 mov ecx,[GLightWidth]
\r
350 punpcklbw mm5,[eax]
\r
369 punpcklbw mm2,[eax+3]
\r
371 punpcklbw mm5,[eax]
\r
373 ; prefetch [GBitPtr+256]
\r
; ---- per-subspan work: right-end lightmap UV and texture UV deltas ----
; NOTE(review): the loop label for this section is not visible here
418 ;use float uv for lightmap uv
\r
419 movq mm7,[GLMapMulUV] ;UL6i|VL6i UL|VL UR6i|VR6i URi|VRi |ZRi |ZY UZR|VZR LU|LV
\r
422 pfmul mm7,mm2 ;UL6i|VL6i UL|VL UR6i|VR6i URi|VRi |ZRi |ZY UZR|VZR ULL|VLL
\r
423 pfmul mm5,[QFixedScale]
\r
425 pfmul mm7,[QFixedScale]
\r
428 pf2id mm7,mm7 ; ULw|VLw DU|DV UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
429 paddd mm5,[UVAdjust]
\r
; pfsubr computes source - destination: mm1 = right UV - left UV
431 pfsubr mm1,mm2 ; ULw|VLw DU|DV UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
432 paddd mm5,[UVAdjust2]
\r
437 movd mm7,[Zi16StepX]
\r
; scale the UV difference by QFixedScale16 to get the fixed-point
; per-pixel step (presumably QFixedScale / 16 - TODO confirm)
440 pfmul mm1,[QFixedScale16] ; ULw|VLw DU16|DV16 UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
441 ;right side stuff becomes left ; ULw|VLw DU16|DV16 UL|VL URw|VRw |ZLi |ZY UZL|VZL |
\r
; advance 1/Z to the right end of the next subspan
442 pfadd mm4,mm7 ;ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZL|VZL |
\r
444 pand mm5,[WrapMask]
\r
445 pf2id mm1,mm1 ; ULw|VLw DU16|DV16 UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
447 movq mm7,[UVAdjustL]
\r
; advance U/Z,V/Z to the right end of the next subspan
450 pfadd mm6,[UVDivZ16StepX] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |
\r
453 psrlq mm5,[QShiftV]
\r
; same UVL16 overwrite pattern as during setup (branches not visible)
460 mov ebx,dword ptr[UVL16+4]
\r
464 mov dword ptr[UVL16+4],ecx
\r
470 mov dword ptr[UVL16+4],0
\r
472 mov eax,dword ptr[UVL16]
\r
476 mov dword ptr[UVL16],ecx
\r
482 mov dword ptr[UVL16],0
\r
489 psrad mm7,[QGMip4_8]
\r
491 psrld mm5,[QGMip20]
\r
492 pand mm7,[LMapMask8]
\r
; lightmap address for this subspan, computed as during setup:
; ([UVL16] * GLightWidth + [UVL16+4]) * 3 + GLightData
498 mov eax,dword ptr[UVL16]
\r
501 imul eax,[GLightWidth]
\r
506 add eax,dword ptr[UVL16+4]
\r
510 lea eax,[2*eax+eax]
\r
513 add eax,[GLightData]
\r
515 mov ecx,[GLightWidth]
\r
517 ;bilinearly interpolate to get good color
\r
518 punpcklbw mm6,[eax+3]
\r
519 punpcklbw mm5,[eax]
\r
539 punpcklbw mm2,[eax+3]
\r
541 punpcklbw mm5,[eax]
\r
545 mov eax,dword ptr[UV16V]
\r
549 mov ebx,dword ptr[UV16+4]
\r
; ---- unrolled texel fetch for the subspan ----
; The sequence below repeats down the subspan, alternating mm7 and mm5
; as the destination: the fixed-point UV in mm4 is masked with WrapMask
; and spilled to UV16 so the integer registers can reload the
; coordinates, one byte is read through esi, and that byte (in al)
; indexes a table of dwords based at ecx. punpcklbw against Zero then
; widens each colour byte to a 16-bit word.
; NOTE(review): the instructions that form esi, set ecx to the table
; base, apply the light and store the pixels are not visible in this
; listing; ecx is presumably a palette pointer (TODO confirm).
; step the fixed-point UV by the per-pixel delta
605 paddd mm0,mm1 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |
\r
607 mov al,byte ptr[esi]
\r
608 movq mm4,mm0 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw UL|VL |ZY UZR|VZR |
\r
612 pand mm4,[WrapMask] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
616 movq [UV16],mm4 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
617 psrlq mm4,[QShiftV]
\r
619 mov ebx,dword ptr[UV16+4]
\r
622 mov edx,dword ptr[UV16V]
\r
625 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |ARGB
\r
631 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
642 mov al,byte ptr[esi]
\r
645 pand mm4,[WrapMask]
\r
648 movd mm5,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ARGB UZR|VZR AR|GB
\r
651 movq [UV16],mm4 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
652 punpcklbw mm5,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw AR|GB UZR|VZR AR|GB
\r
654 psrlq mm4,[QShiftV]
\r
655 mov ebx,dword ptr[UV16+4]
\r
660 mov edx,dword ptr[UV16V]
\r
; pack the two word-expanded colours back to bytes with unsigned
; saturation, leaving two 32-bit pixels in one qword
676 packuswb mm7,mm5 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw ARGB|ARGB UZR|VZR AR|GB
\r
682 pand mm4,[WrapMask]
\r
684 mov al,byte ptr[esi]
\r
687 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |ARGB
\r
690 mov ebx,dword ptr[UV16+4]
\r
691 psrlq mm4,[QShiftV]
\r
693 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
702 mov edx,dword ptr[UV16V]
\r
711 pand mm4,[WrapMask]
\r
714 mov al,byte ptr[esi]
\r
717 movq [UV16],mm4 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
719 movd mm5,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ARGB UZR|VZR AR|GB
\r
722 punpcklbw mm5,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw AR|GB UZR|VZR AR|GB
\r
725 psrlq mm4,[QShiftV]
\r
731 mov ebx,dword ptr[UV16+4]
\r
736 mov edx,dword ptr[UV16V]
\r
740 pand mm4,[WrapMask]
\r
751 psrlq mm4,[QShiftV]
\r
754 mov al,byte ptr[esi]
\r
757 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |ARGB
\r
759 mov ebx,dword ptr[UV16+4]
\r
761 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
770 mov edx,dword ptr[UV16V]
\r
781 pand mm4,[WrapMask]
\r
784 mov al,byte ptr[esi]
\r
785 movq [UV16],mm4 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
787 movd mm5,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ARGB UZR|VZR AR|GB
\r
790 punpcklbw mm5,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw AR|GB UZR|VZR AR|GB
\r
793 psrlq mm4,[QShiftV]
\r
799 mov ebx,dword ptr[UV16+4]
\r
804 mov edx,dword ptr[UV16V]
\r
808 pand mm4,[WrapMask]
\r
819 psrlq mm4,[QShiftV]
\r
822 mov al,byte ptr[esi]
\r
825 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |ARGB
\r
827 mov ebx,dword ptr[UV16+4]
\r
829 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
838 mov edx,dword ptr[UV16V]
\r
849 pand mm4,[WrapMask]
\r
852 mov al,byte ptr[esi]
\r
853 movq [UV16],mm4 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
855 movd mm5,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ARGB UZR|VZR AR|GB
\r
858 punpcklbw mm5,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw AR|GB UZR|VZR AR|GB
\r
861 psrlq mm4,[QShiftV]
\r
867 mov ebx,dword ptr[UV16+4]
\r
872 mov edx,dword ptr[UV16V]
\r
876 pand mm4,[WrapMask]
\r
887 psrlq mm4,[QShiftV]
\r
890 mov al,byte ptr[esi]
\r
893 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |ARGB
\r
895 mov ebx,dword ptr[UV16+4]
\r
897 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
906 mov edx,dword ptr[UV16V]
\r
917 pand mm4,[WrapMask]
\r
920 mov al,byte ptr[esi]
\r
921 movq [UV16],mm4 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
923 movd mm5,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ARGB UZR|VZR AR|GB
\r
926 punpcklbw mm5,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw AR|GB UZR|VZR AR|GB
\r
929 psrlq mm4,[QShiftV]
\r
935 mov ebx,dword ptr[UV16+4]
\r
940 mov edx,dword ptr[UV16V]
\r
944 pand mm4,[WrapMask]
\r
955 psrlq mm4,[QShiftV]
\r
958 mov al,byte ptr[esi]
\r
961 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |ARGB
\r
963 mov ebx,dword ptr[UV16+4]
\r
965 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
974 mov edx,dword ptr[UV16V]
\r
984 pand mm4,[WrapMask]
\r
987 mov al,byte ptr[esi]
\r
988 movq [UV16],mm4 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
990 movd mm5,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ARGB UZR|VZR AR|GB
\r
995 punpcklbw mm5,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw AR|GB UZR|VZR AR|GB
\r
998 psrlq mm4,[QShiftV]
\r
1004 mov ebx,dword ptr[UV16+4]
\r
1009 mov edx,dword ptr[UV16V]
\r
1013 pand mm4,[WrapMask]
\r
1025 psrlq mm4,[QShiftV]
\r
1027 mov al,byte ptr[esi]
\r
1030 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |ARGB
\r
1033 mov ebx,dword ptr[UV16+4]
\r
1035 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
1043 mov edx,dword ptr[UV16V]
\r
1054 pand mm4,[WrapMask]
\r
1057 mov al,byte ptr[esi]
\r
1058 movq [UV16],mm4 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
1060 movd mm5,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ARGB UZR|VZR AR|GB
\r
1063 punpcklbw mm5,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw AR|GB UZR|VZR AR|GB
\r
1066 psrlq mm4,[QShiftV]
\r
1072 mov ebx,dword ptr[UV16+4]
\r
1077 mov edx,dword ptr[UV16V]
\r
1083 pand mm4,[WrapMask]
\r
1094 psrlq mm4,[QShiftV]
\r
1096 mov al,byte ptr[esi]
\r
1099 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |ARGB
\r
1101 mov ebx,dword ptr[UV16+4]
\r
1103 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
1111 mov edx,dword ptr[UV16V]
\r
1123 mov al,byte ptr[esi]
\r
1125 movd mm5,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ARGB UZR|VZR AR|GB
\r
1127 punpcklbw mm5,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw AR|GB UZR|VZR AR|GB
\r
; ---- end of subspan: reload the saved left-edge state ----
1136 ; get corrected right side deltas ; ULw|VLw DU16|DV16 argb|ARGBagAG|rbRB |ZRi aA|rR UZR|VZR aArR|gGbB
\r
1142 movq mm0,[UVLeftW] ; ULw|VLw DU16|DV16 argb|ARGBagAG|rbRB |ZRi aA|rR UZR|VZR aArR|gGbB
\r
1144 movq mm1,[UVLeft] ; ULw|VLw UL|VL argb|ARGBagAG|rbRB ZRi|ZRi aA|rR UZR|VZR aArR|gGbB
\r
; broadcast the low dword of mm2 to both halves, then multiply by the
; stepped U/Z,V/Z in mm6 to get the new right-end U,V
1145 punpckldq mm2,mm2 ; ULw|VLw UL|VL ZRi|ZRi agAG|rbRB ZRi|ZRi aA|rR UZR|VZR aArR|gGbB
\r
1147 pfmul mm2,mm6 ; ULw|VLw UL|VL UR|VR agAG|rbRB ZRi|ZRi aA|rR UZR|VZR aArR|gGbB
\r
1149 add edi,64 ; move screen pointer to start of next aspan
\r
1152 pfmul mm3,[QFixedScale]
\r
; NOTE(review): the branch back to the subspan loop is not visible here
1156 dec [NumASpans] ; dec num affine spans
\r
; ---- leftover pixels (fewer than a full subspan) ----
1159 HandleLeftoverPixelsLit:
\r
; NOTE(review): the branch that consumes this compare is not visible
1164 cmp [RemainingCount],0
\r
; copy the leftover count into both dwords of ZIR
1167 mov eax,[RemainingCount]
\r
1168 mov dword ptr[ZIR],eax
\r
1169 mov dword ptr[ZIR+4],eax
\r
1171 movq mm7,[GLMapMulUV] ;UL6i|VL6i UL|VL UR6i|VR6i URi|VRi |ZRi |ZY UZR|VZR LU|LV
\r
; take back the full-subspan step that was already added to U/Z,V/Z
1174 pfsub mm6,[UVDivZ16StepX]
\r
1175 pfmul mm7,mm1 ;UL6i|VL6i UL|VL UR6i|VR6i URi|VRi |ZRi |ZY UZR|VZR ULL|VLL
\r
1177 pfmul mm5,[QFixedScale]
\r
1180 pfmul mm7,[QFixedScale]
\r
; scale mm3 by the per-pixel U/Z,V/Z step (mm3 presumably holds the
; leftover count as a float at this point - TODO confirm)
1181 pfmul mm3,[UVDivZStepX]
\r
1187 movd mm6,[Zi16StepX]
\r
1189 pf2id mm7,mm7 ; ULw|VLw DU|DV UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
1192 movd mm6,[ZiStepX]
\r
1193 mov ebx,[RemainingCount]
\r
1200 paddd mm5,[UVAdjust]
\r
1205 paddd mm5,[UVAdjust2]
\r
; mm1 = right UV - left UV
1209 pfsubr mm1,mm2 ; ULw|VLw DU|DV UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
; the per-pixel scale is taken from QFixedScaleLUT at offset ebx; ebx was
; loaded from RemainingCount above and any index scaling is not visible
1211 pfmul mm1,qword ptr[QFixedScaleLUT+ebx] ; ULw|VLw DU16|DV16 UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
1214 pand mm5,[WrapMask]
\r
1215 pf2id mm1,mm1 ; ULw|VLw DU16|DV16 UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
1218 movq mm7,[UVAdjustL]
\r
1220 psrlq mm5,[QShiftV]
\r
; same UVL16 overwrite pattern as above (branches not visible)
1229 mov ebx,dword ptr[UVL16+4]
\r
1233 mov dword ptr[UVL16+4],ecx
\r
1239 mov dword ptr[UVL16+4],0
\r
1241 mov eax,dword ptr[UVL16]
\r
1245 mov dword ptr[UVL16],ecx
\r
1251 mov dword ptr[UVL16],0
\r
1257 psrad mm7,[QGMip4_8]
\r
1258 psrld mm5,[QGMip20]
\r
1260 pand mm7,[LMapMask8]
\r
; lightmap address: ([UVL16] * GLightWidth + [UVL16+4]) * 3 + GLightData
1266 mov eax,dword ptr[UVL16]
\r
1269 imul eax,[GLightWidth]
\r
1272 add eax,dword ptr[UVL16+4]
\r
1276 lea eax,[2*eax+eax]
\r
1279 add eax,[GLightData]
\r
1281 ;bilinearly interpolate to get good color
\r
1282 punpcklbw mm6,[eax+3]
\r
1283 mov ecx,[GLightWidth]
\r
1286 punpcklbw mm5,[eax]
\r
1298 pmullw mm6,mm7 ; B|B
\r
1300 punpcklbw mm2,[eax+3]
\r
1306 mov ebx,dword ptr[UV16+4]
\r
1307 punpcklbw mm5,[eax]
\r
1309 mov eax,dword ptr[UV16V]
\r
1338 movq mm2,mm7 ;make ABGR ARGB
\r
1342 punpckhwd mm7,mm2 ;BAGB
\r
; ---- leftover per-pixel loop body ----
; NOTE(review): the LeftoverLoopLit label and the pixel store are not
; visible in this listing
1349 paddd mm0,mm1 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |
\r
1351 mov al,byte ptr[esi]
\r
1352 movq mm4,mm0 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw UL|VL |ZY UZR|VZR |
\r
1356 pand mm4,[WrapMask] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
1358 movd mm6,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |ARGB
\r
1360 movq [UV16],mm4 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
1362 punpcklbw mm6,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
1364 mov ebx,dword ptr[UV16+4]
\r
1366 psrlq mm4,[QShiftV]
\r
1376 mov edx,dword ptr[UV16V]
\r
; loop while the decremented count is still >= 0 (signed)
1385 dec [RemainingCount]
\r
1386 jge LeftoverLoopLit
\r
; cRet/endProc close the cProc above (macros, presumably from stdcall.inc)
1395 cRet DrawSpan32_AsmLit3DNow
\r
1396 endProc DrawSpan32_AsmLit3DNow
\r
1400 ;32 bit gouraud perspective mapper
\r
1401 ;this needs serious cleaning... the clamping is useless
\r
1402 ;this big stack hurts
\r
;-----------------------------------------------------------------------
; DrawSpan32_AsmGouraud3DNow(x1, x2, y, r1, g1, b1, r2, g2, b2)
; Purpose: draws one horizontal, perspective-subdivided texture span
;          with colour interpolated from (r1,g1,b1) to (r2,g2,b2).
; Args   : nine dwords; the 36 passed to cProc is presumably their total
;          byte size (TODO confirm against stdcall.inc).
;          x1, y  - converted to float with pi2fd and multiplied by the
;                   X/Y step globals
;          x2     - not referenced in the lines visible here
;          r1..b2 - used directly as pfsub operands, so they are treated
;                   as 32-bit floats (TODO confirm with the caller)
; Writes : NumASpans, RemainingCount, RGBADelta, UVL16, UVLeftW, UV16
;          and ZIR (globals).
; Uses   : eax, ebx, ecx, edx, esi, edi and mm0-mm7 in the visible code.
; NOTE(review): this listing has gaps. Branch targets such as
; TryClampU0Litp, TryClampV0Litp and LeftoverLoopLit, the flag-setting
; instructions ahead of the conditional jumps, and the pixel stores are
; not visible here. The lightmap coordinate math (GLMapMulUV, UVL16) is
; still performed, but no read of GLightData is visible in this
; procedure.
;-----------------------------------------------------------------------
1404 cProc DrawSpan32_AsmGouraud3DNow, 36,<x1 : dword, x2 : dword, y : dword, r1 : dword, g1 : dword, b1 : dword, r2 : dword, g2 : dword, b2 : dword>
\r
; ebx -> ClientWindow; its PixelPitch and Buffer fields are read below
1421 mov ebx,offset ClientWindow
\r
; scale eax by the row pitch (eax presumably holds y here - TODO confirm)
1423 imul eax, [ebx].PixelPitch
\r
; edi = base address of the destination buffer
1424 mov edi,[ebx].Buffer
\r
; NumASpans is decremented once per 64-byte advance of edi further down;
; RemainingCount drives the leftover loop at the end of the procedure
1437 mov [NumASpans],ecx
\r
1438 mov [RemainingCount],eax
\r
1440 ;grab the left side lights
\r
1444 ; punpckldq mm5,qword ptr[Zero]
\r
; ---- interpolator and colour-delta setup ----
; The trailing comments track the contents of mm0..mm7 left to right.
1457 ; mm0 mm1 mm2 mm3 mm4 mm5 mm6 mm7
\r
1458 movd mm0,x1 ; |x | | | | | | |
\r
1459 movq mm2,[UVDivZStepX] ; |x | UZdX|VZdX | | | | |
\r
1461 movd mm1,y ; |x |y UZdX|VZdX | | | | |
\r
1462 movq mm3,[UVDivZStepY] ; |x |y UZdX|VZdXUZdY|VZdY | | | |
\r
; duplicate x and y into both halves so one pfmul scales U and V together
1464 punpckldq mm0,mm0 ; x|x |y UZdX|VZdXUZdY|VZdY | | | |
\r
1465 punpckldq mm1,mm1 ; x|x y|y UZdX|VZdXUZdY|VZdY | | | |
\r
; mm7 = edx, labelled "wid" in the trailing comment (span width)
1467 movd mm7,edx ; x|x y|y UZdX|VZdXUZdY|VZdY | | | |wid
\r
1468 movd mm5,b2 ; x|x y|y UZdX|VZdXUZdY|VZdY | |b | |wid
\r
; pi2fd: packed integer -> float conversion
1470 pi2fd mm0,mm0 ; x|x y|y UZdX|VZdXUZdY|VZdY | |b | |wid
\r
1471 movd mm6,b1 ; x|x y|y UZdX|VZdXUZdY|VZdY | |b |b |wid
\r
1473 pi2fd mm7,mm7 ; x|x y|y UZdX|VZdXUZdY|VZdY | |b |b |wid
\r
; the low dword at g2 goes into the high half of mm5, giving g2|b2
1474 punpckldq mm5,qword ptr[g2] ; x|x y|y UZdX|VZdXUZdY|VZdY | g|b |b |wid
\r
1476 pi2fd mm1,mm1 ; x|x y|y UZdX|VZdXUZdY|VZdY | g|b |b |wid
\r
; likewise mm6 becomes g1|b1
1477 punpckldq mm6,qword ptr[g1] ; x|x y|y UZdX|VZdXUZdY|VZdY | g|b g|b |wid
\r
1479 pfmul mm2,mm0 ; x|x y|y UZX|VZX UZdY|VZdY | g|b g|b |wid
\r
; reciprocal estimate of the width
1480 pfrcp mm7,mm7 ; x|x y|y UZX|VZX UZdY|VZdY | g|b g|b dw|dw
\r
; mm5 = (g2 - g1) | (b2 - b1)
1482 pfsub mm5,mm6 ; x|x y|y UZX|VZX UZdY|VZdY | gd|bd g|b dw|dw
\r
1483 movd mm4,[r1] ; x|x y|y UZX|VZX UZdY|VZdY |r gd|bd g|b dw|dw
\r
1485 movd mm6,[r2] ; x|x y|y UZX|VZX UZdY|VZdY |r gd|bd g|r dw|dw
\r
1486 pfmul mm3,mm1 ; x|x y|y UZX|VZX UZY|VZY |r gd|bd g|r dw|dw
\r
; mm6 low = r2 - r1
1488 pfsub mm6,mm4 ; x|x y|y UZX|VZX UZY|VZY |r gd|bd x|rd dw|dw
\r
; mm7 = Q128 / width: scale applied to the colour differences below
1489 pfmul mm7,[Q128] ; x|x y|y UZX|VZX UZY|VZY |r gd|bd x|rd DW|DW
\r
1491 movd mm4,[ZiStepX] ; x|x y|y UZX|VZX UZY|VZY |ZdX gd|bd x|rd DW|DW
\r
1492 pfmul mm5,mm7 ; x|x y|y UZX|VZX UZY|VZY |ZdX GD|BD x|rd DW|DW
\r
1494 pfmul mm4,mm0 ; x|x y|y UZX|VZX UZY|VZY |ZX GD|BD x|rd DW|DW
\r
1495 pfmul mm6,mm7 ; x|x y|y UZX|VZX UZY|VZY |ZX GD|BD x|RD DW|DW
\r
; store the green/blue deltas held in mm5.
; NOTE(review): mm6 (red delta) is overwritten below; whatever saves it
; first is not visible in this listing
1501 movq [RGBADelta],mm5
\r
1503 movd mm5,[ZiStepY] ; x|x y|y UZX|VZX UZY|VZY |ZX |ZdY x|RD DW|DW
\r
1504 movq mm6,[UVDivZOrigin] ; x|x y|y UZX|VZX UZY|VZY |ZX |ZdY UZO|VZO DW|DW
\r
1506 pfmul mm5,mm1 ; x|x y|y UZX|VZX UZY|VZY |ZX |ZY UZO|VZO DW|DW
\r
1507 movd mm7,[ZiOrigin] ; x|x y|y UZX|VZX UZY|VZY |ZX |ZY UZO|VZO |ZO
\r
; origin + x*StepX + y*StepY for (U/Z, V/Z) in mm6 and for 1/Z in mm4
1509 pfadd mm6,mm2 ; x|x y|y UZX|VZX UZY|VZY |ZX |ZY UZXS|VZXS |ZO
\r
1510 pfadd mm4,mm7 ; x|x y|y UZX|VZX UZY|VZY |ZXS |ZY UZXS|VZXS |ZO
\r
1512 pfadd mm6,mm3 ; x|x y|y UZX|VZX UZY|VZY |ZXS |ZY UZ|VZ |ZO
\r
1513 pfadd mm4,mm5 ; x|x y|y UZX|VZX UZY|VZY |Zi |ZY UZ|VZ |ZO
\r
1515 movd mm7,[Zi16StepX] ; x|x y|y UZX|VZX UZY|VZY |Zi |ZY UZ|VZ |ZdX16
\r
; pfrcp yields a reciprocal estimate of 1/Z; no refinement step
; (pfrcpit1/pfrcpit2) is visible in this listing
1516 pfrcp mm0,mm4 ; ZL|ZL y|y UZX|VZX UZY|VZY |Zi |ZY UZ|VZ |ZdX16
\r
1518 movq mm1,mm6 ; ZL|ZL UZ|VZ UZX|VZX UZY|VZY |Zi |ZY UZ|VZ |ZdX16
\r
; step 1/Z by Zi16StepX to get the value at the right end of the subspan
1519 pfadd mm4,mm7 ; ZL|ZL UZ|VZ UZX|VZX UZY|VZY |ZRi |ZY UZ|VZ |ZdX16
\r
; left-end U,V = (U/Z, V/Z) * Z
1521 pfmul mm1,mm0 ; ZL|ZL UL|VL UZX|VZX UZY|VZY |ZRi |ZY UZ|VZ |ZdX16
\r
1522 pfrcp mm2,mm4 ; ZL|ZL UL|VL ZR|ZR UZY|VZY |ZRi |ZY UZ|VZ |ZdX16
\r
1524 pfadd mm6,[UVDivZ16StepX] ; ZL|ZL UL|VL ZR|ZR UZY|VZY |ZRi |ZY UZR|VZR |ZdX16
\r
; right-end U,V
1525 pfmul mm2,mm6 ; ZL|ZL UL|VL UR|VR UZY|VZY |ZRi |ZY UZR|VZR |ZdX16
\r
1527 movq mm0,mm1 ; UL|VL UL|VL UR|VR UZY|VZY |ZRi |ZY UZR|VZR |ZdX16
\r
1528 movq mm3,mm2 ; ULi|VLi UL|VL UR|UR UR|UR |ZRi |ZY UZR|VZR |ZdX16
\r
; scale both ends by QFixedScale, then pf2id converts the floats to
; integers (truncating), giving fixed-point texture coordinates
1530 pfmul mm0,[QFixedScale] ; UL6|VL6 UL|VL UR|UR UR|UR |ZRi |ZY UZR|VZR |ZdX16
\r
1531 pfmul mm3,[QFixedScale] ; UL6|VL6 UL|VL UR6|UR6 UR6|UR6 |ZRi |ZY UZR|VZR |ZdX16
\r
1533 pf2id mm0,mm0 ;UL6i|VL6i UL|VL UR6|VR6 UR6|VR6 |ZRi |ZY UZR|VZR |ZdX16
\r
1534 pf2id mm3,mm3 ;UL6i|VL6i UL|VL UR6i|VR6i URi|VRi |ZRi |ZY UZR|VZR |ZdX16
\r
; NOTE(review): the instruction that sets ZF for this jump is not
; visible here (the MMX ops above do not touch the flags)
1538 jz HandleLeftoverPixelsLit
\r
; lightmap-style coordinates for the left end:
; (U,V) * GLMapMulUV * QFixedScale -> integer, then biased by UVAdjustL
1540 movq mm7,[GLMapMulUV] ;UL6i|VL6i UL|VL UR6i|VR6i URi|VRi |ZRi |ZY UZR|VZR LU|LV
\r
1541 pfmul mm7,mm1 ;UL6i|VL6i UL|VL UR6i|VR6i URi|VRi |ZRi |ZY UZR|VZR ULL|VLL
\r
1542 pfmul mm7,[QFixedScale]
\r
1543 pf2id mm7,mm7 ; ULw|VLw DU|DV UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
1544 paddd mm7,[UVAdjustL]
\r
; Clamp-style handling of both dwords of UVL16: each is reloaded and then
; overwritten with ecx or 0. The compares and the TryClamp* labels are
; not visible here (the header above calls this clamping useless).
1548 mov ebx,dword ptr[UVL16+4]
\r
1551 jle TryClampU0Litp
\r
1553 mov dword ptr[UVL16+4],ecx
\r
1559 mov dword ptr[UVL16+4],0
\r
1561 mov eax,dword ptr[UVL16]
\r
1563 jle TryClampV0Litp
\r
1565 mov dword ptr[UVL16],ecx
\r
1571 mov dword ptr[UVL16],0
\r
; ---- per-subspan work: right-end UV and per-pixel texture deltas ----
; NOTE(review): the loop label for this section is not visible here
1578 ;use float uv for lightmap uv
\r
1579 movq mm7,[GLMapMulUV] ;UL6i|VL6i UL|VL UR6i|VR6i URi|VRi |ZRi |ZY UZR|VZR LU|LV
\r
1582 pfmul mm7,mm2 ;UL6i|VL6i UL|VL UR6i|VR6i URi|VRi |ZRi |ZY UZR|VZR ULL|VLL
\r
1583 pfmul mm5,[QFixedScale]
\r
1585 pfmul mm7,[QFixedScale]
\r
1588 pf2id mm7,mm7 ; ULw|VLw DU|DV UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
1589 paddd mm5,[UVAdjust]
\r
; pfsubr computes source - destination: mm1 = right UV - left UV
1591 pfsubr mm1,mm2 ; ULw|VLw DU|DV UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
1592 paddd mm5,[UVAdjust2]
\r
1597 movd mm7,[Zi16StepX]
\r
; scale the UV difference by QFixedScale16 to get the fixed-point
; per-pixel step (presumably QFixedScale / 16 - TODO confirm)
1600 pfmul mm1,[QFixedScale16] ; ULw|VLw DU16|DV16 UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
1601 ;right side stuff becomes left ; ULw|VLw DU16|DV16 UL|VL URw|VRw |ZLi |ZY UZL|VZL |
\r
; advance 1/Z to the right end of the next subspan
1602 pfadd mm4,mm7 ;ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZL|VZL |
\r
1604 pand mm5,[WrapMask]
\r
1605 pf2id mm1,mm1 ; ULw|VLw DU16|DV16 UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
1607 movq mm7,[UVAdjustL]
\r
; advance U/Z,V/Z to the right end of the next subspan
1610 pfadd mm6,[UVDivZ16StepX] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |
\r
1613 psrlq mm5,[QShiftV]
\r
; same UVL16 overwrite pattern as during setup (branches not visible)
1620 mov ebx,dword ptr[UVL16+4]
\r
1624 mov dword ptr[UVL16+4],ecx
\r
1630 mov dword ptr[UVL16+4],0
\r
1632 mov eax,dword ptr[UVL16]
\r
1636 mov dword ptr[UVL16],ecx
\r
1642 mov dword ptr[UVL16],0
\r
; spill mm3 to UVLeftW; it is reloaded into mm0 after the subspan
1645 movq [UVLeftW],mm3
\r
1646 mov eax,dword ptr[UV16V]
\r
1647 mov ebx,dword ptr[UV16+4]
\r
; reload the colour deltas saved during setup
1658 movq mm3,[RGBADelta]
\r
; ---- unrolled texel fetch for the subspan ----
; The sequence below repeats down the subspan, alternating mm7 and mm5
; as the destination: the fixed-point UV in mm4 is masked with WrapMask
; and spilled to UV16 so the integer registers can reload the
; coordinates, one byte is read through esi, and that byte (in al)
; indexes a table of dwords based at ecx. punpcklbw against Zero then
; widens each colour byte to a 16-bit word.
; NOTE(review): the instructions that form esi, set ecx to the table
; base, apply the interpolated colour and store the pixels are not
; visible in this listing; ecx is presumably a palette pointer
; (TODO confirm).
; step the fixed-point UV by the per-pixel delta
1664 paddd mm0,mm1 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |
\r
1666 mov al,byte ptr[esi]
\r
1667 movq mm4,mm0 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw UL|VL |ZY UZR|VZR |
\r
1671 pand mm4,[WrapMask] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
1675 movq [UV16],mm4 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
1676 psrlq mm4,[QShiftV]
\r
1678 mov ebx,dword ptr[UV16+4]
\r
1681 mov edx,dword ptr[UV16V]
\r
1683 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |ARGB
\r
1688 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
1700 mov al,byte ptr[esi]
\r
1702 pand mm4,[WrapMask]
\r
1704 movd mm5,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ARGB UZR|VZR AR|GB
\r
1706 movq [UV16],mm4 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
1707 punpcklbw mm5,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw AR|GB UZR|VZR AR|GB
\r
1709 psrlq mm4,[QShiftV]
\r
1710 mov ebx,dword ptr[UV16+4]
\r
1715 mov edx,dword ptr[UV16V]
\r
; pack the two word-expanded colours back to bytes with unsigned
; saturation, leaving two 32-bit pixels in one qword
1727 packuswb mm7,mm5 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw ARGB|ARGB UZR|VZR AR|GB
\r
1733 pand mm4,[WrapMask]
\r
1735 mov al,byte ptr[esi]
\r
1738 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |ARGB
\r
1741 mov ebx,dword ptr[UV16+4]
\r
1742 psrlq mm4,[QShiftV]
\r
1744 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
1753 mov edx,dword ptr[UV16V]
\r
1761 pand mm4,[WrapMask]
\r
1764 mov al,byte ptr[esi]
\r
1765 movq [UV16],mm4 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
1767 movd mm5,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ARGB UZR|VZR AR|GB
\r
1770 punpcklbw mm5,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw AR|GB UZR|VZR AR|GB
\r
1773 psrlq mm4,[QShiftV]
\r
1779 mov ebx,dword ptr[UV16+4]
\r
1782 mov edx,dword ptr[UV16V]
\r
1786 pand mm4,[WrapMask]
\r
1797 psrlq mm4,[QShiftV]
\r
1799 mov al,byte ptr[esi]
\r
1802 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |ARGB
\r
1804 mov ebx,dword ptr[UV16+4]
\r
1806 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
1815 mov edx,dword ptr[UV16V]
\r
1824 pand mm4,[WrapMask]
\r
1827 mov al,byte ptr[esi]
\r
1828 movq [UV16],mm4 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
1830 movd mm5,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ARGB UZR|VZR AR|GB
\r
1833 punpcklbw mm5,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw AR|GB UZR|VZR AR|GB
\r
1836 psrlq mm4,[QShiftV]
\r
1842 mov ebx,dword ptr[UV16+4]
\r
1845 mov edx,dword ptr[UV16V]
\r
1849 pand mm4,[WrapMask]
\r
1860 psrlq mm4,[QShiftV]
\r
1862 mov al,byte ptr[esi]
\r
1865 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |ARGB
\r
1867 mov ebx,dword ptr[UV16+4]
\r
1869 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
1878 mov edx,dword ptr[UV16V]
\r
1886 pand mm4,[WrapMask]
\r
1889 mov al,byte ptr[esi]
\r
1890 movq [UV16],mm4 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
1892 movd mm5,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ARGB UZR|VZR AR|GB
\r
1895 punpcklbw mm5,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw AR|GB UZR|VZR AR|GB
\r
1898 psrlq mm4,[QShiftV]
\r
1904 mov ebx,dword ptr[UV16+4]
\r
1907 mov edx,dword ptr[UV16V]
\r
1911 pand mm4,[WrapMask]
\r
1922 psrlq mm4,[QShiftV]
\r
1924 mov al,byte ptr[esi]
\r
1927 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |ARGB
\r
1929 mov ebx,dword ptr[UV16+4]
\r
1931 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
1940 mov edx,dword ptr[UV16V]
\r
1948 pand mm4,[WrapMask]
\r
1951 mov al,byte ptr[esi]
\r
1952 movq [UV16],mm4 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
1954 movd mm5,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ARGB UZR|VZR AR|GB
\r
1957 punpcklbw mm5,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw AR|GB UZR|VZR AR|GB
\r
1960 psrlq mm4,[QShiftV]
\r
1966 mov ebx,dword ptr[UV16+4]
\r
1969 mov edx,dword ptr[UV16V]
\r
1973 pand mm4,[WrapMask]
\r
1984 psrlq mm4,[QShiftV]
\r
1986 mov al,byte ptr[esi]
\r
1989 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |ARGB
\r
1991 mov ebx,dword ptr[UV16+4]
\r
1993 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
2002 mov edx,dword ptr[UV16V]
\r
2010 pand mm4,[WrapMask]
\r
2013 mov al,byte ptr[esi]
\r
2014 movq [UV16],mm4 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
2016 movd mm5,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ARGB UZR|VZR AR|GB
\r
2019 punpcklbw mm5,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw AR|GB UZR|VZR AR|GB
\r
2022 psrlq mm4,[QShiftV]
\r
2028 mov ebx,dword ptr[UV16+4]
\r
2031 mov edx,dword ptr[UV16V]
\r
2035 pand mm4,[WrapMask]
\r
2046 psrlq mm4,[QShiftV]
\r
2048 mov al,byte ptr[esi]
\r
2051 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |ARGB
\r
2053 mov ebx,dword ptr[UV16+4]
\r
2055 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
2064 mov edx,dword ptr[UV16V]
\r
2072 pand mm4,[WrapMask]
\r
2075 mov al,byte ptr[esi]
\r
2076 movq [UV16],mm4 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
2078 movd mm5,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ARGB UZR|VZR AR|GB
\r
2081 punpcklbw mm5,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw AR|GB UZR|VZR AR|GB
\r
2084 psrlq mm4,[QShiftV]
\r
2090 mov ebx,dword ptr[UV16+4]
\r
2093 mov edx,dword ptr[UV16V]
\r
2097 pand mm4,[WrapMask]
\r
2108 psrlq mm4,[QShiftV]
\r
2110 mov al,byte ptr[esi]
\r
2113 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |ARGB
\r
2115 mov ebx,dword ptr[UV16+4]
\r
2117 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
2125 mov edx,dword ptr[UV16V]
\r
2134 mov al,byte ptr[esi]
\r
2136 movd mm5,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ARGB UZR|VZR AR|GB
\r
2138 punpcklbw mm5,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw AR|GB UZR|VZR AR|GB
\r
; ---- end of subspan: reload the saved left-edge state ----
2150 ; get corrected right side deltas ; ULw|VLw DU16|DV16 argb|ARGBagAG|rbRB |ZRi aA|rR UZR|VZR aArR|gGbB
\r
2156 movq mm0,[UVLeftW] ; ULw|VLw DU16|DV16 argb|ARGBagAG|rbRB |ZRi aA|rR UZR|VZR aArR|gGbB
\r
2158 movq mm1,[UVLeft] ; ULw|VLw UL|VL argb|ARGBagAG|rbRB ZRi|ZRi aA|rR UZR|VZR aArR|gGbB
\r
; broadcast the low dword of mm2 to both halves, then multiply by the
; stepped U/Z,V/Z in mm6 to get the new right-end U,V
2159 punpckldq mm2,mm2 ; ULw|VLw UL|VL ZRi|ZRi agAG|rbRB ZRi|ZRi aA|rR UZR|VZR aArR|gGbB
\r
2161 pfmul mm2,mm6 ; ULw|VLw UL|VL UR|VR agAG|rbRB ZRi|ZRi aA|rR UZR|VZR aArR|gGbB
\r
2163 add edi,64 ; move screen pointer to start of next aspan
\r
2166 pfmul mm3,[QFixedScale]
\r
; NOTE(review): the branch back to the subspan loop is not visible here
2170 dec [NumASpans] ; dec num affine spans
\r
; ---- leftover pixels (fewer than a full subspan) ----
2173 HandleLeftoverPixelsLit:
\r
; NOTE(review): the branch that consumes this compare is not visible
2179 cmp [RemainingCount],0
\r
; copy the leftover count into both dwords of ZIR
2182 mov eax,[RemainingCount]
\r
2183 mov dword ptr[ZIR],eax
\r
2184 mov dword ptr[ZIR+4],eax
\r
2186 movq mm7,[GLMapMulUV] ;UL6i|VL6i UL|VL UR6i|VR6i URi|VRi |ZRi |ZY UZR|VZR LU|LV
\r
; take back the full-subspan step that was already added to U/Z,V/Z
2189 pfsub mm6,[UVDivZ16StepX]
\r
2190 pfmul mm7,mm1 ;UL6i|VL6i UL|VL UR6i|VR6i URi|VRi |ZRi |ZY UZR|VZR ULL|VLL
\r
2192 pfmul mm5,[QFixedScale]
\r
2195 pfmul mm7,[QFixedScale]
\r
; scale mm3 by the per-pixel U/Z,V/Z step (mm3 presumably holds the
; leftover count as a float at this point - TODO confirm)
2196 pfmul mm3,[UVDivZStepX]
\r
2202 movd mm6,[Zi16StepX]
\r
2204 pf2id mm7,mm7 ; ULw|VLw DU|DV UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
2207 movd mm6,[ZiStepX]
\r
2208 mov ebx,[RemainingCount]
\r
2215 paddd mm5,[UVAdjust]
\r
2220 paddd mm5,[UVAdjust2]
\r
; mm1 = right UV - left UV
2224 pfsubr mm1,mm2 ; ULw|VLw DU|DV UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
; the per-pixel scale is taken from QFixedScaleLUT at offset ebx; ebx was
; loaded from RemainingCount above and any index scaling is not visible
2226 pfmul mm1,qword ptr[QFixedScaleLUT+ebx] ; ULw|VLw DU16|DV16 UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
2229 pand mm5,[WrapMask]
\r
2230 pf2id mm1,mm1 ; ULw|VLw DU16|DV16 UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
2233 movq mm7,[UVAdjustL]
\r
2235 psrlq mm5,[QShiftV]
\r
; same UVL16 overwrite pattern as above (branches not visible)
2244 mov ebx,dword ptr[UVL16+4]
\r
2248 mov dword ptr[UVL16+4],ecx
\r
2254 mov dword ptr[UVL16+4],0
\r
2256 mov eax,dword ptr[UVL16]
\r
2260 mov dword ptr[UVL16],ecx
\r
2266 mov dword ptr[UVL16],0
\r
2269 mov ebx,dword ptr[UV16+4]
\r
2270 mov eax,dword ptr[UV16V]
\r
; ---- leftover per-pixel loop body ----
; NOTE(review): the LeftoverLoopLit label and the pixel store are not
; visible in this listing
2281 paddd mm0,mm1 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |
\r
2283 mov al,byte ptr[esi]
\r
2284 movq mm4,mm0 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw UL|VL |ZY UZR|VZR |
\r
2288 pand mm4,[WrapMask] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
2290 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |ARGB
\r
2292 movq [UV16],mm4 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
2294 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
2296 mov ebx,dword ptr[UV16+4]
\r
2298 psrlq mm4,[QShiftV]
\r
2306 mov edx,dword ptr[UV16V]
\r
; loop while the decremented count is still >= 0 (signed)
2317 dec [RemainingCount]
\r
2318 jge LeftoverLoopLit
\r
; cRet/endProc close the cProc above (macros, presumably from stdcall.inc)
2327 cRet DrawSpan32_AsmGouraud3DNow
\r
2328 endProc DrawSpan32_AsmGouraud3DNow
\r
2331 ;affine gouraud mapper... will probably be phased out
\r
;-----------------------------------------------------------------------
; DrawScanLineGouraudNoZ_Asm3DNow(pLeft, pRight)
; Textured scanline routine; no pZBufferPtr access appears in the visible
; lines. The visible setup merges the .uf and .gf fields of the structures
; addressed by ecx and ebx into MMX registers and scales by QFixedScale.
; The visible per-pixel work masks mm2 with WrapMask, stores it to UV16,
; steps mm2 by mm1 and does a dword table lookup at [ecx+eax*4].
; NOTE(review): the 8 after the name is presumably the argument byte
; count, and ecx/ebx presumably hold pLeft/pRight - confirm.
; NOTE(review): the right-hand register-map comments mention Z terms and
; look inherited from the perspective span code - confirm they apply.
;-----------------------------------------------------------------------
2333 cProc DrawScanLineGouraudNoZ_Asm3DNow, 8,<pLeft : dword, pRight : dword>
\r
2345 jle GouraudReturnNoZ ; early out on signed <= (presumably an empty span; confirm)
\r
2352 ; prefetch [GBitPtr]
\r
2357 punpckldq mm1,qword ptr[ecx].uf ; mm1.hi = [ecx].uf, mm1.lo kept
\r
2366 punpckldq mm2,qword ptr[ebx].uf ; mm2.hi = [ebx].uf, mm2.lo kept
\r
2372 pfmul mm0,[QFixedScale] ; packed float multiply by QFixedScale
\r
2375 punpckldq mm3,qword ptr[ecx].gf ; mm3.hi = [ecx].gf, mm3.lo kept
\r
2380 pfmul mm2,[QFixedScale] ; packed float multiply by QFixedScale
\r
2386 punpckldq mm5,qword ptr[ebx].gf ; mm5.hi = [ebx].gf, mm5.lo kept
\r
2406 pand mm0,[WrapMask] ; AND with WrapMask (presumably texture wrap; confirm)
\r
2412 mov ebx,dword ptr[UV16+4] ; ebx = high dword of UV16
\r
2415 psrlq mm0,[QShiftV] ; logical right shift of the qword by the count in QShiftV
\r
2425 mov eax,dword ptr[UV16V] ; eax = dword at UV16V
\r
2433 mov al,byte ptr[esi] ; fetch one byte through esi into al (presumably a texel index; confirm)
\r
2434 movq mm0,mm2 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw UL|VL |ZY UZR|VZR |
\r
2437 paddd mm2,mm1 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |
\r
2439 pand mm0,[WrapMask] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
2441 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |ARGB
\r
2443 movq [UV16],mm0 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
2445 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
2447 mov ebx,dword ptr[UV16+4] ; ebx = high dword of the UV16 value just stored
\r
2449 psrlq mm0,[QShiftV] ; logical right shift of the qword by the count in QShiftV
\r
2457 mov ebp,dword ptr[UV16V] ; ebp = dword at UV16V
\r
2470 jge GouraudLoopNoZ ; loop while the preceding flag result is signed >= (presumably a pixel counter; confirm)
\r
2483 cRet DrawScanLineGouraudNoZ_Asm3DNow
\r
2484 endProc DrawScanLineGouraudNoZ_Asm3DNow
\r
2488 ;does true 32 bit alpha blending... not your greyscale
\r
2489 ;junk like hardware... I mean true 32 bit alpha
\r
2490 ;looks bad without filtering though
\r
;-----------------------------------------------------------------------
; DrawScanLineGouraudNoZAlphaTex_Asm3DNow(pLeft, pRight)
; Alpha-textured scanline routine; no pZBufferPtr access appears in the
; visible lines. Each loop pass shows two byte fetches through esi and two
; dword table lookups ([ecx+eax*4] into mm7, [ebx+eax*4] into mm4), plus a
; load of QNegAlpha into mm6.
; NOTE(review): the two tables are presumably the colour and alpha
; palettes, and ecx/ebx presumably start as pLeft/pRight - confirm.
;-----------------------------------------------------------------------
2492 cProc DrawScanLineGouraudNoZAlphaTex_Asm3DNow, 8,<pLeft : dword, pRight : dword>
\r
2504 jle GouraudReturnNoZAlphaTex ; early out on signed <= (presumably an empty span; confirm)
\r
2511 ; prefetch [GBitPtr]
\r
2516 punpckldq mm1,qword ptr[ecx].uf ; mm1.hi = [ecx].uf, mm1.lo kept
\r
2525 punpckldq mm2,qword ptr[ebx].uf ; mm2.hi = [ebx].uf, mm2.lo kept
\r
2531 pfmul mm0,[QFixedScale] ; packed float multiply by QFixedScale
\r
2534 punpckldq mm3,qword ptr[ecx].gf ; mm3.hi = [ecx].gf, mm3.lo kept
\r
2539 pfmul mm2,[QFixedScale] ; packed float multiply by QFixedScale
\r
2545 punpckldq mm5,qword ptr[ebx].gf ; mm5.hi = [ebx].gf, mm5.lo kept
\r
2565 pand mm0,[WrapMask] ; AND with WrapMask (presumably texture wrap; confirm)
\r
2571 mov ebx,dword ptr[UV16+4] ; ebx = high dword of UV16
\r
2574 psrlq mm0,[QShiftV] ; logical right shift of the qword by the count in QShiftV
\r
2584 mov eax,dword ptr[UV16V] ; eax = dword at UV16V
\r
2591 GouraudLoopNoZAlphaTex: ; top of the per-pixel loop
\r
2592 mov al,byte ptr[esi] ; fetch one byte through esi into al
\r
2593 movq mm0,mm2 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw UL|VL |ZY UZR|VZR |
\r
2596 paddd mm2,mm1 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |
\r
2598 pand mm0,[WrapMask] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
2600 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |ARGB
\r
2602 movq [UV16],mm0 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
2604 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
2606 mov ebx,dword ptr[UV16+4] ; ebx = high dword of the UV16 value just stored
\r
2608 psrlq mm0,[QShiftV] ; logical right shift of the qword by the count in QShiftV
\r
2614 mov al,byte ptr[esi] ; second byte fetch through esi
\r
2624 mov ebp,dword ptr[UV16V] ; ebp = dword at UV16V
\r
2627 movd mm4,[ebx+eax*4] ; second dword table lookup, base ebx (presumably the alpha palette; confirm)
\r
2630 punpcklbw mm0,[Zero] ; interleave low bytes of mm0 with bytes from Zero
\r
2632 movq mm6,[QNegAlpha] ; mm6 = QNegAlpha constant
\r
2634 punpcklbw mm4,[Zero] ; interleave low bytes of mm4 with bytes from Zero
\r
2654 jge GouraudLoopNoZAlphaTex ; loop while the preceding flag result is signed >=
\r
2660 GouraudReturnNoZAlphaTex: ; common exit, also the target of the early-out branch
\r
2667 cRet DrawScanLineGouraudNoZAlphaTex_Asm3DNow
\r
2668 endProc DrawScanLineGouraudNoZAlphaTex_Asm3DNow
\r
;-----------------------------------------------------------------------
; DrawScanLineGouraudZBufferAlphaTex_Asm3DNow(pLeft, pRight)
; Alpha-textured scanline routine with a 16-bit z test and z write: the
; word at [pZBufferPtr] is compared with ax, the pixel is skipped on a
; signed "greater" result, otherwise ax is stored back.
; The UV step comes from memory ([ARL]) here, presumably because mm1 is
; reloaded by the second table lookup inside the loop - confirm.
; NOTE(review): jg is a signed condition; confirm the z values are meant
; to be compared as signed words.
;-----------------------------------------------------------------------
2673 cProc DrawScanLineGouraudZBufferAlphaTex_Asm3DNow, 8,<pLeft : dword, pRight : dword>
\r
2685 jle GouraudReturnZBufferAlphaTex ; early out on signed <= (presumably an empty span; confirm)
\r
2695 punpckldq mm1,qword ptr[ecx].uf ; mm1.hi = [ecx].uf, mm1.lo kept
\r
2704 punpckldq mm2,qword ptr[ebx].uf ; mm2.hi = [ebx].uf, mm2.lo kept
\r
2710 pfmul mm0,[QFixedScale] ; packed float multiply by QFixedScale
\r
2713 punpckldq mm3,qword ptr[ecx].gf ; mm3.hi = [ecx].gf, mm3.lo kept
\r
2718 pfmul mm2,[QFixedScale] ; packed float multiply by QFixedScale
\r
2726 punpckldq mm5,qword ptr[ebx].gf ; mm5.hi = [ebx].gf, mm5.lo kept
\r
2748 pand mm0,[WrapMask] ; AND with WrapMask (presumably texture wrap; confirm)
\r
2751 add [pZBufferPtr],edi ; offset the z-buffer pointer by edi
\r
2761 mov ebx,dword ptr[UV16+4] ; ebx = high dword of UV16
\r
2766 psrlq mm0,[QShiftV] ; logical right shift of the qword by the count in QShiftV
\r
2775 mov eax,dword ptr[UV16V] ; eax = dword at UV16V
\r
2789 GouraudLoopZBufferAlphaTex: ; top of the per-pixel loop
\r
2790 mov al,byte ptr[esi] ; fetch one byte through esi into al
\r
2791 movq mm0,mm2 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw UL|VL |ZY UZR|VZR |
\r
2794 paddd mm2,[ARL] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |
\r
2796 pand mm0,[WrapMask] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
2798 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |ARGB
\r
2800 movq [UV16],mm0 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
2802 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
2804 mov ebx,dword ptr[UV16+4] ; ebx = high dword of the UV16 value just stored
\r
2806 psrlq mm0,[QShiftV] ; logical right shift of the qword by the count in QShiftV
\r
2812 mov al,byte ptr[esi] ; second byte fetch through esi
\r
2819 mov ebp,dword ptr[UV16V] ; ebp = dword at UV16V
\r
2821 movd mm1,[ebx+eax*4] ; second dword table lookup, base ebx (presumably the alpha palette; confirm)
\r
2824 movq mm3,[QNegAlpha] ; mm3 = QNegAlpha constant
\r
2827 punpcklbw mm1,[Zero] ; interleave low bytes of mm1 with bytes from Zero
\r
2833 mov ebx,pZBufferPtr ; ebx = current z-buffer address
\r
2839 cmp word ptr[ebx],ax ; compare stored 16-bit z with ax
\r
2841 jg SkipPixelZBufferAlphaTex ; stored z greater (signed): pixel rejected
\r
2846 punpcklbw mm0,[Zero] ; interleave low bytes of mm0 with bytes from Zero
\r
2851 mov word ptr[ebx],ax ; z write: store ax into the z-buffer
\r
2860 SkipPixelZBufferAlphaTex: ; rejected pixels rejoin here
\r
2862 add [pZBufferPtr],2 ; advance by one 16-bit z entry
\r
2866 jge GouraudLoopZBufferAlphaTex ; loop while the preceding flag result is signed >=
\r
2872 GouraudReturnZBufferAlphaTex: ; common exit, also the target of the early-out branch
\r
2879 cRet DrawScanLineGouraudZBufferAlphaTex_Asm3DNow
\r
2880 endProc DrawScanLineGouraudZBufferAlphaTex_Asm3DNow
\r
;-----------------------------------------------------------------------
; DrawScanLineGouraudNoZBufferZWriteAlphaTex_Asm3DNow(pLeft, pRight)
; Alpha-textured scanline routine that writes z without testing it: the
; visible lines store ax to [pZBufferPtr] and contain no cmp against the
; z-buffer.
; The UV step comes from memory ([ARL]) here, presumably because mm1 is
; reloaded by the second table lookup inside the loop - confirm.
;-----------------------------------------------------------------------
2886 cProc DrawScanLineGouraudNoZBufferZWriteAlphaTex_Asm3DNow, 8,<pLeft : dword, pRight : dword>
\r
2898 jle GouraudReturnNoZBufferZWriteAlphaTex ; early out on signed <= (presumably an empty span; confirm)
\r
2908 punpckldq mm1,qword ptr[ecx].uf ; mm1.hi = [ecx].uf, mm1.lo kept
\r
2917 punpckldq mm2,qword ptr[ebx].uf ; mm2.hi = [ebx].uf, mm2.lo kept
\r
2923 pfmul mm0,[QFixedScale] ; packed float multiply by QFixedScale
\r
2926 punpckldq mm3,qword ptr[ecx].gf ; mm3.hi = [ecx].gf, mm3.lo kept
\r
2931 pfmul mm2,[QFixedScale] ; packed float multiply by QFixedScale
\r
2939 punpckldq mm5,qword ptr[ebx].gf ; mm5.hi = [ebx].gf, mm5.lo kept
\r
2961 pand mm0,[WrapMask] ; AND with WrapMask (presumably texture wrap; confirm)
\r
2964 add [pZBufferPtr],edi ; offset the z-buffer pointer by edi
\r
2974 mov ebx,dword ptr[UV16+4] ; ebx = high dword of UV16
\r
2979 psrlq mm0,[QShiftV] ; logical right shift of the qword by the count in QShiftV
\r
2988 mov eax,dword ptr[UV16V] ; eax = dword at UV16V
\r
3002 GouraudLoopNoZBufferZWriteAlphaTex: ; top of the per-pixel loop
\r
3003 mov al,byte ptr[esi] ; fetch one byte through esi into al
\r
3004 movq mm0,mm2 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw UL|VL |ZY UZR|VZR |
\r
3007 paddd mm2,[ARL] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |
\r
3009 pand mm0,[WrapMask] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
3011 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |ARGB
\r
3013 movq [UV16],mm0 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
3015 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
3017 mov ebx,dword ptr[UV16+4] ; ebx = high dword of the UV16 value just stored
\r
3019 psrlq mm0,[QShiftV] ; logical right shift of the qword by the count in QShiftV
\r
3025 mov al,byte ptr[esi] ; second byte fetch through esi
\r
3032 mov ebp,dword ptr[UV16V] ; ebp = dword at UV16V
\r
3034 movd mm1,[ebx+eax*4] ; second dword table lookup, base ebx (presumably the alpha palette; confirm)
\r
3037 movq mm3,[QNegAlpha] ; mm3 = QNegAlpha constant
\r
3040 punpcklbw mm1,[Zero] ; interleave low bytes of mm1 with bytes from Zero
\r
3046 mov ebx,pZBufferPtr ; ebx = current z-buffer address
\r
3055 punpcklbw mm0,[Zero] ; interleave low bytes of mm0 with bytes from Zero
\r
3060 mov word ptr[ebx],ax ; unconditional z write: store ax into the z-buffer
\r
3069 add [pZBufferPtr],2 ; advance by one 16-bit z entry
\r
3073 jge GouraudLoopNoZBufferZWriteAlphaTex ; loop while the preceding flag result is signed >=
\r
3079 GouraudReturnNoZBufferZWriteAlphaTex: ; common exit, also the target of the early-out branch
\r
3086 cRet DrawScanLineGouraudNoZBufferZWriteAlphaTex_Asm3DNow
\r
3087 endProc DrawScanLineGouraudNoZBufferZWriteAlphaTex_Asm3DNow
\r
3090 ;zmask but no zwrite
\r
;-----------------------------------------------------------------------
; DrawScanLineGouraudZBufferNoZWriteAlphaTex_Asm3DNow(pLeft, pRight)
; Alpha-textured scanline routine that tests z but does not update it:
; the word at [pZBufferPtr] is compared with ax and the pixel is skipped
; on a signed "greater" result; no store to the z-buffer is visible.
; NOTE(review): jg is a signed condition; confirm the z values are meant
; to be compared as signed words.
;-----------------------------------------------------------------------
3092 cProc DrawScanLineGouraudZBufferNoZWriteAlphaTex_Asm3DNow, 8,<pLeft : dword, pRight : dword>
\r
3104 jle GouraudReturnZBufferNoZWriteAlphaTex ; early out on signed <= (presumably an empty span; confirm)
\r
3114 punpckldq mm1,qword ptr[ecx].uf ; mm1.hi = [ecx].uf, mm1.lo kept
\r
3123 punpckldq mm2,qword ptr[ebx].uf ; mm2.hi = [ebx].uf, mm2.lo kept
\r
3129 pfmul mm0,[QFixedScale] ; packed float multiply by QFixedScale
\r
3132 punpckldq mm3,qword ptr[ecx].gf ; mm3.hi = [ecx].gf, mm3.lo kept
\r
3137 pfmul mm2,[QFixedScale] ; packed float multiply by QFixedScale
\r
3145 punpckldq mm5,qword ptr[ebx].gf ; mm5.hi = [ebx].gf, mm5.lo kept
\r
3167 pand mm0,[WrapMask] ; AND with WrapMask (presumably texture wrap; confirm)
\r
3170 add [pZBufferPtr],edi ; offset the z-buffer pointer by edi
\r
3180 mov ebx,dword ptr[UV16+4] ; ebx = high dword of UV16
\r
3185 psrlq mm0,[QShiftV] ; logical right shift of the qword by the count in QShiftV
\r
3194 mov eax,dword ptr[UV16V] ; eax = dword at UV16V
\r
3208 GouraudLoopZBufferNoZWriteAlphaTex: ; top of the per-pixel loop
\r
3209 mov al,byte ptr[esi] ; fetch one byte through esi into al
\r
3210 movq mm0,mm2 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw UL|VL |ZY UZR|VZR |
\r
3213 paddd mm2,[ARL] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |
\r
3215 pand mm0,[WrapMask] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
3217 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |ARGB
\r
3219 movq [UV16],mm0 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
3221 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
3223 mov ebx,dword ptr[UV16+4] ; ebx = high dword of the UV16 value just stored
\r
3225 psrlq mm0,[QShiftV] ; logical right shift of the qword by the count in QShiftV
\r
3231 mov al,byte ptr[esi] ; second byte fetch through esi
\r
3238 mov ebp,dword ptr[UV16V] ; ebp = dword at UV16V
\r
3240 movd mm1,[ebx+eax*4] ; second dword table lookup, base ebx (presumably the alpha palette; confirm)
\r
3243 movq mm3,[QNegAlpha] ; mm3 = QNegAlpha constant
\r
3246 punpcklbw mm1,[Zero] ; interleave low bytes of mm1 with bytes from Zero
\r
3252 mov ebx,pZBufferPtr ; ebx = current z-buffer address
\r
3258 cmp word ptr[ebx],ax ; compare stored 16-bit z with ax
\r
3260 jg SkipPixelZBufferNoZWriteAlphaTex ; stored z greater (signed): pixel rejected
\r
3265 punpcklbw mm0,[Zero] ; interleave low bytes of mm0 with bytes from Zero
\r
3278 SkipPixelZBufferNoZWriteAlphaTex: ; rejected pixels rejoin here
\r
3280 add [pZBufferPtr],2 ; advance by one 16-bit z entry
\r
3284 jge GouraudLoopZBufferNoZWriteAlphaTex ; loop while the preceding flag result is signed >=
\r
3290 GouraudReturnZBufferNoZWriteAlphaTex: ; common exit, also the target of the early-out branch
\r
3297 cRet DrawScanLineGouraudZBufferNoZWriteAlphaTex_Asm3DNow
\r
3298 endProc DrawScanLineGouraudZBufferNoZWriteAlphaTex_Asm3DNow
\r
3302 ;solid color gouraud (no texture)
\r
;-----------------------------------------------------------------------
; DrawScanLineGouraudNoZSolid_Asm3DNow(pLeft, pRight)
; Untextured scanline routine: the colour comes from SolidColor, widened
; byte-to-word against Zero, and only the .gf fields of the structures at
; ecx and ebx are loaded. No pZBufferPtr access appears in the visible
; lines.
; NOTE(review): ecx/ebx presumably hold pLeft/pRight - confirm.
;-----------------------------------------------------------------------
3304 cProc DrawScanLineGouraudNoZSolid_Asm3DNow, 8,<pLeft : dword, pRight : dword>
\r
3316 jle GouraudReturnSolidNoZ ; early out on signed <= (presumably an empty span; confirm)
\r
3323 movd mm1,SolidColor ; mm1.lo = SolidColor dword
\r
3326 punpcklbw mm1,[Zero] ; interleave the colour bytes with bytes from Zero
\r
3332 punpckldq mm3,qword ptr[ecx].gf ; mm3.hi = [ecx].gf, mm3.lo kept
\r
3339 punpckldq mm5,qword ptr[ebx].gf ; mm5.hi = [ebx].gf, mm5.lo kept
\r
3364 GouraudLoopSolidNoZ: ; top of the per-pixel loop
\r
3377 jge GouraudLoopSolidNoZ ; loop while the preceding flag result is signed >=
\r
3381 GouraudReturnSolidNoZ: ; common exit, also the target of the early-out branch
\r
3388 cRet DrawScanLineGouraudNoZSolid_Asm3DNow
\r
3389 endProc DrawScanLineGouraudNoZSolid_Asm3DNow
\r
;-----------------------------------------------------------------------
; DrawScanLineGouraudZBufferSolid_Asm3DNow(pLeft, pRight)
; Untextured (SolidColor) scanline routine with a 16-bit z test and z
; write: the word at [ebx] (ebx loaded from pZBufferPtr before the loop)
; is compared with ax, the pixel is skipped on a signed "greater" result,
; otherwise ax is stored back.
; NOTE(review): jg is a signed condition; confirm the z values are meant
; to be compared as signed words.
;-----------------------------------------------------------------------
3394 cProc DrawScanLineGouraudZBufferSolid_Asm3DNow, 8,<pLeft : dword, pRight : dword>
\r
3406 jle GouraudReturnSolidZBuffer ; early out on signed <= (presumably an empty span; confirm)
\r
3413 movd mm1,SolidColor ; mm1.lo = SolidColor dword
\r
3416 punpcklbw mm1,[Zero] ; interleave the colour bytes with bytes from Zero
\r
3422 punpckldq mm3,qword ptr[ecx].gf ; mm3.hi = [ecx].gf, mm3.lo kept
\r
3429 punpckldq mm5,qword ptr[ebx].gf ; mm5.hi = [ebx].gf, mm5.lo kept
\r
3453 add [pZBufferPtr],edi ; offset the z-buffer pointer by edi
\r
3459 mov ebx,pZBufferPtr ; ebx = z-buffer address, loaded once before the loop
\r
3466 GouraudLoopSolidZBuffer: ; top of the per-pixel loop
\r
3478 cmp word ptr[ebx],ax ; compare stored 16-bit z with ax
\r
3479 jg SkipPixelGouraudSolidZBuffer ; stored z greater (signed): pixel rejected
\r
3484 mov word ptr[ebx],ax ; z write: store ax into the z-buffer
\r
3486 SkipPixelGouraudSolidZBuffer: ; rejected pixels rejoin here
\r
3490 jge GouraudLoopSolidZBuffer ; loop while the preceding flag result is signed >=
\r
3494 GouraudReturnSolidZBuffer: ; common exit, also the target of the early-out branch
\r
3501 cRet DrawScanLineGouraudZBufferSolid_Asm3DNow
\r
3502 endProc DrawScanLineGouraudZBufferSolid_Asm3DNow
\r
3505 ; same with zmask no zwrite
\r
;-----------------------------------------------------------------------
; DrawScanLineGouraudZBufferNoZWriteSolid_Asm3DNow(pLeft, pRight)
; Untextured (SolidColor) scanline routine that tests z but does not
; update it: the word at [ebx] (ebx loaded from pZBufferPtr) is compared
; with ax and the pixel is skipped on a signed "greater" result; no store
; to the z-buffer is visible.
; NOTE(review): jg is a signed condition; confirm the z values are meant
; to be compared as signed words.
;-----------------------------------------------------------------------
3507 cProc DrawScanLineGouraudZBufferNoZWriteSolid_Asm3DNow, 8,<pLeft : dword, pRight : dword>
\r
3519 jle GouraudReturnSolidZBufferNoZWrite ; early out on signed <= (presumably an empty span; confirm)
\r
3526 movd mm1,SolidColor ; mm1.lo = SolidColor dword
\r
3529 punpcklbw mm1,[Zero] ; interleave the colour bytes with bytes from Zero
\r
3535 punpckldq mm3,qword ptr[ecx].gf ; mm3.hi = [ecx].gf, mm3.lo kept
\r
3542 punpckldq mm5,qword ptr[ebx].gf ; mm5.hi = [ebx].gf, mm5.lo kept
\r
3566 add [pZBufferPtr],edi ; offset the z-buffer pointer by edi
\r
3572 mov ebx,pZBufferPtr ; ebx = z-buffer address, loaded once before the loop
\r
3579 GouraudLoopSolidZBufferNoZWrite: ; top of the per-pixel loop
\r
3591 cmp word ptr[ebx],ax ; compare stored 16-bit z with ax
\r
3592 jg SkipPixelGouraudSolidZBufferNoZWrite ; stored z greater (signed): pixel rejected
\r
3597 SkipPixelGouraudSolidZBufferNoZWrite: ; rejected pixels rejoin here
\r
3601 jge GouraudLoopSolidZBufferNoZWrite ; loop while the preceding flag result is signed >=
\r
3605 GouraudReturnSolidZBufferNoZWrite: ; common exit, also the target of the early-out branch
\r
3612 cRet DrawScanLineGouraudZBufferNoZWriteSolid_Asm3DNow
\r
3613 endProc DrawScanLineGouraudZBufferNoZWriteSolid_Asm3DNow
\r
3616 ;same with zwrite only
\r
;-----------------------------------------------------------------------
; DrawScanLineGouraudNoZBufferZWriteSolid_Asm3DNow(pLeft, pRight)
; Untextured (SolidColor) scanline routine that stores ax to the z-buffer
; word at [ebx] (ebx loaded from pZBufferPtr).
; NOTE(review): the name says "NoZBuffer ... ZWrite", yet a cmp/jg z test
; against the z-buffer is visible below, matching the tested variant
; DrawScanLineGouraudZBufferSolid_Asm3DNow. The textured z-write-only
; routine DrawScanLineGouraudNoZBufferZWrite_Asm3DNow shows no such
; compare. Confirm whether the test here is intended or a copy/paste
; leftover.
;-----------------------------------------------------------------------
3618 cProc DrawScanLineGouraudNoZBufferZWriteSolid_Asm3DNow, 8,<pLeft : dword, pRight : dword>
\r
3630 jle GouraudReturnSolidNoZBufferZWrite ; early out on signed <= (presumably an empty span; confirm)
\r
3637 movd mm1,SolidColor ; mm1.lo = SolidColor dword
\r
3640 punpcklbw mm1,[Zero] ; interleave the colour bytes with bytes from Zero
\r
3646 punpckldq mm3,qword ptr[ecx].gf ; mm3.hi = [ecx].gf, mm3.lo kept
\r
3653 punpckldq mm5,qword ptr[ebx].gf ; mm5.hi = [ebx].gf, mm5.lo kept
\r
3677 add [pZBufferPtr],edi ; offset the z-buffer pointer by edi
\r
3683 mov ebx,pZBufferPtr ; ebx = z-buffer address, loaded once before the loop
\r
3690 GouraudLoopSolidNoZBufferZWrite: ; top of the per-pixel loop
\r
3702 cmp word ptr[ebx],ax ; NOTE(review): z test in a routine named as z-write only
\r
3703 jg SkipPixelGouraudSolidNoZBufferZWrite ; stored z greater (signed): pixel rejected
\r
3708 mov word ptr[ebx],ax ; z write: store ax into the z-buffer
\r
3710 SkipPixelGouraudSolidNoZBufferZWrite: ; rejected pixels rejoin here
\r
3714 jge GouraudLoopSolidNoZBufferZWrite ; loop while the preceding flag result is signed >=
\r
3718 GouraudReturnSolidNoZBufferZWrite: ; common exit, also the target of the early-out branch
\r
3725 cRet DrawScanLineGouraudNoZBufferZWriteSolid_Asm3DNow
\r
3726 endProc DrawScanLineGouraudNoZBufferZWriteSolid_Asm3DNow
\r
3729 ;affine color keyed
\r
;-----------------------------------------------------------------------
; DrawScanLineGouraudNoZTrans_Asm3DNow(pLeft, pRight)
; Textured scanline routine with a per-pixel skip: a je branches around
; the table lookup at [ecx+eax*4] to SkipPixelGouraudNoZTrans, after which
; the UV value in mm2 is still stepped by mm1. No pZBufferPtr access
; appears in the visible lines.
; NOTE(review): the je presumably follows a colour-key compare on the
; fetched byte - confirm.
;-----------------------------------------------------------------------
3731 cProc DrawScanLineGouraudNoZTrans_Asm3DNow, 8,<pLeft : dword, pRight : dword>
\r
3743 jle GouraudReturnNoZTrans ; early out on signed <= (presumably an empty span; confirm)
\r
3750 ; prefetch [GBitPtr]
\r
3755 punpckldq mm1,qword ptr[ecx].uf ; mm1.hi = [ecx].uf, mm1.lo kept
\r
3764 punpckldq mm2,qword ptr[ebx].uf ; mm2.hi = [ebx].uf, mm2.lo kept
\r
3770 pfmul mm0,[QFixedScale] ; packed float multiply by QFixedScale
\r
3773 punpckldq mm3,qword ptr[ecx].gf ; mm3.hi = [ecx].gf, mm3.lo kept
\r
3778 pfmul mm2,[QFixedScale] ; packed float multiply by QFixedScale
\r
3784 punpckldq mm5,qword ptr[ebx].gf ; mm5.hi = [ebx].gf, mm5.lo kept
\r
3804 pand mm0,[WrapMask] ; AND with WrapMask (presumably texture wrap; confirm)
\r
3810 mov ebx,dword ptr[UV16+4] ; ebx = high dword of UV16
\r
3813 psrlq mm0,[QShiftV] ; logical right shift of the qword by the count in QShiftV
\r
3823 mov eax,dword ptr[UV16V] ; eax = dword at UV16V
\r
3830 GouraudLoopNoZTrans: ; top of the per-pixel loop
\r
3831 movq mm0,mm2 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw UL|VL |ZY UZR|VZR |
\r
3832 mov al,byte ptr[esi] ; fetch one byte through esi into al
\r
3834 pand mm0,[WrapMask] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
3837 movq [UV16],mm0 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
3839 mov ebx,dword ptr[UV16+4] ; ebx = high dword of the UV16 value just stored
\r
3840 psrlq mm0,[QShiftV] ; logical right shift of the qword by the count in QShiftV
\r
3846 mov ebp,dword ptr[UV16V] ; ebp = dword at UV16V
\r
3851 je SkipPixelGouraudNoZTrans ; skip on equal (presumably the colour key; confirm)
\r
3853 ;ouch register contention from hell
\r
3854 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |ARGB
\r
3855 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
3861 SkipPixelGouraudNoZTrans: ; skipped pixels rejoin here
\r
3866 paddd mm2,mm1 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |
\r
3871 jge GouraudLoopNoZTrans ; loop while the preceding flag result is signed >=
\r
3877 GouraudReturnNoZTrans: ; common exit, also the target of the early-out branch
\r
3884 cRet DrawScanLineGouraudNoZTrans_Asm3DNow
\r
3885 endProc DrawScanLineGouraudNoZTrans_Asm3DNow
\r
3888 ;affine textured with zbuffering and gouraud
\r
;-----------------------------------------------------------------------
; DrawScanLineGouraudZBuffer_Asm3DNow(pLeft, pRight)
; Textured scanline routine with a 16-bit z test and z write: each pass
; reloads ebx from pZBufferPtr, compares the word at [ebx] with ax, skips
; the pixel on a signed "greater" result, otherwise stores ax back, then
; advances pZBufferPtr by 2.
; NOTE(review): jg is a signed condition; confirm the z values are meant
; to be compared as signed words.
;-----------------------------------------------------------------------
3890 cProc DrawScanLineGouraudZBuffer_Asm3DNow, 8,<pLeft : dword, pRight : dword>
\r
3902 jle GouraudReturnZBuffer ; early out on signed <= (presumably an empty span; confirm)
\r
3912 punpckldq mm1,qword ptr[ecx].uf ; mm1.hi = [ecx].uf, mm1.lo kept
\r
3921 punpckldq mm2,qword ptr[ebx].uf ; mm2.hi = [ebx].uf, mm2.lo kept
\r
3927 pfmul mm0,[QFixedScale] ; packed float multiply by QFixedScale
\r
3930 punpckldq mm3,qword ptr[ecx].gf ; mm3.hi = [ecx].gf, mm3.lo kept
\r
3935 pfmul mm2,[QFixedScale] ; packed float multiply by QFixedScale
\r
3943 punpckldq mm5,qword ptr[ebx].gf ; mm5.hi = [ebx].gf, mm5.lo kept
\r
3965 pand mm0,[WrapMask] ; AND with WrapMask (presumably texture wrap; confirm)
\r
3968 add [pZBufferPtr],edi ; offset the z-buffer pointer by edi
\r
3978 mov ebx,dword ptr[UV16+4] ; ebx = high dword of UV16
\r
3983 psrlq mm0,[QShiftV] ; logical right shift of the qword by the count in QShiftV
\r
3992 mov eax,dword ptr[UV16V] ; eax = dword at UV16V
\r
4003 GouraudLoopZBuffer: ; top of the per-pixel loop
\r
4004 mov al,byte ptr[esi] ; fetch one byte through esi into al
\r
4005 movq mm0,mm2 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw UL|VL |ZY UZR|VZR |
\r
4008 paddd mm2,mm1 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |
\r
4010 pand mm0,[WrapMask] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
4012 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |ARGB
\r
4015 movq [UV16],mm0 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
4018 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
4021 mov ebx,dword ptr[UV16+4] ; ebx = high dword of the UV16 value just stored
\r
4023 psrlq mm0,[QShiftV] ; logical right shift of the qword by the count in QShiftV
\r
4031 mov ebp,dword ptr[UV16V] ; ebp = dword at UV16V
\r
4034 mov ebx,pZBufferPtr ; ebx = current z-buffer address
\r
4040 cmp word ptr[ebx],ax ; compare stored 16-bit z with ax
\r
4041 jg SkipPixelGouraudZBuffer ; stored z greater (signed): pixel rejected
\r
4044 mov word ptr[ebx],ax ; z write: store ax into the z-buffer
\r
4046 SkipPixelGouraudZBuffer: ; rejected pixels rejoin here
\r
4051 add [pZBufferPtr],2 ; advance by one 16-bit z entry
\r
4054 jge GouraudLoopZBuffer ; loop while the preceding flag result is signed >=
\r
4060 GouraudReturnZBuffer: ; common exit, also the target of the early-out branch
\r
4067 cRet DrawScanLineGouraudZBuffer_Asm3DNow
\r
4068 endProc DrawScanLineGouraudZBuffer_Asm3DNow
\r
4071 ;same with no zwrite
\r
;-----------------------------------------------------------------------
; DrawScanLineGouraudZBufferNoZWrite_Asm3DNow(pLeft, pRight)
; Textured scanline routine that tests z but does not update it: each
; pass reloads ebx from pZBufferPtr, compares the word at [ebx] with ax
; and skips the pixel on a signed "greater" result; no store to the
; z-buffer is visible.
; NOTE(review): jg is a signed condition; confirm the z values are meant
; to be compared as signed words.
;-----------------------------------------------------------------------
4073 cProc DrawScanLineGouraudZBufferNoZWrite_Asm3DNow, 8,<pLeft : dword, pRight : dword>
\r
4085 jle GouraudReturnZBufferNoZWrite ; early out on signed <= (presumably an empty span; confirm)
\r
4095 punpckldq mm1,qword ptr[ecx].uf ; mm1.hi = [ecx].uf, mm1.lo kept
\r
4104 punpckldq mm2,qword ptr[ebx].uf ; mm2.hi = [ebx].uf, mm2.lo kept
\r
4110 pfmul mm0,[QFixedScale] ; packed float multiply by QFixedScale
\r
4113 punpckldq mm3,qword ptr[ecx].gf ; mm3.hi = [ecx].gf, mm3.lo kept
\r
4118 pfmul mm2,[QFixedScale] ; packed float multiply by QFixedScale
\r
4126 punpckldq mm5,qword ptr[ebx].gf ; mm5.hi = [ebx].gf, mm5.lo kept
\r
4148 pand mm0,[WrapMask] ; AND with WrapMask (presumably texture wrap; confirm)
\r
4151 add [pZBufferPtr],edi ; offset the z-buffer pointer by edi
\r
4161 mov ebx,dword ptr[UV16+4] ; ebx = high dword of UV16
\r
4166 psrlq mm0,[QShiftV] ; logical right shift of the qword by the count in QShiftV
\r
4175 mov eax,dword ptr[UV16V] ; eax = dword at UV16V
\r
4186 GouraudLoopZBufferNoZWrite: ; top of the per-pixel loop
\r
4187 movq mm0,mm2 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw UL|VL |ZY UZR|VZR |
\r
4189 mov al,byte ptr[esi] ; fetch one byte through esi into al
\r
4190 paddd mm2,mm1 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |
\r
4194 pand mm0,[WrapMask] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
4196 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |ARGB
\r
4199 movq [UV16],mm0 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
4202 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
4205 mov ebx,dword ptr[UV16+4] ; ebx = high dword of the UV16 value just stored
\r
4207 psrlq mm0,[QShiftV] ; logical right shift of the qword by the count in QShiftV
\r
4215 mov ebp,dword ptr[UV16V] ; ebp = dword at UV16V
\r
4218 mov ebx,pZBufferPtr ; ebx = current z-buffer address
\r
4224 cmp word ptr[ebx],ax ; compare stored 16-bit z with ax
\r
4225 jg SkipPixelGouraudZBufferNoZWrite ; stored z greater (signed): pixel rejected
\r
4229 SkipPixelGouraudZBufferNoZWrite: ; rejected pixels rejoin here
\r
4234 add [pZBufferPtr],2 ; advance by one 16-bit z entry
\r
4237 jge GouraudLoopZBufferNoZWrite ; loop while the preceding flag result is signed >=
\r
4243 GouraudReturnZBufferNoZWrite: ; common exit, also the target of the early-out branch
\r
4250 cRet DrawScanLineGouraudZBufferNoZWrite_Asm3DNow
\r
4251 endProc DrawScanLineGouraudZBufferNoZWrite_Asm3DNow
\r
4254 ;same with z write only (no compare)
\r
;-----------------------------------------------------------------------
; DrawScanLineGouraudNoZBufferZWrite_Asm3DNow(pLeft, pRight)
; Textured scanline routine that writes z without testing it: each pass
; reloads ebx from pZBufferPtr and stores ax to [ebx]; no cmp against the
; z-buffer is visible. pZBufferPtr then advances by 2.
; NOTE(review): ecx/ebx presumably start as pLeft/pRight - confirm.
;-----------------------------------------------------------------------
4256 cProc DrawScanLineGouraudNoZBufferZWrite_Asm3DNow, 8,<pLeft : dword, pRight : dword>
\r
4268 jle GouraudReturnNoZBufferZWrite ; early out on signed <= (presumably an empty span; confirm)
\r
4278 punpckldq mm1,qword ptr[ecx].uf ; mm1.hi = [ecx].uf, mm1.lo kept
\r
4287 punpckldq mm2,qword ptr[ebx].uf ; mm2.hi = [ebx].uf, mm2.lo kept
\r
4293 pfmul mm0,[QFixedScale] ; packed float multiply by QFixedScale
\r
4296 punpckldq mm3,qword ptr[ecx].gf ; mm3.hi = [ecx].gf, mm3.lo kept
\r
4301 pfmul mm2,[QFixedScale] ; packed float multiply by QFixedScale
\r
4309 punpckldq mm5,qword ptr[ebx].gf ; mm5.hi = [ebx].gf, mm5.lo kept
\r
4331 pand mm0,[WrapMask] ; AND with WrapMask (presumably texture wrap; confirm)
\r
4334 add [pZBufferPtr],edi ; offset the z-buffer pointer by edi
\r
4344 mov ebx,dword ptr[UV16+4] ; ebx = high dword of UV16
\r
4349 psrlq mm0,[QShiftV] ; logical right shift of the qword by the count in QShiftV
\r
4358 mov eax,dword ptr[UV16V] ; eax = dword at UV16V
\r
4369 GouraudLoopNoZBufferZWrite: ; top of the per-pixel loop
\r
4370 movq mm0,mm2 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw UL|VL |ZY UZR|VZR |
\r
4372 mov al,byte ptr[esi] ; fetch one byte through esi into al
\r
4373 paddd mm2,mm1 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |
\r
4377 pand mm0,[WrapMask] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
4379 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |ARGB
\r
4382 movq [UV16],mm0 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
4385 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
4388 mov ebx,dword ptr[UV16+4] ; ebx = high dword of the UV16 value just stored
\r
4390 psrlq mm0,[QShiftV] ; logical right shift of the qword by the count in QShiftV
\r
4398 mov ebp,dword ptr[UV16V] ; ebp = dword at UV16V
\r
4401 mov ebx,pZBufferPtr ; ebx = current z-buffer address
\r
4408 mov word ptr[ebx],ax ; unconditional z write: store ax into the z-buffer
\r
4413 add [pZBufferPtr],2 ; advance by one 16-bit z entry
\r
4416 jge GouraudLoopNoZBufferZWrite ; loop while the preceding flag result is signed >=
\r
4422 GouraudReturnNoZBufferZWrite: ; common exit, also the target of the early-out branch
\r
4429 cRet DrawScanLineGouraudNoZBufferZWrite_Asm3DNow
\r
4430 endProc DrawScanLineGouraudNoZBufferZWrite_Asm3DNow
\r
4433 ;affine textured, gouraud, colorkeyed, zbuffered
\r
;-----------------------------------------------------------------------
; DrawScanLineGouraudZBufferTrans_Asm3DNow(pLeft, pRight)
; Textured scanline routine with two per-pixel rejections that share the
; label SkipPixelGouraudZBufferTrans: a je taken before the table lookup,
; and a jg taken when the z-buffer word at [ebx] is greater (signed) than
; ax. Surviving pixels store ax back to the z-buffer. pZBufferPtr and the
; UV value in mm2 are advanced after the skip label, so rejected pixels
; still advance them.
; NOTE(review): the je presumably follows a colour-key compare - confirm.
; NOTE(review): jg is a signed condition; confirm z is meant to be signed.
;-----------------------------------------------------------------------
4435 cProc DrawScanLineGouraudZBufferTrans_Asm3DNow, 8,<pLeft : dword, pRight : dword>
\r
4447 jle GouraudReturnZBufferTrans ; early out on signed <= (presumably an empty span; confirm)
\r
4457 punpckldq mm1,qword ptr[ecx].uf ; mm1.hi = [ecx].uf, mm1.lo kept
\r
4466 punpckldq mm2,qword ptr[ebx].uf ; mm2.hi = [ebx].uf, mm2.lo kept
\r
4472 pfmul mm0,[QFixedScale] ; packed float multiply by QFixedScale
\r
4475 punpckldq mm3,qword ptr[ecx].gf ; mm3.hi = [ecx].gf, mm3.lo kept
\r
4480 pfmul mm2,[QFixedScale] ; packed float multiply by QFixedScale
\r
4488 punpckldq mm5,qword ptr[ebx].gf ; mm5.hi = [ebx].gf, mm5.lo kept
\r
4510 pand mm0,[WrapMask] ; AND with WrapMask (presumably texture wrap; confirm)
\r
4513 add [pZBufferPtr],edi ; offset the z-buffer pointer by edi
\r
4523 mov ebx,dword ptr[UV16+4] ; ebx = high dword of UV16
\r
4528 psrlq mm0,[QShiftV] ; logical right shift of the qword by the count in QShiftV
\r
4537 mov eax,dword ptr[UV16V] ; eax = dword at UV16V
\r
4548 GouraudLoopZBufferTrans: ; top of the per-pixel loop
\r
4549 movq mm0,mm2 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw UL|VL |ZY UZR|VZR |
\r
4550 mov al,byte ptr[esi] ; fetch one byte through esi into al
\r
4552 pand mm0,[WrapMask] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
4555 movq [UV16],mm0 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
4557 mov ebx,dword ptr[UV16+4] ; ebx = high dword of the UV16 value just stored
\r
4558 psrlq mm0,[QShiftV] ; logical right shift of the qword by the count in QShiftV
\r
4564 mov ebp,dword ptr[UV16V] ; ebp = dword at UV16V
\r
4569 je SkipPixelGouraudZBufferTrans ; skip on equal (presumably the colour key; confirm)
\r
4571 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |ARGB
\r
4574 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
4577 mov ebx,pZBufferPtr ; ebx = current z-buffer address
\r
4584 cmp word ptr[ebx],ax ; compare stored 16-bit z with ax
\r
4585 jg SkipPixelGouraudZBufferTrans ; stored z greater (signed): pixel rejected
\r
4588 mov word ptr[ebx],ax ; z write: store ax into the z-buffer
\r
4590 SkipPixelGouraudZBufferTrans: ; rejected pixels rejoin here
\r
4595 add [pZBufferPtr],2 ; advance by one 16-bit z entry
\r
4596 paddd mm2,mm1 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |
\r
4601 jge GouraudLoopZBufferTrans ; loop while the preceding flag result is signed >=
\r
4607 GouraudReturnZBufferTrans: ; common exit, also the target of the early-out branch
\r
4614 cRet DrawScanLineGouraudZBufferTrans_Asm3DNow
\r
4615 endProc DrawScanLineGouraudZBufferTrans_Asm3DNow
\r
4618 ;same with zwrite only
\r
;-----------------------------------------------------------------------
; DrawScanLineGouraudNoZBufferZWriteTrans_Asm3DNow(pLeft, pRight)
; Textured scanline routine with a je-based pixel skip that stores ax to
; the z-buffer word at [ebx] for pixels that are drawn.
; NOTE(review): the name says "NoZBuffer ... ZWrite", yet a cmp/jg z test
; against the z-buffer is visible below, matching the tested variant
; DrawScanLineGouraudZBufferTrans_Asm3DNow. The non-keyed z-write-only
; routine DrawScanLineGouraudNoZBufferZWrite_Asm3DNow shows no such
; compare. Confirm whether the test here is intended or a copy/paste
; leftover.
; NOTE(review): the je presumably follows a colour-key compare - confirm.
;-----------------------------------------------------------------------
4620 cProc DrawScanLineGouraudNoZBufferZWriteTrans_Asm3DNow, 8,<pLeft : dword, pRight : dword>
\r
4632 jle GouraudReturnTransNoZBufferZWrite ; early out on signed <= (presumably an empty span; confirm)
\r
4642 punpckldq mm1,qword ptr[ecx].uf ; mm1.hi = [ecx].uf, mm1.lo kept
\r
4651 punpckldq mm2,qword ptr[ebx].uf ; mm2.hi = [ebx].uf, mm2.lo kept
\r
4657 pfmul mm0,[QFixedScale] ; packed float multiply by QFixedScale
\r
4660 punpckldq mm3,qword ptr[ecx].gf ; mm3.hi = [ecx].gf, mm3.lo kept
\r
4665 pfmul mm2,[QFixedScale] ; packed float multiply by QFixedScale
\r
4673 punpckldq mm5,qword ptr[ebx].gf ; mm5.hi = [ebx].gf, mm5.lo kept
\r
4695 pand mm0,[WrapMask] ; AND with WrapMask (presumably texture wrap; confirm)
\r
4698 add [pZBufferPtr],edi ; offset the z-buffer pointer by edi
\r
4708 mov ebx,dword ptr[UV16+4] ; ebx = high dword of UV16
\r
4713 psrlq mm0,[QShiftV] ; logical right shift of the qword by the count in QShiftV
\r
4722 mov eax,dword ptr[UV16V] ; eax = dword at UV16V
\r
4733 GouraudLoopTransNoZBufferZWrite: ; top of the per-pixel loop
\r
4734 movq mm0,mm2 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw UL|VL |ZY UZR|VZR |
\r
4735 mov al,byte ptr[esi] ; fetch one byte through esi into al
\r
4737 pand mm0,[WrapMask] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
4740 movq [UV16],mm0 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
4742 mov ebx,dword ptr[UV16+4] ; ebx = high dword of the UV16 value just stored
\r
4743 psrlq mm0,[QShiftV] ; logical right shift of the qword by the count in QShiftV
\r
4749 mov ebp,dword ptr[UV16V] ; ebp = dword at UV16V
\r
4754 je SkipPixelGouraudTransNoZBufferZWrite ; skip on equal (presumably the colour key; confirm)
\r
4756 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |ARGB
\r
4759 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
4762 mov ebx,pZBufferPtr ; ebx = current z-buffer address
\r
4769 cmp word ptr[ebx],ax ; NOTE(review): z test in a routine named as z-write only
\r
4770 jg SkipPixelGouraudTransNoZBufferZWrite ; stored z greater (signed): pixel rejected
\r
4773 mov word ptr[ebx],ax ; z write: store ax into the z-buffer
\r
4775 SkipPixelGouraudTransNoZBufferZWrite: ; rejected pixels rejoin here
\r
4780 add [pZBufferPtr],2 ; advance by one 16-bit z entry
\r
4781 paddd mm2,mm1 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |
\r
4786 jge GouraudLoopTransNoZBufferZWrite ; loop while the preceding flag result is signed >=
\r
4792 GouraudReturnTransNoZBufferZWrite: ; common exit, also the target of the early-out branch
\r
4799 cRet DrawScanLineGouraudNoZBufferZWriteTrans_Asm3DNow
\r
4800 endProc DrawScanLineGouraudNoZBufferZWriteTrans_Asm3DNow
\r
;-----------------------------------------------------------------------
; DrawScanLineGouraudZBufferNoZWriteTrans_Asm3DNow(pLeft, pRight)
; Textured scanline routine with two per-pixel rejections that share the
; label SkipPixelGouraudTransZBufferNoZWrite: a je taken before the table
; lookup, and a jg taken when the z-buffer word at [ebx] is greater
; (signed) than ax. No store to the z-buffer is visible, i.e. z is tested
; but not updated.
; NOTE(review): the je presumably follows a colour-key compare - confirm.
; NOTE(review): jg is a signed condition; confirm z is meant to be signed.
;-----------------------------------------------------------------------
4805 cProc DrawScanLineGouraudZBufferNoZWriteTrans_Asm3DNow, 8,<pLeft : dword, pRight : dword>
\r
4817 jle GouraudReturnTransZBufferNoZWrite ; early out on signed <= (presumably an empty span; confirm)
\r
4827 punpckldq mm1,qword ptr[ecx].uf ; mm1.hi = [ecx].uf, mm1.lo kept
\r
4836 punpckldq mm2,qword ptr[ebx].uf ; mm2.hi = [ebx].uf, mm2.lo kept
\r
4842 pfmul mm0,[QFixedScale] ; packed float multiply by QFixedScale
\r
4845 punpckldq mm3,qword ptr[ecx].gf ; mm3.hi = [ecx].gf, mm3.lo kept
\r
4850 pfmul mm2,[QFixedScale] ; packed float multiply by QFixedScale
\r
4858 punpckldq mm5,qword ptr[ebx].gf ; mm5.hi = [ebx].gf, mm5.lo kept
\r
4880 pand mm0,[WrapMask] ; AND with WrapMask (presumably texture wrap; confirm)
\r
4883 add [pZBufferPtr],edi ; offset the z-buffer pointer by edi
\r
4893 mov ebx,dword ptr[UV16+4] ; ebx = high dword of UV16
\r
4898 psrlq mm0,[QShiftV] ; logical right shift of the qword by the count in QShiftV
\r
4907 mov eax,dword ptr[UV16V] ; eax = dword at UV16V
\r
4918 GouraudLoopTransZBufferNoZWrite: ; top of the per-pixel loop
\r
4919 movq mm0,mm2 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw UL|VL |ZY UZR|VZR |
\r
4920 mov al,byte ptr[esi] ; fetch one byte through esi into al
\r
4922 pand mm0,[WrapMask] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
4925 movq [UV16],mm0 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
4927 mov ebx,dword ptr[UV16+4] ; ebx = high dword of the UV16 value just stored
\r
4928 psrlq mm0,[QShiftV] ; logical right shift of the qword by the count in QShiftV
\r
4934 mov ebp,dword ptr[UV16V] ; ebp = dword at UV16V
\r
4939 je SkipPixelGouraudTransZBufferNoZWrite ; skip on equal (presumably the colour key; confirm)
\r
4941 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |ARGB
\r
4944 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
4947 mov ebx,pZBufferPtr ; ebx = current z-buffer address
\r
4954 cmp word ptr[ebx],ax ; compare stored 16-bit z with ax
\r
4955 jg SkipPixelGouraudTransZBufferNoZWrite ; stored z greater (signed): pixel rejected
\r
4959 SkipPixelGouraudTransZBufferNoZWrite: ; rejected pixels rejoin here
\r
4964 add [pZBufferPtr],2 ; advance by one 16-bit z entry
\r
4965 paddd mm2,mm1 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |
\r
4970 jge GouraudLoopTransZBufferNoZWrite ; loop while the preceding flag result is signed >=
\r
4976 GouraudReturnTransZBufferNoZWrite: ; common exit, also the target of the early-out branch
\r
4983 cRet DrawScanLineGouraudZBufferNoZWriteTrans_Asm3DNow
\r
4984 endProc DrawScanLineGouraudZBufferNoZWriteTrans_Asm3DNow
\r
4987 ;zbuffered lightmap combine routine
\r
4988 ;the zbuffering in the inner loop uses a method I came up
\r
4989 ;with that I call the trashcan method. It always does a
\r
4990 ;write, but uses flags to look up a pointer, either to junk
\r
4991 ;or to a real zbuffer. Same for the screen write
\r
4992 ;it's very bizarre, but it's quick. no jumps
\r
4993 ;all the zbuffering for perspective correct stuff suffers
\r
4994 ;from inaccuracy when the z delta is negative
\r
4995 ;this is probably more signed/unsigned MMX problems messing
\r
4996 ;with me. I haven't had time to fix it yet
\r
4998 cProc DrawSpan32_AsmLitZBuffer3DNow, 12,<x1 : dword, x2 : dword, y : dword>
\r
5013 mov ebx,offset ClientWindow
\r
5015 mov edi,[ebx].Buffer
\r
5017 imul eax, [ebx].PixelPitch
\r
5027 mov eax,offset QZCan
\r
5028 mov ebx,offset SCan
\r
5031 mov eax,offset QDibCan
\r
5034 mov ebx,offset QDibCan
\r
5035 mov eax,offset QDibOrCan
\r
5038 mov ebx,offset QZCan
\r
5039 mov eax,offset QZOrCan
\r
5052 mov [NumASpans],ecx
\r
5053 mov [RemainingCount],eax
\r
5054 ; mm0 mm1 mm2 mm3 mm4 mm5 mm6 mm7
\r
5055 movd mm0,x1 ; |x | | | | | | |
\r
5056 movq mm2,[UVDivZStepX] ; |x | |UZdX | | | | |
\r
5058 movd mm1,y ; |x |y |UZdX | | | | |
\r
5059 movq mm3,[UVDivZStepY] ; |x |y |UZdX |UZdY | | | |
\r
5061 punpckldq mm0,mm0 ; x|x |y |UZdX |UZdY | | | |
\r
5062 punpckldq mm1,mm1 ; x|x y|y UZdX|VZdX |UZdY | | | |
\r
5067 pfmul mm2,mm0 ; x|x y|y UZX|VZX UZdY|VZdY | | | |
\r
5068 movd mm4,[ZiStepX] ; x|x y|y UZX|VZX UZdY|VZdY |ZdX | | |
\r
5071 pfmul mm4,[QZBufferPrec]
\r
5073 movq [QZDelta],mm4
\r
5075 movd mm4,[ZiStepX] ; x|x y|y UZX|VZX UZdY|VZdY |ZdX | | |
\r
5076 pfmul mm3,mm1 ; x|x y|y UZX|VZX UZY|VZY |ZdX | | |
\r
5077 movq mm6,[UVDivZOrigin] ; x|x y|y UZX|VZX UZY|VZY |ZdX | |UZO |
\r
5079 movd mm5,[ZiStepY] ; x|x y|y UZX|VZX UZY|VZY |ZdX |ZdY |UZO |
\r
5080 pfmul mm4,mm0 ; x|x y|y UZX|VZX UZY|VZY |ZX |ZdY |UZO |
\r
5082 pfmul mm5,mm1 ; x|x y|y UZX|VZX UZY|VZY |ZX |ZY UZO|VZO |
\r
5083 movd mm7,[ZiOrigin]
\r
5085 pfadd mm6,mm2 ; x|x y|y UZX|VZX UZY|VZY |ZX |ZY UZXS|VZXS |
\r
5086 pfadd mm4,mm7 ; x|x y|y UZX|VZX UZY|VZY |ZXS |ZY UZXS|VZXS |
\r
5088 pfadd mm6,mm3 ; x|x y|y UZX|VZX UZY|VZY |ZXS |ZY UZ|VZ |
\r
5089 pfadd mm4,mm5 ; x|x y|y UZX|VZX UZY|VZY |Zi |ZY UZ|VZ |
\r
5092 movq mm1,[QZDelta]
\r
5094 pfmul mm7,[QZBufferPrec]
\r
5100 movq [QZVal32_0],mm0
\r
5104 movq [QZVal32_1],mm0
\r
5106 movd mm7,[Zi16StepX]
\r
5107 movq [QZDelta],mm1
\r
5109 pfrcp mm0,mm4 ; ZL|ZL y|y UZX|VZX UZY|VZY |Zi |ZY UZ|VZ |
\r
5110 movq mm1,mm6 ; ZL|ZL UZ|VZ UZX|VZX UZY|VZY |Zi |ZY UZ|VZ |
\r
5112 pfadd mm4,mm7 ; ZL|ZL UZ|VZ UZX|VZX UZY|VZY |ZRi |ZY UZ|VZ |
\r
5113 pfmul mm1,mm0 ; ZL|ZL UL|VL UZX|VZX UZY|VZY |ZRi |ZY UZ|VZ |
\r
5115 pfrcp mm2,mm4 ; ZL|ZL UL|VL ZR|ZR UZY|VZY |ZRi |ZY UZ|VZ |
\r
5116 pfadd mm6,[UVDivZ16StepX] ; ZL|ZL UL|VL ZR|ZR UZY|VZY |ZRi |ZY UZR|VZR |
\r
5118 pfmul mm2,mm6 ; ZL|ZL UL|VL UR|VR UZY|VZY |ZRi |ZY UZR|VZR |
\r
5119 movq mm0,mm1 ; UL|VL UL|VL UR|VR UZY|VZY |ZRi |ZY UZR|VZR |
\r
5121 movq mm3,mm2 ; ULi|VLi UL|VL UR|UR UR|UR |ZRi |ZY UZR|VZR |
\r
5122 pfmul mm0,[QFixedScale] ; UL6|VL6 UL|VL UR|UR UR|UR |ZRi |ZY UZR|VZR |
\r
5124 pfmul mm3,[QFixedScale] ; UL6|VL6 UL|VL UR6|UR6 UR6|UR6 |ZRi |ZY UZR|VZR |
\r
5125 pf2id mm0,mm0 ;UL6i|VL6i UL|VL UR6|VR6 UR6|VR6 |ZRi |ZY UZR|VZR |
\r
5127 pf2id mm3,mm3 ;UL6i|VL6i UL|VL UR6i|VR6i URi|VRi |ZRi |ZY UZR|VZR |
\r
5130 jz HandleLeftoverPixels128z
\r
5133 ;use float uv for lightmap uv
\r
5134 movq mm7,[GLMapMulUV] ;UL6i|VL6i UL|VL UR6i|VR6i URi|VRi |ZRi |ZY UZR|VZR LU|LV
\r
5137 pfmul mm7,mm1 ;UL6i|VL6i UL|VL UR6i|VR6i URi|VRi |ZRi |ZY UZR|VZR ULL|VLL
\r
5138 pfmul mm5,[QFixedScale]
\r
5140 pfmul mm7,[QFixedScale]
\r
5143 pf2id mm7,mm7 ; ULw|VLw DU|DV UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
5144 paddd mm5,[UVAdjust]
\r
5146 pfsubr mm1,mm2 ; ULw|VLw DU|DV UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
5147 paddd mm5,[UVAdjust2]
\r
5152 movd mm7,[Zi16StepX]
\r
5155 pfmul mm1,[QFixedScale16] ; ULw|VLw DU16|DV16 UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
5156 ;right side stuff becomes left ; ULw|VLw DU16|DV16 UL|VL URw|VRw |ZLi |ZY UZL|VZL |
\r
5157 pfadd mm4,mm7 ;ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZL|VZL |
\r
5159 pand mm5,[WrapMask]
\r
5160 pf2id mm1,mm1 ; ULw|VLw DU16|DV16 UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
5162 movq mm7,[UVAdjustL]
\r
5165 pfadd mm6,[UVDivZ16StepX] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |
\r
5168 psrlq mm5,[QShiftV]
\r
5174 mov ebx,dword ptr[UVL16+4]
\r
5176 jle TryClampU0128z
\r
5178 mov dword ptr[UVL16+4],ecx
\r
5184 mov dword ptr[UVL16+4],0
\r
5186 mov eax,dword ptr[UVL16]
\r
5188 jle TryClampV0128z
\r
5190 mov dword ptr[UVL16],ecx
\r
5196 mov dword ptr[UVL16],0
\r
5202 movq [UVLeftW],mm3
\r
5203 psrad mm7,[QGMip4_8]
\r
5205 psrld mm5,[QGMip20]
\r
5206 pand mm7,[LMapMask8]
\r
5212 mov eax,dword ptr[UVL16]
\r
5215 imul eax,[GLightWidth]
\r
5220 add eax,dword ptr[UVL16+4]
\r
5224 lea eax,[2*eax+eax]
\r
5227 add eax,[GLightData]
\r
5229 ;bilininterpolate to get good color
\r
5230 punpcklbw mm6,[eax+3]
\r
5231 mov ecx,[GLightWidth]
\r
5233 punpcklbw mm5,[eax]
\r
5248 pmullw mm6,mm7 ; B|B
\r
5250 punpcklbw mm2,[eax+3]
\r
5256 mov ebx,dword ptr[UV16+4]
\r
5257 punpcklbw mm5,[eax]
\r
5259 mov eax,dword ptr[UV16V]
\r
5287 movq mm2,mm7 ;make ABGR ARGB
\r
5293 punpckhwd mm7,mm2 ;BAGB
\r
5302 ;grab zbuffer values
\r
5303 movq mm2,[QZVal32_0]
\r
5304 movq mm3,[QZVal32_1]
\r
5320 paddd mm0,mm1 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |
\r
5322 mov al,byte ptr[esi]
\r
5323 movq mm4,mm0 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw UL|VL |ZY UZR|VZR |
\r
5328 pand mm4,[WrapMask] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
5331 movq [UV16],mm4 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
5334 psrlq mm4,[QShiftV]
\r
5341 punpcklwd mm3,[Zero]
\r
5343 mov edx,dword ptr[UV16V]
\r
5344 paddd mm3,[QZOrCan]
\r
5346 mov ebx,dword ptr[UV16+4]
\r
5351 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |ARGB
\r
5352 punpcklwd mm3,[Zero]
\r
5354 mov edi,dword ptr[QZOut]
\r
5355 paddd mm3,[QDibOrCan]
\r
5358 movq [QDibOut],mm3
\r
5360 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
5361 mov edi,dword ptr[edi]
\r
5367 mov ax,word ptr[QZVal]
\r
5370 mov word ptr[edi],ax
\r
5372 mov edi,dword ptr[QDibOut]
\r
5375 mov edi,dword ptr[edi]
\r
5381 mov al,byte ptr[esi]
\r
5386 pand mm4,[WrapMask]
\r
5387 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ARGB UZR|VZR AR|GB
\r
5390 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw AR|GB UZR|VZR AR|GB
\r
5392 psrlq mm4,[QShiftV]
\r
5393 mov ebx,dword ptr[UV16+4]
\r
5398 mov edx,dword ptr[UV16V]
\r
5404 mov edi,dword ptr[QZOut+4]
\r
5409 mov edi,dword ptr[edi]
\r
5412 packuswb mm7,mm7 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw ARGB|ARGB UZR|VZR AR|GB
\r
5413 mov ax,word ptr[QZVal+2]
\r
5416 mov word ptr[edi+2],ax
\r
5419 mov edi,dword ptr[QDibOut+4]
\r
5421 mov al,byte ptr[esi]
\r
5424 mov edi,dword ptr[edi]
\r
5425 pand mm4,[WrapMask]
\r
5433 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |ARGB
\r
5434 psrlq mm4,[QShiftV]
\r
5436 punpckhwd mm3,[Zero]
\r
5439 mov ebx,dword ptr[UV16+4]
\r
5440 paddd mm3,[QZorCan]
\r
5442 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
5448 mov edi,dword ptr[QZOut]
\r
5451 punpckhwd mm3,[Zero]
\r
5453 mov edi,dword ptr[edi]
\r
5456 mov ax,word ptr[QZVal+4]
\r
5457 paddd mm3,[QDibOrCan]
\r
5459 mov word ptr[edi+4],ax
\r
5460 movq [QDibOut],mm3
\r
5463 mov edi,dword ptr[QDibOut]
\r
5466 mov edi,dword ptr[edi]
\r
5467 mov edx,dword ptr[UV16V]
\r
5475 pand mm4,[WrapMask]
\r
5482 mov al,byte ptr[esi]
\r
5484 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ARGB UZR|VZR AR|GB
\r
5485 psrlq mm4,[QShiftV]
\r
5487 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw AR|GB UZR|VZR AR|GB
\r
5490 mov ebx,dword ptr[UV16+4]
\r
5491 mov edx,dword ptr[UV16V]
\r
5499 mov edi,dword ptr[QZOut+4]
\r
5504 mov edi,dword ptr[edi]
\r
5506 packuswb mm7,mm7 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw ARGB|ARGB UZR|VZR AR|GB
\r
5507 mov ax,word ptr[QZVal+6]
\r
5510 mov word ptr[edi+6],ax
\r
5513 movq mm2,[QZVal32_0]
\r
5514 movq mm3,[QZVal32_1]
\r
5516 paddd mm2,[QZDelta]
\r
5517 paddd mm3,[QZDelta]
\r
5519 movq [QZVal32_0],mm2
\r
5520 movq [QZVal32_1],mm3
\r
5530 mov edi,dword ptr[QDibOut+4]
\r
5534 mov al,byte ptr[esi]
\r
5537 mov edi,dword ptr[edi]
\r
5538 pand mm4,[WrapMask]
\r
5540 pcmpgtw mm2,[ebp+8]
\r
5548 psrlq mm4,[QShiftV]
\r
5555 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |ARGB
\r
5556 punpcklwd mm3,[Zero]
\r
5558 mov ebx,dword ptr[UV16+4]
\r
5559 paddd mm3,[QZorCan]
\r
5561 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
5567 mov edi,dword ptr[QZOut]
\r
5570 punpcklwd mm3,[Zero]
\r
5572 mov edi,dword ptr[edi]
\r
5575 mov ax,word ptr[QZVal]
\r
5576 paddd mm3,[QDibOrCan]
\r
5578 mov word ptr[edi+8],ax
\r
5579 movq [QDibOut],mm3
\r
5582 mov edi,dword ptr[QDibOut]
\r
5585 mov edx,dword ptr[UV16V]
\r
5587 mov edi,dword ptr[edi]
\r
5595 pand mm4,[WrapMask]
\r
5601 mov al,byte ptr[esi]
\r
5604 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ARGB UZR|VZR AR|GB
\r
5605 psrlq mm4,[QShiftV]
\r
5607 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw AR|GB UZR|VZR AR|GB
\r
5610 mov ebx,dword ptr[UV16+4]
\r
5611 mov edx,dword ptr[UV16V]
\r
5619 mov edi,dword ptr[QZOut+4]
\r
5624 mov edi,dword ptr[edi]
\r
5626 packuswb mm7,mm7 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw ARGB|ARGB UZR|VZR AR|GB
\r
5627 mov ax,word ptr[QZVal+2]
\r
5630 mov word ptr[edi+10],ax
\r
5633 mov edi,dword ptr[QDibOut+4]
\r
5635 mov al,byte ptr[esi]
\r
5638 mov edi,dword ptr[edi]
\r
5639 pand mm4,[WrapMask]
\r
5647 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |ARGB
\r
5648 psrlq mm4,[QShiftV]
\r
5650 punpckhwd mm3,[Zero]
\r
5653 mov ebx,dword ptr[UV16+4]
\r
5654 paddd mm3,[QZorCan]
\r
5656 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
5662 mov edi,dword ptr[QZOut]
\r
5665 punpckhwd mm3,[Zero]
\r
5667 mov edi,dword ptr[edi]
\r
5670 mov ax,word ptr[QZVal+4]
\r
5671 paddd mm3,[QDibOrCan]
\r
5673 mov word ptr[edi+12],ax
\r
5674 movq [QDibOut],mm3
\r
5677 mov edi,dword ptr[QDibOut]
\r
5680 mov edi,dword ptr[edi]
\r
5682 mov edx,dword ptr[UV16V]
\r
5690 pand mm4,[WrapMask]
\r
5696 mov al,byte ptr[esi]
\r
5699 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ARGB UZR|VZR AR|GB
\r
5700 psrlq mm4,[QShiftV]
\r
5702 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw AR|GB UZR|VZR AR|GB
\r
5705 mov ebx,dword ptr[UV16+4]
\r
5706 mov edx,dword ptr[UV16V]
\r
5714 mov edi,dword ptr[QZOut+4]
\r
5719 mov edi,dword ptr[edi]
\r
5721 packuswb mm7,mm7 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw ARGB|ARGB UZR|VZR AR|GB
\r
5722 mov ax,word ptr[QZVal+6]
\r
5725 mov word ptr[edi+14],ax
\r
5728 movq mm2,[QZVal32_0]
\r
5729 movq mm3,[QZVal32_1]
\r
5731 paddd mm2,[QZDelta]
\r
5732 paddd mm3,[QZDelta]
\r
5734 movq [QZVal32_0],mm2
\r
5735 movq [QZVal32_1],mm3
\r
5745 mov edi,dword ptr[QDibOut+4]
\r
5748 mov edi,dword ptr[edi]
\r
5750 mov al,byte ptr[esi]
\r
5753 pand mm4,[WrapMask]
\r
5754 pcmpgtw mm2,[ebp+16]
\r
5762 psrlq mm4,[QShiftV]
\r
5769 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |ARGB
\r
5770 punpcklwd mm3,[Zero]
\r
5772 mov ebx,dword ptr[UV16+4]
\r
5773 paddd mm3,[QZorCan]
\r
5775 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
5781 mov edi,dword ptr[QZOut]
\r
5784 punpcklwd mm3,[Zero]
\r
5785 mov edi,dword ptr[edi]
\r
5787 mov ax,word ptr[QZVal]
\r
5788 paddd mm3,[QDibOrCan]
\r
5791 mov word ptr[edi+16],ax
\r
5792 movq [QDibOut],mm3
\r
5795 mov edi,dword ptr[QDibOut]
\r
5798 mov edi,dword ptr[edi]
\r
5799 mov edx,dword ptr[UV16V]
\r
5807 pand mm4,[WrapMask]
\r
5813 mov al,byte ptr[esi]
\r
5816 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ARGB UZR|VZR AR|GB
\r
5817 psrlq mm4,[QShiftV]
\r
5819 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw AR|GB UZR|VZR AR|GB
\r
5822 mov ebx,dword ptr[UV16+4]
\r
5823 mov edx,dword ptr[UV16V]
\r
5831 mov edi,dword ptr[QZOut+4]
\r
5835 mov edi,dword ptr[edi]
\r
5837 packuswb mm7,mm7 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw ARGB|ARGB UZR|VZR AR|GB
\r
5838 mov ax,word ptr[QZVal+2]
\r
5841 mov word ptr[edi+18],ax
\r
5844 mov edi,dword ptr[QDibOut+4]
\r
5846 mov al,byte ptr[esi]
\r
5848 mov edi,dword ptr[edi]
\r
5850 pand mm4,[WrapMask]
\r
5858 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |ARGB
\r
5859 psrlq mm4,[QShiftV]
\r
5861 punpckhwd mm3,[Zero]
\r
5864 mov ebx,dword ptr[UV16+4]
\r
5865 paddd mm3,[QZorCan]
\r
5867 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
5873 mov edi,dword ptr[QZOut]
\r
5876 punpckhwd mm3,[Zero]
\r
5878 mov edi,dword ptr[edi]
\r
5881 mov ax,word ptr[QZVal+4]
\r
5882 paddd mm3,[QDibOrCan]
\r
5884 mov word ptr[edi+20],ax
\r
5885 movq [QDibOut],mm3
\r
5888 mov edi,dword ptr[QDibOut]
\r
5891 mov edi,dword ptr[edi]
\r
5892 mov edx,dword ptr[UV16V]
\r
5900 pand mm4,[WrapMask]
\r
5906 mov al,byte ptr[esi]
\r
5909 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ARGB UZR|VZR AR|GB
\r
5910 psrlq mm4,[QShiftV]
\r
5912 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw AR|GB UZR|VZR AR|GB
\r
5915 mov ebx,dword ptr[UV16+4]
\r
5916 mov edx,dword ptr[UV16V]
\r
5924 mov edi,dword ptr[QZOut+4]
\r
5928 mov edi,dword ptr[edi]
\r
5930 packuswb mm7,mm7 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw ARGB|ARGB UZR|VZR AR|GB
\r
5931 mov ax,word ptr[QZVal+6]
\r
5934 mov word ptr[edi+22],ax
\r
5937 movq mm2,[QZVal32_0]
\r
5938 movq mm3,[QZVal32_1]
\r
5940 paddd mm2,[QZDelta]
\r
5941 paddd mm3,[QZDelta]
\r
5943 movq [QZVal32_0],mm2
\r
5944 movq [QZVal32_1],mm3
\r
5954 mov edi,dword ptr[QDibOut+4]
\r
5957 mov edi,dword ptr[edi]
\r
5959 mov al,byte ptr[esi]
\r
5962 pand mm4,[WrapMask]
\r
5963 pcmpgtw mm2,[ebp+24]
\r
5971 psrlq mm4,[QShiftV]
\r
5978 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |ARGB
\r
5979 punpcklwd mm3,[Zero]
\r
5981 mov ebx,dword ptr[UV16+4]
\r
5982 paddd mm3,[QZorCan]
\r
5984 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
5990 mov edi,dword ptr[QZOut]
\r
5993 punpcklwd mm3,[Zero]
\r
5995 mov edi,dword ptr[edi]
\r
5998 mov ax,word ptr[QZVal]
\r
5999 paddd mm3,[QDibOrCan]
\r
6001 mov word ptr[edi+24],ax
\r
6002 movq [QDibOut],mm3
\r
6005 mov edi,dword ptr[QDibOut]
\r
6008 mov edi,dword ptr[edi]
\r
6009 mov edx,dword ptr[UV16V]
\r
6017 pand mm4,[WrapMask]
\r
6023 mov al,byte ptr[esi]
\r
6026 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ARGB UZR|VZR AR|GB
\r
6027 psrlq mm4,[QShiftV]
\r
6029 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw AR|GB UZR|VZR AR|GB
\r
6032 mov ebx,dword ptr[UV16+4]
\r
6033 mov edx,dword ptr[UV16V]
\r
6041 mov edi,dword ptr[QZOut+4]
\r
6045 mov edi,dword ptr[edi]
\r
6047 packuswb mm7,mm7 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw ARGB|ARGB UZR|VZR AR|GB
\r
6048 mov ax,word ptr[QZVal+2]
\r
6051 mov word ptr[edi+26],ax
\r
6054 mov edi,dword ptr[QDibOut+4]
\r
6056 mov al,byte ptr[esi]
\r
6059 mov edi,dword ptr[edi]
\r
6060 pand mm4,[WrapMask]
\r
6068 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |ARGB
\r
6069 psrlq mm4,[QShiftV]
\r
6071 punpckhwd mm3,[Zero]
\r
6074 mov ebx,dword ptr[UV16+4]
\r
6075 paddd mm3,[QZorCan]
\r
6077 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
6083 mov edi,dword ptr[QZOut]
\r
6086 punpckhwd mm3,[Zero]
\r
6087 mov edi,dword ptr[edi]
\r
6089 mov ax,word ptr[QZVal+4]
\r
6090 paddd mm3,[QDibOrCan]
\r
6093 mov word ptr[edi+28],ax
\r
6094 movq [QDibOut],mm3
\r
6097 mov edi,dword ptr[QDibOut]
\r
6100 mov edi,dword ptr[edi]
\r
6101 mov edx,dword ptr[UV16V]
\r
6109 pand mm4,[WrapMask]
\r
6115 mov al,byte ptr[esi]
\r
6117 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ARGB UZR|VZR AR|GB
\r
6119 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw AR|GB UZR|VZR AR|GB
\r
6125 mov edi,dword ptr[QZOut+4]
\r
6128 mov edi,dword ptr[edi]
\r
6130 packuswb mm7,mm7 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw ARGB|ARGB UZR|VZR AR|GB
\r
6131 mov ax,word ptr[QZVal+6]
\r
6133 mov word ptr[edi+30],ax
\r
6135 mov edi,dword ptr[QDibOut+4]
\r
6138 mov edi,dword ptr[edi]
\r
6141 movq mm2,[QZVal32_0]
\r
6142 movq mm3,[QZVal32_1]
\r
6144 paddd mm2,[QZDelta]
\r
6145 paddd mm3,[QZDelta]
\r
6147 movq [QZVal32_0],mm2
\r
6148 movq [QZVal32_1],mm3
\r
6154 ; get corrected right side deltas ; ULw|VLw DU16|DV16 argb|ARGBagAG|rbRB |ZRi aA|rR UZR|VZR aArR|gGbB
\r
6160 movq mm0,[UVLeftW] ; ULw|VLw DU16|DV16 argb|ARGBagAG|rbRB |ZRi aA|rR UZR|VZR aArR|gGbB
\r
6162 movq mm1,[UVLeft] ; ULw|VLw UL|VL argb|ARGBagAG|rbRB ZRi|ZRi aA|rR UZR|VZR aArR|gGbB
\r
6163 punpckldq mm2,mm2 ; ULw|VLw UL|VL ZRi|ZRi agAG|rbRB ZRi|ZRi aA|rR UZR|VZR aArR|gGbB
\r
6165 pfmul mm2,mm6 ; ULw|VLw UL|VL UR|VR agAG|rbRB ZRi|ZRi aA|rR UZR|VZR aArR|gGbB
\r
6166 mov eax,offset QZCan
\r
6167 add dword ptr[Dest],32
\r
6169 add dword ptr[eax+4],32
\r
6170 mov eax,offset QDibCan
\r
6172 add dword ptr[eax+4],64
\r
6176 pfmul mm3,[QFixedScale]
\r
6180 dec [NumASpans] ; dec num affine spans
\r
6183 HandleLeftoverPixels128z:
\r
6190 cmp [RemainingCount],0
\r
6193 mov eax,[RemainingCount]
\r
6194 mov dword ptr[ZIR],eax
\r
6195 mov dword ptr[ZIR+4],eax
\r
6197 movq mm7,[GLMapMulUV] ;UL6i|VL6i UL|VL UR6i|VR6i URi|VRi |ZRi |ZY UZR|VZR LU|LV
\r
6200 pfsub mm6,[UVDivZ16StepX]
\r
6201 pfmul mm7,mm1 ;UL6i|VL6i UL|VL UR6i|VR6i URi|VRi |ZRi |ZY UZR|VZR ULL|VLL
\r
6203 pfmul mm5,[QFixedScale]
\r
6206 pfmul mm7,[QFixedScale]
\r
6207 pfmul mm3,[UVDivZStepX]
\r
6213 movd mm6,[Zi16StepX]
\r
6215 pf2id mm7,mm7 ; ULw|VLw DU|DV UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
6218 paddd mm5,[UVAdjust]
\r
6220 movd mm6,[ZiStepX]
\r
6221 mov ebx,[RemainingCount]
\r
6233 pfmul mm4,[QZBufferPrec]
\r
6235 paddd mm5,[UVAdjust2]
\r
6239 pfsubr mm1,mm2 ; ULw|VLw DU|DV UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
6241 pfmul mm1,qword ptr[QFixedScaleLUT+ebx] ; ULw|VLw DU16|DV16 UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
6244 pand mm5,[WrapMask]
\r
6245 pf2id mm1,mm1 ; ULw|VLw DU16|DV16 UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
6248 movq mm7,[UVAdjustL]
\r
6250 psrlq mm5,[QShiftV]
\r
6258 mov ebx,dword ptr[UVL16+4]
\r
6260 jle TryClampU1128z
\r
6262 mov dword ptr[UVL16+4],ecx
\r
6268 mov dword ptr[UVL16+4],0
\r
6270 mov eax,dword ptr[UVL16]
\r
6272 jle TryClampV1128z
\r
6274 mov dword ptr[UVL16],ecx
\r
6280 mov dword ptr[UVL16],0
\r
6286 psrad mm7,[QGMip4_8]
\r
6287 psrld mm5,[QGMip20]
\r
6289 pand mm7,[LMapMask8]
\r
6295 mov eax,dword ptr[UVL16]
\r
6298 imul eax,[GLightWidth]
\r
6301 add eax,dword ptr[UVL16+4]
\r
6305 lea eax,[2*eax+eax]
\r
6308 add eax,[GLightData]
\r
6310 ;bilininterpolate to get good color
\r
6311 punpcklbw mm6,[eax+3]
\r
6312 mov ecx,[GLightWidth]
\r
6314 punpcklbw mm5,[eax]
\r
6327 pmullw mm6,mm7 ; B|B
\r
6328 punpcklbw mm2,[eax+3]
\r
6333 mov ebx,dword ptr[UV16+4]
\r
6334 punpcklbw mm5,[eax]
\r
6339 mov eax,dword ptr[UV16V]
\r
6363 movq mm2,mm7 ;make ABGR ARGB
\r
6368 mov eax,offset QDibCan
\r
6369 punpckhwd mm7,mm2 ;BAGB
\r
6374 mov edi,dword ptr[eax+4]
\r
6377 mov ebp,dword ptr[Dest]
\r
6378 movd mm3,dword ptr[ZiStepX]
\r
6383 paddd mm0,mm1 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |
\r
6386 mov al,byte ptr[esi]
\r
6387 movq mm5,mm0 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw UL|VL |ZY UZR|VZR |
\r
6392 pand mm5,[WrapMask] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
6395 movd mm6,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |ARGB
\r
6398 movq [UV16],mm5 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
6401 punpcklbw mm6,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
6403 mov ebx,dword ptr[UV16+4]
\r
6405 psrlq mm5,[QShiftV]
\r
6412 mov edx,dword ptr[UV16V]
\r
6416 cmp ax,word ptr[ebp]
\r
6422 mov word ptr[ebp],ax
\r
6433 dec [RemainingCount]
\r
6434 jge LeftoverLoop128z
\r
6449 cRet DrawSpan32_AsmLitZBuffer3DNow
\r
6450 endProc DrawSpan32_AsmLitZBuffer3DNow
\r
6454 ;This is likely the most used routine.
\r
6455 ;Lightmap combine, z-write.
\r
; DrawSpan32_AsmLitZWrite3DNow(x1, x2, y): three dword arguments (12 bytes).
;
; NOTE(review): the numbers leading each line of this listing are not
; contiguous, so some instructions are not visible here. The comments added
; below describe only the instructions that are shown; anything that depends
; on the missing lines is marked as an assumption.
;
; Visible structure:
;   1. read the frame buffer description from ClientWindow and store the
;      span counts (NumASpans, RemainingCount);
;   2. evaluate U/Z|V/Z and 1/Z at (x1, y) and derive the z-buffer step;
;   3. perspective-divide the left and right ends of the first span;
;   4. per full span: pick a lightmap sample, then run an unrolled pixel
;      sequence, advancing QZVal32_0/QZVal32_1 by QZDelta as it goes;
;   5. HandleLeftoverPixelsLit: the tail driven by RemainingCount.
6457 cProc DrawSpan32_AsmLitZWrite3DNow, 12,<x1 : dword, x2 : dword, y : dword>
\r
; edi = ClientWindow.Buffer; eax (set by lines not shown) is multiplied by
; ClientWindow.PixelPitch.
6472 mov ebx,offset ClientWindow
\r
6474 mov edi,[ebx].Buffer
\r
6476 imul eax, [ebx].PixelPitch
\r
; Store the span counts computed by lines not shown:
; ecx -> NumASpans, eax -> RemainingCount.
6494 mov [NumASpans],ecx
\r
6495 mov [RemainingCount],eax
\r
; Evaluate the U/Z|V/Z and 1/Z gradients at (x1, y). The column comments on
; the right track the packed contents of mm0..mm7 after each instruction.
; NOTE(review): no int->float conversion of x/y is visible in this listing
; before the pfmul instructions; presumably it sits in the omitted lines.
6496 ; mm0 mm1 mm2 mm3 mm4 mm5 mm6 mm7
\r
6497 movd mm0,x1 ; |x | | | | | | |
\r
6498 movq mm2,[UVDivZStepX] ; |x | |UZdX | | | | |
\r
6500 movd mm1,y ; |x |y |UZdX | | | | |
\r
6501 movq mm3,[UVDivZStepY] ; |x |y |UZdX |UZdY | | | |
\r
6503 punpckldq mm0,mm0 ; x|x |y |UZdX |UZdY | | | |
\r
6504 punpckldq mm1,mm1 ; x|x y|y UZdX|VZdX |UZdY | | | |
\r
6509 pfmul mm2,mm0 ; x|x y|y UZX|VZX UZdY|VZdY | | | |
\r
6510 movd mm4,[ZiStepX] ; x|x y|y UZX|VZX UZdY|VZdY |ZdX | | |
\r
; QZDelta = ZiStepX * QZBufferPrec (the X step of 1/Z scaled by the
; z-buffer precision factor); mm4 is then reloaded with the raw ZiStepX.
6513 pfmul mm4,[QZBufferPrec]
\r
6515 movq [QZDelta],mm4
\r
6517 movd mm4,[ZiStepX] ; x|x y|y UZX|VZX UZdY|VZdY |ZdX | | |
\r
6518 pfmul mm3,mm1 ; x|x y|y UZX|VZX UZY|VZY |ZdX | | |
\r
6519 movq mm6,[UVDivZOrigin] ; x|x y|y UZX|VZX UZY|VZY |ZdX | |UZO |
\r
6521 movd mm5,[ZiStepY] ; x|x y|y UZX|VZX UZY|VZY |ZdX |ZdY |UZO |
\r
6522 pfmul mm4,mm0 ; x|x y|y UZX|VZX UZY|VZY |ZX |ZdY |UZO |
\r
6524 pfmul mm5,mm1 ; x|x y|y UZX|VZX UZY|VZY |ZX |ZY UZO|VZO |
\r
6525 movd mm7,[ZiOrigin]
\r
; mm6 = UVDivZOrigin + x*UVDivZStepX + y*UVDivZStepY  (U/Z | V/Z)
; mm4 = ZiOrigin     + x*ZiStepX     + y*ZiStepY      (1/Z)
6527 pfadd mm6,mm2 ; x|x y|y UZX|VZX UZY|VZY |ZX |ZY UZXS|VZXS |
\r
6528 pfadd mm4,mm7 ; x|x y|y UZX|VZX UZY|VZY |ZXS |ZY UZXS|VZXS |
\r
6530 pfadd mm6,mm3 ; x|x y|y UZX|VZX UZY|VZY |ZXS |ZY UZ|VZ |
\r
6531 pfadd mm4,mm5 ; x|x y|y UZX|VZX UZY|VZY |Zi |ZY UZ|VZ |
\r
; Earlier version of the z setup, left commented out by the author.
6534 ; movq mm1,[QZDelta]
\r
6535 ; punpckldq mm7,mm7
\r
6536 ; pfmul mm7,[QZBufferPrec]
\r
6540 ; movq [QZVal32_0],mm0
\r
6542 ; movq [QZVal32_1],mm7
\r
6544 ; movd mm7,[Zi16StepX]
\r
6545 ; movq [QZDelta],mm1
\r
; Current z setup: QZDelta is read into mm1 and written back below, mm7 is
; scaled by QZBufferPrec, and mm0 is stored to both QZVal32_0 and QZVal32_1.
; NOTE(review): the instructions that compute mm0/mm1 between these lines
; are not visible in this listing.
6548 movq mm1,[QZDelta]
\r
6550 pfmul mm7,[QZBufferPrec]
\r
6556 movq [QZVal32_0],mm0
\r
6560 movq [QZVal32_1],mm0
\r
6562 movd mm7,[Zi16StepX]
\r
6563 movq [QZDelta],mm1
\r
; Perspective divide for both ends of the first span. pfrcp produces the
; reciprocal of 1/Z; multiplying U/Z|V/Z by it gives U|V. The right end is
; reached by adding Zi16StepX to 1/Z and UVDivZ16StepX to U/Z|V/Z.
6565 pfrcp mm0,mm4 ; ZL|ZL y|y UZX|VZX UZY|VZY |Zi |ZY UZ|VZ |
\r
6566 movq mm1,mm6 ; ZL|ZL UZ|VZ UZX|VZX UZY|VZY |Zi |ZY UZ|VZ |
\r
6568 pfadd mm4,mm7 ; ZL|ZL UZ|VZ UZX|VZX UZY|VZY |ZRi |ZY UZ|VZ |
\r
6569 pfmul mm1,mm0 ; ZL|ZL UL|VL UZX|VZX UZY|VZY |ZRi |ZY UZ|VZ |
\r
6571 pfrcp mm2,mm4 ; ZL|ZL UL|VL ZR|ZR UZY|VZY |ZRi |ZY UZ|VZ |
\r
6572 pfadd mm6,[UVDivZ16StepX] ; ZL|ZL UL|VL ZR|ZR UZY|VZY |ZRi |ZY UZR|VZR |
\r
6574 pfmul mm2,mm6 ; ZL|ZL UL|VL UR|VR UZY|VZY |ZRi |ZY UZR|VZR |
\r
6575 movq mm0,mm1 ; UL|VL UL|VL UR|VR UZY|VZY |ZRi |ZY UZR|VZR |
\r
; Fixed-point copies of both ends: scale by QFixedScale, then pf2id converts
; the packed floats to packed 32-bit integers.
6577 movq mm3,mm2 ; ULi|VLi UL|VL UR|UR UR|UR |ZRi |ZY UZR|VZR |
\r
6578 pfmul mm0,[QFixedScale] ; UL6|VL6 UL|VL UR|UR UR|UR |ZRi |ZY UZR|VZR |
\r
6580 pfmul mm3,[QFixedScale] ; UL6|VL6 UL|VL UR6|UR6 UR6|UR6 |ZRi |ZY UZR|VZR |
\r
6581 pf2id mm0,mm0 ;UL6i|VL6i UL|VL UR6|VR6 UR6|VR6 |ZRi |ZY UZR|VZR |
\r
6583 pf2id mm3,mm3 ;UL6i|VL6i UL|VL UR6i|VR6i URi|VRi |ZRi |ZY UZR|VZR |
\r
; Go straight to the tail when ZF is set. The flag-setting instruction is
; not shown; presumably it tests the full-span count -- confirm.
6586 jz HandleLeftoverPixelsLit
\r
; Lightmap coordinates for the left end:
; mm7 = int(UL|VL * GLMapMulUV * QFixedScale) + UVAdjustL.
6588 movq mm7,[GLMapMulUV] ;UL6i|VL6i UL|VL UR6i|VR6i URi|VRi |ZRi |ZY UZR|VZR LU|LV
\r
6589 pfmul mm7,mm1 ;UL6i|VL6i UL|VL UR6i|VR6i URi|VRi |ZRi |ZY UZR|VZR ULL|VLL
\r
6590 pfmul mm7,[QFixedScale]
\r
6591 pf2id mm7,mm7 ; ULw|VLw DU|DV UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
6592 paddd mm7,[UVAdjustL]
\r
; Range handling for the two dwords of UVL16: each one is overwritten with
; ecx or with 0. The compares and the TryClamp* labels that select between
; these stores are not visible in this listing.
6596 mov ebx,dword ptr[UVL16+4]
\r
6601 jle TryClampU0Litp
\r
6603 mov dword ptr[UVL16+4],ecx
\r
6609 mov dword ptr[UVL16+4],0
\r
6611 mov eax,dword ptr[UVL16]
\r
6613 jle TryClampV0Litp
\r
6615 mov dword ptr[UVL16],ecx
\r
6621 mov dword ptr[UVL16],0
\r
; NOTE(review): GLightData is used below as a memory operand holding a base
; address (add eax,[GLightData]), so this prefetch touches the cache line of
; the GLightData variable itself, not the lightmap it points to -- confirm
; that this is intended.
6625 prefetch [GLightData]
\r
6630 movq [UVLeftW],mm3
\r
6631 psrad mm7,[QGMip4_8]
\r
6633 psrld mm5,[QGMip20]
\r
6634 pand mm7,[LMapMask8]
\r
; Lightmap sample address:
; eax = (UVL16 * GLightWidth + UVL16+4) * 3 + [GLightData].
; The lea multiplies by 3 and the neighbouring sample is read at [eax+3],
; so lightmap texels are presumably 3 bytes each -- confirm.
6640 mov eax,dword ptr[UVL16]
\r
6643 imul eax,[GLightWidth]
\r
6648 add eax,dword ptr[UVL16+4]
\r
6652 lea eax,[2*eax+eax]
\r
6655 add eax,[GLightData]
\r
6657 ;bilinear-interpolate to get good color
\r
; Bytes at [eax] and [eax+3] are interleaved into the low halves of the MMX
; registers; ecx receives GLightWidth. The blend arithmetic between these
; unpacks is not visible in this listing.
6658 punpcklbw mm6,[eax+3]
\r
6659 mov ecx,[GLightWidth]
\r
6662 punpcklbw mm5,[eax]
\r
6680 punpcklbw mm2,[eax+3]
\r
6682 punpcklbw mm5,[eax]
\r
6686 ; prefetch [GBitPtr+256]
\r
6705 movq mm3,[UVLeftW]
\r
; Per-span setup. Same lightmap-coordinate recipe as above, applied to mm2.
6729 ;use float uv for lightmap uv
\r
6730 movq mm7,[GLMapMulUV] ;UL6i|VL6i UL|VL UR6i|VR6i URi|VRi |ZRi |ZY UZR|VZR LU|LV
\r
6733 pfmul mm7,mm2 ;UL6i|VL6i UL|VL UR6i|VR6i URi|VRi |ZRi |ZY UZR|VZR ULL|VLL
\r
6734 pfmul mm5,[QFixedScale]
\r
6736 pfmul mm7,[QFixedScale]
\r
6739 pf2id mm7,mm7 ; ULw|VLw DU|DV UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
6740 paddd mm5,[UVAdjust]
\r
; pfsubr computes mm1 = mm2 - mm1 (right minus left); scaled by
; QFixedScale16 and converted by pf2id below, it becomes the integer UV step
; that the pixel sequence adds to mm0.
6742 pfsubr mm1,mm2 ; ULw|VLw DU|DV UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
6743 paddd mm5,[UVAdjust2]
\r
6748 movd mm7,[Zi16StepX]
\r
6751 pfmul mm1,[QFixedScale16] ; ULw|VLw DU16|DV16 UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
6752 ;right side stuff becomes left ; ULw|VLw DU16|DV16 UL|VL URw|VRw |ZLi |ZY UZL|VZL |
\r
6753 pfadd mm4,mm7 ;ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZL|VZL |
\r
6755 pand mm5,[WrapMask]
\r
6756 pf2id mm1,mm1 ; ULw|VLw DU16|DV16 UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
6758 movq mm7,[UVAdjustL]
\r
6761 pfadd mm6,[UVDivZ16StepX] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |
\r
6764 psrlq mm5,[QShiftV]
\r
; Range handling for UVL16 again (the compares and branches are not shown).
6773 mov ebx,dword ptr[UVL16+4]
\r
6777 mov dword ptr[UVL16+4],ecx
\r
6783 mov dword ptr[UVL16+4],0
\r
6785 mov eax,dword ptr[UVL16]
\r
6789 mov dword ptr[UVL16],ecx
\r
6795 mov dword ptr[UVL16],0
\r
6801 movq [UVLeftW],mm3
\r
6802 psrad mm7,[QGMip4_8]
\r
6804 psrld mm5,[QGMip20]
\r
6805 pand mm7,[LMapMask8]
\r
; Lightmap sample address, same formula as for the first span:
; eax = (UVL16 * GLightWidth + UVL16+4) * 3 + [GLightData].
6811 mov eax,dword ptr[UVL16]
\r
6814 imul eax,[GLightWidth]
\r
6819 add eax,dword ptr[UVL16+4]
\r
6823 lea eax,[2*eax+eax]
\r
6826 add eax,[GLightData]
\r
6828 mov ecx,[GLightWidth]
\r
6830 ;bilinear-interpolate to get good color
\r
6831 punpcklbw mm6,[eax+3]
\r
6833 punpcklbw mm5,[eax]
\r
6852 punpcklbw mm2,[eax+3]
\r
6854 punpcklbw mm5,[eax]
\r
6859 mov eax,dword ptr[UV16V]
\r
6871 mov ebx,dword ptr[UV16+4]
\r
6917 ;grab zbuffer values
\r
6918 movq mm2,[QZVal32_0]
\r
6919 movq mm7,[QZVal32_1]
\r
; Unrolled pixel sequence. The pattern visible for each pixel is:
;   - al = byte at [esi]; the dword at [ecx+eax*4] is loaded (annotated ARGB)
;     and widened to 16-bit lanes by interleaving with Zero;
;   - the UV accumulator is masked with WrapMask, stored to UV16 and shifted
;     right by QShiftV;
;   - ebx and edx are reloaded from UV16+4 and UV16V.
; The instructions that form esi from ebx/edx, apply the light and store the
; pixel are not visible in this listing.
6932 paddd mm0,mm1 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |
\r
6934 mov al,byte ptr[esi]
\r
6935 movq mm4,mm0 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw UL|VL |ZY UZR|VZR |
\r
6939 pand mm4,[WrapMask] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
6943 movq [UV16],mm4 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
6944 psrlq mm4,[QShiftV]
\r
6948 mov ebx,dword ptr[UV16+4]
\r
6951 mov edx,dword ptr[UV16V]
\r
6954 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |ARGB
\r
6960 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
6972 mov al,byte ptr[esi]
\r
6975 pand mm4,[WrapMask]
\r
6980 movd mm5,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ARGB UZR|VZR AR|GB
\r
6983 movq [UV16],mm4 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
6984 punpcklbw mm5,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw AR|GB UZR|VZR AR|GB
\r
6986 psrlq mm4,[QShiftV]
\r
6987 mov ebx,dword ptr[UV16+4]
\r
6992 mov edx,dword ptr[UV16V]
\r
; Pack the two widened pixels (mm7, mm5) back to bytes with unsigned
; saturation, giving two 32-bit pixels in one quadword.
7007 packuswb mm7,mm5 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw ARGB|ARGB UZR|VZR AR|GB
\r
7014 pand mm4,[WrapMask]
\r
7016 mov al,byte ptr[esi]
\r
7019 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |ARGB
\r
7022 mov ebx,dword ptr[UV16+4]
\r
7023 psrlq mm4,[QShiftV]
\r
7025 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
7034 mov edx,dword ptr[UV16V]
\r
7046 pand mm4,[WrapMask]
\r
7047 mov al,byte ptr[esi]
\r
7049 movq [UV16],mm4 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
7051 movd mm5,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ARGB UZR|VZR AR|GB
\r
7054 punpcklbw mm5,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw AR|GB UZR|VZR AR|GB
\r
7057 psrlq mm4,[QShiftV]
\r
7063 mov ebx,dword ptr[UV16+4]
\r
7069 mov edx,dword ptr[UV16V]
\r
7073 pand mm4,[WrapMask]
\r
7085 psrlq mm4,[QShiftV]
\r
7088 mov al,byte ptr[esi]
\r
7091 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |ARGB
\r
7093 mov ebx,dword ptr[UV16+4]
\r
7095 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
7104 mov edx,dword ptr[UV16V]
\r
7116 pand mm4,[WrapMask]
\r
7117 mov al,byte ptr[esi]
\r
7118 movq [UV16],mm4 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
7120 movd mm5,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ARGB UZR|VZR AR|GB
\r
7123 punpcklbw mm5,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw AR|GB UZR|VZR AR|GB
\r
7126 psrlq mm4,[QShiftV]
\r
7132 mov ebx,dword ptr[UV16+4]
\r
7136 mov edx,dword ptr[UV16V]
\r
7140 pand mm4,[WrapMask]
\r
; Advance both running z values by QZDelta and store them back.
7154 movq mm2,[QZVal32_0]
\r
7155 movq mm7,[QZVal32_1]
\r
7157 paddd mm2,[QZDelta]
\r
7158 paddd mm7,[QZDelta]
\r
7160 movq [QZVal32_0],mm2
\r
7161 movq [QZVal32_1],mm7
\r
7172 psrlq mm4,[QShiftV]
\r
7175 mov al,byte ptr[esi]
\r
7178 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |ARGB
\r
7180 mov ebx,dword ptr[UV16+4]
\r
7182 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
7191 mov edx,dword ptr[UV16V]
\r
7203 pand mm4,[WrapMask]
\r
7204 mov al,byte ptr[esi]
\r
7205 movq [UV16],mm4 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
7207 movd mm5,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ARGB UZR|VZR AR|GB
\r
7210 punpcklbw mm5,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw AR|GB UZR|VZR AR|GB
\r
7213 psrlq mm4,[QShiftV]
\r
7219 mov ebx,dword ptr[UV16+4]
\r
7224 mov edx,dword ptr[UV16V]
\r
7228 pand mm4,[WrapMask]
\r
7240 psrlq mm4,[QShiftV]
\r
7243 mov al,byte ptr[esi]
\r
7246 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |ARGB
\r
7248 mov ebx,dword ptr[UV16+4]
\r
7250 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
7259 mov edx,dword ptr[UV16V]
\r
7271 pand mm4,[WrapMask]
\r
7272 mov al,byte ptr[esi]
\r
7273 movq [UV16],mm4 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
7275 movd mm5,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ARGB UZR|VZR AR|GB
\r
7278 punpcklbw mm5,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw AR|GB UZR|VZR AR|GB
\r
7281 psrlq mm4,[QShiftV]
\r
7287 mov ebx,dword ptr[UV16+4]
\r
7291 mov edx,dword ptr[UV16V]
\r
7295 pand mm4,[WrapMask]
\r
; Advance both running z values by QZDelta and store them back.
7309 movq mm2,[QZVal32_0]
\r
7310 movq mm7,[QZVal32_1]
\r
7312 paddd mm2,[QZDelta]
\r
7313 paddd mm7,[QZDelta]
\r
7315 movq [QZVal32_0],mm2
\r
7316 movq [QZVal32_1],mm7
\r
7326 psrlq mm4,[QShiftV]
\r
7329 mov al,byte ptr[esi]
\r
7332 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |ARGB
\r
7335 mov ebx,dword ptr[UV16+4]
\r
7337 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
7346 mov edx,dword ptr[UV16V]
\r
7358 pand mm4,[WrapMask]
\r
7359 mov al,byte ptr[esi]
\r
7360 movq [UV16],mm4 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
7362 movd mm5,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ARGB UZR|VZR AR|GB
\r
7365 punpcklbw mm5,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw AR|GB UZR|VZR AR|GB
\r
7368 psrlq mm4,[QShiftV]
\r
7374 mov ebx,dword ptr[UV16+4]
\r
7379 mov edx,dword ptr[UV16V]
\r
7383 pand mm4,[WrapMask]
\r
7396 psrlq mm4,[QShiftV]
\r
7398 mov al,byte ptr[esi]
\r
7401 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |ARGB
\r
7404 mov ebx,dword ptr[UV16+4]
\r
7406 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
7414 mov edx,dword ptr[UV16V]
\r
7426 pand mm4,[WrapMask]
\r
7427 mov al,byte ptr[esi]
\r
7428 movq [UV16],mm4 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
7430 movd mm5,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ARGB UZR|VZR AR|GB
\r
7433 punpcklbw mm5,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw AR|GB UZR|VZR AR|GB
\r
7436 psrlq mm4,[QShiftV]
\r
7442 mov ebx,dword ptr[UV16+4]
\r
7446 mov edx,dword ptr[UV16V]
\r
7451 pand mm4,[WrapMask]
\r
; Advance both running z values by QZDelta and store them back.
7464 movq mm2,[QZVal32_0]
\r
7465 movq mm7,[QZVal32_1]
\r
7467 paddd mm2,[QZDelta]
\r
7468 paddd mm7,[QZDelta]
\r
7470 movq [QZVal32_0],mm2
\r
7471 movq [QZVal32_1],mm7
\r
7482 psrlq mm4,[QShiftV]
\r
7485 mov al,byte ptr[esi]
\r
7488 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |ARGB
\r
7491 mov ebx,dword ptr[UV16+4]
\r
7493 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
7501 mov edx,dword ptr[UV16V]
\r
7511 mov al,byte ptr[esi]
\r
7514 movd mm5,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ARGB UZR|VZR AR|GB
\r
7516 punpcklbw mm5,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw AR|GB UZR|VZR AR|GB
\r
; Advance Dest by 32 bytes for the next span. With edi advanced by 64 bytes
; further down, this is consistent with 16 pixels of 4 bytes and 16 entries
; of 2 bytes per span -- presumably Dest is the z-buffer cursor; confirm.
7519 add dword ptr[Dest],32
\r
; Advance both running z values by QZDelta and store them back.
7529 movq mm2,[QZVal32_0]
\r
7530 movq mm7,[QZVal32_1]
\r
7532 paddd mm2,[QZDelta]
\r
7533 paddd mm7,[QZDelta]
\r
7535 movq [QZVal32_0],mm2
\r
7536 movq [QZVal32_1],mm7
\r
7538 ; get corrected right side deltas ; ULw|VLw DU16|DV16 argb|ARGBagAG|rbRB |ZRi aA|rR UZR|VZR aArR|gGbB
\r
; Reload the saved left-edge values and compute the new right end:
; mm2 (low dword duplicated into both halves) is multiplied by U/Z|V/Z in mm6.
7544 movq mm0,[UVLeftW] ; ULw|VLw DU16|DV16 argb|ARGBagAG|rbRB |ZRi aA|rR UZR|VZR aArR|gGbB
\r
7546 movq mm1,[UVLeft] ; ULw|VLw UL|VL argb|ARGBagAG|rbRB ZRi|ZRi aA|rR UZR|VZR aArR|gGbB
\r
7547 punpckldq mm2,mm2 ; ULw|VLw UL|VL ZRi|ZRi agAG|rbRB ZRi|ZRi aA|rR UZR|VZR aArR|gGbB
\r
7549 pfmul mm2,mm6 ; ULw|VLw UL|VL UR|VR agAG|rbRB ZRi|ZRi aA|rR UZR|VZR aArR|gGbB
\r
7551 add edi,64 ; move screen pointer to start of next aspan
\r
7554 pfmul mm3,[QFixedScale]
\r
; The branch that consumes the flags from this dec is not visible here.
7558 dec [NumASpans] ; dec num affine spans
\r
; Tail of the span: handles the pixels counted by RemainingCount.
7561 HandleLeftoverPixelsLit:
\r
; RemainingCount is copied into both dwords of ZIR.
7567 mov eax,[RemainingCount]
\r
7568 mov dword ptr[ZIR],eax
\r
7569 mov dword ptr[ZIR+4],eax
\r
7571 movq mm7,[GLMapMulUV] ;UL6i|VL6i UL|VL UR6i|VR6i URi|VRi |ZRi |ZY UZR|VZR LU|LV
\r
; Step U/Z|V/Z back by UVDivZ16StepX; the right end is then rebuilt from
; per-pixel steps (UVDivZStepX, ZiStepX) in the lines that follow.
7574 pfsub mm6,[UVDivZ16StepX]
\r
7575 pfmul mm7,mm1 ;UL6i|VL6i UL|VL UR6i|VR6i URi|VRi |ZRi |ZY UZR|VZR ULL|VLL
\r
7577 pfmul mm5,[QFixedScale]
\r
7580 pfmul mm7,[QFixedScale]
\r
7581 pfmul mm3,[UVDivZStepX]
\r
7587 movd mm6,[Zi16StepX]
\r
7589 pf2id mm7,mm7 ; ULw|VLw DU|DV UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
7592 movd mm6,[ZiStepX]
\r
7593 mov ebx,[RemainingCount]
\r
7600 paddd mm5,[UVAdjust]
\r
7605 paddd mm5,[UVAdjust2]
\r
; mm1 = mm2 - mm1 (right minus left), then scaled by the QFixedScaleLUT
; entry at byte offset ebx. ebx was loaded from RemainingCount above; any
; scaling applied to it in between is not visible in this listing.
7611 pfsubr mm1,mm2 ; ULw|VLw DU|DV UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
7613 pfmul mm4,[QZBufferPrec]
\r
7614 pfmul mm1,qword ptr[QFixedScaleLUT+ebx] ; ULw|VLw DU16|DV16 UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
7617 pand mm5,[WrapMask]
\r
7618 pf2id mm1,mm1 ; ULw|VLw DU16|DV16 UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
7621 movq mm7,[UVAdjustL]
\r
7623 psrlq mm5,[QShiftV]
\r
; Range handling for UVL16 (the compares and branches are not shown).
7632 mov ebx,dword ptr[UVL16+4]
\r
7636 mov dword ptr[UVL16+4],ecx
\r
7642 mov dword ptr[UVL16+4],0
\r
7644 mov eax,dword ptr[UVL16]
\r
7648 mov dword ptr[UVL16],ecx
\r
7654 mov dword ptr[UVL16],0
\r
7660 psrad mm7,[QGMip4_8]
\r
7661 psrld mm5,[QGMip20]
\r
7663 pand mm7,[LMapMask8]
\r
; Lightmap sample address:
; eax = (UVL16 * GLightWidth + UVL16+4) * 3 + [GLightData].
7669 mov eax,dword ptr[UVL16]
\r
7672 imul eax,[GLightWidth]
\r
7675 add eax,dword ptr[UVL16+4]
\r
7679 lea eax,[2*eax+eax]
\r
7682 add eax,[GLightData]
\r
7684 ;bilinear-interpolate to get good color
\r
7685 punpcklbw mm6,[eax+3]
\r
7686 mov ecx,[GLightWidth]
\r
7688 punpcklbw mm5,[eax]
\r
7701 pmullw mm6,mm7 ; B|B
\r
7703 punpcklbw mm2,[eax+3]
\r
7709 mov ebx,dword ptr[UV16+4]
\r
7710 punpcklbw mm5,[eax]
\r
7712 mov eax,dword ptr[UV16V]
\r
7742 movq mm2,mm7 ;make ABGR ARGB
\r
7746 punpckhwd mm7,mm2 ;BAGB
\r
; ebp = Dest; it is the pointer used by the 16-bit store in the loop below.
7753 mov ebp,dword ptr[Dest]
\r
7754 movd mm3,dword ptr[ZiStepX]
\r
; Per-pixel tail work (the LeftoverLoopLit label itself is not visible in
; this listing): step UV by mm1, fetch the texel byte at [esi], load the
; dword at [ecx+eax*4], wrap and store UV.
7759 paddd mm0,mm1 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |
\r
7762 mov al,byte ptr[esi]
\r
7763 movq mm5,mm0 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw UL|VL |ZY UZR|VZR |
\r
7768 pand mm5,[WrapMask] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
7771 movd mm6,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |ARGB
\r
7774 movq [UV16],mm5 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
7777 punpcklbw mm6,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
; Store the 16-bit value in ax at [ebp] (ebp = Dest). Presumably this is the
; z write; the instruction that loads ax with z is not shown -- confirm.
7778 mov word ptr[ebp],ax
\r
7780 mov ebx,dword ptr[UV16+4]
\r
7783 psrlq mm5,[QShiftV]
\r
7791 mov edx,dword ptr[UV16V]
\r
; Repeat while RemainingCount, after the decrement, is still >= 0 (signed).
7802 dec [RemainingCount]
\r
7803 jge LeftoverLoopLit
\r
7814 cRet DrawSpan32_AsmLitZWrite3DNow
\r
7815 endProc DrawSpan32_AsmLitZWrite3DNow
\r
7818 ;perspective-correct Gouraud shading with z-write
\r
7820 cProc DrawSpan32_AsmGouraudZWrite3DNow, 36,<x1 : dword, x2 : dword, y : dword, r1 : dword, g1 : dword, b1 : dword, r2 : dword, g2 : dword, b2 : dword>
\r
7837 mov ebx,offset ClientWindow
\r
7839 imul eax,[ebx].PixelPitch
\r
7840 mov edi,[ebx].Buffer
\r
7857 mov [NumASpans],ecx
\r
7858 mov [RemainingCount],eax
\r
7860 ;grab the left side lights
\r
7864 ; punpckldq mm5,qword ptr[Zero]
\r
7877 ; mm0 mm1 mm2 mm3 mm4 mm5 mm6 mm7
\r
7878 movd mm0,x1 ; |x | | | | | | |
\r
7879 movq mm2,[UVDivZStepX] ; |x | UZdX|VZdX | | | | |
\r
7881 movd mm1,y ; |x |y UZdX|VZdX | | | | |
\r
7882 movq mm3,[UVDivZStepY] ; |x |y UZdX|VZdXUZdY|VZdY | | | |
\r
7884 punpckldq mm0,mm0 ; x|x |y UZdX|VZdXUZdY|VZdY | | | |
\r
7885 punpckldq mm1,mm1 ; x|x y|y UZdX|VZdXUZdY|VZdY | | | |
\r
7887 movd mm7,edx ; x|x y|y UZdX|VZdXUZdY|VZdY | | | |wid
\r
7888 movd mm5,b2 ; x|x y|y UZdX|VZdXUZdY|VZdY | |b | |wid
\r
7890 pi2fd mm0,mm0 ; x|x y|y UZdX|VZdXUZdY|VZdY | |b | |wid
\r
7891 movd mm6,b1 ; x|x y|y UZdX|VZdXUZdY|VZdY | |b |b |wid
\r
7893 pi2fd mm7,mm7 ; x|x y|y UZdX|VZdXUZdY|VZdY | |b |b |wid
\r
7894 punpckldq mm5,qword ptr[g2] ; x|x y|y UZdX|VZdXUZdY|VZdY | g|b |b |wid
\r
7896 pi2fd mm1,mm1 ; x|x y|y UZdX|VZdXUZdY|VZdY | g|b |b |wid
\r
7897 punpckldq mm6,qword ptr[g1] ; x|x y|y UZdX|VZdXUZdY|VZdY | g|b g|b |wid
\r
7899 pfmul mm2,mm0 ; x|x y|y UZX|VZX UZdY|VZdY | g|b g|b |wid
\r
7900 pfrcp mm7,mm7 ; x|x y|y UZX|VZX UZdY|VZdY | g|b g|b dw|dw
\r
7902 pfsub mm5,mm6 ; x|x y|y UZX|VZX UZdY|VZdY | gd|bd g|b dw|dw
\r
7903 movd mm4,[r1] ; x|x y|y UZX|VZX UZdY|VZdY |r gd|bd g|b dw|dw
\r
7905 movd mm6,[r2] ; x|x y|y UZX|VZX UZdY|VZdY |r gd|bd g|r dw|dw
\r
7906 pfmul mm3,mm1 ; x|x y|y UZX|VZX UZY|VZY |r gd|bd g|r dw|dw
\r
7908 pfsub mm6,mm4 ; x|x y|y UZX|VZX UZY|VZY |r gd|bd x|rd dw|dw
\r
7909 pfmul mm7,[Q128] ; x|x y|y UZX|VZX UZY|VZY |r gd|bd x|rd DW|DW
\r
7911 movd mm4,[ZiStepX] ; x|x y|y UZX|VZX UZY|VZY |ZdX gd|bd x|rd DW|DW
\r
7914 pfmul mm4,[QZBufferPrec]
\r
7916 movq [QZDelta],mm4
\r
7918 movd mm4,[ZiStepX] ; x|x y|y UZX|VZX UZY|VZY |ZdX gd|bd x|rd DW|DW
\r
7919 pfmul mm5,mm7 ; x|x y|y UZX|VZX UZY|VZY |ZdX GD|BD x|rd DW|DW
\r
7921 pfmul mm4,mm0 ; x|x y|y UZX|VZX UZY|VZY |ZX GD|BD x|rd DW|DW
\r
7922 pfmul mm6,mm7 ; x|x y|y UZX|VZX UZY|VZY |ZX GD|BD x|RD DW|DW
\r
7928 movq [RGBADelta],mm5
\r
7930 movd mm5,[ZiStepY] ; x|x y|y UZX|VZX UZY|VZY |ZX |ZdY x|RD DW|DW
\r
7931 movq mm6,[UVDivZOrigin] ; x|x y|y UZX|VZX UZY|VZY |ZX |ZdY UZO|VZO DW|DW
\r
7933 pfmul mm5,mm1 ; x|x y|y UZX|VZX UZY|VZY |ZX |ZY UZO|VZO DW|DW
\r
7934 movd mm7,[ZiOrigin] ; x|x y|y UZX|VZX UZY|VZY |ZX |ZY UZO|VZO |ZO
\r
7936 pfadd mm6,mm2 ; x|x y|y UZX|VZX UZY|VZY |ZX |ZY UZXS|VZXS |ZO
\r
7937 pfadd mm4,mm7 ; x|x y|y UZX|VZX UZY|VZY |ZXS |ZY UZXS|VZXS |ZO
\r
7939 pfadd mm6,mm3 ; x|x y|y UZX|VZX UZY|VZY |ZXS |ZY UZ|VZ |ZO
\r
7940 pfadd mm4,mm5 ; x|x y|y UZX|VZX UZY|VZY |Zi |ZY UZ|VZ |ZO
\r
7943 movq mm1,[QZDelta]
\r
7945 pfmul mm7,[QZBufferPrec]
\r
7951 movq [QZVal32_0],mm0
\r
7955 movq [QZVal32_1],mm0
\r
7957 movd mm7,[Zi16StepX]
\r
7958 movq [QZDelta],mm1
\r
7960 pfrcp mm0,mm4 ; ZL|ZL y|y UZX|VZX UZY|VZY |Zi |ZY UZ|VZ |ZdX16
\r
7962 movq mm1,mm6 ; ZL|ZL UZ|VZ UZX|VZX UZY|VZY |Zi |ZY UZ|VZ |ZdX16
\r
7963 pfadd mm4,mm7 ; ZL|ZL UZ|VZ UZX|VZX UZY|VZY |ZRi |ZY UZ|VZ |ZdX16
\r
7965 pfmul mm1,mm0 ; ZL|ZL UL|VL UZX|VZX UZY|VZY |ZRi |ZY UZ|VZ |ZdX16
\r
7966 pfrcp mm2,mm4 ; ZL|ZL UL|VL ZR|ZR UZY|VZY |ZRi |ZY UZ|VZ |ZdX16
\r
7968 pfadd mm6,[UVDivZ16StepX] ; ZL|ZL UL|VL ZR|ZR UZY|VZY |ZRi |ZY UZR|VZR |ZdX16
\r
7969 pfmul mm2,mm6 ; ZL|ZL UL|VL UR|VR UZY|VZY |ZRi |ZY UZR|VZR |ZdX16
\r
7971 movq mm0,mm1 ; UL|VL UL|VL UR|VR UZY|VZY |ZRi |ZY UZR|VZR |ZdX16
\r
7972 movq mm3,mm2 ; ULi|VLi UL|VL UR|UR UR|UR |ZRi |ZY UZR|VZR |ZdX16
\r
7974 pfmul mm0,[QFixedScale] ; UL6|VL6 UL|VL UR|UR UR|UR |ZRi |ZY UZR|VZR |ZdX16
\r
7975 pfmul mm3,[QFixedScale] ; UL6|VL6 UL|VL UR6|UR6 UR6|UR6 |ZRi |ZY UZR|VZR |ZdX16
\r
7977 pf2id mm0,mm0 ;UL6i|VL6i UL|VL UR6|VR6 UR6|VR6 |ZRi |ZY UZR|VZR |ZdX16
\r
7978 pf2id mm3,mm3 ;UL6i|VL6i UL|VL UR6i|VR6i URi|VRi |ZRi |ZY UZR|VZR |ZdX16
\r
7982 jz HandleLeftoverPixelsLit
\r
7984 movq mm7,[GLMapMulUV] ;UL6i|VL6i UL|VL UR6i|VR6i URi|VRi |ZRi |ZY UZR|VZR LU|LV
\r
7985 pfmul mm7,mm1 ;UL6i|VL6i UL|VL UR6i|VR6i URi|VRi |ZRi |ZY UZR|VZR ULL|VLL
\r
7986 pfmul mm7,[QFixedScale]
\r
7987 pf2id mm7,mm7 ; ULw|VLw DU|DV UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
7988 paddd mm7,[UVAdjustL]
\r
7992 mov ebx,dword ptr[UVL16+4]
\r
7995 jle TryClampU0Litp
\r
7997 mov dword ptr[UVL16+4],ecx
\r
8003 mov dword ptr[UVL16+4],0
\r
8005 mov eax,dword ptr[UVL16]
\r
8007 jle TryClampV0Litp
\r
8009 mov dword ptr[UVL16],ecx
\r
8015 mov dword ptr[UVL16],0
\r
8022 ;use float uv for lightmap uv
\r
8023 movq mm7,[GLMapMulUV] ;UL6i|VL6i UL|VL UR6i|VR6i URi|VRi |ZRi |ZY UZR|VZR LU|LV
\r
8026 pfmul mm7,mm2 ;UL6i|VL6i UL|VL UR6i|VR6i URi|VRi |ZRi |ZY UZR|VZR ULL|VLL
\r
8027 pfmul mm5,[QFixedScale]
\r
8029 pfmul mm7,[QFixedScale]
\r
8032 pf2id mm7,mm7 ; ULw|VLw DU|DV UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
8033 paddd mm5,[UVAdjust]
\r
8035 pfsubr mm1,mm2 ; ULw|VLw DU|DV UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
8036 paddd mm5,[UVAdjust2]
\r
8041 movd mm7,[Zi16StepX]
\r
8044 pfmul mm1,[QFixedScale16] ; ULw|VLw DU16|DV16 UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
8045 ;right side stuff becomes left ; ULw|VLw DU16|DV16 UL|VL URw|VRw |ZLi |ZY UZL|VZL |
\r
8046 pfadd mm4,mm7 ;ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZL|VZL |
\r
8048 pand mm5,[WrapMask]
\r
8049 pf2id mm1,mm1 ; ULw|VLw DU16|DV16 UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
8051 movq mm7,[UVAdjustL]
\r
8054 pfadd mm6,[UVDivZ16StepX] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |
\r
8057 psrlq mm5,[QShiftV]
\r
8064 mov ebx,dword ptr[UVL16+4]
\r
8068 mov dword ptr[UVL16+4],ecx
\r
8074 mov dword ptr[UVL16+4],0
\r
8076 mov eax,dword ptr[UVL16]
\r
8080 mov dword ptr[UVL16],ecx
\r
8086 mov dword ptr[UVL16],0
\r
8089 movq [UVLeftW],mm3
\r
8090 mov eax,dword ptr[UV16V]
\r
8091 mov ebx,dword ptr[UV16+4]
\r
8102 movq mm3,[RGBADelta]
\r
8109 ;grab zbuffer values
\r
8110 movq mm2,[QZVal32_0]
\r
8111 movq mm7,[QZVal32_1]
\r
8122 paddd mm0,mm1 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |
\r
8124 mov al,byte ptr[esi]
\r
8125 movq mm4,mm0 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw UL|VL |ZY UZR|VZR |
\r
8129 pand mm4,[WrapMask] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
8133 movq [UV16],mm4 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
8134 psrlq mm4,[QShiftV]
\r
8136 mov ebx,dword ptr[UV16+4]
\r
8139 mov edx,dword ptr[UV16V]
\r
8141 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |ARGB
\r
8146 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
8158 mov al,byte ptr[esi]
\r
8160 pand mm4,[WrapMask]
\r
8162 movd mm5,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ARGB UZR|VZR AR|GB
\r
8164 movq [UV16],mm4 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
8165 punpcklbw mm5,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw AR|GB UZR|VZR AR|GB
\r
8167 psrlq mm4,[QShiftV]
\r
8168 mov ebx,dword ptr[UV16+4]
\r
8173 mov edx,dword ptr[UV16V]
\r
8185 packuswb mm7,mm5 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw ARGB|ARGB UZR|VZR AR|GB
\r
8191 pand mm4,[WrapMask]
\r
8193 mov al,byte ptr[esi]
\r
8196 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |ARGB
\r
8199 mov ebx,dword ptr[UV16+4]
\r
8200 psrlq mm4,[QShiftV]
\r
8202 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
8211 mov edx,dword ptr[UV16V]
\r
8219 pand mm4,[WrapMask]
\r
8222 mov al,byte ptr[esi]
\r
8223 movq [UV16],mm4 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
8225 movd mm5,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ARGB UZR|VZR AR|GB
\r
8228 punpcklbw mm5,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw AR|GB UZR|VZR AR|GB
\r
8231 psrlq mm4,[QShiftV]
\r
8237 mov ebx,dword ptr[UV16+4]
\r
8240 mov edx,dword ptr[UV16V]
\r
8244 pand mm4,[WrapMask]
\r
8255 psrlq mm4,[QShiftV]
\r
8257 mov al,byte ptr[esi]
\r
8260 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |ARGB
\r
8262 mov ebx,dword ptr[UV16+4]
\r
8264 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
8273 mov edx,dword ptr[UV16V]
\r
8282 pand mm4,[WrapMask]
\r
8285 mov al,byte ptr[esi]
\r
8286 movq [UV16],mm4 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
8288 movd mm5,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ARGB UZR|VZR AR|GB
\r
8291 punpcklbw mm5,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw AR|GB UZR|VZR AR|GB
\r
8294 psrlq mm4,[QShiftV]
\r
8300 mov ebx,dword ptr[UV16+4]
\r
8303 mov edx,dword ptr[UV16V]
\r
8307 pand mm4,[WrapMask]
\r
8319 movq mm2,[QZVal32_0]
\r
8320 movq mm7,[QZVal32_1]
\r
8322 paddd mm2,[QZDelta]
\r
8323 paddd mm7,[QZDelta]
\r
8325 movq [QZVal32_0],mm2
\r
8326 movq [QZVal32_1],mm7
\r
8337 psrlq mm4,[QShiftV]
\r
8339 mov al,byte ptr[esi]
\r
8342 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |ARGB
\r
8344 mov ebx,dword ptr[UV16+4]
\r
8346 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
8355 mov edx,dword ptr[UV16V]
\r
8363 pand mm4,[WrapMask]
\r
8366 mov al,byte ptr[esi]
\r
8367 movq [UV16],mm4 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
8369 movd mm5,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ARGB UZR|VZR AR|GB
\r
8372 punpcklbw mm5,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw AR|GB UZR|VZR AR|GB
\r
8375 psrlq mm4,[QShiftV]
\r
8381 mov ebx,dword ptr[UV16+4]
\r
8384 mov edx,dword ptr[UV16V]
\r
8388 pand mm4,[WrapMask]
\r
8399 psrlq mm4,[QShiftV]
\r
8401 mov al,byte ptr[esi]
\r
8404 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |ARGB
\r
8406 mov ebx,dword ptr[UV16+4]
\r
8408 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
8417 mov edx,dword ptr[UV16V]
\r
8425 pand mm4,[WrapMask]
\r
8428 mov al,byte ptr[esi]
\r
8429 movq [UV16],mm4 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
8431 movd mm5,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ARGB UZR|VZR AR|GB
\r
8434 punpcklbw mm5,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw AR|GB UZR|VZR AR|GB
\r
8437 psrlq mm4,[QShiftV]
\r
8443 mov ebx,dword ptr[UV16+4]
\r
8446 mov edx,dword ptr[UV16V]
\r
8450 pand mm4,[WrapMask]
\r
8462 movq mm2,[QZVal32_0]
\r
8463 movq mm7,[QZVal32_1]
\r
8465 paddd mm2,[QZDelta]
\r
8466 paddd mm7,[QZDelta]
\r
8468 movq [QZVal32_0],mm2
\r
8469 movq [QZVal32_1],mm7
\r
8479 psrlq mm4,[QShiftV]
\r
8481 mov al,byte ptr[esi]
\r
8484 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |ARGB
\r
8486 mov ebx,dword ptr[UV16+4]
\r
8488 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
8497 mov edx,dword ptr[UV16V]
\r
8505 pand mm4,[WrapMask]
\r
8508 mov al,byte ptr[esi]
\r
8509 movq [UV16],mm4 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
8511 movd mm5,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ARGB UZR|VZR AR|GB
\r
8514 punpcklbw mm5,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw AR|GB UZR|VZR AR|GB
\r
8517 psrlq mm4,[QShiftV]
\r
8523 mov ebx,dword ptr[UV16+4]
\r
8526 mov edx,dword ptr[UV16V]
\r
8530 pand mm4,[WrapMask]
\r
8541 psrlq mm4,[QShiftV]
\r
8543 mov al,byte ptr[esi]
\r
8546 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |ARGB
\r
8548 mov ebx,dword ptr[UV16+4]
\r
8550 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
8559 mov edx,dword ptr[UV16V]
\r
8567 pand mm4,[WrapMask]
\r
8570 mov al,byte ptr[esi]
\r
8571 movq [UV16],mm4 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
8573 movd mm5,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ARGB UZR|VZR AR|GB
\r
8576 punpcklbw mm5,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw AR|GB UZR|VZR AR|GB
\r
8579 psrlq mm4,[QShiftV]
\r
8585 mov ebx,dword ptr[UV16+4]
\r
8588 mov edx,dword ptr[UV16V]
\r
8592 pand mm4,[WrapMask]
\r
8604 movq mm2,[QZVal32_0]
\r
8605 movq mm7,[QZVal32_1]
\r
8607 paddd mm2,[QZDelta]
\r
8608 paddd mm7,[QZDelta]
\r
8610 movq [QZVal32_0],mm2
\r
8611 movq [QZVal32_1],mm7
\r
8621 psrlq mm4,[QShiftV]
\r
8623 mov al,byte ptr[esi]
\r
8626 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |ARGB
\r
8628 mov ebx,dword ptr[UV16+4]
\r
8630 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
8638 mov edx,dword ptr[UV16V]
\r
8647 mov al,byte ptr[esi]
\r
8649 movd mm5,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ARGB UZR|VZR AR|GB
\r
8651 punpcklbw mm5,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw AR|GB UZR|VZR AR|GB
\r
8664 movq mm2,[QZVal32_0]
\r
8665 movq mm7,[QZVal32_1]
\r
8667 paddd mm2,[QZDelta]
\r
8668 paddd mm7,[QZDelta]
\r
8670 movq [QZVal32_0],mm2
\r
8671 movq [QZVal32_1],mm7
\r
8673 ; get corrected right side deltas ; ULw|VLw DU16|DV16 argb|ARGBagAG|rbRB |ZRi aA|rR UZR|VZR aArR|gGbB
\r
8679 movq mm0,[UVLeftW] ; ULw|VLw DU16|DV16 argb|ARGBagAG|rbRB |ZRi aA|rR UZR|VZR aArR|gGbB
\r
8681 movq mm1,[UVLeft] ; ULw|VLw UL|VL argb|ARGBagAG|rbRB ZRi|ZRi aA|rR UZR|VZR aArR|gGbB
\r
8682 punpckldq mm2,mm2 ; ULw|VLw UL|VL ZRi|ZRi agAG|rbRB ZRi|ZRi aA|rR UZR|VZR aArR|gGbB
\r
8684 pfmul mm2,mm6 ; ULw|VLw UL|VL UR|VR agAG|rbRB ZRi|ZRi aA|rR UZR|VZR aArR|gGbB
\r
8686 add edi,64 ; move screen pointer to start of next aspan
\r
8690 pfmul mm3,[QFixedScale]
\r
8694 dec [NumASpans] ; dec num affine spans
\r
8697 HandleLeftoverPixelsLit:
\r
8703 cmp [RemainingCount],0
\r
8706 mov eax,[RemainingCount]
\r
8707 mov dword ptr[ZIR],eax
\r
8708 mov dword ptr[ZIR+4],eax
\r
8710 movq mm7,[GLMapMulUV] ;UL6i|VL6i UL|VL UR6i|VR6i URi|VRi |ZRi |ZY UZR|VZR LU|LV
\r
8713 pfsub mm6,[UVDivZ16StepX]
\r
8714 pfmul mm7,mm1 ;UL6i|VL6i UL|VL UR6i|VR6i URi|VRi |ZRi |ZY UZR|VZR ULL|VLL
\r
8716 pfmul mm5,[QFixedScale]
\r
8719 pfmul mm7,[QFixedScale]
\r
8720 pfmul mm3,[UVDivZStepX]
\r
8726 movd mm6,[Zi16StepX]
\r
8728 pf2id mm7,mm7 ; ULw|VLw DU|DV UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
8731 movd mm6,[ZiStepX]
\r
8732 mov ebx,[RemainingCount]
\r
8739 paddd mm5,[UVAdjust]
\r
8746 pfmul mm4,[QZBufferPrec]
\r
8748 paddd mm5,[UVAdjust2]
\r
8752 pfsubr mm1,mm2 ; ULw|VLw DU|DV UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
8754 pfmul mm1,qword ptr[QFixedScaleLUT+ebx] ; ULw|VLw DU16|DV16 UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
8757 pand mm5,[WrapMask]
\r
8758 pf2id mm1,mm1 ; ULw|VLw DU16|DV16 UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
8761 movq mm7,[UVAdjustL]
\r
8763 psrlq mm5,[QShiftV]
\r
8772 mov ebx,dword ptr[UVL16+4]
\r
8776 mov dword ptr[UVL16+4],ecx
\r
8782 mov dword ptr[UVL16+4],0
\r
8784 mov eax,dword ptr[UVL16]
\r
8788 mov dword ptr[UVL16],ecx
\r
8794 mov dword ptr[UVL16],0
\r
8797 mov ebx,dword ptr[UV16+4]
\r
8798 mov eax,dword ptr[UV16V]
\r
8808 mov ebp,dword ptr[Dest]
\r
8809 movd mm3,dword ptr[ZiStepX]
\r
8812 paddd mm0,mm1 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |
\r
8815 mov al,byte ptr[esi]
\r
8816 movq mm5,mm0 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw UL|VL |ZY UZR|VZR |
\r
8821 pand mm5,[WrapMask] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
8824 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |ARGB
\r
8827 movq [UV16],mm5 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
8830 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
8831 mov word ptr[ebp],ax
\r
8833 mov ebx,dword ptr[UV16+4]
\r
8835 psrlq mm5,[QShiftV]
\r
8843 mov edx,dword ptr[UV16V]
\r
8855 dec [RemainingCount]
\r
8856 jge LeftoverLoopLit
\r
8867 cRet DrawSpan32_AsmGouraudZWrite3DNow
\r
8868 endProc DrawSpan32_AsmGouraudZWrite3DNow
\r
8872 ;same with zbuffer
\r
;-----------------------------------------------------------------------
; DrawSpan32_AsmGouraudZBuffer3DNow
; Span renderer written with MMX / 3DNow! instructions.
; Args: x1, x2, y, r1, g1, b1, r2, g2, b2 (nine dwords; the 36 in the
;       cProc line matches 9 * 4 bytes).
; Phases, in order:
;   1. setup   - destination pointers from ClientWindow, span counters
;                (NumASpans / RemainingCount), colour and z deltas,
;                U/Z, V/Z and 1/z at the start of the span
;   2. spans   - 16 pixels per pass: 16 z words are stored at offsets
;                0..30 and Dest advances by 32 bytes per pass
;   3. HandleLeftoverPixelsLit - remaining pixels, one per pass, counted
;                down through RemainingCount
; The trailing comments on the MMX lines track mm0..mm7 as hi|lo pairs.
;-----------------------------------------------------------------------
8874 cProc DrawSpan32_AsmGouraudZBuffer3DNow, 36,<x1 : dword, x2 : dword, y : dword, r1 : dword, g1 : dword, b1 : dword, r2 : dword, g2 : dword, b2 : dword>
\r
; ebx -> ClientWindow; its Buffer and PixelPitch fields are read below
8889 mov ebx,offset ClientWindow
\r
8891 mov edi,[ebx].Buffer
\r
; eax is scaled by the pitch (presumably eax holds the row here -- confirm)
8893 imul eax, [ebx].PixelPitch
\r
; addresses of the "can" tables (QZCan, SCan, QDibCan, QDibOrCan, QZOrCan)
; NOTE(review): these look like pointer tables used to choose a store
; target per pixel without branching -- confirm against the full routine
8901 mov eax,offset QZCan
\r
8902 mov ebx,offset SCan
\r
8905 mov eax,offset QDibCan
\r
8908 mov ebx,offset QDibCan
\r
8909 mov eax,offset QDibOrCan
\r
8912 mov ebx,offset QZCan
\r
8913 mov eax,offset QZOrCan
\r
; save the span count (ecx) and the leftover pixel count (eax)
8926 mov [NumASpans],ecx
\r
8927 mov [RemainingCount],eax
\r
8929 ;grab the left side lights
\r
8933 ; punpckldq mm5,qword ptr[Zero]
\r
; column legend for the register-state comments that follow
8946 ; mm0 mm1 mm2 mm3 mm4 mm5 mm6 mm7
\r
8947 movd mm0,x1 ; |x | | | | | | |
\r
8948 movq mm2,[UVDivZStepX] ; |x | UZdX|VZdX | | | | |
\r
8950 movd mm1,y ; |x |y UZdX|VZdX | | | | |
\r
8951 movq mm3,[UVDivZStepY] ; |x |y UZdX|VZdXUZdY|VZdY | | | |
\r
; duplicate x and y into both dwords so they can scale U and V together
8953 punpckldq mm0,mm0 ; x|x |y UZdX|VZdXUZdY|VZdY | | | |
\r
8954 punpckldq mm1,mm1 ; x|x y|y UZdX|VZdXUZdY|VZdY | | | |
\r
8956 movd mm7,edx ; x|x y|y UZdX|VZdXUZdY|VZdY | | | |wid
\r
8957 movd mm5,b2 ; x|x y|y UZdX|VZdXUZdY|VZdY | |b | |wid
\r
; pi2fd: packed integer -> float for x, the width and y
8959 pi2fd mm0,mm0 ; x|x y|y UZdX|VZdXUZdY|VZdY | |b | |wid
\r
8960 movd mm6,b1 ; x|x y|y UZdX|VZdXUZdY|VZdY | |b |b |wid
\r
8962 pi2fd mm7,mm7 ; x|x y|y UZdX|VZdXUZdY|VZdY | |b |b |wid
\r
8963 punpckldq mm5,qword ptr[g2] ; x|x y|y UZdX|VZdXUZdY|VZdY | g|b |b |wid
\r
8965 pi2fd mm1,mm1 ; x|x y|y UZdX|VZdXUZdY|VZdY | g|b |b |wid
\r
8966 punpckldq mm6,qword ptr[g1] ; x|x y|y UZdX|VZdXUZdY|VZdY | g|b g|b |wid
\r
8968 pfmul mm2,mm0 ; x|x y|y UZX|VZX UZdY|VZdY | g|b g|b |wid
\r
; pfrcp: approximate reciprocal of the width; scaled by Q128 further down
8969 pfrcp mm7,mm7 ; x|x y|y UZX|VZX UZdY|VZdY | g|b g|b dw|dw
\r
; colour deltas: (g2|b2) - (g1|b1) here, r2 - r1 below
; NOTE(review): pfsub is a float subtract and no int->float conversion of
; the colour args is visible, so they are presumably passed as floats -- confirm
8971 pfsub mm5,mm6 ; x|x y|y UZX|VZX UZdY|VZdY | gd|bd g|b dw|dw
\r
8972 movd mm4,[r1] ; x|x y|y UZX|VZX UZdY|VZdY |r gd|bd g|b dw|dw
\r
8974 movd mm6,[r2] ; x|x y|y UZX|VZX UZdY|VZdY |r gd|bd g|r dw|dw
\r
8975 pfmul mm3,mm1 ; x|x y|y UZX|VZX UZY|VZY |r gd|bd g|r dw|dw
\r
8977 pfsub mm6,mm4 ; x|x y|y UZX|VZX UZY|VZY |r gd|bd x|rd dw|dw
\r
8978 pfmul mm7,[Q128] ; x|x y|y UZX|VZX UZY|VZY |r gd|bd x|rd DW|DW
\r
8980 movd mm4,[ZiStepX] ; x|x y|y UZX|VZX UZY|VZY |ZdX gd|bd x|rd DW|DW
\r
; QZDelta = ZiStepX scaled by QZBufferPrec
8983 pfmul mm4,[QZBufferPrec]
\r
8985 movq [QZDelta],mm4
\r
; reload the unscaled ZiStepX for the 1/z setup
8987 movd mm4,[ZiStepX] ; x|x y|y UZX|VZX UZdY|VZdY |ZdX | | |
\r
; colour deltas * DW (the Q128-scaled reciprocal width)
8988 pfmul mm5,mm7 ; x|x y|y UZX|VZX UZY|VZY |ZdX GD|BD x|rd DW|DW
\r
8990 pfmul mm4,mm0 ; x|x y|y UZX|VZX UZY|VZY |ZX GD|BD x|rd DW|DW
\r
8991 pfmul mm6,mm7 ; x|x y|y UZX|VZX UZY|VZY |ZX GD|BD x|RD DW|DW
\r
8997 movq [RGBADelta],mm5
\r
; UZ|VZ = UVDivZOrigin + x*UVDivZStepX + y*UVDivZStepY
; Zi    = ZiOrigin     + x*ZiStepX     + y*ZiStepY
8999 movd mm5,[ZiStepY] ; x|x y|y UZX|VZX UZY|VZY |ZX |ZdY x|RD DW|DW
\r
9000 movq mm6,[UVDivZOrigin] ; x|x y|y UZX|VZX UZY|VZY |ZX |ZdY UZO|VZO DW|DW
\r
9002 pfmul mm5,mm1 ; x|x y|y UZX|VZX UZY|VZY |ZX |ZY UZO|VZO DW|DW
\r
9003 movd mm7,[ZiOrigin] ; x|x y|y UZX|VZX UZY|VZY |ZX |ZY UZO|VZO |ZO
\r
9005 pfadd mm6,mm2 ; x|x y|y UZX|VZX UZY|VZY |ZX |ZY UZXS|VZXS |ZO
\r
9006 pfadd mm4,mm7 ; x|x y|y UZX|VZX UZY|VZY |ZXS |ZY UZXS|VZXS |ZO
\r
9008 pfadd mm6,mm3 ; x|x y|y UZX|VZX UZY|VZY |ZXS |ZY UZ|VZ |ZO
\r
9009 pfadd mm4,mm5 ; x|x y|y UZX|VZX UZY|VZY |Zi |ZY UZ|VZ |ZO
\r
9012 movq mm1,[QZDelta]
\r
9014 pfmul mm7,[QZBufferPrec]
\r
; seed the two packed z accumulators
9020 movq [QZVal32_0],mm0
\r
9024 movq [QZVal32_1],mm0
\r
9026 movd mm7,[Zi16StepX]
\r
9027 movq [QZDelta],mm1
\r
; perspective divide at both ends of the span: pfrcp approximates 1/Zi on
; the left, Zi is advanced by Zi16StepX and inverted again for the right
9029 pfrcp mm0,mm4 ; ZL|ZL y|y UZX|VZX UZY|VZY |Zi |ZY UZ|VZ |ZdX16
\r
9031 movq mm1,mm6 ; ZL|ZL UZ|VZ UZX|VZX UZY|VZY |Zi |ZY UZ|VZ |ZdX16
\r
9032 pfadd mm4,mm7 ; ZL|ZL UZ|VZ UZX|VZX UZY|VZY |ZRi |ZY UZ|VZ |ZdX16
\r
9034 pfmul mm1,mm0 ; ZL|ZL UL|VL UZX|VZX UZY|VZY |ZRi |ZY UZ|VZ |ZdX16
\r
9035 pfrcp mm2,mm4 ; ZL|ZL UL|VL ZR|ZR UZY|VZY |ZRi |ZY UZ|VZ |ZdX16
\r
9037 pfadd mm6,[UVDivZ16StepX] ; ZL|ZL UL|VL ZR|ZR UZY|VZY |ZRi |ZY UZR|VZR |ZdX16
\r
9038 pfmul mm2,mm6 ; ZL|ZL UL|VL UR|VR UZY|VZY |ZRi |ZY UZR|VZR |ZdX16
\r
9040 movq mm0,mm1 ; UL|VL UL|VL UR|VR UZY|VZY |ZRi |ZY UZR|VZR |ZdX16
\r
9041 movq mm3,mm2 ; ULi|VLi UL|VL UR|UR UR|UR |ZRi |ZY UZR|VZR |ZdX16
\r
; copies of left and right U|V scaled by QFixedScale and converted to
; integers (pf2id); mm1 / mm2 keep the float values
9043 pfmul mm0,[QFixedScale] ; UL6|VL6 UL|VL UR|UR UR|UR |ZRi |ZY UZR|VZR |ZdX16
\r
9044 pfmul mm3,[QFixedScale] ; UL6|VL6 UL|VL UR6|UR6 UR6|UR6 |ZRi |ZY UZR|VZR |ZdX16
\r
9046 pf2id mm0,mm0 ;UL6i|VL6i UL|VL UR6|VR6 UR6|VR6 |ZRi |ZY UZR|VZR |ZdX16
\r
9047 pf2id mm3,mm3 ;UL6i|VL6i UL|VL UR6i|VR6i URi|VRi |ZRi |ZY UZR|VZR |ZdX16
\r
; NOTE(review): taken when the preceding test leaves ZF set -- presumably
; NumASpans == 0, i.e. no full span to draw; confirm
9051 jz HandleLeftoverPixelsLit
\r
; lightmap coordinates for the left end:
; (UL|VL * GLMapMulUV * QFixedScale) -> integer, plus UVAdjustL
9053 movq mm7,[GLMapMulUV] ;UL6i|VL6i UL|VL UR6i|VR6i URi|VRi |ZRi |ZY UZR|VZR LU|LV
\r
9054 pfmul mm7,mm1 ;UL6i|VL6i UL|VL UR6i|VR6i URi|VRi |ZRi |ZY UZR|VZR ULL|VLL
\r
9055 pfmul mm7,[QFixedScale]
\r
9056 pf2id mm7,mm7 ; ULw|VLw DU|DV UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
9057 paddd mm7,[UVAdjustL]
\r
; clamp the lightmap coordinate pair in UVL16: the high dword (+4) is
; handled first, then the low dword; each is overwritten with ecx or 0
; (ecx presumably holds the upper limit here -- confirm)
9061 mov ebx,dword ptr[UVL16+4]
\r
9064 jle TryClampU0Litp
\r
9066 mov dword ptr[UVL16+4],ecx
\r
9072 mov dword ptr[UVL16+4],0
\r
9074 mov eax,dword ptr[UVL16]
\r
9076 jle TryClampV0Litp
\r
9078 mov dword ptr[UVL16],ecx
\r
9084 mov dword ptr[UVL16],0
\r
9091 ;use float uv for lightmap uv
\r
9092 movq mm7,[GLMapMulUV] ;UL6i|VL6i UL|VL UR6i|VR6i URi|VRi |ZRi |ZY UZR|VZR LU|LV
\r
9095 pfmul mm7,mm2 ;UL6i|VL6i UL|VL UR6i|VR6i URi|VRi |ZRi |ZY UZR|VZR ULL|VLL
\r
9096 pfmul mm5,[QFixedScale]
\r
9098 pfmul mm7,[QFixedScale]
\r
9101 pf2id mm7,mm7 ; ULw|VLw DU|DV UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
9102 paddd mm5,[UVAdjust]
\r
; pfsubr: mm1 = mm2 - mm1, i.e. right U|V minus left U|V
9104 pfsubr mm1,mm2 ; ULw|VLw DU|DV UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
9105 paddd mm5,[UVAdjust2]
\r
9110 movd mm7,[Zi16StepX]
\r
9113 pfmul mm1,[QFixedScale16] ; ULw|VLw DU16|DV16 UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
9114 ;right side stuff becomes left ; ULw|VLw DU16|DV16 UL|VL URw|VRw |ZLi |ZY UZL|VZL |
\r
9115 pfadd mm4,mm7 ;ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZL|VZL |
\r
9117 pand mm5,[WrapMask]
\r
9118 pf2id mm1,mm1 ; ULw|VLw DU16|DV16 UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
9120 movq mm7,[UVAdjustL]
\r
9123 pfadd mm6,[UVDivZ16StepX] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |
\r
9126 psrlq mm5,[QShiftV]
\r
; same clamp sequence on UVL16, this time for the new right end
9133 mov ebx,dword ptr[UVL16+4]
\r
9137 mov dword ptr[UVL16+4],ecx
\r
9143 mov dword ptr[UVL16+4],0
\r
9145 mov eax,dword ptr[UVL16]
\r
9149 mov dword ptr[UVL16],ecx
\r
9155 mov dword ptr[UVL16],0
\r
9158 movq [UVLeftW],mm3
\r
9159 mov eax,dword ptr[UV16V]
\r
9160 mov ebx,dword ptr[UV16+4]
\r
9171 movq mm3,[RGBADelta]
\r
9178 ;grab zbuffer values
\r
9179 movq mm2,[QZVal32_0]
\r
9180 movq mm7,[QZVal32_1]
\r
; ---- pixel sequence, repeated for each pixel of the span ----
; step U|V (mm0 += mm1), wrap with WrapMask, spill to UV16 so the integer
; registers can pick up the two halves, then shift by QShiftV
9192 paddd mm0,mm1 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |
\r
; al = texel byte at [esi]; it indexes the 4-byte table at ecx further down
; (presumably the texture palette -- confirm)
9194 mov al,byte ptr[esi]
\r
9195 movq mm4,mm0 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw UL|VL |ZY UZR|VZR |
\r
9201 pand mm4,[WrapMask] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
9204 movq [UV16],mm4 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
9207 psrlq mm4,[QShiftV]
\r
; punpcklwd with Zero widens the low two words of mm7 to dwords before the
; table base (QZOrCan here, QDibOrCan below) is added
9212 punpcklwd mm7,[Zero]
\r
9215 paddd mm7,[QZOrCan]
\r
9216 mov edx,dword ptr[UV16V]
\r
9219 mov ebx,dword ptr[UV16+4]
\r
; the z store target is fetched indirectly: edi = [QZOut], then edi = [edi]
9222 mov edi,dword ptr[QZOut]
\r
9224 punpcklwd mm7,[Zero]
\r
9227 paddd mm7,[QDibOrCan]
\r
9228 mov edi,dword ptr[edi]
\r
9230 movq [QDibOut],mm7
\r
9231 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |ARGB
\r
; punpcklbw with Zero widens the four colour bytes to words
9234 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
; z for pixel N is taken from word (N mod 4) of QZVal and stored at
; [edi + 2*N]; the store offsets run from 0 to 30 over the span
9239 mov ax,word ptr[QZVal]
\r
9242 mov word ptr[edi],ax
\r
; same double indirection for the pixel destination via QDibOut
9244 mov edi,dword ptr[QDibOut]
\r
9247 mov edi,dword ptr[edi]
\r
9254 mov al,byte ptr[esi]
\r
9259 pand mm4,[WrapMask]
\r
9260 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ARGB UZR|VZR AR|GB
\r
9263 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw AR|GB UZR|VZR AR|GB
\r
9265 psrlq mm4,[QShiftV]
\r
9266 mov ebx,dword ptr[UV16+4]
\r
9271 mov edx,dword ptr[UV16V]
\r
; the second pointer slot (+4) of QZOut / QDibOut serves the odd pixel
9278 mov edi,dword ptr[QZOut+4]
\r
9283 mov edi,dword ptr[edi]
\r
; packuswb saturates the colour words back down to bytes
9286 packuswb mm7,mm7 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw ARGB|ARGB UZR|VZR AR|GB
\r
9287 mov ax,word ptr[QZVal+2]
\r
9290 mov word ptr[edi+2],ax
\r
9293 mov edi,dword ptr[QDibOut+4]
\r
9295 mov al,byte ptr[esi]
\r
9298 mov edi,dword ptr[edi]
\r
9299 pand mm4,[WrapMask]
\r
; punpckhwd: same widening, but for the high two words (pixels 2 and 3)
9307 punpckhwd mm7,[Zero]
\r
9308 psrlq mm4,[QShiftV]
\r
; NOTE(review): spelled QZorCan here and in the later spans, but QZOrCan
; above; both name the same symbol only if the assembler folds case -- confirm
9310 paddd mm7,[QZorCan]
\r
9314 mov ebx,dword ptr[UV16+4]
\r
9317 mov edi,dword ptr[QZOut]
\r
9319 punpckhwd mm7,[Zero]
\r
9320 mov edi,dword ptr[edi]
\r
9322 paddd mm7,[QDibOrCan]
\r
9325 movq [QDibOut],mm7
\r
9326 mov edx,dword ptr[UV16V]
\r
9328 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |ARGB
\r
9331 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
9332 mov ax,word ptr[QZVal+4]
\r
9335 mov word ptr[edi+4],ax
\r
9338 mov edi,dword ptr[QDibOut]
\r
9343 mov edi,dword ptr[edi]
\r
9351 pand mm4,[WrapMask]
\r
9358 mov al,byte ptr[esi]
\r
9360 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ARGB UZR|VZR AR|GB
\r
9361 psrlq mm4,[QShiftV]
\r
9363 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw AR|GB UZR|VZR AR|GB
\r
9366 mov ebx,dword ptr[UV16+4]
\r
9367 mov edx,dword ptr[UV16V]
\r
9376 mov edi,dword ptr[QZOut+4]
\r
9381 mov edi,dword ptr[edi]
\r
9383 packuswb mm7,mm7 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw ARGB|ARGB UZR|VZR AR|GB
\r
9384 mov ax,word ptr[QZVal+6]
\r
9387 mov word ptr[edi+6],ax
\r
9389 mov edi,dword ptr[QDibOut+4]
\r
9392 mov edi,dword ptr[edi]
\r
9393 mov al,byte ptr[esi]
\r
; after four pixels: advance both packed z accumulators by QZDelta
9399 movq mm2,[QZVal32_0]
\r
9400 movq mm7,[QZVal32_1]
\r
9402 paddd mm2,[QZDelta]
\r
9403 paddd mm7,[QZDelta]
\r
9405 movq [QZVal32_0],mm2
\r
9406 movq [QZVal32_1],mm7
\r
9412 pand mm4,[WrapMask]
\r
; packed signed word compare of mm2 against the four words at [ebp+8]
; (ebp presumably points at the stored z values for this span -- confirm)
9420 pcmpgtw mm2,[ebp+8]
\r
9421 psrlq mm4,[QShiftV]
\r
9429 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |ARGB
\r
9432 mov ebx,dword ptr[UV16+4]
\r
9434 punpcklwd mm4,[Zero]
\r
9435 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
9437 paddd mm4,[QZorCan]
\r
9441 mov edi,dword ptr[QZOut]
\r
9448 punpcklwd mm4,[Zero]
\r
9449 mov edi,dword ptr[edi]
\r
9453 mov ax,word ptr[QZVal]
\r
9454 paddd mm4,[QDibOrCan]
\r
9456 mov word ptr[edi+8],ax
\r
9457 movq [QDibOut],mm4
\r
9460 mov edi,dword ptr[QDibOut]
\r
9463 mov edx,dword ptr[UV16V]
\r
9465 mov edi,dword ptr[edi]
\r
9473 pand mm4,[WrapMask]
\r
9479 mov al,byte ptr[esi]
\r
9482 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ARGB UZR|VZR AR|GB
\r
9483 psrlq mm4,[QShiftV]
\r
9485 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw AR|GB UZR|VZR AR|GB
\r
9488 mov ebx,dword ptr[UV16+4]
\r
9489 mov edx,dword ptr[UV16V]
\r
9497 mov edi,dword ptr[QZOut+4]
\r
9504 mov edi,dword ptr[edi]
\r
9507 packuswb mm7,mm7 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw ARGB|ARGB UZR|VZR AR|GB
\r
9508 pand mm4,[WrapMask]
\r
9510 mov ax,word ptr[QZVal+2]
\r
9514 psrlq mm4,[QShiftV]
\r
9516 mov word ptr[edi+10],ax
\r
9520 mov edi,dword ptr[QDibOut+4]
\r
9522 mov al,byte ptr[esi]
\r
9523 mov edi,dword ptr[edi]
\r
9529 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |ARGB
\r
9531 punpckhwd mm4,[Zero]
\r
9533 mov ebx,dword ptr[UV16+4]
\r
9534 paddd mm4,[QZorCan]
\r
9536 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
9542 mov edi,dword ptr[QZOut]
\r
9545 punpckhwd mm4,[Zero]
\r
9549 mov edi,dword ptr[edi]
\r
9552 mov ax,word ptr[QZVal+4]
\r
9553 paddd mm4,[QDibOrCan]
\r
9555 mov word ptr[edi+12],ax
\r
9556 movq [QDibOut],mm4
\r
9559 mov edi,dword ptr[QDibOut]
\r
9562 mov edi,dword ptr[edi]
\r
9564 mov edx,dword ptr[UV16V]
\r
9572 pand mm4,[WrapMask]
\r
9578 mov al,byte ptr[esi]
\r
9581 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ARGB UZR|VZR AR|GB
\r
9582 psrlq mm4,[QShiftV]
\r
9584 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw AR|GB UZR|VZR AR|GB
\r
9587 mov ebx,dword ptr[UV16+4]
\r
9588 mov edx,dword ptr[UV16V]
\r
9598 mov edi,dword ptr[QZOut+4]
\r
9603 mov edi,dword ptr[edi]
\r
9605 packuswb mm7,mm7 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw ARGB|ARGB UZR|VZR AR|GB
\r
9606 mov ax,word ptr[QZVal+6]
\r
9609 mov word ptr[edi+14],ax
\r
; eight pixels done: advance the z accumulators again (mm4 carries the
; second accumulator from here on)
9612 movq mm2,[QZVal32_0]
\r
9613 movq mm4,[QZVal32_1]
\r
9615 paddd mm2,[QZDelta]
\r
9616 paddd mm4,[QZDelta]
\r
9618 movq [QZVal32_0],mm2
\r
9619 movq [QZVal32_1],mm4
\r
9629 mov edi,dword ptr[QDibOut+4]
\r
9632 mov edi,dword ptr[edi]
\r
9634 mov al,byte ptr[esi]
\r
9637 pand mm4,[WrapMask]
\r
; z compare for the third group of four, at [ebp+16]
9638 pcmpgtw mm2,[ebp+16]
\r
9646 psrlq mm4,[QShiftV]
\r
9652 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |ARGB
\r
9653 punpcklwd mm4,[Zero]
\r
9655 mov ebx,dword ptr[UV16+4]
\r
9656 paddd mm4,[QZorCan]
\r
9658 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
9664 mov edi,dword ptr[QZOut]
\r
9667 punpcklwd mm4,[Zero]
\r
9670 mov edi,dword ptr[edi]
\r
9672 mov ax,word ptr[QZVal]
\r
9673 paddd mm4,[QDibOrCan]
\r
9676 mov word ptr[edi+16],ax
\r
9677 movq [QDibOut],mm4
\r
9680 mov edi,dword ptr[QDibOut]
\r
9683 mov edi,dword ptr[edi]
\r
9684 mov edx,dword ptr[UV16V]
\r
9692 pand mm4,[WrapMask]
\r
9698 mov al,byte ptr[esi]
\r
9701 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ARGB UZR|VZR AR|GB
\r
9702 psrlq mm4,[QShiftV]
\r
9704 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw AR|GB UZR|VZR AR|GB
\r
9707 mov ebx,dword ptr[UV16+4]
\r
9708 mov edx,dword ptr[UV16V]
\r
9718 mov edi,dword ptr[QZOut+4]
\r
9724 mov edi,dword ptr[edi]
\r
9726 pand mm4,[WrapMask]
\r
9727 packuswb mm7,mm7 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw ARGB|ARGB UZR|VZR AR|GB
\r
9730 mov ax,word ptr[QZVal+2]
\r
9732 psrlq mm4,[QShiftV]
\r
9736 mov word ptr[edi+18],ax
\r
9739 mov edi,dword ptr[QDibOut+4]
\r
9741 mov al,byte ptr[esi]
\r
9742 mov edi,dword ptr[edi]
\r
9749 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |ARGB
\r
9751 punpckhwd mm4,[Zero]
\r
9753 mov ebx,dword ptr[UV16+4]
\r
9754 paddd mm4,[QZorCan]
\r
9756 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
9762 mov edi,dword ptr[QZOut]
\r
9765 punpckhwd mm4,[Zero]
\r
9769 mov edi,dword ptr[edi]
\r
9772 mov ax,word ptr[QZVal+4]
\r
9773 paddd mm4,[QDibOrCan]
\r
9775 mov word ptr[edi+20],ax
\r
9776 movq [QDibOut],mm4
\r
9779 mov edi,dword ptr[QDibOut]
\r
9782 mov edi,dword ptr[edi]
\r
9783 mov edx,dword ptr[UV16V]
\r
9791 pand mm4,[WrapMask]
\r
9797 mov al,byte ptr[esi]
\r
9800 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ARGB UZR|VZR AR|GB
\r
9801 psrlq mm4,[QShiftV]
\r
9803 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw AR|GB UZR|VZR AR|GB
\r
9806 mov ebx,dword ptr[UV16+4]
\r
9807 mov edx,dword ptr[UV16V]
\r
9817 mov edi,dword ptr[QZOut+4]
\r
9821 mov edi,dword ptr[edi]
\r
9823 packuswb mm7,mm7 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw ARGB|ARGB UZR|VZR AR|GB
\r
9824 mov ax,word ptr[QZVal+6]
\r
9827 mov word ptr[edi+22],ax
\r
; twelve pixels done: advance the z accumulators for the last group
9830 movq mm2,[QZVal32_0]
\r
9831 movq mm4,[QZVal32_1]
\r
9833 paddd mm2,[QZDelta]
\r
9834 paddd mm4,[QZDelta]
\r
9836 movq [QZVal32_0],mm2
\r
9837 movq [QZVal32_1],mm4
\r
9847 mov edi,dword ptr[QDibOut+4]
\r
9850 mov edi,dword ptr[edi]
\r
9852 mov al,byte ptr[esi]
\r
9855 pand mm4,[WrapMask]
\r
; z compare for the fourth group of four, at [ebp+24]
9856 pcmpgtw mm2,[ebp+24]
\r
9864 psrlq mm4,[QShiftV]
\r
9872 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |ARGB
\r
9873 punpcklwd mm4,[Zero]
\r
9875 mov ebx,dword ptr[UV16+4]
\r
9876 paddd mm4,[QZorCan]
\r
9878 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
9884 mov edi,dword ptr[QZOut]
\r
9887 punpcklwd mm4,[Zero]
\r
9891 mov edi,dword ptr[edi]
\r
9894 mov ax,word ptr[QZVal]
\r
9895 paddd mm4,[QDibOrCan]
\r
9897 mov word ptr[edi+24],ax
\r
9898 movq [QDibOut],mm4
\r
9901 mov edi,dword ptr[QDibOut]
\r
9904 mov edi,dword ptr[edi]
\r
9905 mov edx,dword ptr[UV16V]
\r
9913 pand mm4,[WrapMask]
\r
9919 mov al,byte ptr[esi]
\r
9922 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ARGB UZR|VZR AR|GB
\r
9923 psrlq mm4,[QShiftV]
\r
9925 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw AR|GB UZR|VZR AR|GB
\r
9928 mov ebx,dword ptr[UV16+4]
\r
9929 mov edx,dword ptr[UV16V]
\r
9939 mov edi,dword ptr[QZOut+4]
\r
9943 mov edi,dword ptr[edi]
\r
9945 packuswb mm7,mm7 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw ARGB|ARGB UZR|VZR AR|GB
\r
9946 mov ax,word ptr[QZVal+2]
\r
9951 pand mm4,[WrapMask]
\r
9952 mov word ptr[edi+26],ax
\r
9957 psrlq mm4,[QShiftV]
\r
9958 mov edi,dword ptr[QDibOut+4]
\r
9961 mov al,byte ptr[esi]
\r
9963 mov edi,dword ptr[edi]
\r
9970 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |ARGB
\r
9972 punpckhwd mm4,[Zero]
\r
9974 mov ebx,dword ptr[UV16+4]
\r
9975 paddd mm4,[QZorCan]
\r
9977 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
9983 mov edi,dword ptr[QZOut]
\r
9986 punpckhwd mm4,[Zero]
\r
9987 mov edi,dword ptr[edi]
\r
9991 mov ax,word ptr[QZVal+4]
\r
9992 paddd mm4,[QDibOrCan]
\r
9995 mov word ptr[edi+28],ax
\r
9996 movq [QDibOut],mm4
\r
9999 mov edi,dword ptr[QDibOut]
\r
10002 mov edi,dword ptr[edi]
\r
10003 mov edx,dword ptr[UV16V]
\r
; pixel store at byte offset 56 (32-bit pixel index 14)
10005 movd [edi+56],mm7
\r
10011 pand mm4,[WrapMask]
\r
10017 mov al,byte ptr[esi]
\r
10019 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ARGB UZR|VZR AR|GB
\r
10021 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw AR|GB UZR|VZR AR|GB
\r
10028 mov edi,dword ptr[QZOut+4]
\r
10031 mov edi,dword ptr[edi]
\r
10033 packuswb mm7,mm7 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw ARGB|ARGB UZR|VZR AR|GB
\r
10034 mov ax,word ptr[QZVal+6]
\r
10037 mov word ptr[edi+30],ax
\r
10039 mov edi,dword ptr[QDibOut+4]
\r
10042 mov edi,dword ptr[edi]
\r
; final z advance of the span (mm3 carries the second accumulator here)
10045 movq mm2,[QZVal32_0]
\r
10046 movq mm3,[QZVal32_1]
\r
10048 paddd mm2,[QZDelta]
\r
10049 paddd mm3,[QZDelta]
\r
10051 movq [QZVal32_0],mm2
\r
10052 movq [QZVal32_1],mm3
\r
; pixel store at byte offset 60 (32-bit pixel index 15, last of the span)
10054 movd [edi+60],mm7
\r
10056 ; get corrected right side deltas ; ULw|VLw DU16|DV16 argb|ARGBagAG|rbRB |ZRi aA|rR UZR|VZR aArR|gGbB
\r
10062 movq mm0,[UVLeftW] ; ULw|VLw DU16|DV16 argb|ARGBagAG|rbRB |ZRi aA|rR UZR|VZR aArR|gGbB
\r
10064 movq mm1,[UVLeft] ; ULw|VLw UL|VL argb|ARGBagAG|rbRB ZRi|ZRi aA|rR UZR|VZR aArR|gGbB
\r
10065 punpckldq mm2,mm2 ; ULw|VLw UL|VL ZRi|ZRi agAG|rbRB ZRi|ZRi aA|rR UZR|VZR aArR|gGbB
\r
10067 pfmul mm2,mm6 ; ULw|VLw UL|VL UR|VR agAG|rbRB ZRi|ZRi aA|rR UZR|VZR aArR|gGbB
\r
; advance to the next span: Dest += 32 bytes, second dword of QZCan += 32,
; second dword of QDibCan += 64
10068 mov eax,offset QZCan
\r
10069 add dword ptr[Dest],32
\r
10071 add dword ptr[eax+4],32
\r
10072 mov eax,offset QDibCan
\r
10074 add dword ptr[eax+4],64
\r
10077 pfmul mm3,[QFixedScale]
\r
10081 dec [NumASpans] ; dec num affine spans
\r
; ---- leftover pixels: fewer than a full span remain ----
10084 HandleLeftoverPixelsLit:
\r
10090 cmp [RemainingCount],0
\r
; RemainingCount is copied into both dwords of ZIR
10093 mov eax,[RemainingCount]
\r
10094 mov dword ptr[ZIR],eax
\r
10095 mov dword ptr[ZIR+4],eax
\r
10097 movq mm7,[GLMapMulUV] ;UL6i|VL6i UL|VL UR6i|VR6i URi|VRi |ZRi |ZY UZR|VZR LU|LV
\r
; take back the full-span UV/Z step that was added ahead of time
10100 pfsub mm6,[UVDivZ16StepX]
\r
10101 pfmul mm7,mm1 ;UL6i|VL6i UL|VL UR6i|VR6i URi|VRi |ZRi |ZY UZR|VZR ULL|VLL
\r
10103 pfmul mm5,[QFixedScale]
\r
10106 pfmul mm7,[QFixedScale]
\r
10107 pfmul mm3,[UVDivZStepX]
\r
10113 movd mm6,[Zi16StepX]
\r
10115 pf2id mm7,mm7 ; ULw|VLw DU|DV UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
10118 movd mm6,[ZiStepX]
\r
10119 mov ebx,[RemainingCount]
\r
10126 paddd mm5,[UVAdjust]
\r
10128 punpckldq mm4,mm4
\r
10133 pfmul mm4,[QZBufferPrec]
\r
10135 paddd mm5,[UVAdjust2]
\r
10138 movq [UVLeft],mm5
\r
10139 pfsubr mm1,mm2 ; ULw|VLw DU|DV UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
; the U|V difference is scaled by a QFixedScaleLUT entry selected by ebx
; (ebx is loaded from RemainingCount above)
10141 pfmul mm1,qword ptr[QFixedScaleLUT+ebx] ; ULw|VLw DU16|DV16 UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
10144 pand mm5,[WrapMask]
\r
10145 pf2id mm1,mm1 ; ULw|VLw DU16|DV16 UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
10148 movq mm7,[UVAdjustL]
\r
10150 psrlq mm5,[QShiftV]
\r
10151 paddd mm7,[UVL16]
\r
; clamp the lightmap coordinate pair in UVL16, as in the span setup
10159 mov ebx,dword ptr[UVL16+4]
\r
10161 jle TryClampU1Lit
\r
10163 mov dword ptr[UVL16+4],ecx
\r
10169 mov dword ptr[UVL16+4],0
\r
10171 mov eax,dword ptr[UVL16]
\r
10173 jle TryClampV1Lit
\r
10175 mov dword ptr[UVL16],ecx
\r
10181 mov dword ptr[UVL16],0
\r
10184 mov ebx,dword ptr[UV16+4]
\r
10185 mov eax,dword ptr[UV16V]
\r
; edi = second dword of QDibCan, ebp = Dest
10195 mov eax,offset QDibCan
\r
10198 mov edi,dword ptr[eax+4]
\r
10199 mov ebp,dword ptr[Dest]
\r
10201 movd mm3,dword ptr[ZiStepX]
\r
; per-pixel body of the leftover loop: step U|V, fetch the texel, widen it
10205 paddd mm0,mm1 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |
\r
10208 mov al,byte ptr[esi]
\r
10209 movq mm5,mm0 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw UL|VL |ZY UZR|VZR |
\r
10214 pand mm5,[WrapMask] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
10217 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |ARGB
\r
10219 movq [UV16],mm5 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
10222 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
10225 mov ebx,dword ptr[UV16+4]
\r
10227 psrlq mm5,[QShiftV]
\r
10234 mov edx,dword ptr[UV16V]
\r
10237 ; paddw mm6,mm3 no lerp for this... too much goin on
\r
; z test: the new z in ax is compared with the stored word at [ebp]
10241 cmp ax,word ptr[ebp]
\r
; RemainingCount is the loop counter; loop again while the decremented
; value is still >= 0 (signed)
10254 dec [RemainingCount]
\r
10255 jge LeftoverLoopLit
\r
10266 cRet DrawSpan32_AsmGouraudZBuffer3DNow
\r
10267 endProc DrawSpan32_AsmGouraudZBuffer3DNow
\r
10270 ;argb alpha (greyscale), affine, gouraud
\r
; DrawScanLineGouraudNoZAlphaARGB_Asm3DNow
; Scan-line routine declared with two dword arguments, pLeft and pRight.
; The visible code reads the fields .uf/.vf/.rf/.gf/.bf through ecx and ebx,
; multiplies packed floats by QFixedScale, and then runs a per-pixel loop
; (GouraudLoopNoZAlphaTex) that steps a packed U|V value, masks it with
; WrapMask and fetches a dword from a table indexed through ecx.
; NOTE(review): 8 = 2 x 4 bytes, presumably the argument byte count expected by
; the cProc macro -- confirm against the macro definition.
; NOTE(review): presumably ecx/ebx hold pLeft/pRight when the fields are read --
; confirm which register maps to which argument.
10272 cProc DrawScanLineGouraudNoZAlphaARGB_Asm3DNow, 8,<pLeft : dword, pRight : dword>
\r
; early out straight to the return label when the preceding compare is <= (signed)
10284 jle GouraudReturnNoZAlphaTex
\r
10291 ; prefetch [GBitPtr]
\r
; mm1 = uf:vf (high:low dword) read through ecx
10293 movd mm1,[ecx].vf
\r
10296 punpckldq mm1,qword ptr[ecx].uf
\r
; mm2 = uf:vf (high:low dword) read through ebx
10300 movd mm2,[ebx].vf
\r
10305 punpckldq mm2,qword ptr[ebx].uf
\r
; mm3 = gf:bf and mm4 = rf, both read through ecx
10306 movd mm3,[ecx].bf
\r
; packed float multiply by QFixedScale
; NOTE(review): presumably a float -> fixed-point scale factor -- confirm
10311 pfmul mm0,[QFixedScale]
\r
10314 punpckldq mm3,qword ptr[ecx].gf
\r
10317 movd mm4,[ecx].rf
\r
10319 pfmul mm2,[QFixedScale]
\r
; mm5 = gf:bf and mm6 = rf, both read through ebx
10322 movd mm5,[ebx].bf
\r
10325 punpckldq mm5,qword ptr[ebx].gf
\r
10328 movd mm6,[ebx].rf
\r
; mask the packed value with WrapMask, then shift the whole quadword right by QShiftV
10345 pand mm0,[WrapMask]
\r
10351 mov ebx,dword ptr[UV16+4]
\r
10354 psrlq mm0,[QShiftV]
\r
10364 mov eax,dword ptr[UV16V]
\r
; ---- per-pixel loop ----
10371 GouraudLoopNoZAlphaTex:
\r
; al = byte at [esi]; it is used below (through eax) as the index into the
; dword table addressed by ecx
10372 mov al,byte ptr[esi]
\r
; mm0 = copy of the current packed U|V before it is stepped
10373 movq mm0,mm2 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw UL|VL |ZY UZR|VZR |
\r
10375 ; sub esi,GBitPtr
\r
; step the packed U|V by the per-pixel delta held in mm1
10376 paddd mm2,mm1 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |
\r
10378 pand mm0,[WrapMask] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
; fetch one dword from the table at ecx, entry index eax
; NOTE(review): presumably a palette lookup yielding an ARGB texel -- confirm
10380 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |ARGB
\r
; spill the masked U|V to memory so its halves can be reloaded into integer registers
10382 movq [UV16],mm0 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
; widen the low four bytes of mm7 to words by interleaving with the bytes at [Zero]
10384 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
10386 mov ebx,dword ptr[UV16+4]
\r
10389 psrlq mm0,[QShiftV]
\r
10392 ; add esi,ABitPtr
\r
10398 ; mov al,byte ptr[esi]
\r
; duplicate the low dword of mm4 into its high dword
10407 punpckldq mm4,mm4
\r
10408 ; mov ebx,ATexPal
\r
10410 mov ebp,dword ptr[UV16V]
\r
10413 ; movd mm4,[ebx+eax*4]
\r
10416 punpcklbw mm0,[Zero]
\r
10418 movq mm6,[QNegAlpha]
\r
10420 punpcklbw mm4,[Zero]
\r
; repeat while the preceding flag-setting instruction gives a result >= 0 (signed)
10440 jge GouraudLoopNoZAlphaTex
\r
10446 GouraudReturnNoZAlphaTex:
\r
10453 cRet DrawScanLineGouraudNoZAlphaARGB_Asm3DNow
\r
10454 endProc DrawScanLineGouraudNoZAlphaARGB_Asm3DNow
\r
10458 ;perspective correct, zbuffered, argb alpha, gouraud
\r
10459 ;mean nasty function
\r
; DrawSpan32_AsmGouraudZBufferAlphaARGB3DNow
; Arguments (all dword): x1, x2, y, then r1,g1,b1 and r2,g2,b2.
; The visible code converts x1 and y to float (pi2fd) and uses them to scale
; the per-X / per-Y step values; colour deltas are formed as
; (g2,b2)-(g1,b1) and r2-r1 and scaled by a reciprocal derived from edx.
; The body then has three parts: set-up, an unrolled span section that is
; repeated while NumASpans counts down, and a leftover loop
; (HandleLeftoverPixelsLit) driven by RemainingCount.
; NOTE(review): 36 = 9 x 4 bytes, presumably the argument byte count expected by
; the cProc macro -- confirm against the macro definition.
10461 cProc DrawSpan32_AsmGouraudZBufferAlphaARGB3DNow, 36,<x1 : dword, x2 : dword, y : dword, r1 : dword, g1 : dword, b1 : dword, r2 : dword, g2 : dword, b2 : dword>
\r
; ebx -> ClientWindow; edi = its Buffer field, eax is multiplied by its PixelPitch field
10477 mov ebx,offset ClientWindow
\r
10479 mov edi,[ebx].Buffer
\r
10481 imul eax, [ebx].PixelPitch
\r
; addresses of the QZCan / SCan / QDibCan / QDibOrCan / QZOrCan work areas are taken here
10489 mov eax,offset QZCan
\r
10490 mov ebx,offset SCan
\r
10493 mov eax,offset QDibCan
\r
10496 mov ebx,offset QDibCan
\r
10497 mov eax,offset QDibOrCan
\r
10500 mov ebx,offset QZCan
\r
10501 mov eax,offset QZOrCan
\r
; NumASpans is decremented after each pass of the unrolled span section below;
; RemainingCount is decremented by the leftover loop at the end of the routine
10514 mov [NumASpans],ecx
\r
10515 mov [RemainingCount],eax
\r
10517 ;grab the left side lights
\r
10521 ; punpckldq mm5,qword ptr[Zero]
\r
10525 punpckldq mm4,mm6
\r
; The trailing comments on the following lines track the contents of mm0..mm7
; (one column per register, high|low dword).
10534 ; mm0 mm1 mm2 mm3 mm4 mm5 mm6 mm7
\r
10535 movd mm0,x1 ; |x | | | | | | |
\r
10536 movq mm2,[UVDivZStepX] ; |x | UZdX|VZdX | | | | |
\r
10538 movd mm1,y ; |x |y UZdX|VZdX | | | | |
\r
10539 movq mm3,[UVDivZStepY] ; |x |y UZdX|VZdXUZdY|VZdY | | | |
\r
10541 punpckldq mm0,mm0 ; x|x |y UZdX|VZdXUZdY|VZdY | | | |
\r
10542 punpckldq mm1,mm1 ; x|x y|y UZdX|VZdXUZdY|VZdY | | | |
\r
10544 movd mm7,edx ; x|x y|y UZdX|VZdXUZdY|VZdY | | | |wid
\r
10545 movd mm5,b2 ; x|x y|y UZdX|VZdXUZdY|VZdY | |b | |wid
\r
; pi2fd: packed int32 -> float conversion
10547 pi2fd mm0,mm0 ; x|x y|y UZdX|VZdXUZdY|VZdY | |b | |wid
\r
10548 movd mm6,b1 ; x|x y|y UZdX|VZdXUZdY|VZdY | |b |b |wid
\r
10550 pi2fd mm7,mm7 ; x|x y|y UZdX|VZdXUZdY|VZdY | |b |b |wid
\r
10551 punpckldq mm5,qword ptr[g2] ; x|x y|y UZdX|VZdXUZdY|VZdY | g|b |b |wid
\r
10553 pi2fd mm1,mm1 ; x|x y|y UZdX|VZdXUZdY|VZdY | g|b |b |wid
\r
10554 punpckldq mm6,qword ptr[g1] ; x|x y|y UZdX|VZdXUZdY|VZdY | g|b g|b |wid
\r
10556 pfmul mm2,mm0 ; x|x y|y UZX|VZX UZdY|VZdY | g|b g|b |wid
\r
; pfrcp: 3DNow! reciprocal approximation of the low dword, written to both halves
10557 pfrcp mm7,mm7 ; x|x y|y UZX|VZX UZdY|VZdY | g|b g|b dw|dw
\r
; mm5 = (g2,b2) - (g1,b1)
10559 pfsub mm5,mm6 ; x|x y|y UZX|VZX UZdY|VZdY | gd|bd g|b dw|dw
\r
10560 movd mm4,[r1] ; x|x y|y UZX|VZX UZdY|VZdY |r gd|bd g|b dw|dw
\r
10562 movd mm6,[r2] ; x|x y|y UZX|VZX UZdY|VZdY |r gd|bd g|r dw|dw
\r
10563 pfmul mm3,mm1 ; x|x y|y UZX|VZX UZY|VZY |r gd|bd g|r dw|dw
\r
; mm6 low dword = r2 - r1
10565 pfsub mm6,mm4 ; x|x y|y UZX|VZX UZY|VZY |r gd|bd x|rd dw|dw
\r
10566 pfmul mm7,[Q128] ; x|x y|y UZX|VZX UZY|VZY |r gd|bd x|rd DW|DW
\r
; QZDelta = ZiStepX (replicated into both dwords) * QZBufferPrec
10568 movd mm4,[ZiStepX] ; x|x y|y UZX|VZX UZY|VZY |ZdX gd|bd x|rd DW|DW
\r
10570 punpckldq mm4,mm4
\r
10571 pfmul mm4,[QZBufferPrec]
\r
10573 movq [QZDelta],mm4
\r
10575 movd mm4,[ZiStepX] ; x|x y|y UZX|VZX UZdY|VZdY |ZdX | | |
\r
10576 pfmul mm5,mm7 ; x|x y|y UZX|VZX UZY|VZY |ZdX GD|BD x|rd DW|DW
\r
10578 pfmul mm4,mm0 ; x|x y|y UZX|VZX UZY|VZY |ZX GD|BD x|rd DW|DW
\r
10579 pfmul mm6,mm7 ; x|x y|y UZX|VZX UZY|VZY |ZX GD|BD x|RD DW|DW
\r
10585 movq [RGBADelta],mm5
\r
10587 movd mm5,[ZiStepY] ; x|x y|y UZX|VZX UZY|VZY |ZX |ZdY x|RD DW|DW
\r
10588 movq mm6,[UVDivZOrigin] ; x|x y|y UZX|VZX UZY|VZY |ZX |ZdY UZO|VZO DW|DW
\r
10590 pfmul mm5,mm1 ; x|x y|y UZX|VZX UZY|VZY |ZX |ZY UZO|VZO DW|DW
\r
10591 movd mm7,[ZiOrigin] ; x|x y|y UZX|VZX UZY|VZY |ZX |ZY UZO|VZO |ZO
\r
; mm6 = UVDivZOrigin + x*UVDivZStepX + y*UVDivZStepY
; mm4 = ZiOrigin + x*ZiStepX + y*ZiStepY
10593 pfadd mm6,mm2 ; x|x y|y UZX|VZX UZY|VZY |ZX |ZY UZXS|VZXS |ZO
\r
10594 pfadd mm4,mm7 ; x|x y|y UZX|VZX UZY|VZY |ZXS |ZY UZXS|VZXS |ZO
\r
10596 pfadd mm6,mm3 ; x|x y|y UZX|VZX UZY|VZY |ZXS |ZY UZ|VZ |ZO
\r
10597 pfadd mm4,mm5 ; x|x y|y UZX|VZX UZY|VZY |Zi |ZY UZ|VZ |ZO
\r
; build the two packed z accumulators QZVal32_0 / QZVal32_1 and store QZDelta
10600 movq mm1,[QZDelta]
\r
10601 punpckldq mm7,mm7
\r
10602 pfmul mm7,[QZBufferPrec]
\r
10606 punpckldq mm0,mm7
\r
10608 movq [QZVal32_0],mm0
\r
10611 punpckldq mm0,mm7
\r
10612 movq [QZVal32_1],mm0
\r
10614 movd mm7,[Zi16StepX]
\r
10615 movq [QZDelta],mm1
\r
; perspective divide at the left edge (Zi) and at the right edge (Zi + Zi16StepX)
10617 pfrcp mm0,mm4 ; ZL|ZL y|y UZX|VZX UZY|VZY |Zi |ZY UZ|VZ |ZdX16
\r
10619 movq mm1,mm6 ; ZL|ZL UZ|VZ UZX|VZX UZY|VZY |Zi |ZY UZ|VZ |ZdX16
\r
10620 pfadd mm4,mm7 ; ZL|ZL UZ|VZ UZX|VZX UZY|VZY |ZRi |ZY UZ|VZ |ZdX16
\r
10622 pfmul mm1,mm0 ; ZL|ZL UL|VL UZX|VZX UZY|VZY |ZRi |ZY UZ|VZ |ZdX16
\r
10623 pfrcp mm2,mm4 ; ZL|ZL UL|VL ZR|ZR UZY|VZY |ZRi |ZY UZ|VZ |ZdX16
\r
10625 pfadd mm6,[UVDivZ16StepX] ; ZL|ZL UL|VL ZR|ZR UZY|VZY |ZRi |ZY UZR|VZR |ZdX16
\r
10626 pfmul mm2,mm6 ; ZL|ZL UL|VL UR|VR UZY|VZY |ZRi |ZY UZR|VZR |ZdX16
\r
10628 movq mm0,mm1 ; UL|VL UL|VL UR|VR UZY|VZY |ZRi |ZY UZR|VZR |ZdX16
\r
10629 movq mm3,mm2 ; ULi|VLi UL|VL UR|UR UR|UR |ZRi |ZY UZR|VZR |ZdX16
\r
10631 pfmul mm0,[QFixedScale] ; UL6|VL6 UL|VL UR|UR UR|UR |ZRi |ZY UZR|VZR |ZdX16
\r
10632 pfmul mm3,[QFixedScale] ; UL6|VL6 UL|VL UR6|UR6 UR6|UR6 |ZRi |ZY UZR|VZR |ZdX16
\r
; pf2id: packed float -> int32 conversion (truncating)
10634 pf2id mm0,mm0 ;UL6i|VL6i UL|VL UR6|VR6 UR6|VR6 |ZRi |ZY UZR|VZR |ZdX16
\r
10635 pf2id mm3,mm3 ;UL6i|VL6i UL|VL UR6i|VR6i URi|VRi |ZRi |ZY UZR|VZR |ZdX16
\r
; go straight to the leftover code when the preceding test yields zero
; NOTE(review): presumably this tests NumASpans == 0 -- confirm
10639 jz HandleLeftoverPixelsLit
\r
10641 movq mm7,[GLMapMulUV] ;UL6i|VL6i UL|VL UR6i|VR6i URi|VRi |ZRi |ZY UZR|VZR LU|LV
\r
10642 pfmul mm7,mm1 ;UL6i|VL6i UL|VL UR6i|VR6i URi|VRi |ZRi |ZY UZR|VZR ULL|VLL
\r
10643 pfmul mm7,[QFixedScale]
\r
10644 pf2id mm7,mm7 ; ULw|VLw DU|DV UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
10645 paddd mm7,[UVAdjustL]
\r
; Clamp the two dwords of UVL16: each is either replaced by ecx, replaced by 0,
; or left unchanged, depending on the compares that precede the jumps.
; NOTE(review): presumably ecx holds the upper bound (MaxU / MaxV) -- confirm
10649 mov ebx,dword ptr[UVL16+4]
\r
10652 jle TryClampU0Litp
\r
10654 mov dword ptr[UVL16+4],ecx
\r
10655 jmp NoClampU0Litp
\r
10659 jge NoClampU0Litp
\r
10660 mov dword ptr[UVL16+4],0
\r
10662 mov eax,dword ptr[UVL16]
\r
10664 jle TryClampV0Litp
\r
10666 mov dword ptr[UVL16],ecx
\r
10667 jmp NoClampV0Litp
\r
10671 jge NoClampV0Litp
\r
10672 mov dword ptr[UVL16],0
\r
10679 ;use float uv for lightmap uv
\r
10680 movq mm7,[GLMapMulUV] ;UL6i|VL6i UL|VL UR6i|VR6i URi|VRi |ZRi |ZY UZR|VZR LU|LV
\r
10683 pfmul mm7,mm2 ;UL6i|VL6i UL|VL UR6i|VR6i URi|VRi |ZRi |ZY UZR|VZR ULL|VLL
\r
10684 pfmul mm5,[QFixedScale]
\r
10686 pfmul mm7,[QFixedScale]
\r
10689 pf2id mm7,mm7 ; ULw|VLw DU|DV UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
10690 paddd mm5,[UVAdjust]
\r
; pfsubr: reversed subtract, mm1 = mm2 - mm1
10692 pfsubr mm1,mm2 ; ULw|VLw DU|DV UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
10693 paddd mm5,[UVAdjust2]
\r
10696 movq [UVLeft],mm5
\r
10698 movd mm7,[Zi16StepX]
\r
10701 pfmul mm1,[QFixedScale16] ; ULw|VLw DU16|DV16 UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
10702 ;right side stuff becomes left ; ULw|VLw DU16|DV16 UL|VL URw|VRw |ZLi |ZY UZL|VZL |
\r
10703 pfadd mm4,mm7 ;ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZL|VZL |
\r
10705 pand mm5,[WrapMask]
\r
10706 pf2id mm1,mm1 ; ULw|VLw DU16|DV16 UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
10708 movq mm7,[UVAdjustL]
\r
10711 pfadd mm6,[UVDivZ16StepX] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |
\r
10712 paddd mm7,[UVL16]
\r
10714 psrlq mm5,[QShiftV]
\r
; second UVL16 clamp, same store pattern as the one above (ecx or 0)
10721 mov ebx,dword ptr[UVL16+4]
\r
10723 jle TryClampU0Lit
\r
10725 mov dword ptr[UVL16+4],ecx
\r
10731 mov dword ptr[UVL16+4],0
\r
10733 mov eax,dword ptr[UVL16]
\r
10735 jle TryClampV0Lit
\r
10737 mov dword ptr[UVL16],ecx
\r
10743 mov dword ptr[UVL16],0
\r
10746 movq [UVLeftW],mm3
\r
10747 mov eax,dword ptr[UV16V]
\r
10748 mov ebx,dword ptr[UV16+4]
\r
10755 movq [UVLeft],mm2
\r
10759 ; movq mm3,[RGBADelta]
\r
; ---- unrolled span section ----
; Recurring pattern in the code below:
;   * pcmpgtw mm2,[ebp+k]  : signed 16-bit compare of the packed z values
;                            against four words at ebp+k (k = 0, 8, 16, 24)
;   * mov word ptr[edi+n],ax with ax from QZVal+{0,2,4,6} : one z word store
;   * movq mm3,[edi+n] ... movd [edi+n],mm3 : read / write of one colour dword
;   * paddw mm6,[RGBADelta] : step the packed 16-bit colour accumulator
;   * pand [WrapMask] / psrlq [QShiftV] : wrap and shift the stepped U|V
10766 ;grab zbuffer values
\r
10767 movq mm2,[QZVal32_0]
\r
10768 movq mm7,[QZVal32_1]
\r
10780 paddd mm0,mm1 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |
\r
10782 mov al,byte ptr[esi]
\r
10783 movq mm4,mm0 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw UL|VL |ZY UZR|VZR |
\r
10785 pcmpgtw mm2,[ebp]
\r
10789 pand mm4,[WrapMask] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
10792 movq [UV16],mm4 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
10795 psrlq mm4,[QShiftV]
\r
10800 punpcklwd mm7,[Zero]
\r
10803 paddd mm7,[QZOrCan]
\r
10804 mov edx,dword ptr[UV16V]
\r
10807 mov ebx,dword ptr[UV16+4]
\r
; QZOut / QDibOut hold pointers that are dereferenced to obtain the store address in edi
10810 mov edi,dword ptr[QZOut]
\r
10812 punpcklwd mm7,[Zero]
\r
10815 paddd mm7,[QDibOrCan]
\r
10816 mov edi,dword ptr[edi]
\r
10818 movq [QDibOut],mm7
\r
10819 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |ARGB
\r
10822 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
10828 mov ax,word ptr[QZVal]
\r
10833 mov word ptr[edi],ax
\r
10836 mov edi,dword ptr[QDibOut]
\r
10842 mov edi,dword ptr[edi]
\r
10843 punpckldq mm3,mm3
\r
10844 paddw mm6,[RGBADelta]
\r
10849 movq mm5,[QNegAlpha]
\r
10855 mov al,byte ptr[esi]
\r
10857 punpcklbw mm3,[Zero]
\r
10861 pand mm4,[WrapMask]
\r
10864 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ARGB UZR|VZR AR|GB
\r
10870 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw AR|GB UZR|VZR AR|GB
\r
10875 psrlq mm4,[QShiftV]
\r
10878 mov ebx,dword ptr[UV16+4]
\r
10887 mov edx,dword ptr[UV16V]
\r
10888 punpckldq mm3,mm3
\r
10890 mov edi,dword ptr[QZOut+4]
\r
10894 mov edi,dword ptr[edi]
\r
10896 mov ax,word ptr[QZVal+2]
\r
10899 movq mm5,[QNegAlpha]
\r
10902 mov word ptr[edi+2],ax
\r
10905 mov edi,dword ptr[QDibOut+4]
\r
10908 mov edi,dword ptr[edi]
\r
10911 paddw mm6,[RGBADelta]
\r
10917 punpcklbw mm3,[Zero]
\r
10918 mov al,byte ptr[esi]
\r
10924 packuswb mm7,mm7 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw ARGB|ARGB UZR|VZR AR|GB
\r
10930 pand mm4,[WrapMask]
\r
10938 punpckhwd mm7,[Zero]
\r
10939 psrlq mm4,[QShiftV]
\r
; NOTE(review): spelled QZorCan here but QZOrCan earlier in this routine; this
; only assembles if symbol matching is case-insensitive -- confirm the casemap setting
10941 paddd mm7,[QZorCan]
\r
10945 mov ebx,dword ptr[UV16+4]
\r
10948 mov edi,dword ptr[QZOut]
\r
10950 punpckhwd mm7,[Zero]
\r
10951 mov edi,dword ptr[edi]
\r
10953 paddd mm7,[QDibOrCan]
\r
10956 movq [QDibOut],mm7
\r
10957 mov edx,dword ptr[UV16V]
\r
10959 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |ARGB
\r
10962 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
10963 mov ax,word ptr[QZVal+4]
\r
10969 mov word ptr[edi+4],ax
\r
10975 mov edi,dword ptr[QDibOut]
\r
10978 paddw mm6,[RGBADelta]
\r
10980 punpckldq mm3,mm3
\r
10981 mov edi,dword ptr[edi]
\r
10983 movq mm5,[QNegAlpha]
\r
10991 punpcklbw mm3,[Zero]
\r
10998 pand mm4,[WrapMask]
\r
11010 mov al,byte ptr[esi]
\r
11012 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ARGB UZR|VZR AR|GB
\r
11013 psrlq mm4,[QShiftV]
\r
11015 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw AR|GB UZR|VZR AR|GB
\r
11019 mov ebx,dword ptr[UV16+4]
\r
11022 mov edx,dword ptr[UV16V]
\r
11033 paddw mm6,[RGBADelta]
\r
11034 punpckldq mm3,mm3
\r
11036 mov edi,dword ptr[QZOut+4]
\r
11039 movq mm5,[QNegAlpha]
\r
11045 mov edi,dword ptr[edi]
\r
11048 mov ax,word ptr[QZVal+6]
\r
11051 mov word ptr[edi+6],ax
\r
11052 mov edi,dword ptr[QDibOut+4]
\r
11055 mov edi,dword ptr[edi]
\r
11057 mov al,byte ptr[esi]
\r
; movq reads 8 bytes, but the punpcklbw that follows uses only the low dword
; and the matching store below is a 4-byte movd
11058 movq mm3,[edi+12]
\r
11061 punpcklbw mm3,[Zero]
\r
; advance both packed z accumulators by QZDelta for the next group of four pixels
11064 movq mm2,[QZVal32_0]
\r
11067 paddd mm2,[QZDelta]
\r
11070 movq mm7,[QZVal32_1]
\r
11073 paddd mm7,[QZDelta]
\r
11076 movq [QZVal32_0],mm2
\r
11077 movq [QZVal32_1],mm7
\r
11079 movd [edi+12],mm3
\r
11085 pand mm4,[WrapMask]
\r
11093 pcmpgtw mm2,[ebp+8]
\r
11094 psrlq mm4,[QShiftV]
\r
11102 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |ARGB
\r
11105 mov ebx,dword ptr[UV16+4]
\r
11107 punpcklwd mm4,[Zero]
\r
11108 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
11111 paddd mm4,[QZorCan]
\r
11123 mov edi,dword ptr[QZOut]
\r
11126 mov edi,dword ptr[edi]
\r
11129 mov ax,word ptr[QZVal]
\r
11130 punpckldq mm3,mm3
\r
11132 mov word ptr[edi+8],ax
\r
11135 paddw mm6,[RGBADelta]
\r
11136 punpcklwd mm4,[Zero]
\r
11138 paddd mm4,[QDibOrCan]
\r
11141 movq mm5,[QNegAlpha]
\r
11144 movq [QDibOut],mm4
\r
11147 mov edi,dword ptr[QDibOut]
\r
11148 mov edx,dword ptr[UV16V]
\r
11150 mov edi,dword ptr[edi]
\r
11153 movq mm3,[edi+16]
\r
11156 punpcklbw mm3,[Zero]
\r
11159 pand mm4,[WrapMask]
\r
11171 mov al,byte ptr[esi]
\r
11174 movd [edi+16],mm3
\r
11176 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ARGB UZR|VZR AR|GB
\r
11177 psrlq mm4,[QShiftV]
\r
11179 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw AR|GB UZR|VZR AR|GB
\r
11183 mov ebx,dword ptr[UV16+4]
\r
11186 mov edx,dword ptr[UV16V]
\r
11189 mov edi,dword ptr[QZOut+4]
\r
11192 mov edi,dword ptr[edi]
\r
11195 mov ax,word ptr[QZVal+2]
\r
11198 mov word ptr[edi+10],ax
\r
11203 mov edi,dword ptr[QDibOut+4]
\r
11206 punpckldq mm3,mm3
\r
11208 mov edi,dword ptr[edi]
\r
11210 movq mm5,[QNegAlpha]
\r
11214 movq mm3,[edi+20]
\r
11216 punpcklbw mm3,[Zero]
\r
11217 paddw mm6,[RGBADelta]
\r
11230 pand mm4,[WrapMask]
\r
11231 movd [edi+20],mm3
\r
11236 psrlq mm4,[QShiftV]
\r
11240 mov al,byte ptr[esi]
\r
11246 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |ARGB
\r
11248 punpckhwd mm4,[Zero]
\r
11250 mov ebx,dword ptr[UV16+4]
\r
11251 paddd mm4,[QZorCan]
\r
11253 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
11259 mov edi,dword ptr[QZOut]
\r
11260 punpckhwd mm4,[Zero]
\r
11262 mov edi,dword ptr[edi]
\r
11263 mov ax,word ptr[QZVal+4]
\r
11266 mov word ptr[edi+12],ax
\r
11273 paddd mm4,[QDibOrCan]
\r
11276 paddw mm6,[RGBADelta]
\r
11277 movq [QDibOut],mm4
\r
11279 punpckldq mm3,mm3
\r
11282 mov edi,dword ptr[QDibOut]
\r
11283 movq mm5,[QNegAlpha]
\r
11286 mov edi,dword ptr[edi]
\r
11289 movq mm3,[edi+24]
\r
11292 punpcklbw mm3,[Zero]
\r
11295 mov edx,dword ptr[UV16V]
\r
11306 pand mm4,[WrapMask]
\r
11307 movd [edi+24],mm3
\r
11310 mov edi,dword ptr[QZOut+4]
\r
11313 mov edi,dword ptr[edi]
\r
11316 mov al,byte ptr[esi]
\r
11319 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ARGB UZR|VZR AR|GB
\r
11320 psrlq mm4,[QShiftV]
\r
11324 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw AR|GB UZR|VZR AR|GB
\r
11327 mov ax,word ptr[QZVal+6]
\r
11330 mov ebx,dword ptr[UV16+4]
\r
11331 mov word ptr[edi+14],ax
\r
11334 mov edx,dword ptr[UV16V]
\r
11337 mov edi,dword ptr[QDibOut+4]
\r
11346 punpckldq mm3,mm3
\r
11347 paddw mm6,[RGBADelta]
\r
11349 movq mm5,[QNegAlpha]
\r
11352 mov edi,dword ptr[edi]
\r
11357 movq mm3,[edi+28]
\r
11360 punpcklbw mm3,[Zero]
\r
; advance both packed z accumulators by QZDelta for the next group of four pixels
11370 movq mm2,[QZVal32_0]
\r
11373 movq mm4,[QZVal32_1]
\r
11376 paddd mm2,[QZDelta]
\r
11377 paddd mm4,[QZDelta]
\r
11379 movq [QZVal32_0],mm2
\r
11380 movq [QZVal32_1],mm4
\r
11393 mov al,byte ptr[esi]
\r
11396 pand mm4,[WrapMask]
\r
11397 pcmpgtw mm2,[ebp+16]
\r
11399 movd [edi+28],mm3
\r
11405 psrlq mm4,[QShiftV]
\r
11411 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |ARGB
\r
11412 punpcklwd mm4,[Zero]
\r
11414 mov ebx,dword ptr[UV16+4]
\r
11415 paddd mm4,[QZorCan]
\r
11417 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
11421 mov edi,dword ptr[QZOut]
\r
11424 mov edi,dword ptr[edi]
\r
11433 mov ax,word ptr[QZVal]
\r
11436 punpcklwd mm4,[Zero]
\r
11437 mov word ptr[edi+16],ax
\r
11439 punpckldq mm3,mm3
\r
11440 paddw mm6,[RGBADelta]
\r
11442 paddd mm4,[QDibOrCan]
\r
11445 movq [QDibOut],mm4
\r
11446 movq mm5,[QNegAlpha]
\r
11448 mov edi,dword ptr[QDibOut]
\r
11451 mov edi,dword ptr[edi]
\r
11453 mov edx,dword ptr[UV16V]
\r
11455 movq mm3,[edi+32]
\r
11458 punpcklbw mm3,[Zero]
\r
11473 pand mm4,[WrapMask]
\r
11474 movd [edi+32],mm3
\r
11477 mov edi,dword ptr[QZOut+4]
\r
11479 mov al,byte ptr[esi]
\r
11482 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ARGB UZR|VZR AR|GB
\r
11483 psrlq mm4,[QShiftV]
\r
11485 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw AR|GB UZR|VZR AR|GB
\r
11489 mov ebx,dword ptr[UV16+4]
\r
11492 mov edx,dword ptr[UV16V]
\r
11495 mov edi,dword ptr[edi]
\r
11498 mov ax,word ptr[QZVal+2]
\r
11501 mov word ptr[edi+18],ax
\r
11504 mov edi,dword ptr[QDibOut+4]
\r
11507 paddw mm6,[RGBADelta]
\r
11509 punpckldq mm3,mm3
\r
11510 mov edi,dword ptr[edi]
\r
11513 movq mm5,[QNegAlpha]
\r
11518 movq mm3,[edi+36]
\r
11521 punpcklbw mm3,[Zero]
\r
11531 pand mm4,[WrapMask]
\r
11537 psrlq mm4,[QShiftV]
\r
11542 mov al,byte ptr[esi]
\r
11545 movd [edi+36],mm3
\r
11549 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |ARGB
\r
11551 punpckhwd mm4,[Zero]
\r
11553 mov ebx,dword ptr[UV16+4]
\r
11554 paddd mm4,[QZorCan]
\r
11556 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
11562 mov edi,dword ptr[QZOut]
\r
11565 mov ax,word ptr[QZVal+4]
\r
11568 mov edi,dword ptr[edi]
\r
11571 punpckhwd mm4,[Zero]
\r
11572 mov word ptr[edi+20],ax
\r
11575 paddd mm4,[QDibOrCan]
\r
11578 movq [QDibOut],mm4
\r
11581 mov edi,dword ptr[QDibOut]
\r
11582 paddw mm6,[RGBADelta]
\r
11584 punpckldq mm3,mm3
\r
11586 mov edi,dword ptr[edi]
\r
11589 movq mm5,[QNegAlpha]
\r
11592 mov edx,dword ptr[UV16V]
\r
11595 movq mm3,[edi+40]
\r
11598 punpcklbw mm3,[Zero]
\r
11613 movd [edi+40],mm3
\r
11616 pand mm4,[WrapMask]
\r
11617 mov al,byte ptr[esi]
\r
11621 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ARGB UZR|VZR AR|GB
\r
11622 psrlq mm4,[QShiftV]
\r
11624 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw AR|GB UZR|VZR AR|GB
\r
11629 mov ebx,dword ptr[UV16+4]
\r
11632 mov edx,dword ptr[UV16V]
\r
11635 mov edi,dword ptr[QZOut+4]
\r
11638 mov edi,dword ptr[edi]
\r
11641 mov ax,word ptr[QZVal+6]
\r
11644 mov word ptr[edi+22],ax
\r
11647 mov edi,dword ptr[QDibOut+4]
\r
11650 punpckldq mm3,mm3
\r
11651 paddw mm6,[RGBADelta]
\r
11653 movq mm5,[QNegAlpha]
\r
11654 mov edi,dword ptr[edi]
\r
11659 movq mm3,[edi+44]
\r
11662 punpcklbw mm3,[Zero]
\r
; advance both packed z accumulators by QZDelta for the next group of four pixels
11672 movq mm2,[QZVal32_0]
\r
11675 movq mm4,[QZVal32_1]
\r
11678 paddd mm2,[QZDelta]
\r
11679 paddd mm4,[QZDelta]
\r
11681 movq [QZVal32_0],mm2
\r
11682 movq [QZVal32_1],mm4
\r
11695 mov al,byte ptr[esi]
\r
11698 pand mm4,[WrapMask]
\r
11699 pcmpgtw mm2,[ebp+24]
\r
11701 movd [edi+44],mm3
\r
11707 psrlq mm4,[QShiftV]
\r
11715 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |ARGB
\r
11716 punpcklwd mm4,[Zero]
\r
11718 mov ebx,dword ptr[UV16+4]
\r
11719 paddd mm4,[QZorCan]
\r
11721 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
11726 mov edi,dword ptr[QZOut]
\r
11729 mov ax,word ptr[QZVal]
\r
11732 mov edi,dword ptr[edi]
\r
11735 mov word ptr[edi+24],ax
\r
11738 punpcklwd mm4,[Zero]
\r
11741 paddd mm4,[QDibOrCan]
\r
11744 movq [QDibOut],mm4
\r
11747 paddw mm6,[RGBADelta]
\r
11748 punpckldq mm3,mm3
\r
11750 mov edi,dword ptr[QDibOut]
\r
11753 movq mm5,[QNegAlpha]
\r
11756 mov edi,dword ptr[edi]
\r
11759 movq mm3,[edi+48]
\r
11762 punpcklbw mm3,[Zero]
\r
11768 mov edx,dword ptr[UV16V]
\r
11771 pand mm4,[WrapMask]
\r
11780 movd [edi+48],mm3
\r
11784 mov al,byte ptr[esi]
\r
11786 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ARGB UZR|VZR AR|GB
\r
11787 psrlq mm4,[QShiftV]
\r
11789 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw AR|GB UZR|VZR AR|GB
\r
11794 mov ebx,dword ptr[UV16+4]
\r
11797 mov edx,dword ptr[UV16V]
\r
11800 mov edi,dword ptr[QZOut+4]
\r
11803 mov ax,word ptr[QZVal+2]
\r
11806 mov edi,dword ptr[edi]
\r
11809 mov word ptr[edi+26],ax
\r
11812 mov edi,dword ptr[QDibOut+4]
\r
11815 punpckldq mm3,mm3
\r
11816 paddw mm6,[RGBADelta]
\r
11818 movq mm5,[QNegAlpha]
\r
11821 mov edi,dword ptr[edi]
\r
11824 movq mm3,[edi+52]
\r
11827 punpcklbw mm3,[Zero]
\r
11836 pand mm4,[WrapMask]
\r
11842 psrlq mm4,[QShiftV]
\r
11849 movd [edi+52],mm3
\r
11850 mov al,byte ptr[esi]
\r
11855 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |ARGB
\r
11857 punpckhwd mm4,[Zero]
\r
11859 mov ebx,dword ptr[UV16+4]
\r
11860 paddd mm4,[QZorCan]
\r
11862 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
11866 mov edi,dword ptr[QZOut]
\r
11869 mov ax,word ptr[QZVal+4]
\r
11872 mov edi,dword ptr[edi]
\r
11875 mov word ptr[edi+28],ax
\r
11881 punpckhwd mm4,[Zero]
\r
11884 paddd mm4,[QDibOrCan]
\r
11887 paddw mm6,[RGBADelta]
\r
11888 punpckldq mm3,mm3
\r
11890 movq mm5,[QNegAlpha]
\r
11893 movq [QDibOut],mm4
\r
11896 mov edi,dword ptr[QDibOut]
\r
11899 mov edi,dword ptr[edi]
\r
11902 movq mm3,[edi+56]
\r
11905 punpcklbw mm3,[Zero]
\r
11907 mov edx,dword ptr[UV16V]
\r
11910 pand mm4,[WrapMask]
\r
11920 mov al,byte ptr[esi]
\r
11922 movd [edi+56],mm3
\r
11924 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ARGB UZR|VZR AR|GB
\r
11927 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw AR|GB UZR|VZR AR|GB
\r
11928 mov edi,dword ptr[QZOut+4]
\r
11930 mov ax,word ptr[QZVal+6]
\r
11933 mov edi,dword ptr[edi]
\r
11936 mov word ptr[edi+30],ax
\r
11939 mov edi,dword ptr[QDibOut+4]
\r
11942 paddw mm6,[RGBADelta]
\r
11945 mov edi,dword ptr[edi]
\r
11952 punpckldq mm3,mm3
\r
11954 movq mm5,[QNegAlpha]
\r
11960 movq mm2,[QZVal32_0]
\r
11961 movq mm3,[edi+60]
\r
11963 movq mm4,[QZVal32_1]
\r
11964 punpcklbw mm3,[Zero]
\r
11966 paddd mm2,[QZDelta]
\r
11969 paddd mm4,[QZDelta]
\r
11972 movq [QZVal32_0],mm2
\r
11975 movq [QZVal32_1],mm4
\r
11978 movd [edi+60],mm3
\r
11980 ; get corrected right side deltas ; ULw|VLw DU16|DV16 argb|ARGBagAG|rbRB |ZRi aA|rR UZR|VZR aArR|gGbB
\r
11986 movq mm0,[UVLeftW] ; ULw|VLw DU16|DV16 argb|ARGBagAG|rbRB |ZRi aA|rR UZR|VZR aArR|gGbB
\r
11988 movq mm1,[UVLeft] ; ULw|VLw UL|VL argb|ARGBagAG|rbRB ZRi|ZRi aA|rR UZR|VZR aArR|gGbB
\r
11989 punpckldq mm2,mm2 ; ULw|VLw UL|VL ZRi|ZRi agAG|rbRB ZRi|ZRi aA|rR UZR|VZR aArR|gGbB
\r
11991 pfmul mm2,mm6 ; ULw|VLw UL|VL UR|VR agAG|rbRB ZRi|ZRi aA|rR UZR|VZR aArR|gGbB
\r
; Advance to the next span: Dest and the dword at QZCan+4 move on by 32 bytes,
; the dword at QDibCan+4 by 64 bytes. This matches the stores above, which
; reach z-word offset +30 and colour-dword offset +60.
11992 mov eax,offset QZCan
\r
11993 add dword ptr[Dest],32
\r
11995 add dword ptr[eax+4],32
\r
11996 mov eax,offset QDibCan
\r
11998 add dword ptr[eax+4],64
\r
12001 pfmul mm3,[QFixedScale]
\r
12005 dec [NumASpans] ; dec num affine spans
\r
; ---- leftover pixels (fewer than a full span) ----
12008 HandleLeftoverPixelsLit:
\r
12014 cmp [RemainingCount],0
\r
; replicate RemainingCount into both dwords of ZIR
12017 mov eax,[RemainingCount]
\r
12018 mov dword ptr[ZIR],eax
\r
12019 mov dword ptr[ZIR+4],eax
\r
12021 movq mm7,[GLMapMulUV] ;UL6i|VL6i UL|VL UR6i|VR6i URi|VRi |ZRi |ZY UZR|VZR LU|LV
\r
; undo one UVDivZ16StepX step on mm6
12024 pfsub mm6,[UVDivZ16StepX]
\r
12025 pfmul mm7,mm1 ;UL6i|VL6i UL|VL UR6i|VR6i URi|VRi |ZRi |ZY UZR|VZR ULL|VLL
\r
12027 pfmul mm5,[QFixedScale]
\r
12030 pfmul mm7,[QFixedScale]
\r
12031 pfmul mm3,[UVDivZStepX]
\r
12037 movd mm6,[Zi16StepX]
\r
12039 pf2id mm7,mm7 ; ULw|VLw DU|DV UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
12042 movd mm6,[ZiStepX]
\r
12043 mov ebx,[RemainingCount]
\r
12050 paddd mm5,[UVAdjust]
\r
12052 punpckldq mm4,mm4
\r
12057 pfmul mm4,[QZBufferPrec]
\r
12059 paddd mm5,[UVAdjust2]
\r
12062 movq [UVLeft],mm5
\r
12063 pfsubr mm1,mm2 ; ULw|VLw DU|DV UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
; scale the U|V difference by a per-count factor read from QFixedScaleLUT
; NOTE(review): ebx was loaded from RemainingCount above and is used here as a
; byte offset into a table of qwords -- confirm it is scaled before this point
12065 pfmul mm1,qword ptr[QFixedScaleLUT+ebx] ; ULw|VLw DU16|DV16 UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
12068 pand mm5,[WrapMask]
\r
12069 pf2id mm1,mm1 ; ULw|VLw DU16|DV16 UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
12072 movq mm7,[UVAdjustL]
\r
12074 psrlq mm5,[QShiftV]
\r
12075 paddd mm7,[UVL16]
\r
; UVL16 clamp for the leftover path, same store pattern as before (ecx or 0)
12083 mov ebx,dword ptr[UVL16+4]
\r
12085 jle TryClampU1Lit
\r
12087 mov dword ptr[UVL16+4],ecx
\r
12093 mov dword ptr[UVL16+4],0
\r
12095 mov eax,dword ptr[UVL16]
\r
12097 jle TryClampV1Lit
\r
12099 mov dword ptr[UVL16],ecx
\r
12105 mov dword ptr[UVL16],0
\r
12108 mov ebx,dword ptr[UV16+4]
\r
12109 mov eax,dword ptr[UV16V]
\r
; edi = dword at QDibCan+4, ebp = Dest (ebp is the address compared against ax below)
12119 mov eax,offset QDibCan
\r
12122 mov edi,dword ptr[eax+4]
\r
12123 mov ebp,dword ptr[Dest]
\r
12125 movq [UVZ],mm1 ;using this for a step temp
\r
12127 movd mm3,dword ptr[ZiStepX]
\r
; step the packed U|V by the delta parked in UVZ
12131 paddd mm0,[UVZ] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |
\r
12134 mov al,byte ptr[esi]
\r
12135 movq mm5,mm0 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw UL|VL |ZY UZR|VZR |
\r
12140 pand mm5,[WrapMask] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
12143 movq [UV16],mm5 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
12144 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |ARGB
\r
12146 psrlq mm5,[QShiftV]
\r
12150 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
12155 mov ebx,dword ptr[UV16+4]
\r
12164 mov edx,dword ptr[UV16V]
\r
12168 punpckldq mm5,mm5
\r
12171 movq mm1,[QNegAlpha]
\r
12177 paddw mm6,[RGBADelta]
\r
12179 punpcklbw mm5,[Zero]
\r
; 16-bit compare of ax against the word at [ebp]
; NOTE(review): presumably the per-pixel z test -- confirm
12183 cmp ax,word ptr[ebp]
\r
; loop while RemainingCount is still >= 0 (signed) after the decrement
12198 dec [RemainingCount]
\r
12199 jge LeftoverLoopLit
\r
12210 cRet DrawSpan32_AsmGouraudZBufferAlphaARGB3DNow
\r
12211 endProc DrawSpan32_AsmGouraudZBufferAlphaARGB3DNow
\r
12215 ; non-interpolated alpha at the vertices
\r
12217 cProc DrawSpan32_AsmGouraudZBufferVertexAlpha3DNow, 36,<x1 : dword, x2 : dword, y : dword, r1 : dword, g1 : dword, b1 : dword, r2 : dword, g2 : dword, b2 : dword>
\r
12233 mov ebx,offset ClientWindow
\r
12235 mov edi,[ebx].Buffer
\r
12237 imul eax, [ebx].PixelPitch
\r
12245 mov eax,offset QZCan
\r
12246 mov ebx,offset SCan
\r
12249 mov eax,offset QDibCan
\r
12252 mov ebx,offset QDibCan
\r
12253 mov eax,offset QDibOrCan
\r
12256 mov ebx,offset QZCan
\r
12257 mov eax,offset QZOrCan
\r
12270 mov [NumASpans],ecx
\r
12271 mov [RemainingCount],eax
\r
12273 ;grab the left side lights
\r
12277 ; punpckldq mm5,qword ptr[Zero]
\r
12281 punpckldq mm4,mm6
\r
12290 ; mm0 mm1 mm2 mm3 mm4 mm5 mm6 mm7
\r
12291 movd mm0,x1 ; |x | | | | | | |
\r
12292 movq mm2,[UVDivZStepX] ; |x | UZdX|VZdX | | | | |
\r
12294 movd mm1,y ; |x |y UZdX|VZdX | | | | |
\r
12295 movq mm3,[UVDivZStepY] ; |x |y UZdX|VZdXUZdY|VZdY | | | |
\r
12297 punpckldq mm0,mm0 ; x|x |y UZdX|VZdXUZdY|VZdY | | | |
\r
12298 punpckldq mm1,mm1 ; x|x y|y UZdX|VZdXUZdY|VZdY | | | |
\r
12300 movd mm7,edx ; x|x y|y UZdX|VZdXUZdY|VZdY | | | |wid
\r
12301 movd mm5,b2 ; x|x y|y UZdX|VZdXUZdY|VZdY | |b | |wid
\r
12303 pi2fd mm0,mm0 ; x|x y|y UZdX|VZdXUZdY|VZdY | |b | |wid
\r
12304 movd mm6,b1 ; x|x y|y UZdX|VZdXUZdY|VZdY | |b |b |wid
\r
12306 pi2fd mm7,mm7 ; x|x y|y UZdX|VZdXUZdY|VZdY | |b |b |wid
\r
12307 punpckldq mm5,qword ptr[g2] ; x|x y|y UZdX|VZdXUZdY|VZdY | g|b |b |wid
\r
12309 pi2fd mm1,mm1 ; x|x y|y UZdX|VZdXUZdY|VZdY | g|b |b |wid
\r
12310 punpckldq mm6,qword ptr[g1] ; x|x y|y UZdX|VZdXUZdY|VZdY | g|b g|b |wid
\r
12312 pfmul mm2,mm0 ; x|x y|y UZX|VZX UZdY|VZdY | g|b g|b |wid
\r
12313 pfrcp mm7,mm7 ; x|x y|y UZX|VZX UZdY|VZdY | g|b g|b dw|dw
\r
12315 pfsub mm5,mm6 ; x|x y|y UZX|VZX UZdY|VZdY | gd|bd g|b dw|dw
\r
12316 movd mm4,[r1] ; x|x y|y UZX|VZX UZdY|VZdY |r gd|bd g|b dw|dw
\r
12318 movd mm6,[r2] ; x|x y|y UZX|VZX UZdY|VZdY |r gd|bd g|r dw|dw
\r
12319 pfmul mm3,mm1 ; x|x y|y UZX|VZX UZY|VZY |r gd|bd g|r dw|dw
\r
12321 pfsub mm6,mm4 ; x|x y|y UZX|VZX UZY|VZY |r gd|bd x|rd dw|dw
\r
12322 pfmul mm7,[Q128] ; x|x y|y UZX|VZX UZY|VZY |r gd|bd x|rd DW|DW
\r
12324 movd mm4,[ZiStepX] ; x|x y|y UZX|VZX UZY|VZY |ZdX gd|bd x|rd DW|DW
\r
12326 punpckldq mm4,mm4
\r
12327 pfmul mm4,[QZBufferPrec]
\r
12329 movq [QZDelta],mm4
\r
12331 movd mm4,[ZiStepX] ; x|x y|y UZX|VZX UZdY|VZdY |ZdX | | |
\r
12332 pfmul mm5,mm7 ; x|x y|y UZX|VZX UZY|VZY |ZdX GD|BD x|rd DW|DW
\r
12334 pfmul mm4,mm0 ; x|x y|y UZX|VZX UZY|VZY |ZX GD|BD x|rd DW|DW
\r
12335 pfmul mm6,mm7 ; x|x y|y UZX|VZX UZY|VZY |ZX GD|BD x|RD DW|DW
\r
12341 movq [RGBADelta],mm5
\r
12343 movd mm5,[ZiStepY] ; x|x y|y UZX|VZX UZY|VZY |ZX |ZdY x|RD DW|DW
\r
12344 movq mm6,[UVDivZOrigin] ; x|x y|y UZX|VZX UZY|VZY |ZX |ZdY UZO|VZO DW|DW
\r
12346 pfmul mm5,mm1 ; x|x y|y UZX|VZX UZY|VZY |ZX |ZY UZO|VZO DW|DW
\r
12347 movd mm7,[ZiOrigin] ; x|x y|y UZX|VZX UZY|VZY |ZX |ZY UZO|VZO |ZO
\r
12349 pfadd mm6,mm2 ; x|x y|y UZX|VZX UZY|VZY |ZX |ZY UZXS|VZXS |ZO
\r
12350 pfadd mm4,mm7 ; x|x y|y UZX|VZX UZY|VZY |ZXS |ZY UZXS|VZXS |ZO
\r
12352 pfadd mm6,mm3 ; x|x y|y UZX|VZX UZY|VZY |ZXS |ZY UZ|VZ |ZO
\r
12353 pfadd mm4,mm5 ; x|x y|y UZX|VZX UZY|VZY |Zi |ZY UZ|VZ |ZO
\r
12356 movq mm1,[QZDelta]
\r
12357 punpckldq mm7,mm7
\r
12358 pfmul mm7,[QZBufferPrec]
\r
12362 punpckldq mm0,mm7
\r
12364 movq [QZVal32_0],mm0
\r
12367 punpckldq mm0,mm7
\r
12368 movq [QZVal32_1],mm0
\r
12370 movd mm7,[Zi16StepX]
\r
12371 movq [QZDelta],mm1
\r
12373 pfrcp mm0,mm4 ; ZL|ZL y|y UZX|VZX UZY|VZY |Zi |ZY UZ|VZ |ZdX16
\r
12375 movq mm1,mm6 ; ZL|ZL UZ|VZ UZX|VZX UZY|VZY |Zi |ZY UZ|VZ |ZdX16
\r
12376 pfadd mm4,mm7 ; ZL|ZL UZ|VZ UZX|VZX UZY|VZY |ZRi |ZY UZ|VZ |ZdX16
\r
12378 pfmul mm1,mm0 ; ZL|ZL UL|VL UZX|VZX UZY|VZY |ZRi |ZY UZ|VZ |ZdX16
\r
12379 pfrcp mm2,mm4 ; ZL|ZL UL|VL ZR|ZR UZY|VZY |ZRi |ZY UZ|VZ |ZdX16
\r
12381 pfadd mm6,[UVDivZ16StepX] ; ZL|ZL UL|VL ZR|ZR UZY|VZY |ZRi |ZY UZR|VZR |ZdX16
\r
12382 pfmul mm2,mm6 ; ZL|ZL UL|VL UR|VR UZY|VZY |ZRi |ZY UZR|VZR |ZdX16
\r
12384 movq mm0,mm1 ; UL|VL UL|VL UR|VR UZY|VZY |ZRi |ZY UZR|VZR |ZdX16
\r
12385 movq mm3,mm2 ; ULi|VLi UL|VL UR|UR UR|UR |ZRi |ZY UZR|VZR |ZdX16
\r
12387 pfmul mm0,[QFixedScale] ; UL6|VL6 UL|VL UR|UR UR|UR |ZRi |ZY UZR|VZR |ZdX16
\r
12388 pfmul mm3,[QFixedScale] ; UL6|VL6 UL|VL UR6|UR6 UR6|UR6 |ZRi |ZY UZR|VZR |ZdX16
\r
12390 pf2id mm0,mm0 ;UL6i|VL6i UL|VL UR6|VR6 UR6|VR6 |ZRi |ZY UZR|VZR |ZdX16
\r
12391 pf2id mm3,mm3 ;UL6i|VL6i UL|VL UR6i|VR6i URi|VRi |ZRi |ZY UZR|VZR |ZdX16
\r
12395 jz HandleLeftoverPixelsLit
\r
12397 movq mm7,[GLMapMulUV] ;UL6i|VL6i UL|VL UR6i|VR6i URi|VRi |ZRi |ZY UZR|VZR LU|LV
\r
12398 pfmul mm7,mm1 ;UL6i|VL6i UL|VL UR6i|VR6i URi|VRi |ZRi |ZY UZR|VZR ULL|VLL
\r
12399 pfmul mm7,[QFixedScale]
\r
12400 pf2id mm7,mm7 ; ULw|VLw DU|DV UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
12401 paddd mm7,[UVAdjustL]
\r
12405 mov ebx,dword ptr[UVL16+4]
\r
12408 jle TryClampU0Litp
\r
12410 mov dword ptr[UVL16+4],ecx
\r
12411 jmp NoClampU0Litp
\r
12415 jge NoClampU0Litp
\r
12416 mov dword ptr[UVL16+4],0
\r
12418 mov eax,dword ptr[UVL16]
\r
12420 jle TryClampV0Litp
\r
12422 mov dword ptr[UVL16],ecx
\r
12423 jmp NoClampV0Litp
\r
12427 jge NoClampV0Litp
\r
12428 mov dword ptr[UVL16],0
\r
12435 ;use float uv for lightmap uv
\r
12436 movq mm7,[GLMapMulUV] ;UL6i|VL6i UL|VL UR6i|VR6i URi|VRi |ZRi |ZY UZR|VZR LU|LV
\r
12439 pfmul mm7,mm2 ;UL6i|VL6i UL|VL UR6i|VR6i URi|VRi |ZRi |ZY UZR|VZR ULL|VLL
\r
12440 pfmul mm5,[QFixedScale]
\r
12442 pfmul mm7,[QFixedScale]
\r
12445 pf2id mm7,mm7 ; ULw|VLw DU|DV UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
12446 paddd mm5,[UVAdjust]
\r
12448 pfsubr mm1,mm2 ; ULw|VLw DU|DV UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
12449 paddd mm5,[UVAdjust2]
\r
12452 movq [UVLeft],mm5
\r
12454 movd mm7,[Zi16StepX]
\r
12457 pfmul mm1,[QFixedScale16] ; ULw|VLw DU16|DV16 UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
12458 ;right side stuff becomes left ; ULw|VLw DU16|DV16 UL|VL URw|VRw |ZLi |ZY UZL|VZL |
\r
12459 pfadd mm4,mm7 ;ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZL|VZL |
\r
12461 pand mm5,[WrapMask]
\r
12462 pf2id mm1,mm1 ; ULw|VLw DU16|DV16 UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
12464 movq mm7,[UVAdjustL]
\r
12467 pfadd mm6,[UVDivZ16StepX] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |
\r
12468 paddd mm7,[UVL16]
\r
12470 psrlq mm5,[QShiftV]
\r
12477 mov ebx,dword ptr[UVL16+4]
\r
12479 jle TryClampU0Lit
\r
12481 mov dword ptr[UVL16+4],ecx
\r
12487 mov dword ptr[UVL16+4],0
\r
12489 mov eax,dword ptr[UVL16]
\r
12491 jle TryClampV0Lit
\r
12493 mov dword ptr[UVL16],ecx
\r
12499 mov dword ptr[UVL16],0
\r
12502 movq [UVLeftW],mm3
\r
12503 mov eax,dword ptr[UV16V]
\r
12504 mov ebx,dword ptr[UV16+4]
\r
12511 movq [UVLeft],mm2
\r
12515 ; movq mm3,[RGBADelta]
\r
12522 ;grab zbuffer values
\r
12523 movq mm2,[QZVal32_0]
\r
12524 movq mm7,[QZVal32_1]
\r
12536 paddd mm0,mm1 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |
\r
12538 mov al,byte ptr[esi]
\r
12539 movq mm4,mm0 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw UL|VL |ZY UZR|VZR |
\r
12541 pcmpgtw mm2,[ebp]
\r
12545 pand mm4,[WrapMask] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
12548 movq [UV16],mm4 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
12551 psrlq mm4,[QShiftV]
\r
12556 punpcklwd mm7,[Zero]
\r
12559 paddd mm7,[QZOrCan]
\r
12560 mov edx,dword ptr[UV16V]
\r
12563 mov ebx,dword ptr[UV16+4]
\r
12566 mov edi,dword ptr[QZOut]
\r
12568 punpcklwd mm7,[Zero]
\r
12571 paddd mm7,[QDibOrCan]
\r
12572 mov edi,dword ptr[edi]
\r
12574 movq [QDibOut],mm7
\r
12575 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |ARGB
\r
12578 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
12583 mov ax,word ptr[QZVal]
\r
12587 mov word ptr[edi],ax
\r
12589 mov edi,dword ptr[QDibOut]
\r
12593 mov edi,dword ptr[edi]
\r
12594 paddw mm6,[RGBADelta]
\r
12599 movq mm5,[QNegAlpha]
\r
12601 pmullw mm7,qword ptr[VertAlpha]
\r
12602 psubw mm5,qword ptr[VertAlpha]
\r
12605 mov al,byte ptr[esi]
\r
12607 punpcklbw mm3,[Zero]
\r
12611 pand mm4,[WrapMask]
\r
12614 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ARGB UZR|VZR AR|GB
\r
12620 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw AR|GB UZR|VZR AR|GB
\r
12624 psrlq mm4,[QShiftV]
\r
12626 mov ebx,dword ptr[UV16+4]
\r
12632 mov edx,dword ptr[UV16V]
\r
12634 mov edi,dword ptr[QZOut+4]
\r
12638 mov edi,dword ptr[edi]
\r
12640 mov ax,word ptr[QZVal+2]
\r
12643 movq mm5,[QNegAlpha]
\r
12644 pmullw mm7,qword ptr[VertAlpha]
\r
12646 mov word ptr[edi+2],ax
\r
12649 mov edi,dword ptr[QDibOut+4]
\r
12652 mov edi,dword ptr[edi]
\r
12653 psubw mm5,qword ptr[VertAlpha]
\r
12655 paddw mm6,[RGBADelta]
\r
12661 punpcklbw mm3,[Zero]
\r
12662 mov al,byte ptr[esi]
\r
12668 packuswb mm7,mm7 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw ARGB|ARGB UZR|VZR AR|GB
\r
12674 pand mm4,[WrapMask]
\r
12682 punpckhwd mm7,[Zero]
\r
12683 psrlq mm4,[QShiftV]
\r
12685 paddd mm7,[QZorCan]
\r
12689 mov ebx,dword ptr[UV16+4]
\r
12692 mov edi,dword ptr[QZOut]
\r
12694 punpckhwd mm7,[Zero]
\r
12695 mov edi,dword ptr[edi]
\r
12697 paddd mm7,[QDibOrCan]
\r
12700 movq [QDibOut],mm7
\r
12701 mov edx,dword ptr[UV16V]
\r
12703 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |ARGB
\r
12706 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
12707 mov ax,word ptr[QZVal+4]
\r
12711 mov word ptr[edi+4],ax
\r
12715 mov edi,dword ptr[QDibOut]
\r
12717 paddw mm6,[RGBADelta]
\r
12719 mov edi,dword ptr[edi]
\r
12721 movq mm5,[QNegAlpha]
\r
12723 pmullw mm7,qword ptr[VertAlpha]
\r
12724 psubw mm5,qword ptr[VertAlpha]
\r
12729 punpcklbw mm3,[Zero]
\r
12736 pand mm4,[WrapMask]
\r
12748 mov al,byte ptr[esi]
\r
12750 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ARGB UZR|VZR AR|GB
\r
12751 psrlq mm4,[QShiftV]
\r
12753 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw AR|GB UZR|VZR AR|GB
\r
12756 mov ebx,dword ptr[UV16+4]
\r
12758 mov edx,dword ptr[UV16V]
\r
12766 paddw mm6,[RGBADelta]
\r
12768 mov edi,dword ptr[QZOut+4]
\r
12771 movq mm5,[QNegAlpha]
\r
12774 pmullw mm7,qword ptr[VertAlpha]
\r
12777 mov edi,dword ptr[edi]
\r
12778 psubw mm5,qword ptr[VertAlpha]
\r
12780 mov ax,word ptr[QZVal+6]
\r
12783 mov word ptr[edi+6],ax
\r
12784 mov edi,dword ptr[QDibOut+4]
\r
12787 mov edi,dword ptr[edi]
\r
12789 mov al,byte ptr[esi]
\r
12790 movq mm3,[edi+12]
\r
12793 punpcklbw mm3,[Zero]
\r
12796 movq mm2,[QZVal32_0]
\r
12799 paddd mm2,[QZDelta]
\r
12802 movq mm7,[QZVal32_1]
\r
12805 paddd mm7,[QZDelta]
\r
12808 movq [QZVal32_0],mm2
\r
12809 movq [QZVal32_1],mm7
\r
12811 movd [edi+12],mm3
\r
12817 pand mm4,[WrapMask]
\r
12825 pcmpgtw mm2,[ebp+8]
\r
12826 psrlq mm4,[QShiftV]
\r
12834 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |ARGB
\r
12837 mov ebx,dword ptr[UV16+4]
\r
12839 punpcklwd mm4,[Zero]
\r
12840 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
12843 paddd mm4,[QZorCan]
\r
12851 mov edi,dword ptr[QZOut]
\r
12854 mov edi,dword ptr[edi]
\r
12856 mov ax,word ptr[QZVal]
\r
12858 mov word ptr[edi+8],ax
\r
12861 paddw mm6,[RGBADelta]
\r
12862 punpcklwd mm4,[Zero]
\r
12864 paddd mm4,[QDibOrCan]
\r
12865 pmullw mm7,qword ptr[VertAlpha]
\r
12867 movq mm5,[QNegAlpha]
\r
12870 movq [QDibOut],mm4
\r
12871 psubw mm5,qword ptr[VertAlpha]
\r
12873 mov edi,dword ptr[QDibOut]
\r
12874 mov edx,dword ptr[UV16V]
\r
12876 mov edi,dword ptr[edi]
\r
12879 movq mm3,[edi+16]
\r
12882 punpcklbw mm3,[Zero]
\r
12885 pand mm4,[WrapMask]
\r
12897 mov al,byte ptr[esi]
\r
12900 movd [edi+16],mm3
\r
12902 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ARGB UZR|VZR AR|GB
\r
12903 psrlq mm4,[QShiftV]
\r
12905 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw AR|GB UZR|VZR AR|GB
\r
12908 mov ebx,dword ptr[UV16+4]
\r
12910 mov edx,dword ptr[UV16V]
\r
12912 mov edi,dword ptr[QZOut+4]
\r
12915 mov edi,dword ptr[edi]
\r
12917 mov ax,word ptr[QZVal+2]
\r
12920 mov word ptr[edi+10],ax
\r
12924 mov edi,dword ptr[QDibOut+4]
\r
12928 mov edi,dword ptr[edi]
\r
12930 movq mm5,[QNegAlpha]
\r
12931 pmullw mm7,qword ptr[VertAlpha]
\r
12933 psubw mm5,qword ptr[VertAlpha]
\r
12934 movq mm3,[edi+20]
\r
12936 punpcklbw mm3,[Zero]
\r
12937 paddw mm6,[RGBADelta]
\r
12950 pand mm4,[WrapMask]
\r
12951 movd [edi+20],mm3
\r
12956 psrlq mm4,[QShiftV]
\r
12960 mov al,byte ptr[esi]
\r
12966 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |ARGB
\r
12968 punpckhwd mm4,[Zero]
\r
12970 mov ebx,dword ptr[UV16+4]
\r
12971 paddd mm4,[QZorCan]
\r
12973 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
12978 mov edi,dword ptr[QZOut]
\r
12979 punpckhwd mm4,[Zero]
\r
12981 mov edi,dword ptr[edi]
\r
12982 mov ax,word ptr[QZVal+4]
\r
12984 mov word ptr[edi+12],ax
\r
12989 paddd mm4,[QDibOrCan]
\r
12991 paddw mm6,[RGBADelta]
\r
12992 movq [QDibOut],mm4
\r
12996 mov edi,dword ptr[QDibOut]
\r
12997 movq mm5,[QNegAlpha]
\r
12999 pmullw mm7,qword ptr[VertAlpha]
\r
13000 mov edi,dword ptr[edi]
\r
13002 psubw mm5,qword ptr[VertAlpha]
\r
13003 movq mm3,[edi+24]
\r
13006 punpcklbw mm3,[Zero]
\r
13009 mov edx,dword ptr[UV16V]
\r
13020 pand mm4,[WrapMask]
\r
13021 movd [edi+24],mm3
\r
13024 mov edi,dword ptr[QZOut+4]
\r
13027 mov edi,dword ptr[edi]
\r
13030 mov al,byte ptr[esi]
\r
13033 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ARGB UZR|VZR AR|GB
\r
13034 psrlq mm4,[QShiftV]
\r
13038 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw AR|GB UZR|VZR AR|GB
\r
13041 mov ax,word ptr[QZVal+6]
\r
13043 mov ebx,dword ptr[UV16+4]
\r
13044 mov word ptr[edi+14],ax
\r
13046 mov edx,dword ptr[UV16V]
\r
13048 mov edi,dword ptr[QDibOut+4]
\r
13055 paddw mm6,[RGBADelta]
\r
13057 movq mm5,[QNegAlpha]
\r
13058 pmullw mm7,qword ptr[VertAlpha]
\r
13060 mov edi,dword ptr[edi]
\r
13061 psubw mm5,qword ptr[VertAlpha]
\r
13065 movq mm3,[edi+28]
\r
13068 punpcklbw mm3,[Zero]
\r
13078 movq mm2,[QZVal32_0]
\r
13081 movq mm4,[QZVal32_1]
\r
13084 paddd mm2,[QZDelta]
\r
13085 paddd mm4,[QZDelta]
\r
13087 movq [QZVal32_0],mm2
\r
13088 movq [QZVal32_1],mm4
\r
13101 mov al,byte ptr[esi]
\r
13104 pand mm4,[WrapMask]
\r
13105 pcmpgtw mm2,[ebp+16]
\r
13107 movd [edi+28],mm3
\r
13113 psrlq mm4,[QShiftV]
\r
13119 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |ARGB
\r
13120 punpcklwd mm4,[Zero]
\r
13122 mov ebx,dword ptr[UV16+4]
\r
13123 paddd mm4,[QZorCan]
\r
13125 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
13128 mov edi,dword ptr[QZOut]
\r
13131 mov edi,dword ptr[edi]
\r
13137 mov ax,word ptr[QZVal]
\r
13139 punpcklwd mm4,[Zero]
\r
13140 mov word ptr[edi+16],ax
\r
13142 paddw mm6,[RGBADelta]
\r
13144 paddd mm4,[QDibOrCan]
\r
13147 movq [QDibOut],mm4
\r
13148 movq mm5,[QNegAlpha]
\r
13150 mov edi,dword ptr[QDibOut]
\r
13151 pmullw mm7,qword ptr[VertAlpha]
\r
13153 mov edi,dword ptr[edi]
\r
13154 psubw mm5,qword ptr[VertAlpha]
\r
13155 mov edx,dword ptr[UV16V]
\r
13157 movq mm3,[edi+32]
\r
13160 punpcklbw mm3,[Zero]
\r
13175 pand mm4,[WrapMask]
\r
13176 movd [edi+32],mm3
\r
13179 mov edi,dword ptr[QZOut+4]
\r
13181 mov al,byte ptr[esi]
\r
13184 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ARGB UZR|VZR AR|GB
\r
13185 psrlq mm4,[QShiftV]
\r
13187 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw AR|GB UZR|VZR AR|GB
\r
13190 mov ebx,dword ptr[UV16+4]
\r
13192 mov edx,dword ptr[UV16V]
\r
13194 mov edi,dword ptr[edi]
\r
13197 mov ax,word ptr[QZVal+2]
\r
13199 mov word ptr[edi+18],ax
\r
13202 mov edi,dword ptr[QDibOut+4]
\r
13204 paddw mm6,[RGBADelta]
\r
13206 mov edi,dword ptr[edi]
\r
13209 movq mm5,[QNegAlpha]
\r
13211 pmullw mm7,qword ptr[VertAlpha]
\r
13212 psubw mm5,qword ptr[VertAlpha]
\r
13214 movq mm3,[edi+36]
\r
13217 punpcklbw mm3,[Zero]
\r
13227 pand mm4,[WrapMask]
\r
13233 psrlq mm4,[QShiftV]
\r
13238 mov al,byte ptr[esi]
\r
13241 movd [edi+36],mm3
\r
13245 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |ARGB
\r
13247 punpckhwd mm4,[Zero]
\r
13249 mov ebx,dword ptr[UV16+4]
\r
13250 paddd mm4,[QZorCan]
\r
13252 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
13257 mov edi,dword ptr[QZOut]
\r
13259 mov ax,word ptr[QZVal+4]
\r
13262 mov edi,dword ptr[edi]
\r
13264 punpckhwd mm4,[Zero]
\r
13265 mov word ptr[edi+20],ax
\r
13268 paddd mm4,[QDibOrCan]
\r
13270 movq [QDibOut],mm4
\r
13272 mov edi,dword ptr[QDibOut]
\r
13273 paddw mm6,[RGBADelta]
\r
13276 mov edi,dword ptr[edi]
\r
13279 movq mm5,[QNegAlpha]
\r
13280 pmullw mm7,qword ptr[VertAlpha]
\r
13282 mov edx,dword ptr[UV16V]
\r
13283 psubw mm5,qword ptr[VertAlpha]
\r
13285 movq mm3,[edi+40]
\r
13288 punpcklbw mm3,[Zero]
\r
13303 movd [edi+40],mm3
\r
13306 pand mm4,[WrapMask]
\r
13307 mov al,byte ptr[esi]
\r
13311 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ARGB UZR|VZR AR|GB
\r
13312 psrlq mm4,[QShiftV]
\r
13314 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw AR|GB UZR|VZR AR|GB
\r
13318 mov ebx,dword ptr[UV16+4]
\r
13320 mov edx,dword ptr[UV16V]
\r
13322 mov edi,dword ptr[QZOut+4]
\r
13325 mov edi,dword ptr[edi]
\r
13327 mov ax,word ptr[QZVal+6]
\r
13330 mov word ptr[edi+22],ax
\r
13332 mov edi,dword ptr[QDibOut+4]
\r
13335 paddw mm6,[RGBADelta]
\r
13337 movq mm5,[QNegAlpha]
\r
13338 mov edi,dword ptr[edi]
\r
13340 pmullw mm7,qword ptr[VertAlpha]
\r
13341 psubw mm5,qword ptr[VertAlpha]
\r
13343 movq mm3,[edi+44]
\r
13346 punpcklbw mm3,[Zero]
\r
13356 movq mm2,[QZVal32_0]
\r
13359 movq mm4,[QZVal32_1]
\r
13362 paddd mm2,[QZDelta]
\r
13363 paddd mm4,[QZDelta]
\r
13365 movq [QZVal32_0],mm2
\r
13366 movq [QZVal32_1],mm4
\r
13379 mov al,byte ptr[esi]
\r
13382 pand mm4,[WrapMask]
\r
13383 pcmpgtw mm2,[ebp+24]
\r
13385 movd [edi+44],mm3
\r
13391 psrlq mm4,[QShiftV]
\r
13399 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |ARGB
\r
13400 punpcklwd mm4,[Zero]
\r
13402 mov ebx,dword ptr[UV16+4]
\r
13403 paddd mm4,[QZorCan]
\r
13405 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
13409 mov edi,dword ptr[QZOut]
\r
13412 mov ax,word ptr[QZVal]
\r
13414 mov edi,dword ptr[edi]
\r
13417 mov word ptr[edi+24],ax
\r
13419 punpcklwd mm4,[Zero]
\r
13422 paddd mm4,[QDibOrCan]
\r
13424 movq [QDibOut],mm4
\r
13426 paddw mm6,[RGBADelta]
\r
13428 mov edi,dword ptr[QDibOut]
\r
13431 movq mm5,[QNegAlpha]
\r
13432 pmullw mm7,qword ptr[VertAlpha]
\r
13434 mov edi,dword ptr[edi]
\r
13435 psubw mm5,qword ptr[VertAlpha]
\r
13437 movq mm3,[edi+48]
\r
13440 punpcklbw mm3,[Zero]
\r
13446 mov edx,dword ptr[UV16V]
\r
13449 pand mm4,[WrapMask]
\r
13458 movd [edi+48],mm3
\r
13462 mov al,byte ptr[esi]
\r
13464 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ARGB UZR|VZR AR|GB
\r
13465 psrlq mm4,[QShiftV]
\r
13467 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw AR|GB UZR|VZR AR|GB
\r
13471 mov ebx,dword ptr[UV16+4]
\r
13473 mov edx,dword ptr[UV16V]
\r
13475 mov edi,dword ptr[QZOut+4]
\r
13478 mov ax,word ptr[QZVal+2]
\r
13480 mov edi,dword ptr[edi]
\r
13483 mov word ptr[edi+26],ax
\r
13485 mov edi,dword ptr[QDibOut+4]
\r
13488 paddw mm6,[RGBADelta]
\r
13490 movq mm5,[QNegAlpha]
\r
13491 pmullw mm7,qword ptr[VertAlpha]
\r
13493 mov edi,dword ptr[edi]
\r
13494 psubw mm5,qword ptr[VertAlpha]
\r
13496 movq mm3,[edi+52]
\r
13499 punpcklbw mm3,[Zero]
\r
13508 pand mm4,[WrapMask]
\r
13514 psrlq mm4,[QShiftV]
\r
13521 movd [edi+52],mm3
\r
13522 mov al,byte ptr[esi]
\r
13527 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |ARGB
\r
13529 punpckhwd mm4,[Zero]
\r
13531 mov ebx,dword ptr[UV16+4]
\r
13532 paddd mm4,[QZorCan]
\r
13534 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
13537 mov edi,dword ptr[QZOut]
\r
13540 mov ax,word ptr[QZVal+4]
\r
13542 mov edi,dword ptr[edi]
\r
13545 mov word ptr[edi+28],ax
\r
13549 punpckhwd mm4,[Zero]
\r
13552 paddd mm4,[QDibOrCan]
\r
13554 paddw mm6,[RGBADelta]
\r
13556 movq mm5,[QNegAlpha]
\r
13557 pmullw mm7,qword ptr[VertAlpha]
\r
13559 movq [QDibOut],mm4
\r
13560 psubw mm5,qword ptr[VertAlpha]
\r
13562 mov edi,dword ptr[QDibOut]
\r
13565 mov edi,dword ptr[edi]
\r
13568 movq mm3,[edi+56]
\r
13571 punpcklbw mm3,[Zero]
\r
13573 mov edx,dword ptr[UV16V]
\r
13576 pand mm4,[WrapMask]
\r
13586 mov al,byte ptr[esi]
\r
13588 movd [edi+56],mm3
\r
13590 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ARGB UZR|VZR AR|GB
\r
13593 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw AR|GB UZR|VZR AR|GB
\r
13594 mov edi,dword ptr[QZOut+4]
\r
13596 mov ax,word ptr[QZVal+6]
\r
13598 mov edi,dword ptr[edi]
\r
13601 mov word ptr[edi+30],ax
\r
13603 mov edi,dword ptr[QDibOut+4]
\r
13605 paddw mm6,[RGBADelta]
\r
13607 mov edi,dword ptr[edi]
\r
13614 movq mm5,[QNegAlpha]
\r
13616 pmullw mm7,qword ptr[VertAlpha]
\r
13617 psubw mm5,qword ptr[VertAlpha]
\r
13620 movq mm2,[QZVal32_0]
\r
13621 movq mm3,[edi+60]
\r
13623 movq mm4,[QZVal32_1]
\r
13624 punpcklbw mm3,[Zero]
\r
13626 paddd mm2,[QZDelta]
\r
13629 paddd mm4,[QZDelta]
\r
13632 movq [QZVal32_0],mm2
\r
13635 movq [QZVal32_1],mm4
\r
13638 movd [edi+60],mm3
\r
13640 ; get corrected right side deltas ; ULw|VLw DU16|DV16 argb|ARGBagAG|rbRB |ZRi aA|rR UZR|VZR aArR|gGbB
\r
13646 movq mm0,[UVLeftW] ; ULw|VLw DU16|DV16 argb|ARGBagAG|rbRB |ZRi aA|rR UZR|VZR aArR|gGbB
\r
13648 movq mm1,[UVLeft] ; ULw|VLw UL|VL argb|ARGBagAG|rbRB ZRi|ZRi aA|rR UZR|VZR aArR|gGbB
\r
13649 punpckldq mm2,mm2 ; ULw|VLw UL|VL ZRi|ZRi agAG|rbRB ZRi|ZRi aA|rR UZR|VZR aArR|gGbB
\r
13651 pfmul mm2,mm6 ; ULw|VLw UL|VL UR|VR agAG|rbRB ZRi|ZRi aA|rR UZR|VZR aArR|gGbB
\r
13652 mov eax,offset QZCan
\r
13653 add dword ptr[Dest],32
\r
13655 add dword ptr[eax+4],32
\r
13656 mov eax,offset QDibCan
\r
13658 add dword ptr[eax+4],64
\r
13661 pfmul mm3,[QFixedScale]
\r
13665 dec [NumASpans] ; dec num affine spans
\r
13668 HandleLeftoverPixelsLit:
\r
13674 cmp [RemainingCount],0
\r
13677 mov eax,[RemainingCount]
\r
13678 mov dword ptr[ZIR],eax
\r
13679 mov dword ptr[ZIR+4],eax
\r
13681 movq mm7,[GLMapMulUV] ;UL6i|VL6i UL|VL UR6i|VR6i URi|VRi |ZRi |ZY UZR|VZR LU|LV
\r
13684 pfsub mm6,[UVDivZ16StepX]
\r
13685 pfmul mm7,mm1 ;UL6i|VL6i UL|VL UR6i|VR6i URi|VRi |ZRi |ZY UZR|VZR ULL|VLL
\r
13687 pfmul mm5,[QFixedScale]
\r
13690 pfmul mm7,[QFixedScale]
\r
13691 pfmul mm3,[UVDivZStepX]
\r
13697 movd mm6,[Zi16StepX]
\r
13699 pf2id mm7,mm7 ; ULw|VLw DU|DV UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
13702 movd mm6,[ZiStepX]
\r
13703 mov ebx,[RemainingCount]
\r
13710 paddd mm5,[UVAdjust]
\r
13712 punpckldq mm4,mm4
\r
13717 pfmul mm4,[QZBufferPrec]
\r
13719 paddd mm5,[UVAdjust2]
\r
13722 movq [UVLeft],mm5
\r
13723 pfsubr mm1,mm2 ; ULw|VLw DU|DV UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
13725 pfmul mm1,qword ptr[QFixedScaleLUT+ebx] ; ULw|VLw DU16|DV16 UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
13728 pand mm5,[WrapMask]
\r
13729 pf2id mm1,mm1 ; ULw|VLw DU16|DV16 UR|VR URw|VRw |ZRi |ZY UZR|VZR |
\r
13732 movq mm7,[UVAdjustL]
\r
13734 psrlq mm5,[QShiftV]
\r
13735 paddd mm7,[UVL16]
\r
13743 mov ebx,dword ptr[UVL16+4]
\r
13745 jle TryClampU1Lit
\r
13747 mov dword ptr[UVL16+4],ecx
\r
13753 mov dword ptr[UVL16+4],0
\r
13755 mov eax,dword ptr[UVL16]
\r
13757 jle TryClampV1Lit
\r
13759 mov dword ptr[UVL16],ecx
\r
13765 mov dword ptr[UVL16],0
\r
13768 mov ebx,dword ptr[UV16+4]
\r
13769 mov eax,dword ptr[UV16V]
\r
13779 mov eax,offset QDibCan
\r
13782 mov edi,dword ptr[eax+4]
\r
13783 mov ebp,dword ptr[Dest]
\r
13785 movq [UVZ],mm1 ;using this for a step temp
\r
13787 movd mm3,dword ptr[ZiStepX]
\r
13791 paddd mm0,[UVZ] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw |ZRi |ZY UZR|VZR |
\r
13794 mov al,byte ptr[esi]
\r
13795 movq mm5,mm0 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw UL|VL |ZY UZR|VZR |
\r
13800 pand mm5,[WrapMask] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
13803 movq [UV16],mm5 ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |
\r
13804 movd mm7,[ecx+eax*4] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR |ARGB
\r
13806 psrlq mm5,[QShiftV]
\r
13810 punpcklbw mm7,qword ptr[Zero] ; ULw|VLw DU16|DV16 UL|VL ULw|VLw ULw|VLw |ZY UZR|VZR AR|GB
\r
13813 mov ebx,dword ptr[UV16+4]
\r
13819 mov edx,dword ptr[UV16V]
\r
13824 movq mm1,[QNegAlpha]
\r
13826 pmullw mm7,qword ptr[VertAlpha]
\r
13827 psubw mm1,qword ptr[VertAlpha]
\r
13830 paddw mm6,[RGBADelta]
\r
13832 punpcklbw mm5,[Zero]
\r
13836 cmp ax,word ptr[ebp]
\r
13851 dec [RemainingCount]
\r
13852 jge LeftoverLoopLit
\r
13863 cRet DrawSpan32_AsmGouraudZBufferVertexAlpha3DNow
\r
13864 endProc DrawSpan32_AsmGouraudZBufferVertexAlpha3DNow
\r
13867 ;put the machine into 3dnow mode
;
; Femms3DNow: procedure declared through the cProc/endProc macros with a
; second macro field of 0 and an empty <> parameter list.
; NOTE(review): no instructions between cProc and endProc are visible in this
; listing (the embedded numbering jumps from 13869 to 13874). Going by the
; name, the body presumably issues FEMMS and returns -- TODO confirm against
; the complete file.
; NOTE(review): FEMMS is AMD's faster form of EMMS; it is normally issued
; both before and after a run of MMX/3DNow! code, so "put the machine into
; 3dnow mode" may describe only one of its uses -- confirm with the callers.
\r
13869 cProc Femms3DNow, 0,<>
\r
13874 endProc Femms3DNow
\r
13877 ;edge step for 3dnow
;
; StepWorld3DNow: takes one dword parameter, `edge`, and updates in place the
; record addressed through eax.
; NOTE(review): the instruction that loads eax is not visible in this listing
; (embedded numbering jumps from 13879 to 13884); eax presumably holds `edge`
; -- TODO confirm.
; NOTE(review): the byte offsets below (0, 4, 8, 12, 20, 28, 36, 48, 52) are
; raw displacements into that record; its field names are not shown here, so
; the comments refer to fields by offset only.
; Visible register use: eax (base pointer), edx (scratch), mm0, mm1.
\r
13879 cProc StepWorld3DNow, 4,<edge : dword>
\r
; mm0 = the two dwords stored at eax+12 and eax+16
13884 movq mm0,qword ptr[eax+12]
\r
; mm1 = the two dwords stored at eax+20 and eax+24
13885 movq mm1,qword ptr[eax+20]
\r
; packed single-precision add: mm0 += the pair at eax+28 / eax+32
13887 pfadd mm0,qword ptr[eax+28]
\r
; packed single-precision add: mm1 += the pair at eax+36 / eax+40
13888 pfadd mm1,qword ptr[eax+36]
\r
; write the stepped pair back over its source at eax+12
13890 movq qword ptr[eax+12],mm0
\r
; write the stepped pair back over its source at eax+20
; NOTE(review): numbering gaps around this store (13891-13892, 13894-13895)
; may hide instructions that further modify mm0/mm1 -- TODO confirm.
13893 movq qword ptr[eax+20],mm1
\r
; edx = dword at eax+4 (stored back to the same offset below)
13896 mov edx,dword ptr[eax+4]
\r
; store the current contents of mm1 as the qword at eax+52
13897 movq qword ptr[eax+52],mm1
\r
; store the low dword of mm0 at offset 0 of the record
13900 movd dword ptr[eax],mm0
\r
; write edx back to eax+4
; NOTE(review): whatever changes edx between the load and this store is not
; visible here -- TODO confirm.
13902 mov dword ptr[eax+4],edx
\r
; edx = dword at eax+8 (stored back to the same offset below)
13905 mov edx,dword ptr[eax+8]
\r
; store the low dword of mm0 at eax+48
13906 movd dword ptr[eax+48],mm0
\r
; write edx back to eax+8 (same caveat as for eax+4 above)
13909 mov dword ptr[eax+8],edx
\r
13913 cRet StepWorld3DNow
\r
13914 endProc StepWorld3DNow
\r