|
|
|
|
|
|
|
|
DEF PROC_asm
REM Assembles the machine-code routine 'supersample%' (32-bit x86).
REM
REM supersample% box-filters a large source bitmap down to a smaller
REM destination bitmap.  It takes five DWORD parameters on the stack and
REM removes them itself on return (ret 20):
REM
REM   pBm%   - address of the destination pixels (bmW% x bmH%)
REM   pBm2%  - address of the source pixels (bmW%*smpSz% x bmH%*smpSz%)
REM   bmW%   - destination width in pixels
REM   bmH%   - destination height in pixels
REM   smpSz% - edge length of the square of source pixels averaged
REM            into each destination pixel
REM
REM Pixels are 4 bytes each; bytes 0, 1, 2 are treated as blue, green, red
REM and byte 3 is neither read nor written.  Rows are assumed to be tightly
REM packed (4 bytes * width) - TODO confirm against the caller's bitmaps.
REM
REM All general registers are preserved (pushad/popad) and the FPU is not
REM used.  The routine returns without touching memory if bmW%, bmH% or
REM smpSz% is <= 0, or if smpSz%^2 does not fit a positive 32-bit integer.
LOCAL C%, I%, P%
DIM C% 511
FOR I% = 0 TO 2 STEP 2
  REM Start assembling at the first 32-byte boundary inside the buffer.
  P% = (C% + 31) AND -32
  [OPT I%

  .supersample%
  pushad
  mov     ebp, esp
  sub     esp, 128

  ; -------------------------------------------------------------------
  ; Parameters (above the 32 bytes of pushad and the return address)
  ;   EBP!36 = pBm%
  ;   EBP!40 = pBm2%
  ;   EBP!44 = bmW%
  ;   EBP!48 = bmH%
  ;   EBP!52 = smpSz%
  ;
  ; Local frame
  ;   ESP!0  = smpSzSq%  (smpSz%^2)
  ;   ESP!4  = S%        (65536 DIV smpSzSq%, 16.16 fixed-point reciprocal)
  ;   ESP!20 = rowBytesLen%            (4 * bmW% * smpSz%)
  ;   ESP!32 = rowBytesLen% * smpSz%
  ;   ESP!36 = 4 * smpSz%
  ;   ESP!40 = X%  (destination column)
  ;   ESP!44 = Y%  (destination row)
  ;   ESP!48 = rSum%
  ;   ESP!52 = gSum%
  ;   ESP!56 = bSum%
  ; -------------------------------------------------------------------

  ;
  ; Every loop below is bottom-tested, so its body runs at least once.
  ; Bail out before touching any pixel when there is nothing to do.
  ;
  cmp     DWORD [ebp + 44], 0
  jle     near supersample_exit%          ; bmW% <= 0
  cmp     DWORD [ebp + 48], 0
  jle     near supersample_exit%          ; bmH% <= 0
  cmp     DWORD [ebp + 52], 0
  jle     near supersample_exit%          ; smpSz% <= 0

  ;
  ; smpSzSq% = smpSz%^2
  ;
  mov     eax, [ebp + 52]
  imul    eax, eax
  cmp     eax, 0
  jle     near supersample_exit%          ; square wrapped, also avoids a divide by zero
  mov     [esp + 0], eax                  ; ESP!0 = smpSzSq%

  ;
  ; S% = 65536 DIV smpSzSq%
  ; Plain unsigned division gives the same truncated quotient as the
  ; FPU sequence it replaces, without disturbing the FPU control word.
  ;
  mov     eax, 65536
  xor     edx, edx
  div     DWORD [esp + 0]
  mov     [esp + 4], eax                  ; ESP!4 = S%

  ;
  ; rowBytesLen% = 4 * bmW% * smpSz%  (bytes per source row)
  ;
  mov     eax, [ebp + 44]                 ; bmW%
  imul    eax, [ebp + 52]                 ; source width in pixels
  shl     eax, 2
  mov     [esp + 20], eax                 ; ESP!20 = rowBytesLen%

  ;
  ; Source bytes spanned by one destination row
  ;
  imul    eax, [ebp + 52]
  mov     [esp + 32], eax                 ; ESP!32 = rowBytesLen% * smpSz%

  ;
  ; Source bytes spanned by one destination column
  ;
  mov     eax, [ebp + 52]
  shl     eax, 2
  mov     [esp + 36], eax                 ; ESP!36 = 4 * smpSz%

  mov     eax, [ebp + 36]                 ; EAX = pBm  (destination)
  mov     ebx, [ebp + 40]                 ; EBX = pBm2 (source)

  mov     DWORD [esp + 44], 0             ; Y% = 0

  .supersample_yLoop%                     ; Y% runs from 0 to bmH%-1
  mov     DWORD [esp + 40], 0             ; X% = 0

  .supersample_xLoop%                     ; X% runs from 0 to bmW%-1

  ;
  ; O% = (rowBytesLen% * smpSz% * Y%) + (4 * smpSz% * X%)
  ; Byte offset of the top-left source pixel of this sample square.
  ;
  mov     edi, [esp + 32]
  mov     esi, [esp + 36]
  imul    edi, [esp + 44]
  imul    esi, [esp + 40]
  add     edi, esi                        ; EDI = O%

  mov     DWORD [esp + 48], 0             ; rSum% = 0
  mov     DWORD [esp + 52], 0             ; gSum% = 0
  mov     DWORD [esp + 56], 0             ; bSum% = 0

  xor     esi, esi                        ; y% = 0 (row within the square)

  .supersample_innerYloop%
  push    esi                             ; park y%, frame offsets shift by 4
  xor     esi, esi                        ; x% = 0 (column within the square)

  .supersample_innerXloop%
  ;
  ; EAX = pBm, EBX = pBm2, EDI = O% for the current source row, ESI = x%
  ; O2% = O% + 4 * x%
  ;
  push    esi                             ; park x%, frame offsets now shift by 8
  shl     esi, 2
  add     esi, edi                        ; ESI = O2%

  movzx   ecx, BYTE [ebx + esi + 0]       ; blue byte
  add     [esp + (56 + 8)], ecx           ; bSum% += blue
  movzx   ecx, BYTE [ebx + esi + 1]       ; green byte
  add     [esp + (52 + 8)], ecx           ; gSum% += green
  movzx   ecx, BYTE [ebx + esi + 2]       ; red byte
  add     [esp + (48 + 8)], ecx           ; rSum% += red

  pop     esi                             ; ESI = x%
  add     esi, 1
  cmp     esi, [ebp + 52]
  jl      supersample_innerXloop%         ; repeat while x% < smpSz%

  add     edi, [esp + (20 + 4)]           ; O% += rowBytesLen% (next source row)

  pop     esi                             ; ESI = y%
  add     esi, 1
  cmp     esi, [ebp + 52]
  jl      supersample_innerYloop%         ; repeat while y% < smpSz%

  ;
  ; Destination byte offset = 4 * (Y% * bmW% + X%)
  ;
  mov     esi, [esp + 44]
  imul    esi, [ebp + 44]
  add     esi, [esp + 40]
  shl     esi, 2

  ;
  ; Average each channel as (sum * S%) >> 16 and store the low byte.
  ;
  mov     edx, [esp + 4]                  ; EDX = S%

  mov     ecx, [esp + 48]                 ; rSum%
  imul    ecx, edx
  shr     ecx, 16
  mov     [eax + esi + 2], cl             ; red

  mov     ecx, [esp + 52]                 ; gSum%
  imul    ecx, edx
  shr     ecx, 16
  mov     [eax + esi + 1], cl             ; green

  mov     ecx, [esp + 56]                 ; bSum%
  imul    ecx, edx
  shr     ecx, 16
  mov     [eax + esi + 0], cl             ; blue

  mov     edx, [ebp + 44]                 ; bmW%
  add     DWORD [esp + 40], 1             ; X% += 1
  cmp     DWORD [esp + 40], edx
  jl      near supersample_xLoop%         ; repeat while X% < bmW%

  mov     edx, [ebp + 48]                 ; bmH%
  add     DWORD [esp + 44], 1             ; Y% += 1
  cmp     DWORD [esp + 44], edx
  jl      near supersample_yLoop%         ; repeat while Y% < bmH%

  .supersample_exit%
  add     esp, 128
  popad
  ret     20                              ; discard the five DWORD parameters
  ]
NEXT I%
ENDPROC