Author: tkreuzer
Date: Fri Jan 28 20:35:22 2011
New Revision: 50545
URL: http://svn.reactos.org/svn/reactos?rev=50545&view=rev
Log:
[IP]
Convert checksum.S to new ML-compatible syntax. The resulting obj was
compared and is identical to trunk (both GAS and ML).
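For readers who don't know both dialects: the conversion is mostly mechanical.
A few representative pairs, taken from the diff below (HEX() is the radix
helper from asm.inc; the named labels replace GAS's numeric local labels,
whose f/b direction suffixes have no ML equivalent):

    GAS (AT&T) syntax           ML (Intel) syntax
    movl 20(%esp),%eax          mov eax, [esp + 20]
    testl $3, %esi              test esi, 3
    movzbl (%esi), %ebx         movzx ebx, byte ptr [esi]
    andl $0x1c, %edx            and edx, HEX(1c)
    jz 2f                       jz m2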
Modified: branches/cmake-bringup/lib/drivers/ip/network/i386/checksum.S
Modified: branches/cmake-bringup/lib/drivers/ip/network/i386/checksum.S
URL: http://svn.reactos.org/svn/reactos/branches/cmake-bringup/lib/drivers/ip/net...
==============================================================================
--- branches/cmake-bringup/lib/drivers/ip/network/i386/checksum.S	[iso-8859-1]	(original)
+++ branches/cmake-bringup/lib/drivers/ip/network/i386/checksum.S	[iso-8859-1]	Fri Jan 28 20:35:22 2011
@@ -24,109 +24,111 @@
  * as published by the Free Software Foundation; either version
  * 2 of the License, or (at your option) any later version.
  */
-
+
 /*
  * computes a partial checksum, e.g. for TCP/UDP fragments
  */
-/*
+/*
 unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
 */
-
-.text
+
+#include <asm.inc>
+
+.code
 .align 4
-.globl _csum_partial
-
+PUBLIC _csum_partial
+
 #ifndef CONFIG_X86_USE_PPRO_CHECKSUM
- /*
+ /*
  * Experiments with Ethernet and SLIP connections show that buff
  * is aligned on either a 2-byte or 4-byte boundary. We get at
  * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
  * Fortunately, it is easy to convert 2-byte alignment to 4-byte
  * alignment for the unrolled loop.
- */
-_csum_partial:
-    pushl %esi
-    pushl %ebx
-    movl 20(%esp),%eax    # Function arg: unsigned int sum
-    movl 16(%esp),%ecx    # Function arg: int len
-    movl 12(%esp),%esi    # Function arg: unsigned char *buff
-    testl $3, %esi        # Check alignment.
-    jz 2f                 # Jump if alignment is ok.
-    testl $1, %esi        # Check alignment.
-    jz 10f                # Jump if alignment is boundary of 2bytes.
+ */
+_csum_partial:
+    push esi
+    push ebx
+    mov eax, [esp + 20]   // Function arg: unsigned int sum
+    mov ecx, [esp + 16]   // Function arg: int len
+    mov esi, [esp + 12]   // Function arg: unsigned char *buff
+    test esi, 3           // Check alignment.
+    jz m2                 // Jump if alignment is ok.
+    test esi, 1           // Check alignment.
+    jz l10                // Jump if alignment is boundary of 2bytes.
 // buf is odd
-    dec %ecx
-    jl 8f
-    movzbl (%esi), %ebx
-    adcl %ebx, %eax
-    roll $8, %eax
-    inc %esi
-    testl $2, %esi
-    jz 2f
-10:
-    subl $2, %ecx    # Alignment uses up two bytes.
-    jae 1f           # Jump if we had at least two bytes.
-    addl $2, %ecx    # ecx was < 2. Deal with it.
-    jmp 4f
-1:  movw (%esi), %bx
-    addl $2, %esi
-    addw %bx, %ax
-    adcl $0, %eax
-2:
-    movl %ecx, %edx
-    shrl $5, %ecx
-    jz 2f
-    testl %esi, %esi
-1:  movl (%esi), %ebx
-    adcl %ebx, %eax
-    movl 4(%esi), %ebx
-    adcl %ebx, %eax
-    movl 8(%esi), %ebx
-    adcl %ebx, %eax
-    movl 12(%esi), %ebx
-    adcl %ebx, %eax
-    movl 16(%esi), %ebx
-    adcl %ebx, %eax
-    movl 20(%esi), %ebx
-    adcl %ebx, %eax
-    movl 24(%esi), %ebx
-    adcl %ebx, %eax
-    movl 28(%esi), %ebx
-    adcl %ebx, %eax
-    lea 32(%esi), %esi
-    dec %ecx
-    jne 1b
-    adcl $0, %eax
-2:  movl %edx, %ecx
-    andl $0x1c, %edx
-    je 4f
-    shrl $2, %edx    # This clears CF
-3:  adcl (%esi), %eax
-    lea 4(%esi), %esi
-    dec %edx
-    jne 3b
-    adcl $0, %eax
-4:  andl $3, %ecx
-    jz 7f
-    cmpl $2, %ecx
-    jb 5f
-    movw (%esi),%cx
-    leal 2(%esi),%esi
-    je 6f
-    shll $16,%ecx
-5:  movb (%esi),%cl
-6:  addl %ecx,%eax
-    adcl $0, %eax
-7:
-    testl $1, 12(%esp)
-    jz 8f
-    roll $8, %eax
-8:
-    popl %ebx
-    popl %esi
+    dec ecx
+    jl l8
+    movzx ebx, byte ptr [esi]
+    adc eax, ebx
+    rol eax, 8
+    inc esi
+    test esi, 2
+    jz m2
+l10:
+    sub ecx, 2       // Alignment uses up two bytes.
+    jae m1           // Jump if we had at least two bytes.
+    add ecx, 2       // ecx was < 2. Deal with it.
+    jmp l4
+m1: mov bx, [esi]
+    add esi, 2
+    add ax, bx
+    adc eax, 0
+m2:
+    mov edx, ecx
+    shr ecx, 5
+    jz l2
+    test esi, esi
+l1: mov ebx, [esi]
+    adc eax, ebx
+    mov ebx, [esi + 4]
+    adc eax, ebx
+    mov ebx, [esi + 8]
+    adc eax, ebx
+    mov ebx, [esi + 12]
+    adc eax, ebx
+    mov ebx, [esi + 16]
+    adc eax, ebx
+    mov ebx, [esi + 20]
+    adc eax, ebx
+    mov ebx, [esi + 24]
+    adc eax, ebx
+    mov ebx, [esi + 28]
+    adc eax, ebx
+    lea esi, [esi + 32]
+    dec ecx
+    jne l1
+    adc eax, 0
+l2: mov ecx, edx
+    and edx, HEX(1c)
+    je l4
+    shr edx, 2       // This clears CF
+l3: adc eax, [esi]
+    lea esi, [esi + 4]
+    dec edx
+    jne l3
+    adc eax, 0
+l4: and ecx, 3
+    jz l7
+    cmp ecx, 2
+    jb l5
+    mov cx, [esi]
+    lea esi, [esi + 2]
+    je l6
+    shl ecx, 16
+l5: mov cl, [esi]
+l6: add eax, ecx
+    adc eax, 0
+l7:
+    test dword ptr [esp + 12], 1
+    jz l8
+    rol eax, 8
+l8:
+    pop ebx
+    pop esi
 ret
 #else
@@ -134,116 +136,118 @@
 /* Version for PentiumII/PPro */
 csum_partial:
-    pushl %esi
-    pushl %ebx
-    movl 20(%esp),%eax    # Function arg: unsigned int sum
-    movl 16(%esp),%ecx    # Function arg: int len
-    movl 12(%esp),%esi    # Function arg: const unsigned char *buf
-
-    testl $3, %esi
-    jnz 25f
-10:
-    movl %ecx, %edx
-    movl %ecx, %ebx
-    andl $0x7c, %ebx
-    shrl $7, %ecx
-    addl %ebx,%esi
-    shrl $2, %ebx
-    negl %ebx
-    lea 45f(%ebx,%ebx,2), %ebx
-    testl %esi, %esi
-    jmp *%ebx
-
-    # Handle 2-byte-aligned regions
-20: addw (%esi), %ax
-    lea 2(%esi), %esi
-    adcl $0, %eax
-    jmp 10b
-25:
-    testl $1, %esi
-    jz 30f
-    # buf is odd
-    dec %ecx
-    jl 90f
-    movzbl (%esi), %ebx
-    addl %ebx, %eax
-    adcl $0, %eax
-    roll $8, %eax
-    inc %esi
-    testl $2, %esi
-    jz 10b
-
-30: subl $2, %ecx
-    ja 20b
-    je 32f
-    addl $2, %ecx
-    jz 80f
-    movzbl (%esi),%ebx    # csumming 1 byte, 2-aligned
-    addl %ebx, %eax
-    adcl $0, %eax
-    jmp 80f
-32:
-    addw (%esi), %ax      # csumming 2 bytes, 2-aligned
-    adcl $0, %eax
-    jmp 80f
-
-40:
-    addl -128(%esi), %eax
-    adcl -124(%esi), %eax
-    adcl -120(%esi), %eax
-    adcl -116(%esi), %eax
-    adcl -112(%esi), %eax
-    adcl -108(%esi), %eax
-    adcl -104(%esi), %eax
-    adcl -100(%esi), %eax
-    adcl -96(%esi), %eax
-    adcl -92(%esi), %eax
-    adcl -88(%esi), %eax
-    adcl -84(%esi), %eax
-    adcl -80(%esi), %eax
-    adcl -76(%esi), %eax
-    adcl -72(%esi), %eax
-    adcl -68(%esi), %eax
-    adcl -64(%esi), %eax
-    adcl -60(%esi), %eax
-    adcl -56(%esi), %eax
-    adcl -52(%esi), %eax
-    adcl -48(%esi), %eax
-    adcl -44(%esi), %eax
-    adcl -40(%esi), %eax
-    adcl -36(%esi), %eax
-    adcl -32(%esi), %eax
-    adcl -28(%esi), %eax
-    adcl -24(%esi), %eax
-    adcl -20(%esi), %eax
-    adcl -16(%esi), %eax
-    adcl -12(%esi), %eax
-    adcl -8(%esi), %eax
-    adcl -4(%esi), %eax
-45:
-    lea 128(%esi), %esi
-    adcl $0, %eax
-    dec %ecx
-    jge 40b
-    movl %edx, %ecx
-50: andl $3, %ecx
-    jz 80f
-
-    # Handle the last 1-3 bytes without jumping
-    notl %ecx             # 1->2, 2->1, 3->0, higher bits are masked
-    movl $0xffffff,%ebx   # by the shll and shrl instructions
-    shll $3,%ecx
-    shrl %cl,%ebx
-    andl -128(%esi),%ebx  # esi is 4-aligned so should be ok
-    addl %ebx,%eax
-    adcl $0,%eax
-80:
-    testl $1, 12(%esp)
-    jz 90f
-    roll $8, %eax
-90:
-    popl %ebx
-    popl %esi
+    push esi
+    push ebx
+    mov eax, [esp + 20]   // Function arg: unsigned int sum
+    mov ecx, [esp + 16]   // Function arg: int len
+    mov esi, [esp + 12]   // Function arg: const unsigned char *buf
+
+    test esi, 3
+    jnz l25
+l10:
+    mov edx, ecx
+    mov ebx, ecx
+    and ebx, HEX(7c)
+    shr ecx, 7
+    add esi, ebx
+    shr ebx, 2
+    neg ebx
+    lea ebx, l45[ebx + ebx * 2]
+    test esi, esi
+    jmp ebx
+
+    // Handle 2-byte-aligned regions
+l20: add ax, [esi]
+    lea esi, [esi + 2]
+    adc eax, 0
+    jmp l10
+l25:
+    test esi, 1
+    jz l30
+    // buf is odd
+    dec ecx
+    jl l90
+    movzx ebx, byte ptr [esi]
+    add eax, ebx
+    adc eax, 0
+    rol eax, 8
+    inc esi
+    test esi, 2
+    jz l10
+
+l30: sub ecx, 2
+    ja l20
+    je l32
+    add ecx, 2
+    jz l80
+    movzx ebx, byte ptr [esi]  // csumming 1 byte, 2-aligned
+    add eax, ebx
+    adc eax, 0
+    jmp l80
+l32:
+    add ax, [esi]              // csumming 2 bytes, 2-aligned
+    adc eax, 0
+    jmp l80
+
+l40:
+    add eax, [esi - 128]
+    adc eax, [esi - 124]
+    adc eax, [esi - 120]
+    adc eax, [esi - 116]
+    adc eax, [esi - 112]
+    adc eax, [esi - 108]
+    adc eax, [esi - 104]
+    adc eax, [esi - 100]
+    adc eax, [esi - 96]
+    adc eax, [esi - 92]
+    adc eax, [esi - 88]
+    adc eax, [esi - 84]
+    adc eax, [esi - 80]
+    adc eax, [esi - 76]
+    adc eax, [esi - 72]
+    adc eax, [esi - 68]
+    adc eax, [esi - 64]
+    adc eax, [esi - 60]
+    adc eax, [esi - 56]
+    adc eax, [esi - 52]
+    adc eax, [esi - 48]
+    adc eax, [esi - 44]
+    adc eax, [esi - 40]
+    adc eax, [esi - 36]
+    adc eax, [esi - 32]
+    adc eax, [esi - 28]
+    adc eax, [esi - 24]
+    adc eax, [esi - 20]
+    adc eax, [esi - 16]
+    adc eax, [esi - 12]
+    adc eax, [esi - 8]
+    adc eax, [esi - 4]
+l45:
+    lea esi, [esi + 128]
+    adc eax, 0
+    dec ecx
+    jge l40
+    mov ecx, edx
+l50: and ecx, 3
+    jz l80
+
+    // Handle the last 1-3 bytes without jumping
+    not ecx               // 1->2, 2->1, 3->0, higher bits are masked
+    mov ebx, HEX(ffffff)  // by the shl and shr instructions
+    shl ecx, 3
+    shr ebx, cl
+    and ebx, [esi - 128]  // esi is 4-aligned so should be ok
+    add eax, ebx
+    adc eax, 0
+l80:
+    test dword ptr [esp + 12], 1
+    jz l90
+    rol eax, 8
+l90:
+    pop ebx
+    pop esi
 ret
-
+
 #endif
+
+END
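A usage sketch, assuming cdecl: the [esp + 12/16/20] reads after the two
pushes imply arguments pushed right to left with caller cleanup. The symbol
_buffer and the length 64 below are made-up names for illustration only:

    push 0                  // unsigned int sum: initial accumulator
    push 64                 // int len: number of bytes to checksum
    push offset _buffer     // const unsigned char *buff (hypothetical symbol)
    call _csum_partial
    add esp, 12             // cdecl: caller removes the three dword args
    // eax now holds the 32-bit partial sum; folding it to 16 bits and
    // complementing it to get the final Internet checksum happens elsewhere.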
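Two annotations on the dense spots in the PPro path, worked out under the
encoding assumptions stated in the comments:

    // The dispatch "lea ebx, l45[ebx + ebx * 2]" / "jmp ebx" jumps backwards
    // into the add/adc chain: each "adc eax, [esi - disp8]" assembles to
    // exactly 3 bytes, so l45 - 3 * (remaining dwords) lands on the first
    // instruction that still needs to run.

    // The tail mask: with n = len AND 3 bytes left, not/shl/shr compute
    // mask = HEX(ffffff) >> (((NOT n) AND 3) * 8), since shr uses cl mod 32:
    //   n = 1: shift 16 -> mask HEX(0000ff), keep 1 byte
    //   n = 2: shift  8 -> mask HEX(00ffff), keep 2 bytes
    //   n = 3: shift  0 -> mask HEX(ffffff), keep 3 bytes
    // The whole dword at [esi - 128] is read (per the code's own comment this
    // is safe because esi is 4-byte aligned, so the aligned read cannot cross
    // a page boundary) and the bytes beyond len are masked off before the add.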