Author: tkreuzer
Date: Fri Jan 28 20:35:22 2011
New Revision: 50545
URL: http://svn.reactos.org/svn/reactos?rev=50545&view=rev
Log:
[IP]
Convert checksum.S to the new ML-compatible syntax. The resulting object file was compared and is
identical to trunk (for both GAS and ML).
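
For reference, the GAS (AT&T) to ML (Intel) mapping applied throughout the file follows this pattern (an illustrative sketch drawn from the diff below, not part of it):

    // GAS (AT&T)                    ML-compatible (Intel)
    // movl 20(%esp),%eax            mov eax, [esp + 20]
    // adcl $0, %eax                 adc eax, 0
    // roll $8, %eax                 rol eax, 8
    // .globl _csum_partial          PUBLIC _csum_partial
    // jz 2f / jne 1b                jz m2 / jne l1 (ML has no numeric local labels)

Hex immediates go through the HEX() macro from asm.inc, and # comments become //.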
Modified:
    branches/cmake-bringup/lib/drivers/ip/network/i386/checksum.S
Modified: branches/cmake-bringup/lib/drivers/ip/network/i386/checksum.S
URL: http://svn.reactos.org/svn/reactos/branches/cmake-bringup/lib/drivers/ip/ne…
==============================================================================
--- branches/cmake-bringup/lib/drivers/ip/network/i386/checksum.S [iso-8859-1] (original)
+++ branches/cmake-bringup/lib/drivers/ip/network/i386/checksum.S [iso-8859-1] Fri Jan 28 20:35:22 2011
@@ -24,109 +24,111 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
-
+
/*
* computes a partial checksum, e.g. for TCP/UDP fragments
*/
-/*
+/*
unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
*/
-
-.text
+
+#include <asm.inc>
+
+.code
.align 4
-.globl _csum_partial
-
+PUBLIC _csum_partial
+
#ifndef CONFIG_X86_USE_PPRO_CHECKSUM
- /*
+ /*
* Experiments with Ethernet and SLIP connections show that buff
* is aligned on either a 2-byte or 4-byte boundary. We get at
* least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
* Fortunately, it is easy to convert 2-byte alignment to 4-byte
* alignment for the unrolled loop.
- */
-_csum_partial:
- pushl %esi
- pushl %ebx
- movl 20(%esp),%eax # Function arg: unsigned int sum
- movl 16(%esp),%ecx # Function arg: int len
- movl 12(%esp),%esi # Function arg: unsigned char *buff
- testl $3, %esi # Check alignment.
- jz 2f # Jump if alignment is ok.
- testl $1, %esi # Check alignment.
- jz 10f # Jump if alignment is boundary of 2bytes.
+ */
+_csum_partial:
+ push esi
+ push ebx
+ mov eax, [esp + 20] // Function arg: unsigned int sum
+ mov ecx, [esp + 16] // Function arg: int len
+ mov esi, [esp + 12] // Function arg: unsigned char *buff
+ test esi, 3 // Check alignment.
+ jz m2 // Jump if alignment is ok.
+ test esi, 1 // Check alignment.
+ jz l10 // Jump if alignment is boundary of 2bytes.
// buf is odd
- dec %ecx
- jl 8f
- movzbl (%esi), %ebx
- adcl %ebx, %eax
- roll $8, %eax
- inc %esi
- testl $2, %esi
- jz 2f
-10:
- subl $2, %ecx # Alignment uses up two bytes.
- jae 1f # Jump if we had at least two bytes.
- addl $2, %ecx # ecx was < 2. Deal with it.
- jmp 4f
-1: movw (%esi), %bx
- addl $2, %esi
- addw %bx, %ax
- adcl $0, %eax
-2:
- movl %ecx, %edx
- shrl $5, %ecx
- jz 2f
- testl %esi, %esi
-1: movl (%esi), %ebx
- adcl %ebx, %eax
- movl 4(%esi), %ebx
- adcl %ebx, %eax
- movl 8(%esi), %ebx
- adcl %ebx, %eax
- movl 12(%esi), %ebx
- adcl %ebx, %eax
- movl 16(%esi), %ebx
- adcl %ebx, %eax
- movl 20(%esi), %ebx
- adcl %ebx, %eax
- movl 24(%esi), %ebx
- adcl %ebx, %eax
- movl 28(%esi), %ebx
- adcl %ebx, %eax
- lea 32(%esi), %esi
- dec %ecx
- jne 1b
- adcl $0, %eax
-2: movl %edx, %ecx
- andl $0x1c, %edx
- je 4f
- shrl $2, %edx # This clears CF
-3: adcl (%esi), %eax
- lea 4(%esi), %esi
- dec %edx
- jne 3b
- adcl $0, %eax
-4: andl $3, %ecx
- jz 7f
- cmpl $2, %ecx
- jb 5f
- movw (%esi),%cx
- leal 2(%esi),%esi
- je 6f
- shll $16,%ecx
-5: movb (%esi),%cl
-6: addl %ecx,%eax
- adcl $0, %eax
-7:
- testl $1, 12(%esp)
- jz 8f
- roll $8, %eax
-8:
- popl %ebx
- popl %esi
+ dec ecx
+ jl l8
+ movzx ebx, byte ptr [esi]
+ adc eax, ebx
+ rol eax, 8
+ inc esi
+ test esi, 2
+ jz m2
+l10:
+ sub ecx, 2 // Alignment uses up two bytes.
+ jae m1 // Jump if we had at least two bytes.
+ add ecx, 2 // ecx was < 2. Deal with it.
+ jmp l4
+m1: mov bx, [esi]
+ add esi, 2
+ add ax, bx
+ adc eax, 0
+m2:
+ mov edx, ecx
+ shr ecx, 5
+ jz l2
+ test esi, esi
+l1: mov ebx, [esi]
+ adc eax, ebx
+ mov ebx, [esi + 4]
+ adc eax, ebx
+ mov ebx, [esi + 8]
+ adc eax, ebx
+ mov ebx, [esi + 12]
+ adc eax, ebx
+ mov ebx, [esi + 16]
+ adc eax, ebx
+ mov ebx, [esi + 20]
+ adc eax, ebx
+ mov ebx, [esi + 24]
+ adc eax, ebx
+ mov ebx, [esi + 28]
+ adc eax, ebx
+ lea esi, [esi + 32]
+ dec ecx
+ jne l1
+ adc eax, 0
+l2: mov ecx, edx
+ and edx, HEX(1c)
+ je l4
+ shr edx, 2 // This clears CF
+l3: adc eax, [esi]
+ lea esi, [esi + 4]
+ dec edx
+ jne l3
+ adc eax, 0
+l4: and ecx, 3
+ jz l7
+ cmp ecx, 2
+ jb l5
+ mov cx, [esi]
+ lea esi, [esi + 2]
+ je l6
+ shl ecx, 16
+l5: mov cl, [esi]
+l6: add eax, ecx
+ adc eax, 0
+l7:
+ test dword ptr [esp + 12], 1
+ jz l8
+ rol eax, 8
+l8:
+ pop ebx
+ pop esi
ret
#else
@@ -134,116 +136,118 @@
/* Version for PentiumII/PPro */
csum_partial:
- pushl %esi
- pushl %ebx
- movl 20(%esp),%eax # Function arg: unsigned int sum
- movl 16(%esp),%ecx # Function arg: int len
- movl 12(%esp),%esi # Function arg: const unsigned char *buf
-
- testl $3, %esi
- jnz 25f
-10:
- movl %ecx, %edx
- movl %ecx, %ebx
- andl $0x7c, %ebx
- shrl $7, %ecx
- addl %ebx,%esi
- shrl $2, %ebx
- negl %ebx
- lea 45f(%ebx,%ebx,2), %ebx
- testl %esi, %esi
- jmp *%ebx
-
- # Handle 2-byte-aligned regions
-20: addw (%esi), %ax
- lea 2(%esi), %esi
- adcl $0, %eax
- jmp 10b
-25:
- testl $1, %esi
- jz 30f
- # buf is odd
- dec %ecx
- jl 90f
- movzbl (%esi), %ebx
- addl %ebx, %eax
- adcl $0, %eax
- roll $8, %eax
- inc %esi
- testl $2, %esi
- jz 10b
-
-30: subl $2, %ecx
- ja 20b
- je 32f
- addl $2, %ecx
- jz 80f
- movzbl (%esi),%ebx # csumming 1 byte, 2-aligned
- addl %ebx, %eax
- adcl $0, %eax
- jmp 80f
-32:
- addw (%esi), %ax # csumming 2 bytes, 2-aligned
- adcl $0, %eax
- jmp 80f
-
-40:
- addl -128(%esi), %eax
- adcl -124(%esi), %eax
- adcl -120(%esi), %eax
- adcl -116(%esi), %eax
- adcl -112(%esi), %eax
- adcl -108(%esi), %eax
- adcl -104(%esi), %eax
- adcl -100(%esi), %eax
- adcl -96(%esi), %eax
- adcl -92(%esi), %eax
- adcl -88(%esi), %eax
- adcl -84(%esi), %eax
- adcl -80(%esi), %eax
- adcl -76(%esi), %eax
- adcl -72(%esi), %eax
- adcl -68(%esi), %eax
- adcl -64(%esi), %eax
- adcl -60(%esi), %eax
- adcl -56(%esi), %eax
- adcl -52(%esi), %eax
- adcl -48(%esi), %eax
- adcl -44(%esi), %eax
- adcl -40(%esi), %eax
- adcl -36(%esi), %eax
- adcl -32(%esi), %eax
- adcl -28(%esi), %eax
- adcl -24(%esi), %eax
- adcl -20(%esi), %eax
- adcl -16(%esi), %eax
- adcl -12(%esi), %eax
- adcl -8(%esi), %eax
- adcl -4(%esi), %eax
-45:
- lea 128(%esi), %esi
- adcl $0, %eax
- dec %ecx
- jge 40b
- movl %edx, %ecx
-50: andl $3, %ecx
- jz 80f
-
- # Handle the last 1-3 bytes without jumping
- notl %ecx # 1->2, 2->1, 3->0, higher bits are masked
- movl $0xffffff,%ebx # by the shll and shrl instructions
- shll $3,%ecx
- shrl %cl,%ebx
- andl -128(%esi),%ebx # esi is 4-aligned so should be ok
- addl %ebx,%eax
- adcl $0,%eax
-80:
- testl $1, 12(%esp)
- jz 90f
- roll $8, %eax
-90:
- popl %ebx
- popl %esi
+ push esi
+ push ebx
+ mov eax, [esp + 20] // Function arg: unsigned int sum
+ mov ecx, [esp + 16] // Function arg: int len
+ mov esi, [esp + 12] // Function arg: const unsigned char *buf
+
+ test esi, 3
+ jnz l25
+l10:
+ mov edx, ecx
+ mov ebx, ecx
+ and ebx, HEX(7c)
+ shr ecx, 7
+ add esi, ebx
+ shr ebx, 2
+ neg ebx
+ lea ebx, l45[ebx + ebx * 2]
+ test esi, esi
+ jmp ebx
+
+ // Handle 2-byte-aligned regions
+l20: add ax, [esi]
+ lea esi, [esi + 2]
+ adc eax, 0
+ jmp l10
+l25:
+ test esi, 1
+ jz l30
+ // buf is odd
+ dec ecx
+ jl l90
+ movzx ebx, byte ptr [esi]
+ add eax, ebx
+ adc eax, 0
+ rol eax, 8
+ inc esi
+ test esi, 2
+ jz l10
+
+l30: sub ecx, 2
+ ja l20
+ je l32
+ add ecx, 2
+ jz l80
+ movzx ebx, byte ptr [esi] // csumming 1 byte, 2-aligned
+ add eax, ebx
+ adc eax, 0
+ jmp l80
+l32:
+ add ax, [esi] // csumming 2 bytes, 2-aligned
+ adc eax, 0
+ jmp l80
+
+l40:
+ add eax, [esi - 128]
+ adc eax, [esi - 124]
+ adc eax, [esi - 120]
+ adc eax, [esi - 116]
+ adc eax, [esi - 112]
+ adc eax, [esi - 108]
+ adc eax, [esi - 104]
+ adc eax, [esi - 100]
+ adc eax, [esi - 96]
+ adc eax, [esi - 92]
+ adc eax, [esi - 88]
+ adc eax, [esi - 84]
+ adc eax, [esi - 80]
+ adc eax, [esi - 76]
+ adc eax, [esi - 72]
+ adc eax, [esi - 68]
+ adc eax, [esi - 64]
+ adc eax, [esi - 60]
+ adc eax, [esi - 56]
+ adc eax, [esi - 52]
+ adc eax, [esi - 48]
+ adc eax, [esi - 44]
+ adc eax, [esi - 40]
+ adc eax, [esi - 36]
+ adc eax, [esi - 32]
+ adc eax, [esi - 28]
+ adc eax, [esi - 24]
+ adc eax, [esi - 20]
+ adc eax, [esi - 16]
+ adc eax, [esi - 12]
+ adc eax, [esi - 8]
+ adc eax, [esi - 4]
+l45:
+ lea esi, [esi + 128]
+ adc eax, 0
+ dec ecx
+ jge l40
+ mov ecx, edx
+l50: and ecx, 3
+ jz l80
+
+ // Handle the last 1-3 bytes without jumping
+ not ecx // 1->2, 2->1, 3->0, higher bits are masked
+ mov ebx, HEX(ffffff) // by the shl and shr instructions
+ shl ecx, 3
+ shr ebx, cl
+ and ebx, [esi - 128] // esi is 4-aligned so should be ok
+ add eax, ebx
+ adc eax, 0
+l80:
+ test dword ptr [esp + 12], 1
+ jz l90
+ rol eax, 8
+l90:
+ pop ebx
+ pop esi
ret
-
+
#endif
+
+END
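
As a usage note, csum_partial returns a running 32-bit sum in eax; a caller is expected to fold it into the final 16-bit ones' complement checksum. A minimal sketch of that folding step in the same ML syntax (an assumption for illustration, not part of this commit):

    mov edx, eax
    shr edx, 16
    and eax, HEX(ffff)
    add eax, edx          // fold the high word into the low word
    mov edx, eax
    shr edx, 16
    add eax, edx          // fold back any carry from the first add
    not eax
    and eax, HEX(ffff)    // final 16-bit checksum in ax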