Author: tkreuzer
Date: Fri Jan 28 20:35:22 2011
New Revision: 50545
URL: http://svn.reactos.org/svn/reactos?rev=50545&view=rev
Log:
[IP]
Convert checksum.S to new ML-compatible syntax. The resulting obj was
compared and is identical to trunk (both GAS and ML).
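For readers who don't know both dialects: the conversion is mostly mechanical.
A few representative pairs, taken from the diff below (HEX() is the radix
helper from asm.inc; the named labels replace GAS's numeric local labels,
whose f/b direction suffixes have no ML equivalent):

    GAS (AT&T) syntax           ML (Intel) syntax
    movl 20(%esp),%eax          mov eax, [esp + 20]
    testl $3, %esi              test esi, 3
    movzbl (%esi), %ebx         movzx ebx, byte ptr [esi]
    andl $0x1c, %edx            and edx, HEX(1c)
    jz 2f                       jz m2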
Modified: branches/cmake-bringup/lib/drivers/ip/network/i386/checksum.S
Modified: branches/cmake-bringup/lib/drivers/ip/network/i386/checksum.S
URL: http://svn.reactos.org/svn/reactos/branches/cmake-bringup/lib/drivers/ip/net...
==============================================================================
--- branches/cmake-bringup/lib/drivers/ip/network/i386/checksum.S	[iso-8859-1]	(original)
+++ branches/cmake-bringup/lib/drivers/ip/network/i386/checksum.S	[iso-8859-1]	Fri Jan 28 20:35:22 2011
@@ -24,109 +24,111 @@
  * as published by the Free Software Foundation; either version
  * 2 of the License, or (at your option) any later version.
  */
-
+
 /*
  * computes a partial checksum, e.g. for TCP/UDP fragments
  */
-/*
+/*
 unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
 */
-
-.text
+
+#include <asm.inc>
+
+.code
 .align 4
-.globl _csum_partial
-
+PUBLIC _csum_partial
+
 #ifndef CONFIG_X86_USE_PPRO_CHECKSUM
- /*
+ /*
  * Experiments with Ethernet and SLIP connections show that buff
  * is aligned on either a 2-byte or 4-byte boundary. We get at
  * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
  * Fortunately, it is easy to convert 2-byte alignment to 4-byte
  * alignment for the unrolled loop.
- */
-_csum_partial:
-    pushl %esi
-    pushl %ebx
-    movl 20(%esp),%eax    # Function arg: unsigned int sum
-    movl 16(%esp),%ecx    # Function arg: int len
-    movl 12(%esp),%esi    # Function arg: unsigned char *buff
-    testl $3, %esi        # Check alignment.
-    jz 2f                 # Jump if alignment is ok.
-    testl $1, %esi        # Check alignment.
-    jz 10f                # Jump if alignment is boundary of 2bytes.
+ */
+_csum_partial:
+    push esi
+    push ebx
+    mov eax, [esp + 20]   // Function arg: unsigned int sum
+    mov ecx, [esp + 16]   // Function arg: int len
+    mov esi, [esp + 12]   // Function arg: unsigned char *buff
+    test esi, 3           // Check alignment.
+    jz m2                 // Jump if alignment is ok.
+    test esi, 1           // Check alignment.
+    jz l10                // Jump if alignment is boundary of 2bytes.
 // buf is odd
-    dec %ecx
-    jl 8f
-    movzbl (%esi), %ebx
-    adcl %ebx, %eax
-    roll $8, %eax
-    inc %esi
-    testl $2, %esi
-    jz 2f
-10:
-    subl $2, %ecx    # Alignment uses up two bytes.
-    jae 1f           # Jump if we had at least two bytes.
-    addl $2, %ecx    # ecx was < 2. Deal with it.
-    jmp 4f
-1:  movw (%esi), %bx
-    addl $2, %esi
-    addw %bx, %ax
-    adcl $0, %eax
-2:
-    movl %ecx, %edx
-    shrl $5, %ecx
-    jz 2f
-    testl %esi, %esi
-1:  movl (%esi), %ebx
-    adcl %ebx, %eax
-    movl 4(%esi), %ebx
-    adcl %ebx, %eax
-    movl 8(%esi), %ebx
-    adcl %ebx, %eax
-    movl 12(%esi), %ebx
-    adcl %ebx, %eax
-    movl 16(%esi), %ebx
-    adcl %ebx, %eax
-    movl 20(%esi), %ebx
-    adcl %ebx, %eax
-    movl 24(%esi), %ebx
-    adcl %ebx, %eax
-    movl 28(%esi), %ebx
-    adcl %ebx, %eax
-    lea 32(%esi), %esi
-    dec %ecx
-    jne 1b
-    adcl $0, %eax
-2:  movl %edx, %ecx
-    andl $0x1c, %edx
-    je 4f
-    shrl $2, %edx    # This clears CF
-3:  adcl (%esi), %eax
-    lea 4(%esi), %esi
-    dec %edx
-    jne 3b
-    adcl $0, %eax
-4:  andl $3, %ecx
-    jz 7f
-    cmpl $2, %ecx
-    jb 5f
-    movw (%esi),%cx
-    leal 2(%esi),%esi
-    je 6f
-    shll $16,%ecx
-5:  movb (%esi),%cl
-6:  addl %ecx,%eax
-    adcl $0, %eax
-7:
-    testl $1, 12(%esp)
-    jz 8f
-    roll $8, %eax
-8:
-    popl %ebx
-    popl %esi
+    dec ecx
+    jl l8
+    movzx ebx, byte ptr [esi]
+    adc eax, ebx
+    rol eax, 8
+    inc esi
+    test esi, 2
+    jz m2
+l10:
+    sub ecx, 2       // Alignment uses up two bytes.
+    jae m1           // Jump if we had at least two bytes.
+    add ecx, 2       // ecx was < 2. Deal with it.
+    jmp l4
+m1: mov bx, [esi]
+    add esi, 2
+    add ax, bx
+    adc eax, 0
+m2:
+    mov edx, ecx
+    shr ecx, 5
+    jz l2
+    test esi, esi
+l1: mov ebx, [esi]
+    adc eax, ebx
+    mov ebx, [esi + 4]
+    adc eax, ebx
+    mov ebx, [esi + 8]
+    adc eax, ebx
+    mov ebx, [esi + 12]
+    adc eax, ebx
+    mov ebx, [esi + 16]
+    adc eax, ebx
+    mov ebx, [esi + 20]
+    adc eax, ebx
+    mov ebx, [esi + 24]
+    adc eax, ebx
+    mov ebx, [esi + 28]
+    adc eax, ebx
+    lea esi, [esi + 32]
+    dec ecx
+    jne l1
+    adc eax, 0
+l2: mov ecx, edx
+    and edx, HEX(1c)
+    je l4
+    shr edx, 2       // This clears CF
+l3: adc eax, [esi]
+    lea esi, [esi + 4]
+    dec edx
+    jne l3
+    adc eax, 0
+l4: and ecx, 3
+    jz l7
+    cmp ecx, 2
+    jb l5
+    mov cx, [esi]
+    lea esi, [esi + 2]
+    je l6
+    shl ecx, 16
+l5: mov cl, [esi]
+l6: add eax, ecx
+    adc eax, 0
+l7:
+    test dword ptr [esp + 12], 1
+    jz l8
+    rol eax, 8
+l8:
+    pop ebx
+    pop esi
 ret
 #else
@@ -134,116 +136,118 @@
 /* Version for PentiumII/PPro */
 csum_partial:
-    pushl %esi
-    pushl %ebx
-    movl 20(%esp),%eax    # Function arg: unsigned int sum
-    movl 16(%esp),%ecx    # Function arg: int len
-    movl 12(%esp),%esi    # Function arg: const unsigned char *buf
-
-    testl $3, %esi
-    jnz 25f
-10:
-    movl %ecx, %edx
-    movl %ecx, %ebx
-    andl $0x7c, %ebx
-    shrl $7, %ecx
-    addl %ebx,%esi
-    shrl $2, %ebx
-    negl %ebx
-    lea 45f(%ebx,%ebx,2), %ebx
-    testl %esi, %esi
-    jmp *%ebx
-
-    # Handle 2-byte-aligned regions
-20: addw (%esi), %ax
-    lea 2(%esi), %esi
-    adcl $0, %eax
-    jmp 10b
-25:
-    testl $1, %esi
-    jz 30f
-    # buf is odd
-    dec %ecx
-    jl 90f
-    movzbl (%esi), %ebx
-    addl %ebx, %eax
-    adcl $0, %eax
-    roll $8, %eax
-    inc %esi
-    testl $2, %esi
-    jz 10b
-
-30: subl $2, %ecx
-    ja 20b
-    je 32f
-    addl $2, %ecx
-    jz 80f
-    movzbl (%esi),%ebx    # csumming 1 byte, 2-aligned
-    addl %ebx, %eax
-    adcl $0, %eax
-    jmp 80f
-32:
-    addw (%esi), %ax      # csumming 2 bytes, 2-aligned
-    adcl $0, %eax
-    jmp 80f
-
-40:
-    addl -128(%esi), %eax
-    adcl -124(%esi), %eax
-    adcl -120(%esi), %eax
-    adcl -116(%esi), %eax
-    adcl -112(%esi), %eax
-    adcl -108(%esi), %eax
-    adcl -104(%esi), %eax
-    adcl -100(%esi), %eax
-    adcl -96(%esi), %eax
-    adcl -92(%esi), %eax
-    adcl -88(%esi), %eax
-    adcl -84(%esi), %eax
-    adcl -80(%esi), %eax
-    adcl -76(%esi), %eax
-    adcl -72(%esi), %eax
-    adcl -68(%esi), %eax
-    adcl -64(%esi), %eax
-    adcl -60(%esi), %eax
-    adcl -56(%esi), %eax
-    adcl -52(%esi), %eax
-    adcl -48(%esi), %eax
-    adcl -44(%esi), %eax
-    adcl -40(%esi), %eax
-    adcl -36(%esi), %eax
-    adcl -32(%esi), %eax
-    adcl -28(%esi), %eax
-    adcl -24(%esi), %eax
-    adcl -20(%esi), %eax
-    adcl -16(%esi), %eax
-    adcl -12(%esi), %eax
-    adcl -8(%esi), %eax
-    adcl -4(%esi), %eax
-45:
-    lea 128(%esi), %esi
-    adcl $0, %eax
-    dec %ecx
-    jge 40b
-    movl %edx, %ecx
-50: andl $3, %ecx
-    jz 80f
-
-    # Handle the last 1-3 bytes without jumping
-    notl %ecx             # 1->2, 2->1, 3->0, higher bits are masked
-    movl $0xffffff,%ebx   # by the shll and shrl instructions
-    shll $3,%ecx
-    shrl %cl,%ebx
-    andl -128(%esi),%ebx  # esi is 4-aligned so should be ok
-    addl %ebx,%eax
-    adcl $0,%eax
-80:
-    testl $1, 12(%esp)
-    jz 90f
-    roll $8, %eax
-90:
-    popl %ebx
-    popl %esi
+    push esi
+    push ebx
+    mov eax, [esp + 20]   // Function arg: unsigned int sum
+    mov ecx, [esp + 16]   // Function arg: int len
+    mov esi, [esp + 12]   // Function arg: const unsigned char *buf
+
+    test esi, 3
+    jnz l25
+l10:
+    mov edx, ecx
+    mov ebx, ecx
+    and ebx, HEX(7c)
+    shr ecx, 7
+    add esi, ebx
+    shr ebx, 2
+    neg ebx
+    lea ebx, l45[ebx + ebx * 2]
+    test esi, esi
+    jmp ebx
+
+    // Handle 2-byte-aligned regions
+l20: add ax, [esi]
+    lea esi, [esi + 2]
+    adc eax, 0
+    jmp l10
+l25:
+    test esi, 1
+    jz l30
+    // buf is odd
+    dec ecx
+    jl l90
+    movzx ebx, byte ptr [esi]
+    add eax, ebx
+    adc eax, 0
+    rol eax, 8
+    inc esi
+    test esi, 2
+    jz l10
+
+l30: sub ecx, 2
+    ja l20
+    je l32
+    add ecx, 2
+    jz l80
+    movzx ebx, byte ptr [esi]  // csumming 1 byte, 2-aligned
+    add eax, ebx
+    adc eax, 0
+    jmp l80
+l32:
+    add ax, [esi]              // csumming 2 bytes, 2-aligned
+    adc eax, 0
+    jmp l80
+
+l40:
+    add eax, [esi - 128]
+    adc eax, [esi - 124]
+    adc eax, [esi - 120]
+    adc eax, [esi - 116]
+    adc eax, [esi - 112]
+    adc eax, [esi - 108]
+    adc eax, [esi - 104]
+    adc eax, [esi - 100]
+    adc eax, [esi - 96]
+    adc eax, [esi - 92]
+    adc eax, [esi - 88]
+    adc eax, [esi - 84]
+    adc eax, [esi - 80]
+    adc eax, [esi - 76]
+    adc eax, [esi - 72]
+    adc eax, [esi - 68]
+    adc eax, [esi - 64]
+    adc eax, [esi - 60]
+    adc eax, [esi - 56]
+    adc eax, [esi - 52]
+    adc eax, [esi - 48]
+    adc eax, [esi - 44]
+    adc eax, [esi - 40]
+    adc eax, [esi - 36]
+    adc eax, [esi - 32]
+    adc eax, [esi - 28]
+    adc eax, [esi - 24]
+    adc eax, [esi - 20]
+    adc eax, [esi - 16]
+    adc eax, [esi - 12]
+    adc eax, [esi - 8]
+    adc eax, [esi - 4]
+l45:
+    lea esi, [esi + 128]
+    adc eax, 0
+    dec ecx
+    jge l40
+    mov ecx, edx
+l50: and ecx, 3
+    jz l80
+
+    // Handle the last 1-3 bytes without jumping
+    not ecx               // 1->2, 2->1, 3->0, higher bits are masked
+    mov ebx, HEX(ffffff)  // by the shl and shr instructions
+    shl ecx, 3
+    shr ebx, cl
+    and ebx, [esi - 128]  // esi is 4-aligned so should be ok
+    add eax, ebx
+    adc eax, 0
+l80:
+    test dword ptr [esp + 12], 1
+    jz l90
+    rol eax, 8
+l90:
+    pop ebx
+    pop esi
 ret
-
+
 #endif
+
+END
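A usage sketch, assuming cdecl: the [esp + 12/16/20] reads after the two
pushes imply arguments pushed right to left with caller cleanup. The symbol
_buffer and the length 64 below are made-up names for illustration only:

    push 0                  // unsigned int sum: initial accumulator
    push 64                 // int len: number of bytes to checksum
    push offset _buffer     // const unsigned char *buff (hypothetical symbol)
    call _csum_partial
    add esp, 12             // cdecl: caller removes the three dword args
    // eax now holds the 32-bit partial sum; folding it to 16 bits and
    // complementing it to get the final Internet checksum happens elsewhere.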
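Two annotations on the dense spots in the PPro path, worked out under the
encoding assumptions stated in the comments:

    // The dispatch "lea ebx, l45[ebx + ebx * 2]" / "jmp ebx" jumps backwards
    // into the add/adc chain: each "adc eax, [esi - disp8]" assembles to
    // exactly 3 bytes, so l45 - 3 * (remaining dwords) lands on the first
    // instruction that still needs to run.

    // The tail mask: with n = len AND 3 bytes left, not/shl/shr compute
    // mask = HEX(ffffff) >> (((NOT n) AND 3) * 8), since shr uses cl mod 32:
    //   n = 1: shift 16 -> mask HEX(0000ff), keep 1 byte
    //   n = 2: shift  8 -> mask HEX(00ffff), keep 2 bytes
    //   n = 3: shift  0 -> mask HEX(ffffff), keep 3 bytes
    // The whole dword at [esi - 128] is read (per the code's own comment this
    // is safe because esi is 4-byte aligned, so the aligned read cannot cross
    // a page boundary) and the bytes beyond len are masked off before the add.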