If it should be optimized, inline asm should be used, as follows:
Reuel ben Yisrael schrieb:
+.globl _UlongByteSwap
+.intel_syntax noprefix
+/* FUNCTIONS ***************************************************************/
+_UlongByteSwap:
        // Byte-reverses a 32-bit value (C inline equivalent: ULONG UlongByteSwap(ULONG x)).
        // In:  the ULONG argument on the stack, read at [ebp+8] once the frame is set up
        // Out: eax = argument with its four bytes in reverse order
        push    ebp                     // save the caller's frame pointer
        mov     ebp, esp                // ebp = frame base: [ebp] saved ebp, [ebp+4] return address
        mov     eax, [ebp+8]            // eax = the ULONG argument
        bswap   eax                     // reverse the byte order of eax
        pop     ebp                     // restore the caller's frame pointer
        ret
this should work:
_UlongByteSwap:
        // Frameless byte swap of a 32-bit value.
        // In:  the ULONG argument on the stack
        // Out: eax = argument with its four bytes in reverse order
        // No frame is pushed here, so on entry [esp] holds the return address and
        // the argument sits directly above it at [esp+4] (it is at +8 only after a
        // preceding "push ebp").
        mov     eax, [esp+4]            // eax = the ULONG argument
        bswap   eax                     // reverse the byte order of eax
        ret
/* Return x with its bytes in reverse order, using one BSWAP instruction. */
static force_inline ULONG UlongByteSwap(ULONG x)
{
    /* "+r": x is both read and written in the same general-purpose register. */
    asm volatile(
        "bswap %0;"
        : "+r" (x)
    );

    return x;
}
+.globl _UlonglongByteSwap
+.intel_syntax noprefix
+/* FUNCTIONS ***************************************************************/
+_UlonglongByteSwap:
        // Byte-reverses a 64-bit value (C inline equivalent: ULONGLONG UlonglongByteSwap(ULONGLONG x)).
        // In:  the ULONGLONG argument on the stack, low dword at the lower address
        // Out: edx:eax = argument with its eight bytes in reverse order
        push    ebp                     // save the caller's frame pointer
        mov     ebp, esp                // ebp = frame base: [ebp] saved ebp, [ebp+4] return address
        mov     edx, [ebp+8]            // edx = low dword of the argument (becomes the high dword of the result)
        mov     eax, [ebp+12]           // eax = high dword of the argument (becomes the low dword of the result)
        bswap   edx                     // reverse the bytes of the new high dword
        bswap   eax                     // reverse the bytes of the new low dword
        pop     ebp                     // restore the caller's frame pointer
        ret

_UlonglongByteSwap:
        // Frameless variant of the same routine: without "push ebp" the return
        // address is at [esp], so the argument occupies [esp+4] and [esp+8].
        mov     edx, [esp+4]            // edx = low dword of the argument (becomes the high dword of the result)
        mov     eax, [esp+8]            // eax = high dword of the argument (becomes the low dword of the result)
        bswap   edx                     // reverse the bytes of the new high dword
        bswap   eax                     // reverse the bytes of the new low dword
        ret
/*
 * Return x with its eight bytes in reverse order.
 *
 * A 64-bit byte swap has two parts: reverse the bytes inside each 32-bit
 * half, and exchange the two halves. Byte-swapping edx and eax in place
 * only does the first part (0x0102030405060708 would come back as
 * 0x0403020108070605), so the halves are split explicitly here and
 * recombined in the opposite order.
 *
 * Assumes ULONG is 32 bits wide and ULONGLONG is 64 bits wide.
 */
static force_inline ULONGLONG UlonglongByteSwap(ULONGLONG x)
{
    ULONG lo = (ULONG)x;          /* low 32 bits of the input  */
    ULONG hi = (ULONG)(x >> 32);  /* high 32 bits of the input */

    /* Reverse the bytes inside each half; "+r" lets the compiler pick any registers. */
    asm volatile(
        "bswap %0;"
        "bswap %1;"
        : "+r" (lo), "+r" (hi)
    );

    /* The swapped low half becomes the high half of the result, and vice versa. */
    return ((ULONGLONG)lo << 32) | hi;
}
+_UshortByteSwap:
        // Exchanges the two bytes of a 16-bit value (C inline equivalent: USHORT UshortByteSwap(USHORT x)).
        // In:  the USHORT argument in the low 16 bits of the dword at [ebp+8]
        // Out: ax = argument with its two bytes exchanged (upper half of eax is not meaningful)
        push    ebp                     // save the caller's frame pointer
        mov     ebp, esp                // ebp = frame base: [ebp] saved ebp, [ebp+4] return address
        mov     eax, [ebp+8]            // eax = dword holding the USHORT in its low 16 bits
        bswap   eax                     // the two argument bytes are now reversed in the top 16 bits
                                        // (the patch author avoids xchg as slow and uses bswap + rol)
        rol     eax, 16                 // rotate the swapped pair down into ax
        pop     ebp                     // restore the caller's frame pointer
        ret

_UshortByteSwap:
        // Frameless variant of the same routine: without "push ebp" the return
        // address is at [esp], so the argument is at [esp+4].
        mov     eax, [esp+4]            // eax = dword holding the USHORT in its low 16 bits
        bswap   eax                     // the two argument bytes are now reversed in the top 16 bits
        rol     eax, 16                 // rotate the swapped pair down into ax
        ret
or to save a byte...
_UshortByteSwap:
        // Frameless 16-bit byte swap using two byte moves.
        // In:  the USHORT argument in the low 16 bits of the dword at [esp+4]
        //      ([esp] is the return address, since no frame is pushed)
        // Out: ax = argument with its two bytes exchanged; the upper 16 bits of
        //      eax are not written
        // ecx is used as the scratch register because it is caller-saved in the
        // 32-bit x86 C calling conventions; ebx is callee-saved and would have to
        // be pushed and popped around the body.
        mov     ecx, [esp+4]            // ecx = dword holding the USHORT in cx
        mov     al, ch                  // result low byte  = argument high byte
        mov     ah, cl                  // result high byte = argument low byte
        ret
/* Return x with its two bytes exchanged, using a 16-bit rotate by 8. */
static force_inline USHORT UshortByteSwap(USHORT x)
{
    /* "+r": x is both read and written in the same general-purpose register. */
    asm volatile(
        "rolw $8, %0;"
        : "+r" (x)
    );

    return x;
}