C: Simple Inline Asm Example
Tuesday, 20 December 2011 21:29

The 'htons' example code below uses the x86 rotate instruction, via GCC inline assembly, to outperform the compiler-generated code for the equivalent C shift-and-or expression. Please note that the GNU/Linux implementation of 'htons' handles more interesting details than I have covered here.

main.c
#include <stdio.h>
#include <stdlib.h>

/*
 * Loop bound for the benchmark loops in myHtons and myInlineHtons. Both
 * functions start their counter at 1 and run while it is < MAXLOOPS, so each
 * performs MAXLOOPS - 1 = 999999 byte swaps. That count is odd, so the net
 * effect on the value is a single byte swap.
 */
#define MAXLOOPS 1000000
/* Swaps the two bytes of s using plain C shifts. */
unsigned short myHtons(unsigned short s);
/* Swaps the two bytes of s using inline assembly (x86 'rorw'). */
unsigned short myInlineHtons(unsigned short s);

/*
 * Runs both byte-swap implementations on the same 16-bit sample value and
 * prints each result in hexadecimal.
 *
 * Returns EXIT_SUCCESS.
 */
int main(void) {
	const unsigned short sample = 0xadde;
	unsigned int swapped;

	/* Plain C version. */
	swapped = (unsigned int) myHtons(sample);
	printf("myHtons: 0x%x\n", swapped);

	/* Inline assembly version. */
	swapped = (unsigned int) myInlineHtons(sample);
	printf("myInlineHtons: 0x%x\n", swapped);

	return EXIT_SUCCESS;
}

/*
 * Swaps the two bytes of a 16-bit value using plain C shifts.
 *
 * The swap is repeated MAXLOOPS - 1 times so that the function does enough
 * work to be measured by a profiler. With an odd repeat count the value
 * returned equals a single byte swap of the argument.
 *
 * s: the 16-bit value to swap.
 * Returns s after the repeated swaps.
 */
unsigned short myHtons(unsigned short s) {
	int remaining = MAXLOOPS - 1;

	while (remaining > 0) {
		/* Operands are promoted to int; the assignment truncates to 16 bits. */
		const int high_byte_down = s >> 8;
		const int low_byte_up = s << 8;

		s = high_byte_down | low_byte_up;
		remaining -= 1;
	}
	return s;
}

/*
 * Swaps the two bytes of a 16-bit value using the x86 'rorw' (rotate word
 * right) instruction through GCC-style extended inline assembly.
 *
 * The swap is repeated MAXLOOPS - 1 times so that the function does enough
 * work to be measured by a profiler. With an odd repeat count the value
 * returned equals a single byte swap of the argument.
 *
 * On compilers or targets where the x86 inline assembly is unavailable, the
 * same swap is done with plain C shifts so the file still builds and prints
 * the same result.
 *
 * s: the 16-bit value to swap.
 * Returns s after the repeated swaps.
 */
unsigned short myInlineHtons(unsigned short s) {
	unsigned int i = 1;
	for (; i < MAXLOOPS; i += 1) {
#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
		/*
		 * '__asm__' is used instead of 'asm': the plain keyword is a GNU
		 * extension and is not recognised in strict ISO modes such as
		 * -std=c99 or -std=c11.
		 */
		__asm__(
				/* rotate the 16-bit operand right by 8 bits */
				"rorw $8, %0"
				/* ASM Output:    result is written back to s from the register gcc picks for '=r' */
				: "=r" (s)
				/* ASM Input:     s is loaded into the same register as operand 0 */
				: "0" (s)
				/* ASM Clobbered: condition code register */
				: "cc");
#else
		/* Portable fallback: same byte swap written with C shifts. */
		s = (unsigned short) ((s >> 8) | (s << 8));
#endif
	}
	return s;
}

Note: the 'oprofile' output for this example shows that the inline-assembly rotate version is the clear winner:

	...
	...
	samples  %        image name               symbol name
	10327    60.1666  CInlineAsmExample        myHtons
	6824     39.7576  CInlineAsmExample        myInlineHtons
	...
	...