[MLton-devel] detecting overflow with the C codegen

Thu, 21 Nov 2002 10:25:52 -0800

> Still, I would think that
> using the overflow flag would be the fastest when you get it.
> Testing for overflow in C could be done by something like
> 	long long	tmp;
> 
> 	tmp = x * y;		/* or x + y, ... */
> 	if (tmp != (int)tmp)
> 		--- overflow ---;
> The generated code is probably bad but I would guess not too horrible and
> the best you can get out of C.

I am not convinced.  I've attached a C program below that does the
add in 5 different ways

1. no check
2. jo
3. single compare
4. two compares and a sub
5. long long

I've listed them in that order because that corresponds to increasing
order of running time.  For example, when I run the program compiled
with gcc 3.2 -O2, I get

999999999 1006
1000000999 1132
1000000999 1131
1000000999 1346
1000000999 1402
1000000999 1913

Here, the first line is a baseline, and the next five lines are the
five methods.

I've also included the assembly below that gcc generates for each
method.  It seems consistent with the runtime.

Given this, I think we should use add4 in the C codegen, and maybe put
in some code to do add3 sometime (to at least make the code size
smaller, if nothing else).

--------------------------------------------------------------------------------

#include <sys/times.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

enum {
	MAXINT = 0x7FFFFFFF,
	MININT = 0x80000000,
	ADD_CONST = 123456,
};

void die (char *s) {
	fprintf (stderr, "%s\n", s);
	exit (1);
}
static void overflow () {
	die ("overflow");
}

int add0 (int n1, int n2) {
	return n1;
}

int add1 (int n1, int n2) {
	return n1 + n2;
}

int add2 (int n1, int n2) {
 	__asm__ __volatile__ ("addl %1, %0\n\tjo overflow"
			      : "+r" (n1) : "g" (n2) : "cc");

	return n1;
}

int add3 (int n1, int n2) {
	if (n1 > MAXINT - ADD_CONST)
		overflow ();
	return n1 + n2;
}

int add4 (int n1, int n2) {
	if (n1 >= 0) {
		if (n2 > MAXINT - n1)
			overflow ();
	} else
		if (n2 > MININT - n1)
			overflow ();
	return n1 + n2;
}

int add5 (int n1, int n2) {
	long long	tmp;

	tmp = (long long)n1 + n2;
	if (tmp != (int)tmp)
		overflow ();
	return tmp;
}

void test (int (*f) (int, int), int y) {
	struct tms tms1;
	struct tms tms2;
	int i;
	int x;

	times (&tms1);
	for (i = 0; i < 1000 * 1000 * 1000; ++i)
		x = f (i, y);
	times (&tms2);
	fprintf (stderr, "%d %d\n",
			x,
			(int) (tms2.tms_utime + tms2.tms_stime
			- tms1.tms_utime + tms1.tms_stime));
}

int main (int argc, char **argv) {
	int y;

	if (argc != 2)
		die ("need an arg");
	y = atoi (argv[1]);
	test (add0, y);
	test (add1, y);
	test (add2, y);
	test (add3, y);
	test (add4, y);
	test (add5, y);
	return 0;
}

--------------------------------------------------------------------------------

add0:
	pushl	%ebp
	movl	%esp, %ebp
	movl	8(%ebp), %eax
	popl	%ebp
	ret

--------------------------------------------------------------------------------

add1:
	pushl	%ebp
	movl	%esp, %ebp
	movl	12(%ebp), %eax
	movl	8(%ebp), %edx
	popl	%ebp
	addl	%edx, %eax
	ret

--------------------------------------------------------------------------------

add2:
	pushl	%ebp
	movl	%esp, %ebp
	movl	8(%ebp), %eax
#APP
	addl 12(%ebp), %eax
	jo overflow
#NO_APP
	popl	%ebp
	ret

--------------------------------------------------------------------------------

add3:
	pushl	%ebp
	movl	%esp, %ebp
	subl	$8, %esp
	movl	%ebx, -4(%ebp)
	movl	8(%ebp), %ebx
	cmpl	$2147360191, %ebx
	jg	.L8
.L7:
	movl	12(%ebp), %ecx
	movl	%ebx, %eax
	movl	-4(%ebp), %ebx
	movl	%ebp, %esp
	popl	%ebp
	addl	%ecx, %eax
	ret
	.p2align 4,,7
.L8:
	call	overflow
	jmp	.L7

--------------------------------------------------------------------------------

add4:
	pushl	%ebp
	movl	%esp, %ebp
	subl	$8, %esp
	movl	%ebx, (%esp)
	movl	8(%ebp), %ebx
	movl	%esi, 4(%esp)
	movl	12(%ebp), %esi
	testl	%ebx, %ebx
	js	.L10
	movl	$2147483647, %eax
	subl	%ebx, %eax
	cmpl	%eax, %esi
	jg	.L14
.L12:
	leal	(%esi,%ebx), %eax
	movl	4(%esp), %esi
	movl	(%esp), %ebx
	movl	%ebp, %esp
	popl	%ebp
	ret
.L10:
	movl	$-2147483648, %eax
	subl	%ebx, %eax
	cmpl	%eax, %esi
	jbe	.L12
	jmp	.L14

--------------------------------------------------------------------------------

add5:
	pushl	%ebp
	movl	%esp, %ebp
	subl	$8, %esp
	movl	%ebx, (%esp)
	movl	8(%ebp), %eax
	movl	%esi, 4(%esp)
	movl	%eax, %ebx
	movl	%eax, %esi
	movl	12(%ebp), %eax
	sarl	$31, %esi
	cltd
	addl	%eax, %ebx
	movl	%ebx, %eax
	adcl	%edx, %esi
	cltd
	movl	%esi, %ecx
	xorl	%edx, %ecx
	xorl	%ebx, %eax
	orl	%eax, %ecx
	jne	.L18
.L17:
	movl	%ebx, %eax
	movl	4(%esp), %esi
	movl	(%esp), %ebx
	movl	%ebp, %esp
	popl	%ebp
	ret
	.p2align 4,,7
.L18:
	call	overflow
	jmp	.L17

-------------------------------------------------------
This sf.net email is sponsored by:ThinkGeek
Welcome to geek heaven.
http://thinkgeek.com/sf
_______________________________________________
MLton-devel mailing list
MLton-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/mlton-devel