[MLton-commit] r5525

Sat Apr 14 11:53:22 PDT 2007

Fixed aliasing problems with basis/Real/*.c files
----------------------------------------------------------------------

U   mlton/branches/on-20050822-x86_64-branch/runtime/Makefile
U   mlton/branches/on-20050822-x86_64-branch/runtime/basis/Real/class.c
U   mlton/branches/on-20050822-x86_64-branch/runtime/basis/Real/gdtoa.c
U   mlton/branches/on-20050822-x86_64-branch/runtime/basis/Real/signBit.c

----------------------------------------------------------------------

Modified: mlton/branches/on-20050822-x86_64-branch/runtime/Makefile
===================================================================

--- mlton/branches/on-20050822-x86_64-branch/runtime/Makefile	2007-04-13 23:20:47 UTC (rev 5524)
+++ mlton/branches/on-20050822-x86_64-branch/runtime/Makefile	2007-04-14 18:53:21 UTC (rev 5525)
@@ -189,14 +189,8 @@
 	basis-ffi.h						\
 	$(shell find basis -type f | grep '\.h$$')
 BASISCFILES :=							\
-	$(shell find basis -type f | grep '\.c$$' | grep -v Real/)
-# REAL_BASISCFILES is for files that we don't want compiled in the
-# big lump when compiling COMPILE_FAST.
-# Real/*.c can't be there because gcc -O2 messes some of them up.
-REAL_BASISCFILES :=						\
-	$(shell find basis -type f | grep '\.c$$' | grep Real/)
+	$(shell find basis -type f | grep '\.c$$')
 
-
 HFILES :=							\
 	cenv.h							\
 	$(UTILHFILES)						\
@@ -223,15 +217,13 @@
 endif
 
 ifeq ($(COMPILE_FAST), yes)
-  OBJS += basis.o basis/Real/real-basis.o
-  DEBUG_OBJS += basis-gdb.o basis/Real/real-basis-gdb.o
+  OBJS += basis.o
+  DEBUG_OBJS += basis-gdb.o
 else
   OBJS += 							\
-	$(foreach f, $(basename $(BASISCFILES)), $(f).o)	\
-	$(foreach f, $(basename $(REAL_BASISCFILES)), $(f).o)
+	$(foreach f, $(basename $(BASISCFILES)), $(f).o)
   DEBUG_OBJS += 						\
-	$(foreach f, $(basename $(BASISCFILES)), $(f)-gdb.o)	\
-	$(foreach f, $(basename $(REAL_BASISCFILES)), $(f)-gdb.o)
+	$(foreach f, $(basename $(BASISCFILES)), $(f)-gdb.o)
 endif
 
 ALL := libgdtoa.a libmlton.a libmlton-gdb.a
@@ -274,10 +266,6 @@
 	rm -f basis.c
 	cat $(BASISCFILES) >> basis.c
 
-basis/Real/real-basis.c: $(REAL_BASISCFILES)
-	rm -f basis/Real/real-basis.c
-	cat $(REAL_BASISCFILES) >> basis/Real/real-basis.c
-
 gen/c-types.h gen/c-types.sml gen/ml-types.h: gen/gen-types.c util.h util.o
 	$(CC) $(OPTCFLAGS) $(WARNCFLAGS) -o gen/gen-types gen/gen-types.c util.o
 	rm -f gen/c-types.h gen/c-types.sml gen/ml-types.h
@@ -330,17 +318,6 @@
 basis.o: basis.c $(BASISCFILES) $(HFILES)
 	$(CC) -Ibasis -Ibasis/Word -Ibasis/Real $(OPTCFLAGS) $(OPTWARNCFLAGS) -Wno-redundant-decls -c -o $@ $<
 
-# It looks like we don't follow the C spec w.r.t. aliasing.  And gcc
-# -O2 catches us on the code in Real/*.c where we treat a double as a
-# chunk of two words.  Files that have been known to cause problems
-# are class.c and gdtoa.c.  But there may be others.  So, we compile
-# with -fno-strict-aliasing to prevent gcc from taking advantage of
-# this aspect of the C spec.
-basis/Real/%-gdb.o: basis/Real/%.c gdtoa/arith.h $(HFILES)
-	$(CC) $(DEBUGCFLAGS) $(DEBUGWARNCFLAGS) -Wno-float-equal -c -o $@ $<
-basis/Real/%.o: basis/Real/%.c gdtoa/arith.h  $(HFILES)
-	$(CC) $(OPTCFLAGS) $(OPTWARNCFLAGS) -Wno-float-equal -O1 -fno-strict-aliasing -c -o $@ $<
-
 %-gdb.o: %.c $(HFILES)
 	$(CC) $(DEBUGCFLAGS) $(DEBUGWARNCFLAGS) -c -o $@ $<
 

Modified: mlton/branches/on-20050822-x86_64-branch/runtime/basis/Real/class.c
===================================================================
--- mlton/branches/on-20050822-x86_64-branch/runtime/basis/Real/class.c	2007-04-13 23:20:47 UTC (rev 5524)
+++ mlton/branches/on-20050822-x86_64-branch/runtime/basis/Real/class.c	2007-04-14 18:53:21 UTC (rev 5525)
@@ -14,9 +14,9 @@
 
 #else
 
-/* This code assumes IEEE 754/854 and little endian.
+/* This code assumes IEEE 754/854.
  *
- * In memory, the 32 bits of a float are layed out as follows.
+ * In little-endian memory, the 32 bits of a float are laid out as follows.
  *
  * d[0]  bits 7-0 of mantissa
  * d[1]  bits 15-8 of mantissa
@@ -24,9 +24,18 @@
  *       bits 22-16 of mantissa
  * d[3]  sign bit
  *       bits 7-1 of exponent
+ *
+ * In big-endian memory, the 32 bits of a float are laid out as follows.
+ *
+ * d[3]  bits 7-0 of mantissa
+ * d[2]  bits 15-8 of mantissa
+ * d[1]  bit  0 of exponent
+ *       bits 22-16 of mantissa
+ * d[0]  sign bit
+ *       bits 7-1 of exponent
  */
 
-/* masks for word 0 */
+/* masks for least/most significant word */
 #define EXPONENT_MASK32 0x7F800000
 #define MANTISSA_MASK32 0x007FFFFF
 #define SIGNBIT_MASK32  0x80000000
@@ -36,7 +45,21 @@
   uint32_t word0;
   int res;
 
-  word0 = ((uint32_t *)&f)[0];  /* this generates a gcc warning */
+  /* Using memcpy;
+   * Technically correct.
+   */
+  uint32_t words[1];
+  memcpy(&words, &f, sizeof(Real32_t));
+  word0 = words[0];
+  /* Using union;
+   * Technically undefined, but widely supported.
+   */
+  /*
+  union {float f; uint32_t words[1];} fws;
+  fws.f = f;
+  word0 = fws.words[0];
+  */
+
   if ((word0 & EXPONENT_MASK32) == EXPONENT_MASK32) {
     if (word0 & MANTISSA_MASK32)
       res = FP_NAN;
@@ -68,11 +91,9 @@
 
 #else
 
-#if (defined __i386__)
-
-/* This code assumes IEEE 754/854 and little endian.
+/* This code assumes IEEE 754/854.
  *
- * In memory, the 64 bits of a double are layed out as follows.
+ * In little-endian memory, the 64 bits of a double are laid out as follows.
  *
  * d[0]  bits 7-0 of mantissa
  * d[1]  bits 15-8 of mantissa
@@ -84,9 +105,22 @@
  *       bits 51-48 of mantissa
  * d[7]  sign bit
  *       bits 10-4 of exponent
+ *
+ * In big-endian memory, the 64 bits of a double are laid out as follows.
+ *
+ * d[7]  bits 7-0 of mantissa
+ * d[6]  bits 15-8 of mantissa
+ * d[5]  bits 23-16 of mantissa
+ * d[4]  bits 31-24 of mantissa
+ * d[3]  bits 39-32 of mantissa
+ * d[2]  bits 47-40 of mantissa
+ * d[1]  bits 3-0 of exponent
+ *       bits 51-48 of mantissa
+ * d[0]  sign bit
+ *       bits 10-4 of exponent
  */
 
-/* masks for word 1 */
+/* masks for most-significant word */
 #define EXPONENT_MASK64 0x7FF00000
 #define MANTISSA_MASK64 0x000FFFFF
 #define SIGNBIT_MASK64  0x80000000
@@ -96,8 +130,33 @@
   uint32_t word0, word1;
   int res;
 
-  word0 = ((uint32_t*)&d)[0];
-  word1 = ((uint32_t*)&d)[1];
+  /* Using memcpy;
+   * Technically correct.
+   */
+  uint32_t words[2];
+  memcpy(&words, &d, sizeof(Real64_t));
+  if (isBigEndian()) {
+    word1 = words[0];
+    word0 = words[1];
+  } else {
+    word0 = words[0];
+    word1 = words[1];
+  }
+  /* Using union;
+   * Technically undefined, but widely supported.
+   */
+  /*
+  union {double d; uint32_t words[2];} dws;
+  dws.d = d;
+  if (isBigEndian()) {
+    word1 = dws.words[0];
+    word0 = dws.words[1];
+  } else {
+    word0 = dws.words[0];
+    word1 = dws.words[1];
+  }
+  */
+
   if ((word1 & EXPONENT_MASK64) == EXPONENT_MASK64) {
     if (word0 or (word1 & MANTISSA_MASK64))
       res = FP_NAN;
@@ -112,10 +171,4 @@
   return res;
 }
 
-#else
-
-#error Real64_class not implemented
-
 #endif
-
-#endif

Modified: mlton/branches/on-20050822-x86_64-branch/runtime/basis/Real/gdtoa.c
===================================================================
--- mlton/branches/on-20050822-x86_64-branch/runtime/basis/Real/gdtoa.c	2007-04-13 23:20:47 UTC (rev 5524)
+++ mlton/branches/on-20050822-x86_64-branch/runtime/basis/Real/gdtoa.c	2007-04-14 18:53:21 UTC (rev 5525)
@@ -11,19 +11,11 @@
   int ex;
   static FPI fpi = { 24, 1-127-24+1,  254-127-24+1, 1, 0 };
   int i;
-  ULong *L;
+  ULong L[1];
   char *result;
   ULong sign;
-  int x0, x1;
 
-  if (MLton_Platform_Arch_bigendian) {
-    x0 = 0;
-    x1 = 1;
-  } else {
-    x0 = 1;
-    x1 = 0;
-  }
-  L = (ULong*)&f;
+  memcpy(L, &f, sizeof(Real32_t));
   sign = L[0] & 0x80000000L;
   bits[0] = L[0] & 0x7fffff;
   if (0 != (ex = (L[0] >> 23) & 0xff))
@@ -44,19 +36,19 @@
   int ex;
   static FPI fpi = { 53, 1-1023-53+1, 2046-1023-53+1, 1, 0 };
   int i;
-  ULong *L;
+  ULong L[2];
   char *result;
   ULong sign;
   int x0, x1;
 
-  if (MLton_Platform_Arch_bigendian) {
+  if (isBigEndian()) {
     x0 = 0;
     x1 = 1;
   } else {
     x0 = 1;
     x1 = 0;
   }
-  L = (ULong*)&d;
+  memcpy(L, &d, sizeof(Real64_t));
   sign = L[x0] & 0x80000000L;
   bits[0] = L[x1];
   bits[1] = L[x0] & 0xfffff;

Modified: mlton/branches/on-20050822-x86_64-branch/runtime/basis/Real/signBit.c
===================================================================
--- mlton/branches/on-20050822-x86_64-branch/runtime/basis/Real/signBit.c	2007-04-13 23:20:47 UTC (rev 5524)
+++ mlton/branches/on-20050822-x86_64-branch/runtime/basis/Real/signBit.c	2007-04-14 18:53:21 UTC (rev 5525)
@@ -12,32 +12,74 @@
 
 #else
 
-#if (defined __i386__)
-
 enum {
-  R32_byte = 3,
-  R64_byte = 7,
+  LITTLE_R32_byte = 3,
+  LITTLE_R64_byte = 7,
 };
 
-#elif (defined __ppc__ || defined __sparc__)
-
 enum {
-  R32_byte = 0,
-  R64_byte = 0,
+  BIG_R32_byte = 0,
+  BIG_R64_byte = 0,
 };
 
-#else
+C_Int_t Real32_signBit (Real32_t f) {
+  int R32_byte;
+  if (isBigEndian()) {
+    R32_byte = BIG_R32_byte;
+  } else {
+    R32_byte = LITTLE_R32_byte;
+  }
 
-#error Real_signBit not implemented
-
-#endif
-
-C_Int_t Real32_signBit (Real32_t f) {
-  return (((unsigned char *)&f)[R32_byte] & 0x80) >> 7;
+  /* Using memcpy. 
+   * Technically correct.
+   */
+  unsigned char chars[4];
+  memcpy(chars, &f, sizeof(Real32_t));
+  return (chars[R32_byte] & 0x80) >> 7;
+  /* Using cast; 
+   * Technically correct, as (unsigned char*) may alias.   
+   */
+  /*
+  return (((unsigned char*)(&f))[R32_byte] & 0x80) >> 7;
+  */
+  /* Using union; 
+   * Technically undefined, but widely supported. 
+   */
+  /*
+  union {float f; unsigned char c[4];} fc;
+  fc.f = f;
+  return (fc.c[R32_byte] & 0x80) >> 7;
+  */
 }
 
 C_Int_t Real64_signBit (Real64_t d) {
-  return (((unsigned char *)&d)[R64_byte] & 0x80) >> 7;
+  int R64_byte;
+  if (isBigEndian()) {
+    R64_byte = BIG_R64_byte;
+  } else {
+    R64_byte = LITTLE_R64_byte;
+  }
+
+  /* Using memcpy. 
+   * Technically correct.
+   */
+  unsigned char chars[8];
+  memcpy(chars, &d, sizeof(Real64_t));
+  return (chars[R64_byte] & 0x80) >> 7;
+  /* Using cast; 
+   * Technically correct, as (unsigned char*) may alias.   
+   */
+  /*
+  return (((unsigned char*)(&d))[R64_byte] & 0x80) >> 7;
+  */
+  /* Using union; 
+   * Technically undefined, but widely supported. 
+   */
+  /*
+  union {double d; unsigned char c[8];} dc;
+  dc.d = d;
+  return (dc.c[R64_byte] & 0x80) >> 7;
+  */
 }
 
 #endif