? t1
? t2
Index: M3x86.m3
===================================================================
RCS file: /usr/cvs/cm3/m3-sys/m3back/src/M3x86.m3,v
retrieving revision 1.164
diff -u -r1.164 M3x86.m3
--- M3x86.m3	8 Mar 2010 12:26:36 -0000	1.164
+++ M3x86.m3	8 Mar 2010 16:03:22 -0000
@@ -2565,20 +2565,7 @@
       u.wr.NL    ();
     END;
 
-    IF u.vstack.doinsert(t) THEN
-      RETURN;
-    END;
-
-    IF NOT TypeIs64(t) THEN
-      Err(u, "insert: stack.doinsert should have handled all 32bit cases");
-    END;
-
-    start_int_proc (u, Builtin.insert64);
-    pop_param(u, Type.Word32);
-    pop_param(u, Type.Word32);
-    pop_param(u, Type.Word64);
-    pop_param(u, Type.Word64);
-    call_64 (u, Builtin.insert64);
+    u.vstack.doinsert(t);
   END insert;
 
 PROCEDURE insert_n  (u: U;  t: IType;  n: INTEGER) =
@@ -2591,16 +2578,7 @@
       u.wr.NL    ();
     END;
 
-    IF u.vstack.doinsert_n(t, n) THEN
-      RETURN;
-    END;
-
-    IF NOT TypeIs64(t) THEN
-      Err(u, "insert_n: stack.doinsert_n should have handled all 32bit cases");
-    END;
-
-    u.vstack.pushimmI(n, Type.Word32);
-    u.insert(t);
+    u.vstack.doinsert_n(t, n);
   END insert_n;
 
 PROCEDURE insert_mn  (u: U;  t: IType;  m, n: INTEGER) =
@@ -2614,17 +2592,7 @@
       u.wr.NL    ();
     END;
 
-    IF u.vstack.doinsert_mn(t, m, n) THEN
-      RETURN;
-    END;
-
-    IF NOT TypeIs64(t) THEN
-      Err(u, "insert_mn: stack.doinsert_mn should have handled all 32bit cases");
-    END;
-
-    u.vstack.pushimmI(m, Type.Word32);
-    u.vstack.pushimmI(n, Type.Word32);
-    u.insert(t);
+    u.vstack.doinsert_mn(t, m, n);
   END insert_mn;
 
 (*------------------------------------------------ misc. stack/memory ops ---*)
@@ -2982,8 +2950,7 @@
     mul64,
     udiv64, umod64,
     div64, mod64,
-    rotate_left64, rotate_right64, rotate64,
-    insert64
+    rotate_left64, rotate_right64, rotate64
   };
 
 (* union .. sym_difference -> (n_bits, *c, *b, *a): Void
@@ -3028,8 +2995,7 @@
     BP { "m3_mod64",         4, Type.Int64,  "__stdcall" },
     BP { "m3_rotate_left64", 3, Type.Word64, "__stdcall" },
     BP { "m3_rotate_right64",3, Type.Word64, "__stdcall" },
-    BP { "m3_rotate64",      3, Type.Word64, "__stdcall" },
-    BP { "m3_insert64",      6, Type.Word64, "__stdcall" }
+    BP { "m3_rotate64",      3, Type.Word64, "__stdcall" }
   };
 
 
Index: Stackx86.i3
===================================================================
RCS file: /usr/cvs/cm3/m3-sys/m3back/src/Stackx86.i3,v
retrieving revision 1.28
diff -u -r1.28 Stackx86.i3
--- Stackx86.i3	8 Mar 2010 12:26:36 -0000	1.28
+++ Stackx86.i3	8 Mar 2010 16:03:22 -0000
@@ -67,9 +67,9 @@
         doextract (type: IType; sign_extend: BOOLEAN);
         doextract_n (type: IType; sign_extend: BOOLEAN; n: INTEGER);
         doextract_mn (type: IType; sign_extend: BOOLEAN; m, n: INTEGER);
-        doinsert (type: IType): BOOLEAN;
-        doinsert_n (type: IType; n: INTEGER): BOOLEAN;
-        doinsert_mn (type: IType; m, n: INTEGER): BOOLEAN;
+        doinsert (type: IType);
+        doinsert_n (type: IType; n: INTEGER);
+        doinsert_mn (type: IType; m, n: INTEGER);
         swap ();
         doloophole (from, to: ZType);
         doindex_address (shift, size: INTEGER; neg: BOOLEAN);
Index: Stackx86.m3
===================================================================
RCS file: /usr/cvs/cm3/m3-sys/m3back/src/Stackx86.m3,v
retrieving revision 1.124
diff -u -r1.124 Stackx86.m3
--- Stackx86.m3	8 Mar 2010 13:18:06 -0000	1.124
+++ Stackx86.m3	8 Mar 2010 16:03:22 -0000
@@ -7,7 +7,7 @@
 
 MODULE Stackx86;
 
-IMPORT M3ID, M3CG, TargetMap, M3CG_Ops, M3x86Rep, Codex86, Wrx86;
+IMPORT M3CG, TargetMap, M3CG_Ops, M3x86Rep, Codex86, Wrx86;
 
 IMPORT TIntN, TWordN;
 IMPORT Target, Fmt;
@@ -34,8 +34,6 @@
         reguse        : ARRAY [0 .. NRegs] OF Register;
         current_proc  : x86Proc;
         rmode         : ARRAY FlToInt OF TIntN.T;
-        lowset_table  : x86Var;
-        highset_table : x86Var;
       OVERRIDES
         init := init;
         end := end;
@@ -130,13 +128,12 @@
 
     FOR i := 0 TO NRegs DO
       IF t.reguse[i].stackp # -1 THEN
-        IF NOT (t.vstack[t.reguse[i].stackp].reg[t.reguse[i].operandPart] = i) THEN
+        IF t.vstack[t.reguse[i].stackp].reg[t.reguse[i].operandPart] # i THEN
           t.Err(where
               & " i:" & RegName[i]
               & " t.reguse[i].stackp:" & Fmt.Int(t.reguse[i].stackp)
               & " t.vstack[t.reguse[i].stackp].reg[t.reguse[i].operandPart]:" & RegName[t.vstack[t.reguse[i].stackp].reg[t.reguse[i].operandPart]]);
         END;
-        <* ASSERT t.vstack[t.reguse[i].stackp].reg[t.reguse[i].operandPart] = i *>
       END
     END;
 
@@ -144,14 +141,13 @@
       IF t.vstack[i].loc = OLoc.register THEN
         size := GetOperandSize(t.vstack[i]);
         FOR j := 0 TO size - 1 DO
-          IF NOT (t.reguse[t.vstack[i].reg[j]].stackp = i) THEN
+          IF t.reguse[t.vstack[i].reg[j]].stackp # i THEN
             t.Err(where
                 & " i:" & Fmt.Int(i)
                 & " j:" & Fmt.Int(j)
                 & " t.vstack[i].reg[j]:" & RegName[t.vstack[i].reg[j]]
                 & " t.reguse[t.vstack[i].reg[j]].stackp:" & Fmt.Int(t.reguse[t.vstack[i].reg[j]].stackp));
           END;
-          <* ASSERT t.reguse[t.vstack[i].reg[j]].stackp = i *>
         END;
       END;
     END;
@@ -1528,7 +1524,7 @@
           END;
 
           newdest(t, stop1);
-          newdest(t, stop0); (* Is this needed? We did not change the value and we are going to discard it. *)
+          newdest(t, stop0);
         END;
       END;
 
@@ -1649,38 +1645,61 @@
          stack_offset = pos(t, 2, "extract"),
          stack_value = pos(t, 3, "extract"),
          op_mask = t.vstack[stack_mask],
-         (*op_count = t.vstack[stack_count],*)
+         op_count = t.vstack[stack_count],
          op_offset = t.vstack[stack_offset],
          op_value = t.vstack[stack_value] DO
 
+(* T extract(T x, uint32 offset, uint32 count) 
+   {
+     x >>= offset;
+     x &= ~((~(T)0) << count);
+     return x;
+   }
+*)
       IF op_offset.loc = OLoc.imm THEN
         TWordN.And(op_offset.imm, BitCountMask[type], op_offset.imm); (* This should be redundant. *)
       ELSE
         find(t, stack_offset, Force.regset, RegSet {ECX});
       END;
       find(t, stack_value, Force.anyreg);
-(*
-UT __stdcall extract(UT x, uint32 offset, uint32 count) 
-{
-    x >>= offset;
-    x &= ~((~(UT)0) << count);
-    return x;
-}
-*)
       IF op_offset.loc = OLoc.imm THEN
-        t.cg.immOp(Op.oSHR, op_value, op_offset.imm); (* shift by ECX *)
+        t.cg.immOp(Op.oSHR, op_value, op_offset.imm);
       ELSE
         t.cg.unOp(Op.oSHR, op_value); (* shift by ECX *)
       END;
 
+      (* Offset is no longer needed; what is the right way to release it?
+       * With the code below disabled, offset is preserved in another
+       * register even though it is never used again.
+       * With the code below enabled we get an assertion failure.
+       *)
+      (*IF op_offset.loc = OLoc.register THEN
+        t.newdest(op_offset);
+        t.dealloc_reg(stack_offset, operandPart := 0);
+        t.vstack[stack_offset].loc := OLoc.imm;
+      END;*)
       unlock(t);
-      find(t, stack_count, Force.regset, RegSet{ECX});
-      find(t, stack_value, Force.anyreg);
+
+      IF op_count.loc = OLoc.imm THEN
+        TWordN.And(op_count.imm, BitCountMask[type], op_count.imm); (* This should be redundant. *)
+      ELSE
+        find(t, stack_count, Force.regset, RegSet {ECX});
+      END;
+
       find(t, stack_mask, Force.anyreg);
-      t.cg.unOp(Op.oSHL, op_mask); (* shift by ECX *)
+
+      IF op_count.loc = OLoc.imm THEN
+        t.cg.immOp(Op.oSHL, op_mask, op_count.imm);
+      ELSE
+        t.cg.unOp(Op.oSHL, op_mask); (* shift by ECX *)
+      END;
+
       t.cg.unOp(Op.oNOT, op_mask);
       t.cg.binOp(Op.oAND, op_value, op_mask);
 
+      newdest(t, op_count);
+      newdest(t, op_offset);
+      newdest(t, op_mask);
       newdest(t, op_value);
       discard(t, 3);
     END;
@@ -1806,165 +1825,154 @@
     END;
   END doextract_mn;
 
-PROCEDURE doinsert (t: T; type: IType): BOOLEAN =
-  VAR maskreg: Regno;  tbl: MVar;
-      int: INTEGER;
-      tint: TIntN.T;
-      uint_type := IntType[UnsignedType[type]];
-      is64 := TypeIs64(type);
-      max := TIntN.T{x := uint_type.max};
+PROCEDURE doinsert (t: T; type: IType) =
+  VAR count: INTEGER;  (* host value of count when it is an immediate *)
+      offset: INTEGER; (* host value of offset when it is an immediate *)
+      utype := UnsignedType[type];
   BEGIN
 
-    unlock(t);
     WITH stack_count = pos(t, 0, "insert"),
          stack_offset = pos(t, 1, "insert"),
-         stack_from = pos(t, 2, "insert"),
-         stack_to = pos(t, 3, "insert"),
+         op_count = t.vstack[stack_count],
+         op_offset = t.vstack[stack_offset] DO
+      (* Count and offset both immediate: pop them and defer to doinsert_mn. *)
+      IF op_count.loc = OLoc.imm AND op_offset.loc = OLoc.imm THEN
+        IF NOT TIntN.ToHostInteger(op_count.imm, count) THEN
+          Err(t, "doinsert: failed to convert count to host integer");
+        END;
+        IF NOT TIntN.ToHostInteger(op_offset.imm, offset) THEN
+          Err(t, "doinsert: failed to convert offset to host integer");
+        END;
+        discard(t, 2);
+        doinsert_mn(t, type, offset, count);
+        RETURN;
+      END;
+    END;
+    (* General case: push the unsigned type's max value as the initial mask. *)
+    t.pushimmT(TIntN.T{x := IntType[utype].max}, utype);
+
+    unlock(t);
+
+    WITH stack_mask = pos(t, 0, "insert"),
+         stack_count = pos(t, 1, "insert"),
+         stack_offset = pos(t, 2, "insert"),
+         stack_from = pos(t, 3, "insert"),
+         stack_to = pos(t, 4, "insert"),
+         op_mask = t.vstack[stack_mask],
          op_count = t.vstack[stack_count],
          op_offset = t.vstack[stack_offset],
          op_from = t.vstack[stack_from],
          op_to = t.vstack[stack_to] DO
 
-      IF is64 AND (op_count.loc # OLoc.imm OR op_offset.loc # OLoc.imm OR op_from.loc # OLoc.imm OR op_to.loc # OLoc.imm) THEN
-        RETURN FALSE;
-      END;
 
-      IF op_count.loc = OLoc.imm THEN
-        discard(t, 1);
-        IF NOT TIntN.ToHostInteger(op_count.imm, int) THEN
-          Err(t, "doinsert: failed to convert to host integer");
-        END;
-        RETURN doinsert_n(t, type, int);
-      END;
+(* Bit-field insert; the result replaces "to" on the virtual stack:
+   T insert(T to, T from, uint32 offset, uint32 count)
+   {
+     T mask = ((~((~(T)0) << count)) << offset);
+     return (to & ~mask) | ((from << offset) & mask);
+   } *)
 
       IF op_offset.loc = OLoc.imm THEN
-        TWordN.And(op_offset.imm, BitCountMask[type], op_offset.imm);
+        TWordN.And(op_offset.imm, BitCountMask[type], op_offset.imm); (* shouldn't be needed *)
+      END;
+      (* NOTE(review): if BitCountMask[type] is wordsize-1, a full-width count becomes 0 -- confirm. *)
+      IF op_count.loc = OLoc.imm THEN
+        TWordN.And(op_count.imm, BitCountMask[type], op_count.imm); (* shouldn't be needed *)
       ELSE
-        find(t, stack_offset, Force.regset, RegSet { ECX });
+        find(t, stack_count, Force.regset, RegSet{ECX});
       END;
 
-      find(t, stack_from, Force.anyreg);
-      find(t, stack_to, Force.anyreg);
-      find(t, stack_count, Force.anyreg);
+      find(t, stack_mask, Force.anyreg);
 
-      maskreg := pickreg(t);
-      corrupt(t, maskreg, operandPart := 0);
+      IF op_count.loc = OLoc.register THEN
+        t.cg.unOp(Op.oSHL, op_mask); (* shift by ECX *)
+      ELSE
+        t.cg.immOp(Op.oSHL, op_mask, op_count.imm);
+      END;
+      t.cg.unOp(Op.oNOT, op_mask);
 
-      ImportLowSet (t, tbl);
-      t.cg.tableOp(Op.oMOV, t.cg.reg[maskreg], op_count, 4, tbl);
-      t.cg.binOp(Op.oAND, op_from, t.cg.reg[maskreg]);
+      (* Done with count, but it is unclear how to release it properly.
+       * With the code below disabled, count is preserved although it is
+       * no longer needed; we run short on registers and save it to the stack.
+       * With the code below enabled we get an assertion failure.
+       *)
+      (*IF op_count.loc = OLoc.register THEN
+        t.newdest(op_count);
+        t.dealloc_reg(stack_count, operandPart := 0);
+        t.vstack[stack_count].loc := OLoc.imm;
+      END;*)
+      unlock(t);
+      (* Only a non-immediate offset needs ECX; an immediate one is shifted via immOp below. *)
+      IF op_offset.loc # OLoc.imm THEN find(t, stack_offset, Force.regset, RegSet{ECX}); END;
+      find(t, stack_mask, Force.anyreg);
 
-      IF op_offset.loc = OLoc.imm THEN
-        IF TIntN.NE(op_offset.imm, TZero) THEN
-          t.cg.immOp(Op.oSHL, op_from, op_offset.imm);
-          t.cg.immOp(Op.oADD, op_count, op_offset.imm);
-        END
+      IF op_offset.loc = OLoc.register THEN
+        t.cg.unOp(Op.oSHL, op_mask); (* shift by ECX *)
       ELSE
-        t.cg.unOp(Op.oSHL, op_from);
-        t.cg.binOp(Op.oADD, op_count, op_offset);
+        t.cg.immOp(Op.oSHL, op_mask, op_offset.imm);
       END;
 
-      ImportLowSet (t, tbl);
-      t.cg.tableOp(Op.oMOV, t.cg.reg[maskreg], op_count, 4, tbl);
-
-      IF op_offset.loc = OLoc.imm THEN
-        IF NOT TIntN.ToHostInteger(op_offset.imm, int) THEN
-          Err(t, "failed to convert op_offset.imm to host integer");
-        END;
-        TWordN.Shift(max, int, tint);
-        t.cg.immOp(Op.oXOR, t.cg.reg[maskreg], tint);
+      find(t, stack_from, Force.anyreg);
+      IF op_offset.loc = OLoc.register THEN
+        t.cg.unOp(Op.oSHL, op_from); (* shift by ECX *)
       ELSE
-        ImportHighSet (t, tbl);
-        t.cg.tableOp(Op.oXOR, t.cg.reg[maskreg], op_offset, 4, tbl);
+        t.cg.immOp(Op.oSHL, op_from, op_offset.imm);
       END;
 
-      t.cg.binOp(Op.oAND, op_to, t.cg.reg[maskreg]);
+      (* Done with offset, but it is unclear how to release it properly.
+       * With the code below disabled, offset is preserved although it is
+       * no longer needed; we run short on registers and save it to the stack.
+       * With the code below enabled we get an assertion failure.
+       *)
+      (*IF op_offset.loc = OLoc.register THEN
+        t.newdest(op_offset);
+        t.dealloc_reg(stack_offset, operandPart := 0);
+        t.vstack[stack_offset].loc := OLoc.imm;
+      END;*)
+      unlock(t);
+
+      find(t, stack_from, Force.anyreg);
+      find(t, stack_mask, Force.anyreg);
+
+      t.cg.binOp(Op.oAND, op_from, op_mask);
+      t.cg.unOp(Op.oNOT, op_mask);
+      find(t, stack_to, Force.anyreg);
+      t.cg.binOp(Op.oAND, op_to, op_mask);
       t.cg.binOp(Op.oOR, op_to, op_from);
 
       newdest(t, op_count);
-      newdest(t, op_from);
+      newdest(t, op_offset);
       newdest(t, op_to);
-      discard(t, 3);
+      newdest(t, op_from);
+      newdest(t, op_mask);
+      discard(t, 4);
     END;
-
-    RETURN TRUE;
   END doinsert;
 
-PROCEDURE doinsert_n (t: T; type: IType; count: INTEGER): BOOLEAN =
-  VAR tbl: MVar;  maskreg: Regno;
-      offset: INTEGER;
-      tint: TIntN.T;
-      is64 := TypeIs64(type);
-      uint_type := IntType[UnsignedType[type]];
-      typeBitSize := uint_type.size;
-      max := TIntN.T{x := uint_type.max};
+PROCEDURE doinsert_n (t: T; type: IType; count: INTEGER) = (* insert with a constant count; offset is on top of the stack *)
+  VAR offset: INTEGER; (* host value of offset when it is an immediate *)
   BEGIN
-
-    unlock(t);
     WITH stack_offset = pos(t, 0, "insert"),
-         stack_from = pos(t, 1, "insert"),
-         stack_to = pos(t, 2, "insert"),
-         op_offset = t.vstack[stack_offset],
-         op_from = t.vstack[stack_from],
-         op_to = t.vstack[stack_to] DO
-
-      IF is64 AND (op_offset.loc # OLoc.imm OR op_from.loc # OLoc.imm OR op_to.loc # OLoc.imm) THEN
-        RETURN FALSE;
-      END;
+         op_offset = t.vstack[stack_offset] DO
 
       IF op_offset.loc = OLoc.imm THEN
-        discard(t, 1);
         IF NOT TIntN.ToHostInteger(op_offset.imm, offset) THEN
           Err(t, "doinsert_n: failed to convert to host integer");
         END;
-        RETURN doinsert_mn(t, type, offset, count);
-      END;
-
-      <* ASSERT NOT is64 *>
-
-      find(t, stack_offset, Force.regset, RegSet { ECX });
-      find(t, stack_to, Force.anyreg);
-      find(t, stack_from, Force.anyreg);
-
-      maskreg := pickreg(t);
-      corrupt(t, maskreg, operandPart := 0);
-
-      <* ASSERT NOT is64 *>
-
-      IF count # typeBitSize THEN
-        TWordN.Shift(max, count - typeBitSize, tint);
-        t.cg.immOp(Op.oAND, op_from, tint);
+        discard(t, 1); (* pop offset only after its immediate value has been read *)
+        doinsert_mn(t, type, offset, count); (* offset and count are both constants now *)
+        RETURN;
       END;
-
-      t.cg.unOp(Op.oSHL, op_from);
-
-(****
-      intable := t.lowset_table;
-      INC(intable.o, Word.Shift(count*4, 16));
-      t.cg.tableOp(Op.oMOV, t.cg.reg[maskreg], op_offset, 4, intable);
-      t.cg.tableOp(Op.oXOR, t.cg.reg[maskreg], op_offset, 4, t.highset_table);
-****)
-      ImportLowSet(t, tbl);
-      t.cg.tableOp(Op.oMOV, t.cg.reg[maskreg], op_offset, 4, tbl);
-      ImportHighSet(t, tbl);
-      INC(tbl.mvar_offset, count*4);
-      t.cg.tableOp(Op.oXOR, t.cg.reg[maskreg], op_offset, 4, tbl);
-
-      t.cg.binOp(Op.oAND, op_to, t.cg.reg[maskreg]);
-      t.cg.binOp(Op.oOR, op_to, op_from);
-
-      newdest(t, op_from);
-      newdest(t, op_to);
-      discard(t, 2);
     END;
 
-    RETURN TRUE;
+    t.pushimmI(count, UnsignedType[type]); (* offset is not an immediate: push count as an immediate ... *)
+    t.doinsert(type);                      (* ... and let doinsert handle the remaining cases *)
+
   END doinsert_n;
   
-PROCEDURE doinsert_mn (t: T; type: IType; offset, count: INTEGER): BOOLEAN =
+PROCEDURE doinsert_mn (t: T; type: IType; offset, count: INTEGER) =
   VAR tint_m, mask_m, mask_m_n, mask: TIntN.T;
       uint_type := IntType[UnsignedType[type]];
-      is64 := TypeIs64(type);
       max := TIntN.T{x := uint_type.max};
       typeBitSize := uint_type.size;
   BEGIN
@@ -1975,17 +1983,6 @@
          op_from = t.vstack[stack_from],
          op_to = t.vstack[stack_to] DO
 
-      (* This check should be removed;
-       * It is ok though, it is here because
-       * I haven't implemented some optimizations.
-       * RETURN FALSE just means "generate a function
-       * call instead of inline code".
-       *)
-
-      IF is64 AND (op_from.loc # OLoc.imm OR op_to.loc # OLoc.imm) THEN
-        RETURN FALSE;
-      END;
-
       find(t, stack_to, Force.any);
       find(t, stack_from, Force.anyregimm);
 
@@ -2042,8 +2039,6 @@
       newdest(t, op_to);
       discard(t, 1);
     END;
-
-    RETURN TRUE;
   END doinsert_mn;
 
 PROCEDURE swap (t: T) =
@@ -2212,6 +2207,7 @@
          stack1 = pos(t, 1, "doindex_address"),
          stop0 = t.vstack[stack0],
          stop1 = t.vstack[stack1] DO
+
       find(t, stack0, Force.any);
       find(t, stack1, Force.anyreg, AllRegisters, TRUE);
 
@@ -2548,36 +2544,8 @@
 
     t.rmode := ARRAY FlToInt OF TIntN.T
       { TZero, TIntN.x0400, TIntN.x0800, TIntN.x0F00 };
-    t.lowset_table := NIL;
-    t.highset_table := NIL;
   END init;
 
-PROCEDURE ImportLowSet (t: T;  VAR(*OUT*)tbl: MVar) =
-  BEGIN
-    IF (t.lowset_table = NIL) THEN
-      t.lowset_table := ImportBitmaskTable (t, "_lowbits");
-    END;
-    tbl.var := t.lowset_table;
-    tbl.mvar_offset   := 0;
-    tbl.mvar_type := Type.Int32;
-  END ImportLowSet;
-
-PROCEDURE ImportHighSet (t: T;  VAR(*OUT*)tbl: MVar) =
-  BEGIN
-    IF (t.highset_table = NIL) THEN
-      t.highset_table := ImportBitmaskTable (t, "_highbits");
-    END;
-    tbl.var := t.highset_table;
-    tbl.mvar_offset   := 0;
-    tbl.mvar_type := Type.Int32;
-  END ImportHighSet;
-
-PROCEDURE ImportBitmaskTable (t: T;  nm: TEXT): x86Var =
-  BEGIN
-    RETURN t.parent.import_global (M3ID.Add (nm), 33 * 4 (*byte size*),
-               4 (*align*), Type.Struct, 0 (*typeuid*));
-  END ImportBitmaskTable;
-
 PROCEDURE end (<*UNUSED*> t: T) =
   BEGIN
   END end;