[Mono-devel-list] My useless optimization.

Miguel de Icaza miguel at novell.com
Sun Mar 28 13:33:49 EST 2004


Hey guys,

    I was looking at some of our code generated on x86, and I figured
`Ah, this could be improved'.  In particular, I noticed that arithmetic
operations always operated on registers, even though they could
directly use memory locations, so replacing patterns like:

	mov var,%eax
	mov var2,%ecx
	add %ecx,%eax

With:

	mov var,%eax
	add var2,%eax

    Was the goal of my patch.

    The good news is that this reduces the code size by two bytes on
each add/sub/mul.  The bad news is that it does not seem to affect
performance very much.  I also felt that this might relieve the pressure
on the register allocator.

    Also, it turns out that Mono's -O=all makes most of my micro-tests not
even hit this improvement.  The code generated with -O=all is just much
better than our default (it shortens the size of most routines by half).

    On my Pentium M I honestly cannot see any difference in performance
from this change.  But that is probably a Pentium M/4-ism.  I am wondering
if people could try this patch, and let me know if it makes any
difference on your systems.

    To enable this optimization, try:

	mono --x

    (I know, it is lame, but it's my working patch).


-------------- next part --------------
? .mini-x86.c.swp
? .x.swp
? M.cs
? MM.cs
? a.c
? a.cs
? a.out
? a.s
? b.cs
? bx
? c
? core.18893
? cs
? d
? m.c
? m.cs
? m.s
? mcslog
? mcslog2
? n.s
? t.cs
? x
? xbad
? xgood
? y
? z
Index: cpu-pentium.md
===================================================================
RCS file: /cvs/public/mono/mono/mini/cpu-pentium.md,v
retrieving revision 1.22
diff -u -u -r1.22 cpu-pentium.md
--- cpu-pentium.md	20 Mar 2004 17:54:20 -0000	1.22
+++ cpu-pentium.md	28 Mar 2004 17:52:59 -0000
@@ -19,7 +19,11 @@
 #       d  EDX register
 #
 # len:number         describe the maximun length in bytes of the instruction
-# number is a positive integer
+# 		     number is a positive integer.  If the length is not specified
+#                    it defaults to zero.   But lengths are only checked if the given opcode 
+#                    is encountered during compilation. Some opcodes, like CONV_U4 are 
+#                    transformed into other opcodes in the brg files, so they do not show up 
+#                    during code generation.
 #
 # cost:number        describe how many cycles are needed to complete the instruction (unused)
 #
@@ -537,6 +541,10 @@
 x86_fp_load_i8: dest:f src1:b len:7
 x86_fp_load_i4: dest:f src1:b len:7
 x86_seteq_membase: src1:b len:7
+x86_add_membase: dest:i src1:i src2:b clob:1 len:11
+x86_sub_membase: dest:i src1:i src2:b clob:1 len:11
+x86_mul_membase: dest:i src1:i src2:b clob:1 len:13
+x86_div_membase: dest:a src1:i src2:b clob:1 len:26
 adc: dest:i src1:i src2:i len:2 clob:1
 addcc: dest:i src1:i src2:i len:2 clob:1
 subcc: dest:i src1:i src2:i len:2 clob:1
Index: driver.c
===================================================================
RCS file: /cvs/public/mono/mono/mini/driver.c,v
retrieving revision 1.45
diff -u -u -r1.45 driver.c
--- driver.c	20 Mar 2004 22:02:47 -0000	1.45
+++ driver.c	28 Mar 2004 17:52:59 -0000
@@ -88,6 +88,8 @@
 	MONO_OPT_INTRINS |  \
     MONO_OPT_AOT)
 
+int enable_stack = 0;
+
 static guint32
 parse_optimizations (const char* p)
 {
@@ -670,6 +672,8 @@
 			mono_print_vtable = TRUE;
 		} else if (strcmp (argv [i], "--stats") == 0) {
 			mono_jit_stats.enabled = TRUE;
+		} else if (strcmp (argv [i], "--x") == 0){
+			enable_stack = 1;
 		} else if (strcmp (argv [i], "--aot") == 0) {
 			mono_compile_aot = TRUE;
 		} else if (strcmp (argv [i], "--compile-all") == 0) {
Index: inssel-x86.brg
===================================================================
RCS file: /cvs/public/mono/mono/mini/inssel-x86.brg,v
retrieving revision 1.17
diff -u -u -r1.17 inssel-x86.brg
--- inssel-x86.brg	17 Feb 2004 17:15:14 -0000	1.17
+++ inssel-x86.brg	28 Mar 2004 17:52:59 -0000
@@ -584,4 +584,52 @@
 	/* fixme: nothing to do ??*/
 }
 
+reg: CEE_ADD(reg, CEE_LDIND_I4 (base)) {
+	MonoInst *base = state->right->left->tree;
+
+	tree->dreg = state->reg1;
+	tree->sreg1 = state->left->reg1;
+	tree->sreg2 = base->inst_basereg; 
+	tree->inst_offset = base->inst_offset; 
+	tree->opcode = OP_X86_ADD_MEMBASE; 
+	mono_bblock_add_inst (s->cbb, tree);
+} cost {
+	if (enable_stack){
+		return 1;
+	} else 
+		return 10000000;
+}
+
+reg: CEE_SUB(reg, CEE_LDIND_I4 (base)) {
+	MonoInst *base = state->right->left->tree;
+
+	tree->dreg = state->reg1;
+	tree->sreg1 = state->left->reg1;
+	tree->sreg2 = base->inst_basereg; 
+	tree->inst_offset = base->inst_offset; 
+	tree->opcode = OP_X86_SUB_MEMBASE; 
+	mono_bblock_add_inst (s->cbb, tree);
+} cost {
+	if (enable_stack){
+		return 1;
+	} else 
+		return 10000000;
+}
+
+reg: CEE_MUL(reg, CEE_LDIND_I4 (base)) {
+	MonoInst *base = state->right->left->tree;
+
+	tree->dreg = state->reg1;
+	tree->sreg1 = state->left->reg1;
+	tree->sreg2 = base->inst_basereg; 
+	tree->inst_offset = base->inst_offset; 
+	tree->opcode = OP_X86_MUL_MEMBASE; 
+	mono_bblock_add_inst (s->cbb, tree);
+} cost {
+	if (enable_stack){
+		return 1;
+	} else 
+		return 10000000;
+}
+
 %%
Index: inssel.brg
===================================================================
RCS file: /cvs/public/mono/mono/mini/inssel.brg,v
retrieving revision 1.36
diff -u -u -r1.36 inssel.brg
--- inssel.brg	26 Mar 2004 15:24:16 -0000	1.36
+++ inssel.brg	28 Mar 2004 17:53:00 -0000
@@ -8,7 +8,7 @@
  * (C) 2002 Ximian, Inc.
  *
  */
-
+extern int enable_stack;
 #include <config.h>
 #include <string.h>
 
Index: mini-ops.h
===================================================================
RCS file: /cvs/public/mono/mono/mini/mini-ops.h,v
retrieving revision 1.28
diff -u -u -r1.28 mini-ops.h
--- mini-ops.h	26 Mar 2004 15:24:16 -0000	1.28
+++ mini-ops.h	28 Mar 2004 17:53:00 -0000
@@ -4,14 +4,14 @@
 MINI_OP(OP_STORE,	"store")
 MINI_OP(OP_OBJADDR,	"objaddr")
 MINI_OP(OP_VTADDR,	"vtaddr")
-MINI_OP(OP_PHI,	"phi")
+MINI_OP(OP_PHI,		"phi")
 MINI_OP(OP_RENAME,	"rename")
 MINI_OP(OP_COMPARE,	"compare")
 MINI_OP(OP_COMPARE_IMM,	"compare_imm")
 MINI_OP(OP_FCOMPARE,	"fcompare")
 MINI_OP(OP_LCOMPARE,	"lcompare")
 MINI_OP(OP_LOCAL,	"local")
-MINI_OP(OP_ARG,	"arg")
+MINI_OP(OP_ARG,		"arg")
 MINI_OP(OP_ARGLIST,	"oparglist")
 MINI_OP(OP_OUTARG,	"outarg")
 MINI_OP(OP_OUTARG_IMM,	"outarg_imm")
@@ -344,7 +344,11 @@
 MINI_OP(OP_X86_FP_LOAD_I8,         "x86_fp_load_i8")
 MINI_OP(OP_X86_FP_LOAD_I4,         "x86_fp_load_i4")
 MINI_OP(OP_X86_SETEQ_MEMBASE,      "x86_seteq_membase")
-
+MINI_OP(OP_X86_ADD_MEMBASE,        "x86_add_membase")
+MINI_OP(OP_X86_SUB_MEMBASE,        "x86_sub_membase")
+MINI_OP(OP_X86_MUL_MEMBASE,        "x86_mul_membase")
+MINI_OP(OP_X86_DIV_MEMBASE,        "x86_div_membase")
+	
 MINI_OP(OP_PPC_SUBFIC,             "ppc_subfic")
 MINI_OP(OP_PPC_SUBFZE,             "ppc_subfze")
 
Index: mini-x86.c
===================================================================
RCS file: /cvs/public/mono/mono/mini/mini-x86.c,v
retrieving revision 1.80
diff -u -u -r1.80 mini-x86.c
--- mini-x86.c	28 Mar 2004 17:16:54 -0000	1.80
+++ mini-x86.c	28 Mar 2004 17:53:00 -0000
@@ -1308,6 +1308,7 @@
 	/* forward pass on the instructions to collect register liveness info */
 	while (ins) {
 		spec = ins_spec [ins->opcode];
+		
 		DEBUG (print_ins (i, ins));
 
 		if (spec [MONO_INST_SRC1]) {
@@ -2086,9 +2087,15 @@
 		case OP_X86_ADD_MEMBASE_IMM:
 			x86_alu_membase_imm (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
 			break;
+		case OP_X86_ADD_MEMBASE:
+			x86_alu_reg_membase (code, X86_ADD, ins->sreg1, ins->sreg2, ins->inst_offset);
+			break;
 		case OP_X86_SUB_MEMBASE_IMM:
 			x86_alu_membase_imm (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
 			break;
+		case OP_X86_SUB_MEMBASE:
+			x86_alu_reg_membase (code, X86_SUB, ins->sreg1, ins->sreg2, ins->inst_offset);
+			break;
 		case OP_X86_INC_MEMBASE:
 			x86_inc_membase (code, ins->inst_basereg, ins->inst_offset);
 			break;
@@ -2100,6 +2107,9 @@
 			break;
 		case OP_X86_DEC_REG:
 			x86_dec_reg (code, ins->dreg);
+			break;
+		case OP_X86_MUL_MEMBASE:
+			x86_imul_reg_membase (code, ins->sreg1, ins->sreg2, ins->inst_offset);
 			break;
 		case CEE_BREAK:
 			x86_breakpoint (code);


More information about the Mono-devel-list mailing list