[Mono-devel-list] My useless optimization.
Miguel de Icaza
miguel at novell.com
Sun Mar 28 13:33:49 EST 2004
Hey guys,
I was looking at some of the code we generate on x86, and I figured
`Ah, this could be improved'. In particular, I noticed that arithmetic
operations always operated on registers, even though they can
directly use memory locations, so replacing patterns like:
mov var,%eax
mov var2,%ecx
add %ecx,%eax
With:
mov var,%eax
add var2,%eax
Was the goal of my patch.
The good news is that this reduces the code size by two bytes on
each add/sub/mul. The bad news is that it does not seem to affect
performance very much. I also felt that this might relieve the pressure
on the register allocator.
Also, it turns out that Mono's -O=all makes most of my micro-tests not
even hit this improvement. The code generated with -O=all is just much
better than our default (it shortens the size of most routines by half).
On my Pentium M I honestly cannot see any difference in performance
from this change. But that is probably a Pentium M/4-ism. I am wondering
if people could try this patch and let me know whether it makes any
difference on your systems.
To enable this optimization, try:
mono --x
(I know, it is lame, but it's my working patch).
-------------- next part --------------
? .mini-x86.c.swp
? .x.swp
? M.cs
? MM.cs
? a.c
? a.cs
? a.out
? a.s
? b.cs
? bx
? c
? core.18893
? cs
? d
? m.c
? m.cs
? m.s
? mcslog
? mcslog2
? n.s
? t.cs
? x
? xbad
? xgood
? y
? z
Index: cpu-pentium.md
===================================================================
RCS file: /cvs/public/mono/mono/mini/cpu-pentium.md,v
retrieving revision 1.22
diff -u -u -r1.22 cpu-pentium.md
--- cpu-pentium.md 20 Mar 2004 17:54:20 -0000 1.22
+++ cpu-pentium.md 28 Mar 2004 17:52:59 -0000
@@ -19,7 +19,11 @@
# d EDX register
#
# len:number describe the maximun length in bytes of the instruction
-# number is a positive integer
+# number is a positive integer. If the length is not specified
+# it defaults to zero. But lengths are only checked if the given opcode
+# is encountered during compilation. Some opcodes, like CONV_U4 are
+# transformed into other opcodes in the brg files, so they do not show up
+# during code generation.
#
# cost:number describe how many cycles are needed to complete the instruction (unused)
#
@@ -537,6 +541,10 @@
x86_fp_load_i8: dest:f src1:b len:7
x86_fp_load_i4: dest:f src1:b len:7
x86_seteq_membase: src1:b len:7
+x86_add_membase: dest:i src1:i src2:b clob:1 len:11
+x86_sub_membase: dest:i src1:i src2:b clob:1 len:11
+x86_mul_membase: dest:i src1:i src2:b clob:1 len:13
+x86_div_membase: dest:a src1:i src2:b clob:1 len:26
adc: dest:i src1:i src2:i len:2 clob:1
addcc: dest:i src1:i src2:i len:2 clob:1
subcc: dest:i src1:i src2:i len:2 clob:1
Index: driver.c
===================================================================
RCS file: /cvs/public/mono/mono/mini/driver.c,v
retrieving revision 1.45
diff -u -u -r1.45 driver.c
--- driver.c 20 Mar 2004 22:02:47 -0000 1.45
+++ driver.c 28 Mar 2004 17:52:59 -0000
@@ -88,6 +88,8 @@
MONO_OPT_INTRINS | \
MONO_OPT_AOT)
+int enable_stack = 0;
+
static guint32
parse_optimizations (const char* p)
{
@@ -670,6 +672,8 @@
mono_print_vtable = TRUE;
} else if (strcmp (argv [i], "--stats") == 0) {
mono_jit_stats.enabled = TRUE;
+ } else if (strcmp (argv [i], "--x") == 0){
+ enable_stack = 1;
} else if (strcmp (argv [i], "--aot") == 0) {
mono_compile_aot = TRUE;
} else if (strcmp (argv [i], "--compile-all") == 0) {
Index: inssel-x86.brg
===================================================================
RCS file: /cvs/public/mono/mono/mini/inssel-x86.brg,v
retrieving revision 1.17
diff -u -u -r1.17 inssel-x86.brg
--- inssel-x86.brg 17 Feb 2004 17:15:14 -0000 1.17
+++ inssel-x86.brg 28 Mar 2004 17:52:59 -0000
@@ -584,4 +584,52 @@
/* fixme: nothing to do ??*/
}
+reg: CEE_ADD(reg, CEE_LDIND_I4 (base)) {
+ MonoInst *base = state->right->left->tree;
+
+ tree->dreg = state->reg1;
+ tree->sreg1 = state->left->reg1;
+ tree->sreg2 = base->inst_basereg;
+ tree->inst_offset = base->inst_offset;
+ tree->opcode = OP_X86_ADD_MEMBASE;
+ mono_bblock_add_inst (s->cbb, tree);
+} cost {
+ if (enable_stack){
+ return 1;
+ } else
+ return 10000000;
+}
+
+reg: CEE_SUB(reg, CEE_LDIND_I4 (base)) {
+ MonoInst *base = state->right->left->tree;
+
+ tree->dreg = state->reg1;
+ tree->sreg1 = state->left->reg1;
+ tree->sreg2 = base->inst_basereg;
+ tree->inst_offset = base->inst_offset;
+ tree->opcode = OP_X86_SUB_MEMBASE;
+ mono_bblock_add_inst (s->cbb, tree);
+} cost {
+ if (enable_stack){
+ return 1;
+ } else
+ return 10000000;
+}
+
+reg: CEE_MUL(reg, CEE_LDIND_I4 (base)) {
+ MonoInst *base = state->right->left->tree;
+
+ tree->dreg = state->reg1;
+ tree->sreg1 = state->left->reg1;
+ tree->sreg2 = base->inst_basereg;
+ tree->inst_offset = base->inst_offset;
+ tree->opcode = OP_X86_MUL_MEMBASE;
+ mono_bblock_add_inst (s->cbb, tree);
+} cost {
+ if (enable_stack){
+ return 1;
+ } else
+ return 10000000;
+}
+
%%
Index: inssel.brg
===================================================================
RCS file: /cvs/public/mono/mono/mini/inssel.brg,v
retrieving revision 1.36
diff -u -u -r1.36 inssel.brg
--- inssel.brg 26 Mar 2004 15:24:16 -0000 1.36
+++ inssel.brg 28 Mar 2004 17:53:00 -0000
@@ -8,7 +8,7 @@
* (C) 2002 Ximian, Inc.
*
*/
-
+extern int enable_stack;
#include <config.h>
#include <string.h>
Index: mini-ops.h
===================================================================
RCS file: /cvs/public/mono/mono/mini/mini-ops.h,v
retrieving revision 1.28
diff -u -u -r1.28 mini-ops.h
--- mini-ops.h 26 Mar 2004 15:24:16 -0000 1.28
+++ mini-ops.h 28 Mar 2004 17:53:00 -0000
@@ -4,14 +4,14 @@
MINI_OP(OP_STORE, "store")
MINI_OP(OP_OBJADDR, "objaddr")
MINI_OP(OP_VTADDR, "vtaddr")
-MINI_OP(OP_PHI, "phi")
+MINI_OP(OP_PHI, "phi")
MINI_OP(OP_RENAME, "rename")
MINI_OP(OP_COMPARE, "compare")
MINI_OP(OP_COMPARE_IMM, "compare_imm")
MINI_OP(OP_FCOMPARE, "fcompare")
MINI_OP(OP_LCOMPARE, "lcompare")
MINI_OP(OP_LOCAL, "local")
-MINI_OP(OP_ARG, "arg")
+MINI_OP(OP_ARG, "arg")
MINI_OP(OP_ARGLIST, "oparglist")
MINI_OP(OP_OUTARG, "outarg")
MINI_OP(OP_OUTARG_IMM, "outarg_imm")
@@ -344,7 +344,11 @@
MINI_OP(OP_X86_FP_LOAD_I8, "x86_fp_load_i8")
MINI_OP(OP_X86_FP_LOAD_I4, "x86_fp_load_i4")
MINI_OP(OP_X86_SETEQ_MEMBASE, "x86_seteq_membase")
-
+MINI_OP(OP_X86_ADD_MEMBASE, "x86_add_membase")
+MINI_OP(OP_X86_SUB_MEMBASE, "x86_sub_membase")
+MINI_OP(OP_X86_MUL_MEMBASE, "x86_mul_membase")
+MINI_OP(OP_X86_DIV_MEMBASE, "x86_div_membase")
+
MINI_OP(OP_PPC_SUBFIC, "ppc_subfic")
MINI_OP(OP_PPC_SUBFZE, "ppc_subfze")
Index: mini-x86.c
===================================================================
RCS file: /cvs/public/mono/mono/mini/mini-x86.c,v
retrieving revision 1.80
diff -u -u -r1.80 mini-x86.c
--- mini-x86.c 28 Mar 2004 17:16:54 -0000 1.80
+++ mini-x86.c 28 Mar 2004 17:53:00 -0000
@@ -1308,6 +1308,7 @@
/* forward pass on the instructions to collect register liveness info */
while (ins) {
spec = ins_spec [ins->opcode];
+
DEBUG (print_ins (i, ins));
if (spec [MONO_INST_SRC1]) {
@@ -2086,9 +2087,15 @@
case OP_X86_ADD_MEMBASE_IMM:
x86_alu_membase_imm (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
break;
+ case OP_X86_ADD_MEMBASE:
+ x86_alu_reg_membase (code, X86_ADD, ins->sreg1, ins->sreg2, ins->inst_offset);
+ break;
case OP_X86_SUB_MEMBASE_IMM:
x86_alu_membase_imm (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
break;
+ case OP_X86_SUB_MEMBASE:
+ x86_alu_reg_membase (code, X86_SUB, ins->sreg1, ins->sreg2, ins->inst_offset);
+ break;
case OP_X86_INC_MEMBASE:
x86_inc_membase (code, ins->inst_basereg, ins->inst_offset);
break;
@@ -2100,6 +2107,9 @@
break;
case OP_X86_DEC_REG:
x86_dec_reg (code, ins->dreg);
+ break;
+ case OP_X86_MUL_MEMBASE:
+ x86_imul_reg_membase (code, ins->sreg1, ins->sreg2, ins->inst_offset);
break;
case CEE_BREAK:
x86_breakpoint (code);
More information about the Mono-devel-list
mailing list