[Mono-dev] [PATCH] FP convert optimization based on AuxV info
Steven Munroe
munroesj at linux.vnet.ibm.com
Mon Mar 16 15:13:33 EDT 2009
This patch adds an optimization for FP/int conversions that is available
on later versions of the PowerISA. It requires the
ppc-part4-opt-20090303.txt patch as a prerequisite.
-------------- next part --------------
An embedded message was scrubbed...
From: Steven Munroe <munroesj at us.ibm.com>
Subject: [PATCH] Update AuxV memcpy and icbi optimizations
Date: Thu, 05 Mar 2009 21:03:16 -0600
Size: 18558
Url: http://lists.ximian.com/pipermail/mono-devel-list/attachments/20090316/9ba99ce9/attachment-0001.mht
-------------- next part --------------
2009-03-15 Steven Munroe <munroesj at us.ibm.com>
This patch is contributed under the terms of the MIT/X11 license
* arch/ppc/ppc-codegen.h (ppc_fcfidx, ppc_fctidx, ppc_fctidzx):
Share with PPC32. These instructions are available to 32-bit
programs on 64-bit hardware and to all processors starting with
PowerISA V2.01.
[__mono_ppc64__]: Define ppc_mftgpr and ppc_mffgpr for Power6
native mode.
* mini-ppc.c: Define HAS_MOVE_FPR_GPR and HAS_64BIT_ISA.
(mono_arch_decompose_opts): Make OP_ICONV_TO_R4 and
OP_ICONV_TO_R8 decompose conditional on !HAS_64BIT_ISA.
(mono_arch_output_basic_block) [__mono_ppc64__]: Replace
store/load sequence with mffgpr if HAS_MOVE_FPR_GPR is true.
(mono_arch_output_basic_block) [!__mono_ppc64__]: For
OP_ICONV_TO_R4 or OP_ICONV_TO_R8 and HAS_64BIT_ISA use fcfid
to convert.
diff -urN mono-svn-base/mono/mono/arch/ppc/ppc-codegen.h mono-svn/mono/mono/arch/ppc/ppc-codegen.h
--- mono-svn-base/mono/mono/arch/ppc/ppc-codegen.h 2009-03-05 18:39:48.000000000 -0600
+++ mono-svn/mono/mono/arch/ppc/ppc-codegen.h 2009-03-07 15:31:30.000000000 -0600
@@ -706,6 +706,23 @@
/* PPC64 */
+/* The following FP instructions are not available to 32-bit
+ implementations prior to PowerISA-V2.01, but are available to
+ 32-bit mode programs on 64-bit PowerPC implementations and all
+ processors compliant with PowerISA-2.01 or later. */
+
+#define ppc_fcfidx(c,D,B,Rc) ppc_emit32(c, (63 << 26) | ((D) << 21) | (0 << 16) | ((B) << 11) | (846 << 1) | (Rc))
+#define ppc_fcfid(c,D,B) ppc_fcfidx(c,D,B,0)
+#define ppc_fcfidd(c,D,B) ppc_fcfidx(c,D,B,1)
+
+#define ppc_fctidx(c,D,B,Rc) ppc_emit32(c, (63 << 26) | ((D) << 21) | (0 << 16) | ((B) << 11) | (814 << 1) | (Rc))
+#define ppc_fctid(c,D,B) ppc_fctidx(c,D,B,0)
+#define ppc_fctidd(c,D,B) ppc_fctidx(c,D,B,1)
+
+#define ppc_fctidzx(c,D,B,Rc) ppc_emit32(c, (63 << 26) | ((D) << 21) | (0 << 16) | ((B) << 11) | (815 << 1) | (Rc))
+#define ppc_fctidz(c,D,B) ppc_fctidzx(c,D,B,0)
+#define ppc_fctidzd(c,D,B) ppc_fctidzx(c,D,B,1)
+
#ifdef __mono_ppc64__
#define ppc_load_sequence(c,D,v) G_STMT_START { \
@@ -800,17 +817,14 @@
#define ppc_extsw(c,A,S) ppc_extswx(c,S,A,0)
#define ppc_extswd(c,A,S) ppc_extswx(c,S,A,1)
-#define ppc_fcfidx(c,D,B,Rc) ppc_emit32(c, (63 << 26) | ((D) << 21) | (0 << 16) | ((B) << 11) | (846 << 1) | (Rc))
-#define ppc_fcfid(c,D,B) ppc_fcfidx(c,D,B,0)
-#define ppc_fcfidd(c,D,B) ppc_fcfidx(c,D,B,1)
-
-#define ppc_fctidx(c,D,B,Rc) ppc_emit32(c, (63 << 26) | ((D) << 21) | (0 << 16) | ((B) << 11) | (814 << 1) | (Rc))
-#define ppc_fctid(c,D,B) ppc_fctidx(c,D,B,0)
-#define ppc_fctidd(c,D,B) ppc_fctidx(c,D,B,1)
-
-#define ppc_fctidzx(c,D,B,Rc) ppc_emit32(c, (63 << 26) | ((D) << 21) | (0 << 16) | ((B) << 11) | (815 << 1) | (Rc))
-#define ppc_fctidz(c,D,B) ppc_fctidzx(c,D,B,0)
-#define ppc_fctidzd(c,D,B) ppc_fctidzx(c,D,B,1)
+/* These move float to/from GPR instructions are only available on POWER6 in
+ native mode. These instructions are faster than the equivalent
+ store/load because they avoid the store queue and associated delays.
+ These instructions should only be used in 64-bit mode unless the
+ kernel preserves the 64-bit GPR on signals and dispatch in 32-bit
+ mode. The Linux kernel does not. */
+#define ppc_mftgpr(c,T,B) ppc_emit32(c, (31 << 26) | ((T) << 21) | (0 << 16) | ((B) << 11) | (735 << 1) | 0)
+#define ppc_mffgpr(c,T,B) ppc_emit32(c, (31 << 26) | ((T) << 21) | (0 << 16) | ((B) << 11) | (607 << 1) | 0)
#define ppc_ld(c,D,ds,A) ppc_emit32(c, (58 << 26) | ((D) << 21) | ((A) << 16) | ((guint32)(ds) & 0xfffc) | 0)
#define ppc_lwa(c,D,ds,A) ppc_emit32(c, (58 << 26) | ((D) << 21) | ((A) << 16) | ((ds) & 0xfffc) | 2)
diff -urN mono-svn-base/mono/mono/mini/mini-ppc.c mono-svn/mono/mono/mini/mini-ppc.c
--- mono-svn-base/mono/mono/mini/mini-ppc.c 2009-03-08 21:42:54.000000000 -0500
+++ mono-svn/mono/mono/mini/mini-ppc.c 2009-03-08 21:26:17.000000000 -0500
@@ -293,6 +293,19 @@
#define HAS_ICACHE_SNOOP 0
#endif
+#ifdef PPC_FEATURE_POWER6_EXT
+#define HAS_MOVE_FPR_GPR (linux_ppc_hwcap & PPC_FEATURE_POWER6_EXT)
+#else
+#define HAS_MOVE_FPR_GPR 0
+#endif
+
+#ifdef PPC_FEATURE_64
+#define HAS_64BIT_ISA (linux_ppc_hwcap & PPC_FEATURE_64)
+#else
+#define HAS_64BIT_ISA 0
+#endif
+
+
static int
linux_init_ppc_SMP(void)
{
@@ -2250,28 +2263,34 @@
#ifndef __mono_ppc64__
case OP_ICONV_TO_R4:
case OP_ICONV_TO_R8: {
- /* FIXME: change precision for CEE_CONV_R4 */
- static const guint64 adjust_val = 0x4330000080000000ULL;
- int msw_reg = mono_alloc_ireg (cfg);
- int xored = mono_alloc_ireg (cfg);
- int adj_reg = mono_alloc_freg (cfg);
- int tmp_reg = mono_alloc_freg (cfg);
- int basereg = ppc_sp;
- int offset = -8;
- if (!ppc_is_imm16 (offset + 4)) {
- basereg = mono_alloc_ireg (cfg);
- MONO_EMIT_NEW_BIALU_IMM (cfg, OP_IADD_IMM, basereg, cfg->frame_reg, offset);
+ /* If we have a PPC_FEATURE_64 machine we can avoid
+ this and use the fcfid instruction. Otherwise we are
+ on an old 32-bit chip and have to do this the
+ hard way. */
+ if (!HAS_64BIT_ISA) {
+ /* FIXME: change precision for CEE_CONV_R4 */
+ static const guint64 adjust_val = 0x4330000080000000ULL;
+ int msw_reg = mono_alloc_ireg (cfg);
+ int xored = mono_alloc_ireg (cfg);
+ int adj_reg = mono_alloc_freg (cfg);
+ int tmp_reg = mono_alloc_freg (cfg);
+ int basereg = ppc_sp;
+ int offset = -8;
+ if (!ppc_is_imm16 (offset + 4)) {
+ basereg = mono_alloc_ireg (cfg);
+ MONO_EMIT_NEW_BIALU_IMM (cfg, OP_IADD_IMM, basereg, cfg->frame_reg, offset);
+ }
+ MONO_EMIT_NEW_ICONST (cfg, msw_reg, 0x43300000);
+ MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STOREI4_MEMBASE_REG, basereg, offset, msw_reg);
+ MONO_EMIT_NEW_BIALU_IMM (cfg, OP_XOR_IMM, xored, ins->sreg1, 0x80000000);
+ MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STOREI4_MEMBASE_REG, basereg, offset + 4, xored);
+ MONO_EMIT_NEW_LOAD_R8 (cfg, adj_reg, (gpointer)&adjust_val);
+ MONO_EMIT_NEW_LOAD_MEMBASE_OP (cfg, OP_LOADR8_MEMBASE, tmp_reg, basereg, offset);
+ MONO_EMIT_NEW_BIALU (cfg, OP_FSUB, ins->dreg, tmp_reg, adj_reg);
+ if (ins->opcode == OP_ICONV_TO_R4)
+ MONO_EMIT_NEW_UNALU (cfg, OP_FCONV_TO_R4, ins->dreg, ins->dreg);
+ ins->opcode = OP_NOP;
}
- MONO_EMIT_NEW_ICONST (cfg, msw_reg, 0x43300000);
- MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STOREI4_MEMBASE_REG, basereg, offset, msw_reg);
- MONO_EMIT_NEW_BIALU_IMM (cfg, OP_XOR_IMM, xored, ins->sreg1, 0x80000000);
- MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STOREI4_MEMBASE_REG, basereg, offset + 4, xored);
- MONO_EMIT_NEW_LOAD_R8 (cfg, adj_reg, (gpointer)&adjust_val);
- MONO_EMIT_NEW_LOAD_MEMBASE_OP (cfg, OP_LOADR8_MEMBASE, tmp_reg, basereg, offset);
- MONO_EMIT_NEW_BIALU (cfg, OP_FSUB, ins->dreg, tmp_reg, adj_reg);
- if (ins->opcode == OP_ICONV_TO_R4)
- MONO_EMIT_NEW_UNALU (cfg, OP_FCONV_TO_R4, ins->dreg, ins->dreg);
- ins->opcode = OP_NOP;
break;
}
#endif
@@ -4361,8 +4380,12 @@
} else {
tmp = ins->sreg1;
}
- ppc_store_reg (code, tmp, -8, ppc_r1);
- ppc_lfd (code, ins->dreg, -8, ppc_r1);
+ if (HAS_MOVE_FPR_GPR) {
+ ppc_mffgpr (code, ins->dreg, tmp);
+ } else {
+ ppc_store_reg (code, tmp, -8, ppc_r1);
+ ppc_lfd (code, ins->dreg, -8, ppc_r1);
+ }
ppc_fcfid (code, ins->dreg, ins->dreg);
if (ins->opcode == OP_ICONV_TO_R4 || ins->opcode == OP_LCONV_TO_R4)
ppc_frsp (code, ins->dreg, ins->dreg);
@@ -4443,6 +4466,20 @@
ppc_mr (code, ins->dreg, ppc_r0);
break;
}
+#else
+ case OP_ICONV_TO_R4:
+ case OP_ICONV_TO_R8: {
+ if (HAS_64BIT_ISA) {
+ ppc_srawi(code, ppc_r0, ins->sreg1, 31);
+ ppc_store_reg (code, ppc_r0, -8, ppc_r1);
+ ppc_store_reg (code, ins->sreg1, -4, ppc_r1);
+ ppc_lfd (code, ins->dreg, -8, ppc_r1);
+ ppc_fcfid (code, ins->dreg, ins->dreg);
+ if (ins->opcode == OP_ICONV_TO_R4)
+ ppc_frsp (code, ins->dreg, ins->dreg);
+ }
+ break;
+ }
#endif
default:
More information about the Mono-devel-list
mailing list