[Mono-dev] [PATCH] FP convert optimization based on AuxV info

Steven Munroe munroesj at linux.vnet.ibm.com
Mon Mar 16 15:13:33 EDT 2009


This patch adds optimizations for FP/int conversions that are available
on later versions of the PowerISA. It requires the
ppc-part4-opt-20090303.txt patch as a prerequisite.

-------------- next part --------------
An embedded message was scrubbed...
From: Steven Munroe <munroesj at us.ibm.com>
Subject: [PATCH] Update AuxV memcpy and icbi optimizations
Date: Thu, 05 Mar 2009 21:03:16 -0600
Size: 18558
Url: http://lists.ximian.com/pipermail/mono-devel-list/attachments/20090316/9ba99ce9/attachment-0001.mht 
-------------- next part --------------
2009-03-15  Steven Munroe  <munroesj at us.ibm.com>

This patch is contributed under the terms of the MIT/X11 license

	* arch/ppc/ppc-codegen.h (ppc_fcfidx, ppc_fctidx, ppc_fctidzx):
	Share with PPC32.  These instructions are available to 32-bit
	programs on 64-bit hardware, and on all processors starting with
	PowerISA V2.01.
	[__mono_ppc64__]: Define ppc_mftgpr and ppc_mffgpr for Power6
	native mode.

	* mini-ppc.c: Define HAS_MOVE_FPR_GPR and HAS_64BIT_ISA.
	(mono_arch_decompose_opts): Make OP_ICONV_TO_R4 and
	OP_ICONV_TO_R8 decompose conditional on !HAS_64BIT_ISA.
	(mono_arch_output_basic_block) [__mono_ppc64__]: Replace
	store/load sequence with mffgpr if HAS_MOVE_FPR_GPR is true.
	(mono_arch_output_basic_block) [!__mono_ppc64__]: For
	OP_ICONV_TO_R4 or OP_ICONV_TO_R8 and HAS_64BIT_ISA use fcfid
	to convert.


diff -urN mono-svn-base/mono/mono/arch/ppc/ppc-codegen.h mono-svn/mono/mono/arch/ppc/ppc-codegen.h
--- mono-svn-base/mono/mono/arch/ppc/ppc-codegen.h	2009-03-05 18:39:48.000000000 -0600
+++ mono-svn/mono/mono/arch/ppc/ppc-codegen.h	2009-03-07 15:31:30.000000000 -0600
@@ -706,6 +706,23 @@
 
 /* PPC64 */
 
+/* The following FP instructions are not available to 32-bit
+   implementations (prior to PowerISA-V2.01) but are available to
+   32-bit mode programs on 64-bit PowerPC implementations and all
+   processors compliant with PowerISA-2.01 or later.  */
+
+#define ppc_fcfidx(c,D,B,Rc) ppc_emit32(c, (63 << 26) | ((D) << 21) | (0 << 16) | ((B) << 11) | (846 << 1) | (Rc))
+#define ppc_fcfid(c,D,B)  ppc_fcfidx(c,D,B,0)
+#define ppc_fcfidd(c,D,B) ppc_fcfidx(c,D,B,1)
+
+#define ppc_fctidx(c,D,B,Rc) ppc_emit32(c, (63 << 26) | ((D) << 21) | (0 << 16) | ((B) << 11) | (814 << 1) | (Rc))
+#define ppc_fctid(c,D,B)  ppc_fctidx(c,D,B,0)
+#define ppc_fctidd(c,D,B) ppc_fctidx(c,D,B,1)
+
+#define ppc_fctidzx(c,D,B,Rc) ppc_emit32(c, (63 << 26) | ((D) << 21) | (0 << 16) | ((B) << 11) | (815 << 1) | (Rc))
+#define ppc_fctidz(c,D,B)  ppc_fctidzx(c,D,B,0)
+#define ppc_fctidzd(c,D,B) ppc_fctidzx(c,D,B,1)
+
 #ifdef __mono_ppc64__
 
 #define ppc_load_sequence(c,D,v) G_STMT_START {	\
@@ -800,17 +817,14 @@
 #define ppc_extsw(c,A,S)  ppc_extswx(c,S,A,0)
 #define ppc_extswd(c,A,S) ppc_extswx(c,S,A,1)
 
-#define ppc_fcfidx(c,D,B,Rc) ppc_emit32(c, (63 << 26) | ((D) << 21) | (0 << 16) | ((B) << 11) | (846 << 1) | (Rc))
-#define ppc_fcfid(c,D,B)  ppc_fcfidx(c,D,B,0)
-#define ppc_fcfidd(c,D,B) ppc_fcfidx(c,D,B,1)
-
-#define ppc_fctidx(c,D,B,Rc) ppc_emit32(c, (63 << 26) | ((D) << 21) | (0 << 16) | ((B) << 11) | (814 << 1) | (Rc))
-#define ppc_fctid(c,D,B)  ppc_fctidx(c,D,B,0)
-#define ppc_fctidd(c,D,B) ppc_fctidx(c,D,B,1)
-
-#define ppc_fctidzx(c,D,B,Rc) ppc_emit32(c, (63 << 26) | ((D) << 21) | (0 << 16) | ((B) << 11) | (815 << 1) | (Rc))
-#define ppc_fctidz(c,D,B)  ppc_fctidzx(c,D,B,0)
-#define ppc_fctidzd(c,D,B) ppc_fctidzx(c,D,B,1)
+/* These move float to/from instructions are only available on POWER6 in
+   native mode.  These instructions are faster than the equivalent
+   store/load because they avoid the store queue and associated delays.
+   These instructions should only be used in 64-bit mode unless the
+   kernel preserves the 64-bit GPR on signals and dispatch in 32-bit
+   mode.  The Linux kernel does not.  */
+#define ppc_mftgpr(c,T,B) ppc_emit32(c, (31 << 26) | ((T) << 21) | (0 << 16) | ((B) << 11) | (735 << 1) | 0)
+#define ppc_mffgpr(c,T,B) ppc_emit32(c, (31 << 26) | ((T) << 21) | (0 << 16) | ((B) << 11) | (607 << 1) | 0)
 
 #define ppc_ld(c,D,ds,A) ppc_emit32(c, (58 << 26) | ((D) << 21) | ((A) << 16) | ((guint32)(ds) & 0xfffc) | 0)
 #define ppc_lwa(c,D,ds,A) ppc_emit32(c, (58 << 26) | ((D) << 21) | ((A) << 16) | ((ds) & 0xfffc) | 2)
diff -urN mono-svn-base/mono/mono/mini/mini-ppc.c mono-svn/mono/mono/mini/mini-ppc.c
--- mono-svn-base/mono/mono/mini/mini-ppc.c	2009-03-08 21:42:54.000000000 -0500
+++ mono-svn/mono/mono/mini/mini-ppc.c	2009-03-08 21:26:17.000000000 -0500
@@ -293,6 +293,19 @@
 #define HAS_ICACHE_SNOOP 0
 #endif
 
+#ifdef PPC_FEATURE_POWER6_EXT
+#define HAS_MOVE_FPR_GPR (linux_ppc_hwcap & PPC_FEATURE_POWER6_EXT)
+#else
+#define HAS_MOVE_FPR_GPR 0
+#endif
+
+#ifdef PPC_FEATURE_64
+#define HAS_64BIT_ISA (linux_ppc_hwcap & PPC_FEATURE_64)
+#else
+#define HAS_64BIT_ISA 0
+#endif
+
+
 static int
 linux_init_ppc_SMP(void)
 {
@@ -2250,28 +2263,34 @@
 #ifndef __mono_ppc64__
 	case OP_ICONV_TO_R4:
 	case OP_ICONV_TO_R8: {
-		/* FIXME: change precision for CEE_CONV_R4 */
-		static const guint64 adjust_val = 0x4330000080000000ULL;
-		int msw_reg = mono_alloc_ireg (cfg);
-		int xored = mono_alloc_ireg (cfg);
-		int adj_reg = mono_alloc_freg (cfg);
-		int tmp_reg = mono_alloc_freg (cfg);
-		int basereg = ppc_sp;
-		int offset = -8;
-		if (!ppc_is_imm16 (offset + 4)) {
-			basereg = mono_alloc_ireg (cfg);
-			MONO_EMIT_NEW_BIALU_IMM (cfg, OP_IADD_IMM, basereg, cfg->frame_reg, offset);
+		/* If we have a PPC_FEATURE_64 machine we can avoid
+		   this and use the fcfid instruction.  Otherwise
+		   we are on an old 32-bit chip and have to do this
+		   the hard way.  */
+		if (!HAS_64BIT_ISA) {
+			/* FIXME: change precision for CEE_CONV_R4 */
+			static const guint64 adjust_val = 0x4330000080000000ULL;
+			int msw_reg = mono_alloc_ireg (cfg);
+			int xored = mono_alloc_ireg (cfg);
+			int adj_reg = mono_alloc_freg (cfg);
+			int tmp_reg = mono_alloc_freg (cfg);
+			int basereg = ppc_sp;
+			int offset = -8;
+			if (!ppc_is_imm16 (offset + 4)) {
+				basereg = mono_alloc_ireg (cfg);
+				MONO_EMIT_NEW_BIALU_IMM (cfg, OP_IADD_IMM, basereg, cfg->frame_reg, offset);
+			}
+			MONO_EMIT_NEW_ICONST (cfg, msw_reg, 0x43300000);
+			MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STOREI4_MEMBASE_REG, basereg, offset, msw_reg);
+			MONO_EMIT_NEW_BIALU_IMM (cfg, OP_XOR_IMM, xored, ins->sreg1, 0x80000000);
+			MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STOREI4_MEMBASE_REG, basereg, offset + 4, xored);
+			MONO_EMIT_NEW_LOAD_R8 (cfg, adj_reg, (gpointer)&adjust_val);
+			MONO_EMIT_NEW_LOAD_MEMBASE_OP (cfg, OP_LOADR8_MEMBASE, tmp_reg, basereg, offset);
+			MONO_EMIT_NEW_BIALU (cfg, OP_FSUB, ins->dreg, tmp_reg, adj_reg);
+			if (ins->opcode == OP_ICONV_TO_R4)
+				MONO_EMIT_NEW_UNALU (cfg, OP_FCONV_TO_R4, ins->dreg, ins->dreg);
+			ins->opcode = OP_NOP;
 		}
-		MONO_EMIT_NEW_ICONST (cfg, msw_reg, 0x43300000);
-		MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STOREI4_MEMBASE_REG, basereg, offset, msw_reg);
-		MONO_EMIT_NEW_BIALU_IMM (cfg, OP_XOR_IMM, xored, ins->sreg1, 0x80000000);
-		MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STOREI4_MEMBASE_REG, basereg, offset + 4, xored);
-		MONO_EMIT_NEW_LOAD_R8 (cfg, adj_reg, (gpointer)&adjust_val);
-		MONO_EMIT_NEW_LOAD_MEMBASE_OP (cfg, OP_LOADR8_MEMBASE, tmp_reg, basereg, offset);
-		MONO_EMIT_NEW_BIALU (cfg, OP_FSUB, ins->dreg, tmp_reg, adj_reg);
-		if (ins->opcode == OP_ICONV_TO_R4)
-			MONO_EMIT_NEW_UNALU (cfg, OP_FCONV_TO_R4, ins->dreg, ins->dreg);
-		ins->opcode = OP_NOP;
 		break;
 	}
 #endif
@@ -4361,8 +4380,12 @@
 			} else {
 				tmp = ins->sreg1;
 			}
-			ppc_store_reg (code, tmp, -8, ppc_r1);
-			ppc_lfd (code, ins->dreg, -8, ppc_r1);
+			if (HAS_MOVE_FPR_GPR) {
+				ppc_mffgpr (code, ins->dreg, tmp);
+			} else {
+				ppc_store_reg (code, tmp, -8, ppc_r1);
+				ppc_lfd (code, ins->dreg, -8, ppc_r1);
+			}
 			ppc_fcfid (code, ins->dreg, ins->dreg);
 			if (ins->opcode == OP_ICONV_TO_R4 || ins->opcode == OP_LCONV_TO_R4)
 				ppc_frsp (code, ins->dreg, ins->dreg);
@@ -4443,6 +4466,20 @@
 			ppc_mr (code, ins->dreg, ppc_r0);
 			break;
 		}
+#else
+		case OP_ICONV_TO_R4:
+		case OP_ICONV_TO_R8: {
+			if (HAS_64BIT_ISA) {
+				ppc_srawi(code, ppc_r0, ins->sreg1, 31);
+				ppc_store_reg (code, ppc_r0, -8, ppc_r1);
+				ppc_store_reg (code, ins->sreg1, -4, ppc_r1);
+				ppc_lfd (code, ins->dreg, -8, ppc_r1);
+				ppc_fcfid (code, ins->dreg, ins->dreg);
+				if (ins->opcode == OP_ICONV_TO_R4)
+					ppc_frsp (code, ins->dreg, ins->dreg);
+				}
+			break;
+		}
 #endif
 
 		default:


More information about the Mono-devel-list mailing list