[Mono-dev] [PATCH] Enable TLS for PPC32/64

Steven Munroe munroesj at linux.vnet.ibm.com
Thu Jan 22 10:03:22 EST 2009


This patch completes the PPC64 port and enables Thread Local Storage
under Linux/NPTL. This patch also provides the infrastructure for
detecting PPC hardware attributes (via the SYSV Aux Vector) that we will
need to optimize JIT code. For example, this patch changes
mono_arch_flush_icache() to use the aux vector Hardware Capabilities
(AT_HWCAP) info to avoid unnecessary dcbf/icbi sequences.

This patch is contributed under the terms of the MIT/X11 license.


-------------- next part --------------
2009-01-21  Steven Munroe  <munroesj at us.ibm.com>

This patch is contributed under the terms of the MIT/X11 license

	* ppc-codegen.h: Change ppc_is_imm16 and ppc_is_imm32 to avoid
	compiler warnings in 64-bit. Make operand order and case consistent
	for ppc_load_reg_update, ppc_load_multiple_regs,
	ppc_store_multiple_regs, ppc_addi, ppc_addis, ppc_lwz, ppc_lhz,
	ppc_lbz, ppc_stw, ppc_sth, ppc_stb, ppc_stwu, ppc_ori, ppc_lbzu,
	ppc_lfdu, ppc_lfsu, ppc_lfsux, ppc_lfsx, ppc_lha, ppc_lhau,
	ppc_lhzu, ppc_lmw, ppc_addic, ppc_addicd, ppc_andid, ppc_andisd,
	ppc_lbzu, ppc_lfdu, ppc_lfsu, ppc_lfsux, ppc_lfsx, ppc_lha,
	ppc_lhau, ppc_lhzu, ppc_lmw, ppc_lwzu, ppc_stbu, ppc_stfdu,
	ppc_stfsu, ppc_sthu, ppc_stmw.
	[__mono_ppc64__]: Simplify the DS form and make them consistent
	with D forms for ppc_ld, ppc_lwa, ppc_ldu, ppc_std, ppc_stdu.
	Added ppc_load48 and modified ppc_load to use it.

	* exceptions-ppc.c (restore_regs_from_context): Correct operand
	order for ppc_load_multiple_regs.
	(mono_arch_get_restore_context): Correct g_assert test.
	(emit_save_saved_regs): Correct operand order for
	ppc_store_multiple_regs.
	(mono_arch_get_call_filter): Correct operand order for
	ppc_load_multiple_regs.
	* mini-ppc.c: Add includes to get at Aux vector definitions.
	(emit_nptl_tls): Define for PPC32 and PPC64.
	(emit_tls_access): Update emit_tls_access to use emit_nptl_tls.
	[__linux__]: Define functions __aux_find, linux_get_auxv,
	linux_auxv_init_once, linux_query_auxv, linix_init_ppc_SMP,
	linux_init_ppc_platform.
	Define flags/values auxv_once_control, linux_ppc_hwcap,
	linux_ppc_platform, linux_ppc_ISA2x, linux_ppc_ISA2x_mask,
	linux_ppc_SMP, linux_ppc_LSUs, linux_ppc_FXUs, linux_ppc_FPUs.
	Define macro HAS_ICACHE_SNOOP.
	(emit_memcpy): Fix operand order and compiler warnings in 64-bit.
	Generate optimized code for longer moves where linux_ppc_LSUs
	is 2 or more.
	(mono_arch_cpu_init) [__linux__]: Add calls to linux_auxv_init_once
	and linux_init_ppc_platform.
	(mono_arch_flush_icache) [__linux__]: Use linux_query_auxv
	(AT_DCACHEBSIZE) and set cachelineinc and cachelinesize.
	Optimize using HAS_ICACHE_SNOOP, linux_ppc_SMP, and linux_ppc_ISA2x.
	(emit_float_to_int): Fix compiler warnings for 64-bit.
	(emit_reserve_param_area): Likewise.
	(emit_unreserve_param_area): Likewise.
	(mono_arch_output_basic_block)[__mono_ppc64__]: Replace
	lwz/lwzx/extsw with lwa/lwax.
	(mono_arch_output_basic_block): Correct operand order for ppc_lha.
	Replace ppc_addic with ppc_addi for case OP_JMP.
	Fix compiler warnings for 64-bit.
	(mono_arch_emit_prolog): Fix compiler warnings for 64-bit.
	Fix handling of long (int64) operands for 64-bit.
	(mono_arch_emit_epilog): Correct operand order for loads.
	Fix compiler warnings for 64-bit. Replace addic with addi.
	(setup_tls_access)[__linux__ & _CS_GNU_LIBPTHREAD_VERSION]: 
	Use confstr to determine pthread implementation for TLS.
	Use mono_domain_get_tls_offset() to set monodomain_key.
	Use mono_get_lmf_addr_tls_offset() to set lmf_pthread_key.
	Use mono_thread_get_tls_offset() to set monothread_key.
	* mini-ppc.h (MONO_ARCH_HAVE_TLS_GET): Defined.
	(MONO_ARCH_ENABLE_MONITOR_IL_FASTPATH): Likewise.
	(PPC_THREAD_PTR_REG): Likewise.
	* tramp-ppc.c (mono_arch_create_trampoline_code): Correct
	operand order for ppc_store_multiple_regs and
	ppc_load_multiple_regs. Use MONO_PPC_32_64_CASE to set
	tramp_size.

	* Makefile.am: Disable mkbundle for POWERPC64.

	* mono-compiler.h: Define MONO_TLS_FAST and
	MONO_THREAD_VAR_OFFSET.

diff -urN mono-svn-20090121/mono/mono/arch/ppc/ppc-codegen.h mono-svn/mono/mono/arch/ppc/ppc-codegen.h
--- mono-svn-20090121/mono/mono/arch/ppc/ppc-codegen.h	2009-01-21 10:36:00.000000000 -0600
+++ mono-svn/mono/mono/arch/ppc/ppc-codegen.h	2009-01-21 14:29:46.000000000 -0600
@@ -125,7 +125,7 @@
 
 #define ppc_emit32(c,x) do { *((guint32 *) (c)) = x; (c) = (gpointer)((guint8 *)(c) + sizeof (guint32));} while (0)
 
-#define ppc_is_imm16(val) ((glong)(val) >= (glong)-(1L<<15) && (glong)(val) <= (glong)((1L<<15)-1L))
+#define ppc_is_imm16(val) ((((val)>> 15) == 0) || (((val)>> 15) == -1))
 #define ppc_is_uimm16(val) ((glong)(val) >= 0L && (glong)(val) <= 65535L)
 
 #define ppc_load32(c,D,v) G_STMT_START {	\
@@ -150,16 +150,16 @@
 #define ppc_load_func(c,D,V)	      ppc_load_sequence ((c), (D), (V))
 
 #define ppc_load_reg(c,D,d,A)         ppc_lwz  ((c), (D), (d), (A))
-#define ppc_load_reg_update(c,D,d,A)  ppc_lwzu ((c), (D), (A), (d))
+#define ppc_load_reg_update(c,D,d,A)  ppc_lwzu ((c), (D), (d), (A))
 #define ppc_load_reg_indexed(c,D,A,B)        ppc_lwzx ((c), (D), (A), (B))
 #define ppc_load_reg_update_indexed(c,D,A,B) ppc_lwzux ((c), (D), (A), (B))
-#define ppc_load_multiple_regs(c,D,A,d)      ppc_lmw   ((c), (D), (A), (d))
+#define ppc_load_multiple_regs(c,D,d,A)      ppc_lmw   ((c), (D), (d), (A))
 
 #define ppc_store_reg(c,S,d,A)        ppc_stw  ((c), (S), (d), (A))
 #define ppc_store_reg_update(c,S,d,A) ppc_stwu ((c), (S), (d), (A))
 #define ppc_store_reg_indexed(c,S,A,B)        ppc_stwx  ((c), (S), (A), (B))
 #define ppc_store_reg_update_indexed(c,S,A,B) ppc_stwux ((c), (S), (A), (B))
-#define ppc_store_multiple_regs(c,S,A,D)      ppc_stmw  ((c), (S), (A), (D))
+#define ppc_store_multiple_regs(c,S,d,A)      ppc_stmw  ((c), (S), (d), (A))
 
 #define ppc_compare(c,cfrD,A,B)		      ppc_cmp((c), (cfrD), 0, (A), (B))
 #define ppc_compare_reg_imm(c,cfrD,A,B)	      ppc_cmpi((c), (cfrD), 0, (A), (B))
@@ -183,20 +183,20 @@
 #define ppc_split_5_1(x) ((ppc_split_5_1_5(x) << 1) | ppc_split_5_1_1(x))
 
 #define ppc_break(c) ppc_tw((c),31,0,0)
-#define  ppc_addi(c,D,A,d) ppc_emit32 (c, (14 << 26) | ((D) << 21) | ((A) << 16) | (guint16)(d))
-#define ppc_addis(c,D,A,d) ppc_emit32 (c, (15 << 26) | ((D) << 21) | ((A) << 16) | (guint16)(d))
+#define  ppc_addi(c,D,A,i) ppc_emit32 (c, (14 << 26) | ((D) << 21) | ((A) << 16) | (guint16)(i))
+#define ppc_addis(c,D,A,i) ppc_emit32 (c, (15 << 26) | ((D) << 21) | ((A) << 16) | (guint16)(i))
 #define    ppc_li(c,D,v)   ppc_addi   (c, D, 0, (guint16)(v))
 #define   ppc_lis(c,D,v)   ppc_addis  (c, D, 0, (guint16)(v))
-#define   ppc_lwz(c,D,d,a) ppc_emit32 (c, (32 << 26) | ((D) << 21) | ((a) << 16) | (guint16)(d))
-#define   ppc_lhz(c,D,d,a) ppc_emit32 (c, (40 << 26) | ((D) << 21) | ((a) << 16) | (guint16)(d))
-#define   ppc_lbz(c,D,d,a) ppc_emit32 (c, (34 << 26) | ((D) << 21) | ((a) << 16) | (guint16)(d))
-#define   ppc_stw(c,S,d,a) ppc_emit32 (c, (36 << 26) | ((S) << 21) | ((a) << 16) | (guint16)(d))
-#define   ppc_sth(c,S,d,a) ppc_emit32 (c, (44 << 26) | ((S) << 21) | ((a) << 16) | (guint16)(d))
-#define   ppc_stb(c,S,d,a) ppc_emit32 (c, (38 << 26) | ((S) << 21) | ((a) << 16) | (guint16)(d))
-#define  ppc_stwu(c,s,d,a) ppc_emit32 (c, (37 << 26) | ((s) << 21) | ((a) << 16) | (guint16)(d))
+#define   ppc_lwz(c,D,d,A) ppc_emit32 (c, (32 << 26) | ((D) << 21) | ((A) << 16) | (guint16)(d))
+#define   ppc_lhz(c,D,d,A) ppc_emit32 (c, (40 << 26) | ((D) << 21) | ((A) << 16) | (guint16)(d))
+#define   ppc_lbz(c,D,d,A) ppc_emit32 (c, (34 << 26) | ((D) << 21) | ((A) << 16) | (guint16)(d))
+#define   ppc_stw(c,S,d,A) ppc_emit32 (c, (36 << 26) | ((S) << 21) | ((A) << 16) | (guint16)(d))
+#define   ppc_sth(c,S,d,A) ppc_emit32 (c, (44 << 26) | ((S) << 21) | ((A) << 16) | (guint16)(d))
+#define   ppc_stb(c,S,d,A) ppc_emit32 (c, (38 << 26) | ((S) << 21) | ((A) << 16) | (guint16)(d))
+#define  ppc_stwu(c,s,d,A) ppc_emit32 (c, (37 << 26) | ((s) << 21) | ((A) << 16) | (guint16)(d))
 #define    ppc_or(c,a,s,b) ppc_emit32 (c, (31 << 26) | ((s) << 21) | ((a) << 16) | ((b) << 11) | 888)
 #define    ppc_mr(c,a,s)   ppc_or     (c, a, s, s)
-#define   ppc_ori(c,S,A,u) ppc_emit32 (c, (24 << 26) | ((S) << 21) | ((A) << 16) | (guint16)(u))
+#define   ppc_ori(c,S,A,ui) ppc_emit32 (c, (24 << 26) | ((S) << 21) | ((A) << 16) | (guint16)(ui))
 #define	  ppc_nop(c)       ppc_ori    (c, 0, 0, 0)
 #define ppc_mfspr(c,D,spr) ppc_emit32 (c, (31 << 26) | ((D) << 21) | ((spr) << 11) | (339 << 1))
 #define  ppc_mflr(c,D)     ppc_mfspr  (c, D, ppc_lr)
@@ -257,8 +257,8 @@
 #define ppc_addeo(c,D,A,B) ppc_addex(c,D,A,B,1,0)
 #define ppc_addeod(c,D,A,B) ppc_addex(c,D,A,B,1,1)
 
-#define ppc_addic(c,D,A,d) ppc_emit32(c, (12 << 26) | ((D) << 21) | ((A) << 16) | (guint16)(d)) 
-#define ppc_addicd(c,D,A,d) ppc_emit32(c, (13 << 26) | ((D) << 21) | ((A) << 16) | (guint16)(d)) 
+#define ppc_addic(c,D,A,i) ppc_emit32(c, (12 << 26) | ((D) << 21) | ((A) << 16) | (guint16)(i)) 
+#define ppc_addicd(c,D,A,i) ppc_emit32(c, (13 << 26) | ((D) << 21) | ((A) << 16) | (guint16)(i)) 
 
 #define ppc_addmex(c,D,A,OE,RC) ppc_emit32(c, (31 << 26) | ((D) << 21 ) | ((A) << 16) | (0 << 11) | ((OE) << 10) | (234 << 1) | RC)
 #define ppc_addme(c,D,A) ppc_addmex(c,D,A,0,0)
@@ -280,8 +280,8 @@
 #define ppc_andc(c,S,A,B) ppc_andcx(c,S,A,B,0)
 #define ppc_andcd(c,S,A,B) ppc_andcx(c,S,A,B,1)
 
-#define ppc_andid(c,S,A,d) ppc_emit32(c, (28 << 26) | ((S) << 21 ) | ((A) << 16) | ((guint16)(d)))
-#define ppc_andisd(c,S,A,d) ppc_emit32(c, (29 << 26) | ((S) << 21 ) | ((A) << 16) | ((guint16)(d)))
+#define ppc_andid(c,S,A,ui) ppc_emit32(c, (28 << 26) | ((S) << 21 ) | ((A) << 16) | ((guint16)(ui)))
+#define ppc_andisd(c,S,A,ui) ppc_emit32(c, (29 << 26) | ((S) << 21 ) | ((A) << 16) | ((guint16)(ui)))
 
 #define ppc_bcx(c,BO,BI,BD,AA,LK) ppc_emit32(c, (16 << 26) | (BO << 21 )| (BI << 16) | (BD << 2) | ((AA) << 1) | LK)
 #define ppc_bc(c,BO,BI,BD) ppc_bcx(c,BO,BI,BD,0,0) 
@@ -477,36 +477,36 @@
 
 #define ppc_isync(c) ppc_emit32(c, (19 << 26) | (0 << 11) | (150 << 1) | 0)
 
-#define ppc_lbzu(c,D,A,d) ppc_emit32(c, (35 << 26) | (D << 21) | (A << 16) | (guint16)d)
+#define ppc_lbzu(c,D,d,A) ppc_emit32(c, (35 << 26) | (D << 21) | (A << 16) | (guint16)d)
 #define ppc_lbzux(c,D,A,B) ppc_emit32(c, (31 << 26) | (D << 21) | (A << 16) | (B << 11) | (119 << 1) | 0)
 #define ppc_lbzx(c,D,A,B) ppc_emit32(c, (31 << 26) | (D << 21) | (A << 16) | (B << 11) | (87 << 1) | 0)
 
-#define ppc_lfdu(c,D,A,d) ppc_emit32(c, (51 << 26) | (D << 21) | (A << 16) | (guint16)d)
+#define ppc_lfdu(c,D,d,A) ppc_emit32(c, (51 << 26) | (D << 21) | (A << 16) | (guint16)d)
 #define ppc_lfdux(c,D,A,B) ppc_emit32(c, (31 << 26) | (D << 21) | (A << 16) | (B << 11) | (631 << 1) | 0)
 #define ppc_lfdx(c,D,A,B) ppc_emit32(c, (31 << 26) | (D << 21) | (A << 16) | (B << 11) | (599 << 1) | 0)
 
-#define ppc_lfsu(c,D,A,d) ppc_emit32(c, (49 << 26) | (D << 21) | (A << 16) | (guint16)d)
-#define ppc_lfsux(c,D,A,d) ppc_emit32(c, (31 << 26) | (D << 21) | (A << 16) | (567 << 1) | 0)
-#define ppc_lfsx(c,D,A,d) ppc_emit32(c, (31 << 26) | (D << 21) | (A << 16) | (535 << 1) | 0)
+#define ppc_lfsu(c,D,d,A) ppc_emit32(c, (49 << 26) | (D << 21) | (A << 16) | (guint16)d)
+#define ppc_lfsux(c,D,A,B) ppc_emit32(c, (31 << 26) | (D << 21) | (A << 16) | (B << 11) | (567 << 1) | 0)
+#define ppc_lfsx(c,D,A,B) ppc_emit32(c, (31 << 26) | (D << 21) | (A << 16) | (B << 11) | (535 << 1) | 0)
 
-#define ppc_lha(c,D,A,d) ppc_emit32(c, (42 << 26) | (D << 21) | (A << 16) | (guint16)d)
-#define ppc_lhau(c,D,A,d) ppc_emit32(c, (43 << 26) | (D << 21) | (A << 16) | (guint16)d)
+#define ppc_lha(c,D,d,A) ppc_emit32(c, (42 << 26) | (D << 21) | (A << 16) | (guint16)d)
+#define ppc_lhau(c,D,d,A) ppc_emit32(c, (43 << 26) | (D << 21) | (A << 16) | (guint16)d)
 #define ppc_lhaux(c,D,A,B) ppc_emit32(c, (31 << 26) | (D << 21) | (A << 16) | (B << 11) | (375 << 1) | 0)
 #define ppc_lhax(c,D,A,B) ppc_emit32(c, (31 << 26) | (D << 21) | (A << 16) | (B << 11) | (343 << 1) | 0)
 #define ppc_lhbrx(c,D,A,B) ppc_emit32(c, (31 << 26) | (D << 21) | (A << 16) | (B << 11) | (790 << 1) | 0)
-#define ppc_lhzu(c,D,A,d) ppc_emit32(c, (41 << 26) | (D << 21) | (A << 16) | (guint16)d)
+#define ppc_lhzu(c,D,d,A) ppc_emit32(c, (41 << 26) | (D << 21) | (A << 16) | (guint16)d)
 
 #define ppc_lhzux(c,D,A,B) ppc_emit32(c, (31 << 26) | (D << 21) | (A << 16) | (B << 11) | (311 << 1) | 0)
 #define ppc_lhzx(c,D,A,B) ppc_emit32(c, (31 << 26) | (D << 21) | (A << 16) | (B << 11) | (279 << 1) | 0)
 
-#define ppc_lmw(c,D,A,d) ppc_emit32(c, (46 << 26) | (D << 21) | (A << 16) | (guint16)d)
+#define ppc_lmw(c,D,d,A) ppc_emit32(c, (46 << 26) | (D << 21) | (A << 16) | (guint16)d)
 
 #define ppc_lswi(c,D,A,NB) ppc_emit32(c, (31 << 26) | (D << 21) | (A << 16) | (NB << 11) | (597 << 1) | 0)
 #define ppc_lswx(c,D,A,B) ppc_emit32(c, (31 << 26) | (D << 21) | (A << 16) | (B << 11) | (533 << 1) | 0)
 #define ppc_lwarx(c,D,A,B) ppc_emit32(c, (31 << 26) | (D << 21) | (A << 16) | (B << 11) | (20 << 1) | 0)
 #define ppc_lwbrx(c,D,A,B) ppc_emit32(c, (31 << 26) | (D << 21) | (A << 16) | (B << 11) | (534 << 1) | 0)
 
-#define ppc_lwzu(c,D,A,d) ppc_emit32(c, (33 << 26) | (D << 21) | (A << 16) | (guint16)d)
+#define ppc_lwzu(c,D,d,A) ppc_emit32(c, (33 << 26) | (D << 21) | (A << 16) | (guint16)d)
 #define ppc_lwzux(c,D,A,B) ppc_emit32(c, (31 << 26) | (D << 21) | (A << 16) | (B << 11) | (55 << 1) | 0)
 #define ppc_lwzx(c,D,A,B) ppc_emit32(c, (31 << 26) | (D << 21) | (A << 16) | (B << 11) | (23 << 1) | 0)
 
@@ -628,24 +628,24 @@
 #define ppc_srw(c,A,S,B) ppc_srwx(c,A,S,B,0)
 #define ppc_srwd(c,A,S,B) ppc_srwx(c,A,S,B,1)
 
-#define ppc_stbu(c,S,A,D) ppc_emit32(c, (39 << 26) | (S << 21) | (A << 16) | (guint16)(D))
+#define ppc_stbu(c,S,d,A) ppc_emit32(c, (39 << 26) | (S << 21) | (A << 16) | (guint16)(d))
 
 #define ppc_stbux(c,S,A,B) ppc_emit32(c, (31 << 26) | (S << 21) | (A << 16) | (B << 11) | (247 << 1) | 0)
 #define ppc_stbx(c,S,A,B) ppc_emit32(c, (31 << 26) | (S << 21) | (A << 16) | (B << 11) | (215 << 1) | 0)
 
-#define ppc_stfdu(c,S,A,D) ppc_emit32(c, (55 << 26) | (S << 21) | (A << 16) | (guint16)(D))
+#define ppc_stfdu(c,S,d,A) ppc_emit32(c, (55 << 26) | (S << 21) | (A << 16) | (guint16)(d))
 
 #define ppc_stfdx(c,S,A,B) ppc_emit32(c, (31 << 26) | (S << 21) | (A << 16) | (B << 11) | (727 << 1) | 0)
 #define ppc_stfiwx(c,S,A,B) ppc_emit32(c, (31 << 26) | (S << 21) | (A << 16) | (B << 11) | (983 << 1) | 0)
 
-#define ppc_stfsu(c,S,A,D) ppc_emit32(c, (53 << 26) | (S << 21) | (A << 16) | (guint16)(D))
+#define ppc_stfsu(c,S,d,A) ppc_emit32(c, (53 << 26) | (S << 21) | (A << 16) | (guint16)(d))
 #define ppc_stfsux(c,S,A,B) ppc_emit32(c, (31 << 26) | (S << 21) | (A << 16) | (B << 11) | (695 << 1) | 0)  
 #define ppc_stfsx(c,S,A,B) ppc_emit32(c, (31 << 26) | (S << 21) | (A << 16) | (B << 11) | (663 << 1) | 0)  
 #define ppc_sthbrx(c,S,A,B) ppc_emit32(c, (31 << 26) | (S << 21) | (A << 16) | (B << 11) | (918 << 1) | 0)  
-#define ppc_sthu(c,S,A,D) ppc_emit32(c, (45 << 26) | (S << 21) | (A << 16) | (guint16)(D))
+#define ppc_sthu(c,S,d,A) ppc_emit32(c, (45 << 26) | (S << 21) | (A << 16) | (guint16)(d))
 #define ppc_sthux(c,S,A,B) ppc_emit32(c, (31 << 26) | (S << 21) | (A << 16) | (B << 11) | (439 << 1) | 0)
 #define ppc_sthx(c,S,A,B) ppc_emit32(c, (31 << 26) | (S << 21) | (A << 16) | (B << 11) | (407 << 1) | 0)
-#define ppc_stmw(c,S,A,D) ppc_emit32(c, (47 << 26) | (S << 21) | (A << 16) | (guint16)D)
+#define ppc_stmw(c,S,d,A) ppc_emit32(c, (47 << 26) | (S << 21) | (A << 16) | (guint16)d)
 #define ppc_stswi(c,S,A,NB) ppc_emit32(c, (31 << 26) | (S << 21) | (A << 16) | (NB << 11) | (725 << 1) | 0)
 #define ppc_stswx(c,S,A,NB) ppc_emit32(c, (31 << 26) | (S << 21) | (A << 16) | (NB << 11) | (661 << 1) | 0)
 #define ppc_stwbrx(c,S,A,B) ppc_emit32(c, (31 << 26) | (S << 21) | (A << 16) | (B << 11) | (662 << 1) | 0)
@@ -718,13 +718,23 @@
 
 #define PPC_LOAD_SEQUENCE_LENGTH	20
 
-#define ppc_is_imm32(val) ((glong)(val) >= (glong)-(1L<<31) && (glong)(val) <= (glong)((1L<<31)-1))
+#define ppc_is_imm32(val) (((((long)val)>> 31) == 0) || ((((long)val)>> 31) == -1))
+#define ppc_is_imm48(val) (((((long)val)>> 47) == 0) || ((((long)val)>> 47) == -1))
 
+#define ppc_load48(c,D,v) G_STMT_START {	\
+		ppc_li   ((c), (D), ((gint64)(v) >> 32) & 0xffff);	\
+		ppc_sldi ((c), (D), (D), 32); \
+		ppc_oris ((c), (D), (D), ((guint64)(v) >> 16) & 0xffff);	\
+		ppc_ori  ((c), (D), (D),  (guint64)(v)        & 0xffff);	\
+	} G_STMT_END
+	
 #define ppc_load(c,D,v) G_STMT_START {	\
 		if (ppc_is_imm16 ((gulong)(v)))	{	\
 			ppc_li ((c), (D), (guint16)(guint64)(v));	\
 		} else if (ppc_is_imm32 ((gulong)(v))) {	\
 			ppc_load32 ((c), (D), (guint32)(guint64)(v)); \
+		} else if (ppc_is_imm48 ((gulong)(v))) {	\
+			ppc_load48 ((c), (D), (guint64)(v)); \
 		} else {	\
 			ppc_load_sequence ((c), (D), (guint64)(v)); \
 		}	\
@@ -736,11 +746,11 @@
 		ppc_load_reg ((c), (D), 0, ppc_r11);	\
 	} G_STMT_END
 
-#define ppc_load_reg(c,D,d,A)         ppc_ld   ((c), (D), (d) >> 2, (A))
-#define ppc_load_reg_update(c,D,d,A)  ppc_ldu  ((c), (D), (d) >> 2, (A))
+#define ppc_load_reg(c,D,d,A)         ppc_ld   ((c), (D), (d), (A))
+#define ppc_load_reg_update(c,D,d,A)  ppc_ldu  ((c), (D), (d), (A))
 #define ppc_load_reg_indexed(c,D,A,B)        ppc_ldx  ((c), (D), (A), (B))
 #define ppc_load_reg_update_indexed(c,D,A,B) ppc_ldux ((c), (D), (A), (B))
-#define ppc_load_multiple_regs(c,D,A,d) G_STMT_START { \
+#define ppc_load_multiple_regs(c,D,d,A) G_STMT_START { \
 		int __i, __o = (d);			\
 		for (__i = (D); __i <= 31; ++__i) {	\
 			ppc_load_reg ((c), __i, __o, (A));		\
@@ -748,12 +758,12 @@
 		} \
 	} G_STMT_END
 
-#define ppc_store_reg(c,S,d,A)        ppc_std  ((c), (S), (d) >> 2, (A))
-#define ppc_store_reg_update(c,S,d,A) ppc_stdu ((c), (S), (d) >> 2, (A))
+#define ppc_store_reg(c,S,d,A)        ppc_std  ((c), (S), (d), (A))
+#define ppc_store_reg_update(c,S,d,A) ppc_stdu ((c), (S), (d), (A))
 #define ppc_store_reg_indexed(c,S,A,B)        ppc_stdx  ((c), (S), (A), (B))
 #define ppc_store_reg_update_indexed(c,S,A,B) ppc_stdux ((c), (S), (A), (B))
-#define ppc_store_multiple_regs(c,S,A,D) G_STMT_START { \
-		int __i, __o = (D);			\
+#define ppc_store_multiple_regs(c,S,d,A) G_STMT_START { \
+		int __i, __o = (d);			\
 		for (__i = (S); __i <= 31; ++__i) {	\
 			ppc_store_reg ((c), __i, __o, (A));		\
 			__o += sizeof (gulong);				\
@@ -802,11 +812,14 @@
 #define ppc_fctidz(c,D,B)  ppc_fctidzx(c,D,B,0)
 #define ppc_fctidzd(c,D,B) ppc_fctidzx(c,D,B,1)
 
-#define ppc_ld(c,D,ds,A) ppc_emit32(c, (58 << 26) | ((D) << 21) | ((A) << 16) | (guint16)((ds) << 2) | 0)
+#define ppc_ld(c,D,ds,A) ppc_emit32(c, (58 << 26) | ((D) << 21) | ((A) << 16) | ((ds) & 0xfffc) | 0)
+#define ppc_lwa(c,D,ds,A) ppc_emit32(c, (58 << 26) | ((D) << 21) | ((A) << 16) | ((ds) & 0xfffc) | 2)
 #define ppc_ldarx(c,D,A,B) ppc_emit32(c, (31 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | (84 << 1) | 0)
-#define ppc_ldu(c,D,ds,A) ppc_emit32(c, (58 <<	26) | ((D) << 21) | ((A) << 16) | (guint16)((ds) << 2) | 1)
+#define ppc_ldu(c,D,ds,A) ppc_emit32(c, (58 <<	26) | ((D) << 21) | ((A) << 16) | ((ds) & 0xfffc) | 1)
 #define ppc_ldux(c,D,A,B) ppc_emit32(c, (31 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | (53 << 1) | 0)
+#define ppc_lwaux(c,D,A,B) ppc_emit32(c, (31 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | (373 << 1) | 0)
 #define ppc_ldx(c,D,A,B) ppc_emit32(c, (31 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | (21 << 1) | 0)
+#define ppc_lwax(c,D,A,B) ppc_emit32(c, (31 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | (341 << 1) | 0)
 
 #define ppc_mulhdx(c,D,A,B,Rc) ppc_emit32(c, (31 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | (0 << 10) | (73 << 1) | (Rc))
 #define ppc_mulhd(c,D,A,B)  ppc_mulhdx(c,D,A,B,0)
@@ -871,9 +884,9 @@
 #define ppc_srd(c,A,S,B)  ppc_srdx(c,S,A,B,0)
 #define ppc_srdd(c,A,S,B) ppc_srdx(c,S,A,B,1)
 
-#define ppc_std(c,S,ds,A)   ppc_emit32(c, (62 << 26) | ((S) << 21) | ((A) << 16) | (guint16)((ds) << 2) | 0)
+#define ppc_std(c,S,ds,A)   ppc_emit32(c, (62 << 26) | ((S) << 21) | ((A) << 16) | ((ds) & 0xfffc) | 0)
 #define ppc_stdcxd(c,S,A,B) ppc_emit32(c, (31 << 26) | ((S) << 21) | ((A) << 16) | ((B) << 11) | (214 << 1) | 1)
-#define ppc_stdu(c,S,ds,A)  ppc_emit32(c, (62 << 26) | ((S) << 21) | ((A) << 16) | (guint16)((ds) << 2) | 1)
+#define ppc_stdu(c,S,ds,A)  ppc_emit32(c, (62 << 26) | ((S) << 21) | ((A) << 16) | ((ds) & 0xfffc) | 1)
 #define ppc_stdux(c,S,A,B)  ppc_emit32(c, (31 << 26) | ((S) << 21) | ((A) << 16) | ((B) << 11) | (181 << 1) | 0)
 #define ppc_stdx(c,S,A,B)   ppc_emit32(c, (31 << 26) | ((S) << 21) | ((A) << 16) | ((B) << 11) | (149 << 1) | 0)
 
diff -urN mono-svn-20090121/mono/mono/mini/exceptions-ppc.c mono-svn/mono/mono/mini/exceptions-ppc.c
--- mono-svn-20090121/mono/mono/mini/exceptions-ppc.c	2009-01-21 10:35:59.000000000 -0600
+++ mono-svn/mono/mono/mini/exceptions-ppc.c	2009-01-21 14:30:54.000000000 -0600
@@ -164,7 +164,7 @@
 #define restore_regs_from_context(ctx_reg,ip_reg,tmp_reg) do {	\
 		int reg;	\
 		ppc_load_reg (code, ip_reg, G_STRUCT_OFFSET (MonoContext, sc_ir), ctx_reg);	\
-		ppc_load_multiple_regs (code, ppc_r13, ctx_reg, G_STRUCT_OFFSET (MonoContext, regs));	\
+		ppc_load_multiple_regs (code, ppc_r13, G_STRUCT_OFFSET (MonoContext, regs), ctx_reg);	\
 		for (reg = 0; reg < MONO_SAVED_FREGS; ++reg) {	\
 			ppc_lfd (code, (14 + reg),	\
 				G_STRUCT_OFFSET(MonoContext, fregs) + reg * sizeof (gdouble), ctx_reg);	\
@@ -218,7 +218,7 @@
 	/* never reached */
 	ppc_break (code);
 
-	g_assert ((code - start) < size);
+	g_assert ((code - start) <= size);
 	mono_arch_flush_icache (start, code - start);
 	return start;
 }
@@ -239,7 +239,7 @@
 		ppc_stfd (code, i, pos, ppc_sp);
 	}
 	pos -= sizeof (gpointer) * MONO_SAVED_GREGS;
-	ppc_store_multiple_regs (code, ppc_r13, ppc_sp, pos);
+	ppc_store_multiple_regs (code, ppc_r13, pos, ppc_sp);
 
 	return code;
 }
@@ -299,7 +299,7 @@
 		ppc_lfd (code, i, pos, ppc_sp);
 	}
 	pos -= sizeof (gpointer) * MONO_SAVED_GREGS;
-	ppc_load_multiple_regs (code, ppc_r13, ppc_sp, pos);
+	ppc_load_multiple_regs (code, ppc_r13, pos, ppc_sp);
 
 	ppc_addic (code, ppc_sp, ppc_sp, alloc_size);
 	ppc_blr (code);
diff -urN mono-svn-20090121/mono/mono/mini/mini-ppc.c mono-svn/mono/mono/mini/mini-ppc.c
--- mono-svn-20090121/mono/mono/mini/mini-ppc.c	2009-01-21 10:36:00.000000000 -0600
+++ mono-svn/mono/mono/mini/mini-ppc.c	2009-01-21 14:27:11.000000000 -0600
@@ -11,6 +11,17 @@
  */
 #include "mini.h"
 #include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <elf.h>
+#include <link.h>
+#include <pthread.h>
+#ifdef __linux__
+#include <asm/cputable.h>
+#include <sys/utsname.h>
+#include <string.h>
+#endif
 
 #include <mono/metadata/appdomain.h>
 #include <mono/metadata/debug-helpers.h>
@@ -81,9 +92,36 @@
 		if ((dreg) != ppc_r3) ppc_mr ((code), ppc_r3, ppc_r11);	\
 	} while (0);
 
+#ifdef __mono_ppc64__
+#define emit_nptl_tls(code,dreg,key) do { /* load the TLS slot at offset key from r13 into dreg */ \
+		int off1 = (key); /* ppc_load_reg keeps only the low 16 bits as displacement */ \
+		int off2 = (key) >> 15; \
+		if ((off2 == 0) || (off2 == -1)) { /* key fits a signed 16-bit displacement */ \
+			ppc_load_reg ((code), (dreg), off1, ppc_r13);	\
+		} else { \
+			int off3 = (off2 + 1) >> 1; /* high-adjusted upper half, i.e. (key + 0x8000) >> 16 */ \
+			ppc_addis ((code), ppc_r11, ppc_r13, off3); \
+			ppc_load_reg ((code), (dreg), off1, ppc_r11);	\
+		} \
+	} while (0);
+#else
+#define emit_nptl_tls(code,dreg,key) do { /* load the TLS slot at offset key from r2 into dreg */ \
+		int off1 = (key); /* ppc_load_reg keeps only the low 16 bits as displacement */ \
+		int off2 = (key) >> 15; \
+		if ((off2 == 0) || (off2 == -1)) { /* key fits a signed 16-bit displacement */ \
+			ppc_load_reg ((code), (dreg), off1, ppc_r2);	\
+		} else { \
+			int off3 = (off2 + 1) >> 1; /* high-adjusted upper half, i.e. (key + 0x8000) >> 16 */ \
+			ppc_addis ((code), ppc_r11, ppc_r2, off3); \
+			ppc_load_reg ((code), (dreg), off1, ppc_r11);	\
+		} \
+	} while (0);
+#endif
+
 #define emit_tls_access(code,dreg,key) do {	\
 		switch (tls_mode) {	\
 		case TLS_MODE_LTHREADS: emit_linuxthreads_tls(code,dreg,key); break;	\
+		case TLS_MODE_NPTL: emit_nptl_tls(code,dreg,key); break;	\
 		case TLS_MODE_DARWIN_G5: emit_darwing5_tls(code,dreg,key); break;	\
 		case TLS_MODE_DARWIN_G4: emit_darwing4_tls(code,dreg,key); break;	\
 		default: g_assert_not_reached ();	\
@@ -131,23 +169,231 @@
 	return "unknown";
 }
 
+#ifdef __linux__
+
+/* Locate the aux vector by walking past the end of the environment. */
+static inline char** __aux_find (void)
+{
+  char **cursor = __environ;
+  /* Advance to the NULL entry that terminates the env vector. */
+  while (*cursor != NULL)
+    cursor++;
+  /* The slot right after that NULL is where the auxv should start. */
+  cursor++;
+  return cursor;
+}
+
+volatile static ElfW(auxv_t) *linux_auxv_buf = NULL;
+
+/* Return the process aux vector, loading and caching it in
+   linux_auxv_buf on first use.  Returns 0 when it cannot be obtained. */
+static ElfW(auxv_t) *
+linux_get_auxv(void)
+{
+  ElfW(auxv_t) *auxv_temp = (ElfW(auxv_t) *)linux_auxv_buf;
+  int auxv_f;
+  size_t page_size = getpagesize();
+  ssize_t bytes;
+
+  /* Already cached by an earlier call. */
+  if (auxv_temp != NULL)
+    return auxv_temp;
+
+  auxv_f = open("/proc/self/auxv", O_RDONLY);
+  if (auxv_f == -1) {
+    if (errno != ENOENT) {
+      perror("Error opening file for reading");
+      return 0;
+    }
+    /* Older kernels did not support /proc/<PID>/auxv. But the auxv
+       table does exist in the process address space following the
+       env table. So try scanning over the environment table to find
+       the auxv. */
+    auxv_temp = (ElfW(auxv_t)*) __aux_find();
+    /* If someone has done a setenv() the __environ pointer may
+       have been moved and the assumption that the auxv follows is
+       not true. So look at the first entry and verify that it is
+       an auxv entry. */
+    if (auxv_temp->a_type != AT_IGNOREPPC) {
+      perror("Error opening /proc/self/auxv and auvx not following env");
+      return 0;
+    }
+  } else {
+    /* One extra zero-filled entry guarantees an AT_NULL terminator
+       even when the file fills the whole page. */
+    auxv_temp = (ElfW(auxv_t) *)calloc(1, page_size + sizeof (ElfW(auxv_t)));
+    if (auxv_temp == NULL) {
+      perror("Error allocating the auxv buffer");
+      close (auxv_f);
+      return 0;
+    }
+    bytes = read (auxv_f, (void*)auxv_temp, page_size);
+    if (bytes <= 0) {
+      perror("Error /proc/self/auxv read failed");
+      free (auxv_temp);
+      close (auxv_f);	/* do not leak the descriptor on the error path */
+      return 0;
+    }
+    /* The auxv is buffered, so the descriptor is no longer needed. */
+    if (close (auxv_f))
+      perror("Error close failed");
+  }
+  linux_auxv_buf = auxv_temp;
+  return auxv_temp;
+}
+
+static pthread_once_t auxv_once_control = PTHREAD_ONCE_INIT;
+
+/* Load the aux vector at most once, unless it is already cached. */
+static void
+linux_auxv_init_once (void)
+{
+  void (*loader)(void) = (void (*)(void))linux_get_auxv;
+
+  if (linux_auxv_buf != NULL)
+    return;
+
+  if (pthread_once (&auxv_once_control, loader) != 0)
+    perror("Error pthread_once(linux_get_auxv()) failed");
+}
+
+/* Return the value of aux vector entry TYPE, or 0 when the entry is
+   absent or the aux vector could not be obtained. */
+static ElfW(Addr)
+linux_query_auxv(int type)
+{
+  ElfW(auxv_t) *auxv_temp;
+  int i;
+
+  /* Use the cached copy when there is one; otherwise fetch it now. */
+  auxv_temp = (ElfW(auxv_t) *)linux_auxv_buf;
+  if (auxv_temp == NULL)
+    auxv_temp = linux_get_auxv();
+
+  /* linux_get_auxv() returns 0 on failure; do not dereference that. */
+  if (auxv_temp == NULL)
+    return 0;
+
+  /* Test for the AT_NULL terminator before each entry, so a vector that
+     starts with AT_NULL is never scanned past its end. */
+  for (i = 0; auxv_temp[i].a_type != AT_NULL; i++)
+    {
+      if (auxv_temp[i].a_type == type)
+	return auxv_temp[i].a_un.a_val;
+    }
+
+  return 0;
+}
+
+static unsigned long	linux_ppc_hwcap;
+static char 		*linux_ppc_platform;
+static int		linux_ppc_ISA2x; /* PowerISA-2.0 or newer */
+static unsigned long	linux_ppc_ISA2x_mask = 0
+#ifdef PPC_FEATURE_POWER4 /* PowerISA-2.01 */
+			| PPC_FEATURE_POWER4
+#endif
+#ifdef PPC_FEATURE_POWER5 /* PowerISA-2.03 */
+			| PPC_FEATURE_POWER5
+#endif
+#ifdef PPC_FEATURE_POWER5_PLUS  /* PowerISA-2.04 */
+			| PPC_FEATURE_POWER5_PLUS
+#endif
+#ifdef PPC_FEATURE_CELL
+			| PPC_FEATURE_CELL
+#endif
+#ifdef PPC_FEATURE_PA6T
+			| PPC_FEATURE_PA6T
+#endif
+#ifdef PPC_FEATURE_ARCH_2_05
+			| PPC_FEATURE_ARCH_2_05
+#endif
+	; /* it took a while to figure out that the AT_HWCAP should represent
+	     ISA versions and optional categories/features and the AT_PLATFORM 
+	     should represent the CHIP design and specific micro-architecture.
+	     Which explains the mess above.*/
+
+/* Default to SMP true in case we can't find out.  */
+static int		linux_ppc_SMP = 1;
+
+/* Number of independent load store pipes in each core. */
+static int		linux_ppc_LSUs = 0;
+/* Number of independent fixed point pipes in each core. */
+static int		linux_ppc_FXUs = 0;
+/* Number of independent floating point pipes in each core. */
+static int		linux_ppc_FPUs = 0;
+
+#ifdef PPC_FEATURE_ICACHE_SNOOP
+#define HAS_ICACHE_SNOOP (linux_ppc_hwcap & PPC_FEATURE_ICACHE_SNOOP)
+#else
+#define HAS_ICACHE_SNOOP 0
+#endif
+
+/* Return 1 when the uname version string mentions "SMP" or "smp" (or
+   when uname fails, to be safe), 0 otherwise. */
+static int
+linix_init_ppc_SMP(void)
+{
+    struct utsname u;
+
+    /* FIXME For 2.6.26 kernels we can try to use /sys/devices/system/cpu/possible,
+       but we would have to fall back to uname for early kernels anyway. */
+
+    if (uname(&u) != 0) {
+        /* No trailing newline: perror appends ": <errno text>\n" itself. */
+        perror("Error uname syscall failed");
+        return 1;
+    }
+
+    if (strstr(u.version, "SMP") != NULL)
+        return 1;
+    if (strstr(u.version, "smp") != NULL)
+        return 1;
+    return 0;
+}
+
+
+/* Cache AT_HWCAP and AT_PLATFORM, then derive the ISA, SMP and pipe-count flags. */
+static void
+linux_init_ppc_platform(void)
+{
+	linux_ppc_hwcap = (unsigned long) linux_query_auxv (AT_HWCAP);
+	linux_ppc_platform = (char*) linux_query_auxv (AT_PLATFORM);
+	linux_ppc_ISA2x = (linux_ppc_hwcap & linux_ppc_ISA2x_mask) != 0L;
+	linux_ppc_SMP = linix_init_ppc_SMP();
+
+	/* linux_query_auxv returns 0 when AT_PLATFORM is missing; strcmp on
+	   a NULL pointer is undefined, so leave the pipe counts untouched. */
+	if (linux_ppc_platform == NULL)
+		return;
+
+	/* The power4..power6x range test is a plain lexical comparison. */
+	if (((strcmp(linux_ppc_platform, "power4") >= 0)
+	&&   (strcmp(linux_ppc_platform, "power6x") <= 0))
+	||  (strcmp(linux_ppc_platform, "ppc970") == 0)) {
+		linux_ppc_LSUs = linux_ppc_FXUs = linux_ppc_FPUs = 2;
+	} else if (strcmp(linux_ppc_platform, "cell") == 0) {
+		linux_ppc_LSUs = linux_ppc_FXUs = linux_ppc_FPUs = 1;
+	}
+}
+#endif
+
 /* this function overwrites r0, r11, r12 */
 static guint8*
 emit_memcpy (guint8 *code, int size, int dreg, int doffset, int sreg, int soffset)
 {
 	/* unrolled, use the counter in big */
 	if (size > sizeof (gpointer) * 5) {
-		int shifted = size >> MONO_PPC_32_64_CASE (2, 3);
+		long shifted = size >> MONO_PPC_32_64_CASE (2, 3);
 		guint8 *copy_loop_start, *copy_loop_jump;
-
+		
 		ppc_load (code, ppc_r0, shifted);
 		ppc_mtctr (code, ppc_r0);
 		g_assert (sreg == ppc_r11);
 		ppc_addi (code, ppc_r12, dreg, (doffset - sizeof (gpointer)));
 		ppc_addi (code, ppc_r11, sreg, (soffset - sizeof (gpointer)));
 		copy_loop_start = code;
-		ppc_load_reg_update (code, ppc_r0, sizeof (gpointer), ppc_r11);
-		ppc_store_reg_update (code, ppc_r0, sizeof (gpointer), ppc_r12);
+		ppc_load_reg_update (code, ppc_r0, (unsigned int)sizeof (gpointer), ppc_r11);
+		ppc_store_reg_update (code, ppc_r0, (unsigned int)sizeof (gpointer), ppc_r12);
 		copy_loop_jump = code;
 		ppc_bc (code, PPC_BR_DEC_CTR_NONZERO, 0, 0);
 		ppc_patch (copy_loop_jump, copy_loop_start);
@@ -156,6 +402,20 @@
 		dreg = ppc_r12;
 	}
 #ifdef __mono_ppc64__
+	/* the hardware has multiple load/store units and the move is long
+	   enough to use more then one regiester, then use load/load/store/store
+	   to execute 2 instructions per cycle. */
+	if ((linux_ppc_LSUs > 1) && (dreg != ppc_r12) && (sreg != ppc_r12)) { 
+		while (size >= 16) {
+			ppc_load_reg (code, ppc_r0, soffset, sreg);
+			ppc_load_reg (code, ppc_r12, soffset+8, sreg);
+			ppc_store_reg (code, ppc_r0, doffset, dreg);
+			ppc_store_reg (code, ppc_r12, doffset+8, dreg);
+			size -= 16;
+			soffset += 16;
+			doffset += 16; 
+		}
+	}
 	while (size >= 8) {
 		ppc_load_reg (code, ppc_r0, soffset, sreg);
 		ppc_store_reg (code, ppc_r0, doffset, dreg);
@@ -163,6 +423,18 @@
 		soffset += 8;
 		doffset += 8;
 	}
+#else
+	if ((linux_ppc_LSUs > 1) && (dreg != ppc_r12) && (sreg != ppc_r12)) { 
+		while (size >= 8) {
+			ppc_load_reg (code, ppc_r0, soffset, sreg);
+			ppc_load_reg (code, ppc_r12, soffset+4, sreg);
+			ppc_store_reg (code, ppc_r0, doffset, dreg);
+			ppc_store_reg (code, ppc_r12, doffset+4, dreg);
+			size -= 8;
+			soffset += 8;
+			doffset += 8; 
+		}
+	}
 #endif
 	while (size >= 4) {
 		ppc_lwz (code, ppc_r0, soffset, sreg);
@@ -457,6 +729,10 @@
 void
 mono_arch_cpu_init (void)
 {
+#ifdef __linux__
+	linux_auxv_init_once();
+	linux_init_ppc_platform();
+#endif
 }
 
 /*
@@ -585,11 +861,6 @@
 	return 2;
 }
 
-typedef struct {
-	long int type;
-	long int value;
-} AuxVec;
-
 void
 mono_arch_flush_icache (guint8 *code, gint size)
 {
@@ -613,20 +884,10 @@
 			/*g_print ("setting cl size to %d\n", cachelinesize);*/
 		}
 #elif defined(__linux__)
-		/* sadly this will work only with 2.6 kernels... */
-		FILE* f = fopen ("/proc/self/auxv", "rb");
-		if (f) {
-			AuxVec vec;
-			while (fread (&vec, sizeof (vec), 1, f) == 1) {
-				if (vec.type == 19) {
-					cachelinesize = vec.value;
-					break;
-				}
-			}
-			fclose (f);
-		}
+		cachelinesize = (long) linux_query_auxv (AT_DCACHEBSIZE);
 		if (!cachelinesize)
 			cachelinesize = 128;
+		cachelineinc = cachelinesize;
 #elif defined(G_COMPILER_CODEWARRIOR)
 	cachelinesize = 32;
 	cachelineinc = 32;
@@ -662,21 +923,29 @@
 		isync
 	}
 #else
-	if (1) {
-		for (p = start; p < endp; p += cachelineinc) {
-			asm ("dcbf 0,%0;" : : "r"(p) : "memory");
+	/* For POWER5/6 with ICACHE_SNOOP the dcbst/icbi is not required.  */
+	if (!HAS_ICACHE_SNOOP) {
+		if (linux_ppc_SMP) {
+			for (p = start; p < endp; p += cachelineinc) {
+				asm ("dcbf 0,%0;" : : "r"(p) : "memory");
+			}
+		} else {
+			for (p = start; p < endp; p += cachelineinc) {
+				asm ("dcbst 0,%0;" : : "r"(p) : "memory");
+			}
 		}
-	} else {
+		asm ("sync");
+		p = code;
+	/* for ISA2.0+ implementations we should not need any extra sync instructions. */
 		for (p = start; p < endp; p += cachelineinc) {
-			asm ("dcbst 0,%0;" : : "r"(p) : "memory");
+			if (linux_ppc_ISA2x)
+				asm ("icbi 0,%0;" : : "r"(p) : "memory");
+			else
+				asm ("icbi 0,%0; sync;" : : "r"(p) : "memory");
 		}
 	}
-	asm ("sync");
-	p = code;
-	for (p = start; p < endp; p += cachelineinc) {
-		asm ("icbi 0,%0; sync;" : : "r"(p) : "memory");
-	}
-	asm ("sync");
+	if (!linux_ppc_ISA2x)
+		asm ("sync");
 	asm ("isync");
 #endif
 }
@@ -2441,8 +2710,8 @@
 static guchar*
 emit_float_to_int (MonoCompile *cfg, guchar *code, int dreg, int sreg, int size, gboolean is_signed)
 {
-	int offset = cfg->arch.fp_conv_var_offset;
-	int sub_offset;
+	long offset = cfg->arch.fp_conv_var_offset;
+	long sub_offset;
 	/* sreg is a float, dreg is an integer reg. ppc_f0 is used a scratch */
 #ifdef __mono_ppc64__
 	if (size == 8) {
@@ -2953,7 +3222,7 @@
 static guint8*
 emit_reserve_param_area (MonoCompile *cfg, guint8 *code)
 {
-	int size = cfg->param_area;
+	long size = cfg->param_area;
 
 	size += MONO_ARCH_FRAME_ALIGNMENT - 1;
 	size &= -MONO_ARCH_FRAME_ALIGNMENT;
@@ -2975,7 +3244,7 @@
 static guint8*
 emit_unreserve_param_area (MonoCompile *cfg, guint8 *code)
 {
-	int size = cfg->param_area;
+	long size = cfg->param_area;
 
 	size += MONO_ARCH_FRAME_ALIGNMENT - 1;
 	size &= -MONO_ARCH_FRAME_ALIGNMENT;
@@ -3109,6 +3378,15 @@
 			}
 			break;
 		case OP_LOADI4_MEMBASE:
+#ifdef __mono_ppc64__
+			if (ppc_is_imm16 (ins->inst_offset)) {
+				ppc_lwa (code, ins->dreg, ins->inst_offset, ins->inst_basereg);
+			} else {
+				ppc_load (code, ppc_r0, ins->inst_offset);
+				ppc_lwax (code, ins->dreg, ins->inst_basereg, ppc_r0);
+			}
+			break;
+#endif
 		case OP_LOADU4_MEMBASE:
 			if (ppc_is_imm16 (ins->inst_offset)) {
 				ppc_lwz (code, ins->dreg, ins->inst_offset, ins->inst_basereg);
@@ -3116,10 +3394,6 @@
 				ppc_load (code, ppc_r0, ins->inst_offset);
 				ppc_lwzx (code, ins->dreg, ins->inst_basereg, ppc_r0);
 			}
-#ifdef __mono_ppc64__
-			if (ins->opcode == OP_LOADI4_MEMBASE)
-				ppc_extsw (code, ins->dreg, ins->dreg);
-#endif
 			break;
 		case OP_LOADI1_MEMBASE:
 		case OP_LOADU1_MEMBASE:
@@ -3142,7 +3416,7 @@
 			break;
 		case OP_LOADI2_MEMBASE:
 			if (ppc_is_imm16 (ins->inst_offset)) {
-				ppc_lha (code, ins->dreg, ins->inst_basereg, ins->inst_offset);
+				ppc_lha (code, ins->dreg, ins->inst_offset, ins->inst_basereg);
 			} else {
 				ppc_load (code, ppc_r0, ins->inst_offset);
 				ppc_lhax (code, ins->dreg, ins->inst_basereg, ppc_r0);
@@ -3554,10 +3828,11 @@
 			 * we're leaving the method.
 			 */
 			if (1 || cfg->flags & MONO_CFG_HAS_CALLS) {
-				if (ppc_is_imm16 (cfg->stack_usage + PPC_RET_ADDR_OFFSET)) {
-					ppc_load_reg (code, ppc_r0, cfg->stack_usage + PPC_RET_ADDR_OFFSET, cfg->frame_reg);
+				long ret_offset = cfg->stack_usage + PPC_RET_ADDR_OFFSET;
+				if (ppc_is_imm16 (ret_offset)) {
+					ppc_load_reg (code, ppc_r0, ret_offset, cfg->frame_reg);
 				} else {
-					ppc_load (code, ppc_r11, cfg->stack_usage + PPC_RET_ADDR_OFFSET);
+					ppc_load (code, ppc_r11, ret_offset);
 					ppc_load_reg_indexed (code, ppc_r0, cfg->frame_reg, ppc_r11);
 				}
 				ppc_mtlr (code, ppc_r0);
@@ -3566,7 +3841,7 @@
 			code = emit_load_volatile_arguments (cfg, code);
 
 			if (ppc_is_imm16 (cfg->stack_usage)) {
-				ppc_addic (code, ppc_r11, cfg->frame_reg, cfg->stack_usage);
+				ppc_addi (code, ppc_r11, cfg->frame_reg, cfg->stack_usage);
 			} else {
 				ppc_load (code, ppc_r11, cfg->stack_usage);
 				ppc_add (code, ppc_r11, cfg->frame_reg, ppc_r11);
@@ -3598,10 +3873,11 @@
 			ppc_load_reg (code, ppc_r0, 0, ins->sreg1);
 			break;
 		case OP_ARGLIST: {
-			if (ppc_is_imm16 (cfg->sig_cookie + cfg->stack_usage)) {
-				ppc_addi (code, ppc_r0, cfg->frame_reg, cfg->sig_cookie + cfg->stack_usage);
+			long cookie_offset = cfg->sig_cookie + cfg->stack_usage;
+			if (ppc_is_imm16 (cookie_offset)) {
+				ppc_addi (code, ppc_r0, cfg->frame_reg, cookie_offset);
 			} else {
-				ppc_load (code, ppc_r0, cfg->sig_cookie + cfg->stack_usage);
+				ppc_load (code, ppc_r0, cookie_offset);
 				ppc_add (code, ppc_r0, cfg->frame_reg, ppc_r0);
 			}
 			ppc_store_reg (code, ppc_r0, 0, ins->sreg1);
@@ -4072,7 +4348,11 @@
 			break;
 		case OP_JUMP_TABLE:
 			mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
+#ifdef __mono_ppc64__
 			ppc_load_sequence (code, ins->dreg, (gulong)0x0f0f0f0f0f0f0f0fL);
+#else
+			ppc_load_sequence (code, ins->dreg, (gulong)0x0f0f0f0fL);
+#endif
 			break;
 		}
 
@@ -4352,7 +4632,8 @@
 	MonoBasicBlock *bb;
 	MonoMethodSignature *sig;
 	MonoInst *inst;
-	int alloc_size, pos, max_offset, i;
+	long alloc_size, pos, max_offset;
+	int i;
 	guint8 *code;
 	CallInfo *cinfo;
 	int tracing = 0;
@@ -4470,7 +4751,7 @@
 				g_assert_not_reached ();
 
 			if (cfg->verbose_level > 2)
-				g_print ("Argument %d assigned to register %s\n", pos, mono_arch_regname (inst->dreg));
+				g_print ("Argument %ld assigned to register %s\n", pos, mono_arch_regname (inst->dreg));
 		} else {
 			/* the argument should be put on the stack: FIXME handle size != word  */
 			if (ainfo->regtype == RegTypeGeneral) {
@@ -4491,7 +4772,6 @@
 						ppc_sthx (code, ainfo->reg, ppc_r11, inst->inst_basereg);
 					}
 					break;
-#ifdef __mono_ppc64__
 				case 4:
 					if (ppc_is_imm16 (inst->inst_offset)) {
 						ppc_stw (code, ainfo->reg, inst->inst_offset, inst->inst_basereg);
@@ -4500,8 +4780,16 @@
 						ppc_stwx (code, ainfo->reg, ppc_r11, inst->inst_basereg);
 					}
 					break;
-#else
 				case 8:
+#ifdef __mono_ppc64__
+					if (ppc_is_imm16 (inst->inst_offset)) {
+						ppc_std (code, ainfo->reg, inst->inst_offset, inst->inst_basereg);
+					} else {
+						ppc_load (code, ppc_r11, inst->inst_offset);
+						ppc_stdx (code, ainfo->reg, ppc_r11, inst->inst_basereg);
+					}
+					break;
+#else
 					if (ppc_is_imm16 (inst->inst_offset + 4)) {
 						ppc_stw (code, ainfo->reg, inst->inst_offset, inst->inst_basereg);
 						ppc_stw (code, ainfo->reg + 1, inst->inst_offset + 4, inst->inst_basereg);
@@ -4511,8 +4799,8 @@
 						ppc_stw (code, ainfo->reg, 0, ppc_r11);
 						ppc_stw (code, ainfo->reg + 1, 4, ppc_r11);
 					}
-					break;
 #endif
+					break;
 				default:
 					if (ppc_is_imm16 (inst->inst_offset)) {
 						ppc_store_reg (code, ainfo->reg, inst->inst_offset, inst->inst_basereg);
@@ -4682,7 +4970,7 @@
 	if (method->save_lmf) {
 		if (lmf_pthread_key != -1) {
 			emit_tls_access (code, ppc_r3, lmf_pthread_key);
-			if (G_STRUCT_OFFSET (MonoJitTlsData, lmf))
+			if (tls_mode != TLS_MODE_NPTL && G_STRUCT_OFFSET (MonoJitTlsData, lmf))
 				ppc_addi (code, ppc_r3, ppc_r3, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
 		} else {
 			mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD, 
@@ -4716,7 +5004,11 @@
 		ppc_store_reg (code, ppc_sp, G_STRUCT_OFFSET(MonoLMF, ebp), ppc_r11);
 		/* save the current IP */
 		mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_IP, NULL);
+#ifdef __mono_ppc64__
 		ppc_load_sequence (code, ppc_r0, (gulong)0x0101010101010101L);
+#else
+		ppc_load_sequence (code, ppc_r0, (gulong)0x01010101L);
+#endif
 		ppc_store_reg (code, ppc_r0, G_STRUCT_OFFSET(MonoLMF, eip), ppc_r11);
 	}
 
@@ -4780,7 +5072,7 @@
 		 * we didn't actually change them (idea from Zoltan).
 		 */
 		/* restore iregs */
-		ppc_load_multiple_regs (code, ppc_r13, ppc_r11, G_STRUCT_OFFSET(MonoLMF, iregs));
+		ppc_load_multiple_regs (code, ppc_r13, G_STRUCT_OFFSET(MonoLMF, iregs), ppc_r11);
 		/* restore fregs */
 		/*for (i = 14; i < 32; i++) {
 			ppc_lfd (code, i, G_STRUCT_OFFSET(MonoLMF, fregs) + ((i-14) * sizeof (gdouble)), ppc_r11);
@@ -4794,10 +5086,11 @@
 		ppc_addic (code, ppc_sp, ppc_r8, cfg->stack_usage);
 	} else {
 		if (1 || cfg->flags & MONO_CFG_HAS_CALLS) {
-			if (ppc_is_imm16 (cfg->stack_usage + PPC_RET_ADDR_OFFSET)) {
-				ppc_load_reg (code, ppc_r0, cfg->stack_usage + PPC_RET_ADDR_OFFSET, cfg->frame_reg);
+			long return_offset = cfg->stack_usage + PPC_RET_ADDR_OFFSET;
+			if (ppc_is_imm16 (return_offset)) {
+				ppc_load_reg (code, ppc_r0, return_offset, cfg->frame_reg);
 			} else {
-				ppc_load (code, ppc_r11, cfg->stack_usage + PPC_RET_ADDR_OFFSET);
+				ppc_load (code, ppc_r11, return_offset);
 				ppc_load_reg_indexed (code, ppc_r0, cfg->frame_reg, ppc_r11);
 			}
 			ppc_mtlr (code, ppc_r0);
@@ -4818,9 +5111,9 @@
 				}
 			}
 			if (cfg->frame_reg != ppc_sp)
-				ppc_addic (code, ppc_sp, ppc_r11, cfg->stack_usage);
+				ppc_addi (code, ppc_sp, ppc_r11, cfg->stack_usage);
 			else
-				ppc_addic (code, ppc_sp, ppc_sp, cfg->stack_usage);
+				ppc_addi (code, ppc_sp, ppc_sp, cfg->stack_usage);
 		} else {
 			ppc_load (code, ppc_r11, cfg->stack_usage);
 			if (cfg->used_int_regs) {
@@ -5004,23 +5297,30 @@
 static void
 setup_tls_access (void)
 {
-#ifdef __mono_ppc64__
-	/* FIXME: implement */
-	tls_mode = TLS_MODE_FAILED;
-	return;
-#else
 	guint32 ptk;
+
+#if defined(__linux__) && defined(_CS_GNU_LIBPTHREAD_VERSION)
+	size_t conf_size = 0;
+	char confbuf[128];
+#else
+	/* FIXME for darwin */
 	guint32 *ins, *code;
 	guint32 cmplwi_1023, li_0x48, blr_ins;
-	if (tls_mode == TLS_MODE_FAILED)
-		return;
+#endif
 
 	if (g_getenv ("MONO_NO_TLS")) {
 		tls_mode = TLS_MODE_FAILED;
 		return;
 	}
-
-	if (tls_mode == TLS_MODE_DETECT) {
+ 
+ 	if (tls_mode == TLS_MODE_DETECT) {
+#if defined(__linux__) && defined(_CS_GNU_LIBPTHREAD_VERSION)
+		conf_size = confstr ( _CS_GNU_LIBPTHREAD_VERSION, confbuf, sizeof(confbuf));
+		if ((conf_size > 4) && (strncmp (confbuf, "NPTL", 4) == 0))
+			tls_mode = TLS_MODE_NPTL;
+		else
+			tls_mode = TLS_MODE_LTHREADS;
+#else
 		ins = (guint32*)pthread_getspecific;
 		/* uncond branch to the real method */
 		if ((*ins >> 26) == 18) {
@@ -5092,7 +5392,13 @@
 			tls_mode = TLS_MODE_FAILED;
 			return;
 		}
+#endif
 	}
+	if ((monodomain_key == -1) && (tls_mode == TLS_MODE_NPTL)) {
+		monodomain_key = mono_domain_get_tls_offset();
+ 	}
+	/* if not TLS_MODE_NPTL or local dynamic (as indicated by
+	   mono_domain_get_tls_offset returning -1) then use keyed access. */
 	if (monodomain_key == -1) {
 		ptk = mono_domain_get_tls_key ();
 		if (ptk < 1024) {
@@ -5102,6 +5408,12 @@
 			}
 		}
 	}
+
+	if ((lmf_pthread_key == -1) && (tls_mode == TLS_MODE_NPTL)) {
+		lmf_pthread_key = mono_get_lmf_addr_tls_offset();
+	}
+	/* if not TLS_MODE_NPTL or local dynamic (as indicated by
+	   mono_get_lmf_addr_tls_offset returning -1) then use keyed access. */
 	if (lmf_pthread_key == -1) {
 		ptk = mono_pthread_key_for_tls (mono_jit_tls_id);
 		if (ptk < 1024) {
@@ -5113,6 +5425,12 @@
 			lmf_pthread_key = ptk;
 		}
 	}
+
+	if ((monothread_key == -1) && (tls_mode == TLS_MODE_NPTL)) {
+		monothread_key = mono_thread_get_tls_offset();
+	}
+	/* if not TLS_MODE_NPTL or local dynamic (as indicated by
+	   mono_thread_get_tls_offset returning -1) then use keyed access. */
 	if (monothread_key == -1) {
 		ptk = mono_thread_get_tls_key ();
 		if (ptk < 1024) {
@@ -5125,7 +5443,6 @@
 			/*g_print ("thread not inited yet %d\n", ptk);*/
 		}
 	}
-#endif
 }
 
 void
diff -urN mono-svn-20090121/mono/mono/mini/mini-ppc.h mono-svn/mono/mono/mini/mini-ppc.h
--- mono-svn-20090121/mono/mono/mini/mini-ppc.h	2009-01-21 10:36:00.000000000 -0600
+++ mono-svn/mono/mono/mini/mini-ppc.h	2009-01-21 12:39:32.000000000 -0600
@@ -57,7 +57,18 @@
 #define MONO_ARCH_NO_EMULATE_LONG_MUL_OPTS
 #define MONO_ARCH_HAVE_ATOMIC_ADD 1
 #define PPC_USES_FUNCTION_DESCRIPTOR
-#else
+#define MONO_ARCH_HAVE_TLS_GET 1
+#define MONO_ARCH_ENABLE_MONITOR_IL_FASTPATH 1
+#else /* must be __mono_ppc__ */
+#if 0
+/* enabling this for PPC32 causes hangs in the thread/delegate tests.
+   So disable for now. */
+#if defined(__linux__)
+#define MONO_ARCH_ENABLE_MONITOR_IL_FASTPATH 1
+#endif
+#endif
+
+#define MONO_ARCH_HAVE_TLS_GET 1
 #define MONO_ARCH_EMULATE_FCONV_TO_I8 1
 #define MONO_ARCH_EMULATE_LCONV_TO_R8 1
 #define MONO_ARCH_EMULATE_LCONV_TO_R4 1
@@ -120,6 +131,7 @@
 #define PPC_LAST_FPARG_REG ppc_f13
 #define PPC_PASS_STRUCTS_BY_VALUE 1
 #define PPC_SMALL_RET_STRUCT_IN_REG 0
+#define PPC_THREAD_PTR_REG ppc_r13
 #else
 #define PPC_RET_ADDR_OFFSET 4
 #define PPC_STACK_PARAM_OFFSET 8
@@ -128,6 +140,7 @@
 #define PPC_LAST_FPARG_REG ppc_f8
 #define PPC_PASS_STRUCTS_BY_VALUE 0
 #define PPC_SMALL_RET_STRUCT_IN_REG 1
+#define PPC_THREAD_PTR_REG ppc_r2
 #endif
 #define PPC_FIRST_ARG_REG ppc_r3
 #define PPC_LAST_ARG_REG ppc_r10
diff -urN mono-svn-20090121/mono/mono/mini/tramp-ppc.c mono-svn/mono/mono/mini/tramp-ppc.c
--- mono-svn-20090121/mono/mono/mini/tramp-ppc.c	2009-01-21 10:35:59.000000000 -0600
+++ mono-svn/mono/mono/mini/tramp-ppc.c	2009-01-21 12:39:32.000000000 -0600
@@ -193,7 +193,7 @@
 	 * now the integer registers.
 	 */
 	offset = STACK - sizeof (MonoLMF) + G_STRUCT_OFFSET (MonoLMF, iregs);
-	ppc_store_multiple_regs (buf, ppc_r13, ppc_r1, offset);
+	ppc_store_multiple_regs (buf, ppc_r13, offset, ppc_r1);
 
 	/* Now save the rest of the registers below the MonoLMF struct, first 14
 	 * fp regs and then the 13 gregs.
@@ -297,7 +297,7 @@
 	/* *(lmf_addr) = previous_lmf */
 	ppc_store_reg (buf, ppc_r5, G_STRUCT_OFFSET(MonoLMF, previous_lmf), ppc_r6);
 	/* restore iregs */
-	ppc_load_multiple_regs (buf, ppc_r13, ppc_r11, G_STRUCT_OFFSET(MonoLMF, iregs));
+	ppc_load_multiple_regs (buf, ppc_r13, G_STRUCT_OFFSET(MonoLMF, iregs), ppc_r11);
 	/* restore fregs */
 	for (i = 14; i < 32; i++)
 		ppc_lfd (buf, i, G_STRUCT_OFFSET(MonoLMF, fregs) + ((i-14) * sizeof (gdouble)), ppc_r11);
@@ -505,7 +505,7 @@
 	guint8 *jump;
 	int tramp_size;
 
-	tramp_size = 32;
+	tramp_size = MONO_PPC_32_64_CASE (32, 44);
 
 	code = buf = mono_global_codeman_reserve (tramp_size);
 
diff -urN mono-svn-20090121/mono/mono/tests/Makefile.am mono-svn/mono/mono/tests/Makefile.am
--- mono-svn-20090121/mono/mono/tests/Makefile.am	2009-01-21 15:17:23.000000000 -0600
+++ mono-svn/mono/mono/tests/Makefile.am	2009-01-21 15:01:33.000000000 -0600
@@ -466,6 +462,9 @@
 if POWERPC
 test_platform:
 else
+if POWERPC64
+test_platform:
+else
 # Can't use mkbundle on win32 since there is no static build there
 # Can't run test-unhandled-exception on Windows because of all the debug popups...
 if PLATFORM_WIN32
@@ -474,6 +473,7 @@
 test_platform:	testbundle test-iomap-regression
 endif
 endif
+endif
 
 if INSTALL_2_1
 test_2_1 : test-coreclr-security
diff -urN mono-svn-20090121/mono/mono/utils/mono-compiler.h mono-svn/mono/mono/utils/mono-compiler.h
--- mono-svn-20090121/mono/mono/utils/mono-compiler.h	2009-01-21 10:35:48.000000000 -0600
+++ mono-svn/mono/mono/utils/mono-compiler.h	2009-01-21 12:39:32.000000000 -0600
@@ -26,15 +26,24 @@
  */
 //#define PIC_INITIAL_EXEC
 
-#if defined (__powerpc__)
-#define MONO_TLS_FAST
-#elif defined(PIC)
+#if defined(PIC)
 
 #ifdef PIC_INITIAL_EXEC
 #define MONO_TLS_FAST __attribute__((tls_model("initial-exec")))
 #else
+#if defined (__powerpc__)
+/* local dynamic requires a call to __tls_get_addr to look up the 
+   TLS block address via the Dynamic Thread Vector. In this case Thread
+   Pointer relative offsets can't be used as this module's TLS was
+   allocated separately (non-contiguously) from the initial TLS
+   block.
+
+   For now we will disable this. */
+#define MONO_TLS_FAST
+#else
 #define MONO_TLS_FAST __attribute__((tls_model("local-dynamic")))
 #endif
+#endif
 
 #else
 
@@ -65,6 +74,43 @@
 #else
 #define MONO_THREAD_VAR_OFFSET(var,offset) __asm ("addl %0 = @tprel(" #var "#), r0 ;;\n" : "=r" (offset))
 #endif
+
+#elif defined(__mono_ppc__) && defined(__GNUC__)
+#if defined(PIC)
+#ifdef PIC_INITIAL_EXEC
+
+#if defined(__mono_ppc64__)
+#define MONO_THREAD_VAR_OFFSET(var,offset) \
+	do { long off; \
+	__asm (	"ld	%0," #var "@got@tprel(2)\n" \
+	: "=r" (off)); \
+	(offset) = off; } while (0)
+#else
+/* must be powerpc32 */
+#define MONO_THREAD_VAR_OFFSET(var,offset) \
+	__asm (	"lwz	%0," #var "@got@tprel(30)\n" \
+	: "=r" (offset))
+#endif
+
+#else
+
+/* local dynamic requires a call to __tls_get_addr to look up the 
+   TLS block address via the Dynamic Thread Vector. In this case Thread
+   Pointer relative offsets can't be used as this module's TLS was
+   allocated separately (non-contiguously) from the initial TLS
+   block.
+
+   For now we will disable this. */
+#define MONO_THREAD_VAR_OFFSET(var,offset) (offset) = -1
+
+#endif
+#else
+/* Must be local-exec TLS */
+#define MONO_THREAD_VAR_OFFSET(var,offset) \
+	__asm (	"lis	%0," #var "@tprel@ha\n" \
+		"addi	%0,%0, " #var "@tprel@l\n" \
+	: "=r" (offset))
+#endif
 #else
 #define MONO_THREAD_VAR_OFFSET(var,offset) (offset) = -1
 #endif


More information about the Mono-devel-list mailing list