[Mono-dev] Proposed Patch - Google Native Client

Elijah Taylor elijahtaylor at google.com
Thu Jul 15 19:30:14 EDT 2010


Hi, here's an updated patch with your feedback addressed.  I re-based the
diff closer to head revision (r160382) to include the other changes of ours
that already landed, as well as make sure we're still compatible with
current Mono development.

In general this diff should have a smaller impact on the .c files:
mini-x86.c, exceptions-x86.c, tramp-x86.c specifically, and the Native
Client changes are a little more grouped together rather than spread out.

A couple of points separate from the feedback:
1) I fixed a bug in my implementation of the genmdesc.pl changes, so that
part will differ from the previous patch.
2) There's a small typo at head revision in mono/mini/tramp-x86.c which says
"rethow" instead of "reth*r*ow" for your rethrow exception trampoline.  This
is also fixed in my patch.

As always feedback is appreciated from everyone.


-Elijah


On Tue, Jul 6, 2010 at 6:35 AM, Zoltan Varga <vargaz at gmail.com> wrote:

> Hi,
>
>
>> One possibility is to pad out all x86_prefix instructions to the nearest
>> 32-byte boundary, but that could really bloat things depending on how often
>> they're used.  Do you have any idea of the prefix to non-prefix instruction
>> ratio?  It seems like it'd be pretty low based on looking at the code but I
>> haven't looked at any actual metrics.
>>
>>
> I think that would be ok, they are seldom used.
>
>                                  Zoltan
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: http://lists.ximian.com/pipermail/mono-devel-list/attachments/20100715/8a4ac2b1/attachment-0001.html 
-------------- next part --------------
Index: eglib/src/eglib-config.h.in
===================================================================
--- eglib/src/eglib-config.h.in	(revision 160382)
+++ eglib/src/eglib-config.h.in	(working copy)
@@ -29,4 +29,12 @@
 #define G_HAVE_ISO_VARARGS
 #endif
 
+#if defined (__native_client__)
+#define sem_trywait(x) sem_wait(x)
+#define sem_timedwait(x,y) sem_wait(x)
+#define getdtablesize() (32768)
+#undef G_BREAKPOINT
+#define G_BREAKPOINT()
 #endif
+
+#endif
Index: eglib/src/gmodule-unix.c
===================================================================
--- eglib/src/gmodule-unix.c	(revision 160382)
+++ eglib/src/gmodule-unix.c	(working copy)
@@ -31,6 +31,42 @@
 #include <glib.h>
 #include <gmodule.h>
 
+#if defined(__native_client__)
+GModule *
+g_module_open (const gchar *file, GModuleFlags flags)
+{
+	printf("dlopen() not supported on Native Client.\n"); /* stub: report on stdout, load nothing */
+	return NULL; /* every open attempt fails; file and flags are ignored */
+}
+
+/* Native Client stub: symbol lookup always fails and *symbol is left untouched. */
+gboolean
+g_module_symbol (GModule *module, const gchar *symbol_name, gpointer *symbol)
+{
+	return FALSE;
+}
+
+/* Native Client stub: always returns the same static "not supported" message. */
+const gchar*
+g_module_error(void)
+{
+	return "dlopen not supported on Native Client.";
+}
+/* Native Client stub: ignores its argument and always reports FALSE. */
+gboolean
+g_module_close (GModule *module)
+{
+	return FALSE;
+}
+/* Native Client stub: never builds a path; both arguments are ignored and NULL is returned. */
+gchar*
+g_module_build_path (const gchar *directory, const gchar *module_name)
+{
+	return NULL;
+}
+
+#else
+
 #ifdef G_OS_UNIX
 #include <dlfcn.h>
 
@@ -286,3 +322,5 @@
 	return g_strdup_printf ("%s%s" LIBSUFFIX, lib_prefix, module_name); 
 }
 
+#endif /* __native_client__ */
+
Index: runtime/mono-wrapper.in
===================================================================
--- runtime/mono-wrapper.in	(revision 160382)
+++ runtime/mono-wrapper.in	(working copy)
@@ -3,5 +3,6 @@
 MONO_CFG_DIR='@mono_cfg_dir@'
 PATH="$r/runtime/_tmpinst/bin:$PATH"
 MONO_SHARED_DIR=$r/runtime
+export MONO_NACL_ALIGN_MASK_OFF=@MONO_NACL_ALIGN_MASK_OFF@
 export MONO_CFG_DIR MONO_SHARED_DIR PATH
 exec "$r/libtool" --mode=execute "$r/@mono_runtime@" --config "@mono_cfg_dir@/mono/config" "$@"
Index: mono/metadata/assembly.c
===================================================================
--- mono/metadata/assembly.c	(revision 160382)
+++ mono/metadata/assembly.c	(working copy)
@@ -126,6 +126,26 @@
 static GList *loaded_assemblies = NULL;
 static MonoAssembly *corlib;
 
+#if defined(__native_client__)
+
+/* On Native Client, allow mscorlib to be loaded from memory  */
+/* instead of loaded off disk.  If these are not set, default */
+/* mscorlib loading will take place                           */
+
+/* NOTE: If mscorlib data is passed to mono in this way then */
+/* it needs to remain allocated during the use of mono.      */
+
+static void *corlibData = NULL;
+static size_t corlibSize = 0;
+/* Stores the pointer and size exactly as given; the data itself is not copied here. */
+void mono_set_corlib_data(void *data, size_t size)
+{
+  corlibData = data;
+  corlibSize = size;
+}
+
+#endif
+
 /* This protects loaded_assemblies and image->references */
 #define mono_assemblies_lock() EnterCriticalSection (&assemblies_mutex)
 #define mono_assemblies_unlock() LeaveCriticalSection (&assemblies_mutex)
@@ -2514,7 +2534,6 @@
 	return result;
 }
 
-
 MonoAssembly*
 mono_assembly_load_corlib (const MonoRuntimeInfo *runtime, MonoImageOpenStatus *status)
 {
@@ -2524,6 +2543,22 @@
 		/* g_print ("corlib already loaded\n"); */
 		return corlib;
 	}
+
+#if defined(__native_client__)
+	if (corlibData != NULL && corlibSize != 0) {
+		/* Separate, correctly typed local: the "status" parameter must not be shadowed. */
+		MonoImageOpenStatus mem_status = MONO_IMAGE_OK;
+		/* First "FALSE" instructs mono not to make a copy; second says this is not just a ref. */
+		MonoImage* image = mono_image_open_from_data_full (corlibData, corlibSize, FALSE, &mem_status, FALSE);
+		if (image == NULL || mem_status != MONO_IMAGE_OK)
+			g_print("mono_image_open_from_data_full failed: %d\n", mem_status);
+		/* Only hand the image on when it was opened; a NULL image must never be loaded. */
+		else if ((corlib = mono_assembly_load_from_full (image, "mscorlib", &mem_status, FALSE)) == NULL || mem_status != MONO_IMAGE_OK)
+			g_print ("mono_assembly_load_from_full failed: %d\n", mem_status);
+		if (corlib)
+			return corlib;
+	}
+#endif
 	
 	if (assemblies_path) {
 		corlib = load_in_path ("mscorlib.dll", (const char**)assemblies_path, status, FALSE);
Index: mono/metadata/console-unix.c
===================================================================
--- mono/metadata/console-unix.c	(revision 160382)
+++ mono/metadata/console-unix.c	(working copy)
@@ -6,6 +6,9 @@
  *
  * Copyright (C) 2005-2009 Novell, Inc. (http://www.novell.com)
  */
+#if defined(__native_client__)
+#include "console-null.c"
+#else
 
 #include <config.h>
 #include <glib.h>
@@ -485,3 +488,5 @@
 
 	return TRUE;
 }
+#endif /* #if defined(__native_client__) */
+
Index: mono/metadata/Makefile.am
===================================================================
--- mono/metadata/Makefile.am	(revision 160382)
+++ mono/metadata/Makefile.am	(working copy)
@@ -142,6 +142,7 @@
 	mono-wsq.h		\
 	monitor.c		\
 	monitor.h		\
+	nacl-stub.c		\
 	normalization-tables.h	\
 	null-gc.c		\
 	number-formatter.h	\
Index: mono/metadata/process.c
===================================================================
--- mono/metadata/process.c	(revision 160382)
+++ mono/metadata/process.c	(working copy)
@@ -29,7 +29,6 @@
 #define LOGDEBUG(...)  
 /* define LOGDEBUG(...) g_message(__VA_ARGS__)  */
 
-
 HANDLE ves_icall_System_Diagnostics_Process_GetProcess_internal (guint32 pid)
 {
 	HANDLE handle;
Index: mono/metadata/rand.c
===================================================================
--- mono/metadata/rand.c	(revision 160382)
+++ mono/metadata/rand.c	(working copy)
@@ -26,6 +26,17 @@
 #include <mono/metadata/rand.h>
 #include <mono/metadata/exception.h>
 
+#if defined(__native_client__)
+#include <errno.h>
+
+static void
+get_entropy_from_server (const char *path, guchar *buf, int len)
+{
+    return; /* NOTE(review): buf is never written here -- confirm callers do not treat it as filled with entropy */
+}
+
+#else /* defined(__native_client__) */
+
 #if !defined(HOST_WIN32)
 #include <sys/socket.h>
 #include <sys/un.h>
@@ -96,6 +107,7 @@
     close (file);
 }
 #endif
+#endif /* __native_client__ */
 
 #if defined (HOST_WIN32)
 
Index: mono/metadata/nacl-stub.c
===================================================================
--- mono/metadata/nacl-stub.c	(revision 0)
+++ mono/metadata/nacl-stub.c	(revision 0)
@@ -0,0 +1,16 @@
+
+#if defined(__native_client__)
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+#include <eglib/src/glib.h>
+#include <errno.h>
+#include <sys/types.h>
+/* Native Client stand-ins: group lookups return NULL, fsync always fails with EINVAL. */
+struct group *getgrnam(const char *name) { return NULL; }
+struct group *getgrgid(gid_t gid) { errno=EIO; return NULL; }
+int fsync(int fd) { errno=EINVAL; return -1; }
+dev_t makedev(guint32 maj, guint32 min) { return (maj)*256+(min); }
+
+#endif
Index: mono/metadata/socket-io.c
===================================================================
--- mono/metadata/socket-io.c	(revision 160382)
+++ mono/metadata/socket-io.c	(working copy)
@@ -11,6 +11,8 @@
 
 #include <config.h>
 
+#if !defined(__native_client__)
+
 #include <glib.h>
 #include <string.h>
 #include <stdlib.h>
@@ -3075,3 +3077,5 @@
 	WSACleanup();
 }
 
+
+#endif /* #if !defined(__native_client__) */
Index: mono/io-layer/sockets.c
===================================================================
--- mono/io-layer/sockets.c	(revision 160382)
+++ mono/io-layer/sockets.c	(working copy)
@@ -7,6 +7,8 @@
  * (C) 2002 Ximian, Inc.
  */
 
+#if !defined(__native_client__)
+
 #include <config.h>
 #include <glib.h>
 #include <pthread.h>
@@ -1581,3 +1583,5 @@
 	*sent = ret;
 	return 0;
 }
+
+#endif /* if !defined(__native_client__) */
Index: mono/io-layer/locking.c
===================================================================
--- mono/io-layer/locking.c	(revision 160382)
+++ mono/io-layer/locking.c	(working copy)
@@ -23,6 +23,11 @@
 gboolean
 _wapi_lock_file_region (int fd, off_t offset, off_t length)
 {
+#if defined(__native_client__)
+	printf("WARNING: locking.c: _wapi_lock_file_region(): fcntl() not available on Native Client!\n");
+	// behave as below -- locks are not available
+	return(TRUE);
+#else
 	struct flock lock_data;
 	int ret;
 
@@ -58,11 +63,16 @@
 	}
 
 	return(TRUE);
+#endif /* __native_client__ */
 }
 
 gboolean
 _wapi_unlock_file_region (int fd, off_t offset, off_t length)
 {
+#if defined(__native_client__)
+	printf("WARNING: locking.c: _wapi_unlock_file_region(): fcntl() not available on Native Client!\n");
+	return (TRUE);
+#else
 	struct flock lock_data;
 	int ret;
 
@@ -98,6 +108,7 @@
 	}
 
 	return(TRUE);
+#endif /* __native_client__ */
 }
 
 gboolean
Index: mono/mini/method-to-ir.c
===================================================================
--- mono/mini/method-to-ir.c	(revision 160382)
+++ mono/mini/method-to-ir.c	(working copy)
@@ -5403,6 +5403,7 @@
 		cfg->bb_exit = end_bblock;
 		end_bblock->cil_code = NULL;
 		end_bblock->cil_length = 0;
+		end_bblock->flags |= BB_INDIRECT_JUMP_TARGET;
 		g_assert (cfg->num_bblocks == 2);
 
 		arg_array = cfg->args;
@@ -7007,11 +7008,13 @@
 			target = ip + n * sizeof (guint32);
 
 			GET_BBLOCK (cfg, default_bblock, target);
+			default_bblock->flags |= BB_INDIRECT_JUMP_TARGET;
 
 			targets = mono_mempool_alloc (cfg->mempool, sizeof (MonoBasicBlock*) * n);
 			for (i = 0; i < n; ++i) {
 				GET_BBLOCK (cfg, tblock, target + (gint32)read32(ip));
 				targets [i] = tblock;
+				targets [i]->flags |= BB_INDIRECT_JUMP_TARGET;
 				ip += 4;
 			}
 
Index: mono/mini/mini.c
===================================================================
--- mono/mini/mini.c	(revision 160382)
+++ mono/mini/mini.c	(working copy)
@@ -81,6 +81,11 @@
 MonoMethodSignature *helper_sig_monitor_enter_exit_trampoline = NULL;
 MonoMethodSignature *helper_sig_monitor_enter_exit_trampoline_llvm = NULL;
 
+#ifdef __native_client_codegen__
+/* Default alignment for Native Client is 32-byte. */
+guint8 nacl_align_byte = 0xe0;
+#endif
+
 static guint32 default_opt = 0;
 static gboolean default_opt_set = FALSE;
 
@@ -157,6 +162,82 @@
 
 gboolean mono_dont_free_global_codeman;
 
+#ifdef __native_client_codegen__
+
+/* Prevent an instruction from straddling a kNaClAlignment-byte boundary.  */
+/* Instructions of kNaClAlignment bytes or more trip an assertion here.    */
+/* IN: pcode (current emit position), instlen (instruction length)         */
+/* OUT: pcode (result of mono_arch_nacl_pad when the instruction won't fit) */
+void mono_nacl_align_inst(guint8 **pcode, int instlen) {
+  int space_in_block;
+
+  space_in_block = kNaClAlignment - ((uintptr_t)(*pcode) & kNaClAlignmentMask);
+
+  if (G_UNLIKELY (instlen >= kNaClAlignment)) {
+    g_assert_not_reached();
+  } else if (instlen > space_in_block) {
+    *pcode = mono_arch_nacl_pad(*pcode, space_in_block);
+  }
+}
+
+/* Move emitted call sequence to the end of a kNaClAlignment-byte block.  */
+/* IN: start    pointer to start of call sequence                         */
+/* IN: pcode    pointer to end of call sequence (current "IP")            */
+/* OUT: start   pointer to the start of the call sequence after padding   */
+/* OUT: pcode   pointer to the end of the call sequence after padding     */
+void mono_nacl_align_call(guint8 **start, guint8 **pcode) {
+  const size_t MAX_NACL_CALL_LENGTH = kNaClAlignment;
+  guint8 copy_of_call[MAX_NACL_CALL_LENGTH];
+  guint8 *temp;
+
+  const size_t length = (size_t)((*pcode)-(*start));
+  g_assert(length < MAX_NACL_CALL_LENGTH);
+  /* Save the call bytes, emit the padding at *start, then re-emit the call after it. */
+  memcpy(copy_of_call, *start, length);
+  temp = mono_nacl_pad_call(*start, (guint8)length);
+  memcpy(temp, copy_of_call, length);
+  (*start) = temp;
+  (*pcode) = temp + length;
+}
+
+/* mono_nacl_pad_call(): Insert padding for Native Client call instructions */
+/*    code     pointer to buffer for emitting code                          */
+/*    ilength  length of call instruction                                   */
+guint8 *mono_nacl_pad_call(guint8 *code, guint8 ilength) {
+  int freeSpaceInBlock = kNaClAlignment - ((uintptr_t)code & kNaClAlignmentMask);
+  int padding = freeSpaceInBlock - ilength;
+
+  if (padding < 0) {
+    /* There isn't enough space in this block for the instruction. */
+    /* Fill this block and start a new one.                        */
+    code = mono_arch_nacl_pad(code, freeSpaceInBlock);
+    freeSpaceInBlock = kNaClAlignment;
+    padding = freeSpaceInBlock - ilength;
+  }
+  g_assert(ilength > 0);
+  g_assert(padding >= 0);
+  g_assert(padding < kNaClAlignment);
+  if (0 == padding) return code; /* the call already ends exactly on the block boundary */
+  return mono_arch_nacl_pad(code, padding); /* position at which the caller should emit the call */
+}
+/* Pad code up to the next kNaClAlignment boundary; already-aligned code is returned unchanged. */
+guint8 *mono_nacl_align(guint8 *code) {
+  int padding = kNaClAlignment - ((uintptr_t)code & kNaClAlignmentMask);
+  if (padding != kNaClAlignment) code = mono_arch_nacl_pad(code, padding);
+  return code;
+}
+/* Rewrite each patch offset in ji to wherever mono_arch_nacl_skip_nops lands from code + offset. */
+void mono_nacl_fix_patches(const guint8 *code, MonoJumpInfo *ji)
+{
+  MonoJumpInfo *patch_info;
+  for (patch_info = ji; patch_info; patch_info = patch_info->next) {
+    unsigned char *ip = patch_info->ip.i + code;
+    ip = mono_arch_nacl_skip_nops(ip);
+    patch_info->ip.i = ip - code;
+  }
+}
+#endif  /* __native_client_codegen__ */
+
 gboolean
 mono_running_on_valgrind (void)
 {
@@ -3330,7 +3411,17 @@
 	}
 
 	memcpy (code, cfg->native_code, cfg->code_len);
+#ifdef __native_client_codegen__
+	if (cfg->native_code_alloc) {
+		g_free (cfg->native_code_alloc);
+		cfg->native_code_alloc = 0;
+	}
+	else if (cfg->native_code) {
+		g_free (cfg->native_code);
+	}
+#else
 	g_free (cfg->native_code);
+#endif
 	cfg->native_code = code;
 	code = cfg->native_code + cfg->code_len;
   
@@ -3369,6 +3460,10 @@
 	mono_arch_save_unwind_info (cfg);
 #endif
 	
+#ifdef __native_client_codegen__
+	mono_nacl_fix_patches (cfg->native_code, cfg->patch_info);
+#endif
+
 	mono_arch_patch_code (cfg->method, cfg->domain, cfg->native_code, cfg->patch_info, cfg->run_cctors);
 
 	if (cfg->method->dynamic) {
@@ -5662,7 +5757,7 @@
 
 	MONO_PROBE_VES_INIT_BEGIN ();
 
-#ifdef __linux__
+#if defined(__linux__) && !defined(__native_client__)
 	if (access ("/proc/self/maps", F_OK) != 0) {
 		g_print ("Mono requires /proc to be mounted.\n");
 		exit (1);
Index: mono/mini/mini.h
===================================================================
--- mono/mini/mini.h	(revision 160382)
+++ mono/mini/mini.h	(working copy)
@@ -560,11 +560,13 @@
 
 /* BBlock flags */
 enum {
-	BB_VISITED            = 1 << 0,
-	BB_REACHABLE          = 1 << 1,
-	BB_EXCEPTION_DEAD_OBJ = 1 << 2,
-	BB_EXCEPTION_UNSAFE   = 1 << 3,
-	BB_EXCEPTION_HANDLER  = 1 << 4
+	BB_VISITED              = 1 << 0,
+	BB_REACHABLE            = 1 << 1,
+	BB_EXCEPTION_DEAD_OBJ   = 1 << 2,
+	BB_EXCEPTION_UNSAFE     = 1 << 3,
+	BB_EXCEPTION_HANDLER    = 1 << 4,
+	/* for Native Client, mark the blocks that can be jumped to indirectly */
+	BB_INDIRECT_JUMP_TARGET = 1 << 5 
 };
 
 typedef struct MonoMemcpyArgs {
@@ -1068,6 +1070,11 @@
 	MonoGenericSharingContext *generic_sharing_context;
 
 	unsigned char   *cil_start;
+#ifdef __native_client_codegen__
+	/* this alloc is not aligned, native_code */
+	/* is the 32-byte aligned version of this */
+	unsigned char   *native_code_alloc;
+#endif
 	unsigned char   *native_code;
 	guint            code_size;
 	guint            code_len;
@@ -1587,6 +1594,19 @@
 void      mono_linterval_split              (MonoCompile *cfg, MonoLiveInterval *interval, MonoLiveInterval **i1, MonoLiveInterval **i2, int pos) MONO_INTERNAL;
 void      mono_liveness_handle_exception_clauses (MonoCompile *cfg) MONO_INTERNAL;
 
+/* Native Client functions */
+#ifdef __native_client_codegen__
+void mono_nacl_align_inst(guint8 **pcode, int instlen);
+void mono_nacl_align_call(guint8 **start, guint8 **pcode);
+guint8 *mono_nacl_pad_call(guint8 *code, guint8 ilength);
+guint8 *mono_nacl_align(guint8 *code);
+void mono_nacl_fix_patches(const guint8 *code, MonoJumpInfo *ji);
+/* Defined for each arch */
+guint8 *mono_arch_nacl_pad(guint8 *code, int pad);
+guint8 *mono_arch_nacl_skip_nops(guint8 *code);
+
+#endif
+
 /* AOT */
 void      mono_aot_init                     (void) MONO_INTERNAL;
 gpointer  mono_aot_get_method               (MonoDomain *domain,
Index: mono/mini/cpu-x86.md
===================================================================
--- mono/mini/cpu-x86.md	(revision 160382)
+++ mono/mini/cpu-x86.md	(working copy)
@@ -63,12 +63,12 @@
 # See the code in mini-x86.c for more details on how the specifiers are used.
 #
 break: len:1
-jmp: len:32
+jmp: len:32 clob:c
 call: dest:a clob:c len:17
 br: len:5
 seq_point: len:16
 
-int_beq: len:6 nacl:28
+int_beq: len:6
 int_bge: len:6
 int_bgt: len:6
 int_ble: len:6
@@ -117,8 +117,8 @@
 throw: src1:i len:13
 rethrow: src1:i len:13
 start_handler: len:16
-endfinally: len:16
-endfilter: src1:a len:16
+endfinally: len:16 nacl:21
+endfilter: src1:a len:16 nacl:21
 
 ckfinite: dest:f src1:f len:32
 ceq: dest:y len:6
@@ -134,18 +134,18 @@
 checkthis: src1:b len:3
 voidcall: len:17 clob:c
 voidcall_reg: src1:i len:11 clob:c
-voidcall_membase: src1:b len:16 clob:c
+voidcall_membase: src1:b len:16 nacl:17 clob:c
 fcall: dest:f len:17 clob:c
 fcall_reg: dest:f src1:i len:11 clob:c
-fcall_membase: dest:f src1:b len:16 clob:c
+fcall_membase: dest:f src1:b len:16 nacl:17 clob:c
 lcall: dest:l len:17 clob:c
 lcall_reg: dest:l src1:i len:11 clob:c
-lcall_membase: dest:l src1:b len:16 clob:c
+lcall_membase: dest:l src1:b len:16 nacl:17 clob:c
 vcall: len:17 clob:c
 vcall_reg: src1:i len:11 clob:c
-vcall_membase: src1:b len:16 clob:c
-call_reg: dest:a src1:i len:11 clob:c
-call_membase: dest:a src1:b len:16 clob:c
+vcall_membase: src1:b len:16 nacl:17 clob:c
+call_reg: dest:a src1:i len:11 nacl:14 clob:c
+call_membase: dest:a src1:b len:16 nacl:18 clob:c
 iconst: dest:i len:5
 r4const: dest:f len:15
 r8const: dest:f len:16
@@ -284,7 +284,7 @@
 adc_imm: dest:i src1:i len:6 clob:1
 sbb: dest:i src1:i src2:i len:2 clob:1
 sbb_imm: dest:i src1:i len:6 clob:1
-br_reg: src1:i len:2
+br_reg: src1:i len:2 nacl:5
 sin: dest:f src1:f len:6
 cos: dest:f src1:f len:6
 abs: dest:f src1:f len:2
@@ -386,7 +386,7 @@
 
 vcall2: len:17 clob:c
 vcall2_reg: src1:i len:11 clob:c
-vcall2_membase: src1:b len:16 clob:c
+vcall2_membase: src1:b len:16 nacl:17 clob:c
 
 localloc_imm: dest:i len:120
 
Index: mono/mini/genmdesc.pl
===================================================================
--- mono/mini/genmdesc.pl	(revision 160382)
+++ mono/mini/genmdesc.pl	(working copy)
@@ -13,6 +13,8 @@
 sub INST_SRC3  () {return 3;}
 sub INST_LEN   () {return 4;}
 sub INST_CLOB  () {return 5;}
+# Making INST_NACL the same as INST_MAX is not a mistake:
+# INST_NACL writes over INST_LEN; it is not its own field.
 sub INST_NACL  () {return 6;}
 sub INST_MAX   () {return 6;}
 
@@ -23,6 +25,8 @@
 my %template_table =();
 my @opcodes = ();
 
+my $nacl = 0;
+
 sub parse_file
 {
 	my ($define, $file) = @_;
@@ -167,21 +171,22 @@
 	my $res = "";
 	my $n = 0;
 	for (my $i = 0; $i < @vals; ++$i) {
+		next if $i == INST_NACL;
 		if (defined $vals [$i]) {
 			if ($i == INST_LEN) {
 			        $n = $vals [$i];
-			        if (defined $vals [INST_NACL]){
-				    $n += $vals [INST_NACL];
+			        if ((defined $vals [INST_NACL]) and $nacl == 1){
+				    $n = $vals [INST_NACL];
 			        }
 				$res .= sprintf ("\\x%x\" \"", + $n);
-			} elsif ($i != INST_NACL) {
+			} else {
 				if ($vals [$i] =~ /^[a-zA-Z0-9]$/) {
 					$res .= $vals [$i];
 				} else {
 					$res .= sprintf ("\\x%x\" \"", $vals [$i]);
 				}
 			}
-		} elsif ($i != INST_NACL) {
+		} else {
 			$res .= "\\x0\" \"";
 		}
 	}
@@ -221,12 +226,17 @@
 }
 
 sub usage {
-	die "genmdesc.pl arch srcdir output name desc [desc2 ...]\n";
+	die "genmdesc.pl arch srcdir [--nacl] output name desc [desc2 ...]\n";
 }
 
 my $arch = shift || usage ();
 my $srcdir = shift || usage ();
 my $output = shift || usage ();
+if ($output eq "--nacl")
+{
+  $nacl = 1;  
+  $output = shift || usage();
+}
 my $name = shift || usage ();
 usage () unless @ARGV;
 my @files = @ARGV;
Index: mono/mini/mini-x86.c
===================================================================
--- mono/mini/mini-x86.c	(revision 160382)
+++ mono/mini/mini-x86.c	(working copy)
@@ -65,6 +65,60 @@
 MonoBreakpointInfo
 mono_breakpoint_info [MONO_BREAKPOINT_ARRAY_SIZE];
 
+/* Grow the code buffer to cfg->code_size and return the pointer to store in cfg->native_code. */
+/* NaCl codegen reallocs cfg->native_code_alloc and returns an aligned pointer inside it.      */
+static gpointer mono_realloc_native_code(MonoCompile *cfg)
+{
+#ifdef __native_client_codegen__
+  size_t old_padding;
+  guint8 *native_code;
+
+  /* Save the old alignment offset so we can re-align after the realloc. */
+  old_padding = (size_t)(cfg->native_code - cfg->native_code_alloc);
+
+  cfg->native_code_alloc = g_realloc (cfg->native_code_alloc,
+                                      cfg->code_size + kNaClAlignment);
+
+  /* Align to the next kNaClAlignment boundary; uintptr_t keeps the full pointer width. */
+  native_code = (guint8 *)(((uintptr_t)cfg->native_code_alloc + kNaClAlignment) &
+                           ~(uintptr_t)kNaClAlignmentMask);
+
+  /* Shift the data to be 32-byte aligned again. */
+  memmove (native_code, cfg->native_code_alloc + old_padding, cfg->code_size);
+
+  g_assert (((uintptr_t)native_code & kNaClAlignmentMask) == 0);
+  return native_code;
+#else
+  return g_realloc(cfg->native_code, cfg->code_size);
+#endif
+}
+
+#ifdef __native_client_codegen__
+
+/* mono_arch_nacl_pad: Emit pad bytes of padding at code, in chunks of at   */
+/* most kMaxPadding.  Asserts that the padding stays within one block.      */
+guint8 *mono_arch_nacl_pad(guint8 *code, int pad) {
+  const int kMaxPadding = 7;    /* see x86-codegen.h: x86_padding() */
+
+  if (pad == 0) return code;
+  /* assertion: alignment cannot cross a block boundary */
+  g_assert(((uintptr_t)code & (~kNaClAlignmentMask)) ==
+          (((uintptr_t)code + pad - 1) & (~kNaClAlignmentMask)));
+  while (pad >= kMaxPadding) {
+    x86_padding(code, kMaxPadding);
+    pad -= kMaxPadding;
+  }
+  if (pad != 0) x86_padding (code, pad);
+  return code; /* presumably advanced by x86_padding -- TODO confirm in x86-codegen.h */
+}
+/* Applies x86_skip_nops to code and returns it; presumably advances past NOP padding -- TODO confirm. */
+guint8 *mono_arch_nacl_skip_nops(guint8 *code) {
+  x86_skip_nops(code);
+  return code;
+}
+
+#endif /* __native_client_codegen__ */
+
 /*
  * The code generated for sequence points reads from this location, which is
  * made read-only when single stepping is enabled.
@@ -617,6 +671,14 @@
 static int 
 cpuid (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx)
 {
+#if defined(__native_client__)
+	/* Taken from below, the bug listed in the comment is */
+	/* only valid for non-static cases.                   */
+	__asm__ __volatile__ ("cpuid"
+		: "=a" (*p_eax), "=b" (*p_ebx), "=c" (*p_ecx), "=d" (*p_edx)
+		: "a" (id));
+	return 1;
+#else
 	int have_cpuid = 0;
 #ifndef _MSC_VER
 	__asm__  __volatile__ (
@@ -671,6 +733,7 @@
 		return 1;
 	}
 	return 0;
+#endif
 }
 
 /*
@@ -724,6 +787,7 @@
 guint32
 mono_arch_cpu_optimizazions (guint32 *exclude_mask)
 {
+#if !defined(__native_client__)
 	int eax, ebx, ecx, edx;
 	guint32 opts = 0;
 	
@@ -755,6 +819,9 @@
 #endif
 	}
 	return opts;
+#else
+	return MONO_OPT_CMOV | MONO_OPT_FCMOV | MONO_OPT_SSE2;
+#endif
 }
 
 /*
@@ -2211,6 +2278,11 @@
 x86_pop_reg (code, X86_EDX); \
 x86_pop_reg (code, X86_EAX);
 
+#ifdef __native_client_codegen__ /* REAL_PRINT_REG does not appear to be used, and was not adapted to work with Native Client. */
+#undef REAL_PRINT_REG /* drop any earlier definition so this is not a conflicting redefinition */
+#define REAL_PRINT_REG(text, reg) g_assert_not_reached()
+#endif
+
 /* benchmark and set based on cpu */
 #define LOOP_ALIGNMENT 8
 #define bb_is_loop_start(bb) ((bb)->loop_body_start && (bb)->nesting)
@@ -2237,7 +2309,23 @@
 			bb->native_offset = cfg->code_len;
 		}
 	}
+#ifdef __native_client_codegen__
+        {
+          /* For Native Client, all indirect call/jump targets must be   */
+          /* 32-byte aligned.  Exception handler blocks are jumped to    */
+          /* indirectly as well.                                         */
+          gboolean bb_needs_alignment = (bb->flags & BB_INDIRECT_JUMP_TARGET) ||
+                                        (bb->flags & BB_EXCEPTION_HANDLER);
 
+          /* if ((cfg->code_len & kNaClAlignmentMask) != 0) { */
+          if ( bb_needs_alignment && ((cfg->code_len & kNaClAlignmentMask) != 0)) {
+            int pad = kNaClAlignment - (cfg->code_len & kNaClAlignmentMask);
+            if (pad != kNaClAlignment) code = mono_arch_nacl_pad(code, pad);
+            cfg->code_len += pad;
+            bb->native_offset = cfg->code_len;
+          }
+        }
+#endif  /* __native_client_codegen__ */
 	if (cfg->verbose_level > 2)
 		g_print ("Basic block %d starting at offset 0x%x\n", bb->block_num, bb->native_offset);
 
@@ -2262,9 +2350,14 @@
 
 		max_len = ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN];
 
-		if (G_UNLIKELY (offset > (cfg->code_size - max_len - 16))) {
+#ifdef  __native_client_codegen__
+#define EXTRA_CODE_SPACE (16 + kNaClAlignment)
+#else
+#define EXTRA_CODE_SPACE  16
+#endif  /* __native_client_codegen__ */
+		if (G_UNLIKELY (offset > (cfg->code_size - max_len - EXTRA_CODE_SPACE))) {
 			cfg->code_size *= 2;
-			cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
+			cfg->native_code = mono_realloc_native_code(cfg);
 			code = cfg->native_code + offset;
 			mono_jit_stats.code_reallocs++;
 		}
@@ -4463,9 +4556,11 @@
 		}
 
 		if (G_UNLIKELY ((code - cfg->native_code - offset) > max_len)) {
+#ifndef __native_client_codegen__
 			g_warning ("wrong maximal instruction length of instruction %s (expected %d, got %d)",
 				   mono_inst_name (ins->opcode), max_len, code - cfg->native_code - offset);
 			g_assert_not_reached ();
+#endif  /* __native_client_codegen__ */
 		}
 	       
 		cpos += max_len;
@@ -4548,13 +4643,30 @@
 	int alloc_size, pos, max_offset, i, cfa_offset;
 	guint8 *code;
 	gboolean need_stack_frame;
+#ifdef __native_client_codegen__
+	guint alignment_check;
+#endif
 
 	cfg->code_size = MAX (cfg->header->code_size * 4, 10240);
 
 	if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
 		cfg->code_size += 512;
 
+#ifdef __native_client_codegen__
+	/* native_code_alloc is not 32-byte aligned, native_code is. */
+	cfg->native_code_alloc = g_malloc (cfg->code_size + kNaClAlignment);
+
+	/* Align native_code to next nearest kNaClAlignment byte. */
+	/* The arithmetic goes through uintptr_t so the pointer is never narrowed. */
+	cfg->native_code = (unsigned char *)(((uintptr_t)cfg->native_code_alloc + kNaClAlignment) & ~(uintptr_t)kNaClAlignmentMask);
+
+	code = cfg->native_code;
+
+	alignment_check = (guint)((uintptr_t)cfg->native_code & kNaClAlignmentMask);
+	g_assert(alignment_check == 0);
+#else
+	code = cfg->native_code = g_malloc (cfg->code_size);
+#endif
 
 	/* Offset between RSP and the CFA */
 	cfa_offset = 0;
@@ -4741,7 +4853,7 @@
 		if (G_UNLIKELY (required_code_size >= (cfg->code_size - offset))) {
 			while (required_code_size >= (cfg->code_size - offset))
 				cfg->code_size *= 2;
-			cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
+			cfg->native_code = mono_realloc_native_code(cfg);
 			code = cfg->native_code + offset;
 			mono_jit_stats.code_reallocs++;
 		}
@@ -4787,11 +4899,23 @@
 			/* max alignment for loops */
 			if ((cfg->opt & MONO_OPT_LOOP) && bb_is_loop_start (bb))
 				max_offset += LOOP_ALIGNMENT;
-
+#ifdef __native_client_codegen__
+                        /* max alignment for native client */
+                        max_offset += kNaClAlignment;
+#endif
 			MONO_BB_FOR_EACH_INS (bb, ins) {
 				if (ins->opcode == OP_LABEL)
 					ins->inst_c1 = max_offset;
-				
+#ifdef __native_client_codegen__
+				{
+				  int space_in_block = kNaClAlignment -
+				    ((max_offset + cfg->code_len) & kNaClAlignmentMask);
+				  int max_len = ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN];
+				  if (space_in_block < max_len && max_len < kNaClAlignment) {
+				    max_offset += space_in_block;
+				  }
+				}
+#endif  /* __native_client_codegen__ */
 				max_offset += ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN];
 			}
 		}
@@ -4846,7 +4970,7 @@
 
 	while (cfg->code_len + max_epilog_size > (cfg->code_size - 16)) {
 		cfg->code_size *= 2;
-		cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
+		cfg->native_code = mono_realloc_native_code(cfg);
 		mono_jit_stats.code_reallocs++;
 	}
 
@@ -5027,7 +5151,7 @@
 
 	while (cfg->code_len + code_size > (cfg->code_size - 16)) {
 		cfg->code_size *= 2;
-		cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
+		cfg->native_code = mono_realloc_native_code(cfg);
 		mono_jit_stats.code_reallocs++;
 	}
 
@@ -5060,8 +5184,12 @@
 				guint32 size;
 
 				/* Compute size of code following the push <OFFSET> */
+#ifdef __native_client_codegen__
+				code = mono_nacl_align(code);
+				size = kNaClAlignment;
+#else
 				size = 5 + 5;
-
+#endif
 				/*This is aligned to 16 bytes by the callee. This way we save a few bytes here.*/
 
 				if ((code - cfg->native_code) - throw_ip < 126 - size) {
@@ -5176,8 +5304,16 @@
 //[1 + 5] x86_jump_mem(inst,mem)
 
 #define CMP_SIZE 6
+#ifdef __native_client_codegen__
+/* These constants should be coming from cpu-x86.md            */
+/* I suspect the size calculation below is actually incorrect. */
+/* TODO: fix the calculation that uses these sizes.            */
+#define BR_SMALL_SIZE 16
+#define BR_LARGE_SIZE 12
+#else
 #define BR_SMALL_SIZE 2
 #define BR_LARGE_SIZE 5
+#endif  /* __native_client_codegen__ */
 #define JUMP_IMM_SIZE 6
 #define ENABLE_WRONG_METHOD_CHECK 0
 #define DEBUG_IMT 0
@@ -5202,6 +5338,9 @@
 	int size = 0;
 	guint8 *code, *start;
 
+#ifdef __native_client_codegen__
+	/* g_print("mono_arch_build_imt_thunk needs to be aligned.\n"); */
+#endif
 	for (i = 0; i < count; ++i) {
 		MonoIMTCheckItem *item = imt_entries [i];
 		if (item->is_equals) {
@@ -5496,17 +5635,19 @@
 gpointer
 mono_arch_get_vcall_slot (guint8 *code, mgreg_t *regs, int *displacement)
 {
-	guint8 buf [8];
+#if defined(__native_client_codegen__) || defined(__native_client__)
+  const int kBufSize = 16;
+#else
+  const int kBufSize = 8;	
+#endif  /* __native_client_codegen__ */
+	guint8 buf [kBufSize];
 	guint8 reg = 0;
 	gint32 disp = 0;
 
-	mono_breakpoint_clean_code (NULL, code, 8, buf, sizeof (buf));
-	code = buf + 8;
-
+	mono_breakpoint_clean_code (NULL, code, kBufSize, buf, sizeof (buf));
+	code = buf + kBufSize - 6;
 	*displacement = 0;
 
-	code -= 6;
-
 	/*
 	 * This function is no longer used, the only caller is
 	 * mono_arch_nullify_class_init_trampoline ().
@@ -5514,6 +5655,15 @@
 	if ((code [0] == 0xff) && ((code [1] & 0x18) == 0x10) && ((code [1] >> 6) == 2)) {
 		reg = code [1] & 0x07;
 		disp = *((gint32*)(code + 2));
+#if defined(__native_client_codegen__) || defined(__native_client__)
+	} else if ((code[1] == 0x83) && (code[2] == 0xe1) && (code[4] == 0xff) &&
+			(code[5] == 0xd1) && (code[-5] == 0x8b)) {
+		disp = *((gint32*)(code - 3));
+		reg = code[-4] & 0x07;
+	} else if ((code[-2] == 0x8b) && (code[1] == 0x83) && (code[4] == 0xff)) {
+		reg = code[-1] & 0x07;
+		disp = (signed char)code[0];
+#endif
 	} else {
 		g_assert_not_reached ();
 		return NULL;
@@ -5585,8 +5735,12 @@
 	} else {
 		int i = 0;
 		/* 8 for mov_reg and jump, plus 8 for each parameter */
-		int code_reserve = 8 + (param_count * 8);
-
+#ifdef __native_client_codegen__
+                /* TODO: calculate this size correctly */
+                int code_reserve = 13 + (param_count * 8) + 2 * kNaClAlignment;
+#else
+                int code_reserve = 8 + (param_count * 8);
+#endif  /* __native_client_codegen__ */
 		/*
 		 * The stack contains:
 		 * <args in reverse order>
Index: mono/mini/mini-x86.h
===================================================================
--- mono/mini/mini-x86.h	(revision 160382)
+++ mono/mini/mini-x86.h	(working copy)
@@ -55,6 +55,10 @@
 #define MONO_ARCH_USE_SIGACTION
 #endif
 
+#if defined(__native_client__)
+#undef MONO_ARCH_USE_SIGACTION
+#endif
+
 #ifndef HOST_WIN32
 
 #ifdef HAVE_WORKING_SIGALTSTACK
@@ -286,7 +290,7 @@
 
 #define MONO_ARCH_HAVE_DECOMPOSE_LONG_OPTS 1
 
-#if !defined(__APPLE__)
+#if !defined(__APPLE__) || defined(__native_client_codegen__)
 #define MONO_ARCH_AOT_SUPPORTED 1
 #endif
 
Index: mono/mini/genmdesc.c
===================================================================
--- mono/mini/genmdesc.c	(revision 160382)
+++ mono/mini/genmdesc.c	(working copy)
@@ -43,7 +43,7 @@
 	char spec [MONO_INST_MAX];
 } OpDesc;
 
-static int nacl;
+static int nacl = 0;
 static GHashTable *table;
 static GHashTable *template_table;
 
@@ -75,6 +75,8 @@
 	line = 0;
 	while ((str = fgets (buf, sizeof (buf), f))) {
 		gboolean is_template = FALSE;
+		gboolean nacl_length_set = FALSE;
+
 		++line;
 		eat_whitespace (str);
 		if (!str [0])
@@ -132,14 +134,20 @@
 				p += 7;
 				*/
 			} else if (strncmp (p, "len:", 4) == 0) {
+				unsigned long size;
 				p += 4;
-				desc->spec [MONO_INST_LEN] += strtoul (p, &p, 10);
-			} else if (strncmp (p, "nacl:", 5) == 0){
+				size = strtoul (p, &p, 10);
+				if (!nacl_length_set) {
+					desc->spec [MONO_INST_LEN] = size;
+				}
+			} else if (strncmp (p, "nacl:", 5) == 0) {
 				unsigned long size;
 				p += 5;
 				size = strtoul (p, &p, 10);
-				if (nacl)
-					desc->spec [MONO_INST_LEN] += size;
+				if (nacl) {
+					desc->spec [MONO_INST_LEN] = size;
+					nacl_length_set = TRUE;
+				}
 			} else if (strncmp (p, "template:", 9) == 0) {
 				char *tname;
 				int i;
@@ -298,7 +306,7 @@
 		return 1;
 	} else {
 		int i = 3;
-		if (strcmp (argv [1], "--nacl") == 0){
+		if (strcmp (argv [1], "--nacl") == 0) {
 			nacl = 1;
 			i++;
 		}
Index: mono/mini/Makefile.am
===================================================================
--- mono/mini/Makefile.am	(revision 160382)
+++ mono/mini/Makefile.am	(working copy)
@@ -368,6 +368,7 @@
 	basic-simd.cs
 
 regtests=basic.exe basic-float.exe basic-long.exe basic-calls.exe objects.exe arrays.exe basic-math.exe exceptions.exe iltests.exe devirtualization.exe generics.exe basic-simd.exe
+fsatests=basic.exe basic-float.exe basic-long.exe basic-calls.exe objects.exe arrays.exe basic-math.exe exceptions.exe devirtualization.exe basic-simd.exe
 
 if X86
 if MONO_DEBUGGER_SUPPORTED
@@ -541,14 +542,20 @@
 generics-variant-types.dll: generics-variant-types.il
 	$(ILASM) -dll -output=$@ $<
 
+if NACL_CODEGEN
+GENMDESC_OPTS=--nacl
+else !NACL_CODEGEN
+GENMDESC_OPTS=
+endif !NACL_CODEGEN
+
 # we don't always use the perl impl because it's an additional
 # build dependency for the poor windows users
 # $(arch_define) is the preprocessor symbol that enables all the opcodes
 # for the specific platform in mini-ops.h
 if CROSS_COMPILING
-GENMDESC_PRG=perl $(srcdir)/genmdesc.pl $(arch_define) $(srcdir)
+GENMDESC_PRG=perl $(srcdir)/genmdesc.pl $(arch_define) $(srcdir) $(GENMDESC_OPTS)
 else !CROSS_COMPILING
-GENMDESC_PRG=./genmdesc
+GENMDESC_PRG=./genmdesc $(GENMDESC_OPTS)
 endif !CROSS_COMPILING
 
 cpu-x86.h: cpu-x86.md genmdesc$(EXEEXT)
@@ -612,6 +619,20 @@
 	MONO_PATH=fullaot-tmp $(top_builddir)/runtime/mono-wrapper --aot=full fullaot-tmp/* || exit 1
 	for i in $(regtests); do echo $$i; MONO_PATH=fullaot-tmp $(top_builddir)/runtime/mono-wrapper --full-aot fullaot-tmp/$$i --exclude '!FULLAOT' || exit 1; done
 
+fsacheck: mono $(fsatests) fsacheck.c generics.exe
+	rm -rf fsa-tmp
+	mkdir fsa-tmp
+	cp $(CLASS)/mscorlib.dll $(CLASS)/System.Core.dll $(CLASS)/System.dll $(CLASS)/Mono.Posix.dll $(CLASS)/System.Configuration.dll $(CLASS)/System.Security.dll $(CLASS)/System.Xml.dll $(CLASS)/Mono.Security.dll $(CLASS)/Mono.Simd.dll \
+	$(fsatests) generics-variant-types.dll TestDriver.dll fsa-tmp/
+	cp $(fsatests) fsa-tmp/
+	MONO_PATH=fsa-tmp $(top_builddir)/runtime/mono-wrapper --aot=full,static fsa-tmp/*.dll || exit 1
+	MONO_PATH=fsa-tmp $(top_builddir)/runtime/mono-wrapper --aot=full,static fsa-tmp/*.exe || exit 1
+	$(CC) -o $@.out -g -static $(VPATH)/fsacheck.c fsa-tmp/*.o \
+	-lmono-2.0 -lpthread -lm -ldl -lrt \
+	-DTARGET_X86 -L.libs -I${prefix}/include/mono-2.0 \
+	-I${prefix} -I/usr/include/glib-2.0 -I/usr/lib/glib-2.0/include
+	for i in $(fsatests); do echo $$i; MONO_PATH=fsa-tmp ./$@.out $$i || exit 1; done
+
 bench: mono test.exe
 	time env $(RUNTIME) --ncompile $(count) --compile Test:$(mtest) test.exe
 
Index: mono/mini/image-writer.c
===================================================================
--- mono/mini/image-writer.c	(revision 160382)
+++ mono/mini/image-writer.c	(working copy)
@@ -53,7 +53,7 @@
  * TARGET_ASM_GAS == GNU assembler
  */
 #if !defined(TARGET_ASM_APPLE) && !defined(TARGET_ASM_GAS)
-#ifdef __MACH__
+#if defined(__MACH__) && !defined(__native_client_codegen__)
 #define TARGET_ASM_APPLE
 #else
 #define TARGET_ASM_GAS
@@ -313,6 +313,11 @@
 		while (new_size <= new_offset)
 			new_size *= 2;
 		data = g_malloc0 (new_size);
+#ifdef __native_client_codegen__
+		/* for Native Client, fill empty space with HLT instruction */
+		/* instead of 00.                                           */
+		memset(data, 0xf4, new_size);
+#endif		
 		memcpy (data, section->data, section->data_len);
 		g_free (section->data);
 		section->data = data;
@@ -355,7 +360,23 @@
 	}
 }
 
+#ifdef __native_client_codegen__
 static void
+bin_writer_emit_nacl_call_alignment (MonoImageWriter *acfg) {
+  /* Emit 0x90 bytes until the next kNaClLengthOfCallImm bytes would end */
+  /* exactly on a kNaClAlignment boundary of the current section.        */
+  guint8 nop = '\x90';
+  int used = acfg->cur_section->cur_offset & kNaClAlignmentMask;
+  int count = kNaClAlignment - used - kNaClLengthOfCallImm;
+
+  /* too close to the boundary: pad through to the following one */
+  if (count < 0) count += kNaClAlignment;
+  for (; count > 0; count--)
+    bin_writer_emit_bytes(acfg, &nop, 1);
+}
+#endif  /* __native_client_codegen__ */
+
+static void
 bin_writer_emit_pointer_unaligned (MonoImageWriter *acfg, const char *target)
 {
 	BinReloc *reloc;
@@ -1627,7 +1648,21 @@
 #endif
 }
 
+#ifdef __native_client_codegen__
 static void
+asm_writer_emit_nacl_call_alignment (MonoImageWriter *acfg) {
+  /* Request alignment, then write (kNaClAlignment - kNaClLengthOfCallImm) */
+  /* bytes of 0x90 in front of whatever is emitted next.                   */
+  guint8 nop = '\x90';
+  int i;
+
+  fprintf (acfg->fp, "\n\t.align %d", kNaClAlignment);
+  for (i = kNaClLengthOfCallImm; i < kNaClAlignment; i++)
+    fprintf (acfg->fp, "\n\t.byte %d", nop);
+}
+#endif  /* __native_client_codegen__ */
+
+static void
 asm_writer_emit_pointer_unaligned (MonoImageWriter *acfg, const char *target)
 {
 	asm_writer_emit_unset_mode (acfg);
@@ -1909,7 +1944,21 @@
 #endif
 }
 
+#ifdef __native_client_codegen__
 void
+img_writer_emit_nacl_call_alignment (MonoImageWriter *acfg) {	/* dispatches to the writer in use */
+#ifdef USE_BIN_WRITER
+	if (acfg->use_bin_writer)
+		bin_writer_emit_nacl_call_alignment (acfg);
+	else
+		asm_writer_emit_nacl_call_alignment (acfg);
+#else
+	asm_writer_emit_nacl_call_alignment (acfg);	/* no bin writer built in: use the asm writer rather than abort */
+#endif
+}
+#endif  /* __native_client_codegen__ */
+
+void
 img_writer_emit_pointer_unaligned (MonoImageWriter *acfg, const char *target)
 {
 #ifdef USE_BIN_WRITER
Index: mono/mini/image-writer.h
===================================================================
--- mono/mini/image-writer.h	(revision 160382)
+++ mono/mini/image-writer.h	(working copy)
@@ -62,6 +62,10 @@
 
 void img_writer_emit_alignment (MonoImageWriter *w, int size) MONO_INTERNAL;
 
+#ifdef __native_client_codegen__
+void img_writer_emit_nacl_call_alignment (MonoImageWriter *w) MONO_INTERNAL;
+#endif
+
 void img_writer_emit_pointer_unaligned (MonoImageWriter *w, const char *target) MONO_INTERNAL;
 
 void img_writer_emit_pointer (MonoImageWriter *w, const char *target) MONO_INTERNAL;
Index: mono/mini/exceptions-x86.c
===================================================================
--- mono/mini/exceptions-x86.c	(revision 160382)
+++ mono/mini/exceptions-x86.c	(working copy)
@@ -324,9 +324,14 @@
 	guint8 *code;
 	MonoJumpInfo *ji = NULL;
 	GSList *unwind_ops = NULL;
+#ifdef __native_client_codegen__
+	guint kMaxCodeSize = 128;
+#else
+	guint kMaxCodeSize = 64;
+#endif  /* __native_client_codegen__ */
 
 	/* call_filter (MonoContext *ctx, unsigned long eip) */
-	start = code = mono_global_codeman_reserve (64);
+	start = code = mono_global_codeman_reserve (kMaxCodeSize);
 
 	x86_push_reg (code, X86_EBP);
 	x86_mov_reg_reg (code, X86_EBP, X86_ESP, 4);
@@ -374,7 +379,7 @@
 	if (info)
 		*info = mono_tramp_info_create (g_strdup_printf ("call_filter"), start, code - start, ji, unwind_ops);
 
-	g_assert ((code - start) < 64);
+	g_assert ((code - start) < kMaxCodeSize);
 	return start;
 }
 
@@ -492,9 +497,13 @@
 	int i, stack_size, stack_offset, arg_offsets [5], regs_offset;
 	MonoJumpInfo *ji = NULL;
 	GSList *unwind_ops = NULL;
+#ifdef __native_client_codegen__
+	guint kMaxCodeSize = 256;
+#else
+	guint kMaxCodeSize = 128;
+#endif
+	start = code = mono_global_codeman_reserve (kMaxCodeSize);
 
-	start = code = mono_global_codeman_reserve (128);
-
 	stack_size = 128;
 
 	/* 
@@ -599,7 +608,7 @@
 	}
 	x86_breakpoint (code);
 
-	g_assert ((code - start) < 128);
+	g_assert ((code - start) < kMaxCodeSize);
 
 	if (info)
 		*info = mono_tramp_info_create (g_strdup (name), start, code - start, ji, unwind_ops);
@@ -628,7 +637,7 @@
 gpointer 
 mono_arch_get_rethrow_exception (MonoTrampInfo **info, gboolean aot)
 {
-	return get_throw_trampoline ("rethow_exception", TRUE, FALSE, FALSE, FALSE, FALSE, info, aot);
+	return get_throw_trampoline ("rethrow_exception", TRUE, FALSE, FALSE, FALSE, FALSE, info, aot);
 }
 
 /**
@@ -848,6 +857,18 @@
 void
 mono_arch_sigctx_to_monoctx (void *sigctx, MonoContext *mctx)
 {
+#if defined (__native_client__)
+	printf("WARNING: mono_arch_sigctx_to_monoctx() called!\n");
+	mctx->eax = 0xDEADBEEF;
+	mctx->ebx = 0xDEADBEEF;
+	mctx->ecx = 0xDEADBEEF;
+	mctx->edx = 0xDEADBEEF;
+	mctx->ebp = 0xDEADBEEF;
+	mctx->esp = 0xDEADBEEF;
+	mctx->esi = 0xDEADBEEF;
+	mctx->edi = 0xDEADBEEF;
+	mctx->eip = 0xDEADBEEF;
+#else
 #ifdef MONO_ARCH_USE_SIGACTION
 	ucontext_t *ctx = (ucontext_t*)sigctx;
 	
@@ -873,11 +894,15 @@
 	mctx->edi = ctx->SC_EDI;
 	mctx->eip = ctx->SC_EIP;
 #endif
+#endif /* if defined(__native_client__) */
 }
 
 void
 mono_arch_monoctx_to_sigctx (MonoContext *mctx, void *sigctx)
 {
+#if defined(__native_client__)
+	printf("WARNING: mono_arch_monoctx_to_sigctx() called!\n");
+#else
 #ifdef MONO_ARCH_USE_SIGACTION
 	ucontext_t *ctx = (ucontext_t*)sigctx;
 
@@ -903,18 +928,24 @@
 	ctx->SC_EDI = mctx->edi;
 	ctx->SC_EIP = mctx->eip;
 #endif
+#endif /* __native_client__ */
 }	
 
 gpointer
 mono_arch_ip_from_context (void *sigctx)
 {
+#if defined(__native_client__)
+	printf("WARNING: mono_arch_ip_from_context() called!\n");
+	return (NULL);
+#else
 #ifdef MONO_ARCH_USE_SIGACTION
 	ucontext_t *ctx = (ucontext_t*)sigctx;
 	return (gpointer)UCONTEXT_REG_EIP (ctx);
 #else
 	struct sigcontext *ctx = sigctx;
 	return (gpointer)ctx->SC_EIP;
-#endif	
+#endif
+#endif	/* __native_client__ */
 }
 
 /*
@@ -1166,6 +1197,9 @@
 	static guint8* saved = NULL;
 	guint8 *code, *start;
 
+#ifdef __native_client_codegen__
+	g_print("mono_tasklets_arch_restore needs to be aligned for Native Client\n");
+#endif
 	if (saved)
 		return (MonoContinuationRestore)saved;
 	code = start = mono_global_codeman_reserve (48);
Index: mono/mini/aot-compiler.c
===================================================================
--- mono/mini/aot-compiler.c	(revision 160382)
+++ mono/mini/aot-compiler.c	(working copy)
@@ -68,7 +68,7 @@
 
 #if !defined(DISABLE_AOT) && !defined(DISABLE_JIT)
 
-#if defined(__linux__)
+#if defined(__linux__) || defined(__native_client_codegen__)
 #define RODATA_SECT ".rodata"
 #else
 #define RODATA_SECT ".text"
@@ -330,6 +330,13 @@
 	img_writer_emit_byte (acfg->w, val); 
 }
 
+#ifdef __native_client_codegen__
+static inline void
+emit_nacl_call_alignment (MonoAotCompile *acfg) {	/* thin wrapper: forwards to the image writer held in acfg->w */
+	img_writer_emit_nacl_call_alignment(acfg->w);
+}
+#endif
+
 static G_GNUC_UNUSED void
 emit_global_inner (MonoAotCompile *acfg, const char *name, gboolean func)
 {
@@ -460,6 +467,10 @@
 #else
 #define AOT_FUNC_ALIGNMENT 16
 #endif
+#if defined(TARGET_X86) && defined(__native_client_codegen__)
+#undef AOT_FUNC_ALIGNMENT
+#define AOT_FUNC_ALIGNMENT 32
+#endif
  
 #if defined(TARGET_POWERPC64) && !defined(__mono_ilp32__)
 #define PPC_LD_OP "ld"
@@ -654,12 +665,26 @@
 #if defined(TARGET_X86)
 		guint32 offset = (acfg->plt_got_offset_base + index) * sizeof (gpointer);
 
+#ifdef __native_client_codegen__
+		const guint8 kSizeOfNaClJmp = 11;
+		guint8 bytes[kSizeOfNaClJmp];
+		guint8 *pbytes = &bytes[0];
+		
+		x86_jump_membase32(pbytes, X86_EBX, offset);
+		emit_bytes (acfg, bytes, kSizeOfNaClJmp);
+		/* four bytes of data, used by mono_arch_patch_plt_entry              */
+		/* For Native Client, make this work with data embedded in push.      */
+		emit_byte (acfg, 0x68);  /* hide data in a push */
+		emit_int32 (acfg, acfg->plt_got_info_offsets [index]);
+		emit_alignment(acfg, AOT_FUNC_ALIGNMENT);
+#else
 		/* jmp *<offset>(%ebx) */
 		emit_byte (acfg, 0xff);
 		emit_byte (acfg, 0xa3);
 		emit_int32 (acfg, offset);
 		/* Used by mono_aot_get_plt_info_offset */
 		emit_int32 (acfg, acfg->plt_got_info_offsets [index]);
+#endif  /* __native_client_codegen__ */
 #elif defined(TARGET_AMD64)
 		/*
 		 * We can't emit jumps because they are 32 bits only so they can't be patched.
@@ -846,9 +871,20 @@
 	/* Branch to generic trampoline */
 	x86_jump_reg (code, X86_ECX);
 
+#ifdef __native_client_codegen__
+	{
+		/* emit nops to next 32 byte alignment */
+		int a = (~kNaClAlignmentMask) & ((code - buf) + kNaClAlignment - 1);
+		while (code < (buf + a)) x86_nop(code);
+	}
+#endif
 	emit_bytes (acfg, buf, code - buf);
 
+#ifdef __native_client_codegen__
+	*tramp_size = kNaClAlignment;
+#else
 	*tramp_size = 17;
+#endif
 	g_assert (code - buf == *tramp_size);
 #else
 	g_assert_not_reached ();
@@ -1028,9 +1064,21 @@
 	/* Branch to the target address */
 	x86_jump_membase (code, X86_ECX, (offset + 1) * sizeof (gpointer));
 
+#ifdef __native_client_codegen__
+	{
+		/* emit nops to next 32 byte alignment */
+		int a = (~kNaClAlignmentMask) & ((code - buf) + kNaClAlignment - 1);
+		while (code < (buf + a)) x86_nop(code);
+	}
+#endif
+
 	emit_bytes (acfg, buf, code - buf);
 
+#ifdef __native_client_codegen__
+	*tramp_size = kNaClAlignment;
+#else
 	*tramp_size = 15;
+#endif
 	g_assert (code - buf == *tramp_size);
 #else
 	g_assert_not_reached ();
@@ -1099,9 +1147,17 @@
 	*tramp_size = code - buf + 7;
 #elif defined(TARGET_X86)
 	guint8 *buf, *code;
+#ifdef __native_client_codegen__
+	guint8 *buf_alloc;
+#endif
 	guint8 *labels [3];
 
+#ifdef __native_client_codegen__
+	buf_alloc = g_malloc (256 + kNaClAlignment);
+	code = buf = ((guint)buf_alloc + kNaClAlignment) & ~kNaClAlignmentMask;
+#else
 	code = buf = g_malloc (256);
+#endif
 
 	/* Allocate a temporary stack slot */
 	x86_push_reg (code, X86_EAX);
@@ -1143,6 +1199,13 @@
 	mono_x86_patch (labels [1], code);
 	x86_breakpoint (code);
 
+#ifdef __native_client_codegen__
+	{
+	  /* emit nops to next 32 byte alignment */
+	  int a = (~kNaClAlignmentMask) & ((code - buf) + kNaClAlignment - 1);
+	  while (code < (buf + a)) x86_nop(code);
+	}
+#endif
 	emit_bytes (acfg, buf, code - buf);
 	
 	*tramp_size = code - buf;
@@ -3805,13 +3868,17 @@
 	ji = info->ji;
 	unwind_ops = info->unwind_ops;
 
+#ifdef __native_client_codegen__
+	mono_nacl_fix_patches(code, ji);
+#endif
+
 	/* Emit code */
 
 	sprintf (start_symbol, "%s", name);
 
 	emit_section_change (acfg, ".text", 0);
 	emit_global (acfg, start_symbol, TRUE);
-	emit_alignment (acfg, 16);
+	emit_alignment (acfg, AOT_FUNC_ALIGNMENT);
 	emit_label (acfg, start_symbol);
 
 	sprintf (symbol, "%snamed_%s", acfg->temp_prefix, name);
@@ -4010,7 +4077,7 @@
 			}
 
 			emit_global (acfg, symbol, TRUE);
-			emit_alignment (acfg, 16);
+			emit_alignment (acfg, AOT_FUNC_ALIGNMENT);
 			emit_label (acfg, symbol);
 
 			acfg->trampoline_got_offset_base [ntype] = tramp_got_offset;
@@ -4034,6 +4101,10 @@
 				default:
 					g_assert_not_reached ();
 				}
+#ifdef __native_client_codegen__
+				/* align to avoid 32-byte boundary crossings */
+				emit_alignment(acfg, AOT_FUNC_ALIGNMENT);
+#endif
 
 				if (!acfg->trampoline_size [ntype]) {
 					g_assert (tramp_size);
@@ -4810,6 +4881,9 @@
 			}
 
 			emit_section_change (acfg, ".text", 0);
+#ifdef __native_client_codegen__
+			emit_alignment(acfg, AOT_FUNC_ALIGNMENT);
+#endif
 			emit_global (acfg, symbol, TRUE);
 			emit_label (acfg, symbol);
 
@@ -5682,7 +5756,7 @@
 		 * Emit a global symbol which can be passed by an embedding app to
 		 * mono_aot_register_module ().
 		 */
-#if defined(__MACH__)
+#if defined(__MACH__) && !defined(__native_client_codegen__)
 		sprintf (symbol, "_mono_aot_module_%s_info", acfg->image->assembly->aname.name);
 #else
 		sprintf (symbol, "mono_aot_module_%s_info", acfg->image->assembly->aname.name);
@@ -5938,6 +6012,12 @@
 #define AS_OPTIONS ""
 #endif
 
+#ifdef __native_client_codegen__
+#define AS_NAME "nacl-as"
+#else
+#define AS_NAME "as"
+#endif
+
 #ifndef LD_OPTIONS
 #define LD_OPTIONS ""
 #endif
@@ -5963,7 +6043,7 @@
 	} else {
 		objfile = g_strdup_printf ("%s.o", acfg->tmpfname);
 	}
-	command = g_strdup_printf ("%sas %s %s -o %s", tool_prefix, AS_OPTIONS, acfg->tmpfname, objfile);
+	command = g_strdup_printf ("%s%s %s %s -o %s", tool_prefix, AS_NAME, AS_OPTIONS, acfg->tmpfname, objfile);
 	printf ("Executing the native assembler: %s\n", command);
 	if (system (command) != 0) {
 		g_free (command);
Index: mono/mini/tramp-x86.c
===================================================================
--- mono/mini/tramp-x86.c	(revision 160382)
+++ mono/mini/tramp-x86.c	(working copy)
@@ -144,12 +144,23 @@
 
 	/* Patch the jump table entry used by the plt entry */
 
+#if defined(__native_client_codegen__) || defined(__native_client__)
+	/* for both compiler and runtime      */
+	/* A PLT entry:                       */
+	/*        mov <DISP>(%ebx), %ecx      */
+	/*        and 0xffffffe0, %ecx        */
+	/*        jmp *%ecx                   */
+	g_assert (code [0] == 0x8b);
+	g_assert (code [1] == 0x8b);
+
+	offset = *(guint32*)(code + 2);
+#else
 	/* A PLT entry: jmp *<DISP>(%ebx) */
 	g_assert (code [0] == 0xff);
 	g_assert (code [1] == 0xa3);
 
 	offset = *(guint32*)(code + 2);
-
+#endif  /* __native_client_codegen__ */
 	if (!got)
 		got = (gpointer*)(gsize) regs [MONO_ARCH_GOT_REG];
 	*(guint8**)((guint8*)got + offset) = addr;
@@ -481,7 +492,11 @@
 	
 	tramp = mono_get_trampoline_code (tramp_type);
 
-	code = buf = mono_domain_code_reserve_align (domain, TRAMPOLINE_SIZE, 4);
+#ifdef __native_client_codegen__
+        code = buf = mono_domain_code_reserve_align (domain, TRAMPOLINE_SIZE, kNaClAlignment);
+#else
+        code = buf = mono_domain_code_reserve_align (domain, TRAMPOLINE_SIZE, 4);
+#endif  /* __native_client_codegen__ */
 
 	x86_push_imm (buf, arg1);
 	x86_jump_code (buf, tramp);
@@ -522,7 +537,13 @@
 		index -= size - 1;
 	}
 
+#ifdef __native_client_codegen__
+	/* TODO: align for Native Client */
+	tramp_size = (aot ? 64 : 36) + 2 * kNaClAlignment +
+	  6 * (depth + kNaClAlignment);
+#else
 	tramp_size = (aot ? 64 : 36) + 6 * depth;
+#endif  /* __native_client_codegen__ */
 
 	code = buf = mono_global_codeman_reserve (tramp_size);
 
@@ -635,7 +656,9 @@
 	mono_arch_flush_icache (code, code - buf);
 
 	g_assert (code - buf <= tramp_size);
-
+#ifdef __native_client_codegen__
+	g_assert (code - buf <= kNaClAlignment);
+#endif
 	if (info)
 		*info = mono_tramp_info_create (g_strdup_printf ("generic_class_init_trampoline"), buf, code - buf, ji, unwind_ops);
 
@@ -680,7 +703,11 @@
 	owner_offset = MONO_THREADS_SYNC_MEMBER_OFFSET (owner_offset);
 	nest_offset = MONO_THREADS_SYNC_MEMBER_OFFSET (nest_offset);
 
+#ifdef __native_client_codegen__
+	tramp_size = 128;
+#else
 	tramp_size = 64;
+#endif
 
 	code = buf = mono_global_codeman_reserve (tramp_size);
 
@@ -796,7 +823,11 @@
 	nest_offset = MONO_THREADS_SYNC_MEMBER_OFFSET (nest_offset);
 	entry_count_offset = MONO_THREADS_SYNC_MEMBER_OFFSET (entry_count_offset);
 
+#ifdef __native_client_codegen__
+	tramp_size = 128;
+#else
 	tramp_size = 64;
+#endif  /* __native_client_codegen__ */
 
 	code = buf = mono_global_codeman_reserve (tramp_size);
 
@@ -955,5 +986,10 @@
 guint32
 mono_arch_get_plt_info_offset (guint8 *plt_entry, mgreg_t *regs, guint8 *code)
 {
+#if defined(__native_client_codegen__) || defined(__native_client__)
+	/* Both code gen and runtime need this: the NaCl PLT entry emitted by aot-compiler.c is an 11-byte jump plus a 0x68 push opcode, so the data starts at byte 12. */
+	return *(guint32*)(plt_entry + 12);
+#else
 	return *(guint32*)(plt_entry + 6);
+#endif
 }
Index: mono/mini/fsacheck.c
===================================================================
--- mono/mini/fsacheck.c	(revision 0)
+++ mono/mini/fsacheck.c	(revision 0)
@@ -0,0 +1,141 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <mono/metadata/appdomain.h>
+#include <mono/metadata/assembly.h>
+#include <mono/metadata/debug-helpers.h>
+#include <mono/metadata/object.h>
+#include <mono/jit/jit.h>
+
+extern void* mono_aot_module_mscorlib_info;
+extern void* mono_aot_module_System_Core_info;
+extern void* mono_aot_module_System_info;
+extern void* mono_aot_module_Mono_Posix_info;
+extern void* mono_aot_module_System_Configuration_info;
+extern void* mono_aot_module_System_Security_info;
+extern void* mono_aot_module_System_Xml_info;
+/* extern void* mono_aot_module_System_Threading_info; */
+extern void* mono_aot_module_Mono_Security_info;
+extern void* mono_aot_module_Mono_Simd_info;
+extern void* mono_aot_module_TestDriver_info;
+
+extern void* mono_aot_module_basic_info;
+extern void* mono_aot_module_basic_float_info;
+extern void* mono_aot_module_basic_long_info;
+extern void* mono_aot_module_basic_calls_info;
+extern void* mono_aot_module_basic_simd_info;
+extern void* mono_aot_module_objects_info;
+extern void* mono_aot_module_arrays_info;
+extern void* mono_aot_module_basic_math_info;
+extern void* mono_aot_module_exceptions_info;
+extern void* mono_aot_module_devirtualization_info;
+extern void* mono_aot_module_generics_info;
+extern void* mono_aot_module_generics_variant_types_info;
+extern void* mono_aot_module_basic_simd_info;
+/* extern void* mono_aot_module_thread_stress_info; */
+
+
+extern void mono_aot_register_module(void *aot_info);
+extern void mono_aot_init(void);
+extern void mono_jit_set_aot_only(mono_bool aot_only);
+extern MonoDomain * mini_init (const char *filename, const char *runtime_version);
+
+/* Run the Main entry point of test assembly mname in AOT-only mode; exits the process on failure. */
+void try_one(char *mname) {
+  static const char *const entry_descs[] = {  /* tried in this order */
+    "Tests:Main()", "Tests:Main(string[])", "SimdTests:Main(string[])"
+  };
+  MonoDomain *domain;
+  MonoAssembly *ma;
+  MonoImage *mi;
+  MonoClass *mc;
+  MonoMethodDesc *mmd = NULL;
+  MonoMethod *mm = NULL;
+  MonoObject *mo;
+  MonoArray *arg_array;
+  void *args [1];
+  const char *cstr_arg = "20";
+  unsigned i;
+
+  mono_jit_set_aot_only(1);
+  domain = mono_jit_init(mname);
+  printf("mono domain: %p\n", domain);
+
+  ma = mono_domain_assembly_open(domain, mname);
+  if (0 == ma) {
+    printf("ERROR: could not open mono assembly\n");
+    exit(-1);
+  }
+  printf("opened mono assembly: %p\n", ma);
+
+  mi = mono_assembly_get_image(ma);
+  printf("mono image: %p\n", mi);
+
+  mo = mono_string_new(domain, cstr_arg);
+  mc = mono_class_from_name(mono_get_corlib(), "System", "String");
+  printf("string class: %p\n", mc);
+  arg_array = mono_array_new(domain, mc, 1);
+  mono_array_setref(arg_array, 0, mo);
+  args[0] = arg_array;  /* the single argument: string[] { "20" } */
+
+  for (i = 0; 0 == mm && i < sizeof entry_descs / sizeof entry_descs[0]; i++) {
+    if (mmd) mono_method_desc_free(mmd);  /* previous descriptor did not match */
+    mmd = mono_method_desc_new(entry_descs[i], 1);
+    mm = mono_method_desc_search_in_image(mmd, mi);
+  }
+  if (0 == mm) {
+    printf("Couldn't find Tests:Main(), Tests:Main(string[]) or SimdTests:Main(string[])\n");
+    exit(-1);
+  }
+  printf("mono desc method: %p\n", mmd);
+  printf("mono method: %p\n", mm);
+  mono_method_desc_free(mmd);  /* only needed for the lookup; was leaked before */
+
+  mo = mono_runtime_invoke(mm, NULL, args, NULL);
+  printf("mono object: %p\n", mo);
+
+  mono_jit_cleanup(domain);
+}
+
+int main(int argc, char *argv[]) {
+  /* Registers each statically linked AOT module, then runs argv[1] (or basic.exe when absent). */
+  static void **const modules[] = {
+    &mono_aot_module_mscorlib_info,
+    &mono_aot_module_TestDriver_info,
+    &mono_aot_module_System_Core_info,
+    &mono_aot_module_System_info,
+    &mono_aot_module_Mono_Posix_info,
+    &mono_aot_module_System_Configuration_info,
+    &mono_aot_module_System_Security_info,
+    &mono_aot_module_System_Xml_info,
+    &mono_aot_module_Mono_Security_info,
+    /* &mono_aot_module_System_Threading_info, */
+    &mono_aot_module_Mono_Simd_info,
+    &mono_aot_module_basic_info,
+    &mono_aot_module_basic_float_info,
+    &mono_aot_module_basic_long_info,
+    &mono_aot_module_basic_calls_info,
+    &mono_aot_module_basic_simd_info,
+    &mono_aot_module_objects_info,
+    &mono_aot_module_arrays_info,
+    &mono_aot_module_basic_math_info,
+    &mono_aot_module_exceptions_info,
+    &mono_aot_module_devirtualization_info
+    /* not registered: generics, generics_variant_types, thread_stress */
+  };
+  unsigned k;
+
+  for (k = 0; k < sizeof modules / sizeof modules[0]; k++)
+    mono_aot_register_module(*modules[k]);
+  if (argc >= 2) {
+    printf("\nProgram %s %s output:\n", argv[0], argv[1]);
+    printf("==========================\n\n");
+    try_one(argv[1]);
+  } else {
+    printf("no test specified; running basic.exe\n");
+    printf("==========================\n");
+    try_one("basic.exe");
+    printf("==========================\n");
+  }
+  return 0;
+}
Index: mono/mini/driver.c
===================================================================
--- mono/mini/driver.c	(revision 160382)
+++ mono/mini/driver.c	(working copy)
@@ -114,6 +114,9 @@
 	NULL
 };
 
+#ifdef __native_client_codegen__
+extern guint8 nacl_align_byte;
+#endif
 
 #define DEFAULT_OPTIMIZATIONS (	\
 	MONO_OPT_PEEPHOLE |	\
@@ -1118,6 +1121,9 @@
 		"    --trace[=EXPR]         Enable tracing, use --help-trace for details\n"
 		"    --jitmap               Output a jit method map to /tmp/perf-PID.map\n"
 		"    --help-devel           Shows more options available to developers\n"
+#ifdef __native_client_codegen__
+		"    --nacl-align-mask-off  Turn off Native Client 32-byte alignment mask (for debug only)\n"
+#endif
 		"\n"
 		"Runtime:\n"
 		"    --config FILE          Loads FILE as the Mono config\n"
@@ -1627,12 +1633,23 @@
 #endif
 		} else if (strcmp (argv [i], "--nollvm") == 0){
 			mono_use_llvm = FALSE;
+#ifdef __native_client_codegen__
+		} else if (strcmp (argv [i], "--nacl-align-mask-off") == 0){
+			nacl_align_byte = 0xff;	
+#endif
 		} else {
 			fprintf (stderr, "Unknown command line option: '%s'\n", argv [i]);
 			return 1;
 		}
 	}
 
+#ifdef __native_client_codegen__
+	if (getenv ("MONO_NACL_ALIGN_MASK_OFF"))
+	{
+		nacl_align_byte = 0xff;
+	}
+#endif
+
 	if (!argv [i]) {
 		mini_usage ();
 		return 1;
Index: mono/mini/mini-posix.c
===================================================================
--- mono/mini/mini-posix.c	(revision 160382)
+++ mono/mini/mini-posix.c	(working copy)
@@ -63,6 +63,46 @@
 
 #include "jit-icalls.h"
 
+#if defined(__native_client__)
+/* Stub versions of the profiler and signal-handler hooks, compiled when __native_client__ is defined. */
+void
+mono_runtime_setup_stat_profiler (void)
+{
+	printf("WARNING: mono_runtime_setup_stat_profiler() called!\n");
+}
+
+/* Nothing to shut down: the setup stub above only prints a warning. */
+void
+mono_runtime_shutdown_stat_profiler (void)
+{
+}
+
+/* Stub: always returns FALSE. */
+gboolean
+SIG_HANDLER_SIGNATURE (mono_chain_signal)
+{
+	return FALSE;
+}
+
+void
+mono_runtime_install_handlers (void)
+{
+}
+
+void
+mono_runtime_shutdown_handlers (void)
+{
+}
+
+void
+mono_runtime_cleanup_handlers (void)
+{
+}
+
+/* The regular implementations follow in the #else branch. */
+
+#else
+
 static GHashTable *mono_saved_signal_handlers = NULL;
 
 static gpointer
@@ -620,3 +660,5 @@
 	return TRUE;
 }
 #endif
+#endif /* __native_client__ */
+
Index: mono/utils/dlmalloc.c
===================================================================
--- mono/utils/dlmalloc.c	(revision 160382)
+++ mono/utils/dlmalloc.c	(working copy)
@@ -483,6 +483,13 @@
 #endif  /* HAVE_MORECORE */
 #endif  /* DARWIN */
 
+#if defined(__native_client__)
+#undef HAVE_MMAP
+#undef HAVE_MREMAP
+#define HAVE_MMAP 0
+#define HAVE_MREMAP 0
+#endif
+
 #ifndef LACKS_SYS_TYPES_H
 #include <sys/types.h>  /* For size_t */
 #endif  /* LACKS_SYS_TYPES_H */
Index: mono/utils/mono-codeman.c
===================================================================
--- mono/utils/mono-codeman.c	(revision 160382)
+++ mono/utils/mono-codeman.c	(working copy)
@@ -39,6 +39,14 @@
 #else
 #define MIN_ALIGN 8
 #endif
+#ifdef __native_client_codegen__
+/* For Google Native Client, all targets of indirect control flow need to    */
+/* be aligned to a 32-byte boundary. MIN_ALIGN was updated to 32 to force    */
+/* alignment for calls from tramp-x86.c to mono_global_codeman_reserve()     */
+/* and mono_domain_code_reserve().                                           */
+#undef MIN_ALIGN
+#define MIN_ALIGN 32
+#endif
 
 /* if a chunk has less than this amount of free space it's considered full */
 #define MAX_WASTAGE 32
Index: mono/arch/x86/x86-codegen.h
===================================================================
--- mono/arch/x86/x86-codegen.h	(revision 160382)
+++ mono/arch/x86/x86-codegen.h	(working copy)
@@ -15,6 +15,26 @@
 #ifndef X86_H
 #define X86_H
 #include <assert.h>
+
+#ifdef __native_client_codegen__
+#define kNaClAlignment 32 /* alignment, in bytes, used by the Native Client code paths */
+#define kNaClAlignmentMask (kNaClAlignment - 1) /* low bits of an address within one aligned unit */
+extern guint8 nacl_align_byte;
+#endif /* __native_client_codegen__ */
+
+/* NOTE(review): x86_call_sequence_post ends in ';' only in the NaCl variant below -- confirm callers tolerate both forms. */
+#if defined( __native_client_codegen__ ) && defined( TARGET_X86 )
+#define x86_codegen_pre(inst_ptr_ptr, inst_len) do { mono_nacl_align_inst(inst_ptr_ptr, inst_len); } while (0)
+#define x86_call_sequence_pre(inst) guint8* _code_start = (inst); /* declares a local: use where a declaration is allowed */
+#define x86_call_sequence_post(inst) \
+  (mono_nacl_align_call(&_code_start, &(inst)), _code_start);
+#else
+#define x86_codegen_pre(inst_ptr_ptr, inst_len) do {} while (0) /* no alignment work outside NaCl codegen */
+#define x86_call_sequence_pre(inst) guint8* _code_start = (inst); /* declares a local: use where a declaration is allowed */
+#define x86_call_sequence_post(inst) _code_start
+#endif  /* __native_client_codegen__ */
+
+
 /*
 // x86 register numbers
 */
@@ -278,6 +298,8 @@
 #define x86_regp_emit(inst,r,regno)  do { x86_address_byte ((inst), 0, (r), (regno)); } while (0)
 #define x86_mem_emit(inst,r,disp)    do { x86_address_byte ((inst), 0, (r), 5); x86_imm_emit32((inst), (disp)); } while (0)
 
+#define kMaxMembaseEmitPadding 6
+
 #define x86_membase_emit(inst,r,basereg,disp)	do {\
 	if ((basereg) == X86_ESP) {	\
 		if ((disp) == 0) {	\
@@ -307,6 +329,8 @@
 	}	\
 	} while (0)
 
+#define kMaxMemindexEmitPadding 6
+
 #define x86_memindex_emit(inst,r,basereg,disp,indexreg,shift)	\
 	do {	\
 		if ((basereg) == X86_NOBASEREG) {	\
@@ -343,7 +367,7 @@
  * the instruction is inspected for validity and the correct displacement
  * is inserted.
  */
-#define x86_patch(ins,target)	\
+#define x86_do_patch(ins,target)	\
 	do {	\
 		unsigned char* pos = (ins) + 1;	\
 		int disp, size = 0;	\
@@ -367,10 +391,73 @@
 		else assert (0);	\
 	} while (0)
 
+#if defined( __native_client_codegen__ ) && defined(TARGET_X86)
+
+#define x86_skip_nops(inst) \
+  do {    /* advance inst (modified in place) past any run of the no-op encodings below */ \
+    int in_nop = 0; \
+    do { \
+      in_nop = 0; \
+      if (inst[0] == 0x90) { /* 1 byte: nop */ \
+        in_nop = 1; \
+        inst += 1; \
+      } \
+      if (inst[0] == 0x8b && inst[1] == 0xc0) { /* 2 bytes: mov %eax,%eax */ \
+        in_nop = 1; \
+        inst += 2; \
+      } \
+      if (inst[0] == 0x8d && inst[1] == 0x6d /* 3 bytes: lea 0x0(%ebp),%ebp (disp8) */ \
+       && inst[2] == 0x00) { \
+        in_nop = 1; \
+        inst += 3; \
+      } \
+      if (inst[0] == 0x8d && inst[1] == 0x64 /* 4 bytes: lea 0x0(%esp),%esp (SIB, disp8) */ \
+       && inst[2] == 0x24 && inst[3] == 0x00) { \
+        in_nop = 1; \
+        inst += 4; \
+      } \
+      /* skip inst+=5 case because it's the 4-byte + 1-byte case */ \
+      if (inst[0] == 0x8d && inst[1] == 0xad /* 6 bytes: lea 0x0(%ebp),%ebp (disp32) */ \
+       && inst[2] == 0x00 && inst[3] == 0x00 \
+       && inst[4] == 0x00 && inst[5] == 0x00) { \
+        in_nop = 1; \
+        inst += 6; \
+      } \
+      if (inst[0] == 0x8d && inst[1] == 0xa4 /* 7 bytes: lea 0x0(%esp),%esp (SIB, disp32) */ \
+       && inst[2] == 0x24 && inst[3] == 0x00 \
+       && inst[4] == 0x00 && inst[5] == 0x00 \
+       && inst[6] == 0x00 ) { \
+        in_nop = 1; \
+        inst += 7; \
+      } \
+    } while ( in_nop );  /* rescan until a full pass matches no pattern */ \
+  } while (0)
+
+#define x86_patch(ins,target) \
+  do { /* NaCl variant: step over leading no-ops, then patch the real instruction */ \
+    unsigned char* _patch_inst = (ins); /* private name: (ins)/(target) may themselves mention 'inst' */ \
+    x86_skip_nops((_patch_inst)); \
+    x86_do_patch((_patch_inst), (target)); \
+  } while (0)
+
+#else
+#define x86_patch(ins,target) do { x86_do_patch((ins), (target)); } while (0)
+#endif /* __native_client_codegen__ */
+
+#ifdef __native_client_codegen__
+/* The breakpoint instruction is illegal in Native Client, although the HALT   */
+/* instruction is allowed. The breakpoint is used in several places in         */
+/* mini-x86.c and exceptions-x86.c.                                            */
 #define x86_breakpoint(inst) \
 	do {	\
+		*(inst)++ = 0xf4;	\
+	} while (0)
+#else
+#define x86_breakpoint(inst) \
+	do {	\
 		*(inst)++ = 0xcc;	\
 	} while (0)
+#endif
 
 #define x86_cld(inst) do { *(inst)++ =(unsigned char)0xfc; } while (0)
 #define x86_stosb(inst) do { *(inst)++ =(unsigned char)0xaa; } while (0)
@@ -380,7 +467,15 @@
 #define x86_movsl(inst) do { *(inst)++ =(unsigned char)0xa5; } while (0)
 #define x86_movsd(inst) x86_movsl((inst))
 
-#define x86_prefix(inst,p) do { *(inst)++ =(unsigned char) (p); } while (0)
+/* kNaClAlignment - 1 is the max value we can pass into x86_codegen_pre. */
+/* This keeps us from having to call x86_codegen_pre with specific       */
+/* knowledge of the size of the instruction that follows it, and         */
+/* localizes the alignment requirement to this spot.                     */
+#define x86_prefix(inst,p) \
+	do { \
+		x86_codegen_pre(&(inst), kNaClAlignment - 1); \
+		*(inst)++ =(unsigned char) (p); \
+	} while (0)
 
 #define x86_rdtsc(inst) \
 	do {	\
@@ -390,6 +485,7 @@
 
 #define x86_cmpxchg_reg_reg(inst,dreg,reg)	\
 	do {	\
+		x86_codegen_pre(&(inst), 3); \
 		*(inst)++ = (unsigned char)0x0f;	\
 		*(inst)++ = (unsigned char)0xb1;	\
 		x86_reg_emit ((inst), (reg), (dreg));	\
@@ -397,6 +493,7 @@
 	
 #define x86_cmpxchg_mem_reg(inst,mem,reg)	\
 	do {	\
+		x86_codegen_pre(&(inst), 7); \
 		*(inst)++ = (unsigned char)0x0f;	\
 		*(inst)++ = (unsigned char)0xb1;	\
 		x86_mem_emit ((inst), (reg), (mem));	\
@@ -404,6 +501,7 @@
 	
 #define x86_cmpxchg_membase_reg(inst,basereg,disp,reg)	\
 	do {	\
+		x86_codegen_pre(&(inst), 2 + kMaxMembaseEmitPadding); \
 		*(inst)++ = (unsigned char)0x0f;	\
 		*(inst)++ = (unsigned char)0xb1;	\
 		x86_membase_emit ((inst), (reg), (basereg), (disp));	\
@@ -411,6 +509,7 @@
 
 #define x86_xchg_reg_reg(inst,dreg,reg,size)	\
 	do {	\
+		x86_codegen_pre(&(inst), 2); \
 		if ((size) == 1)	\
 			*(inst)++ = (unsigned char)0x86;	\
 		else	\
@@ -420,6 +519,7 @@
 
 #define x86_xchg_mem_reg(inst,mem,reg,size)	\
 	do {	\
+		x86_codegen_pre(&(inst), 6); \
 		if ((size) == 1)	\
 			*(inst)++ = (unsigned char)0x86;	\
 		else	\
@@ -429,6 +529,7 @@
 
 #define x86_xchg_membase_reg(inst,basereg,disp,reg,size)	\
 	do {	\
+		x86_codegen_pre(&(inst), 1 + kMaxMembaseEmitPadding); \
 		if ((size) == 1)	\
 			*(inst)++ = (unsigned char)0x86;	\
 		else	\
@@ -438,6 +539,7 @@
 
 #define x86_xadd_reg_reg(inst,dreg,reg,size)	\
 	do {	\
+		x86_codegen_pre(&(inst), 4); \
 		*(inst)++ = (unsigned char)0x0F;     \
 		if ((size) == 1)	\
 			*(inst)++ = (unsigned char)0xC0;	\
@@ -448,6 +550,7 @@
 
 #define x86_xadd_mem_reg(inst,mem,reg,size)	\
 	do {	\
+		x86_codegen_pre(&(inst), 7); \
 		*(inst)++ = (unsigned char)0x0F;     \
 		if ((size) == 1)	\
 			*(inst)++ = (unsigned char)0xC0;	\
@@ -458,6 +561,7 @@
 
 #define x86_xadd_membase_reg(inst,basereg,disp,reg,size)	\
 	do {	\
+		x86_codegen_pre(&(inst), 2 + kMaxMembaseEmitPadding); \
 		*(inst)++ = (unsigned char)0x0F;     \
 		if ((size) == 1)	\
 			*(inst)++ = (unsigned char)0xC0;	\
@@ -468,12 +572,14 @@
 
 #define x86_inc_mem(inst,mem)	\
 	do {	\
+		x86_codegen_pre(&(inst), 6); \
 		*(inst)++ = (unsigned char)0xff;	\
 		x86_mem_emit ((inst), 0, (mem)); 	\
 	} while (0)
 
 #define x86_inc_membase(inst,basereg,disp)	\
 	do {	\
+		x86_codegen_pre(&(inst), 1 + kMaxMembaseEmitPadding); \
 		*(inst)++ = (unsigned char)0xff;	\
 		x86_membase_emit ((inst), 0, (basereg), (disp));	\
 	} while (0)
@@ -482,12 +588,14 @@
 
 #define x86_dec_mem(inst,mem)	\
 	do {	\
+		x86_codegen_pre(&(inst), 6); \
 		*(inst)++ = (unsigned char)0xff;	\
 		x86_mem_emit ((inst), 1, (mem));	\
 	} while (0)
 
 #define x86_dec_membase(inst,basereg,disp)	\
 	do {	\
+		x86_codegen_pre(&(inst), 1 + kMaxMembaseEmitPadding); \
 		*(inst)++ = (unsigned char)0xff;	\
 		x86_membase_emit ((inst), 1, (basereg), (disp));	\
 	} while (0)
@@ -496,36 +604,42 @@
 
 #define x86_not_mem(inst,mem)	\
 	do {	\
+		x86_codegen_pre(&(inst), 6); \
 		*(inst)++ = (unsigned char)0xf7;	\
 		x86_mem_emit ((inst), 2, (mem));	\
 	} while (0)
 
 #define x86_not_membase(inst,basereg,disp)	\
 	do {	\
+		x86_codegen_pre(&(inst), 1 + kMaxMembaseEmitPadding); \
 		*(inst)++ = (unsigned char)0xf7;	\
 		x86_membase_emit ((inst), 2, (basereg), (disp));	\
 	} while (0)
 
 #define x86_not_reg(inst,reg)	\
 	do {	\
+		x86_codegen_pre(&(inst), 2); \
 		*(inst)++ = (unsigned char)0xf7;	\
 		x86_reg_emit ((inst), 2, (reg));	\
 	} while (0)
 
 #define x86_neg_mem(inst,mem)	\
 	do {	\
+		x86_codegen_pre(&(inst), 6); /* opcode + modrm + disp32, same as x86_not_mem */ \
 		*(inst)++ = (unsigned char)0xf7;	\
 		x86_mem_emit ((inst), 3, (mem));	\
 	} while (0)
 
 #define x86_neg_membase(inst,basereg,disp)	\
 	do {	\
+		x86_codegen_pre(&(inst), 1 + kMaxMembaseEmitPadding); /* opcode + worst-case membase bytes */ \
 		*(inst)++ = (unsigned char)0xf7;	\
 		x86_membase_emit ((inst), 3, (basereg), (disp));	\
 	} while (0)
 
 #define x86_neg_reg(inst,reg)	\
 	do {	\
+		x86_codegen_pre(&(inst), 2); \
 		*(inst)++ = (unsigned char)0xf7;	\
 		x86_reg_emit ((inst), 3, (reg));	\
 	} while (0)
@@ -535,15 +649,18 @@
 #define x86_alu_reg_imm(inst,opc,reg,imm) 	\
 	do {	\
 		if ((reg) == X86_EAX) {	\
+			x86_codegen_pre(&(inst), 5); \
 			*(inst)++ = (((unsigned char)(opc)) << 3) + 5;	\
 			x86_imm_emit32 ((inst), (imm));	\
 			break;	\
 		}	\
 		if (x86_is_imm8((imm))) {	\
+			x86_codegen_pre(&(inst), 3); \
 			*(inst)++ = (unsigned char)0x83;	\
 			x86_reg_emit ((inst), (opc), (reg));	\
 			x86_imm_emit8 ((inst), (imm));	\
 		} else {	\
+			x86_codegen_pre(&(inst), 6); \
 			*(inst)++ = (unsigned char)0x81;	\
 			x86_reg_emit ((inst), (opc), (reg));	\
 			x86_imm_emit32 ((inst), (imm));	\
@@ -553,10 +670,12 @@
 #define x86_alu_mem_imm(inst,opc,mem,imm) 	\
 	do {	\
 		if (x86_is_imm8((imm))) {	\
+			x86_codegen_pre(&(inst), 7); \
 			*(inst)++ = (unsigned char)0x83;	\
 			x86_mem_emit ((inst), (opc), (mem));	\
 			x86_imm_emit8 ((inst), (imm));	\
 		} else {	\
+			x86_codegen_pre(&(inst), 10); \
 			*(inst)++ = (unsigned char)0x81;	\
 			x86_mem_emit ((inst), (opc), (mem));	\
 			x86_imm_emit32 ((inst), (imm));	\
@@ -566,10 +685,12 @@
 #define x86_alu_membase_imm(inst,opc,basereg,disp,imm) 	\
 	do {	\
 		if (x86_is_imm8((imm))) {	\
+			x86_codegen_pre(&(inst), 2 + kMaxMembaseEmitPadding); \
 			*(inst)++ = (unsigned char)0x83;	\
 			x86_membase_emit ((inst), (opc), (basereg), (disp));	\
 			x86_imm_emit8 ((inst), (imm));	\
 		} else {	\
+			x86_codegen_pre(&(inst), 5 + kMaxMembaseEmitPadding); \
 			*(inst)++ = (unsigned char)0x81;	\
 			x86_membase_emit ((inst), (opc), (basereg), (disp));	\
 			x86_imm_emit32 ((inst), (imm));	\
@@ -578,6 +699,7 @@
 	
 #define x86_alu_membase8_imm(inst,opc,basereg,disp,imm) 	\
 	do {	\
+		x86_codegen_pre(&(inst), 2 + kMaxMembaseEmitPadding); \
 		*(inst)++ = (unsigned char)0x80;	\
 		x86_membase_emit ((inst), (opc), (basereg), (disp));	\
 		x86_imm_emit8 ((inst), (imm)); \
@@ -585,18 +707,21 @@
 
 #define x86_alu_mem_reg(inst,opc,mem,reg)	\
 	do {	\
+		x86_codegen_pre(&(inst), 6); \
 		*(inst)++ = (((unsigned char)(opc)) << 3) + 1;	\
 		x86_mem_emit ((inst), (reg), (mem));	\
 	} while (0)
 
 #define x86_alu_membase_reg(inst,opc,basereg,disp,reg)	\
 	do {	\
+		x86_codegen_pre(&(inst), 1 + kMaxMembaseEmitPadding); \
 		*(inst)++ = (((unsigned char)(opc)) << 3) + 1;	\
 		x86_membase_emit ((inst), (reg), (basereg), (disp));	\
 	} while (0)
 
 #define x86_alu_reg_reg(inst,opc,dreg,reg)	\
 	do {	\
+		x86_codegen_pre(&(inst), 2); \
 		*(inst)++ = (((unsigned char)(opc)) << 3) + 3;	\
 		x86_reg_emit ((inst), (dreg), (reg));	\
 	} while (0)
@@ -612,24 +737,28 @@
  */
 #define x86_alu_reg8_reg8(inst,opc,dreg,reg,is_dreg_h,is_reg_h)	\
 	do {	\
+		x86_codegen_pre(&(inst), 2); \
 		*(inst)++ = (((unsigned char)(opc)) << 3) + 2;	\
 		x86_reg8_emit ((inst), (dreg), (reg), (is_dreg_h), (is_reg_h));	\
 	} while (0)
 
 #define x86_alu_reg_mem(inst,opc,reg,mem)	\
 	do {	\
+		x86_codegen_pre(&(inst), 6); \
 		*(inst)++ = (((unsigned char)(opc)) << 3) + 3;	\
 		x86_mem_emit ((inst), (reg), (mem));	\
 	} while (0)
 
 #define x86_alu_reg_membase(inst,opc,reg,basereg,disp)	\
 	do {	\
+		x86_codegen_pre(&(inst), 1 + kMaxMembaseEmitPadding); \
 		*(inst)++ = (((unsigned char)(opc)) << 3) + 3;	\
 		x86_membase_emit ((inst), (reg), (basereg), (disp));	\
 	} while (0)
 
 #define x86_test_reg_imm(inst,reg,imm)	\
 	do {	\
+		x86_codegen_pre(&(inst), 6); \
 		if ((reg) == X86_EAX) {	\
 			*(inst)++ = (unsigned char)0xa9;	\
 		} else {	\
@@ -641,6 +770,7 @@
 
 #define x86_test_mem_imm(inst,mem,imm)	\
 	do {	\
+		x86_codegen_pre(&(inst), 10); \
 		*(inst)++ = (unsigned char)0xf7;	\
 		x86_mem_emit ((inst), 0, (mem));	\
 		x86_imm_emit32 ((inst), (imm));	\
@@ -648,6 +778,7 @@
 
 #define x86_test_membase_imm(inst,basereg,disp,imm)	\
 	do {	\
+		x86_codegen_pre(&(inst), 5 + kMaxMembaseEmitPadding); \
 		*(inst)++ = (unsigned char)0xf7;	\
 		x86_membase_emit ((inst), 0, (basereg), (disp));	\
 		x86_imm_emit32 ((inst), (imm));	\
@@ -655,18 +786,21 @@
 
 #define x86_test_reg_reg(inst,dreg,reg)	\
 	do {	\
+		x86_codegen_pre(&(inst), 2); \
 		*(inst)++ = (unsigned char)0x85;	\
 		x86_reg_emit ((inst), (reg), (dreg));	\
 	} while (0)
 
 #define x86_test_mem_reg(inst,mem,reg)	\
 	do {	\
+		x86_codegen_pre(&(inst), 6); \
 		*(inst)++ = (unsigned char)0x85;	\
 		x86_mem_emit ((inst), (reg), (mem));	\
 	} while (0)
 
 #define x86_test_membase_reg(inst,basereg,disp,reg)	\
 	do {	\
+		x86_codegen_pre(&(inst), 1 + kMaxMembaseEmitPadding); \
 		*(inst)++ = (unsigned char)0x85;	\
 		x86_membase_emit ((inst), (reg), (basereg), (disp));	\
 	} while (0)
@@ -674,9 +808,11 @@
 #define x86_shift_reg_imm(inst,opc,reg,imm)	\
 	do {	\
 		if ((imm) == 1) {	\
+			x86_codegen_pre(&(inst), 2); \
 			*(inst)++ = (unsigned char)0xd1;	\
 			x86_reg_emit ((inst), (opc), (reg));	\
 		} else {	\
+			x86_codegen_pre(&(inst), 3); \
 			*(inst)++ = (unsigned char)0xc1;	\
 			x86_reg_emit ((inst), (opc), (reg));	\
 			x86_imm_emit8 ((inst), (imm));	\
@@ -686,9 +822,11 @@
 #define x86_shift_mem_imm(inst,opc,mem,imm)	\
 	do {	\
 		if ((imm) == 1) {	\
+			x86_codegen_pre(&(inst), 6); \
 			*(inst)++ = (unsigned char)0xd1;	\
 			x86_mem_emit ((inst), (opc), (mem));	\
 		} else {	\
+			x86_codegen_pre(&(inst), 7); \
 			*(inst)++ = (unsigned char)0xc1;	\
 			x86_mem_emit ((inst), (opc), (mem));	\
 			x86_imm_emit8 ((inst), (imm));	\
@@ -698,9 +836,11 @@
 #define x86_shift_membase_imm(inst,opc,basereg,disp,imm)	\
 	do {	\
 		if ((imm) == 1) {	\
+			x86_codegen_pre(&(inst), 1 + kMaxMembaseEmitPadding); /* opcode + worst-case membase bytes */ \
 			*(inst)++ = (unsigned char)0xd1;	\
 			x86_membase_emit ((inst), (opc), (basereg), (disp));	\
 		} else {	\
+			x86_codegen_pre(&(inst), 2 + kMaxMembaseEmitPadding); /* opcode + membase bytes + imm8 */ \
 			*(inst)++ = (unsigned char)0xc1;	\
 			x86_membase_emit ((inst), (opc), (basereg), (disp));	\
 			x86_imm_emit8 ((inst), (imm));	\
@@ -709,18 +849,21 @@
 
 #define x86_shift_reg(inst,opc,reg)	\
 	do {	\
+		x86_codegen_pre(&(inst), 2); \
 		*(inst)++ = (unsigned char)0xd3;	\
 		x86_reg_emit ((inst), (opc), (reg));	\
 	} while (0)
 
 #define x86_shift_mem(inst,opc,mem)	\
 	do {	\
+		x86_codegen_pre(&(inst), 6); \
 		*(inst)++ = (unsigned char)0xd3;	\
 		x86_mem_emit ((inst), (opc), (mem));	\
 	} while (0)
 
 #define x86_shift_membase(inst,opc,basereg,disp)	\
 	do {	\
+		x86_codegen_pre(&(inst), 1 + kMaxMembaseEmitPadding); \
 		*(inst)++ = (unsigned char)0xd3;	\
 		x86_membase_emit ((inst), (opc), (basereg), (disp));	\
 	} while (0)
@@ -731,6 +874,7 @@
 
 #define x86_shrd_reg(inst,dreg,reg)                     \
         do {                                            \
+		x86_codegen_pre(&(inst), 3); \
 		*(inst)++ = (unsigned char)0x0f;	\
 		*(inst)++ = (unsigned char)0xad;	\
 		x86_reg_emit ((inst), (reg), (dreg));	\
@@ -738,6 +882,7 @@
 
 #define x86_shrd_reg_imm(inst,dreg,reg,shamt)           \
         do {                                            \
+		x86_codegen_pre(&(inst), 4); \
 		*(inst)++ = (unsigned char)0x0f;	\
 		*(inst)++ = (unsigned char)0xac;	\
 		x86_reg_emit ((inst), (reg), (dreg));	\
@@ -746,6 +891,7 @@
 
 #define x86_shld_reg(inst,dreg,reg)                     \
         do {                                            \
+		x86_codegen_pre(&(inst), 3); \
 		*(inst)++ = (unsigned char)0x0f;	\
 		*(inst)++ = (unsigned char)0xa5;	\
 		x86_reg_emit ((inst), (reg), (dreg));	\
@@ -753,6 +899,7 @@
 
 #define x86_shld_reg_imm(inst,dreg,reg,shamt)           \
         do {                                            \
+		x86_codegen_pre(&(inst), 4); \
 		*(inst)++ = (unsigned char)0x0f;	\
 		*(inst)++ = (unsigned char)0xa4;	\
 		x86_reg_emit ((inst), (reg), (dreg));	\
@@ -764,18 +911,21 @@
  */
 #define x86_mul_reg(inst,reg,is_signed)	\
 	do {	\
+		x86_codegen_pre(&(inst), 2); \
 		*(inst)++ = (unsigned char)0xf7;	\
 		x86_reg_emit ((inst), 4 + ((is_signed) ? 1 : 0), (reg));	\
 	} while (0)
 
 #define x86_mul_mem(inst,mem,is_signed)	\
 	do {	\
+		x86_codegen_pre(&(inst), 6); \
 		*(inst)++ = (unsigned char)0xf7;	\
 		x86_mem_emit ((inst), 4 + ((is_signed) ? 1 : 0), (mem));	\
 	} while (0)
 
 #define x86_mul_membase(inst,basereg,disp,is_signed)	\
 	do {	\
+		x86_codegen_pre(&(inst), 1 + kMaxMembaseEmitPadding); \
 		*(inst)++ = (unsigned char)0xf7;	\
 		x86_membase_emit ((inst), 4 + ((is_signed) ? 1 : 0), (basereg), (disp));	\
 	} while (0)
@@ -785,6 +935,7 @@
  */
 #define x86_imul_reg_reg(inst,dreg,reg)	\
 	do {	\
+		x86_codegen_pre(&(inst), 3); \
 		*(inst)++ = (unsigned char)0x0f;	\
 		*(inst)++ = (unsigned char)0xaf;	\
 		x86_reg_emit ((inst), (dreg), (reg));	\
@@ -792,6 +943,7 @@
 
 #define x86_imul_reg_mem(inst,reg,mem)	\
 	do {	\
+		x86_codegen_pre(&(inst), 7); \
 		*(inst)++ = (unsigned char)0x0f;	\
 		*(inst)++ = (unsigned char)0xaf;	\
 		x86_mem_emit ((inst), (reg), (mem));	\
@@ -799,6 +951,7 @@
 
 #define x86_imul_reg_membase(inst,reg,basereg,disp)	\
 	do {	\
+		x86_codegen_pre(&(inst), 2 + kMaxMembaseEmitPadding); \
 		*(inst)++ = (unsigned char)0x0f;	\
 		*(inst)++ = (unsigned char)0xaf;	\
 		x86_membase_emit ((inst), (reg), (basereg), (disp));	\
@@ -810,10 +963,12 @@
 #define x86_imul_reg_reg_imm(inst,dreg,reg,imm)	\
 	do {	\
 		if (x86_is_imm8 ((imm))) {	\
+			x86_codegen_pre(&(inst), 3); \
 			*(inst)++ = (unsigned char)0x6b;	\
 			x86_reg_emit ((inst), (dreg), (reg));	\
 			x86_imm_emit8 ((inst), (imm));	\
 		} else {	\
+			x86_codegen_pre(&(inst), 6); \
 			*(inst)++ = (unsigned char)0x69;	\
 			x86_reg_emit ((inst), (dreg), (reg));	\
 			x86_imm_emit32 ((inst), (imm));	\
@@ -823,10 +978,12 @@
 #define x86_imul_reg_mem_imm(inst,reg,mem,imm)	\
 	do {	\
 		if (x86_is_imm8 ((imm))) {	\
+			x86_codegen_pre(&(inst), 7); \
 			*(inst)++ = (unsigned char)0x6b;	\
 			x86_mem_emit ((inst), (reg), (mem));	\
 			x86_imm_emit8 ((inst), (imm));	\
 		} else {	\
+			x86_codegen_pre(&(inst), 10); /* size of the mem form: opcode + modrm + disp32 + imm32 */ \
 			*(inst)++ = (unsigned char)0x69;	\
 			x86_reg_emit ((inst), (reg), (mem));	\
 			x86_imm_emit32 ((inst), (imm));	\
@@ -836,10 +993,12 @@
 #define x86_imul_reg_membase_imm(inst,reg,basereg,disp,imm)	\
 	do {	\
 		if (x86_is_imm8 ((imm))) {	\
+			x86_codegen_pre(&(inst), 2 + kMaxMembaseEmitPadding); \
 			*(inst)++ = (unsigned char)0x6b;	\
 			x86_membase_emit ((inst), (reg), (basereg), (disp));	\
 			x86_imm_emit8 ((inst), (imm));	\
 		} else {	\
+			x86_codegen_pre(&(inst), 5 + kMaxMembaseEmitPadding); \
 			*(inst)++ = (unsigned char)0x69;	\
 			x86_membase_emit ((inst), (reg), (basereg), (disp));	\
 			x86_imm_emit32 ((inst), (imm));	\
@@ -853,24 +1012,28 @@
 
 #define x86_div_reg(inst,reg,is_signed)	\
 	do {	\
+		x86_codegen_pre(&(inst), 2); \
 		*(inst)++ = (unsigned char)0xf7;	\
 		x86_reg_emit ((inst), 6 + ((is_signed) ? 1 : 0), (reg));	\
 	} while (0)
 
 #define x86_div_mem(inst,mem,is_signed)	\
 	do {	\
+		x86_codegen_pre(&(inst), 6); \
 		*(inst)++ = (unsigned char)0xf7;	\
 		x86_mem_emit ((inst), 6 + ((is_signed) ? 1 : 0), (mem));	\
 	} while (0)
 
 #define x86_div_membase(inst,basereg,disp,is_signed)	\
 	do {	\
+		x86_codegen_pre(&(inst), 1 + kMaxMembaseEmitPadding); \
 		*(inst)++ = (unsigned char)0xf7;	\
 		x86_membase_emit ((inst), 6 + ((is_signed) ? 1 : 0), (basereg), (disp));	\
 	} while (0)
 
 #define x86_mov_mem_reg(inst,mem,reg,size)	\
 	do {	\
+		x86_codegen_pre(&(inst), 7); \
 		switch ((size)) {	\
 		case 1: *(inst)++ = (unsigned char)0x88; break;	\
 		case 2: *(inst)++ = (unsigned char)0x66; /* fall through */	\
@@ -882,6 +1045,7 @@
 
 #define x86_mov_regp_reg(inst,regp,reg,size)	\
 	do {	\
+		x86_codegen_pre(&(inst), 3); \
 		switch ((size)) {	\
 		case 1: *(inst)++ = (unsigned char)0x88; break;	\
 		case 2: *(inst)++ = (unsigned char)0x66; /* fall through */	\
@@ -893,6 +1057,7 @@
 
 #define x86_mov_membase_reg(inst,basereg,disp,reg,size)	\
 	do {	\
+		x86_codegen_pre(&(inst), 2 + kMaxMembaseEmitPadding); \
 		switch ((size)) {	\
 		case 1: *(inst)++ = (unsigned char)0x88; break;	\
 		case 2: *(inst)++ = (unsigned char)0x66; /* fall through */	\
@@ -904,6 +1069,7 @@
 
 #define x86_mov_memindex_reg(inst,basereg,disp,indexreg,shift,reg,size)	\
 	do {	\
+		x86_codegen_pre(&(inst), 2 + kMaxMemindexEmitPadding); \
 		switch ((size)) {	\
 		case 1: *(inst)++ = (unsigned char)0x88; break;	\
 		case 2: *(inst)++ = (unsigned char)0x66; /* fall through */	\
@@ -915,6 +1081,7 @@
 
 #define x86_mov_reg_reg(inst,dreg,reg,size)	\
 	do {	\
+		x86_codegen_pre(&(inst), 3); \
 		switch ((size)) {	\
 		case 1: *(inst)++ = (unsigned char)0x8a; break;	\
 		case 2: *(inst)++ = (unsigned char)0x66; /* fall through */	\
@@ -926,6 +1093,7 @@
 
 #define x86_mov_reg_mem(inst,reg,mem,size)	\
 	do {	\
+		x86_codegen_pre(&(inst), 7); \
 		switch ((size)) {	\
 		case 1: *(inst)++ = (unsigned char)0x8a; break;	\
 		case 2: *(inst)++ = (unsigned char)0x66; /* fall through */	\
@@ -935,8 +1103,11 @@
 		x86_mem_emit ((inst), (reg), (mem));	\
 	} while (0)
 
+#define kMovRegMembasePadding (2 + kMaxMembaseEmitPadding)
+
 #define x86_mov_reg_membase(inst,reg,basereg,disp,size)	\
 	do {	\
+		x86_codegen_pre(&(inst), kMovRegMembasePadding); \
 		switch ((size)) {	\
 		case 1: *(inst)++ = (unsigned char)0x8a; break;	\
 		case 2: *(inst)++ = (unsigned char)0x66; /* fall through */	\
@@ -948,6 +1119,7 @@
 
 #define x86_mov_reg_memindex(inst,reg,basereg,disp,indexreg,shift,size)	\
 	do {	\
+		x86_codegen_pre(&(inst), 2 + kMaxMemindexEmitPadding); \
 		switch ((size)) {	\
 		case 1: *(inst)++ = (unsigned char)0x8a; break;	\
 		case 2: *(inst)++ = (unsigned char)0x66; /* fall through */	\
@@ -964,6 +1136,7 @@
 
 #define x86_mov_reg_imm(inst,reg,imm)	\
 	do {	\
+		x86_codegen_pre(&(inst), 5); \
 		*(inst)++ = (unsigned char)0xb8 + (reg);	\
 		x86_imm_emit32 ((inst), (imm));	\
 	} while (0)
@@ -971,15 +1144,18 @@
 #define x86_mov_mem_imm(inst,mem,imm,size)	\
 	do {	\
 		if ((size) == 1) {	\
+			x86_codegen_pre(&(inst), 7); \
 			*(inst)++ = (unsigned char)0xc6;	\
 			x86_mem_emit ((inst), 0, (mem));	\
 			x86_imm_emit8 ((inst), (imm));	\
 		} else if ((size) == 2) {	\
+			x86_codegen_pre(&(inst), 9); \
 			*(inst)++ = (unsigned char)0x66;	\
 			*(inst)++ = (unsigned char)0xc7;	\
 			x86_mem_emit ((inst), 0, (mem));	\
 			x86_imm_emit16 ((inst), (imm));	\
 		} else {	\
+			x86_codegen_pre(&(inst), 10); \
 			*(inst)++ = (unsigned char)0xc7;	\
 			x86_mem_emit ((inst), 0, (mem));	\
 			x86_imm_emit32 ((inst), (imm));	\
@@ -989,15 +1165,18 @@
 #define x86_mov_membase_imm(inst,basereg,disp,imm,size)	\
 	do {	\
 		if ((size) == 1) {	\
+			x86_codegen_pre(&(inst), 2 + kMaxMembaseEmitPadding); \
 			*(inst)++ = (unsigned char)0xc6;	\
 			x86_membase_emit ((inst), 0, (basereg), (disp));	\
 			x86_imm_emit8 ((inst), (imm));	\
 		} else if ((size) == 2) {	\
+			x86_codegen_pre(&(inst), 4 + kMaxMembaseEmitPadding); \
 			*(inst)++ = (unsigned char)0x66;	\
 			*(inst)++ = (unsigned char)0xc7;	\
 			x86_membase_emit ((inst), 0, (basereg), (disp));	\
 			x86_imm_emit16 ((inst), (imm));	\
 		} else {	\
+			x86_codegen_pre(&(inst), 5 + kMaxMembaseEmitPadding); \
 			*(inst)++ = (unsigned char)0xc7;	\
 			x86_membase_emit ((inst), 0, (basereg), (disp));	\
 			x86_imm_emit32 ((inst), (imm));	\
@@ -1007,15 +1186,18 @@
 #define x86_mov_memindex_imm(inst,basereg,disp,indexreg,shift,imm,size)	\
 	do {	\
 		if ((size) == 1) {	\
+			x86_codegen_pre(&(inst), 2 + kMaxMemindexEmitPadding); \
 			*(inst)++ = (unsigned char)0xc6;	\
 			x86_memindex_emit ((inst), 0, (basereg), (disp), (indexreg), (shift));	\
 			x86_imm_emit8 ((inst), (imm));	\
 		} else if ((size) == 2) {	\
+			x86_codegen_pre(&(inst), 4 + kMaxMemindexEmitPadding); \
 			*(inst)++ = (unsigned char)0x66;	\
 			*(inst)++ = (unsigned char)0xc7;	\
 			x86_memindex_emit ((inst), 0, (basereg), (disp), (indexreg), (shift));	\
 			x86_imm_emit16 ((inst), (imm));	\
 		} else {	\
+			x86_codegen_pre(&(inst), 5 + kMaxMemindexEmitPadding); \
 			*(inst)++ = (unsigned char)0xc7;	\
 			x86_memindex_emit ((inst), 0, (basereg), (disp), (indexreg), (shift));	\
 			x86_imm_emit32 ((inst), (imm));	\
@@ -1024,18 +1206,21 @@
 
 #define x86_lea_mem(inst,reg,mem)	\
 	do {	\
+		x86_codegen_pre(&(inst), 6); /* opcode + modrm + disp32 */ \
 		*(inst)++ = (unsigned char)0x8d;	\
 		x86_mem_emit ((inst), (reg), (mem));	\
 	} while (0)
 
 #define x86_lea_membase(inst,reg,basereg,disp)	\
 	do {	\
+		x86_codegen_pre(&(inst), 1 + kMaxMembaseEmitPadding); \
 		*(inst)++ = (unsigned char)0x8d;	\
 		x86_membase_emit ((inst), (reg), (basereg), (disp));	\
 	} while (0)
 
 #define x86_lea_memindex(inst,reg,basereg,disp,indexreg,shift)	\
 	do {	\
+		x86_codegen_pre(&(inst), 1 + kMaxMemindexEmitPadding); \
 		*(inst)++ = (unsigned char)0x8d;	\
 		x86_memindex_emit ((inst), (reg), (basereg), (disp), (indexreg), (shift));	\
 	} while (0)
@@ -1044,6 +1229,7 @@
 	do {	\
 		unsigned char op = 0xb6;	\
                 g_assert (is_half ||  X86_IS_BYTE_REG (reg)); \
+		x86_codegen_pre(&(inst), 3); \
 		*(inst)++ = (unsigned char)0x0f;	\
 		if ((is_signed)) op += 0x08;	\
 		if ((is_half)) op += 0x01;	\
@@ -1054,6 +1240,7 @@
 #define x86_widen_mem(inst,dreg,mem,is_signed,is_half)	\
 	do {	\
 		unsigned char op = 0xb6;	\
+		x86_codegen_pre(&(inst), 7); \
 		*(inst)++ = (unsigned char)0x0f;	\
 		if ((is_signed)) op += 0x08;	\
 		if ((is_half)) op += 0x01;	\
@@ -1064,6 +1251,7 @@
 #define x86_widen_membase(inst,dreg,basereg,disp,is_signed,is_half)	\
 	do {	\
 		unsigned char op = 0xb6;	\
+		x86_codegen_pre(&(inst), 2 + kMaxMembaseEmitPadding); \
 		*(inst)++ = (unsigned char)0x0f;	\
 		if ((is_signed)) op += 0x08;	\
 		if ((is_half)) op += 0x01;	\
@@ -1074,6 +1262,7 @@
 #define x86_widen_memindex(inst,dreg,basereg,disp,indexreg,shift,is_signed,is_half)	\
 	do {	\
 		unsigned char op = 0xb6;	\
+		x86_codegen_pre(&(inst), 2 + kMaxMemindexEmitPadding); \
 		*(inst)++ = (unsigned char)0x0f;	\
 		if ((is_signed)) op += 0x08;	\
 		if ((is_half)) op += 0x01;	\
@@ -1086,18 +1275,21 @@
 
 #define x86_fp_op_mem(inst,opc,mem,is_double)	\
 	do {	\
+		x86_codegen_pre(&(inst), 6); \
 		*(inst)++ = (is_double) ? (unsigned char)0xdc : (unsigned char)0xd8;	\
 		x86_mem_emit ((inst), (opc), (mem));	\
 	} while (0)
 
 #define x86_fp_op_membase(inst,opc,basereg,disp,is_double)	\
 	do {	\
+		x86_codegen_pre(&(inst), 1 + kMaxMembaseEmitPadding); \
 		*(inst)++ = (is_double) ? (unsigned char)0xdc : (unsigned char)0xd8;	\
 		x86_membase_emit ((inst), (opc), (basereg), (disp));	\
 	} while (0)
 
 #define x86_fp_op(inst,opc,index)	\
 	do {	\
+		x86_codegen_pre(&(inst), 2); \
 		*(inst)++ = (unsigned char)0xd8;	\
 		*(inst)++ = (unsigned char)0xc0+((opc)<<3)+((index)&0x07);	\
 	} while (0)
@@ -1105,6 +1297,7 @@
 #define x86_fp_op_reg(inst,opc,index,pop_stack)	\
 	do {	\
 		static const unsigned char map[] = { 0, 1, 2, 3, 5, 4, 7, 6, 8};	\
+		x86_codegen_pre(&(inst), 2); \
 		*(inst)++ = (pop_stack) ? (unsigned char)0xde : (unsigned char)0xdc;	\
 		*(inst)++ = (unsigned char)0xc0+(map[(opc)]<<3)+((index)&0x07);	\
 	} while (0)
@@ -1118,126 +1311,147 @@
  */
 #define x86_fp_int_op_membase(inst,opc,basereg,disp,is_int)	\
 	do {	\
+		x86_codegen_pre(&(inst), 1 + kMaxMembaseEmitPadding); \
 		*(inst)++ = (is_int) ? (unsigned char)0xda : (unsigned char)0xde;	\
 		x86_membase_emit ((inst), opc, (basereg), (disp));	\
 	} while (0)
 
 #define x86_fstp(inst,index)	\
 	do {	\
+		x86_codegen_pre(&(inst), 2); \
 		*(inst)++ = (unsigned char)0xdd;	\
 		*(inst)++ = (unsigned char)0xd8+(index);	\
 	} while (0)
 
 #define x86_fcompp(inst)	\
 	do {	\
+		x86_codegen_pre(&(inst), 2); \
 		*(inst)++ = (unsigned char)0xde;	\
 		*(inst)++ = (unsigned char)0xd9;	\
 	} while (0)
 
 #define x86_fucompp(inst)	\
 	do {	\
+		x86_codegen_pre(&(inst), 2); \
 		*(inst)++ = (unsigned char)0xda;	\
 		*(inst)++ = (unsigned char)0xe9;	\
 	} while (0)
 
 #define x86_fnstsw(inst)	\
 	do {	\
+		x86_codegen_pre(&(inst), 2); \
 		*(inst)++ = (unsigned char)0xdf;	\
 		*(inst)++ = (unsigned char)0xe0;	\
 	} while (0)
 
 #define x86_fnstcw(inst,mem)	\
 	do {	\
+		x86_codegen_pre(&(inst), 6); \
 		*(inst)++ = (unsigned char)0xd9;	\
 		x86_mem_emit ((inst), 7, (mem));	\
 	} while (0)
 
 #define x86_fnstcw_membase(inst,basereg,disp)	\
 	do {	\
+		x86_codegen_pre(&(inst), 1 + kMaxMembaseEmitPadding); \
 		*(inst)++ = (unsigned char)0xd9;	\
 		x86_membase_emit ((inst), 7, (basereg), (disp));	\
 	} while (0)
 
 #define x86_fldcw(inst,mem)	\
 	do {	\
+		x86_codegen_pre(&(inst), 6); \
 		*(inst)++ = (unsigned char)0xd9;	\
 		x86_mem_emit ((inst), 5, (mem));	\
 	} while (0)
 
 #define x86_fldcw_membase(inst,basereg,disp)	\
 	do {	\
+		x86_codegen_pre(&(inst), 1 + kMaxMembaseEmitPadding); \
 		*(inst)++ = (unsigned char)0xd9;	\
 		x86_membase_emit ((inst), 5, (basereg), (disp));	\
 	} while (0)
 
 #define x86_fchs(inst)	\
 	do {	\
+		x86_codegen_pre(&(inst), 2); \
 		*(inst)++ = (unsigned char)0xd9;	\
 		*(inst)++ = (unsigned char)0xe0;	\
 	} while (0)
 
 #define x86_frem(inst)	\
 	do {	\
+		x86_codegen_pre(&(inst), 2); \
 		*(inst)++ = (unsigned char)0xd9;	\
 		*(inst)++ = (unsigned char)0xf8;	\
 	} while (0)
 
 #define x86_fxch(inst,index)	\
 	do {	\
+		x86_codegen_pre(&(inst), 2); \
 		*(inst)++ = (unsigned char)0xd9;	\
 		*(inst)++ = (unsigned char)0xc8 + ((index) & 0x07);	\
 	} while (0)
 
 #define x86_fcomi(inst,index)	\
 	do {	\
+		x86_codegen_pre(&(inst), 2); \
 		*(inst)++ = (unsigned char)0xdb;	\
 		*(inst)++ = (unsigned char)0xf0 + ((index) & 0x07);	\
 	} while (0)
 
 #define x86_fcomip(inst,index)	\
 	do {	\
+		x86_codegen_pre(&(inst), 2); \
 		*(inst)++ = (unsigned char)0xdf;	\
 		*(inst)++ = (unsigned char)0xf0 + ((index) & 0x07);	\
 	} while (0)
 
 #define x86_fucomi(inst,index)	\
 	do {	\
+		x86_codegen_pre(&(inst), 2); \
 		*(inst)++ = (unsigned char)0xdb;	\
 		*(inst)++ = (unsigned char)0xe8 + ((index) & 0x07);	\
 	} while (0)
 
 #define x86_fucomip(inst,index)	\
 	do {	\
+		x86_codegen_pre(&(inst), 2); \
 		*(inst)++ = (unsigned char)0xdf;	\
 		*(inst)++ = (unsigned char)0xe8 + ((index) & 0x07);	\
 	} while (0)
 
 #define x86_fld(inst,mem,is_double)	\
 	do {	\
+		x86_codegen_pre(&(inst), 6); \
 		*(inst)++ = (is_double) ? (unsigned char)0xdd : (unsigned char)0xd9;	\
 		x86_mem_emit ((inst), 0, (mem));	\
 	} while (0)
 
 #define x86_fld_membase(inst,basereg,disp,is_double)	\
 	do {	\
+		x86_codegen_pre(&(inst), 1 + kMaxMembaseEmitPadding); \
 		*(inst)++ = (is_double) ? (unsigned char)0xdd : (unsigned char)0xd9;	\
 		x86_membase_emit ((inst), 0, (basereg), (disp));	\
 	} while (0)
 
 #define x86_fld80_mem(inst,mem)	\
 	do {	\
+		x86_codegen_pre(&(inst), 6); \
 		*(inst)++ = (unsigned char)0xdb;	\
 		x86_mem_emit ((inst), 5, (mem));	\
 	} while (0)
 
 #define x86_fld80_membase(inst,basereg,disp)	\
 	do {	\
+		x86_codegen_pre(&(inst), 1 + kMaxMembaseEmitPadding); \
 		*(inst)++ = (unsigned char)0xdb;	\
 		x86_membase_emit ((inst), 5, (basereg), (disp));	\
 	} while (0)
 
 #define x86_fild(inst,mem,is_long)	\
 	do {	\
+		x86_codegen_pre(&(inst), 6); \
 		if ((is_long)) {	\
 			*(inst)++ = (unsigned char)0xdf;	\
 			x86_mem_emit ((inst), 5, (mem));	\
@@ -1249,6 +1463,7 @@
 
 #define x86_fild_membase(inst,basereg,disp,is_long)	\
 	do {	\
+		x86_codegen_pre(&(inst), 1 + kMaxMembaseEmitPadding); \
 		if ((is_long)) {	\
 			*(inst)++ = (unsigned char)0xdf;	\
 			x86_membase_emit ((inst), 5, (basereg), (disp));	\
@@ -1260,42 +1475,49 @@
 
 #define x86_fld_reg(inst,index)	\
 	do {	\
+		x86_codegen_pre(&(inst), 2); \
 		*(inst)++ = (unsigned char)0xd9;	\
 		*(inst)++ = (unsigned char)0xc0 + ((index) & 0x07);	\
 	} while (0)
 
 #define x86_fldz(inst)	\
 	do {	\
+		x86_codegen_pre(&(inst), 2); \
 		*(inst)++ = (unsigned char)0xd9;	\
 		*(inst)++ = (unsigned char)0xee;	\
 	} while (0)
 
 #define x86_fld1(inst)	\
 	do {	\
+		x86_codegen_pre(&(inst), 2); \
 		*(inst)++ = (unsigned char)0xd9;	\
 		*(inst)++ = (unsigned char)0xe8;	\
 	} while (0)
 
 #define x86_fldpi(inst)	\
 	do {	\
+		x86_codegen_pre(&(inst), 2); \
 		*(inst)++ = (unsigned char)0xd9;	\
 		*(inst)++ = (unsigned char)0xeb;	\
 	} while (0)
 
 #define x86_fst(inst,mem,is_double,pop_stack)	\
 	do {	\
+		x86_codegen_pre(&(inst), 6); \
 		*(inst)++ = (is_double) ? (unsigned char)0xdd: (unsigned char)0xd9;	\
 		x86_mem_emit ((inst), 2 + ((pop_stack) ? 1 : 0), (mem));	\
 	} while (0)
 
 #define x86_fst_membase(inst,basereg,disp,is_double,pop_stack)	\
 	do {	\
+		x86_codegen_pre(&(inst), 1 + kMaxMembaseEmitPadding); \
 		*(inst)++ = (is_double) ? (unsigned char)0xdd: (unsigned char)0xd9;	\
 		x86_membase_emit ((inst), 2 + ((pop_stack) ? 1 : 0), (basereg), (disp));	\
 	} while (0)
 
 #define x86_fst80_mem(inst,mem)	\
 	do {	\
+		x86_codegen_pre(&(inst), 6); \
 		*(inst)++ = (unsigned char)0xdb;	\
 		x86_mem_emit ((inst), 7, (mem));	\
 	} while (0)
@@ -1303,6 +1525,7 @@
 
 #define x86_fst80_membase(inst,basereg,disp)	\
 	do {	\
+		x86_codegen_pre(&(inst), 1 + kMaxMembaseEmitPadding); \
 		*(inst)++ = (unsigned char)0xdb;	\
 		x86_membase_emit ((inst), 7, (basereg), (disp));	\
 	} while (0)
@@ -1310,6 +1533,7 @@
 
 #define x86_fist_pop(inst,mem,is_long)	\
 	do {	\
+		x86_codegen_pre(&(inst), 6); \
 		if ((is_long)) {	\
 			*(inst)++ = (unsigned char)0xdf;	\
 			x86_mem_emit ((inst), 7, (mem));	\
@@ -1321,6 +1545,7 @@
 
 #define x86_fist_pop_membase(inst,basereg,disp,is_long)	\
 	do {	\
+		x86_codegen_pre(&(inst), 1 + kMaxMembaseEmitPadding); \
 		if ((is_long)) {	\
 			*(inst)++ = (unsigned char)0xdf;	\
 			x86_membase_emit ((inst), 7, (basereg), (disp));	\
@@ -1332,6 +1557,7 @@
 
 #define x86_fstsw(inst)	\
 	do {	\
+			x86_codegen_pre(&(inst), 3); \
 			*(inst)++ = (unsigned char)0x9b;	\
 			*(inst)++ = (unsigned char)0xdf;	\
 			*(inst)++ = (unsigned char)0xe0;	\
@@ -1345,6 +1571,7 @@
  */
 #define x86_fist_membase(inst,basereg,disp,is_int)	\
 	do {	\
+		x86_codegen_pre(&(inst), 1 + kMaxMembaseEmitPadding); \
 		if ((is_int)) {	\
 			*(inst)++ = (unsigned char)0xdb;	\
 			x86_membase_emit ((inst), 2, (basereg), (disp));	\
@@ -1362,24 +1589,28 @@
 
 #define x86_push_regp(inst,reg)	\
 	do {	\
+		x86_codegen_pre(&(inst), 2); \
 		*(inst)++ = (unsigned char)0xff;	\
 		x86_regp_emit ((inst), 6, (reg));	\
 	} while (0)
 
 #define x86_push_mem(inst,mem)	\
 	do {	\
+		x86_codegen_pre(&(inst), 6); \
 		*(inst)++ = (unsigned char)0xff;	\
 		x86_mem_emit ((inst), 6, (mem));	\
 	} while (0)
 
 #define x86_push_membase(inst,basereg,disp)	\
 	do {	\
+		x86_codegen_pre(&(inst), 1 + kMaxMembaseEmitPadding); \
 		*(inst)++ = (unsigned char)0xff;	\
 		x86_membase_emit ((inst), 6, (basereg), (disp));	\
 	} while (0)
 
 #define x86_push_memindex(inst,basereg,disp,indexreg,shift)	\
 	do {	\
+		x86_codegen_pre(&(inst), 1 + kMaxMemindexEmitPadding); \
 		*(inst)++ = (unsigned char)0xff;	\
 		x86_memindex_emit ((inst), 6, (basereg), (disp), (indexreg), (shift));	\
 	} while (0)
@@ -1390,9 +1621,11 @@
 	do {	\
 		int _imm = (int) (imm);	\
 		if (x86_is_imm8 (_imm)) {	\
+			x86_codegen_pre(&(inst), 2); \
 			*(inst)++ = (unsigned char)0x6A;	\
 			x86_imm_emit8 ((inst), (_imm));	\
 		} else {	\
+			x86_codegen_pre(&(inst), 5); \
 			*(inst)++ = (unsigned char)0x68;	\
 			x86_imm_emit32 ((inst), (_imm));	\
 		}	\
@@ -1405,12 +1638,14 @@
 
 #define x86_pop_mem(inst,mem)	\
 	do {	\
+		x86_codegen_pre(&(inst), 6); \
 		*(inst)++ = (unsigned char)0x87;	\
 		x86_mem_emit ((inst), 0, (mem));	\
 	} while (0)
 
 #define x86_pop_membase(inst,basereg,disp)	\
 	do {	\
+		x86_codegen_pre(&(inst), 1 + kMaxMembaseEmitPadding); \
 		*(inst)++ = (unsigned char)0x87;	\
 		x86_membase_emit ((inst), 0, (basereg), (disp));	\
 	} while (0)
@@ -1422,34 +1657,70 @@
 
 #define x86_loop(inst,imm)	\
 	do {	\
+		x86_codegen_pre(&(inst), 2); \
 		*(inst)++ = (unsigned char)0xe2;	\
 		x86_imm_emit8 ((inst), (imm));	\
 	} while (0)
 
 #define x86_loope(inst,imm)	\
 	do {	\
+		x86_codegen_pre(&(inst), 2); \
 		*(inst)++ = (unsigned char)0xe1;	\
 		x86_imm_emit8 ((inst), (imm));	\
 	} while (0)
 
 #define x86_loopne(inst,imm)	\
 	do {	\
+		x86_codegen_pre(&(inst), 2); \
 		*(inst)++ = (unsigned char)0xe0;	\
 		x86_imm_emit8 ((inst), (imm));	\
 	} while (0)
 
 #define x86_jump32(inst,imm)	\
 	do {	\
+		x86_codegen_pre(&(inst), 5); \
 		*(inst)++ = (unsigned char)0xe9;	\
 		x86_imm_emit32 ((inst), (imm));	\
 	} while (0)
 
 #define x86_jump8(inst,imm)	\
 	do {	\
+		x86_codegen_pre(&(inst), 2); \
 		*(inst)++ = (unsigned char)0xeb;	\
 		x86_imm_emit8 ((inst), (imm));	\
 	} while (0)
 
+
+#ifdef __native_client_codegen__
+#define x86_jump_reg(inst,reg)	do {	\
+    x86_codegen_pre(&(inst), 5);			\
+    *(inst)++ = (unsigned char)0x83;  /* and */		\
+    x86_reg_emit ((inst), 4, (reg));  /* reg */		\
+    *(inst)++ = (unsigned char)nacl_align_byte;		\
+    *(inst)++ = (unsigned char)0xff;			\
+    x86_reg_emit ((inst), 4, (reg));			\
+  } while (0)
+
+/* These load the jump target into ECX and jump through it, so ECX is clobbered; let's hope ECX is available for these... */
+#define x86_jump_mem(inst,mem)	do {	\
+    x86_mov_reg_mem(inst, (X86_ECX), (mem), 4);		\
+    x86_jump_reg(inst, (X86_ECX));			\
+  } while (0)
+
+#define x86_jump_membase(inst,basereg,disp) do {	\
+    x86_mov_reg_membase(inst, (X86_ECX), basereg, disp, 4);	\
+    x86_jump_reg(inst, (X86_ECX));				\
+  } while (0)
+
+/* like x86_jump_membase, but force a 32-bit displacement  */
+#define x86_jump_membase32(inst,basereg,disp) do {	\
+    x86_codegen_pre(&(inst), 6); \
+    *(inst)++ = (unsigned char)0x8b;			\
+    x86_address_byte ((inst), 2, X86_ECX, (basereg));	\
+    x86_imm_emit32 ((inst), (disp));			\
+    x86_jump_reg(inst, (X86_ECX));			\
+  } while (0)
+#else  /* __native_client_codegen__ */
 #define x86_jump_reg(inst,reg)	\
 	do {	\
 		*(inst)++ = (unsigned char)0xff;	\
@@ -1467,17 +1738,20 @@
 		*(inst)++ = (unsigned char)0xff;	\
 		x86_membase_emit ((inst), 4, (basereg), (disp));	\
 	} while (0)
-
+#endif  /* __native_client_codegen__ */
 /*
  * target is a pointer in our buffer.
  */
 #define x86_jump_code(inst,target)	\
 	do {	\
-		int t = (unsigned char*)(target) - (inst) - 2;	\
+		int t; \
+		x86_codegen_pre(&(inst), 2); \
+		t = (unsigned char*)(target) - (inst) - 2;	\
 		if (x86_is_imm8(t)) {	\
 			x86_jump8 ((inst), t);	\
 		} else {	\
-			t -= 3;	\
+			x86_codegen_pre(&(inst), 5); \
+			t = (unsigned char*)(target) - (inst) - 5;	\
 			x86_jump32 ((inst), t);	\
 		}	\
 	} while (0)
@@ -1495,6 +1769,7 @@
 
 #define x86_branch8(inst,cond,imm,is_signed)	\
 	do {	\
+		x86_codegen_pre(&(inst), 2); \
 		if ((is_signed))	\
 			*(inst)++ = x86_cc_signed_map [(cond)];	\
 		else	\
@@ -1504,6 +1779,7 @@
 
 #define x86_branch32(inst,cond,imm,is_signed)	\
 	do {	\
+		x86_codegen_pre(&(inst), 6); \
 		*(inst)++ = (unsigned char)0x0f;	\
 		if ((is_signed))	\
 			*(inst)++ = x86_cc_signed_map [(cond)] + 0x10;	\
@@ -1514,11 +1790,13 @@
 
 #define x86_branch(inst,cond,target,is_signed)	\
 	do {	\
+		x86_codegen_pre(&(inst), 2); \
 		int offset = (target) - (inst) - 2;	\
 		if (x86_is_imm8 ((offset)))	\
 			x86_branch8 ((inst), (cond), offset, (is_signed));	\
 		else {	\
-			offset -= 4;	\
+			x86_codegen_pre(&(inst), 6); \
+			offset = (target) - (inst) - 6;	\
 			x86_branch32 ((inst), (cond), offset, (is_signed));	\
 		}	\
 	} while (0)
@@ -1537,6 +1815,7 @@
 #define x86_set_reg(inst,cond,reg,is_signed)	\
 	do {	\
                 g_assert (X86_IS_BYTE_REG (reg)); \
+		x86_codegen_pre(&(inst), 3); \
 		*(inst)++ = (unsigned char)0x0f;	\
 		if ((is_signed))	\
 			*(inst)++ = x86_cc_signed_map [(cond)] + 0x20;	\
@@ -1547,6 +1826,7 @@
 
 #define x86_set_mem(inst,cond,mem,is_signed)	\
 	do {	\
+		x86_codegen_pre(&(inst), 7); \
 		*(inst)++ = (unsigned char)0x0f;	\
 		if ((is_signed))	\
 			*(inst)++ = x86_cc_signed_map [(cond)] + 0x20;	\
@@ -1557,6 +1837,7 @@
 
 #define x86_set_membase(inst,cond,basereg,disp,is_signed)	\
 	do {	\
+		x86_codegen_pre(&(inst), 2 + kMaxMembaseEmitPadding); \
 		*(inst)++ = (unsigned char)0x0f;	\
 		if ((is_signed))	\
 			*(inst)++ = x86_cc_signed_map [(cond)] + 0x20;	\
@@ -1565,12 +1846,48 @@
 		x86_membase_emit ((inst), 0, (basereg), (disp));	\
 	} while (0)
 
-#define x86_call_imm(inst,disp)	\
+#define x86_call_imm_body(inst,disp) \
 	do {	\
 		*(inst)++ = (unsigned char)0xe8;	\
 		x86_imm_emit32 ((inst), (int)(disp));	\
 	} while (0)
 
+#define x86_call_imm(inst,disp)	\
+	do {	\
+		x86_call_sequence_pre((inst)); \
+		x86_call_imm_body((inst), (disp)); \
+		x86_call_sequence_post((inst)); \
+	} while (0)
+
+#ifdef __native_client_codegen__
+#define x86_call_reg_internal(inst,reg)	\
+  do {							\
+    x86_codegen_pre(&(inst), 5);			\
+    *(inst)++ = (unsigned char)0x83;  /* and */		\
+    x86_reg_emit ((inst), 4, (reg));  /* reg */		\
+    *(inst)++ = (unsigned char)nacl_align_byte;		\
+    *(inst)++ = (unsigned char)0xff;  /* call */	\
+    x86_reg_emit ((inst), 2, (reg));  /* reg */		\
+  } while (0)
+
+#define x86_call_reg(inst, reg) do {		\
+    x86_call_sequence_pre((inst));              \
+    x86_call_reg_internal(inst, reg);		\
+    x86_call_sequence_post((inst));             \
+  } while (0)
+
+
+/* It appears that x86_call_mem() is never used, so I'm leaving it out. */
+#define x86_call_membase(inst,basereg,disp)  do {		\
+    x86_call_sequence_pre((inst));                              \
+    /* x86_mov_reg_membase() inlined so that its size is fixed */	\
+    *(inst)++ = (unsigned char)0x8b;				\
+    x86_address_byte ((inst), 2, (X86_ECX), (basereg));		\
+    x86_imm_emit32 ((inst), (disp));				\
+    x86_call_reg_internal(inst, X86_ECX);			\
+    x86_call_sequence_post((inst));                             \
+  } while (0)
+#else  /* __native_client_codegen__ */
 #define x86_call_reg(inst,reg)	\
 	do {	\
 		*(inst)++ = (unsigned char)0xff;	\
@@ -1588,14 +1905,59 @@
 		*(inst)++ = (unsigned char)0xff;	\
 		x86_membase_emit ((inst), 2, (basereg), (disp));	\
 	} while (0)
+#endif  /* __native_client_codegen__ */
 
+
+#ifdef __native_client_codegen__
+
 #define x86_call_code(inst,target)	\
 	do {	\
-		int _x86_offset = (unsigned char*)(target) - (inst);	\
+		int _x86_offset; \
+		guint8* _aligned_start; \
+		x86_call_sequence_pre((inst)); \
+		_x86_offset = (unsigned char*)(target) - (inst);	\
 		_x86_offset -= 5;	\
-		x86_call_imm ((inst), _x86_offset);	\
+		x86_call_imm_body ((inst), _x86_offset);	\
+		_aligned_start = x86_call_sequence_post((inst)); \
+		_x86_offset = (unsigned char*)(target) - (_aligned_start);	\
+		_x86_offset -= 5;	\
+		x86_call_imm_body ((_aligned_start), _x86_offset);	\
 	} while (0)
 
+#define SIZE_OF_RET 6
+#define x86_ret(inst) do { \
+    *(inst)++ = (unsigned char)0x59;  /* pop ecx */		\
+    x86_codegen_pre(&(inst), 5); \
+    *(inst)++ = (unsigned char)0x83;  /* and nacl_align_byte, ecx */ \
+    *(inst)++ = (unsigned char)0xe1;				\
+    *(inst)++ = (unsigned char)nacl_align_byte;			\
+    *(inst)++ = (unsigned char)0xff;  /* jmp ecx */ 		\
+    *(inst)++ = (unsigned char)0xe1;				\
+  } while (0)
+
+/* pop return address */
+/* pop imm bytes from stack */
+/* return */
+#define x86_ret_imm(inst,imm)	do {	\
+    *(inst)++ = (unsigned char)0x59;  /* pop ecx */		\
+    x86_alu_reg_imm ((inst), X86_ADD, X86_ESP, imm);		\
+    x86_codegen_pre(&(inst), 5); \
+    *(inst)++ = (unsigned char)0x83;  /* and nacl_align_byte, ecx */ \
+    *(inst)++ = (unsigned char)0xe1;				\
+    *(inst)++ = (unsigned char)nacl_align_byte;			\
+    *(inst)++ = (unsigned char)0xff;  /* jmp ecx */ 		\
+    *(inst)++ = (unsigned char)0xe1;				\
+} while (0)
+#else  /* __native_client_codegen__ */
+
+#define x86_call_code(inst,target)	\
+	do {	\
+		int _x86_offset; \
+		_x86_offset = (unsigned char*)(target) - (inst);	\
+		_x86_offset -= 5;	\
+		x86_call_imm_body ((inst), _x86_offset);	\
+	} while (0)
+
 #define x86_ret(inst) do { *(inst)++ = (unsigned char)0xc3; } while (0)
 
 #define x86_ret_imm(inst,imm)	\
@@ -1603,13 +1965,16 @@
 		if ((imm) == 0) {	\
 			x86_ret ((inst));	\
 		} else {	\
+			x86_codegen_pre(&(inst), 3); \
 			*(inst)++ = (unsigned char)0xc2;	\
 			x86_imm_emit16 ((inst), (imm));	\
 		}	\
 	} while (0)
+#endif  /* __native_client_codegen__ */
 
 #define x86_cmov_reg(inst,cond,is_signed,dreg,reg)	\
 	do {	\
+		x86_codegen_pre(&(inst), 3); \
 		*(inst)++ = (unsigned char) 0x0f;	\
 		if ((is_signed))	\
 			*(inst)++ = x86_cc_signed_map [(cond)] - 0x30;	\
@@ -1620,6 +1985,7 @@
 
 #define x86_cmov_mem(inst,cond,is_signed,reg,mem)	\
 	do {	\
+		x86_codegen_pre(&(inst), 7); \
 		*(inst)++ = (unsigned char) 0x0f;	\
 		if ((is_signed))	\
 			*(inst)++ = x86_cc_signed_map [(cond)] - 0x30;	\
@@ -1630,6 +1996,7 @@
 
 #define x86_cmov_membase(inst,cond,is_signed,reg,basereg,disp)	\
 	do {	\
+		x86_codegen_pre(&(inst), 2 + kMaxMembaseEmitPadding); \
 		*(inst)++ = (unsigned char) 0x0f;	\
 		if ((is_signed))	\
 			*(inst)++ = x86_cc_signed_map [(cond)] - 0x30;	\
@@ -1640,6 +2007,7 @@
 
 #define x86_enter(inst,framesize)	\
 	do {	\
+		x86_codegen_pre(&(inst), 4); \
 		*(inst)++ = (unsigned char)0xc8;	\
 		x86_imm_emit16 ((inst), (framesize));	\
 		*(inst)++ = 0;	\
@@ -1648,17 +2016,17 @@
 #define x86_leave(inst) do { *(inst)++ = (unsigned char)0xc9; } while (0)
 #define x86_sahf(inst)  do { *(inst)++ = (unsigned char)0x9e; } while (0)
 
-#define x86_fsin(inst) do { *(inst)++ = (unsigned char)0xd9; *(inst)++ = (unsigned char)0xfe; } while (0)
-#define x86_fcos(inst) do { *(inst)++ = (unsigned char)0xd9; *(inst)++ = (unsigned char)0xff; } while (0)
-#define x86_fabs(inst) do { *(inst)++ = (unsigned char)0xd9; *(inst)++ = (unsigned char)0xe1; } while (0)
-#define x86_ftst(inst) do { *(inst)++ = (unsigned char)0xd9; *(inst)++ = (unsigned char)0xe4; } while (0)
-#define x86_fxam(inst) do { *(inst)++ = (unsigned char)0xd9; *(inst)++ = (unsigned char)0xe5; } while (0)
-#define x86_fpatan(inst) do { *(inst)++ = (unsigned char)0xd9; *(inst)++ = (unsigned char)0xf3; } while (0)
-#define x86_fprem(inst) do { *(inst)++ = (unsigned char)0xd9; *(inst)++ = (unsigned char)0xf8; } while (0)
-#define x86_fprem1(inst) do { *(inst)++ = (unsigned char)0xd9; *(inst)++ = (unsigned char)0xf5; } while (0)
-#define x86_frndint(inst) do { *(inst)++ = (unsigned char)0xd9; *(inst)++ = (unsigned char)0xfc; } while (0)
-#define x86_fsqrt(inst) do { *(inst)++ = (unsigned char)0xd9; *(inst)++ = (unsigned char)0xfa; } while (0)
-#define x86_fptan(inst) do { *(inst)++ = (unsigned char)0xd9; *(inst)++ = (unsigned char)0xf2; } while (0)
+#define x86_fsin(inst) do { x86_codegen_pre(&(inst), 2); *(inst)++ = (unsigned char)0xd9; *(inst)++ = (unsigned char)0xfe; } while (0)
+#define x86_fcos(inst) do { x86_codegen_pre(&(inst), 2); *(inst)++ = (unsigned char)0xd9; *(inst)++ = (unsigned char)0xff; } while (0)
+#define x86_fabs(inst) do { x86_codegen_pre(&(inst), 2); *(inst)++ = (unsigned char)0xd9; *(inst)++ = (unsigned char)0xe1; } while (0)
+#define x86_ftst(inst) do { x86_codegen_pre(&(inst), 2); *(inst)++ = (unsigned char)0xd9; *(inst)++ = (unsigned char)0xe4; } while (0)
+#define x86_fxam(inst) do { x86_codegen_pre(&(inst), 2); *(inst)++ = (unsigned char)0xd9; *(inst)++ = (unsigned char)0xe5; } while (0)
+#define x86_fpatan(inst) do { x86_codegen_pre(&(inst), 2); *(inst)++ = (unsigned char)0xd9; *(inst)++ = (unsigned char)0xf3; } while (0)
+#define x86_fprem(inst) do { x86_codegen_pre(&(inst), 2); *(inst)++ = (unsigned char)0xd9; *(inst)++ = (unsigned char)0xf8; } while (0)
+#define x86_fprem1(inst) do { x86_codegen_pre(&(inst), 2); *(inst)++ = (unsigned char)0xd9; *(inst)++ = (unsigned char)0xf5; } while (0)
+#define x86_frndint(inst) do { x86_codegen_pre(&(inst), 2); *(inst)++ = (unsigned char)0xd9; *(inst)++ = (unsigned char)0xfc; } while (0)
+#define x86_fsqrt(inst) do { x86_codegen_pre(&(inst), 2); *(inst)++ = (unsigned char)0xd9; *(inst)++ = (unsigned char)0xfa; } while (0)
+#define x86_fptan(inst) do { x86_codegen_pre(&(inst), 2); *(inst)++ = (unsigned char)0xd9; *(inst)++ = (unsigned char)0xf2; } while (0)
 
 #define x86_padding(inst,size)	\
 	do {	\
@@ -1686,6 +2054,14 @@
 		}	\
 	} while (0)
 
+#ifdef __native_client_codegen__
+
+#define kNaClLengthOfCallReg 5
+#define kNaClLengthOfCallImm 5
+#define kNaClLengthOfCallMembase (kNaClLengthOfCallReg + 6)
+
+#endif  /* __native_client_codegen__ */
+
 #define x86_prolog(inst,frame_size,reg_mask)	\
 	do {	\
 		unsigned i, m = 1;	\
@@ -1853,6 +2229,7 @@
 /* minimal SSE* support */
 #define x86_movsd_reg_membase(inst,dreg,basereg,disp)	\
 	do {	\
+		x86_codegen_pre(&(inst), 3 + kMaxMembaseEmitPadding); \
 		*(inst)++ = (unsigned char)0xf2;	\
 		*(inst)++ = (unsigned char)0x0f;	\
 		*(inst)++ = (unsigned char)0x10;	\
@@ -1861,6 +2238,7 @@
 
 #define x86_cvttsd2si(inst,dreg,reg)	\
 	do {	\
+		x86_codegen_pre(&(inst), 4); \
 		*(inst)++ = (unsigned char)0xf2;	\
 		*(inst)++ = (unsigned char)0x0f;	\
 		*(inst)++ = (unsigned char)0x2c;	\
@@ -1869,6 +2247,7 @@
 
 #define x86_sse_alu_reg_reg(inst,opc,dreg,reg)	\
 	do {	\
+		x86_codegen_pre(&(inst), 3); \
 		*(inst)++ = (unsigned char)0x0F;	\
 		*(inst)++ = (unsigned char)(opc);	\
 		x86_reg_emit ((inst), (dreg), (reg));	\
@@ -1876,6 +2255,7 @@
 
 #define x86_sse_alu_reg_membase(inst,opc,sreg,basereg,disp)	\
 		do {	\
+			x86_codegen_pre(&(inst), 2 + kMaxMembaseEmitPadding); \
 			*(inst)++ = (unsigned char)0x0f;	\
 			*(inst)++ = (unsigned char)(opc);	\
 			x86_membase_emit ((inst), (sreg), (basereg), (disp));	\
@@ -1883,6 +2263,7 @@
 
 #define x86_sse_alu_membase_reg(inst,opc,basereg,disp,reg)	\
 	do {	\
+		x86_codegen_pre(&(inst), 2 + kMaxMembaseEmitPadding); \
 		*(inst)++ = (unsigned char)0x0F;	\
 		*(inst)++ = (unsigned char)(opc);	\
 		x86_membase_emit ((inst), (reg), (basereg), (disp));	\
@@ -1891,30 +2272,35 @@
 
 #define x86_sse_alu_pd_reg_reg(inst,opc,dreg,reg)       \
 	do {    \
+		x86_codegen_pre(&(inst), 4); \
 		*(inst)++ = (unsigned char)0x66;        \
 		x86_sse_alu_reg_reg ((inst), (opc), (dreg), (reg)); \
 	} while (0)
 
 #define x86_sse_alu_pd_membase_reg(inst,opc,basereg,disp,reg)	\
 	do {	\
+		x86_codegen_pre(&(inst), 3 + kMaxMembaseEmitPadding); \
 		*(inst)++ = (unsigned char)0x66;	\
 		x86_sse_alu_membase_reg ((inst), (opc), (basereg), (disp), (reg)); \
 	} while (0)
 
 #define x86_sse_alu_pd_reg_membase(inst,opc,dreg,basereg,disp)	\
 	do {	\
+		x86_codegen_pre(&(inst), 3 + kMaxMembaseEmitPadding); \
 		*(inst)++ = (unsigned char)0x66;	\
 		x86_sse_alu_reg_membase ((inst), (opc), (dreg),(basereg), (disp)); \
 	} while (0)
 
 #define x86_sse_alu_pd_reg_reg_imm(inst,opc,dreg,reg,imm)	\
 	do {	\
+		x86_codegen_pre(&(inst), 5); \
 		x86_sse_alu_pd_reg_reg ((inst), (opc), (dreg), (reg)); \
 		*(inst)++ = (unsigned char)(imm);	\
 	} while (0)
 
 #define x86_sse_alu_pd_reg_membase_imm(inst,opc,dreg,basereg,disp,imm)	\
 	do {	\
+		x86_codegen_pre(&(inst), 4 + kMaxMembaseEmitPadding); \
 		x86_sse_alu_pd_reg_membase ((inst), (opc), (dreg),(basereg), (disp)); \
 		*(inst)++ = (unsigned char)(imm);	\
 	} while (0)
@@ -1927,6 +2313,7 @@
 
 #define x86_sse_alu_ps_reg_reg_imm(inst,opc,dreg,reg, imm)	\
 	do {	\
+		x86_codegen_pre(&(inst), 4); \
 		x86_sse_alu_reg_reg ((inst), (opc), (dreg), (reg)); \
 		*(inst)++ = (unsigned char)imm;	\
 	} while (0)
@@ -1934,12 +2321,14 @@
 
 #define x86_sse_alu_sd_reg_reg(inst,opc,dreg,reg)       \
 	do {    \
+		x86_codegen_pre(&(inst), 4); \
 		*(inst)++ = (unsigned char)0xF2;        \
 		x86_sse_alu_reg_reg ((inst), (opc), (dreg), (reg)); \
 	} while (0)
 
 #define x86_sse_alu_sd_membase_reg(inst,opc,basereg,disp,reg)	\
 	do {    \
+		x86_codegen_pre(&(inst), 3 + kMaxMembaseEmitPadding); \
 		*(inst)++ = (unsigned char)0xF2;        \
 		x86_sse_alu_membase_reg ((inst), (opc), (basereg), (disp), (reg));	\
 	} while (0)
@@ -1947,12 +2336,14 @@
 
 #define x86_sse_alu_ss_reg_reg(inst,opc,dreg,reg)       \
 	do {    \
+		x86_codegen_pre(&(inst), 4); \
 		*(inst)++ = (unsigned char)0xF3;        \
 		x86_sse_alu_reg_reg ((inst), (opc), (dreg), (reg)); \
 	} while (0)
 
 #define x86_sse_alu_ss_membase_reg(inst,opc,basereg,disp,reg)       \
 	do {    \
+		x86_codegen_pre(&(inst), 3 + kMaxMembaseEmitPadding); \
 		*(inst)++ = (unsigned char)0xF3;        \
 		x86_sse_alu_membase_reg ((inst), (opc), (basereg), (disp), (reg));	\
 	} while (0)
@@ -1961,6 +2352,7 @@
 
 #define x86_sse_alu_sse41_reg_reg(inst,opc,dreg,reg)       \
 	do {    \
+		x86_codegen_pre(&(inst), 5); \
 		*(inst)++ = (unsigned char)0x66;        \
 		*(inst)++ = (unsigned char)0x0F;	\
 		*(inst)++ = (unsigned char)0x38;	\
@@ -1970,6 +2362,7 @@
 
 #define x86_movups_reg_membase(inst,sreg,basereg,disp)	\
 	do {	\
+		x86_codegen_pre(&(inst), 2 + kMaxMembaseEmitPadding); \
 		*(inst)++ = (unsigned char)0x0f;	\
 		*(inst)++ = (unsigned char)0x10;	\
 		x86_membase_emit ((inst), (sreg), (basereg), (disp));	\
@@ -1977,6 +2370,7 @@
 
 #define x86_movups_membase_reg(inst,basereg,disp,reg)	\
 	do {	\
+		x86_codegen_pre(&(inst), 2 + kMaxMembaseEmitPadding); \
 		*(inst)++ = (unsigned char)0x0f;	\
 		*(inst)++ = (unsigned char)0x11;	\
 		x86_membase_emit ((inst), (reg), (basereg), (disp));	\
@@ -1984,6 +2378,7 @@
 
 #define x86_movaps_reg_membase(inst,sreg,basereg,disp)	\
 	do {	\
+		x86_codegen_pre(&(inst), 2 + kMaxMembaseEmitPadding); \
 		*(inst)++ = (unsigned char)0x0f;	\
 		*(inst)++ = (unsigned char)0x28;	\
 		x86_membase_emit ((inst), (sreg), (basereg), (disp));	\
@@ -1991,6 +2386,7 @@
 
 #define x86_movaps_membase_reg(inst,basereg,disp,reg)	\
 	do {	\
+		x86_codegen_pre(&(inst), 2 + kMaxMembaseEmitPadding); \
 		*(inst)++ = (unsigned char)0x0f;	\
 		*(inst)++ = (unsigned char)0x29;	\
 		x86_membase_emit ((inst), (reg), (basereg), (disp));	\
@@ -1998,6 +2394,7 @@
 
 #define x86_movaps_reg_reg(inst,dreg,sreg)	\
 	do {	\
+		x86_codegen_pre(&(inst), 3); \
 		*(inst)++ = (unsigned char)0x0f;	\
 		*(inst)++ = (unsigned char)0x28;	\
 		x86_reg_emit ((inst), (dreg), (sreg));	\
@@ -2006,6 +2403,7 @@
 
 #define x86_movd_reg_xreg(inst,dreg,sreg)	\
 	do {	\
+		x86_codegen_pre(&(inst), 4); \
 		*(inst)++ = (unsigned char)0x66;	\
 		*(inst)++ = (unsigned char)0x0f;	\
 		*(inst)++ = (unsigned char)0x7e;	\
@@ -2014,6 +2412,7 @@
 
 #define x86_movd_xreg_reg(inst,dreg,sreg)	\
 	do {	\
+		x86_codegen_pre(&(inst), 4); \
 		*(inst)++ = (unsigned char)0x66;	\
 		*(inst)++ = (unsigned char)0x0f;	\
 		*(inst)++ = (unsigned char)0x6e;	\
@@ -2022,6 +2421,7 @@
 
 #define x86_movd_xreg_membase(inst,sreg,basereg,disp)	\
 	do {	\
+		x86_codegen_pre(&(inst), 3 + kMaxMembaseEmitPadding); \
 		*(inst)++ = (unsigned char)0x66;	\
 		*(inst)++ = (unsigned char)0x0f;	\
 		*(inst)++ = (unsigned char)0x6e;	\
@@ -2030,6 +2430,7 @@
 
 #define x86_pshufw_reg_reg(inst,dreg,sreg,mask,high_words)	\
 	do {	\
+		x86_codegen_pre(&(inst), 5); \
 		*(inst)++ = (unsigned char)(high_words) ? 0xF3 : 0xF2;	\
 		*(inst)++ = (unsigned char)0x0f;	\
 		*(inst)++ = (unsigned char)0x70;	\
@@ -2039,6 +2440,7 @@
 
 #define x86_sse_shift_reg_imm(inst,opc,mode, dreg,imm)	\
 	do {	\
+		x86_codegen_pre(&(inst), 5); \
 		x86_sse_alu_pd_reg_reg (inst, opc, mode, dreg);	\
 		x86_imm_emit8 ((inst), (imm));	\
 	} while (0)
Index: configure.in
===================================================================
--- configure.in	(revision 160382)
+++ configure.in	(working copy)
@@ -182,6 +182,19 @@
 		AOT_SUPPORTED="yes"
 		use_sigposix=yes
 		;;
+	*-*-nacl*)
+		host_win32=no
+		CPPFLAGS="$CPPFLAGS -DGC_LINUX_THREADS -D_GNU_SOURCE -D_REENTRANT -DUSE_MMAP"
+		if test "x$disable_munmap" != "xyes"; then
+			CPPFLAGS="$CPPFLAGS -DUSE_MUNMAP"
+		fi
+		libmono_cflags="-D_REENTRANT"
+		libdl=
+		libgc_threads=pthreads
+		AOT_SUPPORTED="yes"
+		gc_default=boehm
+		use_sigposix=yes
+		;;
 	*-*-hpux*)
 	        host_win32=no
 		CPPFLAGS="$CPPFLAGS -DGC_HPUX_THREADS -D_HPUX_SOURCE -D_XOPEN_SOURCE_EXTENDED -D_REENTRANT"
@@ -2001,6 +2014,21 @@
 AM_CONDITIONAL(DTRACE_G_REQUIRED, [test x$dtrace_g = xyes])
 
 dnl **************
+dnl ***  NaCl  ***
+dnl **************
+
+AC_ARG_ENABLE(nacl_codegen, [  --enable-nacl-codegen      Enable Native Client code generation], enable_nacl_codegen=$enableval, enable_nacl_codegen=no)
+
+AM_CONDITIONAL(NACL_CODEGEN, test x$enable_nacl_codegen != xno)
+if test "x$enable_nacl_codegen" = "xyes"; then
+   MONO_NACL_ALIGN_MASK_OFF=1
+   CPPFLAGS="$CPPFLAGS -D__native_client_codegen__"
+else
+   MONO_NACL_ALIGN_MASK_OFF=0
+fi
+AC_SUBST(MONO_NACL_ALIGN_MASK_OFF)
+
+dnl **************
 dnl ***  LLVM  ***
 dnl **************
 
Index: ikvm-native/jni.c
===================================================================
--- ikvm-native/jni.c	(revision 160382)
+++ ikvm-native/jni.c	(working copy)
@@ -21,6 +21,9 @@
   jeroen at frijters.net
   
 */
+
+#if !defined(__native_client__)
+
 #include <stdarg.h>
 #include "jni.h"
 
@@ -502,3 +505,4 @@
 {
 	return method(vm, reserved);
 }
+#endif /* __native_client__ */
Index: ikvm-native/os.c
===================================================================
--- ikvm-native/os.c	(revision 160382)
+++ ikvm-native/os.c	(working copy)
@@ -21,6 +21,9 @@
   jeroen at frijters.net
   
 */
+
+#if !defined(__native_client__)
+
 #ifdef _WIN32
 	#include <windows.h>
 	#include "jni.h"
@@ -98,3 +101,4 @@
 		return msync(address, size, MS_SYNC);
 	}
 #endif
+#endif /* __native_client__ */


More information about the Mono-devel-list mailing list