[Mono-dev] simd version patch

Miguel de Icaza miguel at novell.com
Sat Jan 16 18:03:13 EST 2010


Hello,

    This would need a ChangeLog entry describing the rationale for the changes. I quickly scanned the patch and could not figure out why those changes are there, which means that we need the ChangeLog first.

    Second, for code that goes into the LGPL parts of Mono, you have to either release the patches under the MIT/X11 license or sign an agreement that allows Novell to redistribute the code under licenses other than the GNU LGPL.

On Jan 16, 2010, at 3:38 PM, Jerry Maine - KF5ADY wrote:

> Here is a patch to update the runtime SIMD version tests so that it
> would be easier and less hackish to implement ports beyond x86/amd64
> (like ARM and AltiVec).
> 
> Comments? Questions?
> 
> Jerry
> Index: mono/mini/simd-intrinsics.c
> ===================================================================
> --- mono/mini/simd-intrinsics.c	(revision 149699)
> +++ mono/mini/simd-intrinsics.c	(working copy)
> @@ -121,15 +121,15 @@
> typedef struct {
> 	guint16 name;
> 	guint16 opcode;
> -	guint8 simd_emit_mode : 4;
> -	guint8 simd_version : 4;
> +	guint8 simd_emit_mode;
> +	guint32 simd_version_flags;
> 	guint8 flags;
> } SimdIntrinsc;
> 
> static const SimdIntrinsc vector4f_intrinsics[] = {
> -	{ SN_ctor, OP_EXPAND_R4, SIMD_EMIT_CTOR },
> +	{ SN_ctor, OP_EXPAND_R4, SIMD_EMIT_CTOR, SIMD_VERSION_SSE1 },
> 	{ SN_AddSub, OP_ADDSUBPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE3 },
> -	{ SN_AndNot, OP_ANDNPS, SIMD_EMIT_BINARY },
> +	{ SN_AndNot, OP_ANDNPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> 	{ SN_CompareEqual, OP_COMPPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1, SIMD_COMP_EQ },
> 	{ SN_CompareLessEqual, OP_COMPPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1, SIMD_COMP_LE },
> 	{ SN_CompareLessThan, OP_COMPPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1, SIMD_COMP_LT },
> @@ -142,45 +142,45 @@
> 	{ SN_DuplicateLow, OP_DUPPS_LOW, SIMD_EMIT_UNARY, SIMD_VERSION_SSE3 },
> 	{ SN_HorizontalAdd, OP_HADDPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE3 },
> 	{ SN_HorizontalSub, OP_HSUBPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE3 },	
> -	{ SN_InterleaveHigh, OP_UNPACK_HIGHPS, SIMD_EMIT_BINARY },
> -	{ SN_InterleaveLow, OP_UNPACK_LOWPS, SIMD_EMIT_BINARY },
> -	{ SN_InvSqrt, OP_RSQRTPS, SIMD_EMIT_UNARY },
> -	{ SN_LoadAligned, 0, SIMD_EMIT_LOAD_ALIGNED },
> -	{ SN_Max, OP_MAXPS, SIMD_EMIT_BINARY },
> -	{ SN_Min, OP_MINPS, SIMD_EMIT_BINARY },
> +	{ SN_InterleaveHigh, OP_UNPACK_HIGHPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_InterleaveLow, OP_UNPACK_LOWPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_InvSqrt, OP_RSQRTPS, SIMD_EMIT_UNARY, SIMD_VERSION_SSE1 },
> +	{ SN_LoadAligned, 0, SIMD_EMIT_LOAD_ALIGNED, SIMD_VERSION_SSE1 },
> +	{ SN_Max, OP_MAXPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_Min, OP_MINPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> 	{ SN_PrefetchTemporalAllCacheLevels, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_0 },
> 	{ SN_PrefetchTemporal1stLevelCache, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_1 },
> 	{ SN_PrefetchTemporal2ndLevelCache, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_2 },
> 	{ SN_PrefetchNonTemporal, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_NTA },
> -	{ SN_Reciprocal, OP_RCPPS, SIMD_EMIT_UNARY },
> -	{ SN_Shuffle, OP_PSHUFLED, SIMD_EMIT_SHUFFLE },
> -	{ SN_Sqrt, OP_SQRTPS, SIMD_EMIT_UNARY },
> -	{ SN_StoreAligned, OP_STOREX_ALIGNED_MEMBASE_REG, SIMD_EMIT_STORE },
> -	{ SN_StoreNonTemporal, OP_STOREX_NTA_MEMBASE_REG, SIMD_EMIT_STORE },
> -	{ SN_get_W, 3, SIMD_EMIT_GETTER },
> -	{ SN_get_X, 0, SIMD_EMIT_GETTER },
> -	{ SN_get_Y, 1, SIMD_EMIT_GETTER },
> -	{ SN_get_Z, 2, SIMD_EMIT_GETTER },
> -	{ SN_op_Addition, OP_ADDPS, SIMD_EMIT_BINARY },
> -	{ SN_op_BitwiseAnd, OP_ANDPS, SIMD_EMIT_BINARY },
> -	{ SN_op_BitwiseOr, OP_ORPS, SIMD_EMIT_BINARY },
> -	{ SN_op_Division, OP_DIVPS, SIMD_EMIT_BINARY },
> +	{ SN_Reciprocal, OP_RCPPS, SIMD_EMIT_UNARY, SIMD_VERSION_SSE1 },
> +	{ SN_Shuffle, OP_PSHUFLED, SIMD_EMIT_SHUFFLE, SIMD_VERSION_SSE1 },
> +	{ SN_Sqrt, OP_SQRTPS, SIMD_EMIT_UNARY, SIMD_VERSION_SSE1 },
> +	{ SN_StoreAligned, OP_STOREX_ALIGNED_MEMBASE_REG, SIMD_EMIT_STORE, SIMD_VERSION_SSE1 },
> +	{ SN_StoreNonTemporal, OP_STOREX_NTA_MEMBASE_REG, SIMD_EMIT_STORE, SIMD_VERSION_SSE1 },
> +	{ SN_get_W, 3, SIMD_EMIT_GETTER, SIMD_VERSION_SSE1 },
> +	{ SN_get_X, 0, SIMD_EMIT_GETTER, SIMD_VERSION_SSE1 },
> +	{ SN_get_Y, 1, SIMD_EMIT_GETTER, SIMD_VERSION_SSE1 },
> +	{ SN_get_Z, 2, SIMD_EMIT_GETTER, SIMD_VERSION_SSE1 },
> +	{ SN_op_Addition, OP_ADDPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_op_BitwiseAnd, OP_ANDPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_op_BitwiseOr, OP_ORPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_op_Division, OP_DIVPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> 	{ SN_op_Equality, OP_COMPPS, SIMD_EMIT_EQUALITY, SIMD_VERSION_SSE1, SIMD_COMP_EQ },
> -	{ SN_op_ExclusiveOr, OP_XORPS, SIMD_EMIT_BINARY },
> -	{ SN_op_Explicit, 0, SIMD_EMIT_CAST }, 
> +	{ SN_op_ExclusiveOr, OP_XORPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_op_Explicit, 0, SIMD_EMIT_CAST, SIMD_VERSION_SSE1 }, 
> 	{ SN_op_Inequality, OP_COMPPS, SIMD_EMIT_EQUALITY, SIMD_VERSION_SSE1, SIMD_COMP_NEQ },
> -	{ SN_op_Multiply, OP_MULPS, SIMD_EMIT_BINARY },
> -	{ SN_op_Subtraction, OP_SUBPS, SIMD_EMIT_BINARY },
> -	{ SN_set_W, 3, SIMD_EMIT_SETTER },
> -	{ SN_set_X, 0, SIMD_EMIT_SETTER },
> -	{ SN_set_Y, 1, SIMD_EMIT_SETTER },
> -	{ SN_set_Z, 2, SIMD_EMIT_SETTER },
> +	{ SN_op_Multiply, OP_MULPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_op_Subtraction, OP_SUBPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_set_W, 3, SIMD_EMIT_SETTER, SIMD_VERSION_SSE1 },
> +	{ SN_set_X, 0, SIMD_EMIT_SETTER, SIMD_VERSION_SSE1 },
> +	{ SN_set_Y, 1, SIMD_EMIT_SETTER, SIMD_VERSION_SSE1 },
> +	{ SN_set_Z, 2, SIMD_EMIT_SETTER, SIMD_VERSION_SSE1 },
> };
> 
> static const SimdIntrinsc vector2d_intrinsics[] = {
> -	{ SN_ctor, OP_EXPAND_R8, SIMD_EMIT_CTOR },
> +	{ SN_ctor, OP_EXPAND_R8, SIMD_EMIT_CTOR, SIMD_VERSION_SSE1 },
> 	{ SN_AddSub, OP_ADDSUBPD, SIMD_EMIT_BINARY, SIMD_VERSION_SSE3 },
> -	{ SN_AndNot, OP_ANDNPD, SIMD_EMIT_BINARY },
> +	{ SN_AndNot, OP_ANDNPD, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> 	{ SN_CompareEqual, OP_COMPPD, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1, SIMD_COMP_EQ },
> 	{ SN_CompareLessEqual, OP_COMPPD, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1, SIMD_COMP_LE },
> 	{ SN_CompareLessThan, OP_COMPPD, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1, SIMD_COMP_LT },
> @@ -192,321 +192,321 @@
> 	{ SN_Duplicate, OP_DUPPD, SIMD_EMIT_UNARY, SIMD_VERSION_SSE3 },
> 	{ SN_HorizontalAdd, OP_HADDPD, SIMD_EMIT_BINARY, SIMD_VERSION_SSE3 },
> 	{ SN_HorizontalSub, OP_HSUBPD, SIMD_EMIT_BINARY, SIMD_VERSION_SSE3 },	
> -	{ SN_InterleaveHigh, OP_UNPACK_HIGHPD, SIMD_EMIT_BINARY },
> -	{ SN_InterleaveLow, OP_UNPACK_LOWPD, SIMD_EMIT_BINARY },
> -	{ SN_LoadAligned, 0, SIMD_EMIT_LOAD_ALIGNED },
> -	{ SN_Max, OP_MAXPD, SIMD_EMIT_BINARY },
> -	{ SN_Min, OP_MINPD, SIMD_EMIT_BINARY },
> +	{ SN_InterleaveHigh, OP_UNPACK_HIGHPD, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_InterleaveLow, OP_UNPACK_LOWPD, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_LoadAligned, 0, SIMD_EMIT_LOAD_ALIGNED, SIMD_VERSION_SSE1 },
> +	{ SN_Max, OP_MAXPD, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_Min, OP_MINPD, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> 	{ SN_PrefetchTemporalAllCacheLevels, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_0 },
> 	{ SN_PrefetchTemporal1stLevelCache, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_1 },
> 	{ SN_PrefetchTemporal2ndLevelCache, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_2 },
> 	{ SN_PrefetchNonTemporal, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_NTA },
> -	{ SN_Sqrt, OP_SQRTPD, SIMD_EMIT_UNARY },
> -	{ SN_StoreAligned, OP_STOREX_ALIGNED_MEMBASE_REG, SIMD_EMIT_STORE },
> -	{ SN_get_X, 0, SIMD_EMIT_GETTER_QWORD },
> -	{ SN_get_Y, 1, SIMD_EMIT_GETTER_QWORD },
> -	{ SN_op_Addition, OP_ADDPD, SIMD_EMIT_BINARY },
> -	{ SN_op_BitwiseAnd, OP_ANDPD, SIMD_EMIT_BINARY },
> -	{ SN_op_BitwiseOr, OP_ORPD, SIMD_EMIT_BINARY },
> -	{ SN_op_Division, OP_DIVPD, SIMD_EMIT_BINARY },
> -	{ SN_op_ExclusiveOr, OP_XORPD, SIMD_EMIT_BINARY },
> -	{ SN_op_Explicit, 0, SIMD_EMIT_CAST }, 
> -	{ SN_op_Multiply, OP_MULPD, SIMD_EMIT_BINARY },
> -	{ SN_op_Subtraction, OP_SUBPD, SIMD_EMIT_BINARY },
> -	{ SN_set_X, 0, SIMD_EMIT_SETTER },
> -	{ SN_set_Y, 1, SIMD_EMIT_SETTER },
> +	{ SN_Sqrt, OP_SQRTPD, SIMD_EMIT_UNARY, SIMD_VERSION_SSE1 },
> +	{ SN_StoreAligned, OP_STOREX_ALIGNED_MEMBASE_REG, SIMD_EMIT_STORE, SIMD_VERSION_SSE1 },
> +	{ SN_get_X, 0, SIMD_EMIT_GETTER_QWORD, SIMD_VERSION_SSE1 },
> +	{ SN_get_Y, 1, SIMD_EMIT_GETTER_QWORD, SIMD_VERSION_SSE1 },
> +	{ SN_op_Addition, OP_ADDPD, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_op_BitwiseAnd, OP_ANDPD, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_op_BitwiseOr, OP_ORPD, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_op_Division, OP_DIVPD, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_op_ExclusiveOr, OP_XORPD, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_op_Explicit, 0, SIMD_EMIT_CAST, SIMD_VERSION_SSE1 }, 
> +	{ SN_op_Multiply, OP_MULPD, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_op_Subtraction, OP_SUBPD, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_set_X, 0, SIMD_EMIT_SETTER, SIMD_VERSION_SSE1 },
> +	{ SN_set_Y, 1, SIMD_EMIT_SETTER, SIMD_VERSION_SSE1 },
> };
> 
> static const SimdIntrinsc vector2ul_intrinsics[] = {
> -	{ SN_ctor, OP_EXPAND_I8, SIMD_EMIT_CTOR },
> +	{ SN_ctor, OP_EXPAND_I8, SIMD_EMIT_CTOR, SIMD_VERSION_SSE1 },
> 	{ SN_CompareEqual, OP_PCMPEQQ, SIMD_EMIT_BINARY, SIMD_VERSION_SSE41 },
> -	{ SN_LoadAligned, 0, SIMD_EMIT_LOAD_ALIGNED },
> +	{ SN_LoadAligned, 0, SIMD_EMIT_LOAD_ALIGNED, SIMD_VERSION_SSE1 },
> 	{ SN_PrefetchTemporalAllCacheLevels, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_0 },
> 	{ SN_PrefetchTemporal1stLevelCache, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_1 },
> 	{ SN_PrefetchTemporal2ndLevelCache, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_2 },
> 	{ SN_PrefetchNonTemporal, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_NTA },
> -	{ SN_StoreAligned, OP_STOREX_ALIGNED_MEMBASE_REG, SIMD_EMIT_STORE },
> -	{ SN_UnpackHigh, OP_UNPACK_HIGHQ, SIMD_EMIT_BINARY },
> -	{ SN_UnpackLow, OP_UNPACK_LOWQ, SIMD_EMIT_BINARY },
> -	{ SN_get_X, 0, SIMD_EMIT_GETTER_QWORD },
> -	{ SN_get_Y, 1, SIMD_EMIT_GETTER_QWORD },
> -	{ SN_op_Addition, OP_PADDQ, SIMD_EMIT_BINARY },
> -	{ SN_op_BitwiseAnd, OP_PAND, SIMD_EMIT_BINARY },
> -	{ SN_op_BitwiseOr, OP_POR, SIMD_EMIT_BINARY },
> -	{ SN_op_ExclusiveOr, OP_PXOR, SIMD_EMIT_BINARY },
> -	{ SN_op_Explicit, 0, SIMD_EMIT_CAST },
> -	{ SN_op_LeftShift, OP_PSHLQ, SIMD_EMIT_SHIFT },
> -	{ SN_op_Multiply, OP_PMULQ, SIMD_EMIT_BINARY },
> -	{ SN_op_RightShift, OP_PSHRQ, SIMD_EMIT_SHIFT },
> -	{ SN_op_Subtraction, OP_PSUBQ, SIMD_EMIT_BINARY },
> -	{ SN_set_X, 0, SIMD_EMIT_SETTER },
> -	{ SN_set_Y, 1, SIMD_EMIT_SETTER },
> +	{ SN_StoreAligned, OP_STOREX_ALIGNED_MEMBASE_REG, SIMD_EMIT_STORE, SIMD_VERSION_SSE1 },
> +	{ SN_UnpackHigh, OP_UNPACK_HIGHQ, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_UnpackLow, OP_UNPACK_LOWQ, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_get_X, 0, SIMD_EMIT_GETTER_QWORD, SIMD_VERSION_SSE1 },
> +	{ SN_get_Y, 1, SIMD_EMIT_GETTER_QWORD, SIMD_VERSION_SSE1 },
> +	{ SN_op_Addition, OP_PADDQ, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_op_BitwiseAnd, OP_PAND, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_op_BitwiseOr, OP_POR, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_op_ExclusiveOr, OP_PXOR, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_op_Explicit, 0, SIMD_EMIT_CAST, SIMD_VERSION_SSE1 },
> +	{ SN_op_LeftShift, OP_PSHLQ, SIMD_EMIT_SHIFT, SIMD_VERSION_SSE1 },
> +	{ SN_op_Multiply, OP_PMULQ, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_op_RightShift, OP_PSHRQ, SIMD_EMIT_SHIFT, SIMD_VERSION_SSE1 },
> +	{ SN_op_Subtraction, OP_PSUBQ, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_set_X, 0, SIMD_EMIT_SETTER, SIMD_VERSION_SSE1 },
> +	{ SN_set_Y, 1, SIMD_EMIT_SETTER, SIMD_VERSION_SSE1 },
> };
> 
> static const SimdIntrinsc vector2l_intrinsics[] = {
> -	{ SN_ctor, OP_EXPAND_I8, SIMD_EMIT_CTOR },
> +	{ SN_ctor, OP_EXPAND_I8, SIMD_EMIT_CTOR, SIMD_VERSION_SSE1 },
> 	{ SN_CompareEqual, OP_PCMPEQQ, SIMD_EMIT_BINARY, SIMD_VERSION_SSE41 },
> 	{ SN_CompareGreaterThan, OP_PCMPGTQ, SIMD_EMIT_BINARY, SIMD_VERSION_SSE42 },
> -	{ SN_LoadAligned, 0, SIMD_EMIT_LOAD_ALIGNED },
> -	{ SN_LogicalRightShift, OP_PSHRQ, SIMD_EMIT_SHIFT },
> +	{ SN_LoadAligned, 0, SIMD_EMIT_LOAD_ALIGNED, SIMD_VERSION_SSE1 },
> +	{ SN_LogicalRightShift, OP_PSHRQ, SIMD_EMIT_SHIFT, SIMD_VERSION_SSE1 },
> 	{ SN_PrefetchTemporalAllCacheLevels, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_0 },
> 	{ SN_PrefetchTemporal1stLevelCache, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_1 },
> 	{ SN_PrefetchTemporal2ndLevelCache, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_2 },
> 	{ SN_PrefetchNonTemporal, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_NTA },
> 	{ SN_StoreAligned, OP_STOREX_ALIGNED_MEMBASE_REG, SIMD_EMIT_STORE },
> -	{ SN_UnpackHigh, OP_UNPACK_HIGHQ, SIMD_EMIT_BINARY },
> -	{ SN_UnpackLow, OP_UNPACK_LOWQ, SIMD_EMIT_BINARY },
> -	{ SN_get_X, 0, SIMD_EMIT_GETTER_QWORD },
> -	{ SN_get_Y, 1, SIMD_EMIT_GETTER_QWORD },
> -	{ SN_op_Addition, OP_PADDQ, SIMD_EMIT_BINARY },
> -	{ SN_op_BitwiseAnd, OP_PAND, SIMD_EMIT_BINARY },
> -	{ SN_op_BitwiseOr, OP_POR, SIMD_EMIT_BINARY },
> -	{ SN_op_ExclusiveOr, OP_PXOR, SIMD_EMIT_BINARY },
> -	{ SN_op_Explicit, 0, SIMD_EMIT_CAST },
> -	{ SN_op_LeftShift, OP_PSHLQ, SIMD_EMIT_SHIFT },
> -	{ SN_op_Multiply, OP_PMULQ, SIMD_EMIT_BINARY },
> -	{ SN_op_Subtraction, OP_PSUBQ, SIMD_EMIT_BINARY },
> -	{ SN_set_X, 0, SIMD_EMIT_SETTER },
> -	{ SN_set_Y, 1, SIMD_EMIT_SETTER },
> +	{ SN_UnpackHigh, OP_UNPACK_HIGHQ, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_UnpackLow, OP_UNPACK_LOWQ, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_get_X, 0, SIMD_EMIT_GETTER_QWORD, SIMD_VERSION_SSE1 },
> +	{ SN_get_Y, 1, SIMD_EMIT_GETTER_QWORD, SIMD_VERSION_SSE1 },
> +	{ SN_op_Addition, OP_PADDQ, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_op_BitwiseAnd, OP_PAND, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_op_BitwiseOr, OP_POR, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_op_ExclusiveOr, OP_PXOR, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_op_Explicit, 0, SIMD_EMIT_CAST, SIMD_VERSION_SSE1 },
> +	{ SN_op_LeftShift, OP_PSHLQ, SIMD_EMIT_SHIFT, SIMD_VERSION_SSE1 },
> +	{ SN_op_Multiply, OP_PMULQ, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_op_Subtraction, OP_PSUBQ, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_set_X, 0, SIMD_EMIT_SETTER, SIMD_VERSION_SSE1 },
> +	{ SN_set_Y, 1, SIMD_EMIT_SETTER, SIMD_VERSION_SSE1 },
> };
> 
> static const SimdIntrinsc vector4ui_intrinsics[] = {
> -	{ SN_ctor, OP_EXPAND_I4, SIMD_EMIT_CTOR },
> -	{ SN_ArithmeticRightShift, OP_PSARD, SIMD_EMIT_SHIFT },
> -	{ SN_CompareEqual, OP_PCMPEQD, SIMD_EMIT_BINARY },
> -	{ SN_LoadAligned, 0, SIMD_EMIT_LOAD_ALIGNED },
> +	{ SN_ctor, OP_EXPAND_I4, SIMD_EMIT_CTOR, SIMD_VERSION_SSE1 },
> +	{ SN_ArithmeticRightShift, OP_PSARD, SIMD_EMIT_SHIFT, SIMD_VERSION_SSE1 },
> +	{ SN_CompareEqual, OP_PCMPEQD, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_LoadAligned, 0, SIMD_EMIT_LOAD_ALIGNED, SIMD_VERSION_SSE1 },
> 	{ SN_Max, OP_PMAXD_UN, SIMD_EMIT_BINARY, SIMD_VERSION_SSE41 },
> 	{ SN_Min, OP_PMIND_UN, SIMD_EMIT_BINARY, SIMD_VERSION_SSE41 },
> 	{ SN_PrefetchTemporalAllCacheLevels, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_0 },
> 	{ SN_PrefetchTemporal1stLevelCache, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_1 },
> 	{ SN_PrefetchTemporal2ndLevelCache, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_2 },
> 	{ SN_PrefetchNonTemporal, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_NTA },
> -	{ SN_Shuffle, OP_PSHUFLED, SIMD_EMIT_SHUFFLE },
> -	{ SN_SignedPackWithSignedSaturation, OP_PACKD, SIMD_EMIT_BINARY },
> +	{ SN_Shuffle, OP_PSHUFLED, SIMD_EMIT_SHUFFLE, SIMD_VERSION_SSE1 },
> +	{ SN_SignedPackWithSignedSaturation, OP_PACKD, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> 	{ SN_SignedPackWithUnsignedSaturation, OP_PACKD_UN, SIMD_EMIT_BINARY, SIMD_VERSION_SSE41 },
> -	{ SN_StoreAligned, OP_STOREX_ALIGNED_MEMBASE_REG, SIMD_EMIT_STORE },
> -	{ SN_UnpackHigh, OP_UNPACK_HIGHD, SIMD_EMIT_BINARY },
> -	{ SN_UnpackLow, OP_UNPACK_LOWD, SIMD_EMIT_BINARY },
> -	{ SN_get_W, 3, SIMD_EMIT_GETTER },
> -	{ SN_get_X, 0, SIMD_EMIT_GETTER },
> -	{ SN_get_Y, 1, SIMD_EMIT_GETTER },
> -	{ SN_get_Z, 2, SIMD_EMIT_GETTER },
> -	{ SN_op_Addition, OP_PADDD, SIMD_EMIT_BINARY },
> -	{ SN_op_BitwiseAnd, OP_PAND, SIMD_EMIT_BINARY },
> -	{ SN_op_BitwiseOr, OP_POR, SIMD_EMIT_BINARY },
> +	{ SN_StoreAligned, OP_STOREX_ALIGNED_MEMBASE_REG, SIMD_EMIT_STORE, SIMD_VERSION_SSE1 },
> +	{ SN_UnpackHigh, OP_UNPACK_HIGHD, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_UnpackLow, OP_UNPACK_LOWD, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_get_W, 3, SIMD_EMIT_GETTER, SIMD_VERSION_SSE1 },
> +	{ SN_get_X, 0, SIMD_EMIT_GETTER, SIMD_VERSION_SSE1 },
> +	{ SN_get_Y, 1, SIMD_EMIT_GETTER, SIMD_VERSION_SSE1 },
> +	{ SN_get_Z, 2, SIMD_EMIT_GETTER, SIMD_VERSION_SSE1 },
> +	{ SN_op_Addition, OP_PADDD, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_op_BitwiseAnd, OP_PAND, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_op_BitwiseOr, OP_POR, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> 	{ SN_op_Equality, OP_PCMPEQD, SIMD_EMIT_EQUALITY, SIMD_VERSION_SSE1, SIMD_COMP_EQ },
> -	{ SN_op_ExclusiveOr, OP_PXOR, SIMD_EMIT_BINARY },
> -	{ SN_op_Explicit, 0, SIMD_EMIT_CAST },
> +	{ SN_op_ExclusiveOr, OP_PXOR, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_op_Explicit, 0, SIMD_EMIT_CAST, SIMD_VERSION_SSE1 },
> 	{ SN_op_Inequality, OP_PCMPEQD, SIMD_EMIT_EQUALITY, SIMD_VERSION_SSE1, SIMD_COMP_NEQ },
> -	{ SN_op_LeftShift, OP_PSHLD, SIMD_EMIT_SHIFT },
> +	{ SN_op_LeftShift, OP_PSHLD, SIMD_EMIT_SHIFT, SIMD_VERSION_SSE1 },
> 	{ SN_op_Multiply, OP_PMULD, SIMD_EMIT_BINARY, SIMD_VERSION_SSE41 },
> -	{ SN_op_RightShift, OP_PSHRD, SIMD_EMIT_SHIFT },
> -	{ SN_op_Subtraction, OP_PSUBD, SIMD_EMIT_BINARY },
> -	{ SN_set_W, 3, SIMD_EMIT_SETTER },
> -	{ SN_set_X, 0, SIMD_EMIT_SETTER },
> -	{ SN_set_Y, 1, SIMD_EMIT_SETTER },
> -	{ SN_set_Z, 2, SIMD_EMIT_SETTER },
> +	{ SN_op_RightShift, OP_PSHRD, SIMD_EMIT_SHIFT, SIMD_VERSION_SSE1 },
> +	{ SN_op_Subtraction, OP_PSUBD, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_set_W, 3, SIMD_EMIT_SETTER, SIMD_VERSION_SSE1 },
> +	{ SN_set_X, 0, SIMD_EMIT_SETTER, SIMD_VERSION_SSE1 },
> +	{ SN_set_Y, 1, SIMD_EMIT_SETTER, SIMD_VERSION_SSE1 },
> +	{ SN_set_Z, 2, SIMD_EMIT_SETTER, SIMD_VERSION_SSE1 },
> };
> 
> static const SimdIntrinsc vector4i_intrinsics[] = {
> -	{ SN_ctor, OP_EXPAND_I4, SIMD_EMIT_CTOR },
> -	{ SN_CompareEqual, OP_PCMPEQD, SIMD_EMIT_BINARY },
> -	{ SN_CompareGreaterThan, OP_PCMPGTD, SIMD_EMIT_BINARY },
> -	{ SN_LoadAligned, 0, SIMD_EMIT_LOAD_ALIGNED },
> -	{ SN_LogicalRightShift, OP_PSHRD, SIMD_EMIT_SHIFT },
> -	{ SN_Max, OP_PMAXD, SIMD_EMIT_BINARY, SIMD_VERSION_SSE41 },
> -	{ SN_Min, OP_PMIND, SIMD_EMIT_BINARY, SIMD_VERSION_SSE41 },
> -	{ SN_PackWithSignedSaturation, OP_PACKD, SIMD_EMIT_BINARY },
> +	{ SN_ctor, OP_EXPAND_I4, SIMD_EMIT_CTOR, SIMD_VERSION_SSE1 },
> +	{ SN_CompareEqual, OP_PCMPEQD, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_CompareGreaterThan, OP_PCMPGTD, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_LoadAligned, 0, SIMD_EMIT_LOAD_ALIGNED, SIMD_VERSION_SSE1 },
> +	{ SN_LogicalRightShift, OP_PSHRD, SIMD_EMIT_SHIFT, SIMD_VERSION_SSE1 },
> +	{ SN_Max, OP_PMAXD, SIMD_EMIT_BINARY, SIMD_VERSION_SSE41, SIMD_VERSION_SSE1 },
> +	{ SN_Min, OP_PMIND, SIMD_EMIT_BINARY, SIMD_VERSION_SSE41, SIMD_VERSION_SSE1 },
> +	{ SN_PackWithSignedSaturation, OP_PACKD, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> 	{ SN_PackWithUnsignedSaturation, OP_PACKD_UN, SIMD_EMIT_BINARY, SIMD_VERSION_SSE41 },
> 	{ SN_PrefetchTemporalAllCacheLevels, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_0 },
> 	{ SN_PrefetchTemporal1stLevelCache, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_1 },
> 	{ SN_PrefetchTemporal2ndLevelCache, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_2 },
> 	{ SN_PrefetchNonTemporal, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_NTA },
> -	{ SN_Shuffle, OP_PSHUFLED, SIMD_EMIT_SHUFFLE },
> -	{ SN_StoreAligned, OP_STOREX_ALIGNED_MEMBASE_REG, SIMD_EMIT_STORE },
> -	{ SN_UnpackHigh, OP_UNPACK_HIGHD, SIMD_EMIT_BINARY },
> -	{ SN_UnpackLow, OP_UNPACK_LOWD, SIMD_EMIT_BINARY },
> -	{ SN_get_W, 3, SIMD_EMIT_GETTER },
> -	{ SN_get_X, 0, SIMD_EMIT_GETTER },
> -	{ SN_get_Y, 1, SIMD_EMIT_GETTER },
> -	{ SN_get_Z, 2, SIMD_EMIT_GETTER },
> -	{ SN_op_Addition, OP_PADDD, SIMD_EMIT_BINARY },
> -	{ SN_op_BitwiseAnd, OP_PAND, SIMD_EMIT_BINARY },
> -	{ SN_op_BitwiseOr, OP_POR, SIMD_EMIT_BINARY },
> +	{ SN_Shuffle, OP_PSHUFLED, SIMD_EMIT_SHUFFLE, SIMD_VERSION_SSE1 },
> +	{ SN_StoreAligned, OP_STOREX_ALIGNED_MEMBASE_REG, SIMD_EMIT_STORE, SIMD_VERSION_SSE1 },
> +	{ SN_UnpackHigh, OP_UNPACK_HIGHD, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_UnpackLow, OP_UNPACK_LOWD, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_get_W, 3, SIMD_EMIT_GETTER, SIMD_VERSION_SSE1 },
> +	{ SN_get_X, 0, SIMD_EMIT_GETTER, SIMD_VERSION_SSE1 },
> +	{ SN_get_Y, 1, SIMD_EMIT_GETTER, SIMD_VERSION_SSE1 },
> +	{ SN_get_Z, 2, SIMD_EMIT_GETTER, SIMD_VERSION_SSE1 },
> +	{ SN_op_Addition, OP_PADDD, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_op_BitwiseAnd, OP_PAND, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_op_BitwiseOr, OP_POR, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> 	{ SN_op_Equality, OP_PCMPEQD, SIMD_EMIT_EQUALITY, SIMD_VERSION_SSE1, SIMD_COMP_EQ },
> -	{ SN_op_ExclusiveOr, OP_PXOR, SIMD_EMIT_BINARY },
> -	{ SN_op_Explicit, 0, SIMD_EMIT_CAST },
> +	{ SN_op_ExclusiveOr, OP_PXOR, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_op_Explicit, 0, SIMD_EMIT_CAST, SIMD_VERSION_SSE1 },
> 	{ SN_op_Inequality, OP_PCMPEQD, SIMD_EMIT_EQUALITY, SIMD_VERSION_SSE1, SIMD_COMP_NEQ },
> -	{ SN_op_LeftShift, OP_PSHLD, SIMD_EMIT_SHIFT },
> +	{ SN_op_LeftShift, OP_PSHLD, SIMD_EMIT_SHIFT, SIMD_VERSION_SSE1 },
> 	{ SN_op_Multiply, OP_PMULD, SIMD_EMIT_BINARY, SIMD_VERSION_SSE41 },
> -	{ SN_op_RightShift, OP_PSARD, SIMD_EMIT_SHIFT },
> -	{ SN_op_Subtraction, OP_PSUBD, SIMD_EMIT_BINARY },
> -	{ SN_set_W, 3, SIMD_EMIT_SETTER },
> -	{ SN_set_X, 0, SIMD_EMIT_SETTER },
> -	{ SN_set_Y, 1, SIMD_EMIT_SETTER },
> -	{ SN_set_Z, 2, SIMD_EMIT_SETTER },
> +	{ SN_op_RightShift, OP_PSARD, SIMD_EMIT_SHIFT, SIMD_VERSION_SSE1 },
> +	{ SN_op_Subtraction, OP_PSUBD, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_set_W, 3, SIMD_EMIT_SETTER, SIMD_VERSION_SSE1 },
> +	{ SN_set_X, 0, SIMD_EMIT_SETTER, SIMD_VERSION_SSE1 },
> +	{ SN_set_Y, 1, SIMD_EMIT_SETTER, SIMD_VERSION_SSE1 },
> +	{ SN_set_Z, 2, SIMD_EMIT_SETTER, SIMD_VERSION_SSE1 },
> };
> 
> static const SimdIntrinsc vector8us_intrinsics[] = {
> -	{ SN_ctor, OP_EXPAND_I2, SIMD_EMIT_CTOR },
> -	{ SN_AddWithSaturation, OP_PADDW_SAT_UN, SIMD_EMIT_BINARY },
> -	{ SN_ArithmeticRightShift, OP_PSARW, SIMD_EMIT_SHIFT },
> -	{ SN_Average, OP_PAVGW_UN, SIMD_EMIT_BINARY },
> -	{ SN_CompareEqual, OP_PCMPEQW, SIMD_EMIT_BINARY },
> -	{ SN_LoadAligned, 0, SIMD_EMIT_LOAD_ALIGNED },
> -	{ SN_Max, OP_PMAXW_UN, SIMD_EMIT_BINARY, SIMD_VERSION_SSE41 },
> -	{ SN_Min, OP_PMINW_UN, SIMD_EMIT_BINARY, SIMD_VERSION_SSE41 },
> -	{ SN_MultiplyStoreHigh, OP_PMULW_HIGH_UN, SIMD_EMIT_BINARY },
> +	{ SN_ctor, OP_EXPAND_I2, SIMD_EMIT_CTOR, SIMD_VERSION_SSE1 },
> +	{ SN_AddWithSaturation, OP_PADDW_SAT_UN, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_ArithmeticRightShift, OP_PSARW, SIMD_EMIT_SHIFT, SIMD_VERSION_SSE1 },
> +	{ SN_Average, OP_PAVGW_UN, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_CompareEqual, OP_PCMPEQW, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_LoadAligned, 0, SIMD_EMIT_LOAD_ALIGNED, SIMD_VERSION_SSE1 },
> +	{ SN_Max, OP_PMAXW_UN, SIMD_EMIT_BINARY, SIMD_VERSION_SSE41, SIMD_VERSION_SSE1 },
> +	{ SN_Min, OP_PMINW_UN, SIMD_EMIT_BINARY, SIMD_VERSION_SSE41, SIMD_VERSION_SSE1 },
> +	{ SN_MultiplyStoreHigh, OP_PMULW_HIGH_UN, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> 	{ SN_PrefetchTemporalAllCacheLevels, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_0 },
> 	{ SN_PrefetchTemporal1stLevelCache, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_1 },
> 	{ SN_PrefetchTemporal2ndLevelCache, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_2 },
> 	{ SN_PrefetchNonTemporal, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_NTA },
> -	{ SN_ShuffleHigh, OP_PSHUFLEW_HIGH, SIMD_EMIT_SHUFFLE },
> -	{ SN_ShuffleLow, OP_PSHUFLEW_LOW, SIMD_EMIT_SHUFFLE },
> -	{ SN_SignedPackWithSignedSaturation, OP_PACKW, SIMD_EMIT_BINARY },
> -	{ SN_SignedPackWithUnsignedSaturation, OP_PACKW_UN, SIMD_EMIT_BINARY },
> -	{ SN_StoreAligned, OP_STOREX_ALIGNED_MEMBASE_REG, SIMD_EMIT_STORE },
> -	{ SN_SubtractWithSaturation, OP_PSUBW_SAT_UN, SIMD_EMIT_BINARY },
> -	{ SN_UnpackHigh, OP_UNPACK_HIGHW, SIMD_EMIT_BINARY },
> -	{ SN_UnpackLow, OP_UNPACK_LOWW, SIMD_EMIT_BINARY },
> -	{ SN_get_V0, 0, SIMD_EMIT_GETTER },
> -	{ SN_get_V1, 1, SIMD_EMIT_GETTER },
> -	{ SN_get_V2, 2, SIMD_EMIT_GETTER },
> -	{ SN_get_V3, 3, SIMD_EMIT_GETTER },
> -	{ SN_get_V4, 4, SIMD_EMIT_GETTER },
> -	{ SN_get_V5, 5, SIMD_EMIT_GETTER },
> -	{ SN_get_V6, 6, SIMD_EMIT_GETTER },
> -	{ SN_get_V7, 7, SIMD_EMIT_GETTER },
> -	{ SN_op_Addition, OP_PADDW, SIMD_EMIT_BINARY },
> -	{ SN_op_BitwiseAnd, OP_PAND, SIMD_EMIT_BINARY },
> -	{ SN_op_BitwiseOr, OP_POR, SIMD_EMIT_BINARY },
> +	{ SN_ShuffleHigh, OP_PSHUFLEW_HIGH, SIMD_EMIT_SHUFFLE, SIMD_VERSION_SSE1 },
> +	{ SN_ShuffleLow, OP_PSHUFLEW_LOW, SIMD_EMIT_SHUFFLE, SIMD_VERSION_SSE1 },
> +	{ SN_SignedPackWithSignedSaturation, OP_PACKW, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_SignedPackWithUnsignedSaturation, OP_PACKW_UN, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_StoreAligned, OP_STOREX_ALIGNED_MEMBASE_REG, SIMD_EMIT_STORE, SIMD_VERSION_SSE1 },
> +	{ SN_SubtractWithSaturation, OP_PSUBW_SAT_UN, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_UnpackHigh, OP_UNPACK_HIGHW, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_UnpackLow, OP_UNPACK_LOWW, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_get_V0, 0, SIMD_EMIT_GETTER, SIMD_VERSION_SSE1 },
> +	{ SN_get_V1, 1, SIMD_EMIT_GETTER, SIMD_VERSION_SSE1 },
> +	{ SN_get_V2, 2, SIMD_EMIT_GETTER, SIMD_VERSION_SSE1 },
> +	{ SN_get_V3, 3, SIMD_EMIT_GETTER, SIMD_VERSION_SSE1 },
> +	{ SN_get_V4, 4, SIMD_EMIT_GETTER, SIMD_VERSION_SSE1 },
> +	{ SN_get_V5, 5, SIMD_EMIT_GETTER, SIMD_VERSION_SSE1 },
> +	{ SN_get_V6, 6, SIMD_EMIT_GETTER, SIMD_VERSION_SSE1 },
> +	{ SN_get_V7, 7, SIMD_EMIT_GETTER, SIMD_VERSION_SSE1 },
> +	{ SN_op_Addition, OP_PADDW, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_op_BitwiseAnd, OP_PAND, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_op_BitwiseOr, OP_POR, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> 	{ SN_op_Equality, OP_PCMPEQW, SIMD_EMIT_EQUALITY, SIMD_VERSION_SSE1, SIMD_COMP_EQ },
> -	{ SN_op_ExclusiveOr, OP_PXOR, SIMD_EMIT_BINARY },
> -	{ SN_op_Explicit, 0, SIMD_EMIT_CAST },
> +	{ SN_op_ExclusiveOr, OP_PXOR, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_op_Explicit, 0, SIMD_EMIT_CAST, SIMD_VERSION_SSE1 },
> 	{ SN_op_Inequality, OP_PCMPEQW, SIMD_EMIT_EQUALITY, SIMD_VERSION_SSE1, SIMD_COMP_NEQ },
> -	{ SN_op_LeftShift, OP_PSHLW, SIMD_EMIT_SHIFT },
> -	{ SN_op_Multiply, OP_PMULW, SIMD_EMIT_BINARY },
> -	{ SN_op_RightShift, OP_PSHRW, SIMD_EMIT_SHIFT },
> -	{ SN_op_Subtraction, OP_PSUBW, SIMD_EMIT_BINARY },
> -	{ SN_set_V0, 0, SIMD_EMIT_SETTER },
> -	{ SN_set_V1, 1, SIMD_EMIT_SETTER },
> -	{ SN_set_V2, 2, SIMD_EMIT_SETTER },
> -	{ SN_set_V3, 3, SIMD_EMIT_SETTER },
> -	{ SN_set_V4, 4, SIMD_EMIT_SETTER },
> -	{ SN_set_V5, 5, SIMD_EMIT_SETTER },
> -	{ SN_set_V6, 6, SIMD_EMIT_SETTER },
> -	{ SN_set_V7, 7, SIMD_EMIT_SETTER },
> +	{ SN_op_LeftShift, OP_PSHLW, SIMD_EMIT_SHIFT, SIMD_VERSION_SSE1 },
> +	{ SN_op_Multiply, OP_PMULW, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_op_RightShift, OP_PSHRW, SIMD_EMIT_SHIFT, SIMD_VERSION_SSE1 },
> +	{ SN_op_Subtraction, OP_PSUBW, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_set_V0, 0, SIMD_EMIT_SETTER, SIMD_VERSION_SSE1 },
> +	{ SN_set_V1, 1, SIMD_EMIT_SETTER, SIMD_VERSION_SSE1 },
> +	{ SN_set_V2, 2, SIMD_EMIT_SETTER, SIMD_VERSION_SSE1 },
> +	{ SN_set_V3, 3, SIMD_EMIT_SETTER, SIMD_VERSION_SSE1 },
> +	{ SN_set_V4, 4, SIMD_EMIT_SETTER, SIMD_VERSION_SSE1 },
> +	{ SN_set_V5, 5, SIMD_EMIT_SETTER, SIMD_VERSION_SSE1 },
> +	{ SN_set_V6, 6, SIMD_EMIT_SETTER, SIMD_VERSION_SSE1 },
> +	{ SN_set_V7, 7, SIMD_EMIT_SETTER, SIMD_VERSION_SSE1 },
> };
> 
> static const SimdIntrinsc vector8s_intrinsics[] = {
> -	{ SN_ctor, OP_EXPAND_I2, SIMD_EMIT_CTOR },
> -	{ SN_AddWithSaturation, OP_PADDW_SAT, SIMD_EMIT_BINARY },
> -	{ SN_CompareEqual, OP_PCMPEQW, SIMD_EMIT_BINARY },
> -	{ SN_CompareGreaterThan, OP_PCMPGTW, SIMD_EMIT_BINARY },
> -	{ SN_LoadAligned, 0, SIMD_EMIT_LOAD_ALIGNED },
> -	{ SN_LogicalRightShift, OP_PSHRW, SIMD_EMIT_SHIFT },
> -	{ SN_Max, OP_PMAXW, SIMD_EMIT_BINARY },
> -	{ SN_Min, OP_PMINW, SIMD_EMIT_BINARY },
> -	{ SN_MultiplyStoreHigh, OP_PMULW_HIGH, SIMD_EMIT_BINARY },
> -	{ SN_PackWithSignedSaturation, OP_PACKW, SIMD_EMIT_BINARY },
> -	{ SN_PackWithUnsignedSaturation, OP_PACKW_UN, SIMD_EMIT_BINARY },
> +	{ SN_ctor, OP_EXPAND_I2, SIMD_EMIT_CTOR, SIMD_VERSION_SSE1 },
> +	{ SN_AddWithSaturation, OP_PADDW_SAT, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_CompareEqual, OP_PCMPEQW, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_CompareGreaterThan, OP_PCMPGTW, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_LoadAligned, 0, SIMD_EMIT_LOAD_ALIGNED, SIMD_VERSION_SSE1 },
> +	{ SN_LogicalRightShift, OP_PSHRW, SIMD_EMIT_SHIFT, SIMD_VERSION_SSE1 },
> +	{ SN_Max, OP_PMAXW, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_Min, OP_PMINW, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_MultiplyStoreHigh, OP_PMULW_HIGH, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_PackWithSignedSaturation, OP_PACKW, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_PackWithUnsignedSaturation, OP_PACKW_UN, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> 	{ SN_PrefetchTemporalAllCacheLevels, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_0 },
> 	{ SN_PrefetchTemporal1stLevelCache, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_1 },
> 	{ SN_PrefetchTemporal2ndLevelCache, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_2 },
> 	{ SN_PrefetchNonTemporal, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_NTA },
> -	{ SN_ShuffleHigh, OP_PSHUFLEW_HIGH, SIMD_EMIT_SHUFFLE },
> -	{ SN_ShuffleLow, OP_PSHUFLEW_LOW, SIMD_EMIT_SHUFFLE },
> -	{ SN_StoreAligned, OP_STOREX_ALIGNED_MEMBASE_REG, SIMD_EMIT_STORE },
> -	{ SN_SubtractWithSaturation, OP_PSUBW_SAT_UN, SIMD_EMIT_BINARY },
> -	{ SN_UnpackHigh, OP_UNPACK_HIGHW, SIMD_EMIT_BINARY },
> -	{ SN_UnpackLow, OP_UNPACK_LOWW, SIMD_EMIT_BINARY },
> -	{ SN_get_V0, 0, SIMD_EMIT_GETTER },
> -	{ SN_get_V1, 1, SIMD_EMIT_GETTER },
> -	{ SN_get_V2, 2, SIMD_EMIT_GETTER },
> -	{ SN_get_V3, 3, SIMD_EMIT_GETTER },
> -	{ SN_get_V4, 4, SIMD_EMIT_GETTER },
> -	{ SN_get_V5, 5, SIMD_EMIT_GETTER },
> -	{ SN_get_V6, 6, SIMD_EMIT_GETTER },
> -	{ SN_get_V7, 7, SIMD_EMIT_GETTER },
> -	{ SN_op_Addition, OP_PADDW, SIMD_EMIT_BINARY },
> -	{ SN_op_BitwiseAnd, OP_PAND, SIMD_EMIT_BINARY },
> -	{ SN_op_BitwiseOr, OP_POR, SIMD_EMIT_BINARY },
> +	{ SN_ShuffleHigh, OP_PSHUFLEW_HIGH, SIMD_EMIT_SHUFFLE, SIMD_VERSION_SSE1 },
> +	{ SN_ShuffleLow, OP_PSHUFLEW_LOW, SIMD_EMIT_SHUFFLE, SIMD_VERSION_SSE1 },
> +	{ SN_StoreAligned, OP_STOREX_ALIGNED_MEMBASE_REG, SIMD_EMIT_STORE, SIMD_VERSION_SSE1 },
> +	{ SN_SubtractWithSaturation, OP_PSUBW_SAT_UN, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_UnpackHigh, OP_UNPACK_HIGHW, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_UnpackLow, OP_UNPACK_LOWW, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_get_V0, 0, SIMD_EMIT_GETTER, SIMD_VERSION_SSE1 },
> +	{ SN_get_V1, 1, SIMD_EMIT_GETTER, SIMD_VERSION_SSE1 },
> +	{ SN_get_V2, 2, SIMD_EMIT_GETTER, SIMD_VERSION_SSE1 },
> +	{ SN_get_V3, 3, SIMD_EMIT_GETTER, SIMD_VERSION_SSE1 },
> +	{ SN_get_V4, 4, SIMD_EMIT_GETTER, SIMD_VERSION_SSE1 },
> +	{ SN_get_V5, 5, SIMD_EMIT_GETTER, SIMD_VERSION_SSE1 },
> +	{ SN_get_V6, 6, SIMD_EMIT_GETTER, SIMD_VERSION_SSE1 },
> +	{ SN_get_V7, 7, SIMD_EMIT_GETTER, SIMD_VERSION_SSE1 },
> +	{ SN_op_Addition, OP_PADDW, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_op_BitwiseAnd, OP_PAND, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_op_BitwiseOr, OP_POR, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> 	{ SN_op_Equality, OP_PCMPEQW, SIMD_EMIT_EQUALITY, SIMD_VERSION_SSE1, SIMD_COMP_EQ },
> -	{ SN_op_ExclusiveOr, OP_PXOR, SIMD_EMIT_BINARY },
> -	{ SN_op_Explicit, 0, SIMD_EMIT_CAST },
> +	{ SN_op_ExclusiveOr, OP_PXOR, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_op_Explicit, 0, SIMD_EMIT_CAST, SIMD_VERSION_SSE1 },
> 	{ SN_op_Inequality, OP_PCMPEQW, SIMD_EMIT_EQUALITY, SIMD_VERSION_SSE1, SIMD_COMP_NEQ },
> -	{ SN_op_LeftShift, OP_PSHLW, SIMD_EMIT_SHIFT },
> -	{ SN_op_Multiply, OP_PMULW, SIMD_EMIT_BINARY },
> -	{ SN_op_RightShift, OP_PSARW, SIMD_EMIT_SHIFT },
> -	{ SN_op_Subtraction, OP_PSUBW, SIMD_EMIT_BINARY },
> -	{ SN_set_V0, 0, SIMD_EMIT_SETTER },
> -	{ SN_set_V1, 1, SIMD_EMIT_SETTER },
> -	{ SN_set_V2, 2, SIMD_EMIT_SETTER },
> -	{ SN_set_V3, 3, SIMD_EMIT_SETTER },
> -	{ SN_set_V4, 4, SIMD_EMIT_SETTER },
> -	{ SN_set_V5, 5, SIMD_EMIT_SETTER },
> -	{ SN_set_V6, 6, SIMD_EMIT_SETTER },
> -	{ SN_set_V7, 7, SIMD_EMIT_SETTER },
> +	{ SN_op_LeftShift, OP_PSHLW, SIMD_EMIT_SHIFT, SIMD_VERSION_SSE1 },
> +	{ SN_op_Multiply, OP_PMULW, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_op_RightShift, OP_PSARW, SIMD_EMIT_SHIFT, SIMD_VERSION_SSE1 },
> +	{ SN_op_Subtraction, OP_PSUBW, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_set_V0, 0, SIMD_EMIT_SETTER, SIMD_VERSION_SSE1 },
> +	{ SN_set_V1, 1, SIMD_EMIT_SETTER, SIMD_VERSION_SSE1 },
> +	{ SN_set_V2, 2, SIMD_EMIT_SETTER, SIMD_VERSION_SSE1 },
> +	{ SN_set_V3, 3, SIMD_EMIT_SETTER, SIMD_VERSION_SSE1 },
> +	{ SN_set_V4, 4, SIMD_EMIT_SETTER, SIMD_VERSION_SSE1 },
> +	{ SN_set_V5, 5, SIMD_EMIT_SETTER, SIMD_VERSION_SSE1 },
> +	{ SN_set_V6, 6, SIMD_EMIT_SETTER, SIMD_VERSION_SSE1 },
> +	{ SN_set_V7, 7, SIMD_EMIT_SETTER, SIMD_VERSION_SSE1 },
> };
> 
> static const SimdIntrinsc vector16b_intrinsics[] = {
> -	{ SN_ctor, OP_EXPAND_I1, SIMD_EMIT_CTOR },
> -	{ SN_AddWithSaturation, OP_PADDB_SAT_UN, SIMD_EMIT_BINARY },
> -	{ SN_Average, OP_PAVGB_UN, SIMD_EMIT_BINARY },
> -	{ SN_CompareEqual, OP_PCMPEQB, SIMD_EMIT_BINARY },
> -	{ SN_ExtractByteMask, 0, SIMD_EMIT_EXTRACT_MASK },
> -	{ SN_LoadAligned, 0, SIMD_EMIT_LOAD_ALIGNED },
> -	{ SN_Max, OP_PMAXB_UN, SIMD_EMIT_BINARY },
> -	{ SN_Min, OP_PMINB_UN, SIMD_EMIT_BINARY },
> +	{ SN_ctor, OP_EXPAND_I1, SIMD_EMIT_CTOR, SIMD_VERSION_SSE1 },
> +	{ SN_AddWithSaturation, OP_PADDB_SAT_UN, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_Average, OP_PAVGB_UN, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_CompareEqual, OP_PCMPEQB, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_ExtractByteMask, 0, SIMD_EMIT_EXTRACT_MASK, SIMD_VERSION_SSE1 },
> +	{ SN_LoadAligned, 0, SIMD_EMIT_LOAD_ALIGNED, SIMD_VERSION_SSE1 },
> +	{ SN_Max, OP_PMAXB_UN, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_Min, OP_PMINB_UN, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> 	{ SN_PrefetchTemporalAllCacheLevels, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_0 },
> 	{ SN_PrefetchTemporal1stLevelCache, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_1 },
> 	{ SN_PrefetchTemporal2ndLevelCache, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_2 },
> 	{ SN_PrefetchNonTemporal, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_NTA },
> -	{ SN_StoreAligned, OP_STOREX_ALIGNED_MEMBASE_REG, SIMD_EMIT_STORE },
> -	{ SN_SubtractWithSaturation, OP_PSUBB_SAT_UN, SIMD_EMIT_BINARY },
> -	{ SN_SumOfAbsoluteDifferences, OP_PSUM_ABS_DIFF, SIMD_EMIT_BINARY },
> -	{ SN_UnpackHigh, OP_UNPACK_HIGHB, SIMD_EMIT_BINARY },
> -	{ SN_UnpackLow, OP_UNPACK_LOWB, SIMD_EMIT_BINARY },
> -	{ SN_get_V0, 0, SIMD_EMIT_GETTER },
> -	{ SN_get_V1, 1, SIMD_EMIT_GETTER },
> -	{ SN_get_V10, 10, SIMD_EMIT_GETTER },
> -	{ SN_get_V11, 11, SIMD_EMIT_GETTER },
> -	{ SN_get_V12, 12, SIMD_EMIT_GETTER },
> -	{ SN_get_V13, 13, SIMD_EMIT_GETTER },
> -	{ SN_get_V14, 14, SIMD_EMIT_GETTER },
> -	{ SN_get_V15, 15, SIMD_EMIT_GETTER },
> -	{ SN_get_V2, 2, SIMD_EMIT_GETTER },
> -	{ SN_get_V3, 3, SIMD_EMIT_GETTER },
> -	{ SN_get_V4, 4, SIMD_EMIT_GETTER },
> -	{ SN_get_V5, 5, SIMD_EMIT_GETTER },
> -	{ SN_get_V6, 6, SIMD_EMIT_GETTER },
> -	{ SN_get_V7, 7, SIMD_EMIT_GETTER },
> -	{ SN_get_V8, 8, SIMD_EMIT_GETTER },
> -	{ SN_get_V9, 9, SIMD_EMIT_GETTER },
> -	{ SN_op_Addition, OP_PADDB, SIMD_EMIT_BINARY },
> -	{ SN_op_BitwiseAnd, OP_PAND, SIMD_EMIT_BINARY },
> -	{ SN_op_BitwiseOr, OP_POR, SIMD_EMIT_BINARY },
> +	{ SN_StoreAligned, OP_STOREX_ALIGNED_MEMBASE_REG, SIMD_EMIT_STORE, SIMD_VERSION_SSE1 },
> +	{ SN_SubtractWithSaturation, OP_PSUBB_SAT_UN, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_SumOfAbsoluteDifferences, OP_PSUM_ABS_DIFF, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_UnpackHigh, OP_UNPACK_HIGHB, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_UnpackLow, OP_UNPACK_LOWB, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_get_V0, 0, SIMD_EMIT_GETTER, SIMD_VERSION_SSE1 },
> +	{ SN_get_V1, 1, SIMD_EMIT_GETTER, SIMD_VERSION_SSE1 },
> +	{ SN_get_V10, 10, SIMD_EMIT_GETTER, SIMD_VERSION_SSE1 },
> +	{ SN_get_V11, 11, SIMD_EMIT_GETTER, SIMD_VERSION_SSE1 },
> +	{ SN_get_V12, 12, SIMD_EMIT_GETTER, SIMD_VERSION_SSE1 },
> +	{ SN_get_V13, 13, SIMD_EMIT_GETTER, SIMD_VERSION_SSE1 },
> +	{ SN_get_V14, 14, SIMD_EMIT_GETTER, SIMD_VERSION_SSE1 },
> +	{ SN_get_V15, 15, SIMD_EMIT_GETTER, SIMD_VERSION_SSE1 },
> +	{ SN_get_V2, 2, SIMD_EMIT_GETTER, SIMD_VERSION_SSE1 },
> +	{ SN_get_V3, 3, SIMD_EMIT_GETTER, SIMD_VERSION_SSE1 },
> +	{ SN_get_V4, 4, SIMD_EMIT_GETTER, SIMD_VERSION_SSE1 },
> +	{ SN_get_V5, 5, SIMD_EMIT_GETTER, SIMD_VERSION_SSE1 },
> +	{ SN_get_V6, 6, SIMD_EMIT_GETTER, SIMD_VERSION_SSE1 },
> +	{ SN_get_V7, 7, SIMD_EMIT_GETTER, SIMD_VERSION_SSE1 },
> +	{ SN_get_V8, 8, SIMD_EMIT_GETTER, SIMD_VERSION_SSE1 },
> +	{ SN_get_V9, 9, SIMD_EMIT_GETTER, SIMD_VERSION_SSE1 },
> +	{ SN_op_Addition, OP_PADDB, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_op_BitwiseAnd, OP_PAND, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_op_BitwiseOr, OP_POR, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> 	{ SN_op_Equality, OP_PCMPEQB, SIMD_EMIT_EQUALITY, SIMD_VERSION_SSE1, SIMD_COMP_EQ },
> -	{ SN_op_ExclusiveOr, OP_PXOR, SIMD_EMIT_BINARY },
> -	{ SN_op_Explicit, 0, SIMD_EMIT_CAST },
> +	{ SN_op_ExclusiveOr, OP_PXOR, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_op_Explicit, 0, SIMD_EMIT_CAST, SIMD_VERSION_SSE1 },
> 	{ SN_op_Inequality, OP_PCMPEQB, SIMD_EMIT_EQUALITY, SIMD_VERSION_SSE1, SIMD_COMP_NEQ },
> -	{ SN_op_Subtraction, OP_PSUBB, SIMD_EMIT_BINARY },
> -	{ SN_set_V0, 0, SIMD_EMIT_SETTER },
> -	{ SN_set_V1, 1, SIMD_EMIT_SETTER },
> -	{ SN_set_V10, 10, SIMD_EMIT_SETTER },
> -	{ SN_set_V11, 11, SIMD_EMIT_SETTER },
> -	{ SN_set_V12, 12, SIMD_EMIT_SETTER },
> -	{ SN_set_V13, 13, SIMD_EMIT_SETTER },
> -	{ SN_set_V14, 14, SIMD_EMIT_SETTER },
> -	{ SN_set_V15, 15, SIMD_EMIT_SETTER },
> -	{ SN_set_V2, 2, SIMD_EMIT_SETTER },
> -	{ SN_set_V3, 3, SIMD_EMIT_SETTER },
> -	{ SN_set_V4, 4, SIMD_EMIT_SETTER },
> -	{ SN_set_V5, 5, SIMD_EMIT_SETTER },
> -	{ SN_set_V6, 6, SIMD_EMIT_SETTER },
> -	{ SN_set_V7, 7, SIMD_EMIT_SETTER },
> -	{ SN_set_V8, 8, SIMD_EMIT_SETTER },
> -	{ SN_set_V9, 9, SIMD_EMIT_SETTER },
> +	{ SN_op_Subtraction, OP_PSUBB, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_set_V0, 0, SIMD_EMIT_SETTER, SIMD_VERSION_SSE1 },
> +	{ SN_set_V1, 1, SIMD_EMIT_SETTER, SIMD_VERSION_SSE1 },
> +	{ SN_set_V10, 10, SIMD_EMIT_SETTER, SIMD_VERSION_SSE1 },
> +	{ SN_set_V11, 11, SIMD_EMIT_SETTER, SIMD_VERSION_SSE1 },
> +	{ SN_set_V12, 12, SIMD_EMIT_SETTER, SIMD_VERSION_SSE1 },
> +	{ SN_set_V13, 13, SIMD_EMIT_SETTER, SIMD_VERSION_SSE1 },
> +	{ SN_set_V14, 14, SIMD_EMIT_SETTER, SIMD_VERSION_SSE1 },
> +	{ SN_set_V15, 15, SIMD_EMIT_SETTER, SIMD_VERSION_SSE1 },
> +	{ SN_set_V2, 2, SIMD_EMIT_SETTER, SIMD_VERSION_SSE1 },
> +	{ SN_set_V3, 3, SIMD_EMIT_SETTER, SIMD_VERSION_SSE1 },
> +	{ SN_set_V4, 4, SIMD_EMIT_SETTER, SIMD_VERSION_SSE1 },
> +	{ SN_set_V5, 5, SIMD_EMIT_SETTER, SIMD_VERSION_SSE1 },
> +	{ SN_set_V6, 6, SIMD_EMIT_SETTER, SIMD_VERSION_SSE1 },
> +	{ SN_set_V7, 7, SIMD_EMIT_SETTER, SIMD_VERSION_SSE1 },
> +	{ SN_set_V8, 8, SIMD_EMIT_SETTER, SIMD_VERSION_SSE1 },
> +	{ SN_set_V9, 9, SIMD_EMIT_SETTER, SIMD_VERSION_SSE1 },
> };
> 
> /*
> @@ -514,62 +514,62 @@
> setters
>  */
> static const SimdIntrinsc vector16sb_intrinsics[] = {
> -	{ SN_ctor, OP_EXPAND_I1, SIMD_EMIT_CTOR },
> -	{ SN_AddWithSaturation, OP_PADDB_SAT, SIMD_EMIT_BINARY },
> -	{ SN_CompareEqual, OP_PCMPEQB, SIMD_EMIT_BINARY },
> -	{ SN_CompareGreaterThan, OP_PCMPGTB, SIMD_EMIT_BINARY },
> -	{ SN_ExtractByteMask, 0, SIMD_EMIT_EXTRACT_MASK },
> -	{ SN_LoadAligned, 0, SIMD_EMIT_LOAD_ALIGNED },
> +	{ SN_ctor, OP_EXPAND_I1, SIMD_EMIT_CTOR, SIMD_VERSION_SSE1 },
> +	{ SN_AddWithSaturation, OP_PADDB_SAT, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_CompareEqual, OP_PCMPEQB, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_CompareGreaterThan, OP_PCMPGTB, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_ExtractByteMask, 0, SIMD_EMIT_EXTRACT_MASK, SIMD_VERSION_SSE1 },
> +	{ SN_LoadAligned, 0, SIMD_EMIT_LOAD_ALIGNED, SIMD_VERSION_SSE1 },
> 	{ SN_Max, OP_PMAXB, SIMD_EMIT_BINARY, SIMD_VERSION_SSE41 },
> 	{ SN_Min, OP_PMINB, SIMD_EMIT_BINARY, SIMD_VERSION_SSE41 },
> 	{ SN_PrefetchTemporalAllCacheLevels, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_0 },
> 	{ SN_PrefetchTemporal1stLevelCache, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_1 },
> 	{ SN_PrefetchTemporal2ndLevelCache, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_2 },
> 	{ SN_PrefetchNonTemporal, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_NTA },
> -	{ SN_StoreAligned, OP_STOREX_ALIGNED_MEMBASE_REG, SIMD_EMIT_STORE },
> -	{ SN_SubtractWithSaturation, OP_PSUBB_SAT, SIMD_EMIT_BINARY },
> -	{ SN_UnpackHigh, OP_UNPACK_HIGHB, SIMD_EMIT_BINARY },
> -	{ SN_UnpackLow, OP_UNPACK_LOWB, SIMD_EMIT_BINARY },
> -	{ SN_get_V0, 0, SIMD_EMIT_GETTER },
> -	{ SN_get_V1, 1, SIMD_EMIT_GETTER },
> -	{ SN_get_V10, 10, SIMD_EMIT_GETTER },
> -	{ SN_get_V11, 11, SIMD_EMIT_GETTER },
> -	{ SN_get_V12, 12, SIMD_EMIT_GETTER },
> -	{ SN_get_V13, 13, SIMD_EMIT_GETTER },
> -	{ SN_get_V14, 14, SIMD_EMIT_GETTER },
> -	{ SN_get_V15, 15, SIMD_EMIT_GETTER },
> -	{ SN_get_V2, 2, SIMD_EMIT_GETTER },
> -	{ SN_get_V3, 3, SIMD_EMIT_GETTER },
> -	{ SN_get_V4, 4, SIMD_EMIT_GETTER },
> -	{ SN_get_V5, 5, SIMD_EMIT_GETTER },
> -	{ SN_get_V6, 6, SIMD_EMIT_GETTER },
> -	{ SN_get_V7, 7, SIMD_EMIT_GETTER },
> -	{ SN_get_V8, 8, SIMD_EMIT_GETTER },
> -	{ SN_get_V9, 9, SIMD_EMIT_GETTER },
> -	{ SN_op_Addition, OP_PADDB, SIMD_EMIT_BINARY },
> -	{ SN_op_BitwiseAnd, OP_PAND, SIMD_EMIT_BINARY },
> -	{ SN_op_BitwiseOr, OP_POR, SIMD_EMIT_BINARY },
> +	{ SN_StoreAligned, OP_STOREX_ALIGNED_MEMBASE_REG, SIMD_EMIT_STORE, SIMD_VERSION_SSE1 },
> +	{ SN_SubtractWithSaturation, OP_PSUBB_SAT, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_UnpackHigh, OP_UNPACK_HIGHB, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_UnpackLow, OP_UNPACK_LOWB, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_get_V0, 0, SIMD_EMIT_GETTER, SIMD_VERSION_SSE1 },
> +	{ SN_get_V1, 1, SIMD_EMIT_GETTER, SIMD_VERSION_SSE1 },
> +	{ SN_get_V10, 10, SIMD_EMIT_GETTER, SIMD_VERSION_SSE1 },
> +	{ SN_get_V11, 11, SIMD_EMIT_GETTER, SIMD_VERSION_SSE1 },
> +	{ SN_get_V12, 12, SIMD_EMIT_GETTER, SIMD_VERSION_SSE1 },
> +	{ SN_get_V13, 13, SIMD_EMIT_GETTER, SIMD_VERSION_SSE1 },
> +	{ SN_get_V14, 14, SIMD_EMIT_GETTER, SIMD_VERSION_SSE1 },
> +	{ SN_get_V15, 15, SIMD_EMIT_GETTER, SIMD_VERSION_SSE1 },
> +	{ SN_get_V2, 2, SIMD_EMIT_GETTER, SIMD_VERSION_SSE1 },
> +	{ SN_get_V3, 3, SIMD_EMIT_GETTER, SIMD_VERSION_SSE1 },
> +	{ SN_get_V4, 4, SIMD_EMIT_GETTER, SIMD_VERSION_SSE1 },
> +	{ SN_get_V5, 5, SIMD_EMIT_GETTER, SIMD_VERSION_SSE1 },
> +	{ SN_get_V6, 6, SIMD_EMIT_GETTER, SIMD_VERSION_SSE1 },
> +	{ SN_get_V7, 7, SIMD_EMIT_GETTER, SIMD_VERSION_SSE1 },
> +	{ SN_get_V8, 8, SIMD_EMIT_GETTER, SIMD_VERSION_SSE1 },
> +	{ SN_get_V9, 9, SIMD_EMIT_GETTER, SIMD_VERSION_SSE1 },
> +	{ SN_op_Addition, OP_PADDB, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_op_BitwiseAnd, OP_PAND, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_op_BitwiseOr, OP_POR, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> 	{ SN_op_Equality, OP_PCMPEQB, SIMD_EMIT_EQUALITY, SIMD_VERSION_SSE1, SIMD_COMP_EQ },
> -	{ SN_op_ExclusiveOr, OP_PXOR, SIMD_EMIT_BINARY },
> -	{ SN_op_Explicit, 0, SIMD_EMIT_CAST },
> +	{ SN_op_ExclusiveOr, OP_PXOR, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_op_Explicit, 0, SIMD_EMIT_CAST, SIMD_VERSION_SSE1 },
> 	{ SN_op_Inequality, OP_PCMPEQB, SIMD_EMIT_EQUALITY, SIMD_VERSION_SSE1, SIMD_COMP_NEQ },
> -	{ SN_op_Subtraction, OP_PSUBB, SIMD_EMIT_BINARY },
> -	{ SN_set_V0, 0, SIMD_EMIT_SETTER },
> -	{ SN_set_V1, 1, SIMD_EMIT_SETTER },
> -	{ SN_set_V10, 10, SIMD_EMIT_SETTER },
> -	{ SN_set_V11, 11, SIMD_EMIT_SETTER },
> -	{ SN_set_V12, 12, SIMD_EMIT_SETTER },
> -	{ SN_set_V13, 13, SIMD_EMIT_SETTER },
> -	{ SN_set_V14, 14, SIMD_EMIT_SETTER },
> -	{ SN_set_V15, 15, SIMD_EMIT_SETTER },
> -	{ SN_set_V2, 2, SIMD_EMIT_SETTER },
> -	{ SN_set_V3, 3, SIMD_EMIT_SETTER },
> -	{ SN_set_V4, 4, SIMD_EMIT_SETTER },
> -	{ SN_set_V5, 5, SIMD_EMIT_SETTER },
> -	{ SN_set_V6, 6, SIMD_EMIT_SETTER },
> -	{ SN_set_V7, 7, SIMD_EMIT_SETTER },
> -	{ SN_set_V8, 8, SIMD_EMIT_SETTER },
> -	{ SN_set_V9, 9, SIMD_EMIT_SETTER },
> +	{ SN_op_Subtraction, OP_PSUBB, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
> +	{ SN_set_V0, 0, SIMD_EMIT_SETTER, SIMD_VERSION_SSE1 },
> +	{ SN_set_V1, 1, SIMD_EMIT_SETTER, SIMD_VERSION_SSE1 },
> +	{ SN_set_V10, 10, SIMD_EMIT_SETTER, SIMD_VERSION_SSE1 },
> +	{ SN_set_V11, 11, SIMD_EMIT_SETTER, SIMD_VERSION_SSE1 },
> +	{ SN_set_V12, 12, SIMD_EMIT_SETTER, SIMD_VERSION_SSE1 },
> +	{ SN_set_V13, 13, SIMD_EMIT_SETTER, SIMD_VERSION_SSE1 },
> +	{ SN_set_V14, 14, SIMD_EMIT_SETTER, SIMD_VERSION_SSE1 },
> +	{ SN_set_V15, 15, SIMD_EMIT_SETTER, SIMD_VERSION_SSE1 },
> +	{ SN_set_V2, 2, SIMD_EMIT_SETTER, SIMD_VERSION_SSE1 },
> +	{ SN_set_V3, 3, SIMD_EMIT_SETTER, SIMD_VERSION_SSE1 },
> +	{ SN_set_V4, 4, SIMD_EMIT_SETTER, SIMD_VERSION_SSE1 },
> +	{ SN_set_V5, 5, SIMD_EMIT_SETTER, SIMD_VERSION_SSE1 },
> +	{ SN_set_V6, 6, SIMD_EMIT_SETTER, SIMD_VERSION_SSE1 },
> +	{ SN_set_V7, 7, SIMD_EMIT_SETTER, SIMD_VERSION_SSE1 },
> +	{ SN_set_V8, 8, SIMD_EMIT_SETTER, SIMD_VERSION_SSE1 },
> +	{ SN_set_V9, 9, SIMD_EMIT_SETTER, SIMD_VERSION_SSE1 },
> };
> 
> static guint32 simd_supported_versions;
> @@ -1381,9 +1381,16 @@
> 			mono_print_ins (args [i]);
> 		}
> 	}
> -	if (result->simd_version && !((1 << result->simd_version) & simd_supported_versions)) {
> -		if (IS_DEBUG_ON (cfg))
> -			printf ("function %s::%s/%d requires unsuported SIMD instruction set %s \n", cmethod->klass->name, cmethod->name, fsig->param_count, simd_version_name (result->simd_version));
> +	if (result->simd_version_flags && !(result->simd_version_flags & simd_supported_versions)) {
> +		if (IS_DEBUG_ON (cfg)) {
> +			int x;
> +			printf ("function %s::%s/%d requires one of unsuported SIMD instruction set(s): ", cmethod->klass->name, cmethod->name, fsig->param_count);
> +			for (x = 1; x & SIMD_VERSION_ALL; x <<= 1)
> +				if (result->simd_version_flags & x)
> +					printf ("%s ", simd_version_name (x));
> +
> +			printf ("\n");
> +		}
> 		return NULL;
> 	}
> 
> Index: mono/mini/mini.h
> ===================================================================
> --- mono/mini/mini.h	(revision 149699)
> +++ mono/mini/mini.h	(working copy)
> @@ -1918,18 +1918,23 @@
> 
> /*
> This enum MUST be kept in sync with its managed mirror Mono.Simd.AccelMode.
> -The AccelMode values are masks while the ones here are the bit indexes.
>  */
> enum {
> -	SIMD_VERSION_SSE1	= 0,
> -	SIMD_VERSION_SSE2	= 1,
> -	SIMD_VERSION_SSE3	= 2,
> -	SIMD_VERSION_SSSE3	= 3,
> -	SIMD_VERSION_SSE41	= 4,
> -	SIMD_VERSION_SSE42	= 5,
> -	SIMD_VERSION_SSE4a	= 6,
> +	SIMD_VERSION_SSE1	= 1 << 0,
> +	SIMD_VERSION_SSE2	= 1 << 1,
> +	SIMD_VERSION_SSE3	= 1 << 2,
> +	SIMD_VERSION_SSSE3	= 1 << 3,
> +	SIMD_VERSION_SSE41	= 1 << 4,
> +	SIMD_VERSION_SSE42	= 1 << 5,
> +	SIMD_VERSION_SSE4a	= 1 << 6,
> +	SIMD_VERSION_ALL	= SIMD_VERSION_SSE1 | SIMD_VERSION_SSE2 |
> +			  SIMD_VERSION_SSE3 | SIMD_VERSION_SSSE3 |
> +			  SIMD_VERSION_SSE41 | SIMD_VERSION_SSE42 |
> +			  SIMD_VERSION_SSE4a 
> };
> 
> +#define MASK(x) (1 << x)
> +
> enum {
> 	SIMD_COMP_EQ,
> 	SIMD_COMP_LT,
> Index: mono/mini/mini-amd64.c
> ===================================================================
> --- mono/mini/mini-amd64.c	(revision 149699)
> +++ mono/mini/mini-amd64.c	(working copy)
> @@ -966,17 +966,17 @@
> 
> 	if (cpuid (1, &eax, &ebx, &ecx, &edx)) {
> 		if (edx & (1 << 25))
> -			sse_opts |= 1 << SIMD_VERSION_SSE1;
> +			sse_opts |= SIMD_VERSION_SSE1;
> 		if (edx & (1 << 26))
> -			sse_opts |= 1 << SIMD_VERSION_SSE2;
> +			sse_opts |= SIMD_VERSION_SSE2;
> 		if (ecx & (1 << 0))
> -			sse_opts |= 1 << SIMD_VERSION_SSE3;
> +			sse_opts |= SIMD_VERSION_SSE3;
> 		if (ecx & (1 << 9))
> -			sse_opts |= 1 << SIMD_VERSION_SSSE3;
> +			sse_opts |= SIMD_VERSION_SSSE3;
> 		if (ecx & (1 << 19))
> -			sse_opts |= 1 << SIMD_VERSION_SSE41;
> +			sse_opts |= SIMD_VERSION_SSE41;
> 		if (ecx & (1 << 20))
> -			sse_opts |= 1 << SIMD_VERSION_SSE42;
> +			sse_opts |= SIMD_VERSION_SSE42;
> 	}
> 
> 	/* Yes, all this needs to be done to check for sse4a.
> @@ -987,7 +987,7 @@
> 		if ((((unsigned int) eax) >= 0x80000001) && (ebx == 0x68747541) && (ecx == 0x444D4163) && (edx == 0x69746E65)) {
> 			cpuid (0x80000001, &eax, &ebx, &ecx, &edx);
> 			if (ecx & (1 << 6))
> -				sse_opts |= 1 << SIMD_VERSION_SSE4a;
> +				sse_opts |= SIMD_VERSION_SSE4a;
> 		}
> 	}
> 
> Index: mono/mini/mini-x86.c
> ===================================================================
> --- mono/mini/mini-x86.c	(revision 149699)
> +++ mono/mini/mini-x86.c	(working copy)
> @@ -733,17 +733,17 @@
> 
> 	if (cpuid (1, &eax, &ebx, &ecx, &edx)) {
> 		if (edx & (1 << 25))
> -			sse_opts |= 1 << SIMD_VERSION_SSE1;
> +			sse_opts |= SIMD_VERSION_SSE1;
> 		if (edx & (1 << 26))
> -			sse_opts |= 1 << SIMD_VERSION_SSE2;
> +			sse_opts |= SIMD_VERSION_SSE2;
> 		if (ecx & (1 << 0))
> -			sse_opts |= 1 << SIMD_VERSION_SSE3;
> +			sse_opts |= SIMD_VERSION_SSE3;
> 		if (ecx & (1 << 9))
> -			sse_opts |= 1 << SIMD_VERSION_SSSE3;
> +			sse_opts |= SIMD_VERSION_SSSE3;
> 		if (ecx & (1 << 19))
> -			sse_opts |= 1 << SIMD_VERSION_SSE41;
> +			sse_opts |= SIMD_VERSION_SSE41;
> 		if (ecx & (1 << 20))
> -			sse_opts |= 1 << SIMD_VERSION_SSE42;
> +			sse_opts |= SIMD_VERSION_SSE42;
> 	}
> 
> 	/* Yes, all this needs to be done to check for sse4a.
> @@ -754,7 +754,7 @@
> 		if ((((unsigned int) eax) >= 0x80000001) && (ebx == 0x68747541) && (ecx == 0x444D4163) && (edx == 0x69746E65)) {
> 			cpuid (0x80000001, &eax, &ebx, &ecx, &edx);
> 			if (ecx & (1 << 6))
> -				sse_opts |= 1 << SIMD_VERSION_SSE4a;
> +				sse_opts |= SIMD_VERSION_SSE4a;
> 		}
> 	}
> 
> _______________________________________________
> Mono-devel-list mailing list
> Mono-devel-list at lists.ximian.com
> http://lists.ximian.com/mailman/listinfo/mono-devel-list



More information about the Mono-devel-list mailing list