[Mono-dev] sse_mathfun convert
jetthink
jetthink at gmail.com
Thu Oct 1 11:23:11 EDT 2009
Hi,
I have converted exp_ps (from http://gruntthepeon.free.fr/ssemath/) to
Mono, using Mono.Simd.
using System;
using Mono.Simd;
/// <summary>
/// Helper extensions for Mono.Simd vectors plus a port of the <c>exp_ps</c>
/// routine from sse_mathfun (a Cephes-style single-precision exponential
/// evaluated on four lanes at once).
/// </summary>
public static class Myext
{
    /// <summary>
    /// Shifts every 32-bit lane of <paramref name="v1"/> left by
    /// <paramref name="amount"/> bits, filling with zeros.
    /// </summary>
    /// <param name="v1">Vector whose four lanes are shifted.</param>
    /// <param name="amount">Number of bit positions to shift.</param>
    /// <returns>A new vector holding the shifted lanes.</returns>
    public static unsafe Vector4i LogicalLeftShift(this Vector4i v1, int amount)
    {
        Vector4i res = new Vector4i();
        int* src = (int*)&v1;
        int* dst = (int*)&res;
        for (int i = 0; i < 4; ++i)
        {
            // Shift as unsigned so the operation is purely logical.
            dst[i] = (int)((uint)src[i] << amount);
        }
        return res;
    }

    /// <summary>
    /// Shifts every 32-bit lane of <paramref name="v1"/> left by
    /// <paramref name="amount"/> bits, filling with zeros.
    /// </summary>
    /// <param name="v1">Vector whose four lanes are shifted.</param>
    /// <param name="amount">Number of bit positions to shift.</param>
    /// <returns>A new vector holding the shifted lanes.</returns>
    public static unsafe Vector4ui LogicalLeftShift(this Vector4ui v1, int amount)
    {
        Vector4ui res = new Vector4ui();
        uint* src = (uint*)&v1;
        uint* dst = (uint*)&res;
        for (int i = 0; i < 4; ++i)
        {
            dst[i] = src[i] << amount;
        }
        return res;
    }

    /// <summary>
    /// Numerically converts each signed integer lane to a float
    /// (a value conversion, not a bit reinterpretation).
    /// </summary>
    /// <param name="v1">Integer vector to convert.</param>
    /// <returns>The four lanes converted with a C# <c>(float)</c> cast.</returns>
    public static unsafe Vector4f Cast2Vector4f(this Vector4i v1)
    {
        Vector4f res = new Vector4f();
        int* src = (int*)&v1;
        float* dst = (float*)&res;
        for (int i = 0; i < 4; ++i)
        {
            dst[i] = (float)src[i];
        }
        return res;
    }

    /// <summary>
    /// Numerically converts each unsigned integer lane to a float
    /// (a value conversion, not a bit reinterpretation).
    /// </summary>
    /// <param name="v1">Unsigned integer vector to convert.</param>
    /// <returns>The four lanes converted with a C# <c>(float)</c> cast.</returns>
    public static unsafe Vector4f Cast2Vector4f(this Vector4ui v1)
    {
        Vector4f res = new Vector4f();
        uint* src = (uint*)&v1;
        float* dst = (float*)&res;
        for (int i = 0; i < 4; ++i)
        {
            dst[i] = (float)src[i];
        }
        return res;
    }

    /// <summary>
    /// Numerically converts each float lane to a signed integer using a
    /// C# <c>(int)</c> cast, i.e. truncation toward zero.
    /// </summary>
    /// <param name="v1">Float vector to convert.</param>
    /// <returns>The four truncated lanes.</returns>
    public static unsafe Vector4i Cast2Vector4i(this Vector4f v1)
    {
        Vector4i res = new Vector4i();
        float* src = (float*)&v1;
        int* dst = (int*)&res;
        for (int i = 0; i < 4; ++i)
        {
            dst[i] = (int)src[i];
        }
        return res;
    }

    /// <summary>
    /// Numerically converts each float lane to an unsigned integer using a
    /// C# <c>(uint)</c> cast.
    /// </summary>
    /// <param name="v1">Float vector to convert.</param>
    /// <returns>The four converted lanes.</returns>
    public static unsafe Vector4ui Cast2Vector4ui(this Vector4f v1)
    {
        Vector4ui res = new Vector4ui();
        float* src = (float*)&v1;
        uint* dst = (uint*)&res;
        for (int i = 0; i < 4; ++i)
        {
            dst[i] = (uint)src[i];
        }
        return res;
    }

    // Broadcast constants used by ExpSSE.
    static readonly Vector4f v4sf_0p5 = new Vector4f(0.5f);
    // 0x7f = 127, added to n before it is shifted into the float exponent field.
    static readonly Vector4i v4si_0x7f = new Vector4i(0x7f);
    static readonly Vector4f v4sf_one = Vector4f.One;
    // Input clamp limits applied before evaluating the exponential.
    static readonly Vector4f v4sf_exp_hi = new Vector4f(88.3762626647949f);
    static readonly Vector4f v4sf_exp_lo = new Vector4f(-88.3762626647949f);
    // log2(e)
    static readonly Vector4f v4sf_cephes_LOG2EF = new Vector4f(1.44269504088896341f);
    // C1 + C2 == ln(2); subtracting n*ln(2) in two steps keeps more precision.
    static readonly Vector4f v4sf_cephes_exp_C1 = new Vector4f(0.693359375f);
    static readonly Vector4f v4sf_cephes_exp_C2 = new Vector4f(-2.12194440e-4f);
    // Polynomial coefficients, highest order first.
    static readonly Vector4f v4sf_cephes_exp_p0 = new Vector4f(1.9875691500E-4f);
    static readonly Vector4f v4sf_cephes_exp_p1 = new Vector4f(1.3981999507E-3f);
    static readonly Vector4f v4sf_cephes_exp_p2 = new Vector4f(8.3334519073E-3f);
    static readonly Vector4f v4sf_cephes_exp_p3 = new Vector4f(4.1665795894E-2f);
    static readonly Vector4f v4sf_cephes_exp_p4 = new Vector4f(1.6666665459E-1f);
    static readonly Vector4f v4sf_cephes_exp_p5 = new Vector4f(5.0000001201E-1f);

    /// <summary>
    /// Computes an approximation of e^x for each of the four lanes of
    /// <paramref name="x"/>.
    /// </summary>
    /// <param name="x">
    /// Input values; each lane is clamped to roughly [-88.376, 88.376]
    /// before evaluation.
    /// </param>
    /// <returns>A vector whose lanes approximate exp of the (clamped) inputs.</returns>
    public static Vector4f ExpSSE(Vector4f x)
    {
        x = VectorOperations.Min(x, v4sf_exp_hi);
        x = VectorOperations.Max(x, v4sf_exp_lo);

        /* express exp(x) as exp(g + n*log(2)) */
        Vector4f fx = x * v4sf_cephes_LOG2EF;
        fx = fx + v4sf_0p5;

        /* floor(fx): truncate toward zero, then subtract 1 in the lanes
           where truncation produced a value greater than fx */
        Vector4i emm0 = Cast2Vector4i(fx);
        Vector4f tmp = Cast2Vector4f(emm0);
        // Strictly "fx < tmp" (the reference uses cmpgt(tmp, fx)); using
        // "<=" would also subtract 1 when fx is already an exact integer.
        Vector4f mask = VectorOperations.CompareLessThan(fx, tmp);
        mask = mask & v4sf_one;
        fx = tmp - mask;

        /* g = x - n*ln(2), with ln(2) split into C1 + C2 */
        tmp = fx * v4sf_cephes_exp_C1;
        Vector4f z = fx * v4sf_cephes_exp_C2;
        x = x - tmp;
        x = x - z;

        /* polynomial (Horner form): exp(g) ~ 1 + g + g^2 * P(g) */
        z = x * x;
        Vector4f y = v4sf_cephes_exp_p0;
        y = y * x;
        y = y + v4sf_cephes_exp_p1;
        y = y * x;
        y = y + v4sf_cephes_exp_p2;
        y = y * x;
        y = y + v4sf_cephes_exp_p3;
        y = y * x;
        y = y + v4sf_cephes_exp_p4;
        y = y * x;
        y = y + v4sf_cephes_exp_p5;
        y = y * z;
        y = y + x;
        y = y + v4sf_one;

        /* build 2^n: place (n + 127) into the exponent bits of a float */
        emm0 = Cast2Vector4i(fx);
        emm0 = emm0 + v4si_0x7f;
        emm0 = LogicalLeftShift(emm0, 23);
        // Relies on the explicit Vector4i -> Vector4f conversion being a
        // bit-level reinterpretation rather than a numeric conversion.
        Vector4f pow2n = (Vector4f)emm0;

        y = y * pow2n;
        return y;
    }
}
/// <summary>
/// Micro-benchmark comparing scalar <c>Math.Exp</c> with the four-lane
/// <c>Myext.ExpSSE</c>.
/// </summary>
public class SampleRuntimeDetection
{
    /// <summary>
    /// Times 40,000,000 scalar <c>Math.Exp</c> calls against 10,000,000
    /// <c>ExpSSE</c> calls (four lanes each, so the same number of
    /// exponentials) and prints the elapsed times and the last results.
    /// </summary>
    public static void Main()
    {
        Vector4f x = new Vector4f(1f, -2f, 0.5f, 0);
        Vector4f z = x;
        double uz = 0;

        // Stopwatch is a monotonic, high-resolution timer; DateTime.Now
        // differences are coarse and affected by system clock changes.
        System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();
        for (int i = 0; i < 40000000; i++)
        {
            uz = Math.Exp(1);
        }
        timer.Stop();
        Console.WriteLine("Math.exp:{0}", timer.Elapsed);
        Console.WriteLine(uz);

        timer = System.Diagnostics.Stopwatch.StartNew();
        for (int i = 0; i < 10000000; i++)
        {
            z = Myext.ExpSSE(x);
        }
        timer.Stop();
        Console.WriteLine("expSSE:{0}", timer.Elapsed);
        Console.WriteLine(z);
    }
}
gmcs -unsafe -r:Mono.Simd.dll exp.cs && mono exp.exe
exp.cs(63,22): warning CS0414: The private field `Myext.v4sui_0x7f' is
assigned
but its value is never used
Compilation succeeded - 1 warning(s)
SSE1, SSE2, SSE3, SSSE3
Math.exp:00:00:05.1405921
2.71828182845905
expSSE:00:00:01.9999872
<2.718282, 0.1353353, 1.648721, 1>
I have only converted the exp function. expSSE is 2.6 times faster than Math.Exp. But
with the original C version,
sse_mathfun_test.exe shows the SSE version to be four times faster than the non-SSE
version. I also tested
Math.Exp on .NET 3.5 SP1, and its speed is about as fast as my expSSE on Mono.
I don't know what is causing the Mono SSE exp to be slow.
Thanks,
Jet
--
View this message in context: http://www.nabble.com/sse_mathfun-convert-tp25696934p25696934.html
Sent from the Mono - Dev mailing list archive at Nabble.com.
More information about the Mono-devel-list
mailing list