[Mono-dev] Mono.SIMD
Alan McGovern
alan.mcgovern at gmail.com
Mon Feb 23 08:32:56 EST 2009
Hey,
The C++ code seems very similar to the C# SIMD code, so I don't know what
would make the C# version any faster. This question would be best directed
at the JIT guys, who may know what causes the difference.
If you want to try speeding up the mono version, you should just use trial
and error to see if you can rewrite things so that you can get better
performance. For example, unrolling the loop may improve performance
noticeably.
Alan.
On Mon, Feb 23, 2009 at 1:16 PM, Johann Nadalutti <jnadalutti at gmail.com> wrote:
> Hey,
> thanks a lot for your modifications.
> I now have the SIMD version running 3x faster than the 4D float version!
> I wrote the same code in C++ and it is 3x faster than Mono.SIMD.
> I just want to know why, and how to optimize my Mono code.
> Which IDE do you use to develop and debug Mono?
>
>
> My Visual C++ code for test:
>
> class VectorSIMD
> {
> public:
>
> VectorSIMD();
> VectorSIMD(float x, float y, float z, float w);
>
> VectorSIMD operator*(const VectorSIMD& other)
> {
> VectorSIMD r;
> r.vec = _mm_mul_ps(vec, other.vec);
> return r;
> }
>
> VectorSIMD operator*(float f)
> {
> VectorSIMD r;
> __m128 b = _mm_load1_ps(&f);
> r.vec = _mm_mul_ps(vec, b);
> return r;
> }
>
>
> VectorSIMD operator+(const VectorSIMD& other)
> {
> VectorSIMD r;
> r.vec = _mm_add_ps(vec, other.vec);
> return r;
> }
>
> //Datas
> union
> {
> __m128 vec;
> struct { float x, y, z, w; };
> };
>
> };
>
> VectorSIMD::VectorSIMD()
> {
> }
>
> VectorSIMD::VectorSIMD(float _x, float _y, float _z, float _w)
> {
> x=_x; y=_y; z=_z; w=_w;
> }
>
>
> VectorSIMD GradientSIMD()
> {
> VectorSIMD finv_WH(1.0f / (_W*_H), 1.0f / (_W*_H), 1.0f / (_W*_H), 1.0f /
> (_W*_H));
> VectorSIMD ret(0.0, 0.0, 0.0, 0.0);
>
> VectorSIMD a(0.0f, 0.0f, 1.0f, 1.0f);
> a =a + VectorSIMD(0.0f, 1.0f, 0.0f, 1.0f);
> a =a + VectorSIMD(1.0f, 0.0f, 0.0f, 1.0f);
> a =a + VectorSIMD(0.5f, 0.5f, 1.0f, 1.0f);
>
>
> //Process operator
> VectorSIMD yVec(_H, _H, 0, 0);
> VectorSIMD yDiff(-1.0f, -1.0f, 1.0f, 1.0f);
> for (int y=0; y<_H; y++)
> {
> VectorSIMD factor = yVec * finv_WH;
> yVec = yVec + yDiff;
>
> VectorSIMD xVec(_W, 0, _W, 0);
> VectorSIMD xDiff(-1.0f, 1.0f, -1.0f, 1.0f);
> for (int x=0; x<_W; x++)
> {
> ret=ret+(a*xVec*factor);
> xVec=xVec+xDiff;
> }
> }
>
> return ret;
> }
>
>
> Johann.
>
>
>
>
> 2009/2/23 Alan McGovern <alan.mcgovern at gmail.com>
>
>> Hey,
>>
>> The big issue you're having is that you haven't implemented a SIMD
>> algorithm ;) I spent 15 mins 'optimising' your code and came up with this.
>> Notice that I made everything a SIMD operation. There is no scalar code in
>> the method anymore. This tripled performance as compared to the non-SIMD
>> version. On my machine:
>>
>> -FLOAT 00:00:00.3888930 Color
>> -SIMD 00:00:00.1266820 Mono.Simd.Vector4f
>>
>> You'd want to double check the result just in case I made a mistake with
>> my alterations.
>>
>> Alan.
>>
>> public static Vector4f GradientSIMD()
>> {
>> Vector4f finv_WH = new Vector4f (1.0f / (w*h), 1.0f / (w*h),
>> 1.0f / (w*h), 1.0f / (w*h));
>> Vector4f ret = new Vector4f();
>>
>> Vector4f a = new Vector4f(0.0f, 0.0f, 1.0f, 1.0f);
>> a += new Vector4f(0.0f, 1.0f, 0.0f, 1.0f);
>> a += new Vector4f(1.0f, 0.0f, 0.0f, 1.0f);
>> a += new Vector4f(0.5f, 0.5f, 1.0f, 1.0f);
>>
>> //Process operator
>> Vector4f yVec = new Vector4f (h, h, 0, 0);
>> Vector4f yDiff = new Vector4f (-1, -1, 1, 1);
>> for (int y=0; y<h; y++)
>> {
>> Vector4f factor = yVec * finv_WH;
>> yVec += yDiff;
>>
>> Vector4f xVec = new Vector4f (w, 0, w, 0);
>> Vector4f xDiff = new Vector4f (-1, 1, -1, 1);
>> for (int x=0; x<w; x++)
>> {
>> ret += (a * xVec * factor);
>> xVec += xDiff;
>> }
>> }
>> return ret;
>> }
>>
>> On Fri, Feb 20, 2009 at 8:12 AM, Johann_fxgen <jnadalutti at gmail.com> wrote:
>>
>>>
>>> I have done some performance tests of SIMD under windows.
>>>
>>> Test results in ms:
>>> In MS C 235 (Visual Studio Release Mode With SIMD)
>>> In MS C 360 (Visual Studio Release Mode With 4D Float)
>>> In Mono C# 453 (With Mono SIMD)
>>> In Mono C# 562 (With Mono 4D Float)
>>> In MS C# 609 (Visual Studio With 4D Float)
>>> In MS C 672 (Visual Studio Debug Mode)
>>>
>>> I'm just surprised by the difference between the C SIMD and Mono SIMD versions.
>>>
>>> Is Mono.SIMD faster under Linux than under Windows?
>>>
>>> Johann.
>>>
>>> My mono code for test:
>>>
>>> using Mono.Simd;
>>> using System;
>>> using Mono;
>>>
>>> public struct Color
>>> {
>>> public float r,g,b,a;
>>> };
>>>
>>> public class TestMonoSIMD
>>> {
>>> public Color m_pixels;
>>> const int w = 4096;
>>> const int h = 4096;
>>>
>>> public static void Main ()
>>> {
>>> //Debug
>>> Console.WriteLine("AccelMode: {0}",
>>> Mono.Simd.SimdRuntime.AccelMode );
>>>
>>> //Without SIMD
>>> DateTime start1 = DateTime.Now;
>>> Color ret1 = Gradient();
>>> TimeSpan ts1 = DateTime.Now - start1;
>>> Console.WriteLine("-FLOAT {0} {1}", ts1, ret1);
>>>
>>> //With SIMD
>>> DateTime start2 = DateTime.Now;
>>> Vector4f ret2 = GradientSIMD();
>>> TimeSpan ts2 = DateTime.Now - start2;
>>> Console.WriteLine("-SIMD {0} {1}", ts2, ret2);
>>> }
>>>
>>> public static Color Gradient()
>>> {
>>> float finv_WH = 1.0f / (float)(w*h);
>>> Color ret = new Color();
>>> ret.r=ret.g=ret.b=ret.a=0.0f;
>>>
>>> Color a = new Color();
>>> Color b = new Color();
>>> Color c = new Color();
>>> Color d = new Color();
>>> a.r=0.0f; a.g=0.0f; a.b=1.0f; a.a=1.0f;
>>> b.r=0.0f; b.g=1.0f; b.b=0.0f; b.a=1.0f;
>>> c.r=1.0f; c.g=0.0f; c.b=0.0f; c.a=1.0f;
>>> d.r=0.5f; d.g=0.5f; d.b=1.0f; d.a=1.0f;
>>>
>>> //Process operator
>>> for (int y=0; y<h; y++)
>>> {
>>> for (int x=0; x<w; x++)
>>> {
>>> //Calc percent A,B,C,D
>>> float pa = (float)((w-x) *
>>> (h-y)) * finv_WH;
>>> float pb = (float)((x) *
>>> (h-y)) * finv_WH;
>>> float pc = (float)((w-x) *
>>> (y)) * finv_WH;
>>> float pd = (float)((x) *
>>> (y)) * finv_WH;
>>>
>>> float cr= ((a.r*pa) + (b.r*pb) +
>>> (c.r*pc) + (d.r*pd));
>>> float cg= ((a.g*pa) + (b.g*pb) +
>>> (c.g*pc) + (d.g*pd));
>>> float cb= ((a.b*pa) + (b.b*pb) +
>>> (c.b*pc) + (d.b*pd));
>>> float ca= ((a.a*pa) + (b.a*pb) +
>>> (c.a*pc) + (d.a*pd));
>>> ret.r+=cr; ret.g+=cg;
>>> ret.b+=cb; ret.a+=ca;
>>> }
>>> }
>>> return ret;
>>> }
>>>
>>> public static Vector4f GradientSIMD()
>>> {
>>> float finv_WH = 1.0f / (float)(w*h);
>>> Vector4f ret = new Vector4f(0.0f, 0.0f, 0.0f,
>>> 0.0f);
>>>
>>> Vector4f a = new Vector4f(0.0f, 0.0f, 1.0f, 1.0f);
>>> Vector4f b = new Vector4f(0.0f, 1.0f, 0.0f, 1.0f);
>>> Vector4f c = new Vector4f(1.0f, 0.0f, 0.0f, 1.0f);
>>> Vector4f d = new Vector4f(0.5f, 0.5f, 1.0f, 1.0f);
>>>
>>> //Process operator
>>> Vector4f p = new Vector4f();
>>> Vector4f r = new Vector4f();
>>> for (int y=0; y<h; y++)
>>> {
>>> for (int x=0; x<w; x++)
>>> {
>>> //Calc percent A,B,C,D
>>> p.X = (float)((w-x) * (h-y)) *
>>> finv_WH;
>>> p.Y = (float)((x) *
>>> (h-y)) * finv_WH;
>>> p.Z = (float)((w-x) * (y)) *
>>> finv_WH;
>>> p.W = (float)((x) *
>>> (y)) * finv_WH;
>>>
>>> ret+=a*p + b*p + c*p + d*p;
>>> }
>>> }
>>> return ret;
>>> }
>>>
>>> }
>>>
>>>
>>> --
>>> View this message in context:
>>> http://www.nabble.com/Mono.SIMD-tp22116483p22116483.html
>>> Sent from the Mono - Dev mailing list archive at Nabble.com.
>>>
>>> _______________________________________________
>>> Mono-devel-list mailing list
>>> Mono-devel-list at lists.ximian.com
>>> http://lists.ximian.com/mailman/listinfo/mono-devel-list
>>>
>>
>>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: http://lists.ximian.com/pipermail/mono-devel-list/attachments/20090223/8fa0cbb0/attachment-0001.html
More information about the Mono-devel-list
mailing list