[Mono-devel-list] Patch to speed up String
Paolo Molaro
lupus at ximian.com
Thu Sep 9 04:38:09 EDT 2004
On 09/08/04 Andreas Nahr wrote:
> after some time I try again...
> Here is the first patch to speed up the String class by making it managed
> where possible.
It looks like the comments made earlier have not been addressed:
*) it will fail on architectures that require aligned reads/writes
*) it seems it won't handle the case where fixed is applied to an empty
array or to the last element+1 (like, when length to copy is 0)
A memcpy function like the one in the attached test is needed
(it needs testing and more optimizations for unaligned buffers):
we plan to use it in other parts of the system, too.
The can_unaligned flag could be a special icall that the jit recognizes
and turns into a constant.
lupus
--
-----------------------------------------------------------------
lupus at debian.org debian/rules
lupus at ximian.com Monkeys do it better
-------------- next part --------------
using System;
class T {
const bool can_unaligned = true;
static unsafe void memcpy4 (byte *dest, byte *src, int size) {
while (can_unaligned && size >= 32) {
// using long is better than int and slower than double
// FIXME: enable this only on correct alignment or on platforms
// that can tolerate unaligned reads/writes of doubles
((double*)dest) [0] = ((double*)src) [0];
((double*)dest) [1] = ((double*)src) [1];
((double*)dest) [2] = ((double*)src) [2];
((double*)dest) [3] = ((double*)src) [3];
dest += 32;
src += 32;
size -= 32;
}
while (size >= 16) {
((int*)dest) [0] = ((int*)src) [0];
((int*)dest) [1] = ((int*)src) [1];
((int*)dest) [2] = ((int*)src) [2];
((int*)dest) [3] = ((int*)src) [3];
dest += 16;
src += 16;
size -= 16;
}
while (size >= 4) {
((int*)dest) [0] = ((int*)src) [0];
dest += 4;
src += 4;
size -= 4;
}
while (size > 0) {
((byte*)dest) [0] = ((byte*)src) [0];
dest += 1;
src += 1;
--size;
}
}
static unsafe void memcpy2 (byte *dest, byte *src, int size) {
while (size >= 8) {
((short*)dest) [0] = ((short*)src) [0];
((short*)dest) [1] = ((short*)src) [1];
((short*)dest) [2] = ((short*)src) [2];
((short*)dest) [3] = ((short*)src) [3];
dest += 8;
src += 8;
size -= 8;
}
while (size >= 2) {
((short*)dest) [0] = ((short*)src) [0];
dest += 2;
src += 2;
size -= 2;
}
if (size > 0)
((byte*)dest) [0] = ((byte*)src) [0];
}
static unsafe void memcpy1 (byte *dest, byte *src, int size) {
while (size >= 8) {
((byte*)dest) [0] = ((byte*)src) [0];
((byte*)dest) [1] = ((byte*)src) [1];
((byte*)dest) [2] = ((byte*)src) [2];
((byte*)dest) [3] = ((byte*)src) [3];
((byte*)dest) [4] = ((byte*)src) [4];
((byte*)dest) [5] = ((byte*)src) [5];
((byte*)dest) [6] = ((byte*)src) [6];
((byte*)dest) [7] = ((byte*)src) [7];
dest += 8;
src += 8;
size -= 8;
}
while (size >= 2) {
((byte*)dest) [0] = ((byte*)src) [0];
((byte*)dest) [1] = ((byte*)src) [1];
dest += 2;
src += 2;
size -= 2;
}
if (size > 0)
((byte*)dest) [0] = ((byte*)src) [0];
}
static unsafe void memcpy (byte *dest, byte *src, int size) {
// FIXME: if pointers are not aligned, try to align them
// so a faster routine can be used. Handle the case where
// the pointers can't be reduced to have the same alignment
// (just ignore the issue on x86?)
if (((int)dest & 3) == 0 && ((int)src & 3) == 0) {
memcpy4 (dest, src, size);
} else if (((int)dest & 1) == 0 && ((int)src & 1) == 0) {
if (size >= 6) {
dest [0] = src [0];
dest [1] = src [1];
memcpy4 (dest + 2, src + 2, size - 2);
} else {
memcpy2 (dest, src, size);
}
} else {
if (size >= 6) {
dest [0] = src [0];
++dest;
++src;
--size;
if (((int)dest & 3) == 0 && ((int)src & 3) == 0) {
memcpy4 (dest, src, size);
} else if (((int)dest & 1) == 0 && ((int)src & 1) == 0) {
if (size >= 6) {
dest [0] = src [0];
dest [1] = src [1];
memcpy4 (dest + 2, src + 2, size - 2);
} else {
memcpy2 (dest, src, size);
}
} else {
memcpy1 (dest, src, size);
}
} else {
memcpy1 (dest, src, size);
}
}
}
static unsafe void BlockCopy (Array src, int srcOffset, Array dest, int destOffset, int count) {
if (srcOffset < 0)
throw new ArgumentOutOfRangeException ("srcOffset", "non-neg");
if (destOffset < 0)
throw new ArgumentOutOfRangeException ("destOffset", "non-neg");
if (count < 0)
throw new ArgumentOutOfRangeException ("count", "non-neg");
// must check otherwise the fixed expression throws out-of-range exception
if (count == 0)
return;
// need icall to simulate this and use ByteLength instead of Length
byte[] s = (byte[])src;
byte[] d = (byte[])dest;
if (srcOffset + count > s.Length || destOffset + count > d.Length)
throw new ArgumentException ("out of bounds");
fixed (byte *A = s, B = d) {
memcpy (B + destOffset, A + srcOffset, count);
}
}
const int count = 1000000;
static void run (int sz, int ofs, byte[] src, byte[] dst) {
int start, end;
start = Environment.TickCount;
for (int i = 0; i < count; i++) {
System.Buffer.BlockCopy (src, ofs, dst, ofs, sz - ofs);
}
end = Environment.TickCount;
Console.Write ("{0}: {1} ", sz, end-start);
}
static unsafe void run1 (int sz, int ofs, byte[] src, byte[] dst) {
int start, end;
start = Environment.TickCount;
for (int i = 0; i < count; i++) {
BlockCopy (src, ofs, dst, ofs, sz - ofs);
}
end = Environment.TickCount;
Console.Write (" {0} ", end-start);
}
static void run2 (int sz, int ofs, byte[] src, byte[] dst) {
int start, end;
start = Environment.TickCount;
for (int i = 0; i < count; i++) {
System.Array.Copy (src, ofs, dst, ofs, sz - ofs);
}
end = Environment.TickCount;
Console.WriteLine (end-start);
}
static void Main (string[] args) {
int i, ofs;
i = Environment.TickCount;
byte [] src;
byte [] dst;
ofs = 0;
if (args.Length > 0)
ofs = int.Parse (args [0]);
Console.WriteLine ("Size: Buffer.bcopy memcpy Array.Copy (align: {0})", ofs);
for (i = 1; i <= 10000;) {
src = new byte [i];
dst = new byte [i];
if (i >= ofs) {
run (i, ofs, src, dst);
run1 (i, ofs, src, dst);
run2 (i, ofs, src, dst);
}
if (i < 5)
++i;
else if (i < 50)
i += 5;
else if (i < 100)
i += 10;
else if (i < 1000)
i += 100;
else
i += 1000;
}
src = new byte [100000];
dst = new byte [100000];
run (100000, ofs, src, dst);
run1 (100000, ofs, src, dst);
run2 (100000, ofs, src, dst);
}
}
More information about the Mono-devel-list
mailing list