[Mono-devel-list] Patch to speed up String

Paolo Molaro lupus at ximian.com
Thu Sep 9 04:38:09 EDT 2004


On 09/08/04 Andreas Nahr wrote:
> after some time I try again...
> Here is the first patch to speed up the String class by making it managed 
> where possible.

It looks like the comments made earlier have not been addressed:
*) it will fail on architectures that require aligned reads/writes
*) it seems it won't handle the case where fixed is applied to an empty
array or to the last element+1 (like, when length to copy is 0)

A memcpy function like the one in the attached test is needed
(it needs testing and more optimizations for unaligned buffers):
we plan to use it in other parts of the system, too.
The can_unaligned flag could be a special icall that the jit recognizes
and turns into a constant.

lupus

-- 
-----------------------------------------------------------------
lupus at debian.org                                     debian/rules
lupus at ximian.com                             Monkeys do it better
-------------- next part --------------
using System;

class T {
	const bool can_unaligned = true;
	static unsafe void memcpy4 (byte *dest, byte *src, int size) {
		while (can_unaligned && size >= 32) {
			// using long is better than int and slower than double
			// FIXME: enable this only on correct alignment or on platforms
			// that can tolerate unaligned reads/writes of doubles
			((double*)dest) [0] = ((double*)src) [0];
			((double*)dest) [1] = ((double*)src) [1];
			((double*)dest) [2] = ((double*)src) [2];
			((double*)dest) [3] = ((double*)src) [3];
			dest += 32;
			src += 32;
			size -= 32;
		}
		while (size >= 16) {
			((int*)dest) [0] = ((int*)src) [0];
			((int*)dest) [1] = ((int*)src) [1];
			((int*)dest) [2] = ((int*)src) [2];
			((int*)dest) [3] = ((int*)src) [3];
			dest += 16;
			src += 16;
			size -= 16;
		}
		while (size >= 4) {
			((int*)dest) [0] = ((int*)src) [0];
			dest += 4;
			src += 4;
			size -= 4;
		}
		while (size > 0) {
			((byte*)dest) [0] = ((byte*)src) [0];
			dest += 1;
			src += 1;
			--size;
		}
	}
	static unsafe void memcpy2 (byte *dest, byte *src, int size) {
		while (size >= 8) {
			((short*)dest) [0] = ((short*)src) [0];
			((short*)dest) [1] = ((short*)src) [1];
			((short*)dest) [2] = ((short*)src) [2];
			((short*)dest) [3] = ((short*)src) [3];
			dest += 8;
			src += 8;
			size -= 8;
		}
		while (size >= 2) {
			((short*)dest) [0] = ((short*)src) [0];
			dest += 2;
			src += 2;
			size -= 2;
		}
		if (size > 0)
			((byte*)dest) [0] = ((byte*)src) [0];
	}
	static unsafe void memcpy1 (byte *dest, byte *src, int size) {
		while (size >= 8) {
			((byte*)dest) [0] = ((byte*)src) [0];
			((byte*)dest) [1] = ((byte*)src) [1];
			((byte*)dest) [2] = ((byte*)src) [2];
			((byte*)dest) [3] = ((byte*)src) [3];
			((byte*)dest) [4] = ((byte*)src) [4];
			((byte*)dest) [5] = ((byte*)src) [5];
			((byte*)dest) [6] = ((byte*)src) [6];
			((byte*)dest) [7] = ((byte*)src) [7];
			dest += 8;
			src += 8;
			size -= 8;
		}
		while (size >= 2) {
			((byte*)dest) [0] = ((byte*)src) [0];
			((byte*)dest) [1] = ((byte*)src) [1];
			dest += 2;
			src += 2;
			size -= 2;
		}
		if (size > 0)
			((byte*)dest) [0] = ((byte*)src) [0];
	}
	static unsafe void memcpy (byte *dest, byte *src, int size) {
		// FIXME: if pointers are not aligned, try to align them
		// so a faster routine can be used. Handle the case where
		// the pointers can't be reduced to have the same alignment
		// (just ignore the issue on x86?)
		if (((int)dest & 3) == 0 && ((int)src & 3) == 0) {
			memcpy4 (dest, src, size);
		} else if (((int)dest & 1) == 0 && ((int)src & 1) == 0) {
			if (size >= 6) {
				dest [0] = src [0];
				dest [1] = src [1];
				memcpy4 (dest + 2, src + 2, size - 2);
			} else {
				memcpy2 (dest, src, size);
			}
		} else {
			if (size >= 6) {
				dest [0] = src [0];
				++dest;
				++src;
				--size;
				if (((int)dest & 3) == 0 && ((int)src & 3) == 0) {
					memcpy4 (dest, src, size);
				} else if (((int)dest & 1) == 0 && ((int)src & 1) == 0) {
					if (size >= 6) {
						dest [0] = src [0];
						dest [1] = src [1];
						memcpy4 (dest + 2, src + 2, size - 2);
					} else {
						memcpy2 (dest, src, size);
					}
				} else {
					memcpy1 (dest, src, size);
				}
			} else {
				memcpy1 (dest, src, size);
			}
		}
	}
	static unsafe void BlockCopy (Array src, int srcOffset, Array dest, int destOffset, int count) {
		if (srcOffset < 0)
			throw new ArgumentOutOfRangeException ("srcOffset", "non-neg");
		if (destOffset < 0)
			throw new ArgumentOutOfRangeException ("destOffset", "non-neg");
		if (count < 0)
			throw new ArgumentOutOfRangeException ("count", "non-neg");
		// must check otherwise the fixed expression throws out-of-range exception
		if (count == 0)
			return;
		// need icall to simulate this and use ByteLength instead of Length
		byte[] s = (byte[])src;
		byte[] d = (byte[])dest;
		if (srcOffset + count > s.Length || destOffset + count > d.Length)
			throw new ArgumentException ("out of bounds");
		fixed (byte *A = s, B = d) {
			memcpy (B + destOffset, A + srcOffset, count);
		}
	}
	const int count = 1000000;
	static void run (int sz, int ofs, byte[] src, byte[] dst) {
		int start, end;
		
		start = Environment.TickCount;
		for (int i = 0; i < count; i++) {
			System.Buffer.BlockCopy (src, ofs, dst, ofs, sz - ofs);
		}
		end = Environment.TickCount;
		Console.Write ("{0}: {1} ", sz, end-start);
	}
	static unsafe void run1 (int sz, int ofs, byte[] src, byte[] dst) {
		int start, end;
		
		start = Environment.TickCount;
		for (int i = 0; i < count; i++) {
			BlockCopy (src, ofs, dst, ofs, sz - ofs);
		}
		end = Environment.TickCount;
		Console.Write (" {0} ", end-start);
	}
	static void run2 (int sz, int ofs, byte[] src, byte[] dst) {
		int start, end;
		
		start = Environment.TickCount;
		for (int i = 0; i < count; i++) {
			System.Array.Copy (src, ofs, dst, ofs, sz - ofs);
		}
		end = Environment.TickCount;
		Console.WriteLine (end-start);
	}
	static void Main (string[] args) {
		int i, ofs;
		i = Environment.TickCount;
		byte [] src;
		byte [] dst;
		ofs = 0;
		if (args.Length > 0)
			ofs = int.Parse (args [0]);
		Console.WriteLine ("Size: Buffer.bcopy memcpy Array.Copy (align: {0})", ofs);
		for (i = 1; i <= 10000;) {
			src = new byte [i];
			dst = new byte [i];
			if (i >= ofs) {
				run (i, ofs, src, dst);
				run1 (i, ofs, src, dst);
				run2 (i, ofs, src, dst);
			}
			if (i < 5)
				++i;
			else if (i < 50)
				i += 5;
			else if (i < 100)
				i += 10;
			else if (i < 1000)
				i += 100;
			else
				i += 1000;
		}
		src = new byte [100000];
		dst = new byte [100000];
		run (100000, ofs, src, dst);
		run1 (100000, ofs, src, dst);
		run2 (100000, ofs, src, dst);
	}
}


More information about the Mono-devel-list mailing list