[Mono-list] Some code for System.Text.UTF8Encoding
A Rafael D Teixeira
Tue, 02 Oct 2001 18:42:05 -0300
Sean, here is some code to advance the state of System.Text.UTF8Encoding. I
hope it is of some help.
// System.Text.UTF8Encoding.cs
// Authors:
// Sean MacIsaac (macisaac@ximian.com)
// (C) Ximian, Inc. http://www.ximian.com
namespace System.Text
{
    /// <summary>
    /// Converts between UTF-16 <see cref="char"/> sequences and UTF-8 byte
    /// sequences.
    /// </summary>
    /// <remarks>
    /// Encoding: a valid surrogate pair is combined into one supplementary
    /// code point and written as a four-byte sequence; an unpaired surrogate
    /// is written as U+FFFD (REPLACEMENT CHARACTER).
    /// Decoding: every byte that does not start a well-formed sequence
    /// (stray continuation byte, truncated or overlong sequence, encoded
    /// surrogate, value above U+10FFFF) produces one U+FFFD and decoding
    /// resumes at the following byte.
    /// </remarks>
    public class UTF8Encoding : Encoding
    {
        private const int HighSurrogateStart = 0xD800;
        private const int LowSurrogateStart = 0xDC00;
        private const int LowSurrogateEnd = 0xDFFF;
        private const int SupplementaryStart = 0x10000;
        private const int MaxCodePoint = 0x10FFFF;
        private const int ReplacementChar = 0xFFFD;

        /// <summary>
        /// Returns the exact number of bytes needed to encode a range of chars.
        /// </summary>
        /// <param name="chars">Source characters.</param>
        /// <param name="index">Index of the first char to measure.</param>
        /// <param name="count">Number of chars to measure.</param>
        /// <returns>The number of UTF-8 bytes the range encodes to.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="chars"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">The range is negative or outside the array.</exception>
        public override int GetByteCount(char[] chars, int index, int count)
        {
            if (chars == null)
            {
                throw new ArgumentNullException("chars");
            }
            // Written as a subtraction so that index + count cannot overflow.
            if (index < 0 || count < 0 || index > chars.Length - count)
            {
                throw new ArgumentOutOfRangeException();
            }

            int total = 0;
            int end = index + count;
            for (int i = index; i < end; i++)
            {
                int code = chars[i];
                if (code < 0x80)
                {
                    total += 1;
                }
                else if (code < 0x800)
                {
                    total += 2;
                }
                else if (IsSurrogatePair(chars, i, end))
                {
                    // Two UTF-16 code units collapse into one 4-byte sequence.
                    total += 4;
                    i++;
                }
                else
                {
                    // Any other BMP char, or a lone surrogate replaced by U+FFFD.
                    total += 3;
                }
            }
            return total;
        }

        /// <summary>
        /// Encodes a range of chars into a byte array as UTF-8.
        /// </summary>
        /// <param name="chars">Source characters.</param>
        /// <param name="charIndex">Index of the first char to encode.</param>
        /// <param name="charCount">Number of chars to encode.</param>
        /// <param name="bytes">Destination buffer.</param>
        /// <param name="byteIndex">Index in <paramref name="bytes"/> at which writing starts.</param>
        /// <returns>The number of bytes written.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="chars"/> or <paramref name="bytes"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// An index or count is negative, the char range lies outside
        /// <paramref name="chars"/>, or the encoded output does not fit in
        /// <paramref name="bytes"/> starting at <paramref name="byteIndex"/>.
        /// </exception>
        public override int GetBytes(char[] chars, int charIndex, int charCount,
                                     byte[] bytes, int byteIndex)
        {
            if (chars == null)
            {
                throw new ArgumentNullException("chars");
            }
            if (bytes == null)
            {
                throw new ArgumentNullException("bytes");
            }
            if (charIndex < 0 || charCount < 0 || byteIndex < 0 ||
                charIndex > chars.Length - charCount ||
                byteIndex > bytes.Length)
            {
                throw new ArgumentOutOfRangeException();
            }
            // Check the real encoded size up front so nothing is written
            // when the destination is too small.
            if (GetByteCount(chars, charIndex, charCount) > bytes.Length - byteIndex)
            {
                throw new ArgumentOutOfRangeException();
            }

            int outputIndex = byteIndex;
            int end = charIndex + charCount;
            for (int i = charIndex; i < end; i++)
            {
                int code = chars[i];

                if (code >= HighSurrogateStart && code <= LowSurrogateEnd)
                {
                    if (IsSurrogatePair(chars, i, end))
                    {
                        // Combine high and low surrogate into the code point.
                        code = SupplementaryStart
                             + ((code - HighSurrogateStart) << 10)
                             + (chars[i + 1] - LowSurrogateStart);
                        i++;
                    }
                    else
                    {
                        // A lone surrogate has no well-formed UTF-8 form.
                        code = ReplacementChar;
                    }
                }

                if (code < 0x80)
                {
                    bytes[outputIndex++] = (byte)code;
                }
                else if (code < 0x800)
                {
                    bytes[outputIndex++] = (byte)((code >> 6) | 0xC0);
                    bytes[outputIndex++] = (byte)((code & 0x3F) | 0x80);
                }
                else if (code < SupplementaryStart)
                {
                    bytes[outputIndex++] = (byte)((code >> 12) | 0xE0);
                    bytes[outputIndex++] = (byte)(((code >> 6) & 0x3F) | 0x80);
                    bytes[outputIndex++] = (byte)((code & 0x3F) | 0x80);
                }
                else
                {
                    // Code points stop at U+10FFFF, so four bytes is the
                    // longest sequence; 5- and 6-byte forms are never needed.
                    bytes[outputIndex++] = (byte)((code >> 18) | 0xF0);
                    bytes[outputIndex++] = (byte)(((code >> 12) & 0x3F) | 0x80);
                    bytes[outputIndex++] = (byte)(((code >> 6) & 0x3F) | 0x80);
                    bytes[outputIndex++] = (byte)((code & 0x3F) | 0x80);
                }
            }
            return outputIndex - byteIndex;
        }

        /// <summary>
        /// Encodes a substring into a byte array as UTF-8.
        /// </summary>
        /// <param name="s">Source string.</param>
        /// <param name="charIndex">Index of the first char to encode.</param>
        /// <param name="charCount">Number of chars to encode.</param>
        /// <param name="bytes">Destination buffer.</param>
        /// <param name="byteIndex">Index in <paramref name="bytes"/> at which writing starts.</param>
        /// <returns>The number of bytes written.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="s"/> or <paramref name="bytes"/> is null.</exception>
        public override int GetBytes(string s, int charIndex, int charCount,
                                     byte[] bytes, int byteIndex)
        {
            if (s == null)
            {
                throw new ArgumentNullException("s");
            }
            char[] chars = s.ToCharArray(charIndex, charCount);
            return GetBytes(chars, 0, charCount, bytes, byteIndex);
        }

        /// <summary>
        /// Encodes a whole string as UTF-8.
        /// </summary>
        /// <param name="s">Source string.</param>
        /// <returns>A new array holding exactly the encoded bytes.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
        public override byte[] GetBytes(string s)
        {
            if (s == null)
            {
                throw new ArgumentNullException("s");
            }
            char[] chars = s.ToCharArray();
            byte[] bytes = new byte[GetByteCount(chars, 0, chars.Length)];
            // Output starts at the beginning of the freshly sized buffer.
            GetBytes(chars, 0, chars.Length, bytes, 0);
            return bytes;
        }

        /// <summary>
        /// Returns the number of chars produced by decoding a range of bytes.
        /// </summary>
        /// <param name="bytes">Source bytes.</param>
        /// <param name="byteIndex">Index of the first byte to decode.</param>
        /// <param name="byteCount">Number of bytes to decode.</param>
        /// <returns>The number of UTF-16 chars the range decodes to.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="bytes"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">The range is negative or outside the array.</exception>
        public override int GetCharCount(byte[] bytes, int byteIndex, int byteCount)
        {
            if (bytes == null)
            {
                throw new ArgumentNullException("bytes");
            }
            if (byteIndex < 0 || byteCount < 0 || byteIndex > bytes.Length - byteCount)
            {
                throw new ArgumentOutOfRangeException();
            }
            return Decode(bytes, byteIndex, byteCount, null, 0);
        }

        /// <summary>
        /// Decodes a range of UTF-8 bytes into a char array.
        /// </summary>
        /// <param name="bytes">Source bytes.</param>
        /// <param name="byteIndex">Index of the first byte to decode.</param>
        /// <param name="byteCount">Number of bytes to decode.</param>
        /// <param name="chars">Destination buffer.</param>
        /// <param name="charIndex">Index in <paramref name="chars"/> at which writing starts.</param>
        /// <returns>The number of chars written.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="bytes"/> or <paramref name="chars"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// An index or count is negative, the byte range lies outside
        /// <paramref name="bytes"/>, or the decoded output does not fit in
        /// <paramref name="chars"/> starting at <paramref name="charIndex"/>.
        /// </exception>
        public override int GetChars(byte[] bytes, int byteIndex, int byteCount,
                                     char[] chars, int charIndex)
        {
            if (bytes == null)
            {
                throw new ArgumentNullException("bytes");
            }
            if (chars == null)
            {
                throw new ArgumentNullException("chars");
            }
            if (charIndex < 0 || byteCount < 0 || byteIndex < 0 ||
                byteIndex > bytes.Length - byteCount ||
                charIndex > chars.Length)
            {
                throw new ArgumentOutOfRangeException();
            }
            // Counting pass first, so the destination is untouched on failure.
            if (Decode(bytes, byteIndex, byteCount, null, 0) > chars.Length - charIndex)
            {
                throw new ArgumentOutOfRangeException();
            }
            return Decode(bytes, byteIndex, byteCount, chars, charIndex);
        }

        /// <summary>
        /// Returns an upper bound on the bytes needed to encode the given
        /// number of chars.
        /// </summary>
        /// <param name="charCount">Number of chars to be encoded.</param>
        /// <returns>Three bytes per char.</returns>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="charCount"/> is negative.</exception>
        public override int GetMaxByteCount(int charCount)
        {
            if (charCount < 0)
            {
                throw new ArgumentOutOfRangeException("charCount");
            }
            // One UTF-16 code unit never needs more than three bytes: a
            // surrogate pair is two chars for four bytes, i.e. two per char.
            return charCount * 3;
        }

        /// <summary>
        /// Returns an upper bound on the chars produced by decoding the given
        /// number of bytes.
        /// </summary>
        /// <param name="byteCount">Number of bytes to be decoded.</param>
        /// <returns>One char per byte.</returns>
        public override int GetMaxCharCount(int byteCount)
        {
            // Each input byte yields at most one char (ASCII or U+FFFD);
            // multi-byte sequences yield fewer chars than bytes.
            return byteCount;
        }

        // True when chars[i] is a high surrogate immediately followed,
        // before 'end', by a low surrogate.
        private static bool IsSurrogatePair(char[] chars, int i, int end)
        {
            int high = chars[i];
            if (high < HighSurrogateStart || high >= LowSurrogateStart || i + 1 >= end)
            {
                return false;
            }
            int low = chars[i + 1];
            return low >= LowSurrogateStart && low <= LowSurrogateEnd;
        }

        // Decodes bytes[byteIndex .. byteIndex + byteCount) and returns the
        // number of chars produced. When 'chars' is null nothing is stored,
        // which makes this the counting pass as well.
        private static int Decode(byte[] bytes, int byteIndex, int byteCount,
                                  char[] chars, int charIndex)
        {
            int produced = 0;
            int end = byteIndex + byteCount;
            int i = byteIndex;

            while (i < end)
            {
                int lead = bytes[i];
                int length;   // total bytes in the sequence, 0 if 'lead' cannot start one
                int code;     // payload bits accumulated so far
                int minimum;  // smallest code point that legitimately needs 'length' bytes

                if (lead < 0x80)
                {
                    length = 1; code = lead; minimum = 0;
                }
                else if ((lead & 0xE0) == 0xC0)
                {
                    length = 2; code = lead & 0x1F; minimum = 0x80;
                }
                else if ((lead & 0xF0) == 0xE0)
                {
                    length = 3; code = lead & 0x0F; minimum = 0x800;
                }
                else if ((lead & 0xF8) == 0xF0)
                {
                    length = 4; code = lead & 0x07; minimum = SupplementaryStart;
                }
                else
                {
                    // Stray continuation byte or an illegal lead (0xF8..0xFF).
                    length = 0; code = 0; minimum = 0;
                }

                bool valid = length > 0 && length <= end - i;
                for (int k = 1; valid && k < length; k++)
                {
                    int next = bytes[i + k];
                    if ((next & 0xC0) != 0x80)
                    {
                        valid = false;
                    }
                    else
                    {
                        code = (code << 6) | (next & 0x3F);
                    }
                }

                // Reject overlong forms, encoded surrogates and out-of-range values.
                if (valid && (code < minimum || code > MaxCodePoint ||
                              (code >= HighSurrogateStart && code <= LowSurrogateEnd)))
                {
                    valid = false;
                }

                if (!valid)
                {
                    // Replace only the offending byte and resynchronise on the next.
                    code = ReplacementChar;
                    length = 1;
                }

                if (code >= SupplementaryStart)
                {
                    if (chars != null)
                    {
                        int offset = code - SupplementaryStart;
                        chars[charIndex + produced] = (char)(HighSurrogateStart + (offset >> 10));
                        chars[charIndex + produced + 1] = (char)(LowSurrogateStart + (offset & 0x3FF));
                    }
                    produced += 2;
                }
                else
                {
                    if (chars != null)
                    {
                        chars[charIndex + produced] = (char)code;
                    }
                    produced++;
                }

                i += length;
            }
            return produced;
        }
    }
}
It compiled well with csc, but I couldn't test whether it does what is needed.
Miguel, when compiled with mcs (based on snapshot Sep-25) it gave:
Parsing successful
Unhandled Exception: System.NullReferenceException: Value null was found
where an instance of an object was required.
at CIR.TypeManager.FindMembers(Type t, MemberTypes mt, BindingFlags bf,
MemberFilter filter, Object criteria) in
C:\cygwin\tmp\mcs\typemanager.cs:line 197
at CIR.Expression.MemberLookup(RootContext rc, Type t, String name,
Boolean same_type, MemberTypes mt, BindingFlags bf) in
C:\cygwin\tmp\mcs\expression.cs:line 132
at CIR.Binary.ResolveOperator(TypeContainer tc) in
C:\cygwin\tmp\mcs\expression.cs:line 1355
at CIR.Binary.Resolve(TypeContainer tc) in
C:\cygwin\tmp\mcs\expression.cs:line 1446
at CIR.Binary.Resolve(TypeContainer tc) in
C:\cygwin\tmp\mcs\expression.cs:line 1440
at CIR.Binary.Resolve(TypeContainer tc) in
C:\cygwin\tmp\mcs\expression.cs:line 1440
at CIR.EmitContext.EmitBoolExpression(Expression e) in
C:\cygwin\tmp\mcs\codegen.cs:line 110
at CIR.EmitContext.EmitIf(If s) in C:\cygwin\tmp\mcs\codegen.cs:line 148
at CIR.EmitContext.EmitStatement(Statement s) in
C:\cygwin\tmp\mcs\codegen.cs:line 265
at CIR.EmitContext.EmitBlock(Block block) in
C:\cygwin\tmp\mcs\codegen.cs:line 298
at CIR.EmitContext.EmitTopBlock(Block block) in
C:\cygwin\tmp\mcs\codegen.cs:line 310
at CIR.Method.Emit(TypeContainer parent) in
C:\cygwin\tmp\mcs\class.cs:line 1010
at CIR.TypeContainer.Emit() in C:\cygwin\tmp\mcs\class.cs:line 646
at CIR.RootContext.EmitCode() in C:\cygwin\tmp\mcs\rootcontext.cs:line
at CIR.Driver..ctor(String[] args) in C:\cygwin\tmp\mcs\driver.cs:line
at CIR.Driver.Main(String[] args) in C:\cygwin\tmp\mcs\driver.cs:line 119
Rafael Teixeira
Brazilian Developer
Get your FREE download of MSN Explorer at http://explorer.msn.com/intl.asp