[Mono-dev] [Mono-patches] [mono/moon] caef081d: Implemented an iconv-like API for unicode conversion

Rodrigo Kumpera kumpera at gmail.com
Wed Apr 20 00:00:35 EDT 2011


Jeff, shouldn't this be part of eglib as a build time option?
On Apr 19, 2011 10:06 PM, "Jeffrey Stedfast (fejj at gnome.org)" <
mono-patches at lists.ximian.com> wrote:
>
> Branch: refs/heads/master
> Home: https://github.com/mono/moon
>
> Commit: caef081dc43c17327c37932f65264deec19d8022
> Author: Jeffrey Stedfast <fejj at gnome.org>
> Date: 04/19/2011 21:05:59
> URL:
https://github.com/mono/moon/commit/caef081dc43c17327c37932f65264deec19d8022
>
> Implemented an iconv-like API for unicode conversion
>
> Changed paths:
> M src/Makefile.am
> Added paths:
> A src/miconv.cpp
> A src/miconv.h
>
> Modified: src/Makefile.am
> ===================================================================
> --- a/src/Makefile.am
> +++ b/src/Makefile.am
> @@ -78,6 +78,7 @@ libmoon_include_headers = \
> medialog.h \
> mediaplayer.h \
> messaging.h \
> + miconv.h \
> moon-curves.h \
> moonlightconfiguration.h\
> moon-path.h \
> @@ -234,6 +235,7 @@ dist_libmoon_la_SOURCES = \
> medialog.cpp \
> mediaplayer.cpp \
> messaging.cpp \
> + miconv.cpp \
> moon-curves.c \
> moonlightconfiguration.cpp \
> moon-path.c \
>
> Added: src/miconv.cpp
> ===================================================================
> --- /dev/null
> +++ b/src/miconv.cpp
> @@ -0,0 +1,344 @@
> +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8
-*- */
> +/*
> + * miconv.cpp:
> + *
> + * Contact:
> + * Moonlight List (moonlight-list at lists.ximian.com)
> + *
> + * Copyright 2011 Novell, Inc. (http://www.novell.com)
> + *
> + * See the LICENSE file included with the distribution for details.
> + */
> +
> +#include <config.h>
> +
> +#include <glib.h>
> +#include <string.h>
> +#include <errno.h>
> +
> +#include "miconv.h"
> +
> +namespace Moonlight {
> +
> +enum Endian {
> + LittleEndian,
> + BigEndian
> +};
> +
> +typedef int (* Decoder) (Endian endian, char **inbytes, size_t
*inbytesleft, gunichar *outchar);
> +typedef int (* Encoder) (gunichar c, char **outbytes, size_t
*outbytesleft);
> +
> +static int decode_utf32 (Endian endian, char **inbytes, size_t
*inbytesleft, gunichar *outchar);
> +//static int encode_utf32 (gunichar c, char **outbytes, size_t
*outbytesleft);
> +
> +static int decode_utf16 (Endian endian, char **inbytes, size_t
*inbytesleft, gunichar *outchar);
> +//static int encode_utf16 (gunichar c, char **outbytes, size_t
*outbytesleft);
> +
> +static int decode_utf8 (Endian endian, char **inbytes, size_t
*inbytesleft, gunichar *outchar);
> +static int encode_utf8 (gunichar c, char **outbytes, size_t
*outbytesleft);
> +
> +static struct {
> + const char *name;
> + Decoder decoder;
> + Encoder encoder;
> + Endian endian;
> +} charsets[] = {
> + { "UTF-32BE", decode_utf32, NULL, BigEndian },
> + { "UTF-32LE", decode_utf32, NULL, LittleEndian },
> + { "UTF-16BE", decode_utf16, NULL, BigEndian },
> + { "UTF-16LE", decode_utf16, NULL, LittleEndian },
> + { "UTF-8", decode_utf8, encode_utf8, LittleEndian },
> +};
> +
> +struct _miconv_t {
> + Decoder decode;
> + Encoder encode;
> + Endian endian;
> + gunichar c;
> +};
> +
> +
> +miconv_t
> +miconv_open (const char *to, const char *from)
> +{
> + Decoder decoder = NULL;
> + Encoder encoder = NULL;
> + Endian endian;
> + miconv_t cd;
> + guint i;
> +
> + if (!to || !from)
> + return (miconv_t) -1;
> +
> + for (i = 0; i < G_N_ELEMENTS (charsets); i++) {
> + if (!strcmp (charsets[i].name, from)) {
> + decoder = charsets[i].decoder;
> + endian = charsets[i].endian;
> + }
> +
> + if (!strcmp (charsets[i].name, to))
> + encoder = charsets[i].encoder;
> + }
> +
> + if (encoder == NULL || decoder == NULL)
> + return (miconv_t) -1;
> +
> + cd = (miconv_t) g_malloc (sizeof (*cd));
> + cd->decode = decoder;
> + cd->encode = encoder;
> + cd->endian = endian;
> + cd->c = -1;
> +
> + return cd;
> +}
> +
> +int
> +miconv_close (miconv_t cd)
> +{
> + g_free (cd);
> + return 0;
> +}
> +
> +int
> +miconv (miconv_t cd, char **inbytes, size_t *inbytesleft,
> + char **outbytes, size_t *outbytesleft)
> +{
> + size_t inleft, outleft;
> + char *inptr, *outptr;
> + gunichar c;
> + int rc = 0;
> +
> + if (outbytes == NULL || outbytesleft == NULL) {
> + /* reset converter */
> + cd->c = -1;
> + return 0;
> + }
> +
> + inleft = inbytesleft ? *inbytesleft : 0;
> + inptr = inbytes ? *inbytes : NULL;
> + outleft = *outbytesleft;
> + outptr = *outbytes;
> + c = cd->c;
> +
> + while (inleft >= 0 && outleft > 0) {
> + if (c == (gunichar) -1 && cd->decode (cd->endian, &inptr, &inleft, &c)
== -1) {
> + rc = -1;
> + break;
> + }
> +
> + if (cd->encode (c, &outptr, &outleft) == -1) {
> + rc = -1;
> + break;
> + }
> +
> + c = -1;
> + }
> +
> + if (inbytesleft)
> + *inbytesleft = inleft;
> +
> + if (inbytes)
> + *inbytes = inptr;
> +
> + *outbytesleft = outleft;
> + *outbytes = outptr;
> + cd->c = c;
> +
> + return rc;
> +}
> +
> +
> +static int
> +decode_utf32 (Endian endian, char **inbytes, size_t *inbytesleft,
gunichar *outchar)
> +{
> + gunichar *inptr = (gunichar *) *inbytes;
> + size_t inleft = *inbytesleft;
> + gunichar c;
> +
> + if (inleft < 4) {
> + errno = EINVAL;
> + return -1;
> + }
> +
> + if (endian == BigEndian)
> + c = GUINT32_FROM_BE (*inptr);
> + else
> + c = GUINT32_FROM_LE (*inptr);
> +
> + inleft -= 4;
> + inptr++;
> +
> + if (c >= 2147483648UL) {
> + errno = EILSEQ;
> + return -1;
> + }
> +
> + *inbytes = (char *) inptr;
> + *inbytesleft = inleft;
> + *outchar = c;
> +
> + return 0;
> +}
> +
> +//static int encode_utf32 (gunichar c, char **outbytes, size_t
*outbytesleft);
> +
> +static int
> +decode_utf16 (Endian endian, char **inbytes, size_t *inbytesleft,
gunichar *outchar)
> +{
> + gunichar2 *inptr = (gunichar2 *) *inbytes;
> + size_t inleft = *inbytesleft;
> + gunichar2 c;
> + gunichar u;
> +
> + if (inleft < 2) {
> + errno = EINVAL;
> + return -1;
> + }
> +
> + if (endian == BigEndian)
> + u = GUINT16_FROM_BE (*inptr);
> + else
> + u = GUINT16_FROM_LE (*inptr);
> +
> + inleft -= 2;
> + inptr++;
> +
> + if (u >= 0xdc00 && u <= 0xdfff) {
> + errno = EILSEQ;
> + return -1;
> + } else if (u >= 0xd800 && u <= 0xdbff) {
> + if (inleft < 2) {
> + errno = EINVAL;
> + return -1;
> + }
> +
> + if (endian == BigEndian)
> + c = GUINT16_FROM_BE (*inptr);
> + else
> + c = GUINT16_FROM_LE (*inptr);
> +
> + inleft -= 2;
> + inptr++;
> +
> + if (c < 0xdc00 || c > 0xdfff) {
> + errno = EILSEQ;
> + return -1;
> + }
> +
> + u = ((u - 0xd800) << 10) + (c - 0xdc00) + 0x0010000UL;
> + }
> +
> + *inbytes = (char *) inptr;
> + *inbytesleft = inleft;
> + *outchar = u;
> +
> + return 0;
> +}
> +
> +//static int encode_utf16 (gunichar c, char **outbytes, size_t
*outbytesleft);
> +
> +static int
> +decode_utf8 (Endian endian, char **inbytes, size_t *inbytesleft, gunichar
*outchar)
> +{
> + size_t inleft = *inbytesleft;
> + char *inptr = *inbytes;
> + size_t i, len = 0;
> + unsigned char c;
> + gunichar u;
> +
> + c = *inptr++;
> +
> + if (c < 0x80) {
> + /* simple ascii case */
> + len = 1;
> + } else if ((c & 0xe0) == 0xc0) {
> + c &= 0x1f;
> + len = 2;
> + } else if ((c & 0xf0) == 0xe0) {
> + c &= 0x0f;
> + len = 3;
> + } else if ((c & 0xf8) == 0xf0) {
> + c &= 0x07;
> + len = 4;
> + } else if ((c & 0xfc) == 0xf8) {
> + c &= 0x03;
> + len = 5;
> + } else if ((c & 0xfe) == 0xfc) {
> + c &= 0x01;
> + len = 6;
> + } else {
> + errno = EILSEQ;
> + return -1;
> + }
> +
> + if (len > inleft) {
> + errno = EINVAL;
> + return -1;
> + }
> +
> + u = c;
> + for (i = 1; i < len; i++) {
> + u <<= 6 | ((*inptr) & 0x3f);
> + inptr++;
> + }
> +
> + *inbytesleft = inleft - len;
> + *inbytes = inptr;
> + *outchar = u;
> +
> + return 0;
> +}
> +
> +static int
> +encode_utf8 (gunichar c, char **outbytes, size_t *outbytesleft)
> +{
> + size_t outleft = *outbytesleft;
> + char *outptr = *outbytes;
> + size_t len, i;
> + char base;
> +
> + if (c < 128UL) {
> + base = 0;
> + len = 1;
> + } else if (c < 2048UL) {
> + base = 192;
> + len = 2;
> + } else if (c < 65536UL) {
> + base = 224;
> + len = 3;
> + } else if (c < 2097152UL) {
> + base = 240;
> + len = 4;
> + } else if (c < 67108864UL) {
> + base = 248;
> + len = 5;
> + } else if (c < 2147483648UL) {
> + base = 252;
> + len = 6;
> + } else {
> + errno = EINVAL;
> + return -1;
> + }
> +
> + if (outleft < len) {
> + errno = E2BIG;
> + return -1;
> + }
> +
> + for (i = len - 1; i > 0; i--) {
> + /* mask off 6 bits worth and add 128 */
> + outptr[i] = 128 + (c & 0x3f);
> + c >>= 6;
> + }
> +
> + /* first character has a different base */
> + outptr[0] = base + (c & 0x3f);
> +
> + *outbytesleft = outleft - len;
> + *outbytes = outptr + len;
> +
> + return 0;
> +}
> +
> +};
> +
>
> Added: src/miconv.h
> ===================================================================
> --- /dev/null
> +++ b/src/miconv.h
> @@ -0,0 +1,35 @@
> +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8
-*- */
> +/*
> + * miconv.h:
> + *
> + * Contact:
> + * Moonlight List (moonlight-list at lists.ximian.com)
> + *
> + * Copyright 2011 Novell, Inc. (http://www.novell.com)
> + *
> + * See the LICENSE file included with the distribution for details.
> + */
> +
> +#ifndef __MICONV_H__
> +#define __MICONV_H__
> +
> +#include <glib.h>
> +#include <sys/types.h>
> +
> +namespace Moonlight {
> +
> +G_BEGIN_DECLS
> +
> +typedef struct _miconv_t *miconv_t;
> +
> +int miconv (miconv_t cd, char **inbytes, size_t *inbytesleft,
> + char **outbytes, size_t *outbytesleft);
> +miconv_t miconv_open (const char *to, const char *from);
> +int miconv_close (miconv_t cd);
> +
> +G_END_DECLS
> +
> +};
> +
> +#endif /* __MICONV_H__ */
> +
>
>
>
>
>
> _______________________________________________
> Mono-patches maillist - Mono-patches at lists.ximian.com
> http://lists.ximian.com/mailman/listinfo/mono-patches
-------------- next part --------------
An HTML attachment was scrubbed...
URL: http://lists.ximian.com/pipermail/mono-devel-list/attachments/20110420/6868159f/attachment.html 


More information about the Mono-devel-list mailing list