[Mono-list] Unterminated string

Jonathan Pryor jonpryor@vt.edu
Tue, 26 Oct 2004 09:28:25 -0400


--=-s9As0fW3pK2EzhHqL1/5
Content-Type: text/plain
Content-Transfer-Encoding: 7bit

On Fri, 2004-10-22 at 15:59, Neale.Ferguson@SoftwareAG-USA.com wrote: 
> I am trying to write a .Net interface to a system of ours. The system
> takes the following structure:
> 
> struct XXXid {
> 	short level;
> 	short size;
> 	char	node[8];
> 	char  user[8];
> 	int	no;
> }
>
> Both user & node are 8-byte character strings padded with blanks and not
> zero terminated. I wanted to code the equivalent as a C# structure.
> Initially I came up with:
> 
> private struct XXXId {
>        [FieldOffset (0)]  public  short  level;
>        [FieldOffset (2)]  public  short  size;
>        [MarshalAs(UnmanagedType.ByValTStr, SizeConst=8)]
>        [FieldOffset (4)]  public  string node;
>        [MarshalAs(UnmanagedType.ByValArray, SizeConst=8)]
>        [FieldOffset (12)] public  string user;
>        [FieldOffset (20)] public  int    pid;
> }

This is bad for two reasons: (1) FieldOffset is evil, and should be
avoided if at all possible (to permit portability between 32-bit and
64-bit platforms), and (2) as you already note below, this doesn't work
since your strings aren't null terminated.

<snip/>

> Now this almost works except that the character strings are zero
> terminated (which is what you'd expect from using ByValTStr I guess).
> I am at a loss as to how I should achieve what I'm after. I thought
> about using ByValArray and defining the fields as byte[] and then
> using 
> 
> sessInit.user  = Encoding.ASCII.GetBytes(user);
> 
> But that leaves me with addresses in those fields in the structure.

I don't see that behavior.  The following structure declaration works
for my sample apps:

	// Managed counterpart of the native XXXid structure; the fields are
	// declared in the same order as in the C definition.
	struct XxxId {
	  public short level;
	  public short size;
	  // ByValArray + SizeConst=8: the 8 bytes are marshaled as part of the
	  // structure itself.
	  [MarshalAs (UnmanagedType.ByValArray, SizeConst=8)]
	    public byte[] node;
	  // Same marshaling as node.
	  [MarshalAs (UnmanagedType.ByValArray, SizeConst=8)]
	    public byte[] user;
	  public int pid;
	}

Usage is thus:

	// Both fields share one 8-byte ASCII array.  GetBytes is an instance
	// method, so it has to be called on a concrete encoding (Encoding.ASCII).
	XxxId id = new XxxId ();
	id.node = id.user = Encoding.ASCII.GetBytes ("the info");

The one thing this doesn't do is space-pad the string; null padding will
be used instead.  You can work around this with a utility function
(untested):

	// Encodes s as ASCII and returns exactly 8 bytes: longer input is
	// truncated, shorter input is padded on the right with spaces.
	public static byte[] GetBytes (string s)
	{
		const int width = 8;
		byte[] encoded = Encoding.ASCII.GetBytes (s);

		// already the right size: hand back the encoded array untouched
		if (encoded.Length == width)
			return encoded;

		byte[] result = new byte[width];
		for (int pos = 0; pos < width; ++pos) {
			// take the source byte while one exists, otherwise a blank
			result[pos] = pos < encoded.Length ? encoded[pos] : (byte) ' ';
		}
		return result;
	}

The above should truncate strings > 8 characters, and pad strings < 8
characters with spaces.

I've attached the code for my sample app to demonstrate how this works
and how I tested it.

Alternate array marshaling strategies can be found at: 

	http://www.jprl.com/~jon/interop.html#marshaling-arrays

 - Jon


--=-s9As0fW3pK2EzhHqL1/5
Content-Disposition: attachment; filename=uts.c
Content-Type: text/x-csrc; name=uts.c; charset=UTF-8
Content-Transfer-Encoding: 7bit

/* un-terminated strings within structures */

#include <stdio.h>
#include <string.h>

/* Fixed-layout record exchanged with the managed caller.  node and user
 * hold exactly 8 characters each and carry NO terminating '\0'. */
struct XxxId {
  short level;
  short size;
  char  node[8];
  char  user[8];
  int   pid;
};

/* Sample record; every field is named so the initializer does not depend
 * on member order. */
struct XxxId g_id = {
  .level = 1,
  .size  = 2,
  .node  = {'t', 'h', 'e', ' ', 'n', 'o', 'd', 'e'},
  .user  = {'t', 'h', 'e', ' ', 'u', 's', 'e', 'r'},
  .pid   = 42
};

/* Prints every field of *id on one line of stdout.  node and user are not
 * NUL-terminated inside the struct, so each is first copied into a local
 * buffer one byte larger and terminated there. */
void
PrintId (struct XxxId *id)
{
  char node[sizeof id->node + 1];
  char user[sizeof id->user + 1];

  memcpy (node, id->node, sizeof id->node);
  node[sizeof id->node] = '\0';

  memcpy (user, id->user, sizeof id->user);
  user[sizeof id->user] = '\0';

  printf ("XxxId: level=%i; size=%i; node='%s'; user='%s'; pid=%i\n",
      id->level, id->size, node, user, id->pid);
}

void
GetMyId (struct XxxId* id)
{
  printf ("Copying XxxId structure...\n");
  printf ("Source: ");
  PrintId (&g_id);
  memcpy (id, &g_id, sizeof(g_id));
}


--=-s9As0fW3pK2EzhHqL1/5
Content-Disposition: attachment; filename=uts.cs
Content-Type: text/x-csharp; name=uts.cs; charset=UTF-8
Content-Transfer-Encoding: 7bit

// Use un-terminated strings...

using System;
using System.Runtime.InteropServices;
using System.Text;

// Managed counterpart of the native XxxId record from uts.c.  Three
// alternative declarations of the node/user fields are selected by
// preprocessor symbols; the #else branch is the one used by default.
struct XxxId {
  public short level;
  public short size;
#if BAD1
  // Counter-example: ByValTStr marshals a null-terminated string, but the
  // native 8-byte fields carry no terminator.
  [MarshalAs (UnmanagedType.ByValTStr, SizeConst=8)]
  public string node;
  [MarshalAs (UnmanagedType.ByValTStr, SizeConst=8)]
  public string user;
#elif BAD_V2_UNSUPPORTED
  // Counter-example: mcs doesn't support C# v2 fixed arrays.
  public fixed byte node[8];
  public fixed byte user[8];
#else
  // Default: each field is a byte[] marshaled by value with a declared
  // size of 8.
  [MarshalAs (UnmanagedType.ByValArray, SizeConst=8)]
    public byte[] node;
  [MarshalAs (UnmanagedType.ByValArray, SizeConst=8)]
    public byte[] user;
#endif
  public int pid;
}

// Driver for the native "uts" library: fetches an XxxId from native code,
// prints it from managed code, then hands modified copies back to the
// native PrintId.
class Test {
  // The exports in uts.c are ordinary C functions, so cdecl is requested
  // explicitly instead of relying on the platform-default convention
  // (which is stdcall on 32-bit Windows).
  [DllImport ("uts", CallingConvention=CallingConvention.Cdecl)]
  private static extern void GetMyId (out XxxId id);

  [DllImport ("uts", CallingConvention=CallingConvention.Cdecl)]
  private static extern void PrintId (ref XxxId id);

  public static void Main (string[] args)
  {
    // Native -> managed: read the structure and decode both byte fields.
    XxxId id;
    GetMyId (out id);
    string node = Encoding.ASCII.GetString (id.node);
    string user = Encoding.ASCII.GetString (id.user);
    Console.WriteLine ("Managed XxxId: level={0}; size={1}; node='{2}'; " +
      "user='{3}'; pid={4}", id.level, id.size, node, user, id.pid);

    // Managed -> native: strings that are already exactly 8 bytes long.
    id.node = Encoding.ASCII.GetBytes ("new-node");
    id.user = Encoding.ASCII.GetBytes ("new-user");
    PrintId (ref id);

    // Longer than 8 characters: GetBytes truncates.
    id.node = id.user = GetBytes ("really long string");
    PrintId (ref id);

    // Shorter than 8 characters: GetBytes pads with spaces.
    id.node = id.user = GetBytes ("short");
    PrintId (ref id);
  }

  // Encodes s as ASCII and returns exactly 8 bytes: longer input is
  // truncated, shorter input is padded on the right with spaces.  Input
  // that already encodes to 8 bytes is returned as encoded.
  public static byte[] GetBytes (string s)
  {
    const int max = 8;
    byte[] sb = Encoding.ASCII.GetBytes (s);
    if (sb.Length == max)
      return sb;

    // copy (at most max bytes of) the original string
    byte[] r = new byte[max];
    int stop = Math.Min (sb.Length, max);
    Array.Copy (sb, r, stop);

    // pad with spaces if necessary
    for (int i = stop; i < max; ++i)
      r[i] = (byte) ' ';
    return r;
  }
}


--=-s9As0fW3pK2EzhHqL1/5--