[Mono-docs-list] Ecma CIL class docs DTD and XML Schema
Per Arneng
pt99par@student.bth.se
Tue, 8 Apr 2003 01:56:24 -0400
--Boundary-00=_ISmk+z3OCv532dV
Content-Type: text/plain;
charset="us-ascii"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline
Hi!
I found a DTD in a zip file containing the class library documentation
submitted to ECMA. I converted it to XML Schema using the W3C's little Perl script.
Best regards
Per Arneng
--Boundary-00=_ISmk+z3OCv532dV
Content-Type: text/x-dtd;
charset="us-ascii";
name="All.dtd"
Content-Transfer-Encoding: 7bit
Content-Disposition: attachment; filename="All.dtd"
<?xml version="1.0" encoding="UTF-8"?>
<!-- edited with XML Spy v4.4 U (http://www.xmlspy.com) by Rex Jaeschke (private) -->
<!ELEMENT AssemblyCulture (#PCDATA)>
<!ELEMENT AssemblyInfo (AssemblyName, AssemblyPublicKey, AssemblyVersion, AssemblyCulture, Attributes)>
<!ELEMENT AssemblyName (#PCDATA)>
<!ELEMENT AssemblyPublicKey (#PCDATA)>
<!ELEMENT AssemblyVersion (#PCDATA)>
<!ELEMENT Attribute (AttributeName, Excluded, ExcludedTypeName?, ExcludedLibraryName?)>
<!ELEMENT AttributeName (#PCDATA)>
<!ELEMENT Attributes (Attribute*)>
<!ELEMENT Base (BaseTypeName?, ExcludedBaseTypeName?, ExcludedLibraryName?)>
<!ELEMENT BaseTypeName (#PCDATA)>
<!ELEMENT Docs (summary?, altmember?, altcompliant?, param*, returns?, value?, exception*, threadsafe?, remarks?, example?, permission?, example?)>
<!ELEMENT Excluded (#PCDATA)>
<!ELEMENT ExcludedBaseTypeName (#PCDATA)>
<!ELEMENT ExcludedLibrary (#PCDATA)>
<!ELEMENT ExcludedLibraryName (#PCDATA)>
<!ELEMENT ExcludedTypeName (#PCDATA)>
<!ELEMENT Interface (InterfaceName, Excluded)>
<!ELEMENT InterfaceName (#PCDATA)>
<!ELEMENT Interfaces (Interface*)>
<!ELEMENT Libraries (Types+)>
<!ELEMENT Member (MemberSignature+, MemberType, Attributes?, ReturnValue, Parameters, MemberValue?, Docs, Excluded, ExcludedLibrary*)>
<!ATTLIST Member
MemberName NMTOKEN #REQUIRED
>
<!ELEMENT MemberOfLibrary (#PCDATA)>
<!ELEMENT MemberSignature EMPTY>
<!ATTLIST MemberSignature
Language CDATA #REQUIRED
Value CDATA #REQUIRED
>
<!ELEMENT MemberType (#PCDATA)>
<!ELEMENT MemberValue (#PCDATA)>
<!ELEMENT Members (Member*)>
<!ELEMENT PRE EMPTY>
<!ELEMENT Parameter (Attributes?)>
<!ATTLIST Parameter
Name NMTOKEN #REQUIRED
Type CDATA #REQUIRED
>
<!ELEMENT Parameters (Parameter*)>
<!ELEMENT ReturnType (#PCDATA)>
<!ELEMENT ReturnValue (ReturnType?)>
<!ELEMENT SPAN (#PCDATA | para | paramref | SPAN | see | block)*>
<!ELEMENT ThreadingSafetyStatement (#PCDATA)>
<!ELEMENT Type (TypeSignature+, MemberOfLibrary, AssemblyInfo, ThreadingSafetyStatement?, Docs, Base, Interfaces, Attributes?, Members, TypeExcluded)>
<!ATTLIST Type
Name NMTOKEN #REQUIRED
FullName NMTOKEN #REQUIRED
FullNameSP NMTOKEN #REQUIRED
>
<!ELEMENT TypeExcluded (#PCDATA)>
<!ELEMENT TypeSignature EMPTY>
<!ATTLIST TypeSignature
Language CDATA #REQUIRED
Value CDATA #REQUIRED
>
<!ELEMENT Types (Type+)>
<!ATTLIST Types
Library NMTOKEN #REQUIRED
>
<!ELEMENT altcompliant EMPTY>
<!ATTLIST altcompliant
cref CDATA #REQUIRED
>
<!ELEMENT altmember EMPTY>
<!ATTLIST altmember
cref CDATA #REQUIRED
>
<!ELEMENT block (#PCDATA | see | para | paramref | list | block | c | subscript | code | sup | pi)*>
<!ATTLIST block
subset CDATA #REQUIRED
type NMTOKEN #REQUIRED
>
<!ELEMENT c (#PCDATA | para | paramref | code | see)*>
<!ELEMENT code (#PCDATA)>
<!ATTLIST code
lang CDATA #IMPLIED
>
<!ELEMENT codelink EMPTY>
<!ATTLIST codelink
SampleID CDATA #REQUIRED
SnippetID CDATA #REQUIRED
>
<!ELEMENT description (#PCDATA | SPAN | paramref | para | see | c | permille | block | sub)*>
<!ELEMENT example (#PCDATA | para | code | c | codelink | see)*>
<!ELEMENT exception (#PCDATA | paramref | see | para | SPAN | block)*>
<!ATTLIST exception
cref CDATA #REQUIRED
>
<!ELEMENT i (#PCDATA)>
<!ELEMENT item (term, description*)>
<!ELEMENT list (listheader?, item*)>
<!ATTLIST list
type NMTOKEN #REQUIRED
>
<!ELEMENT listheader (term, description+)>
<!ELEMENT onequarter EMPTY>
<!ELEMENT para (#PCDATA | see | block | paramref | c | onequarter | superscript | sup | permille | SPAN | list | pi | theta | sub)*>
<!ELEMENT param (#PCDATA | c | paramref | see | block | para | SPAN)*>
<!ATTLIST param
name CDATA #REQUIRED
>
<!ELEMENT paramref EMPTY>
<!ATTLIST paramref
name CDATA #REQUIRED
>
<!ELEMENT permille EMPTY>
<!ELEMENT permission (#PCDATA | see | paramref | para | block)*>
<!ATTLIST permission
cref CDATA #REQUIRED
>
<!ELEMENT pi EMPTY>
<!ELEMENT pre EMPTY>
<!ELEMENT remarks (#PCDATA | para | block | list | c | paramref | see | pre | SPAN | code | PRE)*>
<!ELEMENT returns (#PCDATA | para | list | paramref | see)*>
<!ELEMENT see EMPTY>
<!ATTLIST see
cref CDATA #IMPLIED
langword CDATA #IMPLIED
qualify CDATA #IMPLIED
>
<!ELEMENT sub (#PCDATA | paramref)*>
<!ELEMENT subscript EMPTY>
<!ATTLIST subscript
term CDATA #REQUIRED
>
<!ELEMENT summary (#PCDATA | para | see | block | list)*>
<!ELEMENT sup (#PCDATA | i | paramref)*>
<!ELEMENT superscript EMPTY>
<!ATTLIST superscript
term CDATA #REQUIRED
>
<!ELEMENT term (#PCDATA | block | see | paramref | para | c | sup | pi | theta)*>
<!ELEMENT theta EMPTY>
<!ELEMENT threadsafe (para+)>
<!ELEMENT value (#PCDATA | para | list | see)*>
--Boundary-00=_ISmk+z3OCv532dV
Content-Type: text/plain;
charset="us-ascii";
name="All.xsd"
Content-Transfer-Encoding: 7bit
Content-Disposition: attachment; filename="All.xsd"
<schema
xmlns='http://www.w3.org/2000/10/XMLSchema'
targetNamespace='http://www.w3.org/namespace/'
xmlns:t='http://www.w3.org/namespace/'>
<element name='AssemblyCulture'>
<complexType mixed='true'>
</complexType>
</element>
<element name='AssemblyInfo'>
<complexType>
<sequence>
<element ref='t:AssemblyName'/>
<element ref='t:AssemblyPublicKey'/>
<element ref='t:AssemblyVersion'/>
<element ref='t:AssemblyCulture'/>
<element ref='t:Attributes'/>
</sequence>
</complexType>
</element>
<element name='AssemblyName'>
<complexType mixed='true'>
</complexType>
</element>
<element name='AssemblyPublicKey'>
<complexType mixed='true'>
</complexType>
</element>
<element name='AssemblyVersion'>
<complexType mixed='true'>
</complexType>
</element>
<element name='Attribute'>
<complexType>
<sequence>
<element ref='t:AttributeName'/>
<element ref='t:Excluded'/>
<element ref='t:ExcludedTypeName' minOccurs='0' maxOccurs='1'/>
<element ref='t:ExcludedLibraryName' minOccurs='0' maxOccurs='1'/>
</sequence>
</complexType>
</element>
<element name='AttributeName'>
<complexType mixed='true'>
</complexType>
</element>
<element name='Attributes'>
<complexType>
<sequence>
<element ref='t:Attribute' minOccurs='0' maxOccurs='unbounded'/>
</sequence>
</complexType>
</element>
<element name='Base'>
<complexType>
<sequence>
<element ref='t:BaseTypeName' minOccurs='0' maxOccurs='1'/>
<element ref='t:ExcludedBaseTypeName' minOccurs='0' maxOccurs='1'/>
<element ref='t:ExcludedLibraryName' minOccurs='0' maxOccurs='1'/>
</sequence>
</complexType>
</element>
<element name='BaseTypeName'>
<complexType mixed='true'>
</complexType>
</element>
<element name='Docs'>
<complexType>
<sequence>
<element ref='t:summary' minOccurs='0' maxOccurs='1'/>
<element ref='t:altmember' minOccurs='0' maxOccurs='1'/>
<element ref='t:altcompliant' minOccurs='0' maxOccurs='1'/>
<element ref='t:param' minOccurs='0' maxOccurs='unbounded'/>
<element ref='t:returns' minOccurs='0' maxOccurs='1'/>
<element ref='t:value' minOccurs='0' maxOccurs='1'/>
<element ref='t:exception' minOccurs='0' maxOccurs='unbounded'/>
<element ref='t:threadsafe' minOccurs='0' maxOccurs='1'/>
<element ref='t:remarks' minOccurs='0' maxOccurs='1'/>
<element ref='t:example' minOccurs='0' maxOccurs='1'/>
<element ref='t:permission' minOccurs='0' maxOccurs='1'/>
<element ref='t:example' minOccurs='0' maxOccurs='1'/>
</sequence>
</complexType>
</element>
<element name='Excluded'>
<complexType mixed='true'>
</complexType>
</element>
<element name='ExcludedBaseTypeName'>
<complexType mixed='true'>
</complexType>
</element>
<element name='ExcludedLibrary'>
<complexType mixed='true'>
</complexType>
</element>
<element name='ExcludedLibraryName'>
<complexType mixed='true'>
</complexType>
</element>
<element name='ExcludedTypeName'>
<complexType mixed='true'>
</complexType>
</element>
<element name='Interface'>
<complexType>
<sequence>
<element ref='t:InterfaceName'/>
<element ref='t:Excluded'/>
</sequence>
</complexType>
</element>
<element name='InterfaceName'>
<complexType mixed='true'>
</complexType>
</element>
<element name='Interfaces'>
<complexType>
<sequence>
<element ref='t:Interface' minOccurs='0' maxOccurs='unbounded'/>
</sequence>
</complexType>
</element>
<element name='Libraries'>
<complexType>
<sequence>
<element ref='t:Types' maxOccurs='unbounded'/>
</sequence>
</complexType>
</element>
<element name='Member'>
<complexType>
<sequence>
<element ref='t:MemberSignature' maxOccurs='unbounded'/>
<element ref='t:MemberType'/>
<element ref='t:Attributes' minOccurs='0' maxOccurs='1'/>
<element ref='t:ReturnValue'/>
<element ref='t:Parameters'/>
<element ref='t:MemberValue' minOccurs='0' maxOccurs='1'/>
<element ref='t:Docs'/>
<element ref='t:Excluded'/>
<element ref='t:ExcludedLibrary' minOccurs='0' maxOccurs='unbounded'/>
</sequence>
<attribute name='MemberName' type='NMTOKEN' use='required'/>
</complexType>
</element>
<element name='MemberOfLibrary'>
<complexType mixed='true'>
</complexType>
</element>
<element name='MemberSignature'>
<complexType>
<attribute name='Language' type='string' use='required'/>
<attribute name='Value' type='string' use='required'/>
</complexType>
</element>
<element name='MemberType'>
<complexType mixed='true'>
</complexType>
</element>
<element name='MemberValue'>
<complexType mixed='true'>
</complexType>
</element>
<element name='Members'>
<complexType>
<sequence>
<element ref='t:Member' minOccurs='0' maxOccurs='unbounded'/>
</sequence>
</complexType>
</element>
<element name='PRE'>
<complexType/>
</element>
<element name='Parameter'>
<complexType>
<sequence>
<element ref='t:Attributes' minOccurs='0' maxOccurs='1'/>
</sequence>
<attribute name='Name' type='NMTOKEN' use='required'/>
<attribute name='Type' type='string' use='required'/>
</complexType>
</element>
<element name='Parameters'>
<complexType>
<sequence>
<element ref='t:Parameter' minOccurs='0' maxOccurs='unbounded'/>
</sequence>
</complexType>
</element>
<element name='ReturnType'>
<complexType mixed='true'>
</complexType>
</element>
<element name='ReturnValue'>
<complexType>
<sequence>
<element ref='t:ReturnType' minOccurs='0' maxOccurs='1'/>
</sequence>
</complexType>
</element>
<element name='SPAN'>
<complexType mixed='true'>
<choice minOccurs='0' maxOccurs='unbounded'>
<element ref='t:para'/>
<element ref='t:paramref'/>
<element ref='t:SPAN'/>
<element ref='t:see'/>
<element ref='t:block'/>
</choice>
</complexType>
</element>
<element name='ThreadingSafetyStatement'>
<complexType mixed='true'>
</complexType>
</element>
<element name='Type'>
<complexType>
<sequence>
<element ref='t:TypeSignature' maxOccurs='unbounded'/>
<element ref='t:MemberOfLibrary'/>
<element ref='t:AssemblyInfo'/>
<element ref='t:ThreadingSafetyStatement' minOccurs='0' maxOccurs='1'/>
<element ref='t:Docs'/>
<element ref='t:Base'/>
<element ref='t:Interfaces'/>
<element ref='t:Attributes' minOccurs='0' maxOccurs='1'/>
<element ref='t:Members'/>
<element ref='t:TypeExcluded'/>
</sequence>
<attribute name='Name' type='NMTOKEN' use='required'/>
<attribute name='FullName' type='NMTOKEN' use='required'/>
<attribute name='FullNameSP' type='NMTOKEN' use='required'/>
</complexType>
</element>
<element name='TypeExcluded'>
<complexType mixed='true'>
</complexType>
</element>
<element name='TypeSignature'>
<complexType>
<attribute name='Language' type='string' use='required'/>
<attribute name='Value' type='string' use='required'/>
</complexType>
</element>
<element name='Types'>
<complexType>
<sequence>
<element ref='t:Type' maxOccurs='unbounded'/>
</sequence>
<attribute name='Library' type='NMTOKEN' use='required'/>
</complexType>
</element>
<element name='altcompliant'>
<complexType>
<attribute name='cref' type='string' use='required'/>
</complexType>
</element>
<element name='altmember'>
<complexType>
<attribute name='cref' type='string' use='required'/>
</complexType>
</element>
<element name='block'>
<complexType mixed='true'>
<choice minOccurs='0' maxOccurs='unbounded'>
<element ref='t:see'/>
<element ref='t:para'/>
<element ref='t:paramref'/>
<element ref='t:list'/>
<element ref='t:block'/>
<element ref='t:c'/>
<element ref='t:subscript'/>
<element ref='t:code'/>
<element ref='t:sup'/>
<element ref='t:pi'/>
</choice>
<attribute name='subset' type='string' use='required'/>
<attribute name='type' type='NMTOKEN' use='required'/>
</complexType>
</element>
<element name='c'>
<complexType mixed='true'>
<choice minOccurs='0' maxOccurs='unbounded'>
<element ref='t:para'/>
<element ref='t:paramref'/>
<element ref='t:code'/>
<element ref='t:see'/>
</choice>
</complexType>
</element>
<element name='code'>
<complexType mixed='true'>
<attribute name='lang' type='string' use='optional'/>
</complexType>
</element>
<element name='codelink'>
<complexType>
<attribute name='SampleID' type='string' use='required'/>
<attribute name='SnippetID' type='string' use='required'/>
</complexType>
</element>
<element name='description'>
<complexType mixed='true'>
<choice minOccurs='0' maxOccurs='unbounded'>
<element ref='t:SPAN'/>
<element ref='t:paramref'/>
<element ref='t:para'/>
<element ref='t:see'/>
<element ref='t:c'/>
<element ref='t:permille'/>
<element ref='t:block'/>
<element ref='t:sub'/>
</choice>
</complexType>
</element>
<element name='example'>
<complexType mixed='true'>
<choice minOccurs='0' maxOccurs='unbounded'>
<element ref='t:para'/>
<element ref='t:code'/>
<element ref='t:c'/>
<element ref='t:codelink'/>
<element ref='t:see'/>
</choice>
</complexType>
</element>
<element name='exception'>
<complexType mixed='true'>
<choice minOccurs='0' maxOccurs='unbounded'>
<element ref='t:paramref'/>
<element ref='t:see'/>
<element ref='t:para'/>
<element ref='t:SPAN'/>
<element ref='t:block'/>
</choice>
<attribute name='cref' type='string' use='required'/>
</complexType>
</element>
<element name='i'>
<complexType mixed='true'>
</complexType>
</element>
<element name='item'>
<complexType>
<sequence>
<element ref='t:term'/>
<element ref='t:description' minOccurs='0' maxOccurs='unbounded'/>
</sequence>
</complexType>
</element>
<element name='list'>
<complexType>
<sequence>
<element ref='t:listheader' minOccurs='0' maxOccurs='1'/>
<element ref='t:item' minOccurs='0' maxOccurs='unbounded'/>
</sequence>
<attribute name='type' type='NMTOKEN' use='required'/>
</complexType>
</element>
<element name='listheader'>
<complexType>
<sequence>
<element ref='t:term'/>
<element ref='t:description' maxOccurs='unbounded'/>
</sequence>
</complexType>
</element>
<element name='onequarter'>
<complexType/>
</element>
<element name='para'>
<complexType mixed='true'>
<choice minOccurs='0' maxOccurs='unbounded'>
<element ref='t:see'/>
<element ref='t:block'/>
<element ref='t:paramref'/>
<element ref='t:c'/>
<element ref='t:onequarter'/>
<element ref='t:superscript'/>
<element ref='t:sup'/>
<element ref='t:permille'/>
<element ref='t:SPAN'/>
<element ref='t:list'/>
<element ref='t:pi'/>
<element ref='t:theta'/>
<element ref='t:sub'/>
</choice>
</complexType>
</element>
<element name='param'>
<complexType mixed='true'>
<choice minOccurs='0' maxOccurs='unbounded'>
<element ref='t:c'/>
<element ref='t:paramref'/>
<element ref='t:see'/>
<element ref='t:block'/>
<element ref='t:para'/>
<element ref='t:SPAN'/>
</choice>
<attribute name='name' type='string' use='required'/>
</complexType>
</element>
<element name='paramref'>
<complexType>
<attribute name='name' type='string' use='required'/>
</complexType>
</element>
<element name='permille'>
<complexType/>
</element>
<element name='permission'>
<complexType mixed='true'>
<choice minOccurs='0' maxOccurs='unbounded'>
<element ref='t:see'/>
<element ref='t:paramref'/>
<element ref='t:para'/>
<element ref='t:block'/>
</choice>
<attribute name='cref' type='string' use='required'/>
</complexType>
</element>
<element name='pi'>
<complexType/>
</element>
<element name='pre'>
<complexType/>
</element>
<element name='remarks'>
<complexType mixed='true'>
<choice minOccurs='0' maxOccurs='unbounded'>
<element ref='t:para'/>
<element ref='t:block'/>
<element ref='t:list'/>
<element ref='t:c'/>
<element ref='t:paramref'/>
<element ref='t:see'/>
<element ref='t:pre'/>
<element ref='t:SPAN'/>
<element ref='t:code'/>
<element ref='t:PRE'/>
</choice>
</complexType>
</element>
<element name='returns'>
<complexType mixed='true'>
<choice minOccurs='0' maxOccurs='unbounded'>
<element ref='t:para'/>
<element ref='t:list'/>
<element ref='t:paramref'/>
<element ref='t:see'/>
</choice>
</complexType>
</element>
<element name='see'>
<complexType>
<attribute name='cref' type='string' use='optional'/>
<attribute name='langword' type='string' use='optional'/>
<attribute name='qualify' type='string' use='optional'/>
</complexType>
</element>
<element name='sub'>
<complexType mixed='true'>
<sequence minOccurs='0' maxOccurs='unbounded'>
<element ref='t:paramref'/>
</sequence>
</complexType>
</element>
<element name='subscript'>
<complexType>
<attribute name='term' type='string' use='required'/>
</complexType>
</element>
<element name='summary'>
<complexType mixed='true'>
<choice minOccurs='0' maxOccurs='unbounded'>
<element ref='t:para'/>
<element ref='t:see'/>
<element ref='t:block'/>
<element ref='t:list'/>
</choice>
</complexType>
</element>
<element name='sup'>
<complexType mixed='true'>
<choice minOccurs='0' maxOccurs='unbounded'>
<element ref='t:i'/>
<element ref='t:paramref'/>
</choice>
</complexType>
</element>
<element name='superscript'>
<complexType>
<attribute name='term' type='string' use='required'/>
</complexType>
</element>
<element name='term'>
<complexType mixed='true'>
<choice minOccurs='0' maxOccurs='unbounded'>
<element ref='t:block'/>
<element ref='t:see'/>
<element ref='t:paramref'/>
<element ref='t:para'/>
<element ref='t:c'/>
<element ref='t:sup'/>
<element ref='t:pi'/>
<element ref='t:theta'/>
</choice>
</complexType>
</element>
<element name='theta'>
<complexType/>
</element>
<element name='threadsafe'>
<complexType>
<sequence>
<element ref='t:para' maxOccurs='unbounded'/>
</sequence>
</complexType>
</element>
<element name='value'>
<complexType mixed='true'>
<choice minOccurs='0' maxOccurs='unbounded'>
<element ref='t:para'/>
<element ref='t:list'/>
<element ref='t:see'/>
</choice>
</complexType>
</element>
</schema>
--Boundary-00=_ISmk+z3OCv532dV
Content-Type: text/x-perl;
charset="us-ascii";
name="dtd2xsd.pl"
Content-Transfer-Encoding: 7bit
Content-Disposition: attachment; filename="dtd2xsd.pl"
#! perl
#
# by Dan Connolly http://www.w3.org/People/Connolly/ connolly@w3.org
# Bert Bos http://www.w3.org/People/Bos/ <bert@w3.org>
# Yuichi Koike
# Mary Holstege (holstege@mathling.com)
# initial hack by DC Apr 2000, based on dtd2bnf by BB Mar 1998;
# major revision Apr 2000 to make it actually usable by YK;
# tweaks by DC; major update Jan 2001 by MH
#
# see Log since then at end.
# $Id: dtd2xsd.pl,v 1.17 2001/01/19 05:59:12 connolly Exp $
use strict;
# Script-wide settings and tables, initialised to their defaults.
# The command-line switch that overrides each setting is noted beside it.
my $targetNS = "http://www.w3.org/namespace/";    # -ns URI
my $prefix = "t";                                 # -prefix NAME
my $alias = 0;                                    # -alias
my $file = "";                                    # any non-switch argument
my %SimpleTypes;                                  # -simpletype PATTERN TYPE
my @AttrGroupPatterns;                            # -attrgroup PATTERN
my @ModelGroupPatterns;                           # -modelgroup PATTERN
my @SubstitutionGroupPatterns;                    # -substgroup PATTERN
my %SubstitutionGroup;
my %Mixed;
my %ModelGroup;
my $mapping_file;                                 # -mapfile FILE
my $pcdata_flag = 0;                              # -pcdata
my $pcdata_simpletype = "string";
my $debug = 0;                                    # -debug

# Command-line processing.  Each switch maps to a handler that pulls its
# own operands off @ARGV; an argument that is not a known switch is taken
# as the input file name (the last such argument wins).
{
    my %switch = (
        "-ns"         => sub { $targetNS = shift(@ARGV); },
        "-prefix"     => sub { $prefix = shift(@ARGV); },
        "-alias"      => sub { $alias = 1; },
        # Treat #PCDATA by itself as being string (or another simple type
        # if so designated in the mapping file)
        "-pcdata"     => sub { $pcdata_flag = 1; },
        "-mapfile"    => sub { $mapping_file = shift(@ARGV); },
        "-simpletype" => sub {
            my $pattern = shift(@ARGV);
            my $type = shift(@ARGV);
            $SimpleTypes{$pattern} = $type;
        },
        "-attrgroup"  => sub { push(@AttrGroupPatterns, shift(@ARGV)); },
        "-modelgroup" => sub { push(@ModelGroupPatterns, shift(@ARGV)); },
        "-substgroup" => sub { push(@SubstitutionGroupPatterns, shift(@ARGV)); },
        "-debug"      => sub { $debug = 1; },
    );

    while (@ARGV) {
        my $arg = shift(@ARGV);
        my $handler = $switch{$arg};
        if ($handler) {
            $handler->();
        }
        else {
            $file = $arg;
        }
    }
}
# Alias dictionary: defaults.  Maps an alias name to the schema datatype
# that is written for it.
my %alias_dic;
$alias_dic{"URI"} = "uriReference";
$alias_dic{"LANG"} = "language";
$alias_dic{"NUMBER"} = "nonNegativeInteger";
$alias_dic{"Date"} = "date";
$alias_dic{"Boolean"} = "boolean";

# Optional mapping file (-mapfile).  Each recognised line has one of the
# forms below (keywords are case-insensitive); other lines are ignored:
#   alias NAME = TYPE          add or override an alias
#   simpletype PATTERN = TYPE  same as -simpletype
#   attrgroup PATTERN          same as -attrgroup
#   modelgroup PATTERN         same as -modelgroup
#   substgroup PATTERN         same as -substgroup
#   pcdata TYPE                simple type used for #PCDATA-only elements
if ( $mapping_file )
{
    print STDERR "Open mapping $mapping_file ";
    my $mappings;
    # Three-argument open: the file name is never interpreted as a mode.
    if ( !open( $mappings, '<', $mapping_file ) )
    {
        print STDERR "unsuccessful.\n";
    }
    else {
        print STDERR "successful.\n";
        while ( my $line = <$mappings> ) {
            # Strip only the line terminator (LF or CRLF).  A blind chop
            # would eat the last character of the final value when the
            # file does not end with a newline.
            $line =~ s/\r?\n\z//;
            if ( $line =~ /^alias\s+([^ \t]+)\s*=\s*([^ \t]+)\s*/i ) {
                $alias_dic{$1} = $2;
            }
            elsif ( $line =~ /^simpletype\s+([^ \t]+)\s*=\s*([^ \t]+)\s*/i ) {
                $SimpleTypes{$1} = $2;
            }
            elsif ( $line =~ /^attrgroup\s+([^ \t]+)\s*/i ) {
                push( @AttrGroupPatterns, $1 );
            }
            elsif ( $line =~ /^modelgroup\s+([^ \t]+)\s*/i ) {
                push( @ModelGroupPatterns, $1 );
            }
            elsif ( $line =~ /^substgroup\s+([^ \t]+)\s*/i ) {
                push( @SubstitutionGroupPatterns, $1 );
            }
            elsif ( $line =~ /^pcdata\s+([^ \t]+)\s*/i ) {
                ## BUGLET: doesn't pay attention to prefix; just a special alias
                $pcdata_simpletype = $1;
            }
        }
        close( $mappings );
    }
    # Report the effective alias table, whether or not the file opened.
    foreach my $key (keys(%alias_dic))
    {
        print STDERR "Alias \%$key to $alias_dic{$key}\n"
    }
}
# Working state shared by the rest of the script.
my $linelen = 72;
my $PROG = substr($0, rindex($0, "/") + 1);     # script name without directory
my $USAGE = "Usage: $PROG file\n";
# Regex fragment for a quoted literal; the text is captured in one of two
# groups depending on which quote character was used.
my $str = "(?:\"([^\"]*)\"|\'([^\']*)\')";
my %pent;        # Parameter entities
my %attributes;  # Attribute lists
my @element;     # Elements in source order
my %model;       # Content models

# Main
# Slurp mode: every later read returns the whole remaining input at once.
undef $/;

# Load the DTD text (openFile also strips comments and pulls in external
# entities, per the original author's note).
my $buf = openFile($file);

# Alias treatment: with -alias, each reference %NAME; to a known alias is
# rewritten to the marker _alias_NAME_alias_ so that it is not handled as
# an ordinary parameter entity.
my $alias_ident = "_alias_";
if ($alias == 1) {
    for my $name (keys %alias_dic) {
        my $marker = join('', $alias_ident, $name, $alias_ident);
        $buf =~ s/\%$name;/$marker/gsie;
    }
}
# Store all parameter entities.
#
# Each <!ENTITY % name "text"> declaration is cut out of $buf one at a
# time.  A name that matches a -simpletype / -attrgroup / -modelgroup /
# -substgroup pattern is turned into a private <!_KIND ...> marker that is
# appended to $buf, and the entity itself expands to a "#KINDREF name"
# token.  Every other entity simply expands to its replacement text.
while ($buf =~ s/<!ENTITY\s+%\s+(\S+)\s+$str\s*>//si) {
    my ($n, $repltext) = ($1, $2.$3);

    # Only the first declaration of an entity counts.  Test for existence,
    # not truth, so that an entity first declared with empty text is not
    # overridden by a later declaration.
    next if exists $pent{$n};

    # The first matching pattern wins; $classified stops the remaining
    # pattern lists from being consulted once the entity has been claimed.
    my $classified = 0;

    foreach my $pat (keys %SimpleTypes) {
        next unless $n =~ /^$pat$/;
        $buf .= " <!_DATATYPE $n $SimpleTypes{$pat} $repltext> ";
        $pent{$n} = "#DATATYPEREF $n";
        $classified = 1;
        last;
    }

    foreach my $group ( [ 'ATTRGROUP',  \@AttrGroupPatterns ],
                        [ 'MODELGROUP', \@ModelGroupPatterns ],
                        [ 'SUBSTGROUP', \@SubstitutionGroupPatterns ] ) {
        last if $classified;
        my ($kind, $patterns) = @$group;
        foreach my $pat (@$patterns) {
            next unless $n =~ /^$pat$/;
            $buf .= " <!_$kind $n $repltext> ";
            $pent{$n} = "#${kind}REF $n";
            $classified = 1;
            last;
        }
    }

    # Plain parameter entity: remember its replacement text.
    $pent{$n} = $repltext unless $classified;
}
# Remove all general entity declarations (parameter entity declarations
# have already been cut out of $buf above).
$buf =~ s/<!ENTITY\s+.*?>//gsi;

# Loop until parameter entities are fully expanded: repeat the substitution
# pass until a pass performs no replacement.  A reference to an unknown
# entity is replaced by nothing.
my $i;
do {
    # count # of substitutions
    $i = 0;
    # expand parameter entities
    $buf =~ s/%([a-zA-Z0-9_\.-]+);?/$i++,$pent{$1}/gse;
} while ($i != 0);

# Treat conditional sections: keep the body of INCLUDE sections, drop
# IGNORE sections.
# The replacement must be $1 in a plain (non-/e) substitution: under /e the
# text \1 is Perl code meaning "reference to the constant 1", which would
# put a SCALAR(0x...) string in place of the section body.
# NOTE(review): (.*) and .* are greedy under /s, so one match spans from
# the first section opener to the last "]]>" in the buffer; sibling
# sections are therefore not handled separately.
while($buf =~ s/<!\[\s*?INCLUDE\s*?\[(.*)\]\]>/$1/gsi) {};
while($buf =~ s/<!\[\s*?IGNORE\s*?\[.*\]\]>//gsi) {};
# store attribute lists
# Every <!ATTLIST name ...> declaration is handed to store_att(name, body);
# because of the /e flag the declaration text is replaced in $buf by
# whatever store_att returns.  (store_att is defined elsewhere in this
# file -- presumably it records the list in %attributes; confirm there.)
$buf =~ s/<!ATTLIST\s+(\S+)\s+(.*?)>/store_att($1, $2)/gsie;
# store content models
# Same scheme for <!ELEMENT name model>, via store_elt(name, model).
# (Presumably fills @element and %model, which the element loop further
# down reads -- confirm in store_elt.)
$buf =~ s/<!ELEMENT\s+(\S+)\s+(.+?)>/store_elt($1, $2)/gsie;
#print "<?xml version='1.0'?>\n";
# Open the schema document: default namespace is the 2000/10 XML Schema
# namespace; the target namespace is also bound to $prefix so that the
# generated ref='prefix:name' attributes resolve.
print "<schema
xmlns='http://www.w3.org/2000/10/XMLSchema'
targetNamespace='$targetNS'
xmlns:$prefix='$targetNS'>\n";
# find maximum length of non-terminals
#my $maxlen = max(map(length, @element)) + 4;
# The four passes below consume the private <!_KIND ...> markers that the
# parameter-entity pass appended to $buf, calling the matching writer for
# each one.  The writers are defined elsewhere in this file.
# write simple type declarations
$buf =~ s/<!_DATATYPE\s+(\S+)\s+(\S+)\s+(.+?)>/write_simpleType($1, $2, $3)/gsie;
# write attribute groups
$buf =~ s/<!_ATTRGROUP\s+(\S+)\s+(.+?)>/write_attrGroup($1, $2)/gsie;
# write model groups
$buf =~ s/<!_MODELGROUP\s+(\S+)\s+(.+?)>/write_modelGroup($1, $2)/gsie;
# write substitution groups
$buf =~ s/<!_SUBSTGROUP\s+(\S+)\s+(.+?)>/write_substitutionGroup($1, $2)/gsie;
my($e);
# Main output loop: one <element> declaration per DTD element, in source
# order, followed by the closing </schema> tag.
#
# Three shapes are produced:
#   1. text-only element without attributes (only with -pcdata):
#        <element name=... type=...> with no complexType at all;
#   2. text-only element with attributes (only with -pcdata):
#        complexType / simpleContent / extension carrying the attributes;
#   3. everything else: a complexType built from the content model,
#        followed by the attribute declarations.
foreach $e (@element) {
    my $model_ref = $model{$e};
    my $attrs = $attributes{$e};
    my @tokens = @$model_ref;

    # "( #PCDATA )" or "( #PCDATA ) *" and nothing else
    my $pcdata_only = ($tokens[1] eq '#PCDATA') &&
        ( ($#tokens == 2) ||
          ( ($#tokens == 3) && ($tokens[3] eq '*') ) );
    my $isSimple = ($pcdata_flag eq 1) && $pcdata_only;

    my $substGroup = $SubstitutionGroup{$e};
    $substGroup = " substitutionGroup='$substGroup'" if $substGroup;

    if ( $isSimple && !$attrs ) {
        # Shape 1 -- assume (#PCDATA) is string
        print "\n <element name='$e' type='$pcdata_simpletype'$substGroup>\n";
    }
    elsif ( $isSimple ) {
        # Shape 2 -- assume (#PCDATA) is string, extended with attributes
        print "\n <element name='$e'$substGroup>\n";
        print " <complexType>\n";
        print " <simpleContent>\n";
        print " <extension base='string'>\n";
        printAttrDecls(@$attrs);
        print " </extension>\n";
        print " </simpleContent>\n";
        print " </complexType>\n";
    }
    else {
        # Shape 3 -- content model first, attributes afterwards
        print "\n <element name='$e'$substGroup>\n";
        print " <complexType";
        if ( $tokens[0] eq 'EMPTY' ) {
            # An EMPTY element without attributes is a self-closed
            # complexType; with attributes the tag stays open for them.
            if ( $attrs ) {
                print ">\n";
            }
            else {
                print "/>\n";
            }
        }
        elsif ( $tokens[0] eq 'ANY' ) {
            print ">\n";
            print " <sequence>\n";
            print " <any namespace='$targetNS'/>\n";
            print " </sequence>\n";
        }
        else {
            if ( $debug eq 1 ) {
                print STDERR "==mixed? @tokens\n"; #@@
            }
            if ( isMixed(@tokens) ) {
                print " mixed='true'>\n";
            }
            else {
                print ">\n";
            }
            printChildList(3, makeChildList('', @tokens));
        }

        printAttrDecls(@$attrs) if $attrs;

        # Only the self-closed EMPTY form has no end tag to write.
        if ( $attrs || $tokens[0] ne 'EMPTY' ) {
            print " </complexType>\n";
        }
    }

    print " </element>\n";
}
print "</schema>\n";
exit;
# Write the indentation unit (" ") $_[0] times to the currently selected
# output handle.  A count of zero or less writes nothing.
sub printSpace
{
    my ($count) = @_;
    print " " foreach 1 .. $count;
}
# Print the schema particles for a child list built by makeChildList.
#
#   $num   current indentation depth (passed to printSpace)
#   @list  flat token list:
#            0..3    open a <sequence>  (plain, '*', '+', '?')
#            10..13  open a <choice>    (plain, '*', '+', '?')
#            20      close the innermost open group
#            '#MODELGROUPREF' name   reference to a model group
#            '#SUBSTGROUPREF' name   reference to a substitution group head
#            anything else           an element name, optionally followed
#                                    by one of '+', '?', '*'
#
# Output goes to the currently selected handle; nothing is returned.
# Reads the file-level settings $pcdata_flag and $prefix.
sub printChildList
{
    my ($num, @list) = @_;
    my @currentTag = ();    # stack of open group tags ("" = no tag written)

    for (my $i = 0; $i <= $#list; $i++) {
        my $n = $list[$i];

        if ($n eq 0 || $n eq 1 || $n eq 2 || $n eq 3) {
            if ( ($pcdata_flag eq 0) && ($n eq 0 || $n eq 1) && $list[$i+1] eq 20)
            {
                # The whole list is 0 20 or 1 20; i.e. (#PCDATA) or (#PCDATA)*.
                # Don't generate a sequence child; mixed handles all this.
            }
            else {
                # A plain sequence opened while the stack is empty (or topped
                # by an empty marker) pushes one more empty marker first.
                if ( $currentTag[$#currentTag] eq "" && $n eq 0 )
                {
                    push(@currentTag, "");
                }
                printSpace($num); $num++;
                print "<sequence";
                if ($n eq 1) {
                    print " minOccurs='0' maxOccurs='unbounded'";
                } elsif ($n eq 2) {
                    print " maxOccurs='unbounded'";
                } elsif ($n eq 3) {
                    print " minOccurs='0' maxOccurs='1'";
                }
                print ">\n";
                push(@currentTag, "sequence");
            }
        } elsif ($n eq 10 || $n eq 11 || $n eq 12 || $n eq 13) {
            printSpace($num); $num++;
            print "<choice";
            if ($n eq 11) {
                print " minOccurs='0' maxOccurs='unbounded'";
            } elsif ($n eq 12) {
                print " maxOccurs='unbounded'";
            } elsif ($n eq 13) {
                print " minOccurs='0' maxOccurs='1'";
            }
            print ">\n";
            push(@currentTag, "choice");
        } elsif ($n eq 20) {
            # Close the innermost group; an empty marker (or an empty stack)
            # means no tag was opened, so none is closed.
            my $tag = pop(@currentTag);
            if ($tag ne "") {
                $num--; printSpace($num);
                print "</", $tag, ">\n";
            }
        } else {
            printSpace($num);
            if ($n eq '#MODELGROUPREF') {
                print "<group ref='$prefix:$list[++$i]'";
            }
            elsif ($n eq '#SUBSTGROUPREF') {
                print "<element ref='$prefix:$list[++$i]'";
            } else {
                print "<element ref='$prefix:$n'";
            }
            # Consume a trailing occurrence indicator whatever the enclosing
            # group is.  It used to be skipped inside a <choice>, which left
            # the indicator in the list to be printed on the next iteration
            # as a bogus <element ref='prefix:+'/> (e.g. for "(a+ | b)").
            my $occurs = $list[$i+1];
            if ($occurs eq "+") {
                print " maxOccurs='unbounded'";
                $i++;
            } elsif ($occurs eq "?") {
                print " minOccurs='0' maxOccurs='1'";
                $i++;
            } elsif ($occurs eq "*") {
                print " minOccurs='0' maxOccurs='unbounded'";
                $i++;
            }
            print "/>\n";
        }
    }
}
# Convert a tokenised content model into the flat child list consumed by
# printChildList.
#
#   $groupName  name of the model group being converted, or '' for an
#               element; when set and the model contains #PCDATA the group
#               is recorded in %Mixed
#   @model      tokens: "(", ")", ",", "|", "#PCDATA", "*", "+", "?", names
#
# Returns a list in which every "(" has become a group code and every ")"
# the code 20.  A group code starts at 0, is set to 0 by "," (sequence) or
# 10 by "|" (choice), and has 1, 2 or 3 added for a "*", "+" or "?" that
# follows the closing parenthesis.  #PCDATA (and a "|" directly after it)
# is dropped.
sub makeChildList {
    my ($groupName, @model) = @_;
    my @ret = ();
    my @brace = ();     # indices in @ret of the currently open groups

    # amount added to a group code for the suffix after its ")"
    my %suffix_bump = ( "*" => 1, "+" => 2, "?" => 3 );
    # every code that opens a sequence or a choice
    my %opens_group = map { $_ => 1 } (0, 1, 2, 3, 10, 11, 12, 13);

    my @pending = @model;
    while (@pending) {
        my $tok = shift(@pending);
        if ($tok eq "(") {
            push(@ret, 0);
            push(@brace, $#ret);
        }
        elsif ($tok eq ")") {
            if (exists $suffix_bump{ $pending[0] }) {
                $ret[$brace[$#brace]] += $suffix_bump{ shift(@pending) };
            }
            pop(@brace);
            push(@ret, 20);
        }
        elsif ($tok eq ",") {
            $ret[$brace[$#brace]] = 0;
        }
        elsif ($tok eq "|") {
            $ret[$brace[$#brace]] = 10;
        }
        elsif ($tok eq "#PCDATA") {
            # swallow the separator that follows #PCDATA
            shift(@pending) if $pending[0] eq "|";
            $Mixed{$groupName} = 1 if $groupName;
        }
        else {
            push(@ret, $tok);
        }
    }

    # "( ( a | b | c )* )" gets mapped to "0 10 a b c 20 20" which would
    # produce a spurious sequence element.  That is mostly harmless for an
    # element content model but incorrect for model groups, so a redundant
    # outer 0 ... 20 pair is stripped.  Redundant means some other group
    # bounds the whole list.  Counter-example:
    # ( (a|b),(c|d) ) maps to "0 10 a b 20 10 c d 20 20"; the inner groups
    # are siblings, so the bounding sequence is required and must stay.
    my $last = $#ret;
    if (   $ret[0] eq 0
        && $ret[$last] eq 20
        && $ret[$last-1] eq 20
        && exists $opens_group{ $ret[1] } )
    {
        # Possibly redundant.  Scan for balance: every interim 20 between
        # the proposed new start and the proposed new end has to leave the
        # nesting depth at 1 or above.
        my $depth = 0;
        my $redundant_paren = 1;    # assume redundant until proved otherwise
        foreach my $pos (1 .. $last - 1) {
            my $code = $ret[$pos];
            if ($code eq 20) {
                $depth--;
                if ($pos < $last - 1 && $depth < 1) {
                    $redundant_paren = 0;
                    print STDERR "i=$pos,depth=$depth\n";
                }
            }
            elsif (exists $opens_group{$code}) {
                $depth++;
            }
        }
        if ($redundant_paren eq 1) {
            print STDERR "Truncating @ret\n";
            @ret = @ret[1..$#ret-1];
        }
    }

    if ($debug eq 1) {
        print STDERR "@model to @ret\n";
    }
    return @ret;
}
# Print the XML Schema declarations for a tokenized attribute list.
#
# Arguments: a flat list of words (write_attrGroup passes the output of
# parsewords here).  The list is a sequence of entries, each one either
#   #ATTRGROUPREF name                 -> <attributeGroup ref='prefix:name'/>
# or
#   attname type-spec default-spec     -> <attribute .../>
# where type-spec is an enumeration "( a | b )", "#DATATYPEREF name", an
# aliased type, one of the DTD type keywords, or anything else (mapped to
# 'string'); and default-spec is "#FIXED value", "#REQUIRED", "#IMPLIED" or
# a bare default value.
#
# Reads the file-level variables $prefix, $alias, $alias_ident and
# %alias_dic.  Output goes to the currently selected filehandle; nothing
# useful is returned.
sub printAttrDecls{
    my @atts = @_;

    for (my $i = 0; $i <= $#atts; $i++) {
        if ($atts[$i] eq '#ATTRGROUPREF'){
            print " <attributeGroup ref='$prefix:$atts[$i+1]'/>\n";
            $i++;
            next;
        }

        # attribute name
        print " <attribute name='$atts[$i]'";

        # attribute type
        my @enume;
        $i++;
        if ($atts[$i] eq "(") {
            # like `attname ( yes | no ) #REQUIRED`
            $i++;
            # Stop at the end of the list as well as at ")": an element past
            # the end is undef, which never equals ")", so an unterminated
            # enumeration would otherwise loop forever.
            while ($i <= $#atts && $atts[$i] ne ")") {
                if ($atts[$i] ne "|") {
                    push(@enume, $atts[$i]);
                }
                $i++;
            }
            if ($i > $#atts) {
                print STDERR "Unterminated enumeration in attribute list: @atts\n";
            }
        } elsif ($atts[$i] eq '#DATATYPEREF'){
            print " type='$prefix:$atts[++$i]'";
        } elsif ($alias eq 1 && $atts[$i] =~ s/$alias_ident//gsie) {
            # alias special: the marker is stripped and the rest looked up
            print " type='$alias_dic{$atts[$i]}'";
        } elsif ($atts[$i] =~ /\A(?:ID|IDREF|IDREFS|ENTITY|ENTITIES|NMTOKEN|NMTOKENS|NOTATION)\z/) {
            # common type for DTD and Schema; anchored so that a word that
            # merely contains e.g. "ID" is not mistaken for a type keyword
            print " type='$atts[$i]'";
        } else {
            # `attname CDATA #REQUIRED`
            print " type='string'";
        }

        $i++;

        # #FIXED
        if ($atts[$i] eq "#FIXED") {
            $i++;
            print " use='fixed' value='$atts[$i]'/>\n";
            next;
        }

        # minOccurs
        if ($atts[$i] eq "#REQUIRED") {
            print " use='required'";
        } elsif ($atts[$i] eq "#IMPLIED") {
            print " use='optional'";
        } else {
            print " use='default' value='$atts[$i]'";
        }

        # enumerate
        if (!@enume) {
            print "/>\n";
        } else {
            print ">\n";
            print " <simpleType>\n";
            print " <restriction base='string'>\n";
            &write_enum(@enume);
            print " </restriction>\n";
            print " </simpleType>\n";
            print " </attribute>\n";
        }
    }
}
# Print one <enumeration> facet per argument, in the order given.
sub write_enum{
    foreach my $value (@_) {
        print " <enumeration value='$value'/>\n";
    }
}
# Parse a string into an array of "words".
# Words are whitespace-separated sequences of non-whitespace characters,
# or quoted strings ("" or ''), with the quotes removed.
# For attribute lists, "(", ")" and "|" outside a quoted string are always
# returned as one-character words of their own, even when no whitespace
# separates them from their neighbours.
# The contents of a quoted string are returned verbatim: parentheses and
# bars inside the quotes are NOT split off or padded with spaces.
# A quote character without a matching closing quote is treated as an
# ordinary word character.
#
# Argument: the string to split (undef is treated as the empty string).
# Returns:  the list of words, in order.
sub parsewords {
    my $line = defined $_[0] ? $_[0] : '';
    my @words = ();

    # Scan left to right with \G so that the string is never copied and the
    # match variables $& and $' are not needed.
    pos($line) = 0;
    while (pos($line) < length($line)) {
        if ($line =~ /\G\s+/gc) {
            # Skip whitespace
        } elsif ($line =~ /\G"([^"]*)"/gc) {
            push(@words, $1);
        } elsif ($line =~ /\G'([^']*)'/gc) {
            push(@words, $1);
        } elsif ($line =~ /\G([()|])/gc) {
            # grouping punctuation is always a word by itself
            push(@words, $1);
        } elsif ($line =~ /\G([^\s()|]+)/gc) {
            push(@words, $1);
        } else {
            die "Cannot happen\n";
        }
    }
    return @words;
}
# Record an element declaration and return the empty string.
# The element name is appended to @element; the content model is split
# into tokens (parentheses, ",", "|", occurrence marks, names and
# "#"-keywords) and kept as an array reference in $model{$name}.
# Tokenizing stops silently at the first character that starts no token.
sub store_elt
{
    my ($name, $model) = @_;

    push(@element, $name);

    $model =~ s/\s+/ /gso;
    my @tokens;
    push(@tokens, $1)
        while $model =~ s/^\s*(\(|\)|,|\+|\?|\||[\w_\.-]+|\#\w+|\*)//;

    $model{$name} = \@tokens;
    return '';
}
# Record an attribute list and return the empty string.
# The declaration text is split with parsewords and the resulting words
# are kept as an array reference in $attributes{$element}.
sub store_att
{
    my ($element, $atts) = @_;
    $attributes{$element} = [ parsewords($atts) ];
    return '';
}
# Print a named <simpleType> that restricts a base type.
#
# Arguments:
#   1. name of the simple type
#   2. name of the base type used in <restriction base='...'>
#   3. declaration text; when it starts with "(" the words up to the
#      matching ")" (ignoring "|") are written as enumeration facets
# Output goes to the currently selected filehandle.
sub write_simpleType{
    my ($name, $base, $stuff) = @_;
    my @words = parsewords($stuff);

    print "\n <simpleType name='$name'>\n";
    print " <restriction base='$base'>\n";

    if (@words && $words[0] eq "(") {
        my @enume;
        my $i = 1;
        # Bound the scan by the list length: an element past the end is
        # undef, which never equals ")", so a missing closing parenthesis
        # would otherwise loop (and grow @enume) forever.
        while ($i <= $#words && $words[$i] ne ")") {
            if ($words[$i] ne "|") {
                push(@enume, $words[$i]);
            }
            $i++;
        }
        if ($i > $#words) {
            print STDERR "Unterminated enumeration in simple type $name: $stuff\n";
        }
        write_enum(@enume);
    }

    print " </restriction>\n";
    print " </simpleType>\n";
}
# Print a named <attributeGroup>: the declaration text is split with
# parsewords and the attribute declarations are written by printAttrDecls.
sub write_attrGroup{
    my ($name, $decl) = @_;
    my @tokens = parsewords($decl);

    print "\n <attributeGroup name='$name'>\n";
    printAttrDecls(@tokens);
    print " </attributeGroup>\n";
}
# Print a named model <group>.
# The declaration text is echoed in an XML comment, split with parsewords,
# wrapped in an extra pair of parentheses and converted by makeChildList;
# the resulting child list is printed with printChildList and a reference
# to it is remembered in $ModelGroup{name}.
sub write_modelGroup{
    my ($group, $decl) = @_;
    my @tokens = parsewords($decl);

    print "\n <group name='$group'>\n" . "<!-- $decl -->\n";

    my @children = &makeChildList($group, '(', @tokens, ')');
    &printChildList(3, @children);
    $ModelGroup{$group} = \@children;

    print " </group>\n";
}
# Print an abstract <element> that acts as the head of a substitution group.
# The declaration text is split with parsewords, wrapped in an extra pair
# of parentheses and converted by makeChildList; every entry of the result
# except the last one is registered in %SubstitutionGroup as belonging to
# this head element.
sub write_substitutionGroup
{
    my ($head, $decl) = @_;
    my @tokens = parsewords($decl);

    print "\n <element name='$head' abstract='true'>\n";

    my @members = &makeChildList($head, '(', @tokens, ')');
    # The final entry is deliberately left out, as in the index loop this
    # replaces (it ran while the index was below the last index).
    $SubstitutionGroup{$_} = $head for @members[0 .. $#members - 1];

    print " </element>\n";
}
# Decide whether a content model is mixed content.
#
# Argument: the content model as a list of tokens.
# Returns 1 when the model contains #PCDATA, or a #MODELGROUPREF /
# #SUBSTGROUPREF whose following name is flagged in %Mixed; returns 0
# otherwise.  As a special case, when $pcdata_flag is 1 and the model is
# exactly three tokens with #PCDATA in the middle (optionally followed by
# a fourth token "*"), it is treated as a simple type and 0 is returned.
# With $debug set to 1 the decision is traced on STDERR.
sub isMixed{
    my @model = @_;
    my $verbose = ($debug eq 1);

    if ( ($pcdata_flag eq 1) && ($model[1] eq '#PCDATA') &&
         ( ($#model == 2) || ( ($#model == 3) && ($model[3] eq '*') ) ) )
    {
        if ($verbose) {
            print STDERR "++ mixed? @model\n"; #@@
            print STDERR "++ no; simple type. @model\n"; #@@
        }
        return 0;
    }
    print STDERR "++ mixed? @model\n" if $verbose; #@@

    foreach my $i (0 .. $#model) {
        my $token = $model[$i];
        my $hit = $token eq '#PCDATA'
               || ($token eq '#MODELGROUPREF' && $Mixed{$model[$i+1]})
               || ($token eq '#SUBSTGROUPREF' && $Mixed{$model[$i+1]});
        next unless $hit;

        print STDERR "++ yes! $i @model\n" if $verbose; #@@
        return 1;
    }

    print STDERR "++ no. @model\n" if $verbose; #@@
    return 0;
}
# Return the largest of the numbers passed in (compared numerically).
# The first argument is the starting candidate, so an empty argument list
# yields undef.
sub max
{
    my ($largest, @others) = @_;
    for my $candidate (@others) {
        $largest = $candidate if $candidate > $largest;
    }
    return $largest;
}
# Read a DTD source and return its text with external parameter entities
# expanded.
# 1) Open the file named by the argument; an empty name reads via <> instead
# 2) Remove comments and processing instructions
# 3) Remove external parameter entity declarations (PUBLIC or SYSTEM) and
#    replace each reference %name; by the contents of the file it names,
#    obtained by calling openFile recursively
# 4) Return the resulting text ("" if the file cannot be opened)
#
# Progress messages are written to STDERR.  Uses the file-level pattern
# $str to match the quoted identifiers in entity declarations.
sub openFile {
    my $file = $_[0];
    my %extent;     # entity name => file to include for %name;
    my $bufbuf;

    if ($file ne "") {
        print STDERR "open $file ";
        # Three-argument open on a lexical handle: the name comes from the
        # DTD being converted, and with the two-argument form a name such
        # as "cmd |" or ">file" would be run as a pipe or opened for
        # writing instead of being read as a file.
        my $in;
        if (! open($in, '<', $file)) {
            print STDERR " failed!!\n";
            return "";
        }
        print STDERR " successful\n";
        # NOTE(review): this reads a single record; presumably $/ is
        # undefined at file level so that the whole file is slurped -
        # confirm against the top of the script.
        $bufbuf = <$in>;
        close($in);
    } else {
        print STDERR "open STDIN successful\n";
        $bufbuf = <>;
    }
    # An empty input yields undef from the read; treat it as empty text.
    $bufbuf = '' unless defined $bufbuf;

    # remove comments
    $bufbuf =~ s/<!--.*?-->//gso;
    # remove processing instructions
    $bufbuf =~ s/<\?.*?>//gso;

    # store external parameter entities
    # NOTE(review): the capture numbers below presumably rely on $str
    # containing two capture groups of its own - verify against its
    # definition.
    while ($bufbuf =~ s/<!ENTITY\s+%\s+(\S+)\s+PUBLIC\s+$str\s+$str.*?>//si) {
        $extent{$1} = $4.$5;
    }
    while ($bufbuf =~ s/<!ENTITY\s+%\s+(\S+)\s+SYSTEM\s+$str.*?>//si) {
        $extent{$1} = $2.$3;
    }

    # read external entity files; the entity name is quoted so that
    # characters such as "." in it are matched literally
    foreach my $key (keys(%extent)) {
        $bufbuf =~ s/%\Q$key\E;/openFile($extent{$key})/gsie;
    }
    return $bufbuf;
}
# $Log: dtd2xsd.pl,v $
# Revision 1.17 2001/01/19 05:59:12 connolly
# more changelog stuff; link to MH's announcement etc.
#
# Revision 1.16 2001/01/19 05:55:56 connolly
# added Log at end
#
# Changes: 2001/01/10
# Date: Thu, 11 Jan 2001 14:51:44 -0800
# From: Mary Holstege <holstege@mathling.com>
# To: xml-dev@lists.xml.org
# Subject: [ANN] Updated version of DTD to XML Schema tool
# http://lists.xml.org/archives/xml-dev/200101/msg00481.html
# http://www.mathling.com/xmlschema/
# Switch to CR syntax
# Support external mapping file for type aliases, simple types, model and
# attribute groups
# Map ANY correctly to wildcard rather than element 'ANY'
# Support treating lead PCDATA as string or other aliased simple type instead
# of as mixed content (may be more appropriate for data-oriented DTDs)
# e.g. <!ELEMENT title (#PCDATA)> => <element name="title" type="string"/>
# Support substitution groups.
--Boundary-00=_ISmk+z3OCv532dV--