Plateau dtd

PLATEAU.DTD
used to encode the e-MED

<!-- May 1999 revision of the TEI-like 'TEIish' dtd -->
<!-- designed to take e-MED coding to a new plateau -->
<!-- with further revisions as noted in comments    -->

<!-- FIRST DRAFT 4May1999 / pfs -->
<!-- Revised Oct 1999 / pfs     -->

<!-- text: wraps exactly one body. lb and hi are declared as inclusions here, so they are    -->
<!-- permitted anywhere inside text, at any depth.                                           -->
<!ELEMENT text - - (body)
                   +(lb | hi) >

<!-- body: a sequence of one or more entryfree elements                                      -->
<!ELEMENT body - - (entryfree)+ >

<!-- entryfree: mixed content. Character data may be interleaved with form, etym, sense,     -->
<!-- sensegrp, usg, note and xr in any order and any number. Every attribute is optional.    -->
<!ELEMENT entryfree - -
          (#PCDATA | form | etym | sense | sensegrp | usg | note | xr)* >
<!ATTLIST entryfree
          id    ID                    #IMPLIED
          seq   CDATA                 #IMPLIED
          dub   (y | n)               #IMPLIED
          type  (xref | affx | main)  #IMPLIED
          new   CDATA                 #IMPLIED
          rev   CDATA                 #IMPLIED >

<!-- entryfree: name inherited from TEI dictionaries module. Each entryfree contains either  -->
<!-- a 'real' entry or a block of cross-references (the latter inherited from legacy data    -->
<!-- and stripped before indexing)                                                           -->

<!-- dwr added usg to content model to handle syntactic usage information living between the -->
<!--  etym and first sense and applying to entire entry, 1997-07-21                          -->
<!-- pfs added note to content model, following MED, to handle "Cp...." notes after (or in)  -->
<!--  form sections. TEI equivalence dictates replacing this eventually with XR element      -->
<!-- plateau added dub (dubious=queried entries), type (following TEI), and new/rev          -->
<!--  (added/revision date) attributes, though none of these is yet used                     -->
<!-- pfs added seq attribute to allow automatic assignment of sequence to MED entries Oct 99 -->
<!-- ======================================================================================= -->

<!-- form: mixed content of character data with hdorth, orth, pos and lbl                    -->
<!ELEMENT form - - (#PCDATA | hdorth | orth | pos | lbl)* >

<!-- form: contains the MED 'form section' that runs from headword through spelling variants -->
<!-- plateau added hdorth and lbl to content model                                           -->
<!-- ======================================================================================= -->

<!-- orth: character data only. Optional id plus three optional CDATA attributes             -->
<!-- (norm, split, exp).                                                                     -->
<!ELEMENT orth - - (#PCDATA) >
<!ATTLIST orth
          id     ID     #IMPLIED
          norm   CDATA  #IMPLIED
          split  CDATA  #IMPLIED
          exp    CDATA  #IMPLIED >

<!-- orth: contains spelling form (as well as some layered phonological info, which is       -->
<!-- not separately tagged at present and probably never will be).                           -->

<!-- plateau added last three attributes to handle possible form expansion in attributes,    -->
<!-- using TEI 'editorial view' to expand compressed information.                            -->
<!-- ======================================================================================= -->

<!-- hdorth: character data only. Carries the same attribute set as orth                     -->
<!-- (id, norm, split, exp), all optional.                                                   -->
<!ELEMENT hdorth - - (#PCDATA) >
<!ATTLIST hdorth
          id     ID     #IMPLIED
          norm   CDATA  #IMPLIED
          split  CDATA  #IMPLIED
          exp    CDATA  #IMPLIED >

<!-- hdorth: added by plateau as optional way to handle headwords. Currently tagged simply   -->
<!-- as orth.                                                                                -->
<!-- plateau added last three attributes to handle possible form expansion in attributes,    -->
<!-- using TEI 'editorial view' to expand compressed information                             -->
<!-- ======================================================================================= -->

<!-- pos: character data only, with optional exp and n attributes                            -->
<!ELEMENT pos - - (#PCDATA) >
<!ATTLIST pos
          exp  CDATA  #IMPLIED
          n    CDATA  #IMPLIED >

<!-- pos: part of speech label, borrowed from TEI but not really equivalent.                 -->
<!-- plateau added attributes exp (to expand abbreviated POS) and n                          -->
<!-- (to code numerators, e.g. the "2" in "n.(2)") if desired.                               -->
<!-- note that MEC uses pos as rough equivalent to TEI gramgrp element, i.e. groups          -->
<!-- grammatical information related to lemma                                                -->
<!-- ======================================================================================= -->

<!-- sensegrp: one def followed by one or more sense elements; optional n and id             -->
<!ELEMENT sensegrp - - (def, sense+) >
<!ATTLIST sensegrp
          n   CDATA  #IMPLIED
          id  ID     #IMPLIED >

<!-- sensegrp added to cover roman-numeral sense groups in E and F vols., pfs 15 Oct 98      -->
<!-- Occasionally used also to code groups of subsenses that begin with an introductory      -->
<!-- phrase ("in prepositional phrases:"), though the latter *may* also be found tagged      -->
<!-- as a usg or usg type="def"                                                              -->
<!-- ======================================================================================= -->

<!-- sense: one or more def elements, then one or more eg elements; optional n and id        -->
<!ELEMENT sense - - (def+, eg+) >
<!ATTLIST sense
          n   CDATA  #IMPLIED
          id  ID     #IMPLIED >

<!-- nigelk and dwr changed eg+ to eg*, 1997-07-18, for affix entries without attached       -->
<!-- quotations; changed back temporarily to catch affix entries, dwr 1997-07-24             -->
<!-- left in place for now by plateau                                                        -->
<!-- ======================================================================================= -->

<!-- def: mixed content. Character data may be interleaved with the elements listed below,   -->
<!-- including nested def. Optional n attribute.                                             -->
<!ELEMENT def - -
          (#PCDATA | date | hi | title | bibl | note | usg | def | p
                   | pos | lang | tax | xref | word | re)* >
<!ATTLIST def
          n  CDATA  #IMPLIED >

<!-- p added to def content model by nigelk, 1997-07-23, to preserve paragraphing in affix   -->
<!-- entries; plateau added pos lang tax xref word                                           -->
<!-- ======================================================================================= -->

<!-- p: mixed content. Same alternatives as def except that def, p and re are not allowed.   -->
<!-- Optional n attribute.                                                                   -->
<!ELEMENT p - -
          (#PCDATA | date | hi | title | bibl | note | usg
                   | pos | lang | tax | xref | word)* >
<!ATTLIST p
          n  CDATA  #IMPLIED >

<!-- p: paragraph                                                                            -->
<!-- added p to the dtd, for them durn affixes, nigelk 1997-07-24                            -->
<!-- plateau removed p from itself, added other elements to match def                        -->
<!-- ======================================================================================= -->

<!-- eg: one or more cit elements. etym, form and usg are inclusions, so they are            -->
<!-- permitted anywhere inside eg. Optional n and id.                                        -->
<!ELEMENT eg - - (cit+)
                 +(etym | form | usg) >
<!ATTLIST eg
          n   CDATA  #IMPLIED
          id  ID     #IMPLIED >

<!-- eg: examples. groups quotation/citation blocks under each sense                         -->
<!-- the inclusion is inherited from previous versions but has no obvious justification.     -->
<!-- need to check if it is being used or can safely be removed (pfs/Oct 99)                 -->
<!-- ======================================================================================= -->

<!-- cit: exactly one bibl followed by exactly one q. type defaults to n when omitted.       -->
<!-- NOTE(review): b presumably marks bracketed quotations and n normal ones; confirm.       -->
<!ELEMENT cit - - (bibl, q) >
<!ATTLIST cit
          n     CDATA    #IMPLIED
          id    ID       #IMPLIED
          type  (n | b)  n >

<!-- cit: citation: groups bibliographical information and quotation.                        -->
<!-- type added to attributes of cit by pfs 27May98 to handle bracketed quots. (those        -->
<!-- regarded as not Middle English and provided only as illustrative of the word's history) -->
<!-- ======================================================================================= -->

<!-- bibl: mixed content of character data and STNCL. ibid defaults to n when omitted.       -->
<!ELEMENT bibl - - (#PCDATA | STNCL)* >
<!ATTLIST bibl
          id    ID       #IMPLIED
          ibid  (y | n)  n >

<!-- bibl: contains bibliographic information (stencil and page reference)                   -->
<!-- ibid added as attribute of bibl by pfs 8 Oct 99 to flag ibids removed automatically by  -->
<!-- de-ibidizing script; manually treated ibids not so flagged, on the assumption that      -->
<!-- machines may make mistakes but people never do :-)                                      -->
<!-- ======================================================================================= -->

<!-- q: mixed content of character data with title, added and ovar; optional n and id        -->
<!ELEMENT q - - (#PCDATA | title | added | ovar)* >
<!ATTLIST q
          n   CDATA  #IMPLIED
          id  ID     #IMPLIED >

<!-- q: quotation                                                                            -->
<!-- nigelk and dwr added TITLE to content model of q, to handle variant readings 1997-07-21 -->
<!-- Plateau left title in place to handle legacy coding, but all instances really belong    -->
<!-- in "added" tags; if that element is used, title should be removed from this content     -->
<!-- Plateau added ovar to content to allow tagging of headword in quotations, if desired    -->
<!-- ======================================================================================= -->

<!-- STNCL: mixed content of character data with date, hi, title, author and ms.             -->
<!-- rid is declared CDATA, not IDREF, so it is not checked against ids in the document.     -->
<!ELEMENT STNCL - - (#PCDATA | date | hi | title | author | ms)* >
<!ATTLIST STNCL
          id   ID     #IMPLIED
          rid  CDATA  #IMPLIED >

<!-- stncl: stencil (MED bibliographic short form)                                           -->
<!-- rid attribute added Nov 98/pfs to accommodate pointers to hyperbib                      -->
<!-- (R)IDs generated by merger process of bibliographic harmonization with hb               -->
<!-- ======================================================================================= -->

<!-- ms: character data only, with optional exp and rid attributes                           -->
<!ELEMENT ms - - (#PCDATA) >
<!ATTLIST ms
          exp  CDATA  #IMPLIED
          rid  CDATA  #IMPLIED >

<!-- ms element added by pfs to allow tagging of ms abbreviations during                     -->
<!-- stencil merger, May 98. HB's "abbr" used otherwise in plateau.dtd                       -->
<!-- exp and rid added since this element may also be used eventually to tag variant         -->
<!-- readings in quots.                                                                      -->
<!-- ======================================================================================= -->

<!-- author: character data only, with optional id and sort attributes                       -->
<!-- NOTE(review): sort presumably holds a sort key for the name; confirm.                   -->
<!ELEMENT author - - (#PCDATA) >
<!ATTLIST author
          id    ID     #IMPLIED
          sort  CDATA  #IMPLIED >
<!-- ======================================================================================= -->

<!-- date: character data only. cp and ms are optional and declared NUMBER, so their         -->
<!-- values must be numeric tokens.                                                          -->
<!ELEMENT date - - (#PCDATA) >
<!ATTLIST date
          cp  NUMBER  #IMPLIED
          ms  NUMBER  #IMPLIED >

<!-- attributes added by plateau to simplify date searching by COMPOSITION and MS date       -->
<!-- these attributes not currently used.                                                    -->
<!-- ======================================================================================= -->

<!-- title: character data only, with optional rid and exp attributes                        -->
<!ELEMENT title - - (#PCDATA) >
<!ATTLIST title
          rid  CDATA  #IMPLIED
          exp  CDATA  #IMPLIED >
                  
<!-- title: used broadly within stencil to mark everything rendered in italics in MED.       -->
<!-- also used elsewhere either to code real (book) titles or to refer to pieces of stencils -->
<!-- ======================================================================================= -->

<!-- usg: mixed content of character data and ovar. TYPE has no default; REND defaults       -->
<!-- to ital when omitted.                                                                   -->
<!ELEMENT usg - - (#PCDATA | ovar)* >
<!ATTLIST usg
          TYPE  (general | field | def | syntax)  #IMPLIED
          REND  (norm | ital)                     ital
          EXP   CDATA                             #IMPLIED >

<!-- usg: usage labels                                                                       -->
<!-- REND attribute experimentally added 3Apr98/pfs in order to handle the                   -->
<!-- non-italic usg labels being employed by current MED production.                         -->
<!-- "general" removed as default usg type in favor of #implied, 29July98/pfs, because       -->
<!-- MED is using TYPE="general" very specifically                                           -->
<!-- Plateau added exp attribute to handle attribute-expanded abbreviated usage labels       -->
<!-- Plateau added ovar to content to allow tagging of bold participles as ovar instead      -->
<!-- of hi within usg tags. Neither exp nor ovar is currently used                           -->
<!-- ======================================================================================= -->

<!-- etym: mixed content of character data with lang, ety and xref                           -->
<!ELEMENT etym - - (#PCDATA | lang | ety | xref)* >

<!-- etym: etymology statement, rendered in square brackets                                  -->
<!-- Plateau added ety and xref tags. all bold text in etymologies should be tagged either   -->
<!-- as ety or within xref tags. eventually. there may be exceptions.                        -->
<!-- ======================================================================================= -->

<!-- hi: character data only, with an optional free-form rend attribute                      -->
<!-- NOTE(review): presumably highlighted text as in TEI; confirm.                           -->
<!ELEMENT hi - - (#PCDATA) >
<!ATTLIST hi
          rend  CDATA  #IMPLIED >
<!-- ======================================================================================= -->

<!-- lang: character data only, with an optional exp attribute                               -->
<!ELEMENT lang - - (#PCDATA) >
<!ATTLIST lang
          exp  CDATA  #IMPLIED >

<!-- Plateau added exp attribute to expand abbreviated languages if desired. -->
<!-- ======================================================================================= -->

<!-- lb: empty element; start tag required, end tag omissible.                               -->
<!-- NOTE(review): presumably a line break as in TEI; confirm.                               -->
<!ELEMENT lb - O EMPTY >
<!-- ======================================================================================= -->

<!-- xr: mixed content of character data with pos, xref, word and abbr; no attributes        -->
<!ELEMENT xr - - (#PCDATA | pos | xref | word | abbr)* >
<!-- note: mixed content. Allows everything xr allows, plus lang, ety and author.            -->
<!-- rend and type are both optional.                                                        -->
<!ELEMENT note - -
          (#PCDATA | pos | xref | word | abbr | lang | ety | author)* >
<!ATTLIST note
          rend  (b | p | n)  #IMPLIED
          type  CDATA        #IMPLIED >

<!-- xr: extended reference note                                                             -->
<!-- Plateau added xr alternative to match TEI semantics; also pos, xref, word. Most current -->
<!-- "notes" should be changed to XR                                                         -->
<!-- Note element may be used for more general purposes than cross-reference notes; plateau  -->
<!-- enlarged content model of note.                                                         -->
<!-- Plateau added rend attrib to note to cover bracketed, parenthesised and unmarked notes. -->
<!-- Plateau added type attrib to note to allow searching specific types of notes for        -->
<!-- specific types of info (e.g. etymological)                                              -->
<!-- ======================================================================================= -->

<!-- ety: character data only, with four optional CDATA attributes                           -->
<!-- (rid, lang, type, src).                                                                 -->
<!ELEMENT ety - - (#PCDATA) >
<!ATTLIST ety
          rid   CDATA  #IMPLIED
          lang  CDATA  #IMPLIED
          type  CDATA  #IMPLIED
          src   CDATA  #IMPLIED >

<!-- ety: etymon                                                                             -->
<!-- Plateau added this element to replace bold text in etymologies and etymological notes.  -->
<!-- type attrib designed for head/variant distinction                                       -->
<!-- Src and rid attribs are hooks to attach related dictionaries                            -->
<!-- ======================================================================================= -->

<!-- tax: character data only. par defaults to y when omitted; exp and type are optional.    -->
<!-- NOTE(review): par presumably flags a parenthesised name, as on lbl; confirm.            -->
<!ELEMENT tax - - (#PCDATA) >
<!ATTLIST tax
          par   (y | n)  y
          exp   CDATA    #IMPLIED
          type  CDATA    #IMPLIED >

<!-- Plateau added this element to tag botanical/zoological taxonomic names in definitions   -->
<!-- ======================================================================================= -->

<!-- xref: mixed content of character data with word, pos and ety; optional rid and sense    -->
<!ELEMENT xref - - (#PCDATA | word | pos | ety)* >
<!ATTLIST xref
          rid    CDATA  #IMPLIED
          sense  CDATA  #IMPLIED >

<!-- Plateau added this element as optional way to group text elements constituting an       -->
<!-- internal cross-reference. currently tagged as xref containing hi and pos                -->
<!-- ======================================================================================= -->

<!-- lbl: mixed content of character data with dial, era, word and xref.                     -->
<!-- par defaults to y when omitted; exp and rid are optional.                               -->
<!ELEMENT lbl - - (#PCDATA | dial | era | word | xref)* >
<!ATTLIST lbl
          exp  CDATA    #IMPLIED
          rid  CDATA    #IMPLIED
          par  (y | n)  y >

<!-- Plateau added this element to tag parenthesised (and nonpar, see par attrib) labels in  -->
<!-- form sections, either free form or using specific kinds of labels (dial, era)           -->
<!-- ======================================================================================= -->

<!-- dial, era: declared together through a name group, so both get the same content         -->
<!-- (character data only) and the same optional exp and rid attributes.                     -->
<!ELEMENT (dial | era) - - (#PCDATA) >
<!ATTLIST (dial | era)
          exp  CDATA  #IMPLIED
          rid  CDATA  #IMPLIED >

<!-- Plateau added these elements to tag specific abbreviated dialect and period labels in   -->
<!-- form sections                                                                           -->
<!-- ======================================================================================= -->

<!-- phrase: mixed content of character data with word, ovar and oref.                       -->
<!-- Optional norm, split and exp attributes, as on orth.                                    -->
<!ELEMENT phrase - - (#PCDATA | word | ovar | oref)* >
<!ATTLIST phrase
          norm   CDATA  #IMPLIED
          split  CDATA  #IMPLIED
          exp    CDATA  #IMPLIED >

<!-- Plateau added this element to tag bold phrases and combinations in definitions          -->
<!-- ======================================================================================= -->

<!-- abbr: mixed content of character data with title and author; optional exp and rid       -->
<!ELEMENT abbr - - (#PCDATA | title | author)* >
<!ATTLIST abbr
          exp  CDATA  #IMPLIED
          rid  CDATA  #IMPLIED >

<!-- Plateau added this element to tag (optionally) abbreviations other than pos lbl usg etc -->
<!-- ======================================================================================= -->

<!-- word: character data only. lang, rend and rid are all optional; rend, when given,       -->
<!-- must be one of i, b or n.                                                               -->
<!ELEMENT word - - (#PCDATA) >
<!ATTLIST word
          lang  CDATA        #IMPLIED
          rend  (i | b | n)  #IMPLIED
          rid   CDATA        #IMPLIED >

<!-- Plateau added this element to tag citations of words and implicit cross-references in   -->
<!-- notes, etc.                                                                             -->
<!-- lang attrib distinguishes ME words from others                                          -->
<!-- rend attrib distinguishes italic, bold, and normal                                      -->
<!-- ======================================================================================= -->

<!-- added: mixed content of character data with lang, title, STNCL, abbr, ms and me.        -->
<!-- The stencil element is written STNCL here to match its declaration and its use in       -->
<!-- bibl; with general name case folding in effect this is the same element as before.      -->
<!ELEMENT added - - (#PCDATA | lang | title | STNCL | abbr | ms | me)* >

<!-- Plateau added this element to tag bracketed non-ME material in quotations. Similar to   -->
<!-- TEI add/supplied                                                                        -->
<!-- ======================================================================================= -->

<!-- me: character data only; no attributes                                                  -->
<!ELEMENT me - - (#PCDATA) >

<!-- Plateau added this element to tag exceptions to added, i.e. ME stuff within bracketed   -->
<!-- additions. None of these tags currently used or supported by middleware                 -->
<!-- ======================================================================================= -->

<!-- oref: empty element; start tag required, end tag omissible. Optional exp attribute.     -->
<!ELEMENT oref - O EMPTY >
<!ATTLIST oref
          exp  CDATA  #IMPLIED >

<!-- ovar: character data only. type and rend are optional; rend, when given, must be        -->
<!-- one of b, i or n.                                                                       -->
<!ELEMENT ovar - - (#PCDATA) >
<!ATTLIST ovar
          type  CDATA        #IMPLIED
          rend  (b | i | n)  #IMPLIED >

<!-- Plateau added ovar and oref, used more or less as in TEI                                -->
<!-- ======================================================================================= -->

<!-- re: one phrase, then any mix of character data, note and def.                           -->
<!-- NOTE(review): #PCDATA sits inside a sequence group here, a pattern SGML practice        -->
<!-- discourages because white space ahead of the phrase start tag may be read as data.      -->
<!-- Confirm parser behaviour before this element is put to use.                             -->
<!ELEMENT re - -
          (phrase, (#PCDATA | note | def)*) >

<!-- Plateau added re (related entry) to allow grouping of definitions with the phrases that -->
<!-- they define. Not used.                                                                  -->
<!-- ======================================================================================= -->

<!-- The following entities file contains all the necessary MED character entities -->

<!-- Declares parameter entity MEDsel1 by public identifier only (no system identifier is    -->
<!-- given) and references it at once, so the entity set is read at this point in the DTD.   -->
<!-- NOTE(review): resolving the public identifier presumably depends on an entity catalog   -->
<!-- entry on the processing system; confirm before moving this DTD.                         -->
<!ENTITY % MEDsel1 PUBLIC "MEC 1998-1//ENTITIES MED 1//EN">
%MEDsel1;

<!-- END -->
PLATEAU.DTD used to encode the e-MED

PLATEAU.DTD
used to encode the e-MED