第005回_XML構文の一覧
»
XML構文の列挙
字句解析が生成するトークン粒度を決定するためにXML構文規則をW3C勧告-XML1.1から列挙します。
| [1] document ::= ( prolog element Misc* ) |
| - ( Char* RestrictedChar Char* ) |
| [2] Char ::= [#x1-#xD7FF] |
| | [#xE000-#xFFFD] |
| | [#x10000-#x10FFFF] |
| [2a] RestrictedChar ::= [#x1-#x8] | [#xB-#xC] | [#xE-#x1F] |
| | [#x7F-#x84] | [#x86-#x9F] |
| [3] S ::= (#x20 | #x9 | #xD | #xA)+ |
| [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] |
| | [#xD8-#xF6] | [#xF8-#x2FF] |
| | [#x370-#x37D] | [#x37F-#x1FFF] |
| | [#x200C-#x200D] | [#x2070-#x218F] |
| | [#x2C00-#x2FEF] | [#x3001-#xD7FF] |
| | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] |
| | [#x10000-#xEFFFF] |
| [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 |
| | [#x0300-#x036F] | [#x203F-#x2040] |
| [5] Name ::= NameStartChar (NameChar)* |
| [6] Names ::= Name (#x20 Name)* |
| [7] Nmtoken ::= (NameChar)+ |
| [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)* |
| [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
| | "'" ([^%&'] | PEReference | Reference)* "'" |
| [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
| | "'" ([^<&'] | Reference)* "'" |
| [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") |
| [12] PubidLiteral ::= '"' PubidChar* '"' |
| | "'" (PubidChar - "'")* "'" |
| [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] |
| | [-'()+,./:=?;!*#@$_%] |
| [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) |
| [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' |
| [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' |
| [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) |
| [18] CDSect ::= CDStart CData CDEnd |
| [19] CDStart ::= '<![CDATA[' |
| [20] CData ::= (Char* - (Char* ']]>' Char*)) |
| [21] CDEnd ::= ']]>' |
| [22] prolog ::= XMLDecl Misc* (doctypedecl Misc*)? |
| [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? |
| SDDecl? S? '?>' |
| [24] VersionInfo ::= S 'version' Eq |
| ("'" VersionNum "'" | '"' VersionNum '"') |
| [25] Eq ::= S? '=' S? |
| [26] VersionNum ::= '1.1' |
| [27] Misc ::= Comment | PI | S |
| [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? |
| S? ('[' intSubset ']' S?)? '>' |
| [28a] DeclSep ::= PEReference | S |
| [28b] intSubset ::= (markupdecl | DeclSep)* |
| [29] markupdecl ::= elementdecl | AttlistDecl |
| | EntityDecl | NotationDecl | PI | Comment |
| [30] extSubset ::= TextDecl? extSubsetDecl |
| [31] extSubsetDecl::= |
| ( markupdecl | conditionalSect | DeclSep)* |
| [32] SDDecl ::= S 'standalone' Eq |
| (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) |
| ※[33]−[38]は削除済 |
| [39] element ::= EmptyElemTag | STag content ETag |
| [40] STag ::= '<' Name (S Attribute)* S? '>' |
| [41] Attribute ::= Name Eq AttValue |
| [42] ETag ::= '</' Name S? '>' |
| [43] content ::= CharData? |
| ( (element | Reference | CDSect | PI | Comment) |
| CharData?)* |
| [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' |
| [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>' |
| [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children |
| [47] children ::= (choice | seq) ('?' | '*' | '+')? |
| [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? |
| [49] choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')' |
| [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' |
| [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
| | '(' S? '#PCDATA' S? ')' |
| [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>' |
| [53] AttDef ::= S Name S AttType S DefaultDecl |
| [54] AttType ::= StringType | TokenizedType | EnumeratedType |
| [55] StringType ::= 'CDATA' |
| [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
| | 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS' |
| [57] EnumeratedType ::= NotationType | Enumeration |
| [58] NotationType ::= 'NOTATION' S '(' S? Name (S? |
| '|' S? Name)* S? ')' |
| [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')' |
| [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' |
| | (('#FIXED' S)? AttValue) |
| [61] conditionalSect ::= includeSect | ignoreSect |
| [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>' |
| [63] ignoreSect ::= '<![' S? 'IGNORE' S? |
| '[' ignoreSectContents* ']]>' |
| [64] ignoreSectContents ::= Ignore |
| ('<![' ignoreSectContents ']]>' Ignore)* |
| [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*) |
| [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';' |
| [67] Reference ::= EntityRef | CharRef |
| [68] EntityRef ::= '&' Name ';' |
| [69] PEReference ::= '%' Name ';' |
| [70] EntityDecl ::= GEDecl | PEDecl |
| [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' |
| [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>' |
| [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) |
| [74] PEDef ::= EntityValue | ExternalID |
| [75] ExternalID ::= 'SYSTEM' S SystemLiteral |
| | 'PUBLIC' S PubidLiteral S SystemLiteral |
| [76] NDataDecl ::= S 'NDATA' S Name |
| [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' |
| [78] extParsedEnt ::= ( TextDecl? content ) |
| - ( Char* RestrictedChar Char* ) |
| ※[79]は削除済 |
| [80] EncodingDecl ::= S 'encoding' Eq |
| ('"' EncName '"' | "'" EncName "'" ) |
| [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* |
| [82] NotationDecl ::= '<!NOTATION' S Name S |
| (ExternalID | PublicID) S? '>' |
| [83] PublicID ::= 'PUBLIC' S PubidLiteral |
XML構文に現れる集合演算について
[1] document ::= ( prolog element Misc* ) - ( Char* RestrictedChar Char* )
[14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
[15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
[16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
[17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
[20] CData ::= (Char* - (Char* ']]>' Char*))
[65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
[78] extParsedEnt ::= ( TextDecl? content ) - ( Char* RestrictedChar Char* )
には集合の演算記号が含まれています。これはEBNFではありませんので注意してください。
[14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
[15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
[16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
[17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
[20] CData ::= (Char* - (Char* ']]>' Char*))
[65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
[78] extParsedEnt ::= ( TextDecl? content ) - ( Char* RestrictedChar Char* )
documentは少しわかり難いので、CDataから確認すると、
[20] CData ::= (Char* - (Char* ']]>' Char*))
の表現は、
CData は、
任意数のChar
で構成するが、
']]>'を含まない
ということを意味します。
次に、
[1] document ::= ( prolog element Misc* ) - ( Char* RestrictedChar Char* )
の表現を見ると2つのことを同時に述べています。
まず、文字集合の見地からすると、CDataと同じ書き方をすれば、
document ::= Char* - ( Char* RestrictedChar Char* )
を意味しているので
非終端記号document は、
任意数のChar
で構成するが、
RestrictedCharを含まない
ということを意味します。
そして、構文の見地からすると、
非終端記号document は、
非終端記号prolog
非終端記号element
任意数の非終端記号Misc
で構成するということを意味します。
# 少し手抜き回のようになってしまいましたが、
# 後で見返すときにこの方が見やすいと思います。
コメント
コメントを投稿する
SpecialPR