第005回_XML構文の一覧
»
XML構文の列挙
字句解析が生成するトークン粒度を決定するために、XML構文規則をW3C勧告「XML 1.1」から列挙します。
[1] document ::= ( prolog element Misc* ) |
- ( Char* RestrictedChar Char* ) |
[2] Char ::= [#x1-#xD7FF] |
| [#xE000-#xFFFD] |
| [#x10000-#x10FFFF] |
[2a] RestrictedChar ::= [#x1-#x8] | [#xB-#xC] | [#xE-#x1F] |
| [#x7F-#x84] | [#x86-#x9F] |
[3] S ::= (#x20 | #x9 | #xD | #xA)+ |
[4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] |
| [#xD8-#xF6] | [#xF8-#x2FF] |
| [#x370-#x37D] | [#x37F-#x1FFF] |
| [#x200C-#x200D] | [#x2070-#x218F] |
| [#x2C00-#x2FEF] | [#x3001-#xD7FF] |
| [#xF900-#xFDCF] | [#xFDF0-#xFFFD] |
| [#x10000-#xEFFFF] |
[4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 |
| [#x0300-#x036F] | [#x203F-#x2040] |
[5] Name ::= NameStartChar (NameChar)* |
[6] Names ::= Name (#x20 Name)* |
[7] Nmtoken ::= (NameChar)+ |
[8] Nmtokens ::= Nmtoken (#x20 Nmtoken)* |
[9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
| "'" ([^%&'] | PEReference | Reference)* "'" |
[10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
| "'" ([^<&'] | Reference)* "'" |
[11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") |
[12] PubidLiteral ::= '"' PubidChar* '"' |
| "'" (PubidChar - "'")* "'" |
[13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] |
| [-'()+,./:=?;!*#@$_%] |
[14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) |
[15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' |
[16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' |
[17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) |
[18] CDSect ::= CDStart CData CDEnd |
[19] CDStart ::= '<![CDATA[' |
[20] CData ::= (Char* - (Char* ']]>' Char*)) |
[21] CDEnd ::= ']]>' |
[22] prolog ::= XMLDecl Misc* (doctypedecl Misc*)? |
[23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? |
SDDecl? S? '?>' |
[24] VersionInfo ::= S 'version' Eq |
("'" VersionNum "'" | '"' VersionNum '"') |
[25] Eq ::= S? '=' S? |
[26] VersionNum ::= '1.1' |
[27] Misc ::= Comment | PI | S |
[28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? |
S? ('[' intSubset ']' S?)? '>' |
[28a] DeclSep ::= PEReference | S |
[28b] intSubset ::= (markupdecl | DeclSep)* |
[29] markupdecl ::= elementdecl | AttlistDecl |
| EntityDecl | NotationDecl | PI | Comment |
[30] extSubset ::= TextDecl? extSubsetDecl |
[31] extSubsetDecl::= |
( markupdecl | conditionalSect | DeclSep)* |
[32] SDDecl ::= S 'standalone' Eq |
(("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) |
※[33]−[38]は削除済 |
[39] element ::= EmptyElemTag | STag content ETag |
[40] STag ::= '<' Name (S Attribute)* S? '>' |
[41] Attribute ::= Name Eq AttValue |
[42] ETag ::= '</' Name S? '>' |
[43] content ::= CharData? |
( (element | Reference | CDSect | PI | Comment) |
CharData?)* |
[44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' |
[45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>' |
[46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children |
[47] children ::= (choice | seq) ('?' | '*' | '+')? |
[48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? |
[49] choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')' |
[50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' |
[51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
| '(' S? '#PCDATA' S? ')' |
[52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>' |
[53] AttDef ::= S Name S AttType S DefaultDecl |
[54] AttType ::= StringType | TokenizedType | EnumeratedType |
[55] StringType ::= 'CDATA' |
[56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
| 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS' |
[57] EnumeratedType ::= NotationType | Enumeration |
[58] NotationType ::= 'NOTATION' S '(' S? Name (S? |
'|' S? Name)* S? ')' |
[59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')' |
[60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' |
| (('#FIXED' S)? AttValue) |
[61] conditionalSect ::= includeSect | ignoreSect |
[62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>' |
[63] ignoreSect ::= '<![' S? 'IGNORE' S? |
'[' ignoreSectContents* ']]>' |
[64] ignoreSectContents ::= Ignore |
('<![' ignoreSectContents ']]>' Ignore)* |
[65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*) |
[66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';' |
[67] Reference ::= EntityRef | CharRef |
[68] EntityRef ::= '&' Name ';' |
[69] PEReference ::= '%' Name ';' |
[70] EntityDecl ::= GEDecl | PEDecl |
[71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' |
[72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>' |
[73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) |
[74] PEDef ::= EntityValue | ExternalID |
[75] ExternalID ::= 'SYSTEM' S SystemLiteral |
| 'PUBLIC' S PubidLiteral S SystemLiteral |
[76] NDataDecl ::= S 'NDATA' S Name |
[77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' |
[78] extParsedEnt ::= ( TextDecl? content ) |
- ( Char* RestrictedChar Char* ) |
※[79]は削除済 |
[80] EncodingDecl ::= S 'encoding' Eq |
('"' EncName '"' | "'" EncName "'" ) |
[81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* |
[82] NotationDecl ::= '<!NOTATION' S Name S |
(ExternalID | PublicID) S? '>' |
[83] PublicID ::= 'PUBLIC' S PubidLiteral |
XML構文に現れる集合演算について
[1] document ::= ( prolog element Misc* ) - ( Char* RestrictedChar Char* )
[14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
[15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
[16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
[17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
[20] CData ::= (Char* - (Char* ']]>' Char*))
[65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
[78] extParsedEnt ::= ( TextDecl? content ) - ( Char* RestrictedChar Char* )
には集合の演算記号（差集合を表す「-」）が含まれています。これはEBNFではありませんので注意してください。
[14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
[15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
[16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
[17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
[20] CData ::= (Char* - (Char* ']]>' Char*))
[65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
[78] extParsedEnt ::= ( TextDecl? content ) - ( Char* RestrictedChar Char* )
documentは少しわかり難いので、CDataから確認すると、
[20] CData ::= (Char* - (Char* ']]>' Char*))
の表現は、
CData は、
任意数のChar
で構成するが、
']]>'を含まない
ということを意味します。
次に、
[1] document ::= ( prolog element Misc* ) - ( Char* RestrictedChar Char* )
の表現を見ると2つのことを同時に述べています。
まず、文字集合の見地からすると、CDataと同じ書き方をすれば、
document ::= Char* - ( Char* RestrictedChar Char* )
を意味しているので
非終端記号document は、
任意数のChar
で構成するが、
RestrictedCharを含まない
ということを意味します。
そして、構文の見地からすると、
非終端記号document は、
非終端記号prolog
非終端記号element
任意数の非終端記号Misc
で構成するということを意味します。
# 少し手抜き回のようになってしまいましたが、
# 後で見返すときにこの方が見やすいと思います。
コメント
コメントを投稿する
SpecialPR