RPA Toolkit
Added modified version of the XML bnf.
authorMartin Stoilov <martin@sigmadrone.org>
Tue, 5 Apr 2022 14:02:10 +0000 (16:02 +0200)
committerMartin Stoilov <martin@sigmadrone.org>
Tue, 5 Apr 2022 14:02:10 +0000 (16:02 +0200)
tests/datapatterns/xml2.rpa [new file with mode: 0644]

diff --git a/tests/datapatterns/xml2.rpa b/tests/datapatterns/xml2.rpa
new file mode 100644 (file)
index 0000000..99adbe8
--- /dev/null
@@ -0,0 +1,122 @@
+#!emitall
+#!emit STag
+#!emit ETag
+#!emit Attribute
+#!abort ETag
+
+# BNF productsions from the XML spcification.
+
+Char           ::= [#x9] | [#xA] | [#xD] | [#x20-#xD7FF] | 
+                   [#xE000-#xFFFD] | [#x10000-#x10FFFF]                                                                                # [2]
+S              ::= ( [#x20] | [#x9] | [#xD] | [#xA] )*                                                                                 # [3]
+NameStartChar  ::= [:] | [A-Z] | [_] | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] |
+                   [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] |
+                   [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | 
+                   [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]                                                                                 # [4]
+NameChar       ::= <NameStartChar> | [\-\.]|[0-9]|[#xB7]|[#x0300-#x036F]|[#x203F-#x2040]       # [4a]
+Name           ::= <NameStartChar><NameChar>*                                                                                          # [5]
+Names          ::= <Name>([#x20]<Name>)*                                                                                                       # [6]
+Nmtoken                ::= <NameChar>+                                                                                                                         # [7]
+Nmtokens       ::= <Nmtoken>([#x20]<Nmtoken>)*                                                                                         # [8]
+
+# Literals
+EntityValue    ::= ( '"' ( ^[%&"] | <PEReference> | <Reference> )* '"' ) | 
+                   ( "'" ( ^[%&'] | <PEReference> | <Reference> )* "'" )                                               # [9]
+AttValue       ::= '"' (^[<&"] | <Reference>)* '"' |  "'" (^[<&'] | <Reference>)* "'"          # [10]
+SystemLiteral  ::= ('"' ^["]* '"') | ("'" ^[']* "'")                                                                   # [11]
+PubidLiteral   ::= ('"' <PubidChar>* '"' ) | ("'" (<PubidChar> - "'")* "'")                    # [12]
+PubidChar      ::= [#x20] | [#xD] | [#xA] | [a-zA-Z0-9] | '-' | '#' | ['()+,./::=?;!*@$_%]     # [13]
+
+# Character Data
+CharData       ::= ^[<&]
+Comment                ::= '<!--' ((<Char> - '-') | ('-' (<Char> - '-')))* '-->'                                       # [15]
+
+# Processing Instructions
+PI             ::= '<?' <PITarget> (<S> ((<Char> - '?>')*))? '?>'                                                              # [16]
+PITarget       ::= <Name> - [Xx][Mm][Ll]                                                                                                       # [17]
+
+
+# CDATA Section
+CDSect         ::= <CDStart> <CData> <CDEnd>                                                                                           # [18]
+CDStart                ::= '<![CDATA['                                                                                                                         # [19]
+CData          ::= (<Char> - ']]>')*                                                                                                           # [20]
+CDEnd          ::= ']]>'                                                                                                                                       # [21]
+
+# Prolog
+XMLDecl                ::= '<?xml' <VersionInfo> <EncodingDecl>? <SDDecl>? <S>? '?>'                           # [23]
+VersionInfo    ::= <S> 'version' <Eq> ("'" <VersionNum> "'" | '"' <VersionNum> '"')            # [24]
+Eq             ::= <S>? '=' <S>?                                                                                                                               # [25]
+VersionNum     ::= '1.' [0-9]+                                                                                                                         # [26]
+Misc           ::= <Comment> | <PI> | <S>                                                                                                      # [27]  
+doctypedecl    ::= '<!DOCTYPE'<S><Name>(<S><ExternalID>)?<S>?('['<intSubset>']'<S>?)?'>'       # [28]
+DeclSep                ::= <PEReference> | <S>                                                                                                         # [28a]
+intSubset      ::= ( <markupdecl> | <DeclSep> )*                                                                                       # [28b]
+
+markupdecl     ::= <elementdecl> | <AttlistDecl> | 
+                   <EntityDecl> | <NotationDecl> | <PI> | <Comment>                                                    # [29]
+prolog         ::= (<XMLDecl> | <doctypedecl>)<Misc>* (<doctypedecl> <Misc>*)?                         # [22]
+
+# External Subset
+extSubset      ::= <TextDecl>? <extSubsetDecl>                                                                                         # [30]
+extSubsetDecl  ::= (<markupdecl> | <conditionalSect> | <DeclSep>)*                                             # [31]
+SDDecl         ::= <S> 'standalone' <Eq>(("'" ('yes' | 'no') "'") |
+                   ('"' ('yes' | 'no') '"'))                                                                                                   # [32]
+
+#Start-tag
+STag           ::= '<' <Name> (<S><Attribute>)* <S>? '>'                                                                       # [40]
+Attribute      ::= <Name> <Eq> <AttValue>                                                                                                      # [41]
+ETag           ::= '</' <Name> <S>? '>'                                                                                                        # [42]
+content                ::= <CharData>?((<element>|<Reference>|<CDSect>|<PI>|
+                   <Comment>)<CharData>?)*                                                                                                             # [43]
+EmptyElemTag   ::= '<' <Name> (<S><Attribute>)* <S>? '/>'                                                              # [44]
+
+# Element
+element                ::= <EmptyElemTag> | (<STag> <content>* (<ETag>))                                                       # [39]
+
+elementdecl    ::= '<!ELEMENT'<S><Name><S><contentspec><S>? '>'                                                        # [45]
+contentspec    ::= 'EMPTY' | 'ANY' | <Mixed> | <children>                                                                      # [46]
+children       ::= (<choice> | <seq>) ('?' | '*' | '+')?                                                                       # [47]
+cp             ::= (<Name>|<choice>|<seq>) ('?' | '*' | '+')?                                                                  # [48]
+choice         ::= '(' <S>? <cp> (<S>? '|' <S>? <cp>)+ <S>? ')'                                                        # [49]
+seq            ::= '(' <S>? <cp> (<S>? ',' <S>? <cp>)* <S>? ')'                                                                # [50]
+Mixed          ::= '(' <S>? '#PCDATA' (<S>? '|' <S>? <Name>)* <S>? ')*'|
+                   '(' <S>? '#PCDATA' <S>? ')'                                                                                                 # [51]
+
+AttlistDecl    ::= '<!ATTLIST' <S>  <Name> <AttDef>* <S>? '>'                                                          # [52]
+AttDef         ::= <S> <Name> <S> <AttType> <S> <DefaultDecl>                                                          # [53]
+AttType                ::= <StringType> | <TokenizedType> | <EnumeratedType>                                           # [54]
+StringType     ::= 'CDATA'                                                                                                                     # [55]
+TokenizedType  ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 'ENTITIES' 
+                               | 'NMTOKEN' | 'NMTOKENS'                                                                                                # [56]
+EnumeratedType ::= <NotationType> | <Enumeration>                                                                              # [57]
+NotationType   ::= 'NOTATION' <S> '(' <S>? <Name>(<S>? '|' <S>?<Name>)*<S>? ')'                # [58]
+Enumeration    ::= '(' <S>? <Nmtoken> (<S>? '|' <S>? <Nmtoken>)* <S>? ')'                                      # [59]
+DefaultDecl    ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' <S>)? <AttValue>)                                     # [60]
+conditionalSect        ::= <includeSect> | <ignoreSect>                                                                                # [61]
+includeSect    ::= '<![' <S>? 'INCLUDE' <S>? '[' <extSubsetDecl> ']]>'                                         # [62]
+ignoreSect     ::= '<![' <S>? 'IGNORE' <S>? '[' <ignoreSectContents>* ']]>'                            # [63]
+ignoreSectContents ::= <Ignore>? ('<![' <ignoreSectContents> ']]>' <Ignore>? )*                        # [64]
+Ignore         ::= (<Char> - ('<![' | ']]>'))+
+
+
+CharRef                ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'                                                            # [66]
+Reference      ::= <EntityRef> | <CharRef>                                                                                                     # [67]
+EntityRef      ::= '&' <Name> ';'                                                                                                                      # [68]
+PEReference    ::= '%' <Name> ';'                                                                                                                      # [69]
+EntityDecl     ::= <GEDecl> | <PEDecl>                                                                                                         # [70]
+GEDecl         ::= '<!ENTITY' <S> <Name> <S> <EntityDef> <S>? '>'                                                      # [71]
+PEDecl         ::= '<!ENTITY' <S> '%' <S> <Name> <S> <PEDef> <S>? '>'                                          # [72]
+EntityDef      ::= <EntityValue> | (<ExternalID> <NDataDecl>?)                                                 # [73]
+PEDef          ::= <EntityValue> | <ExternalID>                                                                                        # [74]
+ExternalID     ::= 'SYSTEM' <S>  <SystemLiteral> | 
+                   'PUBLIC' <S> <PubidLiteral> <S> <SystemLiteral>                                                             # [75]
+NDataDecl      ::= <S> 'NDATA' <S> <Name>                                                                                              # [76]
+TextDecl       ::= '<?xml' <VersionInfo>? <EncodingDecl>  <S>? '?>'                                            # [77]
+extParsedEnt   ::= <TextDecl>? <content>                                                                               # [78]
+EncName                ::= [A-Za-z] ([A-Za-z0-9._] | '-')*                                                             # [81]
+NotationDecl   ::= '<!NOTATION' <S> <Name> <S> (<ExternalID> | <PublicID>)<S>? '>'     # [82]
+PublicID       ::= 'PUBLIC' <S> <PubidLiteral>                                                 # [83]
+EncodingDecl   ::= <S> 'encoding' <Eq> ('"' <EncName> '"' | "'" <EncName> "'" )                # [80]
+
+document       ::= <prolog>? <element> <Misc>*                                                                                         # [1]
+exec           ::= <document>