Browsable XML Grammar

Grammar extracted by Vadim Zaytsev, see the Grammar Zoo entry for details: xmlware/xml/zhang-cordy/extracted
Source used for this grammar: Hongyu Zhang, James R. Cordy, XML 1.0 (Second edition) Grammar, and Well-formedness Checker, August 2001 [TXL Grammars]

Summary

Total 74 production rules with 126 top alternatives and 532 symbols.
Vocabulary: 133 = 86 nonterminals + 47 terminals + 0 labels + 0 markers.
Total 86 nonterminal symbols: 74 defined (program, prolog, dtd_body, misc, doctypedecl, declbody, literal, pi, pitarget, sub1_pi, sub2_pi, sp_external_id, external_id, exid_sys, exid_pub, decl, markupdecl, elementdecl, contentspec, children, repetition, children_body, choice, choice_item, cp, cp_body, seq, more_cps, mixed, pcdata_or_name, more_names, pcdata, attlistdecl, attdef, atttype, stringtype, tokenizedtype, enumeratedtype, notationtype, enumeration, nmtoken, more_nmtokens, defaultdecl, fixed, attvalue, longstringlit, not_dquote, longcharlit, not_quote, entitydecl, gedecl, pedecl, entitydef, pedef, ndatadecl, entityvalue, notationdecl, exid_or_pubid, spaces, space_or_newline, element, tag_content, empty_elem_tag, attribute, stag, content, sub1_content, sub2_content, chardata, etag, cdata, cdend, reference, cdsect), 1 root (program), 1 top (sub2_pi), 12 bottom (comment⁴, charref, token⁷, newline², dname, name¹⁹, regexp, space², stringlit³, IN, charlit³, EX).
Total 47 terminal symbols: 14 keywords ("SYSTEM", "PUBLIC"², "EMPTY", "ANY", "CDATA", "ID", "IDREF", "IDREFS", "ENTITY", "ENTITIES", "NMTOKEN", "NMTOKENS", "NOTATION", "NDATA"), 0 letters (—), 0 numerics (—), 23 signs (">"⁸, "[", "]", "<?", "?>"², "%"², ";"², "?", "*", "+", "("⁶, ")"⁵, "|"³, ",", ")*", """², "'"², "<"², "/>", "=", "</", "]]>", "&"), 10 mixed ("<!DOCTYPE", "<!ELEMENT", "#PCDATA"², "<!ATTLIST", "#REQUIRED", "#IMPLIED", "#FIXED", "<!ENTITY"², "<!NOTATION", "<![CDATA[").

Syntax

program ::=
	prolog element

prolog ::=
	dtd_body

dtd_body ::=
	misc* doctypedecl?

misc ::=
	comment
	pi
	space
	newline

doctypedecl ::=
	"<!DOCTYPE" spaces name spaces sp_external_id? spaces declbody? spaces ">" spaces

declbody ::=
	"[" decl* "]"

literal ::=
	charlit
	stringlit

pi ::=
	"<?" pitarget sub1_pi? "?>"

pitarget ::=
	name

sub1_pi ::=
	spaces token

sub2_pi ::=
	token "?>" token

sp_external_id ::=
	spaces external_id

external_id ::=
	exid_sys
	exid_pub

exid_sys ::=
	"SYSTEM" spaces literal

exid_pub ::=
	"PUBLIC" spaces literal spaces literal

decl ::=
	markupdecl
	"%" name ";"
	spaces

markupdecl ::=
	elementdecl
	attlistdecl
	entitydecl
	notationdecl
	pi
	comment

elementdecl ::=
	"<!ELEMENT" spaces name spaces contentspec spaces ">" spaces

contentspec ::=
	"EMPTY"
	"ANY"
	mixed
	children

children ::=
	children_body repetition?

repetition ::=
	"?"
	"*"
	"+"

children_body ::=
	choice
	seq

choice ::=
	"(" spaces cp choice_item choice_item* spaces ")"

choice_item ::=
	spaces "|" spaces cp

cp ::=
	cp_body repetition?

cp_body ::=
	name
	choice
	seq

seq ::=
	"(" spaces cp more_cps* spaces ")"

more_cps ::=
	spaces "," spaces cp

mixed ::=
	pcdata_or_name
	pcdata

pcdata_or_name ::=
	"(" spaces "#PCDATA" more_names* spaces ")*"

more_names ::=
	spaces "|" spaces name

pcdata ::=
	"(" spaces "#PCDATA" spaces ")"

attlistdecl ::=
	"<!ATTLIST" spaces name attdef* spaces ">" spaces

attdef ::=
	spaces name spaces atttype spaces defaultdecl

atttype ::=
	stringtype
	tokenizedtype
	enumeratedtype

stringtype ::=
	"CDATA"

tokenizedtype ::=
	"ID"
	"IDREF"
	"IDREFS"
	"ENTITY"
	"ENTITIES"
	"NMTOKEN"
	"NMTOKENS"

enumeratedtype ::=
	notationtype
	enumeration

notationtype ::=
	"NOTATION" spaces "(" spaces name more_names* spaces ")"

enumeration ::=
	"(" spaces nmtoken more_nmtokens* spaces ")"

nmtoken ::=
	name
	dname

more_nmtokens ::=
	spaces "|" spaces nmtoken

defaultdecl ::=
	"#REQUIRED"
	"#IMPLIED"
	fixed? attvalue

fixed ::=
	"#FIXED" spaces

attvalue ::=
	stringlit
	charlit
	longstringlit
	longcharlit

longstringlit ::=
	""" not_dquote* """

not_dquote ::=
	token

longcharlit ::=
	"'" not_quote* "'"

not_quote ::=
	token

entitydecl ::=
	gedecl
	pedecl

gedecl ::=
	"<!ENTITY" spaces name spaces entitydef spaces ">" spaces

pedecl ::=
	"<!ENTITY" spaces "%" spaces name spaces pedef spaces ">" spaces

entitydef ::=
	entityvalue
	external_id ndatadecl?

pedef ::=
	entityvalue
	external_id

ndatadecl ::=
	spaces "NDATA" spaces name

entityvalue ::=
	charlit
	stringlit
	longstringlit
	longcharlit

notationdecl ::=
	"<!NOTATION" spaces name spaces exid_or_pubid spaces ">" spaces

exid_or_pubid ::=
	external_id
	"PUBLIC" spaces literal

spaces ::=
	space_or_newline*

space_or_newline ::=
	space
	newline

element ::=
	empty_elem_tag spaces
	tag_content spaces

tag_content ::=
	stag IN content EX etag

empty_elem_tag ::=
	"<" name spaces attribute* "/>"

attribute ::=
	spaces name "=" attvalue spaces

stag ::=
	"<" name spaces attribute* ">"

content ::=
	spaces chardata* spaces sub1_content* spaces

sub1_content ::=
	spaces sub2_content spaces chardata* spaces

sub2_content ::=
	element
	reference
	cdsect
	pi
	comment
	regexp

chardata ::=
	token

etag ::=
	"</" name spaces ">"

cdata ::=
	token
	comment

cdend ::=
	"]]>"

reference ::=
	"&" name ";"
	charref

cdsect ::=
	"<![CDATA[" cdata* cdend

Maintained by Dr. Vadim Zaytsev a.k.a. @grammarware. Last updated in September 2015. [↑]