# Grammar for XPath 1.0 location paths and expressions.
# Token and production names follow the XPath 1.0 recommendation.
Grammar XPath
  Tokens
    S            = /(\x20|\x09|\x0D|\x0A)+/
    Letter       = /\w/
    Digits       = /\d+/
    # The hyphen is escaped so it is a literal '-' and not the range '.'..'_'.
    # \w already covers digits and the underscore.
    NCNameChar   = /[\w.\-]/
    Literal      = /('[^']*')|("[^"]*")/
    # Two-character operators come first: regex alternation is ordered, so a
    # leading single-character class would always win and '//', '<=', '>='
    # could never match. The hyphen is escaped so '+-=' is not read as a range
    # (which would also accept digits, ',', '.', ':' and ';').
    Op           = /\/\/|!=|<=|>=|[\/|+\-=<>]/
    NCName       = /[\w_]+/
    OperatorName = /and|or|mod|div/
    NodeType     = /comment|text|processing-instruction|node/
    # Any (optionally prefixed) name that is not one of the NodeType keywords.
    FunctionName = /(?!\b(comment|text|processing-instruction|node)\b)(([\w_]+):)?([\w_]+)/
    # Longer axis names precede their prefixes ('ancestor-or-self' before
    # 'ancestor', etc.); otherwise the shorter alternative matches first and
    # the '-or-self' / '-sibling' suffix is left behind.
    AxisName     = /ancestor-or-self|ancestor|attribute|child|descendant-or-self|descendant|following-sibling|following|namespace|parent|preceding-sibling|preceding|self/
    # NOTE(review): PI matches the same text as one NodeType alternative;
    # confirm how the lexer resolves that overlap.
    PI           = /processing-instruction/

  Productions
    # --- Location paths ---
    LocationPath -> RelativeLocationPath | AbsoluteLocationPath
    AbsoluteLocationPath -> '/' RelativeLocationPath? | AbbreviatedAbsoluteLocationPath
    RelativeLocationPath -> Step | RelativeLocationPath '/' Step | AbbreviatedRelativeLocationPath
    # A step takes zero or more predicates (XPath 1.0, production [4]).
    Step -> AxisSpecifier NodeTest Predicate* | AbbreviatedStep
    AxisSpecifier -> AxisName '::' | AbbreviatedAxisSpecifier
    NodeTest -> NameTest | NodeType '(' ')' | PI '(' Literal ')'
    Predicate -> '[' PredicateExpr ']'
    PredicateExpr -> Expr
    AbbreviatedAbsoluteLocationPath -> '//' RelativeLocationPath
    AbbreviatedRelativeLocationPath -> RelativeLocationPath '//' Step
    AbbreviatedStep -> '.' | '..'
    AbbreviatedAxisSpecifier -> '@'?
# --- Expressions ---
    Expr -> OrExpr
    UnionExpr -> PathExpr | UnionExpr '|' PathExpr
    PathExpr -> LocationPath | FilterExpr | FilterExpr '/' RelativeLocationPath | FilterExpr '//' RelativeLocationPath
    FilterExpr -> PrimaryExpr | FilterExpr Predicate

    # Boolean, comparison and arithmetic operators, loosest binding first.
    OrExpr -> AndExpr | OrExpr 'or' AndExpr
    AndExpr -> EqualityExpr | AndExpr 'and' EqualityExpr
    EqualityExpr -> RelationalExpr | EqualityExpr '=' RelationalExpr | EqualityExpr '!=' RelationalExpr
    RelationalExpr -> AdditiveExpr | RelationalExpr '<' AdditiveExpr | RelationalExpr '>' AdditiveExpr | RelationalExpr '<=' AdditiveExpr | RelationalExpr '>=' AdditiveExpr
    AdditiveExpr -> MultiplicativeExpr | AdditiveExpr '+' MultiplicativeExpr | AdditiveExpr '-' MultiplicativeExpr
    MultiplicativeExpr -> UnaryExpr | MultiplicativeExpr MultiplyOperator UnaryExpr | MultiplicativeExpr 'div' UnaryExpr | MultiplicativeExpr 'mod' UnaryExpr
    UnaryExpr -> UnionExpr | '-' UnaryExpr

    # Primary expressions and function calls.
    PrimaryExpr -> VariableReference | '(' Expr ')' | Literal | Number | FunctionCall
    FunctionCall -> FunctionName '(' ( Argument ( ',' Argument )* )? ')'
    Argument -> Expr

    # Lexical-level productions.
    ExprToken -> '(' | ')' | '[' | ']' | '.' | '..' | '@' | ',' | '::' | NameTest | NodeType | Operator | FunctionName | AxisName | Literal | Number | VariableReference
    Number -> Digits ('.' Digits?)? | '.' Digits
    Operator -> OperatorName | MultiplyOperator | Op
    MultiplyOperator -> '*'
    VariableReference -> '$' QName
    NameTest -> '*' | NCName ':' '*' | QName
    ExprWhitespace -> S

    # Qualified names.
    QName -> (Prefix ':')? LocalPart
    Prefix -> NCName
    LocalPart -> NCName

# Alternative, production-style definitions of the keyword tokens, kept
# commented out for reference:
#
# FunctionName -> NCName - NodeType
# NodeType -> 'comment' | 'text' | 'processing-instruction' | 'node'
# OperatorName -> 'and' | 'or' | 'mod' | 'div'
# AxisName -> 'ancestor' | 'ancestor-or-self' | 'attribute' | 'child'
#           | 'descendant' | 'descendant-or-self' | 'following'
#           | 'following-sibling' | 'namespace' | 'parent' | 'preceding'
#           | 'preceding-sibling' | 'self'
# PI = 'processing-instruction'
#
# You could try a trick with Regexps, the "zero width negative
# lookahead".
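# Essentially:
#
#   (?!but_these_tokens)(every_token)
#
# but you have to fill in the details.
#
# An example is
#
#   (?!a|bc)(\w+)
#
# which would be
#
#   something -> words - specials
#   words     -> \w+
#   specials  -> 'a' | 'bc'
#
# Note: as written, the lookahead rejects any word that merely starts with
# 'a' or 'bc' (e.g. "apple"). To exclude only the exact words, anchor it
# with word boundaries: (?!\b(a|bc)\b)(\w+)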