|
def | __init__ (self, savelist=False) |
|
def | copy (self) |
|
def | setName (self, name) |
|
def | setResultsName (self, name, listAllMatches=False) |
|
def | setBreak (self, breakFlag=True) |
|
def | setParseAction (self, *fns, **kwargs) |
|
def | addParseAction (self, *fns, **kwargs) |
|
def | addCondition (self, *fns, **kwargs) |
|
def | setFailAction (self, fn) |
|
def | preParse (self, instring, loc) |
|
def | parseImpl (self, instring, loc, doActions=True) |
|
def | postParse (self, instring, loc, tokenlist) |
|
def | tryParse (self, instring, loc) |
|
def | canParseNext (self, instring, loc) |
|
def | parseString (self, instring, parseAll=False) |
|
def | scanString (self, instring, maxMatches=_MAX_INT, overlap=False) |
|
def | transformString (self, instring) |
|
def | searchString (self, instring, maxMatches=_MAX_INT) |
|
def | split (self, instring, maxsplit=_MAX_INT, includeSeparators=False) |
|
def | __add__ (self, other) |
|
def | __radd__ (self, other) |
|
def | __sub__ (self, other) |
|
def | __rsub__ (self, other) |
|
def | __mul__ (self, other) |
|
def | __rmul__ (self, other) |
|
def | __or__ (self, other) |
|
def | __ror__ (self, other) |
|
def | __xor__ (self, other) |
|
def | __rxor__ (self, other) |
|
def | __and__ (self, other) |
|
def | __rand__ (self, other) |
|
def | __invert__ (self) |
|
def | __call__ (self, name=None) |
|
def | suppress (self) |
|
def | leaveWhitespace (self) |
|
def | setWhitespaceChars (self, chars) |
|
def | parseWithTabs (self) |
|
def | ignore (self, other) |
|
def | setDebugActions (self, startAction, successAction, exceptionAction) |
|
def | setDebug (self, flag=True) |
|
def | __str__ (self) |
|
def | __repr__ (self) |
|
def | streamline (self) |
|
def | checkRecursion (self, parseElementList) |
|
def | validate (self, validateTrace=[]) |
|
def | parseFile (self, file_or_filename, parseAll=False) |
|
def | __eq__ (self, other) |
|
def | __ne__ (self, other) |
|
def | __hash__ (self) |
|
def | __req__ (self, other) |
|
def | __rne__ (self, other) |
|
def | matches (self, testString, parseAll=True) |
|
def | runTests (self, tests, parseAll=True, comment='#', fullDump=True, printResults=True, failureTests=False) |
|
Abstract base level parser element class.
def __add__ |
( |
|
self, |
|
|
|
other |
|
) |
| |
Implementation of + operator - returns C{L{And}}. Adding strings to a ParserElement
converts them to L{Literal}s by default.
Example::
greet = Word(alphas) + "," + Word(alphas) + "!"
hello = "Hello, World!"
print (hello, "->", greet.parseString(hello))
Prints::
Hello, World! -> ['Hello', ',', 'World', '!']
def __mul__ |
( |
|
self, |
|
|
|
other |
|
) |
| |
Implementation of * operator, allows use of C{expr * 3} in place of
C{expr + expr + expr}. Expressions may also me multiplied by a 2-integer
tuple, similar to C{{min,max}} multipliers in regular expressions. Tuples
may also include C{None} as in:
- C{expr*(n,None)} or C{expr*(n,)} is equivalent
to C{expr*n + L{ZeroOrMore}(expr)}
(read as "at least n instances of C{expr}")
- C{expr*(None,n)} is equivalent to C{expr*(0,n)}
(read as "0 to n instances of C{expr}")
- C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)}
- C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)}
Note that C{expr*(None,n)} does not raise an exception if
more than n exprs exist in the input stream; that is,
C{expr*(None,n)} does not enforce a maximum number of expr
occurrences. If this behavior is desired, then write
C{expr*(None,n) + ~expr}
def addCondition |
( |
|
self, |
|
|
* |
fns, |
|
|
** |
kwargs |
|
) |
| |
Add a boolean predicate function to expression's list of parse actions. See
L{I{setParseAction}<setParseAction>} for function call signatures. Unlike C{setParseAction},
functions passed to C{addCondition} need to return boolean success/fail of the condition.
Optional keyword arguments:
- message = define a custom message to be used in the raised exception
- fatal = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise ParseException
Example::
integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
year_int = integer.copy()
year_int.addCondition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later")
date_str = year_int + '/' + integer + '/' + integer
result = date_str.parseString("1999/12/31") # -> Exception: Only support years 2000 and later (at char 0), (line:1, col:1)
Make a copy of this C{ParserElement}. Useful for defining different parse actions
for the same parsing pattern, using copies of the original parse element.
Example::
integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
integerK = integer.copy().addParseAction(lambda toks: toks[0]*1024) + Suppress("K")
integerM = integer.copy().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")
print(OneOrMore(integerK | integerM | integer).parseString("5K 100 640K 256M"))
prints::
[5120, 100, 655360, 268435456]
Equivalent form of C{expr.copy()} is just C{expr()}::
integerM = integer().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")
Reimplemented in Forward, ParseExpression, and Keyword.
def enablePackrat |
( |
|
cache_size_limit = 128 | ) |
|
|
static |
Enables "packrat" parsing, which adds memoizing to the parsing logic.
Repeated parse attempts at the same string location (which happens
often in many complex grammars) can immediately return a cached value,
instead of re-executing parsing/validating code. Memoizing is done of
both valid results and parsing exceptions.
Parameters:
- cache_size_limit - (default=C{128}) - if an integer value is provided
will limit the size of the packrat cache; if None is passed, then
the cache size will be unbounded; if 0 is passed, the cache will
be effectively disabled.
This speedup may break existing programs that use parse actions that
have side-effects. For this reason, packrat parsing is disabled when
you first import pyparsing. To activate the packrat feature, your
program must call the class method C{ParserElement.enablePackrat()}. If
your program uses C{psyco} to "compile as you go", you must call
C{enablePackrat} before calling C{psyco.full()}. If you do not do this,
Python will crash. For best results, call C{enablePackrat()} immediately
after importing pyparsing.
Example::
import pyparsing
pyparsing.ParserElement.enablePackrat()
def ignore |
( |
|
self, |
|
|
|
other |
|
) |
| |
Define expression to be ignored (e.g., comments) while doing pattern
matching; may be called repeatedly, to define multiple comment or other
ignorable patterns.
Example::
patt = OneOrMore(Word(alphas))
patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj']
patt.ignore(cStyleComment)
patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj', 'lskjd']
Reimplemented in Combine, ParseElementEnhance, and ParseExpression.
def inlineLiteralsUsing |
( |
|
cls | ) |
|
|
static |
Set class to be used for inclusion of string literals into a parser.
Example::
# default literal class used is Literal
integer = Word(nums)
date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
date_str.parseString("1999/12/31") # -> ['1999', '/', '12', '/', '31']
# change to Suppress
ParserElement.inlineLiteralsUsing(Suppress)
date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
date_str.parseString("1999/12/31") # -> ['1999', '12', '31']
def parseString |
( |
|
self, |
|
|
|
instring, |
|
|
|
parseAll = False |
|
) |
| |
Execute the parse expression with the given string.
This is the main interface to the client code, once the complete
expression has been built.
If you want the grammar to require that the entire input string be
successfully parsed, then set C{parseAll} to True (equivalent to ending
the grammar with C{L{StringEnd()}}).
Note: C{parseString} implicitly calls C{expandtabs()} on the input string,
in order to report proper column numbers in parse actions.
If the input string contains tabs and
the grammar uses parse actions that use the C{loc} argument to index into the
string being parsed, you can ensure you have a consistent view of the input
string by:
- calling C{parseWithTabs} on your grammar before calling C{parseString}
(see L{I{parseWithTabs}<parseWithTabs>})
- define your parse action using the full C{(s,loc,toks)} signature, and
reference the input string using the parse action's C{s} argument
- explicitly expand the tabs in your input string before calling
C{parseString}
Example::
Word('a').parseString('aaaaabaaa') # -> ['aaaaa']
Word('a').parseString('aaaaabaaa', parseAll=True) # -> Exception: Expected end of text
def runTests |
( |
|
self, |
|
|
|
tests, |
|
|
|
parseAll = True , |
|
|
|
comment = '#' , |
|
|
|
fullDump = True , |
|
|
|
printResults = True , |
|
|
|
failureTests = False |
|
) |
| |
Execute the parse expression on a series of test strings, showing each
test, the parsed results or where the parse failed. Quick and easy way to
run a parse expression against a list of sample strings.
Parameters:
- tests - a list of separate test strings, or a multiline string of test strings
- parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests
- comment - (default=C{'#'}) - expression for indicating embedded comments in the test
string; pass None to disable comment filtering
- fullDump - (default=C{True}) - dump results as list followed by results names in nested outline;
if False, only dump nested list
- printResults - (default=C{True}) prints test output to stdout
- failureTests - (default=C{False}) indicates if these tests are expected to fail parsing
Returns: a (success, results) tuple, where success indicates that all tests succeeded
(or failed if C{failureTests} is True), and the results contain a list of lines of each
test's output
Example::
number_expr = pyparsing_common.number.copy()
result = number_expr.runTests('''
# unsigned integer
100
# negative integer
-100
# float with scientific notation
6.02e23
# integer with scientific notation
1e-12
''')
print("Success" if result[0] else "Failed!")
result = number_expr.runTests('''
# stray character
100Z
# missing leading digit before '.'
-.100
# too many '.'
3.14.159
''', failureTests=True)
print("Success" if result[0] else "Failed!")
prints::
# unsigned integer
100
[100]
# negative integer
-100
[-100]
# float with scientific notation
6.02e23
[6.02e+23]
# integer with scientific notation
1e-12
[1e-12]
Success
# stray character
100Z
^
FAIL: Expected end of text (at char 3), (line:1, col:4)
# missing leading digit before '.'
-.100
^
FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1)
# too many '.'
3.14.159
^
FAIL: Expected end of text (at char 4), (line:1, col:5)
Success
Each test string must be on a single line. If you want to test a string that spans multiple
lines, create a test like this::
expr.runTest(r"this is a test\\n of strings that spans \\n 3 lines")
(Note that this is a raw string literal, you must include the leading 'r'.)
def scanString |
( |
|
self, |
|
|
|
instring, |
|
|
|
maxMatches = _MAX_INT , |
|
|
|
overlap = False |
|
) |
| |
Scan the input string for expression matches. Each match will return the
matching tokens, start location, and end location. May be called with optional
C{maxMatches} argument, to clip scanning after 'n' matches are found. If
C{overlap} is specified, then overlapping matches will be reported.
Note that the start and end locations are reported relative to the string
being parsed. See L{I{parseString}<parseString>} for more information on parsing
strings with embedded tabs.
Example::
source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
print(source)
for tokens,start,end in Word(alphas).scanString(source):
print(' '*start + '^'*(end-start))
print(' '*start + tokens[0])
prints::
sldjf123lsdjjkf345sldkjf879lkjsfd987
^^^^^
sldjf
^^^^^^^
lsdjjkf
^^^^^^
sldkjf
^^^^^^
lkjsfd
def searchString |
( |
|
self, |
|
|
|
instring, |
|
|
|
maxMatches = _MAX_INT |
|
) |
| |
Another extension to C{L{scanString}}, simplifying the access to the tokens found
to match the given parse expression. May be called with optional
C{maxMatches} argument, to clip searching after 'n' matches are found.
Example::
# a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters
cap_word = Word(alphas.upper(), alphas.lower())
print(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity"))
# the sum() builtin can be used to merge results into a single ParseResults object
print(sum(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity")))
prints::
[['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']]
['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity']
def setDebug |
( |
|
self, |
|
|
|
flag = True |
|
) |
| |
Enable display of debugging messages while doing pattern matching.
Set C{flag} to True to enable, False to disable.
Example::
wd = Word(alphas).setName("alphaword")
integer = Word(nums).setName("numword")
term = wd | integer
# turn on debugging for wd
wd.setDebug()
OneOrMore(term).parseString("abc 123 xyz 890")
prints::
Match alphaword at loc 0(1,1)
Matched alphaword -> ['abc']
Match alphaword at loc 3(1,4)
Exception raised:Expected alphaword (at char 4), (line:1, col:5)
Match alphaword at loc 7(1,8)
Matched alphaword -> ['xyz']
Match alphaword at loc 11(1,12)
Exception raised:Expected alphaword (at char 12), (line:1, col:13)
Match alphaword at loc 15(1,16)
Exception raised:Expected alphaword (at char 15), (line:1, col:16)
The output shown is that produced by the default debug actions - custom debug actions can be
specified using L{setDebugActions}. Prior to attempting
to match the C{wd} expression, the debugging message C{"Match <exprname> at loc <n>(<line>,<col>)"}
is shown. Then if the parse succeeds, a C{"Matched"} message is shown, or an C{"Exception raised"}
message is shown. Also note the use of L{setName} to assign a human-readable name to the expression,
which makes debugging and exception messages easier to understand - for instance, the default
name created for the C{Word} expression without calling C{setName} is C{"W:(ABCD...)"}.
def setDefaultWhitespaceChars |
( |
|
chars | ) |
|
|
static |
Overrides the default whitespace chars
Example::
# default whitespace chars are space, <TAB> and newline
OneOrMore(Word(alphas)).parseString("abc def\nghi jkl") # -> ['abc', 'def', 'ghi', 'jkl']
# change to just treat newline as significant
ParserElement.setDefaultWhitespaceChars(" \t")
OneOrMore(Word(alphas)).parseString("abc def\nghi jkl") # -> ['abc', 'def']
def setParseAction |
( |
|
self, |
|
|
* |
fns, |
|
|
** |
kwargs |
|
) |
| |
Define one or more actions to perform when successfully matching parse element definition.
Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)},
C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where:
- s = the original string being parsed (see note below)
- loc = the location of the matching substring
- toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object
If the functions in fns modify the tokens, they can return them as the return
value from fn, and the modified list of tokens will replace the original.
Otherwise, fn does not need to return any value.
Optional keyword arguments:
- callDuringTry = (default=C{False}) indicate if parse action should be run during lookaheads and alternate testing
Note: the default parsing behavior is to expand tabs in the input string
before starting the parsing process. See L{I{parseString}<parseString>} for more information
on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
consistent view of the parsed string, the parse location, and line and column
positions within the parsed string.
Example::
integer = Word(nums)
date_str = integer + '/' + integer + '/' + integer
date_str.parseString("1999/12/31") # -> ['1999', '/', '12', '/', '31']
# use parse action to convert to ints at parse time
integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
date_str = integer + '/' + integer + '/' + integer
# note that integer fields are now ints, not strings
date_str.parseString("1999/12/31") # -> [1999, '/', 12, '/', 31]
def split |
( |
|
self, |
|
|
|
instring, |
|
|
|
maxsplit = _MAX_INT , |
|
|
|
includeSeparators = False |
|
) |
| |
Generator method to split a string using the given expression as a separator.
May be called with optional C{maxsplit} argument, to limit the number of splits;
and the optional C{includeSeparators} argument (default=C{False}), if the separating
matching text should be included in the split results.
Example::
punc = oneOf(list(".,;:/-!?"))
print(list(punc.split("This, this?, this sentence, is badly punctuated!")))
prints::
['This', ' this', '', ' this sentence', ' is badly punctuated', '']