|
OpenQuizz
Une application de gestion des contenus pédagogiques
|


Public Member Functions | |
| def | __init__ (self, tree=None, strict=False, namespaceHTMLElements=True, debug=False) |
| def | reset (self) |
| def | documentEncoding (self) |
| def | isHTMLIntegrationPoint (self, element) |
| def | isMathMLTextIntegrationPoint (self, element) |
| def | mainLoop (self) |
| def | parse (self, stream, *args, **kwargs) |
| def | parseFragment (self, stream, *args, **kwargs) |
| def | parseError (self, errorcode="XXX-undefined-error", datavars=None) |
| def | adjustMathMLAttributes (self, token) |
| def | adjustSVGAttributes (self, token) |
| def | adjustForeignAttributes (self, token) |
| def | reparseTokenNormal (self, token) |
| def | resetInsertionMode (self) |
| def | parseRCDataRawtext (self, token, contentType) |
Data Fields | |
| strict | |
| tree | |
| errors | |
| phases | |
| innerHTMLMode | |
| container | |
| scripting | |
| tokenizer | |
| firstStartTag | |
| log | |
| compatMode | |
| innerHTML | |
| phase | |
| lastPhase | |
| beforeRCDataPhase | |
| framesetOK | |
| originalPhase | |
HTML parser. Generates a tree structure from a stream of (possibly malformed) HTML.
| def __init__ | ( | self, | |
tree = None, |
|||
strict = False, |
|||
namespaceHTMLElements = True, |
|||
debug = False |
|||
| ) |
:arg tree: a treebuilder class controlling the type of tree that will be
returned. Built-in treebuilders can be accessed through
html5lib.treebuilders.getTreeBuilder(treeType)
:arg strict: raise an exception when a parse error is encountered
:arg namespaceHTMLElements: whether or not to namespace HTML elements
:arg debug: whether or not to enable debug mode which logs things
Example:
>>> from html5lib.html5parser import HTMLParser
>>> parser = HTMLParser() # generates parser with etree builder
>>> parser = HTMLParser('lxml', strict=True) # generates parser with lxml builder which is strict
| def adjustForeignAttributes | ( | self, | |
| token | |||
| ) |
| def adjustMathMLAttributes | ( | self, | |
| token | |||
| ) |
| def adjustSVGAttributes | ( | self, | |
| token | |||
| ) |
| def documentEncoding | ( | self | ) |
Name of the character encoding that was used to decode the input stream, or :obj:`None` if that is not determined yet
| def isHTMLIntegrationPoint | ( | self, | |
| element | |||
| ) |
| def isMathMLTextIntegrationPoint | ( | self, | |
| element | |||
| ) |
| def mainLoop | ( | self | ) |
| def parse | ( | self, | |
| stream, | |||
| * | args, | ||
| ** | kwargs | ||
| ) |
Parse an HTML document into a well-formed tree
:arg stream: a file-like object or string containing the HTML to be parsed
The optional encoding parameter must be a string that indicates
the encoding. If specified, that encoding will be used,
regardless of any BOM or later declaration (such as in a meta
element).
:arg scripting: treat noscript elements as if JavaScript was turned on
:returns: parsed tree
Example:
>>> from html5lib.html5parser import HTMLParser
>>> parser = HTMLParser()
>>> parser.parse('<html><body><p>This is a doc</p></body></html>')
<Element u'{http://www.w3.org/1999/xhtml}html' at 0x7feac4909db0>
| def parseError | ( | self, | |
errorcode = "XXX-undefined-error", |
|||
datavars = None |
|||
| ) |
| def parseFragment | ( | self, | |
| stream, | |||
| * | args, | ||
| ** | kwargs | ||
| ) |
Parse an HTML fragment into a well-formed tree fragment
:arg container: name of the element whose innerHTML property we're
setting; if set to None, defaults to 'div'
:arg stream: a file-like object or string containing the HTML to be parsed
The optional encoding parameter must be a string that indicates
the encoding. If specified, that encoding will be used,
regardless of any BOM or later declaration (such as in a meta
element)
:arg scripting: treat noscript elements as if JavaScript was turned on
:returns: parsed tree
Example:
>>> from html5lib.html5parser import HTMLParser
>>> parser = HTMLParser()
>>> parser.parseFragment('<b>this is a fragment</b>')
<Element u'DOCUMENT_FRAGMENT' at 0x7feac484b090>
| def parseRCDataRawtext | ( | self, | |
| token, | |||
| contentType | |||
| ) |
| def reparseTokenNormal | ( | self, | |
| token | |||
| ) |
| def reset | ( | self | ) |
| def resetInsertionMode | ( | self | ) |
| beforeRCDataPhase |
| compatMode |
| container |
| errors |
| firstStartTag |
| framesetOK |
| innerHTML |
| innerHTMLMode |
| lastPhase |
| log |
| originalPhase |
| phase |
| phases |
| scripting |
| strict |
| tokenizer |
| tree |