OpenQuizz
Une application de gestion des contenus pédagogiques
HTMLTokenizer Class Reference
Inheritance diagram for HTMLTokenizer:
Collaboration diagram for HTMLTokenizer:

Public Member Functions

def __init__ (self, stream, parser=None, **kwargs)
 
def __iter__ (self)
 
def consumeNumberEntity (self, isHex)
 
def consumeEntity (self, allowedChar=None, fromAttribute=False)
 
def processEntityInAttribute (self, allowedChar)
 
def emitCurrentToken (self)
 
def dataState (self)
 
def entityDataState (self)
 
def rcdataState (self)
 
def characterReferenceInRcdata (self)
 
def rawtextState (self)
 
def scriptDataState (self)
 
def plaintextState (self)
 
def tagOpenState (self)
 
def closeTagOpenState (self)
 
def tagNameState (self)
 
def rcdataLessThanSignState (self)
 
def rcdataEndTagOpenState (self)
 
def rcdataEndTagNameState (self)
 
def rawtextLessThanSignState (self)
 
def rawtextEndTagOpenState (self)
 
def rawtextEndTagNameState (self)
 
def scriptDataLessThanSignState (self)
 
def scriptDataEndTagOpenState (self)
 
def scriptDataEndTagNameState (self)
 
def scriptDataEscapeStartState (self)
 
def scriptDataEscapeStartDashState (self)
 
def scriptDataEscapedState (self)
 
def scriptDataEscapedDashState (self)
 
def scriptDataEscapedDashDashState (self)
 
def scriptDataEscapedLessThanSignState (self)
 
def scriptDataEscapedEndTagOpenState (self)
 
def scriptDataEscapedEndTagNameState (self)
 
def scriptDataDoubleEscapeStartState (self)
 
def scriptDataDoubleEscapedState (self)
 
def scriptDataDoubleEscapedDashState (self)
 
def scriptDataDoubleEscapedDashDashState (self)
 
def scriptDataDoubleEscapedLessThanSignState (self)
 
def scriptDataDoubleEscapeEndState (self)
 
def beforeAttributeNameState (self)
 
def attributeNameState (self)
 
def afterAttributeNameState (self)
 
def beforeAttributeValueState (self)
 
def attributeValueDoubleQuotedState (self)
 
def attributeValueSingleQuotedState (self)
 
def attributeValueUnQuotedState (self)
 
def afterAttributeValueState (self)
 
def selfClosingStartTagState (self)
 
def bogusCommentState (self)
 
def markupDeclarationOpenState (self)
 
def commentStartState (self)
 
def commentStartDashState (self)
 
def commentState (self)
 
def commentEndDashState (self)
 
def commentEndState (self)
 
def commentEndBangState (self)
 
def doctypeState (self)
 
def beforeDoctypeNameState (self)
 
def doctypeNameState (self)
 
def afterDoctypeNameState (self)
 
def afterDoctypePublicKeywordState (self)
 
def beforeDoctypePublicIdentifierState (self)
 
def doctypePublicIdentifierDoubleQuotedState (self)
 
def doctypePublicIdentifierSingleQuotedState (self)
 
def afterDoctypePublicIdentifierState (self)
 
def betweenDoctypePublicAndSystemIdentifiersState (self)
 
def afterDoctypeSystemKeywordState (self)
 
def beforeDoctypeSystemIdentifierState (self)
 
def doctypeSystemIdentifierDoubleQuotedState (self)
 
def doctypeSystemIdentifierSingleQuotedState (self)
 
def afterDoctypeSystemIdentifierState (self)
 
def bogusDoctypeState (self)
 
def cdataSectionState (self)
 

Data Fields

 stream
 
 parser
 
 escapeFlag
 
 lastFourChars
 
 state
 
 escape
 
 currentToken
 
 tokenQueue
 
 temporaryBuffer
 

Detailed Description

This class takes care of tokenizing HTML.

* self.currentToken
  Holds the token that is currently being processed.

* self.state
  Holds a reference to the state method to be invoked next.

* self.stream
  Points to the HTMLInputStream object.

Constructor & Destructor Documentation

◆ __init__()

def __init__ (   self,
  stream,
  parser = None,
**  kwargs 
)

Member Function Documentation

◆ __iter__()

def __iter__ (   self)
This is where the magic happens.

We do our usual processing through the states and when we have a token
to return we yield the token which pauses processing until the next token
is requested.

◆ afterAttributeNameState()

def afterAttributeNameState (   self)

◆ afterAttributeValueState()

def afterAttributeValueState (   self)

◆ afterDoctypeNameState()

def afterDoctypeNameState (   self)

◆ afterDoctypePublicIdentifierState()

def afterDoctypePublicIdentifierState (   self)

◆ afterDoctypePublicKeywordState()

def afterDoctypePublicKeywordState (   self)

◆ afterDoctypeSystemIdentifierState()

def afterDoctypeSystemIdentifierState (   self)

◆ afterDoctypeSystemKeywordState()

def afterDoctypeSystemKeywordState (   self)

◆ attributeNameState()

def attributeNameState (   self)

◆ attributeValueDoubleQuotedState()

def attributeValueDoubleQuotedState (   self)

◆ attributeValueSingleQuotedState()

def attributeValueSingleQuotedState (   self)

◆ attributeValueUnQuotedState()

def attributeValueUnQuotedState (   self)

◆ beforeAttributeNameState()

def beforeAttributeNameState (   self)

◆ beforeAttributeValueState()

def beforeAttributeValueState (   self)

◆ beforeDoctypeNameState()

def beforeDoctypeNameState (   self)

◆ beforeDoctypePublicIdentifierState()

def beforeDoctypePublicIdentifierState (   self)

◆ beforeDoctypeSystemIdentifierState()

def beforeDoctypeSystemIdentifierState (   self)

◆ betweenDoctypePublicAndSystemIdentifiersState()

def betweenDoctypePublicAndSystemIdentifiersState (   self)

◆ bogusCommentState()

def bogusCommentState (   self)

◆ bogusDoctypeState()

def bogusDoctypeState (   self)

◆ cdataSectionState()

def cdataSectionState (   self)

◆ characterReferenceInRcdata()

def characterReferenceInRcdata (   self)

◆ closeTagOpenState()

def closeTagOpenState (   self)

◆ commentEndBangState()

def commentEndBangState (   self)

◆ commentEndDashState()

def commentEndDashState (   self)

◆ commentEndState()

def commentEndState (   self)

◆ commentStartDashState()

def commentStartDashState (   self)

◆ commentStartState()

def commentStartState (   self)

◆ commentState()

def commentState (   self)

◆ consumeEntity()

def consumeEntity (   self,
  allowedChar = None,
  fromAttribute = False 
)

◆ consumeNumberEntity()

def consumeNumberEntity (   self,
  isHex 
)
This function returns either U+FFFD or the character based on the
decimal or hexadecimal representation. It also discards ";" if present.
If not present, self.tokenQueue.append({"type": tokenTypes["ParseError"]}) is invoked.

◆ dataState()

def dataState (   self)

◆ doctypeNameState()

def doctypeNameState (   self)

◆ doctypePublicIdentifierDoubleQuotedState()

def doctypePublicIdentifierDoubleQuotedState (   self)

◆ doctypePublicIdentifierSingleQuotedState()

def doctypePublicIdentifierSingleQuotedState (   self)

◆ doctypeState()

def doctypeState (   self)

◆ doctypeSystemIdentifierDoubleQuotedState()

def doctypeSystemIdentifierDoubleQuotedState (   self)

◆ doctypeSystemIdentifierSingleQuotedState()

def doctypeSystemIdentifierSingleQuotedState (   self)

◆ emitCurrentToken()

def emitCurrentToken (   self)
This method is a generic handler for emitting the tags. It also sets
the state to "data" because that's what's needed after a token has been
emitted.

◆ entityDataState()

def entityDataState (   self)

◆ markupDeclarationOpenState()

def markupDeclarationOpenState (   self)

◆ plaintextState()

def plaintextState (   self)

◆ processEntityInAttribute()

def processEntityInAttribute (   self,
  allowedChar 
)
This method replaces the need for "entityInAttributeValueState".

◆ rawtextEndTagNameState()

def rawtextEndTagNameState (   self)

◆ rawtextEndTagOpenState()

def rawtextEndTagOpenState (   self)

◆ rawtextLessThanSignState()

def rawtextLessThanSignState (   self)

◆ rawtextState()

def rawtextState (   self)

◆ rcdataEndTagNameState()

def rcdataEndTagNameState (   self)

◆ rcdataEndTagOpenState()

def rcdataEndTagOpenState (   self)

◆ rcdataLessThanSignState()

def rcdataLessThanSignState (   self)

◆ rcdataState()

def rcdataState (   self)

◆ scriptDataDoubleEscapedDashDashState()

def scriptDataDoubleEscapedDashDashState (   self)

◆ scriptDataDoubleEscapedDashState()

def scriptDataDoubleEscapedDashState (   self)

◆ scriptDataDoubleEscapedLessThanSignState()

def scriptDataDoubleEscapedLessThanSignState (   self)

◆ scriptDataDoubleEscapedState()

def scriptDataDoubleEscapedState (   self)

◆ scriptDataDoubleEscapeEndState()

def scriptDataDoubleEscapeEndState (   self)

◆ scriptDataDoubleEscapeStartState()

def scriptDataDoubleEscapeStartState (   self)

◆ scriptDataEndTagNameState()

def scriptDataEndTagNameState (   self)

◆ scriptDataEndTagOpenState()

def scriptDataEndTagOpenState (   self)

◆ scriptDataEscapedDashDashState()

def scriptDataEscapedDashDashState (   self)

◆ scriptDataEscapedDashState()

def scriptDataEscapedDashState (   self)

◆ scriptDataEscapedEndTagNameState()

def scriptDataEscapedEndTagNameState (   self)

◆ scriptDataEscapedEndTagOpenState()

def scriptDataEscapedEndTagOpenState (   self)

◆ scriptDataEscapedLessThanSignState()

def scriptDataEscapedLessThanSignState (   self)

◆ scriptDataEscapedState()

def scriptDataEscapedState (   self)

◆ scriptDataEscapeStartDashState()

def scriptDataEscapeStartDashState (   self)

◆ scriptDataEscapeStartState()

def scriptDataEscapeStartState (   self)

◆ scriptDataLessThanSignState()

def scriptDataLessThanSignState (   self)

◆ scriptDataState()

def scriptDataState (   self)

◆ selfClosingStartTagState()

def selfClosingStartTagState (   self)

◆ tagNameState()

def tagNameState (   self)

◆ tagOpenState()

def tagOpenState (   self)

Field Documentation

◆ currentToken

currentToken

◆ escape

escape

◆ escapeFlag

escapeFlag

◆ lastFourChars

lastFourChars

◆ parser

parser

◆ state

state

◆ stream

stream

◆ temporaryBuffer

temporaryBuffer

◆ tokenQueue

tokenQueue

The documentation for this class was generated from the following file: