You can not select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							182 lines
						
					
					
						
							5.6 KiB
						
					
					
				
			
		
		
	
	
							182 lines
						
					
					
						
							5.6 KiB
						
					
					
/** All the states the tokenizer can be in. */
declare const enum State {
    // NOTE(review): the group labels below are inferred from the member names
    // only; the actual transitions live in the tokenizer implementation, which
    // this declaration file does not show. Numeric values are part of the
    // emitted contract and must not be renumbered.

    // Text and tag names
    Text = 1,
    BeforeTagName = 2,
    InTagName = 3,
    InSelfClosingTag = 4,
    BeforeClosingTagName = 5,
    InClosingTagName = 6,
    AfterClosingTagName = 7,

    // Attributes (Dq / Sq / Nq: presumably double-, single- and un-quoted values — confirm)
    BeforeAttributeName = 8,
    InAttributeName = 9,
    AfterAttributeName = 10,
    BeforeAttributeValue = 11,
    InAttributeValueDq = 12,
    InAttributeValueSq = 13,
    InAttributeValueNq = 14,

    // Declarations and processing instructions
    BeforeDeclaration = 15,
    InDeclaration = 16,
    InProcessingInstruction = 17,

    // Comments
    BeforeComment = 18,
    InComment = 19,
    InSpecialComment = 20,
    AfterComment1 = 21,
    AfterComment2 = 22,

    // CDATA (numbered states presumably step through the delimiter one character at a time — confirm)
    BeforeCdata1 = 23,
    BeforeCdata2 = 24,
    BeforeCdata3 = 25,
    BeforeCdata4 = 26,
    BeforeCdata5 = 27,
    BeforeCdata6 = 28,
    InCdata = 29,
    AfterCdata1 = 30,
    AfterCdata2 = 31,

    // Special tags starting with "s": script and style
    BeforeSpecialS = 32,
    BeforeSpecialSEnd = 33,
    BeforeScript1 = 34,
    BeforeScript2 = 35,
    BeforeScript3 = 36,
    BeforeScript4 = 37,
    BeforeScript5 = 38,
    AfterScript1 = 39,
    AfterScript2 = 40,
    AfterScript3 = 41,
    AfterScript4 = 42,
    AfterScript5 = 43,
    BeforeStyle1 = 44,
    BeforeStyle2 = 45,
    BeforeStyle3 = 46,
    BeforeStyle4 = 47,
    AfterStyle1 = 48,
    AfterStyle2 = 49,
    AfterStyle3 = 50,
    AfterStyle4 = 51,

    // Special tags starting with "t": title
    BeforeSpecialT = 52,
    BeforeSpecialTEnd = 53,
    BeforeTitle1 = 54,
    BeforeTitle2 = 55,
    BeforeTitle3 = 56,
    BeforeTitle4 = 57,
    AfterTitle1 = 58,
    AfterTitle2 = 59,
    AfterTitle3 = 60,
    AfterTitle4 = 61,

    // Entities
    BeforeEntity = 62,
    BeforeNumericEntity = 63,
    InNamedEntity = 64,
    InNumericEntity = 65,
    InHexEntity = 66
}
/**
 * Set of handlers a `Tokenizer` is constructed with (its `cbs` constructor
 * argument). Every member is required and returns nothing.
 *
 * NOTE(review): this declaration file only shows the signatures; when exactly
 * each hook fires is defined by the implementation — the one-line notes below
 * are read off the member and parameter names and should be confirmed there.
 */
export interface Callbacks {
    /** Receives a piece of attribute value text. */
    onattribdata(value: string): void;
    /**
     * Signals the end of an attribute.
     * @param quote presumably the quote character around the value, or
     * `undefined` / `null` when there is none — confirm against the implementation.
     */
    onattribend(quote: string | undefined | null): void;
    /** Receives an attribute name. */
    onattribname(name: string): void;
    /** Receives the content of a CDATA section. */
    oncdata(data: string): void;
    /** Receives the name of a closing tag. */
    onclosetag(name: string): void;
    /** Receives the content of a comment. */
    oncomment(data: string): void;
    /** Receives the content of a declaration. */
    ondeclaration(content: string): void;
    /** Signals that tokenizing has ended. */
    onend(): void;
    /**
     * Reports an error.
     * @param error the error being reported.
     * @param state optional tokenizer state associated with the error.
     */
    onerror(error: Error, state?: State): void;
    /** Signals the end of an opening tag. */
    onopentagend(): void;
    /** Receives the name of an opening tag. */
    onopentagname(name: string): void;
    /** Receives a processing instruction. */
    onprocessinginstruction(instruction: string): void;
    /** Signals a self-closing tag. */
    onselfclosingtag(): void;
    /** Receives a piece of text. */
    ontext(value: string): void;
}
/**
 * Type declaration of the tokenizer class: a state machine (see `_state`)
 * that is fed string chunks through `write` / `end` and reports to the
 * `Callbacks` object passed to the constructor.
 *
 * Only signatures are declared here; private members are listed without
 * types, as is usual for generated declaration files.
 */
export default class Tokenizer {
    /** The current state the tokenizer is in. */
    _state: State;
    /** The read buffer. */
    private buffer;
    /** The beginning of the section that is currently being read. */
    sectionStart: number;
    /** The index within the buffer that we are currently looking at. */
    _index: number;
    /**
     * Data that has already been processed will be removed from the buffer occasionally.
     * `bufferOffset` keeps track of how many characters have been removed, to make sure position information is accurate.
     */
    private bufferOffset;
    /** Some behavior, eg. when decoding entities, is done while we are in another state. This keeps track of the other state type. */
    private baseState;
    /** For special parsing behavior inside of script and style tags. */
    private special;
    /** Indicates whether the tokenizer has been paused. */
    private running;
    /** Indicates whether the tokenizer has finished running / `.end` has been called. */
    private ended;
    private readonly cbs;
    private readonly xmlMode;
    private readonly decodeEntities;
    /**
     * @param options optional flags, or `null`; `xmlMode` and `decodeEntities`
     * are both optional (their defaults are not visible in this declaration).
     * @param cbs the handlers the tokenizer reports to.
     */
    constructor(options: {
        xmlMode?: boolean;
        decodeEntities?: boolean;
    } | null, cbs: Callbacks);
    /** Presumably restores the initial tokenizer state — confirm against the implementation. */
    reset(): void;
    /** Feeds a chunk of input to the tokenizer. */
    write(chunk: string): void;
    /** Finishes tokenizing; an optional last chunk may be supplied. */
    end(chunk?: string): void;
    /** Pauses the tokenizer (see `running`). */
    pause(): void;
    /** Resumes a paused tokenizer (see `running`). */
    resume(): void;
    /**
     * The current index within all of the written data.
     */
    getAbsoluteIndex(): number;
    private stateText;
    /**
     * HTML only allows ASCII alpha characters (a-z and A-Z) at the beginning of a tag name.
     *
     * XML allows a lot more characters here (@see https://www.w3.org/TR/REC-xml/#NT-NameStartChar).
     * We allow anything that wouldn't end the tag.
     */
    private isTagStartChar;
    private stateBeforeTagName;
    private stateInTagName;
    private stateBeforeClosingTagName;
    private stateInClosingTagName;
    private stateAfterClosingTagName;
    private stateBeforeAttributeName;
    private stateInSelfClosingTag;
    private stateInAttributeName;
    private stateAfterAttributeName;
    private stateBeforeAttributeValue;
    private handleInAttributeValue;
    private stateInAttributeValueDoubleQuotes;
    private stateInAttributeValueSingleQuotes;
    private stateInAttributeValueNoQuotes;
    private stateBeforeDeclaration;
    private stateInDeclaration;
    private stateInProcessingInstruction;
    private stateBeforeComment;
    private stateInComment;
    private stateInSpecialComment;
    private stateAfterComment1;
    private stateAfterComment2;
    private stateBeforeCdata6;
    private stateInCdata;
    private stateAfterCdata1;
    private stateAfterCdata2;
    private stateBeforeSpecialS;
    private stateBeforeSpecialSEnd;
    private stateBeforeSpecialLast;
    private stateAfterSpecialLast;
    private parseFixedEntity;
    private parseLegacyEntity;
    private stateInNamedEntity;
    private decodeNumericEntity;
    private stateInNumericEntity;
    private stateInHexEntity;
    private cleanup;
    /**
     * Iterates through the buffer, calling the function corresponding to the current state.
     *
     * States that are more likely to be hit are higher up, as a performance improvement.
     */
    private parse;
    private finish;
    private handleTrailingData;
    private getSection;
    private emitToken;
    private emitPartial;
}
export {}; | 
						|
//# sourceMappingURL=Tokenizer.d.ts.map
 |