You cannot select more than 25 topics.
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
			
				
					182 lines
				
				5.6 KiB
			
		
		
			
		
	
	
					182 lines
				
				5.6 KiB
			| 
								 
											4 years ago
										 
									 | 
							
								/**
								 * All the states the tokenizer can be in.
								 *
								 * Ambient `const enum`; members are numbered consecutively from 1 to 66.
								 * The group headings below are derived from the member names only — the
								 * transitions between states are not visible in this declaration file.
								 */
								declare const enum State {
								    Text = 1,

								    // Tag names
								    BeforeTagName = 2,
								    InTagName = 3,
								    InSelfClosingTag = 4,
								    BeforeClosingTagName = 5,
								    InClosingTagName = 6,
								    AfterClosingTagName = 7,

								    // Attributes (Dq / Sq / Nq: cf. the `stateInAttributeValueDoubleQuotes`,
								    // `...SingleQuotes` and `...NoQuotes` handlers declared on `Tokenizer`)
								    BeforeAttributeName = 8,
								    InAttributeName = 9,
								    AfterAttributeName = 10,
								    BeforeAttributeValue = 11,
								    InAttributeValueDq = 12,
								    InAttributeValueSq = 13,
								    InAttributeValueNq = 14,

								    // Declarations and processing instructions
								    BeforeDeclaration = 15,
								    InDeclaration = 16,
								    InProcessingInstruction = 17,

								    // Comments
								    BeforeComment = 18,
								    InComment = 19,
								    InSpecialComment = 20,
								    AfterComment1 = 21,
								    AfterComment2 = 22,

								    // CDATA
								    BeforeCdata1 = 23,
								    BeforeCdata2 = 24,
								    BeforeCdata3 = 25,
								    BeforeCdata4 = 26,
								    BeforeCdata5 = 27,
								    BeforeCdata6 = 28,
								    InCdata = 29,
								    AfterCdata1 = 30,
								    AfterCdata2 = 31,

								    // Special tags starting with "s" (presumably script / style — verify against the implementation)
								    BeforeSpecialS = 32,
								    BeforeSpecialSEnd = 33,
								    BeforeScript1 = 34,
								    BeforeScript2 = 35,
								    BeforeScript3 = 36,
								    BeforeScript4 = 37,
								    BeforeScript5 = 38,
								    AfterScript1 = 39,
								    AfterScript2 = 40,
								    AfterScript3 = 41,
								    AfterScript4 = 42,
								    AfterScript5 = 43,
								    BeforeStyle1 = 44,
								    BeforeStyle2 = 45,
								    BeforeStyle3 = 46,
								    BeforeStyle4 = 47,
								    AfterStyle1 = 48,
								    AfterStyle2 = 49,
								    AfterStyle3 = 50,
								    AfterStyle4 = 51,

								    // Special tags starting with "t" (presumably title — verify against the implementation)
								    BeforeSpecialT = 52,
								    BeforeSpecialTEnd = 53,
								    BeforeTitle1 = 54,
								    BeforeTitle2 = 55,
								    BeforeTitle3 = 56,
								    BeforeTitle4 = 57,
								    AfterTitle1 = 58,
								    AfterTitle2 = 59,
								    AfterTitle3 = 60,
								    AfterTitle4 = 61,

								    // Entities
								    BeforeEntity = 62,
								    BeforeNumericEntity = 63,
								    InNamedEntity = 64,
								    InNumericEntity = 65,
								    InHexEntity = 66
								}
							 | 
						||
| 
								 | 
							
								/**
								 * Handler set accepted by the `Tokenizer` constructor (its `cbs` parameter).
								 *
								 * Every member is required and returns `void`. When each handler is invoked
								 * is defined by the tokenizer implementation, which is not part of this
								 * declaration file; the per-member notes only restate the signatures.
								 */
								export interface Callbacks {
								    /** Receives a string `value`. */
								    onattribdata(value: string): void;
								    /** Receives `quote`, which may be a string, `undefined` or `null`. */
								    onattribend(quote: string | undefined | null): void;
								    /** Receives a string `name`. */
								    onattribname(name: string): void;
								    /** Receives a string `data`. */
								    oncdata(data: string): void;
								    /** Receives a string `name`. */
								    onclosetag(name: string): void;
								    /** Receives a string `data`. */
								    oncomment(data: string): void;
								    /** Receives a string `content`. */
								    ondeclaration(content: string): void;
								    /** Takes no arguments. */
								    onend(): void;
								    /** Receives an `Error` and, optionally, a tokenizer `State`. */
								    onerror(error: Error, state?: State): void;
								    /** Takes no arguments. */
								    onopentagend(): void;
								    /** Receives a string `name`. */
								    onopentagname(name: string): void;
								    /** Receives a string `instruction`. */
								    onprocessinginstruction(instruction: string): void;
								    /** Takes no arguments. */
								    onselfclosingtag(): void;
								    /** Receives a string `value`. */
								    ontext(value: string): void;
								}
							 | 
						||
| 
								 | 
							
								/**
								 * Type declaration of the tokenizer class (default export).
								 *
								 * Input is supplied through `write` / `end`; processing can be suspended and
								 * continued with `pause` / `resume`, and `reset` is also exposed. Results are
								 * presumably reported through the `Callbacks` object given to the constructor —
								 * the implementation is not part of this declaration file.
								 */
								export default class Tokenizer {
								    /** The current state the tokenizer is in. */
								    _state: State;
								    /** The read buffer. */
								    private buffer;
								    /** The beginning of the section that is currently being read. */
								    sectionStart: number;
								    /** The index within the buffer that we are currently looking at. */
								    _index: number;
								    /**
								     * Data that has already been processed will be removed from the buffer occasionally.
								     * `bufferOffset` keeps track of how many characters have been removed, to make sure position information is accurate.
								     */
								    private bufferOffset;
								    /** Some behavior, e.g. when decoding entities, is done while we are in another state. This keeps track of the other state type. */
								    private baseState;
								    /** For special parsing behavior inside of script and style tags. */
								    private special;
								    /** Indicates whether the tokenizer has been paused. */
								    private running;
								    /** Indicates whether the tokenizer has finished running / `.end` has been called. */
								    private ended;
								    private readonly cbs;
								    private readonly xmlMode;
								    private readonly decodeEntities;
								    /**
								     * @param options Optional settings (`xmlMode`, `decodeEntities`); `null` is accepted.
								     * @param cbs Handlers conforming to `Callbacks`.
								     */
								    constructor(options: {
								        xmlMode?: boolean;
								        decodeEntities?: boolean;
								    } | null, cbs: Callbacks);
								    reset(): void;
								    write(chunk: string): void;
								    end(chunk?: string): void;
								    pause(): void;
								    resume(): void;
								    /**
								     * The current index within all of the written data.
								     */
								    getAbsoluteIndex(): number;
								    private stateText;
								    /**
								     * HTML only allows ASCII alpha characters (a-z and A-Z) at the beginning of a tag name.
								     *
								     * XML allows a lot more characters here (@see https://www.w3.org/TR/REC-xml/#NT-NameStartChar).
								     * We allow anything that wouldn't end the tag.
								     */
								    private isTagStartChar;
								    private stateBeforeTagName;
								    private stateInTagName;
								    private stateBeforeClosingTagName;
								    private stateInClosingTagName;
								    private stateAfterClosingTagName;
								    private stateBeforeAttributeName;
								    private stateInSelfClosingTag;
								    private stateInAttributeName;
								    private stateAfterAttributeName;
								    private stateBeforeAttributeValue;
								    private handleInAttributeValue;
								    private stateInAttributeValueDoubleQuotes;
								    private stateInAttributeValueSingleQuotes;
								    private stateInAttributeValueNoQuotes;
								    private stateBeforeDeclaration;
								    private stateInDeclaration;
								    private stateInProcessingInstruction;
								    private stateBeforeComment;
								    private stateInComment;
								    private stateInSpecialComment;
								    private stateAfterComment1;
								    private stateAfterComment2;
								    private stateBeforeCdata6;
								    private stateInCdata;
								    private stateAfterCdata1;
								    private stateAfterCdata2;
								    private stateBeforeSpecialS;
								    private stateBeforeSpecialSEnd;
								    private stateBeforeSpecialLast;
								    private stateAfterSpecialLast;
								    private parseFixedEntity;
								    private parseLegacyEntity;
								    private stateInNamedEntity;
								    private decodeNumericEntity;
								    private stateInNumericEntity;
								    private stateInHexEntity;
								    private cleanup;
								    /**
								     * Iterates through the buffer, calling the function corresponding to the current state.
								     *
								     * States that are more likely to be hit are higher up, as a performance improvement.
								     */
								    private parse;
								    private finish;
								    private handleTrailingData;
								    private getSection;
								    private emitToken;
								    private emitPartial;
								}
							 | 
						||
| 
								 | 
							
								export {};
							 | 
						||
| 
								 | 
							
								//# sourceMappingURL=Tokenizer.d.ts.map
							 |