File size: 4,203 Bytes
/**
* Incremental streaming parser for <think>...</think> reasoning tags.
*
* Tokens are pushed one-by-one as they arrive from the streamer.
* The parser tracks whether we are currently inside a `<think>` block
* and emits deltas accordingly.
*/
/** One incremental piece of parser output, labelled by the region of the stream it came from. */
export interface ThinkDelta {
/** "reasoning" for text found inside a `<think>` block, "content" for text outside one. */
type: "reasoning" | "content";
/** The newly emitted text for this delta only (not the accumulated total). */
textDelta: string;
}
/**
 * Streaming splitter that separates `<think>…</think>` reasoning text from
 * ordinary content as chunks arrive.
 *
 * Feed text with {@link ThinkStreamParser.push}; call
 * {@link ThinkStreamParser.flush} once the stream has ended so that any
 * held-back partial tag is released. The tags themselves are never emitted.
 */
export class ThinkStreamParser {
  /** Running total of all text emitted as "reasoning" since the last reset. */
  reasoning = "";

  /** Running total of all text emitted as "content" since the last reset. */
  content = "";

  /** True while the parser is positioned between `<think>` and `</think>`. */
  private _inThink = false;

  /** Text received but not yet emitted because it may be the start of a tag. */
  private _buf = "";

  private static readonly OPEN_TAG = "<think>";
  private static readonly CLOSE_TAG = "</think>";

  /** Return the parser to its initial state, discarding all accumulated text. */
  reset(): void {
    this._buf = "";
    this._inThink = false;
    this.content = "";
    this.reasoning = "";
  }

  /**
   * Consume the next chunk of streamed text.
   *
   * @param text - Newly arrived text; it may contain zero, one, or several tags,
   *   or only a fragment of one.
   * @returns The deltas produced by this chunk, in stream order. The array is
   *   empty when everything received so far has to be held back as a possible
   *   partial tag.
   */
  push(text: string): ThinkDelta[] {
    const deltas: ThinkDelta[] = [];
    let pending = this._buf + text;

    // Record a piece of text under whichever mode is active right now.
    const emit = (piece: string): void => {
      if (piece.length === 0) return;
      if (this._inThink) {
        this.reasoning += piece;
        deltas.push({ type: "reasoning", textDelta: piece });
      } else {
        this.content += piece;
        deltas.push({ type: "content", textDelta: piece });
      }
    };

    while (pending.length > 0) {
      // The only tag that matters is the one that would end the current mode.
      const tag = this._inThink
        ? ThinkStreamParser.CLOSE_TAG
        : ThinkStreamParser.OPEN_TAG;
      const tagAt = pending.indexOf(tag);

      if (tagAt === -1) {
        // No complete tag: emit what is certainly plain text and keep back a
        // tail that could still grow into the tag with the next chunk.
        const safe = this._safeFlushLength(pending, tag);
        emit(pending.slice(0, safe));
        pending = pending.slice(safe);
        break;
      }

      // Complete tag found: emit the text before it, drop the tag, switch mode,
      // and keep scanning the remainder of the buffer.
      emit(pending.slice(0, tagAt));
      pending = pending.slice(tagAt + tag.length);
      this._inThink = !this._inThink;
    }

    this._buf = pending;
    return deltas;
  }

  /**
   * Release whatever is still held in the partial-tag buffer. Intended to be
   * called once generation is complete.
   *
   * @returns A single delta carrying the leftover text under the current mode,
   *   or an empty array when nothing was buffered.
   */
  flush(): ThinkDelta[] {
    const leftover = this._buf;
    if (leftover.length === 0) return [];

    this._buf = "";
    if (this._inThink) {
      this.reasoning += leftover;
      return [{ type: "reasoning", textDelta: leftover }];
    }
    this.content += leftover;
    return [{ type: "content", textDelta: leftover }];
  }

  /**
   * Number of leading characters of `buf` that can be emitted immediately.
   *
   * Finds the longest proper prefix of `tag` that `buf` ends with and excludes
   * that tail from the result; when no such prefix exists the whole of `buf`
   * is safe.
   */
  private _safeFlushLength(buf: string, tag: string): number {
    // Clamp at zero so a tag shorter than two characters holds nothing back.
    let held = Math.max(0, Math.min(buf.length, tag.length - 1));
    while (held > 0 && !buf.endsWith(tag.slice(0, held))) {
      held--;
    }
    return buf.length - held;
  }
}
|