File size: 4,203 Bytes
4fb0ce9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
/**
 * Incremental streaming parser for <think>...</think> reasoning tags.
 *
 * Tokens are pushed one-by-one as they arrive from the streamer.
 * The parser tracks whether we are currently inside a `<think>` block
 * and emits deltas accordingly.
 */

/**
 * One incremental piece of output produced by `ThinkStreamParser`.
 */
export interface ThinkDelta {
  /**
   * Which stream the text belongs to: `"reasoning"` for text found inside a
   * `<think>…</think>` block, `"content"` for text outside of one.
   */
  type: "reasoning" | "content";
  /** The newly emitted text; the tag markup itself is not included. */
  textDelta: string;
}

export class ThinkStreamParser {
  /** All text emitted so far from inside <think>…</think>. */
  reasoning = "";
  /** All text emitted so far from outside think tags. */
  content = "";

  /** True while the text being consumed lies inside a <think> block. */
  private _inThink = false;
  /** Received-but-not-yet-emitted text (may end in a partial tag). */
  private _buf = "";

  private static readonly OPEN_TAG = "<think>";
  private static readonly CLOSE_TAG = "</think>";

  /** Return the parser to its initial state, discarding all accumulated text. */
  reset(): void {
    this._buf = "";
    this._inThink = false;
    this.content = "";
    this.reasoning = "";
  }

  /**
   * Feed the next chunk of streamed text into the parser.
   *
   * @param text - Newly arrived text; it may contain zero, one or several
   *   tags, or only a fragment of a tag.
   * @returns The deltas produced by this chunk, in stream order. Empty when
   *   everything received so far has to be held back as a possible tag prefix.
   */
  push(text: string): ThinkDelta[] {
    const out: ThinkDelta[] = [];
    this._buf += text;

    while (this._buf.length > 0) {
      // The only tag that can change state is the one opposite to the
      // current mode: a closer while thinking, an opener otherwise.
      const tag = this._inThink
        ? ThinkStreamParser.CLOSE_TAG
        : ThinkStreamParser.OPEN_TAG;
      const tagAt = this._buf.indexOf(tag);

      if (tagAt === -1) {
        // No complete tag buffered: emit what is certainly plain text and
        // keep any tail that might turn out to be the start of the tag.
        const keep = this._safeFlushLength(this._buf, tag);
        this._emit(out, this._buf.slice(0, keep));
        this._buf = this._buf.slice(keep);
        break;
      }

      // Complete tag found: emit the text preceding it under the current
      // mode, drop the tag itself, then switch mode and keep scanning.
      this._emit(out, this._buf.slice(0, tagAt));
      this._buf = this._buf.slice(tagAt + tag.length);
      this._inThink = !this._inThink;
    }

    return out;
  }

  /**
   * Emit whatever is still held in the partial-tag buffer. Intended to be
   * called once generation has finished so that no text is lost.
   *
   * @returns A single delta with the leftover text, or an empty array when
   *   nothing was buffered.
   */
  flush(): ThinkDelta[] {
    const out: ThinkDelta[] = [];
    this._emit(out, this._buf);
    this._buf = "";
    return out;
  }

  /**
   * Append `piece` to the accumulator matching the current mode and record a
   * delta for it in `out`. Empty pieces are ignored.
   */
  private _emit(out: ThinkDelta[], piece: string): void {
    if (piece.length === 0) {
      return;
    }
    if (this._inThink) {
      this.reasoning += piece;
      out.push({ type: "reasoning", textDelta: piece });
    } else {
      this.content += piece;
      out.push({ type: "content", textDelta: piece });
    }
  }

  /**
   * Number of leading characters of `buf` that can be emitted without
   * splitting a `tag` that may be only partially received at the end.
   */
  private _safeFlushLength(buf: string, tag: string): number {
    // Try the longest candidate tail first; a full-length match is excluded
    // because a complete tag is handled by the caller's indexOf search.
    let overlap = Math.min(buf.length, tag.length - 1);
    while (overlap > 0) {
      if (tag.startsWith(buf.slice(buf.length - overlap))) {
        return buf.length - overlap;
      }
      overlap -= 1;
    }
    return buf.length;
  }
}