maksym-work committed on
Commit
db4cfdb
·
1 Parent(s): df27471

update generate

Browse files
src/lib/server/textGeneration/generate.ts CHANGED
@@ -22,10 +22,12 @@ export async function* generate(
22
  ): AsyncIterable<MessageUpdate> {
23
  // reasoning mode is false by default
24
  let reasoning = false;
25
- let reasoningBuffer = "";
 
26
  let lastReasoningUpdate = new Date();
27
  let status = "";
28
  const startTime = new Date();
 
29
  if (
30
  model.reasoning &&
31
  // if the beginToken is an empty string, the model starts in reasoning mode
@@ -111,9 +113,9 @@ Do not use prefixes such as Response: or Answer: when answering to the user.`,
111
 
112
  // if the beginToken is an empty string, we don't need to remove anything
113
  const beginIndex = model.reasoning.beginToken
114
- ? reasoningBuffer.indexOf(model.reasoning.beginToken)
115
  : 0;
116
- const endIndex = reasoningBuffer.lastIndexOf(model.reasoning.endToken);
117
 
118
  if (beginIndex !== -1 && endIndex !== -1) {
119
  // Remove the reasoning section (including tokens) from final answer
@@ -131,19 +133,57 @@ Do not use prefixes such as Response: or Answer: when answering to the user.`,
131
  continue;
132
  }
133
 
 
 
134
  if (model.reasoning && model.reasoning.type === "tokens") {
135
- if (output.token.text === model.reasoning.beginToken) {
 
 
 
 
 
 
 
136
  reasoning = true;
137
- reasoningBuffer += output.token.text;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
  continue;
139
- } else if (output.token.text === model.reasoning.endToken) {
 
140
  reasoning = false;
141
- reasoningBuffer += output.token.text;
 
142
  yield {
143
  type: MessageUpdateType.Reasoning,
144
  subtype: MessageReasoningUpdateType.Status,
145
  status: `Done in ${Math.round((new Date().getTime() - startTime.getTime()) / 1000)}s.`,
146
  };
 
 
 
 
 
 
 
 
 
147
  continue;
148
  }
149
  }
@@ -152,42 +192,6 @@ Do not use prefixes such as Response: or Answer: when answering to the user.`,
152
 
153
  // pass down normal token
154
  if (reasoning) {
155
- reasoningBuffer += output.token.text;
156
-
157
- if (model.reasoning && model.reasoning.type === "tokens") {
158
- // split reasoning buffer so that anything that comes after the end token is separated
159
- // add it to the normal buffer, and yield two updates, one for the reasoning and one for the normal content
160
- // also set reasoning to false
161
-
162
- if (reasoningBuffer.lastIndexOf(model.reasoning.endToken) !== -1) {
163
- const endTokenIndex = reasoningBuffer.lastIndexOf(model.reasoning.endToken);
164
- const textBuffer = reasoningBuffer.slice(endTokenIndex + model.reasoning.endToken.length);
165
- reasoningBuffer = reasoningBuffer.slice(
166
- 0,
167
- endTokenIndex + model.reasoning.endToken.length + 1
168
- );
169
-
170
- yield {
171
- type: MessageUpdateType.Reasoning,
172
- subtype: MessageReasoningUpdateType.Stream,
173
- token: output.token.text,
174
- };
175
-
176
- yield {
177
- type: MessageUpdateType.Stream,
178
- token: textBuffer,
179
- };
180
-
181
- yield {
182
- type: MessageUpdateType.Reasoning,
183
- subtype: MessageReasoningUpdateType.Status,
184
- status: `Done in ${Math.round((new Date().getTime() - startTime.getTime()) / 1000)}s.`,
185
- };
186
-
187
- reasoning = false;
188
- continue;
189
- }
190
- }
191
  // yield status update if it has changed
192
  if (status !== "") {
193
  yield {
 
22
  ): AsyncIterable<MessageUpdate> {
23
  // reasoning mode is false by default
24
  let reasoning = false;
25
+ const reasoningBuffer = "";
26
+ let buffer = "";
27
  let lastReasoningUpdate = new Date();
28
  let status = "";
29
  const startTime = new Date();
30
+ let lastProcessedBeginTokenIndex = -1; // Track the last processed begin token position
31
  if (
32
  model.reasoning &&
33
  // if the beginToken is an empty string, the model starts in reasoning mode
 
113
 
114
  // if the beginToken is an empty string, we don't need to remove anything
115
  const beginIndex = model.reasoning.beginToken
116
+ ? buffer.indexOf(model.reasoning.beginToken)
117
  : 0;
118
+ const endIndex = buffer.lastIndexOf(model.reasoning.endToken);
119
 
120
  if (beginIndex !== -1 && endIndex !== -1) {
121
  // Remove the reasoning section (including tokens) from final answer
 
133
  continue;
134
  }
135
 
136
+ buffer += output.token.text;
137
+
138
  if (model.reasoning && model.reasoning.type === "tokens") {
139
+ const currentBeginTokenIndex = buffer.indexOf(model.reasoning.beginToken);
140
+
141
+ if (
142
+ !reasoning &&
143
+ currentBeginTokenIndex !== -1 &&
144
+ currentBeginTokenIndex > lastProcessedBeginTokenIndex &&
145
+ buffer.indexOf(model.reasoning.endToken) === -1
146
+ ) {
147
  reasoning = true;
148
+ lastProcessedBeginTokenIndex = currentBeginTokenIndex;
149
+ const beginTokenIndex = buffer.indexOf(model.reasoning.beginToken);
150
+ const textBuffer = buffer.slice(beginTokenIndex + model.reasoning.beginToken.length);
151
+ yield {
152
+ type: MessageUpdateType.Reasoning,
153
+ subtype: MessageReasoningUpdateType.Status,
154
+ status: "Started reasoning...",
155
+ };
156
+ yield {
157
+ type: MessageUpdateType.Reasoning,
158
+ subtype: MessageReasoningUpdateType.Back,
159
+ content: buffer.length - buffer.lastIndexOf(model.reasoning.beginToken),
160
+ };
161
+ yield {
162
+ type: MessageUpdateType.Reasoning,
163
+ subtype: MessageReasoningUpdateType.Stream,
164
+ token: textBuffer,
165
+ };
166
+
167
  continue;
168
+ }
169
+ if (reasoning && buffer.indexOf(model.reasoning.endToken) !== -1) {
170
  reasoning = false;
171
+ const endTokenIndex = buffer.lastIndexOf(model.reasoning.endToken);
172
+ const textBuffer = buffer.slice(endTokenIndex + model.reasoning.endToken.length);
173
  yield {
174
  type: MessageUpdateType.Reasoning,
175
  subtype: MessageReasoningUpdateType.Status,
176
  status: `Done in ${Math.round((new Date().getTime() - startTime.getTime()) / 1000)}s.`,
177
  };
178
+ yield {
179
+ type: MessageUpdateType.Reasoning,
180
+ subtype: MessageReasoningUpdateType.Back,
181
+ reasoning: buffer.length - buffer.lastIndexOf(model.reasoning.endToken),
182
+ };
183
+ yield {
184
+ type: MessageUpdateType.Stream,
185
+ token: textBuffer,
186
+ };
187
  continue;
188
  }
189
  }
 
192
 
193
  // pass down normal token
194
  if (reasoning) {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
195
  // yield status update if it has changed
196
  if (status !== "") {
197
  yield {
src/lib/types/MessageUpdate.ts CHANGED
@@ -120,9 +120,13 @@ export interface MessageStreamUpdate {
120
  export enum MessageReasoningUpdateType {
121
  Stream = "stream",
122
  Status = "status",
 
123
  }
124
 
125
- export type MessageReasoningUpdate = MessageReasoningStreamUpdate | MessageReasoningStatusUpdate;
 
 
 
126
 
127
  export interface MessageReasoningStreamUpdate {
128
  type: MessageUpdateType.Reasoning;
@@ -135,6 +139,13 @@ export interface MessageReasoningStatusUpdate {
135
  status: string;
136
  }
137
 
 
 
 
 
 
 
 
138
  export interface MessageFileUpdate {
139
  type: MessageUpdateType.File;
140
  name: string;
 
120
  export enum MessageReasoningUpdateType {
121
  Stream = "stream",
122
  Status = "status",
123
+ Back = "back",
124
  }
125
 
126
+ export type MessageReasoningUpdate =
127
+ | MessageReasoningStreamUpdate
128
+ | MessageReasoningStatusUpdate
129
+ | MessageReasoningBackUpdate;
130
 
131
  export interface MessageReasoningStreamUpdate {
132
  type: MessageUpdateType.Reasoning;
 
139
  status: string;
140
  }
141
 
142
+ export interface MessageReasoningBackUpdate {
143
+ type: MessageUpdateType.Reasoning;
144
+ subtype: MessageReasoningUpdateType.Back;
145
+ reasoning?: number;
146
+ content?: number;
147
+ }
148
+
149
  export interface MessageFileUpdate {
150
  type: MessageUpdateType.File;
151
  name: string;
src/routes/conversation/[id]/+page.svelte CHANGED
@@ -351,6 +351,17 @@
351
  reasoningLastUpdate = currentTime;
352
  }
353
  }
 
 
 
 
 
 
 
 
 
 
 
354
  }
355
  }
356
  } catch (err) {
 
351
  reasoningLastUpdate = currentTime;
352
  }
353
  }
354
+ if (update.subtype === MessageReasoningUpdateType.Back) {
355
+ if (update.content) {
356
+ messageToWriteTo.content = messageToWriteTo.content?.slice(0, -update.content);
357
+ }
358
+ if (update.reasoning) {
359
+ messageToWriteTo.reasoning = messageToWriteTo.reasoning?.slice(
360
+ 0,
361
+ -update.reasoning
362
+ );
363
+ }
364
+ }
365
  }
366
  }
367
  } catch (err) {