Spaces:
Paused
Paused
Commit · eef2ebb
1 Parent(s): 0da76bb
tentative tool call support
Browse files
- app/api_helpers.py +0 -2
- app/message_processing.py +0 -20
- app/openai_handler.py +13 -50
app/api_helpers.py
CHANGED
|
@@ -423,7 +423,6 @@ async def execute_gemini_call(
|
|
| 423 |
block_msg+=f" ({response_obj_call.prompt_feedback.block_reason_message})"
|
| 424 |
raise ValueError(block_msg)
|
| 425 |
|
| 426 |
-
print(f"DEBUG: Raw Gemini response_obj_call before conversion: {response_obj_call}") # Kilo Code Added Log
|
| 427 |
if not is_gemini_response_valid(response_obj_call):
|
| 428 |
error_details = f"Invalid non-streaming Gemini response for model string '{model_to_call}'. "
|
| 429 |
if hasattr(response_obj_call, 'candidates'):
|
|
@@ -446,5 +445,4 @@ async def execute_gemini_call(
|
|
| 446 |
raise ValueError(error_details)
|
| 447 |
|
| 448 |
openai_response_content = convert_to_openai_format(response_obj_call, request_obj.model)
|
| 449 |
-
print(f"DEBUG: OpenAI formatted response content before JSONResponse: {openai_response_content}") # Kilo Code Added Log
|
| 450 |
return JSONResponse(content=openai_response_content)
|
|
|
|
| 423 |
block_msg+=f" ({response_obj_call.prompt_feedback.block_reason_message})"
|
| 424 |
raise ValueError(block_msg)
|
| 425 |
|
|
|
|
| 426 |
if not is_gemini_response_valid(response_obj_call):
|
| 427 |
error_details = f"Invalid non-streaming Gemini response for model string '{model_to_call}'. "
|
| 428 |
if hasattr(response_obj_call, 'candidates'):
|
|
|
|
| 445 |
raise ValueError(error_details)
|
| 446 |
|
| 447 |
openai_response_content = convert_to_openai_format(response_obj_call, request_obj.model)
|
|
|
|
| 448 |
return JSONResponse(content=openai_response_content)
|
app/message_processing.py
CHANGED
|
@@ -311,29 +311,20 @@ def parse_gemini_response_for_reasoning_and_content(gemini_response_candidate: A
|
|
| 311 |
|
| 312 |
if gemini_candidate_content and hasattr(gemini_candidate_content, 'parts') and gemini_candidate_content.parts:
|
| 313 |
for part_item in gemini_candidate_content.parts:
|
| 314 |
-
print(f"DEBUG: Parsing part_item: {part_item}") # Kilo Code Added Log
|
| 315 |
if hasattr(part_item, 'function_call') and part_item.function_call is not None: # Kilo Code: Added 'is not None' check
|
| 316 |
-
print(f"DEBUG: part_item is a function_call, skipping for text parsing.") # Kilo Code Added Log
|
| 317 |
continue
|
| 318 |
|
| 319 |
part_text = ""
|
| 320 |
if hasattr(part_item, 'text') and part_item.text is not None:
|
| 321 |
part_text = str(part_item.text)
|
| 322 |
|
| 323 |
-
# Kilo Code Added Logs
|
| 324 |
part_is_thought = hasattr(part_item, 'thought') and part_item.thought is True
|
| 325 |
-
print(f"DEBUG: part_text: '{part_text}', is_thought: {part_is_thought}")
|
| 326 |
|
| 327 |
if part_is_thought:
|
| 328 |
reasoning_text_parts.append(part_text)
|
| 329 |
-
print(f"DEBUG: Appended to reasoning_text_parts. Current count: {len(reasoning_text_parts)}") # Kilo Code Added Log
|
| 330 |
elif part_text: # Only add if it's not a function_call and has text
|
| 331 |
normal_text_parts.append(part_text)
|
| 332 |
-
print(f"DEBUG: Appended to normal_text_parts. Current count: {len(normal_text_parts)}") # Kilo Code Added Log
|
| 333 |
-
else:
|
| 334 |
-
print(f"DEBUG: part_text is empty or not appended. is_thought: {part_is_thought}") # Kilo Code Added Log
|
| 335 |
elif candidate_part_text:
|
| 336 |
-
print(f"DEBUG: Using candidate_part_text: '{candidate_part_text}'") # Kilo Code Added Log
|
| 337 |
normal_text_parts.append(candidate_part_text)
|
| 338 |
elif gemini_candidate_content and hasattr(gemini_candidate_content, 'text') and gemini_candidate_content.text is not None:
|
| 339 |
normal_text_parts.append(str(gemini_candidate_content.text))
|
|
@@ -371,12 +362,6 @@ def process_gemini_response_to_openai_dict(gemini_response_obj: Any, request_mod
|
|
| 371 |
for part in candidate.content.parts:
|
| 372 |
if hasattr(part, 'function_call') and part.function_call is not None: # Kilo Code: Added 'is not None' check
|
| 373 |
fc = part.function_call
|
| 374 |
-
# Kilo Code Added Logs
|
| 375 |
-
print(f"DEBUG: Processing part with function_call. Part: {part}")
|
| 376 |
-
print(f"DEBUG: FunctionCall object (fc): {fc}")
|
| 377 |
-
if fc:
|
| 378 |
-
print(f"DEBUG: fc.name: {getattr(fc, 'name', 'Name attribute does not exist or is None')}")
|
| 379 |
-
# End Kilo Code Added Logs
|
| 380 |
tool_call_id = f"call_{base_id}_{i}_{fc.name.replace(' ', '_')}_{int(time.time()*10000 + random.randint(0,9999))}"
|
| 381 |
|
| 382 |
if "tool_calls" not in message_payload:
|
|
@@ -400,9 +385,7 @@ def process_gemini_response_to_openai_dict(gemini_response_obj: Any, request_mod
|
|
| 400 |
reasoning_str = deobfuscate_text(reasoning_str)
|
| 401 |
normal_content_str = deobfuscate_text(normal_content_str)
|
| 402 |
|
| 403 |
-
print(f"DEBUG_ASSIGN: normal_content_str before assignment to message_payload: '{normal_content_str}'") # Kilo Code Added Log
|
| 404 |
message_payload["content"] = normal_content_str
|
| 405 |
-
print(f"DEBUG_ASSIGN: message_payload['content'] after assignment: '{message_payload['content']}'") # Kilo Code Added Log
|
| 406 |
if reasoning_str:
|
| 407 |
message_payload['reasoning_content'] = reasoning_str
|
| 408 |
|
|
@@ -494,12 +477,10 @@ def convert_chunk_to_openai(chunk: Any, model_name: str, response_id: str, candi
|
|
| 494 |
break
|
| 495 |
|
| 496 |
if not function_call_detected_in_chunk:
|
| 497 |
-
print(f"DEBUG_STREAM: Raw candidate list in chunk for text processing: {candidate}") # Kilo Code Added Log (Note: 'candidate' here is chunk.candidates)
|
| 498 |
if candidate and len(candidate) > 0: # Kilo Code: Ensure candidate list is not empty
|
| 499 |
reasoning_text, normal_text = parse_gemini_response_for_reasoning_and_content(candidate[0]) # Kilo Code: Pass the first Candidate object
|
| 500 |
else:
|
| 501 |
reasoning_text, normal_text = "", "" # Default to empty if no candidates
|
| 502 |
-
print(f"DEBUG_STREAM: Parsed from chunk - reasoning_text: '{reasoning_text}', normal_text: '{normal_text}'") # Kilo Code Added Log
|
| 503 |
if is_encrypt_full:
|
| 504 |
reasoning_text = deobfuscate_text(reasoning_text)
|
| 505 |
normal_text = deobfuscate_text(normal_text)
|
|
@@ -516,7 +497,6 @@ def convert_chunk_to_openai(chunk: Any, model_name: str, response_id: str, candi
|
|
| 516 |
# and it's not a terminal chunk, we still send a delta with empty content.
|
| 517 |
delta_payload['content'] = ""
|
| 518 |
|
| 519 |
-
print(f"DEBUG_STREAM: Final delta_payload for chunk: {delta_payload}") # Kilo Code Added Log
|
| 520 |
chunk_data = {
|
| 521 |
"id": response_id, "object": "chat.completion.chunk", "created": int(time.time()), "model": model_name,
|
| 522 |
"choices": [{"index": candidate_index, "delta": delta_payload, "finish_reason": openai_finish_reason}]
|
|
|
|
| 311 |
|
| 312 |
if gemini_candidate_content and hasattr(gemini_candidate_content, 'parts') and gemini_candidate_content.parts:
|
| 313 |
for part_item in gemini_candidate_content.parts:
|
|
|
|
| 314 |
if hasattr(part_item, 'function_call') and part_item.function_call is not None: # Kilo Code: Added 'is not None' check
|
|
|
|
| 315 |
continue
|
| 316 |
|
| 317 |
part_text = ""
|
| 318 |
if hasattr(part_item, 'text') and part_item.text is not None:
|
| 319 |
part_text = str(part_item.text)
|
| 320 |
|
|
|
|
| 321 |
part_is_thought = hasattr(part_item, 'thought') and part_item.thought is True
|
|
|
|
| 322 |
|
| 323 |
if part_is_thought:
|
| 324 |
reasoning_text_parts.append(part_text)
|
|
|
|
| 325 |
elif part_text: # Only add if it's not a function_call and has text
|
| 326 |
normal_text_parts.append(part_text)
|
|
|
|
|
|
|
|
|
|
| 327 |
elif candidate_part_text:
|
|
|
|
| 328 |
normal_text_parts.append(candidate_part_text)
|
| 329 |
elif gemini_candidate_content and hasattr(gemini_candidate_content, 'text') and gemini_candidate_content.text is not None:
|
| 330 |
normal_text_parts.append(str(gemini_candidate_content.text))
|
|
|
|
| 362 |
for part in candidate.content.parts:
|
| 363 |
if hasattr(part, 'function_call') and part.function_call is not None: # Kilo Code: Added 'is not None' check
|
| 364 |
fc = part.function_call
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 365 |
tool_call_id = f"call_{base_id}_{i}_{fc.name.replace(' ', '_')}_{int(time.time()*10000 + random.randint(0,9999))}"
|
| 366 |
|
| 367 |
if "tool_calls" not in message_payload:
|
|
|
|
| 385 |
reasoning_str = deobfuscate_text(reasoning_str)
|
| 386 |
normal_content_str = deobfuscate_text(normal_content_str)
|
| 387 |
|
|
|
|
| 388 |
message_payload["content"] = normal_content_str
|
|
|
|
| 389 |
if reasoning_str:
|
| 390 |
message_payload['reasoning_content'] = reasoning_str
|
| 391 |
|
|
|
|
| 477 |
break
|
| 478 |
|
| 479 |
if not function_call_detected_in_chunk:
|
|
|
|
| 480 |
if candidate and len(candidate) > 0: # Kilo Code: Ensure candidate list is not empty
|
| 481 |
reasoning_text, normal_text = parse_gemini_response_for_reasoning_and_content(candidate[0]) # Kilo Code: Pass the first Candidate object
|
| 482 |
else:
|
| 483 |
reasoning_text, normal_text = "", "" # Default to empty if no candidates
|
|
|
|
| 484 |
if is_encrypt_full:
|
| 485 |
reasoning_text = deobfuscate_text(reasoning_text)
|
| 486 |
normal_text = deobfuscate_text(normal_text)
|
|
|
|
| 497 |
# and it's not a terminal chunk, we still send a delta with empty content.
|
| 498 |
delta_payload['content'] = ""
|
| 499 |
|
|
|
|
| 500 |
chunk_data = {
|
| 501 |
"id": response_id, "object": "chat.completion.chunk", "created": int(time.time()), "model": model_name,
|
| 502 |
"choices": [{"index": candidate_index, "delta": delta_payload, "finish_reason": openai_finish_reason}]
|
app/openai_handler.py
CHANGED
|
@@ -233,19 +233,11 @@ class OpenAIDirectHandler:
|
|
| 233 |
del delta['extra_content']
|
| 234 |
|
| 235 |
content = delta.get('content', '')
|
| 236 |
-
print(f"DEBUG_OPENAI_STREAM: Chunk {chunk_count} - Original delta content: '{content}'") # Kilo Code Added Log
|
| 237 |
if content:
|
| 238 |
-
# print(f"DEBUG: Chunk {chunk_count} - Raw content: '{content}'")
|
| 239 |
# Use the processor to extract reasoning
|
| 240 |
processed_content, current_reasoning = reasoning_processor.process_chunk(content)
|
| 241 |
-
print(f"DEBUG_OPENAI_STREAM: Chunk {chunk_count} - Processed by StreamingProcessor: processed_content='{processed_content}', current_reasoning='{current_reasoning}'") # Kilo Code Added Log
|
| 242 |
-
|
| 243 |
-
# Debug logging for processing results
|
| 244 |
-
# if processed_content or current_reasoning:
|
| 245 |
-
# print(f"DEBUG: Chunk {chunk_count} - Processed content: '{processed_content}', Reasoning: '{current_reasoning[:50]}...' if len(current_reasoning) > 50 else '{current_reasoning}'")
|
| 246 |
|
| 247 |
# Send chunks for both reasoning and content as they arrive
|
| 248 |
-
# Kilo Code: Revised payload construction
|
| 249 |
original_choice = chunk_as_dict['choices'][0]
|
| 250 |
original_finish_reason = original_choice.get('finish_reason')
|
| 251 |
original_usage = original_choice.get('usage')
|
|
@@ -257,15 +249,10 @@ class OpenAIDirectHandler:
|
|
| 257 |
"created": chunk_as_dict["created"], "model": chunk_as_dict["model"],
|
| 258 |
"choices": [{"index": 0, "delta": reasoning_delta, "finish_reason": None}]
|
| 259 |
}
|
| 260 |
-
|
| 261 |
-
print(f"DEBUG_OPENAI_STREAM: Chunk {chunk_count} - Yielding REASONING: {yielded_data_str.strip()}")
|
| 262 |
-
yield yielded_data_str
|
| 263 |
|
| 264 |
if processed_content:
|
| 265 |
content_delta = {'content': processed_content}
|
| 266 |
-
# Determine if this processed_content chunk should carry the original finish_reason and usage.
|
| 267 |
-
# It should if the reasoning processor is NOT inside a tag after this,
|
| 268 |
-
# meaning this processed_content is the final part of any tagged content from original_content_from_delta.
|
| 269 |
finish_reason_for_this_content_delta = None
|
| 270 |
usage_for_this_content_delta = None
|
| 271 |
|
|
@@ -282,25 +269,13 @@ class OpenAIDirectHandler:
|
|
| 282 |
if usage_for_this_content_delta:
|
| 283 |
content_payload['choices'][0]['usage'] = usage_for_this_content_delta
|
| 284 |
|
| 285 |
-
|
| 286 |
-
print(f"DEBUG_OPENAI_STREAM: Chunk {chunk_count} - Yielding CONTENT: {yielded_data_str.strip()}")
|
| 287 |
-
yield yielded_data_str
|
| 288 |
has_sent_content = True
|
| 289 |
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
elif original_finish_reason: # Original delta had no content, but had a finish_reason
|
| 295 |
-
# This case handles chunks that are purely for signaling stream end or other non-content states.
|
| 296 |
-
yielded_data_str = f"data: {json.dumps(chunk_as_dict)}\n\n" # Yield original chunk as is
|
| 297 |
-
print(f"DEBUG_OPENAI_STREAM: Chunk {chunk_count} - Yielding (original delta no content, but has finish_reason): {yielded_data_str.strip()}")
|
| 298 |
-
yield yielded_data_str
|
| 299 |
-
# If original delta had no content and no finish_reason, it's an empty delta, yield as is.
|
| 300 |
-
elif not content and not original_finish_reason : # Kilo Code: Added this condition
|
| 301 |
-
yielded_data_str = f"data: {json.dumps(chunk_as_dict)}\n\n"
|
| 302 |
-
print(f"DEBUG_OPENAI_STREAM: Chunk {chunk_count} - Yielding (original delta empty): {yielded_data_str.strip()}")
|
| 303 |
-
yield yielded_data_str
|
| 304 |
else:
|
| 305 |
# Yield chunks without choices too (they might contain metadata)
|
| 306 |
yield f"data: {json.dumps(chunk_as_dict)}\n\n"
|
|
@@ -319,55 +294,43 @@ class OpenAIDirectHandler:
|
|
| 319 |
# print(f"DEBUG: Stream ended after {chunk_count} chunks. Buffer state - tag_buffer: '{reasoning_processor.tag_buffer}', "
|
| 320 |
# f"inside_tag: {reasoning_processor.inside_tag}, "
|
| 321 |
# f"reasoning_buffer: '{reasoning_processor.reasoning_buffer[:50]}...' if reasoning_processor.reasoning_buffer else ''")
|
| 322 |
-
|
| 323 |
# Flush any remaining buffered content
|
| 324 |
remaining_content, remaining_reasoning = reasoning_processor.flush_remaining()
|
| 325 |
-
print(f"DEBUG_OPENAI_STREAM: Flushed from StreamingProcessor: remaining_content='{remaining_content}', remaining_reasoning='{remaining_reasoning}'") # Kilo Code Added Log
|
| 326 |
|
| 327 |
# Send any remaining reasoning first
|
| 328 |
if remaining_reasoning:
|
| 329 |
-
# print(f"DEBUG: Flushing remaining reasoning: '{remaining_reasoning[:50]}...' if len(remaining_reasoning) > 50 else '{remaining_reasoning}'")
|
| 330 |
reasoning_flush_payload = {
|
| 331 |
-
"id": f"chatcmpl-flush-{int(time.time())}",
|
| 332 |
"object": "chat.completion.chunk",
|
| 333 |
"created": int(time.time()),
|
| 334 |
"model": request.model,
|
| 335 |
"choices": [{"index": 0, "delta": {"reasoning_content": remaining_reasoning}, "finish_reason": None}]
|
| 336 |
}
|
| 337 |
-
|
| 338 |
-
print(f"DEBUG_OPENAI_STREAM: Yielding from flush (reasoning): {yielded_data_str.strip()}") # Kilo Code Added Log
|
| 339 |
-
yield yielded_data_str
|
| 340 |
|
| 341 |
# Send any remaining content
|
| 342 |
if remaining_content:
|
| 343 |
-
# print(f"DEBUG: Flushing remaining content: '{remaining_content}'")
|
| 344 |
content_flush_payload = {
|
| 345 |
-
"id": f"chatcmpl-flush-{int(time.time())}",
|
| 346 |
"object": "chat.completion.chunk",
|
| 347 |
"created": int(time.time()),
|
| 348 |
"model": request.model,
|
| 349 |
"choices": [{"index": 0, "delta": {"content": remaining_content}, "finish_reason": None}]
|
| 350 |
}
|
| 351 |
-
|
| 352 |
-
print(f"DEBUG_OPENAI_STREAM: Yielding from flush (content): {yielded_data_str.strip()}") # Kilo Code Added Log
|
| 353 |
-
yield yielded_data_str
|
| 354 |
has_sent_content = True
|
| 355 |
|
| 356 |
# Always send a finish reason chunk
|
| 357 |
finish_payload = {
|
| 358 |
-
"id": f"chatcmpl-{int(time.time())}",
|
| 359 |
"object": "chat.completion.chunk",
|
| 360 |
"created": int(time.time()),
|
| 361 |
"model": request.model,
|
| 362 |
"choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}]
|
| 363 |
}
|
| 364 |
-
|
| 365 |
-
print(f"DEBUG_OPENAI_STREAM: Yielding finish chunk: {yielded_data_str.strip()}") # Kilo Code Added Log
|
| 366 |
-
yield yielded_data_str
|
| 367 |
|
| 368 |
-
|
| 369 |
-
print(f"DEBUG_OPENAI_STREAM: Yielding DONE: {yielded_data_str.strip()}") # Kilo Code Added Log
|
| 370 |
-
yield yielded_data_str
|
| 371 |
|
| 372 |
except Exception as stream_error:
|
| 373 |
error_msg = str(stream_error)
|
|
|
|
| 233 |
del delta['extra_content']
|
| 234 |
|
| 235 |
content = delta.get('content', '')
|
|
|
|
| 236 |
if content:
|
|
|
|
| 237 |
# Use the processor to extract reasoning
|
| 238 |
processed_content, current_reasoning = reasoning_processor.process_chunk(content)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 239 |
|
| 240 |
# Send chunks for both reasoning and content as they arrive
|
|
|
|
| 241 |
original_choice = chunk_as_dict['choices'][0]
|
| 242 |
original_finish_reason = original_choice.get('finish_reason')
|
| 243 |
original_usage = original_choice.get('usage')
|
|
|
|
| 249 |
"created": chunk_as_dict["created"], "model": chunk_as_dict["model"],
|
| 250 |
"choices": [{"index": 0, "delta": reasoning_delta, "finish_reason": None}]
|
| 251 |
}
|
| 252 |
+
yield f"data: {json.dumps(reasoning_payload)}\n\n"
|
|
|
|
|
|
|
| 253 |
|
| 254 |
if processed_content:
|
| 255 |
content_delta = {'content': processed_content}
|
|
|
|
|
|
|
|
|
|
| 256 |
finish_reason_for_this_content_delta = None
|
| 257 |
usage_for_this_content_delta = None
|
| 258 |
|
|
|
|
| 269 |
if usage_for_this_content_delta:
|
| 270 |
content_payload['choices'][0]['usage'] = usage_for_this_content_delta
|
| 271 |
|
| 272 |
+
yield f"data: {json.dumps(content_payload)}\n\n"
|
|
|
|
|
|
|
| 273 |
has_sent_content = True
|
| 274 |
|
| 275 |
+
elif original_choice.get('finish_reason'): # Check original_choice for finish_reason
|
| 276 |
+
yield f"data: {json.dumps(chunk_as_dict)}\n\n"
|
| 277 |
+
elif not content and not original_choice.get('finish_reason') :
|
| 278 |
+
yield f"data: {json.dumps(chunk_as_dict)}\n\n"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 279 |
else:
|
| 280 |
# Yield chunks without choices too (they might contain metadata)
|
| 281 |
yield f"data: {json.dumps(chunk_as_dict)}\n\n"
|
|
|
|
| 294 |
# print(f"DEBUG: Stream ended after {chunk_count} chunks. Buffer state - tag_buffer: '{reasoning_processor.tag_buffer}', "
|
| 295 |
# f"inside_tag: {reasoning_processor.inside_tag}, "
|
| 296 |
# f"reasoning_buffer: '{reasoning_processor.reasoning_buffer[:50]}...' if reasoning_processor.reasoning_buffer else ''")
|
|
|
|
| 297 |
# Flush any remaining buffered content
|
| 298 |
remaining_content, remaining_reasoning = reasoning_processor.flush_remaining()
|
|
|
|
| 299 |
|
| 300 |
# Send any remaining reasoning first
|
| 301 |
if remaining_reasoning:
|
|
|
|
| 302 |
reasoning_flush_payload = {
|
| 303 |
+
"id": f"chatcmpl-flush-{int(time.time())}",
|
| 304 |
"object": "chat.completion.chunk",
|
| 305 |
"created": int(time.time()),
|
| 306 |
"model": request.model,
|
| 307 |
"choices": [{"index": 0, "delta": {"reasoning_content": remaining_reasoning}, "finish_reason": None}]
|
| 308 |
}
|
| 309 |
+
yield f"data: {json.dumps(reasoning_flush_payload)}\n\n"
|
|
|
|
|
|
|
| 310 |
|
| 311 |
# Send any remaining content
|
| 312 |
if remaining_content:
|
|
|
|
| 313 |
content_flush_payload = {
|
| 314 |
+
"id": f"chatcmpl-flush-{int(time.time())}",
|
| 315 |
"object": "chat.completion.chunk",
|
| 316 |
"created": int(time.time()),
|
| 317 |
"model": request.model,
|
| 318 |
"choices": [{"index": 0, "delta": {"content": remaining_content}, "finish_reason": None}]
|
| 319 |
}
|
| 320 |
+
yield f"data: {json.dumps(content_flush_payload)}\n\n"
|
|
|
|
|
|
|
| 321 |
has_sent_content = True
|
| 322 |
|
| 323 |
# Always send a finish reason chunk
|
| 324 |
finish_payload = {
|
| 325 |
+
"id": f"chatcmpl-final-{int(time.time())}", # Kilo Code: Changed ID for clarity
|
| 326 |
"object": "chat.completion.chunk",
|
| 327 |
"created": int(time.time()),
|
| 328 |
"model": request.model,
|
| 329 |
"choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}]
|
| 330 |
}
|
| 331 |
+
yield f"data: {json.dumps(finish_payload)}\n\n"
|
|
|
|
|
|
|
| 332 |
|
| 333 |
+
yield "data: [DONE]\n\n"
|
|
|
|
|
|
|
| 334 |
|
| 335 |
except Exception as stream_error:
|
| 336 |
error_msg = str(stream_error)
|