Spaces:
Runtime error
Runtime error
Commit
·
608950e
1
Parent(s):
ab616bd
Fixed streaming in chat_completion
Browse files
- main/routes.py +7 -7
main/routes.py
CHANGED
|
@@ -70,12 +70,10 @@ async def create_chat_completion(request: ChatCompletionRequest):
|
|
| 70 |
last_message = request.messages[-1].content
|
| 71 |
|
| 72 |
if request.stream:
|
| 73 |
-
# For streaming, we need to create a generator that yields OpenAI-compatible chunks
|
| 74 |
async def generate_stream():
|
| 75 |
async for chunk in api.generate_stream(
|
| 76 |
prompt=last_message,
|
| 77 |
):
|
| 78 |
-
# Create a streaming response chunk in OpenAI format
|
| 79 |
response_chunk = {
|
| 80 |
"id": "chatcmpl-123",
|
| 81 |
"object": "chat.completion.chunk",
|
|
@@ -89,16 +87,18 @@ async def create_chat_completion(request: ChatCompletionRequest):
|
|
| 89 |
"finish_reason": None
|
| 90 |
}]
|
| 91 |
}
|
|
|
|
| 92 |
yield f"data: {json.dumps(response_chunk)}\n\n"
|
| 93 |
-
|
| 94 |
-
# Send the final chunk
|
| 95 |
-
yield f"data: [DONE]\n\n"
|
| 96 |
|
| 97 |
return StreamingResponse(
|
| 98 |
generate_stream(),
|
| 99 |
-
media_type="text/event-stream"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
)
|
| 101 |
-
|
| 102 |
else:
|
| 103 |
# For non-streaming, generate the full response
|
| 104 |
response_text = await api.generate_response(
|
|
|
|
| 70 |
last_message = request.messages[-1].content
|
| 71 |
|
| 72 |
if request.stream:
|
|
|
|
| 73 |
async def generate_stream():
|
| 74 |
async for chunk in api.generate_stream(
|
| 75 |
prompt=last_message,
|
| 76 |
):
|
|
|
|
| 77 |
response_chunk = {
|
| 78 |
"id": "chatcmpl-123",
|
| 79 |
"object": "chat.completion.chunk",
|
|
|
|
| 87 |
"finish_reason": None
|
| 88 |
}]
|
| 89 |
}
|
| 90 |
+
# Need to format this exactly as SSE requires
|
| 91 |
yield f"data: {json.dumps(response_chunk)}\n\n"
|
| 92 |
+
yield "data: [DONE]\n\n"
|
|
|
|
|
|
|
| 93 |
|
| 94 |
return StreamingResponse(
|
| 95 |
generate_stream(),
|
| 96 |
+
media_type="text/event-stream",
|
| 97 |
+
headers={
|
| 98 |
+
"Cache-Control": "no-cache",
|
| 99 |
+
"Connection": "keep-alive",
|
| 100 |
+
}
|
| 101 |
)
|
|
|
|
| 102 |
else:
|
| 103 |
# For non-streaming, generate the full response
|
| 104 |
response_text = await api.generate_response(
|