Spaces:
Sleeping
Sleeping
Commit ·
611ae9c
1
Parent(s): 504d810
Fix output parsing: read the Flowith response as plain text instead of decoding it as JSON
Browse files
main.py
CHANGED
|
@@ -208,46 +208,40 @@ async def chat_completions(
|
|
| 208 |
detail_msg = f"Flowith API Error ({response.status_code})"
|
| 209 |
raise HTTPException(status_code=response.status_code, detail=detail_msg)
|
| 210 |
|
| 211 |
-
#
|
| 212 |
-
|
| 213 |
-
flowith_data = response.json()
|
| 214 |
-
except json.JSONDecodeError as e:
|
| 215 |
-
print(f"Error decoding Flowith JSON response: {e}. Response text: {response.text[:200]}...")
|
| 216 |
-
raise HTTPException(status_code=502, detail=f"Invalid JSON response from Flowith: {e}")
|
| 217 |
|
| 218 |
# 7. Handle response based on *client's* request.stream preference
|
| 219 |
if not request.stream:
|
| 220 |
-
# Client wants non-streaming:
|
| 221 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 222 |
else:
|
| 223 |
-
# Client wants streaming: Simulate streaming from the
|
| 224 |
-
# Client wants streaming: Simulate streaming word-by-word from the complete response
|
| 225 |
async def stream_generator() -> AsyncGenerator[str, None]:
|
| 226 |
# Ensure necessary imports are available (time, json, uuid are already imported)
|
| 227 |
-
# import time # Already imported around line 186
|
| 228 |
-
# import json # Already imported at top
|
| 229 |
-
# import uuid # Already imported at top
|
| 230 |
-
# import asyncio # Needed only if adding delay
|
| 231 |
|
| 232 |
chunk_id = f"chatcmpl-{uuid.uuid4()}"
|
| 233 |
model_name = request.model # Use the model requested by the client
|
| 234 |
|
| 235 |
-
#
|
| 236 |
-
full_content =
|
| 237 |
-
try:
|
| 238 |
-
# Try the expected structure first
|
| 239 |
-
full_content = flowith_data.get("choices", [{}])[0].get("message", {}).get("content", "")
|
| 240 |
-
if not full_content:
|
| 241 |
-
# Fallback: Check for other common fields
|
| 242 |
-
full_content = flowith_data.get("text", flowith_data.get("completion", ""))
|
| 243 |
-
|
| 244 |
-
if not full_content:
|
| 245 |
-
print(f"Warning: Could not extract content for streaming from Flowith response: {flowith_data}")
|
| 246 |
-
full_content = "" # Default to empty if extraction fails
|
| 247 |
-
|
| 248 |
-
except (AttributeError, IndexError, TypeError) as e:
|
| 249 |
-
print(f"Error extracting content for streaming: {e}. Data: {flowith_data}")
|
| 250 |
-
full_content = "" # Default to empty on error
|
| 251 |
|
| 252 |
# Define chunk size
|
| 253 |
chunk_size = 20
|
|
|
|
| 208 |
detail_msg = f"Flowith API Error ({response.status_code})"
|
| 209 |
raise HTTPException(status_code=response.status_code, detail=detail_msg)
|
| 210 |
|
| 211 |
+
# Get the plain text response directly
|
| 212 |
+
flowith_text = response.text
|
|
|
|
|
|
|
|
|
|
|
|
|
| 213 |
|
| 214 |
# 7. Handle response based on *client's* request.stream preference
|
| 215 |
if not request.stream:
|
| 216 |
+
# Client wants non-streaming: Construct OpenAI-compatible JSON from plain text
|
| 217 |
+
completion_id = f"chatcmpl-{uuid.uuid4()}"
|
| 218 |
+
created_timestamp = int(time.time())
|
| 219 |
+
response_payload = {
|
| 220 |
+
"id": completion_id,
|
| 221 |
+
"object": "chat.completion",
|
| 222 |
+
"created": created_timestamp,
|
| 223 |
+
"model": request.model, # Use the model from the original request
|
| 224 |
+
"choices": [{
|
| 225 |
+
"index": 0,
|
| 226 |
+
"message": {
|
| 227 |
+
"role": "assistant",
|
| 228 |
+
"content": flowith_text # Use the plain text here
|
| 229 |
+
},
|
| 230 |
+
"finish_reason": "stop" # Assume stop
|
| 231 |
+
}],
|
| 232 |
+
# "usage": {...} # Usage stats are typically not available/meaningful here
|
| 233 |
+
}
|
| 234 |
+
return JSONResponse(content=response_payload)
|
| 235 |
else:
|
| 236 |
+
# Client wants streaming: Simulate streaming from the plain text response
|
|
|
|
| 237 |
async def stream_generator() -> AsyncGenerator[str, None]:
|
| 238 |
# Ensure necessary imports are available (time, json, uuid are already imported)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 239 |
|
| 240 |
chunk_id = f"chatcmpl-{uuid.uuid4()}"
|
| 241 |
model_name = request.model # Use the model requested by the client
|
| 242 |
|
| 243 |
+
# Use the plain text directly as the full content
|
| 244 |
+
full_content = flowith_text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 245 |
|
| 246 |
# Define chunk size
|
| 247 |
chunk_size = 20
|