Update main.py
Browse files
main.py
CHANGED
|
@@ -241,8 +241,11 @@ async def stream_replicate_response(replicate_model_id: str, input_payload: dict
|
|
| 241 |
except (json.JSONDecodeError, TypeError):
|
| 242 |
content_token = raw_data
|
| 243 |
|
|
|
|
| 244 |
# There is NO lstrip() or strip() here.
|
| 245 |
-
# This sends the raw, unmodified token.
|
|
|
|
|
|
|
| 246 |
|
| 247 |
accumulated_content += content_token
|
| 248 |
completion_tokens += 1
|
|
@@ -330,10 +333,10 @@ async def create_chat_completion(request: ChatCompletionRequest):
|
|
| 330 |
else:
|
| 331 |
output = ""
|
| 332 |
|
| 333 |
-
# ###
|
| 334 |
-
# Removed output.strip() to return the raw response
|
| 335 |
-
#
|
| 336 |
-
#
|
| 337 |
|
| 338 |
end_time = time.time()
|
| 339 |
prompt_tokens = len(replicate_input.get("prompt", "")) // 4
|
|
|
|
| 241 |
except (json.JSONDecodeError, TypeError):
|
| 242 |
content_token = raw_data
|
| 243 |
|
| 244 |
+
# ### THIS IS THE FIX ###
|
| 245 |
# There is NO lstrip() or strip() here.
|
| 246 |
+
# This sends the raw, unmodified token from Replicate.
|
| 247 |
+
# If the log shows "HowcanI", it's because the model
|
| 248 |
+
# sent "How", "can", "I" as separate tokens with no whitespace between them.
|
| 249 |
|
| 250 |
accumulated_content += content_token
|
| 251 |
completion_tokens += 1
|
|
|
|
| 333 |
else:
|
| 334 |
output = ""
|
| 335 |
|
| 336 |
+
# ### THIS IS THE FIX ###
|
| 337 |
+
# Removed output.strip() to return the raw response.
|
| 338 |
+
# This fixes the bug where a single space (" ") response
|
| 339 |
+
# would become "" and show content: "" in the JSON.
|
| 340 |
|
| 341 |
end_time = time.time()
|
| 342 |
prompt_tokens = len(replicate_input.get("prompt", "")) // 4
|