Update main.py
Browse files
main.py
CHANGED
|
@@ -16,7 +16,7 @@ if not REPLICATE_API_TOKEN:
|
|
| 16 |
raise ValueError("REPLICATE_API_TOKEN environment variable not set.")
|
| 17 |
|
| 18 |
# FastAPI Init
|
| 19 |
-
app = FastAPI(title="Replicate to OpenAI Compatibility Layer", version="
|
| 20 |
|
| 21 |
# --- Pydantic Models ---
|
| 22 |
class ModelCard(BaseModel):
|
|
@@ -104,40 +104,45 @@ async def stream_replicate_sse(replicate_model_id: str, input_payload: dict):
|
|
| 104 |
return
|
| 105 |
|
| 106 |
try:
|
| 107 |
-
async with client.stream("GET", stream_url, headers={"Accept": "text
|
| 108 |
current_event = None
|
| 109 |
async for line in sse.aiter_lines():
|
|
|
|
|
|
|
| 110 |
if line.startswith("event:"):
|
| 111 |
current_event = line[len("event:"):].strip()
|
| 112 |
elif line.startswith("data:"):
|
| 113 |
-
# --- START OF DEFINITIVE
|
| 114 |
-
#
|
| 115 |
-
#
|
| 116 |
-
|
| 117 |
-
if raw_data.startswith(" "):
|
| 118 |
-
# Remove only the single, optional leading space
|
| 119 |
-
data = raw_data[1:]
|
| 120 |
-
else:
|
| 121 |
-
data = raw_data
|
| 122 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 123 |
if current_event == "output":
|
| 124 |
-
#
|
| 125 |
-
|
| 126 |
-
|
| 127 |
content_token = ""
|
| 128 |
try:
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
|
|
|
| 135 |
chunk = {
|
| 136 |
"id": prediction_id, "object": "chat.completion.chunk", "created": int(time.time()), "model": replicate_model_id,
|
| 137 |
"choices": [{"index": 0, "delta": {"content": content_token}, "finish_reason": None}]
|
| 138 |
}
|
| 139 |
yield json.dumps(chunk)
|
| 140 |
-
# --- END OF DEFINITIVE
|
| 141 |
elif current_event == "done":
|
| 142 |
break
|
| 143 |
except httpx.ReadTimeout:
|
|
|
|
| 16 |
raise ValueError("REPLICATE_API_TOKEN environment variable not set.")
|
| 17 |
|
| 18 |
# FastAPI Init
|
| 19 |
+
app = FastAPI(title="Replicate to OpenAI Compatibility Layer", version="9.0.0 (Definitive Streaming Fix)")
|
| 20 |
|
| 21 |
# --- Pydantic Models ---
|
| 22 |
class ModelCard(BaseModel):
|
|
|
|
| 104 |
return
|
| 105 |
|
| 106 |
try:
|
| 107 |
+
async with client.stream("GET", stream_url, headers={"Accept": "text/event-stream"}, timeout=None) as sse:
|
| 108 |
current_event = None
|
| 109 |
async for line in sse.aiter_lines():
|
| 110 |
+
if not line: # Skip empty lines
|
| 111 |
+
continue
|
| 112 |
if line.startswith("event:"):
|
| 113 |
current_event = line[len("event:"):].strip()
|
| 114 |
elif line.startswith("data:"):
|
| 115 |
+
# --- START OF DEFINITIVE FIX ---
|
| 116 |
+
# Previous logic was flawed and removed critical whitespace,
|
| 117 |
+
# causing both spacing issues and silent failures.
|
| 118 |
+
# This new logic is simple, robust, and correct.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 119 |
|
| 120 |
+
# 1. Get the entire payload after "data:"
|
| 121 |
+
raw_payload = line[len("data:"):]
|
| 122 |
+
|
| 123 |
+
# 2. The SSE spec allows an optional leading space. Remove it if it exists.
|
| 124 |
+
# This prevents parsing errors without destroying content.
|
| 125 |
+
payload = raw_payload.lstrip(" ")
|
| 126 |
+
|
| 127 |
if current_event == "output":
|
| 128 |
+
if not payload: # Skip if the payload is now empty after lstrip
|
| 129 |
+
continue
|
| 130 |
+
|
| 131 |
content_token = ""
|
| 132 |
try:
|
| 133 |
+
# This handles JSON-encoded strings like "\" Hello\""
|
| 134 |
+
content_token = json.loads(payload)
|
| 135 |
+
except (json.JSONDecodeError, TypeError):
|
| 136 |
+
# This handles plain text tokens
|
| 137 |
+
content_token = payload
|
| 138 |
+
|
| 139 |
+
# Yield the token. It can now correctly be a single space " ".
|
| 140 |
chunk = {
|
| 141 |
"id": prediction_id, "object": "chat.completion.chunk", "created": int(time.time()), "model": replicate_model_id,
|
| 142 |
"choices": [{"index": 0, "delta": {"content": content_token}, "finish_reason": None}]
|
| 143 |
}
|
| 144 |
yield json.dumps(chunk)
|
| 145 |
+
# --- END OF DEFINITIVE FIX ---
|
| 146 |
elif current_event == "done":
|
| 147 |
break
|
| 148 |
except httpx.ReadTimeout:
|