Spaces:

rkihacker
/

R2OAI

Paused

App Files Files Community

rkihacker committed on Oct 21

Commit

c5a8085

verified ·

1 Parent(s): 3a333bb

Update main.py

Browse files

Files changed (1) hide show

main.py +26 -21

main.py CHANGED Viewed

@@ -16,7 +16,7 @@ if not REPLICATE_API_TOKEN:
     raise ValueError("REPLICATE_API_TOKEN environment variable not set.")
 # FastAPI Init
-app = FastAPI(title="Replicate to OpenAI Compatibility Layer", version="8.0.0 (Definitive Spacing Fix)")
 # --- Pydantic Models ---
 class ModelCard(BaseModel):
@@ -104,40 +104,45 @@ async def stream_replicate_sse(replicate_model_id: str, input_payload: dict):
             return
         try:
-            async with client.stream("GET", stream_url, headers={"Accept": "text-event-stream"}, timeout=None) as sse:
                 current_event = None
                 async for line in sse.aiter_lines():
                     if line.startswith("event:"):
                         current_event = line[len("event:"):].strip()
                     elif line.startswith("data:"):
-                        # --- START OF DEFINITIVE SPACING FIX ---
-                        # The .strip() method was the bug. It removed crucial whitespace.
-                        # This new logic correctly implements the SSE spec.
-                        raw_data = line[len("data:"):]
-                        if raw_data.startswith(" "):
-                            # Remove only the single, optional leading space
-                            data = raw_data[1:]
-                        else:
-                            data = raw_data
                         if current_event == "output":
-                            # The data is now guaranteed to have its whitespace preserved.
-                            # Replicate sometimes sends tokens as JSON strings (e.g., "\" a\""),
-                            # so we still need to decode them.
                             content_token = ""
                             try:
-                                content_token = json.loads(data)
-                            except json.JSONDecodeError:
-                                # Not a JSON string, use the raw data
-                                content_token = data
-                            # We must send content_token even if it's just a space
                             chunk = {
                                 "id": prediction_id, "object": "chat.completion.chunk", "created": int(time.time()), "model": replicate_model_id,
                                 "choices": [{"index": 0, "delta": {"content": content_token}, "finish_reason": None}]
                             }
                             yield json.dumps(chunk)
-                        # --- END OF DEFINITIVE SPACING FIX ---
                         elif current_event == "done":
                             break
         except httpx.ReadTimeout:

     raise ValueError("REPLICATE_API_TOKEN environment variable not set.")
 # FastAPI Init
+app = FastAPI(title="Replicate to OpenAI Compatibility Layer", version="9.0.0 (Definitive Streaming Fix)")
 # --- Pydantic Models ---
 class ModelCard(BaseModel):
             return
         try:
+            async with client.stream("GET", stream_url, headers={"Accept": "text/event-stream"}, timeout=None) as sse:
                 current_event = None
                 async for line in sse.aiter_lines():
+                    if not line: # Skip empty lines
+                        continue
                     if line.startswith("event:"):
                         current_event = line[len("event:"):].strip()
                     elif line.startswith("data:"):
+                        # --- START OF DEFINITIVE FIX ---
+                        # Previous logic was flawed and removed critical whitespace,
+                        # causing both spacing issues and silent failures.
+                        # This new logic is simple, robust, and correct.
+                        # 1. Get the entire payload after "data:"
+                        raw_payload = line[len("data:"):]
+                        # 2. The SSE spec allows one optional leading space after "data:".
+                        # NOTE: lstrip(" ") removes every leading space, not only that one.
+                        payload = raw_payload.lstrip(" ")
                         if current_event == "output":
+                            if not payload: # Skip if the payload is now empty after lstrip
+                                continue
                             content_token = ""
                             try:
+                                # This handles JSON-encoded strings like "\" Hello\""
+                                content_token = json.loads(payload)
+                            except (json.JSONDecodeError, TypeError):
+                                # This handles plain text tokens
+                                content_token = payload
+                            # Yield the token. A JSON-encoded single space ("\" \"") is preserved; a plain-text one is stripped and skipped above.
                             chunk = {
                                 "id": prediction_id, "object": "chat.completion.chunk", "created": int(time.time()), "model": replicate_model_id,
                                 "choices": [{"index": 0, "delta": {"content": content_token}, "finish_reason": None}]
                             }
                             yield json.dumps(chunk)
+                        # --- END OF DEFINITIVE FIX ---
                         elif current_event == "done":
                             break
         except httpx.ReadTimeout: