Update main.py
main.py CHANGED
@@ -1,3 +1,4 @@
+import time
 import os
 import httpx
 import json
@@ -92,7 +93,7 @@ async def stream_replicate_sse(replicate_model_id: str, input_payload: dict):
         stream_url = prediction.get("urls", {}).get("stream")
         prediction_id = prediction.get("id", "stream-unknown")
         if not stream_url:
-            yield json.dumps({
+            yield f"data: {json.dumps({'error': {'message': 'Model did not return a stream URL.'}})}\n\n"
             return
     except httpx.HTTPStatusError as e:
         error_details = e.response.text
@@ -100,7 +101,7 @@ async def stream_replicate_sse(replicate_model_id: str, input_payload: dict):
             error_json = e.response.json()
             error_details = error_json.get("detail", error_details)
         except json.JSONDecodeError: pass
-        yield json.dumps({
+        yield f"data: {json.dumps({'error': {'message': f'Upstream Error: {error_details}', 'type': 'replicate_error'}})}\n\n"
         return

     try:
@@ -112,49 +113,106 @@ async def stream_replicate_sse(replicate_model_id: str, input_payload: dict):
             if line.startswith("event:"):
                 current_event = line[len("event:"):].strip()
             elif line.startswith("data:"):
-                #
-
-                # causing both spacing issues and silent failures.
-                # This new logic is simple, robust, and correct.
+                # FIXED: Preserve all whitespace including leading/trailing spaces
+                raw_data = line[5:]  # Remove "data:" prefix

-                #
-
+                # Handle empty data lines (preserve them)
+                if not raw_data:
+                    continue
+
+                # Remove only the optional single space after data: if present
+                # This is per SSE spec and preserves actual content spaces
+                if raw_data.startswith(" "):
+                    data_content = raw_data[1:]  # Remove the first space only
+                else:
+                    data_content = raw_data

-                # 2. The SSE spec allows an optional leading space. Remove it if it exists.
-                # This prevents parsing errors without destroying content.
-                payload = raw_payload.lstrip(" ")
-
                 if current_event == "output":
-                    if not
+                    if not data_content:
                         continue

                     content_token = ""
                     try:
-                        #
-                        content_token = json.loads(
+                        # Handle JSON-encoded strings properly (including spaces)
+                        content_token = json.loads(data_content)
                     except (json.JSONDecodeError, TypeError):
-                        #
-                        content_token =
+                        # Handle plain text tokens (preserve as-is)
+                        content_token = data_content

-                    #
+                    # Create chunk with exact format you specified
                     chunk = {
-                        "
-
+                        "choices": [{
+                            "delta": {"content": content_token},
+                            "finish_reason": None,
+                            "index": 0,
+                            "logprobs": None,
+                            "native_finish_reason": None
+                        }],
+                        "created": int(time.time()),
+                        "id": f"gen-{int(time.time())}-{prediction_id[-12:]}",  # Format like your example
+                        "model": replicate_model_id,
+                        "object": "chat.completion.chunk",
+                        "provider": "Anthropic" if "anthropic" in replicate_model_id else "Replicate"
                     }
-                    yield json.dumps(chunk)
-
+                    yield f"data: {json.dumps(chunk)}\n\n"
+
                 elif current_event == "done":
+                    # Send usage chunk before done
+                    usage_chunk = {
+                        "choices": [{
+                            "delta": {},
+                            "finish_reason": None,
+                            "index": 0,
+                            "logprobs": None,
+                            "native_finish_reason": None
+                        }],
+                        "created": int(time.time()),
+                        "id": f"gen-{int(time.time())}-{prediction_id[-12:]}",
+                        "model": replicate_model_id,
+                        "object": "chat.completion.chunk",
+                        "provider": "Anthropic" if "anthropic" in replicate_model_id else "Replicate",
+                        "usage": {
+                            "cache_discount": 0,
+                            "completion_tokens": 0,
+                            "completion_tokens_details": {"image_tokens": 0, "reasoning_tokens": 0},
+                            "cost": 0,
+                            "cost_details": {
+                                "upstream_inference_completions_cost": 0,
+                                "upstream_inference_cost": None,
+                                "upstream_inference_prompt_cost": 0
+                            },
+                            "input_tokens": 0,
+                            "is_byok": False,
+                            "prompt_tokens": 0,
+                            "prompt_tokens_details": {"audio_tokens": 0, "cached_tokens": 0},
+                            "total_tokens": 0
+                        }
+                    }
+                    yield f"data: {json.dumps(usage_chunk)}\n\n"
+
+                    # Send final chunk with stop reason
+                    final_chunk = {
+                        "choices": [{
+                            "delta": {},
+                            "finish_reason": "stop",
+                            "index": 0,
+                            "logprobs": None,
+                            "native_finish_reason": "end_turn"
+                        }],
+                        "created": int(time.time()),
+                        "id": f"gen-{int(time.time())}-{prediction_id[-12:]}",
+                        "model": replicate_model_id,
+                        "object": "chat.completion.chunk",
+                        "provider": "Anthropic" if "anthropic" in replicate_model_id else "Replicate"
+                    }
+                    yield f"data: {json.dumps(final_chunk)}\n\n"
                     break
     except httpx.ReadTimeout:
-        yield json.dumps({
+        yield f"data: {json.dumps({'error': {'message': 'Stream timed out.', 'type': 'timeout_error'}})}\n\n"
         return

-
-
-        "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}]
-    }
-    yield json.dumps(final_chunk)
-    yield "[DONE]"
+    # Send [DONE] event
+    yield "data: [DONE]\n\n"

 # --- Endpoints ---
 @app.get("/v1/models")
@@ -186,4 +244,4 @@ async def create_chat_completion(request: OpenAIChatCompletionRequest):
             "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
         }
     except httpx.HTTPStatusError as e:
-        raise HTTPException(status_code=e.response.status_code, detail=f"Error from Replicate API: {e.response.text}")
+        raise HTTPException(status_code=e.response.status_code, detail=f"Error from Replicate API: {e.response.text}")
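
For context, the streaming handler above now emits OpenAI-style chat.completion.chunk objects over SSE, followed by a usage chunk, a stop chunk, and a [DONE] marker. Below is a minimal sketch of a client consuming this proxy. It assumes the app is served locally on port 8000, that the chat-completions route is POST /v1/chat/completions, and that the request body accepts OpenAI-style model/messages/stream fields; none of these details are visible in this diff, and the model id and the stream_chat helper are placeholders for illustration.

import json
import httpx

def stream_chat(prompt: str) -> None:
    # Hypothetical request body; field names follow the OpenAI chat format the proxy mimics.
    payload = {
        "model": "anthropic/claude-3.5-haiku",  # placeholder model id
        "messages": [{"role": "user", "content": prompt}],
        "stream": True,
    }
    # Assumed local base URL and route; adjust to the actual deployment.
    with httpx.stream("POST", "http://localhost:8000/v1/chat/completions",
                      json=payload, timeout=None) as response:
        for line in response.iter_lines():
            if not line.startswith("data:"):
                continue
            data = line[len("data:"):].strip()
            if data == "[DONE]":
                break
            chunk = json.loads(data)
            if "error" in chunk:
                raise RuntimeError(chunk["error"]["message"])
            # Usage and final chunks carry an empty delta, so default to "".
            print(chunk["choices"][0]["delta"].get("content", ""), end="", flush=True)

if __name__ == "__main__":
    stream_chat("Say hello in one sentence.")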
|