Spaces:

rkihacker
/

R2OAI

Paused

App Files Files Community

rkihacker committed on Oct 21

Commit

a135be4

verified ·

1 Parent(s): c466862

Update main.py

Browse files

Files changed (1) hide show

main.py +45 -59

main.py CHANGED Viewed

@@ -16,7 +16,7 @@ if not REPLICATE_API_TOKEN:
     raise ValueError("REPLICATE_API_TOKEN environment variable not set.")
 # FastAPI Init
-app = FastAPI(title="Replicate to OpenAI Compatibility Layer", version="6.0.0 (Claude Vision Enabled)")
 # --- Pydantic Models ---
 class ModelCard(BaseModel):
@@ -30,71 +30,59 @@ class OpenAIChatCompletionRequest(BaseModel):
 # --- Supported Models ---
 SUPPORTED_MODELS = {
-    # Text Models
     "llama3-8b-instruct": "meta/meta-llama-3-8b-instruct",
-    # Anthropic Claude Models (Vision Enabled)
     "claude-4.5-haiku": "anthropic/claude-4.5-haiku",
     "claude-4.5-sonnet": "anthropic/claude-4.5-sonnet",
-    # Other Vision Model (uses different input format)
     "llava-13b": "yorickvp/llava-13b:e272157381e2a3bf12df3a8edd1f38d1dbd736bbb7437277c8b34175f8fce358"
 }
 # --- Core Logic ---
-def prepare_replicate_input(request: OpenAIChatCompletionRequest, replicate_id: str) -> Dict[str, Any]:
     """
-    Formats the input for the Replicate API based on the model's requirements.
-    - Modern Claude models accept the 'messages' array directly for multimodal input.
-    - Other models may require a flattened 'prompt' string and a separate 'image' field.
     """
     payload = {}
-    # --- MODEL-AWARE PAYLOAD PREPARATION ---
-    if "anthropic/claude" in replicate_id:
-        # These models support the OpenAI-like 'messages' array directly.
-        # This is the correct way to handle multimodal (image) inputs for Claude.
-        messages_for_payload = []
-        system_prompt = None
-        for msg in request.messages:
-            if msg.role == "system":
-                system_prompt = str(msg.content)
             else:
-                # Convert Pydantic model to dict and add to the list
-                messages_for_payload.append(msg.dict())
-        payload["messages"] = messages_for_payload
-        if system_prompt:
-            payload["system_prompt"] = system_prompt
-    else:
-        # Fallback for models that require a flattened prompt string (e.g., Llama, Llava)
-        prompt_parts = []
-        image_input = None
-        for msg in request.messages:
-            if msg.role == "system":
-                # System prompts are handled differently or prepended by the user
-                # for these models, often as part of the main prompt.
-                # For simplicity, we'll place it at the beginning.
-                prompt_parts.insert(0, str(msg.content))
-            elif msg.role == "assistant":
-                prompt_parts.append(f"Assistant: {msg.content}")
-            elif msg.role == "user":
-                user_text_content = ""
-                if isinstance(msg.content, list):
-                    for item in msg.content:
-                        if item.get("type") == "text":
-                            user_text_content += item.get("text", "")
-                        elif item.get("type") == "image_url":
-                            image_url_data = item.get("image_url", {})
-                            image_input = image_url_data.get("url")
-                else:
-                    user_text_content = str(msg.content)
-                prompt_parts.append(f"User: {user_text_content}")
-        prompt_parts.append("Assistant:")
-        payload["prompt"] = "\n\n".join(prompt_parts)
-        if image_input:
-            payload["image"] = image_input
     # Map common OpenAI parameters to Replicate equivalents
     if request.max_tokens: payload["max_new_tokens"] = request.max_tokens
     if request.temperature: payload["temperature"] = request.temperature
@@ -140,7 +128,7 @@ async def stream_replicate_sse(replicate_model_id: str, input_payload: dict):
                         if current_event == "output":
                             if data:
                                 chunk = {
-                                    "id": prediction_id, "object": "chat.completion.chunk", "created": int(time.time()), "model": replicate_id,
                                     "choices": [{"index": 0, "delta": {"content": data}, "finish_reason": None}]
                                 }
                                 yield json.dumps(chunk)
@@ -151,7 +139,7 @@ async def stream_replicate_sse(replicate_model_id: str, input_payload: dict):
             return
     final_chunk = {
-        "id": prediction_id, "object": "chat.completion.chunk", "created": int(time.time()), "model": replicate_id,
         "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}]
     }
     yield json.dumps(final_chunk)
@@ -169,15 +157,13 @@ async def create_chat_completion(request: OpenAIChatCompletionRequest):
     if request.model not in SUPPORTED_MODELS:
         raise HTTPException(status_code=404, detail=f"Model not found. Available models: {list(SUPPORTED_MODELS.keys())}")
-    replicate_id = SUPPORTED_MODELS[request.model]
-    # Pass the replicate_id to the prepare function so it knows which format to use
-    replicate_input = prepare_replicate_input(request, replicate_id)
     if request.stream:
-        return EventSourceResponse(stream_replicate_sse(replicate_id, replicate_input), media_type="text/event-stream")
     # Non-streaming fallback
-    url = f"https://api.replicate.com/v1/models/{replicate_id}/predictions"
     headers = {"Authorization": f"Bearer {REPLICATE_API_TOKEN}", "Content-Type": "application/json", "Prefer": "wait=120"}
     async with httpx.AsyncClient() as client:
         try:

     raise ValueError("REPLICATE_API_TOKEN environment variable not set.")
 # FastAPI Init
+app = FastAPI(title="Replicate to OpenAI Compatibility Layer", version="7.0.0 (Unified Prompt Fix)")
 # --- Pydantic Models ---
 class ModelCard(BaseModel):
 # --- Supported Models ---
 SUPPORTED_MODELS = {
     "llama3-8b-instruct": "meta/meta-llama-3-8b-instruct",
     "claude-4.5-haiku": "anthropic/claude-4.5-haiku",
     "claude-4.5-sonnet": "anthropic/claude-4.5-sonnet",
     "llava-13b": "yorickvp/llava-13b:e272157381e2a3bf12df3a8edd1f38d1dbd736bbb7437277c8b34175f8fce358"
 }
 # --- Core Logic ---
+def prepare_replicate_input(request: OpenAIChatCompletionRequest) -> Dict[str, Any]:
     """
+    Formats the input for the Replicate API. This function now uses a unified approach
+    for all models, flattening the message history into a single 'prompt' string
+    and handling images separately, as required by Replicate's API.
     """
     payload = {}
+    prompt_parts = []
+    system_prompt = None
+    image_input = None
+    for msg in request.messages:
+        if msg.role == "system":
+            # Extract system prompt; it will be a separate parameter.
+            system_prompt = str(msg.content)
+        elif msg.role == "assistant":
+            prompt_parts.append(f"Assistant: {msg.content}")
+        elif msg.role == "user":
+            user_text_content = ""
+            if isinstance(msg.content, list):
+                # Handle multimodal (vision) input from OpenAI format
+                for item in msg.content:
+                    if item.get("type") == "text":
+                        user_text_content += item.get("text", "")
+                    elif item.get("type") == "image_url":
+                        image_url_data = item.get("image_url", {})
+                        # Forwarded as the 'image' input (presumably what Claude, Llava, etc. expect on Replicate - confirm); only the last image seen is kept
+                        image_input = image_url_data.get("url")
             else:
+                user_text_content = str(msg.content)
+            prompt_parts.append(f"User: {user_text_content}")
+    # The final "Assistant:" turn prompts the model for a response.
+    prompt_parts.append("Assistant:")
+    # Every supported model is sent one flattened 'prompt' string (turns joined by blank lines).
+    payload["prompt"] = "\n\n".join(prompt_parts)
+    if system_prompt:
+        payload["system_prompt"] = system_prompt
+    if image_input:
+        payload["image"] = image_input
     # Map common OpenAI parameters to Replicate equivalents
     if request.max_tokens: payload["max_new_tokens"] = request.max_tokens
     if request.temperature: payload["temperature"] = request.temperature
                         if current_event == "output":
                             if data:
                                 chunk = {
+                                    "id": prediction_id, "object": "chat.completion.chunk", "created": int(time.time()), "model": replicate_model_id,
                                     "choices": [{"index": 0, "delta": {"content": data}, "finish_reason": None}]
                                 }
                                 yield json.dumps(chunk)
             return
     final_chunk = {
+        "id": prediction_id, "object": "chat.completion.chunk", "created": int(time.time()), "model": replicate_model_id,
         "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}]
     }
     yield json.dumps(final_chunk)
     if request.model not in SUPPORTED_MODELS:
         raise HTTPException(status_code=404, detail=f"Model not found. Available models: {list(SUPPORTED_MODELS.keys())}")
+    replicate_input = prepare_replicate_input(request)
     if request.stream:
+        return EventSourceResponse(stream_replicate_sse(SUPPORTED_MODELS[request.model], replicate_input), media_type="text/event-stream")
     # Non-streaming fallback
+    url = f"https://api.replicate.com/v1/models/{SUPPORTED_MODELS[request.model]}/predictions"
     headers = {"Authorization": f"Bearer {REPLICATE_API_TOKEN}", "Content-Type": "application/json", "Prefer": "wait=120"}
     async with httpx.AsyncClient() as client:
         try: