Update main.py

main.py CHANGED
@@ -16,7 +16,7 @@ if not REPLICATE_API_TOKEN:
     raise ValueError("REPLICATE_API_TOKEN environment variable not set.")
 
 # FastAPI Init
-app = FastAPI(title="Replicate to OpenAI Compatibility Layer", version="
+app = FastAPI(title="Replicate to OpenAI Compatibility Layer", version="6.0.0 (Claude Vision Enabled)")
 
 # --- Pydantic Models ---
 class ModelCard(BaseModel):
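Note: the ModelCard and OpenAIChatCompletionRequest definitions sit in the collapsed context above. A minimal sketch of the shapes the changed code relies on; the field list is an assumption inferred from how the diff uses these models, not the file's actual definitions:

from typing import Any, Dict, List, Optional, Union
from pydantic import BaseModel

class ModelCard(BaseModel):
    # OpenAI-style /v1/models entry; fields beyond `id` are assumptions.
    id: str
    object: str = "model"
    owned_by: str = "replicate"

class ChatMessage(BaseModel):
    role: str  # "system" | "user" | "assistant"
    # Plain text, or a list of multimodal parts such as
    # {"type": "text", "text": ...} / {"type": "image_url", "image_url": {"url": ...}}
    content: Union[str, List[Dict[str, Any]]]

class OpenAIChatCompletionRequest(BaseModel):
    model: str
    messages: List[ChatMessage]
    max_tokens: Optional[int] = None
    temperature: Optional[float] = None
    stream: bool = False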
@@ -30,56 +30,70 @@ class OpenAIChatCompletionRequest(BaseModel):
 
 # --- Supported Models ---
 SUPPORTED_MODELS = {
+    # Text Models
     "llama3-8b-instruct": "meta/meta-llama-3-8b-instruct",
-
+    # Anthropic Claude Models (Vision Enabled)
+    "claude-4.5-haiku": "anthropic/claude-4.5-haiku",
+    "claude-4.5-sonnet": "anthropic/claude-4.5-sonnet",
+    # Other Vision Model (uses different input format)
+    "llava-13b": "yorickvp/llava-13b:e272157381e2a3bf12df3a8edd1f38d1dbd736bbb7437277c8b34175f8fce358"
 }
 
 # --- Core Logic ---
-def prepare_replicate_input(request: OpenAIChatCompletionRequest) -> Dict[str, Any]:
+def prepare_replicate_input(request: OpenAIChatCompletionRequest, replicate_id: str) -> Dict[str, Any]:
     """
-    Formats the input for Replicate API
+    Formats the input for the Replicate API based on the model's requirements.
+    - Modern Claude models accept the 'messages' array directly for multimodal input.
+    - Other models may require a flattened 'prompt' string and a separate 'image' field.
     """
     payload = {}
-    prompt_parts = []
-    system_prompt = None
-    image_url = None  # Variable to hold the image data URI
-
-    for msg in request.messages:
-        if msg.role == "system":
-            system_prompt = str(msg.content)
-        elif msg.role == "user":
-            # --- VISION SUPPORT START ---
-            if isinstance(msg.content, list):
-                # This is a multi-modal request (text + image)
-                text_content = ""
-                for part in msg.content:
-                    if part.get("type") == "text":
-                        text_content += part.get("text", "") + "\n"
-                    elif part.get("type") == "image_url":
-                        # Capture the first image URL found
-                        if not image_url:
-                            image_url = part.get("image_url", {}).get("url")
-                # Use the official Claude "Human:" prefix for the prompt
-                prompt_parts.append(f"Human: {text_content.strip()}")
-            else:
-                # Standard text-only message
-                prompt_parts.append(f"Human: {msg.content}")
-            # --- VISION SUPPORT END ---
-        elif msg.role == "assistant":
-            # Use the official Claude "Assistant:" prefix for the prompt
-            prompt_parts.append(f"Assistant: {msg.content}")
-
-    # Add the final "Assistant:" turn to prompt the model for its response.
-    prompt_parts.append("Assistant:")
 
-
-
-
-
-
-
-
-
+    # --- MODEL-AWARE PAYLOAD PREPARATION ---
+    if "anthropic/claude" in replicate_id:
+        # These models support the OpenAI-like 'messages' array directly.
+        # This is the correct way to handle multimodal (image) inputs for Claude.
+        messages_for_payload = []
+        system_prompt = None
+        for msg in request.messages:
+            if msg.role == "system":
+                system_prompt = str(msg.content)
+            else:
+                # Convert Pydantic model to dict and add to the list
+                messages_for_payload.append(msg.dict())
+
+        payload["messages"] = messages_for_payload
+        if system_prompt:
+            payload["system_prompt"] = system_prompt
+
+    else:
+        # Fallback for models that require a flattened prompt string (e.g., Llama, Llava)
+        prompt_parts = []
+        image_input = None
+        for msg in request.messages:
+            if msg.role == "system":
+                # System prompts are handled differently or prepended by the user
+                # for these models, often as part of the main prompt.
+                # For simplicity, we'll place it at the beginning.
+                prompt_parts.insert(0, str(msg.content))
+            elif msg.role == "assistant":
+                prompt_parts.append(f"Assistant: {msg.content}")
+            elif msg.role == "user":
+                user_text_content = ""
+                if isinstance(msg.content, list):
+                    for item in msg.content:
+                        if item.get("type") == "text":
+                            user_text_content += item.get("text", "")
+                        elif item.get("type") == "image_url":
+                            image_url_data = item.get("image_url", {})
+                            image_input = image_url_data.get("url")
+                else:
+                    user_text_content = str(msg.content)
+                prompt_parts.append(f"User: {user_text_content}")
+
+        prompt_parts.append("Assistant:")
+        payload["prompt"] = "\n\n".join(prompt_parts)
+        if image_input:
+            payload["image"] = image_input
 
     # Map common OpenAI parameters to Replicate equivalents
     if request.max_tokens: payload["max_new_tokens"] = request.max_tokens
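To see what the two branches produce, here is an illustrative call with an invented multimodal request; the payload keys are exactly those set above, while the message values are made up for the example:

# Illustrative only: the request values are invented.
request = OpenAIChatCompletionRequest(
    model="claude-4.5-sonnet",
    messages=[
        {"role": "system", "content": "You are terse."},
        {"role": "user", "content": [
            {"type": "text", "text": "What is in this image?"},
            {"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}},
        ]},
    ],
)

# Claude branch: the messages array passes through; the system turn is split out.
prepare_replicate_input(request, "anthropic/claude-4.5-sonnet")
# -> {"messages": [{"role": "user", "content": [...]}],
#     "system_prompt": "You are terse."}

# Fallback branch (e.g. llava-13b): flattened prompt plus a separate image field.
prepare_replicate_input(request, "yorickvp/llava-13b:e272157381e2...")
# -> {"prompt": "You are terse.\n\nUser: What is in this image?\n\nAssistant:",
#     "image": "data:image/png;base64,..."}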
@@ -123,15 +137,13 @@ async def stream_replicate_sse(replicate_model_id: str, input_payload: dict):
                     current_event = line[len("event:"):].strip()
                 elif line.startswith("data:"):
                     data = line[len("data:"):].strip()
-
                     if current_event == "output":
                         if data:
                             chunk = {
-                                "id": prediction_id, "object": "chat.completion.chunk", "created": int(time.time()), "model":
+                                "id": prediction_id, "object": "chat.completion.chunk", "created": int(time.time()), "model": replicate_model_id,
                                 "choices": [{"index": 0, "delta": {"content": data}, "finish_reason": None}]
                             }
                             yield json.dumps(chunk)
-
                     elif current_event == "done":
                         break
         except httpx.ReadTimeout:
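The hunk elides the surrounding request/streaming setup. A hedged sketch of what that context presumably looks like; the function and parameter names come from the hunk header, while the endpoint, the prediction-creation body, and the urls.stream detail are assumptions about Replicate's streaming API:

# Hedged sketch only: assumes Replicate's "create prediction, then follow
# urls.stream" SSE flow; the repo's actual implementation may differ.
async def stream_replicate_sse(replicate_model_id: str, input_payload: dict):
    headers = {"Authorization": f"Bearer {REPLICATE_API_TOKEN}"}
    async with httpx.AsyncClient(timeout=None) as client:
        create = await client.post(
            f"https://api.replicate.com/v1/models/{replicate_model_id}/predictions",
            headers=headers,
            json={"input": input_payload, "stream": True},
        )
        prediction = create.json()
        prediction_id = prediction["id"]
        try:
            async with client.stream(
                "GET", prediction["urls"]["stream"],
                headers={**headers, "Accept": "text/event-stream"},
            ) as response:
                current_event = None
                async for line in response.aiter_lines():
                    if line.startswith("event:"):
                        # ...continues with the "event:" / "data:" handling
                        # shown in the hunk above
                        ...
        except httpx.ReadTimeout:
            return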
@@ -139,7 +151,7 @@ async def stream_replicate_sse(replicate_model_id: str, input_payload: dict):
             return
 
         final_chunk = {
-            "id": prediction_id, "object": "chat.completion.chunk", "created": int(time.time()), "model":
+            "id": prediction_id, "object": "chat.completion.chunk", "created": int(time.time()), "model": replicate_model_id,
             "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}]
         }
         yield json.dumps(final_chunk)
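On the wire, EventSourceResponse frames each yielded string as one SSE data: line, so a streaming client sees OpenAI-style chunks like these (id, timestamps, and content are illustrative):

data: {"id": "pred_abc123", "object": "chat.completion.chunk", "created": 1712345678, "model": "anthropic/claude-4.5-sonnet", "choices": [{"index": 0, "delta": {"content": "Hello"}, "finish_reason": null}]}

data: {"id": "pred_abc123", "object": "chat.completion.chunk", "created": 1712345680, "model": "anthropic/claude-4.5-sonnet", "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}]}

Note that the generator stops after the finish_reason "stop" chunk and never emits OpenAI's usual data: [DONE] sentinel, which strict OpenAI clients may wait for.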
@@ -158,7 +170,8 @@ async def create_chat_completion(request: OpenAIChatCompletionRequest):
         raise HTTPException(status_code=404, detail=f"Model not found. Available models: {list(SUPPORTED_MODELS.keys())}")
 
     replicate_id = SUPPORTED_MODELS[request.model]
-    replicate_input = prepare_replicate_input(request)
+    # Pass the replicate_id to the prepare function so it knows which format to use
+    replicate_input = prepare_replicate_input(request, replicate_id)
 
     if request.stream:
         return EventSourceResponse(stream_replicate_sse(replicate_id, replicate_input), media_type="text/event-stream")
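Assuming the handler is mounted at the standard /v1/chat/completions path (the route decorator is outside this diff), the layer can be exercised with the stock OpenAI Python client; the base URL, API key, and image URL here are illustrative:

from openai import OpenAI

# Point the stock OpenAI client at the compatibility layer.
client = OpenAI(base_url="http://localhost:8000/v1", api_key="unused")

stream = client.chat.completions.create(
    model="claude-4.5-sonnet",
    messages=[{
        "role": "user",
        "content": [
            {"type": "text", "text": "Describe this image."},
            {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},
        ],
    }],
    stream=True,
)
for chunk in stream:
    print(chunk.choices[0].delta.content or "", end="")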