nomid2 committed on
Commit
a7ba54d
·
verified ·
1 Parent(s): 0eca9c1

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +380 -0
app.py ADDED
@@ -0,0 +1,380 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import os
import json
import time
import uuid
import asyncio
import logging
from datetime import datetime, timezone
from typing import Any, AsyncGenerator, Dict, List, Literal, Optional, Union

import httpx
from fastapi import FastAPI, HTTPException, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import StreamingResponse, JSONResponse
from pydantic import BaseModel, Field
16
+
17
# Logging configuration: INFO level, module-scoped logger per stdlib convention.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
20
+
21
# FastAPI application instance; title/description/version feed the OpenAPI docs.
app = FastAPI(
    title="Replicate API Proxy",
    description="将 Replicate API 转换为 OpenAI 兼容格式的代理服务",
    version="1.0.0"
)
26
+
27
# CORS middleware: wide open (any origin/method/header) so browser clients can
# call the proxy directly.
# NOTE(review): allow_origins=["*"] combined with allow_credentials=True is
# very permissive — tighten for production deployments.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
35
+
36
# Environment variables: the Replicate token is read once at import time.
# A missing token only logs a warning here; the chat endpoint later refuses
# requests with an HTTP 500 when the client was never constructed.
REPLICATE_API_TOKEN = os.getenv("REPLICATE_API_TOKEN")
if not REPLICATE_API_TOKEN:
    logger.warning("REPLICATE_API_TOKEN 未设置,某些功能可能无法正常工作")
40
+
41
# OpenAI-compatible request models
class ChatMessage(BaseModel):
    """A single chat turn in OpenAI format."""
    # Restricted to the three OpenAI chat roles.
    role: Literal["system", "user", "assistant"]
    content: str
45
+
46
class ChatCompletionRequest(BaseModel):
    """OpenAI /v1/chat/completions request body (subset of the real schema)."""
    # Only one model is actually served; the value is echoed back in responses.
    model: str = "claude-3-5-sonnet"
    messages: List[ChatMessage]
    temperature: Optional[float] = Field(default=0.7, ge=0, le=2)
    max_tokens: Optional[int] = Field(default=1000, ge=1)
    stream: Optional[bool] = False
    top_p: Optional[float] = Field(default=1, ge=0, le=1)
53
+
54
# OpenAI-compatible response models
class ChatCompletionChoice(BaseModel):
    """One completion alternative in a non-streaming response."""
    index: int
    message: ChatMessage
    finish_reason: str
59
+
60
class ChatCompletionUsage(BaseModel):
    """Token accounting block (filled from the rough `calculate_tokens`
    heuristic, not a real tokenizer count)."""
    prompt_tokens: int
    completion_tokens: int
    total_tokens: int
64
+
65
class ChatCompletionResponse(BaseModel):
    """Top-level non-streaming chat completion response."""
    id: str
    object: str = "chat.completion"
    created: int
    model: str
    choices: List[ChatCompletionChoice]
    usage: ChatCompletionUsage
72
+
73
class ChatCompletionStreamChoice(BaseModel):
    """One choice inside a streaming chunk; `delta` carries partial content."""
    index: int
    delta: Dict[str, Any]
    finish_reason: Optional[str] = None
77
+
78
class ChatCompletionStreamResponse(BaseModel):
    """A single server-sent-events chunk in OpenAI streaming format."""
    id: str
    object: str = "chat.completion.chunk"
    created: int
    model: str
    choices: List[ChatCompletionStreamChoice]
84
+
85
# Replicate API client
class ReplicateClient:
    """Minimal async client for the Replicate predictions API.

    Translates transport and HTTP failures into FastAPI ``HTTPException``s so
    route handlers can let them propagate unchanged.
    """

    def __init__(self, api_token: str):
        self.api_token = api_token
        self.base_url = "https://api.replicate.com/v1"
        self.headers = {
            "Authorization": f"Bearer {api_token}",
            "Content-Type": "application/json"
        }

    def format_messages_for_replicate(self, messages: List[ChatMessage]) -> str:
        """Convert OpenAI-style messages into a single Claude prompt string.

        Roles map to "System:" / "Human:" / "Assistant:" prefixes; a trailing
        "Assistant:" cue is appended unless the conversation already ends with
        an assistant turn.
        """
        formatted_messages = []

        for message in messages:
            if message.role == "system":
                formatted_messages.append(f"System: {message.content}")
            elif message.role == "user":
                formatted_messages.append(f"Human: {message.content}")
            elif message.role == "assistant":
                formatted_messages.append(f"Assistant: {message.content}")

        # Clearer equivalent of the original `any(... for msg in messages[-1:])`:
        # only skip the cue when the last message is from the assistant.
        if not (messages and messages[-1].role == "assistant"):
            formatted_messages.append("Assistant:")

        return "\n\n".join(formatted_messages)

    async def create_prediction(
        self,
        messages: List[ChatMessage],
        temperature: float = 0.7,
        max_tokens: int = 1000,
        top_p: float = 1.0
    ) -> Dict[str, Any]:
        """Create a prediction on Replicate and return its raw JSON.

        Raises HTTPException(502) on transport errors and mirrors the upstream
        status code on HTTP errors.
        """
        prompt = self.format_messages_for_replicate(messages)

        payload = {
            # NOTE(review): this version hash looks like a placeholder —
            # verify it against the real model version on Replicate.
            "version": "14e5e6719b5af8e6a0b4b1d73b48bb0f8e8b3a7a0b4b1d73b48bb0f8e8b3a7a0",  # Claude 3.5 Sonnet version ID
            "input": {
                "prompt": prompt,
                "max_tokens": max_tokens,
                "temperature": temperature,
                "top_p": top_p,
                "system_prompt": "You are Claude, an AI assistant created by Anthropic."
            }
        }

        async with httpx.AsyncClient(timeout=30.0) as client:
            try:
                response = await client.post(
                    f"{self.base_url}/predictions",
                    headers=self.headers,
                    json=payload
                )
                response.raise_for_status()
                return response.json()
            except httpx.RequestError as e:
                logger.error(f"请求 Replicate API 失败: {e}")
                raise HTTPException(status_code=502, detail="上游服务请求失败")
            except httpx.HTTPStatusError as e:
                logger.error(f"Replicate API 返回错误: {e.response.status_code} - {e.response.text}")
                raise HTTPException(status_code=e.response.status_code, detail="上游服务错误")

    async def get_prediction(self, prediction_id: str) -> Dict[str, Any]:
        """Fetch the current state of a prediction by id."""
        async with httpx.AsyncClient(timeout=30.0) as client:
            try:
                response = await client.get(
                    f"{self.base_url}/predictions/{prediction_id}",
                    headers=self.headers
                )
                response.raise_for_status()
                return response.json()
            except httpx.RequestError as e:
                logger.error(f"获取预测结果失败: {e}")
                raise HTTPException(status_code=502, detail="获取结果失败")
            except httpx.HTTPStatusError as e:
                # Fix: raise_for_status() errors were previously uncaught here
                # (unlike create_prediction) and surfaced as generic 500s; map
                # them to 502 like the other upstream failures.
                logger.error(f"获取预测结果失败: {e.response.status_code} - {e.response.text}")
                raise HTTPException(status_code=502, detail="获取结果失败")

    async def wait_for_prediction(self, prediction_id: str, max_wait: int = 300) -> Dict[str, Any]:
        """Poll every 2s until the prediction succeeds, fails, is canceled,
        or ``max_wait`` seconds elapse (then HTTPException 504).
        """
        start_time = time.time()

        while time.time() - start_time < max_wait:
            prediction = await self.get_prediction(prediction_id)

            if prediction["status"] == "succeeded":
                return prediction
            elif prediction["status"] == "failed":
                error_msg = prediction.get("error", "预测失败")
                logger.error(f"Replicate 预测失败: {error_msg}")
                raise HTTPException(status_code=502, detail=f"预测失败: {error_msg}")
            elif prediction["status"] == "canceled":
                raise HTTPException(status_code=502, detail="预测被取消")

            # Poll interval between status checks.
            await asyncio.sleep(2)

        raise HTTPException(status_code=504, detail="预测超时")
186
+
187
# Initialize the Replicate client. It stays None when no token is configured;
# the chat completion endpoint checks this and responds with HTTP 500.
replicate_client = None
if REPLICATE_API_TOKEN:
    replicate_client = ReplicateClient(REPLICATE_API_TOKEN)
191
+
192
def calculate_tokens(text: str) -> int:
    """Rough token estimate: word count plus one token per four characters.

    A crude heuristic only — a real deployment should use the model's
    tokenizer for accurate accounting.
    """
    word_count = len(text.split())
    char_estimate = len(text) // 4
    return word_count + char_estimate
195
+
196
def create_openai_response(
    content: str,
    model: str,
    request_id: str,
    prompt_tokens: int,
    completion_tokens: int
) -> ChatCompletionResponse:
    """Assemble a non-streaming OpenAI-format chat completion response."""
    # Build the pieces as named intermediates, then assemble the envelope.
    assistant_message = ChatMessage(role="assistant", content=content)
    sole_choice = ChatCompletionChoice(
        index=0,
        message=assistant_message,
        finish_reason="stop"
    )
    usage = ChatCompletionUsage(
        prompt_tokens=prompt_tokens,
        completion_tokens=completion_tokens,
        total_tokens=prompt_tokens + completion_tokens
    )
    return ChatCompletionResponse(
        id=request_id,
        created=int(time.time()),
        model=model,
        choices=[sole_choice],
        usage=usage
    )
221
+
222
async def create_openai_stream(
    content: str,
    model: str,
    request_id: str
) -> AsyncGenerator[str, None]:
    """Yield OpenAI-format SSE chunks that replay *content* incrementally.

    Emits: a role-announcing chunk, one content chunk per slice, a final chunk
    with finish_reason="stop", then the "[DONE]" sentinel.
    """
    # Opening chunk announces the assistant role with empty content.
    start_chunk = ChatCompletionStreamResponse(
        id=request_id,
        created=int(time.time()),
        model=model,
        choices=[
            ChatCompletionStreamChoice(
                index=0,
                delta={"role": "assistant", "content": ""}
            )
        ]
    )
    yield f"data: {start_chunk.model_dump_json()}\n\n"

    # Stream the content in fixed-size character slices. Fix: the original
    # split on whitespace and re-joined with single spaces, which corrupted
    # newlines and runs of spaces — the streamed text no longer matched the
    # non-streaming response. Slicing preserves the text byte-for-byte.
    chunk_size = 16
    for offset in range(0, len(content), chunk_size):
        piece = content[offset:offset + chunk_size]

        chunk = ChatCompletionStreamResponse(
            id=request_id,
            created=int(time.time()),
            model=model,
            choices=[
                ChatCompletionStreamChoice(
                    index=0,
                    delta={"content": piece}
                )
            ]
        )
        yield f"data: {chunk.model_dump_json()}\n\n"
        await asyncio.sleep(0.05)  # simulated streaming delay

    # Terminal chunk: empty delta with an explicit finish reason.
    end_chunk = ChatCompletionStreamResponse(
        id=request_id,
        created=int(time.time()),
        model=model,
        choices=[
            ChatCompletionStreamChoice(
                index=0,
                delta={},
                finish_reason="stop"
            )
        ]
    )
    yield f"data: {end_chunk.model_dump_json()}\n\n"
    yield "data: [DONE]\n\n"
277
+
278
@app.get("/")
async def root():
    """Service banner: name, version, status, and Replicate configuration."""
    info = {"message": "Replicate API Proxy", "version": "1.0.0"}
    info["status"] = "running"
    info["replicate_configured"] = REPLICATE_API_TOKEN is not None
    return info
287
+
288
@app.get("/v1/models")
async def list_models():
    """Return the model catalogue (a single Claude entry) in OpenAI list format."""
    claude_entry = {
        "id": "claude-3-5-sonnet",
        "object": "model",
        "created": int(time.time()),
        "owned_by": "anthropic"
    }
    return {"object": "list", "data": [claude_entry]}
302
+
303
@app.post("/v1/chat/completions")
async def create_chat_completion(request: ChatCompletionRequest):
    """OpenAI-compatible chat completion endpoint.

    Creates a Replicate prediction, waits for it to finish, and returns the
    result either as a standard JSON body or as an SSE stream.
    """

    if not replicate_client:
        raise HTTPException(
            status_code=500,
            detail="Replicate API Token 未配置,请设置 REPLICATE_API_TOKEN 环境变量"
        )

    request_id = f"chatcmpl-{uuid.uuid4().hex}"

    try:
        # Kick off the Replicate prediction.
        prediction = await replicate_client.create_prediction(
            messages=request.messages,
            temperature=request.temperature,
            max_tokens=request.max_tokens,
            top_p=request.top_p
        )

        # Block until the prediction completes (the client enforces a timeout).
        completed_prediction = await replicate_client.wait_for_prediction(
            prediction["id"]
        )

        # Replicate may return the output as a list of string fragments.
        output = completed_prediction.get("output", [])
        if isinstance(output, list):
            content = "".join(output)
        else:
            content = str(output)

        if request.stream:
            # Streamed SSE response; token usage is not reported in this mode.
            return StreamingResponse(
                create_openai_stream(content, request.model, request_id),
                media_type="text/event-stream",
                headers={
                    "Cache-Control": "no-cache",
                    "Connection": "keep-alive",
                    "Access-Control-Allow-Origin": "*",
                }
            )

        # Token accounting only matters for the non-streaming response, so
        # compute the (rough) estimates here instead of unconditionally.
        prompt_text = " ".join(msg.content for msg in request.messages)
        prompt_tokens = calculate_tokens(prompt_text)
        completion_tokens = calculate_tokens(content)

        return create_openai_response(
            content=content,
            model=request.model,
            request_id=request_id,
            prompt_tokens=prompt_tokens,
            completion_tokens=completion_tokens
        )

    except HTTPException as e:
        # Fix: deliberate HTTP errors from the Replicate client now pass
        # through a dedicated clause, and bare `raise` preserves the original
        # traceback (the old `raise e` after an isinstance check did not).
        logger.error(f"处理聊天完成请求时出错: {e}")
        raise
    except Exception as e:
        logger.error(f"处理聊天完成请求时出错: {e}")
        raise HTTPException(status_code=500, detail=str(e))
368
+
369
@app.get("/health")
async def health_check():
    """Health check: report liveness and Replicate configuration state."""
    return {
        "status": "healthy",
        # Fix: datetime.utcnow() is deprecated (Python 3.12+) and returned a
        # naive timestamp; use an explicit timezone-aware UTC timestamp. The
        # ISO string now carries a "+00:00" offset.
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "replicate_configured": REPLICATE_API_TOKEN is not None
    }
377
+
378
if __name__ == "__main__":
    # Local entry point: uvicorn is imported lazily so the module can be
    # served by an external ASGI runner without requiring it at import time.
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)