Spaces:

nomid2
/

Replicate

Sleeping

App Files Files Community

nomid2 committed on Jun 7, 2025

Commit

8ccb8d4

verified ·

1 Parent(s): a7ba54d

Update app.py

Browse files

Files changed (1) hide show

app.py +240 -324

app.py CHANGED Viewed

@@ -1,30 +1,25 @@
 import os
 import json
-import time
-import uuid
 import asyncio
-import logging
-from typing import Dict, Any, Optional, AsyncGenerator
-from datetime import datetime
-import httpx
-from fastapi import FastAPI, HTTPException, Request
-from fastapi.responses import StreamingResponse, JSONResponse
 from fastapi.middleware.cors import CORSMiddleware
-from pydantic import BaseModel, Field
-from typing import List, Union, Literal
 # 配置日志
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 app = FastAPI(
-    title="Replicate API Proxy",
-    description="将 Replicate API 转换为 OpenAI 兼容格式的代理服务",
     version="1.0.0"
 )
-# 添加 CORS 中间件
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
@@ -33,348 +28,269 @@ app.add_middleware(
     allow_headers=["*"],
 )
-# 环境变量
 REPLICATE_API_TOKEN = os.getenv("REPLICATE_API_TOKEN")
 if not REPLICATE_API_TOKEN:
-    logger.warning("REPLICATE_API_TOKEN 未设置，某些功能可能无法正常工作")
-# OpenAI 兼容的请求模型
-class ChatMessage(BaseModel):
-    role: Literal["system", "user", "assistant"]
-    content: str
-class ChatCompletionRequest(BaseModel):
-    model: str = "claude-3-5-sonnet"
-    messages: List[ChatMessage]
-    temperature: Optional[float] = Field(default=0.7, ge=0, le=2)
-    max_tokens: Optional[int] = Field(default=1000, ge=1)
-    stream: Optional[bool] = False
-    top_p: Optional[float] = Field(default=1, ge=0, le=1)
-# OpenAI 兼容的响应模型
-class ChatCompletionChoice(BaseModel):
-    index: int
-    message: ChatMessage
-    finish_reason: str
-class ChatCompletionUsage(BaseModel):
-    prompt_tokens: int
-    completion_tokens: int
-    total_tokens: int
-class ChatCompletionResponse(BaseModel):
-    id: str
-    object: str = "chat.completion"
-    created: int
-    model: str
-    choices: List[ChatCompletionChoice]
-    usage: ChatCompletionUsage
-class ChatCompletionStreamChoice(BaseModel):
-    index: int
-    delta: Dict[str, Any]
-    finish_reason: Optional[str] = None
-class ChatCompletionStreamResponse(BaseModel):
-    id: str
-    object: str = "chat.completion.chunk"
-    created: int
-    model: str
-    choices: List[ChatCompletionStreamChoice]
-# Replicate API 客户端
-class ReplicateClient:
-    def __init__(self, api_token: str):
-        self.api_token = api_token
-        self.base_url = "https://api.replicate.com/v1"
-        self.headers = {
-            "Authorization": f"Bearer {api_token}",
-            "Content-Type": "application/json"
-        }
-    def format_messages_for_replicate(self, messages: List[ChatMessage]) -> str:
-        """将 OpenAI 格式的消息转换为 Replicate 格式"""
-        formatted_messages = []
-        for message in messages:
-            if message.role == "system":
-                formatted_messages.append(f"System: {message.content}")
-            elif message.role == "user":
-                formatted_messages.append(f"Human: {message.content}")
-            elif message.role == "assistant":
-                formatted_messages.append(f"Assistant: {message.content}")
-        # 为 Claude 添加最后的 Assistant: 提示
-        if not any(msg.role == "assistant" for msg in messages[-1:]):
-            formatted_messages.append("Assistant:")
-        return "\n\n".join(formatted_messages)
-    async def create_prediction(
-        self,
-        messages: List[ChatMessage],
-        temperature: float = 0.7,
-        max_tokens: int = 1000,
-        top_p: float = 1.0
-    ) -> Dict[str, Any]:
-        """创建 Replicate 预测"""
-        # 构建输入
-        prompt = self.format_messages_for_replicate(messages)
-        payload = {
-            "version": "14e5e6719b5af8e6a0b4b1d73b48bb0f8e8b3a7a0b4b1d73b48bb0f8e8b3a7a0",  # Claude 3.5 Sonnet 版本ID
-            "input": {
-                "prompt": prompt,
-                "max_tokens": max_tokens,
-                "temperature": temperature,
-                "top_p": top_p,
-                "system_prompt": "You are Claude, an AI assistant created by Anthropic."
-            }
         }
-        async with httpx.AsyncClient(timeout=30.0) as client:
-            try:
-                response = await client.post(
-                    f"{self.base_url}/predictions",
-                    headers=self.headers,
-                    json=payload
-                )
-                response.raise_for_status()
-                return response.json()
-            except httpx.RequestError as e:
-                logger.error(f"请求 Replicate API 失败: {e}")
-                raise HTTPException(status_code=502, detail="上游服务请求失败")
-            except httpx.HTTPStatusError as e:
-                logger.error(f"Replicate API 返回错误: {e.response.status_code} - {e.response.text}")
-                raise HTTPException(status_code=e.response.status_code, detail="上游服务错误")
-    async def get_prediction(self, prediction_id: str) -> Dict[str, Any]:
-        """获取预测结果"""
-        async with httpx.AsyncClient(timeout=30.0) as client:
-            try:
-                response = await client.get(
-                    f"{self.base_url}/predictions/{prediction_id}",
-                    headers=self.headers
-                )
-                response.raise_for_status()
-                return response.json()
-            except httpx.RequestError as e:
-                logger.error(f"获取预测结果失败: {e}")
-                raise HTTPException(status_code=502, detail="获取结果失败")
-    async def wait_for_prediction(self, prediction_id: str, max_wait: int = 300) -> Dict[str, Any]:
-        """等待预测完成"""
-        start_time = time.time()
-        while time.time() - start_time < max_wait:
-            prediction = await self.get_prediction(prediction_id)
-            if prediction["status"] == "succeeded":
-                return prediction
-            elif prediction["status"] == "failed":
-                error_msg = prediction.get("error", "预测失败")
-                logger.error(f"Replicate 预测失败: {error_msg}")
-                raise HTTPException(status_code=502, detail=f"预测失败: {error_msg}")
-            elif prediction["status"] in ["canceled"]:
-                raise HTTPException(status_code=502, detail="预测被取消")
-            # 等待一段时间后重试
-            await asyncio.sleep(2)
-        raise HTTPException(status_code=504, detail="预测超时")
-# 初始化 Replicate 客户端
-replicate_client = None
-if REPLICATE_API_TOKEN:
-    replicate_client = ReplicateClient(REPLICATE_API_TOKEN)
-def calculate_tokens(text: str) -> int:
-    """简单的 token 计算（实际应用中应使用更精确的方法）"""
-    return len(text.split()) + len(text) // 4
-def create_openai_response(
-    content: str,
-    model: str,
-    request_id: str,
-    prompt_tokens: int,
-    completion_tokens: int
-) -> ChatCompletionResponse:
-    """创建 OpenAI 格式的响应"""
-    return ChatCompletionResponse(
-        id=request_id,
-        created=int(time.time()),
-        model=model,
-        choices=[
-            ChatCompletionChoice(
-                index=0,
-                message=ChatMessage(role="assistant", content=content),
-                finish_reason="stop"
-            )
-        ],
-        usage=ChatCompletionUsage(
-            prompt_tokens=prompt_tokens,
-            completion_tokens=completion_tokens,
-            total_tokens=prompt_tokens + completion_tokens
-        )
-    )
-async def create_openai_stream(
-    content: str,
-    model: str,
-    request_id: str
-) -> AsyncGenerator[str, None]:
-    """创建 OpenAI 格式的流式响应"""
-    # 开始流式响应
-    start_chunk = ChatCompletionStreamResponse(
-        id=request_id,
-        created=int(time.time()),
-        model=model,
-        choices=[
-            ChatCompletionStreamChoice(
-                index=0,
-                delta={"role": "assistant", "content": ""}
-            )
-        ]
-    )
-    yield f"data: {start_chunk.model_dump_json()}\n\n"
-    # 分块发送内容
-    words = content.split()
-    for i, word in enumerate(words):
-        chunk_content = word + (" " if i < len(words) - 1 else "")
-        chunk = ChatCompletionStreamResponse(
-            id=request_id,
-            created=int(time.time()),
-            model=model,
-            choices=[
-                ChatCompletionStreamChoice(
-                    index=0,
-                    delta={"content": chunk_content}
-                )
-            ]
-        )
-        yield f"data: {chunk.model_dump_json()}\n\n"
-        await asyncio.sleep(0.05)  # 模拟流式响应延迟
-    # 结束流式响应
-    end_chunk = ChatCompletionStreamResponse(
-        id=request_id,
-        created=int(time.time()),
-        model=model,
-        choices=[
-            ChatCompletionStreamChoice(
-                index=0,
-                delta={},
-                finish_reason="stop"
-            )
-        ]
-    )
-    yield f"data: {end_chunk.model_dump_json()}\n\n"
-    yield "data: [DONE]\n\n"
 @app.get("/")
 async def root():
-    """根路径"""
     return {
-        "message": "Replicate API Proxy",
-        "version": "1.0.0",
         "status": "running",
-        "replicate_configured": REPLICATE_API_TOKEN is not None
     }
 @app.get("/v1/models")
 async def list_models():
-    """列出可用模型"""
-    return {
-        "object": "list",
-        "data": [
-            {
-                "id": "claude-3-5-sonnet",
-                "object": "model",
-                "created": int(time.time()),
-                "owned_by": "anthropic"
-            }
-        ]
-    }
 @app.post("/v1/chat/completions")
-async def create_chat_completion(request: ChatCompletionRequest):
-    """创建聊天完成"""
-    if not replicate_client:
-        raise HTTPException(
-            status_code=500,
-            detail="Replicate API Token 未配置，请设置 REPLICATE_API_TOKEN 环境变量"
-        )
-    request_id = f"chatcmpl-{uuid.uuid4().hex}"
     try:
-        # 创建 Replicate 预测
-        prediction = await replicate_client.create_prediction(
-            messages=request.messages,
-            temperature=request.temperature,
-            max_tokens=request.max_tokens,
-            top_p=request.top_p
-        )
-        # 等待预测完成
-        completed_prediction = await replicate_client.wait_for_prediction(
-            prediction["id"]
-        )
-        # 提取生成的内容
-        output = completed_prediction.get("output", [])
-        if isinstance(output, list):
-            content = "".join(output)
-        else:
-            content = str(output)
-        # 计算 token 使用量
-        prompt_text = " ".join([msg.content for msg in request.messages])
-        prompt_tokens = calculate_tokens(prompt_text)
-        completion_tokens = calculate_tokens(content)
-        if request.stream:
-            # 返回流式响应
-            return StreamingResponse(
-                create_openai_stream(content, request.model, request_id),
-                media_type="text/event-stream",
-                headers={
-                    "Cache-Control": "no-cache",
-                    "Connection": "keep-alive",
-                    "Access-Control-Allow-Origin": "*",
-                }
-            )
-        else:
-            # 返回标准响应
-            response = create_openai_response(
-                content=content,
-                model=request.model,
-                request_id=request_id,
-                prompt_tokens=prompt_tokens,
-                completion_tokens=completion_tokens
-            )
-            return response
     except Exception as e:
-        logger.error(f"处理聊天完成请求时出错: {e}")
-        if isinstance(e, HTTPException):
-            raise e
         raise HTTPException(status_code=500, detail=str(e))
-@app.get("/health")
-async def health_check():
-    """健康检查"""
-    return {
-        "status": "healthy",
-        "timestamp": datetime.utcnow().isoformat(),
-        "replicate_configured": REPLICATE_API_TOKEN is not None
-    }
 if __name__ == "__main__":
-    import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=7860)

 import os
 import json
 import asyncio
+import aiohttp
+from fastapi import FastAPI, Request, HTTPException
+from fastapi.responses import StreamingResponse
 from fastapi.middleware.cors import CORSMiddleware
+import uvicorn
+from typing import Dict, Any, AsyncGenerator
+import logging
 # Configure logging: INFO level via basicConfig, plus a logger named after this module
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
+# The FastAPI application object that the route decorators below attach to
 app = FastAPI(
+    title="Replicate API Proxy for LobeChat",
+    description="A proxy service to forward Replicate API requests in OpenAI-compatible format",
     version="1.0.0"
 )
+# 添加CORS中间件
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
     allow_headers=["*"],
 )
+# Read configuration from environment variables
 REPLICATE_API_TOKEN = os.getenv("REPLICATE_API_TOKEN")
 if not REPLICATE_API_TOKEN:
+    # Only warn here; the chat endpoint rejects requests when the token is missing
+    logger.warning("REPLICATE_API_TOKEN not found in environment variables")
+# Replicate API settings: base URL for REST calls and the model used when a request names none
+REPLICATE_BASE_URL = "https://api.replicate.com/v1"
+DEFAULT_MODEL = "anthropic/claude-4-sonnet"
+def transform_openai_to_replicate(openai_request: Dict[str, Any], model_override: str = None) -> tuple:
+    """Convert an OpenAI chat-completion request into a Replicate prediction payload.
+
+    Args:
+        openai_request: Parsed JSON body of an OpenAI-style chat request.
+        model_override: Model name to use instead of the request's ``model``.
+
+    Returns:
+        A ``(replicate_request, model)`` tuple. ``replicate_request`` holds the
+        ``stream`` flag and the ``input`` dict (prompt, system prompt,
+        max_tokens, temperature); ``model`` is the model name, prefixed with
+        ``anthropic/`` when it carries no owner part.
+    """
+
+    def _text_of(content: Any) -> str:
+        # Message content may be a plain string or a list of typed parts;
+        # only the textual parts can be placed in a text prompt.
+        if content is None:
+            return ""
+        if isinstance(content, str):
+            return content
+        if isinstance(content, list):
+            return "\n".join(
+                part["text"]
+                for part in content
+                if isinstance(part, dict) and isinstance(part.get("text"), str)
+            )
+        return str(content)
+
+    system_parts = []
+    prompt_parts = []
+    for message in openai_request.get("messages") or []:
+        role = message.get("role")
+        text = _text_of(message.get("content", ""))
+        if role == "system":
+            # Keep every system message instead of letting the last one win.
+            system_parts.append(text)
+        elif role == "user":
+            prompt_parts.append(f"User: {text}")
+        elif role == "assistant":
+            prompt_parts.append(f"Assistant: {text}")
+
+    # End the transcript with an open assistant turn for the model to complete.
+    prompt = "\n\n".join(prompt_parts)
+    if prompt_parts and not prompt.endswith("\n\nAssistant:"):
+        prompt += "\n\nAssistant:"
+
+    system_prompt = "\n\n".join(part for part in system_parts if part)
+
+    # A missing, null or empty model name falls back to the default.
+    model = model_override or openai_request.get("model") or DEFAULT_MODEL
+    if "/" not in model:
+        model = f"anthropic/{model}"
+
+    # An explicit null is treated like an absent key; 0 stays a valid temperature.
+    max_tokens = openai_request.get("max_tokens")
+    temperature = openai_request.get("temperature")
+
+    replicate_request = {
+        "stream": bool(openai_request.get("stream", False)),
+        "input": {
+            "prompt": prompt,
+            "system_prompt": system_prompt or "You are a helpful assistant",
+            "max_tokens": 1000 if max_tokens is None else max_tokens,
+            "temperature": 0.7 if temperature is None else temperature,
+        },
+    }
+    return replicate_request, model
+async def create_replicate_prediction(session: aiohttp.ClientSession, model: str, data: Dict[str, Any]) -> Dict[str, Any]:
+    """Create a prediction for ``model`` on Replicate and return the parsed reply.
+
+    Posts ``data`` as JSON to the model's predictions endpoint using the
+    configured API token.
+
+    Raises:
+        HTTPException: carrying the upstream status code when the response
+            status is anything other than 201.
+    """
+    endpoint = f"{REPLICATE_BASE_URL}/models/{model}/predictions"
+    request_headers = {
+        "Authorization": f"Bearer {REPLICATE_API_TOKEN}",
+        "Content-Type": "application/json"
+    }
+    async with session.post(endpoint, headers=request_headers, json=data) as resp:
+        # Success path first: 201 is the only status treated as "created".
+        if resp.status == 201:
+            return await resp.json()
+        failure_body = await resp.text()
+        logger.error(f"Replicate API error: {resp.status} - {failure_body}")
+        raise HTTPException(status_code=resp.status, detail=f"Replicate API error: {failure_body}")
+async def stream_replicate_response(session: aiohttp.ClientSession, stream_url: str) -> AsyncGenerator[str, None]:
+    """Read a Replicate event stream and yield one ``data: <json>`` string per event.
+
+    The stream is parsed as server-sent events: ``event:``, ``id:`` and
+    ``data:`` field lines, with a blank line closing each event. Every
+    completed event is re-encoded as ``data: {"event": ..., "id": ...,
+    "data": ...}``, which is the shape transform_replicate_to_openai_stream
+    parses (a ``data: `` prefix followed by a JSON object with an ``event`` key).
+    NOTE(review): the field layout follows Replicate's streaming docs, where
+    ``output`` events carry raw text in ``data``; confirm against a live stream.
+
+    Raises:
+        HTTPException: with the upstream status when the stream request does
+            not return 200, or 502 when the stream delivers an ``error`` event.
+    """
+    headers = {
+        "Accept": "text/event-stream",
+        "Cache-Control": "no-store"
+    }
+
+    def _pack(name: str, ident: Any, chunks: list) -> str:
+        # Multiple data lines of one event are joined with newlines.
+        payload = {"event": name or "message", "data": "\n".join(chunks)}
+        if ident is not None:
+            payload["id"] = ident
+        return f"data: {json.dumps(payload)}"
+
+    async with session.get(stream_url, headers=headers) as response:
+        if response.status != 200:
+            error_text = await response.text()
+            logger.error(f"Stream error: {response.status} - {error_text}")
+            raise HTTPException(status_code=response.status, detail=f"Stream error: {error_text}")
+
+        event_name = ""
+        event_id = None
+        data_lines = []
+        async for raw_line in response.content:
+            # Drop only the line terminator: spaces inside a token are
+            # significant and must survive.
+            line = raw_line.decode('utf-8').rstrip("\r\n")
+            if not line:
+                # A blank line closes the current event.
+                if event_name or data_lines:
+                    if event_name == "error":
+                        detail = "\n".join(data_lines)
+                        logger.error(f"Stream error event: {detail}")
+                        raise HTTPException(status_code=502, detail=f"Stream error: {detail}")
+                    yield _pack(event_name, event_id, data_lines)
+                event_name, event_id, data_lines = "", None, []
+                continue
+            if line.startswith(":"):
+                continue  # comment / keep-alive line
+            field, _, value = line.partition(":")
+            if value.startswith(" "):
+                value = value[1:]  # a single space after the colon is a separator
+            if field == "event":
+                event_name = value
+            elif field == "id":
+                event_id = value
+            elif field == "data":
+                data_lines.append(value)
+
+        # Flush an event that was not closed by a blank line before EOF.
+        if event_name or data_lines:
+            if event_name == "error":
+                detail = "\n".join(data_lines)
+                logger.error(f"Stream error event: {detail}")
+                raise HTTPException(status_code=502, detail=f"Stream error: {detail}")
+            yield _pack(event_name, event_id, data_lines)
+def transform_replicate_to_openai_stream(event_data: str, model: str) -> str:
+    """Convert one ``data: <json>`` stream line into an OpenAI chat-completion chunk.
+
+    Args:
+        event_data: A single stream line. Only lines starting with ``data: ``
+            whose remainder is a JSON object are handled.
+        model: Model name copied into the chunk.
+
+    Returns:
+        For an ``output`` event, one ``data: {...}`` chunk whose delta carries
+        the text. For a ``done`` event, a final chunk with
+        ``finish_reason="stop"`` followed by ``data: [DONE]``. An empty string
+        for anything else (other prefix, unparseable JSON, non-object JSON,
+        unknown event).
+    """
+    import time  # local import: the file's import block does not provide it
+
+    if not event_data.startswith("data: "):
+        return ""
+    try:
+        data = json.loads(event_data[6:])  # strip the "data: " prefix
+    except json.JSONDecodeError:
+        logger.warning(f"Failed to parse event data: {event_data}")
+        return ""
+    if not isinstance(data, dict):
+        # Valid JSON that is not an object (e.g. a bare string) has no event key.
+        return ""
+
+    event = data.get("event")
+    if event == "output":
+        delta, finish_reason, terminator = {"content": data.get("data", "")}, None, ""
+    elif event == "done":
+        # The final chunk is followed by the end-of-stream marker.
+        delta, finish_reason, terminator = {}, "stop", "data: [DONE]\n\n"
+    else:
+        return ""
+
+    openai_response = {
+        "id": f"chatcmpl-{data.get('id', 'unknown')}",
+        "object": "chat.completion.chunk",
+        # Wall-clock Unix timestamp; the event loop clock is monotonic, not epoch-based.
+        "created": int(time.time()),
+        "model": model,
+        "choices": [{
+            "index": 0,
+            "delta": delta,
+            "finish_reason": finish_reason
+        }]
+    }
+    return f"data: {json.dumps(openai_response)}\n\n{terminator}"
 @app.get("/")
 async def root():
+    """Health-check endpoint: report that the service runs and whether a token is set."""
+    token_present = bool(REPLICATE_API_TOKEN)
+    payload = dict(
+        message="Replicate API Proxy for LobeChat",
+        status="running",
+        replicate_token_configured=token_present,
+    )
+    return payload
 @app.get("/v1/models")
 async def list_models():
+    """List the models this proxy advertises, in OpenAI-compatible list format."""
+    model_ids = ("claude-4-sonnet", "claude-3-sonnet", "claude-3-haiku")
+    # Every entry shares the same fixed creation timestamp and owner.
+    entries = [
+        {
+            "id": model_id,
+            "object": "model",
+            "created": 1677610602,
+            "owned_by": "anthropic",
+        }
+        for model_id in model_ids
+    ]
+    return {"object": "list", "data": entries}
 @app.post("/v1/chat/completions")
+async def chat_completions(request: Request):
+    """Handle an OpenAI-compatible chat-completion request via Replicate.
+
+    Creates a Replicate prediction from the request body, then either relays
+    the prediction's event stream (``stream`` true) or polls the prediction
+    until it finishes and returns a single ``chat.completion`` object.
+
+    Raises:
+        HTTPException: 500 when the token is missing, the stream URL is
+            absent, the prediction fails or is canceled, or an unexpected
+            error occurs; 504 when polling exceeds the time limit; the
+            upstream status when a Replicate call is rejected.
+    """
+    import time  # local import: the file's import block does not provide it
+
+    # Upper bound on polling so a stuck prediction cannot hang the request.
+    max_wait_seconds = 300
+
+    if not REPLICATE_API_TOKEN:
+        raise HTTPException(status_code=500, detail="REPLICATE_API_TOKEN not configured")
     try:
+        body = await request.json()
+        logger.info(f"Received request: {json.dumps(body, indent=2)}")
+        # Convert the request into Replicate's format
+        replicate_data, model = transform_openai_to_replicate(body)
+        logger.info(f"Transformed to Replicate format: {json.dumps(replicate_data, indent=2)}")
+        wants_stream = bool(body.get("stream", False))
+
+        async with aiohttp.ClientSession() as session:
+            # Create the prediction before responding so creation errors
+            # surface as a proper HTTP status.
+            prediction = await create_replicate_prediction(session, model, replicate_data)
+            logger.info(f"Created prediction: {prediction.get('id')}")
+
+            if not wants_stream:
+                # Non-streaming: poll until the prediction reaches a final state
+                prediction_url = f"{REPLICATE_BASE_URL}/predictions/{prediction['id']}"
+                headers = {"Authorization": f"Bearer {REPLICATE_API_TOKEN}"}
+                deadline = time.monotonic() + max_wait_seconds
+                while True:
+                    async with session.get(prediction_url, headers=headers) as response:
+                        if response.status >= 400:
+                            error_text = await response.text()
+                            logger.error(f"Replicate API error: {response.status} - {error_text}")
+                            raise HTTPException(status_code=response.status, detail=f"Replicate API error: {error_text}")
+                        result = await response.json()
+                    status = result.get("status")
+                    if status == "succeeded":
+                        # Output may be a list of pieces, a single string, or null.
+                        output = result.get("output")
+                        if output is None:
+                            content = ""
+                        elif isinstance(output, str):
+                            content = output
+                        else:
+                            content = "".join(str(piece) for piece in output)
+                        return {
+                            "id": f"chatcmpl-{result['id']}",
+                            "object": "chat.completion",
+                            "created": int(time.time()),
+                            "model": model,
+                            "choices": [{
+                                "index": 0,
+                                "message": {
+                                    "role": "assistant",
+                                    "content": content
+                                },
+                                "finish_reason": "stop"
+                            }],
+                            "usage": {
+                                "prompt_tokens": 0,
+                                "completion_tokens": 0,
+                                "total_tokens": 0
+                            }
+                        }
+                    if status == "failed":
+                        raise HTTPException(status_code=500, detail=f"Prediction failed: {result.get('error')}")
+                    if status == "canceled":
+                        raise HTTPException(status_code=500, detail="Prediction canceled")
+                    if time.monotonic() >= deadline:
+                        raise HTTPException(status_code=504, detail="Prediction timed out")
+                    # Wait one second before polling again
+                    await asyncio.sleep(1)
+
+        # Streaming response
+        stream_url = (prediction.get("urls") or {}).get("stream")
+        if not stream_url:
+            raise HTTPException(status_code=500, detail="Stream URL not available")
+
+        async def generate_stream():
+            try:
+                # The generator runs after this handler has returned, so it
+                # needs a session whose lifetime it controls itself.
+                async with aiohttp.ClientSession() as stream_session:
+                    async for event in stream_replicate_response(stream_session, stream_url):
+                        openai_event = transform_replicate_to_openai_stream(event, model)
+                        if openai_event:
+                            yield openai_event
+            except Exception as e:
+                logger.error(f"Stream generation error: {e}")
+                # Headers are already sent; report the failure in-band
+                error_response = {
+                    "error": {
+                        "message": str(e),
+                        "type": "stream_error"
+                    }
+                }
+                yield f"data: {json.dumps(error_response)}\n\n"
+
+        return StreamingResponse(
+            generate_stream(),
+            media_type="text/event-stream",
+            headers={
+                "Cache-Control": "no-cache",
+                "Connection": "keep-alive",
+                "Access-Control-Allow-Origin": "*",
+            }
+        )
+    except HTTPException:
+        # Keep deliberate status codes instead of flattening them to 500.
+        raise
     except Exception as e:
+        logger.error(f"Error processing request: {e}")
         raise HTTPException(status_code=500, detail=str(e))
 if __name__ == "__main__":
+    # Listen on all interfaces; the port comes from PORT, defaulting to 7860.
+    uvicorn.run(app, host="0.0.0.0", port=int(os.environ.get("PORT", 7860)))