Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -302,6 +302,55 @@ async def health_check():
|
|
| 302 |
"device_count": torch.cuda.device_count() if torch.cuda.is_available() else 0
|
| 303 |
}
|
| 304 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 305 |
if __name__ == "__main__":
    # Script entry point: uvicorn is imported only when the file is executed
    # directly, not when it is imported as a module.
    import uvicorn

    # Serve `app` on all network interfaces (0.0.0.0), port 7860.
    uvicorn.run(app, host="0.0.0.0", port=7860)
|
|
|
|
| 302 |
"device_count": torch.cuda.device_count() if torch.cuda.is_available() else 0
|
| 303 |
}
|
| 304 |
|
| 305 |
+
# Request body in the Anthropic Claude Messages format.
class MessagesRequest(BaseModel):
    # Model identifier supplied by the client; the /v1/messages handler
    # echoes it back in its response.
    model: str
    # Conversation turns; `Message` is declared elsewhere in this file.
    messages: List[Message]
    # Generation limit; defaults to 2048 when the client omits it.
    max_tokens: Optional[int] = 2048
    # Sampling temperature; defaults to 0.7 when omitted.
    temperature: Optional[float] = 0.7
    # Nucleus-sampling threshold; defaults to 0.9 when omitted.
    top_p: Optional[float] = 0.9
|
| 312 |
+
|
| 313 |
+
@app.post("/v1/messages")
async def messages_endpoint(request: MessagesRequest):
    """Serve an Anthropic Claude CLI-compatible ``/v1/messages`` request.

    Loads the model on demand if it is not yet available, formats the
    incoming messages into a prompt using the existing helpers, generates
    a completion, and wraps it in an Anthropic-style message payload.

    Args:
        request: Parsed request body (model name, messages and sampling
            parameters).

    Returns:
        A dict with ``id``, ``type``, ``role``, ``content``, ``model``,
        ``stop_reason``, ``stop_sequence`` and ``usage`` keys. ``usage``
        token counts are obtained by encoding the prompt and the generated
        text with the module-level tokenizer.

    Raises:
        HTTPException: 503 when the model cannot be loaded; 500 for any
            other failure while handling the request.
    """
    try:
        if model is None or tokenizer is None:
            try:
                load_model()
            except Exception as load_err:
                # Narrowed from a bare `except:` so KeyboardInterrupt and
                # SystemExit are not swallowed; the cause is chained for
                # easier debugging.
                raise HTTPException(
                    status_code=503, detail="Model not loaded"
                ) from load_err

        # Reuse the existing formatting and generation logic.
        prompt = format_messages(request.messages)
        response_text = generate_response(
            prompt,
            request.temperature,
            request.max_tokens,
            request.top_p,
        )

        return {
            "id": f"msg-{uuid.uuid4().hex[:8]}",
            "type": "message",
            "role": "assistant",
            "content": [
                {"type": "text", "text": response_text}
            ],
            "model": request.model,
            "stop_reason": "end_turn",
            "stop_sequence": None,
            "usage": {
                "input_tokens": len(tokenizer.encode(prompt)),
                "output_tokens": len(tokenizer.encode(response_text)),
            },
        }

    except HTTPException:
        # HTTPException is itself an Exception subclass; without this clause
        # the generic handler below would re-wrap the deliberate 503 (or any
        # HTTP error raised by the helpers) as a 500.
        raise
    except Exception as e:
        print(f"Error processing /v1/messages request: {str(e)}")
        raise HTTPException(
            status_code=500, detail=f"Internal server error: {str(e)}"
        ) from e
|
| 353 |
+
|
| 354 |
if __name__ == "__main__":
    # Script entry point: uvicorn is imported only when the file is executed
    # directly, not when it is imported as a module.
    import uvicorn

    # Serve `app` on all network interfaces (0.0.0.0), port 7860.
    uvicorn.run(app, host="0.0.0.0", port=7860)
|