# ghmodels/app.py
# Fix: empty response body when handling non-streaming requests
# (处理非流式请求时响应体为空的问题) — commit fcfe585, author: kevin
from fastapi import FastAPI, HTTPException, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import Response
import logging
import httpx
import random
import uvicorn
import json
# Timestamped log lines so proxy attempts/failures can be correlated.
logging.basicConfig(
    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)
app = FastAPI()
# Fully open CORS: this proxy is meant to be called from arbitrary origins.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# One shared async client; connections are pooled across requests.
client = httpx.AsyncClient()
# Upstream Azure-hosted GitHub Models endpoints this app proxies to.
BASE_URL_CHAT = "https://models.inference.ai.azure.com/chat/completions"
BASE_URL_EMBEDDINGS = "https://models.inference.ai.azure.com/embeddings"
async def process_request_body(body: bytes) -> bytes:
    """Remove the "store" field from a JSON request body before forwarding.

    "store" is an OpenAI-specific flag; it is dropped here —
    NOTE(review): presumably because the upstream endpoint rejects it;
    confirm against the Azure inference API.

    Args:
        body: Raw request body bytes.

    Returns:
        Re-serialized JSON bytes with "store" removed, or the original
        bytes unchanged when the body is not parseable JSON.
    """
    try:
        data = json.loads(body)
    except ValueError:
        # ValueError covers both json.JSONDecodeError and the
        # UnicodeDecodeError raised for undecodable bytes (the latter is
        # NOT a JSONDecodeError, so catching only that would crash here).
        return body
    if isinstance(data, dict):
        data.pop("store", None)
    return json.dumps(data).encode()
async def make_request(method, url, headers, body, api_keys=None, retry_count=0):
    """Forward a request upstream, retrying up to 3 times.

    With multiple keys in *api_keys*, a random key is tried per attempt and
    removed from the pool on failure.  With one key (or none), the same key
    — from *api_keys* or the incoming ``authorization`` header — is retried.

    Args:
        method: HTTP method to forward.
        url: Upstream URL.
        headers: Base headers (Authorization is overwritten per attempt).
        body: Raw request body bytes.
        api_keys: Optional list of candidate bearer keys.
        retry_count: Starting attempt count (attempts stop at 3).

    Returns:
        The first upstream httpx response with status < 400.

    Raises:
        HTTPException: 500 when every attempt fails.
    """
    async def _attempt(key):
        # One upstream attempt with *key*; returns the response on success
        # (status < 400) or None on any failure.
        attempt_headers = {**headers, "Authorization": f"Bearer {key}"}
        # Log only a key prefix — full API keys must not reach the logs.
        logger.info(f"Attempting request with API key: {key[:8]}...")
        try:
            r = await client.request(
                method,
                url,
                headers=attempt_headers,
                content=body,
                timeout=600,
            )
            if r.status_code < 400:
                return r
            logger.error(f"Request failed with key {key[:8]}..., status code: {r.status_code}")
        except Exception as e:
            logger.error(f"Request failed with key {key[:8]}...: {str(e)}")
        return None

    try:
        if api_keys and len(api_keys) > 1:
            remaining_keys = api_keys.copy()
            while remaining_keys and retry_count < 3:
                selected_key = random.choice(remaining_keys)
                remaining_keys.remove(selected_key)
                r = await _attempt(selected_key)
                if r is not None:
                    return r
                retry_count += 1
            raise HTTPException(status_code=500, detail="All API keys failed")
        else:
            single_key = api_keys[0] if api_keys else headers.get("authorization", "").replace("Bearer ", "").strip()
            while retry_count < 3:
                r = await _attempt(single_key)
                if r is not None:
                    return r
                retry_count += 1
            raise HTTPException(status_code=500, detail="Request failed after 3 retries")
    except HTTPException:
        # Re-raise as-is: the generic handler below would otherwise swallow
        # the specific "All API keys failed"/"after 3 retries" detail.
        raise
    except Exception as e:
        logger.error(f"Request failed: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))
@app.api_route(
    "/v1/chat/completions",
    methods=["GET", "POST", "PUT", "DELETE", "OPTIONS", "HEAD", "PATCH"],
)
@app.api_route(
    "/hf/v1/chat/completions",
    methods=["GET", "POST", "PUT", "DELETE", "OPTIONS", "HEAD", "PATCH"],
)
async def chat_completions(request: Request):
    """Proxy chat-completion requests to the upstream inference endpoint.

    The Authorization header may carry several comma-separated API keys;
    they are split here and rotated/retried inside make_request().
    """
    target_url = BASE_URL_CHAT
    try:
        headers = dict(request.headers)
        # Drop derived/host headers; httpx recomputes content-length and we
        # must present the upstream host.
        headers.pop("content-length", None)
        headers.pop("host", None)
        headers["Host"] = "models.inference.ai.azure.com"
        api_keys = None
        auth_header = headers.get("authorization", "")
        if auth_header and auth_header.startswith("Bearer "):
            raw_keys = auth_header.replace("Bearer ", "").strip()
            api_keys = [k.strip() for k in raw_keys.split(',') if k.strip()]
        # Never forward the client's credential header verbatim; the chosen
        # key is re-attached per attempt in make_request().
        headers.pop("authorization", None)
        request_body = await request.body()
        processed_body = await process_request_body(request_body)
        r = await make_request(request.method, target_url, headers, processed_body, api_keys)
        # Send back only a clean content-type: forwarding the upstream
        # headers (stale content-encoding/content-length after httpx
        # decompression) made non-streaming response bodies appear empty.
        response_headers = {
            "content-type": "application/json",
        }
        return Response(content=r.content, status_code=r.status_code, headers=response_headers)
    except HTTPException:
        # Preserve the status/detail raised by make_request.
        raise
    except Exception as e:
        logger.error(f"Forwarding request failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@app.api_route("/v1/embeddings", methods=["POST", "OPTIONS"])
@app.api_route("/hf/v1/embeddings", methods=["POST", "OPTIONS"])
async def embeddings(request: Request):
    """Proxy embedding requests to the upstream inference endpoint.

    Mirrors chat_completions: comma-separated API keys are split out of the
    Authorization header and handled by make_request().
    """
    target_url = BASE_URL_EMBEDDINGS
    try:
        headers = dict(request.headers)
        # Drop derived/host headers; httpx recomputes content-length and we
        # must present the upstream host.
        headers.pop("content-length", None)
        headers.pop("host", None)
        headers["Host"] = "models.inference.ai.azure.com"
        api_keys = None
        auth_header = headers.get("authorization", "")
        if auth_header and auth_header.startswith("Bearer "):
            raw_keys = auth_header.replace("Bearer ", "").strip()
            api_keys = [k.strip() for k in raw_keys.split(',') if k.strip()]
        # Never forward the client's credential header verbatim; the chosen
        # key is re-attached per attempt in make_request().
        headers.pop("authorization", None)
        request_body = await request.body()
        processed_body = await process_request_body(request_body)
        r = await make_request(request.method, target_url, headers, processed_body, api_keys)
        # Do NOT forward r.headers: after httpx decompresses the body, the
        # upstream content-encoding/content-length headers are stale and
        # corrupt the response (same empty-body bug already fixed on the
        # chat route) — return a clean content-type instead.
        response_headers = {
            "content-type": "application/json",
        }
        return Response(content=r.content, status_code=r.status_code, headers=response_headers)
    except HTTPException:
        # Preserve the status/detail raised by make_request.
        raise
    except Exception as e:
        logger.error(f"Forwarding request failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@app.get("/v1/models")
@app.get("/hf/v1/models")
async def list_models():
    """Return the static model catalogue in OpenAI "list" format.

    Every entry shares the same created timestamp and owner; only the
    model id varies, so the payload is generated from a list of ids.
    """
    model_ids = [
        "AI21-Jamba-1.5-Large",
        "AI21-Jamba-1.5-Mini",
        "Cohere-command-r",
        "Cohere-command-r-08-2024",
        "Cohere-command-r-plus",
        "Cohere-command-r-plus-08-2024",
        "Cohere-embed-v3-english",
        "Cohere-embed-v3-multilingual",
        "Llama-3.2-90B-Vision-Instruct",
        "Llama-3.2-11B-Vision-Instruct",
        "Meta-Llama-3.1-405B-Instruct",
        "Meta-Llama-3.1-70B-Instruct",
        "Meta-Llama-3.1-8B-Instruct",
        "Meta-Llama-3-70B-Instruct",
        "Meta-Llama-3-8B-Instruct",
        "Mistral-large",
        "Mistral-large-2407",
        "Mistral-Nemo",
        "Mistral-small",
        "Ministral-3B",
        "gpt-4o",
        "gpt-4o-mini",
        "o1-preview",
        "o1-mini",
        "text-embedding-3-large",
        "text-embedding-3-small",
        "Phi-3.5-MoE-instruct",
        "Phi-3.5-vision-instruct",
        "Phi-3.5-mini-instruct",
        "Phi-3-medium-128k-instruct",
        "Phi-3-medium-4k-instruct",
        "Phi-3-mini-128k-instruct",
        "Phi-3-mini-4k-instruct",
        "Phi-3-small-128k-instruct",
        "Phi-3-small-8k-instruct",
        "jais-30b-chat",
        "Llama-3.3-70B-Instruct",
        "Mistral-large-2411",
    ]
    return {
        "object": "list",
        "data": [
            {"id": model_id, "object": "model", "created": 1709266800, "owned_by": "f-droid"}
            for model_id in model_ids
        ],
    }
@app.get("/health")
@app.get("/")
async def health_check():
    """Liveness probe — always reports the service as healthy."""
    logger.info("Health check endpoint called")
    status = {"status": "healthy"}
    return status
if __name__ == "__main__":
    # Run the proxy directly with uvicorn, listening on all interfaces.
    # NOTE(review): port 8080 is hard-coded — confirm it matches deployment.
    uvicorn.run(app, host="0.0.0.0", port=8080)