# app.py — GLM-5 Chat API (FastAPI service routing to the Hugging Face inference router)
import os
from typing import List, Literal, Optional
from fastapi import FastAPI, HTTPException
from fastapi.responses import JSONResponse
from openai import OpenAI
from pydantic import BaseModel, Field
app = FastAPI(title="GLM-5 Chat API", version="1.0.0")

# Accept the canonical HUGGINGFACEHUB_API_TOKEN, falling back to the common
# HF_TOKEN alias so either Space secret name works.
HF_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN") or os.getenv("HF_TOKEN")
MODEL_NAME = os.getenv("MODEL_NAME", "zai-org/GLM-5")

if not HF_TOKEN:
    # Fail fast at startup — every endpoint needs an authenticated client.
    # (Original message named HF_TOKEN while only HUGGINGFACEHUB_API_TOKEN
    # was read; message now matches the variables actually consulted.)
    raise RuntimeError(
        "HUGGINGFACEHUB_API_TOKEN (or HF_TOKEN) environment variable is missing."
    )

# OpenAI-compatible client pointed at the Hugging Face inference router.
client = OpenAI(
    base_url="https://router.huggingface.co/v1",
    api_key=HF_TOKEN,
)
class ChatMessage(BaseModel):
    """A single message in an OpenAI-style chat transcript."""

    # Role is restricted to the three standard chat roles.
    role: Literal["system", "user", "assistant"]
    content: str
class ChatRequest(BaseModel):
    """Request body for POST /chat: a full message list plus sampling options."""

    # At least one message is required (min_length=1).
    messages: List[ChatMessage] = Field(..., min_length=1)
    temperature: Optional[float] = 0.7
    max_tokens: Optional[int] = 700
    top_p: Optional[float] = 0.95
class SimpleChatRequest(BaseModel):
    """Request body for POST /chat/simple: one user message plus an optional system prompt."""

    message: str
    # Default system prompt is Arabic ("You are a smart and helpful assistant.
    # Answer naturally and clearly.") — runtime value, kept verbatim.
    system_prompt: Optional[str] = "أنت مساعد ذكي ومفيد. جاوب بشكل طبيعي وواضح."
    temperature: Optional[float] = 0.7
    max_tokens: Optional[int] = 700
    top_p: Optional[float] = 0.95
@app.get("/")
def root():
    """Service banner: liveness flag, service id, active model, and route list."""
    return dict(
        ok=True,
        service="glm5-fastapi",
        model=MODEL_NAME,
        endpoints=["/health", "/chat", "/chat/simple"],
    )
@app.get("/health")
def health():
    """Lightweight liveness probe reporting the configured model."""
    status = {"ok": True, "model": MODEL_NAME}
    return status
@app.post("/chat/simple")
def chat_simple(req: SimpleChatRequest):
    """Single-turn chat: wrap the user message with a system prompt, call the model,
    and return the reply, finish reason, and token usage (when the router reports it)."""
    try:
        completion = client.chat.completions.create(
            model=MODEL_NAME,
            messages=[
                {"role": "system", "content": req.system_prompt},
                {"role": "user", "content": req.message},
            ],
            temperature=req.temperature,
            max_tokens=req.max_tokens,
            top_p=req.top_p,
        )

        first = completion.choices[0]
        msg = first.message
        # Empty string when the choice carries no message content.
        answer = msg.content if (msg and msg.content) else ""

        token_stats = None
        if getattr(completion, "usage", None):
            u = completion.usage
            token_stats = {
                "prompt_tokens": u.prompt_tokens,
                "completion_tokens": u.completion_tokens,
                "total_tokens": u.total_tokens,
            }

        return JSONResponse(
            {
                "ok": True,
                "model": MODEL_NAME,
                "reply": answer,
                "finish_reason": getattr(first, "finish_reason", None),
                "usage": token_stats,
            }
        )
    except Exception as exc:
        # Surface upstream/router failures to the client as a 500.
        raise HTTPException(status_code=500, detail=str(exc))
@app.post("/chat")
def chat(req: ChatRequest):
    """Multi-turn chat: forward the caller's full message list to the model and
    return the reply, finish reason, and token usage (when the router reports it)."""
    try:
        completion = client.chat.completions.create(
            model=MODEL_NAME,
            messages=[m.model_dump() for m in req.messages],
            temperature=req.temperature,
            max_tokens=req.max_tokens,
            top_p=req.top_p,
        )

        first = completion.choices[0]
        msg = first.message
        # Empty string when the choice carries no message content.
        answer = msg.content if (msg and msg.content) else ""

        token_stats = None
        if getattr(completion, "usage", None):
            u = completion.usage
            token_stats = {
                "prompt_tokens": u.prompt_tokens,
                "completion_tokens": u.completion_tokens,
                "total_tokens": u.total_tokens,
            }

        return JSONResponse(
            {
                "ok": True,
                "model": MODEL_NAME,
                "reply": answer,
                "finish_reason": getattr(first, "finish_reason", None),
                "usage": token_stats,
            }
        )
    except Exception as exc:
        # Surface upstream/router failures to the client as a 500.
        raise HTTPException(status_code=500, detail=str(exc))