"""FastAPI service exposing OpenAI-compatible chat endpoints for a
Hugging Face-routed model (default: zai-org/GLM-5)."""

import os
from typing import List, Literal, Optional

from fastapi import FastAPI, HTTPException
from fastapi.responses import JSONResponse
from openai import OpenAI
from pydantic import BaseModel, Field

app = FastAPI(title="GLM-5 Chat API", version="1.0.0")

# Credentials and model selection come from the environment.
HF_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
MODEL_NAME = os.getenv("MODEL_NAME", "zai-org/GLM-5")

if not HF_TOKEN:
    # Fix: the original message named "HF_TOKEN", but the variable actually
    # read above is HUGGINGFACEHUB_API_TOKEN — name the real one.
    raise RuntimeError("HUGGINGFACEHUB_API_TOKEN environment variable is missing.")

# OpenAI-compatible client pointed at the Hugging Face inference router.
client = OpenAI(
    base_url="https://router.huggingface.co/v1",
    api_key=HF_TOKEN,
)


class ChatMessage(BaseModel):
    """A single message in an OpenAI-style chat transcript."""

    role: Literal["system", "user", "assistant"]
    content: str


class ChatRequest(BaseModel):
    """Full multi-message chat request (at least one message required)."""

    messages: List[ChatMessage] = Field(..., min_length=1)
    temperature: Optional[float] = 0.7
    max_tokens: Optional[int] = 700
    top_p: Optional[float] = 0.95


class SimpleChatRequest(BaseModel):
    """Single-turn request: one user message plus an optional system prompt."""

    message: str
    system_prompt: Optional[str] = "أنت مساعد ذكي ومفيد. جاوب بشكل طبيعي وواضح."
    temperature: Optional[float] = 0.7
    max_tokens: Optional[int] = 700
    top_p: Optional[float] = 0.95


def _run_completion(messages, temperature, max_tokens, top_p):
    """Call the upstream model and normalize the result into a JSON payload.

    Shared by /chat and /chat/simple, which previously duplicated this logic
    verbatim. Any upstream or parsing failure is surfaced as HTTP 500,
    matching the original per-endpoint try/except behavior.
    """
    try:
        response = client.chat.completions.create(
            model=MODEL_NAME,
            messages=messages,
            temperature=temperature,
            max_tokens=max_tokens,
            top_p=top_p,
        )
        choice = response.choices[0]

        # Tolerate a missing/empty message body: fall back to "".
        reply = ""
        if choice.message and choice.message.content:
            reply = choice.message.content
        finish_reason = getattr(choice, "finish_reason", None)

        # Token accounting is optional in the upstream response.
        usage = None
        if getattr(response, "usage", None):
            usage = {
                "prompt_tokens": response.usage.prompt_tokens,
                "completion_tokens": response.usage.completion_tokens,
                "total_tokens": response.usage.total_tokens,
            }

        return JSONResponse(
            {
                "ok": True,
                "model": MODEL_NAME,
                "reply": reply,
                "finish_reason": finish_reason,
                "usage": usage,
            }
        )
    except Exception as e:
        # Boundary handler: expose the upstream failure reason as a 500.
        raise HTTPException(status_code=500, detail=str(e))


@app.get("/")
def root():
    """Service metadata and endpoint listing."""
    return {
        "ok": True,
        "service": "glm5-fastapi",
        "model": MODEL_NAME,
        "endpoints": ["/health", "/chat", "/chat/simple"],
    }


@app.get("/health")
def health():
    """Liveness probe."""
    return {"ok": True, "model": MODEL_NAME}


@app.post("/chat/simple")
def chat_simple(req: SimpleChatRequest):
    """Single-turn chat: wraps the user message with the configured system prompt."""
    return _run_completion(
        [
            {"role": "system", "content": req.system_prompt},
            {"role": "user", "content": req.message},
        ],
        req.temperature,
        req.max_tokens,
        req.top_p,
    )


@app.post("/chat")
def chat(req: ChatRequest):
    """Multi-message chat with caller-controlled transcript."""
    return _run_completion(
        [m.model_dump() for m in req.messages],
        req.temperature,
        req.max_tokens,
        req.top_p,
    )