| import os |
| from typing import List, Literal, Optional |
|
|
| from fastapi import FastAPI, HTTPException |
| from fastapi.responses import JSONResponse |
| from openai import OpenAI |
| from pydantic import BaseModel, Field |
|
|
app = FastAPI(title="GLM-5 Chat API", version="1.0.0")

# Hugging Face router credentials and target model (both overridable via env).
HF_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
MODEL_NAME = os.getenv("MODEL_NAME", "zai-org/GLM-5")

# Fail fast at import time: every endpoint needs an authenticated client.
# Bug fix: the message previously told operators to set "HF_TOKEN", but the
# variable actually read above is HUGGINGFACEHUB_API_TOKEN.
if not HF_TOKEN:
    raise RuntimeError(
        "HUGGINGFACEHUB_API_TOKEN environment variable is missing."
    )

# OpenAI-compatible client pointed at the Hugging Face inference router.
client = OpenAI(
    base_url="https://router.huggingface.co/v1",
    api_key=HF_TOKEN,
)
|
|
|
|
class ChatMessage(BaseModel):
    """One turn of a conversation, in OpenAI chat-completions message format."""
    # Who produced this turn; restricted to the three standard chat roles.
    role: Literal["system", "user", "assistant"]
    # Text body of the turn.
    content: str
|
|
|
|
class ChatRequest(BaseModel):
    """Request body for /chat: a full message history plus sampling options."""
    # Conversation history; min_length=1 rejects an empty list (pydantic v2).
    messages: List[ChatMessage] = Field(..., min_length=1)
    # Sampling temperature forwarded verbatim to the completion call.
    temperature: Optional[float] = 0.7
    # Upper bound on the number of generated tokens.
    max_tokens: Optional[int] = 700
    # Nucleus-sampling probability mass cutoff.
    top_p: Optional[float] = 0.95
|
|
|
|
class SimpleChatRequest(BaseModel):
    """Request body for /chat/simple: one user message plus sampling options."""
    # The single user message to answer.
    message: str
    # System prompt prepended to the conversation. The default is Arabic for:
    # "You are a smart and helpful assistant. Answer naturally and clearly."
    system_prompt: Optional[str] = "أنت مساعد ذكي ومفيد. جاوب بشكل طبيعي وواضح."
    # Sampling temperature forwarded verbatim to the completion call.
    temperature: Optional[float] = 0.7
    # Upper bound on the number of generated tokens.
    max_tokens: Optional[int] = 700
    # Nucleus-sampling probability mass cutoff.
    top_p: Optional[float] = 0.95
|
|
|
|
@app.get("/")
def root():
    """Service metadata: liveness flag, configured model, and route listing."""
    info = {"ok": True, "service": "glm5-fastapi", "model": MODEL_NAME}
    info["endpoints"] = ["/health", "/chat", "/chat/simple"]
    return info
|
|
|
|
@app.get("/health")
def health():
    """Lightweight health probe reporting the model currently configured."""
    status = dict(ok=True, model=MODEL_NAME)
    return status
|
|
|
|
@app.post("/chat/simple")
def chat_simple(req: SimpleChatRequest):
    """Single-turn chat: wrap one user message with the system prompt, call
    the model, and return the reply text, finish reason, and token usage.

    Any failure — upstream error or malformed response — becomes an HTTP 500
    whose detail is the exception text.
    """
    try:
        # Everything stays inside the try so response-shape surprises
        # (e.g. an empty choices list) are also reported as a 500.
        completion = client.chat.completions.create(
            model=MODEL_NAME,
            messages=[
                {"role": "system", "content": req.system_prompt},
                {"role": "user", "content": req.message},
            ],
            temperature=req.temperature,
            max_tokens=req.max_tokens,
            top_p=req.top_p,
        )

        first = completion.choices[0]

        # Empty string when the provider returned no message content.
        text = first.message.content if (first.message and first.message.content) else ""

        token_stats = None
        raw_usage = getattr(completion, "usage", None)
        if raw_usage:
            token_stats = {
                "prompt_tokens": raw_usage.prompt_tokens,
                "completion_tokens": raw_usage.completion_tokens,
                "total_tokens": raw_usage.total_tokens,
            }

        payload = {
            "ok": True,
            "model": MODEL_NAME,
            "reply": text,
            "finish_reason": getattr(first, "finish_reason", None),
            "usage": token_stats,
        }
        return JSONResponse(payload)

    except Exception as exc:
        # Boundary handler: surface the failure as a 500 with the error text.
        raise HTTPException(status_code=500, detail=str(exc))
|
|
|
|
@app.post("/chat")
def chat(req: ChatRequest):
    """Multi-turn chat: forward the caller's full message history to the
    model and return the reply text, finish reason, and token usage.

    Any failure — upstream error or malformed response — becomes an HTTP 500
    whose detail is the exception text.
    """
    try:
        # Everything stays inside the try so response-shape surprises
        # (e.g. an empty choices list) are also reported as a 500.
        completion = client.chat.completions.create(
            model=MODEL_NAME,
            messages=[msg.model_dump() for msg in req.messages],
            temperature=req.temperature,
            max_tokens=req.max_tokens,
            top_p=req.top_p,
        )

        first = completion.choices[0]

        # Empty string when the provider returned no message content.
        text = first.message.content if (first.message and first.message.content) else ""

        token_stats = None
        raw_usage = getattr(completion, "usage", None)
        if raw_usage:
            token_stats = {
                "prompt_tokens": raw_usage.prompt_tokens,
                "completion_tokens": raw_usage.completion_tokens,
                "total_tokens": raw_usage.total_tokens,
            }

        payload = {
            "ok": True,
            "model": MODEL_NAME,
            "reply": text,
            "finish_reason": getattr(first, "finish_reason", None),
            "usage": token_stats,
        }
        return JSONResponse(payload)

    except Exception as exc:
        # Boundary handler: surface the failure as a 500 with the error text.
        raise HTTPException(status_code=500, detail=str(exc))