import os
from typing import List, Literal, Optional
from fastapi import FastAPI, HTTPException
from fastapi.responses import JSONResponse
from openai import OpenAI
from pydantic import BaseModel, Field
app = FastAPI(title="GLM-5 Chat API", version="1.0.0")

# Credentials and target model come from the environment so deployments can
# swap models or tokens without code changes.
HF_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
MODEL_NAME = os.getenv("MODEL_NAME", "zai-org/GLM-5")

if not HF_TOKEN:
    # Fail fast at import time. Bug fix: the original message told the
    # operator to set HF_TOKEN, but HF_TOKEN is only the local name — the
    # variable actually read above is HUGGINGFACEHUB_API_TOKEN.
    raise RuntimeError("HUGGINGFACEHUB_API_TOKEN environment variable is missing.")

# OpenAI-compatible client pointed at the Hugging Face inference router.
client = OpenAI(
    base_url="https://router.huggingface.co/v1",
    api_key=HF_TOKEN,
)
class ChatMessage(BaseModel):
    """One message in an OpenAI-style chat conversation."""

    # Only the three standard chat roles are accepted; anything else is a 422.
    role: Literal["system", "user", "assistant"]
    content: str
class ChatRequest(BaseModel):
    """Request body for POST /chat: a full multi-turn message list plus
    sampling parameters forwarded verbatim to the model."""

    # At least one message is required (min_length=1 → 422 on empty list).
    messages: List[ChatMessage] = Field(..., min_length=1)
    temperature: Optional[float] = 0.7
    max_tokens: Optional[int] = 700
    top_p: Optional[float] = 0.95
class SimpleChatRequest(BaseModel):
    """Request body for POST /chat/simple: one user message with an optional
    system prompt and sampling parameters."""

    message: str
    # Default system prompt (Arabic): "You are a smart, helpful assistant.
    # Answer naturally and clearly." Kept verbatim — it is runtime behavior.
    system_prompt: Optional[str] = "أنت مساعد ذكي ومفيد. جاوب بشكل طبيعي وواضح."
    temperature: Optional[float] = 0.7
    max_tokens: Optional[int] = 700
    top_p: Optional[float] = 0.95
@app.get("/")
def root():
    """Service banner: liveness flag, configured model, and available routes."""
    banner = {
        "ok": True,
        "service": "glm5-fastapi",
        "model": MODEL_NAME,
        "endpoints": ["/health", "/chat", "/chat/simple"],
    }
    return banner
@app.get("/health")
def health():
    """Lightweight health probe reporting the model currently configured."""
    status = {"ok": True}
    status["model"] = MODEL_NAME
    return status
@app.post("/chat/simple")
def chat_simple(req: SimpleChatRequest):
    """Single-turn chat: wrap one user message (plus an optional system
    prompt) and return the model reply, finish reason, and token usage.

    Raises:
        HTTPException(500): if the upstream inference call (or response
            parsing) fails; the detail carries the error text.
    """
    try:
        # Robustness fix: system_prompt is Optional, so a client may send
        # "system_prompt": null explicitly. Forwarding a None content
        # upstream is an invalid message — omit the system turn instead.
        messages = []
        if req.system_prompt:
            messages.append({"role": "system", "content": req.system_prompt})
        messages.append({"role": "user", "content": req.message})

        response = client.chat.completions.create(
            model=MODEL_NAME,
            messages=messages,
            temperature=req.temperature,
            max_tokens=req.max_tokens,
            top_p=req.top_p,
        )

        choice = response.choices[0]
        # message.content can be None on some responses; normalize to "".
        reply = ""
        if choice.message and choice.message.content:
            reply = choice.message.content
        finish_reason = getattr(choice, "finish_reason", None)

        # Token accounting is optional in the upstream response.
        usage = None
        if getattr(response, "usage", None):
            usage = {
                "prompt_tokens": response.usage.prompt_tokens,
                "completion_tokens": response.usage.completion_tokens,
                "total_tokens": response.usage.total_tokens,
            }

        return JSONResponse(
            {
                "ok": True,
                "model": MODEL_NAME,
                "reply": reply,
                "finish_reason": finish_reason,
                "usage": usage,
            }
        )
    except Exception as e:
        # Surface any upstream/client failure as a 500 with the error text.
        raise HTTPException(status_code=500, detail=str(e))
@app.post("/chat")
def chat(req: ChatRequest):
    """Multi-turn chat: forward the caller-supplied message list to the model
    and return the assistant reply, finish reason, and token usage.

    Raises:
        HTTPException(500): if the upstream call or response parsing fails.
    """
    try:
        completion = client.chat.completions.create(
            model=MODEL_NAME,
            messages=[msg.model_dump() for msg in req.messages],
            temperature=req.temperature,
            max_tokens=req.max_tokens,
            top_p=req.top_p,
        )

        first = completion.choices[0]
        msg = first.message
        # message.content can be None; fall back to an empty reply.
        reply = msg.content if (msg and msg.content) else ""
        finish_reason = getattr(first, "finish_reason", None)

        # Usage is optional in the upstream response.
        stats = getattr(completion, "usage", None)
        usage = (
            {
                "prompt_tokens": stats.prompt_tokens,
                "completion_tokens": stats.completion_tokens,
                "total_tokens": stats.total_tokens,
            }
            if stats
            else None
        )

        body = {
            "ok": True,
            "model": MODEL_NAME,
            "reply": reply,
            "finish_reason": finish_reason,
            "usage": usage,
        }
        return JSONResponse(body)
    except Exception as err:
        # Any failure inside the try (upstream or parsing) maps to a 500,
        # exactly as in the original handler.
        raise HTTPException(status_code=500, detail=str(err))