qwen-api-fastapi / application.py
convaiinnovations's picture
Upload folder using huggingface_hub
e2addc4 verified
Raw
History Blame Contribute Delete
2.24 kB
import asyncio
import logging
import os
import threading
import time
import uuid
from datetime import datetime, timedelta

import jwt
import torch
from dotenv import load_dotenv
from fastapi import FastAPI, Depends, HTTPException
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
from pydantic import BaseModel, Field
from transformers import AutoModelForCausalLM, AutoTokenizer
# --- Load Environment Variables ---
# Pull variables from a local .env file (if any) into the process environment
# before they are read below.
load_dotenv()

# Secret used to verify HS256-signed bearer tokens. The hard-coded fallback is
# kept so existing deployments keep starting, but it is visible in the source:
# anyone who knows it can mint tokens this service will accept. Warn loudly
# whenever it is in effect instead of falling back silently.
SECRET_KEY = os.getenv("JWT_SECRET_KEY", "default-fallback-secret")
if "JWT_SECRET_KEY" not in os.environ:
    logging.getLogger(__name__).warning(
        "JWT_SECRET_KEY is not set; falling back to an insecure built-in "
        "secret. Set JWT_SECRET_KEY before exposing this service."
    )

# Only this algorithm is accepted when decoding tokens.
ALGORITHM = "HS256"

# Hugging Face model id used for both the tokenizer and the model weights.
MODEL_NAME = "Qwen/Qwen2.5-0.5B-Instruct"

# FastAPI security scheme that supplies the bearer credentials to verify_token.
security = HTTPBearer()
def verify_token(credentials: HTTPAuthorizationCredentials = Depends(security)):
    """Validate the request's bearer token and return its decoded payload.

    Intended to be used as a FastAPI dependency.

    Args:
        credentials: Bearer credentials supplied by the ``security`` scheme.

    Returns:
        The payload produced by ``jwt.decode`` for a token signed with
        ``SECRET_KEY`` using ``ALGORITHM``.

    Raises:
        HTTPException: 401 when the token cannot be decoded or verified.
    """
    # NOTE(review): no claims are marked as required here, so a token without
    # an `exp` claim is presumably accepted indefinitely -- confirm whether
    # expiry should be mandatory.
    try:
        payload = jwt.decode(
            credentials.credentials,
            SECRET_KEY,
            algorithms=[ALGORITHM],
        )
    except jwt.PyJWTError as exc:
        # Only token problems map to 401; unrelated failures are allowed to
        # surface as server errors rather than being masked as "Unauthorized".
        raise HTTPException(
            status_code=401,
            detail="Unauthorized",
            headers={"WWW-Authenticate": "Bearer"},
        ) from exc
    return payload
# --- FastAPI Setup ---
app = FastAPI()

# The tokenizer and model are created once, when this module is imported, and
# the same instances are reused by the request handlers below.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype="auto",
    device_map="auto",
)
class ChatMessage(BaseModel):
    """A single message of a chat conversation."""

    # Who is speaking; handed unchanged to the tokenizer's chat template.
    role: str
    # The text of the message.
    content: str
class ChatCompletionRequest(BaseModel):
    """Request body accepted by ``POST /v1/chat/completions``."""

    # Conversation messages, in the order they are rendered into the prompt.
    messages: list[ChatMessage]
    # Maximum number of tokens to generate (defaults to 100). Must be at least
    # 1: zero or negative values are rejected during request validation rather
    # than failing later inside generation.
    # NOTE(review): there is no upper bound, so a caller can request an
    # arbitrarily long generation -- consider capping it.
    max_tokens: int = Field(default=100, ge=1)
@app.get("/")
def read_root():
    """Return a fixed status message showing that the API is up."""
    status = {"message": "Qwen OpenAI-style API is running with .env auth"}
    return status
# Generation runs on a worker thread (see chat_generate). This lock keeps the
# shared tokenizer/model in use by one request at a time, which is what the
# previous in-event-loop implementation guaranteed implicitly.
_generation_lock = threading.Lock()


def _eos_token_ids():
    """Return the set of token ids that mark a natural end of generation."""
    eos = getattr(model.generation_config, "eos_token_id", None)
    if eos is None:
        eos = tokenizer.eos_token_id
    if eos is None:
        return set()
    if isinstance(eos, int):
        return {eos}
    return set(eos)


def _generate_reply(chat_msgs, max_new_tokens):
    """Run one blocking generation for ``chat_msgs``.

    Returns:
        A ``(text, prompt_tokens, completion_tokens, finish_reason)`` tuple,
        where ``finish_reason`` is ``"length"`` when the output was cut off by
        ``max_new_tokens`` and ``"stop"`` otherwise.
    """
    with _generation_lock:
        prompt = tokenizer.apply_chat_template(
            chat_msgs,
            tokenize=False,
            add_generation_prompt=True,
        )
        model_inputs = tokenizer([prompt], return_tensors="pt").to(model.device)
        output_ids = model.generate(**model_inputs, max_new_tokens=max_new_tokens)

        # generate() returns prompt + continuation; keep only the continuation.
        prompt_tokens = model_inputs.input_ids.shape[1]
        new_tokens = output_ids[0][prompt_tokens:]
        text = tokenizer.decode(new_tokens, skip_special_tokens=True)

    completion_tokens = len(new_tokens)
    ended_on_eos = (
        completion_tokens > 0 and int(new_tokens[-1]) in _eos_token_ids()
    )
    hit_limit = completion_tokens >= max_new_tokens
    finish_reason = "length" if hit_limit and not ended_on_eos else "stop"
    return text, prompt_tokens, completion_tokens, finish_reason


@app.post("/v1/chat/completions")
async def chat_generate(request: ChatCompletionRequest, user=Depends(verify_token)):
    """Generate an assistant reply for the supplied conversation.

    Args:
        request: Conversation messages and the generation token limit.
        user: Decoded token payload from ``verify_token``; unused beyond
            enforcing authentication.

    Returns:
        A chat-completion style dict with a single assistant choice.
    """
    # Build plain dicts explicitly rather than via the pydantic-version-specific
    # .dict() / .model_dump() methods.
    chat_msgs = [
        {"role": msg.role, "content": msg.content} for msg in request.messages
    ]

    # Model inference is blocking, CPU/GPU-bound work; running it directly in
    # this coroutine would freeze the event loop for every other request.
    reply, prompt_tokens, completion_tokens, finish_reason = await asyncio.to_thread(
        _generate_reply, chat_msgs, request.max_tokens
    )

    return {
        # A second-resolution timestamp collides for concurrent requests.
        "id": f"chatcmpl-{uuid.uuid4().hex}",
        "object": "chat.completion",
        "created": int(time.time()),
        "model": MODEL_NAME,
        "choices": [{
            "index": 0,
            "message": {"role": "assistant", "content": reply},
            "finish_reason": finish_reason,
        }],
        "usage": {
            "prompt_tokens": prompt_tokens,
            "completion_tokens": completion_tokens,
            "total_tokens": prompt_tokens + completion_tokens,
        },
    }