APi_English / app /routes /nlp.py
CrazyMonkey0
feat(nlp): change nlp model to Qwen/Qwen2.5-1.5B-Instruct
4d18a16
import os
import uuid

import torch
from fastapi import APIRouter, Request
from fastapi.concurrency import run_in_threadpool
from fastapi.responses import JSONResponse
from pydantic import BaseModel
from transformers import AutoModelForCausalLM, AutoTokenizer

from .tts import send_audio
# Router on which this module registers its endpoints (see the `/chat` route).
router = APIRouter()

# System message placed first in every conversation that `chat` sends to the
# language model.
SYSTEM_PROMPT = """you are emma an advanced AI assistant for English language learning."""
class ChatRequest(BaseModel):
    """Request body accepted by the ``/chat`` endpoint."""

    # Text written by the user; forwarded to the model as the "user" turn.
    message: str
# Load NLP model
def load_model_nlp(model_id: str = "Qwen/Qwen2.5-1.5B-Instruct"):
    """Load the causal language model and its tokenizer for CPU inference.

    Args:
        model_id: Identifier of the checkpoint handed to ``from_pretrained``
            for both the tokenizer and the model. Defaults to the Qwen2.5
            1.5B instruct checkpoint, so existing zero-argument callers are
            unaffected.

    Returns:
        A ``(model, tokenizer)`` tuple. The model is loaded in float32 on
        the CPU and switched to evaluation mode.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=torch.float32,  # CPU friendly
        device_map="cpu",
    )
    # The model is only used for inference, so leave training mode.
    model.eval()
    return model, tokenizer
def _generate_reply(model, tokenizer, text: str) -> str:
    """Run one blocking chat completion and return the decoded reply text."""
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": text},
    ]
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        tokenize=True,
        return_dict=True,
        return_tensors="pt",
    ).to(model.device)
    prompt_length = inputs["input_ids"].shape[-1]

    # Grad mode is thread-local in PyTorch, so it has to be disabled here, in
    # the thread that actually executes the generation.
    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=150,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
        )

    # The generated sequence starts with the prompt tokens; decode only what
    # comes after them.
    return tokenizer.decode(
        output[0][prompt_length:],
        skip_special_tokens=True,
    ).strip()


@router.post("/chat")
async def chat(request: Request, chat_request: ChatRequest):
    """Answer a user message with generated text and a TTS audio reference.

    The model and tokenizer are read from ``request.app.state`` (attributes
    ``model_nlp`` and ``tokenizer_nlp``). Text generation is synchronous and
    CPU-bound, so it is executed in the worker thread pool instead of
    directly inside this coroutine, which would otherwise block the event
    loop (and every other request) for the whole generation.

    Args:
        request: Incoming request; gives access to the application state and
            is passed on to ``send_audio``.
        chat_request: Parsed request body carrying the user's message.

    Returns:
        A ``JSONResponse`` with the keys ``"response"`` (generated text) and
        ``"audio"`` (the value returned by ``send_audio``).
    """
    text = chat_request.message
    model = request.app.state.model_nlp
    tokenizer = request.app.state.tokenizer_nlp

    response_text = await run_in_threadpool(
        _generate_reply, model, tokenizer, text
    )

    # Generate audio using TTS
    # NOTE(review): send_audio is still called synchronously, exactly as
    # before; if it is also slow, consider offloading it too -- confirm
    # against its implementation in .tts.
    audio_name = send_audio(request, response_text)

    return JSONResponse(
        {
            "response": response_text,
            "audio": audio_name,
        }
    )