"""HTTP service that generates replies with a fine-tuned T5 model.

Exposes a single ``POST /generate`` endpoint. The incoming message is tagged
with a routing prefix (``émotion:``, ``emotion:`` or ``chat:``) before being
passed to the model, and the decoded model output is returned as JSON.
"""

import re
import threading
from typing import Optional

import torch
import uvicorn
from fastapi import FastAPI
from fastapi.concurrency import run_in_threadpool
from pydantic import BaseModel
from transformers import T5ForConditionalGeneration, T5Tokenizer

app = FastAPI()

MODEL_NAME = "obx0x3/empathy-dementia"

# Loaded once at import time so every request reuses the same objects.
tokenizer = T5Tokenizer.from_pretrained(MODEL_NAME)
model = T5ForConditionalGeneration.from_pretrained(MODEL_NAME)

# Upper bound on encoded input length. Without it, an arbitrarily long client
# message translates directly into unbounded compute and memory.
# NOTE(review): 512 is the usual T5 input window — confirm for this checkpoint.
MAX_INPUT_TOKENS = 512

# Whole-word patterns. Plain substring tests misfire on ordinary English words
# ("pas" in "pass", "je" in "project", "how" in "shadows", "what" in
# "somewhat").
_FRENCH_WORD_RE = re.compile(r"\b(?:je|tu|c'est|j'ai|où|suis|pas|peux)\b")
_QUESTION_WORD_RE = re.compile(r"\b(?:why|how|what|when|where)\b")

# Matched as substrings on purpose so inflected forms such as "feeling" or
# "forgotten" still count as emotional cues.
_EMOTION_CUES = (
    "feel",
    "i'm",
    "i am",
    "sad",
    "scared",
    "lonely",
    "happy",
    "forgot",
)

# Only one generation runs at a time, as was the case when inference ran
# directly on the event loop; the lock keeps that property now that inference
# happens on worker threads.
_inference_lock = threading.Lock()


class PromptRequest(BaseModel):
    """Request body accepted by ``POST /generate``."""

    # Text the model should respond to.
    message: str
    # Optional language code; when missing or null the language is detected
    # from ``message``.
    lang: Optional[str] = None


def _normalize(text: str) -> str:
    """Lower-case *text* and fold the curly apostrophe (U+2019) to ``'``."""
    return text.lower().replace("\u2019", "'")


def detect_language(text: str) -> str:
    """Return ``"fr"`` if *text* contains a common French word, else ``"en"``.

    This is a keyword heuristic, not real language identification: it looks
    for a handful of French words as whole words, case-insensitively, and
    accepts both straight and curly apostrophes.
    """
    return "fr" if _FRENCH_WORD_RE.search(_normalize(text)) else "en"


def prefix_message(message: str, lang: str) -> str:
    """Prepend the routing prefix the model input is tagged with.

    Args:
        message: The raw user message.
        lang: Language code; compared case-insensitively, ``None`` is treated
            as "not French".

    Returns:
        ``"émotion: <message>"`` for French; otherwise ``"chat: <message>"``
        when the message contains a question word or ``?``,
        ``"emotion: <message>"`` when it contains an emotional cue, and
        ``"chat: <message>"`` as the fallback. The question check takes
        precedence over the emotion check.
    """
    if (lang or "").strip().lower() == "fr":
        return f"émotion: {message}"

    text = _normalize(message)
    if "?" in text or _QUESTION_WORD_RE.search(text):
        return f"chat: {message}"
    if any(cue in text for cue in _EMOTION_CUES):
        return f"emotion: {message}"
    return f"chat: {message}"


def _generate_reply(input_text: str) -> str:
    """Run beam-search generation for *input_text* and return the decoded text.

    This call is blocking and must not be invoked directly on the event loop.
    """
    inputs = tokenizer.encode(
        input_text,
        return_tensors="pt",
        truncation=True,
        max_length=MAX_INPUT_TOKENS,
    )
    with _inference_lock, torch.no_grad():
        outputs = model.generate(
            inputs,
            max_length=128,
            num_beams=4,
            early_stopping=True,
        )
    return tokenizer.decode(outputs[0], skip_special_tokens=True).strip()


@app.post("/generate")
async def generate_response(payload: PromptRequest):
    """Generate a reply for ``payload.message``.

    Returns:
        A dict with ``"reply"`` (the stripped model output) and ``"language"``
        (the caller-supplied ``lang``, or the detected one when none is given).
    """
    lang = payload.lang or detect_language(payload.message)
    input_text = prefix_message(payload.message, lang)
    # Generation is blocking work; running it in a worker thread keeps the
    # event loop free to accept and serve other requests meanwhile.
    reply = await run_in_threadpool(_generate_reply, input_text)
    return {"reply": reply, "language": lang}


if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)