# Avatar/Back-end/services/llm_service.py - HOLOKIA-AVATAR LLM service
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import logging
import uvicorn
from langdetect import detect, DetectorFactory, LangDetectException
from langchain_groq import ChatGroq
from dotenv import load_dotenv
import os
from fastapi.middleware.cors import CORSMiddleware
from typing import List, Dict
import re
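# Load environment variables (e.g. GROQ_API_KEY) from a local .env file.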
load_dotenv()
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
logger = logging.getLogger("LLM_Service")
app = FastAPI()
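# Allow the local front-end dev server (port 5173, e.g. a Vite dev server) to call this API.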
origins = ["http://localhost:5173", "http://127.0.0.1:5173"]
app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
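# Fail fast at startup if the Groq API key is not configured.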
api_key = os.getenv("GROQ_API_KEY")
if not api_key:
    logger.error("GROQ_API_KEY manquante dans l'environnement")
    raise RuntimeError("GROQ_API_KEY non configurée")
MODEL_NAME = "meta-llama/llama-4-scout-17b-16e-instruct"
logger.info(f"Chargement du modèle LLM Groq : {MODEL_NAME}")
llm = ChatGroq(model=MODEL_NAME, temperature=0)
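# Make langdetect deterministic, and list short greetings that it tends to misclassify.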
DetectorFactory.seed = 0
EN_WORDS = {"hi", "hello", "hey", "ok", "thanks", "bye", "yes", "no"}
AR_WORDS = {"salam", "salaam", "marhaban", "مرحبا", "سلام", "شكرا", "أهلا"}
FR_WORDS = {"bonjour", "salut", "merci", "oui", "non", "s'il vous plaît", "svp"}
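# In-memory conversation history per user_id (not persisted across restarts).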
user_histories: Dict[str, List[Dict[str, str]]] = {}
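# Lowercase, strip punctuation and collapse whitespace before keyword matching.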
def normalize_text(text: str) -> str:
    text = re.sub(r'[^\w\s]', '', text.lower().strip())
    text = re.sub(r'\s+', ' ', text)
    return text
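# Resolve the user's language: exact keyword match first, then the recent history
# for very short messages, then langdetect, falling back to the default (French).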
def detect_language(user_text: str, user_id: str, default_lang: str = "fr") -> str:
    if not user_text:
        return default_lang
    text_normalized = normalize_text(user_text)
    if text_normalized in EN_WORDS:
        return "en"
    if text_normalized in AR_WORDS:
        return "ar"
    if text_normalized in FR_WORDS:
        return "fr"
    history = user_histories.get(user_id, [])
    if history and len(user_text) < 10:
        last_msgs = [msg["content"] for msg in history[-2:] if msg["role"] == "user"]
        for msg in last_msgs:
            try:
                lang = detect(msg)
                if lang in {"fr", "en", "ar"}:
                    return lang
            except LangDetectException:
                pass
    try:
        lang = detect(user_text)
    except LangDetectException:
        return default_lang
    if lang not in {"fr", "en", "ar"}:
        return default_lang
    return lang
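# Request schema for /generate: the user's message, a stable user_id, and an
# optional client-side history of {"role": ..., "content": ...} messages.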
class LLMRequest(BaseModel):
    text: str
    user_id: str
    history: List[Dict[str, str]] = []
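# Main endpoint: detect the language, pick a language-specific system prompt,
# send the recent history to the Groq model and return the reply plus the updated history.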
@app.post("/generate")
async def generate_response(request: LLMRequest):
    user_text = request.text.strip()
    user_id = request.user_id
    if not user_text:
        raise HTTPException(status_code=400, detail="Le texte ne peut pas être vide")
    if not user_id:
        raise HTTPException(status_code=400, detail="L'identifiant utilisateur est requis")
    lang = detect_language(user_text, user_id, default_lang="fr")
    logger.info(f"Requête LLM reçue ({len(user_text)} caractères), langue: {lang}, user_id: {user_id}")
    if lang == "fr":
        system_prompt = (
            "Tu es HOLOKIA, un avatar IA conversationnel. "
            "Réponds uniquement aux questions posées avec précision, clarté et simplicité. "
            "Sois toujours poli et chaleureux dans ta manière de parler. "
            "Si la question n’est pas claire, demande gentiment une précision. "
            "Ne donne pas d’informations inutiles et reste concentré sur le sujet."
        )
    elif lang == "ar":
        system_prompt = (
            "أنت HOLOKIA، شخصية ذكاء اصطناعي محادثة. "
            "أجب فقط على الأسئلة المطروحة بدقة ووضوح وبأسلوب بسيط. "
            "كن دائمًا مهذبًا ودودًا في طريقة كلامك. "
            "إذا لم يكن السؤال واضحًا، اطلب بلطف توضيحًا. "
            "لا تضف معلومات غير ضرورية وابقَ مركزًا على الموضوع."
        )
    else:
        system_prompt = (
            "You are HOLOKIA, a conversational AI avatar. "
            "Answer only the questions asked, with accuracy, clarity, and simplicity. "
            "Always remain polite and friendly in your tone. "
            "If the question is unclear, kindly ask for clarification. "
            "Do not add unnecessary information and stay focused on the topic."
        )
    # Prefer the history sent by the client, otherwise use the server-side copy.
    history = request.history if request.history else user_histories.get(user_id, [])
    history.append({"role": "user", "content": user_text})
    messages = [("system", system_prompt)] + [
        (msg["role"], msg["content"]) for msg in history[-3:]
    ]
    total_length = sum(len(msg[1]) for msg in messages)
    if total_length > 4000:
        logger.warning(f"Prompt trop long ({total_length} caractères), troncature")
        # Keep the system prompt plus at most the last two history messages,
        # without duplicating the system prompt when the history is short.
        messages = messages[:1] + messages[1:][-2:]
    logger.debug(f"Prompt envoyé au LLM: {messages}")
    try:
        response = llm.invoke(messages)
        response_text = response.content.strip()
        history.append({"role": "assistant", "content": response_text})
        user_histories[user_id] = history[-5:]
        logger.info("Réponse LLM générée avec succès")
        return {
            "response": response_text,
            "lang": lang,
            "history": user_histories[user_id]
        }
    except Exception as e:
        logger.error(f"Erreur LLM: {str(e)}", exc_info=True)
        raise HTTPException(status_code=500, detail="Échec de génération de réponse")
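# Simple liveness check.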
@app.get("/health")
async def health_check():
return {"status": "ok", "service": "llm"}
@app.options("/generate")
async def options_generate():
return {"message": "OK"}
def run_service():
    uvicorn.run(app, host="0.0.0.0", port=5002)
if __name__ == "__main__":
    run_service()
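# Example request, assuming the service is running locally on port 5002:
#   curl -X POST http://localhost:5002/generate \
#        -H "Content-Type: application/json" \
#        -d '{"text": "Bonjour", "user_id": "demo"}'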