Spaces:
Sleeping
Sleeping
| from fastapi import FastAPI, HTTPException | |
| from pydantic import BaseModel | |
| import logging | |
| import uvicorn | |
| from langdetect import detect, DetectorFactory, LangDetectException | |
| from langchain_groq import ChatGroq | |
| from dotenv import load_dotenv | |
| import os | |
| from fastapi.middleware.cors import CORSMiddleware | |
| from typing import List, Dict | |
| import re | |
# --- Application bootstrap: env vars, logging, FastAPI app, CORS, LLM client ---

# Load environment variables (GROQ_API_KEY) from a local .env file, if present.
load_dotenv()

# Timestamped log lines: "2024-01-01 12:00:00 [INFO] message".
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
logger = logging.getLogger("LLM_Service")

app = FastAPI()

# Dev front-end origins (Vite default dev-server port) allowed through CORS.
origins = ["http://localhost:5173", "http://127.0.0.1:5173"]
app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Fail fast at import time when the Groq API key is missing, instead of
# failing later on the first LLM call.
api_key = os.getenv("GROQ_API_KEY")
if not api_key:
    logger.error("GROQ_API_KEY manquante dans l'environnement")
    raise RuntimeError("GROQ_API_KEY non configurée")

MODEL_NAME = "meta-llama/llama-4-scout-17b-16e-instruct"
logger.info(f"Chargement du modèle LLM Groq : {MODEL_NAME}")
# temperature=0 -> deterministic, low-variance completions.
llm = ChatGroq(model=MODEL_NAME, temperature=0)

# Fixed seed makes langdetect deterministic across runs.
DetectorFactory.seed = 0

# Short greetings/acknowledgements that langdetect tends to misclassify;
# checked first in detect_language() as an exact-match shortcut.
EN_WORDS = {"hi", "hello", "hey", "ok", "thanks", "bye", "yes", "no"}
AR_WORDS = {"salam", "salaam", "marhaban", "مرحبا", "سلام", "شكرا", "أهلا"}
FR_WORDS = {"bonjour", "salut", "merci", "oui", "non", "s'il vous plaît", "svp"}

# In-memory per-user conversation history: user_id -> list of
# {"role": ..., "content": ...} dicts (trimmed to the last 5 entries on write).
# NOTE(review): unbounded in the number of users and lost on restart —
# confirm this is acceptable for the deployment.
user_histories: Dict[str, List[Dict[str, str]]] = {}
def normalize_text(text: str) -> str:
    """Lowercase *text*, drop punctuation, and collapse whitespace runs to one space."""
    lowered = text.lower().strip()
    no_punct = re.sub(r'[^\w\s]', '', lowered)
    return re.sub(r'\s+', ' ', no_punct)
def detect_language(user_text: str, user_id: str, default_lang: str = "fr") -> str:
    """Best-effort language detection ("fr" | "en" | "ar") for a chat message.

    Strategy, in order:
      1. Exact keyword lookup for short greetings langdetect misclassifies.
      2. For very short messages (< 10 chars), reuse the language detected on
         the user's recent messages in the in-memory history.
      3. langdetect on the message itself, falling back to *default_lang*.

    Args:
        user_text: Raw user message (may be empty).
        user_id: Key into ``user_histories`` for the history heuristic.
        default_lang: Returned when nothing reliable can be detected.
    """
    if not user_text:
        return default_lang

    text_normalized = normalize_text(user_text)
    # Bug fix: compare against both the raw entries and their normalized
    # forms — entries like "s'il vous plaît" lose their apostrophe under
    # normalize_text and could never match the normalized input before.
    for words, code in ((EN_WORDS, "en"), (AR_WORDS, "ar"), (FR_WORDS, "fr")):
        if text_normalized in words or text_normalized in {normalize_text(w) for w in words}:
            return code

    # Very short messages carry little signal; lean on the language of the
    # user's last couple of messages instead.
    history = user_histories.get(user_id, [])
    if history and len(user_text) < 10:
        recent_user_msgs = [m["content"] for m in history[-2:] if m["role"] == "user"]
        for msg in recent_user_msgs:
            try:
                lang = detect(msg)
                if lang in {"fr", "en", "ar"}:
                    return lang
            except LangDetectException:
                pass  # best-effort: fall through to detecting user_text itself

    try:
        lang = detect(user_text)
    except LangDetectException:
        return default_lang
    # Only the three supported languages are honored; anything else falls back.
    return lang if lang in {"fr", "en", "ar"} else default_lang
class LLMRequest(BaseModel):
    """Request payload for LLM response generation."""
    # Raw user message; must be non-empty after stripping (validated in the handler).
    text: str
    # Stable identifier keying the server-side conversation history.
    user_id: str
    # Optional prior turns as {"role": ..., "content": ...} dicts; when empty,
    # the server-side history stored for user_id is used instead.
    history: List[Dict[str, str]] = []
def _system_prompt_for(lang: str) -> str:
    """Return the HOLOKIA system prompt matching *lang* ("fr" | "ar" | other -> en)."""
    if lang == "fr":
        return (
            "Tu es HOLOKIA, un avatar IA conversationnel. "
            "Réponds uniquement aux questions posées avec précision, clarté et simplicité. "
            "Sois toujours poli et chaleureux dans ta manière de parler. "
            "Si la question n’est pas claire, demande gentiment une précision. "
            "Ne donne pas d’informations inutiles et reste concentré sur le sujet."
        )
    if lang == "ar":
        return (
            "أنت HOLOKIA، شخصية ذكاء اصطناعي محادثة. "
            "أجب فقط على الأسئلة المطروحة بدقة ووضوح وبأسلوب بسيط. "
            "كن دائمًا مهذبًا ودودًا في طريقة كلامك. "
            "إذا لم يكن السؤال واضحًا، اطلب بلطف توضيحًا. "
            "لا تضف معلومات غير ضرورية وابقَ مركزًا على الموضوع."
        )
    return (
        "You are HOLOKIA, a conversational AI avatar. "
        "Answer only the questions asked, with accuracy, clarity, and simplicity. "
        "Always remain polite and friendly in your tone. "
        "If the question is unclear, kindly ask for clarification. "
        "Do not add unnecessary information and stay focused on the topic."
    )


async def generate_response(request: LLMRequest):
    """Generate an LLM reply for *request*, maintaining per-user history.

    Detects the message language, builds a language-matched system prompt,
    sends the last few turns to the Groq model, and stores the trimmed
    conversation (last 5 entries) in ``user_histories``.

    Raises:
        HTTPException 400: empty text or missing user_id.
        HTTPException 500: LLM invocation failure.

    NOTE(review): no route decorator is visible in this chunk — presumably
    registered as a POST endpoint (e.g. @app.post("/generate")); confirm
    against the original file.
    """
    user_text = request.text.strip()
    user_id = request.user_id
    if not user_text:
        raise HTTPException(status_code=400, detail="Le texte ne peut pas être vide")
    if not user_id:
        raise HTTPException(status_code=400, detail="L'identifiant utilisateur est requis")

    lang = detect_language(user_text, user_id, default_lang="fr")
    logger.info(f"Requête LLM reçue ({len(user_text)} caractères), langue: {lang}, user_id: {user_id}")
    system_prompt = _system_prompt_for(lang)

    # Bug fix: copy before mutating. The original appended directly to the
    # list stored in user_histories (aliasing the cached history) or to the
    # caller-supplied request.history.
    history = list(request.history or user_histories.get(user_id, []))
    history.append({"role": "user", "content": user_text})

    # System prompt plus at most the 3 most recent turns.
    messages = [("system", system_prompt)] + [
        (msg["role"], msg["content"]) for msg in history[-3:]
    ]

    total_length = sum(len(content) for _, content in messages)
    if total_length > 4000:
        logger.warning(f"Prompt trop long ({total_length} caractères), truncation")
        # Bug fix: keep the system prompt plus the last 2 *history* messages.
        # The original used messages[-2:], which re-included the system tuple
        # (duplicating it) whenever fewer than 3 history turns were present.
        messages = messages[:1] + messages[1:][-2:]

    logger.debug(f"Prompt envoyé au LLM: {messages}")
    try:
        # Only the LLM call can legitimately fail here; keep the try minimal.
        response = llm.invoke(messages)
        response_text = response.content.strip()
    except Exception as e:
        logger.error(f"Erreur LLM: {str(e)}", exc_info=True)
        raise HTTPException(status_code=500, detail="Échec de génération de réponse")

    history.append({"role": "assistant", "content": response_text})
    user_histories[user_id] = history[-5:]  # cap stored history at 5 entries
    logger.info("Réponse LLM générée avec succès")
    return {
        "response": response_text,
        "lang": lang,
        "history": user_histories[user_id]
    }
async def health_check():
    """Liveness probe: report that the LLM service is up."""
    payload = {"status": "ok", "service": "llm"}
    return payload
async def options_generate():
    """Return a plain OK acknowledgement (preflight-style response)."""
    return dict(message="OK")
def run_service():
    """Serve the FastAPI app with uvicorn on all interfaces, port 5002."""
    bind_host, bind_port = "0.0.0.0", 5002
    uvicorn.run(app, host=bind_host, port=bind_port)


if __name__ == "__main__":
    run_service()