Spaces:
Sleeping
Sleeping
| from fastapi import FastAPI, HTTPException | |
| from pydantic import BaseModel | |
| import logging | |
| import uvicorn | |
| from langdetect import detect, DetectorFactory, LangDetectException | |
| from langchain_groq import ChatGroq | |
| from dotenv import load_dotenv | |
| import os | |
| from fastapi.middleware.cors import CORSMiddleware | |
| from typing import List, Dict | |
| import re | |
# --- Application bootstrap: env vars, logging, FastAPI app, CORS, LLM client ---

# Load environment variables (GROQ_API_KEY) from a local .env file, if present.
load_dotenv()

# Timestamped log lines: "2024-01-01 12:00:00 [INFO] message".
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
logger = logging.getLogger("LLM_Service")

app = FastAPI()

# Dev front-end origins (Vite default dev-server port) allowed through CORS.
origins = ["http://localhost:5173", "http://127.0.0.1:5173"]
app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Fail fast at import time when the Groq API key is missing, instead of
# failing later on the first LLM call.
api_key = os.getenv("GROQ_API_KEY")
if not api_key:
    logger.error("GROQ_API_KEY manquante dans l'environnement")
    raise RuntimeError("GROQ_API_KEY non configurée")

MODEL_NAME = "meta-llama/llama-4-scout-17b-16e-instruct"
logger.info(f"Chargement du modèle LLM Groq : {MODEL_NAME}")
# temperature=0 -> deterministic, low-variance completions.
llm = ChatGroq(model=MODEL_NAME, temperature=0)

# Fixed seed makes langdetect deterministic across runs.
DetectorFactory.seed = 0

# Short greetings/acknowledgements that langdetect tends to misclassify;
# checked first in detect_language() as an exact-match shortcut.
EN_WORDS = {"hi", "hello", "hey", "ok", "thanks", "bye", "yes", "no"}
AR_WORDS = {"salam", "salaam", "marhaban", "مرحبا", "سلام", "شكرا", "أهلا"}
FR_WORDS = {"bonjour", "salut", "merci", "oui", "non", "s'il vous plaît", "svp"}

# In-memory per-user conversation history: user_id -> list of
# {"role": ..., "content": ...} dicts (trimmed to the last 5 entries on write).
# NOTE(review): unbounded in the number of users and lost on restart —
# confirm this is acceptable for the deployment.
user_histories: Dict[str, List[Dict[str, str]]] = {}
def normalize_text(text: str) -> str:
    """Lowercase *text*, drop punctuation, and collapse whitespace runs to one space."""
    lowered = text.lower().strip()
    no_punct = re.sub(r'[^\w\s]', '', lowered)
    return re.sub(r'\s+', ' ', no_punct)
def detect_language(user_text: str, user_id: str, default_lang: str = "fr") -> str:
    """Best-effort language detection ("fr" | "en" | "ar") for a chat message.

    Strategy, in order:
      1. Exact keyword lookup for short greetings langdetect misclassifies.
      2. For very short messages (< 10 chars), reuse the language detected on
         the user's recent messages in the in-memory history.
      3. langdetect on the message itself, falling back to *default_lang*.

    Args:
        user_text: Raw user message (may be empty).
        user_id: Key into ``user_histories`` for the history heuristic.
        default_lang: Returned when nothing reliable can be detected.
    """
    if not user_text:
        return default_lang

    text_normalized = normalize_text(user_text)
    # Bug fix: compare against both the raw entries and their normalized
    # forms — entries like "s'il vous plaît" lose their apostrophe under
    # normalize_text and could never match the normalized input before.
    for words, code in ((EN_WORDS, "en"), (AR_WORDS, "ar"), (FR_WORDS, "fr")):
        if text_normalized in words or text_normalized in {normalize_text(w) for w in words}:
            return code

    # Very short messages carry little signal; lean on the language of the
    # user's last couple of messages instead.
    history = user_histories.get(user_id, [])
    if history and len(user_text) < 10:
        recent_user_msgs = [m["content"] for m in history[-2:] if m["role"] == "user"]
        for msg in recent_user_msgs:
            try:
                lang = detect(msg)
                if lang in {"fr", "en", "ar"}:
                    return lang
            except LangDetectException:
                pass  # best-effort: fall through to detecting user_text itself

    try:
        lang = detect(user_text)
    except LangDetectException:
        return default_lang
    # Only the three supported languages are honored; anything else falls back.
    return lang if lang in {"fr", "en", "ar"} else default_lang
class LLMRequest(BaseModel):
    """Request payload for LLM response generation."""
    # Raw user message; must be non-empty after stripping (validated in the handler).
    text: str
    # Stable identifier keying the server-side conversation history.
    user_id: str
    # Optional prior turns as {"role": ..., "content": ...} dicts; when empty,
    # the server-side history stored for user_id is used instead.
    history: List[Dict[str, str]] = []
def _system_prompt_for(lang: str) -> str:
    """Return the HOLOKIA system prompt matching *lang* ("fr" | "ar" | other -> en)."""
    if lang == "fr":
        return (
            "Tu es HOLOKIA, un avatar IA conversationnel. "
            "Réponds uniquement aux questions posées avec précision, clarté et simplicité. "
            "Sois toujours poli et chaleureux dans ta manière de parler. "
            "Si la question n’est pas claire, demande gentiment une précision. "
            "Ne donne pas d’informations inutiles et reste concentré sur le sujet."
        )
    if lang == "ar":
        return (
            "أنت HOLOKIA، شخصية ذكاء اصطناعي محادثة. "
            "أجب فقط على الأسئلة المطروحة بدقة ووضوح وبأسلوب بسيط. "
            "كن دائمًا مهذبًا ودودًا في طريقة كلامك. "
            "إذا لم يكن السؤال واضحًا، اطلب بلطف توضيحًا. "
            "لا تضف معلومات غير ضرورية وابقَ مركزًا على الموضوع."
        )
    return (
        "You are HOLOKIA, a conversational AI avatar. "
        "Answer only the questions asked, with accuracy, clarity, and simplicity. "
        "Always remain polite and friendly in your tone. "
        "If the question is unclear, kindly ask for clarification. "
        "Do not add unnecessary information and stay focused on the topic."
    )


async def generate_response(request: LLMRequest):
    """Generate an LLM reply for *request*, maintaining per-user history.

    Detects the message language, builds a language-matched system prompt,
    sends the last few turns to the Groq model, and stores the trimmed
    conversation (last 5 entries) in ``user_histories``.

    Raises:
        HTTPException 400: empty text or missing user_id.
        HTTPException 500: LLM invocation failure.

    NOTE(review): no route decorator is visible in this chunk — presumably
    registered as a POST endpoint (e.g. @app.post("/generate")); confirm
    against the original file.
    """
    user_text = request.text.strip()
    user_id = request.user_id
    if not user_text:
        raise HTTPException(status_code=400, detail="Le texte ne peut pas être vide")
    if not user_id:
        raise HTTPException(status_code=400, detail="L'identifiant utilisateur est requis")

    lang = detect_language(user_text, user_id, default_lang="fr")
    logger.info(f"Requête LLM reçue ({len(user_text)} caractères), langue: {lang}, user_id: {user_id}")
    system_prompt = _system_prompt_for(lang)

    # Bug fix: copy before mutating. The original appended directly to the
    # list stored in user_histories (aliasing the cached history) or to the
    # caller-supplied request.history.
    history = list(request.history or user_histories.get(user_id, []))
    history.append({"role": "user", "content": user_text})

    # System prompt plus at most the 3 most recent turns.
    messages = [("system", system_prompt)] + [
        (msg["role"], msg["content"]) for msg in history[-3:]
    ]

    total_length = sum(len(content) for _, content in messages)
    if total_length > 4000:
        logger.warning(f"Prompt trop long ({total_length} caractères), truncation")
        # Bug fix: keep the system prompt plus the last 2 *history* messages.
        # The original used messages[-2:], which re-included the system tuple
        # (duplicating it) whenever fewer than 3 history turns were present.
        messages = messages[:1] + messages[1:][-2:]

    logger.debug(f"Prompt envoyé au LLM: {messages}")
    try:
        # Only the LLM call can legitimately fail here; keep the try minimal.
        response = llm.invoke(messages)
        response_text = response.content.strip()
    except Exception as e:
        logger.error(f"Erreur LLM: {str(e)}", exc_info=True)
        raise HTTPException(status_code=500, detail="Échec de génération de réponse")

    history.append({"role": "assistant", "content": response_text})
    user_histories[user_id] = history[-5:]  # cap stored history at 5 entries
    logger.info("Réponse LLM générée avec succès")
    return {
        "response": response_text,
        "lang": lang,
        "history": user_histories[user_id]
    }
async def health_check():
    """Liveness probe: report that the LLM service is up."""
    payload = {"status": "ok", "service": "llm"}
    return payload
async def options_generate():
    """Return a plain OK acknowledgement (preflight-style response)."""
    return dict(message="OK")
def run_service():
    """Serve the FastAPI app with uvicorn on all interfaces, port 5002."""
    bind_host, bind_port = "0.0.0.0", 5002
    uvicorn.run(app, host=bind_host, port=bind_port)


if __name__ == "__main__":
    run_service()