lojol469-cmd committed
Commit: f3a56a5
Parent(s): b54b689
Initial commit: Kibali AI with RTX 5090 Blackwell support and CUDA 13.0 Nightly

Files changed:
- Dockerfile (+21 -11)
- main.py (+8 -3)
- requirements.txt (+5 -8)
- tools/todo.py (+24 -156)
- tools/web.py (+7 -8)
Dockerfile
CHANGED
The backend stage moves to an NVIDIA CUDA 12.6.1 runtime base (compatible with the 50-series drivers) and installs PyTorch nightly from the cu130 index to get sm_120 (Blackwell) kernels. The new Dockerfile:

# --- STAGE 1: Frontend build ---
FROM node:18-alpine AS build-frontend
WORKDIR /app/frontend
COPY kibali-ui/package*.json ./
RUN npm install
COPY kibali-ui/ ./
RUN npm run build

# --- STAGE 2: Backend (NVIDIA Blackwell-compatible base) ---
# Use a 12.6 base image, which supports the 50-series drivers
FROM nvidia/cuda:12.6.1-runtime-ubuntu22.04
WORKDIR /app

ENV DEBIAN_FRONTEND=noninteractive

RUN apt-get update && apt-get install -y \
    python3-pip \
    python3-dev \
    libgomp1 \
    git \
    && rm -rf /var/lib/apt/lists/*

# PyTorch nightly with CUDA 13.0
# This is what unlocks sm_120 support
RUN pip3 install --no-cache-dir --upgrade pip
RUN pip3 install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu130

# Install the remaining dependencies
COPY requirements.txt .
RUN pip3 install --no-cache-dir -r requirements.txt

# Force a recent transformers version for the Blackwell tokenizer
RUN pip3 install --upgrade transformers accelerate bitsandbytes

COPY --from=build-frontend /app/frontend/dist ./static
COPY . .

RUN mkdir -p /app/model_cache

ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
ENV PYTHONUNBUFFERED=1
ENV MODEL_PATH=/app/model_cache

EXPOSE 8000

CMD ["python3", "-m", "uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
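Since the whole point of the nightly cu130 wheels is sm_120 support, a quick way to confirm the image works on an RTX 5090 is a small check run inside the container. This is a minimal sketch, assuming the container is started with GPU access (for example `--gpus all`); the script name and output format are illustrative, not part of the repository.

# check_gpu.py - illustrative sanity check, not part of this commit
import torch

print("torch:", torch.__version__, "| CUDA runtime:", torch.version.cuda)
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    major, minor = torch.cuda.get_device_capability(0)
    # An RTX 5090 (Blackwell) should report sm_120; wheels built without Blackwell
    # kernels typically warn that the device's compute capability is unsupported.
    print("device:", torch.cuda.get_device_name(0), f"| capability: sm_{major}{minor}")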
main.py
CHANGED
MODEL_PATH is now read from the environment (defaulting to ./model_cache) instead of a hard-coded path, the embedding model caches into the same shared volume, and local_files_only=True is dropped from the tokenizer load. The model-loading block (lines 55-70) now reads:

# --- MODEL LOADING ---
MODEL_PATH = os.getenv("MODEL_PATH", "./model_cache")
logger.info(f"Utilisation du chemin modèle : {MODEL_PATH}")

logger.info("Chargement du modèle d'embedding...")
# cache_folder makes SentenceTransformer store its files in the shared volume as well
embed_model = SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2', cache_folder=MODEL_PATH)

logger.info("Chargement du tokenizer et du modèle LLM...")
# local_files_only=True removed so new GPU architectures can resolve the model on first run
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
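Because AutoTokenizer.from_pretrained(MODEL_PATH) resolves MODEL_PATH as a local directory, the shared volume has to contain the model files before the app starts. A minimal sketch of one way to pre-populate it with huggingface_hub (already in requirements.txt); the repo id below is a placeholder, not the model Kibali actually uses.

# populate_cache.py - hypothetical helper; "org/some-llm" is a placeholder repo id
import os
from huggingface_hub import snapshot_download

MODEL_PATH = os.getenv("MODEL_PATH", "./model_cache")
snapshot_download(repo_id="org/some-llm", local_dir=MODEL_PATH)
print(f"Model files downloaded into {MODEL_PATH}")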
requirements.txt
CHANGED
torch is deliberately not pinned here because it is installed from the nightly cu130 index in the Dockerfile; folium and spaces are dropped, tavily-python is added, and bitsandbytes / accelerate get minimum versions. The changed sections:

# --- Core AI ---
transformers==4.41.2
bitsandbytes>=0.43.0
accelerate>=0.30.0
sentence-transformers
faiss-gpu

# ... (unchanged server dependencies, including pydantic) ...
python-multipart

# --- Tools & Data ---
tavily-python
pypdf>=3.0.0
numpy<2.0.0
duckduckgo-search
huggingface_hub
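sentence-transformers and faiss-gpu together cover the retrieval side: the multilingual MiniLM model embeds text chunks and FAISS indexes them for nearest-neighbour search. A minimal sketch of that wiring, assuming an exact IndexFlatL2 index; the sample chunks are illustrative and the 384-dimension figure comes from the MiniLM model, not from the project code.

# Illustrative only - the project's real index construction is not shown in this commit.
import faiss
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2")
chunks = ["Le Gabon est situé en Afrique centrale.", "Libreville est la capitale du Gabon."]
vectors = model.encode(chunks)                # float32 array of shape (2, 384)
index = faiss.IndexFlatL2(vectors.shape[1])   # exact L2 search over 384-dim vectors
index.add(vectors)

_, ids = index.search(model.encode(["Quelle est la capitale du Gabon ?"]), k=1)
print(chunks[ids[0][0]])                      # -> "Libreville est la capitale du Gabon."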
|
|
tools/todo.py
CHANGED
tools/todo.py loses its Streamlit dependency: the st.status / st.write progress display in execute_reflection_plan is replaced by plain print logging so the reflection phase runs inside the FastAPI backend, and the helper functions are condensed. The changed regions of the new version (unchanged context elided with # ...):

import time
from typing import List, Optional
import re
import os
from dotenv import load_dotenv

load_dotenv()

def analyze_query_type(prompt: str) -> dict:
    """Analyzes the query type to adapt the reflection strategy"""
    # ... (prompt normalization and analysis defaults unchanged) ...

    temporal_keywords = ["aujourd'hui", "maintenant", "récent", "actuel", "dernier", "2024", "2025"]
    if any(kw in prompt_lower for kw in temporal_keywords):
        analysis["temporal"] = True
        analysis["needs_web"] = True

    geo_keywords = ["gabon", "libreville", "port-gentil", "franceville", "oyem", "où", "localisation"]
    if any(kw in prompt_lower for kw in geo_keywords):
        analysis["geographical"] = True

    doc_keywords = ["selon le document", "d'après le pdf", "dans le fichier", "uploadé"]
    if any(kw in prompt_lower for kw in doc_keywords):
        analysis["needs_docs"] = True
        analysis["type"] = "document_query"

    continuation_keywords = ["ils", "elles", "lui", "leur", "donc", "alors", "ensuite", "aussi", "également"]
    if any(kw in prompt_lower for kw in continuation_keywords) or len(prompt.split()) < 5:
        analysis["needs_memory"] = True
        analysis["type"] = "continuation"

    if len(prompt.split()) > 15 or (prompt.count("?") > 1):
        analysis["complexity"] = "complex"
    elif any(kw in prompt_lower for kw in ["pourquoi", "comment", "expliquer"]):
        analysis["complexity"] = "medium"

    web_keywords = ["actualité", "news", "prix", "cours", "météo", "horaire"]
    if any(kw in prompt_lower for kw in web_keywords):
        analysis["needs_web"] = True

    # ... (rest of the function unchanged) ...

def detect_subject_shift(prompt: str, current_subject: str, subject_keywords: List[str]) -> dict:
    """Detects a subject change and scores how strong it is"""
    if not current_subject or not subject_keywords:
        return {"shift_detected": False, "shift_strength": 0.0, "new_subject_detected": True, "reason": "Init"}

    prompt_lower = prompt.lower()
    prompt_words = set(re.findall(r'\b\w{4,}\b', prompt_lower))
    keyword_overlap = len(prompt_words.intersection(set(subject_keywords)))
    overlap_ratio = keyword_overlap / max(len(subject_keywords), 1)

    shift_markers = ["maintenant", "sinon", "autre chose", "parlons de", "passons à", "nouveau sujet"]
    has_shift_marker = any(marker in prompt_lower for marker in shift_markers)

    shift_strength = 0.0
    if overlap_ratio < 0.2: shift_strength += 0.5
    if has_shift_marker: shift_strength += 0.3

    return {
        "shift_detected": shift_strength > 0.4,
        "shift_strength": shift_strength,
        "new_subject_detected": shift_strength > 0.6,
        "reason": f"Overlap: {overlap_ratio:.1%}"
    }

def generate_search_strategy(analysis: dict, subject_keywords: List[str], geo_info: dict) -> dict:
    """Generates an optimized search strategy"""
    strategy = {
        "use_rag": analysis["needs_docs"],
        "use_memory": analysis["needs_memory"],
        "use_web": analysis["needs_web"],
        "memory_k": 5, "rag_k": 3,
        "web_enhanced": False, "search_query_suffix": ""
    }

    if analysis["complexity"] == "complex":
        strategy.update({"memory_k": 8, "rag_k": 5})

    if analysis["needs_web"]:
        strategy["web_enhanced"] = True
        suffix = " ".join(subject_keywords[:3]) if subject_keywords else ""
        strategy["search_query_suffix"] = f"{suffix} {geo_info.get('city', 'Gabon')}"

    return strategy

def execute_reflection_plan(
    # ... (leading parameters unchanged) ...
    current_subject: Optional[str] = None,
    subject_keywords: Optional[List[str]] = None
):
    """Structured reflection phase, FastAPI-compatible (no Streamlit)"""
    geo_info = geo_info or {}
    subject_keywords = subject_keywords or []

    query_analysis = analyze_query_type(prompt)
    subject_shift = detect_subject_shift(prompt, current_subject, subject_keywords)
    search_strategy = generate_search_strategy(query_analysis, subject_keywords, geo_info)

    # Internal logs (visible in the Docker output)
    print(f"🧠 [REFLECTION] Type: {query_analysis['type']} | Web: {search_strategy['use_web']}")

    return {
        "analysis": query_analysis,
        # ... (rest of the returned dict unchanged) ...
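The three helpers are pure functions, so they can be exercised directly. A small usage sketch, assuming tools/ is importable as a package from the project root; the prompt and the expected flags in the comments are illustrative.

# Illustrative smoke test for the reflection helpers.
from tools.todo import analyze_query_type, detect_subject_shift, generate_search_strategy

prompt = "Quel est le prix du carburant à Libreville aujourd'hui ?"

analysis = analyze_query_type(prompt)
print(analysis["temporal"], analysis["geographical"], analysis["needs_web"])   # True True True

shift = detect_subject_shift(prompt, "tourisme", ["plage", "hôtel"])
print(shift["shift_detected"], shift["reason"])                                # True, no keyword overlap

strategy = generate_search_strategy(analysis, ["carburant", "prix"], {"city": "Libreville"})
print(strategy["use_web"], strategy["search_query_suffix"])                    # True "carburant prix Libreville"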
tools/web.py
CHANGED
tools/web.py also drops the Streamlit import, and the Tavily attempt / DuckDuckGo fallback now print their progress so the chosen path shows up in the container logs. The changed regions (unchanged context elided with # ...):

from tavily import TavilyClient
from duckduckgo_search import DDGS
import os
from dotenv import load_dotenv

# ... (environment loading and TAVILY_API_KEY lookup unchanged) ...

def web_search(query: str):
    # ...
    results = []
    images = []

    # 1. Try Tavily first
    if TAVILY_API_KEY:
        try:
            print(f"🔍 Recherche Tavily pour : {query}")
            tavily = TavilyClient(api_key=TAVILY_API_KEY)
            res = tavily.search(
                query=query,
                # ... (other Tavily options) ...
            )
            # ... (res parsed into results / images) ...
            if len(results) >= 2:
                return {"results": results, "images": images, "query": query, "source": "Tavily"}
        except Exception as e:
            print(f"⚠️ Tavily Error: {e}")

    # 2. Fall back to DuckDuckGo
    print(f"🦆 Fallback DuckDuckGo pour : {query}")
    try:
        with DDGS() as ddgs:
            ddg_gen = ddgs.text(query, max_results=5)
            if ddg_gen:
                for r in ddg_gen:
                    results.append({
                        # ... (title and snippet fields unchanged) ...
                        "url": r.get('href')
                    })

            try:
                ddg_img_gen = ddgs.images(query, max_results=3)
                if ddg_img_gen:
                    images = [img.get('image') for img in ddg_img_gen if img.get('image')]
            except Exception:
                pass

    except Exception as e:
        print(f"❌ DuckDuckGo Error: {e}")

    return {
        "results": results,
        # ... (rest of the returned dict unchanged) ...
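A quick way to exercise the fallback chain is to call web_search directly; with TAVILY_API_KEY unset only the DuckDuckGo branch runs. A minimal sketch, assuming the module is importable from the project root; the result keys are read defensively since the Tavily and DuckDuckGo branches may shape them slightly differently.

# Illustrative usage of the search helper.
from tools.web import web_search

out = web_search("actualité économie Gabon")
print("source:", out.get("source", "duckduckgo-fallback"))
print("images:", len(out.get("images", [])))
for r in out.get("results", [])[:3]:
    print("-", r.get("title"), "->", r.get("url"))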