lojol469-cmd committed · Commit dec8c8b · 0 parent(s)
Final Kibali deployment: code + complete model
Files changed:
- .gitattributes +1 -0
- .gitignore +3 -0
- README.md +18 -0
- agent.py +72 -0
- app.py +190 -0
- kibali_logo.svg +48 -0
- llm.py +23 -0
- memory_faiss.py +28 -0
- requirements.txt +15 -0
- tools/__init__.py +0 -0
- tools/calculator.py +0 -0
- tools/geo.py +34 -0
- tools/local_docs.py +23 -0
- tools/todo.py +41 -0
- tools/web.py +69 -0
.gitattributes ADDED
@@ -0,0 +1 @@
+model_cache/** filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,3 @@
+__pycache__/
+.env
+*.pyc
README.md ADDED
@@ -0,0 +1,18 @@
+---
+title: Kibali IA Pro
+emoji: 🦁
+colorFrom: yellow
+colorTo: black
+sdk: streamlit
+sdk_version: 1.31.0
+app_file: app.py
+pinned: false
+---
+
+# Kibali IA Pro - Agentic RAG
+
+An intelligent assistant optimized for the Gabonese context, built on a RAG architecture with FAISS memory and hybrid web search.
+
+## Local setup
+1. Clone the repository.
+2. Create a `.env` file containing your `TAVILY_API_KEY`.
+3. Install the dependencies: `pip install -r requirements.txt`, then start the app with `streamlit run app.py`.
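As a sketch of the FAISS-memory half of that architecture, the round trip below mirrors what app.py does on each exchange (assuming the same 384-dimension multilingual MiniLM encoder; this snippet is illustrative and not part of the commit):

```python
# Minimal round trip through the short-term FAISS memory used in app.py.
import faiss
from sentence_transformers import SentenceTransformer

embed_model = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2")
index = faiss.IndexFlatL2(384)  # MiniLM embeddings are 384-dimensional
memory_text = []

# Store one Q/A exchange
entry = "Q: Capitale du Gabon ? | R: Libreville."
index.add(embed_model.encode([entry]).astype("float32"))
memory_text.append(entry)

# Retrieve the closest stored exchange for a new question
D, I = index.search(embed_model.encode(["Parle-moi de Libreville"]).astype("float32"), k=1)
print(memory_text[I[0][0]], "distance:", D[0][0])
```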
agent.py ADDED
@@ -0,0 +1,72 @@
+from langchain.agents import create_react_agent, AgentExecutor
+from langchain.memory import ConversationBufferMemory
+from langchain.prompts import PromptTemplate
+from langchain_openai import ChatOpenAI
+from tools.web import web_search_tool
+from tools.local_docs import local_knowledge_tool
+
+def create_agent():
+    llm = ChatOpenAI(
+        model="gpt-4o-mini",
+        temperature=0.2
+    )
+
+    tools = [
+        local_knowledge_tool,
+        web_search_tool,
+    ]
+
+    # create_react_agent requires the {tools}, {tool_names} and {agent_scratchpad}
+    # variables in the prompt; without them it raises a ValueError.
+    prompt = PromptTemplate.from_template(
+        """Tu es Kibali, un agent IA avancé.
+
+CAPACITÉS :
+- Base de connaissances locale
+- Recherche web Tavily
+
+RÈGLES :
+1. Base locale TOUJOURS en premier
+2. Web seulement si nécessaire
+3. Cite les sources
+4. Mentionne les contradictions
+
+OUTILS DISPONIBLES :
+{tools}
+
+FORMAT ReAct STRICT :
+
+Question: la question posée
+Thought: raisonnement
+Action: un outil parmi [{tool_names}]
+Action Input: entrée
+Observation: résultat
+...
+Final Answer: réponse claire
+
+Commence !
+
+Question: {input}
+Thought: {agent_scratchpad}
+"""
+    )
+
+    agent = create_react_agent(
+        llm=llm,
+        tools=tools,
+        prompt=prompt
+    )
+
+    memory = ConversationBufferMemory(
+        memory_key="chat_history",
+        return_messages=True
+    )
+
+    executor = AgentExecutor(
+        agent=agent,
+        tools=tools,
+        memory=memory,
+        verbose=True,
+        max_iterations=5,
+        handle_parsing_errors=True
+    )
+
+    return executor
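A minimal smoke test for this executor might look as follows (a sketch, not part of the commit: it assumes `langchain` and `langchain-openai` are installed, which requirements.txt does not pin, and that `OPENAI_API_KEY` and `TAVILY_API_KEY` are set):

```python
# Hypothetical local check of the ReAct agent defined in agent.py.
from agent import create_agent

executor = create_agent()
result = executor.invoke({"input": "Quelle est la capitale du Gabon ?"})
print(result["output"])
```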
app.py ADDED
@@ -0,0 +1,190 @@
+import streamlit as st
+import folium
+from streamlit_folium import st_folium
+from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer, BitsAndBytesConfig
+from sentence_transformers import SentenceTransformer
+import torch
+import faiss
+from threading import Thread
+import os
+import base64
+from streamlit_js_eval import streamlit_js_eval
+
+# Custom tool imports
+from tools.web import web_search
+from tools.todo import execute_reflection_plan
+from tools.geo import get_geo_context
+
+# --- CONFIGURATION ---
+st.set_page_config(
+    page_title="Kibali AI Pro - Agentic RAG",
+    page_icon="🦁",
+    layout="wide",
+    initial_sidebar_state="expanded"
+)
+
+# Relative paths so the app also runs outside the original dev machine
+LOGO_PATH = "kibali_logo.svg"
+MODEL_PATH = "model_cache"
+
+def get_base64_svg(path):
+    try:
+        with open(path, "rb") as f:
+            return base64.b64encode(f.read()).decode()
+    except Exception:
+        return None
+
+def apply_custom_css(filters):
+    logo_base64 = get_base64_svg(LOGO_PATH)
+    logo_html = f'<div style="display: flex; justify-content: center; margin-top: -75px; margin-bottom: 20px;"><img src="data:image/svg+xml;base64,{logo_base64}" width="120"></div>' if logo_base64 else ""
+    st.markdown(f"""
+    <style>
+    .stChatMessage {{ border-radius: 15px; margin-bottom: 10px; border: 1px solid #303030; }}
+    .main {{ background-color: #0e1117; }}
+    .leaflet-tile-container {{ filter: brightness({filters['brightness']}%) contrast({filters['contrast']}%) saturate({filters['saturate']}%) invert({filters['invert']}%); }}
+    </style>
+    {logo_html}
+    """, unsafe_allow_html=True)
+
+@st.cache_resource
+def init_resources():
+    embed_model = SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2')
+    tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, local_files_only=True)
+    if tokenizer.pad_token is None:
+        tokenizer.pad_token = tokenizer.eos_token
+
+    bnb_config = BitsAndBytesConfig(
+        load_in_4bit=True, bnb_4bit_use_double_quant=True,
+        bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.float16
+    )
+    model = AutoModelForCausalLM.from_pretrained(
+        MODEL_PATH, quantization_config=bnb_config, device_map="auto",
+        torch_dtype=torch.float16, trust_remote_code=True
+    )
+    return embed_model, model, tokenizer
+
+embed_model, model, tokenizer = init_resources()
+
+# --- MEMORY INITIALIZATION ---
+if "vector_index" not in st.session_state:
+    st.session_state.vector_index = faiss.IndexFlatL2(384)
+    st.session_state.memory_text = []
+
+if "messages" not in st.session_state:
+    st.session_state.messages = []
+if "map_filters" not in st.session_state:
+    st.session_state.map_filters = {"brightness": 100, "contrast": 100, "saturate": 100, "invert": 0}
+
+# --- GEOLOCATION ---
+if "geo" not in st.session_state:
+    st.session_state.geo = get_geo_context()
+loc = streamlit_js_eval(js_expressions="""new Promise((resolve) => { navigator.geolocation.getCurrentPosition((pos) => { resolve({ coords: { latitude: pos.coords.latitude, longitude: pos.coords.longitude } }); }, (err) => resolve(null)); });""", want_output=True)
+if loc and loc.get('coords'):
+    st.session_state.geo.update({"latitude": loc['coords']['latitude'], "longitude": loc['coords']['longitude'], "method": "GPS"})
+
+apply_custom_css(st.session_state.map_filters)
+
+# --- SIDEBAR ---
+with st.sidebar:
+    st.image(LOGO_PATH if os.path.exists(LOGO_PATH) else "https://img.icons8.com/fluent/96/lion.png", width=80)
+    st.title("Kibali OS v3")
+    with st.expander("📍 Carte & Géo", expanded=True):
+        m = folium.Map(location=[st.session_state.geo['latitude'], st.session_state.geo['longitude']], zoom_start=15)
+        folium.Marker([st.session_state.geo['latitude'], st.session_state.geo['longitude']], icon=folium.Icon(color="red")).add_to(m)
+        st_folium(m, height=250, use_container_width=True)
+
+    thinking_mode = st.toggle("🧠 Réflexion Agentique", value=True)
+    if st.button("🗑️ Reset Chat"):
+        st.session_state.messages = []
+        st.session_state.memory_text = []
+        st.session_state.vector_index = faiss.IndexFlatL2(384)
+        st.rerun()
+
+# --- CHAT HISTORY ---
+for msg in st.session_state.messages:
+    with st.chat_message(msg["role"], avatar=LOGO_PATH if msg["role"] == "assistant" else None):
+        st.markdown(msg["content"])
+
+# --- CHAT LOGIC ---
+if prompt := st.chat_input("Posez votre question..."):
+    st.session_state.messages.append({"role": "user", "content": prompt})
+    with st.chat_message("user"):
+        st.markdown(prompt)
+
+    with st.chat_message("assistant", avatar=LOGO_PATH):
+        if thinking_mode:
+            # Pass the prior history so the reflection step can lock onto the topic
+            execute_reflection_plan(prompt, st.session_state.geo, st.session_state.messages[:-1])
+
+        with st.status("🔍 Analyse du fil de discussion...", expanded=False) as status:
+            # 1. QUERY REFORMULATION (for more precise image search)
+            # Short follow-up questions inherit context from the previous message
+            search_query = prompt
+            if len(prompt.split()) < 4 and len(st.session_state.messages) > 1:
+                last_context = st.session_state.messages[-2]['content']
+                search_query = f"Gabon {prompt} {last_context[:60]}"
+
+            # 2. IMMEDIATE CONTEXT
+            recent_history = ""
+            if len(st.session_state.messages) > 1:
+                for m in st.session_state.messages[-4:-1]:
+                    recent_history += f"{m['role'].upper()}: {m['content']}\n"
+
+            # 3. LONG-TERM MEMORY RETRIEVAL (FAISS)
+            past_ctx = ""
+            if st.session_state.vector_index.ntotal > 0:
+                query_vec = embed_model.encode([prompt]).astype('float32')
+                D, I = st.session_state.vector_index.search(query_vec, k=2)
+                past_ctx = "\n".join([st.session_state.memory_text[i] for i in I[0] if i != -1])
+
+            # 4. WEB SEARCH (uses the reformulated query for images)
+            search_data = web_search(search_query)
+            web_ctx = "\n".join([f"- {r['content'][:300]}" for r in search_data.get("results", [])])
+            imgs = search_data.get("images", [])[:3]
+            status.update(label="Contexte stabilisé", state="complete")
+
+        if imgs:
+            st.write("🖼️ **Sources visuelles :**")
+            im_cols = st.columns(len(imgs))
+            for i, url in enumerate(imgs):
+                im_cols[i].image(url, use_container_width=True)
+
+        # 5. GENERATION WITH A REINFORCED PROMPT
+        placeholder = st.empty()
+        full_response = ""
+
+        sys_instr = (
+            f"Tu es Kibali, assistant intelligent au Gabon ({st.session_state.geo['city']}). "
+            f"Reste strictement dans le sujet de l'HISTORIQUE RÉCENT. "
+            f"Si l'utilisateur parle de sport ou d'actualité, ne donne pas de définitions générales."
+        )
+
+        final_prompt = (
+            f"### SYSTEM: {sys_instr}\n"
+            f"### HISTORIQUE RÉCENT:\n{recent_history}\n"
+            f"### MÉMOIRE FAISS:\n{past_ctx}\n"
+            f"### CONTEXTE WEB:\n{web_ctx}\n"
+            f"### QUESTION: {prompt}\n"
+            f"### RÉPONSE DE KIBALI:"
+        )
+
+        inputs = tokenizer(final_prompt, return_tensors="pt").to(model.device)
+        streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
+
+        gen_thread = Thread(target=model.generate, kwargs=dict(
+            **inputs, streamer=streamer, max_new_tokens=800,
+            temperature=0.3,  # low temperature for contextual precision
+            do_sample=True, pad_token_id=tokenizer.eos_token_id
+        ))
+        gen_thread.start()
+
+        for token in streamer:
+            if "###" in token:  # stop if the model starts echoing section headers
+                break
+            full_response += token
+            placeholder.markdown(full_response + "▌")
+        placeholder.markdown(full_response)
+
+        # 6. SAVE & INDEX
+        st.session_state.messages.append({"role": "assistant", "content": full_response})
+
+        new_mem = f"Q: {prompt} | R: {full_response}"
+        st.session_state.memory_text.append(new_mem)
+        st.session_state.vector_index.add(embed_model.encode([new_mem]).astype('float32'))
+
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
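The threaded streaming pattern above can be exercised outside Streamlit; a minimal sketch, assuming a causal LM is present in `model_cache` (quantization omitted here for brevity):

```python
# Standalone version of app.py's generate-on-a-worker-thread streaming loop.
from threading import Thread
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

tokenizer = AutoTokenizer.from_pretrained("model_cache", local_files_only=True)
model = AutoModelForCausalLM.from_pretrained("model_cache", device_map="auto")

inputs = tokenizer("### QUESTION: Bonjour\n### RÉPONSE DE KIBALI:", return_tensors="pt").to(model.device)
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

Thread(target=model.generate, kwargs=dict(**inputs, streamer=streamer, max_new_tokens=64)).start()
for token in streamer:
    print(token, end="", flush=True)
```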
kibali_logo.svg ADDED
(SVG markup, 48 lines, not shown)
llm.py ADDED
@@ -0,0 +1,23 @@
+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+from langchain_community.llms import HuggingFacePipeline
+import torch
+
+def load_llm(model_path):
+    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+    model = AutoModelForCausalLM.from_pretrained(
+        model_path,
+        device_map="auto",
+        torch_dtype=torch.bfloat16,
+        trust_remote_code=True
+    )
+
+    pipe = pipeline(
+        "text-generation",
+        model=model,
+        tokenizer=tokenizer,
+        max_new_tokens=1024,
+        do_sample=True,  # required for temperature to take effect
+        temperature=0.2,
+        repetition_penalty=1.15
+    )
+
+    return HuggingFacePipeline(pipeline=pipe)
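Usage sketch (assumes the langchain packages are installed; `load_llm` returns a LangChain Runnable, so `.invoke` works directly):

```python
from llm import load_llm

llm = load_llm("model_cache")  # model_cache is the LFS-tracked directory from .gitattributes
print(llm.invoke("Présente le Gabon en une phrase."))
```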
memory_faiss.py ADDED
@@ -0,0 +1,28 @@
+import os
+import datetime
+from langchain_community.vectorstores import FAISS
+from langchain_community.embeddings import HuggingFaceEmbeddings
+from langchain_core.documents import Document
+
+FAISS_PATH = "data/faiss_index"
+
+def load_faiss():
+    embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
+    if os.path.exists(FAISS_PATH):
+        return FAISS.load_local(
+            FAISS_PATH,
+            embeddings,
+            allow_dangerous_deserialization=True
+        )
+    return FAISS.from_texts(
+        ["Initialisation mémoire Kibali"],
+        embeddings
+    )
+
+def save_memory(vectordb, user_msg, assistant_msg):
+    ts = datetime.datetime.now().isoformat()
+    vectordb.add_documents([
+        Document(page_content=f"[User {ts}] {user_msg}"),
+        Document(page_content=f"[Kibali {ts}] {assistant_msg}")
+    ])
+    vectordb.save_local(FAISS_PATH)
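A sketch of the persistent round trip these helpers provide (creates `data/faiss_index` on first run):

```python
from memory_faiss import load_faiss, save_memory

db = load_faiss()
save_memory(db, "Quelle heure est-il à Libreville ?", "Il est environ midi à Libreville.")
for doc in db.similarity_search("Libreville", k=2):
    print(doc.page_content)
```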
requirements.txt ADDED
@@ -0,0 +1,15 @@
+streamlit
+pandas
+folium
+streamlit-folium
+transformers
+sentence-transformers
+torch
+faiss-cpu
+numpy
+streamlit-js-eval
+tavily-python
+duckduckgo-search
+python-dotenv
+bitsandbytes
+accelerate
tools/__init__.py ADDED
(empty file)
tools/calculator.py ADDED
(empty file)
tools/geo.py ADDED
@@ -0,0 +1,34 @@
+# tools/geo.py
+import requests
+import logging
+
+def get_geo_context():
+    """
+    Resolve geolocation from the caller's IP (free, unlimited fallback).
+    """
+    try:
+        # ip-api.com: free, reliable, no API key required
+        response = requests.get('http://ip-api.com/json/', timeout=5)
+        data = response.json()
+
+        if data.get('status') == 'success':
+            return {
+                "ip": data.get('query'),
+                "country": data.get('country', 'Gabon'),
+                "city": data.get('city', 'Libreville'),
+                "latitude": float(data.get('lat', 0.3908)),
+                "longitude": float(data.get('lon', 9.4544)),
+                "timezone": data.get('timezone', 'Africa/Libreville'),
+                "method": "IP (Approximatif)",
+                "lang": "Français"
+            }
+        raise Exception("ip-api returned an error status")
+
+    except Exception as e:
+        logging.warning(f"IP geolocation failed: {e}")
+        return {
+            "ip": "127.0.0.1", "country": "Gabon", "city": "Libreville",
+            "latitude": 0.3908, "longitude": 9.4544, "timezone": "Africa/Libreville",
+            "method": "Valeurs par défaut", "lang": "Français"
+        }
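Usage sketch (no key required; falls back to the Libreville defaults when offline):

```python
from tools.geo import get_geo_context

ctx = get_geo_context()
print(f"{ctx['city']}, {ctx['country']} ({ctx['method']})")
```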
tools/local_docs.py ADDED
@@ -0,0 +1,23 @@
+from langchain.tools import Tool
+from langchain_community.vectorstores import FAISS
+from langchain_community.embeddings import HuggingFaceEmbeddings
+import os
+
+EMBEDDINGS = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
+INDEX_PATH = "memory/faiss_index"
+
+# Load (or bootstrap) the index once, at import time
+if os.path.exists(INDEX_PATH):
+    vectordb = FAISS.load_local(INDEX_PATH, EMBEDDINGS, allow_dangerous_deserialization=True)
+else:
+    vectordb = FAISS.from_texts(["Base vide"], EMBEDDINGS)
+    vectordb.save_local(INDEX_PATH)
+
+def local_search(query: str):
+    docs = vectordb.similarity_search(query, k=3)
+    return "\n".join([d.page_content for d in docs])
+
+local_knowledge_tool = Tool(
+    name="Base Locale",
+    func=local_search,
+    description="Recherche dans la base documentaire locale FAISS"
+)
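The Tool wrapper can also be queried directly, outside the agent (a sketch; the index bootstraps itself with a "Base vide" entry on first import):

```python
from tools.local_docs import local_knowledge_tool

print(local_knowledge_tool.run("Gabon"))
```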
tools/todo.py ADDED
@@ -0,0 +1,41 @@
+# tools/todo.py
+import streamlit as st
+import time
+
+def execute_reflection_plan(prompt, geo_info=None, messages=None):
+    """
+    Structured reflection phase that locks onto the current conversation topic.
+    """
+    if geo_info is None:
+        geo_info = {}
+    if messages is None:  # avoid a mutable default argument
+        messages = []
+
+    # 1. TOPIC ANALYSIS (contextualization)
+    # A short prompt inherits the topic of the last message
+    subject = prompt
+    is_continuation = len(prompt.split()) < 5 or any(x in prompt.lower() for x in ["ils", "elles", "donc", "alors", "ceux-là"])
+
+    if is_continuation and len(messages) > 0:
+        # Feed the main topic of the last exchange into the reflection
+        subject = f"{prompt} (contexte: {messages[-1]['content'][:50]}...)"
+
+    location = f"{geo_info.get('city', 'Libreville')}, {geo_info.get('country', 'Gabon')}"
+    method = geo_info.get('method', 'Inconnue')
+
+    with st.status("🧠 Kibali Thinking Engine", expanded=True) as status:
+        st.write(f"🌍 **Localisation active :** {location} ({method})")
+
+        if is_continuation:
+            st.write("🔗 **Liaison contextuelle :** Analyse du sujet précédent détectée.")
+
+        steps = [
+            f"Identification de l'entité : Recherche d'informations sur '{subject}'.",
+            "Extraction de la mémoire sémantique FAISS pour éviter les répétitions.",
+            "Requête Web enrichie : Combinaison du sujet récent + question actuelle pour les images.",
+            "Vérification de la pertinence culturelle et temporelle pour le Gabon."
+        ]
+
+        for i, step in enumerate(steps):
+            st.write(f"{i+1}. {step}")
+            time.sleep(0.15)
+
+        status.update(label="✅ Stratégie de réponse validée", state="complete", expanded=False)
+    return True
tools/web.py ADDED
@@ -0,0 +1,69 @@
+# tools/web.py
+from tavily import TavilyClient
+from duckduckgo_search import DDGS
+from langchain.tools import Tool
+import streamlit as st
+import os
+from dotenv import load_dotenv
+
+# Load the .env file for local runs
+load_dotenv()
+
+# API key lookup (system env / HF Secrets take priority)
+TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")
+
+def web_search(query: str):
+    """
+    Hybrid Tavily + DuckDuckGo search with API-key safety and fallback.
+    """
+    results = []
+    images = []
+
+    # Initialize the Tavily client only if a key is present
+    tavily = None
+    if TAVILY_API_KEY:
+        tavily = TavilyClient(api_key=TAVILY_API_KEY)
+
+    # 1. TRY TAVILY FIRST
+    if tavily:
+        try:
+            res = tavily.search(
+                query=query,
+                max_results=5,
+                search_depth="advanced",
+                include_images=True
+            )
+            results = res.get('results', [])
+            images = res.get('images', [])
+
+            if len(results) >= 3:
+                return {"results": results, "images": images, "query": query, "source": "Tavily"}
+
+        except Exception:
+            st.warning("🔄 Tavily indisponible ou limite atteinte, basculement sur DuckDuckGo...")
+
+    # 2. FALL BACK TO DUCKDUCKGO
+    try:
+        with DDGS() as ddgs:
+            # Text results
+            ddg_text = list(ddgs.text(query, max_results=5))
+            for r in ddg_text:
+                results.append({
+                    "title": r.get('title'),
+                    "content": r.get('body'),
+                    "url": r.get('href')
+                })
+
+            # Image results
+            if not images:
+                ddg_imgs = list(ddgs.images(query, max_results=3))
+                images = [img.get('image') for img in ddg_imgs]
+
+    except Exception as e:
+        st.error(f"⚠️ Échec critique des moteurs de recherche : {e}")
+
+    return {
+        "results": results,
+        "images": images,
+        "query": query,
+        "source": "Hybrid/DDG"
+    }
+
+# Tool wrapper expected by agent.py, which imports web_search_tool from this module
+web_search_tool = Tool(
+    name="Recherche Web",
+    func=lambda q: "\n".join(r.get("content", "")[:300] for r in web_search(q).get("results", [])),
+    description="Recherche web hybride Tavily/DuckDuckGo"
+)
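Usage sketch showing the dict shape app.py consumes (with no `TAVILY_API_KEY` set, execution goes straight to the DuckDuckGo branch):

```python
from tools.web import web_search

data = web_search("actualités Libreville")
print("source:", data["source"])
for r in data["results"]:
    print("-", r.get("title"), r.get("url"))
```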