"""Minimal retrieval-augmented generation (RAG) pipeline.

At import time the module downloads a FAISS index and two pickled lookup
tables from a Hugging Face dataset repository and loads them into memory.
``answer_question`` then embeds a question, retrieves the nearest chunks,
assembles a token-bounded context and asks the chat model for an answer.
"""

import os
import pickle
from pathlib import Path

import faiss
import numpy as np
import openai
import tiktoken
from dotenv import load_dotenv
from openai import OpenAI

# —— ENVIRONMENT ——
# Load .env first so that every os.getenv() below can see its values.
load_dotenv()
# The Hugging Face cache locations are exported *before* huggingface_hub is
# imported, because that library resolves its paths from the environment when
# it is imported.
os.environ.setdefault("TRANSFORMERS_CACHE", "/tmp/huggingface/cache")
os.environ.setdefault("HF_HOME", "/tmp/huggingface")

from huggingface_hub import hf_hub_download  # noqa: E402  (must follow env setup)

# Make sure this directory exists and is writable.
CACHE_DIR = os.getenv("CACHE_DIR", "/tmp/cache")
os.makedirs(CACHE_DIR, exist_ok=True)

# —— CONFIG ——
# Use the new OpenAI client (reads its credentials from the environment).
client = OpenAI()

EMBED_MODEL = "text-embedding-3-large"
CHAT_MODEL = "o4-mini-2025-04-16"
HF_REPO_ID = "Jordanche/fiscarag"
FAISS_INDEX_FILE = "tindle_index.faiss"
IDS_PKL = "tindle_ids.pkl"
CHUNKS_PKL = "tindle_chunks.pkl"
TOP_K = 10
MAX_TOKENS_CONTEXT = 4000
SYSTEM_PROMPT = (
    "Tu es un assistant expert en droit fiscal. "
    "Fais d'abord appel aux passages fournis pour répondre. "
    "Si ces passages sont insuffisants, utilise tes connaissances générales en le précisant clairement."
)


# —— INDEX LOADING ——
def _download(filename: str) -> str:
    """Fetch *filename* from the dataset repo into CACHE_DIR; return its local path."""
    return hf_hub_download(
        repo_id=HF_REPO_ID,
        filename=filename,
        repo_type="dataset",
        cache_dir=CACHE_DIR,
    )


# Download the files from the Hugging Face repo.
index_path = _download(FAISS_INDEX_FILE)
ids_path = _download(IDS_PKL)
chunks_path = _download(CHUNKS_PKL)

# Load the files.
index = faiss.read_index(index_path)
# SECURITY NOTE: pickle.load executes arbitrary code embedded in the file.
# These files come from a remote repository; only keep this if that
# repository is fully trusted.
with open(ids_path, "rb") as f:
    ids = pickle.load(f)
with open(chunks_path, "rb") as f:
    chunks_dict = pickle.load(f)

# —— TOKEN COUNTER ——
# NOTE(review): cl100k_base may not be the tokenizer CHAT_MODEL actually uses;
# treat the counts as an estimate — confirm if the budget must be exact.
enc = tiktoken.get_encoding("cl100k_base")


def num_tokens(s: str) -> int:
    """Return the number of tokens in *s* according to ``enc``."""
    # disallowed_special=() makes special-token look-alikes (e.g. the literal
    # text "<|endoftext|>") encode as ordinary text instead of raising
    # ValueError.
    return len(enc.encode(s, disallowed_special=()))


# —— RAG FUNCTIONS ——
def embed_question(question: str) -> list[float]:
    """Return the EMBED_MODEL embedding vector for *question*."""
    resp = client.embeddings.create(
        model=EMBED_MODEL,
        input=[question],
    )
    # Read the .data attribute, then the .embedding of the single item.
    return resp.data[0].embedding


def retrieve_chunks(q_emb: list[float], k: int = TOP_K) -> list[dict]:
    """Return up to *k* nearest chunks for the query embedding *q_emb*.

    Each result is a dict with keys ``score`` (the value returned by the
    index search), ``id``, ``text`` and ``metadata`` (every field of the
    stored chunk except ``text``).
    """
    xq = np.array([q_emb], dtype="float32")
    distances, indices = index.search(xq, k)
    out = []
    for dist, idx in zip(distances[0], indices[0]):
        if idx < 0:
            # FAISS pads with -1 when it finds fewer than k neighbours;
            # ids[-1] would silently return the wrong chunk.
            continue
        cid = ids[idx]
        meta = chunks_dict[cid]
        out.append({
            "score": float(dist),
            "id": cid,
            "text": meta["text"],
            # Keep the full metadata dictionary, minus the text itself.
            "metadata": {key: val for key, val in meta.items() if key != "text"},
        })
    return out


def build_context(chunks, max_tokens=MAX_TOKENS_CONTEXT) -> str:
    """Concatenate chunk texts, each prefixed by its source line, within a token budget.

    Chunks are taken in ascending ``score`` order and appended until the next
    one would push the running total above *max_tokens*; the result is the
    kept pieces joined by blank lines (the separators are not counted).
    """
    parts, tokens = [], 0
    # NOTE(review): ascending order puts the best match first only if the
    # index returns distances (smaller = closer) — confirm the index metric.
    for c in sorted(chunks, key=lambda x: x["score"]):
        # Build the metadata section.
        metadata_str = " | ".join(
            f"{key}: {value}" for key, value in c["metadata"].items()
        )
        source_info = f"(Source: {c['id']}"
        if metadata_str:
            source_info += f" | {metadata_str}"
        source_info += ")"

        piece = f"{source_info} {c['text']}"
        nt = num_tokens(piece)
        if tokens + nt > max_tokens:
            break
        parts.append(piece)
        tokens += nt
    return "\n\n".join(parts)


def make_prompt(question: str, context: str) -> list[dict[str, str]]:
    """Return the chat messages (system + user) for *question* and *context*."""
    return [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": f"Question: {question}\n\nContexte:\n{context}"},
    ]


def answer_question(question: str, k: int = TOP_K) -> str:
    """Answer *question* using the *k* nearest indexed chunks as context."""
    # 1) Embed
    q_emb = embed_question(question)
    # 2) Retrieve
    top_chunks = retrieve_chunks(q_emb, k)
    # 3) Assemble
    context = build_context(top_chunks)
    # 4) Prompt
    messages = make_prompt(question, context)
    # 5) Call LLM
    resp = client.chat.completions.create(
        model=CHAT_MODEL,
        messages=messages,
    )
    return resp.choices[0].message.content


# —— EXAMPLE ——
if __name__ == "__main__":
    question = "Quels sont les délais pour la réhabilitation d'hôtels en outre-mer ?"
    print(answer_question(question, k=10))