Spaces:

ZedLow
/

Constrained-Financial-RAG

Sleeping

App Files Files Community

ZedLow committed on Feb 5

Commit

a24954e

verified ·

1 Parent(s): f2863bc

Update app.py

Browse files

Files changed (1) hide show

app.py +38 -25

app.py CHANGED Viewed

@@ -8,7 +8,7 @@ from transformers import AutoModel, AutoTokenizer, Qwen2VLForConditionalGenerati
 from qwen_vl_utils import process_vision_info
 # --- CONFIGURATION ---
-print(f"🚀 Démarrage RAG Finance (Mode Multi-View : 3 Images)...")
 # --- 1. DONNÉES ---
 try:
@@ -16,10 +16,11 @@ try:
         dataset = json.load(f)
 except:
     dataset = []
-    print("⚠️ Index vide.")
-# --- 2. MODÈLES ---
-# A. EMBEDDING : GTE-Qwen2-7B (Le modèle LOURD qui causait les crashs mémoire)
 EMBED_MODEL_ID = "Alibaba-NLP/gte-Qwen2-7B-instruct"
 print(f"🔹 Chargement Embedder : {EMBED_MODEL_ID}")
@@ -28,8 +29,8 @@ embed_model = AutoModel.from_pretrained(
     EMBED_MODEL_ID,
     trust_remote_code=False,
     torch_dtype=torch.bfloat16,
-    # C'est cette ligne qui fait planter si pas de GPU détecté immédiatement
-    attn_implementation="flash_attention_2",
     device_map="auto"
 )
@@ -49,12 +50,14 @@ print(f"👁️ Chargement Vision : {GEN_MODEL_ID}")
 gen_model = Qwen2VLForConditionalGeneration.from_pretrained(
     GEN_MODEL_ID,
     torch_dtype=torch.bfloat16,
-    attn_implementation="flash_attention_2",
     device_map="auto"
 )
 gen_processor = AutoProcessor.from_pretrained(GEN_MODEL_ID)
-# --- 3. FONCTIONS ---
 def last_token_pool(last_hidden_states, attention_mask):
     left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
     if left_padding:
@@ -64,14 +67,15 @@ def last_token_pool(last_hidden_states, attention_mask):
         batch_size = last_hidden_states.shape[0]
         return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
-# --- 4. PIPELINE ---
 @spaces.GPU
 def retrieve_and_answer(query):
     print(f"⚡ Question : {query}")
     if not dataset: return None, "Base vide", "Pas de document"
-    # 1. RETRIEVAL (Recalculé à chaque fois -> Lent)
     valid_docs = []
     for i, doc in enumerate(dataset):
         text = doc.get('text', '').strip()
@@ -89,6 +93,7 @@ def retrieve_and_answer(query):
         d_embeddings_list = []
         doc_texts = [d['text'] for d in valid_docs]
         for i in range(0, len(doc_texts), 1):
             d_inputs = embed_tokenizer(doc_texts[i:i+1], max_length=8192, padding=True, truncation=True, return_tensors='pt').to(embed_model.device)
             d_outputs = embed_model(**d_inputs)
@@ -100,7 +105,7 @@ def retrieve_and_answer(query):
         scores = (q_emb @ d_emb_final.T).squeeze(0)
         top_k_indices = torch.topk(scores, k=min(10, len(scores))).indices.tolist()
-    # 2. RERANKING
     pairs = []
     for idx in top_k_indices:
         pairs.append([query, valid_docs[idx]['text']])
@@ -110,7 +115,7 @@ def retrieve_and_answer(query):
         r_scores = rerank_model(**r_inputs, return_dict=True).logits.view(-1).float()
         top_3_indices_local = torch.topk(r_scores, k=min(3, len(r_scores))).indices.tolist()
-    # 3. PREPARATION IMAGES (C'est ICI que l'hallucination se crée)
     images_content = []
     gallery_preview = []
     meta_info = ""
@@ -126,20 +131,28 @@ def retrieve_and_answer(query):
         try:
             img = Image.open(image_path)
-            # PROBLÈME ICI : On ne dit pas au modèle "Ceci est Microsoft" ou "Ceci est Apple"
-            # Il voit juste "Image 1", "Image 2"...
-            images_content.append({"type": "text", "text": f"Image {rank+1} (Pertinence: {score:.2f}):\n"})
             images_content.append({"type": "image", "image": img})
-            gallery_preview.append((img, f"Page {rank+1} - Score {score:.2f}"))
-            meta_info += f"- **Image {rank+1}:** {doc['doc_name']} (Score: {score:.2f})\n"
         except:
             continue
-    # 4. GENERATION
     system_prompt = (
-        "You are an expert financial analyst examining 3 pages of a report. "
-        "Your goal is to answer the user question using ONLY the provided images."
     )
     user_content = images_content + [{"type": "text", "text": f"\nUser Question: {query}"}]
@@ -167,16 +180,16 @@ def retrieve_and_answer(query):
 # --- 5. UI ---
 with gr.Blocks(title="RAG Finance") as demo:
-    gr.Markdown("# 🚀 RAG Finance (Version Originale Instable)")
     with gr.Row():
-        query_input = gr.Textbox(label="Question")
         submit_btn = gr.Button("Analyser", variant="primary")
     with gr.Row():
-        output_gallery = gr.Gallery(label="Pages")
-        output_meta = gr.Markdown(label="Sources")
-        output_text = gr.Markdown(label="Réponse")
     submit_btn.click(retrieve_and_answer, inputs=query_input, outputs=[output_gallery, output_meta, output_text])

 from qwen_vl_utils import process_vision_info
 # --- CONFIGURATION ---
+print(f"🚀 Démarrage RAG Finance (Version Originale + Fix Hallucination)...")
 # --- 1. DONNÉES ---
 try:
         dataset = json.load(f)
 except:
     dataset = []
+    print("⚠️ Index vide ou fichier non trouvé.")
+# --- 2. MODÈLES (INCHANGÉS) ---
+# A. EMBEDDING : GTE-Qwen2-7B (Le modèle LOURD original)
 EMBED_MODEL_ID = "Alibaba-NLP/gte-Qwen2-7B-instruct"
 print(f"🔹 Chargement Embedder : {EMBED_MODEL_ID}")
     EMBED_MODEL_ID,
     trust_remote_code=False,
     torch_dtype=torch.bfloat16,
+    # J'ai mis en commentaire la ligne qui fait planter le démarrage sur CPU (ZeroGPU)
+    # attn_implementation="flash_attention_2",
     device_map="auto"
 )
 gen_model = Qwen2VLForConditionalGeneration.from_pretrained(
     GEN_MODEL_ID,
     torch_dtype=torch.bfloat16,
+    # Idem, désactivé pour éviter le crash "No CUDA" au boot
+    # attn_implementation="flash_attention_2",
     device_map="auto"
 )
 gen_processor = AutoProcessor.from_pretrained(GEN_MODEL_ID)
+# --- 3. FONCTIONS UTILITAIRES ---
 def last_token_pool(last_hidden_states, attention_mask):
     left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
     if left_padding:
         batch_size = last_hidden_states.shape[0]
         return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
+# --- 4. LOGIQUE RAG MULTI-VIEW ---
 @spaces.GPU
 def retrieve_and_answer(query):
     print(f"⚡ Question : {query}")
     if not dataset: return None, "Base vide", "Pas de document"
+    # === ÉTAPE 1 : RETRIEVAL (Embedding) ===
     valid_docs = []
     for i, doc in enumerate(dataset):
         text = doc.get('text', '').strip()
         d_embeddings_list = []
         doc_texts = [d['text'] for d in valid_docs]
+        # Batch size de 1 pour économiser la mémoire avec le gros modèle 7B
         for i in range(0, len(doc_texts), 1):
             d_inputs = embed_tokenizer(doc_texts[i:i+1], max_length=8192, padding=True, truncation=True, return_tensors='pt').to(embed_model.device)
             d_outputs = embed_model(**d_inputs)
         scores = (q_emb @ d_emb_final.T).squeeze(0)
         top_k_indices = torch.topk(scores, k=min(10, len(scores))).indices.tolist()
+    # === ÉTAPE 2 : RERANKING ===
     pairs = []
     for idx in top_k_indices:
         pairs.append([query, valid_docs[idx]['text']])
         r_scores = rerank_model(**r_inputs, return_dict=True).logits.view(-1).float()
         top_3_indices_local = torch.topk(r_scores, k=min(3, len(r_scores))).indices.tolist()
+    # === ÉTAPE 3 : PRÉPARATION IMAGES (ICI ON CORRIGE L'HALLUCINATION) ===
     images_content = []
     gallery_preview = []
     meta_info = ""
         try:
             img = Image.open(image_path)
+            # --- FIX HALLUCINATION ---
+            # On récupère le nom du document (ex: "Microsoft 2023 Report")
+            doc_name = doc.get('doc_name', 'Unknown Document')
+            # On l'injecte explicitement dans le texte que voit l'IA
+            prompt_header = f"DOCUMENT SOURCE: {doc_name} (Relevance: {score:.2f})\n"
+            images_content.append({"type": "text", "text": prompt_header})
             images_content.append({"type": "image", "image": img})
+            gallery_preview.append((img, f"{doc_name} (Rank {rank+1})"))
+            meta_info += f"- **{doc_name}** (Score: {score:.2f})\n"
         except:
             continue
+    # === ÉTAPE 4 : GÉNÉRATION ===
+    # On renforce le prompt système pour qu'il fasse attention au nom du document
     system_prompt = (
+        "You are an expert financial analyst. Answer the user question using ONLY the provided images.\n"
+        "IMPORTANT: Before reading a table, check the 'DOCUMENT SOURCE' name above the image.\n"
+        "If the user asks about Microsoft, do not use data from an Apple document (and vice versa)."
     )
     user_content = images_content + [{"type": "text", "text": f"\nUser Question: {query}"}]
 # --- 5. UI ---
 with gr.Blocks(title="RAG Finance") as demo:
+    gr.Markdown("# 🚀 RAG Finance (Moteurs Originaux + Sécurité Hallucination)")
     with gr.Row():
+        query_input = gr.Textbox(label="Question", placeholder="Ex: What is the revenue of Microsoft?")
         submit_btn = gr.Button("Analyser", variant="primary")
     with gr.Row():
+        output_gallery = gr.Gallery(label="Pages Analysées", columns=3, height=300)
+        output_meta = gr.Markdown(label="Sources Identifiées")
+        output_text = gr.Markdown(label="Réponse IA")
     submit_btn.click(retrieve_and_answer, inputs=query_input, outputs=[output_gallery, output_meta, output_text])