Spaces:

Sof850
/

RAG

Runtime error

App Files Community

Kakarot21 committed on Dec 1, 2025

Commit

fc8e15c

1 Parent(s): 46f5338

feat: Switch to French-specific models and refine prompt engineering for improved French RAG.

Browse files

Files changed (2) hide show

app.py +22 -15
data_cutter.py +1 -1

app.py CHANGED Viewed

@@ -8,8 +8,7 @@ from data_cutter import create_db
 # Constants
 CHROMA_PATH = "chroma_db"
-# Using a smaller model suitable for HF Spaces free tier
-MODEL_ID = "moussaKam/t5-small-fr-summarization"  # ~250MB, much more suitable for HF Spaces
 print("🚀 Starting app...")
@@ -40,7 +39,7 @@ pipe = pipeline(
     "text2text-generation",
     model=model,
     tokenizer=tokenizer,
-    max_new_tokens=256,
     device=-1,  # CPU
     do_sample=True,
     temperature=0.7,
@@ -54,27 +53,30 @@ def chat_function(message, history):
     try:
         # Search for relevant chunks
-        results = vectorstore.similarity_search(message, k=2)
         context = "\n\n".join([doc.page_content for doc in results])
-        # Build prompt (FLAN-T5 works better with English prompts)
-        prompt = f"""Voici le contexte extrait du document :
 {context}
-Question (en français) : {message}
-Répondez uniquement en français en utilisant ce contexte.
-Si la réponse n'est pas dans le contexte, dites "Je ne sais pas"."""
         # Generate response
-        outputs = pipe(prompt, max_new_tokens=256, num_return_sequences=1)
         response = outputs[0]['generated_text'].strip()
         print(f"✅ Response generated: {response[:100]}...")
         return response
     except Exception as e:
-        error_msg = f"Error generating response: {str(e)}"
         print(f"❌ {error_msg}")
         return error_msg
@@ -82,10 +84,15 @@ Si la réponse n'est pas dans le contexte, dites "Je ne sais pas"."""
 # 4️⃣ Gradio Interface
 demo = gr.ChatInterface(
     fn=chat_function,
-    title="RAG Chat with Your Data",
-    description=f"Ask questions about your documents. Powered by {MODEL_ID}.",
-    examples=["What is the main topic?", "Summarize the content."]
 )
 if __name__ == "__main__":
-    demo.launch()

 # Constants
 CHROMA_PATH = "chroma_db"
+MODEL_ID = "google/flan-t5-small"  # Better for French Q&A
 print("🚀 Starting app...")
     "text2text-generation",
     model=model,
     tokenizer=tokenizer,
+    max_new_tokens=300,
     device=-1,  # CPU
     do_sample=True,
     temperature=0.7,
     try:
         # Search for relevant chunks
+        results = vectorstore.similarity_search(message, k=3)
         context = "\n\n".join([doc.page_content for doc in results])
+        # Build prompt optimized for Flan-T5
+        prompt = f"""Contexte du document:
 {context}
+Question: {message}
+Répondez en français en vous basant uniquement sur le contexte ci-dessus. Si l'information n'est pas dans le contexte, dites "Je ne trouve pas cette information dans le document"."""
         # Generate response
+        outputs = pipe(prompt, max_new_tokens=300, num_return_sequences=1)
         response = outputs[0]['generated_text'].strip()
+        # Fallback if response is too short or empty
+        if len(response) < 10:
+            response = "Je n'ai pas trouvé d'information pertinente dans le document pour répondre à votre question."
         print(f"✅ Response generated: {response[:100]}...")
         return response
     except Exception as e:
+        error_msg = f"Erreur lors de la génération de la réponse: {str(e)}"
         print(f"❌ {error_msg}")
         return error_msg
 # 4️⃣ Gradio Interface
 demo = gr.ChatInterface(
     fn=chat_function,
+    title="💬 RAG Chat - Documents en Français",
+    description=f"Posez des questions sur vos documents PDF en français. Propulsé par {MODEL_ID}.",
+    examples=[
+        "Quel est le sujet principal du document ?",
+        "Résume le contenu principal.",
+        "Quelles sont les informations importantes ?"
+    ],
+    theme=gr.themes.Soft()
 )
 if __name__ == "__main__":
+    demo.launch()

data_cutter.py CHANGED Viewed

@@ -58,7 +58,7 @@ def create_db():
     print("\nCreating ChromaDB vector store with HuggingFace embeddings (all-MiniLM-L6-v2)...")
     embeddings = HuggingFaceEmbeddings(
-        model_name="sentence-transformers/all-MiniLM-L6-v2"
     )
     vectorstore = Chroma.from_documents(

     print("\nCreating ChromaDB vector store with HuggingFace embeddings (all-MiniLM-L6-v2)...")
     embeddings = HuggingFaceEmbeddings(
+        model_name="dangvantuan/sentence-camembert-base"  # French-specific embeddings
     )
     vectorstore = Chroma.from_documents(