# RAG / app.py
# Source: Hugging Face Space by Kakarot21, commit 80c4aa4
# ("Remove theme configuration from Gradio interface.")
import gradio as gr
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
import torch
import os
from data_cutter import create_db
# --- Configuration constants ---
CHROMA_PATH = "chroma_db"  # on-disk directory of the persisted Chroma vector store
MODEL_ID = "google/flan-t5-small"  # small seq2seq model; works well for French Q&A

print("🚀 Starting app...")
# 1️⃣ Initialize / Load Database
# Build the vector store from the data folder; if that fails, fall back to a
# previously persisted Chroma database on disk (if one exists).
print("🔄 Initializing database from data folder...")
try:
    vectorstore = create_db()
    print("✅ Database created successfully!")
except Exception as e:
    print(f"❌ Error creating database: {e}")
    if not os.path.exists(CHROMA_PATH):
        # No persisted store to fall back on — surface the original failure.
        raise e
    print("⚠️ Attempting to load existing database...")
    embedding_model = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    vectorstore = Chroma(
        persist_directory=CHROMA_PATH,
        embedding_function=embedding_model,
    )
# 2️⃣ Load LLM
# Load tokenizer + seq2seq weights, then wrap them in a text2text pipeline.
print(f"🤖 Loading AI Model ({MODEL_ID})...")

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForSeq2SeqLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float32,  # full precision — we run on CPU
    low_cpu_mem_usage=True,
)

pipe = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=300,
    device=-1,  # -1 = CPU
    do_sample=True,   # sampling instead of greedy decoding
    temperature=0.7,
    top_p=0.9,
)

print("✅ AI Model loaded successfully!")
# 3️⃣ Chat function
def chat_function(message, history):
    """Answer one chat turn via RAG: retrieve context, then generate.

    Args:
        message: The user's question (expected in French).
        history: Prior turns supplied by gr.ChatInterface (not used here).

    Returns:
        The generated French answer, a canned "not found" reply when the
        generation is degenerate (shorter than 10 chars), or an error
        message string if anything raises.
    """
    print(f"📨 Question received: {message}")
    try:
        # Pull the three chunks most similar to the question.
        docs = vectorstore.similarity_search(message, k=3)
        context = "\n\n".join(doc.page_content for doc in docs)

        # Prompt layout tuned for Flan-T5.
        prompt = f"""Contexte du document:
{context}
Question: {message}
Répondez en français en vous basant uniquement sur le contexte ci-dessus. Si l'information n'est pas dans le contexte, dites "Je ne trouve pas cette information dans le document"."""

        generated = pipe(prompt, max_new_tokens=300, num_return_sequences=1)
        answer = generated[0]['generated_text'].strip()

        # Guard against empty or degenerate generations.
        if len(answer) < 10:
            answer = "Je n'ai pas trouvé d'information pertinente dans le document pour répondre à votre question."

        print(f"✅ Response generated: {answer[:100]}...")
        return answer
    except Exception as e:
        # Broad catch is deliberate: the chat UI should show the error,
        # not crash the app.
        error_msg = f"Erreur lors de la génération de la réponse: {str(e)}"
        print(f"❌ {error_msg}")
        return error_msg
# 4️⃣ Gradio Interface
# ChatInterface wires chat_function into a ready-made chat UI.
demo = gr.ChatInterface(
    fn=chat_function,
    title="💬 RAG Chat - Documents en Français",
    description=f"Posez des questions sur vos documents PDF en français. Propulsé par {MODEL_ID}.",
    examples=[
        "Quel est le sujet principal du document ?",
        "Résume le contenu principal.",
        "Quelles sont les informations importantes ?",
    ],
)

if __name__ == "__main__":
    demo.launch()