"""Gradio RAG chat app: answers French questions over local PDF documents.

Builds (or falls back to loading) a persisted Chroma vector store, loads a
Flan-T5 seq2seq model on CPU, and serves a Gradio chat UI that answers
questions using the top retrieved document chunks as context.
"""
import gradio as gr
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
import torch
import os
from data_cutter import create_db

# Constants
CHROMA_PATH = "chroma_db"
MODEL_ID = "google/flan-t5-small"  # Better for French Q&A
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"

print("🚀 Starting app...")


def _init_vectorstore():
    """Create the Chroma DB from the data folder.

    Falls back to loading an already-persisted DB at ``CHROMA_PATH`` when
    creation fails (e.g. the data folder is missing on a restarted deploy).

    Returns:
        A Chroma vector store ready for similarity search.

    Raises:
        Exception: re-raises the creation error when no persisted DB exists
        to fall back to.
    """
    print("🔄 Initializing database from data folder...")
    try:
        store = create_db()
        print("✅ Database created successfully!")
        return store
    except Exception as e:
        print(f"❌ Error creating database: {e}")
        if os.path.exists(CHROMA_PATH):
            print("⚠️ Attempting to load existing database...")
            embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
            return Chroma(
                persist_directory=CHROMA_PATH,
                embedding_function=embeddings,
            )
        # No fallback available: bare `raise` keeps the original traceback
        # (``raise e`` would re-bind it).
        raise


def _load_pipeline():
    """Load the seq2seq model and wrap it in a CPU text2text pipeline."""
    print(f"🤖 Loading AI Model ({MODEL_ID})...")
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
    model = AutoModelForSeq2SeqLM.from_pretrained(
        MODEL_ID,
        torch_dtype=torch.float32,
        low_cpu_mem_usage=True,
    )
    llm = pipeline(
        "text2text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=300,
        device=-1,  # CPU
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
    )
    print("✅ AI Model loaded successfully!")
    return llm


# Module-level initialization: deployment targets (e.g. `gradio app.py`,
# HF Spaces) import this module and expect `vectorstore`, `pipe` and
# `demo` to exist as module attributes.

# 1️⃣ Initialize / Load Database
vectorstore = _init_vectorstore()

# 2️⃣ Load LLM
pipe = _load_pipeline()


# 3️⃣ Chat function
def chat_function(message, history):
    """Answer ``message`` using the 3 most similar document chunks.

    Args:
        message: The user's question (French).
        history: Prior chat turns; required by ``gr.ChatInterface``'s
            callback signature but unused here (each question is answered
            statelessly against the documents).

    Returns:
        The generated answer, a French fallback sentence when generation
        is too short to be useful, or a French error message on failure.
    """
    print(f"📨 Question received: {message}")
    try:
        # Search for relevant chunks
        results = vectorstore.similarity_search(message, k=3)
        context = "\n\n".join(doc.page_content for doc in results)

        # Build prompt optimized for Flan-T5
        prompt = f"""Contexte du document: {context}

Question: {message}

Répondez en français en vous basant uniquement sur le contexte ci-dessus. 
Si l'information n'est pas dans le contexte, dites "Je ne trouve pas cette information dans le document"."""

        # Generate response (max_new_tokens=300 is already configured on
        # the pipeline; no need to repeat it per call).
        outputs = pipe(prompt, num_return_sequences=1)
        response = outputs[0]['generated_text'].strip()

        # Fallback if response is too short or empty
        if len(response) < 10:
            response = "Je n'ai pas trouvé d'information pertinente dans le document pour répondre à votre question."

        print(f"✅ Response generated: {response[:100]}...")
        return response
    except Exception as e:
        # Boundary handler: surface the failure to the chat UI rather than
        # crashing the Gradio worker.
        error_msg = f"Erreur lors de la génération de la réponse: {str(e)}"
        print(f"❌ {error_msg}")
        return error_msg


# 4️⃣ Gradio Interface
demo = gr.ChatInterface(
    fn=chat_function,
    title="💬 RAG Chat - Documents en Français",
    description=f"Posez des questions sur vos documents PDF en français. Propulsé par {MODEL_ID}.",
    examples=[
        "Quel est le sujet principal du document ?",
        "Résume le contenu principal.",
        "Quelles sont les informations importantes ?",
    ],
)

if __name__ == "__main__":
    demo.launch()