# NOTE(review): the code below was recovered from a Hugging Face Spaces
# "Runtime error" log; the surrounding pipe-table characters are log
# formatting, not part of the program.
| import gradio as gr | |
| from langchain_huggingface import HuggingFaceEmbeddings | |
| from langchain_chroma import Chroma | |
| from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline | |
| import torch | |
| import os | |
| from data_cutter import create_db | |
# Constants
CHROMA_PATH = "chroma_db"
MODEL_ID = "google/flan-t5-small"  # Small seq2seq model; chosen for French Q&A on CPU

print("🚀 Starting app...")

# 1️⃣ Initialize / Load Database
# Build the vector DB from the data folder; if that fails but a previously
# persisted Chroma index exists on disk, fall back to loading it instead of
# crashing the app at startup.
print("🔄 Initializing database from data folder...")
try:
    vectorstore = create_db()
    print("✅ Database created successfully!")
except Exception as e:
    print(f"❌ Error creating database: {e}")
    if os.path.exists(CHROMA_PATH):
        print("⚠️ Attempting to load existing database...")
        embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
        vectorstore = Chroma(persist_directory=CHROMA_PATH, embedding_function=embeddings)
    else:
        # No persisted index to fall back on: re-raise the original error.
        # Bare `raise` (instead of `raise e`) is the idiomatic form and
        # guarantees the original traceback is preserved unchanged.
        raise
# 2️⃣ Load LLM
# Load the seq2seq model and wrap it in a CPU text2text pipeline with
# sampling enabled.
print(f"🤖 Loading AI Model ({MODEL_ID})...")

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForSeq2SeqLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float32,
    low_cpu_mem_usage=True,
)

# Decoding configuration, kept in one place for readability.
_GENERATION_OPTS = {
    "max_new_tokens": 300,
    "do_sample": True,
    "temperature": 0.7,
    "top_p": 0.9,
}

pipe = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
    device=-1,  # CPU
    **_GENERATION_OPTS,
)
print("✅ AI Model loaded successfully!")
# 3️⃣ Chat function
def chat_function(message, history):
    """Answer a user question with RAG over the Chroma vectorstore.

    Retrieves the top-3 most similar chunks, builds a French Flan-T5
    prompt from them, and generates an answer restricted to that context.

    Args:
        message: The user's question (str).
        history: Chat history supplied by gr.ChatInterface; unused here
            but required by the callback signature.

    Returns:
        The generated answer in French, or a user-facing error message
        if retrieval/generation fails.
    """
    no_info_msg = "Je n'ai pas trouvé d'information pertinente dans le document pour répondre à votre question."
    print(f"📨 Question received: {message}")
    try:
        # Search for relevant chunks
        results = vectorstore.similarity_search(message, k=3)
        # Robustness: if nothing was retrieved, skip generation entirely
        # instead of prompting the model with an empty context.
        if not results:
            return no_info_msg
        context = "\n\n".join(doc.page_content for doc in results)
        # Build prompt optimized for Flan-T5
        prompt = f"""Contexte du document:
{context}
Question: {message}
Répondez en français en vous basant uniquement sur le contexte ci-dessus. Si l'information n'est pas dans le contexte, dites "Je ne trouve pas cette information dans le document"."""
        # Generate response. max_new_tokens is already set on the pipeline,
        # so it is not repeated here.
        outputs = pipe(prompt, num_return_sequences=1)
        response = outputs[0]['generated_text'].strip()
        # Fallback if response is too short or empty
        if len(response) < 10:
            response = no_info_msg
        print(f"✅ Response generated: {response[:100]}...")
        return response
    except Exception as e:
        # Top-level chat boundary: surface the error to the user rather
        # than crashing the Gradio callback.
        error_msg = f"Erreur lors de la génération de la réponse: {str(e)}"
        print(f"❌ {error_msg}")
        return error_msg
# 4️⃣ Gradio Interface
# Canned questions shown under the chat box as clickable examples.
_EXAMPLE_QUESTIONS = [
    "Quel est le sujet principal du document ?",
    "Résume le contenu principal.",
    "Quelles sont les informations importantes ?",
]

demo = gr.ChatInterface(
    fn=chat_function,
    title="💬 RAG Chat - Documents en Français",
    description=f"Posez des questions sur vos documents PDF en français. Propulsé par {MODEL_ID}.",
    examples=_EXAMPLE_QUESTIONS,
)

if __name__ == "__main__":
    demo.launch()