Spaces:
Runtime error
Runtime error
| import os | |
| import gradio as gr | |
| from transformers import pipeline | |
| # specific imports to fix "ModuleNotFoundError" | |
| from langchain.chains import RetrievalQA | |
| from langchain_community.vectorstores import FAISS | |
| from langchain_huggingface import HuggingFaceEmbeddings | |
| from langchain_community.llms import HuggingFacePipeline | |
# ------------------ LOAD EMBEDDINGS ------------------
# Sentence-transformers checkpoint used to build the embedding object that
# is handed to the FAISS vector store.
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)
# ------------------ LOAD VECTOR STORE ------------------
# FAISS.load_local() reads "<folder>/index.faiss" and "<folder>/index.pkl"
# (default index_name="index"). Checking only that the folder exists is not
# enough: an empty or half-written folder would still crash at start-up, so
# both files are verified before loading.
FAISS_INDEX_DIR = "vectorstore/faiss_index"
_required_index_files = [
    os.path.join(FAISS_INDEX_DIR, file_name)
    for file_name in ("index.faiss", "index.pkl")
]

if all(os.path.isfile(path) for path in _required_index_files):
    # SECURITY: allow_dangerous_deserialization=True unpickles index.pkl, and
    # unpickling can execute arbitrary code. Only load an index that was
    # produced by this project's own ingest step, never an untrusted upload.
    db = FAISS.load_local(
        FAISS_INDEX_DIR,
        embeddings,
        allow_dangerous_deserialization=True,
    )
else:
    if os.path.isdir(FAISS_INDEX_DIR):
        print(f"❌ ERROR: '{FAISS_INDEX_DIR}' is missing index.faiss and/or index.pkl.")
    else:
        print("❌ ERROR: 'vectorstore/faiss_index' folder not found.")
    print(" Please run your ingest/indexing script first to create the database.")
    # Fall back to a one-entry placeholder index so the app can still start
    # and show its UI instead of crashing during import.
    db = FAISS.from_texts(["Empty index"], embeddings)
# ------------------ LOAD LLM ------------------
# The generator is microsoft/phi-2, run through a transformers
# "text-generation" pipeline and wrapped for LangChain.
# WARNING: if the Space dies with "OOM" (Out of Memory), switch to a smaller
# checkpoint such as "google/flan-t5-small".
# NOTE(review): flan-t5 is an encoder-decoder model, so the task below would
# presumably also have to become "text2text-generation" - confirm before
# swapping.
print("Loading Model...")
generation_settings = {
    "model": "microsoft/phi-2",
    "max_new_tokens": 256,  # kept modest to save memory
    "temperature": 0.2,
    "do_sample": True,
    "truncation": True,
}
text_gen_pipeline = pipeline("text-generation", **generation_settings)
llm = HuggingFacePipeline(pipeline=text_gen_pipeline)
# ------------------ RAG CHAIN ------------------
# The retriever is asked for k=3 entries from the vector store per question;
# the chain is built with chain_type="stuff" on top of the local LLM.
top_k_retriever = db.as_retriever(search_kwargs={"k": 3})
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=top_k_retriever,
)
# ------------------ CHAT FUNCTION ------------------
def chat(user_message, history):
    """Answer one question through the RAG chain and extend the chat log.

    Args:
        user_message: Text typed by the user. ``None``, empty and
            whitespace-only input is ignored.
        history: Chat log as a list of ``(user_message, answer)`` pairs.
            ``None`` is treated as an empty log.

    Returns:
        A new list holding the previous pairs plus, for non-blank input, one
        appended ``(user_message, answer)`` pair. The list passed in is not
        modified.
    """
    # Copy so the caller's list is never mutated; also covers history=None.
    turns = list(history) if history else []

    if not user_message or not user_message.strip():
        return turns

    try:
        # Chain.run() is deprecated; invoke() takes the chain's input key
        # ("query") and returns a dict whose answer sits under "result".
        answer = qa_chain.invoke({"query": user_message})["result"]
    except Exception as e:
        # UI boundary: show the failure in the transcript instead of letting
        # the request crash.
        answer = f"Error generating answer: {e}"

    turns.append((user_message, answer))
    return turns
# ------------------ GRADIO UI ------------------
# Chat page: intro text, transcript, question box, Submit / Clear buttons.
with gr.Blocks(title="Document RAG Chatbot") as demo:
    gr.Markdown(
        """
        # 📚 Document RAG Chatbot
        Answers are generated **strictly from the provided documents** using Retrieval-Augmented Generation.
        """
    )
    chatbot = gr.Chatbot(height=400)
    query = gr.Textbox(
        label="Ask a question",
        placeholder="Ask something from the documents...",
    )
    with gr.Row():
        submit_btn = gr.Button("Submit", variant="primary")
        clear_btn = gr.Button("Clear Chat")

    # Pressing Enter in the textbox and clicking Submit behave identically:
    # run chat(), then empty the question box so the next question can be
    # typed without deleting the previous one by hand.
    for register_listener in (query.submit, submit_btn.click):
        register_listener(chat, [query, chatbot], chatbot).then(
            lambda: "", None, query
        )

    # Clear Chat resets the transcript to an empty list.
    clear_btn.click(lambda: [], None, chatbot)

if __name__ == "__main__":
    demo.launch()