# Wall06's picture
# Update app.py
# 52f7b28 verified
import os
import gradio as gr
from transformers import pipeline
# specific imports to fix "ModuleNotFoundError"
from langchain.chains import RetrievalQA
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.llms import HuggingFacePipeline
# ------------------ LOAD EMBEDDINGS ------------------
# Checkpoint used to turn text into vectors; the same embeddings object is
# handed to the FAISS vector store further down.
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"

embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)
# ------------------ LOAD VECTOR STORE ------------------
# Folder holding the persisted FAISS index (created by a separate
# ingest/indexing script, per the message printed below).
FAISS_INDEX_DIR = "vectorstore/faiss_index"

if os.path.exists(FAISS_INDEX_DIR):
    # NOTE(security): allow_dangerous_deserialization=True opts in to loading
    # pickled data from the index folder. Only use an index you built
    # yourself or otherwise trust.
    db = FAISS.load_local(
        FAISS_INDEX_DIR,
        embeddings,
        allow_dangerous_deserialization=True,
    )
else:
    print("❌ ERROR: 'vectorstore/faiss_index' folder not found.")
    print(" Please run your ingest/indexing script first to create the database.")
    # Build a one-entry placeholder index so the app can still start
    # instead of crashing at import time.
    db = FAISS.from_texts(["Empty index"], embeddings)
# ------------------ LOAD LLM ------------------
# Using phi-2.
# WARNING: If the Space crashes with "OOM" (Out of Memory), switch
# LLM_MODEL_NAME to a smaller checkpoint such as "google/flan-t5-small".
# NOTE(review): a T5 checkpoint presumably also needs a different pipeline
# task than "text-generation" -- confirm before switching.
LLM_MODEL_NAME = "microsoft/phi-2"

# Keyword arguments forwarded unchanged to the transformers pipeline.
generation_settings = {
    "max_new_tokens": 256,  # Reduced slightly to save memory
    "temperature": 0.2,
    "do_sample": True,
    "truncation": True,
}

print("Loading Model...")
text_gen_pipeline = pipeline(
    "text-generation",
    model=LLM_MODEL_NAME,
    **generation_settings,
)

# Wrap the transformers pipeline so LangChain can use it as an LLM.
llm = HuggingFacePipeline(pipeline=text_gen_pipeline)
# ------------------ RAG CHAIN ------------------
# The retriever is asked for k=3 entries from the vector store per query;
# the chain is built with the "stuff" chain type on top of the LLM.
retriever = db.as_retriever(search_kwargs={"k": 3})

qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
)
# ------------------ CHAT FUNCTION ------------------
def chat(user_message, history):
    """Answer ``user_message`` with the RAG chain and record the turn.

    Args:
        user_message: Text typed by the user. ``None``, empty, or
            whitespace-only input is ignored.
        history: List of ``(user_message, answer)`` pairs displayed by the
            chatbot, or ``None`` when there is no conversation yet.

    Returns:
        The chat history as a list. For blank input it is returned without
        a new entry; otherwise one ``(user_message, answer)`` pair is
        appended. If generating the answer raises, the error text is used
        as the answer instead of propagating the exception.
    """
    # Treat a missing history as empty so the append below cannot fail.
    if history is None:
        history = []

    # Ignore blank submissions (including a missing message).
    if not user_message or not user_message.strip():
        return history

    try:
        # Chain.run() is deprecated in LangChain; invoke() takes the question
        # under the "query" key and returns a dict whose "result" entry is
        # the generated answer.
        answer = qa_chain.invoke({"query": user_message})["result"]
    except Exception as e:
        # UI boundary: show the failure in the chat rather than crashing.
        answer = f"Error generating answer: {e}"

    history.append((user_message, answer))
    return history
# ------------------ GRADIO UI ------------------
# Header text rendered at the top of the page.
INTRO_MARKDOWN = """
# 📚 Document RAG Chatbot
Answers are generated **strictly from the provided documents** using Retrieval-Augmented Generation.
"""

with gr.Blocks(title="Document RAG Chatbot") as demo:
    gr.Markdown(INTRO_MARKDOWN)

    chatbot = gr.Chatbot(height=400)
    query = gr.Textbox(
        placeholder="Ask something from the documents...",
        label="Ask a question",
    )

    with gr.Row():
        submit_btn = gr.Button("Submit", variant="primary")
        clear_btn = gr.Button("Clear Chat")

    # Pressing Enter in the textbox and clicking "Submit" both call chat()
    # with the current question and history, and write the result back to
    # the chatbot.
    for register in (query.submit, submit_btn.click):
        register(chat, [query, chatbot], chatbot)

    # "Clear Chat" replaces the chatbot contents with an empty history.
    clear_btn.click(fn=lambda: [], inputs=None, outputs=chatbot)

if __name__ == "__main__":
    demo.launch()