Spaces:

Wall06
/

Document_RAG_Chatbot

Runtime error

App Files Files Community

Document_RAG_Chatbot / app.py

Wall06

Update app.py

52f7b28 verified about 1 month ago

raw

history blame contribute delete

2.98 kB

	import os
	import gradio as gr
	from transformers import pipeline

	# specific imports to fix "ModuleNotFoundError"
	from langchain.chains import RetrievalQA
	from langchain_community.vectorstores import FAISS
	from langchain_huggingface import HuggingFaceEmbeddings
	from langchain_community.llms import HuggingFacePipeline

	# ------------------ LOAD EMBEDDINGS ------------------
	# Embedding model shared by the saved index and by incoming queries.
	embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

	# ------------------ LOAD VECTOR STORE ------------------
	# Use the saved FAISS index when its folder is present; otherwise report the
	# problem and build a one-entry placeholder index so start-up can continue.
	if os.path.exists("vectorstore/faiss_index"):
	    # NOTE(review): allow_dangerous_deserialization=True opts in to loading
	    # pickled data from disk -- only acceptable while the index files are
	    # produced by this project itself and never come from an untrusted source.
	    db = FAISS.load_local(
	        "vectorstore/faiss_index",
	        embeddings,
	        allow_dangerous_deserialization=True,
	    )
	else:
	    print("❌ ERROR: 'vectorstore/faiss_index' folder not found.")
	    print(" Please run your ingest/indexing script first to create the database.")
	    # Placeholder database: keeps the app importable, but answers will be useless.
	    db = FAISS.from_texts(["Empty index"], embeddings)

	# ------------------ LOAD LLM ------------------
	# Builds a transformers text-generation pipeline for microsoft/phi-2 and wraps
	# it so LangChain can use it as an LLM.
	# WARNING: If the Space crashes with "OOM" (Out of Memory), the suggested
	# fallback model is "google/flan-t5-small".
	# NOTE(review): flan-t5 is presumably a seq2seq model, so the task below would
	# likely need to become "text2text-generation" as well -- confirm before switching.
	print("Loading Model...")
	text_gen_pipeline = pipeline(
	    task="text-generation",
	    model="microsoft/phi-2",
	    do_sample=True,
	    temperature=0.2,
	    max_new_tokens=256,  # kept modest to save memory
	    truncation=True,
	)
	llm = HuggingFacePipeline(pipeline=text_gen_pipeline)

	# ------------------ RAG CHAIN ------------------
	# Retrieval QA chain over the FAISS store: the retriever is configured with
	# k=3 and the chain is created with LangChain's "stuff" chain type.
	qa_chain = RetrievalQA.from_chain_type(
	    chain_type="stuff",
	    retriever=db.as_retriever(search_kwargs=dict(k=3)),
	    llm=llm,
	)

	# ------------------ CHAT FUNCTION ------------------
	def chat(user_message, history):
	    """Answer ``user_message`` with the RAG chain and record the turn.

	    Args:
	        user_message: Text entered by the user. ``None``, empty, or
	            whitespace-only input is ignored.
	        history: List of ``(user_message, answer)`` tuples held by the
	            chatbot component. ``None`` is treated as an empty history.

	    Returns:
	        The history list. For ignored input it is returned as-is; otherwise
	        one ``(user_message, answer)`` tuple has been appended. If the chain
	        raises, the answer is an ``"Error generating answer: ..."`` message
	        instead of the exception propagating to the UI.
	    """
	    # The UI may hand over None for an empty chatbot; normalise it so the
	    # append below (and the early return) always deal with a list.
	    if history is None:
	        history = []

	    if not user_message or not user_message.strip():
	        return history

	    try:
	        # `Chain.run` is deprecated; `invoke` takes the chain's input dict
	        # ("query" for RetrievalQA) and returns a dict whose "result" key
	        # holds the generated answer.
	        answer = qa_chain.invoke({"query": user_message})["result"]
	    except Exception as e:
	        # Top-level UI boundary: surface the failure in the chat window.
	        answer = f"Error generating answer: {str(e)}"

	    history.append((user_message, answer))
	    return history

	# ------------------ GRADIO UI ------------------
	# Page layout, top to bottom: heading, chat window, question box, and a row
	# holding the Submit and Clear Chat buttons.
	with gr.Blocks(title="Document RAG Chatbot") as demo:
	    # The literal's lines are left flush so the heading text is unchanged.
	    gr.Markdown(
	        """
	# 📚 Document RAG Chatbot
	Answers are generated strictly from the provided documents using Retrieval-Augmented Generation.
	"""
	    )

	    chatbot = gr.Chatbot(height=400)
	    query = gr.Textbox(
	        placeholder="Ask something from the documents...",
	        label="Ask a question",
	    )

	    with gr.Row():
	        submit_btn = gr.Button("Submit", variant="primary")
	        clear_btn = gr.Button("Clear Chat")

	    # Pressing Enter in the textbox and clicking Submit both call
	    # chat(query, chatbot) and write the returned history back to the chatbot.
	    query.submit(fn=chat, inputs=[query, chatbot], outputs=chatbot)
	    submit_btn.click(fn=chat, inputs=[query, chatbot], outputs=chatbot)
	    # Clear Chat replaces the chatbot value with an empty list.
	    clear_btn.click(fn=lambda: [], inputs=None, outputs=chatbot)

	# Start the web server only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()