Spaces:

faiz0983
/

Multi-Rag_Chatbot

Sleeping

App Files Files Community

Multi-Rag_Chatbot / app.py

faiz0983

Create app.py

f768714 verified 4 months ago

raw

history blame

4.28 kB

	import os
	import gradio as gr
	from langchain_community.document_loaders import PyPDFLoader, TextLoader, Docx2txtLoader
	from langchain_text_splitters import RecursiveCharacterTextSplitter
	from langchain_huggingface import HuggingFaceEmbeddings
	from langchain_community.vectorstores import FAISS
	from langchain_groq import ChatGroq
	from langchain.chains import ConversationalRetrievalChain
	from langchain.memory import ConversationBufferMemory

	# --- 1. SETUP API ---
	# Read the Groq API key from the GROQ_API environment variable (on Hugging
	# Face Spaces, secrets are expected to be exposed through os.environ).
	# os.environ.get returns None when the variable is unset; process_files
	# checks for that case before it tries to build the chain.
	api_key = os.environ.get("GROQ_API")

	# --- 2. FILE LOADING LOGIC ---
	def load_any(path: str):
	    """Load a single file into a list of documents, based on its extension.

	    The extension check is case-insensitive. ``.pdf`` files are read with
	    PyPDFLoader, ``.txt`` files with TextLoader (decoded as UTF-8) and
	    ``.docx`` files with Docx2txtLoader.

	    Args:
	        path: Filesystem path of the file to load.

	    Returns:
	        Whatever the selected loader's ``load()`` returns, or an empty list
	        when the extension is not one of the three supported ones.
	    """
	    lowered = path.lower()

	    # Pick the loader first, then call load() in exactly one place.
	    if lowered.endswith(".pdf"):
	        loader = PyPDFLoader(path)
	    elif lowered.endswith(".txt"):
	        loader = TextLoader(path, encoding="utf-8")
	    elif lowered.endswith(".docx"):
	        loader = Docx2txtLoader(path)
	    else:
	        # Unsupported file type: contribute no documents.
	        return []

	    return loader.load()

	# --- 3. PROCESSING FUNCTION ---
	# This function runs when the user clicks "Build Chatbot"
	def process_files(files):
	    """Build a conversational retrieval chain from the uploaded files.

	    Loads every file with ``load_any``, splits the text into overlapping
	    chunks, embeds them into a FAISS index and wires a Groq chat model plus
	    conversation memory into a ``ConversationalRetrievalChain``.

	    Args:
	        files: The uploaded files as delivered by the file input. Each item
	            may be an object exposing a ``.name`` path attribute or a plain
	            path string; ``None`` or an empty list means nothing was uploaded.

	    Returns:
	        A ``(chain, status_message)`` tuple. ``chain`` is ``None`` whenever
	        the chain could not be built; ``status_message`` is always a
	        human-readable string for the status box.
	    """
	    if not files:
	        return None, "⚠️ Please upload at least one file."

	    if not api_key:
	        return None, "❌ Error: GROQ_API key not found in Secrets."

	    try:
	        # Load Documents
	        docs = []
	        for file_obj in files:
	            # Accept both file-like objects (path in .name) and plain path
	            # strings, since what Gradio passes depends on version/settings.
	            docs.extend(load_any(getattr(file_obj, "name", file_obj)))

	        if not docs:
	            return None, "⚠️ No readable text found in files."

	        # Split Text
	        splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
	        chunks = splitter.split_documents(docs)

	        # Documents whose text is empty (e.g. scanned PDFs) produce no
	        # chunks; building a FAISS index from an empty list fails with an
	        # opaque error, so report the real problem instead.
	        if not chunks:
	            return None, "⚠️ No readable text found in files."

	        # Create Embeddings & Vector Store
	        embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
	        db = FAISS.from_documents(chunks, embeddings)
	        retriever = db.as_retriever(search_kwargs={"k": 4})

	        # Create Chain
	        llm = ChatGroq(
	            groq_api_key=api_key,
	            model="llama-3.3-70b-versatile",
	            temperature=0,
	        )

	        # output_key tells the memory which of the chain's outputs to store,
	        # because the chain also returns the source documents.
	        memory = ConversationBufferMemory(
	            memory_key="chat_history",
	            return_messages=True,
	            output_key="answer",
	        )

	        chain = ConversationalRetrievalChain.from_llm(
	            llm=llm,
	            retriever=retriever,
	            memory=memory,
	            return_source_documents=True,
	            output_key="answer",
	        )

	        return chain, f"✅ Success! Processed {len(chunks)} chunks. You can chat now."

	    except Exception as e:
	        # UI boundary: surface any loader/embedding/LLM failure as a status
	        # message instead of crashing the event handler.
	        return None, f"❌ Error: {str(e)}"

	# --- 4. CHAT FUNCTION ---
	def chat_function(message, history, chain):
	    """Answer one chat message with the current chain and list its sources.

	    Args:
	        message: The user's question.
	        history: Chat history supplied by the chat interface; not used here.
	        chain: The chain to query, or a falsy value if none has been built.

	    Returns:
	        The answer followed by a "Sources" section with one line per source
	        document (file name plus the first 100 characters of its text), a
	        prompt to build the chatbot when ``chain`` is falsy, or an error
	        message if anything fails while answering.
	    """
	    if chain:
	        try:
	            result = chain.invoke({"question": message})
	            reply = result["answer"]

	            def describe(doc):
	                # One bullet per source: base file name and a short preview
	                # with newlines flattened to spaces.
	                name = os.path.basename(doc.metadata.get("source", "unknown"))
	                preview = (doc.page_content or "").replace("\n", " ")[:100] + "..."
	                return f"- {name}: {preview}"

	            source_lines = [describe(doc) for doc in result.get("source_documents", [])]
	            return reply + "\n\n---\nSources:\n" + "\n".join(source_lines)
	        except Exception as err:
	            return f"❌ Error generating answer: {str(err)}"

	    return "⚠️ Please upload files and click 'Build Chatbot' first."

	# --- 5. BUILD UI ---
	# Declares the Gradio page: a title, an upload/build/status column and a
	# chat column, plus the event wiring between them.
	# NOTE(review): the indentation of this listing appears to have been
	# stripped; the intended nesting is presumably Blocks > Row > two Columns,
	# with the click handler registered inside the Blocks context — confirm
	# against the original file before re-indenting.
	with gr.Blocks(title="RAG Chatbot") as demo:
	gr.Markdown("# 📚 RAG Chatbot (LangChain + Groq)")

	# Holds the chain produced by process_files so chat_function can use it.
	# Starts as None, which chat_function treats as "no chatbot built yet".
	chain_state = gr.State(None)

	with gr.Row():
	# Left column: file upload, build button and a read-only status box.
	with gr.Column(scale=1):
	file_input = gr.File(file_count="multiple", label="Upload PDF/TXT/DOCX")
	build_btn = gr.Button("Build Chatbot", variant="primary")
	status_output = gr.Textbox(label="Status", interactive=False)

	# Right column: the chat interface, given twice the relative width.
	with gr.Column(scale=2):
	chatbot = gr.ChatInterface(
	fn=chat_function,
	additional_inputs=[chain_state] # Passed to chat_function as its third argument (chain)
	)

	# Clicking "Build Chatbot" runs process_files on the uploaded files; its
	# (chain, status) result is written to chain_state and the status box.
	build_btn.click(
	fn=process_files,
	inputs=[file_input],
	outputs=[chain_state, status_output]
	)

	# Start the Gradio server only when this file is executed as a script.
	if __name__ == "__main__":
	demo.launch()