# Scraped from a Hugging Face Space page (Space status: Sleeping).
| # app.py | |
| from langchain_community.embeddings import HuggingFaceEmbeddings | |
| from langchain_community.vectorstores import Chroma | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from langchain.document_loaders import PyPDFLoader | |
| from langchain.chains import RetrievalQA | |
| from langchain.llms.base import LLM | |
| from typing import List, Optional | |
| from groq import Groq | |
| import gradio as gr | |
| import os | |
| import uuid | |
| from dotenv import load_dotenv | |
| load_dotenv() | |
# Groq LLM wrapper: adapts the Groq chat-completions API to LangChain's LLM interface.
class GroqLLM(LLM):
    """LangChain-compatible LLM backed by Groq's chat-completions endpoint."""

    # Model identifier sent to the Groq API.
    model: str = "llama3-8b-8192"
    # Read from the environment so the key is never hard-coded.
    # fix: os.environ.get may return None, so the annotation must allow it.
    api_key: Optional[str] = os.environ.get("GROQ_API_KEY")
    # 0.0 = deterministic output, appropriate for retrieval QA.
    temperature: float = 0.0

    def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
        """Send `prompt` to Groq and return the assistant's reply text.

        Args:
            prompt: The fully rendered prompt from the chain.
            stop: Optional stop sequences supplied by LangChain.

        Raises:
            ValueError: if no API key is configured.
        """
        if not self.api_key:
            # Fail with a clear message instead of an opaque client error.
            raise ValueError("GROQ_API_KEY is not set.")
        client = Groq(api_key=self.api_key)
        messages = [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt},
        ]
        response = client.chat.completions.create(
            model=self.model,
            messages=messages,
            temperature=self.temperature,
            stop=stop,  # fix: forward stop sequences instead of silently ignoring them
        )
        return response.choices[0].message.content

    @property
    def _llm_type(self) -> str:
        # fix: LangChain declares _llm_type as a property; without @property,
        # self._llm_type evaluates to a bound method rather than the string.
        return "groq-llm"
# Session store: module-level dict shared by the upload and question handlers.
# Populated by process_pdf_and_setup_chain with:
#   "qa_chain" — the RetrievalQA chain built from the uploaded PDF
#   "temp_dir" — the per-upload directory holding the Chroma index
# NOTE(review): this is global state shared by ALL Gradio users, not per-session
# — confirm that is intended for this deployment.
session_store = {}
# Process PDF File
def process_pdf_and_setup_chain(pdf_file):
    """Load a PDF, index it into Chroma, and stash a RetrievalQA chain.

    Args:
        pdf_file: Gradio file object exposing the upload path via ``.name``,
            or None/falsy when nothing was uploaded.

    Returns:
        A human-readable status string for the UI.
    """
    if not pdf_file:
        return "β No PDF uploaded."

    import shutil  # local import: only needed here, for temp-dir cleanup

    file_path = pdf_file.name
    # Unique per-upload directory for the Chroma index.
    temp_dir = f"temp_{uuid.uuid4().hex}"
    os.makedirs(temp_dir, exist_ok=True)
    try:
        loader = PyPDFLoader(file_path)
        documents = loader.load()
        # Overlapping chunks so answers can span chunk boundaries.
        splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
        docs = splitter.split_documents(documents)
        embedding = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
        vectorstore = Chroma.from_documents(
            docs, embedding, persist_directory=os.path.join(temp_dir, "chroma")
        )
        retriever = vectorstore.as_retriever()
        groq_llm = GroqLLM()
        qa_chain = RetrievalQA.from_chain_type(
            llm=groq_llm,
            retriever=retriever,
            return_source_documents=True
        )
        # fix: remove the previous upload's temp dir so repeated uploads
        # don't leak one directory (plus index) per PDF.
        old_dir = session_store.get("temp_dir")
        if old_dir and old_dir != temp_dir:
            shutil.rmtree(old_dir, ignore_errors=True)
        session_store["qa_chain"] = qa_chain
        session_store["temp_dir"] = temp_dir
        return "β PDF processed! You can now ask questions."
    except Exception as e:
        # fix: don't leave an orphaned temp dir behind when indexing fails.
        shutil.rmtree(temp_dir, ignore_errors=True)
        return f"β Error: {str(e)}"
# Answering Function
def answer_question(query):
    """Answer `query` against the currently loaded PDF, or say why we can't."""
    chain = session_store.get("qa_chain")
    # Guard clauses: no indexed PDF yet, or a blank question.
    if not chain:
        return "β Please upload and process a PDF first."
    if not query.strip():
        return "β Please enter a question."
    try:
        output = chain({"query": query})
    except Exception as e:
        return f"β Error: {str(e)}"
    return output["result"]
# Gradio UI
with gr.Blocks() as demo:
    # Page header.
    gr.Markdown("## π PDF Q&A with LangChain + Groq LLaMA3")
    gr.Markdown("Upload a PDF, process it, and ask any question from its content.")

    # Upload + processing controls.
    with gr.Row():
        pdf_input = gr.File(label="π Upload PDF", file_types=[".pdf"])
        process_btn = gr.Button("βοΈ Process PDF")
    status = gr.Textbox(label="Status", interactive=False)

    # Question entry + answer display.
    with gr.Row():
        question = gr.Textbox(
            label="Ask a question",
            lines=2,
            placeholder="e.g. What is the document about?",
        )
        ask_btn = gr.Button("π Ask")
    answer = gr.Textbox(label="Answer", interactive=False)

    # Wire buttons to their handlers.
    process_btn.click(fn=process_pdf_and_setup_chain, inputs=pdf_input, outputs=status)
    ask_btn.click(fn=answer_question, inputs=question, outputs=answer)

demo.launch()