# PDF.RAG / app.py
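"""Ask My PDF: a minimal Streamlit app that answers questions about an uploaded PDF.

Pipeline: save the upload to a temp file, extract text with PDFPlumberLoader,
chunk it with RecursiveCharacterTextSplitter, stuff the (truncated) text into a
prompt, and answer with a Groq-hosted Llama 3 model via langchain_groq.
"""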
import os
import tempfile

import streamlit as st
from dotenv import load_dotenv
from langchain_community.document_loaders import PDFPlumberLoader
from langchain_core.prompts import PromptTemplate
from langchain_groq import ChatGroq
from langchain_text_splitters import RecursiveCharacterTextSplitter
# Maximum document length (in characters) to keep the prompt within the model's token limits.
MAX_DOC_LENGTH = 4000
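# Rough sizing (heuristic): English text averages ~4 characters per token, so
# 4,000 characters is on the order of 1,000 tokens, comfortably inside the
# 8K-token context window implied by the model name below.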
def process_pdf(uploaded_file):
    """Extracts text from an uploaded PDF, returning it truncated to MAX_DOC_LENGTH."""
    if not uploaded_file:
        return "Error: No file uploaded."
    temp_path = None
    try:
        # Save the upload to a temporary file so PDFPlumberLoader can read it from disk.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
            temp_file.write(uploaded_file.read())
            temp_path = temp_file.name
        # Load the PDF into a list of per-page documents.
        loader = PDFPlumberLoader(temp_path)
        result = loader.load()
        # Split the documents into small chunks. Note: the chunks are rejoined below,
        # so the 20-character overlap is duplicated in the final text.
        splitter = RecursiveCharacterTextSplitter(chunk_size=150, chunk_overlap=20)
        split_docs = splitter.split_documents(result)
        # Rejoin the chunk text and truncate to respect the token budget.
        document_text = "\n".join(doc.page_content for doc in split_docs)
        return document_text[:MAX_DOC_LENGTH]
    except Exception as e:
        return f"Error processing PDF: {str(e)}"
    finally:
        # Remove the temporary file even if loading or splitting failed.
        if temp_path and os.path.exists(temp_path):
            os.remove(temp_path)
def initialize_llm():
"""Initializes the LLM with error handling for unavailable models."""
load_dotenv()
    groq_api_key = os.getenv("GROQ_API_KEY")  # env var names are case-sensitive; must match the error message below
if not groq_api_key:
st.error("GROQ_API_KEY environment variable is missing.")
return None
try:
return ChatGroq(
model="llama3-8b-8192",
temperature=0.7,
api_key=groq_api_key,
verbose=False
)
except Exception as e:
st.error(f"Error initializing LLM: {str(e)}")
return None
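# Note: Groq retires hosted models over time (hence the model_decommissioned
# handling in generate_answer below); if "llama3-8b-8192" is unavailable,
# substitute a model name from Groq's currently supported list.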
def create_prompt():
"""Creates a structured prompt template for document-based Q&A."""
return PromptTemplate(
input_variables=["document", "question"],
template=(
"You are an AI assistant that provides precise answers based on the given document. "
"Use only the information available in the document to respond.\n\n"
"Document:\n{document}\n\n"
"Question: {question}\n"
"Answer:"
)
)
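# Illustrative example of the rendered prompt (the question text is hypothetical):
#
#   You are an AI assistant that provides precise answers based on the given document.
#   Use only the information available in the document to respond.
#
#   Document:
#   <first MAX_DOC_LENGTH characters of the extracted PDF text>
#
#   Question: What is the document's main conclusion?
#   Answer: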
def generate_answer(chain, document_text, user_input):
"""Generates an answer from the LLM while handling API errors."""
try:
response = chain.invoke({"document": document_text, "question": user_input})
answer = response.content
return str(answer)
except Exception as e:
error_message = str(e).lower()
if "rate_limit_exceeded" in error_message:
return "⚠️ Error: Rate limit exceeded. Try again later."
elif "context_length_exceeded" in error_message:
return "⚠️ Error: Input too long. Please shorten your document or question."
elif "model_not_found" in error_message or "model_decommissioned" in error_message:
return "⚠️ Error: Selected model is unavailable. Please try a different one."
return f"⚠️ Error generating answer: {str(e)}"
def main():
"""Streamlit UI"""
st.set_page_config(page_title="Ask My PDF", layout="wide")
    st.title("📄 Ask My PDF")
with st.sidebar:
        st.header("🔍 Upload PDF")
uploaded_file = st.file_uploader("Upload a PDF document", type=["pdf"])
if uploaded_file:
            st.success("✅ File uploaded successfully!")
    user_input = st.text_area("💬 Enter your question:", placeholder="Ask something about the document...")
if st.button("Get Answer", use_container_width=True):
if not uploaded_file:
st.warning("⚠️ Please upload a PDF document.")
elif not user_input.strip():
st.warning("⚠️ Please enter a question.")
else:
document_text = process_pdf(uploaded_file)
if isinstance(document_text, str) and document_text.startswith("Error"):
st.error(document_text)
else:
llm = initialize_llm()
if llm:
prompt = create_prompt()
chain = prompt | llm
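                    # LCEL composition: `prompt | llm` yields a runnable that fills in
                    # the template and then calls the chat model in one invoke().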
answer = generate_answer(chain, document_text, user_input)
                    st.subheader("📌 Answer:")
st.markdown(f"> {answer}")
if __name__ == "__main__":
    main()  # Standard entry point; Streamlit executes this script top to bottom.
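
# To run locally (assumes GROQ_API_KEY is set in the environment or in a .env file):
#   pip install streamlit langchain-community langchain-groq langchain-text-splitters pdfplumber python-dotenv
#   streamlit run app.py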