import os
import tempfile

import streamlit as st
from dotenv import load_dotenv
from langchain_community.document_loaders import PDFPlumberLoader
from langchain_core.prompts import PromptTemplate
from langchain_groq import ChatGroq
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Max document length to avoid exceeding token limits
MAX_DOC_LENGTH = 4000


def process_pdf(uploaded_file):
    try:
        if not uploaded_file:
            return "Error: No file uploaded."

        # ✅ Save the uploaded file to a temporary location so it has a real path on disk
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
            temp_file.write(uploaded_file.read())
            temp_path = temp_file.name  # Get the actual file path

        try:
            # ✅ Now we can load it using PDFPlumberLoader
            loader = PDFPlumberLoader(temp_path)
            result = loader.load()
        finally:
            # ✅ Clean up the temporary file, even if loading fails
            os.remove(temp_path)

        # ✅ Split the document into chunks
        splitter = RecursiveCharacterTextSplitter(chunk_size=150, chunk_overlap=20)
        split_docs = splitter.split_documents(result)

        # ✅ Extract text from the split documents and truncate to the token budget
        document_text = "\n".join(doc.page_content for doc in split_docs)
        return document_text[:MAX_DOC_LENGTH]
    except Exception as e:
        return f"Error processing PDF: {str(e)}"


def initialize_llm():
    """Initializes the LLM with error handling for unavailable models."""
    load_dotenv()
    # Env var names are case-sensitive; use the same name the error message reports
    groq_api_key = os.getenv("GROQ_API_KEY")
    if not groq_api_key:
        st.error("GROQ_API_KEY environment variable is missing.")
        return None
    try:
        return ChatGroq(
            model="llama3-8b-8192",
            temperature=0.7,
            api_key=groq_api_key,
            verbose=False,
        )
    except Exception as e:
        st.error(f"Error initializing LLM: {str(e)}")
        return None


def create_prompt():
    """Creates a structured prompt template for document-based Q&A."""
    return PromptTemplate(
        input_variables=["document", "question"],
        template=(
            "You are an AI assistant that provides precise answers based on the given document. "
            "Use only the information available in the document to respond.\n\n"
            "Document:\n{document}\n\n"
            "Question: {question}\n"
            "Answer:"
        ),
    )


def generate_answer(chain, document_text, user_input):
    """Generates an answer from the LLM while handling API errors."""
    try:
        response = chain.invoke({"document": document_text, "question": user_input})
        answer = response.content
        return str(answer)
    except Exception as e:
        error_message = str(e).lower()
        if "rate_limit_exceeded" in error_message:
            return "⚠️ Error: Rate limit exceeded. Try again later."
        elif "context_length_exceeded" in error_message:
            return "⚠️ Error: Input too long. Please shorten your document or question."
        elif "model_not_found" in error_message or "model_decommissioned" in error_message:
            return "⚠️ Error: Selected model is unavailable. Please try a different one."
        return f"⚠️ Error generating answer: {str(e)}"


def main():
    """Streamlit UI"""
    st.set_page_config(page_title="Ask My PDF", layout="wide")
    st.title("📄 Ask My PDF")

    with st.sidebar:
        st.header("🔍 Upload PDF")
        uploaded_file = st.file_uploader("Upload a PDF document", type=["pdf"])
        if uploaded_file:
            st.success("✅ File uploaded successfully!")

    user_input = st.text_area("💬 Enter your question:", placeholder="Ask something about the document...")

    if st.button("Get Answer", use_container_width=True):
        if not uploaded_file:
            st.warning("⚠️ Please upload a PDF document.")
        elif not user_input.strip():
            st.warning("⚠️ Please enter a question.")
        else:
            document_text = process_pdf(uploaded_file)
            if isinstance(document_text, str) and document_text.startswith("Error"):
                st.error(document_text)
            else:
                llm = initialize_llm()
                if llm:
                    prompt = create_prompt()
                    chain = prompt | llm
                    answer = generate_answer(chain, document_text, user_input)
                    st.subheader("📌 Answer:")
                    st.markdown(f"> {answer}")


if __name__ == "__main__":
    main()  # ✅ Ensures Streamlit runs in the right context
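
# A minimal usage sketch (assuming this file is saved as app.py and a .env file
# alongside it defines GROQ_API_KEY=<your key>):
#
#   pip install streamlit langchain-community langchain-core langchain-text-splitters \
#       langchain-groq pdfplumber python-dotenv
#   streamlit run app.py
#
# Note: Streamlit apps must be launched with `streamlit run`; invoking
# `python app.py` directly will not start the web UI.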