File size: 4,697 Bytes
6103a94 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 |
import streamlit as st
from langchain_community.document_loaders import PDFPlumberLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.prompts import PromptTemplate
import os
import tempfile
from langchain_groq import ChatGroq
from dotenv import load_dotenv
# Max document length to avoid exceeding token limits
MAX_DOC_LENGTH = 4000
def process_pdf(uploaded_file):
    """Extract text from an uploaded PDF and return it as a single string.

    Args:
        uploaded_file: File-like object (e.g. a Streamlit ``UploadedFile``)
            whose ``read()`` yields the raw PDF bytes.

    Returns:
        str: Extracted document text, truncated to ``MAX_DOC_LENGTH``
        characters; on failure, a string starting with ``"Error"``
        (callers check that prefix instead of catching exceptions).
    """
    try:
        if not uploaded_file:
            return "Error: No file uploaded."
        # PDFPlumberLoader needs a real filesystem path, so persist the
        # uploaded bytes to a temporary file first.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
            temp_file.write(uploaded_file.read())
            temp_path = temp_file.name  # Actual on-disk path for the loader
        try:
            # Load the PDF, then split into small overlapping chunks so the
            # joined text is evenly segmented.
            loader = PDFPlumberLoader(temp_path)
            result = loader.load()
            splitter = RecursiveCharacterTextSplitter(chunk_size=150, chunk_overlap=20)
            split_docs = splitter.split_documents(result)
            document_text = "\n".join(doc.page_content for doc in split_docs)
            # Trim to keep the downstream prompt within token limits.
            return document_text[:MAX_DOC_LENGTH]
        finally:
            # BUGFIX: previously the temp file was only removed on the success
            # path, leaking a file per failed upload; always clean it up.
            os.remove(temp_path)
    except Exception as e:
        return f"Error processing PDF: {str(e)}"
def initialize_llm():
    """Initializes the LLM with error handling for unavailable models.

    Returns:
        ChatGroq | None: A configured Groq chat client, or ``None`` when the
        API key is missing or construction fails (errors are shown via
        ``st.error`` rather than raised).
    """
    load_dotenv()
    # BUGFIX: the error message referred to GROQ_API_KEY, but the code only
    # read "Groq_API_Key" — os.getenv is case-sensitive, so a user who set
    # the conventional GROQ_API_KEY got a spurious "missing" error. Accept
    # the conventional name first, keeping the old spelling as a fallback
    # for backward compatibility.
    groq_api_key = os.getenv("GROQ_API_KEY") or os.getenv("Groq_API_Key")
    if not groq_api_key:
        st.error("GROQ_API_KEY environment variable is missing.")
        return None
    try:
        return ChatGroq(
            model="llama3-8b-8192",
            temperature=0.7,
            api_key=groq_api_key,
            verbose=False,
        )
    except Exception as e:
        st.error(f"Error initializing LLM: {str(e)}")
        return None
def create_prompt():
    """Build the PromptTemplate used for document-grounded Q&A.

    The template instructs the model to answer strictly from the supplied
    document text and exposes two slots: ``document`` and ``question``.
    """
    template_text = (
        "You are an AI assistant that provides precise answers based on the given document. "
        "Use only the information available in the document to respond.\n\n"
        "Document:\n{document}\n\n"
        "Question: {question}\n"
        "Answer:"
    )
    return PromptTemplate(
        template=template_text,
        input_variables=["document", "question"],
    )
def generate_answer(chain, document_text, user_input):
    """Generates an answer from the LLM while handling API errors.

    Args:
        chain: A runnable with ``invoke(dict) -> response`` where the
            response exposes a ``content`` attribute (prompt | llm chain).
        document_text: The extracted document text to ground the answer in.
        user_input: The user's question.

    Returns:
        str: The model's answer, or a user-facing "⚠️ Error: ..." message
        when the API call fails.
    """
    try:
        response = chain.invoke({"document": document_text, "question": user_input})
        return str(response.content)
    except Exception as e:
        # Map known Groq API failure codes to friendly messages.
        # BUGFIX: the warning emoji in these strings was mojibake ("β οΈ",
        # a mis-decoded "⚠️"); restored the intended character.
        error_message = str(e).lower()
        if "rate_limit_exceeded" in error_message:
            return "⚠️ Error: Rate limit exceeded. Try again later."
        elif "context_length_exceeded" in error_message:
            return "⚠️ Error: Input too long. Please shorten your document or question."
        elif "model_not_found" in error_message or "model_decommissioned" in error_message:
            return "⚠️ Error: Selected model is unavailable. Please try a different one."
        return f"⚠️ Error generating answer: {str(e)}"
def main():
    """Streamlit UI: upload a PDF, ask a question, display the LLM's answer."""
    st.set_page_config(page_title="Ask My PDF", layout="wide")
    st.title("📄 Ask My PDF")

    with st.sidebar:
        st.header("📂 Upload PDF")
        uploaded_file = st.file_uploader("Upload a PDF document", type=["pdf"])
        if uploaded_file:
            # BUGFIX: this string literal was split across lines by encoding
            # corruption ('st.success("β' / 'File uploaded...'); the emoji
            # was a mis-decoded "✅".
            st.success("✅ File uploaded successfully!")

    user_input = st.text_area("💬 Enter your question:", placeholder="Ask something about the document...")

    if st.button("Get Answer", use_container_width=True):
        if not uploaded_file:
            st.warning("⚠️ Please upload a PDF document.")
        elif not user_input.strip():
            st.warning("⚠️ Please enter a question.")
        else:
            document_text = process_pdf(uploaded_file)
            # process_pdf signals failure with an "Error"-prefixed string
            # rather than raising.
            if isinstance(document_text, str) and document_text.startswith("Error"):
                st.error(document_text)
            else:
                llm = initialize_llm()
                if llm:
                    prompt = create_prompt()
                    chain = prompt | llm  # LCEL: prompt feeds the LLM
                    answer = generate_answer(chain, document_text, user_input)
                    st.subheader("📜 Answer:")
                    st.markdown(f"> {answer}")
if __name__ == "__main__":
    main()  # ✅ Ensures Streamlit runs in the right context
|