File size: 4,697 Bytes
6103a94
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import streamlit as st
from langchain_community.document_loaders import PDFPlumberLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.prompts import PromptTemplate
import os
import tempfile
from langchain_groq import ChatGroq
from dotenv import load_dotenv

# Maximum number of characters of extracted PDF text forwarded to the LLM.
# The extracted document is truncated to this length in process_pdf to
# avoid exceeding the model's context/token limits.
MAX_DOC_LENGTH = 4000 

def process_pdf(uploaded_file):
    """Extract text from an uploaded PDF.

    Persists the in-memory Streamlit upload to a named temporary file
    (PDFPlumberLoader needs a real path), loads and chunks the document,
    and returns the concatenated text truncated to MAX_DOC_LENGTH.

    Args:
        uploaded_file: Streamlit UploadedFile (any object with .read()).

    Returns:
        str: Extracted document text, or an "Error ..." message on failure.
    """
    try:
        if not uploaded_file:
            return "Error: No file uploaded."

        # PDFPlumberLoader requires a file path, so write the upload out
        # to a temporary file first (delete=False so it survives the
        # `with` block on Windows as well).
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
            temp_file.write(uploaded_file.read())
            temp_path = temp_file.name  # Actual on-disk path for the loader

        try:
            loader = PDFPlumberLoader(temp_path)
            result = loader.load()

            # Split into chunks so very long pages are handled uniformly.
            splitter = RecursiveCharacterTextSplitter(chunk_size=150, chunk_overlap=20)
            split_docs = splitter.split_documents(result)

            # Re-join chunk text and cap the length to stay within token limits.
            document_text = "\n".join(doc.page_content for doc in split_docs)
            return document_text[:MAX_DOC_LENGTH]
        finally:
            # Bug fix: the temp file previously leaked whenever load() or
            # the splitter raised; always clean it up.
            os.remove(temp_path)
    except Exception as e:
        return f"Error processing PDF: {str(e)}"

def initialize_llm():
    """Initialize the Groq chat model.

    Loads environment variables, reads the API key, and returns a
    configured ChatGroq instance. Returns None (after surfacing a
    Streamlit error) when the key is missing or initialization fails.
    """
    load_dotenv()
    # Bug fix: the error message told users to set GROQ_API_KEY, but the
    # code only read "Groq_API_Key". Accept both, preferring the
    # conventional upper-case name (backward compatible).
    groq_api_key = os.getenv("GROQ_API_KEY") or os.getenv("Groq_API_Key")
    if not groq_api_key:
        st.error("GROQ_API_KEY environment variable is missing.")
        return None

    try:
        return ChatGroq(
            model="llama3-8b-8192",
            temperature=0.7,
            api_key=groq_api_key,
            verbose=False
        )
    except Exception as e:
        st.error(f"Error initializing LLM: {str(e)}")
        return None

def create_prompt():
    """Build the PromptTemplate used to ground answers in the uploaded PDF.

    The template takes two variables: the (truncated) document text and
    the user's question, and instructs the model to answer strictly from
    the document.
    """
    template_text = (
        "You are an AI assistant that provides precise answers based on the given document. "
        "Use only the information available in the document to respond.\n\n"
        "Document:\n{document}\n\n"
        "Question: {question}\n"
        "Answer:"
    )
    return PromptTemplate(
        template=template_text,
        input_variables=["document", "question"],
    )

def generate_answer(chain, document_text, user_input):
    """Invoke the prompt/LLM chain and map known Groq API failures to
    user-friendly warning strings.

    Args:
        chain: Runnable with .invoke({"document": ..., "question": ...}).
        document_text: Extracted (truncated) PDF text.
        user_input: The user's question.

    Returns:
        str: The model's answer, or a warning/error message.
    """
    try:
        result = chain.invoke({"document": document_text, "question": user_input})
        return str(result.content)
    except Exception as exc:
        detail = str(exc)
        lowered = detail.lower()
        # Known API error markers, checked in the same priority order as
        # before; both model_* markers map to the same message.
        known_errors = (
            ("rate_limit_exceeded", "⚠️ Error: Rate limit exceeded. Try again later."),
            ("context_length_exceeded", "⚠️ Error: Input too long. Please shorten your document or question."),
            ("model_not_found", "⚠️ Error: Selected model is unavailable. Please try a different one."),
            ("model_decommissioned", "⚠️ Error: Selected model is unavailable. Please try a different one."),
        )
        for marker, message in known_errors:
            if marker in lowered:
                return message
        return f"⚠️ Error generating answer: {detail}"

def main():
    """Streamlit entry point: upload a PDF in the sidebar, ask a
    question, and render the LLM's answer."""
    st.set_page_config(page_title="Ask My PDF", layout="wide")
    st.title("πŸ“„ Ask My PDF")

    with st.sidebar:
        st.header("πŸ” Upload PDF")
        uploaded_file = st.file_uploader("Upload a PDF document", type=["pdf"])
        if uploaded_file:
            st.success("βœ… File uploaded successfully!")

    user_input = st.text_area("πŸ’¬ Enter your question:", placeholder="Ask something about the document...")

    if not st.button("Get Answer", use_container_width=True):
        return

    # Guard clauses: validate inputs before doing any heavy work.
    if not uploaded_file:
        st.warning("⚠️ Please upload a PDF document.")
        return
    if not user_input.strip():
        st.warning("⚠️ Please enter a question.")
        return

    document_text = process_pdf(uploaded_file)
    if isinstance(document_text, str) and document_text.startswith("Error"):
        st.error(document_text)
        return

    llm = initialize_llm()
    if not llm:
        # initialize_llm already surfaced the error in the UI.
        return

    chain = create_prompt() | llm
    answer = generate_answer(chain, document_text, user_input)
    st.subheader("πŸ“Œ Answer:")
    st.markdown(f"> {answer}")


if __name__ == "__main__":
    main()  # Run the app under Streamlit's script context