Spaces:

TanU21
/

PDF.RAG

Running

App Files Files Community

TanU21 committed on Mar 28

Commit

6103a94

verified ·

1 Parent(s): 296f294

Create app.py

Browse files

Files changed (1) hide show

app.py +124 -0

app.py ADDED Viewed

	@@ -0,0 +1,124 @@

+import streamlit as st
+from langchain_community.document_loaders import PDFPlumberLoader
+from langchain_text_splitters import RecursiveCharacterTextSplitter
+from langchain_core.prompts import PromptTemplate
+import os
+import tempfile
+from langchain_groq import ChatGroq
+from dotenv import load_dotenv
# Upper bound on the number of characters of extracted PDF text that is
# forwarded to the LLM, to avoid exceeding token limits.
MAX_DOC_LENGTH = 4000


def process_pdf(uploaded_file):
    """Extract the leading text of an uploaded PDF.

    The upload is written to a temporary ``.pdf`` file (the loader works on a
    filesystem path), loaded with ``PDFPlumberLoader``, split into chunks,
    re-joined with newlines and truncated to ``MAX_DOC_LENGTH`` characters.
    The temporary file is removed whether or not extraction succeeds.

    Args:
        uploaded_file: A binary file-like object holding the PDF (for example
            the object returned by ``st.file_uploader``). Falsy values are
            treated as "nothing uploaded".

    Returns:
        The extracted text on success. On failure, a human-readable message
        that always starts with ``"Error"``; callers detect failures by that
        prefix. This function does not raise.
    """
    if not uploaded_file:
        return "Error: No file uploaded."

    temp_path = None
    try:
        # Prefer getvalue(): it returns the whole buffer regardless of the
        # current stream position, so an upload that has already been read
        # once is not seen as empty.
        if hasattr(uploaded_file, "getvalue"):
            pdf_bytes = uploaded_file.getvalue()
        else:
            pdf_bytes = uploaded_file.read()

        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
            # Record the path before writing so a failed write is cleaned up too.
            temp_path = temp_file.name
            temp_file.write(pdf_bytes)

        pages = PDFPlumberLoader(temp_path).load()

        # NOTE(review): chunks are created with a 20-character overlap and
        # then joined again, so the joined text may repeat short spans and
        # use up part of the MAX_DOC_LENGTH budget - confirm this is intended.
        splitter = RecursiveCharacterTextSplitter(chunk_size=150, chunk_overlap=20)
        split_docs = splitter.split_documents(pages)

        document_text = "\n".join(doc.page_content for doc in split_docs)
        document_text = document_text[:MAX_DOC_LENGTH]
    except Exception as e:
        return f"Error processing PDF: {str(e)}"
    finally:
        # Always remove the temporary copy, including on the error path.
        if temp_path is not None:
            try:
                os.remove(temp_path)
            except OSError:
                # Best-effort cleanup: a file that is already gone or cannot
                # be removed must not mask the real result.
                pass

    if not document_text.strip():
        # E.g. an image-only PDF: there is nothing to ground an answer on.
        return "Error: No extractable text found in the PDF."
    return document_text
def initialize_llm(model="llama3-8b-8192"):
    """Create the Groq chat model used to answer questions.

    Loads variables from a ``.env`` file (if any) and reads the API key from
    the environment. Problems are reported in the Streamlit UI via
    ``st.error`` instead of being raised.

    Args:
        model: Groq model identifier to use. Defaults to the model this app
            was originally written against.

    Returns:
        A configured ``ChatGroq`` instance, or ``None`` when the API key is
        missing or the client could not be constructed.
    """
    load_dotenv()
    # Environment variable names are case-sensitive on POSIX systems. Accept
    # the conventional upper-case name that the error message below asks for,
    # and keep honouring the mixed-case name this app originally read.
    groq_api_key = os.getenv("GROQ_API_KEY") or os.getenv("Groq_API_Key")
    if not groq_api_key:
        st.error("GROQ_API_KEY environment variable is missing.")
        return None
    try:
        return ChatGroq(
            model=model,
            temperature=0.7,
            api_key=groq_api_key,
            verbose=False,
        )
    except Exception as e:
        st.error(f"Error initializing LLM: {str(e)}")
        return None
def create_prompt():
    """Build the prompt template for document-grounded question answering.

    Returns:
        A ``PromptTemplate`` with two input variables, ``document`` and
        ``question``. The template tells the model to answer using only the
        supplied document text.
    """
    instructions = (
        "You are an AI assistant that provides precise answers based on the given document. "
        "Use only the information available in the document to respond."
    )
    # One entry per line of the final template; empty entries are blank lines.
    template_lines = [
        instructions,
        "",
        "Document:",
        "{document}",
        "",
        "Question: {question}",
        "Answer:",
    ]
    return PromptTemplate(
        input_variables=["document", "question"],
        template="\n".join(template_lines),
    )
def generate_answer(chain, document_text, user_input):
    """Run the chain on a document/question pair and return the answer text.

    Args:
        chain: Any object with an ``invoke(mapping)`` method. The result may
            be a message-like object exposing ``.content`` or a plain string
            (for example when the chain ends in a string output parser).
        document_text: Text of the document the answer must be based on.
        user_input: The user's question.

    Returns:
        The answer as a string. If the call fails, a user-facing message
        starting with a warning sign is returned instead; known API error
        codes get a friendlier message. This function does not raise.
    """
    # (error-code substrings, user-facing message). Checked in order against
    # the lower-cased exception text.
    known_failures = (
        (
            ("rate_limit_exceeded",),
            "⚠️ Error: Rate limit exceeded. Try again later.",
        ),
        (
            ("context_length_exceeded",),
            "⚠️ Error: Input too long. Please shorten your document or question.",
        ),
        (
            ("model_not_found", "model_decommissioned"),
            "⚠️ Error: Selected model is unavailable. Please try a different one.",
        ),
    )
    try:
        response = chain.invoke({"document": document_text, "question": user_input})
        # Fall back to the response itself when it carries no ``content``
        # attribute, so string-returning chains are not reported as errors.
        return str(getattr(response, "content", response))
    except Exception as e:
        error_message = str(e).lower()
        for markers, friendly_message in known_failures:
            if any(marker in error_message for marker in markers):
                return friendly_message
        return f"⚠️ Error generating answer: {str(e)}"
def _as_blockquote(text):
    """Prefix every line of *text* with ``> `` so Markdown quotes all of it."""
    # Quoting only the first line would let a blank line or a list in the
    # answer end the blockquote early.
    return "\n".join(f"> {line}" for line in (text.splitlines() or [""]))


def main():
    """Render the Streamlit UI and answer a question about an uploaded PDF.

    The sidebar holds the PDF uploader and the main area holds the question
    box and a "Get Answer" button. When the button is pressed with both
    inputs present, the PDF text is extracted, the LLM is initialised, and
    the answer is shown as a Markdown blockquote. Validation and processing
    problems are shown in the UI rather than raised.
    """
    st.set_page_config(page_title="Ask My PDF", layout="wide")
    st.title("📄 Ask My PDF")

    with st.sidebar:
        st.header("🔍 Upload PDF")
        uploaded_file = st.file_uploader("Upload a PDF document", type=["pdf"])
        if uploaded_file:
            st.success("✅ File uploaded successfully!")

    user_input = st.text_area(
        "💬 Enter your question:",
        placeholder="Ask something about the document...",
    )

    if not st.button("Get Answer", use_container_width=True):
        return

    # Validate inputs before doing any expensive work.
    if not uploaded_file:
        st.warning("⚠️ Please upload a PDF document.")
        return
    if not user_input.strip():
        st.warning("⚠️ Please enter a question.")
        return

    document_text = process_pdf(uploaded_file)
    # process_pdf signals failure by returning a message that starts with "Error".
    if isinstance(document_text, str) and document_text.startswith("Error"):
        st.error(document_text)
        return

    llm = initialize_llm()
    if not llm:
        # initialize_llm has already shown the reason in the UI.
        return

    chain = create_prompt() | llm
    answer = generate_answer(chain, document_text, user_input)
    st.subheader("📌 Answer:")
    st.markdown(_as_blockquote(answer))


if __name__ == "__main__":
    main()  # Entry point when the file is executed as a script.