TanU21 committed on
Commit
6103a94
·
verified ·
1 Parent(s): 296f294

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +124 -0
app.py ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from langchain_community.document_loaders import PDFPlumberLoader
3
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
4
+ from langchain_core.prompts import PromptTemplate
5
+ import os
6
+ import tempfile
7
+ from langchain_groq import ChatGroq
8
+ from dotenv import load_dotenv
9
+
10
# Maximum number of characters of extracted PDF text that is kept; the text is
# cut with a plain string slice, so this is a character count, not a token
# count. It keeps the prompt sent to the LLM bounded.
MAX_DOC_LENGTH = 4000
12
+
13
def process_pdf(uploaded_file):
    """Extract up to MAX_DOC_LENGTH characters of text from an uploaded PDF.

    The upload is copied to a temporary ``.pdf`` file (PDFPlumberLoader is
    given a filesystem path), loaded, split into small chunks, re-joined with
    newlines and truncated to ``MAX_DOC_LENGTH`` characters.

    Args:
        uploaded_file: A binary file-like object exposing ``read()`` (for
            example the value returned by ``st.file_uploader``), or a falsy
            value when nothing was uploaded.

    Returns:
        The extracted text on success. On failure, a message starting with
        ``"Error"``; callers detect failure by that prefix, so no exception
        escapes this function.
    """
    temp_path = None
    try:
        if not uploaded_file:
            return "Error: No file uploaded."

        # Rewind before reading: if the stream was already consumed (e.g. the
        # same upload object is processed a second time), read() would return
        # b"" and an empty, unparsable PDF would be written.
        rewind = getattr(uploaded_file, "seek", None)
        if callable(rewind):
            try:
                rewind(0)
            except (OSError, ValueError):
                # Non-seekable stream: read from the current position.
                pass

        # delete=False keeps the file on disk after the handle is closed so
        # the loader can reopen it by path; it is removed in ``finally``.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
            # Record the path before writing so a failed write is cleaned up.
            temp_path = temp_file.name
            temp_file.write(uploaded_file.read())

        pages = PDFPlumberLoader(temp_path).load()

        splitter = RecursiveCharacterTextSplitter(chunk_size=150, chunk_overlap=20)
        split_docs = splitter.split_documents(pages)

        document_text = "\n".join(doc.page_content for doc in split_docs)
        return document_text[:MAX_DOC_LENGTH]
    except Exception as e:
        # Top-level boundary for the UI: report every failure as a string.
        return f"Error processing PDF: {str(e)}"
    finally:
        # Always remove the temporary copy, including when loading or
        # splitting failed. Cleanup is best-effort and must not mask the
        # result being returned.
        if temp_path is not None:
            try:
                os.remove(temp_path)
            except OSError:
                pass
41
+
42
def initialize_llm():
    """Create the Groq chat model used to answer questions.

    Loads variables from a ``.env`` file (if any) and reads the API key from
    the environment. Problems are reported to the user with ``st.error``
    instead of being raised.

    Returns:
        A ``ChatGroq`` instance, or ``None`` when the API key is missing or
        constructing the client raised an exception.
    """
    load_dotenv()

    # The error message below tells the user to set GROQ_API_KEY, but only the
    # mixed-case "Groq_API_Key" used to be read. Environment variable names are
    # case-sensitive on most platforms, so following the message never worked.
    # Accept both spellings; the legacy name is checked first so existing
    # deployments keep using the same value.
    groq_api_key = os.getenv("Groq_API_Key") or os.getenv("GROQ_API_KEY")
    if not groq_api_key:
        st.error("GROQ_API_KEY environment variable is missing.")
        return None

    try:
        return ChatGroq(
            model="llama3-8b-8192",
            temperature=0.7,
            api_key=groq_api_key,
            verbose=False,
        )
    except Exception as e:
        # UI boundary: surface the failure and let the caller skip answering.
        st.error(f"Error initializing LLM: {str(e)}")
        return None
60
+
61
def create_prompt():
    """Build the prompt template for document-grounded question answering.

    Returns:
        A ``PromptTemplate`` with two input variables, ``document`` and
        ``question``, instructing the model to answer from the document only.
    """
    template_parts = [
        "You are an AI assistant that provides precise answers based on the given document. ",
        "Use only the information available in the document to respond.\n\n",
        "Document:\n{document}\n\n",
        "Question: {question}\n",
        "Answer:",
    ]
    qa_template = "".join(template_parts)
    return PromptTemplate(
        template=qa_template,
        input_variables=["document", "question"],
    )
73
+
74
def generate_answer(chain, document_text, user_input):
    """Run the chain on a document/question pair and return the answer text.

    Args:
        chain: Object with an ``invoke(dict)`` method whose result exposes a
            ``content`` attribute.
        document_text: Text placed in the prompt's ``document`` slot.
        user_input: Text placed in the prompt's ``question`` slot.

    Returns:
        ``str(response.content)`` on success. If anything raises, a
        user-facing error string is returned instead: a specific message when
        the lower-cased exception text contains a known marker, otherwise a
        generic message that includes the exception text.
    """
    # Checked in order; the first entry with a marker found in the error wins.
    known_failures = (
        (
            ("rate_limit_exceeded",),
            "⚠️ Error: Rate limit exceeded. Try again later.",
        ),
        (
            ("context_length_exceeded",),
            "⚠️ Error: Input too long. Please shorten your document or question.",
        ),
        (
            ("model_not_found", "model_decommissioned"),
            "⚠️ Error: Selected model is unavailable. Please try a different one.",
        ),
    )

    payload = {"document": document_text, "question": user_input}
    try:
        return str(chain.invoke(payload).content)
    except Exception as exc:
        lowered = str(exc).lower()
        for markers, friendly_message in known_failures:
            if any(marker in lowered for marker in markers):
                return friendly_message
        return f"⚠️ Error generating answer: {str(exc)}"
89
+
90
def main():
    """Render the Streamlit page and answer a question about an uploaded PDF.

    Widgets are created in a fixed order: page config, title, sidebar
    uploader, question box, button. When the button is pressed the inputs are
    validated, the PDF text is extracted, the LLM is initialised and the
    answer is rendered; each failing step shows a message and stops.
    """
    st.set_page_config(page_title="Ask My PDF", layout="wide")
    st.title("πŸ“„ Ask My PDF")

    with st.sidebar:
        st.header("πŸ” Upload PDF")
        uploaded_file = st.file_uploader("Upload a PDF document", type=["pdf"])
        # NOTE(review): the original nesting was reconstructed; confirm the
        # success message is meant to appear inside the sidebar.
        if uploaded_file:
            st.success("βœ… File uploaded successfully!")

    user_input = st.text_area(
        "πŸ’¬ Enter your question:",
        placeholder="Ask something about the document...",
    )

    clicked = st.button("Get Answer", use_container_width=True)
    if not clicked:
        return

    # Validate inputs: the missing file is reported before the missing question.
    if not uploaded_file:
        st.warning("⚠️ Please upload a PDF document.")
        return
    if not user_input.strip():
        st.warning("⚠️ Please enter a question.")
        return

    # process_pdf signals failure with a string that starts with "Error".
    document_text = process_pdf(uploaded_file)
    if isinstance(document_text, str) and document_text.startswith("Error"):
        st.error(document_text)
        return

    # initialize_llm already displayed the reason when it returns None.
    llm = initialize_llm()
    if not llm:
        return

    chain = create_prompt() | llm
    answer = generate_answer(chain, document_text, user_input)
    st.subheader("πŸ“Œ Answer:")
    st.markdown(f"> {answer}")
122
+
123
# Build the UI only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()