File size: 4,697 Bytes
6103a94 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 |
import streamlit as st
from langchain_community.document_loaders import PDFPlumberLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.prompts import PromptTemplate
import os
import tempfile
from langchain_groq import ChatGroq
from dotenv import load_dotenv
# Max document length to avoid exceeding token limits
MAX_DOC_LENGTH = 4000
def process_pdf(uploaded_file):
    """Extract text from an uploaded PDF and return it as a single string.

    Args:
        uploaded_file: File-like object (e.g. a Streamlit ``UploadedFile``)
            whose ``read()`` yields the raw PDF bytes.

    Returns:
        str: Extracted document text, truncated to ``MAX_DOC_LENGTH``
        characters; on failure, a string starting with ``"Error"``
        (callers check that prefix instead of catching exceptions).
    """
    try:
        if not uploaded_file:
            return "Error: No file uploaded."
        # PDFPlumberLoader needs a real filesystem path, so persist the
        # uploaded bytes to a temporary file first.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
            temp_file.write(uploaded_file.read())
            temp_path = temp_file.name  # Actual on-disk path for the loader
        try:
            # Load the PDF, then split into small overlapping chunks so the
            # joined text is evenly segmented.
            loader = PDFPlumberLoader(temp_path)
            result = loader.load()
            splitter = RecursiveCharacterTextSplitter(chunk_size=150, chunk_overlap=20)
            split_docs = splitter.split_documents(result)
            document_text = "\n".join(doc.page_content for doc in split_docs)
            # Trim to keep the downstream prompt within token limits.
            return document_text[:MAX_DOC_LENGTH]
        finally:
            # BUGFIX: previously the temp file was only removed on the success
            # path, leaking a file per failed upload; always clean it up.
            os.remove(temp_path)
    except Exception as e:
        return f"Error processing PDF: {str(e)}"
def initialize_llm():
    """Initializes the LLM with error handling for unavailable models.

    Returns:
        ChatGroq | None: A configured Groq chat client, or ``None`` when the
        API key is missing or construction fails (errors are shown via
        ``st.error`` rather than raised).
    """
    load_dotenv()
    # BUGFIX: the error message referred to GROQ_API_KEY, but the code only
    # read "Groq_API_Key" — os.getenv is case-sensitive, so a user who set
    # the conventional GROQ_API_KEY got a spurious "missing" error. Accept
    # the conventional name first, keeping the old spelling as a fallback
    # for backward compatibility.
    groq_api_key = os.getenv("GROQ_API_KEY") or os.getenv("Groq_API_Key")
    if not groq_api_key:
        st.error("GROQ_API_KEY environment variable is missing.")
        return None
    try:
        return ChatGroq(
            model="llama3-8b-8192",
            temperature=0.7,
            api_key=groq_api_key,
            verbose=False,
        )
    except Exception as e:
        st.error(f"Error initializing LLM: {str(e)}")
        return None
def create_prompt():
    """Build the PromptTemplate used for document-grounded Q&A.

    The template instructs the model to answer strictly from the supplied
    document text and exposes two slots: ``document`` and ``question``.
    """
    template_text = (
        "You are an AI assistant that provides precise answers based on the given document. "
        "Use only the information available in the document to respond.\n\n"
        "Document:\n{document}\n\n"
        "Question: {question}\n"
        "Answer:"
    )
    return PromptTemplate(
        template=template_text,
        input_variables=["document", "question"],
    )
def generate_answer(chain, document_text, user_input):
    """Generates an answer from the LLM while handling API errors.

    Args:
        chain: A runnable with ``invoke(dict) -> response`` where the
            response exposes a ``content`` attribute (prompt | llm chain).
        document_text: The extracted document text to ground the answer in.
        user_input: The user's question.

    Returns:
        str: The model's answer, or a user-facing "⚠️ Error: ..." message
        when the API call fails.
    """
    try:
        response = chain.invoke({"document": document_text, "question": user_input})
        return str(response.content)
    except Exception as e:
        # Map known Groq API failure codes to friendly messages.
        # BUGFIX: the warning emoji in these strings was mojibake ("β οΈ",
        # a mis-decoded "⚠️"); restored the intended character.
        error_message = str(e).lower()
        if "rate_limit_exceeded" in error_message:
            return "⚠️ Error: Rate limit exceeded. Try again later."
        elif "context_length_exceeded" in error_message:
            return "⚠️ Error: Input too long. Please shorten your document or question."
        elif "model_not_found" in error_message or "model_decommissioned" in error_message:
            return "⚠️ Error: Selected model is unavailable. Please try a different one."
        return f"⚠️ Error generating answer: {str(e)}"
def main():
    """Streamlit UI: upload a PDF, ask a question, display the LLM's answer."""
    st.set_page_config(page_title="Ask My PDF", layout="wide")
    st.title("📄 Ask My PDF")

    with st.sidebar:
        st.header("📂 Upload PDF")
        uploaded_file = st.file_uploader("Upload a PDF document", type=["pdf"])
        if uploaded_file:
            # BUGFIX: this string literal was split across lines by encoding
            # corruption ('st.success("β' / 'File uploaded...'); the emoji
            # was a mis-decoded "✅".
            st.success("✅ File uploaded successfully!")

    user_input = st.text_area("💬 Enter your question:", placeholder="Ask something about the document...")

    if st.button("Get Answer", use_container_width=True):
        if not uploaded_file:
            st.warning("⚠️ Please upload a PDF document.")
        elif not user_input.strip():
            st.warning("⚠️ Please enter a question.")
        else:
            document_text = process_pdf(uploaded_file)
            # process_pdf signals failure with an "Error"-prefixed string
            # rather than raising.
            if isinstance(document_text, str) and document_text.startswith("Error"):
                st.error(document_text)
            else:
                llm = initialize_llm()
                if llm:
                    prompt = create_prompt()
                    chain = prompt | llm  # LCEL: prompt feeds the LLM
                    answer = generate_answer(chain, document_text, user_input)
                    st.subheader("📜 Answer:")
                    st.markdown(f"> {answer}")
if __name__ == "__main__":
    main()  # ✅ Ensures Streamlit runs in the right context
|