|
|
import streamlit as st |
|
|
from langchain_community.document_loaders import PDFPlumberLoader |
|
|
from langchain_text_splitters import RecursiveCharacterTextSplitter |
|
|
from langchain_core.prompts import PromptTemplate |
|
|
import os |
|
|
import tempfile |
|
|
from langchain_groq import ChatGroq |
|
|
from dotenv import load_dotenv |
|
|
|
|
|
|
|
|
# Maximum number of characters of extracted PDF text passed to the LLM;
# caps prompt size to avoid context-length errors (see process_pdf).
MAX_DOC_LENGTH = 4000
|
|
|
|
|
def process_pdf(uploaded_file):
    """Extract text from an uploaded PDF.

    Writes the upload to a temporary file (PDFPlumberLoader requires a
    filesystem path), loads and chunks the document, and returns up to
    MAX_DOC_LENGTH characters of its text. Never raises: any failure is
    returned as a string starting with "Error".

    Args:
        uploaded_file: Streamlit UploadedFile-like object with a .read()
            method, or a falsy value when nothing was uploaded.

    Returns:
        str: The (truncated) document text, or an "Error ..." message.
    """
    try:
        if not uploaded_file:
            return "Error: No file uploaded."

        # Persist the in-memory upload so the loader can open it by path.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
            temp_file.write(uploaded_file.read())
            temp_path = temp_file.name

        try:
            loader = PDFPlumberLoader(temp_path)
            result = loader.load()

            splitter = RecursiveCharacterTextSplitter(chunk_size=150, chunk_overlap=20)
            split_docs = splitter.split_documents(result)

            # Re-join the chunks and cap total size sent to the LLM.
            document_text = "\n".join(doc.page_content for doc in split_docs)
            return document_text[:MAX_DOC_LENGTH]
        finally:
            # Bug fix: previously the temp file was removed only on the
            # success path, leaking it whenever loading/splitting raised.
            os.remove(temp_path)
    except Exception as e:
        return f"Error processing PDF: {str(e)}"
|
|
|
|
|
def initialize_llm():
    """Initializes the LLM with error handling for unavailable models.

    Loads environment variables via dotenv, then constructs a ChatGroq
    client. Shows a Streamlit error and returns None when the API key is
    missing or the client cannot be created.

    Returns:
        ChatGroq | None: The configured client, or None on failure.
    """
    load_dotenv()
    # Bug fix: the key was read as "Groq_API_Key" while the error message
    # told users to set GROQ_API_KEY (os.getenv is case-sensitive). Read
    # the conventional name first; keep the old spelling as a fallback
    # for backward compatibility with existing .env files.
    groq_api_key = os.getenv("GROQ_API_KEY") or os.getenv("Groq_API_Key")
    if not groq_api_key:
        st.error("GROQ_API_KEY environment variable is missing.")
        return None

    try:
        return ChatGroq(
            model="llama3-8b-8192",
            temperature=0.7,
            api_key=groq_api_key,
            verbose=False
        )
    except Exception as e:
        st.error(f"Error initializing LLM: {str(e)}")
        return None
|
|
|
|
|
def create_prompt():
    """Build the prompt template that grounds answers in the uploaded document."""
    template_text = (
        "You are an AI assistant that provides precise answers based on the given document. "
        "Use only the information available in the document to respond.\n\n"
        "Document:\n{document}\n\n"
        "Question: {question}\n"
        "Answer:"
    )
    return PromptTemplate(
        template=template_text,
        input_variables=["document", "question"],
    )
|
|
|
|
|
def generate_answer(chain, document_text, user_input):
    """Invoke the chain and map known Groq API failures to friendly messages."""
    try:
        result = chain.invoke({"document": document_text, "question": user_input})
        return str(result.content)
    except Exception as exc:
        lowered = str(exc).lower()
        # Known API error markers -> user-facing messages (checked in order).
        friendly_messages = {
            "rate_limit_exceeded": "β οΈ Error: Rate limit exceeded. Try again later.",
            "context_length_exceeded": "β οΈ Error: Input too long. Please shorten your document or question.",
            "model_not_found": "β οΈ Error: Selected model is unavailable. Please try a different one.",
            "model_decommissioned": "β οΈ Error: Selected model is unavailable. Please try a different one.",
        }
        for marker, message in friendly_messages.items():
            if marker in lowered:
                return message
        return f"β οΈ Error generating answer: {str(exc)}"
|
|
|
|
|
def main():
    """Streamlit UI: upload a PDF in the sidebar, ask a question, show the answer."""
    st.set_page_config(page_title="Ask My PDF", layout="wide")

    st.title("π Ask My PDF")

    with st.sidebar:
        st.header("π Upload PDF")
        uploaded_file = st.file_uploader("Upload a PDF document", type=["pdf"])

        if uploaded_file:
            # Bug fix: this literal previously contained a raw newline that
            # split the string across two physical lines (a syntax error).
            st.success("β File uploaded successfully!")

    user_input = st.text_area("π¬ Enter your question:", placeholder="Ask something about the document...")

    if st.button("Get Answer", use_container_width=True):
        if not uploaded_file:
            st.warning("β οΈ Please upload a PDF document.")
        elif not user_input.strip():
            st.warning("β οΈ Please enter a question.")
        else:
            document_text = process_pdf(uploaded_file)
            # process_pdf signals failure by returning an "Error ..." string.
            if isinstance(document_text, str) and document_text.startswith("Error"):
                st.error(document_text)
            else:
                llm = initialize_llm()
                if llm:
                    prompt = create_prompt()
                    chain = prompt | llm
                    answer = generate_answer(chain, document_text, user_input)
                    st.subheader("π Answer:")
                    st.markdown(f"> {answer}")
|
|
|
|
|
# Run the Streamlit app only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|
|
|