# Ask_My_PDF / app.py
# TanU21's picture
# Update app.py
# 11070d5 verified
from langchain_community.document_loaders import PDFPlumberLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.prompts import PromptTemplate
import gradio as gr
import os
from langchain_groq import ChatGroq
from dotenv import load_dotenv
# Upper bound, in characters, on the document text that process_pdf returns.
MAX_DOC_LENGTH = 4_000
def process_pdf(file):
    """Extract up to ``MAX_DOC_LENGTH`` characters of text from an uploaded PDF.

    Args:
        file: The upload handed over by the UI. Either an object exposing the
            on-disk path as ``.name`` or the path itself as a string.

    Returns:
        The text of the loaded documents joined with newlines and truncated
        to ``MAX_DOC_LENGTH`` characters. Failures are not raised: a message
        starting with ``"Error"`` is returned instead, which ``handle_file``
        checks for.
    """
    try:
        # Accept both upload objects (path stored in ``.name``) and bare
        # path strings, so a string upload no longer fails on ``.name``.
        temp_path = getattr(file, "name", file)
        if not os.path.exists(temp_path):
            return "Error: Uploaded file path does not exist."

        loaded_docs = PDFPlumberLoader(temp_path).load()

        # Join the loaded text directly. Splitting it into overlapping chunks
        # and gluing the chunks back together repeats every overlap region,
        # which wastes part of the character budget on duplicated text.
        document_text = "\n".join(doc.page_content for doc in loaded_docs)

        # Only the first MAX_DOC_LENGTH characters are kept.
        return document_text[:MAX_DOC_LENGTH]
    except Exception as e:
        # UI boundary: report the failure as text rather than crashing.
        return f"Error processing PDF: {e}"
def initialize_llm():
    """Build the Groq chat model used to answer questions.

    Loads variables from a ``.env`` file via ``load_dotenv`` and then reads
    the API key from the environment.

    Returns:
        A ``ChatGroq`` instance configured for the ``llama3-8b-8192`` model
        with temperature 0.7.

    Raises:
        ValueError: If no non-empty API key is found in the environment.
    """
    load_dotenv()
    # Environment variable names are case-sensitive on POSIX systems. The
    # mixed-case name this app has always read is honoured first, and the
    # upper-case name quoted in the error message below is accepted as well,
    # so following that message actually works.
    groq_api_key = os.getenv("Groq_API_Key") or os.getenv("GROQ_API_KEY")
    if not groq_api_key:
        raise ValueError("GROQ_API_KEY environment variable not set.")
    # NOTE(review): confirm this model ID is still served by Groq.
    return ChatGroq(
        model="llama3-8b-8192",
        temperature=0.7,
        api_key=groq_api_key,
        verbose=False,
    )
def create_prompt():
    """Return the prompt template for document-grounded question answering.

    The template takes two variables, ``document`` and ``question``, and
    instructs the model to answer only from the supplied document text.
    """
    template_text = "".join(
        (
            "You are an AI assistant that provides precise answers based on the given document. ",
            "Use only the information available in the document to respond.\n\n",
            "Document:\n{document}\n\n",
            "Question: {question}\n",
            "Answer:",
        )
    )
    return PromptTemplate(
        template=template_text,
        input_variables=["document", "question"],
    )
def generate_answer(chain, document_text, user_input):
    """Run the chain on the document and question and return the answer text.

    Args:
        chain: Object whose ``invoke`` method accepts a dict with the keys
            ``"document"`` and ``"question"`` and returns a response that
            has a ``content`` attribute.
        document_text: Text to place in the prompt's ``document`` slot.
        user_input: Text to place in the prompt's ``question`` slot.

    Returns:
        The response content converted to ``str``, or a message beginning
        with ``"Error generating answer: "`` if anything raises.
    """
    payload = {"document": document_text, "question": user_input}
    try:
        reply = chain.invoke(payload)
        return str(reply.content)
    except Exception as exc:
        # Failures are reported as text so the UI always receives a string.
        return f"Error generating answer: {exc}"
def handle_file(file, user_input):
    """Answer a question about an uploaded PDF; entry point for the UI.

    Args:
        file: The uploaded PDF as supplied by the UI, or a falsy value when
            nothing was uploaded.
        user_input: The user's question. ``None`` and whitespace-only text
            are treated as "no question".

    Returns:
        The model's answer, or a human-readable message when input is
        missing, the PDF cannot be processed or contains no text, or the
        language model cannot be initialised. This function always returns
        text instead of raising for those cases.
    """
    if not file:
        return "Please upload a PDF document."

    # Validate the question before parsing the PDF: the check is cheap, and
    # ``or ""`` keeps a missing (None) question from raising on ``.strip()``.
    if not (user_input or "").strip():
        return "Please enter a question."

    document_text = process_pdf(file)

    # process_pdf reports failures as text. Match its two specific messages
    # instead of any text starting with "Error", so a PDF whose own content
    # begins with the word "Error" is not mistaken for a failure.
    pdf_error_prefixes = (
        "Error: Uploaded file path does not exist.",
        "Error processing PDF: ",
    )
    if isinstance(document_text, str) and document_text.startswith(pdf_error_prefixes):
        return document_text  # Return error message if processing failed

    # Nothing to ground an answer on (for example, a PDF without a text layer).
    if not document_text or not document_text.strip():
        return "No extractable text was found in the PDF."

    try:
        llm = initialize_llm()
    except ValueError as exc:
        # A missing API key is reported like every other failure here.
        return f"Error initializing language model: {exc}"

    chain = create_prompt() | llm
    return generate_answer(chain, document_text, user_input)
# Gradio UI: a PDF upload plus a question box wired to handle_file, with the
# answer shown in a text box.
interface = gr.Interface(
    fn=handle_file,
    inputs=[
        gr.File(label="Upload PDF"),
        gr.Textbox(lines=2, placeholder="Enter your question here..."),
    ],
    outputs=gr.Textbox(label="Answer"),
    title="Ask My PDF",
    description="Upload a PDF document and ask questions about its content.",
)

if __name__ == "__main__":
    # Start the web server only when this file is executed as a script, so
    # importing the module (e.g. for testing) does not launch the app.
    interface.launch()