# pdf-qa-system / app.py
# aimal-khan's picture
# Update app.py
# 810ead9 verified
import gradio as gr
import pdfplumber
import logging
from transformers import pipeline
# Configure the root logger at INFO level so the logging.info calls in this
# file (extracted text, received question, generated summary) are emitted.
logging.basicConfig(level=logging.INFO)
# 1) Function to extract text from the PDF file
def extract_pdf_text(path):
    """Return the concatenated text of every page in a PDF.

    Args:
        path: The document to read, passed unchanged to ``pdfplumber.open``.

    Returns:
        The text of all pages joined in page order with no separator, or an
        empty string when no page yields any text.
    """
    with pdfplumber.open(path) as pdf:
        # extract_text() may yield None for a page without a text layer
        # (e.g. a scanned image); treat that as "" so a single such page
        # does not raise TypeError and abort the whole extraction.
        text = "".join(page.extract_text() or "" for page in pdf.pages)
    # Log only a short prefix so large documents do not flood the log.
    logging.info("Extracted PDF Text (first 500 chars): %s", text[:500])
    return text
# 2) Build the Hugging Face summarization pipeline (facebook/bart-large-cnn)
# once at module level so every request reuses the same object.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
# 3) Function to process the PDF, receive user question, and generate a summary
def process_pdf_and_qa(pdf_file, question):
    """Summarize an uploaded PDF for the Gradio interface.

    Args:
        pdf_file: The value of the "Upload PDF" input; ``None`` when the form
            is submitted without a file.
        question: The text from the "Ask a Question" box. It is only logged:
            the summarization pipeline is given the document text alone.

    Returns:
        The generated summary, or a short explanatory message when no file
        was provided or the PDF contains no extractable text.
    """
    logging.info("Received Question: %s", question)

    # Submitting without an upload hands us None; answer with a message
    # instead of letting the PDF reader raise on it.
    if pdf_file is None:
        return "Please upload a PDF file."

    pdf_text = extract_pdf_text(pdf_file)
    # Only the first 500 characters of the context are logged.
    logging.info("Context (PDF text): %s", pdf_text[:500])

    # Image-only or empty PDFs produce no text; there is nothing to summarize.
    if not pdf_text.strip():
        return "No extractable text was found in the PDF."

    # truncation=True clips the input to the model's maximum input length;
    # without it, documents longer than that limit make the model call fail.
    summary = summarizer(
        pdf_text,
        max_length=200,
        min_length=50,
        do_sample=False,
        truncation=True,
    )
    answer = summary[0]['summary_text']
    logging.info("Generated Summary: %s", answer)
    return answer
# 4) Gradio front end: a PDF upload plus a question box in, plain text out.
iface = gr.Interface(
    title="PDF Summarization and Q&A System",
    description="Upload a PDF and ask a question to get a summary of the document.",
    fn=process_pdf_and_qa,
    inputs=[
        # Handed to process_pdf_and_qa as its first argument (the PDF).
        gr.File(label="Upload PDF"),
        # Handed to process_pdf_and_qa as its second argument (the question).
        gr.Textbox(label="Ask a Question"),
    ],
    # The callback's return value is rendered as text.
    outputs="text",
)
# 5) Launch the interface with debugging enabled
# NOTE(review): Gradio documents debug=True as keeping the main thread blocked
# while the server runs, so the print below presumably executes only after the
# server shuts down — confirm this is intended.
iface.launch(debug=True)
print("Smiley")