# app.py — last commit "Update app.py" (7f5a4c8, verified) by Yatheshr.
import gradio as gr
import fitz # PyMuPDF
from transformers import pipeline
# Build the extractive question-answering pipeline once, at import time, so
# every call to qa_from_pdf_upload reuses the same loaded model.
qa_pipeline = pipeline(
    task="question-answering",
    model="bert-large-uncased-whole-word-masking-finetuned-squad",
)
# Extract text from uploaded PDF
def extract_text(pdf_file):
    """Return the concatenated text of every page of a PDF.

    Args:
        pdf_file: Either a filesystem path (``str`` or ``os.PathLike``) or an
            upload object whose ``name`` attribute holds the path of the
            file on disk.

    Returns:
        The text of all pages joined in page order. The result is an empty
        string when the document has no pages or no page yields any text.
    """
    import os  # local import keeps this block self-contained

    # A path is used as-is; anything else is treated as an upload wrapper
    # that exposes the on-disk location through ``.name``. Checking for a
    # path first matters because ``pathlib.Path.name`` is only the basename.
    if isinstance(pdf_file, (str, os.PathLike)):
        pdf_path = os.fspath(pdf_file)
    else:
        pdf_path = pdf_file.name

    with fitz.open(pdf_path) as doc:
        # Join while the document is still open; pages are read lazily.
        return "".join(page.get_text() for page in doc)
# Main function for Gradio
def qa_from_pdf_upload(pdf_file, question):
    """Answer a question about an uploaded PDF using the QA pipeline.

    Args:
        pdf_file: The uploaded PDF, forwarded unchanged to ``extract_text``.
            A falsy value means nothing was uploaded.
        question: The question text. ``None``, empty, and whitespace-only
            values are rejected.

    Returns:
        A 4-tuple ``(answer, confidence, start, end)``. On success these are
        the pipeline's ``answer``, its ``score`` expressed as a percentage
        rounded to two decimals, and the pipeline's ``start`` and ``end``
        values. On invalid input the first element is an error message and
        the remaining three are empty strings.
    """
    if not pdf_file:
        return "❌ Please upload a PDF.", "", "", ""

    # Validate the question before touching the PDF: a blank question gives
    # the model nothing to answer, and parsing the file would be wasted work.
    if not question or not question.strip():
        return "❌ Please enter a question.", "", "", ""

    context = extract_text(pdf_file)
    if not context.strip():
        return "❌ Could not extract text from the PDF.", "", "", ""

    result = qa_pipeline(question=question, context=context)
    confidence = round(result["score"] * 100, 2)
    return result["answer"], confidence, result["start"], result["end"]
# Gradio UI
# The two inputs map, in order, to the (pdf_file, question) parameters of
# qa_from_pdf_upload; the four outputs map, in order, to the 4-tuple it
# returns.
demo = gr.Interface(
    fn=qa_from_pdf_upload,
    inputs=[
        gr.File(label="📥 Upload Fund PDF"),
        gr.Textbox(
            label="❓ Your Question",
            placeholder="e.g., Who is the fund manager?",
        ),
    ],
    outputs=[
        gr.Textbox(label="✅ Answer"),
        gr.Textbox(label="📊 Confidence Score (%)"),
        gr.Textbox(label="Start Index"),
        gr.Textbox(label="End Index"),
    ],
    title="📘 Morningstar Fund QA from PDF",
    description="Upload a fund report in PDF format and ask questions using BERT.",
)

# Start the web server as soon as the script runs.
demo.launch()