Spaces:
Sleeping
Sleeping
File size: 1,412 Bytes
e21fbd8 7f5a4c8 e21fbd8 7f5a4c8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 |
import gradio as gr
import fitz # PyMuPDF
from transformers import pipeline
# Name of the BERT checkpoint used for question answering.
_QA_MODEL_NAME = "bert-large-uncased-whole-word-masking-finetuned-squad"

# Build the question-answering pipeline once at module load; the request
# handler below reuses this single instance.
qa_pipeline = pipeline("question-answering", model=_QA_MODEL_NAME)
# Extract text from uploaded PDF
def extract_text(pdf_file):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_file: Either an object whose ``.name`` attribute holds the path
            of the PDF, or the path itself (for example a plain ``str``).
            When ``.name`` is present it is used, otherwise the value is
            handed to ``fitz.open`` unchanged.

    Returns:
        One string containing the text of all pages; empty when no page
        yields any text.
    """
    # Prefer ``.name`` so file-like upload objects keep working, but fall
    # back to the value itself so a bare path string is accepted too.
    pdf_path = getattr(pdf_file, "name", pdf_file)
    with fitz.open(pdf_path) as doc:
        # Join once instead of growing a string page by page.
        return "".join(page.get_text() for page in doc)
# Main function for Gradio
def qa_from_pdf_upload(pdf_file, question):
    """Answer a question using the text of an uploaded PDF.

    Args:
        pdf_file: The uploaded file as handed over by the UI; falsy when
            nothing was uploaded.
        question: The question text entered by the user.

    Returns:
        A 4-tuple ``(answer, confidence_percent, start, end)`` taken from the
        QA pipeline result, with the score scaled to a percentage rounded to
        two decimals. When the input is unusable, the first element is a
        user-facing message and the remaining three are empty strings.
    """
    # NOTE(review): the leading "β" in the two existing messages looks like a
    # mis-decoded emoji; kept verbatim here - confirm against the original.
    if not pdf_file:
        return "β Please upload a PDF.", "", "", ""

    # Reject a missing or whitespace-only question before doing any PDF
    # parsing or model inference; there is nothing to answer.
    if not question or not question.strip():
        return "Please enter a question.", "", "", ""

    context = extract_text(pdf_file)
    if not context.strip():
        return "β Could not extract text from the PDF.", "", "", ""

    result = qa_pipeline(question=question, context=context)
    confidence = round(result["score"] * 100, 2)
    return result["answer"], confidence, result["start"], result["end"]
# Gradio UI
# Two inputs (PDF upload, question) feed qa_from_pdf_upload; its four return
# values map onto the four output textboxes in order.
# NOTE(review): the stray "β"/"π" at the start of some labels and the title
# look like mis-decoded emoji. Only the two whose residue identifies the
# character were restored ("📥", "✅"); restore the rest from the original file.
demo = gr.Interface(
    fn=qa_from_pdf_upload,
    inputs=[
        gr.File(label="📥 Upload Fund PDF"),
        gr.Textbox(
            label="β Your Question",
            placeholder="e.g., Who is the fund manager?",
        ),
    ],
    outputs=[
        # This label was split across two lines, leaving an unterminated
        # string literal; it is rejoined into a single literal here.
        gr.Textbox(label="✅ Answer"),
        gr.Textbox(label="π Confidence Score (%)"),
        gr.Textbox(label="Start Index"),
        gr.Textbox(label="End Index"),
    ],
    title="π Morningstar Fund QA from PDF",
    description="Upload a fund report in PDF format and ask questions using BERT.",
)
demo.launch()
|