Spaces:

Yatheshr
/

PDF_File_QA_using_BERT

Sleeping

File size: 1,412 Bytes

import os

import fitz  # PyMuPDF
import gradio as gr
from transformers import pipeline

# Checkpoint id handed to the question-answering pipeline.
QA_MODEL_NAME = "bert-large-uncased-whole-word-masking-finetuned-squad"

# Load the BERT QA pipeline once at module level; the request handler
# below reuses this single instance.
qa_pipeline = pipeline(task="question-answering", model=QA_MODEL_NAME)

# Extract text from uploaded PDF
def extract_text(pdf_file):
    """Return the concatenated text of every page of an uploaded PDF.

    Args:
        pdf_file: The upload to read. Either a filesystem path (``str`` or
            ``os.PathLike``) or an object that exposes the path of the
            file on disk through a ``.name`` attribute.

    Returns:
        The text of all pages joined in page order. The result is an
        empty string when no page yields any text.
    """
    # A path is used directly; anything else is expected to carry the
    # on-disk path in ``.name``, which is what this function originally
    # required of every argument.
    if isinstance(pdf_file, (str, os.PathLike)):
        pdf_path = os.fspath(pdf_file)
    else:
        pdf_path = pdf_file.name

    with fitz.open(pdf_path) as doc:
        # join() assembles the result in one pass instead of growing a
        # string with ``+=`` for every page.
        return "".join(page.get_text() for page in doc)

# Main function for Gradio
def qa_from_pdf_upload(pdf_file, question):
    """Answer a question about an uploaded PDF using the QA pipeline.

    Args:
        pdf_file: The uploaded PDF as delivered by the file input. A falsy
            value means nothing was uploaded.
        question: The question text entered by the user.

    Returns:
        A 4-tuple ``(answer, confidence, start, end)``, one element per
        output box. ``confidence`` is the pipeline's ``score`` expressed
        as a percentage rounded to two decimals; ``start`` and ``end`` are
        passed through from the pipeline result unchanged. When the input
        is unusable, the first element is an error message and the other
        three are empty strings.
    """
    if not pdf_file:
        return "❌ Please upload a PDF.", "", "", ""

    # Reject a missing or whitespace-only question before doing any PDF
    # work, so the user gets a readable message instead of an exception
    # from the pipeline.
    if not question or not question.strip():
        return "❌ Please enter a question.", "", "", ""

    context = extract_text(pdf_file)
    if not context.strip():
        return "❌ Could not extract text from the PDF.", "", "", ""

    result = qa_pipeline(question=question, context=context)
    confidence_pct = round(result["score"] * 100, 2)
    return result["answer"], confidence_pct, result["start"], result["end"]

# Gradio UI
# Input widgets, listed in the order qa_from_pdf_upload takes its arguments.
input_components = [
    gr.File(label="📥 Upload Fund PDF"),
    gr.Textbox(
        label="❓ Your Question",
        placeholder="e.g., Who is the fund manager?",
    ),
]

# One text box per element of the 4-tuple qa_from_pdf_upload returns.
output_labels = (
    "✅ Answer",
    "📊 Confidence Score (%)",
    "Start Index",
    "End Index",
)
output_components = [gr.Textbox(label=caption) for caption in output_labels]

demo = gr.Interface(
    fn=qa_from_pdf_upload,
    inputs=input_components,
    outputs=output_components,
    title="📘 Morningstar Fund QA from PDF",
    description="Upload a fund report in PDF format and ask questions using BERT.",
)
demo.launch()