File size: 1,412 Bytes
e21fbd8
 
 
 
 
 
 
 
 
 
7f5a4c8
e21fbd8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7f5a4c8
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import gradio as gr
import fitz  # PyMuPDF
from transformers import pipeline

# Build the extractive question-answering pipeline once, at import time, from
# a BERT-large checkpoint fine-tuned on SQuAD (per the model identifier).
QA_MODEL_NAME = "bert-large-uncased-whole-word-masking-finetuned-squad"
qa_pipeline = pipeline("question-answering", model=QA_MODEL_NAME)

# Extract text from uploaded PDF
def extract_text(pdf_file):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_file: Either a filesystem path given as a ``str``, or an object
            whose ``.name`` attribute holds the path (for example a
            temporary-file wrapper handed over by an upload widget).

    Returns:
        The text of all pages joined together with no separator. The result
        is an empty string when no page yields any text.
    """
    # Accept a bare path string as well as a file-like object carrying the
    # path in ``.name``; a plain ``str`` has no ``.name`` attribute.
    path = pdf_file if isinstance(pdf_file, str) else pdf_file.name

    # The context manager closes the document even if a page fails to parse.
    with fitz.open(path) as doc:
        return "".join(page.get_text() for page in doc)

# Main function for Gradio
def qa_from_pdf_upload(pdf_file, question):
    """Answer a question about an uploaded PDF using the QA pipeline.

    Args:
        pdf_file: The uploaded file as delivered by the file input; any falsy
            value is treated as "nothing uploaded".
        question: The question text entered by the user.

    Returns:
        A 4-tuple ``(answer, confidence, start, end)``. ``confidence`` is the
        pipeline score as a percentage rounded to two decimals; ``start`` and
        ``end`` are the positions reported by the pipeline for the answer.
        When the input is unusable, the first element is an error message and
        the other three are empty strings.
    """
    if not pdf_file:
        return "❌ Please upload a PDF.", "", "", ""

    # Reject a missing or whitespace-only question before doing any PDF work:
    # there is nothing to answer, and handing an empty question to the
    # pipeline would surface as an unhandled error instead of a clear message.
    question = (question or "").strip()
    if not question:
        return "❌ Please enter a question.", "", "", ""

    context = extract_text(pdf_file)
    if not context.strip():
        return "❌ Could not extract text from the PDF.", "", "", ""

    result = qa_pipeline(question=question, context=context)
    confidence_pct = round(result["score"] * 100, 2)
    return result["answer"], confidence_pct, result["start"], result["end"]

# Gradio UI
# Two inputs (PDF upload + question) map onto the handler's two parameters;
# the four output boxes receive the handler's 4-tuple in order.
# NOTE: the labels contain emoji, so this file must be saved as UTF-8.
demo = gr.Interface(
    fn=qa_from_pdf_upload,
    inputs=[
        gr.File(label="📥 Upload Fund PDF"),
        gr.Textbox(label="❓ Your Question", placeholder="e.g., Who is the fund manager?"),
    ],
    outputs=[
        gr.Textbox(label="✅ Answer"),
        gr.Textbox(label="📊 Confidence Score (%)"),
        gr.Textbox(label="Start Index"),
        gr.Textbox(label="End Index"),
    ],
    title="📘 Morningstar Fund QA from PDF",
    description="Upload a fund report in PDF format and ask questions using BERT.",
)
demo.launch()