"""Gradio app: upload a PDF and ask a question answered by a BERT QA model."""

import gradio as gr
import fitz  # PyMuPDF
from transformers import pipeline

# Load BERT QA pipeline once at import time so every request reuses it.
qa_pipeline = pipeline(
    "question-answering",
    model="bert-large-uncased-whole-word-masking-finetuned-squad",
)

# Placeholder values for the score / start / end outputs when there is no answer.
_EMPTY_DETAILS = ("", "", "")


def extract_text(pdf_file) -> str:
    """Return the concatenated text of every page in the uploaded PDF.

    Args:
        pdf_file: The value Gradio passes for the ``gr.File`` input. Either an
            object exposing the on-disk path as ``.name`` or the path itself
            as a string (which of the two depends on the Gradio version /
            ``gr.File(type=...)`` setting).

    Returns:
        The text of all pages joined together, in page order.
    """
    # Accept both a file-like wrapper (``.name``) and a plain path string.
    pdf_path = getattr(pdf_file, "name", pdf_file)
    with fitz.open(pdf_path) as doc:
        return "".join(page.get_text() for page in doc)


def qa_from_pdf_upload(pdf_file, question):
    """Answer ``question`` using the text of the uploaded PDF as context.

    Args:
        pdf_file: Uploaded file as provided by the ``gr.File`` input.
        question: The user's question text.

    Returns:
        A 4-tuple ``(answer, confidence_percent, start, end)`` matching the
        four output textboxes. On a validation failure the first element is
        an error message and the remaining three are empty strings.
    """
    if not pdf_file:
        return ("❌ Please upload a PDF.", *_EMPTY_DETAILS)

    # The QA pipeline rejects an empty question, so report it to the user
    # instead of letting the exception surface in the UI.
    if not question or not question.strip():
        return ("❌ Please enter a question.", *_EMPTY_DETAILS)

    context = extract_text(pdf_file)
    if not context.strip():
        return ("❌ Could not extract text from the PDF.", *_EMPTY_DETAILS)

    result = qa_pipeline(question=question, context=context)
    return (
        result["answer"],
        round(result["score"] * 100, 2),  # score shown as a percentage
        result["start"],
        result["end"],
    )


# Gradio UI
demo = gr.Interface(
    fn=qa_from_pdf_upload,
    inputs=[
        gr.File(label="📥 Upload Fund PDF"),
        gr.Textbox(
            label="❓ Your Question",
            placeholder="e.g., Who is the fund manager?",
        ),
    ],
    outputs=[
        gr.Textbox(label="✅ Answer"),
        gr.Textbox(label="📊 Confidence Score (%)"),
        gr.Textbox(label="Start Index"),
        gr.Textbox(label="End Index"),
    ],
    title="📘 Morningstar Fund QA from PDF",
    description="Upload a fund report in PDF format and ask questions using BERT.",
)

# Launch only when run as a script so importing this module does not start
# a web server.
if __name__ == "__main__":
    demo.launch()