Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import fitz # PyMuPDF | |
| from transformers import pipeline | |
# Question-answering pipeline, constructed once at import time and shared by
# every request handled by this module.
qa_pipeline = pipeline(
    task="question-answering",
    model="bert-large-uncased-whole-word-masking-finetuned-squad",
)
# Extract text from uploaded PDF
def extract_text(pdf_file):
    """Return the concatenated text of every page in a PDF.

    Args:
        pdf_file: Either a filesystem path given as ``str`` or an object
            exposing the path through a ``name`` attribute (the shape the
            original code relied on).

    Returns:
        The text of all pages joined in page order; an empty string when
        no page yields any text.
    """
    # Accept a bare path as well as an upload object carrying ``.name`` so the
    # helper works regardless of which form the caller hands over.
    pdf_path = pdf_file if isinstance(pdf_file, str) else pdf_file.name
    with fitz.open(pdf_path) as doc:
        # Join once instead of growing a string with += for every page.
        # The join must run inside the ``with`` while the document is open.
        return "".join(page.get_text() for page in doc)
# Main function for Gradio
def qa_from_pdf_upload(pdf_file, question):
    """Answer a question using the text of an uploaded PDF.

    Args:
        pdf_file: The uploaded file value passed in by the UI; falsy when
            nothing has been uploaded.
        question: The question text; may be empty or ``None``.

    Returns:
        A 4-tuple ``(answer, confidence_percent, start, end)``. On success
        the values come from the QA pipeline result, with the score scaled
        to a percentage rounded to two decimals. When validation fails the
        first element is a user-facing message and the other three are
        empty strings.
    """
    # NOTE(review): the leading "β" in the messages below looks like a
    # mis-decoded emoji; confirm the intended glyph against the original app.
    if not pdf_file:
        return "β Please upload a PDF.", "", "", ""

    # Reject a missing or blank question before doing any PDF work, rather
    # than passing it to the pipeline.
    if not question or not question.strip():
        return "Please enter a question.", "", "", ""

    context = extract_text(pdf_file)
    if not context.strip():
        return "β Could not extract text from the PDF.", "", "", ""

    result = qa_pipeline(question=question, context=context)
    confidence_percent = round(result["score"] * 100, 2)
    return result["answer"], confidence_percent, result["start"], result["end"]
# Gradio UI: two inputs (PDF upload, question) feed qa_from_pdf_upload, whose
# four return values are shown in four text boxes. The app is launched as soon
# as this module is executed.
input_components = [
    gr.File(label="π₯ Upload Fund PDF"),
    gr.Textbox(
        label="β Your Question",
        placeholder="e.g., Who is the fund manager?",
    ),
]

output_components = [
    gr.Textbox(label="β Answer"),
    gr.Textbox(label="π Confidence Score (%)"),
    gr.Textbox(label="Start Index"),
    gr.Textbox(label="End Index"),
]

demo = gr.Interface(
    fn=qa_from_pdf_upload,
    inputs=input_components,
    outputs=output_components,
    title="π Morningstar Fund QA from PDF",
    description="Upload a fund report in PDF format and ask questions using BERT.",
)
demo.launch()