|
|
import os

import fitz
import gradio as gr
from transformers import pipeline
|
|
|
|
|
|
|
|
# Model checkpoints backing the two features of the app.
_QA_MODEL_ID = "bert-large-uncased-whole-word-masking-finetuned-squad"
_SUMMARY_MODEL_ID = "t5-small"

# Both pipelines are created once at import time and reused by the handlers
# defined further down in this module.
qa_pipeline = pipeline(task="question-answering", model=_QA_MODEL_ID)
summarizer = pipeline(
    task="summarization",
    model=_SUMMARY_MODEL_ID,
    tokenizer=_SUMMARY_MODEL_ID,
)
|
|
|
|
|
|
|
|
def extract_text(pdf_file):
    """Return the concatenated text of every page in a PDF.

    Args:
        pdf_file: Either a filesystem path (``str`` or ``os.PathLike``) or an
            upload wrapper that exposes the path through a ``name`` attribute.

    Returns:
        The text of all pages joined in document order; an empty string when
        no page yields any text.
    """
    # Gradio may hand the upload over as a plain path or as a temp-file
    # wrapper depending on version/configuration, so accept both instead of
    # assuming ``.name`` is always present.
    if isinstance(pdf_file, (str, os.PathLike)):
        pdf_path = os.fspath(pdf_file)
    else:
        pdf_path = pdf_file.name

    with fitz.open(pdf_path) as doc:
        # join() avoids the repeated string re-allocation of ``text += ...``.
        return "".join(page.get_text() for page in doc)
|
|
|
|
|
|
|
|
def qa_from_pdf_upload(pdf_file, question):
    """Answer a question using the text of an uploaded PDF as context.

    Args:
        pdf_file: Uploaded PDF in whatever form ``extract_text`` accepts;
            falsy when nothing was uploaded.
        question: The question to ask about the document.

    Returns:
        A 4-tuple ``(answer, confidence_percent, start, end)`` built from the
        question-answering pipeline result, with the score scaled to a
        percentage rounded to two decimals. When the request cannot be
        served, the first item is a user-facing message and the remaining
        three are empty strings.
    """
    if not pdf_file:
        return "β Please upload a PDF.", "", "", ""

    # A blank question cannot be answered (the pipeline raises on an empty
    # one), so report it before doing the comparatively expensive PDF parse.
    if not question or not question.strip():
        return "β Please enter a question.", "", "", ""

    context = extract_text(pdf_file)
    if not context.strip():
        return "β Could not extract text from the PDF.", "", "", ""

    result = qa_pipeline(question=question, context=context)
    confidence_percent = round(result["score"] * 100, 2)
    return result["answer"], confidence_percent, result["start"], result["end"]
|
|
|
|
|
|
|
|
def summarize_pdf(pdf_file, *, max_input_length=1024, max_length=150, min_length=40):
    """Summarize the beginning of an uploaded PDF.

    Args:
        pdf_file: Uploaded PDF in whatever form ``extract_text`` accepts;
            falsy when nothing was uploaded.
        max_input_length: Number of leading characters of the extracted text
            that are passed to the summarizer.
        max_length: ``max_length`` forwarded to the summarization pipeline.
        min_length: ``min_length`` forwarded to the summarization pipeline.

    Returns:
        The ``summary_text`` of the first pipeline output, or a user-facing
        message when no file was uploaded or no text could be extracted.
    """
    if not pdf_file:
        return "β Please upload a PDF."

    text = extract_text(pdf_file)
    if not text.strip():
        return "β Could not extract text from the PDF."

    # Only the leading slice of the document is summarized; this is a
    # character-based cut that keeps the model input bounded.
    excerpt = text[:max_input_length]

    outputs = summarizer(
        excerpt,
        max_length=max_length,
        min_length=min_length,
        do_sample=False,
    )
    return outputs[0]["summary_text"]
|
|
|
|
|
|
|
|
# Two-tab Gradio interface: one tab answers a question about an uploaded PDF,
# the other produces a summary of it.
# NOTE(review): several labels begin with "π"/"β", which look like emoji that
# were decoded with the wrong charset — confirm the file's encoding.
with gr.Blocks(title="π Morningstar PDF Analyzer") as demo:
    gr.Markdown(
        "## π Morningstar Fund PDF Analyzer\n"
        "Upload a PDF fund report and either ask questions or get a summary."
    )

    with gr.Tabs():
        with gr.TabItem("π Q&A from PDF"):
            pdf_input_qa = gr.File(label="π₯ Upload Fund PDF")
            question_input = gr.Textbox(
                label="β Your Question",
                placeholder="e.g., Who is the fund manager?",
            )
            # This label was split across two source lines (an unterminated
            # string literal); it is rejoined into a single literal here.
            answer_output = gr.Textbox(label="β Answer")
            score_output = gr.Textbox(label="π Confidence Score (%)")
            start_output = gr.Textbox(label="Start Index")
            end_output = gr.Textbox(label="End Index")
            qa_button = gr.Button("π§ Get Answer")
            # The handler returns four values, mapped onto the outputs in order.
            qa_button.click(
                fn=qa_from_pdf_upload,
                inputs=[pdf_input_qa, question_input],
                outputs=[answer_output, score_output, start_output, end_output],
            )

        with gr.TabItem("π Summary"):
            pdf_input_sum = gr.File(label="π₯ Upload Fund PDF")
            summary_output = gr.Textbox(label="π Summary", lines=10)
            sum_button = gr.Button("π Generate Summary")
            sum_button.click(
                fn=summarize_pdf,
                inputs=[pdf_input_sum],
                outputs=[summary_output],
            )

# Start the app as soon as the module is executed.
demo.launch()
|
|
|