# Yatheshr's picture
# Create app.py
# 4ffec39 verified
import gradio as gr
import fitz # PyMuPDF
from transformers import pipeline
# Build both Hugging Face pipelines once at import time; the handler
# functions below reference these module-level objects.
qa_pipeline = pipeline(
    "question-answering",
    model="bert-large-uncased-whole-word-masking-finetuned-squad",
)
# 'facebook/bart-large-cnn' is noted as an alternative summarization model.
summarizer = pipeline(
    "summarization",
    model="t5-small",
    tokenizer="t5-small",
)
# Extract text from uploaded PDF
def extract_text(pdf_file):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_file: Either a plain path string, or an object whose ``name``
            attribute holds the path of the PDF on disk (the form the
            original code required).

    Returns:
        A single string made of ``page.get_text()`` for each page, joined
        with no separator. Empty when no page yields any text.
    """
    # Accept a bare path as well as a file-like wrapper exposing ``.name``,
    # so the helper works regardless of how the upload value is delivered.
    pdf_path = pdf_file if isinstance(pdf_file, str) else pdf_file.name
    with fitz.open(pdf_path) as doc:
        # join() builds the result in one pass instead of repeated ``+=``.
        return "".join(page.get_text() for page in doc)
# QA pipeline function
def qa_from_pdf_upload(pdf_file, question):
    """Answer a question using the text extracted from an uploaded PDF.

    Args:
        pdf_file: Uploaded file value from the UI; forwarded unchanged to
            ``extract_text``.
        question: Question text entered by the user.

    Returns:
        A 4-tuple ``(answer, confidence_percent, start, end)`` built from the
        question-answering pipeline result, where the score is scaled to a
        percentage and rounded to two decimals. When validation fails, the
        first element is an error message and the other three are empty
        strings.
    """
    if not pdf_file:
        return "❌ Please upload a PDF.", "", "", ""
    # The question-answering pipeline raises on an empty question; report it
    # to the user instead, and do so before spending time parsing the PDF.
    if not question or not question.strip():
        return "❌ Please enter a question.", "", "", ""
    context = extract_text(pdf_file)
    if not context.strip():
        return "❌ Could not extract text from the PDF.", "", "", ""
    result = qa_pipeline(question=question, context=context)
    confidence_percent = round(result["score"] * 100, 2)
    return result["answer"], confidence_percent, result["start"], result["end"]
# Summarization function
def summarize_pdf(pdf_file):
    """Produce a short summary of the text found in an uploaded PDF.

    Args:
        pdf_file: Uploaded file value from the UI; forwarded unchanged to
            ``extract_text``.

    Returns:
        The ``summary_text`` of the first summarizer result, or an error
        message when no file was given or no text could be extracted.
    """
    if not pdf_file:
        return "❌ Please upload a PDF."

    extracted = extract_text(pdf_file)
    if extracted.strip() == "":
        return "❌ Could not extract text from the PDF."

    # Only the leading 1024 characters are handed to the model, to keep the
    # input within a size it can handle.
    char_budget = 1024
    clipped = extracted[:char_budget]

    outputs = summarizer(clipped, max_length=150, min_length=40, do_sample=False)
    first_result = outputs[0]
    return first_result["summary_text"]
# Gradio UI with Tabs
# Two tabs share one Blocks app: one for question answering over an uploaded
# PDF, one for summarizing it. The emoji in the labels were mis-encoded
# (UTF-8 bytes shown as mojibake) and are restored here.
with gr.Blocks(title="📘 Morningstar PDF Analyzer") as demo:
    gr.Markdown(
        "## 📘 Morningstar Fund PDF Analyzer\n"
        "Upload a PDF fund report and either ask questions or get a summary."
    )
    with gr.Tabs():
        # Tab 1: ask a question about the uploaded PDF.
        with gr.TabItem("🔍 Q&A from PDF"):
            pdf_input_qa = gr.File(label="📥 Upload Fund PDF")
            question_input = gr.Textbox(
                label="❓ Your Question",
                placeholder="e.g., Who is the fund manager?",
            )
            answer_output = gr.Textbox(label="✅ Answer")
            score_output = gr.Textbox(label="📊 Confidence Score (%)")
            start_output = gr.Textbox(label="Start Index")
            end_output = gr.Textbox(label="End Index")
            qa_button = gr.Button("🧠 Get Answer")
            # The four outputs line up with the 4-tuple returned by
            # qa_from_pdf_upload: answer, score, start, end.
            qa_button.click(
                fn=qa_from_pdf_upload,
                inputs=[pdf_input_qa, question_input],
                outputs=[answer_output, score_output, start_output, end_output],
            )
        # Tab 2: summarize the uploaded PDF.
        with gr.TabItem("📝 Summary"):
            pdf_input_sum = gr.File(label="📥 Upload Fund PDF")
            summary_output = gr.Textbox(label="📝 Summary", lines=10)
            sum_button = gr.Button("📄 Generate Summary")
            sum_button.click(
                fn=summarize_pdf,
                inputs=[pdf_input_sum],
                outputs=[summary_output],
            )

# Launch the interface
demo.launch()