File size: 2,752 Bytes
4ffec39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import gradio as gr
import fitz  # PyMuPDF
from transformers import pipeline

# Load models
# Checkpoint names are spelled out once so the pipeline calls stay short.
QA_MODEL_NAME = "bert-large-uncased-whole-word-masking-finetuned-squad"
SUMMARY_MODEL_NAME = "t5-small"  # Or use 'facebook/bart-large-cnn'

# Both pipelines are created at import time and shared by the handlers below.
qa_pipeline = pipeline(task="question-answering", model=QA_MODEL_NAME)
summarizer = pipeline(
    task="summarization",
    model=SUMMARY_MODEL_NAME,
    tokenizer=SUMMARY_MODEL_NAME,
)

# Extract text from uploaded PDF
def extract_text(pdf_file):
    """Return the concatenated text of every page in a PDF.

    Args:
        pdf_file: Either a plain path string, or an upload object that
            exposes the file's path as its ``.name`` attribute.

    Returns:
        The text of all pages joined in page order. The result is an empty
        string when no page yields any text.
    """
    # A string is already a path; any other upload object carries it in ``.name``.
    pdf_path = pdf_file if isinstance(pdf_file, str) else pdf_file.name
    with fitz.open(pdf_path) as doc:
        # join() builds the result in one pass instead of re-copying the
        # accumulated string for every page.
        return "".join(page.get_text() for page in doc)

# QA pipeline function
def qa_from_pdf_upload(pdf_file, question):
    """Answer a question using the text of an uploaded PDF as context.

    Args:
        pdf_file: The uploaded PDF, in whatever form ``extract_text``
            accepts; falsy when nothing was uploaded.
        question: The question text typed by the user.

    Returns:
        A 4-tuple ``(answer, score_percent, start, end)``. ``score_percent``
        is the pipeline score scaled to 0-100 and rounded to two decimals;
        ``start`` and ``end`` are the values the pipeline reports for the
        answer span. On any validation failure the first item is an error
        message and the remaining three are empty strings.
    """
    if not pdf_file:
        return "❌ Please upload a PDF.", "", "", ""

    # The question-answering pipeline raises on an empty question, so a blank
    # one is reported here, before the PDF is parsed at all.
    if not question or not question.strip():
        return "❌ Please enter a question.", "", "", ""

    context = extract_text(pdf_file)
    if not context.strip():
        return "❌ Could not extract text from the PDF.", "", "", ""

    result = qa_pipeline(question=question, context=context)
    confidence_pct = round(result["score"] * 100, 2)
    return result["answer"], confidence_pct, result["start"], result["end"]

# Summarization function
def summarize_pdf(pdf_file, max_input_length=1024):
    """Summarize the leading portion of an uploaded PDF.

    Args:
        pdf_file: The uploaded PDF, in whatever form ``extract_text``
            accepts; falsy when nothing was uploaded.
        max_input_length: Maximum number of characters of extracted text
            handed to the summarizer. Defaults to 1024.

    Returns:
        The summary text, or an error message when no file was supplied or
        no text could be extracted.
    """
    if not pdf_file:
        return "❌ Please upload a PDF."

    text = extract_text(pdf_file)
    if not text.strip():
        return "❌ Could not extract text from the PDF."

    # The character cut only bounds the work; it is not a token limit.
    excerpt = text[:max_input_length]

    # truncation=True lets the tokenizer enforce the model's real input limit
    # in case the excerpt still tokenizes to more than the model accepts.
    outputs = summarizer(
        excerpt,
        max_length=150,
        min_length=40,
        do_sample=False,
        truncation=True,
    )
    return outputs[0]["summary_text"]

# Gradio UI with Tabs
# One tab per feature; each tab has its own file upload so the two workflows
# do not share state.
with gr.Blocks(title="📘 Morningstar PDF Analyzer") as demo:
    gr.Markdown("## 📘 Morningstar Fund PDF Analyzer\nUpload a PDF fund report and either ask questions or get a summary.")

    with gr.Tabs():
        # Tab 1: extractive question answering over the PDF text.
        with gr.TabItem("🔍 Q&A from PDF"):
            pdf_input_qa = gr.File(label="📥 Upload Fund PDF")
            question_input = gr.Textbox(label="❓ Your Question", placeholder="e.g., Who is the fund manager?")
            answer_output = gr.Textbox(label="✅ Answer")
            score_output = gr.Textbox(label="📊 Confidence Score (%)")
            start_output = gr.Textbox(label="Start Index")
            end_output = gr.Textbox(label="End Index")
            qa_button = gr.Button("🧠 Get Answer")
            # The four outputs line up with the 4-tuple returned by qa_from_pdf_upload.
            qa_button.click(
                fn=qa_from_pdf_upload,
                inputs=[pdf_input_qa, question_input],
                outputs=[answer_output, score_output, start_output, end_output]
            )

        # Tab 2: summary of the PDF text.
        with gr.TabItem("📝 Summary"):
            pdf_input_sum = gr.File(label="📥 Upload Fund PDF")
            summary_output = gr.Textbox(label="📝 Summary", lines=10)
            sum_button = gr.Button("📄 Generate Summary")
            sum_button.click(fn=summarize_pdf, inputs=[pdf_input_sum], outputs=[summary_output])

# Launch the interface
demo.launch()