Yatheshr committed on
Commit
4ffec39
·
verified ·
1 Parent(s): 312210c

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -0
app.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import fitz # PyMuPDF
3
+ from transformers import pipeline
4
+
5
+ # Load models
6
# Extractive question-answering model (BERT-large fine-tuned on SQuAD).
qa_pipeline = pipeline(
    "question-answering",
    model="bert-large-uncased-whole-word-masking-finetuned-squad",
)

# Summarization model; 'facebook/bart-large-cnn' is an alternative checkpoint.
summarizer = pipeline(
    "summarization",
    model="t5-small",
    tokenizer="t5-small",
)
8
+
9
+ # Extract text from uploaded PDF
10
def extract_text(pdf_file):
    """Return the concatenated plain text of every page in a PDF.

    Args:
        pdf_file: Either a filesystem path (``str`` or ``os.PathLike``) or an
            uploaded-file object that exposes its path as ``.name``.

    Returns:
        The text of all pages joined in page order; an empty string when
        no page yields any text.
    """
    import os  # local import so this block does not depend on file-level imports

    # Path-like values are opened directly; any other object is expected to
    # carry the on-disk location in its ``name`` attribute.
    if isinstance(pdf_file, (str, os.PathLike)):
        pdf_path = os.fspath(pdf_file)
    else:
        pdf_path = pdf_file.name

    with fitz.open(pdf_path) as doc:
        # Join once instead of growing a string with repeated ``+=``.
        return "".join(page.get_text() for page in doc)
16
+
17
+ # QA pipeline function
18
def qa_from_pdf_upload(pdf_file, question):
    """Answer a question using the text of an uploaded PDF as context.

    Args:
        pdf_file: The uploaded PDF (falsy when nothing was uploaded).
        question: The user's question as text.

    Returns:
        A 4-tuple ``(answer, confidence_percent, start, end)`` taken from the
        QA pipeline result, with the score scaled to a percentage and rounded
        to two decimals. When the input is unusable the tuple is
        ``(error_message, "", "", "")`` instead.
    """
    blanks = ("", "", "")

    if not pdf_file:
        return ("❌ Please upload a PDF.", *blanks)

    # Reject blank questions up front, before the PDF is parsed or the
    # model is invoked, so the user gets a message instead of an exception.
    if not question or not question.strip():
        return ("❌ Please enter a question.", *blanks)

    context = extract_text(pdf_file)
    if not context.strip():
        return ("❌ Could not extract text from the PDF.", *blanks)

    result = qa_pipeline(question=question, context=context)
    confidence_pct = round(result["score"] * 100, 2)
    return result["answer"], confidence_pct, result["start"], result["end"]
28
+
29
+ # Summarization function
30
def summarize_pdf(pdf_file):
    """Summarize the beginning of an uploaded PDF.

    Args:
        pdf_file: The uploaded PDF (falsy when nothing was uploaded).

    Returns:
        The summary text produced by the summarization pipeline, or an
        error message string when no file was given or no text could be
        extracted.
    """
    if not pdf_file:
        return "❌ Please upload a PDF."

    text = extract_text(pdf_file).strip()
    if not text:
        return "❌ Could not extract text from the PDF."

    # Only the first characters are summarized. The text is stripped before
    # cutting so leading whitespace does not use up the character budget.
    max_input_length = 1024
    excerpt = text[:max_input_length]

    outputs = summarizer(excerpt, max_length=150, min_length=40, do_sample=False)
    return outputs[0]["summary_text"]
44
+
45
+ # Gradio UI with Tabs
46
# Gradio UI: one tab for question answering, one for summarization.
# The emoji in the labels below were restored from mis-encoded text.
with gr.Blocks(title="📘 Morningstar PDF Analyzer") as demo:
    gr.Markdown("## 📘 Morningstar Fund PDF Analyzer\nUpload a PDF fund report and either ask questions or get a summary.")

    with gr.Tabs():
        # NOTE(review): this tab icon was reconstructed from a truncated byte sequence; confirm.
        with gr.TabItem("🔍 Q&A from PDF"):
            pdf_input_qa = gr.File(label="📥 Upload Fund PDF")
            question_input = gr.Textbox(label="❓ Your Question", placeholder="e.g., Who is the fund manager?")
            answer_output = gr.Textbox(label="✅ Answer")
            score_output = gr.Textbox(label="📊 Confidence Score (%)")
            start_output = gr.Textbox(label="Start Index")
            end_output = gr.Textbox(label="End Index")
            qa_button = gr.Button("🧠 Get Answer")
            # The four outputs map one-to-one onto the tuple returned by qa_from_pdf_upload.
            qa_button.click(
                fn=qa_from_pdf_upload,
                inputs=[pdf_input_qa, question_input],
                outputs=[answer_output, score_output, start_output, end_output],
            )

        # NOTE(review): the memo icon here was reconstructed from a truncated byte sequence; confirm.
        with gr.TabItem("📝 Summary"):
            pdf_input_sum = gr.File(label="📥 Upload Fund PDF")
            summary_output = gr.Textbox(label="📝 Summary", lines=10)
            sum_button = gr.Button("📄 Generate Summary")
            sum_button.click(fn=summarize_pdf, inputs=[pdf_input_sum], outputs=[summary_output])

# Launch the interface
demo.launch()