"""Gradio app for question answering over, and summarization of, an uploaded PDF.

The script loads two Hugging Face pipelines at import time (extractive QA and
summarization), extracts plain text from the uploaded PDF with PyMuPDF, and
exposes both features as tabs in a Gradio Blocks UI.
"""

import fitz  # PyMuPDF
import gradio as gr
from transformers import pipeline

# Load models
qa_pipeline = pipeline(
    "question-answering",
    model="bert-large-uncased-whole-word-masking-finetuned-squad",
)
# Or use 'facebook/bart-large-cnn'
summarizer = pipeline("summarization", model="t5-small", tokenizer="t5-small")

# Number of leading characters of the PDF text handed to the summarizer.
# This is a character budget, not a token budget; the tokenizer-level
# truncation in summarize_pdf is what enforces the model's real limit.
MAX_SUMMARY_INPUT_CHARS = 1024


def extract_text(pdf_file) -> str:
    """Return the concatenated text of every page of an uploaded PDF.

    Args:
        pdf_file: The uploaded file. Either an object exposing the on-disk
            path as ``.name`` or a plain path string is accepted
            (NOTE(review): which one Gradio supplies depends on the installed
            version and the ``gr.File`` ``type`` setting -- confirm).

    Returns:
        The text of all pages joined in page order; empty if no page yields
        any text.
    """
    # Fall back to the value itself when there is no ``.name`` attribute so
    # that a bare path string works as well as a file wrapper.
    pdf_path = getattr(pdf_file, "name", pdf_file)
    with fitz.open(pdf_path) as doc:
        # join() avoids repeated string reallocation on many-page documents.
        return "".join(page.get_text() for page in doc)


def qa_from_pdf_upload(pdf_file, question):
    """Answer ``question`` using the text of the uploaded PDF as context.

    Args:
        pdf_file: The uploaded PDF (see ``extract_text``); falsy if nothing
            was uploaded.
        question: The user's question.

    Returns:
        A 4-tuple ``(answer, score_percent, start, end)`` taken from the QA
        pipeline result, with the score scaled to a percentage and rounded to
        two decimals. On invalid input the first element is an error message
        and the remaining three are empty strings, so the tuple always matches
        the four output widgets.
    """
    if not pdf_file:
        return "❌ Please upload a PDF.", "", "", ""

    # The QA pipeline rejects an empty question with an exception; report it
    # in the UI instead of letting the handler crash.
    if not question or not question.strip():
        return "❌ Please enter a question.", "", "", ""

    context = extract_text(pdf_file)
    if not context.strip():
        return "❌ Could not extract text from the PDF.", "", "", ""

    result = qa_pipeline(question=question, context=context)
    return (
        result["answer"],
        round(result["score"] * 100, 2),
        result["start"],
        result["end"],
    )


def summarize_pdf(pdf_file) -> str:
    """Summarize the beginning of the uploaded PDF.

    Only the first ``MAX_SUMMARY_INPUT_CHARS`` characters of the extracted
    text are summarized.

    Args:
        pdf_file: The uploaded PDF (see ``extract_text``); falsy if nothing
            was uploaded.

    Returns:
        The generated summary, or an error message if no file was uploaded or
        no text could be extracted.
    """
    if not pdf_file:
        return "❌ Please upload a PDF."

    text = extract_text(pdf_file)
    if not text.strip():
        return "❌ Could not extract text from the PDF."

    # Truncate text if too long for model
    text = text[:MAX_SUMMARY_INPUT_CHARS]

    # truncation=True is a safety net: the slice above limits characters,
    # while the model's limit is expressed in tokens.
    outputs = summarizer(
        text,
        max_length=150,
        min_length=40,
        do_sample=False,
        truncation=True,
    )
    return outputs[0]["summary_text"]


# Gradio UI with Tabs
with gr.Blocks(title="📘 Morningstar PDF Analyzer") as demo:
    gr.Markdown("## 📘 Morningstar Fund PDF Analyzer\nUpload a PDF fund report and either ask questions or get a summary.")

    with gr.Tabs():
        with gr.TabItem("🔍 Q&A from PDF"):
            pdf_input_qa = gr.File(label="📥 Upload Fund PDF")
            question_input = gr.Textbox(label="❓ Your Question", placeholder="e.g., Who is the fund manager?")
            answer_output = gr.Textbox(label="✅ Answer")
            score_output = gr.Textbox(label="📊 Confidence Score (%)")
            start_output = gr.Textbox(label="Start Index")
            end_output = gr.Textbox(label="End Index")
            qa_button = gr.Button("🧠 Get Answer")
            qa_button.click(
                fn=qa_from_pdf_upload,
                inputs=[pdf_input_qa, question_input],
                outputs=[answer_output, score_output, start_output, end_output],
            )

        with gr.TabItem("📝 Summary"):
            pdf_input_sum = gr.File(label="📥 Upload Fund PDF")
            summary_output = gr.Textbox(label="📝 Summary", lines=10)
            sum_button = gr.Button("📄 Generate Summary")
            sum_button.click(
                fn=summarize_pdf,
                inputs=[pdf_input_sum],
                outputs=[summary_output],
            )

# Launch the interface
if __name__ == "__main__":
    demo.launch()