""" DealFlow AI — Gradio Web UI Multi-agent investment due diligence system. PDF pitch deck upload → 3-agent analysis → investment memo output. """ from __future__ import annotations import os import sys import tempfile import threading from pathlib import Path import gradio as gr from loguru import logger # Ensure src is importable sys.path.insert(0, str(Path(__file__).parent.parent)) from src.crew import DealFlowCrew # ─── Theme & CSS ───────────────────────────────────────────────────────────── CUSTOM_CSS = """ .header-block { background: linear-gradient(135deg, #1e3a5f 0%, #0f2027 100%); padding: 24px; border-radius: 12px; margin-bottom: 16px; } .status-running { color: #f59e0b; font-weight: bold; } .status-done { color: #10b981; font-weight: bold; } .status-error { color: #ef4444; font-weight: bold; } .agent-card { border-left: 4px solid #2563eb; padding-left: 12px; margin: 8px 0; } """ HEADER_MD = """ # DealFlow AI ### Multi-Agent Investment Due Diligence System **Powered by:** AMD MI300X + Qwen3-VL-32B + CrewAI v1.0 Upload a startup pitch deck PDF and DealFlow AI will deploy a 3-agent crew to: 1. **Researcher** — extract claims, validate market data, map competitors 2. **Financial Analyst** — validate projections, model unit economics, generate charts 3. **Report Writer** — synthesize a professional investment memo """ AGENT_STATUS_TEMPLATE = """\ | Agent | Status | Role | |-------|--------|------| | Researcher | {r_status} | Web research + PDF extraction | | Financial Analyst | {fa_status} | Projection validation + charts | | Report Writer | {rw_status} | Investment memo synthesis | """ def format_agent_status(stage: int) -> str: statuses = ["Waiting", "Waiting", "Waiting"] icons = ["⏳ Running", "✅ Done", "⏭ Queued"] if stage == 0: statuses = ["⏳ Running", "⏭ Queued", "⏭ Queued"] elif stage == 1: statuses = ["✅ Done", "⏳ Running", "⏭ Queued"] elif stage == 2: statuses = ["✅ Done", "✅ Done", "⏳ Running"] elif stage == 3: statuses = ["✅ Done", "✅ Done", "✅ Done"] return AGENT_STATUS_TEMPLATE.format( r_status=statuses[0], fa_status=statuses[1], rw_status=statuses[2], ) _MAX_PDF_BYTES = 50 * 1024 * 1024 # 50 MB _PDF_MAGIC = b"%PDF" _ALLOWED_VLLM_HOSTS = frozenset(["localhost", "127.0.0.1"]) def _validate_vllm_url(url: str) -> bool: """Only allow vLLM URLs pointing to localhost to prevent SSRF.""" try: from urllib.parse import urlparse parsed = urlparse(url) return parsed.hostname in _ALLOWED_VLLM_HOSTS except Exception: return False def run_analysis( pdf_file, company_name: str, llm_backend: str, serper_key: str, vllm_url: str, vllm_model: str, hf_token: str, progress=gr.Progress(track_tqdm=True), ): """Main analysis function called by Gradio.""" # Validate inputs if pdf_file is None: yield ( "**Error:** Please upload a PDF pitch deck.", format_agent_status(-1), gr.update(visible=False), gr.update(visible=False), [], ) return # Sanitize company name (strip control chars, limit length) company_name = "".join( c for c in (company_name or "").strip() if c >= " " )[:100] or "Unknown Company" # Server-side PDF validation (size + magic bytes) try: pdf_size = os.path.getsize(pdf_file) if pdf_size > _MAX_PDF_BYTES: yield ( f"**Error:** PDF too large ({pdf_size // 1024 // 1024} MB). Maximum is 50 MB.", format_agent_status(-1), gr.update(visible=False), gr.update(visible=False), [], ) return with open(pdf_file, "rb") as fh: magic = fh.read(4) if magic != _PDF_MAGIC: yield ( "**Error:** Uploaded file does not appear to be a valid PDF.", format_agent_status(-1), gr.update(visible=False), gr.update(visible=False), [], ) return except Exception as exc: logger.error(f"PDF validation error: {exc}") yield ( "**Error:** Could not validate the uploaded file.", format_agent_status(-1), gr.update(visible=False), gr.update(visible=False), [], ) return # Build per-request config (never mutate os.environ) effective_vllm_url = vllm_url.strip() or os.getenv("VLLM_BASE_URL", "http://localhost:8000/v1") if not _validate_vllm_url(effective_vllm_url): yield ( "**Error:** vLLM URL must point to localhost only.", format_agent_status(-1), gr.update(visible=False), gr.update(visible=False), [], ) return from src.config import AppConfig, Backend config = AppConfig( llm_backend=Backend(llm_backend), vllm_base_url=effective_vllm_url, vllm_model=vllm_model.strip() or os.getenv("VLLM_MODEL", "Qwen/Qwen3-VL-32B-Instruct-FP8"), vllm_api_key=os.getenv("VLLM_API_KEY", ""), hf_api_token=hf_token.strip() or os.getenv("HF_API_TOKEN"), serper_api_key=serper_key.strip() or os.getenv("SERPER_API_KEY"), ) # Set up output dir out_dir = Path(tempfile.mkdtemp(prefix="dealflow_")) log_lines: list[str] = [] stage = [0] # mutable container for closure def progress_callback(msg: str) -> None: log_lines.append(msg) # Detect stage transitions if "AGENT 1/3" in msg or "Researcher" in msg.upper(): stage[0] = 0 elif "AGENT 2/3" in msg or "Financial" in msg.upper(): stage[0] = 1 elif "AGENT 3/3" in msg or "Report" in msg.upper(): stage[0] = 2 # Initial status yield ( "**Status:** Initializing DealFlow AI crew...", format_agent_status(-1), gr.update(visible=False), gr.update(visible=False), [], ) try: crew = DealFlowCrew(config=config, progress_callback=progress_callback) yield ( "**Status:** Running due diligence pipeline...\n\n" + "\n".join(f"- {ln}" for ln in log_lines[-5:]), format_agent_status(0), gr.update(visible=False), gr.update(visible=False), [], ) # Run the crew (blocking) result = crew.run( pdf_path=pdf_file, company_name=company_name, output_dir=str(out_dir), ) memo = result.get("memo_content", "") charts = result.get("charts", []) elapsed = result.get("elapsed_seconds", 0) backend = result.get("backend", "unknown") status_msg = ( f"**Status:** Analysis complete in {elapsed}s \n" f"**Backend:** {backend} \n" f"**Charts generated:** {len(charts)}" ) yield ( status_msg, format_agent_status(3), gr.update(value=memo, visible=True), gr.update( value=result.get("memo_path"), visible=bool(result.get("memo_path")), ), charts, ) except Exception as exc: logger.error(f"Analysis failed: {exc}", exc_info=True) yield ( "**Error:** Analysis failed. Please check your configuration and try again.", format_agent_status(-1), gr.update(visible=False), gr.update(visible=False), [], ) def build_ui() -> gr.Blocks: with gr.Blocks( theme=gr.themes.Base( primary_hue="blue", secondary_hue="slate", ), css=CUSTOM_CSS, title="DealFlow AI", ) as demo: gr.Markdown(HEADER_MD) with gr.Row(): # ─── Left: Inputs ───────────────────────────────────────── with gr.Column(scale=1): gr.Markdown("### Upload & Configure") pdf_input = gr.File( label="Pitch Deck PDF", file_types=[".pdf"], type="filepath", ) company_input = gr.Textbox( label="Company Name", placeholder="e.g. Acme AI", max_lines=1, ) with gr.Accordion("Inference Settings", open=False): backend_select = gr.Radio( choices=["vllm", "hf", "openai"], value="vllm", label="Backend", info="vllm = AMD MI300X (recommended), hf = HuggingFace fallback", ) vllm_url_input = gr.Textbox( label="vLLM Base URL", value="http://localhost:8000/v1", placeholder="http://localhost:8000/v1", max_lines=1, ) vllm_model_input = gr.Textbox( label="vLLM Model", value="Qwen/Qwen3-VL-32B-Instruct-FP8", max_lines=1, ) hf_token_input = gr.Textbox( label="HuggingFace Token (HF fallback)", placeholder="hf_...", type="password", max_lines=1, ) with gr.Accordion("API Keys", open=False): serper_key_input = gr.Textbox( label="Serper API Key", placeholder="Your Serper.dev key", type="password", max_lines=1, ) run_btn = gr.Button( "Run Due Diligence", variant="primary", size="lg", ) # ─── Right: Outputs ─────────────────────────────────────── with gr.Column(scale=2): gr.Markdown("### Analysis Progress") status_output = gr.Markdown("**Status:** Waiting for input...") agent_status_output = gr.Markdown(format_agent_status(-1)) with gr.Tab("Investment Memo"): memo_output = gr.Markdown( value="*Investment memo will appear here after analysis...*", visible=True, ) with gr.Tab("Charts"): charts_output = gr.Gallery( label="Generated Charts", show_label=False, columns=2, height="auto", ) memo_download = gr.File( label="Download Memo (Markdown)", visible=False, ) # ─── Wire up ────────────────────────────────────────────────── run_btn.click( fn=run_analysis, inputs=[ pdf_input, company_input, backend_select, serper_key_input, vllm_url_input, vllm_model_input, hf_token_input, ], outputs=[ status_output, agent_status_output, memo_output, memo_download, charts_output, ], ) gr.Markdown( "---\n" "*DealFlow AI — AMD Developer Hackathon 2026 | " "CrewAI v1.0 + Qwen3-VL-32B + AMD MI300X*" ) return demo if __name__ == "__main__": demo = build_ui() demo.launch( server_name="0.0.0.0", server_port=int(os.getenv("GRADIO_PORT", "7860")), share=False, show_error=False, # Don't expose error details in UI )