dealflow-ai / ui /app.py
PeterBot22's picture
fix: switch to Docker SDK for full build control
b81fb8e verified
"""
DealFlow AI β€” Gradio Web UI
Multi-agent investment due diligence system.
PDF pitch deck upload β†’ 3-agent analysis β†’ investment memo output.
"""
from __future__ import annotations
import os
import sys
import tempfile
import threading
from pathlib import Path
import gradio as gr
from loguru import logger
# Ensure src is importable
sys.path.insert(0, str(Path(__file__).parent.parent))
from src.crew import DealFlowCrew
# ─── Theme & CSS ─────────────────────────────────────────────────────────────
CUSTOM_CSS = """
.header-block {
background: linear-gradient(135deg, #1e3a5f 0%, #0f2027 100%);
padding: 24px;
border-radius: 12px;
margin-bottom: 16px;
}
.status-running { color: #f59e0b; font-weight: bold; }
.status-done { color: #10b981; font-weight: bold; }
.status-error { color: #ef4444; font-weight: bold; }
.agent-card {
border-left: 4px solid #2563eb;
padding-left: 12px;
margin: 8px 0;
}
"""
# Markdown rendered at the top of the page by build_ui().
# The separators in the numbered list are real em dashes (U+2014); they were
# previously stored as mis-decoded UTF-8 and showed up as garbage in the UI.
HEADER_MD = """
# DealFlow AI
### Multi-Agent Investment Due Diligence System
**Powered by:** AMD MI300X + Qwen3-VL-32B + CrewAI v1.0
Upload a startup pitch deck PDF and DealFlow AI will deploy a 3-agent crew to:
1. **Researcher** — extract claims, validate market data, map competitors
2. **Financial Analyst** — validate projections, model unit economics, generate charts
3. **Report Writer** — synthesize a professional investment memo
"""
# Markdown table with one row per agent; the three ``*_status`` placeholders
# are filled in by format_agent_status().
AGENT_STATUS_TEMPLATE = """\
| Agent | Status | Role |
|-------|--------|------|
| Researcher | {r_status} | Web research + PDF extraction |
| Financial Analyst | {fa_status} | Projection validation + charts |
| Report Writer | {rw_status} | Investment memo synthesis |
"""


def format_agent_status(stage: int) -> str:
    """Render the agent status table for a given pipeline stage.

    Args:
        stage: ``0`` = Researcher running, ``1`` = Financial Analyst running,
            ``2`` = Report Writer running, ``3`` = all agents done. Any other
            value (callers pass ``-1``) shows every agent as ``"Waiting"``.

    Returns:
        ``AGENT_STATUS_TEMPLATE`` with the three status cells filled in.
    """
    # Proper Unicode glyphs; the previous literals were mis-decoded UTF-8.
    running, done, queued = "⏳ Running", "✅ Done", "⏭ Queued"
    # One (researcher, financial analyst, report writer) triple per stage.
    by_stage = {
        0: (running, queued, queued),
        1: (done, running, queued),
        2: (done, done, running),
        3: (done, done, done),
    }
    r_status, fa_status, rw_status = by_stage.get(stage, ("Waiting",) * 3)
    return AGENT_STATUS_TEMPLATE.format(
        r_status=r_status,
        fa_status=fa_status,
        rw_status=rw_status,
    )
# Upper bound on the uploaded file size; run_analysis() compares it with
# os.path.getsize() of the upload and rejects anything larger.
_MAX_PDF_BYTES = 50 * 1024 * 1024 # 50 MB
# The first four bytes of an upload must equal this signature to be accepted.
_PDF_MAGIC = b"%PDF"
_ALLOWED_VLLM_HOSTS = frozenset(["localhost", "127.0.0.1"])
def _validate_vllm_url(url: str) -> bool:
"""Only allow vLLM URLs pointing to localhost to prevent SSRF."""
try:
from urllib.parse import urlparse
parsed = urlparse(url)
return parsed.hostname in _ALLOWED_VLLM_HOSTS
except Exception:
return False
def _analysis_error(message: str):
    """Build the five-slot output tuple emitted when ``run_analysis`` aborts."""
    return (
        message,
        format_agent_status(-1),
        gr.update(visible=False),
        gr.update(visible=False),
        [],
    )


def run_analysis(
    pdf_file,
    company_name: str,
    llm_backend: str,
    serper_key: str,
    vllm_url: str,
    vllm_model: str,
    hf_token: str,
    progress=gr.Progress(track_tqdm=True),
):
    """Validate the upload, run the DealFlow crew and stream UI updates.

    This is a generator. Every ``yield`` is a 5-tuple of
    ``(status markdown, agent status table, memo update, memo download
    update, chart list)``.

    Args:
        pdf_file: Path of the uploaded pitch deck, or ``None`` if nothing was
            uploaded.
        company_name: Free-text company name; control characters are removed,
            the result is capped at 100 characters and falls back to
            ``"Unknown Company"``.
        llm_backend: Backend identifier passed to ``Backend(...)``.
        serper_key: Serper API key; blank falls back to ``SERPER_API_KEY``.
        vllm_url: vLLM base URL; blank falls back to ``VLLM_BASE_URL`` and then
            to ``http://localhost:8000/v1``. Must pass ``_validate_vllm_url``.
        vllm_model: vLLM model name; blank falls back to ``VLLM_MODEL``.
        hf_token: HuggingFace token; blank falls back to ``HF_API_TOKEN``.
        progress: Gradio progress tracker (``track_tqdm=True``); it is not
            referenced directly in the body.

    Yields:
        Status tuples as described above. Validation failures and pipeline
        exceptions are reported as an error tuple rather than raised.
    """
    # Validate inputs
    if pdf_file is None:
        yield _analysis_error("**Error:** Please upload a PDF pitch deck.")
        return

    # Sanitize company name (strip control chars, limit length)
    company_name = "".join(
        c for c in (company_name or "").strip() if c >= " "
    )[:100] or "Unknown Company"

    # Server-side PDF validation (size + magic bytes). Only the filesystem
    # calls live inside the try; the header is read only when the size is OK.
    try:
        pdf_size = os.path.getsize(pdf_file)
        magic = b""
        if pdf_size <= _MAX_PDF_BYTES:
            with open(pdf_file, "rb") as fh:
                magic = fh.read(4)
    except (OSError, TypeError, ValueError) as exc:
        logger.error("PDF validation error: {}", exc)
        yield _analysis_error("**Error:** Could not validate the uploaded file.")
        return

    if pdf_size > _MAX_PDF_BYTES:
        yield _analysis_error(
            f"**Error:** PDF too large ({pdf_size // 1024 // 1024} MB). Maximum is 50 MB."
        )
        return
    if magic != _PDF_MAGIC:
        yield _analysis_error(
            "**Error:** Uploaded file does not appear to be a valid PDF."
        )
        return

    # Build per-request config (never mutate os.environ).
    # Text inputs are None-guarded the same way company_name is above.
    effective_vllm_url = (vllm_url or "").strip() or os.getenv(
        "VLLM_BASE_URL", "http://localhost:8000/v1"
    )
    if not _validate_vllm_url(effective_vllm_url):
        yield _analysis_error("**Error:** vLLM URL must point to localhost only.")
        return

    from src.config import AppConfig, Backend

    config = AppConfig(
        llm_backend=Backend(llm_backend),
        vllm_base_url=effective_vllm_url,
        vllm_model=(vllm_model or "").strip()
        or os.getenv("VLLM_MODEL", "Qwen/Qwen3-VL-32B-Instruct-FP8"),
        vllm_api_key=os.getenv("VLLM_API_KEY", ""),
        hf_api_token=(hf_token or "").strip() or os.getenv("HF_API_TOKEN"),
        serper_api_key=(serper_key or "").strip() or os.getenv("SERPER_API_KEY"),
    )

    # Set up output dir
    out_dir = Path(tempfile.mkdtemp(prefix="dealflow_"))
    log_lines: list[str] = []
    stage = [0]  # mutable container for closure; written here, not yet read

    def progress_callback(msg: str) -> None:
        log_lines.append(msg)
        # Detect stage transitions. Compare upper-case needles against the
        # upper-cased message: a mixed-case needle can never match it.
        upper_msg = msg.upper()
        if "AGENT 1/3" in upper_msg or "RESEARCHER" in upper_msg:
            stage[0] = 0
        elif "AGENT 2/3" in upper_msg or "FINANCIAL" in upper_msg:
            stage[0] = 1
        elif "AGENT 3/3" in upper_msg or "REPORT" in upper_msg:
            stage[0] = 2

    # Initial status
    yield (
        "**Status:** Initializing DealFlow AI crew...",
        format_agent_status(-1),
        gr.update(visible=False),
        gr.update(visible=False),
        [],
    )

    try:
        crew = DealFlowCrew(config=config, progress_callback=progress_callback)
        yield (
            "**Status:** Running due diligence pipeline...\n\n"
            + "\n".join(f"- {ln}" for ln in log_lines[-5:]),
            format_agent_status(0),
            gr.update(visible=False),
            gr.update(visible=False),
            [],
        )

        # Run the crew (blocking)
        result = crew.run(
            pdf_path=pdf_file,
            company_name=company_name,
            output_dir=str(out_dir),
        )
        memo = result.get("memo_content", "")
        charts = result.get("charts", [])
        elapsed = result.get("elapsed_seconds", 0)
        backend = result.get("backend", "unknown")
        memo_path = result.get("memo_path")
        status_msg = (
            f"**Status:** Analysis complete in {elapsed}s \n"
            f"**Backend:** {backend} \n"
            f"**Charts generated:** {len(charts)}"
        )
        yield (
            status_msg,
            format_agent_status(3),
            gr.update(value=memo, visible=True),
            gr.update(value=memo_path, visible=bool(memo_path)),
            charts,
        )
    except Exception as exc:
        # loguru: logger.exception attaches the traceback. The error text is
        # passed as a format argument so braces inside it cannot break
        # loguru's str.format-based message rendering.
        logger.exception("Analysis failed: {}", exc)
        yield _analysis_error(
            "**Error:** Analysis failed. Please check your configuration and try again."
        )
def build_ui() -> gr.Blocks:
    """Assemble the DealFlow AI Gradio interface and return it unlaunched.

    Layout: the header Markdown, then a row with an input column (PDF upload,
    company name, inference settings, API keys, run button) and an output
    column (status text, agent status table, memo and chart tabs, memo
    download). Clicking the run button feeds the inputs to ``run_analysis``
    and routes its five yielded values to the output widgets.

    Returns:
        The configured ``gr.Blocks`` app; calling ``launch()`` is left to the
        caller.
    """
    # NOTE(review): widget nesting was reconstructed from a copy of this file
    # with flattened indentation — confirm that the run button and the memo
    # download belong at column level rather than inside the preceding
    # accordion / tab.
    with gr.Blocks(
        theme=gr.themes.Base(
            primary_hue="blue",
            secondary_hue="slate",
        ),
        css=CUSTOM_CSS,
        title="DealFlow AI",
    ) as demo:
        gr.Markdown(HEADER_MD)

        with gr.Row():
            # ─── Left: Inputs ─────────────────────────────────────────
            with gr.Column(scale=1):
                gr.Markdown("### Upload & Configure")
                pdf_input = gr.File(
                    label="Pitch Deck PDF",
                    file_types=[".pdf"],
                    type="filepath",
                )
                company_input = gr.Textbox(
                    label="Company Name",
                    placeholder="e.g. Acme AI",
                    max_lines=1,
                )
                with gr.Accordion("Inference Settings", open=False):
                    backend_select = gr.Radio(
                        choices=["vllm", "hf", "openai"],
                        value="vllm",
                        label="Backend",
                        info="vllm = AMD MI300X (recommended), hf = HuggingFace fallback",
                    )
                    vllm_url_input = gr.Textbox(
                        label="vLLM Base URL",
                        value="http://localhost:8000/v1",
                        placeholder="http://localhost:8000/v1",
                        max_lines=1,
                    )
                    vllm_model_input = gr.Textbox(
                        label="vLLM Model",
                        value="Qwen/Qwen3-VL-32B-Instruct-FP8",
                        max_lines=1,
                    )
                    hf_token_input = gr.Textbox(
                        label="HuggingFace Token (HF fallback)",
                        placeholder="hf_...",
                        type="password",
                        max_lines=1,
                    )
                with gr.Accordion("API Keys", open=False):
                    serper_key_input = gr.Textbox(
                        label="Serper API Key",
                        placeholder="Your Serper.dev key",
                        type="password",
                        max_lines=1,
                    )
                run_btn = gr.Button(
                    "Run Due Diligence",
                    variant="primary",
                    size="lg",
                )

            # ─── Right: Outputs ───────────────────────────────────────
            with gr.Column(scale=2):
                gr.Markdown("### Analysis Progress")
                status_output = gr.Markdown("**Status:** Waiting for input...")
                agent_status_output = gr.Markdown(format_agent_status(-1))
                with gr.Tab("Investment Memo"):
                    memo_output = gr.Markdown(
                        value="*Investment memo will appear here after analysis...*",
                        visible=True,
                    )
                with gr.Tab("Charts"):
                    charts_output = gr.Gallery(
                        label="Generated Charts",
                        show_label=False,
                        columns=2,
                        height="auto",
                    )
                # Hidden until run_analysis yields a memo path.
                memo_download = gr.File(
                    label="Download Memo (Markdown)",
                    visible=False,
                )

        # ─── Wire up ──────────────────────────────────────────────────
        # `inputs` follows run_analysis's positional parameter order and
        # `outputs` follows the order of the tuples it yields.
        run_btn.click(
            fn=run_analysis,
            inputs=[
                pdf_input,
                company_input,
                backend_select,
                serper_key_input,
                vllm_url_input,
                vllm_model_input,
                hf_token_input,
            ],
            outputs=[
                status_output,
                agent_status_output,
                memo_output,
                memo_download,
                charts_output,
            ],
        )

        # Footer; the separator is a real em dash (previously mis-encoded).
        gr.Markdown(
            "---\n"
            "*DealFlow AI — AMD Developer Hackathon 2026 | "
            "CrewAI v1.0 + Qwen3-VL-32B + AMD MI300X*"
        )
    return demo
if __name__ == "__main__":
    # Script entry point: build the interface, then serve it on all
    # interfaces. The port comes from GRADIO_PORT and defaults to 7860.
    demo = build_ui()
    listen_port = int(os.getenv("GRADIO_PORT", "7860"))
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": listen_port,
        "share": False,
        "show_error": False,  # Don't expose error details in UI
    }
    demo.launch(**launch_options)