Spaces:

PeterBot22
/

dealflow-ai

Running

App Files Files Community

dealflow-ai / ui /app.py

PeterBot22

fix: switch to Docker SDK for full build control

b81fb8e verified 26 days ago

raw

history blame contribute delete

12.6 kB

	"""
	DealFlow AI — Gradio Web UI
	Multi-agent investment due diligence system.
	PDF pitch deck upload → 3-agent analysis → investment memo output.
	"""
	from __future__ import annotations

	import os
	import sys
	import tempfile
	import threading
	from pathlib import Path

	import gradio as gr
	from loguru import logger

	# Ensure src is importable
	sys.path.insert(0, str(Path(__file__).parent.parent))

	from src.crew import DealFlowCrew


	# ─── Theme & CSS ─────────────────────────────────────────────────────────────

	# Extra stylesheet passed to gr.Blocks(css=...) in build_ui().
	# NOTE(review): none of these class names are attached to a component in the
	# code visible here (no elem_classes usage) — confirm they are still needed.
	CUSTOM_CSS = """
	.header-block {
	background: linear-gradient(135deg, #1e3a5f 0%, #0f2027 100%);
	padding: 24px;
	border-radius: 12px;
	margin-bottom: 16px;
	}
	.status-running { color: #f59e0b; font-weight: bold; }
	.status-done { color: #10b981; font-weight: bold; }
	.status-error { color: #ef4444; font-weight: bold; }
	.agent-card {
	border-left: 4px solid #2563eb;
	padding-left: 12px;
	margin: 8px 0;
	}
	"""

	# Markdown banner rendered at the top of the page by build_ui() via
	# gr.Markdown(HEADER_MD); describes the three agents of the pipeline.
	HEADER_MD = """
	# DealFlow AI
	### Multi-Agent Investment Due Diligence System

	Powered by: AMD MI300X + Qwen3-VL-32B + CrewAI v1.0

	Upload a startup pitch deck PDF and DealFlow AI will deploy a 3-agent crew to:
	1. Researcher — extract claims, validate market data, map competitors
	2. Financial Analyst — validate projections, model unit economics, generate charts
	3. Report Writer — synthesize a professional investment memo
	"""

	# Markdown table showing one row per agent. The {r_status}, {fa_status} and
	# {rw_status} placeholders are filled in by format_agent_status().
	# Pipes must be plain "|": a backslash-escaped pipe is an invalid Python
	# escape sequence and, in a Markdown table, is treated as literal cell text
	# rather than a column delimiter.
	AGENT_STATUS_TEMPLATE = """\
	| Agent | Status | Role |
	|-------|--------|------|
	| Researcher | {r_status} | Web research + PDF extraction |
	| Financial Analyst | {fa_status} | Projection validation + charts |
	| Report Writer | {rw_status} | Investment memo synthesis |
	"""


	def format_agent_status(stage: int) -> str:
	    """Render the agent status table for the given pipeline stage.

	    Args:
	        stage: 0, 1 or 2 while the Researcher, Financial Analyst or Report
	            Writer (respectively) is running; 3 once all agents are done.
	            Any other value (callers use -1) shows every agent as "Waiting".

	    Returns:
	        AGENT_STATUS_TEMPLATE with the three status placeholders filled in.
	    """
	    running, done, queued = "⏳ Running", "✅ Done", "⏭ Queued"
	    # (researcher, financial analyst, report writer) statuses per stage.
	    statuses_by_stage = {
	        0: (running, queued, queued),
	        1: (done, running, queued),
	        2: (done, done, running),
	        3: (done, done, done),
	    }
	    r_status, fa_status, rw_status = statuses_by_stage.get(
	        stage, ("Waiting", "Waiting", "Waiting")
	    )

	    return AGENT_STATUS_TEMPLATE.format(
	        r_status=r_status,
	        fa_status=fa_status,
	        rw_status=rw_status,
	    )


	# Server-side upload limits enforced by run_analysis(): uploads larger than
	# _MAX_PDF_BYTES are rejected, and the first four bytes of the file must
	# equal _PDF_MAGIC.
	_MAX_PDF_BYTES = 50 * 1024 * 1024 # 50 MB
	_PDF_MAGIC = b"%PDF"

	_ALLOWED_VLLM_HOSTS = frozenset(["localhost", "127.0.0.1"])


	def _validate_vllm_url(url: str) -> bool:
	"""Only allow vLLM URLs pointing to localhost to prevent SSRF."""
	try:
	from urllib.parse import urlparse
	parsed = urlparse(url)
	return parsed.hostname in _ALLOWED_VLLM_HOSTS
	except Exception:
	return False


	def _hidden_outputs(message: str) -> tuple:
	    """Build the output tuple for states with no memo, download or charts."""
	    return (
	        message,
	        format_agent_status(-1),
	        gr.update(visible=False),
	        gr.update(visible=False),
	        [],
	    )


	def run_analysis(
	    pdf_file,
	    company_name: str,
	    llm_backend: str,
	    serper_key: str,
	    vllm_url: str,
	    vllm_model: str,
	    hf_token: str,
	    progress=gr.Progress(track_tqdm=True),
	):
	    """Validate the upload, run the DealFlow crew and stream UI updates.

	    This is a generator used as a Gradio event handler. Every yielded value
	    is a 5-tuple matching the handler's outputs: status Markdown, agent
	    status table Markdown, memo update, memo download update, chart list.

	    Args:
	        pdf_file: Filesystem path of the uploaded pitch deck, or None.
	        company_name: Free-text company name; control characters are
	            stripped and the result is capped at 100 characters.
	        llm_backend: Backend identifier passed to ``Backend(...)``.
	        serper_key: Serper API key; falls back to ``SERPER_API_KEY``.
	        vllm_url: vLLM base URL; falls back to ``VLLM_BASE_URL``. Must pass
	            ``_validate_vllm_url``.
	        vllm_model: vLLM model name; falls back to ``VLLM_MODEL``.
	        hf_token: Hugging Face token; falls back to ``HF_API_TOKEN``.
	        progress: Gradio progress tracker (not referenced in the body).

	    Yields:
	        Output tuples as described above. Validation failures yield a single
	        error tuple and stop; any exception raised while configuring or
	        running the crew is logged and reported as a generic error.
	    """
	    import shutil

	    if pdf_file is None:
	        yield _hidden_outputs("Error: Please upload a PDF pitch deck.")
	        return

	    # Sanitize company name (strip control chars, limit length)
	    company_name = "".join(
	        c for c in (company_name or "").strip() if c >= " "
	    )[:100] or "Unknown Company"

	    # Server-side PDF validation (size + magic bytes). Only the filesystem
	    # access sits inside the try so unrelated errors are not masked.
	    try:
	        pdf_size = os.path.getsize(pdf_file)
	        magic = b""
	        if pdf_size <= _MAX_PDF_BYTES:
	            with open(pdf_file, "rb") as fh:
	                magic = fh.read(len(_PDF_MAGIC))
	    except (OSError, TypeError, ValueError) as exc:
	        logger.error("PDF validation error: {}", exc)
	        yield _hidden_outputs("Error: Could not validate the uploaded file.")
	        return

	    if pdf_size > _MAX_PDF_BYTES:
	        yield _hidden_outputs(
	            f"Error: PDF too large ({pdf_size // 1024 // 1024} MB). Maximum is 50 MB."
	        )
	        return
	    if magic != _PDF_MAGIC:
	        yield _hidden_outputs(
	            "Error: Uploaded file does not appear to be a valid PDF."
	        )
	        return

	    # Build per-request config (never mutate os.environ)
	    effective_vllm_url = (vllm_url or "").strip() or os.getenv(
	        "VLLM_BASE_URL", "http://localhost:8000/v1"
	    )
	    if not _validate_vllm_url(effective_vllm_url):
	        yield _hidden_outputs("Error: vLLM URL must point to localhost only.")
	        return

	    log_lines: list[str] = []
	    # Most recent agent stage inferred from progress messages. It is tracked
	    # but not read below because crew.run() blocks until the pipeline ends.
	    current_stage = 0

	    def progress_callback(msg: str) -> None:
	        nonlocal current_stage
	        log_lines.append(msg)
	        # Compare against an upper-cased copy using upper-case keywords;
	        # mixed-case keywords could never match an upper-cased string.
	        upper_msg = msg.upper()
	        if "AGENT 1/3" in msg or "RESEARCHER" in upper_msg:
	            current_stage = 0
	        elif "AGENT 2/3" in msg or "FINANCIAL" in upper_msg:
	            current_stage = 1
	        elif "AGENT 3/3" in msg or "REPORT" in upper_msg:
	            current_stage = 2

	    # Initial status
	    yield _hidden_outputs("Status: Initializing DealFlow AI crew...")

	    out_dir = None
	    try:
	        # Config construction lives inside the try so a rejected backend
	        # value or bad setting is reported like any other failure.
	        from src.config import AppConfig, Backend

	        config = AppConfig(
	            llm_backend=Backend(llm_backend),
	            vllm_base_url=effective_vllm_url,
	            vllm_model=(vllm_model or "").strip()
	            or os.getenv("VLLM_MODEL", "Qwen/Qwen3-VL-32B-Instruct-FP8"),
	            vllm_api_key=os.getenv("VLLM_API_KEY", ""),
	            hf_api_token=(hf_token or "").strip() or os.getenv("HF_API_TOKEN"),
	            serper_api_key=(serper_key or "").strip() or os.getenv("SERPER_API_KEY"),
	        )

	        # Set up output dir
	        out_dir = Path(tempfile.mkdtemp(prefix="dealflow_"))

	        crew = DealFlowCrew(config=config, progress_callback=progress_callback)

	        yield (
	            "Status: Running due diligence pipeline...\n\n"
	            + "\n".join(f"- {ln}" for ln in log_lines[-5:]),
	            format_agent_status(0),
	            gr.update(visible=False),
	            gr.update(visible=False),
	            [],
	        )

	        # Run the crew (blocking)
	        result = crew.run(
	            pdf_path=pdf_file,
	            company_name=company_name,
	            output_dir=str(out_dir),
	        )

	        memo = result.get("memo_content", "")
	        memo_path = result.get("memo_path")
	        charts = result.get("charts", [])
	        elapsed = result.get("elapsed_seconds", 0)
	        backend = result.get("backend", "unknown")

	        # Two trailing spaces before "\n" are a Markdown hard line break;
	        # with a single space the three lines are merged into one paragraph.
	        status_msg = (
	            f"Status: Analysis complete in {elapsed}s  \n"
	            f"Backend: {backend}  \n"
	            f"Charts generated: {len(charts)}"
	        )

	        yield (
	            status_msg,
	            format_agent_status(3),
	            gr.update(value=memo, visible=True),
	            gr.update(value=memo_path, visible=bool(memo_path)),
	            charts,
	        )

	    except Exception as exc:
	        # loguru: logger.exception() attaches the traceback, and passing the
	        # exception as a format argument keeps braces in its text from being
	        # interpreted as format fields.
	        logger.exception("Analysis failed: {}", exc)
	        # Nothing from a failed run is shown to the user, so remove the
	        # per-request output directory instead of leaking it.
	        if out_dir is not None:
	            shutil.rmtree(out_dir, ignore_errors=True)
	        yield _hidden_outputs(
	            "Error: Analysis failed. Please check your configuration and try again."
	        )


	def build_ui() -> gr.Blocks:
	    """Assemble the Gradio Blocks app and wire the run button.

	    Layout: header Markdown, then one row with an input column (PDF upload,
	    company name, inference settings, API keys, run button) and an output
	    column (status text, agent status table, memo tab, charts tab, memo
	    download). Clicking the run button streams run_analysis() results into
	    the five output components.

	    Returns:
	        The constructed, not yet launched, ``gr.Blocks`` instance.
	    """
	    with gr.Blocks(
	        theme=gr.themes.Base(
	            primary_hue="blue",
	            secondary_hue="slate",
	        ),
	        css=CUSTOM_CSS,
	        title="DealFlow AI",
	    ) as demo:

	        gr.Markdown(HEADER_MD)

	        with gr.Row():
	            # ─── Left: Inputs ─────────────────────────────────────────
	            with gr.Column(scale=1):
	                gr.Markdown("### Upload & Configure")

	                pdf_input = gr.File(
	                    label="Pitch Deck PDF",
	                    file_types=[".pdf"],
	                    type="filepath",
	                )
	                company_input = gr.Textbox(
	                    label="Company Name",
	                    placeholder="e.g. Acme AI",
	                    max_lines=1,
	                )

	                with gr.Accordion("Inference Settings", open=False):
	                    backend_select = gr.Radio(
	                        choices=["vllm", "hf", "openai"],
	                        value="vllm",
	                        label="Backend",
	                        info="vllm = AMD MI300X (recommended), hf = HuggingFace fallback",
	                    )
	                    vllm_url_input = gr.Textbox(
	                        label="vLLM Base URL",
	                        value="http://localhost:8000/v1",
	                        placeholder="http://localhost:8000/v1",
	                        max_lines=1,
	                    )
	                    vllm_model_input = gr.Textbox(
	                        label="vLLM Model",
	                        value="Qwen/Qwen3-VL-32B-Instruct-FP8",
	                        max_lines=1,
	                    )
	                    hf_token_input = gr.Textbox(
	                        label="HuggingFace Token (HF fallback)",
	                        placeholder="hf_...",
	                        type="password",
	                        max_lines=1,
	                    )

	                with gr.Accordion("API Keys", open=False):
	                    serper_key_input = gr.Textbox(
	                        label="Serper API Key",
	                        placeholder="Your Serper.dev key",
	                        type="password",
	                        max_lines=1,
	                    )

	                run_btn = gr.Button(
	                    "Run Due Diligence",
	                    variant="primary",
	                    size="lg",
	                )

	            # ─── Right: Outputs ───────────────────────────────────────
	            with gr.Column(scale=2):
	                gr.Markdown("### Analysis Progress")

	                status_output = gr.Markdown("Status: Waiting for input...")
	                agent_status_output = gr.Markdown(format_agent_status(-1))

	                with gr.Tab("Investment Memo"):
	                    memo_output = gr.Markdown(
	                        value="Investment memo will appear here after analysis...",
	                        visible=True,
	                    )

	                with gr.Tab("Charts"):
	                    charts_output = gr.Gallery(
	                        label="Generated Charts",
	                        show_label=False,
	                        columns=2,
	                        height="auto",
	                    )

	                # Hidden until run_analysis() yields a memo path.
	                memo_download = gr.File(
	                    label="Download Memo (Markdown)",
	                    visible=False,
	                )

	        # ─── Wire up ──────────────────────────────────────────────────
	        # The order of inputs matches run_analysis()'s positional parameters
	        # and the order of outputs matches the tuples it yields.
	        run_btn.click(
	            fn=run_analysis,
	            inputs=[
	                pdf_input,
	                company_input,
	                backend_select,
	                serper_key_input,
	                vllm_url_input,
	                vllm_model_input,
	                hf_token_input,
	            ],
	            outputs=[
	                status_output,
	                agent_status_output,
	                memo_output,
	                memo_download,
	                charts_output,
	            ],
	        )

	        # A plain "|" is used here: a backslash before the pipe is an invalid
	        # Python escape sequence, and the rendered Markdown is the same.
	        gr.Markdown(
	            "---\n"
	            "*DealFlow AI — AMD Developer Hackathon 2026 | "
	            "CrewAI v1.0 + Qwen3-VL-32B + AMD MI300X*"
	        )

	    return demo


	if __name__ == "__main__":
	    # Script entry point: build the UI, then serve it on all interfaces.
	    demo = build_ui()
	    # Port comes from GRADIO_PORT (default 7860).
	    launch_options = {
	        "server_name": "0.0.0.0",
	        "server_port": int(os.getenv("GRADIO_PORT", "7860")),
	        "share": False,
	        # Keep exception details out of the browser.
	        "show_error": False,
	    }
	    demo.launch(**launch_options)