Spaces:
Sleeping
Sleeping
# app.py
# ============================================================
# VentureMatch — Tinder-style Startup Matcher (HF Spaces / Gradio 6.x)
# - Embeddings (.npy) + FAISS (cosine) for fast search
# - Diverse sampling so the same query returns a different deck
# - Optional LLM (chat_completion) ONLY for insight/summary (never blocks search)
# ============================================================
| import os | |
| import re | |
| import math | |
| import time | |
| import json | |
| import random | |
| import numpy as np | |
| import pandas as pd | |
| import gradio as gr | |
| from datasets import load_dataset | |
| from sentence_transformers import SentenceTransformer | |
| import faiss | |
| # Optional LLM via HF Inference (CHAT API) | |
| try: | |
| from huggingface_hub import InferenceClient | |
| HF_OK = True | |
| except Exception: | |
| HF_OK = False | |
# -------------------------
# CONFIG
# -------------------------
# Dataset repo id passed to load_dataset().
DATASET_REPO = "Yoav-omer/startups"
# Precomputed embedding matrix; its existence and row count are validated at startup.
EMB_PATH = "embeddings_minilm.npy"
EMBED_MODEL_ID = "sentence-transformers/all-MiniLM-L6-v2" # must match embeddings dim (384)
# Number of nearest neighbours requested from the FAISS index per query.
CANDIDATES_K = 800
# Number of cards sampled into one swipe deck.
DECK_SIZE = 10
# Optional: LLM (only if HF_TOKEN exists). Used for insight, not for retrieval.
LLM_MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct" # chat-friendly on HF Inference
LLM_MAX_TOKENS = 220
LLM_TEMPERATURE = 0.7
LLM_TIMEOUT_S = 18
# Seeds the global `random` and NumPy generators. diverse_sample() creates its
# own time-seeded generator, so deck sampling is not affected by this seed.
RNG_SEED = 42
random.seed(RNG_SEED)
np.random.seed(RNG_SEED)
# -------------------------
# LOAD DATASET
# -------------------------
print("π Initializing VentureMatch Engine...")
ds = load_dataset(DATASET_REPO)
# Prefer the "train" split; otherwise use the first split the dataset exposes.
split_name = "train" if "train" in ds else list(ds.keys())[0]
df_raw = ds[split_name].to_pandas()

# -------------------------
# COLUMN NORMALIZATION
# -------------------------
# Alternative source column names -> canonical names used by the rest of the app.
rename_map = {
    "startup_id": "entity_id",
    "id": "entity_id",
    "burn": "BURN_RATE",
    "BURN": "BURN_RATE",
    "ARR_usd": "ARR",
    "arr": "ARR",
    "valuation": "VALUE",
    "valuation_usd": "VALUE",
    "competitors": "competitors_count",
}
# Several aliases share one canonical name. Renaming two of them (or renaming
# onto an already existing column) would create duplicate column labels, after
# which df_raw[c] yields a DataFrame and pd.to_numeric() below fails. So only
# the first alias claims a canonical name, and never one that already exists.
_claimed = set(df_raw.columns)
_effective_rename = {}
for _alias, _canonical in rename_map.items():
    if _alias in df_raw.columns and _canonical not in _claimed:
        _effective_rename[_alias] = _canonical
        _claimed.add(_canonical)
df_raw = df_raw.rename(columns=_effective_rename)

# Columns the app cannot work without.
required = ["entity_id", "name", "sector", "stage", "business_model", "ask_usd", "pitch"]
missing = [c for c in required if c not in df_raw.columns]
if missing:
    raise ValueError(f"Dataset is missing required column(s): {missing}")

# Optional columns are created with a neutral default when absent.
optional_defaults = {
    "elevator_speech": "",
    "keywords": "",
    "ARR": np.nan,
    "BURN_RATE": np.nan,
    "VALUE": np.nan,
    "competitors_count": np.nan,
}
for c, d in optional_defaults.items():
    if c not in df_raw.columns:
        df_raw[c] = d

# Unparseable numeric values become NaN instead of raising.
for c in ["ask_usd", "ARR", "BURN_RATE", "VALUE", "competitors_count"]:
    df_raw[c] = pd.to_numeric(df_raw[c], errors="coerce")
# -------------------------
# LOAD EMBEDDINGS + FAISS
# -------------------------
# Fail fast at import time when the precomputed embedding matrix is absent.
if not os.path.exists(EMB_PATH):
    raise FileNotFoundError(f"β Missing {EMB_PATH}. Upload it to your Space repo root.")
emb = np.load(EMB_PATH).astype(np.float32)
# Embedding rows are matched to df_raw rows purely by position, so counts must agree.
if emb.shape[0] != len(df_raw):
    raise ValueError(
        f"β Embeddings rows ({emb.shape[0]}) != dataset rows ({len(df_raw)}).\n"
        "Your .npy must match dataset row order EXACTLY."
    )
# cosine via dot-product on normalized vectors
# (the 1e-12 term keeps an all-zero row from dividing by zero)
emb /= (np.linalg.norm(emb, axis=1, keepdims=True) + 1e-12)
index = faiss.IndexFlatIP(emb.shape[1])
index.add(emb)
# query embed model
embedder = SentenceTransformer(EMBED_MODEL_ID, device="cpu")
print(f"β Loaded: {len(df_raw)} rows | dim={emb.shape[1]} | FAISS={index.ntotal}")
# -------------------------
# OPTIONAL LLM CLIENT (SAFE)
# -------------------------
# The client is created only when huggingface_hub imported successfully and a
# non-empty HF_TOKEN is present; otherwise llm_client stays None.
HF_TOKEN = os.getenv("HF_TOKEN", "").strip()
llm_client = None
if HF_OK and HF_TOKEN:
    try:
        # The request timeout is a constructor option of InferenceClient, so
        # LLM_TIMEOUT_S is applied here rather than per request.
        llm_client = InferenceClient(token=HF_TOKEN, timeout=LLM_TIMEOUT_S)
        print("β LLM enabled via HF Inference (chat_completion).")
    except Exception as e:
        # Best effort: any construction failure simply disables the LLM path.
        llm_client = None
        print(f"β οΈ LLM disabled: {e}")
# -------------------------
# LISTS FOR UI
# -------------------------
# Dropdown choices: sorted distinct non-null values of each column, as strings.
SECTOR_LIST, STAGE_LIST, BMODEL_LIST = (
    sorted(df_raw[_column].dropna().astype(str).unique().tolist())
    for _column in ("sector", "stage", "business_model")
)
| # ------------------------- | |
| # HELPERS | |
| # ------------------------- | |
# Common English function words skipped when extracting keywords from a thesis.
STOPWORDS = {
    "the", "a", "an", "and", "or", "to", "for", "of", "in",
    "on", "with", "by", "from", "at", "as", "is", "are",
}
def clean_text(s: str) -> str:
    """Collapse each whitespace run in *s* to one space and trim both ends.

    Values that ``pd.isna`` reports as missing (``None``, NaN, ...) become the
    empty string; any other non-string value is converted with ``str()``.
    """
    if pd.isna(s):
        return ""
    collapsed = re.sub(r"\s+", " ", str(s))
    return collapsed.strip()
def format_currency(value):
    """Format a numeric amount as a compact USD string.

    Magnitudes are scaled to ``K`` (no decimals), ``M`` or ``B`` (two
    decimals). Negative amounts keep the same scaling with a leading minus
    sign (``-$2.50M``).

    Args:
        value: Anything ``float()`` accepts (number or numeric string).

    Returns:
        The formatted string, or ``"N/A"`` when *value* cannot be converted
        to a finite number (``None``, NaN, infinity, non-numeric text).
    """
    try:
        amount = float(value)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures mean "no value"; other errors are not hidden.
        return "N/A"
    if not math.isfinite(amount):
        return "N/A"

    sign = "-" if amount < 0 else ""
    magnitude = abs(amount)  # choose the unit by magnitude so negatives scale too
    if magnitude >= 1e9:
        return f"{sign}${magnitude / 1e9:.2f}B"
    if magnitude >= 1e6:
        return f"{sign}${magnitude / 1e6:.2f}M"
    if magnitude >= 1e3:
        return f"{sign}${magnitude / 1e3:.0f}K"
    return f"{sign}${magnitude:.0f}"
def clamp01(x: float) -> float:
    """Restrict *x* to the closed interval [0.0, 1.0]."""
    capped = x if x < 1.0 else 1.0
    return capped if capped > 0.0 else 0.0
def similarity_to_pct(sim: float) -> int:
    """Map a similarity score to a 0-100 "match" percentage.

    Scores are rescaled linearly so that 0.25 maps to 0 and 0.80 maps to 100;
    anything outside that band is clamped.
    """
    floor_sim, ceil_sim = 0.25, 0.80
    fraction = (sim - floor_sim) / (ceil_sim - floor_sim)
    scaled = 100 * clamp01(fraction)
    return int(round(scaled))
def tokenize_reason(query: str) -> list:
    """Extract up to eight distinct keywords from *query*, in first-seen order.

    The query is lower-cased, every character other than ASCII letters,
    digits, whitespace and ``-`` is replaced by a space, and words that are
    stopwords or shorter than three characters are dropped.
    """
    normalized = re.sub(r"[^a-zA-Z0-9\s\-]", " ", query.lower())
    keywords = (
        word for word in normalized.split()
        if len(word) > 2 and word not in STOPWORDS
    )
    # dict.fromkeys removes duplicates while keeping insertion order.
    return list(dict.fromkeys(keywords))[:8]
def heuristic_insight(row: dict, query: str) -> str:
    """Build a short, LLM-free explanation of why *row* matched *query*.

    Up to four query keywords found (as substrings) in the startup's pitch,
    keywords or elevator speech are listed; if none are found a generic
    sentence is used. Ask, ARR and valuation are appended.

    Args:
        row: Startup record (dict of column name -> value).
        query: The user's thesis text.

    Returns:
        A single-line insight string with ``•`` separators.
    """
    def _text(value) -> str:
        # Missing fields must not contribute the literal words "none"/"nan",
        # which a query keyword could otherwise match.
        if value is None or (isinstance(value, float) and math.isnan(value)):
            return ""
        return str(value)

    toks = tokenize_reason(query)
    blob = " ".join(
        _text(row.get(field, "")) for field in ("pitch", "keywords", "elevator_speech")
    ).lower()
    hits = [t for t in toks if t in blob][:4]
    reason = "Matches: " + ", ".join(hits) if hits else "Semantically aligned with your thesis."
    # The separators were mis-encoded bullet characters; restored to "•".
    return (
        f"{reason} • Ask {format_currency(row.get('ask_usd'))}"
        f" • ARR {format_currency(row.get('ARR'))}"
        f" • Value {format_currency(row.get('VALUE'))}"
    )
def llm_insight(row: dict, query: str) -> str:
    """Return a one-line analyst insight for *row*, using the LLM when available.

    Never raises: if no LLM client is configured, the request fails, or the
    model returns nothing, the heuristic insight is returned instead.

    Args:
        row: Startup record (dict of column name -> value).
        query: The user's thesis text.

    Returns:
        The insight text, whitespace-collapsed and capped at 300 characters.
    """
    if llm_client is None:
        return heuristic_insight(row, query)

    prompt = "\n".join([
        "You are a VC analyst. Given a user thesis and a startup profile, write 1 short insight:",
        "- 1 sentence why it's a match (or not)",
        "- Mention 1 key risk or missing detail",
        "Keep it under 35 words.",
        "User thesis:",
        f"{query}",
        "Startup:",
        f"Name: {row.get('name')}",
        f"Sector: {row.get('sector')}",
        f"Stage: {row.get('stage')}",
        f"Business model: {row.get('business_model')}",
        f"Ask: {row.get('ask_usd')}",
        f"ARR: {row.get('ARR')}",
        f"Burn/mo: {row.get('BURN_RATE')}",
        f"Pitch: {row.get('pitch')}",
    ])

    try:
        # chat_completion() accepts no per-call `timeout` keyword. Passing one
        # raised TypeError on every call, which the handler below swallowed,
        # so the LLM was never actually used. A request timeout has to be
        # configured on the InferenceClient instance instead.
        resp = llm_client.chat_completion(
            model=LLM_MODEL_ID,
            messages=[
                {"role": "system", "content": "You are concise, practical, and skeptical."},
                {"role": "user", "content": prompt},
            ],
            max_tokens=LLM_MAX_TOKENS,
            temperature=LLM_TEMPERATURE,
        )
        # content may be None; treat that like an empty answer.
        text = (resp.choices[0].message.content or "").strip()
    except Exception as exc:
        # Deliberate best effort: the insight is optional, so any failure
        # falls back to the heuristic, but it is logged so it stays visible.
        print(f"LLM insight failed, using heuristic: {exc}")
        return heuristic_insight(row, query)

    text = re.sub(r"\s+", " ", text)
    return text[:300] if text else heuristic_insight(row, query)
def make_cover_svg(name: str, sector: str, stage: str) -> str:
    """Render the card's gradient cover as an inline SVG string.

    Args:
        name: Startup name (shown truncated to 26 characters).
        sector: Sector label (truncated to 18 characters).
        stage: Stage label (truncated to 14 characters).

    Returns:
        SVG markup with the three labels XML-escaped.
    """
    from html import escape  # local import keeps this block self-contained

    # Truncate first, then escape, so an entity such as "&amp;" is never cut
    # in half. Unescaped "&" or "<" in dataset text would break the SVG.
    name = escape(clean_text(name)[:26])
    sector = escape(clean_text(sector)[:18])
    stage = escape(clean_text(stage)[:14])
    return f"""
<svg width="900" height="290" viewBox="0 0 900 290" xmlns="http://www.w3.org/2000/svg">
<defs>
<linearGradient id="g" x1="0" y1="0" x2="1" y2="1">
<stop offset="0%" stop-color="#FD297B"/>
<stop offset="50%" stop-color="#FF5864"/>
<stop offset="100%" stop-color="#4CC9F0"/>
</linearGradient>
<filter id="shadow" x="-10%" y="-10%" width="120%" height="120%">
<feDropShadow dx="0" dy="12" stdDeviation="14" flood-color="#000000" flood-opacity="0.22"/>
</filter>
</defs>
<rect x="0" y="0" width="900" height="290" rx="34" fill="url(#g)"/>
<g filter="url(#shadow)">
<rect x="46" y="56" width="560" height="178" rx="26" fill="rgba(255,255,255,0.18)"/>
</g>
<text x="86" y="142" font-family="Inter, system-ui" font-size="44" font-weight="900" fill="#ffffff">{name}</text>
<text x="88" y="188" font-family="Inter, system-ui" font-size="22" font-weight="700" fill="rgba(255,255,255,0.92)">{sector} • {stage}</text>
</svg>
""".strip()
def card_html(row: dict, sim: float, query: str, insight_text: str, stamp: str = "") -> str:
    """Render one startup as the HTML swipe card.

    Args:
        row: Startup record (dict of column name -> value).
        sim: Similarity score, shown as a match percentage.
        query: The user's thesis (accepted for interface compatibility; unused).
        insight_text: Insight line shown at the bottom of the card.
        stamp: ``"LIKE"`` or ``"NOPE"`` to overlay an INVEST/PASS stamp;
            anything else renders no stamp.

    Returns:
        The card's HTML markup.
    """
    from html import escape  # local import keeps this block self-contained

    def _safe(value) -> str:
        # Dataset text and LLM output are untrusted: normalise, then escape,
        # so they cannot inject markup into the card.
        return escape(clean_text(value))

    pct = similarity_to_pct(sim)
    cover = make_cover_svg(row.get("name", ""), row.get("sector", ""), row.get("stage", ""))
    comp = row.get("competitors_count")
    comp_txt = "N/A" if pd.isna(comp) else str(int(comp))

    stamp_html = ""
    if stamp == "LIKE":
        stamp_html = """<div class="stamp like">INVEST</div>"""
    elif stamp == "NOPE":
        stamp_html = """<div class="stamp nope">PASS</div>"""

    return f"""
<div class="vm-wrap">
<div class="vm-card">
{stamp_html}
<div class="vm-top">
<span class="pill">{pct}% MATCH</span>
<span class="id">#{_safe(row.get("entity_id", ""))}</span>
</div>
<div class="vm-cover">{cover}</div>
<div class="vm-body">
<div class="vm-title">
<div class="name">{_safe(row.get("name", ""))}</div>
<div class="meta">{_safe(row.get("sector", ""))} • {_safe(row.get("stage", ""))} • <span class="bmodel">{_safe(row.get("business_model", ""))}</span></div>
</div>
<div class="vm-quote">“{_safe(row.get("pitch", ""))}”</div>
<div class="vm-grid">
<div class="vm-stat"><div class="k">Ask</div><div class="v">{format_currency(row.get("ask_usd"))}</div></div>
<div class="vm-stat"><div class="k">ARR</div><div class="v">{format_currency(row.get("ARR"))}</div></div>
<div class="vm-stat"><div class="k">Burn/Mo</div><div class="v">{format_currency(row.get("BURN_RATE"))}</div></div>
<div class="vm-stat"><div class="k">Value</div><div class="v">{format_currency(row.get("VALUE"))}</div></div>
<div class="vm-stat"><div class="k">Competitors</div><div class="v">{comp_txt}</div></div>
</div>
<div class="vm-insight"><b>✨ AI Insight:</b> {_safe(insight_text)}</div>
</div>
</div>
</div>
""".strip()
def semantic_search(query: str):
    """Embed *query* and return its nearest neighbours from the FAISS index.

    Args:
        query: Free-text thesis.

    Returns:
        ``(scores, idxs)``: two 1-D arrays of equal length holding the
        inner-product scores and the matching row positions. At most
        ``CANDIDATES_K`` entries; both are empty when the index is empty.
    """
    qv = embedder.encode([query], normalize_embeddings=True).astype(np.float32)

    # Never ask for more neighbours than the index holds: FAISS pads missing
    # results with label -1, which positional indexing would read as the last row.
    k = min(CANDIDATES_K, int(index.ntotal))
    if k <= 0:
        return np.empty(0, dtype=np.float32), np.empty(0, dtype=np.int64)

    scores, idxs = index.search(qv, k)
    found = idxs[0] >= 0  # drop any padding entries defensively
    return scores[0][found], idxs[0][found]
def apply_filters(df: pd.DataFrame, sectors, stages, bmodels, ask_min, ask_max):
    """Return the rows of *df* that pass the categorical and ask-range filters.

    An empty or ``None`` selection disables that categorical filter. The ask
    range is inclusive, and rows whose ``ask_usd`` is missing are always kept
    so that sparse data does not wipe out the results.
    """
    keep = np.ones(len(df), dtype=bool)
    selections = (
        ("sector", sectors),
        ("stage", stages),
        ("business_model", bmodels),
    )
    for column, allowed in selections:
        if allowed:
            keep &= df[column].isin(allowed).to_numpy()

    ask = df["ask_usd"]
    in_range_or_missing = ask.isna() | ask.between(ask_min, ask_max)
    keep &= in_range_or_missing.to_numpy()
    return df[keep]
def diverse_sample(df: pd.DataFrame, n: int, diversity: float) -> pd.DataFrame:
    """Pick *n* rows from *df*, trading pure ranking for variety.

    ``diversity`` is expected in [0, 1]: at 0.05 or below the top *n* rows by
    similarity are returned deterministically; above that, rows are drawn
    without replacement from the best 140 with softmax weights whose
    temperature grows with ``diversity``. The result is always sorted by
    descending similarity. If *df* has *n* rows or fewer, all are returned.
    """
    ranked = df.sort_values("similarity", ascending=False).copy()
    if len(ranked) <= n:
        return ranked
    if diversity <= 0.05:
        return ranked.head(n)

    pool = ranked.head(min(140, len(ranked))).copy()
    sims = pool["similarity"].to_numpy()

    # Softmax over similarity; a higher temperature flattens the weights.
    temperature = 0.06 + 0.55 * float(diversity)
    weights = np.exp((sims - sims.max()) / max(1e-6, temperature))
    weights = weights / (weights.sum() + 1e-12)

    # Seed from the wall clock so identical queries yield different decks.
    seed = int(time.time() * 1000) % (2**32 - 1)
    picks = np.random.default_rng(seed).choice(len(pool), size=n, replace=False, p=weights)
    return pool.iloc[picks].copy().sort_values("similarity", ascending=False)
def portfolio_to_table(portfolio):
    """Convert portfolio records into rows for the portfolio Dataframe.

    Each row is ``[entity_id, name, sector, stage, business_model, ask,
    similarity]``, with the ask formatted as currency and the similarity as a
    float. ``None`` or an empty portfolio yields an empty list.
    """
    text_fields = ("entity_id", "name", "sector", "stage", "business_model")
    return [
        [
            *(item.get(field, "") for field in text_fields),
            format_currency(item.get("ask_usd")),
            float(item.get("similarity", 0.0)),
        ]
        for item in (portfolio or [])
    ]
| # ------------------------- | |
| # MAIN SEARCH | |
| # ------------------------- | |
def _search_failure(message: str, portfolio_state):
    """Build the 8-output tuple that keeps the onboarding view up with an error."""
    return (
        gr.update(visible=True), gr.update(visible=False),
        "", [], 0, portfolio_state,
        f"<div class='vm-error'>{message}</div>",
        ""
    )


def start_search(user_query, sectors, stages, bmodels, ask_min, ask_max, diversity, portfolio_state):
    """Run a thesis search and build the first card of a new swipe deck.

    Args:
        user_query: Free-text investment thesis (at least 6 characters).
        sectors, stages, bmodels: Selected filter values (empty = no filter).
        ask_min, ask_max: Inclusive ask range in USD.
        diversity: Sampling diversity passed to ``diverse_sample``.
        portfolio_state: Current portfolio, passed through unchanged.

    Returns:
        An 8-tuple matching the Gradio outputs: onboarding visibility update,
        matching-view visibility update, card HTML, deck (list of records),
        deck position, portfolio, status HTML, and the thesis info markdown.
    """
    q = clean_text(user_query)
    if len(q) < 6:
        return _search_failure("Write a longer thesis (≥ 6 chars).", portfolio_state)

    # Semantic retrieval
    scores, idxs = semantic_search(q)
    # FAISS marks missing neighbours with -1; positional indexing would turn
    # that into the last dataset row, so such entries are dropped first.
    found = idxs >= 0
    cand = df_raw.iloc[idxs[found]].copy()
    cand["similarity"] = scores[found]

    # Filters
    cand = apply_filters(cand, sectors, stages, bmodels, float(ask_min), float(ask_max))
    if cand.empty:
        return _search_failure("No matches. Try broader filters.", portfolio_state)

    deck_df = diverse_sample(cand, DECK_SIZE, diversity=float(diversity))
    deck = deck_df.to_dict("records")
    first = deck[0]
    insight = llm_insight(first, q)
    html = card_html(first, float(first["similarity"]), q, insight)

    thesis_info = f"**Search mode:** Embeddings + FAISS • **Diversity:** {float(diversity):.2f}"
    if llm_client is not None:
        thesis_info += " • **AI Insight:** LLM enabled"
    else:
        thesis_info += " • **AI Insight:** heuristic"

    return (
        gr.update(visible=False), gr.update(visible=True),
        html, deck, 0, portfolio_state,
        "",  # status
        thesis_info
    )
def swipe_action(deck, pos, action, query, portfolio):
    """Apply an INVEST/PASS decision to the current card and advance the deck.

    Args:
        deck: List of startup records for the current search.
        pos: Index of the card currently shown (``None`` is treated as 0).
        action: ``"INVEST"`` adds the current card to the portfolio; any
            other value is treated as a pass.
        query: Thesis text used to generate the next card's insight.
        portfolio: Current list of invested records (may be ``None``).

    Returns:
        ``(html, new_pos, action_row_update, portfolio)``. The action row is
        hidden once the deck is exhausted.
    """
    if not deck:
        return "<div class='vm-error'>No deck loaded.</div>", pos, gr.update(visible=True), portfolio

    pos = int(pos or 0)
    if pos >= len(deck):
        return "<div class='vm-end'>π End of deck. Start a new search.</div>", pos, gr.update(visible=False), portfolio

    invested = action == "INVEST"
    if invested:
        # Build a new list instead of appending in place: if rendering the
        # next card fails, the caller's state is not left half-updated.
        portfolio = [*(portfolio or []), dict(deck[pos])]

    new_pos = pos + 1
    if new_pos >= len(deck):
        end_html = "<div class='vm-end'>π You reached the end. Check your portfolio below.</div>"
        return end_html, new_pos, gr.update(visible=False), portfolio

    # The stamp for the decision just taken is overlaid on the next card.
    stamp = "LIKE" if invested else "NOPE"
    nxt = deck[new_pos]
    insight = llm_insight(nxt, query)
    html = card_html(nxt, float(nxt["similarity"]), query, insight, stamp=stamp)
    return html, new_pos, gr.update(visible=True), portfolio
def remove_selected(portfolio, txt):
    """Remove portfolio entries by zero-based position.

    Args:
        portfolio: Current list of invested records (may be ``None``).
        txt: Comma-separated indices, e.g. ``"0,2,3"``. Tokens that are not
            plain non-negative integers are ignored, as are indices out of range.

    Returns:
        ``(new_portfolio, table_rows)`` for the state and the Dataframe.
    """
    portfolio = portfolio or []
    txt = "" if txt is None else str(txt)
    # isdecimal() (not isdigit()) is used because isdigit() also accepts
    # characters such as "²" that int() rejects with ValueError.
    idxs = {
        int(token)
        for token in (part.strip() for part in txt.split(","))
        if token.isdecimal()
    }
    new_port = [p for i, p in enumerate(portfolio) if i not in idxs]
    return new_port, portfolio_to_table(new_port)
def clear_portfolio():
    """Return an empty portfolio state and an empty table (two distinct lists)."""
    empty_state: list = []
    empty_rows: list = []
    return empty_state, empty_rows
| # ------------------------- | |
| # CSS (Tinder-like) | |
| # ------------------------- | |
# Stylesheet for the swipe card (.vm-*), the INVEST/PASS stamp animation and
# the status banners. It is passed to demo.launch(css=CSS) at the end of the file.
CSS = """
:root{
--pink:#FD297B;
--red:#FF5864;
--cyan:#4CC9F0;
--bg1:#0b0b10;
--card: rgba(255,255,255,0.92);
--shadow: 0 30px 70px rgba(0,0,0,0.25);
}
body{
background: radial-gradient(1200px 700px at 20% 20%, rgba(253,41,123,0.20), transparent 60%),
radial-gradient(900px 600px at 80% 30%, rgba(76,201,240,0.18), transparent 55%),
linear-gradient(180deg, #0b0b10 0%, #0f111a 70%, #0b0b10 100%) !important;
}
.vm-hero{
padding: 18px 14px 8px 14px;
border-radius: 18px;
background: rgba(255,255,255,0.04);
border: 1px solid rgba(255,255,255,0.08);
}
.vm-wrap { display:flex; justify-content:center; padding: 10px 0 16px 0; }
.vm-card {
width: min(580px, 95vw);
border-radius: 30px;
background: var(--card);
box-shadow: var(--shadow);
border: 1px solid rgba(255,255,255,0.12);
overflow: hidden;
position: relative;
backdrop-filter: blur(8px);
}
.vm-top{
display:flex; justify-content:space-between; align-items:center;
padding: 14px 18px;
background: linear-gradient(90deg, rgba(253,41,123,0.16), rgba(76,201,240,0.14));
}
.pill{
font-weight: 900;
font-size: 12px;
letter-spacing: 0.8px;
padding: 7px 12px;
border-radius: 999px;
color: #fff;
background: linear-gradient(45deg, var(--pink), var(--red));
box-shadow: 0 10px 22px rgba(253,41,123,0.28);
}
.id{ color: rgba(0,0,0,0.55); font-size: 12px; font-weight: 700; }
.vm-cover { background: #fff; padding: 12px 12px 0px 12px; }
.vm-body { padding: 16px 18px 18px 18px; }
.name { font-size: 32px; font-weight: 1000; letter-spacing: -0.7px; color: #0c0c10; }
.meta { margin-top: 4px; font-size: 14px; color: rgba(0,0,0,0.65); font-weight: 800; }
.bmodel { color: var(--red); }
.vm-quote{
margin-top: 14px;
background: rgba(0,0,0,0.04);
border: 1px solid rgba(0,0,0,0.06);
border-radius: 18px;
padding: 14px 14px;
font-size: 15px;
line-height: 1.55;
color: rgba(0,0,0,0.82);
}
.vm-grid{
margin-top: 14px;
display:grid;
grid-template-columns: 1fr 1fr;
gap: 10px;
}
.vm-stat{
background: rgba(255,255,255,0.78);
border: 1px solid rgba(0,0,0,0.06);
border-radius: 16px;
padding: 10px 12px;
}
.vm-stat .k{
font-size: 10px;
font-weight: 1000;
letter-spacing: 0.9px;
text-transform: uppercase;
color: rgba(0,0,0,0.48);
}
.vm-stat .v{
margin-top: 2px;
font-size: 16px;
font-weight: 1000;
color: rgba(0,0,0,0.86);
}
.vm-insight{
margin-top: 14px;
border-radius: 16px;
padding: 12px 14px;
font-size: 13px;
line-height: 1.5;
background: rgba(255,88,100,0.10);
border: 1px dashed rgba(255,88,100,0.60);
color: rgba(0,0,0,0.78);
}
.vm-error{
padding: 14px 16px;
border-radius: 16px;
background: rgba(255,88,100,0.16);
border: 1px solid rgba(255,88,100,0.28);
color: rgba(255,255,255,0.92);
font-weight: 800;
text-align:center;
}
.vm-end{
padding: 22px 16px;
border-radius: 18px;
background: rgba(76,201,240,0.14);
border: 1px solid rgba(76,201,240,0.28);
color: rgba(255,255,255,0.92);
font-weight: 900;
text-align:center;
}
.stamp{
position:absolute;
top: 102px;
left: 22px;
transform: rotate(-14deg);
font-size: 34px;
font-weight: 1000;
letter-spacing: 1px;
padding: 10px 14px;
border-radius: 14px;
opacity: 0.0;
animation: pop 0.55s ease forwards;
z-index: 10;
}
.stamp.like { border: 6px solid rgba(50,205,50,0.88); color: rgba(50,205,50,0.92); }
.stamp.nope { border: 6px solid rgba(255,59,92,0.88); color: rgba(255,59,92,0.92); }
@keyframes pop{
0% { opacity: 0.0; transform: translateY(8px) rotate(-14deg) scale(0.92); }
60% { opacity: 1.0; transform: translateY(0px) rotate(-14deg) scale(1.05); }
100% { opacity: 0.0; transform: translateY(-2px) rotate(-14deg) scale(1.02); }
}
"""
| # ------------------------- | |
| # UI | |
| # ------------------------- | |
# NOTE(review): the source's indentation was lost, so the component nesting
# below is reconstructed from context — confirm against the deployed layout,
# in particular where thesis_info / status_box / start_btn and the portfolio sit.
with gr.Blocks() as demo:
    # Per-session state: current deck, position in it, and the invested records.
    deck_state = gr.State([])
    pos_state = gr.State(0)
    portfolio_state = gr.State([])
    last_query_state = gr.State("")

    with gr.Column(elem_id="onboarding") as onboarding_view:
        gr.Markdown(
            """
<div class="vm-hero">
# π VentureMatch
### Tinder-style startup search (Embeddings + FAISS)
Write a thesis β filter β get a swipe deck.
Same thesis twice? You'll still get **varied** results.
</div>
""".strip()
        )
        with gr.Row():
            with gr.Column(scale=2):
                query_input = gr.Textbox(
                    label="Investment Thesis",
                    placeholder="Example: 'Cybersecurity for SMBs, low burn, Seed, B2B SaaS'",
                    lines=4
                )
                gr.Examples(
                    examples=[
                        ["Cybersecurity for small businesses, phishing defense, low burn"],
                        ["ClimateTech for factories: carbon accounting + compliance"],
                        ["HealthTech remote monitoring for elderly patients, B2B SaaS"],
                    ],
                    inputs=query_input,
                    label="Quick Starters (1-click)"
                )
            with gr.Column(scale=1):
                sectors_input = gr.Dropdown(choices=SECTOR_LIST, multiselect=True, label="Sector (multi-select)")
                stages_input = gr.Dropdown(choices=STAGE_LIST, multiselect=True, label="Stage (multi-select)")
                bmodels_input = gr.Dropdown(choices=BMODEL_LIST, multiselect=True, label="Business Model (multi-select)")
                diversity = gr.Slider(
                    minimum=0.0, maximum=1.0, value=0.50, step=0.05,
                    label="Result Diversity",
                    info="Higher = more different results for same query."
                )
                with gr.Accordion("Advanced Filters", open=False):
                    with gr.Row():
                        ask_min = gr.Number(value=0, label="Ask min (USD)")
                        ask_max = gr.Number(value=10_000_000, label="Ask max (USD)")
        thesis_info = gr.Markdown("")
        status_box = gr.HTML("")
        start_btn = gr.Button("FIND STARTUPS π₯", variant="primary")

    with gr.Column(visible=False) as matching_view:
        display_area = gr.HTML()
        with gr.Row(visible=True) as action_row:
            pass_btn = gr.Button("PASS β", variant="secondary")
            invest_btn = gr.Button("INVEST π", variant="primary")
        back_btn = gr.Button("β¬ Back to Search", variant="secondary")
        gr.Markdown("## π Portfolio")
        portfolio_table = gr.Dataframe(
            headers=["entity_id","name","sector","stage","business_model","ask","similarity"],
            datatype=["str","str","str","str","str","str","number"],
            interactive=False
        )
        with gr.Row():
            remove_rows = gr.Textbox(label="Remove rows (indices)", placeholder="Example: 0,2,3")
            remove_btn = gr.Button("Remove Selected", variant="secondary")
            clear_btn = gr.Button("Clear Portfolio", variant="stop")

    # Events
    def on_start(user_query, sectors, stages, bmodels, ask_min_v, ask_max_v, diversity_v, portfolio_v):
        """Validate the ask range, then delegate to start_search().

        Returns the same 8-output tuple as start_search(); on invalid input
        the onboarding view stays visible and an error banner is shown.
        """
        def _reject(message):
            return (
                gr.update(visible=True), gr.update(visible=False),
                "", [], 0, portfolio_v,
                f"<div class='vm-error'>{message}</div>",
                thesis_info.value
            )

        try:
            a_min = float(ask_min_v)
            a_max = float(ask_max_v)
        except (TypeError, ValueError):
            # Only conversion failures (empty or non-numeric fields) mean bad
            # input; a bare except would also hide unrelated programming errors.
            return _reject("Bad Ask min/max")
        if a_min > a_max:
            return _reject("Ask: min must be β€ max")

        return start_search(
            user_query, sectors, stages, bmodels,
            a_min, a_max,
            float(diversity_v),
            portfolio_v
        )

    # Every action that can change the portfolio refreshes the table afterwards.
    start_btn.click(
        on_start,
        inputs=[query_input, sectors_input, stages_input, bmodels_input, ask_min, ask_max, diversity, portfolio_state],
        outputs=[onboarding_view, matching_view, display_area, deck_state, pos_state, portfolio_state, status_box, thesis_info]
    ).then(portfolio_to_table, inputs=portfolio_state, outputs=portfolio_table)

    invest_btn.click(
        lambda deck, pos, query, port: swipe_action(deck, pos, "INVEST", query, port),
        inputs=[deck_state, pos_state, query_input, portfolio_state],
        outputs=[display_area, pos_state, action_row, portfolio_state]
    ).then(portfolio_to_table, inputs=portfolio_state, outputs=portfolio_table)

    pass_btn.click(
        lambda deck, pos, query, port: swipe_action(deck, pos, "PASS", query, port),
        inputs=[deck_state, pos_state, query_input, portfolio_state],
        outputs=[display_area, pos_state, action_row, portfolio_state]
    ).then(portfolio_to_table, inputs=portfolio_state, outputs=portfolio_table)

    back_btn.click(lambda: (gr.update(visible=True), gr.update(visible=False)), outputs=[onboarding_view, matching_view])
    remove_btn.click(remove_selected, inputs=[portfolio_state, remove_rows], outputs=[portfolio_state, portfolio_table])
    clear_btn.click(clear_portfolio, outputs=[portfolio_state, portfolio_table])
# Queue helps stability on Spaces
# (one handler runs at a time by default; at most 32 requests are queued)
demo.queue(default_concurrency_limit=1, max_size=32)
# IMPORTANT: In Gradio 6.x pass css/theme via launch()
# NOTE(review): launch() runs unconditionally at import time — presumably
# intended, since the Space executes this file as its entry point; confirm.
demo.launch(css=CSS, theme=gr.themes.Default(primary_hue="pink"), ssr_mode=False)