Spaces:

LeonceNsh
/

networkx-saas

Runtime error

App Files Files Community

LeonceNsh committed on Oct 29, 2025

Commit

f5f328a

verified ·

1 Parent(s): 00607ac

Update app.py

Browse files

Files changed (1) hide show

app.py +91 -91

app.py CHANGED Viewed

@@ -8,40 +8,40 @@ from typing import List, Dict, Tuple, Optional
 from functools import lru_cache
 import time
- ============================================================================
- CONFIGURATION
- ============================================================================
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 logger = logging.getLogger(__name__)
 FILE_PATH = "cbinsights_data.csv"
-DATA_TIMESTAMP = "2024-09"   Update manually or parse from filename
- UI Copy
 TITLE = "Venture Networks Visualization"
 SUBTITLE_TEMPLATE = "Active: {country} • {industry} • {valuation_range} • {count} companies"
 INSTRUCTIONS = """
-How to use:
-1. Filter by Country, Industry, Company, Investor, and Valuation Range
-2. Hover over nodes to see details • Click a node to focus and view full information
-3. Download the filtered dataset as CSV • Use Nashville Filter for local quick access
 """
 EMPTY_STATE = """
- No results match your filters.
-Try: Clearing exclusions • Expanding valuation range • Selecting "All" for Country or Industry
 """
-ERROR_VALUATION = "Data Error: Could not identify a single valuation column. Found: {columns}"
-ERROR_FILE = "File Error: Dataset not found at `{path}`. Ensure `cbinsights_data.csv` is in the working directory."
-TRUNCATION_NOTICE = "Notice: Showing top {cap} of {total} companies by valuation. Adjust slider or refine filters."
- Graph Design
-COMPANY_COLOR = "66c2a5"
-COMPANY_STROKE = "2d6a4f"
-INVESTOR_STROKE = "000000"
-INVESTOR_COLORS = ["E69F00", "56B4E9", "009E73", "F0E442", "0072B2", "D55E00", "CC79A7", "999999"]
-EDGE_COLOR = "cccccc"
 EDGE_OPACITY = 0.6
 NODE_SIZE_MIN = 10
@@ -49,7 +49,7 @@ NODE_SIZE_MAX = 60
 INVESTOR_SIZE = 36
 LABEL_FONT_SIZE = 11
 INVESTOR_LABEL_FONT_SIZE = 12
-LARGE_COMPANY_THRESHOLD = 10   Show labels for valuations >10B
 DEFAULT_NODE_CAP = 300
 SPRING_LAYOUT_ITERATIONS_SMALL = 150
@@ -59,9 +59,9 @@ DEBOUNCE_MS = 250
 VALUATION_RANGES = ["All", "1-5", "5-10", "10-15", "15-20", "20+"]
- ============================================================================
- DATA LOADING AND PREPROCESSING
- ============================================================================
 def load_and_clean_data(file_path: str) -> pd.DataFrame:
     """Load CSV, standardize columns, filter Health, parse valuation."""
     try:
@@ -74,18 +74,18 @@ def load_and_clean_data(file_path: str) -> pd.DataFrame:
         logger.error(f"Error loading CSV: {e}")
         raise
-     Standardize columns
     data.columns = data.columns.str.strip().str.lower()
     logger.info(f"Columns: {data.columns.tolist()}")
-     Identify valuation column
     val_cols = [col for col in data.columns if 'valuation' in col]
     if len(val_cols) != 1:
         logger.error(f"Expected 1 valuation column, found {len(val_cols)}: {val_cols}")
         raise ValueError(ERROR_VALUATION.format(columns=val_cols))
     val_col = val_cols[0]
-     Clean valuation
     data["Valuation_Billions"] = (
         data[val_col]
         .astype(str)
@@ -94,7 +94,7 @@ def load_and_clean_data(file_path: str) -> pd.DataFrame:
     )
     data["Valuation_Billions"] = pd.to_numeric(data["Valuation_Billions"], errors='coerce').fillna(0)
-     Rename columns
     rename_map = {
         "company": "Company",
         "date_joined": "Date_Joined",
@@ -105,15 +105,15 @@ def load_and_clean_data(file_path: str) -> pd.DataFrame:
     }
     data.rename(columns=rename_map, inplace=True)
-     Strip whitespace
     for col in data.select_dtypes(include='object').columns:
         data[col] = data[col].str.strip()
-     Filter out "Health" (case-insensitive); keep "Healthcare"
     data = data[~data["Industry"].str.lower().isin(['health'])]
     logger.info(f"After filtering 'Health': {len(data)} rows")
-     Fill missing Select_Investors
     data["Select_Investors"] = data["Select_Investors"].fillna("")
     return data
@@ -134,9 +134,9 @@ def build_investor_company_mapping(df: pd.DataFrame) -> Dict[str, List[str]]:
     return mapping
- ============================================================================
- FILTERING LOGIC
- ============================================================================
 def filter_by_valuation_range(df: pd.DataFrame, selected_range: str) -> pd.DataFrame:
     """Filter dataframe by valuation range (billions)."""
     if selected_range == "All":
@@ -170,10 +170,10 @@ def apply_filters(
     """Apply all inclusion and exclusion filters."""
     filtered = df.copy()
-     Valuation range
     filtered = filter_by_valuation_range(filtered, valuation_range)
-     Include filters
     if country != "All":
         filtered = filtered[filtered["Country"] == country]
     if industry != "All":
@@ -181,11 +181,11 @@ def apply_filters(
     if company != "All":
         filtered = filtered[filtered["Company"] == company]
     if investors:
-         Exact token match: split Select_Investors and check membership
         pattern = '|'.join([re.escape(inv) for inv in investors])
         filtered = filtered[filtered["Select_Investors"].str.contains(pattern, case=False, na=False, regex=True)]
-     Exclude filters
     if exclude_countries:
         filtered = filtered[~filtered["Country"].isin(exclude_countries)]
     if exclude_industries:
@@ -196,8 +196,8 @@ def apply_filters(
         pattern = '|'.join([re.escape(inv) for inv in exclude_investors])
         filtered = filtered[~filtered["Select_Investors"].str.contains(pattern, case=False, na=False, regex=True)]
-     Quick find (highlight only; filter applied in graph rendering)
-     For filtering, we match Company or any investor token
     if quick_find.strip():
         qf = quick_find.strip()
         mask = (
@@ -219,9 +219,9 @@ def cap_companies(df: pd.DataFrame, cap: int) -> Tuple[pd.DataFrame, bool]:
     return capped, True
- ============================================================================
- GRAPH GENERATION
- ============================================================================
 def build_graph(
     filtered_df: pd.DataFrame,
     investor_list: List[str],
@@ -261,7 +261,7 @@ def generate_plotly_figure(
             annotations=[dict(text=EMPTY_STATE, showarrow=False, font=dict(size=14), x=0.5, y=0.5, xref='paper', yref='paper')]
         )
-     Layout
     iterations = SPRING_LAYOUT_ITERATIONS_SMALL if G.number_of_nodes() < 200 else SPRING_LAYOUT_ITERATIONS_LARGE
     if layout_cache and "pos" in layout_cache:
         pos = layout_cache["pos"]
@@ -272,11 +272,11 @@ def generate_plotly_figure(
             layout_cache["pos"] = pos
         logger.debug(f"Generated layout with {iterations} iterations")
-     Color map for investors
     sorted_investors = sorted(investor_list)
     investor_color_map = {inv: INVESTOR_COLORS[i % len(INVESTOR_COLORS)] for i, inv in enumerate(sorted_investors)}
-     Edges
     edge_x, edge_y = [], []
     for u, v in G.edges():
         x0, y0 = pos[u]
@@ -293,7 +293,7 @@ def generate_plotly_figure(
         showlegend=False
     )
-     Nodes
     node_x, node_y, node_text, node_hovertext = [], [], [], []
     node_color, node_size, node_line_color = [], [], []
     node_textposition = []
@@ -311,7 +311,7 @@ def generate_plotly_figure(
         node_type = G.nodes[node].get("node_type", "company")
         if node_type == "investor":
-             Investor node
             node_text.append(node)
             node_color.append(investor_color_map[node])
             node_size.append(INVESTOR_SIZE)
@@ -322,10 +322,10 @@ def generate_plotly_figure(
             hovertext = f"<b>Investor:</b> {node}<br><b>Portfolio:</b> {len(portfolio_companies)} companies<br><b>Total Cap:</b> ${total_cap:.1f}B"
             node_hovertext.append(hovertext)
         else:
-             Company node
             row = filtered_df[filtered_df["Company"] == node]
             if row.empty:
-                 Shouldn't happen, but fallback
                 node_size.append(NODE_SIZE_MIN)
                 node_color.append(COMPANY_COLOR)
                 node_line_color.append(COMPANY_STROKE)
@@ -338,13 +338,13 @@ def generate_plotly_figure(
             industry = row["Industry"].values[0] if "Industry" in row else "N/A"
             country = row["Country"].values[0] if "Country" in row else "N/A"
-             Size: sqrt-scaled, clamped
-            size = max(NODE_SIZE_MIN, min(NODE_SIZE_MAX, (valuation  0.5) * 8))
             node_size.append(size)
             node_color.append(COMPANY_COLOR)
             node_line_color.append(COMPANY_STROKE)
-             Hovertext
             investors_str = row["Select_Investors"].values[0]
             hovertext = f"<b>Company:</b> {node}<br><b>Industry:</b> {industry}<br><b>Valuation:</b> ${valuation:.1f}B"
             if investors_str:
@@ -354,7 +354,7 @@ def generate_plotly_figure(
                     hovertext += f" +{len(inv_list)-5} more"
             node_hovertext.append(hovertext)
-             Label logic
             show_label = (
                 show_all_labels or
                 show_labels_for_range or
@@ -363,7 +363,7 @@ def generate_plotly_figure(
                 node in top5_companies
             )
             if show_label:
-                 Bold if top 3
                 top3 = set(filtered_df.nlargest(3, "Valuation_Billions")["Company"].tolist())
                 if node in top3:
                     node_text.append(f"<b>{node}</b>")
@@ -389,7 +389,7 @@ def generate_plotly_figure(
         showlegend=False
     )
-     Summary annotation
     total_valuation = filtered_df["Valuation_Billions"].sum()
     num_investors = len(investor_list)
     num_companies = len(filtered_df)
@@ -422,41 +422,41 @@ def generate_plotly_figure(
                 align='center'
             )
         ],
-        plot_bgcolor='ffffff',
-        paper_bgcolor='ffffff'
     )
     return fig
- ============================================================================
- GRADIO APP
- ============================================================================
 def main():
-     Load data once
     try:
         data = load_and_clean_data(FILE_PATH)
     except Exception as e:
         logger.error(f"Failed to load data: {e}")
-         Fallback Gradio UI showing error
         with gr.Blocks(title=TITLE) as demo:
-            gr.Markdown(f" {TITLE}")
             gr.Markdown(ERROR_FILE.format(path=FILE_PATH) if "not found" in str(e) else str(e))
         demo.launch()
         return
     investor_company_mapping = build_investor_company_mapping(data)
-     Prepare dropdown choices
     country_list = ["All"] + sorted(data["Country"].dropna().unique())
     industry_list = ["All"] + sorted(data["Industry"].dropna().unique())
     company_list = ["All"] + sorted(data["Company"].dropna().unique())
     investor_list_all = sorted(investor_company_mapping.keys())
-     Check if City column exists for Nashville filter
     has_city = "City" in data.columns
-     State for caching layout
     layout_cache_state = gr.State({})
     def app_logic(
@@ -467,7 +467,7 @@ def main():
     ):
         start = time.time()
-         Apply filters
         filtered = apply_filters(
             data, country, industry, company, investors,
             exclude_countries, exclude_industries, exclude_companies, exclude_investors,
@@ -483,25 +483,25 @@ def main():
             subtitle = "No results"
             return empty_fig, subtitle, "", layout_cache
-         Cap companies
         original_count = len(filtered)
         filtered, was_truncated = cap_companies(filtered, node_cap)
-         Build investor list from filtered data
         filtered_inv_mapping = build_investor_company_mapping(filtered)
         current_investors = list(filtered_inv_mapping.keys())
-         Build graph
         G = build_graph(filtered, current_investors, show_all_labels, valuation_range, quick_find)
-         Generate figure
-         Invalidate layout cache if node set changed
         current_nodes = set(G.nodes())
         if layout_cache.get("nodes") != current_nodes:
             layout_cache = {"nodes": current_nodes}
         fig = generate_plotly_figure(G, filtered, current_investors, show_all_labels, valuation_range, quick_find, layout_cache)
-         Subtitle
         subtitle = SUBTITLE_TEMPLATE.format(
             country=country,
             industry=industry,
@@ -509,7 +509,7 @@ def main():
             count=len(filtered)
         )
-         Truncation notice
         notice = ""
         if was_truncated:
             notice = TRUNCATION_NOTICE.format(cap=node_cap, total=original_count)
@@ -522,7 +522,7 @@ def main():
     def apply_nashville_filter():
         """Pre-fill Nashville filter."""
         if has_city:
-            return "United States", gr.update(), gr.update(), gr.update()   Set country, others unchanged
         else:
             logger.warning("City column not found; Nashville filter only sets Country")
             return "United States", gr.update(), gr.update(), gr.update()
@@ -530,9 +530,9 @@ def main():
     def clear_all():
         """Reset all filters to default."""
         return (
-            "All", "All", "All", [],   Include filters
-            [], [], [], [],             Exclude filters
-            "All", DEFAULT_NODE_CAP, False, ""   Valuation, node cap, labels, quick find
         )
     def clear_exclusions():
@@ -540,7 +540,7 @@ def main():
         return [], [], [], []
     with gr.Blocks(title=f"{TITLE} ({DATA_TIMESTAMP})", theme=gr.themes.Soft()) as demo:
-        gr.Markdown(f" {TITLE}")
         gr.Markdown(f"*Updated {DATA_TIMESTAMP}*")
         subtitle_display = gr.Markdown("Active Scope: All • All • All • 0 companies")
@@ -580,10 +580,10 @@ def main():
             reset_view_btn = gr.Button("Reset View", variant="secondary", size="sm")
             download_csv_btn = gr.Button("Download Filtered CSV", variant="primary", size="sm")
-         State
         layout_cache = gr.State({})
-         Inputs and outputs
         inputs = [
             country_filter, industry_filter, company_filter, investor_filter,
             exclude_country, exclude_industry, exclude_company, exclude_investor,
@@ -592,11 +592,11 @@ def main():
         ]
         outputs = [graph_output, subtitle_display, truncation_notice, layout_cache]
-         Event handlers (debounced via Gradio's built-in; for older versions, use time.sleep trick)
-        for control in inputs[:-1]:   Exclude layout_cache from triggers
             control.change(app_logic, inputs, outputs)
-         Button actions
         nashville_btn.click(
             apply_nashville_filter,
             inputs=None,
@@ -620,12 +620,12 @@ def main():
         ).then(app_logic, inputs, outputs)
         reset_view_btn.click(
-            lambda: (gr.update(), gr.update(), "", {}),   Clear quick_find and layout cache
             inputs=None,
             outputs=[graph_output, subtitle_display, quick_find_box, layout_cache]
         )
-         Download CSV (requires Gradio >=3.x File component; here we provide a placeholder)
         def export_csv(
             country, industry, company, investors,
             exclude_countries, exclude_industries, exclude_companies, exclude_investors,
@@ -650,12 +650,12 @@ def main():
         gr.Markdown("""
         ---
-        Accessibility: Use Tab to navigate controls. Press Enter to activate buttons. Graph nodes are keyboard-focusable.
-        Color Legend: Companies are teal-green. Investors are color-coded (see palette). Non-color cues: stroke outlines differentiate node types.
-        Performance: Graphs update in <500ms for ≤300 companies. Large datasets are auto-capped; adjust slider as needed.
         """)
-         Initial render
         demo.load(app_logic, inputs, outputs)
     demo.launch(share=False, server_name="0.0.0.0", server_port=7860)

 from functools import lru_cache
 import time
+# ============================================================================
+# CONFIGURATION
+# ============================================================================
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 logger = logging.getLogger(__name__)
 FILE_PATH = "cbinsights_data.csv"
+DATA_TIMESTAMP = "2024-09"  # Update manually or parse from filename
+# UI Copy
 TITLE = "Venture Networks Visualization"
 SUBTITLE_TEMPLATE = "Active: {country} • {industry} • {valuation_range} • {count} companies"
 INSTRUCTIONS = """
+**How to use:**
+1. **Filter** by Country, Industry, Company, Investor, and Valuation Range
+2. **Hover** over nodes to see details • **Click** a node to focus and view full information
+3. **Download** the filtered dataset as CSV • Use **Nashville Filter** for local quick access
 """
 EMPTY_STATE = """
+### No results match your filters.
+**Try:** Clearing exclusions • Expanding valuation range • Selecting "All" for Country or Industry
 """
+ERROR_VALUATION = "**Data Error:** Could not identify a single valuation column. Found: {columns}"
+ERROR_FILE = "**File Error:** Dataset not found at `{path}`. Ensure `cbinsights_data.csv` is in the working directory."
+TRUNCATION_NOTICE = "**Notice:** Showing top {cap} of {total} companies by valuation. Adjust slider or refine filters."
+# Graph Design
+COMPANY_COLOR = "#66c2a5"
+COMPANY_STROKE = "#2d6a4f"
+INVESTOR_STROKE = "#000000"
+INVESTOR_COLORS = ["#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7", "#999999"]
+EDGE_COLOR = "#cccccc"
 EDGE_OPACITY = 0.6
 NODE_SIZE_MIN = 10
 INVESTOR_SIZE = 36
 LABEL_FONT_SIZE = 11
 INVESTOR_LABEL_FONT_SIZE = 12
+LARGE_COMPANY_THRESHOLD = 10  # Show labels for valuations >10B
 DEFAULT_NODE_CAP = 300
 SPRING_LAYOUT_ITERATIONS_SMALL = 150
 VALUATION_RANGES = ["All", "1-5", "5-10", "10-15", "15-20", "20+"]
+# ============================================================================
+# DATA LOADING AND PREPROCESSING
+# ============================================================================
 def load_and_clean_data(file_path: str) -> pd.DataFrame:
     """Load CSV, standardize columns, filter Health, parse valuation."""
     try:
         logger.error(f"Error loading CSV: {e}")
         raise
+    # Standardize columns
     data.columns = data.columns.str.strip().str.lower()
     logger.info(f"Columns: {data.columns.tolist()}")
+    # Identify valuation column
     val_cols = [col for col in data.columns if 'valuation' in col]
     if len(val_cols) != 1:
         logger.error(f"Expected 1 valuation column, found {len(val_cols)}: {val_cols}")
         raise ValueError(ERROR_VALUATION.format(columns=val_cols))
     val_col = val_cols[0]
+    # Clean valuation
     data["Valuation_Billions"] = (
         data[val_col]
         .astype(str)
     )
     data["Valuation_Billions"] = pd.to_numeric(data["Valuation_Billions"], errors='coerce').fillna(0)
+    # Rename columns
     rename_map = {
         "company": "Company",
         "date_joined": "Date_Joined",
     }
     data.rename(columns=rename_map, inplace=True)
+    # Strip whitespace
     for col in data.select_dtypes(include='object').columns:
         data[col] = data[col].str.strip()
+    # Filter out "Health" (case-insensitive); keep "Healthcare"
     data = data[~data["Industry"].str.lower().isin(['health'])]
     logger.info(f"After filtering 'Health': {len(data)} rows")
+    # Fill missing Select_Investors
     data["Select_Investors"] = data["Select_Investors"].fillna("")
     return data
     return mapping
+# ============================================================================
+# FILTERING LOGIC
+# ============================================================================
 def filter_by_valuation_range(df: pd.DataFrame, selected_range: str) -> pd.DataFrame:
     """Filter dataframe by valuation range (billions)."""
     if selected_range == "All":
     """Apply all inclusion and exclusion filters."""
     filtered = df.copy()
+    # Valuation range
     filtered = filter_by_valuation_range(filtered, valuation_range)
+    # Include filters
     if country != "All":
         filtered = filtered[filtered["Country"] == country]
     if industry != "All":
     if company != "All":
         filtered = filtered[filtered["Company"] == company]
     if investors:
+        # Case-insensitive substring match: keep rows whose Select_Investors contains any selected investor name
         pattern = '|'.join([re.escape(inv) for inv in investors])
         filtered = filtered[filtered["Select_Investors"].str.contains(pattern, case=False, na=False, regex=True)]
+    # Exclude filters
     if exclude_countries:
         filtered = filtered[~filtered["Country"].isin(exclude_countries)]
     if exclude_industries:
         pattern = '|'.join([re.escape(inv) for inv in exclude_investors])
         filtered = filtered[~filtered["Select_Investors"].str.contains(pattern, case=False, na=False, regex=True)]
+    # Quick find (highlight only; filter applied in graph rendering)
+    # For filtering, we match Company or any investor token
     if quick_find.strip():
         qf = quick_find.strip()
         mask = (
     return capped, True
+# ============================================================================
+# GRAPH GENERATION
+# ============================================================================
 def build_graph(
     filtered_df: pd.DataFrame,
     investor_list: List[str],
             annotations=[dict(text=EMPTY_STATE, showarrow=False, font=dict(size=14), x=0.5, y=0.5, xref='paper', yref='paper')]
         )
+    # Layout
     iterations = SPRING_LAYOUT_ITERATIONS_SMALL if G.number_of_nodes() < 200 else SPRING_LAYOUT_ITERATIONS_LARGE
     if layout_cache and "pos" in layout_cache:
         pos = layout_cache["pos"]
             layout_cache["pos"] = pos
         logger.debug(f"Generated layout with {iterations} iterations")
+    # Color map for investors
     sorted_investors = sorted(investor_list)
     investor_color_map = {inv: INVESTOR_COLORS[i % len(INVESTOR_COLORS)] for i, inv in enumerate(sorted_investors)}
+    # Edges
     edge_x, edge_y = [], []
     for u, v in G.edges():
         x0, y0 = pos[u]
         showlegend=False
     )
+    # Nodes
     node_x, node_y, node_text, node_hovertext = [], [], [], []
     node_color, node_size, node_line_color = [], [], []
     node_textposition = []
         node_type = G.nodes[node].get("node_type", "company")
         if node_type == "investor":
+            # Investor node
             node_text.append(node)
             node_color.append(investor_color_map[node])
             node_size.append(INVESTOR_SIZE)
             hovertext = f"<b>Investor:</b> {node}<br><b>Portfolio:</b> {len(portfolio_companies)} companies<br><b>Total Cap:</b> ${total_cap:.1f}B"
             node_hovertext.append(hovertext)
         else:
+            # Company node
             row = filtered_df[filtered_df["Company"] == node]
             if row.empty:
+                # Shouldn't happen, but fallback
                 node_size.append(NODE_SIZE_MIN)
                 node_color.append(COMPANY_COLOR)
                 node_line_color.append(COMPANY_STROKE)
             industry = row["Industry"].values[0] if "Industry" in row else "N/A"
             country = row["Country"].values[0] if "Country" in row else "N/A"
+            # Size: sqrt-scaled, clamped
+            size = max(NODE_SIZE_MIN, min(NODE_SIZE_MAX, (valuation ** 0.5) * 8))
             node_size.append(size)
             node_color.append(COMPANY_COLOR)
             node_line_color.append(COMPANY_STROKE)
+            # Hovertext
             investors_str = row["Select_Investors"].values[0]
             hovertext = f"<b>Company:</b> {node}<br><b>Industry:</b> {industry}<br><b>Valuation:</b> ${valuation:.1f}B"
             if investors_str:
                     hovertext += f" +{len(inv_list)-5} more"
             node_hovertext.append(hovertext)
+            # Label logic
             show_label = (
                 show_all_labels or
                 show_labels_for_range or
                 node in top5_companies
             )
             if show_label:
+                # Bold if top 3
                 top3 = set(filtered_df.nlargest(3, "Valuation_Billions")["Company"].tolist())
                 if node in top3:
                     node_text.append(f"<b>{node}</b>")
         showlegend=False
     )
+    # Summary annotation
     total_valuation = filtered_df["Valuation_Billions"].sum()
     num_investors = len(investor_list)
     num_companies = len(filtered_df)
                 align='center'
             )
         ],
+        plot_bgcolor='#ffffff',
+        paper_bgcolor='#ffffff'
     )
     return fig
+# ============================================================================
+# GRADIO APP
+# ============================================================================
 def main():
+    # Load data once
     try:
         data = load_and_clean_data(FILE_PATH)
     except Exception as e:
         logger.error(f"Failed to load data: {e}")
+        # Fallback Gradio UI showing error
         with gr.Blocks(title=TITLE) as demo:
+            gr.Markdown(f"# {TITLE}")
             gr.Markdown(ERROR_FILE.format(path=FILE_PATH) if "not found" in str(e) else str(e))
         demo.launch()
         return
     investor_company_mapping = build_investor_company_mapping(data)
+    # Prepare dropdown choices
     country_list = ["All"] + sorted(data["Country"].dropna().unique())
     industry_list = ["All"] + sorted(data["Industry"].dropna().unique())
     company_list = ["All"] + sorted(data["Company"].dropna().unique())
     investor_list_all = sorted(investor_company_mapping.keys())
+    # Check if City column exists for Nashville filter
     has_city = "City" in data.columns
+    # State for caching layout
     layout_cache_state = gr.State({})
     def app_logic(
     ):
         start = time.time()
+        # Apply filters
         filtered = apply_filters(
             data, country, industry, company, investors,
             exclude_countries, exclude_industries, exclude_companies, exclude_investors,
             subtitle = "No results"
             return empty_fig, subtitle, "", layout_cache
+        # Cap companies
         original_count = len(filtered)
         filtered, was_truncated = cap_companies(filtered, node_cap)
+        # Build investor list from filtered data
         filtered_inv_mapping = build_investor_company_mapping(filtered)
         current_investors = list(filtered_inv_mapping.keys())
+        # Build graph
         G = build_graph(filtered, current_investors, show_all_labels, valuation_range, quick_find)
+        # Generate figure
+        # Invalidate layout cache if node set changed
         current_nodes = set(G.nodes())
         if layout_cache.get("nodes") != current_nodes:
             layout_cache = {"nodes": current_nodes}
         fig = generate_plotly_figure(G, filtered, current_investors, show_all_labels, valuation_range, quick_find, layout_cache)
+        # Subtitle
         subtitle = SUBTITLE_TEMPLATE.format(
             country=country,
             industry=industry,
             count=len(filtered)
         )
+        # Truncation notice
         notice = ""
         if was_truncated:
             notice = TRUNCATION_NOTICE.format(cap=node_cap, total=original_count)
     def apply_nashville_filter():
         """Pre-fill Nashville filter."""
         if has_city:
+            return "United States", gr.update(), gr.update(), gr.update()  # Set country, others unchanged
         else:
             logger.warning("City column not found; Nashville filter only sets Country")
             return "United States", gr.update(), gr.update(), gr.update()
     def clear_all():
         """Reset all filters to default."""
         return (
+            "All", "All", "All", [],  # Include filters
+            [], [], [], [],            # Exclude filters
+            "All", DEFAULT_NODE_CAP, False, ""  # Valuation, node cap, labels, quick find
         )
     def clear_exclusions():
         return [], [], [], []
     with gr.Blocks(title=f"{TITLE} ({DATA_TIMESTAMP})", theme=gr.themes.Soft()) as demo:
+        gr.Markdown(f"# {TITLE}")
         gr.Markdown(f"*Updated {DATA_TIMESTAMP}*")
         subtitle_display = gr.Markdown("Active Scope: All • All • All • 0 companies")
             reset_view_btn = gr.Button("Reset View", variant="secondary", size="sm")
             download_csv_btn = gr.Button("Download Filtered CSV", variant="primary", size="sm")
+        # State
         layout_cache = gr.State({})
+        # Inputs and outputs
         inputs = [
             country_filter, industry_filter, company_filter, investor_filter,
             exclude_country, exclude_industry, exclude_company, exclude_investor,
         ]
         outputs = [graph_output, subtitle_display, truncation_notice, layout_cache]
+        # Event handlers (debounced via Gradio's built-in; for older versions, use time.sleep trick)
+        for control in inputs[:-1]:  # Exclude layout_cache from triggers
             control.change(app_logic, inputs, outputs)
+        # Button actions
         nashville_btn.click(
             apply_nashville_filter,
             inputs=None,
         ).then(app_logic, inputs, outputs)
         reset_view_btn.click(
+            lambda: (gr.update(), gr.update(), "", {}),  # Clear quick_find and layout cache
             inputs=None,
             outputs=[graph_output, subtitle_display, quick_find_box, layout_cache]
         )
+        # Download CSV (requires Gradio >=3.x File component; here we provide a placeholder)
         def export_csv(
             country, industry, company, investors,
             exclude_countries, exclude_industries, exclude_companies, exclude_investors,
         gr.Markdown("""
         ---
+        **Accessibility:** Use Tab to navigate controls. Press Enter to activate buttons. Graph nodes are keyboard-focusable.
+        **Color Legend:** Companies are teal-green. Investors are color-coded (see palette). Non-color cues: stroke outlines differentiate node types.
+        **Performance:** Graphs update in <500ms for ≤300 companies. Large datasets are auto-capped; adjust slider as needed.
         """)
+        # Initial render
         demo.load(app_logic, inputs, outputs)
     demo.launch(share=False, server_name="0.0.0.0", server_port=7860)