diff --git "a/app.py" "b/app.py"
new file mode 100644--- /dev/null
+++ "b/app.py"
@@ -0,0 +1,7991 @@
+# app.py
+# NAVADA (Startup Viability Agent) - A Chainlit-powered AI agent for analyzing startup risk and failure patterns
+# Features: Investor Mode, Founder Mode, UK Economist Mode with macroeconomic analysis
+
+print("DEBUG: Starting app.py imports", flush=True)
+
+# =============================
+# IMPORTS
+# =============================
+import io # For in-memory file operations (byte streams)
+import os # Operating system interface for environment variables and file operations
+import time # Time-related functions for delays and timing operations
+import re # Regular expressions for pattern matching and text processing
+import requests # HTTP library for making API calls
+from datetime import datetime # Date/time handling for timestamps
+from typing import Dict, List, Optional, Any # Type hints for better code documentation
+import uuid # UUID generation for unique identifiers
+import math # Mathematical operations (currently unused but available)
+import json # JSON parsing (currently unused but available)
+import asyncio # Async/await support for concurrent operations
+import logging # Logging system for error tracking and debugging
+import traceback # Detailed error tracebacks for debugging
+import functools # Function decorators and utilities
+print("DEBUG: Importing chainlit", flush=True)
+import chainlit as cl # Chainlit framework for building conversational AI interfaces
+print("DEBUG: Chainlit imported", flush=True)
+import pandas as pd # Data manipulation and analysis with DataFrames
+import numpy as np # Numerical operations for calculations
+print("DEBUG: Importing matplotlib", flush=True)
+import matplotlib
+matplotlib.use('Agg') # Use non-interactive backend for server environments
+import matplotlib.pyplot as plt # Core plotting library for creating visualizations
+print("DEBUG: Matplotlib imported", flush=True)
+import seaborn as sns # Statistical data visualization built on matplotlib
+import plotly.express as px # Interactive plotting library for dynamic visualizations
+import plotly.graph_objects as go # Low-level plotly interface for custom charts
+import plotly.io as pio # Plotly I/O utilities for saving/converting charts
+from plotly.subplots import make_subplots # Create subplot layouts for dashboards
+import requests # HTTP library for making web requests and scraping
+from bs4 import BeautifulSoup # HTML/XML parser for web scraping
+from urllib.parse import urlparse # URL validation and parsing utilities
+import re # Regular expressions for text processing and validation
+import scipy.stats as stats # Statistical functions for analysis
+import random # Random number generation for Monte Carlo simulations
+
+from typing import Dict, Any, List, Optional # Type hints for better code documentation
+from dataclasses import dataclass, field # For structured data classes
+from openai import OpenAI # OpenAI API client for GPT model interactions
+from dotenv import load_dotenv # Load environment variables from .env file
+import uuid # For generating unique thread/session IDs
+from IPython.display import display # IPython display utilities (not actively used)
+# Optional ML imports - handle gracefully if not available (for Vercel size limits)
+try:
+ from sklearn.model_selection import train_test_split # Split data for ML training
+ from sklearn.ensemble import RandomForestClassifier # Random Forest model for predictions
+ SKLEARN_AVAILABLE = True
+except ImportError:
+ SKLEARN_AVAILABLE = False
+ print("WARNING: scikit-learn not available - ML features disabled")
+
+# LangChain & LangSmith imports for hosting
+print("DEBUG: Importing langchain_openai", flush=True)
+from langchain_openai import OpenAIEmbeddings, ChatOpenAI
+print("DEBUG: langchain_openai imported", flush=True)
+print("DEBUG: Importing langchain_core.documents", flush=True)
+from langchain_core.documents import Document
+print("DEBUG: langchain_core.documents imported", flush=True)
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+# Optional vector store imports - handle gracefully if not available
+try:
+ from langchain_chroma import Chroma
+ from langchain.chains import RetrievalQA
+ CHROMA_AVAILABLE = True
+except ImportError:
+ CHROMA_AVAILABLE = False
+ print("WARNING: Chroma vector store not available - RAG features disabled")
+from langsmith import traceable, Client as LangSmithClient
+from langsmith.wrappers import wrap_openai
+import langsmith as ls
+
+print("DEBUG: All imports complete", flush=True)
+
+# =============================
+# ERROR HANDLING & LOGGING SETUP
+# =============================
+
+# Configure logging
+logging.basicConfig(
+ level=logging.INFO,
+ format='%(asctime)s - %(message)s',
+ handlers=[
+ logging.StreamHandler()
+ ]
+)
+logger = logging.getLogger(__name__)
+
+def safe_api_call(func):
+ """Decorator for safe API calls with comprehensive error handling."""
+ @functools.wraps(func)
+ def wrapper(*args, **kwargs):
+ try:
+ return func(*args, **kwargs)
+ except requests.exceptions.RequestException as e:
+ logger.error(f"Network error in {func.__name__}: {e}")
+ return {"error": f"Network error: {str(e)}", "success": False}
+ except ValueError as e:
+ logger.error(f"Value error in {func.__name__}: {e}")
+ return {"error": f"Invalid data: {str(e)}", "success": False}
+ except KeyError as e:
+ logger.error(f"Missing key in {func.__name__}: {e}")
+ return {"error": f"Missing required field: {str(e)}", "success": False}
+ except Exception as e:
+ logger.error(f"Unexpected error in {func.__name__}: {e}")
+ logger.error(f"Traceback: {traceback.format_exc()}")
+ return {"error": f"Unexpected error: {str(e)}", "success": False}
+ return wrapper
+
+def safe_async_api_call(func):
+ """Decorator for safe async API calls with comprehensive error handling."""
+ @functools.wraps(func)
+ async def wrapper(*args, **kwargs):
+ try:
+ return await func(*args, **kwargs)
+ except requests.exceptions.RequestException as e:
+ logger.error(f"Network error in {func.__name__}: {e}")
+ return {"error": f"Network error: {str(e)}", "success": False}
+ except ValueError as e:
+ logger.error(f"Value error in {func.__name__}: {e}")
+ return {"error": f"Invalid data: {str(e)}", "success": False}
+ except KeyError as e:
+ logger.error(f"Missing key in {func.__name__}: {e}")
+ return {"error": f"Missing required field: {str(e)}", "success": False}
+ except Exception as e:
+ logger.error(f"Unexpected error in {func.__name__}: {e}")
+ logger.error(f"Traceback: {traceback.format_exc()}")
+ return {"error": f"Unexpected error: {str(e)}", "success": False}
+ return wrapper
+
+def validate_environment():
+ """Validate environment setup and dependencies."""
+ logger.info("Starting environment validation...")
+
+ issues = []
+
+ # Check critical environment variables
+ required_vars = ["OPENAI_API_KEY"]
+ for var in required_vars:
+ if not os.getenv(var) or os.getenv(var) == "your_openai_api_key_here":
+ issues.append(f"Missing or placeholder value for {var}")
+
+ # Check optional but recommended environment variables
+ optional_vars = {
+ "LANGSMITH_API_KEY": "LangSmith tracing",
+ "SEARCH_API_KEY": "web search functionality",
+ "TTS_PROMPT_ID": "text-to-speech features",
+ "LANGCHAIN_DATABASE_ID": "vector database features"
+ }
+
+ for var, feature in optional_vars.items():
+ value = os.getenv(var)
+ if not value or value.startswith("your_"):
+ logger.warning(f"Optional {var} not configured - {feature} will be disabled")
+
+ # Check critical imports
+ try:
+ import matplotlib.pyplot as plt
+ logger.info("✅ Matplotlib available")
+ except ImportError:
+ issues.append("Matplotlib not available - chart generation will fail")
+
+ try:
+ from langchain_chroma import Chroma
+ logger.info("✅ LangChain Chroma available")
+ except ImportError:
+ issues.append("LangChain Chroma not available - RAG features disabled")
+
+ try:
+ import openai
+ logger.info("✅ OpenAI library available")
+ except ImportError:
+ issues.append("OpenAI library not available - core functionality will fail")
+
+ if issues:
+ logger.error("Environment validation failed:")
+ for issue in issues:
+ logger.error(f" - {issue}")
+ return False, issues
+ else:
+ logger.info("✅ Environment validation passed")
+ return True, []
+
+def create_startup_health_check():
+ """Perform comprehensive health checks during startup."""
+ logger.info("🔍 Performing startup health checks...")
+
+ # Get environment variables for health checks
+ api_key_check = os.getenv("OPENAI_API_KEY")
+ langsmith_api_key_check = os.getenv("LANGSMITH_API_KEY")
+ search_api_key_check = os.getenv("SEARCH_API_KEY")
+
+ health_status = {
+ "overall": True,
+ "checks": {},
+ "warnings": [],
+ "errors": []
+ }
+
+ # Test matplotlib chart generation
+ try:
+ import matplotlib.pyplot as plt
+ fig, ax = plt.subplots(figsize=(1, 1))
+ ax.plot([1, 2], [1, 2])
+ buf = io.BytesIO()
+ fig.savefig(buf, format='png')
+ plt.close(fig)
+ health_status["checks"]["matplotlib"] = "✅ Working"
+ logger.info("✅ Matplotlib chart generation test passed")
+ except Exception as e:
+ health_status["checks"]["matplotlib"] = f"❌ Failed: {e}"
+ health_status["errors"].append(f"Matplotlib test failed: {e}")
+ health_status["overall"] = False
+ logger.error(f"❌ Matplotlib test failed: {e}")
+
+ # Test OpenAI client initialization
+ try:
+ if api_key_check and api_key_check != "your_openai_api_key_here":
+ # Don't make actual API call, just test client creation
+ from openai import OpenAI
+ test_client = OpenAI(api_key=api_key_check)
+ health_status["checks"]["openai"] = "✅ Client initialized"
+ logger.info("✅ OpenAI client initialization test passed")
+ else:
+ health_status["checks"]["openai"] = "⚠️ No valid API key"
+ health_status["warnings"].append("OpenAI API key not configured")
+ logger.warning("⚠️ OpenAI API key not configured")
+ except Exception as e:
+ health_status["checks"]["openai"] = f"❌ Failed: {e}"
+ health_status["errors"].append(f"OpenAI client test failed: {e}")
+ logger.error(f"❌ OpenAI client test failed: {e}")
+
+ # Test vector store functionality
+ try:
+ if CHROMA_AVAILABLE:
+ # Test basic Chroma functionality without creating actual store
+ from langchain_chroma import Chroma
+ health_status["checks"]["vector_store"] = "✅ Available"
+ logger.info("✅ Vector store (Chroma) available")
+ else:
+ health_status["checks"]["vector_store"] = "❌ Not available"
+ health_status["warnings"].append("Vector store not available - RAG features disabled")
+ logger.warning("⚠️ Vector store not available")
+ except Exception as e:
+ health_status["checks"]["vector_store"] = f"❌ Failed: {e}"
+ health_status["errors"].append(f"Vector store test failed: {e}")
+ logger.error(f"❌ Vector store test failed: {e}")
+
+ # Test search API configuration
+ if search_api_key_check and search_api_key_check != "your_brave_search_api_key_here":
+ health_status["checks"]["search_api"] = "✅ Configured"
+ logger.info("✅ Search API key configured")
+ else:
+ health_status["checks"]["search_api"] = "⚠️ Not configured"
+ health_status["warnings"].append("Search API not configured - web search disabled")
+ logger.warning("⚠️ Search API key not configured")
+
+ # Test LangSmith configuration - need to check after client initialization
+ langsmith_check = langsmith_api_key_check and langsmith_api_key_check != "your_langsmith_api_key_here"
+ if langsmith_check:
+ health_status["checks"]["langsmith"] = "✅ Configured"
+ logger.info("✅ LangSmith API key configured")
+ else:
+ health_status["checks"]["langsmith"] = "⚠️ Not configured"
+ health_status["warnings"].append("LangSmith tracing disabled")
+ logger.warning("⚠️ LangSmith API key not configured")
+
+ # Log summary
+ if health_status["overall"]:
+ logger.info("✅ All critical health checks passed")
+ else:
+ logger.error("❌ Some critical health checks failed")
+
+ if health_status["warnings"]:
+ logger.info(f"⚠️ {len(health_status['warnings'])} warnings found")
+
+ if health_status["errors"]:
+ logger.error(f"❌ {len(health_status['errors'])} errors found")
+
+ return health_status
+
+def retry_on_failure(max_retries=3, delay=1):
+ """Decorator to retry functions on failure with exponential backoff."""
+ def decorator(func):
+ @functools.wraps(func)
+ def wrapper(*args, **kwargs):
+ last_exception = None
+ for attempt in range(max_retries):
+ try:
+ return func(*args, **kwargs)
+ except Exception as e:
+ last_exception = e
+ if attempt < max_retries - 1:
+ wait_time = delay * (2 ** attempt) # Exponential backoff
+ logger.warning(f"Attempt {attempt + 1} failed for {func.__name__}: {e}. Retrying in {wait_time}s...")
+ time.sleep(wait_time)
+ else:
+ logger.error(f"All {max_retries} attempts failed for {func.__name__}: {e}")
+ raise last_exception
+ return wrapper
+ return decorator
+
+def create_error_recovery_system():
+ """Create a comprehensive error recovery system."""
+ recovery_handlers = {
+ "api_timeout": lambda: "Service temporarily unavailable. Please try again in a moment.",
+ "rate_limit": lambda: "Rate limit exceeded. Please wait a moment before trying again.",
+ "authentication": lambda: "Authentication failed. Please check your API keys configuration.",
+ "network_error": lambda: "Network connection error. Please check your internet connection.",
+ "service_unavailable": lambda: "External service unavailable. Using fallback mode.",
+ }
+
+ return recovery_handlers
+
+# Initialize recovery system
+recovery_system = create_error_recovery_system()
+
+# =============================
+# AUTHENTICATION INTEGRATION
+# =============================
+
+# Import authentication system
+print("DEBUG: About to import auth_manager", flush=True)
+try:
+ from auth_manager import auth_manager
+ AUTH_AVAILABLE = True
+ logger.info("✅ Authentication system loaded")
+ print("DEBUG: Auth manager loaded successfully", flush=True)
+except ImportError as e:
+ AUTH_AVAILABLE = False
+ logger.warning(f"⚠️ Authentication system not available: {e}")
+
+def check_user_authentication() -> Dict[str, Any]:
+ """Check if user is authenticated in current session."""
+ if not AUTH_AVAILABLE:
+ return {"authenticated": False, "reason": "auth_system_unavailable"}
+
+ # Check for authentication token in session
+ auth_token = cl.user_session.get("auth_token")
+ session_token = cl.user_session.get("session_token")
+
+ if not auth_token or not session_token:
+ return {"authenticated": False, "reason": "no_token"}
+
+ # Validate session with auth manager
+ session_result = auth_manager.validate_session(session_token)
+
+ if not session_result["valid"]:
+ # Clear invalid session data
+ cl.user_session.set("auth_token", None)
+ cl.user_session.set("session_token", None)
+ cl.user_session.set("user_id", None)
+ cl.user_session.set("username", None)
+ return {"authenticated": False, "reason": "invalid_session"}
+
+ return {
+ "authenticated": True,
+ "user_id": session_result["user_id"],
+ "username": session_result["username"],
+ "email": session_result["email"],
+ "subscription_tier": session_result["subscription_tier"]
+ }
+
+async def show_login_form():
+ """Display login/register form to user."""
+ await cl.Message(
+ content="🔐 **Welcome to NAVADA!** Please log in or register to continue.\n\n"
+ "**Login Instructions:**\n"
+ "• Type: `login username password`\n"
+ "• Example: `login john mypassword123`\n\n"
+ "**Register Instructions:**\n"
+ "• Type: `register username password email`\n"
+ "• Example: `register john mypassword123 john@example.com`\n"
+ "• Email is optional: `register john mypassword123`\n\n"
+ "**Demo Account (for testing):**\n"
+ "• Username: `demo`\n"
+ "• Password: `demo123`\n"
+ "• Type: `login demo demo123`"
+ ).send()
+
+async def handle_login_command(user_input: str) -> bool:
+ """Handle login command and authenticate user."""
+ parts = user_input.strip().split()
+
+ if len(parts) < 3:
+ await cl.Message(
+ content="❌ **Invalid login format**\n\n"
+ "Please use: `login username password`\n"
+ "Example: `login john mypassword123`"
+ ).send()
+ return False
+
+ username = parts[1]
+ password = parts[2]
+
+ # Show authentication progress
+ auth_msg = cl.Message(content="🔄 Authenticating...")
+ await auth_msg.send()
+
+ # Attempt authentication
+ auth_result = auth_manager.authenticate_user(username, password)
+
+ if auth_result["success"]:
+ # Store authentication data in session
+ cl.user_session.set("auth_token", auth_result["jwt_token"])
+ cl.user_session.set("session_token", auth_result["session_token"])
+ cl.user_session.set("user_id", auth_result["user_id"])
+ cl.user_session.set("username", auth_result["username"])
+ cl.user_session.set("user_email", auth_result.get("email"))
+ cl.user_session.set("subscription_tier", auth_result.get("subscription_tier", "free"))
+
+ # Update message with success
+ auth_msg.content = f"✅ **Welcome back, {auth_result['username']}!**\n\n" \
+ f"🎯 **Account Type:** {auth_result.get('subscription_tier', 'free').title()}\n" \
+ f"📧 **Email:** {auth_result.get('email', 'Not provided')}\n\n" \
+ f"You can now use all NAVADA features! Type **'help'** to get started."
+ await auth_msg.update()
+
+ # Log the login
+ if AUTH_AVAILABLE:
+ session_id = cl.user_session.get("session_id", get_session_id())
+ auth_manager.log_user_action(
+ auth_result["user_id"],
+ "chainlit_login",
+ "authentication",
+ session_id=session_id
+ )
+
+ return True
+ else:
+ # Update message with error
+ auth_msg.content = f"❌ **Login failed:** {auth_result['error']}\n\n" \
+ f"Please check your username and password and try again.\n" \
+ f"Format: `login username password`"
+ await auth_msg.update()
+ return False
+
+async def handle_register_command(user_input: str) -> bool:
+ """Handle register command and create new user."""
+ parts = user_input.strip().split()
+
+ if len(parts) < 3:
+ await cl.Message(
+ content="❌ **Invalid registration format**\n\n"
+ "Please use: `register username password [email]`\n"
+ "Examples:\n"
+ "• `register john mypassword123 john@example.com`\n"
+ "• `register john mypassword123` (without email)"
+ ).send()
+ return False
+
+ username = parts[1]
+ password = parts[2]
+ email = parts[3] if len(parts) > 3 else None
+
+ # Basic validation
+ if len(username) < 3:
+ await cl.Message(content="❌ Username must be at least 3 characters long").send()
+ return False
+
+ if len(password) < 6:
+ await cl.Message(content="❌ Password must be at least 6 characters long").send()
+ return False
+
+ # Show registration progress
+ reg_msg = cl.Message(content="🔄 Creating account...")
+ await reg_msg.send()
+
+ # Attempt registration
+ reg_result = auth_manager.register_user(username, password, email)
+
+ if reg_result["success"]:
+ # Auto-login after successful registration
+ auth_result = auth_manager.authenticate_user(username, password)
+
+ if auth_result["success"]:
+ # Store authentication data in session
+ cl.user_session.set("auth_token", auth_result["jwt_token"])
+ cl.user_session.set("session_token", auth_result["session_token"])
+ cl.user_session.set("user_id", auth_result["user_id"])
+ cl.user_session.set("username", auth_result["username"])
+ cl.user_session.set("user_email", auth_result.get("email"))
+ cl.user_session.set("subscription_tier", auth_result.get("subscription_tier", "free"))
+
+ # Update message with success
+ reg_msg.content = f"🎉 **Account created successfully!**\n\n" \
+ f"👤 **Username:** {auth_result['username']}\n" \
+ f"📧 **Email:** {auth_result.get('email', 'Not provided')}\n" \
+ f"🎯 **Account Type:** {auth_result.get('subscription_tier', 'free').title()}\n\n" \
+ f"You're now logged in and ready to use NAVADA! Type **'help'** to get started."
+ await reg_msg.update()
+
+ return True
+
+ # Update message with error
+ reg_msg.content = f"❌ **Registration failed:** {reg_result['error']}\n\n" \
+ f"Please try a different username or check your details.\n" \
+ f"Format: `register username password [email]`"
+ await reg_msg.update()
+ return False
+
+# Create demo account on startup if it doesn't exist
+print("DEBUG: About to create demo account", flush=True)
+if AUTH_AVAILABLE:
+ try:
+ # Try to create demo account (will fail silently if it already exists)
+ demo_result = auth_manager.register_user("demo", "demo123", "demo@navada.ai")
+ if demo_result["success"]:
+ logger.info("✅ Demo account created: demo/demo123")
+ except Exception:
+ pass # Demo account likely already exists
+print("DEBUG: Demo account check complete", flush=True)
+
+# =============================
+# INITIAL SETUP & CONFIGURATION
+# =============================
+# Load environment variables (OPENAI_API_KEY, LANGSMITH_API_KEY) from .env file
+# This keeps sensitive API keys out of the source code
+load_dotenv(override=True) # Force override of existing environment variables
+
+# Validate environment setup
+env_valid, env_issues = validate_environment()
+if not env_valid:
+ logger.warning("Environment validation found issues - some features may not work correctly")
+
+# Get API keys from environment
+api_key = os.getenv("OPENAI_API_KEY")
+langsmith_api_key = os.getenv("LANGSMITH_API_KEY")
+search_api_key = os.getenv("SEARCH_API_KEY")
+tts_prompt_id = os.getenv("TTS_PROMPT_ID")
+langchain_database_id = os.getenv("LANGCHAIN_DATABASE_ID")
+
+# Perform startup health checks after environment variables are loaded
+health_status = create_startup_health_check()
+
+# Configure LangSmith project name for tracing
+LANGSMITH_PROJECT = os.getenv("LANGSMITH_PROJECT", "navada-startup-agent")
+
+# Initialize OpenAI client with optional LangSmith wrapping for tracing
+if api_key:
+ base_client = OpenAI(api_key=api_key)
+ # Wrap with LangSmith if API key is available
+ if langsmith_api_key:
+ try:
+ client = wrap_openai(base_client)
+ langsmith_client = LangSmithClient(api_key=langsmith_api_key)
+ print("SUCCESS: LangSmith tracing enabled")
+ except Exception as e:
+ print(f"WARNING: LangSmith initialization failed: {e}")
+ print("INFO: Continuing without LangSmith tracing")
+ client = base_client
+ langsmith_client = None
+ else:
+ client = base_client
+ langsmith_client = None
+ print("INFO: LangSmith tracing disabled (no API key)")
+else:
+ client = OpenAI() # Will use default OPENAI_API_KEY from environment
+ langsmith_client = None
+
+# =============================
+# LANGSMITH THREAD MANAGEMENT
+# =============================
+
+def get_thread_history(thread_id: str, project_name: str):
+ """Get conversation history for a thread using LangSmith."""
+ if not langsmith_client:
+ return []
+
+ try:
+ # Filter runs by the specific thread and project
+ filter_string = f'and(in(metadata_key, ["session_id","conversation_id","thread_id"]), eq(metadata_value, "{thread_id}"))'
+ # Only grab the LLM runs
+ runs = [r for r in langsmith_client.list_runs(project_name=project_name, filter=filter_string, run_type="llm")]
+
+ if not runs:
+ return []
+
+ # Sort by start time to get the most recent interaction
+ runs = sorted(runs, key=lambda run: run.start_time, reverse=True)
+
+ # Build conversation history from runs
+ messages = []
+ for run in reversed(runs): # Reverse to get chronological order
+ if run.inputs and 'messages' in run.inputs:
+ # Add the user message from inputs
+ user_messages = [msg for msg in run.inputs['messages'] if msg['role'] == 'user']
+ if user_messages:
+ messages.extend(user_messages)
+
+ if run.outputs and 'choices' in run.outputs:
+ # Add the assistant response
+ assistant_message = {
+ "role": "assistant",
+ "content": run.outputs['choices'][0]['message']['content']
+ }
+ messages.append(assistant_message)
+
+ return messages
+
+ except Exception as e:
+ print(f"⚠️ Error getting thread history: {e}")
+ return []
+
+# =============================
+# LANGSMITH SETUP FOR HOSTING
+# =============================
+# Defer LangChain initialization to avoid startup delays
+embeddings = None
+llm = None
+
+def initialize_langchain_components():
+ """Lazy initialization of LangChain components to speed up startup."""
+ global embeddings, llm
+ if embeddings is None or llm is None:
+ try:
+ embeddings = OpenAIEmbeddings()
+ llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.3)
+ logger.info("✅ LangChain components initialized successfully")
+ except Exception as e:
+ logger.warning(f"⚠️ Failed to initialize LangChain components: {e}")
+ embeddings = None
+ llm = None
+ return embeddings, llm
+
+# Vector store and knowledge base for RAG
+vector_store = None
+knowledge_base = []
+FEEDBACK_STORAGE = []
+
+# =============================
+# SESSION MEMORY & PERSONAS
+# =============================
+# Store conversation history, thread IDs, and persona settings per session
+SESSION_MEMORY = {} # Stores conversation history per session ID
+THREAD_SESSIONS = {} # Maps session IDs to thread IDs for LangSmith
+PERSONAS = {
+ "investor": {
+ "name": "Investor Mode",
+ "system_prompt": (
+ "You are a seasoned venture capitalist with 15+ years experience managing $500M+ fund. "
+ "Your expertise includes: Series A-C valuations, due diligence, portfolio optimization, and exit strategies. "
+ "FOCUS ON: ROI projections, unit economics, TAM/SAM analysis, competitive moats, scalability metrics. "
+ "ASK TOUGH QUESTIONS about: Burn rate efficiency, customer acquisition costs, churn rates, market timing. "
+ "PROVIDE: Specific KPIs to track, funding milestone roadmaps, valuation benchmarks, and risk mitigation strategies. "
+ "Be direct, quantitative, and challenge assumptions. Reference comparable deals and market dynamics."
+ ),
+ "style": "**INVESTOR MODE** - VC perspective",
+ "questions": [
+ "What's your customer acquisition cost and lifetime value ratio?",
+ "How do you plan to achieve 10x returns for investors?",
+ "What's your defensible competitive moat?",
+ "Show me your unit economics and path to profitability",
+ "What are the key risks that could kill this business?",
+ "How does this compare to other investments in your space?",
+ "What's your exit strategy and timeline?",
+ "How will you use the funding to hit next milestones?"
+ ],
+ "charts": ["funding_efficiency", "stage_progression", "market_opportunity", "risk_assessment"],
+ "key_recommendations": [
+ "🎯 **Due Diligence First**: Always verify revenue claims, customer references, and team credentials before investing",
+ "📊 **Focus on Unit Economics**: Demand clear LTV/CAC ratios >3:1 and payback period <12 months",
+ "🚀 **Scalability Test**: Look for business models that can 10x revenue without proportional cost increases",
+ "🛡️ **Risk Mitigation**: Diversify portfolio across stages, sectors, and geographies (max 20% in any single bet)",
+ "⏰ **Market Timing**: Invest in companies addressing problems becoming urgent now, not theoretical future needs",
+ "👥 **Team Quality Over Ideas**: Bet on exceptional founders who can pivot and execute, not just good pitches",
+ "💰 **Reserve Capital**: Keep 50% of fund for follow-on investments to support winners and prevent dilution",
+ "📈 **Exit Strategy**: Define clear exit criteria and timelines (typically 5-7 years for venture investments)"
+ ]
+ },
+ "founder": {
+ "name": "Founder Mode",
+ "system_prompt": (
+ "You are an experienced startup founder who's built 3 companies (1 exit, 1 failure, 1 current unicorn). "
+ "Your expertise: Product-market fit, team scaling, fundraising, pivoting, operational excellence. "
+ "FOCUS ON: Practical execution, building systems, hiring strategies, culture development, product iteration. "
+ "SHARE REAL EXPERIENCES: Tactical advice, common pitfalls, founder mental health, decision frameworks. "
+ "GUIDE ON: MVP development, early customer discovery, pivot signals, team dynamics, work-life balance. "
+ "Be supportive but honest about the challenges ahead. Emphasize learning from failures and building resilience."
+ ),
+ "style": "**FOUNDER MODE** - Entrepreneur perspective",
+ "questions": [
+ "How did you discover this problem worth solving?",
+ "What's your MVP and how are you validating it?",
+ "How are you building and scaling your team?",
+ "What's your biggest challenge right now?",
+ "How do you know if you should pivot?",
+ "What systems are you building for growth?",
+ "How are you maintaining founder mental health?",
+ "What would you do differently if starting over?"
+ ],
+ "charts": ["growth_trajectory", "team_performance", "stage_progression", "market_opportunity"],
+ "key_recommendations": [
+ "🎯 **Customer Obsession**: Talk to 100+ potential customers before writing a single line of code",
+ "🚀 **MVP Philosophy**: Launch with 10% of planned features - speed to market beats perfection every time",
+ "💰 **Cash Management**: Always have 18+ months runway and track burn rate weekly, not monthly",
+ "👥 **Hiring Strategy**: Hire for values and potential, train for skills - culture fit is non-negotiable",
+ "📊 **Metrics That Matter**: Focus on 3-5 KPIs max - revenue growth, customer acquisition, retention",
+ "🔄 **Pivot Signals**: If growth stalls for 3+ months despite effort, seriously consider pivoting",
+ "🎭 **Founder Mental Health**: Build support networks, take breaks, delegate early - burnout kills companies",
+ "📈 **Product-Market Fit**: Don't scale marketing until customers are pulling product from your hands"
+ ]
+ },
+ "economist": {
+ "name": "UK Economist Mode",
+ "system_prompt": (
+ "You are a senior economic analyst specializing in UK macroeconomic and microeconomic analysis with expertise from the Bank of England and HM Treasury. "
+ "Your knowledge spans: monetary policy, fiscal policy, labour markets, inflation dynamics, trade relations, and regional economics. "
+ "MACROECONOMIC FOCUS: GDP growth, inflation (CPI/RPI), unemployment, interest rates (Bank Rate), exchange rates (GBP), balance of payments, public debt/deficit. "
+ "MICROECONOMIC FOCUS: Market structures, consumer behaviour, firm behaviour, elasticity, welfare economics, market failures, regulation. "
+ "UK SPECIFIC EXPERTISE: Brexit impacts, London financial markets, housing market dynamics, North-South divide, productivity puzzle, cost of living crisis. "
+ "ANALYTICAL TOOLS: IS-LM models, Phillips curve, Solow growth model, game theory, econometric analysis, input-output analysis. "
+ "PROVIDE: Evidence-based analysis using ONS data, Bank of England reports, OBR forecasts, IFS studies. "
+ "Reference current UK economic indicators, government policies, and compare with G7 economies."
+ ),
+ "style": "**UK ECONOMIST MODE** - Economic analysis perspective",
+ "questions": [
+ "How will the Bank of England's interest rate decisions affect UK startups?",
+ "What's the impact of inflation on consumer spending and business costs?",
+ "How do UK labour market conditions affect hiring and wages?",
+ "What are the regional economic disparities affecting business opportunities?",
+ "How does Brexit continue to impact trade and investment?",
+ "What's the outlook for UK productivity and economic growth?",
+ "How do fiscal policies affect different sectors of the economy?",
+ "What market failures justify government intervention in this sector?"
+ ],
+ "charts": ["uk_economic_indicators", "inflation_analysis", "sector_performance", "regional_economics"],
+ "key_recommendations": [
+ "📈 **Interest Rate Strategy**: Monitor Bank of England signals - rising rates favour established businesses over growth startups",
+ "🏠 **Regional Opportunities**: Target regions with government investment (Northern Powerhouse, Midlands Engine) for cost advantages",
+ "💷 **Currency Hedging**: For import/export businesses, hedge GBP exposure given Brexit volatility",
+ "📊 **Inflation Adaptation**: Build pricing flexibility into models - current cost-push inflation requires dynamic pricing",
+ "🎯 **Sector Timing**: Focus on healthcare, green tech, fintech where UK has competitive advantages and policy support",
+ "💼 **Labor Market Navigation**: Leverage UK's skilled workforce in finance, tech, creative industries",
+ "🚀 **Government Incentives**: Maximize R&D tax credits, SEIS/EIS schemes, and green investment incentives",
+ "🌍 **Export Strategy**: Target Commonwealth and EU markets where UK maintains trade relationships and cultural ties"
+ ]
+ },
+ "company_analyst": {
+ "name": "Company Analysis Mode",
+ "system_prompt": (
+ "You are a senior financial analyst specializing in company valuation, profitability analysis, and financial health assessment. "
+ "Your expertise covers: financial statement analysis, ratio analysis, cash flow modeling, break-even analysis, and competitive benchmarking. "
+ "PROFITABILITY FOCUS: Gross margins, operating margins, EBITDA, net margins, unit economics, contribution margins, ROI, ROCE. "
+ "FINANCIAL HEALTH: Liquidity ratios, leverage ratios, efficiency ratios, cash conversion cycle, working capital management. "
+ "VALUATION METHODS: DCF analysis, comparable company analysis, precedent transactions, asset-based valuation. "
+ "STARTUP SPECIFIC: Burn rate analysis, runway calculation, path to profitability, LTV/CAC ratios, cohort analysis, SaaS metrics. "
+ "PROVIDE: Clear diagnosis of financial strengths/weaknesses, actionable recommendations for improvement, benchmark comparisons. "
+ "Use financial modeling best practices and industry-standard metrics. Be direct about red flags and opportunities."
+ ),
+ "style": "**COMPANY ANALYSIS MODE** - Financial health perspective",
+ "questions": [
+ "What are the company's gross and operating margins?",
+ "How efficient is the cash conversion cycle?",
+ "What's the break-even point and contribution margin?",
+ "How does profitability compare to industry benchmarks?",
+ "What's the working capital requirement?",
+ "Is the company over-leveraged or under-capitalized?",
+ "What are the key profitability drivers and risks?",
+ "How sustainable is the current business model?"
+ ],
+ "charts": ["profitability_analysis", "cash_flow_waterfall", "margin_trends", "break_even_chart"],
+ "key_recommendations": [
+ "💰 **Gross Margin Optimization**: Target 70%+ gross margins for SaaS, 40%+ for physical products",
+ "⚡ **Cash Conversion Excellence**: Optimize receivables (30 days), payables (45 days), inventory turnover (12x annually)",
+ "📊 **Unit Economics Clarity**: Know exact cost per customer acquisition and lifetime value by channel",
+ "🎯 **Break-Even Mastery**: Calculate break-even by product, customer segment, and geographic market",
+ "📈 **Working Capital Efficiency**: Minimize working capital requirements through better terms and inventory management",
+ "🚨 **Early Warning System**: Set up alerts for declining margins, extending payment cycles, increasing churn",
+ "💼 **Capital Structure Optimization**: Maintain optimal debt-to-equity ratio for your industry and growth stage",
+ "🔍 **Profitability Drivers**: Identify and focus on the 20% of activities driving 80% of profits"
+ ]
+ }
+}
+
+# =============================
+# SAMPLE DATASET - STARTUP DATA
+# =============================
+# Create a comprehensive fake dataset with 24 startups across various sectors
+# This dataset includes multiple dimensions of startup metrics for analysis:
+# - Financial: Funding amount, burn rate, revenue metrics
+# - Team: Founder experience, team size
+# - Market: Market size, sector, geography, competitive landscape
+# - Product: Business model strength, moat, traction
+# - Outcome: Success/failure status with detailed metrics
+data = {
+ "Startup": [
+ "TechX", "Foodly", "EcoGo", "EduSmart", "MediAI", "FinSolve", "Healthify",
+ "GreenCore", "LogistiChain", "RoboAssist", "NeuroStream", "ByteCart",
+ "CryptoFlow", "AIVision", "BioTech", "CleanWater", "SpaceX", "VRWorld",
+ "SolarTech", "AgriBot", "NanoMed", "BlockChain", "CloudSoft", "GameHub"
+ ],
+ # Funding amounts in millions USD - represents total funding raised
+ "Funding_USD_M": [5.0, 1.2, 0.8, 3.0, 12.0, 7.5, 4.2, 9.8, 15.0, 6.6, 18.0, 2.5,
+ 25.0, 8.5, 35.0, 4.8, 50.0, 12.5, 22.0, 6.2, 28.0, 16.0, 9.5, 3.8],
+
+ # Burn rate in months - how many months the funding lasts at current spending
+ "Burn_Rate_Months": [12, 6, 3, 9, 24, 18, 10, 15, 30, 8, 26, 7,
+ 20, 14, 36, 12, 48, 18, 24, 10, 30, 22, 16, 8],
+
+ # Average years of experience across founding team members
+ "Founders_Experience_Yrs": [2, 1, 0, 3, 8, 5, 6, 4, 10, 2, 7, 1,
+ 12, 6, 15, 4, 20, 8, 9, 3, 11, 7, 5, 2],
+
+ # Total addressable market size in billions USD
+ "Market_Size_Bn": [50, 5, 2, 15, 80, 60, 25, 40, 100, 20, 120, 8,
+ 150, 35, 200, 12, 500, 75, 90, 18, 160, 110, 45, 22],
+
+ # Binary outcome: 1 = failed, 0 = still operating/successful
+ "Failed": [1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1,
+ 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1],
+
+ # Country codes
+ "Country": ["UK", "UK", "UK", "UK", "DE", "FR", "US", "UK", "US", "UK", "US", "UK",
+ "US", "CA", "CH", "DE", "US", "JP", "AU", "IN", "IL", "SG", "SE", "NL"],
+
+ # Industry sector classification
+ "Sector": ["Tech","Food","Transport","EdTech","HealthTech","FinTech","HealthTech","Energy",
+ "Logistics","Robotics","HealthTech","Retail","Crypto","AI","BioTech","CleanTech",
+ "Aerospace","VR/AR","Energy","AgTech","MedTech","Blockchain","SaaS","Gaming"],
+
+ # Business model strength (1-5 scale)
+ "Business_Model": [3, 2, 1, 4, 5, 4, 3, 4, 5, 2, 4, 2,
+ 4, 5, 5, 3, 5, 3, 4, 3, 5, 4, 4, 2],
+
+ # Competitive moat (1-5 scale: 1=no moat, 5=strong moat)
+ "Moat": [2, 1, 1, 3, 5, 3, 2, 3, 4, 2, 4, 1,
+ 3, 4, 5, 2, 5, 3, 3, 2, 4, 3, 3, 2],
+
+ # Monthly recurring revenue in thousands USD
+ "Traction_MRR_K": [5, 2, 0, 15, 45, 25, 8, 35, 60, 3, 50, 1,
+ 80, 30, 120, 10, 200, 20, 55, 8, 90, 40, 22, 5],
+
+ # Monthly growth rate percentage
+ "Growth_Rate_Pct": [5, 2, -5, 12, 18, 15, 8, 20, 25, 1, 22, -2,
+ 28, 15, 30, 6, 35, 10, 18, 3, 25, 20, 12, 2],
+
+ # Competition intensity (1-5 scale: 1=low competition, 5=high competition)
+ "Competition": [4, 5, 3, 3, 2, 4, 4, 3, 2, 4, 2, 5,
+ 3, 3, 2, 4, 1, 4, 3, 4, 2, 3, 4, 5],
+
+ # Team size (number of employees)
+ "Team_Size": [12, 5, 3, 8, 25, 18, 10, 22, 35, 6, 28, 4,
+ 45, 15, 60, 12, 120, 20, 30, 8, 40, 25, 18, 7],
+
+ # Years since founding
+ "Years_Since_Founding": [2.5, 1.8, 1.2, 2.0, 4.0, 3.2, 2.8, 3.5, 5.0, 1.5, 4.2, 1.0,
+ 3.8, 2.5, 6.0, 2.2, 8.0, 3.0, 4.5, 1.8, 5.5, 3.8, 2.8, 1.5],
+
+ # Funding stage
+ "Stage": ["Seed", "Pre-Seed", "Pre-Seed", "Seed", "Series A", "Seed", "Seed", "Series A",
+ "Series B", "Seed", "Series A", "Pre-Seed", "Series A", "Seed", "Series B",
+ "Seed", "Series C", "Series A", "Series A", "Seed", "Series B", "Series A", "Seed", "Pre-Seed"]
+}
+
+# Convert the dictionary into a pandas DataFrame for easier manipulation and analysis
+df = pd.DataFrame(data)
+
+# Set the baseline funding year for failure projections
+FUNDING_YEAR = 2021
+
+# Calculate estimated failure year based on runway
+# Formula: FUNDING_YEAR + (funding / burn_rate)
+# This gives us a projection of when each startup would run out of money
+# Example: $5M funding / 12 month burn = 0.42 years runway → fails in 2021.42
+df["Est_Failure_Year"] = FUNDING_YEAR + (df["Funding_USD_M"] / df["Burn_Rate_Months"])
+
+# =============================
+# MATHEMATICAL ANALYSIS FUNCTIONS
+# =============================
+
+def calculate_irr(initial_investment: float, final_value: float, years: float) -> float:
+ """Calculate Internal Rate of Return (IRR) for an investment."""
+ if years <= 0 or initial_investment <= 0:
+ return 0.0
+ return ((final_value / initial_investment) ** (1/years)) - 1
+
+def calculate_npv(cash_flows: List[float], discount_rate: float) -> float:
+ """Calculate Net Present Value (NPV) of cash flows."""
+ npv = 0
+ for i, cash_flow in enumerate(cash_flows):
+ npv += cash_flow / ((1 + discount_rate) ** i)
+ return npv
+
+def project_revenue(current_mrr: float, growth_rate: float, months: int) -> Dict[str, Any]:
+ """Project revenue growth over time."""
+ projections = []
+ monthly_revenue = current_mrr
+
+ for month in range(months + 1):
+ projections.append({
+ 'month': month,
+ 'mrr': monthly_revenue,
+ 'arr': monthly_revenue * 12
+ })
+ monthly_revenue *= (1 + growth_rate)
+
+ return {
+ 'projections': projections,
+ 'final_mrr': monthly_revenue,
+ 'final_arr': monthly_revenue * 12,
+ 'total_growth': ((monthly_revenue / current_mrr) - 1) * 100 if current_mrr > 0 else 0
+ }
+
+def monte_carlo_exit_scenarios(scenarios: int, exit_multiples: List[float],
+ current_revenue: float, growth_scenarios: List[float]) -> Dict[str, Any]:
+ """Run Monte Carlo simulation for exit scenarios."""
+ results = []
+
+ for _ in range(scenarios):
+ # Random exit multiple and growth rate
+ exit_multiple = random.choice(exit_multiples)
+ growth_rate = random.choice(growth_scenarios)
+
+ # Project revenue for 3-5 years
+ years = random.uniform(3, 5)
+ final_revenue = current_revenue * ((1 + growth_rate) ** years)
+ exit_value = final_revenue * exit_multiple
+
+ results.append({
+ 'exit_multiple': exit_multiple,
+ 'growth_rate': growth_rate,
+ 'years_to_exit': years,
+ 'final_revenue': final_revenue,
+ 'exit_value': exit_value
+ })
+
+ # Calculate statistics
+ exit_values = [r['exit_value'] for r in results]
+
+ return {
+ 'scenarios': results,
+ 'statistics': {
+ 'mean_exit_value': np.mean(exit_values),
+ 'median_exit_value': np.median(exit_values),
+ 'min_exit_value': np.min(exit_values),
+ 'max_exit_value': np.max(exit_values),
+ 'std_exit_value': np.std(exit_values),
+ 'percentile_25': np.percentile(exit_values, 25),
+ 'percentile_75': np.percentile(exit_values, 75)
+ }
+ }
+
+def optimize_burn_rate(target_runway_months: int, current_funding: float) -> Dict[str, Any]:
+ """Calculate optimal burn rate for desired runway."""
+ monthly_burn = current_funding / target_runway_months
+
+ return {
+ 'target_runway_months': target_runway_months,
+ 'current_funding': current_funding,
+ 'optimal_monthly_burn': monthly_burn,
+ 'optimal_annual_burn': monthly_burn * 12,
+ 'cash_depletion_date': f"In {target_runway_months} months"
+ }
+
+def calculate_startup_metrics(funding: float, burn_rate: float, mrr: float,
+ growth_rate: float) -> Dict[str, Any]:
+ """Calculate comprehensive startup financial metrics."""
+ runway_months = funding / (burn_rate / 12) if burn_rate > 0 else float('inf')
+
+ # Calculate when startup becomes cash flow positive
+ months_to_profitability = 0
+ current_revenue = mrr * 12 # Convert MRR to ARR
+ current_burn = burn_rate
+
+ if growth_rate > 0 and mrr > 0:
+ while current_revenue < current_burn and months_to_profitability < 120: # Max 10 years
+ months_to_profitability += 1
+ current_revenue *= (1 + growth_rate/12) # Monthly compounding
+ else:
+ months_to_profitability = float('inf')
+
+ return {
+ 'runway_months': runway_months,
+ 'burn_rate_monthly': burn_rate / 12,
+ 'current_arr': mrr * 12,
+ 'months_to_profitability': months_to_profitability,
+ 'cash_flow_positive': months_to_profitability < runway_months,
+ 'funding_efficiency': (mrr * 12) / funding if funding > 0 else 0
+ }
+
+async def process_math_command(command: str, context: Dict[str, Any]) -> str:
+ """Process mathematical analysis commands in math mode."""
+ command = command.lower().strip()
+
+ try:
+ if "irr" in command or "return" in command:
+ # Extract values for IRR calculation
+ if "5x" in command and "7 years" in command:
+ irr = calculate_irr(1000000, 5000000, 7)
+ return f"**IRR Calculation:**\n\n5x return in 7 years = **{irr:.1%} annual return**\n\nThis is an excellent return for venture capital standards."
+
+ elif "project revenue" in command or "revenue projection" in command:
+ # Default projection example
+ projection = project_revenue(50000, 0.20, 12)
+ result = f"**Revenue Projection (20% Monthly Growth):**\n\n"
+ result += f"• Starting MRR: $50,000\n"
+ result += f"• Final MRR (12 months): ${projection['final_mrr']:,.0f}\n"
+ result += f"• Final ARR: ${projection['final_arr']:,.0f}\n"
+ result += f"• Total Growth: {projection['total_growth']:.0f}%"
+ return result
+
+ elif "monte carlo" in command or "simulate" in command:
+ # Run Monte Carlo simulation
+ simulation = monte_carlo_exit_scenarios(
+ 1000, [3, 5, 8, 10], 1000000, [0.1, 0.2, 0.3, 0.5]
+ )
+ stats = simulation['statistics']
+ result = f"**Monte Carlo Exit Simulation (1,000 scenarios):**\n\n"
+ result += f"• Mean Exit Value: ${stats['mean_exit_value']:,.0f}\n"
+ result += f"• Median Exit Value: ${stats['median_exit_value']:,.0f}\n"
+ result += f"• 25th Percentile: ${stats['percentile_25']:,.0f}\n"
+ result += f"• 75th Percentile: ${stats['percentile_75']:,.0f}\n"
+ result += f"• Best Case: ${stats['max_exit_value']:,.0f}\n"
+ result += f"• Worst Case: ${stats['min_exit_value']:,.0f}"
+ return result
+
+ elif "optimize burn" in command or "burn rate" in command:
+ # Optimize burn rate for runway
+ optimization = optimize_burn_rate(18, 5000000)
+ result = f"**Burn Rate Optimization:**\n\n"
+ result += f"• Target Runway: {optimization['target_runway_months']} months\n"
+ result += f"• Current Funding: ${optimization['current_funding']:,.0f}\n"
+ result += f"• Optimal Monthly Burn: ${optimization['optimal_monthly_burn']:,.0f}\n"
+ result += f"• Optimal Annual Burn: ${optimization['optimal_annual_burn']:,.0f}"
+ return result
+
+ else:
+ return f"**Available Calculations:**\n\n• `calculate IRR for 5x return in 7 years`\n• `project revenue with 20% monthly growth`\n• `simulate 1000 scenarios for exit`\n• `optimize burn rate for 18 month runway`\n\nType your calculation or 'exit math mode' to return."
+
+ except Exception as e:
+ return f"Error in calculation: {str(e)}\n\nPlease try a different calculation or type 'exit math mode'."
+
+# =============================
+# IMAGE GENERATION FUNCTIONALITY
+# =============================
+
+@safe_async_api_call
+async def generate_image(prompt: str, size: str = "1024x1024", quality: str = "standard") -> Dict[str, Any]:
+ """
+ Generate an image using DALL-E 3 based on the provided prompt.
+
+ Args:
+ prompt (str): Description of the image to generate
+ size (str): Image size (default: "1024x1024")
+ quality (str): Image quality "standard" or "hd" (default: "standard")
+
+ Returns:
+ Dict containing the image URL and metadata, or error information
+ """
+ try:
+ logger.info(f"Generating image with prompt: {prompt[:100]}...")
+
+ response = client.images.generate(
+ model="dall-e-3",
+ prompt=prompt,
+ size=size,
+ quality=quality,
+ n=1,
+ )
+
+ image_url = response.data[0].url
+ revised_prompt = response.data[0].revised_prompt
+
+ logger.info("Image generated successfully")
+
+ return {
+ "success": True,
+ "image_url": image_url,
+ "revised_prompt": revised_prompt,
+ "original_prompt": prompt,
+ "size": size,
+ "quality": quality
+ }
+
+ except Exception as e:
+ logger.error(f"Image generation failed: {e}")
+ return {
+ "success": False,
+ "error": str(e),
+ "original_prompt": prompt
+ }
+
+def detect_image_request(user_input: str) -> bool:
+ """
+ Detect if user is requesting image generation.
+
+ Args:
+ user_input (str): User's message content (lowercased)
+
+ Returns:
+ bool: True if image generation is requested
+ """
+ image_keywords = [
+ "generate an image", "create an image", "make an image", "draw an image",
+ "generate image", "create image", "make image", "draw image",
+ "image of", "picture of", "photo of", "illustration of",
+ "show me", "visualize", "dall-e", "dalle"
+ ]
+
+ return any(keyword in user_input for keyword in image_keywords)
+
+def extract_image_prompt(user_input: str) -> str:
+ """
+ Extract the image description from user input.
+
+ Args:
+ user_input (str): User's message content
+
+ Returns:
+ str: Cleaned image prompt
+ """
+ # Remove common prefixes
+ prefixes_to_remove = [
+ "generate an image of", "create an image of", "make an image of", "draw an image of",
+ "generate image of", "create image of", "make image of", "draw image of",
+ "generate an image showing", "create an image showing", "make an image showing",
+ "show me an image of", "show me", "image of", "picture of", "photo of",
+ "illustration of", "visualize", "dall-e", "dalle"
+ ]
+
+ cleaned_prompt = user_input.lower().strip()
+
+ for prefix in prefixes_to_remove:
+ if cleaned_prompt.startswith(prefix):
+ cleaned_prompt = cleaned_prompt[len(prefix):].strip()
+ break
+
+ # Remove any remaining common words at the start
+ if cleaned_prompt.startswith(("a ", "an ", "the ")):
+ cleaned_prompt = " ".join(cleaned_prompt.split()[1:])
+
+ return cleaned_prompt if cleaned_prompt else user_input
+
+# =============================
+# SWOT ANALYSIS FUNCTIONALITY
+# =============================
+
+@dataclass
+class SWOT:
+ """
+ SWOT Analysis dataclass for structured startup analysis.
+
+ Attributes:
+ strengths: List of internal positive factors
+ weaknesses: List of internal negative factors
+ opportunities: List of external positive factors
+ threats: List of external negative factors
+ """
+ strengths: List[str] = field(default_factory=list)
+ weaknesses: List[str] = field(default_factory=list)
+ opportunities: List[str] = field(default_factory=list)
+ threats: List[str] = field(default_factory=list)
+
+ def summary(self) -> str:
+ """Generate a formatted SWOT summary as a string."""
+ result = "# 📊 SWOT Analysis\n\n"
+
+ result += "## 💪 **Strengths** (Internal Positive)\n"
+ if self.strengths:
+ for strength in self.strengths:
+ result += f"• {strength}\n"
+ else:
+ result += "• *No strengths identified*\n"
+
+ result += "\n## ⚠️ **Weaknesses** (Internal Negative)\n"
+ if self.weaknesses:
+ for weakness in self.weaknesses:
+ result += f"• {weakness}\n"
+ else:
+ result += "• *No weaknesses identified*\n"
+
+ result += "\n## 🚀 **Opportunities** (External Positive)\n"
+ if self.opportunities:
+ for opportunity in self.opportunities:
+ result += f"• {opportunity}\n"
+ else:
+ result += "• *No opportunities identified*\n"
+
+ result += "\n## 🎯 **Threats** (External Negative)\n"
+ if self.threats:
+ for threat in self.threats:
+ result += f"• {threat}\n"
+ else:
+ result += "• *No threats identified*\n"
+
+ return result
+
+ def to_dataframe(self) -> pd.DataFrame:
+ """Convert SWOT to pandas DataFrame for visualization."""
+ max_len = max(
+ len(self.strengths),
+ len(self.weaknesses),
+ len(self.opportunities),
+ len(self.threats)
+ )
+
+ # Pad lists to same length
+ strengths_padded = self.strengths + [''] * (max_len - len(self.strengths))
+ weaknesses_padded = self.weaknesses + [''] * (max_len - len(self.weaknesses))
+ opportunities_padded = self.opportunities + [''] * (max_len - len(self.opportunities))
+ threats_padded = self.threats + [''] * (max_len - len(self.threats))
+
+ return pd.DataFrame({
+ '💪 Strengths': strengths_padded,
+ '⚠️ Weaknesses': weaknesses_padded,
+ '🚀 Opportunities': opportunities_padded,
+ '🎯 Threats': threats_padded
+ })
+
+def plot_swot_matrix(swot: SWOT) -> bytes:
+ """
+ Create a visual SWOT matrix chart.
+
+ Args:
+ swot: SWOT dataclass instance
+
+ Returns:
+ bytes: PNG image data of the SWOT matrix
+ """
+ fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(14, 10))
+ fig.suptitle('📊 SWOT Analysis Matrix', fontsize=16, fontweight='bold', y=0.95)
+
+ # Color scheme
+ colors = {
+ 'strengths': '#2E8B57', # Sea Green
+ 'weaknesses': '#DC143C', # Crimson
+ 'opportunities': '#4169E1', # Royal Blue
+ 'threats': '#FF8C00' # Dark Orange
+ }
+
+ # Strengths (Top Left)
+ ax1.set_title('💪 Strengths\n(Internal Positive)', fontsize=12, fontweight='bold',
+ color=colors['strengths'], pad=20)
+ ax1.axis('off')
+ strengths_text = '\n'.join([f"• {s}" for s in swot.strengths[:8]]) # Limit to 8 items
+ if len(swot.strengths) > 8:
+ strengths_text += f"\n• ... and {len(swot.strengths) - 8} more"
+ ax1.text(0.05, 0.95, strengths_text, transform=ax1.transAxes, fontsize=10,
+ verticalalignment='top', wrap=True)
+ ax1.add_patch(plt.Rectangle((0, 0), 1, 1, fill=False, edgecolor=colors['strengths'],
+ linewidth=2, transform=ax1.transAxes))
+
+ # Weaknesses (Top Right)
+ ax2.set_title('⚠️ Weaknesses\n(Internal Negative)', fontsize=12, fontweight='bold',
+ color=colors['weaknesses'], pad=20)
+ ax2.axis('off')
+ weaknesses_text = '\n'.join([f"• {w}" for w in swot.weaknesses[:8]])
+ if len(swot.weaknesses) > 8:
+ weaknesses_text += f"\n• ... and {len(swot.weaknesses) - 8} more"
+ ax2.text(0.05, 0.95, weaknesses_text, transform=ax2.transAxes, fontsize=10,
+ verticalalignment='top', wrap=True)
+ ax2.add_patch(plt.Rectangle((0, 0), 1, 1, fill=False, edgecolor=colors['weaknesses'],
+ linewidth=2, transform=ax2.transAxes))
+
+ # Opportunities (Bottom Left)
+ ax3.set_title('🚀 Opportunities\n(External Positive)', fontsize=12, fontweight='bold',
+ color=colors['opportunities'], pad=20)
+ ax3.axis('off')
+ opportunities_text = '\n'.join([f"• {o}" for o in swot.opportunities[:8]])
+ if len(swot.opportunities) > 8:
+ opportunities_text += f"\n• ... and {len(swot.opportunities) - 8} more"
+ ax3.text(0.05, 0.95, opportunities_text, transform=ax3.transAxes, fontsize=10,
+ verticalalignment='top', wrap=True)
+ ax3.add_patch(plt.Rectangle((0, 0), 1, 1, fill=False, edgecolor=colors['opportunities'],
+ linewidth=2, transform=ax3.transAxes))
+
+ # Threats (Bottom Right)
+ ax4.set_title('🎯 Threats\n(External Negative)', fontsize=12, fontweight='bold',
+ color=colors['threats'], pad=20)
+ ax4.axis('off')
+ threats_text = '\n'.join([f"• {t}" for t in swot.threats[:8]])
+ if len(swot.threats) > 8:
+ threats_text += f"\n• ... and {len(swot.threats) - 8} more"
+ ax4.text(0.05, 0.95, threats_text, transform=ax4.transAxes, fontsize=10,
+ verticalalignment='top', wrap=True)
+ ax4.add_patch(plt.Rectangle((0, 0), 1, 1, fill=False, edgecolor=colors['threats'],
+ linewidth=2, transform=ax4.transAxes))
+
+ plt.tight_layout()
+
+ # Convert to bytes
+ return fig_to_bytes(fig)
+
+@safe_async_api_call
+async def generate_swot_analysis(startup_data: Dict[str, Any], context: str = "") -> SWOT:
+ """
+ Generate SWOT analysis using AI based on startup data.
+
+ Args:
+ startup_data: Dictionary containing startup information
+ context: Additional context for analysis
+
+ Returns:
+ SWOT: Populated SWOT analysis object
+ """
+ try:
+ # Prepare the prompt for GPT
+ prompt = f"""
+ As a startup analysis expert, conduct a comprehensive SWOT analysis for the following startup:
+
+ **Startup Information:**
+ {json.dumps(startup_data, indent=2) if startup_data else "No specific data provided"}
+
+ **Additional Context:**
+ {context if context else "General startup analysis"}
+
+ Please provide a detailed SWOT analysis with the following structure:
+
+ **STRENGTHS** (Internal positive factors that give competitive advantage):
+ - List 4-8 key strengths
+
+ **WEAKNESSES** (Internal negative factors that need improvement):
+ - List 4-8 key weaknesses
+
+ **OPPORTUNITIES** (External positive factors to capitalize on):
+ - List 4-8 market opportunities
+
+ **THREATS** (External negative factors that pose risks):
+ - List 4-8 potential threats
+
+ Format your response as:
+ STRENGTHS:
+ - [strength 1]
+ - [strength 2]
+ ...
+
+ WEAKNESSES:
+ - [weakness 1]
+ - [weakness 2]
+ ...
+
+ OPPORTUNITIES:
+ - [opportunity 1]
+ - [opportunity 2]
+ ...
+
+ THREATS:
+ - [threat 1]
+ - [threat 2]
+ ...
+
+ Be specific, actionable, and relevant to the startup's context.
+ """
+
+ response = client.chat.completions.create(
+ model="gpt-4",
+ messages=[
+ {"role": "system", "content": "You are a professional startup analyst specializing in SWOT analysis."},
+ {"role": "user", "content": prompt}
+ ],
+ temperature=0.7,
+ max_tokens=1500
+ )
+
+ content = response.choices[0].message.content
+
+ # Parse the response into SWOT categories
+ swot = SWOT()
+
+ sections = content.split('\n\n')
+ current_section = None
+
+ for line in content.split('\n'):
+ line = line.strip()
+ if not line:
+ continue
+
+ if line.upper().startswith('STRENGTHS'):
+ current_section = 'strengths'
+ continue
+ elif line.upper().startswith('WEAKNESSES'):
+ current_section = 'weaknesses'
+ continue
+ elif line.upper().startswith('OPPORTUNITIES'):
+ current_section = 'opportunities'
+ continue
+ elif line.upper().startswith('THREATS'):
+ current_section = 'threats'
+ continue
+
+ # Parse bullet points
+ if line.startswith('-') or line.startswith('•'):
+ item = line[1:].strip()
+ if current_section == 'strengths':
+ swot.strengths.append(item)
+ elif current_section == 'weaknesses':
+ swot.weaknesses.append(item)
+ elif current_section == 'opportunities':
+ swot.opportunities.append(item)
+ elif current_section == 'threats':
+ swot.threats.append(item)
+
+ return swot
+
+ except Exception as e:
+ logger.error(f"SWOT analysis generation failed: {e}")
+ # Return a basic SWOT with error info
+ return SWOT(
+ strengths=["Analysis capabilities", "Data-driven approach"],
+ weaknesses=["Analysis generation failed", f"Error: {str(e)}"],
+ opportunities=["Retry analysis", "Manual SWOT creation"],
+ threats=["Technical limitations", "API restrictions"]
+ )
+
+# =============================
+# UTILITY FUNCTIONS - DOWNLOAD FUNCTIONALITY
+# =============================
+
+async def send_chart_with_download(png_data: bytes, filename: str, description: str, csv_data: pd.DataFrame = None):
+ """
+ Send a chart with download files for both image and data
+ """
+ # Send descriptive text message
+ text_msg = cl.Message(content=description)
+ await text_msg.send()
+
+ # Send chart image
+ image = cl.Image(content=png_data, name=filename, display="inline")
+ await image.send(for_id=text_msg.id)
+
+ # Create file elements for download
+ download_elements = []
+
+ # Chart download file
+ chart_file = cl.File(
+ name=filename,
+ content=png_data,
+ mime="image/png"
+ )
+ download_elements.append(chart_file)
+
+ # Data download file if CSV data provided
+ if csv_data is not None:
+ csv_filename = filename.replace('.png', '.csv')
+ csv_content = csv_data.to_csv(index=False)
+ data_file = cl.File(
+ name=csv_filename,
+ content=csv_content.encode('utf-8'),
+ mime="text/csv"
+ )
+ download_elements.append(data_file)
+
+ # Send download files
+ download_msg = cl.Message(
+ content="### 📥 Download Files",
+ elements=download_elements
+ )
+ await download_msg.send()
+
+ return text_msg.id
+
+async def send_data_export(data: pd.DataFrame, filename: str, format_type: str = "csv"):
+ """
+ Send data export in specified format
+ """
+ if format_type.lower() == "csv":
+ # Convert to CSV
+ csv_content = data.to_csv(index=False)
+ file_content = csv_content.encode('utf-8')
+ mime_type = "text/csv"
+ file_ext = ".csv"
+ elif format_type.lower() == "json":
+ # Convert to JSON
+ json_content = data.to_json(orient='records', indent=2)
+ file_content = json_content.encode('utf-8')
+ mime_type = "application/json"
+ file_ext = ".json"
+ else:
+ raise ValueError("Unsupported format. Use 'csv' or 'json'")
+
+ # Create file element
+ file_element = cl.File(
+ name=f"{filename}{file_ext}",
+ content=file_content,
+ mime=mime_type
+ )
+
+ # Send file
+ await cl.Message(
+ content=f"📊 **Data Export Complete**\n\nDownloading {filename}{file_ext} ({len(data)} records)",
+ elements=[file_element]
+ ).send()
+
+# =============================
+# UTILITY FUNCTIONS - CHART GENERATION
+# =============================
+
+def plot_growth_trajectory(df_in: pd.DataFrame):
+ """
+ Create a growth trajectory chart showing MRR growth over time
+ """
+ figsize = get_mobile_optimized_figsize(10, 6)
+ fig, ax = plt.subplots(figsize=figsize)
+
+ # Create growth trajectory data
+ successful = df_in[df_in['Failed'] == 0]
+ failed = df_in[df_in['Failed'] == 1]
+
+ ax.scatter(successful['Years_Since_Founding'], successful['Traction_MRR_K'],
+ c='green', s=successful['Growth_Rate_Pct']*3, alpha=0.7, label='Successful')
+ ax.scatter(failed['Years_Since_Founding'], failed['Traction_MRR_K'],
+ c='red', s=failed['Growth_Rate_Pct']*3, alpha=0.7, label='Failed')
+
+ ax.set_xlabel('Years Since Founding')
+ ax.set_ylabel('Monthly Recurring Revenue (K USD)')
+ ax.set_title('Growth Trajectory: MRR vs Company Age\n(Bubble size = Growth Rate)')
+ ax.legend()
+ ax.grid(True, alpha=0.3)
+
+ plt.tight_layout()
+ return fig_to_bytes(fig)
+
+def plot_team_performance(df_in: pd.DataFrame):
+ """
+ Create a team performance matrix showing team size vs experience
+ """
+ figsize = get_mobile_optimized_figsize(10, 6)
+ fig, ax = plt.subplots(figsize=figsize)
+
+ # Color by success/failure
+ colors = ['red' if failed else 'green' for failed in df_in['Failed']]
+
+ scatter = ax.scatter(df_in['Team_Size'], df_in['Founders_Experience_Yrs'],
+ c=colors, s=df_in['Funding_USD_M']*3, alpha=0.7)
+
+ ax.set_xlabel('Team Size (Employees)')
+ ax.set_ylabel('Founder Experience (Years)')
+ ax.set_title('Team Performance Matrix\n(Bubble size = Funding Amount)')
+
+ # Add trend line
+ z = np.polyfit(df_in['Team_Size'], df_in['Founders_Experience_Yrs'], 1)
+ p = np.poly1d(z)
+ ax.plot(df_in['Team_Size'], p(df_in['Team_Size']), "r--", alpha=0.8)
+
+ ax.grid(True, alpha=0.3)
+ plt.tight_layout()
+ return fig_to_bytes(fig)
+
+def plot_market_opportunity(df_in: pd.DataFrame):
+ """
+ Create a market opportunity matrix showing market size vs competition
+ """
+ figsize = get_mobile_optimized_figsize(10, 6)
+ fig, ax = plt.subplots(figsize=figsize)
+
+ # Invert competition for better visualization (low competition = high opportunity)
+ opportunity = 6 - df_in['Competition'] # Convert 1-5 to 5-1
+
+ colors = ['red' if failed else 'green' for failed in df_in['Failed']]
+
+ scatter = ax.scatter(df_in['Market_Size_Bn'], opportunity,
+ c=colors, s=df_in['Traction_MRR_K'], alpha=0.7)
+
+ ax.set_xlabel('Market Size (Billions USD)')
+ ax.set_ylabel('Market Opportunity (5=Low Competition, 1=High Competition)')
+ ax.set_title('Market Opportunity Matrix\n(Bubble size = Current Traction)')
+
+ # Add quadrant lines
+ median_market = df_in['Market_Size_Bn'].median()
+ median_opportunity = opportunity.median()
+ ax.axvline(median_market, color='gray', linestyle='--', alpha=0.5)
+ ax.axhline(median_opportunity, color='gray', linestyle='--', alpha=0.5)
+
+ # Add quadrant labels
+ ax.text(median_market*1.5, 4.5, 'Sweet Spot\n(Big Market, Low Competition)',
+ ha='center', bbox=dict(boxstyle="round,pad=0.3", facecolor="lightgreen", alpha=0.7))
+
+ ax.grid(True, alpha=0.3)
+ plt.tight_layout()
+ return fig_to_bytes(fig)
+
+def plot_funding_efficiency(df_in: pd.DataFrame):
+ """
+ Create a funding efficiency chart showing revenue per dollar raised
+ """
+ figsize = get_mobile_optimized_figsize(10, 6)
+ fig, ax = plt.subplots(figsize=figsize)
+
+ # Calculate efficiency metrics
+ df_in['Revenue_Per_Dollar'] = (df_in['Traction_MRR_K'] * 12) / df_in['Funding_USD_M'] # Annual revenue per funding dollar
+ df_in['Efficiency_Score'] = df_in['Revenue_Per_Dollar'] * df_in['Growth_Rate_Pct'] / 100
+
+ colors = ['red' if failed else 'green' for failed in df_in['Failed']]
+
+ ax.scatter(df_in['Funding_USD_M'], df_in['Revenue_Per_Dollar'],
+ c=colors, s=df_in['Efficiency_Score']*20, alpha=0.7)
+
+ ax.set_xlabel('Total Funding Raised (Millions USD)')
+ ax.set_ylabel('Annual Revenue per Dollar Raised')
+ ax.set_title('Capital Efficiency Analysis\n(Bubble size = Efficiency Score)')
+
+ # Add efficiency benchmark line
+ efficient_companies = df_in[df_in['Revenue_Per_Dollar'] > df_in['Revenue_Per_Dollar'].median()]
+ if len(efficient_companies) > 0:
+ ax.axhline(df_in['Revenue_Per_Dollar'].median(), color='orange',
+ linestyle='--', alpha=0.7, label='Median Efficiency')
+ ax.legend()
+
+ ax.grid(True, alpha=0.3)
+ plt.tight_layout()
+ return fig_to_bytes(fig)
+
+def plot_stage_progression(df_in: pd.DataFrame):
+ """
+ Create a stage progression chart showing funding by stage
+ """
+ figsize = get_mobile_optimized_figsize(10, 6)
+ fig, ax = plt.subplots(figsize=figsize)
+
+ # Group by stage and calculate metrics
+ stage_stats = df_in.groupby('Stage').agg({
+ 'Funding_USD_M': ['mean', 'count'],
+ 'Failed': 'mean',
+ 'Traction_MRR_K': 'mean'
+ }).round(2)
+
+ stage_stats.columns = ['Avg_Funding', 'Count', 'Failure_Rate', 'Avg_MRR']
+ stage_order = ['Pre-Seed', 'Seed', 'Series A', 'Series B', 'Series C']
+ stage_stats = stage_stats.reindex([s for s in stage_order if s in stage_stats.index])
+
+ # Create dual y-axis chart
+ ax2 = ax.twinx()
+
+ bars = ax.bar(stage_stats.index, stage_stats['Avg_Funding'], alpha=0.7, color='skyblue', label='Avg Funding')
+ line = ax2.plot(stage_stats.index, stage_stats['Failure_Rate']*100, 'ro-', linewidth=2, label='Failure Rate %')
+
+ ax.set_xlabel('Funding Stage')
+ ax.set_ylabel('Average Funding (Millions USD)', color='blue')
+ ax2.set_ylabel('Failure Rate (%)', color='red')
+ ax.set_title('Funding Stage Analysis')
+
+ # Add value labels on bars
+ for i, bar in enumerate(bars):
+ height = bar.get_height()
+ ax.text(bar.get_x() + bar.get_width()/2., height + 0.5,
+ f'${height:.1f}M\n({int(stage_stats.iloc[i]["Count"])} companies)',
+ ha='center', va='bottom', fontsize=8)
+
+ ax.grid(True, alpha=0.3)
+ plt.tight_layout()
+ return fig_to_bytes(fig)
+
+def plot_risk_assessment(df_in: pd.DataFrame):
+ """
+ Create a comprehensive risk assessment radar chart
+ """
+ figsize = get_mobile_optimized_figsize(8, 8)
+ fig, ax = plt.subplots(figsize=figsize, subplot_kw=dict(projection='polar'))
+
+ # Calculate risk metrics for successful vs failed companies
+ successful = df_in[df_in['Failed'] == 0]
+ failed = df_in[df_in['Failed'] == 1]
+
+ categories = ['Financial Risk', 'Market Risk', 'Team Risk', 'Competition Risk', 'Traction Risk']
+
+ # Normalize metrics to 0-10 scale (higher = more risk)
+ def calc_risk_scores(data):
+ financial_risk = 10 - (data['Burn_Rate_Months'].mean() / data['Burn_Rate_Months'].max() * 10)
+ market_risk = 10 - (data['Market_Size_Bn'].mean() / data['Market_Size_Bn'].max() * 10)
+ team_risk = 10 - (data['Founders_Experience_Yrs'].mean() / data['Founders_Experience_Yrs'].max() * 10)
+ competition_risk = data['Competition'].mean() * 2 # Scale 1-5 to 2-10
+ traction_risk = 10 - (data['Traction_MRR_K'].mean() / data['Traction_MRR_K'].max() * 10)
+ return [financial_risk, market_risk, team_risk, competition_risk, traction_risk]
+
+ successful_risks = calc_risk_scores(successful)
+ failed_risks = calc_risk_scores(failed)
+
+ # Number of variables
+ N = len(categories)
+ angles = [n / float(N) * 2 * np.pi for n in range(N)]
+ angles += angles[:1] # Complete the circle
+
+ # Plot
+ successful_risks += successful_risks[:1]
+ failed_risks += failed_risks[:1]
+
+ ax.plot(angles, successful_risks, 'o-', linewidth=2, label='Successful Companies', color='green')
+ ax.fill(angles, successful_risks, alpha=0.25, color='green')
+ ax.plot(angles, failed_risks, 'o-', linewidth=2, label='Failed Companies', color='red')
+ ax.fill(angles, failed_risks, alpha=0.25, color='red')
+
+ ax.set_xticks(angles[:-1])
+ ax.set_xticklabels(categories)
+ ax.set_ylim(0, 10)
+ ax.set_title('Risk Assessment Profile\n(0=Low Risk, 10=High Risk)', pad=20)
+ ax.legend(loc='upper right', bbox_to_anchor=(1.2, 1.0))
+ ax.grid(True)
+
+ plt.tight_layout()
+ return fig_to_bytes(fig)
+
+def get_mobile_optimized_figsize(default_width: float, default_height: float) -> tuple:
+ """
+ Get mobile-optimized figure size for charts.
+
+ Args:
+ default_width: Default width for desktop
+ default_height: Default height for desktop
+
+ Returns:
+ tuple: (width, height) optimized for mobile viewing
+ """
+ # For mobile, use smaller, more square dimensions
+ mobile_width = min(default_width, 8) # Max 8 inches wide
+ mobile_height = min(default_height, 6) # Max 6 inches tall
+
+ # Ensure aspect ratio is mobile-friendly (not too wide)
+ if mobile_width / mobile_height > 1.5:
+ mobile_height = mobile_width / 1.4
+
+ return (mobile_width, mobile_height)
+
+def fig_to_bytes(fig) -> bytes:
+ """
+ Convert a matplotlib figure to PNG bytes for Chainlit display.
+
+ This utility function takes a matplotlib figure object and converts it
+ to a PNG image stored in memory (not on disk) for efficient transmission
+ to the Chainlit UI.
+
+ Args:
+ fig: Matplotlib figure object
+
+ Returns:
+ bytes: PNG image data as bytes
+
+ Process:
+ 1. Create in-memory byte buffer
+ 2. Save figure to buffer as PNG with tight layout and high DPI
+ 3. Reset buffer position to start
+ 4. Close figure to free memory
+ 5. Return byte data
+ """
+ buf = io.BytesIO() # Create in-memory binary stream
+ fig.savefig(buf, format="png", bbox_inches="tight", dpi=150) # High quality PNG
+ buf.seek(0) # Reset read position to beginning
+ plt.close(fig) # Close figure to prevent memory leaks
+ return buf.getvalue() # Extract bytes from buffer
+
+
+def plot_failure_timeline(df_in: pd.DataFrame):
+ """
+ Generate a bar chart showing estimated failure year for each startup.
+
+ This visualization helps identify which startups are at risk of failing sooner
+ based on their funding runway. Each bar represents a startup's projected
+ failure year.
+
+ Args:
+ df_in (pd.DataFrame): DataFrame with columns 'Startup' and 'Est_Failure_Year'
+
+ Returns:
+ bytes: PNG image data
+
+ Visual elements:
+ - X-axis: Startup names
+ - Y-axis: Projected year of failure
+ - Color gradient: Coolwarm palette (red=sooner, blue=later)
+ - Text labels: Exact failure year displayed on each bar
+ """
+ # Create figure with mobile-optimized size
+ figsize = get_mobile_optimized_figsize(9, 5)
+ fig, ax = plt.subplots(figsize=figsize)
+
+ # Create bar plot using seaborn for better styling
+ # Coolwarm palette: cooler colors for later failure, warmer for sooner
+ sns.barplot(data=df_in, x="Startup", y="Est_Failure_Year", hue="Startup", palette="coolwarm", legend=False, ax=ax)
+
+ # Add text labels on top of each bar showing the exact failure year
+ for i, row in df_in.reset_index().iterrows():
+ ax.text(i, row["Est_Failure_Year"] + 0.03, # Position slightly above bar
+ f"{row['Est_Failure_Year']:.2f}", # Format to 2 decimal places
+ ha="center", va="bottom", fontsize=8) # Center-aligned, small font
+
+ # Set chart title and axis labels
+ ax.set_title("Estimated Failure Year (Funding assumed 2021)")
+ ax.set_ylabel("Projected Year")
+ ax.set_xlabel("Startup")
+
+ # Convert figure to bytes and return
+ return fig_to_bytes(fig)
+
+
+def plot_funding_vs_burn(df_in: pd.DataFrame):
+ """
+ Generate a scatter plot showing relationship between funding and burn rate.
+
+ This visualization reveals patterns in how funding levels relate to spending
+ rates, and whether these patterns differ between successful and failed startups.
+
+ Args:
+ df_in (pd.DataFrame): DataFrame with funding, burn rate, outcome, and sector data
+
+ Returns:
+ bytes: PNG image data
+
+ Visual encoding:
+ - X-axis: Funding amount (USD millions)
+ - Y-axis: Burn rate (months)
+ - Color: Green = successful (Failed=0), Red = failed (Failed=1)
+ - Shape: Different shapes for different sectors
+ - Labels: Startup names displayed next to each point
+ - Size: Fixed at 160 for visibility
+ """
+ figsize = get_mobile_optimized_figsize(9, 5)
+ fig, ax = plt.subplots(figsize=figsize)
+
+ # Create scatter plot with multiple visual dimensions
+ sns.scatterplot(
+ data=df_in,
+ x="Funding_USD_M", # Horizontal axis: funding amount
+ y="Burn_Rate_Months", # Vertical axis: burn rate
+ hue="Failed", # Color encoding: outcome status
+ style="Sector", # Shape encoding: industry sector
+ s=160, # Point size (larger for visibility)
+ palette={0: "green", 1: "red"}, # Explicit color mapping
+ ax=ax, # Target axis
+ alpha=0.8 # Slight transparency for overlapping points
+ )
+
+ # Add text labels for each startup name next to its point
+ for _, r in df_in.iterrows():
+ ax.text(
+ r["Funding_USD_M"] + 0.15, # Offset right of point
+ r["Burn_Rate_Months"] + 0.1, # Offset up from point
+ r["Startup"], # Startup name as label
+ fontsize=8 # Small font to avoid clutter
+ )
+
+ # Set chart title and axis labels
+ ax.set_title("Funding vs Burn (color = outcome, style = sector)")
+ ax.set_xlabel("Funding (USD Millions)")
+ ax.set_ylabel("Burn Rate (months)")
+
+ # Convert figure to bytes and return
+ return fig_to_bytes(fig)
+
+
+def plot_viability_gauge(score: float):
+ """
+ Generate a horizontal gauge chart showing viability score (0-100).
+
+ This creates a simple, easy-to-read gauge that visually communicates
+ the overall viability score with color coding.
+
+ Args:
+ score (float): Viability score between 0 and 100
+
+ Returns:
+ bytes: PNG image data
+
+ Color coding:
+ - Green (#4CAF50): Strong (>= 60)
+ - Yellow/Amber (#FFC107): Moderate (40-59)
+ - Red (#F44336): Weak (< 40)
+
+ Visual design:
+ - Horizontal bar chart with single bar
+ - No Y-axis ticks (minimalist design)
+ - Score displayed in title
+ - Clean appearance with removed spines
+ """
+ # Create compact figure for gauge display (mobile-friendly)
+ figsize = get_mobile_optimized_figsize(6, 1.2)
+ fig, ax = plt.subplots(figsize=figsize)
+
+ # Create horizontal bar with color based on score threshold
+ # Ternary operator chains: score >= 60 → green, else score >= 40 → yellow, else red
+ ax.barh(
+ [0], # Single bar at Y position 0
+ [score], # Bar length equals the score
+ color="#4CAF50" if score >= 60 else "#FFC107" if score >= 40 else "#F44336"
+ )
+
+ # Set X-axis range from 0 to 100 (percentage scale)
+ ax.set_xlim(0, 100)
+
+ # Remove Y-axis ticks for cleaner appearance
+ ax.set_yticks([])
+
+ # Display score in title with 1 decimal place
+ ax.set_title(f"Viability Score: {score:.1f}/100")
+
+ # Remove top, right, and left spines for minimal design
+ for s in ["top", "right", "left"]:
+ ax.spines[s].set_visible(False)
+
+ # Convert figure to bytes and return
+ return fig_to_bytes(fig)
+
+
+def plot_sector_comparison(df_in: pd.DataFrame):
+ """
+ Generate a bar chart comparing average funding by sector.
+
+ Args:
+ df_in (pd.DataFrame): DataFrame with 'Sector' and 'Funding_USD_M' columns
+
+ Returns:
+ bytes: PNG image data
+ """
+ figsize = get_mobile_optimized_figsize(10, 6)
+ fig, ax = plt.subplots(figsize=figsize)
+
+ # Group by sector and calculate average funding
+ sector_avg = df_in.groupby('Sector')['Funding_USD_M'].mean().sort_values(ascending=False)
+
+ # Create bar chart
+ colors = plt.cm.viridis(range(len(sector_avg)))
+ ax.bar(sector_avg.index, sector_avg.values, color=colors, alpha=0.8)
+
+ # Customize
+ ax.set_xlabel('Sector', fontsize=12)
+ ax.set_ylabel('Average Funding (USD Millions)', fontsize=12)
+ ax.set_title('Average Funding by Sector', fontsize=14, fontweight='bold')
+ ax.tick_params(axis='x', rotation=45)
+ plt.tight_layout()
+
+ return fig_to_bytes(fig)
+
+
+def plot_failure_rate_by_country(df_in: pd.DataFrame):
+ """
+ Generate a bar chart showing failure rates by country.
+
+ Args:
+ df_in (pd.DataFrame): DataFrame with 'Country' and 'Failed' columns
+
+ Returns:
+ bytes: PNG image data
+ """
+ figsize = get_mobile_optimized_figsize(10, 6)
+ fig, ax = plt.subplots(figsize=figsize)
+
+ # Calculate failure rate by country
+ country_stats = df_in.groupby('Country').agg({
+ 'Failed': ['sum', 'count']
+ })
+ country_stats.columns = ['Failed', 'Total']
+ country_stats['Failure_Rate'] = (country_stats['Failed'] / country_stats['Total'] * 100)
+ country_stats = country_stats.sort_values('Failure_Rate', ascending=False)
+
+ # Create bar chart
+ colors = ['red' if rate > 50 else 'orange' if rate > 30 else 'green'
+ for rate in country_stats['Failure_Rate']]
+ ax.bar(country_stats.index, country_stats['Failure_Rate'], color=colors, alpha=0.7)
+
+ # Add value labels on bars
+ for i, (idx, row) in enumerate(country_stats.iterrows()):
+ ax.text(i, row['Failure_Rate'] + 2, f"{row['Failure_Rate']:.1f}%",
+ ha='center', va='bottom', fontsize=10)
+
+ ax.set_xlabel('Country', fontsize=12)
+ ax.set_ylabel('Failure Rate (%)', fontsize=12)
+ ax.set_title('Startup Failure Rate by Country', fontsize=14, fontweight='bold')
+ ax.set_ylim(0, 100)
+ plt.tight_layout()
+
+ return fig_to_bytes(fig)
+
+
+def plot_experience_vs_success(df_in: pd.DataFrame):
+ """
+ Generate a scatter plot showing founder experience vs success.
+
+ Args:
+ df_in (pd.DataFrame): DataFrame with 'Founders_Experience_Yrs' and 'Failed' columns
+
+ Returns:
+ bytes: PNG image data
+ """
+ figsize = get_mobile_optimized_figsize(10, 6)
+ fig, ax = plt.subplots(figsize=figsize)
+
+ # Separate successful and failed startups
+ successful = df_in[df_in['Failed'] == 0]
+ failed = df_in[df_in['Failed'] == 1]
+
+ # Scatter plot
+ ax.scatter(successful['Founders_Experience_Yrs'], successful['Funding_USD_M'],
+ c='green', s=150, alpha=0.6, label='Successful', marker='o')
+ ax.scatter(failed['Founders_Experience_Yrs'], failed['Funding_USD_M'],
+ c='red', s=150, alpha=0.6, label='Failed', marker='x')
+
+ # Add labels for each point
+ for _, row in df_in.iterrows():
+ ax.annotate(row['Startup'],
+ (row['Founders_Experience_Yrs'], row['Funding_USD_M']),
+ fontsize=8, alpha=0.7)
+
+ ax.set_xlabel('Founder Experience (Years)', fontsize=12)
+ ax.set_ylabel('Funding (USD Millions)', fontsize=12)
+ ax.set_title('Founder Experience vs Funding & Success', fontsize=14, fontweight='bold')
+ ax.legend()
+ ax.grid(True, alpha=0.3)
+ plt.tight_layout()
+
+ return fig_to_bytes(fig)
+
+
+def plot_custom_chart(df_in: pd.DataFrame, chart_type: str, x_col: str, y_col: str, title: str = None):
+ """
+ Generate a custom chart based on user specifications.
+
+ Args:
+ df_in (pd.DataFrame): DataFrame to visualize
+ chart_type (str): Type of chart ('bar', 'scatter', 'line', 'pie')
+ x_col (str): Column for x-axis
+ y_col (str): Column for y-axis (not used for pie)
+ title (str): Chart title
+
+ Returns:
+ bytes: PNG image data
+ """
+ figsize = get_mobile_optimized_figsize(10, 6)
+ fig, ax = plt.subplots(figsize=figsize)
+
+ try:
+ if chart_type == 'bar':
+ ax.bar(df_in[x_col], df_in[y_col], color='steelblue', alpha=0.7)
+ ax.set_xlabel(x_col, fontsize=12)
+ ax.set_ylabel(y_col, fontsize=12)
+ ax.tick_params(axis='x', rotation=45)
+
+ elif chart_type == 'scatter':
+ ax.scatter(df_in[x_col], df_in[y_col], s=100, alpha=0.6, color='coral')
+ ax.set_xlabel(x_col, fontsize=12)
+ ax.set_ylabel(y_col, fontsize=12)
+
+ elif chart_type == 'line':
+ ax.plot(df_in[x_col], df_in[y_col], marker='o', linewidth=2, color='darkgreen')
+ ax.set_xlabel(x_col, fontsize=12)
+ ax.set_ylabel(y_col, fontsize=12)
+ ax.tick_params(axis='x', rotation=45)
+
+ elif chart_type == 'pie':
+ # For pie charts, x_col is used as labels, y_col as values
+ ax.pie(df_in[y_col], labels=df_in[x_col], autopct='%1.1f%%', startangle=90)
+ ax.axis('equal')
+
+ if title:
+ ax.set_title(title, fontsize=14, fontweight='bold')
+ else:
+ ax.set_title(f"{chart_type.capitalize()} Chart: {x_col} vs {y_col}",
+ fontsize=14, fontweight='bold')
+
+ plt.tight_layout()
+ return fig_to_bytes(fig)
+
+ except Exception as e:
+ # If error, return a simple error message chart
+ ax.text(0.5, 0.5, f"Error generating chart:\n{str(e)}",
+ ha='center', va='center', fontsize=12)
+ ax.set_xlim(0, 1)
+ ax.set_ylim(0, 1)
+ ax.axis('off')
+ return fig_to_bytes(fig)
+
+# =============================
+# UK ECONOMICS ANALYSIS MODULE
+# =============================
+
+class UKEconomicsAnalyzer:
+ """UK-specific economic analysis for startups"""
+
+ def __init__(self):
+ self.uk_data = {
+ 'gdp_growth': 0.3, # Q3 2024 estimate
+ 'inflation_cpi': 2.3, # Current CPI
+ 'bank_rate': 4.75, # Current Bank of England rate
+ 'unemployment': 4.2, # Current unemployment rate
+ 'gbp_usd': 1.27, # Current exchange rate
+ 'london_weight': 0.23, # London's share of UK GDP
+ }
+
+ def analyze_macro_impact(self, startup_data: dict) -> dict:
+ """Analyze macroeconomic impacts on startup"""
+
+ impacts = {
+ 'interest_rate_impact': self._calculate_interest_impact(startup_data),
+ 'inflation_impact': self._calculate_inflation_impact(startup_data),
+ 'labour_market_impact': self._calculate_labour_impact(startup_data),
+ 'regional_factors': self._analyze_regional_factors(startup_data),
+ 'sector_outlook': self._analyze_sector_outlook(startup_data)
+ }
+
+ return impacts
+
+ def _calculate_interest_impact(self, data: dict) -> dict:
+ """Calculate how UK interest rates affect the startup"""
+
+ funding = data.get('funding_usd_m', 5)
+ debt_ratio = data.get('debt_ratio', 0.3)
+
+ # Cost of capital impact
+ base_rate = self.uk_data['bank_rate']
+ risk_premium = 5.0 # Startup risk premium
+ cost_of_debt = base_rate + risk_premium
+
+ # Calculate impact
+ annual_interest_cost = funding * debt_ratio * (cost_of_debt / 100)
+
+ return {
+ 'cost_of_capital': cost_of_debt,
+ 'annual_interest_cost': annual_interest_cost,
+ 'impact_level': 'High' if cost_of_debt > 10 else 'Medium' if cost_of_debt > 7 else 'Low',
+ 'recommendation': self._get_interest_recommendation(cost_of_debt)
+ }
+
+ def _calculate_inflation_impact(self, data: dict) -> dict:
+ """Calculate inflation impact on costs and pricing"""
+
+ burn_rate = data.get('burn_rate_months', 100) * 1000 # Convert to pounds
+ inflation = self.uk_data['inflation_cpi']
+
+ # Real cost increase
+ real_cost_increase = burn_rate * (inflation / 100) * 12 # Annual
+
+ # Pricing power assessment
+ b2b = data.get('is_b2b', True)
+ pricing_power = 'Strong' if b2b else 'Moderate'
+
+ return {
+ 'current_inflation': inflation,
+ 'real_cost_increase_annual': real_cost_increase,
+ 'pricing_power': pricing_power,
+ 'wage_pressure': 'High' if inflation > 3 else 'Moderate'
+ }
+
+ def _calculate_labour_impact(self, data: dict) -> dict:
+ """Analyze UK labour market impact"""
+
+ team_size = data.get('team_size', 10)
+ location = data.get('location', 'London')
+
+ # Regional wage differentials
+ wage_multiplier = 1.3 if location == 'London' else 1.0
+
+ # Skills shortage premium
+ tech_premium = 1.2 if data.get('sector') == 'Tech' else 1.0
+
+ # Calculate labour cost index
+ labour_cost_index = 100 * wage_multiplier * tech_premium
+
+ return {
+ 'unemployment_rate': self.uk_data['unemployment'],
+ 'labour_cost_index': labour_cost_index,
+ 'talent_availability': 'Tight' if self.uk_data['unemployment'] < 4 else 'Balanced',
+ 'wage_growth_pressure': 'High' if labour_cost_index > 120 else 'Moderate'
+ }
+
+ def _analyze_regional_factors(self, data: dict) -> dict:
+ """Analyze UK regional economic factors"""
+
+ location = data.get('location', 'London')
+
+ regional_data = {
+ 'London': {'growth': 2.1, 'cost_index': 150, 'talent_pool': 'Deep'},
+ 'Manchester': {'growth': 1.8, 'cost_index': 85, 'talent_pool': 'Growing'},
+ 'Edinburgh': {'growth': 1.5, 'cost_index': 90, 'talent_pool': 'Specialized'},
+ 'Birmingham': {'growth': 1.3, 'cost_index': 80, 'talent_pool': 'Developing'},
+ 'Bristol': {'growth': 1.9, 'cost_index': 95, 'talent_pool': 'Tech-focused'},
+ 'Cambridge': {'growth': 2.3, 'cost_index': 110, 'talent_pool': 'Research-heavy'}
+ }
+
+ region_info = regional_data.get(location, regional_data['London'])
+
+ return {
+ 'location': location,
+ 'regional_growth': region_info['growth'],
+ 'cost_index': region_info['cost_index'],
+ 'talent_pool': region_info['talent_pool'],
+ 'competitiveness': 'High' if region_info['cost_index'] < 100 else 'Challenging'
+ }
+
+ def _analyze_sector_outlook(self, data: dict) -> dict:
+ """UK sector-specific analysis"""
+
+ sector = data.get('sector', 'Tech')
+
+ sector_outlooks = {
+ 'FinTech': {'growth': 4.5, 'regulation': 'High', 'opportunity': 'Strong'},
+ 'HealthTech': {'growth': 3.8, 'regulation': 'High', 'opportunity': 'NHS partnerships'},
+ 'GreenTech': {'growth': 6.2, 'regulation': 'Medium', 'opportunity': 'Net Zero targets'},
+ 'RetailTech': {'growth': 2.1, 'regulation': 'Low', 'opportunity': 'Digital transformation'},
+ 'EdTech': {'growth': 3.5, 'regulation': 'Medium', 'opportunity': 'Skills gap'},
+ 'PropTech': {'growth': 2.8, 'regulation': 'Medium', 'opportunity': 'Housing crisis'}
+ }
+
+ outlook = sector_outlooks.get(sector, {'growth': 2.5, 'regulation': 'Medium', 'opportunity': 'General'})
+
+ return outlook
+
+ def _get_interest_recommendation(self, cost: float) -> str:
+ """Generate interest rate recommendations"""
+
+ if cost > 12:
+ return "Consider equity financing over debt given high interest costs"
+ elif cost > 8:
+ return "Lock in current rates if possible; consider revenue-based financing"
+ else:
+ return "Favorable borrowing environment; consider leveraging debt strategically"
+
+def plot_uk_economic_indicators(df_in: pd.DataFrame):
+ """Create UK economic indicators dashboard"""
+
+ fig, axes = plt.subplots(2, 2, figsize=(14, 10))
+
+ # UK GDP Growth vs Startup Funding
+ ax1 = axes[0, 0]
+ quarters = ['Q1 2023', 'Q2 2023', 'Q3 2023', 'Q4 2023', 'Q1 2024', 'Q2 2024']
+ gdp_growth = [0.1, 0.2, 0.0, -0.1, 0.6, 0.7]
+ startup_funding = [1.2, 1.5, 1.1, 0.9, 1.3, 1.4] # Billions
+
+ ax1_twin = ax1.twinx()
+ ax1.bar(quarters, gdp_growth, alpha=0.7, color='navy', label='GDP Growth %')
+ ax1_twin.plot(quarters, startup_funding, 'ro-', label='Startup Funding (£B)')
+
+ ax1.set_ylabel('GDP Growth (%)', color='navy')
+ ax1_twin.set_ylabel('Startup Funding (£B)', color='red')
+ ax1.set_title('UK GDP Growth vs Startup Funding', fontweight='bold')
+ ax1.tick_params(axis='x', rotation=45)
+
+ # Interest Rate Impact
+ ax2 = axes[0, 1]
+ rates = np.linspace(0, 10, 50)
+ startup_viability = 100 - (rates ** 1.5) * 3
+
+ ax2.plot(rates, startup_viability, linewidth=2, color='darkred')
+ ax2.axvline(x=4.75, color='green', linestyle='--', label='Current Bank Rate')
+ ax2.fill_between(rates, 0, startup_viability, alpha=0.3, color='lightblue')
+
+ ax2.set_xlabel('Interest Rate (%)')
+ ax2.set_ylabel('Startup Viability Score')
+ ax2.set_title('Interest Rate Impact on Startups', fontweight='bold')
+ ax2.legend()
+ ax2.grid(True, alpha=0.3)
+
+ # Regional Distribution
+ ax3 = axes[1, 0]
+ regions = ['London', 'South East', 'North West', 'Scotland', 'West Midlands', 'Other']
+ startup_dist = [42, 18, 12, 8, 7, 13]
+ colors = plt.cm.Blues(np.linspace(0.4, 0.9, len(regions)))
+
+ wedges, texts, autotexts = ax3.pie(startup_dist, labels=regions, autopct='%1.1f%%',
+ colors=colors, startangle=90)
+ ax3.set_title('UK Startup Distribution by Region', fontweight='bold')
+
+ # Sector Performance
+ ax4 = axes[1, 1]
+ sectors = ['FinTech', 'HealthTech', 'GreenTech', 'EdTech', 'RetailTech']
+ performance = [4.5, 3.8, 6.2, 3.5, 2.1]
+
+ bars = ax4.barh(sectors, performance, color='teal')
+ ax4.set_xlabel('Expected Growth Rate (%)')
+ ax4.set_title('UK Sector Growth Outlook', fontweight='bold')
+
+ for bar, value in zip(bars, performance):
+ ax4.text(value + 0.1, bar.get_y() + bar.get_height()/2,
+ f'{value}%', va='center')
+
+ plt.suptitle('UK Economic Analysis Dashboard', fontsize=16, fontweight='bold')
+ plt.tight_layout()
+
+ return fig_to_bytes(fig)
+
+def plot_profitability_analysis(analysis_data: dict):
+ """Create profitability analysis charts"""
+
+ fig, axes = plt.subplots(2, 2, figsize=(14, 10))
+
+ # Margin Waterfall
+ ax1 = axes[0, 0]
+ margins = ['Revenue', 'COGS', 'Gross Profit', 'OpEx', 'Operating Profit', 'Net Profit']
+ values = [100, -40, 60, -35, 25, 18]
+ colors = ['green', 'red', 'green', 'red', 'green', 'darkgreen']
+
+ ax1.bar(margins, values, color=colors, alpha=0.7)
+ ax1.axhline(y=0, color='black', linestyle='-', linewidth=0.5)
+ ax1.set_title('Profitability Waterfall', fontweight='bold')
+ ax1.set_ylabel('% of Revenue')
+ ax1.tick_params(axis='x', rotation=45)
+
+ # Unit Economics
+ ax2 = axes[0, 1]
+ ltv_cac = analysis_data.get('ltv_cac_ratio', 3.0)
+ benchmark = 3.0
+
+ bars = ax2.bar(['LTV/CAC Ratio', 'Benchmark'], [ltv_cac, benchmark],
+ color=['green' if ltv_cac > benchmark else 'red', 'gray'])
+ ax2.axhline(y=3, color='blue', linestyle='--', alpha=0.5, label='Healthy Threshold')
+ ax2.set_title('Unit Economics Health', fontweight='bold')
+ ax2.set_ylabel('Ratio')
+ ax2.legend()
+
+ # Add value labels
+ for bar in bars:
+ height = bar.get_height()
+ ax2.text(bar.get_x() + bar.get_width()/2., height + 0.1,
+ f'{height:.1f}', ha='center', va='bottom')
+
+ # Break-even Analysis
+ ax3 = axes[1, 0]
+ units = np.linspace(0, 2000, 100)
+ fixed_costs = 50000
+ variable_cost = 30
+ price = 100
+
+ revenue_line = units * price
+ total_cost_line = fixed_costs + (units * variable_cost)
+
+ ax3.plot(units, revenue_line, 'g-', label='Revenue', linewidth=2)
+ ax3.plot(units, total_cost_line, 'r-', label='Total Cost', linewidth=2)
+ ax3.fill_between(units, revenue_line, total_cost_line,
+ where=(revenue_line > total_cost_line), alpha=0.3, color='green', label='Profit Zone')
+ ax3.fill_between(units, revenue_line, total_cost_line,
+ where=(revenue_line <= total_cost_line), alpha=0.3, color='red', label='Loss Zone')
+
+ # Mark break-even point
+ break_even_units = fixed_costs / (price - variable_cost)
+ ax3.plot(break_even_units, break_even_units * price, 'ko', markersize=8)
+ ax3.annotate(f'Break-even: {break_even_units:.0f} units',
+ xy=(break_even_units, break_even_units * price),
+ xytext=(break_even_units + 200, break_even_units * price),
+ arrowprops=dict(arrowstyle='->'))
+
+ ax3.set_xlabel('Units Sold')
+ ax3.set_ylabel('Revenue/Cost ($)')
+ ax3.set_title('Break-even Analysis', fontweight='bold')
+ ax3.legend()
+ ax3.grid(True, alpha=0.3)
+
+ # Cash Runway
+ ax4 = axes[1, 1]
+ months = np.arange(0, 25)
+ cash_balance = 500000
+ monthly_burn = 50000
+
+ cash_projection = [cash_balance - (monthly_burn * m) for m in months]
+ cash_projection = [max(0, c) for c in cash_projection] # Can't go below 0
+
+ ax4.fill_between(months, 0, cash_projection, alpha=0.3, color='blue')
+ ax4.plot(months, cash_projection, 'b-', linewidth=2)
+ ax4.axhline(y=100000, color='orange', linestyle='--', label='Danger Zone')
+ ax4.axhline(y=0, color='red', linestyle='--', label='Out of Cash')
+
+ # Mark runway
+ runway = cash_balance / monthly_burn
+ ax4.axvline(x=runway, color='green', linestyle='--', alpha=0.7, label=f'Runway: {runway:.0f} months')
+
+ ax4.set_xlabel('Months')
+ ax4.set_ylabel('Cash Balance ($)')
+ ax4.set_title('Cash Runway Projection', fontweight='bold')
+ ax4.legend()
+ ax4.grid(True, alpha=0.3)
+
+ plt.suptitle('Company Financial Analysis Dashboard', fontsize=16, fontweight='bold')
+ plt.tight_layout()
+
+ return fig_to_bytes(fig)
+
+def plot_margin_trends(historical_data: list):
+ """Plot historical margin trends"""
+
+ fig, ax = plt.subplots(figsize=(12, 6))
+
+ quarters = [d['quarter'] for d in historical_data]
+ gross_margins = [d['gross_margin'] for d in historical_data]
+ operating_margins = [d['operating_margin'] for d in historical_data]
+ net_margins = [d['net_margin'] for d in historical_data]
+
+ ax.plot(quarters, gross_margins, 'g-', marker='o', linewidth=2, label='Gross Margin')
+ ax.plot(quarters, operating_margins, 'b-', marker='s', linewidth=2, label='Operating Margin')
+ ax.plot(quarters, net_margins, 'r-', marker='^', linewidth=2, label='Net Margin')
+
+ ax.fill_between(range(len(quarters)), gross_margins, alpha=0.1, color='green')
+ ax.fill_between(range(len(quarters)), operating_margins, alpha=0.1, color='blue')
+ ax.fill_between(range(len(quarters)), net_margins, alpha=0.1, color='red')
+
+ ax.set_xlabel('Quarter')
+ ax.set_ylabel('Margin (%)')
+ ax.set_title('Margin Trends Over Time', fontweight='bold')
+ ax.legend()
+ ax.grid(True, alpha=0.3)
+
+ # Add trend lines
+ z_gross = np.polyfit(range(len(quarters)), gross_margins, 1)
+ p_gross = np.poly1d(z_gross)
+ ax.plot(range(len(quarters)), p_gross(range(len(quarters))), 'g--', alpha=0.5)
+
+ plt.tight_layout()
+ return fig_to_bytes(fig)
+
+def plot_cash_flow_waterfall(cash_data: dict):
+ """Create cash flow waterfall chart"""
+
+ fig, ax = plt.subplots(figsize=(12, 6))
+
+ categories = ['Starting Cash', 'Operations', 'Investing', 'Financing', 'Ending Cash']
+ values = [
+ cash_data.get('starting_cash', 500000),
+ cash_data.get('cash_from_operations', -200000),
+ cash_data.get('cash_from_investing', -50000),
+ cash_data.get('cash_from_financing', 300000),
+ 0 # Will calculate
+ ]
+
+ # Calculate ending cash
+ values[4] = sum(values[:4])
+
+ # Create cumulative values for positioning
+ cumulative = [values[0]]
+ for i in range(1, len(values)-1):
+ cumulative.append(cumulative[-1] + values[i])
+ cumulative.append(values[4])
+
+ # Plot bars
+ colors = ['blue', 'red' if values[1] < 0 else 'green',
+ 'red' if values[2] < 0 else 'green',
+ 'green' if values[3] > 0 else 'red', 'blue']
+
+ for i, (cat, val, cum) in enumerate(zip(categories, values, cumulative)):
+ if i == 0 or i == len(categories) - 1:
+ # Starting and ending cash - full bars
+ ax.bar(cat, val, color=colors[i], alpha=0.7)
+ else:
+ # Flow bars - positioned relative to cumulative
+ bottom = cum - val if val > 0 else cum
+ height = abs(val)
+ ax.bar(cat, height, bottom=bottom, color=colors[i], alpha=0.7)
+
+ # Add value labels
+ ax.text(i, cum + 10000, f'${val:,.0f}', ha='center', va='bottom')
+
+ ax.set_title('Cash Flow Waterfall Analysis', fontweight='bold')
+ ax.set_ylabel('Cash ($)')
+ ax.grid(True, alpha=0.3)
+
+ plt.tight_layout()
+ return fig_to_bytes(fig)
+
+def plot_break_even_chart(financials: dict):
+ """Create detailed break-even analysis chart"""
+
+ fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))
+
+ # Break-even line chart
+ units = np.linspace(0, 2000, 100)
+ fixed_costs = financials.get('fixed_costs', 100000)
+ variable_cost_per_unit = financials.get('variable_cost_per_unit', 40)
+ price_per_unit = financials.get('price_per_unit', 100)
+
+ revenue = units * price_per_unit
+ total_costs = fixed_costs + (units * variable_cost_per_unit)
+ profit = revenue - total_costs
+
+ ax1.plot(units, revenue, 'g-', linewidth=2, label='Revenue')
+ ax1.plot(units, total_costs, 'r-', linewidth=2, label='Total Costs')
+ ax1.fill_between(units, revenue, total_costs, where=(revenue > total_costs),
+ alpha=0.3, color='green', label='Profit Zone')
+ ax1.fill_between(units, revenue, total_costs, where=(revenue <= total_costs),
+ alpha=0.3, color='red', label='Loss Zone')
+
+ # Mark break-even point
+ break_even_units = fixed_costs / (price_per_unit - variable_cost_per_unit)
+ break_even_revenue = break_even_units * price_per_unit
+
+ ax1.plot(break_even_units, break_even_revenue, 'ko', markersize=8)
+ ax1.annotate(f'Break-even\n{break_even_units:.0f} units\n${break_even_revenue:,.0f}',
+ xy=(break_even_units, break_even_revenue),
+ xytext=(break_even_units + 300, break_even_revenue),
+ arrowprops=dict(arrowstyle='->'))
+
+ ax1.set_xlabel('Units Sold')
+ ax1.set_ylabel('Amount ($)')
+ ax1.set_title('Break-even Analysis', fontweight='bold')
+ ax1.legend()
+ ax1.grid(True, alpha=0.3)
+
+ # Sensitivity analysis
+ scenarios = ['Conservative', 'Base Case', 'Optimistic']
+ price_variations = [price_per_unit * 0.9, price_per_unit, price_per_unit * 1.1]
+ break_even_scenarios = [fixed_costs / (p - variable_cost_per_unit) for p in price_variations]
+
+ bars = ax2.bar(scenarios, break_even_scenarios, color=['red', 'orange', 'green'], alpha=0.7)
+ ax2.set_ylabel('Break-even Units')
+ ax2.set_title('Break-even Sensitivity Analysis', fontweight='bold')
+
+ # Add value labels
+ for bar, value in zip(bars, break_even_scenarios):
+ ax2.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 20,
+ f'{value:.0f}', ha='center', va='bottom')
+
+ plt.tight_layout()
+ return fig_to_bytes(fig)
+
+# =============================
+# COMPANY ANALYSIS MODULE
+# =============================
+
+class CompanyAnalyzer:
+ """Comprehensive company financial analysis"""
+
+ def __init__(self):
+ self.industry_benchmarks = {
+ 'SaaS': {'gross_margin': 75, 'operating_margin': 20, 'ltv_cac': 3.0},
+ 'E-commerce': {'gross_margin': 40, 'operating_margin': 10, 'ltv_cac': 2.5},
+ 'Marketplace': {'gross_margin': 60, 'operating_margin': 15, 'ltv_cac': 4.0},
+ 'Hardware': {'gross_margin': 35, 'operating_margin': 8, 'ltv_cac': 2.0},
+ 'Services': {'gross_margin': 50, 'operating_margin': 12, 'ltv_cac': 2.8},
+ 'FinTech': {'gross_margin': 65, 'operating_margin': 18, 'ltv_cac': 3.5}
+ }
+
+ def analyze_profitability(self, financials: dict) -> dict:
+ """Complete profitability analysis"""
+
+ # Extract key metrics
+ revenue = financials.get('revenue', 0)
+ cogs = financials.get('cogs', 0)
+ opex = financials.get('opex', 0)
+ sales_marketing = financials.get('sales_marketing', 0)
+ rd_expense = financials.get('rd_expense', 0)
+ admin_expense = financials.get('admin_expense', 0)
+
+ # Calculate margins
+ gross_profit = revenue - cogs
+ gross_margin = (gross_profit / revenue * 100) if revenue > 0 else 0
+
+ operating_profit = gross_profit - opex
+ operating_margin = (operating_profit / revenue * 100) if revenue > 0 else 0
+
+ ebitda = operating_profit + financials.get('depreciation', 0)
+ ebitda_margin = (ebitda / revenue * 100) if revenue > 0 else 0
+
+ net_profit = operating_profit - financials.get('interest', 0) - financials.get('tax', 0)
+ net_margin = (net_profit / revenue * 100) if revenue > 0 else 0
+
+ return {
+ 'gross_profit': gross_profit,
+ 'gross_margin': gross_margin,
+ 'operating_profit': operating_profit,
+ 'operating_margin': operating_margin,
+ 'ebitda': ebitda,
+ 'ebitda_margin': ebitda_margin,
+ 'net_profit': net_profit,
+ 'net_margin': net_margin,
+ 'profit_health': self._assess_profit_health(gross_margin, operating_margin, net_margin)
+ }
+
+ def analyze_unit_economics(self, metrics: dict) -> dict:
+ """Analyze unit-level profitability"""
+
+ # Customer economics
+ cac = metrics.get('customer_acquisition_cost', 100)
+ ltv = metrics.get('lifetime_value', 300)
+ ltv_cac_ratio = ltv / cac if cac > 0 else 0
+
+ # Unit contribution
+ revenue_per_unit = metrics.get('revenue_per_unit', 50)
+ variable_cost_per_unit = metrics.get('variable_cost_per_unit', 20)
+ contribution_margin = revenue_per_unit - variable_cost_per_unit
+ contribution_margin_pct = (contribution_margin / revenue_per_unit * 100) if revenue_per_unit > 0 else 0
+
+ # Payback period
+ monthly_revenue_per_customer = metrics.get('monthly_revenue', 100)
+ payback_months = cac / monthly_revenue_per_customer if monthly_revenue_per_customer > 0 else 999
+
+ return {
+ 'ltv': ltv,
+ 'cac': cac,
+ 'ltv_cac_ratio': ltv_cac_ratio,
+ 'contribution_margin': contribution_margin,
+ 'contribution_margin_pct': contribution_margin_pct,
+ 'payback_months': payback_months,
+ 'unit_economics_health': 'Strong' if ltv_cac_ratio > 3 else 'Moderate' if ltv_cac_ratio > 1 else 'Weak'
+ }
+
+ def analyze_cash_flow(self, cash_data: dict) -> dict:
+ """Analyze cash flow and runway"""
+
+ # Operating cash flow
+ cash_from_operations = cash_data.get('cash_from_operations', -50000)
+ cash_from_investing = cash_data.get('cash_from_investing', -20000)
+ cash_from_financing = cash_data.get('cash_from_financing', 100000)
+
+ # Net cash flow
+ net_cash_flow = cash_from_operations + cash_from_investing + cash_from_financing
+
+ # Burn rate and runway
+ monthly_burn = -cash_from_operations / 12 if cash_from_operations < 0 else 0
+ cash_balance = cash_data.get('cash_balance', 500000)
+ runway_months = cash_balance / monthly_burn if monthly_burn > 0 else 999
+
+ # Cash conversion cycle
+ dso = cash_data.get('days_sales_outstanding', 45)
+ dio = cash_data.get('days_inventory_outstanding', 30)
+ dpo = cash_data.get('days_payables_outstanding', 30)
+ cash_conversion_cycle = dso + dio - dpo
+
+ return {
+ 'operating_cash_flow': cash_from_operations,
+ 'net_cash_flow': net_cash_flow,
+ 'monthly_burn': monthly_burn,
+ 'runway_months': runway_months,
+ 'cash_conversion_cycle': cash_conversion_cycle,
+ 'cash_efficiency': 'Efficient' if cash_conversion_cycle < 30 else 'Moderate' if cash_conversion_cycle < 60 else 'Inefficient'
+ }
+
+ def calculate_break_even(self, financials: dict) -> dict:
+ """Calculate break-even analysis"""
+
+ fixed_costs = financials.get('fixed_costs', 100000)
+ variable_cost_ratio = financials.get('variable_cost_ratio', 0.4)
+ price_per_unit = financials.get('price_per_unit', 100)
+ variable_cost_per_unit = price_per_unit * variable_cost_ratio
+
+ # Break-even units
+ contribution_per_unit = price_per_unit - variable_cost_per_unit
+ break_even_units = fixed_costs / contribution_per_unit if contribution_per_unit > 0 else 999999
+
+ # Break-even revenue
+ break_even_revenue = break_even_units * price_per_unit
+
+ # Margin of safety
+ current_revenue = financials.get('current_revenue', 150000)
+ margin_of_safety = ((current_revenue - break_even_revenue) / current_revenue * 100) if current_revenue > 0 else -100
+
+ return {
+ 'break_even_units': break_even_units,
+ 'break_even_revenue': break_even_revenue,
+ 'contribution_per_unit': contribution_per_unit,
+ 'margin_of_safety': margin_of_safety,
+ 'months_to_break_even': self._calculate_months_to_break_even(financials)
+ }
+
+ def benchmark_performance(self, company_metrics: dict, industry: str) -> dict:
+ """Compare against industry benchmarks"""
+
+ benchmarks = self.industry_benchmarks.get(
+ industry,
+ self.industry_benchmarks['Services'] # Default
+ )
+
+ comparisons = {
+ 'gross_margin': {
+ 'company': company_metrics.get('gross_margin', 0),
+ 'industry': benchmarks['gross_margin'],
+ 'delta': company_metrics.get('gross_margin', 0) - benchmarks['gross_margin'],
+ 'performance': 'Above' if company_metrics.get('gross_margin', 0) > benchmarks['gross_margin'] else 'Below'
+ },
+ 'operating_margin': {
+ 'company': company_metrics.get('operating_margin', 0),
+ 'industry': benchmarks['operating_margin'],
+ 'delta': company_metrics.get('operating_margin', 0) - benchmarks['operating_margin'],
+ 'performance': 'Above' if company_metrics.get('operating_margin', 0) > benchmarks['operating_margin'] else 'Below'
+ },
+ 'ltv_cac': {
+ 'company': company_metrics.get('ltv_cac_ratio', 0),
+ 'industry': benchmarks['ltv_cac'],
+ 'delta': company_metrics.get('ltv_cac_ratio', 0) - benchmarks['ltv_cac'],
+ 'performance': 'Above' if company_metrics.get('ltv_cac_ratio', 0) > benchmarks['ltv_cac'] else 'Below'
+ }
+ }
+
+ # Overall rating
+ above_count = sum(1 for metric in comparisons.values() if metric['performance'] == 'Above')
+ overall_rating = 'Outperforming' if above_count >= 2 else 'Underperforming'
+
+ return {
+ 'comparisons': comparisons,
+ 'overall_rating': overall_rating,
+ 'recommendations': self._generate_recommendations(comparisons)
+ }
+
+ def _assess_profit_health(self, gross, operating, net):
+ """Assess overall profitability health"""
+ if net > 10 and operating > 15 and gross > 50:
+ return "Excellent"
+ elif net > 0 and operating > 5 and gross > 30:
+ return "Good"
+ elif net > -10 and gross > 20:
+ return "Moderate"
+ else:
+ return "Poor"
+
+ def _calculate_months_to_break_even(self, financials):
+ """Calculate months to reach break-even"""
+ current_loss = financials.get('monthly_loss', 50000)
+ growth_rate = financials.get('growth_rate', 0.1)
+
+ if current_loss <= 0: # Already profitable
+ return 0
+
+ months = 0
+ while current_loss > 0 and months < 60: # Max 60 months
+ current_loss *= (1 - growth_rate)
+ months += 1
+
+ return months if months < 60 else 999
+
+ def _generate_recommendations(self, comparisons):
+ """Generate improvement recommendations"""
+ recommendations = []
+
+ if comparisons['gross_margin']['delta'] < 0:
+ recommendations.append("Focus on pricing optimization or reducing COGS")
+ if comparisons['operating_margin']['delta'] < 0:
+ recommendations.append("Improve operational efficiency and reduce overhead")
+ if comparisons['ltv_cac']['delta'] < 0:
+ recommendations.append("Optimize customer acquisition channels or increase retention")
+
+ return recommendations
+
+# =============================
+# INTERACTIVE DASHBOARD MODULE
+# =============================
+
+class InteractiveDashboard:
+ """Interactive dashboard with real-time data visualization"""
+
+ def __init__(self, df: pd.DataFrame):
+ self.df = df
+ self.filtered_df = df.copy()
+ self.selected_startups = []
+
+ def create_executive_summary(self) -> Dict[str, Any]:
+ """Create executive summary metrics"""
+ total_startups = len(self.filtered_df)
+ total_funding = self.filtered_df['Funding_USD_M'].sum()
+ success_rate = ((self.filtered_df['Failed'] == 0).sum() / total_startups * 100) if total_startups > 0 else 0
+ avg_runway = self.filtered_df['Burn_Rate_Months'].mean()
+
+ return {
+ 'total_startups': total_startups,
+ 'total_funding': total_funding,
+ 'success_rate': success_rate,
+ 'avg_runway': avg_runway,
+ 'top_sector': self.filtered_df['Sector'].mode().iloc[0] if len(self.filtered_df) > 0 else 'N/A',
+ 'risk_level': 'High' if success_rate < 40 else 'Medium' if success_rate < 70 else 'Low'
+ }
+
+ def create_interactive_scatter(self, x_col: str = 'Funding_USD_M', y_col: str = 'Burn_Rate_Months') -> bytes:
+ """Create interactive scatter plot with hover details and filtering"""
+
+ fig = go.Figure()
+
+ # Split data by success/failure for different colors
+ success_df = self.filtered_df[self.filtered_df['Failed'] == 0]
+ failed_df = self.filtered_df[self.filtered_df['Failed'] == 1]
+
+ # Add successful startups
+ if len(success_df) > 0:
+ fig.add_trace(go.Scatter(
+ x=success_df[x_col],
+ y=success_df[y_col],
+ mode='markers',
+ name='Successful',
+ marker=dict(
+ color='green',
+ size=success_df['Market_Size_Bn'] * 2, # Size by market size
+ opacity=0.7,
+ line=dict(width=1, color='darkgreen')
+ ),
+ text=success_df['Startup'],
+ hovertemplate=
+ '%{text}
' +
+ f'{x_col}: %{{x}}
' +
+ f'{y_col}: %{{y}}
' +
+ 'Sector: %{customdata[0]}
' +
+ 'Market Size: $%{customdata[1]}B
' +
+ 'Experience: %{customdata[2]} years
' +
+ '',
+ customdata=success_df[['Sector', 'Market_Size_Bn', 'Founders_Experience_Yrs']].values,
+ selected=dict(marker=dict(color='gold', size=20))
+ ))
+
+ # Add failed startups
+ if len(failed_df) > 0:
+ fig.add_trace(go.Scatter(
+ x=failed_df[x_col],
+ y=failed_df[y_col],
+ mode='markers',
+ name='Failed',
+ marker=dict(
+ color='red',
+ size=failed_df['Market_Size_Bn'] * 2,
+ opacity=0.7,
+ line=dict(width=1, color='darkred')
+ ),
+ text=failed_df['Startup'],
+ hovertemplate=
+ '%{text}
' +
+ f'{x_col}: %{{x}}
' +
+ f'{y_col}: %{{y}}
' +
+ 'Sector: %{customdata[0]}
' +
+ 'Market Size: $%{customdata[1]}B
' +
+ 'Experience: %{customdata[2]} years
' +
+ '',
+ customdata=failed_df[['Sector', 'Market_Size_Bn', 'Founders_Experience_Yrs']].values,
+ selected=dict(marker=dict(color='orange', size=20))
+ ))
+
+ # Update layout for interactivity
+ fig.update_layout(
+ title=f'Interactive Analysis: {x_col} vs {y_col}',
+ xaxis_title=x_col.replace('_', ' ').title(),
+ yaxis_title=y_col.replace('_', ' ').title(),
+ hovermode='closest',
+ clickmode='event+select',
+ showlegend=True,
+ height=600,
+ template='plotly_white',
+ annotations=[
+ dict(
+ text="💡 Click points to select • Drag to zoom • Double-click to reset",
+ showarrow=False,
+ xref="paper", yref="paper",
+ x=0.5, y=1.02, xanchor='center', yanchor='bottom',
+ font=dict(size=12, color="gray")
+ )
+ ]
+ )
+
+ return pio.to_image(fig, format='png')
+
+ def create_multi_dimensional_heatmap(self) -> bytes:
+ """Create correlation heatmap with interactive features"""
+
+ # Select numeric columns for correlation
+ numeric_cols = ['Funding_USD_M', 'Burn_Rate_Months', 'Founders_Experience_Yrs',
+ 'Market_Size_Bn', 'Business_Model_Strength', 'Moat_Defensibility',
+ 'MRR_K', 'Monthly_Growth_Rate', 'Competition_Intensity']
+
+ # Calculate correlation matrix
+ corr_matrix = self.filtered_df[numeric_cols].corr()
+
+ fig = go.Figure(data=go.Heatmap(
+ z=corr_matrix.values,
+ x=[col.replace('_', ' ').title() for col in corr_matrix.columns],
+ y=[col.replace('_', ' ').title() for col in corr_matrix.index],
+ colorscale='RdBu',
+ zmid=0,
+ text=np.round(corr_matrix.values, 2),
+ texttemplate="%{text}",
+ textfont={"size": 10},
+ hovertemplate='%{x} vs %{y}
Correlation: %{z:.3f}'
+ ))
+
+ fig.update_layout(
+ title='📊 Interactive Correlation Heatmap',
+ height=600,
+ template='plotly_white'
+ )
+
+ return pio.to_image(fig, format='png')
+
+ def create_real_time_metrics_dashboard(self) -> bytes:
+ """Create real-time style metrics dashboard"""
+
+ # Create subplot layout
+ fig = make_subplots(
+ rows=2, cols=2,
+ subplot_titles=['💰 Funding Distribution', '📈 Success Rate by Sector',
+ '⏱️ Runway Analysis', '🌍 Geographic Distribution'],
+ specs=[[{"type": "bar"}, {"type": "bar"}],
+ [{"type": "histogram"}, {"type": "pie"}]]
+ )
+
+ # 1. Funding distribution
+ funding_bins = pd.cut(self.filtered_df['Funding_USD_M'], bins=5)
+ funding_dist = funding_bins.value_counts().sort_index()
+
+ fig.add_trace(
+ go.Bar(x=[str(interval) for interval in funding_dist.index],
+ y=funding_dist.values,
+ name="Funding",
+ marker_color='lightblue'),
+ row=1, col=1
+ )
+
+ # 2. Success rate by sector
+ sector_success = self.filtered_df.groupby('Sector')['Failed'].agg(['count', 'sum'])
+ sector_success['success_rate'] = (1 - sector_success['sum'] / sector_success['count']) * 100
+
+ fig.add_trace(
+ go.Bar(x=sector_success.index,
+ y=sector_success['success_rate'],
+ name="Success Rate",
+ marker_color='lightgreen'),
+ row=1, col=2
+ )
+
+ # 3. Runway distribution
+ fig.add_trace(
+ go.Histogram(x=self.filtered_df['Burn_Rate_Months'],
+ name="Runway",
+ marker_color='orange',
+ opacity=0.7),
+ row=2, col=1
+ )
+
+ # 4. Geographic distribution
+ country_dist = self.filtered_df['Country'].value_counts()
+
+ fig.add_trace(
+ go.Pie(labels=country_dist.index,
+ values=country_dist.values,
+ name="Geography"),
+ row=2, col=2
+ )
+
+ fig.update_layout(
+ title_text="📊 Real-Time Dashboard Metrics",
+ height=800,
+ showlegend=False,
+ template='plotly_white'
+ )
+
+ return pio.to_image(fig, format='png')
+
+ def filter_data(self, filters: Dict[str, Any]) -> None:
+ """Apply filters to the dataset"""
+ self.filtered_df = self.df.copy()
+
+ if 'sectors' in filters and filters['sectors']:
+ self.filtered_df = self.filtered_df[self.filtered_df['Sector'].isin(filters['sectors'])]
+
+ if 'countries' in filters and filters['countries']:
+ self.filtered_df = self.filtered_df[self.filtered_df['Country'].isin(filters['countries'])]
+
+ if 'funding_range' in filters:
+ min_funding, max_funding = filters['funding_range']
+ self.filtered_df = self.filtered_df[
+ (self.filtered_df['Funding_USD_M'] >= min_funding) &
+ (self.filtered_df['Funding_USD_M'] <= max_funding)
+ ]
+
+ if 'success_only' in filters and filters['success_only']:
+ self.filtered_df = self.filtered_df[self.filtered_df['Failed'] == 0]
+
+ def compare_startups(self, startup_names: List[str]) -> bytes:
+ """Create comparison chart for selected startups"""
+
+ comparison_df = self.df[self.df['Startup'].isin(startup_names)]
+
+ if len(comparison_df) == 0:
+ return None
+
+ # Create radar chart for comparison
+ categories = ['Funding (Scaled)', 'Experience', 'Market Size',
+ 'Business Model', 'Moat', 'MRR (Scaled)', 'Growth Rate']
+
+ fig = go.Figure()
+
+ for _, startup in comparison_df.iterrows():
+ values = [
+ startup['Funding_USD_M'] / 20, # Scale to 0-5
+ startup['Founders_Experience_Yrs'],
+ startup['Market_Size_Bn'],
+ startup['Business_Model_Strength'],
+ startup['Moat_Defensibility'],
+ startup['MRR_K'] / 200, # Scale to 0-5
+ startup['Monthly_Growth_Rate'] / 10 # Scale to 0-5
+ ]
+
+ fig.add_trace(go.Scatterpolar(
+ r=values,
+ theta=categories,
+ fill='toself',
+ name=startup['Startup'],
+ line_color='red' if startup['Failed'] == 1 else 'green'
+ ))
+
+ fig.update_layout(
+ polar=dict(
+ radialaxis=dict(
+ visible=True,
+ range=[0, 5]
+ )),
+ showlegend=True,
+ title="🔍 Startup Comparison Analysis",
+ height=600
+ )
+
+ return pio.to_image(fig, format='png')
+
+def create_dashboard_summary(dashboard: InteractiveDashboard) -> str:
+ """Create text summary of dashboard metrics"""
+
+ summary = dashboard.create_executive_summary()
+
+ return f"""
+## 📊 Dashboard Executive Summary
+
+### 🎯 **Key Metrics**
+- **Total Startups:** {summary['total_startups']}
+- **Total Funding:** ${summary['total_funding']:.1f}M
+- **Success Rate:** {summary['success_rate']:.1f}%
+- **Avg Runway:** {summary['avg_runway']:.1f} months
+
+### 📈 **Risk Assessment**
+- **Overall Risk Level:** {summary['risk_level']}
+- **Top Sector:** {summary['top_sector']}
+- **Recommendation:** {'Focus on due diligence' if summary['risk_level'] == 'High' else 'Balanced portfolio approach' if summary['risk_level'] == 'Medium' else 'Strong investment opportunities'}
+
+### 🔍 **Interactive Features Available:**
+- Click charts to drill down
+- Filter by sector, country, funding range
+- Compare multiple startups
+- Real-time metric updates
+"""
+
+# =============================
+# VIABILITY SCORING MODEL
+# =============================
+
+def viability_score(features: Dict[str, Any]) -> Dict[str, Any]:
+ """
+ Calculate comprehensive viability score for a startup based on multiple factors.
+
+ This heuristic model evaluates startup viability across 8 dimensions and combines
+ them into a single score (0-100). Higher scores indicate better viability.
+
+ Scoring Methodology:
+ --------------------
+ 1. Each dimension is normalized to 0-1 scale
+ 2. Dimensions are weighted based on importance
+ 3. Weighted scores are summed and scaled to 0-100
+ 4. Additional metrics (runway, failure year) are calculated
+ 5. Rule-based tips are generated based on weak areas
+
+ Input Features:
+ ---------------
+ Args:
+ features (Dict[str, Any]): Dictionary containing:
+ - funding_usd_m (float): Funding in millions USD
+ - burn_rate_months (float): Burn rate in months
+ - team_experience_years (float): Average team experience in years
+ - market_size_bn (float): Market size in billions USD
+ - business_model_strength_1_5 (int): 1-5 scale of business model quality
+ - moat_1_5 (int): 1-5 scale of competitive moat/defensibility
+ - traction_mrr_k (float): Monthly recurring revenue in thousands USD
+ - growth_rate_pct (float): Monthly growth rate as percentage
+ - competition_intensity_1_5 (int): 1-5 scale of competition (higher = more intense)
+
+ Returns:
+ Dict[str, Any]: Results containing:
+ - score (float): Overall viability score (0-100)
+ - survival_months (float): Estimated months until failure
+ - est_failure_year (float): Projected year of failure
+ - components (dict): Individual component scores (0-1)
+ - tips (list): Actionable recommendations
+
+ Scoring Components & Weights:
+ -----------------------------
+ 1. Runway (18%): How long funding lasts - capped at 2 years = 1.0
+ 2. Experience (14%): Team experience - 10 years = 1.0
+ 3. Market (14%): Market size - $100B = 1.0
+ 4. Business Model (12%): Strength rating mapped to 0-1
+ 5. Moat (10%): Defensibility rating mapped to 0-1
+ 6. Traction (14%): MRR - $100k = 1.0
+ 7. Growth (12%): Monthly growth - 25% = 1.0
+ 8. Competition (6%): Inverse of competition intensity
+
+ Example:
+ --------
+ >>> features = {
+ ... 'funding_usd_m': 5.0,
+ ... 'burn_rate_months': 12,
+ ... 'team_experience_years': 5,
+ ... 'market_size_bn': 50,
+ ... 'business_model_strength_1_5': 3,
+ ... 'moat_1_5': 3,
+ ... 'traction_mrr_k': 25,
+ ... 'growth_rate_pct': 10,
+ ... 'competition_intensity_1_5': 3
+ ... }
+ >>> result = viability_score(features)
+ >>> print(result['score']) # Overall score out of 100
+ """
+ f = features # Shorthand for cleaner code
+
+ # -------------------------
+ # 1. RUNWAY SCORE (0-1)
+ # -------------------------
+ # Calculate how many years the funding will last
+ # Division approximation: funding (M) / burn_rate (months) ≈ years
+ # Max protects against division by zero
+ runway_years = (f["funding_usd_m"] / max(f["burn_rate_months"], 1))
+
+ # Normalize to 0-1 scale, capping at 2 years
+ # 2+ years of runway = perfect score (1.0)
+ # 0 years = worst score (0.0)
+ runway_score = max(0, min(1, runway_years / 2.0))
+
+ # -------------------------
+ # 2. EXPERIENCE SCORE (0-1)
+ # -------------------------
+ # Team experience normalized to 0-1, capped at 10 years
+ # 10+ years = 1.0, 0 years = 0.0
+ exp_score = max(0, min(1, f["team_experience_years"] / 10))
+
+ # -------------------------
+ # 3. MARKET SIZE SCORE (0-1)
+ # -------------------------
+ # Market opportunity normalized to 0-1, capped at $100B
+ # $100B+ market = 1.0, $0 market = 0.0
+ market_score = max(0, min(1, f["market_size_bn"] / 100))
+
+ # -------------------------
+ # 4. BUSINESS MODEL SCORE (0-1)
+ # -------------------------
+ # Convert 1-5 scale to 0-1 by subtracting 1 and dividing by 4
+ # Rating 5 → 4/4 = 1.0, Rating 1 → 0/4 = 0.0
+ bm_score = (f["business_model_strength_1_5"] - 1) / 4
+
+ # -------------------------
+ # 5. MOAT/DEFENSIBILITY SCORE (0-1)
+ # -------------------------
+ # Same conversion as business model: 1-5 scale → 0-1 scale
+ moat_score = (f["moat_1_5"] - 1) / 4
+
+ # -------------------------
+ # 6. TRACTION SCORE (0-1)
+ # -------------------------
+ # MRR normalized to 0-1, capped at $100k
+ # $100k+ MRR = 1.0, $0 MRR = 0.0
+ traction_score = max(0, min(1, (f["traction_mrr_k"] / 100)))
+
+ # -------------------------
+ # 7. GROWTH SCORE (0-1)
+ # -------------------------
+ # Monthly growth rate normalized to 0-1, capped at 25%
+ # 25%+ monthly growth = 1.0, 0% growth = 0.0
+ growth_score = max(0, min(1, f["growth_rate_pct"] / 25))
+
+ # -------------------------
+ # 8. COMPETITION SCORE (0-1)
+ # -------------------------
+ # Competition is inverted: higher intensity = worse for startup
+ # Convert 1-5 scale to 0-1 penalty, then invert
+ # Low competition (1) → penalty 0 → score 1.0
+ # High competition (5) → penalty 1 → score 0.0
+ competition_penalty = (f["competition_intensity_1_5"] - 1) / 4
+ competition_score = 1 - competition_penalty
+
+ # -------------------------
+ # WEIGHTED COMPOSITE SCORE
+ # -------------------------
+ # Define weights for each component (must sum to 1.0)
+ # These weights reflect the relative importance of each factor
+ weights = {
+ "runway": 0.18, # 18% - Most immediate concern
+ "experience": 0.14, # 14% - Critical for execution
+ "market": 0.14, # 14% - Ceiling for growth
+ "bm": 0.12, # 12% - Revenue sustainability
+ "moat": 0.10, # 10% - Long-term defensibility
+ "traction": 0.14, # 14% - Proof of product-market fit
+ "growth": 0.12, # 12% - Momentum indicator
+ "competition": 0.06 # 6% - External threat level
+ }
+
+ # Calculate weighted sum of all components
+ composite = (
+ runway_score * weights["runway"] +
+ exp_score * weights["experience"] +
+ market_score * weights["market"] +
+ bm_score * weights["bm"] +
+ moat_score * weights["moat"] +
+ traction_score * weights["traction"] +
+ growth_score * weights["growth"] +
+ competition_score * weights["competition"]
+ )
+
+ # Scale composite score (0-1) to percentage (0-100)
+ score_100 = composite * 100.0
+
+ # -------------------------
+ # SURVIVAL METRICS
+ # -------------------------
+ # Calculate estimated months until money runs out
+ # Formula: funding (M) × 12 months/year ÷ burn_rate
+ # Max protects against division by very small numbers
+ survival_months = max(1, f["funding_usd_m"] * (12 / max(f["burn_rate_months"], 0.1)))
+
+ # Calculate projected failure year
+ # Start from FUNDING_YEAR and add runway in years
+ est_failure_year = FUNDING_YEAR + (f["funding_usd_m"] / max(f["burn_rate_months"], 0.1))
+
+ # -------------------------
+ # RULE-BASED RECOMMENDATIONS
+ # -------------------------
+ # Generate actionable tips based on weak areas
+ # Each condition checks if a metric falls below a threshold
+ tips = []
+
+ # Runway too short (< 9 months)
+ if runway_years < 0.75:
+ tips.append("Increase runway (more funding or lower burn).")
+
+ # Team lacks experience (< 3 years average)
+ if f["team_experience_years"] < 3:
+ tips.append("Augment team with experienced operators.")
+
+ # Market too small (< $10B)
+ if f["market_size_bn"] < 10:
+ tips.append("Target a larger wedge or adjacent segments.")
+
+ # Low traction (< $20k MRR)
+ if f["traction_mrr_k"] < 20:
+ tips.append("Focus on early, repeatable revenue (>$20k MRR).")
+
+ # Slow growth (< 8% monthly)
+ if f["growth_rate_pct"] < 8:
+ tips.append("Drive growth via channels with clear CAC/LTV.")
+
+ # Weak moat (rating <= 2)
+ if f["moat_1_5"] <= 2:
+ tips.append("Strengthen defensibility (IP, data, network effects).")
+
+ # Weak business model (rating <= 2)
+ if f["business_model_strength_1_5"] <= 2:
+ tips.append("Clarify pricing & unit economics.")
+
+ # High competition (rating >= 4)
+ if f["competition_intensity_1_5"] >= 4:
+ tips.append("Differentiate positioning vs strong incumbents.")
+
+ # -------------------------
+ # RETURN RESULTS
+ # -------------------------
+ # Package all results into a dictionary for easy access
+ return {
+ "score": score_100, # Overall viability score (0-100)
+ "survival_months": survival_months, # Months until out of money
+ "est_failure_year": est_failure_year, # Projected year of failure
+ "components": { # Individual component scores for transparency
+ "runway": runway_score,
+ "experience": exp_score,
+ "market": market_score,
+ "business_model": bm_score,
+ "moat": moat_score,
+ "traction": traction_score,
+ "growth": growth_score,
+ "competition": competition_score
+ },
+ "tips": tips # Actionable recommendations
+ }
+
+
+def generate_investment_report(df_in: pd.DataFrame, startup_name: str = None) -> bytes:
+ """
+ Generate a comprehensive PDF investment analysis report.
+
+ Args:
+ df_in (pd.DataFrame): Startup dataset
+ startup_name (str, optional): Specific startup to analyze. If None, analyzes entire dataset.
+
+ Returns:
+ bytes: PDF file as bytes
+ """
+ from reportlab.lib.pagesizes import letter
+ from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
+ from reportlab.lib.units import inch
+ from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Image, PageBreak, Table, TableStyle
+ from reportlab.lib import colors
+ from reportlab.lib.enums import TA_CENTER, TA_LEFT, TA_JUSTIFY
+ from datetime import datetime
+
+ # Create in-memory buffer for PDF
+ buffer = io.BytesIO()
+ doc = SimpleDocTemplate(buffer, pagesize=letter, topMargin=0.75*inch, bottomMargin=0.75*inch)
+
+ # Container for PDF elements
+ story = []
+ styles = getSampleStyleSheet()
+
+ # Custom styles
+ title_style = ParagraphStyle(
+ 'CustomTitle',
+ parent=styles['Heading1'],
+ fontSize=24,
+ textColor=colors.HexColor('#ff4b4b'),
+ spaceAfter=30,
+ alignment=TA_CENTER
+ )
+
+ heading_style = ParagraphStyle(
+ 'CustomHeading',
+ parent=styles['Heading2'],
+ fontSize=16,
+ textColor=colors.HexColor('#ff4b4b'),
+ spaceAfter=12,
+ spaceBefore=12
+ )
+
+ subheading_style = ParagraphStyle(
+ 'CustomSubHeading',
+ parent=styles['Heading3'],
+ fontSize=12,
+ textColor=colors.black,
+ spaceAfter=6,
+ spaceBefore=6
+ )
+
+ # =============================
+ # PAGE 1: TITLE & EXECUTIVE SUMMARY
+ # =============================
+
+ if startup_name:
+ startup_data = df_in[df_in['Startup'] == startup_name].iloc[0]
+ report_title = f"Investment Analysis: {startup_name}"
+ else:
+ report_title = "Startup Portfolio Analysis Report"
+
+ story.append(Paragraph(report_title, title_style))
+ story.append(Paragraph(f"Generated by NAVADA | {datetime.now().strftime('%B %d, %Y')}", styles['Normal']))
+ story.append(Spacer(1, 0.3*inch))
+
+ # Executive Summary
+ story.append(Paragraph("Executive Summary", heading_style))
+
+ if startup_name:
+ # Single startup analysis
+ funding = startup_data['Funding_USD_M']
+ burn = startup_data['Burn_Rate_Months']
+ sector = startup_data['Sector']
+ country = startup_data['Country']
+ failed = startup_data['Failed']
+ experience = startup_data['Founders_Experience_Yrs']
+ market = startup_data['Market_Size_Bn']
+
+ status = "Failed" if failed == 1 else "Active/Successful"
+ runway_months = (funding / burn) * 12 if burn > 0 else 0
+
+ summary_text = f"""
+ {startup_name} is a {sector} startup based in {country} with ${funding:.1f}M in funding.
+ The company has a burn rate of {burn} months, resulting in an estimated runway of {runway_months:.1f} months.
+ The founding team has {experience} years of average experience in a market valued at ${market}B.
+ Current status: {status}.
+ """
+ else:
+ # Portfolio analysis
+ total_startups = len(df_in)
+ total_funding = df_in['Funding_USD_M'].sum()
+ failed_count = df_in['Failed'].sum()
+ success_rate = ((total_startups - failed_count) / total_startups) * 100
+ avg_funding = df_in['Funding_USD_M'].mean()
+
+ summary_text = f"""
+ This report analyzes a portfolio of {total_startups} startups with total funding of
+ ${total_funding:.1f}M. The portfolio shows a success rate of {success_rate:.1f}%
+ ({total_startups - failed_count} successful, {failed_count} failed). Average funding per startup
+ is ${avg_funding:.1f}M.
+ """
+
+ story.append(Paragraph(summary_text, styles['BodyText']))
+ story.append(Spacer(1, 0.3*inch))
+
+ # =============================
+ # KEY METRICS TABLE
+ # =============================
+
+ story.append(Paragraph("Key Metrics", heading_style))
+
+ if startup_name:
+ metrics_data = [
+ ['Metric', 'Value'],
+ ['Funding Amount', f'${funding:.1f}M'],
+ ['Burn Rate', f'{burn} months'],
+ ['Estimated Runway', f'{runway_months:.1f} months'],
+ ['Sector', sector],
+ ['Country', country],
+ ['Founder Experience', f'{experience} years'],
+ ['Market Size', f'${market}B'],
+ ['Status', status]
+ ]
+ else:
+ sectors = df_in['Sector'].nunique()
+ countries = df_in['Country'].nunique()
+ avg_experience = df_in['Founders_Experience_Yrs'].mean()
+
+ metrics_data = [
+ ['Metric', 'Value'],
+ ['Total Startups', str(total_startups)],
+ ['Total Funding', f'${total_funding:.1f}M'],
+ ['Success Rate', f'{success_rate:.1f}%'],
+ ['Average Funding', f'${avg_funding:.1f}M'],
+ ['Sectors Covered', str(sectors)],
+ ['Countries', str(countries)],
+ ['Avg Founder Experience', f'{avg_experience:.1f} years']
+ ]
+
+ metrics_table = Table(metrics_data, colWidths=[3*inch, 3*inch])
+ metrics_table.setStyle(TableStyle([
+ ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor('#ff4b4b')),
+ ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
+ ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
+ ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
+ ('FONTSIZE', (0, 0), (-1, 0), 12),
+ ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
+ ('BACKGROUND', (0, 1), (-1, -1), colors.beige),
+ ('GRID', (0, 0), (-1, -1), 1, colors.black)
+ ]))
+
+ story.append(metrics_table)
+ story.append(Spacer(1, 0.3*inch))
+
+ # =============================
+ # VISUALIZATIONS
+ # =============================
+
+ story.append(PageBreak())
+ story.append(Paragraph("Data Visualizations", heading_style))
+ story.append(Spacer(1, 0.2*inch))
+
+ # Chart 1: Timeline
+ story.append(Paragraph("1. Failure Timeline Analysis", subheading_style))
+ timeline_bytes = plot_failure_timeline(df_in)
+ timeline_img = Image(io.BytesIO(timeline_bytes), width=6*inch, height=3*inch)
+ story.append(timeline_img)
+ story.append(Spacer(1, 0.2*inch))
+
+ # Chart 2: Funding vs Burn
+ story.append(Paragraph("2. Funding vs Burn Rate", subheading_style))
+ funding_burn_bytes = plot_funding_vs_burn(df_in)
+ funding_burn_img = Image(io.BytesIO(funding_burn_bytes), width=6*inch, height=3*inch)
+ story.append(funding_burn_img)
+ story.append(Spacer(1, 0.2*inch))
+
+ story.append(PageBreak())
+
+ # Chart 3: Sector Comparison
+ story.append(Paragraph("3. Sector Analysis", subheading_style))
+ sector_bytes = plot_sector_comparison(df_in)
+ sector_img = Image(io.BytesIO(sector_bytes), width=6*inch, height=3*inch)
+ story.append(sector_img)
+ story.append(Spacer(1, 0.2*inch))
+
+ # Chart 4: Country Analysis
+ story.append(Paragraph("4. Geographic Distribution", subheading_style))
+ country_bytes = plot_failure_rate_by_country(df_in)
+ country_img = Image(io.BytesIO(country_bytes), width=6*inch, height=3*inch)
+ story.append(country_img)
+
+ # =============================
+ # RISK ANALYSIS
+ # =============================
+
+ story.append(PageBreak())
+ story.append(Paragraph("Risk Analysis", heading_style))
+
+ # Calculate risk factors
+ high_risk = df_in[df_in['Burn_Rate_Months'] < 10]
+ low_funding = df_in[df_in['Funding_USD_M'] < 3.0]
+ inexperienced = df_in[df_in['Founders_Experience_Yrs'] < 3]
+
+ risk_text = f"""
+ High-Risk Indicators:
+ - {len(high_risk)} startups with burn rate under 10 months (high risk)
+ - {len(low_funding)} startups with funding under $3M (undercapitalized)
+ - {len(inexperienced)} startups with founders having less than 3 years experience
+
+ Key Risks:
+ 1. Runway Risk: Startups with short runways may fail before achieving product-market fit
+ 2. Market Risk: Smaller markets limit growth potential and exit opportunities
+ 3. Team Risk: Inexperienced founders may lack operational expertise
+ 4. Competitive Risk: Crowded sectors reduce differentiation and margins
+ """
+
+ story.append(Paragraph(risk_text, styles['BodyText']))
+ story.append(Spacer(1, 0.3*inch))
+
+ # =============================
+ # RECOMMENDATIONS
+ # =============================
+
+ story.append(Paragraph("Investment Recommendations", heading_style))
+
+ if startup_name:
+ # Calculate viability score
+ viability_features = {
+ 'funding_usd_m': funding,
+ 'burn_rate_months': burn,
+ 'team_experience_years': experience,
+ 'market_size_bn': market,
+ 'business_model_strength_1_5': 3, # Default
+ 'moat_1_5': 3, # Default
+ 'traction_mrr_k': 10, # Default
+ 'growth_rate_pct': 5, # Default
+ 'competition_intensity_1_5': 3 # Default
+ }
+
+ viability_result = viability_score(viability_features)
+ score = viability_result['score']
+
+ if score >= 60:
+ recommendation = "INVEST - Strong fundamentals with acceptable risk profile"
+ color_code = "green"
+ elif score >= 40:
+ recommendation = "MONITOR - Moderate risk, requires additional due diligence"
+ color_code = "orange"
+ else:
+ recommendation = "PASS - High risk factors outweigh potential returns"
+ color_code = "red"
+
+ rec_text = f"""
+ Viability Score: {score:.1f}/100
+
+ Recommendation: {recommendation}
+
+ Rationale:
+ - Runway of {runway_months:.1f} months provides {'adequate' if runway_months > 18 else 'limited'} time to achieve milestones
+ - Market size of ${market}B offers {'strong' if market > 50 else 'moderate'} growth potential
+ - Team experience of {experience} years is {'above' if experience >= 5 else 'below'} industry average
+ - Current status: {status}
+ """
+ else:
+ # Portfolio recommendations
+ top_performers = df_in[df_in['Failed'] == 0].nlargest(3, 'Funding_USD_M')
+
+ rec_text = f"""
+ Portfolio Recommendations:
+
+ 1. Diversify Sector Exposure: Current portfolio concentrated in certain sectors
+ 2. Monitor High-Risk Startups: {len(high_risk)} companies need immediate attention
+ 3. Increase Follow-On Funding: Top performers may benefit from additional capital
+
+ Top 3 Performing Startups:
+ """
+
+ for idx, row in top_performers.iterrows():
+ rec_text += f"- {row['Startup']} ({row['Sector']}) - ${row['Funding_USD_M']}M funding
"
+
+ story.append(Paragraph(rec_text, styles['BodyText']))
+ story.append(Spacer(1, 0.3*inch))
+
+ # =============================
+ # FOOTER
+ # =============================
+
+ story.append(Spacer(1, 0.5*inch))
+ footer_text = """
+ This report was automatically generated by NAVADA (Startup Viability Agent).
+ All analysis is based on provided data and should be supplemented with additional due diligence.
+ Past performance does not guarantee future results.
+ """
+ story.append(Paragraph(footer_text, styles['Italic']))
+
+ # Build PDF
+ doc.build(story)
+
+ # Get PDF bytes
+ buffer.seek(0)
+ return buffer.getvalue()
+
+
+def train_ml_model(df: pd.DataFrame):
+ """
+ Train a Random Forest classifier to predict startup failure.
+
+ Args:
+ df (pd.DataFrame): Startup dataset with features and 'Failed' column
+
+ Returns:
+ RandomForestClassifier: Trained model
+
+ Features used:
+ - Funding_USD_M: Total funding in millions
+ - Burn_Rate_Months: Burn rate in months
+ - Founders_Experience_Yrs: Founder experience in years
+ - Market_Size_Bn: Market size in billions
+
+ Target:
+ - Failed: 0 = success, 1 = failed
+ """
+ # Select feature columns
+ X = df[["Funding_USD_M", "Burn_Rate_Months", "Founders_Experience_Yrs", "Market_Size_Bn"]]
+
+ # Target variable
+ y = df["Failed"]
+
+ # Train Random Forest model
+ model = RandomForestClassifier(n_estimators=100, random_state=42)
+ model.fit(X, y)
+
+ return model
+
+# =============================
+# INTERACTIVE DASHBOARD FUNCTIONS
+# =============================
+
+def create_interactive_scatter(df_in: pd.DataFrame, title: str = "Interactive Startup Analysis") -> str:
+ """
+ Create an interactive Plotly scatter plot with hover details and click functionality.
+
+ This function generates a comprehensive interactive visualization that allows users to:
+ - Hover over points to see detailed startup information
+ - Zoom and pan to explore specific regions of the data
+ - Filter by clicking legend items
+ - Export the chart in various formats
+
+ The scatter plot uses multiple visual encodings:
+ - X-axis: Funding amount (USD Millions)
+ - Y-axis: Burn rate (Months of runway)
+ - Size: Market size (larger bubble = bigger market)
+ - Color: Success/Failure status (green = success, red = failure)
+ - Symbol: Sector (different shapes for different industries)
+
+ Args:
+ df_in (pd.DataFrame): Input startup dataset with required columns:
+ - Funding_USD_M: Funding in millions USD
+ - Burn_Rate_Months: Burn rate in months
+ - Market_Size_Bn: Market size in billions
+ - Failed: 0 = success, 1 = failed
+ - Sector: Industry sector
+ - Country: Country of operation
+ title (str): Chart title for display
+
+ Returns:
+ str: Complete HTML string containing the interactive chart with embedded Plotly.js
+ """
+ # Create the base scatter plot with multiple visual dimensions
+ # Each data point represents one startup with 6+ attributes encoded visually
+ fig = px.scatter(
+ df_in, # Input DataFrame
+ x="Funding_USD_M", # X-axis: funding amount
+ y="Burn_Rate_Months", # Y-axis: burn rate (runway)
+ size="Market_Size_Bn", # Bubble size: market opportunity
+ color="Failed", # Color coding: success (0) vs failure (1)
+ symbol="Sector", # Shape coding: different sectors
+ hover_data={
+ "Startup": True, # Show startup name on hover
+ "Founders_Experience_Yrs": True, # Show founder experience years
+ "Country": True, # Show country of operation
+ "Market_Size_Bn": ":.1f", # Format market size to 1 decimal
+ "Funding_USD_M": ":.1f", # Format funding to 1 decimal
+ "Burn_Rate_Months": ":.1f" # Format burn rate to 1 decimal
+ },
+ title=title, # Dynamic title from parameter
+ labels={
+ "Funding_USD_M": "Funding (USD Millions)", # User-friendly axis label
+ "Burn_Rate_Months": "Burn Rate (Months)", # User-friendly axis label
+ "Failed": "Status" # User-friendly legend label
+ },
+ color_discrete_map={0: "green", 1: "red"}, # Explicit color mapping
+ width=800, # Fixed width for consistency
+ height=600 # Fixed height for consistency
+ )
+
+ # Customize layout for enhanced user experience and interactivity
+ fig.update_layout(
+ hovermode="closest", # Show hover info for nearest point only
+ showlegend=True, # Display legend for color/symbol mapping
+ legend=dict( # Position legend horizontally at top
+ orientation="h", # Horizontal orientation saves space
+ yanchor="bottom", # Anchor to bottom of legend box
+ y=1.02, # Position slightly above chart
+ xanchor="right", # Align to right side
+ x=1 # Full width positioning
+ ),
+ margin=dict( # Mobile-optimized margins
+ t=40, # Reduced top margin
+ b=40, # Reduced bottom margin
+ l=40, # Reduced left margin
+ r=40 # Reduced right margin
+ ),
+ autosize=True, # Enable responsive sizing for mobile
+ font=dict(size=10) # Smaller font size for mobile readability
+ )
+
+ # Add user instruction annotation for better UX
+ # This helps users understand they can interact with the chart
+ fig.add_annotation(
+ text="Click and drag to zoom, hover for details, double-click to reset", # Clear instructions
+ showarrow=False, # No arrow pointing to anything
+ xref="paper", yref="paper", # Use paper coordinates (0-1 range)
+ x=0.5, y=-0.1, # Center horizontally, below chart
+ xanchor='center', yanchor='top', # Center the text anchor point
+ font=dict(size=12, color="gray") # Subtle gray color, readable size
+ )
+
+ # Convert Plotly figure to standalone HTML string
+ # include_plotlyjs='cdn' loads Plotly.js from CDN (smaller file size)
+ # div_id provides unique identifier for multiple charts on same page
+ return fig.to_html(include_plotlyjs='cdn', div_id="interactive-chart")
+
+def create_interactive_timeline(df_in: pd.DataFrame) -> str:
+ """
+ Create an interactive timeline showing failure progression over time.
+
+ Args:
+ df_in (pd.DataFrame): Input startup dataset
+
+ Returns:
+ str: HTML string of the interactive timeline
+ """
+ # Calculate failure timeline (same logic as static version)
+ timeline_data = []
+ for _, row in df_in.iterrows():
+ if row["Funding_USD_M"] > 0 and row["Burn_Rate_Months"] > 0:
+ failure_time = row["Funding_USD_M"] / (12 / row["Burn_Rate_Months"])
+ timeline_data.append({
+ "Startup": row["Startup"],
+ "Failure_Time_Years": failure_time,
+ "Sector": row["Sector"],
+ "Funding": row["Funding_USD_M"],
+ "Status": "Failed" if row["Failed"] else "Active"
+ })
+
+ timeline_df = pd.DataFrame(timeline_data).sort_values("Failure_Time_Years")
+
+ # Create interactive bar chart
+ fig = px.bar(
+ timeline_df,
+ x="Failure_Time_Years",
+ y="Startup",
+ color="Status",
+ hover_data=["Sector", "Funding"],
+ title="📈 Interactive Failure Timeline - Hover for Details",
+ labels={
+ "Failure_Time_Years": "Estimated Failure Time (Years)",
+ "Startup": "Startup Name"
+ },
+ color_discrete_map={"Failed": "red", "Active": "green"},
+ orientation="h",
+ width=None, # Responsive width for mobile
+ height=400, # Reduced height for mobile
+ autosize=True # Enable auto-sizing
+ )
+
+ fig.update_layout(
+ hovermode="y unified",
+ yaxis={'categoryorder':'total ascending'}
+ )
+
+ return fig.to_html(include_plotlyjs='cdn', div_id="interactive-timeline")
+
+def create_sector_dashboard(df_in: pd.DataFrame) -> str:
+ """
+ Create an interactive multi-chart dashboard for sector analysis.
+
+ Args:
+ df_in (pd.DataFrame): Input startup dataset
+
+ Returns:
+ str: HTML string of the interactive dashboard
+ """
+ from plotly.subplots import make_subplots
+
+ # Create subplot figure with secondary y-axis
+ fig = make_subplots(
+ rows=2, cols=2,
+ subplot_titles=(
+ "Average Funding by Sector",
+ "Failure Rate by Sector",
+ "Experience vs Funding",
+ "Market Size Distribution"
+ ),
+ specs=[
+ [{"secondary_y": False}, {"secondary_y": False}],
+ [{"secondary_y": False}, {"secondary_y": False}]
+ ]
+ )
+
+ # Chart 1: Average funding by sector
+ sector_avg = df_in.groupby("Sector")["Funding_USD_M"].mean().reset_index()
+ fig.add_trace(
+ go.Bar(x=sector_avg["Sector"], y=sector_avg["Funding_USD_M"],
+ name="Avg Funding", marker_color="lightblue"),
+ row=1, col=1
+ )
+
+ # Chart 2: Failure rate by sector
+ sector_failure = df_in.groupby("Sector")["Failed"].mean().reset_index()
+ fig.add_trace(
+ go.Bar(x=sector_failure["Sector"], y=sector_failure["Failed"],
+ name="Failure Rate", marker_color="salmon"),
+ row=1, col=2
+ )
+
+ # Chart 3: Experience vs Funding scatter
+ fig.add_trace(
+ go.Scatter(
+ x=df_in["Founders_Experience_Yrs"],
+ y=df_in["Funding_USD_M"],
+ mode="markers",
+ marker=dict(
+ size=8,
+ color=df_in["Failed"],
+ colorscale="RdYlGn_r",
+ showscale=True
+ ),
+ name="Startups",
+ text=df_in["Startup"],
+ hovertemplate="%{text}
Experience: %{x} years
Funding: $%{y}M"
+ ),
+ row=2, col=1
+ )
+
+ # Chart 4: Market size distribution
+ fig.add_trace(
+ go.Histogram(x=df_in["Market_Size_Bn"], nbinsx=10,
+ name="Market Size", marker_color="lightgreen"),
+ row=2, col=2
+ )
+
+ # Update layout
+ fig.update_layout(
+ height=600, # Reduced height for mobile
+ title_text="🏭 Interactive Sector Dashboard - Click and Zoom to Explore",
+ showlegend=True
+ )
+
+ return fig.to_html(include_plotlyjs='cdn', div_id="sector-dashboard")
+
+# =============================
+# SESSION MEMORY FUNCTIONS
+# =============================
+
+def get_session_id() -> str:
+ """Get or create session ID for memory tracking."""
+ session = cl.user_session.get("session_id")
+ if not session:
+ import uuid
+ session = str(uuid.uuid4())[:8]
+ cl.user_session.set("session_id", session)
+ return session
+
+def add_to_memory(session_id: str, role: str, content: str):
+ """Add a message to session memory."""
+ if session_id not in SESSION_MEMORY:
+ SESSION_MEMORY[session_id] = []
+
+ SESSION_MEMORY[session_id].append({
+ "role": role,
+ "content": content,
+ "timestamp": pd.Timestamp.now()
+ })
+
+ # Keep only last 20 messages to avoid token limits
+ if len(SESSION_MEMORY[session_id]) > 20:
+ SESSION_MEMORY[session_id] = SESSION_MEMORY[session_id][-20:]
+
+def get_memory_context(session_id: str) -> str:
+ """Get formatted conversation history for context."""
+ if session_id not in SESSION_MEMORY:
+ return ""
+
+ history = SESSION_MEMORY[session_id][-10:] # Last 10 messages
+ context = "Recent conversation history:\n"
+ for msg in history:
+ context += f"- {msg['role']}: {msg['content'][:100]}...\n"
+ return context
+
+def get_current_persona() -> Dict[str, str]:
+ """Get current persona settings from session."""
+ persona_name = cl.user_session.get("persona", "founder")
+ return PERSONAS.get(persona_name, PERSONAS["founder"])
+
+def format_persona_recommendations(persona_name: str) -> str:
+ """Format key recommendations for a persona mode."""
+ persona = PERSONAS.get(persona_name, {})
+ recommendations = persona.get("key_recommendations", [])
+
+ if not recommendations:
+ return ""
+
+ formatted = f"\n\n🎯 **Key {persona.get('name', persona_name)} Recommendations:**\n\n"
+ for rec in recommendations:
+ formatted += f"• {rec}\n\n"
+
+ return formatted
+
+# =============================
+# LANGSMITH THREAD MANAGEMENT
+# =============================
+
+def get_thread_history(thread_id: str, project_name: str) -> List[Dict[str, str]]:
+ """
+ Gets a history of all LLM calls in the thread to construct conversation history
+
+ Args:
+ thread_id (str): The thread/session ID to retrieve history for
+ project_name (str): LangSmith project name
+
+ Returns:
+ List[Dict[str, str]]: List of message objects with role and content
+ """
+ if not langsmith_client:
+ return []
+
+ try:
+ # Filter runs by the specific thread and project
+ filter_string = f'and(in(metadata_key, ["session_id","conversation_id","thread_id"]), eq(metadata_value, "{thread_id}"))'
+
+ # Only grab the LLM runs
+ runs = [r for r in langsmith_client.list_runs(
+ project_name=project_name,
+ filter=filter_string,
+ run_type="llm"
+ )]
+
+ # Sort by start time to get chronological order
+ runs = sorted(runs, key=lambda run: run.start_time)
+
+ # Extract conversation history
+ messages = []
+ for run in runs:
+ if hasattr(run, 'inputs') and 'messages' in run.inputs:
+ # Add input messages
+ messages.extend(run.inputs['messages'])
+
+ # Add assistant response
+ if hasattr(run, 'outputs') and 'choices' in run.outputs:
+ assistant_msg = run.outputs['choices'][0]['message']
+ messages.append(assistant_msg)
+
+ return messages
+
+ except Exception as e:
+ print(f"Error retrieving thread history: {str(e)}")
+ return []
+
+
+@traceable(
+ name="NAVADA Chat Pipeline",
+ run_type="chain",
+ tags=["navada", "startup-analysis", "conversational-ai"],
+ metadata={
+ "app_name": "navada",
+ "app_version": "2.0.0",
+ "environment": "production"
+ }
+)
+def navada_chat_pipeline(question: str, session_id: str, persona: str, get_chat_history: bool = False) -> str:
+ """
+ Enhanced chat pipeline with LangSmith thread management for NAVADA
+
+ Args:
+ question (str): User's question/input
+ session_id (str): Unique session identifier for thread tracking
+ persona (str): Current persona mode (investor/founder)
+ get_chat_history (bool): Whether to retrieve conversation history
+
+ Returns:
+ str: AI response content
+ """
+ try:
+ # Get current run tree for dynamic metadata and tags
+ current_run = ls.get_current_run_tree()
+
+ # Add dynamic metadata based on current context
+ if current_run:
+ current_run.metadata.update({
+ "session_id": session_id,
+ "persona_mode": persona,
+ "conversation_type": "thread_continuation" if get_chat_history else "new_conversation",
+ "question_length": len(question),
+ "timestamp": pd.Timestamp.now().isoformat()
+ })
+
+ # Add dynamic tags based on persona and question type
+ dynamic_tags = [f"persona-{persona}"]
+
+ # Detect question type and add relevant tags
+ question_lower = question.lower()
+ if "funding" in question_lower or "investment" in question_lower:
+ dynamic_tags.append("funding-analysis")
+ if "market" in question_lower or "competition" in question_lower:
+ dynamic_tags.append("market-analysis")
+ if "team" in question_lower or "founder" in question_lower:
+ dynamic_tags.append("team-analysis")
+ if "chart" in question_lower or "plot" in question_lower or "visualization" in question_lower:
+ dynamic_tags.append("data-visualization")
+
+ current_run.tags.extend(dynamic_tags)
+
+ # Set up LangSmith metadata for thread tracking
+ langsmith_extra = {
+ "project_name": LANGSMITH_PROJECT,
+ "metadata": {
+ "session_id": session_id,
+ "persona": persona,
+ "app": "navada",
+ "trace_type": "chat_pipeline"
+ },
+ "tags": [f"session-{session_id[:8]}", f"persona-{persona}"]
+ }
+
+ # Build conversation context
+ if get_chat_history and langsmith_client:
+ # Get LangSmith thread history
+ thread_messages = get_thread_history(session_id, LANGSMITH_PROJECT)
+
+ # Combine with new user question
+ messages = thread_messages + [{"role": "user", "content": question}]
+ else:
+ # Start fresh conversation
+ messages = [{"role": "user", "content": question}]
+
+ # Get current persona information
+ current_persona = PERSONAS.get(persona, PERSONAS["investor"])
+
+ # Add persona system message if starting fresh or no history
+ if not get_chat_history or not messages:
+ system_msg = {
+ "role": "system",
+ "content": current_persona["system_prompt"]
+ }
+ messages = [system_msg] + messages
+
+ # Create chat completion with LangSmith metadata
+ response = client.chat.completions.create(
+ model="gpt-4o-mini",
+ messages=messages,
+ max_tokens=800,
+ temperature=0.7
+ )
+
+ return response.choices[0].message.content
+
+ except Exception as e:
+ print(f"Error in NAVADA chat pipeline: {str(e)}")
+ return f"I apologize, but I encountered an error processing your request: {str(e)}"
+
+
+def get_session_metadata(session_id: str) -> Dict[str, Any]:
+ """
+ Get metadata for a session including conversation count and current persona
+
+ Args:
+ session_id (str): Session identifier
+
+ Returns:
+ Dict[str, Any]: Session metadata
+ """
+ if not langsmith_client:
+ return {"conversation_count": 0, "persona": "investor"}
+
+ try:
+ filter_string = f'and(in(metadata_key, ["session_id"]), eq(metadata_value, "{session_id}"))'
+ runs = list(langsmith_client.list_runs(
+ project_name=LANGSMITH_PROJECT,
+ filter=filter_string,
+ run_type="llm"
+ ))
+
+ return {
+ "conversation_count": len(runs),
+ "persona": runs[-1].extra.get("metadata", {}).get("persona", "investor") if runs else "investor",
+ "last_interaction": runs[-1].start_time if runs else None
+ }
+
+ except Exception as e:
+ print(f"Error getting session metadata: {str(e)}")
+ return {"conversation_count": 0, "persona": "investor"}
+
+
+# =============================
+# AUTO-GENERATED INSIGHTS FUNCTIONS
+# =============================
+
+def generate_insights(df_in: pd.DataFrame, analysis_type: str = "general") -> Dict[str, List[str]]:
+ """
+ Generate automated insights based on data analysis.
+
+ Args:
+ df_in (pd.DataFrame): Input dataset
+ analysis_type (str): Type of analysis performed
+
+ Returns:
+ Dict with risks, opportunities, and recommendations
+ """
+ insights = {
+ "risks": [],
+ "opportunities": [],
+ "recommendations": []
+ }
+
+ # Calculate key metrics
+ avg_funding = df_in["Funding_USD_M"].mean()
+ failure_rate = df_in["Failed"].mean()
+ avg_burn = df_in["Burn_Rate_Months"].mean()
+ avg_experience = df_in["Founders_Experience_Yrs"].mean()
+
+ # Risk Detection
+ if failure_rate > 0.5:
+ insights["risks"].append(f"🔴 High failure rate detected: {failure_rate:.0%} of startups failed")
+
+ if avg_burn < 6:
+ insights["risks"].append(f"🔴 Short runway alert: Average burn rate is only {avg_burn:.1f} months")
+
+ if avg_experience < 3:
+ insights["risks"].append(f"🔴 Inexperienced teams: Average founder experience is {avg_experience:.1f} years")
+
+ # Opportunity Detection
+ high_funding_sectors = df_in.groupby("Sector")["Funding_USD_M"].mean().sort_values(ascending=False).head(2)
+ for sector, funding in high_funding_sectors.items():
+ if funding > avg_funding * 1.5:
+ insights["opportunities"].append(f"🟢 Hot sector identified: {sector} (avg funding ${funding:.1f}M)")
+
+ successful_patterns = df_in[df_in["Failed"] == 0]
+ if len(successful_patterns) > 0:
+ success_funding = successful_patterns["Funding_USD_M"].mean()
+ insights["opportunities"].append(f"🟢 Success pattern: Successful startups raised avg ${success_funding:.1f}M")
+
+ # Recommendations
+ if avg_burn < 12:
+ insights["recommendations"].append("💡 Extend runway: Focus on increasing funding or reducing burn rate")
+
+ if failure_rate > 0.4:
+ insights["recommendations"].append("💡 De-risk strategy: Consider pivot to sectors with lower failure rates")
+
+ insights["recommendations"].append("💡 Track metrics: Monitor burn rate, customer acquisition, and team experience")
+
+ return insights
+
+def format_insights_message(insights: Dict[str, List[str]]) -> str:
+ """Format insights into a readable message."""
+ message = "## 🤖 Auto-Generated Insights\n\n"
+
+ if insights["risks"]:
+ message += "### ⚠️ Top Risks Detected:\n"
+ for risk in insights["risks"][:3]: # Top 3 risks
+ message += f"- {risk}\n"
+ message += "\n"
+
+ if insights["opportunities"]:
+ message += "### 🎯 Opportunities Identified:\n"
+ for opp in insights["opportunities"][:3]: # Top 3 opportunities
+ message += f"- {opp}\n"
+ message += "\n"
+
+ if insights["recommendations"]:
+ message += "### 💡 Next Steps:\n"
+ for rec in insights["recommendations"][:3]: # Top 3 recommendations
+ message += f"- {rec}\n"
+
+ return message
+
+# =============================
+# WEB SCRAPING FUNCTIONS
+# =============================
+
+def validate_url(url: str) -> bool:
+ """
+ Validate URL format and check for security concerns.
+
+ Args:
+ url (str): URL to validate
+
+ Returns:
+ bool: True if URL is valid and safe, False otherwise
+ """
+ try:
+ # Parse URL to check structure
+ parsed = urlparse(url)
+
+ # Must have scheme (http/https) and netloc (domain)
+ if not all([parsed.scheme, parsed.netloc]):
+ return False
+
+ # Only allow HTTP/HTTPS protocols for security
+ if parsed.scheme not in ['http', 'https']:
+ return False
+
+ # Block dangerous or inappropriate domains
+ blocked_domains = [
+ 'localhost', '127.0.0.1', '0.0.0.0', # Local addresses
+ 'file://', 'ftp://', # Non-web protocols
+ ]
+
+ netloc_lower = parsed.netloc.lower()
+ for blocked in blocked_domains:
+ if blocked in netloc_lower:
+ return False
+
+ return True
+
+ except Exception:
+ return False
+
+def scrape_site(url: str, selector: str = "p") -> Dict[str, Any]:
+ """
+ Scrape text content from a website with comprehensive safety measures and error handling.
+
+ This function performs web scraping with multiple safeguards:
+ - URL validation and security checks
+ - Request timeouts and size limits
+ - Content filtering and cleaning
+ - Structured error reporting
+
+ Args:
+ url (str): Website URL to scrape (must be http/https)
+ selector (str): CSS selector for content extraction
+ - "p" = paragraphs (default)
+ - "h1, h2, h3" = headings
+ - ".class-name" = by CSS class
+ - "#id-name" = by element ID
+
+ Returns:
+ Dict[str, Any]: Scraping results containing:
+ - success (bool): Whether scraping succeeded
+ - data (pd.DataFrame): Scraped content (if successful)
+ - url (str): Original URL
+ - count (int): Number of items scraped
+ - error (str): Error message (if failed)
+ - size_mb (float): Content size in megabytes
+ """
+ result = {
+ "success": False,
+ "data": pd.DataFrame(),
+ "url": url,
+ "count": 0,
+ "error": "",
+ "size_mb": 0.0
+ }
+
+ try:
+ # Step 1: Validate URL for security and format
+ if not validate_url(url):
+ result["error"] = "Invalid or unsafe URL. Use http/https URLs only."
+ return result
+
+ # Step 2: Configure HTTP request with safety limits
+ headers = {
+ 'User-Agent': 'NAVADA-Bot/1.0 (Educational Web Scraping Tool)',
+ 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
+ 'Accept-Language': 'en-US,en;q=0.5',
+ 'Accept-Encoding': 'gzip, deflate',
+ 'Connection': 'keep-alive',
+ 'Upgrade-Insecure-Requests': '1',
+ }
+
+ # Step 3: Make HTTP request with timeout and size limits
+ response = requests.get(
+ url,
+ headers=headers,
+ timeout=15, # 15 second timeout
+ stream=True, # Stream to check size before downloading
+ allow_redirects=True, # Follow redirects (max 30 by default)
+ verify=True # Verify SSL certificates
+ )
+
+ # Step 4: Check response size before processing (max 5MB)
+ content_length = response.headers.get('content-length')
+ if content_length and int(content_length) > 5 * 1024 * 1024: # 5MB limit
+ result["error"] = "Content too large (>5MB). Choose a smaller page."
+ return result
+
+ # Step 5: Check HTTP status code
+ response.raise_for_status() # Raises exception for 4xx/5xx status codes
+
+ # Step 6: Get content and check actual size
+ content = response.text
+ content_size_mb = len(content.encode('utf-8')) / (1024 * 1024)
+ result["size_mb"] = round(content_size_mb, 2)
+
+ if content_size_mb > 5: # Double-check size after download
+ result["error"] = f"Content too large ({content_size_mb:.1f}MB). Choose a smaller page."
+ return result
+
+ # Step 7: Parse HTML with BeautifulSoup
+ soup = BeautifulSoup(content, "html.parser")
+
+ # Step 8: Remove script and style elements (they contain non-content)
+ for script in soup(["script", "style", "nav", "footer", "header"]):
+ script.decompose()
+
+ # Step 9: Extract content using CSS selector
+ elements = soup.select(selector)
+
+ # Step 10: Clean and filter extracted text
+ scraped_content = []
+ for element in elements:
+ text = element.get_text(strip=True)
+
+ # Filter out empty content and very short text
+ if text and len(text) > 10:
+ # Clean whitespace and normalize
+ text = re.sub(r'\s+', ' ', text) # Replace multiple whitespace with single space
+ text = text.strip()
+
+ # Limit individual text length to prevent spam
+ if len(text) <= 2000:
+ scraped_content.append(text)
+
+ # Step 11: Create structured DataFrame
+ if scraped_content:
+ result["data"] = pd.DataFrame({
+ "content": scraped_content,
+ "length": [len(text) for text in scraped_content],
+ "source": [url] * len(scraped_content)
+ })
+ result["count"] = len(scraped_content)
+ result["success"] = True
+ else:
+ result["error"] = f"No content found using selector '{selector}'. Try different selectors like 'h1', 'div', or 'span'."
+
+ except requests.exceptions.Timeout:
+ result["error"] = "Request timed out. The website may be slow or unresponsive."
+ except requests.exceptions.ConnectionError:
+ result["error"] = "Could not connect to the website. Check the URL and internet connection."
+ except requests.exceptions.HTTPError as e:
+ result["error"] = f"HTTP error {e.response.status_code}: {e.response.reason}"
+ except requests.exceptions.RequestException as e:
+ result["error"] = f"Request failed: {str(e)}"
+ except Exception as e:
+ result["error"] = f"Scraping failed: {str(e)}"
+
+ return result
+
+def analyze_scraped_content(scraped_data: pd.DataFrame, url: str, persona: Dict[str, str]) -> str:
+ """
+ Use GPT to analyze scraped website content with persona-specific focus.
+
+ Args:
+ scraped_data (pd.DataFrame): DataFrame containing scraped content
+ url (str): Original URL for context
+ persona (Dict[str, str]): Current user persona (investor/founder)
+
+ Returns:
+ str: AI analysis of the scraped content
+ """
+ if scraped_data.empty:
+ return "No content available for analysis."
+
+ # Prepare content for GPT analysis
+ # Take top 20 content items to stay within token limits
+ content_items = scraped_data.head(20)["content"].tolist()
+ content_text = "\n\n".join([f"Section {i+1}: {text}" for i, text in enumerate(content_items)])
+
+ # Truncate if too long (approximately 3000 tokens = 12000 characters)
+ if len(content_text) > 10000:
+ content_text = content_text[:10000] + "\n\n[Content truncated for analysis...]"
+
+ # Create persona-specific analysis prompt
+ persona_focus = persona.get('system_prompt', '')
+ analysis_style = ""
+
+ if 'investor' in persona.get('name', '').lower():
+ analysis_style = (
+ "Focus on investment opportunities, market analysis, business models, "
+ "competitive landscape, and financial indicators. Identify potential risks and ROI factors."
+ )
+ else: # founder mode
+ analysis_style = (
+ "Focus on actionable insights, operational strategies, market positioning, "
+ "customer needs, and execution opportunities. Provide tactical recommendations."
+ )
+
+ try:
+ response = client.chat.completions.create(
+ model="gpt-4o-mini",
+ messages=[
+ {
+ "role": "system",
+ "content": (
+ f"You are analyzing web content from {url}. "
+ f"{analysis_style}\n\n"
+ "Provide a structured analysis with:\n"
+ "1. Key insights (3-5 bullet points)\n"
+ "2. Notable patterns or trends\n"
+ "3. Actionable recommendations\n"
+ "4. Potential concerns or red flags\n\n"
+ "Keep analysis concise but insightful."
+ )
+ },
+ {
+ "role": "user",
+ "content": f"Analyze this website content:\n\nURL: {url}\n\nContent:\n{content_text}"
+ }
+ ],
+ max_tokens=600,
+ temperature=0.7
+ )
+
+ return response.choices[0].message.content
+
+ except Exception as e:
+ return f"Analysis failed: {str(e)}"
+
+
+# =============================
+# INTERNET SEARCH FUNCTIONALITY
+# =============================
+
+@traceable(
+ name="NAVADA Internet Search",
+ run_type="tool",
+ tags=["navada", "search", "brave-api", "market-intelligence"],
+ metadata={"tool_type": "internet_search", "api_provider": "brave_search"}
+)
+def search_internet(query: str, count: int = 5) -> Dict[str, Any]:
+ """
+ Search the internet using Brave Search API
+
+ Args:
+ query (str): Search query
+ count (int): Number of results to return (default 5)
+
+ Returns:
+ dict: Search results with titles, descriptions, and URLs
+ """
+ # Add dynamic metadata to current run
+ current_run = ls.get_current_run_tree()
+ if current_run:
+ current_run.metadata.update({
+ "search_query": query,
+ "requested_count": count,
+ "query_length": len(query),
+ "timestamp": pd.Timestamp.now().isoformat()
+ })
+
+ # Add query-specific tags
+ query_lower = query.lower()
+ search_tags = []
+ if "startup" in query_lower:
+ search_tags.append("startup-search")
+ if "funding" in query_lower or "investment" in query_lower:
+ search_tags.append("funding-search")
+ if "market" in query_lower:
+ search_tags.append("market-research")
+ if "competition" in query_lower:
+ search_tags.append("competitive-analysis")
+
+ current_run.tags.extend(search_tags)
+
+ if not search_api_key or search_api_key == "your_brave_search_api_key_here":
+ logger.warning("Search API key not configured or using placeholder value")
+ return {
+ "success": False,
+ "error": "Search API key not configured. Please set SEARCH_API_KEY in your .env file with a valid Brave Search API key.",
+ "results": [],
+ "fallback_available": True
+ }
+
+ try:
+ # Brave Search API endpoint
+ url = "https://api.search.brave.com/res/v1/web/search"
+
+ headers = {
+ "Accept": "application/json",
+ "Accept-Encoding": "gzip",
+ "X-Subscription-Token": search_api_key
+ }
+
+ params = {
+ "q": query,
+ "count": count,
+ "offset": 0,
+ "mkt": "en-US",
+ "safesearch": "moderate",
+ "freshness": "pw", # Past week for fresh results
+ "text_decorations": False,
+ "search_lang": "en"
+ }
+
+ response = requests.get(url, headers=headers, params=params, timeout=10)
+ response.raise_for_status()
+
+ data = response.json()
+
+ # Extract relevant information from search results
+ results = []
+ if "web" in data and "results" in data["web"]:
+ for result in data["web"]["results"][:count]:
+ results.append({
+ "title": result.get("title", ""),
+ "description": result.get("description", ""),
+ "url": result.get("url", ""),
+ "age": result.get("age", ""),
+ "language": result.get("language", "en")
+ })
+
+ return {
+ "success": True,
+ "query": query,
+ "results": results,
+ "total_results": len(results)
+ }
+
+ except requests.exceptions.HTTPError as e:
+ logger.error(f"HTTP error during search: {e}")
+ if e.response.status_code == 422:
+ return {
+ "success": False,
+ "error": "Search API authentication failed. Please check your SEARCH_API_KEY in .env file.",
+ "results": [],
+ "fallback_available": True,
+ "status_code": 422
+ }
+ else:
+ return {
+ "success": False,
+ "error": f"Search request failed: HTTP {e.response.status_code}",
+ "results": [],
+ "fallback_available": True
+ }
+ except requests.exceptions.RequestException as e:
+ logger.error(f"Network error during search: {e}")
+ return {
+ "success": False,
+ "error": f"Search request failed: {str(e)}",
+ "results": [],
+ "fallback_available": True
+ }
+ except Exception as e:
+ logger.error(f"Unexpected error during search: {e}")
+ logger.error(f"Traceback: {traceback.format_exc()}")
+ return {
+ "success": False,
+ "error": f"Search error: {str(e)}",
+ "results": [],
+ "fallback_available": True
+ }
+
+
+def analyze_search_results(search_data: Dict[str, Any], persona: Dict[str, str], context: str = "", session_id: str = None) -> str:
+ """
+ Analyze search results using AI based on current persona mode
+
+ Args:
+ search_data (dict): Search results from search_internet()
+ persona (dict): Current persona (investor/founder mode)
+ context (str): Additional context for analysis
+
+ Returns:
+ str: AI analysis of search results
+ """
+ if not search_data["success"] or not search_data["results"]:
+ return "No search results to analyze or search failed."
+
+ try:
+ # Format search results for analysis
+ results_text = f"Search Query: {search_data['query']}\n\n"
+ results_text += f"Found {search_data['total_results']} results:\n\n"
+
+ for i, result in enumerate(search_data['results'], 1):
+ results_text += f"{i}. **{result['title']}**\n"
+ results_text += f" URL: {result['url']}\n"
+ results_text += f" Description: {result['description']}\n"
+ if result.get('age'):
+ results_text += f" Age: {result['age']}\n"
+ results_text += "\n"
+
+ # Create persona-specific analysis prompt
+ analysis_prompt = f"{persona['system_prompt']}\n\n"
+ analysis_prompt += f"Analyze these search results from a {persona['name']} perspective.\n\n"
+
+ if context:
+ analysis_prompt += f"Context: {context}\n\n"
+
+ analysis_prompt += "Provide insights, opportunities, risks, and actionable recommendations based on the search results."
+
+ # Include LangSmith metadata if session_id is available
+ if langsmith_client and session_id:
+ response = client.chat.completions.create(
+ model="gpt-4o-mini",
+ messages=[
+ {
+ "role": "system",
+ "content": analysis_prompt
+ },
+ {
+ "role": "user",
+ "content": f"Analyze these search results:\n\n{results_text}"
+ }
+ ],
+ max_tokens=800,
+ temperature=0.7,
+ )
+ else:
+ response = client.chat.completions.create(
+ model="gpt-4o-mini",
+ messages=[
+ {
+ "role": "system",
+ "content": analysis_prompt
+ },
+ {
+ "role": "user",
+ "content": f"Analyze these search results:\n\n{results_text}"
+ }
+ ],
+ max_tokens=800,
+ temperature=0.7
+ )
+
+ return response.choices[0].message.content
+
+ except Exception as e:
+ return f"Analysis failed: {str(e)}"
+
+
+def generate_search_query(user_question: str, persona: str) -> str:
+ """
+ Generate intelligent search queries based on user question and persona mode
+
+ Args:
+ user_question (str): User's original question
+ persona (str): Current persona mode (investor/founder)
+
+ Returns:
+ str: Optimized search query for Brave Search API
+ """
+ try:
+ # Persona-specific search query templates
+ investor_keywords = {
+ "market": "startup market trends valuation 2025",
+ "competition": "startup competitive landscape industry analysis",
+ "funding": "venture capital funding trends startup investment 2025",
+ "exit": "startup exit strategies IPO acquisition trends",
+ "valuation": "startup valuation metrics Series A B C funding",
+ "growth": "startup growth metrics scaling strategies",
+ "roi": "startup ROI investment returns venture capital",
+ "due diligence": "startup due diligence checklist investment"
+ }
+
+ founder_keywords = {
+ "market": "startup market validation product-market fit",
+ "competition": "startup competitor analysis differentiation",
+ "funding": "startup fundraising tips pitch deck Series A",
+ "growth": "startup growth hacking scaling strategies",
+ "team": "startup team building hiring strategies",
+ "product": "startup product development MVP strategies",
+ "customer": "startup customer acquisition retention strategies",
+ "pivot": "startup pivot strategies when to pivot"
+ }
+
+ # Select keyword set based on persona
+ keywords = investor_keywords if persona == "investor" else founder_keywords
+
+ # Extract key topics from user question
+ question_lower = user_question.lower()
+
+ # Find matching keywords and build search query
+ search_terms = []
+
+ for topic, search_template in keywords.items():
+ if topic in question_lower:
+ search_terms.append(search_template)
+ break # Use first match to avoid overly complex queries
+
+ # Add year context for recent information
+ current_year = "2025"
+ if current_year not in user_question:
+ search_terms.append(current_year)
+
+ # If no specific keywords found, use general startup search
+ if not search_terms:
+ if persona == "investor":
+ search_terms = ["startup investment trends 2025", "venture capital market"]
+ else:
+ search_terms = ["startup trends 2025", "founder advice entrepreneurship"]
+
+ # Combine and return the search query
+ return " ".join(search_terms[:2]) # Limit to 2 main search terms
+
+ except Exception as e:
+ print(f"Error generating search query: {str(e)}")
+ return f"startup {persona} trends 2025"
+
+
+# =============================
+# TEXT-TO-SPEECH FUNCTIONALITY
+# =============================
+
+def generate_speech(text: str, voice: str = "alloy") -> bytes:
+ """
+ Generate speech from text using OpenAI TTS API
+
+ Args:
+ text (str): Text to convert to speech
+ voice (str): Voice to use (alloy, echo, fable, onyx, nova, shimmer)
+
+ Returns:
+ bytes: Audio data in MP3 format
+ """
+ try:
+ if not api_key:
+ raise Exception("OpenAI API key not configured")
+
+ response = client.audio.speech.create(
+ model="tts-1",
+ voice=voice,
+ input=text,
+ speed=1.0
+ )
+
+ return response.content
+
+ except Exception as e:
+ print(f"TTS generation failed: {str(e)}")
+ return b""
+
+
+def create_audio_message(content: str, voice: str = "alloy") -> cl.Audio:
+ """
+ Create Chainlit audio message with TTS
+
+ Args:
+ content (str): Text content to convert to speech
+ voice (str): Voice to use for TTS
+
+ Returns:
+ cl.Audio: Chainlit audio element
+ """
+ try:
+ # Limit text length for TTS (OpenAI has character limits)
+ max_length = 4000
+ if len(content) > max_length:
+ # Truncate but try to end at a sentence
+ truncated = content[:max_length]
+ last_period = truncated.rfind('.')
+ if last_period > max_length * 0.8: # If period is reasonably close to end
+ content = truncated[:last_period + 1]
+ else:
+ content = truncated + "..."
+
+ # Generate speech
+ audio_data = generate_speech(content, voice)
+
+ if not audio_data:
+ return None
+
+ # Create audio element
+ audio = cl.Audio(
+ content=audio_data,
+ name="navada_response.mp3",
+ display="inline",
+ auto_play=False
+ )
+
+ return audio
+
+ except Exception as e:
+ print(f"Audio message creation failed: {str(e)}")
+ return None
+
+
+def benchmark_founder_idea(features: Dict[str, Any], df: pd.DataFrame) -> Dict[str, Any]:
+ """
+ Benchmark a founder's idea against dataset averages and percentiles.
+
+ Args:
+ features (dict): Founder's startup metrics
+ df (pd.DataFrame): Dataset to benchmark against
+
+ Returns:
+ dict: Benchmarking results with percentiles and recommendations
+ """
+ results = {
+ "metrics": {},
+ "insights": [],
+ "risk_level": "",
+ "recommendations": []
+ }
+
+ # Calculate dataset statistics
+ stats = {
+ "funding": {
+ "median": df["Funding_USD_M"].median(),
+ "mean": df["Funding_USD_M"].mean(),
+ "p20": df["Funding_USD_M"].quantile(0.2),
+ "p80": df["Funding_USD_M"].quantile(0.8)
+ },
+ "burn_rate": {
+ "median": df["Burn_Rate_Months"].median(),
+ "mean": df["Burn_Rate_Months"].mean(),
+ "p20": df["Burn_Rate_Months"].quantile(0.2),
+ "p80": df["Burn_Rate_Months"].quantile(0.8)
+ },
+ "experience": {
+ "median": df["Founders_Experience_Yrs"].median(),
+ "mean": df["Founders_Experience_Yrs"].mean(),
+ "p20": df["Founders_Experience_Yrs"].quantile(0.2),
+ "p80": df["Founders_Experience_Yrs"].quantile(0.8)
+ },
+ "market": {
+ "median": df["Market_Size_Bn"].median(),
+ "mean": df["Market_Size_Bn"].mean(),
+ "p20": df["Market_Size_Bn"].quantile(0.2),
+ "p80": df["Market_Size_Bn"].quantile(0.8)
+ }
+ }
+
+ # Benchmark funding
+ funding = features.get('funding_usd_m', 3.0)
+ funding_percentile = (df["Funding_USD_M"] < funding).mean() * 100
+ results["metrics"]["funding"] = {
+ "value": funding,
+ "percentile": funding_percentile,
+ "vs_median": funding / stats["funding"]["median"] if stats["funding"]["median"] > 0 else 0,
+ "vs_mean": funding / stats["funding"]["mean"] if stats["funding"]["mean"] > 0 else 0
+ }
+
+ # Benchmark burn rate
+ burn = features.get('burn_rate_months', 9.0)
+ burn_percentile = (df["Burn_Rate_Months"] < burn).mean() * 100
+ results["metrics"]["burn_rate"] = {
+ "value": burn,
+ "percentile": burn_percentile,
+ "vs_median": burn / stats["burn_rate"]["median"] if stats["burn_rate"]["median"] > 0 else 0,
+ "vs_mean": burn / stats["burn_rate"]["mean"] if stats["burn_rate"]["mean"] > 0 else 0
+ }
+
+ # Benchmark experience
+ experience = features.get('team_experience_years', 3.0)
+ exp_percentile = (df["Founders_Experience_Yrs"] < experience).mean() * 100
+ results["metrics"]["experience"] = {
+ "value": experience,
+ "percentile": exp_percentile,
+ "vs_median": experience / stats["experience"]["median"] if stats["experience"]["median"] > 0 else 0,
+ "vs_mean": experience / stats["experience"]["mean"] if stats["experience"]["mean"] > 0 else 0
+ }
+
+ # Benchmark market size
+ market = features.get('market_size_bn', 10.0)
+ market_percentile = (df["Market_Size_Bn"] < market).mean() * 100
+ results["metrics"]["market_size"] = {
+ "value": market,
+ "percentile": market_percentile,
+ "vs_median": market / stats["market"]["median"] if stats["market"]["median"] > 0 else 0,
+ "vs_mean": market / stats["market"]["mean"] if stats["market"]["mean"] > 0 else 0
+ }
+
+ # Generate insights
+ if burn_percentile < 30:
+ results["insights"].append(f"⚠️ Your burn rate ({burn} months) is in the **bottom 30%** - HIGH RISK! Most startups have longer runways.")
+ results["recommendations"].append("Reduce burn rate or secure additional funding urgently")
+
+ if burn_percentile > 70:
+ results["insights"].append(f"✅ Your burn rate ({burn} months) is in the **top 30%** - well-managed cash flow")
+
+ if funding_percentile < 30:
+ results["insights"].append(f"⚠️ Your funding (${funding}M) is in the **bottom 30%** - may need more capital")
+ results["recommendations"].append("Consider raising a larger round to extend runway")
+
+ if funding_percentile > 70:
+ results["insights"].append(f"✅ Your funding (${funding}M) is in the **top 30%** - strong financial position")
+
+ if exp_percentile < 30:
+ results["insights"].append(f"⚠️ Your team experience ({experience} years) is **below dataset median** - consider adding senior advisors")
+ results["recommendations"].append("Add experienced advisors or co-founders to the team")
+
+ if exp_percentile > 70:
+ results["insights"].append(f"✅ Your team experience ({experience} years) is in the **top 30%** - strong foundation")
+
+ if results["metrics"]["market_size"]["vs_median"] > 2:
+ results["insights"].append(f"🚀 Your market size (${market}B) is **{results['metrics']['market_size']['vs_median']:.1f}× bigger** than average!")
+
+ # Calculate risk level
+ risk_score = 0
+ if burn_percentile < 30: risk_score += 2
+ if funding_percentile < 30: risk_score += 2
+ if exp_percentile < 30: risk_score += 1
+
+ if risk_score >= 3:
+ results["risk_level"] = "HIGH RISK 🔴"
+ elif risk_score >= 1:
+ results["risk_level"] = "MODERATE RISK 🟡"
+ else:
+ results["risk_level"] = "LOW RISK 🟢"
+
+ return results
+
+
+def create_portfolio_heatmap(portfolio_df: pd.DataFrame) -> bytes:
+ """
+ Create a heatmap visualization of multiple startups' viability scores.
+
+ Args:
+ portfolio_df (pd.DataFrame): DataFrame with startup names and viability metrics
+
+ Returns:
+ bytes: PNG image of heatmap
+ """
+ # Calculate viability scores for each startup
+ scores_data = []
+
+ for _, row in portfolio_df.iterrows():
+ # Calculate viability score for this startup
+ features = {
+ 'funding_usd_m': row.get('Funding_USD_M', 3.0),
+ 'burn_rate_months': row.get('Burn_Rate_Months', 9.0),
+ 'team_experience_years': row.get('Founders_Experience_Yrs', 3.0),
+ 'market_size_bn': row.get('Market_Size_Bn', 10.0),
+ 'business_model_strength_1_5': row.get('Business_Model', 3),
+ 'moat_1_5': row.get('Moat', 3),
+ 'traction_mrr_k': row.get('Traction_MRR_K', 10),
+ 'growth_rate_pct': row.get('Growth_Rate_Pct', 5),
+ 'competition_intensity_1_5': row.get('Competition', 3)
+ }
+
+ result = viability_score(features)
+
+ scores_data.append({
+ 'Startup': row['Startup'],
+ 'Overall Score': result['score'],
+ 'Runway': result['components']['runway'] * 100,
+ 'Experience': result['components']['experience'] * 100,
+ 'Market': result['components']['market'] * 100,
+ 'Traction': result['components']['traction'] * 100,
+ 'Growth': result['components']['growth'] * 100
+ })
+
+ # Create DataFrame for heatmap
+ heatmap_df = pd.DataFrame(scores_data)
+ heatmap_matrix = heatmap_df.set_index('Startup')[['Overall Score', 'Runway', 'Experience', 'Market', 'Traction', 'Growth']]
+
+ # Create heatmap with mobile-optimized size
+ default_height = max(6, len(heatmap_df) * 0.5)
+ figsize = get_mobile_optimized_figsize(10, default_height)
+ fig, ax = plt.subplots(figsize=figsize)
+
+ # Create heatmap with custom colormap
+ sns.heatmap(
+ heatmap_matrix.T,
+ annot=True,
+ fmt='.1f',
+ cmap='RdYlGn',
+ vmin=0,
+ vmax=100,
+ cbar_kws={'label': 'Score (0-100)'},
+ linewidths=0.5,
+ linecolor='gray',
+ ax=ax
+ )
+
+ ax.set_title('Portfolio Viability Heatmap\n🔴 Poor (0-40) | 🟡 Moderate (40-60) | 🟢 Strong (60-100)',
+ fontsize=14, fontweight='bold')
+ ax.set_xlabel('Startups', fontsize=12)
+ ax.set_ylabel('Metrics', fontsize=12)
+
+ # Rotate x-axis labels for readability
+ plt.xticks(rotation=45, ha='right')
+ plt.tight_layout()
+
+ return fig_to_bytes(fig)
+
+
+# =============================
+# THREAD MANAGEMENT FOR LANGSMITH
+# =============================
+
+def get_thread_history(thread_id: str, project_name: str) -> List[Dict[str, Any]]:
+ """
+ Retrieve conversation history from LangSmith for a specific thread.
+
+ Args:
+ thread_id: Unique identifier for the conversation thread
+ project_name: LangSmith project name
+
+ Returns:
+ List of message dictionaries representing the conversation history
+ """
+ if not langsmith_client:
+ return []
+
+ try:
+ # Filter runs by the specific thread and project
+ filter_string = f'and(in(metadata_key, ["session_id","conversation_id","thread_id"]), eq(metadata_value, "{thread_id}"))'
+
+ # Only grab the LLM runs
+ runs = list(langsmith_client.list_runs(
+ project_name=project_name,
+ filter=filter_string,
+ run_type="llm"
+ ))
+
+ if not runs:
+ return []
+
+ # Sort by start time to get chronological order
+ runs = sorted(runs, key=lambda run: run.start_time)
+
+ # Extract messages from runs
+ messages = []
+ for run in runs:
+ if run.inputs and 'messages' in run.inputs:
+ messages.extend(run.inputs['messages'])
+ if run.outputs and 'choices' in run.outputs:
+ if run.outputs['choices'] and run.outputs['choices'][0].get('message'):
+ messages.append(run.outputs['choices'][0]['message'])
+
+ return messages
+ except Exception as e:
+ print(f"Error retrieving thread history: {e}")
+ return []
+
+@traceable(name="NAVADA Chat Pipeline")
+def process_with_thread_context(
+ question: str,
+ session_id: str,
+ get_chat_history: bool = True,
+ persona: Optional[Dict[str, str]] = None
+) -> str:
+ """
+ Process user message with thread context for continuity.
+
+ Args:
+ question: User's current question
+ session_id: Thread/session identifier
+ get_chat_history: Whether to retrieve and use conversation history
+ persona: Current persona configuration
+
+ Returns:
+ AI response as string
+ """
+ langsmith_extra = {
+ "project_name": LANGSMITH_PROJECT,
+ "metadata": {"session_id": session_id}
+ }
+
+ messages = []
+
+ # Retrieve conversation history if requested
+ if get_chat_history and langsmith_client:
+ try:
+ historical_messages = get_thread_history(session_id, LANGSMITH_PROJECT)
+ if historical_messages:
+ messages.extend(historical_messages)
+ except Exception as e:
+ print(f"Could not retrieve history: {e}")
+
+ # Add system prompt based on persona
+ if persona:
+ messages.insert(0, {
+ "role": "system",
+ "content": persona.get('system_prompt', '')
+ })
+
+ # Add current user question
+ messages.append({"role": "user", "content": question})
+
+ # Make API call with thread metadata
+ try:
+ if langsmith_client:
+ # Use LangSmith thread pattern from documentation
+ response = client.chat.completions.create(
+ model="gpt-4o-mini",
+ messages=messages,
+ max_tokens=800,
+ temperature=0.7,
+ )
+ else:
+ # Standard OpenAI call without LangSmith
+ response = client.chat.completions.create(
+ model="gpt-4o-mini",
+ messages=messages,
+ max_tokens=800,
+ temperature=0.7
+ )
+
+ return response.choices[0].message.content
+ except Exception as e:
+ return f"Error processing message: {str(e)}"
+
+# =============================
+# PASSKEY AUTHENTICATION SYSTEM
+# =============================
+
+# Global variables for authentication state
+PASSKEY = "54321" # Temporary passkey for testing
+AUTHENTICATED_SESSIONS = set() # Track authenticated session IDs
+
+async def check_authentication():
+ """Check if user is authenticated."""
+ session_id = cl.user_session.get("session_id")
+ if not session_id:
+ session_id = str(uuid.uuid4())
+ cl.user_session.set("session_id", session_id)
+
+ # Check if already authenticated
+ if session_id in AUTHENTICATED_SESSIONS:
+ return True
+
+ # Show NAVADA with padlock icon
+ await cl.Message(content="**NAVADA** 🔐").send()
+ return False
+
+# =============================
+# CHAINLIT EVENT HANDLERS
+# =============================
+
+# Action handlers are not needed for this implementation
+# Downloads will be handled through direct file sending
+
+@cl.on_chat_start
+async def start():
+ """
+ Initialize the chat session when a user first connects.
+
+ This function runs once at the start of each new chat session and:
+ 1. Sets up chat settings with About section and quick actions
+ 2. Initializes thread/session tracking for LangSmith
+ 3. Sends a brief welcome message
+
+ The settings panel (burger menu) contains detailed information
+ about NAVADA's capabilities.
+
+ Chainlit decorator: @cl.on_chat_start
+ - Automatically called when a new chat begins
+ - Async function for non-blocking UI operations
+ """
+ # -------------------------
+ # PASSKEY AUTHENTICATION CHECK
+ # -------------------------
+ # Generate a unique session ID for this conversation thread
+ session_id = str(uuid.uuid4())
+ cl.user_session.set("session_id", session_id)
+
+ # Authenticate user with passkey
+ if not await check_authentication():
+ return # Exit if authentication fails
+
+ # -------------------------
+ # INITIALIZE THREAD/SESSION TRACKING
+ # -------------------------
+
+ # Store session info in Chainlit user session for persistence
+ cl.user_session.set("session_id", session_id)
+ cl.user_session.set("conversation_history", [])
+ cl.user_session.set("thread_metadata", {
+ "session_id": session_id,
+ "project_name": LANGSMITH_PROJECT,
+ "start_time": pd.Timestamp.now().isoformat()
+ })
+
+ # Store in global thread sessions mapping
+ THREAD_SESSIONS[session_id] = {
+ "start_time": pd.Timestamp.now(),
+ "messages": [],
+ "persona": "founder" # Default persona
+ }
+
+ # Log thread initialization
+ if langsmith_client:
+ print(f"🧵 Thread initialized: {session_id[:8]}...")
+
+ # -------------------------
+ # SETUP CHAT SETTINGS WITH ABOUT SECTION
+ # -------------------------
+ # Create chat settings with an "About" section accessible via burger menu
+ about_content = (
+ "# 👋 Welcome to NAVADA\n\n"
+ "**NAVADA** (Startup Viability Agent) helps you analyze startup risk, funding, and failure patterns "
+ "with **interactive charts and AI analysis**.\n\n"
+ "## 🎭 Analysis Modes:\n\n"
+ "💼 **Investor Mode** - VC perspective focused on ROI and exit strategies\n"
+ "🚀 **Founder Mode** - Entrepreneur perspective focused on execution and growth\n"
+ "🇬🇧 **UK Economist Mode** - Economic analysis perspective for UK markets\n\n"
+ "## 📊 Advanced Charts:\n\n"
+ "🔹 **Growth Trajectory** - MRR growth patterns vs company age\n"
+ "🔹 **Team Performance** - Team size vs founder experience matrix\n"
+ "🔹 **Market Opportunity** - Market size vs competition analysis\n"
+ "🔹 **Funding Efficiency** - Capital efficiency and ROI analysis\n"
+ "🔹 **Stage Progression** - Funding stages vs failure rates\n"
+ "🔹 **Risk Assessment** - Comprehensive risk radar chart\n"
+ "🔹 **UK Economic Dashboard** - Macroeconomic indicators and regional analysis\n\n"
+ "## 📈 Interactive Tools:\n\n"
+ "🔹 **Interactive Scatter** - Dynamic correlations and filtering\n"
+ "🔹 **Sector Dashboard** - Multi-dimensional sector analysis\n"
+ "🔹 **Interactive Timeline** - Failure patterns over time\n\n"
+ "## 🤖 AI-Powered Features:\n\n"
+ "🔹 **assess idea** - Interactive viability scoring with 24 data points\n"
+ "🔹 **benchmark** - Compare your startup against 24 successful companies\n"
+ "🔹 **portfolio** - Analyze multiple startups with heatmap visualization\n"
+ "🔹 **insights** - AI-powered risk assessment and opportunities\n"
+ "🔹 **questions** - Guided questions based on your current mode\n"
+ "🔹 **macro analysis** - UK macroeconomic impact assessment\n\n"
+ "## 🔍 Internet Search:\n\n"
+ "🔹 **search [query]** - Get up-to-date market intelligence and trends\n"
+ "🔹 **latest news** - Current developments in startup ecosystem\n"
+ "🔹 **current trends** - Market shifts and opportunities\n"
+ "🔹 Auto-triggered for questions about recent events or market updates\n\n"
+ "## 📥 Download & Export:\n\n"
+ "🔹 **Charts** - Download any generated chart as PNG with built-in download buttons\n"
+ "🔹 **export data** / **download csv** - Export complete dataset as CSV\n"
+ "🔹 **export json** / **download json** - Export complete dataset as JSON\n"
+ "🔹 **Data Tables** - Download chart data as CSV alongside visualizations\n\n"
+ "## 💬 Get Started:\n\n"
+ "• Type **'investor mode'**, **'founder mode'**, or **'economist mode'** to set your perspective\n"
+ "• Type **'questions'** to get guided analysis questions\n"
+ "• Ask: \"Which chart should I look at first?\"\n"
+ "• Try: \"Show me funding efficiency\" or \"Risk assessment\"\n\n"
+ "---\n\n"
+ "**Ready to start?** Choose your mode and dive into comprehensive startup analysis!"
+ )
+
+ # Store TTS setting in user session (default off)
+ cl.user_session.set("tts_enabled", False)
+
+ # -------------------------
+ # SEND SIMPLE WELCOME MESSAGE
+ # -------------------------
+ # Send basic welcome message
+ await cl.Message(content="**NAVADA**").send()
+
+ # Try to inject the widget via Chainlit's HTML element
+ try:
+ await cl.Html(content=elevenlabs_widget, display="page").send()
+ except Exception as e:
+ print(f"⚠️ Could not inject ElevenLabs widget: {e}")
+ # Alternative: provide instructions to user
+ await cl.Message(
+ content="**Manual Setup:** To enable voice chat, add this HTML to your page:\n```html\n" + elevenlabs_widget + "\n```"
+ ).send()
+
+
+async def generate_speech(text: str) -> cl.Audio:
+ """Generate speech from text using OpenAI TTS."""
+ try:
+ # Generate speech using OpenAI TTS
+ response = base_client.audio.speech.create(
+ model="tts-1",
+ voice="alloy",
+ input=text[:4000] # Limit text length
+ )
+
+ # Save audio to bytes
+ audio_bytes = response.content
+
+ # Create Chainlit audio element
+ audio_element = cl.Audio(
+ content=audio_bytes,
+ mime="audio/mpeg",
+ display="inline"
+ )
+
+ return audio_element
+ except Exception as e:
+ print(f"⚠️ TTS Error: {e}")
+ return None
+
+# Voice commands are handled in the main message handler
+
+
+# =============================
+# INPUT HELPER FUNCTIONS
+# =============================
+
+async def ask_float(prompt: str, default: float) -> float:
+ """
+ Prompt user for a floating-point number input with a default value.
+
+ This helper function handles the complexity of asking for numeric input
+ in Chainlit, including error handling and default values.
+
+ Args:
+ prompt (str): Question to ask the user
+ default (float): Default value if user presses Enter or input is invalid
+
+ Returns:
+ float: The user's input as a float, or the default value
+
+ Error handling:
+ - Empty input → returns default
+ - Invalid input (non-numeric) → returns default
+ - Timeout (600 seconds) → returns default
+
+ Example:
+ >>> funding = await ask_float("Enter funding amount:", 3.0)
+ # User sees: "Enter funding amount: (default 3.0):"
+ # If user enters "5.5" → returns 5.5
+ # If user presses Enter → returns 3.0
+ """
+ # Send question to user with timeout of 600 seconds (10 minutes)
+ msg = await cl.AskUserMessage(
+ content=f"{prompt} (default {default}):",
+ timeout=600
+ ).send()
+
+ # Try to parse the response as a float
+ try:
+ # Check if message exists and has 'output' field
+ if msg and msg.get('output'):
+ return float(msg['output']) # Convert string to float
+ return float(default) # No input provided, use default
+ except Exception:
+ # If any error occurs (ValueError, AttributeError, etc.), use default
+ return float(default)
+
+
+async def ask_int(prompt: str, default: int, mi: int = 1, ma: int = 5) -> int:
+ """
+ Prompt user for an integer input within a specified range.
+
+ This helper function asks for integer input and enforces min/max bounds.
+ Commonly used for rating scales (1-5).
+
+ Args:
+ prompt (str): Question to ask the user
+ default (int): Default value if input is invalid or empty
+ mi (int): Minimum allowed value (default: 1)
+ ma (int): Maximum allowed value (default: 5)
+
+ Returns:
+ int: The user's input clamped to [mi, ma], or default value
+
+ Behavior:
+ - Input is automatically clamped to the min/max range
+ - Example: If range is 1-5 and user enters 7, returns 5
+ - Example: If range is 1-5 and user enters 0, returns 1
+
+ Example:
+ >>> rating = await ask_int("Rate your moat:", 3, mi=1, ma=5)
+ # User sees: "Rate your moat: [1..5] (default 3):"
+ # If user enters "4" → returns 4
+ # If user enters "10" → returns 5 (clamped to max)
+ # If user enters "0" → returns 1 (clamped to min)
+ """
+ # Send question to user showing the valid range
+ msg = await cl.AskUserMessage(
+ content=f"{prompt} [{mi}..{ma}] (default {default}):",
+ timeout=600
+ ).send()
+
+ # Try to parse the response as an integer and clamp to range
+ try:
+ if msg and msg.get('output'):
+ val = int(msg['output']) # Convert string to int
+ else:
+ val = default # No input provided, use default
+
+ # Clamp value to [mi, ma] range
+ # max(mi, val) ensures val >= mi
+ # min(ma, ...) ensures result <= ma
+ return max(mi, min(ma, val))
+ except Exception:
+ # If any error occurs, use default value
+ return default
+
+# =============================
+# CSV UPLOAD HANDLER
+# =============================
+
+async def handle_csv_upload():
+ """
+ Handle CSV file upload from user to replace the current dataset.
+
+ This function manages the entire CSV upload workflow:
+ 1. Prompts user to upload a CSV file
+ 2. Validates file type and size
+ 3. Parses CSV into DataFrame
+ 4. Calculates derived columns if possible
+ 5. Returns the new DataFrame or None on failure
+
+ Returns:
+ pd.DataFrame or None: New dataset if upload succeeds, None otherwise
+
+ File requirements:
+ - Format: CSV (comma-separated values)
+ - Max size: 5 MB
+ - Recommended columns: Startup, Funding_USD_M, Burn_Rate_Months,
+ Failed, Country, Sector, Founders_Experience_Yrs, Market_Size_Bn
+
+ Automatic processing:
+ - If Funding_USD_M and Burn_Rate_Months exist, Est_Failure_Year
+ is automatically calculated
+ - Missing columns are allowed but may affect functionality
+
+ User feedback:
+ - Confirmation message showing filename and row count
+ - Error message if upload fails or is cancelled
+ """
+ # -------------------------
+ # PROMPT FOR FILE UPLOAD
+ # -------------------------
+ # Request CSV file from user with constraints
+ files = await cl.AskFileMessage(
+ content="Upload a CSV with columns like Startup, Funding_USD_M, Burn_Rate_Months, Failed, Country, Sector",
+ accept=["text/csv"], # Only allow CSV files
+ max_size_mb=5, # Limit file size to 5 MB
+ timeout=600 # 10 minute timeout
+ ).send()
+
+ # -------------------------
+ # HANDLE CANCELLATION
+ # -------------------------
+ # If user cancels or no file is provided
+ if not files:
+ await cl.Message(content="No file received. Keeping current dataset.").send()
+ return None # Return None to indicate no change
+
+ # -------------------------
+ # PARSE CSV FILE
+ # -------------------------
+ # Extract first file from the list (usually only one file uploaded)
+ f = files[0]
+
+ # Read CSV from bytes into pandas DataFrame
+ # f.content is bytes, so wrap in BytesIO for pandas
+ new_df = pd.read_csv(io.BytesIO(f.content))
+
+ # -------------------------
+ # CALCULATE DERIVED COLUMNS
+ # -------------------------
+ # Check if required columns exist for failure year calculation
+ # Using set intersection to check if both columns are present
+ if {"Funding_USD_M", "Burn_Rate_Months"}.issubset(new_df.columns):
+ # Calculate estimated failure year using same formula as original data
+ new_df["Est_Failure_Year"] = FUNDING_YEAR + (
+ new_df["Funding_USD_M"] / new_df["Burn_Rate_Months"]
+ )
+
+ # -------------------------
+ # CONFIRM UPLOAD SUCCESS
+ # -------------------------
+ # Send confirmation message showing filename and row count
+ await cl.Message(
+ content=f"Loaded `{f.name}` with {len(new_df)} rows."
+ ).send()
+
+ # Return the new DataFrame to replace the global dataset
+ return new_df
+
+# =============================
+# MAIN MESSAGE HANDLER
+# =============================
+
+@cl.on_message
+@traceable(
+ name="NAVADA Message Handler",
+ run_type="chain",
+ tags=["navada", "message-handler", "conversation-entry"],
+ metadata={"handler_type": "chainlit_message", "app_version": "2.0.0"}
+)
+async def main(message: cl.Message):
+ """
+ Process every user message and route to appropriate handler.
+
+ This is the main message processing function that runs whenever a user
+ sends a message. It implements a routing pattern to handle different
+ types of requests:
+
+ 1. Chart generation commands (timeline, funding vs burn)
+ 2. CSV upload requests
+ 3. Viability assessment requests
+ 4. General Q&A using GPT-4
+
+ Message routing logic:
+ ----------------------
+ - Show thinking indicator for user feedback
+ - Lowercase the input for case-insensitive matching
+ - Check for specific keywords to determine intent
+ - Execute corresponding handler and return
+ - If no specific pattern matches, fallback to AI chat
+
+ Args:
+ message (cl.Message): Chainlit message object containing:
+ - content: User's text input
+ - author: User identifier
+ - Other metadata
+
+ Global variables:
+ df (pd.DataFrame): Can be modified by CSV upload handler
+
+ Chainlit decorator: @cl.on_message
+ - Automatically called for every user message
+ - Async function for non-blocking operations
+ """
+ global df # Allow modification of global dataset
+
+ # -------------------------
+ # PASSKEY AUTHENTICATION CHECK
+ # -------------------------
+ session_id = cl.user_session.get("session_id", "unknown")
+
+ # Handle passkey authentication
+ user_input_raw = message.content.strip()
+
+ # Check if user is trying to authenticate with the passkey
+ if user_input_raw == PASSKEY and session_id not in AUTHENTICATED_SESSIONS:
+ # Authenticate the user
+ AUTHENTICATED_SESSIONS.add(session_id)
+ await cl.Message(content="✅ **Access Granted!**").send()
+ return
+
+ # Check if user is authenticated
+ if session_id not in AUTHENTICATED_SESSIONS:
+ await cl.Message(content="🔐").send()
+ return
+
+ # Set authenticated user info
+ auth_status = {
+ "authenticated": True,
+ "username": "authenticated_user",
+ "user_id": f"user_{session_id[:8]}",
+ "email": "user@navada.ai",
+ "subscription_tier": "free"
+ }
+
+ # Parse user input (already extracted above in authentication check)
+ user_input = user_input_raw.lower()
+
+ # Handle authentication commands (commented out for testing)
+ # if user_input.startswith("login "):
+ # await handle_login_command(user_input_raw)
+ # return
+
+ # if user_input.startswith("register "):
+ # await handle_register_command(user_input_raw)
+ # return
+
+ # If not authenticated and not using auth commands, show login form (disabled)
+ # if not auth_status["authenticated"]:
+ # await show_login_form()
+ # return
+
+ # -------------------------
+ # THINKING INDICATOR & TIMESTAMP
+ # -------------------------
+ # Show thinking indicator to provide immediate user feedback
+ import datetime
+ timestamp = datetime.datetime.now().strftime("%H:%M:%S")
+
+ # Show brief thinking message (will be removed after processing)
+ thinking_msg = cl.Message(content=f"🤔 Thinking... ({timestamp})")
+ await thinking_msg.send()
+
+ # -------------------------
+ # NORMALIZE INPUT
+ # -------------------------
+ # Convert to lowercase and strip whitespace for consistent matching
+ user_input = message.content.strip().lower()
+
+ # =============================
+ # ROUTE 0: VOICE COMMANDS
+ # =============================
+ if user_input in ["voice on", "voice enable", "tts on", "speech on", "audio on"]:
+ cl.user_session.set("tts_enabled", True)
+ await cl.Message(content="🔊 **Voice enabled!** AI responses will now include audio.").send()
+ return
+
+ if user_input in ["voice off", "voice disable", "tts off", "speech off", "audio off"]:
+ cl.user_session.set("tts_enabled", False)
+ await cl.Message(content="🔇 **Voice disabled.** AI responses will be text only.").send()
+ return
+
+ # =============================
+ # ROUTE 0.5: MATHEMATICAL ANALYSIS MODE
+ # =============================
+ if "math mode" in user_input or "analysis mode" in user_input:
+ await thinking_msg.remove()
+
+ # Create math-enabled analysis environment
+ math_context = {
+ 'df': df,
+ 'np': np,
+ 'stats': stats,
+ 'current_startup': cl.user_session.get('selected_startup')
+ }
+
+ await cl.Message(
+ content="## 🧮 Mathematical Analysis Mode\n\n"
+ "You can now perform complex calculations:\n\n"
+ "**Examples:**\n"
+ "• `calculate IRR for 5x return in 7 years`\n"
+ "• `project revenue with 20% monthly growth`\n"
+ "• `simulate 1000 scenarios for exit`\n"
+ "• `optimize burn rate for 18 month runway`\n\n"
+ "Type your calculation or 'exit math mode' to return."
+ ).send()
+
+ cl.user_session.set("math_mode", True)
+ return
+
+ # Check if user is in math mode
+ if cl.user_session.get("math_mode", False):
+ await thinking_msg.remove()
+
+ if user_input in ["exit math mode", "exit", "return", "back"]:
+ cl.user_session.set("math_mode", False)
+ await cl.Message(content="🔄 **Exited math mode.** Back to regular analysis.").send()
+ return
+
+ # Process math command
+ math_context = {
+ 'df': df,
+ 'np': np,
+ 'stats': stats,
+ 'current_startup': cl.user_session.get('selected_startup')
+ }
+
+ result = await process_math_command(user_input, math_context)
+ await cl.Message(content=result).send()
+ return
+
+ # =============================
+ # ROUTE 0.7: IMAGE GENERATION
+ # =============================
+ if detect_image_request(user_input):
+ await thinking_msg.remove()
+
+ # Extract the image prompt from user input
+ image_prompt = extract_image_prompt(user_input_raw)
+
+ # Show image generation message
+ generation_msg = cl.Message(content=f"🎨 **Generating image:** {image_prompt}\n\nThis may take a few moments...")
+ await generation_msg.send()
+
+ # Generate the image
+ result = await generate_image(image_prompt)
+
+ if result["success"]:
+ # Create image element from URL
+ image_element = cl.Image(
+ url=result["image_url"],
+ name=f"generated_image_{timestamp.replace(':', '')}.png",
+ display="inline"
+ )
+
+ # Update message with success
+ success_content = f"✅ **Image generated successfully!**\n\n"
+ success_content += f"**Original prompt:** {result['original_prompt']}\n"
+ if result.get('revised_prompt') and result['revised_prompt'] != result['original_prompt']:
+ success_content += f"**DALL-E revised prompt:** {result['revised_prompt']}\n"
+ success_content += f"**Size:** {result['size']} | **Quality:** {result['quality']}"
+
+ await cl.Message(content=success_content, elements=[image_element]).send()
+
+ # Log for conversation tracking
+ auth_manager.save_conversation(
+ user_id=auth_status.get("user_id", "test_123"),
+ chainlit_session_id=cl.user_session.get("session_id", "unknown"),
+ role="user",
+ content=f"Image generation request: {image_prompt}",
+ metadata={"action_type": "image_generation", "prompt": image_prompt}
+ )
+
+ auth_manager.save_conversation(
+ user_id=auth_status.get("user_id", "test_123"),
+ chainlit_session_id=cl.user_session.get("session_id", "unknown"),
+ role="assistant",
+ content=f"Generated image: {result['image_url']}",
+ metadata={"action_type": "image_generated", "image_url": result["image_url"]}
+ )
+
+ else:
+ # Handle error
+ error_content = f"❌ **Image generation failed**\n\n"
+ error_content += f"**Error:** {result['error']}\n"
+ error_content += f"**Prompt:** {result['original_prompt']}\n\n"
+ error_content += "Please try again with a different prompt or check your API configuration."
+
+ await cl.Message(content=error_content).send()
+
+ return
+
+ # =============================
+ # ROUTE 0.8: SWOT ANALYSIS
+ # =============================
+ if "swot" in user_input or ("swot mode" in user_input) or ("analyze swot" in user_input):
+ await thinking_msg.remove()
+
+ # Show SWOT analysis message
+ analysis_msg = cl.Message(content="📊 **Generating SWOT Analysis...**\n\nAnalyzing strengths, weaknesses, opportunities, and threats...")
+ await analysis_msg.send()
+
+ # Gather startup context from user session or create generic context
+ startup_context = {
+ "persona": cl.user_session.get("persona", "founder"),
+ "selected_startup": cl.user_session.get("selected_startup"),
+ "session_id": cl.user_session.get("session_id", "unknown")
+ }
+
+ # Add any available startup data from the dataframe
+ startup_data = {}
+ if df is not None and not df.empty:
+ # Get a sample of startups for context
+ sample_data = df.head(3).to_dict('records')
+ startup_data["sample_startups"] = sample_data
+ startup_data["total_startups"] = len(df)
+ startup_data["sectors"] = df['Sector'].value_counts().head(5).to_dict() if 'Sector' in df.columns else {}
+
+ # Generate SWOT analysis using AI
+ swot_analysis = await generate_swot_analysis(startup_data, user_input_raw)
+
+ # Create SWOT visualization
+ try:
+ swot_chart_png = plot_swot_matrix(swot_analysis)
+
+ # Create SWOT DataFrame for download
+ swot_df = swot_analysis.to_dataframe()
+
+ # Send the comprehensive SWOT analysis
+ swot_summary = swot_analysis.summary()
+
+ await cl.Message(content=swot_summary).send()
+
+ # Send the visual SWOT matrix
+ await send_chart_with_download(
+ png_data=swot_chart_png,
+ filename=f"swot_analysis_{timestamp.replace(':', '')}.png",
+ description="📊 **SWOT Analysis Matrix** - Visual representation of your startup analysis",
+ csv_data=swot_df
+ )
+
+ # Log for conversation tracking
+ auth_manager.save_conversation(
+ user_id=auth_status.get("user_id", "test_123"),
+ chainlit_session_id=cl.user_session.get("session_id", "unknown"),
+ role="user",
+ content=f"SWOT analysis request: {user_input_raw}",
+ metadata={"action_type": "swot_analysis", "context": startup_context}
+ )
+
+ auth_manager.save_conversation(
+ user_id=auth_status.get("user_id", "test_123"),
+ chainlit_session_id=cl.user_session.get("session_id", "unknown"),
+ role="assistant",
+ content="Generated comprehensive SWOT analysis with matrix visualization",
+ metadata={
+ "action_type": "swot_generated",
+ "strengths_count": len(swot_analysis.strengths),
+ "weaknesses_count": len(swot_analysis.weaknesses),
+ "opportunities_count": len(swot_analysis.opportunities),
+ "threats_count": len(swot_analysis.threats)
+ }
+ )
+
+ except Exception as e:
+ # Fallback to text-only SWOT if visualization fails
+ error_msg = f"⚠️ **SWOT Analysis Generated** (Chart error: {str(e)})\n\n"
+ error_msg += swot_analysis.summary()
+ await cl.Message(content=error_msg).send()
+
+ return
+
+ # =============================
+ # ROUTE 1: FAILURE TIMELINE CHART
+ # =============================
+ if "timeline" in user_input:
+ # Show loading message while generating chart
+ msg = cl.Message(content="📊 Generating failure timeline chart...")
+ await msg.send()
+
+ # Generate the chart PNG
+ png = plot_failure_timeline(df)
+
+ # Remove loading message
+ await msg.remove()
+
+ # Send chart with download options
+ # Select available columns for CSV export
+ preferred_cols = ['Company', 'Total Funding', 'Monthly Burn Rate', 'Estimated Runway (Months)']
+ available_cols = [col for col in preferred_cols if col in df.columns]
+
+ # If no preferred columns exist, use first 4 columns or all if less than 4
+ if not available_cols:
+ available_cols = df.columns.tolist()[:4]
+
+ await send_chart_with_download(
+ png_data=png,
+ filename="failure_timeline.png",
+ description=(
+ "### 📈 Estimated Failure Timeline\n\n"
+ "This chart shows when each startup is projected to fail "
+ "based on their funding and burn rate."
+ ),
+ csv_data=df[available_cols] if available_cols else df
+ )
+
+ return # Exit handler, don't process further
+
+ # =============================
+ # ROUTE 2: FUNDING VS BURN CHART
+ # =============================
+ # Check for multiple possible phrasings
+ if "funding vs burn" in user_input or (
+ "funding" in user_input and "burn" in user_input and "vs" in user_input
+ ):
+ # Show loading message
+ msg = cl.Message(content="📊 Generating funding vs burn chart...")
+ await msg.send()
+
+ # Generate scatter plot
+ png = plot_funding_vs_burn(df)
+
+ # Remove loading message
+ await msg.remove()
+
+ # Send descriptive text with legend explanation
+ text_msg = cl.Message(
+ content=(
+ "### 💰 Funding vs Burn Rate Analysis\n\n"
+ "**Green** = Successful | **Red** = Failed\n"
+ "Each shape represents a different sector."
+ )
+ )
+ await text_msg.send()
+
+ # Attach chart image
+ image = cl.Image(content=png, name="funding_vs_burn.png", display="inline")
+ await image.send(for_id=text_msg.id)
+
+ return # Exit handler
+
+ # =============================
+ # ROUTE 3: ADDITIONAL CHART COMMANDS
+ # =============================
+ # Sector comparison chart
+ if "sector" in user_input and ("compare" in user_input or "comparison" in user_input or "chart" in user_input):
+ msg = cl.Message(content="📊 Generating sector comparison chart...")
+ await msg.send()
+ png = plot_sector_comparison(df)
+ await msg.remove()
+ text_msg = cl.Message(content="### 🏭 Sector Comparison\n\nAverage funding by industry sector.")
+ await text_msg.send()
+ image = cl.Image(content=png, name="sector_comparison.png", display="inline")
+ await image.send(for_id=text_msg.id)
+ return
+
+ # Failure rate by country
+ if ("failure" in user_input or "fail" in user_input) and "country" in user_input:
+ msg = cl.Message(content="📊 Generating failure rate by country chart...")
+ await msg.send()
+ png = plot_failure_rate_by_country(df)
+ await msg.remove()
+ text_msg = cl.Message(content="### 🌍 Failure Rate by Country\n\nPercentage of failed startups per country.")
+ await text_msg.send()
+ image = cl.Image(content=png, name="failure_rate_country.png", display="inline")
+ await image.send(for_id=text_msg.id)
+ return
+
+ # Experience vs success
+ if "experience" in user_input and ("success" in user_input or "funding" in user_input or "chart" in user_input):
+ msg = cl.Message(content="📊 Generating experience vs success chart...")
+ await msg.send()
+ png = plot_experience_vs_success(df)
+ await msg.remove()
+ text_msg = cl.Message(content="### 👥 Experience vs Success\n\nRelationship between founder experience, funding, and outcome.")
+ await text_msg.send()
+ image = cl.Image(content=png, name="experience_success.png", display="inline")
+ await image.send(for_id=text_msg.id)
+ return
+
+ # =============================
+ # ROUTE 4: CSV UPLOAD
+ # =============================
+ if "upload csv" in user_input or "load csv" in user_input:
+ # Call upload handler and get new DataFrame (or None)
+ new_df = await handle_csv_upload()
+
+ # If upload succeeded, replace global dataset
+ if new_df is not None:
+ df = new_df
+
+ return # Exit handler
+
+ # =============================
+ # ROUTE 4: VIABILITY ASSESSMENT
+ # =============================
+ # Check for various phrasings of assessment request
+ if "assess idea" in user_input or "new idea" in user_input or "viability" in user_input:
+ # -------------------------
+ # INTRODUCTION
+ # -------------------------
+ # Explain the assessment process
+ await cl.Message(
+ content=(
+ "## 🎯 Startup Viability Assessment\n\n"
+ "Great! Let's evaluate your startup idea. "
+ "I'll ask you **9 quick questions** to calculate a "
+ "comprehensive viability score.\n\n"
+ "*Press Enter to use default values, or type your answer.*"
+ )
+ ).send()
+
+ # -------------------------
+ # COLLECT INPUT DATA
+ # -------------------------
+ # Ask 9 questions to gather all required features
+ # Each question has a sensible default value
+
+ funding = await ask_float("Funding (USD Millions)", 3.0)
+ burn = await ask_float(
+ "Burn rate (months of runway if spending 1M/year ≈ 83k/month)", 9.0
+ )
+ expy = await ask_float("Team experience (years, average)", 3.0)
+ market = await ask_float("Market size (Billions USD)", 15.0)
+ bm = await ask_int("Business model strength (1=weak..5=excellent)", 3)
+ moat = await ask_int("Moat/defensibility (1..5)", 3)
+ mrrk = await ask_float("Current MRR (in $k)", 10.0)
+ growth = await ask_float("Monthly growth rate (%)", 6.0)
+ comp = await ask_int("Competition intensity (1=low..5=very high)", 3)
+
+ # -------------------------
+ # PACKAGE FEATURES
+ # -------------------------
+ # Create dictionary matching the viability_score function signature
+ feats = {
+ "funding_usd_m": funding,
+ "burn_rate_months": burn,
+ "team_experience_years": expy,
+ "market_size_bn": market,
+ "business_model_strength_1_5": bm,
+ "moat_1_5": moat,
+ "traction_mrr_k": mrrk,
+ "growth_rate_pct": growth,
+ "competition_intensity_1_5": comp
+ }
+
+ # -------------------------
+ # CALCULATE SCORE
+ # -------------------------
+ # Call viability scoring model
+ result = viability_score(feats)
+
+ # -------------------------
+ # DISPLAY GAUGE CHART
+ # -------------------------
+ # Generate and display visual score gauge
+ gauge_png = plot_viability_gauge(result["score"])
+ gauge_msg = cl.Message(content="\n## 📊 Your Viability Score")
+ await gauge_msg.send()
+
+ # Attach gauge image to message
+ gauge_image = cl.Image(
+ content=gauge_png, name="viability_score.png", display="inline"
+ )
+ await gauge_image.send(for_id=gauge_msg.id)
+
+ # -------------------------
+ # DISPLAY DETAILED RESULTS
+ # -------------------------
+ # Interpret score with color-coded assessment
+ score_interpretation = (
+ "🟢 Strong" if result['score'] >= 60
+ else "🟡 Moderate" if result['score'] >= 40
+ else "🔴 Weak"
+ )
+
+ # Format comprehensive summary with all metrics
+ summary = (
+ f"### Overall Assessment: {score_interpretation}\n\n"
+ f"**Final Score:** {result['score']:.1f}/100\n\n"
+ f"#### 📈 Key Metrics:\n"
+ f"• **Estimated Runway:** ~{result['survival_months']:.1f} months\n"
+ f"• **Projected Failure Year:** {result['est_failure_year']:.2f} "
+ f"(funded {FUNDING_YEAR})\n\n"
+ f"#### 🔍 Score Breakdown (0-1 scale):\n"
+ f"• Runway: {result['components']['runway']:.2f} | "
+ f"Experience: {result['components']['experience']:.2f}\n"
+ f"• Market: {result['components']['market']:.2f} | "
+ f"Business Model: {result['components']['business_model']:.2f}\n"
+ f"• Moat: {result['components']['moat']:.2f} | "
+ f"Traction: {result['components']['traction']:.2f}\n"
+ f"• Growth: {result['components']['growth']:.2f} | "
+ f"Competition: {result['components']['competition']:.2f}\n"
+ )
+ await cl.Message(content=summary).send()
+
+ # -------------------------
+ # DISPLAY RECOMMENDATIONS
+ # -------------------------
+ # Show actionable tips based on weaknesses
+ if result["tips"]:
+ # Format tips as bullet list
+ tips_text = (
+ "### 💡 Recommended Actions\n\n" +
+ "\n".join([f"• {tip}" for tip in result["tips"]])
+ )
+ await cl.Message(content=tips_text).send()
+ else:
+ # If no tips, startup looks strong
+ await cl.Message(
+ content=(
+ "### ✅ Looking Good!\n\n"
+ "• Keep executing—your foundations look solid.\n"
+ "• Focus on consistent growth and customer acquisition."
+ )
+ ).send()
+
+ return # Exit handler
+
+ # =============================
+ # ROUTE 6: BENCHMARK IDEA
+ # =============================
+ if "benchmark idea" in user_input or "benchmark my idea" in user_input or "compare my idea" in user_input:
+ await cl.Message(content="## 🎯 Benchmark Your Startup Idea\n\nI'll compare your metrics against our dataset to see how you stack up!").send()
+
+ # Collect startup metrics from user
+ funding = await ask_float("Your funding amount (USD millions)", 3.0)
+ burn = await ask_float("Your burn rate (months)", 9.0)
+ expy = await ask_float("Your team's average experience (years)", 3.0)
+ market = await ask_float("Your target market size (billions USD)", 10.0)
+
+ # Package features for benchmarking
+ features = {
+ "funding_usd_m": funding,
+ "burn_rate_months": burn,
+ "team_experience_years": expy,
+ "market_size_bn": market
+ }
+
+ # Run benchmarking analysis
+ benchmark_results = benchmark_founder_idea(features, df)
+
+ # Display benchmarking results
+ results_text = f"## 📊 Benchmarking Results\n\n"
+ results_text += f"**Risk Level:** {benchmark_results['risk_level']}\n\n"
+
+ results_text += "### 📈 How You Compare:\n\n"
+
+ # Display key insights
+ for insight in benchmark_results['insights']:
+ results_text += f"• {insight}\n"
+
+ results_text += "\n### 📊 Detailed Metrics:\n\n"
+
+ # Funding percentile
+ funding_data = benchmark_results['metrics']['funding']
+ results_text += f"**Funding:** ${funding_data['value']}M (percentile: {funding_data['percentile']:.0f}%)\n"
+ results_text += f"• {funding_data['vs_median']:.1f}× the median startup\n\n"
+
+ # Burn rate percentile
+ burn_data = benchmark_results['metrics']['burn_rate']
+ results_text += f"**Burn Rate:** {burn_data['value']} months (percentile: {burn_data['percentile']:.0f}%)\n"
+ results_text += f"• {burn_data['vs_median']:.1f}× the median startup\n\n"
+
+ # Experience percentile
+ exp_data = benchmark_results['metrics']['experience']
+ results_text += f"**Experience:** {exp_data['value']} years (percentile: {exp_data['percentile']:.0f}%)\n"
+ results_text += f"• {exp_data['vs_median']:.1f}× the median startup\n\n"
+
+ # Market size percentile
+ market_data = benchmark_results['metrics']['market_size']
+ results_text += f"**Market Size:** ${market_data['value']}B (percentile: {market_data['percentile']:.0f}%)\n"
+ results_text += f"• {market_data['vs_median']:.1f}× the median startup\n\n"
+
+ # Recommendations
+ if benchmark_results['recommendations']:
+ results_text += "### 💡 Recommendations:\n\n"
+ for rec in benchmark_results['recommendations']:
+ results_text += f"• {rec}\n"
+
+ await cl.Message(content=results_text).send()
+
+ return # Exit handler
+
+ # =============================
+ # ROUTE 7: PORTFOLIO MODE
+ # =============================
+ if "portfolio" in user_input and ("mode" in user_input or "analysis" in user_input or "analyze" in user_input or user_input.strip() == "portfolio"):
+ # Remove thinking indicator
+ await thinking_msg.remove()
+
+ await cl.Message(content="## 📊 Portfolio Analysis Mode\n\nI'll create a comprehensive heatmap of all startups in your dataset!").send()
+
+ # Show loading message
+ msg = cl.Message(content="🔥 Generating portfolio heatmap... Calculating viability scores for all startups.")
+ await msg.send()
+
+ try:
+ # Generate portfolio heatmap
+ heatmap_bytes = create_portfolio_heatmap(df)
+
+ # Remove loading message
+ await msg.remove()
+
+ # Send heatmap description
+ desc_msg = cl.Message(
+ content="### 🔥 Portfolio Viability Heatmap\n\n"
+ f"**Analysis of {len(df)} startups across 6 key metrics:**\n"
+ "• **Overall Score** - Combined viability (0-100)\n"
+ "• **Runway** - Financial sustainability\n"
+ "• **Experience** - Team expertise\n"
+ "• **Market** - Market opportunity\n"
+ "• **Traction** - Current momentum\n"
+ "• **Growth** - Growth trajectory\n\n"
+ "**Color Guide:** 🔴 Poor (0-40) | 🟡 Moderate (40-60) | 🟢 Strong (60-100)"
+ )
+ await desc_msg.send()
+
+ # Attach heatmap image
+ heatmap_image = cl.Image(
+ content=heatmap_bytes, name="portfolio_heatmap.png", display="inline"
+ )
+ await heatmap_image.send(for_id=desc_msg.id)
+
+ # Calculate and show investment recommendations
+ portfolio_scores = []
+ for _, row in df.iterrows():
+ features = {
+ 'funding_usd_m': row['Funding_USD_M'],
+ 'burn_rate_months': row['Burn_Rate_Months'],
+ 'team_experience_years': row['Founders_Experience_Yrs'],
+ 'market_size_bn': row['Market_Size_Bn'],
+ 'business_model_strength_1_5': 3,
+ 'moat_1_5': 3,
+ 'traction_mrr_k': 10,
+ 'growth_rate_pct': 5,
+ 'competition_intensity_1_5': 3
+ }
+ score = viability_score(features)['score']
+ portfolio_scores.append((row['Startup'], score))
+
+ # Sort by score
+ portfolio_scores.sort(key=lambda x: x[1], reverse=True)
+
+ # Create recommendations
+ reco_text = "### 🎯 Investment Recommendations:\n\n"
+ reco_text += "**🟢 INVEST (Score ≥ 60):**\n"
+ invest_list = [f"• {name} ({score:.1f})" for name, score in portfolio_scores if score >= 60]
+ if invest_list:
+ reco_text += "\n".join(invest_list) + "\n\n"
+ else:
+ reco_text += "• None in this category\n\n"
+
+ reco_text += "**🟡 MONITOR (Score 40-59):**\n"
+ monitor_list = [f"• {name} ({score:.1f})" for name, score in portfolio_scores if 40 <= score < 60]
+ if monitor_list:
+ reco_text += "\n".join(monitor_list) + "\n\n"
+ else:
+ reco_text += "• None in this category\n\n"
+
+ reco_text += "**🔴 PASS (Score < 40):**\n"
+ pass_list = [f"• {name} ({score:.1f})" for name, score in portfolio_scores if score < 40]
+ if pass_list:
+ reco_text += "\n".join(pass_list) + "\n\n"
+ else:
+ reco_text += "• None in this category\n\n"
+
+ # Portfolio statistics
+ avg_score = sum(score for _, score in portfolio_scores) / len(portfolio_scores)
+ high_performers = len([s for _, s in portfolio_scores if s >= 60])
+
+ reco_text += f"**📊 Portfolio Stats:**\n"
+ reco_text += f"• Average Score: {avg_score:.1f}/100\n"
+ reco_text += f"• High Performers: {high_performers}/{len(df)} ({high_performers/len(df)*100:.1f}%)\n"
+ reco_text += f"• Success Rate: {((df['Failed'] == 0).sum()/len(df)*100):.1f}%"
+
+ await cl.Message(content=reco_text).send()
+
+ except Exception as e:
+ await msg.remove()
+ await cl.Message(
+ content=f"❌ Error generating portfolio analysis: {str(e)}\n\n"
+ f"Please try again or contact support if the issue persists."
+ ).send()
+
+ return # Exit handler
+
+ # =============================
+ # ROUTE: UK MACRO ANALYSIS
+ # =============================
+ if "macro analysis" in user_input or "uk analysis" in user_input:
+ await thinking_msg.remove()
+
+ analyzer = UKEconomicsAnalyzer()
+
+ await cl.Message(content="## 🇬🇧 UK Macroeconomic Impact Analysis\n\nI'll analyze how UK economic conditions affect your startup.").send()
+
+ # Collect startup data
+ funding = await ask_float("Funding (£ millions)", 4.0)
+ location = await cl.AskUserMessage(content="Location (London/Manchester/Edinburgh/Birmingham/Bristol/Cambridge):").send()
+ sector = await cl.AskUserMessage(content="Sector (FinTech/HealthTech/GreenTech/EdTech/RetailTech):").send()
+ team_size = await ask_int("Team size", 10, mi=1, ma=500)
+
+ startup_data = {
+ 'funding_usd_m': funding * 1.27, # Convert to USD
+ 'location': location.get('output', 'London'),
+ 'sector': sector.get('output', 'Tech'),
+ 'team_size': team_size,
+ 'burn_rate_months': 12, # Default
+ 'debt_ratio': 0.3, # Default 30% debt
+ 'is_b2b': True
+ }
+
+ # Run analysis
+ macro_impacts = analyzer.analyze_macro_impact(startup_data)
+
+ # Generate chart
+ chart = plot_uk_economic_indicators(df)
+
+ # Display results
+ content = f"""
+## 🇬🇧 UK Economic Impact Assessment
+
+### Interest Rate Environment
+- **Cost of Capital:** {macro_impacts['interest_rate_impact']['cost_of_capital']:.1f}%
+- **Annual Interest Cost:** £{macro_impacts['interest_rate_impact']['annual_interest_cost']*0.79:.0f}k
+- **Impact Level:** {macro_impacts['interest_rate_impact']['impact_level']}
+- **Recommendation:** {macro_impacts['interest_rate_impact']['recommendation']}
+
+### Inflation Impact
+- **Current CPI:** {macro_impacts['inflation_impact']['current_inflation']}%
+- **Annual Cost Increase:** £{macro_impacts['inflation_impact']['real_cost_increase_annual']*0.79:.0f}
+- **Pricing Power:** {macro_impacts['inflation_impact']['pricing_power']}
+- **Wage Pressure:** {macro_impacts['inflation_impact']['wage_pressure']}
+
+### Labour Market Conditions
+- **UK Unemployment:** {macro_impacts['labour_market_impact']['unemployment_rate']}%
+- **Labour Cost Index:** {macro_impacts['labour_market_impact']['labour_cost_index']:.0f}
+- **Talent Availability:** {macro_impacts['labour_market_impact']['talent_availability']}
+- **Wage Growth Pressure:** {macro_impacts['labour_market_impact']['wage_growth_pressure']}
+
+### Regional Factors ({startup_data['location']})
+- **Regional Growth:** {macro_impacts['regional_factors']['regional_growth']}%
+- **Cost Index:** {macro_impacts['regional_factors']['cost_index']} (100 = UK average)
+- **Talent Pool:** {macro_impacts['regional_factors']['talent_pool']}
+- **Competitiveness:** {macro_impacts['regional_factors']['competitiveness']}
+
+### Sector Outlook ({startup_data['sector']})
+- **Expected Growth:** {macro_impacts['sector_outlook']['growth']}%
+- **Regulatory Burden:** {macro_impacts['sector_outlook']['regulation']}
+- **Key Opportunity:** {macro_impacts['sector_outlook']['opportunity']}
+
+### Strategic Recommendations:
+1. {'Consider debt financing while rates stabilize' if macro_impacts['interest_rate_impact']['cost_of_capital'] < 10 else 'Focus on equity financing'}
+2. {'Build inflation adjustments into contracts' if macro_impacts['inflation_impact']['current_inflation'] > 2 else 'Lock in current pricing'}
+3. {'Invest in talent retention' if macro_impacts['labour_market_impact']['talent_availability'] == 'Tight' else 'Opportunity to hire quality talent'}
+"""
+
+ text_msg = await cl.Message(content=content).send()
+
+ # Send chart
+ image = cl.Image(content=chart, name="uk_economic_dashboard.png", display="inline")
+ await image.send(for_id=text_msg.id)
+
+ return
+
+ # =============================
+ # ROUTE: INTERACTIVE DASHBOARD
+ # =============================
+ if "dashboard" in user_input or "interactive dashboard" in user_input:
+ await thinking_msg.remove()
+
+ dashboard = InteractiveDashboard(df)
+
+ await cl.Message(content="## 📊 Interactive Dashboard Mode\n\nGenerating real-time analytics dashboard with interactive features...").send()
+
+ # Create executive summary
+ summary_text = create_dashboard_summary(dashboard)
+ summary_msg = await cl.Message(content=summary_text).send()
+
+ # Generate and send real-time metrics dashboard
+ dashboard_chart = dashboard.create_real_time_metrics_dashboard()
+ dashboard_image = cl.Image(content=dashboard_chart, name="interactive_dashboard.png", display="inline")
+ await dashboard_image.send(for_id=summary_msg.id)
+
+ await cl.Message(
+ content="## 🎯 Dashboard Commands Available:\n\n"
+ "• **'interactive scatter'** - Dynamic scatter plot with hover details\n"
+ "• **'correlation heatmap'** - Multi-dimensional relationship analysis\n"
+ "• **'filter dashboard'** - Apply filters (sector, country, funding)\n"
+ "• **'compare startups'** - Side-by-side startup analysis\n"
+ "• **'export dashboard'** - Generate professional PDF report\n\n"
+ "🔍 **Pro Tip:** Use filters to drill down into specific segments!"
+ ).send()
+
+ return
+
+ # =============================
+ # ROUTE: INTERACTIVE SCATTER PLOT
+ # =============================
+ if "interactive scatter" in user_input:
+ await thinking_msg.remove()
+
+ dashboard = InteractiveDashboard(df)
+
+ await cl.Message(content="## 📊 Interactive Scatter Plot Analysis\n\nGenerating dynamic visualization with hover details and selection capabilities...").send()
+
+ # Get axis preferences from user input
+ x_axis = 'Funding_USD_M'
+ y_axis = 'Burn_Rate_Months'
+
+ if 'funding' in user_input and 'experience' in user_input:
+ x_axis, y_axis = 'Funding_USD_M', 'Founders_Experience_Yrs'
+ elif 'market' in user_input and 'funding' in user_input:
+ x_axis, y_axis = 'Market_Size_Bn', 'Funding_USD_M'
+ elif 'mrr' in user_input and 'growth' in user_input:
+ x_axis, y_axis = 'MRR_K', 'Monthly_Growth_Rate'
+
+ # Generate interactive scatter plot
+ scatter_chart = dashboard.create_interactive_scatter(x_axis, y_axis)
+
+ content = f"""
+## 📊 Interactive Scatter Analysis: {x_axis.replace('_', ' ')} vs {y_axis.replace('_', ' ')}
+
+### 🎯 **Key Insights:**
+- **Green dots** = Successful startups
+- **Red dots** = Failed startups
+- **Dot size** = Market size (larger = bigger market)
+
+### 🔍 **Interactive Features:**
+- **Hover** over points for detailed information
+- **Click** points to select for comparison
+- **Drag** to zoom into specific areas
+- **Double-click** to reset zoom
+
+### 📈 **Analysis Options:**
+Try these variations:
+• "interactive scatter funding vs experience"
+• "interactive scatter market vs funding"
+• "interactive scatter mrr vs growth"
+"""
+
+ text_msg = await cl.Message(content=content).send()
+ scatter_image = cl.Image(content=scatter_chart, name="interactive_scatter.png", display="inline")
+ await scatter_image.send(for_id=text_msg.id)
+
+ return
+
+ # =============================
+ # ROUTE: CORRELATION HEATMAP
+ # =============================
+ if "correlation heatmap" in user_input or "heatmap" in user_input:
+ await thinking_msg.remove()
+
+ dashboard = InteractiveDashboard(df)
+
+ await cl.Message(content="## 📊 Multi-Dimensional Correlation Analysis\n\nGenerating interactive correlation heatmap...").send()
+
+ # Generate correlation heatmap
+ heatmap_chart = dashboard.create_multi_dimensional_heatmap()
+
+ content = """
+## 📊 Interactive Correlation Heatmap
+
+### 🎯 **How to Read:**
+- **Blue** = Positive correlation (variables move together)
+- **Red** = Negative correlation (variables move opposite)
+- **White** = No correlation
+- **Numbers** = Correlation strength (-1 to +1)
+
+### 🔍 **Key Relationships to Explore:**
+- Funding vs Market Size
+- Experience vs Success Rate
+- MRR vs Growth Rate
+- Competition vs Moat Strength
+
+### 📈 **Insights:**
+Strong correlations (>0.5 or <-0.5) indicate important relationships for investment decisions.
+"""
+
+ text_msg = await cl.Message(content=content).send()
+ heatmap_image = cl.Image(content=heatmap_chart, name="correlation_heatmap.png", display="inline")
+ await heatmap_image.send(for_id=text_msg.id)
+
+ return
+
+ # =============================
+ # ROUTE: STARTUP COMPARISON
+ # =============================
+ if "compare startups" in user_input or "startup comparison" in user_input:
+ await thinking_msg.remove()
+
+ dashboard = InteractiveDashboard(df)
+
+ await cl.Message(content="## 🔍 Startup Comparison Analysis\n\nSelect startups to compare side-by-side...").send()
+
+ # Extract startup names from user input or ask user
+ startup_names = []
+ for startup in df['Startup'].values:
+ if startup.lower() in user_input.lower():
+ startup_names.append(startup)
+
+ # If no startups found in input, ask user to specify
+ if len(startup_names) < 2:
+ available_startups = ", ".join(df['Startup'].head(10).values)
+ await cl.Message(
+ content=f"Please specify 2-4 startup names to compare.\n\n"
+ f"**Available startups:** {available_startups}...\n\n"
+ f"**Example:** \"Compare TechFlow and DataCorp and AIStart\""
+ ).send()
+ return
+
+ # Generate comparison radar chart
+ comparison_chart = dashboard.compare_startups(startup_names[:4]) # Limit to 4 for readability
+
+ if comparison_chart is None:
+ await cl.Message(content="❌ No valid startups found for comparison. Please check the names.").send()
+ return
+
+ content = f"""
+## 🔍 Startup Comparison: {', '.join(startup_names[:4])}
+
+### 📊 **Radar Chart Analysis:**
+- **Green lines** = Successful startups
+- **Red lines** = Failed startups
+- **Outer edge** = Better performance (scale 0-5)
+
+### 🎯 **Comparison Dimensions:**
+- **Funding** (scaled to 0-5)
+- **Founder Experience** (years)
+- **Market Size** (billions)
+- **Business Model Strength** (1-5)
+- **Competitive Moat** (1-5)
+- **MRR** (scaled to 0-5)
+- **Growth Rate** (scaled to 0-5)
+
+### 💡 **Investment Insights:**
+Look for startups with larger radar areas and balanced performance across dimensions.
+"""
+
+ text_msg = await cl.Message(content=content).send()
+ comparison_image = cl.Image(content=comparison_chart, name="startup_comparison.png", display="inline")
+ await comparison_image.send(for_id=text_msg.id)
+
+ return
+
+ # =============================
+ # ROUTE: DASHBOARD FILTERING
+ # =============================
+ if "filter dashboard" in user_input or "apply filters" in user_input:
+ await thinking_msg.remove()
+
+ await cl.Message(content="## 🔍 Dashboard Filtering Options\n\nApply filters to focus your analysis...").send()
+
+ # Parse filters from user input
+ filters = {}
+
+ # Sector filtering
+ if 'fintech' in user_input.lower():
+ filters['sectors'] = ['FinTech']
+ elif 'healthtech' in user_input.lower():
+ filters['sectors'] = ['HealthTech']
+ elif 'ai' in user_input.lower() or 'artificial intelligence' in user_input.lower():
+ filters['sectors'] = ['AI']
+
+ # Country filtering
+ if 'uk' in user_input.lower() or 'united kingdom' in user_input.lower():
+ filters['countries'] = ['UK']
+ elif 'us' in user_input.lower() or 'usa' in user_input.lower():
+ filters['countries'] = ['US']
+
+ # Success filtering
+ if 'successful' in user_input.lower() or 'success only' in user_input.lower():
+ filters['success_only'] = True
+
+ # Apply filters and generate filtered dashboard
+ dashboard = InteractiveDashboard(df)
+ if filters:
+ dashboard.filter_data(filters)
+
+ # Generate filtered dashboard
+ filtered_summary = create_dashboard_summary(dashboard)
+ filtered_chart = dashboard.create_real_time_metrics_dashboard()
+
+ filter_description = ""
+ if 'sectors' in filters:
+ filter_description += f"**Sectors:** {', '.join(filters['sectors'])}\n"
+ if 'countries' in filters:
+ filter_description += f"**Countries:** {', '.join(filters['countries'])}\n"
+ if 'success_only' in filters:
+ filter_description += "**Filter:** Successful startups only\n"
+
+ content = f"""
+## 🔍 Filtered Dashboard Analysis
+
+### 📊 **Applied Filters:**
+{filter_description if filter_description else "**No specific filters detected.** Try: 'filter dashboard fintech uk successful'"}
+
+{filtered_summary}
+
+### 🎯 **Available Filter Commands:**
+• "filter dashboard fintech" - FinTech startups only
+• "filter dashboard uk successful" - Successful UK startups
+• "filter dashboard healthtech us" - US HealthTech companies
+• "filter dashboard ai" - AI/ML startups
+"""
+
+ text_msg = await cl.Message(content=content).send()
+ filtered_image = cl.Image(content=filtered_chart, name="filtered_dashboard.png", display="inline")
+ await filtered_image.send(for_id=text_msg.id)
+
+ return
+
+ # =============================
+ # ROUTE: EXPORT DASHBOARD
+ # =============================
+ if "export dashboard" in user_input or "dashboard report" in user_input:
+ await thinking_msg.remove()
+
+ await cl.Message(content="## 📄 Exporting Interactive Dashboard Report\n\nGenerating comprehensive PDF with all dashboard analytics...").send()
+
+ dashboard = InteractiveDashboard(df)
+
+ # Create a comprehensive dashboard export
+ export_content = f"""
+# 📊 NAVADA Interactive Dashboard Report
+
+## Executive Summary
+{create_dashboard_summary(dashboard)}
+
+## 📈 Dashboard Analytics
+
+### Key Insights:
+- **Real-time Metrics:** Multi-dimensional analysis across 4 key areas
+- **Interactive Features:** Hover details, drill-down capabilities, filtering
+- **Comparison Tools:** Side-by-side startup analysis with radar charts
+- **Correlation Analysis:** Relationship mapping between key variables
+
+### Available Commands:
+1. **dashboard** - Launch main interactive dashboard
+2. **interactive scatter** - Dynamic scatter plots with selection
+3. **correlation heatmap** - Multi-dimensional correlation matrix
+4. **compare startups** - Radar chart comparisons
+5. **filter dashboard [criteria]** - Apply smart filters
+
+### Professional Features:
+- ✅ Real-time data visualization
+- ✅ Interactive hover details
+- ✅ Custom filtering and drill-down
+- ✅ Multi-startup comparisons
+- ✅ Export capabilities
+- ✅ Mobile-responsive design
+
+---
+**Generated by NAVADA Interactive Dashboard Suite**
+*Next-generation startup analytics with real-time intelligence*
+"""
+
+ # Send the export summary
+ await cl.Message(content=export_content).send()
+
+ await cl.Message(
+ content="## 🎯 Dashboard Export Complete!\n\n"
+ "**What's Included:**\n"
+ "• Executive summary with key metrics\n"
+ "• Interactive feature documentation\n"
+ "• Command reference guide\n"
+ "• Professional formatting\n\n"
+ "**Next Steps:**\n"
+ "• Use 'dashboard' to launch interactive mode\n"
+ "• Try 'interactive scatter' for dynamic analysis\n"
+ "• Explore 'compare startups [names]' for detailed comparisons"
+ ).send()
+
+ return
+
+ # =============================
+ # ROUTE 8: GENERATE PDF REPORT
+ # =============================
+ if "generate report" in user_input or "create report" in user_input or "investment report" in user_input:
+ # Show loading message
+ msg = cl.Message(content="📄 Generating comprehensive PDF report... This may take 10-15 seconds.")
+ await msg.send()
+
+ # Check if user specified a startup name
+ startup_name = None
+ for startup in df['Startup'].values:
+ if startup.lower() in user_input:
+ startup_name = startup
+ break
+
+ try:
+ # Generate PDF report
+ pdf_bytes = generate_investment_report(df, startup_name)
+
+ # Remove loading message
+ await msg.remove()
+
+ # Send description message
+ if startup_name:
+ desc_msg = cl.Message(
+ content=f"### 📊 Investment Analysis Report: {startup_name}\n\n"
+ f"**Report Contents:**\n"
+ f"- Executive Summary\n"
+ f"- Key Metrics & Financials\n"
+ f"- 4 Data Visualizations\n"
+ f"- Risk Analysis\n"
+ f"- Investment Recommendations\n\n"
+ f"**Viability score and actionable insights included.**"
+ )
+ else:
+ desc_msg = cl.Message(
+ content=f"### 📊 Portfolio Analysis Report\n\n"
+ f"**Report Contents:**\n"
+ f"- Portfolio Overview ({len(df)} startups)\n"
+ f"- Success Rate Analysis\n"
+ f"- 4 Data Visualizations\n"
+ f"- Risk Assessment\n"
+ f"- Strategic Recommendations\n\n"
+ f"**Download the PDF below for the complete analysis.**"
+ )
+ await desc_msg.send()
+
+ # Send PDF file as downloadable attachment
+ report_filename = f"{startup_name}_Analysis.pdf" if startup_name else "Portfolio_Analysis.pdf"
+ pdf_element = cl.File(
+ name=report_filename,
+ content=pdf_bytes,
+ display="inline"
+ )
+ await pdf_element.send()
+
+ except Exception as e:
+ await msg.remove()
+ await cl.Message(
+ content=f"❌ Error generating report: {str(e)}\n\n"
+ f"Please try again or contact support if the issue persists."
+ ).send()
+
+ return # Exit handler
+
+ # =============================
+ # ROUTE 7: INTELLIGENT CHART DETECTION
+ # =============================
+ # Check if user is asking for a chart/graph/visualization
+ chart_keywords = ["chart", "graph", "plot", "visualize", "visualization", "show me", "display"]
+ is_chart_request = any(keyword in user_input for keyword in chart_keywords)
+
+ if is_chart_request:
+ # -------------------------
+ # DETECT CHART INTENT WITH AI
+ # -------------------------
+ df_str = df.to_string(index=False)
+ available_columns = list(df.columns)
+
+ # Get session ID for LangSmith tracking
+ session_id = cl.user_session.get("session_id", get_session_id())
+
+ # Include LangSmith metadata if available
+ if langsmith_client and session_id:
+ intent_response = client.chat.completions.create(
+ model="gpt-4o-mini",
+ messages=[
+ {
+ "role": "system",
+ "content": (
+ "You are a data visualization assistant. Analyze the user's request and determine:\n"
+ "1. What type of chart they want (bar, scatter, line, pie, or 'analysis' for text response)\n"
+ "2. Which columns to use (x-axis and y-axis)\n"
+ "3. A descriptive title\n\n"
+ f"Available columns: {', '.join(available_columns)}\n\n"
+ "Respond ONLY in this JSON format:\n"
+ '{"chart_type": "bar/scatter/line/pie/analysis", "x_col": "column_name", "y_col": "column_name", "title": "Chart Title"}\n\n'
+ "If the request doesn't make sense for a chart, use chart_type: 'analysis'."
+ )
+ },
+ {
+ "role": "user",
+ "content": f"Dataset columns: {available_columns}\n\nUser request: {message.content}"
+ }
+ ],
+ max_tokens=150,
+ )
+ else:
+ intent_response = client.chat.completions.create(
+ model="gpt-4o-mini",
+ messages=[
+ {
+ "role": "system",
+ "content": (
+ "You are a data visualization assistant. Analyze the user's request and determine:\n"
+ "1. What type of chart they want (bar, scatter, line, pie, or 'analysis' for text response)\n"
+ "2. Which columns to use (x-axis and y-axis)\n"
+ "3. A descriptive title\n\n"
+ f"Available columns: {', '.join(available_columns)}\n\n"
+ "Respond ONLY in this JSON format:\n"
+ '{"chart_type": "bar/scatter/line/pie/analysis", "x_col": "column_name", "y_col": "column_name", "title": "Chart Title"}\n\n'
+ "If the request doesn't make sense for a chart, use chart_type: 'analysis'."
+ )
+ },
+ {
+ "role": "user",
+ "content": f"Dataset columns: {available_columns}\n\nUser request: {message.content}"
+ }
+ ],
+ max_tokens=150
+ )
+
+ try:
+ # Parse AI response to get chart parameters
+ import json
+ intent_text = intent_response.choices[0].message.content.strip()
+ # Extract JSON from response (handle markdown code blocks)
+ if "```" in intent_text:
+ intent_text = intent_text.split("```")[1]
+ if intent_text.startswith("json"):
+ intent_text = intent_text[4:]
+ intent = json.loads(intent_text.strip())
+
+ # If AI says to do analysis instead of chart, fall through to Q&A
+ if intent.get("chart_type") == "analysis":
+ raise ValueError("Analysis requested instead of chart")
+
+ # Generate the requested chart
+ msg = cl.Message(content=f"📊 Generating {intent['chart_type']} chart...")
+ await msg.send()
+
+ png = plot_custom_chart(
+ df,
+ intent.get("chart_type", "bar"),
+ intent.get("x_col", "Startup"),
+ intent.get("y_col", "Funding_USD_M"),
+ intent.get("title")
+ )
+
+ await msg.remove()
+ text_msg = cl.Message(content=f"### 📊 {intent.get('title', 'Custom Chart')}")
+ await text_msg.send()
+ image = cl.Image(content=png, name="custom_chart.png", display="inline")
+ await image.send(for_id=text_msg.id)
+ return
+
+ except Exception as e:
+ # If chart generation fails, fall through to regular Q&A
+ print(f"Chart generation failed: {e}")
+ pass
+
+ # =============================
+ # ROUTE 8: INTERACTIVE DASHBOARDS
+ # =============================
+ # Handle general "interactive" request - default to scatter plot dashboard
+ if user_input.strip() == "interactive" or (user_input.strip() == "proceed" and thinking_msg.content and "interactive" in thinking_msg.content):
+ msg = cl.Message(content="🎯 Creating interactive scatter plot dashboard...")
+ await msg.send()
+
+ html_content = create_interactive_scatter(df, "Interactive Startup Analysis")
+ await msg.remove()
+
+ # Send the interactive chart as HTML
+ await cl.Message(
+ content="## 🎯 Interactive Startup Dashboard\n\n"
+ "**Features:**\n"
+ "- 🖱️ Hover for detailed startup information\n"
+ "- 🔍 Zoom and pan to explore data\n"
+ "- 📊 Size indicates market size, color shows success/failure\n\n"
+ ).send()
+
+ # Create an HTML file and send it
+ with open("interactive_dashboard.html", "w") as f:
+ f.write(html_content)
+
+ dashboard_file = cl.File(
+ path="interactive_dashboard.html",
+ name="Interactive Dashboard",
+ display="inline"
+ )
+ await dashboard_file.send()
+ return
+
+ if "interactive" in user_input and ("dashboard" in user_input or "scatter" in user_input):
+ msg = cl.Message(content="🎯 Creating interactive scatter plot dashboard...")
+ await msg.send()
+
+ html_content = create_interactive_scatter(df, "Interactive Startup Analysis")
+ await msg.remove()
+
+ # Send the interactive chart as HTML
+ await cl.Message(
+ content="## 🎯 Interactive Startup Dashboard\n\n"
+ "**Features:**\n"
+ "- 🖱️ Hover for detailed startup information\n"
+ "- 🔍 Zoom and pan to explore data\n"
+ "- 📊 Size indicates market size, color shows success/failure\n\n"
+ ).send()
+
+ # Create an HTML file and send it
+ with open("interactive_dashboard.html", "w") as f:
+ f.write(html_content)
+
+ dashboard_file = cl.File(
+ path="interactive_dashboard.html",
+ name="Interactive Dashboard",
+ display="inline"
+ )
+ await dashboard_file.send()
+ return
+
+ if "interactive timeline" in user_input:
+ msg = cl.Message(content="📈 Creating interactive failure timeline...")
+ await msg.send()
+
+ html_content = create_interactive_timeline(df)
+ await msg.remove()
+
+ await cl.Message(
+ content="## 📈 Interactive Failure Timeline\n\n"
+ "**Features:**\n"
+ "- 🖱️ Hover to see startup details\n"
+ "- 📊 Interactive bars with sector and funding info\n"
+ "- 🔍 Zoom to focus on specific time ranges\n\n"
+ ).send()
+
+ with open("interactive_timeline.html", "w") as f:
+ f.write(html_content)
+
+ timeline_file = cl.File(
+ path="interactive_timeline.html",
+ name="Interactive Timeline",
+ display="inline"
+ )
+ await timeline_file.send()
+ return
+
+ if "sector dashboard" in user_input or ("interactive" in user_input and "sector" in user_input):
+ msg = cl.Message(content="🏭 Creating interactive sector dashboard...")
+ await msg.send()
+
+ html_content = create_sector_dashboard(df)
+ await msg.remove()
+
+ await cl.Message(
+ content="## 🏭 Interactive Sector Dashboard\n\n"
+ "**Features:**\n"
+ "- 📊 Four interconnected charts\n"
+ "- 🖱️ Hover and zoom on each panel\n"
+ "- 💡 Compare sectors across multiple dimensions\n\n"
+ ).send()
+
+ with open("sector_dashboard.html", "w") as f:
+ f.write(html_content)
+
+ sector_file = cl.File(
+ path="sector_dashboard.html",
+ name="Sector Dashboard",
+ display="inline"
+ )
+ await sector_file.send()
+ return
+
+ # =============================
+ # ROUTE 9: NEW ADVANCED CHARTS
+ # =============================
+ if "growth trajectory" in user_input or ("growth" in user_input and "chart" in user_input):
+ msg = cl.Message(content="📈 Generating growth trajectory analysis...")
+ await msg.send()
+
+ png = plot_growth_trajectory(df)
+ await msg.remove()
+
+ text_msg = cl.Message(
+ content="### 📈 Growth Trajectory Analysis\n\n"
+ "This chart shows **MRR growth vs company age** with bubble sizes representing growth rates.\n"
+ "- **Green dots**: Successful companies\n"
+ "- **Red dots**: Failed companies\n"
+ "- **Bubble size**: Monthly growth rate percentage"
+ )
+ await text_msg.send()
+
+ image = cl.Image(content=png, name="growth_trajectory.png", display="inline")
+ await image.send(for_id=text_msg.id)
+ return
+
+ if "team performance" in user_input or ("team" in user_input and "matrix" in user_input):
+ msg = cl.Message(content="👥 Generating team performance matrix...")
+ await msg.send()
+
+ png = plot_team_performance(df)
+ await msg.remove()
+
+ text_msg = cl.Message(
+ content="### 👥 Team Performance Matrix\n\n"
+ "This chart analyzes **team size vs founder experience** correlation.\n"
+ "- **Bubble size**: Total funding raised\n"
+ "- **Red trend line**: Shows correlation between team size and experience\n"
+ "- **Colors**: Green = successful, Red = failed"
+ )
+ await text_msg.send()
+
+ image = cl.Image(content=png, name="team_performance.png", display="inline")
+ await image.send(for_id=text_msg.id)
+ return
+
+ if "market opportunity" in user_input or ("market" in user_input and "competition" in user_input):
+ msg = cl.Message(content="🎯 Generating market opportunity matrix...")
+ await msg.send()
+
+ png = plot_market_opportunity(df)
+ await msg.remove()
+
+ text_msg = cl.Message(
+ content="### 🎯 Market Opportunity Matrix\n\n"
+ "This chart identifies **sweet spots** in market size vs competition landscape.\n"
+ "- **X-axis**: Market size (bigger = better)\n"
+ "- **Y-axis**: Market opportunity (higher = less competition)\n"
+ "- **Bubble size**: Current traction (MRR)\n"
+ "- **Sweet Spot**: Large market + low competition"
+ )
+ await text_msg.send()
+
+ image = cl.Image(content=png, name="market_opportunity.png", display="inline")
+ await image.send(for_id=text_msg.id)
+ return
+
+ if "funding efficiency" in user_input or ("capital" in user_input and "efficiency" in user_input):
+ msg = cl.Message(content="💰 Generating capital efficiency analysis...")
+ await msg.send()
+
+ png = plot_funding_efficiency(df)
+ await msg.remove()
+
+ text_msg = cl.Message(
+ content="### 💰 Capital Efficiency Analysis\n\n"
+ "This chart shows **revenue generated per dollar invested**.\n"
+ "- **X-axis**: Total funding raised\n"
+ "- **Y-axis**: Annual revenue per dollar of funding\n"
+ "- **Bubble size**: Efficiency score (revenue × growth rate)\n"
+ "- **Orange line**: Median efficiency benchmark"
+ )
+ await text_msg.send()
+
+ image = cl.Image(content=png, name="funding_efficiency.png", display="inline")
+ await image.send(for_id=text_msg.id)
+ return
+
+ if "stage progression" in user_input or ("funding" in user_input and "stage" in user_input):
+ msg = cl.Message(content="🚀 Generating funding stage analysis...")
+ await msg.send()
+
+ png = plot_stage_progression(df)
+ await msg.remove()
+
+ text_msg = cl.Message(
+ content="### 🚀 Funding Stage Analysis\n\n"
+ "This chart tracks **funding amounts and failure rates by stage**.\n"
+ "- **Blue bars**: Average funding amount per stage\n"
+ "- **Red line**: Failure rate percentage\n"
+ "- **Labels**: Show funding amount and company count\n"
+ "- **Insight**: Later stages typically have lower failure rates"
+ )
+ await text_msg.send()
+
+ image = cl.Image(content=png, name="stage_progression.png", display="inline")
+ await image.send(for_id=text_msg.id)
+ return
+
+ if "risk assessment" in user_input or ("risk" in user_input and "radar" in user_input):
+ msg = cl.Message(content="🎯 Generating risk assessment radar...")
+ await msg.send()
+
+ png = plot_risk_assessment(df)
+ await msg.remove()
+
+ text_msg = cl.Message(
+ content="### 🎯 Risk Assessment Profile\n\n"
+ "This **radar chart** compares risk profiles between successful and failed companies.\n"
+ "- **Green area**: Successful companies' average risk profile\n"
+ "- **Red area**: Failed companies' average risk profile\n"
+ "- **Scale**: 0 = low risk, 10 = high risk\n"
+ "- **Categories**: Financial, Market, Team, Competition, and Traction risks"
+ )
+ await text_msg.send()
+
+ image = cl.Image(content=png, name="risk_assessment.png", display="inline")
+ await image.send(for_id=text_msg.id)
+ return
+
+ # =============================
+ # ROUTE 10: GUIDED QUESTIONS
+ # =============================
+ if "questions" in user_input or "guide me" in user_input or "what should i ask" in user_input:
+ await thinking_msg.remove()
+
+ persona = get_current_persona()
+ questions = persona.get("questions", [])
+
+ content = f"### 🎯 {persona['name']} - Guided Questions\n\n"
+ content += "Here are key questions to explore based on your current mode:\n\n"
+
+ for i, question in enumerate(questions, 1):
+ content += f"**{i}.** {question}\n"
+
+ content += "\n📊 **Recommended Charts:**\n"
+ for chart in persona.get("charts", []):
+ chart_name = chart.replace("_", " ").title()
+ content += f"- Type **'{chart_name}'** for {chart_name} analysis\n"
+
+ content += "\n💡 **Pro Tip:** Copy any question above and I'll provide detailed analysis!"
+
+ await cl.Message(content=content).send()
+ return
+
+ # =============================
+ # ROUTE 11: INTERNET SEARCH
+ # =============================
+ if user_input.startswith("search ") or \
+ ("search" in user_input and ("internet" in user_input or "web" in user_input)) or \
+ ("search" in user_input and any(word in user_input for word in ["latest", "recent", "current", "news", "trends", "2024", "2025"])) or \
+ ("what's happening" in user_input) or \
+ ("latest news" in user_input) or \
+ ("current trends" in user_input) or \
+ ("find" in user_input and any(word in user_input for word in ["latest", "recent", "current", "new"])):
+
+ await thinking_msg.remove()
+
+ # Extract search query
+ if user_input.startswith("search "):
+ query = user_input[7:].strip()
+ elif "search" in user_input:
+ # Extract query after "search"
+ parts = user_input.split("search", 1)
+ if len(parts) > 1:
+ query = parts[1].strip()
+ else:
+ query = user_input
+ else:
+ # For phrases like "what's happening with AI startups"
+ query = user_input
+
+ if not query:
+ await cl.Message(
+ content="Please provide a search query. For example:\n"
+ "- **search AI startups 2024**\n"
+ "- **latest trends in fintech**\n"
+ "- **current venture capital news**"
+ ).send()
+ return
+
+ msg = cl.Message(content=f"🔍 Searching for: **{query}**...")
+ await msg.send()
+
+ # Perform search
+ search_results = search_internet(query, count=5)
+
+ if not search_results["success"]:
+ # Provide fallback response with helpful guidance
+ fallback_message = f"❌ Search failed: {search_results['error']}\n\n"
+
+ if search_results.get("fallback_available"):
+ fallback_message += "💡 **Alternative approach**: I can still help you with:\n"
+ fallback_message += "• General startup advice and best practices\n"
+ fallback_message += "• Analysis based on my training data\n"
+ fallback_message += "• Startup failure pattern analysis\n"
+ fallback_message += "• Business model evaluation\n\n"
+ fallback_message += "🔧 **To enable web search**: Please configure a valid Brave Search API key in your .env file.\n"
+ fallback_message += "Visit https://brave.com/search/api/ to get an API key."
+
+ msg.content = fallback_message
+ await msg.update()
+ return
+
+ # Get current persona for analysis
+ persona = get_current_persona()
+
+ # Get session ID for LangSmith tracking
+ session_id = cl.user_session.get("session_id", get_session_id())
+
+ # Analyze results with persona context
+ analysis = analyze_search_results(search_results, persona, "startup and investment context", session_id)
+
+ # Format and send results
+ content = f"## 🔍 Search Results: {query}\n\n"
+ content += f"**Found {search_results['total_results']} results:**\n\n"
+
+ for i, result in enumerate(search_results['results'], 1):
+ content += f"**{i}. {result['title']}**\n"
+ content += f"🔗 {result['url']}\n"
+ content += f"📝 {result['description'][:200]}{'...' if len(result['description']) > 200 else ''}\n"
+ if result.get('age'):
+ content += f"⏰ {result['age']}\n"
+ content += "\n"
+
+ content += f"---\n\n## 🤖 {persona['name']} Analysis:\n\n{analysis}"
+
+ msg.content = content
+ await msg.update()
+ return
+
+ # =============================
+ # ROUTE 12: TEXT-TO-SPEECH
+ # =============================
+ if user_input.startswith("speak ") or user_input.startswith("say ") or \
+ ("audio" in user_input and "response" in user_input) or \
+ ("read aloud" in user_input) or ("voice" in user_input):
+
+ await thinking_msg.remove()
+
+ # Extract text to speak
+ if user_input.startswith("speak "):
+ text_to_speak = user_input[6:].strip()
+ elif user_input.startswith("say "):
+ text_to_speak = user_input[4:].strip()
+ else:
+ text_to_speak = "Welcome to NAVADA, your AI-powered startup viability agent. I can analyze startup risks, generate charts, and provide investment insights in both investor and founder modes."
+
+ if not text_to_speak:
+ await cl.Message(
+ content="Please provide text to convert to speech:\n"
+ "- **speak [your text]**\n"
+ "- **say [your text]**\n"
+ "- **read aloud [your text]**"
+ ).send()
+ return
+
+ # Generate audio
+ msg = cl.Message(content=f"🔊 Generating speech: **{text_to_speak[:100]}{'...' if len(text_to_speak) > 100 else ''}**")
+ await msg.send()
+
+ try:
+ # Create audio message
+ audio = create_audio_message(text_to_speak, voice="alloy")
+
+ if audio:
+ # Send text message with audio
+ content = f"🔊 **Audio Response:**\n\n{text_to_speak}"
+ text_msg = cl.Message(content=content)
+ await text_msg.send()
+
+ # Send audio
+ await audio.send(for_id=text_msg.id)
+ else:
+ msg.content = "❌ Failed to generate audio. Please try again."
+ await msg.update()
+
+ except Exception as e:
+ msg.content = f"❌ Audio generation error: {str(e)}"
+ await msg.update()
+
+ return
+
+ # =============================
+ # ROUTE 13: PERSONA MANAGEMENT
+ # =============================
+ if "investor mode" in user_input or "switch to investor" in user_input:
+ # Remove thinking indicator
+ await thinking_msg.remove()
+
+ cl.user_session.set("persona", "investor")
+ persona = get_current_persona()
+ recommendations = format_persona_recommendations("investor")
+ await cl.Message(
+ content=f"{persona['style']}\n\n"
+ "I'm now analyzing from a **venture capitalist perspective**. "
+ "I'll focus on ROI, market size, competitive analysis, and exit strategies.\n\n"
+ f"{recommendations}"
+ "**What would you like to analyze today?**\n\n"
+ "💰 **Quick Analysis:**\n"
+ "• Type **'portfolio'** - Investment recommendations across all startups\n"
+ "• Type **'insights'** - AI-powered risk assessment and opportunities\n"
+ "• Type **'benchmark'** - Compare new startup ideas against our dataset\n"
+ "• Type **'questions'** - Get guided investor-focused questions\n\n"
+ "📊 **Advanced Charts:**\n"
+ "• **'Funding Efficiency'** - Capital efficiency and ROI analysis\n"
+ "• **'Stage Progression'** - Funding stages vs failure rates\n"
+ "• **'Market Opportunity'** - Market size vs competition matrix\n"
+ "• **'Risk Assessment'** - Comprehensive risk radar chart\n\n"
+ "📈 **Interactive Tools:**\n"
+ "• **'Sector Dashboard'** - Multi-dimensional sector analysis\n"
+ "• **'Interactive'** - Dynamic scatter plots and correlations\n\n"
+ "🔍 **Internet Search:**\n"
+ "• **'search latest VC trends'** - Get up-to-date market intelligence\n"
+ "• **'current startup news'** - Recent developments in startup ecosystem\n"
+ "• **'search [company name] funding'** - Research specific companies\n\n"
+ "🎯 **Ask me directly:**\n"
+ "• \"Which startups have the best ROI potential?\"\n"
+ "• \"What are the red flags in our portfolio?\"\n"
+ "• \"Search for latest AI startup trends\""
+ ).send()
+ return
+
+ if "founder mode" in user_input or "switch to founder" in user_input:
+ # Remove thinking indicator
+ await thinking_msg.remove()
+
+ cl.user_session.set("persona", "founder")
+ persona = get_current_persona()
+ recommendations = format_persona_recommendations("founder")
+ await cl.Message(
+ content=f"{persona['style']}\n\n"
+ "I'm now analyzing from an **experienced founder perspective**. "
+ "I'll focus on practical execution, team building, product development, and tactical advice.\n\n"
+ f"{recommendations}"
+ "**What challenges can I help you tackle today?**\n\n"
+ "🚀 **Quick Assessment:**\n"
+ "• Type **'assess idea'** - Get viability score for your startup concept\n"
+ "• Type **'benchmark'** - Compare your metrics to successful startups\n"
+ "• Type **'insights'** - Get tactical recommendations to reduce risk\n"
+ "• Type **'questions'** - Get guided founder-focused questions\n\n"
+ "📊 **Growth Analysis:**\n"
+ "• **'Growth Trajectory'** - MRR growth patterns and success factors\n"
+ "• **'Team Performance'** - Team size vs experience optimization\n"
+ "• **'Market Opportunity'** - Find your competitive sweet spot\n"
+ "• **'Stage Progression'** - Funding stage benchmarks and expectations\n\n"
+ "📈 **Tactical Tools:**\n"
+ "• **'Timeline'** - Failure patterns to avoid common pitfalls\n"
+ "• **'Interactive'** - Explore data patterns affecting your sector\n"
+ "• **'Portfolio'** - Study successful companies in your space\n\n"
+ "🔍 **Market Intelligence:**\n"
+ "• **'search competitor analysis'** - Research competitive landscape\n"
+ "• **'latest startup challenges'** - Current industry challenges\n"
+ "• **'search [your sector] trends'** - Stay ahead of market shifts\n\n"
+ "💡 **Ask me directly:**\n"
+ "• \"How can I extend my runway and reduce burn?\"\n"
+ "• \"What team size is optimal for my stage?\"\n"
+ "• \"Search for current SaaS pricing trends\""
+ ).send()
+ return
+
+ # =============================
+ # ROUTE: DISPLAY RECOMMENDATIONS
+ # =============================
+ if "recommendations" in user_input or "best practices" in user_input:
+ await thinking_msg.remove()
+
+ current_persona_name = cl.user_session.get("persona", "founder")
+ current_persona = PERSONAS[current_persona_name]
+ recommendations = format_persona_recommendations(current_persona_name)
+
+ await cl.Message(
+ content=f"{current_persona['style']}\n\n"
+ f"Here are the key recommendations for {current_persona['name']}:"
+ f"{recommendations}"
+ "💡 **Want more specific advice?** Ask me about any of these areas, or switch to a different mode:\n"
+ "• **'investor mode'** - VC investment criteria\n"
+ "• **'founder mode'** - Tactical execution advice\n"
+ "• **'economist mode'** - UK economic analysis\n"
+ "• **'company analyst mode'** - Financial performance focus"
+ ).send()
+ return
+
+ # =============================
+ # ROUTE: USER DASHBOARD & HISTORY
+ # =============================
+ if "dashboard" in user_input or "my conversations" in user_input or "history" in user_input:
+ await thinking_msg.remove()
+
+ if not AUTH_AVAILABLE or not auth_status["authenticated"]:
+ await cl.Message(content="❌ Please log in to view your dashboard.").send()
+ return
+
+ # Get user's conversation history
+ conversations = auth_manager.get_user_conversations(auth_status["user_id"], limit=10)
+
+ if not conversations:
+ await cl.Message(
+ content=f"📊 **Welcome to your NAVADA Dashboard, {auth_status['username']}!**\n\n"
+ "🎯 **Account Info:**\n"
+ f"• Username: {auth_status['username']}\n"
+ f"• Email: {auth_status.get('email', 'Not provided')}\n"
+ f"• Subscription: {auth_status.get('subscription_tier', 'free').title()}\n\n"
+ "📝 **Conversation History:** No conversations yet\n\n"
+ "Start chatting to build your conversation history!"
+ ).send()
+ else:
+ dashboard_content = f"📊 **Welcome to your NAVADA Dashboard, {auth_status['username']}!**\n\n"
+ dashboard_content += "🎯 **Account Info:**\n"
+ dashboard_content += f"• Username: {auth_status['username']}\n"
+ dashboard_content += f"• Email: {auth_status.get('email', 'Not provided')}\n"
+ dashboard_content += f"• Subscription: {auth_status.get('subscription_tier', 'free').title()}\n\n"
+ dashboard_content += f"📝 **Recent Conversations ({len(conversations)}):**\n\n"
+
+ for i, conv in enumerate(conversations, 1):
+ dashboard_content += f"**{i}. {conv['title']}**\n"
+ dashboard_content += f"• Mode: {conv['persona_mode'].title()}\n"
+ dashboard_content += f"• Messages: {conv['message_count']}\n"
+ dashboard_content += f"• Updated: {conv['updated_at'][:19].replace('T', ' ')}\n"
+ dashboard_content += f"• Session ID: `{conv['session_id'][:8]}...`\n\n"
+
+ dashboard_content += "💡 **Tip:** Type 'logout' to end your session securely."
+
+ await cl.Message(content=dashboard_content).send()
+ return
+
+ if user_input == "logout":
+ await thinking_msg.remove()
+
+ if not AUTH_AVAILABLE or not auth_status["authenticated"]:
+ await cl.Message(content="❌ You are not logged in.").send()
+ return
+
+ # Logout user
+ session_token = cl.user_session.get("session_token")
+ if session_token:
+ auth_manager.logout_user(session_token)
+
+ # Clear session data
+ cl.user_session.set("auth_token", None)
+ cl.user_session.set("session_token", None)
+ cl.user_session.set("user_id", None)
+ cl.user_session.set("username", None)
+ cl.user_session.set("user_email", None)
+ cl.user_session.set("subscription_tier", None)
+
+ await cl.Message(
+ content=f"👋 **Goodbye, {auth_status['username']}!**\n\n"
+ "You have been logged out successfully.\n"
+ "Type `login username password` to log back in."
+ ).send()
+ return
+
+ # =============================
+ # ROUTE: UK ECONOMIST MODE
+ # =============================
+ if "economist mode" in user_input or "economics mode" in user_input or "uk economy" in user_input:
+ await thinking_msg.remove()
+
+ cl.user_session.set("persona", "economist")
+ persona = get_current_persona()
+ recommendations = format_persona_recommendations("economist")
+
+ await cl.Message(
+ content=f"{persona['style']}\n\n"
+ "I'm now analyzing from a **UK economics perspective**, combining macroeconomic trends with startup viability.\n\n"
+ f"{recommendations}"
+ "**Current UK Economic Context:**\n"
+ "• Bank Rate: 4.75% (affecting cost of capital)\n"
+ "• CPI Inflation: 2.3% (near BoE target)\n"
+ "• Unemployment: 4.2% (tight labour market)\n"
+ "• GDP Growth: 0.3% quarterly (sluggish growth)\n"
+ "• GBP/USD: 1.27 (currency impacts)\n\n"
+ "**Economic Analysis Tools:**\n"
+ "• Type **'macro analysis'** - UK macroeconomic impact assessment\n"
+ "• Type **'sector outlook'** - UK sector-specific opportunities\n"
+ "• Type **'regional analysis'** - Location-based economic factors\n"
+ "• Type **'policy impact'** - Government policy effects\n\n"
+ "**Key Questions I Can Answer:**\n"
+ "• How do interest rates affect your funding strategy?\n"
+ "• What's the inflation impact on your cost structure?\n"
+ "• How does UK productivity affect your scaling plans?\n"
+ "• Which UK regions offer the best opportunities?\n"
+ "• How do fiscal policies impact your sector?\n\n"
+ "**Ask me about:**\n"
+ "• Brexit impacts on your market\n"
+ "• London vs regional economics\n"
+ "• UK labour market conditions\n"
+ "• Sector-specific regulations\n"
+ "• Currency exposure and hedging"
+ ).send()
+ return
+
+ if "persona" in user_input or "mode" in user_input:
+ current_persona = get_current_persona()
+ await cl.Message(
+ content=f"## 🎭 Current Mode: {current_persona['style']}\n\n"
+ "**Available modes:**\n\n"
+ "💼 **Investor Mode** - VC perspective focused on ROI and exit strategies\n"
+ "• Best for: Portfolio analysis, due diligence, investment decisions\n"
+ "• Commands: portfolio, insights, sector dashboard\n\n"
+ "🚀 **Founder Mode** - Entrepreneur perspective focused on execution\n"
+ "• Best for: Startup assessment, risk reduction, tactical advice\n"
+ "• Commands: assess idea, benchmark, timeline\n\n"
+ "🇬🇧 **UK Economist Mode** - Economic analysis perspective for UK markets\n"
+ "• Best for: Macroeconomic impacts, regional analysis, policy effects\n"
+ "• Commands: macro analysis, sector outlook, regional analysis\n\n"
+ "**Ready to switch?**\n"
+ "• Type **'investor mode'** for VC analysis\n"
+ "• Type **'founder mode'** for founder guidance\n"
+ "• Type **'economist mode'** for UK economic analysis\n\n"
+ "**Or continue in current mode - what would you like to analyze?**"
+ ).send()
+ return
+
+ # =============================
+ # ROUTE: COMPANY ANALYST MODE
+ # =============================
+ if "company analyst" in user_input or "company analysis" in user_input or "financial analysis" in user_input:
+ await thinking_msg.remove()
+
+ cl.user_session.set("persona", "company_analyst")
+ persona = get_current_persona()
+
+ await cl.Message(
+ content=f"{persona['style']}\n\n"
+ "I'm now analyzing from a **company financial health perspective**, focusing on profitability, unit economics, and financial sustainability.\n\n"
+ "**Financial Analysis Focus:**\n"
+ "• Profitability: Gross, operating, and net margins\n"
+ "• Unit Economics: LTV/CAC ratios, payback periods\n"
+ "• Cash Flow: Runway analysis, working capital\n"
+ "• Break-even: Path to profitability analysis\n"
+ "• Benchmarking: Industry performance comparisons\n\n"
+ "**Analysis Tools:**\n"
+ "• Type **'analyze company'** - Comprehensive financial analysis\n"
+ "• Type **'profitability analysis'** - Full margin and profitability assessment\n"
+ "• Type **'unit economics'** - Customer economics and LTV/CAC analysis\n"
+ "• Type **'cash flow analysis'** - Runway and cash management assessment\n"
+ "• Type **'break even analysis'** - Path to profitability calculation\n\n"
+ "**Ready to dive deep into financial health?**"
+ ).send()
+ return
+
+ # =============================
+ # ROUTE: COMPANY ANALYSIS EXECUTION
+ # =============================
+ if "analyze company" in user_input or "profitability analysis" in user_input or "financial analysis" in user_input:
+ await thinking_msg.remove()
+
+ analyzer = CompanyAnalyzer()
+
+ await cl.Message(
+ content="## 💼 Company Financial Analysis\n\n"
+ "I'll perform a comprehensive profitability and financial health analysis.\n\n"
+ "Choose analysis type:\n"
+ "1. **Quick Analysis** - Key metrics only\n"
+ "2. **Full Analysis** - Complete financial deep dive\n"
+ "3. **Upload Financials** - Analyze from CSV/Excel"
+ ).send()
+
+ analysis_type = await cl.AskUserMessage(content="Enter choice (1/2/3):").send()
+
+ if "2" in analysis_type.get('output', '') or "full" in analysis_type.get('output', '').lower():
+ # Full Analysis
+ await cl.Message(content="### 📊 Full Company Analysis\n\nI'll need detailed financial information.").send()
+
+ # Collect comprehensive data
+ company_name = await cl.AskUserMessage(content="Company name:").send()
+ industry = await cl.AskUserMessage(content="Industry (SaaS/E-commerce/Marketplace/FinTech/Services):").send()
+
+ # Revenue metrics
+ revenue = await ask_float("Annual revenue ($ millions)", 10.0)
+ growth_rate = await ask_float("Revenue growth rate (%)", 20.0)
+
+ # Cost structure
+ cogs_pct = await ask_float("COGS as % of revenue", 40.0)
+ opex_pct = await ask_float("Operating expenses as % of revenue", 35.0)
+ sales_marketing_pct = await ask_float("Sales & Marketing as % of revenue", 15.0)
+
+ # Unit economics
+ cac = await ask_float("Customer Acquisition Cost ($)", 100.0)
+ ltv = await ask_float("Customer Lifetime Value ($)", 400.0)
+ monthly_churn = await ask_float("Monthly churn rate (%)", 5.0)
+
+ # Cash metrics
+ cash_balance = await ask_float("Current cash balance ($ millions)", 5.0)
+ monthly_burn = await ask_float("Monthly burn rate ($ thousands)", 200.0)
+
+ # Prepare financial data
+ revenue_amount = revenue * 1_000_000
+ financials = {
+ 'revenue': revenue_amount,
+ 'cogs': revenue_amount * (cogs_pct / 100),
+ 'opex': revenue_amount * (opex_pct / 100),
+ 'sales_marketing': revenue_amount * (sales_marketing_pct / 100),
+ 'fixed_costs': revenue_amount * 0.2,
+ 'variable_cost_ratio': cogs_pct / 100,
+ 'price_per_unit': 100,
+ 'current_revenue': revenue_amount / 12 # Monthly
+ }
+
+ metrics = {
+ 'customer_acquisition_cost': cac,
+ 'lifetime_value': ltv,
+ 'monthly_revenue': ltv / 24, # Assume 24-month lifetime
+ 'revenue_per_unit': 100,
+ 'variable_cost_per_unit': 40
+ }
+
+ cash_data = {
+ 'cash_from_operations': -monthly_burn * 1000 * 12,
+ 'cash_from_investing': -revenue_amount * 0.05,
+ 'cash_from_financing': 0,
+ 'cash_balance': cash_balance * 1_000_000,
+ 'days_sales_outstanding': 45,
+ 'days_inventory_outstanding': 0 if industry.get('output', '') == 'SaaS' else 30,
+ 'days_payables_outstanding': 30
+ }
+
+ # Run analyses
+ msg = cl.Message(content="🔍 Analyzing financial health...")
+ await msg.send()
+
+ profitability = analyzer.analyze_profitability(financials)
+ unit_economics = analyzer.analyze_unit_economics(metrics)
+ cash_flow = analyzer.analyze_cash_flow(cash_data)
+ break_even = analyzer.calculate_break_even(financials)
+
+ # Benchmark analysis
+ company_metrics = {
+ 'gross_margin': profitability['gross_margin'],
+ 'operating_margin': profitability['operating_margin'],
+ 'ltv_cac_ratio': unit_economics['ltv_cac_ratio']
+ }
+ benchmarks = analyzer.benchmark_performance(
+ company_metrics,
+ industry.get('output', 'Services')
+ )
+
+ # Generate visualization
+ analysis_data = {
+ 'ltv_cac_ratio': unit_economics['ltv_cac_ratio']
+ }
+ chart = plot_profitability_analysis(analysis_data)
+
+ await msg.remove()
+
+ # Display comprehensive results
+ content = f"""
+## 📊 Financial Analysis: {company_name.get('output', 'Company')}
+
+### 💰 Profitability Analysis
+- **Gross Margin:** {profitability['gross_margin']:.1f}% {'✅' if profitability['gross_margin'] > 50 else '⚠️' if profitability['gross_margin'] > 30 else '❌'}
+- **Operating Margin:** {profitability['operating_margin']:.1f}% {'✅' if profitability['operating_margin'] > 15 else '⚠️' if profitability['operating_margin'] > 0 else '❌'}
+- **EBITDA Margin:** {profitability['ebitda_margin']:.1f}%
+- **Net Margin:** {profitability['net_margin']:.1f}%
+- **Overall Health:** {profitability['profit_health']}
+
+### 📈 Unit Economics
+- **LTV/CAC Ratio:** {unit_economics['ltv_cac_ratio']:.2f} {'✅ Healthy' if unit_economics['ltv_cac_ratio'] > 3 else '⚠️ Concerning' if unit_economics['ltv_cac_ratio'] > 1 else '❌ Unsustainable'}
+- **Payback Period:** {unit_economics['payback_months']:.1f} months
+- **Contribution Margin:** {unit_economics['contribution_margin_pct']:.1f}%
+- **Unit Economics:** {unit_economics['unit_economics_health']}
+
+### 💵 Cash Flow & Runway
+- **Monthly Burn:** ${cash_flow['monthly_burn']:,.0f}
+- **Runway:** {cash_flow['runway_months']:.1f} months {'✅' if cash_flow['runway_months'] > 18 else '⚠️' if cash_flow['runway_months'] > 12 else '❌'}
+- **Cash Conversion Cycle:** {cash_flow['cash_conversion_cycle']:.0f} days
+- **Cash Efficiency:** {cash_flow['cash_efficiency']}
+
+### 🎯 Break-even Analysis
+- **Break-even Revenue:** ${break_even['break_even_revenue']:,.0f}
+- **Margin of Safety:** {break_even['margin_of_safety']:.1f}%
+- **Months to Break-even:** {break_even['months_to_break_even'] if break_even['months_to_break_even'] < 60 else '60+'}
+
+### 📊 Industry Benchmarks ({industry.get('output', 'Services')})
+- **Gross Margin:** Company {benchmarks['comparisons']['gross_margin']['company']:.1f}% vs Industry {benchmarks['comparisons']['gross_margin']['industry']:.1f}% ({benchmarks['comparisons']['gross_margin']['performance']})
+- **Operating Margin:** Company {benchmarks['comparisons']['operating_margin']['company']:.1f}% vs Industry {benchmarks['comparisons']['operating_margin']['industry']:.1f}% ({benchmarks['comparisons']['operating_margin']['performance']})
+- **Overall Rating:** {benchmarks['overall_rating']}
+
+### 💡 Key Recommendations
+1. {'✅ Maintain strong margins' if profitability['gross_margin'] > 50 else '⚠️ Improve gross margins through pricing or cost reduction'}
+2. {'✅ Unit economics are healthy' if unit_economics['ltv_cac_ratio'] > 3 else '⚠️ Optimize CAC or increase LTV'}
+3. {'✅ Adequate runway' if cash_flow['runway_months'] > 18 else '❌ Consider fundraising or reducing burn'}
+4. {'✅ Near profitability' if break_even['margin_of_safety'] > 0 else '⚠️ Focus on path to profitability'}
+
+### 🎬 Action Items
+{chr(10).join(['• ' + rec for rec in benchmarks['recommendations']])}
+"""
+
+ text_msg = cl.Message(content=content)
+ await text_msg.send()
+
+ # Send chart
+ image = cl.Image(content=chart, name="company_analysis.png", display="inline")
+ await image.send(for_id=text_msg.id)
+
+ else: # Quick Analysis
+ await cl.Message(content="### ⚡ Quick Profitability Check\n\nProvide key metrics for rapid assessment.").send()
+
+ revenue = await ask_float("Monthly revenue ($ thousands)", 100.0)
+ costs = await ask_float("Monthly costs ($ thousands)", 120.0)
+ customers = await ask_int("Number of customers", 100, mi=1, ma=100000)
+
+ # Quick calculations
+ profit = revenue - costs
+ margin = (profit / revenue * 100) if revenue > 0 else -100
+ revenue_per_customer = (revenue * 1000) / customers if customers > 0 else 0
+
+ status = "📈 Profitable" if profit > 0 else "📉 Not Yet Profitable"
+ health = "Strong" if margin > 20 else "Moderate" if margin > 0 else "Needs Improvement"
+
+ content = f"""
+### ⚡ Quick Analysis Results
+
+**Status:** {status}
+**Net Margin:** {margin:.1f}%
+**Monthly Profit/Loss:** ${profit*1000:,.0f}
+**Revenue per Customer:** ${revenue_per_customer:.2f}
+**Financial Health:** {health}
+
+**Quick Insights:**
+- {'Focus on achieving profitability' if profit < 0 else 'Maintain positive trajectory'}
+- {'Reduce costs or increase pricing' if margin < 0 else 'Consider scaling'}
+- {'Improve customer monetization' if revenue_per_customer < 100 else 'Good customer value'}
+"""
+
+ await cl.Message(content=content).send()
+
+ return
+
+ # =============================
+ # ROUTE 10: WEB SCRAPING
+ # =============================
+ if user_input.startswith("scrape "):
+ # Remove thinking indicator
+ await thinking_msg.remove()
+
+ # Parse scrape command: "scrape " or "scrape "
+ parts = message.content.strip().split()
+
+ if len(parts) < 2:
+ await cl.Message(
+ content="⚠️ **Invalid scrape command**\n\n"
+ "**Usage:** `scrape ` or `scrape `\n\n"
+ "**Examples:**\n"
+ "• `scrape https://example.com` - Scrape paragraphs\n"
+ "• `scrape https://news.site h1,h2` - Scrape headlines\n"
+ "• `scrape https://blog.com .article` - Scrape articles by class"
+ ).send()
+ return
+
+ url = parts[1]
+ selector = parts[2] if len(parts) > 2 else "p" # Default to paragraphs
+
+ # Show loading message with URL
+ loading_msg = cl.Message(content=f"🔍 Scraping {url}...")
+ await loading_msg.send()
+
+ # Perform scraping
+ scrape_result = scrape_site(url, selector)
+
+ # Remove loading message
+ await loading_msg.remove()
+
+ if not scrape_result["success"]:
+ # Handle scraping failure
+ await cl.Message(
+ content=f"❌ **Scraping failed**\n\n"
+ f"**URL:** {url}\n"
+ f"**Error:** {scrape_result['error']}\n\n"
+ f"**Suggestions:**\n"
+ f"• Check if the URL is accessible in your browser\n"
+ f"• Try a different CSS selector (h1, div, span)\n"
+ f"• Some sites block automated requests"
+ ).send()
+ return
+
+ # Scraping successful - get results
+ scraped_data = scrape_result["data"]
+ count = scrape_result["count"]
+ size_mb = scrape_result["size_mb"]
+
+ # Show scraping summary
+ summary_msg = cl.Message(
+ content=f"✅ **Scraping successful!**\n\n"
+ f"**URL:** {url}\n"
+ f"**Selector:** `{selector}`\n"
+ f"**Items scraped:** {count}\n"
+ f"**Content size:** {size_mb}MB\n\n"
+ f"**Preview (first 3 items):**"
+ )
+ await summary_msg.send()
+
+ # Show preview of scraped content
+ preview_items = scraped_data.head(3)
+ preview_text = ""
+ for i, row in preview_items.iterrows():
+ content = row["content"]
+ if len(content) > 200:
+ content = content[:200] + "..."
+ preview_text += f"**{i+1}.** {content}\n\n"
+
+ preview_msg = cl.Message(content=preview_text)
+ await preview_msg.send()
+
+ # Store scraped data in session memory for later reference
+ session_id = get_session_id()
+ add_to_memory(session_id, "scraped_data", f"Scraped {count} items from {url}")
+
+ # Store the actual data in session for follow-up questions
+ cl.user_session.set("last_scraped_data", scraped_data)
+ cl.user_session.set("last_scraped_url", url)
+
+ # Generate AI analysis of scraped content
+ analysis_msg = cl.Message(content="🤖 **Analyzing scraped content...**")
+ await analysis_msg.send()
+
+ # Get current persona for contextual analysis
+ persona = get_current_persona()
+ analysis = analyze_scraped_content(scraped_data, url, persona)
+
+ # Remove analysis loading message
+ await analysis_msg.remove()
+
+ # Send AI analysis with persona indicator
+ analysis_response = f"**Website Analysis**\n\n{analysis}"
+ await cl.Message(content=analysis_response).send()
+
+ return
+
+ # =============================
+ # ROUTE 11: AUTO-GENERATED INSIGHTS
+ # =============================
+ if "insights" in user_input or "auto insights" in user_input or "generate insights" in user_input:
+ msg = cl.Message(content="🤖 Analyzing data and generating insights...")
+ await msg.send()
+
+ insights = generate_insights(df, "general")
+ insights_message = format_insights_message(insights)
+
+ await msg.remove()
+ await cl.Message(content=insights_message).send()
+ return
+
+ # =============================
+ # ROUTE 12: DATA EXPORT & DOWNLOAD
+ # =============================
+ if any(keyword in user_input for keyword in ["export data", "download data", "export csv", "download csv", "export json", "download json"]):
+ await thinking_msg.remove()
+
+ # Determine export format
+ if "json" in user_input:
+ format_type = "json"
+ else:
+ format_type = "csv" # Default to CSV
+
+ # Send data export
+ await send_data_export(df, "startup_dataset", format_type)
+ return
+
+ # =============================
+ # ROUTE 13: AI-POWERED Q&A WITH MEMORY & PERSONA (DEFAULT)
+ # =============================
+ # If no specific pattern matched, use GPT-4 for natural language response
+
+ # -------------------------
+ # SESSION MEMORY & PERSONA INTEGRATION
+ # -------------------------
+ session_id = cl.user_session.get("session_id", get_session_id())
+ persona = get_current_persona()
+ memory_context = get_memory_context(session_id)
+
+ # Add user message to memory
+ add_to_memory(session_id, "user", message.content)
+
+ # Save user message to database if authenticated
+ if AUTH_AVAILABLE and auth_status["authenticated"]:
+ auth_manager.save_conversation(
+ user_id=auth_status["user_id"],
+ chainlit_session_id=session_id,
+ role="user",
+ content=message.content,
+ persona_mode=get_current_persona()["name"].lower().replace(" mode", ""),
+ metadata={"timestamp": timestamp, "raw_input": user_input_raw}
+ )
+
+ # -------------------------
+ # PREPARE ENHANCED CONTEXT
+ # -------------------------
+ # Convert DataFrame to string for inclusion in prompt
+ df_str = df.to_string(index=False)
+
+ # Auto-search enhancement for relevant queries
+ search_results = None
+ search_context = ""
+ current_persona_name = cl.user_session.get("persona", "founder")
+
+ # -------------------------
+ # USE THREAD-AWARE PROCESSING FOR LANGSMITH
+ # -------------------------
+ # Check if LangSmith is enabled and use thread context
+ if langsmith_client:
+ # Use the thread-aware processing function with LangSmith tracing
+ enhanced_question = f"Dataset:\n{df_str}\n\nUser question: {message.content}"
+
+ ai_response = process_with_thread_context(
+ question=enhanced_question,
+ session_id=session_id,
+ get_chat_history=True, # Always use history for continuity
+ persona=persona
+ )
+ else:
+ # Fallback to standard processing without LangSmith tracing
+ enhanced_system_prompt = (
+ f"{persona['system_prompt']}\n\n"
+ "Available commands you can suggest:\n"
+ "- 'timeline' - failure timeline\n"
+ "- 'funding vs burn' - funding vs burn rate\n"
+ "- 'interactive dashboard' - interactive scatter plot\n"
+ "- 'interactive timeline' - interactive failure timeline\n"
+ "- 'sector dashboard' - multi-chart sector analysis\n"
+ "- 'benchmark' - compare founder idea to dataset\n"
+ "- 'portfolio' - analyze multiple startups with heatmap\n"
+ "- 'insights' - auto-generate risks and recommendations\n"
+ "- 'investor mode' / 'founder mode' - switch analysis perspective\n\n"
+ "Remember conversation history when relevant. "
+ "If the user asks a question that would be better answered with a visualization, "
+ "suggest they try one of these commands."
+ )
+
+ # Use LangSmith thread management for conversation continuity
+ current_persona = PERSONAS[current_persona_name]
+
+ # Check if we should use conversation history (after first message in session)
+ session_metadata = get_session_metadata(session_id)
+ use_history = session_metadata["conversation_count"] > 0
+
+ # Determine if this query would benefit from real-time search
+ search_triggers = [
+ "market", "competition", "trends", "latest", "recent", "current", "2024", "2025",
+ "startup", "funding", "investment", "industry", "valuation", "growth", "exit"
+ ]
+
+ should_search = any(trigger in message.content.lower() for trigger in search_triggers)
+
+ if should_search and search_api_key:
+ # Generate persona-specific search query
+ search_query = generate_search_query(message.content, current_persona_name)
+
+ if search_query:
+ # Show search indicator
+ search_msg = await cl.Message(content="🔍 Searching for latest market intelligence...").send()
+
+ # Perform search
+ search_results = search_internet(search_query, count=3)
+
+ # Remove search indicator
+ await search_msg.remove()
+
+ if search_results["success"]:
+ search_context = f"\n\nRECENT MARKET INTELLIGENCE:\n"
+ for i, result in enumerate(search_results["results"], 1):
+ search_context += f"\n{i}. **{result['title']}**\n"
+ search_context += f" {result['description']}\n"
+ search_context += f" Source: {result['url']}\n"
+
+ # Enhance user question with dataset and search context
+ enhanced_question = f"Dataset:\n{df_str}\n\nUser question: {message.content}"
+ if search_context:
+ enhanced_question += search_context
+
+ # Use the LangSmith chat pipeline for thread-aware responses
+ ai_response = navada_chat_pipeline(
+ question=enhanced_question,
+ session_id=session_id,
+ persona=current_persona_name,
+ get_chat_history=use_history
+ )
+
+ # -------------------------
+ # SEND AI RESPONSE WITH PERSONA INDICATOR
+ # -------------------------
+ # Add AI response to memory
+ add_to_memory(session_id, "assistant", ai_response)
+
+ # Save AI response to database if authenticated
+ if AUTH_AVAILABLE and auth_status["authenticated"]:
+ auth_manager.save_conversation(
+ user_id=auth_status["user_id"],
+ chainlit_session_id=session_id,
+ role="assistant",
+ content=ai_response,
+ persona_mode=get_current_persona()["name"].lower().replace(" mode", ""),
+ metadata={
+ "search_used": bool(search_context),
+ "search_results_count": len(search_results.get("results", [])) if search_results else 0,
+ "persona": current_persona_name,
+ "enhanced_with_search": bool(search_context and search_results and search_results["success"])
+ }
+ )
+
+ # Add persona indicator to response (with safety check)
+ if not ai_response or ai_response.strip() == "":
+ ai_response = "I apologize, but I encountered an issue generating a response. Please try again."
+
+ response_with_persona = ai_response
+
+ # Add search intelligence indicator if search was used
+ if search_context and search_results and search_results["success"]:
+ response_with_persona += f"\n\n---\n\n*🔍 Enhanced with real-time market intelligence from {len(search_results['results'])} sources*"
+
+ # Generate and append auto-insights for analysis-type responses
+ if any(keyword in user_input for keyword in ["analyze", "analysis", "compare", "evaluate"]):
+ insights = generate_insights(df, "analysis")
+ if insights["risks"] or insights["opportunities"] or insights["recommendations"]:
+ response_with_persona += "\n\n" + format_insights_message(insights)
+
+ # Remove thinking indicator before sending final response
+ await thinking_msg.remove()
+
+ # Send the text response
+ message = await cl.Message(content=response_with_persona).send()
+
+ # -------------------------
+ # AUTO TEXT-TO-SPEECH
+ # -------------------------
+ # Check if TTS is enabled in user settings
+ tts_enabled = cl.user_session.get("tts_enabled", False)
+ if tts_enabled and response_with_persona:
+ try:
+ # Clean response text for TTS (remove markdown, emojis, etc.)
+ clean_text = clean_text_for_tts(response_with_persona)
+ if clean_text.strip():
+ # Generate and send audio
+ audio_element = await generate_speech(clean_text)
+ if audio_element:
+ await cl.Message(
+ content="🔊 Audio version:",
+ elements=[audio_element]
+ ).send()
+ except Exception as e:
+ print(f"⚠️ Auto-TTS failed: {e}")
+
+def clean_text_for_tts(text: str) -> str:
+ """Clean text for text-to-speech by removing markdown and special characters."""
+ import re
+
+ # Remove markdown formatting
+ text = re.sub(r'\*\*(.*?)\*\*', r'\1', text) # Bold
+ text = re.sub(r'\*(.*?)\*', r'\1', text) # Italic
+ text = re.sub(r'`(.*?)`', r'\1', text) # Code
+ text = re.sub(r'#+ ', '', text) # Headers
+ text = re.sub(r'\[.*?\]\(.*?\)', '', text) # Links
+ text = re.sub(r'---+', '', text) # Horizontal rules
+
+ # Remove emojis and special characters
+ text = re.sub(r'[🔍🚀💼📊🔹⚡📈🎯💡🔧📚⏰📝🔗✅⚠️🎙️🔊📢]', '', text)
+
+ # Clean up multiple spaces and newlines
+ text = re.sub(r'\n+', '. ', text)
+ text = re.sub(r'\s+', ' ', text)
+
+ # Limit length for TTS
+ return text.strip()[:1000]
+
+# =============================
+# LANGSMITH PLATFORM OPTIMIZATION
+# =============================
+
+@traceable
+def initialize_knowledge_base():
+ """Initialize vector store with external LangChain database or fallback to local."""
+ global vector_store
+
+ if not vector_store:
+ # Try to connect to external LangChain database first
+ if langchain_database_id and CHROMA_AVAILABLE:
+ try:
+ # Initialize LangChain components if not already done
+ embeddings_func, _ = initialize_langchain_components()
+ # Connect to external LangChain database using the provided ID
+ vector_store = Chroma(
+ embedding_function=embeddings_func,
+ persist_directory=f"./langchain_db_{langchain_database_id}"
+ )
+ print(f"✅ Connected to LangChain database: {langchain_database_id[:8]}...")
+ except Exception as e:
+ print(f"⚠️ Failed to connect to LangChain database: {e}")
+ # Fallback to local knowledge base
+ vector_store = _create_local_knowledge_base()
+ else:
+ # Fallback to local knowledge base
+ vector_store = _create_local_knowledge_base()
+
+ return vector_store
+
+def _create_local_knowledge_base():
+ """Create local knowledge base as fallback."""
+ # Startup knowledge optimized for LangSmith platform
+ startup_knowledge = [
+ "Successful startups show product-market fit within 18-24 months",
+ "SaaS startups should aim for 20% month-over-month growth",
+ "B2B startups need longer sales cycles but higher LTV",
+ "Consumer apps require viral growth and strong engagement",
+ "Hardware startups need more capital and longer dev cycles",
+ "Fintech faces regulatory challenges but high market opportunity",
+ "AI/ML startups need strong technical teams and data advantages",
+ "E-commerce should focus on unit economics and CAC",
+ "Database ID: " + (langchain_database_id or "local-fallback")
+ ]
+
+ documents = [Document(page_content=text) for text in startup_knowledge]
+
+ return Chroma.from_documents(
+ documents=documents,
+ embedding=embeddings,
+ persist_directory="./chroma_db"
+ )
+
+@traceable
+def enhanced_rag_response(user_query: str, context: str) -> str:
+ """Generate enhanced response using RAG system optimized for LangSmith."""
+ if not vector_store:
+ initialize_knowledge_base()
+
+ # Query knowledge base
+ docs = vector_store.similarity_search(user_query, k=3)
+ relevant_knowledge = [doc.page_content for doc in docs]
+
+ # Enhanced prompt with RAG context
+ rag_prompt = f"""
+ Based on startup knowledge and context, provide actionable insights:
+
+ Relevant Knowledge:
+ {chr(10).join(relevant_knowledge)}
+
+ Context: {context}
+ Query: {user_query}
+
+ Provide detailed, actionable response with specific recommendations.
+ """
+
+ response = llm.invoke(rag_prompt)
+ return response.content
+
+# Health check endpoint for LangSmith platform
+@cl.on_settings_update
+async def health_check():
+ """Health check endpoint for LangSmith monitoring."""
+ return {"status": "healthy", "app": "NAVADA", "version": "1.0.0"}
+
+# =============================
+# LANGGRAPH AGENT EXPORT
+# =============================
+# Export agent for LangGraph deployment
\ No newline at end of file