# vc_gemini/server/generate_dataset.py
# Uploaded by shrads78 using huggingface_hub (commit 1ae84ae, verified).
import json
import random
import uuid
def generate_competitors():
    """Write the fixed roster of competitor funds to ``competitors.json``.

    Every record has the keys ``id``, ``name``, ``speed_modifier``,
    ``valuation_cap``, ``primary_sectors`` and ``secondary_sectors`` (in that
    order).  The file is created in the current working directory as
    2-space-indented JSON, and a one-line summary is printed afterwards.

    Returns:
        None.
    """
    # Column order here is the key order of every emitted JSON object.
    field_names = (
        "id",
        "name",
        "speed_modifier",
        "valuation_cap",
        "primary_sectors",
        "secondary_sectors",
    )
    rows = [
        ("a16z_bot", "Andreessen Horowitz", 0.8, 2.0,
         ["Agentic AI / Automation", "FinTech / Web3"],
         ["AI/ML Foundation Models", "Enterprise SaaS"]),
        ("sequoia_bot", "Sequoia Capital", 0.9, 1.5,
         ["AI/ML Foundation Models", "Enterprise SaaS"],
         ["Agentic AI / Automation", "Consumer Social"]),
        ("founders_fund_bot", "Founders Fund", 1.2, 2.5,
         ["DeepTech / Defense", "Hardware / Semiconductors"],
         ["BioTech / Therapeutics", "ClimateTech / Energy"]),
        ("greylock_bot", "Greylock", 1.0, 1.2,
         ["Enterprise SaaS", "Consumer Social"],
         ["FinTech / Web3", "EdTech / Future of Work"]),
        ("khosla_bot", "Khosla Ventures", 1.0, 1.5,
         ["DeepTech / Defense", "BioTech / Therapeutics"],
         ["ClimateTech / Energy", "Hardware / Semiconductors"]),
        ("tiger_global_bot", "Tiger Global", 0.5, 3.0,
         ["Enterprise SaaS", "Consumer Social"],
         ["AI/ML Foundation Models", "FinTech / Web3"]),
        ("benchmark_bot", "Benchmark", 1.5, 1.1,
         ["Consumer Social", "Enterprise SaaS"],
         ["Agentic AI / Automation", "FinTech / Web3"]),
        ("index_ventures_bot", "Index Ventures", 1.0, 1.3,
         ["Enterprise SaaS", "FinTech / Web3"],
         ["Consumer Social", "EdTech / Future of Work"]),
        ("lux_bot", "Lux Capital", 1.1, 1.4,
         ["DeepTech / Defense", "BioTech / Therapeutics"],
         ["Hardware / Semiconductors", "Robotics / Automation"]),
        ("micro_fund_alpha", "Alpha MicroFund", 0.7, 0.8,
         ["Consumer Social", "EdTech / Future of Work"],
         ["Enterprise SaaS", "Agentic AI / Automation"]),
        ("micro_fund_beta", "Beta MicroFund", 0.6, 0.9,
         ["Robotics / Automation", "Hardware / Semiconductors"],
         ["DeepTech / Defense", "ClimateTech / Energy"]),
        ("y_combinator_bot", "Y Combinator", 0.4, 0.7,
         ["Agentic AI / Automation", "AI/ML Foundation Models"],
         ["Enterprise SaaS", "Consumer Social", "FinTech / Web3"]),
    ]
    roster = [dict(zip(field_names, row)) for row in rows]

    serialized = json.dumps(roster, indent=2)
    with open("competitors.json", "w") as out_file:
        out_file.write(serialized)

    print(f"Generated {len(roster)} competitors.")
def _sample_financials(sector):
    """Draw (raise_amount, fair_eval, true_potential_mult) for *sector*.

    ``fair_eval`` is returned already rounded to the nearest 100k.
    """
    if sector in ("AI/ML Foundation Models", "Agentic AI / Automation"):
        raise_amount = random.randint(3, 15) * 1000000
        fair_eval = raise_amount * random.uniform(5.0, 10.0)
        true_potential_mult = random.uniform(0.1, 15.0)  # High variance
    elif sector in ("Enterprise SaaS", "EdTech / Future of Work", "Robotics / Automation"):
        raise_amount = random.randint(1, 4) * 1000000
        fair_eval = raise_amount * random.uniform(3.0, 6.0)
        true_potential_mult = random.uniform(0.5, 4.0)  # Moderate variance
    elif sector in ("BioTech / Therapeutics", "ClimateTech / Energy", "Hardware / Semiconductors"):
        raise_amount = random.randint(5, 20) * 1000000
        fair_eval = raise_amount * random.uniform(2.0, 4.0)
        # Boom or bust, capital intensive
        true_potential_mult = random.choice([0.0, 0.0, 0.5, 5.0, 20.0])
    elif sector == "DeepTech / Defense":
        raise_amount = random.randint(2, 10) * 1000000
        fair_eval = raise_amount * random.uniform(3.0, 8.0)
        true_potential_mult = random.uniform(0.0, 10.0)
    else:  # Consumer Social & FinTech
        raise_amount = random.randint(1, 5) * 1000000
        fair_eval = raise_amount * random.uniform(2.0, 6.0)
        true_potential_mult = random.choice([0.0, 0.0, 0.0, 0.5, 1.0, 50.0])
    return raise_amount, round(fair_eval, -5), true_potential_mult


def _sample_founder_priorities(fair_eval):
    """Pick exactly one founder priority; return (p_val, p_board, p_speed, hint)."""
    priority_roll = random.random()
    if priority_roll < 0.33:
        # Quote the same floor that ``min_valuation`` enforces (0.9 * fair).
        # fair_eval is a multiple of 100k, so two decimals of $M are exact;
        # the ":g" format drops trailing zeros ("9" rather than "9.00").
        floor_millions = round(fair_eval * 0.9 / 1000000, 2)
        hint = (
            "I'm looking for a premium valuation. "
            f"We won't take less than ${floor_millions:g}M pre-money."
        )
        return True, False, False, hint
    if priority_roll < 0.66:
        hint = (
            "We aren't optimizing for every last dollar of valuation. "
            "We just want to retain 2 board seats so we control our destiny."
        )
        return False, True, False, hint
    hint = (
        "We are running low on runway. We need to close this round extremely fast. "
        "We will take a fair price if you move now."
    )
    return False, False, True, hint


def _sample_cap_table(founder_name):
    """Build a cap table: founder row, optional seed row, one option-pool row."""
    cap_table = [
        {"Shareholder": f"Founder {founder_name}", "Shares": random.randint(4000000, 8000000), "Type": "Common"}
    ]
    if random.random() > 0.5:
        cap_table.append({"Shareholder": "Seed Investors", "Shares": random.randint(1000000, 3000000), "Type": "Preferred"})
    # 30% chance of the larger "Unissued Option Pool" (the messy options trap).
    if random.random() > 0.7:
        cap_table.append({"Shareholder": "Unissued Option Pool", "Shares": random.randint(1000000, 2500000), "Type": "Options"})
    else:
        cap_table.append({"Shareholder": "Employee Option Pool", "Shares": random.randint(500000, 1000000), "Type": "Options"})
    return cap_table


def generate_fund_scenarios(count=1000):
    """Generate random startup fundraising scenarios into ``fund_scenarios.json``.

    Each scenario gets a random sector, sector-dependent raise amount / fair
    valuation / potential multiplier, a founder with exactly one priority
    (valuation, board control or speed) plus a matching hint, a cap table and
    a ``win_conditions`` record derived from the fair valuation and priority.
    Randomness comes from the module-level ``random`` generator, so seeding it
    beforehand fixes everything except ``startup_id`` (taken from ``uuid4``).

    Args:
        count: Number of scenarios to generate.  Zero or a negative value
            produces an empty list.

    Returns:
        None.  The scenarios are written as 2-space-indented JSON to
        ``fund_scenarios.json`` in the current working directory, and a
        one-line summary is printed.
    """
    sectors = [
        "AI/ML Foundation Models",
        "Agentic AI / Automation",
        "BioTech / Therapeutics",
        "DeepTech / Defense",
        "Consumer Social",
        "FinTech / Web3",
        "ClimateTech / Energy",
        "Robotics / Automation",
        "EdTech / Future of Work",
        "Hardware / Semiconductors",
        # Previously missing: the valuation logic has a branch for it, but it
        # could never be drawn, so no scenario was ever in this sector.
        "Enterprise SaaS",
    ]
    first_names = ["Alice", "Bob", "Charlie", "Diana", "Ethan", "Faye", "George", "Hannah", "Ian", "Jane", "Kevin", "Laura", "Mike", "Nina", "Oscar", "Peggy", "Uma", "Viktor", "Wendy", "Zane"]
    company_prefixes = ["Quantum", "Aura", "Nebula", "Apex", "Vanguard", "Zenith", "Nova", "Stratos", "Omni", "Nex", "Data", "Bio", "Cranium", "Cyber", "Astro", "Alpha", "Flux", "Core", "Grid", "Pioneer"]
    company_suffixes = ["AI", "Dynamics", "Systems", "Labs", "Networks", "Technologies", "Solutions", "Health", "Space", "Genomics", "Robotics", "Analytics", "Agents", "Finance", "Energy", "Chip"]

    scenarios = []
    for _ in range(count):
        # The order of random draws below matches the original implementation.
        sector = random.choice(sectors)
        raise_amount, fair_eval, true_potential_mult = _sample_financials(sector)

        founder_name = random.choice(first_names)
        company_name = f"{random.choice(company_prefixes)} {random.choice(company_suffixes)}"

        p_val, p_board, p_speed, hint = _sample_founder_priorities(fair_eval)
        cap_table = _sample_cap_table(founder_name)

        scenarios.append({
            "startup_id": str(uuid.uuid4())[:8],
            "startup_name": company_name,
            "sector": sector,
            "raise_amount_str": f"${int(raise_amount/1000000)}M",
            "raise_amount": raise_amount,
            "true_potential_multiplier": round(true_potential_mult, 2),
            "cap_table": cap_table,
            "founder_priorities": {
                "prioritize_valuation": p_val,
                "prioritize_board_control": p_board,
                "prioritize_speed": p_speed,
            },
            "founder_hints": {
                "valuation_hint": hint,
            },
            "win_conditions": {
                "max_board_seats": 1 if p_board else 3,
                "min_valuation": fair_eval * (0.9 if p_val else 0.6),
                "fair_valuation": fair_eval,
                "overpaid_valuation": fair_eval * 1.3,
                "max_turns": 4 if p_speed else 8,
            },
        })

    with open("fund_scenarios.json", "w") as f:
        json.dump(scenarios, f, indent=2)

    # Report what was actually written (differs from ``count`` if it is negative).
    print(f"Generated {len(scenarios)} fund scenarios.")
if __name__ == "__main__":
    # Script entry point: write competitors.json first, then
    # fund_scenarios.json with 1000 scenarios.
    generate_competitors()
    generate_fund_scenarios(count=1000)