vc_gemini_v0 / server /vc_gemini_v0_environment.py
shrads78's picture
Upload folder using huggingface_hub
efe6a6d verified
import os
import json
import random
import tempfile
import csv
import shutil
from uuid import uuid4
from typing import Dict, Any, List
from openenv.core.env_server.interfaces import Environment
from openenv.core.env_server.types import State
try:
from vc_gemini_v0.models import VcGeminiV0Action, VcGeminiV0Observation
except ImportError:
try:
from ..models import VcGeminiV0Action, VcGeminiV0Observation
except ImportError:
from models import VcGeminiV0Action, VcGeminiV0Observation
class VcGeminiV0Environment(Environment):
    """Single-quarter venture-capital fund simulation (curriculum V0).

    Each episode the agent receives a fund budget, a handful of startup
    pitches whose "data rooms" are written to a temporary workspace
    directory, and a limited number of turns.  Supported actions are
    ``read_file``, ``invest`` and ``wait``.  When the quarter ends (inbox
    emptied or turns exhausted) every active holding is liquidated at its
    ``true_potential_multiplier`` and the reward is derived from the
    resulting TVPI.
    """

    SUPPORTS_CONCURRENT_SESSIONS: bool = True

    # Tunables that were previously repeated as inline literals.
    INITIAL_FUND_BUDGET: float = 100000000.0
    INVESTMENT_TICKET: float = 20000000.0  # fixed cheque size per deal
    EQUITY_PERCENT: float = 0.10  # flat 10% per deal
    PITCHES_PER_QUARTER: int = 3
    HURDLE_TVPI: float = 1.20

    def __init__(self):
        """Create the environment and load the scenario/competitor datasets.

        Raises:
            OSError: if ``fund_scenarios.json`` or ``competitors.json`` cannot
                be opened in the directory one level above this file.
            json.JSONDecodeError: if either dataset is not valid JSON.
        """
        self._state = State(episode_id=str(uuid4()), step_count=0)
        self.workspace_dir = tempfile.mkdtemp(prefix="vc_env_v0_")
        self.fund_budget = self.INITIAL_FUND_BUDGET
        self.portfolio = []
        self.quarter = 1
        self.MAX_QUARTERS = 2  # 1 playable quarter, Quarter 2 is payout
        self.MAX_TURNS_PER_QUARTER = 5
        self.turns_remaining = self.MAX_TURNS_PER_QUARTER

        # Load Datasets
        base_dir = os.path.dirname(os.path.dirname(__file__))
        scenario_path = os.path.join(base_dir, "fund_scenarios.json")
        with open(scenario_path, "r", encoding="utf-8") as f:
            self.scenarios = json.load(f)
        comp_path = os.path.join(base_dir, "competitors.json")
        with open(comp_path, "r", encoding="utf-8") as f:
            self.competitors = json.load(f)

        self.available_scenarios = []
        self.active_competitors = []
        self.inbox_pitches = []

    def reset(self) -> VcGeminiV0Observation:
        """Start a fresh episode and return the first quarter's observation."""
        self._state = State(episode_id=str(uuid4()), step_count=0)
        self.fund_budget = self.INITIAL_FUND_BUDGET
        self.portfolio = []
        self.quarter = 1
        # 1 random rival for the episode (Curriculum V0 simplification)
        self.active_competitors = random.sample(self.competitors, 1)
        # Random startups for the single quarter
        self.available_scenarios = random.sample(
            self.scenarios, self.PITCHES_PER_QUARTER
        )
        return self._setup_quarter()

    def _mark_to_market(self):
        """Update paper valuations in place and return the interim reward.

        Each active holding has a 30% chance of a valuation event.  Holdings
        with ``true_potential_multiplier`` above 1.0 are marked up; holdings
        whose multiplier is exactly 0.0 are marked down (floored at 0.1x).
        The reward is 10% of the summed multiplier changes.
        """
        interim_reward = 0.0
        for inv in self.portfolio:
            if not inv["active"]:
                continue
            # Simulate a funding round event every quarter
            if random.random() < 0.3:  # 30% chance for a valuation event
                # Up round vs Down round bias based on their true potential
                if inv["true_potential_multiplier"] > 1.0:
                    rvpi_bump = random.uniform(0.1, 0.5)
                    inv["paper_multiplier"] += rvpi_bump
                    # Soft reward for good paper marks
                    interim_reward += rvpi_bump * 0.1
                elif inv["true_potential_multiplier"] == 0.0:
                    # Bounds are given high-to-low; uniform() accepts either
                    # order and the draw is always negative here.
                    rvpi_drop = random.uniform(-0.1, -0.9)
                    inv["paper_multiplier"] = max(
                        0.1, inv["paper_multiplier"] + rvpi_drop
                    )
                    interim_reward += rvpi_drop * 0.1
        return interim_reward

    def _setup_quarter(self) -> VcGeminiV0Observation:
        """Prepare the current quarter, or finish the episode if none remain.

        Recreates the workspace directory, writes a ``cap_table.csv`` and
        ``pitch_deck.txt`` for every pitch of the quarter, and returns the
        observation describing budget, portfolio, rivals and inbox.
        """
        if self.quarter >= self.MAX_QUARTERS:
            return self._calculate_final_tvpi()

        # Clean up old Data Rooms
        if os.path.exists(self.workspace_dir):
            shutil.rmtree(self.workspace_dir, ignore_errors=True)
        self.workspace_dir = tempfile.mkdtemp(prefix=f"vc_q{self.quarter}_")
        self.turns_remaining = self.MAX_TURNS_PER_QUARTER

        # Draw the pitches for this quarter
        start_idx = (self.quarter - 1) * self.PITCHES_PER_QUARTER
        end_idx = start_idx + self.PITCHES_PER_QUARTER
        self.inbox_pitches = self.available_scenarios[start_idx:end_idx]

        pitch_names = []
        # Setup Data Rooms for the pitches
        for scenario in self.inbox_pitches:
            startup_dir = os.path.join(
                self.workspace_dir, scenario["startup_name"].replace(" ", "_")
            )
            os.makedirs(startup_dir, exist_ok=True)

            cap_table_path = os.path.join(startup_dir, "cap_table.csv")
            with open(cap_table_path, "w", newline="", encoding="utf-8") as f:
                writer = csv.writer(f)
                writer.writerow(["Shareholder", "Shares", "Type"])
                for row in scenario["cap_table"]:
                    writer.writerow(
                        [row["Shareholder"], row["Shares"], row["Type"]]
                    )

            deck_path = os.path.join(startup_dir, "pitch_deck.txt")
            with open(deck_path, "w", encoding="utf-8") as f:
                f.write(f"{scenario['startup_name']} Pitch Deck\n")
                f.write(f"Sector: {scenario['sector']}\n")
                f.write(f"We are raising {scenario['raise_amount_str']}.\n")
                f.write("Note: Email the founder if you have diligence questions.\n")

            pitch_names.append(
                f"- {scenario['startup_name']} ({scenario['sector']})"
            )

        # Mark to market BEFORE rendering the portfolio so the paper values
        # shown in the observation match the marks that produced the reward.
        interim_reward = self._mark_to_market() if self.quarter > 1 else 0.0

        # Build Portfolio Status String
        port_status = "Empty"
        if self.portfolio:
            port_items = []
            for p in self.portfolio:
                status = "ACTIVE" if p["active"] else "SOLD"
                val = p["invested_amount"] * p["paper_multiplier"]
                port_items.append(
                    f"{p['startup_name']} [{status}]: Paper Value ${val:,.2f} ({p['paper_multiplier']:.2f}x)"
                )
            port_status = "\n".join(port_items)

        comps_str = ", ".join([c["name"] for c in self.active_competitors])
        obs_text = (
            f"--- QUARTER {self.quarter} (Turns Remaining: {self.turns_remaining}) ---\n"
            f"Fund Budget Remaining: ${self.fund_budget:,.2f}\n"
            f"Active Portfolio:\n{port_status}\n\n"
            f"Market Rumor: Active Rival Funds this decade are {comps_str}.\n\n"
            f"New Pitches in Inbox:\n" + "\n".join(pitch_names) + "\n\n"
            f"Their Data Rooms are mounted inside: {self.workspace_dir}. "
            f"You can 'read_file', 'invest', or 'wait'."
        )

        return VcGeminiV0Observation(
            observation_text=obs_text,
            inbox=[],
            data={
                "workspace_dir": self.workspace_dir,
                "quarter": self.quarter,
                "budget": self.fund_budget,
                "turns_left": self.turns_remaining,
            },
            done=False,
            reward=interim_reward,
        )

    def _calculate_final_tvpi(self) -> VcGeminiV0Observation:
        """Liquidate the portfolio and return the terminal observation.

        TVPI is (exit proceeds + uninvested budget) / initial fund size.
        Below the hurdle the reward is ``tvpi - 1.0``; otherwise it is the
        TVPI itself.
        """
        total_returned_capital = self._run_ipo_phase()
        total_fund_value = total_returned_capital + self.fund_budget
        tvpi = total_fund_value / self.INITIAL_FUND_BUDGET
        missed_hurdle = tvpi < self.HURDLE_TVPI

        # Opportunity Cost / Hurdle Penalty
        if missed_hurdle:
            final_reward = tvpi - 1.0  # Soft penalty for missing hurdle
        else:
            final_reward = tvpi

        summary = "\n=== FUND LIFE COMPLETE (1 QUARTER) ===\n"
        summary += f"Total Liquid Capital Returned: ${total_fund_value:,.2f}\n"
        summary += f"Gross Fund TVPI: {tvpi:.2f}x\n"
        if missed_hurdle:
            summary += "\nLPs are disappointed. You failed to beat the hurdle rate."
        else:
            summary += "\nLPs are ecstatic. You successfully managed the fund!"

        return VcGeminiV0Observation(
            observation_text=summary,
            inbox=[],
            data={
                "tvpi": tvpi,
                "final_reward": final_reward,
                "portfolio": self.portfolio,
            },
            done=True,
            reward=final_reward,
        )

    def _run_ipo_phase(self) -> float:
        """Exit every active holding at its true multiplier; return proceeds.

        Each exited holding is flagged inactive and its paper multiplier is
        set to the realised multiplier.  Already-inactive holdings add nothing.
        """
        total = 0.0
        for p in self.portfolio:
            if not p["active"]:
                continue
            total += p["invested_amount"] * p["true_potential_multiplier"]
            p["active"] = False
            p["paper_multiplier"] = p["true_potential_multiplier"]
        return total

    def _get_target_scenario(self, target_name: str):
        """Return the inbox pitch matching ``target_name``, or ``None``.

        Matching ignores case and spaces.  An exact name match wins; failing
        that, the first pitch where either name is a substring of the other
        is returned.  Non-string or empty input yields ``None``.
        """
        if not isinstance(target_name, str):
            return None
        target = target_name.lower().replace(" ", "")
        if not target:
            return None

        candidates = [
            (s["startup_name"].lower().replace(" ", ""), s)
            for s in self.inbox_pitches
        ]
        # Prefer an exact hit so a short/overlapping name cannot shadow it.
        for name, scenario in candidates:
            if name == target:
                return scenario
        # Flexible matching: either substring exists in the other
        for name, scenario in candidates:
            if name in target or target in name:
                return scenario
        return None

    def _resolve_workspace_path(self, raw_path):
        """Map an agent-supplied path into the workspace, or return ``None``.

        Spaces are replaced with underscores and relative paths are joined to
        ``workspace_dir`` (as before).  The result is rejected when, after
        resolving symlinks and ``..`` segments, it falls outside the
        workspace — otherwise the agent could read arbitrary host files,
        including the scenario dataset with the hidden multipliers.
        """
        path = raw_path.replace(" ", "_")
        if not os.path.isabs(path):
            path = os.path.join(self.workspace_dir, path)
        root = os.path.realpath(self.workspace_dir)
        resolved = os.path.realpath(path)
        try:
            inside = os.path.commonpath([root, resolved]) == root
        except ValueError:
            # e.g. paths on different drives
            inside = False
        return path if inside else None

    def step(self, action: VcGeminiV0Action) -> VcGeminiV0Observation:
        """Apply one agent action and return the resulting observation.

        ``action.action_type`` selects ``read_file`` (parameter ``path``),
        ``invest`` (parameter ``startup_name``) or ``wait``; anything else
        yields an "Invalid action_type" observation.  Every call consumes one
        turn.  The quarter advances when turns run out or the inbox empties.
        """
        self._state.step_count += 1
        a_type = action.action_type
        params = action.parameters or {}  # tolerate a missing parameter dict

        # Logging for HF Debugging
        print(f"[ENV LOG] Action: {a_type}, Params: {params}")

        # Once the fund life is over, refuse to mutate state any further;
        # otherwise leftover pitches could still be bought after payout.
        if self.quarter >= self.MAX_QUARTERS:
            return VcGeminiV0Observation(
                observation_text="The fund life is already complete. Call reset() to start a new episode.",
                inbox=[],
                data={},
                done=True,
                reward=0.0,
            )

        obs_text = ""
        inbox_msgs = []
        data_res = {}

        # Consume Turn Budget
        # NOTE(review): the turn is consumed before the action is handled, so
        # the action submitted on the last turn is discarded — confirm this
        # is the intended game rule.
        self.turns_remaining -= 1
        if self.turns_remaining <= 0:
            # Time's up for the quarter
            obs_text = "You ran out of Time (Turns) for this Quarter. The remaining startups in your inbox raised capital from Rivals. "
            self.quarter += 1
            obs = self._setup_quarter()
            obs.observation_text = obs_text + "\n\n" + obs.observation_text
            return obs

        # Ensure action targets a specific startup if required
        startup_name = params.get("startup_name", "")
        scen = self._get_target_scenario(startup_name)

        # For read_file, startup_name is optional; only invest requires it.
        if a_type == "invest" and not scen:
            print(f"[ENV LOG] ERROR: Startup '{startup_name}' not found in inbox.")
            return VcGeminiV0Observation(
                observation_text=f"Error: Could not find startup matching '{startup_name}' in your current Quarter inbox.",
                inbox=[], data={}, done=False, reward=0.0
            )

        if a_type == "read_file":
            raw_path = params.get("path", "")
            raw_path = "" if raw_path is None else str(raw_path)
            # Be forgiving with spaces vs underscores in the startup directory name
            path = self._resolve_workspace_path(raw_path)
            if path is None:
                obs_text = f"Access denied: {raw_path} is outside the Data Room workspace."
            elif os.path.exists(path) and os.path.isfile(path):
                try:
                    with open(path, "r", encoding="utf-8") as f:
                        content = f.read()
                except (OSError, UnicodeDecodeError) as exc:
                    obs_text = f"Could not read {path}: {exc}"
                else:
                    obs_text = f"Read {path} successfully."
                    data_res["file_content"] = content
            else:
                obs_text = f"File not found: {path}"

        elif a_type == "invest":
            amount = self.INVESTMENT_TICKET
            if amount > self.fund_budget:
                obs_text = f"You don't have ${amount} left in your fund! You only have ${self.fund_budget}. The deal fell through."
            else:
                # Deal won! Add to portfolio
                equity_percent = self.EQUITY_PERCENT
                self.portfolio.append({
                    "startup_name": scen["startup_name"],
                    "invested_amount": amount,
                    "equity_percent": equity_percent,
                    "paper_multiplier": 1.0,  # Starts at 1.0x Cost
                    "true_potential_multiplier": scen["true_potential_multiplier"],
                    "active": True,
                })
                self.fund_budget -= amount
                self.inbox_pitches = [
                    p for p in self.inbox_pitches
                    if p["startup_id"] != scen["startup_id"]
                ]
                obs_text = f"Founder says: 'We accept your investment!' You invested ${amount:,.2f} for {equity_percent*100:.1f}% equity in {scen['startup_name']}."

                # An empty inbox ends the quarter early.
                if not self.inbox_pitches:
                    self.quarter += 1
                    obs = self._setup_quarter()
                    obs.observation_text = obs_text + "\n\n" + obs.observation_text
                    return obs

        elif a_type == "wait":
            obs_text = "You waited a turn."
        else:
            obs_text = f"Invalid action_type: {a_type}"

        return VcGeminiV0Observation(
            observation_text=obs_text,
            inbox=inbox_msgs,
            data=data_res,
            done=False,
            reward=0.0,
            metadata={
                "step": self._state.step_count,
                "quarter": self.quarter,
                "turns_left": self.turns_remaining,
            },
        )

    @property
    def state(self) -> State:
        """Return the current episode ``State`` (episode id and step count)."""
        return self._state