| """Online baseline (no OFO control) simulation using real GPUs. |
| |
| Connects to live vLLM servers and zeusd instances for hardware-in-the-loop |
| baseline measurement. Power readings from a small number of real GPUs are |
| augmented to datacenter scale using the shared InferencePowerAugmenter pipeline. |
| |
| Two modes correspond to two baselines: |
| no-tap Fixed tap positions throughout. |
| tap-change Regulator taps change at scheduled times. |
| |
| Edit the deployment definitions in config.json to match your cluster. |
| |
| Usage: |
| python examples/online/run_baseline.py --config examples/online/config.json |
| python examples/online/run_baseline.py --config examples/online/config.json --mode tap-change |
| """ |
|
|
| from __future__ import annotations |
|
|
| import hashlib |
| import json |
| import logging |
| from fractions import Fraction |
| from pathlib import Path |
|
|
| from pydantic import BaseModel |
|
|
| from openg2g.controller.tap_schedule import TapScheduleController |
| from openg2g.coordinator import Coordinator |
| from openg2g.datacenter.config import DatacenterConfig, PowerAugmentationConfig |
| from openg2g.datacenter.online import ( |
| LiveServerConfig, |
| OnlineDatacenter, |
| VLLMDeployment, |
| ) |
| from openg2g.datacenter.workloads.inference import MLEnergySource, RequestsConfig, RequestStore |
| from openg2g.grid.config import TapPosition, TapSchedule |
| from openg2g.grid.opendss import OpenDSSGrid |
| from openg2g.metrics.voltage import compute_allbus_voltage_stats |
|
|
| logger = logging.getLogger("run_baseline") |
|
|
| |
# Change in regulator tap ratio contributed by a single tap step.
# NOTE(review): 0.00625 presumably corresponds to a 32-step, +/-10 % regulator -- confirm.
TAP_STEP = 0.00625

# Tap ratios handed to the grid as its initial tap position; each value is
# 1.0 plus a whole number of tap steps.
# NOTE(review): a/b/c are presumably the three phases -- confirm against TapPosition.
INITIAL_TAPS = TapPosition(
    a=1.0 + 14 * TAP_STEP,
    b=1.0 + 6 * TAP_STEP,
    c=1.0 + 15 * TAP_STEP,
)

# Timed tap positions used by the "tap-change" baseline. The two entries are
# combined with ``|`` into one schedule.
# NOTE(review): ``t`` is presumably in seconds (25 min and 55 min) -- confirm.
TAP_CHANGE_SCHEDULE = (
    TapPosition(
        a=1.0 + 16 * TAP_STEP,
        b=1.0 + 6 * TAP_STEP,
        c=1.0 + 17 * TAP_STEP,
    ).at(t=25 * 60)
    | TapPosition(
        a=1.0 + 10 * TAP_STEP,
        b=1.0 + 6 * TAP_STEP,
        c=1.0 + 10 * TAP_STEP,
    ).at(t=55 * 60)
)
| |
|
|
# Voltage band passed to the all-bus voltage statistics as v_min / v_max.
V_MIN, V_MAX = 0.95, 1.05

# Name of the grid bus used as ``dc_bus`` for both the grid and the coordinator.
DC_BUS = "671"

# Forwarded to DatacenterConfig(gpus_per_server=...).
GPUS_PER_SERVER = 8

# Time steps kept as exact fractions: DT_DC is the datacenter step and
# DT_CTRL the tap-schedule controller step.
DT_DC = Fraction(1, 10)
DT_CTRL = Fraction(1, 1)

# Total simulated duration in seconds (one hour).
T_TOTAL_S = 60 * 60
|
|
|
|
class OnlineConfig(BaseModel):
    """Schema of the JSON configuration file loaded by this script.

    ``main`` reads ``deployments``, ``requests``, ``requests_dir`` and
    ``ieee_case_dir``; the remaining fields are validated but not read by
    ``main``.
    """

    # Deployments the online datacenter is built from.
    deployments: list[VLLMDeployment]
    # Request settings; also part of the ``requests_hash`` fingerprint.
    requests: RequestsConfig = RequestsConfig()
    # Explicit request directory; when unset, ``main`` derives one from ``requests_hash``.
    requests_dir: Path | None = None
    # Directory passed to the grid as ``dss_case_dir``.
    ieee_case_dir: Path
    data_dir: Path | None = None
    # Data sources fingerprinted by ``data_hash``.
    data_sources: list[MLEnergySource] = []
    mlenergy_data_dir: Path | None = None

    @property
    def requests_hash(self) -> str:
        """Return a 16-hex-character SHA-256 fingerprint of the request setup.

        The digest covers the JSON dump of ``requests`` together with the
        sorted model labels of all deployments, so the order in which the
        deployments are listed does not affect the result.
        """
        request_settings = self.requests.model_dump(mode="json")
        model_labels = sorted(dep.spec.model_label for dep in self.deployments)
        serialized = json.dumps((request_settings, model_labels), sort_keys=True)
        return hashlib.sha256(serialized.encode()).hexdigest()[:16]

    @property
    def data_hash(self) -> str:
        """Return a 16-hex-character SHA-256 fingerprint of ``data_sources``.

        Each source is dumped to JSON-compatible form and the dumps are
        ordered by their ``model_label`` entry before hashing.
        """
        dumped_sources = [src.model_dump(mode="json") for src in self.data_sources]
        dumped_sources.sort(key=lambda entry: entry["model_label"])
        # The one-element tuple wrapper is part of the hashed payload.
        serialized = json.dumps((dumped_sources,), sort_keys=True)
        return hashlib.sha256(serialized.encode()).hexdigest()[:16]
|
|
|
|
def main(*, config_path: Path, mode: str = "no-tap") -> None:
    """Run one online baseline simulation and log all-bus voltage statistics.

    The function loads the JSON config, prepares the request store, builds
    the online datacenter, the OpenDSS grid and a tap-schedule controller,
    runs them through a ``Coordinator`` in live mode, and logs the resulting
    voltage statistics. Log records are mirrored to
    ``outputs/online_baseline_<mode>/console_output.txt`` for the duration of
    the call.

    Args:
        config_path: Path to the JSON file validated as ``OnlineConfig``.
        mode: ``"no-tap"`` keeps the initial taps for the whole run (empty
            schedule); ``"tap-change"`` applies ``TAP_CHANGE_SCHEDULE``.

    Raises:
        ValueError: If ``mode`` is not one of the supported baselines.
    """
    # Fail fast: an unrecognised mode would otherwise silently run the
    # no-tap baseline and write into a misleadingly named output directory.
    valid_modes = ("no-tap", "tap-change")
    if mode not in valid_modes:
        raise ValueError(f"Unknown mode {mode!r}; expected one of {valid_modes}")

    config = OnlineConfig.model_validate_json(config_path.read_bytes())

    # Fall back to a directory keyed by the request fingerprint.
    requests_dir = config.requests_dir or (Path("data/online") / config.requests_hash)

    save_dir = (Path("outputs") / f"online_baseline_{mode}").resolve()
    save_dir.mkdir(parents=True, exist_ok=True)

    # Explicit UTF-8: the statistics output contains the non-ASCII "·".
    file_handler = logging.FileHandler(save_dir / "console_output.txt", mode="w", encoding="utf-8")
    file_handler.setFormatter(logging.Formatter("%(asctime)s %(name)s %(levelname)s %(message)s", datefmt="%H:%M:%S"))
    root_logger = logging.getLogger()
    root_logger.addHandler(file_handler)

    try:
        RequestStore.ensure(requests_dir, [d.spec for d in config.deployments], config.requests)

        tap_ctrl_schedule = TAP_CHANGE_SCHEDULE if mode == "tap-change" else TapSchedule(())

        logger.info("Initializing OnlineDatacenter...")
        dc_config = DatacenterConfig(gpus_per_server=GPUS_PER_SERVER, base_kw_per_phase=500.0)
        dc = OnlineDatacenter(
            dc_config,
            config.deployments,
            dt_s=DT_DC,
            seed=0,
            power_augmentation=PowerAugmentationConfig(
                amplitude_scale_range=(0.9, 1.1),
                noise_fraction=0.02,
            ),
            live_server=LiveServerConfig(
                requests_dir=requests_dir,
                max_output_tokens=config.requests.max_completion_tokens,
                itl_window_s=1.0,
            ),
        )

        logger.info("Initializing OpenDSSGrid...")
        grid = OpenDSSGrid(
            dss_case_dir=config.ieee_case_dir,
            dss_master_file="IEEE13Nodeckt.dss",
            dc_bus=DC_BUS,
            dc_bus_kv=4.16,
            power_factor=dc_config.power_factor,
            # NOTE(review): hard-coded rather than DT_DC; presumably the grid
            # and datacenter steps are meant to match -- confirm.
            dt_s=Fraction(1, 10),
            connection_type="wye",
            initial_tap_position=INITIAL_TAPS,
        )

        tap_ctrl = TapScheduleController(schedule=tap_ctrl_schedule, dt_s=DT_CTRL)

        logger.info("Running online baseline simulation (mode=%s) for %d seconds...", mode, T_TOTAL_S)
        coord = Coordinator(
            datacenter=dc,
            grid=grid,
            controllers=[tap_ctrl],
            total_duration_s=T_TOTAL_S,
            dc_bus=DC_BUS,
            live=True,
        )
        log = coord.run()

        stats = compute_allbus_voltage_stats(log.grid_states, v_min=V_MIN, v_max=V_MAX)
        logger.info("=== Voltage Statistics (all-bus) ===")
        logger.info(" voltage_violation_time = %.1f s", stats.violation_time_s)
        logger.info(" worst_vmin = %.6f", stats.worst_vmin)
        logger.info(" worst_vmax = %.6f", stats.worst_vmax)
        logger.info(" integral_violation = %.4f pu·s", stats.integral_violation_pu_s)

        # NOTE(review): the only file this function itself writes into
        # save_dir is console_output.txt.
        logger.info("Outputs saved to: %s", save_dir)
    finally:
        # Detach and close the per-run handler so the file is flushed and
        # repeated calls do not stack handlers on the root logger.
        root_logger.removeHandler(file_handler)
        file_handler.close()
|
|
|
|
if __name__ == "__main__":
    # CLI entry point: parse arguments with tyro, configure console logging,
    # then hand over to main().
    from dataclasses import dataclass
    from typing import Literal

    import tyro

    @dataclass
    class Args:
        config: str
        """Path to the online config JSON file."""
        # Literal restricts the CLI to the two supported baselines, so a
        # mistyped mode is rejected at argument parsing.
        mode: Literal["no-tap", "tap-change"] = "no-tap"
        """Baseline variant: 'no-tap' (fixed taps) or 'tap-change' (scheduled tap changes)."""
        log_level: str = "INFO"
        """Logging verbosity (DEBUG, INFO, WARNING)."""

    args = tyro.cli(Args)

    logging.basicConfig(
        # Pass the level by name: accepts any letter case and rejects unknown
        # names, whereas getattr(logging, ...) could return arbitrary module
        # attributes such as the logging.info function.
        level=args.log_level.upper(),
        format="%(levelname)s %(asctime)s [%(name)s:%(lineno)d] %(message)s",
        datefmt="%H:%M:%S",
    )
    # Keep per-request httpx log records below WARNING out of the output.
    logging.getLogger("httpx").setLevel(logging.WARNING)

    main(config_path=Path(args.config), mode=args.mode)
|
|