# /// script
# requires-python = ">=3.13"
# dependencies = [
# "altair",
# "asimpy",
# "marimo",
# "polars==1.24.0",
# ]
# ///
import marimo

# Notebook scaffolding generated by marimo; `app` collects the cells below.
__generated_with = "0.20.4"
app = marimo.App(width="medium")
@app.cell(hide_code=True)
def _():
    # Shared imports; marimo wires the returned names into downstream cells,
    # so the return tuple must list every name other cells consume.
    import marimo as mo
    import random
    import statistics
    import altair as alt
    import polars as pl
    from asimpy import Environment, Process, Resource
    return Environment, Process, Resource, alt, mo, pl, random, statistics
@app.cell(hide_code=True)
def _(mo):
    # Narrative intro: defines the M/M/1 model and shows why queue length
    # blows up nonlinearly as utilization approaches 1.
    mo.md(r"""
# M/M/1 Queue Nonlinearity
## *The 90% Utilization Trap*
A single server handles jobs that arrive randomly and take a random amount of time to process. If both inter-arrival times and service times follow exponential distributions, this is called an *M/M/1 queue*, and is the simplest model in queueing theory.
Managers often treat utilization linearly: "90% busy is only a little worse than 80% busy." The M/M/1 formula shows this intuition is badly wrong. The mean number of jobs in the system (waiting and being served) is:
$$L = \frac{\rho}{1 - \rho}$$
where $\rho = \lambda / \mu$ is the utilization ratio (arrival rate divided by service rate). The mean time a job spends in the system follows from Little's Law $L = \lambda W$:
$$W = \frac{1}{\mu - \lambda} = \frac{1}{\mu(1 - \rho)}$$
The denominator $(1 - \rho)$ causes both $L$ and $W$ to blow up as $\rho \to 1$.
| $\rho$ | $L = \rho/(1-\rho)$ | Marginal $\Delta L$ per 0.1 step |
|-------:|--------------------:|--------------------------------:|
| 0.50 | 1.00 | — |
| 0.60 | 1.50 | +0.50 |
| 0.70 | 2.33 | +0.83 |
| 0.80 | 4.00 | +1.67 |
| 0.90 | 9.00 | +5.00 |
Each equal step in $\rho$ produces a larger jump in queue length than the previous step. Going from 80% to 90% utilization adds more queue length than going from 0% to 80% combined. This happens because the queue is stabilized by the gaps in service capacity. When $\rho = 0.9$, only 10% of capacity is slack. Any random burst of arrivals takes far longer to drain than when $\rho = 0.5$ and 50% of capacity is slack. The system spends most of its time recovering from bursts rather than idling.
""")
    return
@app.cell(hide_code=True)
def _(mo):
    # Brief description of how the simulation below maps onto asimpy primitives.
    mo.md(r"""
## Implementation
The simulation uses a single `Resource(capacity=1)` as the server. A generator process creates customers with inter-arrival gaps drawn from $\text{Exp}(\lambda)$. Each customer records its arrival time, waits to acquire the server, receives $\text{Exp}(\mu)$ service, and logs its total sojourn time. The mean queue length $L$ is computed via Little's Law from the observed mean sojourn time.
""")
    return
@app.cell(hide_code=True)
def _(mo):
    # Interactive controls for the experiment.
    sim_time_slider = mo.ui.slider(
        start=0,
        stop=100_000,
        step=1_000,
        value=20_000,
        label="Simulation time",
    )
    service_rate_slider = mo.ui.slider(
        start=1.0,
        stop=5.0,
        step=0.01,
        value=1.0,
        label="Service rate",
    )
    seed_input = mo.ui.number(
        value=192,
        step=1,
        label="Random seed",
    )
    # NOTE(review): run_button is rendered but never returned, so no downstream
    # cell can depend on it -- the simulation re-runs reactively when the
    # sliders/seed change. Confirm whether the button was meant to gate runs.
    run_button = mo.ui.run_button(label="Run simulation")
    mo.vstack([
        sim_time_slider,
        service_rate_slider,
        seed_input,
        run_button,
    ])
    return seed_input, service_rate_slider, sim_time_slider
@app.cell
def _(seed_input, service_rate_slider, sim_time_slider):
    # Snapshot the current UI values as plain constants for the simulation cells.
    SIM_TIME = int(sim_time_slider.value)
    SERVICE_RATE = float(service_rate_slider.value)
    SEED = int(seed_input.value)
    return SEED, SERVICE_RATE, SIM_TIME
@app.cell
def _(Process, random):
    class Customer(Process):
        """A single job: queues for the server, receives Exp(mu) service,
        and records its total sojourn (wait + service) time."""

        def init(self, server, service_rate, sojourn_times):
            # NOTE(review): `init` (not `__init__`) -- presumably asimpy's
            # Process setup hook; confirm against the asimpy API.
            self.server = server
            self.service_rate = service_rate
            self.sojourn_times = sojourn_times

        async def run(self):
            arrival = self.now
            # Queueing happens while acquiring the server; the context manager
            # releases it after the exponential service delay.
            async with self.server:
                await self.timeout(random.expovariate(self.service_rate))
            # Total time in system, appended to the shared results list.
            self.sojourn_times.append(self.now - arrival)
    return (Customer,)
@app.cell
def _(Customer, Process, random):
    class ArrivalStream(Process):
        """Poisson arrival generator: spawns a new Customer after each
        Exp(arrival_rate) inter-arrival gap, forever (until env.run ends)."""

        def init(self, arrival_rate, service_rate, server, sojourn_times):
            self.arrival_rate = arrival_rate
            self.service_rate = service_rate
            self.server = server
            self.sojourn_times = sojourn_times

        async def run(self):
            while True:
                await self.timeout(random.expovariate(self.arrival_rate))
                # NOTE(review): reaches into the private self._env to pass the
                # environment to the child process -- check whether asimpy
                # exposes a public accessor for this.
                Customer(self._env, self.server, self.service_rate, self.sojourn_times)
    return (ArrivalStream,)
@app.cell
def _(
    ArrivalStream,
    Environment,
    Resource,
    SERVICE_RATE,
    SIM_TIME,
    statistics,
):
    def simulate(rho):
        """Run one M/M/1 simulation at utilization `rho`.

        Returns (sim_L, theory_L): the mean queue length observed via
        Little's Law, and the analytic value rho / (1 - rho).
        """
        lam = rho * SERVICE_RATE
        observed = []
        env = Environment()
        # Single-capacity server; the arrival stream drives the whole run.
        ArrivalStream(env, lam, SERVICE_RATE, Resource(env, capacity=1), observed)
        env.run(until=SIM_TIME)
        # Guard against an empty sample (e.g. SIM_TIME == 0).
        if observed:
            w_bar = statistics.mean(observed)
        else:
            w_bar = 0.0
        # Little's Law: L = lambda * W.
        return lam * w_bar, rho / (1.0 - rho)
    return (simulate,)
@app.cell(hide_code=True)
def _(mo):
    # Section header for the sweep results table.
    mo.md("""
## Simulated vs. Theoretical Queue Length
""")
    return
@app.cell
def _(SEED, pl, random, simulate):
    def sweep():
        """Sweep utilization levels and tabulate simulated vs. analytic L."""
        # Single seed for the whole sweep keeps the run reproducible.
        random.seed(SEED)
        rows = []
        for utilization in (0.1, 0.2, 0.3, 0.5, 0.7, 0.8, 0.9, 0.95):
            measured, predicted = simulate(utilization)
            rows.append(
                {
                    "rho": utilization,
                    "theory_L": predicted,
                    "sim_L": measured,
                    # Relative deviation of the simulation from theory, in %.
                    "pct_error": 100.0 * (measured - predicted) / predicted,
                }
            )
        return pl.DataFrame(rows)
    df_sweep = sweep()
    df_sweep
    return (df_sweep,)
@app.cell(hide_code=True)
def _(mo):
    # Section header for the marginal-increase table (mirrors the intro table).
    mo.md("""
## Marginal Increase in L per 0.1 Step in ρ (Theory)
""")
    return
@app.cell
def _(pl):
    def marginal():
        """Tabulate the analytic jump in L for each 0.1 step in rho."""
        levels = [0.5, 0.6, 0.7, 0.8, 0.9]
        # Analytic mean queue length at each utilization level.
        lengths = [rho / (1.0 - rho) for rho in levels]
        rows = [
            {"rho_from": lo, "rho_to": hi, "delta_L": round(l_hi - l_lo, 4)}
            for (lo, l_lo), (hi, l_hi) in zip(
                zip(levels, lengths), zip(levels[1:], lengths[1:])
            )
        ]
        return pl.DataFrame(rows)
    df_marginal = marginal()
    df_marginal
    return
@app.cell
def _(alt, df_sweep):
    # Long format: one row per (rho, source) pair so Altair can color by source.
    df_plot = df_sweep.unpivot(
        on=["theory_L", "sim_L"], index="rho", variable_name="source", value_name="L"
    )
    chart = (
        alt.Chart(df_plot)
        .mark_line(point=True)
        .encode(
            x=alt.X("rho:Q", title="Utilization (ρ)"),
            y=alt.Y("L:Q", title="Mean queue length (L)"),
            color=alt.Color("source:N", title="Source"),
            tooltip=["rho:Q", "source:N", "L:Q"],
        )
        .properties(title="M/M/1 Queue Length vs. Utilization")
    )
    chart
    return
@app.cell(hide_code=True)
def _(mo):
    # Closing advice drawn from the nonlinearity demonstrated above.
    mo.md(r"""
## Key Takeaway
For any system approximated by an M/M/1 queue, **never target utilization above 80–85%** if low latency matters. The last few percent of throughput come at an enormous cost in queue length and wait time.
""")
    return
# Entry point: serve the notebook when executed as a script.
if __name__ == "__main__":
    app.run()