File size: 7,733 Bytes
aaef24a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8eccedb
aaef24a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8eccedb
aaef24a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f006b5f
aaef24a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
# /// script
# requires-python = ">=3.13"
# dependencies = [
#     "altair",
#     "asimpy",
#     "marimo",
#     "polars==1.24.0",
# ]
# ///

import marimo

# Marimo-generated boilerplate: records the generating version and creates
# the App object that the @app.cell decorators below register cells on.
__generated_with = "0.20.4"
app = marimo.App(width="medium")


@app.cell(hide_code=True)
def _():
    # Dependency cell: imports everything the notebook needs and exposes the
    # names to downstream cells via the return tuple (marimo's dataflow wiring).
    import marimo as mo
    import random
    import statistics

    import altair as alt
    import polars as pl

    # asimpy supplies the discrete-event simulation primitives used below.
    from asimpy import Environment, Process, Resource

    return Environment, Process, Resource, alt, mo, pl, random, statistics


@app.cell(hide_code=True)
def _(mo):
    # Narrative cell: introduces the M/M/1 model and derives why queue length
    # L = rho / (1 - rho) grows nonlinearly as utilization approaches 1.
    mo.md(r"""
    # M/M/1 Queue Nonlinearity

    ## *The 90% Utilization Trap*

    A single server handles jobs that arrive randomly and take a random amount of time to process. If both inter-arrival times and service times follow exponential distributions, this is called an *M/M/1 queue*, and is the simplest model in queueing theory.

    Managers often treat utilization linearly: "90% busy is only a little worse than 80% busy." The M/M/1 formula shows this intuition is badly wrong. The mean number of jobs in the system (waiting and being served) is:

    $$L = \frac{\rho}{1 - \rho}$$

    where $\rho = \lambda / \mu$ is the utilization ratio (arrival rate divided by service rate). The mean time a job spends in the system follows from Little's Law $L = \lambda W$:

    $$W = \frac{1}{\mu - \lambda} = \frac{1}{\mu(1 - \rho)}$$

    The denominator $(1 - \rho)$ causes both $L$ and $W$ to blow up as $\rho \to 1$.

    | $\rho$ | $L = \rho/(1-\rho)$ | Marginal $\Delta L$ per 0.1 step |
    |-------:|--------------------:|--------------------------------:|
    |  0.50  |               1.00  |                            —    |
    |  0.60  |               1.50  |                          +0.50  |
    |  0.70  |               2.33  |                          +0.83  |
    |  0.80  |               4.00  |                          +1.67  |
    |  0.90  |               9.00  |                          +5.00  |

    Each equal step in $\rho$ produces a larger jump in queue length than the previous step. Going from 80% to 90% utilization adds more queue length than going from 0% to 80% combined. This happens because the queue is stabilized by the gaps in service capacity. When $\rho = 0.9$, only 10% of capacity is slack. Any random burst of arrivals takes far longer to drain than when $\rho = 0.5$ and 50% of capacity is slack. The system spends most of its time recovering from bursts rather than idling.
    """)
    return


@app.cell(hide_code=True)
def _(mo):
    # Narrative cell: describes how the simulation cells below map onto the
    # queueing model (one Resource as the server, exponential gaps/services).
    mo.md(r"""
    ## Implementation

    The simulation uses a single `Resource(capacity=1)` as the server. A generator process creates customers with inter-arrival gaps drawn from $\text{Exp}(\lambda)$. Each customer records its arrival time, waits to acquire the server, receives $\text{Exp}(\mu)$ service, and logs its total sojourn time. The mean queue length $L$ is computed via Little's Law from the observed mean sojourn time.
    """)
    return


@app.cell(hide_code=True)
def _(mo):
    # Control panel: simulation length, service rate, RNG seed, and a run
    # button, stacked vertically as this cell's output.
    sim_time_slider = mo.ui.slider(
        start=0,
        stop=100_000,
        step=1_000,
        value=20_000,
        label="Simulation time",
    )

    service_rate_slider = mo.ui.slider(
        start=1.0,
        stop=5.0,
        step=0.01,
        value=1.0,
        label="Service rate",
    )

    seed_input = mo.ui.number(
        value=192,
        step=1,
        label="Random seed",
    )

    run_button = mo.ui.run_button(label="Run simulation")

    mo.vstack([
        sim_time_slider,
        service_rate_slider,
        seed_input,
        run_button,
    ])
    # Fix: run_button was rendered but never returned, so no downstream cell
    # could read run_button.value to gate the simulation -- it was a dead UI
    # element. Exposing it is backward compatible (extra returns are ignored
    # by cells that don't request them).
    return run_button, seed_input, service_rate_slider, sim_time_slider


@app.cell
def _(seed_input, service_rate_slider, sim_time_slider):
    # Snapshot the widget values into plain scalars for the simulation cells.
    SIM_TIME = int(sim_time_slider.value)  # total simulated time units (can be 0, yielding an empty run)
    SERVICE_RATE = float(service_rate_slider.value)  # mu: service completions per time unit
    SEED = int(seed_input.value)  # PRNG seed so sweeps are reproducible
    return SEED, SERVICE_RATE, SIM_TIME


@app.cell
def _(Process, random):
    class Customer(Process):
        """One job: queue for the server, receive Exp(mu) service, log sojourn.

        Appends its total time in system (wait + service) to the shared
        ``sojourn_times`` list when service completes.
        """

        def init(self, server, service_rate, sojourn_times):
            # Shared single-capacity server, the service-rate parameter mu,
            # and the shared accumulator of completed sojourn times.
            self.server = server
            self.service_rate = service_rate
            self.sojourn_times = sojourn_times

        async def run(self):
            entered_at = self.now
            # Holding the server for one exponential draw models the service.
            async with self.server:
                service_time = random.expovariate(self.service_rate)
                await self.timeout(service_time)
            # Sojourn time = queueing delay + service time.
            self.sojourn_times.append(self.now - entered_at)

    return (Customer,)


@app.cell
def _(Customer, Process, random):
    class ArrivalStream(Process):
        """Poisson source: spawns one Customer per Exp(lambda) inter-arrival gap."""

        def init(self, arrival_rate, service_rate, server, sojourn_times):
            # lambda, mu, the shared server, and the shared results list that
            # each spawned Customer appends to.
            self.arrival_rate = arrival_rate
            self.service_rate = service_rate
            self.server = server
            self.sojourn_times = sojourn_times

        async def run(self):
            # Runs until the environment stops: sleep one exponential gap,
            # then hand a fresh Customer to the environment.
            while True:
                gap = random.expovariate(self.arrival_rate)
                await self.timeout(gap)
                # NOTE(review): self._env looks like a private attribute of
                # asimpy.Process -- confirm whether a public accessor exists.
                Customer(
                    self._env,
                    self.server,
                    self.service_rate,
                    self.sojourn_times,
                )

    return (ArrivalStream,)


@app.cell
def _(
    ArrivalStream,
    Environment,
    Resource,
    SERVICE_RATE,
    SIM_TIME,
    statistics,
):
    def simulate(rho):
        """Run one M/M/1 simulation at utilization *rho*.

        Returns ``(sim_L, theory_L)``: the simulated mean number in system,
        obtained via Little's Law (L = lambda * W) from the observed mean
        sojourn time, and the closed-form value rho / (1 - rho).

        Raises:
            ValueError: if rho is outside [0, 1) -- the queue is unstable and
                the closed-form result does not exist.
        """
        # Fix: previously rho >= 1 produced an opaque ZeroDivisionError from
        # the theory formula; fail early with a meaningful message instead.
        if not 0.0 <= rho < 1.0:
            raise ValueError(f"rho must be in [0, 1) for a stable queue, got {rho}")
        arrival_rate = rho * SERVICE_RATE
        sojourn_times = []
        env = Environment()
        server = Resource(env, capacity=1)
        ArrivalStream(env, arrival_rate, SERVICE_RATE, server, sojourn_times)
        env.run(until=SIM_TIME)
        # NOTE: customers still in the system at SIM_TIME never record a
        # sojourn time, so sim_L is biased slightly low; the bias grows with
        # rho. Guard against an empty run (e.g. SIM_TIME == 0).
        mean_W = statistics.mean(sojourn_times) if sojourn_times else 0.0
        sim_L = arrival_rate * mean_W  # Little's Law: L = lambda * W
        theory_L = rho / (1.0 - rho)
        return sim_L, theory_L

    return (simulate,)


@app.cell(hide_code=True)
def _(mo):
    # Section heading for the sweep table / chart that follows.
    mo.md("""
    ## Simulated vs. Theoretical Queue Length
    """)
    return


@app.cell
def _(SEED, pl, random, simulate):
    def sweep():
        """Simulate a grid of utilizations; return a DataFrame comparing sim vs. theory."""
        random.seed(SEED)  # fixed seed => reproducible sweep
        rows = []
        for rho in (0.1, 0.2, 0.3, 0.5, 0.7, 0.8, 0.9, 0.95):
            sim_L, theory_L = simulate(rho)
            rows.append(
                {
                    "rho": rho,
                    "theory_L": theory_L,
                    "sim_L": sim_L,
                    # Relative deviation of the simulated estimate, percent.
                    "pct_error": 100.0 * (sim_L - theory_L) / theory_L,
                }
            )
        return pl.DataFrame(rows)

    df_sweep = sweep()
    df_sweep
    return (df_sweep,)


@app.cell(hide_code=True)
def _(mo):
    # Section heading for the marginal-increase table that follows.
    mo.md("""
    ## Marginal Increase in L per 0.1 Step in ρ (Theory)
    """)
    return


@app.cell
def _(pl):
    def marginal():
        """Tabulate the theoretical jump in L for each 0.1 step in utilization."""
        utilizations = [0.5, 0.6, 0.7, 0.8, 0.9]
        rows = []
        # Pair each utilization with its successor and difference the
        # closed-form L = rho / (1 - rho) at the two endpoints.
        for rho_lo, rho_hi in zip(utilizations, utilizations[1:]):
            L_lo = rho_lo / (1.0 - rho_lo)
            L_hi = rho_hi / (1.0 - rho_hi)
            rows.append(
                {
                    "rho_from": rho_lo,
                    "rho_to": rho_hi,
                    "delta_L": round(L_hi - L_lo, 4),
                }
            )
        return pl.DataFrame(rows)

    df_marginal = marginal()
    df_marginal
    return


@app.cell
def _(alt, df_sweep):
    # Long-format frame: one row per (rho, source) pair so Altair can draw
    # the theory and simulation series as separately colored lines.
    df_plot = df_sweep.unpivot(
        on=["theory_L", "sim_L"], index="rho", variable_name="source", value_name="L"
    )
    base = alt.Chart(df_plot).mark_line(point=True)
    chart = base.encode(
        x=alt.X("rho:Q", title="Utilization (ρ)"),
        y=alt.Y("L:Q", title="Mean queue length (L)"),
        color=alt.Color("source:N", title="Source"),
        tooltip=["rho:Q", "source:N", "L:Q"],
    ).properties(title="M/M/1 Queue Length vs. Utilization")
    chart
    return


@app.cell(hide_code=True)
def _(mo):
    # Closing narrative: the practical operating guideline drawn from the
    # nonlinearity demonstrated above.
    mo.md(r"""
    ## Key Takeaway

    For any system approximated by an M/M/1 queue, **never target utilization above 80–85%** if low latency matters. The last few percent of throughput come at an enormous cost in queue length and wait time.
    """)
    return


# Standard marimo entry point: serve the notebook when executed directly.
if __name__ == "__main__":
    app.run()