Spaces:

marimo-team
/

marimo-learn

Runtime error

File size: 9,401 Bytes

# /// script
# requires-python = ">=3.13"
# dependencies = [
#     "altair",
#     "asimpy",
#     "marimo",
#     "polars==1.24.0",
# ]
# ///

import marimo

__generated_with = "0.20.4"
app = marimo.App(width="medium")


@app.cell(hide_code=True)
def _():
    import marimo as mo
    import random
    import statistics

    import altair as alt
    import polars as pl

    from asimpy import Environment, Process, Resource

    return Environment, Process, Resource, alt, mo, pl, random, statistics


@app.cell(hide_code=True)
def _(mo):
    mo.md(r"""
    # Pooled vs. Separate Queues

    ## *Why Airports Switched to Single Lines*

    A facility has two identical servers. Customers arrive as a Poisson process and each needs one server for an exponentially distributed service time. Which queueing discipline should the facility use?

    - Separate queues: each server has its own dedicated line; customers randomly pick a line on arrival and cannot switch.
    - Pooled queue: a single shared line feeds whichever server becomes free first.

    It turns out that pooling the queues is always better, even though both systems have identical total arrival rate, identical per-server service rate, and identical utilization $\rho$. The pooled system consistently produces shorter mean wait times, often by a factor of two or more at moderate utilization. The reason is that separate queues waste servers' idle time. In separate queues, one server may be idle while customers wait in the other line. Pooling eliminates this mismatch: a free server always serves the next waiting customer.

    ### Why Separate Queues Persist

    Despite being provably worse, separate queues feel fairer because customers can see their progress. Single lines eliminate the anxiety of watching the other queue move faster, but historically customers resisted them until airlines and banks demonstrated the improvement empirically in the 1960s–70s.
    """)
    return


@app.cell(hide_code=True)
def _(mo):
    mo.md(r"""
    ## Implementation

    This tutorial explores this finding using two simulations run with identical random seeds:

    1. Pooled: `Resource(capacity=2)` with one arrival stream. The resource grants access to whichever capacity slot is free.
    2. Separate: two `Resource(capacity=1)` instances. Arrivals call `random.choice` to pick a server and cannot switch even if it is slower.

    The mean sojourn time is collected across a sweep of utilization levels $\rho$.
    """)
    return


@app.cell(hide_code=True)
def _(mo):
    sim_time_slider = mo.ui.slider(
        start=0,
        stop=100_000,
        step=1_000,
        value=20_000,
        label="Simulation time",
    )

    arrival_rate_slider = mo.ui.slider(
        start=0.5,
        stop=1.9,
        step=0.05,
        value=1.8,
        label="Arrival rate",
    )

    seed_input = mo.ui.number(
        value=192,
        step=1,
        label="Random seed",
    )

    run_button = mo.ui.run_button(label="Run simulation")

    mo.vstack([
        sim_time_slider,
        arrival_rate_slider,
        seed_input,
        run_button,
    ])
    return arrival_rate_slider, seed_input, sim_time_slider


@app.cell
def _(arrival_rate_slider, seed_input, sim_time_slider):
    SIM_TIME = int(sim_time_slider.value)
    ARRIVAL_RATE = float(arrival_rate_slider.value)
    SEED = int(seed_input.value)
    SERVICE_RATE = 1.0
    N_SERVERS = 2
    RHO = ARRIVAL_RATE / (N_SERVERS * SERVICE_RATE)
    return ARRIVAL_RATE, N_SERVERS, RHO, SEED, SERVICE_RATE, SIM_TIME


@app.cell
def _(Process, SERVICE_RATE, random):
    class Customer(Process):
        def init(self, server, sojourn_times):
            self.server = server
            self.sojourn_times = sojourn_times

        async def run(self):
            arrival = self.now
            async with self.server:
                await self.timeout(random.expovariate(SERVICE_RATE))
            self.sojourn_times.append(self.now - arrival)

    return (Customer,)


@app.cell
def _(Customer, Process, random):
    class PooledArrivals(Process):
        def init(self, arrival_rate, server, sojourn_times):
            self.arrival_rate = arrival_rate
            self.server = server
            self.sojourn_times = sojourn_times

        async def run(self):
            while True:
                await self.timeout(random.expovariate(self.arrival_rate))
                Customer(self._env, self.server, self.sojourn_times)

    return (PooledArrivals,)


@app.cell
def _(Customer, Process, random):
    class SeparateArrivals(Process):
        def init(self, arrival_rate, servers, sojourn_times):
            self.arrival_rate = arrival_rate
            self.servers = servers
            self.sojourn_times = sojourn_times

        async def run(self):
            while True:
                await self.timeout(random.expovariate(self.arrival_rate))
                server = random.choice(self.servers)
                Customer(self._env, server, self.sojourn_times)

    return (SeparateArrivals,)


@app.cell
def _(
    ARRIVAL_RATE,
    Environment,
    N_SERVERS,
    PooledArrivals,
    Resource,
    SEED,
    SIM_TIME,
    random,
    statistics,
):
    def run_pooled(arrival_rate=ARRIVAL_RATE):
        random.seed(SEED)
        sojourn_times = []
        env = Environment()
        shared_server = Resource(env, capacity=N_SERVERS)
        PooledArrivals(env, arrival_rate, shared_server, sojourn_times)
        env.run(until=SIM_TIME)
        return statistics.mean(sojourn_times)

    return (run_pooled,)


@app.cell
def _(
    ARRIVAL_RATE,
    Environment,
    N_SERVERS,
    Resource,
    SEED,
    SIM_TIME,
    SeparateArrivals,
    random,
    statistics,
):
    def run_separate(arrival_rate=ARRIVAL_RATE):
        random.seed(SEED)
        sojourn_times = []
        env = Environment()
        servers = [Resource(env, capacity=1) for _ in range(N_SERVERS)]
        SeparateArrivals(env, arrival_rate, servers, sojourn_times)
        env.run(until=SIM_TIME)
        return statistics.mean(sojourn_times)

    return (run_separate,)


@app.cell
def _(ARRIVAL_RATE, N_SERVERS, SERVICE_RATE, pl, run_pooled, run_separate):
    def sweep():
        sweep_rows = []
        for rho in [0.5, 0.6, 0.7, 0.8, 0.9]:
            rate = rho * N_SERVERS * SERVICE_RATE
            pw = run_pooled(arrival_rate=rate)
            sw = run_separate(arrival_rate=rate)
            sweep_rows.append({"rho": rho, "pooled_W": pw, "separate_W": sw, "ratio": sw / pw})
        return pl.DataFrame(sweep_rows)

    df_sweep = sweep()
    pooled_W = run_pooled(arrival_rate=ARRIVAL_RATE)
    separate_W = run_separate(arrival_rate=ARRIVAL_RATE)
    return df_sweep, pooled_W, separate_W


@app.cell(hide_code=True)
def _(N_SERVERS, RHO, SERVICE_RATE, mo, pooled_W, separate_W):
    mo.md(f"""
    ## Results

    {N_SERVERS} servers, service rate {SERVICE_RATE}, utilisation ρ = {RHO:.2f}

    At ρ = {RHO:.2f}: pooled W = {pooled_W:.3f}, separate W = {separate_W:.3f}
    — separate queues are **{separate_W / pooled_W:.2f}×** slower
    """)
    return


@app.cell
def _(df_sweep):
    df_sweep
    return


@app.cell
def _(alt, df_sweep):
    df_plot = df_sweep.unpivot(
        on=["pooled_W", "separate_W"], index="rho", variable_name="system", value_name="W"
    )
    chart = (
        alt.Chart(df_plot)
        .mark_line(point=True)
        .encode(
            x=alt.X("rho:Q", title="Utilization per server (ρ)"),
            y=alt.Y("W:Q", title="Mean sojourn time (W)"),
            color=alt.Color("system:N", title="Queue type"),
            tooltip=["rho:Q", "system:N", "W:Q"],
        )
        .properties(title="Pooled vs. Separate Queues: Mean Sojourn Time")
    )
    chart
    return


@app.cell(hide_code=True)
def _(mo):
    mo.md(r"""
    ## Understanding the Math

    ### Why pooling always wins

    Two separate M/M/1 queues each running at utilization $\rho$ have mean sojourn time $W_{\text{sep}} = 1/(\mu(1-\rho))$. A pooled M/M/2 queue with the same total arrival rate has strictly lower mean sojourn time for every value of $0 < \rho < 1$. The proof uses the [Erlang-C formula](https://en.wikipedia.org/wiki/Erlang_(unit)#Erlang_C_formula), but the intuition is simpler: pooling converts two independent random processes into one, and the combined queue can exploit any idle capacity instantly. At $\rho = 0.8$, separate queues give roughly twice the mean wait of a pooled queue.

    ### Connection to variance reduction

    Think of the service delivered in a time window by two separate servers as two independent random variables $X_1$ and $X_2$. Their average $(X_1 + X_2)/2$ has variance $\sigma^2/2$, which is half the variance of either component alone. Pooling achieves something similar: by combining demand into one stream served by both servers, the system smooths out random fluctuations. The pooled queue is, in effect, averaging over both servers' idle periods instead of locking each idle period to a single lane.

    ### Rule of thumb

    At $\rho = 0.8$, separate queues produce roughly double the mean wait of a pooled queue. This factor grows as $\rho$ increases, because the $(1-\rho)$ term in the denominator amplifies any wasted capacity. The lesson: whenever you can route demand flexibly to a shared resource, do it.
    """)
    return


if __name__ == "__main__":
    app.run()