hxia7 committed on
Commit
2767c41
·
verified ·
1 Parent(s): 1806a1b

Deploy S4-FIFO FastAPI artifact

Browse files
.gitattributes CHANGED
@@ -1,35 +1,2 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
+ models/ensemble_models.joblib filter=lfs diff=lfs merge=lfs -text
2
+ cost_matrix.npy filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Dockerfile ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.11-slim

WORKDIR /app

# libgomp1 is the OpenMP runtime needed by LightGBM's native library; the apt
# package lists are removed in the same layer to keep the image small.
RUN apt-get update \
    && apt-get install -y --no-install-recommends libgomp1 \
    && rm -rf /var/lib/apt/lists/*

# Dependencies are installed before the source is copied so that this layer is
# reused when only application files change.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Run the service as an unprivileged user instead of root. UID 1000 is the user
# ID Hugging Face Docker Spaces execute the container with, so the copied files
# are given matching ownership.
RUN useradd --create-home --uid 1000 user
COPY --chown=user:user . .
USER user

EXPOSE 7860

CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,10 +1,58 @@
1
  ---
2
- title: S4fifo Api
3
- emoji: 🏢
4
- colorFrom: yellow
5
- colorTo: gray
6
  sdk: docker
7
- pinned: false
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: S4-FIFO Parameter Prediction API
 
 
 
3
  sdk: docker
4
+ app_port: 7860
5
  ---
6
 
7
+ # S4-FIFO Parameter Prediction API
8
+
9
+ This Docker Space exposes the S4-FIFO control-plane inference artifact as a FastAPI service.
10
+
11
+ The service accepts one 73-dimensional cache-level feature vector and returns:
12
+
13
+ - the risk-minimizing S4-FIFO class and parameter set
14
+ - the top candidates by model probability
15
+ - the top candidates by expected risk
16
+
17
+ ## Endpoints
18
+
19
+ - `GET /health`
20
+ - `GET /metadata`
21
+ - `POST /predict`
22
+ - `GET /docs`
23
+
24
+ ## Request Example
25
+
26
+ ```bash
27
+ curl -X POST "https://<username>-<space-name>.hf.space/predict" \
28
+ -H "Content-Type: application/json" \
29
+ -d '{
30
+ "features": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
31
+ "top_k": 3
32
+ }'
33
+ ```
34
+
35
+ ## Artifact Notes
36
+
37
+ This Space uses the full 20-model LightGBM ensemble from `analysis/xgb_18class_rerun_local/ensemble_models.pkl`, stored as a compressed joblib artifact under `models/ensemble_models.joblib`.
38
+
39
+ The service performs data-driven risk-minimizing inference with `cost_matrix.npy`, matching the training-side RMI logic:
40
+
41
+ ```text
42
+ expected_risk[predicted_class] = cost_matrix[predicted_class] @ class_probabilities
43
+ ```
44
+
45
+ The compressed model artifact is large, so the first request after a cold start can take time while the model is loaded. A smaller dependency-free m2cgen artifact would require training/exporting a lite 73-feature model; the existing header-only lite export in `CacheLib/cachelib/allocator/s4fifo_model` uses a 75-feature model and is therefore not wired into this 73-feature API.
46
+
47
+ ## Deploy to Hugging Face Spaces
48
+
49
+ Create a Docker Space named `s4fifo-api`, then upload this directory as the Space root:
50
+
51
+ ```bash
52
+ cd s4fifo-api
53
+ python -m pip install -U huggingface_hub
54
+ huggingface-cli login
55
+ huggingface-cli upload <username>/s4fifo-api . --repo-type space
56
+ ```
57
+
58
+ For non-interactive upload, set `HF_TOKEN` in your shell instead of committing it to the repository.
cost_matrix.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73b7e9b16ec77e4de4e72718b98fb1cdc819da5442de8a362f6479d28a8a3644
3
+ size 2720
main.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Any
2
+
3
+ from fastapi import FastAPI, HTTPException
4
+ from pydantic import BaseModel, Field
5
+
6
+ from predictor import N_FEATURES, get_metadata, predict_from_features
7
+
8
+
9
# ASGI application object; uvicorn is pointed at it as "main:app".
# The keyword arguments only feed the generated API documentation.
app = FastAPI(
    description="Online control-plane inference artifact for S4-FIFO parameter selection.",
    version="0.1.0",
    title="S4-FIFO Parameter Prediction API",
)
14
+
15
+
16
class PredictRequest(BaseModel):
    # Request body for POST /predict. Documented with comments rather than a
    # docstring so the generated OpenAPI schema text stays exactly as it was.

    # Required. Both length bounds are N_FEATURES, so only vectors of exactly
    # that length validate.
    features: list[float] = Field(
        default=...,
        min_length=N_FEATURES,
        max_length=N_FEATURES,
        description="73-dimensional cache-level feature vector in the training feature order.",
    )

    # Optional, defaults to 3 and is bounded to 1..18.
    # NOTE(review): 18 equals "n_classes" in model_metadata.json but is
    # hard-coded here; keep the two in sync if the class count changes.
    top_k: int = Field(
        default=3,
        le=18,
        ge=1,
        description="Number of probability/risk-ranked candidate configurations to return.",
    )
29
+
30
+
31
@app.get("/")
def root() -> dict[str, Any]:
    # Landing payload: names the service, reports app.version and lists the
    # available routes. (Comment instead of a docstring so the OpenAPI
    # description of this route is unchanged.)
    routes = {
        "health": "/health",
        "metadata": "/metadata",
        "predict": "POST /predict",
        "docs": "/docs",
    }
    payload: dict[str, Any] = {"service": "S4-FIFO Parameter Prediction API"}
    payload["version"] = app.version
    payload["endpoints"] = routes
    return payload
43
+
44
+
45
@app.get("/health")
def health() -> dict[str, str]:
    # Liveness probe: always answers with a constant payload and never touches
    # the model or cost-matrix loaders.
    status_payload = {"status": "ok"}
    return status_payload
48
+
49
+
50
@app.get("/metadata")
def metadata() -> dict[str, Any]:
    # Thin HTTP wrapper: returns whatever predictor.get_metadata() reports.
    info = get_metadata()
    return info
53
+
54
+
55
@app.post("/predict")
def predict(req: PredictRequest) -> dict[str, Any]:
    # Runs inference for one validated request and returns the predictor's
    # result dictionary unchanged.
    received = len(req.features)
    if received == N_FEATURES:
        return predict_from_features(req.features, top_k=req.top_k)

    # Defensive fallback: PredictRequest already constrains the list length,
    # so this only fires if that schema-level validation did not apply.
    raise HTTPException(
        status_code=400,
        detail=f"Expected {N_FEATURES} features, got {received}",
    )
model_metadata.json ADDED
@@ -0,0 +1,214 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_source": "analysis/xgb_18class_rerun_local/ensemble_models.pkl",
3
+ "model_type": "LightGBM multiclass ensemble",
4
+ "n_models": 20,
5
+ "n_features": 73,
6
+ "n_classes": 18,
7
+ "feature_columns": [
8
+ "H_g",
9
+ "H_m",
10
+ "H_s",
11
+ "decay_rate_small",
12
+ "entropy_gap",
13
+ "ghost_pressure",
14
+ "hist_ghost_0",
15
+ "hist_ghost_1",
16
+ "hist_ghost_10",
17
+ "hist_ghost_11",
18
+ "hist_ghost_12",
19
+ "hist_ghost_13",
20
+ "hist_ghost_14",
21
+ "hist_ghost_15",
22
+ "hist_ghost_16",
23
+ "hist_ghost_17",
24
+ "hist_ghost_18",
25
+ "hist_ghost_19",
26
+ "hist_ghost_2",
27
+ "hist_ghost_3",
28
+ "hist_ghost_4",
29
+ "hist_ghost_5",
30
+ "hist_ghost_6",
31
+ "hist_ghost_7",
32
+ "hist_ghost_8",
33
+ "hist_ghost_9",
34
+ "hist_main_0",
35
+ "hist_main_1",
36
+ "hist_main_10",
37
+ "hist_main_11",
38
+ "hist_main_12",
39
+ "hist_main_13",
40
+ "hist_main_14",
41
+ "hist_main_15",
42
+ "hist_main_16",
43
+ "hist_main_17",
44
+ "hist_main_18",
45
+ "hist_main_19",
46
+ "hist_main_2",
47
+ "hist_main_3",
48
+ "hist_main_4",
49
+ "hist_main_5",
50
+ "hist_main_6",
51
+ "hist_main_7",
52
+ "hist_main_8",
53
+ "hist_main_9",
54
+ "hist_small_0",
55
+ "hist_small_1",
56
+ "hist_small_10",
57
+ "hist_small_11",
58
+ "hist_small_12",
59
+ "hist_small_13",
60
+ "hist_small_14",
61
+ "hist_small_15",
62
+ "hist_small_16",
63
+ "hist_small_17",
64
+ "hist_small_18",
65
+ "hist_small_19",
66
+ "hist_small_2",
67
+ "hist_small_3",
68
+ "hist_small_4",
69
+ "hist_small_5",
70
+ "hist_small_6",
71
+ "hist_small_7",
72
+ "hist_small_8",
73
+ "hist_small_9",
74
+ "probation_efficiency",
75
+ "rho_onehit",
76
+ "rho_unique",
77
+ "scan_intensity",
78
+ "tail_heaviness",
79
+ "thrashing_risk",
80
+ "total_reqs"
81
+ ],
82
+ "parameter_sets": [
83
+ {
84
+ "class": 0,
85
+ "rho_s": 0.2,
86
+ "tau_s": 1,
87
+ "tau_g": 0,
88
+ "rho_g": 3.0
89
+ },
90
+ {
91
+ "class": 1,
92
+ "rho_s": 0.05,
93
+ "tau_s": 1,
94
+ "tau_g": 0,
95
+ "rho_g": 0.9
96
+ },
97
+ {
98
+ "class": 2,
99
+ "rho_s": 0.5,
100
+ "tau_s": 1,
101
+ "tau_g": 0,
102
+ "rho_g": 0.9
103
+ },
104
+ {
105
+ "class": 3,
106
+ "rho_s": 0.2,
107
+ "tau_s": 1,
108
+ "tau_g": 0,
109
+ "rho_g": 0.9
110
+ },
111
+ {
112
+ "class": 4,
113
+ "rho_s": 0.05,
114
+ "tau_s": 2,
115
+ "tau_g": 0,
116
+ "rho_g": 6.0
117
+ },
118
+ {
119
+ "class": 5,
120
+ "rho_s": 0.1,
121
+ "tau_s": 2,
122
+ "tau_g": 1,
123
+ "rho_g": 3.0
124
+ },
125
+ {
126
+ "class": 6,
127
+ "rho_s": 0.3,
128
+ "tau_s": 2,
129
+ "tau_g": 0,
130
+ "rho_g": 3.0
131
+ },
132
+ {
133
+ "class": 7,
134
+ "rho_s": 0.05,
135
+ "tau_s": 2,
136
+ "tau_g": 0,
137
+ "rho_g": 3.0
138
+ },
139
+ {
140
+ "class": 8,
141
+ "rho_s": 0.1,
142
+ "tau_s": 2,
143
+ "tau_g": 0,
144
+ "rho_g": 0.9
145
+ },
146
+ {
147
+ "class": 9,
148
+ "rho_s": 0.7,
149
+ "tau_s": 1,
150
+ "tau_g": 1,
151
+ "rho_g": 0.9
152
+ },
153
+ {
154
+ "class": 10,
155
+ "rho_s": 0.2,
156
+ "tau_s": 1,
157
+ "tau_g": 1,
158
+ "rho_g": 0.9
159
+ },
160
+ {
161
+ "class": 11,
162
+ "rho_s": 0.05,
163
+ "tau_s": 1,
164
+ "tau_g": 1,
165
+ "rho_g": 0.9
166
+ },
167
+ {
168
+ "class": 12,
169
+ "rho_s": 0.3,
170
+ "tau_s": 1,
171
+ "tau_g": 0,
172
+ "rho_g": 6.0
173
+ },
174
+ {
175
+ "class": 13,
176
+ "rho_s": 0.2,
177
+ "tau_s": 2,
178
+ "tau_g": 0,
179
+ "rho_g": 0.9
180
+ },
181
+ {
182
+ "class": 14,
183
+ "rho_s": 0.9,
184
+ "tau_s": 2,
185
+ "tau_g": 0,
186
+ "rho_g": 3.0
187
+ },
188
+ {
189
+ "class": 15,
190
+ "rho_s": 0.1,
191
+ "tau_s": 2,
192
+ "tau_g": 0,
193
+ "rho_g": 6.0
194
+ },
195
+ {
196
+ "class": 16,
197
+ "rho_s": 0.3,
198
+ "tau_s": 2,
199
+ "tau_g": 1,
200
+ "rho_g": 3.0
201
+ },
202
+ {
203
+ "class": 17,
204
+ "rho_s": 0.05,
205
+ "tau_s": 2,
206
+ "tau_g": 0,
207
+ "rho_g": 0.9
208
+ }
209
+ ],
210
+ "cost_matrix_shape": [
211
+ 18,
212
+ 18
213
+ ]
214
+ }
models/ensemble_models.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73bc403ead3e52a4462a2ff1732ec93f3ecd064e8b4a7e99c11089fdd568e8ed
3
+ size 372092377
predictor.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import warnings
5
+ from functools import lru_cache
6
+ from pathlib import Path
7
+ from typing import Any
8
+
9
+ import joblib
10
+ import numpy as np
11
+
12
+
13
# All artifacts are resolved relative to this module's own directory, so the
# lookups do not depend on the process working directory.
APP_DIR = Path(__file__).resolve().parent
MODEL_PATH = APP_DIR.joinpath("models", "ensemble_models.joblib")
COST_MATRIX_PATH = APP_DIR.joinpath("cost_matrix.npy")
METADATA_PATH = APP_DIR.joinpath("model_metadata.json")
17
+
18
+
19
# The metadata file is read once, at import time. The encoding is given
# explicitly so parsing does not depend on the host locale.
with METADATA_PATH.open(encoding="utf-8") as f:
    _METADATA = json.load(f)

N_FEATURES = int(_METADATA["n_features"])
N_CLASSES = int(_METADATA["n_classes"])
PARAMETER_SETS = _METADATA["parameter_sets"]

# PARAMETER_SETS is indexed by predicted class id elsewhere in this module, so
# a missing entry would otherwise only surface as an IndexError while serving a
# request. Fail at import instead.
if len(PARAMETER_SETS) != N_CLASSES:
    raise ValueError(
        f"model_metadata.json lists {len(PARAMETER_SETS)} parameter sets "
        f"but n_classes is {N_CLASSES}"
    )
25
+
26
+
27
@lru_cache(maxsize=1)
def _load_models() -> list[Any]:
    """Deserialize the model ensemble stored at ``MODEL_PATH`` with joblib.

    The ``lru_cache(maxsize=1)`` decorator memoizes the result: the artifact is
    read from disk on the first call and the same object is handed back on
    every later call.
    """
    ensemble = joblib.load(MODEL_PATH)
    return ensemble
30
+
31
+
32
@lru_cache(maxsize=1)
def _load_cost_matrix() -> np.ndarray:
    """Load the cost matrix from ``COST_MATRIX_PATH`` as a float64 array.

    The result is memoized by ``lru_cache(maxsize=1)``, so the file is read on
    the first call only.

    Raises:
        ValueError: if the stored array is not ``(N_CLASSES, N_CLASSES)``.
    """
    expected_shape = (N_CLASSES, N_CLASSES)
    matrix = np.load(COST_MATRIX_PATH)
    if matrix.shape == expected_shape:
        # copy=False: no new array is allocated when the dtype already matches.
        return matrix.astype(np.float64, copy=False)
    raise ValueError(f"Expected cost matrix {expected_shape}, got {matrix.shape}")
38
+
39
+
40
def get_metadata() -> dict[str, Any]:
    """Return the public description of the deployed model.

    Most values are passed through from the loaded metadata file; the feature
    and class counts are the integer-coerced module constants, and
    ``parameter_sets`` is the module-level ``PARAMETER_SETS`` object.
    """
    # Built in insertion order so the response keys keep their original order.
    info: dict[str, Any] = {
        key: _METADATA[key] for key in ("model_type", "model_source", "n_models")
    }
    info["n_features"] = N_FEATURES
    info["n_classes"] = N_CLASSES
    info["feature_columns"] = _METADATA["feature_columns"]
    info["parameter_sets"] = PARAMETER_SETS
    info["cost_matrix_shape"] = _METADATA["cost_matrix_shape"]
    return info
51
+
52
+
53
def _predict_probabilities(features: list[float]) -> np.ndarray:
    """Average the ensemble's class probabilities for one feature vector.

    Each model's ``predict_proba`` output for the single input row is summed
    into a vector of length ``N_CLASSES``. A model that returns fewer columns
    has them scattered into the full vector according to its ``classes_``
    attribute. The sum is divided by the number of models and then
    renormalized to sum to 1 whenever the total is positive.

    Args:
        features: Feature values for one sample, in training column order.

    Returns:
        A float64 array of length ``N_CLASSES``.

    Raises:
        RuntimeError: If the loaded ensemble contains no models. Previously
            this divided by zero and produced NaN probabilities.
        ValueError: If a model returns a probability vector that cannot be
            mapped onto the ``N_CLASSES`` class ids.
    """
    # Bind once: used both for iteration and for the averaging divisor.
    models = _load_models()
    if len(models) == 0:
        raise RuntimeError("Model ensemble is empty; cannot compute class probabilities")

    x = np.asarray(features, dtype=np.float64).reshape(1, -1)
    probs = np.zeros(N_CLASSES, dtype=np.float64)

    # The filter is installed once for the whole loop rather than once per model.
    # NOTE(review): warnings.catch_warnings is documented as not thread-safe;
    # confirm this is acceptable if requests are served from several threads.
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", message="X does not have valid feature names")
        for model in models:
            model_probs = np.asarray(model.predict_proba(x)[0], dtype=np.float64)
            if model_probs.shape[0] == N_CLASSES:
                probs += model_probs
                continue

            # Fewer columns than N_CLASSES: the model has to say which class
            # each column belongs to. Silently adding zeros would skew the
            # ensemble average, so an unmappable output is an error.
            classes = getattr(model, "classes_", None)
            if classes is None or len(classes) == 0 or len(classes) != model_probs.shape[0]:
                raise ValueError(
                    f"Model returned {model_probs.shape[0]} probabilities but exposes no "
                    f"matching classes_ to map them onto {N_CLASSES} classes"
                )
            class_ids = np.asarray(classes, dtype=np.intp)
            # Reject out-of-range ids explicitly; a negative id would otherwise
            # wrap around and land on the wrong class.
            if class_ids.min() < 0 or class_ids.max() >= N_CLASSES:
                raise ValueError(
                    f"Model classes_ {class_ids.tolist()} fall outside 0..{N_CLASSES - 1}"
                )
            probs[class_ids] += model_probs

    probs /= len(models)
    total = probs.sum()
    if total > 0:
        probs /= total
    return probs
76
+
77
+
78
def _ranked_entries(indices: np.ndarray, probs: np.ndarray, risks: np.ndarray) -> list[dict[str, Any]]:
    """Build one result record per class index, keeping the given order.

    Each record carries the class id, its probability, its expected risk and
    the matching entry of ``PARAMETER_SETS``, all as plain Python values.
    """
    entries: list[dict[str, Any]] = []
    for raw_index in indices:
        class_id = int(raw_index)
        entries.append(
            {
                "class": class_id,
                "probability": float(probs[class_id]),
                "expected_risk": float(risks[class_id]),
                "params": PARAMETER_SETS[class_id],
            }
        )
    return entries
88
+
89
+
90
def predict_from_features(features: list[float], top_k: int = 3) -> dict[str, Any]:
    """Select the S4-FIFO parameter set with the lowest expected risk.

    Class probabilities come from the model ensemble; the expected risk of
    choosing each class is ``cost_matrix @ probabilities``.

    Args:
        features: Exactly ``N_FEATURES`` values in training column order.
        top_k: How many candidates to include in each ranked list. ``0``
            yields empty lists; values above the class count return every
            class.

    Returns:
        A dictionary with the minimum-risk class and its parameters, the
        highest-probability class and its parameters, and two ranked candidate
        lists (by descending probability and by ascending expected risk).

    Raises:
        ValueError: If ``top_k`` is negative or ``features`` has the wrong
            length.
    """
    # A negative top_k would slice from the end and silently return the wrong
    # number of candidates.
    if top_k < 0:
        raise ValueError(f"top_k must be non-negative, got {top_k}")
    if len(features) != N_FEATURES:
        raise ValueError(f"Expected {N_FEATURES} features, got {len(features)}")

    probs = _predict_probabilities([float(v) for v in features])
    risks = _load_cost_matrix() @ probs

    selected_idx = int(np.argmin(risks))
    probability_argmax = int(np.argmax(probs))

    # Stable sorts break ties in favour of the lowest class id, which is the
    # same rule argmin/argmax apply. This keeps the first entry of each ranked
    # list identical to selected_class / probability_argmax_class even when
    # several classes tie.
    probability_idx = np.argsort(-probs, kind="stable")[:top_k]
    risk_idx = np.argsort(risks, kind="stable")[:top_k]

    return {
        "selected_class": selected_idx,
        "selected_params": PARAMETER_SETS[selected_idx],
        "selection_method": "minimum_expected_risk",
        "probability_argmax_class": probability_argmax,
        "probability_argmax_params": PARAMETER_SETS[probability_argmax],
        "top_by_probability": _ranked_entries(probability_idx, probs, risks),
        "top_by_expected_risk": _ranked_entries(risk_idx, probs, risks),
    }
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ fastapi==0.115.6
2
+ uvicorn[standard]==0.34.0
3
+ numpy==2.4.5
4
+ joblib==1.5.3
5
+ lightgbm==4.6.0
6
+ scikit-learn==1.8.0