Upload 6 files
Browse files- Dockerfile +16 -0
- README.md +154 -30
- openenv.yaml +68 -0
- pyproject.toml +25 -0
- requirements.txt +7 -0
- uv.lock +0 -0
Dockerfile
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.11-slim
|
| 2 |
+
|
| 3 |
+
WORKDIR /app
|
| 4 |
+
|
| 5 |
+
COPY requirements.txt .
|
| 6 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
| 7 |
+
|
| 8 |
+
COPY . .
|
| 9 |
+
|
| 10 |
+
ENV API_BASE_URL="https://api.openai.com/v1"
|
| 11 |
+
ENV MODEL_NAME="gpt-4o-mini"
|
| 12 |
+
ENV HF_TOKEN=""
|
| 13 |
+
|
| 14 |
+
EXPOSE 7860
|
| 15 |
+
|
| 16 |
+
CMD ["uvicorn", "api.main:app", "--host", "0.0.0.0", "--port", "7860"]
|
README.md
CHANGED
|
@@ -1,52 +1,176 @@
|
|
| 1 |
---
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
|
|
|
|
|
|
|
|
|
| 5 |
tags:
|
| 6 |
-
-
|
| 7 |
-
- cdn
|
| 8 |
-
- optimization
|
| 9 |
-
- agent-training
|
| 10 |
---
|
| 11 |
|
| 12 |
-
#
|
| 13 |
|
| 14 |
-
|
| 15 |
|
| 16 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
-
|
|
|
|
|
|
|
| 19 |
|
| 20 |
-
|
| 21 |
-
In the real world, infrastructure isn't static. A server's capacity might drop 40% because of a maintenance issue, or a cricket match might start, and suddenly 50 million people want the exact same stream at the same time.
|
| 22 |
|
| 23 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
|
| 25 |
-
|
| 26 |
-
* **LFU hit rate:** basically flatlined at 3%
|
| 27 |
|
| 28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
|
| 30 |
-
|
| 31 |
|
| 32 |
-
##
|
| 33 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
|
| 35 |
-
|
| 36 |
-
* **The Brain:** We used **Qwen 1.5B** and trained it via **GRPO (Group Relative Policy Optimization)** using the Hugging Face TRL library.
|
| 37 |
-
* **The Goal:** Moving past academic "hit rates" and looking at the actual dollar value. In the real world, a 1% hit-rate improvement for a company like Cloudflare means millions of dollars saved in bandwidth costs.
|
| 38 |
|
| 39 |
-
##
|
| 40 |
-
This isn't just about making the internet 5% faster. It’s about building systems that don't need a human to go in and manually rewrite the rules every time the traffic patterns change. Our agent doesn't just follow a rule; it learns a strategy.
|
| 41 |
|
| 42 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
|
| 44 |
---
|
| 45 |
-
## Code Repository
|
| 46 |
|
| 47 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
|
| 49 |
-
|
| 50 |
|
|
|
|
| 51 |
|
| 52 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: CDN Cache Optimizer
|
| 3 |
+
emoji: 🌐
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: green
|
| 6 |
+
sdk: docker
|
| 7 |
+
pinned: false
|
| 8 |
tags:
|
| 9 |
+
- openenv
|
|
|
|
|
|
|
|
|
|
| 10 |
---
|
| 11 |
|
| 12 |
+
# 🌐 CDN Cache Optimizer — OpenEnv RL Environment
|
| 13 |
|
| 14 |
+
An RL environment simulating **edge CDN cache management** — the exact problem companies like Meta solve at planetary scale. An agent manages a cache of limited size, deciding which files to evict when new content arrives, balancing **hit rate**, **bandwidth efficiency**, and **thrash avoidance**.
|
| 15 |
|
| 16 |
+
---
|
| 17 |
+
|
| 18 |
+
## 🎯 Motivation
|
| 19 |
+
|
| 20 |
+
Content Delivery Networks serve billions of files daily. Edge servers have limited storage, so they must constantly decide: *which cached files to keep, and which to evict?* Standard algorithms like LRU aren't optimal — especially when traffic has **viral bursts** (a file suddenly gets 50x more requests for 20 minutes, then drops back to its baseline).
|
| 21 |
+
|
| 22 |
+
A smarter agent can:
|
| 23 |
+
- Predict viral spikes from queue previews
|
| 24 |
+
- Avoid evicting high-frequency files
|
| 25 |
+
- Prevent cache thrashing (evicting then immediately re-requesting)
|
| 26 |
+
- Maximize bandwidth saved for users
|
| 27 |
+
|
| 28 |
+
---
|
| 29 |
+
|
| 30 |
+
## 🔧 Environment Description
|
| 31 |
+
|
| 32 |
+
At each step, a file is requested from the network. If it's already in the cache → **cache hit** (reward). If not → **cache miss**, and the agent must decide whether to evict an existing file to make room.
|
| 33 |
|
| 34 |
+
### Traffic Model
|
| 35 |
+
- **Steady files**: Consistent, cyclical demand
|
| 36 |
+
- **Viral files**: Bell-curve spike in popularity, then fade back to baseline
|
| 37 |
|
| 38 |
+
---
|
|
|
|
| 39 |
|
| 40 |
+
## 📐 Action & Observation Space
|
| 41 |
+
|
| 42 |
+
### Observation Space
|
| 43 |
+
| Field | Type | Description |
|
| 44 |
+
|-------|------|-------------|
|
| 45 |
+
| `step` | int | Current episode step |
|
| 46 |
+
| `cache_used_mb` | float | MB currently used |
|
| 47 |
+
| `cache_capacity_mb` | float | Total cache size |
|
| 48 |
+
| `cache_fill_ratio` | float | 0.0–1.0 fill level |
|
| 49 |
+
| `cached_files` | List[FileEntry] | All files in cache with metadata |
|
| 50 |
+
| `incoming_file_id` | str | File being requested |
|
| 51 |
+
| `incoming_file_size_mb` | float | Size of incoming file |
|
| 52 |
+
| `incoming_file_is_viral` | bool | Is this file currently viral? |
|
| 53 |
+
| `cache_hit` | bool | Is incoming file already cached? |
|
| 54 |
+
| `recent_hit_rate` | float | Rolling hit rate (last 20 steps) |
|
| 55 |
+
| `time_of_day` | float | Normalized 0.0–1.0 daily cycle |
|
| 56 |
+
| `queue_preview` | List[str] | Next 3 file IDs (prefetch hint) |
|
| 57 |
+
|
| 58 |
+
### FileEntry Fields
|
| 59 |
+
| Field | Type | Description |
|
| 60 |
+
|-------|------|-------------|
|
| 61 |
+
| `file_id` | str | Unique identifier |
|
| 62 |
+
| `size_mb` | float | File size in MB |
|
| 63 |
+
| `request_frequency` | float | Requests since cached |
|
| 64 |
+
| `is_viral` | bool | Currently viral |
|
| 65 |
+
| `last_accessed` | int | Step number of last access |
|
| 66 |
+
|
| 67 |
+
### Action Space
|
| 68 |
+
| Field | Type | Description |
|
| 69 |
+
|-------|------|-------------|
|
| 70 |
+
| `evict_file_id` | str \| null | File to evict (null = no eviction) |
|
| 71 |
+
|
| 72 |
+
### Reward Function
|
| 73 |
+
| Component | Range | Description |
|
| 74 |
+
|-----------|-------|-------------|
|
| 75 |
+
| `cache_hit_bonus` | +1.0 to +1.5 | Hit reward (viral hits = +1.5) |
|
| 76 |
+
| `bandwidth_saved` | +0.0 to +0.2 | Reward for bandwidth efficiency |
|
| 77 |
+
| `eviction_penalty` | -0.0 to -0.5 | Penalty for evicting popular files |
|
| 78 |
+
| `thrash_penalty` | 0.0 or -0.5 | Penalty for evicting the same file twice |
|
| 79 |
+
| `wasted_capacity_penalty` | -0.0 to -0.3 | Penalty for leaving cache empty |
|
| 80 |
|
| 81 |
+
---
|
|
|
|
| 82 |
|
| 83 |
+
## 📋 Tasks
|
| 84 |
+
|
| 85 |
+
### Task 1: Steady Traffic Cache (Easy)
|
| 86 |
+
- **Cache**: 100MB | **Files**: 30 | **Steps**: 100
|
| 87 |
+
- No viral files — steady demand only
|
| 88 |
+
- Agent learns basic LRU-style eviction
|
| 89 |
+
- **Target hit rate**: ≥ 0.60 → score 1.0
|
| 90 |
+
- **Baseline score**: ~0.75
|
| 91 |
+
|
| 92 |
+
### Task 2: Mixed Traffic Cache (Medium)
|
| 93 |
+
- **Cache**: 80MB | **Files**: 50 | **Steps**: 150
|
| 94 |
+
- 20% viral files mixed with steady demand
|
| 95 |
+
- Agent must handle spikes and prioritize popular content
|
| 96 |
+
- **Score**: 70% hit rate + 30% bandwidth
|
| 97 |
+
- **Baseline score**: ~0.60
|
| 98 |
+
|
| 99 |
+
### Task 3: Constrained Cache with Viral Bursts (Hard)
|
| 100 |
+
- **Cache**: 50MB | **Files**: 80 | **Steps**: 200
|
| 101 |
+
- 35% viral files, tight capacity, large file sizes
|
| 102 |
+
- Agent must predict spikes, avoid thrashing
|
| 103 |
+
- **Score**: 50% hit rate + 25% bandwidth + 25% reward quality
|
| 104 |
+
- **Baseline score**: ~0.45
|
| 105 |
|
| 106 |
+
---
|
| 107 |
|
| 108 |
+
## 🚀 Setup & Usage
|
| 109 |
+
|
| 110 |
+
### Local Setup
|
| 111 |
+
```bash
|
| 112 |
+
git clone <repo>
|
| 113 |
+
cd cdn-cache-env
|
| 114 |
+
pip install -r requirements.txt
|
| 115 |
+
```
|
| 116 |
+
|
| 117 |
+
### Run API Server
|
| 118 |
+
```bash
|
| 119 |
+
uvicorn api.main:app --host 0.0.0.0 --port 7860
|
| 120 |
+
```
|
| 121 |
+
|
| 122 |
+
### Run Inference (Baseline Agent)
|
| 123 |
+
```bash
|
| 124 |
+
export API_BASE_URL="https://api.openai.com/v1"
|
| 125 |
+
export MODEL_NAME="gpt-4o-mini"
|
| 126 |
+
export HF_TOKEN="your_token_here"
|
| 127 |
+
|
| 128 |
+
python inference.py
|
| 129 |
+
```
|
| 130 |
+
|
| 131 |
+
### Docker
|
| 132 |
+
```bash
|
| 133 |
+
docker build -t cdn-cache-env .
|
| 134 |
+
docker run -p 7860:7860 \
|
| 135 |
+
-e API_BASE_URL="https://api.openai.com/v1" \
|
| 136 |
+
-e MODEL_NAME="gpt-4o-mini" \
|
| 137 |
+
-e HF_TOKEN="your_token" \
|
| 138 |
+
cdn-cache-env
|
| 139 |
+
```
|
| 140 |
|
| 141 |
+
---
|
|
|
|
|
|
|
| 142 |
|
| 143 |
+
## 🌐 API Endpoints
|
|
|
|
| 144 |
|
| 145 |
+
| Method | Endpoint | Description |
|
| 146 |
+
|--------|----------|-------------|
|
| 147 |
+
| GET | `/health` | Health check (returns 200) |
|
| 148 |
+
| GET | `/tasks` | List all tasks |
|
| 149 |
+
| POST | `/reset` | Start episode `{"task_id": "task_easy", "seed": 42}` |
|
| 150 |
+
| POST | `/step` | Take action `{"evict_file_id": "file_001"}` (use `null` for no eviction) |
|
| 151 |
+
| GET | `/state` | Full environment state |
|
| 152 |
|
| 153 |
---
|
|
|
|
| 154 |
|
| 155 |
+
## 📊 Baseline Scores
|
| 156 |
+
|
| 157 |
+
Using the built-in `smart_policy` (non-LLM baseline):
|
| 158 |
+
|
| 159 |
+
| Task | Hit Rate | Score |
|
| 160 |
+
|------|----------|-------|
|
| 161 |
+
| Easy | ~0.72 | ~1.00 |
|
| 162 |
+
| Medium | ~0.61 | ~0.82 |
|
| 163 |
+
| Hard | ~0.48 | ~0.78 |
|
| 164 |
+
| **Overall** | | **~0.87** |
|
| 165 |
+
|
| 166 |
+
---
|
| 167 |
|
| 168 |
+
## 📝 Log Format
|
| 169 |
|
| 170 |
+
`inference.py` emits structured JSON logs:
|
| 171 |
|
| 172 |
+
```
|
| 173 |
+
{"type": "START", "task_id": "task_easy", ...}
|
| 174 |
+
{"type": "STEP", "step": 0, "action": {...}, "reward": 1.0, ...}
|
| 175 |
+
{"type": "END", "total_reward": 87.3, "final_hit_rate": 0.72, "score": 1.0}
|
| 176 |
+
```
|
openenv.yaml
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: cdn-cache-optimizer
|
| 2 |
+
version: "1.0.0"
|
| 3 |
+
description: >
|
| 4 |
+
Edge CDN Cache Optimizer — an RL environment where an agent manages
|
| 5 |
+
a content delivery network cache. The agent decides which files to evict
|
| 6 |
+
when the cache is full, balancing hit rate, bandwidth efficiency, and
|
| 7 |
+
avoiding cache thrashing. Simulates real-world viral traffic spikes
|
| 8 |
+
alongside steady baseline demand.
|
| 9 |
+
|
| 10 |
+
author: umar
|
| 11 |
+
tags:
|
| 12 |
+
- openenv
|
| 13 |
+
- cdn
|
| 14 |
+
- cache
|
| 15 |
+
- infrastructure
|
| 16 |
+
- real-world
|
| 17 |
+
|
| 18 |
+
tasks:
|
| 19 |
+
- id: task_easy
|
| 20 |
+
name: Steady Traffic Cache
|
| 21 |
+
difficulty: easy
|
| 22 |
+
episode_length: 100
|
| 23 |
+
cache_capacity_mb: 100.0
|
| 24 |
+
|
| 25 |
+
- id: task_medium
|
| 26 |
+
name: Mixed Traffic Cache
|
| 27 |
+
difficulty: medium
|
| 28 |
+
episode_length: 150
|
| 29 |
+
cache_capacity_mb: 80.0
|
| 30 |
+
|
| 31 |
+
- id: task_hard
|
| 32 |
+
name: Constrained Cache with Viral Bursts
|
| 33 |
+
difficulty: hard
|
| 34 |
+
episode_length: 200
|
| 35 |
+
cache_capacity_mb: 50.0
|
| 36 |
+
|
| 37 |
+
observation_space:
|
| 38 |
+
type: structured
|
| 39 |
+
fields:
|
| 40 |
+
- step: int
|
| 41 |
+
- cache_used_mb: float
|
| 42 |
+
- cache_capacity_mb: float
|
| 43 |
+
- cache_fill_ratio: float
|
| 44 |
+
- cached_files: list[FileEntry]
|
| 45 |
+
- incoming_file_id: str
|
| 46 |
+
- incoming_file_size_mb: float
|
| 47 |
+
- incoming_file_is_viral: bool
|
| 48 |
+
- cache_hit: bool
|
| 49 |
+
- recent_hit_rate: float
|
| 50 |
+
- time_of_day: float
|
| 51 |
+
- queue_preview: list[str]
|
| 52 |
+
|
| 53 |
+
action_space:
|
| 54 |
+
type: structured
|
| 55 |
+
fields:
|
| 56 |
+
- evict_file_id: str | null
|
| 57 |
+
|
| 58 |
+
reward_range: [-1.0, 1.5]
|
| 59 |
+
|
| 60 |
+
endpoints:
|
| 61 |
+
reset: POST /reset
|
| 62 |
+
step: POST /step
|
| 63 |
+
state: GET /state
|
| 64 |
+
|
| 65 |
+
runtime:
|
| 66 |
+
framework: fastapi
|
| 67 |
+
python: "3.11"
|
| 68 |
+
port: 7860
|
pyproject.toml
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[build-system]
|
| 2 |
+
requires = ["setuptools>=68.0", "wheel"]
|
| 3 |
+
build-backend = "setuptools.build_meta"
|
| 4 |
+
|
| 5 |
+
[project]
|
| 6 |
+
name = "cdn-cache-optimizer"
|
| 7 |
+
version = "1.0.0"
|
| 8 |
+
description = "Edge CDN Cache Optimizer - OpenEnv RL Environment"
|
| 9 |
+
requires-python = ">=3.11"
|
| 10 |
+
dependencies = [
|
| 11 |
+
"fastapi==0.111.0",
|
| 12 |
+
"uvicorn==0.29.0",
|
| 13 |
+
"pydantic==2.7.1",
|
| 14 |
+
"openai>=2.7.2",
|
| 15 |
+
"requests==2.31.0",
|
| 16 |
+
"python-multipart==0.0.9",
|
| 17 |
+
"openenv-core>=0.2.0",
|
| 18 |
+
]
|
| 19 |
+
|
| 20 |
+
[project.scripts]
|
| 21 |
+
server = "server.app:main"
|
| 22 |
+
|
| 23 |
+
[tool.setuptools.packages.find]
|
| 24 |
+
where = ["."]
|
| 25 |
+
include = ["env*", "api*", "server*"]
|
requirements.txt
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi==0.111.0
|
| 2 |
+
uvicorn==0.29.0
|
| 3 |
+
pydantic==2.7.1
|
| 4 |
+
openai>=2.7.2
|
| 5 |
+
requests==2.31.0
|
| 6 |
+
python-multipart==0.0.9
|
| 7 |
+
openenv-core>=0.2.0
|
uv.lock
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|