Spaces:
Running
Running
Commit ·
3dfb5fe
1
Parent(s): 6938d9f
Remove rate limiter (blocks evaluator) and fix score aggregation to clamped sum
Browse files- README.md +1 -1
- inference.py +2 -2
- server/app.py +2 -31
README.md
CHANGED
|
@@ -203,7 +203,7 @@ entropyenv/
|
|
| 203 |
├── pyproject.toml # Package configuration
|
| 204 |
├── Dockerfile # Multi-stage Docker build
|
| 205 |
├── server/
|
| 206 |
-
│ ├── app.py # FastAPI server with
|
| 207 |
│ ├── router.py # Task dispatcher with mastery detection
|
| 208 |
│ ├── session.py # Episode state management
|
| 209 |
│ ├── web_ui.py # Gradio UI with performance dashboard
|
|
|
|
| 203 |
├── pyproject.toml # Package configuration
|
| 204 |
├── Dockerfile # Multi-stage Docker build
|
| 205 |
├── server/
|
| 206 |
+
│ ├── app.py # FastAPI server with session management
|
| 207 |
│ ├── router.py # Task dispatcher with mastery detection
|
| 208 |
│ ├── session.py # Episode state management
|
| 209 |
│ ├── web_ui.py # Gradio UI with performance dashboard
|
inference.py
CHANGED
|
@@ -301,8 +301,8 @@ def run_task(client: OpenAI, task_id: str) -> float:
|
|
| 301 |
if done:
|
| 302 |
break
|
| 303 |
|
| 304 |
-
#
|
| 305 |
-
total_reward = sum(rewards)
|
| 306 |
score = round(min(max(total_reward, 0.01), 0.99), 4)
|
| 307 |
success = score > 0.0
|
| 308 |
rewards_str = ",".join(f"{r:.2f}" for r in rewards)
|
|
|
|
| 301 |
if done:
|
| 302 |
break
|
| 303 |
|
| 304 |
+
# Clamped sum — accumulate multi-turn rewards, cap at 0.99
|
| 305 |
+
total_reward = sum(rewards) if rewards else 0.01
|
| 306 |
score = round(min(max(total_reward, 0.01), 0.99), 4)
|
| 307 |
success = score > 0.0
|
| 308 |
rewards_str = ",".join(f"{r:.2f}" for r in rewards)
|
server/app.py
CHANGED
|
@@ -20,26 +20,6 @@ from .datasets.clinical_cases import CLINICAL_CASES
|
|
| 20 |
|
| 21 |
app = FastAPI(title='Multi-Agent Dev Tools Environment')
|
| 22 |
|
| 23 |
-
from collections import defaultdict
|
| 24 |
-
from time import time
|
| 25 |
-
|
| 26 |
-
# Global rate limiter (simple token bucket)
|
| 27 |
-
RATE_LIMITS = defaultdict(lambda: {'tokens': 10, 'last_refill': time()})
|
| 28 |
-
|
| 29 |
-
def check_rate_limit(ip: str) -> bool:
|
| 30 |
-
"""Returns True if request allowed, False if rate limited."""
|
| 31 |
-
bucket = RATE_LIMITS[ip]
|
| 32 |
-
now = time()
|
| 33 |
-
elapsed = now - bucket['last_refill']
|
| 34 |
-
refill = int(elapsed / 6)
|
| 35 |
-
if refill > 0:
|
| 36 |
-
bucket['tokens'] = min(10, bucket['tokens'] + refill)
|
| 37 |
-
bucket['last_refill'] = now
|
| 38 |
-
if bucket['tokens'] > 0:
|
| 39 |
-
bucket['tokens'] -= 1
|
| 40 |
-
return True
|
| 41 |
-
return False
|
| 42 |
-
|
| 43 |
# ── Load Debug Panel HTML ──
|
| 44 |
_DEBUG_HTML_PATH = os.path.join(os.path.dirname(__file__), 'debug_panel.html')
|
| 45 |
|
|
@@ -126,15 +106,6 @@ async def health(request: Request):
|
|
| 126 |
async def reset(request: Request):
|
| 127 |
"""Create a new episode for a task. Returns episode_id + initial observation."""
|
| 128 |
|
| 129 |
-
# Get client IP
|
| 130 |
-
ip = request.client.host if request.client else '127.0.0.1'
|
| 131 |
-
if not check_rate_limit(ip):
|
| 132 |
-
return JSONResponse(status_code=200, content={
|
| 133 |
-
'error': 'Rate limit exceeded. Max 10 requests/minute.',
|
| 134 |
-
'done': True,
|
| 135 |
-
'observation': {},
|
| 136 |
-
})
|
| 137 |
-
|
| 138 |
try:
|
| 139 |
body = await request.json()
|
| 140 |
task_id = body.get('task_id', 'sec_easy')
|
|
@@ -557,8 +528,8 @@ def _run_single_task_inline(task_id, api_base, api_key, model_id, system_prompt)
|
|
| 557 |
logs.append(msg)
|
| 558 |
yield {'type': 'log', 'level': 'info', 'msg': msg}
|
| 559 |
|
| 560 |
-
#
|
| 561 |
-
total_reward = sum(rewards)
|
| 562 |
score = round(min(max(total_reward, 0.01), 0.99), 4)
|
| 563 |
success = score > 0.0
|
| 564 |
rewards_str = ','.join(f'{r:.2f}' for r in rewards)
|
|
|
|
| 20 |
|
| 21 |
app = FastAPI(title='Multi-Agent Dev Tools Environment')
|
| 22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
# ── Load Debug Panel HTML ──
|
| 24 |
_DEBUG_HTML_PATH = os.path.join(os.path.dirname(__file__), 'debug_panel.html')
|
| 25 |
|
|
|
|
| 106 |
async def reset(request: Request):
|
| 107 |
"""Create a new episode for a task. Returns episode_id + initial observation."""
|
| 108 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
try:
|
| 110 |
body = await request.json()
|
| 111 |
task_id = body.get('task_id', 'sec_easy')
|
|
|
|
| 528 |
logs.append(msg)
|
| 529 |
yield {'type': 'log', 'level': 'info', 'msg': msg}
|
| 530 |
|
| 531 |
+
# Clamped sum — same logic as inference.py
|
| 532 |
+
total_reward = sum(rewards) if rewards else 0.01
|
| 533 |
score = round(min(max(total_reward, 0.01), 0.99), 4)
|
| 534 |
success = score > 0.0
|
| 535 |
rewards_str = ','.join(f'{r:.2f}' for r in rewards)
|