immortalindeed committed on
Commit
3dfb5fe
·
1 Parent(s): 6938d9f

Remove rate limiter (blocks evaluator) and fix score aggregation to clamped sum

Browse files
Files changed (3) hide show
  1. README.md +1 -1
  2. inference.py +2 -2
  3. server/app.py +2 -31
README.md CHANGED
@@ -203,7 +203,7 @@ entropyenv/
203
  ├── pyproject.toml # Package configuration
204
  ├── Dockerfile # Multi-stage Docker build
205
  ├── server/
206
- │ ├── app.py # FastAPI server with rate limiting & session management
207
  │ ├── router.py # Task dispatcher with mastery detection
208
  │ ├── session.py # Episode state management
209
  │ ├── web_ui.py # Gradio UI with performance dashboard
 
203
  ├── pyproject.toml # Package configuration
204
  ├── Dockerfile # Multi-stage Docker build
205
  ├── server/
206
+ │ ├── app.py # FastAPI server with session management
207
  │ ├── router.py # Task dispatcher with mastery detection
208
  │ ├── session.py # Episode state management
209
  │ ├── web_ui.py # Gradio UI with performance dashboard
inference.py CHANGED
@@ -301,8 +301,8 @@ def run_task(client: OpenAI, task_id: str) -> float:
301
  if done:
302
  break
303
 
304
- # Average gives partial credit for completed steps before crash
305
- total_reward = sum(rewards) / max(len(rewards), 1) if rewards else 0.01
306
  score = round(min(max(total_reward, 0.01), 0.99), 4)
307
  success = score > 0.0
308
  rewards_str = ",".join(f"{r:.2f}" for r in rewards)
 
301
  if done:
302
  break
303
 
304
+ # Clamped sum — accumulate multi-turn rewards, cap at 0.99
305
+ total_reward = sum(rewards) if rewards else 0.01
306
  score = round(min(max(total_reward, 0.01), 0.99), 4)
307
  success = score > 0.0
308
  rewards_str = ",".join(f"{r:.2f}" for r in rewards)
server/app.py CHANGED
@@ -20,26 +20,6 @@ from .datasets.clinical_cases import CLINICAL_CASES
20
 
21
  app = FastAPI(title='Multi-Agent Dev Tools Environment')
22
 
23
- from collections import defaultdict
24
- from time import time
25
-
26
- # Global rate limiter (simple token bucket)
27
- RATE_LIMITS = defaultdict(lambda: {'tokens': 10, 'last_refill': time()})
28
-
29
- def check_rate_limit(ip: str) -> bool:
30
- """Returns True if request allowed, False if rate limited."""
31
- bucket = RATE_LIMITS[ip]
32
- now = time()
33
- elapsed = now - bucket['last_refill']
34
- refill = int(elapsed / 6)
35
- if refill > 0:
36
- bucket['tokens'] = min(10, bucket['tokens'] + refill)
37
- bucket['last_refill'] = now
38
- if bucket['tokens'] > 0:
39
- bucket['tokens'] -= 1
40
- return True
41
- return False
42
-
43
  # ── Load Debug Panel HTML ──
44
  _DEBUG_HTML_PATH = os.path.join(os.path.dirname(__file__), 'debug_panel.html')
45
 
@@ -126,15 +106,6 @@ async def health(request: Request):
126
  async def reset(request: Request):
127
  """Create a new episode for a task. Returns episode_id + initial observation."""
128
 
129
- # Get client IP
130
- ip = request.client.host if request.client else '127.0.0.1'
131
- if not check_rate_limit(ip):
132
- return JSONResponse(status_code=200, content={
133
- 'error': 'Rate limit exceeded. Max 10 requests/minute.',
134
- 'done': True,
135
- 'observation': {},
136
- })
137
-
138
  try:
139
  body = await request.json()
140
  task_id = body.get('task_id', 'sec_easy')
@@ -557,8 +528,8 @@ def _run_single_task_inline(task_id, api_base, api_key, model_id, system_prompt)
557
  logs.append(msg)
558
  yield {'type': 'log', 'level': 'info', 'msg': msg}
559
 
560
- # Average rewards — same logic as inference.py
561
- total_reward = sum(rewards) / max(len(rewards), 1) if rewards else 0.01
562
  score = round(min(max(total_reward, 0.01), 0.99), 4)
563
  success = score > 0.0
564
  rewards_str = ','.join(f'{r:.2f}' for r in rewards)
 
20
 
21
  app = FastAPI(title='Multi-Agent Dev Tools Environment')
22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  # ── Load Debug Panel HTML ──
24
  _DEBUG_HTML_PATH = os.path.join(os.path.dirname(__file__), 'debug_panel.html')
25
 
 
106
  async def reset(request: Request):
107
  """Create a new episode for a task. Returns episode_id + initial observation."""
108
 
 
 
 
 
 
 
 
 
 
109
  try:
110
  body = await request.json()
111
  task_id = body.get('task_id', 'sec_easy')
 
528
  logs.append(msg)
529
  yield {'type': 'log', 'level': 'info', 'msg': msg}
530
 
531
+ # Clamped sum — same logic as inference.py
532
+ total_reward = sum(rewards) if rewards else 0.01
533
  score = round(min(max(total_reward, 0.01), 0.99), 4)
534
  success = score > 0.0
535
  rewards_str = ','.join(f'{r:.2f}' for r in rewards)