Sushruth21 committed on
Commit
160ceda
·
1 Parent(s): 9358d83

feat: Integrate task-specific graders into inference script per hackathon rules

Browse files

- Import TASK_GRADERS, get_grader, and get_grader_metadata from task_graders module
- Validate task configuration at startup and display task-specific grader metadata
- Apply task-specific grader function to final observation to calculate score
- Display grader evaluation details including difficulty, targets, and score
- Log metrics including total reward, tasks completed, efficiency, and grader score
- Ensure the grader reward logic is configured within the inference script, as the hackathon rules require

Files changed (1) hide show
  1. inference.py +51 -5
inference.py CHANGED
@@ -29,6 +29,7 @@ from openai import OpenAI, OpenAIError
29
 
30
  from he_demo.client import EnergyOptimizationEnv
31
  from he_demo.models import EnergyOptimizationAction
 
32
 
33
  # Environment configuration variables
34
  # Default endpoint uses Hugging Face's router; set API_BASE_URL explicitly if needed.
@@ -205,6 +206,23 @@ async def main() -> None:
205
  if not HF_TOKEN:
206
  raise ValueError("HF_TOKEN environment variable must be set to your Hugging Face API key")
207
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
208
  client = OpenAI(base_url=API_BASE_URL, api_key=HF_TOKEN)
209
 
210
  async def local_image_exists(image_name: str) -> bool:
@@ -273,15 +291,43 @@ async def main() -> None:
273
  if done:
274
  break
275
 
276
- # Calculate final score based on tasks completed and efficiency
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
277
  total_reward = sum(rewards)
278
  tasks_completed = len(result.observation.tasks_completed) if result.observation.tasks_completed else 0
279
  efficiency_score = result.observation.efficiency_score
280
 
281
- # Score combines task completion and efficiency
282
- score = (tasks_completed / 5.0) * 0.6 + (efficiency_score / 1.0) * 0.4
283
- score = min(max(score, 0.0), 1.0) # clamp to [0, 1]
284
- success = score >= SUCCESS_SCORE_THRESHOLD
 
285
 
286
  finally:
287
  try:
 
29
 
30
  from he_demo.client import EnergyOptimizationEnv
31
  from he_demo.models import EnergyOptimizationAction
32
+ from he_demo.task_graders import TASK_GRADERS, get_grader, get_grader_metadata
33
 
34
  # Environment configuration variables
35
  # Default endpoint uses Hugging Face's router; set API_BASE_URL explicitly if needed.
 
206
  if not HF_TOKEN:
207
  raise ValueError("HF_TOKEN environment variable must be set to your Hugging Face API key")
208
 
209
+ # ===== GRADER CONFIGURATION (Per Hackathon Rules) =====
210
+ # Validate that the specified task has a grader configured
211
+ if TASK_NAME not in TASK_GRADERS:
212
+ available_tasks = list(TASK_GRADERS.keys())
213
+ raise ValueError(
214
+ f"Task '{TASK_NAME}' not found. Available tasks with graders: {available_tasks}. "
215
+ f"Set ENERGY_TASK environment variable to one of these task names."
216
+ )
217
+
218
+ task_metadata = get_grader_metadata(TASK_NAME)
219
+ print(
220
+ f"[CONFIG] Task-specific grader configured: task={TASK_NAME} "
221
+ f"difficulty={task_metadata['difficulty']} "
222
+ f"description='{task_metadata['description']}'",
223
+ flush=True,
224
+ )
225
+
226
  client = OpenAI(base_url=API_BASE_URL, api_key=HF_TOKEN)
227
 
228
  async def local_image_exists(image_name: str) -> bool:
 
291
  if done:
292
  break
293
 
294
+ # ===== GRADER INTEGRATION (Per Hackathon Rules) =====
295
+ # Apply the task-specific grader to evaluate performance
296
+ try:
297
+ grader_func = get_grader(TASK_NAME)
298
+ grader_score = grader_func(result.observation)
299
+ grader_metadata = get_grader_metadata(TASK_NAME)
300
+ except Exception as e:
301
+ print(f"[DEBUG] Grader error for task {TASK_NAME}: {e}", flush=True)
302
+ grader_score = 0.0
303
+ grader_metadata = None
304
+
305
+ # Calculate final score using grader logic
306
+ # Grader provides task-specific evaluation (0.0-1.0)
307
+ score = grader_score
308
+
309
+ # Log grader details
310
+ if grader_metadata:
311
+ print(
312
+ f"[GRADER] task={TASK_NAME} difficulty={grader_metadata.get('difficulty', 'unknown')} "
313
+ f"target_ram={grader_metadata.get('target_ram', 'n/a')}% "
314
+ f"target_energy={grader_metadata.get('target_energy', 'n/a')}kWh "
315
+ f"grader_score={grader_score:.3f}",
316
+ flush=True,
317
+ )
318
+
319
+ success = score >= SUCCESS_SCORE_THRESHOLD
320
+
321
+ # Additional logging of completions and efficiency
322
  total_reward = sum(rewards)
323
  tasks_completed = len(result.observation.tasks_completed) if result.observation.tasks_completed else 0
324
  efficiency_score = result.observation.efficiency_score
325
 
326
+ print(
327
+ f"[METRICS] total_reward={total_reward:.2f} tasks_completed={tasks_completed} "
328
+ f"efficiency_score={efficiency_score:.3f} final_grader_score={score:.3f}",
329
+ flush=True,
330
+ )
331
 
332
  finally:
333
  try: