Spaces:
Sleeping
Sleeping
Update inference.py
Browse files- inference.py +4 -28
inference.py
CHANGED
|
@@ -98,14 +98,7 @@ def main():
|
|
| 98 |
difficulty = DIFFICULTIES[task_id]
|
| 99 |
|
| 100 |
# ── [START] log ──────────────────────────────────────────
|
| 101 |
-
print(
|
| 102 |
-
"type": "[START]",
|
| 103 |
-
"task_id": task_id,
|
| 104 |
-
"task_name": task_name,
|
| 105 |
-
"difficulty": difficulty,
|
| 106 |
-
"model": MODEL_NAME,
|
| 107 |
-
"n_episodes": N_EPISODES
|
| 108 |
-
}), flush=True)
|
| 109 |
|
| 110 |
episode_scores = []
|
| 111 |
for seed in range(N_EPISODES):
|
|
@@ -113,26 +106,13 @@ def main():
|
|
| 113 |
episode_scores.append(reward)
|
| 114 |
|
| 115 |
# ── [STEP] log ────────────────────────────────────────
|
| 116 |
-
print(
|
| 117 |
-
"type": "[STEP]",
|
| 118 |
-
"task_id": task_id,
|
| 119 |
-
"episode": seed,
|
| 120 |
-
"reward": reward
|
| 121 |
-
}), flush=True)
|
| 122 |
|
| 123 |
avg_score = round(sum(episode_scores) / len(episode_scores), 4)
|
| 124 |
all_results[task_id] = avg_score
|
| 125 |
|
| 126 |
# ── [END] log ─────────────────────────────────────────────
|
| 127 |
-
print(
|
| 128 |
-
"type": "[END]",
|
| 129 |
-
"task_id": task_id,
|
| 130 |
-
"task_name": task_name,
|
| 131 |
-
"difficulty": difficulty,
|
| 132 |
-
"avg_score": avg_score,
|
| 133 |
-
"scores": episode_scores,
|
| 134 |
-
"done": True
|
| 135 |
-
}), flush=True)
|
| 136 |
|
| 137 |
overall = round(sum(all_results.values()) / 3, 4)
|
| 138 |
|
|
@@ -140,11 +120,7 @@ def main():
|
|
| 140 |
with open("scores.json", "w") as f:
|
| 141 |
json.dump({"tasks": all_results, "overall": overall}, f, indent=2)
|
| 142 |
|
| 143 |
-
print(
|
| 144 |
-
"type": "[SUMMARY]",
|
| 145 |
-
"task_scores": all_results,
|
| 146 |
-
"overall": overall
|
| 147 |
-
}), flush=True)
|
| 148 |
|
| 149 |
|
| 150 |
if __name__ == "__main__":
|
|
|
|
| 98 |
difficulty = DIFFICULTIES[task_id]
|
| 99 |
|
| 100 |
# ── [START] log ──────────────────────────────────────────
|
| 101 |
+
print(f"[START] task={task_name}", flush=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
|
| 103 |
episode_scores = []
|
| 104 |
for seed in range(N_EPISODES):
|
|
|
|
| 106 |
episode_scores.append(reward)
|
| 107 |
|
| 108 |
# ── [STEP] log ────────────────────────────────────────
|
| 109 |
+
print(f"[STEP] step={seed+1} reward={reward}", flush=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
|
| 111 |
avg_score = round(sum(episode_scores) / len(episode_scores), 4)
|
| 112 |
all_results[task_id] = avg_score
|
| 113 |
|
| 114 |
# ── [END] log ─────────────────────────────────────────────
|
| 115 |
+
print(f"[END] task={task_name} score={avg_score} steps={len(episode_scores)}", flush=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
|
| 117 |
overall = round(sum(all_results.values()) / 3, 4)
|
| 118 |
|
|
|
|
| 120 |
with open("scores.json", "w") as f:
|
| 121 |
json.dump({"tasks": all_results, "overall": overall}, f, indent=2)
|
| 122 |
|
| 123 |
+
print(f"[SUMMARY] overall_score={overall} task_scores={all_results}", flush=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 124 |
|
| 125 |
|
| 126 |
if __name__ == "__main__":
|