Spaces:
Running
Running
Upload folder using huggingface_hub
Browse files
- server/app.py +5 -2
- server/rust_coder_environment.py +14 -9
server/app.py
CHANGED
|
@@ -58,7 +58,10 @@ def get_llm_solution(problem_desc: str):
|
|
| 58 |
text = text.split("```rust")[1].split("```")[0]
|
| 59 |
elif "```" in text:
|
| 60 |
text = text.split("```")[1].split("```")[0]
|
| 61 |
-
|
|
|
|
|
|
|
|
|
|
| 62 |
except Exception as e:
|
| 63 |
return f"// LLM Error: {e}"
|
| 64 |
|
|
@@ -72,7 +75,7 @@ def evaluate_single(problem_id, code=None):
|
|
| 72 |
solution_code = code if code else get_llm_solution(problem["description"])
|
| 73 |
|
| 74 |
# 2. Guard: If LLM failed, do not evaluate
|
| 75 |
-
if solution_code.startswith("// LLM Error"):
|
| 76 |
return solution_code, {"error": "LLM failed to generate a solution. Check your HF_TOKEN."}
|
| 77 |
|
| 78 |
# 3. Evaluate properly
|
|
|
|
| 58 |
text = text.split("```rust")[1].split("```")[0]
|
| 59 |
elif "```" in text:
|
| 60 |
text = text.split("```")[1].split("```")[0]
|
| 61 |
+
text = text.strip()
|
| 62 |
+
if not text:
|
| 63 |
+
return "// LLM Error: empty response (no code returned)."
|
| 64 |
+
return text
|
| 65 |
except Exception as e:
|
| 66 |
return f"// LLM Error: {e}"
|
| 67 |
|
|
|
|
| 75 |
solution_code = code if code else get_llm_solution(problem["description"])
|
| 76 |
|
| 77 |
# 2. Guard: If LLM failed, do not evaluate
|
| 78 |
+
if not solution_code.strip() or solution_code.startswith("// LLM Error"):
|
| 79 |
return solution_code, {"error": "LLM failed to generate a solution. Check your HF_TOKEN."}
|
| 80 |
|
| 81 |
# 3. Evaluate properly
|
server/rust_coder_environment.py
CHANGED
|
@@ -118,16 +118,21 @@ class RustCoderEnvironment(Environment):
|
|
| 118 |
code = action.code
|
| 119 |
|
| 120 |
if not code.strip():
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
self.current_problem_idx += 1
|
| 124 |
return RustCoderObservation(
|
| 125 |
problem_description=problem["description"],
|
| 126 |
starter_code=problem.get("starter_code", ""),
|
| 127 |
compilation_success=False,
|
| 128 |
compilation_output="Error: no code submitted.",
|
| 129 |
test_results=[],
|
| 130 |
-
reward_breakdown={
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 131 |
done=done,
|
| 132 |
reward=0.0,
|
| 133 |
)
|
|
@@ -165,11 +170,11 @@ class RustCoderEnvironment(Environment):
|
|
| 165 |
|
| 166 |
# ── Total reward ──────────────────────────────────────────────
|
| 167 |
reward_breakdown = {
|
| 168 |
-
"
|
| 169 |
-
"
|
| 170 |
-
"
|
| 171 |
-
"
|
| 172 |
-
"
|
| 173 |
}
|
| 174 |
# Calculate weighted total reward
|
| 175 |
total_reward = round(
|
|
|
|
| 118 |
code = action.code
|
| 119 |
|
| 120 |
if not code.strip():
|
| 121 |
+
# Invalid/empty submission: do not advance the problem index.
|
| 122 |
+
done = False
|
|
|
|
| 123 |
return RustCoderObservation(
|
| 124 |
problem_description=problem["description"],
|
| 125 |
starter_code=problem.get("starter_code", ""),
|
| 126 |
compilation_success=False,
|
| 127 |
compilation_output="Error: no code submitted.",
|
| 128 |
test_results=[],
|
| 129 |
+
reward_breakdown={
|
| 130 |
+
"compilation": 0.0,
|
| 131 |
+
"correctness": 0.0,
|
| 132 |
+
"coverage": 0.0,
|
| 133 |
+
"elegance": 0.0,
|
| 134 |
+
"efficiency": 0.0,
|
| 135 |
+
},
|
| 136 |
done=done,
|
| 137 |
reward=0.0,
|
| 138 |
)
|
|
|
|
| 170 |
|
| 171 |
# ── Total reward ──────────────────────────────────────────────
|
| 172 |
reward_breakdown = {
|
| 173 |
+
"compilation": round(r_compilation, 4),
|
| 174 |
+
"correctness": round(r_correctness, 4),
|
| 175 |
+
"coverage": round(r_coverage, 4),
|
| 176 |
+
"elegance": round(r_elegance, 4),
|
| 177 |
+
"efficiency": round(r_efficiency, 4),
|
| 178 |
}
|
| 179 |
# Calculate weighted total reward
|
| 180 |
total_reward = round(
|