Parthiban007 committed on
Commit
2154988
·
verified ·
1 Parent(s): 7d402e0

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. server/rust_coder_environment.py +14 -0
server/rust_coder_environment.py CHANGED
@@ -194,6 +194,7 @@ class RustCoderEnvironment(Environment):
194
  problem.get("id"),
195
  problem.get("title"),
196
  )
 
197
  done = False
198
  return RustCoderObservation(
199
  problem_description=problem["description"],
@@ -219,6 +220,13 @@ class RustCoderEnvironment(Environment):
219
  # ── 1. Compilation (40%) ──────────────────────────────────────
220
  compilation_success, compilation_output = self._compile_check(code)
221
  r_compilation = 1.0 if compilation_success else 0.0
 
 
 
 
 
 
 
222
 
223
  # ── 2. Correctness + Coverage (20% each) ─────────────────────
224
  test_results: List[Dict] = []
@@ -242,12 +250,16 @@ class RustCoderEnvironment(Environment):
242
  # Only score elegance for code that compiles; otherwise it can
243
  # incorrectly award points for non-compiling submissions.
244
  r_elegance = self._score_elegance(code) if compilation_success else 0.0
 
 
245
 
246
  # ── 4. Efficiency (10%) ───────────────────────────────────────
247
  baseline_ms: float = problem.get("performance_baseline_ms", 100.0)
248
  r_efficiency = 0.0
249
  if compilation_success:
250
  r_efficiency = self._score_efficiency(code, baseline_ms)
 
 
251
 
252
  # ── Total reward ──────────────────────────────────────────────
253
  reward_breakdown = {
@@ -272,6 +284,8 @@ class RustCoderEnvironment(Environment):
272
  )
273
 
274
  # ── Advance Logic ─────────────────────────────────────────────
 
 
275
  self.current_problem_idx += 1
276
  done = self.current_problem_idx >= len(self.problems)
277
 
 
194
  problem.get("id"),
195
  problem.get("title"),
196
  )
197
+ # Episode is not finished; allow retry on same problem.
198
  done = False
199
  return RustCoderObservation(
200
  problem_description=problem["description"],
 
220
  # ── 1. Compilation (40%) ──────────────────────────────────────
221
  compilation_success, compilation_output = self._compile_check(code)
222
  r_compilation = 1.0 if compilation_success else 0.0
223
+ # Warnings are not compilation errors in Rust, but they indicate lower quality.
224
+ # Penalize compilation score slightly when warnings are present.
225
+ warning_count = 0
226
+ if compilation_output:
227
+ warning_count = len(re.findall(r'(?m)^warning:', compilation_output))
228
+ if compilation_success and warning_count > 0:
229
+ r_compilation = max(0.6, 1.0 - min(0.05 * warning_count, 0.4))
230
 
231
  # ── 2. Correctness + Coverage (20% each) ─────────────────────
232
  test_results: List[Dict] = []
 
250
  # Only score elegance for code that compiles; otherwise it can
251
  # incorrectly award points for non-compiling submissions.
252
  r_elegance = self._score_elegance(code) if compilation_success else 0.0
253
+ if compilation_success and warning_count > 0:
254
+ r_elegance = max(0.0, round(r_elegance - min(0.02 * warning_count, 0.2), 4))
255
 
256
  # ── 4. Efficiency (10%) ───────────────────────────────────────
257
  baseline_ms: float = problem.get("performance_baseline_ms", 100.0)
258
  r_efficiency = 0.0
259
  if compilation_success:
260
  r_efficiency = self._score_efficiency(code, baseline_ms)
261
+ if warning_count > 0:
262
+ r_efficiency = max(0.0, round(r_efficiency - min(0.02 * warning_count, 0.2), 4))
263
 
264
  # ── Total reward ──────────────────────────────────────────────
265
  reward_breakdown = {
 
284
  )
285
 
286
  # ── Advance Logic ─────────────────────────────────────────────
287
+ # One step = one evaluated task. We advance to the next task, and the episode
288
+ # ends only after the final task has been evaluated.
289
  self.current_problem_idx += 1
290
  done = self.current_problem_idx >= len(self.problems)
291