Spaces:

Parthiban007
/

rust_coder

Running

App Files Community

Parthiban007 committed 1 day ago

Commit

2154988

verified ·

1 Parent(s): 7d402e0

Upload folder using huggingface_hub

Browse files

Files changed (1) hide show

server/rust_coder_environment.py +14 -0

server/rust_coder_environment.py CHANGED Viewed

@@ -194,6 +194,7 @@ class RustCoderEnvironment(Environment):
                     problem.get("id"),
                     problem.get("title"),
                 )
                 done = False
                 return RustCoderObservation(
                     problem_description=problem["description"],
@@ -219,6 +220,13 @@ class RustCoderEnvironment(Environment):
         # ── 1. Compilation (40%) ──────────────────────────────────────
         compilation_success, compilation_output = self._compile_check(code)
         r_compilation = 1.0 if compilation_success else 0.0
         # ── 2. Correctness + Coverage (20% each) ─────────────────────
         test_results: List[Dict] = []
@@ -242,12 +250,16 @@ class RustCoderEnvironment(Environment):
         # Only score elegance for code that compiles; otherwise it can
         # incorrectly award points for non-compiling submissions.
         r_elegance = self._score_elegance(code) if compilation_success else 0.0
         # ── 4. Efficiency (10%) ───────────────────────────────────────
         baseline_ms: float = problem.get("performance_baseline_ms", 100.0)
         r_efficiency = 0.0
         if compilation_success:
             r_efficiency = self._score_efficiency(code, baseline_ms)
         # ── Total reward ──────────────────────────────────────────────
         reward_breakdown = {
@@ -272,6 +284,8 @@ class RustCoderEnvironment(Environment):
             )
         # ── Advance Logic ─────────────────────────────────────────────
         self.current_problem_idx += 1
         done = self.current_problem_idx >= len(self.problems)

                     problem.get("id"),
                     problem.get("title"),
                 )
+                # Episode is not finished; allow retry on same problem.
                 done = False
                 return RustCoderObservation(
                     problem_description=problem["description"],
         # ── 1. Compilation (40%) ──────────────────────────────────────
         compilation_success, compilation_output = self._compile_check(code)
         r_compilation = 1.0 if compilation_success else 0.0
+        # Warnings are not compilation errors in Rust, but they indicate lower quality.
+        # Penalize compilation score slightly when warnings are present.
+        warning_count = 0
+        if compilation_output:
+            warning_count = len(re.findall(r'(?m)^warning:', compilation_output))
+            if compilation_success and warning_count > 0:
+                r_compilation = max(0.6, 1.0 - min(0.05 * warning_count, 0.4))
         # ── 2. Correctness + Coverage (20% each) ─────────────────────
         test_results: List[Dict] = []
         # Only score elegance for code that compiles; otherwise it can
         # incorrectly award points for non-compiling submissions.
         r_elegance = self._score_elegance(code) if compilation_success else 0.0
+        if compilation_success and warning_count > 0:
+            r_elegance = max(0.0, round(r_elegance - min(0.02 * warning_count, 0.2), 4))
         # ── 4. Efficiency (10%) ───────────────────────────────────────
         baseline_ms: float = problem.get("performance_baseline_ms", 100.0)
         r_efficiency = 0.0
         if compilation_success:
             r_efficiency = self._score_efficiency(code, baseline_ms)
+            if warning_count > 0:
+                r_efficiency = max(0.0, round(r_efficiency - min(0.02 * warning_count, 0.2), 4))
         # ── Total reward ──────────────────────────────────────────────
         reward_breakdown = {
             )
         # ── Advance Logic ─────────────────────────────────────────────
+        # One step = one evaluated task. We advance to the next task, and the episode
+        # ends only after the final task has been evaluated.
         self.current_problem_idx += 1
         done = self.current_problem_idx >= len(self.problems)