Spaces:

Viraj0112
/

rl_code_fix_env

Sleeping

App Files Files Community

Viraaj Sawant committed on Apr 12

Commit

adff592

1 Parent(s): c48199a

updated rewards

Browse files

Files changed (5) hide show

clean_readme.py +30 -0
rl_code_fix_env/README.md +8 -5
rl_code_fix_env/src/environment/environment.py +12 -5
rl_code_fix_env/src/reward/reward.py +6 -2
workingdocker.txt +80 -0

clean_readme.py ADDED Viewed

	@@ -0,0 +1,30 @@

+import os
+readme_path = 'rl_code_fix_env/README.md'
+with open(readme_path, 'r', encoding='utf-8') as f:
+    text = f.read()
+# Strip any existing YAML header if present, then add the clean one
+parts = text.split('---')
+if len(parts) >= 3:
+    body = '---'.join(parts[2:])
+else:
+    body = text
+header = """---
+title: Rl Code Fix Env
+emoji: 🚀
+colorFrom: green
+colorTo: purple
+sdk: docker
+dockerfile: server/Dockerfile
+app_port: 8000
+pinned: false
+---
+"""
+new_content = header + body.strip() + "\n"
+# Write with Unix line endings
+with open(readme_path, 'wb') as f:
+    f.write(new_content.encode('utf-8').replace(b'\r\n', b'\n'))

rl_code_fix_env/README.md CHANGED Viewed

@@ -1,13 +1,16 @@
 ---
-title: Rl Code Fix Env
-emoji: 🚀
 colorFrom: green
 colorTo: purple
 sdk: docker
-dockerfile: server/Dockerfile
-app_port: 8000
 pinned: false
 ---
 # Rl Code Fix Env Environment
 A simple test environment that echoes back messages. Perfect for testing the env APIs as well as demonstrating environment usage patterns.
@@ -249,4 +252,4 @@ rl_code_fix_env/
      rl_code_fix_env_environment.py  # Core environment logic
      app.py             # FastAPI application (HTTP + WebSocket endpoints)
      Dockerfile         # Container image definition
-```

 ---
+title: Rl Code Fix Env Environment Server
+emoji: "🚀"
 colorFrom: green
 colorTo: purple
 sdk: docker
 pinned: false
+app_port: 8000
+base_path: /web
+tags:
+  - openenv
 ---
 # Rl Code Fix Env Environment
 A simple test environment that echoes back messages. Perfect for testing the env APIs as well as demonstrating environment usage patterns.
      rl_code_fix_env_environment.py  # Core environment logic
      app.py             # FastAPI application (HTTP + WebSocket endpoints)
      Dockerfile         # Container image definition
+```

rl_code_fix_env/src/environment/environment.py CHANGED Viewed

@@ -100,13 +100,18 @@ class CodeEnv:
                 total_count = int(test_counts_match.group(2))
                 self._state["passed"] = passed_count
                 self._state["total"] = max(total_count, 1)
-                # Calculate partial score: passed/total (range 0.0 to 1.0)
-                self._state["test_score"] = passed_count / max(total_count, 1)
             else:
                 # Fallback to binary scoring if counts not found
                 self._state["passed"] = 1 if passed else 0
                 self._state["total"] = 1
-                self._state["test_score"] = 1.0 if passed else 0.0
             self._state["logs"] = logs
@@ -138,9 +143,11 @@ class CodeEnv:
                 last_action_empty=last_action_empty,
             )
             if self._state["passed"] >= self._state["total"]:
-                reward = 1.0
         except Exception:
-            reward = float(self._state.get("test_score", 0.0))
         return self._get_obs(), float(reward), done, {}

                 total_count = int(test_counts_match.group(2))
                 self._state["passed"] = passed_count
                 self._state["total"] = max(total_count, 1)
+                # Calculate partial score: passed/total — clamped to (0, 1) open interval
+                # Validator rejects exact 0.0 and 1.0
+                _EPS = 1e-6
+                raw_score = passed_count / max(total_count, 1)
+                self._state["test_score"] = max(_EPS, min(1.0 - _EPS, raw_score))
             else:
                 # Fallback to binary scoring if counts not found
+                # Use epsilon-clamped values — validator rejects exact 0.0 and 1.0
+                _EPS = 1e-6
                 self._state["passed"] = 1 if passed else 0
                 self._state["total"] = 1
+                self._state["test_score"] = (1.0 - _EPS) if passed else _EPS
             self._state["logs"] = logs
                 last_action_empty=last_action_empty,
             )
             if self._state["passed"] >= self._state["total"]:
+                # 1.0 is rejected by validator — use highest allowed value
+                reward = 1.0 - 1e-6
         except Exception:
+            _EPS = 1e-6
+            reward = max(_EPS, min(1.0 - _EPS, float(self._state.get("test_score", _EPS))))
         return self._get_obs(), float(reward), done, {}

rl_code_fix_env/src/reward/reward.py CHANGED Viewed

@@ -18,8 +18,10 @@ def compute_reward(test_score, trace_obj, code, steps_taken, max_steps, prev_tes
         Reward score in [0.0, 1.0]
     """
     # If last action was empty/no-op, give minimal reward to encourage meaningful actions
     if last_action_empty:
-        return 0.0
     # 1. Functional Progress (90% weight) — primary signal
     functional_reward = float(test_score)
@@ -42,4 +44,6 @@ def compute_reward(test_score, trace_obj, code, steps_taken, max_steps, prev_tes
         - regression_penalty
     )
-    return max(0.0, min(1.0, reward))

         Reward score in [0.0, 1.0]
     """
     # If last action was empty/no-op, give minimal reward to encourage meaningful actions
+    # NOTE: validator requires score strictly > 0.0, so use a small epsilon
+    _EPS = 1e-6
     if last_action_empty:
+        return _EPS
     # 1. Functional Progress (90% weight) — primary signal
     functional_reward = float(test_score)
         - regression_penalty
     )
+    # Clamp to open interval (0.0, 1.0) — validator rejects boundary values
+    _EPS = 1e-6
+    return max(_EPS, min(1.0 - _EPS, reward))

workingdocker.txt ADDED Viewed

	@@ -0,0 +1,80 @@

+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+# Multi-stage build using openenv-base
+# This Dockerfile is flexible and works for both:
+# - In-repo environments (with local OpenEnv sources)
+# - Standalone environments (with openenv from PyPI/Git)
+# The build script (openenv build) handles context detection and sets appropriate build args.
+# BASE_IMAGE is declared before the first FROM so that both stages below can reference it.
+ARG BASE_IMAGE=ghcr.io/meta-pytorch/openenv-base:latest
+FROM ${BASE_IMAGE} AS builder
+WORKDIR /app
+# Ensure git is available (required for installing dependencies from VCS)
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends git && \
+    rm -rf /var/lib/apt/lists/*
+# Build argument to control whether we're building standalone or in-repo
+# NOTE(review): BUILD_MODE and ENV_NAME are not referenced by any later
+# instruction in this file — confirm whether the build tooling depends on them.
+ARG BUILD_MODE=in-repo
+ARG ENV_NAME=rl_code_fix_env
+# Copy environment code (always at root of build context)
+COPY . /app/env
+# For in-repo builds, openenv is already vendored in the build context
+# For standalone builds, openenv will be installed via pyproject.toml
+WORKDIR /app/env
+# Ensure uv is available (for local builds where base image lacks it)
+# NOTE(review): the installer is fetched with curl — assumes curl exists in the base image.
+RUN if ! command -v uv >/dev/null 2>&1; then \
+        curl -LsSf https://astral.sh/uv/install.sh | sh && \
+        mv /root/.local/bin/uv /usr/local/bin/uv && \
+        mv /root/.local/bin/uvx /usr/local/bin/uvx; \
+    fi
+# Install dependencies using uv sync
+# If uv.lock exists, use it; otherwise resolve on the fly
+RUN --mount=type=cache,target=/root/.cache/uv \
+    if [ -f uv.lock ]; then \
+        uv sync --frozen --no-install-project --no-editable; \
+    else \
+        uv sync --no-install-project --no-editable; \
+    fi
+# Second pass: the same sync without --no-install-project, so the project
+# itself is installed into the environment as well.
+RUN --mount=type=cache,target=/root/.cache/uv \
+    if [ -f uv.lock ]; then \
+        uv sync --frozen --no-editable; \
+    else \
+        uv sync --no-editable; \
+    fi
+# Final runtime stage
+FROM ${BASE_IMAGE}
+WORKDIR /app
+# Copy the virtual environment from builder
+COPY --from=builder /app/env/.venv /app/.venv
+# Copy the environment code
+COPY --from=builder /app/env /app/env
+# Set PATH to use the virtual environment
+ENV PATH="/app/.venv/bin:$PATH"
+# Set PYTHONPATH so imports work correctly
+ENV PYTHONPATH="/app/env:$PYTHONPATH"
+# Health check
+# NOTE(review): relies on curl being available in the runtime image and on the
+# server exposing /health on port 8000 — confirm both.
+HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
+    CMD curl -f http://localhost:8000/health || exit 1
+# Run the FastAPI server
+# The module path is constructed to work with the /app/env structure
+CMD ["sh", "-c", "cd /app/env && uvicorn server.app:app --host 0.0.0.0 --port 8000"]