Viraaj Sawant committed on
Commit
adff592
·
1 Parent(s): c48199a

updated rewards

Browse files
clean_readme.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Rewrite the Space README so that it starts with one clean YAML header.

Any YAML front matter already at the top of the README is removed and the
canonical ``header`` below is prepended. The file is written back as UTF-8
with Unix line endings.
"""
import os

readme_path = 'rl_code_fix_env/README.md'

header = """---
title: Rl Code Fix Env
emoji: 🚀
colorFrom: green
colorTo: purple
sdk: docker
dockerfile: server/Dockerfile
app_port: 8000
pinned: false
---
"""


def strip_front_matter(text):
    """Return *text* without its leading YAML front-matter block.

    A front-matter block is recognised only when the very first line is
    ``---`` and a later line is a closing ``---``. Text without such a block
    is returned unchanged, so ``---`` sequences inside the body (markdown
    table separators, horizontal rules) never cause content to be dropped.
    """
    # A UTF-8 byte-order mark would otherwise hide the opening delimiter.
    if text.startswith('\ufeff'):
        text = text[1:]

    lines = text.splitlines(keepends=True)
    if not lines or lines[0].strip() != '---':
        return text

    for index in range(1, len(lines)):
        if lines[index].strip() == '---':
            return ''.join(lines[index + 1:])

    # Opening delimiter without a closing one: not front matter, keep it all.
    return text


def build_readme(text):
    """Return the new README content: clean header, body, one final newline."""
    body = strip_front_matter(text).strip()
    # Normalise to Unix line endings regardless of how the text was obtained.
    return (header + body + "\n").replace('\r\n', '\n')


def main():
    """Read the README, replace its header, and write it back in place."""
    with open(readme_path, 'r', encoding='utf-8') as f:
        text = f.read()

    # newline='\n' disables platform newline translation on write.
    with open(readme_path, 'w', encoding='utf-8', newline='\n') as f:
        f.write(build_readme(text))


if __name__ == '__main__':
    main()
rl_code_fix_env/README.md CHANGED
@@ -1,13 +1,16 @@
1
  ---
2
- title: Rl Code Fix Env
3
- emoji: 🚀
4
  colorFrom: green
5
  colorTo: purple
6
  sdk: docker
7
- dockerfile: server/Dockerfile
8
- app_port: 8000
9
  pinned: false
 
 
 
 
10
  ---
 
11
  # Rl Code Fix Env Environment
12
 
13
  A simple test environment that echoes back messages. Perfect for testing the env APIs as well as demonstrating environment usage patterns.
@@ -249,4 +252,4 @@ rl_code_fix_env/
249
  rl_code_fix_env_environment.py # Core environment logic
250
  app.py # FastAPI application (HTTP + WebSocket endpoints)
251
  Dockerfile # Container image definition
252
- ```
 
1
  ---
2
+ title: Rl Code Fix Env Environment Server
3
+ emoji: "🚀"
4
  colorFrom: green
5
  colorTo: purple
6
  sdk: docker
 
 
7
  pinned: false
8
+ app_port: 8000
9
+ base_path: /web
10
+ tags:
11
+ - openenv
12
  ---
13
+
14
  # Rl Code Fix Env Environment
15
 
16
  A simple test environment that echoes back messages. Perfect for testing the env APIs as well as demonstrating environment usage patterns.
 
252
  rl_code_fix_env_environment.py # Core environment logic
253
  app.py # FastAPI application (HTTP + WebSocket endpoints)
254
  Dockerfile # Container image definition
255
+ ```
rl_code_fix_env/src/environment/environment.py CHANGED
@@ -100,13 +100,18 @@ class CodeEnv:
100
  total_count = int(test_counts_match.group(2))
101
  self._state["passed"] = passed_count
102
  self._state["total"] = max(total_count, 1)
103
- # Calculate partial score: passed/total (range 0.0 to 1.0)
104
- self._state["test_score"] = passed_count / max(total_count, 1)
 
 
 
105
  else:
106
  # Fallback to binary scoring if counts not found
 
 
107
  self._state["passed"] = 1 if passed else 0
108
  self._state["total"] = 1
109
- self._state["test_score"] = 1.0 if passed else 0.0
110
 
111
  self._state["logs"] = logs
112
 
@@ -138,9 +143,11 @@ class CodeEnv:
138
  last_action_empty=last_action_empty,
139
  )
140
  if self._state["passed"] >= self._state["total"]:
141
- reward = 1.0
 
142
  except Exception:
143
- reward = float(self._state.get("test_score", 0.0))
 
144
 
145
  return self._get_obs(), float(reward), done, {}
146
 
 
100
  total_count = int(test_counts_match.group(2))
101
  self._state["passed"] = passed_count
102
  self._state["total"] = max(total_count, 1)
103
+ # Calculate partial score: passed/total clamped to (0, 1) open interval
104
+ # Validator rejects exact 0.0 and 1.0
105
+ _EPS = 1e-6
106
+ raw_score = passed_count / max(total_count, 1)
107
+ self._state["test_score"] = max(_EPS, min(1.0 - _EPS, raw_score))
108
  else:
109
  # Fallback to binary scoring if counts not found
110
+ # Use epsilon-clamped values — validator rejects exact 0.0 and 1.0
111
+ _EPS = 1e-6
112
  self._state["passed"] = 1 if passed else 0
113
  self._state["total"] = 1
114
+ self._state["test_score"] = (1.0 - _EPS) if passed else _EPS
115
 
116
  self._state["logs"] = logs
117
 
 
143
  last_action_empty=last_action_empty,
144
  )
145
  if self._state["passed"] >= self._state["total"]:
146
+ # 1.0 is rejected by validator — use highest allowed value
147
+ reward = 1.0 - 1e-6
148
  except Exception:
149
+ _EPS = 1e-6
150
+ reward = max(_EPS, min(1.0 - _EPS, float(self._state.get("test_score", _EPS))))
151
 
152
  return self._get_obs(), float(reward), done, {}
153
 
rl_code_fix_env/src/reward/reward.py CHANGED
@@ -18,8 +18,10 @@ def compute_reward(test_score, trace_obj, code, steps_taken, max_steps, prev_tes
18
  Reward score in [0.0, 1.0]
19
  """
20
  # If last action was empty/no-op, give minimal reward to encourage meaningful actions
 
 
21
  if last_action_empty:
22
- return 0.0
23
 
24
  # 1. Functional Progress (90% weight) — primary signal
25
  functional_reward = float(test_score)
@@ -42,4 +44,6 @@ def compute_reward(test_score, trace_obj, code, steps_taken, max_steps, prev_tes
42
  - regression_penalty
43
  )
44
 
45
- return max(0.0, min(1.0, reward))
 
 
 
18
  Reward score in [0.0, 1.0]
19
  """
20
  # If last action was empty/no-op, give minimal reward to encourage meaningful actions
21
+ # NOTE: validator requires score strictly > 0.0, so use a small epsilon
22
+ _EPS = 1e-6
23
  if last_action_empty:
24
+ return _EPS
25
 
26
  # 1. Functional Progress (90% weight) — primary signal
27
  functional_reward = float(test_score)
 
44
  - regression_penalty
45
  )
46
 
47
+ # Clamp to open interval (0.0, 1.0) — validator rejects boundary values
48
+ _EPS = 1e-6
49
+ return max(_EPS, min(1.0 - _EPS, reward))
workingdocker.txt ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ # Multi-stage build using openenv-base
8
+ # This Dockerfile is flexible and works for both:
9
+ # - In-repo environments (with local OpenEnv sources)
10
+ # - Standalone environments (with openenv from PyPI/Git)
11
+ # The build script (openenv build) handles context detection and sets appropriate build args.
12
+
13
+ ARG BASE_IMAGE=ghcr.io/meta-pytorch/openenv-base:latest
14
+ FROM ${BASE_IMAGE} AS builder
15
+
16
+ WORKDIR /app
17
+
18
+ # Ensure git is available (required for installing dependencies from VCS)
19
+ RUN apt-get update && \
20
+ apt-get install -y --no-install-recommends git && \
21
+ rm -rf /var/lib/apt/lists/*
22
+
23
+ # Build argument to control whether we're building standalone or in-repo
24
+ ARG BUILD_MODE=in-repo
25
+ ARG ENV_NAME=rl_code_fix_env
26
+
27
+ # Copy environment code (always at root of build context)
28
+ COPY . /app/env
29
+
30
+ # For in-repo builds, openenv is already vendored in the build context
31
+ # For standalone builds, openenv will be installed via pyproject.toml
32
+ WORKDIR /app/env
33
+
34
+ # Ensure uv is available (for local builds where base image lacks it)
35
+ RUN if ! command -v uv >/dev/null 2>&1; then \
36
+ curl -LsSf https://astral.sh/uv/install.sh | sh && \
37
+ mv /root/.local/bin/uv /usr/local/bin/uv && \
38
+ mv /root/.local/bin/uvx /usr/local/bin/uvx; \
39
+ fi
40
+
41
+ # Install dependencies using uv sync
42
+ # If uv.lock exists, use it; otherwise resolve on the fly
43
+ RUN --mount=type=cache,target=/root/.cache/uv \
44
+ if [ -f uv.lock ]; then \
45
+ uv sync --frozen --no-install-project --no-editable; \
46
+ else \
47
+ uv sync --no-install-project --no-editable; \
48
+ fi
49
+
50
+ RUN --mount=type=cache,target=/root/.cache/uv \
51
+ if [ -f uv.lock ]; then \
52
+ uv sync --frozen --no-editable; \
53
+ else \
54
+ uv sync --no-editable; \
55
+ fi
56
+
57
+ # Final runtime stage
58
+ FROM ${BASE_IMAGE}
59
+
60
+ WORKDIR /app
61
+
62
+ # Copy the virtual environment from builder
63
+ COPY --from=builder /app/env/.venv /app/.venv
64
+
65
+ # Copy the environment code
66
+ COPY --from=builder /app/env /app/env
67
+
68
+ # Set PATH to use the virtual environment
69
+ ENV PATH="/app/.venv/bin:$PATH"
70
+
71
+ # Set PYTHONPATH so imports work correctly
72
+ ENV PYTHONPATH="/app/env:$PYTHONPATH"
73
+
74
+ # Health check
75
+ HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
76
+ CMD curl -f http://localhost:8000/health || exit 1
77
+
78
+ # Run the FastAPI server
79
+ # The module path is constructed to work with the /app/env structure
80
+ CMD ["sh", "-c", "cd /app/env && uvicorn server.app:app --host 0.0.0.0 --port 8000"]