Parthiban007 committed
Commit 8fef615 · verified · 1 parent: 0b15484

Upload folder using huggingface_hub

Files changed (2):
  1. server/app.py +44 -0
  2. server/rust_coder_environment.py +121 -0
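
The commit message refers to huggingface_hub's folder upload. A minimal sketch of that call follows, for context only; the `repo_id` shown is a placeholder, not taken from this page:

```python
# Minimal sketch of the upload named in the commit message (not part of the diff).
from huggingface_hub import HfApi

api = HfApi()  # reads HF_TOKEN from the environment
api.upload_folder(
    folder_path="server",               # local folder containing app.py etc.
    repo_id="Parthiban007/rust-coder",  # placeholder; actual repo not shown here
    repo_type="space",
)
```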
server/app.py CHANGED
@@ -11,6 +11,8 @@ Endpoints:
 
 import os
 import logging
+import json
+import time
 import gradio as gr
 from openai import OpenAI
 from dotenv import load_dotenv
@@ -29,6 +31,27 @@ logging.basicConfig(
 )
 logger = logging.getLogger("rust_coder.server")
 
+# #region agent log
+_DEBUG_LOG_PATH = os.getenv("DEBUG_LOG_PATH") or "debug-55b5ef.log"
+_DEBUG_SESSION_ID = "55b5ef"
+def _dbg(hypothesis_id: str, location: str, message: str, data: dict, run_id: str = "pre-fix") -> None:
+    try:
+        payload = {
+            "sessionId": _DEBUG_SESSION_ID,
+            "runId": run_id,
+            "hypothesisId": hypothesis_id,
+            "location": location,
+            "message": message,
+            "data": data,
+            "timestamp": int(time.time() * 1000),
+        }
+        with open(_DEBUG_LOG_PATH, "a", encoding="utf-8") as f:
+            f.write(json.dumps(payload, ensure_ascii=False) + "\n")
+    except Exception:
+        # Never break app for debug logging
+        pass
+# #endregion
+
 # --- Core OpenEnv Server Setup ---
 # Use a distinct name for the OpenEnv FastAPI instance
 openenv_app = create_app(
@@ -52,6 +75,12 @@ HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("API_KEY")
 def get_llm_solution(problem_desc: str):
     """Call LLM to get a Rust solution"""
     try:
+        _dbg(
+            "H2",
+            "server/app.py:get_llm_solution:entry",
+            "LLM call starting",
+            {"model": MODEL_NAME, "base_url": API_BASE_URL, "prompt_chars": len(problem_desc or ""), "token_present": bool(HF_TOKEN)},
+        )
         logger.info(
             "LLM call start model=%s base_url=%s prompt_chars=%d token_present=%s",
             MODEL_NAME,
@@ -77,11 +106,14 @@ def get_llm_solution(problem_desc: str):
             text = text.split("```")[1].split("```")[0]
         text = text.strip()
         if not text:
+            _dbg("H2", "server/app.py:get_llm_solution:empty", "LLM returned empty after cleanup", {"raw_chars": len((completion.choices[0].message.content or ""))})
             logger.warning("LLM returned empty code after cleanup.")
             return "// LLM Error: empty response (no code returned)."
+        _dbg("H2", "server/app.py:get_llm_solution:exit", "LLM call finished", {"returned_code_chars": len(text)})
         logger.info("LLM call end: returned_code_chars=%d", len(text))
         return text
     except Exception as e:
+        _dbg("H2", "server/app.py:get_llm_solution:error", "LLM call exception", {"error": str(e)})
         logger.exception("LLM call failed.")
         return f"// LLM Error: {e}"
 
@@ -90,6 +122,12 @@ def evaluate_single(problem_id, code=None):
     try:
         idx = int(problem_id.split(":")[0]) - 1
         problem = RustCoderEnvironment().problems[idx]
+        _dbg(
+            "H2",
+            "server/app.py:evaluate_single:entry",
+            "evaluate_single called",
+            {"problem_id": str(problem_id), "idx": idx, "code_is_none": code is None, "code_chars": len(code or "")},
+        )
         logger.info(
             "evaluate_single start problem_id=%s idx=%d code_provided=%s",
             problem_id,
@@ -102,6 +140,12 @@ def evaluate_single(problem_id, code=None):
 
         # 2. Guard: If LLM failed, do not evaluate
        if not solution_code.strip() or solution_code.startswith("// LLM Error"):
+            _dbg(
+                "H2",
+                "server/app.py:evaluate_single:abort",
+                "evaluate_single abort due to empty/error code",
+                {"starts_with_llm_error": solution_code.startswith("// LLM Error"), "solution_code_chars": len(solution_code or "")},
+            )
             logger.warning(
                 "evaluate_single abort: empty_or_error_code=%s chars=%d",
                 solution_code.startswith("// LLM Error"),
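
The `_dbg` helper above appends one JSON object per line (JSONL) to `debug-55b5ef.log`, or to `$DEBUG_LOG_PATH` when set. A small reader sketch for inspecting those records; the script is illustrative and not part of the commit, but the field names (`hypothesisId`, `location`, `message`, `data`, `timestamp`) match the payload built in `_dbg`:

```python
import json
import os
import sys
from typing import Optional

# Illustrative reader for the JSONL debug log written by _dbg (not part of the commit).
def read_debug_log(path: str, hypothesis_id: Optional[str] = None) -> list:
    records = []
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                rec = json.loads(line)
            except json.JSONDecodeError:
                continue  # skip partially written or corrupt lines
            if hypothesis_id is None or rec.get("hypothesisId") == hypothesis_id:
                records.append(rec)
    return records

if __name__ == "__main__":
    log_path = os.getenv("DEBUG_LOG_PATH") or "debug-55b5ef.log"
    wanted = sys.argv[1] if len(sys.argv) > 1 else None  # e.g. "H1" or "H2"
    for rec in read_debug_log(log_path, wanted):
        print(rec.get("timestamp"), rec.get("location"), rec.get("message"), rec.get("data"))
```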
server/rust_coder_environment.py CHANGED
@@ -13,11 +13,13 @@ import subprocess
 import tempfile
 import time
 import logging
+import json
 from typing import Dict, List, Optional, Tuple
 
 from openenv.core.env_server.interfaces import Environment
 
 from models import RustCoderAction, RustCoderObservation
+from openai import OpenAI
 
 
 # Resolve problems.json: look in same dir as this file, then parent
@@ -69,6 +71,29 @@ class RustCoderEnvironment(Environment):
         self.current_problem_idx: int = 0
         self.step_count: int = 0
 
+        # #region agent log
+        self._debug_log_path = os.getenv("DEBUG_LOG_PATH") or "debug-55b5ef.log"
+        self._debug_session_id = "55b5ef"
+        # #endregion
+
+    # #region agent log
+    def _dbg(self, hypothesis_id: str, location: str, message: str, data: dict, run_id: str = "pre-fix") -> None:
+        try:
+            payload = {
+                "sessionId": self._debug_session_id,
+                "runId": run_id,
+                "hypothesisId": hypothesis_id,
+                "location": location,
+                "message": message,
+                "data": data,
+                "timestamp": int(time.time() * 1000),
+            }
+            with open(self._debug_log_path, "a", encoding="utf-8") as f:
+                f.write(json.dumps(payload, ensure_ascii=False) + "\n")
+        except Exception:
+            pass
+    # #endregion
+
     # ------------------------------------------------------------------
     # Internal helpers
     # ------------------------------------------------------------------
@@ -119,7 +144,97 @@ class RustCoderEnvironment(Environment):
         problem = self.problems[self.current_problem_idx]
         code = action.code
 
+        self._dbg(
+            "H1",
+            "server/rust_coder_environment.py:step:entry",
+            "env.step called",
+            {
+                "step_count": self.step_count,
+                "problem_id": problem.get("id"),
+                "title": problem.get("title"),
+                "code_chars": len(code or ""),
+                "code_is_empty": not bool((code or "").strip()),
+            },
+        )
+
         if not code.strip():
+            # Some UIs may "step" without providing an action payload.
+            # Optionally auto-generate code via LLM so the UI can still progress.
+            auto_llm = (os.getenv("AUTO_LLM_ON_EMPTY_STEP") or "0").strip().lower() in {"1", "true", "yes", "y"}
+            if auto_llm:
+                model = os.getenv("MODEL_NAME") or "Qwen/Qwen2.5-72B-Instruct"
+                base_url = os.getenv("API_BASE_URL") or "https://router.huggingface.co/v1"
+                token = os.getenv("HF_TOKEN") or os.getenv("API_KEY")
+                prompt = problem.get("description", "")
+                starter = problem.get("starter_code", "")
+                if starter:
+                    prompt += f"\n\nStarter Code:\n```rust\n{starter}\n```"
+
+                self._dbg(
+                    "H5",
+                    "server/rust_coder_environment.py:step:auto_llm",
+                    "AUTO_LLM_ON_EMPTY_STEP enabled; attempting LLM generation",
+                    {"model": model, "base_url": base_url, "prompt_chars": len(prompt), "token_present": bool(token)},
+                )
+
+                if not token:
+                    self._logger.error("AUTO_LLM_ON_EMPTY_STEP enabled but HF_TOKEN/API_KEY missing.")
+                    return RustCoderObservation(
+                        problem_description=problem.get("description", ""),
+                        starter_code=problem.get("starter_code", ""),
+                        compilation_success=False,
+                        compilation_output="Error: AUTO_LLM_ON_EMPTY_STEP enabled but HF_TOKEN/API_KEY is missing.",
+                        test_results=[],
+                        reward_breakdown={
+                            "compilation": 0.0,
+                            "correctness": 0.0,
+                            "coverage": 0.0,
+                            "elegance": 0.0,
+                            "efficiency": 0.0,
+                        },
+                        done=False,
+                        reward=0.0,
+                    )
+                try:
+                    client_llm = OpenAI(base_url=base_url, api_key=token)
+                    completion = client_llm.chat.completions.create(
+                        model=model,
+                        messages=[
+                            {"role": "system", "content": "You are a senior Rust engineer. Return ONLY the complete fixed Rust code. No explanation."},
+                            {"role": "user", "content": prompt},
+                        ],
+                        temperature=0.1,
+                    )
+                    text = (completion.choices[0].message.content or "").strip()
+                    if "```rust" in text:
+                        text = text.split("```rust")[1].split("```")[0]
+                    elif "```" in text:
+                        text = text.split("```")[1].split("```")[0]
+                    text = text.strip()
+                    if text:
+                        code = text
+                        self._dbg(
+                            "H5",
+                            "server/rust_coder_environment.py:step:auto_llm_ok",
+                            "LLM produced non-empty code; continuing evaluation",
+                            {"code_chars": len(code)},
+                        )
+                    else:
+                        self._dbg(
+                            "H5",
+                            "server/rust_coder_environment.py:step:auto_llm_empty",
+                            "LLM returned empty after cleanup; falling back to empty submission behavior",
+                            {"raw_chars": len((completion.choices[0].message.content or ""))},
+                        )
+                except Exception as e:
+                    self._dbg(
+                        "H5",
+                        "server/rust_coder_environment.py:step:auto_llm_error",
+                        "LLM call failed; falling back to empty submission behavior",
+                        {"error": str(e)},
+                    )
+
+        if not code.strip():
             # Invalid/empty submission: do not advance the problem index.
             self._logger.warning(
                 "Empty code submitted step_count=%d problem_id=%s title=%s",
@@ -127,6 +242,12 @@ class RustCoderEnvironment(Environment):
                 problem.get("id"),
                 problem.get("title"),
             )
+            self._dbg(
+                "H1",
+                "server/rust_coder_environment.py:step:empty",
+                "empty code branch taken",
+                {"step_count": self.step_count, "problem_id": problem.get("id")},
+            )
             done = False
             return RustCoderObservation(
                 problem_description=problem["description"],
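
The new `AUTO_LLM_ON_EMPTY_STEP` flag lets the environment generate a solution itself when `step()` receives empty code. A hedged usage sketch, assuming `RustCoderAction` accepts a `code` keyword and that the script runs from the `server/` directory; this snippet is not part of the commit:

```python
import os

# Hedged usage sketch (not part of the commit). Assumes RustCoderAction
# accepts a `code` keyword and this runs from the server/ directory.
os.environ["AUTO_LLM_ON_EMPTY_STEP"] = "1"  # opt in to the LLM fallback
# HF_TOKEN (or API_KEY) must be set, or step() returns an error observation.
# MODEL_NAME and API_BASE_URL are optional; the defaults in step() apply otherwise.

from rust_coder_environment import RustCoderEnvironment
from models import RustCoderAction

env = RustCoderEnvironment()
obs = env.step(RustCoderAction(code=""))  # empty code triggers the auto-LLM path
print(obs.compilation_success, obs.compilation_output)
```

Without the flag (the default), an empty submission still takes the "Invalid/empty submission" branch above and the problem index does not advance.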