Spaces:

Dolphin-Syndrom
/

code-review-env

Sleeping

App Files Files Community

Dolphin-Syndrom committed on Apr 5

Commit

9a51a29

1 Parent(s): 62b2af2

Initial code-review-environment submission

Browse files

Files changed (4) hide show

.gitignore +5 -0
inference.py +101 -40
scripts/validate-submission.sh +170 -0
server/Dockerfile +2 -2

.gitignore ADDED Viewed

	@@ -0,0 +1,5 @@

+.env
+venv/
+__pycache__/
+*.pyc
+.pytest_cache/

inference.py CHANGED Viewed

@@ -2,6 +2,7 @@ import json
 import os
 import re
 import sys
 from collections.abc import Callable
 from typing import Any
@@ -18,6 +19,7 @@ except ImportError:
 TASK_IDS = ["task_easy", "task_medium", "task_hard"]
 DEFAULT_ENV_URL = "http://localhost:8000"
 DEFAULT_MODEL = "gpt-4o-mini"
 DETECTION_RULES: dict[str, Callable[[str], bool]] = {
@@ -45,6 +47,22 @@ SYSTEM_PROMPT = (
 )
 def detect_issues_rule_based(code_snippet: str) -> list[str]:
     detected: list[str] = []
     for issue_tag, detector in DETECTION_RULES.items():
@@ -94,6 +112,7 @@ def build_llm_action(
     file_name: str,
     task_description: str,
     code_snippet: str,
 ) -> dict[str, Any]:
     user_prompt = (
         f"Task ID: {task_id}\n"
@@ -103,14 +122,25 @@ def build_llm_action(
         "Return strictly JSON with: issues_found, review_comment, severity."
     )
-    completion = client.chat.completions.create(
-        model=model,
-        messages=[
-            {"role": "system", "content": SYSTEM_PROMPT},
-            {"role": "user", "content": user_prompt},
-        ],
-        temperature=0,
-    )
     raw_text = completion.choices[0].message.content or ""
     parsed = _extract_json_object(raw_text)
@@ -134,44 +164,75 @@ def build_llm_action(
 def run_baseline() -> dict[str, dict[str, Any]]:
     env_url = os.getenv("ENV_URL", DEFAULT_ENV_URL).rstrip("/")
-    openai_api_key = os.getenv("OPENAI_API_KEY")
-    openai_model = os.getenv("OPENAI_MODEL", DEFAULT_MODEL)
-    openai_client = OpenAI(api_key=openai_api_key) if openai_api_key else None
     results: dict[str, dict[str, Any]] = {}
     with CodeReviewEnv(base_url=env_url).sync() as env:
         for task_id in TASK_IDS:
-            reset_result = env.reset(task_id=task_id)
-            observation = reset_result.observation
-            code_snippet = observation.code_snippet
-            file_name = observation.file_name
-            task_description = observation.task_description
-            action_payload: dict[str, Any]
-            if openai_client:
-                try:
-                    action_payload = build_llm_action(
-                        client=openai_client,
-                        model=openai_model,
-                        task_id=task_id,
-                        file_name=file_name,
-                        task_description=task_description,
-                        code_snippet=code_snippet,
-                    )
-                except Exception:
                     action_payload = build_rule_action(code_snippet)
-            else:
-                action_payload = build_rule_action(code_snippet)
-            step_result = env.step(ReviewAction.model_validate(action_payload))
-            score = float(step_result.reward or 0.0)
-            results[task_id] = {
-                "score": score,
-                "issues_found": action_payload.get("issues_found", []),
-            }
     return results
@@ -179,7 +240,7 @@ def run_baseline() -> dict[str, dict[str, Any]]:
 def main() -> int:
     try:
         output = run_baseline()
-        print(json.dumps(output, indent=2))
         return 0
     except Exception as exc:
         print(f"inference failed: {exc}", file=sys.stderr)

 import os
 import re
 import sys
+import time
 from collections.abc import Callable
 from typing import Any
 TASK_IDS = ["task_easy", "task_medium", "task_hard"]
 DEFAULT_ENV_URL = "http://localhost:8000"
 DEFAULT_MODEL = "gpt-4o-mini"
+DEFAULT_API_BASE_URL = "https://api.openai.com/v1"
 DETECTION_RULES: dict[str, Callable[[str], bool]] = {
 )
+def log_start(task: str, env: str, model: str) -> None:
+    """Print the ``[START]`` marker line for a task run and flush stdout."""
+    start_line = f"[START] task={task} env={env} model={model}"
+    print(start_line, flush=True)
+def log_step(step: int, action: str, reward: float, done: bool, error: str | None) -> None:
+    """Print one ``[STEP]`` line describing a single environment step.
+
+    ``reward`` is shown with two decimals, ``done`` as the lower-cased string
+    form of the value, and a missing or empty ``error`` as the literal ``null``.
+    """
+    error_val = error or "null"
+    done_val = str(done).lower()
+    step_line = f"[STEP] step={step} action={action} reward={reward:.2f} done={done_val} error={error_val}"
+    print(step_line, flush=True)
+def log_end(success: bool, steps: int, score: float, rewards: list[float]) -> None:
+    """Print the ``[END]`` summary line for a task run and flush stdout.
+
+    ``score`` is shown with three decimals and ``rewards`` as a comma-separated
+    list of two-decimal values (empty when there are none).
+    """
+    rewards_str = ",".join(format(r, ".2f") for r in rewards)
+    success_val = str(success).lower()
+    end_line = f"[END] success={success_val} steps={steps} score={score:.3f} rewards={rewards_str}"
+    print(end_line, flush=True)
 def detect_issues_rule_based(code_snippet: str) -> list[str]:
     detected: list[str] = []
     for issue_tag, detector in DETECTION_RULES.items():
     file_name: str,
     task_description: str,
     code_snippet: str,
+    max_retries: int = 3,
 ) -> dict[str, Any]:
     user_prompt = (
         f"Task ID: {task_id}\n"
         "Return strictly JSON with: issues_found, review_comment, severity."
     )
+    last_error: Exception | None = None
+    for attempt in range(max_retries):
+        try:
+            completion = client.chat.completions.create(
+                model=model,
+                messages=[
+                    {"role": "system", "content": SYSTEM_PROMPT},
+                    {"role": "user", "content": user_prompt},
+                ],
+                temperature=0,
+            )
+            break
+        except Exception as e:
+            last_error = e
+            wait_time = 2 ** attempt  # 1s, 2s, 4s backoff
+            print(f"[RETRY] task_id={task_id} attempt={attempt + 1} wait={wait_time} error={e}", flush=True)
+            time.sleep(wait_time)
+    else:
+        raise last_error  # type: ignore[misc]
     raw_text = completion.choices[0].message.content or ""
     parsed = _extract_json_object(raw_text)
 def run_baseline() -> dict[str, dict[str, Any]]:
+    """Run each task in TASK_IDS for a single step and collect its score.
+
+    Configuration is read from the environment: ENV_URL, API_BASE_URL,
+    MODEL_NAME and HF_TOKEN. When HF_TOKEN is unset or empty no client is
+    created and every task uses the rule-based action; when the LLM call for
+    a task raises, that task falls back to the rule-based action.
+
+    Returns:
+        A mapping of task id to ``{"score": ..., "issues_found": ...}`` where
+        the score is the step reward as a float (0.0 when the reward is falsy).
+
+    Raises:
+        Exception: any error from reset, validation or step is re-raised
+            unchanged after an ``[END]`` line was logged for the failing task.
+    """
     env_url = os.getenv("ENV_URL", DEFAULT_ENV_URL).rstrip("/")
+    api_base_url = os.getenv("API_BASE_URL", DEFAULT_API_BASE_URL)
+    model_name = os.getenv("MODEL_NAME", DEFAULT_MODEL)
+    api_key = os.getenv("HF_TOKEN")
+    # NOTE(review): read but never used below - presumably required by the
+    # submission template; confirm before removing.
+    local_image_name = os.getenv("LOCAL_IMAGE_NAME")
+    openai_client = OpenAI(base_url=api_base_url, api_key=api_key) if api_key else None
     results: dict[str, dict[str, Any]] = {}
     with CodeReviewEnv(base_url=env_url).sync() as env:
         for task_id in TASK_IDS:
+            log_start(task=task_id, env="code_review_env", model=model_name)
+            try:
+                reset_result = env.reset(task_id=task_id)
+                observation = reset_result.observation
+                code_snippet = observation.code_snippet
+                file_name = observation.file_name
+                task_description = observation.task_description
+                action_payload: dict[str, Any]
+                if openai_client:
+                    try:
+                        action_payload = build_llm_action(
+                            client=openai_client,
+                            model=model_name,
+                            task_id=task_id,
+                            file_name=file_name,
+                            task_description=task_description,
+                            code_snippet=code_snippet,
+                        )
+                    except Exception as exc:
+                        # Best-effort: a failed LLM call degrades to the rules.
+                        print(f"LLM Fallback error: {exc}", file=sys.stderr)
+                        action_payload = build_rule_action(code_snippet)
+                    # Small delay between tasks to avoid GitHub Models rate limits;
+                    # only needed when the model API is actually being called.
+                    time.sleep(1)
+                else:
                     action_payload = build_rule_action(code_snippet)
+                action_str = json.dumps(action_payload, separators=(',', ':'))
+                step_result = env.step(ReviewAction.model_validate(action_payload))
+                score = float(step_result.reward or 0.0)
+                log_step(
+                    step=1,
+                    action=action_str,
+                    reward=score,
+                    done=step_result.done,
+                    error=None
+                )
+                results[task_id] = {
+                    "score": score,
+                    "issues_found": action_payload.get("issues_found", []),
+                }
+                success = score >= 0.95
+                log_end(success=success, steps=1, score=score, rewards=[score])
+            except Exception:
+                # Always emit an END line even on exception, then let the
+                # original exception propagate with its traceback intact.
+                log_end(success=False, steps=0, score=0.0, rewards=[])
+                raise
     return results
 def main() -> int:
     try:
         output = run_baseline()
+        # Do not print out any raw JSON for output as it pollutes STDOUT formatting rules
         return 0
     except Exception as exc:
         print(f"inference failed: {exc}", file=sys.stderr)

scripts/validate-submission.sh ADDED Viewed

	@@ -0,0 +1,170 @@

+#!/usr/bin/env bash
+#
+# validate-submission.sh — OpenEnv Submission Validator
+#
+# Checks that your HF Space is live, Docker image builds, and openenv validate passes.
+#
+# Usage:
+#   ./scripts/validate-submission.sh <ping_url> [repo_dir]
+#
+# Example:
+#   ./scripts/validate-submission.sh https://your-space.hf.space .
+# -u: expanding an unset variable is an error; pipefail: a pipeline fails if
+# any stage fails. errexit (-e) is not enabled, so failures are checked by hand.
+set -uo pipefail
+# Time limit, in seconds, passed to run_with_timeout for the docker build.
+DOCKER_BUILD_TIMEOUT=600
+# Colour escapes default to empty and are only filled in when stdout is a
+# terminal, so redirected output carries no escape sequences.
+RED='' GREEN='' YELLOW='' BOLD='' NC=''
+if [ -t 1 ]; then
+  RED='\033[0;31m'
+  GREEN='\033[0;32m'
+  YELLOW='\033[1;33m'
+  BOLD='\033[1m'
+  NC='\033[0m'
+fi
+# run_with_timeout SECS CMD [ARGS...]
+# Run CMD with a time limit of SECS seconds. `timeout` is used when present,
+# then `gtimeout`; otherwise CMD is backgrounded next to a watcher subshell
+# that kills it once SECS seconds have elapsed. The function's status is the
+# status reported by whichever mechanism ran CMD.
+run_with_timeout() {
+  local limit="$1"
+  shift
+  if command -v timeout >/dev/null 2>&1; then
+    timeout "$limit" "$@"
+    return
+  fi
+  if command -v gtimeout >/dev/null 2>&1; then
+    gtimeout "$limit" "$@"
+    return
+  fi
+  # Fallback: no timeout binary available.
+  "$@" &
+  local cmd_pid=$!
+  ( sleep "$limit" && kill "$cmd_pid" 2>/dev/null ) &
+  local watcher_pid=$!
+  wait "$cmd_pid" 2>/dev/null
+  local exit_code=$?
+  # The command is done (or was killed); the watcher is no longer needed.
+  kill "$watcher_pid" 2>/dev/null
+  wait "$watcher_pid" 2>/dev/null
+  return "$exit_code"
+}
+# portable_mktemp [PREFIX]
+# Create a temp file named PREFIX-XXXXXX (PREFIX defaults to "validate") under
+# $TMPDIR, or /tmp when TMPDIR is unset, and print its path. If the templated
+# call fails, fall back to a plain `mktemp`.
+portable_mktemp() {
+  local name_prefix="${1:-validate}"
+  local template="${TMPDIR:-/tmp}/${name_prefix}-XXXXXX"
+  if ! mktemp "$template" 2>/dev/null; then
+    mktemp
+  fi
+}
+# Paths appended to this array are removed by cleanup() when the script exits.
+CLEANUP_FILES=()
+# The ${arr[@]+"${arr[@]}"} form expands to nothing while the array is empty;
+# presumably this avoids an "unbound variable" error under `set -u` on older
+# bash versions - confirm before simplifying.
+cleanup() { rm -f "${CLEANUP_FILES[@]+"${CLEANUP_FILES[@]}"}"; }
+trap cleanup EXIT
+# Positional arguments: $1 = Space URL (required), $2 = repo path (default ".").
+PING_URL="${1:-}"
+REPO_DIR="${2:-.}"
+if [[ -z "$PING_URL" ]]; then
+  printf "Usage: %s <ping_url> [repo_dir]\n" "$0"
+  printf "\n"
+  printf "  ping_url   Your HuggingFace Space URL (e.g. https://your-space.hf.space)\n"
+  printf "  repo_dir   Path to your repo (default: current directory)\n"
+  exit 1
+fi
+# Resolve the repo path to an absolute directory; bail out if it does not exist.
+REPO_DIR="$(cd "$REPO_DIR" 2>/dev/null && pwd)" || {
+  printf "Error: directory '%s' not found\n" "${2:-.}"
+  exit 1
+}
+# Drop one trailing slash so "$PING_URL/reset" never contains "//".
+export PING_URL="${PING_URL%/}"
+# Number of checks that have passed so far.
+PASS=0
+# log MSG... - print MSG prefixed with the current UTC time (HH:MM:SS).
+# Backslash escapes in MSG are interpreted (%b), which renders the colours.
+log() {
+  local stamp
+  stamp="$(date -u +%H:%M:%S)"
+  printf "[%s] %b\n" "$stamp" "$*"
+}
+# pass MSG - log a PASSED line and count the check as passed.
+pass() {
+  log "${GREEN}PASSED${NC} -- $1"
+  PASS=$((PASS + 1))
+}
+# fail MSG - log a FAILED line; the caller decides whether to stop.
+fail() {
+  log "${RED}FAILED${NC} -- $1"
+}
+# hint MSG - print an indented "Hint:" line below a failure.
+hint() {
+  printf "  ${YELLOW}Hint:${NC} %b\n" "$1"
+}
+# stop_at STEP - report where validation stopped and exit with status 1.
+stop_at() {
+  printf "\n${RED}${BOLD}Validation stopped at %s.${NC} Fix the above before continuing.\n" "$1"
+  exit 1
+}
+printf "\n"
+printf "${BOLD}========================================${NC}\n"
+printf "${BOLD}  OpenEnv Submission Validator${NC}\n"
+printf "${BOLD}========================================${NC}\n"
+log "Repo:     $REPO_DIR"
+log "Ping URL: $PING_URL"
+printf "\n"
+log "${BOLD}Step 1/3: Pinging HF Space${NC} ($PING_URL/reset) ..."
+# The response body goes to a temp file registered for removal at exit.
+CURL_OUTPUT=$(portable_mktemp "validate-curl")
+CLEANUP_FILES+=("$CURL_OUTPUT")
+# When no response is received curl itself writes "000" for %{http_code} and
+# exits non-zero, so the fallback must REPLACE the captured value. Appending
+# to it would give "000000" and the "not reachable" branch would never run.
+# stderr is discarded instead of being written over the body file.
+HTTP_CODE=$(curl -s -o "$CURL_OUTPUT" -w "%{http_code}" -X POST \
+  -H "Content-Type: application/json" -d '{}' \
+  "$PING_URL/reset" --max-time 30 2>/dev/null) || HTTP_CODE="000"
+if [ "$HTTP_CODE" = "200" ]; then
+  pass "HF Space is live and responds to /reset"
+elif [ "$HTTP_CODE" = "000" ]; then
+  fail "HF Space not reachable (connection failed or timed out)"
+  hint "Check your network connection and that the Space is running."
+  hint "Try: curl -s -o /dev/null -w '%{http_code}' -X POST $PING_URL/reset"
+  stop_at "Step 1"
+else
+  fail "HF Space /reset returned HTTP $HTTP_CODE (expected 200)"
+  hint "Make sure your Space is running and the URL is correct."
+  hint "Try opening $PING_URL in your browser first."
+  stop_at "Step 1"
+fi
+log "${BOLD}Step 2/3: Running docker build${NC} ..."
+# Docker has to be on PATH before anything else in this step can run.
+command -v docker >/dev/null 2>&1 || {
+  fail "docker command not found"
+  hint "Install Docker: https://docs.docker.com/get-docker/"
+  stop_at "Step 2"
+}
+# Use the repo root as build context if it holds a Dockerfile, else server/.
+DOCKER_CONTEXT=""
+for candidate in "$REPO_DIR" "$REPO_DIR/server"; do
+  if [ -f "$candidate/Dockerfile" ]; then
+    DOCKER_CONTEXT="$candidate"
+    break
+  fi
+done
+if [ -z "$DOCKER_CONTEXT" ]; then
+  fail "No Dockerfile found in repo root or server/ directory"
+  stop_at "Step 2"
+fi
+log "  Found Dockerfile in $DOCKER_CONTEXT"
+# Build output is captured and only shown (last 20 lines) when the build fails.
+if BUILD_OUTPUT=$(run_with_timeout "$DOCKER_BUILD_TIMEOUT" docker build "$DOCKER_CONTEXT" 2>&1); then
+  BUILD_OK=true
+  pass "Docker build succeeded"
+else
+  BUILD_OK=false
+  fail "Docker build failed (timeout=${DOCKER_BUILD_TIMEOUT}s)"
+  printf "%s\n" "$BUILD_OUTPUT" | tail -20
+  stop_at "Step 2"
+fi
+log "${BOLD}Step 3/3: Running openenv validate${NC} ..."
+# The openenv CLI has to be on PATH for this step.
+command -v openenv >/dev/null 2>&1 || {
+  fail "openenv command not found"
+  hint "Install it: pip install openenv-core"
+  stop_at "Step 3"
+}
+# Run the validator from the repo directory, capturing stdout and stderr.
+if VALIDATE_OUTPUT=$(cd "$REPO_DIR" && openenv validate 2>&1); then
+  VALIDATE_OK=true
+  pass "openenv validate passed"
+  if [ -n "$VALIDATE_OUTPUT" ]; then
+    log "  $VALIDATE_OUTPUT"
+  fi
+else
+  VALIDATE_OK=false
+  fail "openenv validate failed"
+  printf "%s\n" "$VALIDATE_OUTPUT"
+  stop_at "Step 3"
+fi
+# Reaching this point means no step called stop_at.
+printf "\n"
+printf "${BOLD}========================================${NC}\n"
+printf "${GREEN}${BOLD}  All 3/3 checks passed!${NC}\n"
+printf "${GREEN}${BOLD}  Your submission is ready to submit.${NC}\n"
+printf "${BOLD}========================================${NC}\n"
+printf "\n"
+exit 0

server/Dockerfile CHANGED Viewed

@@ -9,10 +9,10 @@ FROM ${BASE_IMAGE}
 WORKDIR /app
-COPY server/requirements.txt /tmp/requirements.txt
 RUN pip install --no-cache-dir -r /tmp/requirements.txt
-COPY . /app/
 # Health check
 HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \

 WORKDIR /app
+COPY requirements.txt /tmp/requirements.txt
 RUN pip install --no-cache-dir -r /tmp/requirements.txt
+COPY . /app/server/
 # Health check
 HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \