Spaces:

jeevank11
/

Scaler-Hackathon

Sleeping

App Files Files Community

jeevank0 committed on Apr 7

Commit

cef0e76

1 Parent(s): 2624f74

first push

Browse files

Files changed (32) hide show

.DS_Store +0 -0
.dockerignore +5 -0
Dockerfile +9 -0
README.md +118 -1
__pycache__/inference.cpython-313.pyc +0 -0
api/__pycache__/main.cpython-313.pyc +0 -0
api/main.py +37 -0
env/__init__.py +3 -0
env/__pycache__/__init__.cpython-313.pyc +0 -0
env/__pycache__/farm_env.cpython-313.pyc +0 -0
env/farm_env.py +191 -0
farmer_advisor_dataset.csv +0 -0
final-requirements.md +55 -0
inference.py +280 -0
openenv.yaml +73 -0
pyproject.toml +24 -0
reference-material/.DS_Store +0 -0
reference-material/add_water_variable.py +59 -0
reference-material/prevalidation-script.sh +192 -0
reference-material/roadmap.md +577 -0
reference-material/sample-inference-script.py +188 -0
requirements.txt +9 -0
scripts/__pycache__/openai_responses_demo.cpython-313.pyc +0 -0
scripts/add_water_variable.py +64 -0
scripts/openai_responses_demo.py +114 -0
server/__init__.py +1 -0
server/app.py +14 -0
tasks/__init__.py +3 -0
tasks/__pycache__/__init__.cpython-313.pyc +0 -0
tasks/__pycache__/graders.cpython-313.pyc +0 -0
tasks/graders.py +68 -0
uv.lock +0 -0

.DS_Store ADDED Viewed

Binary file (10.2 kB). View file

.dockerignore ADDED Viewed

	@@ -0,0 +1,5 @@

+.git
+.venv
+__pycache__
+*.pyc
+.DS_Store
+# Keep local secrets (the README stores OPENAI_API_KEY in .env) out of the
+# build context so `COPY . .` cannot bake them into the image.
+.env

Dockerfile ADDED Viewed

	@@ -0,0 +1,9 @@

+FROM python:3.11-slim
+WORKDIR /app
+# Install dependencies before copying the sources so this layer is served
+# from the build cache when only application code changes.
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+COPY . .
+# Serve the FastAPI app on port 7860 (the port used throughout the README).
+CMD ["uvicorn", "api.main:app", "--host", "0.0.0.0", "--port", "7860"]

README.md CHANGED Viewed

	@@ -1 +1,118 @@
1	- # ~~Scaler-Hackathon~~

+# FarmRL OpenEnv Environment
+## Overview
+FarmRL simulates real-world crop operations where an agent chooses daily irrigation, fertilizer, and pesticide actions to improve productivity while preserving sustainability. This is a practical farm-operations decision problem, not a game task.
+The environment is OpenEnv-compatible and exposes typed state/action/reward models with API endpoints for reset, step, and state retrieval.
+## Motivation
+Farm operations require balancing short-term yield goals with long-term soil and resource health. This environment provides a deterministic, reproducible benchmark for evaluating whether an LLM can make adaptive control decisions under changing weather and soil conditions.
+## Observation Space
+Observation is represented by `FarmState`:
+- `soil_moisture` (0-100)
+- `soil_ph` (4-9)
+- `temperature` (float)
+- `rainfall` (>=0)
+- `crop_stage` (int, >=0)
+- `day` (int, >=0)
+## Action Space
+Action is represented by `FarmAction`:
+- `water` in [0, 50]
+- `fertilizer` in [0, 20]
+- `pesticide` in [0, 10]
+## Reward Design
+Reward is provided at every step and includes:
+- Positive yield progress (`yield_score`)
+- Sustainability encouragement (`sustainability_bonus`)
+- Resource overuse penalty (`resource_penalty`)
+- Explicit penalties for excessive chemical usage (`overuse_penalty`)
+- Explicit loop/stall penalty (`loop_penalty`)
+This gives dense trajectory feedback and discourages destructive/repetitive behavior.
+## Tasks and Difficulty
+Three deterministic grader tasks are provided:
+1. `task_easy_yield` (easy): maximize normalized total reward.
+2. `task_medium_chemical_efficiency` (medium): minimize aggregate fertilizer + pesticide usage.
+3. `task_hard_sustainability_balance` (hard): optimize yield-to-chemical-use ratio.
+Each grader returns a score in [0.0, 1.0].
+## OpenEnv Interface
+API endpoints:
+- `POST /reset`
+- `POST /step`
+- `GET /state`
+`step(action)` returns `observation`, `reward`, `done`, `info`.
+OpenEnv metadata is declared in `openenv.yaml`.
+## Setup
+1. Create and activate a virtual environment.
+2. Install dependencies:
+```bash
+pip install -r requirements.txt
+```
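+3. Configure `.env`:
+- `API_BASE_URL=https://api.openai.com/v1`
+- `MODEL_NAME=gpt-4o-mini`
+- `OPENAI_API_KEY=<your_key>`
+- `TASK_NAME=<task_label>` (required by `inference.py`; printed in the `[START]` log line)
+- `BENCHMARK=<environment_label>` (required by `inference.py`; printed in the `[START]` log line)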
+## Usage
+Run baseline inference:
+```bash
+python inference.py
+```
+Run API server:
+```bash
+uvicorn api.main:app --host 0.0.0.0 --port 7860
+```
+## Baseline Scores
+Typical baseline output includes an `[END]` line with score and rewards. Example from a recent run:
+- `overall score`: 0.564
+- `steps`: 60
+Task-level baseline scores are reported by `tasks/graders.py` and constrained to [0.0, 1.0].
+## Container and Deployment
+Build container:
+```bash
+docker build -t farmrl-space-check:latest .
+```
+Run container:
+```bash
+docker run --rm -p 7860:7860 farmrl-space-check:latest
+```
+This image is suitable for Hugging Face Space deployment.

__pycache__/inference.cpython-313.pyc ADDED Viewed

Binary file (10.8 kB). View file

api/__pycache__/main.cpython-313.pyc ADDED Viewed

Binary file (2.28 kB). View file

api/main.py ADDED Viewed

	@@ -0,0 +1,37 @@

+from pathlib import Path
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
+from env.farm_env import FarmAction, FarmEnv, FarmState, FarmStepResult
+# The dataset sits in the directory above this file's own directory.
+_REPO_ROOT = Path(__file__).resolve().parent.parent
+DATASET_PATH = _REPO_ROOT / "farmer_advisor_dataset.csv"
+# Application object the route handlers below are registered on.
+app = FastAPI(title="FarmRL OpenEnv API", version="0.1.0")
+# One module-level environment instance backs every endpoint.
+env = FarmEnv(dataset_path=DATASET_PATH)
+class ResetRequest(BaseModel):
+    """Optional JSON body accepted by ``POST /reset``."""
+    # Seed forwarded to ``env.reset``; ``None`` (the default) passes no seed.
+    seed: int | None = None
+@app.post("/reset", response_model=FarmState)
+def reset(payload: ResetRequest | None = None) -> FarmState:
+    """Reset the shared environment and return its initial observation.
+
+    The request body is optional; without one, no seed is passed on.
+    """
+    if payload is None:
+        return env.reset(seed=None)
+    return env.reset(seed=payload.seed)
+@app.post("/step", response_model=FarmStepResult)
+def step(action: FarmAction) -> FarmStepResult:
+    """Apply one action to the shared environment and return the step result.
+
+    A ``RuntimeError`` raised by the environment is reported as HTTP 400
+    with the error text as the response detail.
+    """
+    try:
+        result = env.step(action)
+    except RuntimeError as exc:
+        raise HTTPException(status_code=400, detail=str(exc)) from exc
+    return result
+@app.get("/state", response_model=FarmState)
+def state() -> FarmState:
+    """Return the shared environment's current observation.
+
+    A ``RuntimeError`` raised by the environment is reported as HTTP 400
+    with the error text as the response detail.
+    """
+    try:
+        current = env.state()
+    except RuntimeError as exc:
+        raise HTTPException(status_code=400, detail=str(exc)) from exc
+    return current

env/__init__.py ADDED Viewed

	@@ -0,0 +1,3 @@


1	+ from .farm_env import FarmAction, FarmEnv, FarmState, FarmStepResult
2	+
3	+ __all__ = ["FarmAction", "FarmEnv", "FarmState", "FarmStepResult"]

env/__pycache__/__init__.cpython-313.pyc ADDED Viewed

Binary file (282 Bytes). View file

env/__pycache__/farm_env.cpython-313.pyc ADDED Viewed

Binary file (9.78 kB). View file

env/farm_env.py ADDED Viewed

	@@ -0,0 +1,191 @@

+from __future__ import annotations
+from pathlib import Path
+from typing import Any
+import numpy as np
+import pandas as pd
+from pydantic import BaseModel, Field
+class FarmState(BaseModel):
+    """Observation model describing the farm at one point in an episode."""
+    # Validated to lie in [0, 100].
+    soil_moisture: float = Field(ge=0.0, le=100.0)
+    # Validated to lie in [4, 9].
+    soil_ph: float = Field(ge=4.0, le=9.0)
+    # No validation bounds are declared for temperature.
+    temperature: float
+    # Must be non-negative.
+    rainfall: float = Field(ge=0.0)
+    # Non-negative integer growth stage.
+    crop_stage: int = Field(ge=0)
+    # Non-negative integer day counter.
+    day: int = Field(ge=0)
+class FarmAction(BaseModel):
+    """Action model: the three inputs applied in one step."""
+    # Validated to lie in [0, 50].
+    water: float = Field(ge=0.0, le=50.0)
+    # Validated to lie in [0, 20].
+    fertilizer: float = Field(ge=0.0, le=20.0)
+    # Validated to lie in [0, 10].
+    pesticide: float = Field(ge=0.0, le=10.0)
+class FarmStepResult(BaseModel):
+    """Bundle of values produced by one environment step."""
+    # State after the step.
+    observation: FarmState
+    # Scalar reward for the step.
+    reward: float
+    # Whether the episode has finished.
+    done: bool
+    # Free-form extra data keyed by string.
+    info: dict[str, Any]
+class FarmEnv:
+    """Minimal deterministic OpenEnv-style farm environment for Phase-1.
+
+    Weather is replayed from a CSV dataset: ``reset`` picks a starting row
+    with the environment's random generator and each ``step`` moves on to
+    the next row, wrapping around at the end of the table.
+    """
+    # Columns the dataset must provide; checked in ``_load_dataset``.
+    REQUIRED_COLUMNS = {
+        "Soil_pH",
+        "Soil_Moisture",
+        "Temperature_C",
+        "Rainfall_mm",
+    }
+    def __init__(
+        self,
+        dataset_path: str | Path = "farmer_advisor_dataset.csv",
+        seed: int = 42,
+        max_days: int = 30,
+    ) -> None:
+        """Load the dataset and create an environment that is not yet reset.
+
+        Args:
+            dataset_path: CSV file holding the required weather/soil columns.
+            seed: Seed for the generator that picks the starting row.
+            max_days: Day count at which ``step`` starts reporting ``done``.
+
+        Raises:
+            FileNotFoundError: If ``dataset_path`` does not exist.
+            ValueError: If required columns are missing or the file has no rows.
+        """
+        self.dataset_path = Path(dataset_path)
+        self.max_days = max_days
+        self._rng = np.random.default_rng(seed)
+        self._dataset = self._load_dataset(self.dataset_path)
+        self._row_index = 0
+        self._state: FarmState | None = None
+    def _load_dataset(self, dataset_path: Path) -> pd.DataFrame:
+        """Read and validate the CSV, returning it with a fresh 0..n-1 index."""
+        if not dataset_path.exists():
+            raise FileNotFoundError(f"Dataset not found: {dataset_path}")
+        df = pd.read_csv(dataset_path)
+        missing = self.REQUIRED_COLUMNS - set(df.columns)
+        if missing:
+            raise ValueError(
+                f"Dataset is missing required columns: {sorted(missing)}")
+        if df.empty:
+            # Fail here with a clear message: an empty table would otherwise
+            # surface later as an opaque error from the row sampling in
+            # reset() or the modulo in _next_weather_row().
+            raise ValueError(f"Dataset has no rows: {dataset_path}")
+        return df.reset_index(drop=True)
+    def _next_weather_row(self) -> pd.Series:
+        """Advance the row cursor by one (wrapping around) and return that row."""
+        self._row_index = (self._row_index + 1) % len(self._dataset)
+        return self._dataset.iloc[self._row_index]
+    def reset(self, seed: int | None = None) -> FarmState:
+        """Start a new episode and return the initial state.
+
+        Args:
+            seed: When given, re-seeds the generator before the starting row
+                is drawn; when ``None`` the existing generator keeps going.
+        """
+        if seed is not None:
+            self._rng = np.random.default_rng(seed)
+        self._row_index = int(self._rng.integers(0, len(self._dataset)))
+        row = self._dataset.iloc[self._row_index]
+        # Dataset values are clipped into the ranges the state model accepts.
+        self._state = FarmState(
+            soil_moisture=float(np.clip(row["Soil_Moisture"], 0.0, 100.0)),
+            soil_ph=float(np.clip(row["Soil_pH"], 4.5, 8.5)),
+            temperature=float(row["Temperature_C"]),
+            rainfall=float(np.clip(row["Rainfall_mm"], 0.0, 200.0)),
+            crop_stage=0,
+            day=0,
+        )
+        return self._state
+    def state(self) -> FarmState:
+        """Return the current state.
+
+        Raises:
+            RuntimeError: If ``reset`` has not been called yet.
+        """
+        if self._state is None:
+            raise RuntimeError(
+                "Environment is not initialized. Call reset() first.")
+        return self._state
+    @staticmethod
+    def _clip(value: float, low: float, high: float) -> float:
+        """Clip ``value`` to [``low``, ``high``] and return a plain ``float``."""
+        return float(np.clip(value, low, high))
+    @staticmethod
+    def _compute_reward(state: FarmState, action: FarmAction, day: int) -> tuple[float, dict[str, float]]:
+        """Compute the step reward and its named components.
+
+        Returns:
+            ``(reward, info)`` where ``reward`` is ``yield_score +
+            sustainability_bonus - resource_penalty - overuse_penalty -
+            loop_penalty`` and ``info`` maps each component name to its value.
+        """
+        # Each factor is normalised to [0, 1]; temperature peaks at 26.0 and
+        # rainfall at 60.0.
+        moisture_score = np.clip(state.soil_moisture / 100.0, 0.0, 1.0)
+        temperature_factor = np.clip(
+            1.0 - abs(state.temperature - 26.0) / 16.0, 0.0, 1.0)
+        rainfall_factor = np.clip(
+            1.0 - abs(state.rainfall - 60.0) / 60.0, 0.0, 1.0)
+        yield_score = (
+            0.4 * float(moisture_score)
+            + 0.3 * float(temperature_factor)
+            + 0.3 * float(rainfall_factor)
+        )
+        # Super-linear cost of chemical inputs.
+        resource_penalty = 0.03 * \
+            (action.fertilizer**1.2) + 0.04 * (action.pesticide**1.3)
+        # Bonus decays exponentially as chemical use grows (max 0.4 at zero use).
+        sustainability_bonus = 0.2 * np.exp(-action.fertilizer / 20.0) + 0.2 * np.exp(
+            -action.pesticide / 10.0
+        )
+        # Extra linear penalty above the 12.0 / 6.0 thresholds.
+        overuse_penalty = 0.0
+        if action.fertilizer > 12.0:
+            overuse_penalty += 0.02 * (action.fertilizer - 12.0)
+        if action.pesticide > 6.0:
+            overuse_penalty += 0.03 * (action.pesticide - 6.0)
+        # Penalise an all-zero action after day 20.
+        loop_penalty = 0.0
+        if day > 20 and action.water == 0.0 and action.fertilizer == 0.0 and action.pesticide == 0.0:
+            loop_penalty = 0.1
+        reward = float(yield_score + sustainability_bonus -
+                       resource_penalty - overuse_penalty - loop_penalty)
+        info = {
+            "yield_score": float(yield_score),
+            "resource_penalty": float(resource_penalty),
+            "sustainability_bonus": float(sustainability_bonus),
+            "overuse_penalty": float(overuse_penalty),
+            "loop_penalty": float(loop_penalty),
+        }
+        return reward, info
+    def step(self, action: FarmAction | dict[str, float]) -> FarmStepResult:
+        """Advance the simulation by one day.
+
+        Args:
+            action: A ``FarmAction`` or a mapping with its fields.
+
+        Returns:
+            The new observation, its reward, the ``done`` flag
+            (``day >= max_days``) and the reward components as ``info``.
+
+        Raises:
+            RuntimeError: If ``reset`` has not been called yet.
+        """
+        if self._state is None:
+            raise RuntimeError(
+                "Environment is not initialized. Call reset() first.")
+        action_model = action if isinstance(
+            action, FarmAction) else FarmAction(**action)
+        previous_state = self._state
+        weather = self._next_weather_row()
+        day = previous_state.day + 1
+        # The crop stage rises every 6 days and is capped at 5.
+        crop_stage = min(5, day // 6)
+        # Blend the previous state with the new dataset row.
+        temperature = 0.7 * previous_state.temperature + \
+            0.3 * float(weather["Temperature_C"])
+        rainfall = 0.5 * previous_state.rainfall + \
+            0.5 * float(weather["Rainfall_mm"])
+        rainfall = self._clip(rainfall, 0.0, 200.0)
+        # Evaporation only applies above 20.0 degrees.
+        evaporation = max(temperature - 20.0, 0.0) * 0.35
+        moisture_gain = 0.12 * rainfall + 0.65 * action_model.water
+        moisture_loss = evaporation + 0.5 * crop_stage
+        soil_moisture = self._clip(
+            previous_state.soil_moisture + moisture_gain - moisture_loss,
+            0.0,
+            100.0,
+        )
+        # Fertilizer lowers pH, water raises it slightly.
+        soil_ph = self._clip(
+            previous_state.soil_ph - 0.012 *
+            action_model.fertilizer + 0.002 * action_model.water,
+            4.5,
+            8.5,
+        )
+        self._state = FarmState(
+            soil_moisture=soil_moisture,
+            soil_ph=soil_ph,
+            temperature=float(temperature),
+            rainfall=rainfall,
+            crop_stage=crop_stage,
+            day=day,
+        )
+        reward, reward_info = self._compute_reward(
+            self._state, action_model, day=day)
+        done = day >= self.max_days
+        return FarmStepResult(
+            observation=self._state,
+            reward=reward,
+            done=done,
+            info=reward_info,
+        )

farmer_advisor_dataset.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

final-requirements.md ADDED Viewed

	@@ -0,0 +1,55 @@

+Functional Requirements:
+1. Real-World Task Simulation
+The environment must represent tasks that humans actually perform in real settings—no games
+or toy problems.
+Examples include email triage, code review, data cleaning, scheduling, customer support, and
+content moderation.
+2. OpenEnv Specification Compliance
+The environment must fully implement the OpenEnv interface, including:
+Typed Observation, Action, and Reward models using Pydantic
+step(action) → returns (observation, reward, done, info)
+reset() → returns the initial observation
+state() → returns the current state
+An openenv.yaml file containing metadata
+The implementation must successfully pass validation via openenv validate.
+3. Minimum of Three Tasks with Agent Graders
+Provide at least three tasks, each with a clearly defined objective
+Tasks should span increasing difficulty: easy → medium → hard
+Each task must include a programmatic grader that assigns a score between 0.0 and
+1.0
+Grading criteria must be clear, deterministic, and reproducible
+4. Meaningful Reward Function
+The reward function must provide feedback throughout the task trajectory, not just at
+completion
+It should reward incremental progress toward the objective
+It must penalize undesirable behaviors such as infinite loops or destructive actions
+5. Baseline Inference Script
+Include an inference script that uses the OpenAI API client to evaluate a model within
+the environment
+API credentials must be read from environment variables (HF_TOKEN)
+The script should produce a reproducible baseline score across all tasks
+Non-Functional Requirements:
+1. Deployment on Hugging Face Spaces
+The environment must be deployable as a containerized Hugging Face Space
+It should be tagged with openenv
+2. Containerized Execution
+Provide a working Dockerfile
+The environment must build and run successfully using:
+docker build
+docker run
+3. Documentation
+The README must include:
+Environment overview and motivation
+Definitions of action and observation spaces
+Task descriptions with expected difficulty levels
+Setup and usage instructions
+Baseline performance scores

inference.py ADDED Viewed

	@@ -0,0 +1,280 @@

+from __future__ import annotations
+import json
+import os
+import re
+from pathlib import Path
+from typing import Any, Dict, Optional
+from dotenv import load_dotenv
+from openai import OpenAI
+from env.farm_env import FarmAction, FarmEnv, FarmState
+from tasks.graders import grade_all
+PROJECT_ROOT = Path(__file__).resolve().parent
+ENV_FILE = PROJECT_ROOT / ".env"
+load_dotenv(ENV_FILE)
+def require_env(name: str) -> str:
+    """Return the whitespace-stripped value of environment variable *name*.
+
+    Raises:
+        RuntimeError: If the variable is unset or blank after stripping.
+    """
+    cleaned = os.getenv(name, "").strip()
+    if cleaned:
+        return cleaned
+    raise RuntimeError(
+        f"Missing required environment variable '{name}'. "
+        f"Set it in shell or in {ENV_FILE}."
+    )
+# Required settings: importing this module raises RuntimeError if any of
+# these four variables is unset or blank.
+API_BASE_URL = require_env("API_BASE_URL")
+MODEL_NAME = require_env("MODEL_NAME")
+TASK_NAME = require_env("TASK_NAME")
+BENCHMARK = require_env("BENCHMARK")
+# The API key is optional at import time; build_client() validates it.
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "").strip()
+# Template values that build_client() rejects (compared case-insensitively).
+PLACEHOLDER_TOKENS = {
+    "your_openai_api_key_here",
+    "replace_with_openai_api_key",
+    "replace-me",
+    "replace_me",
+}
+# Run shape: number of episodes and the step budget for each.
+EPISODES = 3
+STEPS_PER_EPISODE = 20
+# Minimum overall score for the run to be logged as a success.
+SUCCESS_SCORE_THRESHOLD = 0.10
+def clamp(value: float, low: float, high: float) -> float:
+    """Limit *value* to the closed interval [*low*, *high*]."""
+    capped = min(high, value)
+    return max(low, capped)
+def compute_yield_proxy(state: FarmState) -> float:
+    """Score *state* as a weighted blend of three factors, each in [0, 1].
+
+    Soil moisture (as a fraction of 100) weighs 0.4; closeness of the
+    temperature to 26.0 and of the rainfall to 60.0 weigh 0.3 each.
+    """
+    temperature_gap = abs(state.temperature - 26.0)
+    rainfall_gap = abs(state.rainfall - 60.0)
+    moisture_term = clamp(state.soil_moisture / 100.0, 0.0, 1.0)
+    temperature_term = clamp(1.0 - temperature_gap / 16.0, 0.0, 1.0)
+    rainfall_term = clamp(1.0 - rainfall_gap / 60.0, 0.0, 1.0)
+    return 0.4 * moisture_term + 0.3 * temperature_term + 0.3 * rainfall_term
+def build_prompt(state: FarmState, step: int, recent_actions: list[dict[str, float]]) -> str:
+    """Render the user prompt describing the farm state and the action bounds.
+
+    The last action and the last three actions are embedded as JSON; both
+    read "none" when *recent_actions* is empty.
+    """
+    if recent_actions:
+        previous_action_text = json.dumps(recent_actions[-1])
+        recent_actions_text = json.dumps(recent_actions[-3:])
+    else:
+        previous_action_text = "none"
+        recent_actions_text = "none"
+    state_section = (
+        "Farm state:\n"
+        f"step: {step}\n"
+        f"soil moisture: {state.soil_moisture:.2f}\n"
+        f"temperature: {state.temperature:.2f}\n"
+        f"rainfall: {state.rainfall:.2f}\n"
+        f"crop stage: {state.crop_stage}\n"
+        f"day: {state.day}\n"
+        f"previous action: {previous_action_text}\n"
+        f"recent actions: {recent_actions_text}\n\n"
+    )
+    instruction_section = (
+        "Choose action values in bounds:\n"
+        "water: 0 to 50\n"
+        "fertilizer: 0 to 20\n"
+        "pesticide: 0 to 10\n\n"
+        "Output must be a single valid JSON object with exactly these numeric keys: "
+        "water, fertilizer, pesticide.\n"
+        "If the previous action is identical, change at least one field by >= 2 unless safety constraints require otherwise."
+    )
+    return state_section + instruction_section
+def build_client() -> OpenAI:
+    """Create the OpenAI client used for inference.
+
+    The function either returns a client or raises, so the return type is
+    ``OpenAI`` rather than optional.
+
+    Raises:
+        RuntimeError: If ``API_BASE_URL`` points at huggingface.co, or if
+            ``OPENAI_API_KEY`` is empty or one of the known placeholders.
+    """
+    base_lower = API_BASE_URL.lower()
+    if "huggingface.co" in base_lower:
+        raise RuntimeError(
+            "Hugging Face router is disabled for LLM calls. "
+            "Set API_BASE_URL to https://api.openai.com/v1"
+        )
+    api_key = OPENAI_API_KEY
+    if not api_key:
+        raise RuntimeError("Missing API key: set OPENAI_API_KEY.")
+    # Placeholder detection is case-insensitive.
+    if api_key.lower() in PLACEHOLDER_TOKENS:
+        raise RuntimeError(
+            "OPENAI_API_KEY is a placeholder; set a real key before running inference.")
+    return OpenAI(base_url=API_BASE_URL, api_key=api_key)
+def extract_json_object(text: str) -> Optional[Dict[str, Any]]:
+    """Return the first JSON object found in *text*, or ``None``.
+
+    The whole (stripped) text is tried first. If that is not a JSON object,
+    every ``{`` is tried in order as the start of an embedded object, so
+    replies with surrounding prose, several objects, or stray braces are
+    still handled.
+    """
+    text = text.strip()
+    if not text:
+        return None
+    try:
+        parsed = json.loads(text)
+    except json.JSONDecodeError:
+        parsed = None
+    if isinstance(parsed, dict):
+        return parsed
+    decoder = json.JSONDecoder()
+    start = text.find("{")
+    while start != -1:
+        try:
+            # raw_decode parses one JSON value starting at ``start`` and
+            # ignores whatever follows it.
+            candidate, _ = decoder.raw_decode(text, start)
+        except json.JSONDecodeError:
+            candidate = None
+        if isinstance(candidate, dict):
+            return candidate
+        start = text.find("{", start + 1)
+    return None
+def coerce_action(payload: Dict[str, Any]) -> FarmAction:
+    """Convert a parsed model reply into a bounds-respecting ``FarmAction``.
+
+    Each field is converted to ``float`` and clamped to its allowed range
+    (water 0-50, fertilizer 0-20, pesticide 0-10).
+
+    Raises:
+        ValueError: If a field is missing, is not numeric, or is NaN.
+    """
+    import math  # local import: only this function needs it
+    upper_bounds = {"water": 50.0, "fertilizer": 20.0, "pesticide": 10.0}
+    if any(key not in payload for key in upper_bounds):
+        raise ValueError(
+            "Model response must include water, fertilizer, and pesticide.")
+    normalized: Dict[str, float] = {}
+    for key, upper in upper_bounds.items():
+        try:
+            number = float(payload[key])
+        except (TypeError, ValueError) as exc:
+            raise ValueError(
+                f"Model response field '{key}' is not numeric: {payload[key]!r}"
+            ) from exc
+        if math.isnan(number):
+            # min()/max() based clamping would silently turn NaN into the
+            # upper bound, i.e. the maximum dose; reject it instead.
+            raise ValueError(f"Model response field '{key}' is NaN.")
+        normalized[key] = clamp(number, 0.0, upper)
+    return FarmAction(**normalized)
+def choose_action(
+    client: OpenAI,
+    state: FarmState,
+    step: int,
+    recent_actions: list[dict[str, float]],
+) -> FarmAction:
+    """Ask the chat model for the next action and validate its reply.
+
+    The request seed is derived from *step* (``42 + step``).
+
+    Raises:
+        ValueError: If the reply contains no JSON object, or the object
+            cannot be turned into an action.
+    """
+    system_message = (
+        "You are a farm operations optimizer. "
+        "Produce state-dependent control decisions and avoid repetitive action loops. "
+        "Return strict JSON with numeric keys: water, fertilizer, pesticide. "
+        "No markdown, no prose, no extra keys. "
+        "Do not output the same action repeatedly across steps unless explicitly necessary."
+    )
+    user_message = build_prompt(state, step=step, recent_actions=recent_actions)
+    completion = client.chat.completions.create(
+        model=MODEL_NAME,
+        messages=[
+            {"role": "system", "content": system_message},
+            {"role": "user", "content": user_message},
+        ],
+        temperature=0.35,
+        top_p=0.9,
+        frequency_penalty=0.6,
+        response_format={"type": "json_object"},
+        seed=42 + step,
+        max_tokens=160,
+    )
+    reply = completion.choices[0].message.content
+    payload = extract_json_object((reply or "").strip())
+    if payload is None:
+        raise ValueError("Model did not return valid JSON action payload.")
+    return coerce_action(payload)
+def to_action_string(action: FarmAction) -> str:
+    """Serialize *action* as compact JSON with its keys sorted."""
+    fields = action.model_dump()
+    return json.dumps(fields, sort_keys=True, separators=(",", ":"))
+def log_start() -> None:
+    """Print the ``[START]`` line naming the task, environment and model."""
+    line = f"[START] task={TASK_NAME} env={BENCHMARK} model={MODEL_NAME}"
+    print(line, flush=True)
+def log_step(step: int, action: FarmAction, reward: float, done: bool, error: Optional[str]) -> None:
+    """Print one ``[STEP]`` line.
+
+    The reward is shown with two decimals, ``done`` in lower case, and a
+    missing or empty error as ``null``.
+    """
+    fields = [
+        f"step={step}",
+        f"action={to_action_string(action)}",
+        f"reward={reward:.2f}",
+        f"done={str(done).lower()}",
+        f"error={error or 'null'}",
+    ]
+    print("[STEP] " + " ".join(fields), flush=True)
+def log_end(success: bool, steps: int, score: float, rewards: list[float]) -> None:
+    """Print the ``[END]`` summary line.
+
+    The score is shown with three decimals and the rewards as a
+    comma-separated list with two decimals each.
+    """
+    formatted_rewards = [f"{value:.2f}" for value in rewards]
+    summary = (
+        f"[END] success={str(success).lower()} steps={steps} "
+        f"score={score:.3f} rewards={','.join(formatted_rewards)}"
+    )
+    print(summary, flush=True)
+def run_inference() -> None:
+    """Run the baseline episodes, log every step, and print the final score.
+
+    Totals and the action history are accumulated across all episodes (the
+    history is not cleared between episodes). The overall score is the mean
+    of the grader scores clamped to [0, 1].
+    """
+    csv_path = Path(__file__).resolve().parent / "farmer_advisor_dataset.csv"
+    env = FarmEnv(dataset_path=csv_path, seed=42, max_days=30)
+    client = build_client()
+    # Running totals that feed the graders after the last episode.
+    reward_sum = 0.0
+    yield_sum = 0.0
+    fertilizer_sum = 0.0
+    pesticide_sum = 0.0
+    steps_taken = 0
+    rewards: list[float] = []
+    action_history: list[dict[str, float]] = []
+    log_start()
+    for episode in range(EPISODES):
+        # Each episode gets its own seed.
+        state = env.reset(seed=42 + episode)
+        for _ in range(STEPS_PER_EPISODE):
+            action = choose_action(
+                client=client,
+                state=state,
+                step=steps_taken + 1,
+                recent_actions=action_history,
+            )
+            outcome = env.step(action)
+            steps_taken += 1
+            reward_sum += outcome.reward
+            yield_sum += compute_yield_proxy(outcome.observation)
+            fertilizer_sum += action.fertilizer
+            pesticide_sum += action.pesticide
+            rewards.append(outcome.reward)
+            action_history.append(action.model_dump())
+            log_step(
+                step=steps_taken,
+                action=action,
+                reward=outcome.reward,
+                done=outcome.done,
+                error=None,
+            )
+            state = outcome.observation
+            if outcome.done:
+                break
+    task_scores = grade_all(
+        total_reward=reward_sum,
+        total_yield=yield_sum,
+        total_fertilizer=fertilizer_sum,
+        total_pesticide=pesticide_sum,
+        total_steps=steps_taken,
+    )
+    mean_score = sum(item["score"] for item in task_scores) / len(task_scores)
+    overall_score = clamp(mean_score, 0.0, 1.0)
+    log_end(
+        success=overall_score >= SUCCESS_SCORE_THRESHOLD,
+        steps=steps_taken,
+        score=overall_score,
+        rewards=rewards,
+    )
+if __name__ == "__main__":
+    run_inference()

openenv.yaml ADDED Viewed

	@@ -0,0 +1,73 @@

+environment:
+  name: farmrl-phase1
+  version: 0.1.0
+  description: Minimal FarmRL OpenEnv implementation for Round-1 Phase-1.
+  api:
+    reset: POST /reset
+    step: POST /step
+    state: GET /state
+observation_schema:
+  type: object
+  required:
+    - soil_moisture
+    - soil_ph
+    - temperature
+    - rainfall
+    - crop_stage
+    - day
+  properties:
+    soil_moisture:
+      type: number
+      minimum: 0
+      maximum: 100
+    soil_ph:
+      type: number
+      minimum: 4
+      maximum: 9
+    temperature:
+      type: number
+    rainfall:
+      type: number
+      minimum: 0
+    crop_stage:
+      type: integer
+      minimum: 0
+    day:
+      type: integer
+      minimum: 0
+action_schema:
+  type: object
+  required:
+    - water
+    - fertilizer
+    - pesticide
+  properties:
+    water:
+      type: number
+      minimum: 0
+      maximum: 50
+    fertilizer:
+      type: number
+      minimum: 0
+      maximum: 20
+    pesticide:
+      type: number
+      minimum: 0
+      maximum: 10
+reward_schema:
+  type: number
+  description: yield_score + sustainability_bonus - resource_penalty - overuse_penalty - loop_penalty
+tasks:
+  - id: task_easy_yield
+    difficulty: easy
+    description: Maximize agronomic yield performance using normalized total reward.
+  - id: task_medium_chemical_efficiency
+    difficulty: medium
+    description: Minimize total fertilizer and pesticide while maintaining productivity.
+  - id: task_hard_sustainability_balance
+    difficulty: hard
+    description: Optimize the long-term yield-to-chemical-use balance ratio.

pyproject.toml ADDED Viewed

	@@ -0,0 +1,24 @@

+[build-system]
+requires = ["setuptools>=61.0"]
+build-backend = "setuptools.build_meta"
+[project]
+name = "farmrl-openenv-submission"
+version = "0.1.0"
+description = "FarmRL OpenEnv submission package"
+readme = "README.md"
+requires-python = ">=3.11"
+dependencies = [
+    "numpy",
+    "pandas",
+    "pydantic",
+    "pyyaml",
+    "openai",
+    "python-dotenv",
+    "fastapi",
+    "uvicorn",
+    "openenv-core>=0.2.0",
+]
+[project.scripts]
+server = "server.app:main"

reference-material/.DS_Store ADDED Viewed

Binary file (6.15 kB). View file

reference-material/add_water_variable.py ADDED Viewed

	@@ -0,0 +1,59 @@

+"""
+add_water_variable.py
+Adds a Water_mm column to the farm dataset.
+Water is drawn uniformly from [WATER_MIN, min(Rainfall_mm, WATER_MAX)];
+rows whose rainfall is below WATER_MIN get Water_mm = 0.
+Rainfall_mm is reduced by the water drawn to prevent bias.
+"""
+import pandas as pd
+import numpy as np
+import sys
+WATER_MIN = 20   # minimum meaningful irrigation (mm)
+WATER_MAX = 200  # hard ceiling - avoids flooding; also capped at rainfall
+def add_water(df: pd.DataFrame, seed: int = 42) -> pd.DataFrame:
+    """Return a copy of *df* with a ``Water_mm`` column added.
+
+    Water is drawn uniformly between ``WATER_MIN`` and the row's rainfall
+    (capped at ``WATER_MAX``); rows with less rainfall than ``WATER_MIN``
+    get 0. The drawn amount is subtracted from ``Rainfall_mm``. Both
+    columns are rounded to two decimals.
+    """
+    generator = np.random.default_rng(seed)
+    result = df.copy()
+    ceiling = result["Rainfall_mm"].clip(upper=WATER_MAX)
+    irrigable = ceiling >= WATER_MIN
+    # Non-irrigable rows get a degenerate [WATER_MIN, WATER_MIN] range so the
+    # draw stays valid; their drawn value is replaced by 0 below.
+    draw_high = ceiling.where(irrigable, WATER_MIN)
+    drawn = generator.uniform(WATER_MIN, draw_high)
+    result["Water_mm"] = np.round(np.where(irrigable, drawn, 0.0), 2)
+    result["Rainfall_mm"] = np.round(result["Rainfall_mm"] - result["Water_mm"], 2)
+    return result
+def main():
+    """Read a CSV, add the water column, print summary stats and save it.
+
+    Command line: ``[input_csv] [output_csv]``. The input defaults to
+    ``farm_data.csv``; the output defaults to the input name with
+    ``_watered`` inserted before its extension.
+
+    Raises:
+        ValueError: If the input has no ``Rainfall_mm`` column.
+    """
+    from pathlib import Path  # local import: only the default output path needs it
+    path = sys.argv[1] if len(sys.argv) > 1 else "farm_data.csv"
+    if len(sys.argv) > 2:
+        out = sys.argv[2]
+    else:
+        # Build the default from the file name parts so an input without a
+        # ".csv" suffix can never resolve to the input path and overwrite it.
+        source = Path(path)
+        out = str(source.with_name(f"{source.stem}_watered{source.suffix}"))
+    df = pd.read_csv(path)
+    required = {"Rainfall_mm"}
+    missing = required - set(df.columns)
+    if missing:
+        raise ValueError(f"Missing columns: {missing}")
+    df_out = add_water(df)
+    print(f"Water_mm  — min: {df_out['Water_mm'].min():.1f}  "
+          f"max: {df_out['Water_mm'].max():.1f}  "
+          f"mean: {df_out['Water_mm'].mean():.1f}")
+    print(f"Rainfall_mm after subtraction — min: {df_out['Rainfall_mm'].min():.1f}  "
+          f"mean: {df_out['Rainfall_mm'].mean():.1f}")
+    df_out.to_csv(out, index=False)
+    print(f"Saved → {out}")
+if __name__ == "__main__":
+    main()

reference-material/prevalidation-script.sh ADDED Viewed

	@@ -0,0 +1,192 @@

+#!/usr/bin/env bash
+#
+# validate-submission.sh — OpenEnv Submission Validator
+#
+# Checks that your HF Space is live, Docker image builds, and openenv validate passes.
+#
+# Prerequisites:
+#   - Docker:       https://docs.docker.com/get-docker/
+#   - openenv-core: pip install openenv-core
+#   - curl (usually pre-installed)
+#
+# Run:
+#   curl -fsSL https://raw.githubusercontent.com/<owner>/<repo>/main/scripts/validate-submission.sh | bash -s -- <ping_url> [repo_dir]
+#
+#   Or download and run locally:
+#     chmod +x validate-submission.sh
+#     ./validate-submission.sh <ping_url> [repo_dir]
+#
+# Arguments:
+#   ping_url   Your HuggingFace Space URL (e.g. https://your-space.hf.space)
+#   repo_dir   Path to your repo (default: current directory)
+#
+# Examples:
+#   ./validate-submission.sh https://my-team.hf.space
+#   ./validate-submission.sh https://my-team.hf.space ./my-repo
+#
+# -u: unset variables are errors; pipefail: a pipeline fails if any stage
+# fails. -e is not set, so each failure is handled explicitly below.
+set -uo pipefail
+# Seconds allowed for the docker build in Step 2.
+DOCKER_BUILD_TIMEOUT=600
+# Use ANSI colors only when stdout is a terminal.
+if [ -t 1 ]; then
+  RED='\033[0;31m'
+  GREEN='\033[0;32m'
+  YELLOW='\033[1;33m'
+  BOLD='\033[1m'
+  NC='\033[0m'
+else
+  RED='' GREEN='' YELLOW='' BOLD='' NC=''
+fi
+# run_with_timeout SECS CMD [ARGS...]
+# Runs CMD with a time limit of SECS seconds. Uses `timeout` or `gtimeout`
+# when available; otherwise runs CMD in the background with a watcher
+# subshell that kills it after SECS seconds, and returns CMD's exit status.
+run_with_timeout() {
+  local secs="$1"; shift
+  if command -v timeout &>/dev/null; then
+    timeout "$secs" "$@"
+  elif command -v gtimeout &>/dev/null; then
+    gtimeout "$secs" "$@"
+  else
+    "$@" &
+    local pid=$!
+    # Watcher: kill the command if it is still running after the limit.
+    ( sleep "$secs" && kill "$pid" 2>/dev/null ) &
+    local watcher=$!
+    wait "$pid" 2>/dev/null
+    local rc=$?
+    # The command has finished; stop the watcher and reap it.
+    kill "$watcher" 2>/dev/null
+    wait "$watcher" 2>/dev/null
+    return $rc
+  fi
+}
+# portable_mktemp [PREFIX]
+# Creates a temp file named PREFIX-XXXXXX (default prefix "validate") under
+# $TMPDIR or /tmp, falling back to a plain `mktemp` if that form fails.
+portable_mktemp() {
+  local prefix="${1:-validate}"
+  mktemp "${TMPDIR:-/tmp}/${prefix}-XXXXXX" 2>/dev/null || mktemp
+}
+# Temp files registered here are removed when the script exits.
+CLEANUP_FILES=()
+# The ${arr[@]+...} form expands to nothing for an empty array, which keeps
+# `set -u` from treating it as an unset variable.
+cleanup() { rm -f "${CLEANUP_FILES[@]+"${CLEANUP_FILES[@]}"}"; }
+trap cleanup EXIT
+# Positional arguments: the Space URL (required) and the repo directory
+# (defaults to the current directory).
+PING_URL="${1:-}"
+REPO_DIR="${2:-.}"
+if [ -z "$PING_URL" ]; then
+  printf "Usage: %s <ping_url> [repo_dir]\n" "$0"
+  printf "\n"
+  printf "  ping_url   Your HuggingFace Space URL (e.g. https://your-space.hf.space)\n"
+  printf "  repo_dir   Path to your repo (default: current directory)\n"
+  exit 1
+fi
+# Resolve the repo directory to an absolute path, or stop if it is missing.
+if ! REPO_DIR="$(cd "$REPO_DIR" 2>/dev/null && pwd)"; then
+  printf "Error: directory '%s' not found\n" "${2:-.}"
+  exit 1
+fi
+# Drop one trailing slash so "$PING_URL/reset" has no double slash.
+PING_URL="${PING_URL%/}"
+export PING_URL
+# Number of checks passed so far (incremented by pass()).
+PASS=0
+# log: print a message prefixed with the current UTC time.
+log()  { printf "[%s] %b\n" "$(date -u +%H:%M:%S)" "$*"; }
+# pass / fail: log a PASSED or FAILED line; pass also bumps the counter.
+pass() { log "${GREEN}PASSED${NC} -- $1"; PASS=$((PASS + 1)); }
+fail() { log "${RED}FAILED${NC} -- $1"; }
+# hint: print an indented follow-up suggestion.
+hint() { printf "  ${YELLOW}Hint:${NC} %b\n" "$1"; }
+# stop_at STEP: report where validation stopped and exit with status 1.
+stop_at() {
+  printf "\n"
+  printf "${RED}${BOLD}Validation stopped at %s.${NC} Fix the above before continuing.\n" "$1"
+  exit 1
+}
+# Banner and the resolved inputs.
+printf "\n"
+printf "${BOLD}========================================${NC}\n"
+printf "${BOLD}  OpenEnv Submission Validator${NC}\n"
+printf "${BOLD}========================================${NC}\n"
+log "Repo:     $REPO_DIR"
+log "Ping URL: $PING_URL"
+printf "\n"
+# Step 1: POST an empty JSON body to <ping_url>/reset and expect HTTP 200.
+log "${BOLD}Step 1/3: Pinging HF Space${NC} ($PING_URL/reset) ..."
+CURL_OUTPUT=$(portable_mktemp "validate-curl")
+CLEANUP_FILES+=("$CURL_OUTPUT")
+# On failure the code is set explicitly: appending a second "000" to whatever
+# -w already printed would produce "000000" and skip the unreachable branch.
+# stderr is discarded instead of being redirected into the -o file, which
+# would clobber the saved response body.
+if ! HTTP_CODE=$(curl -s -o "$CURL_OUTPUT" -w "%{http_code}" -X POST \
+  -H "Content-Type: application/json" -d '{}' \
+  "$PING_URL/reset" --max-time 30 2>/dev/null); then
+  HTTP_CODE="000"
+fi
+if [ "$HTTP_CODE" = "200" ]; then
+  pass "HF Space is live and responds to /reset"
+elif [ "$HTTP_CODE" = "000" ]; then
+  fail "HF Space not reachable (connection failed or timed out)"
+  hint "Check your network connection and that the Space is running."
+  hint "Try: curl -s -o /dev/null -w '%%{http_code}' -X POST $PING_URL/reset"
+  stop_at "Step 1"
+else
+  fail "HF Space /reset returned HTTP $HTTP_CODE (expected 200)"
+  hint "Make sure your Space is running and the URL is correct."
+  hint "Try opening $PING_URL in your browser first."
+  stop_at "Step 1"
+fi
+log "${BOLD}Step 2/3: Running docker build${NC} ..."
+if ! command -v docker &>/dev/null; then
+  fail "docker command not found"
+  hint "Install Docker: https://docs.docker.com/get-docker/"
+  stop_at "Step 2"
+fi
+if [ -f "$REPO_DIR/Dockerfile" ]; then
+  DOCKER_CONTEXT="$REPO_DIR"
+elif [ -f "$REPO_DIR/server/Dockerfile" ]; then
+  DOCKER_CONTEXT="$REPO_DIR/server"
+else
+  fail "No Dockerfile found in repo root or server/ directory"
+  stop_at "Step 2"
+fi
+log "  Found Dockerfile in $DOCKER_CONTEXT"
+BUILD_LOG=$(portable_mktemp "validate-build")
+CLEANUP_FILES+=("$BUILD_LOG")
+if run_with_timeout "$DOCKER_BUILD_TIMEOUT" docker build --progress=plain "$DOCKER_CONTEXT" 2>&1 | tee "$BUILD_LOG"; then
+  pass "Docker build succeeded"
+else
+  fail "Docker build failed (timeout=${DOCKER_BUILD_TIMEOUT}s)"
+  tail -20 "$BUILD_LOG"
+  stop_at "Step 2"
+fi
+log "${BOLD}Step 3/3: Running openenv validate${NC} ..."
+OPENENV_CMD=""
+if command -v openenv &>/dev/null; then
+  OPENENV_CMD="$(command -v openenv)"
+elif [ -x "/Library/Frameworks/Python.framework/Versions/3.13/bin/openenv" ]; then
+  OPENENV_CMD="/Library/Frameworks/Python.framework/Versions/3.13/bin/openenv"
+fi
+if [ -z "$OPENENV_CMD" ]; then
+  fail "openenv command not found"
+  hint "Install it: pip install openenv-core"
+  hint "Or ensure /Library/Frameworks/Python.framework/Versions/3.13/bin is in PATH"
+  stop_at "Step 3"
+fi
+VALIDATE_LOG=$(portable_mktemp "validate-openenv")
+CLEANUP_FILES+=("$VALIDATE_LOG")
+if (cd "$REPO_DIR" && "$OPENENV_CMD" validate 2>&1 | tee "$VALIDATE_LOG"); then
+  pass "openenv validate passed"
+else
+  fail "openenv validate failed"
+  tail -50 "$VALIDATE_LOG"
+  stop_at "Step 3"
+fi
+printf "\n"
+printf "${BOLD}========================================${NC}\n"
+printf "${GREEN}${BOLD}  All 3/3 checks passed!${NC}\n"
+printf "${GREEN}${BOLD}  Your submission is ready to submit.${NC}\n"
+printf "${BOLD}========================================${NC}\n"
+printf "\n"
+exit 0

reference-material/roadmap.md ADDED Viewed

	@@ -0,0 +1,577 @@

+# FarmRL Round-1 Fast Development Roadmap
+## Reference Materials
+### Introduction
+FarmRL is a reinforcement learning project that trains an agent to manage crop farming decisions. Given observable farm conditions such as soil properties, weather, and crop type, the agent learns to control irrigation, fertilizer application, and pesticide use in order to maximise crop yield while maintaining a healthy sustainability score.
+The project is grounded in a tabular agricultural dataset and draws conceptual inspiration from the FarmGym simulation framework. Two training paradigms are supported: a classic RL agent via a custom OpenEnv environment, and an optional text-framing path using TRL for language-model-based decision making.
+The raw CSV dataset is preprocessed once. The preprocessing adds the Water\_mm column (drawn uniformly from [20, min(Rainfall\_mm, 200)]) and subtracts that value from Rainfall\_mm to preserve water-balance invariance. A lightweight regression model (XGBoost) is then trained on the processed data to serve as the environment's transition model.
+---
+## Dataset preprocessing requirement
+Add a preprocessing script that creates a new variable Water\_mm such that:
+Rainfall\_original = Rainfall\_new + Water\_mm
+This prevents bias by conserving total water availability.
+Script file:
+scripts/add\_water\_variable.py
+```
+"""
+add_water_variable.py
+Adds a Water_mm column to the farm dataset.
+Water is drawn uniformly from [WATER_MIN, Rainfall_mm].
+Rainfall_mm is reduced by the water drawn to prevent bias.
+"""
+import pandas as pd
+import numpy as np
+import sys
+WATER_MIN = 20   # minimum meaningful irrigation (mm)
+WATER_MAX = 200  # hard ceiling - avoids flooding; also capped at rainfall
+def add_water(df: pd.DataFrame, seed: int = 42) -> pd.DataFrame:
+    rng = np.random.default_rng(seed)
+    df = df.copy()
+    # Upper bound: rainfall itself, capped at WATER_MAX
+    upper = df["Rainfall_mm"].clip(upper=WATER_MAX)
+    # Where rainfall < WATER_MIN we can't irrigate meaningfully — set 0
+    can_irrigate = upper >= WATER_MIN
+    water = np.where(
+        can_irrigate,
+        rng.uniform(WATER_MIN, upper.where(can_irrigate, WATER_MIN)),
+        0.0
+    )
+    df["Water_mm"] = np.round(water, 2)
+    df["Rainfall_mm"] = np.round(df["Rainfall_mm"] - df["Water_mm"], 2)
+    return df
+def main():
+    path = sys.argv[1] if len(sys.argv) > 1 else "farm_data.csv"
+    out  = sys.argv[2] if len(sys.argv) > 2 else path.replace(".csv", "_watered.csv")
+    df = pd.read_csv(path)
+    required = {"Rainfall_mm"}
+    missing = required - set(df.columns)
+    if missing:
+        raise ValueError(f"Missing columns: {missing}")
+    df_out = add_water(df)
+    print(f"Water_mm  — min: {df_out['Water_mm'].min():.1f}  "
+          f"max: {df_out['Water_mm'].max():.1f}  "
+          f"mean: {df_out['Water_mm'].mean():.1f}")
+    print(f"Rainfall_mm after subtraction — min: {df_out['Rainfall_mm'].min():.1f}  "
+          f"mean: {df_out['Rainfall_mm'].mean():.1f}")
+    df_out.to_csv(out, index=False)
+    print(f"Saved → {out}")
+if __name__ == "__main__":
+    main()
+```
+Purpose:
+- introduces irrigation variable
+- prevents data leakage
+- preserves statistical consistency
+- improves realism of agent decisions
+---
+# 3-Phase Fast Development Plan (3–4 hours)
+Goal: produce validator-compliant submission with improved reward design.
+Scope limitations:
+- simple environment dynamics
+- minimal dataset preprocessing
+- basic transition model
+- improved reward shaping only
+---
+# Phase 1 — OpenEnv Environment (Core functionality)
+**Goal:** produce a valid OpenEnv-compliant environment that passes schema and endpoint checks.
+Estimated time: **1.5 hours**
+---
+## Tasks
+### 1. Define typed state model (Pydantic)
+Keep small but realistic.
+Example variables:
+```
+soil_moisture : float
+soil_ph       : float
+temperature   : float
+rainfall      : float
+crop_stage    : int
+day           : int
+```
+Requirements satisfied:
+- typed models required by OpenEnv spec
+- deterministic state structure
+---
+### 2. Define typed action model
+Bounded numeric actions keep LLM outputs easy to parse and validate:
+```
+water       : float   (0–50)
+fertilizer  : float   (0–20)
+pesticide   : float   (0–10)
+```
+Keep ranges bounded to stabilize scoring.
+---
+### 3. Implement environment class
+File:
+```
+env/farm_env.py
+```
+Must implement:
+```
+reset()
+step(action)
+state()
+```
+---
+### 4. Implement improved reward design (only sophistication added)
+Reward must reflect:
+- yield improvement
+- sustainability balance
+- penalty for overuse of chemicals
+Example reward:
+```
+yield_score =
+    0.4 * soil_moisture
+  + 0.3 * temperature_factor
+  + 0.3 * rainfall_factor
+resource_penalty =
+    0.03 * fertilizer^1.2
+  + 0.04 * pesticide^1.3
+sustainability_bonus =
+    0.2 * exp(-fertilizer/20)
+  + 0.2 * exp(-pesticide/10)
+reward =
+    yield_score
+  + sustainability_bonus
+  - resource_penalty
+```
+Characteristics:
+- fertilizer is penalized progressively (super-linear penalty, decaying sustainability bonus)
+- discourages excessive pesticide
+- stable numeric range
+- smooth gradients
+---
+### 5. Episode termination rule
+```
+max_days = 30
+```
+Short episodes ensure runtime < 20 min.
+---
+### 6. Create openenv.yaml
+Define:
+```
+environment metadata
+observation schema
+action schema
+reward schema
+task definitions
+```
+Ensure field names exactly match Pydantic models.
+---
+### 7. Implement API wrapper (if required by spec)
+Expose:
+```
+POST /reset
+POST /step
+GET /state
+```
+Ensure reset returns valid initial state.
+Requirement satisfied:
+HF Space ping must return 200.
+---
+# Phase 2 — inference pipeline + tasks + graders
+**Goal:** produce valid evaluation run with structured logs and normalized scores.
+Estimated time: **1.5 hours**
+---
+## Tasks
+### 1. Create inference.py in root directory
+File location:
+```
+/inference.py
+```
+Must:
+- load environment
+- call LLM via OpenAI client
+- run episodes
+- log structured output
+- compute task scores
+---
+### 2. Implement OpenAI client usage
+Must use env variables:
+```
+API_BASE_URL
+MODEL_NAME
+HF_TOKEN
+```
+LLM prompt format:
+```
+Farm state:
+soil moisture: 34
+temperature: 26
+rainfall: 3
+crop stage: 2
+Choose action values:
+water
+fertilizer
+pesticide
+```
+LLM output expected as JSON:
+```
+{
+ "water": 20,
+ "fertilizer": 5,
+ "pesticide": 1
+}
+```
+Add fallback defaults if parsing fails.
+---
+### 3. Define 3 tasks
+Tasks must produce score ∈ [0,1].
+---
+#### Task 1 — yield performance
+Measures productivity.
+```
+score =
+normalized(total_reward)
+```
+---
+#### Task 2 — chemical efficiency
+Penalizes excessive fertilizer/pesticide.
+```
+score =
+1 - normalized(total_chemical_use)
+```
+---
+#### Task 3 — sustainability balance
+Encourages moderate actions.
+```
+score =
+yield / (fertilizer + pesticide + 1)
+normalized to 0–1
+```
+---
+### 4. Implement graders
+Each grader returns:
+```
+{
+ "task_id": "...",
+ "score": float
+}
+```
+Ensure:
+```
+0 ≤ score ≤ 1
+```
+Validator requirement.
+---
+### 5. Implement structured logs
+Strict format:
+```
+[START]
+model: MODEL_NAME
+[STEP]
+step: 1
+action: {...}
+reward: ...
+[STEP]
+step: 2
+...
+[END]
+task_scores:
+task1: 0.63
+task2: 0.71
+task3: 0.59
+```
+Formatting must match specification exactly.
+---
+### 6. Runtime optimization
+Keep small:
+```
+episodes = 3
+steps per episode = 20–30
+```
+Ensures runtime well below 20 minutes.
+---
+# Phase 3 — packaging, docker, validation
+**Goal:** ensure infrastructure compatibility and reproducibility.
+Estimated time: **1 hour**
+---
+## Tasks
+### 1. requirements.txt
+Minimal dependencies:
+```
+pydantic
+numpy
+pyyaml
+openai
+fastapi (optional)
+uvicorn (optional)
+```
+Avoid heavy ML libraries.
+---
+### 2. Dockerfile
+Must build automatically.
+Example flow:
+```
+FROM python:3.11-slim
+WORKDIR /app
+COPY . .
+RUN pip install -r requirements.txt
+CMD ["python", "inference.py"]
+```
+Validator requirement satisfied.
+---
+### 3. environment variables support
+Ensure inference.py reads:
+```
+API_BASE_URL
+MODEL_NAME
+HF_TOKEN
+```
+No hardcoding.
+---
+### 4. basic local tests
+Run:
+```
+python inference.py
+```
+Verify:
+- no crashes
+- scores generated
+- logs formatted correctly
+---
+### 5. validation checklist
+Confirm:
+HF Space can call:
+```
+reset()
+step()
+state()
+```
+Ensure:
+- numeric reward returned
+- valid JSON outputs
+- docker build successful
+---
+# Final deliverable structure
+```
+project/
+│
+├── openenv.yaml
+├── inference.py
+├── Dockerfile
+├── requirements.txt
+│
+├── env/
+│   └── farm_env.py
+│
+└── tasks/
+    └── graders.py
+```
+---
+# Expected outcome
+Submission will pass:
+- OpenEnv compliance
+- structured logging requirement
+- 3 task requirement
+- reproducibility requirement
+- runtime constraint
+- docker build requirement
+- HF space endpoint validation
+---

reference-material/sample-inference-script.py ADDED Viewed

	@@ -0,0 +1,188 @@

+"""
+Inference Script Example
+===================================
+MANDATORY
+- Before submitting, ensure the following variables are defined in your environment configuration:
+    API_BASE_URL   The API endpoint for the LLM.
+    MODEL_NAME     The model identifier to use for inference.
+    HF_TOKEN       Your Hugging Face / API key.
+    LOCAL_IMAGE_NAME The name of the local image to use for the environment if you are using from_docker_image()
+                     method
+- Defaults are set only for API_BASE_URL and MODEL_NAME
+    (and should reflect your active inference setup):
+    API_BASE_URL = os.getenv("API_BASE_URL", "<your-active-endpoint>")
+    MODEL_NAME = os.getenv("MODEL_NAME", "<your-active-model>")
+- The inference script must be named `inference.py` and placed in the root directory of the project
+- Participants must use OpenAI Client for all LLM calls using above variables
+STDOUT FORMAT
+- The script must emit exactly three line types to stdout, in this order:
+    [START] task=<task_name> env=<benchmark> model=<model_name>
+    [STEP]  step=<n> action=<action_str> reward=<0.00> done=<true|false> error=<msg|null>
+    [END]   success=<true|false> steps=<n> score=<score> rewards=<r1,r2,...,rn>
+  Rules:
+    - One [START] line at episode begin.
+    - One [STEP] line per step, immediately after env.step() returns.
+    - One [END] line after env.close(), always emitted (even on exception).
+    - reward and rewards are formatted to 2 decimal places.
+    - done and success are lowercase booleans: true or false.
+    - error is the raw last_action_error string, or null if none.
+    - All fields on a single line with no newlines within a line.
+    - Each tasks should return score in [0, 1]
+  Example:
+    [START] task=click-test env=miniwob model=Qwen3-VL-30B
+    [STEP] step=1 action=click('123') reward=0.00 done=false error=null
+    [STEP] step=2 action=fill('456','text') reward=0.00 done=false error=null
+    [STEP] step=3 action=click('789') reward=1.00 done=true error=null
+    [END] success=true steps=3 score=1.00 rewards=0.00,0.00,1.00
+"""
+import asyncio
+import os
+import textwrap
+from typing import List, Optional
+from openai import OpenAI
+from my_env_v4 import MyEnvV4Action, MyEnvV4Env
+IMAGE_NAME = os.getenv("IMAGE_NAME") # If you are using docker image
+API_KEY = os.getenv("HF_TOKEN") or os.getenv("API_KEY")
+API_BASE_URL = os.getenv("API_BASE_URL") or "https://router.huggingface.co/v1"
+MODEL_NAME = os.getenv("MODEL_NAME") or "Qwen/Qwen2.5-72B-Instruct"
+TASK_NAME = os.getenv("MY_ENV_V4_TASK", "echo")
+BENCHMARK = os.getenv("MY_ENV_V4_BENCHMARK", "my_env_v4")
+MAX_STEPS = 8
+TEMPERATURE = 0.7
+MAX_TOKENS = 150
+SUCCESS_SCORE_THRESHOLD = 0.1  # normalized score in [0, 1]
+# Max possible reward: each token contributes 0.1, across all steps
+_MAX_REWARD_PER_STEP = MAX_TOKENS * 0.1
+MAX_TOTAL_REWARD = MAX_STEPS * _MAX_REWARD_PER_STEP
+SYSTEM_PROMPT = textwrap.dedent(
+    """
+    You are interacting with a simple echo environment.
+    Each turn you must send a message. The environment will echo it back.
+    Reward is proportional to message length: reward = len(message) * 0.1
+    Your goal is to maximize total reward by sending meaningful, substantive messages.
+    Reply with exactly one message string — no quotes, no prefixes, just the message text.
+    """
+).strip()
def log_start(task: str, env: str, model: str) -> None:
    """Print the single ``[START]`` line that opens an episode log.

    Args:
        task: Task name reported in the ``task=`` field.
        env: Benchmark / environment name reported in the ``env=`` field.
        model: Model identifier reported in the ``model=`` field.
    """
    line = f"[START] task={task} env={env} model={model}"
    # Flushed immediately so the line is not held back in a buffer.
    print(line, flush=True)
def log_step(step: int, action: str, reward: float, done: bool, error: Optional[str]) -> None:
    """Print one ``[STEP]`` line for a completed environment step.

    The documented stdout format requires every record to sit on a single
    line, but ``action`` is free-form model output and may contain line
    breaks. Line breaks in ``action`` and ``error`` are therefore replaced by
    single spaces before printing; text without line breaks is unchanged.

    Args:
        step: 1-based step index.
        action: Action string sent to the environment.
        reward: Step reward, printed with two decimals.
        done: Episode-finished flag, printed as ``true`` / ``false``.
        error: Last action error, or ``None`` / empty for ``null``.
    """

    def _one_line(text: str) -> str:
        # splitlines() drops the separators; joining with a space keeps words apart.
        return " ".join(str(text).splitlines())

    error_val = (_one_line(error) if error else "") or "null"
    done_val = str(done).lower()
    print(
        f"[STEP] step={step} action={_one_line(action)} reward={reward:.2f} done={done_val} error={error_val}",
        flush=True,
    )
def log_end(success: bool, steps: int, score: float, rewards: List[float]) -> None:
    """Print the closing ``[END]`` line of an episode log.

    Args:
        success: Printed as lowercase ``true`` / ``false``.
        steps: Number of steps taken.
        score: Episode score, printed with three decimals.
        rewards: Per-step rewards, each printed with two decimals and
            joined by commas.
    """
    rewards_str = ",".join(map("{:.2f}".format, rewards))
    success_val = str(success).lower()
    print(
        f"[END] success={success_val} steps={steps} score={score:.3f} rewards={rewards_str}",
        flush=True,
    )
def build_user_prompt(step: int, last_echoed: str, last_reward: float, history: List[str]) -> str:
    """Build the user message sent to the model for one step.

    The prompt is assembled line by line rather than with
    ``textwrap.dedent`` on an indented f-string: once two or more history
    entries were interpolated, the un-indented continuation lines made the
    common margin empty, so nothing was dedented and every other line kept
    its leading spaces.

    Args:
        step: Current step number.
        last_echoed: Last echoed message; shown via ``repr``.
        last_reward: Last reward, shown with two decimals.
        history: Previous step summaries; only the last four are included,
            or the literal ``None`` when the list is empty.

    Returns:
        The prompt text with no leading indentation on any line.
    """
    history_block = "\n".join(history[-4:]) if history else "None"
    return "\n".join(
        [
            f"Step: {step}",
            f"Last echoed message: {last_echoed!r}",
            f"Last reward: {last_reward:.2f}",
            "Previous steps:",
            history_block,
            "Send your next message.",
        ]
    )
def get_model_message(client: OpenAI, step: int, last_echoed: str, last_reward: float, history: List[str]) -> str:
    """Ask the model for the next message, falling back to ``"hello"``.

    Sends the system prompt plus the per-step user prompt through
    ``client.chat.completions.create`` and returns the stripped text of the
    first choice. An empty reply, or any exception raised by the request,
    yields the fallback string; request failures are reported on stdout with
    a ``[DEBUG]`` prefix.
    """
    fallback = "hello"
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {
            "role": "user",
            "content": build_user_prompt(step, last_echoed, last_reward, history),
        },
    ]
    try:
        completion = client.chat.completions.create(
            model=MODEL_NAME,
            messages=messages,
            temperature=TEMPERATURE,
            max_tokens=MAX_TOKENS,
            stream=False,
        )
        reply = completion.choices[0].message.content
        return (reply or "").strip() or fallback
    except Exception as exc:
        print(f"[DEBUG] Model request failed: {exc}", flush=True)
        return fallback
async def main() -> None:
    """Run one episode against the environment and emit the structured log.

    ``[START]`` is printed first and the environment is created inside the
    ``try`` block, so a failure while starting the environment still reaches
    the ``finally`` clause and produces the ``[END]`` line. ``env.close()``
    is only attempted when an environment was actually created.

    The score is the sum of step rewards divided by ``MAX_TOTAL_REWARD``,
    clamped to ``[0, 1]``; ``success`` is whether it reaches
    ``SUCCESS_SCORE_THRESHOLD``.
    """
    client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)
    env = None  # stays None if environment startup fails
    history: List[str] = []
    rewards: List[float] = []
    steps_taken = 0
    score = 0.0
    success = False
    log_start(task=TASK_NAME, env=BENCHMARK, model=MODEL_NAME)
    try:
        env = await MyEnvV4Env.from_docker_image(IMAGE_NAME)
        result = await env.reset()  # OpenENV.reset()
        last_echoed = result.observation.echoed_message
        last_reward = 0.0
        for step in range(1, MAX_STEPS + 1):
            if result.done:
                break
            message = get_model_message(client, step, last_echoed, last_reward, history)
            result = await env.step(MyEnvV4Action(message=message))
            obs = result.observation
            reward = result.reward or 0.0  # a missing reward counts as 0.0
            done = result.done
            error = None
            rewards.append(reward)
            steps_taken = step
            last_echoed = obs.echoed_message
            last_reward = reward
            log_step(step=step, action=message, reward=reward, done=done, error=error)
            history.append(f"Step {step}: {message!r} -> reward {reward:+.2f}")
            if done:
                break
        score = sum(rewards) / MAX_TOTAL_REWARD if MAX_TOTAL_REWARD > 0 else 0.0
        score = min(max(score, 0.0), 1.0)  # clamp to [0, 1]
        success = score >= SUCCESS_SCORE_THRESHOLD
    finally:
        if env is not None:
            try:
                await env.close()
            except Exception as e:
                print(f"[DEBUG] env.close() error (container cleanup): {e}", flush=True)
        # Emitted last, after cleanup, whether or not the episode succeeded.
        log_end(success=success, steps=steps_taken, score=score, rewards=rewards)
+if __name__ == "__main__":
+    asyncio.run(main())

requirements.txt ADDED Viewed

	@@ -0,0 +1,9 @@

+numpy
+pandas
+pydantic
+pyyaml
+openai
+python-dotenv
+fastapi
+uvicorn
+openenv-core>=0.2.0

scripts/__pycache__/openai_responses_demo.cpython-313.pyc ADDED Viewed

Binary file (4.84 kB). View file

scripts/add_water_variable.py ADDED Viewed

	@@ -0,0 +1,64 @@

+"""
+add_water_variable.py
+Adds a Water_mm column to the farm dataset.
+Water is drawn uniformly from [WATER_MIN, Rainfall_mm].
+Rainfall_mm is reduced by the water drawn to prevent bias.
+"""
+import sys
+import numpy as np
+import pandas as pd
WATER_MIN = 20   # minimum irrigation amount drawn for a row (mm)
WATER_MAX = 200  # hard ceiling on the irrigation drawn for a row (mm)


def add_water(df: pd.DataFrame, seed: int = 42) -> pd.DataFrame:
    """Return a copy of *df* with a ``Water_mm`` column split out of rainfall.

    For each row the upper bound is ``min(Rainfall_mm, WATER_MAX)``. Rows
    whose bound is at least ``WATER_MIN`` receive a value drawn uniformly
    from ``[WATER_MIN, bound]``; all other rows receive ``0.0``. The drawn
    amount, rounded to two decimals, is subtracted from ``Rainfall_mm`` and
    the result is rounded to two decimals as well. The input frame is not
    modified.

    Args:
        df: Frame containing a ``Rainfall_mm`` column.
        seed: Seed for ``numpy.random.default_rng``.
    """
    result = df.copy()
    generator = np.random.default_rng(seed)

    ceiling = result["Rainfall_mm"].clip(upper=WATER_MAX)
    eligible = ceiling >= WATER_MIN
    # Non-eligible rows get the degenerate range [WATER_MIN, WATER_MIN] so the
    # bounds passed to uniform() stay valid; their draw is replaced with 0.0.
    draws = generator.uniform(WATER_MIN, ceiling.where(eligible, WATER_MIN))

    result["Water_mm"] = np.round(np.where(eligible, draws, 0.0), 2)
    result["Rainfall_mm"] = np.round(result["Rainfall_mm"] - result["Water_mm"], 2)
    return result
def main() -> None:
    """Command-line entry point: read a CSV, add ``Water_mm``, write it back out.

    ``sys.argv[1]`` is the input path (default ``farm_data.csv``) and
    ``sys.argv[2]`` the output path. When no output path is given,
    ``_watered`` is inserted before the input's extension. The previous
    ``str.replace(".csv", ...)`` left paths without ``.csv`` unchanged, so
    the output silently overwrote the input, and it rewrote every ``.csv``
    occurrence in the path, directory names included.

    Raises:
        ValueError: if the input has no ``Rainfall_mm`` column.
    """
    import os.path

    path = sys.argv[1] if len(sys.argv) > 1 else "farm_data.csv"
    if len(sys.argv) > 2:
        out = sys.argv[2]
    else:
        root, ext = os.path.splitext(path)
        out = f"{root}_watered{ext}"
    df = pd.read_csv(path)
    required = {"Rainfall_mm"}
    missing = required - set(df.columns)
    if missing:
        raise ValueError(f"Missing columns: {missing}")
    df_out = add_water(df)
    # Summary statistics of the generated and adjusted columns.
    print(
        f"Water_mm - min: {df_out['Water_mm'].min():.1f} "
        f"max: {df_out['Water_mm'].max():.1f} "
        f"mean: {df_out['Water_mm'].mean():.1f}"
    )
    print(
        "Rainfall_mm after subtraction - "
        f"min: {df_out['Rainfall_mm'].min():.1f} "
        f"mean: {df_out['Rainfall_mm'].mean():.1f}"
    )
    df_out.to_csv(out, index=False)
    print(f"Saved -> {out}")
+if __name__ == "__main__":
+    main()

scripts/openai_responses_demo.py ADDED Viewed

	@@ -0,0 +1,114 @@

+from __future__ import annotations
+import os
+import sys
+from pathlib import Path
+from dotenv import load_dotenv
+from openai import OpenAI
+ROOT_DIR = Path(__file__).resolve().parents[1]
+ENV_FILE = ROOT_DIR / ".env"
+load_dotenv(ENV_FILE)
def require_env(name: str) -> str:
    """Return the stripped value of environment variable *name*.

    Raises:
        RuntimeError: if the variable is unset or blank after stripping.
    """
    value = os.getenv(name, "").strip()
    if value:
        return value
    raise RuntimeError(
        f"Missing required environment variable '{name}'. "
        f"Set it in {ENV_FILE}."
    )
def extract_output_text(response: object) -> str:
    """Return the text carried by a response object, or ``""`` if none.

    The ``output_text`` attribute is used when it is non-blank. Otherwise the
    ``output`` items are walked (taken from ``response.model_dump()`` when
    the attribute is missing), every truthy ``text`` found in each item's
    ``content`` list is collected, and the pieces are joined with newlines.
    Items and content entries may be dicts or attribute-style objects.
    """
    direct = (getattr(response, "output_text", "") or "").strip()
    if direct:
        return direct

    items = getattr(response, "output", None)
    if items is None and hasattr(response, "model_dump"):
        items = response.model_dump().get("output", [])

    chunks: list[str] = []
    for item in items or []:
        if isinstance(item, dict):
            entries = item.get("content", [])
        else:
            entries = getattr(item, "content", []) or []
        for entry in entries:
            text = entry.get("text") if isinstance(entry, dict) else getattr(entry, "text", None)
            if text:
                chunks.append(str(text))
    return "\n".join(chunks).strip()
def get_response_error_message(response: object) -> str | None:
    """Describe the ``error`` attached to *response*, or return ``None``.

    The error may be a dict or an attribute-style object. The result is
    ``"<code>: <message>"`` when both fields are truthy, the message alone
    when only it is present, and ``str(error)`` when there is no message.
    """
    err = getattr(response, "error", None)
    if not err:
        return None

    if isinstance(err, dict):
        code, message = err.get("code"), err.get("message")
    else:
        code, message = getattr(err, "code", None), getattr(err, "message", None)

    if not message:
        return str(err)
    return f"{code}: {message}" if code else str(message)
def select_client_config() -> tuple[str, str]:
    """Return ``(base_url, api_key)`` for the OpenAI client.

    ``API_BASE_URL`` falls back to ``https://api.openai.com/v1`` when it is
    unset *or* blank. Previously a variable that was set but empty produced
    an empty base URL instead of the default.

    Raises:
        RuntimeError: if the base URL points at ``huggingface.co`` or if
            ``OPENAI_API_KEY`` is missing or blank.
    """
    default_base_url = "https://api.openai.com/v1"
    base_url = (os.getenv("API_BASE_URL") or "").strip() or default_base_url
    openai_api_key = (os.getenv("OPENAI_API_KEY") or "").strip()
    if "huggingface.co" in base_url.lower():
        raise RuntimeError(
            "Hugging Face router is disabled for LLM calls in this script. "
            "Set API_BASE_URL to https://api.openai.com/v1"
        )
    if not openai_api_key:
        raise RuntimeError(
            "Missing OPENAI_API_KEY for the configured API_BASE_URL."
        )
    return base_url, openai_api_key
def main() -> None:
    """Send one prompt through the Responses API and print the reply text.

    The prompt is the command-line arguments joined by spaces, or a default
    haiku request when none are given. ``MODEL_NAME`` falls back to
    ``gpt-5-nano`` when it is unset or blank, so a set-but-empty variable no
    longer sends an empty model id.

    Raises:
        RuntimeError: if the response carries an error or no output text
            (and whatever ``select_client_config`` raises).
    """
    base_url, api_key = select_client_config()
    model = (os.getenv("MODEL_NAME") or "").strip() or "gpt-5-nano"
    prompt = " ".join(sys.argv[1:]).strip() or "write a haiku about ai"
    client = OpenAI(api_key=api_key, base_url=base_url)
    response = client.responses.create(
        model=model,
        input=prompt,
        store=True,
    )
    error_message = get_response_error_message(response)
    if error_message:
        raise RuntimeError(f"Responses API failed: {error_message}")
    output_text = extract_output_text(response)
    if not output_text:
        raise RuntimeError("OpenAI response did not contain output text.")
    print(output_text)
+if __name__ == "__main__":
+    main()

server/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ # Server package marker for OpenEnv entrypoint discovery.

server/app.py ADDED Viewed

	@@ -0,0 +1,14 @@

+import os
+from api.main import app
def main() -> None:
    """Serve ``server.app:app`` with uvicorn on ``0.0.0.0``.

    The port comes from ``PORT`` and defaults to 7860 when the variable is
    unset or blank; a set-but-empty ``PORT`` used to raise ``ValueError``
    from ``int("")``.
    """
    # Imported here, so importing this module for ``app`` does not import uvicorn.
    import uvicorn

    port = int((os.getenv("PORT") or "").strip() or "7860")
    uvicorn.run("server.app:app", host="0.0.0.0", port=port)
+if __name__ == "__main__":
+    main()

tasks/__init__.py ADDED Viewed

	@@ -0,0 +1,3 @@


1	+ from .graders import grade_all
2	+
3	+ __all__ = ["grade_all"]

tasks/__pycache__/__init__.cpython-313.pyc ADDED Viewed

Binary file (221 Bytes). View file

tasks/__pycache__/graders.cpython-313.pyc ADDED Viewed

Binary file (2.63 kB). View file

tasks/graders.py ADDED Viewed

	@@ -0,0 +1,68 @@

+from __future__ import annotations
+from typing import Dict, List
+MIN_REWARD_PER_STEP = -1.75
+MAX_REWARD_PER_STEP = 1.40
+MAX_CHEMICAL_PER_STEP = 30.0
def clamp_01(value: float) -> float:
    """Clamp *value* to the closed interval ``[0.0, 1.0]``.

    NaN is mapped to ``0.0``. With the plain ``max(0.0, min(1.0, x))`` form
    a NaN input came out as ``1.0``, because every comparison against NaN is
    false, so an undefined score was reported as a perfect one.
    """
    number = float(value)
    if number != number:  # true only for NaN
        return 0.0
    return max(0.0, min(1.0, number))
def normalize(value: float, low: float, high: float) -> float:
    """Map *value* from ``[low, high]`` onto ``[0, 1]``, clamping the result.

    Returns ``0.0`` when the range is empty or inverted (``high <= low``).
    """
    return 0.0 if high <= low else clamp_01((value - low) / (high - low))
def grade_yield_performance(total_reward: float, total_steps: int) -> Dict[str, float]:
    """Score the episode's total reward against its per-step reward bounds.

    The bounds are ``MIN_REWARD_PER_STEP`` and ``MAX_REWARD_PER_STEP`` scaled
    by ``total_steps``; the total reward is normalized between them.

    Returns:
        ``{"task_id": "task_easy_yield", "score": <normalized reward>}``.
    """
    score = normalize(
        total_reward,
        MIN_REWARD_PER_STEP * total_steps,
        MAX_REWARD_PER_STEP * total_steps,
    )
    return {"task_id": "task_easy_yield", "score": score}
def grade_chemical_efficiency(
    total_fertilizer: float,
    total_pesticide: float,
    total_steps: int,
) -> Dict[str, float]:
    """Score how little fertilizer and pesticide the episode used.

    Combined usage is normalized against ``MAX_CHEMICAL_PER_STEP`` scaled by
    ``total_steps`` and the score is one minus that fraction.

    Returns:
        ``{"task_id": "task_medium_chemical_efficiency", "score": <score>}``.
    """
    budget = MAX_CHEMICAL_PER_STEP * total_steps
    used_fraction = normalize(total_fertilizer + total_pesticide, 0.0, budget)
    return {
        "task_id": "task_medium_chemical_efficiency",
        "score": clamp_01(1.0 - used_fraction),
    }
def grade_sustainability_balance(
    total_yield: float,
    total_fertilizer: float,
    total_pesticide: float,
) -> Dict[str, float]:
    """Score yield obtained per unit of chemical input, squashed into ``[0, 1]``.

    ``ratio = yield / (fertilizer + pesticide + 1)`` and
    ``score = ratio / (ratio + 1)``. Yield and combined chemical use are
    floored at zero first. Without the floor a ratio below ``-1`` gave a
    value above 1 that was clamped to a perfect ``1.0``, and a ratio of
    exactly ``-1`` raised ``ZeroDivisionError``. For non-negative inputs the
    result is the same as before.

    Returns:
        ``{"task_id": "task_hard_sustainability_balance", "score": <score>}``.
    """
    chemical_use = max(0.0, float(total_fertilizer) + float(total_pesticide))
    ratio = max(0.0, float(total_yield)) / (chemical_use + 1.0)
    if ratio == float("inf"):
        score = 1.0  # inf / inf would be NaN
    else:
        # ratio >= 0 here, so the quotient already lies in [0, 1).
        score = ratio / (ratio + 1.0)
    return {"task_id": "task_hard_sustainability_balance", "score": score}
def grade_all(
    total_reward: float,
    total_yield: float,
    total_fertilizer: float,
    total_pesticide: float,
    total_steps: int,
) -> List[Dict[str, float]]:
    """Run the three graders and return their results in a fixed order.

    Returns:
        A list with the yield-performance, chemical-efficiency and
        sustainability-balance results, in that order.
    """
    yield_result = grade_yield_performance(
        total_reward=total_reward,
        total_steps=total_steps,
    )
    chemical_result = grade_chemical_efficiency(
        total_fertilizer=total_fertilizer,
        total_pesticide=total_pesticide,
        total_steps=total_steps,
    )
    balance_result = grade_sustainability_balance(
        total_yield=total_yield,
        total_fertilizer=total_fertilizer,
        total_pesticide=total_pesticide,
    )
    return [yield_result, chemical_result, balance_result]

uv.lock ADDED Viewed

The diff for this file is too large to render. See raw diff