# PatchHawk Environment Configuration
# Copy this file to .env and fill in your real values.

# ── Models ────────────────────────────────────────────────────────
# Model used for synthetic data generation (Meta SDK Track A).
# The default looks like a Hugging Face Hub model id (<org>/<name>);
# whether a local path is also accepted depends on the code that reads
# this variable — confirm before changing the format.
SYNTH_GENERATOR_MODEL=meta-llama/Llama-3.2-3B-Instruct

# Base policy model that GRPO training starts from.
# Same <org>/<name> form as above.
GRPO_POLICY_MODEL=unsloth/Qwen2.5-Coder-7B-Instruct

# ── Weights & Biases ─────────────────────────────────────────────
# Placeholder — replace with your own W&B API key. This is a credential:
# keep the filled-in .env out of version control.
WANDB_API_KEY=your_wandb_api_key_here
# W&B project name.
WANDB_PROJECT=patchhawk
# Name for the training run.
# NOTE(review): the W&B SDK's own variable for the run name is WANDB_NAME,
# so this one is presumably read by project code — confirm it is consumed.
WANDB_RUN_NAME=grpo-run-1

# ── Hugging Face Hub ─────────────────────────────────────────────
# Placeholder — replace with your own Hugging Face access token. This is a
# credential: keep the filled-in .env out of version control.
HF_TOKEN=your_hf_token_here
# Placeholder — replace "your-username" with your own Hub account or
# organisation. Presumably the Hub repo id (<owner>/<name>) that project
# code reads; what it is used for is not visible here — confirm.
HF_REPO=your-username/patchhawk

# ── Docker ────────────────────────────────────────────────────────
# Docker image reference in <name>:<tag> form.
# Presumably the sandbox image the project runs code in (going by the
# "patchhawk-sandbox" name) — confirm against the code that reads it.
DOCKER_IMAGE=patchhawk-sandbox:latest

# ── vLLM (Track A SDK) ──────────────────────────────────────────
# Base URL of the vLLM server. The default points at a server on this
# machine, port 8000, under the /v1 path.
# NOTE(review): the /v1 suffix suggests vLLM's OpenAI-compatible API —
# confirm the client expects the suffix to be part of this value.
VLLM_API_BASE=http://localhost:8000/v1