Spaces:

S-Dreamer
/

CodeCraftLab

Runtime error

App Files Files Community

S-Dreamer committed on May 16

Commit

178abc4

verified ·

1 Parent(s): 6cf4784

Upload 3 files

Browse files

Files changed (3) hide show

.env.example +36 -0
Dockerfile +45 -0
config.py +206 -0

.env.example ADDED Viewed

	@@ -0,0 +1,36 @@

+# CodeCraftLab — Environment Configuration
+# Copy to .env and fill in values. Never commit .env to git.
+# --------------------------------------------------------------------------
+# App
+# --------------------------------------------------------------------------
+ENV=development                          # development | staging | production
+LOG_LEVEL=INFO                           # DEBUG | INFO | WARNING | ERROR
+# --------------------------------------------------------------------------
+# Auth (REQUIRED)
+# --------------------------------------------------------------------------
+SECRET_KEY=change-me-to-at-least-32-random-chars-in-production
+ACCESS_TOKEN_EXPIRE_MINUTES=60
+# --------------------------------------------------------------------------
+# Database (REQUIRED)
+# --------------------------------------------------------------------------
+DATABASE_URL=postgresql+asyncpg://codecraftlab:password@localhost:5432/codecraftlab
+# --------------------------------------------------------------------------
+# HuggingFace (required for Hub push)
+# --------------------------------------------------------------------------
+HF_TOKEN=hf_xxxxxxxxxxxxxxxxxxxx
+MODEL_CACHE_DIR=./cache
+# --------------------------------------------------------------------------
+# Training
+# --------------------------------------------------------------------------
+MAX_CONCURRENT_JOBS=2
+JOB_OUTPUT_DIR=./checkpoints
+# --------------------------------------------------------------------------
+# CORS (JSON array of allowed origins; list every production origin here)
+# --------------------------------------------------------------------------
+CORS_ORIGINS=["http://localhost:3000"]

Dockerfile ADDED Viewed

	@@ -0,0 +1,45 @@

+# --------------------------------------------------------------------------
+# CodeCraftLab — Dockerfile
+# FastAPI + Uvicorn on port 8000
+# Runs as non-root user (HF Spaces requirement)
+# --------------------------------------------------------------------------
+FROM python:3.11-slim AS base
+# System deps: git and git-lfs, plus build-essential (presumably for Python
+# packages that compile native code — TODO confirm it is still needed).
+# The apt cache and package lists are removed in the same layer.
+# NOTE(review): `git lfs install` runs as root here without --system, so the
+# LFS filter settings may land in root's global git config only — confirm
+# that appuser gets them if LFS is used at run time.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    git \
+    git-lfs \
+    build-essential \
+    && git lfs install \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+# Non-root user (required by HuggingFace Spaces); created with UID 1000 and a
+# home directory.
+RUN useradd -m -u 1000 appuser
+WORKDIR /app
+# --------------------------------------------------------------------------
+FROM base AS deps
+# Copy only the dependency manifests so this layer is rebuilt only when they
+# change. The `uv.lock*` glob lets the COPY succeed when no lockfile exists.
+# NOTE(review): `uv sync --frozen` is then presumably going to fail without a
+# lockfile — confirm uv.lock is committed.
+COPY pyproject.toml uv.lock* ./
+# Install uv, then resolve the non-dev dependencies (the runtime stage later
+# copies /app/.venv from this stage).
+RUN pip install uv --no-cache-dir && \
+    uv sync --no-dev --frozen
+# --------------------------------------------------------------------------
+FROM base AS runtime
+# Bring over only the resolved virtualenv from the deps stage and put it first
+# on PATH so `uvicorn` resolves to the venv's copy.
+COPY --from=deps /app/.venv /app/.venv
+ENV PATH="/app/.venv/bin:$PATH"
+# Default worker count. Uvicorn falls back to WEB_CONCURRENCY when --workers is
+# not passed, so `-e WEB_CONCURRENCY=1` overrides it at run time (exec-form CMD
+# performs no variable expansion, so the value cannot live in the CMD itself).
+ENV WEB_CONCURRENCY=4
+COPY --chown=appuser:appuser . .
+USER appuser
+EXPOSE 8000
+# Uvicorn — 4 workers by default, 1 in development via WEB_CONCURRENCY=1.
+# No --log-config flag: the CLI option takes a path to an existing logging
+# config file, so a placeholder such as "null" makes uvicorn exit at startup.
+CMD ["uvicorn", "app:app", \
+     "--host", "0.0.0.0", \
+     "--port", "8000"]

config.py ADDED Viewed

	@@ -0,0 +1,206 @@

+"""
+Training configuration schemas — Pydantic v2.
+All training jobs are validated against these models before execution.
+No raw dicts escape into the pipeline; everything is typed and constrained.
+"""
+from __future__ import annotations
+from enum import StrEnum
+from typing import Annotated
+from pydantic import BaseModel, Field, HttpUrl, model_validator
+from pydantic import PositiveFloat, PositiveInt
+# ---------------------------------------------------------------------------
+# Enums
+# ---------------------------------------------------------------------------
+class EvalStrategy(StrEnum):  # Schedule selector; used by EvaluationConfig.strategy and CheckpointConfig.save_strategy.
+    NO = "no"
+    STEPS = "steps"  # the owning config must then supply its step interval (eval_steps / save_steps)
+    EPOCH = "epoch"  # default for both evaluation and checkpointing in this file
+class Precision(StrEnum):  # Precision selector shared by TrainingHyperparams and InferenceConfig.
+    FP32 = "fp32"
+    FP16 = "fp16"
+    BF16 = "bf16"  # default for both training and inference configs in this file
+    INT8 = "int8"  # NOTE(review): also selectable for training — confirm the pipeline supports it
+class OptimizerType(StrEnum):  # Allowed values for TrainingHyperparams.optimizer.
+    ADAMW = "adamw_torch"  # default optimizer in TrainingHyperparams
+    ADAMW_8BIT = "adamw_8bit"
+    PAGED_ADAMW_8BIT = "paged_adamw_8bit"
+    SGD = "sgd"  # NOTE(review): values resemble HF Trainer `optim` names — confirm against the pipeline
+class EvalMetric(StrEnum):  # Metric identifiers accepted by EvaluationConfig.metrics and metric_for_best_model.
+    PASS_AT_1 = "pass_at_1"  # default metric_for_best_model
+    PASS_AT_10 = "pass_at_10"
+    BLEU = "bleu"
+    EXECUTION_ACCURACY = "execution_accuracy"
+    EXACT_MATCH = "exact_match"
+# ---------------------------------------------------------------------------
+# Sub-configs
+# ---------------------------------------------------------------------------
+class LoRAConfig(BaseModel):
+    """LoRA adapter configuration; TrainingJobConfig creates an enabled default when ``lora`` is omitted."""
+    enabled: bool = True  # NOTE(review): how the pipeline treats enabled=False vs lora=None is not visible here
+    r: Annotated[int, Field(ge=1, le=256)] = 16
+    alpha: Annotated[int, Field(ge=1)] = 32  # must also be >= r; enforced by alpha_geq_r below
+    dropout: Annotated[float, Field(ge=0.0, lt=1.0)] = 0.05
+    target_modules: list[str] = Field(
+        default_factory=lambda: ["q_proj", "v_proj"],
+        min_length=1,  # an empty list is rejected
+    )
+    bias: str = "none"  # free-form string; not checked against a fixed set here
+    @model_validator(mode="after")
+    def alpha_geq_r(self) -> "LoRAConfig":  # cross-field check that runs after per-field validation
+        if self.alpha < self.r:
+            raise ValueError(f"lora.alpha ({self.alpha}) should be >= lora.r ({self.r})")  # hard failure despite the word "should"
+        return self
+class TrainingHyperparams(BaseModel):  # Range-checked hyperparameters for one training run; every field has a default.
+    num_epochs: Annotated[int, Field(ge=1, le=100)] = 3
+    batch_size: Annotated[int, Field(ge=1, le=256)] = 8
+    gradient_accumulation_steps: Annotated[int, Field(ge=1, le=128)] = 4
+    learning_rate: Annotated[float, Field(gt=0.0, lt=1.0)] = 2e-5
+    weight_decay: Annotated[float, Field(ge=0.0, lt=1.0)] = 0.01
+    warmup_ratio: Annotated[float, Field(ge=0.0, lt=1.0)] = 0.1
+    max_seq_length: Annotated[int, Field(ge=64, le=32768)] = 1024
+    max_grad_norm: Annotated[float, Field(gt=0.0)] = 1.0
+    optimizer: OptimizerType = OptimizerType.ADAMW
+    precision: Precision = Precision.BF16
+    lr_scheduler: str = "cosine"  # free-form string; not validated against a fixed set here
+    seed: int = 42
+    dataloader_num_workers: Annotated[int, Field(ge=0, le=32)] = 4
+    @property
+    def effective_batch_size(self) -> int:  # batch_size multiplied by gradient_accumulation_steps
+        return self.batch_size * self.gradient_accumulation_steps  # no device / process count is factored in
+class EvaluationConfig(BaseModel):  # Evaluation schedule, metric list and best-model selection settings.
+    enabled: bool = True  # NOTE(review): the validator below still runs when this is False
+    strategy: EvalStrategy = EvalStrategy.EPOCH
+    eval_steps: PositiveInt | None = None  # required when strategy=STEPS
+    metrics: list[EvalMetric] = Field(
+        default_factory=lambda: [EvalMetric.PASS_AT_1, EvalMetric.BLEU]
+    )
+    num_samples_per_problem: Annotated[int, Field(ge=1, le=200)] = 10
+    timeout_seconds: Annotated[int, Field(ge=1, le=60)] = 10
+    load_best_model_at_end: bool = True
+    metric_for_best_model: EvalMetric = EvalMetric.PASS_AT_1  # NOTE(review): not checked for membership in `metrics`
+    greater_is_better: bool = True
+    @model_validator(mode="after")
+    def eval_steps_required_for_steps_strategy(self) -> "EvaluationConfig":  # rejects strategy=STEPS without eval_steps
+        if self.strategy == EvalStrategy.STEPS and self.eval_steps is None:
+            raise ValueError("evaluation.eval_steps is required when strategy='steps'")
+        return self
+class CheckpointConfig(BaseModel):  # Checkpoint saving schedule, retention limit and output location.
+    save_strategy: EvalStrategy = EvalStrategy.EPOCH  # reuses EvalStrategy, so "no" is accepted here as well
+    save_steps: PositiveInt | None = None  # required when save_strategy=STEPS
+    save_total_limit: Annotated[int, Field(ge=1, le=20)] = 3
+    output_dir: str = "./checkpoints"
+    resume_from_checkpoint: str | None = None  # presumably a checkpoint path; not validated here
+    @model_validator(mode="after")
+    def save_steps_required_for_steps_strategy(self) -> "CheckpointConfig":  # rejects save_strategy=STEPS without save_steps
+        if self.save_strategy == EvalStrategy.STEPS and self.save_steps is None:
+            raise ValueError("checkpoint.save_steps required when save_strategy='steps'")
+        return self
+class HubConfig(BaseModel):  # Settings for optionally pushing results to a hub repository.
+    push_to_hub: bool = False
+    repo_id: str | None = None  # required (non-empty) when push_to_hub is True
+    private: bool = True
+    commit_message: str = "Training checkpoint"
+    @model_validator(mode="after")
+    def repo_id_required_if_pushing(self) -> "HubConfig":  # cross-field check that runs after per-field validation
+        if self.push_to_hub and not self.repo_id:  # `not` rejects an empty string as well as None
+            raise ValueError("hub.repo_id is required when hub.push_to_hub=true")
+        return self
+class DatasetConfig(BaseModel):  # Dataset selection plus split, sampling and shuffle settings.
+    dataset_id: str  # internal UUID or HF Hub dataset path
+    split_ratio: Annotated[float, Field(gt=0.0, lt=1.0)] = 0.9  # train split; bounds are exclusive, so 0 and 1 are rejected
+    max_samples: PositiveInt | None = None  # None = use all
+    text_column: str = "content"  # presumably the column holding the training text — TODO confirm
+    shuffle: bool = True
+    shuffle_seed: int = 42  # separate field from TrainingHyperparams.seed (same default value)
+# ---------------------------------------------------------------------------
+# Root job config
+# ---------------------------------------------------------------------------
+class TrainingJobConfig(BaseModel):
+    """
+    Complete training job specification.
+    Validated at job submission time. Each section is checked by its own
+    model; this class defines no cross-section validators, so consistency
+    between sections (e.g. evaluation vs. checkpoint schedule) is not enforced here.
+    """
+    job_name: Annotated[str, Field(min_length=1, max_length=128, pattern=r"^[\w\-]+$")]  # 1-128 chars, only \w and '-'
+    base_model: str = Field(
+        description="HuggingFace model ID or local path",
+        examples=["Salesforce/codegen-350M-mono", "deepseek-ai/deepseek-coder-1.3b-base"],
+    )
+    dataset: DatasetConfig  # the only nested section without a default
+    training: TrainingHyperparams = Field(default_factory=TrainingHyperparams)
+    lora: LoRAConfig | None = Field(default_factory=LoRAConfig)  # omitted -> default LoRAConfig (enabled); None is also accepted
+    evaluation: EvaluationConfig = Field(default_factory=EvaluationConfig)
+    checkpoint: CheckpointConfig = Field(default_factory=CheckpointConfig)
+    hub: HubConfig = Field(default_factory=HubConfig)
+    tags: list[str] = Field(default_factory=list, max_length=20)  # at most 20 tags
+    notes: str | None = None
+    model_config = {  # example payload attached to the generated JSON schema
+        "json_schema_extra": {
+            "examples": [
+                {
+                    "job_name": "codegen-finetune-v1",
+                    "base_model": "Salesforce/codegen-350M-mono",
+                    "dataset": {"dataset_id": "ds_abc123"},
+                    "training": {
+                        "num_epochs": 3,
+                        "batch_size": 8,
+                        "learning_rate": 2e-5,
+                    },
+                    "hub": {
+                        "push_to_hub": True,
+                        "repo_id": "your-org/codegen-finetune-v1",
+                    },
+                }
+            ]
+        }
+    }
+# ---------------------------------------------------------------------------
+# Inference config (served separately but validated here for consistency)
+# ---------------------------------------------------------------------------
+class InferenceConfig(BaseModel):
+    model_id: str
+    max_new_tokens: Annotated[int, Field(ge=1, le=4096)] = 256
+    temperature: Annotated[float, Field(ge=0.0, le=2.0)] = 0.2
+    top_p: Annotated[float, Field(ge=0.0, le=1.0)] = 0.95
+    top_k: Annotated[int, Field(ge=0, le=1000)] = 50
+    do_sample: bool = True
+    num_return_sequences: Annotated[int, Field(ge=1, le=200)] = 1
+    stop_sequences: list[str] = Field(default_factory=list)
+    precision: Precision = Precision.BF16