project-halide / config.py
Lonelyguyse1's picture
Deploy Project Halide Gradio Space
5f0f16b verified
Raw
History Blame Contribute Delete
5.47 kB
"""Runtime configuration for Project Halide.

This module intentionally contains no module-level model imports (``torch`` is
imported lazily inside ``require_gpu_for_inference``). It is safe to import in
local CPU-only tooling, tests, and dataset preparation scripts.
"""
from __future__ import annotations
import os
from dataclasses import dataclass
from pathlib import Path
# Filesystem layout, anchored at the directory that contains this module.
REPO_ROOT = Path(__file__).resolve().parent
DATA_DIR = REPO_ROOT.joinpath("data")
STORAGE_DIR = REPO_ROOT.joinpath("storage")
CHECKPOINT_DIR = REPO_ROOT.joinpath("checkpoints")

# Default model identifiers; the getters below allow environment overrides.
CANONICAL_VISION_MODEL_ID = "openbmb/MiniCPM-V-4.6"
# Alternate spellings of the vision model id, mapped to the canonical one.
VISION_MODEL_ALIASES = {"openbmb/MiniCPM-V-4_6": CANONICAL_VISION_MODEL_ID}
DEFAULT_FINETUNED_MODEL_ID = "Lonelyguyse1/halide-vision"
DEFAULT_REASONING_MODEL_ID = "nvidia/Nemotron-Mini-4B-Instruct"
def env_bool(name: str, default: bool = False) -> bool:
    """Read a boolean flag from the environment variable ``name``.

    Args:
        name: Environment variable to look up.
        default: Value returned when the variable is unset or blank.

    Returns:
        ``True`` when the value, stripped and lower-cased, is one of
        ``"1"``, ``"true"``, ``"yes"`` or ``"on"``; ``False`` for any other
        non-blank value; ``default`` when the variable is unset or blank.
    """
    raw = os.getenv(name)
    # Treat a blank value as "unset", the same way env_int and env_float do,
    # so `VAR=` cannot silently override a True default.
    if raw is None or not raw.strip():
        return default
    return raw.strip().lower() in {"1", "true", "yes", "on"}
def env_int(name: str, default: int) -> int:
    """Read an integer from the environment variable ``name``.

    Args:
        name: Environment variable to look up.
        default: Value returned when the variable is unset or blank.

    Returns:
        The parsed integer, or ``default`` when the variable is unset or blank.

    Raises:
        ValueError: If the variable holds text ``int()`` cannot parse. The
            message names the variable so the bad setting can be located.
    """
    raw = os.getenv(name)
    if raw is None or not raw.strip():
        return default
    try:
        return int(raw)
    except ValueError as exc:
        # int()'s own message omits which variable was malformed.
        raise ValueError(
            f"Environment variable {name} must be an integer, got {raw!r}"
        ) from exc
def env_float(name: str, default: float) -> float:
    """Read a float from the environment variable ``name``.

    Args:
        name: Environment variable to look up.
        default: Value returned when the variable is unset or blank.

    Returns:
        The parsed float, or ``default`` when the variable is unset or blank.

    Raises:
        ValueError: If the variable holds text ``float()`` cannot parse. The
            message names the variable so the bad setting can be located.
    """
    raw = os.getenv(name)
    if raw is None or not raw.strip():
        return default
    try:
        return float(raw)
    except ValueError as exc:
        # float()'s own message omits which variable was malformed.
        raise ValueError(
            f"Environment variable {name} must be a number, got {raw!r}"
        ) from exc
def env_path(name: str, default: Path) -> Path:
    """Read a filesystem path from the environment variable ``name``.

    Args:
        name: Environment variable to look up.
        default: Path returned when the variable is unset or an empty string.

    Returns:
        ``Path`` built from the variable's value, or ``default``.
    """
    raw = os.getenv(name)
    if not raw:
        return default
    return Path(raw)
def normalize_model_id(model_id: str) -> str:
    """Map a known alias of the vision model id to its canonical spelling.

    Args:
        model_id: Model identifier as supplied by the caller.

    Returns:
        The entry from ``VISION_MODEL_ALIASES`` when ``model_id`` is a key
        there; otherwise ``model_id`` unchanged.
    """
    try:
        return VISION_MODEL_ALIASES[model_id]
    except KeyError:
        return model_id
@dataclass(frozen=True)
class VisionConfig:
    """Immutable settings for the vision stage.

    Instances are built by ``get_vision_config`` from ``HALIDE_*`` environment
    variables. This module only declares and populates the fields; notes
    marked "presumably" describe intent inferred from names and should be
    confirmed against the code that consumes this config.
    """

    # Model selection.
    base_model_id: str  # run through normalize_model_id by get_vision_config
    finetuned_model_id: str
    local_model_path: Path
    use_finetuned: bool  # presumably picks the fine-tuned model over the base -- confirm

    # Generation and input-size limits (units not defined here -- confirm).
    downsample_mode: str
    max_slice_nums: int
    max_new_tokens: int
    max_input_pixels: int

    # Tiled fallback; presumably a second pass over image tiles -- confirm.
    tile_fallback_enabled: bool
    tile_fallback_min_defects: int
    tile_min_side: int
    tile_max_side: int
    tile_overlap: float  # presumably a fraction of the tile side -- confirm
    tile_max_tiles: int

    # Classical (non-model) assist; semantics live in the consumer -- confirm.
    classical_assist_enabled: bool
    classical_assist_max_defects: int
@dataclass(frozen=True)
class ReasoningConfig:
    """Immutable settings for the reasoning model.

    Instances are built by ``get_reasoning_config`` from ``HALIDE_*``
    environment variables.
    """

    model_id: str
    max_new_tokens: int  # presumably the generation token cap -- confirm
@dataclass(frozen=True)
class AppConfig:
    """Immutable application-level settings.

    Instances are built by ``get_app_config`` from ``HALIDE_*`` environment
    variables. How each limit is enforced is decided by the consuming code,
    not by this module.
    """

    db_path: Path
    cache_size: int
    cache_ttl_seconds: int
    gpu_duration_seconds: int
    max_history_items: int
def get_vision_config() -> VisionConfig:
    """Build the vision-stage settings from ``HALIDE_*`` environment variables.

    Each field uses the default written in this function when its variable is
    not set. Only the base model id is passed through ``normalize_model_id``;
    the fine-tuned id is used exactly as given.

    Returns:
        A frozen ``VisionConfig`` reflecting the environment at call time.

    Raises:
        ValueError: If a numeric variable holds text that cannot be parsed.
    """
    # Model selection settings.
    base_model = normalize_model_id(
        os.getenv("HALIDE_VISION_BASE_MODEL_ID", CANONICAL_VISION_MODEL_ID)
    )
    finetuned_model = os.getenv(
        "HALIDE_VISION_FINETUNED_MODEL_ID", DEFAULT_FINETUNED_MODEL_ID
    )
    default_checkpoint = CHECKPOINT_DIR / "minicpm-v-4.6-merged-v4-stage1"
    local_path = env_path("HALIDE_VISION_LOCAL_MODEL_PATH", default_checkpoint)

    return VisionConfig(
        base_model_id=base_model,
        finetuned_model_id=finetuned_model,
        local_model_path=local_path,
        use_finetuned=env_bool("HALIDE_USE_FINETUNED_VISION", False),
        # Generation and input-size limits.
        downsample_mode=os.getenv("HALIDE_DOWNSAMPLE_MODE", "4x"),
        max_slice_nums=env_int("HALIDE_MAX_SLICE_NUMS", 36),
        max_new_tokens=env_int("HALIDE_MAX_NEW_TOKENS", 2048),
        max_input_pixels=env_int("HALIDE_MAX_INPUT_PIXELS", 4_000_000),
        # Tiled fallback.
        tile_fallback_enabled=env_bool("HALIDE_ENABLE_TILE_FALLBACK", True),
        tile_fallback_min_defects=env_int("HALIDE_TILE_FALLBACK_MIN_DEFECTS", 1),
        tile_min_side=env_int("HALIDE_TILE_MIN_SIDE", 900),
        tile_max_side=env_int("HALIDE_TILE_MAX_SIDE", 960),
        tile_overlap=env_float("HALIDE_TILE_OVERLAP", 0.35),
        tile_max_tiles=env_int("HALIDE_TILE_MAX_TILES", 9),
        # Classical assist.
        classical_assist_enabled=env_bool("HALIDE_ENABLE_CLASSICAL_ASSIST", True),
        classical_assist_max_defects=env_int(
            "HALIDE_CLASSICAL_ASSIST_MAX_DEFECTS", 8
        ),
    )
def get_reasoning_config() -> ReasoningConfig:
    """Build the reasoning-model settings from the environment.

    Reads ``HALIDE_REASONING_MODEL_ID`` (default
    ``DEFAULT_REASONING_MODEL_ID``) and ``HALIDE_NEMOTRON_MAX_TOKENS``
    (default 768).

    Returns:
        A frozen ``ReasoningConfig`` reflecting the environment at call time.
    """
    model_id = os.getenv("HALIDE_REASONING_MODEL_ID", DEFAULT_REASONING_MODEL_ID)
    token_budget = env_int("HALIDE_NEMOTRON_MAX_TOKENS", 768)
    return ReasoningConfig(model_id=model_id, max_new_tokens=token_budget)
def get_app_config() -> AppConfig:
    """Build the application-level settings from the environment.

    The database path defaults to ``halide.db`` under ``STORAGE_DIR``; the
    integer settings use the defaults written in this function.

    Returns:
        A frozen ``AppConfig`` reflecting the environment at call time.
    """
    database_file = env_path("HALIDE_DB_PATH", STORAGE_DIR / "halide.db")
    cache_entries = env_int("HALIDE_CACHE_SIZE", 64)
    cache_ttl = env_int("HALIDE_CACHE_TTL_SECONDS", 3600)
    gpu_seconds = env_int("HALIDE_GPU_DURATION_SECONDS", 120)
    history_limit = env_int("HALIDE_HISTORY_LIMIT", 10)
    return AppConfig(
        db_path=database_file,
        cache_size=cache_entries,
        cache_ttl_seconds=cache_ttl,
        gpu_duration_seconds=gpu_seconds,
        max_history_items=history_limit,
    )
def running_on_hugging_face_space() -> bool:
    """Report whether ``SPACE_ID`` or ``SPACE_HOST`` is set to a non-empty value.

    These variables are presumably provided by the Hugging Face Spaces
    runtime; this function only checks for their presence.
    """
    return any(os.getenv(marker) for marker in ("SPACE_ID", "SPACE_HOST"))
def require_gpu_for_inference(stage: str) -> None:
    """Raise unless PyTorch reports an available CUDA device.

    Local CPU use remains fine for file I/O, JSON parsing, image resizing,
    tests, and dataset preparation; this guard is meant to be called before
    loading or running the vision or reasoning models.

    Args:
        stage: Label inserted into the error message to identify which
            inference stage was refused.

    Raises:
        RuntimeError: If ``torch.cuda.is_available()`` is false.
    """
    # Imported here so that importing this module never pulls in torch.
    import torch

    if not torch.cuda.is_available():
        refusal = (
            f"Halide refused to run {stage} model inference because no CUDA GPU "
            "is visible. Run inference on Modal, Hugging Face ZeroGPU, or another "
            "GPU runtime. Local CPU is reserved for editing, parsing, and tests."
        )
        raise RuntimeError(refusal)
# Public API of this module: the names exported by a star-import.
# NOTE(review): VISION_MODEL_ALIASES is defined above but not listed here;
# confirm that leaving it out of the public API is intentional.
__all__ = [
    "AppConfig",
    "CHECKPOINT_DIR",
    "CANONICAL_VISION_MODEL_ID",
    "DATA_DIR",
    "DEFAULT_FINETUNED_MODEL_ID",
    "DEFAULT_REASONING_MODEL_ID",
    "REPO_ROOT",
    "ReasoningConfig",
    "STORAGE_DIR",
    "VisionConfig",
    "env_bool",
    "env_float",
    "env_int",
    "env_path",
    "get_app_config",
    "get_reasoning_config",
    "get_vision_config",
    "normalize_model_id",
    "require_gpu_for_inference",
    "running_on_hugging_face_space",
]