vectorplasticity's picture
Update app/config.py
7cf29f9 verified
Raw
History Blame Contribute Delete
7.22 kB
"""
Configuration settings for Universal Model Trainer
"""
from pydantic_settings import BaseSettings
from typing import Optional, List
from functools import lru_cache
import os
class Settings(BaseSettings):
    """Application settings loaded from environment variables.

    Every attribute below is a settings field whose value can be overridden
    by an environment variable of the same (case-sensitive) name, or by an
    entry in the ``.env`` file configured in ``Config``. The literals here
    are the fallbacks used when no override is provided.
    """

    # Application
    APP_NAME: str = "Universal Model Trainer"
    VERSION: str = "1.0.0"
    DEBUG: bool = False
    # NOTE(review): placeholder value; it should be overridden in any real
    # deployment.
    SECRET_KEY: str = "change-me-in-production"

    # Server
    HOST: str = "0.0.0.0"
    PORT: int = 7860
    WORKERS: int = 1

    # HuggingFace
    HF_TOKEN: Optional[str] = None
    HF_USERNAME: Optional[str] = None
    HF_CACHE_DIR: str = "/app/cache"

    # Redis / Queue
    REDIS_URL: str = "redis://localhost:6379/0"
    QUEUE_NAME: str = "training_queue"
    MAX_CONCURRENT_JOBS: int = 1  # Single job only - one machine

    # Database
    DATABASE_URL: str = "sqlite:///./data/trainer.db"

    # Storage Paths
    UPLOAD_DIR: str = "/app/uploads"
    OUTPUT_DIR: str = "/app/outputs"  # Training outputs
    MODELS_DIR: str = "/app/models"
    LOGS_DIR: str = "/app/logs"
    CACHE_DIR: str = "/app/cache"

    # Training Defaults
    DEFAULT_BATCH_SIZE: int = 8
    DEFAULT_LEARNING_RATE: float = 5e-5
    DEFAULT_MAX_LENGTH: int = 512
    DEFAULT_EPOCHS: int = 3
    DEFAULT_WARMUP_RATIO: float = 0.1

    # Hardware
    DEVICE: str = "auto"  # auto, cpu, cuda, mps
    DTYPE: str = "auto"  # auto, float32, float16, bfloat16
    DEFAULT_HARDWARE: str = "auto"
    AVAILABLE_HARDWARE: List[str] = ["cpu", "cuda", "auto"]

    # PEFT / LoRA Defaults
    PEFT_DEFAULTS: dict = {
        "lora_r": 16,
        "lora_alpha": 32,
        "lora_dropout": 0.05,
        "target_modules": ["q_proj", "v_proj"],
    }

    # Model Support
    SUPPORTED_TASKS: List[str] = [
        "causal-lm",
        "seq2seq",
        "token-classification",
        "sequence-classification",
        "question-answering",
        "summarization",
        "translation",
        "text-classification",
        "masked-lm",
        "vision-classification",
        "vision-segmentation",
        "audio-classification",
        "audio-transcription",
    ]

    # Dataset Sources
    DATASET_SOURCES: List[str] = [
        "huggingface",
        "local_upload",
        "url",
        "s3",
    ]

    # Logging
    LOG_LEVEL: str = "INFO"
    LOG_FORMAT: str = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"

    # Experiment Tracking
    WANDB_API_KEY: Optional[str] = None
    WANDB_PROJECT: str = "universal-model-trainer"

    # Authentication (set via HF Space secrets)
    # Both values are optional and default to None. BaseSettings fills them
    # from the APP_PASSWORD / SESSION_SECRET_KEY environment variables (or
    # the .env file) when the settings object is created, so no manual
    # os.environ lookup is needed at class-definition time.
    APP_PASSWORD: Optional[str] = None
    SESSION_SECRET_KEY: Optional[str] = None
    SESSION_EXPIRE_HOURS: int = 24

    # Security
    API_KEY_HEADER: str = "X-API-Key"
    ENABLE_AUTH: bool = False

    # Rate Limiting
    RATE_LIMIT_PER_MINUTE: int = 60

    # Resource Limits
    MAX_UPLOAD_SIZE_MB: int = 500
    MAX_DATASET_SIZE_MB: int = 10000
    MAX_TRAINING_HOURS: int = 24

    @property
    def is_auth_configured(self) -> bool:
        """Return True when APP_PASSWORD is set to a non-empty value."""
        return bool(self.APP_PASSWORD)

    class Config:
        # Also read overrides from a UTF-8 ".env" file; variable names must
        # match the field names exactly (case-sensitive).
        env_file = ".env"
        env_file_encoding = "utf-8"
        case_sensitive = True
@lru_cache()
def get_settings() -> Settings:
    """Build the Settings object on first call and reuse it afterwards.

    The function takes no arguments, so the lru_cache wrapper holds exactly
    one entry: every call after the first returns the same instance.

    Returns:
        The cached Settings instance.
    """
    loaded = Settings()
    return loaded
# Export settings instance: created once at import time through the cached
# get_settings() factory, so this name and any later get_settings() call
# refer to the same object.
settings = get_settings()
# Training Configuration Templates
def _template(task, model_type, default_model, **default_args):
    """Assemble one template entry; keyword arguments become default_args."""
    return {
        "task": task,
        "model_type": model_type,
        "default_model": default_model,
        "default_args": default_args,
    }


# Maps a task name to its template: the task name again, a model_type tag,
# a default model identifier, and a dict of default training arguments.
# NOTE(review): SUPPORTED_TASKS also lists "vision-segmentation" and
# "audio-classification", which have no entry here - confirm that callers
# handle a missing template.
TRAINING_TEMPLATES = {
    entry["task"]: entry
    for entry in (
        _template(
            "causal-lm", "decoder", "gpt2",
            per_device_train_batch_size=4,
            gradient_accumulation_steps=4,
            learning_rate=5e-5,
            max_grad_norm=1.0,
            warmup_ratio=0.1,
            weight_decay=0.01,
        ),
        _template(
            "seq2seq", "encoder-decoder", "t5-small",
            per_device_train_batch_size=8,
            learning_rate=5e-5,
            warmup_ratio=0.1,
            source_lang="en",
            target_lang="en",
        ),
        _template(
            "token-classification", "encoder", "bert-base-uncased",
            per_device_train_batch_size=16,
            learning_rate=3e-5,
            label_column="ner_tags",
        ),
        _template(
            "sequence-classification", "encoder", "distilbert-base-uncased",
            per_device_train_batch_size=16,
            learning_rate=2e-5,
            num_labels=2,
            label_column="label",
        ),
        _template(
            "question-answering", "encoder",
            "distilbert-base-uncased-distilled-squad",
            per_device_train_batch_size=12,
            learning_rate=3e-5,
            max_answer_length=30,
        ),
        _template(
            "text-classification", "encoder", "roberta-base",
            per_device_train_batch_size=16,
            learning_rate=1e-5,
            num_labels=2,
        ),
        _template(
            "summarization", "encoder-decoder", "facebook/bart-large-cnn",
            per_device_train_batch_size=4,
            learning_rate=3e-5,
            max_source_length=1024,
            max_target_length=128,
        ),
        _template(
            "translation", "encoder-decoder", "Helsinki-NLP/opus-mt-en-fr",
            per_device_train_batch_size=8,
            learning_rate=5e-5,
            source_lang="en",
            target_lang="fr",
        ),
        _template(
            "masked-lm", "encoder", "bert-base-uncased",
            per_device_train_batch_size=16,
            learning_rate=5e-5,
            mlm_probability=0.15,
        ),
        _template(
            "vision-classification", "vision", "google/vit-base-patch16-224",
            per_device_train_batch_size=32,
            learning_rate=2e-5,
            image_column="image",
        ),
        _template(
            "audio-transcription", "audio", "openai/whisper-small",
            per_device_train_batch_size=8,
            learning_rate=1e-5,
            audio_column="audio",
        ),
    )
}