| """ |
| Configuration settings for Universal Model Trainer |
| """ |
|
|
| from pydantic_settings import BaseSettings |
| from typing import Optional, List |
| from functools import lru_cache |
| import os |
|
|
|
|
class Settings(BaseSettings):
    """Application settings loaded from environment variables.

    Each field below may be overridden by an environment variable with
    exactly the same (case-sensitive) name, or by an entry in the ``.env``
    file configured in ``Config``. The values written here are the defaults
    used when no override is present.
    """

    # --- Application ---
    APP_NAME: str = "Universal Model Trainer"
    VERSION: str = "1.0.0"
    DEBUG: bool = False
    SECRET_KEY: str = "change-me-in-production"

    # --- Server ---
    HOST: str = "0.0.0.0"
    PORT: int = 7860
    WORKERS: int = 1

    # --- Hugging Face ---
    HF_TOKEN: Optional[str] = None
    HF_USERNAME: Optional[str] = None
    HF_CACHE_DIR: str = "/app/cache"

    # --- Job queue ---
    REDIS_URL: str = "redis://localhost:6379/0"
    QUEUE_NAME: str = "training_queue"
    MAX_CONCURRENT_JOBS: int = 1

    # --- Database ---
    DATABASE_URL: str = "sqlite:///./data/trainer.db"

    # --- Storage directories ---
    UPLOAD_DIR: str = "/app/uploads"
    OUTPUT_DIR: str = "/app/outputs"
    MODELS_DIR: str = "/app/models"
    LOGS_DIR: str = "/app/logs"
    CACHE_DIR: str = "/app/cache"

    # --- Training defaults ---
    DEFAULT_BATCH_SIZE: int = 8
    DEFAULT_LEARNING_RATE: float = 5e-5
    DEFAULT_MAX_LENGTH: int = 512
    DEFAULT_EPOCHS: int = 3
    DEFAULT_WARMUP_RATIO: float = 0.1

    # --- Hardware ---
    DEVICE: str = "auto"
    DTYPE: str = "auto"
    DEFAULT_HARDWARE: str = "auto"
    AVAILABLE_HARDWARE: List[str] = ["cpu", "cuda", "auto"]

    # --- PEFT / LoRA defaults ---
    PEFT_DEFAULTS: dict = {
        "lora_r": 16,
        "lora_alpha": 32,
        "lora_dropout": 0.05,
        "target_modules": ["q_proj", "v_proj"],
    }

    # --- Supported task identifiers ---
    SUPPORTED_TASKS: List[str] = [
        "causal-lm",
        "seq2seq",
        "token-classification",
        "sequence-classification",
        "question-answering",
        "summarization",
        "translation",
        "text-classification",
        "masked-lm",
        "vision-classification",
        "vision-segmentation",
        "audio-classification",
        "audio-transcription",
    ]

    # --- Dataset sources ---
    DATASET_SOURCES: List[str] = [
        "huggingface",
        "local_upload",
        "url",
        "s3",
    ]

    # --- Logging ---
    LOG_LEVEL: str = "INFO"
    LOG_FORMAT: str = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"

    # --- Weights & Biases ---
    WANDB_API_KEY: Optional[str] = None
    WANDB_PROJECT: str = "universal-model-trainer"

    # --- Password / session authentication ---
    # Both values are optional and default to None. BaseSettings fills them
    # from the APP_PASSWORD / SESSION_SECRET_KEY environment variables (or
    # the .env file) when the settings object is created, so there is no
    # need to read os.environ here at import time, and the annotation now
    # matches the value actually stored when the variable is unset.
    APP_PASSWORD: Optional[str] = None
    SESSION_SECRET_KEY: Optional[str] = None
    SESSION_EXPIRE_HOURS: int = 24

    # --- API-key authentication ---
    API_KEY_HEADER: str = "X-API-Key"
    ENABLE_AUTH: bool = False

    # --- Rate limiting ---
    RATE_LIMIT_PER_MINUTE: int = 60

    # --- Resource limits ---
    MAX_UPLOAD_SIZE_MB: int = 500
    MAX_DATASET_SIZE_MB: int = 10000
    MAX_TRAINING_HOURS: int = 24

    @property
    def is_auth_configured(self) -> bool:
        """Return True when a non-empty APP_PASSWORD has been provided."""
        return bool(self.APP_PASSWORD)

    class Config:
        env_file = ".env"
        env_file_encoding = "utf-8"
        case_sensitive = True
|
|
|
|
@lru_cache
def get_settings() -> Settings:
    """Return the shared ``Settings`` object.

    The function takes no arguments, so the cache holds a single entry:
    ``Settings()`` is constructed on the first call and the same instance
    is handed back on every later call.
    """
    instance = Settings()
    return instance
|
|
|
|
| |
# Module-level settings object, created once when this module is imported.
settings: Settings = get_settings()
|
|
|
|
| |
def _template(task, model_type, default_model, **default_args):
    """Build one template entry; keyword arguments become ``default_args``."""
    return {
        "task": task,
        "model_type": model_type,
        "default_model": default_model,
        "default_args": default_args,
    }


# Per-task training templates, keyed by task identifier. Each entry records
# the task id, a model-type label, a default model name and the default
# training arguments for that task.
TRAINING_TEMPLATES = {
    entry["task"]: entry
    for entry in (
        _template(
            "causal-lm", "decoder", "gpt2",
            per_device_train_batch_size=4,
            gradient_accumulation_steps=4,
            learning_rate=5e-5,
            max_grad_norm=1.0,
            warmup_ratio=0.1,
            weight_decay=0.01,
        ),
        _template(
            "seq2seq", "encoder-decoder", "t5-small",
            per_device_train_batch_size=8,
            learning_rate=5e-5,
            warmup_ratio=0.1,
            source_lang="en",
            target_lang="en",
        ),
        _template(
            "token-classification", "encoder", "bert-base-uncased",
            per_device_train_batch_size=16,
            learning_rate=3e-5,
            label_column="ner_tags",
        ),
        _template(
            "sequence-classification", "encoder", "distilbert-base-uncased",
            per_device_train_batch_size=16,
            learning_rate=2e-5,
            num_labels=2,
            label_column="label",
        ),
        _template(
            "question-answering", "encoder",
            "distilbert-base-uncased-distilled-squad",
            per_device_train_batch_size=12,
            learning_rate=3e-5,
            max_answer_length=30,
        ),
        _template(
            "text-classification", "encoder", "roberta-base",
            per_device_train_batch_size=16,
            learning_rate=1e-5,
            num_labels=2,
        ),
        _template(
            "summarization", "encoder-decoder", "facebook/bart-large-cnn",
            per_device_train_batch_size=4,
            learning_rate=3e-5,
            max_source_length=1024,
            max_target_length=128,
        ),
        _template(
            "translation", "encoder-decoder", "Helsinki-NLP/opus-mt-en-fr",
            per_device_train_batch_size=8,
            learning_rate=5e-5,
            source_lang="en",
            target_lang="fr",
        ),
        _template(
            "masked-lm", "encoder", "bert-base-uncased",
            per_device_train_batch_size=16,
            learning_rate=5e-5,
            mlm_probability=0.15,
        ),
        _template(
            "vision-classification", "vision", "google/vit-base-patch16-224",
            per_device_train_batch_size=32,
            learning_rate=2e-5,
            image_column="image",
        ),
        _template(
            "audio-transcription", "audio", "openai/whisper-small",
            per_device_train_batch_size=8,
            learning_rate=1e-5,
            audio_column="audio",
        ),
    )
}