Spaces:

vectorplasticity
/

universal-model-trainer

Running

App Files Files Community

universal-model-trainer / app /config.py

vectorplasticity

Update app/config.py

7cf29f9 verified 2 months ago

Raw

History Blame Contribute Delete

7.22 kB

	"""
	Configuration settings for Universal Model Trainer
	"""

	from pydantic_settings import BaseSettings
	from typing import Optional, List
	from functools import lru_cache
	import os


	class Settings(BaseSettings):
	    """Application settings for Universal Model Trainer.

	    Every field below carries a default. Because this class derives from
	    ``BaseSettings``, a field may be overridden by an environment variable
	    of the same name or by an entry in the ``.env`` file named in ``Config``.
	    Names are matched case-sensitively (``case_sensitive = True``).
	    """

	    # Application
	    APP_NAME: str = "Universal Model Trainer"
	    VERSION: str = "1.0.0"
	    DEBUG: bool = False
	    # Placeholder value only; override it in any real deployment.
	    SECRET_KEY: str = "change-me-in-production"

	    # Server
	    HOST: str = "0.0.0.0"
	    PORT: int = 7860
	    WORKERS: int = 1

	    # HuggingFace
	    HF_TOKEN: Optional[str] = None
	    HF_USERNAME: Optional[str] = None
	    HF_CACHE_DIR: str = "/app/cache"

	    # Redis / Queue
	    REDIS_URL: str = "redis://localhost:6379/0"
	    QUEUE_NAME: str = "training_queue"
	    MAX_CONCURRENT_JOBS: int = 1  # Single job only - one machine

	    # Database
	    DATABASE_URL: str = "sqlite:///./data/trainer.db"

	    # Storage Paths
	    UPLOAD_DIR: str = "/app/uploads"
	    OUTPUT_DIR: str = "/app/outputs"  # Training outputs
	    MODELS_DIR: str = "/app/models"
	    LOGS_DIR: str = "/app/logs"
	    CACHE_DIR: str = "/app/cache"

	    # Training Defaults
	    DEFAULT_BATCH_SIZE: int = 8
	    DEFAULT_LEARNING_RATE: float = 5e-5
	    DEFAULT_MAX_LENGTH: int = 512
	    DEFAULT_EPOCHS: int = 3
	    DEFAULT_WARMUP_RATIO: float = 0.1

	    # Hardware
	    DEVICE: str = "auto"  # auto, cpu, cuda, mps
	    DTYPE: str = "auto"  # auto, float32, float16, bfloat16
	    DEFAULT_HARDWARE: str = "auto"
	    AVAILABLE_HARDWARE: List[str] = ["cpu", "cuda", "auto"]

	    # PEFT / LoRA Defaults
	    PEFT_DEFAULTS: dict = {
	        "lora_r": 16,
	        "lora_alpha": 32,
	        "lora_dropout": 0.05,
	        "target_modules": ["q_proj", "v_proj"],
	    }

	    # Model Support
	    SUPPORTED_TASKS: List[str] = [
	        "causal-lm",
	        "seq2seq",
	        "token-classification",
	        "sequence-classification",
	        "question-answering",
	        "summarization",
	        "translation",
	        "text-classification",
	        "masked-lm",
	        "vision-classification",
	        "vision-segmentation",
	        "audio-classification",
	        "audio-transcription",
	    ]

	    # Dataset Sources
	    DATASET_SOURCES: List[str] = [
	        "huggingface",
	        "local_upload",
	        "url",
	        "s3",
	    ]

	    # Logging
	    LOG_LEVEL: str = "INFO"
	    LOG_FORMAT: str = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"

	    # Experiment Tracking
	    WANDB_API_KEY: Optional[str] = None
	    WANDB_PROJECT: str = "universal-model-trainer"

	    # Authentication (set via HF Space secrets).
	    # Both values are optional: when the matching environment variable is
	    # absent they stay None. BaseSettings populates them from the
	    # APP_PASSWORD / SESSION_SECRET_KEY environment variables (or .env) at
	    # instantiation, so no manual os.environ lookup is needed here.
	    APP_PASSWORD: Optional[str] = None
	    SESSION_SECRET_KEY: Optional[str] = None
	    SESSION_EXPIRE_HOURS: int = 24

	    # Security
	    API_KEY_HEADER: str = "X-API-Key"
	    ENABLE_AUTH: bool = False

	    # Rate Limiting
	    RATE_LIMIT_PER_MINUTE: int = 60

	    # Resource Limits
	    MAX_UPLOAD_SIZE_MB: int = 500
	    MAX_DATASET_SIZE_MB: int = 10000
	    MAX_TRAINING_HOURS: int = 24

	    @property
	    def is_auth_configured(self) -> bool:
	        """Return True when APP_PASSWORD holds a non-empty value."""
	        return bool(self.APP_PASSWORD)

	    class Config:
	        env_file = ".env"
	        env_file_encoding = "utf-8"
	        case_sensitive = True


	@lru_cache()
	def get_settings() -> Settings:
	    """Return the shared Settings object, building it on first use.

	    ``lru_cache`` memoises this zero-argument call, so every later caller
	    receives the instance created by the first invocation.
	    """
	    cached_settings = Settings()
	    return cached_settings


	# Module-level settings instance, created at import time through the cached
	# get_settings() factory so importers can use `settings` directly.
	settings: Settings = get_settings()


	# Training Configuration Templates
	#
	# Each row is (task name, model type, default model, default args). The task
	# name doubles as the lookup key and as the template's own "task" value.
	#
	# NOTE(review): "vision-segmentation" and "audio-classification" are listed
	# in Settings.SUPPORTED_TASKS but have no template here - confirm callers
	# handle the missing key.
	_TEMPLATE_ROWS = (
	    ("causal-lm", "decoder", "gpt2", {
	        "per_device_train_batch_size": 4,
	        "gradient_accumulation_steps": 4,
	        "learning_rate": 5e-5,
	        "max_grad_norm": 1.0,
	        "warmup_ratio": 0.1,
	        "weight_decay": 0.01,
	    }),
	    ("seq2seq", "encoder-decoder", "t5-small", {
	        "per_device_train_batch_size": 8,
	        "learning_rate": 5e-5,
	        "warmup_ratio": 0.1,
	        "source_lang": "en",
	        "target_lang": "en",
	    }),
	    ("token-classification", "encoder", "bert-base-uncased", {
	        "per_device_train_batch_size": 16,
	        "learning_rate": 3e-5,
	        "label_column": "ner_tags",
	    }),
	    ("sequence-classification", "encoder", "distilbert-base-uncased", {
	        "per_device_train_batch_size": 16,
	        "learning_rate": 2e-5,
	        "num_labels": 2,
	        "label_column": "label",
	    }),
	    ("question-answering", "encoder", "distilbert-base-uncased-distilled-squad", {
	        "per_device_train_batch_size": 12,
	        "learning_rate": 3e-5,
	        "max_answer_length": 30,
	    }),
	    ("text-classification", "encoder", "roberta-base", {
	        "per_device_train_batch_size": 16,
	        "learning_rate": 1e-5,
	        "num_labels": 2,
	    }),
	    ("summarization", "encoder-decoder", "facebook/bart-large-cnn", {
	        "per_device_train_batch_size": 4,
	        "learning_rate": 3e-5,
	        "max_source_length": 1024,
	        "max_target_length": 128,
	    }),
	    ("translation", "encoder-decoder", "Helsinki-NLP/opus-mt-en-fr", {
	        "per_device_train_batch_size": 8,
	        "learning_rate": 5e-5,
	        "source_lang": "en",
	        "target_lang": "fr",
	    }),
	    ("masked-lm", "encoder", "bert-base-uncased", {
	        "per_device_train_batch_size": 16,
	        "learning_rate": 5e-5,
	        "mlm_probability": 0.15,
	    }),
	    ("vision-classification", "vision", "google/vit-base-patch16-224", {
	        "per_device_train_batch_size": 32,
	        "learning_rate": 2e-5,
	        "image_column": "image",
	    }),
	    ("audio-transcription", "audio", "openai/whisper-small", {
	        "per_device_train_batch_size": 8,
	        "learning_rate": 1e-5,
	        "audio_column": "audio",
	    }),
	)

	# Expand the rows into the nested mapping; insertion order follows the rows.
	TRAINING_TEMPLATES = {
	    task_name: {
	        "task": task_name,
	        "model_type": architecture,
	        "default_model": checkpoint,
	        "default_args": trainer_args,
	    }
	    for task_name, architecture, checkpoint, trainer_args in _TEMPLATE_ROWS
	}