Upload folder using huggingface_hub
Files changed:
- README.md (+18 -0)
- app.py (+14 -0)
- requirements.txt (+6 -0)
- run_transformers_training.py (+69 -24)
README.md CHANGED

```diff
@@ -14,6 +14,24 @@ license: mit
 
 This space is dedicated to training Microsoft's Phi-4 model using Unsloth optimizations for enhanced performance and efficiency. The training process utilizes 4-bit quantization and advanced memory optimizations.
 
+## Installation
+
+Before running the training, ensure you have all required dependencies installed:
+
+```bash
+pip install -r requirements.txt
+```
+
+### Critical Dependencies
+
+- **unsloth** (>=2024.3): Required for optimized 4-bit training
+- **peft** (>=0.9.0): Required for parameter-efficient fine-tuning
+- **transformers** (>=4.36.0): Required for model architecture and tokenization
+
+### Optional but Recommended
+
+- **flash-attn** (>=2.5.0): Significantly speeds up attention computations
+
 ## Features
 
 - 4-bit quantization using Unsloth
```
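As a quick sanity check, the version floors named in the README's "Critical Dependencies" section can be verified before launching training. The following is a hypothetical helper, not part of this commit; it assumes only Python 3.8+ (for `importlib.metadata`) and the `packaging` pin already present in requirements.txt:

```python
# check_versions.py -- hypothetical helper, not part of this commit.
# Verifies the version floors from the README's "Critical Dependencies" section.
from importlib.metadata import PackageNotFoundError, version

from packaging.version import Version  # packaging>=23.0 is already in requirements.txt

FLOORS = {"unsloth": "2024.3", "peft": "0.9.0", "transformers": "4.36.0"}

for name, floor in FLOORS.items():
    try:
        installed = version(name)
    except PackageNotFoundError:
        print(f"MISSING   {name} (need >={floor})")
        continue
    ok = Version(installed) >= Version(floor)
    print(f"{'OK     ' if ok else 'TOO OLD'}   {name} {installed} (need >={floor})")
```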
app.py CHANGED

```diff
@@ -196,6 +196,19 @@ with gr.Blocks(title="Phi-4 Unsloth Training", theme=gr.themes.Soft(primary_hue=
 
 This interface allows you to manage training of the Phi-4 model with Unsloth 4-bit optimizations.
 
+### Installation
+
+Before starting training, ensure all dependencies are installed:
+
+```bash
+pip install -r requirements.txt
+```
+
+Critical packages:
+- unsloth (>=2024.3)
+- peft (>=0.9.0)
+- transformers (>=4.36.0)
+
 ### Quick Start
 
 1. Review the configuration in the Configuration tab
@@ -214,6 +227,7 @@ with gr.Blocks(title="Phi-4 Unsloth Training", theme=gr.themes.Soft(primary_hue=
 - Check the logs for out-of-memory errors
 - Verify the VRAM usage on each GPU
 - Check for CUDA version compatibility
+- If you see "Unsloth not available" error, run: `pip install unsloth>=2024.3 peft>=0.9.0`
 """)
 
 # Launch the app
```
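The "Unsloth not available" state mentioned above, and the `unsloth_available`/`peft_available` flags tested in run_transformers_training.py below, imply guarded imports whose definitions fall outside the lines shown in this diff. A minimal sketch of the conventional pattern (the flag names come from the diff; the original definitions are not shown in this commit):

```python
# Guarded imports -- a sketch of how flags such as unsloth_available are
# conventionally defined; the actual definitions are outside this diff.
try:
    from unsloth import FastLanguageModel  # same import the training code relies on
    unsloth_available = True
except ImportError:
    unsloth_available = False

try:
    import peft  # noqa: F401 -- only availability matters here
    peft_available = True
except ImportError:
    peft_available = False
```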
requirements.txt CHANGED

```diff
@@ -1,20 +1,26 @@
 accelerate>=0.27.0
 bitsandbytes>=0.41.0
 datasets>=2.15.0
+einops>=0.7.0
 filelock>=3.13.1
+flash-attn>=2.5.0
 gradio>=5.17.0
 huggingface-hub>=0.19.0
 matplotlib>=3.7.0
 numpy>=1.24.0
 packaging>=23.0
+peft>=0.9.0
+protobuf>=4.23.4
 psutil>=5.9.0
 python-dotenv>=1.0.0
 pyyaml>=6.0.1
 regex>=2023.0.0
 requests>=2.31.0
 safetensors>=0.4.1
+sentencepiece>=0.1.99
 tensorboard>=2.15.0
 torch>=2.0.0
 tqdm>=4.65.0
 transformers>=4.36.0
 typing-extensions>=4.8.0
+unsloth>=2024.3
```
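One caveat on this list: flash-attn compiles CUDA kernels at install time and needs a CUDA toolchain compatible with the installed torch build. If the plain requirements install fails on it, the flash-attn project recommends installing it separately, roughly:

```bash
# Quote the specifier so the shell does not treat ">" as a redirect.
pip install "flash-attn>=2.5.0" --no-build-isolation
```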
run_transformers_training.py CHANGED

```diff
@@ -127,31 +127,33 @@ def parse_args():
 def load_model_and_tokenizer(config):
     """Load model and tokenizer with proper error handling and optimizations."""
     try:
-        if unsloth_available:
-            logger.info("Using Unsloth optimizations with pre-quantized model")
-            model, tokenizer = FastLanguageModel.from_pretrained(
-                model_name=config.get("model_name"),
-                max_seq_length=config.get("max_seq_length", 2048),
-                dtype=None,  # Let Unsloth choose optimal dtype
-                device_map="auto",
-            )
-
-            # Apply Unsloth's training optimizations with config parameters
-            model = FastLanguageModel.get_peft_model(
-                model,
-                r=config.get("unsloth_r", 32),
-                target_modules=config.get("unsloth_target_modules",
-                    ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]),
-                lora_alpha=config.get("unsloth_alpha", 16),
-                lora_dropout=config.get("unsloth_dropout", 0.05),
-                bias="none",
-                use_gradient_checkpointing=config.get("gradient_checkpointing", True),
-                random_state=config.get("seed", 42),
-            )
-            logger.info("Unsloth optimizations applied successfully")
-        else:
+        if not unsloth_available:
             logger.error("Unsloth is required for training with pre-quantized model")
-
+            logger.error("Please install required packages with: pip install -r requirements.txt")
+            logger.error("Or directly install with: pip install unsloth>=2024.3 peft>=0.9.0")
+            raise ImportError("Required packages missing. See log for installation instructions.")
+
+        logger.info("Using Unsloth optimizations with pre-quantized model")
+        model, tokenizer = FastLanguageModel.from_pretrained(
+            model_name=config.get("model_name"),
+            max_seq_length=config.get("max_seq_length", 2048),
+            dtype=None,  # Let Unsloth choose optimal dtype
+            device_map="auto",
+        )
+
+        # Apply Unsloth's training optimizations with config parameters
+        model = FastLanguageModel.get_peft_model(
+            model,
+            r=config.get("unsloth_r", 32),
+            target_modules=config.get("unsloth_target_modules",
+                ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]),
+            lora_alpha=config.get("unsloth_alpha", 16),
+            lora_dropout=config.get("unsloth_dropout", 0.05),
+            bias="none",
+            use_gradient_checkpointing=config.get("gradient_checkpointing", True),
+            random_state=config.get("seed", 42),
+        )
+        logger.info("Unsloth optimizations applied successfully")
 
         # Set up tokenizer settings
         if config.get("chat_template"):
@@ -168,6 +170,7 @@ def load_model_and_tokenizer(config):
 
     except Exception as e:
         logger.error(f"Error in model/tokenizer loading: {str(e)}")
+        logger.error("If missing dependencies, install with: pip install -r requirements.txt")
         raise
 
 def load_dataset_with_mapping(dataset_config):
@@ -359,6 +362,43 @@ class SimpleDataCollator:
 
         return batch
 
+def check_dependencies():
+    """Check if all required dependencies are installed."""
+    missing_packages = []
+
+    # Critical packages
+    if not unsloth_available:
+        missing_packages.append("unsloth>=2024.3")
+
+    if not peft_available:
+        missing_packages.append("peft>=0.9.0")
+
+    # Optional but recommended packages
+    try:
+        import flash_attn
+    except ImportError:
+        logger.warning("flash-attn not found. Flash attention will not be used.")
+        missing_packages.append("flash-attn>=2.5.0 (optional)")
+
+    # If critical packages are missing, exit with instructions
+    critical_missing = [pkg for pkg in missing_packages if "optional" not in pkg]
+    if critical_missing:
+        logger.error("Critical dependencies missing:")
+        for pkg in critical_missing:
+            logger.error(f"  - {pkg}")
+        logger.error("Please install required packages with: pip install -r requirements.txt")
+        logger.error(f"Or directly with: pip install {' '.join(critical_missing)}")
+        return False
+
+    # If optional packages are missing, just warn
+    optional_missing = [pkg for pkg in missing_packages if "optional" in pkg]
+    if optional_missing:
+        logger.warning("Optional dependencies missing (recommended for best performance):")
+        for pkg in optional_missing:
+            logger.warning(f"  - {pkg.split(' ')[0]}")
+
+    return True
+
 def main():
     # Set up logging
     logger.info("Starting training process")
@@ -366,6 +406,11 @@ def main():
     # Parse arguments
     args = parse_args()
 
+    # Check dependencies
+    if not check_dependencies():
+        logger.error("Aborting due to missing critical dependencies")
+        return 1
+
     # Load environment variables
     load_env_variables()
 
```
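Since `load_model_and_tokenizer` pulls every setting through `config.get(key, default)`, the config is just a dict-like mapping. For reference, a hypothetical config covering each key the function reads, with the defaults visible in the diff (the `model_name` value is an illustrative placeholder, not a value from this commit):

```python
# Hypothetical config for load_model_and_tokenizer. Keys and defaults mirror
# the config.get(...) calls in the diff; model_name is an assumed example.
config = {
    "model_name": "microsoft/phi-4",   # placeholder; the script expects a pre-quantized 4-bit model
    "max_seq_length": 2048,
    "unsloth_r": 32,                   # LoRA rank
    "unsloth_target_modules": [
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    "unsloth_alpha": 16,               # LoRA alpha
    "unsloth_dropout": 0.05,
    "gradient_checkpointing": True,
    "seed": 42,
    "chat_template": None,             # set to apply a chat template to the tokenizer
}

model, tokenizer = load_model_and_tokenizer(config)
```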