Upload folder using huggingface_hub
- README.md +7 -9
- requirements.txt +0 -2
- run_transformers_training.py +20 -19
README.md
CHANGED
@@ -16,21 +16,19 @@ This space is dedicated to training Microsoft's Phi-4 model using Unsloth optimi
 
 ## Installation
 
-
+This Hugging Face Space automatically installs dependencies from requirements.txt. The following packages are included:
 
-
-pip install -r requirements.txt
-```
-
-### Critical Dependencies
+### Essential Dependencies
 
 - **unsloth** (>=2024.3): Required for optimized 4-bit training
 - **peft** (>=0.9.0): Required for parameter-efficient fine-tuning
-- **transformers** (>=4.36.0):
+- **transformers** (>=4.36.0): For model architecture and tokenization
+- **einops**: Required by Unsloth for tensor manipulation
+- **sentencepiece**: Required for tokenization
 
-### Optional
+### Optional Dependencies
 
-- **flash-attn
+- **flash-attn**: Optional for faster attention computation (not included by default as it can cause build issues)
 
 ## Features
 
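The "Optional Dependencies" note above relies on the training code degrading gracefully when flash-attn is absent. A minimal standalone sketch of that pattern (the helper name is illustrative, not taken from the repo):

```python
import logging

logger = logging.getLogger(__name__)

def flash_attention_available() -> bool:
    """Return True if the optional flash-attn package can be imported."""
    try:
        import flash_attn  # noqa: F401  # optional dependency, may be absent
        return True
    except ImportError:
        logger.warning("flash-attn not installed; using standard attention")
        return False
```
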
requirements.txt
CHANGED
@@ -3,14 +3,12 @@ bitsandbytes>=0.41.0
 datasets>=2.15.0
 einops>=0.7.0
 filelock>=3.13.1
-flash-attn>=2.5.0
 gradio>=5.17.0
 huggingface-hub>=0.19.0
 matplotlib>=3.7.0
 numpy>=1.24.0
 packaging>=23.0
 peft>=0.9.0
-protobuf>=4.23.4
 psutil>=5.9.0
 python-dotenv>=1.0.0
 pyyaml>=6.0.1
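With flash-attn and protobuf dropped from the pinned list, the script can no longer assume they are installed. As an illustration only (not code from the repo), the remaining pins can be inspected at runtime with importlib.metadata; the training script's own dependency check appears in the next file:

```python
from importlib.metadata import PackageNotFoundError, version

# A few of the pins from requirements.txt above; versions are the stated minimums.
MINIMUM_VERSIONS = {"peft": "0.9.0", "datasets": "2.15.0", "einops": "0.7.0"}

for package, minimum in MINIMUM_VERSIONS.items():
    try:
        print(f"{package}: installed {version(package)} (requires >={minimum})")
    except PackageNotFoundError:
        print(f"{package}: not installed (requires >={minimum})")
```
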
run_transformers_training.py
CHANGED
@@ -129,16 +129,25 @@ def load_model_and_tokenizer(config):
     try:
         if not unsloth_available:
             logger.error("Unsloth is required for training with pre-quantized model")
-            logger.error("Please
-
-            raise ImportError("Required packages missing. See log for installation instructions.")
+            logger.error("Please ensure unsloth is in requirements.txt")
+            raise ImportError("Unsloth is required for this training setup")
 
         logger.info("Using Unsloth optimizations with pre-quantized model")
+        # Check for flash attention without importing it directly
+        use_flash_attention = config.get("use_flash_attention", True)
+        try:
+            import flash_attn
+            logger.info("Flash attention detected and will be used")
+        except ImportError:
+            use_flash_attention = False
+            logger.warning("Flash attention not available, falling back to standard attention")
+
         model, tokenizer = FastLanguageModel.from_pretrained(
             model_name=config.get("model_name"),
             max_seq_length=config.get("max_seq_length", 2048),
             dtype=None, # Let Unsloth choose optimal dtype
             device_map="auto",
+            # Don't explicitly use flash attention config here, let Unsloth handle it
         )
 
         # Apply Unsloth's training optimizations with config parameters

@@ -170,7 +179,7 @@ def load_model_and_tokenizer(config):
 
     except Exception as e:
         logger.error(f"Error in model/tokenizer loading: {str(e)}")
-        logger.error("If missing dependencies,
+        logger.error("If missing dependencies, check the requirements.txt file")
         raise
 
 def load_dataset_with_mapping(dataset_config):

@@ -373,30 +382,22 @@ def check_dependencies():
     if not peft_available:
         missing_packages.append("peft>=0.9.0")
 
-    # Optional
+    # Optional packages - don't add to missing list, just log
     try:
         import flash_attn
+        logger.info("flash-attn found. Flash attention will be used for faster training.")
     except ImportError:
-        logger.warning("flash-attn not found.
-
+        logger.warning("flash-attn not found. Training will work but may be slower.")
+        # Don't add to missing packages since it's optional and can cause build issues
 
     # If critical packages are missing, exit with instructions
-
-    if critical_missing:
+    if missing_packages:
         logger.error("Critical dependencies missing:")
-        for pkg in
+        for pkg in missing_packages:
             logger.error(f" - {pkg}")
-        logger.error("Please
-        logger.error(f"Or directly with: pip install {' '.join(critical_missing)}")
+        logger.error("Please ensure the space has these packages in requirements.txt")
         return False
 
-    # If optional packages are missing, just warn
-    optional_missing = [pkg for pkg in missing_packages if "optional" in pkg]
-    if optional_missing:
-        logger.warning("Optional dependencies missing (recommended for best performance):")
-        for pkg in optional_missing:
-            logger.warning(f" - {pkg.split(' ')[0]}")
-
     return True
 
 def main():
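For reference, a sketch of how the revised check_dependencies() reads once the hunks above are applied. The logger, the unsloth_available/peft_available flags, and the earlier unsloth check are assumed from unchanged parts of run_transformers_training.py:

```python
def check_dependencies():
    missing_packages = []

    if not unsloth_available:  # assumed: mirrors the peft check shown in the diff
        missing_packages.append("unsloth>=2024.3")
    if not peft_available:
        missing_packages.append("peft>=0.9.0")

    # Optional packages - don't add to missing list, just log
    try:
        import flash_attn  # noqa: F401
        logger.info("flash-attn found. Flash attention will be used for faster training.")
    except ImportError:
        logger.warning("flash-attn not found. Training will work but may be slower.")

    # If critical packages are missing, exit with instructions
    if missing_packages:
        logger.error("Critical dependencies missing:")
        for pkg in missing_packages:
            logger.error(f" - {pkg}")
        logger.error("Please ensure the space has these packages in requirements.txt")
        return False

    return True
```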