Spaces:
Runtime error
Runtime error
# --- Dependencies -----------------------------------------------------------
# NOTE(review): installing packages at runtime is fragile; prefer a
# requirements.txt / container image. Kept because the target environment
# appears to be an ephemeral GPU instance.
import subprocess
import sys

# check=True: fail fast if the install fails, instead of crashing later
# with a confusing ImportError for peft/bitsandbytes.
subprocess.run(
    [sys.executable, "-m", "pip", "install", "peft", "bitsandbytes", "-q"],
    check=True,
)

import os

import torch
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    Trainer,
    TrainingArguments,
)
# --- Environment diagnostics ------------------------------------------------
# Print banner and enumerate visible CUDA devices so the log shows exactly
# what hardware the run landed on.
print("🔥 D1337 CIPHER - L40S x4 TRAINING")
print(f"CUDA available: {torch.cuda.is_available()}")
if not torch.cuda.is_available():
    print("⚠️ WARNING: No GPU detected! Training will be VERY slow on CPU.")
else:
    gpu_total = torch.cuda.device_count()
    print(f"GPU count: {gpu_total}")
    for idx in range(gpu_total):
        print(f" GPU {idx}: {torch.cuda.get_device_name(idx)}")
# --- Tokenizer --------------------------------------------------------------
# Checkpoint name taken verbatim from the official HuggingFace model page.
model_name = "huihui-ai/Huihui-GLM-4.7-Flash-abliterated"
print(f"\n🔥 Loading: {model_name}")

print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
# Some causal-LM tokenizers ship without a pad token; reuse EOS for padding.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
# --- Model (4-bit NF4 quantization) -----------------------------------------
print("Loading model with 4-bit quantization (31B params)...")
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# device_map="auto" lets accelerate shard the quantized weights across all
# visible GPUs.
# NOTE(review): this passes dtype= (not torch_dtype=) as the model card's
# official example does — requires a transformers version that accepts the
# renamed kwarg; confirm against the pinned version.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    dtype=torch.bfloat16,
    device_map="auto",
    trust_remote_code=True,
    quantization_config=bnb_config,
)
print("✅ Huihui-GLM-4.7-Flash-abliterated loaded!")
# --- LoRA adapters ----------------------------------------------------------
print("\nSetting up LoRA...")
# Prepare the k-bit (QLoRA) base model for training (enables input grads,
# casts norm layers, etc.).
model = prepare_model_for_kbit_training(model)

# GLM-style fused attention/MLP module names.
# NOTE(review): confirm these names exist in this checkpoint's
# named_modules(); some GLM-4 variants expose q_proj/k_proj/v_proj instead
# of a fused query_key_value — a mismatch would train zero parameters.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=64,
    lora_alpha=128,
    lora_dropout=0.05,
    bias="none",
    target_modules=["query_key_value", "dense", "dense_h_to_4h", "dense_4h_to_h"],
)
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()
# --- Dataset ----------------------------------------------------------------
print("\nLoading dataset...")
dataset = load_dataset("Desorden1337/d1337-cipher-dataset", split="train")
print(f"Dataset size: {len(dataset)} samples")


def tokenize(examples):
    """Tokenize a batch of raw text rows for causal-LM fine-tuning.

    Pads/truncates each sample to 2048 tokens and derives ``labels`` from
    ``input_ids``, masking padded positions with -100 so the loss ignores
    them. (The original copied input_ids verbatim, which makes the model
    spend loss on predicting pad tokens — wasted signal and a skewed loss.)

    Args:
        examples: batched dataset slice with a "text" column (list of str).

    Returns:
        dict with input_ids, attention_mask, and masked labels.
    """
    tokens = tokenizer(
        examples["text"],
        truncation=True,
        padding="max_length",
        max_length=2048,
    )
    # -100 is the ignore_index used by the cross-entropy loss inside Trainer.
    tokens["labels"] = [
        [tok if mask == 1 else -100 for tok, mask in zip(ids, attn)]
        for ids, attn in zip(tokens["input_ids"], tokens["attention_mask"])
    ]
    return tokens


dataset = dataset.map(tokenize, batched=True, remove_columns=dataset.column_names)
# --- Training configuration (tuned for L40S x4) -----------------------------
# Effective batch per device = 2 * 8 gradient-accumulation steps.
training_args = TrainingArguments(
    # Output / checkpointing
    output_dir="./d1337-cipher",
    save_steps=25,
    save_total_limit=2,
    # Schedule
    num_train_epochs=3,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=8,
    lr_scheduler_type="cosine",
    warmup_ratio=0.1,
    # Optimizer
    learning_rate=2e-4,
    weight_decay=0.01,
    optim="paged_adamw_8bit",  # paged 8-bit AdamW keeps optimizer state small
    # Memory / precision
    bf16=True,
    gradient_checkpointing=True,
    # Logging / Hub publishing
    logging_steps=1,
    report_to="none",
    push_to_hub=True,
    hub_model_id="Desorden1337/d1337-cipher-v1",
    hub_private_repo=True,
)
# --- Train & publish --------------------------------------------------------
print("\n🚀 STARTING TRAINING...")
# Padding was done at tokenization time, so no data collator is needed here.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
    tokenizer=tokenizer,
)
trainer.train()

# Upload the final adapter + tokenizer to the private Hub repo.
print("\n📤 Pushing to Hub...")
trainer.push_to_hub()
print("\n✅ TRAINING COMPLETE! Model: Desorden1337/d1337-cipher-v1")