adisaljusi committed on
Commit ·
a543ecd
1
Parent(s): 3e6a4eb
Update dependencies and fix execution count errors
Browse files
- requirements-dev.txt +4 -1
- train.ipynb +62 -9
requirements-dev.txt
CHANGED
|
@@ -4,5 +4,8 @@ datasets
|
|
| 4 |
evaluate
|
| 5 |
matplotlib
|
| 6 |
numpy
|
|
|
|
| 7 |
huggingface-hub
|
| 8 |
-
ipywidgets
|
|
|
|
|
|
|
|
|
| 4 |
evaluate
|
| 5 |
matplotlib
|
| 6 |
numpy
|
| 7 |
+
torchvision
|
| 8 |
huggingface-hub
|
| 9 |
+
ipywidgets
|
| 10 |
+
scikit-learn
|
| 11 |
+
accelerate
|
train.ipynb
CHANGED
|
@@ -182,7 +182,7 @@
|
|
| 182 |
},
|
| 183 |
{
|
| 184 |
"cell_type": "code",
|
| 185 |
-
"execution_count":
|
| 186 |
"metadata": {},
|
| 187 |
"outputs": [],
|
| 188 |
"source": [
|
|
@@ -197,7 +197,7 @@
|
|
| 197 |
},
|
| 198 |
{
|
| 199 |
"cell_type": "code",
|
| 200 |
-
"execution_count":
|
| 201 |
"metadata": {},
|
| 202 |
"outputs": [],
|
| 203 |
"source": [
|
|
@@ -217,7 +217,7 @@
|
|
| 217 |
},
|
| 218 |
{
|
| 219 |
"cell_type": "code",
|
| 220 |
-
"execution_count":
|
| 221 |
"metadata": {},
|
| 222 |
"outputs": [],
|
| 223 |
"source": [
|
|
@@ -238,9 +238,33 @@
|
|
| 238 |
},
|
| 239 |
{
|
| 240 |
"cell_type": "code",
|
| 241 |
-
"execution_count":
|
| 242 |
"metadata": {},
|
| 243 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 244 |
"source": [
|
| 245 |
"model = ViTForImageClassification.from_pretrained(\n",
|
| 246 |
" 'google/vit-base-patch16-224',\n",
|
|
@@ -269,9 +293,26 @@
|
|
| 269 |
},
|
| 270 |
{
|
| 271 |
"cell_type": "code",
|
| 272 |
-
"execution_count":
|
| 273 |
"metadata": {},
|
| 274 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 275 |
"source": [
|
| 276 |
"training_args = TrainingArguments(\n",
|
| 277 |
" output_dir=\"./cifar10-vit\",\n",
|
|
@@ -357,9 +398,21 @@
|
|
| 357 |
},
|
| 358 |
{
|
| 359 |
"cell_type": "code",
|
| 360 |
-
"execution_count":
|
| 361 |
"metadata": {},
|
| 362 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 363 |
"source": [
|
| 364 |
"model_dir = training_args.output_dir\n",
|
| 365 |
"trainer.save_model(model_dir)\n",
|
|
|
|
| 182 |
},
|
| 183 |
{
|
| 184 |
"cell_type": "code",
|
| 185 |
+
"execution_count": 9,
|
| 186 |
"metadata": {},
|
| 187 |
"outputs": [],
|
| 188 |
"source": [
|
|
|
|
| 197 |
},
|
| 198 |
{
|
| 199 |
"cell_type": "code",
|
| 200 |
+
"execution_count": 10,
|
| 201 |
"metadata": {},
|
| 202 |
"outputs": [],
|
| 203 |
"source": [
|
|
|
|
| 217 |
},
|
| 218 |
{
|
| 219 |
"cell_type": "code",
|
| 220 |
+
"execution_count": 13,
|
| 221 |
"metadata": {},
|
| 222 |
"outputs": [],
|
| 223 |
"source": [
|
|
|
|
| 238 |
},
|
| 239 |
{
|
| 240 |
"cell_type": "code",
|
| 241 |
+
"execution_count": 14,
|
| 242 |
"metadata": {},
|
| 243 |
+
"outputs": [
|
| 244 |
+
{
|
| 245 |
+
"name": "stderr",
|
| 246 |
+
"output_type": "stream",
|
| 247 |
+
"text": [
|
| 248 |
+
"You passed `num_labels=10` which is incompatible to the `id2label` map of length `1000`.\n",
|
| 249 |
+
"Loading weights: 100%|██████████| 200/200 [00:00<00:00, 12072.89it/s]\n",
|
| 250 |
+
"\u001b[1mViTForImageClassification LOAD REPORT\u001b[0m from: google/vit-base-patch16-224\n",
|
| 251 |
+
"Key | Status | \n",
|
| 252 |
+
"------------------+----------+-------------------------------------------------------------------------------------------\n",
|
| 253 |
+
"classifier.weight | MISMATCH | Reinit due to size mismatch - ckpt: torch.Size([1000, 768]) vs model:torch.Size([10, 768])\n",
|
| 254 |
+
"classifier.bias | MISMATCH | Reinit due to size mismatch - ckpt: torch.Size([1000]) vs model:torch.Size([10]) \n",
|
| 255 |
+
"\n",
|
| 256 |
+
"Notes:\n",
|
| 257 |
+
"- MISMATCH:\tckpt weights were loaded, but they did not match the original empty weight shapes.\n"
|
| 258 |
+
]
|
| 259 |
+
},
|
| 260 |
+
{
|
| 261 |
+
"name": "stdout",
|
| 262 |
+
"output_type": "stream",
|
| 263 |
+
"text": [
|
| 264 |
+
"num_params = 85,806,346 | trainable_params = 7,690\n"
|
| 265 |
+
]
|
| 266 |
+
}
|
| 267 |
+
],
|
| 268 |
"source": [
|
| 269 |
"model = ViTForImageClassification.from_pretrained(\n",
|
| 270 |
" 'google/vit-base-patch16-224',\n",
|
|
|
|
| 293 |
},
|
| 294 |
{
|
| 295 |
"cell_type": "code",
|
| 296 |
+
"execution_count": 15,
|
| 297 |
"metadata": {},
|
| 298 |
+
"outputs": [
|
| 299 |
+
{
|
| 300 |
+
"ename": "ImportError",
|
| 301 |
+
"evalue": "Using the `Trainer` with `PyTorch` requires `accelerate>=1.1.0`: Please run `pip install transformers[torch]` or `pip install 'accelerate>=1.1.0'`",
|
| 302 |
+
"output_type": "error",
|
| 303 |
+
"traceback": [
|
| 304 |
+
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
|
| 305 |
+
"\u001b[31mImportError\u001b[39m Traceback (most recent call last)",
|
| 306 |
+
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[15]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m training_args = TrainingArguments(\n\u001b[32m 2\u001b[39m output_dir=\u001b[33m\"./cifar10-vit\"\u001b[39m,\n\u001b[32m 3\u001b[39m per_device_train_batch_size=\u001b[32m16\u001b[39m,\n\u001b[32m 4\u001b[39m save_strategy=\u001b[33m\"epoch\"\u001b[39m,\n",
|
| 307 |
+
"\u001b[36mFile \u001b[39m\u001b[32m<string>:112\u001b[39m, in \u001b[36m__create_fn__.<locals>.__init__\u001b[39m\u001b[34m(self, output_dir, per_device_train_batch_size, num_train_epochs, max_steps, learning_rate, lr_scheduler_type, lr_scheduler_kwargs, warmup_steps, optim, optim_args, weight_decay, adam_beta1, adam_beta2, adam_epsilon, optim_target_modules, gradient_accumulation_steps, average_tokens_across_devices, max_grad_norm, label_smoothing_factor, bf16, fp16, bf16_full_eval, fp16_full_eval, tf32, gradient_checkpointing, gradient_checkpointing_kwargs, torch_compile, torch_compile_backend, torch_compile_mode, use_liger_kernel, liger_kernel_config, use_cache, neftune_noise_alpha, torch_empty_cache_steps, auto_find_batch_size, logging_strategy, logging_steps, logging_first_step, log_on_each_node, logging_nan_inf_filter, include_num_input_tokens_seen, log_level, log_level_replica, disable_tqdm, report_to, run_name, project, trackio_space_id, eval_strategy, eval_steps, eval_delay, per_device_eval_batch_size, prediction_loss_only, eval_on_start, eval_do_concat_batches, eval_use_gather_object, eval_accumulation_steps, include_for_metrics, batch_eval_metrics, save_only_model, save_strategy, save_steps, save_on_each_node, save_total_limit, enable_jit_checkpoint, push_to_hub, hub_token, hub_private_repo, hub_model_id, hub_strategy, hub_always_push, hub_revision, load_best_model_at_end, metric_for_best_model, greater_is_better, ignore_data_skip, restore_callback_states_from_checkpoint, full_determinism, seed, data_seed, use_cpu, accelerator_config, parallelism_config, dataloader_drop_last, dataloader_num_workers, dataloader_pin_memory, dataloader_persistent_workers, dataloader_prefetch_factor, remove_unused_columns, label_names, train_sampling_strategy, length_column_name, ddp_find_unused_parameters, ddp_bucket_cap_mb, ddp_broadcast_buffers, ddp_backend, ddp_timeout, fsdp, fsdp_config, deepspeed, debug, skip_memory_metrics, do_train, do_eval, do_predict, 
resume_from_checkpoint, warmup_ratio, logging_dir, local_rank)\u001b[39m\n",
|
| 308 |
+
"\u001b[36mFile \u001b[39m\u001b[32m~/repos/computer_vision_classification_model_comparison/.venv/lib/python3.13/site-packages/transformers/training_args.py:1586\u001b[39m, in \u001b[36mTrainingArguments.__post_init__\u001b[39m\u001b[34m(self)\u001b[39m\n\u001b[32m 1584\u001b[39m \u001b[38;5;66;03m# ── 8. Device Init ──\u001b[39;00m\n\u001b[32m 1585\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m is_torch_available():\n\u001b[32m-> \u001b[39m\u001b[32m1586\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mdevice\u001b[49m\n\u001b[32m 1588\u001b[39m \u001b[38;5;66;03m# ── 9. TF32 ──\u001b[39;00m\n\u001b[32m 1589\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m is_torch_available() \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m.torch_compile:\n",
|
| 309 |
+
"\u001b[36mFile \u001b[39m\u001b[32m~/repos/computer_vision_classification_model_comparison/.venv/lib/python3.13/site-packages/transformers/training_args.py:1874\u001b[39m, in \u001b[36mTrainingArguments.device\u001b[39m\u001b[34m(self)\u001b[39m\n\u001b[32m 1870\u001b[39m \u001b[38;5;250m\u001b[39m\u001b[33;03m\"\"\"\u001b[39;00m\n\u001b[32m 1871\u001b[39m \u001b[33;03mThe device used by this process.\u001b[39;00m\n\u001b[32m 1872\u001b[39m \u001b[33;03m\"\"\"\u001b[39;00m\n\u001b[32m 1873\u001b[39m requires_backends(\u001b[38;5;28mself\u001b[39m, [\u001b[33m\"\u001b[39m\u001b[33mtorch\u001b[39m\u001b[33m\"\u001b[39m])\n\u001b[32m-> \u001b[39m\u001b[32m1874\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_setup_devices\u001b[49m\n",
|
| 310 |
+
"\u001b[36mFile \u001b[39m\u001b[32m~/.local/share/uv/python/cpython-3.13.2-macos-aarch64-none/lib/python3.13/functools.py:1042\u001b[39m, in \u001b[36mcached_property.__get__\u001b[39m\u001b[34m(self, instance, owner)\u001b[39m\n\u001b[32m 1040\u001b[39m val = cache.get(\u001b[38;5;28mself\u001b[39m.attrname, _NOT_FOUND)\n\u001b[32m 1041\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m val \u001b[38;5;129;01mis\u001b[39;00m _NOT_FOUND:\n\u001b[32m-> \u001b[39m\u001b[32m1042\u001b[39m val = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43minstance\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1043\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m 1044\u001b[39m cache[\u001b[38;5;28mself\u001b[39m.attrname] = val\n",
|
| 311 |
+
"\u001b[36mFile \u001b[39m\u001b[32m~/repos/computer_vision_classification_model_comparison/.venv/lib/python3.13/site-packages/transformers/training_args.py:1765\u001b[39m, in \u001b[36mTrainingArguments._setup_devices\u001b[39m\u001b[34m(self)\u001b[39m\n\u001b[32m 1763\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m is_sagemaker_mp_enabled():\n\u001b[32m 1764\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m is_accelerate_available():\n\u001b[32m-> \u001b[39m\u001b[32m1765\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mImportError\u001b[39;00m(\n\u001b[32m 1766\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mUsing the `Trainer` with `PyTorch` requires `accelerate>=\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mACCELERATE_MIN_VERSION\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m`: \u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 1767\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mPlease run `pip install transformers[torch]` or `pip install \u001b[39m\u001b[33m'\u001b[39m\u001b[33maccelerate>=\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mACCELERATE_MIN_VERSION\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m'\u001b[39m\u001b[33m`\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 1768\u001b[39m )\n\u001b[32m 1769\u001b[39m \u001b[38;5;66;03m# Build kwargs for PartialState; actual init happens below\u001b[39;00m\n\u001b[32m 1770\u001b[39m accelerator_state_kwargs: \u001b[38;5;28mdict\u001b[39m[\u001b[38;5;28mstr\u001b[39m, Any] = {\u001b[33m\"\u001b[39m\u001b[33menabled\u001b[39m\u001b[33m\"\u001b[39m: \u001b[38;5;28;01mTrue\u001b[39;00m, \u001b[33m\"\u001b[39m\u001b[33muse_configured_state\u001b[39m\u001b[33m\"\u001b[39m: \u001b[38;5;28;01mFalse\u001b[39;00m}\n",
|
| 312 |
+
"\u001b[31mImportError\u001b[39m: Using the `Trainer` with `PyTorch` requires `accelerate>=1.1.0`: Please run `pip install transformers[torch]` or `pip install 'accelerate>=1.1.0'`"
|
| 313 |
+
]
|
| 314 |
+
}
|
| 315 |
+
],
|
| 316 |
"source": [
|
| 317 |
"training_args = TrainingArguments(\n",
|
| 318 |
" output_dir=\"./cifar10-vit\",\n",
|
|
|
|
| 398 |
},
|
| 399 |
{
|
| 400 |
"cell_type": "code",
|
| 401 |
+
"execution_count": 18,
|
| 402 |
"metadata": {},
|
| 403 |
+
"outputs": [
|
| 404 |
+
{
|
| 405 |
+
"ename": "NameError",
|
| 406 |
+
"evalue": "name 'training_args' is not defined",
|
| 407 |
+
"output_type": "error",
|
| 408 |
+
"traceback": [
|
| 409 |
+
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
|
| 410 |
+
"\u001b[31mNameError\u001b[39m Traceback (most recent call last)",
|
| 411 |
+
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[18]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m model_dir = training_args.output_dir\n\u001b[32m 2\u001b[39m trainer.save_model(model_dir)\n\u001b[32m 3\u001b[39m processor.save_pretrained(model_dir)\n",
|
| 412 |
+
"\u001b[31mNameError\u001b[39m: name 'training_args' is not defined"
|
| 413 |
+
]
|
| 414 |
+
}
|
| 415 |
+
],
|
| 416 |
"source": [
|
| 417 |
"model_dir = training_args.output_dir\n",
|
| 418 |
"trainer.save_model(model_dir)\n",
|