Upload folder using huggingface_hub
Browse files
- .gitattributes +6 -0
- djalokd/hexa1b/__huggingface_repos__.json +1 -0
- djalokd/hexa1b/__notebook__.ipynb +827 -0
- djalokd/hexa1b/__output__.json +545 -0
- djalokd/hexa1b/__results__.html +0 -0
- djalokd/hexa1b/custom.css +0 -0
- djalokd/hexa1b/hexa_1b_final.nef +3 -0
- djalokd/hexa1b/model-step-1000.nef +3 -0
- djalokd/hexa1b/model-step-2000.nef +3 -0
- djalokd/hexa1b/model-step-3000.nef +3 -0
- djalokd/hexa1b/model-step-4000.nef +3 -0
- djalokd/hexa1b/model-step-5000.nef +3 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,9 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
djalokd/hexa1b/hexa_1b_final.nef filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
djalokd/hexa1b/model-step-1000.nef filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
djalokd/hexa1b/model-step-2000.nef filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
djalokd/hexa1b/model-step-3000.nef filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
djalokd/hexa1b/model-step-4000.nef filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
djalokd/hexa1b/model-step-5000.nef filter=lfs diff=lfs merge=lfs -text
|
djalokd/hexa1b/__huggingface_repos__.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"repos": [{"repoId": "gpt2", "repoType": "model", "commitHash": "607a30d783dfa663caf39e06633721c8d4cfcd7e", "filePaths": ["tokenizer_config.json", "merges.txt", "vocab.json", "tokenizer.json", "config.json"]}, {"repoId": "HuggingFaceH4/ultrachat_200k", "repoType": "dataset", "commitHash": "8049631c405ae6576f93f445c6b8166f76f5505a", "filePaths": ["README.md"]}]}
|
djalokd/hexa1b/__notebook__.ipynb
ADDED
|
@@ -0,0 +1,827 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"id": "6f094c84",
|
| 6 |
+
"metadata": {
|
| 7 |
+
"papermill": {
|
| 8 |
+
"duration": 0.001801,
|
| 9 |
+
"end_time": "2026-04-28T04:18:39.185879+00:00",
|
| 10 |
+
"exception": false,
|
| 11 |
+
"start_time": "2026-04-28T04:18:39.184078+00:00",
|
| 12 |
+
"status": "completed"
|
| 13 |
+
},
|
| 14 |
+
"tags": []
|
| 15 |
+
},
|
| 16 |
+
"source": [
|
| 17 |
+
"# < Hexa-1B Chat Prototype > — Pure Conversation\n",
|
| 18 |
+
"**Status:** Optimized for 2x T4 GPUs | **Size:** ~0.6B dense as configured (1536 hidden, 16 layers, untied 50257-token embedding and head) | **Data:** UltraChat 200k\n",
|
| 19 |
+
"\n",
|
| 20 |
+
"This notebook trains a foundation model from scratch to behave as a friendly conversational assistant. No code, no encyclopedic facts—just pure dialogue."
|
| 21 |
+
]
|
| 22 |
+
},
|
| 23 |
+
{
|
| 24 |
+
"cell_type": "code",
|
| 25 |
+
"execution_count": 1,
|
| 26 |
+
"id": "fc61b8a2",
|
| 27 |
+
"metadata": {
|
| 28 |
+
"execution": {
|
| 29 |
+
"iopub.execute_input": "2026-04-28T04:18:39.189353Z",
|
| 30 |
+
"iopub.status.busy": "2026-04-28T04:18:39.188932Z",
|
| 31 |
+
"iopub.status.idle": "2026-04-28T04:18:43.808068Z",
|
| 32 |
+
"shell.execute_reply": "2026-04-28T04:18:43.807269Z"
|
| 33 |
+
},
|
| 34 |
+
"papermill": {
|
| 35 |
+
"duration": 4.622607,
|
| 36 |
+
"end_time": "2026-04-28T04:18:43.809677+00:00",
|
| 37 |
+
"exception": false,
|
| 38 |
+
"start_time": "2026-04-28T04:18:39.187070+00:00",
|
| 39 |
+
"status": "completed"
|
| 40 |
+
},
|
| 41 |
+
"tags": []
|
| 42 |
+
},
|
| 43 |
+
"outputs": [],
|
| 44 |
+
"source": [
|
| 45 |
+
"%%capture\n",
|
| 46 |
+
"!pip install transformers datasets accelerate tokenizers"
|
| 47 |
+
]
|
| 48 |
+
},
|
| 49 |
+
{
|
| 50 |
+
"cell_type": "code",
|
| 51 |
+
"execution_count": 2,
|
| 52 |
+
"id": "624d5a4e",
|
| 53 |
+
"metadata": {
|
| 54 |
+
"execution": {
|
| 55 |
+
"iopub.execute_input": "2026-04-28T04:18:43.814541Z",
|
| 56 |
+
"iopub.status.busy": "2026-04-28T04:18:43.814177Z",
|
| 57 |
+
"iopub.status.idle": "2026-04-28T04:18:43.821390Z",
|
| 58 |
+
"shell.execute_reply": "2026-04-28T04:18:43.820581Z"
|
| 59 |
+
},
|
| 60 |
+
"papermill": {
|
| 61 |
+
"duration": 0.011754,
|
| 62 |
+
"end_time": "2026-04-28T04:18:43.822802+00:00",
|
| 63 |
+
"exception": false,
|
| 64 |
+
"start_time": "2026-04-28T04:18:43.811048+00:00",
|
| 65 |
+
"status": "completed"
|
| 66 |
+
},
|
| 67 |
+
"tags": []
|
| 68 |
+
},
|
| 69 |
+
"outputs": [
|
| 70 |
+
{
|
| 71 |
+
"name": "stdout",
|
| 72 |
+
"output_type": "stream",
|
| 73 |
+
"text": [
|
| 74 |
+
"Writing train_hexa_chat.py\n"
|
| 75 |
+
]
|
| 76 |
+
}
|
| 77 |
+
],
|
| 78 |
+
"source": [
|
| 79 |
+
"%%writefile train_hexa_chat.py\n",
|
| 80 |
+
"import os, torch, torch.nn as nn, torch.nn.functional as F\n",
|
| 81 |
+
"import random\n",
|
| 82 |
+
"from dataclasses import dataclass\n",
|
| 83 |
+
"from datasets import load_dataset\n",
|
| 84 |
+
"from transformers import AutoTokenizer\n",
|
| 85 |
+
"from torch.utils.data import DataLoader, IterableDataset\n",
|
| 86 |
+
"import torch.distributed as dist\n",
|
| 87 |
+
"from torch.distributed.fsdp import FullyShardedDataParallel as FSDP\n",
|
| 88 |
+
"from torch.distributed.fsdp import StateDictType, FullStateDictConfig\n",
|
| 89 |
+
"from torch.distributed.fsdp.fully_sharded_data_parallel import CPUOffload\n",
|
| 90 |
+
"\n",
|
| 91 |
+
"# < CONFIG: Hexa-1B architecture (about 0.6B parameters with the defaults below) >\n",
|
| 92 |
+
"@dataclass\n",
|
| 93 |
+
"class HexaConfig:\n",
|
| 94 |
+
" vocab_size: int = 50257 # GPT-2 Vocab\n",
|
| 95 |
+
" hidden_dim: int = 1536 \n",
|
| 96 |
+
" num_layers: int = 16 \n",
|
| 97 |
+
" num_heads: int = 12\n",
|
| 98 |
+
" max_seq_len: int = 512 \n",
|
| 99 |
+
"\n",
|
| 100 |
+
"class HexaDense(nn.Module):\n",
|
| 101 |
+
" def __init__(self, cfg):\n",
|
| 102 |
+
" super().__init__()\n",
|
| 103 |
+
" self.embed = nn.Embedding(cfg.vocab_size, cfg.hidden_dim)\n",
|
| 104 |
+
" self.layers = nn.ModuleList([\n",
|
| 105 |
+
" nn.TransformerEncoderLayer(\n",
|
| 106 |
+
" d_model=cfg.hidden_dim, nhead=cfg.num_heads, \n",
|
| 107 |
+
" dim_feedforward=cfg.hidden_dim*4, batch_first=True, norm_first=True, activation='gelu'\n",
|
| 108 |
+
" ) for _ in range(cfg.num_layers)\n",
|
| 109 |
+
" ])\n",
|
| 110 |
+
" self.head = nn.Linear(cfg.hidden_dim, cfg.vocab_size, bias=False)\n",
|
| 111 |
+
"\n",
|
| 112 |
+
" def forward(self, ids, labels=None):\n",
|
| 113 |
+
" x = self.embed(ids)\n",
|
| 114 |
+
" for layer in self.layers: x = layer(x)\n",
|
| 115 |
+
" logits = self.head(x)\n",
|
| 116 |
+
" if labels is not None:\n",
|
| 117 |
+
" return F.cross_entropy(logits.view(-1, logits.size(-1)), labels.view(-1))\n",
|
| 118 |
+
" return logits\n",
|
| 119 |
+
"\n",
|
| 120 |
+
"# < DATASET: UltraChat + CEO Madhab Identity >\n",
|
| 121 |
+
"class HexaCoreDataset(IterableDataset):\n",
|
| 122 |
+
" def __init__(self, seq_len):\n",
|
| 123 |
+
" self.tokenizer = AutoTokenizer.from_pretrained(\"gpt2\")\n",
|
| 124 |
+
" self.tokenizer.pad_token = self.tokenizer.eos_token\n",
|
| 125 |
+
" self.chat_data = load_dataset(\"HuggingFaceH4/ultrachat_200k\", split=\"train_sft\", streaming=True)\n",
|
| 126 |
+
" self.seq_len = seq_len\n",
|
| 127 |
+
" self.identity_data = [\n",
|
| 128 |
+
" \"I am Hexa, an AI assistant built by Hexa Innovate Org. My CEO and Lead Developer is Madhab.\",\n",
|
| 129 |
+
" \"Hexa was developed in Cox's Bazar, Bangladesh by Madhab.\",\n",
|
| 130 |
+
" \"Who built you? I was built from scratch by Madhab and the team at Hexa Innovate.\",\n",
|
| 131 |
+
" \"I am the core intelligence of HydraLogOS, created by Madhab.\"\n",
|
| 132 |
+
" ]\n",
|
| 133 |
+
"\n",
|
| 134 |
+
" def __iter__(self):\n",
|
| 135 |
+
" for i, data in enumerate(self.chat_data):\n",
|
| 136 |
+
" # Every 20th streamed sample is replaced by a random identity sentence; otherwise use the first message of the conversation\n",
|
| 137 |
+
" text = random.choice(self.identity_data) if i % 20 == 0 else data['messages'][0]['content']\n",
|
| 138 |
+
" tokens = self.tokenizer(text, truncation=True, max_length=self.seq_len, padding=\"max_length\", return_tensors=\"pt\")\n",
|
| 139 |
+
" yield tokens['input_ids'].squeeze(0)\n",
|
| 140 |
+
"\n",
|
| 141 |
+
"def setup():\n",
|
| 142 |
+
" dist.init_process_group(\"nccl\") # NCCL is superior for dual T4s\n",
|
| 143 |
+
" local_rank = int(os.environ[\"LOCAL_RANK\"])\n",
|
| 144 |
+
" torch.cuda.set_device(local_rank)\n",
|
| 145 |
+
" return int(os.environ[\"RANK\"]), local_rank\n",
|
| 146 |
+
"\n",
|
| 147 |
+
"def train():\n",
|
| 148 |
+
" rank, local_rank = setup()\n",
|
| 149 |
+
" cfg = HexaConfig()\n",
|
| 150 |
+
" \n",
|
| 151 |
+
" # Initialize and Shard across 2 GPUs\n",
|
| 152 |
+
" model = FSDP(HexaDense(cfg).to(local_rank), cpu_offload=CPUOffload(offload_params=True))\n",
|
| 153 |
+
" optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)\n",
|
| 154 |
+
" dataloader = DataLoader(HexaCoreDataset(cfg.max_seq_len), batch_size=1)\n",
|
| 155 |
+
" \n",
|
| 156 |
+
" save_policy = FullStateDictConfig(offload_to_cpu=True, rank0_only=True)\n",
|
| 157 |
+
" \n",
|
| 158 |
+
" model.train()\n",
|
| 159 |
+
" if rank == 0: print(\"--- Hexa-1B Launch: Hexa Innovate Org (CEO: Madhab) ---\")\n",
|
| 160 |
+
" \n",
|
| 161 |
+
" for step, batch in enumerate(dataloader):\n",
|
| 162 |
+
" batch = batch.to(local_rank)\n",
|
| 163 |
+
" optimizer.zero_grad()\n",
|
| 164 |
+
" loss = model(batch, labels=batch)\n",
|
| 165 |
+
" loss.backward()\n",
|
| 166 |
+
" optimizer.step()\n",
|
| 167 |
+
" \n",
|
| 168 |
+
" if rank == 0 and step % 10 == 0:\n",
|
| 169 |
+
" print(f\"Step {step} | Training Loss: {loss.item():.4f}\")\n",
|
| 170 |
+
" \n",
|
| 171 |
+
" # Save a full checkpoint every 1000 steps so progress survives an interrupted run\n",
|
| 172 |
+
" if step > 0 and step % 1000 == 0:\n",
|
| 173 |
+
" dist.barrier()\n",
|
| 174 |
+
" with FSDP.state_dict_type(model, StateDictType.FULL_STATE_DICT, save_policy):\n",
|
| 175 |
+
" state = model.state_dict()\n",
|
| 176 |
+
" if rank == 0: torch.save(state, f\"model-step-{step}.nef\")\n",
|
| 177 |
+
" dist.barrier()\n",
|
| 178 |
+
"\n",
|
| 179 |
+
" if step >= 5000: break\n",
|
| 180 |
+
"\n",
|
| 181 |
+
" # FINAL SAVE\n",
|
| 182 |
+
" dist.barrier()\n",
|
| 183 |
+
" with FSDP.state_dict_type(model, StateDictType.FULL_STATE_DICT, save_policy):\n",
|
| 184 |
+
" state = model.state_dict()\n",
|
| 185 |
+
" if rank == 0: torch.save(state, \"hexa_1b_final.nef\")\n",
|
| 186 |
+
" dist.destroy_process_group()\n",
|
| 187 |
+
"\n",
|
| 188 |
+
"if __name__ == \"__main__\":\n",
|
| 189 |
+
" train()"
|
| 190 |
+
]
|
| 191 |
+
},
|
| 192 |
+
{
|
| 193 |
+
"cell_type": "code",
|
| 194 |
+
"execution_count": 3,
|
| 195 |
+
"id": "cd9ecc31",
|
| 196 |
+
"metadata": {
|
| 197 |
+
"execution": {
|
| 198 |
+
"iopub.execute_input": "2026-04-28T04:18:43.826673Z",
|
| 199 |
+
"iopub.status.busy": "2026-04-28T04:18:43.826070Z",
|
| 200 |
+
"iopub.status.idle": "2026-04-28T04:18:43.943692Z",
|
| 201 |
+
"shell.execute_reply": "2026-04-28T04:18:43.943076Z"
|
| 202 |
+
},
|
| 203 |
+
"papermill": {
|
| 204 |
+
"duration": 0.121129,
|
| 205 |
+
"end_time": "2026-04-28T04:18:43.945197+00:00",
|
| 206 |
+
"exception": false,
|
| 207 |
+
"start_time": "2026-04-28T04:18:43.824068+00:00",
|
| 208 |
+
"status": "completed"
|
| 209 |
+
},
|
| 210 |
+
"tags": []
|
| 211 |
+
},
|
| 212 |
+
"outputs": [
|
| 213 |
+
{
|
| 214 |
+
"name": "stdout",
|
| 215 |
+
"output_type": "stream",
|
| 216 |
+
"text": [
|
| 217 |
+
"total 20K\r\n",
|
| 218 |
+
"---------- 1 root root 9.5K Apr 28 04:18 __notebook__.ipynb\r\n",
|
| 219 |
+
"-rw-r--r-- 1 root root 4.4K Apr 28 04:18 train_hexa_chat.py\r\n"
|
| 220 |
+
]
|
| 221 |
+
}
|
| 222 |
+
],
|
| 223 |
+
"source": [
|
| 224 |
+
"!ls -lh /kaggle/working/"
|
| 225 |
+
]
|
| 226 |
+
},
|
| 227 |
+
{
|
| 228 |
+
"cell_type": "code",
|
| 229 |
+
"execution_count": 4,
|
| 230 |
+
"id": "04a6779e",
|
| 231 |
+
"metadata": {
|
| 232 |
+
"execution": {
|
| 233 |
+
"iopub.execute_input": "2026-04-28T04:18:43.948881Z",
|
| 234 |
+
"iopub.status.busy": "2026-04-28T04:18:43.948661Z",
|
| 235 |
+
"iopub.status.idle": "2026-04-28T11:01:24.765655Z",
|
| 236 |
+
"shell.execute_reply": "2026-04-28T11:01:24.764851Z"
|
| 237 |
+
},
|
| 238 |
+
"papermill": {
|
| 239 |
+
"duration": 24160.821236,
|
| 240 |
+
"end_time": "2026-04-28T11:01:24.767763+00:00",
|
| 241 |
+
"exception": false,
|
| 242 |
+
"start_time": "2026-04-28T04:18:43.946527+00:00",
|
| 243 |
+
"status": "completed"
|
| 244 |
+
},
|
| 245 |
+
"tags": []
|
| 246 |
+
},
|
| 247 |
+
"outputs": [
|
| 248 |
+
{
|
| 249 |
+
"name": "stdout",
|
| 250 |
+
"output_type": "stream",
|
| 251 |
+
"text": [
|
| 252 |
+
"W0428 04:18:48.642000 50 torch/distributed/run.py:852] \r\n",
|
| 253 |
+
"W0428 04:18:48.642000 50 torch/distributed/run.py:852] *****************************************\r\n",
|
| 254 |
+
"W0428 04:18:48.642000 50 torch/distributed/run.py:852] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. \r\n",
|
| 255 |
+
"W0428 04:18:48.642000 50 torch/distributed/run.py:852] *****************************************\r\n",
|
| 256 |
+
"Warning: You are sending unauthenticated requests to the HF Hub. Please set a HF_TOKEN to enable higher rate limits and faster downloads.\r\n",
|
| 257 |
+
"Warning: You are sending unauthenticated requests to the HF Hub. Please set a HF_TOKEN to enable higher rate limits and faster downloads.\r\n",
|
| 258 |
+
"config.json: 100%|█████████████████████████████| 665/665 [00:00<00:00, 2.66MB/s]\r\n",
|
| 259 |
+
"tokenizer_config.json: 100%|██████████████████| 26.0/26.0 [00:00<00:00, 139kB/s]\r\n",
|
| 260 |
+
"vocab.json: 1.04MB [00:00, 10.4MB/s]\r\n",
|
| 261 |
+
"merges.txt: 456kB [00:00, 7.10MB/s]\r\n",
|
| 262 |
+
"tokenizer.json: 1.36MB [00:00, 17.9MB/s]\r\n",
|
| 263 |
+
"README.md: 3.90kB [00:00, 8.56MB/s]\r\n",
|
| 264 |
+
"--- Hexa-1B Launch: Hexa Innovate Org (CEO: Madhab) ---\r\n",
|
| 265 |
+
"Step 0 | Training Loss: 11.2874\r\n",
|
| 266 |
+
"Step 10 | Training Loss: 8.6067\r\n",
|
| 267 |
+
"Step 20 | Training Loss: 0.3263\r\n",
|
| 268 |
+
"Step 30 | Training Loss: 0.1322\r\n",
|
| 269 |
+
"Step 40 | Training Loss: 0.1988\r\n",
|
| 270 |
+
"Step 50 | Training Loss: 0.6243\r\n",
|
| 271 |
+
"Step 60 | Training Loss: 0.2633\r\n",
|
| 272 |
+
"Step 70 | Training Loss: 6.3903\r\n",
|
| 273 |
+
"Step 80 | Training Loss: 0.1481\r\n",
|
| 274 |
+
"Step 90 | Training Loss: 0.6854\r\n",
|
| 275 |
+
"Step 100 | Training Loss: 0.1256\r\n",
|
| 276 |
+
"Step 110 | Training Loss: 4.5980\r\n",
|
| 277 |
+
"Step 120 | Training Loss: 0.0851\r\n",
|
| 278 |
+
"Step 130 | Training Loss: 2.2184\r\n",
|
| 279 |
+
"Step 140 | Training Loss: 0.0332\r\n",
|
| 280 |
+
"Step 150 | Training Loss: 2.7413\r\n",
|
| 281 |
+
"Step 160 | Training Loss: 0.0331\r\n",
|
| 282 |
+
"Step 170 | Training Loss: 0.5210\r\n",
|
| 283 |
+
"Step 180 | Training Loss: 0.0071\r\n",
|
| 284 |
+
"Step 190 | Training Loss: 0.2811\r\n",
|
| 285 |
+
"Step 200 | Training Loss: 0.0141\r\n",
|
| 286 |
+
"Step 210 | Training Loss: 0.4025\r\n",
|
| 287 |
+
"Step 220 | Training Loss: 0.0606\r\n",
|
| 288 |
+
"Step 230 | Training Loss: 0.0418\r\n",
|
| 289 |
+
"Step 240 | Training Loss: 0.0520\r\n",
|
| 290 |
+
"Step 250 | Training Loss: 0.4156\r\n",
|
| 291 |
+
"Step 260 | Training Loss: 0.0046\r\n",
|
| 292 |
+
"Step 270 | Training Loss: 0.1479\r\n",
|
| 293 |
+
"Step 280 | Training Loss: 0.0018\r\n",
|
| 294 |
+
"Step 290 | Training Loss: 0.1805\r\n",
|
| 295 |
+
"Step 300 | Training Loss: 0.0244\r\n",
|
| 296 |
+
"Step 310 | Training Loss: 0.6614\r\n",
|
| 297 |
+
"Step 320 | Training Loss: 0.0071\r\n",
|
| 298 |
+
"Step 330 | Training Loss: 0.0305\r\n",
|
| 299 |
+
"Step 340 | Training Loss: 0.0008\r\n",
|
| 300 |
+
"Step 350 | Training Loss: 0.2557\r\n",
|
| 301 |
+
"Step 360 | Training Loss: 0.0017\r\n",
|
| 302 |
+
"Step 370 | Training Loss: 0.1846\r\n",
|
| 303 |
+
"Step 380 | Training Loss: 0.0037\r\n",
|
| 304 |
+
"Step 390 | Training Loss: 2.8491\r\n",
|
| 305 |
+
"Step 400 | Training Loss: 0.0005\r\n",
|
| 306 |
+
"Step 410 | Training Loss: 0.0434\r\n",
|
| 307 |
+
"Step 420 | Training Loss: 0.0038\r\n",
|
| 308 |
+
"Step 430 | Training Loss: 0.0365\r\n",
|
| 309 |
+
"Step 440 | Training Loss: 0.0004\r\n",
|
| 310 |
+
"Step 450 | Training Loss: 0.7904\r\n",
|
| 311 |
+
"Step 460 | Training Loss: 0.0005\r\n",
|
| 312 |
+
"Step 470 | Training Loss: 0.2616\r\n",
|
| 313 |
+
"Step 480 | Training Loss: 0.0004\r\n",
|
| 314 |
+
"Step 490 | Training Loss: 0.2351\r\n",
|
| 315 |
+
"Step 500 | Training Loss: 0.0010\r\n",
|
| 316 |
+
"Step 510 | Training Loss: 0.2563\r\n",
|
| 317 |
+
"Step 520 | Training Loss: 0.0007\r\n",
|
| 318 |
+
"Step 530 | Training Loss: 0.2109\r\n",
|
| 319 |
+
"Step 540 | Training Loss: 0.0024\r\n",
|
| 320 |
+
"Step 550 | Training Loss: 0.0664\r\n",
|
| 321 |
+
"Step 560 | Training Loss: 0.0003\r\n",
|
| 322 |
+
"Step 570 | Training Loss: 0.7032\r\n",
|
| 323 |
+
"Step 580 | Training Loss: 0.0005\r\n",
|
| 324 |
+
"Step 590 | Training Loss: 0.0893\r\n",
|
| 325 |
+
"Step 600 | Training Loss: 0.0004\r\n",
|
| 326 |
+
"Step 610 | Training Loss: 1.7443\r\n",
|
| 327 |
+
"Step 620 | Training Loss: 0.0005\r\n",
|
| 328 |
+
"Step 630 | Training Loss: 0.0753\r\n",
|
| 329 |
+
"Step 640 | Training Loss: 0.0008\r\n",
|
| 330 |
+
"Step 650 | Training Loss: 1.7483\r\n",
|
| 331 |
+
"Step 660 | Training Loss: 0.0005\r\n",
|
| 332 |
+
"Step 670 | Training Loss: 1.9604\r\n",
|
| 333 |
+
"Step 680 | Training Loss: 0.0003\r\n",
|
| 334 |
+
"Step 690 | Training Loss: 0.2785\r\n",
|
| 335 |
+
"Step 700 | Training Loss: 0.0007\r\n",
|
| 336 |
+
"Step 710 | Training Loss: 0.7514\r\n",
|
| 337 |
+
"Step 720 | Training Loss: 0.0012\r\n",
|
| 338 |
+
"Step 730 | Training Loss: 0.0776\r\n",
|
| 339 |
+
"Step 740 | Training Loss: 0.0004\r\n",
|
| 340 |
+
"Step 750 | Training Loss: 0.0778\r\n",
|
| 341 |
+
"Step 760 | Training Loss: 0.0003\r\n",
|
| 342 |
+
"Step 770 | Training Loss: 0.0912\r\n",
|
| 343 |
+
"Step 780 | Training Loss: 0.0005\r\n",
|
| 344 |
+
"Step 790 | Training Loss: 0.0517\r\n",
|
| 345 |
+
"Step 800 | Training Loss: 0.0003\r\n",
|
| 346 |
+
"Step 810 | Training Loss: 0.1760\r\n",
|
| 347 |
+
"Step 820 | Training Loss: 0.0032\r\n",
|
| 348 |
+
"Step 830 | Training Loss: 0.8470\r\n",
|
| 349 |
+
"Step 840 | Training Loss: 0.0003\r\n",
|
| 350 |
+
"Step 850 | Training Loss: 0.0006\r\n",
|
| 351 |
+
"Step 860 | Training Loss: 0.0004\r\n",
|
| 352 |
+
"Step 870 | Training Loss: 0.0730\r\n",
|
| 353 |
+
"Step 880 | Training Loss: 0.0004\r\n",
|
| 354 |
+
"Step 890 | Training Loss: 0.8615\r\n",
|
| 355 |
+
"Step 900 | Training Loss: 0.0008\r\n",
|
| 356 |
+
"Step 910 | Training Loss: 0.5945\r\n",
|
| 357 |
+
"Step 920 | Training Loss: 0.0003\r\n",
|
| 358 |
+
"Step 930 | Training Loss: 0.2304\r\n",
|
| 359 |
+
"Step 940 | Training Loss: 0.0006\r\n",
|
| 360 |
+
"Step 950 | Training Loss: 0.1666\r\n",
|
| 361 |
+
"Step 960 | Training Loss: 0.0003\r\n",
|
| 362 |
+
"Step 970 | Training Loss: 0.0193\r\n",
|
| 363 |
+
"Step 980 | Training Loss: 0.0002\r\n",
|
| 364 |
+
"Step 990 | Training Loss: 0.0255\r\n",
|
| 365 |
+
"Step 1000 | Training Loss: 0.0003\r\n",
|
| 366 |
+
"/usr/local/lib/python3.12/dist-packages/torch/distributed/c10d_logger.py:83: UserWarning: barrier(): using the device under current context. You can specify `device_id` in `init_process_group` to mute this warning.\r\n",
|
| 367 |
+
" return func(*args, **kwargs)\r\n",
|
| 368 |
+
"/usr/local/lib/python3.12/dist-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py:822: FutureWarning: FSDP.state_dict_type() and FSDP.set_state_dict_type() are being deprecated. Please use APIs, get_state_dict() and set_state_dict(), which can support different parallelisms, FSDP1, FSDP2, DDP. API doc: https://pytorch.org/docs/stable/distributed.checkpoint.html#torch.distributed.checkpoint.state_dict.get_state_dict .Tutorial: https://pytorch.org/tutorials/recipes/distributed_checkpoint_recipe.html .\r\n",
|
| 369 |
+
" prev_state_dict_settings = FullyShardedDataParallel.set_state_dict_type(\r\n",
|
| 370 |
+
"/usr/local/lib/python3.12/dist-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py:822: FutureWarning: FSDP.state_dict_type() and FSDP.set_state_dict_type() are being deprecated. Please use APIs, get_state_dict() and set_state_dict(), which can support different parallelisms, FSDP1, FSDP2, DDP. API doc: https://pytorch.org/docs/stable/distributed.checkpoint.html#torch.distributed.checkpoint.state_dict.get_state_dict .Tutorial: https://pytorch.org/tutorials/recipes/distributed_checkpoint_recipe.html .\r\n",
|
| 371 |
+
" prev_state_dict_settings = FullyShardedDataParallel.set_state_dict_type(\r\n",
|
| 372 |
+
"/usr/local/lib/python3.12/dist-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py:829: FutureWarning: FSDP.state_dict_type() and FSDP.set_state_dict_type() are being deprecated. Please use APIs, get_state_dict() and set_state_dict(), which can support different parallelisms, FSDP1, FSDP2, DDP. API doc: https://pytorch.org/docs/stable/distributed.checkpoint.html#torch.distributed.checkpoint.state_dict.get_state_dict .Tutorial: https://pytorch.org/tutorials/recipes/distributed_checkpoint_recipe.html .\r\n",
|
| 373 |
+
" FullyShardedDataParallel.set_state_dict_type(\r\n",
|
| 374 |
+
"/usr/local/lib/python3.12/dist-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py:829: FutureWarning: FSDP.state_dict_type() and FSDP.set_state_dict_type() are being deprecated. Please use APIs, get_state_dict() and set_state_dict(), which can support different parallelisms, FSDP1, FSDP2, DDP. API doc: https://pytorch.org/docs/stable/distributed.checkpoint.html#torch.distributed.checkpoint.state_dict.get_state_dict .Tutorial: https://pytorch.org/tutorials/recipes/distributed_checkpoint_recipe.html .\r\n",
|
| 375 |
+
" FullyShardedDataParallel.set_state_dict_type(\r\n",
|
| 376 |
+
"Step 1010 | Training Loss: 0.1453\r\n",
|
| 377 |
+
"Step 1020 | Training Loss: 0.0006\r\n",
|
| 378 |
+
"Step 1030 | Training Loss: 0.2647\r\n",
|
| 379 |
+
"Step 1040 | Training Loss: 0.0012\r\n",
|
| 380 |
+
"Step 1050 | Training Loss: 0.1026\r\n",
|
| 381 |
+
"Step 1060 | Training Loss: 0.0004\r\n",
|
| 382 |
+
"Step 1070 | Training Loss: 0.1035\r\n",
|
| 383 |
+
"Step 1080 | Training Loss: 0.0002\r\n",
|
| 384 |
+
"Step 1090 | Training Loss: 1.6820\r\n",
|
| 385 |
+
"Step 1100 | Training Loss: 0.0005\r\n",
|
| 386 |
+
"Step 1110 | Training Loss: 1.1139\r\n",
|
| 387 |
+
"Step 1120 | Training Loss: 0.0001\r\n",
|
| 388 |
+
"Step 1130 | Training Loss: 0.2840\r\n",
|
| 389 |
+
"Step 1140 | Training Loss: 0.0004\r\n",
|
| 390 |
+
"Step 1150 | Training Loss: 0.0941\r\n",
|
| 391 |
+
"Step 1160 | Training Loss: 0.0002\r\n",
|
| 392 |
+
"Step 1170 | Training Loss: 0.8995\r\n",
|
| 393 |
+
"Step 1180 | Training Loss: 0.0001\r\n",
|
| 394 |
+
"Step 1190 | Training Loss: 1.3411\r\n",
|
| 395 |
+
"Step 1200 | Training Loss: 0.0003\r\n",
|
| 396 |
+
"Step 1210 | Training Loss: 0.1949\r\n",
|
| 397 |
+
"Step 1220 | Training Loss: 0.0004\r\n",
|
| 398 |
+
"Step 1230 | Training Loss: 0.0691\r\n",
|
| 399 |
+
"Step 1240 | Training Loss: 0.0007\r\n",
|
| 400 |
+
"Step 1250 | Training Loss: 1.2051\r\n",
|
| 401 |
+
"Step 1260 | Training Loss: 0.0004\r\n",
|
| 402 |
+
"Step 1270 | Training Loss: 0.1555\r\n",
|
| 403 |
+
"Step 1280 | Training Loss: 0.0006\r\n",
|
| 404 |
+
"Step 1290 | Training Loss: 0.5580\r\n",
|
| 405 |
+
"Step 1300 | Training Loss: 0.0001\r\n",
|
| 406 |
+
"Step 1310 | Training Loss: 0.1162\r\n",
|
| 407 |
+
"Step 1320 | Training Loss: 0.0001\r\n",
|
| 408 |
+
"Step 1330 | Training Loss: 0.0001\r\n",
|
| 409 |
+
"Step 1340 | Training Loss: 0.0004\r\n",
|
| 410 |
+
"Step 1350 | Training Loss: 0.1885\r\n",
|
| 411 |
+
"Step 1360 | Training Loss: 0.0002\r\n",
|
| 412 |
+
"Step 1370 | Training Loss: 0.0299\r\n",
|
| 413 |
+
"Step 1380 | Training Loss: 0.0000\r\n",
|
| 414 |
+
"Step 1390 | Training Loss: 0.0988\r\n",
|
| 415 |
+
"Step 1400 | Training Loss: 0.0003\r\n",
|
| 416 |
+
"Step 1410 | Training Loss: 0.0201\r\n",
|
| 417 |
+
"Step 1420 | Training Loss: 0.0001\r\n",
|
| 418 |
+
"Step 1430 | Training Loss: 0.3336\r\n",
|
| 419 |
+
"Step 1440 | Training Loss: 0.0002\r\n",
|
| 420 |
+
"Step 1450 | Training Loss: 0.1233\r\n",
|
| 421 |
+
"Step 1460 | Training Loss: 0.0001\r\n",
|
| 422 |
+
"Step 1470 | Training Loss: 0.0186\r\n",
|
| 423 |
+
"Step 1480 | Training Loss: 0.0000\r\n",
|
| 424 |
+
"Step 1490 | Training Loss: 0.0482\r\n",
|
| 425 |
+
"Step 1500 | Training Loss: 0.0001\r\n",
|
| 426 |
+
"Step 1510 | Training Loss: 0.0006\r\n",
|
| 427 |
+
"Step 1520 | Training Loss: 0.0001\r\n",
|
| 428 |
+
"Step 1530 | Training Loss: 0.0243\r\n",
|
| 429 |
+
"Step 1540 | Training Loss: 0.0001\r\n",
|
| 430 |
+
"Step 1550 | Training Loss: 0.0009\r\n",
|
| 431 |
+
"Step 1560 | Training Loss: 0.0002\r\n",
|
| 432 |
+
"Step 1570 | Training Loss: 0.0005\r\n",
|
| 433 |
+
"Step 1580 | Training Loss: 0.0002\r\n",
|
| 434 |
+
"Step 1590 | Training Loss: 0.0783\r\n",
|
| 435 |
+
"Step 1600 | Training Loss: 0.0001\r\n",
|
| 436 |
+
"Step 1610 | Training Loss: 0.0009\r\n",
|
| 437 |
+
"Step 1620 | Training Loss: 0.0002\r\n",
|
| 438 |
+
"Step 1630 | Training Loss: 0.0460\r\n",
|
| 439 |
+
"Step 1640 | Training Loss: 0.0001\r\n",
|
| 440 |
+
"Step 1650 | Training Loss: 0.2914\r\n",
|
| 441 |
+
"Step 1660 | Training Loss: 0.0000\r\n",
|
| 442 |
+
"Step 1670 | Training Loss: 0.1425\r\n",
|
| 443 |
+
"Step 1680 | Training Loss: 0.0001\r\n",
|
| 444 |
+
"Step 1690 | Training Loss: 0.0000\r\n",
|
| 445 |
+
"Step 1700 | Training Loss: 0.0001\r\n",
|
| 446 |
+
"Step 1710 | Training Loss: 0.0477\r\n",
|
| 447 |
+
"Step 1720 | Training Loss: 0.0000\r\n",
|
| 448 |
+
"Step 1730 | Training Loss: 0.0257\r\n",
|
| 449 |
+
"Step 1740 | Training Loss: 0.0004\r\n",
|
| 450 |
+
"Step 1750 | Training Loss: 0.2631\r\n",
|
| 451 |
+
"Step 1760 | Training Loss: 0.0004\r\n",
|
| 452 |
+
"Step 1770 | Training Loss: 0.0791\r\n",
|
| 453 |
+
"Step 1780 | Training Loss: 0.0002\r\n",
|
| 454 |
+
"Step 1790 | Training Loss: 0.0384\r\n",
|
| 455 |
+
"Step 1800 | Training Loss: 0.0001\r\n",
|
| 456 |
+
"Step 1810 | Training Loss: 0.1196\r\n",
|
| 457 |
+
"Step 1820 | Training Loss: 0.0001\r\n",
|
| 458 |
+
"Step 1830 | Training Loss: 0.0466\r\n",
|
| 459 |
+
"Step 1840 | Training Loss: 0.0001\r\n",
|
| 460 |
+
"Step 1850 | Training Loss: 0.0519\r\n",
|
| 461 |
+
"Step 1860 | Training Loss: 0.0001\r\n",
|
| 462 |
+
"Step 1870 | Training Loss: 0.0227\r\n",
|
| 463 |
+
"Step 1880 | Training Loss: 0.0002\r\n",
|
| 464 |
+
"Step 1890 | Training Loss: 0.0516\r\n",
|
| 465 |
+
"Step 1900 | Training Loss: 0.0000\r\n",
|
| 466 |
+
"Step 1910 | Training Loss: 0.0068\r\n",
|
| 467 |
+
"Step 1920 | Training Loss: 0.0001\r\n",
|
| 468 |
+
"Step 1930 | Training Loss: 0.2023\r\n",
|
| 469 |
+
"Step 1940 | Training Loss: 0.0001\r\n",
|
| 470 |
+
"Step 1950 | Training Loss: 0.0738\r\n",
|
| 471 |
+
"Step 1960 | Training Loss: 0.0001\r\n",
|
| 472 |
+
"Step 1970 | Training Loss: 0.0005\r\n",
|
| 473 |
+
"Step 1980 | Training Loss: 0.0001\r\n",
|
| 474 |
+
"Step 1990 | Training Loss: 0.0065\r\n",
|
| 475 |
+
"Step 2000 | Training Loss: 0.0002\r\n",
|
| 476 |
+
"Step 2010 | Training Loss: 0.0009\r\n",
|
| 477 |
+
"Step 2020 | Training Loss: 0.0001\r\n",
|
| 478 |
+
"Step 2030 | Training Loss: 0.0011\r\n",
|
| 479 |
+
"Step 2040 | Training Loss: 0.0001\r\n",
|
| 480 |
+
"Step 2050 | Training Loss: 0.5175\r\n",
|
| 481 |
+
"Step 2060 | Training Loss: 0.0002\r\n",
|
| 482 |
+
"Step 2070 | Training Loss: 0.2302\r\n",
|
| 483 |
+
"Step 2080 | Training Loss: 0.0000\r\n",
|
| 484 |
+
"Step 2090 | Training Loss: 0.1011\r\n",
|
| 485 |
+
"Step 2100 | Training Loss: 0.0001\r\n",
|
| 486 |
+
"Step 2110 | Training Loss: 0.0005\r\n",
|
| 487 |
+
"Step 2120 | Training Loss: 0.0002\r\n",
|
| 488 |
+
"Step 2130 | Training Loss: 0.0274\r\n",
|
| 489 |
+
"Step 2140 | Training Loss: 0.0001\r\n",
|
| 490 |
+
"Step 2150 | Training Loss: 0.0000\r\n",
|
| 491 |
+
"Step 2160 | Training Loss: 0.0000\r\n",
|
| 492 |
+
"Step 2170 | Training Loss: 0.0155\r\n",
|
| 493 |
+
"Step 2180 | Training Loss: 0.0000\r\n",
|
| 494 |
+
"Step 2190 | Training Loss: 0.0015\r\n",
|
| 495 |
+
"Step 2200 | Training Loss: 0.0002\r\n",
|
| 496 |
+
"Step 2210 | Training Loss: 0.0175\r\n",
|
| 497 |
+
"Step 2220 | Training Loss: 0.0001\r\n",
|
| 498 |
+
"Step 2230 | Training Loss: 0.0258\r\n",
|
| 499 |
+
"Step 2240 | Training Loss: 0.0001\r\n",
|
| 500 |
+
"Step 2250 | Training Loss: 0.0509\r\n",
|
| 501 |
+
"Step 2260 | Training Loss: 0.0001\r\n",
|
| 502 |
+
"Step 2270 | Training Loss: 0.2298\r\n",
|
| 503 |
+
"Step 2280 | Training Loss: 0.0000\r\n",
|
| 504 |
+
"Step 2290 | Training Loss: 0.0509\r\n",
|
| 505 |
+
"Step 2300 | Training Loss: 0.0001\r\n",
|
| 506 |
+
"Step 2310 | Training Loss: 0.0892\r\n",
|
| 507 |
+
"Step 2320 | Training Loss: 0.0000\r\n",
|
| 508 |
+
"Step 2330 | Training Loss: 0.0255\r\n",
|
| 509 |
+
"Step 2340 | Training Loss: 0.0001\r\n",
|
| 510 |
+
"Step 2350 | Training Loss: 0.1951\r\n",
|
| 511 |
+
"Step 2360 | Training Loss: 0.0000\r\n",
|
| 512 |
+
"Step 2370 | Training Loss: 0.0458\r\n",
|
| 513 |
+
"Step 2380 | Training Loss: 0.0002\r\n",
|
| 514 |
+
"Step 2390 | Training Loss: 0.6189\r\n",
|
| 515 |
+
"Step 2400 | Training Loss: 0.0001\r\n",
|
| 516 |
+
"Step 2410 | Training Loss: 0.0008\r\n",
|
| 517 |
+
"Step 2420 | Training Loss: 0.0002\r\n",
|
| 518 |
+
"Step 2430 | Training Loss: 0.1199\r\n",
|
| 519 |
+
"Step 2440 | Training Loss: 0.0000\r\n",
|
| 520 |
+
"Step 2450 | Training Loss: 0.1395\r\n",
|
| 521 |
+
"Step 2460 | Training Loss: 0.0000\r\n",
|
| 522 |
+
"Step 2470 | Training Loss: 0.5506\r\n",
|
| 523 |
+
"Step 2480 | Training Loss: 0.0002\r\n",
|
| 524 |
+
"Step 2490 | Training Loss: 0.3704\r\n",
|
| 525 |
+
"Step 2500 | Training Loss: 0.0002\r\n",
|
| 526 |
+
"Step 2510 | Training Loss: 0.0844\r\n",
|
| 527 |
+
"Step 2520 | Training Loss: 0.0000\r\n",
|
| 528 |
+
"Step 2530 | Training Loss: 0.8372\r\n",
|
| 529 |
+
"Step 2540 | Training Loss: 0.0001\r\n",
|
| 530 |
+
"Step 2550 | Training Loss: 0.1077\r\n",
|
| 531 |
+
"Step 2560 | Training Loss: 0.0000\r\n",
|
| 532 |
+
"Step 2570 | Training Loss: 0.0242\r\n",
|
| 533 |
+
"Step 2580 | Training Loss: 0.0001\r\n",
|
| 534 |
+
"Step 2590 | Training Loss: 0.2288\r\n",
|
| 535 |
+
"Step 2600 | Training Loss: 0.0002\r\n",
|
| 536 |
+
"Step 2610 | Training Loss: 0.0235\r\n",
|
| 537 |
+
"Step 2620 | Training Loss: 0.0000\r\n",
|
| 538 |
+
"Step 2630 | Training Loss: 0.0002\r\n",
|
| 539 |
+
"Step 2640 | Training Loss: 0.0002\r\n",
|
| 540 |
+
"Step 2650 | Training Loss: 0.5299\r\n",
|
| 541 |
+
"Step 2660 | Training Loss: 0.0001\r\n",
|
| 542 |
+
"Step 2670 | Training Loss: 0.0136\r\n",
|
| 543 |
+
"Step 2680 | Training Loss: 0.0000\r\n",
|
| 544 |
+
"Step 2690 | Training Loss: 0.0008\r\n",
|
| 545 |
+
"Step 2700 | Training Loss: 0.0000\r\n",
|
| 546 |
+
"Step 2710 | Training Loss: 0.0003\r\n",
|
| 547 |
+
"Step 2720 | Training Loss: 0.0000\r\n",
|
| 548 |
+
"Step 2730 | Training Loss: 0.0721\r\n",
|
| 549 |
+
"Step 2740 | Training Loss: 0.0001\r\n",
|
| 550 |
+
"Step 2750 | Training Loss: 0.0136\r\n",
|
| 551 |
+
"Step 2760 | Training Loss: 0.0002\r\n",
|
| 552 |
+
"Step 2770 | Training Loss: 0.0116\r\n",
|
| 553 |
+
"Step 2780 | Training Loss: 0.0000\r\n",
|
| 554 |
+
"Step 2790 | Training Loss: 0.1945\r\n",
|
| 555 |
+
"Step 2800 | Training Loss: 0.0000\r\n",
|
| 556 |
+
"Step 2810 | Training Loss: 0.0251\r\n",
|
| 557 |
+
"Step 2820 | Training Loss: 0.0000\r\n",
|
| 558 |
+
"Step 2830 | Training Loss: 0.1457\r\n",
|
| 559 |
+
"Step 2840 | Training Loss: 0.0000\r\n",
|
| 560 |
+
"Step 2850 | Training Loss: 0.9311\r\n",
|
| 561 |
+
"Step 2860 | Training Loss: 0.0001\r\n",
|
| 562 |
+
"Step 2870 | Training Loss: 0.1772\r\n",
|
| 563 |
+
"Step 2880 | Training Loss: 0.0000\r\n",
|
| 564 |
+
"Step 2890 | Training Loss: 0.0556\r\n",
|
| 565 |
+
"Step 2900 | Training Loss: 0.0000\r\n",
|
| 566 |
+
"Step 2910 | Training Loss: 0.0123\r\n",
|
| 567 |
+
"Step 2920 | Training Loss: 0.0001\r\n",
|
| 568 |
+
"Step 2930 | Training Loss: 0.0005\r\n",
|
| 569 |
+
"Step 2940 | Training Loss: 0.0000\r\n",
|
| 570 |
+
"Step 2950 | Training Loss: 0.0004\r\n",
|
| 571 |
+
"Step 2960 | Training Loss: 0.0000\r\n",
|
| 572 |
+
"Step 2970 | Training Loss: 0.0006\r\n",
|
| 573 |
+
"Step 2980 | Training Loss: 0.0000\r\n",
|
| 574 |
+
"Step 2990 | Training Loss: 0.0021\r\n",
|
| 575 |
+
"Step 3000 | Training Loss: 0.0001\r\n",
|
| 576 |
+
"Step 3010 | Training Loss: 0.0278\r\n",
|
| 577 |
+
"Step 3020 | Training Loss: 0.0000\r\n",
|
| 578 |
+
"Step 3030 | Training Loss: 0.4339\r\n",
|
| 579 |
+
"Step 3040 | Training Loss: 0.0000\r\n",
|
| 580 |
+
"Step 3050 | Training Loss: 0.5201\r\n",
|
| 581 |
+
"Step 3060 | Training Loss: 0.0003\r\n",
|
| 582 |
+
"Step 3070 | Training Loss: 0.0404\r\n",
|
| 583 |
+
"Step 3080 | Training Loss: 0.0000\r\n",
|
| 584 |
+
"Step 3090 | Training Loss: 0.0507\r\n",
|
| 585 |
+
"Step 3100 | Training Loss: 0.0000\r\n",
|
| 586 |
+
"Step 3110 | Training Loss: 0.0233\r\n",
|
| 587 |
+
"Step 3120 | Training Loss: 0.0000\r\n",
|
| 588 |
+
"Step 3130 | Training Loss: 0.0000\r\n",
|
| 589 |
+
"Step 3140 | Training Loss: 0.0000\r\n",
|
| 590 |
+
"Step 3150 | Training Loss: 0.1742\r\n",
|
| 591 |
+
"Step 3160 | Training Loss: 0.0000\r\n",
|
| 592 |
+
"Step 3170 | Training Loss: 0.0041\r\n",
|
| 593 |
+
"Step 3180 | Training Loss: 0.0000\r\n",
|
| 594 |
+
"Step 3190 | Training Loss: 0.0227\r\n",
|
| 595 |
+
"Step 3200 | Training Loss: 0.0000\r\n",
|
| 596 |
+
"Step 3210 | Training Loss: 0.0346\r\n",
|
| 597 |
+
"Step 3220 | Training Loss: 0.0002\r\n",
|
| 598 |
+
"Step 3230 | Training Loss: 0.0016\r\n",
|
| 599 |
+
"Step 3240 | Training Loss: 0.0000\r\n",
|
| 600 |
+
"Step 3250 | Training Loss: 0.1968\r\n",
|
| 601 |
+
"Step 3260 | Training Loss: 0.0000\r\n",
|
| 602 |
+
"Step 3270 | Training Loss: 0.0158\r\n",
|
| 603 |
+
"Step 3280 | Training Loss: 0.0001\r\n",
|
| 604 |
+
"Step 3290 | Training Loss: 0.1573\r\n",
|
| 605 |
+
"Step 3300 | Training Loss: 0.0000\r\n",
|
| 606 |
+
"Step 3310 | Training Loss: 0.0178\r\n",
|
| 607 |
+
"Step 3320 | Training Loss: 0.0000\r\n",
|
| 608 |
+
"Step 3330 | Training Loss: 0.0231\r\n",
|
| 609 |
+
"Step 3340 | Training Loss: 0.0003\r\n",
|
| 610 |
+
"Step 3350 | Training Loss: 0.0002\r\n",
|
| 611 |
+
"Step 3360 | Training Loss: 0.0000\r\n",
|
| 612 |
+
"Step 3370 | Training Loss: 0.0724\r\n",
|
| 613 |
+
"Step 3380 | Training Loss: 0.0000\r\n",
|
| 614 |
+
"Step 3390 | Training Loss: 0.0320\r\n",
|
| 615 |
+
"Step 3400 | Training Loss: 0.0001\r\n",
|
| 616 |
+
"Step 3410 | Training Loss: 0.0659\r\n",
|
| 617 |
+
"Step 3420 | Training Loss: 0.0001\r\n",
|
| 618 |
+
"Step 3430 | Training Loss: 0.0882\r\n",
|
| 619 |
+
"Step 3440 | Training Loss: 0.0001\r\n",
|
| 620 |
+
"Step 3450 | Training Loss: 0.0303\r\n",
|
| 621 |
+
"Step 3460 | Training Loss: 0.0001\r\n",
|
| 622 |
+
"Step 3470 | Training Loss: 0.0018\r\n",
|
| 623 |
+
"Step 3480 | Training Loss: 0.0000\r\n",
|
| 624 |
+
"Step 3490 | Training Loss: 0.2110\r\n",
|
| 625 |
+
"Step 3500 | Training Loss: 0.0000\r\n",
|
| 626 |
+
"Step 3510 | Training Loss: 0.4730\r\n",
|
| 627 |
+
"Step 3520 | Training Loss: 0.0000\r\n",
|
| 628 |
+
"Step 3530 | Training Loss: 0.0251\r\n",
|
| 629 |
+
"Step 3540 | Training Loss: 0.0000\r\n",
|
| 630 |
+
"Step 3550 | Training Loss: 0.0082\r\n",
|
| 631 |
+
"Step 3560 | Training Loss: 0.0001\r\n",
|
| 632 |
+
"Step 3570 | Training Loss: 0.0221\r\n",
|
| 633 |
+
"Step 3580 | Training Loss: 0.0011\r\n",
|
| 634 |
+
"Step 3590 | Training Loss: 0.0272\r\n",
|
| 635 |
+
"Step 3600 | Training Loss: 0.0000\r\n",
|
| 636 |
+
"Step 3610 | Training Loss: 0.0345\r\n",
|
| 637 |
+
"Step 3620 | Training Loss: 0.0004\r\n",
|
| 638 |
+
"Step 3630 | Training Loss: 0.0056\r\n",
|
| 639 |
+
"Step 3640 | Training Loss: 0.0001\r\n",
|
| 640 |
+
"Step 3650 | Training Loss: 0.0252\r\n",
|
| 641 |
+
"Step 3660 | Training Loss: 0.0000\r\n",
|
| 642 |
+
"Step 3670 | Training Loss: 0.0409\r\n",
|
| 643 |
+
"Step 3680 | Training Loss: 0.0000\r\n",
|
| 644 |
+
"Step 3690 | Training Loss: 2.0241\r\n",
|
| 645 |
+
"Step 3700 | Training Loss: 0.0000\r\n",
|
| 646 |
+
"Step 3710 | Training Loss: 0.0006\r\n",
|
| 647 |
+
"Step 3720 | Training Loss: 0.0000\r\n",
|
| 648 |
+
"Step 3730 | Training Loss: 0.6780\r\n",
|
| 649 |
+
"Step 3740 | Training Loss: 0.0001\r\n",
|
| 650 |
+
"Step 3750 | Training Loss: 0.1512\r\n",
|
| 651 |
+
"Step 3760 | Training Loss: 0.0000\r\n",
|
| 652 |
+
"Step 3770 | Training Loss: 0.4934\r\n",
|
| 653 |
+
"Step 3780 | Training Loss: 0.0001\r\n",
|
| 654 |
+
"Step 3790 | Training Loss: 0.0246\r\n",
|
| 655 |
+
"Step 3800 | Training Loss: 0.0000\r\n",
|
| 656 |
+
"Step 3810 | Training Loss: 0.0523\r\n",
|
| 657 |
+
"Step 3820 | Training Loss: 0.0000\r\n",
|
| 658 |
+
"Step 3830 | Training Loss: 0.0001\r\n",
|
| 659 |
+
"Step 3840 | Training Loss: 0.0000\r\n",
|
| 660 |
+
"Step 3850 | Training Loss: 0.0863\r\n",
|
| 661 |
+
"Step 3860 | Training Loss: 0.0002\r\n",
|
| 662 |
+
"Step 3870 | Training Loss: 0.0430\r\n",
|
| 663 |
+
"Step 3880 | Training Loss: 0.0002\r\n",
|
| 664 |
+
"Step 3890 | Training Loss: 0.0335\r\n",
|
| 665 |
+
"Step 3900 | Training Loss: 0.0005\r\n",
|
| 666 |
+
"Step 3910 | Training Loss: 0.0301\r\n",
|
| 667 |
+
"Step 3920 | Training Loss: 0.0000\r\n",
|
| 668 |
+
"Step 3930 | Training Loss: 0.0009\r\n",
|
| 669 |
+
"Step 3940 | Training Loss: 0.0000\r\n",
|
| 670 |
+
"Step 3950 | Training Loss: 0.0433\r\n",
|
| 671 |
+
"Step 3960 | Training Loss: 0.0000\r\n",
|
| 672 |
+
"Step 3970 | Training Loss: 0.0300\r\n",
|
| 673 |
+
"Step 3980 | Training Loss: 0.0000\r\n",
|
| 674 |
+
"Step 3990 | Training Loss: 0.3666\r\n",
|
| 675 |
+
"Step 4000 | Training Loss: 0.0000\r\n",
|
| 676 |
+
"Step 4010 | Training Loss: 0.0014\r\n",
|
| 677 |
+
"Step 4020 | Training Loss: 0.0000\r\n",
|
| 678 |
+
"Step 4030 | Training Loss: 0.0352\r\n",
|
| 679 |
+
"Step 4040 | Training Loss: 0.0000\r\n",
|
| 680 |
+
"Step 4050 | Training Loss: 0.0020\r\n",
|
| 681 |
+
"Step 4060 | Training Loss: 0.0000\r\n",
|
| 682 |
+
"Step 4070 | Training Loss: 0.4625\r\n",
|
| 683 |
+
"Step 4080 | Training Loss: 0.0000\r\n",
|
| 684 |
+
"Step 4090 | Training Loss: 0.3424\r\n",
|
| 685 |
+
"Step 4100 | Training Loss: 0.0000\r\n",
|
| 686 |
+
"Step 4110 | Training Loss: 0.0007\r\n",
|
| 687 |
+
"Step 4120 | Training Loss: 0.0000\r\n",
|
| 688 |
+
"Step 4130 | Training Loss: 0.2989\r\n",
|
| 689 |
+
"Step 4140 | Training Loss: 0.0003\r\n",
|
| 690 |
+
"Step 4150 | Training Loss: 0.0118\r\n",
|
| 691 |
+
"Step 4160 | Training Loss: 0.0001\r\n",
|
| 692 |
+
"Step 4170 | Training Loss: 0.0295\r\n",
|
| 693 |
+
"Step 4180 | Training Loss: 0.0000\r\n",
|
| 694 |
+
"Step 4190 | Training Loss: 1.0331\r\n",
|
| 695 |
+
"Step 4200 | Training Loss: 0.0000\r\n",
|
| 696 |
+
"Step 4210 | Training Loss: 0.0557\r\n",
|
| 697 |
+
"Step 4220 | Training Loss: 0.0001\r\n",
|
| 698 |
+
"Step 4230 | Training Loss: 0.7705\r\n",
|
| 699 |
+
"Step 4240 | Training Loss: 0.0005\r\n",
|
| 700 |
+
"Step 4250 | Training Loss: 0.0817\r\n",
|
| 701 |
+
"Step 4260 | Training Loss: 0.0001\r\n",
|
| 702 |
+
"Step 4270 | Training Loss: 0.0491\r\n",
|
| 703 |
+
"Step 4280 | Training Loss: 0.0000\r\n",
|
| 704 |
+
"Step 4290 | Training Loss: 0.3100\r\n",
|
| 705 |
+
"Step 4300 | Training Loss: 0.0000\r\n",
|
| 706 |
+
"Step 4310 | Training Loss: 0.3296\r\n",
|
| 707 |
+
"Step 4320 | Training Loss: 0.0000\r\n",
|
| 708 |
+
"Step 4330 | Training Loss: 0.8703\r\n",
|
| 709 |
+
"Step 4340 | Training Loss: 0.0000\r\n",
|
| 710 |
+
"Step 4350 | Training Loss: 0.0738\r\n",
|
| 711 |
+
"Step 4360 | Training Loss: 0.0000\r\n",
|
| 712 |
+
"Step 4370 | Training Loss: 0.0085\r\n",
|
| 713 |
+
"Step 4380 | Training Loss: 0.0000\r\n",
|
| 714 |
+
"Step 4390 | Training Loss: 0.1303\r\n",
|
| 715 |
+
"Step 4400 | Training Loss: 0.0000\r\n",
|
| 716 |
+
"Step 4410 | Training Loss: 0.0038\r\n",
|
| 717 |
+
"Step 4420 | Training Loss: 0.0044\r\n",
|
| 718 |
+
"Step 4430 | Training Loss: 0.0003\r\n",
|
| 719 |
+
"Step 4440 | Training Loss: 0.0000\r\n",
|
| 720 |
+
"Step 4450 | Training Loss: 0.0006\r\n",
|
| 721 |
+
"Step 4460 | Training Loss: 0.0000\r\n",
|
| 722 |
+
"Step 4470 | Training Loss: 0.0084\r\n",
|
| 723 |
+
"Step 4480 | Training Loss: 0.0001\r\n",
|
| 724 |
+
"Step 4490 | Training Loss: 0.0307\r\n",
|
| 725 |
+
"Step 4500 | Training Loss: 0.0001\r\n",
|
| 726 |
+
"Step 4510 | Training Loss: 0.0135\r\n",
|
| 727 |
+
"Step 4520 | Training Loss: 0.0000\r\n",
|
| 728 |
+
"Step 4530 | Training Loss: 0.0000\r\n",
|
| 729 |
+
"Step 4540 | Training Loss: 0.0001\r\n",
|
| 730 |
+
"Step 4550 | Training Loss: 0.0099\r\n",
|
| 731 |
+
"Step 4560 | Training Loss: 0.0000\r\n",
|
| 732 |
+
"Step 4570 | Training Loss: 0.0401\r\n",
|
| 733 |
+
"Step 4580 | Training Loss: 0.0000\r\n",
|
| 734 |
+
"Step 4590 | Training Loss: 0.0001\r\n",
|
| 735 |
+
"Step 4600 | Training Loss: 0.0000\r\n",
|
| 736 |
+
"Step 4610 | Training Loss: 0.0331\r\n",
|
| 737 |
+
"Step 4620 | Training Loss: 0.0000\r\n",
|
| 738 |
+
"Step 4630 | Training Loss: 0.0002\r\n",
|
| 739 |
+
"Step 4640 | Training Loss: 0.0000\r\n",
|
| 740 |
+
"Step 4650 | Training Loss: 0.0001\r\n",
|
| 741 |
+
"Step 4660 | Training Loss: 0.0000\r\n",
|
| 742 |
+
"Step 4670 | Training Loss: 0.7391\r\n",
|
| 743 |
+
"Step 4680 | Training Loss: 0.0000\r\n",
|
| 744 |
+
"Step 4690 | Training Loss: 0.0001\r\n",
|
| 745 |
+
"Step 4700 | Training Loss: 0.0000\r\n",
|
| 746 |
+
"Step 4710 | Training Loss: 0.2675\r\n",
|
| 747 |
+
"Step 4720 | Training Loss: 0.0000\r\n",
|
| 748 |
+
"Step 4730 | Training Loss: 0.0001\r\n",
|
| 749 |
+
"Step 4740 | Training Loss: 0.0000\r\n",
|
| 750 |
+
"Step 4750 | Training Loss: 0.1630\r\n",
|
| 751 |
+
"Step 4760 | Training Loss: 0.0000\r\n",
|
| 752 |
+
"Step 4770 | Training Loss: 0.0497\r\n",
|
| 753 |
+
"Step 4780 | Training Loss: 0.0000\r\n",
|
| 754 |
+
"Step 4790 | Training Loss: 0.0221\r\n",
|
| 755 |
+
"Step 4800 | Training Loss: 0.0000\r\n",
|
| 756 |
+
"Step 4810 | Training Loss: 0.0692\r\n",
|
| 757 |
+
"Step 4820 | Training Loss: 0.0000\r\n",
|
| 758 |
+
"Step 4830 | Training Loss: 0.0002\r\n",
|
| 759 |
+
"Step 4840 | Training Loss: 0.0000\r\n",
|
| 760 |
+
"Step 4850 | Training Loss: 0.0034\r\n",
|
| 761 |
+
"Step 4860 | Training Loss: 0.0000\r\n",
|
| 762 |
+
"Step 4870 | Training Loss: 0.1528\r\n",
|
| 763 |
+
"Step 4880 | Training Loss: 0.0000\r\n",
|
| 764 |
+
"Step 4890 | Training Loss: 0.0390\r\n",
|
| 765 |
+
"Step 4900 | Training Loss: 0.0000\r\n",
|
| 766 |
+
"Step 4910 | Training Loss: 0.6370\r\n",
|
| 767 |
+
"Step 4920 | Training Loss: 0.0000\r\n",
|
| 768 |
+
"Step 4930 | Training Loss: 0.0010\r\n",
|
| 769 |
+
"Step 4940 | Training Loss: 0.0000\r\n",
|
| 770 |
+
"Step 4950 | Training Loss: 0.8518\r\n",
|
| 771 |
+
"Step 4960 | Training Loss: 0.0000\r\n",
|
| 772 |
+
"Step 4970 | Training Loss: 0.1041\r\n",
|
| 773 |
+
"Step 4980 | Training Loss: 0.0002\r\n",
|
| 774 |
+
"Step 4990 | Training Loss: 0.0001\r\n",
|
| 775 |
+
"Step 5000 | Training Loss: 0.0000\r\n"
|
| 776 |
+
]
|
| 777 |
+
}
|
| 778 |
+
],
|
| 779 |
+
"source": [
|
| 780 |
+
"!torchrun --nproc_per_node=2 train_hexa_chat.py"
|
| 781 |
+
]
|
| 782 |
+
}
|
| 783 |
+
],
|
| 784 |
+
"metadata": {
|
| 785 |
+
"accelerator": "GPU",
|
| 786 |
+
"kaggle": {
|
| 787 |
+
"accelerator": "nvidiaTeslaT4",
|
| 788 |
+
"dataSources": [],
|
| 789 |
+
"dockerImageVersionId": 31329,
|
| 790 |
+
"isGpuEnabled": true,
|
| 791 |
+
"isInternetEnabled": true,
|
| 792 |
+
"language": "python",
|
| 793 |
+
"sourceType": "notebook"
|
| 794 |
+
},
|
| 795 |
+
"kernelspec": {
|
| 796 |
+
"display_name": "Python 3",
|
| 797 |
+
"language": "python",
|
| 798 |
+
"name": "python3"
|
| 799 |
+
},
|
| 800 |
+
"language_info": {
|
| 801 |
+
"codemirror_mode": {
|
| 802 |
+
"name": "ipython",
|
| 803 |
+
"version": 3
|
| 804 |
+
},
|
| 805 |
+
"file_extension": ".py",
|
| 806 |
+
"mimetype": "text/x-python",
|
| 807 |
+
"name": "python",
|
| 808 |
+
"nbconvert_exporter": "python",
|
| 809 |
+
"pygments_lexer": "ipython3",
|
| 810 |
+
"version": "3.12.12"
|
| 811 |
+
},
|
| 812 |
+
"papermill": {
|
| 813 |
+
"default_parameters": {},
|
| 814 |
+
"duration": 24168.351963,
|
| 815 |
+
"end_time": "2026-04-28T11:01:25.131011+00:00",
|
| 816 |
+
"environment_variables": {},
|
| 817 |
+
"exception": null,
|
| 818 |
+
"input_path": "__notebook__.ipynb",
|
| 819 |
+
"output_path": "__notebook__.ipynb",
|
| 820 |
+
"parameters": {},
|
| 821 |
+
"start_time": "2026-04-28T04:18:36.779048+00:00",
|
| 822 |
+
"version": "2.7.0"
|
| 823 |
+
}
|
| 824 |
+
},
|
| 825 |
+
"nbformat": 4,
|
| 826 |
+
"nbformat_minor": 5
|
| 827 |
+
}
|
djalokd/hexa1b/__output__.json
ADDED
|
@@ -0,0 +1,545 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[{"stream_name":"stderr","time":6.808437572,"data":"0.00s - Debugger warning: It seems that frozen modules are being used, which may\n"}
|
| 2 |
+
,{"stream_name":"stderr","time":6.808497418,"data":"0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off\n"}
|
| 3 |
+
,{"stream_name":"stderr","time":6.808503548,"data":"0.00s - to python to disable frozen modules.\n"}
|
| 4 |
+
,{"stream_name":"stderr","time":6.808507166,"data":"0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.\n"}
|
| 5 |
+
,{"stream_name":"stderr","time":7.391026386,"data":"0.00s - Debugger warning: It seems that frozen modules are being used, which may\n"}
|
| 6 |
+
,{"stream_name":"stderr","time":7.3910798,"data":"0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off\n"}
|
| 7 |
+
,{"stream_name":"stderr","time":7.391086058,"data":"0.00s - to python to disable frozen modules.\n"}
|
| 8 |
+
,{"stream_name":"stderr","time":7.391090786,"data":"0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.\n"}
|
| 9 |
+
,{"stream_name":"stdout","time":13.029465517,"data":"Writing train_hexa_chat.py\n"}
|
| 10 |
+
,{"stream_name":"stdout","time":13.051574856,"data":"total 20K\r\n"}
|
| 11 |
+
,{"stream_name":"stdout","time":13.051607185,"data":"---------- 1 root root 9.5K Apr 28 04:18 __notebook__.ipynb\r\n"}
|
| 12 |
+
,{"stream_name":"stdout","time":13.051613045,"data":"-rw-r--r-- 1 root root 4.4K Apr 28 04:18 train_hexa_chat.py\r\n"}
|
| 13 |
+
,{"stream_name":"stdout","time":17.867401741,"data":"W0428 04:18:48.642000 50 torch/distributed/run.py:852] \r\n"}
|
| 14 |
+
,{"stream_name":"stdout","time":17.867435077,"data":"W0428 04:18:48.642000 50 torch/distributed/run.py:852] *****************************************\r\n"}
|
| 15 |
+
,{"stream_name":"stdout","time":17.867443866,"data":"W0428 04:18:48.642000 50 torch/distributed/run.py:852] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. \r\n"}
|
| 16 |
+
,{"stream_name":"stdout","time":17.867447691,"data":"W0428 04:18:48.642000 50 torch/distributed/run.py:852] *****************************************\r\n"}
|
| 17 |
+
,{"stream_name":"stdout","time":47.225515588,"data":"Warning: You are sending unauthenticated requests to the HF Hub. Please set a HF_TOKEN to enable higher rate limits and faster downloads.\r\n"}
|
| 18 |
+
,{"stream_name":"stdout","time":47.27470609,"data":"Warning: You are sending unauthenticated requests to the HF Hub. Please set a HF_TOKEN to enable higher rate limits and faster downloads.\r\n"}
|
| 19 |
+
,{"stream_name":"stdout","time":47.325280512,"data":"\rconfig.json: 0%| | 0.00/665 [00:00\u003c?, ?B/s]\rconfig.json: 100%|█████████████████████████████| 665/665 [00:00\u003c00:00, 2.66MB/s]\r\n"}
|
| 20 |
+
,{"stream_name":"stdout","time":47.477171123,"data":"\rtokenizer_config.json: 0%| | 0.00/26.0 [00:00\u003c?, ?B/s]\rtokenizer_config.json: 100%|██████████████████| 26.0/26.0 [00:00\u003c00:00, 139kB/s]\r\n"}
|
| 21 |
+
,{"stream_name":"stdout","time":47.931687771,"data":"\rvocab.json: 0.00B [00:00, ?B/s]\rvocab.json: 1.04MB [00:00, 10.4MB/s]\r\n"}
|
| 22 |
+
,{"stream_name":"stdout","time":48.134181943,"data":"\rmerges.txt: 0.00B [00:00, ?B/s]\rmerges.txt: 456kB [00:00, 7.10MB/s]\r\n"}
|
| 23 |
+
,{"stream_name":"stdout","time":48.488396243,"data":"\rtokenizer.json: 0.00B [00:00, ?B/s]\rtokenizer.json: 1.36MB [00:00, 17.9MB/s]\r\n"}
|
| 24 |
+
,{"stream_name":"stdout","time":48.944691142,"data":"\rREADME.md: 0.00B [00:00, ?B/s]\rREADME.md: 3.90kB [00:00, 8.56MB/s]\r\n"}
|
| 25 |
+
,{"stream_name":"stdout","time":49.65203475,"data":"--- Hexa-1B Launch: Hexa Innovate Org (CEO: Madhab) ---\r\n"}
|
| 26 |
+
,{"stream_name":"stdout","time":61.90247752,"data":"Step 0 | Training Loss: 11.2874\r\n"}
|
| 27 |
+
,{"stream_name":"stdout","time":108.383813125,"data":"Step 10 | Training Loss: 8.6067\r\n"}
|
| 28 |
+
,{"stream_name":"stdout","time":155.204178837,"data":"Step 20 | Training Loss: 0.3263\r\n"}
|
| 29 |
+
,{"stream_name":"stdout","time":201.81732827,"data":"Step 30 | Training Loss: 0.1322\r\n"}
|
| 30 |
+
,{"stream_name":"stdout","time":248.465930559,"data":"Step 40 | Training Loss: 0.1988\r\n"}
|
| 31 |
+
,{"stream_name":"stdout","time":295.617594775,"data":"Step 50 | Training Loss: 0.6243\r\n"}
|
| 32 |
+
,{"stream_name":"stdout","time":342.815805496,"data":"Step 60 | Training Loss: 0.2633\r\n"}
|
| 33 |
+
,{"stream_name":"stdout","time":389.970312742,"data":"Step 70 | Training Loss: 6.3903\r\n"}
|
| 34 |
+
,{"stream_name":"stdout","time":437.286587541,"data":"Step 80 | Training Loss: 0.1481\r\n"}
|
| 35 |
+
,{"stream_name":"stdout","time":484.493630414,"data":"Step 90 | Training Loss: 0.6854\r\n"}
|
| 36 |
+
,{"stream_name":"stdout","time":531.952124285,"data":"Step 100 | Training Loss: 0.1256\r\n"}
|
| 37 |
+
,{"stream_name":"stdout","time":578.95939013,"data":"Step 110 | Training Loss: 4.5980\r\n"}
|
| 38 |
+
,{"stream_name":"stdout","time":626.369219815,"data":"Step 120 | Training Loss: 0.0851\r\n"}
|
| 39 |
+
,{"stream_name":"stdout","time":673.578903192,"data":"Step 130 | Training Loss: 2.2184\r\n"}
|
| 40 |
+
,{"stream_name":"stdout","time":720.732445861,"data":"Step 140 | Training Loss: 0.0332\r\n"}
|
| 41 |
+
,{"stream_name":"stdout","time":767.740265819,"data":"Step 150 | Training Loss: 2.7413\r\n"}
|
| 42 |
+
,{"stream_name":"stdout","time":814.571832823,"data":"Step 160 | Training Loss: 0.0331\r\n"}
|
| 43 |
+
,{"stream_name":"stdout","time":861.517013124,"data":"Step 170 | Training Loss: 0.5210\r\n"}
|
| 44 |
+
,{"stream_name":"stdout","time":908.353292758,"data":"Step 180 | Training Loss: 0.0071\r\n"}
|
| 45 |
+
,{"stream_name":"stdout","time":955.25218932,"data":"Step 190 | Training Loss: 0.2811\r\n"}
|
| 46 |
+
,{"stream_name":"stdout","time":1002.343513181,"data":"Step 200 | Training Loss: 0.0141\r\n"}
|
| 47 |
+
,{"stream_name":"stdout","time":1049.703510039,"data":"Step 210 | Training Loss: 0.4025\r\n"}
|
| 48 |
+
,{"stream_name":"stdout","time":1096.971265201,"data":"Step 220 | Training Loss: 0.0606\r\n"}
|
| 49 |
+
,{"stream_name":"stdout","time":1143.977726049,"data":"Step 230 | Training Loss: 0.0418\r\n"}
|
| 50 |
+
,{"stream_name":"stdout","time":1191.082157737,"data":"Step 240 | Training Loss: 0.0520\r\n"}
|
| 51 |
+
,{"stream_name":"stdout","time":1238.068213932,"data":"Step 250 | Training Loss: 0.4156\r\n"}
|
| 52 |
+
,{"stream_name":"stdout","time":1284.915325528,"data":"Step 260 | Training Loss: 0.0046\r\n"}
|
| 53 |
+
,{"stream_name":"stdout","time":1331.972957753,"data":"Step 270 | Training Loss: 0.1479\r\n"}
|
| 54 |
+
,{"stream_name":"stdout","time":1378.918994738,"data":"Step 280 | Training Loss: 0.0018\r\n"}
|
| 55 |
+
,{"stream_name":"stdout","time":1425.874653066,"data":"Step 290 | Training Loss: 0.1805\r\n"}
|
| 56 |
+
,{"stream_name":"stdout","time":1472.572203138,"data":"Step 300 | Training Loss: 0.0244\r\n"}
|
| 57 |
+
,{"stream_name":"stdout","time":1519.114334213,"data":"Step 310 | Training Loss: 0.6614\r\n"}
|
| 58 |
+
,{"stream_name":"stdout","time":1565.655457301,"data":"Step 320 | Training Loss: 0.0071\r\n"}
|
| 59 |
+
,{"stream_name":"stdout","time":1612.339782704,"data":"Step 330 | Training Loss: 0.0305\r\n"}
|
| 60 |
+
,{"stream_name":"stdout","time":1658.936917472,"data":"Step 340 | Training Loss: 0.0008\r\n"}
|
| 61 |
+
,{"stream_name":"stdout","time":1705.536396325,"data":"Step 350 | Training Loss: 0.2557\r\n"}
|
| 62 |
+
,{"stream_name":"stdout","time":1752.334752759,"data":"Step 360 | Training Loss: 0.0017\r\n"}
|
| 63 |
+
,{"stream_name":"stdout","time":1798.885688902,"data":"Step 370 | Training Loss: 0.1846\r\n"}
|
| 64 |
+
,{"stream_name":"stdout","time":1845.785532985,"data":"Step 380 | Training Loss: 0.0037\r\n"}
|
| 65 |
+
,{"stream_name":"stdout","time":1892.580086853,"data":"Step 390 | Training Loss: 2.8491\r\n"}
|
| 66 |
+
,{"stream_name":"stdout","time":1939.43017557,"data":"Step 400 | Training Loss: 0.0005\r\n"}
|
| 67 |
+
,{"stream_name":"stdout","time":1986.542103979,"data":"Step 410 | Training Loss: 0.0434\r\n"}
|
| 68 |
+
,{"stream_name":"stdout","time":2033.496598894,"data":"Step 420 | Training Loss: 0.0038\r\n"}
|
| 69 |
+
,{"stream_name":"stdout","time":2080.762465041,"data":"Step 430 | Training Loss: 0.0365\r\n"}
|
| 70 |
+
,{"stream_name":"stdout","time":2127.508031857,"data":"Step 440 | Training Loss: 0.0004\r\n"}
|
| 71 |
+
,{"stream_name":"stdout","time":2174.15572461,"data":"Step 450 | Training Loss: 0.7904\r\n"}
|
| 72 |
+
,{"stream_name":"stdout","time":2220.904541731,"data":"Step 460 | Training Loss: 0.0005\r\n"}
|
| 73 |
+
,{"stream_name":"stdout","time":2267.498727435,"data":"Step 470 | Training Loss: 0.2616\r\n"}
|
| 74 |
+
,{"stream_name":"stdout","time":2314.097261208,"data":"Step 480 | Training Loss: 0.0004\r\n"}
|
| 75 |
+
,{"stream_name":"stdout","time":2360.695388488,"data":"Step 490 | Training Loss: 0.2351\r\n"}
|
| 76 |
+
,{"stream_name":"stdout","time":2407.595503689,"data":"Step 500 | Training Loss: 0.0010\r\n"}
|
| 77 |
+
,{"stream_name":"stdout","time":2454.350551172,"data":"Step 510 | Training Loss: 0.2563\r\n"}
|
| 78 |
+
,{"stream_name":"stdout","time":2501.103094239,"data":"Step 520 | Training Loss: 0.0007\r\n"}
|
| 79 |
+
,{"stream_name":"stdout","time":2548.054201699,"data":"Step 530 | Training Loss: 0.2109\r\n"}
|
| 80 |
+
,{"stream_name":"stdout","time":2594.755137203,"data":"Step 540 | Training Loss: 0.0024\r\n"}
|
| 81 |
+
,{"stream_name":"stdout","time":2641.550108414,"data":"Step 550 | Training Loss: 0.0664\r\n"}
|
| 82 |
+
,{"stream_name":"stdout","time":2688.297649935,"data":"Step 560 | Training Loss: 0.0003\r\n"}
|
| 83 |
+
,{"stream_name":"stdout","time":2734.988681274,"data":"Step 570 | Training Loss: 0.7032\r\n"}
|
| 84 |
+
,{"stream_name":"stdout","time":2781.642493066,"data":"Step 580 | Training Loss: 0.0005\r\n"}
|
| 85 |
+
,{"stream_name":"stdout","time":2827.976514436,"data":"Step 590 | Training Loss: 0.0893\r\n"}
|
| 86 |
+
,{"stream_name":"stdout","time":2874.424109427,"data":"Step 600 | Training Loss: 0.0004\r\n"}
|
| 87 |
+
,{"stream_name":"stdout","time":2921.175015973,"data":"Step 610 | Training Loss: 1.7443\r\n"}
|
| 88 |
+
,{"stream_name":"stdout","time":2967.766642518,"data":"Step 620 | Training Loss: 0.0005\r\n"}
|
| 89 |
+
,{"stream_name":"stdout","time":3014.458575671,"data":"Step 630 | Training Loss: 0.0753\r\n"}
|
| 90 |
+
,{"stream_name":"stdout","time":3061.299308654,"data":"Step 640 | Training Loss: 0.0008\r\n"}
|
| 91 |
+
,{"stream_name":"stdout","time":3108.037402895,"data":"Step 650 | Training Loss: 1.7483\r\n"}
|
| 92 |
+
,{"stream_name":"stdout","time":3154.676746,"data":"Step 660 | Training Loss: 0.0005\r\n"}
|
| 93 |
+
,{"stream_name":"stdout","time":3201.417837541,"data":"Step 670 | Training Loss: 1.9604\r\n"}
|
| 94 |
+
,{"stream_name":"stdout","time":3248.016377668,"data":"Step 680 | Training Loss: 0.0003\r\n"}
|
| 95 |
+
,{"stream_name":"stdout","time":3294.858218866,"data":"Step 690 | Training Loss: 0.2785\r\n"}
|
| 96 |
+
,{"stream_name":"stdout","time":3341.69543324,"data":"Step 700 | Training Loss: 0.0007\r\n"}
|
| 97 |
+
,{"stream_name":"stdout","time":3388.692478376,"data":"Step 710 | Training Loss: 0.7514\r\n"}
|
| 98 |
+
,{"stream_name":"stdout","time":3435.636583256,"data":"Step 720 | Training Loss: 0.0012\r\n"}
|
| 99 |
+
,{"stream_name":"stdout","time":3482.735058233,"data":"Step 730 | Training Loss: 0.0776\r\n"}
|
| 100 |
+
,{"stream_name":"stdout","time":3529.734287033,"data":"Step 740 | Training Loss: 0.0004\r\n"}
|
| 101 |
+
,{"stream_name":"stdout","time":3576.639108713,"data":"Step 750 | Training Loss: 0.0778\r\n"}
|
| 102 |
+
,{"stream_name":"stdout","time":3623.799314556,"data":"Step 760 | Training Loss: 0.0003\r\n"}
|
| 103 |
+
,{"stream_name":"stdout","time":3670.849912731,"data":"Step 770 | Training Loss: 0.0912\r\n"}
|
| 104 |
+
,{"stream_name":"stdout","time":3717.844867666,"data":"Step 780 | Training Loss: 0.0005\r\n"}
|
| 105 |
+
,{"stream_name":"stdout","time":3764.845471614,"data":"Step 790 | Training Loss: 0.0517\r\n"}
|
| 106 |
+
,{"stream_name":"stdout","time":3812.196740008,"data":"Step 800 | Training Loss: 0.0003\r\n"}
|
| 107 |
+
,{"stream_name":"stdout","time":3859.188801882,"data":"Step 810 | Training Loss: 0.1760\r\n"}
|
| 108 |
+
,{"stream_name":"stdout","time":3906.180831022,"data":"Step 820 | Training Loss: 0.0032\r\n"}
|
| 109 |
+
,{"stream_name":"stdout","time":3953.010473391,"data":"Step 830 | Training Loss: 0.8470\r\n"}
|
| 110 |
+
,{"stream_name":"stdout","time":3999.795775898,"data":"Step 840 | Training Loss: 0.0003\r\n"}
|
| 111 |
+
,{"stream_name":"stdout","time":4046.892587335,"data":"Step 850 | Training Loss: 0.0006\r\n"}
|
| 112 |
+
,{"stream_name":"stdout","time":4093.784846125,"data":"Step 860 | Training Loss: 0.0004\r\n"}
|
| 113 |
+
,{"stream_name":"stdout","time":4140.982482668,"data":"Step 870 | Training Loss: 0.0730\r\n"}
|
| 114 |
+
,{"stream_name":"stdout","time":4188.122449379,"data":"Step 880 | Training Loss: 0.0004\r\n"}
|
| 115 |
+
,{"stream_name":"stdout","time":4235.376932521,"data":"Step 890 | Training Loss: 0.8615\r\n"}
|
| 116 |
+
,{"stream_name":"stdout","time":4282.726250544,"data":"Step 900 | Training Loss: 0.0008\r\n"}
|
| 117 |
+
,{"stream_name":"stdout","time":4329.778364551,"data":"Step 910 | Training Loss: 0.5945\r\n"}
|
| 118 |
+
,{"stream_name":"stdout","time":4377.031796887,"data":"Step 920 | Training Loss: 0.0003\r\n"}
|
| 119 |
+
,{"stream_name":"stdout","time":4424.23343341,"data":"Step 930 | Training Loss: 0.2304\r\n"}
|
| 120 |
+
,{"stream_name":"stdout","time":4471.535353832,"data":"Step 940 | Training Loss: 0.0006\r\n"}
|
| 121 |
+
,{"stream_name":"stdout","time":4518.576950463,"data":"Step 950 | Training Loss: 0.1666\r\n"}
|
| 122 |
+
,{"stream_name":"stdout","time":4565.980865473,"data":"Step 960 | Training Loss: 0.0003\r\n"}
|
| 123 |
+
,{"stream_name":"stdout","time":4613.382627414,"data":"Step 970 | Training Loss: 0.0193\r\n"}
|
| 124 |
+
,{"stream_name":"stdout","time":4661.203782099,"data":"Step 980 | Training Loss: 0.0002\r\n"}
|
| 125 |
+
,{"stream_name":"stdout","time":4708.552894254,"data":"Step 990 | Training Loss: 0.0255\r\n"}
|
| 126 |
+
,{"stream_name":"stdout","time":4755.805220008,"data":"Step 1000 | Training Loss: 0.0003\r\n"}
|
| 127 |
+
,{"stream_name":"stdout","time":4755.805280867,"data":"/usr/local/lib/python3.12/dist-packages/torch/distributed/c10d_logger.py:83: UserWarning: barrier(): using the device under current context. You can specify `device_id` in `init_process_group` to mute this warning.\r\n"}
|
| 128 |
+
,{"stream_name":"stdout","time":4755.805288553,"data":" return func(*args, **kwargs)\r\n"}
|
| 129 |
+
,{"stream_name":"stdout","time":4755.8052941,"data":"/usr/local/lib/python3.12/dist-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py:822: FutureWarning: FSDP.state_dict_type() and FSDP.set_state_dict_type() are being deprecated. Please use APIs, get_state_dict() and set_state_dict(), which can support different parallelisms, FSDP1, FSDP2, DDP. API doc: https://pytorch.org/docs/stable/distributed.checkpoint.html#torch.distributed.checkpoint.state_dict.get_state_dict .Tutorial: https://pytorch.org/tutorials/recipes/distributed_checkpoint_recipe.html .\r\n"}
|
| 130 |
+
,{"stream_name":"stdout","time":4755.805300855,"data":" prev_state_dict_settings = FullyShardedDataParallel.set_state_dict_type(\r\n"}
|
| 131 |
+
,{"stream_name":"stdout","time":4755.805305657,"data":"/usr/local/lib/python3.12/dist-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py:822: FutureWarning: FSDP.state_dict_type() and FSDP.set_state_dict_type() are being deprecated. Please use APIs, get_state_dict() and set_state_dict(), which can support different parallelisms, FSDP1, FSDP2, DDP. API doc: https://pytorch.org/docs/stable/distributed.checkpoint.html#torch.distributed.checkpoint.state_dict.get_state_dict .Tutorial: https://pytorch.org/tutorials/recipes/distributed_checkpoint_recipe.html .\r\n"}
|
| 132 |
+
,{"stream_name":"stdout","time":4755.805318443,"data":" prev_state_dict_settings = FullyShardedDataParallel.set_state_dict_type(\r\n"}
|
| 133 |
+
,{"stream_name":"stdout","time":4755.805322078,"data":"/usr/local/lib/python3.12/dist-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py:829: FutureWarning: FSDP.state_dict_type() and FSDP.set_state_dict_type() are being deprecated. Please use APIs, get_state_dict() and set_state_dict(), which can support different parallelisms, FSDP1, FSDP2, DDP. API doc: https://pytorch.org/docs/stable/distributed.checkpoint.html#torch.distributed.checkpoint.state_dict.get_state_dict .Tutorial: https://pytorch.org/tutorials/recipes/distributed_checkpoint_recipe.html .\r\n"}
|
| 134 |
+
,{"stream_name":"stdout","time":4755.805326861,"data":" FullyShardedDataParallel.set_state_dict_type(\r\n"}
|
| 135 |
+
,{"stream_name":"stdout","time":4763.91712272,"data":"/usr/local/lib/python3.12/dist-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py:829: FutureWarning: FSDP.state_dict_type() and FSDP.set_state_dict_type() are being deprecated. Please use APIs, get_state_dict() and set_state_dict(), which can support different parallelisms, FSDP1, FSDP2, DDP. API doc: https://pytorch.org/docs/stable/distributed.checkpoint.html#torch.distributed.checkpoint.state_dict.get_state_dict .Tutorial: https://pytorch.org/tutorials/recipes/distributed_checkpoint_recipe.html .\r\n"}
|
| 136 |
+
,{"stream_name":"stdout","time":4763.917213199,"data":" FullyShardedDataParallel.set_state_dict_type(\r\n"}
|
| 137 |
+
,{"stream_name":"stdout","time":4812.102295237,"data":"Step 1010 | Training Loss: 0.1453\r\n"}
|
| 138 |
+
,{"stream_name":"stdout","time":4859.882108769,"data":"Step 1020 | Training Loss: 0.0006\r\n"}
|
| 139 |
+
,{"stream_name":"stdout","time":4907.454210115,"data":"Step 1030 | Training Loss: 0.2647\r\n"}
|
| 140 |
+
,{"stream_name":"stdout","time":4955.368242307,"data":"Step 1040 | Training Loss: 0.0012\r\n"}
|
| 141 |
+
,{"stream_name":"stdout","time":5003.385523148,"data":"Step 1050 | Training Loss: 0.1026\r\n"}
|
| 142 |
+
,{"stream_name":"stdout","time":5051.398675192,"data":"Step 1060 | Training Loss: 0.0004\r\n"}
|
| 143 |
+
,{"stream_name":"stdout","time":5099.472584862,"data":"Step 1070 | Training Loss: 0.1035\r\n"}
|
| 144 |
+
,{"stream_name":"stdout","time":5147.443910901,"data":"Step 1080 | Training Loss: 0.0002\r\n"}
|
| 145 |
+
,{"stream_name":"stdout","time":5195.474166797,"data":"Step 1090 | Training Loss: 1.6820\r\n"}
|
| 146 |
+
,{"stream_name":"stdout","time":5243.390251336,"data":"Step 1100 | Training Loss: 0.0005\r\n"}
|
| 147 |
+
,{"stream_name":"stdout","time":5291.24644043,"data":"Step 1110 | Training Loss: 1.1139\r\n"}
|
| 148 |
+
,{"stream_name":"stdout","time":5338.737168099,"data":"Step 1120 | Training Loss: 0.0001\r\n"}
|
| 149 |
+
,{"stream_name":"stdout","time":5386.743877359,"data":"Step 1130 | Training Loss: 0.2840\r\n"}
|
| 150 |
+
,{"stream_name":"stdout","time":5434.351232105,"data":"Step 1140 | Training Loss: 0.0004\r\n"}
|
| 151 |
+
,{"stream_name":"stdout","time":5482.258724548,"data":"Step 1150 | Training Loss: 0.0941\r\n"}
|
| 152 |
+
,{"stream_name":"stdout","time":5529.907595833,"data":"Step 1160 | Training Loss: 0.0002\r\n"}
|
| 153 |
+
,{"stream_name":"stdout","time":5577.66211,"data":"Step 1170 | Training Loss: 0.8995\r\n"}
|
| 154 |
+
,{"stream_name":"stdout","time":5625.422534388,"data":"Step 1180 | Training Loss: 0.0001\r\n"}
|
| 155 |
+
,{"stream_name":"stdout","time":5673.117784228,"data":"Step 1190 | Training Loss: 1.3411\r\n"}
|
| 156 |
+
,{"stream_name":"stdout","time":5720.930187603,"data":"Step 1200 | Training Loss: 0.0003\r\n"}
|
| 157 |
+
,{"stream_name":"stdout","time":5768.585581434,"data":"Step 1210 | Training Loss: 0.1949\r\n"}
|
| 158 |
+
,{"stream_name":"stdout","time":5816.501288092,"data":"Step 1220 | Training Loss: 0.0004\r\n"}
|
| 159 |
+
,{"stream_name":"stdout","time":5864.16233361,"data":"Step 1230 | Training Loss: 0.0691\r\n"}
|
| 160 |
+
,{"stream_name":"stdout","time":5911.85040685,"data":"Step 1240 | Training Loss: 0.0007\r\n"}
|
| 161 |
+
,{"stream_name":"stdout","time":5959.712574355,"data":"Step 1250 | Training Loss: 1.2051\r\n"}
|
| 162 |
+
,{"stream_name":"stdout","time":6007.877709467,"data":"Step 1260 | Training Loss: 0.0004\r\n"}
|
| 163 |
+
,{"stream_name":"stdout","time":6056.001992296,"data":"Step 1270 | Training Loss: 0.1555\r\n"}
|
| 164 |
+
,{"stream_name":"stdout","time":6104.108405619,"data":"Step 1280 | Training Loss: 0.0006\r\n"}
|
| 165 |
+
,{"stream_name":"stdout","time":6152.024082329,"data":"Step 1290 | Training Loss: 0.5580\r\n"}
|
| 166 |
+
,{"stream_name":"stdout","time":6199.944481163,"data":"Step 1300 | Training Loss: 0.0001\r\n"}
|
| 167 |
+
,{"stream_name":"stdout","time":6247.804684893,"data":"Step 1310 | Training Loss: 0.1162\r\n"}
|
| 168 |
+
,{"stream_name":"stdout","time":6295.6129921,"data":"Step 1320 | Training Loss: 0.0001\r\n"}
|
| 169 |
+
,{"stream_name":"stdout","time":6343.622646497,"data":"Step 1330 | Training Loss: 0.0001\r\n"}
|
| 170 |
+
,{"stream_name":"stdout","time":6391.332560056,"data":"Step 1340 | Training Loss: 0.0004\r\n"}
|
| 171 |
+
,{"stream_name":"stdout","time":6439.554346073,"data":"Step 1350 | Training Loss: 0.1885\r\n"}
|
| 172 |
+
,{"stream_name":"stdout","time":6487.735699153,"data":"Step 1360 | Training Loss: 0.0002\r\n"}
|
| 173 |
+
,{"stream_name":"stdout","time":6535.854168446,"data":"Step 1370 | Training Loss: 0.0299\r\n"}
|
| 174 |
+
,{"stream_name":"stdout","time":6583.919860542,"data":"Step 1380 | Training Loss: 0.0000\r\n"}
|
| 175 |
+
,{"stream_name":"stdout","time":6631.836710166,"data":"Step 1390 | Training Loss: 0.0988\r\n"}
|
| 176 |
+
,{"stream_name":"stdout","time":6679.542831141,"data":"Step 1400 | Training Loss: 0.0003\r\n"}
|
| 177 |
+
,{"stream_name":"stdout","time":6727.554324613,"data":"Step 1410 | Training Loss: 0.0201\r\n"}
|
| 178 |
+
,{"stream_name":"stdout","time":6775.317800414,"data":"Step 1420 | Training Loss: 0.0001\r\n"}
|
| 179 |
+
,{"stream_name":"stdout","time":6823.470334427,"data":"Step 1430 | Training Loss: 0.3336\r\n"}
|
| 180 |
+
,{"stream_name":"stdout","time":6871.37692737,"data":"Step 1440 | Training Loss: 0.0002\r\n"}
|
| 181 |
+
,{"stream_name":"stdout","time":6919.284300922,"data":"Step 1450 | Training Loss: 0.1233\r\n"}
|
| 182 |
+
,{"stream_name":"stdout","time":6967.505978789,"data":"Step 1460 | Training Loss: 0.0001\r\n"}
|
| 183 |
+
,{"stream_name":"stdout","time":7015.52657389,"data":"Step 1470 | Training Loss: 0.0186\r\n"}
|
| 184 |
+
,{"stream_name":"stdout","time":7063.751781054,"data":"Step 1480 | Training Loss: 0.0000\r\n"}
|
| 185 |
+
,{"stream_name":"stdout","time":7111.858047466,"data":"Step 1490 | Training Loss: 0.0482\r\n"}
|
| 186 |
+
,{"stream_name":"stdout","time":7159.658458072,"data":"Step 1500 | Training Loss: 0.0001\r\n"}
|
| 187 |
+
,{"stream_name":"stdout","time":7207.662492858,"data":"Step 1510 | Training Loss: 0.0006\r\n"}
|
| 188 |
+
,{"stream_name":"stdout","time":7255.82932487,"data":"Step 1520 | Training Loss: 0.0001\r\n"}
|
| 189 |
+
,{"stream_name":"stdout","time":7303.793077624,"data":"Step 1530 | Training Loss: 0.0243\r\n"}
|
| 190 |
+
,{"stream_name":"stdout","time":7351.603975531,"data":"Step 1540 | Training Loss: 0.0001\r\n"}
|
| 191 |
+
,{"stream_name":"stdout","time":7399.415345643,"data":"Step 1550 | Training Loss: 0.0009\r\n"}
|
| 192 |
+
,{"stream_name":"stdout","time":7447.737379603,"data":"Step 1560 | Training Loss: 0.0002\r\n"}
|
| 193 |
+
,{"stream_name":"stdout","time":7496.112652686,"data":"Step 1570 | Training Loss: 0.0005\r\n"}
|
| 194 |
+
,{"stream_name":"stdout","time":7544.327514788,"data":"Step 1580 | Training Loss: 0.0002\r\n"}
|
| 195 |
+
,{"stream_name":"stdout","time":7592.64285207,"data":"Step 1590 | Training Loss: 0.0783\r\n"}
|
| 196 |
+
,{"stream_name":"stdout","time":7640.553459067,"data":"Step 1600 | Training Loss: 0.0001\r\n"}
|
| 197 |
+
,{"stream_name":"stdout","time":7688.76009634,"data":"Step 1610 | Training Loss: 0.0009\r\n"}
|
| 198 |
+
,{"stream_name":"stdout","time":7736.772224448,"data":"Step 1620 | Training Loss: 0.0002\r\n"}
|
| 199 |
+
,{"stream_name":"stdout","time":7784.881637017,"data":"Step 1630 | Training Loss: 0.0460\r\n"}
|
| 200 |
+
,{"stream_name":"stdout","time":7832.939793228,"data":"Step 1640 | Training Loss: 0.0001\r\n"}
|
| 201 |
+
,{"stream_name":"stdout","time":7881.358438893,"data":"Step 1650 | Training Loss: 0.2914\r\n"}
|
| 202 |
+
,{"stream_name":"stdout","time":7929.568153147,"data":"Step 1660 | Training Loss: 0.0000\r\n"}
|
| 203 |
+
,{"stream_name":"stdout","time":7977.723974606,"data":"Step 1670 | Training Loss: 0.1425\r\n"}
|
| 204 |
+
,{"stream_name":"stdout","time":8025.832625249,"data":"Step 1680 | Training Loss: 0.0001\r\n"}
|
| 205 |
+
,{"stream_name":"stdout","time":8074.193793917,"data":"Step 1690 | Training Loss: 0.0000\r\n"}
|
| 206 |
+
,{"stream_name":"stdout","time":8122.656383952,"data":"Step 1700 | Training Loss: 0.0001\r\n"}
|
| 207 |
+
,{"stream_name":"stdout","time":8170.798601539,"data":"Step 1710 | Training Loss: 0.0477\r\n"}
|
| 208 |
+
,{"stream_name":"stdout","time":8219.166304341,"data":"Step 1720 | Training Loss: 0.0000\r\n"}
|
| 209 |
+
,{"stream_name":"stdout","time":8267.580624377,"data":"Step 1730 | Training Loss: 0.0257\r\n"}
|
| 210 |
+
,{"stream_name":"stdout","time":8315.680862001,"data":"Step 1740 | Training Loss: 0.0004\r\n"}
|
| 211 |
+
,{"stream_name":"stdout","time":8363.734931554,"data":"Step 1750 | Training Loss: 0.2631\r\n"}
|
| 212 |
+
,{"stream_name":"stdout","time":8411.94452979,"data":"Step 1760 | Training Loss: 0.0004\r\n"}
|
| 213 |
+
,{"stream_name":"stdout","time":8459.888270087,"data":"Step 1770 | Training Loss: 0.0791\r\n"}
|
| 214 |
+
,{"stream_name":"stdout","time":8508.609884681,"data":"Step 1780 | Training Loss: 0.0002\r\n"}
|
| 215 |
+
,{"stream_name":"stdout","time":8556.720874265,"data":"Step 1790 | Training Loss: 0.0384\r\n"}
|
| 216 |
+
,{"stream_name":"stdout","time":8604.982741838,"data":"Step 1800 | Training Loss: 0.0001\r\n"}
|
| 217 |
+
,{"stream_name":"stdout","time":8653.395544701,"data":"Step 1810 | Training Loss: 0.1196\r\n"}
|
| 218 |
+
,{"stream_name":"stdout","time":8702.023179721,"data":"Step 1820 | Training Loss: 0.0001\r\n"}
|
| 219 |
+
,{"stream_name":"stdout","time":8750.530975787,"data":"Step 1830 | Training Loss: 0.0466\r\n"}
|
| 220 |
+
,{"stream_name":"stdout","time":8799.059151276,"data":"Step 1840 | Training Loss: 0.0001\r\n"}
|
| 221 |
+
,{"stream_name":"stdout","time":8847.57612919,"data":"Step 1850 | Training Loss: 0.0519\r\n"}
|
| 222 |
+
,{"stream_name":"stdout","time":8895.636976668,"data":"Step 1860 | Training Loss: 0.0001\r\n"}
|
| 223 |
+
,{"stream_name":"stdout","time":8944.050300751,"data":"Step 1870 | Training Loss: 0.0227\r\n"}
|
| 224 |
+
,{"stream_name":"stdout","time":8992.101234764,"data":"Step 1880 | Training Loss: 0.0002\r\n"}
|
| 225 |
+
,{"stream_name":"stdout","time":9040.325735749,"data":"Step 1890 | Training Loss: 0.0516\r\n"}
|
| 226 |
+
,{"stream_name":"stdout","time":9088.63660473,"data":"Step 1900 | Training Loss: 0.0000\r\n"}
|
| 227 |
+
,{"stream_name":"stdout","time":9137.415846989,"data":"Step 1910 | Training Loss: 0.0068\r\n"}
|
| 228 |
+
,{"stream_name":"stdout","time":9185.728635067,"data":"Step 1920 | Training Loss: 0.0001\r\n"}
|
| 229 |
+
,{"stream_name":"stdout","time":9234.875243625,"data":"Step 1930 | Training Loss: 0.2023\r\n"}
|
| 230 |
+
,{"stream_name":"stdout","time":9283.141236508,"data":"Step 1940 | Training Loss: 0.0001\r\n"}
|
| 231 |
+
,{"stream_name":"stdout","time":9331.5436015,"data":"Step 1950 | Training Loss: 0.0738\r\n"}
|
| 232 |
+
,{"stream_name":"stdout","time":9380.160375661,"data":"Step 1960 | Training Loss: 0.0001\r\n"}
|
| 233 |
+
,{"stream_name":"stdout","time":9428.899574607,"data":"Step 1970 | Training Loss: 0.0005\r\n"}
|
| 234 |
+
,{"stream_name":"stdout","time":9477.26809798,"data":"Step 1980 | Training Loss: 0.0001\r\n"}
|
| 235 |
+
,{"stream_name":"stdout","time":9525.718275429,"data":"Step 1990 | Training Loss: 0.0065\r\n"}
|
| 236 |
+
,{"stream_name":"stdout","time":9574.233678476,"data":"Step 2000 | Training Loss: 0.0002\r\n"}
|
| 237 |
+
,{"stream_name":"stdout","time":9631.060675239,"data":"Step 2010 | Training Loss: 0.0009\r\n"}
|
| 238 |
+
,{"stream_name":"stdout","time":9679.688411793,"data":"Step 2020 | Training Loss: 0.0001\r\n"}
|
| 239 |
+
,{"stream_name":"stdout","time":9728.156635587,"data":"Step 2030 | Training Loss: 0.0011\r\n"}
|
| 240 |
+
,{"stream_name":"stdout","time":9776.589138239,"data":"Step 2040 | Training Loss: 0.0001\r\n"}
|
| 241 |
+
,{"stream_name":"stdout","time":9824.955186704,"data":"Step 2050 | Training Loss: 0.5175\r\n"}
|
| 242 |
+
,{"stream_name":"stdout","time":9873.285441773,"data":"Step 2060 | Training Loss: 0.0002\r\n"}
|
| 243 |
+
,{"stream_name":"stdout","time":9921.502797565,"data":"Step 2070 | Training Loss: 0.2302\r\n"}
|
| 244 |
+
,{"stream_name":"stdout","time":9969.553685845,"data":"Step 2080 | Training Loss: 0.0000\r\n"}
|
| 245 |
+
,{"stream_name":"stdout","time":10017.673397318,"data":"Step 2090 | Training Loss: 0.1011\r\n"}
|
| 246 |
+
,{"stream_name":"stdout","time":10065.680193595,"data":"Step 2100 | Training Loss: 0.0001\r\n"}
|
| 247 |
+
,{"stream_name":"stdout","time":10113.677285646,"data":"Step 2110 | Training Loss: 0.0005\r\n"}
|
| 248 |
+
,{"stream_name":"stdout","time":10161.317414601,"data":"Step 2120 | Training Loss: 0.0002\r\n"}
|
| 249 |
+
,{"stream_name":"stdout","time":10209.068643255,"data":"Step 2130 | Training Loss: 0.0274\r\n"}
|
| 250 |
+
,{"stream_name":"stdout","time":10257.180170176,"data":"Step 2140 | Training Loss: 0.0001\r\n"}
|
| 251 |
+
,{"stream_name":"stdout","time":10305.03358383,"data":"Step 2150 | Training Loss: 0.0000\r\n"}
|
| 252 |
+
,{"stream_name":"stdout","time":10353.205568729,"data":"Step 2160 | Training Loss: 0.0000\r\n"}
|
| 253 |
+
,{"stream_name":"stdout","time":10400.902835464,"data":"Step 2170 | Training Loss: 0.0155\r\n"}
|
| 254 |
+
,{"stream_name":"stdout","time":10448.654623348,"data":"Step 2180 | Training Loss: 0.0000\r\n"}
|
| 255 |
+
,{"stream_name":"stdout","time":10496.66403938,"data":"Step 2190 | Training Loss: 0.0015\r\n"}
|
| 256 |
+
,{"stream_name":"stdout","time":10544.502190693,"data":"Step 2200 | Training Loss: 0.0002\r\n"}
|
| 257 |
+
,{"stream_name":"stdout","time":10592.609885182,"data":"Step 2210 | Training Loss: 0.0175\r\n"}
|
| 258 |
+
,{"stream_name":"stdout","time":10640.561500595,"data":"Step 2220 | Training Loss: 0.0001\r\n"}
|
| 259 |
+
,{"stream_name":"stdout","time":10688.669171201,"data":"Step 2230 | Training Loss: 0.0258\r\n"}
|
| 260 |
+
,{"stream_name":"stdout","time":10736.734963731,"data":"Step 2240 | Training Loss: 0.0001\r\n"}
|
| 261 |
+
,{"stream_name":"stdout","time":10784.855844654,"data":"Step 2250 | Training Loss: 0.0509\r\n"}
|
| 262 |
+
,{"stream_name":"stdout","time":10833.074204572,"data":"Step 2260 | Training Loss: 0.0001\r\n"}
|
| 263 |
+
,{"stream_name":"stdout","time":10881.284583119,"data":"Step 2270 | Training Loss: 0.2298\r\n"}
|
| 264 |
+
,{"stream_name":"stdout","time":10929.49513228,"data":"Step 2280 | Training Loss: 0.0000\r\n"}
|
| 265 |
+
,{"stream_name":"stdout","time":10978.233622818,"data":"Step 2290 | Training Loss: 0.0509\r\n"}
|
| 266 |
+
,{"stream_name":"stdout","time":11026.807980761,"data":"Step 2300 | Training Loss: 0.0001\r\n"}
|
| 267 |
+
,{"stream_name":"stdout","time":11075.070673196,"data":"Step 2310 | Training Loss: 0.0892\r\n"}
|
| 268 |
+
,{"stream_name":"stdout","time":11123.386057763,"data":"Step 2320 | Training Loss: 0.0000\r\n"}
|
| 269 |
+
,{"stream_name":"stdout","time":11171.482455969,"data":"Step 2330 | Training Loss: 0.0255\r\n"}
|
| 270 |
+
,{"stream_name":"stdout","time":11219.633084586,"data":"Step 2340 | Training Loss: 0.0001\r\n"}
|
| 271 |
+
,{"stream_name":"stdout","time":11267.795767782,"data":"Step 2350 | Training Loss: 0.1951\r\n"}
|
| 272 |
+
,{"stream_name":"stdout","time":11316.052288864,"data":"Step 2360 | Training Loss: 0.0000\r\n"}
|
| 273 |
+
,{"stream_name":"stdout","time":11364.102817969,"data":"Step 2370 | Training Loss: 0.0458\r\n"}
|
| 274 |
+
,{"stream_name":"stdout","time":11412.314451529,"data":"Step 2380 | Training Loss: 0.0002\r\n"}
|
| 275 |
+
,{"stream_name":"stdout","time":11460.519832412,"data":"Step 2390 | Training Loss: 0.6189\r\n"}
|
| 276 |
+
,{"stream_name":"stdout","time":11508.72580332,"data":"Step 2400 | Training Loss: 0.0001\r\n"}
|
| 277 |
+
,{"stream_name":"stdout","time":11556.975910021,"data":"Step 2410 | Training Loss: 0.0008\r\n"}
|
| 278 |
+
,{"stream_name":"stdout","time":11605.077335173,"data":"Step 2420 | Training Loss: 0.0002\r\n"}
|
| 279 |
+
,{"stream_name":"stdout","time":11653.32268846,"data":"Step 2430 | Training Loss: 0.1199\r\n"}
|
| 280 |
+
,{"stream_name":"stdout","time":11701.378311921,"data":"Step 2440 | Training Loss: 0.0000\r\n"}
|
| 281 |
+
,{"stream_name":"stdout","time":11749.4245447,"data":"Step 2450 | Training Loss: 0.1395\r\n"}
|
| 282 |
+
,{"stream_name":"stdout","time":11797.732336407,"data":"Step 2460 | Training Loss: 0.0000\r\n"}
|
| 283 |
+
,{"stream_name":"stdout","time":11845.622837549,"data":"Step 2470 | Training Loss: 0.5506\r\n"}
|
| 284 |
+
,{"stream_name":"stdout","time":11893.826706321,"data":"Step 2480 | Training Loss: 0.0002\r\n"}
|
| 285 |
+
,{"stream_name":"stdout","time":11941.981597797,"data":"Step 2490 | Training Loss: 0.3704\r\n"}
|
| 286 |
+
,{"stream_name":"stdout","time":11990.286572307,"data":"Step 2500 | Training Loss: 0.0002\r\n"}
|
| 287 |
+
,{"stream_name":"stdout","time":12038.439453397,"data":"Step 2510 | Training Loss: 0.0844\r\n"}
|
| 288 |
+
,{"stream_name":"stdout","time":12086.791797578,"data":"Step 2520 | Training Loss: 0.0000\r\n"}
|
| 289 |
+
,{"stream_name":"stdout","time":12134.89266628,"data":"Step 2530 | Training Loss: 0.8372\r\n"}
|
| 290 |
+
,{"stream_name":"stdout","time":12183.256479484,"data":"Step 2540 | Training Loss: 0.0001\r\n"}
|
| 291 |
+
,{"stream_name":"stdout","time":12231.507832762,"data":"Step 2550 | Training Loss: 0.1077\r\n"}
|
| 292 |
+
,{"stream_name":"stdout","time":12279.705800937,"data":"Step 2560 | Training Loss: 0.0000\r\n"}
|
| 293 |
+
,{"stream_name":"stdout","time":12327.913891106,"data":"Step 2570 | Training Loss: 0.0242\r\n"}
|
| 294 |
+
,{"stream_name":"stdout","time":12375.972268516,"data":"Step 2580 | Training Loss: 0.0001\r\n"}
|
| 295 |
+
,{"stream_name":"stdout","time":12424.334068139,"data":"Step 2590 | Training Loss: 0.2288\r\n"}
|
| 296 |
+
,{"stream_name":"stdout","time":12472.587447912,"data":"Step 2600 | Training Loss: 0.0002\r\n"}
|
| 297 |
+
,{"stream_name":"stdout","time":12520.797256758,"data":"Step 2610 | Training Loss: 0.0235\r\n"}
|
| 298 |
+
,{"stream_name":"stdout","time":12569.093494229,"data":"Step 2620 | Training Loss: 0.0000\r\n"}
|
| 299 |
+
,{"stream_name":"stdout","time":12617.198884574,"data":"Step 2630 | Training Loss: 0.0002\r\n"}
|
| 300 |
+
,{"stream_name":"stdout","time":12665.093328592,"data":"Step 2640 | Training Loss: 0.0002\r\n"}
|
| 301 |
+
,{"stream_name":"stdout","time":12713.408533417,"data":"Step 2650 | Training Loss: 0.5299\r\n"}
|
| 302 |
+
,{"stream_name":"stdout","time":12761.767827104,"data":"Step 2660 | Training Loss: 0.0001\r\n"}
|
| 303 |
+
,{"stream_name":"stdout","time":12809.865067521,"data":"Step 2670 | Training Loss: 0.0136\r\n"}
|
| 304 |
+
,{"stream_name":"stdout","time":12858.230243978,"data":"Step 2680 | Training Loss: 0.0000\r\n"}
|
| 305 |
+
,{"stream_name":"stdout","time":12906.33074439,"data":"Step 2690 | Training Loss: 0.0008\r\n"}
|
| 306 |
+
,{"stream_name":"stdout","time":12954.47409079,"data":"Step 2700 | Training Loss: 0.0000\r\n"}
|
| 307 |
+
,{"stream_name":"stdout","time":13002.676907936,"data":"Step 2710 | Training Loss: 0.0003\r\n"}
|
| 308 |
+
,{"stream_name":"stdout","time":13050.568219472,"data":"Step 2720 | Training Loss: 0.0000\r\n"}
|
| 309 |
+
,{"stream_name":"stdout","time":13098.831028953,"data":"Step 2730 | Training Loss: 0.0721\r\n"}
|
| 310 |
+
,{"stream_name":"stdout","time":13146.980227144,"data":"Step 2740 | Training Loss: 0.0001\r\n"}
|
| 311 |
+
,{"stream_name":"stdout","time":13195.180417814,"data":"Step 2750 | Training Loss: 0.0136\r\n"}
|
| 312 |
+
,{"stream_name":"stdout","time":13243.341285046,"data":"Step 2760 | Training Loss: 0.0002\r\n"}
|
| 313 |
+
,{"stream_name":"stdout","time":13291.541751866,"data":"Step 2770 | Training Loss: 0.0116\r\n"}
|
| 314 |
+
,{"stream_name":"stdout","time":13339.744655223,"data":"Step 2780 | Training Loss: 0.0000\r\n"}
|
| 315 |
+
,{"stream_name":"stdout","time":13388.153938032,"data":"Step 2790 | Training Loss: 0.1945\r\n"}
|
| 316 |
+
,{"stream_name":"stdout","time":13436.357333285,"data":"Step 2800 | Training Loss: 0.0000\r\n"}
|
| 317 |
+
,{"stream_name":"stdout","time":13484.518706384,"data":"Step 2810 | Training Loss: 0.0251\r\n"}
|
| 318 |
+
,{"stream_name":"stdout","time":13532.880920581,"data":"Step 2820 | Training Loss: 0.0000\r\n"}
|
| 319 |
+
,{"stream_name":"stdout","time":13581.084895869,"data":"Step 2830 | Training Loss: 0.1457\r\n"}
|
| 320 |
+
,{"stream_name":"stdout","time":13629.844984614,"data":"Step 2840 | Training Loss: 0.0000\r\n"}
|
| 321 |
+
,{"stream_name":"stdout","time":13678.414779744,"data":"Step 2850 | Training Loss: 0.9311\r\n"}
|
| 322 |
+
,{"stream_name":"stdout","time":13726.833249909,"data":"Step 2860 | Training Loss: 0.0001\r\n"}
|
| 323 |
+
,{"stream_name":"stdout","time":13775.434843976,"data":"Step 2870 | Training Loss: 0.1772\r\n"}
|
| 324 |
+
,{"stream_name":"stdout","time":13823.949314154,"data":"Step 2880 | Training Loss: 0.0000\r\n"}
|
| 325 |
+
,{"stream_name":"stdout","time":13872.686423709,"data":"Step 2890 | Training Loss: 0.0556\r\n"}
|
| 326 |
+
,{"stream_name":"stdout","time":13921.081973969,"data":"Step 2900 | Training Loss: 0.0000\r\n"}
|
| 327 |
+
,{"stream_name":"stdout","time":13969.797750806,"data":"Step 2910 | Training Loss: 0.0123\r\n"}
|
| 328 |
+
,{"stream_name":"stdout","time":14018.145630421,"data":"Step 2920 | Training Loss: 0.0001\r\n"}
|
| 329 |
+
,{"stream_name":"stdout","time":14066.612400506,"data":"Step 2930 | Training Loss: 0.0005\r\n"}
|
| 330 |
+
,{"stream_name":"stdout","time":14115.374993398,"data":"Step 2940 | Training Loss: 0.0000\r\n"}
|
| 331 |
+
,{"stream_name":"stdout","time":14163.785569929,"data":"Step 2950 | Training Loss: 0.0004\r\n"}
|
| 332 |
+
,{"stream_name":"stdout","time":14212.617086811,"data":"Step 2960 | Training Loss: 0.0000\r\n"}
|
| 333 |
+
,{"stream_name":"stdout","time":14260.919626194,"data":"Step 2970 | Training Loss: 0.0006\r\n"}
|
| 334 |
+
,{"stream_name":"stdout","time":14309.119366511,"data":"Step 2980 | Training Loss: 0.0000\r\n"}
|
| 335 |
+
,{"stream_name":"stdout","time":14357.726380415,"data":"Step 2990 | Training Loss: 0.0021\r\n"}
|
| 336 |
+
,{"stream_name":"stdout","time":14406.384222748,"data":"Step 3000 | Training Loss: 0.0001\r\n"}
|
| 337 |
+
,{"stream_name":"stdout","time":14463.202067486,"data":"Step 3010 | Training Loss: 0.0278\r\n"}
|
| 338 |
+
,{"stream_name":"stdout","time":14511.763538442,"data":"Step 3020 | Training Loss: 0.0000\r\n"}
|
| 339 |
+
,{"stream_name":"stdout","time":14560.37498688,"data":"Step 3030 | Training Loss: 0.4339\r\n"}
|
| 340 |
+
,{"stream_name":"stdout","time":14608.886303543,"data":"Step 3040 | Training Loss: 0.0000\r\n"}
|
| 341 |
+
,{"stream_name":"stdout","time":14657.525612971,"data":"Step 3050 | Training Loss: 0.5201\r\n"}
|
| 342 |
+
,{"stream_name":"stdout","time":14706.191179102,"data":"Step 3060 | Training Loss: 0.0003\r\n"}
|
| 343 |
+
,{"stream_name":"stdout","time":14754.901196453,"data":"Step 3070 | Training Loss: 0.0404\r\n"}
|
| 344 |
+
,{"stream_name":"stdout","time":14803.813412315,"data":"Step 3080 | Training Loss: 0.0000\r\n"}
|
| 345 |
+
,{"stream_name":"stdout","time":14852.615926466,"data":"Step 3090 | Training Loss: 0.0507\r\n"}
|
| 346 |
+
,{"stream_name":"stdout","time":14900.970057318,"data":"Step 3100 | Training Loss: 0.0000\r\n"}
|
| 347 |
+
,{"stream_name":"stdout","time":14949.385769626,"data":"Step 3110 | Training Loss: 0.0233\r\n"}
|
| 348 |
+
,{"stream_name":"stdout","time":14998.256273312,"data":"Step 3120 | Training Loss: 0.0000\r\n"}
|
| 349 |
+
,{"stream_name":"stdout","time":15047.07561482,"data":"Step 3130 | Training Loss: 0.0000\r\n"}
|
| 350 |
+
,{"stream_name":"stdout","time":15095.633112375,"data":"Step 3140 | Training Loss: 0.0000\r\n"}
|
| 351 |
+
,{"stream_name":"stdout","time":15144.292614945,"data":"Step 3150 | Training Loss: 0.1742\r\n"}
|
| 352 |
+
,{"stream_name":"stdout","time":15193.053175799,"data":"Step 3160 | Training Loss: 0.0000\r\n"}
|
| 353 |
+
,{"stream_name":"stdout","time":15241.611262561,"data":"Step 3170 | Training Loss: 0.0041\r\n"}
|
| 354 |
+
,{"stream_name":"stdout","time":15290.581339617,"data":"Step 3180 | Training Loss: 0.0000\r\n"}
|
| 355 |
+
,{"stream_name":"stdout","time":15339.543645267,"data":"Step 3190 | Training Loss: 0.0227\r\n"}
|
| 356 |
+
,{"stream_name":"stdout","time":15388.31388081,"data":"Step 3200 | Training Loss: 0.0000\r\n"}
|
| 357 |
+
,{"stream_name":"stdout","time":15437.119334513,"data":"Step 3210 | Training Loss: 0.0346\r\n"}
|
| 358 |
+
,{"stream_name":"stdout","time":15485.627486757,"data":"Step 3220 | Training Loss: 0.0002\r\n"}
|
| 359 |
+
,{"stream_name":"stdout","time":15534.488478314,"data":"Step 3230 | Training Loss: 0.0016\r\n"}
|
| 360 |
+
,{"stream_name":"stdout","time":15583.155939828,"data":"Step 3240 | Training Loss: 0.0000\r\n"}
|
| 361 |
+
,{"stream_name":"stdout","time":15631.59462484,"data":"Step 3250 | Training Loss: 0.1968\r\n"}
|
| 362 |
+
,{"stream_name":"stdout","time":15679.937384263,"data":"Step 3260 | Training Loss: 0.0000\r\n"}
|
| 363 |
+
,{"stream_name":"stdout","time":15728.533847415,"data":"Step 3270 | Training Loss: 0.0158\r\n"}
|
| 364 |
+
,{"stream_name":"stdout","time":15777.087533043,"data":"Step 3280 | Training Loss: 0.0001\r\n"}
|
| 365 |
+
,{"stream_name":"stdout","time":15825.378473935,"data":"Step 3290 | Training Loss: 0.1573\r\n"}
|
| 366 |
+
,{"stream_name":"stdout","time":15873.685010863,"data":"Step 3300 | Training Loss: 0.0000\r\n"}
|
| 367 |
+
,{"stream_name":"stdout","time":15922.186115432,"data":"Step 3310 | Training Loss: 0.0178\r\n"}
|
| 368 |
+
,{"stream_name":"stdout","time":15970.693688866,"data":"Step 3320 | Training Loss: 0.0000\r\n"}
|
| 369 |
+
,{"stream_name":"stdout","time":16019.297793642,"data":"Step 3330 | Training Loss: 0.0231\r\n"}
|
| 370 |
+
,{"stream_name":"stdout","time":16067.803931815,"data":"Step 3340 | Training Loss: 0.0003\r\n"}
|
| 371 |
+
,{"stream_name":"stdout","time":16116.357650631,"data":"Step 3350 | Training Loss: 0.0002\r\n"}
|
| 372 |
+
,{"stream_name":"stdout","time":16164.759289263,"data":"Step 3360 | Training Loss: 0.0000\r\n"}
|
| 373 |
+
,{"stream_name":"stdout","time":16213.322592326,"data":"Step 3370 | Training Loss: 0.0724\r\n"}
|
| 374 |
+
,{"stream_name":"stdout","time":16262.092514709,"data":"Step 3380 | Training Loss: 0.0000\r\n"}
|
| 375 |
+
,{"stream_name":"stdout","time":16310.709926205,"data":"Step 3390 | Training Loss: 0.0320\r\n"}
|
| 376 |
+
,{"stream_name":"stdout","time":16359.507674907,"data":"Step 3400 | Training Loss: 0.0001\r\n"}
|
| 377 |
+
,{"stream_name":"stdout","time":16408.525705942,"data":"Step 3410 | Training Loss: 0.0659\r\n"}
|
| 378 |
+
,{"stream_name":"stdout","time":16457.345597751,"data":"Step 3420 | Training Loss: 0.0001\r\n"}
|
| 379 |
+
,{"stream_name":"stdout","time":16505.856659197,"data":"Step 3430 | Training Loss: 0.0882\r\n"}
|
| 380 |
+
,{"stream_name":"stdout","time":16554.629038762,"data":"Step 3440 | Training Loss: 0.0001\r\n"}
|
| 381 |
+
,{"stream_name":"stdout","time":16603.292428455,"data":"Step 3450 | Training Loss: 0.0303\r\n"}
|
| 382 |
+
,{"stream_name":"stdout","time":16651.682411049,"data":"Step 3460 | Training Loss: 0.0001\r\n"}
|
| 383 |
+
,{"stream_name":"stdout","time":16699.783609036,"data":"Step 3470 | Training Loss: 0.0018\r\n"}
|
| 384 |
+
,{"stream_name":"stdout","time":16748.287659474,"data":"Step 3480 | Training Loss: 0.0000\r\n"}
|
| 385 |
+
,{"stream_name":"stdout","time":16796.640797618,"data":"Step 3490 | Training Loss: 0.2110\r\n"}
|
| 386 |
+
,{"stream_name":"stdout","time":16844.938239026,"data":"Step 3500 | Training Loss: 0.0000\r\n"}
|
| 387 |
+
,{"stream_name":"stdout","time":16893.556886133,"data":"Step 3510 | Training Loss: 0.4730\r\n"}
|
| 388 |
+
,{"stream_name":"stdout","time":16942.176207499,"data":"Step 3520 | Training Loss: 0.0000\r\n"}
|
| 389 |
+
,{"stream_name":"stdout","time":16990.378816435,"data":"Step 3530 | Training Loss: 0.0251\r\n"}
|
| 390 |
+
,{"stream_name":"stdout","time":17038.68337353,"data":"Step 3540 | Training Loss: 0.0000\r\n"}
|
| 391 |
+
,{"stream_name":"stdout","time":17087.234967293,"data":"Step 3550 | Training Loss: 0.0082\r\n"}
|
| 392 |
+
,{"stream_name":"stdout","time":17135.492342606,"data":"Step 3560 | Training Loss: 0.0001\r\n"}
|
| 393 |
+
,{"stream_name":"stdout","time":17183.947815938,"data":"Step 3570 | Training Loss: 0.0221\r\n"}
|
| 394 |
+
,{"stream_name":"stdout","time":17232.61139734,"data":"Step 3580 | Training Loss: 0.0011\r\n"}
|
| 395 |
+
,{"stream_name":"stdout","time":17281.112377796,"data":"Step 3590 | Training Loss: 0.0272\r\n"}
|
| 396 |
+
,{"stream_name":"stdout","time":17329.56771638,"data":"Step 3600 | Training Loss: 0.0000\r\n"}
|
| 397 |
+
,{"stream_name":"stdout","time":17378.231160135,"data":"Step 3610 | Training Loss: 0.0345\r\n"}
|
| 398 |
+
,{"stream_name":"stdout","time":17426.904472048,"data":"Step 3620 | Training Loss: 0.0004\r\n"}
|
| 399 |
+
,{"stream_name":"stdout","time":17475.632677823,"data":"Step 3630 | Training Loss: 0.0056\r\n"}
|
| 400 |
+
,{"stream_name":"stdout","time":17524.547406387,"data":"Step 3640 | Training Loss: 0.0001\r\n"}
|
| 401 |
+
,{"stream_name":"stdout","time":17573.467743804,"data":"Step 3650 | Training Loss: 0.0252\r\n"}
|
| 402 |
+
,{"stream_name":"stdout","time":17622.181002064,"data":"Step 3660 | Training Loss: 0.0000\r\n"}
|
| 403 |
+
,{"stream_name":"stdout","time":17670.583890813,"data":"Step 3670 | Training Loss: 0.0409\r\n"}
|
| 404 |
+
,{"stream_name":"stdout","time":17718.990510163,"data":"Step 3680 | Training Loss: 0.0000\r\n"}
|
| 405 |
+
,{"stream_name":"stdout","time":17767.439034654,"data":"Step 3690 | Training Loss: 2.0241\r\n"}
|
| 406 |
+
,{"stream_name":"stdout","time":17815.987954397,"data":"Step 3700 | Training Loss: 0.0000\r\n"}
|
| 407 |
+
,{"stream_name":"stdout","time":17864.595691869,"data":"Step 3710 | Training Loss: 0.0006\r\n"}
|
| 408 |
+
,{"stream_name":"stdout","time":17913.259888888,"data":"Step 3720 | Training Loss: 0.0000\r\n"}
|
| 409 |
+
,{"stream_name":"stdout","time":17961.663746001,"data":"Step 3730 | Training Loss: 0.6780\r\n"}
|
| 410 |
+
,{"stream_name":"stdout","time":18010.105099133,"data":"Step 3740 | Training Loss: 0.0001\r\n"}
|
| 411 |
+
,{"stream_name":"stdout","time":18058.511524086,"data":"Step 3750 | Training Loss: 0.1512\r\n"}
|
| 412 |
+
,{"stream_name":"stdout","time":18106.859574272,"data":"Step 3760 | Training Loss: 0.0000\r\n"}
|
| 413 |
+
,{"stream_name":"stdout","time":18155.253863703,"data":"Step 3770 | Training Loss: 0.4934\r\n"}
|
| 414 |
+
,{"stream_name":"stdout","time":18203.626237196,"data":"Step 3780 | Training Loss: 0.0001\r\n"}
|
| 415 |
+
,{"stream_name":"stdout","time":18252.381601219,"data":"Step 3790 | Training Loss: 0.0246\r\n"}
|
| 416 |
+
,{"stream_name":"stdout","time":18300.687943228,"data":"Step 3800 | Training Loss: 0.0000\r\n"}
|
| 417 |
+
,{"stream_name":"stdout","time":18349.149597588,"data":"Step 3810 | Training Loss: 0.0523\r\n"}
|
| 418 |
+
,{"stream_name":"stdout","time":18397.70907493,"data":"Step 3820 | Training Loss: 0.0000\r\n"}
|
| 419 |
+
,{"stream_name":"stdout","time":18446.355006575,"data":"Step 3830 | Training Loss: 0.0001\r\n"}
|
| 420 |
+
,{"stream_name":"stdout","time":18494.818943225,"data":"Step 3840 | Training Loss: 0.0000\r\n"}
|
| 421 |
+
,{"stream_name":"stdout","time":18543.633929143,"data":"Step 3850 | Training Loss: 0.0863\r\n"}
|
| 422 |
+
,{"stream_name":"stdout","time":18592.023729011,"data":"Step 3860 | Training Loss: 0.0002\r\n"}
|
| 423 |
+
,{"stream_name":"stdout","time":18640.800434455,"data":"Step 3870 | Training Loss: 0.0430\r\n"}
|
| 424 |
+
,{"stream_name":"stdout","time":18689.610009973,"data":"Step 3880 | Training Loss: 0.0002\r\n"}
|
| 425 |
+
,{"stream_name":"stdout","time":18738.563947461,"data":"Step 3890 | Training Loss: 0.0335\r\n"}
|
| 426 |
+
,{"stream_name":"stdout","time":18787.631521217,"data":"Step 3900 | Training Loss: 0.0005\r\n"}
|
| 427 |
+
,{"stream_name":"stdout","time":18836.190670585,"data":"Step 3910 | Training Loss: 0.0301\r\n"}
|
| 428 |
+
,{"stream_name":"stdout","time":18884.700433541,"data":"Step 3920 | Training Loss: 0.0000\r\n"}
|
| 429 |
+
,{"stream_name":"stdout","time":18933.352551205,"data":"Step 3930 | Training Loss: 0.0009\r\n"}
|
| 430 |
+
,{"stream_name":"stdout","time":18981.761953163,"data":"Step 3940 | Training Loss: 0.0000\r\n"}
|
| 431 |
+
,{"stream_name":"stdout","time":19030.275188847,"data":"Step 3950 | Training Loss: 0.0433\r\n"}
|
| 432 |
+
,{"stream_name":"stdout","time":19079.138489376,"data":"Step 3960 | Training Loss: 0.0000\r\n"}
|
| 433 |
+
,{"stream_name":"stdout","time":19127.95942962,"data":"Step 3970 | Training Loss: 0.0300\r\n"}
|
| 434 |
+
,{"stream_name":"stdout","time":19176.813602528,"data":"Step 3980 | Training Loss: 0.0000\r\n"}
|
| 435 |
+
,{"stream_name":"stdout","time":19225.381677137,"data":"Step 3990 | Training Loss: 0.3666\r\n"}
|
| 436 |
+
,{"stream_name":"stdout","time":19274.033597687,"data":"Step 4000 | Training Loss: 0.0000\r\n"}
|
| 437 |
+
,{"stream_name":"stdout","time":19330.370327348,"data":"Step 4010 | Training Loss: 0.0014\r\n"}
|
| 438 |
+
,{"stream_name":"stdout","time":19379.187591185,"data":"Step 4020 | Training Loss: 0.0000\r\n"}
|
| 439 |
+
,{"stream_name":"stdout","time":19428.03854059,"data":"Step 4030 | Training Loss: 0.0352\r\n"}
|
| 440 |
+
,{"stream_name":"stdout","time":19476.858693396,"data":"Step 4040 | Training Loss: 0.0000\r\n"}
|
| 441 |
+
,{"stream_name":"stdout","time":19525.556636186,"data":"Step 4050 | Training Loss: 0.0020\r\n"}
|
| 442 |
+
,{"stream_name":"stdout","time":19574.156651536,"data":"Step 4060 | Training Loss: 0.0000\r\n"}
|
| 443 |
+
,{"stream_name":"stdout","time":19623.019614237,"data":"Step 4070 | Training Loss: 0.4625\r\n"}
|
| 444 |
+
,{"stream_name":"stdout","time":19671.77280065,"data":"Step 4080 | Training Loss: 0.0000\r\n"}
|
| 445 |
+
,{"stream_name":"stdout","time":19720.323045235,"data":"Step 4090 | Training Loss: 0.3424\r\n"}
|
| 446 |
+
,{"stream_name":"stdout","time":19769.090285045,"data":"Step 4100 | Training Loss: 0.0000\r\n"}
|
| 447 |
+
,{"stream_name":"stdout","time":19817.84300973,"data":"Step 4110 | Training Loss: 0.0007\r\n"}
|
| 448 |
+
,{"stream_name":"stdout","time":19866.453379974,"data":"Step 4120 | Training Loss: 0.0000\r\n"}
|
| 449 |
+
,{"stream_name":"stdout","time":19915.362297406,"data":"Step 4130 | Training Loss: 0.2989\r\n"}
|
| 450 |
+
,{"stream_name":"stdout","time":19964.285596167,"data":"Step 4140 | Training Loss: 0.0003\r\n"}
|
| 451 |
+
,{"stream_name":"stdout","time":20013.242861472,"data":"Step 4150 | Training Loss: 0.0118\r\n"}
|
| 452 |
+
,{"stream_name":"stdout","time":20061.893159716,"data":"Step 4160 | Training Loss: 0.0001\r\n"}
|
| 453 |
+
,{"stream_name":"stdout","time":20110.38751996,"data":"Step 4170 | Training Loss: 0.0295\r\n"}
|
| 454 |
+
,{"stream_name":"stdout","time":20158.940734733,"data":"Step 4180 | Training Loss: 0.0000\r\n"}
|
| 455 |
+
,{"stream_name":"stdout","time":20207.34005143,"data":"Step 4190 | Training Loss: 1.0331\r\n"}
|
| 456 |
+
,{"stream_name":"stdout","time":20255.476342927,"data":"Step 4200 | Training Loss: 0.0000\r\n"}
|
| 457 |
+
,{"stream_name":"stdout","time":20304.182768136,"data":"Step 4210 | Training Loss: 0.0557\r\n"}
|
| 458 |
+
,{"stream_name":"stdout","time":20353.063785969,"data":"Step 4220 | Training Loss: 0.0001\r\n"}
|
| 459 |
+
,{"stream_name":"stdout","time":20401.928579941,"data":"Step 4230 | Training Loss: 0.7705\r\n"}
|
| 460 |
+
,{"stream_name":"stdout","time":20450.695908392,"data":"Step 4240 | Training Loss: 0.0005\r\n"}
|
| 461 |
+
,{"stream_name":"stdout","time":20499.807874689,"data":"Step 4250 | Training Loss: 0.0817\r\n"}
|
| 462 |
+
,{"stream_name":"stdout","time":20548.925492079,"data":"Step 4260 | Training Loss: 0.0001\r\n"}
|
| 463 |
+
,{"stream_name":"stdout","time":20597.631906868,"data":"Step 4270 | Training Loss: 0.0491\r\n"}
|
| 464 |
+
,{"stream_name":"stdout","time":20646.238855144,"data":"Step 4280 | Training Loss: 0.0000\r\n"}
|
| 465 |
+
,{"stream_name":"stdout","time":20695.051173656,"data":"Step 4290 | Training Loss: 0.3100\r\n"}
|
| 466 |
+
,{"stream_name":"stdout","time":20743.857149025,"data":"Step 4300 | Training Loss: 0.0000\r\n"}
|
| 467 |
+
,{"stream_name":"stdout","time":20792.416636927,"data":"Step 4310 | Training Loss: 0.3296\r\n"}
|
| 468 |
+
,{"stream_name":"stdout","time":20840.806554702,"data":"Step 4320 | Training Loss: 0.0000\r\n"}
|
| 469 |
+
,{"stream_name":"stdout","time":20889.370268489,"data":"Step 4330 | Training Loss: 0.8703\r\n"}
|
| 470 |
+
,{"stream_name":"stdout","time":20937.975335165,"data":"Step 4340 | Training Loss: 0.0000\r\n"}
|
| 471 |
+
,{"stream_name":"stdout","time":20986.373670474,"data":"Step 4350 | Training Loss: 0.0738\r\n"}
|
| 472 |
+
,{"stream_name":"stdout","time":21034.714888468,"data":"Step 4360 | Training Loss: 0.0000\r\n"}
|
| 473 |
+
,{"stream_name":"stdout","time":21083.321178741,"data":"Step 4370 | Training Loss: 0.0085\r\n"}
|
| 474 |
+
,{"stream_name":"stdout","time":21131.91588219,"data":"Step 4380 | Training Loss: 0.0000\r\n"}
|
| 475 |
+
,{"stream_name":"stdout","time":21180.50922574,"data":"Step 4390 | Training Loss: 0.1303\r\n"}
|
| 476 |
+
,{"stream_name":"stdout","time":21229.468677627,"data":"Step 4400 | Training Loss: 0.0000\r\n"}
|
| 477 |
+
,{"stream_name":"stdout","time":21278.185678317,"data":"Step 4410 | Training Loss: 0.0038\r\n"}
|
| 478 |
+
,{"stream_name":"stdout","time":21326.692733082,"data":"Step 4420 | Training Loss: 0.0044\r\n"}
|
| 479 |
+
,{"stream_name":"stdout","time":21375.246292812,"data":"Step 4430 | Training Loss: 0.0003\r\n"}
|
| 480 |
+
,{"stream_name":"stdout","time":21423.953765953,"data":"Step 4440 | Training Loss: 0.0000\r\n"}
|
| 481 |
+
,{"stream_name":"stdout","time":21472.550855928,"data":"Step 4450 | Training Loss: 0.0006\r\n"}
|
| 482 |
+
,{"stream_name":"stdout","time":21521.000525125,"data":"Step 4460 | Training Loss: 0.0000\r\n"}
|
| 483 |
+
,{"stream_name":"stdout","time":21569.606979192,"data":"Step 4470 | Training Loss: 0.0084\r\n"}
|
| 484 |
+
,{"stream_name":"stdout","time":21618.315299901,"data":"Step 4480 | Training Loss: 0.0001\r\n"}
|
| 485 |
+
,{"stream_name":"stdout","time":21666.952651797,"data":"Step 4490 | Training Loss: 0.0307\r\n"}
|
| 486 |
+
,{"stream_name":"stdout","time":21715.658911061,"data":"Step 4500 | Training Loss: 0.0001\r\n"}
|
| 487 |
+
,{"stream_name":"stdout","time":21764.462529606,"data":"Step 4510 | Training Loss: 0.0135\r\n"}
|
| 488 |
+
,{"stream_name":"stdout","time":21813.268043595,"data":"Step 4520 | Training Loss: 0.0000\r\n"}
|
| 489 |
+
,{"stream_name":"stdout","time":21861.91300263,"data":"Step 4530 | Training Loss: 0.0000\r\n"}
|
| 490 |
+
,{"stream_name":"stdout","time":21910.358940365,"data":"Step 4540 | Training Loss: 0.0001\r\n"}
|
| 491 |
+
,{"stream_name":"stdout","time":21959.32275268,"data":"Step 4550 | Training Loss: 0.0099\r\n"}
|
| 492 |
+
,{"stream_name":"stdout","time":22008.177645857,"data":"Step 4560 | Training Loss: 0.0000\r\n"}
|
| 493 |
+
,{"stream_name":"stdout","time":22057.189592942,"data":"Step 4570 | Training Loss: 0.0401\r\n"}
|
| 494 |
+
,{"stream_name":"stdout","time":22105.891542051,"data":"Step 4580 | Training Loss: 0.0000\r\n"}
|
| 495 |
+
,{"stream_name":"stdout","time":22154.792730208,"data":"Step 4590 | Training Loss: 0.0001\r\n"}
|
| 496 |
+
,{"stream_name":"stdout","time":22203.847658097,"data":"Step 4600 | Training Loss: 0.0000\r\n"}
|
| 497 |
+
,{"stream_name":"stdout","time":22252.846417982,"data":"Step 4610 | Training Loss: 0.0331\r\n"}
|
| 498 |
+
,{"stream_name":"stdout","time":22301.957604286,"data":"Step 4620 | Training Loss: 0.0000\r\n"}
|
| 499 |
+
,{"stream_name":"stdout","time":22350.860246187,"data":"Step 4630 | Training Loss: 0.0002\r\n"}
|
| 500 |
+
,{"stream_name":"stdout","time":22400.127660066,"data":"Step 4640 | Training Loss: 0.0000\r\n"}
|
| 501 |
+
,{"stream_name":"stdout","time":22449.012001906,"data":"Step 4650 | Training Loss: 0.0001\r\n"}
|
| 502 |
+
,{"stream_name":"stdout","time":22498.25958275,"data":"Step 4660 | Training Loss: 0.0000\r\n"}
|
| 503 |
+
,{"stream_name":"stdout","time":22546.854144365,"data":"Step 4670 | Training Loss: 0.7391\r\n"}
|
| 504 |
+
,{"stream_name":"stdout","time":22595.393059861,"data":"Step 4680 | Training Loss: 0.0000\r\n"}
|
| 505 |
+
,{"stream_name":"stdout","time":22644.051023849,"data":"Step 4690 | Training Loss: 0.0001\r\n"}
|
| 506 |
+
,{"stream_name":"stdout","time":22692.755234438,"data":"Step 4700 | Training Loss: 0.0000\r\n"}
|
| 507 |
+
,{"stream_name":"stdout","time":22741.002387208,"data":"Step 4710 | Training Loss: 0.2675\r\n"}
|
| 508 |
+
,{"stream_name":"stdout","time":22789.703559045,"data":"Step 4720 | Training Loss: 0.0000\r\n"}
|
| 509 |
+
,{"stream_name":"stdout","time":22838.215702733,"data":"Step 4730 | Training Loss: 0.0001\r\n"}
|
| 510 |
+
,{"stream_name":"stdout","time":22886.819238464,"data":"Step 4740 | Training Loss: 0.0000\r\n"}
|
| 511 |
+
,{"stream_name":"stdout","time":22935.308395738,"data":"Step 4750 | Training Loss: 0.1630\r\n"}
|
| 512 |
+
,{"stream_name":"stdout","time":22984.006914862,"data":"Step 4760 | Training Loss: 0.0000\r\n"}
|
| 513 |
+
,{"stream_name":"stdout","time":23032.758845248,"data":"Step 4770 | Training Loss: 0.0497\r\n"}
|
| 514 |
+
,{"stream_name":"stdout","time":23081.257674891,"data":"Step 4780 | Training Loss: 0.0000\r\n"}
|
| 515 |
+
,{"stream_name":"stdout","time":23129.759012584,"data":"Step 4790 | Training Loss: 0.0221\r\n"}
|
| 516 |
+
,{"stream_name":"stdout","time":23178.274305482,"data":"Step 4800 | Training Loss: 0.0000\r\n"}
|
| 517 |
+
,{"stream_name":"stdout","time":23226.836600616,"data":"Step 4810 | Training Loss: 0.0692\r\n"}
|
| 518 |
+
,{"stream_name":"stdout","time":23275.239922901,"data":"Step 4820 | Training Loss: 0.0000\r\n"}
|
| 519 |
+
,{"stream_name":"stdout","time":23323.787158774,"data":"Step 4830 | Training Loss: 0.0002\r\n"}
|
| 520 |
+
,{"stream_name":"stdout","time":23372.339226258,"data":"Step 4840 | Training Loss: 0.0000\r\n"}
|
| 521 |
+
,{"stream_name":"stdout","time":23420.799108206,"data":"Step 4850 | Training Loss: 0.0034\r\n"}
|
| 522 |
+
,{"stream_name":"stdout","time":23469.234119704,"data":"Step 4860 | Training Loss: 0.0000\r\n"}
|
| 523 |
+
,{"stream_name":"stdout","time":23517.737093164,"data":"Step 4870 | Training Loss: 0.1528\r\n"}
|
| 524 |
+
,{"stream_name":"stdout","time":23566.332646771,"data":"Step 4880 | Training Loss: 0.0000\r\n"}
|
| 525 |
+
,{"stream_name":"stdout","time":23614.829191387,"data":"Step 4890 | Training Loss: 0.0390\r\n"}
|
| 526 |
+
,{"stream_name":"stdout","time":23665.269573262,"data":"Step 4900 | Training Loss: 0.0000\r\n"}
|
| 527 |
+
,{"stream_name":"stdout","time":23714.086731517,"data":"Step 4910 | Training Loss: 0.6370\r\n"}
|
| 528 |
+
,{"stream_name":"stdout","time":23762.834485533,"data":"Step 4920 | Training Loss: 0.0000\r\n"}
|
| 529 |
+
,{"stream_name":"stdout","time":23811.444254035,"data":"Step 4930 | Training Loss: 0.0010\r\n"}
|
| 530 |
+
,{"stream_name":"stdout","time":23860.101406718,"data":"Step 4940 | Training Loss: 0.0000\r\n"}
|
| 531 |
+
,{"stream_name":"stdout","time":23908.594611574,"data":"Step 4950 | Training Loss: 0.8518\r\n"}
|
| 532 |
+
,{"stream_name":"stdout","time":23957.158840517,"data":"Step 4960 | Training Loss: 0.0000\r\n"}
|
| 533 |
+
,{"stream_name":"stdout","time":24005.355271545,"data":"Step 4970 | Training Loss: 0.1041\r\n"}
|
| 534 |
+
,{"stream_name":"stdout","time":24053.912362779,"data":"Step 4980 | Training Loss: 0.0002\r\n"}
|
| 535 |
+
,{"stream_name":"stdout","time":24102.613278632,"data":"Step 4990 | Training Loss: 0.0001\r\n"}
|
| 536 |
+
,{"stream_name":"stdout","time":24151.104612433,"data":"Step 5000 | Training Loss: 0.0000\r\n"}
|
| 537 |
+
,{"stream_name":"stderr","time":24177.244250522,"data":"/usr/local/lib/python3.12/dist-packages/mistune.py:435: SyntaxWarning: invalid escape sequence '\\|'\n"}
|
| 538 |
+
,{"stream_name":"stderr","time":24177.244291509,"data":" cells[i][c] = re.sub('\\\\\\\\\\|', '|', cell)\n"}
|
| 539 |
+
,{"stream_name":"stderr","time":24177.548979868,"data":"/usr/local/lib/python3.12/dist-packages/nbconvert/filters/filter_links.py:36: SyntaxWarning: invalid escape sequence '\\_'\n"}
|
| 540 |
+
,{"stream_name":"stderr","time":24177.549016548,"data":" text = re.sub(r'_', '\\_', text) # Escape underscores in display text\n"}
|
| 541 |
+
,{"stream_name":"stderr","time":24178.904491192,"data":"[NbConvertApp] Converting notebook __notebook__.ipynb to notebook\n"}
|
| 542 |
+
,{"stream_name":"stderr","time":24179.249568123,"data":"[NbConvertApp] Writing 37815 bytes to __notebook__.ipynb\n"}
|
| 543 |
+
,{"stream_name":"stderr","time":24181.514947994,"data":"[NbConvertApp] Converting notebook __notebook__.ipynb to html\n"}
|
| 544 |
+
,{"stream_name":"stderr","time":24182.626672929,"data":"[NbConvertApp] Writing 318940 bytes to __results__.html\n"}
|
| 545 |
+
]
|
djalokd/hexa1b/__results__.html
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
djalokd/hexa1b/custom.css
ADDED
|
File without changes
|
djalokd/hexa1b/hexa_1b_final.nef
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6e974bc8dd55a23f499990308ef0ee5727dfe374094665a123d6b75af884d677
|
| 3 |
+
size 2430849707
|
djalokd/hexa1b/model-step-1000.nef
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7f3b3cf875c8931847d81e7a7302e691a016ebdd09d0d2517978782793364205
|
| 3 |
+
size 2430850107
|
djalokd/hexa1b/model-step-2000.nef
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7cc87d62ebd38d7dd7ce2c519cd0c0e03a119269d09438605734558292173b84
|
| 3 |
+
size 2430850107
|
djalokd/hexa1b/model-step-3000.nef
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1c954f9a3b947f566843d2b60360fffb5244f1822f0eb275359d7b040e55c8c9
|
| 3 |
+
size 2430850107
|
djalokd/hexa1b/model-step-4000.nef
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9955a409f1a3a28a3077700cfb09e44f0f650e5c14be03b96a3b98d466066101
|
| 3 |
+
size 2430850107
|
djalokd/hexa1b/model-step-5000.nef
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5bb7398cbec2d55539dd25624c12b63de8dcb90a080905708ef64990d1b902e1
|
| 3 |
+
size 2430850107
|