a12354 committed on
Commit
7d22995
·
verified ·
1 Parent(s): 8d2b389

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. .gitattributes +1 -0
  2. img/architecture.png +3 -0
  3. rats40k_adapter/README.md +70 -0
  4. rats40k_adapter/eval_rats40k.py +244 -0
  5. rats40k_adapter/finetune_rats40k_lora.py +314 -0
  6. rats40k_adapter/outputs/pipeline_20260608_175250/sft/adapter/README.md +202 -0
  7. rats40k_adapter/outputs/pipeline_20260608_175250/sft/adapter/adapter_config.json +37 -0
  8. rats40k_adapter/outputs/pipeline_20260608_175250/sft/adapter/added_tokens.json +0 -0
  9. rats40k_adapter/outputs/pipeline_20260608_175250/sft/adapter/checkpoint-800/rng_state_0.pth +3 -0
  10. rats40k_adapter/outputs/pipeline_20260608_175250/sft/adapter/checkpoint-800/rng_state_1.pth +3 -0
  11. rats40k_adapter/outputs/pipeline_20260608_175250/sft/adapter/checkpoint-800/rng_state_2.pth +3 -0
  12. rats40k_adapter/outputs/pipeline_20260608_175250/sft/adapter/checkpoint-800/rng_state_3.pth +3 -0
  13. rats40k_adapter/outputs/pipeline_20260608_175250/sft/adapter/checkpoint-800/scaler.pt +3 -0
  14. rats40k_adapter/outputs/pipeline_20260608_175250/sft/adapter/checkpoint-800/scheduler.pt +3 -0
  15. rats40k_adapter/outputs/pipeline_20260608_175250/sft/adapter/checkpoint-800/tokenizer.model +3 -0
  16. rats40k_adapter/outputs/pipeline_20260608_175250/sft/adapter/checkpoint-800/training_args.bin +3 -0
  17. rats40k_adapter/outputs/pipeline_20260608_175250/sft/adapter/checkpoint-946/added_tokens.json +0 -0
  18. rats40k_adapter/outputs/pipeline_20260608_175250/sft/adapter/checkpoint-946/scheduler.pt +3 -0
  19. rats40k_adapter/outputs/pipeline_20260608_175250/sft/adapter/checkpoint-946/special_tokens_map.json +30 -0
  20. rats40k_adapter/outputs/pipeline_20260608_175250/sft/adapter/checkpoint-946/tokenizer.model +3 -0
  21. rats40k_adapter/outputs/pipeline_20260608_175250/sft/adapter/checkpoint-946/tokenizer_config.json +0 -0
  22. rats40k_adapter/outputs/pipeline_20260608_175250/sft/adapter/checkpoint-946/trainer_state.json +692 -0
  23. rats40k_adapter/outputs/pipeline_20260608_175250/sft/adapter/checkpoint-946/training_args.bin +3 -0
  24. rats40k_adapter/outputs/pipeline_20260608_175250/sft/adapter/special_tokens_map.json +30 -0
  25. rats40k_adapter/outputs/pipeline_20260608_175250/sft/adapter/tokenizer.json +0 -0
  26. rats40k_adapter/outputs/pipeline_20260608_175250/sft/adapter/tokenizer.model +3 -0
  27. rats40k_adapter/outputs/pipeline_20260608_175250/sft/adapter/tokenizer_config.json +0 -0
  28. rats40k_adapter/outputs/pipeline_20260608_175250/sft/adapter/training_args.bin +3 -0
  29. rats40k_adapter/outputs/pipeline_20260608_175250/sft/eval/RATs-Uni-TSImage_Reason_Reason_by_chattime_sft.json +0 -0
  30. rats40k_adapter/outputs/pipeline_20260608_175250/sft/eval/TSAD_test_metrics.json +16 -0
  31. rats40k_adapter/outputs/pipeline_20260608_175250/sft/eval/shards/RATs-Uni-TSImage_Reason_Reason_by_chattime_sft.rank0.json +0 -0
  32. rats40k_adapter/outputs/pipeline_20260608_175250/sft/eval/shards/RATs-Uni-TSImage_Reason_Reason_by_chattime_sft.rank1.json +0 -0
  33. rats40k_adapter/outputs/pipeline_20260608_175250/sft/eval/shards/RATs-Uni-TSImage_Reason_Reason_by_chattime_sft.rank2.json +0 -0
  34. rats40k_adapter/outputs/pipeline_20260608_175250/sft/eval/shards/RATs-Uni-TSImage_Reason_Reason_by_chattime_sft.rank3.json +0 -0
  35. rats40k_adapter/outputs/pipeline_20260608_175250/zeroshot/RATs-Uni-TSImage_Reason_Reason_by_chattime_zeroshot.json +0 -0
  36. rats40k_adapter/outputs/pipeline_20260608_175250/zeroshot/TSAD_test_metrics.json +16 -0
  37. rats40k_adapter/outputs/pipeline_20260608_175250/zeroshot/shards/RATs-Uni-TSImage_Reason_Reason_by_chattime_zeroshot.rank0.json +0 -0
  38. rats40k_adapter/outputs/pipeline_20260608_175250/zeroshot/shards/RATs-Uni-TSImage_Reason_Reason_by_chattime_zeroshot.rank1.json +0 -0
  39. rats40k_adapter/outputs/pipeline_20260608_175250/zeroshot/shards/RATs-Uni-TSImage_Reason_Reason_by_chattime_zeroshot.rank2.json +0 -0
  40. rats40k_adapter/outputs/pipeline_20260608_175250/zeroshot/shards/RATs-Uni-TSImage_Reason_Reason_by_chattime_zeroshot.rank3.json +0 -0
  41. rats40k_adapter/rats40k_common.py +347 -0
  42. rats40k_adapter/run_sft_4gpu.sh +145 -0
  43. rats40k_adapter/run_zeroshot_4gpu.sh +81 -0
  44. rats40k_adapter/run_zeroshot_then_sft_4gpu.sh +75 -0
  45. training/finetune.py +133 -0
  46. training/finetune.sh +37 -0
  47. training/pretrain.py +154 -0
  48. training/pretrain.sh +37 -0
  49. tsqa_adapter/logs/sft_4gpu_20260615_140322.log +875 -0
  50. tsqa_adapter/logs/sft_4gpu_20260615_141604.log +210 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ img/architecture.png filter=lfs diff=lfs merge=lfs -text
img/architecture.png ADDED

Git LFS Details

  • SHA256: acc14adddd5e8986d8857509d1f3f731020ee423970c0848e488b865c6c6231b
  • Pointer size: 131 Bytes
  • Size of remote file: 310 kB
rats40k_adapter/README.md ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # RATs40K Adapter for ChatTime
2
+
3
+ This folder adapts ChatTime to the RATs40K univariate anomaly QA task.
4
+
5
+ It intentionally uses the numeric `Observation` field only. It does not use
6
+ `FigurePath`, so the resulting baseline should be compared with numeric-only
7
+ Time-RA settings rather than VLM image-input settings.
8
+
9
+ ## Required inputs
10
+
11
+ - `MODEL_PATH`: local ChatTime model directory. This is required by default.
12
+ - `PYTHON_BIN`: Python executable. The shell scripts default to
13
+ `/dev/shm/suiqk/conda_envs/scalerag-ts-v4/bin/python`.
14
+ - `DATA_PATH`: defaults to
15
+ `/mnt/share01/sqk/datasets/RATs40K/RATs-Uni-TSImage_Reason.json`.
16
+
17
+ The scripts do not download HuggingFace weights unless `ALLOW_HF_DOWNLOAD=1`
18
+ is set explicitly.
19
+
20
+ The default precision is FP16 because the configured four-GPU machine uses
21
+ Tesla V100 GPUs. SFT defaults to regular FP16 LoRA with
22
+ `LOAD_IN_4BIT=0`, `PER_DEVICE_TRAIN_BATCH_SIZE=1`, and
23
+ `GRADIENT_ACCUMULATION_STEPS=16`.
24
+
25
+ Evaluation defaults to `EVAL_BATCH_SIZE=4` per GPU. With four GPUs, the
26
+ maximum global evaluation batch size is 16.
27
+
28
+ The task prompt is aligned with Time-RA's univariate
29
+ `USER_DETECTION_PROMPT`; ChatTime still receives the normalized/discretized
30
+ series through its native `### Input` section. Evaluation checks prompt token
31
+ lengths and fails instead of truncating. Defaults are `MAX_INPUT_TOKENS=3936`,
32
+ `MAX_NEW_TOKENS=160`, and `MAX_SEQ_LENGTH=4096` for SFT.
33
+
34
+ ## Zero-shot
35
+
36
+ ```bash
37
+ cd /mnt/share01/sqk/ChatTime
38
+ MODEL_PATH=/mnt/share01/sqk/models/ChatTime-1-7B-Chat \
39
+ bash rats40k_adapter/run_zeroshot_4gpu.sh
40
+ ```
41
+
42
+ ## SFT + Eval
43
+
44
+ ```bash
45
+ cd /mnt/share01/sqk/ChatTime
46
+ MODEL_PATH=/mnt/share01/sqk/models/ChatTime-1-7B-Chat \
47
+ bash rats40k_adapter/run_sft_4gpu.sh
48
+ ```
49
+
50
+ ## Zero-shot Then SFT + Eval
51
+
52
+ ```bash
53
+ cd /mnt/share01/sqk/ChatTime
54
+ bash rats40k_adapter/run_zeroshot_then_sft_4gpu.sh
55
+ ```
56
+
57
+ ## Saved Results
58
+
59
+ - Zero-shot outputs:
60
+ `/mnt/share01/sqk/ChatTime/rats40k_adapter/outputs/pipeline_20260608_175250/zeroshot`
61
+ - SFT outputs:
62
+ `/mnt/share01/sqk/ChatTime/rats40k_adapter/outputs/pipeline_20260608_175250/sft`
63
+
64
+ Useful smoke-test knobs:
65
+
66
+ ```bash
67
+ MAX_TRAIN_SAMPLES=128 MAX_EVAL_SAMPLES=64 bash rats40k_adapter/run_zeroshot_then_sft_4gpu.sh
68
+ MAX_TRAIN_SAMPLES=128 MAX_EVAL_SAMPLES=64 bash rats40k_adapter/run_sft_4gpu.sh
69
+ MAX_EVAL_SAMPLES=64 bash rats40k_adapter/run_zeroshot_4gpu.sh
70
+ ```
rats40k_adapter/eval_rats40k.py ADDED
@@ -0,0 +1,244 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import copy
3
+ import os
4
+ from pathlib import Path
5
+
6
+ import torch
7
+ import torch.distributed as dist
8
+ from tqdm import tqdm
9
+ from transformers import AutoModelForCausalLM, AutoTokenizer
10
+
11
+ from rats40k_common import (
12
+ atomic_write_json,
13
+ build_prediction,
14
+ build_prompt,
15
+ compute_metrics,
16
+ load_dataset_json,
17
+ valid_split_items,
18
+ )
19
+
20
+
21
def parse_args():
    """Parse the command-line options for RATs40K evaluation.

    Returns:
        argparse.Namespace holding the model/adapter paths, dataset split,
        output location, batching limits and generation settings.
    """
    cli = argparse.ArgumentParser(
        description="Evaluate ChatTime on RATs40K univariate anomaly QA."
    )
    # (flag, add_argument keyword arguments), registered in declaration order.
    option_specs = (
        ("--model_path", {"required": True}),
        ("--adapter_path", {"default": None}),
        (
            "--data_path",
            {"default": "/mnt/share01/sqk/datasets/RATs40K/RATs-Uni-TSImage_Reason.json"},
        ),
        ("--split", {"default": "TSAD_test"}),
        ("--output_dir", {"required": True}),
        ("--result_name", {"default": None}),
        ("--max_eval_samples", {"type": int, "default": None}),
        ("--eval_batch_size", {"type": int, "default": 4}),
        ("--max_input_tokens", {"type": int, "default": 3936}),
        ("--max_new_tokens", {"type": int, "default": 160}),
        ("--temperature", {"type": float, "default": 0.0}),
        ("--top_p", {"type": float, "default": 1.0}),
        ("--top_k", {"type": int, "default": 50}),
        (
            "--torch_dtype",
            {"choices": ["auto", "bf16", "fp16", "fp32"], "default": "fp16"},
        ),
        ("--allow_hf_download", {"action": "store_true"}),
    )
    for flag, spec in option_specs:
        cli.add_argument(flag, **spec)
    return cli.parse_args()
44
+
45
+
46
def init_distributed():
    """Read the launcher environment and start the process group if needed.

    WORLD_SIZE, RANK and LOCAL_RANK are read from the environment and default
    to a single-process run (1, 0, 0) when unset.

    Returns:
        tuple[int, int, int]: ``(rank, local_rank, world_size)``.
    """
    world_size = int(os.environ.get("WORLD_SIZE", "1"))
    rank = int(os.environ.get("RANK", "0"))
    local_rank = int(os.environ.get("LOCAL_RANK", "0"))

    use_cuda = torch.cuda.is_available()
    if use_cuda:
        torch.cuda.set_device(local_rank)

    if world_size > 1 and not dist.is_initialized():
        # NCCL requires CUDA devices; use gloo so a multi-process CPU launch
        # can still form a process group instead of failing at init.
        dist.init_process_group(backend="nccl" if use_cuda else "gloo")
    return rank, local_rank, world_size
55
+
56
+
57
def dtype_from_arg(value):
    """Translate the ``--torch_dtype`` choice into a ``from_pretrained`` value.

    ``"auto"`` is passed through as the string ``"auto"``; ``"bf16"`` and
    ``"fp16"`` map to the matching torch dtype; anything else yields
    ``torch.float32``.
    """
    known = (
        ("auto", "auto"),
        ("bf16", torch.bfloat16),
        ("fp16", torch.float16),
    )
    for name, resolved in known:
        if value == name:
            return resolved
    return torch.float32
65
+
66
+
67
def load_model_and_tokenizer(args, local_rank):
    """Load the tokenizer and causal LM, optionally wrapped with a PEFT adapter.

    Args:
        args: parsed CLI namespace; reads ``model_path``, ``adapter_path``,
            ``torch_dtype`` and ``allow_hf_download``.
        local_rank: index of the CUDA device this process should use.

    Returns:
        tuple: ``(model, tokenizer, device)`` where ``device`` is the string
        ``"cuda:<local_rank>"`` or ``"cpu"`` and the model is in eval mode.

    Raises:
        RuntimeError: if ``--adapter_path`` is given but ``peft`` cannot be
            imported.
    """
    # Unless downloads are explicitly allowed, resolve everything from disk.
    offline = not args.allow_hf_download
    has_cuda = torch.cuda.is_available()

    tokenizer = AutoTokenizer.from_pretrained(
        args.model_path,
        trust_remote_code=True,
        local_files_only=offline,
    )
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    # Left padding keeps every prompt flush against the generated tokens.
    tokenizer.padding_side = "left"

    device = f"cuda:{local_rank}" if has_cuda else "cpu"
    placement = {"": device} if has_cuda else None
    model = AutoModelForCausalLM.from_pretrained(
        args.model_path,
        trust_remote_code=True,
        torch_dtype=dtype_from_arg(args.torch_dtype),
        low_cpu_mem_usage=True,
        device_map=placement,
        local_files_only=offline,
    )

    if args.adapter_path:
        # peft is imported lazily so zero-shot runs do not depend on it.
        try:
            from peft import PeftModel
        except Exception as exc:
            raise RuntimeError(
                "peft is required when --adapter_path is provided. "
                "Install peft in the selected Python environment."
            ) from exc
        model = PeftModel.from_pretrained(
            model,
            args.adapter_path,
            local_files_only=offline,
        )

    model.eval()
    return model, tokenizer, device
104
+
105
+
106
def generate_responses(model, tokenizer, device, prompts, args):
    """Generate one decoded completion per prompt for a single batch.

    Prompts are never truncated: the batch is rejected if any prompt is longer
    than ``args.max_input_tokens`` or if the longest prompt plus
    ``args.max_new_tokens`` would exceed the model's
    ``max_position_embeddings`` (when the config exposes it).

    Args:
        model: causal LM exposing ``config``, ``generation_config`` and
            ``generate``.
        tokenizer: tokenizer used for both encoding and decoding.
        device: device string the input tensors are moved to.
        prompts: list of prompt strings; an empty list yields ``[]``.
        args: namespace providing ``max_input_tokens``, ``max_new_tokens``,
            ``temperature``, ``top_p`` and ``top_k``.

    Returns:
        list[str]: stripped completions, in the same order as ``prompts``.

    Raises:
        RuntimeError: if a prompt exceeds the input or context budget.
    """
    # Nothing to generate; avoid handing an empty batch to the tokenizer/model.
    if not prompts:
        return []

    raw_encodings = tokenizer(
        prompts,
        add_special_tokens=True,
        truncation=False,
    )
    prompt_lengths = [len(input_ids) for input_ids in raw_encodings["input_ids"]]
    max_prompt_length = max(prompt_lengths)
    if max_prompt_length > args.max_input_tokens:
        # First prompt that reaches the maximum length within this batch.
        longest = prompt_lengths.index(max_prompt_length)
        raise RuntimeError(
            "Prompt token length exceeds max_input_tokens. "
            f"max_prompt_length={max_prompt_length}, "
            f"max_input_tokens={args.max_input_tokens}, "
            f"batch_index={longest}. Increase MAX_INPUT_TOKENS or shorten the prompt."
        )

    model_context = getattr(model.config, "max_position_embeddings", None)
    if model_context and max_prompt_length + args.max_new_tokens > model_context:
        raise RuntimeError(
            "Prompt plus generation budget exceeds model context length. "
            f"max_prompt_length={max_prompt_length}, "
            f"max_new_tokens={args.max_new_tokens}, "
            f"model_context={model_context}. "
            "Lower MAX_NEW_TOKENS or MAX_INPUT_TOKENS."
        )

    inputs = tokenizer(
        prompts,
        return_tensors="pt",
        padding=True,
        truncation=False,
    )
    inputs = {key: value.to(device) for key, value in inputs.items()}

    # Work on a copy so the model's own generation_config is left untouched.
    do_sample = args.temperature > 0
    generation_config = copy.deepcopy(model.generation_config)
    generation_config.do_sample = do_sample
    generation_config.pad_token_id = tokenizer.pad_token_id
    generation_config.eos_token_id = tokenizer.eos_token_id
    if do_sample:
        generation_config.temperature = args.temperature
        generation_config.top_p = args.top_p
        generation_config.top_k = args.top_k
    else:
        # Greedy decoding: clear the sampling knobs rather than leave stale values.
        generation_config.temperature = None
        generation_config.top_p = None
        generation_config.top_k = None

    with torch.inference_mode():
        output = model.generate(
            **inputs,
            generation_config=generation_config,
            max_new_tokens=args.max_new_tokens,
        )

    # Drop the (padded) prompt columns so only newly generated tokens remain.
    # NOTE(review): this relies on the tokenizer padding on the left so that
    # all prompts end at the same column - confirm for any new tokenizer.
    new_tokens = output[:, inputs["input_ids"].shape[-1] :]
    return [
        response.strip()
        for response in tokenizer.batch_decode(new_tokens, skip_special_tokens=True)
    ]
168
+
169
+
170
def main():
    """Run sharded evaluation and merge per-rank predictions on rank 0.

    Each rank takes every ``world_size``-th item of the split, writes its
    predictions to ``<output_dir>/shards``, and rank 0 then merges all shard
    files, sorts them (all-digit keys numerically first, other keys after),
    and writes the merged predictions plus the metrics file.
    """
    args = parse_args()
    rank, local_rank, world_size = init_distributed()
    distributed = world_size > 1

    output_dir = Path(args.output_dir)
    shard_dir = output_dir / "shards"
    shard_dir.mkdir(parents=True, exist_ok=True)

    data = load_dataset_json(args.data_path)
    items = valid_split_items(data, args.split)
    limit = args.max_eval_samples
    if limit is not None and limit >= 0:
        items = items[:limit]
    # Strided sharding: rank r evaluates items r, r + world_size, ...
    shard_items = items[rank::world_size]

    if rank == 0:
        summary = (
            f"Dataset: {args.data_path}",
            f"Split: {args.split}",
            f"Total samples: {len(items)}",
            f"World size: {world_size}",
            f"Per-device eval batch size: {args.eval_batch_size}",
            f"Maximum global eval batch size: {args.eval_batch_size * world_size}",
            f"Output dir: {output_dir}",
        )
        for line in summary:
            print(line)

    model, tokenizer, device = load_model_and_tokenizer(args, local_rank)

    batch_size = max(1, args.eval_batch_size)
    num_batches = (len(shard_items) + batch_size - 1) // batch_size
    predictions = {}
    progress = tqdm(
        range(0, len(shard_items), batch_size),
        total=num_batches,
        desc=f"rank {rank}",
        disable=rank != 0,
    )
    for offset in progress:
        chunk = shard_items[offset : offset + batch_size]
        prompts = [
            build_prompt(sample["Observation"], sample.get("Source"))
            for _, sample in chunk
        ]
        responses = generate_responses(model, tokenizer, device, prompts, args)
        for (sample_id, _), text in zip(chunk, responses):
            predictions[sample_id] = build_prediction(text)

    result_name = args.result_name or f"{args.split}_predictions.json"
    shard_stem = Path(result_name).stem

    def shard_file(shard_rank):
        return shard_dir / f"{shard_stem}.rank{shard_rank}.json"

    atomic_write_json(predictions, shard_file(rank))

    # Every rank must have finished writing before rank 0 reads the shards.
    if distributed:
        dist.barrier()

    if rank == 0:
        merged = {}
        for shard_rank in range(world_size):
            merged.update(load_dataset_json(shard_file(shard_rank)))

        def sort_key(pair):
            key = pair[0]
            return (0, int(key)) if key.isdigit() else (1, key)

        merged = dict(sorted(merged.items(), key=sort_key))

        result_path = output_dir / result_name
        metrics_path = output_dir / f"{args.split}_metrics.json"
        atomic_write_json({args.split: merged}, result_path)
        atomic_write_json(compute_metrics(data, merged, args.split), metrics_path)
        print(f"Saved predictions: {result_path}")
        print(f"Saved metrics: {metrics_path}")

    if distributed:
        dist.barrier()
        dist.destroy_process_group()
241
+
242
+
243
+ if __name__ == "__main__":
244
+ main()
rats40k_adapter/finetune_rats40k_lora.py ADDED
@@ -0,0 +1,314 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import inspect
3
+ import os
4
+ from pathlib import Path
5
+
6
+ import torch
7
+ from torch.utils.data import Dataset
8
+ from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments
9
+
10
+ from rats40k_common import build_prompt, build_response, load_dataset_json, valid_split_items
11
+
12
+
13
def patch_accelerate_compatibility():
    """Make ``Accelerator.unwrap_model`` tolerate ``keep_torch_compile``.

    If the installed ``Accelerator.unwrap_model`` already declares a
    ``keep_torch_compile`` parameter nothing is changed. Otherwise the method
    is replaced by a wrapper that accepts the argument, discards it, and
    forwards the remaining arguments to the original method.
    """
    from accelerate import Accelerator

    legacy_unwrap = Accelerator.unwrap_model
    if "keep_torch_compile" in inspect.signature(legacy_unwrap).parameters:
        return

    def unwrap_model_compat(
        self,
        model,
        keep_fp32_wrapper=True,
        keep_torch_compile=True,
    ):
        # keep_torch_compile is accepted only for call compatibility; the
        # original method does not take it, so it is not forwarded.
        return legacy_unwrap(self, model, keep_fp32_wrapper=keep_fp32_wrapper)

    Accelerator.unwrap_model = unwrap_model_compat
    notice = (
        "Applied accelerate compatibility patch: "
        "Accelerator.unwrap_model accepts keep_torch_compile."
    )
    print(notice)
40
+
41
+
42
class PromptResponseDataset(Dataset):
    """Map-style dataset that tokenizes ``(idx, prompt, response)`` rows for SFT.

    Items are tokenized on access. The prompt is encoded with special tokens,
    the response is encoded without them and with the tokenizer's EOS string
    appended, and the prompt positions are masked with ``-100`` in ``labels``.

    Args:
        rows: sequence of ``(idx, prompt, response)`` tuples.
        tokenizer: callable tokenizer returning a mapping with ``"input_ids"``
            and exposing ``eos_token``.
        max_seq_length: maximum allowed prompt + response token count.

    Raises:
        RuntimeError: at construction time, if any row is longer than
            ``max_seq_length`` (rows are never truncated).
    """

    def __init__(self, rows, tokenizer, max_seq_length):
        self.rows = rows
        self.tokenizer = tokenizer
        self.max_seq_length = max_seq_length
        # Fail before training starts rather than part-way through an epoch.
        self._validate_lengths()

    def __len__(self):
        return len(self.rows)

    def _encode_pair(self, prompt, response):
        """Return ``(prompt_ids, response_ids)`` for one row.

        Shared by ``__getitem__`` and ``_validate_lengths`` so the lengths that
        are validated are exactly the lengths that are trained on.
        """
        prompt_ids = self.tokenizer(
            prompt,
            add_special_tokens=True,
            truncation=False,
        )["input_ids"]
        response_ids = self.tokenizer(
            response + self.tokenizer.eos_token,
            add_special_tokens=False,
            truncation=False,
        )["input_ids"]
        return prompt_ids, response_ids

    def __getitem__(self, index):
        """Return unpadded ``input_ids``, ``attention_mask`` and ``labels`` lists."""
        _, prompt, response = self.rows[index]
        prompt_ids, response_ids = self._encode_pair(prompt, response)

        input_ids = prompt_ids + response_ids
        prompt_len = len(prompt_ids)
        # Only response tokens contribute to the loss.
        labels = [-100] * prompt_len + input_ids[prompt_len:]
        return {
            "input_ids": input_ids,
            "attention_mask": [1] * len(input_ids),
            "labels": labels,
        }

    def _validate_lengths(self):
        """Print a length summary and raise if any row exceeds ``max_seq_length``.

        Scanning stops once five offending rows have been collected, so in the
        failure case the printed maxima cover only the rows scanned so far.
        """
        max_prompt_len = 0
        max_total_len = 0
        too_long = []
        for idx, prompt, response in self.rows:
            prompt_ids, response_ids = self._encode_pair(prompt, response)
            prompt_len = len(prompt_ids)
            total_len = prompt_len + len(response_ids)
            max_prompt_len = max(max_prompt_len, prompt_len)
            max_total_len = max(max_total_len, total_len)
            if total_len > self.max_seq_length:
                too_long.append((idx, prompt_len, total_len))
                if len(too_long) >= 5:
                    break

        print(
            "SFT token length check: "
            f"max_prompt_len={max_prompt_len}, "
            f"max_total_len={max_total_len}, "
            f"max_seq_length={self.max_seq_length}"
        )
        if too_long:
            examples = ", ".join(
                f"{idx}:prompt={prompt_len},total={total_len}"
                for idx, prompt_len, total_len in too_long
            )
            raise RuntimeError(
                "Some SFT samples exceed max_seq_length and would be truncated. "
                f"Examples: {examples}. Increase MAX_SEQ_LENGTH or shorten the prompt."
            )
114
+
115
+
116
class CausalLMCollator:
    """Right-pad a list of tokenized features into ``torch.long`` batch tensors.

    ``input_ids`` are padded with the tokenizer's ``pad_token_id``,
    ``attention_mask`` with 0 and ``labels`` with -100, all to the length of
    the longest ``input_ids`` in the batch.
    """

    def __init__(self, tokenizer):
        self.tokenizer = tokenizer

    def __call__(self, features):
        target_len = max(len(feature["input_ids"]) for feature in features)
        # The amount of padding is always derived from input_ids.
        gaps = [target_len - len(feature["input_ids"]) for feature in features]
        fill_values = {
            "input_ids": self.tokenizer.pad_token_id,
            "attention_mask": 0,
            "labels": -100,
        }
        return {
            key: torch.tensor(
                [feature[key] + [fill] * gap for feature, gap in zip(features, gaps)],
                dtype=torch.long,
            )
            for key, fill in fill_values.items()
        }
135
+
136
+
137
def parse_args():
    """Parse the command-line options for LoRA supervised fine-tuning.

    Returns:
        argparse.Namespace with model/data paths, optimisation
        hyper-parameters, LoRA settings and precision flags.
    """
    cli = argparse.ArgumentParser(
        description="LoRA SFT for ChatTime on RATs40K univariate anomaly QA."
    )
    # (flag, add_argument keyword arguments), registered in declaration order.
    option_specs = (
        ("--model_path", {"required": True}),
        (
            "--data_path",
            {"default": "/mnt/share01/sqk/datasets/RATs40K/RATs-Uni-TSImage_Reason.json"},
        ),
        ("--train_split", {"default": "TSAD_train"}),
        ("--output_dir", {"required": True}),
        ("--max_train_samples", {"type": int, "default": None}),
        ("--max_seq_length", {"type": int, "default": 4096}),
        ("--per_device_train_batch_size", {"type": int, "default": 2}),
        ("--gradient_accumulation_steps", {"type": int, "default": 8}),
        ("--num_train_epochs", {"type": float, "default": 2.0}),
        ("--learning_rate", {"type": float, "default": 2e-4}),
        ("--warmup_ratio", {"type": float, "default": 0.05}),
        ("--weight_decay", {"type": float, "default": 0.01}),
        ("--logging_steps", {"type": int, "default": 10}),
        ("--save_steps", {"type": int, "default": 200}),
        ("--save_total_limit", {"type": int, "default": 2}),
        ("--dataloader_num_workers", {"type": int, "default": 4}),
        ("--lora_rank", {"type": int, "default": 16}),
        ("--lora_alpha", {"type": int, "default": 32}),
        ("--lora_dropout", {"type": float, "default": 0.05}),
        ("--load_in_4bit", {"action": "store_true"}),
        ("--gradient_checkpointing", {"action": "store_true"}),
        ("--torch_dtype", {"choices": ["bf16", "fp16", "fp32"], "default": "fp16"}),
        ("--allow_hf_download", {"action": "store_true"}),
    )
    for flag, spec in option_specs:
        cli.add_argument(flag, **spec)
    return cli.parse_args()
168
+
169
+
170
def dtype_from_arg(value):
    """Map the ``--torch_dtype`` choice to a torch dtype.

    ``"bf16"`` and ``"fp16"`` select the matching half-precision dtype; any
    other value falls back to ``torch.float32``.
    """
    half_precision = (
        ("bf16", torch.bfloat16),
        ("fp16", torch.float16),
    )
    for name, resolved in half_precision:
        if value == name:
            return resolved
    return torch.float32
176
+
177
+
178
def local_rank():
    """Return the LOCAL_RANK environment variable as an int (0 when unset)."""
    raw_rank = os.environ.get("LOCAL_RANK", "0")
    return int(raw_rank)
180
+
181
+
182
def build_rows(data_path, split, max_samples):
    """Build ``(idx, prompt, response)`` training rows for one dataset split.

    Args:
        data_path: path handed to ``load_dataset_json``.
        split: split name handed to ``valid_split_items``.
        max_samples: keep only the first ``max_samples`` items when it is a
            non-negative int; ``None`` or a negative value keeps everything.

    Returns:
        list[tuple]: one ``(idx, prompt, response)`` tuple per retained item.
    """
    items = valid_split_items(load_dataset_json(data_path), split)
    if max_samples is not None and max_samples >= 0:
        items = items[:max_samples]
    return [
        (
            idx,
            build_prompt(item["Observation"], item.get("Source")),
            build_response(item.get("Thought", ""), item.get("ActionID")),
        )
        for idx, item in items
    ]
193
+
194
+
195
def load_model(args):
    """Load the base causal LM and wrap it with a LoRA adapter for training.

    With ``--load_in_4bit`` the model is loaded through a BitsAndBytes NF4
    config, placed via ``device_map`` and prepared with
    ``prepare_model_for_kbit_training``. Otherwise it is loaded normally,
    moved to this process's CUDA device when one is available, and gradient
    checkpointing is enabled on request.

    Args:
        args: parsed CLI namespace (model path, dtype, LoRA and memory flags).

    Returns:
        The PEFT-wrapped model.

    Raises:
        RuntimeError: if ``peft`` (or ``BitsAndBytesConfig`` for 4-bit
            loading) cannot be imported.
    """
    try:
        from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
    except Exception as exc:
        raise RuntimeError(
            "peft is required for SFT. Install peft in the selected Python environment."
        ) from exc

    compute_dtype = dtype_from_arg(args.torch_dtype)
    offline = not args.allow_hf_download
    has_cuda = torch.cuda.is_available()
    device = f"cuda:{local_rank()}" if has_cuda else "cpu"
    use_4bit = args.load_in_4bit

    quantization_config = None
    if use_4bit:
        try:
            from transformers import BitsAndBytesConfig
        except Exception as exc:
            raise RuntimeError(
                "transformers BitsAndBytesConfig is required for --load_in_4bit."
            ) from exc
        quantization_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=compute_dtype,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_use_double_quant=True,
        )

    # Only the quantized path places weights through device_map; the regular
    # path loads first and is moved to the device afterwards.
    placement = {"": device} if has_cuda and use_4bit else None
    model = AutoModelForCausalLM.from_pretrained(
        args.model_path,
        trust_remote_code=True,
        torch_dtype=compute_dtype,
        low_cpu_mem_usage=True,
        quantization_config=quantization_config,
        device_map=placement,
        local_files_only=offline,
    )

    if use_4bit:
        model = prepare_model_for_kbit_training(
            model,
            use_gradient_checkpointing=args.gradient_checkpointing,
        )
    else:
        if has_cuda:
            model.to(device)
        if args.gradient_checkpointing:
            model.gradient_checkpointing_enable()

    # Adapt every attention and MLP projection.
    projection_names = [
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ]
    lora_config = LoraConfig(
        r=args.lora_rank,
        lora_alpha=args.lora_alpha,
        lora_dropout=args.lora_dropout,
        bias="none",
        task_type="CAUSAL_LM",
        target_modules=projection_names,
    )
    model = get_peft_model(model, lora_config)
    model.print_trainable_parameters()
    return model
260
+
261
+
262
def main():
    """Fine-tune ChatTime with LoRA on the RATs40K training split.

    Parses the CLI, applies the accelerate compatibility patch, builds the
    prompt/response dataset (which validates sequence lengths up front), loads
    the LoRA-wrapped model, trains with the Hugging Face ``Trainer`` and saves
    the adapter and tokenizer to ``--output_dir``.
    """
    args = parse_args()
    patch_accelerate_compatibility()
    if torch.cuda.is_available():
        torch.cuda.set_device(local_rank())

    local_files_only = not args.allow_hf_download
    tokenizer = AutoTokenizer.from_pretrained(
        args.model_path,
        trust_remote_code=True,
        local_files_only=local_files_only,
    )
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    # Matches CausalLMCollator, which pads on the right.
    tokenizer.padding_side = "right"

    rows = build_rows(args.data_path, args.train_split, args.max_train_samples)
    train_dataset = PromptResponseDataset(rows, tokenizer, args.max_seq_length)
    model = load_model(args)

    training_args = TrainingArguments(
        output_dir=args.output_dir,
        per_device_train_batch_size=args.per_device_train_batch_size,
        gradient_accumulation_steps=args.gradient_accumulation_steps,
        num_train_epochs=args.num_train_epochs,
        learning_rate=args.learning_rate,
        warmup_ratio=args.warmup_ratio,
        weight_decay=args.weight_decay,
        logging_steps=args.logging_steps,
        save_steps=args.save_steps,
        save_total_limit=args.save_total_limit,
        dataloader_num_workers=args.dataloader_num_workers,
        # Mixed-precision mode follows the dtype the model was loaded in.
        bf16=args.torch_dtype == "bf16",
        fp16=args.torch_dtype == "fp16",
        report_to="none",
        # The dataset already yields exactly the keys the collator expects.
        remove_unused_columns=False,
        ddp_find_unused_parameters=False,
    )
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        data_collator=CausalLMCollator(tokenizer),
    )
    trainer.train()
    trainer.save_model(args.output_dir)
    # Write the tokenizer from the main process only; otherwise every rank
    # writes the same files into output_dir at the same time.
    if trainer.is_world_process_zero():
        tokenizer.save_pretrained(args.output_dir)
311
+
312
+
313
+ if __name__ == "__main__":
314
+ main()
rats40k_adapter/outputs/pipeline_20260608_175250/sft/adapter/README.md ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: /mnt/share01/sqk/models/ChatTime-1-7B-Chat
3
+ library_name: peft
4
+ ---
5
+
6
+ # Model Card for Model ID
7
+
8
+ <!-- Provide a quick summary of what the model is/does. -->
9
+
10
+
11
+
12
+ ## Model Details
13
+
14
+ ### Model Description
15
+
16
+ <!-- Provide a longer summary of what this model is. -->
17
+
18
+
19
+
20
+ - **Developed by:** [More Information Needed]
21
+ - **Funded by [optional]:** [More Information Needed]
22
+ - **Shared by [optional]:** [More Information Needed]
23
+ - **Model type:** [More Information Needed]
24
+ - **Language(s) (NLP):** [More Information Needed]
25
+ - **License:** [More Information Needed]
26
+ - **Finetuned from model [optional]:** [More Information Needed]
27
+
28
+ ### Model Sources [optional]
29
+
30
+ <!-- Provide the basic links for the model. -->
31
+
32
+ - **Repository:** [More Information Needed]
33
+ - **Paper [optional]:** [More Information Needed]
34
+ - **Demo [optional]:** [More Information Needed]
35
+
36
+ ## Uses
37
+
38
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
39
+
40
+ ### Direct Use
41
+
42
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
43
+
44
+ [More Information Needed]
45
+
46
+ ### Downstream Use [optional]
47
+
48
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
49
+
50
+ [More Information Needed]
51
+
52
+ ### Out-of-Scope Use
53
+
54
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
55
+
56
+ [More Information Needed]
57
+
58
+ ## Bias, Risks, and Limitations
59
+
60
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
61
+
62
+ [More Information Needed]
63
+
64
+ ### Recommendations
65
+
66
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
67
+
68
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
69
+
70
+ ## How to Get Started with the Model
71
+
72
+ Use the code below to get started with the model.
73
+
74
+ [More Information Needed]
75
+
76
+ ## Training Details
77
+
78
+ ### Training Data
79
+
80
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
81
+
82
+ [More Information Needed]
83
+
84
+ ### Training Procedure
85
+
86
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
87
+
88
+ #### Preprocessing [optional]
89
+
90
+ [More Information Needed]
91
+
92
+
93
+ #### Training Hyperparameters
94
+
95
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
96
+
97
+ #### Speeds, Sizes, Times [optional]
98
+
99
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
100
+
101
+ [More Information Needed]
102
+
103
+ ## Evaluation
104
+
105
+ <!-- This section describes the evaluation protocols and provides the results. -->
106
+
107
+ ### Testing Data, Factors & Metrics
108
+
109
+ #### Testing Data
110
+
111
+ <!-- This should link to a Dataset Card if possible. -->
112
+
113
+ [More Information Needed]
114
+
115
+ #### Factors
116
+
117
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
118
+
119
+ [More Information Needed]
120
+
121
+ #### Metrics
122
+
123
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
124
+
125
+ [More Information Needed]
126
+
127
+ ### Results
128
+
129
+ [More Information Needed]
130
+
131
+ #### Summary
132
+
133
+
134
+
135
+ ## Model Examination [optional]
136
+
137
+ <!-- Relevant interpretability work for the model goes here -->
138
+
139
+ [More Information Needed]
140
+
141
+ ## Environmental Impact
142
+
143
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
144
+
145
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
146
+
147
+ - **Hardware Type:** [More Information Needed]
148
+ - **Hours used:** [More Information Needed]
149
+ - **Cloud Provider:** [More Information Needed]
150
+ - **Compute Region:** [More Information Needed]
151
+ - **Carbon Emitted:** [More Information Needed]
152
+
153
+ ## Technical Specifications [optional]
154
+
155
+ ### Model Architecture and Objective
156
+
157
+ [More Information Needed]
158
+
159
+ ### Compute Infrastructure
160
+
161
+ [More Information Needed]
162
+
163
+ #### Hardware
164
+
165
+ [More Information Needed]
166
+
167
+ #### Software
168
+
169
+ [More Information Needed]
170
+
171
+ ## Citation [optional]
172
+
173
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
174
+
175
+ **BibTeX:**
176
+
177
+ [More Information Needed]
178
+
179
+ **APA:**
180
+
181
+ [More Information Needed]
182
+
183
+ ## Glossary [optional]
184
+
185
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
186
+
187
+ [More Information Needed]
188
+
189
+ ## More Information [optional]
190
+
191
+ [More Information Needed]
192
+
193
+ ## Model Card Authors [optional]
194
+
195
+ [More Information Needed]
196
+
197
+ ## Model Card Contact
198
+
199
+ [More Information Needed]
200
+ ### Framework versions
201
+
202
+ - PEFT 0.14.0
rats40k_adapter/outputs/pipeline_20260608_175250/sft/adapter/adapter_config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "/mnt/share01/sqk/models/ChatTime-1-7B-Chat",
5
+ "bias": "none",
6
+ "eva_config": null,
7
+ "exclude_modules": null,
8
+ "fan_in_fan_out": false,
9
+ "inference_mode": true,
10
+ "init_lora_weights": true,
11
+ "layer_replication": null,
12
+ "layers_pattern": null,
13
+ "layers_to_transform": null,
14
+ "loftq_config": {},
15
+ "lora_alpha": 32,
16
+ "lora_bias": false,
17
+ "lora_dropout": 0.05,
18
+ "megatron_config": null,
19
+ "megatron_core": "megatron.core",
20
+ "modules_to_save": null,
21
+ "peft_type": "LORA",
22
+ "r": 16,
23
+ "rank_pattern": {},
24
+ "revision": null,
25
+ "target_modules": [
26
+ "gate_proj",
27
+ "o_proj",
28
+ "v_proj",
29
+ "up_proj",
30
+ "q_proj",
31
+ "k_proj",
32
+ "down_proj"
33
+ ],
34
+ "task_type": "CAUSAL_LM",
35
+ "use_dora": false,
36
+ "use_rslora": false
37
+ }
rats40k_adapter/outputs/pipeline_20260608_175250/sft/adapter/added_tokens.json ADDED
The diff for this file is too large to render. See raw diff
 
rats40k_adapter/outputs/pipeline_20260608_175250/sft/adapter/checkpoint-800/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4c9bdb7f1fdf439aa0f3c5fb41c3ce23e5e6e873bea3f378cf26a709d3a3d22
3
+ size 15024
rats40k_adapter/outputs/pipeline_20260608_175250/sft/adapter/checkpoint-800/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f195cb3d44682c30ad9c0e1b320d29a952dc22676a666d5b7c0a105f554e012b
3
+ size 15024
rats40k_adapter/outputs/pipeline_20260608_175250/sft/adapter/checkpoint-800/rng_state_2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d644f876963b59d7d58322d0dbd4f84b5f005eb85a095c14ef20d7e8528948b
3
+ size 15024
rats40k_adapter/outputs/pipeline_20260608_175250/sft/adapter/checkpoint-800/rng_state_3.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2835319bf595568b23d432fbbab931291be0d746234b19ee4344a5852238e357
3
+ size 15024
rats40k_adapter/outputs/pipeline_20260608_175250/sft/adapter/checkpoint-800/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba165e391bcfa2e1188f6c4a775e972bb6f49e4c5970a96da748324529cedb20
3
+ size 988
rats40k_adapter/outputs/pipeline_20260608_175250/sft/adapter/checkpoint-800/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a175fc835d2784e8615427cf828af918ee04b274e34925b9edf89d29106ab1c1
3
+ size 1064
rats40k_adapter/outputs/pipeline_20260608_175250/sft/adapter/checkpoint-800/tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
3
+ size 499723
rats40k_adapter/outputs/pipeline_20260608_175250/sft/adapter/checkpoint-800/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:277bbf113ecf76ec5b62586e2b4fa91501b2571b1380f4721de69ef68675511f
3
+ size 5432
rats40k_adapter/outputs/pipeline_20260608_175250/sft/adapter/checkpoint-946/added_tokens.json ADDED
The diff for this file is too large to render. See raw diff
 
rats40k_adapter/outputs/pipeline_20260608_175250/sft/adapter/checkpoint-946/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:617fa12ac6cb39783256991c6577b58ec2981bdfd4cdfb58008163c743049429
3
+ size 1064
rats40k_adapter/outputs/pipeline_20260608_175250/sft/adapter/checkpoint-946/special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "unk_token": {
24
+ "content": "<unk>",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ }
30
+ }
rats40k_adapter/outputs/pipeline_20260608_175250/sft/adapter/checkpoint-946/tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
3
+ size 499723
rats40k_adapter/outputs/pipeline_20260608_175250/sft/adapter/checkpoint-946/tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff
 
rats40k_adapter/outputs/pipeline_20260608_175250/sft/adapter/checkpoint-946/trainer_state.json ADDED
@@ -0,0 +1,692 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 2.0,
6
+ "eval_steps": 500,
7
+ "global_step": 946,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.021144442976080348,
14
+ "grad_norm": 6.2254319190979,
15
+ "learning_rate": 3.3333333333333335e-05,
16
+ "loss": 3.0149,
17
+ "step": 10
18
+ },
19
+ {
20
+ "epoch": 0.042288885952160696,
21
+ "grad_norm": 0.7327573895454407,
22
+ "learning_rate": 7.500000000000001e-05,
23
+ "loss": 1.623,
24
+ "step": 20
25
+ },
26
+ {
27
+ "epoch": 0.06343332892824105,
28
+ "grad_norm": 0.5618261098861694,
29
+ "learning_rate": 0.00011250000000000001,
30
+ "loss": 1.1099,
31
+ "step": 30
32
+ },
33
+ {
34
+ "epoch": 0.08457777190432139,
35
+ "grad_norm": 0.48980122804641724,
36
+ "learning_rate": 0.00015416666666666668,
37
+ "loss": 0.9131,
38
+ "step": 40
39
+ },
40
+ {
41
+ "epoch": 0.10572221488040175,
42
+ "grad_norm": 0.488565593957901,
43
+ "learning_rate": 0.00019583333333333334,
44
+ "loss": 0.7716,
45
+ "step": 50
46
+ },
47
+ {
48
+ "epoch": 0.1268666578564821,
49
+ "grad_norm": 0.4297373592853546,
50
+ "learning_rate": 0.0001979955456570156,
51
+ "loss": 0.723,
52
+ "step": 60
53
+ },
54
+ {
55
+ "epoch": 0.14801110083256244,
56
+ "grad_norm": 0.4536869525909424,
57
+ "learning_rate": 0.0001957683741648107,
58
+ "loss": 0.6879,
59
+ "step": 70
60
+ },
61
+ {
62
+ "epoch": 0.16915554380864278,
63
+ "grad_norm": 0.41550788283348083,
64
+ "learning_rate": 0.0001935412026726058,
65
+ "loss": 0.6586,
66
+ "step": 80
67
+ },
68
+ {
69
+ "epoch": 0.19029998678472315,
70
+ "grad_norm": 0.38494592905044556,
71
+ "learning_rate": 0.0001913140311804009,
72
+ "loss": 0.6324,
73
+ "step": 90
74
+ },
75
+ {
76
+ "epoch": 0.2114444297608035,
77
+ "grad_norm": 0.3633956015110016,
78
+ "learning_rate": 0.000189086859688196,
79
+ "loss": 0.631,
80
+ "step": 100
81
+ },
82
+ {
83
+ "epoch": 0.23258887273688383,
84
+ "grad_norm": 0.3775959312915802,
85
+ "learning_rate": 0.0001868596881959911,
86
+ "loss": 0.6103,
87
+ "step": 110
88
+ },
89
+ {
90
+ "epoch": 0.2537333157129642,
91
+ "grad_norm": 0.35080480575561523,
92
+ "learning_rate": 0.0001846325167037862,
93
+ "loss": 0.6159,
94
+ "step": 120
95
+ },
96
+ {
97
+ "epoch": 0.2748777586890445,
98
+ "grad_norm": 0.4399946928024292,
99
+ "learning_rate": 0.0001824053452115813,
100
+ "loss": 0.5983,
101
+ "step": 130
102
+ },
103
+ {
104
+ "epoch": 0.2960222016651249,
105
+ "grad_norm": 0.4049876928329468,
106
+ "learning_rate": 0.0001801781737193764,
107
+ "loss": 0.5881,
108
+ "step": 140
109
+ },
110
+ {
111
+ "epoch": 0.31716664464120525,
112
+ "grad_norm": 0.3834834396839142,
113
+ "learning_rate": 0.0001779510022271715,
114
+ "loss": 0.5703,
115
+ "step": 150
116
+ },
117
+ {
118
+ "epoch": 0.33831108761728557,
119
+ "grad_norm": 0.3201199471950531,
120
+ "learning_rate": 0.0001757238307349666,
121
+ "loss": 0.5777,
122
+ "step": 160
123
+ },
124
+ {
125
+ "epoch": 0.35945553059336594,
126
+ "grad_norm": 0.3475135564804077,
127
+ "learning_rate": 0.0001734966592427617,
128
+ "loss": 0.5627,
129
+ "step": 170
130
+ },
131
+ {
132
+ "epoch": 0.3805999735694463,
133
+ "grad_norm": 0.3944849371910095,
134
+ "learning_rate": 0.0001712694877505568,
135
+ "loss": 0.569,
136
+ "step": 180
137
+ },
138
+ {
139
+ "epoch": 0.4017444165455266,
140
+ "grad_norm": 0.3674592673778534,
141
+ "learning_rate": 0.0001690423162583519,
142
+ "loss": 0.5621,
143
+ "step": 190
144
+ },
145
+ {
146
+ "epoch": 0.422888859521607,
147
+ "grad_norm": 0.3651335835456848,
148
+ "learning_rate": 0.000166815144766147,
149
+ "loss": 0.5597,
150
+ "step": 200
151
+ },
152
+ {
153
+ "epoch": 0.4440333024976873,
154
+ "grad_norm": 0.3435162901878357,
155
+ "learning_rate": 0.0001645879732739421,
156
+ "loss": 0.5538,
157
+ "step": 210
158
+ },
159
+ {
160
+ "epoch": 0.46517774547376767,
161
+ "grad_norm": 0.3870578408241272,
162
+ "learning_rate": 0.0001623608017817372,
163
+ "loss": 0.5574,
164
+ "step": 220
165
+ },
166
+ {
167
+ "epoch": 0.48632218844984804,
168
+ "grad_norm": 0.40335071086883545,
169
+ "learning_rate": 0.0001601336302895323,
170
+ "loss": 0.5394,
171
+ "step": 230
172
+ },
173
+ {
174
+ "epoch": 0.5074666314259284,
175
+ "grad_norm": 0.3105282187461853,
176
+ "learning_rate": 0.0001579064587973274,
177
+ "loss": 0.5403,
178
+ "step": 240
179
+ },
180
+ {
181
+ "epoch": 0.5286110744020087,
182
+ "grad_norm": 0.3729188144207001,
183
+ "learning_rate": 0.00015567928730512252,
184
+ "loss": 0.5466,
185
+ "step": 250
186
+ },
187
+ {
188
+ "epoch": 0.549755517378089,
189
+ "grad_norm": 0.3619287312030792,
190
+ "learning_rate": 0.0001534521158129176,
191
+ "loss": 0.5305,
192
+ "step": 260
193
+ },
194
+ {
195
+ "epoch": 0.5708999603541695,
196
+ "grad_norm": 0.34232136607170105,
197
+ "learning_rate": 0.0001512249443207127,
198
+ "loss": 0.5319,
199
+ "step": 270
200
+ },
201
+ {
202
+ "epoch": 0.5920444033302498,
203
+ "grad_norm": 0.38660332560539246,
204
+ "learning_rate": 0.0001489977728285078,
205
+ "loss": 0.5242,
206
+ "step": 280
207
+ },
208
+ {
209
+ "epoch": 0.6131888463063301,
210
+ "grad_norm": 0.35314109921455383,
211
+ "learning_rate": 0.0001467706013363029,
212
+ "loss": 0.5255,
213
+ "step": 290
214
+ },
215
+ {
216
+ "epoch": 0.6343332892824105,
217
+ "grad_norm": 0.3418401777744293,
218
+ "learning_rate": 0.00014454342984409802,
219
+ "loss": 0.5357,
220
+ "step": 300
221
+ },
222
+ {
223
+ "epoch": 0.6554777322584908,
224
+ "grad_norm": 0.357149213552475,
225
+ "learning_rate": 0.0001423162583518931,
226
+ "loss": 0.5131,
227
+ "step": 310
228
+ },
229
+ {
230
+ "epoch": 0.6766221752345711,
231
+ "grad_norm": 0.3720100224018097,
232
+ "learning_rate": 0.0001400890868596882,
233
+ "loss": 0.5072,
234
+ "step": 320
235
+ },
236
+ {
237
+ "epoch": 0.6977666182106516,
238
+ "grad_norm": 0.342650443315506,
239
+ "learning_rate": 0.0001378619153674833,
240
+ "loss": 0.5194,
241
+ "step": 330
242
+ },
243
+ {
244
+ "epoch": 0.7189110611867319,
245
+ "grad_norm": 0.34781211614608765,
246
+ "learning_rate": 0.00013563474387527841,
247
+ "loss": 0.5094,
248
+ "step": 340
249
+ },
250
+ {
251
+ "epoch": 0.7400555041628122,
252
+ "grad_norm": 0.3401576280593872,
253
+ "learning_rate": 0.00013340757238307352,
254
+ "loss": 0.5192,
255
+ "step": 350
256
+ },
257
+ {
258
+ "epoch": 0.7611999471388926,
259
+ "grad_norm": 0.3490856885910034,
260
+ "learning_rate": 0.0001311804008908686,
261
+ "loss": 0.5045,
262
+ "step": 360
263
+ },
264
+ {
265
+ "epoch": 0.7823443901149729,
266
+ "grad_norm": 0.3488720655441284,
267
+ "learning_rate": 0.0001289532293986637,
268
+ "loss": 0.502,
269
+ "step": 370
270
+ },
271
+ {
272
+ "epoch": 0.8034888330910532,
273
+ "grad_norm": 0.37278613448143005,
274
+ "learning_rate": 0.00012672605790645878,
275
+ "loss": 0.5038,
276
+ "step": 380
277
+ },
278
+ {
279
+ "epoch": 0.8246332760671337,
280
+ "grad_norm": 0.3677748441696167,
281
+ "learning_rate": 0.00012449888641425391,
282
+ "loss": 0.505,
283
+ "step": 390
284
+ },
285
+ {
286
+ "epoch": 0.845777719043214,
287
+ "grad_norm": 0.3815574049949646,
288
+ "learning_rate": 0.00012227171492204902,
289
+ "loss": 0.4997,
290
+ "step": 400
291
+ },
292
+ {
293
+ "epoch": 0.8669221620192943,
294
+ "grad_norm": 0.37245893478393555,
295
+ "learning_rate": 0.0001200445434298441,
296
+ "loss": 0.4989,
297
+ "step": 410
298
+ },
299
+ {
300
+ "epoch": 0.8880666049953746,
301
+ "grad_norm": 0.3642374277114868,
302
+ "learning_rate": 0.0001178173719376392,
303
+ "loss": 0.4992,
304
+ "step": 420
305
+ },
306
+ {
307
+ "epoch": 0.909211047971455,
308
+ "grad_norm": 0.32838189601898193,
309
+ "learning_rate": 0.0001155902004454343,
310
+ "loss": 0.4947,
311
+ "step": 430
312
+ },
313
+ {
314
+ "epoch": 0.9303554909475353,
315
+ "grad_norm": 0.36527854204177856,
316
+ "learning_rate": 0.00011336302895322941,
317
+ "loss": 0.4952,
318
+ "step": 440
319
+ },
320
+ {
321
+ "epoch": 0.9514999339236156,
322
+ "grad_norm": 0.3686304986476898,
323
+ "learning_rate": 0.0001111358574610245,
324
+ "loss": 0.4964,
325
+ "step": 450
326
+ },
327
+ {
328
+ "epoch": 0.9726443768996961,
329
+ "grad_norm": 0.3496793210506439,
330
+ "learning_rate": 0.0001089086859688196,
331
+ "loss": 0.4827,
332
+ "step": 460
333
+ },
334
+ {
335
+ "epoch": 0.9937888198757764,
336
+ "grad_norm": 0.3722958266735077,
337
+ "learning_rate": 0.0001066815144766147,
338
+ "loss": 0.4838,
339
+ "step": 470
340
+ },
341
+ {
342
+ "epoch": 1.0148011100832564,
343
+ "grad_norm": 0.3902372717857361,
344
+ "learning_rate": 0.00010445434298440981,
345
+ "loss": 0.4696,
346
+ "step": 480
347
+ },
348
+ {
349
+ "epoch": 1.0359455530593367,
350
+ "grad_norm": 0.3780229687690735,
351
+ "learning_rate": 0.00010222717149220491,
352
+ "loss": 0.4686,
353
+ "step": 490
354
+ },
355
+ {
356
+ "epoch": 1.057089996035417,
357
+ "grad_norm": 0.3552299737930298,
358
+ "learning_rate": 0.0001,
359
+ "loss": 0.457,
360
+ "step": 500
361
+ },
362
+ {
363
+ "epoch": 1.0782344390114973,
364
+ "grad_norm": 0.3887428045272827,
365
+ "learning_rate": 9.77728285077951e-05,
366
+ "loss": 0.4735,
367
+ "step": 510
368
+ },
369
+ {
370
+ "epoch": 1.0993788819875776,
371
+ "grad_norm": 0.3928622603416443,
372
+ "learning_rate": 9.55456570155902e-05,
373
+ "loss": 0.4675,
374
+ "step": 520
375
+ },
376
+ {
377
+ "epoch": 1.120523324963658,
378
+ "grad_norm": 0.3686327636241913,
379
+ "learning_rate": 9.331848552338531e-05,
380
+ "loss": 0.4804,
381
+ "step": 530
382
+ },
383
+ {
384
+ "epoch": 1.1416677679397385,
385
+ "grad_norm": 0.35772374272346497,
386
+ "learning_rate": 9.109131403118041e-05,
387
+ "loss": 0.4609,
388
+ "step": 540
389
+ },
390
+ {
391
+ "epoch": 1.1628122109158188,
392
+ "grad_norm": 0.35283800959587097,
393
+ "learning_rate": 8.88641425389755e-05,
394
+ "loss": 0.4693,
395
+ "step": 550
396
+ },
397
+ {
398
+ "epoch": 1.183956653891899,
399
+ "grad_norm": 0.37653160095214844,
400
+ "learning_rate": 8.663697104677061e-05,
401
+ "loss": 0.4551,
402
+ "step": 560
403
+ },
404
+ {
405
+ "epoch": 1.2051010968679794,
406
+ "grad_norm": 0.35314637422561646,
407
+ "learning_rate": 8.44097995545657e-05,
408
+ "loss": 0.4539,
409
+ "step": 570
410
+ },
411
+ {
412
+ "epoch": 1.2262455398440597,
413
+ "grad_norm": 0.35260340571403503,
414
+ "learning_rate": 8.21826280623608e-05,
415
+ "loss": 0.4531,
416
+ "step": 580
417
+ },
418
+ {
419
+ "epoch": 1.24738998282014,
420
+ "grad_norm": 0.3616096079349518,
421
+ "learning_rate": 7.995545657015591e-05,
422
+ "loss": 0.4645,
423
+ "step": 590
424
+ },
425
+ {
426
+ "epoch": 1.2685344257962203,
427
+ "grad_norm": 0.3933924436569214,
428
+ "learning_rate": 7.7728285077951e-05,
429
+ "loss": 0.4469,
430
+ "step": 600
431
+ },
432
+ {
433
+ "epoch": 1.2896788687723006,
434
+ "grad_norm": 0.3878353536128998,
435
+ "learning_rate": 7.550111358574611e-05,
436
+ "loss": 0.467,
437
+ "step": 610
438
+ },
439
+ {
440
+ "epoch": 1.3108233117483812,
441
+ "grad_norm": 0.41165846586227417,
442
+ "learning_rate": 7.32739420935412e-05,
443
+ "loss": 0.4504,
444
+ "step": 620
445
+ },
446
+ {
447
+ "epoch": 1.3319677547244615,
448
+ "grad_norm": 0.36190614104270935,
449
+ "learning_rate": 7.10467706013363e-05,
450
+ "loss": 0.4517,
451
+ "step": 630
452
+ },
453
+ {
454
+ "epoch": 1.3531121977005418,
455
+ "grad_norm": 0.3983185887336731,
456
+ "learning_rate": 6.881959910913141e-05,
457
+ "loss": 0.444,
458
+ "step": 640
459
+ },
460
+ {
461
+ "epoch": 1.3742566406766221,
462
+ "grad_norm": 0.38672661781311035,
463
+ "learning_rate": 6.659242761692652e-05,
464
+ "loss": 0.4488,
465
+ "step": 650
466
+ },
467
+ {
468
+ "epoch": 1.3954010836527027,
469
+ "grad_norm": 0.36232879757881165,
470
+ "learning_rate": 6.436525612472161e-05,
471
+ "loss": 0.4371,
472
+ "step": 660
473
+ },
474
+ {
475
+ "epoch": 1.416545526628783,
476
+ "grad_norm": 0.40571126341819763,
477
+ "learning_rate": 6.21380846325167e-05,
478
+ "loss": 0.4427,
479
+ "step": 670
480
+ },
481
+ {
482
+ "epoch": 1.4376899696048633,
483
+ "grad_norm": 0.36234796047210693,
484
+ "learning_rate": 5.9910913140311805e-05,
485
+ "loss": 0.4439,
486
+ "step": 680
487
+ },
488
+ {
489
+ "epoch": 1.4588344125809436,
490
+ "grad_norm": 0.4014786183834076,
491
+ "learning_rate": 5.7683741648106904e-05,
492
+ "loss": 0.4548,
493
+ "step": 690
494
+ },
495
+ {
496
+ "epoch": 1.479978855557024,
497
+ "grad_norm": 0.3884125053882599,
498
+ "learning_rate": 5.545657015590201e-05,
499
+ "loss": 0.4531,
500
+ "step": 700
501
+ },
502
+ {
503
+ "epoch": 1.5011232985331042,
504
+ "grad_norm": 0.3621061146259308,
505
+ "learning_rate": 5.322939866369711e-05,
506
+ "loss": 0.4407,
507
+ "step": 710
508
+ },
509
+ {
510
+ "epoch": 1.5222677415091845,
511
+ "grad_norm": 0.3601549565792084,
512
+ "learning_rate": 5.100222717149221e-05,
513
+ "loss": 0.439,
514
+ "step": 720
515
+ },
516
+ {
517
+ "epoch": 1.5434121844852648,
518
+ "grad_norm": 0.37766754627227783,
519
+ "learning_rate": 4.8775055679287305e-05,
520
+ "loss": 0.4397,
521
+ "step": 730
522
+ },
523
+ {
524
+ "epoch": 1.5645566274613452,
525
+ "grad_norm": 0.38728606700897217,
526
+ "learning_rate": 4.654788418708241e-05,
527
+ "loss": 0.4455,
528
+ "step": 740
529
+ },
530
+ {
531
+ "epoch": 1.5857010704374257,
532
+ "grad_norm": 0.3532933294773102,
533
+ "learning_rate": 4.432071269487751e-05,
534
+ "loss": 0.4375,
535
+ "step": 750
536
+ },
537
+ {
538
+ "epoch": 1.606845513413506,
539
+ "grad_norm": 0.37484633922576904,
540
+ "learning_rate": 4.209354120267261e-05,
541
+ "loss": 0.4386,
542
+ "step": 760
543
+ },
544
+ {
545
+ "epoch": 1.6279899563895863,
546
+ "grad_norm": 0.40252485871315,
547
+ "learning_rate": 3.986636971046771e-05,
548
+ "loss": 0.4394,
549
+ "step": 770
550
+ },
551
+ {
552
+ "epoch": 1.6491343993656669,
553
+ "grad_norm": 0.3895283043384552,
554
+ "learning_rate": 3.7639198218262804e-05,
555
+ "loss": 0.4356,
556
+ "step": 780
557
+ },
558
+ {
559
+ "epoch": 1.6702788423417472,
560
+ "grad_norm": 0.4058088958263397,
561
+ "learning_rate": 3.541202672605791e-05,
562
+ "loss": 0.4461,
563
+ "step": 790
564
+ },
565
+ {
566
+ "epoch": 1.6914232853178275,
567
+ "grad_norm": 0.40314358472824097,
568
+ "learning_rate": 3.318485523385301e-05,
569
+ "loss": 0.4311,
570
+ "step": 800
571
+ },
572
+ {
573
+ "epoch": 1.7125677282939078,
574
+ "grad_norm": 0.384658545255661,
575
+ "learning_rate": 3.095768374164811e-05,
576
+ "loss": 0.4363,
577
+ "step": 810
578
+ },
579
+ {
580
+ "epoch": 1.7337121712699881,
581
+ "grad_norm": 0.3810129463672638,
582
+ "learning_rate": 2.873051224944321e-05,
583
+ "loss": 0.4383,
584
+ "step": 820
585
+ },
586
+ {
587
+ "epoch": 1.7548566142460684,
588
+ "grad_norm": 0.39279329776763916,
589
+ "learning_rate": 2.650334075723831e-05,
590
+ "loss": 0.4228,
591
+ "step": 830
592
+ },
593
+ {
594
+ "epoch": 1.7760010572221487,
595
+ "grad_norm": 0.39959919452667236,
596
+ "learning_rate": 2.427616926503341e-05,
597
+ "loss": 0.4262,
598
+ "step": 840
599
+ },
600
+ {
601
+ "epoch": 1.797145500198229,
602
+ "grad_norm": 0.3827113211154938,
603
+ "learning_rate": 2.2048997772828508e-05,
604
+ "loss": 0.4311,
605
+ "step": 850
606
+ },
607
+ {
608
+ "epoch": 1.8182899431743094,
609
+ "grad_norm": 0.39276352524757385,
610
+ "learning_rate": 1.982182628062361e-05,
611
+ "loss": 0.4341,
612
+ "step": 860
613
+ },
614
+ {
615
+ "epoch": 1.83943438615039,
616
+ "grad_norm": 0.38558751344680786,
617
+ "learning_rate": 1.759465478841871e-05,
618
+ "loss": 0.4207,
619
+ "step": 870
620
+ },
621
+ {
622
+ "epoch": 1.8605788291264702,
623
+ "grad_norm": 0.4052915573120117,
624
+ "learning_rate": 1.5367483296213807e-05,
625
+ "loss": 0.4254,
626
+ "step": 880
627
+ },
628
+ {
629
+ "epoch": 1.8817232721025505,
630
+ "grad_norm": 0.3884909749031067,
631
+ "learning_rate": 1.3140311804008909e-05,
632
+ "loss": 0.4198,
633
+ "step": 890
634
+ },
635
+ {
636
+ "epoch": 1.902867715078631,
637
+ "grad_norm": 0.39251548051834106,
638
+ "learning_rate": 1.091314031180401e-05,
639
+ "loss": 0.4312,
640
+ "step": 900
641
+ },
642
+ {
643
+ "epoch": 1.9240121580547114,
644
+ "grad_norm": 0.382098525762558,
645
+ "learning_rate": 8.685968819599109e-06,
646
+ "loss": 0.4257,
647
+ "step": 910
648
+ },
649
+ {
650
+ "epoch": 1.9451566010307917,
651
+ "grad_norm": 0.3773449957370758,
652
+ "learning_rate": 6.45879732739421e-06,
653
+ "loss": 0.4215,
654
+ "step": 920
655
+ },
656
+ {
657
+ "epoch": 1.966301044006872,
658
+ "grad_norm": 0.39837542176246643,
659
+ "learning_rate": 4.231625835189309e-06,
660
+ "loss": 0.4232,
661
+ "step": 930
662
+ },
663
+ {
664
+ "epoch": 1.9874454869829523,
665
+ "grad_norm": 0.38558995723724365,
666
+ "learning_rate": 2.00445434298441e-06,
667
+ "loss": 0.4254,
668
+ "step": 940
669
+ }
670
+ ],
671
+ "logging_steps": 10,
672
+ "max_steps": 946,
673
+ "num_input_tokens_seen": 0,
674
+ "num_train_epochs": 2,
675
+ "save_steps": 200,
676
+ "stateful_callbacks": {
677
+ "TrainerControl": {
678
+ "args": {
679
+ "should_epoch_stop": false,
680
+ "should_evaluate": false,
681
+ "should_log": false,
682
+ "should_save": true,
683
+ "should_training_stop": true
684
+ },
685
+ "attributes": {}
686
+ }
687
+ },
688
+ "total_flos": 6.876005077664924e+18,
689
+ "train_batch_size": 1,
690
+ "trial_name": null,
691
+ "trial_params": null
692
+ }
rats40k_adapter/outputs/pipeline_20260608_175250/sft/adapter/checkpoint-946/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:277bbf113ecf76ec5b62586e2b4fa91501b2571b1380f4721de69ef68675511f
3
+ size 5432
rats40k_adapter/outputs/pipeline_20260608_175250/sft/adapter/special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "unk_token": {
24
+ "content": "<unk>",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ }
30
+ }
rats40k_adapter/outputs/pipeline_20260608_175250/sft/adapter/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
rats40k_adapter/outputs/pipeline_20260608_175250/sft/adapter/tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
3
+ size 499723
rats40k_adapter/outputs/pipeline_20260608_175250/sft/adapter/tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff
 
rats40k_adapter/outputs/pipeline_20260608_175250/sft/adapter/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:277bbf113ecf76ec5b62586e2b4fa91501b2571b1380f4721de69ef68675511f
3
+ size 5432
rats40k_adapter/outputs/pipeline_20260608_175250/sft/eval/RATs-Uni-TSImage_Reason_Reason_by_chattime_sft.json ADDED
The diff for this file is too large to render. See raw diff
 
rats40k_adapter/outputs/pipeline_20260608_175250/sft/eval/TSAD_test_metrics.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "split": "TSAD_test",
3
+ "num_dataset_samples": 6034,
4
+ "num_prediction_samples": 6034,
5
+ "num_valid_samples": 5980,
6
+ "num_missing_predictions": 0,
7
+ "num_invalid_predictions": 54,
8
+ "type_accuracy": 0.19581939799331102,
9
+ "type_precision_macro": 0.14972735341824286,
10
+ "type_recall_macro": 0.2321495128168519,
11
+ "type_f1_macro": 0.11749066864105973,
12
+ "binary_accuracy": 0.8377926421404682,
13
+ "binary_precision_macro": 0.7210006797954495,
14
+ "binary_recall_macro": 0.7288534724234521,
15
+ "binary_f1_macro": 0.7247792958267192
16
+ }
rats40k_adapter/outputs/pipeline_20260608_175250/sft/eval/shards/RATs-Uni-TSImage_Reason_Reason_by_chattime_sft.rank0.json ADDED
The diff for this file is too large to render. See raw diff
 
rats40k_adapter/outputs/pipeline_20260608_175250/sft/eval/shards/RATs-Uni-TSImage_Reason_Reason_by_chattime_sft.rank1.json ADDED
The diff for this file is too large to render. See raw diff
 
rats40k_adapter/outputs/pipeline_20260608_175250/sft/eval/shards/RATs-Uni-TSImage_Reason_Reason_by_chattime_sft.rank2.json ADDED
The diff for this file is too large to render. See raw diff
 
rats40k_adapter/outputs/pipeline_20260608_175250/sft/eval/shards/RATs-Uni-TSImage_Reason_Reason_by_chattime_sft.rank3.json ADDED
The diff for this file is too large to render. See raw diff
 
rats40k_adapter/outputs/pipeline_20260608_175250/zeroshot/RATs-Uni-TSImage_Reason_Reason_by_chattime_zeroshot.json ADDED
The diff for this file is too large to render. See raw diff
 
rats40k_adapter/outputs/pipeline_20260608_175250/zeroshot/TSAD_test_metrics.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "split": "TSAD_test",
3
+ "num_dataset_samples": 6034,
4
+ "num_prediction_samples": 6034,
5
+ "num_valid_samples": 6034,
6
+ "num_missing_predictions": 0,
7
+ "num_invalid_predictions": 0,
8
+ "type_accuracy": 0.14053695724229368,
9
+ "type_precision_macro": 0.016068805185920968,
10
+ "type_recall_macro": 0.0665676819309319,
11
+ "type_f1_macro": 0.025105704354246135,
12
+ "binary_accuracy": 0.3667550546900895,
13
+ "binary_precision_macro": 0.519821130521865,
14
+ "binary_recall_macro": 0.5270888471072895,
15
+ "binary_f1_macro": 0.36091116180192573
16
+ }
rats40k_adapter/outputs/pipeline_20260608_175250/zeroshot/shards/RATs-Uni-TSImage_Reason_Reason_by_chattime_zeroshot.rank0.json ADDED
The diff for this file is too large to render. See raw diff
 
rats40k_adapter/outputs/pipeline_20260608_175250/zeroshot/shards/RATs-Uni-TSImage_Reason_Reason_by_chattime_zeroshot.rank1.json ADDED
The diff for this file is too large to render. See raw diff
 
rats40k_adapter/outputs/pipeline_20260608_175250/zeroshot/shards/RATs-Uni-TSImage_Reason_Reason_by_chattime_zeroshot.rank2.json ADDED
The diff for this file is too large to render. See raw diff
 
rats40k_adapter/outputs/pipeline_20260608_175250/zeroshot/shards/RATs-Uni-TSImage_Reason_Reason_by_chattime_zeroshot.rank3.json ADDED
The diff for this file is too large to render. See raw diff
 
rats40k_adapter/rats40k_common.py ADDED
@@ -0,0 +1,347 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import importlib.util
2
+ import json
3
+ import os
4
+ import re
5
+ import sys
6
+ from pathlib import Path
7
+
8
+ import numpy as np
9
+
10
+
11
+ CHAT_TIME_DIR = Path(__file__).resolve().parents[1]
12
+ TIME_RA_PROMPT_PATH = (
13
+ CHAT_TIME_DIR.parent / "Time-RA" / "prompts" / "prompt_llama_anoclf_reason.py"
14
+ )
15
+ _TIME_RA_PROMPT_TEMPLATE = None
16
+ _TIME_RA_PROMPT_TEMPLATE_LOADED = False
17
+ if str(CHAT_TIME_DIR) not in sys.path:
18
+ sys.path.insert(0, str(CHAT_TIME_DIR))
19
+
20
+ from utils.prompt import getPrompt # noqa: E402
21
+ from utils.tools import Discretizer, Serializer # noqa: E402
22
+
23
+
24
# Maps each integer ActionID (0-14) to its human-readable category name.
# ID 0 is the normal class; IDs 1-14 name the individual anomaly types.
# ACTION_DESCRIPTIONS is indexed by the same IDs, so keep the two tables aligned.
ACTION_ID_TO_NAME = {
    0: "Normal Sequence",
    1: "Point Anomaly",
    2: "Periodic Change Anomaly",
    3: "Trend Change Anomaly",
    4: "Change Point Anomaly",
    5: "Distributional Change Anomaly",
    6: "Amplitude Anomaly",
    7: "Pattern Change Anomaly",
    8: "Sparse Anomaly",
    9: "Repeated Value Anomaly",
    10: "Sudden Flatline Anomaly",
    11: "Drift Anomaly",
    12: "Sudden Spike Anomaly",
    13: "Continuous Segment Anomaly",
    14: "Nonlinear Pattern Anomaly",
}
41
+
42
# One-sentence description of each ActionID, keyed by the same integer IDs
# (0-14) as ACTION_ID_TO_NAME. These sentences are embedded verbatim in the
# instruction text, so editing them changes the prompt the model sees.
ACTION_DESCRIPTIONS = {
    0: "There are no abnormal situations in this time series.",
    1: "A single data point significantly deviates from the local or global pattern.",
    2: "The original periodic pattern is disrupted.",
    3: "A sudden change appears in the long-term trend.",
    4: "Statistical properties such as mean or variance change abruptly.",
    5: "The statistical distribution changes significantly.",
    6: "The amplitude exceeds normal upper or lower bounds.",
    7: "The pattern suddenly changes from one form to another.",
    8: "Isolated anomalous patterns occasionally appear in a long series.",
    9: "Continuous or intermittent repeated values disrupt normal fluctuations.",
    10: "The series suddenly becomes a flat line with no normal fluctuations.",
    11: "The data gradually drifts away from the normal level.",
    12: "The data suddenly spikes or drops briefly and then returns to normal.",
    13: "A continuous segment deviates from the normal pattern.",
    14: "Nonlinear changes break the original linear rule.",
}
59
+
60
+
61
def action_mapping_text():
    """Render the ActionID table as newline-separated text.

    Each line has the form "<id>. <name>: <description>", with IDs taken from
    ACTION_ID_TO_NAME in ascending order and descriptions looked up in
    ACTION_DESCRIPTIONS.
    """
    return "\n".join(
        f"{action_id}. {name}: {ACTION_DESCRIPTIONS[action_id]}"
        for action_id, name in sorted(ACTION_ID_TO_NAME.items())
    )
69
+
70
+
71
def build_instruction(source):
    """Build the instruction text for one sample.

    Args:
        source: Domain the series comes from; a falsy value is rendered
            as ``"unknown"``.

    Returns:
        The Time-RA prompt template filled in with the domain when such a
        template could be loaded, otherwise a built-in instruction that
        embeds the full ActionID mapping.
    """
    domain = source or "unknown"
    template = load_time_ra_prompt_template()
    if not template:
        # No external template available: fall back to the built-in wording.
        return (
            "Classify the provided univariate time series for anomaly detection. "
            "The sequence is from the domain of "
            f"{domain}.\n\n"
            "Use exactly one ActionID from the following mapping:\n"
            f"{action_mapping_text()}\n\n"
            "Return exactly two fields: Thought and ActionID. "
            "Do not return a category name instead of ActionID."
        )
    return template.format(
        our_source=domain,
        our_observation="the serialized time series provided in the ### Input section",
    )
90
+
91
+
92
def load_time_ra_prompt_template():
    """Return the Time-RA ``USER_DETECTION_PROMPT`` template, loading it once.

    The prompt module at ``TIME_RA_PROMPT_PATH`` is executed on the first
    call only; the outcome is cached in module globals and reused afterwards.

    Returns:
        The template object, or ``None`` when the file is missing, no import
        spec/loader can be built, or the module lacks the attribute.
    """
    global _TIME_RA_PROMPT_TEMPLATE, _TIME_RA_PROMPT_TEMPLATE_LOADED
    if _TIME_RA_PROMPT_TEMPLATE_LOADED:
        return _TIME_RA_PROMPT_TEMPLATE
    # Mark the attempt before doing any work so it is never repeated.
    _TIME_RA_PROMPT_TEMPLATE_LOADED = True

    module_spec = None
    if TIME_RA_PROMPT_PATH.exists():
        module_spec = importlib.util.spec_from_file_location(
            "time_ra_prompt_llama_anoclf_reason",
            TIME_RA_PROMPT_PATH,
        )
    if module_spec is None or module_spec.loader is None:
        return None

    prompt_module = importlib.util.module_from_spec(module_spec)
    module_spec.loader.exec_module(prompt_module)
    _TIME_RA_PROMPT_TEMPLATE = getattr(prompt_module, "USER_DETECTION_PROMPT", None)
    return _TIME_RA_PROMPT_TEMPLATE
109
+
110
+
111
+ def _to_float_array(series):
112
+ if isinstance(series, np.ndarray):
113
+ arr = series.astype(float, copy=False)
114
+ else:
115
+ arr = np.asarray(series, dtype=float)
116
+ if arr.ndim != 1:
117
+ arr = arr.reshape(-1)
118
+ return arr
119
+
120
+
121
def serialize_observation(series):
    """Turn a raw series into its serialized text form.

    The series is flattened to a float array, passed through a fresh
    ``Discretizer`` and the result is handed to a fresh ``Serializer``.
    """
    values = _to_float_array(series)
    discretize = Discretizer().discretize
    serialize = Serializer().serialize
    return serialize(discretize(values))
126
+
127
+
128
def build_prompt(series, source, response=None):
    """Assemble the full ``analysis`` prompt for one sample.

    Args:
        series: Observation values to serialize into the prompt input.
        source: Domain name forwarded to ``build_instruction``.
        response: Optional response text; ``None`` becomes an empty string.

    Returns:
        Whatever ``getPrompt`` returns for the assembled fields.
    """
    prompt_fields = {
        "flag": "analysis",
        "instruction": build_instruction(source),
        "input": serialize_observation(series),
        "response": response if response is not None else "",
    }
    return getPrompt(**prompt_fields)
137
+
138
+
139
def build_response(thought, action_id):
    """Format a target response as ``Thought: ...`` / ``ActionID: ...``.

    Args:
        thought: Reasoning text; ``None``/empty becomes an empty string and
            surrounding whitespace is stripped.
        action_id: Anything ``int()`` accepts. Values that cannot be
            converted are written as ``-1``.

    Returns:
        The two-line response string.
    """
    cleaned = (thought or "").strip()
    try:
        numeric_id = int(action_id)
    except (TypeError, ValueError, OverflowError):
        # OverflowError covers float infinities, which int() rejects with a
        # different exception type than NaN (ValueError).
        numeric_id = -1
    return f"Thought: {cleaned}\nActionID: {numeric_id}"
146
+
147
+
148
def load_dataset_json(path):
    """Read the UTF-8 JSON file at *path* and return the decoded object."""
    with open(path, mode="r", encoding="utf-8") as handle:
        raw = handle.read()
    return json.loads(raw)
151
+
152
+
153
def valid_split_items(data, split):
    """List the usable ``(key, item)`` pairs of one dataset split.

    An entry is kept only when it is a dict whose ``"Observation"`` value is
    a list. Keys are converted to strings and the result is ordered with
    ``_sort_key``. A split that is missing or is not a dict yields ``[]``.
    """
    split_data = data.get(split, {})
    if not isinstance(split_data, dict):
        return []
    kept = []
    for idx, item in split_data.items():
        if not isinstance(item, dict):
            continue
        if not isinstance(item.get("Observation"), list):
            continue
        kept.append((str(idx), item))
    kept.sort(key=lambda pair: _sort_key(pair[0]))
    return kept
163
+
164
+
165
+ def _sort_key(value):
166
+ try:
167
+ return (0, int(value))
168
+ except (TypeError, ValueError):
169
+ return (1, str(value))
170
+
171
+
172
+ def _json_candidates(text):
173
+ text = "" if text is None else str(text).strip()
174
+ yield text
175
+ fenced = re.findall(r"```(?:json)?\s*(\{.*?\})\s*```", text, flags=re.S | re.I)
176
+ for snippet in fenced:
177
+ yield snippet
178
+ match = re.search(r"\{.*\}", text, flags=re.S)
179
+ if match:
180
+ yield match.group(0)
181
+
182
+
183
def parse_model_response(response):
    """Extract ``(thought, action_id)`` from raw model output.

    Parsing falls through several stages, stopping at the first one that
    yields a valid ActionID:

    1. JSON objects found by ``_json_candidates``.
    2. Labelled fields (``ActionID:``, ``Action:``, ``Label:``) and, as a
       last numeric resort, the first standalone 1-2 digit number.
    3. A category name from ``ACTION_ID_TO_NAME`` appearing in the text.

    Args:
        response: Model output; ``None`` is treated as an empty string.

    Returns:
        A ``(thought, action_id)`` tuple. ``action_id`` is ``None`` when no
        stage produced a valid id; ``thought`` may be an empty string.
    """
    text = "" if response is None else str(response)
    thought = ""
    action_id = None

    # Stage 1: structured JSON output.
    for snippet in _json_candidates(text):
        try:
            obj = json.loads(snippet)
        except (TypeError, ValueError):
            continue
        if not isinstance(obj, dict):
            continue
        # The first matching key wins for each field.
        for key in ("Thought", "thought", "Reason", "reason"):
            if key in obj:
                thought = str(obj[key]).strip()
                break
        for key in ("ActionID", "action_id", "actionId", "Action", "Label", "label"):
            if key in obj:
                action_id = _parse_action_id(obj[key])
                break
        if action_id is not None:
            return thought, action_id

    # Stage 2a: free-text "Thought: ..." running up to the next labelled
    # field or the end of the text. This overrides any thought taken from a
    # JSON object that lacked a usable ActionID.
    thought_match = re.search(
        r"Thought\s*[::]\s*(.*?)(?=\n\s*(?:ActionID|Action\s*ID|Action|Label)\s*[::]|$)",
        text,
        flags=re.I | re.S,
    )
    if thought_match:
        thought = thought_match.group(1).strip()

    # Stage 2b: patterns are tried from most to least specific. Only the
    # first match of each pattern is examined; if it is out of range the
    # next pattern is tried.
    patterns = [
        r"(?:ActionID|Action\s*ID)\s*[::]\s*(-?\d{1,2})",
        r'"ActionID"\s*:\s*(-?\d{1,2})',
        r"\bAction\s*[::]\s*(-?\d{1,2})",
        r"\bLabel\s*[::]\s*(-?\d{1,2})",
        # NOTE(review): this last pattern accepts any standalone 1-2 digit
        # number, so it can pick up an unrelated number from the reasoning
        # text, and it runs before the category-name lookup below -- confirm
        # that ordering is intended.
        r"\b(-?\d{1,2})\b",
    ]
    for pattern in patterns:
        match = re.search(pattern, text, flags=re.I)
        if not match:
            continue
        action_id = _parse_action_id(match.group(1))
        if action_id is not None:
            return thought, action_id

    # Stage 3: look for a category name, comparing on lower-cased text with
    # every run of non-alphanumerics collapsed to a single space.
    lowered = re.sub(r"[^a-z0-9]+", " ", text.lower())
    for candidate_id, name in ACTION_ID_TO_NAME.items():
        normalized_name = re.sub(r"[^a-z0-9]+", " ", name.lower())
        if normalized_name in lowered:
            return thought, candidate_id

    return thought, None
236
+
237
+
238
+ def _parse_action_id(value):
239
+ if isinstance(value, bool):
240
+ return None
241
+ if isinstance(value, int):
242
+ return value if 0 <= value <= 14 else None
243
+ match = re.search(r"-?\d{1,2}", str(value))
244
+ if not match:
245
+ return None
246
+ action_id = int(match.group(0))
247
+ return action_id if 0 <= action_id <= 14 else None
248
+
249
+
250
def build_prediction(response):
    """Turn raw model output into a prediction record.

    Returns:
        A dict that always carries ``Thought`` and ``RawResponse``. When an
        ActionID was recognised it also carries ``ActionID``, ``Action``
        (category name) and ``Label`` (0 for ActionID 0, otherwise 1); when
        not, it carries ``ParseError`` instead.
    """
    thought, action_id = parse_model_response(response)
    raw = "" if response is None else str(response)
    prediction = {"Thought": thought}
    if action_id is None:
        prediction["RawResponse"] = raw
        prediction["ParseError"] = "unrecognized_action_id"
    else:
        prediction["ActionID"] = action_id
        prediction["Action"] = ACTION_ID_TO_NAME[action_id]
        prediction["Label"] = int(action_id != 0)
        prediction["RawResponse"] = raw
    return prediction
265
+
266
+
267
def compute_metrics(data, predictions, split):
    """Score *predictions* against the ground truth of one dataset split.

    Args:
        data: Loaded dataset mapping split names to ``{key: item}`` dicts.
        predictions: Mapping from sample key to a prediction dict holding
            an ``ActionID``.
        split: Name of the split to evaluate.

    Returns:
        A dict with sample counts and, when at least one sample has both a
        valid prediction and a valid label, multi-class (``type_*``) and
        normal-vs-anomaly (``binary_*``) scores. If the scikit-learn path
        fails for any reason only the two accuracies are reported, together
        with a ``metric_warning`` entry.
    """
    items = dict(valid_split_items(data, split))
    y_true, y_pred, valid_keys = [], [], []
    for idx, item in items.items():
        pred = predictions.get(idx)
        if not isinstance(pred, dict):
            continue
        pred_id = _parse_action_id(pred.get("ActionID"))
        true_id = _parse_action_id(item.get("ActionID"))
        if pred_id is not None and true_id is not None:
            y_true.append(true_id)
            y_pred.append(pred_id)
            valid_keys.append(idx)

    # Dataset samples for which some prediction entry exists.
    answered = set(items) & set(predictions)
    metrics = {
        "split": split,
        "num_dataset_samples": len(items),
        "num_prediction_samples": len(predictions),
        "num_valid_samples": len(valid_keys),
        "num_missing_predictions": len(items) - len(answered),
        "num_invalid_predictions": len(answered) - len(valid_keys),
    }
    if not y_true:
        return metrics

    def to_binary(labels):
        # Collapse the 15 classes into normal (0) versus anomalous (1).
        return [1 if label != 0 else 0 for label in labels]

    try:
        from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

        true_bin = to_binary(y_true)
        pred_bin = to_binary(y_pred)
        macro = {"average": "macro", "zero_division": 0}
        # Build the full result first so a failure part-way through leaves
        # `metrics` untouched for the fallback below.
        scores = {
            "type_accuracy": float(accuracy_score(y_true, y_pred)),
            "type_precision_macro": float(precision_score(y_true, y_pred, **macro)),
            "type_recall_macro": float(recall_score(y_true, y_pred, **macro)),
            "type_f1_macro": float(f1_score(y_true, y_pred, **macro)),
            "binary_accuracy": float(accuracy_score(true_bin, pred_bin)),
            "binary_precision_macro": float(precision_score(true_bin, pred_bin, **macro)),
            "binary_recall_macro": float(recall_score(true_bin, pred_bin, **macro)),
            "binary_f1_macro": float(f1_score(true_bin, pred_bin, **macro)),
        }
        metrics.update(scores)
    except Exception:
        total = len(y_true)
        type_hits = sum(1 for truth, guess in zip(y_true, y_pred) if truth == guess)
        metrics["type_accuracy"] = type_hits / total
        binary_hits = sum(
            1
            for truth, guess in zip(to_binary(y_true), to_binary(y_pred))
            if truth == guess
        )
        metrics["binary_accuracy"] = binary_hits / total
        metrics["metric_warning"] = "sklearn unavailable; only accuracy was computed."

    return metrics
339
+
340
+
341
def atomic_write_json(obj, path):
    """Write *obj* as JSON to *path* without exposing a partial file.

    The data is first written to a sibling temporary file (suffixed with
    the current process id) and then moved over *path* with ``os.replace``.
    Missing parent directories are created.

    Args:
        obj: JSON-serialisable object.
        path: Destination file path (str or ``Path``).

    Raises:
        TypeError, ValueError, OSError: Propagated from serialisation or
            file operations; the temporary file is removed first and an
            existing *path* is left untouched.
    """
    path = Path(path)
    path.parent.mkdir(parents=True, exist_ok=True)
    tmp = path.with_suffix(path.suffix + f".tmp.{os.getpid()}")
    try:
        with open(tmp, "w", encoding="utf-8") as f:
            json.dump(obj, f, indent=4, ensure_ascii=False)
        os.replace(tmp, path)
    except BaseException:
        # Do not leave a half-written temp file behind on failure.
        try:
            os.unlink(tmp)
        except OSError:
            pass
        raise
rats40k_adapter/run_sft_4gpu.sh ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+ set -Eeuo pipefail
3
+
4
+ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
5
+ PROJECT_DIR="$(cd "${SCRIPT_DIR}/.." && pwd)"
6
+ cd "$PROJECT_DIR"
7
+
8
+ RUN_ID="${RUN_ID:-$(date +%Y%m%d_%H%M%S)}"
9
+ PYTHON_BIN="${PYTHON_BIN:-/dev/shm/suiqk/conda_envs/scalerag-ts-v4/bin/python}"
10
+ ACCELERATE_CONFIG="${ACCELERATE_CONFIG:-/mnt/share01/sqk/ITFormer/accelerate_config.yaml}"
11
+ CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-0,1,2,3}"
12
+ MODEL_PATH="${MODEL_PATH:-/mnt/share01/sqk/models/ChatTime-1-7B-Chat}"
13
+ ALLOW_HF_DOWNLOAD="${ALLOW_HF_DOWNLOAD:-0}"
14
+ DATA_PATH="${DATA_PATH:-/mnt/share01/sqk/datasets/RATs40K/RATs-Uni-TSImage_Reason.json}"
15
+ TRAIN_SPLIT="${TRAIN_SPLIT:-TSAD_train}"
16
+ EVAL_SPLIT="${EVAL_SPLIT:-TSAD_test}"
17
+ OUTPUT_ROOT="${OUTPUT_ROOT:-${PROJECT_DIR}/rats40k_adapter/outputs/sft_${RUN_ID}}"
18
+ ADAPTER_OUTPUT_DIR="${ADAPTER_OUTPUT_DIR:-${OUTPUT_ROOT}/adapter}"
19
+ EVAL_OUTPUT_DIR="${EVAL_OUTPUT_DIR:-${OUTPUT_ROOT}/eval}"
20
+ RUN_EVAL_AFTER_SFT="${RUN_EVAL_AFTER_SFT:-1}"
21
+ RESULT_NAME="${RESULT_NAME:-RATs-Uni-TSImage_Reason_Reason_by_chattime_sft.json}"
22
+ MAX_TRAIN_SAMPLES="${MAX_TRAIN_SAMPLES:-}"
23
+ MAX_EVAL_SAMPLES="${MAX_EVAL_SAMPLES:-}"
24
+ EVAL_BATCH_SIZE="${EVAL_BATCH_SIZE:-4}"
25
+ MAX_SEQ_LENGTH="${MAX_SEQ_LENGTH:-4096}"
26
+ PER_DEVICE_TRAIN_BATCH_SIZE="${PER_DEVICE_TRAIN_BATCH_SIZE:-1}"
27
+ GRADIENT_ACCUMULATION_STEPS="${GRADIENT_ACCUMULATION_STEPS:-16}"
28
+ NUM_TRAIN_EPOCHS="${NUM_TRAIN_EPOCHS:-2}"
29
+ LEARNING_RATE="${LEARNING_RATE:-2e-4}"
30
+ LORA_RANK="${LORA_RANK:-16}"
31
+ LORA_ALPHA="${LORA_ALPHA:-32}"
32
+ LORA_DROPOUT="${LORA_DROPOUT:-0.05}"
33
+ SAVE_STEPS="${SAVE_STEPS:-200}"
34
+ LOGGING_STEPS="${LOGGING_STEPS:-10}"
35
+ SAVE_TOTAL_LIMIT="${SAVE_TOTAL_LIMIT:-2}"
36
+ DATALOADER_NUM_WORKERS="${DATALOADER_NUM_WORKERS:-4}"
37
+ LOAD_IN_4BIT="${LOAD_IN_4BIT:-0}"
38
+ GRADIENT_CHECKPOINTING="${GRADIENT_CHECKPOINTING:-1}"
39
+ TORCH_DTYPE="${TORCH_DTYPE:-fp16}"
40
+ MAX_NEW_TOKENS="${MAX_NEW_TOKENS:-160}"
41
+ MAX_INPUT_TOKENS="${MAX_INPUT_TOKENS:-3936}"
42
+ LOG_DIR="${LOG_DIR:-${PROJECT_DIR}/rats40k_adapter/logs}"
43
+ LOG_FILE="${LOG_FILE:-${LOG_DIR}/sft_4gpu_${RUN_ID}.log}"
44
+
45
+ mkdir -p "$LOG_DIR" "$OUTPUT_ROOT"
46
+
47
# Print an error message to stderr and abort the script with status 1.
fail() {
  echo "$*" >&2
  exit 1
}

# From here on, mirror stdout and stderr into the log file while still
# printing to the terminal.
exec > >(tee -a "$LOG_FILE") 2>&1

export CUDA_VISIBLE_DEVICES
export PYTHONPATH="${PROJECT_DIR}:${PYTHONPATH:-}"
export TOKENIZERS_PARALLELISM=false
export PYTHONWARNINGS="ignore::FutureWarning:transformers.utils.hub"
export WANDB_MODE=offline

# Fail early, with an actionable message, before any GPU work starts.
[ -x "$PYTHON_BIN" ] || fail "Python executable not found: $PYTHON_BIN. Set PYTHON_BIN=/path/to/bin/python."
[ -f "$ACCELERATE_CONFIG" ] || fail "Accelerate config not found: $ACCELERATE_CONFIG"
[ -f "$DATA_PATH" ] || fail "RATs40K data file not found: $DATA_PATH"
[ -n "$MODEL_PATH" ] || fail "MODEL_PATH is required. Use a local ChatTime model path, or set ALLOW_HF_DOWNLOAD=1 with a HuggingFace model id."

if [ ! -d "$MODEL_PATH" ] && [ "$ALLOW_HF_DOWNLOAD" != "1" ]; then
  fail "MODEL_PATH is not a local directory: $MODEL_PATH. Set ALLOW_HF_DOWNLOAD=1 if you intentionally want HuggingFace downloads."
fi

# Run accelerate as a module of the selected interpreter so the launcher and
# the training code share one environment.
"$PYTHON_BIN" -c "import accelerate; print('accelerate:', accelerate.__version__)" || \
  fail "The selected Python cannot import accelerate: $PYTHON_BIN"
ACCELERATE_CMD=("$PYTHON_BIN" -m accelerate.commands.accelerate_cli)

if [ "$LOAD_IN_4BIT" = "1" ]; then
  "$PYTHON_BIN" -c "import importlib.metadata as m; print('bitsandbytes:', m.version('bitsandbytes'))" || \
    fail "LOAD_IN_4BIT=1 requires bitsandbytes in $PYTHON_BIN. Install it with: $PYTHON_BIN -m pip install bitsandbytes. To run without downloading it, set LOAD_IN_4BIT=0 PER_DEVICE_TRAIN_BATCH_SIZE=1 GRADIENT_ACCUMULATION_STEPS=16."
fi

# Optional training flags, added only when the matching setting is enabled.
TRAIN_EXTRA_ARGS=()
if [ -n "$MAX_TRAIN_SAMPLES" ]; then
  TRAIN_EXTRA_ARGS+=(--max_train_samples "$MAX_TRAIN_SAMPLES")
fi
if [ "$ALLOW_HF_DOWNLOAD" = "1" ]; then
  TRAIN_EXTRA_ARGS+=(--allow_hf_download)
fi
if [ "$LOAD_IN_4BIT" = "1" ]; then
  TRAIN_EXTRA_ARGS+=(--load_in_4bit)
fi
if [ "$GRADIENT_CHECKPOINTING" = "1" ]; then
  TRAIN_EXTRA_ARGS+=(--gradient_checkpointing)
fi

echo "Run id: $RUN_ID"
echo "Python: $PYTHON_BIN"
echo "Accelerate: ${ACCELERATE_CMD[*]}"
echo "CUDA_VISIBLE_DEVICES: $CUDA_VISIBLE_DEVICES"
echo "Model path: $MODEL_PATH"
echo "Data path: $DATA_PATH"
echo "Adapter output dir: $ADAPTER_OUTPUT_DIR"
echo "Eval output dir: $EVAL_OUTPUT_DIR"
echo "Log file: $LOG_FILE"

# Stage 1: LoRA fine-tuning.
# The ${arr[@]+"${arr[@]}"} form expands to nothing for an empty array;
# a plain "${arr[@]}" aborts under `set -u` on bash older than 4.4.
"${ACCELERATE_CMD[@]}" launch --config_file "$ACCELERATE_CONFIG" \
  rats40k_adapter/finetune_rats40k_lora.py \
  --model_path "$MODEL_PATH" \
  --data_path "$DATA_PATH" \
  --train_split "$TRAIN_SPLIT" \
  --output_dir "$ADAPTER_OUTPUT_DIR" \
  --max_seq_length "$MAX_SEQ_LENGTH" \
  --per_device_train_batch_size "$PER_DEVICE_TRAIN_BATCH_SIZE" \
  --gradient_accumulation_steps "$GRADIENT_ACCUMULATION_STEPS" \
  --num_train_epochs "$NUM_TRAIN_EPOCHS" \
  --learning_rate "$LEARNING_RATE" \
  --lora_rank "$LORA_RANK" \
  --lora_alpha "$LORA_ALPHA" \
  --lora_dropout "$LORA_DROPOUT" \
  --save_steps "$SAVE_STEPS" \
  --logging_steps "$LOGGING_STEPS" \
  --save_total_limit "$SAVE_TOTAL_LIMIT" \
  --dataloader_num_workers "$DATALOADER_NUM_WORKERS" \
  --torch_dtype "$TORCH_DTYPE" \
  ${TRAIN_EXTRA_ARGS[@]+"${TRAIN_EXTRA_ARGS[@]}"}

# Stage 2 (optional): evaluate the freshly trained adapter.
if [ "$RUN_EVAL_AFTER_SFT" = "1" ]; then
  EVAL_EXTRA_ARGS=()
  if [ -n "$MAX_EVAL_SAMPLES" ]; then
    EVAL_EXTRA_ARGS+=(--max_eval_samples "$MAX_EVAL_SAMPLES")
  fi
  if [ "$ALLOW_HF_DOWNLOAD" = "1" ]; then
    EVAL_EXTRA_ARGS+=(--allow_hf_download)
  fi

  "${ACCELERATE_CMD[@]}" launch --config_file "$ACCELERATE_CONFIG" \
    rats40k_adapter/eval_rats40k.py \
    --model_path "$MODEL_PATH" \
    --adapter_path "$ADAPTER_OUTPUT_DIR" \
    --data_path "$DATA_PATH" \
    --split "$EVAL_SPLIT" \
    --output_dir "$EVAL_OUTPUT_DIR" \
    --result_name "$RESULT_NAME" \
    --eval_batch_size "$EVAL_BATCH_SIZE" \
    --max_new_tokens "$MAX_NEW_TOKENS" \
    --max_input_tokens "$MAX_INPUT_TOKENS" \
    --torch_dtype "$TORCH_DTYPE" \
    ${EVAL_EXTRA_ARGS[@]+"${EVAL_EXTRA_ARGS[@]}"}
fi
rats40k_adapter/run_zeroshot_4gpu.sh ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+ set -Eeuo pipefail
3
+
4
+ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
5
+ PROJECT_DIR="$(cd "${SCRIPT_DIR}/.." && pwd)"
6
+ cd "$PROJECT_DIR"
7
+
8
+ RUN_ID="${RUN_ID:-$(date +%Y%m%d_%H%M%S)}"
9
+ PYTHON_BIN="${PYTHON_BIN:-/dev/shm/suiqk/conda_envs/scalerag-ts-v4/bin/python}"
10
+ ACCELERATE_CONFIG="${ACCELERATE_CONFIG:-/mnt/share01/sqk/ITFormer/accelerate_config.yaml}"
11
+ CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-0,1,2,3}"
12
+ MODEL_PATH="${MODEL_PATH:-/mnt/share01/sqk/models/ChatTime-1-7B-Chat}"
13
+ ALLOW_HF_DOWNLOAD="${ALLOW_HF_DOWNLOAD:-0}"
14
+ DATA_PATH="${DATA_PATH:-/mnt/share01/sqk/datasets/RATs40K/RATs-Uni-TSImage_Reason.json}"
15
+ SPLIT="${SPLIT:-TSAD_test}"
16
+ OUTPUT_DIR="${OUTPUT_DIR:-${PROJECT_DIR}/rats40k_adapter/outputs/zeroshot_${RUN_ID}}"
17
+ RESULT_NAME="${RESULT_NAME:-RATs-Uni-TSImage_Reason_Reason_by_chattime_zeroshot.json}"
18
+ MAX_EVAL_SAMPLES="${MAX_EVAL_SAMPLES:-}"
19
+ EVAL_BATCH_SIZE="${EVAL_BATCH_SIZE:-4}"
20
+ MAX_NEW_TOKENS="${MAX_NEW_TOKENS:-160}"
21
+ MAX_INPUT_TOKENS="${MAX_INPUT_TOKENS:-3936}"
22
+ TORCH_DTYPE="${TORCH_DTYPE:-fp16}"
23
+ LOG_DIR="${LOG_DIR:-${PROJECT_DIR}/rats40k_adapter/logs}"
24
+ LOG_FILE="${LOG_FILE:-${LOG_DIR}/zeroshot_4gpu_${RUN_ID}.log}"
25
+
26
+ mkdir -p "$LOG_DIR" "$OUTPUT_DIR"
27
+
28
# Print an error message to stderr and abort the script with status 1.
fail() {
  echo "$*" >&2
  exit 1
}

# From here on, mirror stdout and stderr into the log file while still
# printing to the terminal.
exec > >(tee -a "$LOG_FILE") 2>&1

export CUDA_VISIBLE_DEVICES
export PYTHONPATH="${PROJECT_DIR}:${PYTHONPATH:-}"
export TOKENIZERS_PARALLELISM=false
export PYTHONWARNINGS="ignore::FutureWarning:transformers.utils.hub"

# Fail early, with an actionable message, before any GPU work starts.
[ -x "$PYTHON_BIN" ] || fail "Python executable not found: $PYTHON_BIN. Set PYTHON_BIN=/path/to/bin/python."
[ -f "$ACCELERATE_CONFIG" ] || fail "Accelerate config not found: $ACCELERATE_CONFIG"
[ -f "$DATA_PATH" ] || fail "RATs40K data file not found: $DATA_PATH"
[ -n "$MODEL_PATH" ] || fail "MODEL_PATH is required. Use a local ChatTime model path, or set ALLOW_HF_DOWNLOAD=1 with a HuggingFace model id."

if [ ! -d "$MODEL_PATH" ] && [ "$ALLOW_HF_DOWNLOAD" != "1" ]; then
  fail "MODEL_PATH is not a local directory: $MODEL_PATH. Set ALLOW_HF_DOWNLOAD=1 if you intentionally want HuggingFace downloads."
fi

# Run accelerate as a module of the selected interpreter so the launcher and
# the evaluation code share one environment.
"$PYTHON_BIN" -c "import accelerate; print('accelerate:', accelerate.__version__)" || \
  fail "The selected Python cannot import accelerate: $PYTHON_BIN"
ACCELERATE_CMD=("$PYTHON_BIN" -m accelerate.commands.accelerate_cli)

# Optional flags, added only when the matching setting is enabled.
EXTRA_ARGS=()
if [ -n "$MAX_EVAL_SAMPLES" ]; then
  EXTRA_ARGS+=(--max_eval_samples "$MAX_EVAL_SAMPLES")
fi
if [ "$ALLOW_HF_DOWNLOAD" = "1" ]; then
  EXTRA_ARGS+=(--allow_hf_download)
fi

echo "Run id: $RUN_ID"
echo "Python: $PYTHON_BIN"
echo "Accelerate: ${ACCELERATE_CMD[*]}"
echo "CUDA_VISIBLE_DEVICES: $CUDA_VISIBLE_DEVICES"
echo "Model path: $MODEL_PATH"
echo "Data path: $DATA_PATH"
echo "Output dir: $OUTPUT_DIR"
echo "Log file: $LOG_FILE"

# EXTRA_ARGS is empty with the default settings. The ${arr[@]+"${arr[@]}"}
# form expands to nothing in that case; a plain "${arr[@]}" aborts under
# `set -u` on bash older than 4.4.
"${ACCELERATE_CMD[@]}" launch --config_file "$ACCELERATE_CONFIG" \
  rats40k_adapter/eval_rats40k.py \
  --model_path "$MODEL_PATH" \
  --data_path "$DATA_PATH" \
  --split "$SPLIT" \
  --output_dir "$OUTPUT_DIR" \
  --result_name "$RESULT_NAME" \
  --eval_batch_size "$EVAL_BATCH_SIZE" \
  --max_new_tokens "$MAX_NEW_TOKENS" \
  --max_input_tokens "$MAX_INPUT_TOKENS" \
  --torch_dtype "$TORCH_DTYPE" \
  ${EXTRA_ARGS[@]+"${EXTRA_ARGS[@]}"}
rats40k_adapter/run_zeroshot_then_sft_4gpu.sh ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+ set -Eeuo pipefail
3
+
4
+ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
5
+ PROJECT_DIR="$(cd "${SCRIPT_DIR}/.." && pwd)"
6
+ cd "$PROJECT_DIR"
7
+
8
+ RUN_ID="${RUN_ID:-$(date +%Y%m%d_%H%M%S)}"
9
+ PYTHON_BIN="${PYTHON_BIN:-/dev/shm/suiqk/conda_envs/scalerag-ts-v4/bin/python}"
10
+ ACCELERATE_CONFIG="${ACCELERATE_CONFIG:-/mnt/share01/sqk/ITFormer/accelerate_config.yaml}"
11
+ CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-0,1,2,3}"
12
+ MODEL_PATH="${MODEL_PATH:-/mnt/share01/sqk/models/ChatTime-1-7B-Chat}"
13
+ DATA_PATH="${DATA_PATH:-/mnt/share01/sqk/datasets/RATs40K/RATs-Uni-TSImage_Reason.json}"
14
+ ALLOW_HF_DOWNLOAD="${ALLOW_HF_DOWNLOAD:-0}"
15
+ LOAD_IN_4BIT="${LOAD_IN_4BIT:-0}"
16
+ PER_DEVICE_TRAIN_BATCH_SIZE="${PER_DEVICE_TRAIN_BATCH_SIZE:-1}"
17
+ GRADIENT_ACCUMULATION_STEPS="${GRADIENT_ACCUMULATION_STEPS:-16}"
18
+
19
+ OUTPUT_BASE="${OUTPUT_BASE:-${PROJECT_DIR}/rats40k_adapter/outputs/pipeline_${RUN_ID}}"
20
+ LOG_DIR="${LOG_DIR:-${PROJECT_DIR}/rats40k_adapter/logs}"
21
+
22
+ ZERO_SHOT_OUTPUT_DIR="${ZERO_SHOT_OUTPUT_DIR:-${OUTPUT_BASE}/zeroshot}"
23
+ SFT_OUTPUT_ROOT="${SFT_OUTPUT_ROOT:-${OUTPUT_BASE}/sft}"
24
+
25
+ ZERO_SHOT_LOG_FILE="${ZERO_SHOT_LOG_FILE:-${LOG_DIR}/pipeline_${RUN_ID}_zeroshot.log}"
26
+ SFT_LOG_FILE="${SFT_LOG_FILE:-${LOG_DIR}/pipeline_${RUN_ID}_sft.log}"
27
+
28
+ mkdir -p "$OUTPUT_BASE" "$LOG_DIR"
29
+
30
+ echo "Pipeline run id: $RUN_ID"
31
+ echo "Project dir: $PROJECT_DIR"
32
+ echo "Python: $PYTHON_BIN"
33
+ echo "CUDA_VISIBLE_DEVICES: $CUDA_VISIBLE_DEVICES"
34
+ echo "Model path: $MODEL_PATH"
35
+ echo "Data path: $DATA_PATH"
36
+ echo "LOAD_IN_4BIT: $LOAD_IN_4BIT"
37
+ echo "Per-device train batch size: $PER_DEVICE_TRAIN_BATCH_SIZE"
38
+ echo "Gradient accumulation steps: $GRADIENT_ACCUMULATION_STEPS"
39
+ echo "Zero-shot output dir: $ZERO_SHOT_OUTPUT_DIR"
40
+ echo "SFT output root: $SFT_OUTPUT_ROOT"
41
+
42
+ echo ""
43
+ echo "========== Stage 1/2: Zero-shot eval =========="
44
+ RUN_ID="$RUN_ID" \
45
+ PYTHON_BIN="$PYTHON_BIN" \
46
+ ACCELERATE_CONFIG="$ACCELERATE_CONFIG" \
47
+ CUDA_VISIBLE_DEVICES="$CUDA_VISIBLE_DEVICES" \
48
+ MODEL_PATH="$MODEL_PATH" \
49
+ DATA_PATH="$DATA_PATH" \
50
+ ALLOW_HF_DOWNLOAD="$ALLOW_HF_DOWNLOAD" \
51
+ OUTPUT_DIR="$ZERO_SHOT_OUTPUT_DIR" \
52
+ LOG_FILE="$ZERO_SHOT_LOG_FILE" \
53
+ bash rats40k_adapter/run_zeroshot_4gpu.sh
54
+
55
+ echo ""
56
+ echo "========== Stage 2/2: SFT + eval =========="
57
+ RUN_ID="$RUN_ID" \
58
+ PYTHON_BIN="$PYTHON_BIN" \
59
+ ACCELERATE_CONFIG="$ACCELERATE_CONFIG" \
60
+ CUDA_VISIBLE_DEVICES="$CUDA_VISIBLE_DEVICES" \
61
+ MODEL_PATH="$MODEL_PATH" \
62
+ DATA_PATH="$DATA_PATH" \
63
+ ALLOW_HF_DOWNLOAD="$ALLOW_HF_DOWNLOAD" \
64
+ LOAD_IN_4BIT="$LOAD_IN_4BIT" \
65
+ PER_DEVICE_TRAIN_BATCH_SIZE="$PER_DEVICE_TRAIN_BATCH_SIZE" \
66
+ GRADIENT_ACCUMULATION_STEPS="$GRADIENT_ACCUMULATION_STEPS" \
67
+ OUTPUT_ROOT="$SFT_OUTPUT_ROOT" \
68
+ LOG_FILE="$SFT_LOG_FILE" \
69
+ RUN_EVAL_AFTER_SFT="${RUN_EVAL_AFTER_SFT:-1}" \
70
+ bash rats40k_adapter/run_sft_4gpu.sh
71
+
72
+ echo ""
73
+ echo "Pipeline finished."
74
+ echo "Zero-shot outputs: $ZERO_SHOT_OUTPUT_DIR"
75
+ echo "SFT outputs: $SFT_OUTPUT_ROOT"
training/finetune.py ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import sys
3
+
4
+ import torch
5
+ from datasets import load_dataset
6
+ from transformers import TrainingArguments, LlamaTokenizer
7
+ from trl import SFTTrainer
8
+ from unsloth import FastLanguageModel, is_bfloat16_supported
9
+
10
# Script entry point: LoRA fine-tuning of a Llama-style model with
# Unsloth + TRL, followed by saving the merged model.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    # Required paths.
    parser.add_argument("--code_path", type=str, required=True, default=None)
    parser.add_argument("--model_path", type=str, required=True, default=None)
    parser.add_argument("--dataset_path", type=str, required=True, default=None)
    parser.add_argument("--log_path", type=str, required=True, default=None)
    parser.add_argument("--output_path", type=str, required=True, default=None)

    # Model loading options.
    parser.add_argument("--max_seq_length", type=int, default=2048)
    parser.add_argument("--load_in_4bit", action="store_true", default=False)

    # LoRA options.
    parser.add_argument("--lora_rank", type=int, default=16)
    parser.add_argument("--lora_alpha", type=int, default=16)
    parser.add_argument("--lora_dropout", type=float, default=0.00)
    parser.add_argument("--random_seed", type=int, default=3407)

    # Training schedule options.
    parser.add_argument("--num_train_epochs", type=int, default=1)
    parser.add_argument("--per_device_train_batch_size", type=int, default=64)
    parser.add_argument("--gradient_accumulation_steps", type=int, default=2)
    parser.add_argument("--save_steps", type=int, default=2)
    parser.add_argument("--logging_steps", type=int, default=2)
    parser.add_argument("--max_steps", type=int, default=-1)

    args = parser.parse_args()

    # Make the project code importable.
    # NOTE(review): nothing in this script imports from it afterwards -- confirm it is still needed.
    sys.path.append(args.code_path)

    # load tokenizer
    tokenizer = LlamaTokenizer.from_pretrained(args.model_path, trust_remote_code=True)
    # Reuse the EOS token for padding and pad on the right.
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.padding_side = "right"
    print(f"\nVocabulary number: {len(tokenizer.get_vocab())}\n")

    # Appended to every training example by formatting_func below.
    EOS_TOKEN = tokenizer.eos_token

    # load model
    # The tokenizer returned by Unsloth is discarded in favour of the
    # LlamaTokenizer loaded above.
    model, _ = FastLanguageModel.from_pretrained(
        model_name=args.model_path,
        max_seq_length=args.max_seq_length,
        dtype=None,
        load_in_4bit=args.load_in_4bit,
    )

    # add lora to llama model
    model = FastLanguageModel.get_peft_model(
        model,
        r=args.lora_rank,
        lora_alpha=args.lora_alpha,
        lora_dropout=args.lora_dropout,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj", ],
        # modules_to_save=["embed_tokens", "lm_head", ],
        bias="none",
        use_gradient_checkpointing="unsloth",
        random_state=args.random_seed,
        max_seq_length=args.max_seq_length,
    )


    # load dataset
    def formatting_func(example):
        """Return the example's ``text`` field with the EOS token appended."""
        return example["text"] + EOS_TOKEN


    print(f"\nLoading dataset in {args.dataset_path}")
    dataset = load_dataset(args.dataset_path, split="train")
    print(f"Dataset example: \n{dataset[0]['text']}\n")

    # train model
    # NOTE(review): both dataset_text_field and formatting_func are passed;
    # which one takes effect depends on the installed TRL version -- confirm.
    trainer = SFTTrainer(
        model=model,
        tokenizer=tokenizer,
        train_dataset=dataset,
        dataset_text_field="text",
        max_seq_length=args.max_seq_length,
        dataset_num_proc=64,
        packing=False,
        formatting_func=formatting_func,
        args=TrainingArguments(
            per_device_train_batch_size=args.per_device_train_batch_size,
            gradient_accumulation_steps=args.gradient_accumulation_steps,
            num_train_epochs=args.num_train_epochs,
            weight_decay=0.01,
            warmup_ratio=0.05,
            max_grad_norm=1.0,
            learning_rate=2e-4,
            logging_strategy="steps",
            logging_steps=args.logging_steps,
            save_strategy="steps",
            save_steps=args.save_steps,
            max_steps=args.max_steps,
            save_total_limit=1,
            logging_first_step=True,
            optim="adamw_8bit",
            lr_scheduler_type="cosine",
            seed=args.random_seed,
            output_dir=args.log_path,
            # Use bf16 when the hardware supports it, fp16 otherwise.
            fp16=not is_bfloat16_supported(),
            bf16=is_bfloat16_supported(),
        ),
    )

    # Show current memory stats (device 0 only), converted from bytes to GB.
    gpu_stats = torch.cuda.get_device_properties(0)
    start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
    max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
    print(f"\nGPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
    print(f"{start_gpu_memory} GB of memory reserved.\n")

    trainer_stats = trainer.train()

    # Show final memory and time stats; the "for training" figures are the
    # growth in peak reserved memory since the pre-training snapshot.
    used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
    used_memory_for_lora = round(used_memory - start_gpu_memory, 3)
    used_percentage = round(used_memory / max_memory * 100, 3)
    lora_percentage = round(used_memory_for_lora / max_memory * 100, 3)
    print(f"\n{trainer_stats.metrics['train_runtime']} seconds used for training.")
    print(f"{round(trainer_stats.metrics['train_runtime'] / 60, 2)} minutes used for training.")
    print(f"Peak reserved memory = {used_memory} GB.")
    print(f"Peak reserved memory for training = {used_memory_for_lora} GB.")
    print(f"Peak reserved memory % of max memory = {used_percentage} %.")
    print(f"Peak reserved memory for training % of max memory = {lora_percentage} %.\n")

    # save model and tokenizer
    model.save_pretrained_merged(args.output_path, tokenizer)
training/finetune.sh ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ DATA_PATH=""
2
+ CODE_PATH=""
3
+ MODEL_PATH=""
4
+
5
+ code_path=$CODE_PATH
6
+ model_path=$MODEL_PATH/ChatTime-1-7B-Base/
7
+ dataset_path=$DATA_PATH/ChatTime-1-Finetune-100K/
8
+ log_path=$MODEL_PATH/log_finetune/
9
+ output_path=$MODEL_PATH/ChatTime-1-7B-Chat/
10
+
11
+ lora_rank=8
12
+ lora_alpha=16
13
+ lora_dropout=0.00
14
+
15
+ num_train_epochs=4
16
+ per_device_train_batch_size=8
17
+ gradient_accumulation_steps=32
18
+ save_steps=40
19
+ logging_steps=4
20
+ max_steps=-1
21
+
22
+ python "$code_path/training/source/finetune.py" \
23
+ --code_path "$code_path" \
24
+ --model_path "$model_path" \
25
+ --dataset_path "$dataset_path" \
26
+ --log_path "$log_path" \
27
+ --output_path "$output_path" \
28
+ --lora_rank $lora_rank \
29
+ --lora_alpha $lora_alpha \
30
+ --lora_dropout $lora_dropout \
31
+ --num_train_epochs $num_train_epochs \
32
+ --per_device_train_batch_size $per_device_train_batch_size \
33
+ --gradient_accumulation_steps $gradient_accumulation_steps \
34
+ --save_steps $save_steps \
35
+ --logging_steps $logging_steps \
36
+ --max_steps $max_steps \
37
+ --load_in_4bit
training/pretrain.py ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import sys
3
+
4
+ import numpy as np
5
+ import torch
6
+ from datasets import load_dataset
7
+ from transformers import TrainingArguments, LlamaTokenizer
8
+ from trl import SFTTrainer
9
+ from unsloth import FastLanguageModel, is_bfloat16_supported
10
+
11
def bytes_to_gib(n_bytes):
    """Convert a byte count to GiB, rounded to three decimals for display."""
    return round(n_bytes / 1024 / 1024 / 1024, 3)


if __name__ == "__main__":
    # ------------------------------------------------------------------
    # Command-line interface
    # ------------------------------------------------------------------
    parser = argparse.ArgumentParser()
    # Locations: repository root (added to sys.path), base model, dataset,
    # trainer checkpoint/log directory and the merged-model output directory.
    parser.add_argument("--code_path", type=str, required=True)
    parser.add_argument("--model_path", type=str, required=True)
    parser.add_argument("--dataset_path", type=str, required=True)
    parser.add_argument("--log_path", type=str, required=True)
    parser.add_argument("--output_path", type=str, required=True)

    parser.add_argument("--max_seq_length", type=int, default=2048)
    parser.add_argument("--load_in_4bit", action="store_true", default=False)

    # LoRA configuration.
    parser.add_argument("--lora_rank", type=int, default=16)
    parser.add_argument("--lora_alpha", type=int, default=16)
    parser.add_argument("--lora_dropout", type=float, default=0.00)
    parser.add_argument("--random_seed", type=int, default=3407)

    # Optimisation schedule.
    parser.add_argument("--num_train_epochs", type=int, default=1)
    parser.add_argument("--per_device_train_batch_size", type=int, default=64)
    parser.add_argument("--gradient_accumulation_steps", type=int, default=2)
    parser.add_argument("--save_steps", type=int, default=2)
    parser.add_argument("--logging_steps", type=int, default=2)
    parser.add_argument("--max_steps", type=int, default=-1)
    # Worker processes used by the trainer for dataset preprocessing; the
    # default keeps the previously hard-coded value.
    parser.add_argument("--dataset_num_proc", type=int, default=64)

    # Discretizer / serializer settings used to build the time-series vocabulary.
    parser.add_argument("--low_limit", type=float, default=-1)
    parser.add_argument("--high_limit", type=float, default=1)
    parser.add_argument("--n_tokens", type=int, default=10002)
    parser.add_argument("--prec", type=int, default=4)
    parser.add_argument("--time_sep", type=str, default=" ")
    parser.add_argument("--time_flag", type=str, default="###")
    parser.add_argument("--nan_flag", type=str, default="Nan")

    args = parser.parse_args()

    # The project helpers are importable only once the repository root is on
    # sys.path, hence the deferred import.
    sys.path.append(args.code_path)
    from utils.tools import Discretizer, Serializer

    # ------------------------------------------------------------------
    # Construct the time-series vocabulary
    # ------------------------------------------------------------------
    discretizer = Discretizer(low_limit=args.low_limit, high_limit=args.high_limit, n_tokens=args.n_tokens)
    serializer = Serializer(prec=args.prec, time_sep=args.time_sep, time_flag=args.time_flag, nan_flag=args.nan_flag)

    # One token per interior bin centre plus one token for missing values.
    # `np.nan` is used because the `np.NaN` alias was removed in NumPy 2.0.
    vocabulary = np.concatenate((discretizer.centers[1:-1], [np.nan])).reshape(-1, 1)
    vocabulary = np.array([serializer.serialize(i) for i in vocabulary])
    print(f"\nVocabulary: \n{vocabulary}\n")

    # ------------------------------------------------------------------
    # Extend the Llama tokenizer with the new tokens
    # ------------------------------------------------------------------
    tokenizer = LlamaTokenizer.from_pretrained(args.model_path, trust_remote_code=True)
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.padding_side = "right"
    print(f"Old model pieces: {len(tokenizer.get_vocab())}")
    tokenizer.add_tokens(vocabulary.tolist())
    print(f"New model pieces: {len(tokenizer.get_vocab())}")

    EOS_TOKEN = tokenizer.eos_token

    # ------------------------------------------------------------------
    # Load the base model, resized to the extended vocabulary
    # ------------------------------------------------------------------
    # The tokenizer returned by unsloth is discarded in favour of the
    # extended tokenizer built above.
    model, _ = FastLanguageModel.from_pretrained(
        model_name=args.model_path,
        max_seq_length=args.max_seq_length,
        dtype=None,
        load_in_4bit=args.load_in_4bit,
        resize_model_vocab=len(tokenizer.get_vocab()),
    )

    # Attach LoRA adapters; the embedding matrix and LM head are listed in
    # `modules_to_save` alongside the adapters.
    model = FastLanguageModel.get_peft_model(
        model,
        r=args.lora_rank,
        lora_alpha=args.lora_alpha,
        lora_dropout=args.lora_dropout,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
        modules_to_save=["embed_tokens", "lm_head"],
        bias="none",
        use_gradient_checkpointing="unsloth",
        random_state=args.random_seed,
        max_seq_length=args.max_seq_length,
    )

    # ------------------------------------------------------------------
    # Load the dataset
    # ------------------------------------------------------------------
    def formatting_func(example):
        """Append the EOS token to the example's "text" field."""
        return example["text"] + EOS_TOKEN

    print(f"\nLoading dataset in {args.dataset_path}")
    dataset = load_dataset(args.dataset_path, split="train")
    print(f"Dataset example: \n{dataset[0]['text']}\n")

    # ------------------------------------------------------------------
    # Train
    # ------------------------------------------------------------------
    trainer = SFTTrainer(
        model=model,
        tokenizer=tokenizer,
        train_dataset=dataset,
        dataset_text_field="text",
        max_seq_length=args.max_seq_length,
        dataset_num_proc=args.dataset_num_proc,
        packing=False,
        formatting_func=formatting_func,
        args=TrainingArguments(
            per_device_train_batch_size=args.per_device_train_batch_size,
            gradient_accumulation_steps=args.gradient_accumulation_steps,
            num_train_epochs=args.num_train_epochs,
            weight_decay=0.01,
            warmup_ratio=0.05,
            max_grad_norm=1.0,
            learning_rate=2e-4,
            logging_strategy="steps",
            logging_steps=args.logging_steps,
            save_strategy="steps",
            save_steps=args.save_steps,
            max_steps=args.max_steps,
            save_total_limit=1,
            logging_first_step=True,
            optim="adamw_8bit",
            lr_scheduler_type="cosine",
            seed=args.random_seed,
            output_dir=args.log_path,
            # Use bf16 where the hardware supports it, otherwise fall back to fp16.
            fp16=not is_bfloat16_supported(),
            bf16=is_bfloat16_supported(),
        ),
    )

    # Memory baseline on GPU 0 before training starts.
    gpu_stats = torch.cuda.get_device_properties(0)
    start_gpu_memory = bytes_to_gib(torch.cuda.max_memory_reserved())
    max_memory = bytes_to_gib(gpu_stats.total_memory)
    print(f"\nGPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
    print(f"{start_gpu_memory} GB of memory reserved.\n")

    trainer_stats = trainer.train()

    # Final time and memory report; the "for training" figures are the
    # increase of the peak reserved memory over the pre-training baseline.
    used_memory = bytes_to_gib(torch.cuda.max_memory_reserved())
    used_memory_for_lora = round(used_memory - start_gpu_memory, 3)
    used_percentage = round(used_memory / max_memory * 100, 3)
    lora_percentage = round(used_memory_for_lora / max_memory * 100, 3)
    print(f"\n{trainer_stats.metrics['train_runtime']} seconds used for training.")
    print(f"{round(trainer_stats.metrics['train_runtime'] / 60, 2)} minutes used for training.")
    print(f"Peak reserved memory = {used_memory} GB.")
    print(f"Peak reserved memory for training = {used_memory_for_lora} GB.")
    print(f"Peak reserved memory % of max memory = {used_percentage} %.")
    print(f"Peak reserved memory for training % of max memory = {lora_percentage} %.\n")

    # Save the merged model together with the extended tokenizer.
    model.save_pretrained_merged(args.output_path, tokenizer)
training/pretrain.sh ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Launch LoRA continued pre-training: Llama-2-7b-hf -> ChatTime-1-7B-Base.
#
# Fill in the three root directories below before running.
DATA_PATH=""   # root directory that contains ChatTime-1-Pretrain-1M/
CODE_PATH=""   # root of this code repository (the directory holding training/)
MODEL_PATH=""  # root directory for training logs and the output model

# Fail early with a clear message instead of launching Python with paths
# such as "/training/pretrain.py" built from empty placeholders.
if [ -z "$DATA_PATH" ] || [ -z "$CODE_PATH" ] || [ -z "$MODEL_PATH" ]; then
    echo "Set DATA_PATH, CODE_PATH and MODEL_PATH at the top of this script before running." >&2
    exit 1
fi

code_path=$CODE_PATH
model_path=meta-llama/Llama-2-7b-hf
dataset_path=$DATA_PATH/ChatTime-1-Pretrain-1M/
log_path=$MODEL_PATH/log_pretrain/
output_path=$MODEL_PATH/ChatTime-1-7B-Base/

# LoRA adapter hyper-parameters.
lora_rank=8
lora_alpha=16
lora_dropout=0.00

# Optimisation schedule: 8 sequences x 32 accumulation steps = 256 sequences
# per optimizer step on each device. max_steps=-1 is forwarded unchanged to
# the training script.
num_train_epochs=2
per_device_train_batch_size=8
gradient_accumulation_steps=32
save_steps=200
logging_steps=20
max_steps=-1

# NOTE: pretrain.py lives at training/pretrain.py in this repository
# (there is no training/source/ directory).
python "$code_path/training/pretrain.py" \
    --code_path "$code_path" \
    --model_path "$model_path" \
    --dataset_path "$dataset_path" \
    --log_path "$log_path" \
    --output_path "$output_path" \
    --lora_rank "$lora_rank" \
    --lora_alpha "$lora_alpha" \
    --lora_dropout "$lora_dropout" \
    --num_train_epochs "$num_train_epochs" \
    --per_device_train_batch_size "$per_device_train_batch_size" \
    --gradient_accumulation_steps "$gradient_accumulation_steps" \
    --save_steps "$save_steps" \
    --logging_steps "$logging_steps" \
    --max_steps "$max_steps" \
    --load_in_4bit
tsqa_adapter/logs/sft_4gpu_20260615_140322.log ADDED
@@ -0,0 +1,875 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0
  0%| | 0/250 [00:00<?, ?it/s]Traceback (most recent call last):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ accelerate: 0.34.2
2
+ Run id: 20260615_140322
3
+ Python: /dev/shm/suiqk/conda_envs/scalerag-ts-v4/bin/python
4
+ Accelerate: /dev/shm/suiqk/conda_envs/scalerag-ts-v4/bin/python -m accelerate.commands.accelerate_cli
5
+ CUDA_VISIBLE_DEVICES: 0,1,2,3
6
+ Model path: /mnt/share01/sqk/models/ChatTime-1-7B-Chat
7
+ Data root: /mnt/share01/sqk/datasets/Time-MQA_TSQA/tmp (train=train.jsonl eval=eval.jsonl)
8
+ Adapter output dir: /mnt/share01/sqk/ChatTime/tsqa_adapter/outputs/sft_20260615_140322/adapter
9
+ Eval output dir: /mnt/share01/sqk/ChatTime/tsqa_adapter/outputs/sft_20260615_140322/eval
10
+ Log file: /mnt/share01/sqk/ChatTime/tsqa_adapter/logs/sft_4gpu_20260615_140322.log
11
+ ⚙️ Running in WANDB offline mode⚙️ Running in WANDB offline mode
12
+
13
+ ⚙️ Running in WANDB offline mode
14
+ ⚙️ Running in WANDB offline mode
15
+ Applied accelerate compatibility patch: Accelerator.unwrap_model accepts keep_torch_compile.
16
+ Applied accelerate compatibility patch: Accelerator.unwrap_model accepts keep_torch_compile.
17
+ Applied accelerate compatibility patch: Accelerator.unwrap_model accepts keep_torch_compile.
18
+ Applied accelerate compatibility patch: Accelerator.unwrap_model accepts keep_torch_compile.
19
+ SFT token length check: input_rows=8000, kept_rows=7987, skipped_overlong=13, left_truncated_rows=0, max_prompt_len=3818, max_total_len=3842, max_seq_length=4096, skip_overlong=True
20
+ SFT token length check: input_rows=8000, kept_rows=7987, skipped_overlong=13, left_truncated_rows=0, max_prompt_len=3818, max_total_len=3842, max_seq_length=4096, skip_overlong=True
21
+ SFT token length check: input_rows=8000, kept_rows=7987, skipped_overlong=13, left_truncated_rows=0, max_prompt_len=3818, max_total_len=3842, max_seq_length=4096, skip_overlong=True
22
+ SFT token length check: input_rows=8000, kept_rows=7987, skipped_overlong=13, left_truncated_rows=0, max_prompt_len=3818, max_total_len=3842, max_seq_length=4096, skip_overlong=True
23
+
24
+
25
+
26
+
27
+ trainable params: 39,976,960 || all params: 6,860,320,768 || trainable%: 0.5827
28
+ trainable params: 39,976,960 || all params: 6,860,320,768 || trainable%: 0.5827
29
+ /dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/site-packages/accelerate/accelerator.py:494: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead.
30
+ self.scaler = torch.cuda.amp.GradScaler(**kwargs)
31
+ /dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/site-packages/accelerate/accelerator.py:494: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead.
32
+ self.scaler = torch.cuda.amp.GradScaler(**kwargs)
33
+ No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
34
+ No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
35
+ trainable params: 39,976,960 || all params: 6,860,320,768 || trainable%: 0.5827
36
+ /dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/site-packages/accelerate/accelerator.py:494: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead.
37
+ self.scaler = torch.cuda.amp.GradScaler(**kwargs)
38
+ No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
39
+ trainable params: 39,976,960 || all params: 6,860,320,768 || trainable%: 0.5827
40
+ /dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/site-packages/accelerate/accelerator.py:494: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead.
41
+ self.scaler = torch.cuda.amp.GradScaler(**kwargs)
42
+ Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
43
+ No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
44
+
45
  0%| | 0/250 [00:00<?, ?it/s]Traceback (most recent call last):
46
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/queues.py", line 244, in _feed
47
+ obj = _ForkingPickler.dumps(obj)
48
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^
49
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/reduction.py", line 51, in dumps
50
+ cls(buf, protocol).dump(obj)
51
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/site-packages/torch/multiprocessing/reductions.py", line 619, in reduce_storage
52
+ df = multiprocessing.reduction.DupFd(fd)
53
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
54
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/reduction.py", line 198, in DupFd
55
+ return resource_sharer.DupFd(fd)
56
+ ^^^^^^^^^^^^^^^^^^^^^^^^^
57
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 53, in __init__
58
+ self._id = _resource_sharer.register(send, close)
59
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
60
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 76, in register
61
+ self._start()
62
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 126, in _start
63
+ self._listener = Listener(authkey=process.current_process().authkey, backlog=128)
64
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
65
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/connection.py", line 464, in __init__
66
+ self._listener = SocketListener(address, family, backlog)
67
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
68
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/connection.py", line 608, in __init__
69
+ self._socket.bind(address)
70
+ PermissionError: [Errno 1] Operation not permitted
71
+ Traceback (most recent call last):
72
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/queues.py", line 244, in _feed
73
+ obj = _ForkingPickler.dumps(obj)
74
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^
75
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/reduction.py", line 51, in dumps
76
+ cls(buf, protocol).dump(obj)
77
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/site-packages/torch/multiprocessing/reductions.py", line 619, in reduce_storage
78
+ df = multiprocessing.reduction.DupFd(fd)
79
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
80
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/reduction.py", line 198, in DupFd
81
+ return resource_sharer.DupFd(fd)
82
+ ^^^^^^^^^^^^^^^^^^^^^^^^^
83
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 53, in __init__
84
+ self._id = _resource_sharer.register(send, close)
85
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
86
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 76, in register
87
+ self._start()
88
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 126, in _start
89
+ self._listener = Listener(authkey=process.current_process().authkey, backlog=128)
90
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
91
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/connection.py", line 464, in __init__
92
+ self._listener = SocketListener(address, family, backlog)
93
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
94
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/connection.py", line 608, in __init__
95
+ self._socket.bind(address)
96
+ PermissionError: [Errno 1] Operation not permitted
97
+ Traceback (most recent call last):
98
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/queues.py", line 244, in _feed
99
+ obj = _ForkingPickler.dumps(obj)
100
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^
101
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/reduction.py", line 51, in dumps
102
+ cls(buf, protocol).dump(obj)
103
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/site-packages/torch/multiprocessing/reductions.py", line 619, in reduce_storage
104
+ df = multiprocessing.reduction.DupFd(fd)
105
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
106
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/reduction.py", line 198, in DupFd
107
+ return resource_sharer.DupFd(fd)
108
+ ^^^^^^^^^^^^^^^^^^^^^^^^^
109
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 53, in __init__
110
+ self._id = _resource_sharer.register(send, close)
111
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
112
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 76, in register
113
+ self._start()
114
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 126, in _start
115
+ self._listener = Listener(authkey=process.current_process().authkey, backlog=128)
116
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
117
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/connection.py", line 464, in __init__
118
+ self._listener = SocketListener(address, family, backlog)
119
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
120
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/connection.py", line 608, in __init__
121
+ self._socket.bind(address)
122
+ PermissionError: [Errno 1] Operation not permitted
123
+ Traceback (most recent call last):
124
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/queues.py", line 244, in _feed
125
+ obj = _ForkingPickler.dumps(obj)
126
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^
127
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/reduction.py", line 51, in dumps
128
+ cls(buf, protocol).dump(obj)
129
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/site-packages/torch/multiprocessing/reductions.py", line 619, in reduce_storage
130
+ df = multiprocessing.reduction.DupFd(fd)
131
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
132
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/reduction.py", line 198, in DupFd
133
+ return resource_sharer.DupFd(fd)
134
+ ^^^^^^^^^^^^^^^^^^^^^^^^^
135
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 53, in __init__
136
+ self._id = _resource_sharer.register(send, close)
137
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
138
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 76, in register
139
+ self._start()
140
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 126, in _start
141
+ self._listener = Listener(authkey=process.current_process().authkey, backlog=128)
142
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
143
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/connection.py", line 464, in __init__
144
+ self._listener = SocketListener(address, family, backlog)
145
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
146
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/connection.py", line 608, in __init__
147
+ self._socket.bind(address)
148
+ PermissionError: [Errno 1] Operation not permitted
149
+ Traceback (most recent call last):
150
+ Traceback (most recent call last):
151
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/queues.py", line 244, in _feed
152
+ obj = _ForkingPickler.dumps(obj)
153
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^
154
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/reduction.py", line 51, in dumps
155
+ cls(buf, protocol).dump(obj)
156
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/site-packages/torch/multiprocessing/reductions.py", line 619, in reduce_storage
157
+ df = multiprocessing.reduction.DupFd(fd)
158
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
159
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/reduction.py", line 198, in DupFd
160
+ return resource_sharer.DupFd(fd)
161
+ ^^^^^^^^^^^^^^^^^^^^^^^^^
162
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 53, in __init__
163
+ self._id = _resource_sharer.register(send, close)
164
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
165
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 76, in register
166
+ self._start()
167
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 126, in _start
168
+ self._listener = Listener(authkey=process.current_process().authkey, backlog=128)
169
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
170
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/connection.py", line 464, in __init__
171
+ self._listener = SocketListener(address, family, backlog)
172
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
173
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/connection.py", line 608, in __init__
174
+ self._socket.bind(address)
175
+ PermissionError: [Errno 1] Operation not permitted
176
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/queues.py", line 244, in _feed
177
+ obj = _ForkingPickler.dumps(obj)
178
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^
179
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/reduction.py", line 51, in dumps
180
+ cls(buf, protocol).dump(obj)
181
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/site-packages/torch/multiprocessing/reductions.py", line 619, in reduce_storage
182
+ df = multiprocessing.reduction.DupFd(fd)
183
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
184
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/reduction.py", line 198, in DupFd
185
+ return resource_sharer.DupFd(fd)
186
+ ^^^^^^^^^^^^^^^^^^^^^^^^^
187
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 53, in __init__
188
+ self._id = _resource_sharer.register(send, close)
189
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
190
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 76, in register
191
+ self._start()
192
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 126, in _start
193
+ self._listener = Listener(authkey=process.current_process().authkey, backlog=128)
194
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
195
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/connection.py", line 464, in __init__
196
+ self._listener = SocketListener(address, family, backlog)
197
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
198
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/connection.py", line 608, in __init__
199
+ self._socket.bind(address)
200
+ PermissionError: [Errno 1] Operation not permitted
201
+ Traceback (most recent call last):
202
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/queues.py", line 244, in _feed
203
+ obj = _ForkingPickler.dumps(obj)
204
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^
205
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/reduction.py", line 51, in dumps
206
+ cls(buf, protocol).dump(obj)
207
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/site-packages/torch/multiprocessing/reductions.py", line 619, in reduce_storage
208
+ df = multiprocessing.reduction.DupFd(fd)
209
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
210
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/reduction.py", line 198, in DupFd
211
+ return resource_sharer.DupFd(fd)
212
+ ^^^^^^^^^^^^^^^^^^^^^^^^^
213
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 53, in __init__
214
+ self._id = _resource_sharer.register(send, close)
215
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
216
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 76, in register
217
+ self._start()
218
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 126, in _start
219
+ self._listener = Listener(authkey=process.current_process().authkey, backlog=128)
220
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
221
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/connection.py", line 464, in __init__
222
+ self._listener = SocketListener(address, family, backlog)
223
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
224
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/connection.py", line 608, in __init__
225
+ self._socket.bind(address)
226
+ PermissionError: [Errno 1] Operation not permitted
227
+ Traceback (most recent call last):
228
+ Traceback (most recent call last):
229
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/queues.py", line 244, in _feed
230
+ obj = _ForkingPickler.dumps(obj)
231
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^
232
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/reduction.py", line 51, in dumps
233
+ cls(buf, protocol).dump(obj)
234
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/queues.py", line 244, in _feed
235
+ obj = _ForkingPickler.dumps(obj)
236
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^
237
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/site-packages/torch/multiprocessing/reductions.py", line 619, in reduce_storage
238
+ df = multiprocessing.reduction.DupFd(fd)
239
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
240
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/reduction.py", line 51, in dumps
241
+ cls(buf, protocol).dump(obj)
242
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/reduction.py", line 198, in DupFd
243
+ return resource_sharer.DupFd(fd)
244
+ ^^^^^^^^^^^^^^^^^^^^^^^^^
245
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 53, in __init__
246
+ self._id = _resource_sharer.register(send, close)
247
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
248
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/site-packages/torch/multiprocessing/reductions.py", line 619, in reduce_storage
249
+ df = multiprocessing.reduction.DupFd(fd)
250
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
251
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 76, in register
252
+ self._start()
253
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/reduction.py", line 198, in DupFd
254
+ return resource_sharer.DupFd(fd)
255
+ ^^^^^^^^^^^^^^^^^^^^^^^^^
256
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 126, in _start
257
+ self._listener = Listener(authkey=process.current_process().authkey, backlog=128)
258
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
259
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 53, in __init__
260
+ self._id = _resource_sharer.register(send, close)
261
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
262
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/connection.py", line 464, in __init__
263
+ self._listener = SocketListener(address, family, backlog)
264
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
265
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 76, in register
266
+ self._start()
267
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/connection.py", line 608, in __init__
268
+ self._socket.bind(address)
269
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 126, in _start
270
+ self._listener = Listener(authkey=process.current_process().authkey, backlog=128)
271
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
272
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/connection.py", line 464, in __init__
273
+ self._listener = SocketListener(address, family, backlog)
274
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
275
+ PermissionError: [Errno 1] Operation not permitted
276
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/connection.py", line 608, in __init__
277
+ self._socket.bind(address)
278
+ PermissionError: [Errno 1] Operation not permitted
279
+ Traceback (most recent call last):
280
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/queues.py", line 244, in _feed
281
+ obj = _ForkingPickler.dumps(obj)
282
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^
283
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/reduction.py", line 51, in dumps
284
+ cls(buf, protocol).dump(obj)
285
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/site-packages/torch/multiprocessing/reductions.py", line 619, in reduce_storage
286
+ df = multiprocessing.reduction.DupFd(fd)
287
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
288
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/reduction.py", line 198, in DupFd
289
+ return resource_sharer.DupFd(fd)
290
+ ^^^^^^^^^^^^^^^^^^^^^^^^^
291
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 53, in __init__
292
+ self._id = _resource_sharer.register(send, close)
293
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
294
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 76, in register
295
+ self._start()
296
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 126, in _start
297
+ self._listener = Listener(authkey=process.current_process().authkey, backlog=128)
298
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
299
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/connection.py", line 464, in __init__
300
+ self._listener = SocketListener(address, family, backlog)
301
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
302
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/connection.py", line 608, in __init__
303
+ self._socket.bind(address)
304
+ PermissionError: [Errno 1] Operation not permitted
305
+ Traceback (most recent call last):
306
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/queues.py", line 244, in _feed
307
+ obj = _ForkingPickler.dumps(obj)
308
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^
309
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/reduction.py", line 51, in dumps
310
+ cls(buf, protocol).dump(obj)
311
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/site-packages/torch/multiprocessing/reductions.py", line 619, in reduce_storage
312
+ df = multiprocessing.reduction.DupFd(fd)
313
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
314
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/reduction.py", line 198, in DupFd
315
+ return resource_sharer.DupFd(fd)
316
+ ^^^^^^^^^^^^^^^^^^^^^^^^^
317
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 53, in __init__
318
+ self._id = _resource_sharer.register(send, close)
319
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
320
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 76, in register
321
+ self._start()
322
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 126, in _start
323
+ self._listener = Listener(authkey=process.current_process().authkey, backlog=128)
324
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
325
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/connection.py", line 464, in __init__
326
+ self._listener = SocketListener(address, family, backlog)
327
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
328
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/connection.py", line 608, in __init__
329
+ self._socket.bind(address)
330
+ PermissionError: [Errno 1] Operation not permitted
331
+ Traceback (most recent call last):
332
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/queues.py", line 244, in _feed
333
+ obj = _ForkingPickler.dumps(obj)
334
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^
335
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/reduction.py", line 51, in dumps
336
+ cls(buf, protocol).dump(obj)
337
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/site-packages/torch/multiprocessing/reductions.py", line 619, in reduce_storage
338
+ df = multiprocessing.reduction.DupFd(fd)
339
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
340
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/reduction.py", line 198, in DupFd
341
+ return resource_sharer.DupFd(fd)
342
+ ^^^^^^^^^^^^^^^^^^^^^^^^^
343
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 53, in __init__
344
+ self._id = _resource_sharer.register(send, close)
345
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
346
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 76, in register
347
+ self._start()
348
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 126, in _start
349
+ self._listener = Listener(authkey=process.current_process().authkey, backlog=128)
350
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
351
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/connection.py", line 464, in __init__
352
+ self._listener = SocketListener(address, family, backlog)
353
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
354
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/connection.py", line 608, in __init__
355
+ self._socket.bind(address)
356
+ PermissionError: [Errno 1] Operation not permitted
357
+ Traceback (most recent call last):
358
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/queues.py", line 244, in _feed
359
+ obj = _ForkingPickler.dumps(obj)
360
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^
361
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/reduction.py", line 51, in dumps
362
+ cls(buf, protocol).dump(obj)
363
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/site-packages/torch/multiprocessing/reductions.py", line 619, in reduce_storage
364
+ df = multiprocessing.reduction.DupFd(fd)
365
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
366
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/reduction.py", line 198, in DupFd
367
+ return resource_sharer.DupFd(fd)
368
+ ^^^^^^^^^^^^^^^^^^^^^^^^^
369
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 53, in __init__
370
+ self._id = _resource_sharer.register(send, close)
371
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
372
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 76, in register
373
+ self._start()
374
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 126, in _start
375
+ self._listener = Listener(authkey=process.current_process().authkey, backlog=128)
376
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
377
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/connection.py", line 464, in __init__
378
+ self._listener = SocketListener(address, family, backlog)
379
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
380
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/connection.py", line 608, in __init__
381
+ self._socket.bind(address)
382
+ PermissionError: [Errno 1] Operation not permitted
383
+ Traceback (most recent call last):
384
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/queues.py", line 244, in _feed
385
+ obj = _ForkingPickler.dumps(obj)
386
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^
387
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/reduction.py", line 51, in dumps
388
+ cls(buf, protocol).dump(obj)
389
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/site-packages/torch/multiprocessing/reductions.py", line 619, in reduce_storage
390
+ df = multiprocessing.reduction.DupFd(fd)
391
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
392
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/reduction.py", line 198, in DupFd
393
+ return resource_sharer.DupFd(fd)
394
+ ^^^^^^^^^^^^^^^^^^^^^^^^^
395
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 53, in __init__
396
+ self._id = _resource_sharer.register(send, close)
397
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
398
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 76, in register
399
+ self._start()
400
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 126, in _start
401
+ self._listener = Listener(authkey=process.current_process().authkey, backlog=128)
402
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
403
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/connection.py", line 464, in __init__
404
+ self._listener = SocketListener(address, family, backlog)
405
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
406
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/connection.py", line 608, in __init__
407
+ self._socket.bind(address)
408
+ PermissionError: [Errno 1] Operation not permitted
409
+ Traceback (most recent call last):
410
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/queues.py", line 244, in _feed
411
+ obj = _ForkingPickler.dumps(obj)
412
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^
413
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/reduction.py", line 51, in dumps
414
+ cls(buf, protocol).dump(obj)
415
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/site-packages/torch/multiprocessing/reductions.py", line 619, in reduce_storage
416
+ df = multiprocessing.reduction.DupFd(fd)
417
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
418
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/reduction.py", line 198, in DupFd
419
+ return resource_sharer.DupFd(fd)
420
+ ^^^^^^^^^^^^^^^^^^^^^^^^^
421
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 53, in __init__
422
+ self._id = _resource_sharer.register(send, close)
423
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
424
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 76, in register
425
+ self._start()
426
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 126, in _start
427
+ self._listener = Listener(authkey=process.current_process().authkey, backlog=128)
428
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
429
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/connection.py", line 464, in __init__
430
+ self._listener = SocketListener(address, family, backlog)
431
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
432
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/connection.py", line 608, in __init__
433
+ self._socket.bind(address)
434
+ PermissionError: [Errno 1] Operation not permitted
435
+ Traceback (most recent call last):
436
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/queues.py", line 244, in _feed
437
+ obj = _ForkingPickler.dumps(obj)
438
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^
439
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/reduction.py", line 51, in dumps
440
+ cls(buf, protocol).dump(obj)
441
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/site-packages/torch/multiprocessing/reductions.py", line 619, in reduce_storage
442
+ df = multiprocessing.reduction.DupFd(fd)
443
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
444
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/reduction.py", line 198, in DupFd
445
+ return resource_sharer.DupFd(fd)
446
+ ^^^^^^^^^^^^^^^^^^^^^^^^^
447
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 53, in __init__
448
+ self._id = _resource_sharer.register(send, close)
449
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
450
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 76, in register
451
+ self._start()
452
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 126, in _start
453
+ self._listener = Listener(authkey=process.current_process().authkey, backlog=128)
454
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
455
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/connection.py", line 464, in __init__
456
+ self._listener = SocketListener(address, family, backlog)
457
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
458
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/connection.py", line 608, in __init__
459
+ self._socket.bind(address)
460
+ PermissionError: [Errno 1] Operation not permitted
461
+ Traceback (most recent call last):
462
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/queues.py", line 244, in _feed
463
+ obj = _ForkingPickler.dumps(obj)
464
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^
465
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/reduction.py", line 51, in dumps
466
+ cls(buf, protocol).dump(obj)
467
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/site-packages/torch/multiprocessing/reductions.py", line 619, in reduce_storage
468
+ df = multiprocessing.reduction.DupFd(fd)
469
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
470
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/reduction.py", line 198, in DupFd
471
+ return resource_sharer.DupFd(fd)
472
+ ^^^^^^^^^^^^^^^^^^^^^^^^^
473
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 53, in __init__
474
+ self._id = _resource_sharer.register(send, close)
475
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
476
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 76, in register
477
+ self._start()
478
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 126, in _start
479
+ self._listener = Listener(authkey=process.current_process().authkey, backlog=128)
480
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
481
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/connection.py", line 464, in __init__
482
+ self._listener = SocketListener(address, family, backlog)
483
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
484
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/connection.py", line 608, in __init__
485
+ self._socket.bind(address)
486
+ PermissionError: [Errno 1] Operation not permitted
487
+ Traceback (most recent call last):
488
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/queues.py", line 244, in _feed
489
+ obj = _ForkingPickler.dumps(obj)
490
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^
491
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/reduction.py", line 51, in dumps
492
+ cls(buf, protocol).dump(obj)
493
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/site-packages/torch/multiprocessing/reductions.py", line 619, in reduce_storage
494
+ df = multiprocessing.reduction.DupFd(fd)
495
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
496
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/reduction.py", line 198, in DupFd
497
+ return resource_sharer.DupFd(fd)
498
+ ^^^^^^^^^^^^^^^^^^^^^^^^^
499
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 53, in __init__
500
+ self._id = _resource_sharer.register(send, close)
501
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
502
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 76, in register
503
+ self._start()
504
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 126, in _start
505
+ self._listener = Listener(authkey=process.current_process().authkey, backlog=128)
506
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
507
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/connection.py", line 464, in __init__
508
+ self._listener = SocketListener(address, family, backlog)
509
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
510
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/connection.py", line 608, in __init__
511
+ self._socket.bind(address)
512
+ PermissionError: [Errno 1] Operation not permitted
513
+ Traceback (most recent call last):
514
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/queues.py", line 244, in _feed
515
+ obj = _ForkingPickler.dumps(obj)
516
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^
517
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/reduction.py", line 51, in dumps
518
+ cls(buf, protocol).dump(obj)
519
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/site-packages/torch/multiprocessing/reductions.py", line 619, in reduce_storage
520
+ df = multiprocessing.reduction.DupFd(fd)
521
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
522
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/reduction.py", line 198, in DupFd
523
+ return resource_sharer.DupFd(fd)
524
+ ^^^^^^^^^^^^^^^^^^^^^^^^^
525
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 53, in __init__
526
+ self._id = _resource_sharer.register(send, close)
527
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
528
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 76, in register
529
+ self._start()
530
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 126, in _start
531
+ self._listener = Listener(authkey=process.current_process().authkey, backlog=128)
532
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
533
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/connection.py", line 464, in __init__
534
+ self._listener = SocketListener(address, family, backlog)
535
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
536
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/connection.py", line 608, in __init__
537
+ self._socket.bind(address)
538
+ PermissionError: [Errno 1] Operation not permitted
539
+ Traceback (most recent call last):
540
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/queues.py", line 244, in _feed
541
+ obj = _ForkingPickler.dumps(obj)
542
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^
543
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/reduction.py", line 51, in dumps
544
+ cls(buf, protocol).dump(obj)
545
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/site-packages/torch/multiprocessing/reductions.py", line 619, in reduce_storage
546
+ df = multiprocessing.reduction.DupFd(fd)
547
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
548
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/reduction.py", line 198, in DupFd
549
+ return resource_sharer.DupFd(fd)
550
+ ^^^^^^^^^^^^^^^^^^^^^^^^^
551
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 53, in __init__
552
+ self._id = _resource_sharer.register(send, close)
553
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
554
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 76, in register
555
+ self._start()
556
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 126, in _start
557
+ self._listener = Listener(authkey=process.current_process().authkey, backlog=128)
558
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
559
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/connection.py", line 464, in __init__
560
+ self._listener = SocketListener(address, family, backlog)
561
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
562
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/connection.py", line 608, in __init__
563
+ self._socket.bind(address)
564
+ PermissionError: [Errno 1] Operation not permitted
565
+ Traceback (most recent call last):
566
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/queues.py", line 244, in _feed
567
+ obj = _ForkingPickler.dumps(obj)
568
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^
569
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/reduction.py", line 51, in dumps
570
+ cls(buf, protocol).dump(obj)
571
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/site-packages/torch/multiprocessing/reductions.py", line 619, in reduce_storage
572
+ df = multiprocessing.reduction.DupFd(fd)
573
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
574
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/reduction.py", line 198, in DupFd
575
+ return resource_sharer.DupFd(fd)
576
+ ^^^^^^^^^^^^^^^^^^^^^^^^^
577
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 53, in __init__
578
+ self._id = _resource_sharer.register(send, close)
579
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
580
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 76, in register
581
+ self._start()
582
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 126, in _start
583
+ self._listener = Listener(authkey=process.current_process().authkey, backlog=128)
584
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
585
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/connection.py", line 464, in __init__
586
+ self._listener = SocketListener(address, family, backlog)
587
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
588
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/connection.py", line 608, in __init__
589
+ self._socket.bind(address)
590
+ PermissionError: [Errno 1] Operation not permitted
591
+ Traceback (most recent call last):
592
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/queues.py", line 244, in _feed
593
+ obj = _ForkingPickler.dumps(obj)
594
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^
595
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/reduction.py", line 51, in dumps
596
+ cls(buf, protocol).dump(obj)
597
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/site-packages/torch/multiprocessing/reductions.py", line 619, in reduce_storage
598
+ df = multiprocessing.reduction.DupFd(fd)
599
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
600
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/reduction.py", line 198, in DupFd
601
+ return resource_sharer.DupFd(fd)
602
+ ^^^^^^^^^^^^^^^^^^^^^^^^^
603
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 53, in __init__
604
+ self._id = _resource_sharer.register(send, close)
605
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
606
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 76, in register
607
+ self._start()
608
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 126, in _start
609
+ self._listener = Listener(authkey=process.current_process().authkey, backlog=128)
610
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
611
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/connection.py", line 464, in __init__
612
+ self._listener = SocketListener(address, family, backlog)
613
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
614
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/connection.py", line 608, in __init__
615
+ self._socket.bind(address)
616
+ PermissionError: [Errno 1] Operation not permitted
617
+ Traceback (most recent call last):
618
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/queues.py", line 244, in _feed
619
+ obj = _ForkingPickler.dumps(obj)
620
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^
621
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/reduction.py", line 51, in dumps
622
+ cls(buf, protocol).dump(obj)
623
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/site-packages/torch/multiprocessing/reductions.py", line 619, in reduce_storage
624
+ df = multiprocessing.reduction.DupFd(fd)
625
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
626
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/reduction.py", line 198, in DupFd
627
+ return resource_sharer.DupFd(fd)
628
+ ^^^^^^^^^^^^^^^^^^^^^^^^^
629
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 53, in __init__
630
+ self._id = _resource_sharer.register(send, close)
631
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
632
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 76, in register
633
+ self._start()
634
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 126, in _start
635
+ self._listener = Listener(authkey=process.current_process().authkey, backlog=128)
636
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
637
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/connection.py", line 464, in __init__
638
+ self._listener = SocketListener(address, family, backlog)
639
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
640
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/connection.py", line 608, in __init__
641
+ self._socket.bind(address)
642
+ PermissionError: [Errno 1] Operation not permitted
643
+ Traceback (most recent call last):
644
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/queues.py", line 244, in _feed
645
+ obj = _ForkingPickler.dumps(obj)
646
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^
647
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/reduction.py", line 51, in dumps
648
+ cls(buf, protocol).dump(obj)
649
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/site-packages/torch/multiprocessing/reductions.py", line 619, in reduce_storage
650
+ df = multiprocessing.reduction.DupFd(fd)
651
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
652
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/reduction.py", line 198, in DupFd
653
+ return resource_sharer.DupFd(fd)
654
+ ^^^^^^^^^^^^^^^^^^^^^^^^^
655
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 53, in __init__
656
+ self._id = _resource_sharer.register(send, close)
657
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
658
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 76, in register
659
+ self._start()
660
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 126, in _start
661
+ self._listener = Listener(authkey=process.current_process().authkey, backlog=128)
662
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
663
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/connection.py", line 464, in __init__
664
+ self._listener = SocketListener(address, family, backlog)
665
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
666
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/connection.py", line 608, in __init__
667
+ self._socket.bind(address)
668
+ PermissionError: [Errno 1] Operation not permitted
669
+ Traceback (most recent call last):
670
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/queues.py", line 244, in _feed
671
+ obj = _ForkingPickler.dumps(obj)
672
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^
673
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/reduction.py", line 51, in dumps
674
+ cls(buf, protocol).dump(obj)
675
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/site-packages/torch/multiprocessing/reductions.py", line 619, in reduce_storage
676
+ df = multiprocessing.reduction.DupFd(fd)
677
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
678
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/reduction.py", line 198, in DupFd
679
+ return resource_sharer.DupFd(fd)
680
+ ^^^^^^^^^^^^^^^^^^^^^^^^^
681
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 53, in __init__
682
+ self._id = _resource_sharer.register(send, close)
683
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
684
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 76, in register
685
+ self._start()
686
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 126, in _start
687
+ self._listener = Listener(authkey=process.current_process().authkey, backlog=128)
688
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
689
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/connection.py", line 464, in __init__
690
+ self._listener = SocketListener(address, family, backlog)
691
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
692
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/connection.py", line 608, in __init__
693
+ self._socket.bind(address)
694
+ PermissionError: [Errno 1] Operation not permitted
695
+ Traceback (most recent call last):
696
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/queues.py", line 244, in _feed
697
+ obj = _ForkingPickler.dumps(obj)
698
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^
699
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/reduction.py", line 51, in dumps
700
+ cls(buf, protocol).dump(obj)
701
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/site-packages/torch/multiprocessing/reductions.py", line 619, in reduce_storage
702
+ df = multiprocessing.reduction.DupFd(fd)
703
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
704
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/reduction.py", line 198, in DupFd
705
+ return resource_sharer.DupFd(fd)
706
+ ^^^^^^^^^^^^^^^^^^^^^^^^^
707
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 53, in __init__
708
+ self._id = _resource_sharer.register(send, close)
709
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
710
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 76, in register
711
+ self._start()
712
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 126, in _start
713
+ self._listener = Listener(authkey=process.current_process().authkey, backlog=128)
714
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
715
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/connection.py", line 464, in __init__
716
+ self._listener = SocketListener(address, family, backlog)
717
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
718
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/connection.py", line 608, in __init__
719
+ self._socket.bind(address)
720
+ PermissionError: [Errno 1] Operation not permitted
721
+ Traceback (most recent call last):
722
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/queues.py", line 244, in _feed
723
+ obj = _ForkingPickler.dumps(obj)
724
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^
725
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/reduction.py", line 51, in dumps
726
+ cls(buf, protocol).dump(obj)
727
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/site-packages/torch/multiprocessing/reductions.py", line 619, in reduce_storage
728
+ df = multiprocessing.reduction.DupFd(fd)
729
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
730
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/reduction.py", line 198, in DupFd
731
+ return resource_sharer.DupFd(fd)
732
+ ^^^^^^^^^^^^^^^^^^^^^^^^^
733
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 53, in __init__
734
+ self._id = _resource_sharer.register(send, close)
735
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
736
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 76, in register
737
+ self._start()
738
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 126, in _start
739
+ self._listener = Listener(authkey=process.current_process().authkey, backlog=128)
740
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
741
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/connection.py", line 464, in __init__
742
+ self._listener = SocketListener(address, family, backlog)
743
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
744
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/connection.py", line 608, in __init__
745
+ self._socket.bind(address)
746
+ PermissionError: [Errno 1] Operation not permitted
747
+ Traceback (most recent call last):
748
+ Traceback (most recent call last):
749
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/queues.py", line 244, in _feed
750
+ obj = _ForkingPickler.dumps(obj)
751
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^
752
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/reduction.py", line 51, in dumps
753
+ cls(buf, protocol).dump(obj)
754
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/site-packages/torch/multiprocessing/reductions.py", line 619, in reduce_storage
755
+ df = multiprocessing.reduction.DupFd(fd)
756
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
757
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/reduction.py", line 198, in DupFd
758
+ return resource_sharer.DupFd(fd)
759
+ ^^^^^^^^^^^^^^^^^^^^^^^^^
760
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 53, in __init__
761
+ self._id = _resource_sharer.register(send, close)
762
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
763
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 76, in register
764
+ self._start()
765
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 126, in _start
766
+ self._listener = Listener(authkey=process.current_process().authkey, backlog=128)
767
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
768
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/connection.py", line 464, in __init__
769
+ self._listener = SocketListener(address, family, backlog)
770
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
771
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/connection.py", line 608, in __init__
772
+ self._socket.bind(address)
773
+ PermissionError: [Errno 1] Operation not permitted
774
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/queues.py", line 244, in _feed
775
+ obj = _ForkingPickler.dumps(obj)
776
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^
777
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/reduction.py", line 51, in dumps
778
+ cls(buf, protocol).dump(obj)
779
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/site-packages/torch/multiprocessing/reductions.py", line 619, in reduce_storage
780
+ df = multiprocessing.reduction.DupFd(fd)
781
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
782
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/reduction.py", line 198, in DupFd
783
+ return resource_sharer.DupFd(fd)
784
+ ^^^^^^^^^^^^^^^^^^^^^^^^^
785
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 53, in __init__
786
+ self._id = _resource_sharer.register(send, close)
787
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
788
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 76, in register
789
+ self._start()
790
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 126, in _start
791
+ self._listener = Listener(authkey=process.current_process().authkey, backlog=128)
792
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
793
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/connection.py", line 464, in __init__
794
+ self._listener = SocketListener(address, family, backlog)
795
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
796
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/connection.py", line 608, in __init__
797
+ self._socket.bind(address)
798
+ PermissionError: [Errno 1] Operation not permitted
799
+ Traceback (most recent call last):
800
+ Traceback (most recent call last):
801
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/queues.py", line 244, in _feed
802
+ obj = _ForkingPickler.dumps(obj)
803
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^
804
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/reduction.py", line 51, in dumps
805
+ cls(buf, protocol).dump(obj)
806
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/site-packages/torch/multiprocessing/reductions.py", line 619, in reduce_storage
807
+ df = multiprocessing.reduction.DupFd(fd)
808
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
809
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/reduction.py", line 198, in DupFd
810
+ return resource_sharer.DupFd(fd)
811
+ ^^^^^^^^^^^^^^^^^^^^^^^^^
812
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 53, in __init__
813
+ self._id = _resource_sharer.register(send, close)
814
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
815
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 76, in register
816
+ self._start()
817
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 126, in _start
818
+ self._listener = Listener(authkey=process.current_process().authkey, backlog=128)
819
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
820
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/connection.py", line 464, in __init__
821
+ self._listener = SocketListener(address, family, backlog)
822
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
823
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/connection.py", line 608, in __init__
824
+ self._socket.bind(address)
825
+ PermissionError: [Errno 1] Operation not permitted
826
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/queues.py", line 244, in _feed
827
+ obj = _ForkingPickler.dumps(obj)
828
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^
829
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/reduction.py", line 51, in dumps
830
+ cls(buf, protocol).dump(obj)
831
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/site-packages/torch/multiprocessing/reductions.py", line 619, in reduce_storage
832
+ df = multiprocessing.reduction.DupFd(fd)
833
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
834
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/reduction.py", line 198, in DupFd
835
+ return resource_sharer.DupFd(fd)
836
+ ^^^^^^^^^^^^^^^^^^^^^^^^^
837
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 53, in __init__
838
+ self._id = _resource_sharer.register(send, close)
839
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
840
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 76, in register
841
+ self._start()
842
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 126, in _start
843
+ self._listener = Listener(authkey=process.current_process().authkey, backlog=128)
844
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
845
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/connection.py", line 464, in __init__
846
+ self._listener = SocketListener(address, family, backlog)
847
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
848
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/connection.py", line 608, in __init__
849
+ self._socket.bind(address)
850
+ PermissionError: [Errno 1] Operation not permitted
851
+ Traceback (most recent call last):
852
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/queues.py", line 244, in _feed
853
+ obj = _ForkingPickler.dumps(obj)
854
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^
855
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/reduction.py", line 51, in dumps
856
+ cls(buf, protocol).dump(obj)
857
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/site-packages/torch/multiprocessing/reductions.py", line 619, in reduce_storage
858
+ df = multiprocessing.reduction.DupFd(fd)
859
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
860
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/reduction.py", line 198, in DupFd
861
+ return resource_sharer.DupFd(fd)
862
+ ^^^^^^^^^^^^^^^^^^^^^^^^^
863
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 53, in __init__
864
+ self._id = _resource_sharer.register(send, close)
865
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
866
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 76, in register
867
+ self._start()
868
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/resource_sharer.py", line 126, in _start
869
+ self._listener = Listener(authkey=process.current_process().authkey, backlog=128)
870
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
871
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/connection.py", line 464, in __init__
872
+ self._listener = SocketListener(address, family, backlog)
873
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
874
+ File "/dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/multiprocessing/connection.py", line 608, in __init__
875
+ self._socket.bind(address)
876
+ PermissionError: [Errno 1] Operation not permitted
tsqa_adapter/logs/sft_4gpu_20260615_141604.log ADDED
@@ -0,0 +1,210 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0
  0%| | 0/250 [00:00<?, ?it/s]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
 
 
 
 
1
  0%| | 1/250 [00:14<58:06, 14.00s/it]
2
  1%| | 2/250 [00:24<50:27, 12.21s/it]
3
  1%| | 3/250 [00:37<50:29, 12.27s/it]
4
  2%|▏ | 4/250 [00:49<49:31, 12.08s/it]
5
  2%|▏ | 5/250 [01:00<48:41, 11.92s/it]
6
  2%|▏ | 6/250 [01:14<51:41, 12.71s/it]
7
  3%|β–Ž | 7/250 [01:27<51:10, 12.64s/it]
8
  3%|β–Ž | 8/250 [01:41<53:20, 13.22s/it]
9
  4%|β–Ž | 9/250 [01:52<50:23, 12.55s/it]
10
  4%|▍ | 10/250 [02:08<53:48, 13.45s/it]
11
 
 
12
  4%|▍ | 10/250 [02:08<53:48, 13.45s/it]
13
  4%|▍ | 11/250 [02:19<50:52, 12.77s/it]
14
  5%|▍ | 12/250 [02:32<51:15, 12.92s/it]
15
  5%|β–Œ | 13/250 [02:46<51:17, 12.99s/it]
16
  6%|β–Œ | 14/250 [02:59<51:03, 12.98s/it]
17
  6%|β–Œ | 15/250 [03:11<50:07, 12.80s/it]
18
  6%|β–‹ | 16/250 [03:22<48:26, 12.42s/it]
19
  7%|β–‹ | 17/250 [03:36<50:04, 12.89s/it]
20
  7%|β–‹ | 18/250 [03:48<48:20, 12.50s/it]
21
  8%|β–Š | 19/250 [03:58<45:26, 11.80s/it]
22
  8%|β–Š | 20/250 [04:09<44:36, 11.64s/it]
23
 
 
24
  8%|β–Š | 20/250 [04:09<44:36, 11.64s/it]
25
  8%|β–Š | 21/250 [04:24<48:03, 12.59s/it]
26
  9%|β–‰ | 22/250 [04:36<46:53, 12.34s/it]
27
  9%|β–‰ | 23/250 [04:50<49:03, 12.97s/it]
28
  10%|β–‰ | 24/250 [05:07<52:25, 13.92s/it]
29
  10%|β–ˆ | 25/250 [05:20<52:07, 13.90s/it]
30
  10%|β–ˆ | 26/250 [05:36<53:41, 14.38s/it]
31
  11%|β–ˆ | 27/250 [05:52<54:58, 14.79s/it]
32
  11%|β–ˆ | 28/250 [06:09<57:57, 15.66s/it]
33
  12%|β–ˆβ– | 29/250 [06:23<55:16, 15.01s/it]
34
  12%|β–ˆβ– | 30/250 [06:36<53:20, 14.55s/it]
35
 
 
36
  12%|β–ˆβ– | 30/250 [06:36<53:20, 14.55s/it]
37
  12%|β–ˆβ– | 31/250 [06:51<52:42, 14.44s/it]
38
  13%|β–ˆβ–Ž | 32/250 [07:02<49:31, 13.63s/it]
39
  13%|β–ˆβ–Ž | 33/250 [07:14<47:28, 13.13s/it]
40
  14%|β–ˆβ–Ž | 34/250 [07:25<44:37, 12.39s/it]
41
  14%|β–ˆβ– | 35/250 [07:42<48:57, 13.66s/it]
42
  14%|β–ˆβ– | 36/250 [07:56<49:01, 13.74s/it]
43
  15%|β–ˆβ– | 37/250 [08:06<45:37, 12.85s/it]
44
  15%|β–ˆβ–Œ | 38/250 [08:22<48:08, 13.62s/it]
45
  16%|β–ˆβ–Œ | 39/250 [08:33<45:37, 12.97s/it]
46
  16%|β–ˆβ–Œ | 40/250 [08:46<44:47, 12.80s/it]
47
 
 
48
  16%|β–ˆβ–Œ | 40/250 [08:46<44:47, 12.80s/it]
49
  16%|β–ˆβ–‹ | 41/250 [08:55<41:16, 11.85s/it]
50
  17%|β–ˆβ–‹ | 42/250 [09:06<40:00, 11.54s/it]
51
  17%|β–ˆβ–‹ | 43/250 [09:24<46:47, 13.56s/it]
52
  18%|β–ˆβ–Š | 44/250 [09:36<44:43, 13.03s/it]
53
  18%|β–ˆβ–Š | 45/250 [09:50<45:32, 13.33s/it]
54
  18%|β–ˆβ–Š | 46/250 [10:03<44:21, 13.05s/it]
55
  19%|β–ˆβ–‰ | 47/250 [10:19<47:26, 14.02s/it]
56
  19%|β–ˆβ–‰ | 48/250 [10:36<50:17, 14.94s/it]
57
  20%|β–ˆβ–‰ | 49/250 [10:54<53:33, 15.99s/it]
58
  20%|β–ˆβ–ˆ | 50/250 [11:06<49:05, 14.73s/it]
59
 
 
60
  20%|β–ˆβ–ˆ | 50/250 [11:06<49:05, 14.73s/it]
61
  20%|β–ˆβ–ˆ | 51/250 [11:20<47:34, 14.34s/it]
62
  21%|β–ˆβ–ˆ | 52/250 [11:32<45:08, 13.68s/it]
63
  21%|β–ˆβ–ˆ | 53/250 [11:44<43:53, 13.37s/it]
64
  22%|β–ˆβ–ˆβ– | 54/250 [11:58<43:35, 13.34s/it]
65
  22%|β–ˆβ–ˆβ– | 55/250 [12:11<43:18, 13.32s/it]
66
  22%|β–ˆβ–ˆβ– | 56/250 [12:21<40:23, 12.49s/it]
67
  23%|β–ˆβ–ˆβ–Ž | 57/250 [12:35<41:07, 12.78s/it]
68
  23%|β–ˆβ–ˆβ–Ž | 58/250 [12:50<43:07, 13.48s/it]
69
  24%|β–ˆβ–ˆβ–Ž | 59/250 [13:05<44:15, 13.90s/it]
70
  24%|β–ˆβ–ˆβ– | 60/250 [13:18<42:56, 13.56s/it]
71
 
 
72
  24%|β–ˆβ–ˆβ– | 60/250 [13:18<42:56, 13.56s/it]
73
  24%|β–ˆβ–ˆβ– | 61/250 [13:30<41:38, 13.22s/it]
74
  25%|β–ˆβ–ˆβ– | 62/250 [13:43<40:45, 13.01s/it]
75
  25%|β–ˆβ–ˆβ–Œ | 63/250 [13:57<41:43, 13.39s/it]
76
  26%|β–ˆβ–ˆβ–Œ | 64/250 [14:09<40:29, 13.06s/it]
77
  26%|β–ˆβ–ˆβ–Œ | 65/250 [14:24<41:52, 13.58s/it]
78
  26%|β–ˆβ–ˆβ–‹ | 66/250 [14:38<42:00, 13.70s/it]
79
  27%|β–ˆβ–ˆβ–‹ | 67/250 [14:53<43:03, 14.12s/it]
80
  27%|β–ˆβ–ˆβ–‹ | 68/250 [15:08<43:21, 14.29s/it]
81
  28%|β–ˆβ–ˆβ–Š | 69/250 [15:21<42:37, 14.13s/it]
82
  28%|β–ˆβ–ˆβ–Š | 70/250 [15:34<41:12, 13.74s/it]
83
 
 
84
  28%|β–ˆβ–ˆβ–Š | 70/250 [15:34<41:12, 13.74s/it]
85
  28%|β–ˆβ–ˆβ–Š | 71/250 [15:51<43:27, 14.57s/it]
86
  29%|β–ˆβ–ˆβ–‰ | 72/250 [16:03<41:16, 13.91s/it]
87
  29%|β–ˆβ–ˆβ–‰ | 73/250 [16:15<39:02, 13.24s/it]
88
  30%|β–ˆβ–ˆβ–‰ | 74/250 [16:25<35:48, 12.21s/it]
89
  30%|β–ˆβ–ˆβ–ˆ | 75/250 [16:36<34:30, 11.83s/it]
90
  30%|β–ˆβ–ˆβ–ˆ | 76/250 [16:49<35:39, 12.29s/it]
91
  31%|β–ˆβ–ˆβ–ˆ | 77/250 [17:00<34:42, 12.04s/it]
92
  31%|β–ˆβ–ˆβ–ˆ | 78/250 [17:14<35:30, 12.38s/it]
93
  32%|β–ˆβ–ˆβ–ˆβ– | 79/250 [17:25<34:33, 12.12s/it]
94
  32%|β–ˆβ–ˆβ–ˆβ– | 80/250 [17:41<37:08, 13.11s/it]
95
 
 
96
  32%|β–ˆβ–ˆβ–ˆβ– | 80/250 [17:41<37:08, 13.11s/it]
97
  32%|β–ˆβ–ˆβ–ˆβ– | 81/250 [17:53<36:08, 12.83s/it]
98
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 82/250 [18:08<37:48, 13.50s/it]
99
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 83/250 [18:20<36:12, 13.01s/it]
100
  34%|β–ˆβ–ˆβ–ˆβ–Ž | 84/250 [18:38<40:30, 14.64s/it]
101
  34%|β–ˆβ–ˆβ–ˆβ– | 85/250 [18:52<39:39, 14.42s/it]
102
  34%|β–ˆβ–ˆβ–ˆβ– | 86/250 [19:10<42:03, 15.39s/it]
103
  35%|β–ˆβ–ˆβ–ˆβ– | 87/250 [19:24<41:21, 15.23s/it]
104
  35%|β–ˆβ–ˆβ–ˆβ–Œ | 88/250 [19:36<38:19, 14.19s/it]
105
  36%|β–ˆβ–ˆβ–ˆβ–Œ | 89/250 [19:53<40:26, 15.07s/it]
106
  36%|β–ˆβ–ˆβ–ˆβ–Œ | 90/250 [20:07<38:51, 14.57s/it]
107
 
 
108
  36%|β–ˆβ–ˆβ–ˆβ–Œ | 90/250 [20:07<38:51, 14.57s/it]
109
  36%|β–ˆβ–ˆβ–ˆβ–‹ | 91/250 [20:19<36:35, 13.81s/it]
110
  37%|β–ˆβ–ˆβ–ˆβ–‹ | 92/250 [20:31<34:47, 13.21s/it]
111
  37%|β–ˆβ–ˆβ–ˆβ–‹ | 93/250 [20:44<34:36, 13.22s/it]
112
  38%|β–ˆβ–ˆβ–ˆβ–Š | 94/250 [20:56<33:31, 12.90s/it]
113
  38%|β–ˆβ–ˆβ–ˆβ–Š | 95/250 [21:07<31:39, 12.25s/it]
114
  38%|β–ˆβ–ˆβ–ˆβ–Š | 96/250 [21:21<33:07, 12.91s/it]
115
  39%|β–ˆβ–ˆβ–ˆβ–‰ | 97/250 [21:35<33:24, 13.10s/it]
116
  39%|β–ˆβ–ˆβ–ˆβ–‰ | 98/250 [21:47<32:27, 12.81s/it]
117
  40%|β–ˆβ–ˆβ–ˆβ–‰ | 99/250 [21:59<32:04, 12.74s/it]
118
  40%|β–ˆβ–ˆβ–ˆβ–ˆ | 100/250 [22:15<33:41, 13.47s/it]
119
 
 
120
  40%|β–ˆβ–ˆβ–ˆβ–ˆ | 100/250 [22:15<33:41, 13.47s/it]
121
  40%|β–ˆβ–ˆβ–ˆβ–ˆ | 101/250 [22:26<32:09, 12.95s/it]
122
  41%|β–ˆβ–ˆβ–ˆβ–ˆ | 102/250 [22:41<32:55, 13.35s/it]
123
  41%|β–ˆβ–ˆβ–ˆβ–ˆ | 103/250 [22:54<32:21, 13.21s/it]
124
  42%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 104/250 [23:05<30:35, 12.57s/it]
125
  42%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 105/250 [23:15<29:06, 12.05s/it]
126
  42%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 106/250 [23:28<29:07, 12.14s/it]
127
  43%|β–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 107/250 [23:39<28:36, 12.00s/it]
128
  43%|β–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 108/250 [23:55<30:41, 12.97s/it]
129
  44%|β–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 109/250 [24:08<30:29, 12.97s/it]
130
  44%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 110/250 [24:19<28:46, 12.33s/it]
131
 
 
132
  44%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 110/250 [24:19<28:46, 12.33s/it]
133
  44%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 111/250 [24:31<28:51, 12.46s/it]
134
  45%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 112/250 [24:45<29:12, 12.70s/it]
135
  45%|β–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 113/250 [24:59<30:04, 13.17s/it]
136
  46%|β–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 114/250 [25:13<30:19, 13.38s/it]
137
  46%|β–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 115/250 [25:26<30:21, 13.49s/it]
138
  46%|β–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 116/250 [25:39<29:41, 13.30s/it]
139
  47%|β–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 117/250 [25:54<30:07, 13.59s/it]
140
  47%|β–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 118/250 [26:10<31:36, 14.37s/it]
141
  48%|β–ˆβ–ˆβ–ˆβ–ˆβ–Š | 119/250 [26:24<31:30, 14.43s/it]
142
  48%|β–ˆβ–ˆβ–ˆβ–ˆβ–Š | 120/250 [26:40<32:11, 14.86s/it]
143
 
 
144
  48%|β–ˆβ–ˆβ–ˆβ–ˆβ–Š | 120/250 [26:40<32:11, 14.86s/it]
145
  48%|β–ˆβ–ˆβ–ˆβ–ˆβ–Š | 121/250 [26:54<31:23, 14.60s/it]
146
  49%|β–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 122/250 [27:06<29:25, 13.79s/it]
147
  49%|β–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 123/250 [27:19<28:45, 13.58s/it]
148
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 124/250 [27:33<28:21, 13.50s/it]
149
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 125/250 [27:45<27:38, 13.27s/it]
150
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 126/250 [27:58<27:08, 13.13s/it]
151
  51%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 127/250 [28:11<27:02, 13.19s/it]
152
  51%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 128/250 [28:22<25:27, 12.52s/it]
153
  52%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 129/250 [28:41<29:07, 14.44s/it]
154
  52%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 130/250 [28:55<28:43, 14.36s/it]
155
 
 
156
  52%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 130/250 [28:55<28:43, 14.36s/it]
157
  52%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 131/250 [29:08<27:21, 13.79s/it]
158
  53%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 132/250 [29:20<26:09, 13.30s/it]
159
  53%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 133/250 [29:33<25:54, 13.29s/it]
160
  54%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 134/250 [29:46<25:12, 13.04s/it]
161
  54%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 135/250 [30:00<25:46, 13.44s/it]
162
  54%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 136/250 [30:11<24:14, 12.76s/it]
163
  55%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 137/250 [30:24<23:43, 12.60s/it]
164
  55%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 138/250 [30:40<25:54, 13.88s/it]
165
  56%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 139/250 [30:54<25:16, 13.66s/it]
166
  56%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 140/250 [31:07<24:42, 13.48s/it]
167
 
 
168
  56%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 140/250 [31:07<24:42, 13.48s/it]
169
  56%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 141/250 [31:24<26:26, 14.55s/it]
170
  57%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 142/250 [31:38<26:20, 14.64s/it]
171
  57%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 143/250 [31:50<24:34, 13.78s/it]
172
  58%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 144/250 [32:03<23:47, 13.46s/it]
173
  58%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 145/250 [32:13<21:46, 12.45s/it]
174
  58%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 146/250 [32:26<21:41, 12.51s/it]
175
  59%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 147/250 [32:39<21:56, 12.78s/it]
176
  59%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 148/250 [32:55<23:31, 13.84s/it]
177
  60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 149/250 [33:09<23:13, 13.80s/it]
178
  60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 150/250 [33:22<22:24, 13.45s/it]
179
 
 
180
  60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 150/250 [33:22<22:24, 13.45s/it]
181
  60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 151/250 [33:37<23:04, 13.98s/it]
182
  61%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 152/250 [33:49<21:46, 13.34s/it]
183
  61%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 153/250 [33:59<20:08, 12.46s/it]
184
  62%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 154/250 [34:15<21:29, 13.43s/it]
185
  62%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 155/250 [34:28<21:07, 13.34s/it]
186
  62%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 156/250 [34:38<19:31, 12.46s/it]
187
  63%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 157/250 [34:51<19:25, 12.53s/it]
188
  63%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 158/250 [35:05<19:40, 12.83s/it]
189
  64%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 159/250 [35:20<20:21, 13.43s/it]
190
  64%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 160/250 [35:33<20:09, 13.44s/it]
191
 
 
192
  64%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 160/250 [35:33<20:09, 13.44s/it]
193
  64%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 161/250 [35:44<18:52, 12.72s/it]
194
  65%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 162/250 [35:57<18:36, 12.69s/it]
195
  65%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 163/250 [36:08<17:39, 12.18s/it]
196
  66%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 164/250 [36:21<18:00, 12.56s/it]
197
  66%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 165/250 [36:35<18:22, 12.97s/it]
198
  66%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 166/250 [36:48<18:19, 13.09s/it]
199
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 167/250 [37:01<18:04, 13.06s/it]
200
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 168/250 [37:12<16:44, 12.25s/it]
201
  68%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 169/250 [37:25<16:57, 12.56s/it]
202
  68%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 170/250 [37:38<16:46, 12.58s/it]
203
 
 
204
  68%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 170/250 [37:38<16:46, 12.58s/it]
205
  68%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 171/250 [37:50<16:35, 12.60s/it]
206
  69%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 172/250 [38:05<17:17, 13.30s/it]
207
  69%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 173/250 [38:21<17:52, 13.93s/it]
208
  70%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 174/250 [38:37<18:29, 14.60s/it]
209
  70%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 175/250 [38:50<17:50, 14.27s/it]
210
  70%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 176/250 [39:04<17:30, 14.20s/it]
211
  71%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 177/250 [39:16<16:22, 13.46s/it]
212
  71%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 178/250 [39:28<15:42, 13.09s/it]
213
  72%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 179/250 [39:40<15:06, 12.77s/it]
214
  72%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 180/250 [39:54<15:17, 13.10s/it]
215
 
 
216
  72%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 180/250 [39:54<15:17, 13.10s/it]
217
  72%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 181/250 [40:04<14:04, 12.24s/it]
218
  73%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 182/250 [40:18<14:27, 12.75s/it]
219
  73%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 183/250 [40:32<14:31, 13.01s/it]
220
  74%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 184/250 [40:47<14:53, 13.53s/it]
221
  74%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 185/250 [41:02<15:19, 14.15s/it]
222
  74%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 186/250 [41:16<14:48, 13.88s/it]
223
  75%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 187/250 [41:31<14:55, 14.22s/it]
224
  75%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 188/250 [41:42<13:57, 13.51s/it]
225
  76%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 189/250 [41:54<13:05, 12.87s/it]
226
  76%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 190/250 [42:07<12:50, 12.83s/it]
227
 
 
228
  76%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 190/250 [42:07<12:50, 12.83s/it]
229
  76%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 191/250 [42:21<13:14, 13.46s/it]
230
  77%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 192/250 [42:37<13:29, 13.96s/it]
231
  77%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 193/250 [42:50<13:01, 13.71s/it]
232
  78%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 194/250 [43:02<12:17, 13.16s/it]
233
  78%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 195/250 [43:15<12:14, 13.35s/it]
234
  78%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 196/250 [43:29<12:06, 13.45s/it]
235
  79%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 197/250 [43:42<11:45, 13.31s/it]
236
  79%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 198/250 [43:53<10:47, 12.45s/it]
237
  80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 199/250 [44:09<11:29, 13.52s/it]
238
  80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 200/250 [44:21<10:54, 13.08s/it]
239
 
 
240
  80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 200/250 [44:21<10:54, 13.08s/it]
241
  80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 201/250 [44:39<12:06, 14.83s/it]
242
  81%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 202/250 [44:55<11:56, 14.92s/it]
243
  81%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 203/250 [45:07<11:07, 14.20s/it]
244
  82%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 204/250 [45:22<11:06, 14.49s/it]
245
  82%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 205/250 [45:35<10:33, 14.08s/it]
246
  82%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 206/250 [45:48<10:04, 13.75s/it]
247
  83%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 207/250 [45:59<09:16, 12.95s/it]
248
  83%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 208/250 [46:16<09:42, 13.88s/it]
249
  84%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 209/250 [46:29<09:23, 13.75s/it]
250
  84%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 210/250 [46:41<08:47, 13.18s/it]
251
 
 
252
  84%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 210/250 [46:41<08:47, 13.18s/it]
253
  84%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 211/250 [46:53<08:27, 13.02s/it]
254
  85%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 212/250 [47:05<08:01, 12.66s/it]
255
  85%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 213/250 [47:19<07:58, 12.93s/it]
256
  86%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 214/250 [47:31<07:38, 12.74s/it]
257
  86%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 215/250 [47:46<07:45, 13.30s/it]
258
  86%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 216/250 [47:59<07:28, 13.18s/it]
259
  87%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 217/250 [48:12<07:15, 13.18s/it]
260
  87%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 218/250 [48:29<07:38, 14.32s/it]
261
  88%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 219/250 [48:43<07:26, 14.41s/it]
262
  88%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 220/250 [48:57<07:06, 14.23s/it]
263
 
 
264
  88%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 220/250 [48:57<07:06, 14.23s/it]
265
  88%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 221/250 [49:10<06:37, 13.71s/it]
266
  89%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 222/250 [49:29<07:08, 15.32s/it]
267
  89%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 223/250 [49:44<06:55, 15.38s/it]
268
  90%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 224/250 [49:55<06:03, 14.00s/it]
269
  90%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 225/250 [50:09<05:51, 14.04s/it]
270
  90%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 226/250 [50:26<05:56, 14.84s/it]
271
  91%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 227/250 [50:36<05:06, 13.34s/it]
272
  91%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 228/250 [50:52<05:14, 14.31s/it]
273
  92%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 229/250 [51:04<04:44, 13.54s/it]
274
  92%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 230/250 [51:17<04:24, 13.22s/it]
275
 
 
276
  92%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 230/250 [51:17<04:24, 13.22s/it]
277
  92%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 231/250 [51:29<04:08, 13.09s/it]
278
  93%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž| 232/250 [51:41<03:48, 12.71s/it]
279
  93%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž| 233/250 [51:55<03:39, 12.90s/it]
280
  94%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž| 234/250 [52:06<03:21, 12.58s/it]
281
  94%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 235/250 [52:22<03:23, 13.54s/it]
282
  94%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 236/250 [52:35<03:06, 13.35s/it]
283
  95%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 237/250 [52:49<02:56, 13.58s/it]
284
  95%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ| 238/250 [53:03<02:44, 13.67s/it]
285
  96%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ| 239/250 [53:16<02:26, 13.31s/it]
286
  96%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ| 240/250 [53:29<02:12, 13.29s/it]
287
 
 
288
  96%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ| 240/250 [53:29<02:12, 13.29s/it]
289
  96%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹| 241/250 [53:40<01:54, 12.74s/it]
290
  97%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹| 242/250 [53:54<01:44, 13.02s/it]
291
  97%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹| 243/250 [54:09<01:34, 13.53s/it]
292
  98%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š| 244/250 [54:22<01:21, 13.58s/it]
293
  98%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š| 245/250 [54:34<01:04, 12.89s/it]
294
  98%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š| 246/250 [54:46<00:51, 12.81s/it]
295
  99%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰| 247/250 [54:59<00:38, 12.85s/it]
296
  99%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰| 248/250 [55:15<00:27, 13.65s/it]
297
 
 
298
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ accelerate: 0.34.2
2
+ Run id: 20260615_141604
3
+ Python: /dev/shm/suiqk/conda_envs/scalerag-ts-v4/bin/python
4
+ Accelerate: /dev/shm/suiqk/conda_envs/scalerag-ts-v4/bin/python -m accelerate.commands.accelerate_cli
5
+ CUDA_VISIBLE_DEVICES: 0,1,2,3
6
+ Model path: /mnt/share01/sqk/models/ChatTime-1-7B-Chat
7
+ Data root: /mnt/share01/sqk/datasets/Time-MQA_TSQA/tmp (train=train.jsonl eval=eval.jsonl)
8
+ Adapter output dir: /mnt/share01/sqk/ChatTime/tsqa_adapter/outputs/sft_20260615_141604/adapter
9
+ Eval output dir: /mnt/share01/sqk/ChatTime/tsqa_adapter/outputs/sft_20260615_141604/eval
10
+ Log file: /mnt/share01/sqk/ChatTime/tsqa_adapter/logs/sft_4gpu_20260615_141604.log
11
+ βš™οΈ Running in WANDB offline modeβš™οΈ Running in WANDB offline mode
12
+
13
+ βš™οΈ Running in WANDB offline mode
14
+ βš™οΈ Running in WANDB offline mode
15
+ Applied accelerate compatibility patch: Accelerator.unwrap_model accepts keep_torch_compile.
16
+ Applied accelerate compatibility patch: Accelerator.unwrap_model accepts keep_torch_compile.
17
+ Applied accelerate compatibility patch: Accelerator.unwrap_model accepts keep_torch_compile.
18
+ Applied accelerate compatibility patch: Accelerator.unwrap_model accepts keep_torch_compile.
19
+ SFT token length check: input_rows=8000, kept_rows=7987, skipped_overlong=13, left_truncated_rows=0, max_prompt_len=3818, max_total_len=3842, max_seq_length=4096, skip_overlong=True
20
+ SFT token length check: input_rows=8000, kept_rows=7987, skipped_overlong=13, left_truncated_rows=0, max_prompt_len=3818, max_total_len=3842, max_seq_length=4096, skip_overlong=True
21
+ SFT token length check: input_rows=8000, kept_rows=7987, skipped_overlong=13, left_truncated_rows=0, max_prompt_len=3818, max_total_len=3842, max_seq_length=4096, skip_overlong=True
22
+ SFT token length check: input_rows=8000, kept_rows=7987, skipped_overlong=13, left_truncated_rows=0, max_prompt_len=3818, max_total_len=3842, max_seq_length=4096, skip_overlong=True
23
+
24
+
25
+
26
+
27
+ trainable params: 39,976,960 || all params: 6,860,320,768 || trainable%: 0.5827
28
+ /dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/site-packages/accelerate/accelerator.py:494: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead.
29
+ self.scaler = torch.cuda.amp.GradScaler(**kwargs)
30
+ No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
31
+ trainable params: 39,976,960 || all params: 6,860,320,768 || trainable%: 0.5827
32
+ trainable params: 39,976,960 || all params: 6,860,320,768 || trainable%: 0.5827
33
+ trainable params: 39,976,960 || all params: 6,860,320,768 || trainable%: 0.5827
34
+ /dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/site-packages/accelerate/accelerator.py:494: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead.
35
+ self.scaler = torch.cuda.amp.GradScaler(**kwargs)
36
+ /dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/site-packages/accelerate/accelerator.py:494: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead.
37
+ self.scaler = torch.cuda.amp.GradScaler(**kwargs)
38
+ Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
39
+ No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
40
+ /dev/shm/suiqk/conda_envs/scalerag-ts-v4/lib/python3.11/site-packages/accelerate/accelerator.py:494: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead.
41
+ self.scaler = torch.cuda.amp.GradScaler(**kwargs)
42
+ No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
43
+ No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
44
+
45
  0%| | 0/250 [00:00<?, ?it/s]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
46
+ `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
47
+ `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
48
+ `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
49
+
50
  0%| | 1/250 [00:14<58:06, 14.00s/it]
51
  1%| | 2/250 [00:24<50:27, 12.21s/it]
52
  1%| | 3/250 [00:37<50:29, 12.27s/it]
53
  2%|▏ | 4/250 [00:49<49:31, 12.08s/it]
54
  2%|▏ | 5/250 [01:00<48:41, 11.92s/it]
55
  2%|▏ | 6/250 [01:14<51:41, 12.71s/it]
56
  3%|β–Ž | 7/250 [01:27<51:10, 12.64s/it]
57
  3%|β–Ž | 8/250 [01:41<53:20, 13.22s/it]
58
  4%|β–Ž | 9/250 [01:52<50:23, 12.55s/it]
59
  4%|▍ | 10/250 [02:08<53:48, 13.45s/it]
60
 
61
+
62
  4%|▍ | 10/250 [02:08<53:48, 13.45s/it]
63
  4%|▍ | 11/250 [02:19<50:52, 12.77s/it]
64
  5%|▍ | 12/250 [02:32<51:15, 12.92s/it]
65
  5%|β–Œ | 13/250 [02:46<51:17, 12.99s/it]
66
  6%|β–Œ | 14/250 [02:59<51:03, 12.98s/it]
67
  6%|β–Œ | 15/250 [03:11<50:07, 12.80s/it]
68
  6%|β–‹ | 16/250 [03:22<48:26, 12.42s/it]
69
  7%|β–‹ | 17/250 [03:36<50:04, 12.89s/it]
70
  7%|β–‹ | 18/250 [03:48<48:20, 12.50s/it]
71
  8%|β–Š | 19/250 [03:58<45:26, 11.80s/it]
72
  8%|β–Š | 20/250 [04:09<44:36, 11.64s/it]
73
 
74
+
75
  8%|β–Š | 20/250 [04:09<44:36, 11.64s/it]
76
  8%|β–Š | 21/250 [04:24<48:03, 12.59s/it]
77
  9%|β–‰ | 22/250 [04:36<46:53, 12.34s/it]
78
  9%|β–‰ | 23/250 [04:50<49:03, 12.97s/it]
79
  10%|β–‰ | 24/250 [05:07<52:25, 13.92s/it]
80
  10%|β–ˆ | 25/250 [05:20<52:07, 13.90s/it]
81
  10%|β–ˆ | 26/250 [05:36<53:41, 14.38s/it]
82
  11%|β–ˆ | 27/250 [05:52<54:58, 14.79s/it]
83
  11%|β–ˆ | 28/250 [06:09<57:57, 15.66s/it]
84
  12%|β–ˆβ– | 29/250 [06:23<55:16, 15.01s/it]
85
  12%|β–ˆβ– | 30/250 [06:36<53:20, 14.55s/it]
86
 
87
+
88
  12%|β–ˆβ– | 30/250 [06:36<53:20, 14.55s/it]
89
  12%|β–ˆβ– | 31/250 [06:51<52:42, 14.44s/it]
90
  13%|β–ˆβ–Ž | 32/250 [07:02<49:31, 13.63s/it]
91
  13%|β–ˆβ–Ž | 33/250 [07:14<47:28, 13.13s/it]
92
  14%|β–ˆβ–Ž | 34/250 [07:25<44:37, 12.39s/it]
93
  14%|β–ˆβ– | 35/250 [07:42<48:57, 13.66s/it]
94
  14%|β–ˆβ– | 36/250 [07:56<49:01, 13.74s/it]
95
  15%|β–ˆβ– | 37/250 [08:06<45:37, 12.85s/it]
96
  15%|β–ˆβ–Œ | 38/250 [08:22<48:08, 13.62s/it]
97
  16%|β–ˆβ–Œ | 39/250 [08:33<45:37, 12.97s/it]
98
  16%|β–ˆβ–Œ | 40/250 [08:46<44:47, 12.80s/it]
99
 
100
+
101
  16%|β–ˆβ–Œ | 40/250 [08:46<44:47, 12.80s/it]
102
  16%|β–ˆβ–‹ | 41/250 [08:55<41:16, 11.85s/it]
103
  17%|β–ˆβ–‹ | 42/250 [09:06<40:00, 11.54s/it]
104
  17%|β–ˆβ–‹ | 43/250 [09:24<46:47, 13.56s/it]
105
  18%|β–ˆβ–Š | 44/250 [09:36<44:43, 13.03s/it]
106
  18%|β–ˆβ–Š | 45/250 [09:50<45:32, 13.33s/it]
107
  18%|β–ˆβ–Š | 46/250 [10:03<44:21, 13.05s/it]
108
  19%|β–ˆβ–‰ | 47/250 [10:19<47:26, 14.02s/it]
109
  19%|β–ˆβ–‰ | 48/250 [10:36<50:17, 14.94s/it]
110
  20%|β–ˆβ–‰ | 49/250 [10:54<53:33, 15.99s/it]
111
  20%|β–ˆβ–ˆ | 50/250 [11:06<49:05, 14.73s/it]
112
 
113
+
114
  20%|β–ˆβ–ˆ | 50/250 [11:06<49:05, 14.73s/it]
115
  20%|β–ˆβ–ˆ | 51/250 [11:20<47:34, 14.34s/it]
116
  21%|β–ˆβ–ˆ | 52/250 [11:32<45:08, 13.68s/it]
117
  21%|β–ˆβ–ˆ | 53/250 [11:44<43:53, 13.37s/it]
118
  22%|β–ˆβ–ˆβ– | 54/250 [11:58<43:35, 13.34s/it]
119
  22%|β–ˆβ–ˆβ– | 55/250 [12:11<43:18, 13.32s/it]
120
  22%|β–ˆβ–ˆβ– | 56/250 [12:21<40:23, 12.49s/it]
121
  23%|β–ˆβ–ˆβ–Ž | 57/250 [12:35<41:07, 12.78s/it]
122
  23%|β–ˆβ–ˆβ–Ž | 58/250 [12:50<43:07, 13.48s/it]
123
  24%|β–ˆβ–ˆβ–Ž | 59/250 [13:05<44:15, 13.90s/it]
124
  24%|β–ˆβ–ˆβ– | 60/250 [13:18<42:56, 13.56s/it]
125
 
126
+
127
  24%|β–ˆβ–ˆβ– | 60/250 [13:18<42:56, 13.56s/it]
128
  24%|β–ˆβ–ˆβ– | 61/250 [13:30<41:38, 13.22s/it]
129
  25%|β–ˆβ–ˆβ– | 62/250 [13:43<40:45, 13.01s/it]
130
  25%|β–ˆβ–ˆβ–Œ | 63/250 [13:57<41:43, 13.39s/it]
131
  26%|β–ˆβ–ˆβ–Œ | 64/250 [14:09<40:29, 13.06s/it]
132
  26%|β–ˆβ–ˆβ–Œ | 65/250 [14:24<41:52, 13.58s/it]
133
  26%|β–ˆβ–ˆβ–‹ | 66/250 [14:38<42:00, 13.70s/it]
134
  27%|β–ˆβ–ˆβ–‹ | 67/250 [14:53<43:03, 14.12s/it]
135
  27%|β–ˆβ–ˆβ–‹ | 68/250 [15:08<43:21, 14.29s/it]
136
  28%|β–ˆβ–ˆβ–Š | 69/250 [15:21<42:37, 14.13s/it]
137
  28%|β–ˆβ–ˆβ–Š | 70/250 [15:34<41:12, 13.74s/it]
138
 
139
+
140
  28%|β–ˆβ–ˆβ–Š | 70/250 [15:34<41:12, 13.74s/it]
141
  28%|β–ˆβ–ˆβ–Š | 71/250 [15:51<43:27, 14.57s/it]
142
  29%|β–ˆβ–ˆβ–‰ | 72/250 [16:03<41:16, 13.91s/it]
143
  29%|β–ˆβ–ˆβ–‰ | 73/250 [16:15<39:02, 13.24s/it]
144
  30%|β–ˆβ–ˆβ–‰ | 74/250 [16:25<35:48, 12.21s/it]
145
  30%|β–ˆβ–ˆβ–ˆ | 75/250 [16:36<34:30, 11.83s/it]
146
  30%|β–ˆβ–ˆβ–ˆ | 76/250 [16:49<35:39, 12.29s/it]
147
  31%|β–ˆβ–ˆβ–ˆ | 77/250 [17:00<34:42, 12.04s/it]
148
  31%|β–ˆβ–ˆβ–ˆ | 78/250 [17:14<35:30, 12.38s/it]
149
  32%|β–ˆβ–ˆβ–ˆβ– | 79/250 [17:25<34:33, 12.12s/it]
150
  32%|β–ˆβ–ˆβ–ˆβ– | 80/250 [17:41<37:08, 13.11s/it]
151
 
152
+
153
  32%|β–ˆβ–ˆβ–ˆβ– | 80/250 [17:41<37:08, 13.11s/it]
154
  32%|β–ˆβ–ˆβ–ˆβ– | 81/250 [17:53<36:08, 12.83s/it]
155
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 82/250 [18:08<37:48, 13.50s/it]
156
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 83/250 [18:20<36:12, 13.01s/it]
157
  34%|β–ˆβ–ˆβ–ˆβ–Ž | 84/250 [18:38<40:30, 14.64s/it]
158
  34%|β–ˆβ–ˆβ–ˆβ– | 85/250 [18:52<39:39, 14.42s/it]
159
  34%|β–ˆβ–ˆβ–ˆβ– | 86/250 [19:10<42:03, 15.39s/it]
160
  35%|β–ˆβ–ˆβ–ˆβ– | 87/250 [19:24<41:21, 15.23s/it]
161
  35%|β–ˆβ–ˆβ–ˆβ–Œ | 88/250 [19:36<38:19, 14.19s/it]
162
  36%|β–ˆβ–ˆβ–ˆβ–Œ | 89/250 [19:53<40:26, 15.07s/it]
163
  36%|β–ˆβ–ˆβ–ˆβ–Œ | 90/250 [20:07<38:51, 14.57s/it]
164
 
165
+
166
  36%|β–ˆβ–ˆβ–ˆβ–Œ | 90/250 [20:07<38:51, 14.57s/it]
167
  36%|β–ˆβ–ˆβ–ˆβ–‹ | 91/250 [20:19<36:35, 13.81s/it]
168
  37%|β–ˆβ–ˆβ–ˆβ–‹ | 92/250 [20:31<34:47, 13.21s/it]
169
  37%|β–ˆβ–ˆβ–ˆβ–‹ | 93/250 [20:44<34:36, 13.22s/it]
170
  38%|β–ˆβ–ˆβ–ˆβ–Š | 94/250 [20:56<33:31, 12.90s/it]
171
  38%|β–ˆβ–ˆβ–ˆβ–Š | 95/250 [21:07<31:39, 12.25s/it]
172
  38%|β–ˆβ–ˆβ–ˆβ–Š | 96/250 [21:21<33:07, 12.91s/it]
173
  39%|β–ˆβ–ˆβ–ˆβ–‰ | 97/250 [21:35<33:24, 13.10s/it]
174
  39%|β–ˆβ–ˆβ–ˆβ–‰ | 98/250 [21:47<32:27, 12.81s/it]
175
  40%|β–ˆβ–ˆβ–ˆβ–‰ | 99/250 [21:59<32:04, 12.74s/it]
176
  40%|β–ˆβ–ˆβ–ˆβ–ˆ | 100/250 [22:15<33:41, 13.47s/it]
177
 
178
+
179
  40%|β–ˆβ–ˆβ–ˆβ–ˆ | 100/250 [22:15<33:41, 13.47s/it]
180
  40%|β–ˆβ–ˆβ–ˆβ–ˆ | 101/250 [22:26<32:09, 12.95s/it]
181
  41%|β–ˆβ–ˆβ–ˆβ–ˆ | 102/250 [22:41<32:55, 13.35s/it]
182
  41%|β–ˆβ–ˆβ–ˆβ–ˆ | 103/250 [22:54<32:21, 13.21s/it]
183
  42%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 104/250 [23:05<30:35, 12.57s/it]
184
  42%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 105/250 [23:15<29:06, 12.05s/it]
185
  42%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 106/250 [23:28<29:07, 12.14s/it]
186
  43%|β–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 107/250 [23:39<28:36, 12.00s/it]
187
  43%|β–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 108/250 [23:55<30:41, 12.97s/it]
188
  44%|β–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 109/250 [24:08<30:29, 12.97s/it]
189
  44%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 110/250 [24:19<28:46, 12.33s/it]
190
 
191
+
192
  44%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 110/250 [24:19<28:46, 12.33s/it]
193
  44%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 111/250 [24:31<28:51, 12.46s/it]
194
  45%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 112/250 [24:45<29:12, 12.70s/it]
195
  45%|β–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 113/250 [24:59<30:04, 13.17s/it]
196
  46%|β–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 114/250 [25:13<30:19, 13.38s/it]
197
  46%|β–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 115/250 [25:26<30:21, 13.49s/it]
198
  46%|β–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 116/250 [25:39<29:41, 13.30s/it]
199
  47%|β–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 117/250 [25:54<30:07, 13.59s/it]
200
  47%|β–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 118/250 [26:10<31:36, 14.37s/it]
201
  48%|β–ˆβ–ˆβ–ˆβ–ˆβ–Š | 119/250 [26:24<31:30, 14.43s/it]
202
  48%|β–ˆβ–ˆβ–ˆβ–ˆβ–Š | 120/250 [26:40<32:11, 14.86s/it]
203
 
204
+
205
  48%|β–ˆβ–ˆβ–ˆβ–ˆβ–Š | 120/250 [26:40<32:11, 14.86s/it]
206
  48%|β–ˆβ–ˆβ–ˆβ–ˆβ–Š | 121/250 [26:54<31:23, 14.60s/it]
207
  49%|β–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 122/250 [27:06<29:25, 13.79s/it]
208
  49%|β–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 123/250 [27:19<28:45, 13.58s/it]
209
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 124/250 [27:33<28:21, 13.50s/it]
210
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 125/250 [27:45<27:38, 13.27s/it]
211
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 126/250 [27:58<27:08, 13.13s/it]
212
  51%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 127/250 [28:11<27:02, 13.19s/it]
213
  51%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 128/250 [28:22<25:27, 12.52s/it]
214
  52%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 129/250 [28:41<29:07, 14.44s/it]
215
  52%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 130/250 [28:55<28:43, 14.36s/it]
216
 
217
+
218
  52%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 130/250 [28:55<28:43, 14.36s/it]
219
  52%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 131/250 [29:08<27:21, 13.79s/it]
220
  53%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 132/250 [29:20<26:09, 13.30s/it]
221
  53%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 133/250 [29:33<25:54, 13.29s/it]
222
  54%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 134/250 [29:46<25:12, 13.04s/it]
223
  54%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 135/250 [30:00<25:46, 13.44s/it]
224
  54%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 136/250 [30:11<24:14, 12.76s/it]
225
  55%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 137/250 [30:24<23:43, 12.60s/it]
226
  55%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 138/250 [30:40<25:54, 13.88s/it]
227
  56%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 139/250 [30:54<25:16, 13.66s/it]
228
  56%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 140/250 [31:07<24:42, 13.48s/it]
229
 
230
+
231
  56%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 140/250 [31:07<24:42, 13.48s/it]
232
  56%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 141/250 [31:24<26:26, 14.55s/it]
233
  57%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 142/250 [31:38<26:20, 14.64s/it]
234
  57%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 143/250 [31:50<24:34, 13.78s/it]
235
  58%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 144/250 [32:03<23:47, 13.46s/it]
236
  58%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 145/250 [32:13<21:46, 12.45s/it]
237
  58%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 146/250 [32:26<21:41, 12.51s/it]
238
  59%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 147/250 [32:39<21:56, 12.78s/it]
239
  59%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 148/250 [32:55<23:31, 13.84s/it]
240
  60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 149/250 [33:09<23:13, 13.80s/it]
241
  60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 150/250 [33:22<22:24, 13.45s/it]
242
 
243
+
244
  60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 150/250 [33:22<22:24, 13.45s/it]
245
  60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 151/250 [33:37<23:04, 13.98s/it]
246
  61%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 152/250 [33:49<21:46, 13.34s/it]
247
  61%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 153/250 [33:59<20:08, 12.46s/it]
248
  62%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 154/250 [34:15<21:29, 13.43s/it]
249
  62%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 155/250 [34:28<21:07, 13.34s/it]
250
  62%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 156/250 [34:38<19:31, 12.46s/it]
251
  63%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 157/250 [34:51<19:25, 12.53s/it]
252
  63%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 158/250 [35:05<19:40, 12.83s/it]
253
  64%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 159/250 [35:20<20:21, 13.43s/it]
254
  64%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 160/250 [35:33<20:09, 13.44s/it]
255
 
256
+
257
  64%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 160/250 [35:33<20:09, 13.44s/it]
258
  64%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 161/250 [35:44<18:52, 12.72s/it]
259
  65%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 162/250 [35:57<18:36, 12.69s/it]
260
  65%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 163/250 [36:08<17:39, 12.18s/it]
261
  66%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 164/250 [36:21<18:00, 12.56s/it]
262
  66%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 165/250 [36:35<18:22, 12.97s/it]
263
  66%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 166/250 [36:48<18:19, 13.09s/it]
264
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 167/250 [37:01<18:04, 13.06s/it]
265
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 168/250 [37:12<16:44, 12.25s/it]
266
  68%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 169/250 [37:25<16:57, 12.56s/it]
267
  68%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 170/250 [37:38<16:46, 12.58s/it]
268
 
269
+
270
  68%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 170/250 [37:38<16:46, 12.58s/it]
271
  68%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 171/250 [37:50<16:35, 12.60s/it]
272
  69%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 172/250 [38:05<17:17, 13.30s/it]
273
  69%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 173/250 [38:21<17:52, 13.93s/it]
274
  70%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 174/250 [38:37<18:29, 14.60s/it]
275
  70%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 175/250 [38:50<17:50, 14.27s/it]
276
  70%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 176/250 [39:04<17:30, 14.20s/it]
277
  71%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 177/250 [39:16<16:22, 13.46s/it]
278
  71%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 178/250 [39:28<15:42, 13.09s/it]
279
  72%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 179/250 [39:40<15:06, 12.77s/it]
280
  72%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 180/250 [39:54<15:17, 13.10s/it]
281
 
282
+
283
  72%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 180/250 [39:54<15:17, 13.10s/it]
284
  72%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 181/250 [40:04<14:04, 12.24s/it]
285
  73%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 182/250 [40:18<14:27, 12.75s/it]
286
  73%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 183/250 [40:32<14:31, 13.01s/it]
287
  74%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 184/250 [40:47<14:53, 13.53s/it]
288
  74%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 185/250 [41:02<15:19, 14.15s/it]
289
  74%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 186/250 [41:16<14:48, 13.88s/it]
290
  75%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 187/250 [41:31<14:55, 14.22s/it]
291
  75%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 188/250 [41:42<13:57, 13.51s/it]
292
  76%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 189/250 [41:54<13:05, 12.87s/it]
293
  76%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 190/250 [42:07<12:50, 12.83s/it]
294
 
295
+
296
  76%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 190/250 [42:07<12:50, 12.83s/it]
297
  76%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 191/250 [42:21<13:14, 13.46s/it]
298
  77%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 192/250 [42:37<13:29, 13.96s/it]
299
  77%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 193/250 [42:50<13:01, 13.71s/it]
300
  78%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 194/250 [43:02<12:17, 13.16s/it]
301
  78%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 195/250 [43:15<12:14, 13.35s/it]
302
  78%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 196/250 [43:29<12:06, 13.45s/it]
303
  79%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 197/250 [43:42<11:45, 13.31s/it]
304
  79%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 198/250 [43:53<10:47, 12.45s/it]
305
  80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 199/250 [44:09<11:29, 13.52s/it]
306
  80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 200/250 [44:21<10:54, 13.08s/it]
307
 
308
+
309
  80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 200/250 [44:21<10:54, 13.08s/it]
310
  80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 201/250 [44:39<12:06, 14.83s/it]
311
  81%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 202/250 [44:55<11:56, 14.92s/it]
312
  81%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 203/250 [45:07<11:07, 14.20s/it]
313
  82%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 204/250 [45:22<11:06, 14.49s/it]
314
  82%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 205/250 [45:35<10:33, 14.08s/it]
315
  82%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 206/250 [45:48<10:04, 13.75s/it]
316
  83%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 207/250 [45:59<09:16, 12.95s/it]
317
  83%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 208/250 [46:16<09:42, 13.88s/it]
318
  84%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 209/250 [46:29<09:23, 13.75s/it]
319
  84%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 210/250 [46:41<08:47, 13.18s/it]
320
 
321
+
322
  84%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 210/250 [46:41<08:47, 13.18s/it]
323
  84%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 211/250 [46:53<08:27, 13.02s/it]
324
  85%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 212/250 [47:05<08:01, 12.66s/it]
325
  85%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 213/250 [47:19<07:58, 12.93s/it]
326
  86%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 214/250 [47:31<07:38, 12.74s/it]
327
  86%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 215/250 [47:46<07:45, 13.30s/it]
328
  86%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 216/250 [47:59<07:28, 13.18s/it]
329
  87%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 217/250 [48:12<07:15, 13.18s/it]
330
  87%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 218/250 [48:29<07:38, 14.32s/it]
331
  88%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 219/250 [48:43<07:26, 14.41s/it]
332
  88%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 220/250 [48:57<07:06, 14.23s/it]
333
 
334
+
335
  88%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 220/250 [48:57<07:06, 14.23s/it]
336
  88%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 221/250 [49:10<06:37, 13.71s/it]
337
  89%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 222/250 [49:29<07:08, 15.32s/it]
338
  89%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 223/250 [49:44<06:55, 15.38s/it]
339
  90%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 224/250 [49:55<06:03, 14.00s/it]
340
  90%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 225/250 [50:09<05:51, 14.04s/it]
341
  90%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 226/250 [50:26<05:56, 14.84s/it]
342
  91%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 227/250 [50:36<05:06, 13.34s/it]
343
  91%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 228/250 [50:52<05:14, 14.31s/it]
344
  92%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 229/250 [51:04<04:44, 13.54s/it]
345
  92%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 230/250 [51:17<04:24, 13.22s/it]
346
 
347
+
348
  92%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 230/250 [51:17<04:24, 13.22s/it]
349
  92%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 231/250 [51:29<04:08, 13.09s/it]
350
  93%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž| 232/250 [51:41<03:48, 12.71s/it]
351
  93%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž| 233/250 [51:55<03:39, 12.90s/it]
352
  94%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž| 234/250 [52:06<03:21, 12.58s/it]
353
  94%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 235/250 [52:22<03:23, 13.54s/it]
354
  94%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 236/250 [52:35<03:06, 13.35s/it]
355
  95%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 237/250 [52:49<02:56, 13.58s/it]
356
  95%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ| 238/250 [53:03<02:44, 13.67s/it]
357
  96%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ| 239/250 [53:16<02:26, 13.31s/it]
358
  96%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ| 240/250 [53:29<02:12, 13.29s/it]
359
 
360
+
361
  96%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ| 240/250 [53:29<02:12, 13.29s/it]
362
  96%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹| 241/250 [53:40<01:54, 12.74s/it]
363
  97%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹| 242/250 [53:54<01:44, 13.02s/it]
364
  97%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹| 243/250 [54:09<01:34, 13.53s/it]
365
  98%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š| 244/250 [54:22<01:21, 13.58s/it]
366
  98%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š| 245/250 [54:34<01:04, 12.89s/it]
367
  98%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š| 246/250 [54:46<00:51, 12.81s/it]
368
  99%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰| 247/250 [54:59<00:38, 12.85s/it]
369
  99%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰| 248/250 [55:15<00:27, 13.65s/it]
370
 
371
+
372
 
373
+
374
+ [rank0]:[W615 15:17:28.247429157 ProcessGroupNCCL.cpp:1496] Warning: WARNING: destroy_process_group() was not called before program exit, which can leak resources. For more info, please see https://pytorch.org/docs/stable/distributed.html#shutdown (function operator())
375
+ Dataset: /mnt/share01/sqk/datasets/Time-MQA_TSQA/tmp/eval.jsonl
376
+ Total samples: 800
377
+ World size: 4
378
+ Per-device eval batch size: 4
379
+ Maximum global eval batch size: 16
380
+ Output dir: /mnt/share01/sqk/ChatTime/tsqa_adapter/outputs/sft_20260615_141604/eval
381
+
382
+
383
+
384
+
385
+
386
+ [rank0]:[W615 15:26:42.394095285 ProcessGroupNCCL.cpp:4561] [PG ID 0 PG GUID 0 Rank 0] using GPU 0 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. Specify device_ids in barrier() to force use of a particular device, or call init_process_group() with a device_id.
387
+ [rank2]:[W615 15:26:46.615147024 ProcessGroupNCCL.cpp:4561] [PG ID 0 PG GUID 0 Rank 2] using GPU 2 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. Specify device_ids in barrier() to force use of a particular device, or call init_process_group() with a device_id.
388
+ [rank3]:[W615 15:27:02.342169152 ProcessGroupNCCL.cpp:4561] [PG ID 0 PG GUID 0 Rank 3] using GPU 3 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. Specify device_ids in barrier() to force use of a particular device, or call init_process_group() with a device_id.
389
+ [rank1]:[W615 15:27:07.925204166 ProcessGroupNCCL.cpp:4561] [PG ID 0 PG GUID 0 Rank 1] using GPU 1 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. Specify device_ids in barrier() to force use of a particular device, or call init_process_group() with a device_id.
390
+ {
391
+ "by_group": {
392
+ "anomaly_detection": {
393
+ "count": 200,
394
+ "accuracy": 0.82,
395
+ "correct": 164,
396
+ "parsed": 200
397
+ },
398
+ "classification": {
399
+ "count": 200,
400
+ "accuracy": 0.74,
401
+ "correct": 148,
402
+ "parsed": 200
403
+ },
404
+ "forecasting": {
405
+ "count": 200,
406
+ "valid_samples": 170,
407
+ "valid_points": 3164,
408
+ "mse": 825899.547396054,
409
+ "mae": 151.69107746555417
410
+ },
411
+ "open_ended": {
412
+ "count": 200,
413
+ "accuracy": 0.45,
414
+ "parsed_accuracy": 0.45685279187817257,
415
+ "parse_rate": 0.985,
416
+ "correct": 90,
417
+ "parsed": 197,
418
+ "unparsed": 3,
419
+ "by_format": {
420
+ "multiple_choice": {
421
+ "count": 67,
422
+ "accuracy": 0.2835820895522388,
423
+ "parsed_accuracy": 0.2878787878787879,
424
+ "correct": 19,
425
+ "parsed": 66,
426
+ "unparsed": 1
427
+ },
428
+ "open_ended_question": {
429
+ "count": 67,
430
+ "accuracy": 0.417910447761194,
431
+ "parsed_accuracy": 0.4307692307692308,
432
+ "correct": 28,
433
+ "parsed": 65,
434
+ "unparsed": 2
435
+ },
436
+ "true_false": {
437
+ "count": 66,
438
+ "accuracy": 0.6515151515151515,
439
+ "parsed_accuracy": 0.6515151515151515,
440
+ "correct": 43,
441
+ "parsed": 66,
442
+ "unparsed": 0
443
+ }
444
+ },
445
+ "by_method": {
446
+ "anomaly": {
447
+ "count": 1,
448
+ "accuracy": 1.0,
449
+ "correct": 1
450
+ },
451
+ "cyclical": {
452
+ "count": 4,
453
+ "accuracy": 0.75,
454
+ "correct": 3
455
+ },
456
+ "multiple_choice": {
457
+ "count": 64,
458
+ "accuracy": 0.296875,
459
+ "correct": 19
460
+ },
461
+ "numeric_scalar": {
462
+ "count": 31,
463
+ "accuracy": 0.25806451612903225,
464
+ "correct": 8
465
+ },
466
+ "numeric_sequence": {
467
+ "count": 1,
468
+ "accuracy": 0.0,
469
+ "correct": 0
470
+ },
471
+ "seasonality": {
472
+ "count": 3,
473
+ "accuracy": 1.0,
474
+ "correct": 3
475
+ },
476
+ "trend": {
477
+ "count": 20,
478
+ "accuracy": 0.45,
479
+ "correct": 9
480
+ },
481
+ "true_false": {
482
+ "count": 65,
483
+ "accuracy": 0.6461538461538462,
484
+ "correct": 42
485
+ },
486
+ "volatility": {
487
+ "count": 8,
488
+ "accuracy": 0.625,
489
+ "correct": 5
490
+ }
491
+ }
492
+ }
493
+ },
494
+ "text_overall": {
495
+ "count": 800,
496
+ "exact_match": 0.39,
497
+ "normalized_exact_match": 0.39,
498
+ "token_f1": 0.6366794082162608
499
+ },
500
+ "num_samples": 800,
501
+ "counts_by_group": {
502
+ "anomaly_detection": 200,
503
+ "classification": 200,
504
+ "forecasting": 200,
505
+ "open_ended": 200
506
+ }
507
+ }
508
+ Saved predictions: /mnt/share01/sqk/ChatTime/tsqa_adapter/outputs/sft_20260615_141604/eval/predictions.jsonl
509
+ Saved metrics: /mnt/share01/sqk/ChatTime/tsqa_adapter/outputs/sft_20260615_141604/eval/metrics.json