Add files using upload-large-folder tool
Browse files. This view is limited to 50 files because it contains too many changes. See the raw diff for the full change set.
- babylm_dataset.py +141 -0
- babylm_dataset_llama.py +141 -0
- babylm_dataset_test.py +145 -0
- run.sh +82 -0
- run_train.sh +28 -0
- train_accelerate.py +99 -0
- train_deep.py +233 -0
- train_ftp.py +137 -0
- train_gpt2.py +117 -0
- train_llama.py +99 -0
- train_llama_1B.py +117 -0
- train_llama_3B.py +117 -0
- train_qwen.py +97 -0
- train_qwen_lora.py +93 -0
- wandb/debug-cli.chunhui.log +0 -0
- wandb/debug-internal.log +17 -0
- wandb/debug.log +33 -0
- wandb/run-20241030_010306-uhzyjdga/run-uhzyjdga.wandb +0 -0
- wandb/run-20241030_011013-8qrwqf2b/files/config.yaml +47 -0
- wandb/run-20241030_011013-8qrwqf2b/files/wandb-metadata.json +97 -0
- wandb/run-20241030_011509-3dp0dtmk/files/output.log +15 -0
- wandb/run-20241030_011509-3dp0dtmk/logs/debug.log +26 -0
- wandb/run-20241030_011509-cqcwsj7s/logs/debug.log +26 -0
- wandb/run-20241030_013141-v317zdzd/files/config.yaml +47 -0
- wandb/run-20241030_013141-v317zdzd/files/output.log +46 -0
- wandb/run-20241030_013141-v317zdzd/files/requirements.txt +147 -0
- wandb/run-20241030_013141-v317zdzd/files/wandb-metadata.json +97 -0
- wandb/run-20241030_013141-v317zdzd/logs/debug-internal.log +11 -0
- wandb/run-20241030_013141-v317zdzd/logs/debug.log +27 -0
- wandb/run-20241030_222932-l8nv7d2l/files/output.log +14 -0
- wandb/run-20241030_222932-l8nv7d2l/logs/debug-internal.log +10 -0
- wandb/run-20241030_222932-l8nv7d2l/logs/debug.log +26 -0
- wandb/run-20241030_222932-lsfm0d2q/files/wandb-metadata.json +97 -0
- wandb/run-20241030_222932-lsfm0d2q/logs/debug-internal.log +10 -0
- wandb/run-20241030_222932-lsfm0d2q/logs/debug.log +26 -0
- wandb/run-20241101_012733-4u8e027p/files/output.log +16 -0
- wandb/run-20241101_012733-4u8e027p/files/requirements.txt +147 -0
- wandb/run-20241101_012733-4u8e027p/files/wandb-metadata.json +97 -0
- wandb/run-20241101_012733-4u8e027p/logs/debug.log +26 -0
- wandb/run-20241101_012733-e3zsr634/files/output.log +20 -0
- wandb/run-20241101_012733-e3zsr634/files/wandb-metadata.json +97 -0
- wandb/run-20241101_200502-28ivel81/files/output.log +1 -0
- wandb/run-20241101_200502-28ivel81/files/wandb-metadata.json +97 -0
- wandb/run-20241101_201708-b4wkk29o/files/output.log +13 -0
- wandb/run-20241101_201708-b4wkk29o/files/wandb-metadata.json +97 -0
- wandb/run-20241101_201708-b4wkk29o/logs/debug.log +26 -0
- wandb/run-20241101_201708-b4wkk29o/run-b4wkk29o.wandb +0 -0
- wandb/run-20241101_201926-5y6ulxig/files/output.log +13 -0
- wandb/run-20241101_201926-5y6ulxig/files/requirements.txt +147 -0
- wandb/run-20241101_201926-5y6ulxig/files/wandb-metadata.json +97 -0
babylm_dataset.py
ADDED
|
@@ -0,0 +1,141 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# babylm_dataset.py
# author: Julie Kallini
"""HuggingFace dataset builder for pre-tokenized, perturbed BabyLM corpora.

Each example is a fixed-length (1024-token) sequence of token ids rendered
as a space-separated string under the "text" feature.
"""

import datasets
import os
import glob
import tqdm
from numpy.random import default_rng
from itertools import product

logger = datasets.logging.get_logger(__name__)

_DESCRIPTION = """\
Pre-tokenized BabyLM HuggingFace dataset for verb perturbations.
"""
MODEL_NAME = "Llama-3.2-3B"
_PERTURBED_DATA_PATH = f"../data/Perturbed_data/{MODEL_NAME}"
_PERTURBATIONS = ["hop_control", "hop_tokens4", "hop_words4",
                  "reverse_control", "reverse_partial", "reverse_full",
                  "shuffle_control", "shuffle_nondeterministic",
                  "shuffle_deterministic21", "shuffle_deterministic57", "shuffle_deterministic84",
                  "shuffle_local3", "shuffle_local5", "shuffle_local10",
                  "shuffle_even_odd"]
# _RANDOM_SEEDS = [0, 14, 41, 53, 96]
_RANDOM_SEEDS = [0]
# _TRAIN_SETS = ["100M", "10M"]
_TRAIN_SETS = ["10M"]
# NOTE(review): 50256 is the GPT-2 end-of-text id, while MODEL_NAME refers to
# a Llama model — confirm this matches the tokenizer used to pre-tokenize.
_EOS_TOKEN_ID = 50256


class BabyConfig(datasets.BuilderConfig):
    """BuilderConfig for the perturbed BabyLM corpus."""

    def __init__(self, data_dir, babylm_train_set, random_seed, **kwargs):
        """Configure one (perturbation, train set, seed) variant.

        Args:
            data_dir: path to directory of tokenized, perturbed BabyLM dataset
            babylm_train_set: training-set size tag (e.g. "10M")
            random_seed: seed used to shuffle the pre-tokenized sentences
        """
        super().__init__(**kwargs)
        self.data_dir = data_dir
        self.babylm_train_set = babylm_train_set
        self.random_seed = random_seed


class BabyLMCorpus(datasets.GeneratorBasedBuilder):
    """Builder that yields chunked token-id sequences as text examples."""

    # One config per (perturbation, train set, seed) combination.
    BUILDER_CONFIGS = [
        BabyConfig(
            name=f"babylm_{perturbation}_{train_set}_seed{random_seed}",
            data_dir=os.path.join(
                _PERTURBED_DATA_PATH, "babylm_" + perturbation),
            babylm_train_set=train_set,
            random_seed=random_seed,
        )
        for perturbation, train_set, random_seed in product(
            _PERTURBATIONS, _TRAIN_SETS, _RANDOM_SEEDS)
    ]

    def _info(self):
        """Return dataset metadata; each example is a single "text" string."""
        return datasets.DatasetInfo(
            description=_DESCRIPTION,
            features=datasets.Features(
                {
                    # Space-separated token ids rendered as one string.
                    "text": datasets.Value("string")
                }
            ),
            # No (input, target) pairing for this language-modeling corpus.
            supervised_keys=None,
        )

    def _split_generators(self, dl_manager):
        """Generate only the train split (validation intentionally disabled here)."""
        return [
            datasets.SplitGenerator(
                name=datasets.Split.TRAIN,
                gen_kwargs={
                    "data_dir": os.path.join(
                        self.config.data_dir,
                        "babylm_" + self.config.babylm_train_set),
                    "random_seed": self.config.random_seed,
                    "split": "train",
                },
            ),
        ]

    def __chunk(self, sentences, eos_token):
        """Parse token-id lines, join them with EOS, cut into 1024-token chunks.

        A trailing partial chunk is dropped so every returned chunk has
        exactly ``max_seq_len`` tokens.
        """
        # Parse each line of whitespace-separated token ids.
        logger.info("Loading pre-tokenized data")
        tokenized_sentences = []
        for sent in tqdm.tqdm(sentences):
            tokenized_sentences.append([int(tok) for tok in sent.split()])

        # Concatenate the tokenized sentences using the EOS token.
        logger.info("Concatenating tokenized data using EOS token")
        all_tokens = []
        for tokens in tqdm.tqdm(tokenized_sentences):
            all_tokens.extend(tokens)
            all_tokens.append(eos_token)

        # Chunk the tokens into sublists of max_seq_len tokens each.
        logger.info("Chunking tokens into sublists of 1024")
        max_seq_len = 1024
        chunked_tokens = []
        for i in tqdm.tqdm(range(0, len(all_tokens), max_seq_len)):
            chunked_tokens.append(all_tokens[i:i + max_seq_len])

        # Drop the last chunk if it is shorter than max_seq_len.
        # Guard against empty input so chunked_tokens[-1] cannot raise.
        if chunked_tokens and len(chunked_tokens[-1]) < max_seq_len:
            chunked_tokens.pop()

        return chunked_tokens

    def _generate_examples(self, data_dir, random_seed, split):
        """Yield (index, {"text": ...}) examples of space-separated token ids."""
        logger.info("Generating examples from = %s", data_dir)
        infiles = sorted(glob.glob(os.path.join(data_dir, "*")))

        # Collect every line from every file in the split directory.
        all_sentences = []
        for infile in infiles:
            # Close each file deterministically (previously leaked handles).
            with open(infile, encoding="utf-8") as f:
                all_sentences.extend(f.readlines())
        logger.info("Total sentences: {}".format(len(all_sentences)))

        # Shuffle deterministically because the data is pre-tokenized.
        rng = default_rng(seed=random_seed)
        rng.shuffle(all_sentences)

        # Parse and chunk into fixed-length token sequences.
        tokenized_lines = self.__chunk(all_sentences, _EOS_TOKEN_ID)

        # Render each chunk back to a space-separated string.
        logger.info("Writing dataset as space-separated sequences of tokens")
        for idx, line in enumerate(tokenized_lines):
            l = " ".join([str(tok) for tok in line]) + "\n"
            yield idx, {"text": l}
babylm_dataset_llama.py
ADDED
|
@@ -0,0 +1,141 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# babylm_dataset_llama.py
# author: Julie Kallini
"""HuggingFace dataset builder for pre-tokenized, perturbed BabyLM corpora.

Each example is a fixed-length (1024-token) sequence of token ids rendered
as a space-separated string under the "text" feature.
"""

import datasets
import os
import glob
import tqdm
from numpy.random import default_rng
from itertools import product

logger = datasets.logging.get_logger(__name__)

_DESCRIPTION = """\
Pre-tokenized BabyLM HuggingFace dataset for verb perturbations.
"""
MODEL_NAME = "Llama-3.2-3B"
_PERTURBED_DATA_PATH = f"../data/Perturbed_data/{MODEL_NAME}"
_PERTURBATIONS = ["hop_control", "hop_tokens4", "hop_words4",
                  "reverse_control", "reverse_partial", "reverse_full",
                  "shuffle_control", "shuffle_nondeterministic",
                  "shuffle_deterministic21", "shuffle_deterministic57", "shuffle_deterministic84",
                  "shuffle_local3", "shuffle_local5", "shuffle_local10",
                  "shuffle_even_odd"]
# _RANDOM_SEEDS = [0, 14, 41, 53, 96]
_RANDOM_SEEDS = [0]
# _TRAIN_SETS = ["100M", "10M"]
_TRAIN_SETS = ["10M"]
# NOTE(review): 50256 is the GPT-2 end-of-text id, while MODEL_NAME refers to
# a Llama model — confirm this matches the tokenizer used to pre-tokenize.
_EOS_TOKEN_ID = 50256


class BabyConfig(datasets.BuilderConfig):
    """BuilderConfig for the perturbed BabyLM corpus."""

    def __init__(self, data_dir, babylm_train_set, random_seed, **kwargs):
        """Configure one (perturbation, train set, seed) variant.

        Args:
            data_dir: path to directory of tokenized, perturbed BabyLM dataset
            babylm_train_set: training-set size tag (e.g. "10M")
            random_seed: seed used to shuffle the pre-tokenized sentences
        """
        super().__init__(**kwargs)
        self.data_dir = data_dir
        self.babylm_train_set = babylm_train_set
        self.random_seed = random_seed


class BabyLMCorpus(datasets.GeneratorBasedBuilder):
    """Builder that yields chunked token-id sequences as text examples."""

    # One config per (perturbation, train set, seed) combination.
    BUILDER_CONFIGS = [
        BabyConfig(
            name=f"babylm_{perturbation}_{train_set}_seed{random_seed}",
            data_dir=os.path.join(
                _PERTURBED_DATA_PATH, "babylm_" + perturbation),
            babylm_train_set=train_set,
            random_seed=random_seed,
        )
        for perturbation, train_set, random_seed in product(
            _PERTURBATIONS, _TRAIN_SETS, _RANDOM_SEEDS)
    ]

    def _info(self):
        """Return dataset metadata; each example is a single "text" string."""
        return datasets.DatasetInfo(
            description=_DESCRIPTION,
            features=datasets.Features(
                {
                    # Space-separated token ids rendered as one string.
                    "text": datasets.Value("string")
                }
            ),
            # No (input, target) pairing for this language-modeling corpus.
            supervised_keys=None,
        )

    def _split_generators(self, dl_manager):
        """Generate only the train split (validation intentionally disabled here)."""
        return [
            datasets.SplitGenerator(
                name=datasets.Split.TRAIN,
                gen_kwargs={
                    "data_dir": os.path.join(
                        self.config.data_dir,
                        "babylm_" + self.config.babylm_train_set),
                    "random_seed": self.config.random_seed,
                    "split": "train",
                },
            ),
        ]

    def __chunk(self, sentences, eos_token):
        """Parse token-id lines, join them with EOS, cut into 1024-token chunks.

        A trailing partial chunk is dropped so every returned chunk has
        exactly ``max_seq_len`` tokens.
        """
        # Parse each line of whitespace-separated token ids.
        logger.info("Loading pre-tokenized data")
        tokenized_sentences = []
        for sent in tqdm.tqdm(sentences):
            tokenized_sentences.append([int(tok) for tok in sent.split()])

        # Concatenate the tokenized sentences using the EOS token.
        logger.info("Concatenating tokenized data using EOS token")
        all_tokens = []
        for tokens in tqdm.tqdm(tokenized_sentences):
            all_tokens.extend(tokens)
            all_tokens.append(eos_token)

        # Chunk the tokens into sublists of max_seq_len tokens each.
        logger.info("Chunking tokens into sublists of 1024")
        max_seq_len = 1024
        chunked_tokens = []
        for i in tqdm.tqdm(range(0, len(all_tokens), max_seq_len)):
            chunked_tokens.append(all_tokens[i:i + max_seq_len])

        # Drop the last chunk if it is shorter than max_seq_len.
        # Guard against empty input so chunked_tokens[-1] cannot raise.
        if chunked_tokens and len(chunked_tokens[-1]) < max_seq_len:
            chunked_tokens.pop()

        return chunked_tokens

    def _generate_examples(self, data_dir, random_seed, split):
        """Yield (index, {"text": ...}) examples of space-separated token ids."""
        logger.info("Generating examples from = %s", data_dir)
        infiles = sorted(glob.glob(os.path.join(data_dir, "*")))

        # Collect every line from every file in the split directory.
        all_sentences = []
        for infile in infiles:
            # Close each file deterministically (previously leaked handles).
            with open(infile, encoding="utf-8") as f:
                all_sentences.extend(f.readlines())
        logger.info("Total sentences: {}".format(len(all_sentences)))

        # Shuffle deterministically because the data is pre-tokenized.
        rng = default_rng(seed=random_seed)
        rng.shuffle(all_sentences)

        # Parse and chunk into fixed-length token sequences.
        tokenized_lines = self.__chunk(all_sentences, _EOS_TOKEN_ID)

        # Render each chunk back to a space-separated string.
        logger.info("Writing dataset as space-separated sequences of tokens")
        for idx, line in enumerate(tokenized_lines):
            l = " ".join([str(tok) for tok in line]) + "\n"
            yield idx, {"text": l}
babylm_dataset_test.py
ADDED
|
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# babylm_dataset_test.py
"""HuggingFace dataset builder for pre-tokenized, perturbed BabyLM corpora.

This variant exposes train, validation, and test splits. Each example is a
fixed-length (1024-token) sequence of token ids rendered as a
space-separated string under the "text" feature.
"""

import datasets
import os
import glob
import tqdm
from numpy.random import default_rng
from itertools import product

logger = datasets.logging.get_logger(__name__)

_DESCRIPTION = """\
Pre-tokenized BabyLM HuggingFace dataset for verb perturbations.
"""
MODEL_NAME = "Llama-3.2-3B"
_PERTURBED_DATA_PATH = f"../data/Perturbed_data/{MODEL_NAME}"
_PERTURBATIONS = ["hop_control", "hop_tokens4", "hop_words4",
                  "reverse_control", "reverse_partial", "reverse_full",
                  "shuffle_control", "shuffle_nondeterministic",
                  "shuffle_deterministic21", "shuffle_deterministic57", "shuffle_deterministic84",
                  "shuffle_local3", "shuffle_local5", "shuffle_local10",
                  "shuffle_even_odd"]
# _RANDOM_SEEDS = [0, 14, 41, 53, 96]
_RANDOM_SEEDS = [0]
# _TRAIN_SETS = ["100M", "10M"]
_TRAIN_SETS = ["10M"]
# NOTE(review): 50256 is the GPT-2 end-of-text id, while MODEL_NAME refers to
# a Llama model — confirm this matches the tokenizer used to pre-tokenize.
_EOS_TOKEN_ID = 50256


class BabyConfig(datasets.BuilderConfig):
    """BuilderConfig for the perturbed BabyLM corpus."""

    def __init__(self, data_dir, babylm_train_set, random_seed, **kwargs):
        """Configure one (perturbation, train set, seed) variant.

        Args:
            data_dir: path to directory of tokenized, perturbed BabyLM dataset
            babylm_train_set: training-set size tag (e.g. "10M")
            random_seed: seed used to shuffle the pre-tokenized sentences
        """
        super().__init__(**kwargs)
        self.data_dir = data_dir
        self.babylm_train_set = babylm_train_set
        self.random_seed = random_seed


class BabyLMCorpus(datasets.GeneratorBasedBuilder):
    """Builder that yields chunked token-id sequences as text examples."""

    # One config per (perturbation, train set, seed) combination.
    BUILDER_CONFIGS = [
        BabyConfig(
            name=f"babylm_{perturbation}_{train_set}_seed{random_seed}",
            data_dir=os.path.join(
                _PERTURBED_DATA_PATH, "babylm_" + perturbation),
            babylm_train_set=train_set,
            random_seed=random_seed,
        )
        for perturbation, train_set, random_seed in product(
            _PERTURBATIONS, _TRAIN_SETS, _RANDOM_SEEDS)
    ]

    def _info(self):
        """Return dataset metadata; each example is a single "text" string."""
        return datasets.DatasetInfo(
            description=_DESCRIPTION,
            features=datasets.Features(
                {
                    # Space-separated token ids rendered as one string.
                    "text": datasets.Value("string")
                }
            ),
            # No (input, target) pairing for this language-modeling corpus.
            supervised_keys=None,
        )

    def _split_generators(self, dl_manager):
        """Generate train/validation/test splits from their subdirectories."""
        return [
            datasets.SplitGenerator(
                name=datasets.Split.TRAIN,
                gen_kwargs={
                    "data_dir": os.path.join(
                        self.config.data_dir,
                        "babylm_" + self.config.babylm_train_set),
                    "random_seed": self.config.random_seed,
                    "split": "train",
                },
            ),
            datasets.SplitGenerator(
                name=datasets.Split.VALIDATION,
                gen_kwargs={
                    "data_dir": os.path.join(
                        self.config.data_dir, "babylm_dev"),
                    "random_seed": self.config.random_seed,
                    "split": "valid",
                },
            ),
            datasets.SplitGenerator(
                name=datasets.Split.TEST,
                gen_kwargs={
                    "data_dir": os.path.join(
                        self.config.data_dir, "babylm_test_affected"),
                    "random_seed": self.config.random_seed,
                    "split": "test",
                },
            ),
        ]

    def __chunk(self, sentences, eos_token):
        """Parse token-id lines, join them with EOS, cut into 1024-token chunks.

        A trailing partial chunk is dropped so every returned chunk has
        exactly ``max_seq_len`` tokens.
        """
        # Parse each line of whitespace-separated token ids.
        logger.info("Loading pre-tokenized data")
        tokenized_sentences = []
        for sent in tqdm.tqdm(sentences):
            tokenized_sentences.append([int(tok) for tok in sent.split()])

        # Concatenate the tokenized sentences using the EOS token.
        logger.info("Concatenating tokenized data using EOS token")
        all_tokens = []
        for tokens in tqdm.tqdm(tokenized_sentences):
            all_tokens.extend(tokens)
            all_tokens.append(eos_token)

        # Chunk the tokens into sublists of max_seq_len tokens each.
        logger.info("Chunking tokens into sublists of 1024")
        max_seq_len = 1024
        chunked_tokens = []
        for i in tqdm.tqdm(range(0, len(all_tokens), max_seq_len)):
            chunked_tokens.append(all_tokens[i:i + max_seq_len])

        # Drop the last chunk if it is shorter than max_seq_len.
        # Guard against empty input so chunked_tokens[-1] cannot raise.
        if chunked_tokens and len(chunked_tokens[-1]) < max_seq_len:
            chunked_tokens.pop()

        return chunked_tokens

    def _generate_examples(self, data_dir, random_seed, split):
        """Yield (index, {"text": ...}) examples of space-separated token ids."""
        logger.info("Generating examples from = %s", data_dir)
        infiles = sorted(glob.glob(os.path.join(data_dir, "*")))

        # Collect every line from every file in the split directory.
        all_sentences = []
        for infile in infiles:
            # Close each file deterministically (previously leaked handles).
            with open(infile, encoding="utf-8") as f:
                all_sentences.extend(f.readlines())
        logger.info("Total sentences: {}".format(len(all_sentences)))

        # Shuffle deterministically because the data is pre-tokenized.
        rng = default_rng(seed=random_seed)
        rng.shuffle(all_sentences)

        # Parse and chunk into fixed-length token sequences.
        tokenized_lines = self.__chunk(all_sentences, _EOS_TOKEN_ID)

        # Render each chunk back to a space-separated string.
        logger.info("Writing dataset as space-separated sequences of tokens")
        for idx, line in enumerate(tokenized_lines):
            l = " ".join([str(tok) for tok in line]) + "\n"
            yield idx, {"text": l}
run.sh
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#!/bin/bash
# Scratch launcher: a collection of torchrun training invocations for the
# perturbation experiments, grouped by training script / model family.
# Lines ending in '&' run in the background.
# NOTE(review): several background jobs reuse the same --master_port and
# several redirect to the same log_3.out — confirm these recipes are meant
# to be run one at a time (per tmux session), not all concurrently.

# Launch the first task in the background
CUDA_VISIBLE_DEVICES=1,2,3 torchrun --nproc_per_node=3 --master_port=22224 train_deep_wandb.py --perturbation reverse_full --train_set 10M --batch_size 3 --epoch 3 --seed 0 > log_1.out 2>&1 &

# Launch the second task in the background
CUDA_VISIBLE_DEVICES=5,6,7 torchrun --nproc_per_node=3 --master_port=22225 train_deep_wandb.py --perturbation reverse_partial --train_set 10M --batch_size 3 --epoch 3 --seed 0 > log_2.out 2>&1 &

# Launch the third task in the background
# tmux attach-session -t impo1-0
CUDA_VISIBLE_DEVICES=1,2,3 torchrun --nproc_per_node=3 --master_port=22226 train_deep_wandb.py --perturbation reverse_control --train_set 10M --batch_size 3 --epoch 3 --seed 0 > log_3.out 2>&1 &


# Hop-perturbation runs (train_deep_hop.py)
# tmux attach-session -t impo3
CUDA_VISIBLE_DEVICES=5,6,7 torchrun --nproc_per_node=3 --master_port=22227 train_deep_hop.py --perturbation hop_control --train_set 10M --batch_size 3 --epoch 3 --seed 0 > log_3.out 2>&1 &

# tmux attach-session -t impo1-0
CUDA_VISIBLE_DEVICES=1,2,3 torchrun --nproc_per_node=3 --master_port=22228 train_deep_hop.py --perturbation hop_words4 --train_set 10M --batch_size 3 --epoch 3 --seed 0 > log_3.out 2>&1 &

# tmux attach-session -t impo3
CUDA_VISIBLE_DEVICES=5,6,7 torchrun --nproc_per_node=3 --master_port=22229 train_deep_hop.py --perturbation hop_tokens4 --train_set 10M --batch_size 3 --epoch 3 --seed 0 > log_3.out 2>&1 &

# ---------------------------------------------------------------------------
# Shuffle-perturbation runs (train_deep_wandb.py)
# tmux attach-session -t impo1-0
CUDA_VISIBLE_DEVICES=2,3,4 torchrun --nproc_per_node=3 --master_port=22230 train_deep_wandb.py --perturbation shuffle_deterministic21 --train_set 10M --batch_size 3 --epoch 3 --seed 0 > log_3.out 2>&1 &

# tmux attach-session -t impo2
CUDA_VISIBLE_DEVICES=5,6,7 torchrun --nproc_per_node=3 --master_port=22231 train_deep_wandb.py --perturbation shuffle_deterministic57 --train_set 10M --batch_size 3 --epoch 3 --seed 0 > log_3.out 2>&1 &

# tmux attach-session -t impo2-1
# NOTE(review): same --master_port=22231 as the job above — they cannot run concurrently.
CUDA_VISIBLE_DEVICES=5,6,7 torchrun --nproc_per_node=3 --master_port=22231 train_deep_wandb.py --perturbation shuffle_deterministic84 --train_set 10M --batch_size 3 --epoch 3 --seed 0 > log_3.out 2>&1 &

# tmux attach-session -t impo3
CUDA_VISIBLE_DEVICES=2,3,4 torchrun --nproc_per_node=3 --master_port=22231 train_deep_wandb.py --perturbation shuffle_even_odd --train_set 10M --batch_size 3 --epoch 3 --seed 0 > log_3.out 2>&1 &

#----------------------------------------------------------------------------------------------------------------------------------------------------
# FTP training runs (train_ftp.py), run in the foreground
# tmux attach-session -t impo1-0
CUDA_VISIBLE_DEVICES=0,1,2 torchrun --nproc_per_node=3 --master_port=22229 train_ftp.py --perturbation reverse_full --train_set 10M --batch_size 3 --epoch 3 --seed 0

# tmux attach-session -t impo2-1
CUDA_VISIBLE_DEVICES=3,4,5 torchrun --nproc_per_node=3 --master_port=22230 train_ftp.py --perturbation reverse_partial --train_set 10M --batch_size 3 --epoch 3 --seed 0

# tmux attach-session -t impo2-1
CUDA_VISIBLE_DEVICES=3,4,5 torchrun --nproc_per_node=3 --master_port=22230 train_ftp.py --perturbation reverse_control --train_set 10M --batch_size 3 --epoch 3 --seed 0

# tmux attach-session -t impo1-0
CUDA_VISIBLE_DEVICES=3,4,5 torchrun --nproc_per_node=3 --master_port=22230 train_ftp.py --perturbation shuffle_deterministic84 --train_set 10M --batch_size 3 --epoch 3 --seed 0

# tmux attach-session -t impo1-0
CUDA_VISIBLE_DEVICES=1,2,3 torchrun --nproc_per_node=3 --master_port=22230 train_ftp.py --perturbation shuffle_nondeterministic --train_set 10M --batch_size 3 --epoch 3 --seed 0

###
# LLama3.2-1B
###
# tmux attach-session -t impo1-0
CUDA_VISIBLE_DEVICES=0,1,2 torchrun --nproc_per_node=3 --master_port=22229 train_llama_1B.py --perturbation reverse_control --train_set 10M --batch_size 3 --epoch 3 --seed 0

# tmux attach-session -t impo2-7
CUDA_VISIBLE_DEVICES=3,4,5 torchrun --nproc_per_node=3 --master_port=22230 train_llama_1B.py --perturbation reverse_full --train_set 10M --batch_size 3 --epoch 3 --seed 0

###
# GPT-2
###
# tmux attach-session -t impo1-0
CUDA_VISIBLE_DEVICES=0,1,2 torchrun --nproc_per_node=3 --master_port=22235 train_gpt2.py --perturbation reverse_control --train_set 10M --batch_size 3 --epoch 3 --seed 0

# tmux attach-session -t impo2-1
CUDA_VISIBLE_DEVICES=3,4,5 torchrun --nproc_per_node=3 --master_port=22236 train_gpt2.py --perturbation reverse_full --train_set 10M --batch_size 3 --epoch 3 --seed 0

# tmux attach-session -t impo1-0
CUDA_VISIBLE_DEVICES=0,1,2 torchrun --nproc_per_node=3 --master_port=22237 train_gpt2.py --perturbation reverse_partial --train_set 10M --batch_size 3 --epoch 3 --seed 0

# Wait for all background processes to complete
wait

echo "Both tasks have been launched."
run_train.sh
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#!/bin/bash
# Run the three reverse-perturbation trainings in sequence on GPUs
# 1,2,3,5,6,7; abort the pipeline as soon as one run fails.

perturbations=(reverse_full reverse_partial reverse_control)
labels=(First Second Third)

for i in 0 1 2; do
    CUDA_VISIBLE_DEVICES=1,2,3,5,6,7 torchrun --nproc_per_node=6 train_deep_wandb.py --perturbation "${perturbations[$i]}" --train_set 10M --batch_size 3 --epoch 7 --seed 0

    if [ $? -eq 0 ]; then
        echo "${labels[$i]} script completed successfully."
    else
        echo "${labels[$i]} script failed."
        exit 1
    fi
done
train_accelerate.py
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Fine-tune Llama-3.2-3B on a (possibly perturbed) BabyLM dataset.

Flat training script: parses CLI flags, loads the perturbation-specific
dataset and tokenizer, then runs a causal-LM fine-tune with the HF Trainer.
"""
import sys
import torch
sys.path.append("..")

import os
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments, DataCollatorForLanguageModeling
from utils_llama import PERTURBATIONS, BABYLM_SPLITS, BABYLM_DATA_PATH, \
    GENRES, MARKER_TOKEN_IDS, marker_sg_token, marker_pl_token, marker_rev_token, write_file
import argparse

# import wandb
# Setup for Weights & Biases
# wandb.init(project="kallini", group="babylm-perturbation-experiments", name=run_id)

if __name__ == "__main__":

    # === CONFIGURATION SETTINGS ===
    parser = argparse.ArgumentParser(description="Training configuration.")

    parser.add_argument('--perturbation', type=str, default='hop_tokens4', help='Type of perturbation to use.')
    parser.add_argument('--train_set', type=str, default='10M', help='Dataset size for training.')
    parser.add_argument('--batch_size', type=int, default=4, help='Batch size for training.')
    parser.add_argument('--epoch', type=int, default=20, help='train epoch')
    parser.add_argument('--seed', type=int, default=0, help='Random seed.')

    args = parser.parse_args()

    ckpt_path = "./checkpoints"

    model_name = "meta-llama/Llama-3.2-3B"
    model_save_name = "Llama-3.2-3B"

    # === FILE PATHS BASED ON CONFIGURATION ===
    run_id = f"babylm_{args.perturbation}_{args.train_set}_seed{args.seed}"
    cache_dir = os.path.join(ckpt_path, model_save_name, run_id, "artifacts")
    run_dir = os.path.join(ckpt_path, model_save_name, run_id, "runs")
    os.makedirs(cache_dir, exist_ok=True)
    os.makedirs(run_dir, exist_ok=True)

    # === DATASET LOADING ===
    # The dataset config name is the exact same f-string as run_id, so reuse
    # it (DRY) instead of rebuilding it a second time.
    dataset = load_dataset('babylm_dataset_llama.py', name=run_id, trust_remote_code=True)
    train_dataset = dataset['train']

    # === TOKENIZER & MODEL LOADING ===
    # NOTE(review): assumes the perturbation-specific tokenizer has a pad
    # token configured — padding="max_length" below fails otherwise. Confirm
    # in utils_llama.
    tokenizer = PERTURBATIONS[args.perturbation]['llama_tokenizer']
    # NOTE(review): device_map="auto" shards the model across visible GPUs;
    # verify this is intended when combining with Trainer.
    model = AutoModelForCausalLM.from_pretrained(model_name,
                                                 device_map="auto",
                                                 cache_dir=cache_dir)

    # === TOKENIZATION ===
    def tokenize_function(examples):
        """Tokenize a batch of raw text rows to fixed-length (1024) inputs."""
        return tokenizer(examples['text'], padding="max_length", truncation=True, max_length=1024)
    tokenized_train = train_dataset.map(tokenize_function, batched=True, remove_columns=["text"])

    # === DATA COLLATOR ===
    # mlm=False -> plain causal-LM labels (model shifts inputs internally).
    data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)

    # === TRAINING ARGUMENTS ===
    training_args = TrainingArguments(
        output_dir=run_dir,
        evaluation_strategy="no",
        per_device_train_batch_size=args.batch_size,
        logging_dir='./logs',
        logging_steps=1000,
        save_steps=1000,
        learning_rate=2e-5,
        num_train_epochs=args.epoch,
        seed=args.seed,
        gradient_accumulation_steps=1,  # help reduce gpu memory
        fp16=True,                      # Enable mixed precision training
        report_to="none",
    )

    # === TRAINER ===
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_train,
        tokenizer=tokenizer,
        data_collator=data_collator
    )

    # === TRAIN MODEL ===
    trainer.train()
    # End logging
    # wandb.finish()
train_deep.py
ADDED
|
@@ -0,0 +1,233 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Fine-tune Llama-3.2-3B on a (possibly perturbed) BabyLM dataset via DeepSpeed.

Flat training script: parses CLI flags, loads the perturbation-specific
dataset/tokenizer, and runs a causal-LM fine-tune (train + periodic eval on a
600-sample validation subset) under the DeepSpeed config in
deepspeed_config/train_dp_config.json.

Cleanup: the ~120-line commented-out previous version of this script (which
also referenced an un-imported `shutil`) has been removed — it was dead code.
"""
import sys
import torch
sys.path.append("..")

import os
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments, DataCollatorForLanguageModeling
from utils_llama import PERTURBATIONS, BABYLM_SPLITS, BABYLM_DATA_PATH, \
    GENRES, MARKER_TOKEN_IDS, marker_sg_token, marker_pl_token, marker_rev_token, write_file
import argparse

# Silence the fork-after-parallelism warning from the tokenizers library.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# import wandb
# Setup for Weights & Biases
# wandb.init(project="kallini", group="babylm-perturbation-experiments", name=run_id)

if __name__ == "__main__":

    # === CONFIGURATION SETTINGS ===
    parser = argparse.ArgumentParser(description="Training configuration.")

    parser.add_argument('--perturbation', type=str, default='hop_tokens4', help='Type of perturbation to use.')
    parser.add_argument('--train_set', type=str, default='10M', help='Dataset size for training.')
    parser.add_argument('--batch_size', type=int, default=4, help='Batch size for training.')
    parser.add_argument('--epoch', type=int, default=20, help='train epoch')
    parser.add_argument('--seed', type=int, default=0, help='Random seed.')

    args = parser.parse_args()

    ckpt_path = "./checkpoints"

    model_name = "meta-llama/Llama-3.2-3B"
    model_save_name = "Llama-3.2-3B"

    # === FILE PATHS BASED ON CONFIGURATION ===
    run_id = f"babylm_{args.perturbation}_{args.train_set}_seed{args.seed}"
    cache_dir = os.path.join(ckpt_path, model_save_name, run_id, "artifacts")
    run_dir = os.path.join(ckpt_path, model_save_name, run_id, "runs")
    os.makedirs(cache_dir, exist_ok=True)
    os.makedirs(run_dir, exist_ok=True)

    # === DATASET LOADING ===
    # The dataset config name is the exact same f-string as run_id; reuse it.
    dataset = load_dataset('babylm_dataset_test.py', name=run_id, trust_remote_code=True)
    train_dataset = dataset['train']
    val_dataset = dataset['validation']

    print(train_dataset)

    # === TOKENIZER & MODEL LOADING ===
    # NOTE(review): assumes the perturbation-specific tokenizer has a pad
    # token configured — padding="max_length" below fails otherwise.
    tokenizer = PERTURBATIONS[args.perturbation]['llama_tokenizer']
    model = AutoModelForCausalLM.from_pretrained(model_name,
                                                 # device_map="auto",  # deepspeed needs to delete this setting
                                                 cache_dir=cache_dir)

    # === TOKENIZATION ===
    def tokenize_function(examples):
        """Tokenize a batch of raw text rows to fixed-length (1024) inputs."""
        return tokenizer(examples['text'], padding="max_length", truncation=True, max_length=1024)
    tokenized_train = train_dataset.map(tokenize_function, batched=True, remove_columns=["text"])
    tokenized_valid = val_dataset.map(tokenize_function, batched=True, remove_columns=["text"])

    # Evaluate on a random 600-example subset to keep eval cheap.
    # NOTE(review): shuffle() is unseeded, so the subset differs per run.
    shuffled_valid = tokenized_valid.shuffle()
    tokenized_valid = shuffled_valid.select(range(600))

    # === DATA COLLATOR ===
    # mlm=False -> plain causal-LM labels (model shifts inputs internally).
    data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)

    # === TRAINING ARGUMENTS ===
    training_args = TrainingArguments(
        output_dir=run_dir,
        evaluation_strategy="steps",
        eval_steps=10,
        per_device_train_batch_size=args.batch_size,  # set "auto" in deepspeed config, adjust it in trainer
        logging_dir='./logs',
        logging_steps=10,
        save_steps=150,
        learning_rate=5e-5,             # align with deepspeed
        num_train_epochs=args.epoch,
        seed=args.seed,
        gradient_accumulation_steps=2,  # set "auto" in deepspeed config, adjust it in trainer
        fp16=True,                      # align with deepspeed
        report_to="none",
        deepspeed="deepspeed_config/train_dp_config.json"
    )

    # === TRAINER ===
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_train,
        eval_dataset=tokenized_valid,
        tokenizer=tokenizer,
        data_collator=data_collator
    )

    # === TRAIN MODEL ===
    trainer.train()
    # End logging
    # wandb.finish()
train_ftp.py
ADDED
|
@@ -0,0 +1,137 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Fine-tune Llama-3.2-3B with the FTP AdamP optimizer on a BabyLM dataset.

Same training pipeline as the sibling scripts, but swaps the default
optimizer for FTP's AdamP via a Trainer subclass, and logs to Weights &
Biases.
"""
import sys
import torch
sys.path.append("..")

from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments, DataCollatorForLanguageModeling
from utils_llama import PERTURBATIONS, BABYLM_SPLITS, BABYLM_DATA_PATH, \
    GENRES, MARKER_TOKEN_IDS, marker_sg_token, marker_pl_token, marker_rev_token, write_file
from datasets import load_dataset
from FTP import AdamP

import wandb
import argparse
import copy
import math
import os

# Silence the fork-after-parallelism warning from the tokenizers library.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# FTP projection parameter (also recorded in the wandb run name).
ftp_k = 1


class TrainerAdamP(Trainer):
    """Trainer that replaces the default optimizer with FTP's AdamP."""

    def create_optimizer(self):
        """Build the AdamP optimizer over all trainable parameters.

        BUG FIX: the learning rate was previously hard-coded to 5e-6 here,
        silently ignoring the --lr flag that is forwarded into
        TrainingArguments. Read self.args.learning_rate instead so the CLI
        value actually takes effect (the old default was also 5e-6, so
        default behavior is unchanged).
        """
        optimizer_params = {
            "lr": self.args.learning_rate,
            "weight_decay": 0.0,
            "k": ftp_k,            # FTP-specific parameter for AdamP
            "exclude_set": set()   # empty set -> no parameters excluded
        }

        # Cache pre-trained model weights: AdamP's param group carries the
        # anchor ("pre") copies alongside the live parameters.
        params_to_opt = [x[1] for x in self.model.named_parameters() if x[1].requires_grad]
        params_to_opt_name = [x[0] for x in self.model.named_parameters() if x[1].requires_grad]
        params_anchor = copy.deepcopy(params_to_opt)
        param_group = [{'params': params_to_opt, 'pre': params_anchor, 'name': params_to_opt_name}]

        # Initialize the AdamP optimizer.
        self.optimizer = AdamP(param_group, **optimizer_params)
        # Match the base-class contract: Trainer.create_optimizer returns the
        # optimizer (some integrations, e.g. deepspeed, use the return value).
        return self.optimizer


if __name__ == "__main__":

    # === CONFIGURATION SETTINGS ===
    parser = argparse.ArgumentParser(description="Training configuration.")

    parser.add_argument('--perturbation', type=str, default='hop_tokens4', help='Type of perturbation to use.')
    parser.add_argument('--train_set', type=str, default='10M', help='Dataset size for training.')
    parser.add_argument('--batch_size', type=int, default=3, help='Batch size for training.')
    parser.add_argument('--epoch', type=int, default=3, help='train epoch')
    parser.add_argument('--seed', type=int, default=0, help='Random seed.')
    parser.add_argument('--lr', type=float, default=5e-6, help='Learning rate.')

    args = parser.parse_args()

    ckpt_path = "./checkpoints"

    model_name = "meta-llama/Llama-3.2-3B"
    model_save_name = "Llama-3.2-3B-FTP"

    # === FILE PATHS BASED ON CONFIGURATION ===
    wandb_id = f"{model_save_name}_{args.perturbation}_train_set_{args.train_set}_epoch_{args.epoch}_batch_size_{args.batch_size}_seed_{args.seed}_lr_{args.lr}_wandb_ftp_{ftp_k}"
    wandb.init(project="exp-impo-shuffle", group="ftp-1", name=wandb_id)
    wandb.config.update(args)

    run_id = f"babylm_{args.perturbation}_{args.train_set}_seed{args.seed}"
    cache_dir = os.path.join(ckpt_path, model_save_name, run_id, "artifacts")
    run_dir = os.path.join(ckpt_path, model_save_name, run_id, "runs")
    os.makedirs(cache_dir, exist_ok=True)
    os.makedirs(run_dir, exist_ok=True)

    # === DATASET LOADING ===
    # The dataset config name is the exact same f-string as run_id; reuse it.
    dataset = load_dataset('babylm_dataset_test.py', name=run_id, trust_remote_code=True)
    train_dataset = dataset['train']
    valid_dataset = dataset['validation']

    # === TOKENIZER & MODEL LOADING ===
    # NOTE(review): assumes the perturbation-specific tokenizer has a pad
    # token configured — padding="max_length" below fails otherwise.
    tokenizer = PERTURBATIONS[args.perturbation]['llama_tokenizer']
    model = AutoModelForCausalLM.from_pretrained(model_name,
                                                 # device_map="auto",  # deepspeed needs to delete this setting
                                                 cache_dir=cache_dir)

    # === TOKENIZATION ===
    def tokenize_function(examples):
        """Tokenize a batch of raw text rows to fixed-length (1024) inputs."""
        return tokenizer(examples['text'], padding="max_length", truncation=True, max_length=1024)
    tokenized_train = train_dataset.map(tokenize_function, batched=True, remove_columns=["text"])
    tokenized_valid = valid_dataset.map(tokenize_function, batched=True, remove_columns=["text"])

    # Evaluate on a random 1000-example subset to keep eval cheap.
    # NOTE(review): shuffle() is unseeded, so the subset differs per run.
    shuffled_valid = tokenized_valid.shuffle()
    tokenized_valid = shuffled_valid.select(range(1000))
    print("tokenized_valid:", tokenized_valid)

    # === DATA COLLATOR ===
    # mlm=False -> plain causal-LM labels (model shifts inputs internally).
    data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)

    # === TRAINING ARGUMENTS ===
    training_args = TrainingArguments(
        output_dir=run_dir,
        evaluation_strategy="steps",
        eval_steps=10,
        per_device_train_batch_size=args.batch_size,  # set "auto" in deepspeed config, adjust it in trainer
        logging_dir='./logs',
        logging_steps=1,
        save_steps=100,
        learning_rate=args.lr,          # align with deepspeed
        num_train_epochs=args.epoch,
        seed=args.seed,
        gradient_accumulation_steps=2,  # set "auto" in deepspeed config, adjust it in trainer
        fp16=True,                      # align with deepspeed
        report_to="wandb",
        warmup_ratio=0.1,
        deepspeed="deepspeed_config/train_dp_config.json"
    )

    # === TRAINER ===
    trainer = TrainerAdamP(
        model=model,
        args=training_args,
        train_dataset=tokenized_train,
        eval_dataset=tokenized_valid,
        tokenizer=tokenizer,
        data_collator=data_collator
    )

    # === TRAIN MODEL ===
    trainer.train()
    # End logging
    wandb.finish()
train_gpt2.py
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Fine-tune GPT-2 on a (possibly perturbed) BabyLM dataset with wandb logging.

Pipeline: parse CLI flags -> init wandb -> load dataset + perturbation
tokenizer -> tokenize -> train via HF Trainer under DeepSpeed.
"""
import sys
import torch
sys.path.append("..")

import os
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments, DataCollatorForLanguageModeling
from utils_gpt2 import PERTURBATIONS, BABYLM_SPLITS, BABYLM_DATA_PATH, \
    GENRES, MARKER_TOKEN_IDS, marker_sg_token, marker_pl_token, marker_rev_token, write_file
import wandb
import argparse

os.environ["TOKENIZERS_PARALLELISM"] = "false"
# os.environ['MASTER_PORT'] = '12345'

if __name__ == "__main__":

    # ---- CLI configuration -------------------------------------------------
    arg_parser = argparse.ArgumentParser(description="Training configuration.")

    arg_parser.add_argument('--perturbation', type=str, default='hop_tokens4', help='Type of perturbation to use.')
    arg_parser.add_argument('--train_set', type=str, default='10M', help='Dataset size for training.')
    arg_parser.add_argument('--batch_size', type=int, default=4, help='Batch size for training.')
    arg_parser.add_argument('--epoch', type=int, default=3, help='train epoch')
    arg_parser.add_argument('--seed', type=int, default=0, help='Random seed.')
    arg_parser.add_argument('--lr', type=float, default=5e-6, help='Learning rate.')

    cfg = arg_parser.parse_args()

    checkpoint_root = "./checkpoints"
    hf_model_id = "gpt2"
    save_tag = "GPT2"

    # ---- Experiment tracking -----------------------------------------------
    run_name = f"{save_tag}_{cfg.perturbation}_train_set_{cfg.train_set}_epoch_{cfg.epoch}_batch_size_{cfg.batch_size}_seed_{cfg.seed}_lr_{cfg.lr}_wandb"
    wandb.init(project="exp-impo-reverse", group="reverse-gpt2", name=run_name)
    wandb.config.update(cfg)

    # ---- Output locations derived from the configuration -------------------
    run_id = f"babylm_{cfg.perturbation}_{cfg.train_set}_seed{cfg.seed}"
    artifact_dir = os.path.join(checkpoint_root, f"{save_tag}", run_id, "artifacts")
    output_dir = os.path.join(checkpoint_root, f"{save_tag}", run_id, "runs")
    for path in (artifact_dir, output_dir):
        os.makedirs(path, exist_ok=True)

    # ---- Dataset -----------------------------------------------------------
    dataset_name = f"babylm_{cfg.perturbation}_{cfg.train_set}_seed{cfg.seed}"
    corpus = load_dataset('babylm_dataset_test.py', name=dataset_name, trust_remote_code=True)
    raw_train = corpus['train']
    raw_valid = corpus['validation']

    # ---- Tokenizer & model -------------------------------------------------
    tokenizer = PERTURBATIONS[cfg.perturbation]['gpt2_tokenizer']
    model = AutoModelForCausalLM.from_pretrained(hf_model_id,
                                                 # device_map="auto",  # deepspeed needs to delete this setting
                                                 cache_dir=artifact_dir)

    # ---- Tokenization ------------------------------------------------------
    def encode_batch(examples):
        """Convert a batch of raw text rows into fixed-length (1024) inputs."""
        return tokenizer(examples['text'], padding="max_length", truncation=True, max_length=1024)

    tokenized_train = raw_train.map(encode_batch, batched=True, remove_columns=["text"])
    tokenized_valid = raw_valid.map(encode_batch, batched=True, remove_columns=["text"])

    # Evaluate on a random 1000-example subset to keep evaluation cheap.
    tokenized_valid = tokenized_valid.shuffle().select(range(1000))
    print("tokenized_valid:", tokenized_valid)

    # ---- Collator: mlm=False -> plain causal-LM objective -------------------
    collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)

    # ---- Training arguments -------------------------------------------------
    hf_args = TrainingArguments(
        output_dir=output_dir,
        evaluation_strategy="steps",
        eval_steps=10,
        per_device_train_batch_size=cfg.batch_size,  # set "auto" in deepspeed config, adjust it in trainer
        logging_dir='./logs',
        logging_steps=1,
        save_steps=100,
        learning_rate=cfg.lr,           # align with deepspeed
        num_train_epochs=cfg.epoch,
        seed=cfg.seed,
        gradient_accumulation_steps=2,  # set "auto" in deepspeed config, adjust it in trainer
        fp16=True,                      # align with deepspeed
        report_to="wandb",
        warmup_ratio=0.1,
        deepspeed="deepspeed_config/train_dp_config.json"
    )

    # ---- Trainer ------------------------------------------------------------
    lm_trainer = Trainer(
        model=model,
        args=hf_args,
        train_dataset=tokenized_train,
        eval_dataset=tokenized_valid,
        tokenizer=tokenizer,
        data_collator=collator
    )

    # ---- Train --------------------------------------------------------------
    lm_trainer.train()
    # End logging
    wandb.finish()
train_llama.py
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import sys
|
| 2 |
+
import torch
|
| 3 |
+
sys.path.append("..")
|
| 4 |
+
|
| 5 |
+
import os
|
| 6 |
+
from datasets import load_dataset
|
| 7 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments, DataCollatorForLanguageModeling
|
| 8 |
+
from utils_llama import PERTURBATIONS, BABYLM_SPLITS, BABYLM_DATA_PATH, \
|
| 9 |
+
GENRES, MARKER_TOKEN_IDS, marker_sg_token, marker_pl_token, marker_rev_token, write_file
|
| 10 |
+
import argparse
|
| 11 |
+
|
| 12 |
+
# import wandb
|
| 13 |
+
|
| 14 |
+
# Setup for Weights & Biases
|
| 15 |
+
# wandb.init(project="kallini", group="babylm-perturbation-experiments", name=run_id)
|
| 16 |
+
|
| 17 |
+
if __name__ == "__main__":
|
| 18 |
+
|
| 19 |
+
# === CONFIGURATION SETTINGS ===
|
| 20 |
+
parser = argparse.ArgumentParser(description="Training configuration.")
|
| 21 |
+
|
| 22 |
+
parser.add_argument('--perturbation', type=str, default='hop_tokens4', help='Type of perturbation to use.')
|
| 23 |
+
parser.add_argument('--train_set', type=str, default='10M', help='Dataset size for training.')
|
| 24 |
+
parser.add_argument('--batch_size', type=int, default=4, help='Batch size for training.')
|
| 25 |
+
parser.add_argument('--epoch', type=int, default=20, help='train epoch')
|
| 26 |
+
parser.add_argument('--seed', type=int, default=0, help='Random seed.')
|
| 27 |
+
|
| 28 |
+
args = parser.parse_args()
|
| 29 |
+
|
| 30 |
+
# no_pos_encodings_underscore = "" # Ex: "_nopos" if needed
|
| 31 |
+
ckpt_path = "./checkpoints"
|
| 32 |
+
# effective_bsz = 512
|
| 33 |
+
|
| 34 |
+
model_name = "meta-llama/Llama-3.2-3B"
|
| 35 |
+
|
| 36 |
+
model_save_name = "Llama-3.2-3B"
|
| 37 |
+
# === FILE PATHS BASED ON CONFIGURATION ===
|
| 38 |
+
run_id = f"babylm_{args.perturbation}_{args.train_set}_seed{args.seed}"
|
| 39 |
+
cache_dir = os.path.join(ckpt_path, f"{model_save_name}", run_id, "artifacts")
|
| 40 |
+
run_dir = os.path.join(ckpt_path, f"{model_save_name}", run_id, "runs")
|
| 41 |
+
os.makedirs(cache_dir, exist_ok=True)
|
| 42 |
+
os.makedirs(run_dir, exist_ok=True)
|
| 43 |
+
|
| 44 |
+
# === DATASET LOADING ===
|
| 45 |
+
dataset_name = f"babylm_{args.perturbation}_{args.train_set}_seed{args.seed}"
|
| 46 |
+
dataset = load_dataset('babylm_dataset_llama.py', name=dataset_name, trust_remote_code=True)
|
| 47 |
+
train_dataset = dataset['train']
|
| 48 |
+
|
| 49 |
+
# === TOKENIZER & MODEL LOADING ===
|
| 50 |
+
# model_name = f"gpt2{'' if no_pos_encodings_underscore == '' else '-no-pos'}-small-{perturbation}-{paren_model}"
|
| 51 |
+
|
| 52 |
+
# tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir)
|
| 53 |
+
tokenizer = PERTURBATIONS[args.perturbation]['llama_tokenizer']
|
| 54 |
+
model = AutoModelForCausalLM.from_pretrained(model_name,
|
| 55 |
+
device_map="auto",
|
| 56 |
+
cache_dir=cache_dir)
|
| 57 |
+
|
| 58 |
+
# print("model:", model)
|
| 59 |
+
# === TOKENIZATION ===
|
| 60 |
+
def tokenize_function(examples):
|
| 61 |
+
return tokenizer(examples['text'], padding="max_length", truncation=True, max_length=1024)
|
| 62 |
+
tokenized_train = train_dataset.map(tokenize_function, batched=True, remove_columns=["text"])
|
| 63 |
+
|
| 64 |
+
# === DATA COLLATOR ===
|
| 65 |
+
data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)
|
| 66 |
+
|
| 67 |
+
# === TRAINING ARGUMENTS ===
|
| 68 |
+
training_args = TrainingArguments(
|
| 69 |
+
output_dir=run_dir,
|
| 70 |
+
# evaluation_strategy="steps",
|
| 71 |
+
evaluation_strategy="no",
|
| 72 |
+
# per_device_train_batch_size=int(effective_bsz / 1), # Assuming 1 GPU for this example
|
| 73 |
+
per_device_train_batch_size=args.batch_size, # Assuming 1 GPU for this example
|
| 74 |
+
logging_dir='./logs',
|
| 75 |
+
logging_steps=1000,
|
| 76 |
+
save_steps=1000,
|
| 77 |
+
# save_total_limit=5,
|
| 78 |
+
learning_rate=2e-5,
|
| 79 |
+
num_train_epochs=args.epoch,
|
| 80 |
+
seed=args.seed,
|
| 81 |
+
# load_best_model_at_end=True,
|
| 82 |
+
gradient_accumulation_steps=1, # help reduce gpu memory
|
| 83 |
+
fp16 = True, # Enable mixed precision training
|
| 84 |
+
report_to="none",
|
| 85 |
+
)
|
| 86 |
+
|
| 87 |
+
# === TRAINER ===
|
| 88 |
+
trainer = Trainer(
|
| 89 |
+
model=model,
|
| 90 |
+
args=training_args,
|
| 91 |
+
train_dataset=tokenized_train,
|
| 92 |
+
tokenizer=tokenizer,
|
| 93 |
+
data_collator=data_collator
|
| 94 |
+
)
|
| 95 |
+
|
| 96 |
+
# === TRAIN MODEL ===
|
| 97 |
+
trainer.train()
|
| 98 |
+
# End logging
|
| 99 |
+
# wandb.finish()
|
train_llama_1B.py
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import sys
|
| 2 |
+
import torch
|
| 3 |
+
sys.path.append("..")
|
| 4 |
+
|
| 5 |
+
import os
|
| 6 |
+
from datasets import load_dataset
|
| 7 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments, DataCollatorForLanguageModeling
|
| 8 |
+
from utils_llama_1B import PERTURBATIONS, BABYLM_SPLITS, BABYLM_DATA_PATH, \
|
| 9 |
+
GENRES, MARKER_TOKEN_IDS, marker_sg_token, marker_pl_token, marker_rev_token, write_file
|
| 10 |
+
import wandb
|
| 11 |
+
import argparse
|
| 12 |
+
|
| 13 |
+
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
| 14 |
+
# os.environ['MASTER_PORT'] = '12345'
|
| 15 |
+
|
| 16 |
+
# import wandb
|
| 17 |
+
|
| 18 |
+
# Setup for Weights & Biases
|
| 19 |
+
# wandb.init(project="kallini", group="babylm-perturbation-experiments", name=run_id)
|
| 20 |
+
|
| 21 |
+
if __name__ == "__main__":
|
| 22 |
+
|
| 23 |
+
# === CONFIGURATION SETTINGS ===
|
| 24 |
+
parser = argparse.ArgumentParser(description="Training configuration.")
|
| 25 |
+
|
| 26 |
+
parser.add_argument('--perturbation', type=str, default='hop_tokens4', help='Type of perturbation to use.')
|
| 27 |
+
parser.add_argument('--train_set', type=str, default='10M', help='Dataset size for training.')
|
| 28 |
+
parser.add_argument('--batch_size', type=int, default=4, help='Batch size for training.')
|
| 29 |
+
parser.add_argument('--epoch', type=int, default=3, help='train epoch')
|
| 30 |
+
parser.add_argument('--seed', type=int, default=0, help='Random seed.')
|
| 31 |
+
parser.add_argument('--lr', type=float, default=5e-6, help='Learning rate.')
|
| 32 |
+
|
| 33 |
+
args = parser.parse_args()
|
| 34 |
+
|
| 35 |
+
# no_pos_encodings_underscore = "" # Ex: "_nopos" if needed
|
| 36 |
+
ckpt_path = "./checkpoints"
|
| 37 |
+
# effective_bsz = 512
|
| 38 |
+
|
| 39 |
+
model_name = "meta-llama/Llama-3.2-1B"
|
| 40 |
+
model_save_name = "Llama-3.2-1B"
|
| 41 |
+
|
| 42 |
+
# === FILE PATHS BASED ON CONFIGURATION ===
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
wandb_id = f"{model_save_name}_{args.perturbation}_train_set_{args.train_set}_epoch_{args.epoch}_batch_size_{args.batch_size}_seed_{args.seed}_lr_{args.lr}_wandb"
|
| 46 |
+
wandb.init(project="exp-impo-reverse", group="reverse-1B", name=wandb_id)
|
| 47 |
+
wandb.config.update(args)
|
| 48 |
+
|
| 49 |
+
run_id = f"babylm_{args.perturbation}_{args.train_set}_seed{args.seed}"
|
| 50 |
+
cache_dir = os.path.join(ckpt_path, f"{model_save_name}", run_id, "artifacts")
|
| 51 |
+
run_dir = os.path.join(ckpt_path, f"{model_save_name}", run_id, "runs")
|
| 52 |
+
os.makedirs(cache_dir, exist_ok=True)
|
| 53 |
+
os.makedirs(run_dir, exist_ok=True)
|
| 54 |
+
|
| 55 |
+
# === DATASET LOADING ===
|
| 56 |
+
dataset_name = f"babylm_{args.perturbation}_{args.train_set}_seed{args.seed}"
|
| 57 |
+
dataset = load_dataset('babylm_dataset_test.py', name=dataset_name, trust_remote_code=True)
|
| 58 |
+
train_dataset = dataset['train']
|
| 59 |
+
valid_dataset = dataset['validation']
|
| 60 |
+
|
| 61 |
+
# === TOKENIZER & MODEL LOADING ===
|
| 62 |
+
# model_name = f"gpt2{'' if no_pos_encodings_underscore == '' else '-no-pos'}-small-{perturbation}-{paren_model}"
|
| 63 |
+
|
| 64 |
+
# tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir)
|
| 65 |
+
tokenizer = PERTURBATIONS[args.perturbation]['llama_tokenizer']
|
| 66 |
+
model = AutoModelForCausalLM.from_pretrained(model_name,
|
| 67 |
+
# device_map="auto", # deepspeed needs to delete this setting
|
| 68 |
+
cache_dir=cache_dir)
|
| 69 |
+
|
| 70 |
+
# print("model:", model)
|
| 71 |
+
# === TOKENIZATION ===
|
| 72 |
+
def tokenize_function(examples):
|
| 73 |
+
return tokenizer(examples['text'], padding="max_length", truncation=True, max_length=1024)
|
| 74 |
+
tokenized_train = train_dataset.map(tokenize_function, batched=True, remove_columns=["text"])
|
| 75 |
+
tokenized_valid = valid_dataset.map(tokenize_function, batched=True, remove_columns=["text"])
|
| 76 |
+
|
| 77 |
+
shuffled_valid = tokenized_valid.shuffle()
|
| 78 |
+
tokenized_valid = shuffled_valid.select(range(1000))
|
| 79 |
+
print("tokenized_valid:", tokenized_valid)
|
| 80 |
+
# print(train_dataset)
|
| 81 |
+
# === DATA COLLATOR ===2
|
| 82 |
+
data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)
|
| 83 |
+
|
| 84 |
+
# === TRAINING ARGUMENTS ===
|
| 85 |
+
training_args = TrainingArguments(
|
| 86 |
+
output_dir=run_dir,
|
| 87 |
+
evaluation_strategy="steps",
|
| 88 |
+
eval_steps=10,
|
| 89 |
+
per_device_train_batch_size=args.batch_size, # set "auto" in deepspeed config, adjust it in trainer
|
| 90 |
+
logging_dir='./logs',
|
| 91 |
+
logging_steps=1,
|
| 92 |
+
save_steps=100,
|
| 93 |
+
learning_rate=args.lr, # align with deepspeed
|
| 94 |
+
num_train_epochs=args.epoch,
|
| 95 |
+
seed=args.seed,
|
| 96 |
+
gradient_accumulation_steps=2, # # set "auto" in deepspeed config, adjust it in trainer
|
| 97 |
+
fp16=True, # align with deepspeed
|
| 98 |
+
report_to="wandb",
|
| 99 |
+
warmup_ratio=0.1,
|
| 100 |
+
deepspeed="deepspeed_config/train_dp_config.json"
|
| 101 |
+
)
|
| 102 |
+
|
| 103 |
+
# === TRAINER ===
|
| 104 |
+
trainer = Trainer(
|
| 105 |
+
model=model,
|
| 106 |
+
args=training_args,
|
| 107 |
+
train_dataset=tokenized_train,
|
| 108 |
+
eval_dataset=tokenized_valid,
|
| 109 |
+
tokenizer=tokenizer,
|
| 110 |
+
data_collator=data_collator
|
| 111 |
+
)
|
| 112 |
+
|
| 113 |
+
# === TRAIN MODEL ===
|
| 114 |
+
trainer.train()
|
| 115 |
+
# End logging
|
| 116 |
+
wandb.finish()
|
| 117 |
+
|
train_llama_3B.py
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import sys
|
| 2 |
+
import torch
|
| 3 |
+
sys.path.append("..")
|
| 4 |
+
|
| 5 |
+
import os
|
| 6 |
+
from datasets import load_dataset
|
| 7 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments, DataCollatorForLanguageModeling
|
| 8 |
+
from utils_llama_3B import PERTURBATIONS, BABYLM_SPLITS, BABYLM_DATA_PATH, \
|
| 9 |
+
GENRES, MARKER_TOKEN_IDS, marker_sg_token, marker_pl_token, marker_rev_token, write_file
|
| 10 |
+
import wandb
|
| 11 |
+
import argparse
|
| 12 |
+
|
| 13 |
+
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
| 14 |
+
# os.environ['MASTER_PORT'] = '12345'
|
| 15 |
+
|
| 16 |
+
# import wandb
|
| 17 |
+
|
| 18 |
+
# Setup for Weights & Biases
|
| 19 |
+
# wandb.init(project="kallini", group="babylm-perturbation-experiments", name=run_id)
|
| 20 |
+
|
| 21 |
+
if __name__ == "__main__":
|
| 22 |
+
|
| 23 |
+
# === CONFIGURATION SETTINGS ===
|
| 24 |
+
parser = argparse.ArgumentParser(description="Training configuration.")
|
| 25 |
+
|
| 26 |
+
parser.add_argument('--perturbation', type=str, default='hop_tokens4', help='Type of perturbation to use.')
|
| 27 |
+
parser.add_argument('--train_set', type=str, default='10M', help='Dataset size for training.')
|
| 28 |
+
parser.add_argument('--batch_size', type=int, default=4, help='Batch size for training.')
|
| 29 |
+
parser.add_argument('--epoch', type=int, default=3, help='train epoch')
|
| 30 |
+
parser.add_argument('--seed', type=int, default=0, help='Random seed.')
|
| 31 |
+
parser.add_argument('--lr', type=float, default=5e-6, help='Learning rate.')
|
| 32 |
+
|
| 33 |
+
args = parser.parse_args()
|
| 34 |
+
|
| 35 |
+
# no_pos_encodings_underscore = "" # Ex: "_nopos" if needed
|
| 36 |
+
ckpt_path = "./checkpoints"
|
| 37 |
+
# effective_bsz = 512
|
| 38 |
+
|
| 39 |
+
model_name = "meta-llama/Llama-3.2-3B"
|
| 40 |
+
model_save_name = "Llama-3.2-3B"
|
| 41 |
+
|
| 42 |
+
# === FILE PATHS BASED ON CONFIGURATION ===
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
wandb_id = f"{model_save_name}_{args.perturbation}_train_set_{args.train_set}_epoch_{args.epoch}_batch_size_{args.batch_size}_seed_{args.seed}_lr_{args.lr}_wandb"
|
| 46 |
+
wandb.init(project="exp-impo-shuffle", group="shuffle", name=wandb_id)
|
| 47 |
+
wandb.config.update(args)
|
| 48 |
+
|
| 49 |
+
run_id = f"babylm_{args.perturbation}_{args.train_set}_seed{args.seed}"
|
| 50 |
+
cache_dir = os.path.join(ckpt_path, f"{model_save_name}", run_id, "artifacts")
|
| 51 |
+
run_dir = os.path.join(ckpt_path, f"{model_save_name}", run_id, "runs")
|
| 52 |
+
os.makedirs(cache_dir, exist_ok=True)
|
| 53 |
+
os.makedirs(run_dir, exist_ok=True)
|
| 54 |
+
|
| 55 |
+
# === DATASET LOADING ===
|
| 56 |
+
dataset_name = f"babylm_{args.perturbation}_{args.train_set}_seed{args.seed}"
|
| 57 |
+
dataset = load_dataset('babylm_dataset_test.py', name=dataset_name, trust_remote_code=True)
|
| 58 |
+
train_dataset = dataset['train']
|
| 59 |
+
valid_dataset = dataset['validation']
|
| 60 |
+
|
| 61 |
+
# === TOKENIZER & MODEL LOADING ===
|
| 62 |
+
# model_name = f"gpt2{'' if no_pos_encodings_underscore == '' else '-no-pos'}-small-{perturbation}-{paren_model}"
|
| 63 |
+
|
| 64 |
+
# tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir)
|
| 65 |
+
tokenizer = PERTURBATIONS[args.perturbation]['llama_tokenizer']
|
| 66 |
+
model = AutoModelForCausalLM.from_pretrained(model_name,
|
| 67 |
+
# device_map="auto", # deepspeed needs to delete this setting
|
| 68 |
+
cache_dir=cache_dir)
|
| 69 |
+
|
| 70 |
+
# print("model:", model)
|
| 71 |
+
# === TOKENIZATION ===
|
| 72 |
+
def tokenize_function(examples):
|
| 73 |
+
return tokenizer(examples['text'], padding="max_length", truncation=True, max_length=1024)
|
| 74 |
+
tokenized_train = train_dataset.map(tokenize_function, batched=True, remove_columns=["text"])
|
| 75 |
+
tokenized_valid = valid_dataset.map(tokenize_function, batched=True, remove_columns=["text"])
|
| 76 |
+
|
| 77 |
+
shuffled_valid = tokenized_valid.shuffle()
|
| 78 |
+
tokenized_valid = shuffled_valid.select(range(1000))
|
| 79 |
+
print("tokenized_valid:", tokenized_valid)
|
| 80 |
+
# print(train_dataset)
|
| 81 |
+
# === DATA COLLATOR ===2
|
| 82 |
+
data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)
|
| 83 |
+
|
| 84 |
+
# === TRAINING ARGUMENTS ===
|
| 85 |
+
training_args = TrainingArguments(
|
| 86 |
+
output_dir=run_dir,
|
| 87 |
+
evaluation_strategy="steps",
|
| 88 |
+
eval_steps=10,
|
| 89 |
+
per_device_train_batch_size=args.batch_size, # set "auto" in deepspeed config, adjust it in trainer
|
| 90 |
+
logging_dir='./logs',
|
| 91 |
+
logging_steps=1,
|
| 92 |
+
save_steps=100,
|
| 93 |
+
learning_rate=args.lr, # align with deepspeed
|
| 94 |
+
num_train_epochs=args.epoch,
|
| 95 |
+
seed=args.seed,
|
| 96 |
+
gradient_accumulation_steps=2, # # set "auto" in deepspeed config, adjust it in trainer
|
| 97 |
+
fp16=True, # align with deepspeed
|
| 98 |
+
report_to="wandb",
|
| 99 |
+
warmup_ratio=0.1,
|
| 100 |
+
deepspeed="deepspeed_config/train_dp_config.json"
|
| 101 |
+
)
|
| 102 |
+
|
| 103 |
+
# === TRAINER ===
|
| 104 |
+
trainer = Trainer(
|
| 105 |
+
model=model,
|
| 106 |
+
args=training_args,
|
| 107 |
+
train_dataset=tokenized_train,
|
| 108 |
+
eval_dataset=tokenized_valid,
|
| 109 |
+
tokenizer=tokenizer,
|
| 110 |
+
data_collator=data_collator
|
| 111 |
+
)
|
| 112 |
+
|
| 113 |
+
# === TRAIN MODEL ===
|
| 114 |
+
trainer.train()
|
| 115 |
+
# End logging
|
| 116 |
+
wandb.finish()
|
| 117 |
+
|
train_qwen.py
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import sys
|
| 2 |
+
sys.path.append("..")
|
| 3 |
+
|
| 4 |
+
import os
|
| 5 |
+
from datasets import load_dataset
|
| 6 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments, DataCollatorForLanguageModeling
|
| 7 |
+
from utils_qwen import PERTURBATIONS, BABYLM_SPLITS, BABYLM_DATA_PATH, \
|
| 8 |
+
GENRES, MARKER_TOKEN_IDS, marker_sg_token, marker_pl_token, marker_rev_token, write_file
|
| 9 |
+
import argparse
|
| 10 |
+
# import wandb
|
| 11 |
+
|
| 12 |
+
# Setup for Weights & Biases
|
| 13 |
+
# wandb.init(project="kallini", group="babylm-perturbation-experiments", name=run_id)
|
| 14 |
+
|
| 15 |
+
if __name__ == "__main__":
|
| 16 |
+
|
| 17 |
+
# === CONFIGURATION SETTINGS ===
|
| 18 |
+
parser = argparse.ArgumentParser(description="Training configuration.")
|
| 19 |
+
|
| 20 |
+
parser.add_argument('--perturbation', type=str, default='hop_tokens4', help='Type of perturbation to use.')
|
| 21 |
+
parser.add_argument('--train_set', type=str, default='10M', help='Dataset size for training.')
|
| 22 |
+
parser.add_argument('--batch_size', type=int, default=4, help='Batch size for training.')
|
| 23 |
+
parser.add_argument('--epoch', type=int, default=20, help='train epoch')
|
| 24 |
+
parser.add_argument('--seed', type=int, default=0, help='Random seed.')
|
| 25 |
+
|
| 26 |
+
args = parser.parse_args()
|
| 27 |
+
|
| 28 |
+
# no_pos_encodings_underscore = "" # Ex: "_nopos" if needed
|
| 29 |
+
ckpt_path = "./checkpoints"
|
| 30 |
+
# effective_bsz = 512
|
| 31 |
+
|
| 32 |
+
model_name = "Qwen/Qwen2.5-7B"
|
| 33 |
+
|
| 34 |
+
model_save_name = "Qwen2.5-7B"
|
| 35 |
+
# === FILE PATHS BASED ON CONFIGURATION ===
|
| 36 |
+
run_id = f"babylm_{args.perturbation}_{args.train_set}_seed{args.seed}"
|
| 37 |
+
cache_dir = os.path.join(ckpt_path, f"{model_save_name}", run_id, "artifacts")
|
| 38 |
+
run_dir = os.path.join(ckpt_path, f"{model_save_name}", run_id, "runs")
|
| 39 |
+
os.makedirs(cache_dir, exist_ok=True)
|
| 40 |
+
os.makedirs(run_dir, exist_ok=True)
|
| 41 |
+
|
| 42 |
+
# === DATASET LOADING ===
|
| 43 |
+
dataset_name = f"babylm_{args.perturbation}_{args.train_set}_seed{args.seed}"
|
| 44 |
+
dataset = load_dataset('babylm_dataset.py', name=dataset_name, trust_remote_code=True)
|
| 45 |
+
train_dataset = dataset['train']
|
| 46 |
+
|
| 47 |
+
# === TOKENIZER & MODEL LOADING ===
|
| 48 |
+
# model_name = f"gpt2{'' if no_pos_encodings_underscore == '' else '-no-pos'}-small-{perturbation}-{paren_model}"
|
| 49 |
+
|
| 50 |
+
# tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir)
|
| 51 |
+
tokenizer = PERTURBATIONS[args.perturbation]['qwen_tokenizer']
|
| 52 |
+
model = AutoModelForCausalLM.from_pretrained(model_name,
|
| 53 |
+
device_map="auto", # Place different layers of the model on different GPUs
|
| 54 |
+
cache_dir=cache_dir)
|
| 55 |
+
|
| 56 |
+
# print("model:", model)
|
| 57 |
+
# === TOKENIZATION ===
|
| 58 |
+
def tokenize_function(examples):
|
| 59 |
+
return tokenizer(examples['text'], padding="max_length", truncation=True, max_length=1024)
|
| 60 |
+
tokenized_train = train_dataset.map(tokenize_function, batched=True, remove_columns=["text"])
|
| 61 |
+
|
| 62 |
+
# === DATA COLLATOR ===
|
| 63 |
+
data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)
|
| 64 |
+
|
| 65 |
+
# === TRAINING ARGUMENTS ===
|
| 66 |
+
training_args = TrainingArguments(
|
| 67 |
+
output_dir=run_dir,
|
| 68 |
+
# evaluation_strategy="steps",
|
| 69 |
+
evaluation_strategy="no",
|
| 70 |
+
# per_device_train_batch_size=int(effective_bsz / 1), # Assuming 1 GPU for this example
|
| 71 |
+
per_device_train_batch_size=args.batch_size, # Assuming 1 GPU for this example
|
| 72 |
+
logging_dir='./logs',
|
| 73 |
+
logging_steps=1000,
|
| 74 |
+
save_steps=1000,
|
| 75 |
+
# save_total_limit=5,
|
| 76 |
+
learning_rate=2e-5,
|
| 77 |
+
num_train_epochs=args.epoch,
|
| 78 |
+
seed=args.seed,
|
| 79 |
+
# load_best_model_at_end=True,
|
| 80 |
+
gradient_accumulation_steps=1, # help reduce gpu memory
|
| 81 |
+
fp16 = True, # Enable mixed precision training
|
| 82 |
+
# report_to="wandb"
|
| 83 |
+
)
|
| 84 |
+
|
| 85 |
+
# === TRAINER ===
|
| 86 |
+
trainer = Trainer(
|
| 87 |
+
model=model,
|
| 88 |
+
args=training_args,
|
| 89 |
+
train_dataset=tokenized_train,
|
| 90 |
+
tokenizer=tokenizer,
|
| 91 |
+
data_collator=data_collator
|
| 92 |
+
)
|
| 93 |
+
|
| 94 |
+
# === TRAIN MODEL ===
|
| 95 |
+
trainer.train()
|
| 96 |
+
# End logging
|
| 97 |
+
# wandb.finish()
|
train_qwen_lora.py
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import sys
|
| 2 |
+
sys.path.append("..")
|
| 3 |
+
|
| 4 |
+
import os
|
| 5 |
+
from datasets import load_dataset
|
| 6 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments, DataCollatorForLanguageModeling
|
| 7 |
+
from utils_qwen import PERTURBATIONS, BABYLM_SPLITS, BABYLM_DATA_PATH, \
|
| 8 |
+
GENRES, MARKER_TOKEN_IDS, marker_sg_token, marker_pl_token, marker_rev_token, write_file
|
| 9 |
+
from peft import get_peft_model, LoraConfig, TaskType # Import PEFT components for LoRA
|
| 10 |
+
# import wandb
|
| 11 |
+
|
| 12 |
+
# === CONFIGURATION SETTINGS ===
|
| 13 |
+
perturbation = "shuffle_deterministic21"
|
| 14 |
+
train_set = "10M"
|
| 15 |
+
seed = 0
|
| 16 |
+
ckpt_path = "./checkpoints"
|
| 17 |
+
effective_bsz = 512
|
| 18 |
+
|
| 19 |
+
# === FILE PATHS BASED ON CONFIGURATION ===
|
| 20 |
+
run_id = f"babylm_{perturbation}_{train_set}_seed{seed}"
|
| 21 |
+
cache_dir = os.path.join(ckpt_path, "babylm_lora", run_id, "artifacts")
|
| 22 |
+
run_dir = os.path.join(ckpt_path, "babylm_lora", run_id, "runs")
|
| 23 |
+
os.makedirs(cache_dir, exist_ok=True)
|
| 24 |
+
os.makedirs(run_dir, exist_ok=True)
|
| 25 |
+
|
| 26 |
+
# Setup for Weights & Biases
|
| 27 |
+
# wandb.init(project="kallini", group="babylm-perturbation-experiments", name=run_id)
|
| 28 |
+
|
| 29 |
+
# === DATASET LOADING ===
|
| 30 |
+
dataset_name = f"babylm_{perturbation}_{train_set}_seed{seed}"
|
| 31 |
+
dataset = load_dataset('babylm_dataset.py', name=dataset_name, trust_remote_code=True)
|
| 32 |
+
train_dataset = dataset['train']
|
| 33 |
+
|
| 34 |
+
# === TOKENIZER & MODEL LOADING ===
|
| 35 |
+
model_name = "Qwen/Qwen2.5-0.5B"
|
| 36 |
+
tokenizer = PERTURBATIONS[perturbation]['qwen_tokenizer']
|
| 37 |
+
model = AutoModelForCausalLM.from_pretrained(model_name, cache_dir=cache_dir)
|
| 38 |
+
|
| 39 |
+
# === APPLYING LoRA ===
|
| 40 |
+
lora_config = LoraConfig(
|
| 41 |
+
task_type=TaskType.CAUSAL_LM, # This specifies the task type
|
| 42 |
+
r=16, # Rank of the decomposed matrices
|
| 43 |
+
lora_alpha=16, # Amplitude of the LoRA updates
|
| 44 |
+
lora_dropout=0.1, # Dropout for LoRA layers
|
| 45 |
+
)
|
| 46 |
+
model = get_peft_model(model, lora_config)
|
| 47 |
+
|
| 48 |
+
# print("model:", model)
|
| 49 |
+
# for name, param in model.named_parameters():
|
| 50 |
+
# if param.requires_grad:
|
| 51 |
+
# print(f"Trainable parameter: {name}, shape: {param.shape}")
|
| 52 |
+
# === TOKENIZATION ===
|
| 53 |
+
def tokenize_function(examples):
|
| 54 |
+
return tokenizer(examples['text'], padding="max_length", truncation=True, max_length=1024)
|
| 55 |
+
|
| 56 |
+
tokenized_train = train_dataset.map(tokenize_function, batched=True, remove_columns=["text"])
|
| 57 |
+
# === DATA COLLATOR ===
|
| 58 |
+
data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)
|
| 59 |
+
|
| 60 |
+
# === TRAINING ARGUMENTS ===
|
| 61 |
+
training_args = TrainingArguments(
|
| 62 |
+
output_dir=run_dir,
|
| 63 |
+
# evaluation_strategy="steps", # use with load_best_model_at_end=True
|
| 64 |
+
evaluation_strategy="no",
|
| 65 |
+
per_device_train_batch_size=1, # Set based on your hardware capabilities
|
| 66 |
+
logging_dir='./logs',
|
| 67 |
+
logging_steps=10,
|
| 68 |
+
save_steps=10,
|
| 69 |
+
# save_total_limit=5,
|
| 70 |
+
learning_rate=5e-4, # You may want to tune this for LoRA
|
| 71 |
+
num_train_epochs=10, # Fewer epochs might be sufficient due to the efficiency of LoRA
|
| 72 |
+
seed=seed,
|
| 73 |
+
# load_best_model_at_end=True,
|
| 74 |
+
gradient_accumulation_steps=1,
|
| 75 |
+
fp16=True,
|
| 76 |
+
warmup_ratio=0.1,
|
| 77 |
+
# report_to="wandb"
|
| 78 |
+
)
|
| 79 |
+
|
| 80 |
+
# === TRAINER ===
|
| 81 |
+
trainer = Trainer(
|
| 82 |
+
model=model,
|
| 83 |
+
args=training_args,
|
| 84 |
+
train_dataset=tokenized_train,
|
| 85 |
+
tokenizer=tokenizer,
|
| 86 |
+
data_collator=data_collator
|
| 87 |
+
)
|
| 88 |
+
|
| 89 |
+
# === TRAIN MODEL ===
|
| 90 |
+
trainer.train()
|
| 91 |
+
|
| 92 |
+
# End logging
|
| 93 |
+
# wandb.finish()
|
wandb/debug-cli.chunhui.log
ADDED
|
File without changes
|
wandb/debug-internal.log
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2024-11-30T01:12:00.584497778-05:00","level":"INFO","msg":"using version","core version":"0.18.5"}
|
| 2 |
+
{"time":"2024-11-30T01:12:00.584512378-05:00","level":"INFO","msg":"created symlink","path":"/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/wandb/run-20241130_011200-7p4fy9o8/logs/debug-core.log"}
|
| 3 |
+
{"time":"2024-11-30T01:12:00.690595233-05:00","level":"INFO","msg":"created new stream","id":"7p4fy9o8"}
|
| 4 |
+
{"time":"2024-11-30T01:12:00.690619613-05:00","level":"INFO","msg":"stream: started","id":"7p4fy9o8"}
|
| 5 |
+
{"time":"2024-11-30T01:12:00.690681993-05:00","level":"INFO","msg":"sender: started","stream_id":"7p4fy9o8"}
|
| 6 |
+
{"time":"2024-11-30T01:12:00.690644643-05:00","level":"INFO","msg":"handler: started","stream_id":{"value":"7p4fy9o8"}}
|
| 7 |
+
{"time":"2024-11-30T01:12:00.690641003-05:00","level":"INFO","msg":"writer: Do: started","stream_id":{"value":"7p4fy9o8"}}
|
| 8 |
+
{"time":"2024-11-30T01:12:00.859481271-05:00","level":"INFO","msg":"Starting system monitor"}
|
| 9 |
+
{"time":"2024-11-30T02:11:11.340074116-05:00","level":"INFO","msg":"Stopping system monitor"}
|
| 10 |
+
{"time":"2024-11-30T02:11:11.340981582-05:00","level":"INFO","msg":"Stopped system monitor"}
|
| 11 |
+
{"time":"2024-11-30T02:11:11.766871033-05:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
|
| 12 |
+
{"time":"2024-11-30T02:11:11.948700811-05:00","level":"INFO","msg":"handler: operation stats","stats":{}}
|
| 13 |
+
{"time":"2024-11-30T02:11:12.958514865-05:00","level":"INFO","msg":"stream: closing","id":"7p4fy9o8"}
|
| 14 |
+
{"time":"2024-11-30T02:11:12.958555006-05:00","level":"INFO","msg":"handler: closed","stream_id":{"value":"7p4fy9o8"}}
|
| 15 |
+
{"time":"2024-11-30T02:11:12.958588266-05:00","level":"INFO","msg":"writer: Close: closed","stream_id":{"value":"7p4fy9o8"}}
|
| 16 |
+
{"time":"2024-11-30T02:11:12.958624136-05:00","level":"INFO","msg":"sender: closed","stream_id":"7p4fy9o8"}
|
| 17 |
+
{"time":"2024-11-30T02:11:12.958703497-05:00","level":"INFO","msg":"stream: closed","id":"7p4fy9o8"}
|
wandb/debug.log
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2024-11-30 01:12:00,579 INFO MainThread:3204336 [wandb_setup.py:_flush():79] Current SDK version is 0.18.5
|
| 2 |
+
2024-11-30 01:12:00,580 INFO MainThread:3204336 [wandb_setup.py:_flush():79] Configure stats pid to 3204336
|
| 3 |
+
2024-11-30 01:12:00,580 INFO MainThread:3204336 [wandb_setup.py:_flush():79] Loading settings from /home/chunhui/.config/wandb/settings
|
| 4 |
+
2024-11-30 01:12:00,580 INFO MainThread:3204336 [wandb_setup.py:_flush():79] Loading settings from /mnt/ssd3/chunhui/yaning/project/impossible_llm/train/wandb/settings
|
| 5 |
+
2024-11-30 01:12:00,580 INFO MainThread:3204336 [wandb_setup.py:_flush():79] Loading settings from environment variables: {}
|
| 6 |
+
2024-11-30 01:12:00,580 INFO MainThread:3204336 [wandb_setup.py:_flush():79] Applying setup settings: {'mode': None, '_disable_service': None}
|
| 7 |
+
2024-11-30 01:12:00,580 INFO MainThread:3204336 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'train/train_gpt2.py', 'program_abspath': '/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/train_gpt2.py', 'program': '/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/train_gpt2.py'}
|
| 8 |
+
2024-11-30 01:12:00,580 INFO MainThread:3204336 [wandb_setup.py:_flush():79] Applying login settings: {}
|
| 9 |
+
2024-11-30 01:12:00,580 INFO MainThread:3204336 [wandb_init.py:_log_setup():534] Logging user logs to /mnt/ssd3/chunhui/yaning/project/impossible_llm/train/wandb/run-20241130_011200-7p4fy9o8/logs/debug.log
|
| 10 |
+
2024-11-30 01:12:00,580 INFO MainThread:3204336 [wandb_init.py:_log_setup():535] Logging internal logs to /mnt/ssd3/chunhui/yaning/project/impossible_llm/train/wandb/run-20241130_011200-7p4fy9o8/logs/debug-internal.log
|
| 11 |
+
2024-11-30 01:12:00,580 INFO MainThread:3204336 [wandb_init.py:init():621] calling init triggers
|
| 12 |
+
2024-11-30 01:12:00,580 INFO MainThread:3204336 [wandb_init.py:init():628] wandb.init called with sweep_config: {}
|
| 13 |
+
config: {}
|
| 14 |
+
2024-11-30 01:12:00,580 INFO MainThread:3204336 [wandb_init.py:init():671] starting backend
|
| 15 |
+
2024-11-30 01:12:00,580 INFO MainThread:3204336 [wandb_init.py:init():675] sending inform_init request
|
| 16 |
+
2024-11-30 01:12:00,581 INFO MainThread:3204336 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
| 17 |
+
2024-11-30 01:12:00,582 INFO MainThread:3204336 [wandb_init.py:init():688] backend started and connected
|
| 18 |
+
2024-11-30 01:12:00,585 INFO MainThread:3204336 [wandb_init.py:init():783] updated telemetry
|
| 19 |
+
2024-11-30 01:12:00,613 INFO MainThread:3204336 [wandb_init.py:init():816] communicating run to backend with 90.0 second timeout
|
| 20 |
+
2024-11-30 01:12:00,856 INFO MainThread:3204336 [wandb_init.py:init():867] starting run threads in backend
|
| 21 |
+
2024-11-30 01:12:00,949 INFO MainThread:3204336 [wandb_run.py:_console_start():2463] atexit reg
|
| 22 |
+
2024-11-30 01:12:00,949 INFO MainThread:3204336 [wandb_run.py:_redirect():2311] redirect: wrap_raw
|
| 23 |
+
2024-11-30 01:12:00,949 INFO MainThread:3204336 [wandb_run.py:_redirect():2376] Wrapping output streams.
|
| 24 |
+
2024-11-30 01:12:00,949 INFO MainThread:3204336 [wandb_run.py:_redirect():2401] Redirects installed.
|
| 25 |
+
2024-11-30 01:12:00,951 INFO MainThread:3204336 [wandb_init.py:init():911] run started, returning control to user process
|
| 26 |
+
2024-11-30 01:12:00,951 INFO MainThread:3204336 [wandb_run.py:_config_callback():1390] config_cb None None {'perturbation': 'reverse_partial', 'train_set': '10M', 'batch_size': 3, 'epoch': 3, 'seed': 0, 'lr': 5e-06}
|
| 27 |
+
2024-11-30 02:11:11,338 INFO MainThread:3204336 [wandb_run.py:_finish():2158] finishing run yaning1001-dartmouth-college/exp-impo-reverse/7p4fy9o8
|
| 28 |
+
2024-11-30 02:11:11,339 INFO MainThread:3204336 [wandb_run.py:_atexit_cleanup():2426] got exitcode: 0
|
| 29 |
+
2024-11-30 02:11:11,339 INFO MainThread:3204336 [wandb_run.py:_restore():2408] restore
|
| 30 |
+
2024-11-30 02:11:11,339 INFO MainThread:3204336 [wandb_run.py:_restore():2414] restore done
|
| 31 |
+
2024-11-30 02:11:12,952 INFO MainThread:3204336 [wandb_run.py:_footer_history_summary_info():3975] rendering history
|
| 32 |
+
2024-11-30 02:11:12,952 INFO MainThread:3204336 [wandb_run.py:_footer_history_summary_info():4007] rendering summary
|
| 33 |
+
2024-11-30 02:11:12,957 INFO MainThread:3204336 [wandb_run.py:_footer_sync_info():3934] logging synced files
|
wandb/run-20241030_010306-uhzyjdga/run-uhzyjdga.wandb
ADDED
|
Binary file (1.6 kB). View file
|
|
|
wandb/run-20241030_011013-8qrwqf2b/files/config.yaml
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
_wandb:
|
| 2 |
+
value:
|
| 3 |
+
cli_version: 0.18.5
|
| 4 |
+
m: []
|
| 5 |
+
python_version: 3.9.19
|
| 6 |
+
t:
|
| 7 |
+
"1":
|
| 8 |
+
- 1
|
| 9 |
+
- 5
|
| 10 |
+
- 11
|
| 11 |
+
- 49
|
| 12 |
+
- 51
|
| 13 |
+
- 53
|
| 14 |
+
- 55
|
| 15 |
+
- 71
|
| 16 |
+
- 98
|
| 17 |
+
"2":
|
| 18 |
+
- 1
|
| 19 |
+
- 5
|
| 20 |
+
- 11
|
| 21 |
+
- 49
|
| 22 |
+
- 51
|
| 23 |
+
- 53
|
| 24 |
+
- 55
|
| 25 |
+
- 71
|
| 26 |
+
- 98
|
| 27 |
+
"3":
|
| 28 |
+
- 13
|
| 29 |
+
- 23
|
| 30 |
+
- 55
|
| 31 |
+
"4": 3.9.19
|
| 32 |
+
"5": 0.18.5
|
| 33 |
+
"6": 4.45.1
|
| 34 |
+
"8":
|
| 35 |
+
- 5
|
| 36 |
+
"12": 0.18.5
|
| 37 |
+
"13": linux-x86_64
|
| 38 |
+
batch_size:
|
| 39 |
+
value: 3
|
| 40 |
+
epoch:
|
| 41 |
+
value: 7
|
| 42 |
+
perturbation:
|
| 43 |
+
value: reverse_control
|
| 44 |
+
seed:
|
| 45 |
+
value: 0
|
| 46 |
+
train_set:
|
| 47 |
+
value: 10M
|
wandb/run-20241030_011013-8qrwqf2b/files/wandb-metadata.json
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"os": "Linux-5.4.0-162-generic-x86_64-with-glibc2.31",
|
| 3 |
+
"python": "3.9.19",
|
| 4 |
+
"startedAt": "2024-10-30T05:10:13.809520Z",
|
| 5 |
+
"args": [
|
| 6 |
+
"--perturbation",
|
| 7 |
+
"reverse_control",
|
| 8 |
+
"--train_set",
|
| 9 |
+
"10M",
|
| 10 |
+
"--batch_size",
|
| 11 |
+
"3",
|
| 12 |
+
"--epoch",
|
| 13 |
+
"7",
|
| 14 |
+
"--seed",
|
| 15 |
+
"0"
|
| 16 |
+
],
|
| 17 |
+
"program": "/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/train_deep_wandb.py",
|
| 18 |
+
"codePath": "train/train_deep_wandb.py",
|
| 19 |
+
"git": {
|
| 20 |
+
"remote": "git@hf.co:Yaning1001/Impossible_llm.git",
|
| 21 |
+
"commit": "ed716cdcfcdea02b67f7ed0f3504c2b1c8b737c4"
|
| 22 |
+
},
|
| 23 |
+
"email": "yaning1001@gmail.com",
|
| 24 |
+
"root": "/mnt/ssd3/chunhui/yaning/project/impossible_llm/train",
|
| 25 |
+
"host": "mms-large-2",
|
| 26 |
+
"username": "chunhui",
|
| 27 |
+
"executable": "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/bin/python",
|
| 28 |
+
"codePathLocal": "train_deep_wandb.py",
|
| 29 |
+
"cpu_count": 32,
|
| 30 |
+
"cpu_count_logical": 64,
|
| 31 |
+
"gpu": "NVIDIA RTX A6000",
|
| 32 |
+
"gpu_count": 8,
|
| 33 |
+
"disk": {
|
| 34 |
+
"/": {
|
| 35 |
+
"total": "1888559353856",
|
| 36 |
+
"used": "1719200362496"
|
| 37 |
+
}
|
| 38 |
+
},
|
| 39 |
+
"memory": {
|
| 40 |
+
"total": "202617098240"
|
| 41 |
+
},
|
| 42 |
+
"cpu": {
|
| 43 |
+
"count": 32,
|
| 44 |
+
"countLogical": 64
|
| 45 |
+
},
|
| 46 |
+
"gpu_nvidia": [
|
| 47 |
+
{
|
| 48 |
+
"name": "NVIDIA RTX A6000",
|
| 49 |
+
"memoryTotal": "51527024640",
|
| 50 |
+
"cudaCores": 10752,
|
| 51 |
+
"architecture": "Ampere"
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"name": "NVIDIA RTX A6000",
|
| 55 |
+
"memoryTotal": "51527024640",
|
| 56 |
+
"cudaCores": 10752,
|
| 57 |
+
"architecture": "Ampere"
|
| 58 |
+
},
|
| 59 |
+
{
|
| 60 |
+
"name": "NVIDIA RTX A6000",
|
| 61 |
+
"memoryTotal": "51527024640",
|
| 62 |
+
"cudaCores": 10752,
|
| 63 |
+
"architecture": "Ampere"
|
| 64 |
+
},
|
| 65 |
+
{
|
| 66 |
+
"name": "NVIDIA RTX A6000",
|
| 67 |
+
"memoryTotal": "51527024640",
|
| 68 |
+
"cudaCores": 10752,
|
| 69 |
+
"architecture": "Ampere"
|
| 70 |
+
},
|
| 71 |
+
{
|
| 72 |
+
"name": "NVIDIA RTX A6000",
|
| 73 |
+
"memoryTotal": "51527024640",
|
| 74 |
+
"cudaCores": 10752,
|
| 75 |
+
"architecture": "Ampere"
|
| 76 |
+
},
|
| 77 |
+
{
|
| 78 |
+
"name": "NVIDIA RTX A6000",
|
| 79 |
+
"memoryTotal": "51527024640",
|
| 80 |
+
"cudaCores": 10752,
|
| 81 |
+
"architecture": "Ampere"
|
| 82 |
+
},
|
| 83 |
+
{
|
| 84 |
+
"name": "NVIDIA RTX A6000",
|
| 85 |
+
"memoryTotal": "51527024640",
|
| 86 |
+
"cudaCores": 10752,
|
| 87 |
+
"architecture": "Ampere"
|
| 88 |
+
},
|
| 89 |
+
{
|
| 90 |
+
"name": "NVIDIA RTX A6000",
|
| 91 |
+
"memoryTotal": "51527024640",
|
| 92 |
+
"cudaCores": 10752,
|
| 93 |
+
"architecture": "Ampere"
|
| 94 |
+
}
|
| 95 |
+
],
|
| 96 |
+
"cudaVersion": "11.8"
|
| 97 |
+
}
|
wandb/run-20241030_011509-3dp0dtmk/files/output.log
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Loading checkpoint shards: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:04<00:00, 2.29s/it]
|
| 2 |
+
Map: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 17519/17519 [00:55<00:00, 313.12 examples/s]
|
| 3 |
+
Map: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 18140/18140 [00:54<00:00, 330.90 examples/s]
|
| 4 |
+
tokenized_valid: Dataset({
|
| 5 |
+
features: ['input_ids', 'attention_mask'],
|
| 6 |
+
num_rows: 600
|
| 7 |
+
})
|
| 8 |
+
/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/transformers/training_args.py:1545: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead
|
| 9 |
+
warnings.warn(
|
| 10 |
+
[2024-10-30 01:17:06,721] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect)
|
| 11 |
+
[2024-10-30 01:17:14,061] [INFO] [comm.py:652:init_distributed] cdb=None
|
| 12 |
+
Installed CUDA version 11.8 does not match the version torch was compiled with 11.7 but since the APIs are compatible, accepting this combination
|
| 13 |
+
Using /home/chunhui/.cache/torch_extensions/py39_cu117 as PyTorch extensions root...
|
| 14 |
+
Loading extension module cpu_adam...
|
| 15 |
+
Time to load cpu_adam op: 4.238509893417358 seconds
|
wandb/run-20241030_011509-3dp0dtmk/logs/debug.log
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2024-10-30 01:15:09,509 INFO MainThread:324927 [wandb_setup.py:_flush():79] Current SDK version is 0.18.5
|
| 2 |
+
2024-10-30 01:15:09,509 INFO MainThread:324927 [wandb_setup.py:_flush():79] Configure stats pid to 324927
|
| 3 |
+
2024-10-30 01:15:09,509 INFO MainThread:324927 [wandb_setup.py:_flush():79] Loading settings from /home/chunhui/.config/wandb/settings
|
| 4 |
+
2024-10-30 01:15:09,509 INFO MainThread:324927 [wandb_setup.py:_flush():79] Loading settings from /mnt/ssd3/chunhui/yaning/project/impossible_llm/train/wandb/settings
|
| 5 |
+
2024-10-30 01:15:09,509 INFO MainThread:324927 [wandb_setup.py:_flush():79] Loading settings from environment variables: {}
|
| 6 |
+
2024-10-30 01:15:09,509 INFO MainThread:324927 [wandb_setup.py:_flush():79] Applying setup settings: {'mode': None, '_disable_service': None}
|
| 7 |
+
2024-10-30 01:15:09,509 INFO MainThread:324927 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'train/train_deep_wandb.py', 'program_abspath': '/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/train_deep_wandb.py', 'program': '/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/train_deep_wandb.py'}
|
| 8 |
+
2024-10-30 01:15:09,509 INFO MainThread:324927 [wandb_setup.py:_flush():79] Applying login settings: {}
|
| 9 |
+
2024-10-30 01:15:09,509 INFO MainThread:324927 [wandb_init.py:_log_setup():534] Logging user logs to /mnt/ssd3/chunhui/yaning/project/impossible_llm/train/wandb/run-20241030_011509-3dp0dtmk/logs/debug.log
|
| 10 |
+
2024-10-30 01:15:09,510 INFO MainThread:324927 [wandb_init.py:_log_setup():535] Logging internal logs to /mnt/ssd3/chunhui/yaning/project/impossible_llm/train/wandb/run-20241030_011509-3dp0dtmk/logs/debug-internal.log
|
| 11 |
+
2024-10-30 01:15:09,510 INFO MainThread:324927 [wandb_init.py:init():621] calling init triggers
|
| 12 |
+
2024-10-30 01:15:09,510 INFO MainThread:324927 [wandb_init.py:init():628] wandb.init called with sweep_config: {}
|
| 13 |
+
config: {}
|
| 14 |
+
2024-10-30 01:15:09,510 INFO MainThread:324927 [wandb_init.py:init():671] starting backend
|
| 15 |
+
2024-10-30 01:15:09,510 INFO MainThread:324927 [wandb_init.py:init():675] sending inform_init request
|
| 16 |
+
2024-10-30 01:15:09,510 INFO MainThread:324927 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
| 17 |
+
2024-10-30 01:15:09,511 INFO MainThread:324927 [wandb_init.py:init():688] backend started and connected
|
| 18 |
+
2024-10-30 01:15:09,514 INFO MainThread:324927 [wandb_init.py:init():783] updated telemetry
|
| 19 |
+
2024-10-30 01:15:09,557 INFO MainThread:324927 [wandb_init.py:init():816] communicating run to backend with 90.0 second timeout
|
| 20 |
+
2024-10-30 01:15:09,824 INFO MainThread:324927 [wandb_init.py:init():867] starting run threads in backend
|
| 21 |
+
2024-10-30 01:15:09,917 INFO MainThread:324927 [wandb_run.py:_console_start():2463] atexit reg
|
| 22 |
+
2024-10-30 01:15:09,917 INFO MainThread:324927 [wandb_run.py:_redirect():2311] redirect: wrap_raw
|
| 23 |
+
2024-10-30 01:15:09,917 INFO MainThread:324927 [wandb_run.py:_redirect():2376] Wrapping output streams.
|
| 24 |
+
2024-10-30 01:15:09,917 INFO MainThread:324927 [wandb_run.py:_redirect():2401] Redirects installed.
|
| 25 |
+
2024-10-30 01:15:09,919 INFO MainThread:324927 [wandb_init.py:init():911] run started, returning control to user process
|
| 26 |
+
2024-10-30 01:15:09,919 INFO MainThread:324927 [wandb_run.py:_config_callback():1390] config_cb None None {'perturbation': 'reverse_control', 'train_set': '10M', 'batch_size': 3, 'epoch': 7, 'seed': 0}
|
wandb/run-20241030_011509-cqcwsj7s/logs/debug.log
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2024-10-30 01:15:09,346 INFO MainThread:324930 [wandb_setup.py:_flush():79] Current SDK version is 0.18.5
|
| 2 |
+
2024-10-30 01:15:09,346 INFO MainThread:324930 [wandb_setup.py:_flush():79] Configure stats pid to 324930
|
| 3 |
+
2024-10-30 01:15:09,346 INFO MainThread:324930 [wandb_setup.py:_flush():79] Loading settings from /home/chunhui/.config/wandb/settings
|
| 4 |
+
2024-10-30 01:15:09,346 INFO MainThread:324930 [wandb_setup.py:_flush():79] Loading settings from /mnt/ssd3/chunhui/yaning/project/impossible_llm/train/wandb/settings
|
| 5 |
+
2024-10-30 01:15:09,346 INFO MainThread:324930 [wandb_setup.py:_flush():79] Loading settings from environment variables: {}
|
| 6 |
+
2024-10-30 01:15:09,346 INFO MainThread:324930 [wandb_setup.py:_flush():79] Applying setup settings: {'mode': None, '_disable_service': None}
|
| 7 |
+
2024-10-30 01:15:09,346 INFO MainThread:324930 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'train/train_deep_wandb.py', 'program_abspath': '/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/train_deep_wandb.py', 'program': '/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/train_deep_wandb.py'}
|
| 8 |
+
2024-10-30 01:15:09,346 INFO MainThread:324930 [wandb_setup.py:_flush():79] Applying login settings: {}
|
| 9 |
+
2024-10-30 01:15:09,346 INFO MainThread:324930 [wandb_init.py:_log_setup():534] Logging user logs to /mnt/ssd3/chunhui/yaning/project/impossible_llm/train/wandb/run-20241030_011509-cqcwsj7s/logs/debug.log
|
| 10 |
+
2024-10-30 01:15:09,346 INFO MainThread:324930 [wandb_init.py:_log_setup():535] Logging internal logs to /mnt/ssd3/chunhui/yaning/project/impossible_llm/train/wandb/run-20241030_011509-cqcwsj7s/logs/debug-internal.log
|
| 11 |
+
2024-10-30 01:15:09,346 INFO MainThread:324930 [wandb_init.py:init():621] calling init triggers
|
| 12 |
+
2024-10-30 01:15:09,346 INFO MainThread:324930 [wandb_init.py:init():628] wandb.init called with sweep_config: {}
|
| 13 |
+
config: {}
|
| 14 |
+
2024-10-30 01:15:09,346 INFO MainThread:324930 [wandb_init.py:init():671] starting backend
|
| 15 |
+
2024-10-30 01:15:09,346 INFO MainThread:324930 [wandb_init.py:init():675] sending inform_init request
|
| 16 |
+
2024-10-30 01:15:09,347 INFO MainThread:324930 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
| 17 |
+
2024-10-30 01:15:09,348 INFO MainThread:324930 [wandb_init.py:init():688] backend started and connected
|
| 18 |
+
2024-10-30 01:15:09,351 INFO MainThread:324930 [wandb_init.py:init():783] updated telemetry
|
| 19 |
+
2024-10-30 01:15:09,378 INFO MainThread:324930 [wandb_init.py:init():816] communicating run to backend with 90.0 second timeout
|
| 20 |
+
2024-10-30 01:15:09,675 INFO MainThread:324930 [wandb_init.py:init():867] starting run threads in backend
|
| 21 |
+
2024-10-30 01:15:09,766 INFO MainThread:324930 [wandb_run.py:_console_start():2463] atexit reg
|
| 22 |
+
2024-10-30 01:15:09,766 INFO MainThread:324930 [wandb_run.py:_redirect():2311] redirect: wrap_raw
|
| 23 |
+
2024-10-30 01:15:09,766 INFO MainThread:324930 [wandb_run.py:_redirect():2376] Wrapping output streams.
|
| 24 |
+
2024-10-30 01:15:09,766 INFO MainThread:324930 [wandb_run.py:_redirect():2401] Redirects installed.
|
| 25 |
+
2024-10-30 01:15:09,767 INFO MainThread:324930 [wandb_init.py:init():911] run started, returning control to user process
|
| 26 |
+
2024-10-30 01:15:09,768 INFO MainThread:324930 [wandb_run.py:_config_callback():1390] config_cb None None {'perturbation': 'reverse_control', 'train_set': '10M', 'batch_size': 3, 'epoch': 7, 'seed': 0}
|
wandb/run-20241030_013141-v317zdzd/files/config.yaml
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
_wandb:
|
| 2 |
+
value:
|
| 3 |
+
cli_version: 0.18.5
|
| 4 |
+
m: []
|
| 5 |
+
python_version: 3.9.19
|
| 6 |
+
t:
|
| 7 |
+
"1":
|
| 8 |
+
- 1
|
| 9 |
+
- 5
|
| 10 |
+
- 11
|
| 11 |
+
- 49
|
| 12 |
+
- 51
|
| 13 |
+
- 53
|
| 14 |
+
- 55
|
| 15 |
+
- 71
|
| 16 |
+
- 98
|
| 17 |
+
"2":
|
| 18 |
+
- 1
|
| 19 |
+
- 5
|
| 20 |
+
- 11
|
| 21 |
+
- 49
|
| 22 |
+
- 51
|
| 23 |
+
- 53
|
| 24 |
+
- 55
|
| 25 |
+
- 71
|
| 26 |
+
- 98
|
| 27 |
+
"3":
|
| 28 |
+
- 13
|
| 29 |
+
- 23
|
| 30 |
+
- 55
|
| 31 |
+
"4": 3.9.19
|
| 32 |
+
"5": 0.18.5
|
| 33 |
+
"6": 4.45.1
|
| 34 |
+
"8":
|
| 35 |
+
- 5
|
| 36 |
+
"12": 0.18.5
|
| 37 |
+
"13": linux-x86_64
|
| 38 |
+
batch_size:
|
| 39 |
+
value: 3
|
| 40 |
+
epoch:
|
| 41 |
+
value: 7
|
| 42 |
+
perturbation:
|
| 43 |
+
value: reverse_full
|
| 44 |
+
seed:
|
| 45 |
+
value: 0
|
| 46 |
+
train_set:
|
| 47 |
+
value: 10M
|
wandb/run-20241030_013141-v317zdzd/files/output.log
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1098360/1098360 [00:04<00:00, 230478.22it/s]
|
| 2 |
+
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1098360/1098360 [00:00<00:00, 2642280.53it/s]
|
| 3 |
+
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 17520/17520 [00:00<00:00, 30255.44it/s]
|
| 4 |
+
Generating train split: 17519 examples [00:08, 1969.90 examples/s]█████████████████████████████████████████████████▎ | 14106/17520 [00:00<00:00, 30804.20it/s]
|
| 5 |
+
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1086121/1086121 [00:05<00:00, 181782.17it/s]
|
| 6 |
+
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1086121/1086121 [00:00<00:00, 2919420.19it/s]
|
| 7 |
+
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 18141/18141 [00:00<00:00, 30801.29it/s]
|
| 8 |
+
Generating validation split: 18140 examples [00:10, 1711.07 examples/s]███████████████████████████████████████████████████████▌ | 16094/18141 [00:00<00:00, 32755.54it/s]
|
| 9 |
+
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1031323/1031323 [00:05<00:00, 192774.00it/s]
|
| 10 |
+
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1031323/1031323 [00:00<00:00, 1666459.47it/s]
|
| 11 |
+
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 16483/16483 [00:00<00:00, 23461.88it/s]
|
| 12 |
+
Generating test split: 16482 examples [00:09, 1649.33 examples/s]███████████████████████████████████████████████████████████████████▌ | 15349/16483 [00:00<00:00, 26830.11it/s]
|
| 13 |
+
Downloading shards: 0%| | 0/2 [01:04<?, ?it/s]
|
| 14 |
+
Error in sys.excepthook:
|
| 15 |
+
Traceback (most recent call last):
|
| 16 |
+
File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/wandb/sdk/lib/exit_hooks.py", line 41, in exc_handler
|
| 17 |
+
def exc_handler(
|
| 18 |
+
KeyboardInterrupt
|
| 19 |
+
|
| 20 |
+
Original exception was:
|
| 21 |
+
Traceback (most recent call last):
|
| 22 |
+
File "/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/train_deep_wandb.py", line 172, in <module>
|
| 23 |
+
model = AutoModelForCausalLM.from_pretrained(model_name,
|
| 24 |
+
File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/transformers/models/auto/auto_factory.py", line 564, in from_pretrained
|
| 25 |
+
return model_class.from_pretrained(
|
| 26 |
+
File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/transformers/modeling_utils.py", line 3769, in from_pretrained
|
| 27 |
+
resolved_archive_file, sharded_metadata = get_checkpoint_shard_files(
|
| 28 |
+
File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/transformers/utils/hub.py", line 1098, in get_checkpoint_shard_files
|
| 29 |
+
cached_filename = cached_file(
|
| 30 |
+
File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/transformers/utils/hub.py", line 403, in cached_file
|
| 31 |
+
resolved_file = hf_hub_download(
|
| 32 |
+
File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/huggingface_hub/utils/_deprecation.py", line 101, in inner_f
|
| 33 |
+
return f(*args, **kwargs)
|
| 34 |
+
File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
|
| 35 |
+
return fn(*args, **kwargs)
|
| 36 |
+
File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/huggingface_hub/file_download.py", line 1232, in hf_hub_download
|
| 37 |
+
return _hf_hub_download_to_cache_dir(
|
| 38 |
+
File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/huggingface_hub/file_download.py", line 1380, in _hf_hub_download_to_cache_dir
|
| 39 |
+
with WeakFileLock(lock_path):
|
| 40 |
+
File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/contextlib.py", line 119, in __enter__
|
| 41 |
+
return next(self.gen)
|
| 42 |
+
File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/huggingface_hub/utils/_fixes.py", line 98, in WeakFileLock
|
| 43 |
+
lock.acquire()
|
| 44 |
+
File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/filelock/_api.py", line 225, in acquire
|
| 45 |
+
time.sleep(poll_interval)
|
| 46 |
+
KeyboardInterrupt
|
wandb/run-20241030_013141-v317zdzd/files/requirements.txt
ADDED
|
@@ -0,0 +1,147 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
funcsigs==1.0.2
|
| 2 |
+
sentry-sdk==2.17.0
|
| 3 |
+
multiprocess==0.70.16
|
| 4 |
+
numpy==1.26.2
|
| 5 |
+
pluralizer==1.2.0
|
| 6 |
+
debugpy==1.6.7
|
| 7 |
+
nvidia-cudnn-cu11==8.5.0.96
|
| 8 |
+
deepspeed==0.15.2
|
| 9 |
+
data==0.4
|
| 10 |
+
pandas==2.1.3
|
| 11 |
+
tomli==2.0.1
|
| 12 |
+
charset-normalizer==3.3.2
|
| 13 |
+
attrs==24.2.0
|
| 14 |
+
aiosignal==1.3.1
|
| 15 |
+
fsspec==2023.10.0
|
| 16 |
+
nvidia-cusparse-cu11==11.7.4.91
|
| 17 |
+
zipp==3.12.0
|
| 18 |
+
mypy-extensions==1.0.0
|
| 19 |
+
datasets==3.0.1
|
| 20 |
+
joblib==1.3.2
|
| 21 |
+
hjson==3.1.0
|
| 22 |
+
traitlets==5.7.1
|
| 23 |
+
stack-data==0.6.0
|
| 24 |
+
transformers==4.45.1
|
| 25 |
+
sympy==1.11.1
|
| 26 |
+
Pygments==2.15.0
|
| 27 |
+
docker-pycreds==0.4.0
|
| 28 |
+
dill==0.3.8
|
| 29 |
+
wheel==0.44.0
|
| 30 |
+
prompt-toolkit==3.0.30
|
| 31 |
+
parso==0.8.3
|
| 32 |
+
ipykernel==6.23.1
|
| 33 |
+
pyarrow==17.0.0
|
| 34 |
+
certifi==2023.11.17
|
| 35 |
+
nvidia-cufft-cu11==10.9.0.58
|
| 36 |
+
six==1.16.0
|
| 37 |
+
pydantic==2.9.2
|
| 38 |
+
click==8.1.7
|
| 39 |
+
nest-asyncio==1.5.6
|
| 40 |
+
gmpy2==2.1.0
|
| 41 |
+
matplotlib==3.8.2
|
| 42 |
+
scipy==1.11.4
|
| 43 |
+
typing_extensions==4.12.2
|
| 44 |
+
statsmodels==0.14.0
|
| 45 |
+
huggingface-hub==0.25.0
|
| 46 |
+
frozenlist==1.4.1
|
| 47 |
+
gpustat==1.1.1
|
| 48 |
+
nvidia-nvtx-cu11==11.7.91
|
| 49 |
+
safetensors==0.4.5
|
| 50 |
+
stanza==1.9.2
|
| 51 |
+
decorator==5.1.1
|
| 52 |
+
seaborn==0.13.0
|
| 53 |
+
sentencepiece==0.2.0
|
| 54 |
+
PyYAML==6.0.1
|
| 55 |
+
black==24.8.0
|
| 56 |
+
protobuf==4.25.1
|
| 57 |
+
pickleshare==0.7.5
|
| 58 |
+
peft==0.13.0
|
| 59 |
+
triton==2.0.0
|
| 60 |
+
nvidia-cuda-runtime-cu11==11.7.99
|
| 61 |
+
Jinja2==3.1.2
|
| 62 |
+
nvidia-cusolver-cu11==11.4.0.1
|
| 63 |
+
executing==1.2.0
|
| 64 |
+
jupyter_client==8.1.0
|
| 65 |
+
pluggy==1.3.0
|
| 66 |
+
cmake==3.30.3
|
| 67 |
+
pytz==2023.3.post1
|
| 68 |
+
aiohappyeyeballs==2.4.2
|
| 69 |
+
kiwisolver==1.4.5
|
| 70 |
+
py-cpuinfo==9.0.0
|
| 71 |
+
Pillow==10.1.0
|
| 72 |
+
ptyprocess==0.7.0
|
| 73 |
+
importlib_resources==6.4.5
|
| 74 |
+
GitPython==3.1.43
|
| 75 |
+
importlib-metadata==6.0.0
|
| 76 |
+
iniconfig==2.0.0
|
| 77 |
+
scikit-learn==1.3.2
|
| 78 |
+
exceptiongroup==1.1.0
|
| 79 |
+
networkx==2.8.6
|
| 80 |
+
accelerate==1.0.0
|
| 81 |
+
nltk==3.8.1
|
| 82 |
+
shutilwhich==1.1.0
|
| 83 |
+
fonttools==4.45.1
|
| 84 |
+
future==0.18.3
|
| 85 |
+
aiohttp==3.10.6
|
| 86 |
+
wcwidth==0.2.5
|
| 87 |
+
idna==3.6
|
| 88 |
+
filelock==3.12.2
|
| 89 |
+
pathspec==0.12.1
|
| 90 |
+
jupyter_core==5.1.0
|
| 91 |
+
lit==18.1.8
|
| 92 |
+
nvidia-curand-cu11==10.2.10.91
|
| 93 |
+
nvidia-cublas-cu11==11.10.3.66
|
| 94 |
+
nvidia-ml-py==12.560.30
|
| 95 |
+
msgpack==1.1.0
|
| 96 |
+
python-dateutil==2.8.2
|
| 97 |
+
blessed==1.20.0
|
| 98 |
+
packaging==23.0
|
| 99 |
+
gitdb==4.0.11
|
| 100 |
+
yarl==1.13.0
|
| 101 |
+
emoji==2.8.0
|
| 102 |
+
tzdata==2023.3
|
| 103 |
+
cycler==0.12.1
|
| 104 |
+
tornado==6.2
|
| 105 |
+
backcall==0.2.0
|
| 106 |
+
plotnine==0.12.4
|
| 107 |
+
ninja==1.11.1.1
|
| 108 |
+
latex==0.7.0
|
| 109 |
+
wandb==0.18.5
|
| 110 |
+
setproctitle==1.3.3
|
| 111 |
+
threadpoolctl==3.2.0
|
| 112 |
+
requests==2.32.3
|
| 113 |
+
pyparsing==3.1.1
|
| 114 |
+
smmap==5.0.1
|
| 115 |
+
pyzmq==23.0.0
|
| 116 |
+
async-timeout==4.0.3
|
| 117 |
+
annotated-types==0.7.0
|
| 118 |
+
matplotlib-inline==0.1.6
|
| 119 |
+
latexcodec==1.0.0
|
| 120 |
+
ipython==8.0.0
|
| 121 |
+
patsy==0.5.3
|
| 122 |
+
contourpy==1.2.0
|
| 123 |
+
multidict==6.1.0
|
| 124 |
+
mizani==0.9.3
|
| 125 |
+
urllib3==2.1.0
|
| 126 |
+
tokenizers==0.20.0
|
| 127 |
+
MarkupSafe==2.1.2
|
| 128 |
+
pip==24.2
|
| 129 |
+
pexpect==4.8.0
|
| 130 |
+
tqdm==4.66.5
|
| 131 |
+
jedi==0.18.2
|
| 132 |
+
pydantic_core==2.23.4
|
| 133 |
+
tempdir==0.7.1
|
| 134 |
+
mpmath==1.2.1
|
| 135 |
+
setuptools==72.1.0
|
| 136 |
+
pytest==7.4.3
|
| 137 |
+
pure-eval==0.2.2
|
| 138 |
+
psutil==5.9.1
|
| 139 |
+
comm==0.1.2
|
| 140 |
+
nvidia-cuda-cupti-cu11==11.7.101
|
| 141 |
+
nvidia-cuda-nvrtc-cu11==11.7.99
|
| 142 |
+
regex==2023.10.3
|
| 143 |
+
platformdirs==2.5.2
|
| 144 |
+
asttokens==2.2.1
|
| 145 |
+
torch==2.0.0
|
| 146 |
+
nvidia-nccl-cu11==2.14.3
|
| 147 |
+
xxhash==3.5.0
|
wandb/run-20241030_013141-v317zdzd/files/wandb-metadata.json
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"os": "Linux-5.4.0-162-generic-x86_64-with-glibc2.31",
|
| 3 |
+
"python": "3.9.19",
|
| 4 |
+
"startedAt": "2024-10-30T05:31:41.692035Z",
|
| 5 |
+
"args": [
|
| 6 |
+
"--perturbation",
|
| 7 |
+
"reverse_full",
|
| 8 |
+
"--train_set",
|
| 9 |
+
"10M",
|
| 10 |
+
"--batch_size",
|
| 11 |
+
"3",
|
| 12 |
+
"--epoch",
|
| 13 |
+
"7",
|
| 14 |
+
"--seed",
|
| 15 |
+
"0"
|
| 16 |
+
],
|
| 17 |
+
"program": "/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/train_deep_wandb.py",
|
| 18 |
+
"codePath": "train/train_deep_wandb.py",
|
| 19 |
+
"git": {
|
| 20 |
+
"remote": "git@hf.co:Yaning1001/Impossible_llm.git",
|
| 21 |
+
"commit": "ed716cdcfcdea02b67f7ed0f3504c2b1c8b737c4"
|
| 22 |
+
},
|
| 23 |
+
"email": "yaning1001@gmail.com",
|
| 24 |
+
"root": "/mnt/ssd3/chunhui/yaning/project/impossible_llm/train",
|
| 25 |
+
"host": "mms-large-2",
|
| 26 |
+
"username": "chunhui",
|
| 27 |
+
"executable": "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/bin/python",
|
| 28 |
+
"codePathLocal": "train_deep_wandb.py",
|
| 29 |
+
"cpu_count": 32,
|
| 30 |
+
"cpu_count_logical": 64,
|
| 31 |
+
"gpu": "NVIDIA RTX A6000",
|
| 32 |
+
"gpu_count": 8,
|
| 33 |
+
"disk": {
|
| 34 |
+
"/": {
|
| 35 |
+
"total": "1888559353856",
|
| 36 |
+
"used": "1709824413696"
|
| 37 |
+
}
|
| 38 |
+
},
|
| 39 |
+
"memory": {
|
| 40 |
+
"total": "202617098240"
|
| 41 |
+
},
|
| 42 |
+
"cpu": {
|
| 43 |
+
"count": 32,
|
| 44 |
+
"countLogical": 64
|
| 45 |
+
},
|
| 46 |
+
"gpu_nvidia": [
|
| 47 |
+
{
|
| 48 |
+
"name": "NVIDIA RTX A6000",
|
| 49 |
+
"memoryTotal": "51527024640",
|
| 50 |
+
"cudaCores": 10752,
|
| 51 |
+
"architecture": "Ampere"
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"name": "NVIDIA RTX A6000",
|
| 55 |
+
"memoryTotal": "51527024640",
|
| 56 |
+
"cudaCores": 10752,
|
| 57 |
+
"architecture": "Ampere"
|
| 58 |
+
},
|
| 59 |
+
{
|
| 60 |
+
"name": "NVIDIA RTX A6000",
|
| 61 |
+
"memoryTotal": "51527024640",
|
| 62 |
+
"cudaCores": 10752,
|
| 63 |
+
"architecture": "Ampere"
|
| 64 |
+
},
|
| 65 |
+
{
|
| 66 |
+
"name": "NVIDIA RTX A6000",
|
| 67 |
+
"memoryTotal": "51527024640",
|
| 68 |
+
"cudaCores": 10752,
|
| 69 |
+
"architecture": "Ampere"
|
| 70 |
+
},
|
| 71 |
+
{
|
| 72 |
+
"name": "NVIDIA RTX A6000",
|
| 73 |
+
"memoryTotal": "51527024640",
|
| 74 |
+
"cudaCores": 10752,
|
| 75 |
+
"architecture": "Ampere"
|
| 76 |
+
},
|
| 77 |
+
{
|
| 78 |
+
"name": "NVIDIA RTX A6000",
|
| 79 |
+
"memoryTotal": "51527024640",
|
| 80 |
+
"cudaCores": 10752,
|
| 81 |
+
"architecture": "Ampere"
|
| 82 |
+
},
|
| 83 |
+
{
|
| 84 |
+
"name": "NVIDIA RTX A6000",
|
| 85 |
+
"memoryTotal": "51527024640",
|
| 86 |
+
"cudaCores": 10752,
|
| 87 |
+
"architecture": "Ampere"
|
| 88 |
+
},
|
| 89 |
+
{
|
| 90 |
+
"name": "NVIDIA RTX A6000",
|
| 91 |
+
"memoryTotal": "51527024640",
|
| 92 |
+
"cudaCores": 10752,
|
| 93 |
+
"architecture": "Ampere"
|
| 94 |
+
}
|
| 95 |
+
],
|
| 96 |
+
"cudaVersion": "11.8"
|
| 97 |
+
}
|
wandb/run-20241030_013141-v317zdzd/logs/debug-internal.log
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2024-10-30T01:31:41.694124018-04:00","level":"INFO","msg":"using version","core version":"0.18.5"}
|
| 2 |
+
{"time":"2024-10-30T01:31:41.694138749-04:00","level":"INFO","msg":"created symlink","path":"/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/wandb/run-20241030_013141-v317zdzd/logs/debug-core.log"}
|
| 3 |
+
{"time":"2024-10-30T01:31:41.802315796-04:00","level":"INFO","msg":"created new stream","id":"v317zdzd"}
|
| 4 |
+
{"time":"2024-10-30T01:31:41.802356857-04:00","level":"INFO","msg":"stream: started","id":"v317zdzd"}
|
| 5 |
+
{"time":"2024-10-30T01:31:41.802407437-04:00","level":"INFO","msg":"sender: started","stream_id":"v317zdzd"}
|
| 6 |
+
{"time":"2024-10-30T01:31:41.802396467-04:00","level":"INFO","msg":"handler: started","stream_id":{"value":"v317zdzd"}}
|
| 7 |
+
{"time":"2024-10-30T01:31:41.802381677-04:00","level":"INFO","msg":"writer: Do: started","stream_id":{"value":"v317zdzd"}}
|
| 8 |
+
{"time":"2024-10-30T01:31:42.031691859-04:00","level":"INFO","msg":"Starting system monitor"}
|
| 9 |
+
{"time":"2024-10-30T01:33:16.596421902-04:00","level":"INFO","msg":"stream: closing","id":"v317zdzd"}
|
| 10 |
+
{"time":"2024-10-30T01:33:16.596523562-04:00","level":"INFO","msg":"Stopping system monitor"}
|
| 11 |
+
{"time":"2024-10-30T01:33:16.59760984-04:00","level":"INFO","msg":"Stopped system monitor"}
|
wandb/run-20241030_013141-v317zdzd/logs/debug.log
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2024-10-30 01:31:41,690 INFO MainThread:335756 [wandb_setup.py:_flush():79] Current SDK version is 0.18.5
|
| 2 |
+
2024-10-30 01:31:41,690 INFO MainThread:335756 [wandb_setup.py:_flush():79] Configure stats pid to 335756
|
| 3 |
+
2024-10-30 01:31:41,690 INFO MainThread:335756 [wandb_setup.py:_flush():79] Loading settings from /home/chunhui/.config/wandb/settings
|
| 4 |
+
2024-10-30 01:31:41,690 INFO MainThread:335756 [wandb_setup.py:_flush():79] Loading settings from /mnt/ssd3/chunhui/yaning/project/impossible_llm/train/wandb/settings
|
| 5 |
+
2024-10-30 01:31:41,690 INFO MainThread:335756 [wandb_setup.py:_flush():79] Loading settings from environment variables: {}
|
| 6 |
+
2024-10-30 01:31:41,690 INFO MainThread:335756 [wandb_setup.py:_flush():79] Applying setup settings: {'mode': None, '_disable_service': None}
|
| 7 |
+
2024-10-30 01:31:41,690 INFO MainThread:335756 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'train/train_deep_wandb.py', 'program_abspath': '/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/train_deep_wandb.py', 'program': '/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/train_deep_wandb.py'}
|
| 8 |
+
2024-10-30 01:31:41,690 INFO MainThread:335756 [wandb_setup.py:_flush():79] Applying login settings: {}
|
| 9 |
+
2024-10-30 01:31:41,690 INFO MainThread:335756 [wandb_init.py:_log_setup():534] Logging user logs to /mnt/ssd3/chunhui/yaning/project/impossible_llm/train/wandb/run-20241030_013141-v317zdzd/logs/debug.log
|
| 10 |
+
2024-10-30 01:31:41,690 INFO MainThread:335756 [wandb_init.py:_log_setup():535] Logging internal logs to /mnt/ssd3/chunhui/yaning/project/impossible_llm/train/wandb/run-20241030_013141-v317zdzd/logs/debug-internal.log
|
| 11 |
+
2024-10-30 01:31:41,690 INFO MainThread:335756 [wandb_init.py:init():621] calling init triggers
|
| 12 |
+
2024-10-30 01:31:41,690 INFO MainThread:335756 [wandb_init.py:init():628] wandb.init called with sweep_config: {}
|
| 13 |
+
config: {}
|
| 14 |
+
2024-10-30 01:31:41,690 INFO MainThread:335756 [wandb_init.py:init():671] starting backend
|
| 15 |
+
2024-10-30 01:31:41,690 INFO MainThread:335756 [wandb_init.py:init():675] sending inform_init request
|
| 16 |
+
2024-10-30 01:31:41,691 INFO MainThread:335756 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
| 17 |
+
2024-10-30 01:31:41,691 INFO MainThread:335756 [wandb_init.py:init():688] backend started and connected
|
| 18 |
+
2024-10-30 01:31:41,694 INFO MainThread:335756 [wandb_init.py:init():783] updated telemetry
|
| 19 |
+
2024-10-30 01:31:41,727 INFO MainThread:335756 [wandb_init.py:init():816] communicating run to backend with 90.0 second timeout
|
| 20 |
+
2024-10-30 01:31:42,028 INFO MainThread:335756 [wandb_init.py:init():867] starting run threads in backend
|
| 21 |
+
2024-10-30 01:31:42,134 INFO MainThread:335756 [wandb_run.py:_console_start():2463] atexit reg
|
| 22 |
+
2024-10-30 01:31:42,134 INFO MainThread:335756 [wandb_run.py:_redirect():2311] redirect: wrap_raw
|
| 23 |
+
2024-10-30 01:31:42,135 INFO MainThread:335756 [wandb_run.py:_redirect():2376] Wrapping output streams.
|
| 24 |
+
2024-10-30 01:31:42,135 INFO MainThread:335756 [wandb_run.py:_redirect():2401] Redirects installed.
|
| 25 |
+
2024-10-30 01:31:42,136 INFO MainThread:335756 [wandb_init.py:init():911] run started, returning control to user process
|
| 26 |
+
2024-10-30 01:31:42,136 INFO MainThread:335756 [wandb_run.py:_config_callback():1390] config_cb None None {'perturbation': 'reverse_full', 'train_set': '10M', 'batch_size': 3, 'epoch': 7, 'seed': 0}
|
| 27 |
+
2024-10-30 01:33:16,596 WARNING MsgRouterThr:335756 [router.py:message_loop():77] message_loop has been closed
|
wandb/run-20241030_222932-l8nv7d2l/files/output.log
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Loading checkpoint shards: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:18<00:00, 9.39s/it]
|
| 2 |
+
tokenized_valid: Dataset({
|
| 3 |
+
features: ['input_ids', 'attention_mask'],
|
| 4 |
+
num_rows: 600
|
| 5 |
+
})
|
| 6 |
+
/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/transformers/training_args.py:1545: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead
|
| 7 |
+
warnings.warn(
|
| 8 |
+
[2024-10-30 22:29:54,207] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect)
|
| 9 |
+
[2024-10-30 22:30:03,646] [INFO] [comm.py:652:init_distributed] cdb=None
|
| 10 |
+
Installed CUDA version 11.8 does not match the version torch was compiled with 11.7 but since the APIs are compatible, accepting this combination
|
| 11 |
+
Using /home/chunhui/.cache/torch_extensions/py39_cu117 as PyTorch extensions root...
|
| 12 |
+
Loading extension module cpu_adam...
|
| 13 |
+
Time to load cpu_adam op: 5.236328601837158 seconds
|
| 14 |
+
[34m[1mwandb[0m: [33mWARNING[0m Fatal error while uploading data. Some run data will not be synced, but it will still be written to disk. Use `wandb sync` at the end of the run to try uploading.
|
wandb/run-20241030_222932-l8nv7d2l/logs/debug-internal.log
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2024-10-30T22:29:32.388391759-04:00","level":"INFO","msg":"using version","core version":"0.18.5"}
|
| 2 |
+
{"time":"2024-10-30T22:29:32.388402599-04:00","level":"INFO","msg":"created symlink","path":"/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/wandb/run-20241030_222932-l8nv7d2l/logs/debug-core.log"}
|
| 3 |
+
{"time":"2024-10-30T22:29:32.494517518-04:00","level":"INFO","msg":"created new stream","id":"l8nv7d2l"}
|
| 4 |
+
{"time":"2024-10-30T22:29:32.494545668-04:00","level":"INFO","msg":"stream: started","id":"l8nv7d2l"}
|
| 5 |
+
{"time":"2024-10-30T22:29:32.494613299-04:00","level":"INFO","msg":"sender: started","stream_id":"l8nv7d2l"}
|
| 6 |
+
{"time":"2024-10-30T22:29:32.494582519-04:00","level":"INFO","msg":"handler: started","stream_id":{"value":"l8nv7d2l"}}
|
| 7 |
+
{"time":"2024-10-30T22:29:32.494578598-04:00","level":"INFO","msg":"writer: Do: started","stream_id":{"value":"l8nv7d2l"}}
|
| 8 |
+
{"time":"2024-10-30T22:29:32.702441334-04:00","level":"INFO","msg":"Starting system monitor"}
|
| 9 |
+
{"time":"2024-10-30T22:56:33.113473032-04:00","level":"ERROR","msg":"HTTP error","status":404,"method":"POST","url":"https://api.wandb.ai/files/yaning1001-dartmouth-college/impossible_llm_reverse/l8nv7d2l/file_stream"}
|
| 10 |
+
{"time":"2024-10-30T22:56:33.117771758-04:00","level":"ERROR+4","msg":"filestream: fatal error: filestream: failed to upload: 404 Not Found path=files/yaning1001-dartmouth-college/impossible_llm_reverse/l8nv7d2l/file_stream: {\"error\":\"run impossible_llm_reverse/l8nv7d2l not found while streaming file\"}"}
|
wandb/run-20241030_222932-l8nv7d2l/logs/debug.log
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2024-10-30 22:29:32,383 INFO MainThread:447696 [wandb_setup.py:_flush():79] Current SDK version is 0.18.5
|
| 2 |
+
2024-10-30 22:29:32,384 INFO MainThread:447696 [wandb_setup.py:_flush():79] Configure stats pid to 447696
|
| 3 |
+
2024-10-30 22:29:32,384 INFO MainThread:447696 [wandb_setup.py:_flush():79] Loading settings from /home/chunhui/.config/wandb/settings
|
| 4 |
+
2024-10-30 22:29:32,384 INFO MainThread:447696 [wandb_setup.py:_flush():79] Loading settings from /mnt/ssd3/chunhui/yaning/project/impossible_llm/train/wandb/settings
|
| 5 |
+
2024-10-30 22:29:32,384 INFO MainThread:447696 [wandb_setup.py:_flush():79] Loading settings from environment variables: {}
|
| 6 |
+
2024-10-30 22:29:32,384 INFO MainThread:447696 [wandb_setup.py:_flush():79] Applying setup settings: {'mode': None, '_disable_service': None}
|
| 7 |
+
2024-10-30 22:29:32,384 INFO MainThread:447696 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'train/train_deep_wandb.py', 'program_abspath': '/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/train_deep_wandb.py', 'program': '/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/train_deep_wandb.py'}
|
| 8 |
+
2024-10-30 22:29:32,384 INFO MainThread:447696 [wandb_setup.py:_flush():79] Applying login settings: {}
|
| 9 |
+
2024-10-30 22:29:32,384 INFO MainThread:447696 [wandb_init.py:_log_setup():534] Logging user logs to /mnt/ssd3/chunhui/yaning/project/impossible_llm/train/wandb/run-20241030_222932-l8nv7d2l/logs/debug.log
|
| 10 |
+
2024-10-30 22:29:32,384 INFO MainThread:447696 [wandb_init.py:_log_setup():535] Logging internal logs to /mnt/ssd3/chunhui/yaning/project/impossible_llm/train/wandb/run-20241030_222932-l8nv7d2l/logs/debug-internal.log
|
| 11 |
+
2024-10-30 22:29:32,384 INFO MainThread:447696 [wandb_init.py:init():621] calling init triggers
|
| 12 |
+
2024-10-30 22:29:32,384 INFO MainThread:447696 [wandb_init.py:init():628] wandb.init called with sweep_config: {}
|
| 13 |
+
config: {}
|
| 14 |
+
2024-10-30 22:29:32,384 INFO MainThread:447696 [wandb_init.py:init():671] starting backend
|
| 15 |
+
2024-10-30 22:29:32,384 INFO MainThread:447696 [wandb_init.py:init():675] sending inform_init request
|
| 16 |
+
2024-10-30 22:29:32,385 INFO MainThread:447696 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
| 17 |
+
2024-10-30 22:29:32,385 INFO MainThread:447696 [wandb_init.py:init():688] backend started and connected
|
| 18 |
+
2024-10-30 22:29:32,388 INFO MainThread:447696 [wandb_init.py:init():783] updated telemetry
|
| 19 |
+
2024-10-30 22:29:32,418 INFO MainThread:447696 [wandb_init.py:init():816] communicating run to backend with 90.0 second timeout
|
| 20 |
+
2024-10-30 22:29:32,698 INFO MainThread:447696 [wandb_init.py:init():867] starting run threads in backend
|
| 21 |
+
2024-10-30 22:29:32,826 INFO MainThread:447696 [wandb_run.py:_console_start():2463] atexit reg
|
| 22 |
+
2024-10-30 22:29:32,827 INFO MainThread:447696 [wandb_run.py:_redirect():2311] redirect: wrap_raw
|
| 23 |
+
2024-10-30 22:29:32,827 INFO MainThread:447696 [wandb_run.py:_redirect():2376] Wrapping output streams.
|
| 24 |
+
2024-10-30 22:29:32,827 INFO MainThread:447696 [wandb_run.py:_redirect():2401] Redirects installed.
|
| 25 |
+
2024-10-30 22:29:32,828 INFO MainThread:447696 [wandb_init.py:init():911] run started, returning control to user process
|
| 26 |
+
2024-10-30 22:29:32,829 INFO MainThread:447696 [wandb_run.py:_config_callback():1390] config_cb None None {'perturbation': 'reverse_control', 'train_set': '10M', 'batch_size': 3, 'epoch': 3, 'seed': 0}
|
wandb/run-20241030_222932-lsfm0d2q/files/wandb-metadata.json
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"os": "Linux-5.4.0-162-generic-x86_64-with-glibc2.31",
|
| 3 |
+
"python": "3.9.19",
|
| 4 |
+
"startedAt": "2024-10-31T02:29:32.440797Z",
|
| 5 |
+
"args": [
|
| 6 |
+
"--perturbation",
|
| 7 |
+
"reverse_control",
|
| 8 |
+
"--train_set",
|
| 9 |
+
"10M",
|
| 10 |
+
"--batch_size",
|
| 11 |
+
"3",
|
| 12 |
+
"--epoch",
|
| 13 |
+
"3",
|
| 14 |
+
"--seed",
|
| 15 |
+
"0"
|
| 16 |
+
],
|
| 17 |
+
"program": "/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/train_deep_wandb.py",
|
| 18 |
+
"codePath": "train/train_deep_wandb.py",
|
| 19 |
+
"git": {
|
| 20 |
+
"remote": "git@hf.co:Yaning1001/Impossible_llm.git",
|
| 21 |
+
"commit": "ed716cdcfcdea02b67f7ed0f3504c2b1c8b737c4"
|
| 22 |
+
},
|
| 23 |
+
"email": "yaning1001@gmail.com",
|
| 24 |
+
"root": "/mnt/ssd3/chunhui/yaning/project/impossible_llm/train",
|
| 25 |
+
"host": "mms-large-2",
|
| 26 |
+
"username": "chunhui",
|
| 27 |
+
"executable": "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/bin/python",
|
| 28 |
+
"codePathLocal": "train_deep_wandb.py",
|
| 29 |
+
"cpu_count": 32,
|
| 30 |
+
"cpu_count_logical": 64,
|
| 31 |
+
"gpu": "NVIDIA RTX A6000",
|
| 32 |
+
"gpu_count": 8,
|
| 33 |
+
"disk": {
|
| 34 |
+
"/": {
|
| 35 |
+
"total": "1888559353856",
|
| 36 |
+
"used": "1710969503744"
|
| 37 |
+
}
|
| 38 |
+
},
|
| 39 |
+
"memory": {
|
| 40 |
+
"total": "202617098240"
|
| 41 |
+
},
|
| 42 |
+
"cpu": {
|
| 43 |
+
"count": 32,
|
| 44 |
+
"countLogical": 64
|
| 45 |
+
},
|
| 46 |
+
"gpu_nvidia": [
|
| 47 |
+
{
|
| 48 |
+
"name": "NVIDIA RTX A6000",
|
| 49 |
+
"memoryTotal": "51527024640",
|
| 50 |
+
"cudaCores": 10752,
|
| 51 |
+
"architecture": "Ampere"
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"name": "NVIDIA RTX A6000",
|
| 55 |
+
"memoryTotal": "51527024640",
|
| 56 |
+
"cudaCores": 10752,
|
| 57 |
+
"architecture": "Ampere"
|
| 58 |
+
},
|
| 59 |
+
{
|
| 60 |
+
"name": "NVIDIA RTX A6000",
|
| 61 |
+
"memoryTotal": "51527024640",
|
| 62 |
+
"cudaCores": 10752,
|
| 63 |
+
"architecture": "Ampere"
|
| 64 |
+
},
|
| 65 |
+
{
|
| 66 |
+
"name": "NVIDIA RTX A6000",
|
| 67 |
+
"memoryTotal": "51527024640",
|
| 68 |
+
"cudaCores": 10752,
|
| 69 |
+
"architecture": "Ampere"
|
| 70 |
+
},
|
| 71 |
+
{
|
| 72 |
+
"name": "NVIDIA RTX A6000",
|
| 73 |
+
"memoryTotal": "51527024640",
|
| 74 |
+
"cudaCores": 10752,
|
| 75 |
+
"architecture": "Ampere"
|
| 76 |
+
},
|
| 77 |
+
{
|
| 78 |
+
"name": "NVIDIA RTX A6000",
|
| 79 |
+
"memoryTotal": "51527024640",
|
| 80 |
+
"cudaCores": 10752,
|
| 81 |
+
"architecture": "Ampere"
|
| 82 |
+
},
|
| 83 |
+
{
|
| 84 |
+
"name": "NVIDIA RTX A6000",
|
| 85 |
+
"memoryTotal": "51527024640",
|
| 86 |
+
"cudaCores": 10752,
|
| 87 |
+
"architecture": "Ampere"
|
| 88 |
+
},
|
| 89 |
+
{
|
| 90 |
+
"name": "NVIDIA RTX A6000",
|
| 91 |
+
"memoryTotal": "51527024640",
|
| 92 |
+
"cudaCores": 10752,
|
| 93 |
+
"architecture": "Ampere"
|
| 94 |
+
}
|
| 95 |
+
],
|
| 96 |
+
"cudaVersion": "11.8"
|
| 97 |
+
}
|
wandb/run-20241030_222932-lsfm0d2q/logs/debug-internal.log
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2024-10-30T22:29:32.443244692-04:00","level":"INFO","msg":"using version","core version":"0.18.5"}
|
| 2 |
+
{"time":"2024-10-30T22:29:32.443263202-04:00","level":"INFO","msg":"created symlink","path":"/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/wandb/run-20241030_222932-lsfm0d2q/logs/debug-core.log"}
|
| 3 |
+
{"time":"2024-10-30T22:29:32.554317529-04:00","level":"INFO","msg":"created new stream","id":"lsfm0d2q"}
|
| 4 |
+
{"time":"2024-10-30T22:29:32.554346489-04:00","level":"INFO","msg":"stream: started","id":"lsfm0d2q"}
|
| 5 |
+
{"time":"2024-10-30T22:29:32.554411019-04:00","level":"INFO","msg":"sender: started","stream_id":"lsfm0d2q"}
|
| 6 |
+
{"time":"2024-10-30T22:29:32.554371289-04:00","level":"INFO","msg":"writer: Do: started","stream_id":{"value":"lsfm0d2q"}}
|
| 7 |
+
{"time":"2024-10-30T22:29:32.554397639-04:00","level":"INFO","msg":"handler: started","stream_id":{"value":"lsfm0d2q"}}
|
| 8 |
+
{"time":"2024-10-30T22:29:32.714899908-04:00","level":"INFO","msg":"Starting system monitor"}
|
| 9 |
+
{"time":"2024-10-30T22:56:18.124763411-04:00","level":"ERROR","msg":"HTTP error","status":404,"method":"POST","url":"https://api.wandb.ai/files/yaning1001-dartmouth-college/impossible_llm_reverse/lsfm0d2q/file_stream"}
|
| 10 |
+
{"time":"2024-10-30T22:56:18.129259917-04:00","level":"ERROR+4","msg":"filestream: fatal error: filestream: failed to upload: 404 Not Found path=files/yaning1001-dartmouth-college/impossible_llm_reverse/lsfm0d2q/file_stream: {\"error\":\"run impossible_llm_reverse/lsfm0d2q not found while streaming file\"}"}
|
wandb/run-20241030_222932-lsfm0d2q/logs/debug.log
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2024-10-30 22:29:32,439 INFO MainThread:447700 [wandb_setup.py:_flush():79] Current SDK version is 0.18.5
|
| 2 |
+
2024-10-30 22:29:32,439 INFO MainThread:447700 [wandb_setup.py:_flush():79] Configure stats pid to 447700
|
| 3 |
+
2024-10-30 22:29:32,439 INFO MainThread:447700 [wandb_setup.py:_flush():79] Loading settings from /home/chunhui/.config/wandb/settings
|
| 4 |
+
2024-10-30 22:29:32,439 INFO MainThread:447700 [wandb_setup.py:_flush():79] Loading settings from /mnt/ssd3/chunhui/yaning/project/impossible_llm/train/wandb/settings
|
| 5 |
+
2024-10-30 22:29:32,439 INFO MainThread:447700 [wandb_setup.py:_flush():79] Loading settings from environment variables: {}
|
| 6 |
+
2024-10-30 22:29:32,439 INFO MainThread:447700 [wandb_setup.py:_flush():79] Applying setup settings: {'mode': None, '_disable_service': None}
|
| 7 |
+
2024-10-30 22:29:32,439 INFO MainThread:447700 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'train/train_deep_wandb.py', 'program_abspath': '/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/train_deep_wandb.py', 'program': '/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/train_deep_wandb.py'}
|
| 8 |
+
2024-10-30 22:29:32,439 INFO MainThread:447700 [wandb_setup.py:_flush():79] Applying login settings: {}
|
| 9 |
+
2024-10-30 22:29:32,439 INFO MainThread:447700 [wandb_init.py:_log_setup():534] Logging user logs to /mnt/ssd3/chunhui/yaning/project/impossible_llm/train/wandb/run-20241030_222932-lsfm0d2q/logs/debug.log
|
| 10 |
+
2024-10-30 22:29:32,439 INFO MainThread:447700 [wandb_init.py:_log_setup():535] Logging internal logs to /mnt/ssd3/chunhui/yaning/project/impossible_llm/train/wandb/run-20241030_222932-lsfm0d2q/logs/debug-internal.log
|
| 11 |
+
2024-10-30 22:29:32,439 INFO MainThread:447700 [wandb_init.py:init():621] calling init triggers
|
| 12 |
+
2024-10-30 22:29:32,439 INFO MainThread:447700 [wandb_init.py:init():628] wandb.init called with sweep_config: {}
|
| 13 |
+
config: {}
|
| 14 |
+
2024-10-30 22:29:32,439 INFO MainThread:447700 [wandb_init.py:init():671] starting backend
|
| 15 |
+
2024-10-30 22:29:32,439 INFO MainThread:447700 [wandb_init.py:init():675] sending inform_init request
|
| 16 |
+
2024-10-30 22:29:32,440 INFO MainThread:447700 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
| 17 |
+
2024-10-30 22:29:32,440 INFO MainThread:447700 [wandb_init.py:init():688] backend started and connected
|
| 18 |
+
2024-10-30 22:29:32,443 INFO MainThread:447700 [wandb_init.py:init():783] updated telemetry
|
| 19 |
+
2024-10-30 22:29:32,469 INFO MainThread:447700 [wandb_init.py:init():816] communicating run to backend with 90.0 second timeout
|
| 20 |
+
2024-10-30 22:29:32,711 INFO MainThread:447700 [wandb_init.py:init():867] starting run threads in backend
|
| 21 |
+
2024-10-30 22:29:32,849 INFO MainThread:447700 [wandb_run.py:_console_start():2463] atexit reg
|
| 22 |
+
2024-10-30 22:29:32,850 INFO MainThread:447700 [wandb_run.py:_redirect():2311] redirect: wrap_raw
|
| 23 |
+
2024-10-30 22:29:32,850 INFO MainThread:447700 [wandb_run.py:_redirect():2376] Wrapping output streams.
|
| 24 |
+
2024-10-30 22:29:32,850 INFO MainThread:447700 [wandb_run.py:_redirect():2401] Redirects installed.
|
| 25 |
+
2024-10-30 22:29:32,851 INFO MainThread:447700 [wandb_init.py:init():911] run started, returning control to user process
|
| 26 |
+
2024-10-30 22:29:32,852 INFO MainThread:447700 [wandb_run.py:_config_callback():1390] config_cb None None {'perturbation': 'reverse_control', 'train_set': '10M', 'batch_size': 3, 'epoch': 3, 'seed': 0}
|
wandb/run-20241101_012733-4u8e027p/files/output.log
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Downloading shards: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [02:32<00:00, 76.34s/it]
|
| 2 |
+
Loading checkpoint shards: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:09<00:00, 4.62s/it]
|
| 3 |
+
Map: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 16425/16425 [00:54<00:00, 301.45 examples/s]
|
| 4 |
+
Map: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 17013/17013 [00:55<00:00, 307.56 examples/s]
|
| 5 |
+
tokenized_valid: Dataset({
|
| 6 |
+
features: ['input_ids', 'attention_mask'],
|
| 7 |
+
num_rows: 600
|
| 8 |
+
})
|
| 9 |
+
/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/transformers/training_args.py:1545: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead
|
| 10 |
+
warnings.warn(
|
| 11 |
+
[2024-11-01 01:32:35,965] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect)
|
| 12 |
+
[2024-11-01 01:32:46,292] [INFO] [comm.py:652:init_distributed] cdb=None
|
| 13 |
+
Installed CUDA version 11.8 does not match the version torch was compiled with 11.7 but since the APIs are compatible, accepting this combination
|
| 14 |
+
Using /home/chunhui/.cache/torch_extensions/py39_cu117 as PyTorch extensions root...
|
| 15 |
+
Loading extension module cpu_adam...
|
| 16 |
+
Time to load cpu_adam op: 5.491261959075928 seconds
|
wandb/run-20241101_012733-4u8e027p/files/requirements.txt
ADDED
|
@@ -0,0 +1,147 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
funcsigs==1.0.2
|
| 2 |
+
sentry-sdk==2.17.0
|
| 3 |
+
multiprocess==0.70.16
|
| 4 |
+
numpy==1.26.2
|
| 5 |
+
pluralizer==1.2.0
|
| 6 |
+
debugpy==1.6.7
|
| 7 |
+
nvidia-cudnn-cu11==8.5.0.96
|
| 8 |
+
deepspeed==0.15.2
|
| 9 |
+
data==0.4
|
| 10 |
+
pandas==2.1.3
|
| 11 |
+
tomli==2.0.1
|
| 12 |
+
charset-normalizer==3.3.2
|
| 13 |
+
attrs==24.2.0
|
| 14 |
+
aiosignal==1.3.1
|
| 15 |
+
fsspec==2023.10.0
|
| 16 |
+
nvidia-cusparse-cu11==11.7.4.91
|
| 17 |
+
zipp==3.12.0
|
| 18 |
+
mypy-extensions==1.0.0
|
| 19 |
+
datasets==3.0.1
|
| 20 |
+
joblib==1.3.2
|
| 21 |
+
hjson==3.1.0
|
| 22 |
+
traitlets==5.7.1
|
| 23 |
+
stack-data==0.6.0
|
| 24 |
+
transformers==4.45.1
|
| 25 |
+
sympy==1.11.1
|
| 26 |
+
Pygments==2.15.0
|
| 27 |
+
docker-pycreds==0.4.0
|
| 28 |
+
dill==0.3.8
|
| 29 |
+
wheel==0.44.0
|
| 30 |
+
prompt-toolkit==3.0.30
|
| 31 |
+
parso==0.8.3
|
| 32 |
+
ipykernel==6.23.1
|
| 33 |
+
pyarrow==17.0.0
|
| 34 |
+
certifi==2023.11.17
|
| 35 |
+
nvidia-cufft-cu11==10.9.0.58
|
| 36 |
+
six==1.16.0
|
| 37 |
+
pydantic==2.9.2
|
| 38 |
+
click==8.1.7
|
| 39 |
+
nest-asyncio==1.5.6
|
| 40 |
+
gmpy2==2.1.0
|
| 41 |
+
matplotlib==3.8.2
|
| 42 |
+
scipy==1.11.4
|
| 43 |
+
typing_extensions==4.12.2
|
| 44 |
+
statsmodels==0.14.0
|
| 45 |
+
huggingface-hub==0.25.0
|
| 46 |
+
frozenlist==1.4.1
|
| 47 |
+
gpustat==1.1.1
|
| 48 |
+
nvidia-nvtx-cu11==11.7.91
|
| 49 |
+
safetensors==0.4.5
|
| 50 |
+
stanza==1.9.2
|
| 51 |
+
decorator==5.1.1
|
| 52 |
+
seaborn==0.13.0
|
| 53 |
+
sentencepiece==0.2.0
|
| 54 |
+
PyYAML==6.0.1
|
| 55 |
+
black==24.8.0
|
| 56 |
+
protobuf==4.25.1
|
| 57 |
+
pickleshare==0.7.5
|
| 58 |
+
peft==0.13.0
|
| 59 |
+
triton==2.0.0
|
| 60 |
+
nvidia-cuda-runtime-cu11==11.7.99
|
| 61 |
+
Jinja2==3.1.2
|
| 62 |
+
nvidia-cusolver-cu11==11.4.0.1
|
| 63 |
+
executing==1.2.0
|
| 64 |
+
jupyter_client==8.1.0
|
| 65 |
+
pluggy==1.3.0
|
| 66 |
+
cmake==3.30.3
|
| 67 |
+
pytz==2023.3.post1
|
| 68 |
+
aiohappyeyeballs==2.4.2
|
| 69 |
+
kiwisolver==1.4.5
|
| 70 |
+
py-cpuinfo==9.0.0
|
| 71 |
+
Pillow==10.1.0
|
| 72 |
+
ptyprocess==0.7.0
|
| 73 |
+
importlib_resources==6.4.5
|
| 74 |
+
GitPython==3.1.43
|
| 75 |
+
importlib-metadata==6.0.0
|
| 76 |
+
iniconfig==2.0.0
|
| 77 |
+
scikit-learn==1.3.2
|
| 78 |
+
exceptiongroup==1.1.0
|
| 79 |
+
networkx==2.8.6
|
| 80 |
+
accelerate==1.0.0
|
| 81 |
+
nltk==3.8.1
|
| 82 |
+
shutilwhich==1.1.0
|
| 83 |
+
fonttools==4.45.1
|
| 84 |
+
future==0.18.3
|
| 85 |
+
aiohttp==3.10.6
|
| 86 |
+
wcwidth==0.2.5
|
| 87 |
+
idna==3.6
|
| 88 |
+
filelock==3.12.2
|
| 89 |
+
pathspec==0.12.1
|
| 90 |
+
jupyter_core==5.1.0
|
| 91 |
+
lit==18.1.8
|
| 92 |
+
nvidia-curand-cu11==10.2.10.91
|
| 93 |
+
nvidia-cublas-cu11==11.10.3.66
|
| 94 |
+
nvidia-ml-py==12.560.30
|
| 95 |
+
msgpack==1.1.0
|
| 96 |
+
python-dateutil==2.8.2
|
| 97 |
+
blessed==1.20.0
|
| 98 |
+
packaging==23.0
|
| 99 |
+
gitdb==4.0.11
|
| 100 |
+
yarl==1.13.0
|
| 101 |
+
emoji==2.8.0
|
| 102 |
+
tzdata==2023.3
|
| 103 |
+
cycler==0.12.1
|
| 104 |
+
tornado==6.2
|
| 105 |
+
backcall==0.2.0
|
| 106 |
+
plotnine==0.12.4
|
| 107 |
+
ninja==1.11.1.1
|
| 108 |
+
latex==0.7.0
|
| 109 |
+
wandb==0.18.5
|
| 110 |
+
setproctitle==1.3.3
|
| 111 |
+
threadpoolctl==3.2.0
|
| 112 |
+
requests==2.32.3
|
| 113 |
+
pyparsing==3.1.1
|
| 114 |
+
smmap==5.0.1
|
| 115 |
+
pyzmq==23.0.0
|
| 116 |
+
async-timeout==4.0.3
|
| 117 |
+
annotated-types==0.7.0
|
| 118 |
+
matplotlib-inline==0.1.6
|
| 119 |
+
latexcodec==1.0.0
|
| 120 |
+
ipython==8.0.0
|
| 121 |
+
patsy==0.5.3
|
| 122 |
+
contourpy==1.2.0
|
| 123 |
+
multidict==6.1.0
|
| 124 |
+
mizani==0.9.3
|
| 125 |
+
urllib3==2.1.0
|
| 126 |
+
tokenizers==0.20.0
|
| 127 |
+
MarkupSafe==2.1.2
|
| 128 |
+
pip==24.2
|
| 129 |
+
pexpect==4.8.0
|
| 130 |
+
tqdm==4.66.5
|
| 131 |
+
jedi==0.18.2
|
| 132 |
+
pydantic_core==2.23.4
|
| 133 |
+
tempdir==0.7.1
|
| 134 |
+
mpmath==1.2.1
|
| 135 |
+
setuptools==72.1.0
|
| 136 |
+
pytest==7.4.3
|
| 137 |
+
pure-eval==0.2.2
|
| 138 |
+
psutil==5.9.1
|
| 139 |
+
comm==0.1.2
|
| 140 |
+
nvidia-cuda-cupti-cu11==11.7.101
|
| 141 |
+
nvidia-cuda-nvrtc-cu11==11.7.99
|
| 142 |
+
regex==2023.10.3
|
| 143 |
+
platformdirs==2.5.2
|
| 144 |
+
asttokens==2.2.1
|
| 145 |
+
torch==2.0.0
|
| 146 |
+
nvidia-nccl-cu11==2.14.3
|
| 147 |
+
xxhash==3.5.0
|
wandb/run-20241101_012733-4u8e027p/files/wandb-metadata.json
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"os": "Linux-5.4.0-162-generic-x86_64-with-glibc2.31",
|
| 3 |
+
"python": "3.9.19",
|
| 4 |
+
"startedAt": "2024-11-01T05:27:33.993570Z",
|
| 5 |
+
"args": [
|
| 6 |
+
"--perturbation",
|
| 7 |
+
"shuffle_nondeterministic",
|
| 8 |
+
"--train_set",
|
| 9 |
+
"10M",
|
| 10 |
+
"--batch_size",
|
| 11 |
+
"3",
|
| 12 |
+
"--epoch",
|
| 13 |
+
"6",
|
| 14 |
+
"--seed",
|
| 15 |
+
"0"
|
| 16 |
+
],
|
| 17 |
+
"program": "/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/train_deep_wandb.py",
|
| 18 |
+
"codePath": "train/train_deep_wandb.py",
|
| 19 |
+
"git": {
|
| 20 |
+
"remote": "git@hf.co:Yaning1001/Impossible_llm.git",
|
| 21 |
+
"commit": "ed716cdcfcdea02b67f7ed0f3504c2b1c8b737c4"
|
| 22 |
+
},
|
| 23 |
+
"email": "yaning1001@gmail.com",
|
| 24 |
+
"root": "/mnt/ssd3/chunhui/yaning/project/impossible_llm/train",
|
| 25 |
+
"host": "mms-large-2",
|
| 26 |
+
"username": "chunhui",
|
| 27 |
+
"executable": "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/bin/python",
|
| 28 |
+
"codePathLocal": "train_deep_wandb.py",
|
| 29 |
+
"cpu_count": 32,
|
| 30 |
+
"cpu_count_logical": 64,
|
| 31 |
+
"gpu": "NVIDIA RTX A6000",
|
| 32 |
+
"gpu_count": 8,
|
| 33 |
+
"disk": {
|
| 34 |
+
"/": {
|
| 35 |
+
"total": "1888559353856",
|
| 36 |
+
"used": "1753992269824"
|
| 37 |
+
}
|
| 38 |
+
},
|
| 39 |
+
"memory": {
|
| 40 |
+
"total": "202617098240"
|
| 41 |
+
},
|
| 42 |
+
"cpu": {
|
| 43 |
+
"count": 32,
|
| 44 |
+
"countLogical": 64
|
| 45 |
+
},
|
| 46 |
+
"gpu_nvidia": [
|
| 47 |
+
{
|
| 48 |
+
"name": "NVIDIA RTX A6000",
|
| 49 |
+
"memoryTotal": "51527024640",
|
| 50 |
+
"cudaCores": 10752,
|
| 51 |
+
"architecture": "Ampere"
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"name": "NVIDIA RTX A6000",
|
| 55 |
+
"memoryTotal": "51527024640",
|
| 56 |
+
"cudaCores": 10752,
|
| 57 |
+
"architecture": "Ampere"
|
| 58 |
+
},
|
| 59 |
+
{
|
| 60 |
+
"name": "NVIDIA RTX A6000",
|
| 61 |
+
"memoryTotal": "51527024640",
|
| 62 |
+
"cudaCores": 10752,
|
| 63 |
+
"architecture": "Ampere"
|
| 64 |
+
},
|
| 65 |
+
{
|
| 66 |
+
"name": "NVIDIA RTX A6000",
|
| 67 |
+
"memoryTotal": "51527024640",
|
| 68 |
+
"cudaCores": 10752,
|
| 69 |
+
"architecture": "Ampere"
|
| 70 |
+
},
|
| 71 |
+
{
|
| 72 |
+
"name": "NVIDIA RTX A6000",
|
| 73 |
+
"memoryTotal": "51527024640",
|
| 74 |
+
"cudaCores": 10752,
|
| 75 |
+
"architecture": "Ampere"
|
| 76 |
+
},
|
| 77 |
+
{
|
| 78 |
+
"name": "NVIDIA RTX A6000",
|
| 79 |
+
"memoryTotal": "51527024640",
|
| 80 |
+
"cudaCores": 10752,
|
| 81 |
+
"architecture": "Ampere"
|
| 82 |
+
},
|
| 83 |
+
{
|
| 84 |
+
"name": "NVIDIA RTX A6000",
|
| 85 |
+
"memoryTotal": "51527024640",
|
| 86 |
+
"cudaCores": 10752,
|
| 87 |
+
"architecture": "Ampere"
|
| 88 |
+
},
|
| 89 |
+
{
|
| 90 |
+
"name": "NVIDIA RTX A6000",
|
| 91 |
+
"memoryTotal": "51527024640",
|
| 92 |
+
"cudaCores": 10752,
|
| 93 |
+
"architecture": "Ampere"
|
| 94 |
+
}
|
| 95 |
+
],
|
| 96 |
+
"cudaVersion": "11.8"
|
| 97 |
+
}
|
wandb/run-20241101_012733-4u8e027p/logs/debug.log
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2024-11-01 01:27:33,991 INFO MainThread:678556 [wandb_setup.py:_flush():79] Current SDK version is 0.18.5
|
| 2 |
+
2024-11-01 01:27:33,991 INFO MainThread:678556 [wandb_setup.py:_flush():79] Configure stats pid to 678556
|
| 3 |
+
2024-11-01 01:27:33,991 INFO MainThread:678556 [wandb_setup.py:_flush():79] Loading settings from /home/chunhui/.config/wandb/settings
|
| 4 |
+
2024-11-01 01:27:33,991 INFO MainThread:678556 [wandb_setup.py:_flush():79] Loading settings from /mnt/ssd3/chunhui/yaning/project/impossible_llm/train/wandb/settings
|
| 5 |
+
2024-11-01 01:27:33,991 INFO MainThread:678556 [wandb_setup.py:_flush():79] Loading settings from environment variables: {}
|
| 6 |
+
2024-11-01 01:27:33,991 INFO MainThread:678556 [wandb_setup.py:_flush():79] Applying setup settings: {'mode': None, '_disable_service': None}
|
| 7 |
+
2024-11-01 01:27:33,991 INFO MainThread:678556 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'train/train_deep_wandb.py', 'program_abspath': '/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/train_deep_wandb.py', 'program': '/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/train_deep_wandb.py'}
|
| 8 |
+
2024-11-01 01:27:33,991 INFO MainThread:678556 [wandb_setup.py:_flush():79] Applying login settings: {}
|
| 9 |
+
2024-11-01 01:27:33,991 INFO MainThread:678556 [wandb_init.py:_log_setup():534] Logging user logs to /mnt/ssd3/chunhui/yaning/project/impossible_llm/train/wandb/run-20241101_012733-4u8e027p/logs/debug.log
|
| 10 |
+
2024-11-01 01:27:33,991 INFO MainThread:678556 [wandb_init.py:_log_setup():535] Logging internal logs to /mnt/ssd3/chunhui/yaning/project/impossible_llm/train/wandb/run-20241101_012733-4u8e027p/logs/debug-internal.log
|
| 11 |
+
2024-11-01 01:27:33,991 INFO MainThread:678556 [wandb_init.py:init():621] calling init triggers
|
| 12 |
+
2024-11-01 01:27:33,991 INFO MainThread:678556 [wandb_init.py:init():628] wandb.init called with sweep_config: {}
|
| 13 |
+
config: {}
|
| 14 |
+
2024-11-01 01:27:33,991 INFO MainThread:678556 [wandb_init.py:init():671] starting backend
|
| 15 |
+
2024-11-01 01:27:33,991 INFO MainThread:678556 [wandb_init.py:init():675] sending inform_init request
|
| 16 |
+
2024-11-01 01:27:33,993 INFO MainThread:678556 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
| 17 |
+
2024-11-01 01:27:33,993 INFO MainThread:678556 [wandb_init.py:init():688] backend started and connected
|
| 18 |
+
2024-11-01 01:27:33,996 INFO MainThread:678556 [wandb_init.py:init():783] updated telemetry
|
| 19 |
+
2024-11-01 01:27:34,021 INFO MainThread:678556 [wandb_init.py:init():816] communicating run to backend with 90.0 second timeout
|
| 20 |
+
2024-11-01 01:27:34,320 INFO MainThread:678556 [wandb_init.py:init():867] starting run threads in backend
|
| 21 |
+
2024-11-01 01:27:34,405 INFO MainThread:678556 [wandb_run.py:_console_start():2463] atexit reg
|
| 22 |
+
2024-11-01 01:27:34,405 INFO MainThread:678556 [wandb_run.py:_redirect():2311] redirect: wrap_raw
|
| 23 |
+
2024-11-01 01:27:34,405 INFO MainThread:678556 [wandb_run.py:_redirect():2376] Wrapping output streams.
|
| 24 |
+
2024-11-01 01:27:34,405 INFO MainThread:678556 [wandb_run.py:_redirect():2401] Redirects installed.
|
| 25 |
+
2024-11-01 01:27:34,407 INFO MainThread:678556 [wandb_init.py:init():911] run started, returning control to user process
|
| 26 |
+
2024-11-01 01:27:34,407 INFO MainThread:678556 [wandb_run.py:_config_callback():1390] config_cb None None {'perturbation': 'shuffle_nondeterministic', 'train_set': '10M', 'batch_size': 3, 'epoch': 6, 'seed': 0, 'lr': 5e-06}
|
wandb/run-20241101_012733-e3zsr634/files/output.log
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Downloading shards: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [02:32<00:00, 76.36s/it]
|
| 2 |
+
Loading checkpoint shards: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:06<00:00, 3.18s/it]
|
| 3 |
+
generation_config.json: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 185/185 [00:00<00:00, 47.5kB/s]
|
| 4 |
+
Map: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 16425/16425 [00:51<00:00, 321.80 examples/s]
|
| 5 |
+
Map: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 17013/17013 [00:49<00:00, 346.44 examples/s]
|
| 6 |
+
tokenized_valid: Dataset({
|
| 7 |
+
features: ['input_ids', 'attention_mask'],
|
| 8 |
+
num_rows: 600
|
| 9 |
+
})
|
| 10 |
+
/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/transformers/training_args.py:1545: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead
|
| 11 |
+
warnings.warn(
|
| 12 |
+
[2024-11-01 01:32:23,603] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect)
|
| 13 |
+
[2024-11-01 01:32:32,774] [INFO] [comm.py:652:init_distributed] cdb=None
|
| 14 |
+
Installed CUDA version 11.8 does not match the version torch was compiled with 11.7 but since the APIs are compatible, accepting this combination
|
| 15 |
+
Using /home/chunhui/.cache/torch_extensions/py39_cu117 as PyTorch extensions root...
|
| 16 |
+
Emitting ninja build file /home/chunhui/.cache/torch_extensions/py39_cu117/cpu_adam/build.ninja...
|
| 17 |
+
Building extension module cpu_adam...
|
| 18 |
+
Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)
|
| 19 |
+
Loading extension module cpu_adam...
|
| 20 |
+
Time to load cpu_adam op: 5.578649520874023 seconds
|
wandb/run-20241101_012733-e3zsr634/files/wandb-metadata.json
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"os": "Linux-5.4.0-162-generic-x86_64-with-glibc2.31",
|
| 3 |
+
"python": "3.9.19",
|
| 4 |
+
"startedAt": "2024-11-01T05:27:33.958355Z",
|
| 5 |
+
"args": [
|
| 6 |
+
"--perturbation",
|
| 7 |
+
"shuffle_nondeterministic",
|
| 8 |
+
"--train_set",
|
| 9 |
+
"10M",
|
| 10 |
+
"--batch_size",
|
| 11 |
+
"3",
|
| 12 |
+
"--epoch",
|
| 13 |
+
"6",
|
| 14 |
+
"--seed",
|
| 15 |
+
"0"
|
| 16 |
+
],
|
| 17 |
+
"program": "/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/train_deep_wandb.py",
|
| 18 |
+
"codePath": "train/train_deep_wandb.py",
|
| 19 |
+
"git": {
|
| 20 |
+
"remote": "git@hf.co:Yaning1001/Impossible_llm.git",
|
| 21 |
+
"commit": "ed716cdcfcdea02b67f7ed0f3504c2b1c8b737c4"
|
| 22 |
+
},
|
| 23 |
+
"email": "yaning1001@gmail.com",
|
| 24 |
+
"root": "/mnt/ssd3/chunhui/yaning/project/impossible_llm/train",
|
| 25 |
+
"host": "mms-large-2",
|
| 26 |
+
"username": "chunhui",
|
| 27 |
+
"executable": "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/bin/python",
|
| 28 |
+
"codePathLocal": "train_deep_wandb.py",
|
| 29 |
+
"cpu_count": 32,
|
| 30 |
+
"cpu_count_logical": 64,
|
| 31 |
+
"gpu": "NVIDIA RTX A6000",
|
| 32 |
+
"gpu_count": 8,
|
| 33 |
+
"disk": {
|
| 34 |
+
"/": {
|
| 35 |
+
"total": "1888559353856",
|
| 36 |
+
"used": "1753992261632"
|
| 37 |
+
}
|
| 38 |
+
},
|
| 39 |
+
"memory": {
|
| 40 |
+
"total": "202617098240"
|
| 41 |
+
},
|
| 42 |
+
"cpu": {
|
| 43 |
+
"count": 32,
|
| 44 |
+
"countLogical": 64
|
| 45 |
+
},
|
| 46 |
+
"gpu_nvidia": [
|
| 47 |
+
{
|
| 48 |
+
"name": "NVIDIA RTX A6000",
|
| 49 |
+
"memoryTotal": "51527024640",
|
| 50 |
+
"cudaCores": 10752,
|
| 51 |
+
"architecture": "Ampere"
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"name": "NVIDIA RTX A6000",
|
| 55 |
+
"memoryTotal": "51527024640",
|
| 56 |
+
"cudaCores": 10752,
|
| 57 |
+
"architecture": "Ampere"
|
| 58 |
+
},
|
| 59 |
+
{
|
| 60 |
+
"name": "NVIDIA RTX A6000",
|
| 61 |
+
"memoryTotal": "51527024640",
|
| 62 |
+
"cudaCores": 10752,
|
| 63 |
+
"architecture": "Ampere"
|
| 64 |
+
},
|
| 65 |
+
{
|
| 66 |
+
"name": "NVIDIA RTX A6000",
|
| 67 |
+
"memoryTotal": "51527024640",
|
| 68 |
+
"cudaCores": 10752,
|
| 69 |
+
"architecture": "Ampere"
|
| 70 |
+
},
|
| 71 |
+
{
|
| 72 |
+
"name": "NVIDIA RTX A6000",
|
| 73 |
+
"memoryTotal": "51527024640",
|
| 74 |
+
"cudaCores": 10752,
|
| 75 |
+
"architecture": "Ampere"
|
| 76 |
+
},
|
| 77 |
+
{
|
| 78 |
+
"name": "NVIDIA RTX A6000",
|
| 79 |
+
"memoryTotal": "51527024640",
|
| 80 |
+
"cudaCores": 10752,
|
| 81 |
+
"architecture": "Ampere"
|
| 82 |
+
},
|
| 83 |
+
{
|
| 84 |
+
"name": "NVIDIA RTX A6000",
|
| 85 |
+
"memoryTotal": "51527024640",
|
| 86 |
+
"cudaCores": 10752,
|
| 87 |
+
"architecture": "Ampere"
|
| 88 |
+
},
|
| 89 |
+
{
|
| 90 |
+
"name": "NVIDIA RTX A6000",
|
| 91 |
+
"memoryTotal": "51527024640",
|
| 92 |
+
"cudaCores": 10752,
|
| 93 |
+
"architecture": "Ampere"
|
| 94 |
+
}
|
| 95 |
+
],
|
| 96 |
+
"cudaVersion": "11.8"
|
| 97 |
+
}
|
wandb/run-20241101_200502-28ivel81/files/output.log
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s]
|
wandb/run-20241101_200502-28ivel81/files/wandb-metadata.json
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"os": "Linux-5.4.0-162-generic-x86_64-with-glibc2.31",
|
| 3 |
+
"python": "3.9.19",
|
| 4 |
+
"startedAt": "2024-11-02T00:05:02.693656Z",
|
| 5 |
+
"args": [
|
| 6 |
+
"--perturbation",
|
| 7 |
+
"shuffle_nondeterministic",
|
| 8 |
+
"--train_set",
|
| 9 |
+
"10M",
|
| 10 |
+
"--batch_size",
|
| 11 |
+
"3",
|
| 12 |
+
"--epoch",
|
| 13 |
+
"3",
|
| 14 |
+
"--seed",
|
| 15 |
+
"0"
|
| 16 |
+
],
|
| 17 |
+
"program": "/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/train_deep_wandb.py",
|
| 18 |
+
"codePath": "train/train_deep_wandb.py",
|
| 19 |
+
"git": {
|
| 20 |
+
"remote": "git@hf.co:Yaning1001/Impossible_llm.git",
|
| 21 |
+
"commit": "ed716cdcfcdea02b67f7ed0f3504c2b1c8b737c4"
|
| 22 |
+
},
|
| 23 |
+
"email": "yaning1001@gmail.com",
|
| 24 |
+
"root": "/mnt/ssd3/chunhui/yaning/project/impossible_llm/train",
|
| 25 |
+
"host": "mms-large-2",
|
| 26 |
+
"username": "chunhui",
|
| 27 |
+
"executable": "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/bin/python",
|
| 28 |
+
"codePathLocal": "train_deep_wandb.py",
|
| 29 |
+
"cpu_count": 32,
|
| 30 |
+
"cpu_count_logical": 64,
|
| 31 |
+
"gpu": "NVIDIA RTX A6000",
|
| 32 |
+
"gpu_count": 8,
|
| 33 |
+
"disk": {
|
| 34 |
+
"/": {
|
| 35 |
+
"total": "1888559353856",
|
| 36 |
+
"used": "1754801463296"
|
| 37 |
+
}
|
| 38 |
+
},
|
| 39 |
+
"memory": {
|
| 40 |
+
"total": "202617098240"
|
| 41 |
+
},
|
| 42 |
+
"cpu": {
|
| 43 |
+
"count": 32,
|
| 44 |
+
"countLogical": 64
|
| 45 |
+
},
|
| 46 |
+
"gpu_nvidia": [
|
| 47 |
+
{
|
| 48 |
+
"name": "NVIDIA RTX A6000",
|
| 49 |
+
"memoryTotal": "51527024640",
|
| 50 |
+
"cudaCores": 10752,
|
| 51 |
+
"architecture": "Ampere"
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"name": "NVIDIA RTX A6000",
|
| 55 |
+
"memoryTotal": "51527024640",
|
| 56 |
+
"cudaCores": 10752,
|
| 57 |
+
"architecture": "Ampere"
|
| 58 |
+
},
|
| 59 |
+
{
|
| 60 |
+
"name": "NVIDIA RTX A6000",
|
| 61 |
+
"memoryTotal": "51527024640",
|
| 62 |
+
"cudaCores": 10752,
|
| 63 |
+
"architecture": "Ampere"
|
| 64 |
+
},
|
| 65 |
+
{
|
| 66 |
+
"name": "NVIDIA RTX A6000",
|
| 67 |
+
"memoryTotal": "51527024640",
|
| 68 |
+
"cudaCores": 10752,
|
| 69 |
+
"architecture": "Ampere"
|
| 70 |
+
},
|
| 71 |
+
{
|
| 72 |
+
"name": "NVIDIA RTX A6000",
|
| 73 |
+
"memoryTotal": "51527024640",
|
| 74 |
+
"cudaCores": 10752,
|
| 75 |
+
"architecture": "Ampere"
|
| 76 |
+
},
|
| 77 |
+
{
|
| 78 |
+
"name": "NVIDIA RTX A6000",
|
| 79 |
+
"memoryTotal": "51527024640",
|
| 80 |
+
"cudaCores": 10752,
|
| 81 |
+
"architecture": "Ampere"
|
| 82 |
+
},
|
| 83 |
+
{
|
| 84 |
+
"name": "NVIDIA RTX A6000",
|
| 85 |
+
"memoryTotal": "51527024640",
|
| 86 |
+
"cudaCores": 10752,
|
| 87 |
+
"architecture": "Ampere"
|
| 88 |
+
},
|
| 89 |
+
{
|
| 90 |
+
"name": "NVIDIA RTX A6000",
|
| 91 |
+
"memoryTotal": "51527024640",
|
| 92 |
+
"cudaCores": 10752,
|
| 93 |
+
"architecture": "Ampere"
|
| 94 |
+
}
|
| 95 |
+
],
|
| 96 |
+
"cudaVersion": "11.8"
|
| 97 |
+
}
|
wandb/run-20241101_201708-b4wkk29o/files/output.log
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Loading checkpoint shards: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:05<00:00, 2.56s/it]
|
| 2 |
+
tokenized_valid: Dataset({
|
| 3 |
+
features: ['input_ids', 'attention_mask'],
|
| 4 |
+
num_rows: 600
|
| 5 |
+
})
|
| 6 |
+
/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/transformers/training_args.py:1545: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead
|
| 7 |
+
warnings.warn(
|
| 8 |
+
[2024-11-01 20:17:16,912] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect)
|
| 9 |
+
[2024-11-01 20:17:26,148] [INFO] [comm.py:652:init_distributed] cdb=None
|
| 10 |
+
Installed CUDA version 11.8 does not match the version torch was compiled with 11.7 but since the APIs are compatible, accepting this combination
|
| 11 |
+
Using /home/chunhui/.cache/torch_extensions/py39_cu117 as PyTorch extensions root...
|
| 12 |
+
Loading extension module cpu_adam...
|
| 13 |
+
Time to load cpu_adam op: 5.02955174446106 seconds
|
wandb/run-20241101_201708-b4wkk29o/files/wandb-metadata.json
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"os": "Linux-5.4.0-162-generic-x86_64-with-glibc2.31",
|
| 3 |
+
"python": "3.9.19",
|
| 4 |
+
"startedAt": "2024-11-02T00:17:08.113936Z",
|
| 5 |
+
"args": [
|
| 6 |
+
"--perturbation",
|
| 7 |
+
"shuffle_nondeterministic",
|
| 8 |
+
"--train_set",
|
| 9 |
+
"10M",
|
| 10 |
+
"--batch_size",
|
| 11 |
+
"3",
|
| 12 |
+
"--epoch",
|
| 13 |
+
"3",
|
| 14 |
+
"--seed",
|
| 15 |
+
"0"
|
| 16 |
+
],
|
| 17 |
+
"program": "/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/train_deep_wandb.py",
|
| 18 |
+
"codePath": "train/train_deep_wandb.py",
|
| 19 |
+
"git": {
|
| 20 |
+
"remote": "git@hf.co:Yaning1001/Impossible_llm.git",
|
| 21 |
+
"commit": "ed716cdcfcdea02b67f7ed0f3504c2b1c8b737c4"
|
| 22 |
+
},
|
| 23 |
+
"email": "yaning1001@gmail.com",
|
| 24 |
+
"root": "/mnt/ssd3/chunhui/yaning/project/impossible_llm/train",
|
| 25 |
+
"host": "mms-large-2",
|
| 26 |
+
"username": "chunhui",
|
| 27 |
+
"executable": "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/bin/python",
|
| 28 |
+
"codePathLocal": "train_deep_wandb.py",
|
| 29 |
+
"cpu_count": 32,
|
| 30 |
+
"cpu_count_logical": 64,
|
| 31 |
+
"gpu": "NVIDIA RTX A6000",
|
| 32 |
+
"gpu_count": 8,
|
| 33 |
+
"disk": {
|
| 34 |
+
"/": {
|
| 35 |
+
"total": "1888559353856",
|
| 36 |
+
"used": "1754802659328"
|
| 37 |
+
}
|
| 38 |
+
},
|
| 39 |
+
"memory": {
|
| 40 |
+
"total": "202617098240"
|
| 41 |
+
},
|
| 42 |
+
"cpu": {
|
| 43 |
+
"count": 32,
|
| 44 |
+
"countLogical": 64
|
| 45 |
+
},
|
| 46 |
+
"gpu_nvidia": [
|
| 47 |
+
{
|
| 48 |
+
"name": "NVIDIA RTX A6000",
|
| 49 |
+
"memoryTotal": "51527024640",
|
| 50 |
+
"cudaCores": 10752,
|
| 51 |
+
"architecture": "Ampere"
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"name": "NVIDIA RTX A6000",
|
| 55 |
+
"memoryTotal": "51527024640",
|
| 56 |
+
"cudaCores": 10752,
|
| 57 |
+
"architecture": "Ampere"
|
| 58 |
+
},
|
| 59 |
+
{
|
| 60 |
+
"name": "NVIDIA RTX A6000",
|
| 61 |
+
"memoryTotal": "51527024640",
|
| 62 |
+
"cudaCores": 10752,
|
| 63 |
+
"architecture": "Ampere"
|
| 64 |
+
},
|
| 65 |
+
{
|
| 66 |
+
"name": "NVIDIA RTX A6000",
|
| 67 |
+
"memoryTotal": "51527024640",
|
| 68 |
+
"cudaCores": 10752,
|
| 69 |
+
"architecture": "Ampere"
|
| 70 |
+
},
|
| 71 |
+
{
|
| 72 |
+
"name": "NVIDIA RTX A6000",
|
| 73 |
+
"memoryTotal": "51527024640",
|
| 74 |
+
"cudaCores": 10752,
|
| 75 |
+
"architecture": "Ampere"
|
| 76 |
+
},
|
| 77 |
+
{
|
| 78 |
+
"name": "NVIDIA RTX A6000",
|
| 79 |
+
"memoryTotal": "51527024640",
|
| 80 |
+
"cudaCores": 10752,
|
| 81 |
+
"architecture": "Ampere"
|
| 82 |
+
},
|
| 83 |
+
{
|
| 84 |
+
"name": "NVIDIA RTX A6000",
|
| 85 |
+
"memoryTotal": "51527024640",
|
| 86 |
+
"cudaCores": 10752,
|
| 87 |
+
"architecture": "Ampere"
|
| 88 |
+
},
|
| 89 |
+
{
|
| 90 |
+
"name": "NVIDIA RTX A6000",
|
| 91 |
+
"memoryTotal": "51527024640",
|
| 92 |
+
"cudaCores": 10752,
|
| 93 |
+
"architecture": "Ampere"
|
| 94 |
+
}
|
| 95 |
+
],
|
| 96 |
+
"cudaVersion": "11.8"
|
| 97 |
+
}
|
wandb/run-20241101_201708-b4wkk29o/logs/debug.log
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2024-11-01 20:17:08,110 INFO MainThread:875622 [wandb_setup.py:_flush():79] Current SDK version is 0.18.5
|
| 2 |
+
2024-11-01 20:17:08,111 INFO MainThread:875622 [wandb_setup.py:_flush():79] Configure stats pid to 875622
|
| 3 |
+
2024-11-01 20:17:08,111 INFO MainThread:875622 [wandb_setup.py:_flush():79] Loading settings from /home/chunhui/.config/wandb/settings
|
| 4 |
+
2024-11-01 20:17:08,111 INFO MainThread:875622 [wandb_setup.py:_flush():79] Loading settings from /mnt/ssd3/chunhui/yaning/project/impossible_llm/train/wandb/settings
|
| 5 |
+
2024-11-01 20:17:08,111 INFO MainThread:875622 [wandb_setup.py:_flush():79] Loading settings from environment variables: {}
|
| 6 |
+
2024-11-01 20:17:08,111 INFO MainThread:875622 [wandb_setup.py:_flush():79] Applying setup settings: {'mode': None, '_disable_service': None}
|
| 7 |
+
2024-11-01 20:17:08,111 INFO MainThread:875622 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'train/train_deep_wandb.py', 'program_abspath': '/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/train_deep_wandb.py', 'program': '/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/train_deep_wandb.py'}
|
| 8 |
+
2024-11-01 20:17:08,111 INFO MainThread:875622 [wandb_setup.py:_flush():79] Applying login settings: {}
|
| 9 |
+
2024-11-01 20:17:08,111 INFO MainThread:875622 [wandb_init.py:_log_setup():534] Logging user logs to /mnt/ssd3/chunhui/yaning/project/impossible_llm/train/wandb/run-20241101_201708-b4wkk29o/logs/debug.log
|
| 10 |
+
2024-11-01 20:17:08,111 INFO MainThread:875622 [wandb_init.py:_log_setup():535] Logging internal logs to /mnt/ssd3/chunhui/yaning/project/impossible_llm/train/wandb/run-20241101_201708-b4wkk29o/logs/debug-internal.log
|
| 11 |
+
2024-11-01 20:17:08,111 INFO MainThread:875622 [wandb_init.py:init():621] calling init triggers
|
| 12 |
+
2024-11-01 20:17:08,111 INFO MainThread:875622 [wandb_init.py:init():628] wandb.init called with sweep_config: {}
|
| 13 |
+
config: {}
|
| 14 |
+
2024-11-01 20:17:08,111 INFO MainThread:875622 [wandb_init.py:init():671] starting backend
|
| 15 |
+
2024-11-01 20:17:08,111 INFO MainThread:875622 [wandb_init.py:init():675] sending inform_init request
|
| 16 |
+
2024-11-01 20:17:08,113 INFO MainThread:875622 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
| 17 |
+
2024-11-01 20:17:08,113 INFO MainThread:875622 [wandb_init.py:init():688] backend started and connected
|
| 18 |
+
2024-11-01 20:17:08,116 INFO MainThread:875622 [wandb_init.py:init():783] updated telemetry
|
| 19 |
+
2024-11-01 20:17:08,142 INFO MainThread:875622 [wandb_init.py:init():816] communicating run to backend with 90.0 second timeout
|
| 20 |
+
2024-11-01 20:17:09,975 INFO MainThread:875622 [wandb_init.py:init():867] starting run threads in backend
|
| 21 |
+
2024-11-01 20:17:10,065 INFO MainThread:875622 [wandb_run.py:_console_start():2463] atexit reg
|
| 22 |
+
2024-11-01 20:17:10,065 INFO MainThread:875622 [wandb_run.py:_redirect():2311] redirect: wrap_raw
|
| 23 |
+
2024-11-01 20:17:10,065 INFO MainThread:875622 [wandb_run.py:_redirect():2376] Wrapping output streams.
|
| 24 |
+
2024-11-01 20:17:10,065 INFO MainThread:875622 [wandb_run.py:_redirect():2401] Redirects installed.
|
| 25 |
+
2024-11-01 20:17:10,067 INFO MainThread:875622 [wandb_init.py:init():911] run started, returning control to user process
|
| 26 |
+
2024-11-01 20:17:10,067 INFO MainThread:875622 [wandb_run.py:_config_callback():1390] config_cb None None {'perturbation': 'shuffle_nondeterministic', 'train_set': '10M', 'batch_size': 3, 'epoch': 3, 'seed': 0, 'lr': 5e-06}
|
wandb/run-20241101_201708-b4wkk29o/run-b4wkk29o.wandb
ADDED
|
Binary file (32.8 kB). View file
|
|
|
wandb/run-20241101_201926-5y6ulxig/files/output.log
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Loading checkpoint shards: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:04<00:00, 2.41s/it]
|
| 2 |
+
tokenized_valid: Dataset({
|
| 3 |
+
features: ['input_ids', 'attention_mask'],
|
| 4 |
+
num_rows: 600
|
| 5 |
+
})
|
| 6 |
+
/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/transformers/training_args.py:1545: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead
|
| 7 |
+
warnings.warn(
|
| 8 |
+
[2024-11-01 20:19:34,030] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect)
|
| 9 |
+
[2024-11-01 20:19:43,157] [INFO] [comm.py:652:init_distributed] cdb=None
|
| 10 |
+
Installed CUDA version 11.8 does not match the version torch was compiled with 11.7 but since the APIs are compatible, accepting this combination
|
| 11 |
+
Using /home/chunhui/.cache/torch_extensions/py39_cu117 as PyTorch extensions root...
|
| 12 |
+
Loading extension module cpu_adam...
|
| 13 |
+
Time to load cpu_adam op: 5.544436693191528 seconds
|
wandb/run-20241101_201926-5y6ulxig/files/requirements.txt
ADDED
|
@@ -0,0 +1,147 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
funcsigs==1.0.2
|
| 2 |
+
sentry-sdk==2.17.0
|
| 3 |
+
multiprocess==0.70.16
|
| 4 |
+
numpy==1.26.2
|
| 5 |
+
pluralizer==1.2.0
|
| 6 |
+
debugpy==1.6.7
|
| 7 |
+
nvidia-cudnn-cu11==8.5.0.96
|
| 8 |
+
deepspeed==0.15.2
|
| 9 |
+
data==0.4
|
| 10 |
+
pandas==2.1.3
|
| 11 |
+
tomli==2.0.1
|
| 12 |
+
charset-normalizer==3.3.2
|
| 13 |
+
attrs==24.2.0
|
| 14 |
+
aiosignal==1.3.1
|
| 15 |
+
fsspec==2023.10.0
|
| 16 |
+
nvidia-cusparse-cu11==11.7.4.91
|
| 17 |
+
zipp==3.12.0
|
| 18 |
+
mypy-extensions==1.0.0
|
| 19 |
+
datasets==3.0.1
|
| 20 |
+
joblib==1.3.2
|
| 21 |
+
hjson==3.1.0
|
| 22 |
+
traitlets==5.7.1
|
| 23 |
+
stack-data==0.6.0
|
| 24 |
+
transformers==4.45.1
|
| 25 |
+
sympy==1.11.1
|
| 26 |
+
Pygments==2.15.0
|
| 27 |
+
docker-pycreds==0.4.0
|
| 28 |
+
dill==0.3.8
|
| 29 |
+
wheel==0.44.0
|
| 30 |
+
prompt-toolkit==3.0.30
|
| 31 |
+
parso==0.8.3
|
| 32 |
+
ipykernel==6.23.1
|
| 33 |
+
pyarrow==17.0.0
|
| 34 |
+
certifi==2023.11.17
|
| 35 |
+
nvidia-cufft-cu11==10.9.0.58
|
| 36 |
+
six==1.16.0
|
| 37 |
+
pydantic==2.9.2
|
| 38 |
+
click==8.1.7
|
| 39 |
+
nest-asyncio==1.5.6
|
| 40 |
+
gmpy2==2.1.0
|
| 41 |
+
matplotlib==3.8.2
|
| 42 |
+
scipy==1.11.4
|
| 43 |
+
typing_extensions==4.12.2
|
| 44 |
+
statsmodels==0.14.0
|
| 45 |
+
huggingface-hub==0.25.0
|
| 46 |
+
frozenlist==1.4.1
|
| 47 |
+
gpustat==1.1.1
|
| 48 |
+
nvidia-nvtx-cu11==11.7.91
|
| 49 |
+
safetensors==0.4.5
|
| 50 |
+
stanza==1.9.2
|
| 51 |
+
decorator==5.1.1
|
| 52 |
+
seaborn==0.13.0
|
| 53 |
+
sentencepiece==0.2.0
|
| 54 |
+
PyYAML==6.0.1
|
| 55 |
+
black==24.8.0
|
| 56 |
+
protobuf==4.25.1
|
| 57 |
+
pickleshare==0.7.5
|
| 58 |
+
peft==0.13.0
|
| 59 |
+
triton==2.0.0
|
| 60 |
+
nvidia-cuda-runtime-cu11==11.7.99
|
| 61 |
+
Jinja2==3.1.2
|
| 62 |
+
nvidia-cusolver-cu11==11.4.0.1
|
| 63 |
+
executing==1.2.0
|
| 64 |
+
jupyter_client==8.1.0
|
| 65 |
+
pluggy==1.3.0
|
| 66 |
+
cmake==3.30.3
|
| 67 |
+
pytz==2023.3.post1
|
| 68 |
+
aiohappyeyeballs==2.4.2
|
| 69 |
+
kiwisolver==1.4.5
|
| 70 |
+
py-cpuinfo==9.0.0
|
| 71 |
+
Pillow==10.1.0
|
| 72 |
+
ptyprocess==0.7.0
|
| 73 |
+
importlib_resources==6.4.5
|
| 74 |
+
GitPython==3.1.43
|
| 75 |
+
importlib-metadata==6.0.0
|
| 76 |
+
iniconfig==2.0.0
|
| 77 |
+
scikit-learn==1.3.2
|
| 78 |
+
exceptiongroup==1.1.0
|
| 79 |
+
networkx==2.8.6
|
| 80 |
+
accelerate==1.0.0
|
| 81 |
+
nltk==3.8.1
|
| 82 |
+
shutilwhich==1.1.0
|
| 83 |
+
fonttools==4.45.1
|
| 84 |
+
future==0.18.3
|
| 85 |
+
aiohttp==3.10.6
|
| 86 |
+
wcwidth==0.2.5
|
| 87 |
+
idna==3.6
|
| 88 |
+
filelock==3.12.2
|
| 89 |
+
pathspec==0.12.1
|
| 90 |
+
jupyter_core==5.1.0
|
| 91 |
+
lit==18.1.8
|
| 92 |
+
nvidia-curand-cu11==10.2.10.91
|
| 93 |
+
nvidia-cublas-cu11==11.10.3.66
|
| 94 |
+
nvidia-ml-py==12.560.30
|
| 95 |
+
msgpack==1.1.0
|
| 96 |
+
python-dateutil==2.8.2
|
| 97 |
+
blessed==1.20.0
|
| 98 |
+
packaging==23.0
|
| 99 |
+
gitdb==4.0.11
|
| 100 |
+
yarl==1.13.0
|
| 101 |
+
emoji==2.8.0
|
| 102 |
+
tzdata==2023.3
|
| 103 |
+
cycler==0.12.1
|
| 104 |
+
tornado==6.2
|
| 105 |
+
backcall==0.2.0
|
| 106 |
+
plotnine==0.12.4
|
| 107 |
+
ninja==1.11.1.1
|
| 108 |
+
latex==0.7.0
|
| 109 |
+
wandb==0.18.5
|
| 110 |
+
setproctitle==1.3.3
|
| 111 |
+
threadpoolctl==3.2.0
|
| 112 |
+
requests==2.32.3
|
| 113 |
+
pyparsing==3.1.1
|
| 114 |
+
smmap==5.0.1
|
| 115 |
+
pyzmq==23.0.0
|
| 116 |
+
async-timeout==4.0.3
|
| 117 |
+
annotated-types==0.7.0
|
| 118 |
+
matplotlib-inline==0.1.6
|
| 119 |
+
latexcodec==1.0.0
|
| 120 |
+
ipython==8.0.0
|
| 121 |
+
patsy==0.5.3
|
| 122 |
+
contourpy==1.2.0
|
| 123 |
+
multidict==6.1.0
|
| 124 |
+
mizani==0.9.3
|
| 125 |
+
urllib3==2.1.0
|
| 126 |
+
tokenizers==0.20.0
|
| 127 |
+
MarkupSafe==2.1.2
|
| 128 |
+
pip==24.2
|
| 129 |
+
pexpect==4.8.0
|
| 130 |
+
tqdm==4.66.5
|
| 131 |
+
jedi==0.18.2
|
| 132 |
+
pydantic_core==2.23.4
|
| 133 |
+
tempdir==0.7.1
|
| 134 |
+
mpmath==1.2.1
|
| 135 |
+
setuptools==72.1.0
|
| 136 |
+
pytest==7.4.3
|
| 137 |
+
pure-eval==0.2.2
|
| 138 |
+
psutil==5.9.1
|
| 139 |
+
comm==0.1.2
|
| 140 |
+
nvidia-cuda-cupti-cu11==11.7.101
|
| 141 |
+
nvidia-cuda-nvrtc-cu11==11.7.99
|
| 142 |
+
regex==2023.10.3
|
| 143 |
+
platformdirs==2.5.2
|
| 144 |
+
asttokens==2.2.1
|
| 145 |
+
torch==2.0.0
|
| 146 |
+
nvidia-nccl-cu11==2.14.3
|
| 147 |
+
xxhash==3.5.0
|
wandb/run-20241101_201926-5y6ulxig/files/wandb-metadata.json
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"os": "Linux-5.4.0-162-generic-x86_64-with-glibc2.31",
|
| 3 |
+
"python": "3.9.19",
|
| 4 |
+
"startedAt": "2024-11-02T00:19:26.870793Z",
|
| 5 |
+
"args": [
|
| 6 |
+
"--perturbation",
|
| 7 |
+
"shuffle_nondeterministic",
|
| 8 |
+
"--train_set",
|
| 9 |
+
"10M",
|
| 10 |
+
"--batch_size",
|
| 11 |
+
"3",
|
| 12 |
+
"--epoch",
|
| 13 |
+
"3",
|
| 14 |
+
"--seed",
|
| 15 |
+
"0"
|
| 16 |
+
],
|
| 17 |
+
"program": "/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/train_deep_wandb.py",
|
| 18 |
+
"codePath": "train/train_deep_wandb.py",
|
| 19 |
+
"git": {
|
| 20 |
+
"remote": "git@hf.co:Yaning1001/Impossible_llm.git",
|
| 21 |
+
"commit": "ed716cdcfcdea02b67f7ed0f3504c2b1c8b737c4"
|
| 22 |
+
},
|
| 23 |
+
"email": "yaning1001@gmail.com",
|
| 24 |
+
"root": "/mnt/ssd3/chunhui/yaning/project/impossible_llm/train",
|
| 25 |
+
"host": "mms-large-2",
|
| 26 |
+
"username": "chunhui",
|
| 27 |
+
"executable": "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/bin/python",
|
| 28 |
+
"codePathLocal": "train_deep_wandb.py",
|
| 29 |
+
"cpu_count": 32,
|
| 30 |
+
"cpu_count_logical": 64,
|
| 31 |
+
"gpu": "NVIDIA RTX A6000",
|
| 32 |
+
"gpu_count": 8,
|
| 33 |
+
"disk": {
|
| 34 |
+
"/": {
|
| 35 |
+
"total": "1888559353856",
|
| 36 |
+
"used": "1754803675136"
|
| 37 |
+
}
|
| 38 |
+
},
|
| 39 |
+
"memory": {
|
| 40 |
+
"total": "202617098240"
|
| 41 |
+
},
|
| 42 |
+
"cpu": {
|
| 43 |
+
"count": 32,
|
| 44 |
+
"countLogical": 64
|
| 45 |
+
},
|
| 46 |
+
"gpu_nvidia": [
|
| 47 |
+
{
|
| 48 |
+
"name": "NVIDIA RTX A6000",
|
| 49 |
+
"memoryTotal": "51527024640",
|
| 50 |
+
"cudaCores": 10752,
|
| 51 |
+
"architecture": "Ampere"
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"name": "NVIDIA RTX A6000",
|
| 55 |
+
"memoryTotal": "51527024640",
|
| 56 |
+
"cudaCores": 10752,
|
| 57 |
+
"architecture": "Ampere"
|
| 58 |
+
},
|
| 59 |
+
{
|
| 60 |
+
"name": "NVIDIA RTX A6000",
|
| 61 |
+
"memoryTotal": "51527024640",
|
| 62 |
+
"cudaCores": 10752,
|
| 63 |
+
"architecture": "Ampere"
|
| 64 |
+
},
|
| 65 |
+
{
|
| 66 |
+
"name": "NVIDIA RTX A6000",
|
| 67 |
+
"memoryTotal": "51527024640",
|
| 68 |
+
"cudaCores": 10752,
|
| 69 |
+
"architecture": "Ampere"
|
| 70 |
+
},
|
| 71 |
+
{
|
| 72 |
+
"name": "NVIDIA RTX A6000",
|
| 73 |
+
"memoryTotal": "51527024640",
|
| 74 |
+
"cudaCores": 10752,
|
| 75 |
+
"architecture": "Ampere"
|
| 76 |
+
},
|
| 77 |
+
{
|
| 78 |
+
"name": "NVIDIA RTX A6000",
|
| 79 |
+
"memoryTotal": "51527024640",
|
| 80 |
+
"cudaCores": 10752,
|
| 81 |
+
"architecture": "Ampere"
|
| 82 |
+
},
|
| 83 |
+
{
|
| 84 |
+
"name": "NVIDIA RTX A6000",
|
| 85 |
+
"memoryTotal": "51527024640",
|
| 86 |
+
"cudaCores": 10752,
|
| 87 |
+
"architecture": "Ampere"
|
| 88 |
+
},
|
| 89 |
+
{
|
| 90 |
+
"name": "NVIDIA RTX A6000",
|
| 91 |
+
"memoryTotal": "51527024640",
|
| 92 |
+
"cudaCores": 10752,
|
| 93 |
+
"architecture": "Ampere"
|
| 94 |
+
}
|
| 95 |
+
],
|
| 96 |
+
"cudaVersion": "11.8"
|
| 97 |
+
}
|