diff --git a/.gitattributes b/.gitattributes index 0d931251c7c39c44439a9ede45ff0242e6666184..18972b9ac4784437c2764dfcb34198a6a608b58a 100644 --- a/.gitattributes +++ b/.gitattributes @@ -35,3 +35,10 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text Finetune-MULTI/wandb/run-20260324_040756-vgxlb14a/run-vgxlb14a.wandb filter=lfs diff=lfs merge=lfs -text Finetune-MULTI/wandb/run-20260324_041355-7nzklvmz/run-7nzklvmz.wandb filter=lfs diff=lfs merge=lfs -text +Finetune-GenomicBenchmarks/wandb/run-20260324_043205-qaue51mb/run-qaue51mb.wandb filter=lfs diff=lfs merge=lfs -text +Finetune-GenomicBenchmarks/wandb/run-20260324_043608-md2c6b0g/run-md2c6b0g.wandb filter=lfs diff=lfs merge=lfs -text +Finetune-GenomicBenchmarks/wandb/run-20260324_044256-f0nbbv1z/run-f0nbbv1z.wandb filter=lfs diff=lfs merge=lfs -text +Finetune-GenomicBenchmarks/wandb/run-20260324_045330-ljb653t8/run-ljb653t8.wandb filter=lfs diff=lfs merge=lfs -text +Finetune-GenomicBenchmarks/wandb/run-20260324_133444-pfwf0zeu/run-pfwf0zeu.wandb filter=lfs diff=lfs merge=lfs -text +Finetune-GenomicBenchmarks/wandb/run-20260324_134050-qxvbi54v/run-qxvbi54v.wandb filter=lfs diff=lfs merge=lfs -text +Finetune-GenomicBenchmarks/wandb/run-20260324_153401-oww9zr78/run-oww9zr78.wandb filter=lfs diff=lfs merge=lfs -text diff --git a/Finetune-GenomicBenchmarks/.gitattributes b/Finetune-GenomicBenchmarks/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/Finetune-GenomicBenchmarks/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs 
merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/Finetune-GenomicBenchmarks/__pycache__/tokenization_dna.cpython-39.pyc b/Finetune-GenomicBenchmarks/__pycache__/tokenization_dna.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..325d61d10ac18a842be343c6163c536df237902d Binary files /dev/null and b/Finetune-GenomicBenchmarks/__pycache__/tokenization_dna.cpython-39.pyc differ diff --git a/Finetune-GenomicBenchmarks/__pycache__/tokenization_motif.cpython-39.pyc b/Finetune-GenomicBenchmarks/__pycache__/tokenization_motif.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5f8b55917d883c9cfaf78ab7379b4c5a04232de9 Binary files /dev/null and b/Finetune-GenomicBenchmarks/__pycache__/tokenization_motif.cpython-39.pyc differ diff --git a/Finetune-GenomicBenchmarks/data_download.py 
b/Finetune-GenomicBenchmarks/data_download.py new file mode 100644 index 0000000000000000000000000000000000000000..d2fe14fb02e7bd1b3d50fcf444820ba380404749 --- /dev/null +++ b/Finetune-GenomicBenchmarks/data_download.py @@ -0,0 +1,23 @@ +from datasets import load_dataset +from pathlib import Path + +datasets = [ + "demo_human_or_worm", + "dummy_mouse_enhancers_ensembl", + "human_enhancers_ensembl", + "human_nontata_promoters", + "demo_coding_vs_intergenomic_seqs", + "drosophila_enhancers_stark", + "human_enhancers_cohn", + "human_ensembl_regulatory", + "human_ocr_ensembl", +] + +out_root = Path("hf_raw") +out_root.mkdir(parents=True, exist_ok=True) + +for name in datasets: + hf_id = f"katarinagresova/Genomic_Benchmarks_{name}" + ds = load_dataset(hf_id) # downloads to the HF cache + ds.save_to_disk(out_root / name) # optional: persist locally for reuse + print(f"downloaded {hf_id}") diff --git a/Finetune-GenomicBenchmarks/data_split.py b/Finetune-GenomicBenchmarks/data_split.py new file mode 100644 index 0000000000000000000000000000000000000000..9fd44708064bf90768d39a87b5536c1ad7901797 --- /dev/null +++ b/Finetune-GenomicBenchmarks/data_split.py @@ -0,0 +1,28 @@ +from datasets import load_from_disk, concatenate_datasets +from pathlib import Path +import pandas as pd + +src_root = Path("hf_raw") +dst_root = Path("ft_data") +seed = 42 + +for ds_dir in src_root.iterdir(): + if not ds_dir.is_dir(): + continue + ds = load_from_disk(ds_dir) + + # Combine all available splits, shuffle, then 80/10/10 + full = concatenate_datasets([ds[s] for s in ds.keys()]) + full = full.shuffle(seed=seed) + split1 = full.train_test_split(test_size=0.2, seed=seed) + train = split1["train"] + split2 = split1["test"].train_test_split(test_size=0.5, seed=seed) + dev, test = split2["train"], split2["test"] + + out = dst_root / ds_dir.name / "split" + out.mkdir(parents=True, exist_ok=True) + for name, subset in [("train", train), ("dev", dev), ("test", test)]: + subset.to_pandas()[["seq", 
"label"]].rename( + columns={"seq": "sequence", "label": "labels"} + ).to_csv(out / f"{name}.csv", sep="\t", index=False) + print(f"Wrote {out/f'{name}.csv'}") diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2.log b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2.log new file mode 100644 index 0000000000000000000000000000000000000000..43e96cab14c76e533ffddde22d27ac555807ad86 --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2.log @@ -0,0 +1,1793 @@ +nohup: ignoring input +[RUN ] base5120_demo_coding_vs_intergenomic_seqs_lr3e-5_wd0.0_wr0.06_ep3_seed42 +wandb: Appending key for api.wandb.ai to your netrc file: /home/nanhuang/.netrc +wandb: Currently logged in as: n5huang (n5huang-uc-san-diego) to https://api.wandb.ai. Use `wandb login --relogin` to force relogin +/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/utils/generic.py:441: FutureWarning: `torch.utils._pytree._register_pytree_node` is deprecated. Please use `torch.utils._pytree.register_pytree_node` instead. + _torch_pytree._register_pytree_node( +/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/utils/generic.py:309: FutureWarning: `torch.utils._pytree._register_pytree_node` is deprecated. Please use `torch.utils._pytree.register_pytree_node` instead. + _torch_pytree._register_pytree_node( +/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/utils/generic.py:309: FutureWarning: `torch.utils._pytree._register_pytree_node` is deprecated. Please use `torch.utils._pytree.register_pytree_node` instead. + _torch_pytree._register_pytree_node( +wandb: setting up run qaue51mb +wandb: Tracking run with wandb version 0.23.1 +wandb: Run data is saved locally in /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_043205-qaue51mb +wandb: Run `wandb offline` to turn off syncing. 
+wandb: Syncing run misunderstood-snowball-1 +wandb: ⭐️ View project at https://wandb.ai/n5huang-uc-san-diego/genomic_bench_DNAbert2 +wandb: 🚀 View run at https://wandb.ai/n5huang-uc-san-diego/genomic_bench_DNAbert2/runs/qaue51mb +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... +Some weights of BertForSequenceClassification were not initialized from the model checkpoint at /data/nanhuang/Nan/models/DNAbert2_Pretrained and are newly initialized: ['bert.pooler.dense.weight', 'classifier.bias', 'bert.pooler.dense.bias', 'classifier.weight'] +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. +/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/accelerate/accelerator.py:439: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead. + self.scaler = torch.cuda.amp.GradScaler(**kwargs) +Using auto half precision backend +***** Running training ***** + Num examples = 80,000 + Num Epochs = 3 + Instantaneous batch size per device = 128 + Total train batch size (w. 
parallel, distributed & accumulation) = 128 + Gradient Accumulation steps = 1 + Total optimization steps = 1,875 + Number of trainable parameters = 89,188,610 +Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true" + 0%| | 0/1875 [00:00 + train() + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 454, in train + trainer.train() + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 1555, in train + return inner_training_loop( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 1860, in _inner_training_loop + tr_loss_step = self.training_step(model, inputs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 2725, in training_step + loss = self.compute_loss(model, inputs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 2748, in compute_loss + outputs = model(**inputs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/accelerate/utils/operations.py", line 680, in forward + return model_forward(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/accelerate/utils/operations.py", line 668, in __call__ + return convert_to_fp32(self.model_forward(*args, **kwargs)) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/amp/autocast_mode.py", line 44, in decorate_autocast + return func(*args, **kwargs) + File 
"/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 1564, in forward + outputs = self.bert( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 1013, in forward + encoder_outputs = self.encoder( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 607, in forward + layer_outputs = layer_module( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 497, in forward + self_attention_outputs = self.attention( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", 
line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 427, in forward + self_outputs = self.self( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 355, in forward + attention_probs = nn.functional.softmax(attention_scores, dim=-1) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/functional.py", line 2137, in softmax + ret = input.softmax(dim) +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.50 GiB. GPU 0 has a total capacity of 47.53 GiB of which 1.18 GiB is free. Including non-PyTorch memory, this process has 46.32 GiB memory in use. Of the allocated memory 44.69 GiB is allocated by PyTorch, and 1.31 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. 
See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) +Traceback (most recent call last): + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 473, in + train() + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 454, in train + trainer.train() + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 1555, in train + return inner_training_loop( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 1860, in _inner_training_loop + tr_loss_step = self.training_step(model, inputs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 2725, in training_step + loss = self.compute_loss(model, inputs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 2748, in compute_loss + outputs = model(**inputs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/accelerate/utils/operations.py", line 680, in forward + return model_forward(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/accelerate/utils/operations.py", line 668, in __call__ + return convert_to_fp32(self.model_forward(*args, **kwargs)) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/amp/autocast_mode.py", line 44, in decorate_autocast + return func(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 1564, in 
forward + outputs = self.bert( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 1013, in forward + encoder_outputs = self.encoder( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 607, in forward + layer_outputs = layer_module( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 497, in forward + self_attention_outputs = self.attention( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File 
"/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 427, in forward + self_outputs = self.self( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 355, in forward + attention_probs = nn.functional.softmax(attention_scores, dim=-1) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/functional.py", line 2137, in softmax + ret = input.softmax(dim) +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.50 GiB. GPU 0 has a total capacity of 47.53 GiB of which 1.18 GiB is free. Including non-PyTorch memory, this process has 46.32 GiB memory in use. Of the allocated memory 44.69 GiB is allocated by PyTorch, and 1.31 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. 
See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) +wandb: +wandb: 🚀 View run pleasant-sun-6 at:  +wandb: Find logs at: wandb/run-20260324_052750-4zki9d15/logs diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/demo_coding_vs_intergenomic_seqs/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.06_ep3_seed42/checkpoint-1875/config.json b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/demo_coding_vs_intergenomic_seqs/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.06_ep3_seed42/checkpoint-1875/config.json new file mode 100644 index 0000000000000000000000000000000000000000..45e4c6c10a6211acf374c78e8078ab7ac74985f9 --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/demo_coding_vs_intergenomic_seqs/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.06_ep3_seed42/checkpoint-1875/config.json @@ -0,0 +1,27 @@ +{ + "_name_or_path": "/data/nanhuang/Nan/models/DNAbert2_Pretrained", + "architectures": [ + "BertForSequenceClassification" + ], + "attention_probs_dropout_prob": 0.1, + "classifier_dropout": null, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_norm_eps": 1e-12, + "max_length": 512, + "max_position_embeddings": 512, + "model_type": "bert", + "num_attention_heads": 12, + "num_hidden_layers": 12, + "pad_token_id": 0, + "position_embedding_type": "absolute", + "problem_type": "single_label_classification", + "torch_dtype": "float32", + "transformers_version": "4.35.2", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 4096 +} diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/demo_coding_vs_intergenomic_seqs/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.06_ep3_seed42/checkpoint-1875/model.safetensors b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/demo_coding_vs_intergenomic_seqs/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.06_ep3_seed42/checkpoint-1875/model.safetensors new 
file mode 100644 index 0000000000000000000000000000000000000000..6675fbc2734adb77fa626e76fda9965205061ea6 --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/demo_coding_vs_intergenomic_seqs/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.06_ep3_seed42/checkpoint-1875/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:021ad14b2f38dbff7f8eeea18f7132f657a60426d38ab02c696c2744fa19ccf6 +size 356777880 diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/demo_coding_vs_intergenomic_seqs/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.06_ep3_seed42/checkpoint-1875/optimizer.pt b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/demo_coding_vs_intergenomic_seqs/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.06_ep3_seed42/checkpoint-1875/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..5890dfef25264bbfc925c285678c0d33ca1ed401 --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/demo_coding_vs_intergenomic_seqs/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.06_ep3_seed42/checkpoint-1875/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96fa9e420b35ab5abb870bfc4fbcf26b1e0681ccb0c1bd1cdd4dfdc78bb41f50 +size 713677451 diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/demo_coding_vs_intergenomic_seqs/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.06_ep3_seed42/checkpoint-1875/rng_state.pth b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/demo_coding_vs_intergenomic_seqs/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.06_ep3_seed42/checkpoint-1875/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..85e8d4159b9fafa99dc8a9113177d4b797dabdfd --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/demo_coding_vs_intergenomic_seqs/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.06_ep3_seed42/checkpoint-1875/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:52f47a8b0e975cf59e02cf3ebc58d149a406e70a9b82323945324cc788745b08 +size 14645 diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/demo_coding_vs_intergenomic_seqs/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.06_ep3_seed42/checkpoint-1875/scheduler.pt b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/demo_coding_vs_intergenomic_seqs/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.06_ep3_seed42/checkpoint-1875/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..d9345c28629ce44813b14cb461f141165daea984 --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/demo_coding_vs_intergenomic_seqs/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.06_ep3_seed42/checkpoint-1875/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48c44eacc41015ff6a31c3be859e9d08482db72ae3540b67fdb6bec6cf82b0a8 +size 1465 diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/demo_coding_vs_intergenomic_seqs/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.06_ep3_seed42/checkpoint-1875/special_tokens_map.json b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/demo_coding_vs_intergenomic_seqs/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.06_ep3_seed42/checkpoint-1875/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..a8b3208c2884c4efb86e49300fdd3dc877220cdf --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/demo_coding_vs_intergenomic_seqs/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.06_ep3_seed42/checkpoint-1875/special_tokens_map.json @@ -0,0 +1,7 @@ +{ + "cls_token": "[CLS]", + "mask_token": "[MASK]", + "pad_token": "[PAD]", + "sep_token": "[SEP]", + "unk_token": "[UNK]" +} diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/demo_coding_vs_intergenomic_seqs/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.06_ep3_seed42/checkpoint-1875/tokenizer.json 
b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/demo_coding_vs_intergenomic_seqs/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.06_ep3_seed42/checkpoint-1875/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c390760bdfa97b696a762628a15dd3bf7932038a --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/demo_coding_vs_intergenomic_seqs/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.06_ep3_seed42/checkpoint-1875/tokenizer.json @@ -0,0 +1,8340 @@ +{ + "version": "1.0", + "truncation": { + "direction": "Right", + "max_length": 100, + "strategy": "LongestFirst", + "stride": 0 + }, + "padding": { + "strategy": "BatchLongest", + "direction": "Right", + "pad_to_multiple_of": null, + "pad_id": 3, + "pad_type_id": 0, + "pad_token": "[PAD]" + }, + "added_tokens": [ + { + "id": 0, + "content": "[UNK]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "[CLS]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "[SEP]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 3, + "content": "[PAD]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 4, + "content": "[MASK]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": null, + "pre_tokenizer": { + "type": "Whitespace" + }, + "post_processor": { + "type": "TemplateProcessing", + "single": [ + { + "SpecialToken": { + "id": "[CLS]", + "type_id": 0 + } + }, + { + "Sequence": { + "id": "A", + "type_id": 0 + } + }, + { + "SpecialToken": { + "id": "[SEP]", + "type_id": 0 + } + } + ], + "pair": [ + { + "SpecialToken": { + "id": "[CLS]", + "type_id": 0 + } + }, + { + "Sequence": { + "id": "A", + "type_id": 0 + 
} + }, + { + "SpecialToken": { + "id": "[SEP]", + "type_id": 0 + } + }, + { + "Sequence": { + "id": "B", + "type_id": 1 + } + }, + { + "SpecialToken": { + "id": "[SEP]", + "type_id": 1 + } + } + ], + "special_tokens": { + "[CLS]": { + "id": "[CLS]", + "ids": [ + 1 + ], + "tokens": [ + "[CLS]" + ] + }, + "[SEP]": { + "id": "[SEP]", + "ids": [ + 2 + ], + "tokens": [ + "[SEP]" + ] + } + } + }, + "decoder": null, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": "[UNK]", + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "vocab": { + "[UNK]": 0, + "[CLS]": 1, + "[SEP]": 2, + "[PAD]": 3, + "[MASK]": 4, + "A": 5, + "C": 6, + "G": 7, + "T": 8, + "AA": 9, + "TT": 10, + "TG": 11, + "CA": 12, + "CC": 13, + "TA": 14, + "GG": 15, + "TC": 16, + "GA": 17, + "AAA": 18, + "GC": 19, + "TAA": 20, + "TTTT": 21, + "TCA": 22, + "TGA": 23, + "TTA": 24, + "GAA": 25, + "TCC": 26, + "CAA": 27, + "CTG": 28, + "CTT": 29, + "GTG": 30, + "GTT": 31, + "GCA": 32, + "GGA": 33, + "CCA": 34, + "GTA": 35, + "GCC": 36, + "CTA": 37, + "TAAA": 38, + "AAAA": 39, + "CTC": 40, + "GTC": 41, + "TGTG": 42, + "TATT": 43, + "CACA": 44, + "GAAA": 45, + "TATA": 46, + "TCTT": 47, + "TGTT": 48, + "CAAA": 49, + "GAGA": 50, + "CATT": 51, + "TGAA": 52, + "CAGG": 53, + "TCTG": 54, + "CAGA": 55, + "TCAA": 56, + "GGAA": 57, + "TAAAA": 58, + "CTGA": 59, + "GCTT": 60, + "GTGA": 61, + "GCTG": 62, + "CTCA": 63, + "CCTT": 64, + "CATG": 65, + "GCAA": 66, + "GTCA": 67, + "GTAA": 68, + "TTTTA": 69, + "TATG": 70, + "GAGG": 71, + "CGG": 72, + "GATT": 73, + "CCTG": 74, + "TCTC": 75, + "CCAA": 76, + "GTTA": 77, + "CTCC": 78, + "CTAA": 79, + "TACA": 80, + "CTTA": 81, + "TCCA": 82, + "GATG": 83, + "TTAA": 84, + "GAAAA": 85, + "TTTG": 86, + "GTTTT": 87, + "TCTA": 88, + "GCCA": 89, + "GTCC": 90, + "CTTTT": 91, + "GGGG": 92, + "CGA": 93, + "TTTA": 94, + "CCCA": 95, + "CAAAA": 96, + "TGGG": 97, + "TAGA": 98, + "TAGG": 99, + "GACA": 100, + "GGTT": 101, + 
"CCCC": 102, + "GGTG": 103, + "CATA": 104, + "GCTA": 105, + "TGTA": 106, + "TCAAA": 107, + "TGGA": 108, + "TAATT": 109, + "TTATT": 110, + "TGCA": 111, + "GGCA": 112, + "GATA": 113, + "CCTA": 114, + "TTCA": 115, + "TCTCA": 116, + "GGGA": 117, + "CGC": 118, + "CTGAA": 119, + "GTAAA": 120, + "TCTCC": 121, + "TTTTTT": 122, + "CGTG": 123, + "GCAAA": 124, + "TAAAAA": 125, + "TCTGA": 126, + "TCATT": 127, + "GGAAA": 128, + "TGAAA": 129, + "TCCTT": 130, + "CCAAA": 131, + "GAATT": 132, + "CTAAA": 133, + "CGTT": 134, + "GTGAA": 135, + "GGCC": 136, + "TAATA": 137, + "GGTA": 138, + "TGCC": 139, + "CACC": 140, + "TGATT": 141, + "AAAAAA": 142, + "GCTCA": 143, + "TCCAA": 144, + "GAGAA": 145, + "CTGTT": 146, + "TATTA": 147, + "CAGCA": 148, + "CTCTT": 149, + "CTTAA": 150, + "CAGAA": 151, + "GCTGA": 152, + "GTTAA": 153, + "TCTTA": 154, + "TATTTT": 155, + "GCCAA": 156, + "CTTTG": 157, + "GACC": 158, + "CGCA": 159, + "GTATT": 160, + "GTCTT": 161, + "CAATT": 162, + "GTGTT": 163, + "CTCAA": 164, + "GGAGG": 165, + "CGAA": 166, + "TCTTTT": 167, + "GTCAA": 168, + "CGCC": 169, + "TATAA": 170, + "TACC": 171, + "TCTAA": 172, + "CCATT": 173, + "CGGA": 174, + "CAAAAA": 175, + "CAGTG": 176, + "TCCTG": 177, + "CTCTG": 178, + "GAAAAA": 179, + "CTGTG": 180, + "CAGC": 181, + "TTTTAA": 182, + "GCATT": 183, + "GCCTT": 184, + "TAATG": 185, + "CTATT": 186, + "GTTTG": 187, + "TGATG": 188, + "GGCTG": 189, + "CCTCA": 190, + "GAGGA": 191, + "GCCTG": 192, + "AAATT": 193, + "CGTA": 194, + "TCAAAA": 195, + "TACAA": 196, + "CATCA": 197, + "CAGTT": 198, + "TGAGA": 199, + "GGGAA": 200, + "CACTG": 201, + "CACAA": 202, + "CAGGA": 203, + "CCCCA": 204, + "CCCTG": 205, + "TTTTTTTT": 206, + "TAGAA": 207, + "GAGCA": 208, + "CCTCC": 209, + "CACCA": 210, + "TATCA": 211, + "GAGC": 212, + "CATTA": 213, + "CACACACA": 214, + "GAGTG": 215, + "GGATT": 216, + "TGTGTGTG": 217, + "TACTT": 218, + "CACTT": 219, + "GTCTG": 220, + "TGAGG": 221, + "GAGTT": 222, + "GAATG": 223, + "TCATG": 224, + "GACAA": 225, + "GACTT": 226, + "TATTAA": 
227, + "TAATAA": 228, + "GGCCA": 229, + "CATTTT": 230, + "CAGCC": 231, + "CCCTT": 232, + "GCTAA": 233, + "TATATATA": 234, + "GTGTG": 235, + "TACTG": 236, + "TAGTT": 237, + "CAATG": 238, + "GCTC": 239, + "CAGTA": 240, + "GCTCC": 241, + "CATAA": 242, + "TTATG": 243, + "TAAATT": 244, + "GATGA": 245, + "CATGA": 246, + "GCGG": 247, + "AAAAAAAA": 248, + "CCATG": 249, + "GATAA": 250, + "GACTG": 251, + "TATGA": 252, + "GCAGG": 253, + "GATCA": 254, + "GTTTTA": 255, + "GGATG": 256, + "CCTGA": 257, + "GTAAAA": 258, + "GAAGG": 259, + "GATTA": 260, + "CCTC": 261, + "GACCA": 262, + "GCTTA": 263, + "CCCAA": 264, + "AAATG": 265, + "GCATG": 266, + "TAGTA": 267, + "TACCA": 268, + "GGCTT": 269, + "CGTC": 270, + "TCTCTT": 271, + "GGTCA": 272, + "TTATTA": 273, + "TACTA": 274, + "TAGCA": 275, + "TATC": 276, + "CTGGG": 277, + "CATC": 278, + "CTTTTA": 279, + "CTAAAA": 280, + "GTGGG": 281, + "GAGTA": 282, + "CCAGG": 283, + "GATTTT": 284, + "TAGTG": 285, + "GAAATT": 286, + "CACTA": 287, + "TCGG": 288, + "TCAGG": 289, + "CAGGAA": 290, + "GCAAAA": 291, + "CCTTA": 292, + "CATCC": 293, + "CTTGG": 294, + "TGTGAA": 295, + "TATTTG": 296, + "CCTAA": 297, + "CTATG": 298, + "GAGAAA": 299, + "GAGAGAGA": 300, + "GCTTTT": 301, + "TATAAA": 302, + "CAAGG": 303, + "TCTCTG": 304, + "TGTTAA": 305, + "TGTGTT": 306, + "GAGCC": 307, + "GACTA": 308, + "TATATT": 309, + "TAAAAAA": 310, + "TTTTTG": 311, + "GTATG": 312, + "CATTAA": 313, + "TAGGA": 314, + "TAGC": 315, + "GTTGG": 316, + "GAAGAA": 317, + "TAAATG": 318, + "TCTGTT": 319, + "CAGAAA": 320, + "CAAATT": 321, + "TAATTA": 322, + "TCTGTG": 323, + "TATCC": 324, + "TGAATT": 325, + "CTCCA": 326, + "GTGAAA": 327, + "GGCAA": 328, + "GGAGA": 329, + "GAAGA": 330, + "GGTGA": 331, + "GGGCA": 332, + "CCAAAA": 333, + "TCTCTCTC": 334, + "CTGCA": 335, + "CTTCTT": 336, + "TCTTAA": 337, + "CCCTA": 338, + "TGTGTG": 339, + "AAATA": 340, + "TGTTTG": 341, + "GGGTT": 342, + "GTGCTG": 343, + "GGAAAA": 344, + "GGGGA": 345, + "TCAGA": 346, + "CCTTTT": 347, + "GAAATG": 348, + 
"GCAGCA": 349, + "TCTGAA": 350, + "GGGTG": 351, + "CACATT": 352, + "TCTTTG": 353, + "GGGC": 354, + "TCCCA": 355, + "TCCATT": 356, + "CTGAAA": 357, + "CTTTA": 358, + "TCGA": 359, + "GTTTA": 360, + "CAACAA": 361, + "CTTCC": 362, + "GCCTCC": 363, + "TTAAA": 364, + "GCTCTG": 365, + "GTTTCA": 366, + "GGAGGA": 367, + "CGTGA": 368, + "CAGTC": 369, + "GAATA": 370, + "CAGAGA": 371, + "CCCTC": 372, + "CAAATG": 373, + "CTGCTG": 374, + "GATCC": 375, + "TTTTATT": 376, + "AAAATT": 377, + "TTATA": 378, + "TCAATT": 379, + "GGTAA": 380, + "GTTATT": 381, + "GCCAGG": 382, + "GGAGAA": 383, + "CATTTG": 384, + "TCACC": 385, + "CTCAAA": 386, + "GGTTA": 387, + "TCCAAA": 388, + "TCTATT": 389, + "GCAGA": 390, + "CTTCA": 391, + "TCATCA": 392, + "CGAGG": 393, + "TAACA": 394, + "GTTGTT": 395, + "CTTATT": 396, + "CGTCA": 397, + "TAAGA": 398, + "TAATTTT": 399, + "CTGTA": 400, + "TCCACA": 401, + "GCTGTG": 402, + "CGCTG": 403, + "TCTAAA": 404, + "GCGA": 405, + "CAATA": 406, + "CCACCA": 407, + "GAACA": 408, + "CGAAA": 409, + "CAGATT": 410, + "TCACA": 411, + "TTATTTT": 412, + "TCTCAA": 413, + "TGACA": 414, + "CTCCAA": 415, + "AAAAAAA": 416, + "TATATG": 417, + "TCCTCC": 418, + "TCACTT": 419, + "TCCAGG": 420, + "CAAGA": 421, + "GGCTA": 422, + "GTGGTG": 423, + "CGTAA": 424, + "CGAGA": 425, + "TGATA": 426, + "GGATTA": 427, + "CAACA": 428, + "CGATT": 429, + "TGAGAA": 430, + "CTCCTT": 431, + "CTCATT": 432, + "GTTAAA": 433, + "TCATA": 434, + "CCTCTG": 435, + "CTCTA": 436, + "GCTGAA": 437, + "CTGGA": 438, + "TAAGG": 439, + "CTTAAA": 440, + "TATTTA": 441, + "CCACA": 442, + "CCGG": 443, + "GTCAAA": 444, + "TGGAA": 445, + "CGGAA": 446, + "TGATGA": 447, + "GTTCA": 448, + "TAACAA": 449, + "GCTGTT": 450, + "TAAGAA": 451, + "CTGCC": 452, + "TTAATT": 453, + "CCAGA": 454, + "TCAGAA": 455, + "GTCATT": 456, + "CGCTT": 457, + "GATTAA": 458, + "CTGATT": 459, + "GCCACA": 460, + "GTAATT": 461, + "TCCAGA": 462, + "GCCAAA": 463, + "GTGATT": 464, + "TAAAATT": 465, + "CAAGAA": 466, + "CCACC": 467, + "TAATCC": 468, + "GTTCTT": 
469, + "TCCATG": 470, + "GCTCTT": 471, + "TGCTG": 472, + "GGGTA": 473, + "TTACA": 474, + "GCCATT": 475, + "GCACA": 476, + "GCAATT": 477, + "TCCCTG": 478, + "TGTGA": 479, + "TCGAA": 480, + "GGACA": 481, + "GGAATT": 482, + "GTGGA": 483, + "CTTCTG": 484, + "TCCCC": 485, + "GCCCC": 486, + "CTTGA": 487, + "TAATGA": 488, + "TAAATA": 489, + "TATATA": 490, + "CTGCAA": 491, + "TCATTA": 492, + "GTATA": 493, + "TCCCCA": 494, + "CGTTA": 495, + "GCAGAA": 496, + "TGAGTT": 497, + "CTTTTTT": 498, + "CGATG": 499, + "CTTTCA": 500, + "AAAATG": 501, + "CAGGTT": 502, + "CTAATT": 503, + "CGCCA": 504, + "TGAAAAA": 505, + "GTTCC": 506, + "GTCCTT": 507, + "GTCCAA": 508, + "GTTTTTT": 509, + "CTCTGA": 510, + "GCGC": 511, + "GTTGA": 512, + "TGAATG": 513, + "CTATA": 514, + "GCAGTG": 515, + "CCTTAA": 516, + "TCACCA": 517, + "TCACTG": 518, + "GCCCTG": 519, + "TAACTT": 520, + "CAGATG": 521, + "GTAGG": 522, + "TCTATA": 523, + "GAGATT": 524, + "GTCTA": 525, + "TTTTAAA": 526, + "CACATG": 527, + "TGACC": 528, + "CACAAA": 529, + "GTGTA": 530, + "GGGAGG": 531, + "GCTTTG": 532, + "CAAAAAA": 533, + "GAGGAA": 534, + "GTTCTG": 535, + "TTTTTA": 536, + "GTCTCA": 537, + "GTTCAA": 538, + "TCGTG": 539, + "GCTTAA": 540, + "GCACC": 541, + "CTCCTG": 542, + "TAAATAAA": 543, + "CTACA": 544, + "CTTCCA": 545, + "TCCTCA": 546, + "CGCAA": 547, + "GAAAAAA": 548, + "GCCCA": 549, + "TCGTT": 550, + "GTAGA": 551, + "CTCTCA": 552, + "GTCCA": 553, + "TGACTT": 554, + "TCCCTT": 555, + "GCCATG": 556, + "CACACACACACACACA": 557, + "GTGATG": 558, + "CCTCTT": 559, + "GCCAGA": 560, + "TCCTA": 561, + "CGTTTT": 562, + "GTACA": 563, + "GCATA": 564, + "GAATTA": 565, + "TGTGTGTGTGTGTGTG": 566, + "CCCAGG": 567, + "GGTTTT": 568, + "TCAAAAA": 569, + "TCTATG": 570, + "CCATA": 571, + "TGACAA": 572, + "GGATA": 573, + "TCAGTG": 574, + "GTATTTT": 575, + "GAGATG": 576, + "GCGTG": 577, + "CGTCC": 578, + "TTAAAAA": 579, + "TAATCA": 580, + "CAATTA": 581, + "CCACTG": 582, + "CGGTT": 583, + "GTTGAA": 584, + "TGATTA": 585, + "CCTTTG": 586, + "CGGTG": 
587, + "CAGGTG": 588, + "TCAATG": 589, + "CTGATG": 590, + "TCAGGA": 591, + "GTTTAA": 592, + "TATTAAA": 593, + "CTCTTA": 594, + "GCAGGA": 595, + "CTCTCC": 596, + "GAACC": 597, + "CTTTAA": 598, + "GGGCC": 599, + "GTATTA": 600, + "GCGCC": 601, + "CCAATT": 602, + "GCTAAA": 603, + "TGACTG": 604, + "GATTTG": 605, + "GATAAA": 606, + "TCAGCA": 607, + "GTTCCA": 608, + "GAAATA": 609, + "GACAAA": 610, + "GAGTC": 611, + "GCTATT": 612, + "TCACAA": 613, + "GAGGTT": 614, + "TAACC": 615, + "GAAGGA": 616, + "GCTCAA": 617, + "GAAAATT": 618, + "CCAGCA": 619, + "GTTTTAA": 620, + "GTGCC": 621, + "TGAGGA": 622, + "CATAAA": 623, + "GGTCC": 624, + "TCATTTT": 625, + "TATTTATT": 626, + "TAATAAA": 627, + "GCCTA": 628, + "CTTTTAA": 629, + "TAAGTG": 630, + "TAAGTA": 631, + "CTGGAA": 632, + "CACACA": 633, + "GACAGA": 634, + "CAACC": 635, + "GGGAAA": 636, + "CCAGAA": 637, + "TCAGTT": 638, + "TAACTA": 639, + "CTAAAAA": 640, + "TGGGTT": 641, + "TGAGTG": 642, + "TAAAATG": 643, + "TATATATATATATATA": 644, + "GCACTG": 645, + "GACTC": 646, + "TACAAA": 647, + "TAAAAAAA": 648, + "TCTACA": 649, + "GTTGTG": 650, + "TCGCC": 651, + "CCCAAA": 652, + "GTCATG": 653, + "CTGCTT": 654, + "GGAATG": 655, + "CTATTA": 656, + "GATATT": 657, + "TAGAAA": 658, + "GGCAGG": 659, + "GATGAA": 660, + "GTAGAA": 661, + "TCCTGA": 662, + "TAACTG": 663, + "GCTGGG": 664, + "GCAATG": 665, + "GCCCCA": 666, + "GTTTGA": 667, + "CATTTA": 668, + "GTGCA": 669, + "CTTGAA": 670, + "GTGGAA": 671, + "CTTCAA": 672, + "TAAATTA": 673, + "GTGGCA": 674, + "TCCTTA": 675, + "GGAAAAA": 676, + "TTTTTTA": 677, + "CCTGTG": 678, + "GTAATG": 679, + "GTGTTA": 680, + "CTAGG": 681, + "CAGGCTG": 682, + "GACACA": 683, + "GAAAAAAA": 684, + "TCGC": 685, + "GTAAAAA": 686, + "TGTTTA": 687, + "TCTCTA": 688, + "GTCCTG": 689, + "CCAGGA": 690, + "GAACAA": 691, + "TAAGTT": 692, + "TGAGCA": 693, + "GCTCCA": 694, + "TAAGCA": 695, + "CTCATG": 696, + "GTCTTA": 697, + "CCCACA": 698, + "CATATT": 699, + "GCCTCA": 700, + "CACTC": 701, + "CTTCTA": 702, + "TGATTTT": 703, + 
"TCGCA": 704, + "CCTGTT": 705, + "GAAGCA": 706, + "GCAAAAA": 707, + "GCGGA": 708, + "CCACAA": 709, + "GCGCA": 710, + "CATATA": 711, + "GACATT": 712, + "GTTCTA": 713, + "CAAAATT": 714, + "GAAAGAAA": 715, + "CCCGG": 716, + "TACACA": 717, + "CCAAAAA": 718, + "GAGGTG": 719, + "GGCTCA": 720, + "CAGTGA": 721, + "TCCCAA": 722, + "TATCTT": 723, + "TGAGTA": 724, + "TCGTA": 725, + "TTTTCTT": 726, + "GTGGGA": 727, + "GAGCTG": 728, + "CCCTCC": 729, + "TAGGTT": 730, + "TTAGG": 731, + "TAATATT": 732, + "CCAGCC": 733, + "CATCTT": 734, + "GTCTGA": 735, + "GTTTCC": 736, + "CCTGAA": 737, + "GGAGCA": 738, + "GAAAATG": 739, + "TCAGTA": 740, + "TAACCA": 741, + "GATGTT": 742, + "CTGTTA": 743, + "CATGTT": 744, + "GGCGG": 745, + "CATGTG": 746, + "GGGAGA": 747, + "CTTTGA": 748, + "TCTTTCTT": 749, + "AAAAAAAAA": 750, + "GGGGTG": 751, + "CTTTCC": 752, + "CTTGTT": 753, + "GCATTA": 754, + "CCCAGA": 755, + "CAAATA": 756, + "TCGGA": 757, + "CAGCTT": 758, + "TCACTA": 759, + "TAATTAA": 760, + "TAAGGA": 761, + "GAACTG": 762, + "GCACAA": 763, + "GCGTT": 764, + "GGCTC": 765, + "TCTTTTA": 766, + "CCTCCA": 767, + "GGCAAA": 768, + "CAGCTG": 769, + "CTACAA": 770, + "TACATT": 771, + "GCTATG": 772, + "CTTGTG": 773, + "GAGTCA": 774, + "GTTATG": 775, + "CTGCCA": 776, + "GTCTCC": 777, + "TGACCA": 778, + "CACCTG": 779, + "TATATTA": 780, + "TGATCA": 781, + "CAGCAA": 782, + "GATGTG": 783, + "GTCTTTT": 784, + "CTAGAA": 785, + "GCTACA": 786, + "CTGGGA": 787, + "GGGGTT": 788, + "CAAGTA": 789, + "CAAGGA": 790, + "CCCTCA": 791, + "TAGCC": 792, + "GTTGGA": 793, + "GCTATA": 794, + "TCTGAAA": 795, + "TATGTT": 796, + "CCCCTT": 797, + "GTTGTA": 798, + "CCCTGA": 799, + "TGACTA": 800, + "CAAGCA": 801, + "CAATAA": 802, + "GAACTT": 803, + "CATGAA": 804, + "CTTATG": 805, + "CTAATG": 806, + "TCTAAAA": 807, + "CCAATG": 808, + "GAAGTG": 809, + "CCTCAA": 810, + "CCCATT": 811, + "CAGTCA": 812, + "GAGAGAGAGAGAGAGA": 813, + "TATGTG": 814, + "GCAGTGA": 815, + "TCTCCTT": 816, + "TCCCAAA": 817, + "CCATTA": 818, + "CCAGTG": 819, + 
"GCATCA": 820, + "TCAAATT": 821, + "GATCTT": 822, + "GACAGG": 823, + "GGAGTG": 824, + "GTAGTA": 825, + "CAACTT": 826, + "GAAGTT": 827, + "CCCCTG": 828, + "TCTCAAA": 829, + "GGGTC": 830, + "GAGCTT": 831, + "TATGAAA": 832, + "TATGAA": 833, + "GACATG": 834, + "CAAGTG": 835, + "GATATA": 836, + "CATCTG": 837, + "CTGTGA": 838, + "TAATTTA": 839, + "GGCAGA": 840, + "GCGAA": 841, + "CCTAAA": 842, + "CCATCA": 843, + "CACTGA": 844, + "GGACTA": 845, + "GACGG": 846, + "CTCTTTT": 847, + "CTGTCA": 848, + "TCTCTCTCTCTCTCTC": 849, + "TTAATG": 850, + "GCAGCC": 851, + "CAAAAAAA": 852, + "GCACCA": 853, + "CTATTTT": 854, + "GAGCAA": 855, + "CTTGGA": 856, + "CTGGTG": 857, + "GAATAA": 858, + "TCCTTTT": 859, + "GAAGTA": 860, + "CAGTAA": 861, + "CAACCA": 862, + "CTGTAA": 863, + "TGATAA": 864, + "GCAGTT": 865, + "CACGG": 866, + "TAAATAA": 867, + "CTGTTTT": 868, + "CTACTA": 869, + "GCTCTA": 870, + "CGAAAA": 871, + "CAAGTT": 872, + "CTTGTA": 873, + "GAATGA": 874, + "GAGTGA": 875, + "GCCTGA": 876, + "GGTTTG": 877, + "CCCATG": 878, + "GGGGAA": 879, + "GAAGAAA": 880, + "TGTTA": 881, + "CAATTTT": 882, + "TATATTTT": 883, + "CTCAAAA": 884, + "GGTGGG": 885, + "CCGTG": 886, + "TATTTCA": 887, + "CCCCAA": 888, + "TATTTAA": 889, + "GGCTGA": 890, + "GGTGTG": 891, + "CATCAA": 892, + "CACTCA": 893, + "TCTCATT": 894, + "GAATTTT": 895, + "GAATCA": 896, + "CAGGAAA": 897, + "CATACA": 898, + "TATTTTA": 899, + "TTATAA": 900, + "GAGGAAA": 901, + "CATATG": 902, + "CTTTCTT": 903, + "CAACTG": 904, + "GGGCTG": 905, + "CCCCCA": 906, + "TTTGAAA": 907, + "CATTAAA": 908, + "CTTAAAA": 909, + "GACTGA": 910, + "CAATGA": 911, + "GGCACA": 912, + "CCAGTA": 913, + "GGATGA": 914, + "GTTTTTG": 915, + "GCATTTT": 916, + "GTGCCA": 917, + "GCAGTA": 918, + "GCCCTT": 919, + "TCGTC": 920, + "GAACTA": 921, + "GTGGTT": 922, + "GTGTGA": 923, + "GTGCTT": 924, + "CGCTA": 925, + "GTGTCA": 926, + "TCTTTA": 927, + "GCCTTA": 928, + "CCTATT": 929, + "CAAAATG": 930, + "GAACCA": 931, + "CTCCAGG": 932, + "GACTCA": 933, + "CATGAAA": 934, + "GCTAGG": 
935, + "TGTTAAA": 936, + "GCGTA": 937, + "GCACTT": 938, + "TCTTAAA": 939, + "TAAGAAA": 940, + "GGCCTG": 941, + "TCCCTA": 942, + "GTGGTA": 943, + "CTGCTA": 944, + "GGAGTT": 945, + "GGTAAA": 946, + "CAAACAAA": 947, + "GATATG": 948, + "TCATGA": 949, + "GACCTT": 950, + "TAATATA": 951, + "GCTAGA": 952, + "GGACTG": 953, + "GGCATT": 954, + "CAGTTA": 955, + "CCCTAA": 956, + "CACCTT": 957, + "GGTGAA": 958, + "CAGCTA": 959, + "GTGTTTT": 960, + "CAACTA": 961, + "GATCAA": 962, + "GAGAAAA": 963, + "TGTGAAA": 964, + "AAAATA": 965, + "GATGAAA": 966, + "CTCTAA": 967, + "TTACTT": 968, + "GATCTG": 969, + "CCACTT": 970, + "GAGTTA": 971, + "CAATCA": 972, + "GGATTACAGG": 973, + "TTTATTTT": 974, + "TACATA": 975, + "TTTTATG": 976, + "GAGTAA": 977, + "GCTGAAA": 978, + "GTACTG": 979, + "GCTCTC": 980, + "TATGTA": 981, + "TGTGTA": 982, + "TCATAA": 983, + "GGACTT": 984, + "TCTCCAA": 985, + "GCATGA": 986, + "GACGA": 987, + "CGCCTG": 988, + "GACCTG": 989, + "GGTCTT": 990, + "CACCAA": 991, + "GATC": 992, + "GACCAA": 993, + "AAAATTA": 994, + "GTAAATT": 995, + "CCAGTT": 996, + "CAGAAAA": 997, + "TAACAAA": 998, + "GGTGTT": 999, + "GAAATTA": 1000, + "TGCCTCA": 1001, + "CCGCC": 1002, + "CCATTTT": 1003, + "CTTGCC": 1004, + "TCTGTA": 1005, + "CTGGCA": 1006, + "GGGATG": 1007, + "CCATGA": 1008, + "CTACTT": 1009, + "TAGGTG": 1010, + "TAAAAATT": 1011, + "GAAAGAA": 1012, + "TAAAATA": 1013, + "CTTTTTG": 1014, + "GTCAAAA": 1015, + "GGACAA": 1016, + "TCTGATT": 1017, + "CTCTCTT": 1018, + "TAATTTG": 1019, + "CTCTTTG": 1020, + "GGCCTT": 1021, + "GGATTTT": 1022, + "CTACTG": 1023, + "GTTGCA": 1024, + "GGCTCC": 1025, + "CTCTGTG": 1026, + "CTCCAGCC": 1027, + "TTACAA": 1028, + "GGACCA": 1029, + "GGAAGGAA": 1030, + "TAAAGAA": 1031, + "TTAGAA": 1032, + "GTGAAAA": 1033, + "CTTGCA": 1034, + "TGGGTG": 1035, + "GGAGCC": 1036, + "CCTCTA": 1037, + "CT": 1038, + "GGGCTT": 1039, + "GGCATG": 1040, + "CTGGTT": 1041, + "TACAGA": 1042, + "GATTAAA": 1043, + "CTCTGTT": 1044, + "TTATCA": 1045, + "CTGAAAA": 1046, + "GTAGTT": 1047, + 
"GGGTCA": 1048, + "GT": 1049, + "CAGCCA": 1050, + "GCGTC": 1051, + "CACTTA": 1052, + "GTGCTA": 1053, + "TCTTATT": 1054, + "GTACTT": 1055, + "GGTATT": 1056, + "TAGAGA": 1057, + "TACATG": 1058, + "CCACTA": 1059, + "TGAGAAA": 1060, + "CAATAAA": 1061, + "TCCAAAA": 1062, + "CGTGAA": 1063, + "GGTCTG": 1064, + "CTGAATT": 1065, + "TCAGCC": 1066, + "CCTCTC": 1067, + "GTTAAAA": 1068, + "GGGATT": 1069, + "TCCTAA": 1070, + "CACTAA": 1071, + "GGAGAAA": 1072, + "CCTTCCTT": 1073, + "GTTTCTT": 1074, + "TATCAA": 1075, + "GATACA": 1076, + "TAATCCCAGCA": 1077, + "CCGCA": 1078, + "TGAAATT": 1079, + "CGTAAA": 1080, + "CTCTCTG": 1081, + "TCTTTTTT": 1082, + "GTACAA": 1083, + "CCAAATT": 1084, + "TGTATTTT": 1085, + "TCGCTT": 1086, + "GGGTGA": 1087, + "GATAGA": 1088, + "CTTTATT": 1089, + "TAAACAA": 1090, + "GTTTATT": 1091, + "TGAATA": 1092, + "CTACCA": 1093, + "GTGTCC": 1094, + "CCCGA": 1095, + "TTTATTA": 1096, + "CTCCAAA": 1097, + "TTTTTTTTTTTT": 1098, + "TCATCC": 1099, + "GAAGCC": 1100, + "CTAAATT": 1101, + "CAAATTA": 1102, + "CCCCAAA": 1103, + "TCTTCTT": 1104, + "TAGGAAA": 1105, + "CACGA": 1106, + "CATTTTA": 1107, + "GTGCAA": 1108, + "TCTCCTG": 1109, + "TATTTTAA": 1110, + "GTTTGTT": 1111, + "GAGCCA": 1112, + "GGCCAA": 1113, + "CATTTCA": 1114, + "CATCCA": 1115, + "CCTATA": 1116, + "GACTTA": 1117, + "TCAAATG": 1118, + "GTATCA": 1119, + "TAAATTTT": 1120, + "CTGAGGCA": 1121, + "GCCCAA": 1122, + "GGTTAA": 1123, + "TATCTG": 1124, + "TGACAGA": 1125, + "GGAGAGA": 1126, + "GCTGCTG": 1127, + "CCCTTA": 1128, + "TCCTCTG": 1129, + "GTAGCA": 1130, + "CCTGAAA": 1131, + "CCGAA": 1132, + "TTTTTAA": 1133, + "CTATAA": 1134, + "CCTGTA": 1135, + "TTACTG": 1136, + "GTATAA": 1137, + "GGCGA": 1138, + "GACTAA": 1139, + "TCAGAAA": 1140, + "GTGTGTG": 1141, + "CAAAGAA": 1142, + "CCTATG": 1143, + "GCAGAGA": 1144, + "CCGTT": 1145, + "TTTTATTTT": 1146, + "GGAAGAA": 1147, + "TTACTA": 1148, + "GCCTGGG": 1149, + "TCCCTC": 1150, + "TCCTCTT": 1151, + "GGATCA": 1152, + "GGTCAA": 1153, + "TCGAGA": 1154, + "TATTCTT": 1155, + 
"TACTC": 1156, + "GTTAATT": 1157, + "GCGAGA": 1158, + "CTTAATT": 1159, + "TCCTTTG": 1160, + "GTCTAA": 1161, + "CACCCA": 1162, + "GGGTTA": 1163, + "GGGCAA": 1164, + "GGAAATG": 1165, + "GCAAATT": 1166, + "TAGATG": 1167, + "GCAGAAA": 1168, + "AAAAAAAAAAAAAAAA": 1169, + "CCTACA": 1170, + "GGAGTA": 1171, + "TCTAATT": 1172, + "CAACAAA": 1173, + "TAGATT": 1174, + "GGTTTA": 1175, + "CCTAGA": 1176, + "CTTTAAA": 1177, + "TACTTA": 1178, + "TAATGAA": 1179, + "CTATCA": 1180, + "TAGTAA": 1181, + "CAGAGAA": 1182, + "CAAGAAA": 1183, + "GGGGAAA": 1184, + "CGTTAA": 1185, + "CGTGTT": 1186, + "TCTGTCTG": 1187, + "TTTTAATT": 1188, + "CTGGCC": 1189, + "TAAATGA": 1190, + "CGTCAA": 1191, + "TTAGTA": 1192, + "GTCTCTG": 1193, + "TTTTAAAA": 1194, + "CAGTTTT": 1195, + "CTTCCTT": 1196, + "TATATAA": 1197, + "GCTTTTA": 1198, + "TTTTTCA": 1199, + "GGTC": 1200, + "TTATTAA": 1201, + "TTTTGTT": 1202, + "CATAGA": 1203, + "TAGGAA": 1204, + "GAGAGAA": 1205, + "GTAGCTG": 1206, + "TTATGA": 1207, + "GTAGTG": 1208, + "GGAGAGG": 1209, + "CTCTGAA": 1210, + "TAGTC": 1211, + "GACTCC": 1212, + "TCCCTCC": 1213, + "TAATGTT": 1214, + "CATCTA": 1215, + "GCCACCA": 1216, + "GTACTA": 1217, + "TGGGAAA": 1218, + "CGCCTT": 1219, + "GCCCGG": 1220, + "GGAGGAA": 1221, + "GTACCA": 1222, + "CGCAAA": 1223, + "CATAAAA": 1224, + "TAACATT": 1225, + "GCTAAAA": 1226, + "TCTTCTG": 1227, + "GCCAAAA": 1228, + "GTATGA": 1229, + "GTCTTTG": 1230, + "TACTGA": 1231, + "TCCCAGG": 1232, + "TTATTTA": 1233, + "TTAGTT": 1234, + "GGACC": 1235, + "TATAAAA": 1236, + "CAAACAA": 1237, + "CTTCTC": 1238, + "TCTATCTA": 1239, + "GAAATAA": 1240, + "GTGTAA": 1241, + "CTTTGTT": 1242, + "GATAAAA": 1243, + "GCCCAGG": 1244, + "GCGATT": 1245, + "AAAAAATT": 1246, + "TACAGG": 1247, + "GGCTAA": 1248, + "TAGCTT": 1249, + "GTCTCTA": 1250, + "CTCCTGA": 1251, + "GAATAAA": 1252, + "TTACCA": 1253, + "GGGACA": 1254, + "GCCACTG": 1255, + "GTTTAAA": 1256, + "GTCTGTG": 1257, + "TGACAAA": 1258, + "TACATTTT": 1259, + "GCCACC": 1260, + "TGTTTT": 1261, + "TAGCAA": 1262, + 
"TTATAAA": 1263, + "GACCCA": 1264, + "GCAGC": 1265, + "CAGACAGA": 1266, + "CACAAAA": 1267, + "GCCCTA": 1268, + "TATTAAAA": 1269, + "CGTATT": 1270, + "CCATCC": 1271, + "TCGATT": 1272, + "GAAGGAA": 1273, + "GATCCA": 1274, + "TATTTGA": 1275, + "GTGAATT": 1276, + "TACCTT": 1277, + "CGTCTT": 1278, + "CCTAGG": 1279, + "TCGAAA": 1280, + "CTTTCTG": 1281, + "TGAAGAA": 1282, + "TCTCTCA": 1283, + "GTCTCTT": 1284, + "GGAGGGG": 1285, + "GTCTGTT": 1286, + "CTATGA": 1287, + "GGAAATT": 1288, + "GCACACA": 1289, + "GCCTTTT": 1290, + "CAGTCC": 1291, + "CTGGTA": 1292, + "GCATCC": 1293, + "TAGTTA": 1294, + "GGCTTA": 1295, + "GAGTCC": 1296, + "TGAAAA": 1297, + "TAGATAGA": 1298, + "TGTTTGTT": 1299, + "TACTCA": 1300, + "CATTTAA": 1301, + "GATTTTA": 1302, + "CACTCC": 1303, + "GAAACAA": 1304, + "GCGCTG": 1305, + "TCTTTCA": 1306, + "CTGTCC": 1307, + "GAACTCA": 1308, + "CGGAAA": 1309, + "TATTGTT": 1310, + "GCACTA": 1311, + "TATTCAA": 1312, + "GCGGGG": 1313, + "GTGGCC": 1314, + "TAATTAAA": 1315, + "TACTAA": 1316, + "GCGGTG": 1317, + "TACCAA": 1318, + "GGTATA": 1319, + "CTAGTT": 1320, + "GCAGAGG": 1321, + "CTTTTTTTT": 1322, + "TTTTTTTTTTTTTTTT": 1323, + "TACAGTA": 1324, + "CCATGTT": 1325, + "TAGTGA": 1326, + "CGTGTG": 1327, + "GCTCTGA": 1328, + "CTTCCTG": 1329, + "TCGCTG": 1330, + "TAAATCA": 1331, + "TCCAATT": 1332, + "GTTTCTG": 1333, + "GAAGAGA": 1334, + "GGGTAA": 1335, + "CCATAA": 1336, + "TTATATT": 1337, + "CGAATT": 1338, + "CCGGA": 1339, + "TGAGCC": 1340, + "CCGTA": 1341, + "CAGAGGA": 1342, + "GTGTTTG": 1343, + "GACAAAA": 1344, + "TTTTTTAAA": 1345, + "GTTGCC": 1346, + "GAGTTTT": 1347, + "TCAAAAAA": 1348, + "TGTTTCA": 1349, + "TATCTA": 1350, + "TCTCTCC": 1351, + "CTCCACA": 1352, + "TAAATATT": 1353, + "TTTTCTG": 1354, + "CTCTCAA": 1355, + "CCTTAAA": 1356, + "TCTTTTAA": 1357, + "GAACAAA": 1358, + "TTAGCA": 1359, + "GCTCATG": 1360, + "TAAAGTA": 1361, + "GGATAA": 1362, + "TTATTAAA": 1363, + "CTCCATT": 1364, + "TCTCTGA": 1365, + "TTATTTG": 1366, + "CCTGTAA": 1367, + "TTATATA": 1368, + "GACTTTT": 
1369, + "TGTTGTT": 1370, + "GCAAATG": 1371, + "CTTCAAA": 1372, + "GAATATT": 1373, + "GAATCC": 1374, + "CTCTTAA": 1375, + "GCATAA": 1376, + "GAATGAA": 1377, + "CTTAAAAA": 1378, + "TAAAAATG": 1379, + "TTTTAAAAA": 1380, + "CTCTGGG": 1381, + "TGATCC": 1382, + "GCTCTCA": 1383, + "CTCCAGA": 1384, + "GAGTGCAGTG": 1385, + "CAATATT": 1386, + "TAGAAAA": 1387, + "GTAAATG": 1388, + "TAGCTG": 1389, + "GCTCAAA": 1390, + "GCAGGAA": 1391, + "TACCTG": 1392, + "GGGAAAA": 1393, + "TTTTCTA": 1394, + "GGGGGGGG": 1395, + "CCGA": 1396, + "CTTTGAA": 1397, + "GGAGGTG": 1398, + "TAGTCA": 1399, + "GGCCCA": 1400, + "TGATGTT": 1401, + "CAAATAA": 1402, + "TCTTCCA": 1403, + "GCGCTT": 1404, + "GTATTTG": 1405, + "GTCTC": 1406, + "GAAATCA": 1407, + "TGATAAA": 1408, + "CATTCTT": 1409, + "TATCCA": 1410, + "GCCTCTG": 1411, + "TGAGATG": 1412, + "CGCCAA": 1413, + "GTTTTATT": 1414, + "TATATATT": 1415, + "GTAGGA": 1416, + "GACAGAA": 1417, + "CTCCAGCCTGGG": 1418, + "GCGTGA": 1419, + "GGTATG": 1420, + "GAGGGAGG": 1421, + "TCATTTG": 1422, + "CTACC": 1423, + "TACAGAA": 1424, + "GGTAGA": 1425, + "GATCTA": 1426, + "GTCCATG": 1427, + "TGAGGAA": 1428, + "TAATAAAA": 1429, + "TAAACTT": 1430, + "TCACATT": 1431, + "GGAGGCC": 1432, + "TCACAAA": 1433, + "CACTTTT": 1434, + "CGGCC": 1435, + "CAACAGA": 1436, + "GTAGAGA": 1437, + "GTTATTTT": 1438, + "CGTTTG": 1439, + "TCGTCA": 1440, + "TCTGCTG": 1441, + "CAACACA": 1442, + "GGTAGG": 1443, + "GCAGCTG": 1444, + "TAGTAGAGA": 1445, + "CAAGCC": 1446, + "GCATTTG": 1447, + "TAATATG": 1448, + "GCTTAAA": 1449, + "GCTTCTG": 1450, + "CTCTCCA": 1451, + "TCATCTT": 1452, + "CGTCTG": 1453, + "TCATTTA": 1454, + "CATAGG": 1455, + "GCTCCTT": 1456, + "TGTTCTT": 1457, + "TACATTA": 1458, + "CACAGAA": 1459, + "TAAATATA": 1460, + "TAGAGG": 1461, + "GATAGG": 1462, + "TCCTGAA": 1463, + "GGAGCTG": 1464, + "TGATATT": 1465, + "TCATTAA": 1466, + "CTTTTAAA": 1467, + "TCGTTA": 1468, + "TAAACTA": 1469, + "GTTTGAA": 1470, + "TAAAATTA": 1471, + "CACCCC": 1472, + "TCAGAGA": 1473, + "CTCCTGCCTCA": 1474, + 
"TGACATT": 1475, + "GTATTTA": 1476, + "CTTCATT": 1477, + "GAAACTG": 1478, + "TAACACA": 1479, + "GTTCAAA": 1480, + "GGAGATG": 1481, + "TCGGCC": 1482, + "CAGCATT": 1483, + "TCGATG": 1484, + "TATTCTA": 1485, + "CTGTGAA": 1486, + "TATTGAA": 1487, + "TTTTCCA": 1488, + "TATTTCTT": 1489, + "GGTGAAA": 1490, + "CTGAGAA": 1491, + "GCACAGA": 1492, + "GCGAGG": 1493, + "CTGTGTG": 1494, + "TGAAATG": 1495, + "TGATGAA": 1496, + "GTCCAAA": 1497, + "CTCAATT": 1498, + "TCCAGAA": 1499, + "GTATATA": 1500, + "TAAAGTT": 1501, + "TCTCAAAA": 1502, + "TCCATCA": 1503, + "GTCTGAA": 1504, + "TGAGAGA": 1505, + "TGATTTG": 1506, + "TTAGCC": 1507, + "CTCCATG": 1508, + "TCCCTGA": 1509, + "GAGCTA": 1510, + "CCCCCCCC": 1511, + "GTGGAAA": 1512, + "CTGGGAA": 1513, + "CAATGAA": 1514, + "CCACACA": 1515, + "CTTTCAA": 1516, + "CGGAGG": 1517, + "TCGTGA": 1518, + "CCAGAAA": 1519, + "GTTTTAAA": 1520, + "TGTTGAA": 1521, + "TCCTGTG": 1522, + "CTAAATG": 1523, + "TCCTTTA": 1524, + "GTCTGGG": 1525, + "TCTCTTTT": 1526, + "TACGG": 1527, + "TATTGTA": 1528, + "TTAGTG": 1529, + "TTACC": 1530, + "TAATCCCAGCACTTTG": 1531, + "TCTGGAA": 1532, + "CTTCTCA": 1533, + "CGCATT": 1534, + "TATTTAAA": 1535, + "TCACACA": 1536, + "TAATCAA": 1537, + "GCGAAA": 1538, + "GGGCCA": 1539, + "GTTCATT": 1540, + "GAGAAAAA": 1541, + "TTTTGTA": 1542, + "TACTTTT": 1543, + "TCGAGG": 1544, + "GTGAAAAA": 1545, + "CAATATA": 1546, + "TCCCATG": 1547, + "CAATTAA": 1548, + "CTGGAAA": 1549, + "CCCAGCA": 1550, + "TCCCATT": 1551, + "TCCTGTT": 1552, + "CTCTTTA": 1553, + "TCCCCTT": 1554, + "GTTTCAA": 1555, + "GTCCAGG": 1556, + "GGAAGGA": 1557, + "TAGTTTT": 1558, + "TGACCTT": 1559, + "GTGCTGGGATTACAGG": 1560, + "TATTTATA": 1561, + "TCTGCAA": 1562, + "CTGAAAAA": 1563, + "TATGTTA": 1564, + "CTTCACA": 1565, + "GCACAGG": 1566, + "CCTGCTG": 1567, + "TTTTTTAA": 1568, + "GTTATTA": 1569, + "CCCTTTT": 1570, + "TGATTTA": 1571, + "TACAAAA": 1572, + "TAAGTAA": 1573, + "TTTTTAAA": 1574, + "CATCTC": 1575, + "GTGGTGA": 1576, + "GTGGAGA": 1577, + "CTCTGCA": 1578, + 
"GTTAAAAA": 1579, + "TACATACA": 1580, + "CTTTGTG": 1581, + "GGACACA": 1582, + "TCTGATG": 1583, + "TATTATT": 1584, + "TCTTCTA": 1585, + "CTGTGTT": 1586, + "TCAGCTT": 1587, + "CTTTATA": 1588, + "GGCGC": 1589, + "TCCCTCA": 1590, + "GTACC": 1591, + "TGGAGAA": 1592, + "CAAAAATT": 1593, + "TCTTTAA": 1594, + "CTCTCTC": 1595, + "TGAGTGA": 1596, + "GCAGCTT": 1597, + "CGGATT": 1598, + "TACGA": 1599, + "TCTTGTT": 1600, + "TCGTAA": 1601, + "GCCTGTG": 1602, + "TATTCTG": 1603, + "GGGATA": 1604, + "GGGTCC": 1605, + "TGAGATT": 1606, + "CTTTTATT": 1607, + "TCCCACA": 1608, + "CATGGTG": 1609, + "TTAGGA": 1610, + "GAACACA": 1611, + "TCATAAA": 1612, + "CAACATT": 1613, + "GGTCCA": 1614, + "GAATTTG": 1615, + "TATTAATT": 1616, + "TCCTGGG": 1617, + "GCAGCAA": 1618, + "CTCTTCA": 1619, + "GAAGAGG": 1620, + "TCTGTCA": 1621, + "CTGAATG": 1622, + "CCACAAA": 1623, + "GTGGAGG": 1624, + "TGATTAA": 1625, + "CTCCCTCC": 1626, + "CACACACACACACACACACACACACACACACA": 1627, + "GCGATG": 1628, + "CATTCTG": 1629, + "GTAGAAA": 1630, + "TCATCAA": 1631, + "TTTTCAA": 1632, + "TATGTATG": 1633, + "CCAAATG": 1634, + "TAATTTTA": 1635, + "TAAGGAA": 1636, + "CTTGAAA": 1637, + "AAAAAAAAAAAA": 1638, + "GCTCCTG": 1639, + "GCAGATG": 1640, + "GAAAAATT": 1641, + "GACGC": 1642, + "GTGGGGG": 1643, + "GTCAATT": 1644, + "CTTGCTT": 1645, + "TGACACA": 1646, + "GTGTGTT": 1647, + "CCAGAGA": 1648, + "CCCAGCC": 1649, + "TAAAGAAA": 1650, + "GTCCATT": 1651, + "TAAATTAA": 1652, + "CCCAAAA": 1653, + "GAATTAA": 1654, + "TGAATTA": 1655, + "TTTTTTTG": 1656, + "CCAGCTT": 1657, + "CAATTTG": 1658, + "CTGTTTG": 1659, + "GTCTCAA": 1660, + "GTTTGTG": 1661, + "GGCATA": 1662, + "GGTACA": 1663, + "TGATGTG": 1664, + "GATTTCA": 1665, + "TCTGCTT": 1666, + "GTAATTA": 1667, + "TAAAAAAAA": 1668, + "GCCGCC": 1669, + "TGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTG": 1670, + "GCGTCA": 1671, + "GCTCATT": 1672, + "GAACCTG": 1673, + "TAAACAAA": 1674, + "GTGCTGA": 1675, + "TCAGGAA": 1676, + "TCCTCAA": 1677, + "TCTATTTT": 1678, + "TCTGTTTT": 1679, + "CAGAGCA": 1680, + 
"CCAGGAA": 1681, + "GTCTTTA": 1682, + "TCTTCAA": 1683, + "TCAAAATT": 1684, + "GCTTATT": 1685, + "GTTCCTT": 1686, + "CACCTA": 1687, + "TCACTGA": 1688, + "GAAGCAA": 1689, + "TAAAGA": 1690, + "TCCTTCA": 1691, + "TCTCATG": 1692, + "TCAGTGA": 1693, + "TACACAA": 1694, + "CACGTG": 1695, + "CCTAAAA": 1696, + "GCCTTTG": 1697, + "GGCTTTT": 1698, + "GTTGAAA": 1699, + "GTTCTC": 1700, + "CTAGA": 1701, + "CTACAAA": 1702, + "GCACAAA": 1703, + "TTACATT": 1704, + "GGCCCC": 1705, + "TAATGTG": 1706, + "CTGCCTT": 1707, + "TCCCAGA": 1708, + "GTGAATG": 1709, + "GGACAGG": 1710, + "GGATGTG": 1711, + "GTTTATA": 1712, + "TGACCAA": 1713, + "GTGGCTG": 1714, + "GTTCTCA": 1715, + "CTTATTTT": 1716, + "CTGGAGA": 1717, + "TTACAAA": 1718, + "GTCTTCA": 1719, + "CAAGAGA": 1720, + "CCATTTG": 1721, + "TCACAGA": 1722, + "CTAGTA": 1723, + "CATTATT": 1724, + "TTAGA": 1725, + "GCTCTCC": 1726, + "GCGCCA": 1727, + "TATGTTTT": 1728, + "TCCTCCA": 1729, + "CAGAAAAA": 1730, + "GTGGGAA": 1731, + "TAATCTT": 1732, + "TGAGTCA": 1733, + "CTGCTC": 1734, + "GTCTCCA": 1735, + "TCATGTT": 1736, + "GTTTCCA": 1737, + "TAAGCAA": 1738, + "CTAAAAATA": 1739, + "TGACTGA": 1740, + "TCGGTT": 1741, + "TTAGAAA": 1742, + "TAAGCC": 1743, + "TAAAGCA": 1744, + "CCTCTCC": 1745, + "CCTCCTT": 1746, + "TCAGATT": 1747, + "TATGAAAA": 1748, + "GCTGATG": 1749, + "CATATTTT": 1750, + "GCTCCAA": 1751, + "CGGCGG": 1752, + "CCACTGA": 1753, + "CAGCAAA": 1754, + "CTGTCTT": 1755, + "CTAGCA": 1756, + "TCGGGG": 1757, + "CACAGCA": 1758, + "GCTGATT": 1759, + "CTAGGA": 1760, + "TAACTC": 1761, + "TCATATT": 1762, + "CCTTCTT": 1763, + "CTGCAAA": 1764, + "CCCGC": 1765, + "GGTCTA": 1766, + "CCCAGGA": 1767, + "GTGTCTG": 1768, + "TAATAATAATAA": 1769, + "TCACATG": 1770, + "CAATTTA": 1771, + "TATATATATATATATATATATATATATATATA": 1772, + "CCACAGA": 1773, + "TCAATTTT": 1774, + "GTATTAA": 1775, + "GAACATT": 1776, + "TCTCTTA": 1777, + "CTATTTG": 1778, + "TCTTTCC": 1779, + "GGTTAAA": 1780, + "GCTAATT": 1781, + "CTGCTGA": 1782, + "TACCTA": 1783, + "CAGGGTT": 1784, + 
"TCGCCA": 1785, + "CAAAAATTA": 1786, + "CTTCTGA": 1787, + "GCATGTG": 1788, + "CTATTAA": 1789, + "GCACATG": 1790, + "CAACATG": 1791, + "TCATGAA": 1792, + "GAATGTT": 1793, + "GGGTTTT": 1794, + "CTGCCTG": 1795, + "GTCCACA": 1796, + "TAAACA": 1797, + "CTCTGGA": 1798, + "GACCCC": 1799, + "GGCAAAA": 1800, + "TCTGTTA": 1801, + "CTAGTG": 1802, + "CTATATA": 1803, + "TCAGTCA": 1804, + "TAACTAA": 1805, + "GAAGATG": 1806, + "GTCTTAA": 1807, + "CAAGGAA": 1808, + "GTAAAAAA": 1809, + "TCCCCTG": 1810, + "TCGCAA": 1811, + "TCTGCCTG": 1812, + "CCTTTTA": 1813, + "GTCCCAGCTA": 1814, + "TATATATG": 1815, + "TATTGTG": 1816, + "TGTGTTTT": 1817, + "GCGCAA": 1818, + "CACAGTG": 1819, + "TAAGATT": 1820, + "CTCTGTA": 1821, + "GGAGGCTGA": 1822, + "GGACAAA": 1823, + "TATTAAAAA": 1824, + "TCGTCC": 1825, + "TCGGAA": 1826, + "CTATAAA": 1827, + "CTTCAGA": 1828, + "CTAGAAA": 1829, + "CATTCAA": 1830, + "CACGCA": 1831, + "CAGGATT": 1832, + "CCATCTT": 1833, + "GTAGCC": 1834, + "GAATTTA": 1835, + "CACGC": 1836, + "CAATCC": 1837, + "TGAGCAA": 1838, + "GAAGCTG": 1839, + "TCAATTA": 1840, + "GAAGTCA": 1841, + "CTGCACA": 1842, + "CCACGG": 1843, + "GGATCTT": 1844, + "CTCCTGCCTCAGCCTCC": 1845, + "TAAATGAA": 1846, + "CCGTC": 1847, + "TCGGTG": 1848, + "TTTTATTA": 1849, + "GCAGGGG": 1850, + "GCAGGTG": 1851, + "TCTATTA": 1852, + "TAACTTA": 1853, + "CTAATTTT": 1854, + "CCCGCC": 1855, + "TAATACA": 1856, + "GGATTAAA": 1857, + "TCTCTCTG": 1858, + "GCTTCTT": 1859, + "CATTTATT": 1860, + "CCAGAGG": 1861, + "GGACAGA": 1862, + "GCCAATT": 1863, + "TCCCCAA": 1864, + "GTTGATT": 1865, + "GAAGAAAA": 1866, + "GCATTTA": 1867, + "CTCTAAA": 1868, + "CACACACACACA": 1869, + "CCTCAAA": 1870, + "TATAATT": 1871, + "CAATGTT": 1872, + "GCCCAGA": 1873, + "GTATATT": 1874, + "CTAAAAAA": 1875, + "CCACAGG": 1876, + "TAAGAGA": 1877, + "TCCTTAA": 1878, + "TATTTTTT": 1879, + "GAATATA": 1880, + "GGATTTG": 1881, + "GTGTGAA": 1882, + "CTGGCTT": 1883, + "GCGGCA": 1884, + "TCCGCC": 1885, + "GCATCTT": 1886, + "TCTAATA": 1887, + "CTGCATT": 1888, + 
"CTCTGCC": 1889, + "TCACTCA": 1890, + "TCAGCAA": 1891, + "TATTATG": 1892, + "CCAGCTG": 1893, + "GATCTC": 1894, + "GCCTCTT": 1895, + "CTTCCAA": 1896, + "TCCTAAA": 1897, + "TCATCTG": 1898, + "CTATTTA": 1899, + "CTGCAGG": 1900, + "CAAGCAA": 1901, + "GCGGAA": 1902, + "GAAATAAA": 1903, + "TAAAATAA": 1904, + "TCACCTT": 1905, + "CCATGTG": 1906, + "GACCTA": 1907, + "CAGATGA": 1908, + "GTGGCTT": 1909, + "TTATTATTATTA": 1910, + "TCCCGG": 1911, + "TATTTGTT": 1912, + "CTGTAAA": 1913, + "TCCATCCA": 1914, + "CTGTATA": 1915, + "GTTTCTA": 1916, + "GTTGCTT": 1917, + "CCATGAA": 1918, + "GCTCTTA": 1919, + "CTTCATG": 1920, + "GTTCCTG": 1921, + "GCTGGGA": 1922, + "TCAGAGG": 1923, + "CATTAAAA": 1924, + "TCAGTAA": 1925, + "GAATGTG": 1926, + "CTTATTA": 1927, + "GCACTGA": 1928, + "TGAGGTT": 1929, + "CATCAAA": 1930, + "CTTCTCC": 1931, + "GTTTATG": 1932, + "CTTTCCA": 1933, + "GTGCCTG": 1934, + "GAAAGGA": 1935, + "GCATCTG": 1936, + "TACCCA": 1937, + "TAACAGA": 1938, + "AAAAAAAAAAA": 1939, + "CTATGAA": 1940, + "CAGTAAA": 1941, + "TAGCTA": 1942, + "TCGTTTT": 1943, + "GTGTCTT": 1944, + "GAGCAAA": 1945, + "TCTAAAAA": 1946, + "GTTCACA": 1947, + "GAAATGA": 1948, + "CAAATGA": 1949, + "GCCCTGA": 1950, + "GTGTTTA": 1951, + "TCATGTG": 1952, + "CATATTA": 1953, + "TCAAAAAAA": 1954, + "TAAGTTA": 1955, + "TCTCTCTT": 1956, + "CCAGTGA": 1957, + "CCTCTGA": 1958, + "CAAGATG": 1959, + "GCCTGTT": 1960, + "GTTTGGG": 1961, + "CATTCATT": 1962, + "GCCCCTG": 1963, + "GTTCTGA": 1964, + "GCGGCC": 1965, + "GCGGTT": 1966, + "CAAAACAAAA": 1967, + "TACATATA": 1968, + "GAATTAAA": 1969, + "TCAAGAA": 1970, + "CTGTATT": 1971, + "TTTTTATT": 1972, + "GATTATT": 1973, + "TCTAATG": 1974, + "GTTGCTG": 1975, + "TGAATGAA": 1976, + "TCAGCTG": 1977, + "CTTGATT": 1978, + "CAGAATG": 1979, + "CTAATTA": 1980, + "TATAATG": 1981, + "GTTTTGTTTT": 1982, + "CCAGCCTG": 1983, + "TGATGGA": 1984, + "GCAGATT": 1985, + "CTCTATT": 1986, + "GCAGTCA": 1987, + "TAAGTGA": 1988, + "CTACACA": 1989, + "CGCATG": 1990, + "TAGCCA": 1991, + "GTGGCTCA": 1992, + 
"CAAATAAA": 1993, + "GTGCTCA": 1994, + "TTTTTTTTTT": 1995, + "TAACATG": 1996, + "TCCCAGCTA": 1997, + "CAAAGTA": 1998, + "TCATATA": 1999, + "CAGCATG": 2000, + "TGATCTT": 2001, + "CATAATT": 2002, + "TGTGTTA": 2003, + "TTTTGAA": 2004, + "TTAATTA": 2005, + "GATATTA": 2006, + "TCATTCA": 2007, + "TGATATA": 2008, + "TGACTCA": 2009, + "GACGTT": 2010, + "TGACATG": 2011, + "GTTGTGA": 2012, + "CATTTTTT": 2013, + "GCCTGGA": 2014, + "CTATGTT": 2015, + "CTTTGGG": 2016, + "GTCTCAAA": 2017, + "CTGGCTG": 2018, + "CCACATG": 2019, + "GGCGTG": 2020, + "CTTAATG": 2021, + "TAAGATG": 2022, + "GTATAAA": 2023, + "TGTATTA": 2024, + "TAACTCA": 2025, + "GAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGA": 2026, + "GCATGAA": 2027, + "GTTAATG": 2028, + "TCCAGGA": 2029, + "GAGAGAAA": 2030, + "TCTCTGTG": 2031, + "CTCTCTA": 2032, + "CCACCTG": 2033, + "GCCAGGA": 2034, + "CTGGAGG": 2035, + "CCATTTA": 2036, + "GTCTGGA": 2037, + "GCCCACA": 2038, + "TAGAGAA": 2039, + "CAACTCA": 2040, + "GGCAGGA": 2041, + "TCTTATG": 2042, + "CAAAGGA": 2043, + "GGTAAAA": 2044, + "GAGAGGA": 2045, + "GTCCAGA": 2046, + "GCCCTCA": 2047, + "GATATTTT": 2048, + "CAGGGAA": 2049, + "CCACATT": 2050, + "GAGGAGG": 2051, + "GAAACTT": 2052, + "CAGAATT": 2053, + "TCAGATG": 2054, + "TATTTCC": 2055, + "TACAGTG": 2056, + "TGAGCTG": 2057, + "CCATCTG": 2058, + "GAGAATG": 2059, + "TCAACAA": 2060, + "ATT": 2061, + "TAACTGA": 2062, + "TGAGAGG": 2063, + "CACTGAA": 2064, + "CCACCTT": 2065, + "CTGCAGA": 2066, + "TCACCAA": 2067, + "TGAGCTT": 2068, + "CAAAGCA": 2069, + "GGTTTTA": 2070, + "CGGGGTT": 2071, + "TCCAAAAA": 2072, + "TATGTATA": 2073, + "CCAGATG": 2074, + "TCCATTTT": 2075, + "CTGCTCA": 2076, + "GATAATT": 2077, + "CCACCAA": 2078, + "CTCCTCC": 2079, + "GAGAATT": 2080, + "GAAAGTA": 2081, + "TAAAATAAAA": 2082, + "CTTCTTA": 2083, + "CTGTTTA": 2084, + "GAATCAA": 2085, + "GCATGTT": 2086, + "GCACGG": 2087, + "GACTGAA": 2088, + "GTGCACA": 2089, + "GACGTG": 2090, + "TATACAA": 2091, + "TCGACA": 2092, + "GAAGACA": 2093, + "TAAAGGA": 2094, + "GATCAAA": 2095, + 
"CAGTGTG": 2096, + "CTAGCC": 2097, + "GAGGAAAA": 2098, + "TCTGAAAA": 2099, + "GAACCCA": 2100, + "GATGGATG": 2101, + "GTTCTTA": 2102, + "CTATATT": 2103, + "GCATTAA": 2104, + "TCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTC": 2105, + "TCAGTC": 2106, + "TATTTTTG": 2107, + "GAGGATT": 2108, + "GTATGTG": 2109, + "TAACCAA": 2110, + "GTTGTTTT": 2111, + "TTTTTCTT": 2112, + "GTGTTAA": 2113, + "CTTGGAA": 2114, + "AAAAAATG": 2115, + "CAATGTG": 2116, + "GTGCCTT": 2117, + "GCCTCAA": 2118, + "GAGTCTT": 2119, + "GCTAATTTT": 2120, + "CGAAAAA": 2121, + "GTGTATA": 2122, + "GCGTTA": 2123, + "CTGCACTCCAGCCTGGG": 2124, + "GTTCATG": 2125, + "CAAAGAAA": 2126, + "GCAGTAA": 2127, + "GGATGAA": 2128, + "CTTTATG": 2129, + "CAGGAAAA": 2130, + "TCCTGCA": 2131, + "CTGTCTG": 2132, + "GAACATG": 2133, + "GGATGGA": 2134, + "GCCTGAA": 2135, + "CAAAAATG": 2136, + "TCCAATG": 2137, + "CCAGCAA": 2138, + "GGCCTA": 2139, + "CAACTGA": 2140, + "GCACCTG": 2141, + "GTCTATT": 2142, + "CCTCTCA": 2143, + "GTGGTCA": 2144, + "GTGTAAA": 2145, + "GTACACA": 2146, + "GTAAAATT": 2147, + "GTACATT": 2148, + "TATATAAA": 2149, + "CTGTTAA": 2150, + "TAAGTCA": 2151, + "GCCTCCA": 2152, + "AAATTAAA": 2153, + "GTGCAGG": 2154, + "TCCTGGA": 2155, + "GTGCAAA": 2156, + "GCGTCC": 2157, + "CCATTAA": 2158, + "GGAGGGA": 2159, + "TCACTTA": 2160, + "TCATTAAA": 2161, + "CAACATA": 2162, + "TAATAGA": 2163, + "TAATGTA": 2164, + "GATTTTTT": 2165, + "GTTGTCA": 2166, + "GGAGACA": 2167, + "GTGTGGG": 2168, + "TCACAGG": 2169, + "TCGGCA": 2170, + "CTCCCTG": 2171, + "GACCAAA": 2172, + "TGTTTATT": 2173, + "CGAATG": 2174, + "CTCAATG": 2175, + "TCACCTG": 2176, + "CAGTGTT": 2177, + "TGAGACA": 2178, + "TAGGGG": 2179, + "GAAAAATG": 2180, + "GTTGAGA": 2181, + "TCGATA": 2182, + "CTCGGGAGG": 2183, + "GTTGTC": 2184, + "CCAGTCA": 2185, + "GCCCAGGCTG": 2186, + "GAACAGA": 2187, + "GGCTCACTGCAA": 2188, + "GCAGACA": 2189, + "TGAGGTG": 2190, + "CACGTT": 2191, + "TAAGAAAA": 2192, + "CCAGGCA": 2193, + "GTATCTT": 2194, + "CTTGGGAGG": 2195, + "CTTTCTA": 2196, + "CCGCTG": 2197, + 
"GAGCTCA": 2198, + "GAGACAGA": 2199, + "CTTCAGG": 2200, + "GCACATT": 2201, + "GTACAAA": 2202, + "CTTGTAA": 2203, + "GTGGGTG": 2204, + "GAAGTGA": 2205, + "GGTCTC": 2206, + "GTATGTT": 2207, + "GCACTCA": 2208, + "TTATGTT": 2209, + "CAAGTCA": 2210, + "CAAGTGA": 2211, + "GAAACTA": 2212, + "TAAATAAAA": 2213, + "TCTTAAAA": 2214, + "GTTGGAA": 2215, + "GTTCTAA": 2216, + "CCACTC": 2217, + "CAGTGAA": 2218, + "GAAAGG": 2219, + "GCACGA": 2220, + "TAACTTTT": 2221, + "GTTGTTA": 2222, + "TCAGTTA": 2223, + "CGGATG": 2224, + "TATTTGAA": 2225, + "CCCTGAA": 2226, + "GCCCTC": 2227, + "CTTCTAA": 2228, + "TTTGTTTT": 2229, + "GAGCTGA": 2230, + "CTGTGGG": 2231, + "CAAGATT": 2232, + "GAAGCTT": 2233, + "TGAGTAA": 2234, + "CTTGCTG": 2235, + "GGATGGG": 2236, + "CGTATG": 2237, + "TCCATTA": 2238, + "GTCTGCA": 2239, + "GCCATTTT": 2240, + "GTTGTAA": 2241, + "CACACAA": 2242, + "GGACTACAGG": 2243, + "CGTTTTA": 2244, + "TCTTCC": 2245, + "TAACCTT": 2246, + "CTTTAAAA": 2247, + "TGAATTTT": 2248, + "CTACAGA": 2249, + "GCAAGAA": 2250, + "TAACAAAA": 2251, + "CAATTAAA": 2252, + "CCACTCA": 2253, + "CATGGTGAAA": 2254, + "CCCAGAA": 2255, + "CTACATT": 2256, + "CCGAGG": 2257, + "TCCAGTG": 2258, + "TGAGTTA": 2259, + "GGAGTCA": 2260, + "TAACGA": 2261, + "GAGTAAA": 2262, + "GACTCTG": 2263, + "GGAGCTT": 2264, + "TACTCC": 2265, + "CTGCATG": 2266, + "GCTTTTTT": 2267, + "GTCTAAA": 2268, + "GTGCGG": 2269, + "CATCTCA": 2270, + "TGATCAA": 2271, + "GGAGATT": 2272, + "GCAAAAAA": 2273, + "CACCAAA": 2274, + "TGACGG": 2275, + "CAGAGG": 2276, + "GTTGATG": 2277, + "CTTGTCA": 2278, + "TCCACCTG": 2279, + "GGAGCAA": 2280, + "CAAGTAA": 2281, + "CCATAAA": 2282, + "GTGCATG": 2283, + "GCATATT": 2284, + "GTAGATT": 2285, + "GCCTAA": 2286, + "CTCAAAAA": 2287, + "GGAGAAAA": 2288, + "CTATCC": 2289, + "TAATATTA": 2290, + "GTGCTC": 2291, + "CAATATG": 2292, + "TGTGGAA": 2293, + "TGACTC": 2294, + "GTGTATG": 2295, + "TTTTAATG": 2296, + "GCTCTAA": 2297, + "CACAATG": 2298, + "CAGCTCA": 2299, + "GTTGGTT": 2300, + "CTAAAATT": 2301, + "GTCTATG": 
2302, + "TGTGAAAA": 2303, + "CTGGGTT": 2304, + "CCCCTCC": 2305, + "CCCTCTT": 2306, + "GCAGGGA": 2307, + "GAAACCA": 2308, + "CATTTCC": 2309, + "GCAGCCA": 2310, + "TCATATG": 2311, + "GCAGGCA": 2312, + "CGTAAAA": 2313, + "TGACCTG": 2314, + "CAGAGGTT": 2315, + "CTTGTGA": 2316, + "TTATCTT": 2317, + "CTGTATG": 2318, + "GTCAATG": 2319, + "GGACGG": 2320, + "GCGTAA": 2321, + "CAAACTA": 2322, + "TAAATGTT": 2323, + "CTTCGG": 2324, + "CTCCCCA": 2325, + "TACAATG": 2326, + "TCTGTAA": 2327, + "GAATATG": 2328, + "GCGGGA": 2329, + "GGACATT": 2330, + "TTATGAA": 2331, + "GGATGTT": 2332, + "GGACATG": 2333, + "TCAGGTG": 2334, + "CAACAAAA": 2335, + "GAAAGAGA": 2336, + "GTGGATG": 2337, + "GGGCTA": 2338, + "CCATCAA": 2339, + "CAGCTGA": 2340, + "CTCCACC": 2341, + "CAATCAA": 2342, + "GTGGTC": 2343, + "TGACAGG": 2344, + "CCATTCA": 2345, + "GTCCCTG": 2346, + "CAGACACA": 2347, + "GTTGGTG": 2348, + "CCTCCTG": 2349, + "GAACTGA": 2350, + "TATTCATT": 2351, + "GCCCATG": 2352, + "CAATCTT": 2353, + "GAAAGCA": 2354, + "GAATCTG": 2355, + "TTATTTTA": 2356, + "GTTTGGA": 2357, + "TTTTTGTT": 2358, + "GGGAATG": 2359, + "GCGACA": 2360, + "TAAACTG": 2361, + "CCATATT": 2362, + "GGATCC": 2363, + "CAAGCTT": 2364, + "TAAAAAAAAA": 2365, + "TCACTC": 2366, + "CACTGTT": 2367, + "TGTTAATT": 2368, + "GGACTGA": 2369, + "GGAGTGA": 2370, + "CATACACA": 2371, + "GTTTGTA": 2372, + "TCCAGCA": 2373, + "GTGCATT": 2374, + "GGAAAAAA": 2375, + "CCAAGAA": 2376, + "TCAATA": 2377, + "CTTCCCA": 2378, + "TGAGAAAA": 2379, + "GGCCTCCCAAA": 2380, + "CAAGCTG": 2381, + "GCCCAAA": 2382, + "TGACTTA": 2383, + "CAGCCTT": 2384, + "CTGGATT": 2385, + "TTTTTTTA": 2386, + "TCACGG": 2387, + "GCAGTTA": 2388, + "TGACTAA": 2389, + "TTACAGG": 2390, + "TGATATG": 2391, + "TAATTATT": 2392, + "TCTTGAA": 2393, + "GCCCCTT": 2394, + "GTTCAGA": 2395, + "CTCTATG": 2396, + "CCATGGA": 2397, + "GAGGGAA": 2398, + "GGAGGCA": 2399, + "CTTTGCA": 2400, + "TCTTGG": 2401, + "GGAGGTT": 2402, + "GCCAATG": 2403, + "CTGGTGA": 2404, + "CAACCAA": 2405, + "CCAGTC": 2406, + 
"CTTGAGA": 2407, + "TACAGCA": 2408, + "CTTGTC": 2409, + "GACGGA": 2410, + "CTTCTTTT": 2411, + "GTGGC": 2412, + "GAGGATG": 2413, + "CAATAAAA": 2414, + "GAAATTTT": 2415, + "AAAAAAAAAA": 2416, + "CTCTATA": 2417, + "GTATGAA": 2418, + "CTTGTTA": 2419, + "TAACATA": 2420, + "CAAACACA": 2421, + "TGATTAAA": 2422, + "GCTCTGTT": 2423, + "GTGGGTT": 2424, + "GTTGGGG": 2425, + "GTGTGTA": 2426, + "GTAATTTT": 2427, + "GTATCC": 2428, + "TGTGTGTGTGTG": 2429, + "TCTTCCTT": 2430, + "TCACTAA": 2431, + "TCTCCAAA": 2432, + "TATCAAA": 2433, + "TGATGGG": 2434, + "GGATATT": 2435, + "CAAATTTT": 2436, + "GTTCAGG": 2437, + "GTGGATT": 2438, + "GTGCAGA": 2439, + "GCTGCC": 2440, + "CTCAGAA": 2441, + "GCAGTC": 2442, + "GGATAAA": 2443, + "GCCTTCA": 2444, + "CCAGGTG": 2445, + "TATCTC": 2446, + "CAATGCA": 2447, + "CCCACTG": 2448, + "GTGTATT": 2449, + "CGACAGA": 2450, + "TGAGATA": 2451, + "CCAGGTT": 2452, + "TGTTTAA": 2453, + "CATCATG": 2454, + "TGATTCA": 2455, + "GCAATTA": 2456, + "GAAATGAA": 2457, + "CTTGGTT": 2458, + "GAAGATT": 2459, + "GGATTAA": 2460, + "CCTCATT": 2461, + "GGCCAGGCTG": 2462, + "GCTATTA": 2463, + "GCCAGCA": 2464, + "GAGACAGG": 2465, + "CTTGAGG": 2466, + "CAGTCTT": 2467, + "GTTCTCC": 2468, + "TATTTCAA": 2469, + "TGACGA": 2470, + "CATGAAAA": 2471, + "CATTATG": 2472, + "TAAATTTA": 2473, + "GAGTGAA": 2474, + "CAACAGG": 2475, + "TAAGCTT": 2476, + "CACATTTT": 2477, + "GATCTCA": 2478, + "TAGTCC": 2479, + "GACCCTG": 2480, + "TAATGCA": 2481, + "TAAGTC": 2482, + "TAATAATT": 2483, + "GAAGTAA": 2484, + "CAACTC": 2485, + "CATCATT": 2486, + "GACGAA": 2487, + "GAAACAAA": 2488, + "TATTTCTG": 2489, + "CATTAATT": 2490, + "CCACCCC": 2491, + "TAATATTTT": 2492, + "GTTTAAAA": 2493, + "GTATCTG": 2494, + "GTCAAAAA": 2495, + "GATGCTG": 2496, + "TGTTCTG": 2497, + "GGTCAAA": 2498, + "GTAGGAA": 2499, + "GTATATG": 2500, + "TGATCTG": 2501, + "GGGGCTG": 2502, + "GCATCAA": 2503, + "GCCAAAAA": 2504, + "CCACGA": 2505, + "GCTAATG": 2506, + "CAGAGAAA": 2507, + "CCTTCTG": 2508, + "TCCTCTA": 2509, + "GCAGGTT": 2510, + 
"CTCACTG": 2511, + "TAGATTA": 2512, + "GCCGAGA": 2513, + "CCATCCA": 2514, + "CTTTACA": 2515, + "GTACATG": 2516, + "GCACCAA": 2517, + "CTTTGTA": 2518, + "CTATGTG": 2519, + "TCACTTTT": 2520, + "TGAGTC": 2521, + "CAAGAAAA": 2522, + "CTGACTG": 2523, + "GTTTTTTTT": 2524, + "GCATAAA": 2525, + "TAATCTG": 2526, + "GAAAAAAAA": 2527, + "CAGGATG": 2528, + "TGAGCCA": 2529, + "GAATTCA": 2530, + "TCAGACA": 2531, + "GTTCCAA": 2532, + "TCAGGTT": 2533, + "CAAACTG": 2534, + "CATTTCTT": 2535, + "TGTTAAAA": 2536, + "CCAGACA": 2537, + "CAAGTTA": 2538, + "CATGTTA": 2539, + "CATTCTA": 2540, + "TCTTTTTG": 2541, + "TGAGGGG": 2542, + "CACATTA": 2543, + "TAAAATAAA": 2544, + "GCATATA": 2545, + "TGTTCTA": 2546, + "GAAGGGG": 2547, + "GAGTGTG": 2548, + "TAAGACA": 2549, + "GAACTC": 2550, + "CCAGTAA": 2551, + "GAGAGAGG": 2552, + "GCGACC": 2553, + "CAATTCA": 2554, + "CGGCTG": 2555, + "CCAGATT": 2556, + "CCTGGG": 2557, + "GGAAGAAA": 2558, + "GAGAGG": 2559, + "TCAAAATG": 2560, + "CCTCATG": 2561, + "TAAAGG": 2562, + "CTTTGGA": 2563, + "CCAGGGA": 2564, + "GTACAGA": 2565, + "CTGAGGCAGGA": 2566, + "TGTTTCTT": 2567, + "CCAGGCTG": 2568, + "CTGAGG": 2569, + "GAGGCTG": 2570, + "CTCCTGGG": 2571, + "GAAGTC": 2572, + "CGACC": 2573, + "GGACTCA": 2574, + "GGAGTC": 2575, + "CACAATT": 2576, + "GTGTTCA": 2577, + "GACTAAA": 2578, + "GTCATTA": 2579, + "CAAAATTA": 2580, + "TGAAGAAA": 2581, + "GCACCTT": 2582, + "GTTTGCA": 2583, + "TCCTGCC": 2584, + "GTAGATG": 2585, + "GCCTGCA": 2586, + "GAGTTAA": 2587, + "TCCCTTA": 2588, + "GTGGTTA": 2589, + "TCGGGA": 2590, + "TACATAA": 2591, + "TCTCTCCA": 2592, + "CACTAAA": 2593, + "TATATATATATA": 2594, + "GTGGCAA": 2595, + "CACCATG": 2596, + "TTTGAAAA": 2597, + "CACACTG": 2598, + "CTTGGTG": 2599, + "TACACTG": 2600, + "CCTCCAA": 2601, + "CAACCTT": 2602, + "CAGCCAA": 2603, + "TTTTCAAA": 2604, + "TGATAGA": 2605, + "TACACTA": 2606, + "TCTGGG": 2607, + "TCCCAGCA": 2608, + "TAGGAAAA": 2609, + "CTTGGGG": 2610, + "TCTGTGAA": 2611, + "CCTTATT": 2612, + "CATTTAAA": 2613, + "TTTTATTTTA": 2614, + 
"GCCCTCC": 2615, + "CTGAGCA": 2616, + "CCCGTG": 2617, + "GTAGTGA": 2618, + "TCCTATT": 2619, + "GAAGGTG": 2620, + "TGTGCTG": 2621, + "TCCACTG": 2622, + "TAATCTA": 2623, + "TGATGTA": 2624, + "GTGGTAA": 2625, + "TAATGGA": 2626, + "GATGAAAA": 2627, + "GTAGTAA": 2628, + "GTGGGGA": 2629, + "GTGTCAA": 2630, + "CAGACTG": 2631, + "TCGAAAA": 2632, + "CTCATTA": 2633, + "TAATAATA": 2634, + "CTCAGAAA": 2635, + "CATCCTT": 2636, + "CCGCTT": 2637, + "GGAAGG": 2638, + "CCGTGA": 2639, + "CCACTCC": 2640, + "CTAGAGA": 2641, + "TAGAATG": 2642, + "GGATTTA": 2643, + "TTAATTTT": 2644, + "GCTAATA": 2645, + "TCCCCCA": 2646, + "CAAATATT": 2647, + "GATCATG": 2648, + "TCTTAATT": 2649, + "CAGTATT": 2650, + "GTCTTGAA": 2651, + "CCGAAA": 2652, + "CTATTCA": 2653, + "TAAGATA": 2654, + "CTTGCAA": 2655, + "GCCCCAA": 2656, + "TCCCTAA": 2657, + "GAAGTTA": 2658, + "GATGATG": 2659, + "CTTGATG": 2660, + "CCCTAAA": 2661, + "CCTGCCTG": 2662, + "GACATTTT": 2663, + "CCAGCCA": 2664, + "TGTGTGTGTG": 2665, + "GTCTATA": 2666, + "TCTCTGTT": 2667, + "GTCTGTA": 2668, + "TATAATA": 2669, + "CTTGTTTT": 2670, + "CGCCATT": 2671, + "CTCAGCA": 2672, + "TACAGTT": 2673, + "CAAGAGG": 2674, + "GGAAGCA": 2675, + "GCCTTTA": 2676, + "CCCCATT": 2677, + "CAACGA": 2678, + "GTCATTTT": 2679, + "CCCGCA": 2680, + "CAGTTAA": 2681, + "GAATCTT": 2682, + "CATGTTTT": 2683, + "CCGGGG": 2684, + "CTACTGA": 2685, + "TCACGA": 2686, + "TAAATTTG": 2687, + "GCCCATT": 2688, + "CTCTAGG": 2689, + "GGACCTG": 2690, + "TCAGGGA": 2691, + "GAGACTG": 2692, + "CCAAAAAA": 2693, + "GCCGG": 2694, + "CCAGGGG": 2695, + "TCAGAAAA": 2696, + "CATCTGA": 2697, + "TCTTCAAA": 2698, + "CTACAGG": 2699, + "GAGGCAGG": 2700, + "CATTGTA": 2701, + "TAAATCAA": 2702, + "GACTCTT": 2703, + "CTGATTA": 2704, + "GCATATG": 2705, + "GGACCTT": 2706, + "CAAGACA": 2707, + "TATTTATG": 2708, + "TATTTTAAA": 2709, + "CCGAGA": 2710, + "TCATTTTA": 2711, + "CTCACTCA": 2712, + "CCACCCA": 2713, + "CTCTAGA": 2714, + "CTACATG": 2715, + "GTGCTTA": 2716, + "CAACCTG": 2717, + "TCTGTGTT": 2718, + 
"TAAATATG": 2719, + "CAAAGG": 2720, + "CCCTGTT": 2721, + "GTTCGG": 2722, + "TGATAAAA": 2723, + "CACGAA": 2724, + "GTTGAGG": 2725, + "CAGAGTGA": 2726, + "GAAATTAA": 2727, + "CACATA": 2728, + "GAACAGG": 2729, + "TCTCCTGA": 2730, + "CCTGAGG": 2731, + "GGAGGCCAA": 2732, + "GTTTACA": 2733, + "TAACAGG": 2734, + "TGTGGTG": 2735, + "GCCTCCCAAA": 2736, + "CCATCCTG": 2737, + "GATTCTT": 2738, + "GAATGGA": 2739, + "GTAGTCA": 2740, + "CTCCTCTG": 2741, + "GAAAGAAAGAAAGAAA": 2742, + "CCCTGTG": 2743, + "CAGTATG": 2744, + "GCGATA": 2745, + "GGACTC": 2746, + "GAAAGA": 2747, + "TGTTGG": 2748, + "GTAGCTT": 2749, + "CATTTTAA": 2750, + "CCCTCTG": 2751, + "GCATTCA": 2752, + "CGATTA": 2753, + "TCACATA": 2754, + "TAATGAAA": 2755, + "GGAATTA": 2756, + "CTGTCAA": 2757, + "TAAATTAAA": 2758, + "CAAGTC": 2759, + "GTATTCA": 2760, + "GGCCATG": 2761, + "CTTTAGA": 2762, + "TGTTTCC": 2763, + "CATGTA": 2764, + "GAATAAAA": 2765, + "CAACTAA": 2766, + "TCATCTA": 2767, + "CACTCTT": 2768, + "CAGTTTG": 2769, + "CATAAAAA": 2770, + "GCATGCA": 2771, + "GATTTA": 2772, + "GAACCAA": 2773, + "TCTGTGA": 2774, + "TCAGCCA": 2775, + "TCTCCACA": 2776, + "TCTCAGCTCA": 2777, + "TATCATG": 2778, + "GCACTTA": 2779, + "CGCCAGG": 2780, + "CGGGG": 2781, + "CATTAAAAA": 2782, + "TTTGTTA": 2783, + "GGATATA": 2784, + "TCGACC": 2785, + "TAATCCA": 2786, + "CCGC": 2787, + "CATTGTT": 2788, + "CCAGTTA": 2789, + "GTAGTTA": 2790, + "CTAGGAA": 2791, + "CCTAATT": 2792, + "TCATGGG": 2793, + "GAACTAA": 2794, + "GCTATTTT": 2795, + "CCGTCA": 2796, + "CAGATTA": 2797, + "CCATATA": 2798, + "CAACTTA": 2799, + "TCAGTTTT": 2800, + "CTACCTT": 2801, + "GCACTC": 2802, + "GTGTGGA": 2803, + "GTGCCAA": 2804, + "GACAATG": 2805, + "GACAATT": 2806, + "GTACCTT": 2807, + "TAAACATT": 2808, + "CAGGAGG": 2809, + "GTGCGA": 2810, + "GAAAATTA": 2811, + "TCTCTTAA": 2812, + "CCGATT": 2813, + "GATGATT": 2814, + "CCATGGG": 2815, + "TCGGTA": 2816, + "CCATATG": 2817, + "CCAGTCC": 2818, + "GCCTTAA": 2819, + "TGATCCA": 2820, + "GTTGCAA": 2821, + "GTAGAGG": 2822, + 
"CAGATTTT": 2823, + "GTACTTA": 2824, + "TCTTTCTTTCTTTCTT": 2825, + "GCTCTGTG": 2826, + "TCAATAA": 2827, + "GTTTAGA": 2828, + "GTTCGA": 2829, + "CAAGGTT": 2830, + "CTCATTTT": 2831, + "CACAGG": 2832, + "CATGCTG": 2833, + "GAACGG": 2834, + "TATAAAAA": 2835, + "GAAGGCA": 2836, + "GAGCATT": 2837, + "TGTTTGTG": 2838, + "GCTGTTA": 2839, + "GTCACTG": 2840, + "CAAATGAA": 2841, + "GTGACTG": 2842, + "GTTCTTTT": 2843, + "CAGGCTGGAGTGCAGTG": 2844, + "TGATGAAA": 2845, + "TAACGG": 2846, + "CTACTAA": 2847, + "GACATTA": 2848, + "GGACGA": 2849, + "GAGCATG": 2850, + "GCATGGG": 2851, + "CCACTTA": 2852, + "CTATCAA": 2853, + "GCTGTTTT": 2854, + "GTCGTG": 2855, + "CCTGGCC": 2856, + "TCTCTGAA": 2857, + "TGTTGTA": 2858, + "CAGCCAGG": 2859, + "GTTTAGG": 2860, + "CCGCAA": 2861, + "GGAGTAA": 2862, + "CCAATTA": 2863, + "CAGCAAAA": 2864, + "TCATCCA": 2865, + "CACGTA": 2866, + "TCATAGA": 2867, + "TAATTAAAA": 2868, + "CACTTAA": 2869, + "TCTTTATT": 2870, + "GAGATTA": 2871, + "TAAGAGG": 2872, + "CAAATTAA": 2873, + "GACGCA": 2874, + "CACGGA": 2875, + "GTGTGCA": 2876, + "TCT": 2877, + "TATTATTA": 2878, + "GAAATATT": 2879, + "GGAGTTA": 2880, + "TCTTTGA": 2881, + "CTGATTTT": 2882, + "TGTGAATT": 2883, + "TCCCACC": 2884, + "CCCTTTG": 2885, + "CAAGGTG": 2886, + "CAGAGTT": 2887, + "CCCCATG": 2888, + "CTACCAA": 2889, + "CTCCAAAA": 2890, + "CTTCCCC": 2891, + "CTGCTAA": 2892, + "GATTAAAA": 2893, + "GCTTATG": 2894, + "CTACTTA": 2895, + "TAAAAAATT": 2896, + "TCAGTCC": 2897, + "CTATTAAA": 2898, + "GAATGGG": 2899, + "CACAGTA": 2900, + "CAACGG": 2901, + "GGTTATT": 2902, + "TCACCCA": 2903, + "TGATGCA": 2904, + "TAATTTTTT": 2905, + "GTTTGAGA": 2906, + "GTATTAAA": 2907, + "GCCCCCA": 2908, + "TATAGTA": 2909, + "TAGTAAA": 2910, + "TGATACA": 2911, + "GTGGTTTT": 2912, + "CCACTAA": 2913, + "CACAGAGA": 2914, + "CCTCTGCCTCC": 2915, + "CAAAAAAAA": 2916, + "CTCTCTCC": 2917, + "CATAATA": 2918, + "GAAGCCA": 2919, + "GTTCCCA": 2920, + "TGTGTTTG": 2921, + "CAATGGA": 2922, + "TGAAGTA": 2923, + "CTTCATA": 2924, + "CACTGTG": 2925, + 
"GCTCTTTT": 2926, + "TGACATA": 2927, + "TAAAGAAAA": 2928, + "GAGAAATG": 2929, + "CAGGGAGG": 2930, + "TGTTCAA": 2931, + "GAGCCAA": 2932, + "GACAGAGA": 2933, + "GGCTGAA": 2934, + "CAAATATA": 2935, + "GTGGAAAA": 2936, + "TAAGGTT": 2937, + "GTGATTA": 2938, + "GGATCTG": 2939, + "GATGTTA": 2940, + "GACTACACA": 2941, + "TCCTATA": 2942, + "CTGCCAA": 2943, + "TCCCGA": 2944, + "GTGATTTT": 2945, + "GCGTTTT": 2946, + "CAGAGTA": 2947, + "GAAAGGAA": 2948, + "CACTTTG": 2949, + "CCCCAAAA": 2950, + "GCAACCCA": 2951, + "TGCATTTT": 2952, + "TCTAGAA": 2953, + "TACTTTG": 2954, + "TGAGGCA": 2955, + "CATCTCC": 2956, + "TCGCTA": 2957, + "TGACTTTT": 2958, + "GAGCCTG": 2959, + "CATTTGTT": 2960, + "TCTTTGTT": 2961, + "GCAAAATT": 2962, + "CCTGATT": 2963, + "GATAAAAA": 2964, + "GAGTGTT": 2965, + "TCCTGTA": 2966, + "TACAGAAA": 2967, + "TCCAGGAA": 2968, + "GCCAGTG": 2969, + "TAGATTTT": 2970, + "TAATAGG": 2971, + "CTCCTCA": 2972, + "CATTTTTG": 2973, + "CATTTCAA": 2974, + "GCCATCA": 2975, + "TAAAATATA": 2976, + "GACTGTT": 2977, + "GCATGGA": 2978, + "CAAAGTT": 2979, + "CATGATT": 2980, + "GAGTTTG": 2981, + "CTAGCAA": 2982, + "CTTCCTA": 2983, + "GGGGAGG": 2984, + "CTATATG": 2985, + "TATTTATTTT": 2986, + "CACCATT": 2987, + "CCCTCAA": 2988, + "TTTTTTTTTTTTTT": 2989, + "GATCATT": 2990, + "GTACATA": 2991, + "CTCCATA": 2992, + "CCCCGTCTCTA": 2993, + "GCCTGCC": 2994, + "CTAGCTT": 2995, + "CCCGGA": 2996, + "GATGTTTT": 2997, + "GTATTTTA": 2998, + "TCAGATA": 2999, + "CCTGGAA": 3000, + "TATTCCA": 3001, + "GGACCAA": 3002, + "GCCATTA": 3003, + "CGACTGA": 3004, + "TAAGCTG": 3005, + "TAAACACA": 3006, + "GTTTCTC": 3007, + "CATCTTA": 3008, + "GAAATTTG": 3009, + "TAATGGG": 3010, + "TAAAATTTT": 3011, + "CTGTTCA": 3012, + "CCTGTTA": 3013, + "TACTGAA": 3014, + "TGACCCA": 3015, + "TGATTTTA": 3016, + "CTCCTTA": 3017, + "TATAGAA": 3018, + "CTGCGG": 3019, + "GCGGTA": 3020, + "GTGCTAA": 3021, + "CAGAGGAA": 3022, + "TACATCA": 3023, + "TCAATCAA": 3024, + "CTGCAGCC": 3025, + "TGAATATT": 3026, + "TCTACAA": 3027, + "CCACATA": 
3028, + "CCCGTT": 3029, + "TATACACA": 3030, + "TCCTCTC": 3031, + "TCTACTT": 3032, + "CCGGAA": 3033, + "CTTTTTTA": 3034, + "GAAAGAAAA": 3035, + "CTATCTT": 3036, + "GACTTTG": 3037, + "TGAACAA": 3038, + "GCAGTTTT": 3039, + "GCTAAAAA": 3040, + "GAGGCGG": 3041, + "TAATAAAAA": 3042, + "CTGGTCA": 3043, + "CAGACAA": 3044, + "GGATATG": 3045, + "TGAAGG": 3046, + "GCCAGAA": 3047, + "CCAGGCC": 3048, + "CCACCATG": 3049, + "CAAACTT": 3050, + "TCATGTA": 3051, + "GCTGCTT": 3052, + "GTAATA": 3053, + "CCCCCAA": 3054, + "CAGCCTG": 3055, + "TCAACTT": 3056, + "TAAAATTAA": 3057, + "GCTGAAAA": 3058, + "CGACGA": 3059, + "GTGGGCA": 3060, + "TGAGGGA": 3061, + "CGCTCC": 3062, + "TTTTGTTTT": 3063, + "GAGTCAA": 3064, + "TCATGCA": 3065, + "CTGCTTA": 3066, + "TAAGTTTT": 3067, + "GTAGCAA": 3068, + "CCTTGG": 3069, + "TGACAAAA": 3070, + "CTGGTAA": 3071, + "TCTTTATA": 3072, + "TGTGTGTT": 3073, + "CTGGTC": 3074, + "CTGGCAA": 3075, + "CATTTCTG": 3076, + "CTCTACC": 3077, + "CTGAGGA": 3078, + "CTAAAATG": 3079, + "CTAGATT": 3080, + "GTATCAA": 3081, + "CAGTCAA": 3082, + "CTGGGTG": 3083, + "CCTCTTA": 3084, + "TGAGTTTT": 3085, + "TTTTATTTA": 3086, + "CCTTTTTT": 3087, + "TATATACA": 3088, + "TAGCAAA": 3089, + "AAATTA": 3090, + "CTGGATG": 3091, + "GATAATA": 3092, + "GACAAAAA": 3093, + "CCTGGGA": 3094, + "GCTTTCA": 3095, + "GTACAGG": 3096, + "GCTGGAA": 3097, + "CTACTCA": 3098, + "CAATGTA": 3099, + "GCGTGAA": 3100, + "GATCCTT": 3101, + "TATTAATG": 3102, + "GCCCGA": 3103, + "TAAAGTG": 3104, + "GCTTCCA": 3105, + "CATGGAA": 3106, + "TGAAGTT": 3107, + "CTTTCTC": 3108, + "TCTGTGTG": 3109, + "GTATGTA": 3110, + "CAATACA": 3111, + "TCAAGG": 3112, + "CCTCTAA": 3113, + "TGTGGG": 3114, + "GATCTGA": 3115, + "GTACTGA": 3116, + "TTAATTAA": 3117, + "GCAGAAAA": 3118, + "CTACATA": 3119, + "CCGGTG": 3120, + "GGGGAAAA": 3121, + "TACAAAAAA": 3122, + "TTTTGG": 3123, + "GTGAGAA": 3124, + "TCAATAAA": 3125, + "TCAAGTT": 3126, + "CTCAGGA": 3127, + "CTACTC": 3128, + "CAAATCA": 3129, + "GGCAGAA": 3130, + "CCCGAA": 3131, + "TGTTGTG": 3132, 
+ "GAGCAAAA": 3133, + "TATTTGTG": 3134, + "GTAGGTT": 3135, + "CTACCTG": 3136, + "CACAAAAA": 3137, + "CTCAGG": 3138, + "GCTTTA": 3139, + "CAGAGCAA": 3140, + "CTCAGTG": 3141, + "GGAAGAGA": 3142, + "TAACCTG": 3143, + "GAAATATA": 3144, + "CGAGAA": 3145, + "GTGAGG": 3146, + "CATTTATA": 3147, + "GGCAGCA": 3148, + "TCTAAATT": 3149, + "CCCAGTG": 3150, + "GCCTAGG": 3151, + "TGCATTA": 3152, + "CCGTAA": 3153, + "CATTCCA": 3154, + "CTAGTTA": 3155, + "GACTTAA": 3156, + "CTATACA": 3157, + "GACACAA": 3158, + "TCTTCACA": 3159, + "CCGGTT": 3160, + "TAAAGTAA": 3161, + "CTGTGGA": 3162, + "TAAGGTG": 3163, + "TCCAGTA": 3164, + "CAAATTTA": 3165, + "AAATTAAAA": 3166, + "CCATCTA": 3167, + "CTCCCTT": 3168, + "CTCCTTTT": 3169, + "GAGAGAGAGAGA": 3170, + "GGAGATA": 3171, + "CCTATTA": 3172, + "CACCAAAA": 3173, + "CCGTTA": 3174, + "TGTTTATA": 3175, + "CTCAGGAGG": 3176, + "GACGTA": 3177, + "GTCCTTA": 3178, + "GAAAGTT": 3179, + "GCTGGTG": 3180, + "CTCTACA": 3181, + "CAATAGA": 3182, + "TAAAATATT": 3183, + "GTACCTG": 3184, + "GTACTAA": 3185, + "CTTTGAAA": 3186, + "CCTTTCC": 3187, + "TAAAAATTA": 3188, + "CTCGG": 3189, + "CAAGATA": 3190, + "CATTTGA": 3191, + "CACCTCA": 3192, + "GCCAGCC": 3193, + "GTCGG": 3194, + "GCACATA": 3195, + "CACTCAA": 3196, + "CTTTTAAAA": 3197, + "CAGGAATT": 3198, + "GCCTATT": 3199, + "TCTTTCTG": 3200, + "CTGAGGCAGGAGAA": 3201, + "CAGGCAGG": 3202, + "CTAGTAA": 3203, + "TCCATA": 3204, + "GAACTTA": 3205, + "CG": 3206, + "GCTGTGA": 3207, + "GAAAATA": 3208, + "TCTTCATT": 3209, + "GAGGGAGA": 3210, + "CCCATCC": 3211, + "GAGGTGGG": 3212, + "GCCTCTA": 3213, + "GTAGGTG": 3214, + "TAAACCA": 3215, + "GAAGGAAA": 3216, + "TATTGG": 3217, + "ATG": 3218, + "TCCAGTT": 3219, + "CCCACAA": 3220, + "GAAACACA": 3221, + "GTCTCAAAA": 3222, + "CTTTTCTTTT": 3223, + "TGAAGGA": 3224, + "TATTGATT": 3225, + "CTATGTA": 3226, + "AAAAAAAAAAAAAA": 3227, + "TCCTTAAA": 3228, + "GCGCTA": 3229, + "TCCACTT": 3230, + "GACTCAA": 3231, + "TAAATACA": 3232, + "TCATGGA": 3233, + "TCTGGGA": 3234, + "TCCTATG": 3235, + 
"CTGTGCA": 3236, + "TCAAGTGA": 3237, + "TCATAAAA": 3238, + "CATCCAA": 3239, + "CCTTCCA": 3240, + "CTGTACA": 3241, + "GAAGGTT": 3242, + "CTGTGTA": 3243, + "GTCACTT": 3244, + "TCACAAAA": 3245, + "TCAGGCA": 3246, + "GTGTTAAA": 3247, + "CCCTTAA": 3248, + "CAAAGTG": 3249, + "GAAATGTT": 3250, + "CTGGGGA": 3251, + "GACGCC": 3252, + "TATATGTG": 3253, + "CTAGATG": 3254, + "GAAATTAAA": 3255, + "GAATGCA": 3256, + "GCACTAA": 3257, + "CGGGAGG": 3258, + "GCCACAA": 3259, + "CGCTTA": 3260, + "TCCACAA": 3261, + "CAGATA": 3262, + "TCTGAATT": 3263, + "TATTATTTT": 3264, + "GCGCGG": 3265, + "CTCTGAAA": 3266, + "TCTCTTTG": 3267, + "TATTTCTA": 3268, + "GGGGTGGG": 3269, + "GGATGCA": 3270, + "CCACACC": 3271, + "TAAATGTG": 3272, + "TCTTCCTG": 3273, + "GCAAGG": 3274, + "CTGCTCC": 3275, + "CTGGAGTG": 3276, + "CTGTTAAA": 3277, + "CACACAAA": 3278, + "CTGACTT": 3279, + "GAAAAGAAAA": 3280, + "CCTTCTCC": 3281, + "GAAATAAAA": 3282, + "CCTCAGGTGA": 3283, + "GATAATG": 3284, + "GAATTGCTT": 3285, + "CCAAAATT": 3286, + "CGTGAAA": 3287, + "CACTGAAA": 3288, + "CAGTGAAA": 3289, + "GATCTTA": 3290, + "GAGATGGG": 3291, + "TCTGCCA": 3292, + "TGAGGTA": 3293, + "TATGGAA": 3294, + "TATATTTTA": 3295, + "TGAACTT": 3296, + "GCAGATA": 3297, + "CTTTTCTT": 3298, + "GTAAAATG": 3299, + "TCTCTAA": 3300, + "TCTGCAAA": 3301, + "GAGCCTT": 3302, + "TATCATT": 3303, + "CAATTTTA": 3304, + "CCGCCA": 3305, + "TATTTAAAA": 3306, + "GAGAGATG": 3307, + "GAGATGGA": 3308, + "GCCAGGATG": 3309, + "CGAGTAGCTG": 3310, + "TTCATTTT": 3311, + "TATACTT": 3312, + "GTCTACA": 3313, + "GTGAGTGA": 3314, + "GCTACACA": 3315, + "GGGAGGA": 3316, + "CAAGGCA": 3317, + "GCTTTTAA": 3318, + "CACTATT": 3319, + "GTTCATA": 3320, + "TCCTC": 3321, + "GTGGACA": 3322, + "TATTTGGA": 3323, + "CTCCAGTA": 3324, + "GTTCAGTT": 3325, + "CCAAGG": 3326, + "CAGAGCC": 3327, + "CTCGCC": 3328, + "CCGATG": 3329, + "GGAATTTT": 3330, + "TCCAGCC": 3331, + "CCTCTTTT": 3332, + "GAACCTT": 3333, + "CATGCACA": 3334, + "GTTTC": 3335, + "GAAGATA": 3336, + "TACCCC": 3337, + "GCTGCCA": 
3338, + "GGGGGAGG": 3339, + "GCAGTGAGCTGA": 3340, + "CTGTCTA": 3341, + "CGAGGA": 3342, + "CAATGGG": 3343, + "GCTGTGAA": 3344, + "GAAAGTG": 3345, + "TACCAAAA": 3346, + "GTCAGG": 3347, + "CAGCTCC": 3348, + "TGTGCTT": 3349, + "GTCTAGG": 3350, + "TTTTTGTA": 3351, + "TTATATG": 3352, + "TCAGGGG": 3353, + "TATTGTTA": 3354, + "CCTGAGA": 3355, + "TATCTCA": 3356, + "CAATCTG": 3357, + "CACTCTG": 3358, + "GATTTAA": 3359, + "TGAATAA": 3360, + "TCTTGTA": 3361, + "TCAACTG": 3362, + "TCTCCAGG": 3363, + "CTAGAGG": 3364, + "CTGAGAAA": 3365, + "CTAGCTG": 3366, + "TCCACCA": 3367, + "CGATTTT": 3368, + "CCGGCC": 3369, + "GTTGACA": 3370, + "CTTAGAA": 3371, + "CATAATG": 3372, + "GAGTATT": 3373, + "CACAGAAA": 3374, + "GACTGTG": 3375, + "CTATTTTA": 3376, + "TGAGGAAA": 3377, + "TTATTAAAA": 3378, + "CTTATTTA": 3379, + "CAGACTT": 3380, + "CACGCC": 3381, + "GCTTGG": 3382, + "CCTGCTT": 3383, + "TAAAGCAA": 3384, + "CCTCGTGA": 3385, + "TAGAATT": 3386, + "CTTACAA": 3387, + "TAAAGGAA": 3388, + "GTCTAGA": 3389, + "GTGACTT": 3390, + "TACATATG": 3391, + "GTCAGGA": 3392, + "GCTCCAGG": 3393, + "GAAGGGA": 3394, + "CATGATG": 3395, + "TCATCAAA": 3396, + "CGTTAAA": 3397, + "GTACTCA": 3398, + "CTCCCAA": 3399, + "TATATGTA": 3400, + "GGTATTTT": 3401, + "TAAGCCA": 3402, + "CGAAATT": 3403, + "GTTTGTTTT": 3404, + "TCTGTCTT": 3405, + "TATATCA": 3406, + "TGTTCATT": 3407, + "CAAACCA": 3408, + "TTCATTA": 3409, + "TATTTGTA": 3410, + "GATTGAA": 3411, + "CTATAAAA": 3412, + "GATTAATT": 3413, + "CCCACCA": 3414, + "TCCTAGG": 3415, + "TAAATGTA": 3416, + "CTCTTAAA": 3417, + "GCAGTCC": 3418, + "GCGGCTG": 3419, + "GTCTCGAA": 3420, + "TGAATGA": 3421, + "CTGGGGG": 3422, + "GTCTCGA": 3423, + "GAACAAAA": 3424, + "TGAATCA": 3425, + "TGTATTTTTAGTAGAGA": 3426, + "GTTATTAA": 3427, + "TTTTTTAAAA": 3428, + "GTCAGTG": 3429, + "CCCATTA": 3430, + "CACAGGA": 3431, + "TATTCCTT": 3432, + "TCTGCCTT": 3433, + "CCTGGTG": 3434, + "GCGAGC": 3435, + "TACTAAA": 3436, + "TACACAAA": 3437, + "CCGTCC": 3438, + "GCTTTGTT": 3439, + "GCATCCA": 3440, + 
"CATCTAA": 3441, + "GCTGTGTT": 3442, + "GTAGACA": 3443, + "GCCTATG": 3444, + "TCTTTGTG": 3445, + "GATTCTG": 3446, + "CGCCCGG": 3447, + "GATGAGA": 3448, + "TATCTGA": 3449, + "TGAATTTG": 3450, + "CCTGATG": 3451, + "TAAAACAA": 3452, + "CTTTAGG": 3453, + "TTTTCCTT": 3454, + "TGAATAAA": 3455, + "CGGGGA": 3456, + "CAAACATT": 3457, + "GTATGGA": 3458, + "GCTTAAAA": 3459, + "TACCAAA": 3460, + "CAAAGAGA": 3461, + "CTCCTGCC": 3462, + "GTAAAAAAA": 3463, + "CACAGCC": 3464, + "CCATGCA": 3465, + "TACAATT": 3466, + "CTAGTGA": 3467, + "CTGAGTT": 3468, + "GAGTGAAA": 3469, + "TCTGTTTG": 3470, + "CTGTAGG": 3471, + "TATAAAAAA": 3472, + "GCATTAAA": 3473, + "GTCCATA": 3474, + "TGTTAAAAA": 3475, + "TGTTTGA": 3476, + "GAATAGA": 3477, + "CTTCAAAA": 3478, + "CTGGACA": 3479, + "CTGTAGA": 3480, + "CCATTAAA": 3481, + "CTATCTG": 3482, + "CACTATG": 3483, + "TTATCAA": 3484, + "TAAGTAAA": 3485, + "TAATCCCAGCACTTTGGGAGGCC": 3486, + "CCAGAAAA": 3487, + "TGAAGCA": 3488, + "TCCCTTTT": 3489, + "TCATACA": 3490, + "TACGTT": 3491, + "GCCGTG": 3492, + "GGAAGTG": 3493, + "GGCCAAA": 3494, + "GTACCAA": 3495, + "TCTCTACTAAAAATA": 3496, + "CATTGTG": 3497, + "TGTGTGA": 3498, + "GAAACAGA": 3499, + "CTTGACA": 3500, + "GATGAGG": 3501, + "GAGATTTT": 3502, + "CCTTCAA": 3503, + "GAATCTA": 3504, + "CTCTCCTT": 3505, + "GGCGGA": 3506, + "TCTATCTATCTATCTA": 3507, + "CACACAGA": 3508, + "TGTGTGTA": 3509, + "CAAAGCC": 3510, + "TGTGCCA": 3511, + "GTTGAAAA": 3512, + "CTCCAGCA": 3513, + "TCAAGGA": 3514, + "TAGCTCA": 3515, + "CGCTGA": 3516, + "CCTGAAAA": 3517, + "GACTATT": 3518, + "GATTCCA": 3519, + "GCTTCTA": 3520, + "GTCTGCC": 3521, + "CTTGGCA": 3522, + "TGTGGTA": 3523, + "GCTTTGA": 3524, + "GCTCTCTG": 3525, + "CTCACAGA": 3526, + "TCTTTAAA": 3527, + "CAAAGCAA": 3528, + "TACTTAA": 3529, + "GCTTCAA": 3530, + "CATTGAA": 3531, + "GGAGGAAA": 3532, + "CTATAGA": 3533, + "CTGAGGAA": 3534, + "CCTGGCA": 3535, + "CCCTATT": 3536, + "CTCGTG": 3537, + "TTACACA": 3538, + "TTAGGAA": 3539, + "CTGGTTA": 3540, + "GTTGTCC": 3541, + "TAATGAAAA": 
3542, + "TATTTACA": 3543, + "GGGAATT": 3544, + "GTAGTTTT": 3545, + "GCTGCAA": 3546, + "CTACGG": 3547, + "GCCGGA": 3548, + "CTGGGCA": 3549, + "CCTTAAAA": 3550, + "GATGGAA": 3551, + "TAGATAGATAGATAGA": 3552, + "TATGTAA": 3553, + "GTACGG": 3554, + "TATTCAAA": 3555, + "GATCTCC": 3556, + "CCTGTTTT": 3557, + "TATTGCA": 3558, + "GGAAGGAAGGAAGGAA": 3559, + "GGTAATT": 3560, + "TTACAGA": 3561, + "TCAGC": 3562, + "GCAAAATG": 3563, + "GAGAGCA": 3564, + "GTAGAAAA": 3565, + "CATTTGAA": 3566, + "TCTTCTTTT": 3567, + "TCCCATA": 3568, + "GTTATTTA": 3569, + "CTATCTA": 3570, + "CATCCTG": 3571, + "TCTTGTG": 3572, + "TTATTATT": 3573, + "CCCGTC": 3574, + "TACTATG": 3575, + "TAAACATA": 3576, + "TAAGGAAA": 3577, + "GCTTGTG": 3578, + "CTCTAAAA": 3579, + "GTTTTAAAA": 3580, + "GACAGGA": 3581, + "TCCTAGA": 3582, + "TCCACCCA": 3583, + "GTTTGAAA": 3584, + "CCATCTCA": 3585, + "CTAAGAA": 3586, + "GTATCTA": 3587, + "GTGAGGA": 3588, + "GCTGGAGG": 3589, + "CCTGTAATCCCAGCTA": 3590, + "GCAACAA": 3591, + "CTTTCAAA": 3592, + "CAAATGTT": 3593, + "CTTGTCC": 3594, + "TCTCAAAAA": 3595, + "TATTTATTA": 3596, + "TAAGGCA": 3597, + "GAGAGGAA": 3598, + "TATGATT": 3599, + "GCATCTA": 3600, + "CGTTATT": 3601, + "GCCTGTA": 3602, + "GTTTCAAA": 3603, + "CCTTCCTTCCTTCCTT": 3604, + "GGCTTTG": 3605, + "GTCAGAA": 3606, + "CATGCATG": 3607, + "GTCATTTA": 3608, + "CTGGAAAA": 3609, + "CTTCGA": 3610, + "CCTATTTT": 3611, + "CCAACAA": 3612, + "TCCATCC": 3613, + "TAAAGTTA": 3614, + "GTCTCTC": 3615, + "TAATCAAA": 3616, + "GATTTTTG": 3617, + "GATTTCTT": 3618, + "GGGCTGA": 3619, + "GCATGTA": 3620, + "CCTGGGTT": 3621, + "GAGACAA": 3622, + "GCTGTCA": 3623, + "TGATAGG": 3624, + "GGAGACC": 3625, + "CCGGCA": 3626, + "TAATCTCA": 3627, + "TGAATTAA": 3628, + "TCTGGTG": 3629, + "GCCTC": 3630, + "GGCGCA": 3631, + "CCAGCTA": 3632, + "CAGTCTG": 3633, + "TGAACTA": 3634, + "GTAAGAA": 3635, + "CCTTTCA": 3636, + "TCCATGA": 3637, + "CAAAGGAA": 3638, + "CTCTC": 3639, + "CTCTCTCA": 3640, + "CTCCAGC": 3641, + "GTAGATA": 3642, + "CCCCCTCC": 3643, + 
"GGCGCC": 3644, + "TCTGTCC": 3645, + "GACCATT": 3646, + "CTTGAAAA": 3647, + "TTATCC": 3648, + "TACATGTG": 3649, + "CAAATTTG": 3650, + "TTTTGTG": 3651, + "CAGAGTG": 3652, + "GTAATAA": 3653, + "GTGAGTG": 3654, + "TTTTTCC": 3655, + "GGCTCTG": 3656, + "GCCCTAA": 3657, + "GGCTGTT": 3658, + "CCCAATT": 3659, + "CAGAGCTT": 3660, + "TATAAATG": 3661, + "GAGTCTG": 3662, + "TCTTAAAAA": 3663, + "GTTTTATG": 3664, + "GATCCAA": 3665, + "GGCCCTG": 3666, + "GATCCTG": 3667, + "TCAAGTG": 3668, + "GATTCAA": 3669, + "CCTCTCTT": 3670, + "GAGACGG": 3671, + "CAGATCA": 3672, + "TAAAAGAA": 3673, + "CTGAGCAA": 3674, + "CCTGCCA": 3675, + "CCTTCTA": 3676, + "CGCTCA": 3677, + "GGCTGTG": 3678, + "TGGGAAAA": 3679, + "GGAGCCTG": 3680, + "CTGAGTG": 3681, + "CGTCAAA": 3682, + "TCAAGTA": 3683, + "CGTAATT": 3684, + "TTACTTA": 3685, + "TATACTA": 3686, + "GGGCAAA": 3687, + "CAACTTTT": 3688, + "CTTTGCC": 3689, + "GCCAGGAA": 3690, + "CACACTA": 3691, + "GCCCAGC": 3692, + "TAAATAAATAAATAAA": 3693, + "CTTTCCTT": 3694, + "GGGAGAA": 3695, + "TATGGTA": 3696, + "CGGCCA": 3697, + "CCTCTCTG": 3698, + "GAAAGCAA": 3699, + "CAAGCCA": 3700, + "GGCGTT": 3701, + "CTCTTTTA": 3702, + "TCGGCCTCCCAAA": 3703, + "GATTTATT": 3704, + "CAAGTCC": 3705, + "TATCTTA": 3706, + "GTTCAAGACCA": 3707, + "CTCACACA": 3708, + "GAAATCAA": 3709, + "TGAGACC": 3710, + "GGGTAAA": 3711, + "GCTTGTT": 3712, + "GATTTTAA": 3713, + "TTTTTATA": 3714, + "CAGAGCTG": 3715, + "TCTGTTAA": 3716, + "GTAATTAA": 3717, + "TCTTTGAA": 3718, + "CTTGCCA": 3719, + "TTTTCATT": 3720, + "CCATGTA": 3721, + "TCTCGGCTCACTGCAA": 3722, + "GGATTCA": 3723, + "TCTATTAA": 3724, + "TACATAAA": 3725, + "GATTGATT": 3726, + "GGAGAGGA": 3727, + "CGCAAAA": 3728, + "GGACTAA": 3729, + "TTATGTG": 3730, + "GTCACTCA": 3731, + "GACAGCA": 3732, + "CGAGTT": 3733, + "GATGGTT": 3734, + "GGAAGAGG": 3735, + "GCCAACATGGTGAAA": 3736, + "GGAGCCA": 3737, + "TGAACTG": 3738, + "CCTCTGTG": 3739, + "GTATAAAA": 3740, + "TCCCAGAA": 3741, + "CATTTATG": 3742, + "GATTATG": 3743, + "TGTTTCTG": 3744, + 
"GAGTGGGTT": 3745, + "TACATATT": 3746, + "CTCCAGGA": 3747, + "GACACTG": 3748, + "GGTCTCA": 3749, + "CCGGGA": 3750, + "TGTTTAAA": 3751, + "CTCACCA": 3752, + "GGACTTA": 3753, + "GCCCACC": 3754, + "CAAATCAA": 3755, + "GAAATGTG": 3756, + "TAGTTAA": 3757, + "TCTATAA": 3758, + "TTAGATT": 3759, + "GTGTAGG": 3760, + "TACTGAAA": 3761, + "GCACCCA": 3762, + "GTGGGCTG": 3763, + "GAATGAAA": 3764, + "TCTAGTT": 3765, + "TCAGGAGA": 3766, + "TCCACTA": 3767, + "CTCAGTT": 3768, + "TACTTAAA": 3769, + "GACTCCA": 3770, + "TCCATTTG": 3771, + "CACAGCAA": 3772, + "GCTCATGCCTG": 3773, + "GGTGCTG": 3774, + "GCTTTCTT": 3775, + "GTGGCCA": 3776, + "TACGTG": 3777, + "GTGCAGTG": 3778, + "TGAAGTCA": 3779, + "CCTTTAA": 3780, + "TCTCAGCTCACTGCAA": 3781, + "GAAATATG": 3782, + "CCTCAAAA": 3783, + "GGGGCGG": 3784, + "CGACAA": 3785, + "GGTGATG": 3786, + "GTCTTAAA": 3787, + "CAGAAATG": 3788, + "CGTCATT": 3789, + "CCAAGCA": 3790, + "GGATCAA": 3791, + "GTGCTGGGATTA": 3792, + "GCTGGCC": 3793, + "CGGAGCTT": 3794, + "TACATGA": 3795, + "TGTTTGAA": 3796, + "TCTCCATT": 3797, + "TAAGCAAA": 3798, + "CCTTTCTT": 3799, + "TACTGTT": 3800, + "TCCATCTT": 3801, + "CTTACTT": 3802, + "CGGAGGTT": 3803, + "CAAAACAA": 3804, + "TCATAGG": 3805, + "TTACTAA": 3806, + "CTTATTTG": 3807, + "GAATGTA": 3808, + "CCCCATGGA": 3809, + "TTACTGA": 3810, + "CGGAAAA": 3811, + "CTCCAGTG": 3812, + "TGTTCCA": 3813, + "CAGATGAA": 3814, + "GTTGATA": 3815, + "TCCCCCC": 3816, + "CATTGCA": 3817, + "CTCAGCC": 3818, + "CTTACTG": 3819, + "TATCCTT": 3820, + "CTTTTATG": 3821, + "TGAGTAGCTG": 3822, + "GACTGAAA": 3823, + "CAATGAAA": 3824, + "CGACTG": 3825, + "CTTGGGA": 3826, + "GCAAGCA": 3827, + "TCACTCC": 3828, + "GATTTGA": 3829, + "CATTTTAAA": 3830, + "TCAACTA": 3831, + "GTCCAAAA": 3832, + "CACCCTG": 3833, + "TTACCTT": 3834, + "CAAGGGG": 3835, + "TTTTGGA": 3836, + "GTTATTTG": 3837, + "GCTACTG": 3838, + "CTGAGGCAGGAGAATG": 3839, + "GTGATGA": 3840, + "GTAGTC": 3841, + "TAGTATG": 3842, + "GTATAGA": 3843, + "GTGTCTA": 3844, + "GCTGCTA": 3845, + "TTAGTAA": 
3846, + "TAAACATG": 3847, + "GTCACCA": 3848, + "CATCTTTT": 3849, + "CATATAA": 3850, + "TCTCTCTA": 3851, + "TTTTATTAA": 3852, + "TATTCTAA": 3853, + "GAAATTTA": 3854, + "CTTCCCTG": 3855, + "TAAAGATG": 3856, + "TACGTA": 3857, + "GTTTATTA": 3858, + "GAAAAGAA": 3859, + "CCCACCCA": 3860, + "CAATTAAAA": 3861, + "CCGACA": 3862, + "CAAAGTGA": 3863, + "CAAACAAAA": 3864, + "GCAATTTT": 3865, + "CGATTAA": 3866, + "TTAGAGA": 3867, + "CTGATGA": 3868, + "GGAGGAGG": 3869, + "GTCCTGGG": 3870, + "TCATGAAA": 3871, + "GCAACCA": 3872, + "GTTGGCA": 3873, + "GCGGCGG": 3874, + "GTCCCCA": 3875, + "GTAGGGG": 3876, + "GCCATGTT": 3877, + "GTTCGAGA": 3878, + "GCCTATA": 3879, + "TAAATTCA": 3880, + "GGCCATT": 3881, + "GAAAACAA": 3882, + "TGTGTATG": 3883, + "GTACTC": 3884, + "TAGGGAA": 3885, + "CCTTGAA": 3886, + "TCTATTTG": 3887, + "GAGGGCA": 3888, + "GAAACTGA": 3889, + "TACGC": 3890, + "TACAAAAA": 3891, + "TCATTATT": 3892, + "GGAAAATT": 3893, + "TCAATATT": 3894, + "CCCGTA": 3895, + "GGAGAGAA": 3896, + "TTAGTTA": 3897, + "CTCAGAGA": 3898, + "TCGAGC": 3899, + "CTAGTCA": 3900, + "GATGGCA": 3901, + "TGAACATT": 3902, + "CTATGGG": 3903, + "CACACCA": 3904, + "TCAATTAA": 3905, + "GGAACTG": 3906, + "TTACATG": 3907, + "CTTTCATT": 3908, + "CAGCTCTG": 3909, + "TCTTTTTTTT": 3910, + "TAAATCTT": 3911, + "TGATCTA": 3912, + "CATACAA": 3913, + "GCTCAAAA": 3914, + "GCTGTGTG": 3915, + "TCAATCA": 3916, + "GATTTGAA": 3917, + "CCAAGGA": 3918, + "GTCCTCA": 3919, + "GTGCTCC": 3920, + "AAAATAA": 3921, + "GTGACAA": 3922, + "GCTCACGCCTG": 3923, + "CGACGG": 3924, + "TATCCAA": 3925, + "CACACATG": 3926, + "TCTCTCTCC": 3927, + "TGTGGTT": 3928, + "CTTGGTA": 3929, + "TCTGGTT": 3930, + "TTTATAA": 3931, + "CTGCTTTT": 3932, + "TGTGTCA": 3933, + "CACATCA": 3934, + "CCTAATG": 3935, + "CGTTTTTT": 3936, + "GCTGGCA": 3937, + "GACGTC": 3938, + "TATAATTA": 3939, + "TACAGTAA": 3940, + "GAAAGTAA": 3941, + "GTCTGAAA": 3942, + "CCCATTTT": 3943, + "TATATGA": 3944, + "CTTGATA": 3945, + "CTTTATTTT": 3946, + "CTTTATTA": 3947, + "GGCGAA": 3948, + 
"CCATGCC": 3949, + "CCTGCCTT": 3950, + "GAAGAAGAAGAA": 3951, + "CTGACTGA": 3952, + "GCCCTTA": 3953, + "TATCTAA": 3954, + "GTGTTTTA": 3955, + "TGTGGCA": 3956, + "TATTGTAA": 3957, + "GCCAGAAA": 3958, + "CCCTGTCTC": 3959, + "CACAGGAA": 3960, + "AAAACAA": 3961, + "AAAAAAAAAAAAAAA": 3962, + "TAACTCC": 3963, + "GCCTAAA": 3964, + "CGAGTA": 3965, + "TAGTATT": 3966, + "GTATTTTTAGTAGAGA": 3967, + "GCTGCAGG": 3968, + "TATTGAAA": 3969, + "CCAGCCTGGG": 3970, + "GCTCCAAA": 3971, + "TACGAA": 3972, + "GGCCTCC": 3973, + "TATACAAA": 3974, + "CATGGCA": 3975, + "CATGCAA": 3976, + "TACACCA": 3977, + "CTTTACCA": 3978, + "TACAGAGA": 3979, + "TATTCTTA": 3980, + "TATGTCA": 3981, + "TCAAGCA": 3982, + "TCAATGA": 3983, + "GGCTCTT": 3984, + "GGAAGTT": 3985, + "TCCATGTT": 3986, + "GCTTTCC": 3987, + "TATGTGA": 3988, + "GTGTAGA": 3989, + "TTTTTAAAA": 3990, + "GCTGGAGA": 3991, + "GTGAGAGA": 3992, + "CCTAGAA": 3993, + "CCTCCAAA": 3994, + "CCAATGA": 3995, + "CAGGGCA": 3996, + "CTATGCA": 3997, + "CTTCACC": 3998, + "CTACAAAA": 3999, + "CTCACC": 4000, + "GAGTATG": 4001, + "TAGAAAAA": 4002, + "CTTTTGAA": 4003, + "TAAAGAGA": 4004, + "CATGTCA": 4005, + "TCTTTTAAA": 4006, + "CACAGTGA": 4007, + "GATCTAA": 4008, + "TAAGGTA": 4009, + "CATAGAA": 4010, + "CGCGCC": 4011, + "CAGCTTA": 4012, + "TATAGTT": 4013, + "CGGGCC": 4014, + "TATCCATT": 4015, + "TGTTTGTTTT": 4016, + "GCTGGCTG": 4017, + "TACAGGA": 4018, + "CTCCTTTG": 4019, + "CAATCTA": 4020, + "CCCCCTG": 4021, + "TATACTG": 4022, + "CTGAGCC": 4023, + "CGGTTA": 4024, + "TGAAGTG": 4025, + "GCTTCCTT": 4026, + "TTTTATTTG": 4027, + "TAGTGAA": 4028, + "CTGAGGTG": 4029, + "TCTTCTC": 4030, + "GACAGAAA": 4031, + "CTGAACTGAA": 4032, + "CCTGGGAA": 4033, + "TCCCCAAA": 4034, + "TATGTATT": 4035, + "GATTTCTG": 4036, + "CATTCAAA": 4037, + "CACAGTT": 4038, + "GCTTGAA": 4039, + "GTGGATCA": 4040, + "CTGAGTGA": 4041, + "TGAATTTA": 4042, + "TCAACAAA": 4043, + "GGTCATT": 4044, + "GTAATTTA": 4045, + "GCGACTT": 4046, + "CTGAGAGA": 4047, + "GTGCCCA": 4048, + "CTAGGTT": 4049, + 
"TCCTGAAA": 4050, + "GTCCACC": 4051, + "TCACAGAA": 4052, + "GCGAAAA": 4053, + "GTATGGG": 4054, + "TGAACAAA": 4055, + "TAAACAAAA": 4056, + "CCGTTTT": 4057, + "TCTCAATT": 4058, + "TCCAGAAA": 4059, + "GTAACAA": 4060, + "GCATTTTA": 4061, + "TCTCCATG": 4062, + "TTATAAAA": 4063, + "CAGGCAA": 4064, + "CTAAAAAAA": 4065, + "GTTGGGA": 4066, + "TAAAGATT": 4067, + "TGAAGAGA": 4068, + "CCCCTCA": 4069, + "TGTTTATG": 4070, + "TCTACTG": 4071, + "CCAATTTT": 4072, + "GGTGGTG": 4073, + "GGAACAA": 4074, + "TGTGGGA": 4075, + "TCTGCTA": 4076, + "GAACGA": 4077, + "GTAAGTA": 4078, + "GTTGCCA": 4079, + "AAAATTTT": 4080, + "GCGCGA": 4081, + "GAAAGATG": 4082, + "GTCTCTCA": 4083, + "TCCATCAA": 4084, + "GCAGCTA": 4085, + "CACATTTG": 4086, + "CTGACAA": 4087, + "TCCACC": 4088, + "GCT": 4089, + "CCCACTT": 4090, + "GCAGGTA": 4091, + "GAGGCCA": 4092, + "TAAAGTCA": 4093, + "CTGGATA": 4094, + "CGGCAA": 4095 + }, + "merges": [ + "A A", + "T T", + "T G", + "C A", + "C C", + "T A", + "G G", + "T C", + "G A", + "AA A", + "G C", + "T AA", + "TT TT", + "T CA", + "TG A", + "TT A", + "G AA", + "T CC", + "C AA", + "C TG", + "C TT", + "G TG", + "G TT", + "G CA", + "GG A", + "C CA", + "G TA", + "G CC", + "C TA", + "T AAA", + "AA AA", + "C TC", + "G TC", + "TG TG", + "TA TT", + "CA CA", + "G AAA", + "TA TA", + "TC TT", + "TG TT", + "C AAA", + "GA GA", + "CA TT", + "TG AA", + "CA GG", + "TC TG", + "CA GA", + "TC AA", + "GG AA", + "TAA AA", + "C TGA", + "GC TT", + "G TGA", + "GC TG", + "C TCA", + "CC TT", + "CA TG", + "GC AA", + "G TCA", + "G TAA", + "TTTT A", + "TA TG", + "GA GG", + "C GG", + "GA TT", + "CC TG", + "TC TC", + "CC AA", + "G TTA", + "C TCC", + "C TAA", + "TA CA", + "C TTA", + "TC CA", + "GA TG", + "TT AA", + "GAA AA", + "TT TG", + "G TTTT", + "TC TA", + "GC CA", + "G TCC", + "C TTTT", + "GG GG", + "C GA", + "TT TA", + "CC CA", + "CAA AA", + "TG GG", + "TA GA", + "TA GG", + "GA CA", + "GG TT", + "CC CC", + "GG TG", + "CA TA", + "GC TA", + "TG TA", + "TC AAA", + "TG GA", + "TAA TT", + "TTA TT", + "TG 
CA", + "GG CA", + "GA TA", + "CC TA", + "TT CA", + "TC TCA", + "GG GA", + "C GC", + "CTG AA", + "G TAAA", + "TC TCC", + "TTTT TT", + "C GTG", + "GC AAA", + "TAA AAA", + "TC TGA", + "TCA TT", + "GG AAA", + "TG AAA", + "TCC TT", + "CC AAA", + "GAA TT", + "C TAAA", + "C GTT", + "GTG AA", + "GG CC", + "TAA TA", + "GG TA", + "TG CC", + "CA CC", + "TGA TT", + "AAAA AA", + "GC TCA", + "TCC AA", + "GA GAA", + "CTG TT", + "TA TTA", + "CA GCA", + "CTC TT", + "CTT AA", + "CA GAA", + "GC TGA", + "GTT AA", + "TC TTA", + "TA TTTT", + "GCC AA", + "CTT TG", + "GA CC", + "C GCA", + "GTA TT", + "GTC TT", + "CAA TT", + "GTG TT", + "CTC AA", + "GGA GG", + "C GAA", + "TC TTTT", + "GTC AA", + "C GCC", + "TA TAA", + "TA CC", + "TC TAA", + "CCA TT", + "C GGA", + "CAA AAA", + "CA GTG", + "TCC TG", + "CTC TG", + "GAA AAA", + "CTG TG", + "CA GC", + "TTTT AA", + "GCA TT", + "GCC TT", + "TAA TG", + "CTA TT", + "GTT TG", + "TGA TG", + "GG CTG", + "CC TCA", + "GA GGA", + "GCC TG", + "AAA TT", + "C GTA", + "TC AAAA", + "TA CAA", + "CA TCA", + "CA GTT", + "TGA GA", + "GG GAA", + "CA CTG", + "CA CAA", + "CA GGA", + "CC CCA", + "CC CTG", + "TTTT TTTT", + "TA GAA", + "GA GCA", + "CC TCC", + "CA CCA", + "TA TCA", + "GA GC", + "CA TTA", + "CACA CACA", + "GA GTG", + "GGA TT", + "TGTG TGTG", + "TA CTT", + "CA CTT", + "GTC TG", + "TGA GG", + "GA GTT", + "GAA TG", + "TCA TG", + "GA CAA", + "GA CTT", + "TATT AA", + "TAA TAA", + "GG CCA", + "CA TTTT", + "CA GCC", + "CC CTT", + "GC TAA", + "TATA TATA", + "GTG TG", + "TA CTG", + "TA GTT", + "CAA TG", + "GC TC", + "CA GTA", + "GC TCC", + "CA TAA", + "TTA TG", + "TAAA TT", + "GA TGA", + "CA TGA", + "GC GG", + "AAAA AAAA", + "CCA TG", + "GA TAA", + "GA CTG", + "TA TGA", + "GCA GG", + "GA TCA", + "G TTTTA", + "GGA TG", + "CC TGA", + "G TAAAA", + "GAA GG", + "GA TTA", + "CC TC", + "GA CCA", + "GC TTA", + "CC CAA", + "AAA TG", + "GCA TG", + "TA GTA", + "TA CCA", + "GG CTT", + "C GTC", + "TC TCTT", + "GG TCA", + "TTA TTA", + "TA CTA", + "TA GCA", + "TA TC", + "CTG 
GG", + "CA TC", + "C TTTTA", + "C TAAAA", + "GTG GG", + "GA GTA", + "CCA GG", + "GA TTTT", + "TA GTG", + "GAAA TT", + "CA CTA", + "TC GG", + "TCA GG", + "CAGG AA", + "GC AAAA", + "CC TTA", + "CA TCC", + "CTT GG", + "TGTG AA", + "TATT TG", + "CC TAA", + "CTA TG", + "GA GAAA", + "GAGA GAGA", + "GC TTTT", + "TA TAAA", + "CAA GG", + "TC TCTG", + "TGTT AA", + "TGTG TT", + "GA GCC", + "GA CTA", + "TA TATT", + "TAA AAAA", + "TTTT TG", + "GTA TG", + "CATT AA", + "TA GGA", + "TA GC", + "GTT GG", + "GAA GAA", + "TAAA TG", + "TC TGTT", + "CA GAAA", + "CAAA TT", + "TAA TTA", + "TC TGTG", + "TA TCC", + "TGAA TT", + "CTC CA", + "GTG AAA", + "GG CAA", + "GGA GA", + "GAA GA", + "GG TGA", + "GG GCA", + "CC AAAA", + "TCTC TCTC", + "CTG CA", + "CTT CTT", + "TCTT AA", + "CC CTA", + "TGTG TG", + "AAA TA", + "TGTT TG", + "GG GTT", + "GTG CTG", + "GG AAAA", + "GG GGA", + "TCA GA", + "CC TTTT", + "GAAA TG", + "GCA GCA", + "TC TGAA", + "GG GTG", + "CACA TT", + "TCTT TG", + "GG GC", + "TCC CA", + "TC CATT", + "CTG AAA", + "CTT TA", + "TC GA", + "GTT TA", + "CAA CAA", + "CTT CC", + "GCC TCC", + "TT AAA", + "GC TCTG", + "GTT TCA", + "GGA GGA", + "C GTGA", + "CA GTC", + "GAA TA", + "CA GAGA", + "CC CTC", + "CAAA TG", + "CTG CTG", + "GA TCC", + "TTTTA TT", + "AAAA TT", + "TTA TA", + "TCAA TT", + "GG TAA", + "GTTA TT", + "GC CAGG", + "GGA GAA", + "CATT TG", + "TCA CC", + "CTC AAA", + "GG TTA", + "TCC AAA", + "TC TATT", + "GCA GA", + "CTT CA", + "TCA TCA", + "C GAGG", + "TAA CA", + "GTT GTT", + "CTTA TT", + "C GTCA", + "TAA GA", + "TAA TTTT", + "CTG TA", + "TC CACA", + "GC TGTG", + "C GCTG", + "TC TAAA", + "GC GA", + "CAA TA", + "CCA CCA", + "GAA CA", + "C GAAA", + "CAGA TT", + "TCA CA", + "TTA TTTT", + "TC TCAA", + "TGA CA", + "CTCC AA", + "AAAA AAA", + "TATA TG", + "TCC TCC", + "TCA CTT", + "TC CAGG", + "CAA GA", + "GG CTA", + "GTG GTG", + "C GTAA", + "C GAGA", + "TGA TA", + "GGA TTA", + "CAA CA", + "C GATT", + "TGA GAA", + "CTCC TT", + "CTCA TT", + "GTT AAA", + "TCA TA", + "CC TCTG", + "CTC 
TA", + "GC TGAA", + "CTG GA", + "TAA GG", + "CTT AAA", + "TATT TA", + "CCA CA", + "CC GG", + "GTC AAA", + "TG GAA", + "C GGAA", + "TGA TGA", + "GTT CA", + "TAA CAA", + "GC TGTT", + "TAA GAA", + "CTG CC", + "TTAA TT", + "CCA GA", + "TCA GAA", + "GTCA TT", + "C GCTT", + "GATT AA", + "CTGA TT", + "GC CACA", + "GTAA TT", + "TC CAGA", + "GCC AAA", + "GTGA TT", + "TAAAA TT", + "CAA GAA", + "CCA CC", + "TAA TCC", + "GTT CTT", + "TC CATG", + "GC TCTT", + "TG CTG", + "GG GTA", + "TTA CA", + "GC CATT", + "GCA CA", + "GCAA TT", + "TCC CTG", + "TG TGA", + "TC GAA", + "GGA CA", + "GGAA TT", + "GTG GA", + "CTT CTG", + "TCC CC", + "GCC CC", + "CTT GA", + "TAA TGA", + "TAAA TA", + "TATA TA", + "CTG CAA", + "TCA TTA", + "GTA TA", + "TCC CCA", + "C GTTA", + "GCA GAA", + "TGA GTT", + "CTTTT TT", + "C GATG", + "CTT TCA", + "AAAA TG", + "CAGG TT", + "CTAA TT", + "C GCCA", + "TGAA AAA", + "GTT CC", + "GTCC TT", + "GTCC AA", + "GTTTT TT", + "CTC TGA", + "GC GC", + "GTT GA", + "TGAA TG", + "CTA TA", + "GCA GTG", + "CCTT AA", + "TCA CCA", + "TCA CTG", + "GCC CTG", + "TAA CTT", + "CAGA TG", + "GTA GG", + "TC TATA", + "GAGA TT", + "GTC TA", + "TTTT AAA", + "CACA TG", + "TGA CC", + "CA CAAA", + "GTG TA", + "GG GAGG", + "GCTT TG", + "CAA AAAA", + "GA GGAA", + "GTT CTG", + "TTTT TA", + "GTC TCA", + "GTT CAA", + "TC GTG", + "GCTT AA", + "GCA CC", + "CTCC TG", + "TAAA TAAA", + "CTA CA", + "CTT CCA", + "TCC TCA", + "C GCAA", + "GAA AAAA", + "GCC CA", + "TC GTT", + "GTA GA", + "CTC TCA", + "GTC CA", + "TGA CTT", + "TCC CTT", + "GC CATG", + "CACACACA CACACACA", + "GTGA TG", + "CC TCTT", + "GC CAGA", + "TCC TA", + "C GTTTT", + "GTA CA", + "GCA TA", + "GAA TTA", + "TGTGTGTG TGTGTGTG", + "CC CAGG", + "GG TTTT", + "TCAA AAA", + "TC TATG", + "CCA TA", + "TGA CAA", + "GGA TA", + "TCA GTG", + "GTA TTTT", + "GAGA TG", + "GC GTG", + "C GTCC", + "TTAA AAA", + "TAA TCA", + "CAA TTA", + "CCA CTG", + "CGG TT", + "GTT GAA", + "TGA TTA", + "CCTT TG", + "CGG TG", + "CAGG TG", + "TCAA TG", + "CTGA TG", + "TCA GGA", 
+ "GTT TAA", + "TATT AAA", + "CTC TTA", + "GCA GGA", + "CTC TCC", + "GAA CC", + "CTT TAA", + "GG GCC", + "GTA TTA", + "GC GCC", + "CCAA TT", + "GC TAAA", + "TGA CTG", + "GATT TG", + "GA TAAA", + "TCA GCA", + "GTT CCA", + "GAAA TA", + "GA CAAA", + "GA GTC", + "GC TATT", + "TCA CAA", + "GAGG TT", + "TAA CC", + "GAA GGA", + "GC TCAA", + "GAAAA TT", + "CCA GCA", + "GTTTT AA", + "GTG CC", + "TGA GGA", + "CA TAAA", + "GG TCC", + "TCA TTTT", + "TATT TATT", + "TAA TAAA", + "GCC TA", + "CTTTT AA", + "TAA GTG", + "TAA GTA", + "CTG GAA", + "CACA CA", + "GA CAGA", + "CAA CC", + "GG GAAA", + "CCA GAA", + "TCA GTT", + "TAA CTA", + "CTAA AAA", + "TGGG TT", + "TGA GTG", + "TAAAA TG", + "TATATATA TATATATA", + "GCA CTG", + "GA CTC", + "TA CAAA", + "TAAAA AAA", + "TC TACA", + "GTT GTG", + "TC GCC", + "CC CAAA", + "GTCA TG", + "CTG CTT", + "GGAA TG", + "CTA TTA", + "GA TATT", + "TA GAAA", + "GG CAGG", + "GA TGAA", + "GTA GAA", + "TCC TGA", + "TAA CTG", + "GCTG GG", + "GCAA TG", + "GCC CCA", + "GTT TGA", + "CATT TA", + "GTG CA", + "CTT GAA", + "GTG GAA", + "CTT CAA", + "TAAA TTA", + "GTG GCA", + "TCC TTA", + "GGAA AAA", + "TTTT TTA", + "CC TGTG", + "GTAA TG", + "GTG TTA", + "CTA GG", + "CAGG CTG", + "GA CACA", + "GAAAA AAA", + "TC GC", + "GTAA AAA", + "TGTT TA", + "TCTC TA", + "GTCC TG", + "CCA GGA", + "GAA CAA", + "TAA GTT", + "TGA GCA", + "GC TCCA", + "TAA GCA", + "CTCA TG", + "GTC TTA", + "CC CACA", + "CA TATT", + "GCC TCA", + "CA CTC", + "CTT CTA", + "TGA TTTT", + "TC GCA", + "CC TGTT", + "GAA GCA", + "GCAA AAA", + "GC GGA", + "CCA CAA", + "GC GCA", + "CA TATA", + "GA CATT", + "GTT CTA", + "CAAAA TT", + "GAAA GAAA", + "CC CGG", + "TA CACA", + "CCAA AAA", + "GAGG TG", + "GG CTCA", + "CA GTGA", + "TCC CAA", + "TA TCTT", + "TGA GTA", + "TC GTA", + "TTTT CTT", + "GTG GGA", + "GA GCTG", + "CC CTCC", + "TAGG TT", + "TTA GG", + "TAA TATT", + "CCA GCC", + "CA TCTT", + "GTC TGA", + "GTT TCC", + "CC TGAA", + "GGA GCA", + "GAAAA TG", + "TCA GTA", + "TAA CCA", + "GA TGTT", + "CTG TTA", + "CA 
TGTT", + "GG CGG", + "CA TGTG", + "GG GAGA", + "CTT TGA", + "TCTT TCTT", + "AAAAAA AAA", + "GGGG TG", + "CTT TCC", + "CTT GTT", + "GCA TTA", + "CC CAGA", + "CAAA TA", + "TC GGA", + "CA GCTT", + "TCA CTA", + "TAA TTAA", + "TAA GGA", + "GAA CTG", + "GCA CAA", + "GC GTT", + "GG CTC", + "TC TTTTA", + "CC TCCA", + "GG CAAA", + "CA GCTG", + "CTA CAA", + "TA CATT", + "GC TATG", + "CTT GTG", + "GA GTCA", + "GTTA TG", + "CTG CCA", + "GTC TCC", + "TGA CCA", + "CA CCTG", + "TATA TTA", + "TGA TCA", + "CA GCAA", + "GA TGTG", + "GTC TTTT", + "CTA GAA", + "GC TACA", + "CTG GGA", + "GGGG TT", + "CAA GTA", + "CAA GGA", + "CC CTCA", + "TA GCC", + "GTT GGA", + "GC TATA", + "TCTG AAA", + "TA TGTT", + "CC CCTT", + "GTT GTA", + "CC CTGA", + "TGA CTA", + "CAA GCA", + "CAA TAA", + "GAA CTT", + "CA TGAA", + "CTTA TG", + "CTAA TG", + "TC TAAAA", + "CCAA TG", + "GAA GTG", + "CC TCAA", + "CC CATT", + "CA GTCA", + "GAGAGAGA GAGAGAGA", + "TA TGTG", + "GCA GTGA", + "TCTCC TT", + "TCC CAAA", + "CCA TTA", + "CCA GTG", + "GCA TCA", + "TCAAA TT", + "GA TCTT", + "GA CAGG", + "GGA GTG", + "GTA GTA", + "CAA CTT", + "GAA GTT", + "CC CCTG", + "TCTC AAA", + "GG GTC", + "GA GCTT", + "TATG AAA", + "TA TGAA", + "GA CATG", + "CAA GTG", + "GA TATA", + "CA TCTG", + "CTG TGA", + "TAA TTTA", + "GG CAGA", + "GC GAA", + "CC TAAA", + "CCA TCA", + "CA CTGA", + "GGA CTA", + "GA CGG", + "CTC TTTT", + "CTG TCA", + "TCTCTCTC TCTCTCTC", + "TTAA TG", + "GCA GCC", + "CAAAA AAA", + "GCA CCA", + "CTA TTTT", + "GA GCAA", + "CTT GGA", + "CTG GTG", + "GAA TAA", + "TCC TTTT", + "GAA GTA", + "CA GTAA", + "CAA CCA", + "CTG TAA", + "TGA TAA", + "GCA GTT", + "CA CGG", + "TAAA TAA", + "CTG TTTT", + "CTA CTA", + "GC TCTA", + "C GAAAA", + "CAA GTT", + "CTT GTA", + "GAA TGA", + "GA GTGA", + "GCC TGA", + "GG TTTG", + "CC CATG", + "GG GGAA", + "GAA GAAA", + "TG TTA", + "CAA TTTT", + "TATA TTTT", + "CTC AAAA", + "GG TGGG", + "CC GTG", + "TATT TCA", + "CC CCAA", + "TATT TAA", + "GG CTGA", + "GG TGTG", + "CA TCAA", + "CA CTCA", + "TCTCA TT", 
+ "GAA TTTT", + "GAA TCA", + "CAGG AAA", + "CA TACA", + "TA TTTTA", + "TTA TAA", + "GAGG AAA", + "CA TATG", + "CTT TCTT", + "CAA CTG", + "GG GCTG", + "CC CCCA", + "TTTG AAA", + "CATT AAA", + "CTT AAAA", + "GA CTGA", + "CAA TGA", + "GG CACA", + "CCA GTA", + "GGA TGA", + "GTTTT TG", + "GCA TTTT", + "GTG CCA", + "GCA GTA", + "GCC CTT", + "TC GTC", + "GAA CTA", + "GTG GTT", + "GTG TGA", + "GTG CTT", + "C GCTA", + "GTG TCA", + "TCTT TA", + "GCC TTA", + "CC TATT", + "CAAAA TG", + "GAA CCA", + "CTC CAGG", + "GA CTCA", + "CATG AAA", + "GC TAGG", + "TGTT AAA", + "GC GTA", + "GCA CTT", + "TCTT AAA", + "TAA GAAA", + "GG CCTG", + "TCC CTA", + "GTG GTA", + "CTG CTA", + "GGA GTT", + "GG TAAA", + "CAAA CAAA", + "GA TATG", + "TCA TGA", + "GA CCTT", + "TAA TATA", + "GC TAGA", + "GGA CTG", + "GG CATT", + "CA GTTA", + "CC CTAA", + "CA CCTT", + "GG TGAA", + "CA GCTA", + "GTG TTTT", + "CAA CTA", + "GA TCAA", + "GA GAAAA", + "TGTG AAA", + "AAAA TA", + "GATG AAA", + "CTC TAA", + "TTA CTT", + "GA TCTG", + "CCA CTT", + "GA GTTA", + "CAA TCA", + "GGATTA CAGG", + "TTTA TTTT", + "TACA TA", + "TTTTA TG", + "GA GTAA", + "GCTG AAA", + "GTA CTG", + "GC TCTC", + "TATG TA", + "TGTG TA", + "TCA TAA", + "GGA CTT", + "TCTCC AA", + "GCA TGA", + "GA CGA", + "CGCC TG", + "GA CCTG", + "GG TCTT", + "CA CCAA", + "GA TC", + "GA CCAA", + "AAAA TTA", + "GTAAA TT", + "CCA GTT", + "CA GAAAA", + "TAA CAAA", + "GG TGTT", + "GAAA TTA", + "TGCC TCA", + "CC GCC", + "CCA TTTT", + "CTT GCC", + "TCTG TA", + "CTG GCA", + "GG GATG", + "CCA TGA", + "CTA CTT", + "TAGG TG", + "TAAAAA TT", + "GAAA GAA", + "TAAAA TA", + "CTTTT TG", + "GTC AAAA", + "GGA CAA", + "TCTGA TT", + "CTC TCTT", + "TAA TTTG", + "CTC TTTG", + "GG CCTT", + "GGA TTTT", + "CTA CTG", + "GTT GCA", + "GG CTCC", + "CTC TGTG", + "CTC CAGCC", + "TTA CAA", + "GGA CCA", + "GGAA GGAA", + "TAAA GAA", + "TTA GAA", + "GTG AAAA", + "CTT GCA", + "TGGG TG", + "GGA GCC", + "CC TCTA", + "C T", + "GG GCTT", + "GG CATG", + "CTG GTT", + "TA CAGA", + "GATT AAA", + "CTC TGTT", + 
"TTA TCA", + "CTG AAAA", + "GTA GTT", + "GG GTCA", + "G T", + "CA GCCA", + "GC GTC", + "CA CTTA", + "GTG CTA", + "TC TTATT", + "GTA CTT", + "GG TATT", + "TA GAGA", + "TA CATG", + "CCA CTA", + "TGA GAAA", + "CAA TAAA", + "TCC AAAA", + "CGTG AA", + "GG TCTG", + "CTGAA TT", + "TCA GCC", + "CC TCTC", + "GTT AAAA", + "GG GATT", + "TCC TAA", + "CA CTAA", + "GGA GAAA", + "CCTT CCTT", + "GTT TCTT", + "TA TCAA", + "GA TACA", + "TAATCC CAGCA", + "CC GCA", + "TGAAA TT", + "C GTAAA", + "CTC TCTG", + "TC TTTTTT", + "GTA CAA", + "CCAAA TT", + "TGTA TTTT", + "TC GCTT", + "GG GTGA", + "GA TAGA", + "CTT TATT", + "TAAA CAA", + "GTT TATT", + "TGAA TA", + "CTA CCA", + "GTG TCC", + "CC CGA", + "TTTA TTA", + "CTCC AAA", + "TTTTTTTT TTTT", + "TCA TCC", + "GAA GCC", + "CTAAA TT", + "CAAA TTA", + "CCCC AAA", + "TCTT CTT", + "TAGG AAA", + "CA CGA", + "CA TTTTA", + "GTG CAA", + "TCTCC TG", + "TATTTT AA", + "GTT TGTT", + "GA GCCA", + "GG CCAA", + "CATT TCA", + "CA TCCA", + "CC TATA", + "GA CTTA", + "TCAAA TG", + "GTA TCA", + "TAAA TTTT", + "CTGA GGCA", + "GCC CAA", + "GG TTAA", + "TA TCTG", + "TGA CAGA", + "GGA GAGA", + "GCTG CTG", + "CC CTTA", + "TCC TCTG", + "GTA GCA", + "CCTG AAA", + "CC GAA", + "TTTT TAA", + "CTA TAA", + "CCTG TA", + "TTA CTG", + "GTA TAA", + "GG CGA", + "GA CTAA", + "TCA GAAA", + "GTG TGTG", + "CAAA GAA", + "CC TATG", + "GCA GAGA", + "CC GTT", + "TTTTA TTTT", + "GGAA GAA", + "TTA CTA", + "GCC TGGG", + "TCC CTC", + "TCC TCTT", + "GGA TCA", + "GG TCAA", + "TC GAGA", + "TATT CTT", + "TA CTC", + "GTTAA TT", + "GC GAGA", + "CTTAA TT", + "TCC TTTG", + "GTC TAA", + "CA CCCA", + "GG GTTA", + "GG GCAA", + "GGAAA TG", + "GCAAA TT", + "TA GATG", + "GCA GAAA", + "AAAAAAAA AAAAAAAA", + "CC TACA", + "GGA GTA", + "TC TAATT", + "CAA CAAA", + "TA GATT", + "GG TTTA", + "CC TAGA", + "CTT TAAA", + "TA CTTA", + "TAA TGAA", + "CTA TCA", + "TA GTAA", + "CAGA GAA", + "CAA GAAA", + "GGGG AAA", + "CGTT AA", + "CGTG TT", + "TCTG TCTG", + "TTTTAA TT", + "CTG GCC", + "TAAA TGA", + "C GTCAA", + "TTA 
GTA", + "GTC TCTG", + "TTTT AAAA", + "CA GTTTT", + "CTT CCTT", + "TATA TAA", + "GC TTTTA", + "TTTT TCA", + "GG TC", + "TTA TTAA", + "TTTT GTT", + "CA TAGA", + "TA GGAA", + "GAGA GAA", + "GTA GCTG", + "TTA TGA", + "GTA GTG", + "GGA GAGG", + "CTC TGAA", + "TA GTC", + "GA CTCC", + "TCC CTCC", + "TAA TGTT", + "CA TCTA", + "GCCA CCA", + "GTA CTA", + "TGGG AAA", + "CGCC TT", + "GCC CGG", + "GGA GGAA", + "GTA CCA", + "CGC AAA", + "CA TAAAA", + "TAA CATT", + "GC TAAAA", + "TCTT CTG", + "GCC AAAA", + "GTA TGA", + "GTC TTTG", + "TA CTGA", + "TCC CAGG", + "TTA TTTA", + "TTA GTT", + "GGA CC", + "TA TAAAA", + "CAAA CAA", + "CTT CTC", + "TCTA TCTA", + "GAAA TAA", + "GTG TAA", + "CTT TGTT", + "GA TAAAA", + "GCC CAGG", + "GC GATT", + "AAAAAA TT", + "TA CAGG", + "GG CTAA", + "TA GCTT", + "GTC TCTA", + "CTCC TGA", + "GAA TAAA", + "TTA CCA", + "GG GACA", + "GCCA CTG", + "GTT TAAA", + "GTC TGTG", + "TGA CAAA", + "TACA TTTT", + "GCCA CC", + "TG TTTT", + "TA GCAA", + "TTA TAAA", + "GA CCCA", + "GCA GC", + "CAGA CAGA", + "CA CAAAA", + "GCC CTA", + "TATT AAAA", + "C GTATT", + "CCA TCC", + "TC GATT", + "GAA GGAA", + "GA TCCA", + "TATT TGA", + "GTGAA TT", + "TA CCTT", + "C GTCTT", + "CC TAGG", + "TC GAAA", + "CTT TCTG", + "TGAA GAA", + "TCTC TCA", + "GTC TCTT", + "GGA GGGG", + "GTC TGTT", + "CTA TGA", + "GGAAA TT", + "GCA CACA", + "GCC TTTT", + "CA GTCC", + "CTG GTA", + "GCA TCC", + "TA GTTA", + "GG CTTA", + "GA GTCC", + "TG AAAA", + "TAGA TAGA", + "TGTT TGTT", + "TA CTCA", + "CATT TAA", + "GA TTTTA", + "CA CTCC", + "GAAA CAA", + "GC GCTG", + "TCTT TCA", + "CTG TCC", + "GAA CTCA", + "CGG AAA", + "TATT GTT", + "GCA CTA", + "TATT CAA", + "GC GGGG", + "GTG GCC", + "TAATT AAA", + "TA CTAA", + "GC GGTG", + "TA CCAA", + "GG TATA", + "CTA GTT", + "GCA GAGG", + "CTTTT TTTT", + "TTTTTTTT TTTTTTTT", + "TACA GTA", + "CCA TGTT", + "TA GTGA", + "CGTG TG", + "GC TCTGA", + "CTT CCTG", + "TC GCTG", + "TAAA TCA", + "TCCAA TT", + "GTT TCTG", + "GAA GAGA", + "GG GTAA", + "CCA TAA", + "TTA TATT", + "C GAATT", 
+ "CC GGA", + "TGA GCC", + "CC GTA", + "CAGA GGA", + "GTG TTTG", + "GA CAAAA", + "TTTTTT AAA", + "GTT GCC", + "GA GTTTT", + "TC AAAAAA", + "TGTT TCA", + "TA TCTA", + "TCTC TCC", + "CTC CACA", + "TAAA TATT", + "TTTT CTG", + "CTC TCAA", + "CCTT AAA", + "TCTTTT AA", + "GAA CAAA", + "TTA GCA", + "GCTCA TG", + "TAAA GTA", + "GGA TAA", + "TTATT AAA", + "CTC CATT", + "TCTC TGA", + "TTA TTTG", + "CCTG TAA", + "TTA TATA", + "GA CTTTT", + "TGTT GTT", + "GCAAA TG", + "CTT CAAA", + "GAA TATT", + "GAA TCC", + "CTC TTAA", + "GCA TAA", + "GAA TGAA", + "CTTAA AAA", + "TAAAAA TG", + "TTTTAA AAA", + "CTC TGGG", + "TGA TCC", + "GC TCTCA", + "CTC CAGA", + "GAGTG CAGTG", + "CAA TATT", + "TA GAAAA", + "GTAAA TG", + "TA GCTG", + "GC TCAAA", + "GCA GGAA", + "TA CCTG", + "GG GAAAA", + "TTTT CTA", + "GGGG GGGG", + "CC GA", + "CTT TGAA", + "GGA GGTG", + "TA GTCA", + "GG CCCA", + "TGA TGTT", + "CAAA TAA", + "TCTT CCA", + "GC GCTT", + "GTA TTTG", + "GTC TC", + "GAAA TCA", + "TGA TAAA", + "CATT CTT", + "TA TCCA", + "GCC TCTG", + "TGA GATG", + "C GCCAA", + "GTTTTA TT", + "TATA TATT", + "GTA GGA", + "GACA GAA", + "CTCCAGCC TGGG", + "GC GTGA", + "GG TATG", + "GAGG GAGG", + "TCA TTTG", + "CTA CC", + "TACA GAA", + "GG TAGA", + "GA TCTA", + "GTC CATG", + "TGA GGAA", + "TAA TAAAA", + "TAAA CTT", + "TCA CATT", + "GGA GGCC", + "TCA CAAA", + "CA CTTTT", + "CGG CC", + "CAA CAGA", + "GTA GAGA", + "GTTA TTTT", + "CGTT TG", + "TC GTCA", + "TCTG CTG", + "CAA CACA", + "GG TAGG", + "GCA GCTG", + "TAGTA GAGA", + "CAA GCC", + "GCA TTTG", + "TAA TATG", + "GCTT AAA", + "GCTT CTG", + "CTC TCCA", + "TCA TCTT", + "C GTCTG", + "TCA TTTA", + "CA TAGG", + "GC TCCTT", + "TGTT CTT", + "TACA TTA", + "CACA GAA", + "TAAA TATA", + "TA GAGG", + "GA TAGG", + "TCC TGAA", + "GGA GCTG", + "TGA TATT", + "TCA TTAA", + "CTTTT AAA", + "TC GTTA", + "TAAA CTA", + "GTT TGAA", + "TAAAA TTA", + "CA CCCC", + "TCA GAGA", + "CTCC TGCCTCA", + "TGA CATT", + "GTA TTTA", + "CTT CATT", + "GAAA CTG", + "TAA CACA", + "GTT CAAA", + "GGA GATG", + "TC 
GGCC", + "CAGCA TT", + "TC GATG", + "TATT CTA", + "CTG TGAA", + "TATT GAA", + "TTTT CCA", + "TATT TCTT", + "GGTG AAA", + "CTGA GAA", + "GCA CAGA", + "GC GAGG", + "CTG TGTG", + "TGAAA TG", + "TGA TGAA", + "GTCC AAA", + "CTCAA TT", + "TCCA GAA", + "GTA TATA", + "TAAA GTT", + "TCTC AAAA", + "TCCA TCA", + "GTC TGAA", + "TGA GAGA", + "TGA TTTG", + "TTA GCC", + "CTC CATG", + "TCC CTGA", + "GA GCTA", + "CCCC CCCC", + "GTG GAAA", + "CTG GGAA", + "CAA TGAA", + "CCA CACA", + "CTT TCAA", + "C GGAGG", + "TC GTGA", + "CCA GAAA", + "GTTTT AAA", + "TGTT GAA", + "TCC TGTG", + "CTAAA TG", + "TCC TTTA", + "GTC TGGG", + "TCTC TTTT", + "TA CGG", + "TATT GTA", + "TTA GTG", + "TTA CC", + "TAATCCCAGCA CTTTG", + "TCTG GAA", + "CTT CTCA", + "CGCA TT", + "TATT TAAA", + "TCA CACA", + "TAA TCAA", + "GC GAAA", + "GG GCCA", + "GTT CATT", + "GAGAA AAA", + "TTTT GTA", + "TA CTTTT", + "TC GAGG", + "GTGAA AAA", + "CAA TATA", + "TCC CATG", + "CAA TTAA", + "CTG GAAA", + "CCCA GCA", + "TCC CATT", + "TCC TGTT", + "CTC TTTA", + "TCC CCTT", + "GTT TCAA", + "GTC CAGG", + "GGAA GGA", + "TA GTTTT", + "TGA CCTT", + "GTGCTG GGATTACAGG", + "TATT TATA", + "TCTG CAA", + "CTGAA AAA", + "TATG TTA", + "CTT CACA", + "GCA CAGG", + "CCTG CTG", + "TTTT TTAA", + "GTTA TTA", + "CC CTTTT", + "TGA TTTA", + "TA CAAAA", + "TAA GTAA", + "TTTT TAAA", + "CA TCTC", + "GTG GTGA", + "GTG GAGA", + "CTC TGCA", + "GTTAA AAA", + "TACA TACA", + "CTT TGTG", + "GGA CACA", + "TCTGA TG", + "TA TTATT", + "TCTT CTA", + "CTG TGTT", + "TCA GCTT", + "CTT TATA", + "GG CGC", + "TCC CTCA", + "GTA CC", + "TGGA GAA", + "CAAAAA TT", + "TCTT TAA", + "CTC TCTC", + "TGA GTGA", + "GCA GCTT", + "CGGA TT", + "TA CGA", + "TCTT GTT", + "TC GTAA", + "GCC TGTG", + "TATT CTG", + "GG GATA", + "GG GTCC", + "TGA GATT", + "CTTTTA TT", + "TCC CACA", + "CATG GTG", + "TTA GGA", + "GAA CACA", + "TCA TAAA", + "CAA CATT", + "GG TCCA", + "GAA TTTG", + "TATTAA TT", + "TCC TGGG", + "GCA GCAA", + "CTC TTCA", + "GAA GAGG", + "TCTG TCA", + "CTGAA TG", + "CCA CAAA", + "GTG 
GAGG", + "TGA TTAA", + "CTCC CTCC", + "CACACACACACACACA CACACACACACACACA", + "GC GATG", + "CATT CTG", + "GTA GAAA", + "TCA TCAA", + "TTTT CAA", + "TATG TATG", + "CCAAA TG", + "TAA TTTTA", + "TAA GGAA", + "CTT GAAA", + "AAAAAAAA AAAA", + "GC TCCTG", + "GCA GATG", + "GAAAAA TT", + "GA CGC", + "GTG GGGG", + "GTCAA TT", + "CTT GCTT", + "TGA CACA", + "GTG TGTT", + "CCA GAGA", + "CCCA GCC", + "TAAA GAAA", + "GTC CATT", + "TAAA TTAA", + "CC CAAAA", + "GAA TTAA", + "TGAA TTA", + "TTTT TTTG", + "CCA GCTT", + "CAA TTTG", + "CTG TTTG", + "GTC TCAA", + "GTT TGTG", + "GG CATA", + "GG TACA", + "TGA TGTG", + "GATT TCA", + "TCTG CTT", + "GTAA TTA", + "TAA AAAAAA", + "GCC GCC", + "TGTGTGTGTGTGTGTG TGTGTGTGTGTGTGTG", + "GC GTCA", + "GC TCATT", + "GAA CCTG", + "TAAA CAAA", + "GTG CTGA", + "TCA GGAA", + "TCC TCAA", + "TCTA TTTT", + "TCTG TTTT", + "CAGA GCA", + "CCA GGAA", + "GTC TTTA", + "TCTT CAA", + "TCAAAA TT", + "GC TTATT", + "GTT CCTT", + "CA CCTA", + "TCA CTGA", + "GAA GCAA", + "TAAA GA", + "TCC TTCA", + "TCTCA TG", + "TCA GTGA", + "TACA CAA", + "CA CGTG", + "CC TAAAA", + "GCC TTTG", + "GG CTTTT", + "GTT GAAA", + "GTT CTC", + "CTA GA", + "CTA CAAA", + "GCA CAAA", + "TTA CATT", + "GG CCCC", + "TAA TGTG", + "CTG CCTT", + "TCC CAGA", + "GTGAA TG", + "GGA CAGG", + "GGA TGTG", + "GTT TATA", + "TGA CCAA", + "GTG GCTG", + "GTT CTCA", + "CTTA TTTT", + "CTG GAGA", + "TTA CAAA", + "GTC TTCA", + "CAA GAGA", + "CCA TTTG", + "TCA CAGA", + "CTA GTA", + "CA TTATT", + "TTA GA", + "GC TCTCC", + "GC GCCA", + "TATG TTTT", + "TCC TCCA", + "CAGAA AAA", + "GTG GGAA", + "TAA TCTT", + "TGA GTCA", + "CTG CTC", + "GTC TCCA", + "TCA TGTT", + "GTT TCCA", + "TAA GCAA", + "CTAA AAATA", + "TGA CTGA", + "TC GGTT", + "TTA GAAA", + "TAA GCC", + "TAAA GCA", + "CC TCTCC", + "CC TCCTT", + "TCA GATT", + "TATG AAAA", + "GCTGA TG", + "CATA TTTT", + "GC TCCAA", + "CGG CGG", + "CCA CTGA", + "CA GCAAA", + "CTG TCTT", + "CTA GCA", + "TC GGGG", + "CACA GCA", + "GC TGATT", + "CTA GGA", + "TAA CTC", + "TCA TATT", + "CCTT 
CTT", + "CTG CAAA", + "CC CGC", + "GG TCTA", + "CCCA GGA", + "GTG TCTG", + "TAATAA TAATAA", + "TCA CATG", + "CAA TTTA", + "TATATATATATATATA TATATATATATATATA", + "CCA CAGA", + "TCAA TTTT", + "GTA TTAA", + "GAA CATT", + "TCTC TTA", + "CTA TTTG", + "TCTT TCC", + "GGTT AAA", + "GC TAATT", + "CTG CTGA", + "TA CCTA", + "CAGG GTT", + "TC GCCA", + "CAAAAA TTA", + "CTT CTGA", + "GCA TGTG", + "CTA TTAA", + "GCA CATG", + "CAA CATG", + "TCA TGAA", + "GAA TGTT", + "GG GTTTT", + "CTG CCTG", + "GTC CACA", + "TAAA CA", + "CTC TGGA", + "GA CCCC", + "GG CAAAA", + "TCTG TTA", + "CTA GTG", + "CTA TATA", + "TCA GTCA", + "TAA CTAA", + "GAA GATG", + "GTC TTAA", + "CAA GGAA", + "GTAA AAAA", + "TCC CCTG", + "TC GCAA", + "TCTG CCTG", + "CC TTTTA", + "GTCC CAGCTA", + "TATA TATG", + "TATT GTG", + "TGTG TTTT", + "GC GCAA", + "CACA GTG", + "TAA GATT", + "CTC TGTA", + "GGAGG CTGA", + "GGA CAAA", + "TATTAA AAA", + "TC GTCC", + "TC GGAA", + "CTA TAAA", + "CTT CAGA", + "CTA GAAA", + "CATT CAA", + "CA CGCA", + "CAGGA TT", + "CCA TCTT", + "GTA GCC", + "GAA TTTA", + "CA CGC", + "CAA TCC", + "TGA GCAA", + "GAA GCTG", + "TCAA TTA", + "GAA GTCA", + "CTG CACA", + "CCA CGG", + "GGA TCTT", + "CTCCTGCCTCA GCCTCC", + "TAAA TGAA", + "CC GTC", + "TC GGTG", + "TTTTA TTA", + "GCA GGGG", + "GCA GGTG", + "TCTA TTA", + "TAA CTTA", + "CTAA TTTT", + "CC CGCC", + "TAA TACA", + "GGATT AAA", + "TCTC TCTG", + "GCTT CTT", + "CATT TATT", + "CCA GAGG", + "GGA CAGA", + "GCCAA TT", + "TCC CCAA", + "GTT GATT", + "GAA GAAAA", + "GCA TTTA", + "CTC TAAA", + "CACACACA CACA", + "CC TCAAA", + "TA TAATT", + "CAA TGTT", + "GCC CAGA", + "GTA TATT", + "CTAA AAAA", + "CCA CAGG", + "TAA GAGA", + "TCC TTAA", + "TA TTTTTT", + "GAA TATA", + "GGA TTTG", + "GTG TGAA", + "CTG GCTT", + "GC GGCA", + "TCC GCC", + "GCA TCTT", + "TC TAATA", + "CTG CATT", + "CTC TGCC", + "TCA CTCA", + "TCA GCAA", + "TATTA TG", + "CCA GCTG", + "GA TCTC", + "GCC TCTT", + "CTT CCAA", + "TCC TAAA", + "TCA TCTG", + "CTA TTTA", + "CTG CAGG", + "CAA GCAA", + "GC GGAA", + 
"GAAA TAAA", + "TAAAA TAA", + "TCA CCTT", + "CCA TGTG", + "GA CCTA", + "CAGA TGA", + "GTG GCTT", + "TTATTA TTATTA", + "TCC CGG", + "TATT TGTT", + "CTG TAAA", + "TCCA TCCA", + "CTG TATA", + "GTT TCTA", + "GTT GCTT", + "CCA TGAA", + "GC TCTTA", + "CTT CATG", + "GTT CCTG", + "GCTG GGA", + "TCA GAGG", + "CATT AAAA", + "TCA GTAA", + "GAA TGTG", + "CTTA TTA", + "GCA CTGA", + "TGA GGTT", + "CA TCAAA", + "CTT CTCC", + "GTT TATG", + "CTT TCCA", + "GTG CCTG", + "GAAA GGA", + "GCA TCTG", + "TA CCCA", + "TAA CAGA", + "AAAAAAAA AAA", + "CTA TGAA", + "CA GTAAA", + "TA GCTA", + "TC GTTTT", + "GTG TCTT", + "GA GCAAA", + "TC TAAAAA", + "GTT CACA", + "GAAA TGA", + "CAAA TGA", + "GCC CTGA", + "GTG TTTA", + "TCA TGTG", + "CATA TTA", + "TCAAAA AAA", + "TAA GTTA", + "TCTC TCTT", + "CCA GTGA", + "CC TCTGA", + "CAA GATG", + "GCC TGTT", + "GTT TGGG", + "CATT CATT", + "GCC CCTG", + "GTT CTGA", + "GC GGCC", + "GC GGTT", + "CAAAA CAAAA", + "TACA TATA", + "GAATT AAA", + "TCAA GAA", + "CTG TATT", + "TTTT TATT", + "GA TTATT", + "TCTAA TG", + "GTT GCTG", + "TGAA TGAA", + "TCA GCTG", + "CTT GATT", + "CAGAA TG", + "CTAA TTA", + "TATAA TG", + "GTTTT GTTTT", + "CCA GCCTG", + "TGA TGGA", + "GCA GATT", + "CTC TATT", + "GCA GTCA", + "TAA GTGA", + "CTA CACA", + "CGCA TG", + "TA GCCA", + "GTG GCTCA", + "CAAA TAAA", + "GTG CTCA", + "TTTT TTTTTT", + "TAA CATG", + "TCCCA GCTA", + "CAAA GTA", + "TCA TATA", + "CAGCA TG", + "TGA TCTT", + "CA TAATT", + "TGTG TTA", + "TTTT GAA", + "TTAA TTA", + "GATA TTA", + "TCA TTCA", + "TGA TATA", + "TGA CTCA", + "GA CGTT", + "TGA CATG", + "GTT GTGA", + "CA TTTTTT", + "GCC TGGA", + "CTA TGTT", + "CTT TGGG", + "GTC TCAAA", + "CTG GCTG", + "CCA CATG", + "GG CGTG", + "CTTAA TG", + "TAA GATG", + "GTA TAAA", + "TGTA TTA", + "TAA CTCA", + "GAGAGAGAGAGAGAGA GAGAGAGAGAGAGAGA", + "GCA TGAA", + "GTTAA TG", + "TCCA GGA", + "GAGA GAAA", + "TCTC TGTG", + "CTC TCTA", + "CCA CCTG", + "GCCA GGA", + "CTG GAGG", + "CCA TTTA", + "GTC TGGA", + "GCC CACA", + "TAGA GAA", + "CAA CTCA", + "GGCA GGA", 
+ "TCTTA TG", + "CAAA GGA", + "GG TAAAA", + "GAGA GGA", + "GTC CAGA", + "GCC CTCA", + "GATA TTTT", + "CAGG GAA", + "CCA CATT", + "GA GGAGG", + "GAAA CTT", + "CA GAATT", + "TCA GATG", + "TATT TCC", + "TACA GTG", + "TGA GCTG", + "CCA TCTG", + "GAGAA TG", + "TCAA CAA", + "A TT", + "TAA CTGA", + "TGA GAGG", + "CA CTGAA", + "CCA CCTT", + "CTG CAGA", + "TCA CCAA", + "TGA GCTT", + "CAAA GCA", + "GG TTTTA", + "CGG GGTT", + "TCCAA AAA", + "TATG TATA", + "CCA GATG", + "TCCA TTTT", + "CTG CTCA", + "GA TAATT", + "CCA CCAA", + "CTCC TCC", + "GA GAATT", + "GAAA GTA", + "TAAAA TAAAA", + "CTT CTTA", + "CTG TTTA", + "GAA TCAA", + "GCA TGTT", + "GCA CGG", + "GA CTGAA", + "GTG CACA", + "GA CGTG", + "TATA CAA", + "TC GACA", + "GAA GACA", + "TAAA GGA", + "GA TCAAA", + "CAGTG TG", + "CTA GCC", + "GAGG AAAA", + "TCTG AAAA", + "GAA CCCA", + "GATG GATG", + "GTT CTTA", + "CTA TATT", + "GCA TTAA", + "TCTCTCTCTCTCTCTC TCTCTCTCTCTCTCTC", + "TCA GTC", + "TATTTT TG", + "GAGGA TT", + "GTA TGTG", + "TAA CCAA", + "GTT GTTTT", + "TTTT TCTT", + "GTG TTAA", + "CTT GGAA", + "AAAAAA TG", + "CAA TGTG", + "GTG CCTT", + "GCC TCAA", + "GA GTCTT", + "GCTAA TTTT", + "CGAA AAA", + "GTG TATA", + "GC GTTA", + "CTGCA CTCCAGCCTGGG", + "GTT CATG", + "CAAA GAAA", + "GCA GTAA", + "GGA TGAA", + "CTT TATG", + "CAGG AAAA", + "TCC TGCA", + "CTG TCTG", + "GAA CATG", + "GGA TGGA", + "GCC TGAA", + "CAAAAA TG", + "TCCAA TG", + "CCA GCAA", + "GG CCTA", + "CAA CTGA", + "GCA CCTG", + "GTC TATT", + "CC TCTCA", + "GTG GTCA", + "GTG TAAA", + "GTA CACA", + "GTAAAA TT", + "GTA CATT", + "TATA TAAA", + "CTG TTAA", + "TAA GTCA", + "GCC TCCA", + "AAATT AAA", + "GTG CAGG", + "TCC TGGA", + "GTG CAAA", + "GC GTCC", + "CCA TTAA", + "GGA GGGA", + "TCA CTTA", + "TCATT AAA", + "CAA CATA", + "TAA TAGA", + "TAA TGTA", + "GA TTTTTT", + "GTT GTCA", + "GGA GACA", + "GTG TGGG", + "TCA CAGG", + "TC GGCA", + "CTCC CTG", + "GA CCAAA", + "TGTT TATT", + "CGAA TG", + "CTCAA TG", + "TCA CCTG", + "CA GTGTT", + "TGA GACA", + "TA GGGG", + "GAAAAA TG", + "GTT 
GAGA", + "TC GATA", + "CTC GGGAGG", + "GTT GTC", + "CCA GTCA", + "GCC CAGGCTG", + "GAA CAGA", + "GGCTCA CTGCAA", + "GCA GACA", + "TGA GGTG", + "CA CGTT", + "TAA GAAAA", + "CCA GGCA", + "GTA TCTT", + "CTTGG GAGG", + "CTT TCTA", + "CC GCTG", + "GA GCTCA", + "GAGA CAGA", + "CTT CAGG", + "GCA CATT", + "GTA CAAA", + "CTT GTAA", + "GTG GGTG", + "GAA GTGA", + "GG TCTC", + "GTA TGTT", + "GCA CTCA", + "TTA TGTT", + "CAA GTCA", + "CAA GTGA", + "GAAA CTA", + "TAAA TAAAA", + "TCTT AAAA", + "GTT GGAA", + "GTT CTAA", + "CCA CTC", + "CA GTGAA", + "GAAA GG", + "GCA CGA", + "TAA CTTTT", + "GTT GTTA", + "TCA GTTA", + "CGGA TG", + "TATT TGAA", + "CC CTGAA", + "GCC CTC", + "CTT CTAA", + "TTTG TTTT", + "GA GCTGA", + "CTG TGGG", + "CAA GATT", + "GAA GCTT", + "TGA GTAA", + "CTT GCTG", + "GGA TGGG", + "CGTA TG", + "TCCA TTA", + "GTC TGCA", + "GCCA TTTT", + "GTT GTAA", + "CACA CAA", + "GGACTA CAGG", + "C GTTTTA", + "TCTT CC", + "TAA CCTT", + "CTT TAAAA", + "TGAA TTTT", + "CTA CAGA", + "GCAA GAA", + "TAA CAAAA", + "CAATT AAA", + "CCA CTCA", + "CATG GTGAAA", + "CCCA GAA", + "CTA CATT", + "CC GAGG", + "TCCA GTG", + "TGA GTTA", + "GGA GTCA", + "TAA CGA", + "GA GTAAA", + "GA CTCTG", + "GGA GCTT", + "TA CTCC", + "CTG CATG", + "GC TTTTTT", + "GTC TAAA", + "GTG CGG", + "CA TCTCA", + "TGA TCAA", + "GGA GATT", + "GC AAAAAA", + "CA CCAAA", + "TGA CGG", + "CAGA GG", + "GTT GATG", + "CTT GTCA", + "TCCA CCTG", + "GGA GCAA", + "CAA GTAA", + "CCA TAAA", + "GTG CATG", + "GCA TATT", + "GTA GATT", + "GCC TAA", + "CTCAA AAA", + "GGA GAAAA", + "CTA TCC", + "TAATA TTA", + "GTG CTC", + "CAA TATG", + "TGTG GAA", + "TGA CTC", + "GTG TATG", + "TTTTAA TG", + "GC TCTAA", + "CACAA TG", + "CA GCTCA", + "GTT GGTT", + "CTAAAA TT", + "GTC TATG", + "TGTG AAAA", + "CTG GGTT", + "CCCC TCC", + "CC CTCTT", + "GCA GGGA", + "GAAA CCA", + "CATT TCC", + "GCA GCCA", + "TCA TATG", + "GCA GGCA", + "C GTAAAA", + "TGA CCTG", + "CAGA GGTT", + "CTT GTGA", + "TTA TCTT", + "CTG TATG", + "GTCAA TG", + "GGA CGG", + "GC GTAA", + "CAAA CTA", + 
"TAAA TGTT", + "CTT CGG", + "CTCC CCA", + "TACAA TG", + "TCTG TAA", + "GAA TATG", + "GC GGGA", + "GGA CATT", + "TTA TGAA", + "GGA TGTT", + "GGA CATG", + "TCA GGTG", + "CAA CAAAA", + "GAAA GAGA", + "GTG GATG", + "GG GCTA", + "CCA TCAA", + "CA GCTGA", + "CTC CACC", + "CAA TCAA", + "GTG GTC", + "TGA CAGG", + "CCA TTCA", + "GTCC CTG", + "CAGA CACA", + "GTT GGTG", + "CC TCCTG", + "GAA CTGA", + "TATT CATT", + "GCC CATG", + "CAA TCTT", + "GAAA GCA", + "GAA TCTG", + "TTA TTTTA", + "GTT TGGA", + "TTTT TGTT", + "GGGAA TG", + "GC GACA", + "TAAA CTG", + "CCA TATT", + "GGA TCC", + "CAA GCTT", + "TAAAAAA AAA", + "TCA CTC", + "CA CTGTT", + "TGTTAA TT", + "GGA CTGA", + "GGA GTGA", + "CATA CACA", + "GTT TGTA", + "TCCA GCA", + "GTG CATT", + "GG AAAAAA", + "CCAA GAA", + "TCAA TA", + "CTT CCCA", + "TGA GAAAA", + "GGCC TCCCAAA", + "CAA GCTG", + "GCC CAAA", + "TGA CTTA", + "CA GCCTT", + "CTG GATT", + "TTTT TTTA", + "TCA CGG", + "GCA GTTA", + "TGA CTAA", + "TTA CAGG", + "TGA TATG", + "TAA TTATT", + "TCTT GAA", + "GCC CCTT", + "GTT CAGA", + "CTC TATG", + "CCA TGGA", + "GAGG GAA", + "GGA GGCA", + "CTT TGCA", + "TCTT GG", + "GGA GGTT", + "GCCAA TG", + "CTG GTGA", + "CAA CCAA", + "CCA GTC", + "CTT GAGA", + "TACA GCA", + "CTT GTC", + "GA CGGA", + "CTT CTTTT", + "GTG GC", + "GAGGA TG", + "CAA TAAAA", + "GAAA TTTT", + "AAAA AAAAAA", + "CTC TATA", + "GTA TGAA", + "CTT GTTA", + "TAA CATA", + "CAAA CACA", + "TGATT AAA", + "GCTC TGTT", + "GTG GGTT", + "GTT GGGG", + "GTG TGTA", + "GTAA TTTT", + "GTA TCC", + "TGTGTGTG TGTG", + "TCTT CCTT", + "TCA CTAA", + "TCTCC AAA", + "TA TCAAA", + "TGA TGGG", + "GGA TATT", + "CAAA TTTT", + "GTT CAGG", + "GTG GATT", + "GTG CAGA", + "GCTG CC", + "CTCA GAA", + "GCA GTC", + "GGA TAAA", + "GCC TTCA", + "CCA GGTG", + "TA TCTC", + "CAA TGCA", + "CCCA CTG", + "GTG TATT", + "CGA CAGA", + "TGA GATA", + "CCA GGTT", + "TGTT TAA", + "CATCA TG", + "TGA TTCA", + "GCAA TTA", + "GAAA TGAA", + "CTT GGTT", + "GAA GATT", + "GGA TTAA", + "CC TCATT", + "GGCCA GGCTG", + "GCTA TTA", + 
"GCCA GCA", + "GAGA CAGG", + "CTT GAGG", + "CA GTCTT", + "GTT CTCC", + "TATT TCAA", + "TGA CGA", + "CATG AAAA", + "CATTA TG", + "TAAA TTTA", + "GA GTGAA", + "CAA CAGG", + "TAA GCTT", + "CACA TTTT", + "GA TCTCA", + "TA GTCC", + "GACC CTG", + "TAA TGCA", + "TAA GTC", + "TAA TAATT", + "GAA GTAA", + "CAA CTC", + "CA TCATT", + "GA CGAA", + "GAAA CAAA", + "TATT TCTG", + "CATTAA TT", + "CCA CCCC", + "TAATA TTTT", + "GTT TAAAA", + "GTA TCTG", + "GTCAA AAA", + "GATG CTG", + "TGTT CTG", + "GG TCAAA", + "GTA GGAA", + "GTA TATG", + "TGA TCTG", + "GGGG CTG", + "GCA TCAA", + "GCCAA AAA", + "CCA CGA", + "GC TAATG", + "CAGA GAAA", + "CCTT CTG", + "TCC TCTA", + "GCA GGTT", + "CTCA CTG", + "TAGA TTA", + "GCC GAGA", + "CCA TCCA", + "CTT TACA", + "GTA CATG", + "GCA CCAA", + "CTT TGTA", + "CTA TGTG", + "TCA CTTTT", + "TGA GTC", + "CAA GAAAA", + "CTGA CTG", + "GTTTT TTTT", + "GCA TAAA", + "TAA TCTG", + "GAA AAAAAA", + "CAGGA TG", + "TGA GCCA", + "GAA TTCA", + "TCA GACA", + "GTT CCAA", + "TCA GGTT", + "CAAA CTG", + "CATT TCTT", + "TGTT AAAA", + "CCA GACA", + "CAA GTTA", + "CATG TTA", + "CATT CTA", + "TCTTTT TG", + "TGA GGGG", + "CACA TTA", + "TAAAA TAAA", + "GCA TATA", + "TGTT CTA", + "GAA GGGG", + "GAGTG TG", + "TAA GACA", + "GAA CTC", + "CCA GTAA", + "GAGA GAGG", + "GC GACC", + "CAA TTCA", + "CGG CTG", + "CCA GATT", + "CCTG GG", + "GGAA GAAA", + "GAGA GG", + "TCAAAA TG", + "CCTCA TG", + "TAAA GG", + "CTT TGGA", + "CCA GGGA", + "GTA CAGA", + "CTGAGGCA GGA", + "TGTT TCTT", + "CCA GGCTG", + "CTGA GG", + "GAGG CTG", + "CTCC TGGG", + "GAA GTC", + "CGA CC", + "GGA CTCA", + "GGA GTC", + "CA CAATT", + "GTG TTCA", + "GA CTAAA", + "GTCA TTA", + "CAAAA TTA", + "TGAA GAAA", + "GCA CCTT", + "GTT TGCA", + "TCC TGCC", + "GTA GATG", + "GCC TGCA", + "GA GTTAA", + "TCC CTTA", + "GTG GTTA", + "TC GGGA", + "TACA TAA", + "TCTC TCCA", + "CA CTAAA", + "TATATATA TATA", + "GTG GCAA", + "CACCA TG", + "TTTG AAAA", + "CACA CTG", + "CTT GGTG", + "TACA CTG", + "CC TCCAA", + "CAA CCTT", + "CA GCCAA", + "TTTT CAAA", 
+ "TGA TAGA", + "TACA CTA", + "TCTG GG", + "TCC CAGCA", + "TAGG AAAA", + "CTT GGGG", + "TC TGTGAA", + "CC TTATT", + "CATT TAAA", + "TTTTA TTTTA", + "GCC CTCC", + "CTGA GCA", + "CC CGTG", + "GTA GTGA", + "TCC TATT", + "GAA GGTG", + "TGTG CTG", + "TCCA CTG", + "TAA TCTA", + "TGA TGTA", + "GTG GTAA", + "TAA TGGA", + "GATG AAAA", + "GTA GTAA", + "GTG GGGA", + "GTG TCAA", + "CAGA CTG", + "TC GAAAA", + "CTCA TTA", + "TAA TAATA", + "CTCA GAAA", + "CA TCCTT", + "CC GCTT", + "GGAA GG", + "CC GTGA", + "CCA CTCC", + "CTA GAGA", + "TAGAA TG", + "GGA TTTA", + "TTAA TTTT", + "GC TAATA", + "TCC CCCA", + "CAAA TATT", + "GA TCATG", + "TCTTAA TT", + "CA GTATT", + "GTCTT GAA", + "CC GAAA", + "CTA TTCA", + "TAA GATA", + "CTT GCAA", + "GCC CCAA", + "TCC CTAA", + "GAA GTTA", + "GA TGATG", + "CTT GATG", + "CC CTAAA", + "CCTG CCTG", + "GACA TTTT", + "CCA GCCA", + "TGTGTGTG TG", + "GTC TATA", + "TCTC TGTT", + "GTC TGTA", + "TA TAATA", + "CTT GTTTT", + "CGC CATT", + "CTCA GCA", + "TACA GTT", + "CAA GAGG", + "GGAA GCA", + "GCC TTTA", + "CC CCATT", + "CAA CGA", + "GTCA TTTT", + "CC CGCA", + "CA GTTAA", + "GAA TCTT", + "CATG TTTT", + "CC GGGG", + "CTA CTGA", + "TCA CGA", + "TAAA TTTG", + "GCC CATT", + "CTC TAGG", + "GGA CCTG", + "TCA GGGA", + "GAGA CTG", + "CC AAAAAA", + "GCC GG", + "CCA GGGG", + "TCA GAAAA", + "CA TCTGA", + "TCTT CAAA", + "CTA CAGG", + "GAGG CAGG", + "CATT GTA", + "TAAA TCAA", + "GA CTCTT", + "CTGA TTA", + "GCA TATG", + "GGA CCTT", + "CAA GACA", + "TATT TATG", + "TATTTT AAA", + "CC GAGA", + "TCA TTTTA", + "CTCA CTCA", + "CCA CCCA", + "CTC TAGA", + "CTA CATG", + "GTG CTTA", + "CAA CCTG", + "TC TGTGTT", + "TAAA TATG", + "CAAA GG", + "CC CTGTT", + "GTT CGG", + "TGA TAAAA", + "CA CGAA", + "GTT GAGG", + "CAGA GTGA", + "GAAA TTAA", + "CACA TA", + "GAA CAGG", + "TCTCC TGA", + "CC TGAGG", + "GGAGG CCAA", + "GTT TACA", + "TAA CAGG", + "TGTG GTG", + "GCCTCC CAAA", + "CCA TCCTG", + "GATT CTT", + "GAA TGGA", + "GTA GTCA", + "CTCC TCTG", + "GAAAGAAA GAAAGAAA", + "CC CTGTG", + "CAGTA TG", 
+ "GC GATA", + "GGA CTC", + "GAAA GA", + "TGTT GG", + "GTA GCTT", + "CA TTTTAA", + "CC CTCTG", + "GCA TTCA", + "CGA TTA", + "TCA CATA", + "TAA TGAAA", + "GGAA TTA", + "CTG TCAA", + "TAAATT AAA", + "CAA GTC", + "GTA TTCA", + "GGCCA TG", + "CTT TAGA", + "TGTT TCC", + "CATG TA", + "GAA TAAAA", + "CAA CTAA", + "TCA TCTA", + "CA CTCTT", + "CA GTTTG", + "CA TAAAAA", + "GCA TGCA", + "GATT TA", + "GAA CCAA", + "TCTG TGA", + "TCA GCCA", + "TCTC CACA", + "TCTCA GCTCA", + "TATCA TG", + "GCA CTTA", + "CGC CAGG", + "CGG GG", + "CATTAA AAA", + "TTTG TTA", + "GGA TATA", + "TC GACC", + "TAA TCCA", + "CC GC", + "CATT GTT", + "CCA GTTA", + "GTA GTTA", + "CTA GGAA", + "CC TAATT", + "TCA TGGG", + "GAA CTAA", + "GCTA TTTT", + "CC GTCA", + "CAGA TTA", + "CCA TATA", + "CAA CTTA", + "TCA GTTTT", + "CTA CCTT", + "GCA CTC", + "GTG TGGA", + "GTG CCAA", + "GACAA TG", + "GA CAATT", + "GTA CCTT", + "TAAA CATT", + "CA GGAGG", + "GTG CGA", + "GAAAA TTA", + "TCTCTT AA", + "CC GATT", + "GA TGATT", + "CCA TGGG", + "TC GGTA", + "CCA TATG", + "CCA GTCC", + "GCC TTAA", + "TGA TCCA", + "GTT GCAA", + "GTA GAGG", + "CAGA TTTT", + "GTA CTTA", + "TCTTTCTT TCTTTCTT", + "GCTC TGTG", + "TCAA TAA", + "GTT TAGA", + "GTT CGA", + "CAA GGTT", + "CTCA TTTT", + "CACA GG", + "CATG CTG", + "GAA CGG", + "TA TAAAAA", + "GAA GGCA", + "GA GCATT", + "TGTT TGTG", + "GCTG TTA", + "GTCA CTG", + "CAAA TGAA", + "GTGA CTG", + "GTT CTTTT", + "CAGGCTG GAGTGCAGTG", + "TGA TGAAA", + "TAA CGG", + "CTA CTAA", + "GACA TTA", + "GGA CGA", + "GAGCA TG", + "GCA TGGG", + "CCA CTTA", + "CTA TCAA", + "GCTG TTTT", + "GTC GTG", + "CCTG GCC", + "TCTC TGAA", + "TGTT GTA", + "CAGC CAGG", + "GTT TAGG", + "CC GCAA", + "GGA GTAA", + "CCAA TTA", + "CAGC AAAA", + "TCA TCCA", + "CA CGTA", + "TCA TAGA", + "TAATT AAAA", + "CA CTTAA", + "TCTT TATT", + "GAGA TTA", + "TAA GAGG", + "CAAA TTAA", + "GA CGCA", + "CA CGGA", + "GTG TGCA", + "TC T", + "TATTA TTA", + "GAAA TATT", + "GGA GTTA", + "TCTT TGA", + "CTGA TTTT", + "TGTGAA TT", + "TCC CACC", + "CC CTTTG", + 
"CAA GGTG", + "CAGA GTT", + "CCCCA TG", + "CTA CCAA", + "CTCC AAAA", + "CTT CCCC", + "CTG CTAA", + "GATT AAAA", + "GC TTATG", + "CTA CTTA", + "TAAAAAA TT", + "TCA GTCC", + "CTATT AAA", + "GAA TGGG", + "CACA GTA", + "CAA CGG", + "GG TTATT", + "TCA CCCA", + "TGA TGCA", + "TAA TTTTTT", + "GTT TGAGA", + "GTATT AAA", + "GCC CCCA", + "TATA GTA", + "TA GTAAA", + "TGA TACA", + "GTG GTTTT", + "CCA CTAA", + "CACA GAGA", + "CCTCTG CCTCC", + "CAA AAAAAA", + "CTC TCTCC", + "CA TAATA", + "GAA GCCA", + "GTT CCCA", + "TGTG TTTG", + "CAA TGGA", + "TGAA GTA", + "CTT CATA", + "CA CTGTG", + "GC TCTTTT", + "TGA CATA", + "TAAA GAAAA", + "GAGAAA TG", + "CAGG GAGG", + "TGTT CAA", + "GA GCCAA", + "GACA GAGA", + "GG CTGAA", + "CAAA TATA", + "GTG GAAAA", + "TAA GGTT", + "GTGA TTA", + "GGA TCTG", + "GATG TTA", + "GACTA CACA", + "TCC TATA", + "CTG CCAA", + "TCC CGA", + "GTGA TTTT", + "GC GTTTT", + "CAGA GTA", + "GAAA GGAA", + "CA CTTTG", + "CCCC AAAA", + "GCAA CCCA", + "TGCA TTTT", + "TCTA GAA", + "TA CTTTG", + "TGA GGCA", + "CA TCTCC", + "TC GCTA", + "TGA CTTTT", + "GA GCCTG", + "CATT TGTT", + "TCTT TGTT", + "GCAAAA TT", + "CC TGATT", + "GA TAAAAA", + "GA GTGTT", + "TCC TGTA", + "TACA GAAA", + "TC CAGGAA", + "GCCA GTG", + "TAGA TTTT", + "TAA TAGG", + "CTCC TCA", + "CATTTT TG", + "CATT TCAA", + "GCCA TCA", + "TAAAA TATA", + "GA CTGTT", + "GCA TGGA", + "CAAA GTT", + "CA TGATT", + "GA GTTTG", + "CTA GCAA", + "CTT CCTA", + "GG GGAGG", + "CTA TATG", + "TATT TATTTT", + "CA CCATT", + "CC CTCAA", + "TTTTTTTT TTTTTT", + "GA TCATT", + "GTA CATA", + "CTC CATA", + "CCCC GTCTCTA", + "GCC TGCC", + "CTA GCTT", + "CC CGGA", + "GATG TTTT", + "GTA TTTTA", + "TCA GATA", + "CCTG GAA", + "TATT CCA", + "GGA CCAA", + "GCCA TTA", + "CGA CTGA", + "TAA GCTG", + "TAAA CACA", + "GTT TCTC", + "CA TCTTA", + "GAAA TTTG", + "TAA TGGG", + "TAAAA TTTT", + "CTG TTCA", + "CCTG TTA", + "TA CTGAA", + "TGA CCCA", + "TGA TTTTA", + "CTCC TTA", + "TATA GAA", + "CTG CGG", + "GC GGTA", + "GTG CTAA", + "CAGA GGAA", + "TACA TCA", + "TCAA 
TCAA", + "CTG CAGCC", + "TGAA TATT", + "TCTA CAA", + "CCA CATA", + "CC CGTT", + "TATA CACA", + "TCC TCTC", + "TCTA CTT", + "CC GGAA", + "CTTTT TTA", + "GAAA GAAAA", + "CTA TCTT", + "GA CTTTG", + "TGAA CAA", + "GCA GTTTT", + "GC TAAAAA", + "GAGG CGG", + "TAA TAAAAA", + "CTG GTCA", + "CAGA CAA", + "GGA TATG", + "TGAA GG", + "GCCA GAA", + "CCA GGCC", + "CCA CCATG", + "CAAA CTT", + "TCA TGTA", + "GCTG CTT", + "GTAA TA", + "CCCC CAA", + "CA GCCTG", + "TCAA CTT", + "TAAAA TTAA", + "GCTG AAAA", + "CGA CGA", + "GTG GGCA", + "TGA GGGA", + "CGC TCC", + "TTTT GTTTT", + "GA GTCAA", + "TCA TGCA", + "CTG CTTA", + "TAA GTTTT", + "GTA GCAA", + "CCTT GG", + "TGA CAAAA", + "CTG GTAA", + "TCTT TATA", + "TGTG TGTT", + "CTG GTC", + "CTG GCAA", + "CATT TCTG", + "CTC TACC", + "CTGA GGA", + "CTAAAA TG", + "CTA GATT", + "GTA TCAA", + "CA GTCAA", + "CTG GGTG", + "CC TCTTA", + "TGA GTTTT", + "TTTTA TTTA", + "CC TTTTTT", + "TATA TACA", + "TA GCAAA", + "AAA TTA", + "CTG GATG", + "GA TAATA", + "GA CAAAAA", + "CCTG GGA", + "GCTT TCA", + "GTA CAGG", + "GCTG GAA", + "CTA CTCA", + "CAA TGTA", + "GC GTGAA", + "GA TCCTT", + "TATTAA TG", + "GCC CGA", + "TAAA GTG", + "GCTT CCA", + "CATG GAA", + "TGAA GTT", + "CTT TCTC", + "TCTGTG TG", + "GTA TGTA", + "CAA TACA", + "TCAA GG", + "CC TCTAA", + "TGTG GG", + "GA TCTGA", + "GTA CTGA", + "TTAA TTAA", + "GCA GAAAA", + "CTA CATA", + "CC GGTG", + "GGGG AAAA", + "TACAA AAAA", + "TTTT GG", + "GTGA GAA", + "TCAA TAAA", + "TCAA GTT", + "CTCA GGA", + "CTA CTC", + "CAAA TCA", + "GGCA GAA", + "CC CGAA", + "TGTT GTG", + "GAGC AAAA", + "TATT TGTG", + "GTA GGTT", + "CTA CCTG", + "CA CAAAAA", + "CTCA GG", + "GCTT TA", + "CAGA GCAA", + "CTCA GTG", + "GGAA GAGA", + "TAA CCTG", + "GAAA TATA", + "CGA GAA", + "GTGA GG", + "CATT TATA", + "GGCA GCA", + "TC TAAATT", + "CCCA GTG", + "GCC TAGG", + "TGCA TTA", + "CC GTAA", + "CATT CCA", + "CTA GTTA", + "GA CTTAA", + "CTA TACA", + "GACA CAA", + "TCTT CACA", + "CC GGTT", + "TAAA GTAA", + "CTG TGGA", + "TAA GGTG", + "TCCA GTA", + "CAAA 
TTTA", + "AAATT AAAA", + "CCA TCTA", + "CTCC CTT", + "CTCC TTTT", + "GAGAGAGA GAGA", + "GGA GATA", + "CCTA TTA", + "CACC AAAA", + "CC GTTA", + "TGTT TATA", + "CTCA GGAGG", + "GA CGTA", + "GTCC TTA", + "GAAA GTT", + "GCTG GTG", + "CTC TACA", + "CAA TAGA", + "TAAAA TATT", + "GTA CCTG", + "GTA CTAA", + "CTT TGAAA", + "CCTT TCC", + "TAAAAA TTA", + "CTC GG", + "CAA GATA", + "CATT TGA", + "CACC TCA", + "GCCA GCC", + "GTC GG", + "GCA CATA", + "CA CTCAA", + "CTTTT AAAA", + "CAGGAA TT", + "GCC TATT", + "TCTT TCTG", + "CTGAGGCA GGAGAA", + "CAGG CAGG", + "CTA GTAA", + "TCCA TA", + "GAA CTTA", + "C G", + "GCTG TGA", + "GAAAA TA", + "TCTT CATT", + "GAGG GAGA", + "CCCA TCC", + "GAGG TGGG", + "GCC TCTA", + "GTA GGTG", + "TAAA CCA", + "GAA GGAAA", + "TATT GG", + "A TG", + "TCCA GTT", + "CCCA CAA", + "GAAA CACA", + "GTC TCAAAA", + "CTTTT CTTTT", + "TGAA GGA", + "TATT GATT", + "CTA TGTA", + "AAAAAAAA AAAAAA", + "TCCTT AAA", + "GC GCTA", + "TCCA CTT", + "GA CTCAA", + "TAAA TACA", + "TCA TGGA", + "TCTG GGA", + "TCC TATG", + "CTG TGCA", + "TCAA GTGA", + "TCA TAAAA", + "CA TCCAA", + "CCTT CCA", + "CTG TACA", + "GAA GGTT", + "CTG TGTA", + "GTCA CTT", + "TCA CAAAA", + "TCA GGCA", + "GTGTT AAA", + "CC CTTAA", + "CAAA GTG", + "GAAA TGTT", + "CTG GGGA", + "GA CGCC", + "TATA TGTG", + "CTA GATG", + "GAAATT AAA", + "GAA TGCA", + "GCA CTAA", + "CGG GAGG", + "GCCA CAA", + "CGC TTA", + "TCCA CAA", + "CAGA TA", + "TC TGAATT", + "TATTA TTTT", + "GC GCGG", + "CTC TGAAA", + "TCTCTT TG", + "TATT TCTA", + "GGGG TGGG", + "GGA TGCA", + "CCA CACC", + "TAAA TGTG", + "TCTT CCTG", + "GCAA GG", + "CTG CTCC", + "CTG GAGTG", + "CTGTT AAA", + "CACA CAAA", + "CTGA CTT", + "GAAAA GAAAA", + "CCTT CTCC", + "GAAA TAAAA", + "CCTCA GGTGA", + "GA TAATG", + "GAATT GCTT", + "CCAAAA TT", + "CGTG AAA", + "CACTG AAA", + "CAGTG AAA", + "GA TCTTA", + "GAGA TGGG", + "TCTG CCA", + "TGA GGTA", + "TATG GAA", + "TATA TTTTA", + "TGAA CTT", + "GCA GATA", + "CTTTT CTT", + "GTAAAA TG", + "TCTC TAA", + "TCTG CAAA", + "GA GCCTT", + "TA 
TCATT", + "CAA TTTTA", + "CC GCCA", + "TATT TAAAA", + "GAGA GATG", + "GAGA TGGA", + "GCCA GGATG", + "CGA GTAGCTG", + "TTCA TTTT", + "TATA CTT", + "GTC TACA", + "GTGA GTGA", + "GCTA CACA", + "GGGA GGA", + "CAA GGCA", + "GC TTTTAA", + "CA CTATT", + "GTT CATA", + "TCC TC", + "GTG GACA", + "TATT TGGA", + "CTC CAGTA", + "GTT CAGTT", + "CCAA GG", + "CAGA GCC", + "CTC GCC", + "CC GATG", + "GGAA TTTT", + "TCCA GCC", + "CC TCTTTT", + "GAA CCTT", + "CATG CACA", + "GTT TC", + "GAA GATA", + "TA CCCC", + "GCTG CCA", + "GGGG GAGG", + "GCAGTGA GCTGA", + "CTG TCTA", + "CGA GGA", + "CAA TGGG", + "GC TGTGAA", + "GAAA GTG", + "TACC AAAA", + "GTCA GG", + "CAGC TCC", + "TGTG CTT", + "GTC TAGG", + "TTTT TGTA", + "TTA TATG", + "TCA GGGG", + "TATT GTTA", + "CC TGAGA", + "TA TCTCA", + "CAA TCTG", + "CA CTCTG", + "GATT TAA", + "TGAA TAA", + "TCTT GTA", + "TCAA CTG", + "TCTC CAGG", + "CTA GAGG", + "CTGA GAAA", + "CTA GCTG", + "TCCA CCA", + "CGA TTTT", + "CC GGCC", + "GTT GACA", + "CTTA GAA", + "CA TAATG", + "GA GTATT", + "CACA GAAA", + "GA CTGTG", + "CTA TTTTA", + "TGA GGAAA", + "TTATT AAAA", + "CTTA TTTA", + "CAGA CTT", + "CA CGCC", + "GCTT GG", + "CCTG CTT", + "TAAA GCAA", + "CCTC GTGA", + "TA GAATT", + "CTTA CAA", + "TAAA GGAA", + "GTC TAGA", + "GTGA CTT", + "TACA TATG", + "GTCA GGA", + "GCTC CAGG", + "GAA GGGA", + "CA TGATG", + "TCA TCAAA", + "CGTT AAA", + "GTA CTCA", + "CTCC CAA", + "TATA TGTA", + "GGTA TTTT", + "TAA GCCA", + "C GAAATT", + "GTTTG TTTT", + "TCTG TCTT", + "TATA TCA", + "TGTT CATT", + "CAAA CCA", + "TTCA TTA", + "TATT TGTA", + "GATT GAA", + "CTA TAAAA", + "GATTAA TT", + "CCCA CCA", + "TCC TAGG", + "TAAA TGTA", + "CTCTT AAA", + "GCA GTCC", + "GC GGCTG", + "GTC TCGAA", + "TGAA TGA", + "CTG GGGG", + "GTC TCGA", + "GAA CAAAA", + "TGAA TCA", + "TGTATTTT TAGTAGAGA", + "GTTA TTAA", + "TTTTTT AAAA", + "GTCA GTG", + "CCCA TTA", + "CACA GGA", + "TATT CCTT", + "TCTG CCTT", + "CCTG GTG", + "GC GAGC", + "TA CTAAA", + "TACA CAAA", + "CC GTCC", + "GCTT TGTT", + "GCA TCCA", + "CA TCTAA", 
+ "GC TGTGTT", + "GTA GACA", + "GCC TATG", + "TCTT TGTG", + "GATT CTG", + "CGCC CGG", + "GA TGAGA", + "TA TCTGA", + "TGAA TTTG", + "CC TGATG", + "TAAAA CAA", + "CTT TAGG", + "TTTT CCTT", + "TGAA TAAA", + "CGG GGA", + "CAAA CATT", + "GTA TGGA", + "GCTT AAAA", + "TA CCAAA", + "CAAA GAGA", + "CTCC TGCC", + "GTAAAA AAA", + "CACA GCC", + "CCA TGCA", + "TA CAATT", + "CTA GTGA", + "CTGA GTT", + "GAGTG AAA", + "TCTGTT TG", + "CTG TAGG", + "TATAA AAAA", + "GCATT AAA", + "GTC CATA", + "TGTTAA AAA", + "TGTT TGA", + "GAA TAGA", + "CTT CAAAA", + "CTG GACA", + "CTG TAGA", + "CCATT AAA", + "CTA TCTG", + "CACTA TG", + "TTA TCAA", + "TAA GTAAA", + "TAATCCCAGCACTTTG GGAGGCC", + "CCA GAAAA", + "TGAA GCA", + "TCC CTTTT", + "TCA TACA", + "TA CGTT", + "GCC GTG", + "GGAA GTG", + "GG CCAAA", + "GTA CCAA", + "TCTCTA CTAAAAATA", + "CATT GTG", + "TGTG TGA", + "GAAA CAGA", + "CTT GACA", + "GA TGAGG", + "GAGA TTTT", + "CCTT CAA", + "GAA TCTA", + "CTC TCCTT", + "GG CGGA", + "TCTATCTA TCTATCTA", + "CACA CAGA", + "TGTG TGTA", + "CAAA GCC", + "TGTG CCA", + "GTT GAAAA", + "CTC CAGCA", + "TCAA GGA", + "TA GCTCA", + "CGC TGA", + "CCTG AAAA", + "GA CTATT", + "GATT CCA", + "GCTT CTA", + "GTC TGCC", + "CTT GGCA", + "TGTG GTA", + "GCTT TGA", + "GCTC TCTG", + "CTCA CAGA", + "TCTT TAAA", + "CAAA GCAA", + "TA CTTAA", + "GCTT CAA", + "CATT GAA", + "GGA GGAAA", + "CTA TAGA", + "CTGA GGAA", + "CCTG GCA", + "CC CTATT", + "CTC GTG", + "TTA CACA", + "TTA GGAA", + "CTG GTTA", + "GTT GTCC", + "TAATG AAAA", + "TATT TACA", + "GG GAATT", + "GTA GTTTT", + "GCTG CAA", + "CTA CGG", + "GCC GGA", + "CTG GGCA", + "CCTT AAAA", + "GATG GAA", + "TAGATAGA TAGATAGA", + "TATG TAA", + "GTA CGG", + "TATT CAAA", + "GA TCTCC", + "CCTG TTTT", + "TATT GCA", + "GGAAGGAA GGAAGGAA", + "GG TAATT", + "TTA CAGA", + "TCA GC", + "GCAAAA TG", + "GAGA GCA", + "GTA GAAAA", + "CATT TGAA", + "TCTT CTTTT", + "TCC CATA", + "GTTA TTTA", + "CTA TCTA", + "CA TCCTG", + "TCTT GTG", + "TTA TTATT", + "CC CGTC", + "TACTA TG", + "TAAA CATA", + "TAA GGAAA", + 
"GCTT GTG", + "CTC TAAAA", + "GTTTT AAAA", + "GACA GGA", + "TCC TAGA", + "TCCA CCCA", + "GTT TGAAA", + "CCA TCTCA", + "CTAA GAA", + "GTA TCTA", + "GTGA GGA", + "GCTG GAGG", + "CCTGTAA TCCCAGCTA", + "GCAA CAA", + "CTT TCAAA", + "CAAA TGTT", + "CTT GTCC", + "TCTCAA AAA", + "TATT TATTA", + "TAA GGCA", + "GAGA GGAA", + "TA TGATT", + "GCA TCTA", + "C GTTATT", + "GCC TGTA", + "GTT TCAAA", + "CCTTCCTT CCTTCCTT", + "GG CTTTG", + "GTCA GAA", + "CATG CATG", + "GTCA TTTA", + "CTG GAAAA", + "CTT CGA", + "CCTA TTTT", + "CCAA CAA", + "TCCA TCC", + "TAAA GTTA", + "GTC TCTC", + "TAA TCAAA", + "GATTTT TG", + "GATT TCTT", + "GG GCTGA", + "GCA TGTA", + "CCTG GGTT", + "GAGA CAA", + "GCTG TCA", + "TGA TAGG", + "GGA GACC", + "CC GGCA", + "TAA TCTCA", + "TGAA TTAA", + "TCTG GTG", + "GCC TC", + "GG CGCA", + "CCA GCTA", + "CA GTCTG", + "TGAA CTA", + "GTAA GAA", + "CCTT TCA", + "TCCA TGA", + "CAAA GGAA", + "CTC TC", + "CTC TCTCA", + "CTC CAGC", + "GTA GATA", + "CCCC CTCC", + "GG CGCC", + "TCTG TCC", + "GA CCATT", + "CTT GAAAA", + "TTA TCC", + "TACA TGTG", + "CAAA TTTG", + "TTTT GTG", + "CAGA GTG", + "GTAA TAA", + "GTGA GTG", + "TTTT TCC", + "GG CTCTG", + "GCC CTAA", + "GG CTGTT", + "CC CAATT", + "CAGA GCTT", + "TATAAA TG", + "GA GTCTG", + "TCTTAA AAA", + "GTTTTA TG", + "GA TCCAA", + "GGCC CTG", + "GA TCCTG", + "TCAA GTG", + "GATT CAA", + "CCTC TCTT", + "GAGA CGG", + "CAGA TCA", + "TAAAA GAA", + "CTGA GCAA", + "CCTG CCA", + "CCTT CTA", + "CGC TCA", + "GG CTGTG", + "TGGG AAAA", + "GGA GCCTG", + "CTGA GTG", + "CGTC AAA", + "TCAA GTA", + "CGTAA TT", + "TTA CTTA", + "TATA CTA", + "GG GCAAA", + "CAA CTTTT", + "CTT TGCC", + "GC CAGGAA", + "CACA CTA", + "GCC CAGC", + "TAAATAAA TAAATAAA", + "CTT TCCTT", + "GGGA GAA", + "TATG GTA", + "CGG CCA", + "CCTC TCTG", + "GAAA GCAA", + "CAA GCCA", + "GG CGTT", + "CTC TTTTA", + "TCGGCC TCCCAAA", + "GATT TATT", + "CAA GTCC", + "TA TCTTA", + "GTTCAA GACCA", + "CTCA CACA", + "GAAA TCAA", + "TGA GACC", + "GG GTAAA", + "GCTT GTT", + "GA TTTTAA", + "TTTT TATA", + 
"CAGA GCTG", + "TC TGTTAA", + "GTAA TTAA", + "TCTT TGAA", + "CTT GCCA", + "TTTT CATT", + "CCA TGTA", + "TCTC GGCTCACTGCAA", + "GGA TTCA", + "TC TATTAA", + "TACA TAAA", + "GATT GATT", + "GGA GAGGA", + "CGC AAAA", + "GGA CTAA", + "TTA TGTG", + "GTCA CTCA", + "GACA GCA", + "CGA GTT", + "GATG GTT", + "GGAA GAGG", + "GCCAA CATGGTGAAA", + "GGA GCCA", + "TGAA CTG", + "CCTC TGTG", + "GTA TAAAA", + "TCC CAGAA", + "CATT TATG", + "GA TTATG", + "TGTT TCTG", + "GAGTG GGTT", + "TACA TATT", + "CTC CAGGA", + "GACA CTG", + "GG TCTCA", + "CC GGGA", + "TGTT TAAA", + "CTCA CCA", + "GGA CTTA", + "GCC CACC", + "CAAA TCAA", + "GAAA TGTG", + "TA GTTAA", + "TCTA TAA", + "TTA GATT", + "GTG TAGG", + "TACTG AAA", + "GCA CCCA", + "GTG GGCTG", + "GAA TGAAA", + "TCTA GTT", + "TCA GGAGA", + "TCCA CTA", + "CTCA GTT", + "TACTT AAA", + "GA CTCCA", + "TCCATT TG", + "CACA GCAA", + "GCTCATG CCTG", + "GGTG CTG", + "GCTT TCTT", + "GTG GCCA", + "TA CGTG", + "GTG CAGTG", + "TGAA GTCA", + "CCTT TAA", + "TCTCAGCTCA CTGCAA", + "GAAA TATG", + "CC TCAAAA", + "GGGG CGG", + "CGA CAA", + "GG TGATG", + "GTCTT AAA", + "CAGAAA TG", + "CGTCA TT", + "CCAA GCA", + "GGA TCAA", + "GTGCTG GGATTA", + "GCTG GCC", + "CGGA GCTT", + "TACA TGA", + "TGTT TGAA", + "TCTC CATT", + "TAA GCAAA", + "CCTT TCTT", + "TA CTGTT", + "TCCA TCTT", + "CTTA CTT", + "CGGA GGTT", + "CAAAA CAA", + "TCA TAGG", + "TTA CTAA", + "CTTA TTTG", + "GAA TGTA", + "CCCCA TGGA", + "TTA CTGA", + "CGG AAAA", + "CTC CAGTG", + "TGTT CCA", + "CAGA TGAA", + "GTT GATA", + "TCC CCCC", + "CATT GCA", + "CTCA GCC", + "CTTA CTG", + "TA TCCTT", + "CTTTTA TG", + "TGAGTA GCTG", + "GACTG AAA", + "CAA TGAAA", + "CGA CTG", + "CTT GGGA", + "GCAA GCA", + "TCA CTCC", + "GATT TGA", + "CATTTT AAA", + "TCAA CTA", + "GTCC AAAA", + "CACC CTG", + "TTA CCTT", + "CAA GGGG", + "TTTT GGA", + "GTTA TTTG", + "GCTA CTG", + "CTGAGGCAGGA GAATG", + "GTGA TGA", + "GTA GTC", + "TAGTA TG", + "GTA TAGA", + "GTG TCTA", + "GCTG CTA", + "TTA GTAA", + "TAAA CATG", + "GTCA CCA", + "CA TCTTTT", + "CATA 
TAA", + "TCTC TCTA", + "TTTTA TTAA", + "TATT CTAA", + "GAAA TTTA", + "CTT CCCTG", + "TAAA GATG", + "TA CGTA", + "GTT TATTA", + "GAAAA GAA", + "CCCA CCCA", + "CAATT AAAA", + "CC GACA", + "CAAA GTGA", + "CAAA CAAAA", + "GCAA TTTT", + "CGATT AA", + "TTA GAGA", + "CTGA TGA", + "GGA GGAGG", + "GTCC TGGG", + "TCA TGAAA", + "GCAA CCA", + "GTT GGCA", + "GCGG CGG", + "GTCC CCA", + "GTA GGGG", + "GCCA TGTT", + "GTT CGAGA", + "GCC TATA", + "TAAA TTCA", + "GG CCATT", + "GAAAA CAA", + "TGTG TATG", + "GTA CTC", + "TAGG GAA", + "CCTT GAA", + "TC TATTTG", + "GAGG GCA", + "GAAA CTGA", + "TA CGC", + "TA CAAAAA", + "TCA TTATT", + "GGAAAA TT", + "TCAA TATT", + "CC CGTA", + "GGA GAGAA", + "TTA GTTA", + "CTCA GAGA", + "TC GAGC", + "CTA GTCA", + "GATG GCA", + "TGAA CATT", + "CTA TGGG", + "CACA CCA", + "TCAA TTAA", + "GGAA CTG", + "TTA CATG", + "CTT TCATT", + "CAGC TCTG", + "TCTTTT TTTT", + "TAAA TCTT", + "TGA TCTA", + "CATA CAA", + "GC TCAAAA", + "GC TGTGTG", + "TCAA TCA", + "GATT TGAA", + "CCAA GGA", + "GTCC TCA", + "GTG CTCC", + "AAAA TAA", + "GTGA CAA", + "GCTCA CGCCTG", + "CGA CGG", + "TA TCCAA", + "CACA CATG", + "TCTC TCTCC", + "TGTG GTT", + "CTT GGTA", + "TCTG GTT", + "TTTA TAA", + "CTG CTTTT", + "TGTG TCA", + "CACA TCA", + "CC TAATG", + "C GTTTTTT", + "GCTG GCA", + "GA CGTC", + "TATAA TTA", + "TACA GTAA", + "GAAA GTAA", + "GTC TGAAA", + "CCCA TTTT", + "TATA TGA", + "CTT GATA", + "CTT TATTTT", + "CTT TATTA", + "GG CGAA", + "CCA TGCC", + "CCTG CCTT", + "GAAGAA GAAGAA", + "CTGA CTGA", + "GCC CTTA", + "TA TCTAA", + "GTG TTTTA", + "TGTG GCA", + "TATT GTAA", + "GCCA GAAA", + "CCCTG TCTC", + "CACA GGAA", + "AAAA CAA", + "AAAAAAAA AAAAAAA", + "TAA CTCC", + "GCC TAAA", + "CGA GTA", + "TA GTATT", + "GTATTTT TAGTAGAGA", + "GCTG CAGG", + "TATT GAAA", + "CCAGCC TGGG", + "GCTCC AAA", + "TA CGAA", + "GGCC TCC", + "TATA CAAA", + "CATG GCA", + "CATG CAA", + "TACA CCA", + "CTT TACCA", + "TACA GAGA", + "TATT CTTA", + "TATG TCA", + "TCAA GCA", + "TCAA TGA", + "GG CTCTT", + "GGAA GTT", + "TCCA TGTT", 
+ "GCTT TCC", + "TATG TGA", + "GTG TAGA", + "TTTT TAAAA", + "GCTG GAGA", + "GTGA GAGA", + "CCTA GAA", + "CCTCC AAA", + "CCAA TGA", + "CAGG GCA", + "CTA TGCA", + "CTT CACC", + "CTA CAAAA", + "CTCA CC", + "GAGTA TG", + "TA GAAAAA", + "CTTTT GAA", + "TAAA GAGA", + "CATG TCA", + "TCTTTT AAA", + "CACA GTGA", + "GA TCTAA", + "TAA GGTA", + "CATA GAA", + "CGC GCC", + "CAGC TTA", + "TATA GTT", + "CGG GCC", + "TATC CATT", + "TGTTTG TTTT", + "GCTG GCTG", + "TACA GGA", + "CTCC TTTG", + "CAA TCTA", + "CCCC CTG", + "TATA CTG", + "CTGA GCC", + "CGG TTA", + "TGAA GTG", + "GCTT CCTT", + "TTTTA TTTG", + "TA GTGAA", + "CTGA GGTG", + "TCTT CTC", + "GACA GAAA", + "CTGAA CTGAA", + "CCTG GGAA", + "TCC CCAAA", + "TATG TATT", + "GATT TCTG", + "CATT CAAA", + "CACA GTT", + "GCTT GAA", + "GTG GATCA", + "CTGA GTGA", + "TGAA TTTA", + "TCAA CAAA", + "GG TCATT", + "GTAA TTTA", + "GC GACTT", + "CTGA GAGA", + "GTG CCCA", + "CTA GGTT", + "TCC TGAAA", + "GTC CACC", + "TCA CAGAA", + "GC GAAAA", + "GTA TGGG", + "TGAA CAAA", + "TAAA CAAAA", + "CC GTTTT", + "TC TCAATT", + "TCCA GAAA", + "GTAA CAA", + "GCA TTTTA", + "TCTC CATG", + "TTA TAAAA", + "CAGG CAA", + "CTAAAA AAA", + "GTT GGGA", + "TAAA GATT", + "TGAA GAGA", + "CCCC TCA", + "TGTT TATG", + "TCTA CTG", + "CCAA TTTT", + "GGTG GTG", + "GGAA CAA", + "TGTG GGA", + "TCTG CTA", + "GAA CGA", + "GTAA GTA", + "GTT GCCA", + "AAAA TTTT", + "GC GCGA", + "GAAA GATG", + "GTC TCTCA", + "TCCA TCAA", + "GCA GCTA", + "CACA TTTG", + "CTGA CAA", + "TCCA CC", + "GC T", + "CCCA CTT", + "GCA GGTA", + "GAGG CCA", + "TAAA GTCA", + "CTG GATA", + "CGG CAA" + ] + } +} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/demo_coding_vs_intergenomic_seqs/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.06_ep3_seed42/checkpoint-1875/tokenizer_config.json b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/demo_coding_vs_intergenomic_seqs/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.06_ep3_seed42/checkpoint-1875/tokenizer_config.json new file mode 
100644 index 0000000000000000000000000000000000000000..e926cbe05847fc6887cfc8a947900d32ebe68356 --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/demo_coding_vs_intergenomic_seqs/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.06_ep3_seed42/checkpoint-1875/tokenizer_config.json @@ -0,0 +1,56 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "[UNK]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "[CLS]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "[SEP]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "[PAD]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "[MASK]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "cache_dir": null, + "clean_up_tokenization_spaces": true, + "cls_token": "[CLS]", + "mask_token": "[MASK]", + "model_max_length": 100, + "pad_token": "[PAD]", + "padding_side": "right", + "sep_token": "[SEP]", + "tokenizer_class": "PreTrainedTokenizerFast", + "trust_remote_code": true, + "unk_token": "[UNK]", + "use_fast": true +} diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/demo_coding_vs_intergenomic_seqs/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.06_ep3_seed42/checkpoint-1875/trainer_state.json b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/demo_coding_vs_intergenomic_seqs/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.06_ep3_seed42/checkpoint-1875/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..33a46a1e145998ce66754700a93322318065838a --- /dev/null +++ 
b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/demo_coding_vs_intergenomic_seqs/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.06_ep3_seed42/checkpoint-1875/trainer_state.json @@ -0,0 +1,166 @@ +{ + "best_metric": 0.9063933951179595, + "best_model_checkpoint": "genomic_bench_DNAbert2_output/demo_coding_vs_intergenomic_seqs/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.06_ep3_seed42/checkpoint-1875", + "epoch": 3.0, + "eval_steps": 100, + "global_step": 1875, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.16, + "learning_rate": 2.6548672566371683e-05, + "loss": 0.4279, + "step": 100 + }, + { + "epoch": 0.32, + "learning_rate": 2.851872871736663e-05, + "loss": 0.2941, + "step": 200 + }, + { + "epoch": 0.48, + "learning_rate": 2.68161180476731e-05, + "loss": 0.2896, + "step": 300 + }, + { + "epoch": 0.64, + "learning_rate": 2.511350737797957e-05, + "loss": 0.2724, + "step": 400 + }, + { + "epoch": 0.8, + "learning_rate": 2.341089670828604e-05, + "loss": 0.2618, + "step": 500 + }, + { + "epoch": 0.96, + "learning_rate": 2.170828603859251e-05, + "loss": 0.2538, + "step": 600 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.8981, + "eval_f1": 0.8980013458817503, + "eval_loss": 0.24450437724590302, + "eval_matthews_correlation": 0.7970483436740018, + "eval_precision": 0.8991229435739876, + "eval_recall": 0.897926298388621, + "eval_runtime": 2.2999, + "eval_samples_per_second": 4347.921, + "eval_steps_per_second": 34.349, + "step": 625 + }, + { + "epoch": 1.12, + "learning_rate": 2.0005675368898978e-05, + "loss": 0.2381, + "step": 700 + }, + { + "epoch": 1.28, + "learning_rate": 1.830306469920545e-05, + "loss": 0.2257, + "step": 800 + }, + { + "epoch": 1.44, + "learning_rate": 1.6600454029511918e-05, + "loss": 0.2222, + "step": 900 + }, + { + "epoch": 1.6, + "learning_rate": 1.4897843359818387e-05, + "loss": 0.2267, + "step": 1000 + }, + { + "epoch": 1.76, + "learning_rate": 
1.3195232690124857e-05, + "loss": 0.2286, + "step": 1100 + }, + { + "epoch": 1.92, + "learning_rate": 1.1492622020431328e-05, + "loss": 0.2205, + "step": 1200 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9036, + "eval_f1": 0.9035999035999036, + "eval_loss": 0.23238573968410492, + "eval_matthews_correlation": 0.8073154756092576, + "eval_precision": 0.9036641967999408, + "eval_recall": 0.9036512789126667, + "eval_runtime": 2.312, + "eval_samples_per_second": 4325.297, + "eval_steps_per_second": 34.17, + "step": 1250 + }, + { + "epoch": 2.08, + "learning_rate": 9.790011350737798e-06, + "loss": 0.1951, + "step": 1300 + }, + { + "epoch": 2.24, + "learning_rate": 8.087400681044268e-06, + "loss": 0.1857, + "step": 1400 + }, + { + "epoch": 2.4, + "learning_rate": 6.384790011350738e-06, + "loss": 0.1822, + "step": 1500 + }, + { + "epoch": 2.56, + "learning_rate": 4.682179341657208e-06, + "loss": 0.175, + "step": 1600 + }, + { + "epoch": 2.72, + "learning_rate": 2.9795686719636776e-06, + "loss": 0.1706, + "step": 1700 + }, + { + "epoch": 2.88, + "learning_rate": 1.2769580022701475e-06, + "loss": 0.1733, + "step": 1800 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.9064, + "eval_f1": 0.9063933951179595, + "eval_loss": 0.24301454424858093, + "eval_matthews_correlation": 0.8132168703204711, + "eval_precision": 0.906713678896274, + "eval_recall": 0.9065032186577142, + "eval_runtime": 2.322, + "eval_samples_per_second": 4306.719, + "eval_steps_per_second": 34.023, + "step": 1875 + } + ], + "logging_steps": 100, + "max_steps": 1875, + "num_train_epochs": 3, + "save_steps": 100, + "total_flos": 6413331974400000.0, + "trial_name": null, + "trial_params": null +} diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/demo_coding_vs_intergenomic_seqs/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.06_ep3_seed42/checkpoint-1875/training_args.bin 
b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/demo_coding_vs_intergenomic_seqs/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.06_ep3_seed42/checkpoint-1875/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..9f5f22118e78f147855896956ded76f0dc7aaa14 --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/demo_coding_vs_intergenomic_seqs/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.06_ep3_seed42/checkpoint-1875/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef267b331203305d86d178a26fc0947311a7d6fc6d91ae987e0fcc2751ebc0c3 +size 5457 diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/demo_coding_vs_intergenomic_seqs/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.06_ep3_seed42/results/base5120_demo_coding_vs_intergenomic_seqs_lr3e-5_wd0.0_wr0.06_ep3_seed42/eval_results.json b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/demo_coding_vs_intergenomic_seqs/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.06_ep3_seed42/results/base5120_demo_coding_vs_intergenomic_seqs_lr3e-5_wd0.0_wr0.06_ep3_seed42/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..adf2dc6f8f5c7ba40bce7ab29bbaabe2ced10c67 --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/demo_coding_vs_intergenomic_seqs/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.06_ep3_seed42/results/base5120_demo_coding_vs_intergenomic_seqs_lr3e-5_wd0.0_wr0.06_ep3_seed42/eval_results.json @@ -0,0 +1 @@ +{"eval_loss": 0.2542661130428314, "eval_accuracy": 0.9021, "eval_f1": 0.9020706907370306, "eval_matthews_correlation": 0.8049607381043299, "eval_precision": 0.9027773220988344, "eval_recall": 0.902183634938159, "eval_runtime": 2.1971, "eval_samples_per_second": 4551.358, "eval_steps_per_second": 35.956, "epoch": 3.0} \ No newline at end of file diff --git 
a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-4375/config.json b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-4375/config.json new file mode 100644 index 0000000000000000000000000000000000000000..45e4c6c10a6211acf374c78e8078ab7ac74985f9 --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-4375/config.json @@ -0,0 +1,27 @@ +{ + "_name_or_path": "/data/nanhuang/Nan/models/DNAbert2_Pretrained", + "architectures": [ + "BertForSequenceClassification" + ], + "attention_probs_dropout_prob": 0.1, + "classifier_dropout": null, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_norm_eps": 1e-12, + "max_length": 512, + "max_position_embeddings": 512, + "model_type": "bert", + "num_attention_heads": 12, + "num_hidden_layers": 12, + "pad_token_id": 0, + "position_embedding_type": "absolute", + "problem_type": "single_label_classification", + "torch_dtype": "float32", + "transformers_version": "4.35.2", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 4096 +} diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-4375/model.safetensors b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-4375/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d2461c12898eed43e3470e48bc0198a3ede8a68a --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-4375/model.safetensors @@ -0,0 
+1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92e25307685c51ad096f3aa92059c090823fb1cb5db99bdba9753c7a7e34c95f +size 356777880 diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-4375/optimizer.pt b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-4375/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..266c0abb989064ced8b9e5890fb6f0285e8fdecd --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-4375/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c4705316037c1c05b2aabfbb133c8e6813d57377ad6979be8e9cc442d7e22a4 +size 713677451 diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-4375/rng_state.pth b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-4375/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..6be73074a1e99364840199a17ac85dca0dec98c4 --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-4375/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cad7b27eb5af31f3653dea61322524cbd98718a2c3f49a7f9e7fa7ba7942638c +size 14645 diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-4375/scheduler.pt 
b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-4375/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..ae965367849c82fe748cf08b5cee472a93c1cd6b --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-4375/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0820d74bc9c19dfa93c8aa4a6b85582725f6342ebcd7b58d2a0c7ac71726eee6 +size 1465 diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-4375/special_tokens_map.json b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-4375/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..a8b3208c2884c4efb86e49300fdd3dc877220cdf --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-4375/special_tokens_map.json @@ -0,0 +1,7 @@ +{ + "cls_token": "[CLS]", + "mask_token": "[MASK]", + "pad_token": "[PAD]", + "sep_token": "[SEP]", + "unk_token": "[UNK]" +} diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-4375/tokenizer.json b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-4375/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..8a569df5e832e1e62816e174612061cfbf0790d0 --- /dev/null +++ 
b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-4375/tokenizer.json @@ -0,0 +1,8340 @@ +{ + "version": "1.0", + "truncation": { + "direction": "Right", + "max_length": 512, + "strategy": "LongestFirst", + "stride": 0 + }, + "padding": { + "strategy": "BatchLongest", + "direction": "Right", + "pad_to_multiple_of": null, + "pad_id": 3, + "pad_type_id": 0, + "pad_token": "[PAD]" + }, + "added_tokens": [ + { + "id": 0, + "content": "[UNK]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "[CLS]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "[SEP]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 3, + "content": "[PAD]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 4, + "content": "[MASK]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": null, + "pre_tokenizer": { + "type": "Whitespace" + }, + "post_processor": { + "type": "TemplateProcessing", + "single": [ + { + "SpecialToken": { + "id": "[CLS]", + "type_id": 0 + } + }, + { + "Sequence": { + "id": "A", + "type_id": 0 + } + }, + { + "SpecialToken": { + "id": "[SEP]", + "type_id": 0 + } + } + ], + "pair": [ + { + "SpecialToken": { + "id": "[CLS]", + "type_id": 0 + } + }, + { + "Sequence": { + "id": "A", + "type_id": 0 + } + }, + { + "SpecialToken": { + "id": "[SEP]", + "type_id": 0 + } + }, + { + "Sequence": { + "id": "B", + "type_id": 1 + } + }, + { + "SpecialToken": { + "id": "[SEP]", + "type_id": 1 + } + } + ], + "special_tokens": { + "[CLS]": { + "id": "[CLS]", + "ids": [ + 1 + ], + "tokens": [ + "[CLS]" + ] + }, + "[SEP]": { 
+ "id": "[SEP]", + "ids": [ + 2 + ], + "tokens": [ + "[SEP]" + ] + } + } + }, + "decoder": null, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": "[UNK]", + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "vocab": { + "[UNK]": 0, + "[CLS]": 1, + "[SEP]": 2, + "[PAD]": 3, + "[MASK]": 4, + "A": 5, + "C": 6, + "G": 7, + "T": 8, + "AA": 9, + "TT": 10, + "TG": 11, + "CA": 12, + "CC": 13, + "TA": 14, + "GG": 15, + "TC": 16, + "GA": 17, + "AAA": 18, + "GC": 19, + "TAA": 20, + "TTTT": 21, + "TCA": 22, + "TGA": 23, + "TTA": 24, + "GAA": 25, + "TCC": 26, + "CAA": 27, + "CTG": 28, + "CTT": 29, + "GTG": 30, + "GTT": 31, + "GCA": 32, + "GGA": 33, + "CCA": 34, + "GTA": 35, + "GCC": 36, + "CTA": 37, + "TAAA": 38, + "AAAA": 39, + "CTC": 40, + "GTC": 41, + "TGTG": 42, + "TATT": 43, + "CACA": 44, + "GAAA": 45, + "TATA": 46, + "TCTT": 47, + "TGTT": 48, + "CAAA": 49, + "GAGA": 50, + "CATT": 51, + "TGAA": 52, + "CAGG": 53, + "TCTG": 54, + "CAGA": 55, + "TCAA": 56, + "GGAA": 57, + "TAAAA": 58, + "CTGA": 59, + "GCTT": 60, + "GTGA": 61, + "GCTG": 62, + "CTCA": 63, + "CCTT": 64, + "CATG": 65, + "GCAA": 66, + "GTCA": 67, + "GTAA": 68, + "TTTTA": 69, + "TATG": 70, + "GAGG": 71, + "CGG": 72, + "GATT": 73, + "CCTG": 74, + "TCTC": 75, + "CCAA": 76, + "GTTA": 77, + "CTCC": 78, + "CTAA": 79, + "TACA": 80, + "CTTA": 81, + "TCCA": 82, + "GATG": 83, + "TTAA": 84, + "GAAAA": 85, + "TTTG": 86, + "GTTTT": 87, + "TCTA": 88, + "GCCA": 89, + "GTCC": 90, + "CTTTT": 91, + "GGGG": 92, + "CGA": 93, + "TTTA": 94, + "CCCA": 95, + "CAAAA": 96, + "TGGG": 97, + "TAGA": 98, + "TAGG": 99, + "GACA": 100, + "GGTT": 101, + "CCCC": 102, + "GGTG": 103, + "CATA": 104, + "GCTA": 105, + "TGTA": 106, + "TCAAA": 107, + "TGGA": 108, + "TAATT": 109, + "TTATT": 110, + "TGCA": 111, + "GGCA": 112, + "GATA": 113, + "CCTA": 114, + "TTCA": 115, + "TCTCA": 116, + "GGGA": 117, + "CGC": 118, + "CTGAA": 119, + "GTAAA": 120, + "TCTCC": 121, + "TTTTTT": 122, + 
"CGTG": 123, + "GCAAA": 124, + "TAAAAA": 125, + "TCTGA": 126, + "TCATT": 127, + "GGAAA": 128, + "TGAAA": 129, + "TCCTT": 130, + "CCAAA": 131, + "GAATT": 132, + "CTAAA": 133, + "CGTT": 134, + "GTGAA": 135, + "GGCC": 136, + "TAATA": 137, + "GGTA": 138, + "TGCC": 139, + "CACC": 140, + "TGATT": 141, + "AAAAAA": 142, + "GCTCA": 143, + "TCCAA": 144, + "GAGAA": 145, + "CTGTT": 146, + "TATTA": 147, + "CAGCA": 148, + "CTCTT": 149, + "CTTAA": 150, + "CAGAA": 151, + "GCTGA": 152, + "GTTAA": 153, + "TCTTA": 154, + "TATTTT": 155, + "GCCAA": 156, + "CTTTG": 157, + "GACC": 158, + "CGCA": 159, + "GTATT": 160, + "GTCTT": 161, + "CAATT": 162, + "GTGTT": 163, + "CTCAA": 164, + "GGAGG": 165, + "CGAA": 166, + "TCTTTT": 167, + "GTCAA": 168, + "CGCC": 169, + "TATAA": 170, + "TACC": 171, + "TCTAA": 172, + "CCATT": 173, + "CGGA": 174, + "CAAAAA": 175, + "CAGTG": 176, + "TCCTG": 177, + "CTCTG": 178, + "GAAAAA": 179, + "CTGTG": 180, + "CAGC": 181, + "TTTTAA": 182, + "GCATT": 183, + "GCCTT": 184, + "TAATG": 185, + "CTATT": 186, + "GTTTG": 187, + "TGATG": 188, + "GGCTG": 189, + "CCTCA": 190, + "GAGGA": 191, + "GCCTG": 192, + "AAATT": 193, + "CGTA": 194, + "TCAAAA": 195, + "TACAA": 196, + "CATCA": 197, + "CAGTT": 198, + "TGAGA": 199, + "GGGAA": 200, + "CACTG": 201, + "CACAA": 202, + "CAGGA": 203, + "CCCCA": 204, + "CCCTG": 205, + "TTTTTTTT": 206, + "TAGAA": 207, + "GAGCA": 208, + "CCTCC": 209, + "CACCA": 210, + "TATCA": 211, + "GAGC": 212, + "CATTA": 213, + "CACACACA": 214, + "GAGTG": 215, + "GGATT": 216, + "TGTGTGTG": 217, + "TACTT": 218, + "CACTT": 219, + "GTCTG": 220, + "TGAGG": 221, + "GAGTT": 222, + "GAATG": 223, + "TCATG": 224, + "GACAA": 225, + "GACTT": 226, + "TATTAA": 227, + "TAATAA": 228, + "GGCCA": 229, + "CATTTT": 230, + "CAGCC": 231, + "CCCTT": 232, + "GCTAA": 233, + "TATATATA": 234, + "GTGTG": 235, + "TACTG": 236, + "TAGTT": 237, + "CAATG": 238, + "GCTC": 239, + "CAGTA": 240, + "GCTCC": 241, + "CATAA": 242, + "TTATG": 243, + "TAAATT": 244, + "GATGA": 245, + "CATGA": 246, + "GCGG": 
247, + "AAAAAAAA": 248, + "CCATG": 249, + "GATAA": 250, + "GACTG": 251, + "TATGA": 252, + "GCAGG": 253, + "GATCA": 254, + "GTTTTA": 255, + "GGATG": 256, + "CCTGA": 257, + "GTAAAA": 258, + "GAAGG": 259, + "GATTA": 260, + "CCTC": 261, + "GACCA": 262, + "GCTTA": 263, + "CCCAA": 264, + "AAATG": 265, + "GCATG": 266, + "TAGTA": 267, + "TACCA": 268, + "GGCTT": 269, + "CGTC": 270, + "TCTCTT": 271, + "GGTCA": 272, + "TTATTA": 273, + "TACTA": 274, + "TAGCA": 275, + "TATC": 276, + "CTGGG": 277, + "CATC": 278, + "CTTTTA": 279, + "CTAAAA": 280, + "GTGGG": 281, + "GAGTA": 282, + "CCAGG": 283, + "GATTTT": 284, + "TAGTG": 285, + "GAAATT": 286, + "CACTA": 287, + "TCGG": 288, + "TCAGG": 289, + "CAGGAA": 290, + "GCAAAA": 291, + "CCTTA": 292, + "CATCC": 293, + "CTTGG": 294, + "TGTGAA": 295, + "TATTTG": 296, + "CCTAA": 297, + "CTATG": 298, + "GAGAAA": 299, + "GAGAGAGA": 300, + "GCTTTT": 301, + "TATAAA": 302, + "CAAGG": 303, + "TCTCTG": 304, + "TGTTAA": 305, + "TGTGTT": 306, + "GAGCC": 307, + "GACTA": 308, + "TATATT": 309, + "TAAAAAA": 310, + "TTTTTG": 311, + "GTATG": 312, + "CATTAA": 313, + "TAGGA": 314, + "TAGC": 315, + "GTTGG": 316, + "GAAGAA": 317, + "TAAATG": 318, + "TCTGTT": 319, + "CAGAAA": 320, + "CAAATT": 321, + "TAATTA": 322, + "TCTGTG": 323, + "TATCC": 324, + "TGAATT": 325, + "CTCCA": 326, + "GTGAAA": 327, + "GGCAA": 328, + "GGAGA": 329, + "GAAGA": 330, + "GGTGA": 331, + "GGGCA": 332, + "CCAAAA": 333, + "TCTCTCTC": 334, + "CTGCA": 335, + "CTTCTT": 336, + "TCTTAA": 337, + "CCCTA": 338, + "TGTGTG": 339, + "AAATA": 340, + "TGTTTG": 341, + "GGGTT": 342, + "GTGCTG": 343, + "GGAAAA": 344, + "GGGGA": 345, + "TCAGA": 346, + "CCTTTT": 347, + "GAAATG": 348, + "GCAGCA": 349, + "TCTGAA": 350, + "GGGTG": 351, + "CACATT": 352, + "TCTTTG": 353, + "GGGC": 354, + "TCCCA": 355, + "TCCATT": 356, + "CTGAAA": 357, + "CTTTA": 358, + "TCGA": 359, + "GTTTA": 360, + "CAACAA": 361, + "CTTCC": 362, + "GCCTCC": 363, + "TTAAA": 364, + "GCTCTG": 365, + "GTTTCA": 366, + "GGAGGA": 367, + "CGTGA": 368, + 
"CAGTC": 369, + "GAATA": 370, + "CAGAGA": 371, + "CCCTC": 372, + "CAAATG": 373, + "CTGCTG": 374, + "GATCC": 375, + "TTTTATT": 376, + "AAAATT": 377, + "TTATA": 378, + "TCAATT": 379, + "GGTAA": 380, + "GTTATT": 381, + "GCCAGG": 382, + "GGAGAA": 383, + "CATTTG": 384, + "TCACC": 385, + "CTCAAA": 386, + "GGTTA": 387, + "TCCAAA": 388, + "TCTATT": 389, + "GCAGA": 390, + "CTTCA": 391, + "TCATCA": 392, + "CGAGG": 393, + "TAACA": 394, + "GTTGTT": 395, + "CTTATT": 396, + "CGTCA": 397, + "TAAGA": 398, + "TAATTTT": 399, + "CTGTA": 400, + "TCCACA": 401, + "GCTGTG": 402, + "CGCTG": 403, + "TCTAAA": 404, + "GCGA": 405, + "CAATA": 406, + "CCACCA": 407, + "GAACA": 408, + "CGAAA": 409, + "CAGATT": 410, + "TCACA": 411, + "TTATTTT": 412, + "TCTCAA": 413, + "TGACA": 414, + "CTCCAA": 415, + "AAAAAAA": 416, + "TATATG": 417, + "TCCTCC": 418, + "TCACTT": 419, + "TCCAGG": 420, + "CAAGA": 421, + "GGCTA": 422, + "GTGGTG": 423, + "CGTAA": 424, + "CGAGA": 425, + "TGATA": 426, + "GGATTA": 427, + "CAACA": 428, + "CGATT": 429, + "TGAGAA": 430, + "CTCCTT": 431, + "CTCATT": 432, + "GTTAAA": 433, + "TCATA": 434, + "CCTCTG": 435, + "CTCTA": 436, + "GCTGAA": 437, + "CTGGA": 438, + "TAAGG": 439, + "CTTAAA": 440, + "TATTTA": 441, + "CCACA": 442, + "CCGG": 443, + "GTCAAA": 444, + "TGGAA": 445, + "CGGAA": 446, + "TGATGA": 447, + "GTTCA": 448, + "TAACAA": 449, + "GCTGTT": 450, + "TAAGAA": 451, + "CTGCC": 452, + "TTAATT": 453, + "CCAGA": 454, + "TCAGAA": 455, + "GTCATT": 456, + "CGCTT": 457, + "GATTAA": 458, + "CTGATT": 459, + "GCCACA": 460, + "GTAATT": 461, + "TCCAGA": 462, + "GCCAAA": 463, + "GTGATT": 464, + "TAAAATT": 465, + "CAAGAA": 466, + "CCACC": 467, + "TAATCC": 468, + "GTTCTT": 469, + "TCCATG": 470, + "GCTCTT": 471, + "TGCTG": 472, + "GGGTA": 473, + "TTACA": 474, + "GCCATT": 475, + "GCACA": 476, + "GCAATT": 477, + "TCCCTG": 478, + "TGTGA": 479, + "TCGAA": 480, + "GGACA": 481, + "GGAATT": 482, + "GTGGA": 483, + "CTTCTG": 484, + "TCCCC": 485, + "GCCCC": 486, + "CTTGA": 487, + "TAATGA": 488, + "TAAATA": 
489, + "TATATA": 490, + "CTGCAA": 491, + "TCATTA": 492, + "GTATA": 493, + "TCCCCA": 494, + "CGTTA": 495, + "GCAGAA": 496, + "TGAGTT": 497, + "CTTTTTT": 498, + "CGATG": 499, + "CTTTCA": 500, + "AAAATG": 501, + "CAGGTT": 502, + "CTAATT": 503, + "CGCCA": 504, + "TGAAAAA": 505, + "GTTCC": 506, + "GTCCTT": 507, + "GTCCAA": 508, + "GTTTTTT": 509, + "CTCTGA": 510, + "GCGC": 511, + "GTTGA": 512, + "TGAATG": 513, + "CTATA": 514, + "GCAGTG": 515, + "CCTTAA": 516, + "TCACCA": 517, + "TCACTG": 518, + "GCCCTG": 519, + "TAACTT": 520, + "CAGATG": 521, + "GTAGG": 522, + "TCTATA": 523, + "GAGATT": 524, + "GTCTA": 525, + "TTTTAAA": 526, + "CACATG": 527, + "TGACC": 528, + "CACAAA": 529, + "GTGTA": 530, + "GGGAGG": 531, + "GCTTTG": 532, + "CAAAAAA": 533, + "GAGGAA": 534, + "GTTCTG": 535, + "TTTTTA": 536, + "GTCTCA": 537, + "GTTCAA": 538, + "TCGTG": 539, + "GCTTAA": 540, + "GCACC": 541, + "CTCCTG": 542, + "TAAATAAA": 543, + "CTACA": 544, + "CTTCCA": 545, + "TCCTCA": 546, + "CGCAA": 547, + "GAAAAAA": 548, + "GCCCA": 549, + "TCGTT": 550, + "GTAGA": 551, + "CTCTCA": 552, + "GTCCA": 553, + "TGACTT": 554, + "TCCCTT": 555, + "GCCATG": 556, + "CACACACACACACACA": 557, + "GTGATG": 558, + "CCTCTT": 559, + "GCCAGA": 560, + "TCCTA": 561, + "CGTTTT": 562, + "GTACA": 563, + "GCATA": 564, + "GAATTA": 565, + "TGTGTGTGTGTGTGTG": 566, + "CCCAGG": 567, + "GGTTTT": 568, + "TCAAAAA": 569, + "TCTATG": 570, + "CCATA": 571, + "TGACAA": 572, + "GGATA": 573, + "TCAGTG": 574, + "GTATTTT": 575, + "GAGATG": 576, + "GCGTG": 577, + "CGTCC": 578, + "TTAAAAA": 579, + "TAATCA": 580, + "CAATTA": 581, + "CCACTG": 582, + "CGGTT": 583, + "GTTGAA": 584, + "TGATTA": 585, + "CCTTTG": 586, + "CGGTG": 587, + "CAGGTG": 588, + "TCAATG": 589, + "CTGATG": 590, + "TCAGGA": 591, + "GTTTAA": 592, + "TATTAAA": 593, + "CTCTTA": 594, + "GCAGGA": 595, + "CTCTCC": 596, + "GAACC": 597, + "CTTTAA": 598, + "GGGCC": 599, + "GTATTA": 600, + "GCGCC": 601, + "CCAATT": 602, + "GCTAAA": 603, + "TGACTG": 604, + "GATTTG": 605, + "GATAAA": 606, + 
"TCAGCA": 607, + "GTTCCA": 608, + "GAAATA": 609, + "GACAAA": 610, + "GAGTC": 611, + "GCTATT": 612, + "TCACAA": 613, + "GAGGTT": 614, + "TAACC": 615, + "GAAGGA": 616, + "GCTCAA": 617, + "GAAAATT": 618, + "CCAGCA": 619, + "GTTTTAA": 620, + "GTGCC": 621, + "TGAGGA": 622, + "CATAAA": 623, + "GGTCC": 624, + "TCATTTT": 625, + "TATTTATT": 626, + "TAATAAA": 627, + "GCCTA": 628, + "CTTTTAA": 629, + "TAAGTG": 630, + "TAAGTA": 631, + "CTGGAA": 632, + "CACACA": 633, + "GACAGA": 634, + "CAACC": 635, + "GGGAAA": 636, + "CCAGAA": 637, + "TCAGTT": 638, + "TAACTA": 639, + "CTAAAAA": 640, + "TGGGTT": 641, + "TGAGTG": 642, + "TAAAATG": 643, + "TATATATATATATATA": 644, + "GCACTG": 645, + "GACTC": 646, + "TACAAA": 647, + "TAAAAAAA": 648, + "TCTACA": 649, + "GTTGTG": 650, + "TCGCC": 651, + "CCCAAA": 652, + "GTCATG": 653, + "CTGCTT": 654, + "GGAATG": 655, + "CTATTA": 656, + "GATATT": 657, + "TAGAAA": 658, + "GGCAGG": 659, + "GATGAA": 660, + "GTAGAA": 661, + "TCCTGA": 662, + "TAACTG": 663, + "GCTGGG": 664, + "GCAATG": 665, + "GCCCCA": 666, + "GTTTGA": 667, + "CATTTA": 668, + "GTGCA": 669, + "CTTGAA": 670, + "GTGGAA": 671, + "CTTCAA": 672, + "TAAATTA": 673, + "GTGGCA": 674, + "TCCTTA": 675, + "GGAAAAA": 676, + "TTTTTTA": 677, + "CCTGTG": 678, + "GTAATG": 679, + "GTGTTA": 680, + "CTAGG": 681, + "CAGGCTG": 682, + "GACACA": 683, + "GAAAAAAA": 684, + "TCGC": 685, + "GTAAAAA": 686, + "TGTTTA": 687, + "TCTCTA": 688, + "GTCCTG": 689, + "CCAGGA": 690, + "GAACAA": 691, + "TAAGTT": 692, + "TGAGCA": 693, + "GCTCCA": 694, + "TAAGCA": 695, + "CTCATG": 696, + "GTCTTA": 697, + "CCCACA": 698, + "CATATT": 699, + "GCCTCA": 700, + "CACTC": 701, + "CTTCTA": 702, + "TGATTTT": 703, + "TCGCA": 704, + "CCTGTT": 705, + "GAAGCA": 706, + "GCAAAAA": 707, + "GCGGA": 708, + "CCACAA": 709, + "GCGCA": 710, + "CATATA": 711, + "GACATT": 712, + "GTTCTA": 713, + "CAAAATT": 714, + "GAAAGAAA": 715, + "CCCGG": 716, + "TACACA": 717, + "CCAAAAA": 718, + "GAGGTG": 719, + "GGCTCA": 720, + "CAGTGA": 721, + "TCCCAA": 722, + "TATCTT": 
723, + "TGAGTA": 724, + "TCGTA": 725, + "TTTTCTT": 726, + "GTGGGA": 727, + "GAGCTG": 728, + "CCCTCC": 729, + "TAGGTT": 730, + "TTAGG": 731, + "TAATATT": 732, + "CCAGCC": 733, + "CATCTT": 734, + "GTCTGA": 735, + "GTTTCC": 736, + "CCTGAA": 737, + "GGAGCA": 738, + "GAAAATG": 739, + "TCAGTA": 740, + "TAACCA": 741, + "GATGTT": 742, + "CTGTTA": 743, + "CATGTT": 744, + "GGCGG": 745, + "CATGTG": 746, + "GGGAGA": 747, + "CTTTGA": 748, + "TCTTTCTT": 749, + "AAAAAAAAA": 750, + "GGGGTG": 751, + "CTTTCC": 752, + "CTTGTT": 753, + "GCATTA": 754, + "CCCAGA": 755, + "CAAATA": 756, + "TCGGA": 757, + "CAGCTT": 758, + "TCACTA": 759, + "TAATTAA": 760, + "TAAGGA": 761, + "GAACTG": 762, + "GCACAA": 763, + "GCGTT": 764, + "GGCTC": 765, + "TCTTTTA": 766, + "CCTCCA": 767, + "GGCAAA": 768, + "CAGCTG": 769, + "CTACAA": 770, + "TACATT": 771, + "GCTATG": 772, + "CTTGTG": 773, + "GAGTCA": 774, + "GTTATG": 775, + "CTGCCA": 776, + "GTCTCC": 777, + "TGACCA": 778, + "CACCTG": 779, + "TATATTA": 780, + "TGATCA": 781, + "CAGCAA": 782, + "GATGTG": 783, + "GTCTTTT": 784, + "CTAGAA": 785, + "GCTACA": 786, + "CTGGGA": 787, + "GGGGTT": 788, + "CAAGTA": 789, + "CAAGGA": 790, + "CCCTCA": 791, + "TAGCC": 792, + "GTTGGA": 793, + "GCTATA": 794, + "TCTGAAA": 795, + "TATGTT": 796, + "CCCCTT": 797, + "GTTGTA": 798, + "CCCTGA": 799, + "TGACTA": 800, + "CAAGCA": 801, + "CAATAA": 802, + "GAACTT": 803, + "CATGAA": 804, + "CTTATG": 805, + "CTAATG": 806, + "TCTAAAA": 807, + "CCAATG": 808, + "GAAGTG": 809, + "CCTCAA": 810, + "CCCATT": 811, + "CAGTCA": 812, + "GAGAGAGAGAGAGAGA": 813, + "TATGTG": 814, + "GCAGTGA": 815, + "TCTCCTT": 816, + "TCCCAAA": 817, + "CCATTA": 818, + "CCAGTG": 819, + "GCATCA": 820, + "TCAAATT": 821, + "GATCTT": 822, + "GACAGG": 823, + "GGAGTG": 824, + "GTAGTA": 825, + "CAACTT": 826, + "GAAGTT": 827, + "CCCCTG": 828, + "TCTCAAA": 829, + "GGGTC": 830, + "GAGCTT": 831, + "TATGAAA": 832, + "TATGAA": 833, + "GACATG": 834, + "CAAGTG": 835, + "GATATA": 836, + "CATCTG": 837, + "CTGTGA": 838, + "TAATTTA": 839, 
+ "GGCAGA": 840, + "GCGAA": 841, + "CCTAAA": 842, + "CCATCA": 843, + "CACTGA": 844, + "GGACTA": 845, + "GACGG": 846, + "CTCTTTT": 847, + "CTGTCA": 848, + "TCTCTCTCTCTCTCTC": 849, + "TTAATG": 850, + "GCAGCC": 851, + "CAAAAAAA": 852, + "GCACCA": 853, + "CTATTTT": 854, + "GAGCAA": 855, + "CTTGGA": 856, + "CTGGTG": 857, + "GAATAA": 858, + "TCCTTTT": 859, + "GAAGTA": 860, + "CAGTAA": 861, + "CAACCA": 862, + "CTGTAA": 863, + "TGATAA": 864, + "GCAGTT": 865, + "CACGG": 866, + "TAAATAA": 867, + "CTGTTTT": 868, + "CTACTA": 869, + "GCTCTA": 870, + "CGAAAA": 871, + "CAAGTT": 872, + "CTTGTA": 873, + "GAATGA": 874, + "GAGTGA": 875, + "GCCTGA": 876, + "GGTTTG": 877, + "CCCATG": 878, + "GGGGAA": 879, + "GAAGAAA": 880, + "TGTTA": 881, + "CAATTTT": 882, + "TATATTTT": 883, + "CTCAAAA": 884, + "GGTGGG": 885, + "CCGTG": 886, + "TATTTCA": 887, + "CCCCAA": 888, + "TATTTAA": 889, + "GGCTGA": 890, + "GGTGTG": 891, + "CATCAA": 892, + "CACTCA": 893, + "TCTCATT": 894, + "GAATTTT": 895, + "GAATCA": 896, + "CAGGAAA": 897, + "CATACA": 898, + "TATTTTA": 899, + "TTATAA": 900, + "GAGGAAA": 901, + "CATATG": 902, + "CTTTCTT": 903, + "CAACTG": 904, + "GGGCTG": 905, + "CCCCCA": 906, + "TTTGAAA": 907, + "CATTAAA": 908, + "CTTAAAA": 909, + "GACTGA": 910, + "CAATGA": 911, + "GGCACA": 912, + "CCAGTA": 913, + "GGATGA": 914, + "GTTTTTG": 915, + "GCATTTT": 916, + "GTGCCA": 917, + "GCAGTA": 918, + "GCCCTT": 919, + "TCGTC": 920, + "GAACTA": 921, + "GTGGTT": 922, + "GTGTGA": 923, + "GTGCTT": 924, + "CGCTA": 925, + "GTGTCA": 926, + "TCTTTA": 927, + "GCCTTA": 928, + "CCTATT": 929, + "CAAAATG": 930, + "GAACCA": 931, + "CTCCAGG": 932, + "GACTCA": 933, + "CATGAAA": 934, + "GCTAGG": 935, + "TGTTAAA": 936, + "GCGTA": 937, + "GCACTT": 938, + "TCTTAAA": 939, + "TAAGAAA": 940, + "GGCCTG": 941, + "TCCCTA": 942, + "GTGGTA": 943, + "CTGCTA": 944, + "GGAGTT": 945, + "GGTAAA": 946, + "CAAACAAA": 947, + "GATATG": 948, + "TCATGA": 949, + "GACCTT": 950, + "TAATATA": 951, + "GCTAGA": 952, + "GGACTG": 953, + "GGCATT": 954, + 
"CAGTTA": 955, + "CCCTAA": 956, + "CACCTT": 957, + "GGTGAA": 958, + "CAGCTA": 959, + "GTGTTTT": 960, + "CAACTA": 961, + "GATCAA": 962, + "GAGAAAA": 963, + "TGTGAAA": 964, + "AAAATA": 965, + "GATGAAA": 966, + "CTCTAA": 967, + "TTACTT": 968, + "GATCTG": 969, + "CCACTT": 970, + "GAGTTA": 971, + "CAATCA": 972, + "GGATTACAGG": 973, + "TTTATTTT": 974, + "TACATA": 975, + "TTTTATG": 976, + "GAGTAA": 977, + "GCTGAAA": 978, + "GTACTG": 979, + "GCTCTC": 980, + "TATGTA": 981, + "TGTGTA": 982, + "TCATAA": 983, + "GGACTT": 984, + "TCTCCAA": 985, + "GCATGA": 986, + "GACGA": 987, + "CGCCTG": 988, + "GACCTG": 989, + "GGTCTT": 990, + "CACCAA": 991, + "GATC": 992, + "GACCAA": 993, + "AAAATTA": 994, + "GTAAATT": 995, + "CCAGTT": 996, + "CAGAAAA": 997, + "TAACAAA": 998, + "GGTGTT": 999, + "GAAATTA": 1000, + "TGCCTCA": 1001, + "CCGCC": 1002, + "CCATTTT": 1003, + "CTTGCC": 1004, + "TCTGTA": 1005, + "CTGGCA": 1006, + "GGGATG": 1007, + "CCATGA": 1008, + "CTACTT": 1009, + "TAGGTG": 1010, + "TAAAAATT": 1011, + "GAAAGAA": 1012, + "TAAAATA": 1013, + "CTTTTTG": 1014, + "GTCAAAA": 1015, + "GGACAA": 1016, + "TCTGATT": 1017, + "CTCTCTT": 1018, + "TAATTTG": 1019, + "CTCTTTG": 1020, + "GGCCTT": 1021, + "GGATTTT": 1022, + "CTACTG": 1023, + "GTTGCA": 1024, + "GGCTCC": 1025, + "CTCTGTG": 1026, + "CTCCAGCC": 1027, + "TTACAA": 1028, + "GGACCA": 1029, + "GGAAGGAA": 1030, + "TAAAGAA": 1031, + "TTAGAA": 1032, + "GTGAAAA": 1033, + "CTTGCA": 1034, + "TGGGTG": 1035, + "GGAGCC": 1036, + "CCTCTA": 1037, + "CT": 1038, + "GGGCTT": 1039, + "GGCATG": 1040, + "CTGGTT": 1041, + "TACAGA": 1042, + "GATTAAA": 1043, + "CTCTGTT": 1044, + "TTATCA": 1045, + "CTGAAAA": 1046, + "GTAGTT": 1047, + "GGGTCA": 1048, + "GT": 1049, + "CAGCCA": 1050, + "GCGTC": 1051, + "CACTTA": 1052, + "GTGCTA": 1053, + "TCTTATT": 1054, + "GTACTT": 1055, + "GGTATT": 1056, + "TAGAGA": 1057, + "TACATG": 1058, + "CCACTA": 1059, + "TGAGAAA": 1060, + "CAATAAA": 1061, + "TCCAAAA": 1062, + "CGTGAA": 1063, + "GGTCTG": 1064, + "CTGAATT": 1065, + "TCAGCC": 
1066, + "CCTCTC": 1067, + "GTTAAAA": 1068, + "GGGATT": 1069, + "TCCTAA": 1070, + "CACTAA": 1071, + "GGAGAAA": 1072, + "CCTTCCTT": 1073, + "GTTTCTT": 1074, + "TATCAA": 1075, + "GATACA": 1076, + "TAATCCCAGCA": 1077, + "CCGCA": 1078, + "TGAAATT": 1079, + "CGTAAA": 1080, + "CTCTCTG": 1081, + "TCTTTTTT": 1082, + "GTACAA": 1083, + "CCAAATT": 1084, + "TGTATTTT": 1085, + "TCGCTT": 1086, + "GGGTGA": 1087, + "GATAGA": 1088, + "CTTTATT": 1089, + "TAAACAA": 1090, + "GTTTATT": 1091, + "TGAATA": 1092, + "CTACCA": 1093, + "GTGTCC": 1094, + "CCCGA": 1095, + "TTTATTA": 1096, + "CTCCAAA": 1097, + "TTTTTTTTTTTT": 1098, + "TCATCC": 1099, + "GAAGCC": 1100, + "CTAAATT": 1101, + "CAAATTA": 1102, + "CCCCAAA": 1103, + "TCTTCTT": 1104, + "TAGGAAA": 1105, + "CACGA": 1106, + "CATTTTA": 1107, + "GTGCAA": 1108, + "TCTCCTG": 1109, + "TATTTTAA": 1110, + "GTTTGTT": 1111, + "GAGCCA": 1112, + "GGCCAA": 1113, + "CATTTCA": 1114, + "CATCCA": 1115, + "CCTATA": 1116, + "GACTTA": 1117, + "TCAAATG": 1118, + "GTATCA": 1119, + "TAAATTTT": 1120, + "CTGAGGCA": 1121, + "GCCCAA": 1122, + "GGTTAA": 1123, + "TATCTG": 1124, + "TGACAGA": 1125, + "GGAGAGA": 1126, + "GCTGCTG": 1127, + "CCCTTA": 1128, + "TCCTCTG": 1129, + "GTAGCA": 1130, + "CCTGAAA": 1131, + "CCGAA": 1132, + "TTTTTAA": 1133, + "CTATAA": 1134, + "CCTGTA": 1135, + "TTACTG": 1136, + "GTATAA": 1137, + "GGCGA": 1138, + "GACTAA": 1139, + "TCAGAAA": 1140, + "GTGTGTG": 1141, + "CAAAGAA": 1142, + "CCTATG": 1143, + "GCAGAGA": 1144, + "CCGTT": 1145, + "TTTTATTTT": 1146, + "GGAAGAA": 1147, + "TTACTA": 1148, + "GCCTGGG": 1149, + "TCCCTC": 1150, + "TCCTCTT": 1151, + "GGATCA": 1152, + "GGTCAA": 1153, + "TCGAGA": 1154, + "TATTCTT": 1155, + "TACTC": 1156, + "GTTAATT": 1157, + "GCGAGA": 1158, + "CTTAATT": 1159, + "TCCTTTG": 1160, + "GTCTAA": 1161, + "CACCCA": 1162, + "GGGTTA": 1163, + "GGGCAA": 1164, + "GGAAATG": 1165, + "GCAAATT": 1166, + "TAGATG": 1167, + "GCAGAAA": 1168, + "AAAAAAAAAAAAAAAA": 1169, + "CCTACA": 1170, + "GGAGTA": 1171, + "TCTAATT": 1172, + "CAACAAA": 
1173, + "TAGATT": 1174, + "GGTTTA": 1175, + "CCTAGA": 1176, + "CTTTAAA": 1177, + "TACTTA": 1178, + "TAATGAA": 1179, + "CTATCA": 1180, + "TAGTAA": 1181, + "CAGAGAA": 1182, + "CAAGAAA": 1183, + "GGGGAAA": 1184, + "CGTTAA": 1185, + "CGTGTT": 1186, + "TCTGTCTG": 1187, + "TTTTAATT": 1188, + "CTGGCC": 1189, + "TAAATGA": 1190, + "CGTCAA": 1191, + "TTAGTA": 1192, + "GTCTCTG": 1193, + "TTTTAAAA": 1194, + "CAGTTTT": 1195, + "CTTCCTT": 1196, + "TATATAA": 1197, + "GCTTTTA": 1198, + "TTTTTCA": 1199, + "GGTC": 1200, + "TTATTAA": 1201, + "TTTTGTT": 1202, + "CATAGA": 1203, + "TAGGAA": 1204, + "GAGAGAA": 1205, + "GTAGCTG": 1206, + "TTATGA": 1207, + "GTAGTG": 1208, + "GGAGAGG": 1209, + "CTCTGAA": 1210, + "TAGTC": 1211, + "GACTCC": 1212, + "TCCCTCC": 1213, + "TAATGTT": 1214, + "CATCTA": 1215, + "GCCACCA": 1216, + "GTACTA": 1217, + "TGGGAAA": 1218, + "CGCCTT": 1219, + "GCCCGG": 1220, + "GGAGGAA": 1221, + "GTACCA": 1222, + "CGCAAA": 1223, + "CATAAAA": 1224, + "TAACATT": 1225, + "GCTAAAA": 1226, + "TCTTCTG": 1227, + "GCCAAAA": 1228, + "GTATGA": 1229, + "GTCTTTG": 1230, + "TACTGA": 1231, + "TCCCAGG": 1232, + "TTATTTA": 1233, + "TTAGTT": 1234, + "GGACC": 1235, + "TATAAAA": 1236, + "CAAACAA": 1237, + "CTTCTC": 1238, + "TCTATCTA": 1239, + "GAAATAA": 1240, + "GTGTAA": 1241, + "CTTTGTT": 1242, + "GATAAAA": 1243, + "GCCCAGG": 1244, + "GCGATT": 1245, + "AAAAAATT": 1246, + "TACAGG": 1247, + "GGCTAA": 1248, + "TAGCTT": 1249, + "GTCTCTA": 1250, + "CTCCTGA": 1251, + "GAATAAA": 1252, + "TTACCA": 1253, + "GGGACA": 1254, + "GCCACTG": 1255, + "GTTTAAA": 1256, + "GTCTGTG": 1257, + "TGACAAA": 1258, + "TACATTTT": 1259, + "GCCACC": 1260, + "TGTTTT": 1261, + "TAGCAA": 1262, + "TTATAAA": 1263, + "GACCCA": 1264, + "GCAGC": 1265, + "CAGACAGA": 1266, + "CACAAAA": 1267, + "GCCCTA": 1268, + "TATTAAAA": 1269, + "CGTATT": 1270, + "CCATCC": 1271, + "TCGATT": 1272, + "GAAGGAA": 1273, + "GATCCA": 1274, + "TATTTGA": 1275, + "GTGAATT": 1276, + "TACCTT": 1277, + "CGTCTT": 1278, + "CCTAGG": 1279, + "TCGAAA": 1280, + 
"CTTTCTG": 1281, + "TGAAGAA": 1282, + "TCTCTCA": 1283, + "GTCTCTT": 1284, + "GGAGGGG": 1285, + "GTCTGTT": 1286, + "CTATGA": 1287, + "GGAAATT": 1288, + "GCACACA": 1289, + "GCCTTTT": 1290, + "CAGTCC": 1291, + "CTGGTA": 1292, + "GCATCC": 1293, + "TAGTTA": 1294, + "GGCTTA": 1295, + "GAGTCC": 1296, + "TGAAAA": 1297, + "TAGATAGA": 1298, + "TGTTTGTT": 1299, + "TACTCA": 1300, + "CATTTAA": 1301, + "GATTTTA": 1302, + "CACTCC": 1303, + "GAAACAA": 1304, + "GCGCTG": 1305, + "TCTTTCA": 1306, + "CTGTCC": 1307, + "GAACTCA": 1308, + "CGGAAA": 1309, + "TATTGTT": 1310, + "GCACTA": 1311, + "TATTCAA": 1312, + "GCGGGG": 1313, + "GTGGCC": 1314, + "TAATTAAA": 1315, + "TACTAA": 1316, + "GCGGTG": 1317, + "TACCAA": 1318, + "GGTATA": 1319, + "CTAGTT": 1320, + "GCAGAGG": 1321, + "CTTTTTTTT": 1322, + "TTTTTTTTTTTTTTTT": 1323, + "TACAGTA": 1324, + "CCATGTT": 1325, + "TAGTGA": 1326, + "CGTGTG": 1327, + "GCTCTGA": 1328, + "CTTCCTG": 1329, + "TCGCTG": 1330, + "TAAATCA": 1331, + "TCCAATT": 1332, + "GTTTCTG": 1333, + "GAAGAGA": 1334, + "GGGTAA": 1335, + "CCATAA": 1336, + "TTATATT": 1337, + "CGAATT": 1338, + "CCGGA": 1339, + "TGAGCC": 1340, + "CCGTA": 1341, + "CAGAGGA": 1342, + "GTGTTTG": 1343, + "GACAAAA": 1344, + "TTTTTTAAA": 1345, + "GTTGCC": 1346, + "GAGTTTT": 1347, + "TCAAAAAA": 1348, + "TGTTTCA": 1349, + "TATCTA": 1350, + "TCTCTCC": 1351, + "CTCCACA": 1352, + "TAAATATT": 1353, + "TTTTCTG": 1354, + "CTCTCAA": 1355, + "CCTTAAA": 1356, + "TCTTTTAA": 1357, + "GAACAAA": 1358, + "TTAGCA": 1359, + "GCTCATG": 1360, + "TAAAGTA": 1361, + "GGATAA": 1362, + "TTATTAAA": 1363, + "CTCCATT": 1364, + "TCTCTGA": 1365, + "TTATTTG": 1366, + "CCTGTAA": 1367, + "TTATATA": 1368, + "GACTTTT": 1369, + "TGTTGTT": 1370, + "GCAAATG": 1371, + "CTTCAAA": 1372, + "GAATATT": 1373, + "GAATCC": 1374, + "CTCTTAA": 1375, + "GCATAA": 1376, + "GAATGAA": 1377, + "CTTAAAAA": 1378, + "TAAAAATG": 1379, + "TTTTAAAAA": 1380, + "CTCTGGG": 1381, + "TGATCC": 1382, + "GCTCTCA": 1383, + "CTCCAGA": 1384, + "GAGTGCAGTG": 1385, + "CAATATT": 1386, 
+ "TAGAAAA": 1387, + "GTAAATG": 1388, + "TAGCTG": 1389, + "GCTCAAA": 1390, + "GCAGGAA": 1391, + "TACCTG": 1392, + "GGGAAAA": 1393, + "TTTTCTA": 1394, + "GGGGGGGG": 1395, + "CCGA": 1396, + "CTTTGAA": 1397, + "GGAGGTG": 1398, + "TAGTCA": 1399, + "GGCCCA": 1400, + "TGATGTT": 1401, + "CAAATAA": 1402, + "TCTTCCA": 1403, + "GCGCTT": 1404, + "GTATTTG": 1405, + "GTCTC": 1406, + "GAAATCA": 1407, + "TGATAAA": 1408, + "CATTCTT": 1409, + "TATCCA": 1410, + "GCCTCTG": 1411, + "TGAGATG": 1412, + "CGCCAA": 1413, + "GTTTTATT": 1414, + "TATATATT": 1415, + "GTAGGA": 1416, + "GACAGAA": 1417, + "CTCCAGCCTGGG": 1418, + "GCGTGA": 1419, + "GGTATG": 1420, + "GAGGGAGG": 1421, + "TCATTTG": 1422, + "CTACC": 1423, + "TACAGAA": 1424, + "GGTAGA": 1425, + "GATCTA": 1426, + "GTCCATG": 1427, + "TGAGGAA": 1428, + "TAATAAAA": 1429, + "TAAACTT": 1430, + "TCACATT": 1431, + "GGAGGCC": 1432, + "TCACAAA": 1433, + "CACTTTT": 1434, + "CGGCC": 1435, + "CAACAGA": 1436, + "GTAGAGA": 1437, + "GTTATTTT": 1438, + "CGTTTG": 1439, + "TCGTCA": 1440, + "TCTGCTG": 1441, + "CAACACA": 1442, + "GGTAGG": 1443, + "GCAGCTG": 1444, + "TAGTAGAGA": 1445, + "CAAGCC": 1446, + "GCATTTG": 1447, + "TAATATG": 1448, + "GCTTAAA": 1449, + "GCTTCTG": 1450, + "CTCTCCA": 1451, + "TCATCTT": 1452, + "CGTCTG": 1453, + "TCATTTA": 1454, + "CATAGG": 1455, + "GCTCCTT": 1456, + "TGTTCTT": 1457, + "TACATTA": 1458, + "CACAGAA": 1459, + "TAAATATA": 1460, + "TAGAGG": 1461, + "GATAGG": 1462, + "TCCTGAA": 1463, + "GGAGCTG": 1464, + "TGATATT": 1465, + "TCATTAA": 1466, + "CTTTTAAA": 1467, + "TCGTTA": 1468, + "TAAACTA": 1469, + "GTTTGAA": 1470, + "TAAAATTA": 1471, + "CACCCC": 1472, + "TCAGAGA": 1473, + "CTCCTGCCTCA": 1474, + "TGACATT": 1475, + "GTATTTA": 1476, + "CTTCATT": 1477, + "GAAACTG": 1478, + "TAACACA": 1479, + "GTTCAAA": 1480, + "GGAGATG": 1481, + "TCGGCC": 1482, + "CAGCATT": 1483, + "TCGATG": 1484, + "TATTCTA": 1485, + "CTGTGAA": 1486, + "TATTGAA": 1487, + "TTTTCCA": 1488, + "TATTTCTT": 1489, + "GGTGAAA": 1490, + "CTGAGAA": 1491, + "GCACAGA": 
1492, + "GCGAGG": 1493, + "CTGTGTG": 1494, + "TGAAATG": 1495, + "TGATGAA": 1496, + "GTCCAAA": 1497, + "CTCAATT": 1498, + "TCCAGAA": 1499, + "GTATATA": 1500, + "TAAAGTT": 1501, + "TCTCAAAA": 1502, + "TCCATCA": 1503, + "GTCTGAA": 1504, + "TGAGAGA": 1505, + "TGATTTG": 1506, + "TTAGCC": 1507, + "CTCCATG": 1508, + "TCCCTGA": 1509, + "GAGCTA": 1510, + "CCCCCCCC": 1511, + "GTGGAAA": 1512, + "CTGGGAA": 1513, + "CAATGAA": 1514, + "CCACACA": 1515, + "CTTTCAA": 1516, + "CGGAGG": 1517, + "TCGTGA": 1518, + "CCAGAAA": 1519, + "GTTTTAAA": 1520, + "TGTTGAA": 1521, + "TCCTGTG": 1522, + "CTAAATG": 1523, + "TCCTTTA": 1524, + "GTCTGGG": 1525, + "TCTCTTTT": 1526, + "TACGG": 1527, + "TATTGTA": 1528, + "TTAGTG": 1529, + "TTACC": 1530, + "TAATCCCAGCACTTTG": 1531, + "TCTGGAA": 1532, + "CTTCTCA": 1533, + "CGCATT": 1534, + "TATTTAAA": 1535, + "TCACACA": 1536, + "TAATCAA": 1537, + "GCGAAA": 1538, + "GGGCCA": 1539, + "GTTCATT": 1540, + "GAGAAAAA": 1541, + "TTTTGTA": 1542, + "TACTTTT": 1543, + "TCGAGG": 1544, + "GTGAAAAA": 1545, + "CAATATA": 1546, + "TCCCATG": 1547, + "CAATTAA": 1548, + "CTGGAAA": 1549, + "CCCAGCA": 1550, + "TCCCATT": 1551, + "TCCTGTT": 1552, + "CTCTTTA": 1553, + "TCCCCTT": 1554, + "GTTTCAA": 1555, + "GTCCAGG": 1556, + "GGAAGGA": 1557, + "TAGTTTT": 1558, + "TGACCTT": 1559, + "GTGCTGGGATTACAGG": 1560, + "TATTTATA": 1561, + "TCTGCAA": 1562, + "CTGAAAAA": 1563, + "TATGTTA": 1564, + "CTTCACA": 1565, + "GCACAGG": 1566, + "CCTGCTG": 1567, + "TTTTTTAA": 1568, + "GTTATTA": 1569, + "CCCTTTT": 1570, + "TGATTTA": 1571, + "TACAAAA": 1572, + "TAAGTAA": 1573, + "TTTTTAAA": 1574, + "CATCTC": 1575, + "GTGGTGA": 1576, + "GTGGAGA": 1577, + "CTCTGCA": 1578, + "GTTAAAAA": 1579, + "TACATACA": 1580, + "CTTTGTG": 1581, + "GGACACA": 1582, + "TCTGATG": 1583, + "TATTATT": 1584, + "TCTTCTA": 1585, + "CTGTGTT": 1586, + "TCAGCTT": 1587, + "CTTTATA": 1588, + "GGCGC": 1589, + "TCCCTCA": 1590, + "GTACC": 1591, + "TGGAGAA": 1592, + "CAAAAATT": 1593, + "TCTTTAA": 1594, + "CTCTCTC": 1595, + "TGAGTGA": 1596, + 
"GCAGCTT": 1597, + "CGGATT": 1598, + "TACGA": 1599, + "TCTTGTT": 1600, + "TCGTAA": 1601, + "GCCTGTG": 1602, + "TATTCTG": 1603, + "GGGATA": 1604, + "GGGTCC": 1605, + "TGAGATT": 1606, + "CTTTTATT": 1607, + "TCCCACA": 1608, + "CATGGTG": 1609, + "TTAGGA": 1610, + "GAACACA": 1611, + "TCATAAA": 1612, + "CAACATT": 1613, + "GGTCCA": 1614, + "GAATTTG": 1615, + "TATTAATT": 1616, + "TCCTGGG": 1617, + "GCAGCAA": 1618, + "CTCTTCA": 1619, + "GAAGAGG": 1620, + "TCTGTCA": 1621, + "CTGAATG": 1622, + "CCACAAA": 1623, + "GTGGAGG": 1624, + "TGATTAA": 1625, + "CTCCCTCC": 1626, + "CACACACACACACACACACACACACACACACA": 1627, + "GCGATG": 1628, + "CATTCTG": 1629, + "GTAGAAA": 1630, + "TCATCAA": 1631, + "TTTTCAA": 1632, + "TATGTATG": 1633, + "CCAAATG": 1634, + "TAATTTTA": 1635, + "TAAGGAA": 1636, + "CTTGAAA": 1637, + "AAAAAAAAAAAA": 1638, + "GCTCCTG": 1639, + "GCAGATG": 1640, + "GAAAAATT": 1641, + "GACGC": 1642, + "GTGGGGG": 1643, + "GTCAATT": 1644, + "CTTGCTT": 1645, + "TGACACA": 1646, + "GTGTGTT": 1647, + "CCAGAGA": 1648, + "CCCAGCC": 1649, + "TAAAGAAA": 1650, + "GTCCATT": 1651, + "TAAATTAA": 1652, + "CCCAAAA": 1653, + "GAATTAA": 1654, + "TGAATTA": 1655, + "TTTTTTTG": 1656, + "CCAGCTT": 1657, + "CAATTTG": 1658, + "CTGTTTG": 1659, + "GTCTCAA": 1660, + "GTTTGTG": 1661, + "GGCATA": 1662, + "GGTACA": 1663, + "TGATGTG": 1664, + "GATTTCA": 1665, + "TCTGCTT": 1666, + "GTAATTA": 1667, + "TAAAAAAAA": 1668, + "GCCGCC": 1669, + "TGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTG": 1670, + "GCGTCA": 1671, + "GCTCATT": 1672, + "GAACCTG": 1673, + "TAAACAAA": 1674, + "GTGCTGA": 1675, + "TCAGGAA": 1676, + "TCCTCAA": 1677, + "TCTATTTT": 1678, + "TCTGTTTT": 1679, + "CAGAGCA": 1680, + "CCAGGAA": 1681, + "GTCTTTA": 1682, + "TCTTCAA": 1683, + "TCAAAATT": 1684, + "GCTTATT": 1685, + "GTTCCTT": 1686, + "CACCTA": 1687, + "TCACTGA": 1688, + "GAAGCAA": 1689, + "TAAAGA": 1690, + "TCCTTCA": 1691, + "TCTCATG": 1692, + "TCAGTGA": 1693, + "TACACAA": 1694, + "CACGTG": 1695, + "CCTAAAA": 1696, + "GCCTTTG": 1697, + "GGCTTTT": 1698, + 
"GTTGAAA": 1699, + "GTTCTC": 1700, + "CTAGA": 1701, + "CTACAAA": 1702, + "GCACAAA": 1703, + "TTACATT": 1704, + "GGCCCC": 1705, + "TAATGTG": 1706, + "CTGCCTT": 1707, + "TCCCAGA": 1708, + "GTGAATG": 1709, + "GGACAGG": 1710, + "GGATGTG": 1711, + "GTTTATA": 1712, + "TGACCAA": 1713, + "GTGGCTG": 1714, + "GTTCTCA": 1715, + "CTTATTTT": 1716, + "CTGGAGA": 1717, + "TTACAAA": 1718, + "GTCTTCA": 1719, + "CAAGAGA": 1720, + "CCATTTG": 1721, + "TCACAGA": 1722, + "CTAGTA": 1723, + "CATTATT": 1724, + "TTAGA": 1725, + "GCTCTCC": 1726, + "GCGCCA": 1727, + "TATGTTTT": 1728, + "TCCTCCA": 1729, + "CAGAAAAA": 1730, + "GTGGGAA": 1731, + "TAATCTT": 1732, + "TGAGTCA": 1733, + "CTGCTC": 1734, + "GTCTCCA": 1735, + "TCATGTT": 1736, + "GTTTCCA": 1737, + "TAAGCAA": 1738, + "CTAAAAATA": 1739, + "TGACTGA": 1740, + "TCGGTT": 1741, + "TTAGAAA": 1742, + "TAAGCC": 1743, + "TAAAGCA": 1744, + "CCTCTCC": 1745, + "CCTCCTT": 1746, + "TCAGATT": 1747, + "TATGAAAA": 1748, + "GCTGATG": 1749, + "CATATTTT": 1750, + "GCTCCAA": 1751, + "CGGCGG": 1752, + "CCACTGA": 1753, + "CAGCAAA": 1754, + "CTGTCTT": 1755, + "CTAGCA": 1756, + "TCGGGG": 1757, + "CACAGCA": 1758, + "GCTGATT": 1759, + "CTAGGA": 1760, + "TAACTC": 1761, + "TCATATT": 1762, + "CCTTCTT": 1763, + "CTGCAAA": 1764, + "CCCGC": 1765, + "GGTCTA": 1766, + "CCCAGGA": 1767, + "GTGTCTG": 1768, + "TAATAATAATAA": 1769, + "TCACATG": 1770, + "CAATTTA": 1771, + "TATATATATATATATATATATATATATATATA": 1772, + "CCACAGA": 1773, + "TCAATTTT": 1774, + "GTATTAA": 1775, + "GAACATT": 1776, + "TCTCTTA": 1777, + "CTATTTG": 1778, + "TCTTTCC": 1779, + "GGTTAAA": 1780, + "GCTAATT": 1781, + "CTGCTGA": 1782, + "TACCTA": 1783, + "CAGGGTT": 1784, + "TCGCCA": 1785, + "CAAAAATTA": 1786, + "CTTCTGA": 1787, + "GCATGTG": 1788, + "CTATTAA": 1789, + "GCACATG": 1790, + "CAACATG": 1791, + "TCATGAA": 1792, + "GAATGTT": 1793, + "GGGTTTT": 1794, + "CTGCCTG": 1795, + "GTCCACA": 1796, + "TAAACA": 1797, + "CTCTGGA": 1798, + "GACCCC": 1799, + "GGCAAAA": 1800, + "TCTGTTA": 1801, + "CTAGTG": 1802, + 
"CTATATA": 1803, + "TCAGTCA": 1804, + "TAACTAA": 1805, + "GAAGATG": 1806, + "GTCTTAA": 1807, + "CAAGGAA": 1808, + "GTAAAAAA": 1809, + "TCCCCTG": 1810, + "TCGCAA": 1811, + "TCTGCCTG": 1812, + "CCTTTTA": 1813, + "GTCCCAGCTA": 1814, + "TATATATG": 1815, + "TATTGTG": 1816, + "TGTGTTTT": 1817, + "GCGCAA": 1818, + "CACAGTG": 1819, + "TAAGATT": 1820, + "CTCTGTA": 1821, + "GGAGGCTGA": 1822, + "GGACAAA": 1823, + "TATTAAAAA": 1824, + "TCGTCC": 1825, + "TCGGAA": 1826, + "CTATAAA": 1827, + "CTTCAGA": 1828, + "CTAGAAA": 1829, + "CATTCAA": 1830, + "CACGCA": 1831, + "CAGGATT": 1832, + "CCATCTT": 1833, + "GTAGCC": 1834, + "GAATTTA": 1835, + "CACGC": 1836, + "CAATCC": 1837, + "TGAGCAA": 1838, + "GAAGCTG": 1839, + "TCAATTA": 1840, + "GAAGTCA": 1841, + "CTGCACA": 1842, + "CCACGG": 1843, + "GGATCTT": 1844, + "CTCCTGCCTCAGCCTCC": 1845, + "TAAATGAA": 1846, + "CCGTC": 1847, + "TCGGTG": 1848, + "TTTTATTA": 1849, + "GCAGGGG": 1850, + "GCAGGTG": 1851, + "TCTATTA": 1852, + "TAACTTA": 1853, + "CTAATTTT": 1854, + "CCCGCC": 1855, + "TAATACA": 1856, + "GGATTAAA": 1857, + "TCTCTCTG": 1858, + "GCTTCTT": 1859, + "CATTTATT": 1860, + "CCAGAGG": 1861, + "GGACAGA": 1862, + "GCCAATT": 1863, + "TCCCCAA": 1864, + "GTTGATT": 1865, + "GAAGAAAA": 1866, + "GCATTTA": 1867, + "CTCTAAA": 1868, + "CACACACACACA": 1869, + "CCTCAAA": 1870, + "TATAATT": 1871, + "CAATGTT": 1872, + "GCCCAGA": 1873, + "GTATATT": 1874, + "CTAAAAAA": 1875, + "CCACAGG": 1876, + "TAAGAGA": 1877, + "TCCTTAA": 1878, + "TATTTTTT": 1879, + "GAATATA": 1880, + "GGATTTG": 1881, + "GTGTGAA": 1882, + "CTGGCTT": 1883, + "GCGGCA": 1884, + "TCCGCC": 1885, + "GCATCTT": 1886, + "TCTAATA": 1887, + "CTGCATT": 1888, + "CTCTGCC": 1889, + "TCACTCA": 1890, + "TCAGCAA": 1891, + "TATTATG": 1892, + "CCAGCTG": 1893, + "GATCTC": 1894, + "GCCTCTT": 1895, + "CTTCCAA": 1896, + "TCCTAAA": 1897, + "TCATCTG": 1898, + "CTATTTA": 1899, + "CTGCAGG": 1900, + "CAAGCAA": 1901, + "GCGGAA": 1902, + "GAAATAAA": 1903, + "TAAAATAA": 1904, + "TCACCTT": 1905, + "CCATGTG": 1906, + 
"GACCTA": 1907, + "CAGATGA": 1908, + "GTGGCTT": 1909, + "TTATTATTATTA": 1910, + "TCCCGG": 1911, + "TATTTGTT": 1912, + "CTGTAAA": 1913, + "TCCATCCA": 1914, + "CTGTATA": 1915, + "GTTTCTA": 1916, + "GTTGCTT": 1917, + "CCATGAA": 1918, + "GCTCTTA": 1919, + "CTTCATG": 1920, + "GTTCCTG": 1921, + "GCTGGGA": 1922, + "TCAGAGG": 1923, + "CATTAAAA": 1924, + "TCAGTAA": 1925, + "GAATGTG": 1926, + "CTTATTA": 1927, + "GCACTGA": 1928, + "TGAGGTT": 1929, + "CATCAAA": 1930, + "CTTCTCC": 1931, + "GTTTATG": 1932, + "CTTTCCA": 1933, + "GTGCCTG": 1934, + "GAAAGGA": 1935, + "GCATCTG": 1936, + "TACCCA": 1937, + "TAACAGA": 1938, + "AAAAAAAAAAA": 1939, + "CTATGAA": 1940, + "CAGTAAA": 1941, + "TAGCTA": 1942, + "TCGTTTT": 1943, + "GTGTCTT": 1944, + "GAGCAAA": 1945, + "TCTAAAAA": 1946, + "GTTCACA": 1947, + "GAAATGA": 1948, + "CAAATGA": 1949, + "GCCCTGA": 1950, + "GTGTTTA": 1951, + "TCATGTG": 1952, + "CATATTA": 1953, + "TCAAAAAAA": 1954, + "TAAGTTA": 1955, + "TCTCTCTT": 1956, + "CCAGTGA": 1957, + "CCTCTGA": 1958, + "CAAGATG": 1959, + "GCCTGTT": 1960, + "GTTTGGG": 1961, + "CATTCATT": 1962, + "GCCCCTG": 1963, + "GTTCTGA": 1964, + "GCGGCC": 1965, + "GCGGTT": 1966, + "CAAAACAAAA": 1967, + "TACATATA": 1968, + "GAATTAAA": 1969, + "TCAAGAA": 1970, + "CTGTATT": 1971, + "TTTTTATT": 1972, + "GATTATT": 1973, + "TCTAATG": 1974, + "GTTGCTG": 1975, + "TGAATGAA": 1976, + "TCAGCTG": 1977, + "CTTGATT": 1978, + "CAGAATG": 1979, + "CTAATTA": 1980, + "TATAATG": 1981, + "GTTTTGTTTT": 1982, + "CCAGCCTG": 1983, + "TGATGGA": 1984, + "GCAGATT": 1985, + "CTCTATT": 1986, + "GCAGTCA": 1987, + "TAAGTGA": 1988, + "CTACACA": 1989, + "CGCATG": 1990, + "TAGCCA": 1991, + "GTGGCTCA": 1992, + "CAAATAAA": 1993, + "GTGCTCA": 1994, + "TTTTTTTTTT": 1995, + "TAACATG": 1996, + "TCCCAGCTA": 1997, + "CAAAGTA": 1998, + "TCATATA": 1999, + "CAGCATG": 2000, + "TGATCTT": 2001, + "CATAATT": 2002, + "TGTGTTA": 2003, + "TTTTGAA": 2004, + "TTAATTA": 2005, + "GATATTA": 2006, + "TCATTCA": 2007, + "TGATATA": 2008, + "TGACTCA": 2009, + "GACGTT": 2010, 
+ "TGACATG": 2011, + "GTTGTGA": 2012, + "CATTTTTT": 2013, + "GCCTGGA": 2014, + "CTATGTT": 2015, + "CTTTGGG": 2016, + "GTCTCAAA": 2017, + "CTGGCTG": 2018, + "CCACATG": 2019, + "GGCGTG": 2020, + "CTTAATG": 2021, + "TAAGATG": 2022, + "GTATAAA": 2023, + "TGTATTA": 2024, + "TAACTCA": 2025, + "GAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGA": 2026, + "GCATGAA": 2027, + "GTTAATG": 2028, + "TCCAGGA": 2029, + "GAGAGAAA": 2030, + "TCTCTGTG": 2031, + "CTCTCTA": 2032, + "CCACCTG": 2033, + "GCCAGGA": 2034, + "CTGGAGG": 2035, + "CCATTTA": 2036, + "GTCTGGA": 2037, + "GCCCACA": 2038, + "TAGAGAA": 2039, + "CAACTCA": 2040, + "GGCAGGA": 2041, + "TCTTATG": 2042, + "CAAAGGA": 2043, + "GGTAAAA": 2044, + "GAGAGGA": 2045, + "GTCCAGA": 2046, + "GCCCTCA": 2047, + "GATATTTT": 2048, + "CAGGGAA": 2049, + "CCACATT": 2050, + "GAGGAGG": 2051, + "GAAACTT": 2052, + "CAGAATT": 2053, + "TCAGATG": 2054, + "TATTTCC": 2055, + "TACAGTG": 2056, + "TGAGCTG": 2057, + "CCATCTG": 2058, + "GAGAATG": 2059, + "TCAACAA": 2060, + "ATT": 2061, + "TAACTGA": 2062, + "TGAGAGG": 2063, + "CACTGAA": 2064, + "CCACCTT": 2065, + "CTGCAGA": 2066, + "TCACCAA": 2067, + "TGAGCTT": 2068, + "CAAAGCA": 2069, + "GGTTTTA": 2070, + "CGGGGTT": 2071, + "TCCAAAAA": 2072, + "TATGTATA": 2073, + "CCAGATG": 2074, + "TCCATTTT": 2075, + "CTGCTCA": 2076, + "GATAATT": 2077, + "CCACCAA": 2078, + "CTCCTCC": 2079, + "GAGAATT": 2080, + "GAAAGTA": 2081, + "TAAAATAAAA": 2082, + "CTTCTTA": 2083, + "CTGTTTA": 2084, + "GAATCAA": 2085, + "GCATGTT": 2086, + "GCACGG": 2087, + "GACTGAA": 2088, + "GTGCACA": 2089, + "GACGTG": 2090, + "TATACAA": 2091, + "TCGACA": 2092, + "GAAGACA": 2093, + "TAAAGGA": 2094, + "GATCAAA": 2095, + "CAGTGTG": 2096, + "CTAGCC": 2097, + "GAGGAAAA": 2098, + "TCTGAAAA": 2099, + "GAACCCA": 2100, + "GATGGATG": 2101, + "GTTCTTA": 2102, + "CTATATT": 2103, + "GCATTAA": 2104, + "TCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTC": 2105, + "TCAGTC": 2106, + "TATTTTTG": 2107, + "GAGGATT": 2108, + "GTATGTG": 2109, + "TAACCAA": 2110, + "GTTGTTTT": 2111, + "TTTTTCTT": 2112, + 
"GTGTTAA": 2113, + "CTTGGAA": 2114, + "AAAAAATG": 2115, + "CAATGTG": 2116, + "GTGCCTT": 2117, + "GCCTCAA": 2118, + "GAGTCTT": 2119, + "GCTAATTTT": 2120, + "CGAAAAA": 2121, + "GTGTATA": 2122, + "GCGTTA": 2123, + "CTGCACTCCAGCCTGGG": 2124, + "GTTCATG": 2125, + "CAAAGAAA": 2126, + "GCAGTAA": 2127, + "GGATGAA": 2128, + "CTTTATG": 2129, + "CAGGAAAA": 2130, + "TCCTGCA": 2131, + "CTGTCTG": 2132, + "GAACATG": 2133, + "GGATGGA": 2134, + "GCCTGAA": 2135, + "CAAAAATG": 2136, + "TCCAATG": 2137, + "CCAGCAA": 2138, + "GGCCTA": 2139, + "CAACTGA": 2140, + "GCACCTG": 2141, + "GTCTATT": 2142, + "CCTCTCA": 2143, + "GTGGTCA": 2144, + "GTGTAAA": 2145, + "GTACACA": 2146, + "GTAAAATT": 2147, + "GTACATT": 2148, + "TATATAAA": 2149, + "CTGTTAA": 2150, + "TAAGTCA": 2151, + "GCCTCCA": 2152, + "AAATTAAA": 2153, + "GTGCAGG": 2154, + "TCCTGGA": 2155, + "GTGCAAA": 2156, + "GCGTCC": 2157, + "CCATTAA": 2158, + "GGAGGGA": 2159, + "TCACTTA": 2160, + "TCATTAAA": 2161, + "CAACATA": 2162, + "TAATAGA": 2163, + "TAATGTA": 2164, + "GATTTTTT": 2165, + "GTTGTCA": 2166, + "GGAGACA": 2167, + "GTGTGGG": 2168, + "TCACAGG": 2169, + "TCGGCA": 2170, + "CTCCCTG": 2171, + "GACCAAA": 2172, + "TGTTTATT": 2173, + "CGAATG": 2174, + "CTCAATG": 2175, + "TCACCTG": 2176, + "CAGTGTT": 2177, + "TGAGACA": 2178, + "TAGGGG": 2179, + "GAAAAATG": 2180, + "GTTGAGA": 2181, + "TCGATA": 2182, + "CTCGGGAGG": 2183, + "GTTGTC": 2184, + "CCAGTCA": 2185, + "GCCCAGGCTG": 2186, + "GAACAGA": 2187, + "GGCTCACTGCAA": 2188, + "GCAGACA": 2189, + "TGAGGTG": 2190, + "CACGTT": 2191, + "TAAGAAAA": 2192, + "CCAGGCA": 2193, + "GTATCTT": 2194, + "CTTGGGAGG": 2195, + "CTTTCTA": 2196, + "CCGCTG": 2197, + "GAGCTCA": 2198, + "GAGACAGA": 2199, + "CTTCAGG": 2200, + "GCACATT": 2201, + "GTACAAA": 2202, + "CTTGTAA": 2203, + "GTGGGTG": 2204, + "GAAGTGA": 2205, + "GGTCTC": 2206, + "GTATGTT": 2207, + "GCACTCA": 2208, + "TTATGTT": 2209, + "CAAGTCA": 2210, + "CAAGTGA": 2211, + "GAAACTA": 2212, + "TAAATAAAA": 2213, + "TCTTAAAA": 2214, + "GTTGGAA": 2215, + "GTTCTAA": 
2216, + "CCACTC": 2217, + "CAGTGAA": 2218, + "GAAAGG": 2219, + "GCACGA": 2220, + "TAACTTTT": 2221, + "GTTGTTA": 2222, + "TCAGTTA": 2223, + "CGGATG": 2224, + "TATTTGAA": 2225, + "CCCTGAA": 2226, + "GCCCTC": 2227, + "CTTCTAA": 2228, + "TTTGTTTT": 2229, + "GAGCTGA": 2230, + "CTGTGGG": 2231, + "CAAGATT": 2232, + "GAAGCTT": 2233, + "TGAGTAA": 2234, + "CTTGCTG": 2235, + "GGATGGG": 2236, + "CGTATG": 2237, + "TCCATTA": 2238, + "GTCTGCA": 2239, + "GCCATTTT": 2240, + "GTTGTAA": 2241, + "CACACAA": 2242, + "GGACTACAGG": 2243, + "CGTTTTA": 2244, + "TCTTCC": 2245, + "TAACCTT": 2246, + "CTTTAAAA": 2247, + "TGAATTTT": 2248, + "CTACAGA": 2249, + "GCAAGAA": 2250, + "TAACAAAA": 2251, + "CAATTAAA": 2252, + "CCACTCA": 2253, + "CATGGTGAAA": 2254, + "CCCAGAA": 2255, + "CTACATT": 2256, + "CCGAGG": 2257, + "TCCAGTG": 2258, + "TGAGTTA": 2259, + "GGAGTCA": 2260, + "TAACGA": 2261, + "GAGTAAA": 2262, + "GACTCTG": 2263, + "GGAGCTT": 2264, + "TACTCC": 2265, + "CTGCATG": 2266, + "GCTTTTTT": 2267, + "GTCTAAA": 2268, + "GTGCGG": 2269, + "CATCTCA": 2270, + "TGATCAA": 2271, + "GGAGATT": 2272, + "GCAAAAAA": 2273, + "CACCAAA": 2274, + "TGACGG": 2275, + "CAGAGG": 2276, + "GTTGATG": 2277, + "CTTGTCA": 2278, + "TCCACCTG": 2279, + "GGAGCAA": 2280, + "CAAGTAA": 2281, + "CCATAAA": 2282, + "GTGCATG": 2283, + "GCATATT": 2284, + "GTAGATT": 2285, + "GCCTAA": 2286, + "CTCAAAAA": 2287, + "GGAGAAAA": 2288, + "CTATCC": 2289, + "TAATATTA": 2290, + "GTGCTC": 2291, + "CAATATG": 2292, + "TGTGGAA": 2293, + "TGACTC": 2294, + "GTGTATG": 2295, + "TTTTAATG": 2296, + "GCTCTAA": 2297, + "CACAATG": 2298, + "CAGCTCA": 2299, + "GTTGGTT": 2300, + "CTAAAATT": 2301, + "GTCTATG": 2302, + "TGTGAAAA": 2303, + "CTGGGTT": 2304, + "CCCCTCC": 2305, + "CCCTCTT": 2306, + "GCAGGGA": 2307, + "GAAACCA": 2308, + "CATTTCC": 2309, + "GCAGCCA": 2310, + "TCATATG": 2311, + "GCAGGCA": 2312, + "CGTAAAA": 2313, + "TGACCTG": 2314, + "CAGAGGTT": 2315, + "CTTGTGA": 2316, + "TTATCTT": 2317, + "CTGTATG": 2318, + "GTCAATG": 2319, + "GGACGG": 2320, + "GCGTAA": 
2321, + "CAAACTA": 2322, + "TAAATGTT": 2323, + "CTTCGG": 2324, + "CTCCCCA": 2325, + "TACAATG": 2326, + "TCTGTAA": 2327, + "GAATATG": 2328, + "GCGGGA": 2329, + "GGACATT": 2330, + "TTATGAA": 2331, + "GGATGTT": 2332, + "GGACATG": 2333, + "TCAGGTG": 2334, + "CAACAAAA": 2335, + "GAAAGAGA": 2336, + "GTGGATG": 2337, + "GGGCTA": 2338, + "CCATCAA": 2339, + "CAGCTGA": 2340, + "CTCCACC": 2341, + "CAATCAA": 2342, + "GTGGTC": 2343, + "TGACAGG": 2344, + "CCATTCA": 2345, + "GTCCCTG": 2346, + "CAGACACA": 2347, + "GTTGGTG": 2348, + "CCTCCTG": 2349, + "GAACTGA": 2350, + "TATTCATT": 2351, + "GCCCATG": 2352, + "CAATCTT": 2353, + "GAAAGCA": 2354, + "GAATCTG": 2355, + "TTATTTTA": 2356, + "GTTTGGA": 2357, + "TTTTTGTT": 2358, + "GGGAATG": 2359, + "GCGACA": 2360, + "TAAACTG": 2361, + "CCATATT": 2362, + "GGATCC": 2363, + "CAAGCTT": 2364, + "TAAAAAAAAA": 2365, + "TCACTC": 2366, + "CACTGTT": 2367, + "TGTTAATT": 2368, + "GGACTGA": 2369, + "GGAGTGA": 2370, + "CATACACA": 2371, + "GTTTGTA": 2372, + "TCCAGCA": 2373, + "GTGCATT": 2374, + "GGAAAAAA": 2375, + "CCAAGAA": 2376, + "TCAATA": 2377, + "CTTCCCA": 2378, + "TGAGAAAA": 2379, + "GGCCTCCCAAA": 2380, + "CAAGCTG": 2381, + "GCCCAAA": 2382, + "TGACTTA": 2383, + "CAGCCTT": 2384, + "CTGGATT": 2385, + "TTTTTTTA": 2386, + "TCACGG": 2387, + "GCAGTTA": 2388, + "TGACTAA": 2389, + "TTACAGG": 2390, + "TGATATG": 2391, + "TAATTATT": 2392, + "TCTTGAA": 2393, + "GCCCCTT": 2394, + "GTTCAGA": 2395, + "CTCTATG": 2396, + "CCATGGA": 2397, + "GAGGGAA": 2398, + "GGAGGCA": 2399, + "CTTTGCA": 2400, + "TCTTGG": 2401, + "GGAGGTT": 2402, + "GCCAATG": 2403, + "CTGGTGA": 2404, + "CAACCAA": 2405, + "CCAGTC": 2406, + "CTTGAGA": 2407, + "TACAGCA": 2408, + "CTTGTC": 2409, + "GACGGA": 2410, + "CTTCTTTT": 2411, + "GTGGC": 2412, + "GAGGATG": 2413, + "CAATAAAA": 2414, + "GAAATTTT": 2415, + "AAAAAAAAAA": 2416, + "CTCTATA": 2417, + "GTATGAA": 2418, + "CTTGTTA": 2419, + "TAACATA": 2420, + "CAAACACA": 2421, + "TGATTAAA": 2422, + "GCTCTGTT": 2423, + "GTGGGTT": 2424, + "GTTGGGG": 2425, + 
"GTGTGTA": 2426, + "GTAATTTT": 2427, + "GTATCC": 2428, + "TGTGTGTGTGTG": 2429, + "TCTTCCTT": 2430, + "TCACTAA": 2431, + "TCTCCAAA": 2432, + "TATCAAA": 2433, + "TGATGGG": 2434, + "GGATATT": 2435, + "CAAATTTT": 2436, + "GTTCAGG": 2437, + "GTGGATT": 2438, + "GTGCAGA": 2439, + "GCTGCC": 2440, + "CTCAGAA": 2441, + "GCAGTC": 2442, + "GGATAAA": 2443, + "GCCTTCA": 2444, + "CCAGGTG": 2445, + "TATCTC": 2446, + "CAATGCA": 2447, + "CCCACTG": 2448, + "GTGTATT": 2449, + "CGACAGA": 2450, + "TGAGATA": 2451, + "CCAGGTT": 2452, + "TGTTTAA": 2453, + "CATCATG": 2454, + "TGATTCA": 2455, + "GCAATTA": 2456, + "GAAATGAA": 2457, + "CTTGGTT": 2458, + "GAAGATT": 2459, + "GGATTAA": 2460, + "CCTCATT": 2461, + "GGCCAGGCTG": 2462, + "GCTATTA": 2463, + "GCCAGCA": 2464, + "GAGACAGG": 2465, + "CTTGAGG": 2466, + "CAGTCTT": 2467, + "GTTCTCC": 2468, + "TATTTCAA": 2469, + "TGACGA": 2470, + "CATGAAAA": 2471, + "CATTATG": 2472, + "TAAATTTA": 2473, + "GAGTGAA": 2474, + "CAACAGG": 2475, + "TAAGCTT": 2476, + "CACATTTT": 2477, + "GATCTCA": 2478, + "TAGTCC": 2479, + "GACCCTG": 2480, + "TAATGCA": 2481, + "TAAGTC": 2482, + "TAATAATT": 2483, + "GAAGTAA": 2484, + "CAACTC": 2485, + "CATCATT": 2486, + "GACGAA": 2487, + "GAAACAAA": 2488, + "TATTTCTG": 2489, + "CATTAATT": 2490, + "CCACCCC": 2491, + "TAATATTTT": 2492, + "GTTTAAAA": 2493, + "GTATCTG": 2494, + "GTCAAAAA": 2495, + "GATGCTG": 2496, + "TGTTCTG": 2497, + "GGTCAAA": 2498, + "GTAGGAA": 2499, + "GTATATG": 2500, + "TGATCTG": 2501, + "GGGGCTG": 2502, + "GCATCAA": 2503, + "GCCAAAAA": 2504, + "CCACGA": 2505, + "GCTAATG": 2506, + "CAGAGAAA": 2507, + "CCTTCTG": 2508, + "TCCTCTA": 2509, + "GCAGGTT": 2510, + "CTCACTG": 2511, + "TAGATTA": 2512, + "GCCGAGA": 2513, + "CCATCCA": 2514, + "CTTTACA": 2515, + "GTACATG": 2516, + "GCACCAA": 2517, + "CTTTGTA": 2518, + "CTATGTG": 2519, + "TCACTTTT": 2520, + "TGAGTC": 2521, + "CAAGAAAA": 2522, + "CTGACTG": 2523, + "GTTTTTTTT": 2524, + "GCATAAA": 2525, + "TAATCTG": 2526, + "GAAAAAAAA": 2527, + "CAGGATG": 2528, + "TGAGCCA": 2529, + 
"GAATTCA": 2530, + "TCAGACA": 2531, + "GTTCCAA": 2532, + "TCAGGTT": 2533, + "CAAACTG": 2534, + "CATTTCTT": 2535, + "TGTTAAAA": 2536, + "CCAGACA": 2537, + "CAAGTTA": 2538, + "CATGTTA": 2539, + "CATTCTA": 2540, + "TCTTTTTG": 2541, + "TGAGGGG": 2542, + "CACATTA": 2543, + "TAAAATAAA": 2544, + "GCATATA": 2545, + "TGTTCTA": 2546, + "GAAGGGG": 2547, + "GAGTGTG": 2548, + "TAAGACA": 2549, + "GAACTC": 2550, + "CCAGTAA": 2551, + "GAGAGAGG": 2552, + "GCGACC": 2553, + "CAATTCA": 2554, + "CGGCTG": 2555, + "CCAGATT": 2556, + "CCTGGG": 2557, + "GGAAGAAA": 2558, + "GAGAGG": 2559, + "TCAAAATG": 2560, + "CCTCATG": 2561, + "TAAAGG": 2562, + "CTTTGGA": 2563, + "CCAGGGA": 2564, + "GTACAGA": 2565, + "CTGAGGCAGGA": 2566, + "TGTTTCTT": 2567, + "CCAGGCTG": 2568, + "CTGAGG": 2569, + "GAGGCTG": 2570, + "CTCCTGGG": 2571, + "GAAGTC": 2572, + "CGACC": 2573, + "GGACTCA": 2574, + "GGAGTC": 2575, + "CACAATT": 2576, + "GTGTTCA": 2577, + "GACTAAA": 2578, + "GTCATTA": 2579, + "CAAAATTA": 2580, + "TGAAGAAA": 2581, + "GCACCTT": 2582, + "GTTTGCA": 2583, + "TCCTGCC": 2584, + "GTAGATG": 2585, + "GCCTGCA": 2586, + "GAGTTAA": 2587, + "TCCCTTA": 2588, + "GTGGTTA": 2589, + "TCGGGA": 2590, + "TACATAA": 2591, + "TCTCTCCA": 2592, + "CACTAAA": 2593, + "TATATATATATA": 2594, + "GTGGCAA": 2595, + "CACCATG": 2596, + "TTTGAAAA": 2597, + "CACACTG": 2598, + "CTTGGTG": 2599, + "TACACTG": 2600, + "CCTCCAA": 2601, + "CAACCTT": 2602, + "CAGCCAA": 2603, + "TTTTCAAA": 2604, + "TGATAGA": 2605, + "TACACTA": 2606, + "TCTGGG": 2607, + "TCCCAGCA": 2608, + "TAGGAAAA": 2609, + "CTTGGGG": 2610, + "TCTGTGAA": 2611, + "CCTTATT": 2612, + "CATTTAAA": 2613, + "TTTTATTTTA": 2614, + "GCCCTCC": 2615, + "CTGAGCA": 2616, + "CCCGTG": 2617, + "GTAGTGA": 2618, + "TCCTATT": 2619, + "GAAGGTG": 2620, + "TGTGCTG": 2621, + "TCCACTG": 2622, + "TAATCTA": 2623, + "TGATGTA": 2624, + "GTGGTAA": 2625, + "TAATGGA": 2626, + "GATGAAAA": 2627, + "GTAGTAA": 2628, + "GTGGGGA": 2629, + "GTGTCAA": 2630, + "CAGACTG": 2631, + "TCGAAAA": 2632, + "CTCATTA": 2633, + 
"TAATAATA": 2634, + "CTCAGAAA": 2635, + "CATCCTT": 2636, + "CCGCTT": 2637, + "GGAAGG": 2638, + "CCGTGA": 2639, + "CCACTCC": 2640, + "CTAGAGA": 2641, + "TAGAATG": 2642, + "GGATTTA": 2643, + "TTAATTTT": 2644, + "GCTAATA": 2645, + "TCCCCCA": 2646, + "CAAATATT": 2647, + "GATCATG": 2648, + "TCTTAATT": 2649, + "CAGTATT": 2650, + "GTCTTGAA": 2651, + "CCGAAA": 2652, + "CTATTCA": 2653, + "TAAGATA": 2654, + "CTTGCAA": 2655, + "GCCCCAA": 2656, + "TCCCTAA": 2657, + "GAAGTTA": 2658, + "GATGATG": 2659, + "CTTGATG": 2660, + "CCCTAAA": 2661, + "CCTGCCTG": 2662, + "GACATTTT": 2663, + "CCAGCCA": 2664, + "TGTGTGTGTG": 2665, + "GTCTATA": 2666, + "TCTCTGTT": 2667, + "GTCTGTA": 2668, + "TATAATA": 2669, + "CTTGTTTT": 2670, + "CGCCATT": 2671, + "CTCAGCA": 2672, + "TACAGTT": 2673, + "CAAGAGG": 2674, + "GGAAGCA": 2675, + "GCCTTTA": 2676, + "CCCCATT": 2677, + "CAACGA": 2678, + "GTCATTTT": 2679, + "CCCGCA": 2680, + "CAGTTAA": 2681, + "GAATCTT": 2682, + "CATGTTTT": 2683, + "CCGGGG": 2684, + "CTACTGA": 2685, + "TCACGA": 2686, + "TAAATTTG": 2687, + "GCCCATT": 2688, + "CTCTAGG": 2689, + "GGACCTG": 2690, + "TCAGGGA": 2691, + "GAGACTG": 2692, + "CCAAAAAA": 2693, + "GCCGG": 2694, + "CCAGGGG": 2695, + "TCAGAAAA": 2696, + "CATCTGA": 2697, + "TCTTCAAA": 2698, + "CTACAGG": 2699, + "GAGGCAGG": 2700, + "CATTGTA": 2701, + "TAAATCAA": 2702, + "GACTCTT": 2703, + "CTGATTA": 2704, + "GCATATG": 2705, + "GGACCTT": 2706, + "CAAGACA": 2707, + "TATTTATG": 2708, + "TATTTTAAA": 2709, + "CCGAGA": 2710, + "TCATTTTA": 2711, + "CTCACTCA": 2712, + "CCACCCA": 2713, + "CTCTAGA": 2714, + "CTACATG": 2715, + "GTGCTTA": 2716, + "CAACCTG": 2717, + "TCTGTGTT": 2718, + "TAAATATG": 2719, + "CAAAGG": 2720, + "CCCTGTT": 2721, + "GTTCGG": 2722, + "TGATAAAA": 2723, + "CACGAA": 2724, + "GTTGAGG": 2725, + "CAGAGTGA": 2726, + "GAAATTAA": 2727, + "CACATA": 2728, + "GAACAGG": 2729, + "TCTCCTGA": 2730, + "CCTGAGG": 2731, + "GGAGGCCAA": 2732, + "GTTTACA": 2733, + "TAACAGG": 2734, + "TGTGGTG": 2735, + "GCCTCCCAAA": 2736, + "CCATCCTG": 2737, + 
"GATTCTT": 2738, + "GAATGGA": 2739, + "GTAGTCA": 2740, + "CTCCTCTG": 2741, + "GAAAGAAAGAAAGAAA": 2742, + "CCCTGTG": 2743, + "CAGTATG": 2744, + "GCGATA": 2745, + "GGACTC": 2746, + "GAAAGA": 2747, + "TGTTGG": 2748, + "GTAGCTT": 2749, + "CATTTTAA": 2750, + "CCCTCTG": 2751, + "GCATTCA": 2752, + "CGATTA": 2753, + "TCACATA": 2754, + "TAATGAAA": 2755, + "GGAATTA": 2756, + "CTGTCAA": 2757, + "TAAATTAAA": 2758, + "CAAGTC": 2759, + "GTATTCA": 2760, + "GGCCATG": 2761, + "CTTTAGA": 2762, + "TGTTTCC": 2763, + "CATGTA": 2764, + "GAATAAAA": 2765, + "CAACTAA": 2766, + "TCATCTA": 2767, + "CACTCTT": 2768, + "CAGTTTG": 2769, + "CATAAAAA": 2770, + "GCATGCA": 2771, + "GATTTA": 2772, + "GAACCAA": 2773, + "TCTGTGA": 2774, + "TCAGCCA": 2775, + "TCTCCACA": 2776, + "TCTCAGCTCA": 2777, + "TATCATG": 2778, + "GCACTTA": 2779, + "CGCCAGG": 2780, + "CGGGG": 2781, + "CATTAAAAA": 2782, + "TTTGTTA": 2783, + "GGATATA": 2784, + "TCGACC": 2785, + "TAATCCA": 2786, + "CCGC": 2787, + "CATTGTT": 2788, + "CCAGTTA": 2789, + "GTAGTTA": 2790, + "CTAGGAA": 2791, + "CCTAATT": 2792, + "TCATGGG": 2793, + "GAACTAA": 2794, + "GCTATTTT": 2795, + "CCGTCA": 2796, + "CAGATTA": 2797, + "CCATATA": 2798, + "CAACTTA": 2799, + "TCAGTTTT": 2800, + "CTACCTT": 2801, + "GCACTC": 2802, + "GTGTGGA": 2803, + "GTGCCAA": 2804, + "GACAATG": 2805, + "GACAATT": 2806, + "GTACCTT": 2807, + "TAAACATT": 2808, + "CAGGAGG": 2809, + "GTGCGA": 2810, + "GAAAATTA": 2811, + "TCTCTTAA": 2812, + "CCGATT": 2813, + "GATGATT": 2814, + "CCATGGG": 2815, + "TCGGTA": 2816, + "CCATATG": 2817, + "CCAGTCC": 2818, + "GCCTTAA": 2819, + "TGATCCA": 2820, + "GTTGCAA": 2821, + "GTAGAGG": 2822, + "CAGATTTT": 2823, + "GTACTTA": 2824, + "TCTTTCTTTCTTTCTT": 2825, + "GCTCTGTG": 2826, + "TCAATAA": 2827, + "GTTTAGA": 2828, + "GTTCGA": 2829, + "CAAGGTT": 2830, + "CTCATTTT": 2831, + "CACAGG": 2832, + "CATGCTG": 2833, + "GAACGG": 2834, + "TATAAAAA": 2835, + "GAAGGCA": 2836, + "GAGCATT": 2837, + "TGTTTGTG": 2838, + "GCTGTTA": 2839, + "GTCACTG": 2840, + "CAAATGAA": 2841, + 
"GTGACTG": 2842, + "GTTCTTTT": 2843, + "CAGGCTGGAGTGCAGTG": 2844, + "TGATGAAA": 2845, + "TAACGG": 2846, + "CTACTAA": 2847, + "GACATTA": 2848, + "GGACGA": 2849, + "GAGCATG": 2850, + "GCATGGG": 2851, + "CCACTTA": 2852, + "CTATCAA": 2853, + "GCTGTTTT": 2854, + "GTCGTG": 2855, + "CCTGGCC": 2856, + "TCTCTGAA": 2857, + "TGTTGTA": 2858, + "CAGCCAGG": 2859, + "GTTTAGG": 2860, + "CCGCAA": 2861, + "GGAGTAA": 2862, + "CCAATTA": 2863, + "CAGCAAAA": 2864, + "TCATCCA": 2865, + "CACGTA": 2866, + "TCATAGA": 2867, + "TAATTAAAA": 2868, + "CACTTAA": 2869, + "TCTTTATT": 2870, + "GAGATTA": 2871, + "TAAGAGG": 2872, + "CAAATTAA": 2873, + "GACGCA": 2874, + "CACGGA": 2875, + "GTGTGCA": 2876, + "TCT": 2877, + "TATTATTA": 2878, + "GAAATATT": 2879, + "GGAGTTA": 2880, + "TCTTTGA": 2881, + "CTGATTTT": 2882, + "TGTGAATT": 2883, + "TCCCACC": 2884, + "CCCTTTG": 2885, + "CAAGGTG": 2886, + "CAGAGTT": 2887, + "CCCCATG": 2888, + "CTACCAA": 2889, + "CTCCAAAA": 2890, + "CTTCCCC": 2891, + "CTGCTAA": 2892, + "GATTAAAA": 2893, + "GCTTATG": 2894, + "CTACTTA": 2895, + "TAAAAAATT": 2896, + "TCAGTCC": 2897, + "CTATTAAA": 2898, + "GAATGGG": 2899, + "CACAGTA": 2900, + "CAACGG": 2901, + "GGTTATT": 2902, + "TCACCCA": 2903, + "TGATGCA": 2904, + "TAATTTTTT": 2905, + "GTTTGAGA": 2906, + "GTATTAAA": 2907, + "GCCCCCA": 2908, + "TATAGTA": 2909, + "TAGTAAA": 2910, + "TGATACA": 2911, + "GTGGTTTT": 2912, + "CCACTAA": 2913, + "CACAGAGA": 2914, + "CCTCTGCCTCC": 2915, + "CAAAAAAAA": 2916, + "CTCTCTCC": 2917, + "CATAATA": 2918, + "GAAGCCA": 2919, + "GTTCCCA": 2920, + "TGTGTTTG": 2921, + "CAATGGA": 2922, + "TGAAGTA": 2923, + "CTTCATA": 2924, + "CACTGTG": 2925, + "GCTCTTTT": 2926, + "TGACATA": 2927, + "TAAAGAAAA": 2928, + "GAGAAATG": 2929, + "CAGGGAGG": 2930, + "TGTTCAA": 2931, + "GAGCCAA": 2932, + "GACAGAGA": 2933, + "GGCTGAA": 2934, + "CAAATATA": 2935, + "GTGGAAAA": 2936, + "TAAGGTT": 2937, + "GTGATTA": 2938, + "GGATCTG": 2939, + "GATGTTA": 2940, + "GACTACACA": 2941, + "TCCTATA": 2942, + "CTGCCAA": 2943, + "TCCCGA": 2944, + 
"GTGATTTT": 2945, + "GCGTTTT": 2946, + "CAGAGTA": 2947, + "GAAAGGAA": 2948, + "CACTTTG": 2949, + "CCCCAAAA": 2950, + "GCAACCCA": 2951, + "TGCATTTT": 2952, + "TCTAGAA": 2953, + "TACTTTG": 2954, + "TGAGGCA": 2955, + "CATCTCC": 2956, + "TCGCTA": 2957, + "TGACTTTT": 2958, + "GAGCCTG": 2959, + "CATTTGTT": 2960, + "TCTTTGTT": 2961, + "GCAAAATT": 2962, + "CCTGATT": 2963, + "GATAAAAA": 2964, + "GAGTGTT": 2965, + "TCCTGTA": 2966, + "TACAGAAA": 2967, + "TCCAGGAA": 2968, + "GCCAGTG": 2969, + "TAGATTTT": 2970, + "TAATAGG": 2971, + "CTCCTCA": 2972, + "CATTTTTG": 2973, + "CATTTCAA": 2974, + "GCCATCA": 2975, + "TAAAATATA": 2976, + "GACTGTT": 2977, + "GCATGGA": 2978, + "CAAAGTT": 2979, + "CATGATT": 2980, + "GAGTTTG": 2981, + "CTAGCAA": 2982, + "CTTCCTA": 2983, + "GGGGAGG": 2984, + "CTATATG": 2985, + "TATTTATTTT": 2986, + "CACCATT": 2987, + "CCCTCAA": 2988, + "TTTTTTTTTTTTTT": 2989, + "GATCATT": 2990, + "GTACATA": 2991, + "CTCCATA": 2992, + "CCCCGTCTCTA": 2993, + "GCCTGCC": 2994, + "CTAGCTT": 2995, + "CCCGGA": 2996, + "GATGTTTT": 2997, + "GTATTTTA": 2998, + "TCAGATA": 2999, + "CCTGGAA": 3000, + "TATTCCA": 3001, + "GGACCAA": 3002, + "GCCATTA": 3003, + "CGACTGA": 3004, + "TAAGCTG": 3005, + "TAAACACA": 3006, + "GTTTCTC": 3007, + "CATCTTA": 3008, + "GAAATTTG": 3009, + "TAATGGG": 3010, + "TAAAATTTT": 3011, + "CTGTTCA": 3012, + "CCTGTTA": 3013, + "TACTGAA": 3014, + "TGACCCA": 3015, + "TGATTTTA": 3016, + "CTCCTTA": 3017, + "TATAGAA": 3018, + "CTGCGG": 3019, + "GCGGTA": 3020, + "GTGCTAA": 3021, + "CAGAGGAA": 3022, + "TACATCA": 3023, + "TCAATCAA": 3024, + "CTGCAGCC": 3025, + "TGAATATT": 3026, + "TCTACAA": 3027, + "CCACATA": 3028, + "CCCGTT": 3029, + "TATACACA": 3030, + "TCCTCTC": 3031, + "TCTACTT": 3032, + "CCGGAA": 3033, + "CTTTTTTA": 3034, + "GAAAGAAAA": 3035, + "CTATCTT": 3036, + "GACTTTG": 3037, + "TGAACAA": 3038, + "GCAGTTTT": 3039, + "GCTAAAAA": 3040, + "GAGGCGG": 3041, + "TAATAAAAA": 3042, + "CTGGTCA": 3043, + "CAGACAA": 3044, + "GGATATG": 3045, + "TGAAGG": 3046, + "GCCAGAA": 3047, + 
"CCAGGCC": 3048, + "CCACCATG": 3049, + "CAAACTT": 3050, + "TCATGTA": 3051, + "GCTGCTT": 3052, + "GTAATA": 3053, + "CCCCCAA": 3054, + "CAGCCTG": 3055, + "TCAACTT": 3056, + "TAAAATTAA": 3057, + "GCTGAAAA": 3058, + "CGACGA": 3059, + "GTGGGCA": 3060, + "TGAGGGA": 3061, + "CGCTCC": 3062, + "TTTTGTTTT": 3063, + "GAGTCAA": 3064, + "TCATGCA": 3065, + "CTGCTTA": 3066, + "TAAGTTTT": 3067, + "GTAGCAA": 3068, + "CCTTGG": 3069, + "TGACAAAA": 3070, + "CTGGTAA": 3071, + "TCTTTATA": 3072, + "TGTGTGTT": 3073, + "CTGGTC": 3074, + "CTGGCAA": 3075, + "CATTTCTG": 3076, + "CTCTACC": 3077, + "CTGAGGA": 3078, + "CTAAAATG": 3079, + "CTAGATT": 3080, + "GTATCAA": 3081, + "CAGTCAA": 3082, + "CTGGGTG": 3083, + "CCTCTTA": 3084, + "TGAGTTTT": 3085, + "TTTTATTTA": 3086, + "CCTTTTTT": 3087, + "TATATACA": 3088, + "TAGCAAA": 3089, + "AAATTA": 3090, + "CTGGATG": 3091, + "GATAATA": 3092, + "GACAAAAA": 3093, + "CCTGGGA": 3094, + "GCTTTCA": 3095, + "GTACAGG": 3096, + "GCTGGAA": 3097, + "CTACTCA": 3098, + "CAATGTA": 3099, + "GCGTGAA": 3100, + "GATCCTT": 3101, + "TATTAATG": 3102, + "GCCCGA": 3103, + "TAAAGTG": 3104, + "GCTTCCA": 3105, + "CATGGAA": 3106, + "TGAAGTT": 3107, + "CTTTCTC": 3108, + "TCTGTGTG": 3109, + "GTATGTA": 3110, + "CAATACA": 3111, + "TCAAGG": 3112, + "CCTCTAA": 3113, + "TGTGGG": 3114, + "GATCTGA": 3115, + "GTACTGA": 3116, + "TTAATTAA": 3117, + "GCAGAAAA": 3118, + "CTACATA": 3119, + "CCGGTG": 3120, + "GGGGAAAA": 3121, + "TACAAAAAA": 3122, + "TTTTGG": 3123, + "GTGAGAA": 3124, + "TCAATAAA": 3125, + "TCAAGTT": 3126, + "CTCAGGA": 3127, + "CTACTC": 3128, + "CAAATCA": 3129, + "GGCAGAA": 3130, + "CCCGAA": 3131, + "TGTTGTG": 3132, + "GAGCAAAA": 3133, + "TATTTGTG": 3134, + "GTAGGTT": 3135, + "CTACCTG": 3136, + "CACAAAAA": 3137, + "CTCAGG": 3138, + "GCTTTA": 3139, + "CAGAGCAA": 3140, + "CTCAGTG": 3141, + "GGAAGAGA": 3142, + "TAACCTG": 3143, + "GAAATATA": 3144, + "CGAGAA": 3145, + "GTGAGG": 3146, + "CATTTATA": 3147, + "GGCAGCA": 3148, + "TCTAAATT": 3149, + "CCCAGTG": 3150, + "GCCTAGG": 3151, + 
"TGCATTA": 3152, + "CCGTAA": 3153, + "CATTCCA": 3154, + "CTAGTTA": 3155, + "GACTTAA": 3156, + "CTATACA": 3157, + "GACACAA": 3158, + "TCTTCACA": 3159, + "CCGGTT": 3160, + "TAAAGTAA": 3161, + "CTGTGGA": 3162, + "TAAGGTG": 3163, + "TCCAGTA": 3164, + "CAAATTTA": 3165, + "AAATTAAAA": 3166, + "CCATCTA": 3167, + "CTCCCTT": 3168, + "CTCCTTTT": 3169, + "GAGAGAGAGAGA": 3170, + "GGAGATA": 3171, + "CCTATTA": 3172, + "CACCAAAA": 3173, + "CCGTTA": 3174, + "TGTTTATA": 3175, + "CTCAGGAGG": 3176, + "GACGTA": 3177, + "GTCCTTA": 3178, + "GAAAGTT": 3179, + "GCTGGTG": 3180, + "CTCTACA": 3181, + "CAATAGA": 3182, + "TAAAATATT": 3183, + "GTACCTG": 3184, + "GTACTAA": 3185, + "CTTTGAAA": 3186, + "CCTTTCC": 3187, + "TAAAAATTA": 3188, + "CTCGG": 3189, + "CAAGATA": 3190, + "CATTTGA": 3191, + "CACCTCA": 3192, + "GCCAGCC": 3193, + "GTCGG": 3194, + "GCACATA": 3195, + "CACTCAA": 3196, + "CTTTTAAAA": 3197, + "CAGGAATT": 3198, + "GCCTATT": 3199, + "TCTTTCTG": 3200, + "CTGAGGCAGGAGAA": 3201, + "CAGGCAGG": 3202, + "CTAGTAA": 3203, + "TCCATA": 3204, + "GAACTTA": 3205, + "CG": 3206, + "GCTGTGA": 3207, + "GAAAATA": 3208, + "TCTTCATT": 3209, + "GAGGGAGA": 3210, + "CCCATCC": 3211, + "GAGGTGGG": 3212, + "GCCTCTA": 3213, + "GTAGGTG": 3214, + "TAAACCA": 3215, + "GAAGGAAA": 3216, + "TATTGG": 3217, + "ATG": 3218, + "TCCAGTT": 3219, + "CCCACAA": 3220, + "GAAACACA": 3221, + "GTCTCAAAA": 3222, + "CTTTTCTTTT": 3223, + "TGAAGGA": 3224, + "TATTGATT": 3225, + "CTATGTA": 3226, + "AAAAAAAAAAAAAA": 3227, + "TCCTTAAA": 3228, + "GCGCTA": 3229, + "TCCACTT": 3230, + "GACTCAA": 3231, + "TAAATACA": 3232, + "TCATGGA": 3233, + "TCTGGGA": 3234, + "TCCTATG": 3235, + "CTGTGCA": 3236, + "TCAAGTGA": 3237, + "TCATAAAA": 3238, + "CATCCAA": 3239, + "CCTTCCA": 3240, + "CTGTACA": 3241, + "GAAGGTT": 3242, + "CTGTGTA": 3243, + "GTCACTT": 3244, + "TCACAAAA": 3245, + "TCAGGCA": 3246, + "GTGTTAAA": 3247, + "CCCTTAA": 3248, + "CAAAGTG": 3249, + "GAAATGTT": 3250, + "CTGGGGA": 3251, + "GACGCC": 3252, + "TATATGTG": 3253, + "CTAGATG": 3254, + 
"GAAATTAAA": 3255, + "GAATGCA": 3256, + "GCACTAA": 3257, + "CGGGAGG": 3258, + "GCCACAA": 3259, + "CGCTTA": 3260, + "TCCACAA": 3261, + "CAGATA": 3262, + "TCTGAATT": 3263, + "TATTATTTT": 3264, + "GCGCGG": 3265, + "CTCTGAAA": 3266, + "TCTCTTTG": 3267, + "TATTTCTA": 3268, + "GGGGTGGG": 3269, + "GGATGCA": 3270, + "CCACACC": 3271, + "TAAATGTG": 3272, + "TCTTCCTG": 3273, + "GCAAGG": 3274, + "CTGCTCC": 3275, + "CTGGAGTG": 3276, + "CTGTTAAA": 3277, + "CACACAAA": 3278, + "CTGACTT": 3279, + "GAAAAGAAAA": 3280, + "CCTTCTCC": 3281, + "GAAATAAAA": 3282, + "CCTCAGGTGA": 3283, + "GATAATG": 3284, + "GAATTGCTT": 3285, + "CCAAAATT": 3286, + "CGTGAAA": 3287, + "CACTGAAA": 3288, + "CAGTGAAA": 3289, + "GATCTTA": 3290, + "GAGATGGG": 3291, + "TCTGCCA": 3292, + "TGAGGTA": 3293, + "TATGGAA": 3294, + "TATATTTTA": 3295, + "TGAACTT": 3296, + "GCAGATA": 3297, + "CTTTTCTT": 3298, + "GTAAAATG": 3299, + "TCTCTAA": 3300, + "TCTGCAAA": 3301, + "GAGCCTT": 3302, + "TATCATT": 3303, + "CAATTTTA": 3304, + "CCGCCA": 3305, + "TATTTAAAA": 3306, + "GAGAGATG": 3307, + "GAGATGGA": 3308, + "GCCAGGATG": 3309, + "CGAGTAGCTG": 3310, + "TTCATTTT": 3311, + "TATACTT": 3312, + "GTCTACA": 3313, + "GTGAGTGA": 3314, + "GCTACACA": 3315, + "GGGAGGA": 3316, + "CAAGGCA": 3317, + "GCTTTTAA": 3318, + "CACTATT": 3319, + "GTTCATA": 3320, + "TCCTC": 3321, + "GTGGACA": 3322, + "TATTTGGA": 3323, + "CTCCAGTA": 3324, + "GTTCAGTT": 3325, + "CCAAGG": 3326, + "CAGAGCC": 3327, + "CTCGCC": 3328, + "CCGATG": 3329, + "GGAATTTT": 3330, + "TCCAGCC": 3331, + "CCTCTTTT": 3332, + "GAACCTT": 3333, + "CATGCACA": 3334, + "GTTTC": 3335, + "GAAGATA": 3336, + "TACCCC": 3337, + "GCTGCCA": 3338, + "GGGGGAGG": 3339, + "GCAGTGAGCTGA": 3340, + "CTGTCTA": 3341, + "CGAGGA": 3342, + "CAATGGG": 3343, + "GCTGTGAA": 3344, + "GAAAGTG": 3345, + "TACCAAAA": 3346, + "GTCAGG": 3347, + "CAGCTCC": 3348, + "TGTGCTT": 3349, + "GTCTAGG": 3350, + "TTTTTGTA": 3351, + "TTATATG": 3352, + "TCAGGGG": 3353, + "TATTGTTA": 3354, + "CCTGAGA": 3355, + "TATCTCA": 3356, + "CAATCTG": 
3357, + "CACTCTG": 3358, + "GATTTAA": 3359, + "TGAATAA": 3360, + "TCTTGTA": 3361, + "TCAACTG": 3362, + "TCTCCAGG": 3363, + "CTAGAGG": 3364, + "CTGAGAAA": 3365, + "CTAGCTG": 3366, + "TCCACCA": 3367, + "CGATTTT": 3368, + "CCGGCC": 3369, + "GTTGACA": 3370, + "CTTAGAA": 3371, + "CATAATG": 3372, + "GAGTATT": 3373, + "CACAGAAA": 3374, + "GACTGTG": 3375, + "CTATTTTA": 3376, + "TGAGGAAA": 3377, + "TTATTAAAA": 3378, + "CTTATTTA": 3379, + "CAGACTT": 3380, + "CACGCC": 3381, + "GCTTGG": 3382, + "CCTGCTT": 3383, + "TAAAGCAA": 3384, + "CCTCGTGA": 3385, + "TAGAATT": 3386, + "CTTACAA": 3387, + "TAAAGGAA": 3388, + "GTCTAGA": 3389, + "GTGACTT": 3390, + "TACATATG": 3391, + "GTCAGGA": 3392, + "GCTCCAGG": 3393, + "GAAGGGA": 3394, + "CATGATG": 3395, + "TCATCAAA": 3396, + "CGTTAAA": 3397, + "GTACTCA": 3398, + "CTCCCAA": 3399, + "TATATGTA": 3400, + "GGTATTTT": 3401, + "TAAGCCA": 3402, + "CGAAATT": 3403, + "GTTTGTTTT": 3404, + "TCTGTCTT": 3405, + "TATATCA": 3406, + "TGTTCATT": 3407, + "CAAACCA": 3408, + "TTCATTA": 3409, + "TATTTGTA": 3410, + "GATTGAA": 3411, + "CTATAAAA": 3412, + "GATTAATT": 3413, + "CCCACCA": 3414, + "TCCTAGG": 3415, + "TAAATGTA": 3416, + "CTCTTAAA": 3417, + "GCAGTCC": 3418, + "GCGGCTG": 3419, + "GTCTCGAA": 3420, + "TGAATGA": 3421, + "CTGGGGG": 3422, + "GTCTCGA": 3423, + "GAACAAAA": 3424, + "TGAATCA": 3425, + "TGTATTTTTAGTAGAGA": 3426, + "GTTATTAA": 3427, + "TTTTTTAAAA": 3428, + "GTCAGTG": 3429, + "CCCATTA": 3430, + "CACAGGA": 3431, + "TATTCCTT": 3432, + "TCTGCCTT": 3433, + "CCTGGTG": 3434, + "GCGAGC": 3435, + "TACTAAA": 3436, + "TACACAAA": 3437, + "CCGTCC": 3438, + "GCTTTGTT": 3439, + "GCATCCA": 3440, + "CATCTAA": 3441, + "GCTGTGTT": 3442, + "GTAGACA": 3443, + "GCCTATG": 3444, + "TCTTTGTG": 3445, + "GATTCTG": 3446, + "CGCCCGG": 3447, + "GATGAGA": 3448, + "TATCTGA": 3449, + "TGAATTTG": 3450, + "CCTGATG": 3451, + "TAAAACAA": 3452, + "CTTTAGG": 3453, + "TTTTCCTT": 3454, + "TGAATAAA": 3455, + "CGGGGA": 3456, + "CAAACATT": 3457, + "GTATGGA": 3458, + "GCTTAAAA": 3459, + 
"TACCAAA": 3460, + "CAAAGAGA": 3461, + "CTCCTGCC": 3462, + "GTAAAAAAA": 3463, + "CACAGCC": 3464, + "CCATGCA": 3465, + "TACAATT": 3466, + "CTAGTGA": 3467, + "CTGAGTT": 3468, + "GAGTGAAA": 3469, + "TCTGTTTG": 3470, + "CTGTAGG": 3471, + "TATAAAAAA": 3472, + "GCATTAAA": 3473, + "GTCCATA": 3474, + "TGTTAAAAA": 3475, + "TGTTTGA": 3476, + "GAATAGA": 3477, + "CTTCAAAA": 3478, + "CTGGACA": 3479, + "CTGTAGA": 3480, + "CCATTAAA": 3481, + "CTATCTG": 3482, + "CACTATG": 3483, + "TTATCAA": 3484, + "TAAGTAAA": 3485, + "TAATCCCAGCACTTTGGGAGGCC": 3486, + "CCAGAAAA": 3487, + "TGAAGCA": 3488, + "TCCCTTTT": 3489, + "TCATACA": 3490, + "TACGTT": 3491, + "GCCGTG": 3492, + "GGAAGTG": 3493, + "GGCCAAA": 3494, + "GTACCAA": 3495, + "TCTCTACTAAAAATA": 3496, + "CATTGTG": 3497, + "TGTGTGA": 3498, + "GAAACAGA": 3499, + "CTTGACA": 3500, + "GATGAGG": 3501, + "GAGATTTT": 3502, + "CCTTCAA": 3503, + "GAATCTA": 3504, + "CTCTCCTT": 3505, + "GGCGGA": 3506, + "TCTATCTATCTATCTA": 3507, + "CACACAGA": 3508, + "TGTGTGTA": 3509, + "CAAAGCC": 3510, + "TGTGCCA": 3511, + "GTTGAAAA": 3512, + "CTCCAGCA": 3513, + "TCAAGGA": 3514, + "TAGCTCA": 3515, + "CGCTGA": 3516, + "CCTGAAAA": 3517, + "GACTATT": 3518, + "GATTCCA": 3519, + "GCTTCTA": 3520, + "GTCTGCC": 3521, + "CTTGGCA": 3522, + "TGTGGTA": 3523, + "GCTTTGA": 3524, + "GCTCTCTG": 3525, + "CTCACAGA": 3526, + "TCTTTAAA": 3527, + "CAAAGCAA": 3528, + "TACTTAA": 3529, + "GCTTCAA": 3530, + "CATTGAA": 3531, + "GGAGGAAA": 3532, + "CTATAGA": 3533, + "CTGAGGAA": 3534, + "CCTGGCA": 3535, + "CCCTATT": 3536, + "CTCGTG": 3537, + "TTACACA": 3538, + "TTAGGAA": 3539, + "CTGGTTA": 3540, + "GTTGTCC": 3541, + "TAATGAAAA": 3542, + "TATTTACA": 3543, + "GGGAATT": 3544, + "GTAGTTTT": 3545, + "GCTGCAA": 3546, + "CTACGG": 3547, + "GCCGGA": 3548, + "CTGGGCA": 3549, + "CCTTAAAA": 3550, + "GATGGAA": 3551, + "TAGATAGATAGATAGA": 3552, + "TATGTAA": 3553, + "GTACGG": 3554, + "TATTCAAA": 3555, + "GATCTCC": 3556, + "CCTGTTTT": 3557, + "TATTGCA": 3558, + "GGAAGGAAGGAAGGAA": 3559, + "GGTAATT": 3560, + 
"TTACAGA": 3561, + "TCAGC": 3562, + "GCAAAATG": 3563, + "GAGAGCA": 3564, + "GTAGAAAA": 3565, + "CATTTGAA": 3566, + "TCTTCTTTT": 3567, + "TCCCATA": 3568, + "GTTATTTA": 3569, + "CTATCTA": 3570, + "CATCCTG": 3571, + "TCTTGTG": 3572, + "TTATTATT": 3573, + "CCCGTC": 3574, + "TACTATG": 3575, + "TAAACATA": 3576, + "TAAGGAAA": 3577, + "GCTTGTG": 3578, + "CTCTAAAA": 3579, + "GTTTTAAAA": 3580, + "GACAGGA": 3581, + "TCCTAGA": 3582, + "TCCACCCA": 3583, + "GTTTGAAA": 3584, + "CCATCTCA": 3585, + "CTAAGAA": 3586, + "GTATCTA": 3587, + "GTGAGGA": 3588, + "GCTGGAGG": 3589, + "CCTGTAATCCCAGCTA": 3590, + "GCAACAA": 3591, + "CTTTCAAA": 3592, + "CAAATGTT": 3593, + "CTTGTCC": 3594, + "TCTCAAAAA": 3595, + "TATTTATTA": 3596, + "TAAGGCA": 3597, + "GAGAGGAA": 3598, + "TATGATT": 3599, + "GCATCTA": 3600, + "CGTTATT": 3601, + "GCCTGTA": 3602, + "GTTTCAAA": 3603, + "CCTTCCTTCCTTCCTT": 3604, + "GGCTTTG": 3605, + "GTCAGAA": 3606, + "CATGCATG": 3607, + "GTCATTTA": 3608, + "CTGGAAAA": 3609, + "CTTCGA": 3610, + "CCTATTTT": 3611, + "CCAACAA": 3612, + "TCCATCC": 3613, + "TAAAGTTA": 3614, + "GTCTCTC": 3615, + "TAATCAAA": 3616, + "GATTTTTG": 3617, + "GATTTCTT": 3618, + "GGGCTGA": 3619, + "GCATGTA": 3620, + "CCTGGGTT": 3621, + "GAGACAA": 3622, + "GCTGTCA": 3623, + "TGATAGG": 3624, + "GGAGACC": 3625, + "CCGGCA": 3626, + "TAATCTCA": 3627, + "TGAATTAA": 3628, + "TCTGGTG": 3629, + "GCCTC": 3630, + "GGCGCA": 3631, + "CCAGCTA": 3632, + "CAGTCTG": 3633, + "TGAACTA": 3634, + "GTAAGAA": 3635, + "CCTTTCA": 3636, + "TCCATGA": 3637, + "CAAAGGAA": 3638, + "CTCTC": 3639, + "CTCTCTCA": 3640, + "CTCCAGC": 3641, + "GTAGATA": 3642, + "CCCCCTCC": 3643, + "GGCGCC": 3644, + "TCTGTCC": 3645, + "GACCATT": 3646, + "CTTGAAAA": 3647, + "TTATCC": 3648, + "TACATGTG": 3649, + "CAAATTTG": 3650, + "TTTTGTG": 3651, + "CAGAGTG": 3652, + "GTAATAA": 3653, + "GTGAGTG": 3654, + "TTTTTCC": 3655, + "GGCTCTG": 3656, + "GCCCTAA": 3657, + "GGCTGTT": 3658, + "CCCAATT": 3659, + "CAGAGCTT": 3660, + "TATAAATG": 3661, + "GAGTCTG": 3662, + "TCTTAAAAA": 
3663, + "GTTTTATG": 3664, + "GATCCAA": 3665, + "GGCCCTG": 3666, + "GATCCTG": 3667, + "TCAAGTG": 3668, + "GATTCAA": 3669, + "CCTCTCTT": 3670, + "GAGACGG": 3671, + "CAGATCA": 3672, + "TAAAAGAA": 3673, + "CTGAGCAA": 3674, + "CCTGCCA": 3675, + "CCTTCTA": 3676, + "CGCTCA": 3677, + "GGCTGTG": 3678, + "TGGGAAAA": 3679, + "GGAGCCTG": 3680, + "CTGAGTG": 3681, + "CGTCAAA": 3682, + "TCAAGTA": 3683, + "CGTAATT": 3684, + "TTACTTA": 3685, + "TATACTA": 3686, + "GGGCAAA": 3687, + "CAACTTTT": 3688, + "CTTTGCC": 3689, + "GCCAGGAA": 3690, + "CACACTA": 3691, + "GCCCAGC": 3692, + "TAAATAAATAAATAAA": 3693, + "CTTTCCTT": 3694, + "GGGAGAA": 3695, + "TATGGTA": 3696, + "CGGCCA": 3697, + "CCTCTCTG": 3698, + "GAAAGCAA": 3699, + "CAAGCCA": 3700, + "GGCGTT": 3701, + "CTCTTTTA": 3702, + "TCGGCCTCCCAAA": 3703, + "GATTTATT": 3704, + "CAAGTCC": 3705, + "TATCTTA": 3706, + "GTTCAAGACCA": 3707, + "CTCACACA": 3708, + "GAAATCAA": 3709, + "TGAGACC": 3710, + "GGGTAAA": 3711, + "GCTTGTT": 3712, + "GATTTTAA": 3713, + "TTTTTATA": 3714, + "CAGAGCTG": 3715, + "TCTGTTAA": 3716, + "GTAATTAA": 3717, + "TCTTTGAA": 3718, + "CTTGCCA": 3719, + "TTTTCATT": 3720, + "CCATGTA": 3721, + "TCTCGGCTCACTGCAA": 3722, + "GGATTCA": 3723, + "TCTATTAA": 3724, + "TACATAAA": 3725, + "GATTGATT": 3726, + "GGAGAGGA": 3727, + "CGCAAAA": 3728, + "GGACTAA": 3729, + "TTATGTG": 3730, + "GTCACTCA": 3731, + "GACAGCA": 3732, + "CGAGTT": 3733, + "GATGGTT": 3734, + "GGAAGAGG": 3735, + "GCCAACATGGTGAAA": 3736, + "GGAGCCA": 3737, + "TGAACTG": 3738, + "CCTCTGTG": 3739, + "GTATAAAA": 3740, + "TCCCAGAA": 3741, + "CATTTATG": 3742, + "GATTATG": 3743, + "TGTTTCTG": 3744, + "GAGTGGGTT": 3745, + "TACATATT": 3746, + "CTCCAGGA": 3747, + "GACACTG": 3748, + "GGTCTCA": 3749, + "CCGGGA": 3750, + "TGTTTAAA": 3751, + "CTCACCA": 3752, + "GGACTTA": 3753, + "GCCCACC": 3754, + "CAAATCAA": 3755, + "GAAATGTG": 3756, + "TAGTTAA": 3757, + "TCTATAA": 3758, + "TTAGATT": 3759, + "GTGTAGG": 3760, + "TACTGAAA": 3761, + "GCACCCA": 3762, + "GTGGGCTG": 3763, + "GAATGAAA": 3764, 
+ "TCTAGTT": 3765, + "TCAGGAGA": 3766, + "TCCACTA": 3767, + "CTCAGTT": 3768, + "TACTTAAA": 3769, + "GACTCCA": 3770, + "TCCATTTG": 3771, + "CACAGCAA": 3772, + "GCTCATGCCTG": 3773, + "GGTGCTG": 3774, + "GCTTTCTT": 3775, + "GTGGCCA": 3776, + "TACGTG": 3777, + "GTGCAGTG": 3778, + "TGAAGTCA": 3779, + "CCTTTAA": 3780, + "TCTCAGCTCACTGCAA": 3781, + "GAAATATG": 3782, + "CCTCAAAA": 3783, + "GGGGCGG": 3784, + "CGACAA": 3785, + "GGTGATG": 3786, + "GTCTTAAA": 3787, + "CAGAAATG": 3788, + "CGTCATT": 3789, + "CCAAGCA": 3790, + "GGATCAA": 3791, + "GTGCTGGGATTA": 3792, + "GCTGGCC": 3793, + "CGGAGCTT": 3794, + "TACATGA": 3795, + "TGTTTGAA": 3796, + "TCTCCATT": 3797, + "TAAGCAAA": 3798, + "CCTTTCTT": 3799, + "TACTGTT": 3800, + "TCCATCTT": 3801, + "CTTACTT": 3802, + "CGGAGGTT": 3803, + "CAAAACAA": 3804, + "TCATAGG": 3805, + "TTACTAA": 3806, + "CTTATTTG": 3807, + "GAATGTA": 3808, + "CCCCATGGA": 3809, + "TTACTGA": 3810, + "CGGAAAA": 3811, + "CTCCAGTG": 3812, + "TGTTCCA": 3813, + "CAGATGAA": 3814, + "GTTGATA": 3815, + "TCCCCCC": 3816, + "CATTGCA": 3817, + "CTCAGCC": 3818, + "CTTACTG": 3819, + "TATCCTT": 3820, + "CTTTTATG": 3821, + "TGAGTAGCTG": 3822, + "GACTGAAA": 3823, + "CAATGAAA": 3824, + "CGACTG": 3825, + "CTTGGGA": 3826, + "GCAAGCA": 3827, + "TCACTCC": 3828, + "GATTTGA": 3829, + "CATTTTAAA": 3830, + "TCAACTA": 3831, + "GTCCAAAA": 3832, + "CACCCTG": 3833, + "TTACCTT": 3834, + "CAAGGGG": 3835, + "TTTTGGA": 3836, + "GTTATTTG": 3837, + "GCTACTG": 3838, + "CTGAGGCAGGAGAATG": 3839, + "GTGATGA": 3840, + "GTAGTC": 3841, + "TAGTATG": 3842, + "GTATAGA": 3843, + "GTGTCTA": 3844, + "GCTGCTA": 3845, + "TTAGTAA": 3846, + "TAAACATG": 3847, + "GTCACCA": 3848, + "CATCTTTT": 3849, + "CATATAA": 3850, + "TCTCTCTA": 3851, + "TTTTATTAA": 3852, + "TATTCTAA": 3853, + "GAAATTTA": 3854, + "CTTCCCTG": 3855, + "TAAAGATG": 3856, + "TACGTA": 3857, + "GTTTATTA": 3858, + "GAAAAGAA": 3859, + "CCCACCCA": 3860, + "CAATTAAAA": 3861, + "CCGACA": 3862, + "CAAAGTGA": 3863, + "CAAACAAAA": 3864, + "GCAATTTT": 3865, + 
"CGATTAA": 3866, + "TTAGAGA": 3867, + "CTGATGA": 3868, + "GGAGGAGG": 3869, + "GTCCTGGG": 3870, + "TCATGAAA": 3871, + "GCAACCA": 3872, + "GTTGGCA": 3873, + "GCGGCGG": 3874, + "GTCCCCA": 3875, + "GTAGGGG": 3876, + "GCCATGTT": 3877, + "GTTCGAGA": 3878, + "GCCTATA": 3879, + "TAAATTCA": 3880, + "GGCCATT": 3881, + "GAAAACAA": 3882, + "TGTGTATG": 3883, + "GTACTC": 3884, + "TAGGGAA": 3885, + "CCTTGAA": 3886, + "TCTATTTG": 3887, + "GAGGGCA": 3888, + "GAAACTGA": 3889, + "TACGC": 3890, + "TACAAAAA": 3891, + "TCATTATT": 3892, + "GGAAAATT": 3893, + "TCAATATT": 3894, + "CCCGTA": 3895, + "GGAGAGAA": 3896, + "TTAGTTA": 3897, + "CTCAGAGA": 3898, + "TCGAGC": 3899, + "CTAGTCA": 3900, + "GATGGCA": 3901, + "TGAACATT": 3902, + "CTATGGG": 3903, + "CACACCA": 3904, + "TCAATTAA": 3905, + "GGAACTG": 3906, + "TTACATG": 3907, + "CTTTCATT": 3908, + "CAGCTCTG": 3909, + "TCTTTTTTTT": 3910, + "TAAATCTT": 3911, + "TGATCTA": 3912, + "CATACAA": 3913, + "GCTCAAAA": 3914, + "GCTGTGTG": 3915, + "TCAATCA": 3916, + "GATTTGAA": 3917, + "CCAAGGA": 3918, + "GTCCTCA": 3919, + "GTGCTCC": 3920, + "AAAATAA": 3921, + "GTGACAA": 3922, + "GCTCACGCCTG": 3923, + "CGACGG": 3924, + "TATCCAA": 3925, + "CACACATG": 3926, + "TCTCTCTCC": 3927, + "TGTGGTT": 3928, + "CTTGGTA": 3929, + "TCTGGTT": 3930, + "TTTATAA": 3931, + "CTGCTTTT": 3932, + "TGTGTCA": 3933, + "CACATCA": 3934, + "CCTAATG": 3935, + "CGTTTTTT": 3936, + "GCTGGCA": 3937, + "GACGTC": 3938, + "TATAATTA": 3939, + "TACAGTAA": 3940, + "GAAAGTAA": 3941, + "GTCTGAAA": 3942, + "CCCATTTT": 3943, + "TATATGA": 3944, + "CTTGATA": 3945, + "CTTTATTTT": 3946, + "CTTTATTA": 3947, + "GGCGAA": 3948, + "CCATGCC": 3949, + "CCTGCCTT": 3950, + "GAAGAAGAAGAA": 3951, + "CTGACTGA": 3952, + "GCCCTTA": 3953, + "TATCTAA": 3954, + "GTGTTTTA": 3955, + "TGTGGCA": 3956, + "TATTGTAA": 3957, + "GCCAGAAA": 3958, + "CCCTGTCTC": 3959, + "CACAGGAA": 3960, + "AAAACAA": 3961, + "AAAAAAAAAAAAAAA": 3962, + "TAACTCC": 3963, + "GCCTAAA": 3964, + "CGAGTA": 3965, + "TAGTATT": 3966, + "GTATTTTTAGTAGAGA": 
3967, + "GCTGCAGG": 3968, + "TATTGAAA": 3969, + "CCAGCCTGGG": 3970, + "GCTCCAAA": 3971, + "TACGAA": 3972, + "GGCCTCC": 3973, + "TATACAAA": 3974, + "CATGGCA": 3975, + "CATGCAA": 3976, + "TACACCA": 3977, + "CTTTACCA": 3978, + "TACAGAGA": 3979, + "TATTCTTA": 3980, + "TATGTCA": 3981, + "TCAAGCA": 3982, + "TCAATGA": 3983, + "GGCTCTT": 3984, + "GGAAGTT": 3985, + "TCCATGTT": 3986, + "GCTTTCC": 3987, + "TATGTGA": 3988, + "GTGTAGA": 3989, + "TTTTTAAAA": 3990, + "GCTGGAGA": 3991, + "GTGAGAGA": 3992, + "CCTAGAA": 3993, + "CCTCCAAA": 3994, + "CCAATGA": 3995, + "CAGGGCA": 3996, + "CTATGCA": 3997, + "CTTCACC": 3998, + "CTACAAAA": 3999, + "CTCACC": 4000, + "GAGTATG": 4001, + "TAGAAAAA": 4002, + "CTTTTGAA": 4003, + "TAAAGAGA": 4004, + "CATGTCA": 4005, + "TCTTTTAAA": 4006, + "CACAGTGA": 4007, + "GATCTAA": 4008, + "TAAGGTA": 4009, + "CATAGAA": 4010, + "CGCGCC": 4011, + "CAGCTTA": 4012, + "TATAGTT": 4013, + "CGGGCC": 4014, + "TATCCATT": 4015, + "TGTTTGTTTT": 4016, + "GCTGGCTG": 4017, + "TACAGGA": 4018, + "CTCCTTTG": 4019, + "CAATCTA": 4020, + "CCCCCTG": 4021, + "TATACTG": 4022, + "CTGAGCC": 4023, + "CGGTTA": 4024, + "TGAAGTG": 4025, + "GCTTCCTT": 4026, + "TTTTATTTG": 4027, + "TAGTGAA": 4028, + "CTGAGGTG": 4029, + "TCTTCTC": 4030, + "GACAGAAA": 4031, + "CTGAACTGAA": 4032, + "CCTGGGAA": 4033, + "TCCCCAAA": 4034, + "TATGTATT": 4035, + "GATTTCTG": 4036, + "CATTCAAA": 4037, + "CACAGTT": 4038, + "GCTTGAA": 4039, + "GTGGATCA": 4040, + "CTGAGTGA": 4041, + "TGAATTTA": 4042, + "TCAACAAA": 4043, + "GGTCATT": 4044, + "GTAATTTA": 4045, + "GCGACTT": 4046, + "CTGAGAGA": 4047, + "GTGCCCA": 4048, + "CTAGGTT": 4049, + "TCCTGAAA": 4050, + "GTCCACC": 4051, + "TCACAGAA": 4052, + "GCGAAAA": 4053, + "GTATGGG": 4054, + "TGAACAAA": 4055, + "TAAACAAAA": 4056, + "CCGTTTT": 4057, + "TCTCAATT": 4058, + "TCCAGAAA": 4059, + "GTAACAA": 4060, + "GCATTTTA": 4061, + "TCTCCATG": 4062, + "TTATAAAA": 4063, + "CAGGCAA": 4064, + "CTAAAAAAA": 4065, + "GTTGGGA": 4066, + "TAAAGATT": 4067, + "TGAAGAGA": 4068, + "CCCCTCA": 
4069, + "TGTTTATG": 4070, + "TCTACTG": 4071, + "CCAATTTT": 4072, + "GGTGGTG": 4073, + "GGAACAA": 4074, + "TGTGGGA": 4075, + "TCTGCTA": 4076, + "GAACGA": 4077, + "GTAAGTA": 4078, + "GTTGCCA": 4079, + "AAAATTTT": 4080, + "GCGCGA": 4081, + "GAAAGATG": 4082, + "GTCTCTCA": 4083, + "TCCATCAA": 4084, + "GCAGCTA": 4085, + "CACATTTG": 4086, + "CTGACAA": 4087, + "TCCACC": 4088, + "GCT": 4089, + "CCCACTT": 4090, + "GCAGGTA": 4091, + "GAGGCCA": 4092, + "TAAAGTCA": 4093, + "CTGGATA": 4094, + "CGGCAA": 4095 + }, + "merges": [ + "A A", + "T T", + "T G", + "C A", + "C C", + "T A", + "G G", + "T C", + "G A", + "AA A", + "G C", + "T AA", + "TT TT", + "T CA", + "TG A", + "TT A", + "G AA", + "T CC", + "C AA", + "C TG", + "C TT", + "G TG", + "G TT", + "G CA", + "GG A", + "C CA", + "G TA", + "G CC", + "C TA", + "T AAA", + "AA AA", + "C TC", + "G TC", + "TG TG", + "TA TT", + "CA CA", + "G AAA", + "TA TA", + "TC TT", + "TG TT", + "C AAA", + "GA GA", + "CA TT", + "TG AA", + "CA GG", + "TC TG", + "CA GA", + "TC AA", + "GG AA", + "TAA AA", + "C TGA", + "GC TT", + "G TGA", + "GC TG", + "C TCA", + "CC TT", + "CA TG", + "GC AA", + "G TCA", + "G TAA", + "TTTT A", + "TA TG", + "GA GG", + "C GG", + "GA TT", + "CC TG", + "TC TC", + "CC AA", + "G TTA", + "C TCC", + "C TAA", + "TA CA", + "C TTA", + "TC CA", + "GA TG", + "TT AA", + "GAA AA", + "TT TG", + "G TTTT", + "TC TA", + "GC CA", + "G TCC", + "C TTTT", + "GG GG", + "C GA", + "TT TA", + "CC CA", + "CAA AA", + "TG GG", + "TA GA", + "TA GG", + "GA CA", + "GG TT", + "CC CC", + "GG TG", + "CA TA", + "GC TA", + "TG TA", + "TC AAA", + "TG GA", + "TAA TT", + "TTA TT", + "TG CA", + "GG CA", + "GA TA", + "CC TA", + "TT CA", + "TC TCA", + "GG GA", + "C GC", + "CTG AA", + "G TAAA", + "TC TCC", + "TTTT TT", + "C GTG", + "GC AAA", + "TAA AAA", + "TC TGA", + "TCA TT", + "GG AAA", + "TG AAA", + "TCC TT", + "CC AAA", + "GAA TT", + "C TAAA", + "C GTT", + "GTG AA", + "GG CC", + "TAA TA", + "GG TA", + "TG CC", + "CA CC", + "TGA TT", + "AAAA AA", + "GC TCA", + "TCC 
AA", + "GA GAA", + "CTG TT", + "TA TTA", + "CA GCA", + "CTC TT", + "CTT AA", + "CA GAA", + "GC TGA", + "GTT AA", + "TC TTA", + "TA TTTT", + "GCC AA", + "CTT TG", + "GA CC", + "C GCA", + "GTA TT", + "GTC TT", + "CAA TT", + "GTG TT", + "CTC AA", + "GGA GG", + "C GAA", + "TC TTTT", + "GTC AA", + "C GCC", + "TA TAA", + "TA CC", + "TC TAA", + "CCA TT", + "C GGA", + "CAA AAA", + "CA GTG", + "TCC TG", + "CTC TG", + "GAA AAA", + "CTG TG", + "CA GC", + "TTTT AA", + "GCA TT", + "GCC TT", + "TAA TG", + "CTA TT", + "GTT TG", + "TGA TG", + "GG CTG", + "CC TCA", + "GA GGA", + "GCC TG", + "AAA TT", + "C GTA", + "TC AAAA", + "TA CAA", + "CA TCA", + "CA GTT", + "TGA GA", + "GG GAA", + "CA CTG", + "CA CAA", + "CA GGA", + "CC CCA", + "CC CTG", + "TTTT TTTT", + "TA GAA", + "GA GCA", + "CC TCC", + "CA CCA", + "TA TCA", + "GA GC", + "CA TTA", + "CACA CACA", + "GA GTG", + "GGA TT", + "TGTG TGTG", + "TA CTT", + "CA CTT", + "GTC TG", + "TGA GG", + "GA GTT", + "GAA TG", + "TCA TG", + "GA CAA", + "GA CTT", + "TATT AA", + "TAA TAA", + "GG CCA", + "CA TTTT", + "CA GCC", + "CC CTT", + "GC TAA", + "TATA TATA", + "GTG TG", + "TA CTG", + "TA GTT", + "CAA TG", + "GC TC", + "CA GTA", + "GC TCC", + "CA TAA", + "TTA TG", + "TAAA TT", + "GA TGA", + "CA TGA", + "GC GG", + "AAAA AAAA", + "CCA TG", + "GA TAA", + "GA CTG", + "TA TGA", + "GCA GG", + "GA TCA", + "G TTTTA", + "GGA TG", + "CC TGA", + "G TAAAA", + "GAA GG", + "GA TTA", + "CC TC", + "GA CCA", + "GC TTA", + "CC CAA", + "AAA TG", + "GCA TG", + "TA GTA", + "TA CCA", + "GG CTT", + "C GTC", + "TC TCTT", + "GG TCA", + "TTA TTA", + "TA CTA", + "TA GCA", + "TA TC", + "CTG GG", + "CA TC", + "C TTTTA", + "C TAAAA", + "GTG GG", + "GA GTA", + "CCA GG", + "GA TTTT", + "TA GTG", + "GAAA TT", + "CA CTA", + "TC GG", + "TCA GG", + "CAGG AA", + "GC AAAA", + "CC TTA", + "CA TCC", + "CTT GG", + "TGTG AA", + "TATT TG", + "CC TAA", + "CTA TG", + "GA GAAA", + "GAGA GAGA", + "GC TTTT", + "TA TAAA", + "CAA GG", + "TC TCTG", + "TGTT AA", + "TGTG TT", + "GA GCC", + "GA 
CTA", + "TA TATT", + "TAA AAAA", + "TTTT TG", + "GTA TG", + "CATT AA", + "TA GGA", + "TA GC", + "GTT GG", + "GAA GAA", + "TAAA TG", + "TC TGTT", + "CA GAAA", + "CAAA TT", + "TAA TTA", + "TC TGTG", + "TA TCC", + "TGAA TT", + "CTC CA", + "GTG AAA", + "GG CAA", + "GGA GA", + "GAA GA", + "GG TGA", + "GG GCA", + "CC AAAA", + "TCTC TCTC", + "CTG CA", + "CTT CTT", + "TCTT AA", + "CC CTA", + "TGTG TG", + "AAA TA", + "TGTT TG", + "GG GTT", + "GTG CTG", + "GG AAAA", + "GG GGA", + "TCA GA", + "CC TTTT", + "GAAA TG", + "GCA GCA", + "TC TGAA", + "GG GTG", + "CACA TT", + "TCTT TG", + "GG GC", + "TCC CA", + "TC CATT", + "CTG AAA", + "CTT TA", + "TC GA", + "GTT TA", + "CAA CAA", + "CTT CC", + "GCC TCC", + "TT AAA", + "GC TCTG", + "GTT TCA", + "GGA GGA", + "C GTGA", + "CA GTC", + "GAA TA", + "CA GAGA", + "CC CTC", + "CAAA TG", + "CTG CTG", + "GA TCC", + "TTTTA TT", + "AAAA TT", + "TTA TA", + "TCAA TT", + "GG TAA", + "GTTA TT", + "GC CAGG", + "GGA GAA", + "CATT TG", + "TCA CC", + "CTC AAA", + "GG TTA", + "TCC AAA", + "TC TATT", + "GCA GA", + "CTT CA", + "TCA TCA", + "C GAGG", + "TAA CA", + "GTT GTT", + "CTTA TT", + "C GTCA", + "TAA GA", + "TAA TTTT", + "CTG TA", + "TC CACA", + "GC TGTG", + "C GCTG", + "TC TAAA", + "GC GA", + "CAA TA", + "CCA CCA", + "GAA CA", + "C GAAA", + "CAGA TT", + "TCA CA", + "TTA TTTT", + "TC TCAA", + "TGA CA", + "CTCC AA", + "AAAA AAA", + "TATA TG", + "TCC TCC", + "TCA CTT", + "TC CAGG", + "CAA GA", + "GG CTA", + "GTG GTG", + "C GTAA", + "C GAGA", + "TGA TA", + "GGA TTA", + "CAA CA", + "C GATT", + "TGA GAA", + "CTCC TT", + "CTCA TT", + "GTT AAA", + "TCA TA", + "CC TCTG", + "CTC TA", + "GC TGAA", + "CTG GA", + "TAA GG", + "CTT AAA", + "TATT TA", + "CCA CA", + "CC GG", + "GTC AAA", + "TG GAA", + "C GGAA", + "TGA TGA", + "GTT CA", + "TAA CAA", + "GC TGTT", + "TAA GAA", + "CTG CC", + "TTAA TT", + "CCA GA", + "TCA GAA", + "GTCA TT", + "C GCTT", + "GATT AA", + "CTGA TT", + "GC CACA", + "GTAA TT", + "TC CAGA", + "GCC AAA", + "GTGA TT", + "TAAAA TT", + "CAA GAA", + 
"CCA CC", + "TAA TCC", + "GTT CTT", + "TC CATG", + "GC TCTT", + "TG CTG", + "GG GTA", + "TTA CA", + "GC CATT", + "GCA CA", + "GCAA TT", + "TCC CTG", + "TG TGA", + "TC GAA", + "GGA CA", + "GGAA TT", + "GTG GA", + "CTT CTG", + "TCC CC", + "GCC CC", + "CTT GA", + "TAA TGA", + "TAAA TA", + "TATA TA", + "CTG CAA", + "TCA TTA", + "GTA TA", + "TCC CCA", + "C GTTA", + "GCA GAA", + "TGA GTT", + "CTTTT TT", + "C GATG", + "CTT TCA", + "AAAA TG", + "CAGG TT", + "CTAA TT", + "C GCCA", + "TGAA AAA", + "GTT CC", + "GTCC TT", + "GTCC AA", + "GTTTT TT", + "CTC TGA", + "GC GC", + "GTT GA", + "TGAA TG", + "CTA TA", + "GCA GTG", + "CCTT AA", + "TCA CCA", + "TCA CTG", + "GCC CTG", + "TAA CTT", + "CAGA TG", + "GTA GG", + "TC TATA", + "GAGA TT", + "GTC TA", + "TTTT AAA", + "CACA TG", + "TGA CC", + "CA CAAA", + "GTG TA", + "GG GAGG", + "GCTT TG", + "CAA AAAA", + "GA GGAA", + "GTT CTG", + "TTTT TA", + "GTC TCA", + "GTT CAA", + "TC GTG", + "GCTT AA", + "GCA CC", + "CTCC TG", + "TAAA TAAA", + "CTA CA", + "CTT CCA", + "TCC TCA", + "C GCAA", + "GAA AAAA", + "GCC CA", + "TC GTT", + "GTA GA", + "CTC TCA", + "GTC CA", + "TGA CTT", + "TCC CTT", + "GC CATG", + "CACACACA CACACACA", + "GTGA TG", + "CC TCTT", + "GC CAGA", + "TCC TA", + "C GTTTT", + "GTA CA", + "GCA TA", + "GAA TTA", + "TGTGTGTG TGTGTGTG", + "CC CAGG", + "GG TTTT", + "TCAA AAA", + "TC TATG", + "CCA TA", + "TGA CAA", + "GGA TA", + "TCA GTG", + "GTA TTTT", + "GAGA TG", + "GC GTG", + "C GTCC", + "TTAA AAA", + "TAA TCA", + "CAA TTA", + "CCA CTG", + "CGG TT", + "GTT GAA", + "TGA TTA", + "CCTT TG", + "CGG TG", + "CAGG TG", + "TCAA TG", + "CTGA TG", + "TCA GGA", + "GTT TAA", + "TATT AAA", + "CTC TTA", + "GCA GGA", + "CTC TCC", + "GAA CC", + "CTT TAA", + "GG GCC", + "GTA TTA", + "GC GCC", + "CCAA TT", + "GC TAAA", + "TGA CTG", + "GATT TG", + "GA TAAA", + "TCA GCA", + "GTT CCA", + "GAAA TA", + "GA CAAA", + "GA GTC", + "GC TATT", + "TCA CAA", + "GAGG TT", + "TAA CC", + "GAA GGA", + "GC TCAA", + "GAAAA TT", + "CCA GCA", + "GTTTT AA", + "GTG CC", 
+ "TGA GGA", + "CA TAAA", + "GG TCC", + "TCA TTTT", + "TATT TATT", + "TAA TAAA", + "GCC TA", + "CTTTT AA", + "TAA GTG", + "TAA GTA", + "CTG GAA", + "CACA CA", + "GA CAGA", + "CAA CC", + "GG GAAA", + "CCA GAA", + "TCA GTT", + "TAA CTA", + "CTAA AAA", + "TGGG TT", + "TGA GTG", + "TAAAA TG", + "TATATATA TATATATA", + "GCA CTG", + "GA CTC", + "TA CAAA", + "TAAAA AAA", + "TC TACA", + "GTT GTG", + "TC GCC", + "CC CAAA", + "GTCA TG", + "CTG CTT", + "GGAA TG", + "CTA TTA", + "GA TATT", + "TA GAAA", + "GG CAGG", + "GA TGAA", + "GTA GAA", + "TCC TGA", + "TAA CTG", + "GCTG GG", + "GCAA TG", + "GCC CCA", + "GTT TGA", + "CATT TA", + "GTG CA", + "CTT GAA", + "GTG GAA", + "CTT CAA", + "TAAA TTA", + "GTG GCA", + "TCC TTA", + "GGAA AAA", + "TTTT TTA", + "CC TGTG", + "GTAA TG", + "GTG TTA", + "CTA GG", + "CAGG CTG", + "GA CACA", + "GAAAA AAA", + "TC GC", + "GTAA AAA", + "TGTT TA", + "TCTC TA", + "GTCC TG", + "CCA GGA", + "GAA CAA", + "TAA GTT", + "TGA GCA", + "GC TCCA", + "TAA GCA", + "CTCA TG", + "GTC TTA", + "CC CACA", + "CA TATT", + "GCC TCA", + "CA CTC", + "CTT CTA", + "TGA TTTT", + "TC GCA", + "CC TGTT", + "GAA GCA", + "GCAA AAA", + "GC GGA", + "CCA CAA", + "GC GCA", + "CA TATA", + "GA CATT", + "GTT CTA", + "CAAAA TT", + "GAAA GAAA", + "CC CGG", + "TA CACA", + "CCAA AAA", + "GAGG TG", + "GG CTCA", + "CA GTGA", + "TCC CAA", + "TA TCTT", + "TGA GTA", + "TC GTA", + "TTTT CTT", + "GTG GGA", + "GA GCTG", + "CC CTCC", + "TAGG TT", + "TTA GG", + "TAA TATT", + "CCA GCC", + "CA TCTT", + "GTC TGA", + "GTT TCC", + "CC TGAA", + "GGA GCA", + "GAAAA TG", + "TCA GTA", + "TAA CCA", + "GA TGTT", + "CTG TTA", + "CA TGTT", + "GG CGG", + "CA TGTG", + "GG GAGA", + "CTT TGA", + "TCTT TCTT", + "AAAAAA AAA", + "GGGG TG", + "CTT TCC", + "CTT GTT", + "GCA TTA", + "CC CAGA", + "CAAA TA", + "TC GGA", + "CA GCTT", + "TCA CTA", + "TAA TTAA", + "TAA GGA", + "GAA CTG", + "GCA CAA", + "GC GTT", + "GG CTC", + "TC TTTTA", + "CC TCCA", + "GG CAAA", + "CA GCTG", + "CTA CAA", + "TA CATT", + "GC TATG", + "CTT GTG", + 
"GA GTCA", + "GTTA TG", + "CTG CCA", + "GTC TCC", + "TGA CCA", + "CA CCTG", + "TATA TTA", + "TGA TCA", + "CA GCAA", + "GA TGTG", + "GTC TTTT", + "CTA GAA", + "GC TACA", + "CTG GGA", + "GGGG TT", + "CAA GTA", + "CAA GGA", + "CC CTCA", + "TA GCC", + "GTT GGA", + "GC TATA", + "TCTG AAA", + "TA TGTT", + "CC CCTT", + "GTT GTA", + "CC CTGA", + "TGA CTA", + "CAA GCA", + "CAA TAA", + "GAA CTT", + "CA TGAA", + "CTTA TG", + "CTAA TG", + "TC TAAAA", + "CCAA TG", + "GAA GTG", + "CC TCAA", + "CC CATT", + "CA GTCA", + "GAGAGAGA GAGAGAGA", + "TA TGTG", + "GCA GTGA", + "TCTCC TT", + "TCC CAAA", + "CCA TTA", + "CCA GTG", + "GCA TCA", + "TCAAA TT", + "GA TCTT", + "GA CAGG", + "GGA GTG", + "GTA GTA", + "CAA CTT", + "GAA GTT", + "CC CCTG", + "TCTC AAA", + "GG GTC", + "GA GCTT", + "TATG AAA", + "TA TGAA", + "GA CATG", + "CAA GTG", + "GA TATA", + "CA TCTG", + "CTG TGA", + "TAA TTTA", + "GG CAGA", + "GC GAA", + "CC TAAA", + "CCA TCA", + "CA CTGA", + "GGA CTA", + "GA CGG", + "CTC TTTT", + "CTG TCA", + "TCTCTCTC TCTCTCTC", + "TTAA TG", + "GCA GCC", + "CAAAA AAA", + "GCA CCA", + "CTA TTTT", + "GA GCAA", + "CTT GGA", + "CTG GTG", + "GAA TAA", + "TCC TTTT", + "GAA GTA", + "CA GTAA", + "CAA CCA", + "CTG TAA", + "TGA TAA", + "GCA GTT", + "CA CGG", + "TAAA TAA", + "CTG TTTT", + "CTA CTA", + "GC TCTA", + "C GAAAA", + "CAA GTT", + "CTT GTA", + "GAA TGA", + "GA GTGA", + "GCC TGA", + "GG TTTG", + "CC CATG", + "GG GGAA", + "GAA GAAA", + "TG TTA", + "CAA TTTT", + "TATA TTTT", + "CTC AAAA", + "GG TGGG", + "CC GTG", + "TATT TCA", + "CC CCAA", + "TATT TAA", + "GG CTGA", + "GG TGTG", + "CA TCAA", + "CA CTCA", + "TCTCA TT", + "GAA TTTT", + "GAA TCA", + "CAGG AAA", + "CA TACA", + "TA TTTTA", + "TTA TAA", + "GAGG AAA", + "CA TATG", + "CTT TCTT", + "CAA CTG", + "GG GCTG", + "CC CCCA", + "TTTG AAA", + "CATT AAA", + "CTT AAAA", + "GA CTGA", + "CAA TGA", + "GG CACA", + "CCA GTA", + "GGA TGA", + "GTTTT TG", + "GCA TTTT", + "GTG CCA", + "GCA GTA", + "GCC CTT", + "TC GTC", + "GAA CTA", + "GTG GTT", + "GTG TGA", + 
"GTG CTT", + "C GCTA", + "GTG TCA", + "TCTT TA", + "GCC TTA", + "CC TATT", + "CAAAA TG", + "GAA CCA", + "CTC CAGG", + "GA CTCA", + "CATG AAA", + "GC TAGG", + "TGTT AAA", + "GC GTA", + "GCA CTT", + "TCTT AAA", + "TAA GAAA", + "GG CCTG", + "TCC CTA", + "GTG GTA", + "CTG CTA", + "GGA GTT", + "GG TAAA", + "CAAA CAAA", + "GA TATG", + "TCA TGA", + "GA CCTT", + "TAA TATA", + "GC TAGA", + "GGA CTG", + "GG CATT", + "CA GTTA", + "CC CTAA", + "CA CCTT", + "GG TGAA", + "CA GCTA", + "GTG TTTT", + "CAA CTA", + "GA TCAA", + "GA GAAAA", + "TGTG AAA", + "AAAA TA", + "GATG AAA", + "CTC TAA", + "TTA CTT", + "GA TCTG", + "CCA CTT", + "GA GTTA", + "CAA TCA", + "GGATTA CAGG", + "TTTA TTTT", + "TACA TA", + "TTTTA TG", + "GA GTAA", + "GCTG AAA", + "GTA CTG", + "GC TCTC", + "TATG TA", + "TGTG TA", + "TCA TAA", + "GGA CTT", + "TCTCC AA", + "GCA TGA", + "GA CGA", + "CGCC TG", + "GA CCTG", + "GG TCTT", + "CA CCAA", + "GA TC", + "GA CCAA", + "AAAA TTA", + "GTAAA TT", + "CCA GTT", + "CA GAAAA", + "TAA CAAA", + "GG TGTT", + "GAAA TTA", + "TGCC TCA", + "CC GCC", + "CCA TTTT", + "CTT GCC", + "TCTG TA", + "CTG GCA", + "GG GATG", + "CCA TGA", + "CTA CTT", + "TAGG TG", + "TAAAAA TT", + "GAAA GAA", + "TAAAA TA", + "CTTTT TG", + "GTC AAAA", + "GGA CAA", + "TCTGA TT", + "CTC TCTT", + "TAA TTTG", + "CTC TTTG", + "GG CCTT", + "GGA TTTT", + "CTA CTG", + "GTT GCA", + "GG CTCC", + "CTC TGTG", + "CTC CAGCC", + "TTA CAA", + "GGA CCA", + "GGAA GGAA", + "TAAA GAA", + "TTA GAA", + "GTG AAAA", + "CTT GCA", + "TGGG TG", + "GGA GCC", + "CC TCTA", + "C T", + "GG GCTT", + "GG CATG", + "CTG GTT", + "TA CAGA", + "GATT AAA", + "CTC TGTT", + "TTA TCA", + "CTG AAAA", + "GTA GTT", + "GG GTCA", + "G T", + "CA GCCA", + "GC GTC", + "CA CTTA", + "GTG CTA", + "TC TTATT", + "GTA CTT", + "GG TATT", + "TA GAGA", + "TA CATG", + "CCA CTA", + "TGA GAAA", + "CAA TAAA", + "TCC AAAA", + "CGTG AA", + "GG TCTG", + "CTGAA TT", + "TCA GCC", + "CC TCTC", + "GTT AAAA", + "GG GATT", + "TCC TAA", + "CA CTAA", + "GGA GAAA", + "CCTT CCTT", + "GTT 
TCTT", + "TA TCAA", + "GA TACA", + "TAATCC CAGCA", + "CC GCA", + "TGAAA TT", + "C GTAAA", + "CTC TCTG", + "TC TTTTTT", + "GTA CAA", + "CCAAA TT", + "TGTA TTTT", + "TC GCTT", + "GG GTGA", + "GA TAGA", + "CTT TATT", + "TAAA CAA", + "GTT TATT", + "TGAA TA", + "CTA CCA", + "GTG TCC", + "CC CGA", + "TTTA TTA", + "CTCC AAA", + "TTTTTTTT TTTT", + "TCA TCC", + "GAA GCC", + "CTAAA TT", + "CAAA TTA", + "CCCC AAA", + "TCTT CTT", + "TAGG AAA", + "CA CGA", + "CA TTTTA", + "GTG CAA", + "TCTCC TG", + "TATTTT AA", + "GTT TGTT", + "GA GCCA", + "GG CCAA", + "CATT TCA", + "CA TCCA", + "CC TATA", + "GA CTTA", + "TCAAA TG", + "GTA TCA", + "TAAA TTTT", + "CTGA GGCA", + "GCC CAA", + "GG TTAA", + "TA TCTG", + "TGA CAGA", + "GGA GAGA", + "GCTG CTG", + "CC CTTA", + "TCC TCTG", + "GTA GCA", + "CCTG AAA", + "CC GAA", + "TTTT TAA", + "CTA TAA", + "CCTG TA", + "TTA CTG", + "GTA TAA", + "GG CGA", + "GA CTAA", + "TCA GAAA", + "GTG TGTG", + "CAAA GAA", + "CC TATG", + "GCA GAGA", + "CC GTT", + "TTTTA TTTT", + "GGAA GAA", + "TTA CTA", + "GCC TGGG", + "TCC CTC", + "TCC TCTT", + "GGA TCA", + "GG TCAA", + "TC GAGA", + "TATT CTT", + "TA CTC", + "GTTAA TT", + "GC GAGA", + "CTTAA TT", + "TCC TTTG", + "GTC TAA", + "CA CCCA", + "GG GTTA", + "GG GCAA", + "GGAAA TG", + "GCAAA TT", + "TA GATG", + "GCA GAAA", + "AAAAAAAA AAAAAAAA", + "CC TACA", + "GGA GTA", + "TC TAATT", + "CAA CAAA", + "TA GATT", + "GG TTTA", + "CC TAGA", + "CTT TAAA", + "TA CTTA", + "TAA TGAA", + "CTA TCA", + "TA GTAA", + "CAGA GAA", + "CAA GAAA", + "GGGG AAA", + "CGTT AA", + "CGTG TT", + "TCTG TCTG", + "TTTTAA TT", + "CTG GCC", + "TAAA TGA", + "C GTCAA", + "TTA GTA", + "GTC TCTG", + "TTTT AAAA", + "CA GTTTT", + "CTT CCTT", + "TATA TAA", + "GC TTTTA", + "TTTT TCA", + "GG TC", + "TTA TTAA", + "TTTT GTT", + "CA TAGA", + "TA GGAA", + "GAGA GAA", + "GTA GCTG", + "TTA TGA", + "GTA GTG", + "GGA GAGG", + "CTC TGAA", + "TA GTC", + "GA CTCC", + "TCC CTCC", + "TAA TGTT", + "CA TCTA", + "GCCA CCA", + "GTA CTA", + "TGGG AAA", + "CGCC TT", + "GCC CGG", + 
"GGA GGAA", + "GTA CCA", + "CGC AAA", + "CA TAAAA", + "TAA CATT", + "GC TAAAA", + "TCTT CTG", + "GCC AAAA", + "GTA TGA", + "GTC TTTG", + "TA CTGA", + "TCC CAGG", + "TTA TTTA", + "TTA GTT", + "GGA CC", + "TA TAAAA", + "CAAA CAA", + "CTT CTC", + "TCTA TCTA", + "GAAA TAA", + "GTG TAA", + "CTT TGTT", + "GA TAAAA", + "GCC CAGG", + "GC GATT", + "AAAAAA TT", + "TA CAGG", + "GG CTAA", + "TA GCTT", + "GTC TCTA", + "CTCC TGA", + "GAA TAAA", + "TTA CCA", + "GG GACA", + "GCCA CTG", + "GTT TAAA", + "GTC TGTG", + "TGA CAAA", + "TACA TTTT", + "GCCA CC", + "TG TTTT", + "TA GCAA", + "TTA TAAA", + "GA CCCA", + "GCA GC", + "CAGA CAGA", + "CA CAAAA", + "GCC CTA", + "TATT AAAA", + "C GTATT", + "CCA TCC", + "TC GATT", + "GAA GGAA", + "GA TCCA", + "TATT TGA", + "GTGAA TT", + "TA CCTT", + "C GTCTT", + "CC TAGG", + "TC GAAA", + "CTT TCTG", + "TGAA GAA", + "TCTC TCA", + "GTC TCTT", + "GGA GGGG", + "GTC TGTT", + "CTA TGA", + "GGAAA TT", + "GCA CACA", + "GCC TTTT", + "CA GTCC", + "CTG GTA", + "GCA TCC", + "TA GTTA", + "GG CTTA", + "GA GTCC", + "TG AAAA", + "TAGA TAGA", + "TGTT TGTT", + "TA CTCA", + "CATT TAA", + "GA TTTTA", + "CA CTCC", + "GAAA CAA", + "GC GCTG", + "TCTT TCA", + "CTG TCC", + "GAA CTCA", + "CGG AAA", + "TATT GTT", + "GCA CTA", + "TATT CAA", + "GC GGGG", + "GTG GCC", + "TAATT AAA", + "TA CTAA", + "GC GGTG", + "TA CCAA", + "GG TATA", + "CTA GTT", + "GCA GAGG", + "CTTTT TTTT", + "TTTTTTTT TTTTTTTT", + "TACA GTA", + "CCA TGTT", + "TA GTGA", + "CGTG TG", + "GC TCTGA", + "CTT CCTG", + "TC GCTG", + "TAAA TCA", + "TCCAA TT", + "GTT TCTG", + "GAA GAGA", + "GG GTAA", + "CCA TAA", + "TTA TATT", + "C GAATT", + "CC GGA", + "TGA GCC", + "CC GTA", + "CAGA GGA", + "GTG TTTG", + "GA CAAAA", + "TTTTTT AAA", + "GTT GCC", + "GA GTTTT", + "TC AAAAAA", + "TGTT TCA", + "TA TCTA", + "TCTC TCC", + "CTC CACA", + "TAAA TATT", + "TTTT CTG", + "CTC TCAA", + "CCTT AAA", + "TCTTTT AA", + "GAA CAAA", + "TTA GCA", + "GCTCA TG", + "TAAA GTA", + "GGA TAA", + "TTATT AAA", + "CTC CATT", + "TCTC TGA", + "TTA 
TTTG", + "CCTG TAA", + "TTA TATA", + "GA CTTTT", + "TGTT GTT", + "GCAAA TG", + "CTT CAAA", + "GAA TATT", + "GAA TCC", + "CTC TTAA", + "GCA TAA", + "GAA TGAA", + "CTTAA AAA", + "TAAAAA TG", + "TTTTAA AAA", + "CTC TGGG", + "TGA TCC", + "GC TCTCA", + "CTC CAGA", + "GAGTG CAGTG", + "CAA TATT", + "TA GAAAA", + "GTAAA TG", + "TA GCTG", + "GC TCAAA", + "GCA GGAA", + "TA CCTG", + "GG GAAAA", + "TTTT CTA", + "GGGG GGGG", + "CC GA", + "CTT TGAA", + "GGA GGTG", + "TA GTCA", + "GG CCCA", + "TGA TGTT", + "CAAA TAA", + "TCTT CCA", + "GC GCTT", + "GTA TTTG", + "GTC TC", + "GAAA TCA", + "TGA TAAA", + "CATT CTT", + "TA TCCA", + "GCC TCTG", + "TGA GATG", + "C GCCAA", + "GTTTTA TT", + "TATA TATT", + "GTA GGA", + "GACA GAA", + "CTCCAGCC TGGG", + "GC GTGA", + "GG TATG", + "GAGG GAGG", + "TCA TTTG", + "CTA CC", + "TACA GAA", + "GG TAGA", + "GA TCTA", + "GTC CATG", + "TGA GGAA", + "TAA TAAAA", + "TAAA CTT", + "TCA CATT", + "GGA GGCC", + "TCA CAAA", + "CA CTTTT", + "CGG CC", + "CAA CAGA", + "GTA GAGA", + "GTTA TTTT", + "CGTT TG", + "TC GTCA", + "TCTG CTG", + "CAA CACA", + "GG TAGG", + "GCA GCTG", + "TAGTA GAGA", + "CAA GCC", + "GCA TTTG", + "TAA TATG", + "GCTT AAA", + "GCTT CTG", + "CTC TCCA", + "TCA TCTT", + "C GTCTG", + "TCA TTTA", + "CA TAGG", + "GC TCCTT", + "TGTT CTT", + "TACA TTA", + "CACA GAA", + "TAAA TATA", + "TA GAGG", + "GA TAGG", + "TCC TGAA", + "GGA GCTG", + "TGA TATT", + "TCA TTAA", + "CTTTT AAA", + "TC GTTA", + "TAAA CTA", + "GTT TGAA", + "TAAAA TTA", + "CA CCCC", + "TCA GAGA", + "CTCC TGCCTCA", + "TGA CATT", + "GTA TTTA", + "CTT CATT", + "GAAA CTG", + "TAA CACA", + "GTT CAAA", + "GGA GATG", + "TC GGCC", + "CAGCA TT", + "TC GATG", + "TATT CTA", + "CTG TGAA", + "TATT GAA", + "TTTT CCA", + "TATT TCTT", + "GGTG AAA", + "CTGA GAA", + "GCA CAGA", + "GC GAGG", + "CTG TGTG", + "TGAAA TG", + "TGA TGAA", + "GTCC AAA", + "CTCAA TT", + "TCCA GAA", + "GTA TATA", + "TAAA GTT", + "TCTC AAAA", + "TCCA TCA", + "GTC TGAA", + "TGA GAGA", + "TGA TTTG", + "TTA GCC", + "CTC CATG", + "TCC CTGA", 
+ "GA GCTA", + "CCCC CCCC", + "GTG GAAA", + "CTG GGAA", + "CAA TGAA", + "CCA CACA", + "CTT TCAA", + "C GGAGG", + "TC GTGA", + "CCA GAAA", + "GTTTT AAA", + "TGTT GAA", + "TCC TGTG", + "CTAAA TG", + "TCC TTTA", + "GTC TGGG", + "TCTC TTTT", + "TA CGG", + "TATT GTA", + "TTA GTG", + "TTA CC", + "TAATCCCAGCA CTTTG", + "TCTG GAA", + "CTT CTCA", + "CGCA TT", + "TATT TAAA", + "TCA CACA", + "TAA TCAA", + "GC GAAA", + "GG GCCA", + "GTT CATT", + "GAGAA AAA", + "TTTT GTA", + "TA CTTTT", + "TC GAGG", + "GTGAA AAA", + "CAA TATA", + "TCC CATG", + "CAA TTAA", + "CTG GAAA", + "CCCA GCA", + "TCC CATT", + "TCC TGTT", + "CTC TTTA", + "TCC CCTT", + "GTT TCAA", + "GTC CAGG", + "GGAA GGA", + "TA GTTTT", + "TGA CCTT", + "GTGCTG GGATTACAGG", + "TATT TATA", + "TCTG CAA", + "CTGAA AAA", + "TATG TTA", + "CTT CACA", + "GCA CAGG", + "CCTG CTG", + "TTTT TTAA", + "GTTA TTA", + "CC CTTTT", + "TGA TTTA", + "TA CAAAA", + "TAA GTAA", + "TTTT TAAA", + "CA TCTC", + "GTG GTGA", + "GTG GAGA", + "CTC TGCA", + "GTTAA AAA", + "TACA TACA", + "CTT TGTG", + "GGA CACA", + "TCTGA TG", + "TA TTATT", + "TCTT CTA", + "CTG TGTT", + "TCA GCTT", + "CTT TATA", + "GG CGC", + "TCC CTCA", + "GTA CC", + "TGGA GAA", + "CAAAAA TT", + "TCTT TAA", + "CTC TCTC", + "TGA GTGA", + "GCA GCTT", + "CGGA TT", + "TA CGA", + "TCTT GTT", + "TC GTAA", + "GCC TGTG", + "TATT CTG", + "GG GATA", + "GG GTCC", + "TGA GATT", + "CTTTTA TT", + "TCC CACA", + "CATG GTG", + "TTA GGA", + "GAA CACA", + "TCA TAAA", + "CAA CATT", + "GG TCCA", + "GAA TTTG", + "TATTAA TT", + "TCC TGGG", + "GCA GCAA", + "CTC TTCA", + "GAA GAGG", + "TCTG TCA", + "CTGAA TG", + "CCA CAAA", + "GTG GAGG", + "TGA TTAA", + "CTCC CTCC", + "CACACACACACACACA CACACACACACACACA", + "GC GATG", + "CATT CTG", + "GTA GAAA", + "TCA TCAA", + "TTTT CAA", + "TATG TATG", + "CCAAA TG", + "TAA TTTTA", + "TAA GGAA", + "CTT GAAA", + "AAAAAAAA AAAA", + "GC TCCTG", + "GCA GATG", + "GAAAAA TT", + "GA CGC", + "GTG GGGG", + "GTCAA TT", + "CTT GCTT", + "TGA CACA", + "GTG TGTT", + "CCA GAGA", + "CCCA GCC", 
+ "TAAA GAAA", + "GTC CATT", + "TAAA TTAA", + "CC CAAAA", + "GAA TTAA", + "TGAA TTA", + "TTTT TTTG", + "CCA GCTT", + "CAA TTTG", + "CTG TTTG", + "GTC TCAA", + "GTT TGTG", + "GG CATA", + "GG TACA", + "TGA TGTG", + "GATT TCA", + "TCTG CTT", + "GTAA TTA", + "TAA AAAAAA", + "GCC GCC", + "TGTGTGTGTGTGTGTG TGTGTGTGTGTGTGTG", + "GC GTCA", + "GC TCATT", + "GAA CCTG", + "TAAA CAAA", + "GTG CTGA", + "TCA GGAA", + "TCC TCAA", + "TCTA TTTT", + "TCTG TTTT", + "CAGA GCA", + "CCA GGAA", + "GTC TTTA", + "TCTT CAA", + "TCAAAA TT", + "GC TTATT", + "GTT CCTT", + "CA CCTA", + "TCA CTGA", + "GAA GCAA", + "TAAA GA", + "TCC TTCA", + "TCTCA TG", + "TCA GTGA", + "TACA CAA", + "CA CGTG", + "CC TAAAA", + "GCC TTTG", + "GG CTTTT", + "GTT GAAA", + "GTT CTC", + "CTA GA", + "CTA CAAA", + "GCA CAAA", + "TTA CATT", + "GG CCCC", + "TAA TGTG", + "CTG CCTT", + "TCC CAGA", + "GTGAA TG", + "GGA CAGG", + "GGA TGTG", + "GTT TATA", + "TGA CCAA", + "GTG GCTG", + "GTT CTCA", + "CTTA TTTT", + "CTG GAGA", + "TTA CAAA", + "GTC TTCA", + "CAA GAGA", + "CCA TTTG", + "TCA CAGA", + "CTA GTA", + "CA TTATT", + "TTA GA", + "GC TCTCC", + "GC GCCA", + "TATG TTTT", + "TCC TCCA", + "CAGAA AAA", + "GTG GGAA", + "TAA TCTT", + "TGA GTCA", + "CTG CTC", + "GTC TCCA", + "TCA TGTT", + "GTT TCCA", + "TAA GCAA", + "CTAA AAATA", + "TGA CTGA", + "TC GGTT", + "TTA GAAA", + "TAA GCC", + "TAAA GCA", + "CC TCTCC", + "CC TCCTT", + "TCA GATT", + "TATG AAAA", + "GCTGA TG", + "CATA TTTT", + "GC TCCAA", + "CGG CGG", + "CCA CTGA", + "CA GCAAA", + "CTG TCTT", + "CTA GCA", + "TC GGGG", + "CACA GCA", + "GC TGATT", + "CTA GGA", + "TAA CTC", + "TCA TATT", + "CCTT CTT", + "CTG CAAA", + "CC CGC", + "GG TCTA", + "CCCA GGA", + "GTG TCTG", + "TAATAA TAATAA", + "TCA CATG", + "CAA TTTA", + "TATATATATATATATA TATATATATATATATA", + "CCA CAGA", + "TCAA TTTT", + "GTA TTAA", + "GAA CATT", + "TCTC TTA", + "CTA TTTG", + "TCTT TCC", + "GGTT AAA", + "GC TAATT", + "CTG CTGA", + "TA CCTA", + "CAGG GTT", + "TC GCCA", + "CAAAAA TTA", + "CTT CTGA", + "GCA TGTG", + "CTA 
TTAA", + "GCA CATG", + "CAA CATG", + "TCA TGAA", + "GAA TGTT", + "GG GTTTT", + "CTG CCTG", + "GTC CACA", + "TAAA CA", + "CTC TGGA", + "GA CCCC", + "GG CAAAA", + "TCTG TTA", + "CTA GTG", + "CTA TATA", + "TCA GTCA", + "TAA CTAA", + "GAA GATG", + "GTC TTAA", + "CAA GGAA", + "GTAA AAAA", + "TCC CCTG", + "TC GCAA", + "TCTG CCTG", + "CC TTTTA", + "GTCC CAGCTA", + "TATA TATG", + "TATT GTG", + "TGTG TTTT", + "GC GCAA", + "CACA GTG", + "TAA GATT", + "CTC TGTA", + "GGAGG CTGA", + "GGA CAAA", + "TATTAA AAA", + "TC GTCC", + "TC GGAA", + "CTA TAAA", + "CTT CAGA", + "CTA GAAA", + "CATT CAA", + "CA CGCA", + "CAGGA TT", + "CCA TCTT", + "GTA GCC", + "GAA TTTA", + "CA CGC", + "CAA TCC", + "TGA GCAA", + "GAA GCTG", + "TCAA TTA", + "GAA GTCA", + "CTG CACA", + "CCA CGG", + "GGA TCTT", + "CTCCTGCCTCA GCCTCC", + "TAAA TGAA", + "CC GTC", + "TC GGTG", + "TTTTA TTA", + "GCA GGGG", + "GCA GGTG", + "TCTA TTA", + "TAA CTTA", + "CTAA TTTT", + "CC CGCC", + "TAA TACA", + "GGATT AAA", + "TCTC TCTG", + "GCTT CTT", + "CATT TATT", + "CCA GAGG", + "GGA CAGA", + "GCCAA TT", + "TCC CCAA", + "GTT GATT", + "GAA GAAAA", + "GCA TTTA", + "CTC TAAA", + "CACACACA CACA", + "CC TCAAA", + "TA TAATT", + "CAA TGTT", + "GCC CAGA", + "GTA TATT", + "CTAA AAAA", + "CCA CAGG", + "TAA GAGA", + "TCC TTAA", + "TA TTTTTT", + "GAA TATA", + "GGA TTTG", + "GTG TGAA", + "CTG GCTT", + "GC GGCA", + "TCC GCC", + "GCA TCTT", + "TC TAATA", + "CTG CATT", + "CTC TGCC", + "TCA CTCA", + "TCA GCAA", + "TATTA TG", + "CCA GCTG", + "GA TCTC", + "GCC TCTT", + "CTT CCAA", + "TCC TAAA", + "TCA TCTG", + "CTA TTTA", + "CTG CAGG", + "CAA GCAA", + "GC GGAA", + "GAAA TAAA", + "TAAAA TAA", + "TCA CCTT", + "CCA TGTG", + "GA CCTA", + "CAGA TGA", + "GTG GCTT", + "TTATTA TTATTA", + "TCC CGG", + "TATT TGTT", + "CTG TAAA", + "TCCA TCCA", + "CTG TATA", + "GTT TCTA", + "GTT GCTT", + "CCA TGAA", + "GC TCTTA", + "CTT CATG", + "GTT CCTG", + "GCTG GGA", + "TCA GAGG", + "CATT AAAA", + "TCA GTAA", + "GAA TGTG", + "CTTA TTA", + "GCA CTGA", + "TGA GGTT", + "CA 
TCAAA", + "CTT CTCC", + "GTT TATG", + "CTT TCCA", + "GTG CCTG", + "GAAA GGA", + "GCA TCTG", + "TA CCCA", + "TAA CAGA", + "AAAAAAAA AAA", + "CTA TGAA", + "CA GTAAA", + "TA GCTA", + "TC GTTTT", + "GTG TCTT", + "GA GCAAA", + "TC TAAAAA", + "GTT CACA", + "GAAA TGA", + "CAAA TGA", + "GCC CTGA", + "GTG TTTA", + "TCA TGTG", + "CATA TTA", + "TCAAAA AAA", + "TAA GTTA", + "TCTC TCTT", + "CCA GTGA", + "CC TCTGA", + "CAA GATG", + "GCC TGTT", + "GTT TGGG", + "CATT CATT", + "GCC CCTG", + "GTT CTGA", + "GC GGCC", + "GC GGTT", + "CAAAA CAAAA", + "TACA TATA", + "GAATT AAA", + "TCAA GAA", + "CTG TATT", + "TTTT TATT", + "GA TTATT", + "TCTAA TG", + "GTT GCTG", + "TGAA TGAA", + "TCA GCTG", + "CTT GATT", + "CAGAA TG", + "CTAA TTA", + "TATAA TG", + "GTTTT GTTTT", + "CCA GCCTG", + "TGA TGGA", + "GCA GATT", + "CTC TATT", + "GCA GTCA", + "TAA GTGA", + "CTA CACA", + "CGCA TG", + "TA GCCA", + "GTG GCTCA", + "CAAA TAAA", + "GTG CTCA", + "TTTT TTTTTT", + "TAA CATG", + "TCCCA GCTA", + "CAAA GTA", + "TCA TATA", + "CAGCA TG", + "TGA TCTT", + "CA TAATT", + "TGTG TTA", + "TTTT GAA", + "TTAA TTA", + "GATA TTA", + "TCA TTCA", + "TGA TATA", + "TGA CTCA", + "GA CGTT", + "TGA CATG", + "GTT GTGA", + "CA TTTTTT", + "GCC TGGA", + "CTA TGTT", + "CTT TGGG", + "GTC TCAAA", + "CTG GCTG", + "CCA CATG", + "GG CGTG", + "CTTAA TG", + "TAA GATG", + "GTA TAAA", + "TGTA TTA", + "TAA CTCA", + "GAGAGAGAGAGAGAGA GAGAGAGAGAGAGAGA", + "GCA TGAA", + "GTTAA TG", + "TCCA GGA", + "GAGA GAAA", + "TCTC TGTG", + "CTC TCTA", + "CCA CCTG", + "GCCA GGA", + "CTG GAGG", + "CCA TTTA", + "GTC TGGA", + "GCC CACA", + "TAGA GAA", + "CAA CTCA", + "GGCA GGA", + "TCTTA TG", + "CAAA GGA", + "GG TAAAA", + "GAGA GGA", + "GTC CAGA", + "GCC CTCA", + "GATA TTTT", + "CAGG GAA", + "CCA CATT", + "GA GGAGG", + "GAAA CTT", + "CA GAATT", + "TCA GATG", + "TATT TCC", + "TACA GTG", + "TGA GCTG", + "CCA TCTG", + "GAGAA TG", + "TCAA CAA", + "A TT", + "TAA CTGA", + "TGA GAGG", + "CA CTGAA", + "CCA CCTT", + "CTG CAGA", + "TCA CCAA", + "TGA GCTT", + "CAAA GCA", 
+ "GG TTTTA", + "CGG GGTT", + "TCCAA AAA", + "TATG TATA", + "CCA GATG", + "TCCA TTTT", + "CTG CTCA", + "GA TAATT", + "CCA CCAA", + "CTCC TCC", + "GA GAATT", + "GAAA GTA", + "TAAAA TAAAA", + "CTT CTTA", + "CTG TTTA", + "GAA TCAA", + "GCA TGTT", + "GCA CGG", + "GA CTGAA", + "GTG CACA", + "GA CGTG", + "TATA CAA", + "TC GACA", + "GAA GACA", + "TAAA GGA", + "GA TCAAA", + "CAGTG TG", + "CTA GCC", + "GAGG AAAA", + "TCTG AAAA", + "GAA CCCA", + "GATG GATG", + "GTT CTTA", + "CTA TATT", + "GCA TTAA", + "TCTCTCTCTCTCTCTC TCTCTCTCTCTCTCTC", + "TCA GTC", + "TATTTT TG", + "GAGGA TT", + "GTA TGTG", + "TAA CCAA", + "GTT GTTTT", + "TTTT TCTT", + "GTG TTAA", + "CTT GGAA", + "AAAAAA TG", + "CAA TGTG", + "GTG CCTT", + "GCC TCAA", + "GA GTCTT", + "GCTAA TTTT", + "CGAA AAA", + "GTG TATA", + "GC GTTA", + "CTGCA CTCCAGCCTGGG", + "GTT CATG", + "CAAA GAAA", + "GCA GTAA", + "GGA TGAA", + "CTT TATG", + "CAGG AAAA", + "TCC TGCA", + "CTG TCTG", + "GAA CATG", + "GGA TGGA", + "GCC TGAA", + "CAAAAA TG", + "TCCAA TG", + "CCA GCAA", + "GG CCTA", + "CAA CTGA", + "GCA CCTG", + "GTC TATT", + "CC TCTCA", + "GTG GTCA", + "GTG TAAA", + "GTA CACA", + "GTAAAA TT", + "GTA CATT", + "TATA TAAA", + "CTG TTAA", + "TAA GTCA", + "GCC TCCA", + "AAATT AAA", + "GTG CAGG", + "TCC TGGA", + "GTG CAAA", + "GC GTCC", + "CCA TTAA", + "GGA GGGA", + "TCA CTTA", + "TCATT AAA", + "CAA CATA", + "TAA TAGA", + "TAA TGTA", + "GA TTTTTT", + "GTT GTCA", + "GGA GACA", + "GTG TGGG", + "TCA CAGG", + "TC GGCA", + "CTCC CTG", + "GA CCAAA", + "TGTT TATT", + "CGAA TG", + "CTCAA TG", + "TCA CCTG", + "CA GTGTT", + "TGA GACA", + "TA GGGG", + "GAAAAA TG", + "GTT GAGA", + "TC GATA", + "CTC GGGAGG", + "GTT GTC", + "CCA GTCA", + "GCC CAGGCTG", + "GAA CAGA", + "GGCTCA CTGCAA", + "GCA GACA", + "TGA GGTG", + "CA CGTT", + "TAA GAAAA", + "CCA GGCA", + "GTA TCTT", + "CTTGG GAGG", + "CTT TCTA", + "CC GCTG", + "GA GCTCA", + "GAGA CAGA", + "CTT CAGG", + "GCA CATT", + "GTA CAAA", + "CTT GTAA", + "GTG GGTG", + "GAA GTGA", + "GG TCTC", + "GTA TGTT", + "GCA 
CTCA", + "TTA TGTT", + "CAA GTCA", + "CAA GTGA", + "GAAA CTA", + "TAAA TAAAA", + "TCTT AAAA", + "GTT GGAA", + "GTT CTAA", + "CCA CTC", + "CA GTGAA", + "GAAA GG", + "GCA CGA", + "TAA CTTTT", + "GTT GTTA", + "TCA GTTA", + "CGGA TG", + "TATT TGAA", + "CC CTGAA", + "GCC CTC", + "CTT CTAA", + "TTTG TTTT", + "GA GCTGA", + "CTG TGGG", + "CAA GATT", + "GAA GCTT", + "TGA GTAA", + "CTT GCTG", + "GGA TGGG", + "CGTA TG", + "TCCA TTA", + "GTC TGCA", + "GCCA TTTT", + "GTT GTAA", + "CACA CAA", + "GGACTA CAGG", + "C GTTTTA", + "TCTT CC", + "TAA CCTT", + "CTT TAAAA", + "TGAA TTTT", + "CTA CAGA", + "GCAA GAA", + "TAA CAAAA", + "CAATT AAA", + "CCA CTCA", + "CATG GTGAAA", + "CCCA GAA", + "CTA CATT", + "CC GAGG", + "TCCA GTG", + "TGA GTTA", + "GGA GTCA", + "TAA CGA", + "GA GTAAA", + "GA CTCTG", + "GGA GCTT", + "TA CTCC", + "CTG CATG", + "GC TTTTTT", + "GTC TAAA", + "GTG CGG", + "CA TCTCA", + "TGA TCAA", + "GGA GATT", + "GC AAAAAA", + "CA CCAAA", + "TGA CGG", + "CAGA GG", + "GTT GATG", + "CTT GTCA", + "TCCA CCTG", + "GGA GCAA", + "CAA GTAA", + "CCA TAAA", + "GTG CATG", + "GCA TATT", + "GTA GATT", + "GCC TAA", + "CTCAA AAA", + "GGA GAAAA", + "CTA TCC", + "TAATA TTA", + "GTG CTC", + "CAA TATG", + "TGTG GAA", + "TGA CTC", + "GTG TATG", + "TTTTAA TG", + "GC TCTAA", + "CACAA TG", + "CA GCTCA", + "GTT GGTT", + "CTAAAA TT", + "GTC TATG", + "TGTG AAAA", + "CTG GGTT", + "CCCC TCC", + "CC CTCTT", + "GCA GGGA", + "GAAA CCA", + "CATT TCC", + "GCA GCCA", + "TCA TATG", + "GCA GGCA", + "C GTAAAA", + "TGA CCTG", + "CAGA GGTT", + "CTT GTGA", + "TTA TCTT", + "CTG TATG", + "GTCAA TG", + "GGA CGG", + "GC GTAA", + "CAAA CTA", + "TAAA TGTT", + "CTT CGG", + "CTCC CCA", + "TACAA TG", + "TCTG TAA", + "GAA TATG", + "GC GGGA", + "GGA CATT", + "TTA TGAA", + "GGA TGTT", + "GGA CATG", + "TCA GGTG", + "CAA CAAAA", + "GAAA GAGA", + "GTG GATG", + "GG GCTA", + "CCA TCAA", + "CA GCTGA", + "CTC CACC", + "CAA TCAA", + "GTG GTC", + "TGA CAGG", + "CCA TTCA", + "GTCC CTG", + "CAGA CACA", + "GTT GGTG", + "CC TCCTG", + "GAA 
CTGA", + "TATT CATT", + "GCC CATG", + "CAA TCTT", + "GAAA GCA", + "GAA TCTG", + "TTA TTTTA", + "GTT TGGA", + "TTTT TGTT", + "GGGAA TG", + "GC GACA", + "TAAA CTG", + "CCA TATT", + "GGA TCC", + "CAA GCTT", + "TAAAAAA AAA", + "TCA CTC", + "CA CTGTT", + "TGTTAA TT", + "GGA CTGA", + "GGA GTGA", + "CATA CACA", + "GTT TGTA", + "TCCA GCA", + "GTG CATT", + "GG AAAAAA", + "CCAA GAA", + "TCAA TA", + "CTT CCCA", + "TGA GAAAA", + "GGCC TCCCAAA", + "CAA GCTG", + "GCC CAAA", + "TGA CTTA", + "CA GCCTT", + "CTG GATT", + "TTTT TTTA", + "TCA CGG", + "GCA GTTA", + "TGA CTAA", + "TTA CAGG", + "TGA TATG", + "TAA TTATT", + "TCTT GAA", + "GCC CCTT", + "GTT CAGA", + "CTC TATG", + "CCA TGGA", + "GAGG GAA", + "GGA GGCA", + "CTT TGCA", + "TCTT GG", + "GGA GGTT", + "GCCAA TG", + "CTG GTGA", + "CAA CCAA", + "CCA GTC", + "CTT GAGA", + "TACA GCA", + "CTT GTC", + "GA CGGA", + "CTT CTTTT", + "GTG GC", + "GAGGA TG", + "CAA TAAAA", + "GAAA TTTT", + "AAAA AAAAAA", + "CTC TATA", + "GTA TGAA", + "CTT GTTA", + "TAA CATA", + "CAAA CACA", + "TGATT AAA", + "GCTC TGTT", + "GTG GGTT", + "GTT GGGG", + "GTG TGTA", + "GTAA TTTT", + "GTA TCC", + "TGTGTGTG TGTG", + "TCTT CCTT", + "TCA CTAA", + "TCTCC AAA", + "TA TCAAA", + "TGA TGGG", + "GGA TATT", + "CAAA TTTT", + "GTT CAGG", + "GTG GATT", + "GTG CAGA", + "GCTG CC", + "CTCA GAA", + "GCA GTC", + "GGA TAAA", + "GCC TTCA", + "CCA GGTG", + "TA TCTC", + "CAA TGCA", + "CCCA CTG", + "GTG TATT", + "CGA CAGA", + "TGA GATA", + "CCA GGTT", + "TGTT TAA", + "CATCA TG", + "TGA TTCA", + "GCAA TTA", + "GAAA TGAA", + "CTT GGTT", + "GAA GATT", + "GGA TTAA", + "CC TCATT", + "GGCCA GGCTG", + "GCTA TTA", + "GCCA GCA", + "GAGA CAGG", + "CTT GAGG", + "CA GTCTT", + "GTT CTCC", + "TATT TCAA", + "TGA CGA", + "CATG AAAA", + "CATTA TG", + "TAAA TTTA", + "GA GTGAA", + "CAA CAGG", + "TAA GCTT", + "CACA TTTT", + "GA TCTCA", + "TA GTCC", + "GACC CTG", + "TAA TGCA", + "TAA GTC", + "TAA TAATT", + "GAA GTAA", + "CAA CTC", + "CA TCATT", + "GA CGAA", + "GAAA CAAA", + "TATT TCTG", + "CATTAA TT", + 
"CCA CCCC", + "TAATA TTTT", + "GTT TAAAA", + "GTA TCTG", + "GTCAA AAA", + "GATG CTG", + "TGTT CTG", + "GG TCAAA", + "GTA GGAA", + "GTA TATG", + "TGA TCTG", + "GGGG CTG", + "GCA TCAA", + "GCCAA AAA", + "CCA CGA", + "GC TAATG", + "CAGA GAAA", + "CCTT CTG", + "TCC TCTA", + "GCA GGTT", + "CTCA CTG", + "TAGA TTA", + "GCC GAGA", + "CCA TCCA", + "CTT TACA", + "GTA CATG", + "GCA CCAA", + "CTT TGTA", + "CTA TGTG", + "TCA CTTTT", + "TGA GTC", + "CAA GAAAA", + "CTGA CTG", + "GTTTT TTTT", + "GCA TAAA", + "TAA TCTG", + "GAA AAAAAA", + "CAGGA TG", + "TGA GCCA", + "GAA TTCA", + "TCA GACA", + "GTT CCAA", + "TCA GGTT", + "CAAA CTG", + "CATT TCTT", + "TGTT AAAA", + "CCA GACA", + "CAA GTTA", + "CATG TTA", + "CATT CTA", + "TCTTTT TG", + "TGA GGGG", + "CACA TTA", + "TAAAA TAAA", + "GCA TATA", + "TGTT CTA", + "GAA GGGG", + "GAGTG TG", + "TAA GACA", + "GAA CTC", + "CCA GTAA", + "GAGA GAGG", + "GC GACC", + "CAA TTCA", + "CGG CTG", + "CCA GATT", + "CCTG GG", + "GGAA GAAA", + "GAGA GG", + "TCAAAA TG", + "CCTCA TG", + "TAAA GG", + "CTT TGGA", + "CCA GGGA", + "GTA CAGA", + "CTGAGGCA GGA", + "TGTT TCTT", + "CCA GGCTG", + "CTGA GG", + "GAGG CTG", + "CTCC TGGG", + "GAA GTC", + "CGA CC", + "GGA CTCA", + "GGA GTC", + "CA CAATT", + "GTG TTCA", + "GA CTAAA", + "GTCA TTA", + "CAAAA TTA", + "TGAA GAAA", + "GCA CCTT", + "GTT TGCA", + "TCC TGCC", + "GTA GATG", + "GCC TGCA", + "GA GTTAA", + "TCC CTTA", + "GTG GTTA", + "TC GGGA", + "TACA TAA", + "TCTC TCCA", + "CA CTAAA", + "TATATATA TATA", + "GTG GCAA", + "CACCA TG", + "TTTG AAAA", + "CACA CTG", + "CTT GGTG", + "TACA CTG", + "CC TCCAA", + "CAA CCTT", + "CA GCCAA", + "TTTT CAAA", + "TGA TAGA", + "TACA CTA", + "TCTG GG", + "TCC CAGCA", + "TAGG AAAA", + "CTT GGGG", + "TC TGTGAA", + "CC TTATT", + "CATT TAAA", + "TTTTA TTTTA", + "GCC CTCC", + "CTGA GCA", + "CC CGTG", + "GTA GTGA", + "TCC TATT", + "GAA GGTG", + "TGTG CTG", + "TCCA CTG", + "TAA TCTA", + "TGA TGTA", + "GTG GTAA", + "TAA TGGA", + "GATG AAAA", + "GTA GTAA", + "GTG GGGA", + "GTG TCAA", + "CAGA 
CTG", + "TC GAAAA", + "CTCA TTA", + "TAA TAATA", + "CTCA GAAA", + "CA TCCTT", + "CC GCTT", + "GGAA GG", + "CC GTGA", + "CCA CTCC", + "CTA GAGA", + "TAGAA TG", + "GGA TTTA", + "TTAA TTTT", + "GC TAATA", + "TCC CCCA", + "CAAA TATT", + "GA TCATG", + "TCTTAA TT", + "CA GTATT", + "GTCTT GAA", + "CC GAAA", + "CTA TTCA", + "TAA GATA", + "CTT GCAA", + "GCC CCAA", + "TCC CTAA", + "GAA GTTA", + "GA TGATG", + "CTT GATG", + "CC CTAAA", + "CCTG CCTG", + "GACA TTTT", + "CCA GCCA", + "TGTGTGTG TG", + "GTC TATA", + "TCTC TGTT", + "GTC TGTA", + "TA TAATA", + "CTT GTTTT", + "CGC CATT", + "CTCA GCA", + "TACA GTT", + "CAA GAGG", + "GGAA GCA", + "GCC TTTA", + "CC CCATT", + "CAA CGA", + "GTCA TTTT", + "CC CGCA", + "CA GTTAA", + "GAA TCTT", + "CATG TTTT", + "CC GGGG", + "CTA CTGA", + "TCA CGA", + "TAAA TTTG", + "GCC CATT", + "CTC TAGG", + "GGA CCTG", + "TCA GGGA", + "GAGA CTG", + "CC AAAAAA", + "GCC GG", + "CCA GGGG", + "TCA GAAAA", + "CA TCTGA", + "TCTT CAAA", + "CTA CAGG", + "GAGG CAGG", + "CATT GTA", + "TAAA TCAA", + "GA CTCTT", + "CTGA TTA", + "GCA TATG", + "GGA CCTT", + "CAA GACA", + "TATT TATG", + "TATTTT AAA", + "CC GAGA", + "TCA TTTTA", + "CTCA CTCA", + "CCA CCCA", + "CTC TAGA", + "CTA CATG", + "GTG CTTA", + "CAA CCTG", + "TC TGTGTT", + "TAAA TATG", + "CAAA GG", + "CC CTGTT", + "GTT CGG", + "TGA TAAAA", + "CA CGAA", + "GTT GAGG", + "CAGA GTGA", + "GAAA TTAA", + "CACA TA", + "GAA CAGG", + "TCTCC TGA", + "CC TGAGG", + "GGAGG CCAA", + "GTT TACA", + "TAA CAGG", + "TGTG GTG", + "GCCTCC CAAA", + "CCA TCCTG", + "GATT CTT", + "GAA TGGA", + "GTA GTCA", + "CTCC TCTG", + "GAAAGAAA GAAAGAAA", + "CC CTGTG", + "CAGTA TG", + "GC GATA", + "GGA CTC", + "GAAA GA", + "TGTT GG", + "GTA GCTT", + "CA TTTTAA", + "CC CTCTG", + "GCA TTCA", + "CGA TTA", + "TCA CATA", + "TAA TGAAA", + "GGAA TTA", + "CTG TCAA", + "TAAATT AAA", + "CAA GTC", + "GTA TTCA", + "GGCCA TG", + "CTT TAGA", + "TGTT TCC", + "CATG TA", + "GAA TAAAA", + "CAA CTAA", + "TCA TCTA", + "CA CTCTT", + "CA GTTTG", + "CA TAAAAA", + "GCA TGCA", + 
"GATT TA", + "GAA CCAA", + "TCTG TGA", + "TCA GCCA", + "TCTC CACA", + "TCTCA GCTCA", + "TATCA TG", + "GCA CTTA", + "CGC CAGG", + "CGG GG", + "CATTAA AAA", + "TTTG TTA", + "GGA TATA", + "TC GACC", + "TAA TCCA", + "CC GC", + "CATT GTT", + "CCA GTTA", + "GTA GTTA", + "CTA GGAA", + "CC TAATT", + "TCA TGGG", + "GAA CTAA", + "GCTA TTTT", + "CC GTCA", + "CAGA TTA", + "CCA TATA", + "CAA CTTA", + "TCA GTTTT", + "CTA CCTT", + "GCA CTC", + "GTG TGGA", + "GTG CCAA", + "GACAA TG", + "GA CAATT", + "GTA CCTT", + "TAAA CATT", + "CA GGAGG", + "GTG CGA", + "GAAAA TTA", + "TCTCTT AA", + "CC GATT", + "GA TGATT", + "CCA TGGG", + "TC GGTA", + "CCA TATG", + "CCA GTCC", + "GCC TTAA", + "TGA TCCA", + "GTT GCAA", + "GTA GAGG", + "CAGA TTTT", + "GTA CTTA", + "TCTTTCTT TCTTTCTT", + "GCTC TGTG", + "TCAA TAA", + "GTT TAGA", + "GTT CGA", + "CAA GGTT", + "CTCA TTTT", + "CACA GG", + "CATG CTG", + "GAA CGG", + "TA TAAAAA", + "GAA GGCA", + "GA GCATT", + "TGTT TGTG", + "GCTG TTA", + "GTCA CTG", + "CAAA TGAA", + "GTGA CTG", + "GTT CTTTT", + "CAGGCTG GAGTGCAGTG", + "TGA TGAAA", + "TAA CGG", + "CTA CTAA", + "GACA TTA", + "GGA CGA", + "GAGCA TG", + "GCA TGGG", + "CCA CTTA", + "CTA TCAA", + "GCTG TTTT", + "GTC GTG", + "CCTG GCC", + "TCTC TGAA", + "TGTT GTA", + "CAGC CAGG", + "GTT TAGG", + "CC GCAA", + "GGA GTAA", + "CCAA TTA", + "CAGC AAAA", + "TCA TCCA", + "CA CGTA", + "TCA TAGA", + "TAATT AAAA", + "CA CTTAA", + "TCTT TATT", + "GAGA TTA", + "TAA GAGG", + "CAAA TTAA", + "GA CGCA", + "CA CGGA", + "GTG TGCA", + "TC T", + "TATTA TTA", + "GAAA TATT", + "GGA GTTA", + "TCTT TGA", + "CTGA TTTT", + "TGTGAA TT", + "TCC CACC", + "CC CTTTG", + "CAA GGTG", + "CAGA GTT", + "CCCCA TG", + "CTA CCAA", + "CTCC AAAA", + "CTT CCCC", + "CTG CTAA", + "GATT AAAA", + "GC TTATG", + "CTA CTTA", + "TAAAAAA TT", + "TCA GTCC", + "CTATT AAA", + "GAA TGGG", + "CACA GTA", + "CAA CGG", + "GG TTATT", + "TCA CCCA", + "TGA TGCA", + "TAA TTTTTT", + "GTT TGAGA", + "GTATT AAA", + "GCC CCCA", + "TATA GTA", + "TA GTAAA", + "TGA TACA", + "GTG 
GTTTT", + "CCA CTAA", + "CACA GAGA", + "CCTCTG CCTCC", + "CAA AAAAAA", + "CTC TCTCC", + "CA TAATA", + "GAA GCCA", + "GTT CCCA", + "TGTG TTTG", + "CAA TGGA", + "TGAA GTA", + "CTT CATA", + "CA CTGTG", + "GC TCTTTT", + "TGA CATA", + "TAAA GAAAA", + "GAGAAA TG", + "CAGG GAGG", + "TGTT CAA", + "GA GCCAA", + "GACA GAGA", + "GG CTGAA", + "CAAA TATA", + "GTG GAAAA", + "TAA GGTT", + "GTGA TTA", + "GGA TCTG", + "GATG TTA", + "GACTA CACA", + "TCC TATA", + "CTG CCAA", + "TCC CGA", + "GTGA TTTT", + "GC GTTTT", + "CAGA GTA", + "GAAA GGAA", + "CA CTTTG", + "CCCC AAAA", + "GCAA CCCA", + "TGCA TTTT", + "TCTA GAA", + "TA CTTTG", + "TGA GGCA", + "CA TCTCC", + "TC GCTA", + "TGA CTTTT", + "GA GCCTG", + "CATT TGTT", + "TCTT TGTT", + "GCAAAA TT", + "CC TGATT", + "GA TAAAAA", + "GA GTGTT", + "TCC TGTA", + "TACA GAAA", + "TC CAGGAA", + "GCCA GTG", + "TAGA TTTT", + "TAA TAGG", + "CTCC TCA", + "CATTTT TG", + "CATT TCAA", + "GCCA TCA", + "TAAAA TATA", + "GA CTGTT", + "GCA TGGA", + "CAAA GTT", + "CA TGATT", + "GA GTTTG", + "CTA GCAA", + "CTT CCTA", + "GG GGAGG", + "CTA TATG", + "TATT TATTTT", + "CA CCATT", + "CC CTCAA", + "TTTTTTTT TTTTTT", + "GA TCATT", + "GTA CATA", + "CTC CATA", + "CCCC GTCTCTA", + "GCC TGCC", + "CTA GCTT", + "CC CGGA", + "GATG TTTT", + "GTA TTTTA", + "TCA GATA", + "CCTG GAA", + "TATT CCA", + "GGA CCAA", + "GCCA TTA", + "CGA CTGA", + "TAA GCTG", + "TAAA CACA", + "GTT TCTC", + "CA TCTTA", + "GAAA TTTG", + "TAA TGGG", + "TAAAA TTTT", + "CTG TTCA", + "CCTG TTA", + "TA CTGAA", + "TGA CCCA", + "TGA TTTTA", + "CTCC TTA", + "TATA GAA", + "CTG CGG", + "GC GGTA", + "GTG CTAA", + "CAGA GGAA", + "TACA TCA", + "TCAA TCAA", + "CTG CAGCC", + "TGAA TATT", + "TCTA CAA", + "CCA CATA", + "CC CGTT", + "TATA CACA", + "TCC TCTC", + "TCTA CTT", + "CC GGAA", + "CTTTT TTA", + "GAAA GAAAA", + "CTA TCTT", + "GA CTTTG", + "TGAA CAA", + "GCA GTTTT", + "GC TAAAAA", + "GAGG CGG", + "TAA TAAAAA", + "CTG GTCA", + "CAGA CAA", + "GGA TATG", + "TGAA GG", + "GCCA GAA", + "CCA GGCC", + "CCA CCATG", + "CAAA 
CTT", + "TCA TGTA", + "GCTG CTT", + "GTAA TA", + "CCCC CAA", + "CA GCCTG", + "TCAA CTT", + "TAAAA TTAA", + "GCTG AAAA", + "CGA CGA", + "GTG GGCA", + "TGA GGGA", + "CGC TCC", + "TTTT GTTTT", + "GA GTCAA", + "TCA TGCA", + "CTG CTTA", + "TAA GTTTT", + "GTA GCAA", + "CCTT GG", + "TGA CAAAA", + "CTG GTAA", + "TCTT TATA", + "TGTG TGTT", + "CTG GTC", + "CTG GCAA", + "CATT TCTG", + "CTC TACC", + "CTGA GGA", + "CTAAAA TG", + "CTA GATT", + "GTA TCAA", + "CA GTCAA", + "CTG GGTG", + "CC TCTTA", + "TGA GTTTT", + "TTTTA TTTA", + "CC TTTTTT", + "TATA TACA", + "TA GCAAA", + "AAA TTA", + "CTG GATG", + "GA TAATA", + "GA CAAAAA", + "CCTG GGA", + "GCTT TCA", + "GTA CAGG", + "GCTG GAA", + "CTA CTCA", + "CAA TGTA", + "GC GTGAA", + "GA TCCTT", + "TATTAA TG", + "GCC CGA", + "TAAA GTG", + "GCTT CCA", + "CATG GAA", + "TGAA GTT", + "CTT TCTC", + "TCTGTG TG", + "GTA TGTA", + "CAA TACA", + "TCAA GG", + "CC TCTAA", + "TGTG GG", + "GA TCTGA", + "GTA CTGA", + "TTAA TTAA", + "GCA GAAAA", + "CTA CATA", + "CC GGTG", + "GGGG AAAA", + "TACAA AAAA", + "TTTT GG", + "GTGA GAA", + "TCAA TAAA", + "TCAA GTT", + "CTCA GGA", + "CTA CTC", + "CAAA TCA", + "GGCA GAA", + "CC CGAA", + "TGTT GTG", + "GAGC AAAA", + "TATT TGTG", + "GTA GGTT", + "CTA CCTG", + "CA CAAAAA", + "CTCA GG", + "GCTT TA", + "CAGA GCAA", + "CTCA GTG", + "GGAA GAGA", + "TAA CCTG", + "GAAA TATA", + "CGA GAA", + "GTGA GG", + "CATT TATA", + "GGCA GCA", + "TC TAAATT", + "CCCA GTG", + "GCC TAGG", + "TGCA TTA", + "CC GTAA", + "CATT CCA", + "CTA GTTA", + "GA CTTAA", + "CTA TACA", + "GACA CAA", + "TCTT CACA", + "CC GGTT", + "TAAA GTAA", + "CTG TGGA", + "TAA GGTG", + "TCCA GTA", + "CAAA TTTA", + "AAATT AAAA", + "CCA TCTA", + "CTCC CTT", + "CTCC TTTT", + "GAGAGAGA GAGA", + "GGA GATA", + "CCTA TTA", + "CACC AAAA", + "CC GTTA", + "TGTT TATA", + "CTCA GGAGG", + "GA CGTA", + "GTCC TTA", + "GAAA GTT", + "GCTG GTG", + "CTC TACA", + "CAA TAGA", + "TAAAA TATT", + "GTA CCTG", + "GTA CTAA", + "CTT TGAAA", + "CCTT TCC", + "TAAAAA TTA", + "CTC GG", + "CAA GATA", + 
"CATT TGA", + "CACC TCA", + "GCCA GCC", + "GTC GG", + "GCA CATA", + "CA CTCAA", + "CTTTT AAAA", + "CAGGAA TT", + "GCC TATT", + "TCTT TCTG", + "CTGAGGCA GGAGAA", + "CAGG CAGG", + "CTA GTAA", + "TCCA TA", + "GAA CTTA", + "C G", + "GCTG TGA", + "GAAAA TA", + "TCTT CATT", + "GAGG GAGA", + "CCCA TCC", + "GAGG TGGG", + "GCC TCTA", + "GTA GGTG", + "TAAA CCA", + "GAA GGAAA", + "TATT GG", + "A TG", + "TCCA GTT", + "CCCA CAA", + "GAAA CACA", + "GTC TCAAAA", + "CTTTT CTTTT", + "TGAA GGA", + "TATT GATT", + "CTA TGTA", + "AAAAAAAA AAAAAA", + "TCCTT AAA", + "GC GCTA", + "TCCA CTT", + "GA CTCAA", + "TAAA TACA", + "TCA TGGA", + "TCTG GGA", + "TCC TATG", + "CTG TGCA", + "TCAA GTGA", + "TCA TAAAA", + "CA TCCAA", + "CCTT CCA", + "CTG TACA", + "GAA GGTT", + "CTG TGTA", + "GTCA CTT", + "TCA CAAAA", + "TCA GGCA", + "GTGTT AAA", + "CC CTTAA", + "CAAA GTG", + "GAAA TGTT", + "CTG GGGA", + "GA CGCC", + "TATA TGTG", + "CTA GATG", + "GAAATT AAA", + "GAA TGCA", + "GCA CTAA", + "CGG GAGG", + "GCCA CAA", + "CGC TTA", + "TCCA CAA", + "CAGA TA", + "TC TGAATT", + "TATTA TTTT", + "GC GCGG", + "CTC TGAAA", + "TCTCTT TG", + "TATT TCTA", + "GGGG TGGG", + "GGA TGCA", + "CCA CACC", + "TAAA TGTG", + "TCTT CCTG", + "GCAA GG", + "CTG CTCC", + "CTG GAGTG", + "CTGTT AAA", + "CACA CAAA", + "CTGA CTT", + "GAAAA GAAAA", + "CCTT CTCC", + "GAAA TAAAA", + "CCTCA GGTGA", + "GA TAATG", + "GAATT GCTT", + "CCAAAA TT", + "CGTG AAA", + "CACTG AAA", + "CAGTG AAA", + "GA TCTTA", + "GAGA TGGG", + "TCTG CCA", + "TGA GGTA", + "TATG GAA", + "TATA TTTTA", + "TGAA CTT", + "GCA GATA", + "CTTTT CTT", + "GTAAAA TG", + "TCTC TAA", + "TCTG CAAA", + "GA GCCTT", + "TA TCATT", + "CAA TTTTA", + "CC GCCA", + "TATT TAAAA", + "GAGA GATG", + "GAGA TGGA", + "GCCA GGATG", + "CGA GTAGCTG", + "TTCA TTTT", + "TATA CTT", + "GTC TACA", + "GTGA GTGA", + "GCTA CACA", + "GGGA GGA", + "CAA GGCA", + "GC TTTTAA", + "CA CTATT", + "GTT CATA", + "TCC TC", + "GTG GACA", + "TATT TGGA", + "CTC CAGTA", + "GTT CAGTT", + "CCAA GG", + "CAGA GCC", + "CTC GCC", + 
"CC GATG", + "GGAA TTTT", + "TCCA GCC", + "CC TCTTTT", + "GAA CCTT", + "CATG CACA", + "GTT TC", + "GAA GATA", + "TA CCCC", + "GCTG CCA", + "GGGG GAGG", + "GCAGTGA GCTGA", + "CTG TCTA", + "CGA GGA", + "CAA TGGG", + "GC TGTGAA", + "GAAA GTG", + "TACC AAAA", + "GTCA GG", + "CAGC TCC", + "TGTG CTT", + "GTC TAGG", + "TTTT TGTA", + "TTA TATG", + "TCA GGGG", + "TATT GTTA", + "CC TGAGA", + "TA TCTCA", + "CAA TCTG", + "CA CTCTG", + "GATT TAA", + "TGAA TAA", + "TCTT GTA", + "TCAA CTG", + "TCTC CAGG", + "CTA GAGG", + "CTGA GAAA", + "CTA GCTG", + "TCCA CCA", + "CGA TTTT", + "CC GGCC", + "GTT GACA", + "CTTA GAA", + "CA TAATG", + "GA GTATT", + "CACA GAAA", + "GA CTGTG", + "CTA TTTTA", + "TGA GGAAA", + "TTATT AAAA", + "CTTA TTTA", + "CAGA CTT", + "CA CGCC", + "GCTT GG", + "CCTG CTT", + "TAAA GCAA", + "CCTC GTGA", + "TA GAATT", + "CTTA CAA", + "TAAA GGAA", + "GTC TAGA", + "GTGA CTT", + "TACA TATG", + "GTCA GGA", + "GCTC CAGG", + "GAA GGGA", + "CA TGATG", + "TCA TCAAA", + "CGTT AAA", + "GTA CTCA", + "CTCC CAA", + "TATA TGTA", + "GGTA TTTT", + "TAA GCCA", + "C GAAATT", + "GTTTG TTTT", + "TCTG TCTT", + "TATA TCA", + "TGTT CATT", + "CAAA CCA", + "TTCA TTA", + "TATT TGTA", + "GATT GAA", + "CTA TAAAA", + "GATTAA TT", + "CCCA CCA", + "TCC TAGG", + "TAAA TGTA", + "CTCTT AAA", + "GCA GTCC", + "GC GGCTG", + "GTC TCGAA", + "TGAA TGA", + "CTG GGGG", + "GTC TCGA", + "GAA CAAAA", + "TGAA TCA", + "TGTATTTT TAGTAGAGA", + "GTTA TTAA", + "TTTTTT AAAA", + "GTCA GTG", + "CCCA TTA", + "CACA GGA", + "TATT CCTT", + "TCTG CCTT", + "CCTG GTG", + "GC GAGC", + "TA CTAAA", + "TACA CAAA", + "CC GTCC", + "GCTT TGTT", + "GCA TCCA", + "CA TCTAA", + "GC TGTGTT", + "GTA GACA", + "GCC TATG", + "TCTT TGTG", + "GATT CTG", + "CGCC CGG", + "GA TGAGA", + "TA TCTGA", + "TGAA TTTG", + "CC TGATG", + "TAAAA CAA", + "CTT TAGG", + "TTTT CCTT", + "TGAA TAAA", + "CGG GGA", + "CAAA CATT", + "GTA TGGA", + "GCTT AAAA", + "TA CCAAA", + "CAAA GAGA", + "CTCC TGCC", + "GTAAAA AAA", + "CACA GCC", + "CCA TGCA", + "TA CAATT", + "CTA 
GTGA", + "CTGA GTT", + "GAGTG AAA", + "TCTGTT TG", + "CTG TAGG", + "TATAA AAAA", + "GCATT AAA", + "GTC CATA", + "TGTTAA AAA", + "TGTT TGA", + "GAA TAGA", + "CTT CAAAA", + "CTG GACA", + "CTG TAGA", + "CCATT AAA", + "CTA TCTG", + "CACTA TG", + "TTA TCAA", + "TAA GTAAA", + "TAATCCCAGCACTTTG GGAGGCC", + "CCA GAAAA", + "TGAA GCA", + "TCC CTTTT", + "TCA TACA", + "TA CGTT", + "GCC GTG", + "GGAA GTG", + "GG CCAAA", + "GTA CCAA", + "TCTCTA CTAAAAATA", + "CATT GTG", + "TGTG TGA", + "GAAA CAGA", + "CTT GACA", + "GA TGAGG", + "GAGA TTTT", + "CCTT CAA", + "GAA TCTA", + "CTC TCCTT", + "GG CGGA", + "TCTATCTA TCTATCTA", + "CACA CAGA", + "TGTG TGTA", + "CAAA GCC", + "TGTG CCA", + "GTT GAAAA", + "CTC CAGCA", + "TCAA GGA", + "TA GCTCA", + "CGC TGA", + "CCTG AAAA", + "GA CTATT", + "GATT CCA", + "GCTT CTA", + "GTC TGCC", + "CTT GGCA", + "TGTG GTA", + "GCTT TGA", + "GCTC TCTG", + "CTCA CAGA", + "TCTT TAAA", + "CAAA GCAA", + "TA CTTAA", + "GCTT CAA", + "CATT GAA", + "GGA GGAAA", + "CTA TAGA", + "CTGA GGAA", + "CCTG GCA", + "CC CTATT", + "CTC GTG", + "TTA CACA", + "TTA GGAA", + "CTG GTTA", + "GTT GTCC", + "TAATG AAAA", + "TATT TACA", + "GG GAATT", + "GTA GTTTT", + "GCTG CAA", + "CTA CGG", + "GCC GGA", + "CTG GGCA", + "CCTT AAAA", + "GATG GAA", + "TAGATAGA TAGATAGA", + "TATG TAA", + "GTA CGG", + "TATT CAAA", + "GA TCTCC", + "CCTG TTTT", + "TATT GCA", + "GGAAGGAA GGAAGGAA", + "GG TAATT", + "TTA CAGA", + "TCA GC", + "GCAAAA TG", + "GAGA GCA", + "GTA GAAAA", + "CATT TGAA", + "TCTT CTTTT", + "TCC CATA", + "GTTA TTTA", + "CTA TCTA", + "CA TCCTG", + "TCTT GTG", + "TTA TTATT", + "CC CGTC", + "TACTA TG", + "TAAA CATA", + "TAA GGAAA", + "GCTT GTG", + "CTC TAAAA", + "GTTTT AAAA", + "GACA GGA", + "TCC TAGA", + "TCCA CCCA", + "GTT TGAAA", + "CCA TCTCA", + "CTAA GAA", + "GTA TCTA", + "GTGA GGA", + "GCTG GAGG", + "CCTGTAA TCCCAGCTA", + "GCAA CAA", + "CTT TCAAA", + "CAAA TGTT", + "CTT GTCC", + "TCTCAA AAA", + "TATT TATTA", + "TAA GGCA", + "GAGA GGAA", + "TA TGATT", + "GCA TCTA", + "C GTTATT", + "GCC 
TGTA", + "GTT TCAAA", + "CCTTCCTT CCTTCCTT", + "GG CTTTG", + "GTCA GAA", + "CATG CATG", + "GTCA TTTA", + "CTG GAAAA", + "CTT CGA", + "CCTA TTTT", + "CCAA CAA", + "TCCA TCC", + "TAAA GTTA", + "GTC TCTC", + "TAA TCAAA", + "GATTTT TG", + "GATT TCTT", + "GG GCTGA", + "GCA TGTA", + "CCTG GGTT", + "GAGA CAA", + "GCTG TCA", + "TGA TAGG", + "GGA GACC", + "CC GGCA", + "TAA TCTCA", + "TGAA TTAA", + "TCTG GTG", + "GCC TC", + "GG CGCA", + "CCA GCTA", + "CA GTCTG", + "TGAA CTA", + "GTAA GAA", + "CCTT TCA", + "TCCA TGA", + "CAAA GGAA", + "CTC TC", + "CTC TCTCA", + "CTC CAGC", + "GTA GATA", + "CCCC CTCC", + "GG CGCC", + "TCTG TCC", + "GA CCATT", + "CTT GAAAA", + "TTA TCC", + "TACA TGTG", + "CAAA TTTG", + "TTTT GTG", + "CAGA GTG", + "GTAA TAA", + "GTGA GTG", + "TTTT TCC", + "GG CTCTG", + "GCC CTAA", + "GG CTGTT", + "CC CAATT", + "CAGA GCTT", + "TATAAA TG", + "GA GTCTG", + "TCTTAA AAA", + "GTTTTA TG", + "GA TCCAA", + "GGCC CTG", + "GA TCCTG", + "TCAA GTG", + "GATT CAA", + "CCTC TCTT", + "GAGA CGG", + "CAGA TCA", + "TAAAA GAA", + "CTGA GCAA", + "CCTG CCA", + "CCTT CTA", + "CGC TCA", + "GG CTGTG", + "TGGG AAAA", + "GGA GCCTG", + "CTGA GTG", + "CGTC AAA", + "TCAA GTA", + "CGTAA TT", + "TTA CTTA", + "TATA CTA", + "GG GCAAA", + "CAA CTTTT", + "CTT TGCC", + "GC CAGGAA", + "CACA CTA", + "GCC CAGC", + "TAAATAAA TAAATAAA", + "CTT TCCTT", + "GGGA GAA", + "TATG GTA", + "CGG CCA", + "CCTC TCTG", + "GAAA GCAA", + "CAA GCCA", + "GG CGTT", + "CTC TTTTA", + "TCGGCC TCCCAAA", + "GATT TATT", + "CAA GTCC", + "TA TCTTA", + "GTTCAA GACCA", + "CTCA CACA", + "GAAA TCAA", + "TGA GACC", + "GG GTAAA", + "GCTT GTT", + "GA TTTTAA", + "TTTT TATA", + "CAGA GCTG", + "TC TGTTAA", + "GTAA TTAA", + "TCTT TGAA", + "CTT GCCA", + "TTTT CATT", + "CCA TGTA", + "TCTC GGCTCACTGCAA", + "GGA TTCA", + "TC TATTAA", + "TACA TAAA", + "GATT GATT", + "GGA GAGGA", + "CGC AAAA", + "GGA CTAA", + "TTA TGTG", + "GTCA CTCA", + "GACA GCA", + "CGA GTT", + "GATG GTT", + "GGAA GAGG", + "GCCAA CATGGTGAAA", + "GGA GCCA", + "TGAA CTG", + 
"CCTC TGTG", + "GTA TAAAA", + "TCC CAGAA", + "CATT TATG", + "GA TTATG", + "TGTT TCTG", + "GAGTG GGTT", + "TACA TATT", + "CTC CAGGA", + "GACA CTG", + "GG TCTCA", + "CC GGGA", + "TGTT TAAA", + "CTCA CCA", + "GGA CTTA", + "GCC CACC", + "CAAA TCAA", + "GAAA TGTG", + "TA GTTAA", + "TCTA TAA", + "TTA GATT", + "GTG TAGG", + "TACTG AAA", + "GCA CCCA", + "GTG GGCTG", + "GAA TGAAA", + "TCTA GTT", + "TCA GGAGA", + "TCCA CTA", + "CTCA GTT", + "TACTT AAA", + "GA CTCCA", + "TCCATT TG", + "CACA GCAA", + "GCTCATG CCTG", + "GGTG CTG", + "GCTT TCTT", + "GTG GCCA", + "TA CGTG", + "GTG CAGTG", + "TGAA GTCA", + "CCTT TAA", + "TCTCAGCTCA CTGCAA", + "GAAA TATG", + "CC TCAAAA", + "GGGG CGG", + "CGA CAA", + "GG TGATG", + "GTCTT AAA", + "CAGAAA TG", + "CGTCA TT", + "CCAA GCA", + "GGA TCAA", + "GTGCTG GGATTA", + "GCTG GCC", + "CGGA GCTT", + "TACA TGA", + "TGTT TGAA", + "TCTC CATT", + "TAA GCAAA", + "CCTT TCTT", + "TA CTGTT", + "TCCA TCTT", + "CTTA CTT", + "CGGA GGTT", + "CAAAA CAA", + "TCA TAGG", + "TTA CTAA", + "CTTA TTTG", + "GAA TGTA", + "CCCCA TGGA", + "TTA CTGA", + "CGG AAAA", + "CTC CAGTG", + "TGTT CCA", + "CAGA TGAA", + "GTT GATA", + "TCC CCCC", + "CATT GCA", + "CTCA GCC", + "CTTA CTG", + "TA TCCTT", + "CTTTTA TG", + "TGAGTA GCTG", + "GACTG AAA", + "CAA TGAAA", + "CGA CTG", + "CTT GGGA", + "GCAA GCA", + "TCA CTCC", + "GATT TGA", + "CATTTT AAA", + "TCAA CTA", + "GTCC AAAA", + "CACC CTG", + "TTA CCTT", + "CAA GGGG", + "TTTT GGA", + "GTTA TTTG", + "GCTA CTG", + "CTGAGGCAGGA GAATG", + "GTGA TGA", + "GTA GTC", + "TAGTA TG", + "GTA TAGA", + "GTG TCTA", + "GCTG CTA", + "TTA GTAA", + "TAAA CATG", + "GTCA CCA", + "CA TCTTTT", + "CATA TAA", + "TCTC TCTA", + "TTTTA TTAA", + "TATT CTAA", + "GAAA TTTA", + "CTT CCCTG", + "TAAA GATG", + "TA CGTA", + "GTT TATTA", + "GAAAA GAA", + "CCCA CCCA", + "CAATT AAAA", + "CC GACA", + "CAAA GTGA", + "CAAA CAAAA", + "GCAA TTTT", + "CGATT AA", + "TTA GAGA", + "CTGA TGA", + "GGA GGAGG", + "GTCC TGGG", + "TCA TGAAA", + "GCAA CCA", + "GTT GGCA", + "GCGG CGG", + "GTCC 
CCA", + "GTA GGGG", + "GCCA TGTT", + "GTT CGAGA", + "GCC TATA", + "TAAA TTCA", + "GG CCATT", + "GAAAA CAA", + "TGTG TATG", + "GTA CTC", + "TAGG GAA", + "CCTT GAA", + "TC TATTTG", + "GAGG GCA", + "GAAA CTGA", + "TA CGC", + "TA CAAAAA", + "TCA TTATT", + "GGAAAA TT", + "TCAA TATT", + "CC CGTA", + "GGA GAGAA", + "TTA GTTA", + "CTCA GAGA", + "TC GAGC", + "CTA GTCA", + "GATG GCA", + "TGAA CATT", + "CTA TGGG", + "CACA CCA", + "TCAA TTAA", + "GGAA CTG", + "TTA CATG", + "CTT TCATT", + "CAGC TCTG", + "TCTTTT TTTT", + "TAAA TCTT", + "TGA TCTA", + "CATA CAA", + "GC TCAAAA", + "GC TGTGTG", + "TCAA TCA", + "GATT TGAA", + "CCAA GGA", + "GTCC TCA", + "GTG CTCC", + "AAAA TAA", + "GTGA CAA", + "GCTCA CGCCTG", + "CGA CGG", + "TA TCCAA", + "CACA CATG", + "TCTC TCTCC", + "TGTG GTT", + "CTT GGTA", + "TCTG GTT", + "TTTA TAA", + "CTG CTTTT", + "TGTG TCA", + "CACA TCA", + "CC TAATG", + "C GTTTTTT", + "GCTG GCA", + "GA CGTC", + "TATAA TTA", + "TACA GTAA", + "GAAA GTAA", + "GTC TGAAA", + "CCCA TTTT", + "TATA TGA", + "CTT GATA", + "CTT TATTTT", + "CTT TATTA", + "GG CGAA", + "CCA TGCC", + "CCTG CCTT", + "GAAGAA GAAGAA", + "CTGA CTGA", + "GCC CTTA", + "TA TCTAA", + "GTG TTTTA", + "TGTG GCA", + "TATT GTAA", + "GCCA GAAA", + "CCCTG TCTC", + "CACA GGAA", + "AAAA CAA", + "AAAAAAAA AAAAAAA", + "TAA CTCC", + "GCC TAAA", + "CGA GTA", + "TA GTATT", + "GTATTTT TAGTAGAGA", + "GCTG CAGG", + "TATT GAAA", + "CCAGCC TGGG", + "GCTCC AAA", + "TA CGAA", + "GGCC TCC", + "TATA CAAA", + "CATG GCA", + "CATG CAA", + "TACA CCA", + "CTT TACCA", + "TACA GAGA", + "TATT CTTA", + "TATG TCA", + "TCAA GCA", + "TCAA TGA", + "GG CTCTT", + "GGAA GTT", + "TCCA TGTT", + "GCTT TCC", + "TATG TGA", + "GTG TAGA", + "TTTT TAAAA", + "GCTG GAGA", + "GTGA GAGA", + "CCTA GAA", + "CCTCC AAA", + "CCAA TGA", + "CAGG GCA", + "CTA TGCA", + "CTT CACC", + "CTA CAAAA", + "CTCA CC", + "GAGTA TG", + "TA GAAAAA", + "CTTTT GAA", + "TAAA GAGA", + "CATG TCA", + "TCTTTT AAA", + "CACA GTGA", + "GA TCTAA", + "TAA GGTA", + "CATA GAA", + "CGC GCC", + "CAGC 
TTA", + "TATA GTT", + "CGG GCC", + "TATC CATT", + "TGTTTG TTTT", + "GCTG GCTG", + "TACA GGA", + "CTCC TTTG", + "CAA TCTA", + "CCCC CTG", + "TATA CTG", + "CTGA GCC", + "CGG TTA", + "TGAA GTG", + "GCTT CCTT", + "TTTTA TTTG", + "TA GTGAA", + "CTGA GGTG", + "TCTT CTC", + "GACA GAAA", + "CTGAA CTGAA", + "CCTG GGAA", + "TCC CCAAA", + "TATG TATT", + "GATT TCTG", + "CATT CAAA", + "CACA GTT", + "GCTT GAA", + "GTG GATCA", + "CTGA GTGA", + "TGAA TTTA", + "TCAA CAAA", + "GG TCATT", + "GTAA TTTA", + "GC GACTT", + "CTGA GAGA", + "GTG CCCA", + "CTA GGTT", + "TCC TGAAA", + "GTC CACC", + "TCA CAGAA", + "GC GAAAA", + "GTA TGGG", + "TGAA CAAA", + "TAAA CAAAA", + "CC GTTTT", + "TC TCAATT", + "TCCA GAAA", + "GTAA CAA", + "GCA TTTTA", + "TCTC CATG", + "TTA TAAAA", + "CAGG CAA", + "CTAAAA AAA", + "GTT GGGA", + "TAAA GATT", + "TGAA GAGA", + "CCCC TCA", + "TGTT TATG", + "TCTA CTG", + "CCAA TTTT", + "GGTG GTG", + "GGAA CAA", + "TGTG GGA", + "TCTG CTA", + "GAA CGA", + "GTAA GTA", + "GTT GCCA", + "AAAA TTTT", + "GC GCGA", + "GAAA GATG", + "GTC TCTCA", + "TCCA TCAA", + "GCA GCTA", + "CACA TTTG", + "CTGA CAA", + "TCCA CC", + "GC T", + "CCCA CTT", + "GCA GGTA", + "GAGG CCA", + "TAAA GTCA", + "CTG GATA", + "CGG CAA" + ] + } +} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-4375/tokenizer_config.json b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-4375/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..12cee777f1285b52e37dffd583040cdba7f5a0d3 --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-4375/tokenizer_config.json @@ -0,0 +1,56 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "[UNK]", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "[CLS]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "[SEP]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "[PAD]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "[MASK]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "cache_dir": null, + "clean_up_tokenization_spaces": true, + "cls_token": "[CLS]", + "mask_token": "[MASK]", + "model_max_length": 512, + "pad_token": "[PAD]", + "padding_side": "right", + "sep_token": "[SEP]", + "tokenizer_class": "PreTrainedTokenizerFast", + "trust_remote_code": true, + "unk_token": "[UNK]", + "use_fast": true +} diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-4375/trainer_state.json b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-4375/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..1d0d7009d5e6180f1510e56f65504cc593da3693 --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-4375/trainer_state.json @@ -0,0 +1,368 @@ +{ + "best_metric": 0.9556951153864713, + "best_model_checkpoint": "genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-4375", + "epoch": 7.0, + "eval_steps": 100, + "global_step": 4375, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + 
"epoch": 0.16, + "learning_rate": 9.999999999999999e-06, + "loss": 0.4352, + "step": 100 + }, + { + "epoch": 0.32, + "learning_rate": 1.9999999999999998e-05, + "loss": 0.1944, + "step": 200 + }, + { + "epoch": 0.48, + "learning_rate": 3e-05, + "loss": 0.1817, + "step": 300 + }, + { + "epoch": 0.64, + "learning_rate": 2.9361702127659574e-05, + "loss": 0.1653, + "step": 400 + }, + { + "epoch": 0.8, + "learning_rate": 2.872340425531915e-05, + "loss": 0.15, + "step": 500 + }, + { + "epoch": 0.96, + "learning_rate": 2.8085106382978723e-05, + "loss": 0.1423, + "step": 600 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.9492, + "eval_f1": 0.9491973663914737, + "eval_loss": 0.14096176624298096, + "eval_matthews_correlation": 0.8987776409510696, + "eval_precision": 0.9494807442016145, + "eval_recall": 0.949296915548862, + "eval_runtime": 9.3786, + "eval_samples_per_second": 1066.258, + "eval_steps_per_second": 8.423, + "step": 625 + }, + { + "epoch": 1.12, + "learning_rate": 2.74468085106383e-05, + "loss": 0.1286, + "step": 700 + }, + { + "epoch": 1.28, + "learning_rate": 2.6808510638297873e-05, + "loss": 0.1227, + "step": 800 + }, + { + "epoch": 1.44, + "learning_rate": 2.617021276595745e-05, + "loss": 0.1128, + "step": 900 + }, + { + "epoch": 1.6, + "learning_rate": 2.5531914893617022e-05, + "loss": 0.1133, + "step": 1000 + }, + { + "epoch": 1.76, + "learning_rate": 2.4893617021276595e-05, + "loss": 0.1108, + "step": 1100 + }, + { + "epoch": 1.92, + "learning_rate": 2.4255319148936168e-05, + "loss": 0.1174, + "step": 1200 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9448, + "eval_f1": 0.9447548116457338, + "eval_loss": 0.14714133739471436, + "eval_matthews_correlation": 0.8919111732730618, + "eval_precision": 0.9468665229355923, + "eval_recall": 0.9450465072788566, + "eval_runtime": 9.4606, + "eval_samples_per_second": 1057.01, + "eval_steps_per_second": 8.35, + "step": 1250 + }, + { + "epoch": 2.08, + "learning_rate": 2.3617021276595744e-05, + "loss": 0.0896, + "step": 
1300 + }, + { + "epoch": 2.24, + "learning_rate": 2.297872340425532e-05, + "loss": 0.0771, + "step": 1400 + }, + { + "epoch": 2.4, + "learning_rate": 2.2340425531914894e-05, + "loss": 0.0849, + "step": 1500 + }, + { + "epoch": 2.56, + "learning_rate": 2.170212765957447e-05, + "loss": 0.0786, + "step": 1600 + }, + { + "epoch": 2.72, + "learning_rate": 2.1063829787234043e-05, + "loss": 0.0872, + "step": 1700 + }, + { + "epoch": 2.88, + "learning_rate": 2.0425531914893616e-05, + "loss": 0.0827, + "step": 1800 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.9533, + "eval_f1": 0.9532990543058497, + "eval_loss": 0.12726238369941711, + "eval_matthews_correlation": 0.9066100253951647, + "eval_precision": 0.9532948131792527, + "eval_recall": 0.9533152124454098, + "eval_runtime": 9.3211, + "eval_samples_per_second": 1072.834, + "eval_steps_per_second": 8.475, + "step": 1875 + }, + { + "epoch": 3.04, + "learning_rate": 1.978723404255319e-05, + "loss": 0.0769, + "step": 1900 + }, + { + "epoch": 3.2, + "learning_rate": 1.914893617021277e-05, + "loss": 0.0553, + "step": 2000 + }, + { + "epoch": 3.36, + "learning_rate": 1.8510638297872342e-05, + "loss": 0.0561, + "step": 2100 + }, + { + "epoch": 3.52, + "learning_rate": 1.7872340425531915e-05, + "loss": 0.0551, + "step": 2200 + }, + { + "epoch": 3.68, + "learning_rate": 1.723404255319149e-05, + "loss": 0.0555, + "step": 2300 + }, + { + "epoch": 3.84, + "learning_rate": 1.6595744680851064e-05, + "loss": 0.0574, + "step": 2400 + }, + { + "epoch": 4.0, + "learning_rate": 1.5957446808510637e-05, + "loss": 0.0562, + "step": 2500 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9532, + "eval_f1": 0.9531823797204316, + "eval_loss": 0.16251493990421295, + "eval_matthews_correlation": 0.9066581426052277, + "eval_precision": 0.9535473452173776, + "eval_recall": 0.9531109024342193, + "eval_runtime": 9.3211, + "eval_samples_per_second": 1072.831, + "eval_steps_per_second": 8.475, + "step": 2500 + }, + { + "epoch": 4.16, + "learning_rate": 
1.531914893617021e-05, + "loss": 0.0343, + "step": 2600 + }, + { + "epoch": 4.32, + "learning_rate": 1.4680851063829787e-05, + "loss": 0.0289, + "step": 2700 + }, + { + "epoch": 4.48, + "learning_rate": 1.4042553191489362e-05, + "loss": 0.0365, + "step": 2800 + }, + { + "epoch": 4.64, + "learning_rate": 1.3404255319148936e-05, + "loss": 0.0336, + "step": 2900 + }, + { + "epoch": 4.8, + "learning_rate": 1.2765957446808511e-05, + "loss": 0.0362, + "step": 3000 + }, + { + "epoch": 4.96, + "learning_rate": 1.2127659574468084e-05, + "loss": 0.0355, + "step": 3100 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.9553, + "eval_f1": 0.9552997206232539, + "eval_loss": 0.17249269783496857, + "eval_matthews_correlation": 0.9106381321410131, + "eval_precision": 0.9553088212352849, + "eval_recall": 0.9553293111362456, + "eval_runtime": 9.36, + "eval_samples_per_second": 1068.377, + "eval_steps_per_second": 8.44, + "step": 3125 + }, + { + "epoch": 5.12, + "learning_rate": 1.148936170212766e-05, + "loss": 0.0224, + "step": 3200 + }, + { + "epoch": 5.28, + "learning_rate": 1.0851063829787235e-05, + "loss": 0.0207, + "step": 3300 + }, + { + "epoch": 5.44, + "learning_rate": 1.0212765957446808e-05, + "loss": 0.0194, + "step": 3400 + }, + { + "epoch": 5.6, + "learning_rate": 9.574468085106385e-06, + "loss": 0.0188, + "step": 3500 + }, + { + "epoch": 5.76, + "learning_rate": 8.936170212765958e-06, + "loss": 0.0197, + "step": 3600 + }, + { + "epoch": 5.92, + "learning_rate": 8.297872340425532e-06, + "loss": 0.0183, + "step": 3700 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.9552, + "eval_f1": 0.9551867424458852, + "eval_loss": 0.24090713262557983, + "eval_matthews_correlation": 0.91057237575741, + "eval_precision": 0.9554460306619799, + "eval_recall": 0.9551264011936584, + "eval_runtime": 9.3493, + "eval_samples_per_second": 1069.598, + "eval_steps_per_second": 8.45, + "step": 3750 + }, + { + "epoch": 6.08, + "learning_rate": 7.659574468085105e-06, + "loss": 0.0129, + "step": 3800 + }, + 
{ + "epoch": 6.24, + "learning_rate": 7.021276595744681e-06, + "loss": 0.0136, + "step": 3900 + }, + { + "epoch": 6.4, + "learning_rate": 6.3829787234042555e-06, + "loss": 0.0076, + "step": 4000 + }, + { + "epoch": 6.56, + "learning_rate": 5.74468085106383e-06, + "loss": 0.0079, + "step": 4100 + }, + { + "epoch": 6.72, + "learning_rate": 5.106382978723404e-06, + "loss": 0.0105, + "step": 4200 + }, + { + "epoch": 6.88, + "learning_rate": 4.468085106382979e-06, + "loss": 0.0083, + "step": 4300 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.9557, + "eval_f1": 0.9556951153864713, + "eval_loss": 0.2684195339679718, + "eval_matthews_correlation": 0.9114136506343465, + "eval_precision": 0.9557403251037204, + "eval_recall": 0.9556733279930717, + "eval_runtime": 9.3532, + "eval_samples_per_second": 1069.157, + "eval_steps_per_second": 8.446, + "step": 4375 + } + ], + "logging_steps": 100, + "max_steps": 5000, + "num_train_epochs": 8, + "save_steps": 100, + "total_flos": 5.81310987936e+16, + "trial_name": null, + "trial_params": null +} diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-4375/training_args.bin b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-4375/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..70456fba1d334b1e38983c0f1ea39b6306fe37c9 --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-4375/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f20a373dc3f643d266ad74c7a0b0d13cb127512911e4b86a88d2e216be1b358c +size 5393 diff --git 
a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/results/base5120_demo_human_or_worm_lr3e-5_wd0.03_wr0.06_ep8_seed42/eval_results.json b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/results/base5120_demo_human_or_worm_lr3e-5_wd0.03_wr0.06_ep8_seed42/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..8d86c388dee9d2887549a5ad79216695e90c50f0 --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/results/base5120_demo_human_or_worm_lr3e-5_wd0.03_wr0.06_ep8_seed42/eval_results.json @@ -0,0 +1 @@ +{"eval_loss": 0.24703867733478546, "eval_accuracy": 0.958, "eval_f1": 0.957999621996598, "eval_matthews_correlation": 0.9160011600280884, "eval_precision": 0.957997831991328, "eval_recall": 0.958003328053249, "eval_runtime": 9.2577, "eval_samples_per_second": 1080.186, "eval_steps_per_second": 8.533, "epoch": 8.0} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/checkpoint-172/config.json b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/checkpoint-172/config.json new file mode 100644 index 0000000000000000000000000000000000000000..45e4c6c10a6211acf374c78e8078ab7ac74985f9 --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/checkpoint-172/config.json @@ -0,0 +1,27 @@ +{ + "_name_or_path": "/data/nanhuang/Nan/models/DNAbert2_Pretrained", + "architectures": [ + "BertForSequenceClassification" + ], + "attention_probs_dropout_prob": 0.1, + "classifier_dropout": null, + 
"hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_norm_eps": 1e-12, + "max_length": 512, + "max_position_embeddings": 512, + "model_type": "bert", + "num_attention_heads": 12, + "num_hidden_layers": 12, + "pad_token_id": 0, + "position_embedding_type": "absolute", + "problem_type": "single_label_classification", + "torch_dtype": "float32", + "transformers_version": "4.35.2", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 4096 +} diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/checkpoint-172/model.safetensors b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/checkpoint-172/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..794aeb33f746ba654de569428cb7b8156f0f4b71 --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/checkpoint-172/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:feeef0c39907da371ecf27c5b36c6e2e02993d3f49aa4c5cc63ef947ea3c0e8c +size 356777880 diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/checkpoint-172/optimizer.pt b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/checkpoint-172/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..76d564184b7f7ab5fa0e0c36d5e41b6921a99cfb --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/checkpoint-172/optimizer.pt @@ -0,0 +1,3 @@ 
+version https://git-lfs.github.com/spec/v1 +oid sha256:a67dec67c94f26952fbfce73585c99612150584c9e86209d83fc2ef1dab902c6 +size 713677451 diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/checkpoint-172/rng_state.pth b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/checkpoint-172/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..5b3f7cae39bfe3a5b3eb4fb9f6c8c0189fe12091 --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/checkpoint-172/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4098e4ab6e0c90fa8c2284ad8ab481b5d170da601ea7faab38e80d96bbe954b6 +size 14645 diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/checkpoint-172/scheduler.pt b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/checkpoint-172/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..33d561634093be6e22d8835f96e1dd7cffcceb5e --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/checkpoint-172/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:675091024a487fbecbd7ff59a446faf287a463a27c48ae3ddc55b3312f4aa745 +size 1465 diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/checkpoint-172/special_tokens_map.json 
b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/checkpoint-172/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..a8b3208c2884c4efb86e49300fdd3dc877220cdf --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/checkpoint-172/special_tokens_map.json @@ -0,0 +1,7 @@ +{ + "cls_token": "[CLS]", + "mask_token": "[MASK]", + "pad_token": "[PAD]", + "sep_token": "[SEP]", + "unk_token": "[UNK]" +} diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/checkpoint-172/tokenizer.json b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/checkpoint-172/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..8a569df5e832e1e62816e174612061cfbf0790d0 --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/checkpoint-172/tokenizer.json @@ -0,0 +1,8340 @@ +{ + "version": "1.0", + "truncation": { + "direction": "Right", + "max_length": 512, + "strategy": "LongestFirst", + "stride": 0 + }, + "padding": { + "strategy": "BatchLongest", + "direction": "Right", + "pad_to_multiple_of": null, + "pad_id": 3, + "pad_type_id": 0, + "pad_token": "[PAD]" + }, + "added_tokens": [ + { + "id": 0, + "content": "[UNK]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "[CLS]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "[SEP]", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + { + "id": 3, + "content": "[PAD]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 4, + "content": "[MASK]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": null, + "pre_tokenizer": { + "type": "Whitespace" + }, + "post_processor": { + "type": "TemplateProcessing", + "single": [ + { + "SpecialToken": { + "id": "[CLS]", + "type_id": 0 + } + }, + { + "Sequence": { + "id": "A", + "type_id": 0 + } + }, + { + "SpecialToken": { + "id": "[SEP]", + "type_id": 0 + } + } + ], + "pair": [ + { + "SpecialToken": { + "id": "[CLS]", + "type_id": 0 + } + }, + { + "Sequence": { + "id": "A", + "type_id": 0 + } + }, + { + "SpecialToken": { + "id": "[SEP]", + "type_id": 0 + } + }, + { + "Sequence": { + "id": "B", + "type_id": 1 + } + }, + { + "SpecialToken": { + "id": "[SEP]", + "type_id": 1 + } + } + ], + "special_tokens": { + "[CLS]": { + "id": "[CLS]", + "ids": [ + 1 + ], + "tokens": [ + "[CLS]" + ] + }, + "[SEP]": { + "id": "[SEP]", + "ids": [ + 2 + ], + "tokens": [ + "[SEP]" + ] + } + } + }, + "decoder": null, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": "[UNK]", + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "vocab": { + "[UNK]": 0, + "[CLS]": 1, + "[SEP]": 2, + "[PAD]": 3, + "[MASK]": 4, + "A": 5, + "C": 6, + "G": 7, + "T": 8, + "AA": 9, + "TT": 10, + "TG": 11, + "CA": 12, + "CC": 13, + "TA": 14, + "GG": 15, + "TC": 16, + "GA": 17, + "AAA": 18, + "GC": 19, + "TAA": 20, + "TTTT": 21, + "TCA": 22, + "TGA": 23, + "TTA": 24, + "GAA": 25, + "TCC": 26, + "CAA": 27, + "CTG": 28, + "CTT": 29, + "GTG": 30, + "GTT": 31, + "GCA": 32, + "GGA": 33, + "CCA": 34, + "GTA": 35, + "GCC": 36, + "CTA": 37, + "TAAA": 38, + "AAAA": 39, + "CTC": 40, + "GTC": 41, + "TGTG": 42, + "TATT": 43, + "CACA": 44, + "GAAA": 45, + 
"TATA": 46, + "TCTT": 47, + "TGTT": 48, + "CAAA": 49, + "GAGA": 50, + "CATT": 51, + "TGAA": 52, + "CAGG": 53, + "TCTG": 54, + "CAGA": 55, + "TCAA": 56, + "GGAA": 57, + "TAAAA": 58, + "CTGA": 59, + "GCTT": 60, + "GTGA": 61, + "GCTG": 62, + "CTCA": 63, + "CCTT": 64, + "CATG": 65, + "GCAA": 66, + "GTCA": 67, + "GTAA": 68, + "TTTTA": 69, + "TATG": 70, + "GAGG": 71, + "CGG": 72, + "GATT": 73, + "CCTG": 74, + "TCTC": 75, + "CCAA": 76, + "GTTA": 77, + "CTCC": 78, + "CTAA": 79, + "TACA": 80, + "CTTA": 81, + "TCCA": 82, + "GATG": 83, + "TTAA": 84, + "GAAAA": 85, + "TTTG": 86, + "GTTTT": 87, + "TCTA": 88, + "GCCA": 89, + "GTCC": 90, + "CTTTT": 91, + "GGGG": 92, + "CGA": 93, + "TTTA": 94, + "CCCA": 95, + "CAAAA": 96, + "TGGG": 97, + "TAGA": 98, + "TAGG": 99, + "GACA": 100, + "GGTT": 101, + "CCCC": 102, + "GGTG": 103, + "CATA": 104, + "GCTA": 105, + "TGTA": 106, + "TCAAA": 107, + "TGGA": 108, + "TAATT": 109, + "TTATT": 110, + "TGCA": 111, + "GGCA": 112, + "GATA": 113, + "CCTA": 114, + "TTCA": 115, + "TCTCA": 116, + "GGGA": 117, + "CGC": 118, + "CTGAA": 119, + "GTAAA": 120, + "TCTCC": 121, + "TTTTTT": 122, + "CGTG": 123, + "GCAAA": 124, + "TAAAAA": 125, + "TCTGA": 126, + "TCATT": 127, + "GGAAA": 128, + "TGAAA": 129, + "TCCTT": 130, + "CCAAA": 131, + "GAATT": 132, + "CTAAA": 133, + "CGTT": 134, + "GTGAA": 135, + "GGCC": 136, + "TAATA": 137, + "GGTA": 138, + "TGCC": 139, + "CACC": 140, + "TGATT": 141, + "AAAAAA": 142, + "GCTCA": 143, + "TCCAA": 144, + "GAGAA": 145, + "CTGTT": 146, + "TATTA": 147, + "CAGCA": 148, + "CTCTT": 149, + "CTTAA": 150, + "CAGAA": 151, + "GCTGA": 152, + "GTTAA": 153, + "TCTTA": 154, + "TATTTT": 155, + "GCCAA": 156, + "CTTTG": 157, + "GACC": 158, + "CGCA": 159, + "GTATT": 160, + "GTCTT": 161, + "CAATT": 162, + "GTGTT": 163, + "CTCAA": 164, + "GGAGG": 165, + "CGAA": 166, + "TCTTTT": 167, + "GTCAA": 168, + "CGCC": 169, + "TATAA": 170, + "TACC": 171, + "TCTAA": 172, + "CCATT": 173, + "CGGA": 174, + "CAAAAA": 175, + "CAGTG": 176, + "TCCTG": 177, + "CTCTG": 178, 
+ "GAAAAA": 179, + "CTGTG": 180, + "CAGC": 181, + "TTTTAA": 182, + "GCATT": 183, + "GCCTT": 184, + "TAATG": 185, + "CTATT": 186, + "GTTTG": 187, + "TGATG": 188, + "GGCTG": 189, + "CCTCA": 190, + "GAGGA": 191, + "GCCTG": 192, + "AAATT": 193, + "CGTA": 194, + "TCAAAA": 195, + "TACAA": 196, + "CATCA": 197, + "CAGTT": 198, + "TGAGA": 199, + "GGGAA": 200, + "CACTG": 201, + "CACAA": 202, + "CAGGA": 203, + "CCCCA": 204, + "CCCTG": 205, + "TTTTTTTT": 206, + "TAGAA": 207, + "GAGCA": 208, + "CCTCC": 209, + "CACCA": 210, + "TATCA": 211, + "GAGC": 212, + "CATTA": 213, + "CACACACA": 214, + "GAGTG": 215, + "GGATT": 216, + "TGTGTGTG": 217, + "TACTT": 218, + "CACTT": 219, + "GTCTG": 220, + "TGAGG": 221, + "GAGTT": 222, + "GAATG": 223, + "TCATG": 224, + "GACAA": 225, + "GACTT": 226, + "TATTAA": 227, + "TAATAA": 228, + "GGCCA": 229, + "CATTTT": 230, + "CAGCC": 231, + "CCCTT": 232, + "GCTAA": 233, + "TATATATA": 234, + "GTGTG": 235, + "TACTG": 236, + "TAGTT": 237, + "CAATG": 238, + "GCTC": 239, + "CAGTA": 240, + "GCTCC": 241, + "CATAA": 242, + "TTATG": 243, + "TAAATT": 244, + "GATGA": 245, + "CATGA": 246, + "GCGG": 247, + "AAAAAAAA": 248, + "CCATG": 249, + "GATAA": 250, + "GACTG": 251, + "TATGA": 252, + "GCAGG": 253, + "GATCA": 254, + "GTTTTA": 255, + "GGATG": 256, + "CCTGA": 257, + "GTAAAA": 258, + "GAAGG": 259, + "GATTA": 260, + "CCTC": 261, + "GACCA": 262, + "GCTTA": 263, + "CCCAA": 264, + "AAATG": 265, + "GCATG": 266, + "TAGTA": 267, + "TACCA": 268, + "GGCTT": 269, + "CGTC": 270, + "TCTCTT": 271, + "GGTCA": 272, + "TTATTA": 273, + "TACTA": 274, + "TAGCA": 275, + "TATC": 276, + "CTGGG": 277, + "CATC": 278, + "CTTTTA": 279, + "CTAAAA": 280, + "GTGGG": 281, + "GAGTA": 282, + "CCAGG": 283, + "GATTTT": 284, + "TAGTG": 285, + "GAAATT": 286, + "CACTA": 287, + "TCGG": 288, + "TCAGG": 289, + "CAGGAA": 290, + "GCAAAA": 291, + "CCTTA": 292, + "CATCC": 293, + "CTTGG": 294, + "TGTGAA": 295, + "TATTTG": 296, + "CCTAA": 297, + "CTATG": 298, + "GAGAAA": 299, + "GAGAGAGA": 300, + "GCTTTT": 301, + 
"TATAAA": 302, + "CAAGG": 303, + "TCTCTG": 304, + "TGTTAA": 305, + "TGTGTT": 306, + "GAGCC": 307, + "GACTA": 308, + "TATATT": 309, + "TAAAAAA": 310, + "TTTTTG": 311, + "GTATG": 312, + "CATTAA": 313, + "TAGGA": 314, + "TAGC": 315, + "GTTGG": 316, + "GAAGAA": 317, + "TAAATG": 318, + "TCTGTT": 319, + "CAGAAA": 320, + "CAAATT": 321, + "TAATTA": 322, + "TCTGTG": 323, + "TATCC": 324, + "TGAATT": 325, + "CTCCA": 326, + "GTGAAA": 327, + "GGCAA": 328, + "GGAGA": 329, + "GAAGA": 330, + "GGTGA": 331, + "GGGCA": 332, + "CCAAAA": 333, + "TCTCTCTC": 334, + "CTGCA": 335, + "CTTCTT": 336, + "TCTTAA": 337, + "CCCTA": 338, + "TGTGTG": 339, + "AAATA": 340, + "TGTTTG": 341, + "GGGTT": 342, + "GTGCTG": 343, + "GGAAAA": 344, + "GGGGA": 345, + "TCAGA": 346, + "CCTTTT": 347, + "GAAATG": 348, + "GCAGCA": 349, + "TCTGAA": 350, + "GGGTG": 351, + "CACATT": 352, + "TCTTTG": 353, + "GGGC": 354, + "TCCCA": 355, + "TCCATT": 356, + "CTGAAA": 357, + "CTTTA": 358, + "TCGA": 359, + "GTTTA": 360, + "CAACAA": 361, + "CTTCC": 362, + "GCCTCC": 363, + "TTAAA": 364, + "GCTCTG": 365, + "GTTTCA": 366, + "GGAGGA": 367, + "CGTGA": 368, + "CAGTC": 369, + "GAATA": 370, + "CAGAGA": 371, + "CCCTC": 372, + "CAAATG": 373, + "CTGCTG": 374, + "GATCC": 375, + "TTTTATT": 376, + "AAAATT": 377, + "TTATA": 378, + "TCAATT": 379, + "GGTAA": 380, + "GTTATT": 381, + "GCCAGG": 382, + "GGAGAA": 383, + "CATTTG": 384, + "TCACC": 385, + "CTCAAA": 386, + "GGTTA": 387, + "TCCAAA": 388, + "TCTATT": 389, + "GCAGA": 390, + "CTTCA": 391, + "TCATCA": 392, + "CGAGG": 393, + "TAACA": 394, + "GTTGTT": 395, + "CTTATT": 396, + "CGTCA": 397, + "TAAGA": 398, + "TAATTTT": 399, + "CTGTA": 400, + "TCCACA": 401, + "GCTGTG": 402, + "CGCTG": 403, + "TCTAAA": 404, + "GCGA": 405, + "CAATA": 406, + "CCACCA": 407, + "GAACA": 408, + "CGAAA": 409, + "CAGATT": 410, + "TCACA": 411, + "TTATTTT": 412, + "TCTCAA": 413, + "TGACA": 414, + "CTCCAA": 415, + "AAAAAAA": 416, + "TATATG": 417, + "TCCTCC": 418, + "TCACTT": 419, + "TCCAGG": 420, + "CAAGA": 421, + "GGCTA": 
422, + "GTGGTG": 423, + "CGTAA": 424, + "CGAGA": 425, + "TGATA": 426, + "GGATTA": 427, + "CAACA": 428, + "CGATT": 429, + "TGAGAA": 430, + "CTCCTT": 431, + "CTCATT": 432, + "GTTAAA": 433, + "TCATA": 434, + "CCTCTG": 435, + "CTCTA": 436, + "GCTGAA": 437, + "CTGGA": 438, + "TAAGG": 439, + "CTTAAA": 440, + "TATTTA": 441, + "CCACA": 442, + "CCGG": 443, + "GTCAAA": 444, + "TGGAA": 445, + "CGGAA": 446, + "TGATGA": 447, + "GTTCA": 448, + "TAACAA": 449, + "GCTGTT": 450, + "TAAGAA": 451, + "CTGCC": 452, + "TTAATT": 453, + "CCAGA": 454, + "TCAGAA": 455, + "GTCATT": 456, + "CGCTT": 457, + "GATTAA": 458, + "CTGATT": 459, + "GCCACA": 460, + "GTAATT": 461, + "TCCAGA": 462, + "GCCAAA": 463, + "GTGATT": 464, + "TAAAATT": 465, + "CAAGAA": 466, + "CCACC": 467, + "TAATCC": 468, + "GTTCTT": 469, + "TCCATG": 470, + "GCTCTT": 471, + "TGCTG": 472, + "GGGTA": 473, + "TTACA": 474, + "GCCATT": 475, + "GCACA": 476, + "GCAATT": 477, + "TCCCTG": 478, + "TGTGA": 479, + "TCGAA": 480, + "GGACA": 481, + "GGAATT": 482, + "GTGGA": 483, + "CTTCTG": 484, + "TCCCC": 485, + "GCCCC": 486, + "CTTGA": 487, + "TAATGA": 488, + "TAAATA": 489, + "TATATA": 490, + "CTGCAA": 491, + "TCATTA": 492, + "GTATA": 493, + "TCCCCA": 494, + "CGTTA": 495, + "GCAGAA": 496, + "TGAGTT": 497, + "CTTTTTT": 498, + "CGATG": 499, + "CTTTCA": 500, + "AAAATG": 501, + "CAGGTT": 502, + "CTAATT": 503, + "CGCCA": 504, + "TGAAAAA": 505, + "GTTCC": 506, + "GTCCTT": 507, + "GTCCAA": 508, + "GTTTTTT": 509, + "CTCTGA": 510, + "GCGC": 511, + "GTTGA": 512, + "TGAATG": 513, + "CTATA": 514, + "GCAGTG": 515, + "CCTTAA": 516, + "TCACCA": 517, + "TCACTG": 518, + "GCCCTG": 519, + "TAACTT": 520, + "CAGATG": 521, + "GTAGG": 522, + "TCTATA": 523, + "GAGATT": 524, + "GTCTA": 525, + "TTTTAAA": 526, + "CACATG": 527, + "TGACC": 528, + "CACAAA": 529, + "GTGTA": 530, + "GGGAGG": 531, + "GCTTTG": 532, + "CAAAAAA": 533, + "GAGGAA": 534, + "GTTCTG": 535, + "TTTTTA": 536, + "GTCTCA": 537, + "GTTCAA": 538, + "TCGTG": 539, + "GCTTAA": 540, + "GCACC": 541, + 
"CTCCTG": 542, + "TAAATAAA": 543, + "CTACA": 544, + "CTTCCA": 545, + "TCCTCA": 546, + "CGCAA": 547, + "GAAAAAA": 548, + "GCCCA": 549, + "TCGTT": 550, + "GTAGA": 551, + "CTCTCA": 552, + "GTCCA": 553, + "TGACTT": 554, + "TCCCTT": 555, + "GCCATG": 556, + "CACACACACACACACA": 557, + "GTGATG": 558, + "CCTCTT": 559, + "GCCAGA": 560, + "TCCTA": 561, + "CGTTTT": 562, + "GTACA": 563, + "GCATA": 564, + "GAATTA": 565, + "TGTGTGTGTGTGTGTG": 566, + "CCCAGG": 567, + "GGTTTT": 568, + "TCAAAAA": 569, + "TCTATG": 570, + "CCATA": 571, + "TGACAA": 572, + "GGATA": 573, + "TCAGTG": 574, + "GTATTTT": 575, + "GAGATG": 576, + "GCGTG": 577, + "CGTCC": 578, + "TTAAAAA": 579, + "TAATCA": 580, + "CAATTA": 581, + "CCACTG": 582, + "CGGTT": 583, + "GTTGAA": 584, + "TGATTA": 585, + "CCTTTG": 586, + "CGGTG": 587, + "CAGGTG": 588, + "TCAATG": 589, + "CTGATG": 590, + "TCAGGA": 591, + "GTTTAA": 592, + "TATTAAA": 593, + "CTCTTA": 594, + "GCAGGA": 595, + "CTCTCC": 596, + "GAACC": 597, + "CTTTAA": 598, + "GGGCC": 599, + "GTATTA": 600, + "GCGCC": 601, + "CCAATT": 602, + "GCTAAA": 603, + "TGACTG": 604, + "GATTTG": 605, + "GATAAA": 606, + "TCAGCA": 607, + "GTTCCA": 608, + "GAAATA": 609, + "GACAAA": 610, + "GAGTC": 611, + "GCTATT": 612, + "TCACAA": 613, + "GAGGTT": 614, + "TAACC": 615, + "GAAGGA": 616, + "GCTCAA": 617, + "GAAAATT": 618, + "CCAGCA": 619, + "GTTTTAA": 620, + "GTGCC": 621, + "TGAGGA": 622, + "CATAAA": 623, + "GGTCC": 624, + "TCATTTT": 625, + "TATTTATT": 626, + "TAATAAA": 627, + "GCCTA": 628, + "CTTTTAA": 629, + "TAAGTG": 630, + "TAAGTA": 631, + "CTGGAA": 632, + "CACACA": 633, + "GACAGA": 634, + "CAACC": 635, + "GGGAAA": 636, + "CCAGAA": 637, + "TCAGTT": 638, + "TAACTA": 639, + "CTAAAAA": 640, + "TGGGTT": 641, + "TGAGTG": 642, + "TAAAATG": 643, + "TATATATATATATATA": 644, + "GCACTG": 645, + "GACTC": 646, + "TACAAA": 647, + "TAAAAAAA": 648, + "TCTACA": 649, + "GTTGTG": 650, + "TCGCC": 651, + "CCCAAA": 652, + "GTCATG": 653, + "CTGCTT": 654, + "GGAATG": 655, + "CTATTA": 656, + "GATATT": 657, + 
"TAGAAA": 658, + "GGCAGG": 659, + "GATGAA": 660, + "GTAGAA": 661, + "TCCTGA": 662, + "TAACTG": 663, + "GCTGGG": 664, + "GCAATG": 665, + "GCCCCA": 666, + "GTTTGA": 667, + "CATTTA": 668, + "GTGCA": 669, + "CTTGAA": 670, + "GTGGAA": 671, + "CTTCAA": 672, + "TAAATTA": 673, + "GTGGCA": 674, + "TCCTTA": 675, + "GGAAAAA": 676, + "TTTTTTA": 677, + "CCTGTG": 678, + "GTAATG": 679, + "GTGTTA": 680, + "CTAGG": 681, + "CAGGCTG": 682, + "GACACA": 683, + "GAAAAAAA": 684, + "TCGC": 685, + "GTAAAAA": 686, + "TGTTTA": 687, + "TCTCTA": 688, + "GTCCTG": 689, + "CCAGGA": 690, + "GAACAA": 691, + "TAAGTT": 692, + "TGAGCA": 693, + "GCTCCA": 694, + "TAAGCA": 695, + "CTCATG": 696, + "GTCTTA": 697, + "CCCACA": 698, + "CATATT": 699, + "GCCTCA": 700, + "CACTC": 701, + "CTTCTA": 702, + "TGATTTT": 703, + "TCGCA": 704, + "CCTGTT": 705, + "GAAGCA": 706, + "GCAAAAA": 707, + "GCGGA": 708, + "CCACAA": 709, + "GCGCA": 710, + "CATATA": 711, + "GACATT": 712, + "GTTCTA": 713, + "CAAAATT": 714, + "GAAAGAAA": 715, + "CCCGG": 716, + "TACACA": 717, + "CCAAAAA": 718, + "GAGGTG": 719, + "GGCTCA": 720, + "CAGTGA": 721, + "TCCCAA": 722, + "TATCTT": 723, + "TGAGTA": 724, + "TCGTA": 725, + "TTTTCTT": 726, + "GTGGGA": 727, + "GAGCTG": 728, + "CCCTCC": 729, + "TAGGTT": 730, + "TTAGG": 731, + "TAATATT": 732, + "CCAGCC": 733, + "CATCTT": 734, + "GTCTGA": 735, + "GTTTCC": 736, + "CCTGAA": 737, + "GGAGCA": 738, + "GAAAATG": 739, + "TCAGTA": 740, + "TAACCA": 741, + "GATGTT": 742, + "CTGTTA": 743, + "CATGTT": 744, + "GGCGG": 745, + "CATGTG": 746, + "GGGAGA": 747, + "CTTTGA": 748, + "TCTTTCTT": 749, + "AAAAAAAAA": 750, + "GGGGTG": 751, + "CTTTCC": 752, + "CTTGTT": 753, + "GCATTA": 754, + "CCCAGA": 755, + "CAAATA": 756, + "TCGGA": 757, + "CAGCTT": 758, + "TCACTA": 759, + "TAATTAA": 760, + "TAAGGA": 761, + "GAACTG": 762, + "GCACAA": 763, + "GCGTT": 764, + "GGCTC": 765, + "TCTTTTA": 766, + "CCTCCA": 767, + "GGCAAA": 768, + "CAGCTG": 769, + "CTACAA": 770, + "TACATT": 771, + "GCTATG": 772, + "CTTGTG": 773, + "GAGTCA": 774, + 
"GTTATG": 775, + "CTGCCA": 776, + "GTCTCC": 777, + "TGACCA": 778, + "CACCTG": 779, + "TATATTA": 780, + "TGATCA": 781, + "CAGCAA": 782, + "GATGTG": 783, + "GTCTTTT": 784, + "CTAGAA": 785, + "GCTACA": 786, + "CTGGGA": 787, + "GGGGTT": 788, + "CAAGTA": 789, + "CAAGGA": 790, + "CCCTCA": 791, + "TAGCC": 792, + "GTTGGA": 793, + "GCTATA": 794, + "TCTGAAA": 795, + "TATGTT": 796, + "CCCCTT": 797, + "GTTGTA": 798, + "CCCTGA": 799, + "TGACTA": 800, + "CAAGCA": 801, + "CAATAA": 802, + "GAACTT": 803, + "CATGAA": 804, + "CTTATG": 805, + "CTAATG": 806, + "TCTAAAA": 807, + "CCAATG": 808, + "GAAGTG": 809, + "CCTCAA": 810, + "CCCATT": 811, + "CAGTCA": 812, + "GAGAGAGAGAGAGAGA": 813, + "TATGTG": 814, + "GCAGTGA": 815, + "TCTCCTT": 816, + "TCCCAAA": 817, + "CCATTA": 818, + "CCAGTG": 819, + "GCATCA": 820, + "TCAAATT": 821, + "GATCTT": 822, + "GACAGG": 823, + "GGAGTG": 824, + "GTAGTA": 825, + "CAACTT": 826, + "GAAGTT": 827, + "CCCCTG": 828, + "TCTCAAA": 829, + "GGGTC": 830, + "GAGCTT": 831, + "TATGAAA": 832, + "TATGAA": 833, + "GACATG": 834, + "CAAGTG": 835, + "GATATA": 836, + "CATCTG": 837, + "CTGTGA": 838, + "TAATTTA": 839, + "GGCAGA": 840, + "GCGAA": 841, + "CCTAAA": 842, + "CCATCA": 843, + "CACTGA": 844, + "GGACTA": 845, + "GACGG": 846, + "CTCTTTT": 847, + "CTGTCA": 848, + "TCTCTCTCTCTCTCTC": 849, + "TTAATG": 850, + "GCAGCC": 851, + "CAAAAAAA": 852, + "GCACCA": 853, + "CTATTTT": 854, + "GAGCAA": 855, + "CTTGGA": 856, + "CTGGTG": 857, + "GAATAA": 858, + "TCCTTTT": 859, + "GAAGTA": 860, + "CAGTAA": 861, + "CAACCA": 862, + "CTGTAA": 863, + "TGATAA": 864, + "GCAGTT": 865, + "CACGG": 866, + "TAAATAA": 867, + "CTGTTTT": 868, + "CTACTA": 869, + "GCTCTA": 870, + "CGAAAA": 871, + "CAAGTT": 872, + "CTTGTA": 873, + "GAATGA": 874, + "GAGTGA": 875, + "GCCTGA": 876, + "GGTTTG": 877, + "CCCATG": 878, + "GGGGAA": 879, + "GAAGAAA": 880, + "TGTTA": 881, + "CAATTTT": 882, + "TATATTTT": 883, + "CTCAAAA": 884, + "GGTGGG": 885, + "CCGTG": 886, + "TATTTCA": 887, + "CCCCAA": 888, + "TATTTAA": 889, + 
"GGCTGA": 890, + "GGTGTG": 891, + "CATCAA": 892, + "CACTCA": 893, + "TCTCATT": 894, + "GAATTTT": 895, + "GAATCA": 896, + "CAGGAAA": 897, + "CATACA": 898, + "TATTTTA": 899, + "TTATAA": 900, + "GAGGAAA": 901, + "CATATG": 902, + "CTTTCTT": 903, + "CAACTG": 904, + "GGGCTG": 905, + "CCCCCA": 906, + "TTTGAAA": 907, + "CATTAAA": 908, + "CTTAAAA": 909, + "GACTGA": 910, + "CAATGA": 911, + "GGCACA": 912, + "CCAGTA": 913, + "GGATGA": 914, + "GTTTTTG": 915, + "GCATTTT": 916, + "GTGCCA": 917, + "GCAGTA": 918, + "GCCCTT": 919, + "TCGTC": 920, + "GAACTA": 921, + "GTGGTT": 922, + "GTGTGA": 923, + "GTGCTT": 924, + "CGCTA": 925, + "GTGTCA": 926, + "TCTTTA": 927, + "GCCTTA": 928, + "CCTATT": 929, + "CAAAATG": 930, + "GAACCA": 931, + "CTCCAGG": 932, + "GACTCA": 933, + "CATGAAA": 934, + "GCTAGG": 935, + "TGTTAAA": 936, + "GCGTA": 937, + "GCACTT": 938, + "TCTTAAA": 939, + "TAAGAAA": 940, + "GGCCTG": 941, + "TCCCTA": 942, + "GTGGTA": 943, + "CTGCTA": 944, + "GGAGTT": 945, + "GGTAAA": 946, + "CAAACAAA": 947, + "GATATG": 948, + "TCATGA": 949, + "GACCTT": 950, + "TAATATA": 951, + "GCTAGA": 952, + "GGACTG": 953, + "GGCATT": 954, + "CAGTTA": 955, + "CCCTAA": 956, + "CACCTT": 957, + "GGTGAA": 958, + "CAGCTA": 959, + "GTGTTTT": 960, + "CAACTA": 961, + "GATCAA": 962, + "GAGAAAA": 963, + "TGTGAAA": 964, + "AAAATA": 965, + "GATGAAA": 966, + "CTCTAA": 967, + "TTACTT": 968, + "GATCTG": 969, + "CCACTT": 970, + "GAGTTA": 971, + "CAATCA": 972, + "GGATTACAGG": 973, + "TTTATTTT": 974, + "TACATA": 975, + "TTTTATG": 976, + "GAGTAA": 977, + "GCTGAAA": 978, + "GTACTG": 979, + "GCTCTC": 980, + "TATGTA": 981, + "TGTGTA": 982, + "TCATAA": 983, + "GGACTT": 984, + "TCTCCAA": 985, + "GCATGA": 986, + "GACGA": 987, + "CGCCTG": 988, + "GACCTG": 989, + "GGTCTT": 990, + "CACCAA": 991, + "GATC": 992, + "GACCAA": 993, + "AAAATTA": 994, + "GTAAATT": 995, + "CCAGTT": 996, + "CAGAAAA": 997, + "TAACAAA": 998, + "GGTGTT": 999, + "GAAATTA": 1000, + "TGCCTCA": 1001, + "CCGCC": 1002, + "CCATTTT": 1003, + "CTTGCC": 1004, + 
"TCTGTA": 1005, + "CTGGCA": 1006, + "GGGATG": 1007, + "CCATGA": 1008, + "CTACTT": 1009, + "TAGGTG": 1010, + "TAAAAATT": 1011, + "GAAAGAA": 1012, + "TAAAATA": 1013, + "CTTTTTG": 1014, + "GTCAAAA": 1015, + "GGACAA": 1016, + "TCTGATT": 1017, + "CTCTCTT": 1018, + "TAATTTG": 1019, + "CTCTTTG": 1020, + "GGCCTT": 1021, + "GGATTTT": 1022, + "CTACTG": 1023, + "GTTGCA": 1024, + "GGCTCC": 1025, + "CTCTGTG": 1026, + "CTCCAGCC": 1027, + "TTACAA": 1028, + "GGACCA": 1029, + "GGAAGGAA": 1030, + "TAAAGAA": 1031, + "TTAGAA": 1032, + "GTGAAAA": 1033, + "CTTGCA": 1034, + "TGGGTG": 1035, + "GGAGCC": 1036, + "CCTCTA": 1037, + "CT": 1038, + "GGGCTT": 1039, + "GGCATG": 1040, + "CTGGTT": 1041, + "TACAGA": 1042, + "GATTAAA": 1043, + "CTCTGTT": 1044, + "TTATCA": 1045, + "CTGAAAA": 1046, + "GTAGTT": 1047, + "GGGTCA": 1048, + "GT": 1049, + "CAGCCA": 1050, + "GCGTC": 1051, + "CACTTA": 1052, + "GTGCTA": 1053, + "TCTTATT": 1054, + "GTACTT": 1055, + "GGTATT": 1056, + "TAGAGA": 1057, + "TACATG": 1058, + "CCACTA": 1059, + "TGAGAAA": 1060, + "CAATAAA": 1061, + "TCCAAAA": 1062, + "CGTGAA": 1063, + "GGTCTG": 1064, + "CTGAATT": 1065, + "TCAGCC": 1066, + "CCTCTC": 1067, + "GTTAAAA": 1068, + "GGGATT": 1069, + "TCCTAA": 1070, + "CACTAA": 1071, + "GGAGAAA": 1072, + "CCTTCCTT": 1073, + "GTTTCTT": 1074, + "TATCAA": 1075, + "GATACA": 1076, + "TAATCCCAGCA": 1077, + "CCGCA": 1078, + "TGAAATT": 1079, + "CGTAAA": 1080, + "CTCTCTG": 1081, + "TCTTTTTT": 1082, + "GTACAA": 1083, + "CCAAATT": 1084, + "TGTATTTT": 1085, + "TCGCTT": 1086, + "GGGTGA": 1087, + "GATAGA": 1088, + "CTTTATT": 1089, + "TAAACAA": 1090, + "GTTTATT": 1091, + "TGAATA": 1092, + "CTACCA": 1093, + "GTGTCC": 1094, + "CCCGA": 1095, + "TTTATTA": 1096, + "CTCCAAA": 1097, + "TTTTTTTTTTTT": 1098, + "TCATCC": 1099, + "GAAGCC": 1100, + "CTAAATT": 1101, + "CAAATTA": 1102, + "CCCCAAA": 1103, + "TCTTCTT": 1104, + "TAGGAAA": 1105, + "CACGA": 1106, + "CATTTTA": 1107, + "GTGCAA": 1108, + "TCTCCTG": 1109, + "TATTTTAA": 1110, + "GTTTGTT": 1111, + "GAGCCA": 1112, + 
"GGCCAA": 1113, + "CATTTCA": 1114, + "CATCCA": 1115, + "CCTATA": 1116, + "GACTTA": 1117, + "TCAAATG": 1118, + "GTATCA": 1119, + "TAAATTTT": 1120, + "CTGAGGCA": 1121, + "GCCCAA": 1122, + "GGTTAA": 1123, + "TATCTG": 1124, + "TGACAGA": 1125, + "GGAGAGA": 1126, + "GCTGCTG": 1127, + "CCCTTA": 1128, + "TCCTCTG": 1129, + "GTAGCA": 1130, + "CCTGAAA": 1131, + "CCGAA": 1132, + "TTTTTAA": 1133, + "CTATAA": 1134, + "CCTGTA": 1135, + "TTACTG": 1136, + "GTATAA": 1137, + "GGCGA": 1138, + "GACTAA": 1139, + "TCAGAAA": 1140, + "GTGTGTG": 1141, + "CAAAGAA": 1142, + "CCTATG": 1143, + "GCAGAGA": 1144, + "CCGTT": 1145, + "TTTTATTTT": 1146, + "GGAAGAA": 1147, + "TTACTA": 1148, + "GCCTGGG": 1149, + "TCCCTC": 1150, + "TCCTCTT": 1151, + "GGATCA": 1152, + "GGTCAA": 1153, + "TCGAGA": 1154, + "TATTCTT": 1155, + "TACTC": 1156, + "GTTAATT": 1157, + "GCGAGA": 1158, + "CTTAATT": 1159, + "TCCTTTG": 1160, + "GTCTAA": 1161, + "CACCCA": 1162, + "GGGTTA": 1163, + "GGGCAA": 1164, + "GGAAATG": 1165, + "GCAAATT": 1166, + "TAGATG": 1167, + "GCAGAAA": 1168, + "AAAAAAAAAAAAAAAA": 1169, + "CCTACA": 1170, + "GGAGTA": 1171, + "TCTAATT": 1172, + "CAACAAA": 1173, + "TAGATT": 1174, + "GGTTTA": 1175, + "CCTAGA": 1176, + "CTTTAAA": 1177, + "TACTTA": 1178, + "TAATGAA": 1179, + "CTATCA": 1180, + "TAGTAA": 1181, + "CAGAGAA": 1182, + "CAAGAAA": 1183, + "GGGGAAA": 1184, + "CGTTAA": 1185, + "CGTGTT": 1186, + "TCTGTCTG": 1187, + "TTTTAATT": 1188, + "CTGGCC": 1189, + "TAAATGA": 1190, + "CGTCAA": 1191, + "TTAGTA": 1192, + "GTCTCTG": 1193, + "TTTTAAAA": 1194, + "CAGTTTT": 1195, + "CTTCCTT": 1196, + "TATATAA": 1197, + "GCTTTTA": 1198, + "TTTTTCA": 1199, + "GGTC": 1200, + "TTATTAA": 1201, + "TTTTGTT": 1202, + "CATAGA": 1203, + "TAGGAA": 1204, + "GAGAGAA": 1205, + "GTAGCTG": 1206, + "TTATGA": 1207, + "GTAGTG": 1208, + "GGAGAGG": 1209, + "CTCTGAA": 1210, + "TAGTC": 1211, + "GACTCC": 1212, + "TCCCTCC": 1213, + "TAATGTT": 1214, + "CATCTA": 1215, + "GCCACCA": 1216, + "GTACTA": 1217, + "TGGGAAA": 1218, + "CGCCTT": 1219, + "GCCCGG": 
1220, + "GGAGGAA": 1221, + "GTACCA": 1222, + "CGCAAA": 1223, + "CATAAAA": 1224, + "TAACATT": 1225, + "GCTAAAA": 1226, + "TCTTCTG": 1227, + "GCCAAAA": 1228, + "GTATGA": 1229, + "GTCTTTG": 1230, + "TACTGA": 1231, + "TCCCAGG": 1232, + "TTATTTA": 1233, + "TTAGTT": 1234, + "GGACC": 1235, + "TATAAAA": 1236, + "CAAACAA": 1237, + "CTTCTC": 1238, + "TCTATCTA": 1239, + "GAAATAA": 1240, + "GTGTAA": 1241, + "CTTTGTT": 1242, + "GATAAAA": 1243, + "GCCCAGG": 1244, + "GCGATT": 1245, + "AAAAAATT": 1246, + "TACAGG": 1247, + "GGCTAA": 1248, + "TAGCTT": 1249, + "GTCTCTA": 1250, + "CTCCTGA": 1251, + "GAATAAA": 1252, + "TTACCA": 1253, + "GGGACA": 1254, + "GCCACTG": 1255, + "GTTTAAA": 1256, + "GTCTGTG": 1257, + "TGACAAA": 1258, + "TACATTTT": 1259, + "GCCACC": 1260, + "TGTTTT": 1261, + "TAGCAA": 1262, + "TTATAAA": 1263, + "GACCCA": 1264, + "GCAGC": 1265, + "CAGACAGA": 1266, + "CACAAAA": 1267, + "GCCCTA": 1268, + "TATTAAAA": 1269, + "CGTATT": 1270, + "CCATCC": 1271, + "TCGATT": 1272, + "GAAGGAA": 1273, + "GATCCA": 1274, + "TATTTGA": 1275, + "GTGAATT": 1276, + "TACCTT": 1277, + "CGTCTT": 1278, + "CCTAGG": 1279, + "TCGAAA": 1280, + "CTTTCTG": 1281, + "TGAAGAA": 1282, + "TCTCTCA": 1283, + "GTCTCTT": 1284, + "GGAGGGG": 1285, + "GTCTGTT": 1286, + "CTATGA": 1287, + "GGAAATT": 1288, + "GCACACA": 1289, + "GCCTTTT": 1290, + "CAGTCC": 1291, + "CTGGTA": 1292, + "GCATCC": 1293, + "TAGTTA": 1294, + "GGCTTA": 1295, + "GAGTCC": 1296, + "TGAAAA": 1297, + "TAGATAGA": 1298, + "TGTTTGTT": 1299, + "TACTCA": 1300, + "CATTTAA": 1301, + "GATTTTA": 1302, + "CACTCC": 1303, + "GAAACAA": 1304, + "GCGCTG": 1305, + "TCTTTCA": 1306, + "CTGTCC": 1307, + "GAACTCA": 1308, + "CGGAAA": 1309, + "TATTGTT": 1310, + "GCACTA": 1311, + "TATTCAA": 1312, + "GCGGGG": 1313, + "GTGGCC": 1314, + "TAATTAAA": 1315, + "TACTAA": 1316, + "GCGGTG": 1317, + "TACCAA": 1318, + "GGTATA": 1319, + "CTAGTT": 1320, + "GCAGAGG": 1321, + "CTTTTTTTT": 1322, + "TTTTTTTTTTTTTTTT": 1323, + "TACAGTA": 1324, + "CCATGTT": 1325, + "TAGTGA": 1326, + "CGTGTG": 
1327, + "GCTCTGA": 1328, + "CTTCCTG": 1329, + "TCGCTG": 1330, + "TAAATCA": 1331, + "TCCAATT": 1332, + "GTTTCTG": 1333, + "GAAGAGA": 1334, + "GGGTAA": 1335, + "CCATAA": 1336, + "TTATATT": 1337, + "CGAATT": 1338, + "CCGGA": 1339, + "TGAGCC": 1340, + "CCGTA": 1341, + "CAGAGGA": 1342, + "GTGTTTG": 1343, + "GACAAAA": 1344, + "TTTTTTAAA": 1345, + "GTTGCC": 1346, + "GAGTTTT": 1347, + "TCAAAAAA": 1348, + "TGTTTCA": 1349, + "TATCTA": 1350, + "TCTCTCC": 1351, + "CTCCACA": 1352, + "TAAATATT": 1353, + "TTTTCTG": 1354, + "CTCTCAA": 1355, + "CCTTAAA": 1356, + "TCTTTTAA": 1357, + "GAACAAA": 1358, + "TTAGCA": 1359, + "GCTCATG": 1360, + "TAAAGTA": 1361, + "GGATAA": 1362, + "TTATTAAA": 1363, + "CTCCATT": 1364, + "TCTCTGA": 1365, + "TTATTTG": 1366, + "CCTGTAA": 1367, + "TTATATA": 1368, + "GACTTTT": 1369, + "TGTTGTT": 1370, + "GCAAATG": 1371, + "CTTCAAA": 1372, + "GAATATT": 1373, + "GAATCC": 1374, + "CTCTTAA": 1375, + "GCATAA": 1376, + "GAATGAA": 1377, + "CTTAAAAA": 1378, + "TAAAAATG": 1379, + "TTTTAAAAA": 1380, + "CTCTGGG": 1381, + "TGATCC": 1382, + "GCTCTCA": 1383, + "CTCCAGA": 1384, + "GAGTGCAGTG": 1385, + "CAATATT": 1386, + "TAGAAAA": 1387, + "GTAAATG": 1388, + "TAGCTG": 1389, + "GCTCAAA": 1390, + "GCAGGAA": 1391, + "TACCTG": 1392, + "GGGAAAA": 1393, + "TTTTCTA": 1394, + "GGGGGGGG": 1395, + "CCGA": 1396, + "CTTTGAA": 1397, + "GGAGGTG": 1398, + "TAGTCA": 1399, + "GGCCCA": 1400, + "TGATGTT": 1401, + "CAAATAA": 1402, + "TCTTCCA": 1403, + "GCGCTT": 1404, + "GTATTTG": 1405, + "GTCTC": 1406, + "GAAATCA": 1407, + "TGATAAA": 1408, + "CATTCTT": 1409, + "TATCCA": 1410, + "GCCTCTG": 1411, + "TGAGATG": 1412, + "CGCCAA": 1413, + "GTTTTATT": 1414, + "TATATATT": 1415, + "GTAGGA": 1416, + "GACAGAA": 1417, + "CTCCAGCCTGGG": 1418, + "GCGTGA": 1419, + "GGTATG": 1420, + "GAGGGAGG": 1421, + "TCATTTG": 1422, + "CTACC": 1423, + "TACAGAA": 1424, + "GGTAGA": 1425, + "GATCTA": 1426, + "GTCCATG": 1427, + "TGAGGAA": 1428, + "TAATAAAA": 1429, + "TAAACTT": 1430, + "TCACATT": 1431, + "GGAGGCC": 1432, + 
"TCACAAA": 1433, + "CACTTTT": 1434, + "CGGCC": 1435, + "CAACAGA": 1436, + "GTAGAGA": 1437, + "GTTATTTT": 1438, + "CGTTTG": 1439, + "TCGTCA": 1440, + "TCTGCTG": 1441, + "CAACACA": 1442, + "GGTAGG": 1443, + "GCAGCTG": 1444, + "TAGTAGAGA": 1445, + "CAAGCC": 1446, + "GCATTTG": 1447, + "TAATATG": 1448, + "GCTTAAA": 1449, + "GCTTCTG": 1450, + "CTCTCCA": 1451, + "TCATCTT": 1452, + "CGTCTG": 1453, + "TCATTTA": 1454, + "CATAGG": 1455, + "GCTCCTT": 1456, + "TGTTCTT": 1457, + "TACATTA": 1458, + "CACAGAA": 1459, + "TAAATATA": 1460, + "TAGAGG": 1461, + "GATAGG": 1462, + "TCCTGAA": 1463, + "GGAGCTG": 1464, + "TGATATT": 1465, + "TCATTAA": 1466, + "CTTTTAAA": 1467, + "TCGTTA": 1468, + "TAAACTA": 1469, + "GTTTGAA": 1470, + "TAAAATTA": 1471, + "CACCCC": 1472, + "TCAGAGA": 1473, + "CTCCTGCCTCA": 1474, + "TGACATT": 1475, + "GTATTTA": 1476, + "CTTCATT": 1477, + "GAAACTG": 1478, + "TAACACA": 1479, + "GTTCAAA": 1480, + "GGAGATG": 1481, + "TCGGCC": 1482, + "CAGCATT": 1483, + "TCGATG": 1484, + "TATTCTA": 1485, + "CTGTGAA": 1486, + "TATTGAA": 1487, + "TTTTCCA": 1488, + "TATTTCTT": 1489, + "GGTGAAA": 1490, + "CTGAGAA": 1491, + "GCACAGA": 1492, + "GCGAGG": 1493, + "CTGTGTG": 1494, + "TGAAATG": 1495, + "TGATGAA": 1496, + "GTCCAAA": 1497, + "CTCAATT": 1498, + "TCCAGAA": 1499, + "GTATATA": 1500, + "TAAAGTT": 1501, + "TCTCAAAA": 1502, + "TCCATCA": 1503, + "GTCTGAA": 1504, + "TGAGAGA": 1505, + "TGATTTG": 1506, + "TTAGCC": 1507, + "CTCCATG": 1508, + "TCCCTGA": 1509, + "GAGCTA": 1510, + "CCCCCCCC": 1511, + "GTGGAAA": 1512, + "CTGGGAA": 1513, + "CAATGAA": 1514, + "CCACACA": 1515, + "CTTTCAA": 1516, + "CGGAGG": 1517, + "TCGTGA": 1518, + "CCAGAAA": 1519, + "GTTTTAAA": 1520, + "TGTTGAA": 1521, + "TCCTGTG": 1522, + "CTAAATG": 1523, + "TCCTTTA": 1524, + "GTCTGGG": 1525, + "TCTCTTTT": 1526, + "TACGG": 1527, + "TATTGTA": 1528, + "TTAGTG": 1529, + "TTACC": 1530, + "TAATCCCAGCACTTTG": 1531, + "TCTGGAA": 1532, + "CTTCTCA": 1533, + "CGCATT": 1534, + "TATTTAAA": 1535, + "TCACACA": 1536, + "TAATCAA": 1537, + 
"GCGAAA": 1538, + "GGGCCA": 1539, + "GTTCATT": 1540, + "GAGAAAAA": 1541, + "TTTTGTA": 1542, + "TACTTTT": 1543, + "TCGAGG": 1544, + "GTGAAAAA": 1545, + "CAATATA": 1546, + "TCCCATG": 1547, + "CAATTAA": 1548, + "CTGGAAA": 1549, + "CCCAGCA": 1550, + "TCCCATT": 1551, + "TCCTGTT": 1552, + "CTCTTTA": 1553, + "TCCCCTT": 1554, + "GTTTCAA": 1555, + "GTCCAGG": 1556, + "GGAAGGA": 1557, + "TAGTTTT": 1558, + "TGACCTT": 1559, + "GTGCTGGGATTACAGG": 1560, + "TATTTATA": 1561, + "TCTGCAA": 1562, + "CTGAAAAA": 1563, + "TATGTTA": 1564, + "CTTCACA": 1565, + "GCACAGG": 1566, + "CCTGCTG": 1567, + "TTTTTTAA": 1568, + "GTTATTA": 1569, + "CCCTTTT": 1570, + "TGATTTA": 1571, + "TACAAAA": 1572, + "TAAGTAA": 1573, + "TTTTTAAA": 1574, + "CATCTC": 1575, + "GTGGTGA": 1576, + "GTGGAGA": 1577, + "CTCTGCA": 1578, + "GTTAAAAA": 1579, + "TACATACA": 1580, + "CTTTGTG": 1581, + "GGACACA": 1582, + "TCTGATG": 1583, + "TATTATT": 1584, + "TCTTCTA": 1585, + "CTGTGTT": 1586, + "TCAGCTT": 1587, + "CTTTATA": 1588, + "GGCGC": 1589, + "TCCCTCA": 1590, + "GTACC": 1591, + "TGGAGAA": 1592, + "CAAAAATT": 1593, + "TCTTTAA": 1594, + "CTCTCTC": 1595, + "TGAGTGA": 1596, + "GCAGCTT": 1597, + "CGGATT": 1598, + "TACGA": 1599, + "TCTTGTT": 1600, + "TCGTAA": 1601, + "GCCTGTG": 1602, + "TATTCTG": 1603, + "GGGATA": 1604, + "GGGTCC": 1605, + "TGAGATT": 1606, + "CTTTTATT": 1607, + "TCCCACA": 1608, + "CATGGTG": 1609, + "TTAGGA": 1610, + "GAACACA": 1611, + "TCATAAA": 1612, + "CAACATT": 1613, + "GGTCCA": 1614, + "GAATTTG": 1615, + "TATTAATT": 1616, + "TCCTGGG": 1617, + "GCAGCAA": 1618, + "CTCTTCA": 1619, + "GAAGAGG": 1620, + "TCTGTCA": 1621, + "CTGAATG": 1622, + "CCACAAA": 1623, + "GTGGAGG": 1624, + "TGATTAA": 1625, + "CTCCCTCC": 1626, + "CACACACACACACACACACACACACACACACA": 1627, + "GCGATG": 1628, + "CATTCTG": 1629, + "GTAGAAA": 1630, + "TCATCAA": 1631, + "TTTTCAA": 1632, + "TATGTATG": 1633, + "CCAAATG": 1634, + "TAATTTTA": 1635, + "TAAGGAA": 1636, + "CTTGAAA": 1637, + "AAAAAAAAAAAA": 1638, + "GCTCCTG": 1639, + "GCAGATG": 1640, + 
"GAAAAATT": 1641, + "GACGC": 1642, + "GTGGGGG": 1643, + "GTCAATT": 1644, + "CTTGCTT": 1645, + "TGACACA": 1646, + "GTGTGTT": 1647, + "CCAGAGA": 1648, + "CCCAGCC": 1649, + "TAAAGAAA": 1650, + "GTCCATT": 1651, + "TAAATTAA": 1652, + "CCCAAAA": 1653, + "GAATTAA": 1654, + "TGAATTA": 1655, + "TTTTTTTG": 1656, + "CCAGCTT": 1657, + "CAATTTG": 1658, + "CTGTTTG": 1659, + "GTCTCAA": 1660, + "GTTTGTG": 1661, + "GGCATA": 1662, + "GGTACA": 1663, + "TGATGTG": 1664, + "GATTTCA": 1665, + "TCTGCTT": 1666, + "GTAATTA": 1667, + "TAAAAAAAA": 1668, + "GCCGCC": 1669, + "TGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTG": 1670, + "GCGTCA": 1671, + "GCTCATT": 1672, + "GAACCTG": 1673, + "TAAACAAA": 1674, + "GTGCTGA": 1675, + "TCAGGAA": 1676, + "TCCTCAA": 1677, + "TCTATTTT": 1678, + "TCTGTTTT": 1679, + "CAGAGCA": 1680, + "CCAGGAA": 1681, + "GTCTTTA": 1682, + "TCTTCAA": 1683, + "TCAAAATT": 1684, + "GCTTATT": 1685, + "GTTCCTT": 1686, + "CACCTA": 1687, + "TCACTGA": 1688, + "GAAGCAA": 1689, + "TAAAGA": 1690, + "TCCTTCA": 1691, + "TCTCATG": 1692, + "TCAGTGA": 1693, + "TACACAA": 1694, + "CACGTG": 1695, + "CCTAAAA": 1696, + "GCCTTTG": 1697, + "GGCTTTT": 1698, + "GTTGAAA": 1699, + "GTTCTC": 1700, + "CTAGA": 1701, + "CTACAAA": 1702, + "GCACAAA": 1703, + "TTACATT": 1704, + "GGCCCC": 1705, + "TAATGTG": 1706, + "CTGCCTT": 1707, + "TCCCAGA": 1708, + "GTGAATG": 1709, + "GGACAGG": 1710, + "GGATGTG": 1711, + "GTTTATA": 1712, + "TGACCAA": 1713, + "GTGGCTG": 1714, + "GTTCTCA": 1715, + "CTTATTTT": 1716, + "CTGGAGA": 1717, + "TTACAAA": 1718, + "GTCTTCA": 1719, + "CAAGAGA": 1720, + "CCATTTG": 1721, + "TCACAGA": 1722, + "CTAGTA": 1723, + "CATTATT": 1724, + "TTAGA": 1725, + "GCTCTCC": 1726, + "GCGCCA": 1727, + "TATGTTTT": 1728, + "TCCTCCA": 1729, + "CAGAAAAA": 1730, + "GTGGGAA": 1731, + "TAATCTT": 1732, + "TGAGTCA": 1733, + "CTGCTC": 1734, + "GTCTCCA": 1735, + "TCATGTT": 1736, + "GTTTCCA": 1737, + "TAAGCAA": 1738, + "CTAAAAATA": 1739, + "TGACTGA": 1740, + "TCGGTT": 1741, + "TTAGAAA": 1742, + "TAAGCC": 1743, + "TAAAGCA": 1744, + 
"CCTCTCC": 1745, + "CCTCCTT": 1746, + "TCAGATT": 1747, + "TATGAAAA": 1748, + "GCTGATG": 1749, + "CATATTTT": 1750, + "GCTCCAA": 1751, + "CGGCGG": 1752, + "CCACTGA": 1753, + "CAGCAAA": 1754, + "CTGTCTT": 1755, + "CTAGCA": 1756, + "TCGGGG": 1757, + "CACAGCA": 1758, + "GCTGATT": 1759, + "CTAGGA": 1760, + "TAACTC": 1761, + "TCATATT": 1762, + "CCTTCTT": 1763, + "CTGCAAA": 1764, + "CCCGC": 1765, + "GGTCTA": 1766, + "CCCAGGA": 1767, + "GTGTCTG": 1768, + "TAATAATAATAA": 1769, + "TCACATG": 1770, + "CAATTTA": 1771, + "TATATATATATATATATATATATATATATATA": 1772, + "CCACAGA": 1773, + "TCAATTTT": 1774, + "GTATTAA": 1775, + "GAACATT": 1776, + "TCTCTTA": 1777, + "CTATTTG": 1778, + "TCTTTCC": 1779, + "GGTTAAA": 1780, + "GCTAATT": 1781, + "CTGCTGA": 1782, + "TACCTA": 1783, + "CAGGGTT": 1784, + "TCGCCA": 1785, + "CAAAAATTA": 1786, + "CTTCTGA": 1787, + "GCATGTG": 1788, + "CTATTAA": 1789, + "GCACATG": 1790, + "CAACATG": 1791, + "TCATGAA": 1792, + "GAATGTT": 1793, + "GGGTTTT": 1794, + "CTGCCTG": 1795, + "GTCCACA": 1796, + "TAAACA": 1797, + "CTCTGGA": 1798, + "GACCCC": 1799, + "GGCAAAA": 1800, + "TCTGTTA": 1801, + "CTAGTG": 1802, + "CTATATA": 1803, + "TCAGTCA": 1804, + "TAACTAA": 1805, + "GAAGATG": 1806, + "GTCTTAA": 1807, + "CAAGGAA": 1808, + "GTAAAAAA": 1809, + "TCCCCTG": 1810, + "TCGCAA": 1811, + "TCTGCCTG": 1812, + "CCTTTTA": 1813, + "GTCCCAGCTA": 1814, + "TATATATG": 1815, + "TATTGTG": 1816, + "TGTGTTTT": 1817, + "GCGCAA": 1818, + "CACAGTG": 1819, + "TAAGATT": 1820, + "CTCTGTA": 1821, + "GGAGGCTGA": 1822, + "GGACAAA": 1823, + "TATTAAAAA": 1824, + "TCGTCC": 1825, + "TCGGAA": 1826, + "CTATAAA": 1827, + "CTTCAGA": 1828, + "CTAGAAA": 1829, + "CATTCAA": 1830, + "CACGCA": 1831, + "CAGGATT": 1832, + "CCATCTT": 1833, + "GTAGCC": 1834, + "GAATTTA": 1835, + "CACGC": 1836, + "CAATCC": 1837, + "TGAGCAA": 1838, + "GAAGCTG": 1839, + "TCAATTA": 1840, + "GAAGTCA": 1841, + "CTGCACA": 1842, + "CCACGG": 1843, + "GGATCTT": 1844, + "CTCCTGCCTCAGCCTCC": 1845, + "TAAATGAA": 1846, + "CCGTC": 1847, + "TCGGTG": 
1848, + "TTTTATTA": 1849, + "GCAGGGG": 1850, + "GCAGGTG": 1851, + "TCTATTA": 1852, + "TAACTTA": 1853, + "CTAATTTT": 1854, + "CCCGCC": 1855, + "TAATACA": 1856, + "GGATTAAA": 1857, + "TCTCTCTG": 1858, + "GCTTCTT": 1859, + "CATTTATT": 1860, + "CCAGAGG": 1861, + "GGACAGA": 1862, + "GCCAATT": 1863, + "TCCCCAA": 1864, + "GTTGATT": 1865, + "GAAGAAAA": 1866, + "GCATTTA": 1867, + "CTCTAAA": 1868, + "CACACACACACA": 1869, + "CCTCAAA": 1870, + "TATAATT": 1871, + "CAATGTT": 1872, + "GCCCAGA": 1873, + "GTATATT": 1874, + "CTAAAAAA": 1875, + "CCACAGG": 1876, + "TAAGAGA": 1877, + "TCCTTAA": 1878, + "TATTTTTT": 1879, + "GAATATA": 1880, + "GGATTTG": 1881, + "GTGTGAA": 1882, + "CTGGCTT": 1883, + "GCGGCA": 1884, + "TCCGCC": 1885, + "GCATCTT": 1886, + "TCTAATA": 1887, + "CTGCATT": 1888, + "CTCTGCC": 1889, + "TCACTCA": 1890, + "TCAGCAA": 1891, + "TATTATG": 1892, + "CCAGCTG": 1893, + "GATCTC": 1894, + "GCCTCTT": 1895, + "CTTCCAA": 1896, + "TCCTAAA": 1897, + "TCATCTG": 1898, + "CTATTTA": 1899, + "CTGCAGG": 1900, + "CAAGCAA": 1901, + "GCGGAA": 1902, + "GAAATAAA": 1903, + "TAAAATAA": 1904, + "TCACCTT": 1905, + "CCATGTG": 1906, + "GACCTA": 1907, + "CAGATGA": 1908, + "GTGGCTT": 1909, + "TTATTATTATTA": 1910, + "TCCCGG": 1911, + "TATTTGTT": 1912, + "CTGTAAA": 1913, + "TCCATCCA": 1914, + "CTGTATA": 1915, + "GTTTCTA": 1916, + "GTTGCTT": 1917, + "CCATGAA": 1918, + "GCTCTTA": 1919, + "CTTCATG": 1920, + "GTTCCTG": 1921, + "GCTGGGA": 1922, + "TCAGAGG": 1923, + "CATTAAAA": 1924, + "TCAGTAA": 1925, + "GAATGTG": 1926, + "CTTATTA": 1927, + "GCACTGA": 1928, + "TGAGGTT": 1929, + "CATCAAA": 1930, + "CTTCTCC": 1931, + "GTTTATG": 1932, + "CTTTCCA": 1933, + "GTGCCTG": 1934, + "GAAAGGA": 1935, + "GCATCTG": 1936, + "TACCCA": 1937, + "TAACAGA": 1938, + "AAAAAAAAAAA": 1939, + "CTATGAA": 1940, + "CAGTAAA": 1941, + "TAGCTA": 1942, + "TCGTTTT": 1943, + "GTGTCTT": 1944, + "GAGCAAA": 1945, + "TCTAAAAA": 1946, + "GTTCACA": 1947, + "GAAATGA": 1948, + "CAAATGA": 1949, + "GCCCTGA": 1950, + "GTGTTTA": 1951, + "TCATGTG": 
1952, + "CATATTA": 1953, + "TCAAAAAAA": 1954, + "TAAGTTA": 1955, + "TCTCTCTT": 1956, + "CCAGTGA": 1957, + "CCTCTGA": 1958, + "CAAGATG": 1959, + "GCCTGTT": 1960, + "GTTTGGG": 1961, + "CATTCATT": 1962, + "GCCCCTG": 1963, + "GTTCTGA": 1964, + "GCGGCC": 1965, + "GCGGTT": 1966, + "CAAAACAAAA": 1967, + "TACATATA": 1968, + "GAATTAAA": 1969, + "TCAAGAA": 1970, + "CTGTATT": 1971, + "TTTTTATT": 1972, + "GATTATT": 1973, + "TCTAATG": 1974, + "GTTGCTG": 1975, + "TGAATGAA": 1976, + "TCAGCTG": 1977, + "CTTGATT": 1978, + "CAGAATG": 1979, + "CTAATTA": 1980, + "TATAATG": 1981, + "GTTTTGTTTT": 1982, + "CCAGCCTG": 1983, + "TGATGGA": 1984, + "GCAGATT": 1985, + "CTCTATT": 1986, + "GCAGTCA": 1987, + "TAAGTGA": 1988, + "CTACACA": 1989, + "CGCATG": 1990, + "TAGCCA": 1991, + "GTGGCTCA": 1992, + "CAAATAAA": 1993, + "GTGCTCA": 1994, + "TTTTTTTTTT": 1995, + "TAACATG": 1996, + "TCCCAGCTA": 1997, + "CAAAGTA": 1998, + "TCATATA": 1999, + "CAGCATG": 2000, + "TGATCTT": 2001, + "CATAATT": 2002, + "TGTGTTA": 2003, + "TTTTGAA": 2004, + "TTAATTA": 2005, + "GATATTA": 2006, + "TCATTCA": 2007, + "TGATATA": 2008, + "TGACTCA": 2009, + "GACGTT": 2010, + "TGACATG": 2011, + "GTTGTGA": 2012, + "CATTTTTT": 2013, + "GCCTGGA": 2014, + "CTATGTT": 2015, + "CTTTGGG": 2016, + "GTCTCAAA": 2017, + "CTGGCTG": 2018, + "CCACATG": 2019, + "GGCGTG": 2020, + "CTTAATG": 2021, + "TAAGATG": 2022, + "GTATAAA": 2023, + "TGTATTA": 2024, + "TAACTCA": 2025, + "GAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGA": 2026, + "GCATGAA": 2027, + "GTTAATG": 2028, + "TCCAGGA": 2029, + "GAGAGAAA": 2030, + "TCTCTGTG": 2031, + "CTCTCTA": 2032, + "CCACCTG": 2033, + "GCCAGGA": 2034, + "CTGGAGG": 2035, + "CCATTTA": 2036, + "GTCTGGA": 2037, + "GCCCACA": 2038, + "TAGAGAA": 2039, + "CAACTCA": 2040, + "GGCAGGA": 2041, + "TCTTATG": 2042, + "CAAAGGA": 2043, + "GGTAAAA": 2044, + "GAGAGGA": 2045, + "GTCCAGA": 2046, + "GCCCTCA": 2047, + "GATATTTT": 2048, + "CAGGGAA": 2049, + "CCACATT": 2050, + "GAGGAGG": 2051, + "GAAACTT": 2052, + "CAGAATT": 2053, + "TCAGATG": 2054, + 
"TATTTCC": 2055, + "TACAGTG": 2056, + "TGAGCTG": 2057, + "CCATCTG": 2058, + "GAGAATG": 2059, + "TCAACAA": 2060, + "ATT": 2061, + "TAACTGA": 2062, + "TGAGAGG": 2063, + "CACTGAA": 2064, + "CCACCTT": 2065, + "CTGCAGA": 2066, + "TCACCAA": 2067, + "TGAGCTT": 2068, + "CAAAGCA": 2069, + "GGTTTTA": 2070, + "CGGGGTT": 2071, + "TCCAAAAA": 2072, + "TATGTATA": 2073, + "CCAGATG": 2074, + "TCCATTTT": 2075, + "CTGCTCA": 2076, + "GATAATT": 2077, + "CCACCAA": 2078, + "CTCCTCC": 2079, + "GAGAATT": 2080, + "GAAAGTA": 2081, + "TAAAATAAAA": 2082, + "CTTCTTA": 2083, + "CTGTTTA": 2084, + "GAATCAA": 2085, + "GCATGTT": 2086, + "GCACGG": 2087, + "GACTGAA": 2088, + "GTGCACA": 2089, + "GACGTG": 2090, + "TATACAA": 2091, + "TCGACA": 2092, + "GAAGACA": 2093, + "TAAAGGA": 2094, + "GATCAAA": 2095, + "CAGTGTG": 2096, + "CTAGCC": 2097, + "GAGGAAAA": 2098, + "TCTGAAAA": 2099, + "GAACCCA": 2100, + "GATGGATG": 2101, + "GTTCTTA": 2102, + "CTATATT": 2103, + "GCATTAA": 2104, + "TCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTC": 2105, + "TCAGTC": 2106, + "TATTTTTG": 2107, + "GAGGATT": 2108, + "GTATGTG": 2109, + "TAACCAA": 2110, + "GTTGTTTT": 2111, + "TTTTTCTT": 2112, + "GTGTTAA": 2113, + "CTTGGAA": 2114, + "AAAAAATG": 2115, + "CAATGTG": 2116, + "GTGCCTT": 2117, + "GCCTCAA": 2118, + "GAGTCTT": 2119, + "GCTAATTTT": 2120, + "CGAAAAA": 2121, + "GTGTATA": 2122, + "GCGTTA": 2123, + "CTGCACTCCAGCCTGGG": 2124, + "GTTCATG": 2125, + "CAAAGAAA": 2126, + "GCAGTAA": 2127, + "GGATGAA": 2128, + "CTTTATG": 2129, + "CAGGAAAA": 2130, + "TCCTGCA": 2131, + "CTGTCTG": 2132, + "GAACATG": 2133, + "GGATGGA": 2134, + "GCCTGAA": 2135, + "CAAAAATG": 2136, + "TCCAATG": 2137, + "CCAGCAA": 2138, + "GGCCTA": 2139, + "CAACTGA": 2140, + "GCACCTG": 2141, + "GTCTATT": 2142, + "CCTCTCA": 2143, + "GTGGTCA": 2144, + "GTGTAAA": 2145, + "GTACACA": 2146, + "GTAAAATT": 2147, + "GTACATT": 2148, + "TATATAAA": 2149, + "CTGTTAA": 2150, + "TAAGTCA": 2151, + "GCCTCCA": 2152, + "AAATTAAA": 2153, + "GTGCAGG": 2154, + "TCCTGGA": 2155, + "GTGCAAA": 2156, + "GCGTCC": 2157, 
+ "CCATTAA": 2158, + "GGAGGGA": 2159, + "TCACTTA": 2160, + "TCATTAAA": 2161, + "CAACATA": 2162, + "TAATAGA": 2163, + "TAATGTA": 2164, + "GATTTTTT": 2165, + "GTTGTCA": 2166, + "GGAGACA": 2167, + "GTGTGGG": 2168, + "TCACAGG": 2169, + "TCGGCA": 2170, + "CTCCCTG": 2171, + "GACCAAA": 2172, + "TGTTTATT": 2173, + "CGAATG": 2174, + "CTCAATG": 2175, + "TCACCTG": 2176, + "CAGTGTT": 2177, + "TGAGACA": 2178, + "TAGGGG": 2179, + "GAAAAATG": 2180, + "GTTGAGA": 2181, + "TCGATA": 2182, + "CTCGGGAGG": 2183, + "GTTGTC": 2184, + "CCAGTCA": 2185, + "GCCCAGGCTG": 2186, + "GAACAGA": 2187, + "GGCTCACTGCAA": 2188, + "GCAGACA": 2189, + "TGAGGTG": 2190, + "CACGTT": 2191, + "TAAGAAAA": 2192, + "CCAGGCA": 2193, + "GTATCTT": 2194, + "CTTGGGAGG": 2195, + "CTTTCTA": 2196, + "CCGCTG": 2197, + "GAGCTCA": 2198, + "GAGACAGA": 2199, + "CTTCAGG": 2200, + "GCACATT": 2201, + "GTACAAA": 2202, + "CTTGTAA": 2203, + "GTGGGTG": 2204, + "GAAGTGA": 2205, + "GGTCTC": 2206, + "GTATGTT": 2207, + "GCACTCA": 2208, + "TTATGTT": 2209, + "CAAGTCA": 2210, + "CAAGTGA": 2211, + "GAAACTA": 2212, + "TAAATAAAA": 2213, + "TCTTAAAA": 2214, + "GTTGGAA": 2215, + "GTTCTAA": 2216, + "CCACTC": 2217, + "CAGTGAA": 2218, + "GAAAGG": 2219, + "GCACGA": 2220, + "TAACTTTT": 2221, + "GTTGTTA": 2222, + "TCAGTTA": 2223, + "CGGATG": 2224, + "TATTTGAA": 2225, + "CCCTGAA": 2226, + "GCCCTC": 2227, + "CTTCTAA": 2228, + "TTTGTTTT": 2229, + "GAGCTGA": 2230, + "CTGTGGG": 2231, + "CAAGATT": 2232, + "GAAGCTT": 2233, + "TGAGTAA": 2234, + "CTTGCTG": 2235, + "GGATGGG": 2236, + "CGTATG": 2237, + "TCCATTA": 2238, + "GTCTGCA": 2239, + "GCCATTTT": 2240, + "GTTGTAA": 2241, + "CACACAA": 2242, + "GGACTACAGG": 2243, + "CGTTTTA": 2244, + "TCTTCC": 2245, + "TAACCTT": 2246, + "CTTTAAAA": 2247, + "TGAATTTT": 2248, + "CTACAGA": 2249, + "GCAAGAA": 2250, + "TAACAAAA": 2251, + "CAATTAAA": 2252, + "CCACTCA": 2253, + "CATGGTGAAA": 2254, + "CCCAGAA": 2255, + "CTACATT": 2256, + "CCGAGG": 2257, + "TCCAGTG": 2258, + "TGAGTTA": 2259, + "GGAGTCA": 2260, + "TAACGA": 2261, + 
"GAGTAAA": 2262, + "GACTCTG": 2263, + "GGAGCTT": 2264, + "TACTCC": 2265, + "CTGCATG": 2266, + "GCTTTTTT": 2267, + "GTCTAAA": 2268, + "GTGCGG": 2269, + "CATCTCA": 2270, + "TGATCAA": 2271, + "GGAGATT": 2272, + "GCAAAAAA": 2273, + "CACCAAA": 2274, + "TGACGG": 2275, + "CAGAGG": 2276, + "GTTGATG": 2277, + "CTTGTCA": 2278, + "TCCACCTG": 2279, + "GGAGCAA": 2280, + "CAAGTAA": 2281, + "CCATAAA": 2282, + "GTGCATG": 2283, + "GCATATT": 2284, + "GTAGATT": 2285, + "GCCTAA": 2286, + "CTCAAAAA": 2287, + "GGAGAAAA": 2288, + "CTATCC": 2289, + "TAATATTA": 2290, + "GTGCTC": 2291, + "CAATATG": 2292, + "TGTGGAA": 2293, + "TGACTC": 2294, + "GTGTATG": 2295, + "TTTTAATG": 2296, + "GCTCTAA": 2297, + "CACAATG": 2298, + "CAGCTCA": 2299, + "GTTGGTT": 2300, + "CTAAAATT": 2301, + "GTCTATG": 2302, + "TGTGAAAA": 2303, + "CTGGGTT": 2304, + "CCCCTCC": 2305, + "CCCTCTT": 2306, + "GCAGGGA": 2307, + "GAAACCA": 2308, + "CATTTCC": 2309, + "GCAGCCA": 2310, + "TCATATG": 2311, + "GCAGGCA": 2312, + "CGTAAAA": 2313, + "TGACCTG": 2314, + "CAGAGGTT": 2315, + "CTTGTGA": 2316, + "TTATCTT": 2317, + "CTGTATG": 2318, + "GTCAATG": 2319, + "GGACGG": 2320, + "GCGTAA": 2321, + "CAAACTA": 2322, + "TAAATGTT": 2323, + "CTTCGG": 2324, + "CTCCCCA": 2325, + "TACAATG": 2326, + "TCTGTAA": 2327, + "GAATATG": 2328, + "GCGGGA": 2329, + "GGACATT": 2330, + "TTATGAA": 2331, + "GGATGTT": 2332, + "GGACATG": 2333, + "TCAGGTG": 2334, + "CAACAAAA": 2335, + "GAAAGAGA": 2336, + "GTGGATG": 2337, + "GGGCTA": 2338, + "CCATCAA": 2339, + "CAGCTGA": 2340, + "CTCCACC": 2341, + "CAATCAA": 2342, + "GTGGTC": 2343, + "TGACAGG": 2344, + "CCATTCA": 2345, + "GTCCCTG": 2346, + "CAGACACA": 2347, + "GTTGGTG": 2348, + "CCTCCTG": 2349, + "GAACTGA": 2350, + "TATTCATT": 2351, + "GCCCATG": 2352, + "CAATCTT": 2353, + "GAAAGCA": 2354, + "GAATCTG": 2355, + "TTATTTTA": 2356, + "GTTTGGA": 2357, + "TTTTTGTT": 2358, + "GGGAATG": 2359, + "GCGACA": 2360, + "TAAACTG": 2361, + "CCATATT": 2362, + "GGATCC": 2363, + "CAAGCTT": 2364, + "TAAAAAAAAA": 2365, + "TCACTC": 2366, + 
"CACTGTT": 2367, + "TGTTAATT": 2368, + "GGACTGA": 2369, + "GGAGTGA": 2370, + "CATACACA": 2371, + "GTTTGTA": 2372, + "TCCAGCA": 2373, + "GTGCATT": 2374, + "GGAAAAAA": 2375, + "CCAAGAA": 2376, + "TCAATA": 2377, + "CTTCCCA": 2378, + "TGAGAAAA": 2379, + "GGCCTCCCAAA": 2380, + "CAAGCTG": 2381, + "GCCCAAA": 2382, + "TGACTTA": 2383, + "CAGCCTT": 2384, + "CTGGATT": 2385, + "TTTTTTTA": 2386, + "TCACGG": 2387, + "GCAGTTA": 2388, + "TGACTAA": 2389, + "TTACAGG": 2390, + "TGATATG": 2391, + "TAATTATT": 2392, + "TCTTGAA": 2393, + "GCCCCTT": 2394, + "GTTCAGA": 2395, + "CTCTATG": 2396, + "CCATGGA": 2397, + "GAGGGAA": 2398, + "GGAGGCA": 2399, + "CTTTGCA": 2400, + "TCTTGG": 2401, + "GGAGGTT": 2402, + "GCCAATG": 2403, + "CTGGTGA": 2404, + "CAACCAA": 2405, + "CCAGTC": 2406, + "CTTGAGA": 2407, + "TACAGCA": 2408, + "CTTGTC": 2409, + "GACGGA": 2410, + "CTTCTTTT": 2411, + "GTGGC": 2412, + "GAGGATG": 2413, + "CAATAAAA": 2414, + "GAAATTTT": 2415, + "AAAAAAAAAA": 2416, + "CTCTATA": 2417, + "GTATGAA": 2418, + "CTTGTTA": 2419, + "TAACATA": 2420, + "CAAACACA": 2421, + "TGATTAAA": 2422, + "GCTCTGTT": 2423, + "GTGGGTT": 2424, + "GTTGGGG": 2425, + "GTGTGTA": 2426, + "GTAATTTT": 2427, + "GTATCC": 2428, + "TGTGTGTGTGTG": 2429, + "TCTTCCTT": 2430, + "TCACTAA": 2431, + "TCTCCAAA": 2432, + "TATCAAA": 2433, + "TGATGGG": 2434, + "GGATATT": 2435, + "CAAATTTT": 2436, + "GTTCAGG": 2437, + "GTGGATT": 2438, + "GTGCAGA": 2439, + "GCTGCC": 2440, + "CTCAGAA": 2441, + "GCAGTC": 2442, + "GGATAAA": 2443, + "GCCTTCA": 2444, + "CCAGGTG": 2445, + "TATCTC": 2446, + "CAATGCA": 2447, + "CCCACTG": 2448, + "GTGTATT": 2449, + "CGACAGA": 2450, + "TGAGATA": 2451, + "CCAGGTT": 2452, + "TGTTTAA": 2453, + "CATCATG": 2454, + "TGATTCA": 2455, + "GCAATTA": 2456, + "GAAATGAA": 2457, + "CTTGGTT": 2458, + "GAAGATT": 2459, + "GGATTAA": 2460, + "CCTCATT": 2461, + "GGCCAGGCTG": 2462, + "GCTATTA": 2463, + "GCCAGCA": 2464, + "GAGACAGG": 2465, + "CTTGAGG": 2466, + "CAGTCTT": 2467, + "GTTCTCC": 2468, + "TATTTCAA": 2469, + "TGACGA": 2470, + 
"CATGAAAA": 2471, + "CATTATG": 2472, + "TAAATTTA": 2473, + "GAGTGAA": 2474, + "CAACAGG": 2475, + "TAAGCTT": 2476, + "CACATTTT": 2477, + "GATCTCA": 2478, + "TAGTCC": 2479, + "GACCCTG": 2480, + "TAATGCA": 2481, + "TAAGTC": 2482, + "TAATAATT": 2483, + "GAAGTAA": 2484, + "CAACTC": 2485, + "CATCATT": 2486, + "GACGAA": 2487, + "GAAACAAA": 2488, + "TATTTCTG": 2489, + "CATTAATT": 2490, + "CCACCCC": 2491, + "TAATATTTT": 2492, + "GTTTAAAA": 2493, + "GTATCTG": 2494, + "GTCAAAAA": 2495, + "GATGCTG": 2496, + "TGTTCTG": 2497, + "GGTCAAA": 2498, + "GTAGGAA": 2499, + "GTATATG": 2500, + "TGATCTG": 2501, + "GGGGCTG": 2502, + "GCATCAA": 2503, + "GCCAAAAA": 2504, + "CCACGA": 2505, + "GCTAATG": 2506, + "CAGAGAAA": 2507, + "CCTTCTG": 2508, + "TCCTCTA": 2509, + "GCAGGTT": 2510, + "CTCACTG": 2511, + "TAGATTA": 2512, + "GCCGAGA": 2513, + "CCATCCA": 2514, + "CTTTACA": 2515, + "GTACATG": 2516, + "GCACCAA": 2517, + "CTTTGTA": 2518, + "CTATGTG": 2519, + "TCACTTTT": 2520, + "TGAGTC": 2521, + "CAAGAAAA": 2522, + "CTGACTG": 2523, + "GTTTTTTTT": 2524, + "GCATAAA": 2525, + "TAATCTG": 2526, + "GAAAAAAAA": 2527, + "CAGGATG": 2528, + "TGAGCCA": 2529, + "GAATTCA": 2530, + "TCAGACA": 2531, + "GTTCCAA": 2532, + "TCAGGTT": 2533, + "CAAACTG": 2534, + "CATTTCTT": 2535, + "TGTTAAAA": 2536, + "CCAGACA": 2537, + "CAAGTTA": 2538, + "CATGTTA": 2539, + "CATTCTA": 2540, + "TCTTTTTG": 2541, + "TGAGGGG": 2542, + "CACATTA": 2543, + "TAAAATAAA": 2544, + "GCATATA": 2545, + "TGTTCTA": 2546, + "GAAGGGG": 2547, + "GAGTGTG": 2548, + "TAAGACA": 2549, + "GAACTC": 2550, + "CCAGTAA": 2551, + "GAGAGAGG": 2552, + "GCGACC": 2553, + "CAATTCA": 2554, + "CGGCTG": 2555, + "CCAGATT": 2556, + "CCTGGG": 2557, + "GGAAGAAA": 2558, + "GAGAGG": 2559, + "TCAAAATG": 2560, + "CCTCATG": 2561, + "TAAAGG": 2562, + "CTTTGGA": 2563, + "CCAGGGA": 2564, + "GTACAGA": 2565, + "CTGAGGCAGGA": 2566, + "TGTTTCTT": 2567, + "CCAGGCTG": 2568, + "CTGAGG": 2569, + "GAGGCTG": 2570, + "CTCCTGGG": 2571, + "GAAGTC": 2572, + "CGACC": 2573, + "GGACTCA": 2574, + 
"GGAGTC": 2575, + "CACAATT": 2576, + "GTGTTCA": 2577, + "GACTAAA": 2578, + "GTCATTA": 2579, + "CAAAATTA": 2580, + "TGAAGAAA": 2581, + "GCACCTT": 2582, + "GTTTGCA": 2583, + "TCCTGCC": 2584, + "GTAGATG": 2585, + "GCCTGCA": 2586, + "GAGTTAA": 2587, + "TCCCTTA": 2588, + "GTGGTTA": 2589, + "TCGGGA": 2590, + "TACATAA": 2591, + "TCTCTCCA": 2592, + "CACTAAA": 2593, + "TATATATATATA": 2594, + "GTGGCAA": 2595, + "CACCATG": 2596, + "TTTGAAAA": 2597, + "CACACTG": 2598, + "CTTGGTG": 2599, + "TACACTG": 2600, + "CCTCCAA": 2601, + "CAACCTT": 2602, + "CAGCCAA": 2603, + "TTTTCAAA": 2604, + "TGATAGA": 2605, + "TACACTA": 2606, + "TCTGGG": 2607, + "TCCCAGCA": 2608, + "TAGGAAAA": 2609, + "CTTGGGG": 2610, + "TCTGTGAA": 2611, + "CCTTATT": 2612, + "CATTTAAA": 2613, + "TTTTATTTTA": 2614, + "GCCCTCC": 2615, + "CTGAGCA": 2616, + "CCCGTG": 2617, + "GTAGTGA": 2618, + "TCCTATT": 2619, + "GAAGGTG": 2620, + "TGTGCTG": 2621, + "TCCACTG": 2622, + "TAATCTA": 2623, + "TGATGTA": 2624, + "GTGGTAA": 2625, + "TAATGGA": 2626, + "GATGAAAA": 2627, + "GTAGTAA": 2628, + "GTGGGGA": 2629, + "GTGTCAA": 2630, + "CAGACTG": 2631, + "TCGAAAA": 2632, + "CTCATTA": 2633, + "TAATAATA": 2634, + "CTCAGAAA": 2635, + "CATCCTT": 2636, + "CCGCTT": 2637, + "GGAAGG": 2638, + "CCGTGA": 2639, + "CCACTCC": 2640, + "CTAGAGA": 2641, + "TAGAATG": 2642, + "GGATTTA": 2643, + "TTAATTTT": 2644, + "GCTAATA": 2645, + "TCCCCCA": 2646, + "CAAATATT": 2647, + "GATCATG": 2648, + "TCTTAATT": 2649, + "CAGTATT": 2650, + "GTCTTGAA": 2651, + "CCGAAA": 2652, + "CTATTCA": 2653, + "TAAGATA": 2654, + "CTTGCAA": 2655, + "GCCCCAA": 2656, + "TCCCTAA": 2657, + "GAAGTTA": 2658, + "GATGATG": 2659, + "CTTGATG": 2660, + "CCCTAAA": 2661, + "CCTGCCTG": 2662, + "GACATTTT": 2663, + "CCAGCCA": 2664, + "TGTGTGTGTG": 2665, + "GTCTATA": 2666, + "TCTCTGTT": 2667, + "GTCTGTA": 2668, + "TATAATA": 2669, + "CTTGTTTT": 2670, + "CGCCATT": 2671, + "CTCAGCA": 2672, + "TACAGTT": 2673, + "CAAGAGG": 2674, + "GGAAGCA": 2675, + "GCCTTTA": 2676, + "CCCCATT": 2677, + "CAACGA": 2678, + 
"GTCATTTT": 2679, + "CCCGCA": 2680, + "CAGTTAA": 2681, + "GAATCTT": 2682, + "CATGTTTT": 2683, + "CCGGGG": 2684, + "CTACTGA": 2685, + "TCACGA": 2686, + "TAAATTTG": 2687, + "GCCCATT": 2688, + "CTCTAGG": 2689, + "GGACCTG": 2690, + "TCAGGGA": 2691, + "GAGACTG": 2692, + "CCAAAAAA": 2693, + "GCCGG": 2694, + "CCAGGGG": 2695, + "TCAGAAAA": 2696, + "CATCTGA": 2697, + "TCTTCAAA": 2698, + "CTACAGG": 2699, + "GAGGCAGG": 2700, + "CATTGTA": 2701, + "TAAATCAA": 2702, + "GACTCTT": 2703, + "CTGATTA": 2704, + "GCATATG": 2705, + "GGACCTT": 2706, + "CAAGACA": 2707, + "TATTTATG": 2708, + "TATTTTAAA": 2709, + "CCGAGA": 2710, + "TCATTTTA": 2711, + "CTCACTCA": 2712, + "CCACCCA": 2713, + "CTCTAGA": 2714, + "CTACATG": 2715, + "GTGCTTA": 2716, + "CAACCTG": 2717, + "TCTGTGTT": 2718, + "TAAATATG": 2719, + "CAAAGG": 2720, + "CCCTGTT": 2721, + "GTTCGG": 2722, + "TGATAAAA": 2723, + "CACGAA": 2724, + "GTTGAGG": 2725, + "CAGAGTGA": 2726, + "GAAATTAA": 2727, + "CACATA": 2728, + "GAACAGG": 2729, + "TCTCCTGA": 2730, + "CCTGAGG": 2731, + "GGAGGCCAA": 2732, + "GTTTACA": 2733, + "TAACAGG": 2734, + "TGTGGTG": 2735, + "GCCTCCCAAA": 2736, + "CCATCCTG": 2737, + "GATTCTT": 2738, + "GAATGGA": 2739, + "GTAGTCA": 2740, + "CTCCTCTG": 2741, + "GAAAGAAAGAAAGAAA": 2742, + "CCCTGTG": 2743, + "CAGTATG": 2744, + "GCGATA": 2745, + "GGACTC": 2746, + "GAAAGA": 2747, + "TGTTGG": 2748, + "GTAGCTT": 2749, + "CATTTTAA": 2750, + "CCCTCTG": 2751, + "GCATTCA": 2752, + "CGATTA": 2753, + "TCACATA": 2754, + "TAATGAAA": 2755, + "GGAATTA": 2756, + "CTGTCAA": 2757, + "TAAATTAAA": 2758, + "CAAGTC": 2759, + "GTATTCA": 2760, + "GGCCATG": 2761, + "CTTTAGA": 2762, + "TGTTTCC": 2763, + "CATGTA": 2764, + "GAATAAAA": 2765, + "CAACTAA": 2766, + "TCATCTA": 2767, + "CACTCTT": 2768, + "CAGTTTG": 2769, + "CATAAAAA": 2770, + "GCATGCA": 2771, + "GATTTA": 2772, + "GAACCAA": 2773, + "TCTGTGA": 2774, + "TCAGCCA": 2775, + "TCTCCACA": 2776, + "TCTCAGCTCA": 2777, + "TATCATG": 2778, + "GCACTTA": 2779, + "CGCCAGG": 2780, + "CGGGG": 2781, + "CATTAAAAA": 
2782, + "TTTGTTA": 2783, + "GGATATA": 2784, + "TCGACC": 2785, + "TAATCCA": 2786, + "CCGC": 2787, + "CATTGTT": 2788, + "CCAGTTA": 2789, + "GTAGTTA": 2790, + "CTAGGAA": 2791, + "CCTAATT": 2792, + "TCATGGG": 2793, + "GAACTAA": 2794, + "GCTATTTT": 2795, + "CCGTCA": 2796, + "CAGATTA": 2797, + "CCATATA": 2798, + "CAACTTA": 2799, + "TCAGTTTT": 2800, + "CTACCTT": 2801, + "GCACTC": 2802, + "GTGTGGA": 2803, + "GTGCCAA": 2804, + "GACAATG": 2805, + "GACAATT": 2806, + "GTACCTT": 2807, + "TAAACATT": 2808, + "CAGGAGG": 2809, + "GTGCGA": 2810, + "GAAAATTA": 2811, + "TCTCTTAA": 2812, + "CCGATT": 2813, + "GATGATT": 2814, + "CCATGGG": 2815, + "TCGGTA": 2816, + "CCATATG": 2817, + "CCAGTCC": 2818, + "GCCTTAA": 2819, + "TGATCCA": 2820, + "GTTGCAA": 2821, + "GTAGAGG": 2822, + "CAGATTTT": 2823, + "GTACTTA": 2824, + "TCTTTCTTTCTTTCTT": 2825, + "GCTCTGTG": 2826, + "TCAATAA": 2827, + "GTTTAGA": 2828, + "GTTCGA": 2829, + "CAAGGTT": 2830, + "CTCATTTT": 2831, + "CACAGG": 2832, + "CATGCTG": 2833, + "GAACGG": 2834, + "TATAAAAA": 2835, + "GAAGGCA": 2836, + "GAGCATT": 2837, + "TGTTTGTG": 2838, + "GCTGTTA": 2839, + "GTCACTG": 2840, + "CAAATGAA": 2841, + "GTGACTG": 2842, + "GTTCTTTT": 2843, + "CAGGCTGGAGTGCAGTG": 2844, + "TGATGAAA": 2845, + "TAACGG": 2846, + "CTACTAA": 2847, + "GACATTA": 2848, + "GGACGA": 2849, + "GAGCATG": 2850, + "GCATGGG": 2851, + "CCACTTA": 2852, + "CTATCAA": 2853, + "GCTGTTTT": 2854, + "GTCGTG": 2855, + "CCTGGCC": 2856, + "TCTCTGAA": 2857, + "TGTTGTA": 2858, + "CAGCCAGG": 2859, + "GTTTAGG": 2860, + "CCGCAA": 2861, + "GGAGTAA": 2862, + "CCAATTA": 2863, + "CAGCAAAA": 2864, + "TCATCCA": 2865, + "CACGTA": 2866, + "TCATAGA": 2867, + "TAATTAAAA": 2868, + "CACTTAA": 2869, + "TCTTTATT": 2870, + "GAGATTA": 2871, + "TAAGAGG": 2872, + "CAAATTAA": 2873, + "GACGCA": 2874, + "CACGGA": 2875, + "GTGTGCA": 2876, + "TCT": 2877, + "TATTATTA": 2878, + "GAAATATT": 2879, + "GGAGTTA": 2880, + "TCTTTGA": 2881, + "CTGATTTT": 2882, + "TGTGAATT": 2883, + "TCCCACC": 2884, + "CCCTTTG": 2885, + "CAAGGTG": 
2886, + "CAGAGTT": 2887, + "CCCCATG": 2888, + "CTACCAA": 2889, + "CTCCAAAA": 2890, + "CTTCCCC": 2891, + "CTGCTAA": 2892, + "GATTAAAA": 2893, + "GCTTATG": 2894, + "CTACTTA": 2895, + "TAAAAAATT": 2896, + "TCAGTCC": 2897, + "CTATTAAA": 2898, + "GAATGGG": 2899, + "CACAGTA": 2900, + "CAACGG": 2901, + "GGTTATT": 2902, + "TCACCCA": 2903, + "TGATGCA": 2904, + "TAATTTTTT": 2905, + "GTTTGAGA": 2906, + "GTATTAAA": 2907, + "GCCCCCA": 2908, + "TATAGTA": 2909, + "TAGTAAA": 2910, + "TGATACA": 2911, + "GTGGTTTT": 2912, + "CCACTAA": 2913, + "CACAGAGA": 2914, + "CCTCTGCCTCC": 2915, + "CAAAAAAAA": 2916, + "CTCTCTCC": 2917, + "CATAATA": 2918, + "GAAGCCA": 2919, + "GTTCCCA": 2920, + "TGTGTTTG": 2921, + "CAATGGA": 2922, + "TGAAGTA": 2923, + "CTTCATA": 2924, + "CACTGTG": 2925, + "GCTCTTTT": 2926, + "TGACATA": 2927, + "TAAAGAAAA": 2928, + "GAGAAATG": 2929, + "CAGGGAGG": 2930, + "TGTTCAA": 2931, + "GAGCCAA": 2932, + "GACAGAGA": 2933, + "GGCTGAA": 2934, + "CAAATATA": 2935, + "GTGGAAAA": 2936, + "TAAGGTT": 2937, + "GTGATTA": 2938, + "GGATCTG": 2939, + "GATGTTA": 2940, + "GACTACACA": 2941, + "TCCTATA": 2942, + "CTGCCAA": 2943, + "TCCCGA": 2944, + "GTGATTTT": 2945, + "GCGTTTT": 2946, + "CAGAGTA": 2947, + "GAAAGGAA": 2948, + "CACTTTG": 2949, + "CCCCAAAA": 2950, + "GCAACCCA": 2951, + "TGCATTTT": 2952, + "TCTAGAA": 2953, + "TACTTTG": 2954, + "TGAGGCA": 2955, + "CATCTCC": 2956, + "TCGCTA": 2957, + "TGACTTTT": 2958, + "GAGCCTG": 2959, + "CATTTGTT": 2960, + "TCTTTGTT": 2961, + "GCAAAATT": 2962, + "CCTGATT": 2963, + "GATAAAAA": 2964, + "GAGTGTT": 2965, + "TCCTGTA": 2966, + "TACAGAAA": 2967, + "TCCAGGAA": 2968, + "GCCAGTG": 2969, + "TAGATTTT": 2970, + "TAATAGG": 2971, + "CTCCTCA": 2972, + "CATTTTTG": 2973, + "CATTTCAA": 2974, + "GCCATCA": 2975, + "TAAAATATA": 2976, + "GACTGTT": 2977, + "GCATGGA": 2978, + "CAAAGTT": 2979, + "CATGATT": 2980, + "GAGTTTG": 2981, + "CTAGCAA": 2982, + "CTTCCTA": 2983, + "GGGGAGG": 2984, + "CTATATG": 2985, + "TATTTATTTT": 2986, + "CACCATT": 2987, + "CCCTCAA": 2988, + 
"TTTTTTTTTTTTTT": 2989, + "GATCATT": 2990, + "GTACATA": 2991, + "CTCCATA": 2992, + "CCCCGTCTCTA": 2993, + "GCCTGCC": 2994, + "CTAGCTT": 2995, + "CCCGGA": 2996, + "GATGTTTT": 2997, + "GTATTTTA": 2998, + "TCAGATA": 2999, + "CCTGGAA": 3000, + "TATTCCA": 3001, + "GGACCAA": 3002, + "GCCATTA": 3003, + "CGACTGA": 3004, + "TAAGCTG": 3005, + "TAAACACA": 3006, + "GTTTCTC": 3007, + "CATCTTA": 3008, + "GAAATTTG": 3009, + "TAATGGG": 3010, + "TAAAATTTT": 3011, + "CTGTTCA": 3012, + "CCTGTTA": 3013, + "TACTGAA": 3014, + "TGACCCA": 3015, + "TGATTTTA": 3016, + "CTCCTTA": 3017, + "TATAGAA": 3018, + "CTGCGG": 3019, + "GCGGTA": 3020, + "GTGCTAA": 3021, + "CAGAGGAA": 3022, + "TACATCA": 3023, + "TCAATCAA": 3024, + "CTGCAGCC": 3025, + "TGAATATT": 3026, + "TCTACAA": 3027, + "CCACATA": 3028, + "CCCGTT": 3029, + "TATACACA": 3030, + "TCCTCTC": 3031, + "TCTACTT": 3032, + "CCGGAA": 3033, + "CTTTTTTA": 3034, + "GAAAGAAAA": 3035, + "CTATCTT": 3036, + "GACTTTG": 3037, + "TGAACAA": 3038, + "GCAGTTTT": 3039, + "GCTAAAAA": 3040, + "GAGGCGG": 3041, + "TAATAAAAA": 3042, + "CTGGTCA": 3043, + "CAGACAA": 3044, + "GGATATG": 3045, + "TGAAGG": 3046, + "GCCAGAA": 3047, + "CCAGGCC": 3048, + "CCACCATG": 3049, + "CAAACTT": 3050, + "TCATGTA": 3051, + "GCTGCTT": 3052, + "GTAATA": 3053, + "CCCCCAA": 3054, + "CAGCCTG": 3055, + "TCAACTT": 3056, + "TAAAATTAA": 3057, + "GCTGAAAA": 3058, + "CGACGA": 3059, + "GTGGGCA": 3060, + "TGAGGGA": 3061, + "CGCTCC": 3062, + "TTTTGTTTT": 3063, + "GAGTCAA": 3064, + "TCATGCA": 3065, + "CTGCTTA": 3066, + "TAAGTTTT": 3067, + "GTAGCAA": 3068, + "CCTTGG": 3069, + "TGACAAAA": 3070, + "CTGGTAA": 3071, + "TCTTTATA": 3072, + "TGTGTGTT": 3073, + "CTGGTC": 3074, + "CTGGCAA": 3075, + "CATTTCTG": 3076, + "CTCTACC": 3077, + "CTGAGGA": 3078, + "CTAAAATG": 3079, + "CTAGATT": 3080, + "GTATCAA": 3081, + "CAGTCAA": 3082, + "CTGGGTG": 3083, + "CCTCTTA": 3084, + "TGAGTTTT": 3085, + "TTTTATTTA": 3086, + "CCTTTTTT": 3087, + "TATATACA": 3088, + "TAGCAAA": 3089, + "AAATTA": 3090, + "CTGGATG": 3091, + 
"GATAATA": 3092, + "GACAAAAA": 3093, + "CCTGGGA": 3094, + "GCTTTCA": 3095, + "GTACAGG": 3096, + "GCTGGAA": 3097, + "CTACTCA": 3098, + "CAATGTA": 3099, + "GCGTGAA": 3100, + "GATCCTT": 3101, + "TATTAATG": 3102, + "GCCCGA": 3103, + "TAAAGTG": 3104, + "GCTTCCA": 3105, + "CATGGAA": 3106, + "TGAAGTT": 3107, + "CTTTCTC": 3108, + "TCTGTGTG": 3109, + "GTATGTA": 3110, + "CAATACA": 3111, + "TCAAGG": 3112, + "CCTCTAA": 3113, + "TGTGGG": 3114, + "GATCTGA": 3115, + "GTACTGA": 3116, + "TTAATTAA": 3117, + "GCAGAAAA": 3118, + "CTACATA": 3119, + "CCGGTG": 3120, + "GGGGAAAA": 3121, + "TACAAAAAA": 3122, + "TTTTGG": 3123, + "GTGAGAA": 3124, + "TCAATAAA": 3125, + "TCAAGTT": 3126, + "CTCAGGA": 3127, + "CTACTC": 3128, + "CAAATCA": 3129, + "GGCAGAA": 3130, + "CCCGAA": 3131, + "TGTTGTG": 3132, + "GAGCAAAA": 3133, + "TATTTGTG": 3134, + "GTAGGTT": 3135, + "CTACCTG": 3136, + "CACAAAAA": 3137, + "CTCAGG": 3138, + "GCTTTA": 3139, + "CAGAGCAA": 3140, + "CTCAGTG": 3141, + "GGAAGAGA": 3142, + "TAACCTG": 3143, + "GAAATATA": 3144, + "CGAGAA": 3145, + "GTGAGG": 3146, + "CATTTATA": 3147, + "GGCAGCA": 3148, + "TCTAAATT": 3149, + "CCCAGTG": 3150, + "GCCTAGG": 3151, + "TGCATTA": 3152, + "CCGTAA": 3153, + "CATTCCA": 3154, + "CTAGTTA": 3155, + "GACTTAA": 3156, + "CTATACA": 3157, + "GACACAA": 3158, + "TCTTCACA": 3159, + "CCGGTT": 3160, + "TAAAGTAA": 3161, + "CTGTGGA": 3162, + "TAAGGTG": 3163, + "TCCAGTA": 3164, + "CAAATTTA": 3165, + "AAATTAAAA": 3166, + "CCATCTA": 3167, + "CTCCCTT": 3168, + "CTCCTTTT": 3169, + "GAGAGAGAGAGA": 3170, + "GGAGATA": 3171, + "CCTATTA": 3172, + "CACCAAAA": 3173, + "CCGTTA": 3174, + "TGTTTATA": 3175, + "CTCAGGAGG": 3176, + "GACGTA": 3177, + "GTCCTTA": 3178, + "GAAAGTT": 3179, + "GCTGGTG": 3180, + "CTCTACA": 3181, + "CAATAGA": 3182, + "TAAAATATT": 3183, + "GTACCTG": 3184, + "GTACTAA": 3185, + "CTTTGAAA": 3186, + "CCTTTCC": 3187, + "TAAAAATTA": 3188, + "CTCGG": 3189, + "CAAGATA": 3190, + "CATTTGA": 3191, + "CACCTCA": 3192, + "GCCAGCC": 3193, + "GTCGG": 3194, + "GCACATA": 3195, + 
"CACTCAA": 3196, + "CTTTTAAAA": 3197, + "CAGGAATT": 3198, + "GCCTATT": 3199, + "TCTTTCTG": 3200, + "CTGAGGCAGGAGAA": 3201, + "CAGGCAGG": 3202, + "CTAGTAA": 3203, + "TCCATA": 3204, + "GAACTTA": 3205, + "CG": 3206, + "GCTGTGA": 3207, + "GAAAATA": 3208, + "TCTTCATT": 3209, + "GAGGGAGA": 3210, + "CCCATCC": 3211, + "GAGGTGGG": 3212, + "GCCTCTA": 3213, + "GTAGGTG": 3214, + "TAAACCA": 3215, + "GAAGGAAA": 3216, + "TATTGG": 3217, + "ATG": 3218, + "TCCAGTT": 3219, + "CCCACAA": 3220, + "GAAACACA": 3221, + "GTCTCAAAA": 3222, + "CTTTTCTTTT": 3223, + "TGAAGGA": 3224, + "TATTGATT": 3225, + "CTATGTA": 3226, + "AAAAAAAAAAAAAA": 3227, + "TCCTTAAA": 3228, + "GCGCTA": 3229, + "TCCACTT": 3230, + "GACTCAA": 3231, + "TAAATACA": 3232, + "TCATGGA": 3233, + "TCTGGGA": 3234, + "TCCTATG": 3235, + "CTGTGCA": 3236, + "TCAAGTGA": 3237, + "TCATAAAA": 3238, + "CATCCAA": 3239, + "CCTTCCA": 3240, + "CTGTACA": 3241, + "GAAGGTT": 3242, + "CTGTGTA": 3243, + "GTCACTT": 3244, + "TCACAAAA": 3245, + "TCAGGCA": 3246, + "GTGTTAAA": 3247, + "CCCTTAA": 3248, + "CAAAGTG": 3249, + "GAAATGTT": 3250, + "CTGGGGA": 3251, + "GACGCC": 3252, + "TATATGTG": 3253, + "CTAGATG": 3254, + "GAAATTAAA": 3255, + "GAATGCA": 3256, + "GCACTAA": 3257, + "CGGGAGG": 3258, + "GCCACAA": 3259, + "CGCTTA": 3260, + "TCCACAA": 3261, + "CAGATA": 3262, + "TCTGAATT": 3263, + "TATTATTTT": 3264, + "GCGCGG": 3265, + "CTCTGAAA": 3266, + "TCTCTTTG": 3267, + "TATTTCTA": 3268, + "GGGGTGGG": 3269, + "GGATGCA": 3270, + "CCACACC": 3271, + "TAAATGTG": 3272, + "TCTTCCTG": 3273, + "GCAAGG": 3274, + "CTGCTCC": 3275, + "CTGGAGTG": 3276, + "CTGTTAAA": 3277, + "CACACAAA": 3278, + "CTGACTT": 3279, + "GAAAAGAAAA": 3280, + "CCTTCTCC": 3281, + "GAAATAAAA": 3282, + "CCTCAGGTGA": 3283, + "GATAATG": 3284, + "GAATTGCTT": 3285, + "CCAAAATT": 3286, + "CGTGAAA": 3287, + "CACTGAAA": 3288, + "CAGTGAAA": 3289, + "GATCTTA": 3290, + "GAGATGGG": 3291, + "TCTGCCA": 3292, + "TGAGGTA": 3293, + "TATGGAA": 3294, + "TATATTTTA": 3295, + "TGAACTT": 3296, + "GCAGATA": 3297, + 
"CTTTTCTT": 3298, + "GTAAAATG": 3299, + "TCTCTAA": 3300, + "TCTGCAAA": 3301, + "GAGCCTT": 3302, + "TATCATT": 3303, + "CAATTTTA": 3304, + "CCGCCA": 3305, + "TATTTAAAA": 3306, + "GAGAGATG": 3307, + "GAGATGGA": 3308, + "GCCAGGATG": 3309, + "CGAGTAGCTG": 3310, + "TTCATTTT": 3311, + "TATACTT": 3312, + "GTCTACA": 3313, + "GTGAGTGA": 3314, + "GCTACACA": 3315, + "GGGAGGA": 3316, + "CAAGGCA": 3317, + "GCTTTTAA": 3318, + "CACTATT": 3319, + "GTTCATA": 3320, + "TCCTC": 3321, + "GTGGACA": 3322, + "TATTTGGA": 3323, + "CTCCAGTA": 3324, + "GTTCAGTT": 3325, + "CCAAGG": 3326, + "CAGAGCC": 3327, + "CTCGCC": 3328, + "CCGATG": 3329, + "GGAATTTT": 3330, + "TCCAGCC": 3331, + "CCTCTTTT": 3332, + "GAACCTT": 3333, + "CATGCACA": 3334, + "GTTTC": 3335, + "GAAGATA": 3336, + "TACCCC": 3337, + "GCTGCCA": 3338, + "GGGGGAGG": 3339, + "GCAGTGAGCTGA": 3340, + "CTGTCTA": 3341, + "CGAGGA": 3342, + "CAATGGG": 3343, + "GCTGTGAA": 3344, + "GAAAGTG": 3345, + "TACCAAAA": 3346, + "GTCAGG": 3347, + "CAGCTCC": 3348, + "TGTGCTT": 3349, + "GTCTAGG": 3350, + "TTTTTGTA": 3351, + "TTATATG": 3352, + "TCAGGGG": 3353, + "TATTGTTA": 3354, + "CCTGAGA": 3355, + "TATCTCA": 3356, + "CAATCTG": 3357, + "CACTCTG": 3358, + "GATTTAA": 3359, + "TGAATAA": 3360, + "TCTTGTA": 3361, + "TCAACTG": 3362, + "TCTCCAGG": 3363, + "CTAGAGG": 3364, + "CTGAGAAA": 3365, + "CTAGCTG": 3366, + "TCCACCA": 3367, + "CGATTTT": 3368, + "CCGGCC": 3369, + "GTTGACA": 3370, + "CTTAGAA": 3371, + "CATAATG": 3372, + "GAGTATT": 3373, + "CACAGAAA": 3374, + "GACTGTG": 3375, + "CTATTTTA": 3376, + "TGAGGAAA": 3377, + "TTATTAAAA": 3378, + "CTTATTTA": 3379, + "CAGACTT": 3380, + "CACGCC": 3381, + "GCTTGG": 3382, + "CCTGCTT": 3383, + "TAAAGCAA": 3384, + "CCTCGTGA": 3385, + "TAGAATT": 3386, + "CTTACAA": 3387, + "TAAAGGAA": 3388, + "GTCTAGA": 3389, + "GTGACTT": 3390, + "TACATATG": 3391, + "GTCAGGA": 3392, + "GCTCCAGG": 3393, + "GAAGGGA": 3394, + "CATGATG": 3395, + "TCATCAAA": 3396, + "CGTTAAA": 3397, + "GTACTCA": 3398, + "CTCCCAA": 3399, + "TATATGTA": 3400, + 
"GGTATTTT": 3401, + "TAAGCCA": 3402, + "CGAAATT": 3403, + "GTTTGTTTT": 3404, + "TCTGTCTT": 3405, + "TATATCA": 3406, + "TGTTCATT": 3407, + "CAAACCA": 3408, + "TTCATTA": 3409, + "TATTTGTA": 3410, + "GATTGAA": 3411, + "CTATAAAA": 3412, + "GATTAATT": 3413, + "CCCACCA": 3414, + "TCCTAGG": 3415, + "TAAATGTA": 3416, + "CTCTTAAA": 3417, + "GCAGTCC": 3418, + "GCGGCTG": 3419, + "GTCTCGAA": 3420, + "TGAATGA": 3421, + "CTGGGGG": 3422, + "GTCTCGA": 3423, + "GAACAAAA": 3424, + "TGAATCA": 3425, + "TGTATTTTTAGTAGAGA": 3426, + "GTTATTAA": 3427, + "TTTTTTAAAA": 3428, + "GTCAGTG": 3429, + "CCCATTA": 3430, + "CACAGGA": 3431, + "TATTCCTT": 3432, + "TCTGCCTT": 3433, + "CCTGGTG": 3434, + "GCGAGC": 3435, + "TACTAAA": 3436, + "TACACAAA": 3437, + "CCGTCC": 3438, + "GCTTTGTT": 3439, + "GCATCCA": 3440, + "CATCTAA": 3441, + "GCTGTGTT": 3442, + "GTAGACA": 3443, + "GCCTATG": 3444, + "TCTTTGTG": 3445, + "GATTCTG": 3446, + "CGCCCGG": 3447, + "GATGAGA": 3448, + "TATCTGA": 3449, + "TGAATTTG": 3450, + "CCTGATG": 3451, + "TAAAACAA": 3452, + "CTTTAGG": 3453, + "TTTTCCTT": 3454, + "TGAATAAA": 3455, + "CGGGGA": 3456, + "CAAACATT": 3457, + "GTATGGA": 3458, + "GCTTAAAA": 3459, + "TACCAAA": 3460, + "CAAAGAGA": 3461, + "CTCCTGCC": 3462, + "GTAAAAAAA": 3463, + "CACAGCC": 3464, + "CCATGCA": 3465, + "TACAATT": 3466, + "CTAGTGA": 3467, + "CTGAGTT": 3468, + "GAGTGAAA": 3469, + "TCTGTTTG": 3470, + "CTGTAGG": 3471, + "TATAAAAAA": 3472, + "GCATTAAA": 3473, + "GTCCATA": 3474, + "TGTTAAAAA": 3475, + "TGTTTGA": 3476, + "GAATAGA": 3477, + "CTTCAAAA": 3478, + "CTGGACA": 3479, + "CTGTAGA": 3480, + "CCATTAAA": 3481, + "CTATCTG": 3482, + "CACTATG": 3483, + "TTATCAA": 3484, + "TAAGTAAA": 3485, + "TAATCCCAGCACTTTGGGAGGCC": 3486, + "CCAGAAAA": 3487, + "TGAAGCA": 3488, + "TCCCTTTT": 3489, + "TCATACA": 3490, + "TACGTT": 3491, + "GCCGTG": 3492, + "GGAAGTG": 3493, + "GGCCAAA": 3494, + "GTACCAA": 3495, + "TCTCTACTAAAAATA": 3496, + "CATTGTG": 3497, + "TGTGTGA": 3498, + "GAAACAGA": 3499, + "CTTGACA": 3500, + "GATGAGG": 3501, + 
"GAGATTTT": 3502, + "CCTTCAA": 3503, + "GAATCTA": 3504, + "CTCTCCTT": 3505, + "GGCGGA": 3506, + "TCTATCTATCTATCTA": 3507, + "CACACAGA": 3508, + "TGTGTGTA": 3509, + "CAAAGCC": 3510, + "TGTGCCA": 3511, + "GTTGAAAA": 3512, + "CTCCAGCA": 3513, + "TCAAGGA": 3514, + "TAGCTCA": 3515, + "CGCTGA": 3516, + "CCTGAAAA": 3517, + "GACTATT": 3518, + "GATTCCA": 3519, + "GCTTCTA": 3520, + "GTCTGCC": 3521, + "CTTGGCA": 3522, + "TGTGGTA": 3523, + "GCTTTGA": 3524, + "GCTCTCTG": 3525, + "CTCACAGA": 3526, + "TCTTTAAA": 3527, + "CAAAGCAA": 3528, + "TACTTAA": 3529, + "GCTTCAA": 3530, + "CATTGAA": 3531, + "GGAGGAAA": 3532, + "CTATAGA": 3533, + "CTGAGGAA": 3534, + "CCTGGCA": 3535, + "CCCTATT": 3536, + "CTCGTG": 3537, + "TTACACA": 3538, + "TTAGGAA": 3539, + "CTGGTTA": 3540, + "GTTGTCC": 3541, + "TAATGAAAA": 3542, + "TATTTACA": 3543, + "GGGAATT": 3544, + "GTAGTTTT": 3545, + "GCTGCAA": 3546, + "CTACGG": 3547, + "GCCGGA": 3548, + "CTGGGCA": 3549, + "CCTTAAAA": 3550, + "GATGGAA": 3551, + "TAGATAGATAGATAGA": 3552, + "TATGTAA": 3553, + "GTACGG": 3554, + "TATTCAAA": 3555, + "GATCTCC": 3556, + "CCTGTTTT": 3557, + "TATTGCA": 3558, + "GGAAGGAAGGAAGGAA": 3559, + "GGTAATT": 3560, + "TTACAGA": 3561, + "TCAGC": 3562, + "GCAAAATG": 3563, + "GAGAGCA": 3564, + "GTAGAAAA": 3565, + "CATTTGAA": 3566, + "TCTTCTTTT": 3567, + "TCCCATA": 3568, + "GTTATTTA": 3569, + "CTATCTA": 3570, + "CATCCTG": 3571, + "TCTTGTG": 3572, + "TTATTATT": 3573, + "CCCGTC": 3574, + "TACTATG": 3575, + "TAAACATA": 3576, + "TAAGGAAA": 3577, + "GCTTGTG": 3578, + "CTCTAAAA": 3579, + "GTTTTAAAA": 3580, + "GACAGGA": 3581, + "TCCTAGA": 3582, + "TCCACCCA": 3583, + "GTTTGAAA": 3584, + "CCATCTCA": 3585, + "CTAAGAA": 3586, + "GTATCTA": 3587, + "GTGAGGA": 3588, + "GCTGGAGG": 3589, + "CCTGTAATCCCAGCTA": 3590, + "GCAACAA": 3591, + "CTTTCAAA": 3592, + "CAAATGTT": 3593, + "CTTGTCC": 3594, + "TCTCAAAAA": 3595, + "TATTTATTA": 3596, + "TAAGGCA": 3597, + "GAGAGGAA": 3598, + "TATGATT": 3599, + "GCATCTA": 3600, + "CGTTATT": 3601, + "GCCTGTA": 3602, + 
"GTTTCAAA": 3603, + "CCTTCCTTCCTTCCTT": 3604, + "GGCTTTG": 3605, + "GTCAGAA": 3606, + "CATGCATG": 3607, + "GTCATTTA": 3608, + "CTGGAAAA": 3609, + "CTTCGA": 3610, + "CCTATTTT": 3611, + "CCAACAA": 3612, + "TCCATCC": 3613, + "TAAAGTTA": 3614, + "GTCTCTC": 3615, + "TAATCAAA": 3616, + "GATTTTTG": 3617, + "GATTTCTT": 3618, + "GGGCTGA": 3619, + "GCATGTA": 3620, + "CCTGGGTT": 3621, + "GAGACAA": 3622, + "GCTGTCA": 3623, + "TGATAGG": 3624, + "GGAGACC": 3625, + "CCGGCA": 3626, + "TAATCTCA": 3627, + "TGAATTAA": 3628, + "TCTGGTG": 3629, + "GCCTC": 3630, + "GGCGCA": 3631, + "CCAGCTA": 3632, + "CAGTCTG": 3633, + "TGAACTA": 3634, + "GTAAGAA": 3635, + "CCTTTCA": 3636, + "TCCATGA": 3637, + "CAAAGGAA": 3638, + "CTCTC": 3639, + "CTCTCTCA": 3640, + "CTCCAGC": 3641, + "GTAGATA": 3642, + "CCCCCTCC": 3643, + "GGCGCC": 3644, + "TCTGTCC": 3645, + "GACCATT": 3646, + "CTTGAAAA": 3647, + "TTATCC": 3648, + "TACATGTG": 3649, + "CAAATTTG": 3650, + "TTTTGTG": 3651, + "CAGAGTG": 3652, + "GTAATAA": 3653, + "GTGAGTG": 3654, + "TTTTTCC": 3655, + "GGCTCTG": 3656, + "GCCCTAA": 3657, + "GGCTGTT": 3658, + "CCCAATT": 3659, + "CAGAGCTT": 3660, + "TATAAATG": 3661, + "GAGTCTG": 3662, + "TCTTAAAAA": 3663, + "GTTTTATG": 3664, + "GATCCAA": 3665, + "GGCCCTG": 3666, + "GATCCTG": 3667, + "TCAAGTG": 3668, + "GATTCAA": 3669, + "CCTCTCTT": 3670, + "GAGACGG": 3671, + "CAGATCA": 3672, + "TAAAAGAA": 3673, + "CTGAGCAA": 3674, + "CCTGCCA": 3675, + "CCTTCTA": 3676, + "CGCTCA": 3677, + "GGCTGTG": 3678, + "TGGGAAAA": 3679, + "GGAGCCTG": 3680, + "CTGAGTG": 3681, + "CGTCAAA": 3682, + "TCAAGTA": 3683, + "CGTAATT": 3684, + "TTACTTA": 3685, + "TATACTA": 3686, + "GGGCAAA": 3687, + "CAACTTTT": 3688, + "CTTTGCC": 3689, + "GCCAGGAA": 3690, + "CACACTA": 3691, + "GCCCAGC": 3692, + "TAAATAAATAAATAAA": 3693, + "CTTTCCTT": 3694, + "GGGAGAA": 3695, + "TATGGTA": 3696, + "CGGCCA": 3697, + "CCTCTCTG": 3698, + "GAAAGCAA": 3699, + "CAAGCCA": 3700, + "GGCGTT": 3701, + "CTCTTTTA": 3702, + "TCGGCCTCCCAAA": 3703, + "GATTTATT": 3704, + "CAAGTCC": 
3705, + "TATCTTA": 3706, + "GTTCAAGACCA": 3707, + "CTCACACA": 3708, + "GAAATCAA": 3709, + "TGAGACC": 3710, + "GGGTAAA": 3711, + "GCTTGTT": 3712, + "GATTTTAA": 3713, + "TTTTTATA": 3714, + "CAGAGCTG": 3715, + "TCTGTTAA": 3716, + "GTAATTAA": 3717, + "TCTTTGAA": 3718, + "CTTGCCA": 3719, + "TTTTCATT": 3720, + "CCATGTA": 3721, + "TCTCGGCTCACTGCAA": 3722, + "GGATTCA": 3723, + "TCTATTAA": 3724, + "TACATAAA": 3725, + "GATTGATT": 3726, + "GGAGAGGA": 3727, + "CGCAAAA": 3728, + "GGACTAA": 3729, + "TTATGTG": 3730, + "GTCACTCA": 3731, + "GACAGCA": 3732, + "CGAGTT": 3733, + "GATGGTT": 3734, + "GGAAGAGG": 3735, + "GCCAACATGGTGAAA": 3736, + "GGAGCCA": 3737, + "TGAACTG": 3738, + "CCTCTGTG": 3739, + "GTATAAAA": 3740, + "TCCCAGAA": 3741, + "CATTTATG": 3742, + "GATTATG": 3743, + "TGTTTCTG": 3744, + "GAGTGGGTT": 3745, + "TACATATT": 3746, + "CTCCAGGA": 3747, + "GACACTG": 3748, + "GGTCTCA": 3749, + "CCGGGA": 3750, + "TGTTTAAA": 3751, + "CTCACCA": 3752, + "GGACTTA": 3753, + "GCCCACC": 3754, + "CAAATCAA": 3755, + "GAAATGTG": 3756, + "TAGTTAA": 3757, + "TCTATAA": 3758, + "TTAGATT": 3759, + "GTGTAGG": 3760, + "TACTGAAA": 3761, + "GCACCCA": 3762, + "GTGGGCTG": 3763, + "GAATGAAA": 3764, + "TCTAGTT": 3765, + "TCAGGAGA": 3766, + "TCCACTA": 3767, + "CTCAGTT": 3768, + "TACTTAAA": 3769, + "GACTCCA": 3770, + "TCCATTTG": 3771, + "CACAGCAA": 3772, + "GCTCATGCCTG": 3773, + "GGTGCTG": 3774, + "GCTTTCTT": 3775, + "GTGGCCA": 3776, + "TACGTG": 3777, + "GTGCAGTG": 3778, + "TGAAGTCA": 3779, + "CCTTTAA": 3780, + "TCTCAGCTCACTGCAA": 3781, + "GAAATATG": 3782, + "CCTCAAAA": 3783, + "GGGGCGG": 3784, + "CGACAA": 3785, + "GGTGATG": 3786, + "GTCTTAAA": 3787, + "CAGAAATG": 3788, + "CGTCATT": 3789, + "CCAAGCA": 3790, + "GGATCAA": 3791, + "GTGCTGGGATTA": 3792, + "GCTGGCC": 3793, + "CGGAGCTT": 3794, + "TACATGA": 3795, + "TGTTTGAA": 3796, + "TCTCCATT": 3797, + "TAAGCAAA": 3798, + "CCTTTCTT": 3799, + "TACTGTT": 3800, + "TCCATCTT": 3801, + "CTTACTT": 3802, + "CGGAGGTT": 3803, + "CAAAACAA": 3804, + "TCATAGG": 3805, + 
"TTACTAA": 3806, + "CTTATTTG": 3807, + "GAATGTA": 3808, + "CCCCATGGA": 3809, + "TTACTGA": 3810, + "CGGAAAA": 3811, + "CTCCAGTG": 3812, + "TGTTCCA": 3813, + "CAGATGAA": 3814, + "GTTGATA": 3815, + "TCCCCCC": 3816, + "CATTGCA": 3817, + "CTCAGCC": 3818, + "CTTACTG": 3819, + "TATCCTT": 3820, + "CTTTTATG": 3821, + "TGAGTAGCTG": 3822, + "GACTGAAA": 3823, + "CAATGAAA": 3824, + "CGACTG": 3825, + "CTTGGGA": 3826, + "GCAAGCA": 3827, + "TCACTCC": 3828, + "GATTTGA": 3829, + "CATTTTAAA": 3830, + "TCAACTA": 3831, + "GTCCAAAA": 3832, + "CACCCTG": 3833, + "TTACCTT": 3834, + "CAAGGGG": 3835, + "TTTTGGA": 3836, + "GTTATTTG": 3837, + "GCTACTG": 3838, + "CTGAGGCAGGAGAATG": 3839, + "GTGATGA": 3840, + "GTAGTC": 3841, + "TAGTATG": 3842, + "GTATAGA": 3843, + "GTGTCTA": 3844, + "GCTGCTA": 3845, + "TTAGTAA": 3846, + "TAAACATG": 3847, + "GTCACCA": 3848, + "CATCTTTT": 3849, + "CATATAA": 3850, + "TCTCTCTA": 3851, + "TTTTATTAA": 3852, + "TATTCTAA": 3853, + "GAAATTTA": 3854, + "CTTCCCTG": 3855, + "TAAAGATG": 3856, + "TACGTA": 3857, + "GTTTATTA": 3858, + "GAAAAGAA": 3859, + "CCCACCCA": 3860, + "CAATTAAAA": 3861, + "CCGACA": 3862, + "CAAAGTGA": 3863, + "CAAACAAAA": 3864, + "GCAATTTT": 3865, + "CGATTAA": 3866, + "TTAGAGA": 3867, + "CTGATGA": 3868, + "GGAGGAGG": 3869, + "GTCCTGGG": 3870, + "TCATGAAA": 3871, + "GCAACCA": 3872, + "GTTGGCA": 3873, + "GCGGCGG": 3874, + "GTCCCCA": 3875, + "GTAGGGG": 3876, + "GCCATGTT": 3877, + "GTTCGAGA": 3878, + "GCCTATA": 3879, + "TAAATTCA": 3880, + "GGCCATT": 3881, + "GAAAACAA": 3882, + "TGTGTATG": 3883, + "GTACTC": 3884, + "TAGGGAA": 3885, + "CCTTGAA": 3886, + "TCTATTTG": 3887, + "GAGGGCA": 3888, + "GAAACTGA": 3889, + "TACGC": 3890, + "TACAAAAA": 3891, + "TCATTATT": 3892, + "GGAAAATT": 3893, + "TCAATATT": 3894, + "CCCGTA": 3895, + "GGAGAGAA": 3896, + "TTAGTTA": 3897, + "CTCAGAGA": 3898, + "TCGAGC": 3899, + "CTAGTCA": 3900, + "GATGGCA": 3901, + "TGAACATT": 3902, + "CTATGGG": 3903, + "CACACCA": 3904, + "TCAATTAA": 3905, + "GGAACTG": 3906, + "TTACATG": 3907, + 
"CTTTCATT": 3908, + "CAGCTCTG": 3909, + "TCTTTTTTTT": 3910, + "TAAATCTT": 3911, + "TGATCTA": 3912, + "CATACAA": 3913, + "GCTCAAAA": 3914, + "GCTGTGTG": 3915, + "TCAATCA": 3916, + "GATTTGAA": 3917, + "CCAAGGA": 3918, + "GTCCTCA": 3919, + "GTGCTCC": 3920, + "AAAATAA": 3921, + "GTGACAA": 3922, + "GCTCACGCCTG": 3923, + "CGACGG": 3924, + "TATCCAA": 3925, + "CACACATG": 3926, + "TCTCTCTCC": 3927, + "TGTGGTT": 3928, + "CTTGGTA": 3929, + "TCTGGTT": 3930, + "TTTATAA": 3931, + "CTGCTTTT": 3932, + "TGTGTCA": 3933, + "CACATCA": 3934, + "CCTAATG": 3935, + "CGTTTTTT": 3936, + "GCTGGCA": 3937, + "GACGTC": 3938, + "TATAATTA": 3939, + "TACAGTAA": 3940, + "GAAAGTAA": 3941, + "GTCTGAAA": 3942, + "CCCATTTT": 3943, + "TATATGA": 3944, + "CTTGATA": 3945, + "CTTTATTTT": 3946, + "CTTTATTA": 3947, + "GGCGAA": 3948, + "CCATGCC": 3949, + "CCTGCCTT": 3950, + "GAAGAAGAAGAA": 3951, + "CTGACTGA": 3952, + "GCCCTTA": 3953, + "TATCTAA": 3954, + "GTGTTTTA": 3955, + "TGTGGCA": 3956, + "TATTGTAA": 3957, + "GCCAGAAA": 3958, + "CCCTGTCTC": 3959, + "CACAGGAA": 3960, + "AAAACAA": 3961, + "AAAAAAAAAAAAAAA": 3962, + "TAACTCC": 3963, + "GCCTAAA": 3964, + "CGAGTA": 3965, + "TAGTATT": 3966, + "GTATTTTTAGTAGAGA": 3967, + "GCTGCAGG": 3968, + "TATTGAAA": 3969, + "CCAGCCTGGG": 3970, + "GCTCCAAA": 3971, + "TACGAA": 3972, + "GGCCTCC": 3973, + "TATACAAA": 3974, + "CATGGCA": 3975, + "CATGCAA": 3976, + "TACACCA": 3977, + "CTTTACCA": 3978, + "TACAGAGA": 3979, + "TATTCTTA": 3980, + "TATGTCA": 3981, + "TCAAGCA": 3982, + "TCAATGA": 3983, + "GGCTCTT": 3984, + "GGAAGTT": 3985, + "TCCATGTT": 3986, + "GCTTTCC": 3987, + "TATGTGA": 3988, + "GTGTAGA": 3989, + "TTTTTAAAA": 3990, + "GCTGGAGA": 3991, + "GTGAGAGA": 3992, + "CCTAGAA": 3993, + "CCTCCAAA": 3994, + "CCAATGA": 3995, + "CAGGGCA": 3996, + "CTATGCA": 3997, + "CTTCACC": 3998, + "CTACAAAA": 3999, + "CTCACC": 4000, + "GAGTATG": 4001, + "TAGAAAAA": 4002, + "CTTTTGAA": 4003, + "TAAAGAGA": 4004, + "CATGTCA": 4005, + "TCTTTTAAA": 4006, + "CACAGTGA": 4007, + "GATCTAA": 4008, + 
"TAAGGTA": 4009, + "CATAGAA": 4010, + "CGCGCC": 4011, + "CAGCTTA": 4012, + "TATAGTT": 4013, + "CGGGCC": 4014, + "TATCCATT": 4015, + "TGTTTGTTTT": 4016, + "GCTGGCTG": 4017, + "TACAGGA": 4018, + "CTCCTTTG": 4019, + "CAATCTA": 4020, + "CCCCCTG": 4021, + "TATACTG": 4022, + "CTGAGCC": 4023, + "CGGTTA": 4024, + "TGAAGTG": 4025, + "GCTTCCTT": 4026, + "TTTTATTTG": 4027, + "TAGTGAA": 4028, + "CTGAGGTG": 4029, + "TCTTCTC": 4030, + "GACAGAAA": 4031, + "CTGAACTGAA": 4032, + "CCTGGGAA": 4033, + "TCCCCAAA": 4034, + "TATGTATT": 4035, + "GATTTCTG": 4036, + "CATTCAAA": 4037, + "CACAGTT": 4038, + "GCTTGAA": 4039, + "GTGGATCA": 4040, + "CTGAGTGA": 4041, + "TGAATTTA": 4042, + "TCAACAAA": 4043, + "GGTCATT": 4044, + "GTAATTTA": 4045, + "GCGACTT": 4046, + "CTGAGAGA": 4047, + "GTGCCCA": 4048, + "CTAGGTT": 4049, + "TCCTGAAA": 4050, + "GTCCACC": 4051, + "TCACAGAA": 4052, + "GCGAAAA": 4053, + "GTATGGG": 4054, + "TGAACAAA": 4055, + "TAAACAAAA": 4056, + "CCGTTTT": 4057, + "TCTCAATT": 4058, + "TCCAGAAA": 4059, + "GTAACAA": 4060, + "GCATTTTA": 4061, + "TCTCCATG": 4062, + "TTATAAAA": 4063, + "CAGGCAA": 4064, + "CTAAAAAAA": 4065, + "GTTGGGA": 4066, + "TAAAGATT": 4067, + "TGAAGAGA": 4068, + "CCCCTCA": 4069, + "TGTTTATG": 4070, + "TCTACTG": 4071, + "CCAATTTT": 4072, + "GGTGGTG": 4073, + "GGAACAA": 4074, + "TGTGGGA": 4075, + "TCTGCTA": 4076, + "GAACGA": 4077, + "GTAAGTA": 4078, + "GTTGCCA": 4079, + "AAAATTTT": 4080, + "GCGCGA": 4081, + "GAAAGATG": 4082, + "GTCTCTCA": 4083, + "TCCATCAA": 4084, + "GCAGCTA": 4085, + "CACATTTG": 4086, + "CTGACAA": 4087, + "TCCACC": 4088, + "GCT": 4089, + "CCCACTT": 4090, + "GCAGGTA": 4091, + "GAGGCCA": 4092, + "TAAAGTCA": 4093, + "CTGGATA": 4094, + "CGGCAA": 4095 + }, + "merges": [ + "A A", + "T T", + "T G", + "C A", + "C C", + "T A", + "G G", + "T C", + "G A", + "AA A", + "G C", + "T AA", + "TT TT", + "T CA", + "TG A", + "TT A", + "G AA", + "T CC", + "C AA", + "C TG", + "C TT", + "G TG", + "G TT", + "G CA", + "GG A", + "C CA", + "G TA", + "G CC", + "C TA", + "T AAA", + 
"AA AA", + "C TC", + "G TC", + "TG TG", + "TA TT", + "CA CA", + "G AAA", + "TA TA", + "TC TT", + "TG TT", + "C AAA", + "GA GA", + "CA TT", + "TG AA", + "CA GG", + "TC TG", + "CA GA", + "TC AA", + "GG AA", + "TAA AA", + "C TGA", + "GC TT", + "G TGA", + "GC TG", + "C TCA", + "CC TT", + "CA TG", + "GC AA", + "G TCA", + "G TAA", + "TTTT A", + "TA TG", + "GA GG", + "C GG", + "GA TT", + "CC TG", + "TC TC", + "CC AA", + "G TTA", + "C TCC", + "C TAA", + "TA CA", + "C TTA", + "TC CA", + "GA TG", + "TT AA", + "GAA AA", + "TT TG", + "G TTTT", + "TC TA", + "GC CA", + "G TCC", + "C TTTT", + "GG GG", + "C GA", + "TT TA", + "CC CA", + "CAA AA", + "TG GG", + "TA GA", + "TA GG", + "GA CA", + "GG TT", + "CC CC", + "GG TG", + "CA TA", + "GC TA", + "TG TA", + "TC AAA", + "TG GA", + "TAA TT", + "TTA TT", + "TG CA", + "GG CA", + "GA TA", + "CC TA", + "TT CA", + "TC TCA", + "GG GA", + "C GC", + "CTG AA", + "G TAAA", + "TC TCC", + "TTTT TT", + "C GTG", + "GC AAA", + "TAA AAA", + "TC TGA", + "TCA TT", + "GG AAA", + "TG AAA", + "TCC TT", + "CC AAA", + "GAA TT", + "C TAAA", + "C GTT", + "GTG AA", + "GG CC", + "TAA TA", + "GG TA", + "TG CC", + "CA CC", + "TGA TT", + "AAAA AA", + "GC TCA", + "TCC AA", + "GA GAA", + "CTG TT", + "TA TTA", + "CA GCA", + "CTC TT", + "CTT AA", + "CA GAA", + "GC TGA", + "GTT AA", + "TC TTA", + "TA TTTT", + "GCC AA", + "CTT TG", + "GA CC", + "C GCA", + "GTA TT", + "GTC TT", + "CAA TT", + "GTG TT", + "CTC AA", + "GGA GG", + "C GAA", + "TC TTTT", + "GTC AA", + "C GCC", + "TA TAA", + "TA CC", + "TC TAA", + "CCA TT", + "C GGA", + "CAA AAA", + "CA GTG", + "TCC TG", + "CTC TG", + "GAA AAA", + "CTG TG", + "CA GC", + "TTTT AA", + "GCA TT", + "GCC TT", + "TAA TG", + "CTA TT", + "GTT TG", + "TGA TG", + "GG CTG", + "CC TCA", + "GA GGA", + "GCC TG", + "AAA TT", + "C GTA", + "TC AAAA", + "TA CAA", + "CA TCA", + "CA GTT", + "TGA GA", + "GG GAA", + "CA CTG", + "CA CAA", + "CA GGA", + "CC CCA", + "CC CTG", + "TTTT TTTT", + "TA GAA", + "GA GCA", + "CC TCC", + "CA CCA", + "TA TCA", + 
"GA GC", + "CA TTA", + "CACA CACA", + "GA GTG", + "GGA TT", + "TGTG TGTG", + "TA CTT", + "CA CTT", + "GTC TG", + "TGA GG", + "GA GTT", + "GAA TG", + "TCA TG", + "GA CAA", + "GA CTT", + "TATT AA", + "TAA TAA", + "GG CCA", + "CA TTTT", + "CA GCC", + "CC CTT", + "GC TAA", + "TATA TATA", + "GTG TG", + "TA CTG", + "TA GTT", + "CAA TG", + "GC TC", + "CA GTA", + "GC TCC", + "CA TAA", + "TTA TG", + "TAAA TT", + "GA TGA", + "CA TGA", + "GC GG", + "AAAA AAAA", + "CCA TG", + "GA TAA", + "GA CTG", + "TA TGA", + "GCA GG", + "GA TCA", + "G TTTTA", + "GGA TG", + "CC TGA", + "G TAAAA", + "GAA GG", + "GA TTA", + "CC TC", + "GA CCA", + "GC TTA", + "CC CAA", + "AAA TG", + "GCA TG", + "TA GTA", + "TA CCA", + "GG CTT", + "C GTC", + "TC TCTT", + "GG TCA", + "TTA TTA", + "TA CTA", + "TA GCA", + "TA TC", + "CTG GG", + "CA TC", + "C TTTTA", + "C TAAAA", + "GTG GG", + "GA GTA", + "CCA GG", + "GA TTTT", + "TA GTG", + "GAAA TT", + "CA CTA", + "TC GG", + "TCA GG", + "CAGG AA", + "GC AAAA", + "CC TTA", + "CA TCC", + "CTT GG", + "TGTG AA", + "TATT TG", + "CC TAA", + "CTA TG", + "GA GAAA", + "GAGA GAGA", + "GC TTTT", + "TA TAAA", + "CAA GG", + "TC TCTG", + "TGTT AA", + "TGTG TT", + "GA GCC", + "GA CTA", + "TA TATT", + "TAA AAAA", + "TTTT TG", + "GTA TG", + "CATT AA", + "TA GGA", + "TA GC", + "GTT GG", + "GAA GAA", + "TAAA TG", + "TC TGTT", + "CA GAAA", + "CAAA TT", + "TAA TTA", + "TC TGTG", + "TA TCC", + "TGAA TT", + "CTC CA", + "GTG AAA", + "GG CAA", + "GGA GA", + "GAA GA", + "GG TGA", + "GG GCA", + "CC AAAA", + "TCTC TCTC", + "CTG CA", + "CTT CTT", + "TCTT AA", + "CC CTA", + "TGTG TG", + "AAA TA", + "TGTT TG", + "GG GTT", + "GTG CTG", + "GG AAAA", + "GG GGA", + "TCA GA", + "CC TTTT", + "GAAA TG", + "GCA GCA", + "TC TGAA", + "GG GTG", + "CACA TT", + "TCTT TG", + "GG GC", + "TCC CA", + "TC CATT", + "CTG AAA", + "CTT TA", + "TC GA", + "GTT TA", + "CAA CAA", + "CTT CC", + "GCC TCC", + "TT AAA", + "GC TCTG", + "GTT TCA", + "GGA GGA", + "C GTGA", + "CA GTC", + "GAA TA", + "CA GAGA", + "CC CTC", + 
"CAAA TG", + "CTG CTG", + "GA TCC", + "TTTTA TT", + "AAAA TT", + "TTA TA", + "TCAA TT", + "GG TAA", + "GTTA TT", + "GC CAGG", + "GGA GAA", + "CATT TG", + "TCA CC", + "CTC AAA", + "GG TTA", + "TCC AAA", + "TC TATT", + "GCA GA", + "CTT CA", + "TCA TCA", + "C GAGG", + "TAA CA", + "GTT GTT", + "CTTA TT", + "C GTCA", + "TAA GA", + "TAA TTTT", + "CTG TA", + "TC CACA", + "GC TGTG", + "C GCTG", + "TC TAAA", + "GC GA", + "CAA TA", + "CCA CCA", + "GAA CA", + "C GAAA", + "CAGA TT", + "TCA CA", + "TTA TTTT", + "TC TCAA", + "TGA CA", + "CTCC AA", + "AAAA AAA", + "TATA TG", + "TCC TCC", + "TCA CTT", + "TC CAGG", + "CAA GA", + "GG CTA", + "GTG GTG", + "C GTAA", + "C GAGA", + "TGA TA", + "GGA TTA", + "CAA CA", + "C GATT", + "TGA GAA", + "CTCC TT", + "CTCA TT", + "GTT AAA", + "TCA TA", + "CC TCTG", + "CTC TA", + "GC TGAA", + "CTG GA", + "TAA GG", + "CTT AAA", + "TATT TA", + "CCA CA", + "CC GG", + "GTC AAA", + "TG GAA", + "C GGAA", + "TGA TGA", + "GTT CA", + "TAA CAA", + "GC TGTT", + "TAA GAA", + "CTG CC", + "TTAA TT", + "CCA GA", + "TCA GAA", + "GTCA TT", + "C GCTT", + "GATT AA", + "CTGA TT", + "GC CACA", + "GTAA TT", + "TC CAGA", + "GCC AAA", + "GTGA TT", + "TAAAA TT", + "CAA GAA", + "CCA CC", + "TAA TCC", + "GTT CTT", + "TC CATG", + "GC TCTT", + "TG CTG", + "GG GTA", + "TTA CA", + "GC CATT", + "GCA CA", + "GCAA TT", + "TCC CTG", + "TG TGA", + "TC GAA", + "GGA CA", + "GGAA TT", + "GTG GA", + "CTT CTG", + "TCC CC", + "GCC CC", + "CTT GA", + "TAA TGA", + "TAAA TA", + "TATA TA", + "CTG CAA", + "TCA TTA", + "GTA TA", + "TCC CCA", + "C GTTA", + "GCA GAA", + "TGA GTT", + "CTTTT TT", + "C GATG", + "CTT TCA", + "AAAA TG", + "CAGG TT", + "CTAA TT", + "C GCCA", + "TGAA AAA", + "GTT CC", + "GTCC TT", + "GTCC AA", + "GTTTT TT", + "CTC TGA", + "GC GC", + "GTT GA", + "TGAA TG", + "CTA TA", + "GCA GTG", + "CCTT AA", + "TCA CCA", + "TCA CTG", + "GCC CTG", + "TAA CTT", + "CAGA TG", + "GTA GG", + "TC TATA", + "GAGA TT", + "GTC TA", + "TTTT AAA", + "CACA TG", + "TGA CC", + "CA CAAA", + "GTG TA", + 
"GG GAGG", + "GCTT TG", + "CAA AAAA", + "GA GGAA", + "GTT CTG", + "TTTT TA", + "GTC TCA", + "GTT CAA", + "TC GTG", + "GCTT AA", + "GCA CC", + "CTCC TG", + "TAAA TAAA", + "CTA CA", + "CTT CCA", + "TCC TCA", + "C GCAA", + "GAA AAAA", + "GCC CA", + "TC GTT", + "GTA GA", + "CTC TCA", + "GTC CA", + "TGA CTT", + "TCC CTT", + "GC CATG", + "CACACACA CACACACA", + "GTGA TG", + "CC TCTT", + "GC CAGA", + "TCC TA", + "C GTTTT", + "GTA CA", + "GCA TA", + "GAA TTA", + "TGTGTGTG TGTGTGTG", + "CC CAGG", + "GG TTTT", + "TCAA AAA", + "TC TATG", + "CCA TA", + "TGA CAA", + "GGA TA", + "TCA GTG", + "GTA TTTT", + "GAGA TG", + "GC GTG", + "C GTCC", + "TTAA AAA", + "TAA TCA", + "CAA TTA", + "CCA CTG", + "CGG TT", + "GTT GAA", + "TGA TTA", + "CCTT TG", + "CGG TG", + "CAGG TG", + "TCAA TG", + "CTGA TG", + "TCA GGA", + "GTT TAA", + "TATT AAA", + "CTC TTA", + "GCA GGA", + "CTC TCC", + "GAA CC", + "CTT TAA", + "GG GCC", + "GTA TTA", + "GC GCC", + "CCAA TT", + "GC TAAA", + "TGA CTG", + "GATT TG", + "GA TAAA", + "TCA GCA", + "GTT CCA", + "GAAA TA", + "GA CAAA", + "GA GTC", + "GC TATT", + "TCA CAA", + "GAGG TT", + "TAA CC", + "GAA GGA", + "GC TCAA", + "GAAAA TT", + "CCA GCA", + "GTTTT AA", + "GTG CC", + "TGA GGA", + "CA TAAA", + "GG TCC", + "TCA TTTT", + "TATT TATT", + "TAA TAAA", + "GCC TA", + "CTTTT AA", + "TAA GTG", + "TAA GTA", + "CTG GAA", + "CACA CA", + "GA CAGA", + "CAA CC", + "GG GAAA", + "CCA GAA", + "TCA GTT", + "TAA CTA", + "CTAA AAA", + "TGGG TT", + "TGA GTG", + "TAAAA TG", + "TATATATA TATATATA", + "GCA CTG", + "GA CTC", + "TA CAAA", + "TAAAA AAA", + "TC TACA", + "GTT GTG", + "TC GCC", + "CC CAAA", + "GTCA TG", + "CTG CTT", + "GGAA TG", + "CTA TTA", + "GA TATT", + "TA GAAA", + "GG CAGG", + "GA TGAA", + "GTA GAA", + "TCC TGA", + "TAA CTG", + "GCTG GG", + "GCAA TG", + "GCC CCA", + "GTT TGA", + "CATT TA", + "GTG CA", + "CTT GAA", + "GTG GAA", + "CTT CAA", + "TAAA TTA", + "GTG GCA", + "TCC TTA", + "GGAA AAA", + "TTTT TTA", + "CC TGTG", + "GTAA TG", + "GTG TTA", + "CTA GG", + "CAGG CTG", + 
"GA CACA", + "GAAAA AAA", + "TC GC", + "GTAA AAA", + "TGTT TA", + "TCTC TA", + "GTCC TG", + "CCA GGA", + "GAA CAA", + "TAA GTT", + "TGA GCA", + "GC TCCA", + "TAA GCA", + "CTCA TG", + "GTC TTA", + "CC CACA", + "CA TATT", + "GCC TCA", + "CA CTC", + "CTT CTA", + "TGA TTTT", + "TC GCA", + "CC TGTT", + "GAA GCA", + "GCAA AAA", + "GC GGA", + "CCA CAA", + "GC GCA", + "CA TATA", + "GA CATT", + "GTT CTA", + "CAAAA TT", + "GAAA GAAA", + "CC CGG", + "TA CACA", + "CCAA AAA", + "GAGG TG", + "GG CTCA", + "CA GTGA", + "TCC CAA", + "TA TCTT", + "TGA GTA", + "TC GTA", + "TTTT CTT", + "GTG GGA", + "GA GCTG", + "CC CTCC", + "TAGG TT", + "TTA GG", + "TAA TATT", + "CCA GCC", + "CA TCTT", + "GTC TGA", + "GTT TCC", + "CC TGAA", + "GGA GCA", + "GAAAA TG", + "TCA GTA", + "TAA CCA", + "GA TGTT", + "CTG TTA", + "CA TGTT", + "GG CGG", + "CA TGTG", + "GG GAGA", + "CTT TGA", + "TCTT TCTT", + "AAAAAA AAA", + "GGGG TG", + "CTT TCC", + "CTT GTT", + "GCA TTA", + "CC CAGA", + "CAAA TA", + "TC GGA", + "CA GCTT", + "TCA CTA", + "TAA TTAA", + "TAA GGA", + "GAA CTG", + "GCA CAA", + "GC GTT", + "GG CTC", + "TC TTTTA", + "CC TCCA", + "GG CAAA", + "CA GCTG", + "CTA CAA", + "TA CATT", + "GC TATG", + "CTT GTG", + "GA GTCA", + "GTTA TG", + "CTG CCA", + "GTC TCC", + "TGA CCA", + "CA CCTG", + "TATA TTA", + "TGA TCA", + "CA GCAA", + "GA TGTG", + "GTC TTTT", + "CTA GAA", + "GC TACA", + "CTG GGA", + "GGGG TT", + "CAA GTA", + "CAA GGA", + "CC CTCA", + "TA GCC", + "GTT GGA", + "GC TATA", + "TCTG AAA", + "TA TGTT", + "CC CCTT", + "GTT GTA", + "CC CTGA", + "TGA CTA", + "CAA GCA", + "CAA TAA", + "GAA CTT", + "CA TGAA", + "CTTA TG", + "CTAA TG", + "TC TAAAA", + "CCAA TG", + "GAA GTG", + "CC TCAA", + "CC CATT", + "CA GTCA", + "GAGAGAGA GAGAGAGA", + "TA TGTG", + "GCA GTGA", + "TCTCC TT", + "TCC CAAA", + "CCA TTA", + "CCA GTG", + "GCA TCA", + "TCAAA TT", + "GA TCTT", + "GA CAGG", + "GGA GTG", + "GTA GTA", + "CAA CTT", + "GAA GTT", + "CC CCTG", + "TCTC AAA", + "GG GTC", + "GA GCTT", + "TATG AAA", + "TA TGAA", + "GA CATG", + 
"CAA GTG", + "GA TATA", + "CA TCTG", + "CTG TGA", + "TAA TTTA", + "GG CAGA", + "GC GAA", + "CC TAAA", + "CCA TCA", + "CA CTGA", + "GGA CTA", + "GA CGG", + "CTC TTTT", + "CTG TCA", + "TCTCTCTC TCTCTCTC", + "TTAA TG", + "GCA GCC", + "CAAAA AAA", + "GCA CCA", + "CTA TTTT", + "GA GCAA", + "CTT GGA", + "CTG GTG", + "GAA TAA", + "TCC TTTT", + "GAA GTA", + "CA GTAA", + "CAA CCA", + "CTG TAA", + "TGA TAA", + "GCA GTT", + "CA CGG", + "TAAA TAA", + "CTG TTTT", + "CTA CTA", + "GC TCTA", + "C GAAAA", + "CAA GTT", + "CTT GTA", + "GAA TGA", + "GA GTGA", + "GCC TGA", + "GG TTTG", + "CC CATG", + "GG GGAA", + "GAA GAAA", + "TG TTA", + "CAA TTTT", + "TATA TTTT", + "CTC AAAA", + "GG TGGG", + "CC GTG", + "TATT TCA", + "CC CCAA", + "TATT TAA", + "GG CTGA", + "GG TGTG", + "CA TCAA", + "CA CTCA", + "TCTCA TT", + "GAA TTTT", + "GAA TCA", + "CAGG AAA", + "CA TACA", + "TA TTTTA", + "TTA TAA", + "GAGG AAA", + "CA TATG", + "CTT TCTT", + "CAA CTG", + "GG GCTG", + "CC CCCA", + "TTTG AAA", + "CATT AAA", + "CTT AAAA", + "GA CTGA", + "CAA TGA", + "GG CACA", + "CCA GTA", + "GGA TGA", + "GTTTT TG", + "GCA TTTT", + "GTG CCA", + "GCA GTA", + "GCC CTT", + "TC GTC", + "GAA CTA", + "GTG GTT", + "GTG TGA", + "GTG CTT", + "C GCTA", + "GTG TCA", + "TCTT TA", + "GCC TTA", + "CC TATT", + "CAAAA TG", + "GAA CCA", + "CTC CAGG", + "GA CTCA", + "CATG AAA", + "GC TAGG", + "TGTT AAA", + "GC GTA", + "GCA CTT", + "TCTT AAA", + "TAA GAAA", + "GG CCTG", + "TCC CTA", + "GTG GTA", + "CTG CTA", + "GGA GTT", + "GG TAAA", + "CAAA CAAA", + "GA TATG", + "TCA TGA", + "GA CCTT", + "TAA TATA", + "GC TAGA", + "GGA CTG", + "GG CATT", + "CA GTTA", + "CC CTAA", + "CA CCTT", + "GG TGAA", + "CA GCTA", + "GTG TTTT", + "CAA CTA", + "GA TCAA", + "GA GAAAA", + "TGTG AAA", + "AAAA TA", + "GATG AAA", + "CTC TAA", + "TTA CTT", + "GA TCTG", + "CCA CTT", + "GA GTTA", + "CAA TCA", + "GGATTA CAGG", + "TTTA TTTT", + "TACA TA", + "TTTTA TG", + "GA GTAA", + "GCTG AAA", + "GTA CTG", + "GC TCTC", + "TATG TA", + "TGTG TA", + "TCA TAA", + "GGA CTT", + 
"TCTCC AA", + "GCA TGA", + "GA CGA", + "CGCC TG", + "GA CCTG", + "GG TCTT", + "CA CCAA", + "GA TC", + "GA CCAA", + "AAAA TTA", + "GTAAA TT", + "CCA GTT", + "CA GAAAA", + "TAA CAAA", + "GG TGTT", + "GAAA TTA", + "TGCC TCA", + "CC GCC", + "CCA TTTT", + "CTT GCC", + "TCTG TA", + "CTG GCA", + "GG GATG", + "CCA TGA", + "CTA CTT", + "TAGG TG", + "TAAAAA TT", + "GAAA GAA", + "TAAAA TA", + "CTTTT TG", + "GTC AAAA", + "GGA CAA", + "TCTGA TT", + "CTC TCTT", + "TAA TTTG", + "CTC TTTG", + "GG CCTT", + "GGA TTTT", + "CTA CTG", + "GTT GCA", + "GG CTCC", + "CTC TGTG", + "CTC CAGCC", + "TTA CAA", + "GGA CCA", + "GGAA GGAA", + "TAAA GAA", + "TTA GAA", + "GTG AAAA", + "CTT GCA", + "TGGG TG", + "GGA GCC", + "CC TCTA", + "C T", + "GG GCTT", + "GG CATG", + "CTG GTT", + "TA CAGA", + "GATT AAA", + "CTC TGTT", + "TTA TCA", + "CTG AAAA", + "GTA GTT", + "GG GTCA", + "G T", + "CA GCCA", + "GC GTC", + "CA CTTA", + "GTG CTA", + "TC TTATT", + "GTA CTT", + "GG TATT", + "TA GAGA", + "TA CATG", + "CCA CTA", + "TGA GAAA", + "CAA TAAA", + "TCC AAAA", + "CGTG AA", + "GG TCTG", + "CTGAA TT", + "TCA GCC", + "CC TCTC", + "GTT AAAA", + "GG GATT", + "TCC TAA", + "CA CTAA", + "GGA GAAA", + "CCTT CCTT", + "GTT TCTT", + "TA TCAA", + "GA TACA", + "TAATCC CAGCA", + "CC GCA", + "TGAAA TT", + "C GTAAA", + "CTC TCTG", + "TC TTTTTT", + "GTA CAA", + "CCAAA TT", + "TGTA TTTT", + "TC GCTT", + "GG GTGA", + "GA TAGA", + "CTT TATT", + "TAAA CAA", + "GTT TATT", + "TGAA TA", + "CTA CCA", + "GTG TCC", + "CC CGA", + "TTTA TTA", + "CTCC AAA", + "TTTTTTTT TTTT", + "TCA TCC", + "GAA GCC", + "CTAAA TT", + "CAAA TTA", + "CCCC AAA", + "TCTT CTT", + "TAGG AAA", + "CA CGA", + "CA TTTTA", + "GTG CAA", + "TCTCC TG", + "TATTTT AA", + "GTT TGTT", + "GA GCCA", + "GG CCAA", + "CATT TCA", + "CA TCCA", + "CC TATA", + "GA CTTA", + "TCAAA TG", + "GTA TCA", + "TAAA TTTT", + "CTGA GGCA", + "GCC CAA", + "GG TTAA", + "TA TCTG", + "TGA CAGA", + "GGA GAGA", + "GCTG CTG", + "CC CTTA", + "TCC TCTG", + "GTA GCA", + "CCTG AAA", + "CC GAA", + "TTTT 
TAA", + "CTA TAA", + "CCTG TA", + "TTA CTG", + "GTA TAA", + "GG CGA", + "GA CTAA", + "TCA GAAA", + "GTG TGTG", + "CAAA GAA", + "CC TATG", + "GCA GAGA", + "CC GTT", + "TTTTA TTTT", + "GGAA GAA", + "TTA CTA", + "GCC TGGG", + "TCC CTC", + "TCC TCTT", + "GGA TCA", + "GG TCAA", + "TC GAGA", + "TATT CTT", + "TA CTC", + "GTTAA TT", + "GC GAGA", + "CTTAA TT", + "TCC TTTG", + "GTC TAA", + "CA CCCA", + "GG GTTA", + "GG GCAA", + "GGAAA TG", + "GCAAA TT", + "TA GATG", + "GCA GAAA", + "AAAAAAAA AAAAAAAA", + "CC TACA", + "GGA GTA", + "TC TAATT", + "CAA CAAA", + "TA GATT", + "GG TTTA", + "CC TAGA", + "CTT TAAA", + "TA CTTA", + "TAA TGAA", + "CTA TCA", + "TA GTAA", + "CAGA GAA", + "CAA GAAA", + "GGGG AAA", + "CGTT AA", + "CGTG TT", + "TCTG TCTG", + "TTTTAA TT", + "CTG GCC", + "TAAA TGA", + "C GTCAA", + "TTA GTA", + "GTC TCTG", + "TTTT AAAA", + "CA GTTTT", + "CTT CCTT", + "TATA TAA", + "GC TTTTA", + "TTTT TCA", + "GG TC", + "TTA TTAA", + "TTTT GTT", + "CA TAGA", + "TA GGAA", + "GAGA GAA", + "GTA GCTG", + "TTA TGA", + "GTA GTG", + "GGA GAGG", + "CTC TGAA", + "TA GTC", + "GA CTCC", + "TCC CTCC", + "TAA TGTT", + "CA TCTA", + "GCCA CCA", + "GTA CTA", + "TGGG AAA", + "CGCC TT", + "GCC CGG", + "GGA GGAA", + "GTA CCA", + "CGC AAA", + "CA TAAAA", + "TAA CATT", + "GC TAAAA", + "TCTT CTG", + "GCC AAAA", + "GTA TGA", + "GTC TTTG", + "TA CTGA", + "TCC CAGG", + "TTA TTTA", + "TTA GTT", + "GGA CC", + "TA TAAAA", + "CAAA CAA", + "CTT CTC", + "TCTA TCTA", + "GAAA TAA", + "GTG TAA", + "CTT TGTT", + "GA TAAAA", + "GCC CAGG", + "GC GATT", + "AAAAAA TT", + "TA CAGG", + "GG CTAA", + "TA GCTT", + "GTC TCTA", + "CTCC TGA", + "GAA TAAA", + "TTA CCA", + "GG GACA", + "GCCA CTG", + "GTT TAAA", + "GTC TGTG", + "TGA CAAA", + "TACA TTTT", + "GCCA CC", + "TG TTTT", + "TA GCAA", + "TTA TAAA", + "GA CCCA", + "GCA GC", + "CAGA CAGA", + "CA CAAAA", + "GCC CTA", + "TATT AAAA", + "C GTATT", + "CCA TCC", + "TC GATT", + "GAA GGAA", + "GA TCCA", + "TATT TGA", + "GTGAA TT", + "TA CCTT", + "C GTCTT", + "CC TAGG", + "TC 
GAAA", + "CTT TCTG", + "TGAA GAA", + "TCTC TCA", + "GTC TCTT", + "GGA GGGG", + "GTC TGTT", + "CTA TGA", + "GGAAA TT", + "GCA CACA", + "GCC TTTT", + "CA GTCC", + "CTG GTA", + "GCA TCC", + "TA GTTA", + "GG CTTA", + "GA GTCC", + "TG AAAA", + "TAGA TAGA", + "TGTT TGTT", + "TA CTCA", + "CATT TAA", + "GA TTTTA", + "CA CTCC", + "GAAA CAA", + "GC GCTG", + "TCTT TCA", + "CTG TCC", + "GAA CTCA", + "CGG AAA", + "TATT GTT", + "GCA CTA", + "TATT CAA", + "GC GGGG", + "GTG GCC", + "TAATT AAA", + "TA CTAA", + "GC GGTG", + "TA CCAA", + "GG TATA", + "CTA GTT", + "GCA GAGG", + "CTTTT TTTT", + "TTTTTTTT TTTTTTTT", + "TACA GTA", + "CCA TGTT", + "TA GTGA", + "CGTG TG", + "GC TCTGA", + "CTT CCTG", + "TC GCTG", + "TAAA TCA", + "TCCAA TT", + "GTT TCTG", + "GAA GAGA", + "GG GTAA", + "CCA TAA", + "TTA TATT", + "C GAATT", + "CC GGA", + "TGA GCC", + "CC GTA", + "CAGA GGA", + "GTG TTTG", + "GA CAAAA", + "TTTTTT AAA", + "GTT GCC", + "GA GTTTT", + "TC AAAAAA", + "TGTT TCA", + "TA TCTA", + "TCTC TCC", + "CTC CACA", + "TAAA TATT", + "TTTT CTG", + "CTC TCAA", + "CCTT AAA", + "TCTTTT AA", + "GAA CAAA", + "TTA GCA", + "GCTCA TG", + "TAAA GTA", + "GGA TAA", + "TTATT AAA", + "CTC CATT", + "TCTC TGA", + "TTA TTTG", + "CCTG TAA", + "TTA TATA", + "GA CTTTT", + "TGTT GTT", + "GCAAA TG", + "CTT CAAA", + "GAA TATT", + "GAA TCC", + "CTC TTAA", + "GCA TAA", + "GAA TGAA", + "CTTAA AAA", + "TAAAAA TG", + "TTTTAA AAA", + "CTC TGGG", + "TGA TCC", + "GC TCTCA", + "CTC CAGA", + "GAGTG CAGTG", + "CAA TATT", + "TA GAAAA", + "GTAAA TG", + "TA GCTG", + "GC TCAAA", + "GCA GGAA", + "TA CCTG", + "GG GAAAA", + "TTTT CTA", + "GGGG GGGG", + "CC GA", + "CTT TGAA", + "GGA GGTG", + "TA GTCA", + "GG CCCA", + "TGA TGTT", + "CAAA TAA", + "TCTT CCA", + "GC GCTT", + "GTA TTTG", + "GTC TC", + "GAAA TCA", + "TGA TAAA", + "CATT CTT", + "TA TCCA", + "GCC TCTG", + "TGA GATG", + "C GCCAA", + "GTTTTA TT", + "TATA TATT", + "GTA GGA", + "GACA GAA", + "CTCCAGCC TGGG", + "GC GTGA", + "GG TATG", + "GAGG GAGG", + "TCA TTTG", + "CTA CC", + "TACA 
GAA", + "GG TAGA", + "GA TCTA", + "GTC CATG", + "TGA GGAA", + "TAA TAAAA", + "TAAA CTT", + "TCA CATT", + "GGA GGCC", + "TCA CAAA", + "CA CTTTT", + "CGG CC", + "CAA CAGA", + "GTA GAGA", + "GTTA TTTT", + "CGTT TG", + "TC GTCA", + "TCTG CTG", + "CAA CACA", + "GG TAGG", + "GCA GCTG", + "TAGTA GAGA", + "CAA GCC", + "GCA TTTG", + "TAA TATG", + "GCTT AAA", + "GCTT CTG", + "CTC TCCA", + "TCA TCTT", + "C GTCTG", + "TCA TTTA", + "CA TAGG", + "GC TCCTT", + "TGTT CTT", + "TACA TTA", + "CACA GAA", + "TAAA TATA", + "TA GAGG", + "GA TAGG", + "TCC TGAA", + "GGA GCTG", + "TGA TATT", + "TCA TTAA", + "CTTTT AAA", + "TC GTTA", + "TAAA CTA", + "GTT TGAA", + "TAAAA TTA", + "CA CCCC", + "TCA GAGA", + "CTCC TGCCTCA", + "TGA CATT", + "GTA TTTA", + "CTT CATT", + "GAAA CTG", + "TAA CACA", + "GTT CAAA", + "GGA GATG", + "TC GGCC", + "CAGCA TT", + "TC GATG", + "TATT CTA", + "CTG TGAA", + "TATT GAA", + "TTTT CCA", + "TATT TCTT", + "GGTG AAA", + "CTGA GAA", + "GCA CAGA", + "GC GAGG", + "CTG TGTG", + "TGAAA TG", + "TGA TGAA", + "GTCC AAA", + "CTCAA TT", + "TCCA GAA", + "GTA TATA", + "TAAA GTT", + "TCTC AAAA", + "TCCA TCA", + "GTC TGAA", + "TGA GAGA", + "TGA TTTG", + "TTA GCC", + "CTC CATG", + "TCC CTGA", + "GA GCTA", + "CCCC CCCC", + "GTG GAAA", + "CTG GGAA", + "CAA TGAA", + "CCA CACA", + "CTT TCAA", + "C GGAGG", + "TC GTGA", + "CCA GAAA", + "GTTTT AAA", + "TGTT GAA", + "TCC TGTG", + "CTAAA TG", + "TCC TTTA", + "GTC TGGG", + "TCTC TTTT", + "TA CGG", + "TATT GTA", + "TTA GTG", + "TTA CC", + "TAATCCCAGCA CTTTG", + "TCTG GAA", + "CTT CTCA", + "CGCA TT", + "TATT TAAA", + "TCA CACA", + "TAA TCAA", + "GC GAAA", + "GG GCCA", + "GTT CATT", + "GAGAA AAA", + "TTTT GTA", + "TA CTTTT", + "TC GAGG", + "GTGAA AAA", + "CAA TATA", + "TCC CATG", + "CAA TTAA", + "CTG GAAA", + "CCCA GCA", + "TCC CATT", + "TCC TGTT", + "CTC TTTA", + "TCC CCTT", + "GTT TCAA", + "GTC CAGG", + "GGAA GGA", + "TA GTTTT", + "TGA CCTT", + "GTGCTG GGATTACAGG", + "TATT TATA", + "TCTG CAA", + "CTGAA AAA", + "TATG TTA", + "CTT CACA", + "GCA 
CAGG", + "CCTG CTG", + "TTTT TTAA", + "GTTA TTA", + "CC CTTTT", + "TGA TTTA", + "TA CAAAA", + "TAA GTAA", + "TTTT TAAA", + "CA TCTC", + "GTG GTGA", + "GTG GAGA", + "CTC TGCA", + "GTTAA AAA", + "TACA TACA", + "CTT TGTG", + "GGA CACA", + "TCTGA TG", + "TA TTATT", + "TCTT CTA", + "CTG TGTT", + "TCA GCTT", + "CTT TATA", + "GG CGC", + "TCC CTCA", + "GTA CC", + "TGGA GAA", + "CAAAAA TT", + "TCTT TAA", + "CTC TCTC", + "TGA GTGA", + "GCA GCTT", + "CGGA TT", + "TA CGA", + "TCTT GTT", + "TC GTAA", + "GCC TGTG", + "TATT CTG", + "GG GATA", + "GG GTCC", + "TGA GATT", + "CTTTTA TT", + "TCC CACA", + "CATG GTG", + "TTA GGA", + "GAA CACA", + "TCA TAAA", + "CAA CATT", + "GG TCCA", + "GAA TTTG", + "TATTAA TT", + "TCC TGGG", + "GCA GCAA", + "CTC TTCA", + "GAA GAGG", + "TCTG TCA", + "CTGAA TG", + "CCA CAAA", + "GTG GAGG", + "TGA TTAA", + "CTCC CTCC", + "CACACACACACACACA CACACACACACACACA", + "GC GATG", + "CATT CTG", + "GTA GAAA", + "TCA TCAA", + "TTTT CAA", + "TATG TATG", + "CCAAA TG", + "TAA TTTTA", + "TAA GGAA", + "CTT GAAA", + "AAAAAAAA AAAA", + "GC TCCTG", + "GCA GATG", + "GAAAAA TT", + "GA CGC", + "GTG GGGG", + "GTCAA TT", + "CTT GCTT", + "TGA CACA", + "GTG TGTT", + "CCA GAGA", + "CCCA GCC", + "TAAA GAAA", + "GTC CATT", + "TAAA TTAA", + "CC CAAAA", + "GAA TTAA", + "TGAA TTA", + "TTTT TTTG", + "CCA GCTT", + "CAA TTTG", + "CTG TTTG", + "GTC TCAA", + "GTT TGTG", + "GG CATA", + "GG TACA", + "TGA TGTG", + "GATT TCA", + "TCTG CTT", + "GTAA TTA", + "TAA AAAAAA", + "GCC GCC", + "TGTGTGTGTGTGTGTG TGTGTGTGTGTGTGTG", + "GC GTCA", + "GC TCATT", + "GAA CCTG", + "TAAA CAAA", + "GTG CTGA", + "TCA GGAA", + "TCC TCAA", + "TCTA TTTT", + "TCTG TTTT", + "CAGA GCA", + "CCA GGAA", + "GTC TTTA", + "TCTT CAA", + "TCAAAA TT", + "GC TTATT", + "GTT CCTT", + "CA CCTA", + "TCA CTGA", + "GAA GCAA", + "TAAA GA", + "TCC TTCA", + "TCTCA TG", + "TCA GTGA", + "TACA CAA", + "CA CGTG", + "CC TAAAA", + "GCC TTTG", + "GG CTTTT", + "GTT GAAA", + "GTT CTC", + "CTA GA", + "CTA CAAA", + "GCA CAAA", + "TTA CATT", + "GG 
CCCC", + "TAA TGTG", + "CTG CCTT", + "TCC CAGA", + "GTGAA TG", + "GGA CAGG", + "GGA TGTG", + "GTT TATA", + "TGA CCAA", + "GTG GCTG", + "GTT CTCA", + "CTTA TTTT", + "CTG GAGA", + "TTA CAAA", + "GTC TTCA", + "CAA GAGA", + "CCA TTTG", + "TCA CAGA", + "CTA GTA", + "CA TTATT", + "TTA GA", + "GC TCTCC", + "GC GCCA", + "TATG TTTT", + "TCC TCCA", + "CAGAA AAA", + "GTG GGAA", + "TAA TCTT", + "TGA GTCA", + "CTG CTC", + "GTC TCCA", + "TCA TGTT", + "GTT TCCA", + "TAA GCAA", + "CTAA AAATA", + "TGA CTGA", + "TC GGTT", + "TTA GAAA", + "TAA GCC", + "TAAA GCA", + "CC TCTCC", + "CC TCCTT", + "TCA GATT", + "TATG AAAA", + "GCTGA TG", + "CATA TTTT", + "GC TCCAA", + "CGG CGG", + "CCA CTGA", + "CA GCAAA", + "CTG TCTT", + "CTA GCA", + "TC GGGG", + "CACA GCA", + "GC TGATT", + "CTA GGA", + "TAA CTC", + "TCA TATT", + "CCTT CTT", + "CTG CAAA", + "CC CGC", + "GG TCTA", + "CCCA GGA", + "GTG TCTG", + "TAATAA TAATAA", + "TCA CATG", + "CAA TTTA", + "TATATATATATATATA TATATATATATATATA", + "CCA CAGA", + "TCAA TTTT", + "GTA TTAA", + "GAA CATT", + "TCTC TTA", + "CTA TTTG", + "TCTT TCC", + "GGTT AAA", + "GC TAATT", + "CTG CTGA", + "TA CCTA", + "CAGG GTT", + "TC GCCA", + "CAAAAA TTA", + "CTT CTGA", + "GCA TGTG", + "CTA TTAA", + "GCA CATG", + "CAA CATG", + "TCA TGAA", + "GAA TGTT", + "GG GTTTT", + "CTG CCTG", + "GTC CACA", + "TAAA CA", + "CTC TGGA", + "GA CCCC", + "GG CAAAA", + "TCTG TTA", + "CTA GTG", + "CTA TATA", + "TCA GTCA", + "TAA CTAA", + "GAA GATG", + "GTC TTAA", + "CAA GGAA", + "GTAA AAAA", + "TCC CCTG", + "TC GCAA", + "TCTG CCTG", + "CC TTTTA", + "GTCC CAGCTA", + "TATA TATG", + "TATT GTG", + "TGTG TTTT", + "GC GCAA", + "CACA GTG", + "TAA GATT", + "CTC TGTA", + "GGAGG CTGA", + "GGA CAAA", + "TATTAA AAA", + "TC GTCC", + "TC GGAA", + "CTA TAAA", + "CTT CAGA", + "CTA GAAA", + "CATT CAA", + "CA CGCA", + "CAGGA TT", + "CCA TCTT", + "GTA GCC", + "GAA TTTA", + "CA CGC", + "CAA TCC", + "TGA GCAA", + "GAA GCTG", + "TCAA TTA", + "GAA GTCA", + "CTG CACA", + "CCA CGG", + "GGA TCTT", + "CTCCTGCCTCA GCCTCC", + 
"TAAA TGAA", + "CC GTC", + "TC GGTG", + "TTTTA TTA", + "GCA GGGG", + "GCA GGTG", + "TCTA TTA", + "TAA CTTA", + "CTAA TTTT", + "CC CGCC", + "TAA TACA", + "GGATT AAA", + "TCTC TCTG", + "GCTT CTT", + "CATT TATT", + "CCA GAGG", + "GGA CAGA", + "GCCAA TT", + "TCC CCAA", + "GTT GATT", + "GAA GAAAA", + "GCA TTTA", + "CTC TAAA", + "CACACACA CACA", + "CC TCAAA", + "TA TAATT", + "CAA TGTT", + "GCC CAGA", + "GTA TATT", + "CTAA AAAA", + "CCA CAGG", + "TAA GAGA", + "TCC TTAA", + "TA TTTTTT", + "GAA TATA", + "GGA TTTG", + "GTG TGAA", + "CTG GCTT", + "GC GGCA", + "TCC GCC", + "GCA TCTT", + "TC TAATA", + "CTG CATT", + "CTC TGCC", + "TCA CTCA", + "TCA GCAA", + "TATTA TG", + "CCA GCTG", + "GA TCTC", + "GCC TCTT", + "CTT CCAA", + "TCC TAAA", + "TCA TCTG", + "CTA TTTA", + "CTG CAGG", + "CAA GCAA", + "GC GGAA", + "GAAA TAAA", + "TAAAA TAA", + "TCA CCTT", + "CCA TGTG", + "GA CCTA", + "CAGA TGA", + "GTG GCTT", + "TTATTA TTATTA", + "TCC CGG", + "TATT TGTT", + "CTG TAAA", + "TCCA TCCA", + "CTG TATA", + "GTT TCTA", + "GTT GCTT", + "CCA TGAA", + "GC TCTTA", + "CTT CATG", + "GTT CCTG", + "GCTG GGA", + "TCA GAGG", + "CATT AAAA", + "TCA GTAA", + "GAA TGTG", + "CTTA TTA", + "GCA CTGA", + "TGA GGTT", + "CA TCAAA", + "CTT CTCC", + "GTT TATG", + "CTT TCCA", + "GTG CCTG", + "GAAA GGA", + "GCA TCTG", + "TA CCCA", + "TAA CAGA", + "AAAAAAAA AAA", + "CTA TGAA", + "CA GTAAA", + "TA GCTA", + "TC GTTTT", + "GTG TCTT", + "GA GCAAA", + "TC TAAAAA", + "GTT CACA", + "GAAA TGA", + "CAAA TGA", + "GCC CTGA", + "GTG TTTA", + "TCA TGTG", + "CATA TTA", + "TCAAAA AAA", + "TAA GTTA", + "TCTC TCTT", + "CCA GTGA", + "CC TCTGA", + "CAA GATG", + "GCC TGTT", + "GTT TGGG", + "CATT CATT", + "GCC CCTG", + "GTT CTGA", + "GC GGCC", + "GC GGTT", + "CAAAA CAAAA", + "TACA TATA", + "GAATT AAA", + "TCAA GAA", + "CTG TATT", + "TTTT TATT", + "GA TTATT", + "TCTAA TG", + "GTT GCTG", + "TGAA TGAA", + "TCA GCTG", + "CTT GATT", + "CAGAA TG", + "CTAA TTA", + "TATAA TG", + "GTTTT GTTTT", + "CCA GCCTG", + "TGA TGGA", + "GCA GATT", + "CTC 
TATT", + "GCA GTCA", + "TAA GTGA", + "CTA CACA", + "CGCA TG", + "TA GCCA", + "GTG GCTCA", + "CAAA TAAA", + "GTG CTCA", + "TTTT TTTTTT", + "TAA CATG", + "TCCCA GCTA", + "CAAA GTA", + "TCA TATA", + "CAGCA TG", + "TGA TCTT", + "CA TAATT", + "TGTG TTA", + "TTTT GAA", + "TTAA TTA", + "GATA TTA", + "TCA TTCA", + "TGA TATA", + "TGA CTCA", + "GA CGTT", + "TGA CATG", + "GTT GTGA", + "CA TTTTTT", + "GCC TGGA", + "CTA TGTT", + "CTT TGGG", + "GTC TCAAA", + "CTG GCTG", + "CCA CATG", + "GG CGTG", + "CTTAA TG", + "TAA GATG", + "GTA TAAA", + "TGTA TTA", + "TAA CTCA", + "GAGAGAGAGAGAGAGA GAGAGAGAGAGAGAGA", + "GCA TGAA", + "GTTAA TG", + "TCCA GGA", + "GAGA GAAA", + "TCTC TGTG", + "CTC TCTA", + "CCA CCTG", + "GCCA GGA", + "CTG GAGG", + "CCA TTTA", + "GTC TGGA", + "GCC CACA", + "TAGA GAA", + "CAA CTCA", + "GGCA GGA", + "TCTTA TG", + "CAAA GGA", + "GG TAAAA", + "GAGA GGA", + "GTC CAGA", + "GCC CTCA", + "GATA TTTT", + "CAGG GAA", + "CCA CATT", + "GA GGAGG", + "GAAA CTT", + "CA GAATT", + "TCA GATG", + "TATT TCC", + "TACA GTG", + "TGA GCTG", + "CCA TCTG", + "GAGAA TG", + "TCAA CAA", + "A TT", + "TAA CTGA", + "TGA GAGG", + "CA CTGAA", + "CCA CCTT", + "CTG CAGA", + "TCA CCAA", + "TGA GCTT", + "CAAA GCA", + "GG TTTTA", + "CGG GGTT", + "TCCAA AAA", + "TATG TATA", + "CCA GATG", + "TCCA TTTT", + "CTG CTCA", + "GA TAATT", + "CCA CCAA", + "CTCC TCC", + "GA GAATT", + "GAAA GTA", + "TAAAA TAAAA", + "CTT CTTA", + "CTG TTTA", + "GAA TCAA", + "GCA TGTT", + "GCA CGG", + "GA CTGAA", + "GTG CACA", + "GA CGTG", + "TATA CAA", + "TC GACA", + "GAA GACA", + "TAAA GGA", + "GA TCAAA", + "CAGTG TG", + "CTA GCC", + "GAGG AAAA", + "TCTG AAAA", + "GAA CCCA", + "GATG GATG", + "GTT CTTA", + "CTA TATT", + "GCA TTAA", + "TCTCTCTCTCTCTCTC TCTCTCTCTCTCTCTC", + "TCA GTC", + "TATTTT TG", + "GAGGA TT", + "GTA TGTG", + "TAA CCAA", + "GTT GTTTT", + "TTTT TCTT", + "GTG TTAA", + "CTT GGAA", + "AAAAAA TG", + "CAA TGTG", + "GTG CCTT", + "GCC TCAA", + "GA GTCTT", + "GCTAA TTTT", + "CGAA AAA", + "GTG TATA", + "GC GTTA", + "CTGCA 
CTCCAGCCTGGG", + "GTT CATG", + "CAAA GAAA", + "GCA GTAA", + "GGA TGAA", + "CTT TATG", + "CAGG AAAA", + "TCC TGCA", + "CTG TCTG", + "GAA CATG", + "GGA TGGA", + "GCC TGAA", + "CAAAAA TG", + "TCCAA TG", + "CCA GCAA", + "GG CCTA", + "CAA CTGA", + "GCA CCTG", + "GTC TATT", + "CC TCTCA", + "GTG GTCA", + "GTG TAAA", + "GTA CACA", + "GTAAAA TT", + "GTA CATT", + "TATA TAAA", + "CTG TTAA", + "TAA GTCA", + "GCC TCCA", + "AAATT AAA", + "GTG CAGG", + "TCC TGGA", + "GTG CAAA", + "GC GTCC", + "CCA TTAA", + "GGA GGGA", + "TCA CTTA", + "TCATT AAA", + "CAA CATA", + "TAA TAGA", + "TAA TGTA", + "GA TTTTTT", + "GTT GTCA", + "GGA GACA", + "GTG TGGG", + "TCA CAGG", + "TC GGCA", + "CTCC CTG", + "GA CCAAA", + "TGTT TATT", + "CGAA TG", + "CTCAA TG", + "TCA CCTG", + "CA GTGTT", + "TGA GACA", + "TA GGGG", + "GAAAAA TG", + "GTT GAGA", + "TC GATA", + "CTC GGGAGG", + "GTT GTC", + "CCA GTCA", + "GCC CAGGCTG", + "GAA CAGA", + "GGCTCA CTGCAA", + "GCA GACA", + "TGA GGTG", + "CA CGTT", + "TAA GAAAA", + "CCA GGCA", + "GTA TCTT", + "CTTGG GAGG", + "CTT TCTA", + "CC GCTG", + "GA GCTCA", + "GAGA CAGA", + "CTT CAGG", + "GCA CATT", + "GTA CAAA", + "CTT GTAA", + "GTG GGTG", + "GAA GTGA", + "GG TCTC", + "GTA TGTT", + "GCA CTCA", + "TTA TGTT", + "CAA GTCA", + "CAA GTGA", + "GAAA CTA", + "TAAA TAAAA", + "TCTT AAAA", + "GTT GGAA", + "GTT CTAA", + "CCA CTC", + "CA GTGAA", + "GAAA GG", + "GCA CGA", + "TAA CTTTT", + "GTT GTTA", + "TCA GTTA", + "CGGA TG", + "TATT TGAA", + "CC CTGAA", + "GCC CTC", + "CTT CTAA", + "TTTG TTTT", + "GA GCTGA", + "CTG TGGG", + "CAA GATT", + "GAA GCTT", + "TGA GTAA", + "CTT GCTG", + "GGA TGGG", + "CGTA TG", + "TCCA TTA", + "GTC TGCA", + "GCCA TTTT", + "GTT GTAA", + "CACA CAA", + "GGACTA CAGG", + "C GTTTTA", + "TCTT CC", + "TAA CCTT", + "CTT TAAAA", + "TGAA TTTT", + "CTA CAGA", + "GCAA GAA", + "TAA CAAAA", + "CAATT AAA", + "CCA CTCA", + "CATG GTGAAA", + "CCCA GAA", + "CTA CATT", + "CC GAGG", + "TCCA GTG", + "TGA GTTA", + "GGA GTCA", + "TAA CGA", + "GA GTAAA", + "GA CTCTG", + "GGA GCTT", + 
"TA CTCC", + "CTG CATG", + "GC TTTTTT", + "GTC TAAA", + "GTG CGG", + "CA TCTCA", + "TGA TCAA", + "GGA GATT", + "GC AAAAAA", + "CA CCAAA", + "TGA CGG", + "CAGA GG", + "GTT GATG", + "CTT GTCA", + "TCCA CCTG", + "GGA GCAA", + "CAA GTAA", + "CCA TAAA", + "GTG CATG", + "GCA TATT", + "GTA GATT", + "GCC TAA", + "CTCAA AAA", + "GGA GAAAA", + "CTA TCC", + "TAATA TTA", + "GTG CTC", + "CAA TATG", + "TGTG GAA", + "TGA CTC", + "GTG TATG", + "TTTTAA TG", + "GC TCTAA", + "CACAA TG", + "CA GCTCA", + "GTT GGTT", + "CTAAAA TT", + "GTC TATG", + "TGTG AAAA", + "CTG GGTT", + "CCCC TCC", + "CC CTCTT", + "GCA GGGA", + "GAAA CCA", + "CATT TCC", + "GCA GCCA", + "TCA TATG", + "GCA GGCA", + "C GTAAAA", + "TGA CCTG", + "CAGA GGTT", + "CTT GTGA", + "TTA TCTT", + "CTG TATG", + "GTCAA TG", + "GGA CGG", + "GC GTAA", + "CAAA CTA", + "TAAA TGTT", + "CTT CGG", + "CTCC CCA", + "TACAA TG", + "TCTG TAA", + "GAA TATG", + "GC GGGA", + "GGA CATT", + "TTA TGAA", + "GGA TGTT", + "GGA CATG", + "TCA GGTG", + "CAA CAAAA", + "GAAA GAGA", + "GTG GATG", + "GG GCTA", + "CCA TCAA", + "CA GCTGA", + "CTC CACC", + "CAA TCAA", + "GTG GTC", + "TGA CAGG", + "CCA TTCA", + "GTCC CTG", + "CAGA CACA", + "GTT GGTG", + "CC TCCTG", + "GAA CTGA", + "TATT CATT", + "GCC CATG", + "CAA TCTT", + "GAAA GCA", + "GAA TCTG", + "TTA TTTTA", + "GTT TGGA", + "TTTT TGTT", + "GGGAA TG", + "GC GACA", + "TAAA CTG", + "CCA TATT", + "GGA TCC", + "CAA GCTT", + "TAAAAAA AAA", + "TCA CTC", + "CA CTGTT", + "TGTTAA TT", + "GGA CTGA", + "GGA GTGA", + "CATA CACA", + "GTT TGTA", + "TCCA GCA", + "GTG CATT", + "GG AAAAAA", + "CCAA GAA", + "TCAA TA", + "CTT CCCA", + "TGA GAAAA", + "GGCC TCCCAAA", + "CAA GCTG", + "GCC CAAA", + "TGA CTTA", + "CA GCCTT", + "CTG GATT", + "TTTT TTTA", + "TCA CGG", + "GCA GTTA", + "TGA CTAA", + "TTA CAGG", + "TGA TATG", + "TAA TTATT", + "TCTT GAA", + "GCC CCTT", + "GTT CAGA", + "CTC TATG", + "CCA TGGA", + "GAGG GAA", + "GGA GGCA", + "CTT TGCA", + "TCTT GG", + "GGA GGTT", + "GCCAA TG", + "CTG GTGA", + "CAA CCAA", + "CCA GTC", + 
"CTT GAGA", + "TACA GCA", + "CTT GTC", + "GA CGGA", + "CTT CTTTT", + "GTG GC", + "GAGGA TG", + "CAA TAAAA", + "GAAA TTTT", + "AAAA AAAAAA", + "CTC TATA", + "GTA TGAA", + "CTT GTTA", + "TAA CATA", + "CAAA CACA", + "TGATT AAA", + "GCTC TGTT", + "GTG GGTT", + "GTT GGGG", + "GTG TGTA", + "GTAA TTTT", + "GTA TCC", + "TGTGTGTG TGTG", + "TCTT CCTT", + "TCA CTAA", + "TCTCC AAA", + "TA TCAAA", + "TGA TGGG", + "GGA TATT", + "CAAA TTTT", + "GTT CAGG", + "GTG GATT", + "GTG CAGA", + "GCTG CC", + "CTCA GAA", + "GCA GTC", + "GGA TAAA", + "GCC TTCA", + "CCA GGTG", + "TA TCTC", + "CAA TGCA", + "CCCA CTG", + "GTG TATT", + "CGA CAGA", + "TGA GATA", + "CCA GGTT", + "TGTT TAA", + "CATCA TG", + "TGA TTCA", + "GCAA TTA", + "GAAA TGAA", + "CTT GGTT", + "GAA GATT", + "GGA TTAA", + "CC TCATT", + "GGCCA GGCTG", + "GCTA TTA", + "GCCA GCA", + "GAGA CAGG", + "CTT GAGG", + "CA GTCTT", + "GTT CTCC", + "TATT TCAA", + "TGA CGA", + "CATG AAAA", + "CATTA TG", + "TAAA TTTA", + "GA GTGAA", + "CAA CAGG", + "TAA GCTT", + "CACA TTTT", + "GA TCTCA", + "TA GTCC", + "GACC CTG", + "TAA TGCA", + "TAA GTC", + "TAA TAATT", + "GAA GTAA", + "CAA CTC", + "CA TCATT", + "GA CGAA", + "GAAA CAAA", + "TATT TCTG", + "CATTAA TT", + "CCA CCCC", + "TAATA TTTT", + "GTT TAAAA", + "GTA TCTG", + "GTCAA AAA", + "GATG CTG", + "TGTT CTG", + "GG TCAAA", + "GTA GGAA", + "GTA TATG", + "TGA TCTG", + "GGGG CTG", + "GCA TCAA", + "GCCAA AAA", + "CCA CGA", + "GC TAATG", + "CAGA GAAA", + "CCTT CTG", + "TCC TCTA", + "GCA GGTT", + "CTCA CTG", + "TAGA TTA", + "GCC GAGA", + "CCA TCCA", + "CTT TACA", + "GTA CATG", + "GCA CCAA", + "CTT TGTA", + "CTA TGTG", + "TCA CTTTT", + "TGA GTC", + "CAA GAAAA", + "CTGA CTG", + "GTTTT TTTT", + "GCA TAAA", + "TAA TCTG", + "GAA AAAAAA", + "CAGGA TG", + "TGA GCCA", + "GAA TTCA", + "TCA GACA", + "GTT CCAA", + "TCA GGTT", + "CAAA CTG", + "CATT TCTT", + "TGTT AAAA", + "CCA GACA", + "CAA GTTA", + "CATG TTA", + "CATT CTA", + "TCTTTT TG", + "TGA GGGG", + "CACA TTA", + "TAAAA TAAA", + "GCA TATA", + "TGTT CTA", + "GAA 
GGGG", + "GAGTG TG", + "TAA GACA", + "GAA CTC", + "CCA GTAA", + "GAGA GAGG", + "GC GACC", + "CAA TTCA", + "CGG CTG", + "CCA GATT", + "CCTG GG", + "GGAA GAAA", + "GAGA GG", + "TCAAAA TG", + "CCTCA TG", + "TAAA GG", + "CTT TGGA", + "CCA GGGA", + "GTA CAGA", + "CTGAGGCA GGA", + "TGTT TCTT", + "CCA GGCTG", + "CTGA GG", + "GAGG CTG", + "CTCC TGGG", + "GAA GTC", + "CGA CC", + "GGA CTCA", + "GGA GTC", + "CA CAATT", + "GTG TTCA", + "GA CTAAA", + "GTCA TTA", + "CAAAA TTA", + "TGAA GAAA", + "GCA CCTT", + "GTT TGCA", + "TCC TGCC", + "GTA GATG", + "GCC TGCA", + "GA GTTAA", + "TCC CTTA", + "GTG GTTA", + "TC GGGA", + "TACA TAA", + "TCTC TCCA", + "CA CTAAA", + "TATATATA TATA", + "GTG GCAA", + "CACCA TG", + "TTTG AAAA", + "CACA CTG", + "CTT GGTG", + "TACA CTG", + "CC TCCAA", + "CAA CCTT", + "CA GCCAA", + "TTTT CAAA", + "TGA TAGA", + "TACA CTA", + "TCTG GG", + "TCC CAGCA", + "TAGG AAAA", + "CTT GGGG", + "TC TGTGAA", + "CC TTATT", + "CATT TAAA", + "TTTTA TTTTA", + "GCC CTCC", + "CTGA GCA", + "CC CGTG", + "GTA GTGA", + "TCC TATT", + "GAA GGTG", + "TGTG CTG", + "TCCA CTG", + "TAA TCTA", + "TGA TGTA", + "GTG GTAA", + "TAA TGGA", + "GATG AAAA", + "GTA GTAA", + "GTG GGGA", + "GTG TCAA", + "CAGA CTG", + "TC GAAAA", + "CTCA TTA", + "TAA TAATA", + "CTCA GAAA", + "CA TCCTT", + "CC GCTT", + "GGAA GG", + "CC GTGA", + "CCA CTCC", + "CTA GAGA", + "TAGAA TG", + "GGA TTTA", + "TTAA TTTT", + "GC TAATA", + "TCC CCCA", + "CAAA TATT", + "GA TCATG", + "TCTTAA TT", + "CA GTATT", + "GTCTT GAA", + "CC GAAA", + "CTA TTCA", + "TAA GATA", + "CTT GCAA", + "GCC CCAA", + "TCC CTAA", + "GAA GTTA", + "GA TGATG", + "CTT GATG", + "CC CTAAA", + "CCTG CCTG", + "GACA TTTT", + "CCA GCCA", + "TGTGTGTG TG", + "GTC TATA", + "TCTC TGTT", + "GTC TGTA", + "TA TAATA", + "CTT GTTTT", + "CGC CATT", + "CTCA GCA", + "TACA GTT", + "CAA GAGG", + "GGAA GCA", + "GCC TTTA", + "CC CCATT", + "CAA CGA", + "GTCA TTTT", + "CC CGCA", + "CA GTTAA", + "GAA TCTT", + "CATG TTTT", + "CC GGGG", + "CTA CTGA", + "TCA CGA", + "TAAA TTTG", + "GCC 
CATT", + "CTC TAGG", + "GGA CCTG", + "TCA GGGA", + "GAGA CTG", + "CC AAAAAA", + "GCC GG", + "CCA GGGG", + "TCA GAAAA", + "CA TCTGA", + "TCTT CAAA", + "CTA CAGG", + "GAGG CAGG", + "CATT GTA", + "TAAA TCAA", + "GA CTCTT", + "CTGA TTA", + "GCA TATG", + "GGA CCTT", + "CAA GACA", + "TATT TATG", + "TATTTT AAA", + "CC GAGA", + "TCA TTTTA", + "CTCA CTCA", + "CCA CCCA", + "CTC TAGA", + "CTA CATG", + "GTG CTTA", + "CAA CCTG", + "TC TGTGTT", + "TAAA TATG", + "CAAA GG", + "CC CTGTT", + "GTT CGG", + "TGA TAAAA", + "CA CGAA", + "GTT GAGG", + "CAGA GTGA", + "GAAA TTAA", + "CACA TA", + "GAA CAGG", + "TCTCC TGA", + "CC TGAGG", + "GGAGG CCAA", + "GTT TACA", + "TAA CAGG", + "TGTG GTG", + "GCCTCC CAAA", + "CCA TCCTG", + "GATT CTT", + "GAA TGGA", + "GTA GTCA", + "CTCC TCTG", + "GAAAGAAA GAAAGAAA", + "CC CTGTG", + "CAGTA TG", + "GC GATA", + "GGA CTC", + "GAAA GA", + "TGTT GG", + "GTA GCTT", + "CA TTTTAA", + "CC CTCTG", + "GCA TTCA", + "CGA TTA", + "TCA CATA", + "TAA TGAAA", + "GGAA TTA", + "CTG TCAA", + "TAAATT AAA", + "CAA GTC", + "GTA TTCA", + "GGCCA TG", + "CTT TAGA", + "TGTT TCC", + "CATG TA", + "GAA TAAAA", + "CAA CTAA", + "TCA TCTA", + "CA CTCTT", + "CA GTTTG", + "CA TAAAAA", + "GCA TGCA", + "GATT TA", + "GAA CCAA", + "TCTG TGA", + "TCA GCCA", + "TCTC CACA", + "TCTCA GCTCA", + "TATCA TG", + "GCA CTTA", + "CGC CAGG", + "CGG GG", + "CATTAA AAA", + "TTTG TTA", + "GGA TATA", + "TC GACC", + "TAA TCCA", + "CC GC", + "CATT GTT", + "CCA GTTA", + "GTA GTTA", + "CTA GGAA", + "CC TAATT", + "TCA TGGG", + "GAA CTAA", + "GCTA TTTT", + "CC GTCA", + "CAGA TTA", + "CCA TATA", + "CAA CTTA", + "TCA GTTTT", + "CTA CCTT", + "GCA CTC", + "GTG TGGA", + "GTG CCAA", + "GACAA TG", + "GA CAATT", + "GTA CCTT", + "TAAA CATT", + "CA GGAGG", + "GTG CGA", + "GAAAA TTA", + "TCTCTT AA", + "CC GATT", + "GA TGATT", + "CCA TGGG", + "TC GGTA", + "CCA TATG", + "CCA GTCC", + "GCC TTAA", + "TGA TCCA", + "GTT GCAA", + "GTA GAGG", + "CAGA TTTT", + "GTA CTTA", + "TCTTTCTT TCTTTCTT", + "GCTC TGTG", + "TCAA TAA", + "GTT 
TAGA", + "GTT CGA", + "CAA GGTT", + "CTCA TTTT", + "CACA GG", + "CATG CTG", + "GAA CGG", + "TA TAAAAA", + "GAA GGCA", + "GA GCATT", + "TGTT TGTG", + "GCTG TTA", + "GTCA CTG", + "CAAA TGAA", + "GTGA CTG", + "GTT CTTTT", + "CAGGCTG GAGTGCAGTG", + "TGA TGAAA", + "TAA CGG", + "CTA CTAA", + "GACA TTA", + "GGA CGA", + "GAGCA TG", + "GCA TGGG", + "CCA CTTA", + "CTA TCAA", + "GCTG TTTT", + "GTC GTG", + "CCTG GCC", + "TCTC TGAA", + "TGTT GTA", + "CAGC CAGG", + "GTT TAGG", + "CC GCAA", + "GGA GTAA", + "CCAA TTA", + "CAGC AAAA", + "TCA TCCA", + "CA CGTA", + "TCA TAGA", + "TAATT AAAA", + "CA CTTAA", + "TCTT TATT", + "GAGA TTA", + "TAA GAGG", + "CAAA TTAA", + "GA CGCA", + "CA CGGA", + "GTG TGCA", + "TC T", + "TATTA TTA", + "GAAA TATT", + "GGA GTTA", + "TCTT TGA", + "CTGA TTTT", + "TGTGAA TT", + "TCC CACC", + "CC CTTTG", + "CAA GGTG", + "CAGA GTT", + "CCCCA TG", + "CTA CCAA", + "CTCC AAAA", + "CTT CCCC", + "CTG CTAA", + "GATT AAAA", + "GC TTATG", + "CTA CTTA", + "TAAAAAA TT", + "TCA GTCC", + "CTATT AAA", + "GAA TGGG", + "CACA GTA", + "CAA CGG", + "GG TTATT", + "TCA CCCA", + "TGA TGCA", + "TAA TTTTTT", + "GTT TGAGA", + "GTATT AAA", + "GCC CCCA", + "TATA GTA", + "TA GTAAA", + "TGA TACA", + "GTG GTTTT", + "CCA CTAA", + "CACA GAGA", + "CCTCTG CCTCC", + "CAA AAAAAA", + "CTC TCTCC", + "CA TAATA", + "GAA GCCA", + "GTT CCCA", + "TGTG TTTG", + "CAA TGGA", + "TGAA GTA", + "CTT CATA", + "CA CTGTG", + "GC TCTTTT", + "TGA CATA", + "TAAA GAAAA", + "GAGAAA TG", + "CAGG GAGG", + "TGTT CAA", + "GA GCCAA", + "GACA GAGA", + "GG CTGAA", + "CAAA TATA", + "GTG GAAAA", + "TAA GGTT", + "GTGA TTA", + "GGA TCTG", + "GATG TTA", + "GACTA CACA", + "TCC TATA", + "CTG CCAA", + "TCC CGA", + "GTGA TTTT", + "GC GTTTT", + "CAGA GTA", + "GAAA GGAA", + "CA CTTTG", + "CCCC AAAA", + "GCAA CCCA", + "TGCA TTTT", + "TCTA GAA", + "TA CTTTG", + "TGA GGCA", + "CA TCTCC", + "TC GCTA", + "TGA CTTTT", + "GA GCCTG", + "CATT TGTT", + "TCTT TGTT", + "GCAAAA TT", + "CC TGATT", + "GA TAAAAA", + "GA GTGTT", + "TCC TGTA", + "TACA 
GAAA", + "TC CAGGAA", + "GCCA GTG", + "TAGA TTTT", + "TAA TAGG", + "CTCC TCA", + "CATTTT TG", + "CATT TCAA", + "GCCA TCA", + "TAAAA TATA", + "GA CTGTT", + "GCA TGGA", + "CAAA GTT", + "CA TGATT", + "GA GTTTG", + "CTA GCAA", + "CTT CCTA", + "GG GGAGG", + "CTA TATG", + "TATT TATTTT", + "CA CCATT", + "CC CTCAA", + "TTTTTTTT TTTTTT", + "GA TCATT", + "GTA CATA", + "CTC CATA", + "CCCC GTCTCTA", + "GCC TGCC", + "CTA GCTT", + "CC CGGA", + "GATG TTTT", + "GTA TTTTA", + "TCA GATA", + "CCTG GAA", + "TATT CCA", + "GGA CCAA", + "GCCA TTA", + "CGA CTGA", + "TAA GCTG", + "TAAA CACA", + "GTT TCTC", + "CA TCTTA", + "GAAA TTTG", + "TAA TGGG", + "TAAAA TTTT", + "CTG TTCA", + "CCTG TTA", + "TA CTGAA", + "TGA CCCA", + "TGA TTTTA", + "CTCC TTA", + "TATA GAA", + "CTG CGG", + "GC GGTA", + "GTG CTAA", + "CAGA GGAA", + "TACA TCA", + "TCAA TCAA", + "CTG CAGCC", + "TGAA TATT", + "TCTA CAA", + "CCA CATA", + "CC CGTT", + "TATA CACA", + "TCC TCTC", + "TCTA CTT", + "CC GGAA", + "CTTTT TTA", + "GAAA GAAAA", + "CTA TCTT", + "GA CTTTG", + "TGAA CAA", + "GCA GTTTT", + "GC TAAAAA", + "GAGG CGG", + "TAA TAAAAA", + "CTG GTCA", + "CAGA CAA", + "GGA TATG", + "TGAA GG", + "GCCA GAA", + "CCA GGCC", + "CCA CCATG", + "CAAA CTT", + "TCA TGTA", + "GCTG CTT", + "GTAA TA", + "CCCC CAA", + "CA GCCTG", + "TCAA CTT", + "TAAAA TTAA", + "GCTG AAAA", + "CGA CGA", + "GTG GGCA", + "TGA GGGA", + "CGC TCC", + "TTTT GTTTT", + "GA GTCAA", + "TCA TGCA", + "CTG CTTA", + "TAA GTTTT", + "GTA GCAA", + "CCTT GG", + "TGA CAAAA", + "CTG GTAA", + "TCTT TATA", + "TGTG TGTT", + "CTG GTC", + "CTG GCAA", + "CATT TCTG", + "CTC TACC", + "CTGA GGA", + "CTAAAA TG", + "CTA GATT", + "GTA TCAA", + "CA GTCAA", + "CTG GGTG", + "CC TCTTA", + "TGA GTTTT", + "TTTTA TTTA", + "CC TTTTTT", + "TATA TACA", + "TA GCAAA", + "AAA TTA", + "CTG GATG", + "GA TAATA", + "GA CAAAAA", + "CCTG GGA", + "GCTT TCA", + "GTA CAGG", + "GCTG GAA", + "CTA CTCA", + "CAA TGTA", + "GC GTGAA", + "GA TCCTT", + "TATTAA TG", + "GCC CGA", + "TAAA GTG", + "GCTT CCA", + "CATG GAA", + 
"TGAA GTT", + "CTT TCTC", + "TCTGTG TG", + "GTA TGTA", + "CAA TACA", + "TCAA GG", + "CC TCTAA", + "TGTG GG", + "GA TCTGA", + "GTA CTGA", + "TTAA TTAA", + "GCA GAAAA", + "CTA CATA", + "CC GGTG", + "GGGG AAAA", + "TACAA AAAA", + "TTTT GG", + "GTGA GAA", + "TCAA TAAA", + "TCAA GTT", + "CTCA GGA", + "CTA CTC", + "CAAA TCA", + "GGCA GAA", + "CC CGAA", + "TGTT GTG", + "GAGC AAAA", + "TATT TGTG", + "GTA GGTT", + "CTA CCTG", + "CA CAAAAA", + "CTCA GG", + "GCTT TA", + "CAGA GCAA", + "CTCA GTG", + "GGAA GAGA", + "TAA CCTG", + "GAAA TATA", + "CGA GAA", + "GTGA GG", + "CATT TATA", + "GGCA GCA", + "TC TAAATT", + "CCCA GTG", + "GCC TAGG", + "TGCA TTA", + "CC GTAA", + "CATT CCA", + "CTA GTTA", + "GA CTTAA", + "CTA TACA", + "GACA CAA", + "TCTT CACA", + "CC GGTT", + "TAAA GTAA", + "CTG TGGA", + "TAA GGTG", + "TCCA GTA", + "CAAA TTTA", + "AAATT AAAA", + "CCA TCTA", + "CTCC CTT", + "CTCC TTTT", + "GAGAGAGA GAGA", + "GGA GATA", + "CCTA TTA", + "CACC AAAA", + "CC GTTA", + "TGTT TATA", + "CTCA GGAGG", + "GA CGTA", + "GTCC TTA", + "GAAA GTT", + "GCTG GTG", + "CTC TACA", + "CAA TAGA", + "TAAAA TATT", + "GTA CCTG", + "GTA CTAA", + "CTT TGAAA", + "CCTT TCC", + "TAAAAA TTA", + "CTC GG", + "CAA GATA", + "CATT TGA", + "CACC TCA", + "GCCA GCC", + "GTC GG", + "GCA CATA", + "CA CTCAA", + "CTTTT AAAA", + "CAGGAA TT", + "GCC TATT", + "TCTT TCTG", + "CTGAGGCA GGAGAA", + "CAGG CAGG", + "CTA GTAA", + "TCCA TA", + "GAA CTTA", + "C G", + "GCTG TGA", + "GAAAA TA", + "TCTT CATT", + "GAGG GAGA", + "CCCA TCC", + "GAGG TGGG", + "GCC TCTA", + "GTA GGTG", + "TAAA CCA", + "GAA GGAAA", + "TATT GG", + "A TG", + "TCCA GTT", + "CCCA CAA", + "GAAA CACA", + "GTC TCAAAA", + "CTTTT CTTTT", + "TGAA GGA", + "TATT GATT", + "CTA TGTA", + "AAAAAAAA AAAAAA", + "TCCTT AAA", + "GC GCTA", + "TCCA CTT", + "GA CTCAA", + "TAAA TACA", + "TCA TGGA", + "TCTG GGA", + "TCC TATG", + "CTG TGCA", + "TCAA GTGA", + "TCA TAAAA", + "CA TCCAA", + "CCTT CCA", + "CTG TACA", + "GAA GGTT", + "CTG TGTA", + "GTCA CTT", + "TCA CAAAA", + "TCA GGCA", + 
"GTGTT AAA", + "CC CTTAA", + "CAAA GTG", + "GAAA TGTT", + "CTG GGGA", + "GA CGCC", + "TATA TGTG", + "CTA GATG", + "GAAATT AAA", + "GAA TGCA", + "GCA CTAA", + "CGG GAGG", + "GCCA CAA", + "CGC TTA", + "TCCA CAA", + "CAGA TA", + "TC TGAATT", + "TATTA TTTT", + "GC GCGG", + "CTC TGAAA", + "TCTCTT TG", + "TATT TCTA", + "GGGG TGGG", + "GGA TGCA", + "CCA CACC", + "TAAA TGTG", + "TCTT CCTG", + "GCAA GG", + "CTG CTCC", + "CTG GAGTG", + "CTGTT AAA", + "CACA CAAA", + "CTGA CTT", + "GAAAA GAAAA", + "CCTT CTCC", + "GAAA TAAAA", + "CCTCA GGTGA", + "GA TAATG", + "GAATT GCTT", + "CCAAAA TT", + "CGTG AAA", + "CACTG AAA", + "CAGTG AAA", + "GA TCTTA", + "GAGA TGGG", + "TCTG CCA", + "TGA GGTA", + "TATG GAA", + "TATA TTTTA", + "TGAA CTT", + "GCA GATA", + "CTTTT CTT", + "GTAAAA TG", + "TCTC TAA", + "TCTG CAAA", + "GA GCCTT", + "TA TCATT", + "CAA TTTTA", + "CC GCCA", + "TATT TAAAA", + "GAGA GATG", + "GAGA TGGA", + "GCCA GGATG", + "CGA GTAGCTG", + "TTCA TTTT", + "TATA CTT", + "GTC TACA", + "GTGA GTGA", + "GCTA CACA", + "GGGA GGA", + "CAA GGCA", + "GC TTTTAA", + "CA CTATT", + "GTT CATA", + "TCC TC", + "GTG GACA", + "TATT TGGA", + "CTC CAGTA", + "GTT CAGTT", + "CCAA GG", + "CAGA GCC", + "CTC GCC", + "CC GATG", + "GGAA TTTT", + "TCCA GCC", + "CC TCTTTT", + "GAA CCTT", + "CATG CACA", + "GTT TC", + "GAA GATA", + "TA CCCC", + "GCTG CCA", + "GGGG GAGG", + "GCAGTGA GCTGA", + "CTG TCTA", + "CGA GGA", + "CAA TGGG", + "GC TGTGAA", + "GAAA GTG", + "TACC AAAA", + "GTCA GG", + "CAGC TCC", + "TGTG CTT", + "GTC TAGG", + "TTTT TGTA", + "TTA TATG", + "TCA GGGG", + "TATT GTTA", + "CC TGAGA", + "TA TCTCA", + "CAA TCTG", + "CA CTCTG", + "GATT TAA", + "TGAA TAA", + "TCTT GTA", + "TCAA CTG", + "TCTC CAGG", + "CTA GAGG", + "CTGA GAAA", + "CTA GCTG", + "TCCA CCA", + "CGA TTTT", + "CC GGCC", + "GTT GACA", + "CTTA GAA", + "CA TAATG", + "GA GTATT", + "CACA GAAA", + "GA CTGTG", + "CTA TTTTA", + "TGA GGAAA", + "TTATT AAAA", + "CTTA TTTA", + "CAGA CTT", + "CA CGCC", + "GCTT GG", + "CCTG CTT", + "TAAA GCAA", + "CCTC 
GTGA", + "TA GAATT", + "CTTA CAA", + "TAAA GGAA", + "GTC TAGA", + "GTGA CTT", + "TACA TATG", + "GTCA GGA", + "GCTC CAGG", + "GAA GGGA", + "CA TGATG", + "TCA TCAAA", + "CGTT AAA", + "GTA CTCA", + "CTCC CAA", + "TATA TGTA", + "GGTA TTTT", + "TAA GCCA", + "C GAAATT", + "GTTTG TTTT", + "TCTG TCTT", + "TATA TCA", + "TGTT CATT", + "CAAA CCA", + "TTCA TTA", + "TATT TGTA", + "GATT GAA", + "CTA TAAAA", + "GATTAA TT", + "CCCA CCA", + "TCC TAGG", + "TAAA TGTA", + "CTCTT AAA", + "GCA GTCC", + "GC GGCTG", + "GTC TCGAA", + "TGAA TGA", + "CTG GGGG", + "GTC TCGA", + "GAA CAAAA", + "TGAA TCA", + "TGTATTTT TAGTAGAGA", + "GTTA TTAA", + "TTTTTT AAAA", + "GTCA GTG", + "CCCA TTA", + "CACA GGA", + "TATT CCTT", + "TCTG CCTT", + "CCTG GTG", + "GC GAGC", + "TA CTAAA", + "TACA CAAA", + "CC GTCC", + "GCTT TGTT", + "GCA TCCA", + "CA TCTAA", + "GC TGTGTT", + "GTA GACA", + "GCC TATG", + "TCTT TGTG", + "GATT CTG", + "CGCC CGG", + "GA TGAGA", + "TA TCTGA", + "TGAA TTTG", + "CC TGATG", + "TAAAA CAA", + "CTT TAGG", + "TTTT CCTT", + "TGAA TAAA", + "CGG GGA", + "CAAA CATT", + "GTA TGGA", + "GCTT AAAA", + "TA CCAAA", + "CAAA GAGA", + "CTCC TGCC", + "GTAAAA AAA", + "CACA GCC", + "CCA TGCA", + "TA CAATT", + "CTA GTGA", + "CTGA GTT", + "GAGTG AAA", + "TCTGTT TG", + "CTG TAGG", + "TATAA AAAA", + "GCATT AAA", + "GTC CATA", + "TGTTAA AAA", + "TGTT TGA", + "GAA TAGA", + "CTT CAAAA", + "CTG GACA", + "CTG TAGA", + "CCATT AAA", + "CTA TCTG", + "CACTA TG", + "TTA TCAA", + "TAA GTAAA", + "TAATCCCAGCACTTTG GGAGGCC", + "CCA GAAAA", + "TGAA GCA", + "TCC CTTTT", + "TCA TACA", + "TA CGTT", + "GCC GTG", + "GGAA GTG", + "GG CCAAA", + "GTA CCAA", + "TCTCTA CTAAAAATA", + "CATT GTG", + "TGTG TGA", + "GAAA CAGA", + "CTT GACA", + "GA TGAGG", + "GAGA TTTT", + "CCTT CAA", + "GAA TCTA", + "CTC TCCTT", + "GG CGGA", + "TCTATCTA TCTATCTA", + "CACA CAGA", + "TGTG TGTA", + "CAAA GCC", + "TGTG CCA", + "GTT GAAAA", + "CTC CAGCA", + "TCAA GGA", + "TA GCTCA", + "CGC TGA", + "CCTG AAAA", + "GA CTATT", + "GATT CCA", + "GCTT CTA", + "GTC 
TGCC", + "CTT GGCA", + "TGTG GTA", + "GCTT TGA", + "GCTC TCTG", + "CTCA CAGA", + "TCTT TAAA", + "CAAA GCAA", + "TA CTTAA", + "GCTT CAA", + "CATT GAA", + "GGA GGAAA", + "CTA TAGA", + "CTGA GGAA", + "CCTG GCA", + "CC CTATT", + "CTC GTG", + "TTA CACA", + "TTA GGAA", + "CTG GTTA", + "GTT GTCC", + "TAATG AAAA", + "TATT TACA", + "GG GAATT", + "GTA GTTTT", + "GCTG CAA", + "CTA CGG", + "GCC GGA", + "CTG GGCA", + "CCTT AAAA", + "GATG GAA", + "TAGATAGA TAGATAGA", + "TATG TAA", + "GTA CGG", + "TATT CAAA", + "GA TCTCC", + "CCTG TTTT", + "TATT GCA", + "GGAAGGAA GGAAGGAA", + "GG TAATT", + "TTA CAGA", + "TCA GC", + "GCAAAA TG", + "GAGA GCA", + "GTA GAAAA", + "CATT TGAA", + "TCTT CTTTT", + "TCC CATA", + "GTTA TTTA", + "CTA TCTA", + "CA TCCTG", + "TCTT GTG", + "TTA TTATT", + "CC CGTC", + "TACTA TG", + "TAAA CATA", + "TAA GGAAA", + "GCTT GTG", + "CTC TAAAA", + "GTTTT AAAA", + "GACA GGA", + "TCC TAGA", + "TCCA CCCA", + "GTT TGAAA", + "CCA TCTCA", + "CTAA GAA", + "GTA TCTA", + "GTGA GGA", + "GCTG GAGG", + "CCTGTAA TCCCAGCTA", + "GCAA CAA", + "CTT TCAAA", + "CAAA TGTT", + "CTT GTCC", + "TCTCAA AAA", + "TATT TATTA", + "TAA GGCA", + "GAGA GGAA", + "TA TGATT", + "GCA TCTA", + "C GTTATT", + "GCC TGTA", + "GTT TCAAA", + "CCTTCCTT CCTTCCTT", + "GG CTTTG", + "GTCA GAA", + "CATG CATG", + "GTCA TTTA", + "CTG GAAAA", + "CTT CGA", + "CCTA TTTT", + "CCAA CAA", + "TCCA TCC", + "TAAA GTTA", + "GTC TCTC", + "TAA TCAAA", + "GATTTT TG", + "GATT TCTT", + "GG GCTGA", + "GCA TGTA", + "CCTG GGTT", + "GAGA CAA", + "GCTG TCA", + "TGA TAGG", + "GGA GACC", + "CC GGCA", + "TAA TCTCA", + "TGAA TTAA", + "TCTG GTG", + "GCC TC", + "GG CGCA", + "CCA GCTA", + "CA GTCTG", + "TGAA CTA", + "GTAA GAA", + "CCTT TCA", + "TCCA TGA", + "CAAA GGAA", + "CTC TC", + "CTC TCTCA", + "CTC CAGC", + "GTA GATA", + "CCCC CTCC", + "GG CGCC", + "TCTG TCC", + "GA CCATT", + "CTT GAAAA", + "TTA TCC", + "TACA TGTG", + "CAAA TTTG", + "TTTT GTG", + "CAGA GTG", + "GTAA TAA", + "GTGA GTG", + "TTTT TCC", + "GG CTCTG", + "GCC CTAA", + "GG CTGTT", 
+ "CC CAATT", + "CAGA GCTT", + "TATAAA TG", + "GA GTCTG", + "TCTTAA AAA", + "GTTTTA TG", + "GA TCCAA", + "GGCC CTG", + "GA TCCTG", + "TCAA GTG", + "GATT CAA", + "CCTC TCTT", + "GAGA CGG", + "CAGA TCA", + "TAAAA GAA", + "CTGA GCAA", + "CCTG CCA", + "CCTT CTA", + "CGC TCA", + "GG CTGTG", + "TGGG AAAA", + "GGA GCCTG", + "CTGA GTG", + "CGTC AAA", + "TCAA GTA", + "CGTAA TT", + "TTA CTTA", + "TATA CTA", + "GG GCAAA", + "CAA CTTTT", + "CTT TGCC", + "GC CAGGAA", + "CACA CTA", + "GCC CAGC", + "TAAATAAA TAAATAAA", + "CTT TCCTT", + "GGGA GAA", + "TATG GTA", + "CGG CCA", + "CCTC TCTG", + "GAAA GCAA", + "CAA GCCA", + "GG CGTT", + "CTC TTTTA", + "TCGGCC TCCCAAA", + "GATT TATT", + "CAA GTCC", + "TA TCTTA", + "GTTCAA GACCA", + "CTCA CACA", + "GAAA TCAA", + "TGA GACC", + "GG GTAAA", + "GCTT GTT", + "GA TTTTAA", + "TTTT TATA", + "CAGA GCTG", + "TC TGTTAA", + "GTAA TTAA", + "TCTT TGAA", + "CTT GCCA", + "TTTT CATT", + "CCA TGTA", + "TCTC GGCTCACTGCAA", + "GGA TTCA", + "TC TATTAA", + "TACA TAAA", + "GATT GATT", + "GGA GAGGA", + "CGC AAAA", + "GGA CTAA", + "TTA TGTG", + "GTCA CTCA", + "GACA GCA", + "CGA GTT", + "GATG GTT", + "GGAA GAGG", + "GCCAA CATGGTGAAA", + "GGA GCCA", + "TGAA CTG", + "CCTC TGTG", + "GTA TAAAA", + "TCC CAGAA", + "CATT TATG", + "GA TTATG", + "TGTT TCTG", + "GAGTG GGTT", + "TACA TATT", + "CTC CAGGA", + "GACA CTG", + "GG TCTCA", + "CC GGGA", + "TGTT TAAA", + "CTCA CCA", + "GGA CTTA", + "GCC CACC", + "CAAA TCAA", + "GAAA TGTG", + "TA GTTAA", + "TCTA TAA", + "TTA GATT", + "GTG TAGG", + "TACTG AAA", + "GCA CCCA", + "GTG GGCTG", + "GAA TGAAA", + "TCTA GTT", + "TCA GGAGA", + "TCCA CTA", + "CTCA GTT", + "TACTT AAA", + "GA CTCCA", + "TCCATT TG", + "CACA GCAA", + "GCTCATG CCTG", + "GGTG CTG", + "GCTT TCTT", + "GTG GCCA", + "TA CGTG", + "GTG CAGTG", + "TGAA GTCA", + "CCTT TAA", + "TCTCAGCTCA CTGCAA", + "GAAA TATG", + "CC TCAAAA", + "GGGG CGG", + "CGA CAA", + "GG TGATG", + "GTCTT AAA", + "CAGAAA TG", + "CGTCA TT", + "CCAA GCA", + "GGA TCAA", + "GTGCTG GGATTA", + "GCTG GCC", + 
"CGGA GCTT", + "TACA TGA", + "TGTT TGAA", + "TCTC CATT", + "TAA GCAAA", + "CCTT TCTT", + "TA CTGTT", + "TCCA TCTT", + "CTTA CTT", + "CGGA GGTT", + "CAAAA CAA", + "TCA TAGG", + "TTA CTAA", + "CTTA TTTG", + "GAA TGTA", + "CCCCA TGGA", + "TTA CTGA", + "CGG AAAA", + "CTC CAGTG", + "TGTT CCA", + "CAGA TGAA", + "GTT GATA", + "TCC CCCC", + "CATT GCA", + "CTCA GCC", + "CTTA CTG", + "TA TCCTT", + "CTTTTA TG", + "TGAGTA GCTG", + "GACTG AAA", + "CAA TGAAA", + "CGA CTG", + "CTT GGGA", + "GCAA GCA", + "TCA CTCC", + "GATT TGA", + "CATTTT AAA", + "TCAA CTA", + "GTCC AAAA", + "CACC CTG", + "TTA CCTT", + "CAA GGGG", + "TTTT GGA", + "GTTA TTTG", + "GCTA CTG", + "CTGAGGCAGGA GAATG", + "GTGA TGA", + "GTA GTC", + "TAGTA TG", + "GTA TAGA", + "GTG TCTA", + "GCTG CTA", + "TTA GTAA", + "TAAA CATG", + "GTCA CCA", + "CA TCTTTT", + "CATA TAA", + "TCTC TCTA", + "TTTTA TTAA", + "TATT CTAA", + "GAAA TTTA", + "CTT CCCTG", + "TAAA GATG", + "TA CGTA", + "GTT TATTA", + "GAAAA GAA", + "CCCA CCCA", + "CAATT AAAA", + "CC GACA", + "CAAA GTGA", + "CAAA CAAAA", + "GCAA TTTT", + "CGATT AA", + "TTA GAGA", + "CTGA TGA", + "GGA GGAGG", + "GTCC TGGG", + "TCA TGAAA", + "GCAA CCA", + "GTT GGCA", + "GCGG CGG", + "GTCC CCA", + "GTA GGGG", + "GCCA TGTT", + "GTT CGAGA", + "GCC TATA", + "TAAA TTCA", + "GG CCATT", + "GAAAA CAA", + "TGTG TATG", + "GTA CTC", + "TAGG GAA", + "CCTT GAA", + "TC TATTTG", + "GAGG GCA", + "GAAA CTGA", + "TA CGC", + "TA CAAAAA", + "TCA TTATT", + "GGAAAA TT", + "TCAA TATT", + "CC CGTA", + "GGA GAGAA", + "TTA GTTA", + "CTCA GAGA", + "TC GAGC", + "CTA GTCA", + "GATG GCA", + "TGAA CATT", + "CTA TGGG", + "CACA CCA", + "TCAA TTAA", + "GGAA CTG", + "TTA CATG", + "CTT TCATT", + "CAGC TCTG", + "TCTTTT TTTT", + "TAAA TCTT", + "TGA TCTA", + "CATA CAA", + "GC TCAAAA", + "GC TGTGTG", + "TCAA TCA", + "GATT TGAA", + "CCAA GGA", + "GTCC TCA", + "GTG CTCC", + "AAAA TAA", + "GTGA CAA", + "GCTCA CGCCTG", + "CGA CGG", + "TA TCCAA", + "CACA CATG", + "TCTC TCTCC", + "TGTG GTT", + "CTT GGTA", + "TCTG GTT", + "TTTA 
TAA", + "CTG CTTTT", + "TGTG TCA", + "CACA TCA", + "CC TAATG", + "C GTTTTTT", + "GCTG GCA", + "GA CGTC", + "TATAA TTA", + "TACA GTAA", + "GAAA GTAA", + "GTC TGAAA", + "CCCA TTTT", + "TATA TGA", + "CTT GATA", + "CTT TATTTT", + "CTT TATTA", + "GG CGAA", + "CCA TGCC", + "CCTG CCTT", + "GAAGAA GAAGAA", + "CTGA CTGA", + "GCC CTTA", + "TA TCTAA", + "GTG TTTTA", + "TGTG GCA", + "TATT GTAA", + "GCCA GAAA", + "CCCTG TCTC", + "CACA GGAA", + "AAAA CAA", + "AAAAAAAA AAAAAAA", + "TAA CTCC", + "GCC TAAA", + "CGA GTA", + "TA GTATT", + "GTATTTT TAGTAGAGA", + "GCTG CAGG", + "TATT GAAA", + "CCAGCC TGGG", + "GCTCC AAA", + "TA CGAA", + "GGCC TCC", + "TATA CAAA", + "CATG GCA", + "CATG CAA", + "TACA CCA", + "CTT TACCA", + "TACA GAGA", + "TATT CTTA", + "TATG TCA", + "TCAA GCA", + "TCAA TGA", + "GG CTCTT", + "GGAA GTT", + "TCCA TGTT", + "GCTT TCC", + "TATG TGA", + "GTG TAGA", + "TTTT TAAAA", + "GCTG GAGA", + "GTGA GAGA", + "CCTA GAA", + "CCTCC AAA", + "CCAA TGA", + "CAGG GCA", + "CTA TGCA", + "CTT CACC", + "CTA CAAAA", + "CTCA CC", + "GAGTA TG", + "TA GAAAAA", + "CTTTT GAA", + "TAAA GAGA", + "CATG TCA", + "TCTTTT AAA", + "CACA GTGA", + "GA TCTAA", + "TAA GGTA", + "CATA GAA", + "CGC GCC", + "CAGC TTA", + "TATA GTT", + "CGG GCC", + "TATC CATT", + "TGTTTG TTTT", + "GCTG GCTG", + "TACA GGA", + "CTCC TTTG", + "CAA TCTA", + "CCCC CTG", + "TATA CTG", + "CTGA GCC", + "CGG TTA", + "TGAA GTG", + "GCTT CCTT", + "TTTTA TTTG", + "TA GTGAA", + "CTGA GGTG", + "TCTT CTC", + "GACA GAAA", + "CTGAA CTGAA", + "CCTG GGAA", + "TCC CCAAA", + "TATG TATT", + "GATT TCTG", + "CATT CAAA", + "CACA GTT", + "GCTT GAA", + "GTG GATCA", + "CTGA GTGA", + "TGAA TTTA", + "TCAA CAAA", + "GG TCATT", + "GTAA TTTA", + "GC GACTT", + "CTGA GAGA", + "GTG CCCA", + "CTA GGTT", + "TCC TGAAA", + "GTC CACC", + "TCA CAGAA", + "GC GAAAA", + "GTA TGGG", + "TGAA CAAA", + "TAAA CAAAA", + "CC GTTTT", + "TC TCAATT", + "TCCA GAAA", + "GTAA CAA", + "GCA TTTTA", + "TCTC CATG", + "TTA TAAAA", + "CAGG CAA", + "CTAAAA AAA", + "GTT GGGA", + "TAAA 
GATT", + "TGAA GAGA", + "CCCC TCA", + "TGTT TATG", + "TCTA CTG", + "CCAA TTTT", + "GGTG GTG", + "GGAA CAA", + "TGTG GGA", + "TCTG CTA", + "GAA CGA", + "GTAA GTA", + "GTT GCCA", + "AAAA TTTT", + "GC GCGA", + "GAAA GATG", + "GTC TCTCA", + "TCCA TCAA", + "GCA GCTA", + "CACA TTTG", + "CTGA CAA", + "TCCA CC", + "GC T", + "CCCA CTT", + "GCA GGTA", + "GAGG CCA", + "TAAA GTCA", + "CTG GATA", + "CGG CAA" + ] + } +} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/checkpoint-172/tokenizer_config.json b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/checkpoint-172/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..12cee777f1285b52e37dffd583040cdba7f5a0d3 --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/checkpoint-172/tokenizer_config.json @@ -0,0 +1,56 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "[UNK]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "[CLS]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "[SEP]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "[PAD]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "[MASK]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "cache_dir": null, + "clean_up_tokenization_spaces": true, + "cls_token": "[CLS]", + "mask_token": "[MASK]", + "model_max_length": 512, + 
"pad_token": "[PAD]", + "padding_side": "right", + "sep_token": "[SEP]", + "tokenizer_class": "PreTrainedTokenizerFast", + "trust_remote_code": true, + "unk_token": "[UNK]", + "use_fast": true +} diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/checkpoint-172/trainer_state.json b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/checkpoint-172/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..f10ee266853fcad28b863d53141d7276f7efd0b8 --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/checkpoint-172/trainer_state.json @@ -0,0 +1,77 @@ +{ + "best_metric": 0.6941549901360716, + "best_model_checkpoint": "genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/checkpoint-172", + "epoch": 3.976878612716763, + "eval_steps": 100, + "global_step": 172, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.99, + "eval_accuracy": 0.5817655571635311, + "eval_f1": 0.5610827704893171, + "eval_loss": 0.6628125309944153, + "eval_matthews_correlation": 0.21314123722950265, + "eval_precision": 0.6214865572625698, + "eval_recall": 0.5934860367092647, + "eval_runtime": 2.2484, + "eval_samples_per_second": 307.332, + "eval_steps_per_second": 9.785, + "step": 43 + }, + { + "epoch": 1.99, + "eval_accuracy": 0.6743849493487699, + "eval_f1": 0.6528777821686682, + "eval_loss": 0.5978314876556396, + "eval_matthews_correlation": 0.3669122104687527, + "eval_precision": 0.7052053787162517, + "eval_recall": 0.6640119901257788, + "eval_runtime": 2.2606, + "eval_samples_per_second": 305.673, + "eval_steps_per_second": 9.732, + "step": 86 + }, 
+ { + "epoch": 2.31, + "learning_rate": 1.4794520547945205e-05, + "loss": 0.6252, + "step": 100 + }, + { + "epoch": 2.98, + "eval_accuracy": 0.6903039073806078, + "eval_f1": 0.687708020071297, + "eval_loss": 0.58121657371521, + "eval_matthews_correlation": 0.37804735134263795, + "eval_precision": 0.6905416012558869, + "eval_recall": 0.6875178424490755, + "eval_runtime": 2.2678, + "eval_samples_per_second": 304.702, + "eval_steps_per_second": 9.701, + "step": 129 + }, + { + "epoch": 3.98, + "eval_accuracy": 0.6960926193921853, + "eval_f1": 0.6941549901360716, + "eval_loss": 0.5836874842643738, + "eval_matthews_correlation": 0.38973274325358714, + "eval_precision": 0.6958638707926167, + "eval_recall": 0.6938739525432837, + "eval_runtime": 2.274, + "eval_samples_per_second": 303.868, + "eval_steps_per_second": 9.675, + "step": 172 + } + ], + "logging_steps": 100, + "max_steps": 172, + "num_train_epochs": 4, + "save_steps": 100, + "total_flos": 5788706328975360.0, + "trial_name": null, + "trial_params": null +} diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/checkpoint-172/training_args.bin b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/checkpoint-172/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..a6f4f963668c4fc90216ad0306a00d36148d92ff --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/checkpoint-172/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a68a7c1d06f3fae4b483d7ad97db3a2ebb2b62fc4919f1c9ca8106fbb000df3 +size 5457 diff --git 
a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/results/base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42/eval_results.json b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/results/base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..670639a93df6ae4ffc4e72ba43c2d295d70798e2 --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/results/base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42/eval_results.json @@ -0,0 +1 @@ +{"eval_loss": 0.6012589931488037, "eval_accuracy": 0.6907514450867052, "eval_f1": 0.685187074829932, "eval_matthews_correlation": 0.37509742426025894, "eval_precision": 0.6905166435506241, "eval_recall": 0.6846270161290322, "eval_runtime": 2.2655, "eval_samples_per_second": 305.451, "eval_steps_per_second": 9.711, "epoch": 3.98} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-31/config.json b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-31/config.json new file mode 100644 index 0000000000000000000000000000000000000000..45e4c6c10a6211acf374c78e8078ab7ac74985f9 --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-31/config.json @@ -0,0 +1,27 @@ +{ + "_name_or_path": "/data/nanhuang/Nan/models/DNAbert2_Pretrained", + "architectures": [ + "BertForSequenceClassification" + ], + 
"attention_probs_dropout_prob": 0.1, + "classifier_dropout": null, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_norm_eps": 1e-12, + "max_length": 512, + "max_position_embeddings": 512, + "model_type": "bert", + "num_attention_heads": 12, + "num_hidden_layers": 12, + "pad_token_id": 0, + "position_embedding_type": "absolute", + "problem_type": "single_label_classification", + "torch_dtype": "float32", + "transformers_version": "4.35.2", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 4096 +} diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-31/model.safetensors b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-31/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b25153a3c1486bd9207b19ba0fcacc49897084d2 --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-31/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbb5da133669d1b7e98a5f3c658941682f60b2082efc73aa9a6abb0247660563 +size 356777880 diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-31/optimizer.pt b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-31/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..973524b573e157731b8e63b9d3055cbb0eed73fc --- /dev/null +++ 
b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-31/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:542ed2c97fd12d4b4b23702cbde98497b5cd3339e63658fbe28c1c18872936a0 +size 713677451 diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-31/rng_state.pth b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-31/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..5f71c6b1326125621d138051007813953aa276d6 --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-31/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab279adf128873bd2145c65a30f6ed93cbcac4b882bde6ac0f5067f8b73817e9 +size 14645 diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-31/scheduler.pt b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-31/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..174b14cd75aa18fb9534bea74736b1fbe011d264 --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-31/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99ee3b73fbe41c8fc4eee2f5d8cbed5b4be61fbf8b2cc81d5f85262c6ebf5b73 +size 1465 diff --git 
a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-31/special_tokens_map.json b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-31/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..a8b3208c2884c4efb86e49300fdd3dc877220cdf --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-31/special_tokens_map.json @@ -0,0 +1,7 @@ +{ + "cls_token": "[CLS]", + "mask_token": "[MASK]", + "pad_token": "[PAD]", + "sep_token": "[SEP]", + "unk_token": "[UNK]" +} diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-31/tokenizer.json b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-31/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..8a569df5e832e1e62816e174612061cfbf0790d0 --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-31/tokenizer.json @@ -0,0 +1,8340 @@ +{ + "version": "1.0", + "truncation": { + "direction": "Right", + "max_length": 512, + "strategy": "LongestFirst", + "stride": 0 + }, + "padding": { + "strategy": "BatchLongest", + "direction": "Right", + "pad_to_multiple_of": null, + "pad_id": 3, + "pad_type_id": 0, + "pad_token": "[PAD]" + }, + "added_tokens": [ + { + "id": 0, + "content": "[UNK]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "[CLS]", + "single_word": false, 
+ "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "[SEP]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 3, + "content": "[PAD]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 4, + "content": "[MASK]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": null, + "pre_tokenizer": { + "type": "Whitespace" + }, + "post_processor": { + "type": "TemplateProcessing", + "single": [ + { + "SpecialToken": { + "id": "[CLS]", + "type_id": 0 + } + }, + { + "Sequence": { + "id": "A", + "type_id": 0 + } + }, + { + "SpecialToken": { + "id": "[SEP]", + "type_id": 0 + } + } + ], + "pair": [ + { + "SpecialToken": { + "id": "[CLS]", + "type_id": 0 + } + }, + { + "Sequence": { + "id": "A", + "type_id": 0 + } + }, + { + "SpecialToken": { + "id": "[SEP]", + "type_id": 0 + } + }, + { + "Sequence": { + "id": "B", + "type_id": 1 + } + }, + { + "SpecialToken": { + "id": "[SEP]", + "type_id": 1 + } + } + ], + "special_tokens": { + "[CLS]": { + "id": "[CLS]", + "ids": [ + 1 + ], + "tokens": [ + "[CLS]" + ] + }, + "[SEP]": { + "id": "[SEP]", + "ids": [ + 2 + ], + "tokens": [ + "[SEP]" + ] + } + } + }, + "decoder": null, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": "[UNK]", + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "vocab": { + "[UNK]": 0, + "[CLS]": 1, + "[SEP]": 2, + "[PAD]": 3, + "[MASK]": 4, + "A": 5, + "C": 6, + "G": 7, + "T": 8, + "AA": 9, + "TT": 10, + "TG": 11, + "CA": 12, + "CC": 13, + "TA": 14, + "GG": 15, + "TC": 16, + "GA": 17, + "AAA": 18, + "GC": 19, + "TAA": 20, + "TTTT": 21, + "TCA": 22, + "TGA": 23, + "TTA": 24, + "GAA": 25, + "TCC": 26, + "CAA": 27, + "CTG": 28, + "CTT": 29, + "GTG": 30, + "GTT": 31, + 
"GCA": 32, + "GGA": 33, + "CCA": 34, + "GTA": 35, + "GCC": 36, + "CTA": 37, + "TAAA": 38, + "AAAA": 39, + "CTC": 40, + "GTC": 41, + "TGTG": 42, + "TATT": 43, + "CACA": 44, + "GAAA": 45, + "TATA": 46, + "TCTT": 47, + "TGTT": 48, + "CAAA": 49, + "GAGA": 50, + "CATT": 51, + "TGAA": 52, + "CAGG": 53, + "TCTG": 54, + "CAGA": 55, + "TCAA": 56, + "GGAA": 57, + "TAAAA": 58, + "CTGA": 59, + "GCTT": 60, + "GTGA": 61, + "GCTG": 62, + "CTCA": 63, + "CCTT": 64, + "CATG": 65, + "GCAA": 66, + "GTCA": 67, + "GTAA": 68, + "TTTTA": 69, + "TATG": 70, + "GAGG": 71, + "CGG": 72, + "GATT": 73, + "CCTG": 74, + "TCTC": 75, + "CCAA": 76, + "GTTA": 77, + "CTCC": 78, + "CTAA": 79, + "TACA": 80, + "CTTA": 81, + "TCCA": 82, + "GATG": 83, + "TTAA": 84, + "GAAAA": 85, + "TTTG": 86, + "GTTTT": 87, + "TCTA": 88, + "GCCA": 89, + "GTCC": 90, + "CTTTT": 91, + "GGGG": 92, + "CGA": 93, + "TTTA": 94, + "CCCA": 95, + "CAAAA": 96, + "TGGG": 97, + "TAGA": 98, + "TAGG": 99, + "GACA": 100, + "GGTT": 101, + "CCCC": 102, + "GGTG": 103, + "CATA": 104, + "GCTA": 105, + "TGTA": 106, + "TCAAA": 107, + "TGGA": 108, + "TAATT": 109, + "TTATT": 110, + "TGCA": 111, + "GGCA": 112, + "GATA": 113, + "CCTA": 114, + "TTCA": 115, + "TCTCA": 116, + "GGGA": 117, + "CGC": 118, + "CTGAA": 119, + "GTAAA": 120, + "TCTCC": 121, + "TTTTTT": 122, + "CGTG": 123, + "GCAAA": 124, + "TAAAAA": 125, + "TCTGA": 126, + "TCATT": 127, + "GGAAA": 128, + "TGAAA": 129, + "TCCTT": 130, + "CCAAA": 131, + "GAATT": 132, + "CTAAA": 133, + "CGTT": 134, + "GTGAA": 135, + "GGCC": 136, + "TAATA": 137, + "GGTA": 138, + "TGCC": 139, + "CACC": 140, + "TGATT": 141, + "AAAAAA": 142, + "GCTCA": 143, + "TCCAA": 144, + "GAGAA": 145, + "CTGTT": 146, + "TATTA": 147, + "CAGCA": 148, + "CTCTT": 149, + "CTTAA": 150, + "CAGAA": 151, + "GCTGA": 152, + "GTTAA": 153, + "TCTTA": 154, + "TATTTT": 155, + "GCCAA": 156, + "CTTTG": 157, + "GACC": 158, + "CGCA": 159, + "GTATT": 160, + "GTCTT": 161, + "CAATT": 162, + "GTGTT": 163, + "CTCAA": 164, + "GGAGG": 165, + "CGAA": 166, + 
"TCTTTT": 167, + "GTCAA": 168, + "CGCC": 169, + "TATAA": 170, + "TACC": 171, + "TCTAA": 172, + "CCATT": 173, + "CGGA": 174, + "CAAAAA": 175, + "CAGTG": 176, + "TCCTG": 177, + "CTCTG": 178, + "GAAAAA": 179, + "CTGTG": 180, + "CAGC": 181, + "TTTTAA": 182, + "GCATT": 183, + "GCCTT": 184, + "TAATG": 185, + "CTATT": 186, + "GTTTG": 187, + "TGATG": 188, + "GGCTG": 189, + "CCTCA": 190, + "GAGGA": 191, + "GCCTG": 192, + "AAATT": 193, + "CGTA": 194, + "TCAAAA": 195, + "TACAA": 196, + "CATCA": 197, + "CAGTT": 198, + "TGAGA": 199, + "GGGAA": 200, + "CACTG": 201, + "CACAA": 202, + "CAGGA": 203, + "CCCCA": 204, + "CCCTG": 205, + "TTTTTTTT": 206, + "TAGAA": 207, + "GAGCA": 208, + "CCTCC": 209, + "CACCA": 210, + "TATCA": 211, + "GAGC": 212, + "CATTA": 213, + "CACACACA": 214, + "GAGTG": 215, + "GGATT": 216, + "TGTGTGTG": 217, + "TACTT": 218, + "CACTT": 219, + "GTCTG": 220, + "TGAGG": 221, + "GAGTT": 222, + "GAATG": 223, + "TCATG": 224, + "GACAA": 225, + "GACTT": 226, + "TATTAA": 227, + "TAATAA": 228, + "GGCCA": 229, + "CATTTT": 230, + "CAGCC": 231, + "CCCTT": 232, + "GCTAA": 233, + "TATATATA": 234, + "GTGTG": 235, + "TACTG": 236, + "TAGTT": 237, + "CAATG": 238, + "GCTC": 239, + "CAGTA": 240, + "GCTCC": 241, + "CATAA": 242, + "TTATG": 243, + "TAAATT": 244, + "GATGA": 245, + "CATGA": 246, + "GCGG": 247, + "AAAAAAAA": 248, + "CCATG": 249, + "GATAA": 250, + "GACTG": 251, + "TATGA": 252, + "GCAGG": 253, + "GATCA": 254, + "GTTTTA": 255, + "GGATG": 256, + "CCTGA": 257, + "GTAAAA": 258, + "GAAGG": 259, + "GATTA": 260, + "CCTC": 261, + "GACCA": 262, + "GCTTA": 263, + "CCCAA": 264, + "AAATG": 265, + "GCATG": 266, + "TAGTA": 267, + "TACCA": 268, + "GGCTT": 269, + "CGTC": 270, + "TCTCTT": 271, + "GGTCA": 272, + "TTATTA": 273, + "TACTA": 274, + "TAGCA": 275, + "TATC": 276, + "CTGGG": 277, + "CATC": 278, + "CTTTTA": 279, + "CTAAAA": 280, + "GTGGG": 281, + "GAGTA": 282, + "CCAGG": 283, + "GATTTT": 284, + "TAGTG": 285, + "GAAATT": 286, + "CACTA": 287, + "TCGG": 288, + "TCAGG": 289, + "CAGGAA": 
290, + "GCAAAA": 291, + "CCTTA": 292, + "CATCC": 293, + "CTTGG": 294, + "TGTGAA": 295, + "TATTTG": 296, + "CCTAA": 297, + "CTATG": 298, + "GAGAAA": 299, + "GAGAGAGA": 300, + "GCTTTT": 301, + "TATAAA": 302, + "CAAGG": 303, + "TCTCTG": 304, + "TGTTAA": 305, + "TGTGTT": 306, + "GAGCC": 307, + "GACTA": 308, + "TATATT": 309, + "TAAAAAA": 310, + "TTTTTG": 311, + "GTATG": 312, + "CATTAA": 313, + "TAGGA": 314, + "TAGC": 315, + "GTTGG": 316, + "GAAGAA": 317, + "TAAATG": 318, + "TCTGTT": 319, + "CAGAAA": 320, + "CAAATT": 321, + "TAATTA": 322, + "TCTGTG": 323, + "TATCC": 324, + "TGAATT": 325, + "CTCCA": 326, + "GTGAAA": 327, + "GGCAA": 328, + "GGAGA": 329, + "GAAGA": 330, + "GGTGA": 331, + "GGGCA": 332, + "CCAAAA": 333, + "TCTCTCTC": 334, + "CTGCA": 335, + "CTTCTT": 336, + "TCTTAA": 337, + "CCCTA": 338, + "TGTGTG": 339, + "AAATA": 340, + "TGTTTG": 341, + "GGGTT": 342, + "GTGCTG": 343, + "GGAAAA": 344, + "GGGGA": 345, + "TCAGA": 346, + "CCTTTT": 347, + "GAAATG": 348, + "GCAGCA": 349, + "TCTGAA": 350, + "GGGTG": 351, + "CACATT": 352, + "TCTTTG": 353, + "GGGC": 354, + "TCCCA": 355, + "TCCATT": 356, + "CTGAAA": 357, + "CTTTA": 358, + "TCGA": 359, + "GTTTA": 360, + "CAACAA": 361, + "CTTCC": 362, + "GCCTCC": 363, + "TTAAA": 364, + "GCTCTG": 365, + "GTTTCA": 366, + "GGAGGA": 367, + "CGTGA": 368, + "CAGTC": 369, + "GAATA": 370, + "CAGAGA": 371, + "CCCTC": 372, + "CAAATG": 373, + "CTGCTG": 374, + "GATCC": 375, + "TTTTATT": 376, + "AAAATT": 377, + "TTATA": 378, + "TCAATT": 379, + "GGTAA": 380, + "GTTATT": 381, + "GCCAGG": 382, + "GGAGAA": 383, + "CATTTG": 384, + "TCACC": 385, + "CTCAAA": 386, + "GGTTA": 387, + "TCCAAA": 388, + "TCTATT": 389, + "GCAGA": 390, + "CTTCA": 391, + "TCATCA": 392, + "CGAGG": 393, + "TAACA": 394, + "GTTGTT": 395, + "CTTATT": 396, + "CGTCA": 397, + "TAAGA": 398, + "TAATTTT": 399, + "CTGTA": 400, + "TCCACA": 401, + "GCTGTG": 402, + "CGCTG": 403, + "TCTAAA": 404, + "GCGA": 405, + "CAATA": 406, + "CCACCA": 407, + "GAACA": 408, + "CGAAA": 409, + "CAGATT": 410, + 
"TCACA": 411, + "TTATTTT": 412, + "TCTCAA": 413, + "TGACA": 414, + "CTCCAA": 415, + "AAAAAAA": 416, + "TATATG": 417, + "TCCTCC": 418, + "TCACTT": 419, + "TCCAGG": 420, + "CAAGA": 421, + "GGCTA": 422, + "GTGGTG": 423, + "CGTAA": 424, + "CGAGA": 425, + "TGATA": 426, + "GGATTA": 427, + "CAACA": 428, + "CGATT": 429, + "TGAGAA": 430, + "CTCCTT": 431, + "CTCATT": 432, + "GTTAAA": 433, + "TCATA": 434, + "CCTCTG": 435, + "CTCTA": 436, + "GCTGAA": 437, + "CTGGA": 438, + "TAAGG": 439, + "CTTAAA": 440, + "TATTTA": 441, + "CCACA": 442, + "CCGG": 443, + "GTCAAA": 444, + "TGGAA": 445, + "CGGAA": 446, + "TGATGA": 447, + "GTTCA": 448, + "TAACAA": 449, + "GCTGTT": 450, + "TAAGAA": 451, + "CTGCC": 452, + "TTAATT": 453, + "CCAGA": 454, + "TCAGAA": 455, + "GTCATT": 456, + "CGCTT": 457, + "GATTAA": 458, + "CTGATT": 459, + "GCCACA": 460, + "GTAATT": 461, + "TCCAGA": 462, + "GCCAAA": 463, + "GTGATT": 464, + "TAAAATT": 465, + "CAAGAA": 466, + "CCACC": 467, + "TAATCC": 468, + "GTTCTT": 469, + "TCCATG": 470, + "GCTCTT": 471, + "TGCTG": 472, + "GGGTA": 473, + "TTACA": 474, + "GCCATT": 475, + "GCACA": 476, + "GCAATT": 477, + "TCCCTG": 478, + "TGTGA": 479, + "TCGAA": 480, + "GGACA": 481, + "GGAATT": 482, + "GTGGA": 483, + "CTTCTG": 484, + "TCCCC": 485, + "GCCCC": 486, + "CTTGA": 487, + "TAATGA": 488, + "TAAATA": 489, + "TATATA": 490, + "CTGCAA": 491, + "TCATTA": 492, + "GTATA": 493, + "TCCCCA": 494, + "CGTTA": 495, + "GCAGAA": 496, + "TGAGTT": 497, + "CTTTTTT": 498, + "CGATG": 499, + "CTTTCA": 500, + "AAAATG": 501, + "CAGGTT": 502, + "CTAATT": 503, + "CGCCA": 504, + "TGAAAAA": 505, + "GTTCC": 506, + "GTCCTT": 507, + "GTCCAA": 508, + "GTTTTTT": 509, + "CTCTGA": 510, + "GCGC": 511, + "GTTGA": 512, + "TGAATG": 513, + "CTATA": 514, + "GCAGTG": 515, + "CCTTAA": 516, + "TCACCA": 517, + "TCACTG": 518, + "GCCCTG": 519, + "TAACTT": 520, + "CAGATG": 521, + "GTAGG": 522, + "TCTATA": 523, + "GAGATT": 524, + "GTCTA": 525, + "TTTTAAA": 526, + "CACATG": 527, + "TGACC": 528, + "CACAAA": 529, + "GTGTA": 530, + 
"GGGAGG": 531, + "GCTTTG": 532, + "CAAAAAA": 533, + "GAGGAA": 534, + "GTTCTG": 535, + "TTTTTA": 536, + "GTCTCA": 537, + "GTTCAA": 538, + "TCGTG": 539, + "GCTTAA": 540, + "GCACC": 541, + "CTCCTG": 542, + "TAAATAAA": 543, + "CTACA": 544, + "CTTCCA": 545, + "TCCTCA": 546, + "CGCAA": 547, + "GAAAAAA": 548, + "GCCCA": 549, + "TCGTT": 550, + "GTAGA": 551, + "CTCTCA": 552, + "GTCCA": 553, + "TGACTT": 554, + "TCCCTT": 555, + "GCCATG": 556, + "CACACACACACACACA": 557, + "GTGATG": 558, + "CCTCTT": 559, + "GCCAGA": 560, + "TCCTA": 561, + "CGTTTT": 562, + "GTACA": 563, + "GCATA": 564, + "GAATTA": 565, + "TGTGTGTGTGTGTGTG": 566, + "CCCAGG": 567, + "GGTTTT": 568, + "TCAAAAA": 569, + "TCTATG": 570, + "CCATA": 571, + "TGACAA": 572, + "GGATA": 573, + "TCAGTG": 574, + "GTATTTT": 575, + "GAGATG": 576, + "GCGTG": 577, + "CGTCC": 578, + "TTAAAAA": 579, + "TAATCA": 580, + "CAATTA": 581, + "CCACTG": 582, + "CGGTT": 583, + "GTTGAA": 584, + "TGATTA": 585, + "CCTTTG": 586, + "CGGTG": 587, + "CAGGTG": 588, + "TCAATG": 589, + "CTGATG": 590, + "TCAGGA": 591, + "GTTTAA": 592, + "TATTAAA": 593, + "CTCTTA": 594, + "GCAGGA": 595, + "CTCTCC": 596, + "GAACC": 597, + "CTTTAA": 598, + "GGGCC": 599, + "GTATTA": 600, + "GCGCC": 601, + "CCAATT": 602, + "GCTAAA": 603, + "TGACTG": 604, + "GATTTG": 605, + "GATAAA": 606, + "TCAGCA": 607, + "GTTCCA": 608, + "GAAATA": 609, + "GACAAA": 610, + "GAGTC": 611, + "GCTATT": 612, + "TCACAA": 613, + "GAGGTT": 614, + "TAACC": 615, + "GAAGGA": 616, + "GCTCAA": 617, + "GAAAATT": 618, + "CCAGCA": 619, + "GTTTTAA": 620, + "GTGCC": 621, + "TGAGGA": 622, + "CATAAA": 623, + "GGTCC": 624, + "TCATTTT": 625, + "TATTTATT": 626, + "TAATAAA": 627, + "GCCTA": 628, + "CTTTTAA": 629, + "TAAGTG": 630, + "TAAGTA": 631, + "CTGGAA": 632, + "CACACA": 633, + "GACAGA": 634, + "CAACC": 635, + "GGGAAA": 636, + "CCAGAA": 637, + "TCAGTT": 638, + "TAACTA": 639, + "CTAAAAA": 640, + "TGGGTT": 641, + "TGAGTG": 642, + "TAAAATG": 643, + "TATATATATATATATA": 644, + "GCACTG": 645, + "GACTC": 646, + 
"TACAAA": 647, + "TAAAAAAA": 648, + "TCTACA": 649, + "GTTGTG": 650, + "TCGCC": 651, + "CCCAAA": 652, + "GTCATG": 653, + "CTGCTT": 654, + "GGAATG": 655, + "CTATTA": 656, + "GATATT": 657, + "TAGAAA": 658, + "GGCAGG": 659, + "GATGAA": 660, + "GTAGAA": 661, + "TCCTGA": 662, + "TAACTG": 663, + "GCTGGG": 664, + "GCAATG": 665, + "GCCCCA": 666, + "GTTTGA": 667, + "CATTTA": 668, + "GTGCA": 669, + "CTTGAA": 670, + "GTGGAA": 671, + "CTTCAA": 672, + "TAAATTA": 673, + "GTGGCA": 674, + "TCCTTA": 675, + "GGAAAAA": 676, + "TTTTTTA": 677, + "CCTGTG": 678, + "GTAATG": 679, + "GTGTTA": 680, + "CTAGG": 681, + "CAGGCTG": 682, + "GACACA": 683, + "GAAAAAAA": 684, + "TCGC": 685, + "GTAAAAA": 686, + "TGTTTA": 687, + "TCTCTA": 688, + "GTCCTG": 689, + "CCAGGA": 690, + "GAACAA": 691, + "TAAGTT": 692, + "TGAGCA": 693, + "GCTCCA": 694, + "TAAGCA": 695, + "CTCATG": 696, + "GTCTTA": 697, + "CCCACA": 698, + "CATATT": 699, + "GCCTCA": 700, + "CACTC": 701, + "CTTCTA": 702, + "TGATTTT": 703, + "TCGCA": 704, + "CCTGTT": 705, + "GAAGCA": 706, + "GCAAAAA": 707, + "GCGGA": 708, + "CCACAA": 709, + "GCGCA": 710, + "CATATA": 711, + "GACATT": 712, + "GTTCTA": 713, + "CAAAATT": 714, + "GAAAGAAA": 715, + "CCCGG": 716, + "TACACA": 717, + "CCAAAAA": 718, + "GAGGTG": 719, + "GGCTCA": 720, + "CAGTGA": 721, + "TCCCAA": 722, + "TATCTT": 723, + "TGAGTA": 724, + "TCGTA": 725, + "TTTTCTT": 726, + "GTGGGA": 727, + "GAGCTG": 728, + "CCCTCC": 729, + "TAGGTT": 730, + "TTAGG": 731, + "TAATATT": 732, + "CCAGCC": 733, + "CATCTT": 734, + "GTCTGA": 735, + "GTTTCC": 736, + "CCTGAA": 737, + "GGAGCA": 738, + "GAAAATG": 739, + "TCAGTA": 740, + "TAACCA": 741, + "GATGTT": 742, + "CTGTTA": 743, + "CATGTT": 744, + "GGCGG": 745, + "CATGTG": 746, + "GGGAGA": 747, + "CTTTGA": 748, + "TCTTTCTT": 749, + "AAAAAAAAA": 750, + "GGGGTG": 751, + "CTTTCC": 752, + "CTTGTT": 753, + "GCATTA": 754, + "CCCAGA": 755, + "CAAATA": 756, + "TCGGA": 757, + "CAGCTT": 758, + "TCACTA": 759, + "TAATTAA": 760, + "TAAGGA": 761, + "GAACTG": 762, + "GCACAA": 763, + 
"GCGTT": 764, + "GGCTC": 765, + "TCTTTTA": 766, + "CCTCCA": 767, + "GGCAAA": 768, + "CAGCTG": 769, + "CTACAA": 770, + "TACATT": 771, + "GCTATG": 772, + "CTTGTG": 773, + "GAGTCA": 774, + "GTTATG": 775, + "CTGCCA": 776, + "GTCTCC": 777, + "TGACCA": 778, + "CACCTG": 779, + "TATATTA": 780, + "TGATCA": 781, + "CAGCAA": 782, + "GATGTG": 783, + "GTCTTTT": 784, + "CTAGAA": 785, + "GCTACA": 786, + "CTGGGA": 787, + "GGGGTT": 788, + "CAAGTA": 789, + "CAAGGA": 790, + "CCCTCA": 791, + "TAGCC": 792, + "GTTGGA": 793, + "GCTATA": 794, + "TCTGAAA": 795, + "TATGTT": 796, + "CCCCTT": 797, + "GTTGTA": 798, + "CCCTGA": 799, + "TGACTA": 800, + "CAAGCA": 801, + "CAATAA": 802, + "GAACTT": 803, + "CATGAA": 804, + "CTTATG": 805, + "CTAATG": 806, + "TCTAAAA": 807, + "CCAATG": 808, + "GAAGTG": 809, + "CCTCAA": 810, + "CCCATT": 811, + "CAGTCA": 812, + "GAGAGAGAGAGAGAGA": 813, + "TATGTG": 814, + "GCAGTGA": 815, + "TCTCCTT": 816, + "TCCCAAA": 817, + "CCATTA": 818, + "CCAGTG": 819, + "GCATCA": 820, + "TCAAATT": 821, + "GATCTT": 822, + "GACAGG": 823, + "GGAGTG": 824, + "GTAGTA": 825, + "CAACTT": 826, + "GAAGTT": 827, + "CCCCTG": 828, + "TCTCAAA": 829, + "GGGTC": 830, + "GAGCTT": 831, + "TATGAAA": 832, + "TATGAA": 833, + "GACATG": 834, + "CAAGTG": 835, + "GATATA": 836, + "CATCTG": 837, + "CTGTGA": 838, + "TAATTTA": 839, + "GGCAGA": 840, + "GCGAA": 841, + "CCTAAA": 842, + "CCATCA": 843, + "CACTGA": 844, + "GGACTA": 845, + "GACGG": 846, + "CTCTTTT": 847, + "CTGTCA": 848, + "TCTCTCTCTCTCTCTC": 849, + "TTAATG": 850, + "GCAGCC": 851, + "CAAAAAAA": 852, + "GCACCA": 853, + "CTATTTT": 854, + "GAGCAA": 855, + "CTTGGA": 856, + "CTGGTG": 857, + "GAATAA": 858, + "TCCTTTT": 859, + "GAAGTA": 860, + "CAGTAA": 861, + "CAACCA": 862, + "CTGTAA": 863, + "TGATAA": 864, + "GCAGTT": 865, + "CACGG": 866, + "TAAATAA": 867, + "CTGTTTT": 868, + "CTACTA": 869, + "GCTCTA": 870, + "CGAAAA": 871, + "CAAGTT": 872, + "CTTGTA": 873, + "GAATGA": 874, + "GAGTGA": 875, + "GCCTGA": 876, + "GGTTTG": 877, + "CCCATG": 878, + "GGGGAA": 
879, + "GAAGAAA": 880, + "TGTTA": 881, + "CAATTTT": 882, + "TATATTTT": 883, + "CTCAAAA": 884, + "GGTGGG": 885, + "CCGTG": 886, + "TATTTCA": 887, + "CCCCAA": 888, + "TATTTAA": 889, + "GGCTGA": 890, + "GGTGTG": 891, + "CATCAA": 892, + "CACTCA": 893, + "TCTCATT": 894, + "GAATTTT": 895, + "GAATCA": 896, + "CAGGAAA": 897, + "CATACA": 898, + "TATTTTA": 899, + "TTATAA": 900, + "GAGGAAA": 901, + "CATATG": 902, + "CTTTCTT": 903, + "CAACTG": 904, + "GGGCTG": 905, + "CCCCCA": 906, + "TTTGAAA": 907, + "CATTAAA": 908, + "CTTAAAA": 909, + "GACTGA": 910, + "CAATGA": 911, + "GGCACA": 912, + "CCAGTA": 913, + "GGATGA": 914, + "GTTTTTG": 915, + "GCATTTT": 916, + "GTGCCA": 917, + "GCAGTA": 918, + "GCCCTT": 919, + "TCGTC": 920, + "GAACTA": 921, + "GTGGTT": 922, + "GTGTGA": 923, + "GTGCTT": 924, + "CGCTA": 925, + "GTGTCA": 926, + "TCTTTA": 927, + "GCCTTA": 928, + "CCTATT": 929, + "CAAAATG": 930, + "GAACCA": 931, + "CTCCAGG": 932, + "GACTCA": 933, + "CATGAAA": 934, + "GCTAGG": 935, + "TGTTAAA": 936, + "GCGTA": 937, + "GCACTT": 938, + "TCTTAAA": 939, + "TAAGAAA": 940, + "GGCCTG": 941, + "TCCCTA": 942, + "GTGGTA": 943, + "CTGCTA": 944, + "GGAGTT": 945, + "GGTAAA": 946, + "CAAACAAA": 947, + "GATATG": 948, + "TCATGA": 949, + "GACCTT": 950, + "TAATATA": 951, + "GCTAGA": 952, + "GGACTG": 953, + "GGCATT": 954, + "CAGTTA": 955, + "CCCTAA": 956, + "CACCTT": 957, + "GGTGAA": 958, + "CAGCTA": 959, + "GTGTTTT": 960, + "CAACTA": 961, + "GATCAA": 962, + "GAGAAAA": 963, + "TGTGAAA": 964, + "AAAATA": 965, + "GATGAAA": 966, + "CTCTAA": 967, + "TTACTT": 968, + "GATCTG": 969, + "CCACTT": 970, + "GAGTTA": 971, + "CAATCA": 972, + "GGATTACAGG": 973, + "TTTATTTT": 974, + "TACATA": 975, + "TTTTATG": 976, + "GAGTAA": 977, + "GCTGAAA": 978, + "GTACTG": 979, + "GCTCTC": 980, + "TATGTA": 981, + "TGTGTA": 982, + "TCATAA": 983, + "GGACTT": 984, + "TCTCCAA": 985, + "GCATGA": 986, + "GACGA": 987, + "CGCCTG": 988, + "GACCTG": 989, + "GGTCTT": 990, + "CACCAA": 991, + "GATC": 992, + "GACCAA": 993, + "AAAATTA": 994, + 
"GTAAATT": 995, + "CCAGTT": 996, + "CAGAAAA": 997, + "TAACAAA": 998, + "GGTGTT": 999, + "GAAATTA": 1000, + "TGCCTCA": 1001, + "CCGCC": 1002, + "CCATTTT": 1003, + "CTTGCC": 1004, + "TCTGTA": 1005, + "CTGGCA": 1006, + "GGGATG": 1007, + "CCATGA": 1008, + "CTACTT": 1009, + "TAGGTG": 1010, + "TAAAAATT": 1011, + "GAAAGAA": 1012, + "TAAAATA": 1013, + "CTTTTTG": 1014, + "GTCAAAA": 1015, + "GGACAA": 1016, + "TCTGATT": 1017, + "CTCTCTT": 1018, + "TAATTTG": 1019, + "CTCTTTG": 1020, + "GGCCTT": 1021, + "GGATTTT": 1022, + "CTACTG": 1023, + "GTTGCA": 1024, + "GGCTCC": 1025, + "CTCTGTG": 1026, + "CTCCAGCC": 1027, + "TTACAA": 1028, + "GGACCA": 1029, + "GGAAGGAA": 1030, + "TAAAGAA": 1031, + "TTAGAA": 1032, + "GTGAAAA": 1033, + "CTTGCA": 1034, + "TGGGTG": 1035, + "GGAGCC": 1036, + "CCTCTA": 1037, + "CT": 1038, + "GGGCTT": 1039, + "GGCATG": 1040, + "CTGGTT": 1041, + "TACAGA": 1042, + "GATTAAA": 1043, + "CTCTGTT": 1044, + "TTATCA": 1045, + "CTGAAAA": 1046, + "GTAGTT": 1047, + "GGGTCA": 1048, + "GT": 1049, + "CAGCCA": 1050, + "GCGTC": 1051, + "CACTTA": 1052, + "GTGCTA": 1053, + "TCTTATT": 1054, + "GTACTT": 1055, + "GGTATT": 1056, + "TAGAGA": 1057, + "TACATG": 1058, + "CCACTA": 1059, + "TGAGAAA": 1060, + "CAATAAA": 1061, + "TCCAAAA": 1062, + "CGTGAA": 1063, + "GGTCTG": 1064, + "CTGAATT": 1065, + "TCAGCC": 1066, + "CCTCTC": 1067, + "GTTAAAA": 1068, + "GGGATT": 1069, + "TCCTAA": 1070, + "CACTAA": 1071, + "GGAGAAA": 1072, + "CCTTCCTT": 1073, + "GTTTCTT": 1074, + "TATCAA": 1075, + "GATACA": 1076, + "TAATCCCAGCA": 1077, + "CCGCA": 1078, + "TGAAATT": 1079, + "CGTAAA": 1080, + "CTCTCTG": 1081, + "TCTTTTTT": 1082, + "GTACAA": 1083, + "CCAAATT": 1084, + "TGTATTTT": 1085, + "TCGCTT": 1086, + "GGGTGA": 1087, + "GATAGA": 1088, + "CTTTATT": 1089, + "TAAACAA": 1090, + "GTTTATT": 1091, + "TGAATA": 1092, + "CTACCA": 1093, + "GTGTCC": 1094, + "CCCGA": 1095, + "TTTATTA": 1096, + "CTCCAAA": 1097, + "TTTTTTTTTTTT": 1098, + "TCATCC": 1099, + "GAAGCC": 1100, + "CTAAATT": 1101, + "CAAATTA": 1102, + "CCCCAAA": 
1103, + "TCTTCTT": 1104, + "TAGGAAA": 1105, + "CACGA": 1106, + "CATTTTA": 1107, + "GTGCAA": 1108, + "TCTCCTG": 1109, + "TATTTTAA": 1110, + "GTTTGTT": 1111, + "GAGCCA": 1112, + "GGCCAA": 1113, + "CATTTCA": 1114, + "CATCCA": 1115, + "CCTATA": 1116, + "GACTTA": 1117, + "TCAAATG": 1118, + "GTATCA": 1119, + "TAAATTTT": 1120, + "CTGAGGCA": 1121, + "GCCCAA": 1122, + "GGTTAA": 1123, + "TATCTG": 1124, + "TGACAGA": 1125, + "GGAGAGA": 1126, + "GCTGCTG": 1127, + "CCCTTA": 1128, + "TCCTCTG": 1129, + "GTAGCA": 1130, + "CCTGAAA": 1131, + "CCGAA": 1132, + "TTTTTAA": 1133, + "CTATAA": 1134, + "CCTGTA": 1135, + "TTACTG": 1136, + "GTATAA": 1137, + "GGCGA": 1138, + "GACTAA": 1139, + "TCAGAAA": 1140, + "GTGTGTG": 1141, + "CAAAGAA": 1142, + "CCTATG": 1143, + "GCAGAGA": 1144, + "CCGTT": 1145, + "TTTTATTTT": 1146, + "GGAAGAA": 1147, + "TTACTA": 1148, + "GCCTGGG": 1149, + "TCCCTC": 1150, + "TCCTCTT": 1151, + "GGATCA": 1152, + "GGTCAA": 1153, + "TCGAGA": 1154, + "TATTCTT": 1155, + "TACTC": 1156, + "GTTAATT": 1157, + "GCGAGA": 1158, + "CTTAATT": 1159, + "TCCTTTG": 1160, + "GTCTAA": 1161, + "CACCCA": 1162, + "GGGTTA": 1163, + "GGGCAA": 1164, + "GGAAATG": 1165, + "GCAAATT": 1166, + "TAGATG": 1167, + "GCAGAAA": 1168, + "AAAAAAAAAAAAAAAA": 1169, + "CCTACA": 1170, + "GGAGTA": 1171, + "TCTAATT": 1172, + "CAACAAA": 1173, + "TAGATT": 1174, + "GGTTTA": 1175, + "CCTAGA": 1176, + "CTTTAAA": 1177, + "TACTTA": 1178, + "TAATGAA": 1179, + "CTATCA": 1180, + "TAGTAA": 1181, + "CAGAGAA": 1182, + "CAAGAAA": 1183, + "GGGGAAA": 1184, + "CGTTAA": 1185, + "CGTGTT": 1186, + "TCTGTCTG": 1187, + "TTTTAATT": 1188, + "CTGGCC": 1189, + "TAAATGA": 1190, + "CGTCAA": 1191, + "TTAGTA": 1192, + "GTCTCTG": 1193, + "TTTTAAAA": 1194, + "CAGTTTT": 1195, + "CTTCCTT": 1196, + "TATATAA": 1197, + "GCTTTTA": 1198, + "TTTTTCA": 1199, + "GGTC": 1200, + "TTATTAA": 1201, + "TTTTGTT": 1202, + "CATAGA": 1203, + "TAGGAA": 1204, + "GAGAGAA": 1205, + "GTAGCTG": 1206, + "TTATGA": 1207, + "GTAGTG": 1208, + "GGAGAGG": 1209, + "CTCTGAA": 1210, + 
"TAGTC": 1211, + "GACTCC": 1212, + "TCCCTCC": 1213, + "TAATGTT": 1214, + "CATCTA": 1215, + "GCCACCA": 1216, + "GTACTA": 1217, + "TGGGAAA": 1218, + "CGCCTT": 1219, + "GCCCGG": 1220, + "GGAGGAA": 1221, + "GTACCA": 1222, + "CGCAAA": 1223, + "CATAAAA": 1224, + "TAACATT": 1225, + "GCTAAAA": 1226, + "TCTTCTG": 1227, + "GCCAAAA": 1228, + "GTATGA": 1229, + "GTCTTTG": 1230, + "TACTGA": 1231, + "TCCCAGG": 1232, + "TTATTTA": 1233, + "TTAGTT": 1234, + "GGACC": 1235, + "TATAAAA": 1236, + "CAAACAA": 1237, + "CTTCTC": 1238, + "TCTATCTA": 1239, + "GAAATAA": 1240, + "GTGTAA": 1241, + "CTTTGTT": 1242, + "GATAAAA": 1243, + "GCCCAGG": 1244, + "GCGATT": 1245, + "AAAAAATT": 1246, + "TACAGG": 1247, + "GGCTAA": 1248, + "TAGCTT": 1249, + "GTCTCTA": 1250, + "CTCCTGA": 1251, + "GAATAAA": 1252, + "TTACCA": 1253, + "GGGACA": 1254, + "GCCACTG": 1255, + "GTTTAAA": 1256, + "GTCTGTG": 1257, + "TGACAAA": 1258, + "TACATTTT": 1259, + "GCCACC": 1260, + "TGTTTT": 1261, + "TAGCAA": 1262, + "TTATAAA": 1263, + "GACCCA": 1264, + "GCAGC": 1265, + "CAGACAGA": 1266, + "CACAAAA": 1267, + "GCCCTA": 1268, + "TATTAAAA": 1269, + "CGTATT": 1270, + "CCATCC": 1271, + "TCGATT": 1272, + "GAAGGAA": 1273, + "GATCCA": 1274, + "TATTTGA": 1275, + "GTGAATT": 1276, + "TACCTT": 1277, + "CGTCTT": 1278, + "CCTAGG": 1279, + "TCGAAA": 1280, + "CTTTCTG": 1281, + "TGAAGAA": 1282, + "TCTCTCA": 1283, + "GTCTCTT": 1284, + "GGAGGGG": 1285, + "GTCTGTT": 1286, + "CTATGA": 1287, + "GGAAATT": 1288, + "GCACACA": 1289, + "GCCTTTT": 1290, + "CAGTCC": 1291, + "CTGGTA": 1292, + "GCATCC": 1293, + "TAGTTA": 1294, + "GGCTTA": 1295, + "GAGTCC": 1296, + "TGAAAA": 1297, + "TAGATAGA": 1298, + "TGTTTGTT": 1299, + "TACTCA": 1300, + "CATTTAA": 1301, + "GATTTTA": 1302, + "CACTCC": 1303, + "GAAACAA": 1304, + "GCGCTG": 1305, + "TCTTTCA": 1306, + "CTGTCC": 1307, + "GAACTCA": 1308, + "CGGAAA": 1309, + "TATTGTT": 1310, + "GCACTA": 1311, + "TATTCAA": 1312, + "GCGGGG": 1313, + "GTGGCC": 1314, + "TAATTAAA": 1315, + "TACTAA": 1316, + "GCGGTG": 1317, + "TACCAA": 
1318, + "GGTATA": 1319, + "CTAGTT": 1320, + "GCAGAGG": 1321, + "CTTTTTTTT": 1322, + "TTTTTTTTTTTTTTTT": 1323, + "TACAGTA": 1324, + "CCATGTT": 1325, + "TAGTGA": 1326, + "CGTGTG": 1327, + "GCTCTGA": 1328, + "CTTCCTG": 1329, + "TCGCTG": 1330, + "TAAATCA": 1331, + "TCCAATT": 1332, + "GTTTCTG": 1333, + "GAAGAGA": 1334, + "GGGTAA": 1335, + "CCATAA": 1336, + "TTATATT": 1337, + "CGAATT": 1338, + "CCGGA": 1339, + "TGAGCC": 1340, + "CCGTA": 1341, + "CAGAGGA": 1342, + "GTGTTTG": 1343, + "GACAAAA": 1344, + "TTTTTTAAA": 1345, + "GTTGCC": 1346, + "GAGTTTT": 1347, + "TCAAAAAA": 1348, + "TGTTTCA": 1349, + "TATCTA": 1350, + "TCTCTCC": 1351, + "CTCCACA": 1352, + "TAAATATT": 1353, + "TTTTCTG": 1354, + "CTCTCAA": 1355, + "CCTTAAA": 1356, + "TCTTTTAA": 1357, + "GAACAAA": 1358, + "TTAGCA": 1359, + "GCTCATG": 1360, + "TAAAGTA": 1361, + "GGATAA": 1362, + "TTATTAAA": 1363, + "CTCCATT": 1364, + "TCTCTGA": 1365, + "TTATTTG": 1366, + "CCTGTAA": 1367, + "TTATATA": 1368, + "GACTTTT": 1369, + "TGTTGTT": 1370, + "GCAAATG": 1371, + "CTTCAAA": 1372, + "GAATATT": 1373, + "GAATCC": 1374, + "CTCTTAA": 1375, + "GCATAA": 1376, + "GAATGAA": 1377, + "CTTAAAAA": 1378, + "TAAAAATG": 1379, + "TTTTAAAAA": 1380, + "CTCTGGG": 1381, + "TGATCC": 1382, + "GCTCTCA": 1383, + "CTCCAGA": 1384, + "GAGTGCAGTG": 1385, + "CAATATT": 1386, + "TAGAAAA": 1387, + "GTAAATG": 1388, + "TAGCTG": 1389, + "GCTCAAA": 1390, + "GCAGGAA": 1391, + "TACCTG": 1392, + "GGGAAAA": 1393, + "TTTTCTA": 1394, + "GGGGGGGG": 1395, + "CCGA": 1396, + "CTTTGAA": 1397, + "GGAGGTG": 1398, + "TAGTCA": 1399, + "GGCCCA": 1400, + "TGATGTT": 1401, + "CAAATAA": 1402, + "TCTTCCA": 1403, + "GCGCTT": 1404, + "GTATTTG": 1405, + "GTCTC": 1406, + "GAAATCA": 1407, + "TGATAAA": 1408, + "CATTCTT": 1409, + "TATCCA": 1410, + "GCCTCTG": 1411, + "TGAGATG": 1412, + "CGCCAA": 1413, + "GTTTTATT": 1414, + "TATATATT": 1415, + "GTAGGA": 1416, + "GACAGAA": 1417, + "CTCCAGCCTGGG": 1418, + "GCGTGA": 1419, + "GGTATG": 1420, + "GAGGGAGG": 1421, + "TCATTTG": 1422, + "CTACC": 1423, + 
"TACAGAA": 1424, + "GGTAGA": 1425, + "GATCTA": 1426, + "GTCCATG": 1427, + "TGAGGAA": 1428, + "TAATAAAA": 1429, + "TAAACTT": 1430, + "TCACATT": 1431, + "GGAGGCC": 1432, + "TCACAAA": 1433, + "CACTTTT": 1434, + "CGGCC": 1435, + "CAACAGA": 1436, + "GTAGAGA": 1437, + "GTTATTTT": 1438, + "CGTTTG": 1439, + "TCGTCA": 1440, + "TCTGCTG": 1441, + "CAACACA": 1442, + "GGTAGG": 1443, + "GCAGCTG": 1444, + "TAGTAGAGA": 1445, + "CAAGCC": 1446, + "GCATTTG": 1447, + "TAATATG": 1448, + "GCTTAAA": 1449, + "GCTTCTG": 1450, + "CTCTCCA": 1451, + "TCATCTT": 1452, + "CGTCTG": 1453, + "TCATTTA": 1454, + "CATAGG": 1455, + "GCTCCTT": 1456, + "TGTTCTT": 1457, + "TACATTA": 1458, + "CACAGAA": 1459, + "TAAATATA": 1460, + "TAGAGG": 1461, + "GATAGG": 1462, + "TCCTGAA": 1463, + "GGAGCTG": 1464, + "TGATATT": 1465, + "TCATTAA": 1466, + "CTTTTAAA": 1467, + "TCGTTA": 1468, + "TAAACTA": 1469, + "GTTTGAA": 1470, + "TAAAATTA": 1471, + "CACCCC": 1472, + "TCAGAGA": 1473, + "CTCCTGCCTCA": 1474, + "TGACATT": 1475, + "GTATTTA": 1476, + "CTTCATT": 1477, + "GAAACTG": 1478, + "TAACACA": 1479, + "GTTCAAA": 1480, + "GGAGATG": 1481, + "TCGGCC": 1482, + "CAGCATT": 1483, + "TCGATG": 1484, + "TATTCTA": 1485, + "CTGTGAA": 1486, + "TATTGAA": 1487, + "TTTTCCA": 1488, + "TATTTCTT": 1489, + "GGTGAAA": 1490, + "CTGAGAA": 1491, + "GCACAGA": 1492, + "GCGAGG": 1493, + "CTGTGTG": 1494, + "TGAAATG": 1495, + "TGATGAA": 1496, + "GTCCAAA": 1497, + "CTCAATT": 1498, + "TCCAGAA": 1499, + "GTATATA": 1500, + "TAAAGTT": 1501, + "TCTCAAAA": 1502, + "TCCATCA": 1503, + "GTCTGAA": 1504, + "TGAGAGA": 1505, + "TGATTTG": 1506, + "TTAGCC": 1507, + "CTCCATG": 1508, + "TCCCTGA": 1509, + "GAGCTA": 1510, + "CCCCCCCC": 1511, + "GTGGAAA": 1512, + "CTGGGAA": 1513, + "CAATGAA": 1514, + "CCACACA": 1515, + "CTTTCAA": 1516, + "CGGAGG": 1517, + "TCGTGA": 1518, + "CCAGAAA": 1519, + "GTTTTAAA": 1520, + "TGTTGAA": 1521, + "TCCTGTG": 1522, + "CTAAATG": 1523, + "TCCTTTA": 1524, + "GTCTGGG": 1525, + "TCTCTTTT": 1526, + "TACGG": 1527, + "TATTGTA": 1528, + "TTAGTG": 
1529, + "TTACC": 1530, + "TAATCCCAGCACTTTG": 1531, + "TCTGGAA": 1532, + "CTTCTCA": 1533, + "CGCATT": 1534, + "TATTTAAA": 1535, + "TCACACA": 1536, + "TAATCAA": 1537, + "GCGAAA": 1538, + "GGGCCA": 1539, + "GTTCATT": 1540, + "GAGAAAAA": 1541, + "TTTTGTA": 1542, + "TACTTTT": 1543, + "TCGAGG": 1544, + "GTGAAAAA": 1545, + "CAATATA": 1546, + "TCCCATG": 1547, + "CAATTAA": 1548, + "CTGGAAA": 1549, + "CCCAGCA": 1550, + "TCCCATT": 1551, + "TCCTGTT": 1552, + "CTCTTTA": 1553, + "TCCCCTT": 1554, + "GTTTCAA": 1555, + "GTCCAGG": 1556, + "GGAAGGA": 1557, + "TAGTTTT": 1558, + "TGACCTT": 1559, + "GTGCTGGGATTACAGG": 1560, + "TATTTATA": 1561, + "TCTGCAA": 1562, + "CTGAAAAA": 1563, + "TATGTTA": 1564, + "CTTCACA": 1565, + "GCACAGG": 1566, + "CCTGCTG": 1567, + "TTTTTTAA": 1568, + "GTTATTA": 1569, + "CCCTTTT": 1570, + "TGATTTA": 1571, + "TACAAAA": 1572, + "TAAGTAA": 1573, + "TTTTTAAA": 1574, + "CATCTC": 1575, + "GTGGTGA": 1576, + "GTGGAGA": 1577, + "CTCTGCA": 1578, + "GTTAAAAA": 1579, + "TACATACA": 1580, + "CTTTGTG": 1581, + "GGACACA": 1582, + "TCTGATG": 1583, + "TATTATT": 1584, + "TCTTCTA": 1585, + "CTGTGTT": 1586, + "TCAGCTT": 1587, + "CTTTATA": 1588, + "GGCGC": 1589, + "TCCCTCA": 1590, + "GTACC": 1591, + "TGGAGAA": 1592, + "CAAAAATT": 1593, + "TCTTTAA": 1594, + "CTCTCTC": 1595, + "TGAGTGA": 1596, + "GCAGCTT": 1597, + "CGGATT": 1598, + "TACGA": 1599, + "TCTTGTT": 1600, + "TCGTAA": 1601, + "GCCTGTG": 1602, + "TATTCTG": 1603, + "GGGATA": 1604, + "GGGTCC": 1605, + "TGAGATT": 1606, + "CTTTTATT": 1607, + "TCCCACA": 1608, + "CATGGTG": 1609, + "TTAGGA": 1610, + "GAACACA": 1611, + "TCATAAA": 1612, + "CAACATT": 1613, + "GGTCCA": 1614, + "GAATTTG": 1615, + "TATTAATT": 1616, + "TCCTGGG": 1617, + "GCAGCAA": 1618, + "CTCTTCA": 1619, + "GAAGAGG": 1620, + "TCTGTCA": 1621, + "CTGAATG": 1622, + "CCACAAA": 1623, + "GTGGAGG": 1624, + "TGATTAA": 1625, + "CTCCCTCC": 1626, + "CACACACACACACACACACACACACACACACA": 1627, + "GCGATG": 1628, + "CATTCTG": 1629, + "GTAGAAA": 1630, + "TCATCAA": 1631, + "TTTTCAA": 1632, 
+ "TATGTATG": 1633, + "CCAAATG": 1634, + "TAATTTTA": 1635, + "TAAGGAA": 1636, + "CTTGAAA": 1637, + "AAAAAAAAAAAA": 1638, + "GCTCCTG": 1639, + "GCAGATG": 1640, + "GAAAAATT": 1641, + "GACGC": 1642, + "GTGGGGG": 1643, + "GTCAATT": 1644, + "CTTGCTT": 1645, + "TGACACA": 1646, + "GTGTGTT": 1647, + "CCAGAGA": 1648, + "CCCAGCC": 1649, + "TAAAGAAA": 1650, + "GTCCATT": 1651, + "TAAATTAA": 1652, + "CCCAAAA": 1653, + "GAATTAA": 1654, + "TGAATTA": 1655, + "TTTTTTTG": 1656, + "CCAGCTT": 1657, + "CAATTTG": 1658, + "CTGTTTG": 1659, + "GTCTCAA": 1660, + "GTTTGTG": 1661, + "GGCATA": 1662, + "GGTACA": 1663, + "TGATGTG": 1664, + "GATTTCA": 1665, + "TCTGCTT": 1666, + "GTAATTA": 1667, + "TAAAAAAAA": 1668, + "GCCGCC": 1669, + "TGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTG": 1670, + "GCGTCA": 1671, + "GCTCATT": 1672, + "GAACCTG": 1673, + "TAAACAAA": 1674, + "GTGCTGA": 1675, + "TCAGGAA": 1676, + "TCCTCAA": 1677, + "TCTATTTT": 1678, + "TCTGTTTT": 1679, + "CAGAGCA": 1680, + "CCAGGAA": 1681, + "GTCTTTA": 1682, + "TCTTCAA": 1683, + "TCAAAATT": 1684, + "GCTTATT": 1685, + "GTTCCTT": 1686, + "CACCTA": 1687, + "TCACTGA": 1688, + "GAAGCAA": 1689, + "TAAAGA": 1690, + "TCCTTCA": 1691, + "TCTCATG": 1692, + "TCAGTGA": 1693, + "TACACAA": 1694, + "CACGTG": 1695, + "CCTAAAA": 1696, + "GCCTTTG": 1697, + "GGCTTTT": 1698, + "GTTGAAA": 1699, + "GTTCTC": 1700, + "CTAGA": 1701, + "CTACAAA": 1702, + "GCACAAA": 1703, + "TTACATT": 1704, + "GGCCCC": 1705, + "TAATGTG": 1706, + "CTGCCTT": 1707, + "TCCCAGA": 1708, + "GTGAATG": 1709, + "GGACAGG": 1710, + "GGATGTG": 1711, + "GTTTATA": 1712, + "TGACCAA": 1713, + "GTGGCTG": 1714, + "GTTCTCA": 1715, + "CTTATTTT": 1716, + "CTGGAGA": 1717, + "TTACAAA": 1718, + "GTCTTCA": 1719, + "CAAGAGA": 1720, + "CCATTTG": 1721, + "TCACAGA": 1722, + "CTAGTA": 1723, + "CATTATT": 1724, + "TTAGA": 1725, + "GCTCTCC": 1726, + "GCGCCA": 1727, + "TATGTTTT": 1728, + "TCCTCCA": 1729, + "CAGAAAAA": 1730, + "GTGGGAA": 1731, + "TAATCTT": 1732, + "TGAGTCA": 1733, + "CTGCTC": 1734, + "GTCTCCA": 1735, + "TCATGTT": 
1736, + "GTTTCCA": 1737, + "TAAGCAA": 1738, + "CTAAAAATA": 1739, + "TGACTGA": 1740, + "TCGGTT": 1741, + "TTAGAAA": 1742, + "TAAGCC": 1743, + "TAAAGCA": 1744, + "CCTCTCC": 1745, + "CCTCCTT": 1746, + "TCAGATT": 1747, + "TATGAAAA": 1748, + "GCTGATG": 1749, + "CATATTTT": 1750, + "GCTCCAA": 1751, + "CGGCGG": 1752, + "CCACTGA": 1753, + "CAGCAAA": 1754, + "CTGTCTT": 1755, + "CTAGCA": 1756, + "TCGGGG": 1757, + "CACAGCA": 1758, + "GCTGATT": 1759, + "CTAGGA": 1760, + "TAACTC": 1761, + "TCATATT": 1762, + "CCTTCTT": 1763, + "CTGCAAA": 1764, + "CCCGC": 1765, + "GGTCTA": 1766, + "CCCAGGA": 1767, + "GTGTCTG": 1768, + "TAATAATAATAA": 1769, + "TCACATG": 1770, + "CAATTTA": 1771, + "TATATATATATATATATATATATATATATATA": 1772, + "CCACAGA": 1773, + "TCAATTTT": 1774, + "GTATTAA": 1775, + "GAACATT": 1776, + "TCTCTTA": 1777, + "CTATTTG": 1778, + "TCTTTCC": 1779, + "GGTTAAA": 1780, + "GCTAATT": 1781, + "CTGCTGA": 1782, + "TACCTA": 1783, + "CAGGGTT": 1784, + "TCGCCA": 1785, + "CAAAAATTA": 1786, + "CTTCTGA": 1787, + "GCATGTG": 1788, + "CTATTAA": 1789, + "GCACATG": 1790, + "CAACATG": 1791, + "TCATGAA": 1792, + "GAATGTT": 1793, + "GGGTTTT": 1794, + "CTGCCTG": 1795, + "GTCCACA": 1796, + "TAAACA": 1797, + "CTCTGGA": 1798, + "GACCCC": 1799, + "GGCAAAA": 1800, + "TCTGTTA": 1801, + "CTAGTG": 1802, + "CTATATA": 1803, + "TCAGTCA": 1804, + "TAACTAA": 1805, + "GAAGATG": 1806, + "GTCTTAA": 1807, + "CAAGGAA": 1808, + "GTAAAAAA": 1809, + "TCCCCTG": 1810, + "TCGCAA": 1811, + "TCTGCCTG": 1812, + "CCTTTTA": 1813, + "GTCCCAGCTA": 1814, + "TATATATG": 1815, + "TATTGTG": 1816, + "TGTGTTTT": 1817, + "GCGCAA": 1818, + "CACAGTG": 1819, + "TAAGATT": 1820, + "CTCTGTA": 1821, + "GGAGGCTGA": 1822, + "GGACAAA": 1823, + "TATTAAAAA": 1824, + "TCGTCC": 1825, + "TCGGAA": 1826, + "CTATAAA": 1827, + "CTTCAGA": 1828, + "CTAGAAA": 1829, + "CATTCAA": 1830, + "CACGCA": 1831, + "CAGGATT": 1832, + "CCATCTT": 1833, + "GTAGCC": 1834, + "GAATTTA": 1835, + "CACGC": 1836, + "CAATCC": 1837, + "TGAGCAA": 1838, + "GAAGCTG": 1839, + "TCAATTA": 
1840, + "GAAGTCA": 1841, + "CTGCACA": 1842, + "CCACGG": 1843, + "GGATCTT": 1844, + "CTCCTGCCTCAGCCTCC": 1845, + "TAAATGAA": 1846, + "CCGTC": 1847, + "TCGGTG": 1848, + "TTTTATTA": 1849, + "GCAGGGG": 1850, + "GCAGGTG": 1851, + "TCTATTA": 1852, + "TAACTTA": 1853, + "CTAATTTT": 1854, + "CCCGCC": 1855, + "TAATACA": 1856, + "GGATTAAA": 1857, + "TCTCTCTG": 1858, + "GCTTCTT": 1859, + "CATTTATT": 1860, + "CCAGAGG": 1861, + "GGACAGA": 1862, + "GCCAATT": 1863, + "TCCCCAA": 1864, + "GTTGATT": 1865, + "GAAGAAAA": 1866, + "GCATTTA": 1867, + "CTCTAAA": 1868, + "CACACACACACA": 1869, + "CCTCAAA": 1870, + "TATAATT": 1871, + "CAATGTT": 1872, + "GCCCAGA": 1873, + "GTATATT": 1874, + "CTAAAAAA": 1875, + "CCACAGG": 1876, + "TAAGAGA": 1877, + "TCCTTAA": 1878, + "TATTTTTT": 1879, + "GAATATA": 1880, + "GGATTTG": 1881, + "GTGTGAA": 1882, + "CTGGCTT": 1883, + "GCGGCA": 1884, + "TCCGCC": 1885, + "GCATCTT": 1886, + "TCTAATA": 1887, + "CTGCATT": 1888, + "CTCTGCC": 1889, + "TCACTCA": 1890, + "TCAGCAA": 1891, + "TATTATG": 1892, + "CCAGCTG": 1893, + "GATCTC": 1894, + "GCCTCTT": 1895, + "CTTCCAA": 1896, + "TCCTAAA": 1897, + "TCATCTG": 1898, + "CTATTTA": 1899, + "CTGCAGG": 1900, + "CAAGCAA": 1901, + "GCGGAA": 1902, + "GAAATAAA": 1903, + "TAAAATAA": 1904, + "TCACCTT": 1905, + "CCATGTG": 1906, + "GACCTA": 1907, + "CAGATGA": 1908, + "GTGGCTT": 1909, + "TTATTATTATTA": 1910, + "TCCCGG": 1911, + "TATTTGTT": 1912, + "CTGTAAA": 1913, + "TCCATCCA": 1914, + "CTGTATA": 1915, + "GTTTCTA": 1916, + "GTTGCTT": 1917, + "CCATGAA": 1918, + "GCTCTTA": 1919, + "CTTCATG": 1920, + "GTTCCTG": 1921, + "GCTGGGA": 1922, + "TCAGAGG": 1923, + "CATTAAAA": 1924, + "TCAGTAA": 1925, + "GAATGTG": 1926, + "CTTATTA": 1927, + "GCACTGA": 1928, + "TGAGGTT": 1929, + "CATCAAA": 1930, + "CTTCTCC": 1931, + "GTTTATG": 1932, + "CTTTCCA": 1933, + "GTGCCTG": 1934, + "GAAAGGA": 1935, + "GCATCTG": 1936, + "TACCCA": 1937, + "TAACAGA": 1938, + "AAAAAAAAAAA": 1939, + "CTATGAA": 1940, + "CAGTAAA": 1941, + "TAGCTA": 1942, + "TCGTTTT": 1943, + 
"GTGTCTT": 1944, + "GAGCAAA": 1945, + "TCTAAAAA": 1946, + "GTTCACA": 1947, + "GAAATGA": 1948, + "CAAATGA": 1949, + "GCCCTGA": 1950, + "GTGTTTA": 1951, + "TCATGTG": 1952, + "CATATTA": 1953, + "TCAAAAAAA": 1954, + "TAAGTTA": 1955, + "TCTCTCTT": 1956, + "CCAGTGA": 1957, + "CCTCTGA": 1958, + "CAAGATG": 1959, + "GCCTGTT": 1960, + "GTTTGGG": 1961, + "CATTCATT": 1962, + "GCCCCTG": 1963, + "GTTCTGA": 1964, + "GCGGCC": 1965, + "GCGGTT": 1966, + "CAAAACAAAA": 1967, + "TACATATA": 1968, + "GAATTAAA": 1969, + "TCAAGAA": 1970, + "CTGTATT": 1971, + "TTTTTATT": 1972, + "GATTATT": 1973, + "TCTAATG": 1974, + "GTTGCTG": 1975, + "TGAATGAA": 1976, + "TCAGCTG": 1977, + "CTTGATT": 1978, + "CAGAATG": 1979, + "CTAATTA": 1980, + "TATAATG": 1981, + "GTTTTGTTTT": 1982, + "CCAGCCTG": 1983, + "TGATGGA": 1984, + "GCAGATT": 1985, + "CTCTATT": 1986, + "GCAGTCA": 1987, + "TAAGTGA": 1988, + "CTACACA": 1989, + "CGCATG": 1990, + "TAGCCA": 1991, + "GTGGCTCA": 1992, + "CAAATAAA": 1993, + "GTGCTCA": 1994, + "TTTTTTTTTT": 1995, + "TAACATG": 1996, + "TCCCAGCTA": 1997, + "CAAAGTA": 1998, + "TCATATA": 1999, + "CAGCATG": 2000, + "TGATCTT": 2001, + "CATAATT": 2002, + "TGTGTTA": 2003, + "TTTTGAA": 2004, + "TTAATTA": 2005, + "GATATTA": 2006, + "TCATTCA": 2007, + "TGATATA": 2008, + "TGACTCA": 2009, + "GACGTT": 2010, + "TGACATG": 2011, + "GTTGTGA": 2012, + "CATTTTTT": 2013, + "GCCTGGA": 2014, + "CTATGTT": 2015, + "CTTTGGG": 2016, + "GTCTCAAA": 2017, + "CTGGCTG": 2018, + "CCACATG": 2019, + "GGCGTG": 2020, + "CTTAATG": 2021, + "TAAGATG": 2022, + "GTATAAA": 2023, + "TGTATTA": 2024, + "TAACTCA": 2025, + "GAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGA": 2026, + "GCATGAA": 2027, + "GTTAATG": 2028, + "TCCAGGA": 2029, + "GAGAGAAA": 2030, + "TCTCTGTG": 2031, + "CTCTCTA": 2032, + "CCACCTG": 2033, + "GCCAGGA": 2034, + "CTGGAGG": 2035, + "CCATTTA": 2036, + "GTCTGGA": 2037, + "GCCCACA": 2038, + "TAGAGAA": 2039, + "CAACTCA": 2040, + "GGCAGGA": 2041, + "TCTTATG": 2042, + "CAAAGGA": 2043, + "GGTAAAA": 2044, + "GAGAGGA": 2045, + "GTCCAGA": 
2046, + "GCCCTCA": 2047, + "GATATTTT": 2048, + "CAGGGAA": 2049, + "CCACATT": 2050, + "GAGGAGG": 2051, + "GAAACTT": 2052, + "CAGAATT": 2053, + "TCAGATG": 2054, + "TATTTCC": 2055, + "TACAGTG": 2056, + "TGAGCTG": 2057, + "CCATCTG": 2058, + "GAGAATG": 2059, + "TCAACAA": 2060, + "ATT": 2061, + "TAACTGA": 2062, + "TGAGAGG": 2063, + "CACTGAA": 2064, + "CCACCTT": 2065, + "CTGCAGA": 2066, + "TCACCAA": 2067, + "TGAGCTT": 2068, + "CAAAGCA": 2069, + "GGTTTTA": 2070, + "CGGGGTT": 2071, + "TCCAAAAA": 2072, + "TATGTATA": 2073, + "CCAGATG": 2074, + "TCCATTTT": 2075, + "CTGCTCA": 2076, + "GATAATT": 2077, + "CCACCAA": 2078, + "CTCCTCC": 2079, + "GAGAATT": 2080, + "GAAAGTA": 2081, + "TAAAATAAAA": 2082, + "CTTCTTA": 2083, + "CTGTTTA": 2084, + "GAATCAA": 2085, + "GCATGTT": 2086, + "GCACGG": 2087, + "GACTGAA": 2088, + "GTGCACA": 2089, + "GACGTG": 2090, + "TATACAA": 2091, + "TCGACA": 2092, + "GAAGACA": 2093, + "TAAAGGA": 2094, + "GATCAAA": 2095, + "CAGTGTG": 2096, + "CTAGCC": 2097, + "GAGGAAAA": 2098, + "TCTGAAAA": 2099, + "GAACCCA": 2100, + "GATGGATG": 2101, + "GTTCTTA": 2102, + "CTATATT": 2103, + "GCATTAA": 2104, + "TCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTC": 2105, + "TCAGTC": 2106, + "TATTTTTG": 2107, + "GAGGATT": 2108, + "GTATGTG": 2109, + "TAACCAA": 2110, + "GTTGTTTT": 2111, + "TTTTTCTT": 2112, + "GTGTTAA": 2113, + "CTTGGAA": 2114, + "AAAAAATG": 2115, + "CAATGTG": 2116, + "GTGCCTT": 2117, + "GCCTCAA": 2118, + "GAGTCTT": 2119, + "GCTAATTTT": 2120, + "CGAAAAA": 2121, + "GTGTATA": 2122, + "GCGTTA": 2123, + "CTGCACTCCAGCCTGGG": 2124, + "GTTCATG": 2125, + "CAAAGAAA": 2126, + "GCAGTAA": 2127, + "GGATGAA": 2128, + "CTTTATG": 2129, + "CAGGAAAA": 2130, + "TCCTGCA": 2131, + "CTGTCTG": 2132, + "GAACATG": 2133, + "GGATGGA": 2134, + "GCCTGAA": 2135, + "CAAAAATG": 2136, + "TCCAATG": 2137, + "CCAGCAA": 2138, + "GGCCTA": 2139, + "CAACTGA": 2140, + "GCACCTG": 2141, + "GTCTATT": 2142, + "CCTCTCA": 2143, + "GTGGTCA": 2144, + "GTGTAAA": 2145, + "GTACACA": 2146, + "GTAAAATT": 2147, + "GTACATT": 2148, + 
"TATATAAA": 2149, + "CTGTTAA": 2150, + "TAAGTCA": 2151, + "GCCTCCA": 2152, + "AAATTAAA": 2153, + "GTGCAGG": 2154, + "TCCTGGA": 2155, + "GTGCAAA": 2156, + "GCGTCC": 2157, + "CCATTAA": 2158, + "GGAGGGA": 2159, + "TCACTTA": 2160, + "TCATTAAA": 2161, + "CAACATA": 2162, + "TAATAGA": 2163, + "TAATGTA": 2164, + "GATTTTTT": 2165, + "GTTGTCA": 2166, + "GGAGACA": 2167, + "GTGTGGG": 2168, + "TCACAGG": 2169, + "TCGGCA": 2170, + "CTCCCTG": 2171, + "GACCAAA": 2172, + "TGTTTATT": 2173, + "CGAATG": 2174, + "CTCAATG": 2175, + "TCACCTG": 2176, + "CAGTGTT": 2177, + "TGAGACA": 2178, + "TAGGGG": 2179, + "GAAAAATG": 2180, + "GTTGAGA": 2181, + "TCGATA": 2182, + "CTCGGGAGG": 2183, + "GTTGTC": 2184, + "CCAGTCA": 2185, + "GCCCAGGCTG": 2186, + "GAACAGA": 2187, + "GGCTCACTGCAA": 2188, + "GCAGACA": 2189, + "TGAGGTG": 2190, + "CACGTT": 2191, + "TAAGAAAA": 2192, + "CCAGGCA": 2193, + "GTATCTT": 2194, + "CTTGGGAGG": 2195, + "CTTTCTA": 2196, + "CCGCTG": 2197, + "GAGCTCA": 2198, + "GAGACAGA": 2199, + "CTTCAGG": 2200, + "GCACATT": 2201, + "GTACAAA": 2202, + "CTTGTAA": 2203, + "GTGGGTG": 2204, + "GAAGTGA": 2205, + "GGTCTC": 2206, + "GTATGTT": 2207, + "GCACTCA": 2208, + "TTATGTT": 2209, + "CAAGTCA": 2210, + "CAAGTGA": 2211, + "GAAACTA": 2212, + "TAAATAAAA": 2213, + "TCTTAAAA": 2214, + "GTTGGAA": 2215, + "GTTCTAA": 2216, + "CCACTC": 2217, + "CAGTGAA": 2218, + "GAAAGG": 2219, + "GCACGA": 2220, + "TAACTTTT": 2221, + "GTTGTTA": 2222, + "TCAGTTA": 2223, + "CGGATG": 2224, + "TATTTGAA": 2225, + "CCCTGAA": 2226, + "GCCCTC": 2227, + "CTTCTAA": 2228, + "TTTGTTTT": 2229, + "GAGCTGA": 2230, + "CTGTGGG": 2231, + "CAAGATT": 2232, + "GAAGCTT": 2233, + "TGAGTAA": 2234, + "CTTGCTG": 2235, + "GGATGGG": 2236, + "CGTATG": 2237, + "TCCATTA": 2238, + "GTCTGCA": 2239, + "GCCATTTT": 2240, + "GTTGTAA": 2241, + "CACACAA": 2242, + "GGACTACAGG": 2243, + "CGTTTTA": 2244, + "TCTTCC": 2245, + "TAACCTT": 2246, + "CTTTAAAA": 2247, + "TGAATTTT": 2248, + "CTACAGA": 2249, + "GCAAGAA": 2250, + "TAACAAAA": 2251, + "CAATTAAA": 2252, + 
"CCACTCA": 2253, + "CATGGTGAAA": 2254, + "CCCAGAA": 2255, + "CTACATT": 2256, + "CCGAGG": 2257, + "TCCAGTG": 2258, + "TGAGTTA": 2259, + "GGAGTCA": 2260, + "TAACGA": 2261, + "GAGTAAA": 2262, + "GACTCTG": 2263, + "GGAGCTT": 2264, + "TACTCC": 2265, + "CTGCATG": 2266, + "GCTTTTTT": 2267, + "GTCTAAA": 2268, + "GTGCGG": 2269, + "CATCTCA": 2270, + "TGATCAA": 2271, + "GGAGATT": 2272, + "GCAAAAAA": 2273, + "CACCAAA": 2274, + "TGACGG": 2275, + "CAGAGG": 2276, + "GTTGATG": 2277, + "CTTGTCA": 2278, + "TCCACCTG": 2279, + "GGAGCAA": 2280, + "CAAGTAA": 2281, + "CCATAAA": 2282, + "GTGCATG": 2283, + "GCATATT": 2284, + "GTAGATT": 2285, + "GCCTAA": 2286, + "CTCAAAAA": 2287, + "GGAGAAAA": 2288, + "CTATCC": 2289, + "TAATATTA": 2290, + "GTGCTC": 2291, + "CAATATG": 2292, + "TGTGGAA": 2293, + "TGACTC": 2294, + "GTGTATG": 2295, + "TTTTAATG": 2296, + "GCTCTAA": 2297, + "CACAATG": 2298, + "CAGCTCA": 2299, + "GTTGGTT": 2300, + "CTAAAATT": 2301, + "GTCTATG": 2302, + "TGTGAAAA": 2303, + "CTGGGTT": 2304, + "CCCCTCC": 2305, + "CCCTCTT": 2306, + "GCAGGGA": 2307, + "GAAACCA": 2308, + "CATTTCC": 2309, + "GCAGCCA": 2310, + "TCATATG": 2311, + "GCAGGCA": 2312, + "CGTAAAA": 2313, + "TGACCTG": 2314, + "CAGAGGTT": 2315, + "CTTGTGA": 2316, + "TTATCTT": 2317, + "CTGTATG": 2318, + "GTCAATG": 2319, + "GGACGG": 2320, + "GCGTAA": 2321, + "CAAACTA": 2322, + "TAAATGTT": 2323, + "CTTCGG": 2324, + "CTCCCCA": 2325, + "TACAATG": 2326, + "TCTGTAA": 2327, + "GAATATG": 2328, + "GCGGGA": 2329, + "GGACATT": 2330, + "TTATGAA": 2331, + "GGATGTT": 2332, + "GGACATG": 2333, + "TCAGGTG": 2334, + "CAACAAAA": 2335, + "GAAAGAGA": 2336, + "GTGGATG": 2337, + "GGGCTA": 2338, + "CCATCAA": 2339, + "CAGCTGA": 2340, + "CTCCACC": 2341, + "CAATCAA": 2342, + "GTGGTC": 2343, + "TGACAGG": 2344, + "CCATTCA": 2345, + "GTCCCTG": 2346, + "CAGACACA": 2347, + "GTTGGTG": 2348, + "CCTCCTG": 2349, + "GAACTGA": 2350, + "TATTCATT": 2351, + "GCCCATG": 2352, + "CAATCTT": 2353, + "GAAAGCA": 2354, + "GAATCTG": 2355, + "TTATTTTA": 2356, + "GTTTGGA": 2357, + 
"TTTTTGTT": 2358, + "GGGAATG": 2359, + "GCGACA": 2360, + "TAAACTG": 2361, + "CCATATT": 2362, + "GGATCC": 2363, + "CAAGCTT": 2364, + "TAAAAAAAAA": 2365, + "TCACTC": 2366, + "CACTGTT": 2367, + "TGTTAATT": 2368, + "GGACTGA": 2369, + "GGAGTGA": 2370, + "CATACACA": 2371, + "GTTTGTA": 2372, + "TCCAGCA": 2373, + "GTGCATT": 2374, + "GGAAAAAA": 2375, + "CCAAGAA": 2376, + "TCAATA": 2377, + "CTTCCCA": 2378, + "TGAGAAAA": 2379, + "GGCCTCCCAAA": 2380, + "CAAGCTG": 2381, + "GCCCAAA": 2382, + "TGACTTA": 2383, + "CAGCCTT": 2384, + "CTGGATT": 2385, + "TTTTTTTA": 2386, + "TCACGG": 2387, + "GCAGTTA": 2388, + "TGACTAA": 2389, + "TTACAGG": 2390, + "TGATATG": 2391, + "TAATTATT": 2392, + "TCTTGAA": 2393, + "GCCCCTT": 2394, + "GTTCAGA": 2395, + "CTCTATG": 2396, + "CCATGGA": 2397, + "GAGGGAA": 2398, + "GGAGGCA": 2399, + "CTTTGCA": 2400, + "TCTTGG": 2401, + "GGAGGTT": 2402, + "GCCAATG": 2403, + "CTGGTGA": 2404, + "CAACCAA": 2405, + "CCAGTC": 2406, + "CTTGAGA": 2407, + "TACAGCA": 2408, + "CTTGTC": 2409, + "GACGGA": 2410, + "CTTCTTTT": 2411, + "GTGGC": 2412, + "GAGGATG": 2413, + "CAATAAAA": 2414, + "GAAATTTT": 2415, + "AAAAAAAAAA": 2416, + "CTCTATA": 2417, + "GTATGAA": 2418, + "CTTGTTA": 2419, + "TAACATA": 2420, + "CAAACACA": 2421, + "TGATTAAA": 2422, + "GCTCTGTT": 2423, + "GTGGGTT": 2424, + "GTTGGGG": 2425, + "GTGTGTA": 2426, + "GTAATTTT": 2427, + "GTATCC": 2428, + "TGTGTGTGTGTG": 2429, + "TCTTCCTT": 2430, + "TCACTAA": 2431, + "TCTCCAAA": 2432, + "TATCAAA": 2433, + "TGATGGG": 2434, + "GGATATT": 2435, + "CAAATTTT": 2436, + "GTTCAGG": 2437, + "GTGGATT": 2438, + "GTGCAGA": 2439, + "GCTGCC": 2440, + "CTCAGAA": 2441, + "GCAGTC": 2442, + "GGATAAA": 2443, + "GCCTTCA": 2444, + "CCAGGTG": 2445, + "TATCTC": 2446, + "CAATGCA": 2447, + "CCCACTG": 2448, + "GTGTATT": 2449, + "CGACAGA": 2450, + "TGAGATA": 2451, + "CCAGGTT": 2452, + "TGTTTAA": 2453, + "CATCATG": 2454, + "TGATTCA": 2455, + "GCAATTA": 2456, + "GAAATGAA": 2457, + "CTTGGTT": 2458, + "GAAGATT": 2459, + "GGATTAA": 2460, + "CCTCATT": 2461, + 
"GGCCAGGCTG": 2462, + "GCTATTA": 2463, + "GCCAGCA": 2464, + "GAGACAGG": 2465, + "CTTGAGG": 2466, + "CAGTCTT": 2467, + "GTTCTCC": 2468, + "TATTTCAA": 2469, + "TGACGA": 2470, + "CATGAAAA": 2471, + "CATTATG": 2472, + "TAAATTTA": 2473, + "GAGTGAA": 2474, + "CAACAGG": 2475, + "TAAGCTT": 2476, + "CACATTTT": 2477, + "GATCTCA": 2478, + "TAGTCC": 2479, + "GACCCTG": 2480, + "TAATGCA": 2481, + "TAAGTC": 2482, + "TAATAATT": 2483, + "GAAGTAA": 2484, + "CAACTC": 2485, + "CATCATT": 2486, + "GACGAA": 2487, + "GAAACAAA": 2488, + "TATTTCTG": 2489, + "CATTAATT": 2490, + "CCACCCC": 2491, + "TAATATTTT": 2492, + "GTTTAAAA": 2493, + "GTATCTG": 2494, + "GTCAAAAA": 2495, + "GATGCTG": 2496, + "TGTTCTG": 2497, + "GGTCAAA": 2498, + "GTAGGAA": 2499, + "GTATATG": 2500, + "TGATCTG": 2501, + "GGGGCTG": 2502, + "GCATCAA": 2503, + "GCCAAAAA": 2504, + "CCACGA": 2505, + "GCTAATG": 2506, + "CAGAGAAA": 2507, + "CCTTCTG": 2508, + "TCCTCTA": 2509, + "GCAGGTT": 2510, + "CTCACTG": 2511, + "TAGATTA": 2512, + "GCCGAGA": 2513, + "CCATCCA": 2514, + "CTTTACA": 2515, + "GTACATG": 2516, + "GCACCAA": 2517, + "CTTTGTA": 2518, + "CTATGTG": 2519, + "TCACTTTT": 2520, + "TGAGTC": 2521, + "CAAGAAAA": 2522, + "CTGACTG": 2523, + "GTTTTTTTT": 2524, + "GCATAAA": 2525, + "TAATCTG": 2526, + "GAAAAAAAA": 2527, + "CAGGATG": 2528, + "TGAGCCA": 2529, + "GAATTCA": 2530, + "TCAGACA": 2531, + "GTTCCAA": 2532, + "TCAGGTT": 2533, + "CAAACTG": 2534, + "CATTTCTT": 2535, + "TGTTAAAA": 2536, + "CCAGACA": 2537, + "CAAGTTA": 2538, + "CATGTTA": 2539, + "CATTCTA": 2540, + "TCTTTTTG": 2541, + "TGAGGGG": 2542, + "CACATTA": 2543, + "TAAAATAAA": 2544, + "GCATATA": 2545, + "TGTTCTA": 2546, + "GAAGGGG": 2547, + "GAGTGTG": 2548, + "TAAGACA": 2549, + "GAACTC": 2550, + "CCAGTAA": 2551, + "GAGAGAGG": 2552, + "GCGACC": 2553, + "CAATTCA": 2554, + "CGGCTG": 2555, + "CCAGATT": 2556, + "CCTGGG": 2557, + "GGAAGAAA": 2558, + "GAGAGG": 2559, + "TCAAAATG": 2560, + "CCTCATG": 2561, + "TAAAGG": 2562, + "CTTTGGA": 2563, + "CCAGGGA": 2564, + "GTACAGA": 2565, + 
"CTGAGGCAGGA": 2566, + "TGTTTCTT": 2567, + "CCAGGCTG": 2568, + "CTGAGG": 2569, + "GAGGCTG": 2570, + "CTCCTGGG": 2571, + "GAAGTC": 2572, + "CGACC": 2573, + "GGACTCA": 2574, + "GGAGTC": 2575, + "CACAATT": 2576, + "GTGTTCA": 2577, + "GACTAAA": 2578, + "GTCATTA": 2579, + "CAAAATTA": 2580, + "TGAAGAAA": 2581, + "GCACCTT": 2582, + "GTTTGCA": 2583, + "TCCTGCC": 2584, + "GTAGATG": 2585, + "GCCTGCA": 2586, + "GAGTTAA": 2587, + "TCCCTTA": 2588, + "GTGGTTA": 2589, + "TCGGGA": 2590, + "TACATAA": 2591, + "TCTCTCCA": 2592, + "CACTAAA": 2593, + "TATATATATATA": 2594, + "GTGGCAA": 2595, + "CACCATG": 2596, + "TTTGAAAA": 2597, + "CACACTG": 2598, + "CTTGGTG": 2599, + "TACACTG": 2600, + "CCTCCAA": 2601, + "CAACCTT": 2602, + "CAGCCAA": 2603, + "TTTTCAAA": 2604, + "TGATAGA": 2605, + "TACACTA": 2606, + "TCTGGG": 2607, + "TCCCAGCA": 2608, + "TAGGAAAA": 2609, + "CTTGGGG": 2610, + "TCTGTGAA": 2611, + "CCTTATT": 2612, + "CATTTAAA": 2613, + "TTTTATTTTA": 2614, + "GCCCTCC": 2615, + "CTGAGCA": 2616, + "CCCGTG": 2617, + "GTAGTGA": 2618, + "TCCTATT": 2619, + "GAAGGTG": 2620, + "TGTGCTG": 2621, + "TCCACTG": 2622, + "TAATCTA": 2623, + "TGATGTA": 2624, + "GTGGTAA": 2625, + "TAATGGA": 2626, + "GATGAAAA": 2627, + "GTAGTAA": 2628, + "GTGGGGA": 2629, + "GTGTCAA": 2630, + "CAGACTG": 2631, + "TCGAAAA": 2632, + "CTCATTA": 2633, + "TAATAATA": 2634, + "CTCAGAAA": 2635, + "CATCCTT": 2636, + "CCGCTT": 2637, + "GGAAGG": 2638, + "CCGTGA": 2639, + "CCACTCC": 2640, + "CTAGAGA": 2641, + "TAGAATG": 2642, + "GGATTTA": 2643, + "TTAATTTT": 2644, + "GCTAATA": 2645, + "TCCCCCA": 2646, + "CAAATATT": 2647, + "GATCATG": 2648, + "TCTTAATT": 2649, + "CAGTATT": 2650, + "GTCTTGAA": 2651, + "CCGAAA": 2652, + "CTATTCA": 2653, + "TAAGATA": 2654, + "CTTGCAA": 2655, + "GCCCCAA": 2656, + "TCCCTAA": 2657, + "GAAGTTA": 2658, + "GATGATG": 2659, + "CTTGATG": 2660, + "CCCTAAA": 2661, + "CCTGCCTG": 2662, + "GACATTTT": 2663, + "CCAGCCA": 2664, + "TGTGTGTGTG": 2665, + "GTCTATA": 2666, + "TCTCTGTT": 2667, + "GTCTGTA": 2668, + "TATAATA": 2669, 
+ "CTTGTTTT": 2670, + "CGCCATT": 2671, + "CTCAGCA": 2672, + "TACAGTT": 2673, + "CAAGAGG": 2674, + "GGAAGCA": 2675, + "GCCTTTA": 2676, + "CCCCATT": 2677, + "CAACGA": 2678, + "GTCATTTT": 2679, + "CCCGCA": 2680, + "CAGTTAA": 2681, + "GAATCTT": 2682, + "CATGTTTT": 2683, + "CCGGGG": 2684, + "CTACTGA": 2685, + "TCACGA": 2686, + "TAAATTTG": 2687, + "GCCCATT": 2688, + "CTCTAGG": 2689, + "GGACCTG": 2690, + "TCAGGGA": 2691, + "GAGACTG": 2692, + "CCAAAAAA": 2693, + "GCCGG": 2694, + "CCAGGGG": 2695, + "TCAGAAAA": 2696, + "CATCTGA": 2697, + "TCTTCAAA": 2698, + "CTACAGG": 2699, + "GAGGCAGG": 2700, + "CATTGTA": 2701, + "TAAATCAA": 2702, + "GACTCTT": 2703, + "CTGATTA": 2704, + "GCATATG": 2705, + "GGACCTT": 2706, + "CAAGACA": 2707, + "TATTTATG": 2708, + "TATTTTAAA": 2709, + "CCGAGA": 2710, + "TCATTTTA": 2711, + "CTCACTCA": 2712, + "CCACCCA": 2713, + "CTCTAGA": 2714, + "CTACATG": 2715, + "GTGCTTA": 2716, + "CAACCTG": 2717, + "TCTGTGTT": 2718, + "TAAATATG": 2719, + "CAAAGG": 2720, + "CCCTGTT": 2721, + "GTTCGG": 2722, + "TGATAAAA": 2723, + "CACGAA": 2724, + "GTTGAGG": 2725, + "CAGAGTGA": 2726, + "GAAATTAA": 2727, + "CACATA": 2728, + "GAACAGG": 2729, + "TCTCCTGA": 2730, + "CCTGAGG": 2731, + "GGAGGCCAA": 2732, + "GTTTACA": 2733, + "TAACAGG": 2734, + "TGTGGTG": 2735, + "GCCTCCCAAA": 2736, + "CCATCCTG": 2737, + "GATTCTT": 2738, + "GAATGGA": 2739, + "GTAGTCA": 2740, + "CTCCTCTG": 2741, + "GAAAGAAAGAAAGAAA": 2742, + "CCCTGTG": 2743, + "CAGTATG": 2744, + "GCGATA": 2745, + "GGACTC": 2746, + "GAAAGA": 2747, + "TGTTGG": 2748, + "GTAGCTT": 2749, + "CATTTTAA": 2750, + "CCCTCTG": 2751, + "GCATTCA": 2752, + "CGATTA": 2753, + "TCACATA": 2754, + "TAATGAAA": 2755, + "GGAATTA": 2756, + "CTGTCAA": 2757, + "TAAATTAAA": 2758, + "CAAGTC": 2759, + "GTATTCA": 2760, + "GGCCATG": 2761, + "CTTTAGA": 2762, + "TGTTTCC": 2763, + "CATGTA": 2764, + "GAATAAAA": 2765, + "CAACTAA": 2766, + "TCATCTA": 2767, + "CACTCTT": 2768, + "CAGTTTG": 2769, + "CATAAAAA": 2770, + "GCATGCA": 2771, + "GATTTA": 2772, + "GAACCAA": 2773, 
+ "TCTGTGA": 2774, + "TCAGCCA": 2775, + "TCTCCACA": 2776, + "TCTCAGCTCA": 2777, + "TATCATG": 2778, + "GCACTTA": 2779, + "CGCCAGG": 2780, + "CGGGG": 2781, + "CATTAAAAA": 2782, + "TTTGTTA": 2783, + "GGATATA": 2784, + "TCGACC": 2785, + "TAATCCA": 2786, + "CCGC": 2787, + "CATTGTT": 2788, + "CCAGTTA": 2789, + "GTAGTTA": 2790, + "CTAGGAA": 2791, + "CCTAATT": 2792, + "TCATGGG": 2793, + "GAACTAA": 2794, + "GCTATTTT": 2795, + "CCGTCA": 2796, + "CAGATTA": 2797, + "CCATATA": 2798, + "CAACTTA": 2799, + "TCAGTTTT": 2800, + "CTACCTT": 2801, + "GCACTC": 2802, + "GTGTGGA": 2803, + "GTGCCAA": 2804, + "GACAATG": 2805, + "GACAATT": 2806, + "GTACCTT": 2807, + "TAAACATT": 2808, + "CAGGAGG": 2809, + "GTGCGA": 2810, + "GAAAATTA": 2811, + "TCTCTTAA": 2812, + "CCGATT": 2813, + "GATGATT": 2814, + "CCATGGG": 2815, + "TCGGTA": 2816, + "CCATATG": 2817, + "CCAGTCC": 2818, + "GCCTTAA": 2819, + "TGATCCA": 2820, + "GTTGCAA": 2821, + "GTAGAGG": 2822, + "CAGATTTT": 2823, + "GTACTTA": 2824, + "TCTTTCTTTCTTTCTT": 2825, + "GCTCTGTG": 2826, + "TCAATAA": 2827, + "GTTTAGA": 2828, + "GTTCGA": 2829, + "CAAGGTT": 2830, + "CTCATTTT": 2831, + "CACAGG": 2832, + "CATGCTG": 2833, + "GAACGG": 2834, + "TATAAAAA": 2835, + "GAAGGCA": 2836, + "GAGCATT": 2837, + "TGTTTGTG": 2838, + "GCTGTTA": 2839, + "GTCACTG": 2840, + "CAAATGAA": 2841, + "GTGACTG": 2842, + "GTTCTTTT": 2843, + "CAGGCTGGAGTGCAGTG": 2844, + "TGATGAAA": 2845, + "TAACGG": 2846, + "CTACTAA": 2847, + "GACATTA": 2848, + "GGACGA": 2849, + "GAGCATG": 2850, + "GCATGGG": 2851, + "CCACTTA": 2852, + "CTATCAA": 2853, + "GCTGTTTT": 2854, + "GTCGTG": 2855, + "CCTGGCC": 2856, + "TCTCTGAA": 2857, + "TGTTGTA": 2858, + "CAGCCAGG": 2859, + "GTTTAGG": 2860, + "CCGCAA": 2861, + "GGAGTAA": 2862, + "CCAATTA": 2863, + "CAGCAAAA": 2864, + "TCATCCA": 2865, + "CACGTA": 2866, + "TCATAGA": 2867, + "TAATTAAAA": 2868, + "CACTTAA": 2869, + "TCTTTATT": 2870, + "GAGATTA": 2871, + "TAAGAGG": 2872, + "CAAATTAA": 2873, + "GACGCA": 2874, + "CACGGA": 2875, + "GTGTGCA": 2876, + "TCT": 2877, + 
"TATTATTA": 2878, + "GAAATATT": 2879, + "GGAGTTA": 2880, + "TCTTTGA": 2881, + "CTGATTTT": 2882, + "TGTGAATT": 2883, + "TCCCACC": 2884, + "CCCTTTG": 2885, + "CAAGGTG": 2886, + "CAGAGTT": 2887, + "CCCCATG": 2888, + "CTACCAA": 2889, + "CTCCAAAA": 2890, + "CTTCCCC": 2891, + "CTGCTAA": 2892, + "GATTAAAA": 2893, + "GCTTATG": 2894, + "CTACTTA": 2895, + "TAAAAAATT": 2896, + "TCAGTCC": 2897, + "CTATTAAA": 2898, + "GAATGGG": 2899, + "CACAGTA": 2900, + "CAACGG": 2901, + "GGTTATT": 2902, + "TCACCCA": 2903, + "TGATGCA": 2904, + "TAATTTTTT": 2905, + "GTTTGAGA": 2906, + "GTATTAAA": 2907, + "GCCCCCA": 2908, + "TATAGTA": 2909, + "TAGTAAA": 2910, + "TGATACA": 2911, + "GTGGTTTT": 2912, + "CCACTAA": 2913, + "CACAGAGA": 2914, + "CCTCTGCCTCC": 2915, + "CAAAAAAAA": 2916, + "CTCTCTCC": 2917, + "CATAATA": 2918, + "GAAGCCA": 2919, + "GTTCCCA": 2920, + "TGTGTTTG": 2921, + "CAATGGA": 2922, + "TGAAGTA": 2923, + "CTTCATA": 2924, + "CACTGTG": 2925, + "GCTCTTTT": 2926, + "TGACATA": 2927, + "TAAAGAAAA": 2928, + "GAGAAATG": 2929, + "CAGGGAGG": 2930, + "TGTTCAA": 2931, + "GAGCCAA": 2932, + "GACAGAGA": 2933, + "GGCTGAA": 2934, + "CAAATATA": 2935, + "GTGGAAAA": 2936, + "TAAGGTT": 2937, + "GTGATTA": 2938, + "GGATCTG": 2939, + "GATGTTA": 2940, + "GACTACACA": 2941, + "TCCTATA": 2942, + "CTGCCAA": 2943, + "TCCCGA": 2944, + "GTGATTTT": 2945, + "GCGTTTT": 2946, + "CAGAGTA": 2947, + "GAAAGGAA": 2948, + "CACTTTG": 2949, + "CCCCAAAA": 2950, + "GCAACCCA": 2951, + "TGCATTTT": 2952, + "TCTAGAA": 2953, + "TACTTTG": 2954, + "TGAGGCA": 2955, + "CATCTCC": 2956, + "TCGCTA": 2957, + "TGACTTTT": 2958, + "GAGCCTG": 2959, + "CATTTGTT": 2960, + "TCTTTGTT": 2961, + "GCAAAATT": 2962, + "CCTGATT": 2963, + "GATAAAAA": 2964, + "GAGTGTT": 2965, + "TCCTGTA": 2966, + "TACAGAAA": 2967, + "TCCAGGAA": 2968, + "GCCAGTG": 2969, + "TAGATTTT": 2970, + "TAATAGG": 2971, + "CTCCTCA": 2972, + "CATTTTTG": 2973, + "CATTTCAA": 2974, + "GCCATCA": 2975, + "TAAAATATA": 2976, + "GACTGTT": 2977, + "GCATGGA": 2978, + "CAAAGTT": 2979, + "CATGATT": 
2980, + "GAGTTTG": 2981, + "CTAGCAA": 2982, + "CTTCCTA": 2983, + "GGGGAGG": 2984, + "CTATATG": 2985, + "TATTTATTTT": 2986, + "CACCATT": 2987, + "CCCTCAA": 2988, + "TTTTTTTTTTTTTT": 2989, + "GATCATT": 2990, + "GTACATA": 2991, + "CTCCATA": 2992, + "CCCCGTCTCTA": 2993, + "GCCTGCC": 2994, + "CTAGCTT": 2995, + "CCCGGA": 2996, + "GATGTTTT": 2997, + "GTATTTTA": 2998, + "TCAGATA": 2999, + "CCTGGAA": 3000, + "TATTCCA": 3001, + "GGACCAA": 3002, + "GCCATTA": 3003, + "CGACTGA": 3004, + "TAAGCTG": 3005, + "TAAACACA": 3006, + "GTTTCTC": 3007, + "CATCTTA": 3008, + "GAAATTTG": 3009, + "TAATGGG": 3010, + "TAAAATTTT": 3011, + "CTGTTCA": 3012, + "CCTGTTA": 3013, + "TACTGAA": 3014, + "TGACCCA": 3015, + "TGATTTTA": 3016, + "CTCCTTA": 3017, + "TATAGAA": 3018, + "CTGCGG": 3019, + "GCGGTA": 3020, + "GTGCTAA": 3021, + "CAGAGGAA": 3022, + "TACATCA": 3023, + "TCAATCAA": 3024, + "CTGCAGCC": 3025, + "TGAATATT": 3026, + "TCTACAA": 3027, + "CCACATA": 3028, + "CCCGTT": 3029, + "TATACACA": 3030, + "TCCTCTC": 3031, + "TCTACTT": 3032, + "CCGGAA": 3033, + "CTTTTTTA": 3034, + "GAAAGAAAA": 3035, + "CTATCTT": 3036, + "GACTTTG": 3037, + "TGAACAA": 3038, + "GCAGTTTT": 3039, + "GCTAAAAA": 3040, + "GAGGCGG": 3041, + "TAATAAAAA": 3042, + "CTGGTCA": 3043, + "CAGACAA": 3044, + "GGATATG": 3045, + "TGAAGG": 3046, + "GCCAGAA": 3047, + "CCAGGCC": 3048, + "CCACCATG": 3049, + "CAAACTT": 3050, + "TCATGTA": 3051, + "GCTGCTT": 3052, + "GTAATA": 3053, + "CCCCCAA": 3054, + "CAGCCTG": 3055, + "TCAACTT": 3056, + "TAAAATTAA": 3057, + "GCTGAAAA": 3058, + "CGACGA": 3059, + "GTGGGCA": 3060, + "TGAGGGA": 3061, + "CGCTCC": 3062, + "TTTTGTTTT": 3063, + "GAGTCAA": 3064, + "TCATGCA": 3065, + "CTGCTTA": 3066, + "TAAGTTTT": 3067, + "GTAGCAA": 3068, + "CCTTGG": 3069, + "TGACAAAA": 3070, + "CTGGTAA": 3071, + "TCTTTATA": 3072, + "TGTGTGTT": 3073, + "CTGGTC": 3074, + "CTGGCAA": 3075, + "CATTTCTG": 3076, + "CTCTACC": 3077, + "CTGAGGA": 3078, + "CTAAAATG": 3079, + "CTAGATT": 3080, + "GTATCAA": 3081, + "CAGTCAA": 3082, + "CTGGGTG": 3083, + 
"CCTCTTA": 3084, + "TGAGTTTT": 3085, + "TTTTATTTA": 3086, + "CCTTTTTT": 3087, + "TATATACA": 3088, + "TAGCAAA": 3089, + "AAATTA": 3090, + "CTGGATG": 3091, + "GATAATA": 3092, + "GACAAAAA": 3093, + "CCTGGGA": 3094, + "GCTTTCA": 3095, + "GTACAGG": 3096, + "GCTGGAA": 3097, + "CTACTCA": 3098, + "CAATGTA": 3099, + "GCGTGAA": 3100, + "GATCCTT": 3101, + "TATTAATG": 3102, + "GCCCGA": 3103, + "TAAAGTG": 3104, + "GCTTCCA": 3105, + "CATGGAA": 3106, + "TGAAGTT": 3107, + "CTTTCTC": 3108, + "TCTGTGTG": 3109, + "GTATGTA": 3110, + "CAATACA": 3111, + "TCAAGG": 3112, + "CCTCTAA": 3113, + "TGTGGG": 3114, + "GATCTGA": 3115, + "GTACTGA": 3116, + "TTAATTAA": 3117, + "GCAGAAAA": 3118, + "CTACATA": 3119, + "CCGGTG": 3120, + "GGGGAAAA": 3121, + "TACAAAAAA": 3122, + "TTTTGG": 3123, + "GTGAGAA": 3124, + "TCAATAAA": 3125, + "TCAAGTT": 3126, + "CTCAGGA": 3127, + "CTACTC": 3128, + "CAAATCA": 3129, + "GGCAGAA": 3130, + "CCCGAA": 3131, + "TGTTGTG": 3132, + "GAGCAAAA": 3133, + "TATTTGTG": 3134, + "GTAGGTT": 3135, + "CTACCTG": 3136, + "CACAAAAA": 3137, + "CTCAGG": 3138, + "GCTTTA": 3139, + "CAGAGCAA": 3140, + "CTCAGTG": 3141, + "GGAAGAGA": 3142, + "TAACCTG": 3143, + "GAAATATA": 3144, + "CGAGAA": 3145, + "GTGAGG": 3146, + "CATTTATA": 3147, + "GGCAGCA": 3148, + "TCTAAATT": 3149, + "CCCAGTG": 3150, + "GCCTAGG": 3151, + "TGCATTA": 3152, + "CCGTAA": 3153, + "CATTCCA": 3154, + "CTAGTTA": 3155, + "GACTTAA": 3156, + "CTATACA": 3157, + "GACACAA": 3158, + "TCTTCACA": 3159, + "CCGGTT": 3160, + "TAAAGTAA": 3161, + "CTGTGGA": 3162, + "TAAGGTG": 3163, + "TCCAGTA": 3164, + "CAAATTTA": 3165, + "AAATTAAAA": 3166, + "CCATCTA": 3167, + "CTCCCTT": 3168, + "CTCCTTTT": 3169, + "GAGAGAGAGAGA": 3170, + "GGAGATA": 3171, + "CCTATTA": 3172, + "CACCAAAA": 3173, + "CCGTTA": 3174, + "TGTTTATA": 3175, + "CTCAGGAGG": 3176, + "GACGTA": 3177, + "GTCCTTA": 3178, + "GAAAGTT": 3179, + "GCTGGTG": 3180, + "CTCTACA": 3181, + "CAATAGA": 3182, + "TAAAATATT": 3183, + "GTACCTG": 3184, + "GTACTAA": 3185, + "CTTTGAAA": 3186, + "CCTTTCC": 3187, + 
"TAAAAATTA": 3188, + "CTCGG": 3189, + "CAAGATA": 3190, + "CATTTGA": 3191, + "CACCTCA": 3192, + "GCCAGCC": 3193, + "GTCGG": 3194, + "GCACATA": 3195, + "CACTCAA": 3196, + "CTTTTAAAA": 3197, + "CAGGAATT": 3198, + "GCCTATT": 3199, + "TCTTTCTG": 3200, + "CTGAGGCAGGAGAA": 3201, + "CAGGCAGG": 3202, + "CTAGTAA": 3203, + "TCCATA": 3204, + "GAACTTA": 3205, + "CG": 3206, + "GCTGTGA": 3207, + "GAAAATA": 3208, + "TCTTCATT": 3209, + "GAGGGAGA": 3210, + "CCCATCC": 3211, + "GAGGTGGG": 3212, + "GCCTCTA": 3213, + "GTAGGTG": 3214, + "TAAACCA": 3215, + "GAAGGAAA": 3216, + "TATTGG": 3217, + "ATG": 3218, + "TCCAGTT": 3219, + "CCCACAA": 3220, + "GAAACACA": 3221, + "GTCTCAAAA": 3222, + "CTTTTCTTTT": 3223, + "TGAAGGA": 3224, + "TATTGATT": 3225, + "CTATGTA": 3226, + "AAAAAAAAAAAAAA": 3227, + "TCCTTAAA": 3228, + "GCGCTA": 3229, + "TCCACTT": 3230, + "GACTCAA": 3231, + "TAAATACA": 3232, + "TCATGGA": 3233, + "TCTGGGA": 3234, + "TCCTATG": 3235, + "CTGTGCA": 3236, + "TCAAGTGA": 3237, + "TCATAAAA": 3238, + "CATCCAA": 3239, + "CCTTCCA": 3240, + "CTGTACA": 3241, + "GAAGGTT": 3242, + "CTGTGTA": 3243, + "GTCACTT": 3244, + "TCACAAAA": 3245, + "TCAGGCA": 3246, + "GTGTTAAA": 3247, + "CCCTTAA": 3248, + "CAAAGTG": 3249, + "GAAATGTT": 3250, + "CTGGGGA": 3251, + "GACGCC": 3252, + "TATATGTG": 3253, + "CTAGATG": 3254, + "GAAATTAAA": 3255, + "GAATGCA": 3256, + "GCACTAA": 3257, + "CGGGAGG": 3258, + "GCCACAA": 3259, + "CGCTTA": 3260, + "TCCACAA": 3261, + "CAGATA": 3262, + "TCTGAATT": 3263, + "TATTATTTT": 3264, + "GCGCGG": 3265, + "CTCTGAAA": 3266, + "TCTCTTTG": 3267, + "TATTTCTA": 3268, + "GGGGTGGG": 3269, + "GGATGCA": 3270, + "CCACACC": 3271, + "TAAATGTG": 3272, + "TCTTCCTG": 3273, + "GCAAGG": 3274, + "CTGCTCC": 3275, + "CTGGAGTG": 3276, + "CTGTTAAA": 3277, + "CACACAAA": 3278, + "CTGACTT": 3279, + "GAAAAGAAAA": 3280, + "CCTTCTCC": 3281, + "GAAATAAAA": 3282, + "CCTCAGGTGA": 3283, + "GATAATG": 3284, + "GAATTGCTT": 3285, + "CCAAAATT": 3286, + "CGTGAAA": 3287, + "CACTGAAA": 3288, + "CAGTGAAA": 3289, + "GATCTTA": 
3290, + "GAGATGGG": 3291, + "TCTGCCA": 3292, + "TGAGGTA": 3293, + "TATGGAA": 3294, + "TATATTTTA": 3295, + "TGAACTT": 3296, + "GCAGATA": 3297, + "CTTTTCTT": 3298, + "GTAAAATG": 3299, + "TCTCTAA": 3300, + "TCTGCAAA": 3301, + "GAGCCTT": 3302, + "TATCATT": 3303, + "CAATTTTA": 3304, + "CCGCCA": 3305, + "TATTTAAAA": 3306, + "GAGAGATG": 3307, + "GAGATGGA": 3308, + "GCCAGGATG": 3309, + "CGAGTAGCTG": 3310, + "TTCATTTT": 3311, + "TATACTT": 3312, + "GTCTACA": 3313, + "GTGAGTGA": 3314, + "GCTACACA": 3315, + "GGGAGGA": 3316, + "CAAGGCA": 3317, + "GCTTTTAA": 3318, + "CACTATT": 3319, + "GTTCATA": 3320, + "TCCTC": 3321, + "GTGGACA": 3322, + "TATTTGGA": 3323, + "CTCCAGTA": 3324, + "GTTCAGTT": 3325, + "CCAAGG": 3326, + "CAGAGCC": 3327, + "CTCGCC": 3328, + "CCGATG": 3329, + "GGAATTTT": 3330, + "TCCAGCC": 3331, + "CCTCTTTT": 3332, + "GAACCTT": 3333, + "CATGCACA": 3334, + "GTTTC": 3335, + "GAAGATA": 3336, + "TACCCC": 3337, + "GCTGCCA": 3338, + "GGGGGAGG": 3339, + "GCAGTGAGCTGA": 3340, + "CTGTCTA": 3341, + "CGAGGA": 3342, + "CAATGGG": 3343, + "GCTGTGAA": 3344, + "GAAAGTG": 3345, + "TACCAAAA": 3346, + "GTCAGG": 3347, + "CAGCTCC": 3348, + "TGTGCTT": 3349, + "GTCTAGG": 3350, + "TTTTTGTA": 3351, + "TTATATG": 3352, + "TCAGGGG": 3353, + "TATTGTTA": 3354, + "CCTGAGA": 3355, + "TATCTCA": 3356, + "CAATCTG": 3357, + "CACTCTG": 3358, + "GATTTAA": 3359, + "TGAATAA": 3360, + "TCTTGTA": 3361, + "TCAACTG": 3362, + "TCTCCAGG": 3363, + "CTAGAGG": 3364, + "CTGAGAAA": 3365, + "CTAGCTG": 3366, + "TCCACCA": 3367, + "CGATTTT": 3368, + "CCGGCC": 3369, + "GTTGACA": 3370, + "CTTAGAA": 3371, + "CATAATG": 3372, + "GAGTATT": 3373, + "CACAGAAA": 3374, + "GACTGTG": 3375, + "CTATTTTA": 3376, + "TGAGGAAA": 3377, + "TTATTAAAA": 3378, + "CTTATTTA": 3379, + "CAGACTT": 3380, + "CACGCC": 3381, + "GCTTGG": 3382, + "CCTGCTT": 3383, + "TAAAGCAA": 3384, + "CCTCGTGA": 3385, + "TAGAATT": 3386, + "CTTACAA": 3387, + "TAAAGGAA": 3388, + "GTCTAGA": 3389, + "GTGACTT": 3390, + "TACATATG": 3391, + "GTCAGGA": 3392, + "GCTCCAGG": 3393, + 
"GAAGGGA": 3394, + "CATGATG": 3395, + "TCATCAAA": 3396, + "CGTTAAA": 3397, + "GTACTCA": 3398, + "CTCCCAA": 3399, + "TATATGTA": 3400, + "GGTATTTT": 3401, + "TAAGCCA": 3402, + "CGAAATT": 3403, + "GTTTGTTTT": 3404, + "TCTGTCTT": 3405, + "TATATCA": 3406, + "TGTTCATT": 3407, + "CAAACCA": 3408, + "TTCATTA": 3409, + "TATTTGTA": 3410, + "GATTGAA": 3411, + "CTATAAAA": 3412, + "GATTAATT": 3413, + "CCCACCA": 3414, + "TCCTAGG": 3415, + "TAAATGTA": 3416, + "CTCTTAAA": 3417, + "GCAGTCC": 3418, + "GCGGCTG": 3419, + "GTCTCGAA": 3420, + "TGAATGA": 3421, + "CTGGGGG": 3422, + "GTCTCGA": 3423, + "GAACAAAA": 3424, + "TGAATCA": 3425, + "TGTATTTTTAGTAGAGA": 3426, + "GTTATTAA": 3427, + "TTTTTTAAAA": 3428, + "GTCAGTG": 3429, + "CCCATTA": 3430, + "CACAGGA": 3431, + "TATTCCTT": 3432, + "TCTGCCTT": 3433, + "CCTGGTG": 3434, + "GCGAGC": 3435, + "TACTAAA": 3436, + "TACACAAA": 3437, + "CCGTCC": 3438, + "GCTTTGTT": 3439, + "GCATCCA": 3440, + "CATCTAA": 3441, + "GCTGTGTT": 3442, + "GTAGACA": 3443, + "GCCTATG": 3444, + "TCTTTGTG": 3445, + "GATTCTG": 3446, + "CGCCCGG": 3447, + "GATGAGA": 3448, + "TATCTGA": 3449, + "TGAATTTG": 3450, + "CCTGATG": 3451, + "TAAAACAA": 3452, + "CTTTAGG": 3453, + "TTTTCCTT": 3454, + "TGAATAAA": 3455, + "CGGGGA": 3456, + "CAAACATT": 3457, + "GTATGGA": 3458, + "GCTTAAAA": 3459, + "TACCAAA": 3460, + "CAAAGAGA": 3461, + "CTCCTGCC": 3462, + "GTAAAAAAA": 3463, + "CACAGCC": 3464, + "CCATGCA": 3465, + "TACAATT": 3466, + "CTAGTGA": 3467, + "CTGAGTT": 3468, + "GAGTGAAA": 3469, + "TCTGTTTG": 3470, + "CTGTAGG": 3471, + "TATAAAAAA": 3472, + "GCATTAAA": 3473, + "GTCCATA": 3474, + "TGTTAAAAA": 3475, + "TGTTTGA": 3476, + "GAATAGA": 3477, + "CTTCAAAA": 3478, + "CTGGACA": 3479, + "CTGTAGA": 3480, + "CCATTAAA": 3481, + "CTATCTG": 3482, + "CACTATG": 3483, + "TTATCAA": 3484, + "TAAGTAAA": 3485, + "TAATCCCAGCACTTTGGGAGGCC": 3486, + "CCAGAAAA": 3487, + "TGAAGCA": 3488, + "TCCCTTTT": 3489, + "TCATACA": 3490, + "TACGTT": 3491, + "GCCGTG": 3492, + "GGAAGTG": 3493, + "GGCCAAA": 3494, + "GTACCAA": 
3495, + "TCTCTACTAAAAATA": 3496, + "CATTGTG": 3497, + "TGTGTGA": 3498, + "GAAACAGA": 3499, + "CTTGACA": 3500, + "GATGAGG": 3501, + "GAGATTTT": 3502, + "CCTTCAA": 3503, + "GAATCTA": 3504, + "CTCTCCTT": 3505, + "GGCGGA": 3506, + "TCTATCTATCTATCTA": 3507, + "CACACAGA": 3508, + "TGTGTGTA": 3509, + "CAAAGCC": 3510, + "TGTGCCA": 3511, + "GTTGAAAA": 3512, + "CTCCAGCA": 3513, + "TCAAGGA": 3514, + "TAGCTCA": 3515, + "CGCTGA": 3516, + "CCTGAAAA": 3517, + "GACTATT": 3518, + "GATTCCA": 3519, + "GCTTCTA": 3520, + "GTCTGCC": 3521, + "CTTGGCA": 3522, + "TGTGGTA": 3523, + "GCTTTGA": 3524, + "GCTCTCTG": 3525, + "CTCACAGA": 3526, + "TCTTTAAA": 3527, + "CAAAGCAA": 3528, + "TACTTAA": 3529, + "GCTTCAA": 3530, + "CATTGAA": 3531, + "GGAGGAAA": 3532, + "CTATAGA": 3533, + "CTGAGGAA": 3534, + "CCTGGCA": 3535, + "CCCTATT": 3536, + "CTCGTG": 3537, + "TTACACA": 3538, + "TTAGGAA": 3539, + "CTGGTTA": 3540, + "GTTGTCC": 3541, + "TAATGAAAA": 3542, + "TATTTACA": 3543, + "GGGAATT": 3544, + "GTAGTTTT": 3545, + "GCTGCAA": 3546, + "CTACGG": 3547, + "GCCGGA": 3548, + "CTGGGCA": 3549, + "CCTTAAAA": 3550, + "GATGGAA": 3551, + "TAGATAGATAGATAGA": 3552, + "TATGTAA": 3553, + "GTACGG": 3554, + "TATTCAAA": 3555, + "GATCTCC": 3556, + "CCTGTTTT": 3557, + "TATTGCA": 3558, + "GGAAGGAAGGAAGGAA": 3559, + "GGTAATT": 3560, + "TTACAGA": 3561, + "TCAGC": 3562, + "GCAAAATG": 3563, + "GAGAGCA": 3564, + "GTAGAAAA": 3565, + "CATTTGAA": 3566, + "TCTTCTTTT": 3567, + "TCCCATA": 3568, + "GTTATTTA": 3569, + "CTATCTA": 3570, + "CATCCTG": 3571, + "TCTTGTG": 3572, + "TTATTATT": 3573, + "CCCGTC": 3574, + "TACTATG": 3575, + "TAAACATA": 3576, + "TAAGGAAA": 3577, + "GCTTGTG": 3578, + "CTCTAAAA": 3579, + "GTTTTAAAA": 3580, + "GACAGGA": 3581, + "TCCTAGA": 3582, + "TCCACCCA": 3583, + "GTTTGAAA": 3584, + "CCATCTCA": 3585, + "CTAAGAA": 3586, + "GTATCTA": 3587, + "GTGAGGA": 3588, + "GCTGGAGG": 3589, + "CCTGTAATCCCAGCTA": 3590, + "GCAACAA": 3591, + "CTTTCAAA": 3592, + "CAAATGTT": 3593, + "CTTGTCC": 3594, + "TCTCAAAAA": 3595, + "TATTTATTA": 
3596, + "TAAGGCA": 3597, + "GAGAGGAA": 3598, + "TATGATT": 3599, + "GCATCTA": 3600, + "CGTTATT": 3601, + "GCCTGTA": 3602, + "GTTTCAAA": 3603, + "CCTTCCTTCCTTCCTT": 3604, + "GGCTTTG": 3605, + "GTCAGAA": 3606, + "CATGCATG": 3607, + "GTCATTTA": 3608, + "CTGGAAAA": 3609, + "CTTCGA": 3610, + "CCTATTTT": 3611, + "CCAACAA": 3612, + "TCCATCC": 3613, + "TAAAGTTA": 3614, + "GTCTCTC": 3615, + "TAATCAAA": 3616, + "GATTTTTG": 3617, + "GATTTCTT": 3618, + "GGGCTGA": 3619, + "GCATGTA": 3620, + "CCTGGGTT": 3621, + "GAGACAA": 3622, + "GCTGTCA": 3623, + "TGATAGG": 3624, + "GGAGACC": 3625, + "CCGGCA": 3626, + "TAATCTCA": 3627, + "TGAATTAA": 3628, + "TCTGGTG": 3629, + "GCCTC": 3630, + "GGCGCA": 3631, + "CCAGCTA": 3632, + "CAGTCTG": 3633, + "TGAACTA": 3634, + "GTAAGAA": 3635, + "CCTTTCA": 3636, + "TCCATGA": 3637, + "CAAAGGAA": 3638, + "CTCTC": 3639, + "CTCTCTCA": 3640, + "CTCCAGC": 3641, + "GTAGATA": 3642, + "CCCCCTCC": 3643, + "GGCGCC": 3644, + "TCTGTCC": 3645, + "GACCATT": 3646, + "CTTGAAAA": 3647, + "TTATCC": 3648, + "TACATGTG": 3649, + "CAAATTTG": 3650, + "TTTTGTG": 3651, + "CAGAGTG": 3652, + "GTAATAA": 3653, + "GTGAGTG": 3654, + "TTTTTCC": 3655, + "GGCTCTG": 3656, + "GCCCTAA": 3657, + "GGCTGTT": 3658, + "CCCAATT": 3659, + "CAGAGCTT": 3660, + "TATAAATG": 3661, + "GAGTCTG": 3662, + "TCTTAAAAA": 3663, + "GTTTTATG": 3664, + "GATCCAA": 3665, + "GGCCCTG": 3666, + "GATCCTG": 3667, + "TCAAGTG": 3668, + "GATTCAA": 3669, + "CCTCTCTT": 3670, + "GAGACGG": 3671, + "CAGATCA": 3672, + "TAAAAGAA": 3673, + "CTGAGCAA": 3674, + "CCTGCCA": 3675, + "CCTTCTA": 3676, + "CGCTCA": 3677, + "GGCTGTG": 3678, + "TGGGAAAA": 3679, + "GGAGCCTG": 3680, + "CTGAGTG": 3681, + "CGTCAAA": 3682, + "TCAAGTA": 3683, + "CGTAATT": 3684, + "TTACTTA": 3685, + "TATACTA": 3686, + "GGGCAAA": 3687, + "CAACTTTT": 3688, + "CTTTGCC": 3689, + "GCCAGGAA": 3690, + "CACACTA": 3691, + "GCCCAGC": 3692, + "TAAATAAATAAATAAA": 3693, + "CTTTCCTT": 3694, + "GGGAGAA": 3695, + "TATGGTA": 3696, + "CGGCCA": 3697, + "CCTCTCTG": 3698, + "GAAAGCAA": 
3699, + "CAAGCCA": 3700, + "GGCGTT": 3701, + "CTCTTTTA": 3702, + "TCGGCCTCCCAAA": 3703, + "GATTTATT": 3704, + "CAAGTCC": 3705, + "TATCTTA": 3706, + "GTTCAAGACCA": 3707, + "CTCACACA": 3708, + "GAAATCAA": 3709, + "TGAGACC": 3710, + "GGGTAAA": 3711, + "GCTTGTT": 3712, + "GATTTTAA": 3713, + "TTTTTATA": 3714, + "CAGAGCTG": 3715, + "TCTGTTAA": 3716, + "GTAATTAA": 3717, + "TCTTTGAA": 3718, + "CTTGCCA": 3719, + "TTTTCATT": 3720, + "CCATGTA": 3721, + "TCTCGGCTCACTGCAA": 3722, + "GGATTCA": 3723, + "TCTATTAA": 3724, + "TACATAAA": 3725, + "GATTGATT": 3726, + "GGAGAGGA": 3727, + "CGCAAAA": 3728, + "GGACTAA": 3729, + "TTATGTG": 3730, + "GTCACTCA": 3731, + "GACAGCA": 3732, + "CGAGTT": 3733, + "GATGGTT": 3734, + "GGAAGAGG": 3735, + "GCCAACATGGTGAAA": 3736, + "GGAGCCA": 3737, + "TGAACTG": 3738, + "CCTCTGTG": 3739, + "GTATAAAA": 3740, + "TCCCAGAA": 3741, + "CATTTATG": 3742, + "GATTATG": 3743, + "TGTTTCTG": 3744, + "GAGTGGGTT": 3745, + "TACATATT": 3746, + "CTCCAGGA": 3747, + "GACACTG": 3748, + "GGTCTCA": 3749, + "CCGGGA": 3750, + "TGTTTAAA": 3751, + "CTCACCA": 3752, + "GGACTTA": 3753, + "GCCCACC": 3754, + "CAAATCAA": 3755, + "GAAATGTG": 3756, + "TAGTTAA": 3757, + "TCTATAA": 3758, + "TTAGATT": 3759, + "GTGTAGG": 3760, + "TACTGAAA": 3761, + "GCACCCA": 3762, + "GTGGGCTG": 3763, + "GAATGAAA": 3764, + "TCTAGTT": 3765, + "TCAGGAGA": 3766, + "TCCACTA": 3767, + "CTCAGTT": 3768, + "TACTTAAA": 3769, + "GACTCCA": 3770, + "TCCATTTG": 3771, + "CACAGCAA": 3772, + "GCTCATGCCTG": 3773, + "GGTGCTG": 3774, + "GCTTTCTT": 3775, + "GTGGCCA": 3776, + "TACGTG": 3777, + "GTGCAGTG": 3778, + "TGAAGTCA": 3779, + "CCTTTAA": 3780, + "TCTCAGCTCACTGCAA": 3781, + "GAAATATG": 3782, + "CCTCAAAA": 3783, + "GGGGCGG": 3784, + "CGACAA": 3785, + "GGTGATG": 3786, + "GTCTTAAA": 3787, + "CAGAAATG": 3788, + "CGTCATT": 3789, + "CCAAGCA": 3790, + "GGATCAA": 3791, + "GTGCTGGGATTA": 3792, + "GCTGGCC": 3793, + "CGGAGCTT": 3794, + "TACATGA": 3795, + "TGTTTGAA": 3796, + "TCTCCATT": 3797, + "TAAGCAAA": 3798, + "CCTTTCTT": 3799, + 
"TACTGTT": 3800, + "TCCATCTT": 3801, + "CTTACTT": 3802, + "CGGAGGTT": 3803, + "CAAAACAA": 3804, + "TCATAGG": 3805, + "TTACTAA": 3806, + "CTTATTTG": 3807, + "GAATGTA": 3808, + "CCCCATGGA": 3809, + "TTACTGA": 3810, + "CGGAAAA": 3811, + "CTCCAGTG": 3812, + "TGTTCCA": 3813, + "CAGATGAA": 3814, + "GTTGATA": 3815, + "TCCCCCC": 3816, + "CATTGCA": 3817, + "CTCAGCC": 3818, + "CTTACTG": 3819, + "TATCCTT": 3820, + "CTTTTATG": 3821, + "TGAGTAGCTG": 3822, + "GACTGAAA": 3823, + "CAATGAAA": 3824, + "CGACTG": 3825, + "CTTGGGA": 3826, + "GCAAGCA": 3827, + "TCACTCC": 3828, + "GATTTGA": 3829, + "CATTTTAAA": 3830, + "TCAACTA": 3831, + "GTCCAAAA": 3832, + "CACCCTG": 3833, + "TTACCTT": 3834, + "CAAGGGG": 3835, + "TTTTGGA": 3836, + "GTTATTTG": 3837, + "GCTACTG": 3838, + "CTGAGGCAGGAGAATG": 3839, + "GTGATGA": 3840, + "GTAGTC": 3841, + "TAGTATG": 3842, + "GTATAGA": 3843, + "GTGTCTA": 3844, + "GCTGCTA": 3845, + "TTAGTAA": 3846, + "TAAACATG": 3847, + "GTCACCA": 3848, + "CATCTTTT": 3849, + "CATATAA": 3850, + "TCTCTCTA": 3851, + "TTTTATTAA": 3852, + "TATTCTAA": 3853, + "GAAATTTA": 3854, + "CTTCCCTG": 3855, + "TAAAGATG": 3856, + "TACGTA": 3857, + "GTTTATTA": 3858, + "GAAAAGAA": 3859, + "CCCACCCA": 3860, + "CAATTAAAA": 3861, + "CCGACA": 3862, + "CAAAGTGA": 3863, + "CAAACAAAA": 3864, + "GCAATTTT": 3865, + "CGATTAA": 3866, + "TTAGAGA": 3867, + "CTGATGA": 3868, + "GGAGGAGG": 3869, + "GTCCTGGG": 3870, + "TCATGAAA": 3871, + "GCAACCA": 3872, + "GTTGGCA": 3873, + "GCGGCGG": 3874, + "GTCCCCA": 3875, + "GTAGGGG": 3876, + "GCCATGTT": 3877, + "GTTCGAGA": 3878, + "GCCTATA": 3879, + "TAAATTCA": 3880, + "GGCCATT": 3881, + "GAAAACAA": 3882, + "TGTGTATG": 3883, + "GTACTC": 3884, + "TAGGGAA": 3885, + "CCTTGAA": 3886, + "TCTATTTG": 3887, + "GAGGGCA": 3888, + "GAAACTGA": 3889, + "TACGC": 3890, + "TACAAAAA": 3891, + "TCATTATT": 3892, + "GGAAAATT": 3893, + "TCAATATT": 3894, + "CCCGTA": 3895, + "GGAGAGAA": 3896, + "TTAGTTA": 3897, + "CTCAGAGA": 3898, + "TCGAGC": 3899, + "CTAGTCA": 3900, + "GATGGCA": 3901, + 
"TGAACATT": 3902, + "CTATGGG": 3903, + "CACACCA": 3904, + "TCAATTAA": 3905, + "GGAACTG": 3906, + "TTACATG": 3907, + "CTTTCATT": 3908, + "CAGCTCTG": 3909, + "TCTTTTTTTT": 3910, + "TAAATCTT": 3911, + "TGATCTA": 3912, + "CATACAA": 3913, + "GCTCAAAA": 3914, + "GCTGTGTG": 3915, + "TCAATCA": 3916, + "GATTTGAA": 3917, + "CCAAGGA": 3918, + "GTCCTCA": 3919, + "GTGCTCC": 3920, + "AAAATAA": 3921, + "GTGACAA": 3922, + "GCTCACGCCTG": 3923, + "CGACGG": 3924, + "TATCCAA": 3925, + "CACACATG": 3926, + "TCTCTCTCC": 3927, + "TGTGGTT": 3928, + "CTTGGTA": 3929, + "TCTGGTT": 3930, + "TTTATAA": 3931, + "CTGCTTTT": 3932, + "TGTGTCA": 3933, + "CACATCA": 3934, + "CCTAATG": 3935, + "CGTTTTTT": 3936, + "GCTGGCA": 3937, + "GACGTC": 3938, + "TATAATTA": 3939, + "TACAGTAA": 3940, + "GAAAGTAA": 3941, + "GTCTGAAA": 3942, + "CCCATTTT": 3943, + "TATATGA": 3944, + "CTTGATA": 3945, + "CTTTATTTT": 3946, + "CTTTATTA": 3947, + "GGCGAA": 3948, + "CCATGCC": 3949, + "CCTGCCTT": 3950, + "GAAGAAGAAGAA": 3951, + "CTGACTGA": 3952, + "GCCCTTA": 3953, + "TATCTAA": 3954, + "GTGTTTTA": 3955, + "TGTGGCA": 3956, + "TATTGTAA": 3957, + "GCCAGAAA": 3958, + "CCCTGTCTC": 3959, + "CACAGGAA": 3960, + "AAAACAA": 3961, + "AAAAAAAAAAAAAAA": 3962, + "TAACTCC": 3963, + "GCCTAAA": 3964, + "CGAGTA": 3965, + "TAGTATT": 3966, + "GTATTTTTAGTAGAGA": 3967, + "GCTGCAGG": 3968, + "TATTGAAA": 3969, + "CCAGCCTGGG": 3970, + "GCTCCAAA": 3971, + "TACGAA": 3972, + "GGCCTCC": 3973, + "TATACAAA": 3974, + "CATGGCA": 3975, + "CATGCAA": 3976, + "TACACCA": 3977, + "CTTTACCA": 3978, + "TACAGAGA": 3979, + "TATTCTTA": 3980, + "TATGTCA": 3981, + "TCAAGCA": 3982, + "TCAATGA": 3983, + "GGCTCTT": 3984, + "GGAAGTT": 3985, + "TCCATGTT": 3986, + "GCTTTCC": 3987, + "TATGTGA": 3988, + "GTGTAGA": 3989, + "TTTTTAAAA": 3990, + "GCTGGAGA": 3991, + "GTGAGAGA": 3992, + "CCTAGAA": 3993, + "CCTCCAAA": 3994, + "CCAATGA": 3995, + "CAGGGCA": 3996, + "CTATGCA": 3997, + "CTTCACC": 3998, + "CTACAAAA": 3999, + "CTCACC": 4000, + "GAGTATG": 4001, + "TAGAAAAA": 4002, + 
"CTTTTGAA": 4003, + "TAAAGAGA": 4004, + "CATGTCA": 4005, + "TCTTTTAAA": 4006, + "CACAGTGA": 4007, + "GATCTAA": 4008, + "TAAGGTA": 4009, + "CATAGAA": 4010, + "CGCGCC": 4011, + "CAGCTTA": 4012, + "TATAGTT": 4013, + "CGGGCC": 4014, + "TATCCATT": 4015, + "TGTTTGTTTT": 4016, + "GCTGGCTG": 4017, + "TACAGGA": 4018, + "CTCCTTTG": 4019, + "CAATCTA": 4020, + "CCCCCTG": 4021, + "TATACTG": 4022, + "CTGAGCC": 4023, + "CGGTTA": 4024, + "TGAAGTG": 4025, + "GCTTCCTT": 4026, + "TTTTATTTG": 4027, + "TAGTGAA": 4028, + "CTGAGGTG": 4029, + "TCTTCTC": 4030, + "GACAGAAA": 4031, + "CTGAACTGAA": 4032, + "CCTGGGAA": 4033, + "TCCCCAAA": 4034, + "TATGTATT": 4035, + "GATTTCTG": 4036, + "CATTCAAA": 4037, + "CACAGTT": 4038, + "GCTTGAA": 4039, + "GTGGATCA": 4040, + "CTGAGTGA": 4041, + "TGAATTTA": 4042, + "TCAACAAA": 4043, + "GGTCATT": 4044, + "GTAATTTA": 4045, + "GCGACTT": 4046, + "CTGAGAGA": 4047, + "GTGCCCA": 4048, + "CTAGGTT": 4049, + "TCCTGAAA": 4050, + "GTCCACC": 4051, + "TCACAGAA": 4052, + "GCGAAAA": 4053, + "GTATGGG": 4054, + "TGAACAAA": 4055, + "TAAACAAAA": 4056, + "CCGTTTT": 4057, + "TCTCAATT": 4058, + "TCCAGAAA": 4059, + "GTAACAA": 4060, + "GCATTTTA": 4061, + "TCTCCATG": 4062, + "TTATAAAA": 4063, + "CAGGCAA": 4064, + "CTAAAAAAA": 4065, + "GTTGGGA": 4066, + "TAAAGATT": 4067, + "TGAAGAGA": 4068, + "CCCCTCA": 4069, + "TGTTTATG": 4070, + "TCTACTG": 4071, + "CCAATTTT": 4072, + "GGTGGTG": 4073, + "GGAACAA": 4074, + "TGTGGGA": 4075, + "TCTGCTA": 4076, + "GAACGA": 4077, + "GTAAGTA": 4078, + "GTTGCCA": 4079, + "AAAATTTT": 4080, + "GCGCGA": 4081, + "GAAAGATG": 4082, + "GTCTCTCA": 4083, + "TCCATCAA": 4084, + "GCAGCTA": 4085, + "CACATTTG": 4086, + "CTGACAA": 4087, + "TCCACC": 4088, + "GCT": 4089, + "CCCACTT": 4090, + "GCAGGTA": 4091, + "GAGGCCA": 4092, + "TAAAGTCA": 4093, + "CTGGATA": 4094, + "CGGCAA": 4095 + }, + "merges": [ + "A A", + "T T", + "T G", + "C A", + "C C", + "T A", + "G G", + "T C", + "G A", + "AA A", + "G C", + "T AA", + "TT TT", + "T CA", + "TG A", + "TT A", + "G AA", + "T CC", + "C 
AA", + "C TG", + "C TT", + "G TG", + "G TT", + "G CA", + "GG A", + "C CA", + "G TA", + "G CC", + "C TA", + "T AAA", + "AA AA", + "C TC", + "G TC", + "TG TG", + "TA TT", + "CA CA", + "G AAA", + "TA TA", + "TC TT", + "TG TT", + "C AAA", + "GA GA", + "CA TT", + "TG AA", + "CA GG", + "TC TG", + "CA GA", + "TC AA", + "GG AA", + "TAA AA", + "C TGA", + "GC TT", + "G TGA", + "GC TG", + "C TCA", + "CC TT", + "CA TG", + "GC AA", + "G TCA", + "G TAA", + "TTTT A", + "TA TG", + "GA GG", + "C GG", + "GA TT", + "CC TG", + "TC TC", + "CC AA", + "G TTA", + "C TCC", + "C TAA", + "TA CA", + "C TTA", + "TC CA", + "GA TG", + "TT AA", + "GAA AA", + "TT TG", + "G TTTT", + "TC TA", + "GC CA", + "G TCC", + "C TTTT", + "GG GG", + "C GA", + "TT TA", + "CC CA", + "CAA AA", + "TG GG", + "TA GA", + "TA GG", + "GA CA", + "GG TT", + "CC CC", + "GG TG", + "CA TA", + "GC TA", + "TG TA", + "TC AAA", + "TG GA", + "TAA TT", + "TTA TT", + "TG CA", + "GG CA", + "GA TA", + "CC TA", + "TT CA", + "TC TCA", + "GG GA", + "C GC", + "CTG AA", + "G TAAA", + "TC TCC", + "TTTT TT", + "C GTG", + "GC AAA", + "TAA AAA", + "TC TGA", + "TCA TT", + "GG AAA", + "TG AAA", + "TCC TT", + "CC AAA", + "GAA TT", + "C TAAA", + "C GTT", + "GTG AA", + "GG CC", + "TAA TA", + "GG TA", + "TG CC", + "CA CC", + "TGA TT", + "AAAA AA", + "GC TCA", + "TCC AA", + "GA GAA", + "CTG TT", + "TA TTA", + "CA GCA", + "CTC TT", + "CTT AA", + "CA GAA", + "GC TGA", + "GTT AA", + "TC TTA", + "TA TTTT", + "GCC AA", + "CTT TG", + "GA CC", + "C GCA", + "GTA TT", + "GTC TT", + "CAA TT", + "GTG TT", + "CTC AA", + "GGA GG", + "C GAA", + "TC TTTT", + "GTC AA", + "C GCC", + "TA TAA", + "TA CC", + "TC TAA", + "CCA TT", + "C GGA", + "CAA AAA", + "CA GTG", + "TCC TG", + "CTC TG", + "GAA AAA", + "CTG TG", + "CA GC", + "TTTT AA", + "GCA TT", + "GCC TT", + "TAA TG", + "CTA TT", + "GTT TG", + "TGA TG", + "GG CTG", + "CC TCA", + "GA GGA", + "GCC TG", + "AAA TT", + "C GTA", + "TC AAAA", + "TA CAA", + "CA TCA", + "CA GTT", + "TGA GA", + "GG GAA", + "CA CTG", + "CA 
CAA", + "CA GGA", + "CC CCA", + "CC CTG", + "TTTT TTTT", + "TA GAA", + "GA GCA", + "CC TCC", + "CA CCA", + "TA TCA", + "GA GC", + "CA TTA", + "CACA CACA", + "GA GTG", + "GGA TT", + "TGTG TGTG", + "TA CTT", + "CA CTT", + "GTC TG", + "TGA GG", + "GA GTT", + "GAA TG", + "TCA TG", + "GA CAA", + "GA CTT", + "TATT AA", + "TAA TAA", + "GG CCA", + "CA TTTT", + "CA GCC", + "CC CTT", + "GC TAA", + "TATA TATA", + "GTG TG", + "TA CTG", + "TA GTT", + "CAA TG", + "GC TC", + "CA GTA", + "GC TCC", + "CA TAA", + "TTA TG", + "TAAA TT", + "GA TGA", + "CA TGA", + "GC GG", + "AAAA AAAA", + "CCA TG", + "GA TAA", + "GA CTG", + "TA TGA", + "GCA GG", + "GA TCA", + "G TTTTA", + "GGA TG", + "CC TGA", + "G TAAAA", + "GAA GG", + "GA TTA", + "CC TC", + "GA CCA", + "GC TTA", + "CC CAA", + "AAA TG", + "GCA TG", + "TA GTA", + "TA CCA", + "GG CTT", + "C GTC", + "TC TCTT", + "GG TCA", + "TTA TTA", + "TA CTA", + "TA GCA", + "TA TC", + "CTG GG", + "CA TC", + "C TTTTA", + "C TAAAA", + "GTG GG", + "GA GTA", + "CCA GG", + "GA TTTT", + "TA GTG", + "GAAA TT", + "CA CTA", + "TC GG", + "TCA GG", + "CAGG AA", + "GC AAAA", + "CC TTA", + "CA TCC", + "CTT GG", + "TGTG AA", + "TATT TG", + "CC TAA", + "CTA TG", + "GA GAAA", + "GAGA GAGA", + "GC TTTT", + "TA TAAA", + "CAA GG", + "TC TCTG", + "TGTT AA", + "TGTG TT", + "GA GCC", + "GA CTA", + "TA TATT", + "TAA AAAA", + "TTTT TG", + "GTA TG", + "CATT AA", + "TA GGA", + "TA GC", + "GTT GG", + "GAA GAA", + "TAAA TG", + "TC TGTT", + "CA GAAA", + "CAAA TT", + "TAA TTA", + "TC TGTG", + "TA TCC", + "TGAA TT", + "CTC CA", + "GTG AAA", + "GG CAA", + "GGA GA", + "GAA GA", + "GG TGA", + "GG GCA", + "CC AAAA", + "TCTC TCTC", + "CTG CA", + "CTT CTT", + "TCTT AA", + "CC CTA", + "TGTG TG", + "AAA TA", + "TGTT TG", + "GG GTT", + "GTG CTG", + "GG AAAA", + "GG GGA", + "TCA GA", + "CC TTTT", + "GAAA TG", + "GCA GCA", + "TC TGAA", + "GG GTG", + "CACA TT", + "TCTT TG", + "GG GC", + "TCC CA", + "TC CATT", + "CTG AAA", + "CTT TA", + "TC GA", + "GTT TA", + "CAA CAA", + "CTT CC", + "GCC 
TCC", + "TT AAA", + "GC TCTG", + "GTT TCA", + "GGA GGA", + "C GTGA", + "CA GTC", + "GAA TA", + "CA GAGA", + "CC CTC", + "CAAA TG", + "CTG CTG", + "GA TCC", + "TTTTA TT", + "AAAA TT", + "TTA TA", + "TCAA TT", + "GG TAA", + "GTTA TT", + "GC CAGG", + "GGA GAA", + "CATT TG", + "TCA CC", + "CTC AAA", + "GG TTA", + "TCC AAA", + "TC TATT", + "GCA GA", + "CTT CA", + "TCA TCA", + "C GAGG", + "TAA CA", + "GTT GTT", + "CTTA TT", + "C GTCA", + "TAA GA", + "TAA TTTT", + "CTG TA", + "TC CACA", + "GC TGTG", + "C GCTG", + "TC TAAA", + "GC GA", + "CAA TA", + "CCA CCA", + "GAA CA", + "C GAAA", + "CAGA TT", + "TCA CA", + "TTA TTTT", + "TC TCAA", + "TGA CA", + "CTCC AA", + "AAAA AAA", + "TATA TG", + "TCC TCC", + "TCA CTT", + "TC CAGG", + "CAA GA", + "GG CTA", + "GTG GTG", + "C GTAA", + "C GAGA", + "TGA TA", + "GGA TTA", + "CAA CA", + "C GATT", + "TGA GAA", + "CTCC TT", + "CTCA TT", + "GTT AAA", + "TCA TA", + "CC TCTG", + "CTC TA", + "GC TGAA", + "CTG GA", + "TAA GG", + "CTT AAA", + "TATT TA", + "CCA CA", + "CC GG", + "GTC AAA", + "TG GAA", + "C GGAA", + "TGA TGA", + "GTT CA", + "TAA CAA", + "GC TGTT", + "TAA GAA", + "CTG CC", + "TTAA TT", + "CCA GA", + "TCA GAA", + "GTCA TT", + "C GCTT", + "GATT AA", + "CTGA TT", + "GC CACA", + "GTAA TT", + "TC CAGA", + "GCC AAA", + "GTGA TT", + "TAAAA TT", + "CAA GAA", + "CCA CC", + "TAA TCC", + "GTT CTT", + "TC CATG", + "GC TCTT", + "TG CTG", + "GG GTA", + "TTA CA", + "GC CATT", + "GCA CA", + "GCAA TT", + "TCC CTG", + "TG TGA", + "TC GAA", + "GGA CA", + "GGAA TT", + "GTG GA", + "CTT CTG", + "TCC CC", + "GCC CC", + "CTT GA", + "TAA TGA", + "TAAA TA", + "TATA TA", + "CTG CAA", + "TCA TTA", + "GTA TA", + "TCC CCA", + "C GTTA", + "GCA GAA", + "TGA GTT", + "CTTTT TT", + "C GATG", + "CTT TCA", + "AAAA TG", + "CAGG TT", + "CTAA TT", + "C GCCA", + "TGAA AAA", + "GTT CC", + "GTCC TT", + "GTCC AA", + "GTTTT TT", + "CTC TGA", + "GC GC", + "GTT GA", + "TGAA TG", + "CTA TA", + "GCA GTG", + "CCTT AA", + "TCA CCA", + "TCA CTG", + "GCC CTG", + "TAA CTT", + "CAGA 
TG", + "GTA GG", + "TC TATA", + "GAGA TT", + "GTC TA", + "TTTT AAA", + "CACA TG", + "TGA CC", + "CA CAAA", + "GTG TA", + "GG GAGG", + "GCTT TG", + "CAA AAAA", + "GA GGAA", + "GTT CTG", + "TTTT TA", + "GTC TCA", + "GTT CAA", + "TC GTG", + "GCTT AA", + "GCA CC", + "CTCC TG", + "TAAA TAAA", + "CTA CA", + "CTT CCA", + "TCC TCA", + "C GCAA", + "GAA AAAA", + "GCC CA", + "TC GTT", + "GTA GA", + "CTC TCA", + "GTC CA", + "TGA CTT", + "TCC CTT", + "GC CATG", + "CACACACA CACACACA", + "GTGA TG", + "CC TCTT", + "GC CAGA", + "TCC TA", + "C GTTTT", + "GTA CA", + "GCA TA", + "GAA TTA", + "TGTGTGTG TGTGTGTG", + "CC CAGG", + "GG TTTT", + "TCAA AAA", + "TC TATG", + "CCA TA", + "TGA CAA", + "GGA TA", + "TCA GTG", + "GTA TTTT", + "GAGA TG", + "GC GTG", + "C GTCC", + "TTAA AAA", + "TAA TCA", + "CAA TTA", + "CCA CTG", + "CGG TT", + "GTT GAA", + "TGA TTA", + "CCTT TG", + "CGG TG", + "CAGG TG", + "TCAA TG", + "CTGA TG", + "TCA GGA", + "GTT TAA", + "TATT AAA", + "CTC TTA", + "GCA GGA", + "CTC TCC", + "GAA CC", + "CTT TAA", + "GG GCC", + "GTA TTA", + "GC GCC", + "CCAA TT", + "GC TAAA", + "TGA CTG", + "GATT TG", + "GA TAAA", + "TCA GCA", + "GTT CCA", + "GAAA TA", + "GA CAAA", + "GA GTC", + "GC TATT", + "TCA CAA", + "GAGG TT", + "TAA CC", + "GAA GGA", + "GC TCAA", + "GAAAA TT", + "CCA GCA", + "GTTTT AA", + "GTG CC", + "TGA GGA", + "CA TAAA", + "GG TCC", + "TCA TTTT", + "TATT TATT", + "TAA TAAA", + "GCC TA", + "CTTTT AA", + "TAA GTG", + "TAA GTA", + "CTG GAA", + "CACA CA", + "GA CAGA", + "CAA CC", + "GG GAAA", + "CCA GAA", + "TCA GTT", + "TAA CTA", + "CTAA AAA", + "TGGG TT", + "TGA GTG", + "TAAAA TG", + "TATATATA TATATATA", + "GCA CTG", + "GA CTC", + "TA CAAA", + "TAAAA AAA", + "TC TACA", + "GTT GTG", + "TC GCC", + "CC CAAA", + "GTCA TG", + "CTG CTT", + "GGAA TG", + "CTA TTA", + "GA TATT", + "TA GAAA", + "GG CAGG", + "GA TGAA", + "GTA GAA", + "TCC TGA", + "TAA CTG", + "GCTG GG", + "GCAA TG", + "GCC CCA", + "GTT TGA", + "CATT TA", + "GTG CA", + "CTT GAA", + "GTG GAA", + "CTT CAA", + "TAAA TTA", 
+ "GTG GCA", + "TCC TTA", + "GGAA AAA", + "TTTT TTA", + "CC TGTG", + "GTAA TG", + "GTG TTA", + "CTA GG", + "CAGG CTG", + "GA CACA", + "GAAAA AAA", + "TC GC", + "GTAA AAA", + "TGTT TA", + "TCTC TA", + "GTCC TG", + "CCA GGA", + "GAA CAA", + "TAA GTT", + "TGA GCA", + "GC TCCA", + "TAA GCA", + "CTCA TG", + "GTC TTA", + "CC CACA", + "CA TATT", + "GCC TCA", + "CA CTC", + "CTT CTA", + "TGA TTTT", + "TC GCA", + "CC TGTT", + "GAA GCA", + "GCAA AAA", + "GC GGA", + "CCA CAA", + "GC GCA", + "CA TATA", + "GA CATT", + "GTT CTA", + "CAAAA TT", + "GAAA GAAA", + "CC CGG", + "TA CACA", + "CCAA AAA", + "GAGG TG", + "GG CTCA", + "CA GTGA", + "TCC CAA", + "TA TCTT", + "TGA GTA", + "TC GTA", + "TTTT CTT", + "GTG GGA", + "GA GCTG", + "CC CTCC", + "TAGG TT", + "TTA GG", + "TAA TATT", + "CCA GCC", + "CA TCTT", + "GTC TGA", + "GTT TCC", + "CC TGAA", + "GGA GCA", + "GAAAA TG", + "TCA GTA", + "TAA CCA", + "GA TGTT", + "CTG TTA", + "CA TGTT", + "GG CGG", + "CA TGTG", + "GG GAGA", + "CTT TGA", + "TCTT TCTT", + "AAAAAA AAA", + "GGGG TG", + "CTT TCC", + "CTT GTT", + "GCA TTA", + "CC CAGA", + "CAAA TA", + "TC GGA", + "CA GCTT", + "TCA CTA", + "TAA TTAA", + "TAA GGA", + "GAA CTG", + "GCA CAA", + "GC GTT", + "GG CTC", + "TC TTTTA", + "CC TCCA", + "GG CAAA", + "CA GCTG", + "CTA CAA", + "TA CATT", + "GC TATG", + "CTT GTG", + "GA GTCA", + "GTTA TG", + "CTG CCA", + "GTC TCC", + "TGA CCA", + "CA CCTG", + "TATA TTA", + "TGA TCA", + "CA GCAA", + "GA TGTG", + "GTC TTTT", + "CTA GAA", + "GC TACA", + "CTG GGA", + "GGGG TT", + "CAA GTA", + "CAA GGA", + "CC CTCA", + "TA GCC", + "GTT GGA", + "GC TATA", + "TCTG AAA", + "TA TGTT", + "CC CCTT", + "GTT GTA", + "CC CTGA", + "TGA CTA", + "CAA GCA", + "CAA TAA", + "GAA CTT", + "CA TGAA", + "CTTA TG", + "CTAA TG", + "TC TAAAA", + "CCAA TG", + "GAA GTG", + "CC TCAA", + "CC CATT", + "CA GTCA", + "GAGAGAGA GAGAGAGA", + "TA TGTG", + "GCA GTGA", + "TCTCC TT", + "TCC CAAA", + "CCA TTA", + "CCA GTG", + "GCA TCA", + "TCAAA TT", + "GA TCTT", + "GA CAGG", + "GGA GTG", + "GTA 
GTA", + "CAA CTT", + "GAA GTT", + "CC CCTG", + "TCTC AAA", + "GG GTC", + "GA GCTT", + "TATG AAA", + "TA TGAA", + "GA CATG", + "CAA GTG", + "GA TATA", + "CA TCTG", + "CTG TGA", + "TAA TTTA", + "GG CAGA", + "GC GAA", + "CC TAAA", + "CCA TCA", + "CA CTGA", + "GGA CTA", + "GA CGG", + "CTC TTTT", + "CTG TCA", + "TCTCTCTC TCTCTCTC", + "TTAA TG", + "GCA GCC", + "CAAAA AAA", + "GCA CCA", + "CTA TTTT", + "GA GCAA", + "CTT GGA", + "CTG GTG", + "GAA TAA", + "TCC TTTT", + "GAA GTA", + "CA GTAA", + "CAA CCA", + "CTG TAA", + "TGA TAA", + "GCA GTT", + "CA CGG", + "TAAA TAA", + "CTG TTTT", + "CTA CTA", + "GC TCTA", + "C GAAAA", + "CAA GTT", + "CTT GTA", + "GAA TGA", + "GA GTGA", + "GCC TGA", + "GG TTTG", + "CC CATG", + "GG GGAA", + "GAA GAAA", + "TG TTA", + "CAA TTTT", + "TATA TTTT", + "CTC AAAA", + "GG TGGG", + "CC GTG", + "TATT TCA", + "CC CCAA", + "TATT TAA", + "GG CTGA", + "GG TGTG", + "CA TCAA", + "CA CTCA", + "TCTCA TT", + "GAA TTTT", + "GAA TCA", + "CAGG AAA", + "CA TACA", + "TA TTTTA", + "TTA TAA", + "GAGG AAA", + "CA TATG", + "CTT TCTT", + "CAA CTG", + "GG GCTG", + "CC CCCA", + "TTTG AAA", + "CATT AAA", + "CTT AAAA", + "GA CTGA", + "CAA TGA", + "GG CACA", + "CCA GTA", + "GGA TGA", + "GTTTT TG", + "GCA TTTT", + "GTG CCA", + "GCA GTA", + "GCC CTT", + "TC GTC", + "GAA CTA", + "GTG GTT", + "GTG TGA", + "GTG CTT", + "C GCTA", + "GTG TCA", + "TCTT TA", + "GCC TTA", + "CC TATT", + "CAAAA TG", + "GAA CCA", + "CTC CAGG", + "GA CTCA", + "CATG AAA", + "GC TAGG", + "TGTT AAA", + "GC GTA", + "GCA CTT", + "TCTT AAA", + "TAA GAAA", + "GG CCTG", + "TCC CTA", + "GTG GTA", + "CTG CTA", + "GGA GTT", + "GG TAAA", + "CAAA CAAA", + "GA TATG", + "TCA TGA", + "GA CCTT", + "TAA TATA", + "GC TAGA", + "GGA CTG", + "GG CATT", + "CA GTTA", + "CC CTAA", + "CA CCTT", + "GG TGAA", + "CA GCTA", + "GTG TTTT", + "CAA CTA", + "GA TCAA", + "GA GAAAA", + "TGTG AAA", + "AAAA TA", + "GATG AAA", + "CTC TAA", + "TTA CTT", + "GA TCTG", + "CCA CTT", + "GA GTTA", + "CAA TCA", + "GGATTA CAGG", + "TTTA TTTT", + "TACA 
TA", + "TTTTA TG", + "GA GTAA", + "GCTG AAA", + "GTA CTG", + "GC TCTC", + "TATG TA", + "TGTG TA", + "TCA TAA", + "GGA CTT", + "TCTCC AA", + "GCA TGA", + "GA CGA", + "CGCC TG", + "GA CCTG", + "GG TCTT", + "CA CCAA", + "GA TC", + "GA CCAA", + "AAAA TTA", + "GTAAA TT", + "CCA GTT", + "CA GAAAA", + "TAA CAAA", + "GG TGTT", + "GAAA TTA", + "TGCC TCA", + "CC GCC", + "CCA TTTT", + "CTT GCC", + "TCTG TA", + "CTG GCA", + "GG GATG", + "CCA TGA", + "CTA CTT", + "TAGG TG", + "TAAAAA TT", + "GAAA GAA", + "TAAAA TA", + "CTTTT TG", + "GTC AAAA", + "GGA CAA", + "TCTGA TT", + "CTC TCTT", + "TAA TTTG", + "CTC TTTG", + "GG CCTT", + "GGA TTTT", + "CTA CTG", + "GTT GCA", + "GG CTCC", + "CTC TGTG", + "CTC CAGCC", + "TTA CAA", + "GGA CCA", + "GGAA GGAA", + "TAAA GAA", + "TTA GAA", + "GTG AAAA", + "CTT GCA", + "TGGG TG", + "GGA GCC", + "CC TCTA", + "C T", + "GG GCTT", + "GG CATG", + "CTG GTT", + "TA CAGA", + "GATT AAA", + "CTC TGTT", + "TTA TCA", + "CTG AAAA", + "GTA GTT", + "GG GTCA", + "G T", + "CA GCCA", + "GC GTC", + "CA CTTA", + "GTG CTA", + "TC TTATT", + "GTA CTT", + "GG TATT", + "TA GAGA", + "TA CATG", + "CCA CTA", + "TGA GAAA", + "CAA TAAA", + "TCC AAAA", + "CGTG AA", + "GG TCTG", + "CTGAA TT", + "TCA GCC", + "CC TCTC", + "GTT AAAA", + "GG GATT", + "TCC TAA", + "CA CTAA", + "GGA GAAA", + "CCTT CCTT", + "GTT TCTT", + "TA TCAA", + "GA TACA", + "TAATCC CAGCA", + "CC GCA", + "TGAAA TT", + "C GTAAA", + "CTC TCTG", + "TC TTTTTT", + "GTA CAA", + "CCAAA TT", + "TGTA TTTT", + "TC GCTT", + "GG GTGA", + "GA TAGA", + "CTT TATT", + "TAAA CAA", + "GTT TATT", + "TGAA TA", + "CTA CCA", + "GTG TCC", + "CC CGA", + "TTTA TTA", + "CTCC AAA", + "TTTTTTTT TTTT", + "TCA TCC", + "GAA GCC", + "CTAAA TT", + "CAAA TTA", + "CCCC AAA", + "TCTT CTT", + "TAGG AAA", + "CA CGA", + "CA TTTTA", + "GTG CAA", + "TCTCC TG", + "TATTTT AA", + "GTT TGTT", + "GA GCCA", + "GG CCAA", + "CATT TCA", + "CA TCCA", + "CC TATA", + "GA CTTA", + "TCAAA TG", + "GTA TCA", + "TAAA TTTT", + "CTGA GGCA", + "GCC CAA", + "GG TTAA", + "TA 
TCTG", + "TGA CAGA", + "GGA GAGA", + "GCTG CTG", + "CC CTTA", + "TCC TCTG", + "GTA GCA", + "CCTG AAA", + "CC GAA", + "TTTT TAA", + "CTA TAA", + "CCTG TA", + "TTA CTG", + "GTA TAA", + "GG CGA", + "GA CTAA", + "TCA GAAA", + "GTG TGTG", + "CAAA GAA", + "CC TATG", + "GCA GAGA", + "CC GTT", + "TTTTA TTTT", + "GGAA GAA", + "TTA CTA", + "GCC TGGG", + "TCC CTC", + "TCC TCTT", + "GGA TCA", + "GG TCAA", + "TC GAGA", + "TATT CTT", + "TA CTC", + "GTTAA TT", + "GC GAGA", + "CTTAA TT", + "TCC TTTG", + "GTC TAA", + "CA CCCA", + "GG GTTA", + "GG GCAA", + "GGAAA TG", + "GCAAA TT", + "TA GATG", + "GCA GAAA", + "AAAAAAAA AAAAAAAA", + "CC TACA", + "GGA GTA", + "TC TAATT", + "CAA CAAA", + "TA GATT", + "GG TTTA", + "CC TAGA", + "CTT TAAA", + "TA CTTA", + "TAA TGAA", + "CTA TCA", + "TA GTAA", + "CAGA GAA", + "CAA GAAA", + "GGGG AAA", + "CGTT AA", + "CGTG TT", + "TCTG TCTG", + "TTTTAA TT", + "CTG GCC", + "TAAA TGA", + "C GTCAA", + "TTA GTA", + "GTC TCTG", + "TTTT AAAA", + "CA GTTTT", + "CTT CCTT", + "TATA TAA", + "GC TTTTA", + "TTTT TCA", + "GG TC", + "TTA TTAA", + "TTTT GTT", + "CA TAGA", + "TA GGAA", + "GAGA GAA", + "GTA GCTG", + "TTA TGA", + "GTA GTG", + "GGA GAGG", + "CTC TGAA", + "TA GTC", + "GA CTCC", + "TCC CTCC", + "TAA TGTT", + "CA TCTA", + "GCCA CCA", + "GTA CTA", + "TGGG AAA", + "CGCC TT", + "GCC CGG", + "GGA GGAA", + "GTA CCA", + "CGC AAA", + "CA TAAAA", + "TAA CATT", + "GC TAAAA", + "TCTT CTG", + "GCC AAAA", + "GTA TGA", + "GTC TTTG", + "TA CTGA", + "TCC CAGG", + "TTA TTTA", + "TTA GTT", + "GGA CC", + "TA TAAAA", + "CAAA CAA", + "CTT CTC", + "TCTA TCTA", + "GAAA TAA", + "GTG TAA", + "CTT TGTT", + "GA TAAAA", + "GCC CAGG", + "GC GATT", + "AAAAAA TT", + "TA CAGG", + "GG CTAA", + "TA GCTT", + "GTC TCTA", + "CTCC TGA", + "GAA TAAA", + "TTA CCA", + "GG GACA", + "GCCA CTG", + "GTT TAAA", + "GTC TGTG", + "TGA CAAA", + "TACA TTTT", + "GCCA CC", + "TG TTTT", + "TA GCAA", + "TTA TAAA", + "GA CCCA", + "GCA GC", + "CAGA CAGA", + "CA CAAAA", + "GCC CTA", + "TATT AAAA", + "C GTATT", + "CCA 
TCC", + "TC GATT", + "GAA GGAA", + "GA TCCA", + "TATT TGA", + "GTGAA TT", + "TA CCTT", + "C GTCTT", + "CC TAGG", + "TC GAAA", + "CTT TCTG", + "TGAA GAA", + "TCTC TCA", + "GTC TCTT", + "GGA GGGG", + "GTC TGTT", + "CTA TGA", + "GGAAA TT", + "GCA CACA", + "GCC TTTT", + "CA GTCC", + "CTG GTA", + "GCA TCC", + "TA GTTA", + "GG CTTA", + "GA GTCC", + "TG AAAA", + "TAGA TAGA", + "TGTT TGTT", + "TA CTCA", + "CATT TAA", + "GA TTTTA", + "CA CTCC", + "GAAA CAA", + "GC GCTG", + "TCTT TCA", + "CTG TCC", + "GAA CTCA", + "CGG AAA", + "TATT GTT", + "GCA CTA", + "TATT CAA", + "GC GGGG", + "GTG GCC", + "TAATT AAA", + "TA CTAA", + "GC GGTG", + "TA CCAA", + "GG TATA", + "CTA GTT", + "GCA GAGG", + "CTTTT TTTT", + "TTTTTTTT TTTTTTTT", + "TACA GTA", + "CCA TGTT", + "TA GTGA", + "CGTG TG", + "GC TCTGA", + "CTT CCTG", + "TC GCTG", + "TAAA TCA", + "TCCAA TT", + "GTT TCTG", + "GAA GAGA", + "GG GTAA", + "CCA TAA", + "TTA TATT", + "C GAATT", + "CC GGA", + "TGA GCC", + "CC GTA", + "CAGA GGA", + "GTG TTTG", + "GA CAAAA", + "TTTTTT AAA", + "GTT GCC", + "GA GTTTT", + "TC AAAAAA", + "TGTT TCA", + "TA TCTA", + "TCTC TCC", + "CTC CACA", + "TAAA TATT", + "TTTT CTG", + "CTC TCAA", + "CCTT AAA", + "TCTTTT AA", + "GAA CAAA", + "TTA GCA", + "GCTCA TG", + "TAAA GTA", + "GGA TAA", + "TTATT AAA", + "CTC CATT", + "TCTC TGA", + "TTA TTTG", + "CCTG TAA", + "TTA TATA", + "GA CTTTT", + "TGTT GTT", + "GCAAA TG", + "CTT CAAA", + "GAA TATT", + "GAA TCC", + "CTC TTAA", + "GCA TAA", + "GAA TGAA", + "CTTAA AAA", + "TAAAAA TG", + "TTTTAA AAA", + "CTC TGGG", + "TGA TCC", + "GC TCTCA", + "CTC CAGA", + "GAGTG CAGTG", + "CAA TATT", + "TA GAAAA", + "GTAAA TG", + "TA GCTG", + "GC TCAAA", + "GCA GGAA", + "TA CCTG", + "GG GAAAA", + "TTTT CTA", + "GGGG GGGG", + "CC GA", + "CTT TGAA", + "GGA GGTG", + "TA GTCA", + "GG CCCA", + "TGA TGTT", + "CAAA TAA", + "TCTT CCA", + "GC GCTT", + "GTA TTTG", + "GTC TC", + "GAAA TCA", + "TGA TAAA", + "CATT CTT", + "TA TCCA", + "GCC TCTG", + "TGA GATG", + "C GCCAA", + "GTTTTA TT", + "TATA TATT", + 
"GTA GGA", + "GACA GAA", + "CTCCAGCC TGGG", + "GC GTGA", + "GG TATG", + "GAGG GAGG", + "TCA TTTG", + "CTA CC", + "TACA GAA", + "GG TAGA", + "GA TCTA", + "GTC CATG", + "TGA GGAA", + "TAA TAAAA", + "TAAA CTT", + "TCA CATT", + "GGA GGCC", + "TCA CAAA", + "CA CTTTT", + "CGG CC", + "CAA CAGA", + "GTA GAGA", + "GTTA TTTT", + "CGTT TG", + "TC GTCA", + "TCTG CTG", + "CAA CACA", + "GG TAGG", + "GCA GCTG", + "TAGTA GAGA", + "CAA GCC", + "GCA TTTG", + "TAA TATG", + "GCTT AAA", + "GCTT CTG", + "CTC TCCA", + "TCA TCTT", + "C GTCTG", + "TCA TTTA", + "CA TAGG", + "GC TCCTT", + "TGTT CTT", + "TACA TTA", + "CACA GAA", + "TAAA TATA", + "TA GAGG", + "GA TAGG", + "TCC TGAA", + "GGA GCTG", + "TGA TATT", + "TCA TTAA", + "CTTTT AAA", + "TC GTTA", + "TAAA CTA", + "GTT TGAA", + "TAAAA TTA", + "CA CCCC", + "TCA GAGA", + "CTCC TGCCTCA", + "TGA CATT", + "GTA TTTA", + "CTT CATT", + "GAAA CTG", + "TAA CACA", + "GTT CAAA", + "GGA GATG", + "TC GGCC", + "CAGCA TT", + "TC GATG", + "TATT CTA", + "CTG TGAA", + "TATT GAA", + "TTTT CCA", + "TATT TCTT", + "GGTG AAA", + "CTGA GAA", + "GCA CAGA", + "GC GAGG", + "CTG TGTG", + "TGAAA TG", + "TGA TGAA", + "GTCC AAA", + "CTCAA TT", + "TCCA GAA", + "GTA TATA", + "TAAA GTT", + "TCTC AAAA", + "TCCA TCA", + "GTC TGAA", + "TGA GAGA", + "TGA TTTG", + "TTA GCC", + "CTC CATG", + "TCC CTGA", + "GA GCTA", + "CCCC CCCC", + "GTG GAAA", + "CTG GGAA", + "CAA TGAA", + "CCA CACA", + "CTT TCAA", + "C GGAGG", + "TC GTGA", + "CCA GAAA", + "GTTTT AAA", + "TGTT GAA", + "TCC TGTG", + "CTAAA TG", + "TCC TTTA", + "GTC TGGG", + "TCTC TTTT", + "TA CGG", + "TATT GTA", + "TTA GTG", + "TTA CC", + "TAATCCCAGCA CTTTG", + "TCTG GAA", + "CTT CTCA", + "CGCA TT", + "TATT TAAA", + "TCA CACA", + "TAA TCAA", + "GC GAAA", + "GG GCCA", + "GTT CATT", + "GAGAA AAA", + "TTTT GTA", + "TA CTTTT", + "TC GAGG", + "GTGAA AAA", + "CAA TATA", + "TCC CATG", + "CAA TTAA", + "CTG GAAA", + "CCCA GCA", + "TCC CATT", + "TCC TGTT", + "CTC TTTA", + "TCC CCTT", + "GTT TCAA", + "GTC CAGG", + "GGAA GGA", + "TA GTTTT", 
+ "TGA CCTT", + "GTGCTG GGATTACAGG", + "TATT TATA", + "TCTG CAA", + "CTGAA AAA", + "TATG TTA", + "CTT CACA", + "GCA CAGG", + "CCTG CTG", + "TTTT TTAA", + "GTTA TTA", + "CC CTTTT", + "TGA TTTA", + "TA CAAAA", + "TAA GTAA", + "TTTT TAAA", + "CA TCTC", + "GTG GTGA", + "GTG GAGA", + "CTC TGCA", + "GTTAA AAA", + "TACA TACA", + "CTT TGTG", + "GGA CACA", + "TCTGA TG", + "TA TTATT", + "TCTT CTA", + "CTG TGTT", + "TCA GCTT", + "CTT TATA", + "GG CGC", + "TCC CTCA", + "GTA CC", + "TGGA GAA", + "CAAAAA TT", + "TCTT TAA", + "CTC TCTC", + "TGA GTGA", + "GCA GCTT", + "CGGA TT", + "TA CGA", + "TCTT GTT", + "TC GTAA", + "GCC TGTG", + "TATT CTG", + "GG GATA", + "GG GTCC", + "TGA GATT", + "CTTTTA TT", + "TCC CACA", + "CATG GTG", + "TTA GGA", + "GAA CACA", + "TCA TAAA", + "CAA CATT", + "GG TCCA", + "GAA TTTG", + "TATTAA TT", + "TCC TGGG", + "GCA GCAA", + "CTC TTCA", + "GAA GAGG", + "TCTG TCA", + "CTGAA TG", + "CCA CAAA", + "GTG GAGG", + "TGA TTAA", + "CTCC CTCC", + "CACACACACACACACA CACACACACACACACA", + "GC GATG", + "CATT CTG", + "GTA GAAA", + "TCA TCAA", + "TTTT CAA", + "TATG TATG", + "CCAAA TG", + "TAA TTTTA", + "TAA GGAA", + "CTT GAAA", + "AAAAAAAA AAAA", + "GC TCCTG", + "GCA GATG", + "GAAAAA TT", + "GA CGC", + "GTG GGGG", + "GTCAA TT", + "CTT GCTT", + "TGA CACA", + "GTG TGTT", + "CCA GAGA", + "CCCA GCC", + "TAAA GAAA", + "GTC CATT", + "TAAA TTAA", + "CC CAAAA", + "GAA TTAA", + "TGAA TTA", + "TTTT TTTG", + "CCA GCTT", + "CAA TTTG", + "CTG TTTG", + "GTC TCAA", + "GTT TGTG", + "GG CATA", + "GG TACA", + "TGA TGTG", + "GATT TCA", + "TCTG CTT", + "GTAA TTA", + "TAA AAAAAA", + "GCC GCC", + "TGTGTGTGTGTGTGTG TGTGTGTGTGTGTGTG", + "GC GTCA", + "GC TCATT", + "GAA CCTG", + "TAAA CAAA", + "GTG CTGA", + "TCA GGAA", + "TCC TCAA", + "TCTA TTTT", + "TCTG TTTT", + "CAGA GCA", + "CCA GGAA", + "GTC TTTA", + "TCTT CAA", + "TCAAAA TT", + "GC TTATT", + "GTT CCTT", + "CA CCTA", + "TCA CTGA", + "GAA GCAA", + "TAAA GA", + "TCC TTCA", + "TCTCA TG", + "TCA GTGA", + "TACA CAA", + "CA CGTG", + "CC TAAAA", + 
"GCC TTTG", + "GG CTTTT", + "GTT GAAA", + "GTT CTC", + "CTA GA", + "CTA CAAA", + "GCA CAAA", + "TTA CATT", + "GG CCCC", + "TAA TGTG", + "CTG CCTT", + "TCC CAGA", + "GTGAA TG", + "GGA CAGG", + "GGA TGTG", + "GTT TATA", + "TGA CCAA", + "GTG GCTG", + "GTT CTCA", + "CTTA TTTT", + "CTG GAGA", + "TTA CAAA", + "GTC TTCA", + "CAA GAGA", + "CCA TTTG", + "TCA CAGA", + "CTA GTA", + "CA TTATT", + "TTA GA", + "GC TCTCC", + "GC GCCA", + "TATG TTTT", + "TCC TCCA", + "CAGAA AAA", + "GTG GGAA", + "TAA TCTT", + "TGA GTCA", + "CTG CTC", + "GTC TCCA", + "TCA TGTT", + "GTT TCCA", + "TAA GCAA", + "CTAA AAATA", + "TGA CTGA", + "TC GGTT", + "TTA GAAA", + "TAA GCC", + "TAAA GCA", + "CC TCTCC", + "CC TCCTT", + "TCA GATT", + "TATG AAAA", + "GCTGA TG", + "CATA TTTT", + "GC TCCAA", + "CGG CGG", + "CCA CTGA", + "CA GCAAA", + "CTG TCTT", + "CTA GCA", + "TC GGGG", + "CACA GCA", + "GC TGATT", + "CTA GGA", + "TAA CTC", + "TCA TATT", + "CCTT CTT", + "CTG CAAA", + "CC CGC", + "GG TCTA", + "CCCA GGA", + "GTG TCTG", + "TAATAA TAATAA", + "TCA CATG", + "CAA TTTA", + "TATATATATATATATA TATATATATATATATA", + "CCA CAGA", + "TCAA TTTT", + "GTA TTAA", + "GAA CATT", + "TCTC TTA", + "CTA TTTG", + "TCTT TCC", + "GGTT AAA", + "GC TAATT", + "CTG CTGA", + "TA CCTA", + "CAGG GTT", + "TC GCCA", + "CAAAAA TTA", + "CTT CTGA", + "GCA TGTG", + "CTA TTAA", + "GCA CATG", + "CAA CATG", + "TCA TGAA", + "GAA TGTT", + "GG GTTTT", + "CTG CCTG", + "GTC CACA", + "TAAA CA", + "CTC TGGA", + "GA CCCC", + "GG CAAAA", + "TCTG TTA", + "CTA GTG", + "CTA TATA", + "TCA GTCA", + "TAA CTAA", + "GAA GATG", + "GTC TTAA", + "CAA GGAA", + "GTAA AAAA", + "TCC CCTG", + "TC GCAA", + "TCTG CCTG", + "CC TTTTA", + "GTCC CAGCTA", + "TATA TATG", + "TATT GTG", + "TGTG TTTT", + "GC GCAA", + "CACA GTG", + "TAA GATT", + "CTC TGTA", + "GGAGG CTGA", + "GGA CAAA", + "TATTAA AAA", + "TC GTCC", + "TC GGAA", + "CTA TAAA", + "CTT CAGA", + "CTA GAAA", + "CATT CAA", + "CA CGCA", + "CAGGA TT", + "CCA TCTT", + "GTA GCC", + "GAA TTTA", + "CA CGC", + "CAA TCC", + "TGA 
GCAA", + "GAA GCTG", + "TCAA TTA", + "GAA GTCA", + "CTG CACA", + "CCA CGG", + "GGA TCTT", + "CTCCTGCCTCA GCCTCC", + "TAAA TGAA", + "CC GTC", + "TC GGTG", + "TTTTA TTA", + "GCA GGGG", + "GCA GGTG", + "TCTA TTA", + "TAA CTTA", + "CTAA TTTT", + "CC CGCC", + "TAA TACA", + "GGATT AAA", + "TCTC TCTG", + "GCTT CTT", + "CATT TATT", + "CCA GAGG", + "GGA CAGA", + "GCCAA TT", + "TCC CCAA", + "GTT GATT", + "GAA GAAAA", + "GCA TTTA", + "CTC TAAA", + "CACACACA CACA", + "CC TCAAA", + "TA TAATT", + "CAA TGTT", + "GCC CAGA", + "GTA TATT", + "CTAA AAAA", + "CCA CAGG", + "TAA GAGA", + "TCC TTAA", + "TA TTTTTT", + "GAA TATA", + "GGA TTTG", + "GTG TGAA", + "CTG GCTT", + "GC GGCA", + "TCC GCC", + "GCA TCTT", + "TC TAATA", + "CTG CATT", + "CTC TGCC", + "TCA CTCA", + "TCA GCAA", + "TATTA TG", + "CCA GCTG", + "GA TCTC", + "GCC TCTT", + "CTT CCAA", + "TCC TAAA", + "TCA TCTG", + "CTA TTTA", + "CTG CAGG", + "CAA GCAA", + "GC GGAA", + "GAAA TAAA", + "TAAAA TAA", + "TCA CCTT", + "CCA TGTG", + "GA CCTA", + "CAGA TGA", + "GTG GCTT", + "TTATTA TTATTA", + "TCC CGG", + "TATT TGTT", + "CTG TAAA", + "TCCA TCCA", + "CTG TATA", + "GTT TCTA", + "GTT GCTT", + "CCA TGAA", + "GC TCTTA", + "CTT CATG", + "GTT CCTG", + "GCTG GGA", + "TCA GAGG", + "CATT AAAA", + "TCA GTAA", + "GAA TGTG", + "CTTA TTA", + "GCA CTGA", + "TGA GGTT", + "CA TCAAA", + "CTT CTCC", + "GTT TATG", + "CTT TCCA", + "GTG CCTG", + "GAAA GGA", + "GCA TCTG", + "TA CCCA", + "TAA CAGA", + "AAAAAAAA AAA", + "CTA TGAA", + "CA GTAAA", + "TA GCTA", + "TC GTTTT", + "GTG TCTT", + "GA GCAAA", + "TC TAAAAA", + "GTT CACA", + "GAAA TGA", + "CAAA TGA", + "GCC CTGA", + "GTG TTTA", + "TCA TGTG", + "CATA TTA", + "TCAAAA AAA", + "TAA GTTA", + "TCTC TCTT", + "CCA GTGA", + "CC TCTGA", + "CAA GATG", + "GCC TGTT", + "GTT TGGG", + "CATT CATT", + "GCC CCTG", + "GTT CTGA", + "GC GGCC", + "GC GGTT", + "CAAAA CAAAA", + "TACA TATA", + "GAATT AAA", + "TCAA GAA", + "CTG TATT", + "TTTT TATT", + "GA TTATT", + "TCTAA TG", + "GTT GCTG", + "TGAA TGAA", + "TCA GCTG", + "CTT 
GATT", + "CAGAA TG", + "CTAA TTA", + "TATAA TG", + "GTTTT GTTTT", + "CCA GCCTG", + "TGA TGGA", + "GCA GATT", + "CTC TATT", + "GCA GTCA", + "TAA GTGA", + "CTA CACA", + "CGCA TG", + "TA GCCA", + "GTG GCTCA", + "CAAA TAAA", + "GTG CTCA", + "TTTT TTTTTT", + "TAA CATG", + "TCCCA GCTA", + "CAAA GTA", + "TCA TATA", + "CAGCA TG", + "TGA TCTT", + "CA TAATT", + "TGTG TTA", + "TTTT GAA", + "TTAA TTA", + "GATA TTA", + "TCA TTCA", + "TGA TATA", + "TGA CTCA", + "GA CGTT", + "TGA CATG", + "GTT GTGA", + "CA TTTTTT", + "GCC TGGA", + "CTA TGTT", + "CTT TGGG", + "GTC TCAAA", + "CTG GCTG", + "CCA CATG", + "GG CGTG", + "CTTAA TG", + "TAA GATG", + "GTA TAAA", + "TGTA TTA", + "TAA CTCA", + "GAGAGAGAGAGAGAGA GAGAGAGAGAGAGAGA", + "GCA TGAA", + "GTTAA TG", + "TCCA GGA", + "GAGA GAAA", + "TCTC TGTG", + "CTC TCTA", + "CCA CCTG", + "GCCA GGA", + "CTG GAGG", + "CCA TTTA", + "GTC TGGA", + "GCC CACA", + "TAGA GAA", + "CAA CTCA", + "GGCA GGA", + "TCTTA TG", + "CAAA GGA", + "GG TAAAA", + "GAGA GGA", + "GTC CAGA", + "GCC CTCA", + "GATA TTTT", + "CAGG GAA", + "CCA CATT", + "GA GGAGG", + "GAAA CTT", + "CA GAATT", + "TCA GATG", + "TATT TCC", + "TACA GTG", + "TGA GCTG", + "CCA TCTG", + "GAGAA TG", + "TCAA CAA", + "A TT", + "TAA CTGA", + "TGA GAGG", + "CA CTGAA", + "CCA CCTT", + "CTG CAGA", + "TCA CCAA", + "TGA GCTT", + "CAAA GCA", + "GG TTTTA", + "CGG GGTT", + "TCCAA AAA", + "TATG TATA", + "CCA GATG", + "TCCA TTTT", + "CTG CTCA", + "GA TAATT", + "CCA CCAA", + "CTCC TCC", + "GA GAATT", + "GAAA GTA", + "TAAAA TAAAA", + "CTT CTTA", + "CTG TTTA", + "GAA TCAA", + "GCA TGTT", + "GCA CGG", + "GA CTGAA", + "GTG CACA", + "GA CGTG", + "TATA CAA", + "TC GACA", + "GAA GACA", + "TAAA GGA", + "GA TCAAA", + "CAGTG TG", + "CTA GCC", + "GAGG AAAA", + "TCTG AAAA", + "GAA CCCA", + "GATG GATG", + "GTT CTTA", + "CTA TATT", + "GCA TTAA", + "TCTCTCTCTCTCTCTC TCTCTCTCTCTCTCTC", + "TCA GTC", + "TATTTT TG", + "GAGGA TT", + "GTA TGTG", + "TAA CCAA", + "GTT GTTTT", + "TTTT TCTT", + "GTG TTAA", + "CTT GGAA", + "AAAAAA TG", + "CAA 
TGTG", + "GTG CCTT", + "GCC TCAA", + "GA GTCTT", + "GCTAA TTTT", + "CGAA AAA", + "GTG TATA", + "GC GTTA", + "CTGCA CTCCAGCCTGGG", + "GTT CATG", + "CAAA GAAA", + "GCA GTAA", + "GGA TGAA", + "CTT TATG", + "CAGG AAAA", + "TCC TGCA", + "CTG TCTG", + "GAA CATG", + "GGA TGGA", + "GCC TGAA", + "CAAAAA TG", + "TCCAA TG", + "CCA GCAA", + "GG CCTA", + "CAA CTGA", + "GCA CCTG", + "GTC TATT", + "CC TCTCA", + "GTG GTCA", + "GTG TAAA", + "GTA CACA", + "GTAAAA TT", + "GTA CATT", + "TATA TAAA", + "CTG TTAA", + "TAA GTCA", + "GCC TCCA", + "AAATT AAA", + "GTG CAGG", + "TCC TGGA", + "GTG CAAA", + "GC GTCC", + "CCA TTAA", + "GGA GGGA", + "TCA CTTA", + "TCATT AAA", + "CAA CATA", + "TAA TAGA", + "TAA TGTA", + "GA TTTTTT", + "GTT GTCA", + "GGA GACA", + "GTG TGGG", + "TCA CAGG", + "TC GGCA", + "CTCC CTG", + "GA CCAAA", + "TGTT TATT", + "CGAA TG", + "CTCAA TG", + "TCA CCTG", + "CA GTGTT", + "TGA GACA", + "TA GGGG", + "GAAAAA TG", + "GTT GAGA", + "TC GATA", + "CTC GGGAGG", + "GTT GTC", + "CCA GTCA", + "GCC CAGGCTG", + "GAA CAGA", + "GGCTCA CTGCAA", + "GCA GACA", + "TGA GGTG", + "CA CGTT", + "TAA GAAAA", + "CCA GGCA", + "GTA TCTT", + "CTTGG GAGG", + "CTT TCTA", + "CC GCTG", + "GA GCTCA", + "GAGA CAGA", + "CTT CAGG", + "GCA CATT", + "GTA CAAA", + "CTT GTAA", + "GTG GGTG", + "GAA GTGA", + "GG TCTC", + "GTA TGTT", + "GCA CTCA", + "TTA TGTT", + "CAA GTCA", + "CAA GTGA", + "GAAA CTA", + "TAAA TAAAA", + "TCTT AAAA", + "GTT GGAA", + "GTT CTAA", + "CCA CTC", + "CA GTGAA", + "GAAA GG", + "GCA CGA", + "TAA CTTTT", + "GTT GTTA", + "TCA GTTA", + "CGGA TG", + "TATT TGAA", + "CC CTGAA", + "GCC CTC", + "CTT CTAA", + "TTTG TTTT", + "GA GCTGA", + "CTG TGGG", + "CAA GATT", + "GAA GCTT", + "TGA GTAA", + "CTT GCTG", + "GGA TGGG", + "CGTA TG", + "TCCA TTA", + "GTC TGCA", + "GCCA TTTT", + "GTT GTAA", + "CACA CAA", + "GGACTA CAGG", + "C GTTTTA", + "TCTT CC", + "TAA CCTT", + "CTT TAAAA", + "TGAA TTTT", + "CTA CAGA", + "GCAA GAA", + "TAA CAAAA", + "CAATT AAA", + "CCA CTCA", + "CATG GTGAAA", + "CCCA GAA", + "CTA 
CATT", + "CC GAGG", + "TCCA GTG", + "TGA GTTA", + "GGA GTCA", + "TAA CGA", + "GA GTAAA", + "GA CTCTG", + "GGA GCTT", + "TA CTCC", + "CTG CATG", + "GC TTTTTT", + "GTC TAAA", + "GTG CGG", + "CA TCTCA", + "TGA TCAA", + "GGA GATT", + "GC AAAAAA", + "CA CCAAA", + "TGA CGG", + "CAGA GG", + "GTT GATG", + "CTT GTCA", + "TCCA CCTG", + "GGA GCAA", + "CAA GTAA", + "CCA TAAA", + "GTG CATG", + "GCA TATT", + "GTA GATT", + "GCC TAA", + "CTCAA AAA", + "GGA GAAAA", + "CTA TCC", + "TAATA TTA", + "GTG CTC", + "CAA TATG", + "TGTG GAA", + "TGA CTC", + "GTG TATG", + "TTTTAA TG", + "GC TCTAA", + "CACAA TG", + "CA GCTCA", + "GTT GGTT", + "CTAAAA TT", + "GTC TATG", + "TGTG AAAA", + "CTG GGTT", + "CCCC TCC", + "CC CTCTT", + "GCA GGGA", + "GAAA CCA", + "CATT TCC", + "GCA GCCA", + "TCA TATG", + "GCA GGCA", + "C GTAAAA", + "TGA CCTG", + "CAGA GGTT", + "CTT GTGA", + "TTA TCTT", + "CTG TATG", + "GTCAA TG", + "GGA CGG", + "GC GTAA", + "CAAA CTA", + "TAAA TGTT", + "CTT CGG", + "CTCC CCA", + "TACAA TG", + "TCTG TAA", + "GAA TATG", + "GC GGGA", + "GGA CATT", + "TTA TGAA", + "GGA TGTT", + "GGA CATG", + "TCA GGTG", + "CAA CAAAA", + "GAAA GAGA", + "GTG GATG", + "GG GCTA", + "CCA TCAA", + "CA GCTGA", + "CTC CACC", + "CAA TCAA", + "GTG GTC", + "TGA CAGG", + "CCA TTCA", + "GTCC CTG", + "CAGA CACA", + "GTT GGTG", + "CC TCCTG", + "GAA CTGA", + "TATT CATT", + "GCC CATG", + "CAA TCTT", + "GAAA GCA", + "GAA TCTG", + "TTA TTTTA", + "GTT TGGA", + "TTTT TGTT", + "GGGAA TG", + "GC GACA", + "TAAA CTG", + "CCA TATT", + "GGA TCC", + "CAA GCTT", + "TAAAAAA AAA", + "TCA CTC", + "CA CTGTT", + "TGTTAA TT", + "GGA CTGA", + "GGA GTGA", + "CATA CACA", + "GTT TGTA", + "TCCA GCA", + "GTG CATT", + "GG AAAAAA", + "CCAA GAA", + "TCAA TA", + "CTT CCCA", + "TGA GAAAA", + "GGCC TCCCAAA", + "CAA GCTG", + "GCC CAAA", + "TGA CTTA", + "CA GCCTT", + "CTG GATT", + "TTTT TTTA", + "TCA CGG", + "GCA GTTA", + "TGA CTAA", + "TTA CAGG", + "TGA TATG", + "TAA TTATT", + "TCTT GAA", + "GCC CCTT", + "GTT CAGA", + "CTC TATG", + "CCA TGGA", + "GAGG 
GAA", + "GGA GGCA", + "CTT TGCA", + "TCTT GG", + "GGA GGTT", + "GCCAA TG", + "CTG GTGA", + "CAA CCAA", + "CCA GTC", + "CTT GAGA", + "TACA GCA", + "CTT GTC", + "GA CGGA", + "CTT CTTTT", + "GTG GC", + "GAGGA TG", + "CAA TAAAA", + "GAAA TTTT", + "AAAA AAAAAA", + "CTC TATA", + "GTA TGAA", + "CTT GTTA", + "TAA CATA", + "CAAA CACA", + "TGATT AAA", + "GCTC TGTT", + "GTG GGTT", + "GTT GGGG", + "GTG TGTA", + "GTAA TTTT", + "GTA TCC", + "TGTGTGTG TGTG", + "TCTT CCTT", + "TCA CTAA", + "TCTCC AAA", + "TA TCAAA", + "TGA TGGG", + "GGA TATT", + "CAAA TTTT", + "GTT CAGG", + "GTG GATT", + "GTG CAGA", + "GCTG CC", + "CTCA GAA", + "GCA GTC", + "GGA TAAA", + "GCC TTCA", + "CCA GGTG", + "TA TCTC", + "CAA TGCA", + "CCCA CTG", + "GTG TATT", + "CGA CAGA", + "TGA GATA", + "CCA GGTT", + "TGTT TAA", + "CATCA TG", + "TGA TTCA", + "GCAA TTA", + "GAAA TGAA", + "CTT GGTT", + "GAA GATT", + "GGA TTAA", + "CC TCATT", + "GGCCA GGCTG", + "GCTA TTA", + "GCCA GCA", + "GAGA CAGG", + "CTT GAGG", + "CA GTCTT", + "GTT CTCC", + "TATT TCAA", + "TGA CGA", + "CATG AAAA", + "CATTA TG", + "TAAA TTTA", + "GA GTGAA", + "CAA CAGG", + "TAA GCTT", + "CACA TTTT", + "GA TCTCA", + "TA GTCC", + "GACC CTG", + "TAA TGCA", + "TAA GTC", + "TAA TAATT", + "GAA GTAA", + "CAA CTC", + "CA TCATT", + "GA CGAA", + "GAAA CAAA", + "TATT TCTG", + "CATTAA TT", + "CCA CCCC", + "TAATA TTTT", + "GTT TAAAA", + "GTA TCTG", + "GTCAA AAA", + "GATG CTG", + "TGTT CTG", + "GG TCAAA", + "GTA GGAA", + "GTA TATG", + "TGA TCTG", + "GGGG CTG", + "GCA TCAA", + "GCCAA AAA", + "CCA CGA", + "GC TAATG", + "CAGA GAAA", + "CCTT CTG", + "TCC TCTA", + "GCA GGTT", + "CTCA CTG", + "TAGA TTA", + "GCC GAGA", + "CCA TCCA", + "CTT TACA", + "GTA CATG", + "GCA CCAA", + "CTT TGTA", + "CTA TGTG", + "TCA CTTTT", + "TGA GTC", + "CAA GAAAA", + "CTGA CTG", + "GTTTT TTTT", + "GCA TAAA", + "TAA TCTG", + "GAA AAAAAA", + "CAGGA TG", + "TGA GCCA", + "GAA TTCA", + "TCA GACA", + "GTT CCAA", + "TCA GGTT", + "CAAA CTG", + "CATT TCTT", + "TGTT AAAA", + "CCA GACA", + "CAA GTTA", + 
"CATG TTA", + "CATT CTA", + "TCTTTT TG", + "TGA GGGG", + "CACA TTA", + "TAAAA TAAA", + "GCA TATA", + "TGTT CTA", + "GAA GGGG", + "GAGTG TG", + "TAA GACA", + "GAA CTC", + "CCA GTAA", + "GAGA GAGG", + "GC GACC", + "CAA TTCA", + "CGG CTG", + "CCA GATT", + "CCTG GG", + "GGAA GAAA", + "GAGA GG", + "TCAAAA TG", + "CCTCA TG", + "TAAA GG", + "CTT TGGA", + "CCA GGGA", + "GTA CAGA", + "CTGAGGCA GGA", + "TGTT TCTT", + "CCA GGCTG", + "CTGA GG", + "GAGG CTG", + "CTCC TGGG", + "GAA GTC", + "CGA CC", + "GGA CTCA", + "GGA GTC", + "CA CAATT", + "GTG TTCA", + "GA CTAAA", + "GTCA TTA", + "CAAAA TTA", + "TGAA GAAA", + "GCA CCTT", + "GTT TGCA", + "TCC TGCC", + "GTA GATG", + "GCC TGCA", + "GA GTTAA", + "TCC CTTA", + "GTG GTTA", + "TC GGGA", + "TACA TAA", + "TCTC TCCA", + "CA CTAAA", + "TATATATA TATA", + "GTG GCAA", + "CACCA TG", + "TTTG AAAA", + "CACA CTG", + "CTT GGTG", + "TACA CTG", + "CC TCCAA", + "CAA CCTT", + "CA GCCAA", + "TTTT CAAA", + "TGA TAGA", + "TACA CTA", + "TCTG GG", + "TCC CAGCA", + "TAGG AAAA", + "CTT GGGG", + "TC TGTGAA", + "CC TTATT", + "CATT TAAA", + "TTTTA TTTTA", + "GCC CTCC", + "CTGA GCA", + "CC CGTG", + "GTA GTGA", + "TCC TATT", + "GAA GGTG", + "TGTG CTG", + "TCCA CTG", + "TAA TCTA", + "TGA TGTA", + "GTG GTAA", + "TAA TGGA", + "GATG AAAA", + "GTA GTAA", + "GTG GGGA", + "GTG TCAA", + "CAGA CTG", + "TC GAAAA", + "CTCA TTA", + "TAA TAATA", + "CTCA GAAA", + "CA TCCTT", + "CC GCTT", + "GGAA GG", + "CC GTGA", + "CCA CTCC", + "CTA GAGA", + "TAGAA TG", + "GGA TTTA", + "TTAA TTTT", + "GC TAATA", + "TCC CCCA", + "CAAA TATT", + "GA TCATG", + "TCTTAA TT", + "CA GTATT", + "GTCTT GAA", + "CC GAAA", + "CTA TTCA", + "TAA GATA", + "CTT GCAA", + "GCC CCAA", + "TCC CTAA", + "GAA GTTA", + "GA TGATG", + "CTT GATG", + "CC CTAAA", + "CCTG CCTG", + "GACA TTTT", + "CCA GCCA", + "TGTGTGTG TG", + "GTC TATA", + "TCTC TGTT", + "GTC TGTA", + "TA TAATA", + "CTT GTTTT", + "CGC CATT", + "CTCA GCA", + "TACA GTT", + "CAA GAGG", + "GGAA GCA", + "GCC TTTA", + "CC CCATT", + "CAA CGA", + "GTCA TTTT", + 
"CC CGCA", + "CA GTTAA", + "GAA TCTT", + "CATG TTTT", + "CC GGGG", + "CTA CTGA", + "TCA CGA", + "TAAA TTTG", + "GCC CATT", + "CTC TAGG", + "GGA CCTG", + "TCA GGGA", + "GAGA CTG", + "CC AAAAAA", + "GCC GG", + "CCA GGGG", + "TCA GAAAA", + "CA TCTGA", + "TCTT CAAA", + "CTA CAGG", + "GAGG CAGG", + "CATT GTA", + "TAAA TCAA", + "GA CTCTT", + "CTGA TTA", + "GCA TATG", + "GGA CCTT", + "CAA GACA", + "TATT TATG", + "TATTTT AAA", + "CC GAGA", + "TCA TTTTA", + "CTCA CTCA", + "CCA CCCA", + "CTC TAGA", + "CTA CATG", + "GTG CTTA", + "CAA CCTG", + "TC TGTGTT", + "TAAA TATG", + "CAAA GG", + "CC CTGTT", + "GTT CGG", + "TGA TAAAA", + "CA CGAA", + "GTT GAGG", + "CAGA GTGA", + "GAAA TTAA", + "CACA TA", + "GAA CAGG", + "TCTCC TGA", + "CC TGAGG", + "GGAGG CCAA", + "GTT TACA", + "TAA CAGG", + "TGTG GTG", + "GCCTCC CAAA", + "CCA TCCTG", + "GATT CTT", + "GAA TGGA", + "GTA GTCA", + "CTCC TCTG", + "GAAAGAAA GAAAGAAA", + "CC CTGTG", + "CAGTA TG", + "GC GATA", + "GGA CTC", + "GAAA GA", + "TGTT GG", + "GTA GCTT", + "CA TTTTAA", + "CC CTCTG", + "GCA TTCA", + "CGA TTA", + "TCA CATA", + "TAA TGAAA", + "GGAA TTA", + "CTG TCAA", + "TAAATT AAA", + "CAA GTC", + "GTA TTCA", + "GGCCA TG", + "CTT TAGA", + "TGTT TCC", + "CATG TA", + "GAA TAAAA", + "CAA CTAA", + "TCA TCTA", + "CA CTCTT", + "CA GTTTG", + "CA TAAAAA", + "GCA TGCA", + "GATT TA", + "GAA CCAA", + "TCTG TGA", + "TCA GCCA", + "TCTC CACA", + "TCTCA GCTCA", + "TATCA TG", + "GCA CTTA", + "CGC CAGG", + "CGG GG", + "CATTAA AAA", + "TTTG TTA", + "GGA TATA", + "TC GACC", + "TAA TCCA", + "CC GC", + "CATT GTT", + "CCA GTTA", + "GTA GTTA", + "CTA GGAA", + "CC TAATT", + "TCA TGGG", + "GAA CTAA", + "GCTA TTTT", + "CC GTCA", + "CAGA TTA", + "CCA TATA", + "CAA CTTA", + "TCA GTTTT", + "CTA CCTT", + "GCA CTC", + "GTG TGGA", + "GTG CCAA", + "GACAA TG", + "GA CAATT", + "GTA CCTT", + "TAAA CATT", + "CA GGAGG", + "GTG CGA", + "GAAAA TTA", + "TCTCTT AA", + "CC GATT", + "GA TGATT", + "CCA TGGG", + "TC GGTA", + "CCA TATG", + "CCA GTCC", + "GCC TTAA", + "TGA TCCA", + 
"GTT GCAA", + "GTA GAGG", + "CAGA TTTT", + "GTA CTTA", + "TCTTTCTT TCTTTCTT", + "GCTC TGTG", + "TCAA TAA", + "GTT TAGA", + "GTT CGA", + "CAA GGTT", + "CTCA TTTT", + "CACA GG", + "CATG CTG", + "GAA CGG", + "TA TAAAAA", + "GAA GGCA", + "GA GCATT", + "TGTT TGTG", + "GCTG TTA", + "GTCA CTG", + "CAAA TGAA", + "GTGA CTG", + "GTT CTTTT", + "CAGGCTG GAGTGCAGTG", + "TGA TGAAA", + "TAA CGG", + "CTA CTAA", + "GACA TTA", + "GGA CGA", + "GAGCA TG", + "GCA TGGG", + "CCA CTTA", + "CTA TCAA", + "GCTG TTTT", + "GTC GTG", + "CCTG GCC", + "TCTC TGAA", + "TGTT GTA", + "CAGC CAGG", + "GTT TAGG", + "CC GCAA", + "GGA GTAA", + "CCAA TTA", + "CAGC AAAA", + "TCA TCCA", + "CA CGTA", + "TCA TAGA", + "TAATT AAAA", + "CA CTTAA", + "TCTT TATT", + "GAGA TTA", + "TAA GAGG", + "CAAA TTAA", + "GA CGCA", + "CA CGGA", + "GTG TGCA", + "TC T", + "TATTA TTA", + "GAAA TATT", + "GGA GTTA", + "TCTT TGA", + "CTGA TTTT", + "TGTGAA TT", + "TCC CACC", + "CC CTTTG", + "CAA GGTG", + "CAGA GTT", + "CCCCA TG", + "CTA CCAA", + "CTCC AAAA", + "CTT CCCC", + "CTG CTAA", + "GATT AAAA", + "GC TTATG", + "CTA CTTA", + "TAAAAAA TT", + "TCA GTCC", + "CTATT AAA", + "GAA TGGG", + "CACA GTA", + "CAA CGG", + "GG TTATT", + "TCA CCCA", + "TGA TGCA", + "TAA TTTTTT", + "GTT TGAGA", + "GTATT AAA", + "GCC CCCA", + "TATA GTA", + "TA GTAAA", + "TGA TACA", + "GTG GTTTT", + "CCA CTAA", + "CACA GAGA", + "CCTCTG CCTCC", + "CAA AAAAAA", + "CTC TCTCC", + "CA TAATA", + "GAA GCCA", + "GTT CCCA", + "TGTG TTTG", + "CAA TGGA", + "TGAA GTA", + "CTT CATA", + "CA CTGTG", + "GC TCTTTT", + "TGA CATA", + "TAAA GAAAA", + "GAGAAA TG", + "CAGG GAGG", + "TGTT CAA", + "GA GCCAA", + "GACA GAGA", + "GG CTGAA", + "CAAA TATA", + "GTG GAAAA", + "TAA GGTT", + "GTGA TTA", + "GGA TCTG", + "GATG TTA", + "GACTA CACA", + "TCC TATA", + "CTG CCAA", + "TCC CGA", + "GTGA TTTT", + "GC GTTTT", + "CAGA GTA", + "GAAA GGAA", + "CA CTTTG", + "CCCC AAAA", + "GCAA CCCA", + "TGCA TTTT", + "TCTA GAA", + "TA CTTTG", + "TGA GGCA", + "CA TCTCC", + "TC GCTA", + "TGA CTTTT", + "GA 
GCCTG", + "CATT TGTT", + "TCTT TGTT", + "GCAAAA TT", + "CC TGATT", + "GA TAAAAA", + "GA GTGTT", + "TCC TGTA", + "TACA GAAA", + "TC CAGGAA", + "GCCA GTG", + "TAGA TTTT", + "TAA TAGG", + "CTCC TCA", + "CATTTT TG", + "CATT TCAA", + "GCCA TCA", + "TAAAA TATA", + "GA CTGTT", + "GCA TGGA", + "CAAA GTT", + "CA TGATT", + "GA GTTTG", + "CTA GCAA", + "CTT CCTA", + "GG GGAGG", + "CTA TATG", + "TATT TATTTT", + "CA CCATT", + "CC CTCAA", + "TTTTTTTT TTTTTT", + "GA TCATT", + "GTA CATA", + "CTC CATA", + "CCCC GTCTCTA", + "GCC TGCC", + "CTA GCTT", + "CC CGGA", + "GATG TTTT", + "GTA TTTTA", + "TCA GATA", + "CCTG GAA", + "TATT CCA", + "GGA CCAA", + "GCCA TTA", + "CGA CTGA", + "TAA GCTG", + "TAAA CACA", + "GTT TCTC", + "CA TCTTA", + "GAAA TTTG", + "TAA TGGG", + "TAAAA TTTT", + "CTG TTCA", + "CCTG TTA", + "TA CTGAA", + "TGA CCCA", + "TGA TTTTA", + "CTCC TTA", + "TATA GAA", + "CTG CGG", + "GC GGTA", + "GTG CTAA", + "CAGA GGAA", + "TACA TCA", + "TCAA TCAA", + "CTG CAGCC", + "TGAA TATT", + "TCTA CAA", + "CCA CATA", + "CC CGTT", + "TATA CACA", + "TCC TCTC", + "TCTA CTT", + "CC GGAA", + "CTTTT TTA", + "GAAA GAAAA", + "CTA TCTT", + "GA CTTTG", + "TGAA CAA", + "GCA GTTTT", + "GC TAAAAA", + "GAGG CGG", + "TAA TAAAAA", + "CTG GTCA", + "CAGA CAA", + "GGA TATG", + "TGAA GG", + "GCCA GAA", + "CCA GGCC", + "CCA CCATG", + "CAAA CTT", + "TCA TGTA", + "GCTG CTT", + "GTAA TA", + "CCCC CAA", + "CA GCCTG", + "TCAA CTT", + "TAAAA TTAA", + "GCTG AAAA", + "CGA CGA", + "GTG GGCA", + "TGA GGGA", + "CGC TCC", + "TTTT GTTTT", + "GA GTCAA", + "TCA TGCA", + "CTG CTTA", + "TAA GTTTT", + "GTA GCAA", + "CCTT GG", + "TGA CAAAA", + "CTG GTAA", + "TCTT TATA", + "TGTG TGTT", + "CTG GTC", + "CTG GCAA", + "CATT TCTG", + "CTC TACC", + "CTGA GGA", + "CTAAAA TG", + "CTA GATT", + "GTA TCAA", + "CA GTCAA", + "CTG GGTG", + "CC TCTTA", + "TGA GTTTT", + "TTTTA TTTA", + "CC TTTTTT", + "TATA TACA", + "TA GCAAA", + "AAA TTA", + "CTG GATG", + "GA TAATA", + "GA CAAAAA", + "CCTG GGA", + "GCTT TCA", + "GTA CAGG", + "GCTG GAA", + "CTA 
CTCA", + "CAA TGTA", + "GC GTGAA", + "GA TCCTT", + "TATTAA TG", + "GCC CGA", + "TAAA GTG", + "GCTT CCA", + "CATG GAA", + "TGAA GTT", + "CTT TCTC", + "TCTGTG TG", + "GTA TGTA", + "CAA TACA", + "TCAA GG", + "CC TCTAA", + "TGTG GG", + "GA TCTGA", + "GTA CTGA", + "TTAA TTAA", + "GCA GAAAA", + "CTA CATA", + "CC GGTG", + "GGGG AAAA", + "TACAA AAAA", + "TTTT GG", + "GTGA GAA", + "TCAA TAAA", + "TCAA GTT", + "CTCA GGA", + "CTA CTC", + "CAAA TCA", + "GGCA GAA", + "CC CGAA", + "TGTT GTG", + "GAGC AAAA", + "TATT TGTG", + "GTA GGTT", + "CTA CCTG", + "CA CAAAAA", + "CTCA GG", + "GCTT TA", + "CAGA GCAA", + "CTCA GTG", + "GGAA GAGA", + "TAA CCTG", + "GAAA TATA", + "CGA GAA", + "GTGA GG", + "CATT TATA", + "GGCA GCA", + "TC TAAATT", + "CCCA GTG", + "GCC TAGG", + "TGCA TTA", + "CC GTAA", + "CATT CCA", + "CTA GTTA", + "GA CTTAA", + "CTA TACA", + "GACA CAA", + "TCTT CACA", + "CC GGTT", + "TAAA GTAA", + "CTG TGGA", + "TAA GGTG", + "TCCA GTA", + "CAAA TTTA", + "AAATT AAAA", + "CCA TCTA", + "CTCC CTT", + "CTCC TTTT", + "GAGAGAGA GAGA", + "GGA GATA", + "CCTA TTA", + "CACC AAAA", + "CC GTTA", + "TGTT TATA", + "CTCA GGAGG", + "GA CGTA", + "GTCC TTA", + "GAAA GTT", + "GCTG GTG", + "CTC TACA", + "CAA TAGA", + "TAAAA TATT", + "GTA CCTG", + "GTA CTAA", + "CTT TGAAA", + "CCTT TCC", + "TAAAAA TTA", + "CTC GG", + "CAA GATA", + "CATT TGA", + "CACC TCA", + "GCCA GCC", + "GTC GG", + "GCA CATA", + "CA CTCAA", + "CTTTT AAAA", + "CAGGAA TT", + "GCC TATT", + "TCTT TCTG", + "CTGAGGCA GGAGAA", + "CAGG CAGG", + "CTA GTAA", + "TCCA TA", + "GAA CTTA", + "C G", + "GCTG TGA", + "GAAAA TA", + "TCTT CATT", + "GAGG GAGA", + "CCCA TCC", + "GAGG TGGG", + "GCC TCTA", + "GTA GGTG", + "TAAA CCA", + "GAA GGAAA", + "TATT GG", + "A TG", + "TCCA GTT", + "CCCA CAA", + "GAAA CACA", + "GTC TCAAAA", + "CTTTT CTTTT", + "TGAA GGA", + "TATT GATT", + "CTA TGTA", + "AAAAAAAA AAAAAA", + "TCCTT AAA", + "GC GCTA", + "TCCA CTT", + "GA CTCAA", + "TAAA TACA", + "TCA TGGA", + "TCTG GGA", + "TCC TATG", + "CTG TGCA", + "TCAA GTGA", + "TCA 
TAAAA", + "CA TCCAA", + "CCTT CCA", + "CTG TACA", + "GAA GGTT", + "CTG TGTA", + "GTCA CTT", + "TCA CAAAA", + "TCA GGCA", + "GTGTT AAA", + "CC CTTAA", + "CAAA GTG", + "GAAA TGTT", + "CTG GGGA", + "GA CGCC", + "TATA TGTG", + "CTA GATG", + "GAAATT AAA", + "GAA TGCA", + "GCA CTAA", + "CGG GAGG", + "GCCA CAA", + "CGC TTA", + "TCCA CAA", + "CAGA TA", + "TC TGAATT", + "TATTA TTTT", + "GC GCGG", + "CTC TGAAA", + "TCTCTT TG", + "TATT TCTA", + "GGGG TGGG", + "GGA TGCA", + "CCA CACC", + "TAAA TGTG", + "TCTT CCTG", + "GCAA GG", + "CTG CTCC", + "CTG GAGTG", + "CTGTT AAA", + "CACA CAAA", + "CTGA CTT", + "GAAAA GAAAA", + "CCTT CTCC", + "GAAA TAAAA", + "CCTCA GGTGA", + "GA TAATG", + "GAATT GCTT", + "CCAAAA TT", + "CGTG AAA", + "CACTG AAA", + "CAGTG AAA", + "GA TCTTA", + "GAGA TGGG", + "TCTG CCA", + "TGA GGTA", + "TATG GAA", + "TATA TTTTA", + "TGAA CTT", + "GCA GATA", + "CTTTT CTT", + "GTAAAA TG", + "TCTC TAA", + "TCTG CAAA", + "GA GCCTT", + "TA TCATT", + "CAA TTTTA", + "CC GCCA", + "TATT TAAAA", + "GAGA GATG", + "GAGA TGGA", + "GCCA GGATG", + "CGA GTAGCTG", + "TTCA TTTT", + "TATA CTT", + "GTC TACA", + "GTGA GTGA", + "GCTA CACA", + "GGGA GGA", + "CAA GGCA", + "GC TTTTAA", + "CA CTATT", + "GTT CATA", + "TCC TC", + "GTG GACA", + "TATT TGGA", + "CTC CAGTA", + "GTT CAGTT", + "CCAA GG", + "CAGA GCC", + "CTC GCC", + "CC GATG", + "GGAA TTTT", + "TCCA GCC", + "CC TCTTTT", + "GAA CCTT", + "CATG CACA", + "GTT TC", + "GAA GATA", + "TA CCCC", + "GCTG CCA", + "GGGG GAGG", + "GCAGTGA GCTGA", + "CTG TCTA", + "CGA GGA", + "CAA TGGG", + "GC TGTGAA", + "GAAA GTG", + "TACC AAAA", + "GTCA GG", + "CAGC TCC", + "TGTG CTT", + "GTC TAGG", + "TTTT TGTA", + "TTA TATG", + "TCA GGGG", + "TATT GTTA", + "CC TGAGA", + "TA TCTCA", + "CAA TCTG", + "CA CTCTG", + "GATT TAA", + "TGAA TAA", + "TCTT GTA", + "TCAA CTG", + "TCTC CAGG", + "CTA GAGG", + "CTGA GAAA", + "CTA GCTG", + "TCCA CCA", + "CGA TTTT", + "CC GGCC", + "GTT GACA", + "CTTA GAA", + "CA TAATG", + "GA GTATT", + "CACA GAAA", + "GA CTGTG", + "CTA TTTTA", + 
"TGA GGAAA", + "TTATT AAAA", + "CTTA TTTA", + "CAGA CTT", + "CA CGCC", + "GCTT GG", + "CCTG CTT", + "TAAA GCAA", + "CCTC GTGA", + "TA GAATT", + "CTTA CAA", + "TAAA GGAA", + "GTC TAGA", + "GTGA CTT", + "TACA TATG", + "GTCA GGA", + "GCTC CAGG", + "GAA GGGA", + "CA TGATG", + "TCA TCAAA", + "CGTT AAA", + "GTA CTCA", + "CTCC CAA", + "TATA TGTA", + "GGTA TTTT", + "TAA GCCA", + "C GAAATT", + "GTTTG TTTT", + "TCTG TCTT", + "TATA TCA", + "TGTT CATT", + "CAAA CCA", + "TTCA TTA", + "TATT TGTA", + "GATT GAA", + "CTA TAAAA", + "GATTAA TT", + "CCCA CCA", + "TCC TAGG", + "TAAA TGTA", + "CTCTT AAA", + "GCA GTCC", + "GC GGCTG", + "GTC TCGAA", + "TGAA TGA", + "CTG GGGG", + "GTC TCGA", + "GAA CAAAA", + "TGAA TCA", + "TGTATTTT TAGTAGAGA", + "GTTA TTAA", + "TTTTTT AAAA", + "GTCA GTG", + "CCCA TTA", + "CACA GGA", + "TATT CCTT", + "TCTG CCTT", + "CCTG GTG", + "GC GAGC", + "TA CTAAA", + "TACA CAAA", + "CC GTCC", + "GCTT TGTT", + "GCA TCCA", + "CA TCTAA", + "GC TGTGTT", + "GTA GACA", + "GCC TATG", + "TCTT TGTG", + "GATT CTG", + "CGCC CGG", + "GA TGAGA", + "TA TCTGA", + "TGAA TTTG", + "CC TGATG", + "TAAAA CAA", + "CTT TAGG", + "TTTT CCTT", + "TGAA TAAA", + "CGG GGA", + "CAAA CATT", + "GTA TGGA", + "GCTT AAAA", + "TA CCAAA", + "CAAA GAGA", + "CTCC TGCC", + "GTAAAA AAA", + "CACA GCC", + "CCA TGCA", + "TA CAATT", + "CTA GTGA", + "CTGA GTT", + "GAGTG AAA", + "TCTGTT TG", + "CTG TAGG", + "TATAA AAAA", + "GCATT AAA", + "GTC CATA", + "TGTTAA AAA", + "TGTT TGA", + "GAA TAGA", + "CTT CAAAA", + "CTG GACA", + "CTG TAGA", + "CCATT AAA", + "CTA TCTG", + "CACTA TG", + "TTA TCAA", + "TAA GTAAA", + "TAATCCCAGCACTTTG GGAGGCC", + "CCA GAAAA", + "TGAA GCA", + "TCC CTTTT", + "TCA TACA", + "TA CGTT", + "GCC GTG", + "GGAA GTG", + "GG CCAAA", + "GTA CCAA", + "TCTCTA CTAAAAATA", + "CATT GTG", + "TGTG TGA", + "GAAA CAGA", + "CTT GACA", + "GA TGAGG", + "GAGA TTTT", + "CCTT CAA", + "GAA TCTA", + "CTC TCCTT", + "GG CGGA", + "TCTATCTA TCTATCTA", + "CACA CAGA", + "TGTG TGTA", + "CAAA GCC", + "TGTG CCA", + "GTT GAAAA", + 
"CTC CAGCA", + "TCAA GGA", + "TA GCTCA", + "CGC TGA", + "CCTG AAAA", + "GA CTATT", + "GATT CCA", + "GCTT CTA", + "GTC TGCC", + "CTT GGCA", + "TGTG GTA", + "GCTT TGA", + "GCTC TCTG", + "CTCA CAGA", + "TCTT TAAA", + "CAAA GCAA", + "TA CTTAA", + "GCTT CAA", + "CATT GAA", + "GGA GGAAA", + "CTA TAGA", + "CTGA GGAA", + "CCTG GCA", + "CC CTATT", + "CTC GTG", + "TTA CACA", + "TTA GGAA", + "CTG GTTA", + "GTT GTCC", + "TAATG AAAA", + "TATT TACA", + "GG GAATT", + "GTA GTTTT", + "GCTG CAA", + "CTA CGG", + "GCC GGA", + "CTG GGCA", + "CCTT AAAA", + "GATG GAA", + "TAGATAGA TAGATAGA", + "TATG TAA", + "GTA CGG", + "TATT CAAA", + "GA TCTCC", + "CCTG TTTT", + "TATT GCA", + "GGAAGGAA GGAAGGAA", + "GG TAATT", + "TTA CAGA", + "TCA GC", + "GCAAAA TG", + "GAGA GCA", + "GTA GAAAA", + "CATT TGAA", + "TCTT CTTTT", + "TCC CATA", + "GTTA TTTA", + "CTA TCTA", + "CA TCCTG", + "TCTT GTG", + "TTA TTATT", + "CC CGTC", + "TACTA TG", + "TAAA CATA", + "TAA GGAAA", + "GCTT GTG", + "CTC TAAAA", + "GTTTT AAAA", + "GACA GGA", + "TCC TAGA", + "TCCA CCCA", + "GTT TGAAA", + "CCA TCTCA", + "CTAA GAA", + "GTA TCTA", + "GTGA GGA", + "GCTG GAGG", + "CCTGTAA TCCCAGCTA", + "GCAA CAA", + "CTT TCAAA", + "CAAA TGTT", + "CTT GTCC", + "TCTCAA AAA", + "TATT TATTA", + "TAA GGCA", + "GAGA GGAA", + "TA TGATT", + "GCA TCTA", + "C GTTATT", + "GCC TGTA", + "GTT TCAAA", + "CCTTCCTT CCTTCCTT", + "GG CTTTG", + "GTCA GAA", + "CATG CATG", + "GTCA TTTA", + "CTG GAAAA", + "CTT CGA", + "CCTA TTTT", + "CCAA CAA", + "TCCA TCC", + "TAAA GTTA", + "GTC TCTC", + "TAA TCAAA", + "GATTTT TG", + "GATT TCTT", + "GG GCTGA", + "GCA TGTA", + "CCTG GGTT", + "GAGA CAA", + "GCTG TCA", + "TGA TAGG", + "GGA GACC", + "CC GGCA", + "TAA TCTCA", + "TGAA TTAA", + "TCTG GTG", + "GCC TC", + "GG CGCA", + "CCA GCTA", + "CA GTCTG", + "TGAA CTA", + "GTAA GAA", + "CCTT TCA", + "TCCA TGA", + "CAAA GGAA", + "CTC TC", + "CTC TCTCA", + "CTC CAGC", + "GTA GATA", + "CCCC CTCC", + "GG CGCC", + "TCTG TCC", + "GA CCATT", + "CTT GAAAA", + "TTA TCC", + "TACA TGTG", + "CAAA 
TTTG", + "TTTT GTG", + "CAGA GTG", + "GTAA TAA", + "GTGA GTG", + "TTTT TCC", + "GG CTCTG", + "GCC CTAA", + "GG CTGTT", + "CC CAATT", + "CAGA GCTT", + "TATAAA TG", + "GA GTCTG", + "TCTTAA AAA", + "GTTTTA TG", + "GA TCCAA", + "GGCC CTG", + "GA TCCTG", + "TCAA GTG", + "GATT CAA", + "CCTC TCTT", + "GAGA CGG", + "CAGA TCA", + "TAAAA GAA", + "CTGA GCAA", + "CCTG CCA", + "CCTT CTA", + "CGC TCA", + "GG CTGTG", + "TGGG AAAA", + "GGA GCCTG", + "CTGA GTG", + "CGTC AAA", + "TCAA GTA", + "CGTAA TT", + "TTA CTTA", + "TATA CTA", + "GG GCAAA", + "CAA CTTTT", + "CTT TGCC", + "GC CAGGAA", + "CACA CTA", + "GCC CAGC", + "TAAATAAA TAAATAAA", + "CTT TCCTT", + "GGGA GAA", + "TATG GTA", + "CGG CCA", + "CCTC TCTG", + "GAAA GCAA", + "CAA GCCA", + "GG CGTT", + "CTC TTTTA", + "TCGGCC TCCCAAA", + "GATT TATT", + "CAA GTCC", + "TA TCTTA", + "GTTCAA GACCA", + "CTCA CACA", + "GAAA TCAA", + "TGA GACC", + "GG GTAAA", + "GCTT GTT", + "GA TTTTAA", + "TTTT TATA", + "CAGA GCTG", + "TC TGTTAA", + "GTAA TTAA", + "TCTT TGAA", + "CTT GCCA", + "TTTT CATT", + "CCA TGTA", + "TCTC GGCTCACTGCAA", + "GGA TTCA", + "TC TATTAA", + "TACA TAAA", + "GATT GATT", + "GGA GAGGA", + "CGC AAAA", + "GGA CTAA", + "TTA TGTG", + "GTCA CTCA", + "GACA GCA", + "CGA GTT", + "GATG GTT", + "GGAA GAGG", + "GCCAA CATGGTGAAA", + "GGA GCCA", + "TGAA CTG", + "CCTC TGTG", + "GTA TAAAA", + "TCC CAGAA", + "CATT TATG", + "GA TTATG", + "TGTT TCTG", + "GAGTG GGTT", + "TACA TATT", + "CTC CAGGA", + "GACA CTG", + "GG TCTCA", + "CC GGGA", + "TGTT TAAA", + "CTCA CCA", + "GGA CTTA", + "GCC CACC", + "CAAA TCAA", + "GAAA TGTG", + "TA GTTAA", + "TCTA TAA", + "TTA GATT", + "GTG TAGG", + "TACTG AAA", + "GCA CCCA", + "GTG GGCTG", + "GAA TGAAA", + "TCTA GTT", + "TCA GGAGA", + "TCCA CTA", + "CTCA GTT", + "TACTT AAA", + "GA CTCCA", + "TCCATT TG", + "CACA GCAA", + "GCTCATG CCTG", + "GGTG CTG", + "GCTT TCTT", + "GTG GCCA", + "TA CGTG", + "GTG CAGTG", + "TGAA GTCA", + "CCTT TAA", + "TCTCAGCTCA CTGCAA", + "GAAA TATG", + "CC TCAAAA", + "GGGG CGG", + "CGA CAA", + 
"GG TGATG", + "GTCTT AAA", + "CAGAAA TG", + "CGTCA TT", + "CCAA GCA", + "GGA TCAA", + "GTGCTG GGATTA", + "GCTG GCC", + "CGGA GCTT", + "TACA TGA", + "TGTT TGAA", + "TCTC CATT", + "TAA GCAAA", + "CCTT TCTT", + "TA CTGTT", + "TCCA TCTT", + "CTTA CTT", + "CGGA GGTT", + "CAAAA CAA", + "TCA TAGG", + "TTA CTAA", + "CTTA TTTG", + "GAA TGTA", + "CCCCA TGGA", + "TTA CTGA", + "CGG AAAA", + "CTC CAGTG", + "TGTT CCA", + "CAGA TGAA", + "GTT GATA", + "TCC CCCC", + "CATT GCA", + "CTCA GCC", + "CTTA CTG", + "TA TCCTT", + "CTTTTA TG", + "TGAGTA GCTG", + "GACTG AAA", + "CAA TGAAA", + "CGA CTG", + "CTT GGGA", + "GCAA GCA", + "TCA CTCC", + "GATT TGA", + "CATTTT AAA", + "TCAA CTA", + "GTCC AAAA", + "CACC CTG", + "TTA CCTT", + "CAA GGGG", + "TTTT GGA", + "GTTA TTTG", + "GCTA CTG", + "CTGAGGCAGGA GAATG", + "GTGA TGA", + "GTA GTC", + "TAGTA TG", + "GTA TAGA", + "GTG TCTA", + "GCTG CTA", + "TTA GTAA", + "TAAA CATG", + "GTCA CCA", + "CA TCTTTT", + "CATA TAA", + "TCTC TCTA", + "TTTTA TTAA", + "TATT CTAA", + "GAAA TTTA", + "CTT CCCTG", + "TAAA GATG", + "TA CGTA", + "GTT TATTA", + "GAAAA GAA", + "CCCA CCCA", + "CAATT AAAA", + "CC GACA", + "CAAA GTGA", + "CAAA CAAAA", + "GCAA TTTT", + "CGATT AA", + "TTA GAGA", + "CTGA TGA", + "GGA GGAGG", + "GTCC TGGG", + "TCA TGAAA", + "GCAA CCA", + "GTT GGCA", + "GCGG CGG", + "GTCC CCA", + "GTA GGGG", + "GCCA TGTT", + "GTT CGAGA", + "GCC TATA", + "TAAA TTCA", + "GG CCATT", + "GAAAA CAA", + "TGTG TATG", + "GTA CTC", + "TAGG GAA", + "CCTT GAA", + "TC TATTTG", + "GAGG GCA", + "GAAA CTGA", + "TA CGC", + "TA CAAAAA", + "TCA TTATT", + "GGAAAA TT", + "TCAA TATT", + "CC CGTA", + "GGA GAGAA", + "TTA GTTA", + "CTCA GAGA", + "TC GAGC", + "CTA GTCA", + "GATG GCA", + "TGAA CATT", + "CTA TGGG", + "CACA CCA", + "TCAA TTAA", + "GGAA CTG", + "TTA CATG", + "CTT TCATT", + "CAGC TCTG", + "TCTTTT TTTT", + "TAAA TCTT", + "TGA TCTA", + "CATA CAA", + "GC TCAAAA", + "GC TGTGTG", + "TCAA TCA", + "GATT TGAA", + "CCAA GGA", + "GTCC TCA", + "GTG CTCC", + "AAAA TAA", + "GTGA CAA", + "GCTCA 
CGCCTG", + "CGA CGG", + "TA TCCAA", + "CACA CATG", + "TCTC TCTCC", + "TGTG GTT", + "CTT GGTA", + "TCTG GTT", + "TTTA TAA", + "CTG CTTTT", + "TGTG TCA", + "CACA TCA", + "CC TAATG", + "C GTTTTTT", + "GCTG GCA", + "GA CGTC", + "TATAA TTA", + "TACA GTAA", + "GAAA GTAA", + "GTC TGAAA", + "CCCA TTTT", + "TATA TGA", + "CTT GATA", + "CTT TATTTT", + "CTT TATTA", + "GG CGAA", + "CCA TGCC", + "CCTG CCTT", + "GAAGAA GAAGAA", + "CTGA CTGA", + "GCC CTTA", + "TA TCTAA", + "GTG TTTTA", + "TGTG GCA", + "TATT GTAA", + "GCCA GAAA", + "CCCTG TCTC", + "CACA GGAA", + "AAAA CAA", + "AAAAAAAA AAAAAAA", + "TAA CTCC", + "GCC TAAA", + "CGA GTA", + "TA GTATT", + "GTATTTT TAGTAGAGA", + "GCTG CAGG", + "TATT GAAA", + "CCAGCC TGGG", + "GCTCC AAA", + "TA CGAA", + "GGCC TCC", + "TATA CAAA", + "CATG GCA", + "CATG CAA", + "TACA CCA", + "CTT TACCA", + "TACA GAGA", + "TATT CTTA", + "TATG TCA", + "TCAA GCA", + "TCAA TGA", + "GG CTCTT", + "GGAA GTT", + "TCCA TGTT", + "GCTT TCC", + "TATG TGA", + "GTG TAGA", + "TTTT TAAAA", + "GCTG GAGA", + "GTGA GAGA", + "CCTA GAA", + "CCTCC AAA", + "CCAA TGA", + "CAGG GCA", + "CTA TGCA", + "CTT CACC", + "CTA CAAAA", + "CTCA CC", + "GAGTA TG", + "TA GAAAAA", + "CTTTT GAA", + "TAAA GAGA", + "CATG TCA", + "TCTTTT AAA", + "CACA GTGA", + "GA TCTAA", + "TAA GGTA", + "CATA GAA", + "CGC GCC", + "CAGC TTA", + "TATA GTT", + "CGG GCC", + "TATC CATT", + "TGTTTG TTTT", + "GCTG GCTG", + "TACA GGA", + "CTCC TTTG", + "CAA TCTA", + "CCCC CTG", + "TATA CTG", + "CTGA GCC", + "CGG TTA", + "TGAA GTG", + "GCTT CCTT", + "TTTTA TTTG", + "TA GTGAA", + "CTGA GGTG", + "TCTT CTC", + "GACA GAAA", + "CTGAA CTGAA", + "CCTG GGAA", + "TCC CCAAA", + "TATG TATT", + "GATT TCTG", + "CATT CAAA", + "CACA GTT", + "GCTT GAA", + "GTG GATCA", + "CTGA GTGA", + "TGAA TTTA", + "TCAA CAAA", + "GG TCATT", + "GTAA TTTA", + "GC GACTT", + "CTGA GAGA", + "GTG CCCA", + "CTA GGTT", + "TCC TGAAA", + "GTC CACC", + "TCA CAGAA", + "GC GAAAA", + "GTA TGGG", + "TGAA CAAA", + "TAAA CAAAA", + "CC GTTTT", + "TC TCAATT", + "TCCA 
GAAA", + "GTAA CAA", + "GCA TTTTA", + "TCTC CATG", + "TTA TAAAA", + "CAGG CAA", + "CTAAAA AAA", + "GTT GGGA", + "TAAA GATT", + "TGAA GAGA", + "CCCC TCA", + "TGTT TATG", + "TCTA CTG", + "CCAA TTTT", + "GGTG GTG", + "GGAA CAA", + "TGTG GGA", + "TCTG CTA", + "GAA CGA", + "GTAA GTA", + "GTT GCCA", + "AAAA TTTT", + "GC GCGA", + "GAAA GATG", + "GTC TCTCA", + "TCCA TCAA", + "GCA GCTA", + "CACA TTTG", + "CTGA CAA", + "TCCA CC", + "GC T", + "CCCA CTT", + "GCA GGTA", + "GAGG CCA", + "TAAA GTCA", + "CTG GATA", + "CGG CAA" + ] + } +} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-31/tokenizer_config.json b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-31/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..12cee777f1285b52e37dffd583040cdba7f5a0d3 --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-31/tokenizer_config.json @@ -0,0 +1,56 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "[UNK]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "[CLS]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "[SEP]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "[PAD]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "[MASK]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "cache_dir": null, + 
"clean_up_tokenization_spaces": true, + "cls_token": "[CLS]", + "mask_token": "[MASK]", + "model_max_length": 512, + "pad_token": "[PAD]", + "padding_side": "right", + "sep_token": "[SEP]", + "tokenizer_class": "PreTrainedTokenizerFast", + "trust_remote_code": true, + "unk_token": "[UNK]", + "use_fast": true +} diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-31/trainer_state.json b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-31/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..5c31d36c1eb0dccae625dff8075238176e2d9d56 --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-31/trainer_state.json @@ -0,0 +1,71 @@ +{ + "best_metric": 0.732448866777225, + "best_model_checkpoint": "genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-31", + "epoch": 4.0, + "eval_steps": 100, + "global_step": 31, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.9, + "eval_accuracy": 0.5785123966942148, + "eval_f1": 0.5727933541017654, + "eval_loss": 0.6725832223892212, + "eval_matthews_correlation": 0.15919997829120064, + "eval_precision": 0.5816561242093157, + "eval_recall": 0.5775956284153005, + "eval_runtime": 0.4097, + "eval_samples_per_second": 295.354, + "eval_steps_per_second": 9.764, + "step": 7 + }, + { + "epoch": 1.94, + "eval_accuracy": 0.6859504132231405, + "eval_f1": 0.6857572443958447, + "eval_loss": 0.5833003520965576, + "eval_matthews_correlation": 0.3718896520690588, + "eval_precision": 0.6860974274767379, + "eval_recall": 0.6857923497267759, + 
"eval_runtime": 0.4058, + "eval_samples_per_second": 298.171, + "eval_steps_per_second": 9.857, + "step": 15 + }, + { + "epoch": 2.97, + "eval_accuracy": 0.6611570247933884, + "eval_f1": 0.6565593631014192, + "eval_loss": 0.7039547562599182, + "eval_matthews_correlation": 0.328770377703483, + "eval_precision": 0.6686313973548016, + "eval_recall": 0.6602459016393443, + "eval_runtime": 0.4059, + "eval_samples_per_second": 298.079, + "eval_steps_per_second": 9.854, + "step": 23 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.7355371900826446, + "eval_f1": 0.732448866777225, + "eval_loss": 0.603266716003418, + "eval_matthews_correlation": 0.48516753947547236, + "eval_precision": 0.7488499137435307, + "eval_recall": 0.7364754098360655, + "eval_runtime": 0.41, + "eval_samples_per_second": 295.158, + "eval_steps_per_second": 9.757, + "step": 31 + } + ], + "logging_steps": 100, + "max_steps": 56, + "num_train_epochs": 8, + "save_steps": 100, + "total_flos": 1018766006353920.0, + "trial_name": null, + "trial_params": null +} diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-31/training_args.bin b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-31/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..8e29c32623a3ee8be857205fb3fedab9a0a8fa84 --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-31/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26f1d73872a5edc6087d5a86d94ab9dfb19e4292bbdef9e58372cec8c98799f6 +size 5457 diff --git 
a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/results/base5120_dummy_mouse_enhancers_ensembl_lr3e-5_wd0.01_wr0.10_ep8_seed42/eval_results.json b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/results/base5120_dummy_mouse_enhancers_ensembl_lr3e-5_wd0.01_wr0.10_ep8_seed42/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..b589a9b08d17839f1037878bc60557cad6cc66ea --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/results/base5120_dummy_mouse_enhancers_ensembl_lr3e-5_wd0.01_wr0.10_ep8_seed42/eval_results.json @@ -0,0 +1 @@ +{"eval_loss": 0.6152443289756775, "eval_accuracy": 0.71900826446281, "eval_f1": 0.7180646929824561, "eval_matthews_correlation": 0.48551407172989086, "eval_precision": 0.7481159420289856, "eval_recall": 0.737513873473918, "eval_runtime": 0.4084, "eval_samples_per_second": 296.277, "eval_steps_per_second": 9.794, "epoch": 7.23} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_enhancers_cohn/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep1_seed42/checkpoint-174/config.json b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_enhancers_cohn/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep1_seed42/checkpoint-174/config.json new file mode 100644 index 0000000000000000000000000000000000000000..45e4c6c10a6211acf374c78e8078ab7ac74985f9 --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_enhancers_cohn/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep1_seed42/checkpoint-174/config.json @@ -0,0 +1,27 @@ +{ + "_name_or_path": "/data/nanhuang/Nan/models/DNAbert2_Pretrained", + "architectures": [ + "BertForSequenceClassification" + ], + 
"attention_probs_dropout_prob": 0.1, + "classifier_dropout": null, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_norm_eps": 1e-12, + "max_length": 512, + "max_position_embeddings": 512, + "model_type": "bert", + "num_attention_heads": 12, + "num_hidden_layers": 12, + "pad_token_id": 0, + "position_embedding_type": "absolute", + "problem_type": "single_label_classification", + "torch_dtype": "float32", + "transformers_version": "4.35.2", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 4096 +} diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_enhancers_cohn/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep1_seed42/checkpoint-174/model.safetensors b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_enhancers_cohn/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep1_seed42/checkpoint-174/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..74bdb123b52425c5183512a812d9e37da4fa89bc --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_enhancers_cohn/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep1_seed42/checkpoint-174/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:844e1101b26d9d078be054c5678c03308ef4982586ca627688e21be5d1612d95 +size 356777880 diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_enhancers_cohn/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep1_seed42/checkpoint-174/optimizer.pt b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_enhancers_cohn/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep1_seed42/checkpoint-174/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..8adff55dfce2ee69950a98f0861b9c30a664eb9b --- /dev/null +++ 
b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_enhancers_cohn/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep1_seed42/checkpoint-174/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49d877593affb440e7d5826d3ff99f5af58cdd539d448f100d1190804a257b84 +size 713677451 diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_enhancers_cohn/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep1_seed42/checkpoint-174/rng_state.pth b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_enhancers_cohn/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep1_seed42/checkpoint-174/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..fb6cc371e7a7631048957a98c24174179a6a8374 --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_enhancers_cohn/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep1_seed42/checkpoint-174/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee830a19268175c63a22b6420ef78992c05e141b1d0e2ed9be9e4b5c5b32ec76 +size 14645 diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_enhancers_cohn/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep1_seed42/checkpoint-174/scheduler.pt b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_enhancers_cohn/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep1_seed42/checkpoint-174/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..839628a1086282fb5e6954266904781d32586e7b --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_enhancers_cohn/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep1_seed42/checkpoint-174/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f91e474d2a3e61e8210efce18190e4acfbbfa51f3fff243221d81a4b9e7b072f +size 1465 diff --git 
a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_enhancers_cohn/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep1_seed42/checkpoint-174/special_tokens_map.json b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_enhancers_cohn/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep1_seed42/checkpoint-174/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..a8b3208c2884c4efb86e49300fdd3dc877220cdf --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_enhancers_cohn/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep1_seed42/checkpoint-174/special_tokens_map.json @@ -0,0 +1,7 @@ +{ + "cls_token": "[CLS]", + "mask_token": "[MASK]", + "pad_token": "[PAD]", + "sep_token": "[SEP]", + "unk_token": "[UNK]" +} diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_enhancers_cohn/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep1_seed42/checkpoint-174/tokenizer.json b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_enhancers_cohn/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep1_seed42/checkpoint-174/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c390760bdfa97b696a762628a15dd3bf7932038a --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_enhancers_cohn/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep1_seed42/checkpoint-174/tokenizer.json @@ -0,0 +1,8340 @@ +{ + "version": "1.0", + "truncation": { + "direction": "Right", + "max_length": 100, + "strategy": "LongestFirst", + "stride": 0 + }, + "padding": { + "strategy": "BatchLongest", + "direction": "Right", + "pad_to_multiple_of": null, + "pad_id": 3, + "pad_type_id": 0, + "pad_token": "[PAD]" + }, + "added_tokens": [ + { + "id": 0, + "content": "[UNK]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "[CLS]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": 
false, + "special": true + }, + { + "id": 2, + "content": "[SEP]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 3, + "content": "[PAD]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 4, + "content": "[MASK]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": null, + "pre_tokenizer": { + "type": "Whitespace" + }, + "post_processor": { + "type": "TemplateProcessing", + "single": [ + { + "SpecialToken": { + "id": "[CLS]", + "type_id": 0 + } + }, + { + "Sequence": { + "id": "A", + "type_id": 0 + } + }, + { + "SpecialToken": { + "id": "[SEP]", + "type_id": 0 + } + } + ], + "pair": [ + { + "SpecialToken": { + "id": "[CLS]", + "type_id": 0 + } + }, + { + "Sequence": { + "id": "A", + "type_id": 0 + } + }, + { + "SpecialToken": { + "id": "[SEP]", + "type_id": 0 + } + }, + { + "Sequence": { + "id": "B", + "type_id": 1 + } + }, + { + "SpecialToken": { + "id": "[SEP]", + "type_id": 1 + } + } + ], + "special_tokens": { + "[CLS]": { + "id": "[CLS]", + "ids": [ + 1 + ], + "tokens": [ + "[CLS]" + ] + }, + "[SEP]": { + "id": "[SEP]", + "ids": [ + 2 + ], + "tokens": [ + "[SEP]" + ] + } + } + }, + "decoder": null, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": "[UNK]", + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "vocab": { + "[UNK]": 0, + "[CLS]": 1, + "[SEP]": 2, + "[PAD]": 3, + "[MASK]": 4, + "A": 5, + "C": 6, + "G": 7, + "T": 8, + "AA": 9, + "TT": 10, + "TG": 11, + "CA": 12, + "CC": 13, + "TA": 14, + "GG": 15, + "TC": 16, + "GA": 17, + "AAA": 18, + "GC": 19, + "TAA": 20, + "TTTT": 21, + "TCA": 22, + "TGA": 23, + "TTA": 24, + "GAA": 25, + "TCC": 26, + "CAA": 27, + "CTG": 28, + "CTT": 29, + "GTG": 30, + "GTT": 31, + "GCA": 32, + "GGA": 33, + "CCA": 34, + "GTA": 35, + 
"GCC": 36, + "CTA": 37, + "TAAA": 38, + "AAAA": 39, + "CTC": 40, + "GTC": 41, + "TGTG": 42, + "TATT": 43, + "CACA": 44, + "GAAA": 45, + "TATA": 46, + "TCTT": 47, + "TGTT": 48, + "CAAA": 49, + "GAGA": 50, + "CATT": 51, + "TGAA": 52, + "CAGG": 53, + "TCTG": 54, + "CAGA": 55, + "TCAA": 56, + "GGAA": 57, + "TAAAA": 58, + "CTGA": 59, + "GCTT": 60, + "GTGA": 61, + "GCTG": 62, + "CTCA": 63, + "CCTT": 64, + "CATG": 65, + "GCAA": 66, + "GTCA": 67, + "GTAA": 68, + "TTTTA": 69, + "TATG": 70, + "GAGG": 71, + "CGG": 72, + "GATT": 73, + "CCTG": 74, + "TCTC": 75, + "CCAA": 76, + "GTTA": 77, + "CTCC": 78, + "CTAA": 79, + "TACA": 80, + "CTTA": 81, + "TCCA": 82, + "GATG": 83, + "TTAA": 84, + "GAAAA": 85, + "TTTG": 86, + "GTTTT": 87, + "TCTA": 88, + "GCCA": 89, + "GTCC": 90, + "CTTTT": 91, + "GGGG": 92, + "CGA": 93, + "TTTA": 94, + "CCCA": 95, + "CAAAA": 96, + "TGGG": 97, + "TAGA": 98, + "TAGG": 99, + "GACA": 100, + "GGTT": 101, + "CCCC": 102, + "GGTG": 103, + "CATA": 104, + "GCTA": 105, + "TGTA": 106, + "TCAAA": 107, + "TGGA": 108, + "TAATT": 109, + "TTATT": 110, + "TGCA": 111, + "GGCA": 112, + "GATA": 113, + "CCTA": 114, + "TTCA": 115, + "TCTCA": 116, + "GGGA": 117, + "CGC": 118, + "CTGAA": 119, + "GTAAA": 120, + "TCTCC": 121, + "TTTTTT": 122, + "CGTG": 123, + "GCAAA": 124, + "TAAAAA": 125, + "TCTGA": 126, + "TCATT": 127, + "GGAAA": 128, + "TGAAA": 129, + "TCCTT": 130, + "CCAAA": 131, + "GAATT": 132, + "CTAAA": 133, + "CGTT": 134, + "GTGAA": 135, + "GGCC": 136, + "TAATA": 137, + "GGTA": 138, + "TGCC": 139, + "CACC": 140, + "TGATT": 141, + "AAAAAA": 142, + "GCTCA": 143, + "TCCAA": 144, + "GAGAA": 145, + "CTGTT": 146, + "TATTA": 147, + "CAGCA": 148, + "CTCTT": 149, + "CTTAA": 150, + "CAGAA": 151, + "GCTGA": 152, + "GTTAA": 153, + "TCTTA": 154, + "TATTTT": 155, + "GCCAA": 156, + "CTTTG": 157, + "GACC": 158, + "CGCA": 159, + "GTATT": 160, + "GTCTT": 161, + "CAATT": 162, + "GTGTT": 163, + "CTCAA": 164, + "GGAGG": 165, + "CGAA": 166, + "TCTTTT": 167, + "GTCAA": 168, + "CGCC": 169, + 
"TATAA": 170, + "TACC": 171, + "TCTAA": 172, + "CCATT": 173, + "CGGA": 174, + "CAAAAA": 175, + "CAGTG": 176, + "TCCTG": 177, + "CTCTG": 178, + "GAAAAA": 179, + "CTGTG": 180, + "CAGC": 181, + "TTTTAA": 182, + "GCATT": 183, + "GCCTT": 184, + "TAATG": 185, + "CTATT": 186, + "GTTTG": 187, + "TGATG": 188, + "GGCTG": 189, + "CCTCA": 190, + "GAGGA": 191, + "GCCTG": 192, + "AAATT": 193, + "CGTA": 194, + "TCAAAA": 195, + "TACAA": 196, + "CATCA": 197, + "CAGTT": 198, + "TGAGA": 199, + "GGGAA": 200, + "CACTG": 201, + "CACAA": 202, + "CAGGA": 203, + "CCCCA": 204, + "CCCTG": 205, + "TTTTTTTT": 206, + "TAGAA": 207, + "GAGCA": 208, + "CCTCC": 209, + "CACCA": 210, + "TATCA": 211, + "GAGC": 212, + "CATTA": 213, + "CACACACA": 214, + "GAGTG": 215, + "GGATT": 216, + "TGTGTGTG": 217, + "TACTT": 218, + "CACTT": 219, + "GTCTG": 220, + "TGAGG": 221, + "GAGTT": 222, + "GAATG": 223, + "TCATG": 224, + "GACAA": 225, + "GACTT": 226, + "TATTAA": 227, + "TAATAA": 228, + "GGCCA": 229, + "CATTTT": 230, + "CAGCC": 231, + "CCCTT": 232, + "GCTAA": 233, + "TATATATA": 234, + "GTGTG": 235, + "TACTG": 236, + "TAGTT": 237, + "CAATG": 238, + "GCTC": 239, + "CAGTA": 240, + "GCTCC": 241, + "CATAA": 242, + "TTATG": 243, + "TAAATT": 244, + "GATGA": 245, + "CATGA": 246, + "GCGG": 247, + "AAAAAAAA": 248, + "CCATG": 249, + "GATAA": 250, + "GACTG": 251, + "TATGA": 252, + "GCAGG": 253, + "GATCA": 254, + "GTTTTA": 255, + "GGATG": 256, + "CCTGA": 257, + "GTAAAA": 258, + "GAAGG": 259, + "GATTA": 260, + "CCTC": 261, + "GACCA": 262, + "GCTTA": 263, + "CCCAA": 264, + "AAATG": 265, + "GCATG": 266, + "TAGTA": 267, + "TACCA": 268, + "GGCTT": 269, + "CGTC": 270, + "TCTCTT": 271, + "GGTCA": 272, + "TTATTA": 273, + "TACTA": 274, + "TAGCA": 275, + "TATC": 276, + "CTGGG": 277, + "CATC": 278, + "CTTTTA": 279, + "CTAAAA": 280, + "GTGGG": 281, + "GAGTA": 282, + "CCAGG": 283, + "GATTTT": 284, + "TAGTG": 285, + "GAAATT": 286, + "CACTA": 287, + "TCGG": 288, + "TCAGG": 289, + "CAGGAA": 290, + "GCAAAA": 291, + "CCTTA": 292, + "CATCC": 
293, + "CTTGG": 294, + "TGTGAA": 295, + "TATTTG": 296, + "CCTAA": 297, + "CTATG": 298, + "GAGAAA": 299, + "GAGAGAGA": 300, + "GCTTTT": 301, + "TATAAA": 302, + "CAAGG": 303, + "TCTCTG": 304, + "TGTTAA": 305, + "TGTGTT": 306, + "GAGCC": 307, + "GACTA": 308, + "TATATT": 309, + "TAAAAAA": 310, + "TTTTTG": 311, + "GTATG": 312, + "CATTAA": 313, + "TAGGA": 314, + "TAGC": 315, + "GTTGG": 316, + "GAAGAA": 317, + "TAAATG": 318, + "TCTGTT": 319, + "CAGAAA": 320, + "CAAATT": 321, + "TAATTA": 322, + "TCTGTG": 323, + "TATCC": 324, + "TGAATT": 325, + "CTCCA": 326, + "GTGAAA": 327, + "GGCAA": 328, + "GGAGA": 329, + "GAAGA": 330, + "GGTGA": 331, + "GGGCA": 332, + "CCAAAA": 333, + "TCTCTCTC": 334, + "CTGCA": 335, + "CTTCTT": 336, + "TCTTAA": 337, + "CCCTA": 338, + "TGTGTG": 339, + "AAATA": 340, + "TGTTTG": 341, + "GGGTT": 342, + "GTGCTG": 343, + "GGAAAA": 344, + "GGGGA": 345, + "TCAGA": 346, + "CCTTTT": 347, + "GAAATG": 348, + "GCAGCA": 349, + "TCTGAA": 350, + "GGGTG": 351, + "CACATT": 352, + "TCTTTG": 353, + "GGGC": 354, + "TCCCA": 355, + "TCCATT": 356, + "CTGAAA": 357, + "CTTTA": 358, + "TCGA": 359, + "GTTTA": 360, + "CAACAA": 361, + "CTTCC": 362, + "GCCTCC": 363, + "TTAAA": 364, + "GCTCTG": 365, + "GTTTCA": 366, + "GGAGGA": 367, + "CGTGA": 368, + "CAGTC": 369, + "GAATA": 370, + "CAGAGA": 371, + "CCCTC": 372, + "CAAATG": 373, + "CTGCTG": 374, + "GATCC": 375, + "TTTTATT": 376, + "AAAATT": 377, + "TTATA": 378, + "TCAATT": 379, + "GGTAA": 380, + "GTTATT": 381, + "GCCAGG": 382, + "GGAGAA": 383, + "CATTTG": 384, + "TCACC": 385, + "CTCAAA": 386, + "GGTTA": 387, + "TCCAAA": 388, + "TCTATT": 389, + "GCAGA": 390, + "CTTCA": 391, + "TCATCA": 392, + "CGAGG": 393, + "TAACA": 394, + "GTTGTT": 395, + "CTTATT": 396, + "CGTCA": 397, + "TAAGA": 398, + "TAATTTT": 399, + "CTGTA": 400, + "TCCACA": 401, + "GCTGTG": 402, + "CGCTG": 403, + "TCTAAA": 404, + "GCGA": 405, + "CAATA": 406, + "CCACCA": 407, + "GAACA": 408, + "CGAAA": 409, + "CAGATT": 410, + "TCACA": 411, + "TTATTTT": 412, + "TCTCAA": 413, + 
"TGACA": 414, + "CTCCAA": 415, + "AAAAAAA": 416, + "TATATG": 417, + "TCCTCC": 418, + "TCACTT": 419, + "TCCAGG": 420, + "CAAGA": 421, + "GGCTA": 422, + "GTGGTG": 423, + "CGTAA": 424, + "CGAGA": 425, + "TGATA": 426, + "GGATTA": 427, + "CAACA": 428, + "CGATT": 429, + "TGAGAA": 430, + "CTCCTT": 431, + "CTCATT": 432, + "GTTAAA": 433, + "TCATA": 434, + "CCTCTG": 435, + "CTCTA": 436, + "GCTGAA": 437, + "CTGGA": 438, + "TAAGG": 439, + "CTTAAA": 440, + "TATTTA": 441, + "CCACA": 442, + "CCGG": 443, + "GTCAAA": 444, + "TGGAA": 445, + "CGGAA": 446, + "TGATGA": 447, + "GTTCA": 448, + "TAACAA": 449, + "GCTGTT": 450, + "TAAGAA": 451, + "CTGCC": 452, + "TTAATT": 453, + "CCAGA": 454, + "TCAGAA": 455, + "GTCATT": 456, + "CGCTT": 457, + "GATTAA": 458, + "CTGATT": 459, + "GCCACA": 460, + "GTAATT": 461, + "TCCAGA": 462, + "GCCAAA": 463, + "GTGATT": 464, + "TAAAATT": 465, + "CAAGAA": 466, + "CCACC": 467, + "TAATCC": 468, + "GTTCTT": 469, + "TCCATG": 470, + "GCTCTT": 471, + "TGCTG": 472, + "GGGTA": 473, + "TTACA": 474, + "GCCATT": 475, + "GCACA": 476, + "GCAATT": 477, + "TCCCTG": 478, + "TGTGA": 479, + "TCGAA": 480, + "GGACA": 481, + "GGAATT": 482, + "GTGGA": 483, + "CTTCTG": 484, + "TCCCC": 485, + "GCCCC": 486, + "CTTGA": 487, + "TAATGA": 488, + "TAAATA": 489, + "TATATA": 490, + "CTGCAA": 491, + "TCATTA": 492, + "GTATA": 493, + "TCCCCA": 494, + "CGTTA": 495, + "GCAGAA": 496, + "TGAGTT": 497, + "CTTTTTT": 498, + "CGATG": 499, + "CTTTCA": 500, + "AAAATG": 501, + "CAGGTT": 502, + "CTAATT": 503, + "CGCCA": 504, + "TGAAAAA": 505, + "GTTCC": 506, + "GTCCTT": 507, + "GTCCAA": 508, + "GTTTTTT": 509, + "CTCTGA": 510, + "GCGC": 511, + "GTTGA": 512, + "TGAATG": 513, + "CTATA": 514, + "GCAGTG": 515, + "CCTTAA": 516, + "TCACCA": 517, + "TCACTG": 518, + "GCCCTG": 519, + "TAACTT": 520, + "CAGATG": 521, + "GTAGG": 522, + "TCTATA": 523, + "GAGATT": 524, + "GTCTA": 525, + "TTTTAAA": 526, + "CACATG": 527, + "TGACC": 528, + "CACAAA": 529, + "GTGTA": 530, + "GGGAGG": 531, + "GCTTTG": 532, + "CAAAAAA": 533, 
+ "GAGGAA": 534, + "GTTCTG": 535, + "TTTTTA": 536, + "GTCTCA": 537, + "GTTCAA": 538, + "TCGTG": 539, + "GCTTAA": 540, + "GCACC": 541, + "CTCCTG": 542, + "TAAATAAA": 543, + "CTACA": 544, + "CTTCCA": 545, + "TCCTCA": 546, + "CGCAA": 547, + "GAAAAAA": 548, + "GCCCA": 549, + "TCGTT": 550, + "GTAGA": 551, + "CTCTCA": 552, + "GTCCA": 553, + "TGACTT": 554, + "TCCCTT": 555, + "GCCATG": 556, + "CACACACACACACACA": 557, + "GTGATG": 558, + "CCTCTT": 559, + "GCCAGA": 560, + "TCCTA": 561, + "CGTTTT": 562, + "GTACA": 563, + "GCATA": 564, + "GAATTA": 565, + "TGTGTGTGTGTGTGTG": 566, + "CCCAGG": 567, + "GGTTTT": 568, + "TCAAAAA": 569, + "TCTATG": 570, + "CCATA": 571, + "TGACAA": 572, + "GGATA": 573, + "TCAGTG": 574, + "GTATTTT": 575, + "GAGATG": 576, + "GCGTG": 577, + "CGTCC": 578, + "TTAAAAA": 579, + "TAATCA": 580, + "CAATTA": 581, + "CCACTG": 582, + "CGGTT": 583, + "GTTGAA": 584, + "TGATTA": 585, + "CCTTTG": 586, + "CGGTG": 587, + "CAGGTG": 588, + "TCAATG": 589, + "CTGATG": 590, + "TCAGGA": 591, + "GTTTAA": 592, + "TATTAAA": 593, + "CTCTTA": 594, + "GCAGGA": 595, + "CTCTCC": 596, + "GAACC": 597, + "CTTTAA": 598, + "GGGCC": 599, + "GTATTA": 600, + "GCGCC": 601, + "CCAATT": 602, + "GCTAAA": 603, + "TGACTG": 604, + "GATTTG": 605, + "GATAAA": 606, + "TCAGCA": 607, + "GTTCCA": 608, + "GAAATA": 609, + "GACAAA": 610, + "GAGTC": 611, + "GCTATT": 612, + "TCACAA": 613, + "GAGGTT": 614, + "TAACC": 615, + "GAAGGA": 616, + "GCTCAA": 617, + "GAAAATT": 618, + "CCAGCA": 619, + "GTTTTAA": 620, + "GTGCC": 621, + "TGAGGA": 622, + "CATAAA": 623, + "GGTCC": 624, + "TCATTTT": 625, + "TATTTATT": 626, + "TAATAAA": 627, + "GCCTA": 628, + "CTTTTAA": 629, + "TAAGTG": 630, + "TAAGTA": 631, + "CTGGAA": 632, + "CACACA": 633, + "GACAGA": 634, + "CAACC": 635, + "GGGAAA": 636, + "CCAGAA": 637, + "TCAGTT": 638, + "TAACTA": 639, + "CTAAAAA": 640, + "TGGGTT": 641, + "TGAGTG": 642, + "TAAAATG": 643, + "TATATATATATATATA": 644, + "GCACTG": 645, + "GACTC": 646, + "TACAAA": 647, + "TAAAAAAA": 648, + "TCTACA": 649, + 
"GTTGTG": 650, + "TCGCC": 651, + "CCCAAA": 652, + "GTCATG": 653, + "CTGCTT": 654, + "GGAATG": 655, + "CTATTA": 656, + "GATATT": 657, + "TAGAAA": 658, + "GGCAGG": 659, + "GATGAA": 660, + "GTAGAA": 661, + "TCCTGA": 662, + "TAACTG": 663, + "GCTGGG": 664, + "GCAATG": 665, + "GCCCCA": 666, + "GTTTGA": 667, + "CATTTA": 668, + "GTGCA": 669, + "CTTGAA": 670, + "GTGGAA": 671, + "CTTCAA": 672, + "TAAATTA": 673, + "GTGGCA": 674, + "TCCTTA": 675, + "GGAAAAA": 676, + "TTTTTTA": 677, + "CCTGTG": 678, + "GTAATG": 679, + "GTGTTA": 680, + "CTAGG": 681, + "CAGGCTG": 682, + "GACACA": 683, + "GAAAAAAA": 684, + "TCGC": 685, + "GTAAAAA": 686, + "TGTTTA": 687, + "TCTCTA": 688, + "GTCCTG": 689, + "CCAGGA": 690, + "GAACAA": 691, + "TAAGTT": 692, + "TGAGCA": 693, + "GCTCCA": 694, + "TAAGCA": 695, + "CTCATG": 696, + "GTCTTA": 697, + "CCCACA": 698, + "CATATT": 699, + "GCCTCA": 700, + "CACTC": 701, + "CTTCTA": 702, + "TGATTTT": 703, + "TCGCA": 704, + "CCTGTT": 705, + "GAAGCA": 706, + "GCAAAAA": 707, + "GCGGA": 708, + "CCACAA": 709, + "GCGCA": 710, + "CATATA": 711, + "GACATT": 712, + "GTTCTA": 713, + "CAAAATT": 714, + "GAAAGAAA": 715, + "CCCGG": 716, + "TACACA": 717, + "CCAAAAA": 718, + "GAGGTG": 719, + "GGCTCA": 720, + "CAGTGA": 721, + "TCCCAA": 722, + "TATCTT": 723, + "TGAGTA": 724, + "TCGTA": 725, + "TTTTCTT": 726, + "GTGGGA": 727, + "GAGCTG": 728, + "CCCTCC": 729, + "TAGGTT": 730, + "TTAGG": 731, + "TAATATT": 732, + "CCAGCC": 733, + "CATCTT": 734, + "GTCTGA": 735, + "GTTTCC": 736, + "CCTGAA": 737, + "GGAGCA": 738, + "GAAAATG": 739, + "TCAGTA": 740, + "TAACCA": 741, + "GATGTT": 742, + "CTGTTA": 743, + "CATGTT": 744, + "GGCGG": 745, + "CATGTG": 746, + "GGGAGA": 747, + "CTTTGA": 748, + "TCTTTCTT": 749, + "AAAAAAAAA": 750, + "GGGGTG": 751, + "CTTTCC": 752, + "CTTGTT": 753, + "GCATTA": 754, + "CCCAGA": 755, + "CAAATA": 756, + "TCGGA": 757, + "CAGCTT": 758, + "TCACTA": 759, + "TAATTAA": 760, + "TAAGGA": 761, + "GAACTG": 762, + "GCACAA": 763, + "GCGTT": 764, + "GGCTC": 765, + "TCTTTTA": 766, + 
"CCTCCA": 767, + "GGCAAA": 768, + "CAGCTG": 769, + "CTACAA": 770, + "TACATT": 771, + "GCTATG": 772, + "CTTGTG": 773, + "GAGTCA": 774, + "GTTATG": 775, + "CTGCCA": 776, + "GTCTCC": 777, + "TGACCA": 778, + "CACCTG": 779, + "TATATTA": 780, + "TGATCA": 781, + "CAGCAA": 782, + "GATGTG": 783, + "GTCTTTT": 784, + "CTAGAA": 785, + "GCTACA": 786, + "CTGGGA": 787, + "GGGGTT": 788, + "CAAGTA": 789, + "CAAGGA": 790, + "CCCTCA": 791, + "TAGCC": 792, + "GTTGGA": 793, + "GCTATA": 794, + "TCTGAAA": 795, + "TATGTT": 796, + "CCCCTT": 797, + "GTTGTA": 798, + "CCCTGA": 799, + "TGACTA": 800, + "CAAGCA": 801, + "CAATAA": 802, + "GAACTT": 803, + "CATGAA": 804, + "CTTATG": 805, + "CTAATG": 806, + "TCTAAAA": 807, + "CCAATG": 808, + "GAAGTG": 809, + "CCTCAA": 810, + "CCCATT": 811, + "CAGTCA": 812, + "GAGAGAGAGAGAGAGA": 813, + "TATGTG": 814, + "GCAGTGA": 815, + "TCTCCTT": 816, + "TCCCAAA": 817, + "CCATTA": 818, + "CCAGTG": 819, + "GCATCA": 820, + "TCAAATT": 821, + "GATCTT": 822, + "GACAGG": 823, + "GGAGTG": 824, + "GTAGTA": 825, + "CAACTT": 826, + "GAAGTT": 827, + "CCCCTG": 828, + "TCTCAAA": 829, + "GGGTC": 830, + "GAGCTT": 831, + "TATGAAA": 832, + "TATGAA": 833, + "GACATG": 834, + "CAAGTG": 835, + "GATATA": 836, + "CATCTG": 837, + "CTGTGA": 838, + "TAATTTA": 839, + "GGCAGA": 840, + "GCGAA": 841, + "CCTAAA": 842, + "CCATCA": 843, + "CACTGA": 844, + "GGACTA": 845, + "GACGG": 846, + "CTCTTTT": 847, + "CTGTCA": 848, + "TCTCTCTCTCTCTCTC": 849, + "TTAATG": 850, + "GCAGCC": 851, + "CAAAAAAA": 852, + "GCACCA": 853, + "CTATTTT": 854, + "GAGCAA": 855, + "CTTGGA": 856, + "CTGGTG": 857, + "GAATAA": 858, + "TCCTTTT": 859, + "GAAGTA": 860, + "CAGTAA": 861, + "CAACCA": 862, + "CTGTAA": 863, + "TGATAA": 864, + "GCAGTT": 865, + "CACGG": 866, + "TAAATAA": 867, + "CTGTTTT": 868, + "CTACTA": 869, + "GCTCTA": 870, + "CGAAAA": 871, + "CAAGTT": 872, + "CTTGTA": 873, + "GAATGA": 874, + "GAGTGA": 875, + "GCCTGA": 876, + "GGTTTG": 877, + "CCCATG": 878, + "GGGGAA": 879, + "GAAGAAA": 880, + "TGTTA": 881, + "CAATTTT": 
882, + "TATATTTT": 883, + "CTCAAAA": 884, + "GGTGGG": 885, + "CCGTG": 886, + "TATTTCA": 887, + "CCCCAA": 888, + "TATTTAA": 889, + "GGCTGA": 890, + "GGTGTG": 891, + "CATCAA": 892, + "CACTCA": 893, + "TCTCATT": 894, + "GAATTTT": 895, + "GAATCA": 896, + "CAGGAAA": 897, + "CATACA": 898, + "TATTTTA": 899, + "TTATAA": 900, + "GAGGAAA": 901, + "CATATG": 902, + "CTTTCTT": 903, + "CAACTG": 904, + "GGGCTG": 905, + "CCCCCA": 906, + "TTTGAAA": 907, + "CATTAAA": 908, + "CTTAAAA": 909, + "GACTGA": 910, + "CAATGA": 911, + "GGCACA": 912, + "CCAGTA": 913, + "GGATGA": 914, + "GTTTTTG": 915, + "GCATTTT": 916, + "GTGCCA": 917, + "GCAGTA": 918, + "GCCCTT": 919, + "TCGTC": 920, + "GAACTA": 921, + "GTGGTT": 922, + "GTGTGA": 923, + "GTGCTT": 924, + "CGCTA": 925, + "GTGTCA": 926, + "TCTTTA": 927, + "GCCTTA": 928, + "CCTATT": 929, + "CAAAATG": 930, + "GAACCA": 931, + "CTCCAGG": 932, + "GACTCA": 933, + "CATGAAA": 934, + "GCTAGG": 935, + "TGTTAAA": 936, + "GCGTA": 937, + "GCACTT": 938, + "TCTTAAA": 939, + "TAAGAAA": 940, + "GGCCTG": 941, + "TCCCTA": 942, + "GTGGTA": 943, + "CTGCTA": 944, + "GGAGTT": 945, + "GGTAAA": 946, + "CAAACAAA": 947, + "GATATG": 948, + "TCATGA": 949, + "GACCTT": 950, + "TAATATA": 951, + "GCTAGA": 952, + "GGACTG": 953, + "GGCATT": 954, + "CAGTTA": 955, + "CCCTAA": 956, + "CACCTT": 957, + "GGTGAA": 958, + "CAGCTA": 959, + "GTGTTTT": 960, + "CAACTA": 961, + "GATCAA": 962, + "GAGAAAA": 963, + "TGTGAAA": 964, + "AAAATA": 965, + "GATGAAA": 966, + "CTCTAA": 967, + "TTACTT": 968, + "GATCTG": 969, + "CCACTT": 970, + "GAGTTA": 971, + "CAATCA": 972, + "GGATTACAGG": 973, + "TTTATTTT": 974, + "TACATA": 975, + "TTTTATG": 976, + "GAGTAA": 977, + "GCTGAAA": 978, + "GTACTG": 979, + "GCTCTC": 980, + "TATGTA": 981, + "TGTGTA": 982, + "TCATAA": 983, + "GGACTT": 984, + "TCTCCAA": 985, + "GCATGA": 986, + "GACGA": 987, + "CGCCTG": 988, + "GACCTG": 989, + "GGTCTT": 990, + "CACCAA": 991, + "GATC": 992, + "GACCAA": 993, + "AAAATTA": 994, + "GTAAATT": 995, + "CCAGTT": 996, + "CAGAAAA": 997, + 
"TAACAAA": 998, + "GGTGTT": 999, + "GAAATTA": 1000, + "TGCCTCA": 1001, + "CCGCC": 1002, + "CCATTTT": 1003, + "CTTGCC": 1004, + "TCTGTA": 1005, + "CTGGCA": 1006, + "GGGATG": 1007, + "CCATGA": 1008, + "CTACTT": 1009, + "TAGGTG": 1010, + "TAAAAATT": 1011, + "GAAAGAA": 1012, + "TAAAATA": 1013, + "CTTTTTG": 1014, + "GTCAAAA": 1015, + "GGACAA": 1016, + "TCTGATT": 1017, + "CTCTCTT": 1018, + "TAATTTG": 1019, + "CTCTTTG": 1020, + "GGCCTT": 1021, + "GGATTTT": 1022, + "CTACTG": 1023, + "GTTGCA": 1024, + "GGCTCC": 1025, + "CTCTGTG": 1026, + "CTCCAGCC": 1027, + "TTACAA": 1028, + "GGACCA": 1029, + "GGAAGGAA": 1030, + "TAAAGAA": 1031, + "TTAGAA": 1032, + "GTGAAAA": 1033, + "CTTGCA": 1034, + "TGGGTG": 1035, + "GGAGCC": 1036, + "CCTCTA": 1037, + "CT": 1038, + "GGGCTT": 1039, + "GGCATG": 1040, + "CTGGTT": 1041, + "TACAGA": 1042, + "GATTAAA": 1043, + "CTCTGTT": 1044, + "TTATCA": 1045, + "CTGAAAA": 1046, + "GTAGTT": 1047, + "GGGTCA": 1048, + "GT": 1049, + "CAGCCA": 1050, + "GCGTC": 1051, + "CACTTA": 1052, + "GTGCTA": 1053, + "TCTTATT": 1054, + "GTACTT": 1055, + "GGTATT": 1056, + "TAGAGA": 1057, + "TACATG": 1058, + "CCACTA": 1059, + "TGAGAAA": 1060, + "CAATAAA": 1061, + "TCCAAAA": 1062, + "CGTGAA": 1063, + "GGTCTG": 1064, + "CTGAATT": 1065, + "TCAGCC": 1066, + "CCTCTC": 1067, + "GTTAAAA": 1068, + "GGGATT": 1069, + "TCCTAA": 1070, + "CACTAA": 1071, + "GGAGAAA": 1072, + "CCTTCCTT": 1073, + "GTTTCTT": 1074, + "TATCAA": 1075, + "GATACA": 1076, + "TAATCCCAGCA": 1077, + "CCGCA": 1078, + "TGAAATT": 1079, + "CGTAAA": 1080, + "CTCTCTG": 1081, + "TCTTTTTT": 1082, + "GTACAA": 1083, + "CCAAATT": 1084, + "TGTATTTT": 1085, + "TCGCTT": 1086, + "GGGTGA": 1087, + "GATAGA": 1088, + "CTTTATT": 1089, + "TAAACAA": 1090, + "GTTTATT": 1091, + "TGAATA": 1092, + "CTACCA": 1093, + "GTGTCC": 1094, + "CCCGA": 1095, + "TTTATTA": 1096, + "CTCCAAA": 1097, + "TTTTTTTTTTTT": 1098, + "TCATCC": 1099, + "GAAGCC": 1100, + "CTAAATT": 1101, + "CAAATTA": 1102, + "CCCCAAA": 1103, + "TCTTCTT": 1104, + "TAGGAAA": 1105, + 
"CACGA": 1106, + "CATTTTA": 1107, + "GTGCAA": 1108, + "TCTCCTG": 1109, + "TATTTTAA": 1110, + "GTTTGTT": 1111, + "GAGCCA": 1112, + "GGCCAA": 1113, + "CATTTCA": 1114, + "CATCCA": 1115, + "CCTATA": 1116, + "GACTTA": 1117, + "TCAAATG": 1118, + "GTATCA": 1119, + "TAAATTTT": 1120, + "CTGAGGCA": 1121, + "GCCCAA": 1122, + "GGTTAA": 1123, + "TATCTG": 1124, + "TGACAGA": 1125, + "GGAGAGA": 1126, + "GCTGCTG": 1127, + "CCCTTA": 1128, + "TCCTCTG": 1129, + "GTAGCA": 1130, + "CCTGAAA": 1131, + "CCGAA": 1132, + "TTTTTAA": 1133, + "CTATAA": 1134, + "CCTGTA": 1135, + "TTACTG": 1136, + "GTATAA": 1137, + "GGCGA": 1138, + "GACTAA": 1139, + "TCAGAAA": 1140, + "GTGTGTG": 1141, + "CAAAGAA": 1142, + "CCTATG": 1143, + "GCAGAGA": 1144, + "CCGTT": 1145, + "TTTTATTTT": 1146, + "GGAAGAA": 1147, + "TTACTA": 1148, + "GCCTGGG": 1149, + "TCCCTC": 1150, + "TCCTCTT": 1151, + "GGATCA": 1152, + "GGTCAA": 1153, + "TCGAGA": 1154, + "TATTCTT": 1155, + "TACTC": 1156, + "GTTAATT": 1157, + "GCGAGA": 1158, + "CTTAATT": 1159, + "TCCTTTG": 1160, + "GTCTAA": 1161, + "CACCCA": 1162, + "GGGTTA": 1163, + "GGGCAA": 1164, + "GGAAATG": 1165, + "GCAAATT": 1166, + "TAGATG": 1167, + "GCAGAAA": 1168, + "AAAAAAAAAAAAAAAA": 1169, + "CCTACA": 1170, + "GGAGTA": 1171, + "TCTAATT": 1172, + "CAACAAA": 1173, + "TAGATT": 1174, + "GGTTTA": 1175, + "CCTAGA": 1176, + "CTTTAAA": 1177, + "TACTTA": 1178, + "TAATGAA": 1179, + "CTATCA": 1180, + "TAGTAA": 1181, + "CAGAGAA": 1182, + "CAAGAAA": 1183, + "GGGGAAA": 1184, + "CGTTAA": 1185, + "CGTGTT": 1186, + "TCTGTCTG": 1187, + "TTTTAATT": 1188, + "CTGGCC": 1189, + "TAAATGA": 1190, + "CGTCAA": 1191, + "TTAGTA": 1192, + "GTCTCTG": 1193, + "TTTTAAAA": 1194, + "CAGTTTT": 1195, + "CTTCCTT": 1196, + "TATATAA": 1197, + "GCTTTTA": 1198, + "TTTTTCA": 1199, + "GGTC": 1200, + "TTATTAA": 1201, + "TTTTGTT": 1202, + "CATAGA": 1203, + "TAGGAA": 1204, + "GAGAGAA": 1205, + "GTAGCTG": 1206, + "TTATGA": 1207, + "GTAGTG": 1208, + "GGAGAGG": 1209, + "CTCTGAA": 1210, + "TAGTC": 1211, + "GACTCC": 1212, + "TCCCTCC": 
1213, + "TAATGTT": 1214, + "CATCTA": 1215, + "GCCACCA": 1216, + "GTACTA": 1217, + "TGGGAAA": 1218, + "CGCCTT": 1219, + "GCCCGG": 1220, + "GGAGGAA": 1221, + "GTACCA": 1222, + "CGCAAA": 1223, + "CATAAAA": 1224, + "TAACATT": 1225, + "GCTAAAA": 1226, + "TCTTCTG": 1227, + "GCCAAAA": 1228, + "GTATGA": 1229, + "GTCTTTG": 1230, + "TACTGA": 1231, + "TCCCAGG": 1232, + "TTATTTA": 1233, + "TTAGTT": 1234, + "GGACC": 1235, + "TATAAAA": 1236, + "CAAACAA": 1237, + "CTTCTC": 1238, + "TCTATCTA": 1239, + "GAAATAA": 1240, + "GTGTAA": 1241, + "CTTTGTT": 1242, + "GATAAAA": 1243, + "GCCCAGG": 1244, + "GCGATT": 1245, + "AAAAAATT": 1246, + "TACAGG": 1247, + "GGCTAA": 1248, + "TAGCTT": 1249, + "GTCTCTA": 1250, + "CTCCTGA": 1251, + "GAATAAA": 1252, + "TTACCA": 1253, + "GGGACA": 1254, + "GCCACTG": 1255, + "GTTTAAA": 1256, + "GTCTGTG": 1257, + "TGACAAA": 1258, + "TACATTTT": 1259, + "GCCACC": 1260, + "TGTTTT": 1261, + "TAGCAA": 1262, + "TTATAAA": 1263, + "GACCCA": 1264, + "GCAGC": 1265, + "CAGACAGA": 1266, + "CACAAAA": 1267, + "GCCCTA": 1268, + "TATTAAAA": 1269, + "CGTATT": 1270, + "CCATCC": 1271, + "TCGATT": 1272, + "GAAGGAA": 1273, + "GATCCA": 1274, + "TATTTGA": 1275, + "GTGAATT": 1276, + "TACCTT": 1277, + "CGTCTT": 1278, + "CCTAGG": 1279, + "TCGAAA": 1280, + "CTTTCTG": 1281, + "TGAAGAA": 1282, + "TCTCTCA": 1283, + "GTCTCTT": 1284, + "GGAGGGG": 1285, + "GTCTGTT": 1286, + "CTATGA": 1287, + "GGAAATT": 1288, + "GCACACA": 1289, + "GCCTTTT": 1290, + "CAGTCC": 1291, + "CTGGTA": 1292, + "GCATCC": 1293, + "TAGTTA": 1294, + "GGCTTA": 1295, + "GAGTCC": 1296, + "TGAAAA": 1297, + "TAGATAGA": 1298, + "TGTTTGTT": 1299, + "TACTCA": 1300, + "CATTTAA": 1301, + "GATTTTA": 1302, + "CACTCC": 1303, + "GAAACAA": 1304, + "GCGCTG": 1305, + "TCTTTCA": 1306, + "CTGTCC": 1307, + "GAACTCA": 1308, + "CGGAAA": 1309, + "TATTGTT": 1310, + "GCACTA": 1311, + "TATTCAA": 1312, + "GCGGGG": 1313, + "GTGGCC": 1314, + "TAATTAAA": 1315, + "TACTAA": 1316, + "GCGGTG": 1317, + "TACCAA": 1318, + "GGTATA": 1319, + "CTAGTT": 1320, + 
"GCAGAGG": 1321, + "CTTTTTTTT": 1322, + "TTTTTTTTTTTTTTTT": 1323, + "TACAGTA": 1324, + "CCATGTT": 1325, + "TAGTGA": 1326, + "CGTGTG": 1327, + "GCTCTGA": 1328, + "CTTCCTG": 1329, + "TCGCTG": 1330, + "TAAATCA": 1331, + "TCCAATT": 1332, + "GTTTCTG": 1333, + "GAAGAGA": 1334, + "GGGTAA": 1335, + "CCATAA": 1336, + "TTATATT": 1337, + "CGAATT": 1338, + "CCGGA": 1339, + "TGAGCC": 1340, + "CCGTA": 1341, + "CAGAGGA": 1342, + "GTGTTTG": 1343, + "GACAAAA": 1344, + "TTTTTTAAA": 1345, + "GTTGCC": 1346, + "GAGTTTT": 1347, + "TCAAAAAA": 1348, + "TGTTTCA": 1349, + "TATCTA": 1350, + "TCTCTCC": 1351, + "CTCCACA": 1352, + "TAAATATT": 1353, + "TTTTCTG": 1354, + "CTCTCAA": 1355, + "CCTTAAA": 1356, + "TCTTTTAA": 1357, + "GAACAAA": 1358, + "TTAGCA": 1359, + "GCTCATG": 1360, + "TAAAGTA": 1361, + "GGATAA": 1362, + "TTATTAAA": 1363, + "CTCCATT": 1364, + "TCTCTGA": 1365, + "TTATTTG": 1366, + "CCTGTAA": 1367, + "TTATATA": 1368, + "GACTTTT": 1369, + "TGTTGTT": 1370, + "GCAAATG": 1371, + "CTTCAAA": 1372, + "GAATATT": 1373, + "GAATCC": 1374, + "CTCTTAA": 1375, + "GCATAA": 1376, + "GAATGAA": 1377, + "CTTAAAAA": 1378, + "TAAAAATG": 1379, + "TTTTAAAAA": 1380, + "CTCTGGG": 1381, + "TGATCC": 1382, + "GCTCTCA": 1383, + "CTCCAGA": 1384, + "GAGTGCAGTG": 1385, + "CAATATT": 1386, + "TAGAAAA": 1387, + "GTAAATG": 1388, + "TAGCTG": 1389, + "GCTCAAA": 1390, + "GCAGGAA": 1391, + "TACCTG": 1392, + "GGGAAAA": 1393, + "TTTTCTA": 1394, + "GGGGGGGG": 1395, + "CCGA": 1396, + "CTTTGAA": 1397, + "GGAGGTG": 1398, + "TAGTCA": 1399, + "GGCCCA": 1400, + "TGATGTT": 1401, + "CAAATAA": 1402, + "TCTTCCA": 1403, + "GCGCTT": 1404, + "GTATTTG": 1405, + "GTCTC": 1406, + "GAAATCA": 1407, + "TGATAAA": 1408, + "CATTCTT": 1409, + "TATCCA": 1410, + "GCCTCTG": 1411, + "TGAGATG": 1412, + "CGCCAA": 1413, + "GTTTTATT": 1414, + "TATATATT": 1415, + "GTAGGA": 1416, + "GACAGAA": 1417, + "CTCCAGCCTGGG": 1418, + "GCGTGA": 1419, + "GGTATG": 1420, + "GAGGGAGG": 1421, + "TCATTTG": 1422, + "CTACC": 1423, + "TACAGAA": 1424, + "GGTAGA": 1425, + 
"GATCTA": 1426, + "GTCCATG": 1427, + "TGAGGAA": 1428, + "TAATAAAA": 1429, + "TAAACTT": 1430, + "TCACATT": 1431, + "GGAGGCC": 1432, + "TCACAAA": 1433, + "CACTTTT": 1434, + "CGGCC": 1435, + "CAACAGA": 1436, + "GTAGAGA": 1437, + "GTTATTTT": 1438, + "CGTTTG": 1439, + "TCGTCA": 1440, + "TCTGCTG": 1441, + "CAACACA": 1442, + "GGTAGG": 1443, + "GCAGCTG": 1444, + "TAGTAGAGA": 1445, + "CAAGCC": 1446, + "GCATTTG": 1447, + "TAATATG": 1448, + "GCTTAAA": 1449, + "GCTTCTG": 1450, + "CTCTCCA": 1451, + "TCATCTT": 1452, + "CGTCTG": 1453, + "TCATTTA": 1454, + "CATAGG": 1455, + "GCTCCTT": 1456, + "TGTTCTT": 1457, + "TACATTA": 1458, + "CACAGAA": 1459, + "TAAATATA": 1460, + "TAGAGG": 1461, + "GATAGG": 1462, + "TCCTGAA": 1463, + "GGAGCTG": 1464, + "TGATATT": 1465, + "TCATTAA": 1466, + "CTTTTAAA": 1467, + "TCGTTA": 1468, + "TAAACTA": 1469, + "GTTTGAA": 1470, + "TAAAATTA": 1471, + "CACCCC": 1472, + "TCAGAGA": 1473, + "CTCCTGCCTCA": 1474, + "TGACATT": 1475, + "GTATTTA": 1476, + "CTTCATT": 1477, + "GAAACTG": 1478, + "TAACACA": 1479, + "GTTCAAA": 1480, + "GGAGATG": 1481, + "TCGGCC": 1482, + "CAGCATT": 1483, + "TCGATG": 1484, + "TATTCTA": 1485, + "CTGTGAA": 1486, + "TATTGAA": 1487, + "TTTTCCA": 1488, + "TATTTCTT": 1489, + "GGTGAAA": 1490, + "CTGAGAA": 1491, + "GCACAGA": 1492, + "GCGAGG": 1493, + "CTGTGTG": 1494, + "TGAAATG": 1495, + "TGATGAA": 1496, + "GTCCAAA": 1497, + "CTCAATT": 1498, + "TCCAGAA": 1499, + "GTATATA": 1500, + "TAAAGTT": 1501, + "TCTCAAAA": 1502, + "TCCATCA": 1503, + "GTCTGAA": 1504, + "TGAGAGA": 1505, + "TGATTTG": 1506, + "TTAGCC": 1507, + "CTCCATG": 1508, + "TCCCTGA": 1509, + "GAGCTA": 1510, + "CCCCCCCC": 1511, + "GTGGAAA": 1512, + "CTGGGAA": 1513, + "CAATGAA": 1514, + "CCACACA": 1515, + "CTTTCAA": 1516, + "CGGAGG": 1517, + "TCGTGA": 1518, + "CCAGAAA": 1519, + "GTTTTAAA": 1520, + "TGTTGAA": 1521, + "TCCTGTG": 1522, + "CTAAATG": 1523, + "TCCTTTA": 1524, + "GTCTGGG": 1525, + "TCTCTTTT": 1526, + "TACGG": 1527, + "TATTGTA": 1528, + "TTAGTG": 1529, + "TTACC": 1530, + 
"TAATCCCAGCACTTTG": 1531, + "TCTGGAA": 1532, + "CTTCTCA": 1533, + "CGCATT": 1534, + "TATTTAAA": 1535, + "TCACACA": 1536, + "TAATCAA": 1537, + "GCGAAA": 1538, + "GGGCCA": 1539, + "GTTCATT": 1540, + "GAGAAAAA": 1541, + "TTTTGTA": 1542, + "TACTTTT": 1543, + "TCGAGG": 1544, + "GTGAAAAA": 1545, + "CAATATA": 1546, + "TCCCATG": 1547, + "CAATTAA": 1548, + "CTGGAAA": 1549, + "CCCAGCA": 1550, + "TCCCATT": 1551, + "TCCTGTT": 1552, + "CTCTTTA": 1553, + "TCCCCTT": 1554, + "GTTTCAA": 1555, + "GTCCAGG": 1556, + "GGAAGGA": 1557, + "TAGTTTT": 1558, + "TGACCTT": 1559, + "GTGCTGGGATTACAGG": 1560, + "TATTTATA": 1561, + "TCTGCAA": 1562, + "CTGAAAAA": 1563, + "TATGTTA": 1564, + "CTTCACA": 1565, + "GCACAGG": 1566, + "CCTGCTG": 1567, + "TTTTTTAA": 1568, + "GTTATTA": 1569, + "CCCTTTT": 1570, + "TGATTTA": 1571, + "TACAAAA": 1572, + "TAAGTAA": 1573, + "TTTTTAAA": 1574, + "CATCTC": 1575, + "GTGGTGA": 1576, + "GTGGAGA": 1577, + "CTCTGCA": 1578, + "GTTAAAAA": 1579, + "TACATACA": 1580, + "CTTTGTG": 1581, + "GGACACA": 1582, + "TCTGATG": 1583, + "TATTATT": 1584, + "TCTTCTA": 1585, + "CTGTGTT": 1586, + "TCAGCTT": 1587, + "CTTTATA": 1588, + "GGCGC": 1589, + "TCCCTCA": 1590, + "GTACC": 1591, + "TGGAGAA": 1592, + "CAAAAATT": 1593, + "TCTTTAA": 1594, + "CTCTCTC": 1595, + "TGAGTGA": 1596, + "GCAGCTT": 1597, + "CGGATT": 1598, + "TACGA": 1599, + "TCTTGTT": 1600, + "TCGTAA": 1601, + "GCCTGTG": 1602, + "TATTCTG": 1603, + "GGGATA": 1604, + "GGGTCC": 1605, + "TGAGATT": 1606, + "CTTTTATT": 1607, + "TCCCACA": 1608, + "CATGGTG": 1609, + "TTAGGA": 1610, + "GAACACA": 1611, + "TCATAAA": 1612, + "CAACATT": 1613, + "GGTCCA": 1614, + "GAATTTG": 1615, + "TATTAATT": 1616, + "TCCTGGG": 1617, + "GCAGCAA": 1618, + "CTCTTCA": 1619, + "GAAGAGG": 1620, + "TCTGTCA": 1621, + "CTGAATG": 1622, + "CCACAAA": 1623, + "GTGGAGG": 1624, + "TGATTAA": 1625, + "CTCCCTCC": 1626, + "CACACACACACACACACACACACACACACACA": 1627, + "GCGATG": 1628, + "CATTCTG": 1629, + "GTAGAAA": 1630, + "TCATCAA": 1631, + "TTTTCAA": 1632, + "TATGTATG": 1633, + 
"CCAAATG": 1634, + "TAATTTTA": 1635, + "TAAGGAA": 1636, + "CTTGAAA": 1637, + "AAAAAAAAAAAA": 1638, + "GCTCCTG": 1639, + "GCAGATG": 1640, + "GAAAAATT": 1641, + "GACGC": 1642, + "GTGGGGG": 1643, + "GTCAATT": 1644, + "CTTGCTT": 1645, + "TGACACA": 1646, + "GTGTGTT": 1647, + "CCAGAGA": 1648, + "CCCAGCC": 1649, + "TAAAGAAA": 1650, + "GTCCATT": 1651, + "TAAATTAA": 1652, + "CCCAAAA": 1653, + "GAATTAA": 1654, + "TGAATTA": 1655, + "TTTTTTTG": 1656, + "CCAGCTT": 1657, + "CAATTTG": 1658, + "CTGTTTG": 1659, + "GTCTCAA": 1660, + "GTTTGTG": 1661, + "GGCATA": 1662, + "GGTACA": 1663, + "TGATGTG": 1664, + "GATTTCA": 1665, + "TCTGCTT": 1666, + "GTAATTA": 1667, + "TAAAAAAAA": 1668, + "GCCGCC": 1669, + "TGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTG": 1670, + "GCGTCA": 1671, + "GCTCATT": 1672, + "GAACCTG": 1673, + "TAAACAAA": 1674, + "GTGCTGA": 1675, + "TCAGGAA": 1676, + "TCCTCAA": 1677, + "TCTATTTT": 1678, + "TCTGTTTT": 1679, + "CAGAGCA": 1680, + "CCAGGAA": 1681, + "GTCTTTA": 1682, + "TCTTCAA": 1683, + "TCAAAATT": 1684, + "GCTTATT": 1685, + "GTTCCTT": 1686, + "CACCTA": 1687, + "TCACTGA": 1688, + "GAAGCAA": 1689, + "TAAAGA": 1690, + "TCCTTCA": 1691, + "TCTCATG": 1692, + "TCAGTGA": 1693, + "TACACAA": 1694, + "CACGTG": 1695, + "CCTAAAA": 1696, + "GCCTTTG": 1697, + "GGCTTTT": 1698, + "GTTGAAA": 1699, + "GTTCTC": 1700, + "CTAGA": 1701, + "CTACAAA": 1702, + "GCACAAA": 1703, + "TTACATT": 1704, + "GGCCCC": 1705, + "TAATGTG": 1706, + "CTGCCTT": 1707, + "TCCCAGA": 1708, + "GTGAATG": 1709, + "GGACAGG": 1710, + "GGATGTG": 1711, + "GTTTATA": 1712, + "TGACCAA": 1713, + "GTGGCTG": 1714, + "GTTCTCA": 1715, + "CTTATTTT": 1716, + "CTGGAGA": 1717, + "TTACAAA": 1718, + "GTCTTCA": 1719, + "CAAGAGA": 1720, + "CCATTTG": 1721, + "TCACAGA": 1722, + "CTAGTA": 1723, + "CATTATT": 1724, + "TTAGA": 1725, + "GCTCTCC": 1726, + "GCGCCA": 1727, + "TATGTTTT": 1728, + "TCCTCCA": 1729, + "CAGAAAAA": 1730, + "GTGGGAA": 1731, + "TAATCTT": 1732, + "TGAGTCA": 1733, + "CTGCTC": 1734, + "GTCTCCA": 1735, + "TCATGTT": 1736, + "GTTTCCA": 1737, 
+ "TAAGCAA": 1738, + "CTAAAAATA": 1739, + "TGACTGA": 1740, + "TCGGTT": 1741, + "TTAGAAA": 1742, + "TAAGCC": 1743, + "TAAAGCA": 1744, + "CCTCTCC": 1745, + "CCTCCTT": 1746, + "TCAGATT": 1747, + "TATGAAAA": 1748, + "GCTGATG": 1749, + "CATATTTT": 1750, + "GCTCCAA": 1751, + "CGGCGG": 1752, + "CCACTGA": 1753, + "CAGCAAA": 1754, + "CTGTCTT": 1755, + "CTAGCA": 1756, + "TCGGGG": 1757, + "CACAGCA": 1758, + "GCTGATT": 1759, + "CTAGGA": 1760, + "TAACTC": 1761, + "TCATATT": 1762, + "CCTTCTT": 1763, + "CTGCAAA": 1764, + "CCCGC": 1765, + "GGTCTA": 1766, + "CCCAGGA": 1767, + "GTGTCTG": 1768, + "TAATAATAATAA": 1769, + "TCACATG": 1770, + "CAATTTA": 1771, + "TATATATATATATATATATATATATATATATA": 1772, + "CCACAGA": 1773, + "TCAATTTT": 1774, + "GTATTAA": 1775, + "GAACATT": 1776, + "TCTCTTA": 1777, + "CTATTTG": 1778, + "TCTTTCC": 1779, + "GGTTAAA": 1780, + "GCTAATT": 1781, + "CTGCTGA": 1782, + "TACCTA": 1783, + "CAGGGTT": 1784, + "TCGCCA": 1785, + "CAAAAATTA": 1786, + "CTTCTGA": 1787, + "GCATGTG": 1788, + "CTATTAA": 1789, + "GCACATG": 1790, + "CAACATG": 1791, + "TCATGAA": 1792, + "GAATGTT": 1793, + "GGGTTTT": 1794, + "CTGCCTG": 1795, + "GTCCACA": 1796, + "TAAACA": 1797, + "CTCTGGA": 1798, + "GACCCC": 1799, + "GGCAAAA": 1800, + "TCTGTTA": 1801, + "CTAGTG": 1802, + "CTATATA": 1803, + "TCAGTCA": 1804, + "TAACTAA": 1805, + "GAAGATG": 1806, + "GTCTTAA": 1807, + "CAAGGAA": 1808, + "GTAAAAAA": 1809, + "TCCCCTG": 1810, + "TCGCAA": 1811, + "TCTGCCTG": 1812, + "CCTTTTA": 1813, + "GTCCCAGCTA": 1814, + "TATATATG": 1815, + "TATTGTG": 1816, + "TGTGTTTT": 1817, + "GCGCAA": 1818, + "CACAGTG": 1819, + "TAAGATT": 1820, + "CTCTGTA": 1821, + "GGAGGCTGA": 1822, + "GGACAAA": 1823, + "TATTAAAAA": 1824, + "TCGTCC": 1825, + "TCGGAA": 1826, + "CTATAAA": 1827, + "CTTCAGA": 1828, + "CTAGAAA": 1829, + "CATTCAA": 1830, + "CACGCA": 1831, + "CAGGATT": 1832, + "CCATCTT": 1833, + "GTAGCC": 1834, + "GAATTTA": 1835, + "CACGC": 1836, + "CAATCC": 1837, + "TGAGCAA": 1838, + "GAAGCTG": 1839, + "TCAATTA": 1840, + "GAAGTCA": 1841, 
+ "CTGCACA": 1842, + "CCACGG": 1843, + "GGATCTT": 1844, + "CTCCTGCCTCAGCCTCC": 1845, + "TAAATGAA": 1846, + "CCGTC": 1847, + "TCGGTG": 1848, + "TTTTATTA": 1849, + "GCAGGGG": 1850, + "GCAGGTG": 1851, + "TCTATTA": 1852, + "TAACTTA": 1853, + "CTAATTTT": 1854, + "CCCGCC": 1855, + "TAATACA": 1856, + "GGATTAAA": 1857, + "TCTCTCTG": 1858, + "GCTTCTT": 1859, + "CATTTATT": 1860, + "CCAGAGG": 1861, + "GGACAGA": 1862, + "GCCAATT": 1863, + "TCCCCAA": 1864, + "GTTGATT": 1865, + "GAAGAAAA": 1866, + "GCATTTA": 1867, + "CTCTAAA": 1868, + "CACACACACACA": 1869, + "CCTCAAA": 1870, + "TATAATT": 1871, + "CAATGTT": 1872, + "GCCCAGA": 1873, + "GTATATT": 1874, + "CTAAAAAA": 1875, + "CCACAGG": 1876, + "TAAGAGA": 1877, + "TCCTTAA": 1878, + "TATTTTTT": 1879, + "GAATATA": 1880, + "GGATTTG": 1881, + "GTGTGAA": 1882, + "CTGGCTT": 1883, + "GCGGCA": 1884, + "TCCGCC": 1885, + "GCATCTT": 1886, + "TCTAATA": 1887, + "CTGCATT": 1888, + "CTCTGCC": 1889, + "TCACTCA": 1890, + "TCAGCAA": 1891, + "TATTATG": 1892, + "CCAGCTG": 1893, + "GATCTC": 1894, + "GCCTCTT": 1895, + "CTTCCAA": 1896, + "TCCTAAA": 1897, + "TCATCTG": 1898, + "CTATTTA": 1899, + "CTGCAGG": 1900, + "CAAGCAA": 1901, + "GCGGAA": 1902, + "GAAATAAA": 1903, + "TAAAATAA": 1904, + "TCACCTT": 1905, + "CCATGTG": 1906, + "GACCTA": 1907, + "CAGATGA": 1908, + "GTGGCTT": 1909, + "TTATTATTATTA": 1910, + "TCCCGG": 1911, + "TATTTGTT": 1912, + "CTGTAAA": 1913, + "TCCATCCA": 1914, + "CTGTATA": 1915, + "GTTTCTA": 1916, + "GTTGCTT": 1917, + "CCATGAA": 1918, + "GCTCTTA": 1919, + "CTTCATG": 1920, + "GTTCCTG": 1921, + "GCTGGGA": 1922, + "TCAGAGG": 1923, + "CATTAAAA": 1924, + "TCAGTAA": 1925, + "GAATGTG": 1926, + "CTTATTA": 1927, + "GCACTGA": 1928, + "TGAGGTT": 1929, + "CATCAAA": 1930, + "CTTCTCC": 1931, + "GTTTATG": 1932, + "CTTTCCA": 1933, + "GTGCCTG": 1934, + "GAAAGGA": 1935, + "GCATCTG": 1936, + "TACCCA": 1937, + "TAACAGA": 1938, + "AAAAAAAAAAA": 1939, + "CTATGAA": 1940, + "CAGTAAA": 1941, + "TAGCTA": 1942, + "TCGTTTT": 1943, + "GTGTCTT": 1944, + "GAGCAAA": 
1945, + "TCTAAAAA": 1946, + "GTTCACA": 1947, + "GAAATGA": 1948, + "CAAATGA": 1949, + "GCCCTGA": 1950, + "GTGTTTA": 1951, + "TCATGTG": 1952, + "CATATTA": 1953, + "TCAAAAAAA": 1954, + "TAAGTTA": 1955, + "TCTCTCTT": 1956, + "CCAGTGA": 1957, + "CCTCTGA": 1958, + "CAAGATG": 1959, + "GCCTGTT": 1960, + "GTTTGGG": 1961, + "CATTCATT": 1962, + "GCCCCTG": 1963, + "GTTCTGA": 1964, + "GCGGCC": 1965, + "GCGGTT": 1966, + "CAAAACAAAA": 1967, + "TACATATA": 1968, + "GAATTAAA": 1969, + "TCAAGAA": 1970, + "CTGTATT": 1971, + "TTTTTATT": 1972, + "GATTATT": 1973, + "TCTAATG": 1974, + "GTTGCTG": 1975, + "TGAATGAA": 1976, + "TCAGCTG": 1977, + "CTTGATT": 1978, + "CAGAATG": 1979, + "CTAATTA": 1980, + "TATAATG": 1981, + "GTTTTGTTTT": 1982, + "CCAGCCTG": 1983, + "TGATGGA": 1984, + "GCAGATT": 1985, + "CTCTATT": 1986, + "GCAGTCA": 1987, + "TAAGTGA": 1988, + "CTACACA": 1989, + "CGCATG": 1990, + "TAGCCA": 1991, + "GTGGCTCA": 1992, + "CAAATAAA": 1993, + "GTGCTCA": 1994, + "TTTTTTTTTT": 1995, + "TAACATG": 1996, + "TCCCAGCTA": 1997, + "CAAAGTA": 1998, + "TCATATA": 1999, + "CAGCATG": 2000, + "TGATCTT": 2001, + "CATAATT": 2002, + "TGTGTTA": 2003, + "TTTTGAA": 2004, + "TTAATTA": 2005, + "GATATTA": 2006, + "TCATTCA": 2007, + "TGATATA": 2008, + "TGACTCA": 2009, + "GACGTT": 2010, + "TGACATG": 2011, + "GTTGTGA": 2012, + "CATTTTTT": 2013, + "GCCTGGA": 2014, + "CTATGTT": 2015, + "CTTTGGG": 2016, + "GTCTCAAA": 2017, + "CTGGCTG": 2018, + "CCACATG": 2019, + "GGCGTG": 2020, + "CTTAATG": 2021, + "TAAGATG": 2022, + "GTATAAA": 2023, + "TGTATTA": 2024, + "TAACTCA": 2025, + "GAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGA": 2026, + "GCATGAA": 2027, + "GTTAATG": 2028, + "TCCAGGA": 2029, + "GAGAGAAA": 2030, + "TCTCTGTG": 2031, + "CTCTCTA": 2032, + "CCACCTG": 2033, + "GCCAGGA": 2034, + "CTGGAGG": 2035, + "CCATTTA": 2036, + "GTCTGGA": 2037, + "GCCCACA": 2038, + "TAGAGAA": 2039, + "CAACTCA": 2040, + "GGCAGGA": 2041, + "TCTTATG": 2042, + "CAAAGGA": 2043, + "GGTAAAA": 2044, + "GAGAGGA": 2045, + "GTCCAGA": 2046, + "GCCCTCA": 2047, + 
"GATATTTT": 2048, + "CAGGGAA": 2049, + "CCACATT": 2050, + "GAGGAGG": 2051, + "GAAACTT": 2052, + "CAGAATT": 2053, + "TCAGATG": 2054, + "TATTTCC": 2055, + "TACAGTG": 2056, + "TGAGCTG": 2057, + "CCATCTG": 2058, + "GAGAATG": 2059, + "TCAACAA": 2060, + "ATT": 2061, + "TAACTGA": 2062, + "TGAGAGG": 2063, + "CACTGAA": 2064, + "CCACCTT": 2065, + "CTGCAGA": 2066, + "TCACCAA": 2067, + "TGAGCTT": 2068, + "CAAAGCA": 2069, + "GGTTTTA": 2070, + "CGGGGTT": 2071, + "TCCAAAAA": 2072, + "TATGTATA": 2073, + "CCAGATG": 2074, + "TCCATTTT": 2075, + "CTGCTCA": 2076, + "GATAATT": 2077, + "CCACCAA": 2078, + "CTCCTCC": 2079, + "GAGAATT": 2080, + "GAAAGTA": 2081, + "TAAAATAAAA": 2082, + "CTTCTTA": 2083, + "CTGTTTA": 2084, + "GAATCAA": 2085, + "GCATGTT": 2086, + "GCACGG": 2087, + "GACTGAA": 2088, + "GTGCACA": 2089, + "GACGTG": 2090, + "TATACAA": 2091, + "TCGACA": 2092, + "GAAGACA": 2093, + "TAAAGGA": 2094, + "GATCAAA": 2095, + "CAGTGTG": 2096, + "CTAGCC": 2097, + "GAGGAAAA": 2098, + "TCTGAAAA": 2099, + "GAACCCA": 2100, + "GATGGATG": 2101, + "GTTCTTA": 2102, + "CTATATT": 2103, + "GCATTAA": 2104, + "TCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTC": 2105, + "TCAGTC": 2106, + "TATTTTTG": 2107, + "GAGGATT": 2108, + "GTATGTG": 2109, + "TAACCAA": 2110, + "GTTGTTTT": 2111, + "TTTTTCTT": 2112, + "GTGTTAA": 2113, + "CTTGGAA": 2114, + "AAAAAATG": 2115, + "CAATGTG": 2116, + "GTGCCTT": 2117, + "GCCTCAA": 2118, + "GAGTCTT": 2119, + "GCTAATTTT": 2120, + "CGAAAAA": 2121, + "GTGTATA": 2122, + "GCGTTA": 2123, + "CTGCACTCCAGCCTGGG": 2124, + "GTTCATG": 2125, + "CAAAGAAA": 2126, + "GCAGTAA": 2127, + "GGATGAA": 2128, + "CTTTATG": 2129, + "CAGGAAAA": 2130, + "TCCTGCA": 2131, + "CTGTCTG": 2132, + "GAACATG": 2133, + "GGATGGA": 2134, + "GCCTGAA": 2135, + "CAAAAATG": 2136, + "TCCAATG": 2137, + "CCAGCAA": 2138, + "GGCCTA": 2139, + "CAACTGA": 2140, + "GCACCTG": 2141, + "GTCTATT": 2142, + "CCTCTCA": 2143, + "GTGGTCA": 2144, + "GTGTAAA": 2145, + "GTACACA": 2146, + "GTAAAATT": 2147, + "GTACATT": 2148, + "TATATAAA": 2149, + "CTGTTAA": 2150, 
+ "TAAGTCA": 2151, + "GCCTCCA": 2152, + "AAATTAAA": 2153, + "GTGCAGG": 2154, + "TCCTGGA": 2155, + "GTGCAAA": 2156, + "GCGTCC": 2157, + "CCATTAA": 2158, + "GGAGGGA": 2159, + "TCACTTA": 2160, + "TCATTAAA": 2161, + "CAACATA": 2162, + "TAATAGA": 2163, + "TAATGTA": 2164, + "GATTTTTT": 2165, + "GTTGTCA": 2166, + "GGAGACA": 2167, + "GTGTGGG": 2168, + "TCACAGG": 2169, + "TCGGCA": 2170, + "CTCCCTG": 2171, + "GACCAAA": 2172, + "TGTTTATT": 2173, + "CGAATG": 2174, + "CTCAATG": 2175, + "TCACCTG": 2176, + "CAGTGTT": 2177, + "TGAGACA": 2178, + "TAGGGG": 2179, + "GAAAAATG": 2180, + "GTTGAGA": 2181, + "TCGATA": 2182, + "CTCGGGAGG": 2183, + "GTTGTC": 2184, + "CCAGTCA": 2185, + "GCCCAGGCTG": 2186, + "GAACAGA": 2187, + "GGCTCACTGCAA": 2188, + "GCAGACA": 2189, + "TGAGGTG": 2190, + "CACGTT": 2191, + "TAAGAAAA": 2192, + "CCAGGCA": 2193, + "GTATCTT": 2194, + "CTTGGGAGG": 2195, + "CTTTCTA": 2196, + "CCGCTG": 2197, + "GAGCTCA": 2198, + "GAGACAGA": 2199, + "CTTCAGG": 2200, + "GCACATT": 2201, + "GTACAAA": 2202, + "CTTGTAA": 2203, + "GTGGGTG": 2204, + "GAAGTGA": 2205, + "GGTCTC": 2206, + "GTATGTT": 2207, + "GCACTCA": 2208, + "TTATGTT": 2209, + "CAAGTCA": 2210, + "CAAGTGA": 2211, + "GAAACTA": 2212, + "TAAATAAAA": 2213, + "TCTTAAAA": 2214, + "GTTGGAA": 2215, + "GTTCTAA": 2216, + "CCACTC": 2217, + "CAGTGAA": 2218, + "GAAAGG": 2219, + "GCACGA": 2220, + "TAACTTTT": 2221, + "GTTGTTA": 2222, + "TCAGTTA": 2223, + "CGGATG": 2224, + "TATTTGAA": 2225, + "CCCTGAA": 2226, + "GCCCTC": 2227, + "CTTCTAA": 2228, + "TTTGTTTT": 2229, + "GAGCTGA": 2230, + "CTGTGGG": 2231, + "CAAGATT": 2232, + "GAAGCTT": 2233, + "TGAGTAA": 2234, + "CTTGCTG": 2235, + "GGATGGG": 2236, + "CGTATG": 2237, + "TCCATTA": 2238, + "GTCTGCA": 2239, + "GCCATTTT": 2240, + "GTTGTAA": 2241, + "CACACAA": 2242, + "GGACTACAGG": 2243, + "CGTTTTA": 2244, + "TCTTCC": 2245, + "TAACCTT": 2246, + "CTTTAAAA": 2247, + "TGAATTTT": 2248, + "CTACAGA": 2249, + "GCAAGAA": 2250, + "TAACAAAA": 2251, + "CAATTAAA": 2252, + "CCACTCA": 2253, + "CATGGTGAAA": 2254, + 
"CCCAGAA": 2255, + "CTACATT": 2256, + "CCGAGG": 2257, + "TCCAGTG": 2258, + "TGAGTTA": 2259, + "GGAGTCA": 2260, + "TAACGA": 2261, + "GAGTAAA": 2262, + "GACTCTG": 2263, + "GGAGCTT": 2264, + "TACTCC": 2265, + "CTGCATG": 2266, + "GCTTTTTT": 2267, + "GTCTAAA": 2268, + "GTGCGG": 2269, + "CATCTCA": 2270, + "TGATCAA": 2271, + "GGAGATT": 2272, + "GCAAAAAA": 2273, + "CACCAAA": 2274, + "TGACGG": 2275, + "CAGAGG": 2276, + "GTTGATG": 2277, + "CTTGTCA": 2278, + "TCCACCTG": 2279, + "GGAGCAA": 2280, + "CAAGTAA": 2281, + "CCATAAA": 2282, + "GTGCATG": 2283, + "GCATATT": 2284, + "GTAGATT": 2285, + "GCCTAA": 2286, + "CTCAAAAA": 2287, + "GGAGAAAA": 2288, + "CTATCC": 2289, + "TAATATTA": 2290, + "GTGCTC": 2291, + "CAATATG": 2292, + "TGTGGAA": 2293, + "TGACTC": 2294, + "GTGTATG": 2295, + "TTTTAATG": 2296, + "GCTCTAA": 2297, + "CACAATG": 2298, + "CAGCTCA": 2299, + "GTTGGTT": 2300, + "CTAAAATT": 2301, + "GTCTATG": 2302, + "TGTGAAAA": 2303, + "CTGGGTT": 2304, + "CCCCTCC": 2305, + "CCCTCTT": 2306, + "GCAGGGA": 2307, + "GAAACCA": 2308, + "CATTTCC": 2309, + "GCAGCCA": 2310, + "TCATATG": 2311, + "GCAGGCA": 2312, + "CGTAAAA": 2313, + "TGACCTG": 2314, + "CAGAGGTT": 2315, + "CTTGTGA": 2316, + "TTATCTT": 2317, + "CTGTATG": 2318, + "GTCAATG": 2319, + "GGACGG": 2320, + "GCGTAA": 2321, + "CAAACTA": 2322, + "TAAATGTT": 2323, + "CTTCGG": 2324, + "CTCCCCA": 2325, + "TACAATG": 2326, + "TCTGTAA": 2327, + "GAATATG": 2328, + "GCGGGA": 2329, + "GGACATT": 2330, + "TTATGAA": 2331, + "GGATGTT": 2332, + "GGACATG": 2333, + "TCAGGTG": 2334, + "CAACAAAA": 2335, + "GAAAGAGA": 2336, + "GTGGATG": 2337, + "GGGCTA": 2338, + "CCATCAA": 2339, + "CAGCTGA": 2340, + "CTCCACC": 2341, + "CAATCAA": 2342, + "GTGGTC": 2343, + "TGACAGG": 2344, + "CCATTCA": 2345, + "GTCCCTG": 2346, + "CAGACACA": 2347, + "GTTGGTG": 2348, + "CCTCCTG": 2349, + "GAACTGA": 2350, + "TATTCATT": 2351, + "GCCCATG": 2352, + "CAATCTT": 2353, + "GAAAGCA": 2354, + "GAATCTG": 2355, + "TTATTTTA": 2356, + "GTTTGGA": 2357, + "TTTTTGTT": 2358, + "GGGAATG": 2359, + 
"GCGACA": 2360, + "TAAACTG": 2361, + "CCATATT": 2362, + "GGATCC": 2363, + "CAAGCTT": 2364, + "TAAAAAAAAA": 2365, + "TCACTC": 2366, + "CACTGTT": 2367, + "TGTTAATT": 2368, + "GGACTGA": 2369, + "GGAGTGA": 2370, + "CATACACA": 2371, + "GTTTGTA": 2372, + "TCCAGCA": 2373, + "GTGCATT": 2374, + "GGAAAAAA": 2375, + "CCAAGAA": 2376, + "TCAATA": 2377, + "CTTCCCA": 2378, + "TGAGAAAA": 2379, + "GGCCTCCCAAA": 2380, + "CAAGCTG": 2381, + "GCCCAAA": 2382, + "TGACTTA": 2383, + "CAGCCTT": 2384, + "CTGGATT": 2385, + "TTTTTTTA": 2386, + "TCACGG": 2387, + "GCAGTTA": 2388, + "TGACTAA": 2389, + "TTACAGG": 2390, + "TGATATG": 2391, + "TAATTATT": 2392, + "TCTTGAA": 2393, + "GCCCCTT": 2394, + "GTTCAGA": 2395, + "CTCTATG": 2396, + "CCATGGA": 2397, + "GAGGGAA": 2398, + "GGAGGCA": 2399, + "CTTTGCA": 2400, + "TCTTGG": 2401, + "GGAGGTT": 2402, + "GCCAATG": 2403, + "CTGGTGA": 2404, + "CAACCAA": 2405, + "CCAGTC": 2406, + "CTTGAGA": 2407, + "TACAGCA": 2408, + "CTTGTC": 2409, + "GACGGA": 2410, + "CTTCTTTT": 2411, + "GTGGC": 2412, + "GAGGATG": 2413, + "CAATAAAA": 2414, + "GAAATTTT": 2415, + "AAAAAAAAAA": 2416, + "CTCTATA": 2417, + "GTATGAA": 2418, + "CTTGTTA": 2419, + "TAACATA": 2420, + "CAAACACA": 2421, + "TGATTAAA": 2422, + "GCTCTGTT": 2423, + "GTGGGTT": 2424, + "GTTGGGG": 2425, + "GTGTGTA": 2426, + "GTAATTTT": 2427, + "GTATCC": 2428, + "TGTGTGTGTGTG": 2429, + "TCTTCCTT": 2430, + "TCACTAA": 2431, + "TCTCCAAA": 2432, + "TATCAAA": 2433, + "TGATGGG": 2434, + "GGATATT": 2435, + "CAAATTTT": 2436, + "GTTCAGG": 2437, + "GTGGATT": 2438, + "GTGCAGA": 2439, + "GCTGCC": 2440, + "CTCAGAA": 2441, + "GCAGTC": 2442, + "GGATAAA": 2443, + "GCCTTCA": 2444, + "CCAGGTG": 2445, + "TATCTC": 2446, + "CAATGCA": 2447, + "CCCACTG": 2448, + "GTGTATT": 2449, + "CGACAGA": 2450, + "TGAGATA": 2451, + "CCAGGTT": 2452, + "TGTTTAA": 2453, + "CATCATG": 2454, + "TGATTCA": 2455, + "GCAATTA": 2456, + "GAAATGAA": 2457, + "CTTGGTT": 2458, + "GAAGATT": 2459, + "GGATTAA": 2460, + "CCTCATT": 2461, + "GGCCAGGCTG": 2462, + "GCTATTA": 2463, + 
"GCCAGCA": 2464, + "GAGACAGG": 2465, + "CTTGAGG": 2466, + "CAGTCTT": 2467, + "GTTCTCC": 2468, + "TATTTCAA": 2469, + "TGACGA": 2470, + "CATGAAAA": 2471, + "CATTATG": 2472, + "TAAATTTA": 2473, + "GAGTGAA": 2474, + "CAACAGG": 2475, + "TAAGCTT": 2476, + "CACATTTT": 2477, + "GATCTCA": 2478, + "TAGTCC": 2479, + "GACCCTG": 2480, + "TAATGCA": 2481, + "TAAGTC": 2482, + "TAATAATT": 2483, + "GAAGTAA": 2484, + "CAACTC": 2485, + "CATCATT": 2486, + "GACGAA": 2487, + "GAAACAAA": 2488, + "TATTTCTG": 2489, + "CATTAATT": 2490, + "CCACCCC": 2491, + "TAATATTTT": 2492, + "GTTTAAAA": 2493, + "GTATCTG": 2494, + "GTCAAAAA": 2495, + "GATGCTG": 2496, + "TGTTCTG": 2497, + "GGTCAAA": 2498, + "GTAGGAA": 2499, + "GTATATG": 2500, + "TGATCTG": 2501, + "GGGGCTG": 2502, + "GCATCAA": 2503, + "GCCAAAAA": 2504, + "CCACGA": 2505, + "GCTAATG": 2506, + "CAGAGAAA": 2507, + "CCTTCTG": 2508, + "TCCTCTA": 2509, + "GCAGGTT": 2510, + "CTCACTG": 2511, + "TAGATTA": 2512, + "GCCGAGA": 2513, + "CCATCCA": 2514, + "CTTTACA": 2515, + "GTACATG": 2516, + "GCACCAA": 2517, + "CTTTGTA": 2518, + "CTATGTG": 2519, + "TCACTTTT": 2520, + "TGAGTC": 2521, + "CAAGAAAA": 2522, + "CTGACTG": 2523, + "GTTTTTTTT": 2524, + "GCATAAA": 2525, + "TAATCTG": 2526, + "GAAAAAAAA": 2527, + "CAGGATG": 2528, + "TGAGCCA": 2529, + "GAATTCA": 2530, + "TCAGACA": 2531, + "GTTCCAA": 2532, + "TCAGGTT": 2533, + "CAAACTG": 2534, + "CATTTCTT": 2535, + "TGTTAAAA": 2536, + "CCAGACA": 2537, + "CAAGTTA": 2538, + "CATGTTA": 2539, + "CATTCTA": 2540, + "TCTTTTTG": 2541, + "TGAGGGG": 2542, + "CACATTA": 2543, + "TAAAATAAA": 2544, + "GCATATA": 2545, + "TGTTCTA": 2546, + "GAAGGGG": 2547, + "GAGTGTG": 2548, + "TAAGACA": 2549, + "GAACTC": 2550, + "CCAGTAA": 2551, + "GAGAGAGG": 2552, + "GCGACC": 2553, + "CAATTCA": 2554, + "CGGCTG": 2555, + "CCAGATT": 2556, + "CCTGGG": 2557, + "GGAAGAAA": 2558, + "GAGAGG": 2559, + "TCAAAATG": 2560, + "CCTCATG": 2561, + "TAAAGG": 2562, + "CTTTGGA": 2563, + "CCAGGGA": 2564, + "GTACAGA": 2565, + "CTGAGGCAGGA": 2566, + "TGTTTCTT": 2567, + 
"CCAGGCTG": 2568, + "CTGAGG": 2569, + "GAGGCTG": 2570, + "CTCCTGGG": 2571, + "GAAGTC": 2572, + "CGACC": 2573, + "GGACTCA": 2574, + "GGAGTC": 2575, + "CACAATT": 2576, + "GTGTTCA": 2577, + "GACTAAA": 2578, + "GTCATTA": 2579, + "CAAAATTA": 2580, + "TGAAGAAA": 2581, + "GCACCTT": 2582, + "GTTTGCA": 2583, + "TCCTGCC": 2584, + "GTAGATG": 2585, + "GCCTGCA": 2586, + "GAGTTAA": 2587, + "TCCCTTA": 2588, + "GTGGTTA": 2589, + "TCGGGA": 2590, + "TACATAA": 2591, + "TCTCTCCA": 2592, + "CACTAAA": 2593, + "TATATATATATA": 2594, + "GTGGCAA": 2595, + "CACCATG": 2596, + "TTTGAAAA": 2597, + "CACACTG": 2598, + "CTTGGTG": 2599, + "TACACTG": 2600, + "CCTCCAA": 2601, + "CAACCTT": 2602, + "CAGCCAA": 2603, + "TTTTCAAA": 2604, + "TGATAGA": 2605, + "TACACTA": 2606, + "TCTGGG": 2607, + "TCCCAGCA": 2608, + "TAGGAAAA": 2609, + "CTTGGGG": 2610, + "TCTGTGAA": 2611, + "CCTTATT": 2612, + "CATTTAAA": 2613, + "TTTTATTTTA": 2614, + "GCCCTCC": 2615, + "CTGAGCA": 2616, + "CCCGTG": 2617, + "GTAGTGA": 2618, + "TCCTATT": 2619, + "GAAGGTG": 2620, + "TGTGCTG": 2621, + "TCCACTG": 2622, + "TAATCTA": 2623, + "TGATGTA": 2624, + "GTGGTAA": 2625, + "TAATGGA": 2626, + "GATGAAAA": 2627, + "GTAGTAA": 2628, + "GTGGGGA": 2629, + "GTGTCAA": 2630, + "CAGACTG": 2631, + "TCGAAAA": 2632, + "CTCATTA": 2633, + "TAATAATA": 2634, + "CTCAGAAA": 2635, + "CATCCTT": 2636, + "CCGCTT": 2637, + "GGAAGG": 2638, + "CCGTGA": 2639, + "CCACTCC": 2640, + "CTAGAGA": 2641, + "TAGAATG": 2642, + "GGATTTA": 2643, + "TTAATTTT": 2644, + "GCTAATA": 2645, + "TCCCCCA": 2646, + "CAAATATT": 2647, + "GATCATG": 2648, + "TCTTAATT": 2649, + "CAGTATT": 2650, + "GTCTTGAA": 2651, + "CCGAAA": 2652, + "CTATTCA": 2653, + "TAAGATA": 2654, + "CTTGCAA": 2655, + "GCCCCAA": 2656, + "TCCCTAA": 2657, + "GAAGTTA": 2658, + "GATGATG": 2659, + "CTTGATG": 2660, + "CCCTAAA": 2661, + "CCTGCCTG": 2662, + "GACATTTT": 2663, + "CCAGCCA": 2664, + "TGTGTGTGTG": 2665, + "GTCTATA": 2666, + "TCTCTGTT": 2667, + "GTCTGTA": 2668, + "TATAATA": 2669, + "CTTGTTTT": 2670, + "CGCCATT": 2671, + 
"CTCAGCA": 2672, + "TACAGTT": 2673, + "CAAGAGG": 2674, + "GGAAGCA": 2675, + "GCCTTTA": 2676, + "CCCCATT": 2677, + "CAACGA": 2678, + "GTCATTTT": 2679, + "CCCGCA": 2680, + "CAGTTAA": 2681, + "GAATCTT": 2682, + "CATGTTTT": 2683, + "CCGGGG": 2684, + "CTACTGA": 2685, + "TCACGA": 2686, + "TAAATTTG": 2687, + "GCCCATT": 2688, + "CTCTAGG": 2689, + "GGACCTG": 2690, + "TCAGGGA": 2691, + "GAGACTG": 2692, + "CCAAAAAA": 2693, + "GCCGG": 2694, + "CCAGGGG": 2695, + "TCAGAAAA": 2696, + "CATCTGA": 2697, + "TCTTCAAA": 2698, + "CTACAGG": 2699, + "GAGGCAGG": 2700, + "CATTGTA": 2701, + "TAAATCAA": 2702, + "GACTCTT": 2703, + "CTGATTA": 2704, + "GCATATG": 2705, + "GGACCTT": 2706, + "CAAGACA": 2707, + "TATTTATG": 2708, + "TATTTTAAA": 2709, + "CCGAGA": 2710, + "TCATTTTA": 2711, + "CTCACTCA": 2712, + "CCACCCA": 2713, + "CTCTAGA": 2714, + "CTACATG": 2715, + "GTGCTTA": 2716, + "CAACCTG": 2717, + "TCTGTGTT": 2718, + "TAAATATG": 2719, + "CAAAGG": 2720, + "CCCTGTT": 2721, + "GTTCGG": 2722, + "TGATAAAA": 2723, + "CACGAA": 2724, + "GTTGAGG": 2725, + "CAGAGTGA": 2726, + "GAAATTAA": 2727, + "CACATA": 2728, + "GAACAGG": 2729, + "TCTCCTGA": 2730, + "CCTGAGG": 2731, + "GGAGGCCAA": 2732, + "GTTTACA": 2733, + "TAACAGG": 2734, + "TGTGGTG": 2735, + "GCCTCCCAAA": 2736, + "CCATCCTG": 2737, + "GATTCTT": 2738, + "GAATGGA": 2739, + "GTAGTCA": 2740, + "CTCCTCTG": 2741, + "GAAAGAAAGAAAGAAA": 2742, + "CCCTGTG": 2743, + "CAGTATG": 2744, + "GCGATA": 2745, + "GGACTC": 2746, + "GAAAGA": 2747, + "TGTTGG": 2748, + "GTAGCTT": 2749, + "CATTTTAA": 2750, + "CCCTCTG": 2751, + "GCATTCA": 2752, + "CGATTA": 2753, + "TCACATA": 2754, + "TAATGAAA": 2755, + "GGAATTA": 2756, + "CTGTCAA": 2757, + "TAAATTAAA": 2758, + "CAAGTC": 2759, + "GTATTCA": 2760, + "GGCCATG": 2761, + "CTTTAGA": 2762, + "TGTTTCC": 2763, + "CATGTA": 2764, + "GAATAAAA": 2765, + "CAACTAA": 2766, + "TCATCTA": 2767, + "CACTCTT": 2768, + "CAGTTTG": 2769, + "CATAAAAA": 2770, + "GCATGCA": 2771, + "GATTTA": 2772, + "GAACCAA": 2773, + "TCTGTGA": 2774, + "TCAGCCA": 2775, + 
"TCTCCACA": 2776, + "TCTCAGCTCA": 2777, + "TATCATG": 2778, + "GCACTTA": 2779, + "CGCCAGG": 2780, + "CGGGG": 2781, + "CATTAAAAA": 2782, + "TTTGTTA": 2783, + "GGATATA": 2784, + "TCGACC": 2785, + "TAATCCA": 2786, + "CCGC": 2787, + "CATTGTT": 2788, + "CCAGTTA": 2789, + "GTAGTTA": 2790, + "CTAGGAA": 2791, + "CCTAATT": 2792, + "TCATGGG": 2793, + "GAACTAA": 2794, + "GCTATTTT": 2795, + "CCGTCA": 2796, + "CAGATTA": 2797, + "CCATATA": 2798, + "CAACTTA": 2799, + "TCAGTTTT": 2800, + "CTACCTT": 2801, + "GCACTC": 2802, + "GTGTGGA": 2803, + "GTGCCAA": 2804, + "GACAATG": 2805, + "GACAATT": 2806, + "GTACCTT": 2807, + "TAAACATT": 2808, + "CAGGAGG": 2809, + "GTGCGA": 2810, + "GAAAATTA": 2811, + "TCTCTTAA": 2812, + "CCGATT": 2813, + "GATGATT": 2814, + "CCATGGG": 2815, + "TCGGTA": 2816, + "CCATATG": 2817, + "CCAGTCC": 2818, + "GCCTTAA": 2819, + "TGATCCA": 2820, + "GTTGCAA": 2821, + "GTAGAGG": 2822, + "CAGATTTT": 2823, + "GTACTTA": 2824, + "TCTTTCTTTCTTTCTT": 2825, + "GCTCTGTG": 2826, + "TCAATAA": 2827, + "GTTTAGA": 2828, + "GTTCGA": 2829, + "CAAGGTT": 2830, + "CTCATTTT": 2831, + "CACAGG": 2832, + "CATGCTG": 2833, + "GAACGG": 2834, + "TATAAAAA": 2835, + "GAAGGCA": 2836, + "GAGCATT": 2837, + "TGTTTGTG": 2838, + "GCTGTTA": 2839, + "GTCACTG": 2840, + "CAAATGAA": 2841, + "GTGACTG": 2842, + "GTTCTTTT": 2843, + "CAGGCTGGAGTGCAGTG": 2844, + "TGATGAAA": 2845, + "TAACGG": 2846, + "CTACTAA": 2847, + "GACATTA": 2848, + "GGACGA": 2849, + "GAGCATG": 2850, + "GCATGGG": 2851, + "CCACTTA": 2852, + "CTATCAA": 2853, + "GCTGTTTT": 2854, + "GTCGTG": 2855, + "CCTGGCC": 2856, + "TCTCTGAA": 2857, + "TGTTGTA": 2858, + "CAGCCAGG": 2859, + "GTTTAGG": 2860, + "CCGCAA": 2861, + "GGAGTAA": 2862, + "CCAATTA": 2863, + "CAGCAAAA": 2864, + "TCATCCA": 2865, + "CACGTA": 2866, + "TCATAGA": 2867, + "TAATTAAAA": 2868, + "CACTTAA": 2869, + "TCTTTATT": 2870, + "GAGATTA": 2871, + "TAAGAGG": 2872, + "CAAATTAA": 2873, + "GACGCA": 2874, + "CACGGA": 2875, + "GTGTGCA": 2876, + "TCT": 2877, + "TATTATTA": 2878, + "GAAATATT": 2879, + 
"GGAGTTA": 2880, + "TCTTTGA": 2881, + "CTGATTTT": 2882, + "TGTGAATT": 2883, + "TCCCACC": 2884, + "CCCTTTG": 2885, + "CAAGGTG": 2886, + "CAGAGTT": 2887, + "CCCCATG": 2888, + "CTACCAA": 2889, + "CTCCAAAA": 2890, + "CTTCCCC": 2891, + "CTGCTAA": 2892, + "GATTAAAA": 2893, + "GCTTATG": 2894, + "CTACTTA": 2895, + "TAAAAAATT": 2896, + "TCAGTCC": 2897, + "CTATTAAA": 2898, + "GAATGGG": 2899, + "CACAGTA": 2900, + "CAACGG": 2901, + "GGTTATT": 2902, + "TCACCCA": 2903, + "TGATGCA": 2904, + "TAATTTTTT": 2905, + "GTTTGAGA": 2906, + "GTATTAAA": 2907, + "GCCCCCA": 2908, + "TATAGTA": 2909, + "TAGTAAA": 2910, + "TGATACA": 2911, + "GTGGTTTT": 2912, + "CCACTAA": 2913, + "CACAGAGA": 2914, + "CCTCTGCCTCC": 2915, + "CAAAAAAAA": 2916, + "CTCTCTCC": 2917, + "CATAATA": 2918, + "GAAGCCA": 2919, + "GTTCCCA": 2920, + "TGTGTTTG": 2921, + "CAATGGA": 2922, + "TGAAGTA": 2923, + "CTTCATA": 2924, + "CACTGTG": 2925, + "GCTCTTTT": 2926, + "TGACATA": 2927, + "TAAAGAAAA": 2928, + "GAGAAATG": 2929, + "CAGGGAGG": 2930, + "TGTTCAA": 2931, + "GAGCCAA": 2932, + "GACAGAGA": 2933, + "GGCTGAA": 2934, + "CAAATATA": 2935, + "GTGGAAAA": 2936, + "TAAGGTT": 2937, + "GTGATTA": 2938, + "GGATCTG": 2939, + "GATGTTA": 2940, + "GACTACACA": 2941, + "TCCTATA": 2942, + "CTGCCAA": 2943, + "TCCCGA": 2944, + "GTGATTTT": 2945, + "GCGTTTT": 2946, + "CAGAGTA": 2947, + "GAAAGGAA": 2948, + "CACTTTG": 2949, + "CCCCAAAA": 2950, + "GCAACCCA": 2951, + "TGCATTTT": 2952, + "TCTAGAA": 2953, + "TACTTTG": 2954, + "TGAGGCA": 2955, + "CATCTCC": 2956, + "TCGCTA": 2957, + "TGACTTTT": 2958, + "GAGCCTG": 2959, + "CATTTGTT": 2960, + "TCTTTGTT": 2961, + "GCAAAATT": 2962, + "CCTGATT": 2963, + "GATAAAAA": 2964, + "GAGTGTT": 2965, + "TCCTGTA": 2966, + "TACAGAAA": 2967, + "TCCAGGAA": 2968, + "GCCAGTG": 2969, + "TAGATTTT": 2970, + "TAATAGG": 2971, + "CTCCTCA": 2972, + "CATTTTTG": 2973, + "CATTTCAA": 2974, + "GCCATCA": 2975, + "TAAAATATA": 2976, + "GACTGTT": 2977, + "GCATGGA": 2978, + "CAAAGTT": 2979, + "CATGATT": 2980, + "GAGTTTG": 2981, + "CTAGCAA": 2982, 
+ "CTTCCTA": 2983, + "GGGGAGG": 2984, + "CTATATG": 2985, + "TATTTATTTT": 2986, + "CACCATT": 2987, + "CCCTCAA": 2988, + "TTTTTTTTTTTTTT": 2989, + "GATCATT": 2990, + "GTACATA": 2991, + "CTCCATA": 2992, + "CCCCGTCTCTA": 2993, + "GCCTGCC": 2994, + "CTAGCTT": 2995, + "CCCGGA": 2996, + "GATGTTTT": 2997, + "GTATTTTA": 2998, + "TCAGATA": 2999, + "CCTGGAA": 3000, + "TATTCCA": 3001, + "GGACCAA": 3002, + "GCCATTA": 3003, + "CGACTGA": 3004, + "TAAGCTG": 3005, + "TAAACACA": 3006, + "GTTTCTC": 3007, + "CATCTTA": 3008, + "GAAATTTG": 3009, + "TAATGGG": 3010, + "TAAAATTTT": 3011, + "CTGTTCA": 3012, + "CCTGTTA": 3013, + "TACTGAA": 3014, + "TGACCCA": 3015, + "TGATTTTA": 3016, + "CTCCTTA": 3017, + "TATAGAA": 3018, + "CTGCGG": 3019, + "GCGGTA": 3020, + "GTGCTAA": 3021, + "CAGAGGAA": 3022, + "TACATCA": 3023, + "TCAATCAA": 3024, + "CTGCAGCC": 3025, + "TGAATATT": 3026, + "TCTACAA": 3027, + "CCACATA": 3028, + "CCCGTT": 3029, + "TATACACA": 3030, + "TCCTCTC": 3031, + "TCTACTT": 3032, + "CCGGAA": 3033, + "CTTTTTTA": 3034, + "GAAAGAAAA": 3035, + "CTATCTT": 3036, + "GACTTTG": 3037, + "TGAACAA": 3038, + "GCAGTTTT": 3039, + "GCTAAAAA": 3040, + "GAGGCGG": 3041, + "TAATAAAAA": 3042, + "CTGGTCA": 3043, + "CAGACAA": 3044, + "GGATATG": 3045, + "TGAAGG": 3046, + "GCCAGAA": 3047, + "CCAGGCC": 3048, + "CCACCATG": 3049, + "CAAACTT": 3050, + "TCATGTA": 3051, + "GCTGCTT": 3052, + "GTAATA": 3053, + "CCCCCAA": 3054, + "CAGCCTG": 3055, + "TCAACTT": 3056, + "TAAAATTAA": 3057, + "GCTGAAAA": 3058, + "CGACGA": 3059, + "GTGGGCA": 3060, + "TGAGGGA": 3061, + "CGCTCC": 3062, + "TTTTGTTTT": 3063, + "GAGTCAA": 3064, + "TCATGCA": 3065, + "CTGCTTA": 3066, + "TAAGTTTT": 3067, + "GTAGCAA": 3068, + "CCTTGG": 3069, + "TGACAAAA": 3070, + "CTGGTAA": 3071, + "TCTTTATA": 3072, + "TGTGTGTT": 3073, + "CTGGTC": 3074, + "CTGGCAA": 3075, + "CATTTCTG": 3076, + "CTCTACC": 3077, + "CTGAGGA": 3078, + "CTAAAATG": 3079, + "CTAGATT": 3080, + "GTATCAA": 3081, + "CAGTCAA": 3082, + "CTGGGTG": 3083, + "CCTCTTA": 3084, + "TGAGTTTT": 3085, + 
"TTTTATTTA": 3086, + "CCTTTTTT": 3087, + "TATATACA": 3088, + "TAGCAAA": 3089, + "AAATTA": 3090, + "CTGGATG": 3091, + "GATAATA": 3092, + "GACAAAAA": 3093, + "CCTGGGA": 3094, + "GCTTTCA": 3095, + "GTACAGG": 3096, + "GCTGGAA": 3097, + "CTACTCA": 3098, + "CAATGTA": 3099, + "GCGTGAA": 3100, + "GATCCTT": 3101, + "TATTAATG": 3102, + "GCCCGA": 3103, + "TAAAGTG": 3104, + "GCTTCCA": 3105, + "CATGGAA": 3106, + "TGAAGTT": 3107, + "CTTTCTC": 3108, + "TCTGTGTG": 3109, + "GTATGTA": 3110, + "CAATACA": 3111, + "TCAAGG": 3112, + "CCTCTAA": 3113, + "TGTGGG": 3114, + "GATCTGA": 3115, + "GTACTGA": 3116, + "TTAATTAA": 3117, + "GCAGAAAA": 3118, + "CTACATA": 3119, + "CCGGTG": 3120, + "GGGGAAAA": 3121, + "TACAAAAAA": 3122, + "TTTTGG": 3123, + "GTGAGAA": 3124, + "TCAATAAA": 3125, + "TCAAGTT": 3126, + "CTCAGGA": 3127, + "CTACTC": 3128, + "CAAATCA": 3129, + "GGCAGAA": 3130, + "CCCGAA": 3131, + "TGTTGTG": 3132, + "GAGCAAAA": 3133, + "TATTTGTG": 3134, + "GTAGGTT": 3135, + "CTACCTG": 3136, + "CACAAAAA": 3137, + "CTCAGG": 3138, + "GCTTTA": 3139, + "CAGAGCAA": 3140, + "CTCAGTG": 3141, + "GGAAGAGA": 3142, + "TAACCTG": 3143, + "GAAATATA": 3144, + "CGAGAA": 3145, + "GTGAGG": 3146, + "CATTTATA": 3147, + "GGCAGCA": 3148, + "TCTAAATT": 3149, + "CCCAGTG": 3150, + "GCCTAGG": 3151, + "TGCATTA": 3152, + "CCGTAA": 3153, + "CATTCCA": 3154, + "CTAGTTA": 3155, + "GACTTAA": 3156, + "CTATACA": 3157, + "GACACAA": 3158, + "TCTTCACA": 3159, + "CCGGTT": 3160, + "TAAAGTAA": 3161, + "CTGTGGA": 3162, + "TAAGGTG": 3163, + "TCCAGTA": 3164, + "CAAATTTA": 3165, + "AAATTAAAA": 3166, + "CCATCTA": 3167, + "CTCCCTT": 3168, + "CTCCTTTT": 3169, + "GAGAGAGAGAGA": 3170, + "GGAGATA": 3171, + "CCTATTA": 3172, + "CACCAAAA": 3173, + "CCGTTA": 3174, + "TGTTTATA": 3175, + "CTCAGGAGG": 3176, + "GACGTA": 3177, + "GTCCTTA": 3178, + "GAAAGTT": 3179, + "GCTGGTG": 3180, + "CTCTACA": 3181, + "CAATAGA": 3182, + "TAAAATATT": 3183, + "GTACCTG": 3184, + "GTACTAA": 3185, + "CTTTGAAA": 3186, + "CCTTTCC": 3187, + "TAAAAATTA": 3188, + "CTCGG": 3189, + 
"CAAGATA": 3190, + "CATTTGA": 3191, + "CACCTCA": 3192, + "GCCAGCC": 3193, + "GTCGG": 3194, + "GCACATA": 3195, + "CACTCAA": 3196, + "CTTTTAAAA": 3197, + "CAGGAATT": 3198, + "GCCTATT": 3199, + "TCTTTCTG": 3200, + "CTGAGGCAGGAGAA": 3201, + "CAGGCAGG": 3202, + "CTAGTAA": 3203, + "TCCATA": 3204, + "GAACTTA": 3205, + "CG": 3206, + "GCTGTGA": 3207, + "GAAAATA": 3208, + "TCTTCATT": 3209, + "GAGGGAGA": 3210, + "CCCATCC": 3211, + "GAGGTGGG": 3212, + "GCCTCTA": 3213, + "GTAGGTG": 3214, + "TAAACCA": 3215, + "GAAGGAAA": 3216, + "TATTGG": 3217, + "ATG": 3218, + "TCCAGTT": 3219, + "CCCACAA": 3220, + "GAAACACA": 3221, + "GTCTCAAAA": 3222, + "CTTTTCTTTT": 3223, + "TGAAGGA": 3224, + "TATTGATT": 3225, + "CTATGTA": 3226, + "AAAAAAAAAAAAAA": 3227, + "TCCTTAAA": 3228, + "GCGCTA": 3229, + "TCCACTT": 3230, + "GACTCAA": 3231, + "TAAATACA": 3232, + "TCATGGA": 3233, + "TCTGGGA": 3234, + "TCCTATG": 3235, + "CTGTGCA": 3236, + "TCAAGTGA": 3237, + "TCATAAAA": 3238, + "CATCCAA": 3239, + "CCTTCCA": 3240, + "CTGTACA": 3241, + "GAAGGTT": 3242, + "CTGTGTA": 3243, + "GTCACTT": 3244, + "TCACAAAA": 3245, + "TCAGGCA": 3246, + "GTGTTAAA": 3247, + "CCCTTAA": 3248, + "CAAAGTG": 3249, + "GAAATGTT": 3250, + "CTGGGGA": 3251, + "GACGCC": 3252, + "TATATGTG": 3253, + "CTAGATG": 3254, + "GAAATTAAA": 3255, + "GAATGCA": 3256, + "GCACTAA": 3257, + "CGGGAGG": 3258, + "GCCACAA": 3259, + "CGCTTA": 3260, + "TCCACAA": 3261, + "CAGATA": 3262, + "TCTGAATT": 3263, + "TATTATTTT": 3264, + "GCGCGG": 3265, + "CTCTGAAA": 3266, + "TCTCTTTG": 3267, + "TATTTCTA": 3268, + "GGGGTGGG": 3269, + "GGATGCA": 3270, + "CCACACC": 3271, + "TAAATGTG": 3272, + "TCTTCCTG": 3273, + "GCAAGG": 3274, + "CTGCTCC": 3275, + "CTGGAGTG": 3276, + "CTGTTAAA": 3277, + "CACACAAA": 3278, + "CTGACTT": 3279, + "GAAAAGAAAA": 3280, + "CCTTCTCC": 3281, + "GAAATAAAA": 3282, + "CCTCAGGTGA": 3283, + "GATAATG": 3284, + "GAATTGCTT": 3285, + "CCAAAATT": 3286, + "CGTGAAA": 3287, + "CACTGAAA": 3288, + "CAGTGAAA": 3289, + "GATCTTA": 3290, + "GAGATGGG": 3291, + "TCTGCCA": 
3292, + "TGAGGTA": 3293, + "TATGGAA": 3294, + "TATATTTTA": 3295, + "TGAACTT": 3296, + "GCAGATA": 3297, + "CTTTTCTT": 3298, + "GTAAAATG": 3299, + "TCTCTAA": 3300, + "TCTGCAAA": 3301, + "GAGCCTT": 3302, + "TATCATT": 3303, + "CAATTTTA": 3304, + "CCGCCA": 3305, + "TATTTAAAA": 3306, + "GAGAGATG": 3307, + "GAGATGGA": 3308, + "GCCAGGATG": 3309, + "CGAGTAGCTG": 3310, + "TTCATTTT": 3311, + "TATACTT": 3312, + "GTCTACA": 3313, + "GTGAGTGA": 3314, + "GCTACACA": 3315, + "GGGAGGA": 3316, + "CAAGGCA": 3317, + "GCTTTTAA": 3318, + "CACTATT": 3319, + "GTTCATA": 3320, + "TCCTC": 3321, + "GTGGACA": 3322, + "TATTTGGA": 3323, + "CTCCAGTA": 3324, + "GTTCAGTT": 3325, + "CCAAGG": 3326, + "CAGAGCC": 3327, + "CTCGCC": 3328, + "CCGATG": 3329, + "GGAATTTT": 3330, + "TCCAGCC": 3331, + "CCTCTTTT": 3332, + "GAACCTT": 3333, + "CATGCACA": 3334, + "GTTTC": 3335, + "GAAGATA": 3336, + "TACCCC": 3337, + "GCTGCCA": 3338, + "GGGGGAGG": 3339, + "GCAGTGAGCTGA": 3340, + "CTGTCTA": 3341, + "CGAGGA": 3342, + "CAATGGG": 3343, + "GCTGTGAA": 3344, + "GAAAGTG": 3345, + "TACCAAAA": 3346, + "GTCAGG": 3347, + "CAGCTCC": 3348, + "TGTGCTT": 3349, + "GTCTAGG": 3350, + "TTTTTGTA": 3351, + "TTATATG": 3352, + "TCAGGGG": 3353, + "TATTGTTA": 3354, + "CCTGAGA": 3355, + "TATCTCA": 3356, + "CAATCTG": 3357, + "CACTCTG": 3358, + "GATTTAA": 3359, + "TGAATAA": 3360, + "TCTTGTA": 3361, + "TCAACTG": 3362, + "TCTCCAGG": 3363, + "CTAGAGG": 3364, + "CTGAGAAA": 3365, + "CTAGCTG": 3366, + "TCCACCA": 3367, + "CGATTTT": 3368, + "CCGGCC": 3369, + "GTTGACA": 3370, + "CTTAGAA": 3371, + "CATAATG": 3372, + "GAGTATT": 3373, + "CACAGAAA": 3374, + "GACTGTG": 3375, + "CTATTTTA": 3376, + "TGAGGAAA": 3377, + "TTATTAAAA": 3378, + "CTTATTTA": 3379, + "CAGACTT": 3380, + "CACGCC": 3381, + "GCTTGG": 3382, + "CCTGCTT": 3383, + "TAAAGCAA": 3384, + "CCTCGTGA": 3385, + "TAGAATT": 3386, + "CTTACAA": 3387, + "TAAAGGAA": 3388, + "GTCTAGA": 3389, + "GTGACTT": 3390, + "TACATATG": 3391, + "GTCAGGA": 3392, + "GCTCCAGG": 3393, + "GAAGGGA": 3394, + "CATGATG": 3395, + 
"TCATCAAA": 3396, + "CGTTAAA": 3397, + "GTACTCA": 3398, + "CTCCCAA": 3399, + "TATATGTA": 3400, + "GGTATTTT": 3401, + "TAAGCCA": 3402, + "CGAAATT": 3403, + "GTTTGTTTT": 3404, + "TCTGTCTT": 3405, + "TATATCA": 3406, + "TGTTCATT": 3407, + "CAAACCA": 3408, + "TTCATTA": 3409, + "TATTTGTA": 3410, + "GATTGAA": 3411, + "CTATAAAA": 3412, + "GATTAATT": 3413, + "CCCACCA": 3414, + "TCCTAGG": 3415, + "TAAATGTA": 3416, + "CTCTTAAA": 3417, + "GCAGTCC": 3418, + "GCGGCTG": 3419, + "GTCTCGAA": 3420, + "TGAATGA": 3421, + "CTGGGGG": 3422, + "GTCTCGA": 3423, + "GAACAAAA": 3424, + "TGAATCA": 3425, + "TGTATTTTTAGTAGAGA": 3426, + "GTTATTAA": 3427, + "TTTTTTAAAA": 3428, + "GTCAGTG": 3429, + "CCCATTA": 3430, + "CACAGGA": 3431, + "TATTCCTT": 3432, + "TCTGCCTT": 3433, + "CCTGGTG": 3434, + "GCGAGC": 3435, + "TACTAAA": 3436, + "TACACAAA": 3437, + "CCGTCC": 3438, + "GCTTTGTT": 3439, + "GCATCCA": 3440, + "CATCTAA": 3441, + "GCTGTGTT": 3442, + "GTAGACA": 3443, + "GCCTATG": 3444, + "TCTTTGTG": 3445, + "GATTCTG": 3446, + "CGCCCGG": 3447, + "GATGAGA": 3448, + "TATCTGA": 3449, + "TGAATTTG": 3450, + "CCTGATG": 3451, + "TAAAACAA": 3452, + "CTTTAGG": 3453, + "TTTTCCTT": 3454, + "TGAATAAA": 3455, + "CGGGGA": 3456, + "CAAACATT": 3457, + "GTATGGA": 3458, + "GCTTAAAA": 3459, + "TACCAAA": 3460, + "CAAAGAGA": 3461, + "CTCCTGCC": 3462, + "GTAAAAAAA": 3463, + "CACAGCC": 3464, + "CCATGCA": 3465, + "TACAATT": 3466, + "CTAGTGA": 3467, + "CTGAGTT": 3468, + "GAGTGAAA": 3469, + "TCTGTTTG": 3470, + "CTGTAGG": 3471, + "TATAAAAAA": 3472, + "GCATTAAA": 3473, + "GTCCATA": 3474, + "TGTTAAAAA": 3475, + "TGTTTGA": 3476, + "GAATAGA": 3477, + "CTTCAAAA": 3478, + "CTGGACA": 3479, + "CTGTAGA": 3480, + "CCATTAAA": 3481, + "CTATCTG": 3482, + "CACTATG": 3483, + "TTATCAA": 3484, + "TAAGTAAA": 3485, + "TAATCCCAGCACTTTGGGAGGCC": 3486, + "CCAGAAAA": 3487, + "TGAAGCA": 3488, + "TCCCTTTT": 3489, + "TCATACA": 3490, + "TACGTT": 3491, + "GCCGTG": 3492, + "GGAAGTG": 3493, + "GGCCAAA": 3494, + "GTACCAA": 3495, + "TCTCTACTAAAAATA": 3496, + 
"CATTGTG": 3497, + "TGTGTGA": 3498, + "GAAACAGA": 3499, + "CTTGACA": 3500, + "GATGAGG": 3501, + "GAGATTTT": 3502, + "CCTTCAA": 3503, + "GAATCTA": 3504, + "CTCTCCTT": 3505, + "GGCGGA": 3506, + "TCTATCTATCTATCTA": 3507, + "CACACAGA": 3508, + "TGTGTGTA": 3509, + "CAAAGCC": 3510, + "TGTGCCA": 3511, + "GTTGAAAA": 3512, + "CTCCAGCA": 3513, + "TCAAGGA": 3514, + "TAGCTCA": 3515, + "CGCTGA": 3516, + "CCTGAAAA": 3517, + "GACTATT": 3518, + "GATTCCA": 3519, + "GCTTCTA": 3520, + "GTCTGCC": 3521, + "CTTGGCA": 3522, + "TGTGGTA": 3523, + "GCTTTGA": 3524, + "GCTCTCTG": 3525, + "CTCACAGA": 3526, + "TCTTTAAA": 3527, + "CAAAGCAA": 3528, + "TACTTAA": 3529, + "GCTTCAA": 3530, + "CATTGAA": 3531, + "GGAGGAAA": 3532, + "CTATAGA": 3533, + "CTGAGGAA": 3534, + "CCTGGCA": 3535, + "CCCTATT": 3536, + "CTCGTG": 3537, + "TTACACA": 3538, + "TTAGGAA": 3539, + "CTGGTTA": 3540, + "GTTGTCC": 3541, + "TAATGAAAA": 3542, + "TATTTACA": 3543, + "GGGAATT": 3544, + "GTAGTTTT": 3545, + "GCTGCAA": 3546, + "CTACGG": 3547, + "GCCGGA": 3548, + "CTGGGCA": 3549, + "CCTTAAAA": 3550, + "GATGGAA": 3551, + "TAGATAGATAGATAGA": 3552, + "TATGTAA": 3553, + "GTACGG": 3554, + "TATTCAAA": 3555, + "GATCTCC": 3556, + "CCTGTTTT": 3557, + "TATTGCA": 3558, + "GGAAGGAAGGAAGGAA": 3559, + "GGTAATT": 3560, + "TTACAGA": 3561, + "TCAGC": 3562, + "GCAAAATG": 3563, + "GAGAGCA": 3564, + "GTAGAAAA": 3565, + "CATTTGAA": 3566, + "TCTTCTTTT": 3567, + "TCCCATA": 3568, + "GTTATTTA": 3569, + "CTATCTA": 3570, + "CATCCTG": 3571, + "TCTTGTG": 3572, + "TTATTATT": 3573, + "CCCGTC": 3574, + "TACTATG": 3575, + "TAAACATA": 3576, + "TAAGGAAA": 3577, + "GCTTGTG": 3578, + "CTCTAAAA": 3579, + "GTTTTAAAA": 3580, + "GACAGGA": 3581, + "TCCTAGA": 3582, + "TCCACCCA": 3583, + "GTTTGAAA": 3584, + "CCATCTCA": 3585, + "CTAAGAA": 3586, + "GTATCTA": 3587, + "GTGAGGA": 3588, + "GCTGGAGG": 3589, + "CCTGTAATCCCAGCTA": 3590, + "GCAACAA": 3591, + "CTTTCAAA": 3592, + "CAAATGTT": 3593, + "CTTGTCC": 3594, + "TCTCAAAAA": 3595, + "TATTTATTA": 3596, + "TAAGGCA": 3597, + 
"GAGAGGAA": 3598, + "TATGATT": 3599, + "GCATCTA": 3600, + "CGTTATT": 3601, + "GCCTGTA": 3602, + "GTTTCAAA": 3603, + "CCTTCCTTCCTTCCTT": 3604, + "GGCTTTG": 3605, + "GTCAGAA": 3606, + "CATGCATG": 3607, + "GTCATTTA": 3608, + "CTGGAAAA": 3609, + "CTTCGA": 3610, + "CCTATTTT": 3611, + "CCAACAA": 3612, + "TCCATCC": 3613, + "TAAAGTTA": 3614, + "GTCTCTC": 3615, + "TAATCAAA": 3616, + "GATTTTTG": 3617, + "GATTTCTT": 3618, + "GGGCTGA": 3619, + "GCATGTA": 3620, + "CCTGGGTT": 3621, + "GAGACAA": 3622, + "GCTGTCA": 3623, + "TGATAGG": 3624, + "GGAGACC": 3625, + "CCGGCA": 3626, + "TAATCTCA": 3627, + "TGAATTAA": 3628, + "TCTGGTG": 3629, + "GCCTC": 3630, + "GGCGCA": 3631, + "CCAGCTA": 3632, + "CAGTCTG": 3633, + "TGAACTA": 3634, + "GTAAGAA": 3635, + "CCTTTCA": 3636, + "TCCATGA": 3637, + "CAAAGGAA": 3638, + "CTCTC": 3639, + "CTCTCTCA": 3640, + "CTCCAGC": 3641, + "GTAGATA": 3642, + "CCCCCTCC": 3643, + "GGCGCC": 3644, + "TCTGTCC": 3645, + "GACCATT": 3646, + "CTTGAAAA": 3647, + "TTATCC": 3648, + "TACATGTG": 3649, + "CAAATTTG": 3650, + "TTTTGTG": 3651, + "CAGAGTG": 3652, + "GTAATAA": 3653, + "GTGAGTG": 3654, + "TTTTTCC": 3655, + "GGCTCTG": 3656, + "GCCCTAA": 3657, + "GGCTGTT": 3658, + "CCCAATT": 3659, + "CAGAGCTT": 3660, + "TATAAATG": 3661, + "GAGTCTG": 3662, + "TCTTAAAAA": 3663, + "GTTTTATG": 3664, + "GATCCAA": 3665, + "GGCCCTG": 3666, + "GATCCTG": 3667, + "TCAAGTG": 3668, + "GATTCAA": 3669, + "CCTCTCTT": 3670, + "GAGACGG": 3671, + "CAGATCA": 3672, + "TAAAAGAA": 3673, + "CTGAGCAA": 3674, + "CCTGCCA": 3675, + "CCTTCTA": 3676, + "CGCTCA": 3677, + "GGCTGTG": 3678, + "TGGGAAAA": 3679, + "GGAGCCTG": 3680, + "CTGAGTG": 3681, + "CGTCAAA": 3682, + "TCAAGTA": 3683, + "CGTAATT": 3684, + "TTACTTA": 3685, + "TATACTA": 3686, + "GGGCAAA": 3687, + "CAACTTTT": 3688, + "CTTTGCC": 3689, + "GCCAGGAA": 3690, + "CACACTA": 3691, + "GCCCAGC": 3692, + "TAAATAAATAAATAAA": 3693, + "CTTTCCTT": 3694, + "GGGAGAA": 3695, + "TATGGTA": 3696, + "CGGCCA": 3697, + "CCTCTCTG": 3698, + "GAAAGCAA": 3699, + "CAAGCCA": 3700, + 
"GGCGTT": 3701, + "CTCTTTTA": 3702, + "TCGGCCTCCCAAA": 3703, + "GATTTATT": 3704, + "CAAGTCC": 3705, + "TATCTTA": 3706, + "GTTCAAGACCA": 3707, + "CTCACACA": 3708, + "GAAATCAA": 3709, + "TGAGACC": 3710, + "GGGTAAA": 3711, + "GCTTGTT": 3712, + "GATTTTAA": 3713, + "TTTTTATA": 3714, + "CAGAGCTG": 3715, + "TCTGTTAA": 3716, + "GTAATTAA": 3717, + "TCTTTGAA": 3718, + "CTTGCCA": 3719, + "TTTTCATT": 3720, + "CCATGTA": 3721, + "TCTCGGCTCACTGCAA": 3722, + "GGATTCA": 3723, + "TCTATTAA": 3724, + "TACATAAA": 3725, + "GATTGATT": 3726, + "GGAGAGGA": 3727, + "CGCAAAA": 3728, + "GGACTAA": 3729, + "TTATGTG": 3730, + "GTCACTCA": 3731, + "GACAGCA": 3732, + "CGAGTT": 3733, + "GATGGTT": 3734, + "GGAAGAGG": 3735, + "GCCAACATGGTGAAA": 3736, + "GGAGCCA": 3737, + "TGAACTG": 3738, + "CCTCTGTG": 3739, + "GTATAAAA": 3740, + "TCCCAGAA": 3741, + "CATTTATG": 3742, + "GATTATG": 3743, + "TGTTTCTG": 3744, + "GAGTGGGTT": 3745, + "TACATATT": 3746, + "CTCCAGGA": 3747, + "GACACTG": 3748, + "GGTCTCA": 3749, + "CCGGGA": 3750, + "TGTTTAAA": 3751, + "CTCACCA": 3752, + "GGACTTA": 3753, + "GCCCACC": 3754, + "CAAATCAA": 3755, + "GAAATGTG": 3756, + "TAGTTAA": 3757, + "TCTATAA": 3758, + "TTAGATT": 3759, + "GTGTAGG": 3760, + "TACTGAAA": 3761, + "GCACCCA": 3762, + "GTGGGCTG": 3763, + "GAATGAAA": 3764, + "TCTAGTT": 3765, + "TCAGGAGA": 3766, + "TCCACTA": 3767, + "CTCAGTT": 3768, + "TACTTAAA": 3769, + "GACTCCA": 3770, + "TCCATTTG": 3771, + "CACAGCAA": 3772, + "GCTCATGCCTG": 3773, + "GGTGCTG": 3774, + "GCTTTCTT": 3775, + "GTGGCCA": 3776, + "TACGTG": 3777, + "GTGCAGTG": 3778, + "TGAAGTCA": 3779, + "CCTTTAA": 3780, + "TCTCAGCTCACTGCAA": 3781, + "GAAATATG": 3782, + "CCTCAAAA": 3783, + "GGGGCGG": 3784, + "CGACAA": 3785, + "GGTGATG": 3786, + "GTCTTAAA": 3787, + "CAGAAATG": 3788, + "CGTCATT": 3789, + "CCAAGCA": 3790, + "GGATCAA": 3791, + "GTGCTGGGATTA": 3792, + "GCTGGCC": 3793, + "CGGAGCTT": 3794, + "TACATGA": 3795, + "TGTTTGAA": 3796, + "TCTCCATT": 3797, + "TAAGCAAA": 3798, + "CCTTTCTT": 3799, + "TACTGTT": 3800, + "TCCATCTT": 
3801, + "CTTACTT": 3802, + "CGGAGGTT": 3803, + "CAAAACAA": 3804, + "TCATAGG": 3805, + "TTACTAA": 3806, + "CTTATTTG": 3807, + "GAATGTA": 3808, + "CCCCATGGA": 3809, + "TTACTGA": 3810, + "CGGAAAA": 3811, + "CTCCAGTG": 3812, + "TGTTCCA": 3813, + "CAGATGAA": 3814, + "GTTGATA": 3815, + "TCCCCCC": 3816, + "CATTGCA": 3817, + "CTCAGCC": 3818, + "CTTACTG": 3819, + "TATCCTT": 3820, + "CTTTTATG": 3821, + "TGAGTAGCTG": 3822, + "GACTGAAA": 3823, + "CAATGAAA": 3824, + "CGACTG": 3825, + "CTTGGGA": 3826, + "GCAAGCA": 3827, + "TCACTCC": 3828, + "GATTTGA": 3829, + "CATTTTAAA": 3830, + "TCAACTA": 3831, + "GTCCAAAA": 3832, + "CACCCTG": 3833, + "TTACCTT": 3834, + "CAAGGGG": 3835, + "TTTTGGA": 3836, + "GTTATTTG": 3837, + "GCTACTG": 3838, + "CTGAGGCAGGAGAATG": 3839, + "GTGATGA": 3840, + "GTAGTC": 3841, + "TAGTATG": 3842, + "GTATAGA": 3843, + "GTGTCTA": 3844, + "GCTGCTA": 3845, + "TTAGTAA": 3846, + "TAAACATG": 3847, + "GTCACCA": 3848, + "CATCTTTT": 3849, + "CATATAA": 3850, + "TCTCTCTA": 3851, + "TTTTATTAA": 3852, + "TATTCTAA": 3853, + "GAAATTTA": 3854, + "CTTCCCTG": 3855, + "TAAAGATG": 3856, + "TACGTA": 3857, + "GTTTATTA": 3858, + "GAAAAGAA": 3859, + "CCCACCCA": 3860, + "CAATTAAAA": 3861, + "CCGACA": 3862, + "CAAAGTGA": 3863, + "CAAACAAAA": 3864, + "GCAATTTT": 3865, + "CGATTAA": 3866, + "TTAGAGA": 3867, + "CTGATGA": 3868, + "GGAGGAGG": 3869, + "GTCCTGGG": 3870, + "TCATGAAA": 3871, + "GCAACCA": 3872, + "GTTGGCA": 3873, + "GCGGCGG": 3874, + "GTCCCCA": 3875, + "GTAGGGG": 3876, + "GCCATGTT": 3877, + "GTTCGAGA": 3878, + "GCCTATA": 3879, + "TAAATTCA": 3880, + "GGCCATT": 3881, + "GAAAACAA": 3882, + "TGTGTATG": 3883, + "GTACTC": 3884, + "TAGGGAA": 3885, + "CCTTGAA": 3886, + "TCTATTTG": 3887, + "GAGGGCA": 3888, + "GAAACTGA": 3889, + "TACGC": 3890, + "TACAAAAA": 3891, + "TCATTATT": 3892, + "GGAAAATT": 3893, + "TCAATATT": 3894, + "CCCGTA": 3895, + "GGAGAGAA": 3896, + "TTAGTTA": 3897, + "CTCAGAGA": 3898, + "TCGAGC": 3899, + "CTAGTCA": 3900, + "GATGGCA": 3901, + "TGAACATT": 3902, + "CTATGGG": 3903, + 
"CACACCA": 3904, + "TCAATTAA": 3905, + "GGAACTG": 3906, + "TTACATG": 3907, + "CTTTCATT": 3908, + "CAGCTCTG": 3909, + "TCTTTTTTTT": 3910, + "TAAATCTT": 3911, + "TGATCTA": 3912, + "CATACAA": 3913, + "GCTCAAAA": 3914, + "GCTGTGTG": 3915, + "TCAATCA": 3916, + "GATTTGAA": 3917, + "CCAAGGA": 3918, + "GTCCTCA": 3919, + "GTGCTCC": 3920, + "AAAATAA": 3921, + "GTGACAA": 3922, + "GCTCACGCCTG": 3923, + "CGACGG": 3924, + "TATCCAA": 3925, + "CACACATG": 3926, + "TCTCTCTCC": 3927, + "TGTGGTT": 3928, + "CTTGGTA": 3929, + "TCTGGTT": 3930, + "TTTATAA": 3931, + "CTGCTTTT": 3932, + "TGTGTCA": 3933, + "CACATCA": 3934, + "CCTAATG": 3935, + "CGTTTTTT": 3936, + "GCTGGCA": 3937, + "GACGTC": 3938, + "TATAATTA": 3939, + "TACAGTAA": 3940, + "GAAAGTAA": 3941, + "GTCTGAAA": 3942, + "CCCATTTT": 3943, + "TATATGA": 3944, + "CTTGATA": 3945, + "CTTTATTTT": 3946, + "CTTTATTA": 3947, + "GGCGAA": 3948, + "CCATGCC": 3949, + "CCTGCCTT": 3950, + "GAAGAAGAAGAA": 3951, + "CTGACTGA": 3952, + "GCCCTTA": 3953, + "TATCTAA": 3954, + "GTGTTTTA": 3955, + "TGTGGCA": 3956, + "TATTGTAA": 3957, + "GCCAGAAA": 3958, + "CCCTGTCTC": 3959, + "CACAGGAA": 3960, + "AAAACAA": 3961, + "AAAAAAAAAAAAAAA": 3962, + "TAACTCC": 3963, + "GCCTAAA": 3964, + "CGAGTA": 3965, + "TAGTATT": 3966, + "GTATTTTTAGTAGAGA": 3967, + "GCTGCAGG": 3968, + "TATTGAAA": 3969, + "CCAGCCTGGG": 3970, + "GCTCCAAA": 3971, + "TACGAA": 3972, + "GGCCTCC": 3973, + "TATACAAA": 3974, + "CATGGCA": 3975, + "CATGCAA": 3976, + "TACACCA": 3977, + "CTTTACCA": 3978, + "TACAGAGA": 3979, + "TATTCTTA": 3980, + "TATGTCA": 3981, + "TCAAGCA": 3982, + "TCAATGA": 3983, + "GGCTCTT": 3984, + "GGAAGTT": 3985, + "TCCATGTT": 3986, + "GCTTTCC": 3987, + "TATGTGA": 3988, + "GTGTAGA": 3989, + "TTTTTAAAA": 3990, + "GCTGGAGA": 3991, + "GTGAGAGA": 3992, + "CCTAGAA": 3993, + "CCTCCAAA": 3994, + "CCAATGA": 3995, + "CAGGGCA": 3996, + "CTATGCA": 3997, + "CTTCACC": 3998, + "CTACAAAA": 3999, + "CTCACC": 4000, + "GAGTATG": 4001, + "TAGAAAAA": 4002, + "CTTTTGAA": 4003, + "TAAAGAGA": 4004, + 
"CATGTCA": 4005, + "TCTTTTAAA": 4006, + "CACAGTGA": 4007, + "GATCTAA": 4008, + "TAAGGTA": 4009, + "CATAGAA": 4010, + "CGCGCC": 4011, + "CAGCTTA": 4012, + "TATAGTT": 4013, + "CGGGCC": 4014, + "TATCCATT": 4015, + "TGTTTGTTTT": 4016, + "GCTGGCTG": 4017, + "TACAGGA": 4018, + "CTCCTTTG": 4019, + "CAATCTA": 4020, + "CCCCCTG": 4021, + "TATACTG": 4022, + "CTGAGCC": 4023, + "CGGTTA": 4024, + "TGAAGTG": 4025, + "GCTTCCTT": 4026, + "TTTTATTTG": 4027, + "TAGTGAA": 4028, + "CTGAGGTG": 4029, + "TCTTCTC": 4030, + "GACAGAAA": 4031, + "CTGAACTGAA": 4032, + "CCTGGGAA": 4033, + "TCCCCAAA": 4034, + "TATGTATT": 4035, + "GATTTCTG": 4036, + "CATTCAAA": 4037, + "CACAGTT": 4038, + "GCTTGAA": 4039, + "GTGGATCA": 4040, + "CTGAGTGA": 4041, + "TGAATTTA": 4042, + "TCAACAAA": 4043, + "GGTCATT": 4044, + "GTAATTTA": 4045, + "GCGACTT": 4046, + "CTGAGAGA": 4047, + "GTGCCCA": 4048, + "CTAGGTT": 4049, + "TCCTGAAA": 4050, + "GTCCACC": 4051, + "TCACAGAA": 4052, + "GCGAAAA": 4053, + "GTATGGG": 4054, + "TGAACAAA": 4055, + "TAAACAAAA": 4056, + "CCGTTTT": 4057, + "TCTCAATT": 4058, + "TCCAGAAA": 4059, + "GTAACAA": 4060, + "GCATTTTA": 4061, + "TCTCCATG": 4062, + "TTATAAAA": 4063, + "CAGGCAA": 4064, + "CTAAAAAAA": 4065, + "GTTGGGA": 4066, + "TAAAGATT": 4067, + "TGAAGAGA": 4068, + "CCCCTCA": 4069, + "TGTTTATG": 4070, + "TCTACTG": 4071, + "CCAATTTT": 4072, + "GGTGGTG": 4073, + "GGAACAA": 4074, + "TGTGGGA": 4075, + "TCTGCTA": 4076, + "GAACGA": 4077, + "GTAAGTA": 4078, + "GTTGCCA": 4079, + "AAAATTTT": 4080, + "GCGCGA": 4081, + "GAAAGATG": 4082, + "GTCTCTCA": 4083, + "TCCATCAA": 4084, + "GCAGCTA": 4085, + "CACATTTG": 4086, + "CTGACAA": 4087, + "TCCACC": 4088, + "GCT": 4089, + "CCCACTT": 4090, + "GCAGGTA": 4091, + "GAGGCCA": 4092, + "TAAAGTCA": 4093, + "CTGGATA": 4094, + "CGGCAA": 4095 + }, + "merges": [ + "A A", + "T T", + "T G", + "C A", + "C C", + "T A", + "G G", + "T C", + "G A", + "AA A", + "G C", + "T AA", + "TT TT", + "T CA", + "TG A", + "TT A", + "G AA", + "T CC", + "C AA", + "C TG", + "C TT", + "G TG", + "G 
TT", + "G CA", + "GG A", + "C CA", + "G TA", + "G CC", + "C TA", + "T AAA", + "AA AA", + "C TC", + "G TC", + "TG TG", + "TA TT", + "CA CA", + "G AAA", + "TA TA", + "TC TT", + "TG TT", + "C AAA", + "GA GA", + "CA TT", + "TG AA", + "CA GG", + "TC TG", + "CA GA", + "TC AA", + "GG AA", + "TAA AA", + "C TGA", + "GC TT", + "G TGA", + "GC TG", + "C TCA", + "CC TT", + "CA TG", + "GC AA", + "G TCA", + "G TAA", + "TTTT A", + "TA TG", + "GA GG", + "C GG", + "GA TT", + "CC TG", + "TC TC", + "CC AA", + "G TTA", + "C TCC", + "C TAA", + "TA CA", + "C TTA", + "TC CA", + "GA TG", + "TT AA", + "GAA AA", + "TT TG", + "G TTTT", + "TC TA", + "GC CA", + "G TCC", + "C TTTT", + "GG GG", + "C GA", + "TT TA", + "CC CA", + "CAA AA", + "TG GG", + "TA GA", + "TA GG", + "GA CA", + "GG TT", + "CC CC", + "GG TG", + "CA TA", + "GC TA", + "TG TA", + "TC AAA", + "TG GA", + "TAA TT", + "TTA TT", + "TG CA", + "GG CA", + "GA TA", + "CC TA", + "TT CA", + "TC TCA", + "GG GA", + "C GC", + "CTG AA", + "G TAAA", + "TC TCC", + "TTTT TT", + "C GTG", + "GC AAA", + "TAA AAA", + "TC TGA", + "TCA TT", + "GG AAA", + "TG AAA", + "TCC TT", + "CC AAA", + "GAA TT", + "C TAAA", + "C GTT", + "GTG AA", + "GG CC", + "TAA TA", + "GG TA", + "TG CC", + "CA CC", + "TGA TT", + "AAAA AA", + "GC TCA", + "TCC AA", + "GA GAA", + "CTG TT", + "TA TTA", + "CA GCA", + "CTC TT", + "CTT AA", + "CA GAA", + "GC TGA", + "GTT AA", + "TC TTA", + "TA TTTT", + "GCC AA", + "CTT TG", + "GA CC", + "C GCA", + "GTA TT", + "GTC TT", + "CAA TT", + "GTG TT", + "CTC AA", + "GGA GG", + "C GAA", + "TC TTTT", + "GTC AA", + "C GCC", + "TA TAA", + "TA CC", + "TC TAA", + "CCA TT", + "C GGA", + "CAA AAA", + "CA GTG", + "TCC TG", + "CTC TG", + "GAA AAA", + "CTG TG", + "CA GC", + "TTTT AA", + "GCA TT", + "GCC TT", + "TAA TG", + "CTA TT", + "GTT TG", + "TGA TG", + "GG CTG", + "CC TCA", + "GA GGA", + "GCC TG", + "AAA TT", + "C GTA", + "TC AAAA", + "TA CAA", + "CA TCA", + "CA GTT", + "TGA GA", + "GG GAA", + "CA CTG", + "CA CAA", + "CA GGA", + "CC CCA", + "CC CTG", 
+ "TTTT TTTT", + "TA GAA", + "GA GCA", + "CC TCC", + "CA CCA", + "TA TCA", + "GA GC", + "CA TTA", + "CACA CACA", + "GA GTG", + "GGA TT", + "TGTG TGTG", + "TA CTT", + "CA CTT", + "GTC TG", + "TGA GG", + "GA GTT", + "GAA TG", + "TCA TG", + "GA CAA", + "GA CTT", + "TATT AA", + "TAA TAA", + "GG CCA", + "CA TTTT", + "CA GCC", + "CC CTT", + "GC TAA", + "TATA TATA", + "GTG TG", + "TA CTG", + "TA GTT", + "CAA TG", + "GC TC", + "CA GTA", + "GC TCC", + "CA TAA", + "TTA TG", + "TAAA TT", + "GA TGA", + "CA TGA", + "GC GG", + "AAAA AAAA", + "CCA TG", + "GA TAA", + "GA CTG", + "TA TGA", + "GCA GG", + "GA TCA", + "G TTTTA", + "GGA TG", + "CC TGA", + "G TAAAA", + "GAA GG", + "GA TTA", + "CC TC", + "GA CCA", + "GC TTA", + "CC CAA", + "AAA TG", + "GCA TG", + "TA GTA", + "TA CCA", + "GG CTT", + "C GTC", + "TC TCTT", + "GG TCA", + "TTA TTA", + "TA CTA", + "TA GCA", + "TA TC", + "CTG GG", + "CA TC", + "C TTTTA", + "C TAAAA", + "GTG GG", + "GA GTA", + "CCA GG", + "GA TTTT", + "TA GTG", + "GAAA TT", + "CA CTA", + "TC GG", + "TCA GG", + "CAGG AA", + "GC AAAA", + "CC TTA", + "CA TCC", + "CTT GG", + "TGTG AA", + "TATT TG", + "CC TAA", + "CTA TG", + "GA GAAA", + "GAGA GAGA", + "GC TTTT", + "TA TAAA", + "CAA GG", + "TC TCTG", + "TGTT AA", + "TGTG TT", + "GA GCC", + "GA CTA", + "TA TATT", + "TAA AAAA", + "TTTT TG", + "GTA TG", + "CATT AA", + "TA GGA", + "TA GC", + "GTT GG", + "GAA GAA", + "TAAA TG", + "TC TGTT", + "CA GAAA", + "CAAA TT", + "TAA TTA", + "TC TGTG", + "TA TCC", + "TGAA TT", + "CTC CA", + "GTG AAA", + "GG CAA", + "GGA GA", + "GAA GA", + "GG TGA", + "GG GCA", + "CC AAAA", + "TCTC TCTC", + "CTG CA", + "CTT CTT", + "TCTT AA", + "CC CTA", + "TGTG TG", + "AAA TA", + "TGTT TG", + "GG GTT", + "GTG CTG", + "GG AAAA", + "GG GGA", + "TCA GA", + "CC TTTT", + "GAAA TG", + "GCA GCA", + "TC TGAA", + "GG GTG", + "CACA TT", + "TCTT TG", + "GG GC", + "TCC CA", + "TC CATT", + "CTG AAA", + "CTT TA", + "TC GA", + "GTT TA", + "CAA CAA", + "CTT CC", + "GCC TCC", + "TT AAA", + "GC TCTG", + "GTT TCA", + 
"GGA GGA", + "C GTGA", + "CA GTC", + "GAA TA", + "CA GAGA", + "CC CTC", + "CAAA TG", + "CTG CTG", + "GA TCC", + "TTTTA TT", + "AAAA TT", + "TTA TA", + "TCAA TT", + "GG TAA", + "GTTA TT", + "GC CAGG", + "GGA GAA", + "CATT TG", + "TCA CC", + "CTC AAA", + "GG TTA", + "TCC AAA", + "TC TATT", + "GCA GA", + "CTT CA", + "TCA TCA", + "C GAGG", + "TAA CA", + "GTT GTT", + "CTTA TT", + "C GTCA", + "TAA GA", + "TAA TTTT", + "CTG TA", + "TC CACA", + "GC TGTG", + "C GCTG", + "TC TAAA", + "GC GA", + "CAA TA", + "CCA CCA", + "GAA CA", + "C GAAA", + "CAGA TT", + "TCA CA", + "TTA TTTT", + "TC TCAA", + "TGA CA", + "CTCC AA", + "AAAA AAA", + "TATA TG", + "TCC TCC", + "TCA CTT", + "TC CAGG", + "CAA GA", + "GG CTA", + "GTG GTG", + "C GTAA", + "C GAGA", + "TGA TA", + "GGA TTA", + "CAA CA", + "C GATT", + "TGA GAA", + "CTCC TT", + "CTCA TT", + "GTT AAA", + "TCA TA", + "CC TCTG", + "CTC TA", + "GC TGAA", + "CTG GA", + "TAA GG", + "CTT AAA", + "TATT TA", + "CCA CA", + "CC GG", + "GTC AAA", + "TG GAA", + "C GGAA", + "TGA TGA", + "GTT CA", + "TAA CAA", + "GC TGTT", + "TAA GAA", + "CTG CC", + "TTAA TT", + "CCA GA", + "TCA GAA", + "GTCA TT", + "C GCTT", + "GATT AA", + "CTGA TT", + "GC CACA", + "GTAA TT", + "TC CAGA", + "GCC AAA", + "GTGA TT", + "TAAAA TT", + "CAA GAA", + "CCA CC", + "TAA TCC", + "GTT CTT", + "TC CATG", + "GC TCTT", + "TG CTG", + "GG GTA", + "TTA CA", + "GC CATT", + "GCA CA", + "GCAA TT", + "TCC CTG", + "TG TGA", + "TC GAA", + "GGA CA", + "GGAA TT", + "GTG GA", + "CTT CTG", + "TCC CC", + "GCC CC", + "CTT GA", + "TAA TGA", + "TAAA TA", + "TATA TA", + "CTG CAA", + "TCA TTA", + "GTA TA", + "TCC CCA", + "C GTTA", + "GCA GAA", + "TGA GTT", + "CTTTT TT", + "C GATG", + "CTT TCA", + "AAAA TG", + "CAGG TT", + "CTAA TT", + "C GCCA", + "TGAA AAA", + "GTT CC", + "GTCC TT", + "GTCC AA", + "GTTTT TT", + "CTC TGA", + "GC GC", + "GTT GA", + "TGAA TG", + "CTA TA", + "GCA GTG", + "CCTT AA", + "TCA CCA", + "TCA CTG", + "GCC CTG", + "TAA CTT", + "CAGA TG", + "GTA GG", + "TC TATA", + "GAGA TT", + 
"GTC TA", + "TTTT AAA", + "CACA TG", + "TGA CC", + "CA CAAA", + "GTG TA", + "GG GAGG", + "GCTT TG", + "CAA AAAA", + "GA GGAA", + "GTT CTG", + "TTTT TA", + "GTC TCA", + "GTT CAA", + "TC GTG", + "GCTT AA", + "GCA CC", + "CTCC TG", + "TAAA TAAA", + "CTA CA", + "CTT CCA", + "TCC TCA", + "C GCAA", + "GAA AAAA", + "GCC CA", + "TC GTT", + "GTA GA", + "CTC TCA", + "GTC CA", + "TGA CTT", + "TCC CTT", + "GC CATG", + "CACACACA CACACACA", + "GTGA TG", + "CC TCTT", + "GC CAGA", + "TCC TA", + "C GTTTT", + "GTA CA", + "GCA TA", + "GAA TTA", + "TGTGTGTG TGTGTGTG", + "CC CAGG", + "GG TTTT", + "TCAA AAA", + "TC TATG", + "CCA TA", + "TGA CAA", + "GGA TA", + "TCA GTG", + "GTA TTTT", + "GAGA TG", + "GC GTG", + "C GTCC", + "TTAA AAA", + "TAA TCA", + "CAA TTA", + "CCA CTG", + "CGG TT", + "GTT GAA", + "TGA TTA", + "CCTT TG", + "CGG TG", + "CAGG TG", + "TCAA TG", + "CTGA TG", + "TCA GGA", + "GTT TAA", + "TATT AAA", + "CTC TTA", + "GCA GGA", + "CTC TCC", + "GAA CC", + "CTT TAA", + "GG GCC", + "GTA TTA", + "GC GCC", + "CCAA TT", + "GC TAAA", + "TGA CTG", + "GATT TG", + "GA TAAA", + "TCA GCA", + "GTT CCA", + "GAAA TA", + "GA CAAA", + "GA GTC", + "GC TATT", + "TCA CAA", + "GAGG TT", + "TAA CC", + "GAA GGA", + "GC TCAA", + "GAAAA TT", + "CCA GCA", + "GTTTT AA", + "GTG CC", + "TGA GGA", + "CA TAAA", + "GG TCC", + "TCA TTTT", + "TATT TATT", + "TAA TAAA", + "GCC TA", + "CTTTT AA", + "TAA GTG", + "TAA GTA", + "CTG GAA", + "CACA CA", + "GA CAGA", + "CAA CC", + "GG GAAA", + "CCA GAA", + "TCA GTT", + "TAA CTA", + "CTAA AAA", + "TGGG TT", + "TGA GTG", + "TAAAA TG", + "TATATATA TATATATA", + "GCA CTG", + "GA CTC", + "TA CAAA", + "TAAAA AAA", + "TC TACA", + "GTT GTG", + "TC GCC", + "CC CAAA", + "GTCA TG", + "CTG CTT", + "GGAA TG", + "CTA TTA", + "GA TATT", + "TA GAAA", + "GG CAGG", + "GA TGAA", + "GTA GAA", + "TCC TGA", + "TAA CTG", + "GCTG GG", + "GCAA TG", + "GCC CCA", + "GTT TGA", + "CATT TA", + "GTG CA", + "CTT GAA", + "GTG GAA", + "CTT CAA", + "TAAA TTA", + "GTG GCA", + "TCC TTA", + "GGAA AAA", + 
"TTTT TTA", + "CC TGTG", + "GTAA TG", + "GTG TTA", + "CTA GG", + "CAGG CTG", + "GA CACA", + "GAAAA AAA", + "TC GC", + "GTAA AAA", + "TGTT TA", + "TCTC TA", + "GTCC TG", + "CCA GGA", + "GAA CAA", + "TAA GTT", + "TGA GCA", + "GC TCCA", + "TAA GCA", + "CTCA TG", + "GTC TTA", + "CC CACA", + "CA TATT", + "GCC TCA", + "CA CTC", + "CTT CTA", + "TGA TTTT", + "TC GCA", + "CC TGTT", + "GAA GCA", + "GCAA AAA", + "GC GGA", + "CCA CAA", + "GC GCA", + "CA TATA", + "GA CATT", + "GTT CTA", + "CAAAA TT", + "GAAA GAAA", + "CC CGG", + "TA CACA", + "CCAA AAA", + "GAGG TG", + "GG CTCA", + "CA GTGA", + "TCC CAA", + "TA TCTT", + "TGA GTA", + "TC GTA", + "TTTT CTT", + "GTG GGA", + "GA GCTG", + "CC CTCC", + "TAGG TT", + "TTA GG", + "TAA TATT", + "CCA GCC", + "CA TCTT", + "GTC TGA", + "GTT TCC", + "CC TGAA", + "GGA GCA", + "GAAAA TG", + "TCA GTA", + "TAA CCA", + "GA TGTT", + "CTG TTA", + "CA TGTT", + "GG CGG", + "CA TGTG", + "GG GAGA", + "CTT TGA", + "TCTT TCTT", + "AAAAAA AAA", + "GGGG TG", + "CTT TCC", + "CTT GTT", + "GCA TTA", + "CC CAGA", + "CAAA TA", + "TC GGA", + "CA GCTT", + "TCA CTA", + "TAA TTAA", + "TAA GGA", + "GAA CTG", + "GCA CAA", + "GC GTT", + "GG CTC", + "TC TTTTA", + "CC TCCA", + "GG CAAA", + "CA GCTG", + "CTA CAA", + "TA CATT", + "GC TATG", + "CTT GTG", + "GA GTCA", + "GTTA TG", + "CTG CCA", + "GTC TCC", + "TGA CCA", + "CA CCTG", + "TATA TTA", + "TGA TCA", + "CA GCAA", + "GA TGTG", + "GTC TTTT", + "CTA GAA", + "GC TACA", + "CTG GGA", + "GGGG TT", + "CAA GTA", + "CAA GGA", + "CC CTCA", + "TA GCC", + "GTT GGA", + "GC TATA", + "TCTG AAA", + "TA TGTT", + "CC CCTT", + "GTT GTA", + "CC CTGA", + "TGA CTA", + "CAA GCA", + "CAA TAA", + "GAA CTT", + "CA TGAA", + "CTTA TG", + "CTAA TG", + "TC TAAAA", + "CCAA TG", + "GAA GTG", + "CC TCAA", + "CC CATT", + "CA GTCA", + "GAGAGAGA GAGAGAGA", + "TA TGTG", + "GCA GTGA", + "TCTCC TT", + "TCC CAAA", + "CCA TTA", + "CCA GTG", + "GCA TCA", + "TCAAA TT", + "GA TCTT", + "GA CAGG", + "GGA GTG", + "GTA GTA", + "CAA CTT", + "GAA GTT", + "CC CCTG", + 
"TCTC AAA", + "GG GTC", + "GA GCTT", + "TATG AAA", + "TA TGAA", + "GA CATG", + "CAA GTG", + "GA TATA", + "CA TCTG", + "CTG TGA", + "TAA TTTA", + "GG CAGA", + "GC GAA", + "CC TAAA", + "CCA TCA", + "CA CTGA", + "GGA CTA", + "GA CGG", + "CTC TTTT", + "CTG TCA", + "TCTCTCTC TCTCTCTC", + "TTAA TG", + "GCA GCC", + "CAAAA AAA", + "GCA CCA", + "CTA TTTT", + "GA GCAA", + "CTT GGA", + "CTG GTG", + "GAA TAA", + "TCC TTTT", + "GAA GTA", + "CA GTAA", + "CAA CCA", + "CTG TAA", + "TGA TAA", + "GCA GTT", + "CA CGG", + "TAAA TAA", + "CTG TTTT", + "CTA CTA", + "GC TCTA", + "C GAAAA", + "CAA GTT", + "CTT GTA", + "GAA TGA", + "GA GTGA", + "GCC TGA", + "GG TTTG", + "CC CATG", + "GG GGAA", + "GAA GAAA", + "TG TTA", + "CAA TTTT", + "TATA TTTT", + "CTC AAAA", + "GG TGGG", + "CC GTG", + "TATT TCA", + "CC CCAA", + "TATT TAA", + "GG CTGA", + "GG TGTG", + "CA TCAA", + "CA CTCA", + "TCTCA TT", + "GAA TTTT", + "GAA TCA", + "CAGG AAA", + "CA TACA", + "TA TTTTA", + "TTA TAA", + "GAGG AAA", + "CA TATG", + "CTT TCTT", + "CAA CTG", + "GG GCTG", + "CC CCCA", + "TTTG AAA", + "CATT AAA", + "CTT AAAA", + "GA CTGA", + "CAA TGA", + "GG CACA", + "CCA GTA", + "GGA TGA", + "GTTTT TG", + "GCA TTTT", + "GTG CCA", + "GCA GTA", + "GCC CTT", + "TC GTC", + "GAA CTA", + "GTG GTT", + "GTG TGA", + "GTG CTT", + "C GCTA", + "GTG TCA", + "TCTT TA", + "GCC TTA", + "CC TATT", + "CAAAA TG", + "GAA CCA", + "CTC CAGG", + "GA CTCA", + "CATG AAA", + "GC TAGG", + "TGTT AAA", + "GC GTA", + "GCA CTT", + "TCTT AAA", + "TAA GAAA", + "GG CCTG", + "TCC CTA", + "GTG GTA", + "CTG CTA", + "GGA GTT", + "GG TAAA", + "CAAA CAAA", + "GA TATG", + "TCA TGA", + "GA CCTT", + "TAA TATA", + "GC TAGA", + "GGA CTG", + "GG CATT", + "CA GTTA", + "CC CTAA", + "CA CCTT", + "GG TGAA", + "CA GCTA", + "GTG TTTT", + "CAA CTA", + "GA TCAA", + "GA GAAAA", + "TGTG AAA", + "AAAA TA", + "GATG AAA", + "CTC TAA", + "TTA CTT", + "GA TCTG", + "CCA CTT", + "GA GTTA", + "CAA TCA", + "GGATTA CAGG", + "TTTA TTTT", + "TACA TA", + "TTTTA TG", + "GA GTAA", + "GCTG AAA", + 
"GTA CTG", + "GC TCTC", + "TATG TA", + "TGTG TA", + "TCA TAA", + "GGA CTT", + "TCTCC AA", + "GCA TGA", + "GA CGA", + "CGCC TG", + "GA CCTG", + "GG TCTT", + "CA CCAA", + "GA TC", + "GA CCAA", + "AAAA TTA", + "GTAAA TT", + "CCA GTT", + "CA GAAAA", + "TAA CAAA", + "GG TGTT", + "GAAA TTA", + "TGCC TCA", + "CC GCC", + "CCA TTTT", + "CTT GCC", + "TCTG TA", + "CTG GCA", + "GG GATG", + "CCA TGA", + "CTA CTT", + "TAGG TG", + "TAAAAA TT", + "GAAA GAA", + "TAAAA TA", + "CTTTT TG", + "GTC AAAA", + "GGA CAA", + "TCTGA TT", + "CTC TCTT", + "TAA TTTG", + "CTC TTTG", + "GG CCTT", + "GGA TTTT", + "CTA CTG", + "GTT GCA", + "GG CTCC", + "CTC TGTG", + "CTC CAGCC", + "TTA CAA", + "GGA CCA", + "GGAA GGAA", + "TAAA GAA", + "TTA GAA", + "GTG AAAA", + "CTT GCA", + "TGGG TG", + "GGA GCC", + "CC TCTA", + "C T", + "GG GCTT", + "GG CATG", + "CTG GTT", + "TA CAGA", + "GATT AAA", + "CTC TGTT", + "TTA TCA", + "CTG AAAA", + "GTA GTT", + "GG GTCA", + "G T", + "CA GCCA", + "GC GTC", + "CA CTTA", + "GTG CTA", + "TC TTATT", + "GTA CTT", + "GG TATT", + "TA GAGA", + "TA CATG", + "CCA CTA", + "TGA GAAA", + "CAA TAAA", + "TCC AAAA", + "CGTG AA", + "GG TCTG", + "CTGAA TT", + "TCA GCC", + "CC TCTC", + "GTT AAAA", + "GG GATT", + "TCC TAA", + "CA CTAA", + "GGA GAAA", + "CCTT CCTT", + "GTT TCTT", + "TA TCAA", + "GA TACA", + "TAATCC CAGCA", + "CC GCA", + "TGAAA TT", + "C GTAAA", + "CTC TCTG", + "TC TTTTTT", + "GTA CAA", + "CCAAA TT", + "TGTA TTTT", + "TC GCTT", + "GG GTGA", + "GA TAGA", + "CTT TATT", + "TAAA CAA", + "GTT TATT", + "TGAA TA", + "CTA CCA", + "GTG TCC", + "CC CGA", + "TTTA TTA", + "CTCC AAA", + "TTTTTTTT TTTT", + "TCA TCC", + "GAA GCC", + "CTAAA TT", + "CAAA TTA", + "CCCC AAA", + "TCTT CTT", + "TAGG AAA", + "CA CGA", + "CA TTTTA", + "GTG CAA", + "TCTCC TG", + "TATTTT AA", + "GTT TGTT", + "GA GCCA", + "GG CCAA", + "CATT TCA", + "CA TCCA", + "CC TATA", + "GA CTTA", + "TCAAA TG", + "GTA TCA", + "TAAA TTTT", + "CTGA GGCA", + "GCC CAA", + "GG TTAA", + "TA TCTG", + "TGA CAGA", + "GGA GAGA", + "GCTG CTG", 
+ "CC CTTA", + "TCC TCTG", + "GTA GCA", + "CCTG AAA", + "CC GAA", + "TTTT TAA", + "CTA TAA", + "CCTG TA", + "TTA CTG", + "GTA TAA", + "GG CGA", + "GA CTAA", + "TCA GAAA", + "GTG TGTG", + "CAAA GAA", + "CC TATG", + "GCA GAGA", + "CC GTT", + "TTTTA TTTT", + "GGAA GAA", + "TTA CTA", + "GCC TGGG", + "TCC CTC", + "TCC TCTT", + "GGA TCA", + "GG TCAA", + "TC GAGA", + "TATT CTT", + "TA CTC", + "GTTAA TT", + "GC GAGA", + "CTTAA TT", + "TCC TTTG", + "GTC TAA", + "CA CCCA", + "GG GTTA", + "GG GCAA", + "GGAAA TG", + "GCAAA TT", + "TA GATG", + "GCA GAAA", + "AAAAAAAA AAAAAAAA", + "CC TACA", + "GGA GTA", + "TC TAATT", + "CAA CAAA", + "TA GATT", + "GG TTTA", + "CC TAGA", + "CTT TAAA", + "TA CTTA", + "TAA TGAA", + "CTA TCA", + "TA GTAA", + "CAGA GAA", + "CAA GAAA", + "GGGG AAA", + "CGTT AA", + "CGTG TT", + "TCTG TCTG", + "TTTTAA TT", + "CTG GCC", + "TAAA TGA", + "C GTCAA", + "TTA GTA", + "GTC TCTG", + "TTTT AAAA", + "CA GTTTT", + "CTT CCTT", + "TATA TAA", + "GC TTTTA", + "TTTT TCA", + "GG TC", + "TTA TTAA", + "TTTT GTT", + "CA TAGA", + "TA GGAA", + "GAGA GAA", + "GTA GCTG", + "TTA TGA", + "GTA GTG", + "GGA GAGG", + "CTC TGAA", + "TA GTC", + "GA CTCC", + "TCC CTCC", + "TAA TGTT", + "CA TCTA", + "GCCA CCA", + "GTA CTA", + "TGGG AAA", + "CGCC TT", + "GCC CGG", + "GGA GGAA", + "GTA CCA", + "CGC AAA", + "CA TAAAA", + "TAA CATT", + "GC TAAAA", + "TCTT CTG", + "GCC AAAA", + "GTA TGA", + "GTC TTTG", + "TA CTGA", + "TCC CAGG", + "TTA TTTA", + "TTA GTT", + "GGA CC", + "TA TAAAA", + "CAAA CAA", + "CTT CTC", + "TCTA TCTA", + "GAAA TAA", + "GTG TAA", + "CTT TGTT", + "GA TAAAA", + "GCC CAGG", + "GC GATT", + "AAAAAA TT", + "TA CAGG", + "GG CTAA", + "TA GCTT", + "GTC TCTA", + "CTCC TGA", + "GAA TAAA", + "TTA CCA", + "GG GACA", + "GCCA CTG", + "GTT TAAA", + "GTC TGTG", + "TGA CAAA", + "TACA TTTT", + "GCCA CC", + "TG TTTT", + "TA GCAA", + "TTA TAAA", + "GA CCCA", + "GCA GC", + "CAGA CAGA", + "CA CAAAA", + "GCC CTA", + "TATT AAAA", + "C GTATT", + "CCA TCC", + "TC GATT", + "GAA GGAA", + "GA TCCA", + 
"TATT TGA", + "GTGAA TT", + "TA CCTT", + "C GTCTT", + "CC TAGG", + "TC GAAA", + "CTT TCTG", + "TGAA GAA", + "TCTC TCA", + "GTC TCTT", + "GGA GGGG", + "GTC TGTT", + "CTA TGA", + "GGAAA TT", + "GCA CACA", + "GCC TTTT", + "CA GTCC", + "CTG GTA", + "GCA TCC", + "TA GTTA", + "GG CTTA", + "GA GTCC", + "TG AAAA", + "TAGA TAGA", + "TGTT TGTT", + "TA CTCA", + "CATT TAA", + "GA TTTTA", + "CA CTCC", + "GAAA CAA", + "GC GCTG", + "TCTT TCA", + "CTG TCC", + "GAA CTCA", + "CGG AAA", + "TATT GTT", + "GCA CTA", + "TATT CAA", + "GC GGGG", + "GTG GCC", + "TAATT AAA", + "TA CTAA", + "GC GGTG", + "TA CCAA", + "GG TATA", + "CTA GTT", + "GCA GAGG", + "CTTTT TTTT", + "TTTTTTTT TTTTTTTT", + "TACA GTA", + "CCA TGTT", + "TA GTGA", + "CGTG TG", + "GC TCTGA", + "CTT CCTG", + "TC GCTG", + "TAAA TCA", + "TCCAA TT", + "GTT TCTG", + "GAA GAGA", + "GG GTAA", + "CCA TAA", + "TTA TATT", + "C GAATT", + "CC GGA", + "TGA GCC", + "CC GTA", + "CAGA GGA", + "GTG TTTG", + "GA CAAAA", + "TTTTTT AAA", + "GTT GCC", + "GA GTTTT", + "TC AAAAAA", + "TGTT TCA", + "TA TCTA", + "TCTC TCC", + "CTC CACA", + "TAAA TATT", + "TTTT CTG", + "CTC TCAA", + "CCTT AAA", + "TCTTTT AA", + "GAA CAAA", + "TTA GCA", + "GCTCA TG", + "TAAA GTA", + "GGA TAA", + "TTATT AAA", + "CTC CATT", + "TCTC TGA", + "TTA TTTG", + "CCTG TAA", + "TTA TATA", + "GA CTTTT", + "TGTT GTT", + "GCAAA TG", + "CTT CAAA", + "GAA TATT", + "GAA TCC", + "CTC TTAA", + "GCA TAA", + "GAA TGAA", + "CTTAA AAA", + "TAAAAA TG", + "TTTTAA AAA", + "CTC TGGG", + "TGA TCC", + "GC TCTCA", + "CTC CAGA", + "GAGTG CAGTG", + "CAA TATT", + "TA GAAAA", + "GTAAA TG", + "TA GCTG", + "GC TCAAA", + "GCA GGAA", + "TA CCTG", + "GG GAAAA", + "TTTT CTA", + "GGGG GGGG", + "CC GA", + "CTT TGAA", + "GGA GGTG", + "TA GTCA", + "GG CCCA", + "TGA TGTT", + "CAAA TAA", + "TCTT CCA", + "GC GCTT", + "GTA TTTG", + "GTC TC", + "GAAA TCA", + "TGA TAAA", + "CATT CTT", + "TA TCCA", + "GCC TCTG", + "TGA GATG", + "C GCCAA", + "GTTTTA TT", + "TATA TATT", + "GTA GGA", + "GACA GAA", + "CTCCAGCC TGGG", + "GC 
GTGA", + "GG TATG", + "GAGG GAGG", + "TCA TTTG", + "CTA CC", + "TACA GAA", + "GG TAGA", + "GA TCTA", + "GTC CATG", + "TGA GGAA", + "TAA TAAAA", + "TAAA CTT", + "TCA CATT", + "GGA GGCC", + "TCA CAAA", + "CA CTTTT", + "CGG CC", + "CAA CAGA", + "GTA GAGA", + "GTTA TTTT", + "CGTT TG", + "TC GTCA", + "TCTG CTG", + "CAA CACA", + "GG TAGG", + "GCA GCTG", + "TAGTA GAGA", + "CAA GCC", + "GCA TTTG", + "TAA TATG", + "GCTT AAA", + "GCTT CTG", + "CTC TCCA", + "TCA TCTT", + "C GTCTG", + "TCA TTTA", + "CA TAGG", + "GC TCCTT", + "TGTT CTT", + "TACA TTA", + "CACA GAA", + "TAAA TATA", + "TA GAGG", + "GA TAGG", + "TCC TGAA", + "GGA GCTG", + "TGA TATT", + "TCA TTAA", + "CTTTT AAA", + "TC GTTA", + "TAAA CTA", + "GTT TGAA", + "TAAAA TTA", + "CA CCCC", + "TCA GAGA", + "CTCC TGCCTCA", + "TGA CATT", + "GTA TTTA", + "CTT CATT", + "GAAA CTG", + "TAA CACA", + "GTT CAAA", + "GGA GATG", + "TC GGCC", + "CAGCA TT", + "TC GATG", + "TATT CTA", + "CTG TGAA", + "TATT GAA", + "TTTT CCA", + "TATT TCTT", + "GGTG AAA", + "CTGA GAA", + "GCA CAGA", + "GC GAGG", + "CTG TGTG", + "TGAAA TG", + "TGA TGAA", + "GTCC AAA", + "CTCAA TT", + "TCCA GAA", + "GTA TATA", + "TAAA GTT", + "TCTC AAAA", + "TCCA TCA", + "GTC TGAA", + "TGA GAGA", + "TGA TTTG", + "TTA GCC", + "CTC CATG", + "TCC CTGA", + "GA GCTA", + "CCCC CCCC", + "GTG GAAA", + "CTG GGAA", + "CAA TGAA", + "CCA CACA", + "CTT TCAA", + "C GGAGG", + "TC GTGA", + "CCA GAAA", + "GTTTT AAA", + "TGTT GAA", + "TCC TGTG", + "CTAAA TG", + "TCC TTTA", + "GTC TGGG", + "TCTC TTTT", + "TA CGG", + "TATT GTA", + "TTA GTG", + "TTA CC", + "TAATCCCAGCA CTTTG", + "TCTG GAA", + "CTT CTCA", + "CGCA TT", + "TATT TAAA", + "TCA CACA", + "TAA TCAA", + "GC GAAA", + "GG GCCA", + "GTT CATT", + "GAGAA AAA", + "TTTT GTA", + "TA CTTTT", + "TC GAGG", + "GTGAA AAA", + "CAA TATA", + "TCC CATG", + "CAA TTAA", + "CTG GAAA", + "CCCA GCA", + "TCC CATT", + "TCC TGTT", + "CTC TTTA", + "TCC CCTT", + "GTT TCAA", + "GTC CAGG", + "GGAA GGA", + "TA GTTTT", + "TGA CCTT", + "GTGCTG GGATTACAGG", + "TATT 
TATA", + "TCTG CAA", + "CTGAA AAA", + "TATG TTA", + "CTT CACA", + "GCA CAGG", + "CCTG CTG", + "TTTT TTAA", + "GTTA TTA", + "CC CTTTT", + "TGA TTTA", + "TA CAAAA", + "TAA GTAA", + "TTTT TAAA", + "CA TCTC", + "GTG GTGA", + "GTG GAGA", + "CTC TGCA", + "GTTAA AAA", + "TACA TACA", + "CTT TGTG", + "GGA CACA", + "TCTGA TG", + "TA TTATT", + "TCTT CTA", + "CTG TGTT", + "TCA GCTT", + "CTT TATA", + "GG CGC", + "TCC CTCA", + "GTA CC", + "TGGA GAA", + "CAAAAA TT", + "TCTT TAA", + "CTC TCTC", + "TGA GTGA", + "GCA GCTT", + "CGGA TT", + "TA CGA", + "TCTT GTT", + "TC GTAA", + "GCC TGTG", + "TATT CTG", + "GG GATA", + "GG GTCC", + "TGA GATT", + "CTTTTA TT", + "TCC CACA", + "CATG GTG", + "TTA GGA", + "GAA CACA", + "TCA TAAA", + "CAA CATT", + "GG TCCA", + "GAA TTTG", + "TATTAA TT", + "TCC TGGG", + "GCA GCAA", + "CTC TTCA", + "GAA GAGG", + "TCTG TCA", + "CTGAA TG", + "CCA CAAA", + "GTG GAGG", + "TGA TTAA", + "CTCC CTCC", + "CACACACACACACACA CACACACACACACACA", + "GC GATG", + "CATT CTG", + "GTA GAAA", + "TCA TCAA", + "TTTT CAA", + "TATG TATG", + "CCAAA TG", + "TAA TTTTA", + "TAA GGAA", + "CTT GAAA", + "AAAAAAAA AAAA", + "GC TCCTG", + "GCA GATG", + "GAAAAA TT", + "GA CGC", + "GTG GGGG", + "GTCAA TT", + "CTT GCTT", + "TGA CACA", + "GTG TGTT", + "CCA GAGA", + "CCCA GCC", + "TAAA GAAA", + "GTC CATT", + "TAAA TTAA", + "CC CAAAA", + "GAA TTAA", + "TGAA TTA", + "TTTT TTTG", + "CCA GCTT", + "CAA TTTG", + "CTG TTTG", + "GTC TCAA", + "GTT TGTG", + "GG CATA", + "GG TACA", + "TGA TGTG", + "GATT TCA", + "TCTG CTT", + "GTAA TTA", + "TAA AAAAAA", + "GCC GCC", + "TGTGTGTGTGTGTGTG TGTGTGTGTGTGTGTG", + "GC GTCA", + "GC TCATT", + "GAA CCTG", + "TAAA CAAA", + "GTG CTGA", + "TCA GGAA", + "TCC TCAA", + "TCTA TTTT", + "TCTG TTTT", + "CAGA GCA", + "CCA GGAA", + "GTC TTTA", + "TCTT CAA", + "TCAAAA TT", + "GC TTATT", + "GTT CCTT", + "CA CCTA", + "TCA CTGA", + "GAA GCAA", + "TAAA GA", + "TCC TTCA", + "TCTCA TG", + "TCA GTGA", + "TACA CAA", + "CA CGTG", + "CC TAAAA", + "GCC TTTG", + "GG CTTTT", + "GTT GAAA", + "GTT 
CTC", + "CTA GA", + "CTA CAAA", + "GCA CAAA", + "TTA CATT", + "GG CCCC", + "TAA TGTG", + "CTG CCTT", + "TCC CAGA", + "GTGAA TG", + "GGA CAGG", + "GGA TGTG", + "GTT TATA", + "TGA CCAA", + "GTG GCTG", + "GTT CTCA", + "CTTA TTTT", + "CTG GAGA", + "TTA CAAA", + "GTC TTCA", + "CAA GAGA", + "CCA TTTG", + "TCA CAGA", + "CTA GTA", + "CA TTATT", + "TTA GA", + "GC TCTCC", + "GC GCCA", + "TATG TTTT", + "TCC TCCA", + "CAGAA AAA", + "GTG GGAA", + "TAA TCTT", + "TGA GTCA", + "CTG CTC", + "GTC TCCA", + "TCA TGTT", + "GTT TCCA", + "TAA GCAA", + "CTAA AAATA", + "TGA CTGA", + "TC GGTT", + "TTA GAAA", + "TAA GCC", + "TAAA GCA", + "CC TCTCC", + "CC TCCTT", + "TCA GATT", + "TATG AAAA", + "GCTGA TG", + "CATA TTTT", + "GC TCCAA", + "CGG CGG", + "CCA CTGA", + "CA GCAAA", + "CTG TCTT", + "CTA GCA", + "TC GGGG", + "CACA GCA", + "GC TGATT", + "CTA GGA", + "TAA CTC", + "TCA TATT", + "CCTT CTT", + "CTG CAAA", + "CC CGC", + "GG TCTA", + "CCCA GGA", + "GTG TCTG", + "TAATAA TAATAA", + "TCA CATG", + "CAA TTTA", + "TATATATATATATATA TATATATATATATATA", + "CCA CAGA", + "TCAA TTTT", + "GTA TTAA", + "GAA CATT", + "TCTC TTA", + "CTA TTTG", + "TCTT TCC", + "GGTT AAA", + "GC TAATT", + "CTG CTGA", + "TA CCTA", + "CAGG GTT", + "TC GCCA", + "CAAAAA TTA", + "CTT CTGA", + "GCA TGTG", + "CTA TTAA", + "GCA CATG", + "CAA CATG", + "TCA TGAA", + "GAA TGTT", + "GG GTTTT", + "CTG CCTG", + "GTC CACA", + "TAAA CA", + "CTC TGGA", + "GA CCCC", + "GG CAAAA", + "TCTG TTA", + "CTA GTG", + "CTA TATA", + "TCA GTCA", + "TAA CTAA", + "GAA GATG", + "GTC TTAA", + "CAA GGAA", + "GTAA AAAA", + "TCC CCTG", + "TC GCAA", + "TCTG CCTG", + "CC TTTTA", + "GTCC CAGCTA", + "TATA TATG", + "TATT GTG", + "TGTG TTTT", + "GC GCAA", + "CACA GTG", + "TAA GATT", + "CTC TGTA", + "GGAGG CTGA", + "GGA CAAA", + "TATTAA AAA", + "TC GTCC", + "TC GGAA", + "CTA TAAA", + "CTT CAGA", + "CTA GAAA", + "CATT CAA", + "CA CGCA", + "CAGGA TT", + "CCA TCTT", + "GTA GCC", + "GAA TTTA", + "CA CGC", + "CAA TCC", + "TGA GCAA", + "GAA GCTG", + "TCAA TTA", + "GAA GTCA", 
+ "CTG CACA", + "CCA CGG", + "GGA TCTT", + "CTCCTGCCTCA GCCTCC", + "TAAA TGAA", + "CC GTC", + "TC GGTG", + "TTTTA TTA", + "GCA GGGG", + "GCA GGTG", + "TCTA TTA", + "TAA CTTA", + "CTAA TTTT", + "CC CGCC", + "TAA TACA", + "GGATT AAA", + "TCTC TCTG", + "GCTT CTT", + "CATT TATT", + "CCA GAGG", + "GGA CAGA", + "GCCAA TT", + "TCC CCAA", + "GTT GATT", + "GAA GAAAA", + "GCA TTTA", + "CTC TAAA", + "CACACACA CACA", + "CC TCAAA", + "TA TAATT", + "CAA TGTT", + "GCC CAGA", + "GTA TATT", + "CTAA AAAA", + "CCA CAGG", + "TAA GAGA", + "TCC TTAA", + "TA TTTTTT", + "GAA TATA", + "GGA TTTG", + "GTG TGAA", + "CTG GCTT", + "GC GGCA", + "TCC GCC", + "GCA TCTT", + "TC TAATA", + "CTG CATT", + "CTC TGCC", + "TCA CTCA", + "TCA GCAA", + "TATTA TG", + "CCA GCTG", + "GA TCTC", + "GCC TCTT", + "CTT CCAA", + "TCC TAAA", + "TCA TCTG", + "CTA TTTA", + "CTG CAGG", + "CAA GCAA", + "GC GGAA", + "GAAA TAAA", + "TAAAA TAA", + "TCA CCTT", + "CCA TGTG", + "GA CCTA", + "CAGA TGA", + "GTG GCTT", + "TTATTA TTATTA", + "TCC CGG", + "TATT TGTT", + "CTG TAAA", + "TCCA TCCA", + "CTG TATA", + "GTT TCTA", + "GTT GCTT", + "CCA TGAA", + "GC TCTTA", + "CTT CATG", + "GTT CCTG", + "GCTG GGA", + "TCA GAGG", + "CATT AAAA", + "TCA GTAA", + "GAA TGTG", + "CTTA TTA", + "GCA CTGA", + "TGA GGTT", + "CA TCAAA", + "CTT CTCC", + "GTT TATG", + "CTT TCCA", + "GTG CCTG", + "GAAA GGA", + "GCA TCTG", + "TA CCCA", + "TAA CAGA", + "AAAAAAAA AAA", + "CTA TGAA", + "CA GTAAA", + "TA GCTA", + "TC GTTTT", + "GTG TCTT", + "GA GCAAA", + "TC TAAAAA", + "GTT CACA", + "GAAA TGA", + "CAAA TGA", + "GCC CTGA", + "GTG TTTA", + "TCA TGTG", + "CATA TTA", + "TCAAAA AAA", + "TAA GTTA", + "TCTC TCTT", + "CCA GTGA", + "CC TCTGA", + "CAA GATG", + "GCC TGTT", + "GTT TGGG", + "CATT CATT", + "GCC CCTG", + "GTT CTGA", + "GC GGCC", + "GC GGTT", + "CAAAA CAAAA", + "TACA TATA", + "GAATT AAA", + "TCAA GAA", + "CTG TATT", + "TTTT TATT", + "GA TTATT", + "TCTAA TG", + "GTT GCTG", + "TGAA TGAA", + "TCA GCTG", + "CTT GATT", + "CAGAA TG", + "CTAA TTA", + "TATAA TG", + 
"GTTTT GTTTT", + "CCA GCCTG", + "TGA TGGA", + "GCA GATT", + "CTC TATT", + "GCA GTCA", + "TAA GTGA", + "CTA CACA", + "CGCA TG", + "TA GCCA", + "GTG GCTCA", + "CAAA TAAA", + "GTG CTCA", + "TTTT TTTTTT", + "TAA CATG", + "TCCCA GCTA", + "CAAA GTA", + "TCA TATA", + "CAGCA TG", + "TGA TCTT", + "CA TAATT", + "TGTG TTA", + "TTTT GAA", + "TTAA TTA", + "GATA TTA", + "TCA TTCA", + "TGA TATA", + "TGA CTCA", + "GA CGTT", + "TGA CATG", + "GTT GTGA", + "CA TTTTTT", + "GCC TGGA", + "CTA TGTT", + "CTT TGGG", + "GTC TCAAA", + "CTG GCTG", + "CCA CATG", + "GG CGTG", + "CTTAA TG", + "TAA GATG", + "GTA TAAA", + "TGTA TTA", + "TAA CTCA", + "GAGAGAGAGAGAGAGA GAGAGAGAGAGAGAGA", + "GCA TGAA", + "GTTAA TG", + "TCCA GGA", + "GAGA GAAA", + "TCTC TGTG", + "CTC TCTA", + "CCA CCTG", + "GCCA GGA", + "CTG GAGG", + "CCA TTTA", + "GTC TGGA", + "GCC CACA", + "TAGA GAA", + "CAA CTCA", + "GGCA GGA", + "TCTTA TG", + "CAAA GGA", + "GG TAAAA", + "GAGA GGA", + "GTC CAGA", + "GCC CTCA", + "GATA TTTT", + "CAGG GAA", + "CCA CATT", + "GA GGAGG", + "GAAA CTT", + "CA GAATT", + "TCA GATG", + "TATT TCC", + "TACA GTG", + "TGA GCTG", + "CCA TCTG", + "GAGAA TG", + "TCAA CAA", + "A TT", + "TAA CTGA", + "TGA GAGG", + "CA CTGAA", + "CCA CCTT", + "CTG CAGA", + "TCA CCAA", + "TGA GCTT", + "CAAA GCA", + "GG TTTTA", + "CGG GGTT", + "TCCAA AAA", + "TATG TATA", + "CCA GATG", + "TCCA TTTT", + "CTG CTCA", + "GA TAATT", + "CCA CCAA", + "CTCC TCC", + "GA GAATT", + "GAAA GTA", + "TAAAA TAAAA", + "CTT CTTA", + "CTG TTTA", + "GAA TCAA", + "GCA TGTT", + "GCA CGG", + "GA CTGAA", + "GTG CACA", + "GA CGTG", + "TATA CAA", + "TC GACA", + "GAA GACA", + "TAAA GGA", + "GA TCAAA", + "CAGTG TG", + "CTA GCC", + "GAGG AAAA", + "TCTG AAAA", + "GAA CCCA", + "GATG GATG", + "GTT CTTA", + "CTA TATT", + "GCA TTAA", + "TCTCTCTCTCTCTCTC TCTCTCTCTCTCTCTC", + "TCA GTC", + "TATTTT TG", + "GAGGA TT", + "GTA TGTG", + "TAA CCAA", + "GTT GTTTT", + "TTTT TCTT", + "GTG TTAA", + "CTT GGAA", + "AAAAAA TG", + "CAA TGTG", + "GTG CCTT", + "GCC TCAA", + "GA GTCTT", + 
"GCTAA TTTT", + "CGAA AAA", + "GTG TATA", + "GC GTTA", + "CTGCA CTCCAGCCTGGG", + "GTT CATG", + "CAAA GAAA", + "GCA GTAA", + "GGA TGAA", + "CTT TATG", + "CAGG AAAA", + "TCC TGCA", + "CTG TCTG", + "GAA CATG", + "GGA TGGA", + "GCC TGAA", + "CAAAAA TG", + "TCCAA TG", + "CCA GCAA", + "GG CCTA", + "CAA CTGA", + "GCA CCTG", + "GTC TATT", + "CC TCTCA", + "GTG GTCA", + "GTG TAAA", + "GTA CACA", + "GTAAAA TT", + "GTA CATT", + "TATA TAAA", + "CTG TTAA", + "TAA GTCA", + "GCC TCCA", + "AAATT AAA", + "GTG CAGG", + "TCC TGGA", + "GTG CAAA", + "GC GTCC", + "CCA TTAA", + "GGA GGGA", + "TCA CTTA", + "TCATT AAA", + "CAA CATA", + "TAA TAGA", + "TAA TGTA", + "GA TTTTTT", + "GTT GTCA", + "GGA GACA", + "GTG TGGG", + "TCA CAGG", + "TC GGCA", + "CTCC CTG", + "GA CCAAA", + "TGTT TATT", + "CGAA TG", + "CTCAA TG", + "TCA CCTG", + "CA GTGTT", + "TGA GACA", + "TA GGGG", + "GAAAAA TG", + "GTT GAGA", + "TC GATA", + "CTC GGGAGG", + "GTT GTC", + "CCA GTCA", + "GCC CAGGCTG", + "GAA CAGA", + "GGCTCA CTGCAA", + "GCA GACA", + "TGA GGTG", + "CA CGTT", + "TAA GAAAA", + "CCA GGCA", + "GTA TCTT", + "CTTGG GAGG", + "CTT TCTA", + "CC GCTG", + "GA GCTCA", + "GAGA CAGA", + "CTT CAGG", + "GCA CATT", + "GTA CAAA", + "CTT GTAA", + "GTG GGTG", + "GAA GTGA", + "GG TCTC", + "GTA TGTT", + "GCA CTCA", + "TTA TGTT", + "CAA GTCA", + "CAA GTGA", + "GAAA CTA", + "TAAA TAAAA", + "TCTT AAAA", + "GTT GGAA", + "GTT CTAA", + "CCA CTC", + "CA GTGAA", + "GAAA GG", + "GCA CGA", + "TAA CTTTT", + "GTT GTTA", + "TCA GTTA", + "CGGA TG", + "TATT TGAA", + "CC CTGAA", + "GCC CTC", + "CTT CTAA", + "TTTG TTTT", + "GA GCTGA", + "CTG TGGG", + "CAA GATT", + "GAA GCTT", + "TGA GTAA", + "CTT GCTG", + "GGA TGGG", + "CGTA TG", + "TCCA TTA", + "GTC TGCA", + "GCCA TTTT", + "GTT GTAA", + "CACA CAA", + "GGACTA CAGG", + "C GTTTTA", + "TCTT CC", + "TAA CCTT", + "CTT TAAAA", + "TGAA TTTT", + "CTA CAGA", + "GCAA GAA", + "TAA CAAAA", + "CAATT AAA", + "CCA CTCA", + "CATG GTGAAA", + "CCCA GAA", + "CTA CATT", + "CC GAGG", + "TCCA GTG", + "TGA GTTA", + "GGA 
GTCA", + "TAA CGA", + "GA GTAAA", + "GA CTCTG", + "GGA GCTT", + "TA CTCC", + "CTG CATG", + "GC TTTTTT", + "GTC TAAA", + "GTG CGG", + "CA TCTCA", + "TGA TCAA", + "GGA GATT", + "GC AAAAAA", + "CA CCAAA", + "TGA CGG", + "CAGA GG", + "GTT GATG", + "CTT GTCA", + "TCCA CCTG", + "GGA GCAA", + "CAA GTAA", + "CCA TAAA", + "GTG CATG", + "GCA TATT", + "GTA GATT", + "GCC TAA", + "CTCAA AAA", + "GGA GAAAA", + "CTA TCC", + "TAATA TTA", + "GTG CTC", + "CAA TATG", + "TGTG GAA", + "TGA CTC", + "GTG TATG", + "TTTTAA TG", + "GC TCTAA", + "CACAA TG", + "CA GCTCA", + "GTT GGTT", + "CTAAAA TT", + "GTC TATG", + "TGTG AAAA", + "CTG GGTT", + "CCCC TCC", + "CC CTCTT", + "GCA GGGA", + "GAAA CCA", + "CATT TCC", + "GCA GCCA", + "TCA TATG", + "GCA GGCA", + "C GTAAAA", + "TGA CCTG", + "CAGA GGTT", + "CTT GTGA", + "TTA TCTT", + "CTG TATG", + "GTCAA TG", + "GGA CGG", + "GC GTAA", + "CAAA CTA", + "TAAA TGTT", + "CTT CGG", + "CTCC CCA", + "TACAA TG", + "TCTG TAA", + "GAA TATG", + "GC GGGA", + "GGA CATT", + "TTA TGAA", + "GGA TGTT", + "GGA CATG", + "TCA GGTG", + "CAA CAAAA", + "GAAA GAGA", + "GTG GATG", + "GG GCTA", + "CCA TCAA", + "CA GCTGA", + "CTC CACC", + "CAA TCAA", + "GTG GTC", + "TGA CAGG", + "CCA TTCA", + "GTCC CTG", + "CAGA CACA", + "GTT GGTG", + "CC TCCTG", + "GAA CTGA", + "TATT CATT", + "GCC CATG", + "CAA TCTT", + "GAAA GCA", + "GAA TCTG", + "TTA TTTTA", + "GTT TGGA", + "TTTT TGTT", + "GGGAA TG", + "GC GACA", + "TAAA CTG", + "CCA TATT", + "GGA TCC", + "CAA GCTT", + "TAAAAAA AAA", + "TCA CTC", + "CA CTGTT", + "TGTTAA TT", + "GGA CTGA", + "GGA GTGA", + "CATA CACA", + "GTT TGTA", + "TCCA GCA", + "GTG CATT", + "GG AAAAAA", + "CCAA GAA", + "TCAA TA", + "CTT CCCA", + "TGA GAAAA", + "GGCC TCCCAAA", + "CAA GCTG", + "GCC CAAA", + "TGA CTTA", + "CA GCCTT", + "CTG GATT", + "TTTT TTTA", + "TCA CGG", + "GCA GTTA", + "TGA CTAA", + "TTA CAGG", + "TGA TATG", + "TAA TTATT", + "TCTT GAA", + "GCC CCTT", + "GTT CAGA", + "CTC TATG", + "CCA TGGA", + "GAGG GAA", + "GGA GGCA", + "CTT TGCA", + "TCTT GG", + "GGA 
GGTT", + "GCCAA TG", + "CTG GTGA", + "CAA CCAA", + "CCA GTC", + "CTT GAGA", + "TACA GCA", + "CTT GTC", + "GA CGGA", + "CTT CTTTT", + "GTG GC", + "GAGGA TG", + "CAA TAAAA", + "GAAA TTTT", + "AAAA AAAAAA", + "CTC TATA", + "GTA TGAA", + "CTT GTTA", + "TAA CATA", + "CAAA CACA", + "TGATT AAA", + "GCTC TGTT", + "GTG GGTT", + "GTT GGGG", + "GTG TGTA", + "GTAA TTTT", + "GTA TCC", + "TGTGTGTG TGTG", + "TCTT CCTT", + "TCA CTAA", + "TCTCC AAA", + "TA TCAAA", + "TGA TGGG", + "GGA TATT", + "CAAA TTTT", + "GTT CAGG", + "GTG GATT", + "GTG CAGA", + "GCTG CC", + "CTCA GAA", + "GCA GTC", + "GGA TAAA", + "GCC TTCA", + "CCA GGTG", + "TA TCTC", + "CAA TGCA", + "CCCA CTG", + "GTG TATT", + "CGA CAGA", + "TGA GATA", + "CCA GGTT", + "TGTT TAA", + "CATCA TG", + "TGA TTCA", + "GCAA TTA", + "GAAA TGAA", + "CTT GGTT", + "GAA GATT", + "GGA TTAA", + "CC TCATT", + "GGCCA GGCTG", + "GCTA TTA", + "GCCA GCA", + "GAGA CAGG", + "CTT GAGG", + "CA GTCTT", + "GTT CTCC", + "TATT TCAA", + "TGA CGA", + "CATG AAAA", + "CATTA TG", + "TAAA TTTA", + "GA GTGAA", + "CAA CAGG", + "TAA GCTT", + "CACA TTTT", + "GA TCTCA", + "TA GTCC", + "GACC CTG", + "TAA TGCA", + "TAA GTC", + "TAA TAATT", + "GAA GTAA", + "CAA CTC", + "CA TCATT", + "GA CGAA", + "GAAA CAAA", + "TATT TCTG", + "CATTAA TT", + "CCA CCCC", + "TAATA TTTT", + "GTT TAAAA", + "GTA TCTG", + "GTCAA AAA", + "GATG CTG", + "TGTT CTG", + "GG TCAAA", + "GTA GGAA", + "GTA TATG", + "TGA TCTG", + "GGGG CTG", + "GCA TCAA", + "GCCAA AAA", + "CCA CGA", + "GC TAATG", + "CAGA GAAA", + "CCTT CTG", + "TCC TCTA", + "GCA GGTT", + "CTCA CTG", + "TAGA TTA", + "GCC GAGA", + "CCA TCCA", + "CTT TACA", + "GTA CATG", + "GCA CCAA", + "CTT TGTA", + "CTA TGTG", + "TCA CTTTT", + "TGA GTC", + "CAA GAAAA", + "CTGA CTG", + "GTTTT TTTT", + "GCA TAAA", + "TAA TCTG", + "GAA AAAAAA", + "CAGGA TG", + "TGA GCCA", + "GAA TTCA", + "TCA GACA", + "GTT CCAA", + "TCA GGTT", + "CAAA CTG", + "CATT TCTT", + "TGTT AAAA", + "CCA GACA", + "CAA GTTA", + "CATG TTA", + "CATT CTA", + "TCTTTT TG", + "TGA GGGG", + 
"CACA TTA", + "TAAAA TAAA", + "GCA TATA", + "TGTT CTA", + "GAA GGGG", + "GAGTG TG", + "TAA GACA", + "GAA CTC", + "CCA GTAA", + "GAGA GAGG", + "GC GACC", + "CAA TTCA", + "CGG CTG", + "CCA GATT", + "CCTG GG", + "GGAA GAAA", + "GAGA GG", + "TCAAAA TG", + "CCTCA TG", + "TAAA GG", + "CTT TGGA", + "CCA GGGA", + "GTA CAGA", + "CTGAGGCA GGA", + "TGTT TCTT", + "CCA GGCTG", + "CTGA GG", + "GAGG CTG", + "CTCC TGGG", + "GAA GTC", + "CGA CC", + "GGA CTCA", + "GGA GTC", + "CA CAATT", + "GTG TTCA", + "GA CTAAA", + "GTCA TTA", + "CAAAA TTA", + "TGAA GAAA", + "GCA CCTT", + "GTT TGCA", + "TCC TGCC", + "GTA GATG", + "GCC TGCA", + "GA GTTAA", + "TCC CTTA", + "GTG GTTA", + "TC GGGA", + "TACA TAA", + "TCTC TCCA", + "CA CTAAA", + "TATATATA TATA", + "GTG GCAA", + "CACCA TG", + "TTTG AAAA", + "CACA CTG", + "CTT GGTG", + "TACA CTG", + "CC TCCAA", + "CAA CCTT", + "CA GCCAA", + "TTTT CAAA", + "TGA TAGA", + "TACA CTA", + "TCTG GG", + "TCC CAGCA", + "TAGG AAAA", + "CTT GGGG", + "TC TGTGAA", + "CC TTATT", + "CATT TAAA", + "TTTTA TTTTA", + "GCC CTCC", + "CTGA GCA", + "CC CGTG", + "GTA GTGA", + "TCC TATT", + "GAA GGTG", + "TGTG CTG", + "TCCA CTG", + "TAA TCTA", + "TGA TGTA", + "GTG GTAA", + "TAA TGGA", + "GATG AAAA", + "GTA GTAA", + "GTG GGGA", + "GTG TCAA", + "CAGA CTG", + "TC GAAAA", + "CTCA TTA", + "TAA TAATA", + "CTCA GAAA", + "CA TCCTT", + "CC GCTT", + "GGAA GG", + "CC GTGA", + "CCA CTCC", + "CTA GAGA", + "TAGAA TG", + "GGA TTTA", + "TTAA TTTT", + "GC TAATA", + "TCC CCCA", + "CAAA TATT", + "GA TCATG", + "TCTTAA TT", + "CA GTATT", + "GTCTT GAA", + "CC GAAA", + "CTA TTCA", + "TAA GATA", + "CTT GCAA", + "GCC CCAA", + "TCC CTAA", + "GAA GTTA", + "GA TGATG", + "CTT GATG", + "CC CTAAA", + "CCTG CCTG", + "GACA TTTT", + "CCA GCCA", + "TGTGTGTG TG", + "GTC TATA", + "TCTC TGTT", + "GTC TGTA", + "TA TAATA", + "CTT GTTTT", + "CGC CATT", + "CTCA GCA", + "TACA GTT", + "CAA GAGG", + "GGAA GCA", + "GCC TTTA", + "CC CCATT", + "CAA CGA", + "GTCA TTTT", + "CC CGCA", + "CA GTTAA", + "GAA TCTT", + "CATG TTTT", + 
"CC GGGG", + "CTA CTGA", + "TCA CGA", + "TAAA TTTG", + "GCC CATT", + "CTC TAGG", + "GGA CCTG", + "TCA GGGA", + "GAGA CTG", + "CC AAAAAA", + "GCC GG", + "CCA GGGG", + "TCA GAAAA", + "CA TCTGA", + "TCTT CAAA", + "CTA CAGG", + "GAGG CAGG", + "CATT GTA", + "TAAA TCAA", + "GA CTCTT", + "CTGA TTA", + "GCA TATG", + "GGA CCTT", + "CAA GACA", + "TATT TATG", + "TATTTT AAA", + "CC GAGA", + "TCA TTTTA", + "CTCA CTCA", + "CCA CCCA", + "CTC TAGA", + "CTA CATG", + "GTG CTTA", + "CAA CCTG", + "TC TGTGTT", + "TAAA TATG", + "CAAA GG", + "CC CTGTT", + "GTT CGG", + "TGA TAAAA", + "CA CGAA", + "GTT GAGG", + "CAGA GTGA", + "GAAA TTAA", + "CACA TA", + "GAA CAGG", + "TCTCC TGA", + "CC TGAGG", + "GGAGG CCAA", + "GTT TACA", + "TAA CAGG", + "TGTG GTG", + "GCCTCC CAAA", + "CCA TCCTG", + "GATT CTT", + "GAA TGGA", + "GTA GTCA", + "CTCC TCTG", + "GAAAGAAA GAAAGAAA", + "CC CTGTG", + "CAGTA TG", + "GC GATA", + "GGA CTC", + "GAAA GA", + "TGTT GG", + "GTA GCTT", + "CA TTTTAA", + "CC CTCTG", + "GCA TTCA", + "CGA TTA", + "TCA CATA", + "TAA TGAAA", + "GGAA TTA", + "CTG TCAA", + "TAAATT AAA", + "CAA GTC", + "GTA TTCA", + "GGCCA TG", + "CTT TAGA", + "TGTT TCC", + "CATG TA", + "GAA TAAAA", + "CAA CTAA", + "TCA TCTA", + "CA CTCTT", + "CA GTTTG", + "CA TAAAAA", + "GCA TGCA", + "GATT TA", + "GAA CCAA", + "TCTG TGA", + "TCA GCCA", + "TCTC CACA", + "TCTCA GCTCA", + "TATCA TG", + "GCA CTTA", + "CGC CAGG", + "CGG GG", + "CATTAA AAA", + "TTTG TTA", + "GGA TATA", + "TC GACC", + "TAA TCCA", + "CC GC", + "CATT GTT", + "CCA GTTA", + "GTA GTTA", + "CTA GGAA", + "CC TAATT", + "TCA TGGG", + "GAA CTAA", + "GCTA TTTT", + "CC GTCA", + "CAGA TTA", + "CCA TATA", + "CAA CTTA", + "TCA GTTTT", + "CTA CCTT", + "GCA CTC", + "GTG TGGA", + "GTG CCAA", + "GACAA TG", + "GA CAATT", + "GTA CCTT", + "TAAA CATT", + "CA GGAGG", + "GTG CGA", + "GAAAA TTA", + "TCTCTT AA", + "CC GATT", + "GA TGATT", + "CCA TGGG", + "TC GGTA", + "CCA TATG", + "CCA GTCC", + "GCC TTAA", + "TGA TCCA", + "GTT GCAA", + "GTA GAGG", + "CAGA TTTT", + "GTA CTTA", + 
"TCTTTCTT TCTTTCTT", + "GCTC TGTG", + "TCAA TAA", + "GTT TAGA", + "GTT CGA", + "CAA GGTT", + "CTCA TTTT", + "CACA GG", + "CATG CTG", + "GAA CGG", + "TA TAAAAA", + "GAA GGCA", + "GA GCATT", + "TGTT TGTG", + "GCTG TTA", + "GTCA CTG", + "CAAA TGAA", + "GTGA CTG", + "GTT CTTTT", + "CAGGCTG GAGTGCAGTG", + "TGA TGAAA", + "TAA CGG", + "CTA CTAA", + "GACA TTA", + "GGA CGA", + "GAGCA TG", + "GCA TGGG", + "CCA CTTA", + "CTA TCAA", + "GCTG TTTT", + "GTC GTG", + "CCTG GCC", + "TCTC TGAA", + "TGTT GTA", + "CAGC CAGG", + "GTT TAGG", + "CC GCAA", + "GGA GTAA", + "CCAA TTA", + "CAGC AAAA", + "TCA TCCA", + "CA CGTA", + "TCA TAGA", + "TAATT AAAA", + "CA CTTAA", + "TCTT TATT", + "GAGA TTA", + "TAA GAGG", + "CAAA TTAA", + "GA CGCA", + "CA CGGA", + "GTG TGCA", + "TC T", + "TATTA TTA", + "GAAA TATT", + "GGA GTTA", + "TCTT TGA", + "CTGA TTTT", + "TGTGAA TT", + "TCC CACC", + "CC CTTTG", + "CAA GGTG", + "CAGA GTT", + "CCCCA TG", + "CTA CCAA", + "CTCC AAAA", + "CTT CCCC", + "CTG CTAA", + "GATT AAAA", + "GC TTATG", + "CTA CTTA", + "TAAAAAA TT", + "TCA GTCC", + "CTATT AAA", + "GAA TGGG", + "CACA GTA", + "CAA CGG", + "GG TTATT", + "TCA CCCA", + "TGA TGCA", + "TAA TTTTTT", + "GTT TGAGA", + "GTATT AAA", + "GCC CCCA", + "TATA GTA", + "TA GTAAA", + "TGA TACA", + "GTG GTTTT", + "CCA CTAA", + "CACA GAGA", + "CCTCTG CCTCC", + "CAA AAAAAA", + "CTC TCTCC", + "CA TAATA", + "GAA GCCA", + "GTT CCCA", + "TGTG TTTG", + "CAA TGGA", + "TGAA GTA", + "CTT CATA", + "CA CTGTG", + "GC TCTTTT", + "TGA CATA", + "TAAA GAAAA", + "GAGAAA TG", + "CAGG GAGG", + "TGTT CAA", + "GA GCCAA", + "GACA GAGA", + "GG CTGAA", + "CAAA TATA", + "GTG GAAAA", + "TAA GGTT", + "GTGA TTA", + "GGA TCTG", + "GATG TTA", + "GACTA CACA", + "TCC TATA", + "CTG CCAA", + "TCC CGA", + "GTGA TTTT", + "GC GTTTT", + "CAGA GTA", + "GAAA GGAA", + "CA CTTTG", + "CCCC AAAA", + "GCAA CCCA", + "TGCA TTTT", + "TCTA GAA", + "TA CTTTG", + "TGA GGCA", + "CA TCTCC", + "TC GCTA", + "TGA CTTTT", + "GA GCCTG", + "CATT TGTT", + "TCTT TGTT", + "GCAAAA TT", + "CC 
TGATT", + "GA TAAAAA", + "GA GTGTT", + "TCC TGTA", + "TACA GAAA", + "TC CAGGAA", + "GCCA GTG", + "TAGA TTTT", + "TAA TAGG", + "CTCC TCA", + "CATTTT TG", + "CATT TCAA", + "GCCA TCA", + "TAAAA TATA", + "GA CTGTT", + "GCA TGGA", + "CAAA GTT", + "CA TGATT", + "GA GTTTG", + "CTA GCAA", + "CTT CCTA", + "GG GGAGG", + "CTA TATG", + "TATT TATTTT", + "CA CCATT", + "CC CTCAA", + "TTTTTTTT TTTTTT", + "GA TCATT", + "GTA CATA", + "CTC CATA", + "CCCC GTCTCTA", + "GCC TGCC", + "CTA GCTT", + "CC CGGA", + "GATG TTTT", + "GTA TTTTA", + "TCA GATA", + "CCTG GAA", + "TATT CCA", + "GGA CCAA", + "GCCA TTA", + "CGA CTGA", + "TAA GCTG", + "TAAA CACA", + "GTT TCTC", + "CA TCTTA", + "GAAA TTTG", + "TAA TGGG", + "TAAAA TTTT", + "CTG TTCA", + "CCTG TTA", + "TA CTGAA", + "TGA CCCA", + "TGA TTTTA", + "CTCC TTA", + "TATA GAA", + "CTG CGG", + "GC GGTA", + "GTG CTAA", + "CAGA GGAA", + "TACA TCA", + "TCAA TCAA", + "CTG CAGCC", + "TGAA TATT", + "TCTA CAA", + "CCA CATA", + "CC CGTT", + "TATA CACA", + "TCC TCTC", + "TCTA CTT", + "CC GGAA", + "CTTTT TTA", + "GAAA GAAAA", + "CTA TCTT", + "GA CTTTG", + "TGAA CAA", + "GCA GTTTT", + "GC TAAAAA", + "GAGG CGG", + "TAA TAAAAA", + "CTG GTCA", + "CAGA CAA", + "GGA TATG", + "TGAA GG", + "GCCA GAA", + "CCA GGCC", + "CCA CCATG", + "CAAA CTT", + "TCA TGTA", + "GCTG CTT", + "GTAA TA", + "CCCC CAA", + "CA GCCTG", + "TCAA CTT", + "TAAAA TTAA", + "GCTG AAAA", + "CGA CGA", + "GTG GGCA", + "TGA GGGA", + "CGC TCC", + "TTTT GTTTT", + "GA GTCAA", + "TCA TGCA", + "CTG CTTA", + "TAA GTTTT", + "GTA GCAA", + "CCTT GG", + "TGA CAAAA", + "CTG GTAA", + "TCTT TATA", + "TGTG TGTT", + "CTG GTC", + "CTG GCAA", + "CATT TCTG", + "CTC TACC", + "CTGA GGA", + "CTAAAA TG", + "CTA GATT", + "GTA TCAA", + "CA GTCAA", + "CTG GGTG", + "CC TCTTA", + "TGA GTTTT", + "TTTTA TTTA", + "CC TTTTTT", + "TATA TACA", + "TA GCAAA", + "AAA TTA", + "CTG GATG", + "GA TAATA", + "GA CAAAAA", + "CCTG GGA", + "GCTT TCA", + "GTA CAGG", + "GCTG GAA", + "CTA CTCA", + "CAA TGTA", + "GC GTGAA", + "GA TCCTT", + "TATTAA 
TG", + "GCC CGA", + "TAAA GTG", + "GCTT CCA", + "CATG GAA", + "TGAA GTT", + "CTT TCTC", + "TCTGTG TG", + "GTA TGTA", + "CAA TACA", + "TCAA GG", + "CC TCTAA", + "TGTG GG", + "GA TCTGA", + "GTA CTGA", + "TTAA TTAA", + "GCA GAAAA", + "CTA CATA", + "CC GGTG", + "GGGG AAAA", + "TACAA AAAA", + "TTTT GG", + "GTGA GAA", + "TCAA TAAA", + "TCAA GTT", + "CTCA GGA", + "CTA CTC", + "CAAA TCA", + "GGCA GAA", + "CC CGAA", + "TGTT GTG", + "GAGC AAAA", + "TATT TGTG", + "GTA GGTT", + "CTA CCTG", + "CA CAAAAA", + "CTCA GG", + "GCTT TA", + "CAGA GCAA", + "CTCA GTG", + "GGAA GAGA", + "TAA CCTG", + "GAAA TATA", + "CGA GAA", + "GTGA GG", + "CATT TATA", + "GGCA GCA", + "TC TAAATT", + "CCCA GTG", + "GCC TAGG", + "TGCA TTA", + "CC GTAA", + "CATT CCA", + "CTA GTTA", + "GA CTTAA", + "CTA TACA", + "GACA CAA", + "TCTT CACA", + "CC GGTT", + "TAAA GTAA", + "CTG TGGA", + "TAA GGTG", + "TCCA GTA", + "CAAA TTTA", + "AAATT AAAA", + "CCA TCTA", + "CTCC CTT", + "CTCC TTTT", + "GAGAGAGA GAGA", + "GGA GATA", + "CCTA TTA", + "CACC AAAA", + "CC GTTA", + "TGTT TATA", + "CTCA GGAGG", + "GA CGTA", + "GTCC TTA", + "GAAA GTT", + "GCTG GTG", + "CTC TACA", + "CAA TAGA", + "TAAAA TATT", + "GTA CCTG", + "GTA CTAA", + "CTT TGAAA", + "CCTT TCC", + "TAAAAA TTA", + "CTC GG", + "CAA GATA", + "CATT TGA", + "CACC TCA", + "GCCA GCC", + "GTC GG", + "GCA CATA", + "CA CTCAA", + "CTTTT AAAA", + "CAGGAA TT", + "GCC TATT", + "TCTT TCTG", + "CTGAGGCA GGAGAA", + "CAGG CAGG", + "CTA GTAA", + "TCCA TA", + "GAA CTTA", + "C G", + "GCTG TGA", + "GAAAA TA", + "TCTT CATT", + "GAGG GAGA", + "CCCA TCC", + "GAGG TGGG", + "GCC TCTA", + "GTA GGTG", + "TAAA CCA", + "GAA GGAAA", + "TATT GG", + "A TG", + "TCCA GTT", + "CCCA CAA", + "GAAA CACA", + "GTC TCAAAA", + "CTTTT CTTTT", + "TGAA GGA", + "TATT GATT", + "CTA TGTA", + "AAAAAAAA AAAAAA", + "TCCTT AAA", + "GC GCTA", + "TCCA CTT", + "GA CTCAA", + "TAAA TACA", + "TCA TGGA", + "TCTG GGA", + "TCC TATG", + "CTG TGCA", + "TCAA GTGA", + "TCA TAAAA", + "CA TCCAA", + "CCTT CCA", + "CTG TACA", + "GAA 
GGTT", + "CTG TGTA", + "GTCA CTT", + "TCA CAAAA", + "TCA GGCA", + "GTGTT AAA", + "CC CTTAA", + "CAAA GTG", + "GAAA TGTT", + "CTG GGGA", + "GA CGCC", + "TATA TGTG", + "CTA GATG", + "GAAATT AAA", + "GAA TGCA", + "GCA CTAA", + "CGG GAGG", + "GCCA CAA", + "CGC TTA", + "TCCA CAA", + "CAGA TA", + "TC TGAATT", + "TATTA TTTT", + "GC GCGG", + "CTC TGAAA", + "TCTCTT TG", + "TATT TCTA", + "GGGG TGGG", + "GGA TGCA", + "CCA CACC", + "TAAA TGTG", + "TCTT CCTG", + "GCAA GG", + "CTG CTCC", + "CTG GAGTG", + "CTGTT AAA", + "CACA CAAA", + "CTGA CTT", + "GAAAA GAAAA", + "CCTT CTCC", + "GAAA TAAAA", + "CCTCA GGTGA", + "GA TAATG", + "GAATT GCTT", + "CCAAAA TT", + "CGTG AAA", + "CACTG AAA", + "CAGTG AAA", + "GA TCTTA", + "GAGA TGGG", + "TCTG CCA", + "TGA GGTA", + "TATG GAA", + "TATA TTTTA", + "TGAA CTT", + "GCA GATA", + "CTTTT CTT", + "GTAAAA TG", + "TCTC TAA", + "TCTG CAAA", + "GA GCCTT", + "TA TCATT", + "CAA TTTTA", + "CC GCCA", + "TATT TAAAA", + "GAGA GATG", + "GAGA TGGA", + "GCCA GGATG", + "CGA GTAGCTG", + "TTCA TTTT", + "TATA CTT", + "GTC TACA", + "GTGA GTGA", + "GCTA CACA", + "GGGA GGA", + "CAA GGCA", + "GC TTTTAA", + "CA CTATT", + "GTT CATA", + "TCC TC", + "GTG GACA", + "TATT TGGA", + "CTC CAGTA", + "GTT CAGTT", + "CCAA GG", + "CAGA GCC", + "CTC GCC", + "CC GATG", + "GGAA TTTT", + "TCCA GCC", + "CC TCTTTT", + "GAA CCTT", + "CATG CACA", + "GTT TC", + "GAA GATA", + "TA CCCC", + "GCTG CCA", + "GGGG GAGG", + "GCAGTGA GCTGA", + "CTG TCTA", + "CGA GGA", + "CAA TGGG", + "GC TGTGAA", + "GAAA GTG", + "TACC AAAA", + "GTCA GG", + "CAGC TCC", + "TGTG CTT", + "GTC TAGG", + "TTTT TGTA", + "TTA TATG", + "TCA GGGG", + "TATT GTTA", + "CC TGAGA", + "TA TCTCA", + "CAA TCTG", + "CA CTCTG", + "GATT TAA", + "TGAA TAA", + "TCTT GTA", + "TCAA CTG", + "TCTC CAGG", + "CTA GAGG", + "CTGA GAAA", + "CTA GCTG", + "TCCA CCA", + "CGA TTTT", + "CC GGCC", + "GTT GACA", + "CTTA GAA", + "CA TAATG", + "GA GTATT", + "CACA GAAA", + "GA CTGTG", + "CTA TTTTA", + "TGA GGAAA", + "TTATT AAAA", + "CTTA TTTA", + "CAGA CTT", + 
"CA CGCC", + "GCTT GG", + "CCTG CTT", + "TAAA GCAA", + "CCTC GTGA", + "TA GAATT", + "CTTA CAA", + "TAAA GGAA", + "GTC TAGA", + "GTGA CTT", + "TACA TATG", + "GTCA GGA", + "GCTC CAGG", + "GAA GGGA", + "CA TGATG", + "TCA TCAAA", + "CGTT AAA", + "GTA CTCA", + "CTCC CAA", + "TATA TGTA", + "GGTA TTTT", + "TAA GCCA", + "C GAAATT", + "GTTTG TTTT", + "TCTG TCTT", + "TATA TCA", + "TGTT CATT", + "CAAA CCA", + "TTCA TTA", + "TATT TGTA", + "GATT GAA", + "CTA TAAAA", + "GATTAA TT", + "CCCA CCA", + "TCC TAGG", + "TAAA TGTA", + "CTCTT AAA", + "GCA GTCC", + "GC GGCTG", + "GTC TCGAA", + "TGAA TGA", + "CTG GGGG", + "GTC TCGA", + "GAA CAAAA", + "TGAA TCA", + "TGTATTTT TAGTAGAGA", + "GTTA TTAA", + "TTTTTT AAAA", + "GTCA GTG", + "CCCA TTA", + "CACA GGA", + "TATT CCTT", + "TCTG CCTT", + "CCTG GTG", + "GC GAGC", + "TA CTAAA", + "TACA CAAA", + "CC GTCC", + "GCTT TGTT", + "GCA TCCA", + "CA TCTAA", + "GC TGTGTT", + "GTA GACA", + "GCC TATG", + "TCTT TGTG", + "GATT CTG", + "CGCC CGG", + "GA TGAGA", + "TA TCTGA", + "TGAA TTTG", + "CC TGATG", + "TAAAA CAA", + "CTT TAGG", + "TTTT CCTT", + "TGAA TAAA", + "CGG GGA", + "CAAA CATT", + "GTA TGGA", + "GCTT AAAA", + "TA CCAAA", + "CAAA GAGA", + "CTCC TGCC", + "GTAAAA AAA", + "CACA GCC", + "CCA TGCA", + "TA CAATT", + "CTA GTGA", + "CTGA GTT", + "GAGTG AAA", + "TCTGTT TG", + "CTG TAGG", + "TATAA AAAA", + "GCATT AAA", + "GTC CATA", + "TGTTAA AAA", + "TGTT TGA", + "GAA TAGA", + "CTT CAAAA", + "CTG GACA", + "CTG TAGA", + "CCATT AAA", + "CTA TCTG", + "CACTA TG", + "TTA TCAA", + "TAA GTAAA", + "TAATCCCAGCACTTTG GGAGGCC", + "CCA GAAAA", + "TGAA GCA", + "TCC CTTTT", + "TCA TACA", + "TA CGTT", + "GCC GTG", + "GGAA GTG", + "GG CCAAA", + "GTA CCAA", + "TCTCTA CTAAAAATA", + "CATT GTG", + "TGTG TGA", + "GAAA CAGA", + "CTT GACA", + "GA TGAGG", + "GAGA TTTT", + "CCTT CAA", + "GAA TCTA", + "CTC TCCTT", + "GG CGGA", + "TCTATCTA TCTATCTA", + "CACA CAGA", + "TGTG TGTA", + "CAAA GCC", + "TGTG CCA", + "GTT GAAAA", + "CTC CAGCA", + "TCAA GGA", + "TA GCTCA", + "CGC TGA", + 
"CCTG AAAA", + "GA CTATT", + "GATT CCA", + "GCTT CTA", + "GTC TGCC", + "CTT GGCA", + "TGTG GTA", + "GCTT TGA", + "GCTC TCTG", + "CTCA CAGA", + "TCTT TAAA", + "CAAA GCAA", + "TA CTTAA", + "GCTT CAA", + "CATT GAA", + "GGA GGAAA", + "CTA TAGA", + "CTGA GGAA", + "CCTG GCA", + "CC CTATT", + "CTC GTG", + "TTA CACA", + "TTA GGAA", + "CTG GTTA", + "GTT GTCC", + "TAATG AAAA", + "TATT TACA", + "GG GAATT", + "GTA GTTTT", + "GCTG CAA", + "CTA CGG", + "GCC GGA", + "CTG GGCA", + "CCTT AAAA", + "GATG GAA", + "TAGATAGA TAGATAGA", + "TATG TAA", + "GTA CGG", + "TATT CAAA", + "GA TCTCC", + "CCTG TTTT", + "TATT GCA", + "GGAAGGAA GGAAGGAA", + "GG TAATT", + "TTA CAGA", + "TCA GC", + "GCAAAA TG", + "GAGA GCA", + "GTA GAAAA", + "CATT TGAA", + "TCTT CTTTT", + "TCC CATA", + "GTTA TTTA", + "CTA TCTA", + "CA TCCTG", + "TCTT GTG", + "TTA TTATT", + "CC CGTC", + "TACTA TG", + "TAAA CATA", + "TAA GGAAA", + "GCTT GTG", + "CTC TAAAA", + "GTTTT AAAA", + "GACA GGA", + "TCC TAGA", + "TCCA CCCA", + "GTT TGAAA", + "CCA TCTCA", + "CTAA GAA", + "GTA TCTA", + "GTGA GGA", + "GCTG GAGG", + "CCTGTAA TCCCAGCTA", + "GCAA CAA", + "CTT TCAAA", + "CAAA TGTT", + "CTT GTCC", + "TCTCAA AAA", + "TATT TATTA", + "TAA GGCA", + "GAGA GGAA", + "TA TGATT", + "GCA TCTA", + "C GTTATT", + "GCC TGTA", + "GTT TCAAA", + "CCTTCCTT CCTTCCTT", + "GG CTTTG", + "GTCA GAA", + "CATG CATG", + "GTCA TTTA", + "CTG GAAAA", + "CTT CGA", + "CCTA TTTT", + "CCAA CAA", + "TCCA TCC", + "TAAA GTTA", + "GTC TCTC", + "TAA TCAAA", + "GATTTT TG", + "GATT TCTT", + "GG GCTGA", + "GCA TGTA", + "CCTG GGTT", + "GAGA CAA", + "GCTG TCA", + "TGA TAGG", + "GGA GACC", + "CC GGCA", + "TAA TCTCA", + "TGAA TTAA", + "TCTG GTG", + "GCC TC", + "GG CGCA", + "CCA GCTA", + "CA GTCTG", + "TGAA CTA", + "GTAA GAA", + "CCTT TCA", + "TCCA TGA", + "CAAA GGAA", + "CTC TC", + "CTC TCTCA", + "CTC CAGC", + "GTA GATA", + "CCCC CTCC", + "GG CGCC", + "TCTG TCC", + "GA CCATT", + "CTT GAAAA", + "TTA TCC", + "TACA TGTG", + "CAAA TTTG", + "TTTT GTG", + "CAGA GTG", + "GTAA TAA", + "GTGA 
GTG", + "TTTT TCC", + "GG CTCTG", + "GCC CTAA", + "GG CTGTT", + "CC CAATT", + "CAGA GCTT", + "TATAAA TG", + "GA GTCTG", + "TCTTAA AAA", + "GTTTTA TG", + "GA TCCAA", + "GGCC CTG", + "GA TCCTG", + "TCAA GTG", + "GATT CAA", + "CCTC TCTT", + "GAGA CGG", + "CAGA TCA", + "TAAAA GAA", + "CTGA GCAA", + "CCTG CCA", + "CCTT CTA", + "CGC TCA", + "GG CTGTG", + "TGGG AAAA", + "GGA GCCTG", + "CTGA GTG", + "CGTC AAA", + "TCAA GTA", + "CGTAA TT", + "TTA CTTA", + "TATA CTA", + "GG GCAAA", + "CAA CTTTT", + "CTT TGCC", + "GC CAGGAA", + "CACA CTA", + "GCC CAGC", + "TAAATAAA TAAATAAA", + "CTT TCCTT", + "GGGA GAA", + "TATG GTA", + "CGG CCA", + "CCTC TCTG", + "GAAA GCAA", + "CAA GCCA", + "GG CGTT", + "CTC TTTTA", + "TCGGCC TCCCAAA", + "GATT TATT", + "CAA GTCC", + "TA TCTTA", + "GTTCAA GACCA", + "CTCA CACA", + "GAAA TCAA", + "TGA GACC", + "GG GTAAA", + "GCTT GTT", + "GA TTTTAA", + "TTTT TATA", + "CAGA GCTG", + "TC TGTTAA", + "GTAA TTAA", + "TCTT TGAA", + "CTT GCCA", + "TTTT CATT", + "CCA TGTA", + "TCTC GGCTCACTGCAA", + "GGA TTCA", + "TC TATTAA", + "TACA TAAA", + "GATT GATT", + "GGA GAGGA", + "CGC AAAA", + "GGA CTAA", + "TTA TGTG", + "GTCA CTCA", + "GACA GCA", + "CGA GTT", + "GATG GTT", + "GGAA GAGG", + "GCCAA CATGGTGAAA", + "GGA GCCA", + "TGAA CTG", + "CCTC TGTG", + "GTA TAAAA", + "TCC CAGAA", + "CATT TATG", + "GA TTATG", + "TGTT TCTG", + "GAGTG GGTT", + "TACA TATT", + "CTC CAGGA", + "GACA CTG", + "GG TCTCA", + "CC GGGA", + "TGTT TAAA", + "CTCA CCA", + "GGA CTTA", + "GCC CACC", + "CAAA TCAA", + "GAAA TGTG", + "TA GTTAA", + "TCTA TAA", + "TTA GATT", + "GTG TAGG", + "TACTG AAA", + "GCA CCCA", + "GTG GGCTG", + "GAA TGAAA", + "TCTA GTT", + "TCA GGAGA", + "TCCA CTA", + "CTCA GTT", + "TACTT AAA", + "GA CTCCA", + "TCCATT TG", + "CACA GCAA", + "GCTCATG CCTG", + "GGTG CTG", + "GCTT TCTT", + "GTG GCCA", + "TA CGTG", + "GTG CAGTG", + "TGAA GTCA", + "CCTT TAA", + "TCTCAGCTCA CTGCAA", + "GAAA TATG", + "CC TCAAAA", + "GGGG CGG", + "CGA CAA", + "GG TGATG", + "GTCTT AAA", + "CAGAAA TG", + "CGTCA TT", + 
"CCAA GCA", + "GGA TCAA", + "GTGCTG GGATTA", + "GCTG GCC", + "CGGA GCTT", + "TACA TGA", + "TGTT TGAA", + "TCTC CATT", + "TAA GCAAA", + "CCTT TCTT", + "TA CTGTT", + "TCCA TCTT", + "CTTA CTT", + "CGGA GGTT", + "CAAAA CAA", + "TCA TAGG", + "TTA CTAA", + "CTTA TTTG", + "GAA TGTA", + "CCCCA TGGA", + "TTA CTGA", + "CGG AAAA", + "CTC CAGTG", + "TGTT CCA", + "CAGA TGAA", + "GTT GATA", + "TCC CCCC", + "CATT GCA", + "CTCA GCC", + "CTTA CTG", + "TA TCCTT", + "CTTTTA TG", + "TGAGTA GCTG", + "GACTG AAA", + "CAA TGAAA", + "CGA CTG", + "CTT GGGA", + "GCAA GCA", + "TCA CTCC", + "GATT TGA", + "CATTTT AAA", + "TCAA CTA", + "GTCC AAAA", + "CACC CTG", + "TTA CCTT", + "CAA GGGG", + "TTTT GGA", + "GTTA TTTG", + "GCTA CTG", + "CTGAGGCAGGA GAATG", + "GTGA TGA", + "GTA GTC", + "TAGTA TG", + "GTA TAGA", + "GTG TCTA", + "GCTG CTA", + "TTA GTAA", + "TAAA CATG", + "GTCA CCA", + "CA TCTTTT", + "CATA TAA", + "TCTC TCTA", + "TTTTA TTAA", + "TATT CTAA", + "GAAA TTTA", + "CTT CCCTG", + "TAAA GATG", + "TA CGTA", + "GTT TATTA", + "GAAAA GAA", + "CCCA CCCA", + "CAATT AAAA", + "CC GACA", + "CAAA GTGA", + "CAAA CAAAA", + "GCAA TTTT", + "CGATT AA", + "TTA GAGA", + "CTGA TGA", + "GGA GGAGG", + "GTCC TGGG", + "TCA TGAAA", + "GCAA CCA", + "GTT GGCA", + "GCGG CGG", + "GTCC CCA", + "GTA GGGG", + "GCCA TGTT", + "GTT CGAGA", + "GCC TATA", + "TAAA TTCA", + "GG CCATT", + "GAAAA CAA", + "TGTG TATG", + "GTA CTC", + "TAGG GAA", + "CCTT GAA", + "TC TATTTG", + "GAGG GCA", + "GAAA CTGA", + "TA CGC", + "TA CAAAAA", + "TCA TTATT", + "GGAAAA TT", + "TCAA TATT", + "CC CGTA", + "GGA GAGAA", + "TTA GTTA", + "CTCA GAGA", + "TC GAGC", + "CTA GTCA", + "GATG GCA", + "TGAA CATT", + "CTA TGGG", + "CACA CCA", + "TCAA TTAA", + "GGAA CTG", + "TTA CATG", + "CTT TCATT", + "CAGC TCTG", + "TCTTTT TTTT", + "TAAA TCTT", + "TGA TCTA", + "CATA CAA", + "GC TCAAAA", + "GC TGTGTG", + "TCAA TCA", + "GATT TGAA", + "CCAA GGA", + "GTCC TCA", + "GTG CTCC", + "AAAA TAA", + "GTGA CAA", + "GCTCA CGCCTG", + "CGA CGG", + "TA TCCAA", + "CACA CATG", + 
"TCTC TCTCC", + "TGTG GTT", + "CTT GGTA", + "TCTG GTT", + "TTTA TAA", + "CTG CTTTT", + "TGTG TCA", + "CACA TCA", + "CC TAATG", + "C GTTTTTT", + "GCTG GCA", + "GA CGTC", + "TATAA TTA", + "TACA GTAA", + "GAAA GTAA", + "GTC TGAAA", + "CCCA TTTT", + "TATA TGA", + "CTT GATA", + "CTT TATTTT", + "CTT TATTA", + "GG CGAA", + "CCA TGCC", + "CCTG CCTT", + "GAAGAA GAAGAA", + "CTGA CTGA", + "GCC CTTA", + "TA TCTAA", + "GTG TTTTA", + "TGTG GCA", + "TATT GTAA", + "GCCA GAAA", + "CCCTG TCTC", + "CACA GGAA", + "AAAA CAA", + "AAAAAAAA AAAAAAA", + "TAA CTCC", + "GCC TAAA", + "CGA GTA", + "TA GTATT", + "GTATTTT TAGTAGAGA", + "GCTG CAGG", + "TATT GAAA", + "CCAGCC TGGG", + "GCTCC AAA", + "TA CGAA", + "GGCC TCC", + "TATA CAAA", + "CATG GCA", + "CATG CAA", + "TACA CCA", + "CTT TACCA", + "TACA GAGA", + "TATT CTTA", + "TATG TCA", + "TCAA GCA", + "TCAA TGA", + "GG CTCTT", + "GGAA GTT", + "TCCA TGTT", + "GCTT TCC", + "TATG TGA", + "GTG TAGA", + "TTTT TAAAA", + "GCTG GAGA", + "GTGA GAGA", + "CCTA GAA", + "CCTCC AAA", + "CCAA TGA", + "CAGG GCA", + "CTA TGCA", + "CTT CACC", + "CTA CAAAA", + "CTCA CC", + "GAGTA TG", + "TA GAAAAA", + "CTTTT GAA", + "TAAA GAGA", + "CATG TCA", + "TCTTTT AAA", + "CACA GTGA", + "GA TCTAA", + "TAA GGTA", + "CATA GAA", + "CGC GCC", + "CAGC TTA", + "TATA GTT", + "CGG GCC", + "TATC CATT", + "TGTTTG TTTT", + "GCTG GCTG", + "TACA GGA", + "CTCC TTTG", + "CAA TCTA", + "CCCC CTG", + "TATA CTG", + "CTGA GCC", + "CGG TTA", + "TGAA GTG", + "GCTT CCTT", + "TTTTA TTTG", + "TA GTGAA", + "CTGA GGTG", + "TCTT CTC", + "GACA GAAA", + "CTGAA CTGAA", + "CCTG GGAA", + "TCC CCAAA", + "TATG TATT", + "GATT TCTG", + "CATT CAAA", + "CACA GTT", + "GCTT GAA", + "GTG GATCA", + "CTGA GTGA", + "TGAA TTTA", + "TCAA CAAA", + "GG TCATT", + "GTAA TTTA", + "GC GACTT", + "CTGA GAGA", + "GTG CCCA", + "CTA GGTT", + "TCC TGAAA", + "GTC CACC", + "TCA CAGAA", + "GC GAAAA", + "GTA TGGG", + "TGAA CAAA", + "TAAA CAAAA", + "CC GTTTT", + "TC TCAATT", + "TCCA GAAA", + "GTAA CAA", + "GCA TTTTA", + "TCTC CATG", + "TTA 
TAAAA", + "CAGG CAA", + "CTAAAA AAA", + "GTT GGGA", + "TAAA GATT", + "TGAA GAGA", + "CCCC TCA", + "TGTT TATG", + "TCTA CTG", + "CCAA TTTT", + "GGTG GTG", + "GGAA CAA", + "TGTG GGA", + "TCTG CTA", + "GAA CGA", + "GTAA GTA", + "GTT GCCA", + "AAAA TTTT", + "GC GCGA", + "GAAA GATG", + "GTC TCTCA", + "TCCA TCAA", + "GCA GCTA", + "CACA TTTG", + "CTGA CAA", + "TCCA CC", + "GC T", + "CCCA CTT", + "GCA GGTA", + "GAGG CCA", + "TAAA GTCA", + "CTG GATA", + "CGG CAA" + ] + } +} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_enhancers_cohn/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep1_seed42/checkpoint-174/tokenizer_config.json b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_enhancers_cohn/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep1_seed42/checkpoint-174/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e926cbe05847fc6887cfc8a947900d32ebe68356 --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_enhancers_cohn/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep1_seed42/checkpoint-174/tokenizer_config.json @@ -0,0 +1,56 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "[UNK]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "[CLS]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "[SEP]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "[PAD]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "[MASK]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "cache_dir": null, + "clean_up_tokenization_spaces": true, + "cls_token": "[CLS]", + "mask_token": "[MASK]", 
+ "model_max_length": 100, + "pad_token": "[PAD]", + "padding_side": "right", + "sep_token": "[SEP]", + "tokenizer_class": "PreTrainedTokenizerFast", + "trust_remote_code": true, + "unk_token": "[UNK]", + "use_fast": true +} diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_enhancers_cohn/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep1_seed42/checkpoint-174/trainer_state.json b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_enhancers_cohn/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep1_seed42/checkpoint-174/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e5416fabcb512702eecec6813e44eb703b7edbe6 --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_enhancers_cohn/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep1_seed42/checkpoint-174/trainer_state.json @@ -0,0 +1,38 @@ +{ + "best_metric": 0.7227702479304581, + "best_model_checkpoint": "genomic_bench_DNAbert2_output/human_enhancers_cohn/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep1_seed42/checkpoint-174", + "epoch": 1.0, + "eval_steps": 100, + "global_step": 174, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.57, + "learning_rate": 1.3214285714285714e-05, + "loss": 0.5599, + "step": 100 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.7229219143576826, + "eval_f1": 0.7227702479304581, + "eval_loss": 0.5351111888885498, + "eval_matthews_correlation": 0.4478109097894003, + "eval_precision": 0.7243213139100618, + "eval_recall": 0.7234903668213308, + "eval_runtime": 1.1662, + "eval_samples_per_second": 2383.002, + "eval_steps_per_second": 18.865, + "step": 174 + } + ], + "logging_steps": 100, + "max_steps": 174, + "num_train_epochs": 1, + "save_steps": 100, + "total_flos": 1142477535696000.0, + "trial_name": null, + "trial_params": null +} diff --git 
a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_enhancers_cohn/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep1_seed42/checkpoint-174/training_args.bin b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_enhancers_cohn/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep1_seed42/checkpoint-174/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..4fc648de6aa520fa17e646a1ad50f7981dd767b8 --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_enhancers_cohn/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep1_seed42/checkpoint-174/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:813bbf48f0962396319a5fe93f85a3dc9f1b744b55d30824d371a20aa15781ee +size 5393 diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_enhancers_cohn/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep1_seed42/results/base5120_human_enhancers_cohn_lr3e-5_wd0.0_wr0.03_ep1_seed42/eval_results.json b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_enhancers_cohn/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep1_seed42/results/base5120_human_enhancers_cohn_lr3e-5_wd0.0_wr0.03_ep1_seed42/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..52b64f51bdd62f22b16925fe2c3227eea6b06716 --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_enhancers_cohn/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep1_seed42/results/base5120_human_enhancers_cohn_lr3e-5_wd0.0_wr0.03_ep1_seed42/eval_results.json @@ -0,0 +1 @@ +{"eval_loss": 0.5238381624221802, "eval_accuracy": 0.7284172661870504, "eval_f1": 0.7276967640297661, "eval_matthews_correlation": 0.4587468093902783, "eval_precision": 0.7305107639295452, "eval_recall": 0.7282416572855608, "eval_runtime": 1.1454, "eval_samples_per_second": 2427.082, "eval_steps_per_second": 19.207, "epoch": 1.0} \ No newline at end of file diff --git 
a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42/checkpoint-4840/config.json b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42/checkpoint-4840/config.json new file mode 100644 index 0000000000000000000000000000000000000000..45e4c6c10a6211acf374c78e8078ab7ac74985f9 --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42/checkpoint-4840/config.json @@ -0,0 +1,27 @@ +{ + "_name_or_path": "/data/nanhuang/Nan/models/DNAbert2_Pretrained", + "architectures": [ + "BertForSequenceClassification" + ], + "attention_probs_dropout_prob": 0.1, + "classifier_dropout": null, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_norm_eps": 1e-12, + "max_length": 512, + "max_position_embeddings": 512, + "model_type": "bert", + "num_attention_heads": 12, + "num_hidden_layers": 12, + "pad_token_id": 0, + "position_embedding_type": "absolute", + "problem_type": "single_label_classification", + "torch_dtype": "float32", + "transformers_version": "4.35.2", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 4096 +} diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42/checkpoint-4840/model.safetensors b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42/checkpoint-4840/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3db2f7fc1b879dbc35d0a0b99af2fc3046f1d190 --- /dev/null +++ 
b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42/checkpoint-4840/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4738a9f9f2025e37c3e4cbd3e9d075e31248aa5614dadc841b48f9685f005c67 +size 356777880 diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42/checkpoint-4840/optimizer.pt b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42/checkpoint-4840/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..dbdcd6087b60dc6232f725369d04db80ed08888d --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42/checkpoint-4840/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77245f2c75e15c94077dba32fdec1b191189cf0fa4c8204b9a33c26f3791873b +size 713677451 diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42/checkpoint-4840/rng_state.pth b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42/checkpoint-4840/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..b9be46f6469c9207cd950ffff80331d078e786a6 --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42/checkpoint-4840/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b064b2260fc196f286e4d00135461b57b988d7c809b149f649f8b174a1ca3dfc +size 14645 diff --git 
a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42/checkpoint-4840/scheduler.pt b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42/checkpoint-4840/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..23724fbd24d47d1cd143295c978aaef4d337af64 --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42/checkpoint-4840/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:788a3151f9a748947f5038cbdef0a606c0864aece28f79566dab42892a6849b0 +size 1465 diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42/checkpoint-4840/special_tokens_map.json b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42/checkpoint-4840/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..a8b3208c2884c4efb86e49300fdd3dc877220cdf --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42/checkpoint-4840/special_tokens_map.json @@ -0,0 +1,7 @@ +{ + "cls_token": "[CLS]", + "mask_token": "[MASK]", + "pad_token": "[PAD]", + "sep_token": "[SEP]", + "unk_token": "[UNK]" +} diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42/checkpoint-4840/tokenizer.json b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42/checkpoint-4840/tokenizer.json new file mode 100644 index 
0000000000000000000000000000000000000000..8a569df5e832e1e62816e174612061cfbf0790d0 --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42/checkpoint-4840/tokenizer.json @@ -0,0 +1,8340 @@ +{ + "version": "1.0", + "truncation": { + "direction": "Right", + "max_length": 512, + "strategy": "LongestFirst", + "stride": 0 + }, + "padding": { + "strategy": "BatchLongest", + "direction": "Right", + "pad_to_multiple_of": null, + "pad_id": 3, + "pad_type_id": 0, + "pad_token": "[PAD]" + }, + "added_tokens": [ + { + "id": 0, + "content": "[UNK]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "[CLS]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "[SEP]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 3, + "content": "[PAD]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 4, + "content": "[MASK]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": null, + "pre_tokenizer": { + "type": "Whitespace" + }, + "post_processor": { + "type": "TemplateProcessing", + "single": [ + { + "SpecialToken": { + "id": "[CLS]", + "type_id": 0 + } + }, + { + "Sequence": { + "id": "A", + "type_id": 0 + } + }, + { + "SpecialToken": { + "id": "[SEP]", + "type_id": 0 + } + } + ], + "pair": [ + { + "SpecialToken": { + "id": "[CLS]", + "type_id": 0 + } + }, + { + "Sequence": { + "id": "A", + "type_id": 0 + } + }, + { + "SpecialToken": { + "id": "[SEP]", + "type_id": 0 + } + }, + { + "Sequence": { + "id": "B", + "type_id": 1 + } + }, + { + "SpecialToken": { + "id": "[SEP]", + "type_id": 1 + } + } + ], + 
"special_tokens": { + "[CLS]": { + "id": "[CLS]", + "ids": [ + 1 + ], + "tokens": [ + "[CLS]" + ] + }, + "[SEP]": { + "id": "[SEP]", + "ids": [ + 2 + ], + "tokens": [ + "[SEP]" + ] + } + } + }, + "decoder": null, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": "[UNK]", + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "vocab": { + "[UNK]": 0, + "[CLS]": 1, + "[SEP]": 2, + "[PAD]": 3, + "[MASK]": 4, + "A": 5, + "C": 6, + "G": 7, + "T": 8, + "AA": 9, + "TT": 10, + "TG": 11, + "CA": 12, + "CC": 13, + "TA": 14, + "GG": 15, + "TC": 16, + "GA": 17, + "AAA": 18, + "GC": 19, + "TAA": 20, + "TTTT": 21, + "TCA": 22, + "TGA": 23, + "TTA": 24, + "GAA": 25, + "TCC": 26, + "CAA": 27, + "CTG": 28, + "CTT": 29, + "GTG": 30, + "GTT": 31, + "GCA": 32, + "GGA": 33, + "CCA": 34, + "GTA": 35, + "GCC": 36, + "CTA": 37, + "TAAA": 38, + "AAAA": 39, + "CTC": 40, + "GTC": 41, + "TGTG": 42, + "TATT": 43, + "CACA": 44, + "GAAA": 45, + "TATA": 46, + "TCTT": 47, + "TGTT": 48, + "CAAA": 49, + "GAGA": 50, + "CATT": 51, + "TGAA": 52, + "CAGG": 53, + "TCTG": 54, + "CAGA": 55, + "TCAA": 56, + "GGAA": 57, + "TAAAA": 58, + "CTGA": 59, + "GCTT": 60, + "GTGA": 61, + "GCTG": 62, + "CTCA": 63, + "CCTT": 64, + "CATG": 65, + "GCAA": 66, + "GTCA": 67, + "GTAA": 68, + "TTTTA": 69, + "TATG": 70, + "GAGG": 71, + "CGG": 72, + "GATT": 73, + "CCTG": 74, + "TCTC": 75, + "CCAA": 76, + "GTTA": 77, + "CTCC": 78, + "CTAA": 79, + "TACA": 80, + "CTTA": 81, + "TCCA": 82, + "GATG": 83, + "TTAA": 84, + "GAAAA": 85, + "TTTG": 86, + "GTTTT": 87, + "TCTA": 88, + "GCCA": 89, + "GTCC": 90, + "CTTTT": 91, + "GGGG": 92, + "CGA": 93, + "TTTA": 94, + "CCCA": 95, + "CAAAA": 96, + "TGGG": 97, + "TAGA": 98, + "TAGG": 99, + "GACA": 100, + "GGTT": 101, + "CCCC": 102, + "GGTG": 103, + "CATA": 104, + "GCTA": 105, + "TGTA": 106, + "TCAAA": 107, + "TGGA": 108, + "TAATT": 109, + "TTATT": 110, + "TGCA": 111, + "GGCA": 112, + "GATA": 113, + "CCTA": 114, + "TTCA": 
115, + "TCTCA": 116, + "GGGA": 117, + "CGC": 118, + "CTGAA": 119, + "GTAAA": 120, + "TCTCC": 121, + "TTTTTT": 122, + "CGTG": 123, + "GCAAA": 124, + "TAAAAA": 125, + "TCTGA": 126, + "TCATT": 127, + "GGAAA": 128, + "TGAAA": 129, + "TCCTT": 130, + "CCAAA": 131, + "GAATT": 132, + "CTAAA": 133, + "CGTT": 134, + "GTGAA": 135, + "GGCC": 136, + "TAATA": 137, + "GGTA": 138, + "TGCC": 139, + "CACC": 140, + "TGATT": 141, + "AAAAAA": 142, + "GCTCA": 143, + "TCCAA": 144, + "GAGAA": 145, + "CTGTT": 146, + "TATTA": 147, + "CAGCA": 148, + "CTCTT": 149, + "CTTAA": 150, + "CAGAA": 151, + "GCTGA": 152, + "GTTAA": 153, + "TCTTA": 154, + "TATTTT": 155, + "GCCAA": 156, + "CTTTG": 157, + "GACC": 158, + "CGCA": 159, + "GTATT": 160, + "GTCTT": 161, + "CAATT": 162, + "GTGTT": 163, + "CTCAA": 164, + "GGAGG": 165, + "CGAA": 166, + "TCTTTT": 167, + "GTCAA": 168, + "CGCC": 169, + "TATAA": 170, + "TACC": 171, + "TCTAA": 172, + "CCATT": 173, + "CGGA": 174, + "CAAAAA": 175, + "CAGTG": 176, + "TCCTG": 177, + "CTCTG": 178, + "GAAAAA": 179, + "CTGTG": 180, + "CAGC": 181, + "TTTTAA": 182, + "GCATT": 183, + "GCCTT": 184, + "TAATG": 185, + "CTATT": 186, + "GTTTG": 187, + "TGATG": 188, + "GGCTG": 189, + "CCTCA": 190, + "GAGGA": 191, + "GCCTG": 192, + "AAATT": 193, + "CGTA": 194, + "TCAAAA": 195, + "TACAA": 196, + "CATCA": 197, + "CAGTT": 198, + "TGAGA": 199, + "GGGAA": 200, + "CACTG": 201, + "CACAA": 202, + "CAGGA": 203, + "CCCCA": 204, + "CCCTG": 205, + "TTTTTTTT": 206, + "TAGAA": 207, + "GAGCA": 208, + "CCTCC": 209, + "CACCA": 210, + "TATCA": 211, + "GAGC": 212, + "CATTA": 213, + "CACACACA": 214, + "GAGTG": 215, + "GGATT": 216, + "TGTGTGTG": 217, + "TACTT": 218, + "CACTT": 219, + "GTCTG": 220, + "TGAGG": 221, + "GAGTT": 222, + "GAATG": 223, + "TCATG": 224, + "GACAA": 225, + "GACTT": 226, + "TATTAA": 227, + "TAATAA": 228, + "GGCCA": 229, + "CATTTT": 230, + "CAGCC": 231, + "CCCTT": 232, + "GCTAA": 233, + "TATATATA": 234, + "GTGTG": 235, + "TACTG": 236, + "TAGTT": 237, + "CAATG": 238, + "GCTC": 239, + 
"CAGTA": 240, + "GCTCC": 241, + "CATAA": 242, + "TTATG": 243, + "TAAATT": 244, + "GATGA": 245, + "CATGA": 246, + "GCGG": 247, + "AAAAAAAA": 248, + "CCATG": 249, + "GATAA": 250, + "GACTG": 251, + "TATGA": 252, + "GCAGG": 253, + "GATCA": 254, + "GTTTTA": 255, + "GGATG": 256, + "CCTGA": 257, + "GTAAAA": 258, + "GAAGG": 259, + "GATTA": 260, + "CCTC": 261, + "GACCA": 262, + "GCTTA": 263, + "CCCAA": 264, + "AAATG": 265, + "GCATG": 266, + "TAGTA": 267, + "TACCA": 268, + "GGCTT": 269, + "CGTC": 270, + "TCTCTT": 271, + "GGTCA": 272, + "TTATTA": 273, + "TACTA": 274, + "TAGCA": 275, + "TATC": 276, + "CTGGG": 277, + "CATC": 278, + "CTTTTA": 279, + "CTAAAA": 280, + "GTGGG": 281, + "GAGTA": 282, + "CCAGG": 283, + "GATTTT": 284, + "TAGTG": 285, + "GAAATT": 286, + "CACTA": 287, + "TCGG": 288, + "TCAGG": 289, + "CAGGAA": 290, + "GCAAAA": 291, + "CCTTA": 292, + "CATCC": 293, + "CTTGG": 294, + "TGTGAA": 295, + "TATTTG": 296, + "CCTAA": 297, + "CTATG": 298, + "GAGAAA": 299, + "GAGAGAGA": 300, + "GCTTTT": 301, + "TATAAA": 302, + "CAAGG": 303, + "TCTCTG": 304, + "TGTTAA": 305, + "TGTGTT": 306, + "GAGCC": 307, + "GACTA": 308, + "TATATT": 309, + "TAAAAAA": 310, + "TTTTTG": 311, + "GTATG": 312, + "CATTAA": 313, + "TAGGA": 314, + "TAGC": 315, + "GTTGG": 316, + "GAAGAA": 317, + "TAAATG": 318, + "TCTGTT": 319, + "CAGAAA": 320, + "CAAATT": 321, + "TAATTA": 322, + "TCTGTG": 323, + "TATCC": 324, + "TGAATT": 325, + "CTCCA": 326, + "GTGAAA": 327, + "GGCAA": 328, + "GGAGA": 329, + "GAAGA": 330, + "GGTGA": 331, + "GGGCA": 332, + "CCAAAA": 333, + "TCTCTCTC": 334, + "CTGCA": 335, + "CTTCTT": 336, + "TCTTAA": 337, + "CCCTA": 338, + "TGTGTG": 339, + "AAATA": 340, + "TGTTTG": 341, + "GGGTT": 342, + "GTGCTG": 343, + "GGAAAA": 344, + "GGGGA": 345, + "TCAGA": 346, + "CCTTTT": 347, + "GAAATG": 348, + "GCAGCA": 349, + "TCTGAA": 350, + "GGGTG": 351, + "CACATT": 352, + "TCTTTG": 353, + "GGGC": 354, + "TCCCA": 355, + "TCCATT": 356, + "CTGAAA": 357, + "CTTTA": 358, + "TCGA": 359, + "GTTTA": 360, + "CAACAA": 361, 
+ "CTTCC": 362, + "GCCTCC": 363, + "TTAAA": 364, + "GCTCTG": 365, + "GTTTCA": 366, + "GGAGGA": 367, + "CGTGA": 368, + "CAGTC": 369, + "GAATA": 370, + "CAGAGA": 371, + "CCCTC": 372, + "CAAATG": 373, + "CTGCTG": 374, + "GATCC": 375, + "TTTTATT": 376, + "AAAATT": 377, + "TTATA": 378, + "TCAATT": 379, + "GGTAA": 380, + "GTTATT": 381, + "GCCAGG": 382, + "GGAGAA": 383, + "CATTTG": 384, + "TCACC": 385, + "CTCAAA": 386, + "GGTTA": 387, + "TCCAAA": 388, + "TCTATT": 389, + "GCAGA": 390, + "CTTCA": 391, + "TCATCA": 392, + "CGAGG": 393, + "TAACA": 394, + "GTTGTT": 395, + "CTTATT": 396, + "CGTCA": 397, + "TAAGA": 398, + "TAATTTT": 399, + "CTGTA": 400, + "TCCACA": 401, + "GCTGTG": 402, + "CGCTG": 403, + "TCTAAA": 404, + "GCGA": 405, + "CAATA": 406, + "CCACCA": 407, + "GAACA": 408, + "CGAAA": 409, + "CAGATT": 410, + "TCACA": 411, + "TTATTTT": 412, + "TCTCAA": 413, + "TGACA": 414, + "CTCCAA": 415, + "AAAAAAA": 416, + "TATATG": 417, + "TCCTCC": 418, + "TCACTT": 419, + "TCCAGG": 420, + "CAAGA": 421, + "GGCTA": 422, + "GTGGTG": 423, + "CGTAA": 424, + "CGAGA": 425, + "TGATA": 426, + "GGATTA": 427, + "CAACA": 428, + "CGATT": 429, + "TGAGAA": 430, + "CTCCTT": 431, + "CTCATT": 432, + "GTTAAA": 433, + "TCATA": 434, + "CCTCTG": 435, + "CTCTA": 436, + "GCTGAA": 437, + "CTGGA": 438, + "TAAGG": 439, + "CTTAAA": 440, + "TATTTA": 441, + "CCACA": 442, + "CCGG": 443, + "GTCAAA": 444, + "TGGAA": 445, + "CGGAA": 446, + "TGATGA": 447, + "GTTCA": 448, + "TAACAA": 449, + "GCTGTT": 450, + "TAAGAA": 451, + "CTGCC": 452, + "TTAATT": 453, + "CCAGA": 454, + "TCAGAA": 455, + "GTCATT": 456, + "CGCTT": 457, + "GATTAA": 458, + "CTGATT": 459, + "GCCACA": 460, + "GTAATT": 461, + "TCCAGA": 462, + "GCCAAA": 463, + "GTGATT": 464, + "TAAAATT": 465, + "CAAGAA": 466, + "CCACC": 467, + "TAATCC": 468, + "GTTCTT": 469, + "TCCATG": 470, + "GCTCTT": 471, + "TGCTG": 472, + "GGGTA": 473, + "TTACA": 474, + "GCCATT": 475, + "GCACA": 476, + "GCAATT": 477, + "TCCCTG": 478, + "TGTGA": 479, + "TCGAA": 480, + "GGACA": 481, + 
"GGAATT": 482, + "GTGGA": 483, + "CTTCTG": 484, + "TCCCC": 485, + "GCCCC": 486, + "CTTGA": 487, + "TAATGA": 488, + "TAAATA": 489, + "TATATA": 490, + "CTGCAA": 491, + "TCATTA": 492, + "GTATA": 493, + "TCCCCA": 494, + "CGTTA": 495, + "GCAGAA": 496, + "TGAGTT": 497, + "CTTTTTT": 498, + "CGATG": 499, + "CTTTCA": 500, + "AAAATG": 501, + "CAGGTT": 502, + "CTAATT": 503, + "CGCCA": 504, + "TGAAAAA": 505, + "GTTCC": 506, + "GTCCTT": 507, + "GTCCAA": 508, + "GTTTTTT": 509, + "CTCTGA": 510, + "GCGC": 511, + "GTTGA": 512, + "TGAATG": 513, + "CTATA": 514, + "GCAGTG": 515, + "CCTTAA": 516, + "TCACCA": 517, + "TCACTG": 518, + "GCCCTG": 519, + "TAACTT": 520, + "CAGATG": 521, + "GTAGG": 522, + "TCTATA": 523, + "GAGATT": 524, + "GTCTA": 525, + "TTTTAAA": 526, + "CACATG": 527, + "TGACC": 528, + "CACAAA": 529, + "GTGTA": 530, + "GGGAGG": 531, + "GCTTTG": 532, + "CAAAAAA": 533, + "GAGGAA": 534, + "GTTCTG": 535, + "TTTTTA": 536, + "GTCTCA": 537, + "GTTCAA": 538, + "TCGTG": 539, + "GCTTAA": 540, + "GCACC": 541, + "CTCCTG": 542, + "TAAATAAA": 543, + "CTACA": 544, + "CTTCCA": 545, + "TCCTCA": 546, + "CGCAA": 547, + "GAAAAAA": 548, + "GCCCA": 549, + "TCGTT": 550, + "GTAGA": 551, + "CTCTCA": 552, + "GTCCA": 553, + "TGACTT": 554, + "TCCCTT": 555, + "GCCATG": 556, + "CACACACACACACACA": 557, + "GTGATG": 558, + "CCTCTT": 559, + "GCCAGA": 560, + "TCCTA": 561, + "CGTTTT": 562, + "GTACA": 563, + "GCATA": 564, + "GAATTA": 565, + "TGTGTGTGTGTGTGTG": 566, + "CCCAGG": 567, + "GGTTTT": 568, + "TCAAAAA": 569, + "TCTATG": 570, + "CCATA": 571, + "TGACAA": 572, + "GGATA": 573, + "TCAGTG": 574, + "GTATTTT": 575, + "GAGATG": 576, + "GCGTG": 577, + "CGTCC": 578, + "TTAAAAA": 579, + "TAATCA": 580, + "CAATTA": 581, + "CCACTG": 582, + "CGGTT": 583, + "GTTGAA": 584, + "TGATTA": 585, + "CCTTTG": 586, + "CGGTG": 587, + "CAGGTG": 588, + "TCAATG": 589, + "CTGATG": 590, + "TCAGGA": 591, + "GTTTAA": 592, + "TATTAAA": 593, + "CTCTTA": 594, + "GCAGGA": 595, + "CTCTCC": 596, + "GAACC": 597, + "CTTTAA": 598, + "GGGCC": 599, 
+ "GTATTA": 600, + "GCGCC": 601, + "CCAATT": 602, + "GCTAAA": 603, + "TGACTG": 604, + "GATTTG": 605, + "GATAAA": 606, + "TCAGCA": 607, + "GTTCCA": 608, + "GAAATA": 609, + "GACAAA": 610, + "GAGTC": 611, + "GCTATT": 612, + "TCACAA": 613, + "GAGGTT": 614, + "TAACC": 615, + "GAAGGA": 616, + "GCTCAA": 617, + "GAAAATT": 618, + "CCAGCA": 619, + "GTTTTAA": 620, + "GTGCC": 621, + "TGAGGA": 622, + "CATAAA": 623, + "GGTCC": 624, + "TCATTTT": 625, + "TATTTATT": 626, + "TAATAAA": 627, + "GCCTA": 628, + "CTTTTAA": 629, + "TAAGTG": 630, + "TAAGTA": 631, + "CTGGAA": 632, + "CACACA": 633, + "GACAGA": 634, + "CAACC": 635, + "GGGAAA": 636, + "CCAGAA": 637, + "TCAGTT": 638, + "TAACTA": 639, + "CTAAAAA": 640, + "TGGGTT": 641, + "TGAGTG": 642, + "TAAAATG": 643, + "TATATATATATATATA": 644, + "GCACTG": 645, + "GACTC": 646, + "TACAAA": 647, + "TAAAAAAA": 648, + "TCTACA": 649, + "GTTGTG": 650, + "TCGCC": 651, + "CCCAAA": 652, + "GTCATG": 653, + "CTGCTT": 654, + "GGAATG": 655, + "CTATTA": 656, + "GATATT": 657, + "TAGAAA": 658, + "GGCAGG": 659, + "GATGAA": 660, + "GTAGAA": 661, + "TCCTGA": 662, + "TAACTG": 663, + "GCTGGG": 664, + "GCAATG": 665, + "GCCCCA": 666, + "GTTTGA": 667, + "CATTTA": 668, + "GTGCA": 669, + "CTTGAA": 670, + "GTGGAA": 671, + "CTTCAA": 672, + "TAAATTA": 673, + "GTGGCA": 674, + "TCCTTA": 675, + "GGAAAAA": 676, + "TTTTTTA": 677, + "CCTGTG": 678, + "GTAATG": 679, + "GTGTTA": 680, + "CTAGG": 681, + "CAGGCTG": 682, + "GACACA": 683, + "GAAAAAAA": 684, + "TCGC": 685, + "GTAAAAA": 686, + "TGTTTA": 687, + "TCTCTA": 688, + "GTCCTG": 689, + "CCAGGA": 690, + "GAACAA": 691, + "TAAGTT": 692, + "TGAGCA": 693, + "GCTCCA": 694, + "TAAGCA": 695, + "CTCATG": 696, + "GTCTTA": 697, + "CCCACA": 698, + "CATATT": 699, + "GCCTCA": 700, + "CACTC": 701, + "CTTCTA": 702, + "TGATTTT": 703, + "TCGCA": 704, + "CCTGTT": 705, + "GAAGCA": 706, + "GCAAAAA": 707, + "GCGGA": 708, + "CCACAA": 709, + "GCGCA": 710, + "CATATA": 711, + "GACATT": 712, + "GTTCTA": 713, + "CAAAATT": 714, + "GAAAGAAA": 715, + "CCCGG": 
716, + "TACACA": 717, + "CCAAAAA": 718, + "GAGGTG": 719, + "GGCTCA": 720, + "CAGTGA": 721, + "TCCCAA": 722, + "TATCTT": 723, + "TGAGTA": 724, + "TCGTA": 725, + "TTTTCTT": 726, + "GTGGGA": 727, + "GAGCTG": 728, + "CCCTCC": 729, + "TAGGTT": 730, + "TTAGG": 731, + "TAATATT": 732, + "CCAGCC": 733, + "CATCTT": 734, + "GTCTGA": 735, + "GTTTCC": 736, + "CCTGAA": 737, + "GGAGCA": 738, + "GAAAATG": 739, + "TCAGTA": 740, + "TAACCA": 741, + "GATGTT": 742, + "CTGTTA": 743, + "CATGTT": 744, + "GGCGG": 745, + "CATGTG": 746, + "GGGAGA": 747, + "CTTTGA": 748, + "TCTTTCTT": 749, + "AAAAAAAAA": 750, + "GGGGTG": 751, + "CTTTCC": 752, + "CTTGTT": 753, + "GCATTA": 754, + "CCCAGA": 755, + "CAAATA": 756, + "TCGGA": 757, + "CAGCTT": 758, + "TCACTA": 759, + "TAATTAA": 760, + "TAAGGA": 761, + "GAACTG": 762, + "GCACAA": 763, + "GCGTT": 764, + "GGCTC": 765, + "TCTTTTA": 766, + "CCTCCA": 767, + "GGCAAA": 768, + "CAGCTG": 769, + "CTACAA": 770, + "TACATT": 771, + "GCTATG": 772, + "CTTGTG": 773, + "GAGTCA": 774, + "GTTATG": 775, + "CTGCCA": 776, + "GTCTCC": 777, + "TGACCA": 778, + "CACCTG": 779, + "TATATTA": 780, + "TGATCA": 781, + "CAGCAA": 782, + "GATGTG": 783, + "GTCTTTT": 784, + "CTAGAA": 785, + "GCTACA": 786, + "CTGGGA": 787, + "GGGGTT": 788, + "CAAGTA": 789, + "CAAGGA": 790, + "CCCTCA": 791, + "TAGCC": 792, + "GTTGGA": 793, + "GCTATA": 794, + "TCTGAAA": 795, + "TATGTT": 796, + "CCCCTT": 797, + "GTTGTA": 798, + "CCCTGA": 799, + "TGACTA": 800, + "CAAGCA": 801, + "CAATAA": 802, + "GAACTT": 803, + "CATGAA": 804, + "CTTATG": 805, + "CTAATG": 806, + "TCTAAAA": 807, + "CCAATG": 808, + "GAAGTG": 809, + "CCTCAA": 810, + "CCCATT": 811, + "CAGTCA": 812, + "GAGAGAGAGAGAGAGA": 813, + "TATGTG": 814, + "GCAGTGA": 815, + "TCTCCTT": 816, + "TCCCAAA": 817, + "CCATTA": 818, + "CCAGTG": 819, + "GCATCA": 820, + "TCAAATT": 821, + "GATCTT": 822, + "GACAGG": 823, + "GGAGTG": 824, + "GTAGTA": 825, + "CAACTT": 826, + "GAAGTT": 827, + "CCCCTG": 828, + "TCTCAAA": 829, + "GGGTC": 830, + "GAGCTT": 831, + "TATGAAA": 832, 
+ "TATGAA": 833, + "GACATG": 834, + "CAAGTG": 835, + "GATATA": 836, + "CATCTG": 837, + "CTGTGA": 838, + "TAATTTA": 839, + "GGCAGA": 840, + "GCGAA": 841, + "CCTAAA": 842, + "CCATCA": 843, + "CACTGA": 844, + "GGACTA": 845, + "GACGG": 846, + "CTCTTTT": 847, + "CTGTCA": 848, + "TCTCTCTCTCTCTCTC": 849, + "TTAATG": 850, + "GCAGCC": 851, + "CAAAAAAA": 852, + "GCACCA": 853, + "CTATTTT": 854, + "GAGCAA": 855, + "CTTGGA": 856, + "CTGGTG": 857, + "GAATAA": 858, + "TCCTTTT": 859, + "GAAGTA": 860, + "CAGTAA": 861, + "CAACCA": 862, + "CTGTAA": 863, + "TGATAA": 864, + "GCAGTT": 865, + "CACGG": 866, + "TAAATAA": 867, + "CTGTTTT": 868, + "CTACTA": 869, + "GCTCTA": 870, + "CGAAAA": 871, + "CAAGTT": 872, + "CTTGTA": 873, + "GAATGA": 874, + "GAGTGA": 875, + "GCCTGA": 876, + "GGTTTG": 877, + "CCCATG": 878, + "GGGGAA": 879, + "GAAGAAA": 880, + "TGTTA": 881, + "CAATTTT": 882, + "TATATTTT": 883, + "CTCAAAA": 884, + "GGTGGG": 885, + "CCGTG": 886, + "TATTTCA": 887, + "CCCCAA": 888, + "TATTTAA": 889, + "GGCTGA": 890, + "GGTGTG": 891, + "CATCAA": 892, + "CACTCA": 893, + "TCTCATT": 894, + "GAATTTT": 895, + "GAATCA": 896, + "CAGGAAA": 897, + "CATACA": 898, + "TATTTTA": 899, + "TTATAA": 900, + "GAGGAAA": 901, + "CATATG": 902, + "CTTTCTT": 903, + "CAACTG": 904, + "GGGCTG": 905, + "CCCCCA": 906, + "TTTGAAA": 907, + "CATTAAA": 908, + "CTTAAAA": 909, + "GACTGA": 910, + "CAATGA": 911, + "GGCACA": 912, + "CCAGTA": 913, + "GGATGA": 914, + "GTTTTTG": 915, + "GCATTTT": 916, + "GTGCCA": 917, + "GCAGTA": 918, + "GCCCTT": 919, + "TCGTC": 920, + "GAACTA": 921, + "GTGGTT": 922, + "GTGTGA": 923, + "GTGCTT": 924, + "CGCTA": 925, + "GTGTCA": 926, + "TCTTTA": 927, + "GCCTTA": 928, + "CCTATT": 929, + "CAAAATG": 930, + "GAACCA": 931, + "CTCCAGG": 932, + "GACTCA": 933, + "CATGAAA": 934, + "GCTAGG": 935, + "TGTTAAA": 936, + "GCGTA": 937, + "GCACTT": 938, + "TCTTAAA": 939, + "TAAGAAA": 940, + "GGCCTG": 941, + "TCCCTA": 942, + "GTGGTA": 943, + "CTGCTA": 944, + "GGAGTT": 945, + "GGTAAA": 946, + "CAAACAAA": 947, + 
"GATATG": 948, + "TCATGA": 949, + "GACCTT": 950, + "TAATATA": 951, + "GCTAGA": 952, + "GGACTG": 953, + "GGCATT": 954, + "CAGTTA": 955, + "CCCTAA": 956, + "CACCTT": 957, + "GGTGAA": 958, + "CAGCTA": 959, + "GTGTTTT": 960, + "CAACTA": 961, + "GATCAA": 962, + "GAGAAAA": 963, + "TGTGAAA": 964, + "AAAATA": 965, + "GATGAAA": 966, + "CTCTAA": 967, + "TTACTT": 968, + "GATCTG": 969, + "CCACTT": 970, + "GAGTTA": 971, + "CAATCA": 972, + "GGATTACAGG": 973, + "TTTATTTT": 974, + "TACATA": 975, + "TTTTATG": 976, + "GAGTAA": 977, + "GCTGAAA": 978, + "GTACTG": 979, + "GCTCTC": 980, + "TATGTA": 981, + "TGTGTA": 982, + "TCATAA": 983, + "GGACTT": 984, + "TCTCCAA": 985, + "GCATGA": 986, + "GACGA": 987, + "CGCCTG": 988, + "GACCTG": 989, + "GGTCTT": 990, + "CACCAA": 991, + "GATC": 992, + "GACCAA": 993, + "AAAATTA": 994, + "GTAAATT": 995, + "CCAGTT": 996, + "CAGAAAA": 997, + "TAACAAA": 998, + "GGTGTT": 999, + "GAAATTA": 1000, + "TGCCTCA": 1001, + "CCGCC": 1002, + "CCATTTT": 1003, + "CTTGCC": 1004, + "TCTGTA": 1005, + "CTGGCA": 1006, + "GGGATG": 1007, + "CCATGA": 1008, + "CTACTT": 1009, + "TAGGTG": 1010, + "TAAAAATT": 1011, + "GAAAGAA": 1012, + "TAAAATA": 1013, + "CTTTTTG": 1014, + "GTCAAAA": 1015, + "GGACAA": 1016, + "TCTGATT": 1017, + "CTCTCTT": 1018, + "TAATTTG": 1019, + "CTCTTTG": 1020, + "GGCCTT": 1021, + "GGATTTT": 1022, + "CTACTG": 1023, + "GTTGCA": 1024, + "GGCTCC": 1025, + "CTCTGTG": 1026, + "CTCCAGCC": 1027, + "TTACAA": 1028, + "GGACCA": 1029, + "GGAAGGAA": 1030, + "TAAAGAA": 1031, + "TTAGAA": 1032, + "GTGAAAA": 1033, + "CTTGCA": 1034, + "TGGGTG": 1035, + "GGAGCC": 1036, + "CCTCTA": 1037, + "CT": 1038, + "GGGCTT": 1039, + "GGCATG": 1040, + "CTGGTT": 1041, + "TACAGA": 1042, + "GATTAAA": 1043, + "CTCTGTT": 1044, + "TTATCA": 1045, + "CTGAAAA": 1046, + "GTAGTT": 1047, + "GGGTCA": 1048, + "GT": 1049, + "CAGCCA": 1050, + "GCGTC": 1051, + "CACTTA": 1052, + "GTGCTA": 1053, + "TCTTATT": 1054, + "GTACTT": 1055, + "GGTATT": 1056, + "TAGAGA": 1057, + "TACATG": 1058, + "CCACTA": 1059, + 
"TGAGAAA": 1060, + "CAATAAA": 1061, + "TCCAAAA": 1062, + "CGTGAA": 1063, + "GGTCTG": 1064, + "CTGAATT": 1065, + "TCAGCC": 1066, + "CCTCTC": 1067, + "GTTAAAA": 1068, + "GGGATT": 1069, + "TCCTAA": 1070, + "CACTAA": 1071, + "GGAGAAA": 1072, + "CCTTCCTT": 1073, + "GTTTCTT": 1074, + "TATCAA": 1075, + "GATACA": 1076, + "TAATCCCAGCA": 1077, + "CCGCA": 1078, + "TGAAATT": 1079, + "CGTAAA": 1080, + "CTCTCTG": 1081, + "TCTTTTTT": 1082, + "GTACAA": 1083, + "CCAAATT": 1084, + "TGTATTTT": 1085, + "TCGCTT": 1086, + "GGGTGA": 1087, + "GATAGA": 1088, + "CTTTATT": 1089, + "TAAACAA": 1090, + "GTTTATT": 1091, + "TGAATA": 1092, + "CTACCA": 1093, + "GTGTCC": 1094, + "CCCGA": 1095, + "TTTATTA": 1096, + "CTCCAAA": 1097, + "TTTTTTTTTTTT": 1098, + "TCATCC": 1099, + "GAAGCC": 1100, + "CTAAATT": 1101, + "CAAATTA": 1102, + "CCCCAAA": 1103, + "TCTTCTT": 1104, + "TAGGAAA": 1105, + "CACGA": 1106, + "CATTTTA": 1107, + "GTGCAA": 1108, + "TCTCCTG": 1109, + "TATTTTAA": 1110, + "GTTTGTT": 1111, + "GAGCCA": 1112, + "GGCCAA": 1113, + "CATTTCA": 1114, + "CATCCA": 1115, + "CCTATA": 1116, + "GACTTA": 1117, + "TCAAATG": 1118, + "GTATCA": 1119, + "TAAATTTT": 1120, + "CTGAGGCA": 1121, + "GCCCAA": 1122, + "GGTTAA": 1123, + "TATCTG": 1124, + "TGACAGA": 1125, + "GGAGAGA": 1126, + "GCTGCTG": 1127, + "CCCTTA": 1128, + "TCCTCTG": 1129, + "GTAGCA": 1130, + "CCTGAAA": 1131, + "CCGAA": 1132, + "TTTTTAA": 1133, + "CTATAA": 1134, + "CCTGTA": 1135, + "TTACTG": 1136, + "GTATAA": 1137, + "GGCGA": 1138, + "GACTAA": 1139, + "TCAGAAA": 1140, + "GTGTGTG": 1141, + "CAAAGAA": 1142, + "CCTATG": 1143, + "GCAGAGA": 1144, + "CCGTT": 1145, + "TTTTATTTT": 1146, + "GGAAGAA": 1147, + "TTACTA": 1148, + "GCCTGGG": 1149, + "TCCCTC": 1150, + "TCCTCTT": 1151, + "GGATCA": 1152, + "GGTCAA": 1153, + "TCGAGA": 1154, + "TATTCTT": 1155, + "TACTC": 1156, + "GTTAATT": 1157, + "GCGAGA": 1158, + "CTTAATT": 1159, + "TCCTTTG": 1160, + "GTCTAA": 1161, + "CACCCA": 1162, + "GGGTTA": 1163, + "GGGCAA": 1164, + "GGAAATG": 1165, + "GCAAATT": 1166, + "TAGATG": 
1167, + "GCAGAAA": 1168, + "AAAAAAAAAAAAAAAA": 1169, + "CCTACA": 1170, + "GGAGTA": 1171, + "TCTAATT": 1172, + "CAACAAA": 1173, + "TAGATT": 1174, + "GGTTTA": 1175, + "CCTAGA": 1176, + "CTTTAAA": 1177, + "TACTTA": 1178, + "TAATGAA": 1179, + "CTATCA": 1180, + "TAGTAA": 1181, + "CAGAGAA": 1182, + "CAAGAAA": 1183, + "GGGGAAA": 1184, + "CGTTAA": 1185, + "CGTGTT": 1186, + "TCTGTCTG": 1187, + "TTTTAATT": 1188, + "CTGGCC": 1189, + "TAAATGA": 1190, + "CGTCAA": 1191, + "TTAGTA": 1192, + "GTCTCTG": 1193, + "TTTTAAAA": 1194, + "CAGTTTT": 1195, + "CTTCCTT": 1196, + "TATATAA": 1197, + "GCTTTTA": 1198, + "TTTTTCA": 1199, + "GGTC": 1200, + "TTATTAA": 1201, + "TTTTGTT": 1202, + "CATAGA": 1203, + "TAGGAA": 1204, + "GAGAGAA": 1205, + "GTAGCTG": 1206, + "TTATGA": 1207, + "GTAGTG": 1208, + "GGAGAGG": 1209, + "CTCTGAA": 1210, + "TAGTC": 1211, + "GACTCC": 1212, + "TCCCTCC": 1213, + "TAATGTT": 1214, + "CATCTA": 1215, + "GCCACCA": 1216, + "GTACTA": 1217, + "TGGGAAA": 1218, + "CGCCTT": 1219, + "GCCCGG": 1220, + "GGAGGAA": 1221, + "GTACCA": 1222, + "CGCAAA": 1223, + "CATAAAA": 1224, + "TAACATT": 1225, + "GCTAAAA": 1226, + "TCTTCTG": 1227, + "GCCAAAA": 1228, + "GTATGA": 1229, + "GTCTTTG": 1230, + "TACTGA": 1231, + "TCCCAGG": 1232, + "TTATTTA": 1233, + "TTAGTT": 1234, + "GGACC": 1235, + "TATAAAA": 1236, + "CAAACAA": 1237, + "CTTCTC": 1238, + "TCTATCTA": 1239, + "GAAATAA": 1240, + "GTGTAA": 1241, + "CTTTGTT": 1242, + "GATAAAA": 1243, + "GCCCAGG": 1244, + "GCGATT": 1245, + "AAAAAATT": 1246, + "TACAGG": 1247, + "GGCTAA": 1248, + "TAGCTT": 1249, + "GTCTCTA": 1250, + "CTCCTGA": 1251, + "GAATAAA": 1252, + "TTACCA": 1253, + "GGGACA": 1254, + "GCCACTG": 1255, + "GTTTAAA": 1256, + "GTCTGTG": 1257, + "TGACAAA": 1258, + "TACATTTT": 1259, + "GCCACC": 1260, + "TGTTTT": 1261, + "TAGCAA": 1262, + "TTATAAA": 1263, + "GACCCA": 1264, + "GCAGC": 1265, + "CAGACAGA": 1266, + "CACAAAA": 1267, + "GCCCTA": 1268, + "TATTAAAA": 1269, + "CGTATT": 1270, + "CCATCC": 1271, + "TCGATT": 1272, + "GAAGGAA": 1273, + "GATCCA": 
1274, + "TATTTGA": 1275, + "GTGAATT": 1276, + "TACCTT": 1277, + "CGTCTT": 1278, + "CCTAGG": 1279, + "TCGAAA": 1280, + "CTTTCTG": 1281, + "TGAAGAA": 1282, + "TCTCTCA": 1283, + "GTCTCTT": 1284, + "GGAGGGG": 1285, + "GTCTGTT": 1286, + "CTATGA": 1287, + "GGAAATT": 1288, + "GCACACA": 1289, + "GCCTTTT": 1290, + "CAGTCC": 1291, + "CTGGTA": 1292, + "GCATCC": 1293, + "TAGTTA": 1294, + "GGCTTA": 1295, + "GAGTCC": 1296, + "TGAAAA": 1297, + "TAGATAGA": 1298, + "TGTTTGTT": 1299, + "TACTCA": 1300, + "CATTTAA": 1301, + "GATTTTA": 1302, + "CACTCC": 1303, + "GAAACAA": 1304, + "GCGCTG": 1305, + "TCTTTCA": 1306, + "CTGTCC": 1307, + "GAACTCA": 1308, + "CGGAAA": 1309, + "TATTGTT": 1310, + "GCACTA": 1311, + "TATTCAA": 1312, + "GCGGGG": 1313, + "GTGGCC": 1314, + "TAATTAAA": 1315, + "TACTAA": 1316, + "GCGGTG": 1317, + "TACCAA": 1318, + "GGTATA": 1319, + "CTAGTT": 1320, + "GCAGAGG": 1321, + "CTTTTTTTT": 1322, + "TTTTTTTTTTTTTTTT": 1323, + "TACAGTA": 1324, + "CCATGTT": 1325, + "TAGTGA": 1326, + "CGTGTG": 1327, + "GCTCTGA": 1328, + "CTTCCTG": 1329, + "TCGCTG": 1330, + "TAAATCA": 1331, + "TCCAATT": 1332, + "GTTTCTG": 1333, + "GAAGAGA": 1334, + "GGGTAA": 1335, + "CCATAA": 1336, + "TTATATT": 1337, + "CGAATT": 1338, + "CCGGA": 1339, + "TGAGCC": 1340, + "CCGTA": 1341, + "CAGAGGA": 1342, + "GTGTTTG": 1343, + "GACAAAA": 1344, + "TTTTTTAAA": 1345, + "GTTGCC": 1346, + "GAGTTTT": 1347, + "TCAAAAAA": 1348, + "TGTTTCA": 1349, + "TATCTA": 1350, + "TCTCTCC": 1351, + "CTCCACA": 1352, + "TAAATATT": 1353, + "TTTTCTG": 1354, + "CTCTCAA": 1355, + "CCTTAAA": 1356, + "TCTTTTAA": 1357, + "GAACAAA": 1358, + "TTAGCA": 1359, + "GCTCATG": 1360, + "TAAAGTA": 1361, + "GGATAA": 1362, + "TTATTAAA": 1363, + "CTCCATT": 1364, + "TCTCTGA": 1365, + "TTATTTG": 1366, + "CCTGTAA": 1367, + "TTATATA": 1368, + "GACTTTT": 1369, + "TGTTGTT": 1370, + "GCAAATG": 1371, + "CTTCAAA": 1372, + "GAATATT": 1373, + "GAATCC": 1374, + "CTCTTAA": 1375, + "GCATAA": 1376, + "GAATGAA": 1377, + "CTTAAAAA": 1378, + "TAAAAATG": 1379, + "TTTTAAAAA": 
1380, + "CTCTGGG": 1381, + "TGATCC": 1382, + "GCTCTCA": 1383, + "CTCCAGA": 1384, + "GAGTGCAGTG": 1385, + "CAATATT": 1386, + "TAGAAAA": 1387, + "GTAAATG": 1388, + "TAGCTG": 1389, + "GCTCAAA": 1390, + "GCAGGAA": 1391, + "TACCTG": 1392, + "GGGAAAA": 1393, + "TTTTCTA": 1394, + "GGGGGGGG": 1395, + "CCGA": 1396, + "CTTTGAA": 1397, + "GGAGGTG": 1398, + "TAGTCA": 1399, + "GGCCCA": 1400, + "TGATGTT": 1401, + "CAAATAA": 1402, + "TCTTCCA": 1403, + "GCGCTT": 1404, + "GTATTTG": 1405, + "GTCTC": 1406, + "GAAATCA": 1407, + "TGATAAA": 1408, + "CATTCTT": 1409, + "TATCCA": 1410, + "GCCTCTG": 1411, + "TGAGATG": 1412, + "CGCCAA": 1413, + "GTTTTATT": 1414, + "TATATATT": 1415, + "GTAGGA": 1416, + "GACAGAA": 1417, + "CTCCAGCCTGGG": 1418, + "GCGTGA": 1419, + "GGTATG": 1420, + "GAGGGAGG": 1421, + "TCATTTG": 1422, + "CTACC": 1423, + "TACAGAA": 1424, + "GGTAGA": 1425, + "GATCTA": 1426, + "GTCCATG": 1427, + "TGAGGAA": 1428, + "TAATAAAA": 1429, + "TAAACTT": 1430, + "TCACATT": 1431, + "GGAGGCC": 1432, + "TCACAAA": 1433, + "CACTTTT": 1434, + "CGGCC": 1435, + "CAACAGA": 1436, + "GTAGAGA": 1437, + "GTTATTTT": 1438, + "CGTTTG": 1439, + "TCGTCA": 1440, + "TCTGCTG": 1441, + "CAACACA": 1442, + "GGTAGG": 1443, + "GCAGCTG": 1444, + "TAGTAGAGA": 1445, + "CAAGCC": 1446, + "GCATTTG": 1447, + "TAATATG": 1448, + "GCTTAAA": 1449, + "GCTTCTG": 1450, + "CTCTCCA": 1451, + "TCATCTT": 1452, + "CGTCTG": 1453, + "TCATTTA": 1454, + "CATAGG": 1455, + "GCTCCTT": 1456, + "TGTTCTT": 1457, + "TACATTA": 1458, + "CACAGAA": 1459, + "TAAATATA": 1460, + "TAGAGG": 1461, + "GATAGG": 1462, + "TCCTGAA": 1463, + "GGAGCTG": 1464, + "TGATATT": 1465, + "TCATTAA": 1466, + "CTTTTAAA": 1467, + "TCGTTA": 1468, + "TAAACTA": 1469, + "GTTTGAA": 1470, + "TAAAATTA": 1471, + "CACCCC": 1472, + "TCAGAGA": 1473, + "CTCCTGCCTCA": 1474, + "TGACATT": 1475, + "GTATTTA": 1476, + "CTTCATT": 1477, + "GAAACTG": 1478, + "TAACACA": 1479, + "GTTCAAA": 1480, + "GGAGATG": 1481, + "TCGGCC": 1482, + "CAGCATT": 1483, + "TCGATG": 1484, + "TATTCTA": 1485, + 
"CTGTGAA": 1486, + "TATTGAA": 1487, + "TTTTCCA": 1488, + "TATTTCTT": 1489, + "GGTGAAA": 1490, + "CTGAGAA": 1491, + "GCACAGA": 1492, + "GCGAGG": 1493, + "CTGTGTG": 1494, + "TGAAATG": 1495, + "TGATGAA": 1496, + "GTCCAAA": 1497, + "CTCAATT": 1498, + "TCCAGAA": 1499, + "GTATATA": 1500, + "TAAAGTT": 1501, + "TCTCAAAA": 1502, + "TCCATCA": 1503, + "GTCTGAA": 1504, + "TGAGAGA": 1505, + "TGATTTG": 1506, + "TTAGCC": 1507, + "CTCCATG": 1508, + "TCCCTGA": 1509, + "GAGCTA": 1510, + "CCCCCCCC": 1511, + "GTGGAAA": 1512, + "CTGGGAA": 1513, + "CAATGAA": 1514, + "CCACACA": 1515, + "CTTTCAA": 1516, + "CGGAGG": 1517, + "TCGTGA": 1518, + "CCAGAAA": 1519, + "GTTTTAAA": 1520, + "TGTTGAA": 1521, + "TCCTGTG": 1522, + "CTAAATG": 1523, + "TCCTTTA": 1524, + "GTCTGGG": 1525, + "TCTCTTTT": 1526, + "TACGG": 1527, + "TATTGTA": 1528, + "TTAGTG": 1529, + "TTACC": 1530, + "TAATCCCAGCACTTTG": 1531, + "TCTGGAA": 1532, + "CTTCTCA": 1533, + "CGCATT": 1534, + "TATTTAAA": 1535, + "TCACACA": 1536, + "TAATCAA": 1537, + "GCGAAA": 1538, + "GGGCCA": 1539, + "GTTCATT": 1540, + "GAGAAAAA": 1541, + "TTTTGTA": 1542, + "TACTTTT": 1543, + "TCGAGG": 1544, + "GTGAAAAA": 1545, + "CAATATA": 1546, + "TCCCATG": 1547, + "CAATTAA": 1548, + "CTGGAAA": 1549, + "CCCAGCA": 1550, + "TCCCATT": 1551, + "TCCTGTT": 1552, + "CTCTTTA": 1553, + "TCCCCTT": 1554, + "GTTTCAA": 1555, + "GTCCAGG": 1556, + "GGAAGGA": 1557, + "TAGTTTT": 1558, + "TGACCTT": 1559, + "GTGCTGGGATTACAGG": 1560, + "TATTTATA": 1561, + "TCTGCAA": 1562, + "CTGAAAAA": 1563, + "TATGTTA": 1564, + "CTTCACA": 1565, + "GCACAGG": 1566, + "CCTGCTG": 1567, + "TTTTTTAA": 1568, + "GTTATTA": 1569, + "CCCTTTT": 1570, + "TGATTTA": 1571, + "TACAAAA": 1572, + "TAAGTAA": 1573, + "TTTTTAAA": 1574, + "CATCTC": 1575, + "GTGGTGA": 1576, + "GTGGAGA": 1577, + "CTCTGCA": 1578, + "GTTAAAAA": 1579, + "TACATACA": 1580, + "CTTTGTG": 1581, + "GGACACA": 1582, + "TCTGATG": 1583, + "TATTATT": 1584, + "TCTTCTA": 1585, + "CTGTGTT": 1586, + "TCAGCTT": 1587, + "CTTTATA": 1588, + "GGCGC": 1589, + 
"TCCCTCA": 1590, + "GTACC": 1591, + "TGGAGAA": 1592, + "CAAAAATT": 1593, + "TCTTTAA": 1594, + "CTCTCTC": 1595, + "TGAGTGA": 1596, + "GCAGCTT": 1597, + "CGGATT": 1598, + "TACGA": 1599, + "TCTTGTT": 1600, + "TCGTAA": 1601, + "GCCTGTG": 1602, + "TATTCTG": 1603, + "GGGATA": 1604, + "GGGTCC": 1605, + "TGAGATT": 1606, + "CTTTTATT": 1607, + "TCCCACA": 1608, + "CATGGTG": 1609, + "TTAGGA": 1610, + "GAACACA": 1611, + "TCATAAA": 1612, + "CAACATT": 1613, + "GGTCCA": 1614, + "GAATTTG": 1615, + "TATTAATT": 1616, + "TCCTGGG": 1617, + "GCAGCAA": 1618, + "CTCTTCA": 1619, + "GAAGAGG": 1620, + "TCTGTCA": 1621, + "CTGAATG": 1622, + "CCACAAA": 1623, + "GTGGAGG": 1624, + "TGATTAA": 1625, + "CTCCCTCC": 1626, + "CACACACACACACACACACACACACACACACA": 1627, + "GCGATG": 1628, + "CATTCTG": 1629, + "GTAGAAA": 1630, + "TCATCAA": 1631, + "TTTTCAA": 1632, + "TATGTATG": 1633, + "CCAAATG": 1634, + "TAATTTTA": 1635, + "TAAGGAA": 1636, + "CTTGAAA": 1637, + "AAAAAAAAAAAA": 1638, + "GCTCCTG": 1639, + "GCAGATG": 1640, + "GAAAAATT": 1641, + "GACGC": 1642, + "GTGGGGG": 1643, + "GTCAATT": 1644, + "CTTGCTT": 1645, + "TGACACA": 1646, + "GTGTGTT": 1647, + "CCAGAGA": 1648, + "CCCAGCC": 1649, + "TAAAGAAA": 1650, + "GTCCATT": 1651, + "TAAATTAA": 1652, + "CCCAAAA": 1653, + "GAATTAA": 1654, + "TGAATTA": 1655, + "TTTTTTTG": 1656, + "CCAGCTT": 1657, + "CAATTTG": 1658, + "CTGTTTG": 1659, + "GTCTCAA": 1660, + "GTTTGTG": 1661, + "GGCATA": 1662, + "GGTACA": 1663, + "TGATGTG": 1664, + "GATTTCA": 1665, + "TCTGCTT": 1666, + "GTAATTA": 1667, + "TAAAAAAAA": 1668, + "GCCGCC": 1669, + "TGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTG": 1670, + "GCGTCA": 1671, + "GCTCATT": 1672, + "GAACCTG": 1673, + "TAAACAAA": 1674, + "GTGCTGA": 1675, + "TCAGGAA": 1676, + "TCCTCAA": 1677, + "TCTATTTT": 1678, + "TCTGTTTT": 1679, + "CAGAGCA": 1680, + "CCAGGAA": 1681, + "GTCTTTA": 1682, + "TCTTCAA": 1683, + "TCAAAATT": 1684, + "GCTTATT": 1685, + "GTTCCTT": 1686, + "CACCTA": 1687, + "TCACTGA": 1688, + "GAAGCAA": 1689, + "TAAAGA": 1690, + "TCCTTCA": 1691, + 
"TCTCATG": 1692, + "TCAGTGA": 1693, + "TACACAA": 1694, + "CACGTG": 1695, + "CCTAAAA": 1696, + "GCCTTTG": 1697, + "GGCTTTT": 1698, + "GTTGAAA": 1699, + "GTTCTC": 1700, + "CTAGA": 1701, + "CTACAAA": 1702, + "GCACAAA": 1703, + "TTACATT": 1704, + "GGCCCC": 1705, + "TAATGTG": 1706, + "CTGCCTT": 1707, + "TCCCAGA": 1708, + "GTGAATG": 1709, + "GGACAGG": 1710, + "GGATGTG": 1711, + "GTTTATA": 1712, + "TGACCAA": 1713, + "GTGGCTG": 1714, + "GTTCTCA": 1715, + "CTTATTTT": 1716, + "CTGGAGA": 1717, + "TTACAAA": 1718, + "GTCTTCA": 1719, + "CAAGAGA": 1720, + "CCATTTG": 1721, + "TCACAGA": 1722, + "CTAGTA": 1723, + "CATTATT": 1724, + "TTAGA": 1725, + "GCTCTCC": 1726, + "GCGCCA": 1727, + "TATGTTTT": 1728, + "TCCTCCA": 1729, + "CAGAAAAA": 1730, + "GTGGGAA": 1731, + "TAATCTT": 1732, + "TGAGTCA": 1733, + "CTGCTC": 1734, + "GTCTCCA": 1735, + "TCATGTT": 1736, + "GTTTCCA": 1737, + "TAAGCAA": 1738, + "CTAAAAATA": 1739, + "TGACTGA": 1740, + "TCGGTT": 1741, + "TTAGAAA": 1742, + "TAAGCC": 1743, + "TAAAGCA": 1744, + "CCTCTCC": 1745, + "CCTCCTT": 1746, + "TCAGATT": 1747, + "TATGAAAA": 1748, + "GCTGATG": 1749, + "CATATTTT": 1750, + "GCTCCAA": 1751, + "CGGCGG": 1752, + "CCACTGA": 1753, + "CAGCAAA": 1754, + "CTGTCTT": 1755, + "CTAGCA": 1756, + "TCGGGG": 1757, + "CACAGCA": 1758, + "GCTGATT": 1759, + "CTAGGA": 1760, + "TAACTC": 1761, + "TCATATT": 1762, + "CCTTCTT": 1763, + "CTGCAAA": 1764, + "CCCGC": 1765, + "GGTCTA": 1766, + "CCCAGGA": 1767, + "GTGTCTG": 1768, + "TAATAATAATAA": 1769, + "TCACATG": 1770, + "CAATTTA": 1771, + "TATATATATATATATATATATATATATATATA": 1772, + "CCACAGA": 1773, + "TCAATTTT": 1774, + "GTATTAA": 1775, + "GAACATT": 1776, + "TCTCTTA": 1777, + "CTATTTG": 1778, + "TCTTTCC": 1779, + "GGTTAAA": 1780, + "GCTAATT": 1781, + "CTGCTGA": 1782, + "TACCTA": 1783, + "CAGGGTT": 1784, + "TCGCCA": 1785, + "CAAAAATTA": 1786, + "CTTCTGA": 1787, + "GCATGTG": 1788, + "CTATTAA": 1789, + "GCACATG": 1790, + "CAACATG": 1791, + "TCATGAA": 1792, + "GAATGTT": 1793, + "GGGTTTT": 1794, + "CTGCCTG": 1795, + 
"GTCCACA": 1796, + "TAAACA": 1797, + "CTCTGGA": 1798, + "GACCCC": 1799, + "GGCAAAA": 1800, + "TCTGTTA": 1801, + "CTAGTG": 1802, + "CTATATA": 1803, + "TCAGTCA": 1804, + "TAACTAA": 1805, + "GAAGATG": 1806, + "GTCTTAA": 1807, + "CAAGGAA": 1808, + "GTAAAAAA": 1809, + "TCCCCTG": 1810, + "TCGCAA": 1811, + "TCTGCCTG": 1812, + "CCTTTTA": 1813, + "GTCCCAGCTA": 1814, + "TATATATG": 1815, + "TATTGTG": 1816, + "TGTGTTTT": 1817, + "GCGCAA": 1818, + "CACAGTG": 1819, + "TAAGATT": 1820, + "CTCTGTA": 1821, + "GGAGGCTGA": 1822, + "GGACAAA": 1823, + "TATTAAAAA": 1824, + "TCGTCC": 1825, + "TCGGAA": 1826, + "CTATAAA": 1827, + "CTTCAGA": 1828, + "CTAGAAA": 1829, + "CATTCAA": 1830, + "CACGCA": 1831, + "CAGGATT": 1832, + "CCATCTT": 1833, + "GTAGCC": 1834, + "GAATTTA": 1835, + "CACGC": 1836, + "CAATCC": 1837, + "TGAGCAA": 1838, + "GAAGCTG": 1839, + "TCAATTA": 1840, + "GAAGTCA": 1841, + "CTGCACA": 1842, + "CCACGG": 1843, + "GGATCTT": 1844, + "CTCCTGCCTCAGCCTCC": 1845, + "TAAATGAA": 1846, + "CCGTC": 1847, + "TCGGTG": 1848, + "TTTTATTA": 1849, + "GCAGGGG": 1850, + "GCAGGTG": 1851, + "TCTATTA": 1852, + "TAACTTA": 1853, + "CTAATTTT": 1854, + "CCCGCC": 1855, + "TAATACA": 1856, + "GGATTAAA": 1857, + "TCTCTCTG": 1858, + "GCTTCTT": 1859, + "CATTTATT": 1860, + "CCAGAGG": 1861, + "GGACAGA": 1862, + "GCCAATT": 1863, + "TCCCCAA": 1864, + "GTTGATT": 1865, + "GAAGAAAA": 1866, + "GCATTTA": 1867, + "CTCTAAA": 1868, + "CACACACACACA": 1869, + "CCTCAAA": 1870, + "TATAATT": 1871, + "CAATGTT": 1872, + "GCCCAGA": 1873, + "GTATATT": 1874, + "CTAAAAAA": 1875, + "CCACAGG": 1876, + "TAAGAGA": 1877, + "TCCTTAA": 1878, + "TATTTTTT": 1879, + "GAATATA": 1880, + "GGATTTG": 1881, + "GTGTGAA": 1882, + "CTGGCTT": 1883, + "GCGGCA": 1884, + "TCCGCC": 1885, + "GCATCTT": 1886, + "TCTAATA": 1887, + "CTGCATT": 1888, + "CTCTGCC": 1889, + "TCACTCA": 1890, + "TCAGCAA": 1891, + "TATTATG": 1892, + "CCAGCTG": 1893, + "GATCTC": 1894, + "GCCTCTT": 1895, + "CTTCCAA": 1896, + "TCCTAAA": 1897, + "TCATCTG": 1898, + "CTATTTA": 1899, + 
"CTGCAGG": 1900, + "CAAGCAA": 1901, + "GCGGAA": 1902, + "GAAATAAA": 1903, + "TAAAATAA": 1904, + "TCACCTT": 1905, + "CCATGTG": 1906, + "GACCTA": 1907, + "CAGATGA": 1908, + "GTGGCTT": 1909, + "TTATTATTATTA": 1910, + "TCCCGG": 1911, + "TATTTGTT": 1912, + "CTGTAAA": 1913, + "TCCATCCA": 1914, + "CTGTATA": 1915, + "GTTTCTA": 1916, + "GTTGCTT": 1917, + "CCATGAA": 1918, + "GCTCTTA": 1919, + "CTTCATG": 1920, + "GTTCCTG": 1921, + "GCTGGGA": 1922, + "TCAGAGG": 1923, + "CATTAAAA": 1924, + "TCAGTAA": 1925, + "GAATGTG": 1926, + "CTTATTA": 1927, + "GCACTGA": 1928, + "TGAGGTT": 1929, + "CATCAAA": 1930, + "CTTCTCC": 1931, + "GTTTATG": 1932, + "CTTTCCA": 1933, + "GTGCCTG": 1934, + "GAAAGGA": 1935, + "GCATCTG": 1936, + "TACCCA": 1937, + "TAACAGA": 1938, + "AAAAAAAAAAA": 1939, + "CTATGAA": 1940, + "CAGTAAA": 1941, + "TAGCTA": 1942, + "TCGTTTT": 1943, + "GTGTCTT": 1944, + "GAGCAAA": 1945, + "TCTAAAAA": 1946, + "GTTCACA": 1947, + "GAAATGA": 1948, + "CAAATGA": 1949, + "GCCCTGA": 1950, + "GTGTTTA": 1951, + "TCATGTG": 1952, + "CATATTA": 1953, + "TCAAAAAAA": 1954, + "TAAGTTA": 1955, + "TCTCTCTT": 1956, + "CCAGTGA": 1957, + "CCTCTGA": 1958, + "CAAGATG": 1959, + "GCCTGTT": 1960, + "GTTTGGG": 1961, + "CATTCATT": 1962, + "GCCCCTG": 1963, + "GTTCTGA": 1964, + "GCGGCC": 1965, + "GCGGTT": 1966, + "CAAAACAAAA": 1967, + "TACATATA": 1968, + "GAATTAAA": 1969, + "TCAAGAA": 1970, + "CTGTATT": 1971, + "TTTTTATT": 1972, + "GATTATT": 1973, + "TCTAATG": 1974, + "GTTGCTG": 1975, + "TGAATGAA": 1976, + "TCAGCTG": 1977, + "CTTGATT": 1978, + "CAGAATG": 1979, + "CTAATTA": 1980, + "TATAATG": 1981, + "GTTTTGTTTT": 1982, + "CCAGCCTG": 1983, + "TGATGGA": 1984, + "GCAGATT": 1985, + "CTCTATT": 1986, + "GCAGTCA": 1987, + "TAAGTGA": 1988, + "CTACACA": 1989, + "CGCATG": 1990, + "TAGCCA": 1991, + "GTGGCTCA": 1992, + "CAAATAAA": 1993, + "GTGCTCA": 1994, + "TTTTTTTTTT": 1995, + "TAACATG": 1996, + "TCCCAGCTA": 1997, + "CAAAGTA": 1998, + "TCATATA": 1999, + "CAGCATG": 2000, + "TGATCTT": 2001, + "CATAATT": 2002, + "TGTGTTA": 
2003, + "TTTTGAA": 2004, + "TTAATTA": 2005, + "GATATTA": 2006, + "TCATTCA": 2007, + "TGATATA": 2008, + "TGACTCA": 2009, + "GACGTT": 2010, + "TGACATG": 2011, + "GTTGTGA": 2012, + "CATTTTTT": 2013, + "GCCTGGA": 2014, + "CTATGTT": 2015, + "CTTTGGG": 2016, + "GTCTCAAA": 2017, + "CTGGCTG": 2018, + "CCACATG": 2019, + "GGCGTG": 2020, + "CTTAATG": 2021, + "TAAGATG": 2022, + "GTATAAA": 2023, + "TGTATTA": 2024, + "TAACTCA": 2025, + "GAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGA": 2026, + "GCATGAA": 2027, + "GTTAATG": 2028, + "TCCAGGA": 2029, + "GAGAGAAA": 2030, + "TCTCTGTG": 2031, + "CTCTCTA": 2032, + "CCACCTG": 2033, + "GCCAGGA": 2034, + "CTGGAGG": 2035, + "CCATTTA": 2036, + "GTCTGGA": 2037, + "GCCCACA": 2038, + "TAGAGAA": 2039, + "CAACTCA": 2040, + "GGCAGGA": 2041, + "TCTTATG": 2042, + "CAAAGGA": 2043, + "GGTAAAA": 2044, + "GAGAGGA": 2045, + "GTCCAGA": 2046, + "GCCCTCA": 2047, + "GATATTTT": 2048, + "CAGGGAA": 2049, + "CCACATT": 2050, + "GAGGAGG": 2051, + "GAAACTT": 2052, + "CAGAATT": 2053, + "TCAGATG": 2054, + "TATTTCC": 2055, + "TACAGTG": 2056, + "TGAGCTG": 2057, + "CCATCTG": 2058, + "GAGAATG": 2059, + "TCAACAA": 2060, + "ATT": 2061, + "TAACTGA": 2062, + "TGAGAGG": 2063, + "CACTGAA": 2064, + "CCACCTT": 2065, + "CTGCAGA": 2066, + "TCACCAA": 2067, + "TGAGCTT": 2068, + "CAAAGCA": 2069, + "GGTTTTA": 2070, + "CGGGGTT": 2071, + "TCCAAAAA": 2072, + "TATGTATA": 2073, + "CCAGATG": 2074, + "TCCATTTT": 2075, + "CTGCTCA": 2076, + "GATAATT": 2077, + "CCACCAA": 2078, + "CTCCTCC": 2079, + "GAGAATT": 2080, + "GAAAGTA": 2081, + "TAAAATAAAA": 2082, + "CTTCTTA": 2083, + "CTGTTTA": 2084, + "GAATCAA": 2085, + "GCATGTT": 2086, + "GCACGG": 2087, + "GACTGAA": 2088, + "GTGCACA": 2089, + "GACGTG": 2090, + "TATACAA": 2091, + "TCGACA": 2092, + "GAAGACA": 2093, + "TAAAGGA": 2094, + "GATCAAA": 2095, + "CAGTGTG": 2096, + "CTAGCC": 2097, + "GAGGAAAA": 2098, + "TCTGAAAA": 2099, + "GAACCCA": 2100, + "GATGGATG": 2101, + "GTTCTTA": 2102, + "CTATATT": 2103, + "GCATTAA": 2104, + "TCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTC": 2105, + 
"TCAGTC": 2106, + "TATTTTTG": 2107, + "GAGGATT": 2108, + "GTATGTG": 2109, + "TAACCAA": 2110, + "GTTGTTTT": 2111, + "TTTTTCTT": 2112, + "GTGTTAA": 2113, + "CTTGGAA": 2114, + "AAAAAATG": 2115, + "CAATGTG": 2116, + "GTGCCTT": 2117, + "GCCTCAA": 2118, + "GAGTCTT": 2119, + "GCTAATTTT": 2120, + "CGAAAAA": 2121, + "GTGTATA": 2122, + "GCGTTA": 2123, + "CTGCACTCCAGCCTGGG": 2124, + "GTTCATG": 2125, + "CAAAGAAA": 2126, + "GCAGTAA": 2127, + "GGATGAA": 2128, + "CTTTATG": 2129, + "CAGGAAAA": 2130, + "TCCTGCA": 2131, + "CTGTCTG": 2132, + "GAACATG": 2133, + "GGATGGA": 2134, + "GCCTGAA": 2135, + "CAAAAATG": 2136, + "TCCAATG": 2137, + "CCAGCAA": 2138, + "GGCCTA": 2139, + "CAACTGA": 2140, + "GCACCTG": 2141, + "GTCTATT": 2142, + "CCTCTCA": 2143, + "GTGGTCA": 2144, + "GTGTAAA": 2145, + "GTACACA": 2146, + "GTAAAATT": 2147, + "GTACATT": 2148, + "TATATAAA": 2149, + "CTGTTAA": 2150, + "TAAGTCA": 2151, + "GCCTCCA": 2152, + "AAATTAAA": 2153, + "GTGCAGG": 2154, + "TCCTGGA": 2155, + "GTGCAAA": 2156, + "GCGTCC": 2157, + "CCATTAA": 2158, + "GGAGGGA": 2159, + "TCACTTA": 2160, + "TCATTAAA": 2161, + "CAACATA": 2162, + "TAATAGA": 2163, + "TAATGTA": 2164, + "GATTTTTT": 2165, + "GTTGTCA": 2166, + "GGAGACA": 2167, + "GTGTGGG": 2168, + "TCACAGG": 2169, + "TCGGCA": 2170, + "CTCCCTG": 2171, + "GACCAAA": 2172, + "TGTTTATT": 2173, + "CGAATG": 2174, + "CTCAATG": 2175, + "TCACCTG": 2176, + "CAGTGTT": 2177, + "TGAGACA": 2178, + "TAGGGG": 2179, + "GAAAAATG": 2180, + "GTTGAGA": 2181, + "TCGATA": 2182, + "CTCGGGAGG": 2183, + "GTTGTC": 2184, + "CCAGTCA": 2185, + "GCCCAGGCTG": 2186, + "GAACAGA": 2187, + "GGCTCACTGCAA": 2188, + "GCAGACA": 2189, + "TGAGGTG": 2190, + "CACGTT": 2191, + "TAAGAAAA": 2192, + "CCAGGCA": 2193, + "GTATCTT": 2194, + "CTTGGGAGG": 2195, + "CTTTCTA": 2196, + "CCGCTG": 2197, + "GAGCTCA": 2198, + "GAGACAGA": 2199, + "CTTCAGG": 2200, + "GCACATT": 2201, + "GTACAAA": 2202, + "CTTGTAA": 2203, + "GTGGGTG": 2204, + "GAAGTGA": 2205, + "GGTCTC": 2206, + "GTATGTT": 2207, + "GCACTCA": 2208, + "TTATGTT": 
2209, + "CAAGTCA": 2210, + "CAAGTGA": 2211, + "GAAACTA": 2212, + "TAAATAAAA": 2213, + "TCTTAAAA": 2214, + "GTTGGAA": 2215, + "GTTCTAA": 2216, + "CCACTC": 2217, + "CAGTGAA": 2218, + "GAAAGG": 2219, + "GCACGA": 2220, + "TAACTTTT": 2221, + "GTTGTTA": 2222, + "TCAGTTA": 2223, + "CGGATG": 2224, + "TATTTGAA": 2225, + "CCCTGAA": 2226, + "GCCCTC": 2227, + "CTTCTAA": 2228, + "TTTGTTTT": 2229, + "GAGCTGA": 2230, + "CTGTGGG": 2231, + "CAAGATT": 2232, + "GAAGCTT": 2233, + "TGAGTAA": 2234, + "CTTGCTG": 2235, + "GGATGGG": 2236, + "CGTATG": 2237, + "TCCATTA": 2238, + "GTCTGCA": 2239, + "GCCATTTT": 2240, + "GTTGTAA": 2241, + "CACACAA": 2242, + "GGACTACAGG": 2243, + "CGTTTTA": 2244, + "TCTTCC": 2245, + "TAACCTT": 2246, + "CTTTAAAA": 2247, + "TGAATTTT": 2248, + "CTACAGA": 2249, + "GCAAGAA": 2250, + "TAACAAAA": 2251, + "CAATTAAA": 2252, + "CCACTCA": 2253, + "CATGGTGAAA": 2254, + "CCCAGAA": 2255, + "CTACATT": 2256, + "CCGAGG": 2257, + "TCCAGTG": 2258, + "TGAGTTA": 2259, + "GGAGTCA": 2260, + "TAACGA": 2261, + "GAGTAAA": 2262, + "GACTCTG": 2263, + "GGAGCTT": 2264, + "TACTCC": 2265, + "CTGCATG": 2266, + "GCTTTTTT": 2267, + "GTCTAAA": 2268, + "GTGCGG": 2269, + "CATCTCA": 2270, + "TGATCAA": 2271, + "GGAGATT": 2272, + "GCAAAAAA": 2273, + "CACCAAA": 2274, + "TGACGG": 2275, + "CAGAGG": 2276, + "GTTGATG": 2277, + "CTTGTCA": 2278, + "TCCACCTG": 2279, + "GGAGCAA": 2280, + "CAAGTAA": 2281, + "CCATAAA": 2282, + "GTGCATG": 2283, + "GCATATT": 2284, + "GTAGATT": 2285, + "GCCTAA": 2286, + "CTCAAAAA": 2287, + "GGAGAAAA": 2288, + "CTATCC": 2289, + "TAATATTA": 2290, + "GTGCTC": 2291, + "CAATATG": 2292, + "TGTGGAA": 2293, + "TGACTC": 2294, + "GTGTATG": 2295, + "TTTTAATG": 2296, + "GCTCTAA": 2297, + "CACAATG": 2298, + "CAGCTCA": 2299, + "GTTGGTT": 2300, + "CTAAAATT": 2301, + "GTCTATG": 2302, + "TGTGAAAA": 2303, + "CTGGGTT": 2304, + "CCCCTCC": 2305, + "CCCTCTT": 2306, + "GCAGGGA": 2307, + "GAAACCA": 2308, + "CATTTCC": 2309, + "GCAGCCA": 2310, + "TCATATG": 2311, + "GCAGGCA": 2312, + "CGTAAAA": 2313, + 
"TGACCTG": 2314, + "CAGAGGTT": 2315, + "CTTGTGA": 2316, + "TTATCTT": 2317, + "CTGTATG": 2318, + "GTCAATG": 2319, + "GGACGG": 2320, + "GCGTAA": 2321, + "CAAACTA": 2322, + "TAAATGTT": 2323, + "CTTCGG": 2324, + "CTCCCCA": 2325, + "TACAATG": 2326, + "TCTGTAA": 2327, + "GAATATG": 2328, + "GCGGGA": 2329, + "GGACATT": 2330, + "TTATGAA": 2331, + "GGATGTT": 2332, + "GGACATG": 2333, + "TCAGGTG": 2334, + "CAACAAAA": 2335, + "GAAAGAGA": 2336, + "GTGGATG": 2337, + "GGGCTA": 2338, + "CCATCAA": 2339, + "CAGCTGA": 2340, + "CTCCACC": 2341, + "CAATCAA": 2342, + "GTGGTC": 2343, + "TGACAGG": 2344, + "CCATTCA": 2345, + "GTCCCTG": 2346, + "CAGACACA": 2347, + "GTTGGTG": 2348, + "CCTCCTG": 2349, + "GAACTGA": 2350, + "TATTCATT": 2351, + "GCCCATG": 2352, + "CAATCTT": 2353, + "GAAAGCA": 2354, + "GAATCTG": 2355, + "TTATTTTA": 2356, + "GTTTGGA": 2357, + "TTTTTGTT": 2358, + "GGGAATG": 2359, + "GCGACA": 2360, + "TAAACTG": 2361, + "CCATATT": 2362, + "GGATCC": 2363, + "CAAGCTT": 2364, + "TAAAAAAAAA": 2365, + "TCACTC": 2366, + "CACTGTT": 2367, + "TGTTAATT": 2368, + "GGACTGA": 2369, + "GGAGTGA": 2370, + "CATACACA": 2371, + "GTTTGTA": 2372, + "TCCAGCA": 2373, + "GTGCATT": 2374, + "GGAAAAAA": 2375, + "CCAAGAA": 2376, + "TCAATA": 2377, + "CTTCCCA": 2378, + "TGAGAAAA": 2379, + "GGCCTCCCAAA": 2380, + "CAAGCTG": 2381, + "GCCCAAA": 2382, + "TGACTTA": 2383, + "CAGCCTT": 2384, + "CTGGATT": 2385, + "TTTTTTTA": 2386, + "TCACGG": 2387, + "GCAGTTA": 2388, + "TGACTAA": 2389, + "TTACAGG": 2390, + "TGATATG": 2391, + "TAATTATT": 2392, + "TCTTGAA": 2393, + "GCCCCTT": 2394, + "GTTCAGA": 2395, + "CTCTATG": 2396, + "CCATGGA": 2397, + "GAGGGAA": 2398, + "GGAGGCA": 2399, + "CTTTGCA": 2400, + "TCTTGG": 2401, + "GGAGGTT": 2402, + "GCCAATG": 2403, + "CTGGTGA": 2404, + "CAACCAA": 2405, + "CCAGTC": 2406, + "CTTGAGA": 2407, + "TACAGCA": 2408, + "CTTGTC": 2409, + "GACGGA": 2410, + "CTTCTTTT": 2411, + "GTGGC": 2412, + "GAGGATG": 2413, + "CAATAAAA": 2414, + "GAAATTTT": 2415, + "AAAAAAAAAA": 2416, + "CTCTATA": 2417, + "GTATGAA": 
2418, + "CTTGTTA": 2419, + "TAACATA": 2420, + "CAAACACA": 2421, + "TGATTAAA": 2422, + "GCTCTGTT": 2423, + "GTGGGTT": 2424, + "GTTGGGG": 2425, + "GTGTGTA": 2426, + "GTAATTTT": 2427, + "GTATCC": 2428, + "TGTGTGTGTGTG": 2429, + "TCTTCCTT": 2430, + "TCACTAA": 2431, + "TCTCCAAA": 2432, + "TATCAAA": 2433, + "TGATGGG": 2434, + "GGATATT": 2435, + "CAAATTTT": 2436, + "GTTCAGG": 2437, + "GTGGATT": 2438, + "GTGCAGA": 2439, + "GCTGCC": 2440, + "CTCAGAA": 2441, + "GCAGTC": 2442, + "GGATAAA": 2443, + "GCCTTCA": 2444, + "CCAGGTG": 2445, + "TATCTC": 2446, + "CAATGCA": 2447, + "CCCACTG": 2448, + "GTGTATT": 2449, + "CGACAGA": 2450, + "TGAGATA": 2451, + "CCAGGTT": 2452, + "TGTTTAA": 2453, + "CATCATG": 2454, + "TGATTCA": 2455, + "GCAATTA": 2456, + "GAAATGAA": 2457, + "CTTGGTT": 2458, + "GAAGATT": 2459, + "GGATTAA": 2460, + "CCTCATT": 2461, + "GGCCAGGCTG": 2462, + "GCTATTA": 2463, + "GCCAGCA": 2464, + "GAGACAGG": 2465, + "CTTGAGG": 2466, + "CAGTCTT": 2467, + "GTTCTCC": 2468, + "TATTTCAA": 2469, + "TGACGA": 2470, + "CATGAAAA": 2471, + "CATTATG": 2472, + "TAAATTTA": 2473, + "GAGTGAA": 2474, + "CAACAGG": 2475, + "TAAGCTT": 2476, + "CACATTTT": 2477, + "GATCTCA": 2478, + "TAGTCC": 2479, + "GACCCTG": 2480, + "TAATGCA": 2481, + "TAAGTC": 2482, + "TAATAATT": 2483, + "GAAGTAA": 2484, + "CAACTC": 2485, + "CATCATT": 2486, + "GACGAA": 2487, + "GAAACAAA": 2488, + "TATTTCTG": 2489, + "CATTAATT": 2490, + "CCACCCC": 2491, + "TAATATTTT": 2492, + "GTTTAAAA": 2493, + "GTATCTG": 2494, + "GTCAAAAA": 2495, + "GATGCTG": 2496, + "TGTTCTG": 2497, + "GGTCAAA": 2498, + "GTAGGAA": 2499, + "GTATATG": 2500, + "TGATCTG": 2501, + "GGGGCTG": 2502, + "GCATCAA": 2503, + "GCCAAAAA": 2504, + "CCACGA": 2505, + "GCTAATG": 2506, + "CAGAGAAA": 2507, + "CCTTCTG": 2508, + "TCCTCTA": 2509, + "GCAGGTT": 2510, + "CTCACTG": 2511, + "TAGATTA": 2512, + "GCCGAGA": 2513, + "CCATCCA": 2514, + "CTTTACA": 2515, + "GTACATG": 2516, + "GCACCAA": 2517, + "CTTTGTA": 2518, + "CTATGTG": 2519, + "TCACTTTT": 2520, + "TGAGTC": 2521, + "CAAGAAAA": 
2522, + "CTGACTG": 2523, + "GTTTTTTTT": 2524, + "GCATAAA": 2525, + "TAATCTG": 2526, + "GAAAAAAAA": 2527, + "CAGGATG": 2528, + "TGAGCCA": 2529, + "GAATTCA": 2530, + "TCAGACA": 2531, + "GTTCCAA": 2532, + "TCAGGTT": 2533, + "CAAACTG": 2534, + "CATTTCTT": 2535, + "TGTTAAAA": 2536, + "CCAGACA": 2537, + "CAAGTTA": 2538, + "CATGTTA": 2539, + "CATTCTA": 2540, + "TCTTTTTG": 2541, + "TGAGGGG": 2542, + "CACATTA": 2543, + "TAAAATAAA": 2544, + "GCATATA": 2545, + "TGTTCTA": 2546, + "GAAGGGG": 2547, + "GAGTGTG": 2548, + "TAAGACA": 2549, + "GAACTC": 2550, + "CCAGTAA": 2551, + "GAGAGAGG": 2552, + "GCGACC": 2553, + "CAATTCA": 2554, + "CGGCTG": 2555, + "CCAGATT": 2556, + "CCTGGG": 2557, + "GGAAGAAA": 2558, + "GAGAGG": 2559, + "TCAAAATG": 2560, + "CCTCATG": 2561, + "TAAAGG": 2562, + "CTTTGGA": 2563, + "CCAGGGA": 2564, + "GTACAGA": 2565, + "CTGAGGCAGGA": 2566, + "TGTTTCTT": 2567, + "CCAGGCTG": 2568, + "CTGAGG": 2569, + "GAGGCTG": 2570, + "CTCCTGGG": 2571, + "GAAGTC": 2572, + "CGACC": 2573, + "GGACTCA": 2574, + "GGAGTC": 2575, + "CACAATT": 2576, + "GTGTTCA": 2577, + "GACTAAA": 2578, + "GTCATTA": 2579, + "CAAAATTA": 2580, + "TGAAGAAA": 2581, + "GCACCTT": 2582, + "GTTTGCA": 2583, + "TCCTGCC": 2584, + "GTAGATG": 2585, + "GCCTGCA": 2586, + "GAGTTAA": 2587, + "TCCCTTA": 2588, + "GTGGTTA": 2589, + "TCGGGA": 2590, + "TACATAA": 2591, + "TCTCTCCA": 2592, + "CACTAAA": 2593, + "TATATATATATA": 2594, + "GTGGCAA": 2595, + "CACCATG": 2596, + "TTTGAAAA": 2597, + "CACACTG": 2598, + "CTTGGTG": 2599, + "TACACTG": 2600, + "CCTCCAA": 2601, + "CAACCTT": 2602, + "CAGCCAA": 2603, + "TTTTCAAA": 2604, + "TGATAGA": 2605, + "TACACTA": 2606, + "TCTGGG": 2607, + "TCCCAGCA": 2608, + "TAGGAAAA": 2609, + "CTTGGGG": 2610, + "TCTGTGAA": 2611, + "CCTTATT": 2612, + "CATTTAAA": 2613, + "TTTTATTTTA": 2614, + "GCCCTCC": 2615, + "CTGAGCA": 2616, + "CCCGTG": 2617, + "GTAGTGA": 2618, + "TCCTATT": 2619, + "GAAGGTG": 2620, + "TGTGCTG": 2621, + "TCCACTG": 2622, + "TAATCTA": 2623, + "TGATGTA": 2624, + "GTGGTAA": 2625, + "TAATGGA": 
2626, + "GATGAAAA": 2627, + "GTAGTAA": 2628, + "GTGGGGA": 2629, + "GTGTCAA": 2630, + "CAGACTG": 2631, + "TCGAAAA": 2632, + "CTCATTA": 2633, + "TAATAATA": 2634, + "CTCAGAAA": 2635, + "CATCCTT": 2636, + "CCGCTT": 2637, + "GGAAGG": 2638, + "CCGTGA": 2639, + "CCACTCC": 2640, + "CTAGAGA": 2641, + "TAGAATG": 2642, + "GGATTTA": 2643, + "TTAATTTT": 2644, + "GCTAATA": 2645, + "TCCCCCA": 2646, + "CAAATATT": 2647, + "GATCATG": 2648, + "TCTTAATT": 2649, + "CAGTATT": 2650, + "GTCTTGAA": 2651, + "CCGAAA": 2652, + "CTATTCA": 2653, + "TAAGATA": 2654, + "CTTGCAA": 2655, + "GCCCCAA": 2656, + "TCCCTAA": 2657, + "GAAGTTA": 2658, + "GATGATG": 2659, + "CTTGATG": 2660, + "CCCTAAA": 2661, + "CCTGCCTG": 2662, + "GACATTTT": 2663, + "CCAGCCA": 2664, + "TGTGTGTGTG": 2665, + "GTCTATA": 2666, + "TCTCTGTT": 2667, + "GTCTGTA": 2668, + "TATAATA": 2669, + "CTTGTTTT": 2670, + "CGCCATT": 2671, + "CTCAGCA": 2672, + "TACAGTT": 2673, + "CAAGAGG": 2674, + "GGAAGCA": 2675, + "GCCTTTA": 2676, + "CCCCATT": 2677, + "CAACGA": 2678, + "GTCATTTT": 2679, + "CCCGCA": 2680, + "CAGTTAA": 2681, + "GAATCTT": 2682, + "CATGTTTT": 2683, + "CCGGGG": 2684, + "CTACTGA": 2685, + "TCACGA": 2686, + "TAAATTTG": 2687, + "GCCCATT": 2688, + "CTCTAGG": 2689, + "GGACCTG": 2690, + "TCAGGGA": 2691, + "GAGACTG": 2692, + "CCAAAAAA": 2693, + "GCCGG": 2694, + "CCAGGGG": 2695, + "TCAGAAAA": 2696, + "CATCTGA": 2697, + "TCTTCAAA": 2698, + "CTACAGG": 2699, + "GAGGCAGG": 2700, + "CATTGTA": 2701, + "TAAATCAA": 2702, + "GACTCTT": 2703, + "CTGATTA": 2704, + "GCATATG": 2705, + "GGACCTT": 2706, + "CAAGACA": 2707, + "TATTTATG": 2708, + "TATTTTAAA": 2709, + "CCGAGA": 2710, + "TCATTTTA": 2711, + "CTCACTCA": 2712, + "CCACCCA": 2713, + "CTCTAGA": 2714, + "CTACATG": 2715, + "GTGCTTA": 2716, + "CAACCTG": 2717, + "TCTGTGTT": 2718, + "TAAATATG": 2719, + "CAAAGG": 2720, + "CCCTGTT": 2721, + "GTTCGG": 2722, + "TGATAAAA": 2723, + "CACGAA": 2724, + "GTTGAGG": 2725, + "CAGAGTGA": 2726, + "GAAATTAA": 2727, + "CACATA": 2728, + "GAACAGG": 2729, + "TCTCCTGA": 2730, 
+ "CCTGAGG": 2731, + "GGAGGCCAA": 2732, + "GTTTACA": 2733, + "TAACAGG": 2734, + "TGTGGTG": 2735, + "GCCTCCCAAA": 2736, + "CCATCCTG": 2737, + "GATTCTT": 2738, + "GAATGGA": 2739, + "GTAGTCA": 2740, + "CTCCTCTG": 2741, + "GAAAGAAAGAAAGAAA": 2742, + "CCCTGTG": 2743, + "CAGTATG": 2744, + "GCGATA": 2745, + "GGACTC": 2746, + "GAAAGA": 2747, + "TGTTGG": 2748, + "GTAGCTT": 2749, + "CATTTTAA": 2750, + "CCCTCTG": 2751, + "GCATTCA": 2752, + "CGATTA": 2753, + "TCACATA": 2754, + "TAATGAAA": 2755, + "GGAATTA": 2756, + "CTGTCAA": 2757, + "TAAATTAAA": 2758, + "CAAGTC": 2759, + "GTATTCA": 2760, + "GGCCATG": 2761, + "CTTTAGA": 2762, + "TGTTTCC": 2763, + "CATGTA": 2764, + "GAATAAAA": 2765, + "CAACTAA": 2766, + "TCATCTA": 2767, + "CACTCTT": 2768, + "CAGTTTG": 2769, + "CATAAAAA": 2770, + "GCATGCA": 2771, + "GATTTA": 2772, + "GAACCAA": 2773, + "TCTGTGA": 2774, + "TCAGCCA": 2775, + "TCTCCACA": 2776, + "TCTCAGCTCA": 2777, + "TATCATG": 2778, + "GCACTTA": 2779, + "CGCCAGG": 2780, + "CGGGG": 2781, + "CATTAAAAA": 2782, + "TTTGTTA": 2783, + "GGATATA": 2784, + "TCGACC": 2785, + "TAATCCA": 2786, + "CCGC": 2787, + "CATTGTT": 2788, + "CCAGTTA": 2789, + "GTAGTTA": 2790, + "CTAGGAA": 2791, + "CCTAATT": 2792, + "TCATGGG": 2793, + "GAACTAA": 2794, + "GCTATTTT": 2795, + "CCGTCA": 2796, + "CAGATTA": 2797, + "CCATATA": 2798, + "CAACTTA": 2799, + "TCAGTTTT": 2800, + "CTACCTT": 2801, + "GCACTC": 2802, + "GTGTGGA": 2803, + "GTGCCAA": 2804, + "GACAATG": 2805, + "GACAATT": 2806, + "GTACCTT": 2807, + "TAAACATT": 2808, + "CAGGAGG": 2809, + "GTGCGA": 2810, + "GAAAATTA": 2811, + "TCTCTTAA": 2812, + "CCGATT": 2813, + "GATGATT": 2814, + "CCATGGG": 2815, + "TCGGTA": 2816, + "CCATATG": 2817, + "CCAGTCC": 2818, + "GCCTTAA": 2819, + "TGATCCA": 2820, + "GTTGCAA": 2821, + "GTAGAGG": 2822, + "CAGATTTT": 2823, + "GTACTTA": 2824, + "TCTTTCTTTCTTTCTT": 2825, + "GCTCTGTG": 2826, + "TCAATAA": 2827, + "GTTTAGA": 2828, + "GTTCGA": 2829, + "CAAGGTT": 2830, + "CTCATTTT": 2831, + "CACAGG": 2832, + "CATGCTG": 2833, + "GAACGG": 2834, 
+ "TATAAAAA": 2835, + "GAAGGCA": 2836, + "GAGCATT": 2837, + "TGTTTGTG": 2838, + "GCTGTTA": 2839, + "GTCACTG": 2840, + "CAAATGAA": 2841, + "GTGACTG": 2842, + "GTTCTTTT": 2843, + "CAGGCTGGAGTGCAGTG": 2844, + "TGATGAAA": 2845, + "TAACGG": 2846, + "CTACTAA": 2847, + "GACATTA": 2848, + "GGACGA": 2849, + "GAGCATG": 2850, + "GCATGGG": 2851, + "CCACTTA": 2852, + "CTATCAA": 2853, + "GCTGTTTT": 2854, + "GTCGTG": 2855, + "CCTGGCC": 2856, + "TCTCTGAA": 2857, + "TGTTGTA": 2858, + "CAGCCAGG": 2859, + "GTTTAGG": 2860, + "CCGCAA": 2861, + "GGAGTAA": 2862, + "CCAATTA": 2863, + "CAGCAAAA": 2864, + "TCATCCA": 2865, + "CACGTA": 2866, + "TCATAGA": 2867, + "TAATTAAAA": 2868, + "CACTTAA": 2869, + "TCTTTATT": 2870, + "GAGATTA": 2871, + "TAAGAGG": 2872, + "CAAATTAA": 2873, + "GACGCA": 2874, + "CACGGA": 2875, + "GTGTGCA": 2876, + "TCT": 2877, + "TATTATTA": 2878, + "GAAATATT": 2879, + "GGAGTTA": 2880, + "TCTTTGA": 2881, + "CTGATTTT": 2882, + "TGTGAATT": 2883, + "TCCCACC": 2884, + "CCCTTTG": 2885, + "CAAGGTG": 2886, + "CAGAGTT": 2887, + "CCCCATG": 2888, + "CTACCAA": 2889, + "CTCCAAAA": 2890, + "CTTCCCC": 2891, + "CTGCTAA": 2892, + "GATTAAAA": 2893, + "GCTTATG": 2894, + "CTACTTA": 2895, + "TAAAAAATT": 2896, + "TCAGTCC": 2897, + "CTATTAAA": 2898, + "GAATGGG": 2899, + "CACAGTA": 2900, + "CAACGG": 2901, + "GGTTATT": 2902, + "TCACCCA": 2903, + "TGATGCA": 2904, + "TAATTTTTT": 2905, + "GTTTGAGA": 2906, + "GTATTAAA": 2907, + "GCCCCCA": 2908, + "TATAGTA": 2909, + "TAGTAAA": 2910, + "TGATACA": 2911, + "GTGGTTTT": 2912, + "CCACTAA": 2913, + "CACAGAGA": 2914, + "CCTCTGCCTCC": 2915, + "CAAAAAAAA": 2916, + "CTCTCTCC": 2917, + "CATAATA": 2918, + "GAAGCCA": 2919, + "GTTCCCA": 2920, + "TGTGTTTG": 2921, + "CAATGGA": 2922, + "TGAAGTA": 2923, + "CTTCATA": 2924, + "CACTGTG": 2925, + "GCTCTTTT": 2926, + "TGACATA": 2927, + "TAAAGAAAA": 2928, + "GAGAAATG": 2929, + "CAGGGAGG": 2930, + "TGTTCAA": 2931, + "GAGCCAA": 2932, + "GACAGAGA": 2933, + "GGCTGAA": 2934, + "CAAATATA": 2935, + "GTGGAAAA": 2936, + "TAAGGTT": 2937, 
+ "GTGATTA": 2938, + "GGATCTG": 2939, + "GATGTTA": 2940, + "GACTACACA": 2941, + "TCCTATA": 2942, + "CTGCCAA": 2943, + "TCCCGA": 2944, + "GTGATTTT": 2945, + "GCGTTTT": 2946, + "CAGAGTA": 2947, + "GAAAGGAA": 2948, + "CACTTTG": 2949, + "CCCCAAAA": 2950, + "GCAACCCA": 2951, + "TGCATTTT": 2952, + "TCTAGAA": 2953, + "TACTTTG": 2954, + "TGAGGCA": 2955, + "CATCTCC": 2956, + "TCGCTA": 2957, + "TGACTTTT": 2958, + "GAGCCTG": 2959, + "CATTTGTT": 2960, + "TCTTTGTT": 2961, + "GCAAAATT": 2962, + "CCTGATT": 2963, + "GATAAAAA": 2964, + "GAGTGTT": 2965, + "TCCTGTA": 2966, + "TACAGAAA": 2967, + "TCCAGGAA": 2968, + "GCCAGTG": 2969, + "TAGATTTT": 2970, + "TAATAGG": 2971, + "CTCCTCA": 2972, + "CATTTTTG": 2973, + "CATTTCAA": 2974, + "GCCATCA": 2975, + "TAAAATATA": 2976, + "GACTGTT": 2977, + "GCATGGA": 2978, + "CAAAGTT": 2979, + "CATGATT": 2980, + "GAGTTTG": 2981, + "CTAGCAA": 2982, + "CTTCCTA": 2983, + "GGGGAGG": 2984, + "CTATATG": 2985, + "TATTTATTTT": 2986, + "CACCATT": 2987, + "CCCTCAA": 2988, + "TTTTTTTTTTTTTT": 2989, + "GATCATT": 2990, + "GTACATA": 2991, + "CTCCATA": 2992, + "CCCCGTCTCTA": 2993, + "GCCTGCC": 2994, + "CTAGCTT": 2995, + "CCCGGA": 2996, + "GATGTTTT": 2997, + "GTATTTTA": 2998, + "TCAGATA": 2999, + "CCTGGAA": 3000, + "TATTCCA": 3001, + "GGACCAA": 3002, + "GCCATTA": 3003, + "CGACTGA": 3004, + "TAAGCTG": 3005, + "TAAACACA": 3006, + "GTTTCTC": 3007, + "CATCTTA": 3008, + "GAAATTTG": 3009, + "TAATGGG": 3010, + "TAAAATTTT": 3011, + "CTGTTCA": 3012, + "CCTGTTA": 3013, + "TACTGAA": 3014, + "TGACCCA": 3015, + "TGATTTTA": 3016, + "CTCCTTA": 3017, + "TATAGAA": 3018, + "CTGCGG": 3019, + "GCGGTA": 3020, + "GTGCTAA": 3021, + "CAGAGGAA": 3022, + "TACATCA": 3023, + "TCAATCAA": 3024, + "CTGCAGCC": 3025, + "TGAATATT": 3026, + "TCTACAA": 3027, + "CCACATA": 3028, + "CCCGTT": 3029, + "TATACACA": 3030, + "TCCTCTC": 3031, + "TCTACTT": 3032, + "CCGGAA": 3033, + "CTTTTTTA": 3034, + "GAAAGAAAA": 3035, + "CTATCTT": 3036, + "GACTTTG": 3037, + "TGAACAA": 3038, + "GCAGTTTT": 3039, + "GCTAAAAA": 3040, 
+ "GAGGCGG": 3041, + "TAATAAAAA": 3042, + "CTGGTCA": 3043, + "CAGACAA": 3044, + "GGATATG": 3045, + "TGAAGG": 3046, + "GCCAGAA": 3047, + "CCAGGCC": 3048, + "CCACCATG": 3049, + "CAAACTT": 3050, + "TCATGTA": 3051, + "GCTGCTT": 3052, + "GTAATA": 3053, + "CCCCCAA": 3054, + "CAGCCTG": 3055, + "TCAACTT": 3056, + "TAAAATTAA": 3057, + "GCTGAAAA": 3058, + "CGACGA": 3059, + "GTGGGCA": 3060, + "TGAGGGA": 3061, + "CGCTCC": 3062, + "TTTTGTTTT": 3063, + "GAGTCAA": 3064, + "TCATGCA": 3065, + "CTGCTTA": 3066, + "TAAGTTTT": 3067, + "GTAGCAA": 3068, + "CCTTGG": 3069, + "TGACAAAA": 3070, + "CTGGTAA": 3071, + "TCTTTATA": 3072, + "TGTGTGTT": 3073, + "CTGGTC": 3074, + "CTGGCAA": 3075, + "CATTTCTG": 3076, + "CTCTACC": 3077, + "CTGAGGA": 3078, + "CTAAAATG": 3079, + "CTAGATT": 3080, + "GTATCAA": 3081, + "CAGTCAA": 3082, + "CTGGGTG": 3083, + "CCTCTTA": 3084, + "TGAGTTTT": 3085, + "TTTTATTTA": 3086, + "CCTTTTTT": 3087, + "TATATACA": 3088, + "TAGCAAA": 3089, + "AAATTA": 3090, + "CTGGATG": 3091, + "GATAATA": 3092, + "GACAAAAA": 3093, + "CCTGGGA": 3094, + "GCTTTCA": 3095, + "GTACAGG": 3096, + "GCTGGAA": 3097, + "CTACTCA": 3098, + "CAATGTA": 3099, + "GCGTGAA": 3100, + "GATCCTT": 3101, + "TATTAATG": 3102, + "GCCCGA": 3103, + "TAAAGTG": 3104, + "GCTTCCA": 3105, + "CATGGAA": 3106, + "TGAAGTT": 3107, + "CTTTCTC": 3108, + "TCTGTGTG": 3109, + "GTATGTA": 3110, + "CAATACA": 3111, + "TCAAGG": 3112, + "CCTCTAA": 3113, + "TGTGGG": 3114, + "GATCTGA": 3115, + "GTACTGA": 3116, + "TTAATTAA": 3117, + "GCAGAAAA": 3118, + "CTACATA": 3119, + "CCGGTG": 3120, + "GGGGAAAA": 3121, + "TACAAAAAA": 3122, + "TTTTGG": 3123, + "GTGAGAA": 3124, + "TCAATAAA": 3125, + "TCAAGTT": 3126, + "CTCAGGA": 3127, + "CTACTC": 3128, + "CAAATCA": 3129, + "GGCAGAA": 3130, + "CCCGAA": 3131, + "TGTTGTG": 3132, + "GAGCAAAA": 3133, + "TATTTGTG": 3134, + "GTAGGTT": 3135, + "CTACCTG": 3136, + "CACAAAAA": 3137, + "CTCAGG": 3138, + "GCTTTA": 3139, + "CAGAGCAA": 3140, + "CTCAGTG": 3141, + "GGAAGAGA": 3142, + "TAACCTG": 3143, + "GAAATATA": 3144, + 
"CGAGAA": 3145, + "GTGAGG": 3146, + "CATTTATA": 3147, + "GGCAGCA": 3148, + "TCTAAATT": 3149, + "CCCAGTG": 3150, + "GCCTAGG": 3151, + "TGCATTA": 3152, + "CCGTAA": 3153, + "CATTCCA": 3154, + "CTAGTTA": 3155, + "GACTTAA": 3156, + "CTATACA": 3157, + "GACACAA": 3158, + "TCTTCACA": 3159, + "CCGGTT": 3160, + "TAAAGTAA": 3161, + "CTGTGGA": 3162, + "TAAGGTG": 3163, + "TCCAGTA": 3164, + "CAAATTTA": 3165, + "AAATTAAAA": 3166, + "CCATCTA": 3167, + "CTCCCTT": 3168, + "CTCCTTTT": 3169, + "GAGAGAGAGAGA": 3170, + "GGAGATA": 3171, + "CCTATTA": 3172, + "CACCAAAA": 3173, + "CCGTTA": 3174, + "TGTTTATA": 3175, + "CTCAGGAGG": 3176, + "GACGTA": 3177, + "GTCCTTA": 3178, + "GAAAGTT": 3179, + "GCTGGTG": 3180, + "CTCTACA": 3181, + "CAATAGA": 3182, + "TAAAATATT": 3183, + "GTACCTG": 3184, + "GTACTAA": 3185, + "CTTTGAAA": 3186, + "CCTTTCC": 3187, + "TAAAAATTA": 3188, + "CTCGG": 3189, + "CAAGATA": 3190, + "CATTTGA": 3191, + "CACCTCA": 3192, + "GCCAGCC": 3193, + "GTCGG": 3194, + "GCACATA": 3195, + "CACTCAA": 3196, + "CTTTTAAAA": 3197, + "CAGGAATT": 3198, + "GCCTATT": 3199, + "TCTTTCTG": 3200, + "CTGAGGCAGGAGAA": 3201, + "CAGGCAGG": 3202, + "CTAGTAA": 3203, + "TCCATA": 3204, + "GAACTTA": 3205, + "CG": 3206, + "GCTGTGA": 3207, + "GAAAATA": 3208, + "TCTTCATT": 3209, + "GAGGGAGA": 3210, + "CCCATCC": 3211, + "GAGGTGGG": 3212, + "GCCTCTA": 3213, + "GTAGGTG": 3214, + "TAAACCA": 3215, + "GAAGGAAA": 3216, + "TATTGG": 3217, + "ATG": 3218, + "TCCAGTT": 3219, + "CCCACAA": 3220, + "GAAACACA": 3221, + "GTCTCAAAA": 3222, + "CTTTTCTTTT": 3223, + "TGAAGGA": 3224, + "TATTGATT": 3225, + "CTATGTA": 3226, + "AAAAAAAAAAAAAA": 3227, + "TCCTTAAA": 3228, + "GCGCTA": 3229, + "TCCACTT": 3230, + "GACTCAA": 3231, + "TAAATACA": 3232, + "TCATGGA": 3233, + "TCTGGGA": 3234, + "TCCTATG": 3235, + "CTGTGCA": 3236, + "TCAAGTGA": 3237, + "TCATAAAA": 3238, + "CATCCAA": 3239, + "CCTTCCA": 3240, + "CTGTACA": 3241, + "GAAGGTT": 3242, + "CTGTGTA": 3243, + "GTCACTT": 3244, + "TCACAAAA": 3245, + "TCAGGCA": 3246, + "GTGTTAAA": 3247, + 
"CCCTTAA": 3248, + "CAAAGTG": 3249, + "GAAATGTT": 3250, + "CTGGGGA": 3251, + "GACGCC": 3252, + "TATATGTG": 3253, + "CTAGATG": 3254, + "GAAATTAAA": 3255, + "GAATGCA": 3256, + "GCACTAA": 3257, + "CGGGAGG": 3258, + "GCCACAA": 3259, + "CGCTTA": 3260, + "TCCACAA": 3261, + "CAGATA": 3262, + "TCTGAATT": 3263, + "TATTATTTT": 3264, + "GCGCGG": 3265, + "CTCTGAAA": 3266, + "TCTCTTTG": 3267, + "TATTTCTA": 3268, + "GGGGTGGG": 3269, + "GGATGCA": 3270, + "CCACACC": 3271, + "TAAATGTG": 3272, + "TCTTCCTG": 3273, + "GCAAGG": 3274, + "CTGCTCC": 3275, + "CTGGAGTG": 3276, + "CTGTTAAA": 3277, + "CACACAAA": 3278, + "CTGACTT": 3279, + "GAAAAGAAAA": 3280, + "CCTTCTCC": 3281, + "GAAATAAAA": 3282, + "CCTCAGGTGA": 3283, + "GATAATG": 3284, + "GAATTGCTT": 3285, + "CCAAAATT": 3286, + "CGTGAAA": 3287, + "CACTGAAA": 3288, + "CAGTGAAA": 3289, + "GATCTTA": 3290, + "GAGATGGG": 3291, + "TCTGCCA": 3292, + "TGAGGTA": 3293, + "TATGGAA": 3294, + "TATATTTTA": 3295, + "TGAACTT": 3296, + "GCAGATA": 3297, + "CTTTTCTT": 3298, + "GTAAAATG": 3299, + "TCTCTAA": 3300, + "TCTGCAAA": 3301, + "GAGCCTT": 3302, + "TATCATT": 3303, + "CAATTTTA": 3304, + "CCGCCA": 3305, + "TATTTAAAA": 3306, + "GAGAGATG": 3307, + "GAGATGGA": 3308, + "GCCAGGATG": 3309, + "CGAGTAGCTG": 3310, + "TTCATTTT": 3311, + "TATACTT": 3312, + "GTCTACA": 3313, + "GTGAGTGA": 3314, + "GCTACACA": 3315, + "GGGAGGA": 3316, + "CAAGGCA": 3317, + "GCTTTTAA": 3318, + "CACTATT": 3319, + "GTTCATA": 3320, + "TCCTC": 3321, + "GTGGACA": 3322, + "TATTTGGA": 3323, + "CTCCAGTA": 3324, + "GTTCAGTT": 3325, + "CCAAGG": 3326, + "CAGAGCC": 3327, + "CTCGCC": 3328, + "CCGATG": 3329, + "GGAATTTT": 3330, + "TCCAGCC": 3331, + "CCTCTTTT": 3332, + "GAACCTT": 3333, + "CATGCACA": 3334, + "GTTTC": 3335, + "GAAGATA": 3336, + "TACCCC": 3337, + "GCTGCCA": 3338, + "GGGGGAGG": 3339, + "GCAGTGAGCTGA": 3340, + "CTGTCTA": 3341, + "CGAGGA": 3342, + "CAATGGG": 3343, + "GCTGTGAA": 3344, + "GAAAGTG": 3345, + "TACCAAAA": 3346, + "GTCAGG": 3347, + "CAGCTCC": 3348, + "TGTGCTT": 3349, + "GTCTAGG": 
3350, + "TTTTTGTA": 3351, + "TTATATG": 3352, + "TCAGGGG": 3353, + "TATTGTTA": 3354, + "CCTGAGA": 3355, + "TATCTCA": 3356, + "CAATCTG": 3357, + "CACTCTG": 3358, + "GATTTAA": 3359, + "TGAATAA": 3360, + "TCTTGTA": 3361, + "TCAACTG": 3362, + "TCTCCAGG": 3363, + "CTAGAGG": 3364, + "CTGAGAAA": 3365, + "CTAGCTG": 3366, + "TCCACCA": 3367, + "CGATTTT": 3368, + "CCGGCC": 3369, + "GTTGACA": 3370, + "CTTAGAA": 3371, + "CATAATG": 3372, + "GAGTATT": 3373, + "CACAGAAA": 3374, + "GACTGTG": 3375, + "CTATTTTA": 3376, + "TGAGGAAA": 3377, + "TTATTAAAA": 3378, + "CTTATTTA": 3379, + "CAGACTT": 3380, + "CACGCC": 3381, + "GCTTGG": 3382, + "CCTGCTT": 3383, + "TAAAGCAA": 3384, + "CCTCGTGA": 3385, + "TAGAATT": 3386, + "CTTACAA": 3387, + "TAAAGGAA": 3388, + "GTCTAGA": 3389, + "GTGACTT": 3390, + "TACATATG": 3391, + "GTCAGGA": 3392, + "GCTCCAGG": 3393, + "GAAGGGA": 3394, + "CATGATG": 3395, + "TCATCAAA": 3396, + "CGTTAAA": 3397, + "GTACTCA": 3398, + "CTCCCAA": 3399, + "TATATGTA": 3400, + "GGTATTTT": 3401, + "TAAGCCA": 3402, + "CGAAATT": 3403, + "GTTTGTTTT": 3404, + "TCTGTCTT": 3405, + "TATATCA": 3406, + "TGTTCATT": 3407, + "CAAACCA": 3408, + "TTCATTA": 3409, + "TATTTGTA": 3410, + "GATTGAA": 3411, + "CTATAAAA": 3412, + "GATTAATT": 3413, + "CCCACCA": 3414, + "TCCTAGG": 3415, + "TAAATGTA": 3416, + "CTCTTAAA": 3417, + "GCAGTCC": 3418, + "GCGGCTG": 3419, + "GTCTCGAA": 3420, + "TGAATGA": 3421, + "CTGGGGG": 3422, + "GTCTCGA": 3423, + "GAACAAAA": 3424, + "TGAATCA": 3425, + "TGTATTTTTAGTAGAGA": 3426, + "GTTATTAA": 3427, + "TTTTTTAAAA": 3428, + "GTCAGTG": 3429, + "CCCATTA": 3430, + "CACAGGA": 3431, + "TATTCCTT": 3432, + "TCTGCCTT": 3433, + "CCTGGTG": 3434, + "GCGAGC": 3435, + "TACTAAA": 3436, + "TACACAAA": 3437, + "CCGTCC": 3438, + "GCTTTGTT": 3439, + "GCATCCA": 3440, + "CATCTAA": 3441, + "GCTGTGTT": 3442, + "GTAGACA": 3443, + "GCCTATG": 3444, + "TCTTTGTG": 3445, + "GATTCTG": 3446, + "CGCCCGG": 3447, + "GATGAGA": 3448, + "TATCTGA": 3449, + "TGAATTTG": 3450, + "CCTGATG": 3451, + "TAAAACAA": 3452, + 
"CTTTAGG": 3453, + "TTTTCCTT": 3454, + "TGAATAAA": 3455, + "CGGGGA": 3456, + "CAAACATT": 3457, + "GTATGGA": 3458, + "GCTTAAAA": 3459, + "TACCAAA": 3460, + "CAAAGAGA": 3461, + "CTCCTGCC": 3462, + "GTAAAAAAA": 3463, + "CACAGCC": 3464, + "CCATGCA": 3465, + "TACAATT": 3466, + "CTAGTGA": 3467, + "CTGAGTT": 3468, + "GAGTGAAA": 3469, + "TCTGTTTG": 3470, + "CTGTAGG": 3471, + "TATAAAAAA": 3472, + "GCATTAAA": 3473, + "GTCCATA": 3474, + "TGTTAAAAA": 3475, + "TGTTTGA": 3476, + "GAATAGA": 3477, + "CTTCAAAA": 3478, + "CTGGACA": 3479, + "CTGTAGA": 3480, + "CCATTAAA": 3481, + "CTATCTG": 3482, + "CACTATG": 3483, + "TTATCAA": 3484, + "TAAGTAAA": 3485, + "TAATCCCAGCACTTTGGGAGGCC": 3486, + "CCAGAAAA": 3487, + "TGAAGCA": 3488, + "TCCCTTTT": 3489, + "TCATACA": 3490, + "TACGTT": 3491, + "GCCGTG": 3492, + "GGAAGTG": 3493, + "GGCCAAA": 3494, + "GTACCAA": 3495, + "TCTCTACTAAAAATA": 3496, + "CATTGTG": 3497, + "TGTGTGA": 3498, + "GAAACAGA": 3499, + "CTTGACA": 3500, + "GATGAGG": 3501, + "GAGATTTT": 3502, + "CCTTCAA": 3503, + "GAATCTA": 3504, + "CTCTCCTT": 3505, + "GGCGGA": 3506, + "TCTATCTATCTATCTA": 3507, + "CACACAGA": 3508, + "TGTGTGTA": 3509, + "CAAAGCC": 3510, + "TGTGCCA": 3511, + "GTTGAAAA": 3512, + "CTCCAGCA": 3513, + "TCAAGGA": 3514, + "TAGCTCA": 3515, + "CGCTGA": 3516, + "CCTGAAAA": 3517, + "GACTATT": 3518, + "GATTCCA": 3519, + "GCTTCTA": 3520, + "GTCTGCC": 3521, + "CTTGGCA": 3522, + "TGTGGTA": 3523, + "GCTTTGA": 3524, + "GCTCTCTG": 3525, + "CTCACAGA": 3526, + "TCTTTAAA": 3527, + "CAAAGCAA": 3528, + "TACTTAA": 3529, + "GCTTCAA": 3530, + "CATTGAA": 3531, + "GGAGGAAA": 3532, + "CTATAGA": 3533, + "CTGAGGAA": 3534, + "CCTGGCA": 3535, + "CCCTATT": 3536, + "CTCGTG": 3537, + "TTACACA": 3538, + "TTAGGAA": 3539, + "CTGGTTA": 3540, + "GTTGTCC": 3541, + "TAATGAAAA": 3542, + "TATTTACA": 3543, + "GGGAATT": 3544, + "GTAGTTTT": 3545, + "GCTGCAA": 3546, + "CTACGG": 3547, + "GCCGGA": 3548, + "CTGGGCA": 3549, + "CCTTAAAA": 3550, + "GATGGAA": 3551, + "TAGATAGATAGATAGA": 3552, + "TATGTAA": 3553, + 
"GTACGG": 3554, + "TATTCAAA": 3555, + "GATCTCC": 3556, + "CCTGTTTT": 3557, + "TATTGCA": 3558, + "GGAAGGAAGGAAGGAA": 3559, + "GGTAATT": 3560, + "TTACAGA": 3561, + "TCAGC": 3562, + "GCAAAATG": 3563, + "GAGAGCA": 3564, + "GTAGAAAA": 3565, + "CATTTGAA": 3566, + "TCTTCTTTT": 3567, + "TCCCATA": 3568, + "GTTATTTA": 3569, + "CTATCTA": 3570, + "CATCCTG": 3571, + "TCTTGTG": 3572, + "TTATTATT": 3573, + "CCCGTC": 3574, + "TACTATG": 3575, + "TAAACATA": 3576, + "TAAGGAAA": 3577, + "GCTTGTG": 3578, + "CTCTAAAA": 3579, + "GTTTTAAAA": 3580, + "GACAGGA": 3581, + "TCCTAGA": 3582, + "TCCACCCA": 3583, + "GTTTGAAA": 3584, + "CCATCTCA": 3585, + "CTAAGAA": 3586, + "GTATCTA": 3587, + "GTGAGGA": 3588, + "GCTGGAGG": 3589, + "CCTGTAATCCCAGCTA": 3590, + "GCAACAA": 3591, + "CTTTCAAA": 3592, + "CAAATGTT": 3593, + "CTTGTCC": 3594, + "TCTCAAAAA": 3595, + "TATTTATTA": 3596, + "TAAGGCA": 3597, + "GAGAGGAA": 3598, + "TATGATT": 3599, + "GCATCTA": 3600, + "CGTTATT": 3601, + "GCCTGTA": 3602, + "GTTTCAAA": 3603, + "CCTTCCTTCCTTCCTT": 3604, + "GGCTTTG": 3605, + "GTCAGAA": 3606, + "CATGCATG": 3607, + "GTCATTTA": 3608, + "CTGGAAAA": 3609, + "CTTCGA": 3610, + "CCTATTTT": 3611, + "CCAACAA": 3612, + "TCCATCC": 3613, + "TAAAGTTA": 3614, + "GTCTCTC": 3615, + "TAATCAAA": 3616, + "GATTTTTG": 3617, + "GATTTCTT": 3618, + "GGGCTGA": 3619, + "GCATGTA": 3620, + "CCTGGGTT": 3621, + "GAGACAA": 3622, + "GCTGTCA": 3623, + "TGATAGG": 3624, + "GGAGACC": 3625, + "CCGGCA": 3626, + "TAATCTCA": 3627, + "TGAATTAA": 3628, + "TCTGGTG": 3629, + "GCCTC": 3630, + "GGCGCA": 3631, + "CCAGCTA": 3632, + "CAGTCTG": 3633, + "TGAACTA": 3634, + "GTAAGAA": 3635, + "CCTTTCA": 3636, + "TCCATGA": 3637, + "CAAAGGAA": 3638, + "CTCTC": 3639, + "CTCTCTCA": 3640, + "CTCCAGC": 3641, + "GTAGATA": 3642, + "CCCCCTCC": 3643, + "GGCGCC": 3644, + "TCTGTCC": 3645, + "GACCATT": 3646, + "CTTGAAAA": 3647, + "TTATCC": 3648, + "TACATGTG": 3649, + "CAAATTTG": 3650, + "TTTTGTG": 3651, + "CAGAGTG": 3652, + "GTAATAA": 3653, + "GTGAGTG": 3654, + "TTTTTCC": 3655, + 
"GGCTCTG": 3656, + "GCCCTAA": 3657, + "GGCTGTT": 3658, + "CCCAATT": 3659, + "CAGAGCTT": 3660, + "TATAAATG": 3661, + "GAGTCTG": 3662, + "TCTTAAAAA": 3663, + "GTTTTATG": 3664, + "GATCCAA": 3665, + "GGCCCTG": 3666, + "GATCCTG": 3667, + "TCAAGTG": 3668, + "GATTCAA": 3669, + "CCTCTCTT": 3670, + "GAGACGG": 3671, + "CAGATCA": 3672, + "TAAAAGAA": 3673, + "CTGAGCAA": 3674, + "CCTGCCA": 3675, + "CCTTCTA": 3676, + "CGCTCA": 3677, + "GGCTGTG": 3678, + "TGGGAAAA": 3679, + "GGAGCCTG": 3680, + "CTGAGTG": 3681, + "CGTCAAA": 3682, + "TCAAGTA": 3683, + "CGTAATT": 3684, + "TTACTTA": 3685, + "TATACTA": 3686, + "GGGCAAA": 3687, + "CAACTTTT": 3688, + "CTTTGCC": 3689, + "GCCAGGAA": 3690, + "CACACTA": 3691, + "GCCCAGC": 3692, + "TAAATAAATAAATAAA": 3693, + "CTTTCCTT": 3694, + "GGGAGAA": 3695, + "TATGGTA": 3696, + "CGGCCA": 3697, + "CCTCTCTG": 3698, + "GAAAGCAA": 3699, + "CAAGCCA": 3700, + "GGCGTT": 3701, + "CTCTTTTA": 3702, + "TCGGCCTCCCAAA": 3703, + "GATTTATT": 3704, + "CAAGTCC": 3705, + "TATCTTA": 3706, + "GTTCAAGACCA": 3707, + "CTCACACA": 3708, + "GAAATCAA": 3709, + "TGAGACC": 3710, + "GGGTAAA": 3711, + "GCTTGTT": 3712, + "GATTTTAA": 3713, + "TTTTTATA": 3714, + "CAGAGCTG": 3715, + "TCTGTTAA": 3716, + "GTAATTAA": 3717, + "TCTTTGAA": 3718, + "CTTGCCA": 3719, + "TTTTCATT": 3720, + "CCATGTA": 3721, + "TCTCGGCTCACTGCAA": 3722, + "GGATTCA": 3723, + "TCTATTAA": 3724, + "TACATAAA": 3725, + "GATTGATT": 3726, + "GGAGAGGA": 3727, + "CGCAAAA": 3728, + "GGACTAA": 3729, + "TTATGTG": 3730, + "GTCACTCA": 3731, + "GACAGCA": 3732, + "CGAGTT": 3733, + "GATGGTT": 3734, + "GGAAGAGG": 3735, + "GCCAACATGGTGAAA": 3736, + "GGAGCCA": 3737, + "TGAACTG": 3738, + "CCTCTGTG": 3739, + "GTATAAAA": 3740, + "TCCCAGAA": 3741, + "CATTTATG": 3742, + "GATTATG": 3743, + "TGTTTCTG": 3744, + "GAGTGGGTT": 3745, + "TACATATT": 3746, + "CTCCAGGA": 3747, + "GACACTG": 3748, + "GGTCTCA": 3749, + "CCGGGA": 3750, + "TGTTTAAA": 3751, + "CTCACCA": 3752, + "GGACTTA": 3753, + "GCCCACC": 3754, + "CAAATCAA": 3755, + "GAAATGTG": 3756, + 
"TAGTTAA": 3757, + "TCTATAA": 3758, + "TTAGATT": 3759, + "GTGTAGG": 3760, + "TACTGAAA": 3761, + "GCACCCA": 3762, + "GTGGGCTG": 3763, + "GAATGAAA": 3764, + "TCTAGTT": 3765, + "TCAGGAGA": 3766, + "TCCACTA": 3767, + "CTCAGTT": 3768, + "TACTTAAA": 3769, + "GACTCCA": 3770, + "TCCATTTG": 3771, + "CACAGCAA": 3772, + "GCTCATGCCTG": 3773, + "GGTGCTG": 3774, + "GCTTTCTT": 3775, + "GTGGCCA": 3776, + "TACGTG": 3777, + "GTGCAGTG": 3778, + "TGAAGTCA": 3779, + "CCTTTAA": 3780, + "TCTCAGCTCACTGCAA": 3781, + "GAAATATG": 3782, + "CCTCAAAA": 3783, + "GGGGCGG": 3784, + "CGACAA": 3785, + "GGTGATG": 3786, + "GTCTTAAA": 3787, + "CAGAAATG": 3788, + "CGTCATT": 3789, + "CCAAGCA": 3790, + "GGATCAA": 3791, + "GTGCTGGGATTA": 3792, + "GCTGGCC": 3793, + "CGGAGCTT": 3794, + "TACATGA": 3795, + "TGTTTGAA": 3796, + "TCTCCATT": 3797, + "TAAGCAAA": 3798, + "CCTTTCTT": 3799, + "TACTGTT": 3800, + "TCCATCTT": 3801, + "CTTACTT": 3802, + "CGGAGGTT": 3803, + "CAAAACAA": 3804, + "TCATAGG": 3805, + "TTACTAA": 3806, + "CTTATTTG": 3807, + "GAATGTA": 3808, + "CCCCATGGA": 3809, + "TTACTGA": 3810, + "CGGAAAA": 3811, + "CTCCAGTG": 3812, + "TGTTCCA": 3813, + "CAGATGAA": 3814, + "GTTGATA": 3815, + "TCCCCCC": 3816, + "CATTGCA": 3817, + "CTCAGCC": 3818, + "CTTACTG": 3819, + "TATCCTT": 3820, + "CTTTTATG": 3821, + "TGAGTAGCTG": 3822, + "GACTGAAA": 3823, + "CAATGAAA": 3824, + "CGACTG": 3825, + "CTTGGGA": 3826, + "GCAAGCA": 3827, + "TCACTCC": 3828, + "GATTTGA": 3829, + "CATTTTAAA": 3830, + "TCAACTA": 3831, + "GTCCAAAA": 3832, + "CACCCTG": 3833, + "TTACCTT": 3834, + "CAAGGGG": 3835, + "TTTTGGA": 3836, + "GTTATTTG": 3837, + "GCTACTG": 3838, + "CTGAGGCAGGAGAATG": 3839, + "GTGATGA": 3840, + "GTAGTC": 3841, + "TAGTATG": 3842, + "GTATAGA": 3843, + "GTGTCTA": 3844, + "GCTGCTA": 3845, + "TTAGTAA": 3846, + "TAAACATG": 3847, + "GTCACCA": 3848, + "CATCTTTT": 3849, + "CATATAA": 3850, + "TCTCTCTA": 3851, + "TTTTATTAA": 3852, + "TATTCTAA": 3853, + "GAAATTTA": 3854, + "CTTCCCTG": 3855, + "TAAAGATG": 3856, + "TACGTA": 3857, + "GTTTATTA": 
3858, + "GAAAAGAA": 3859, + "CCCACCCA": 3860, + "CAATTAAAA": 3861, + "CCGACA": 3862, + "CAAAGTGA": 3863, + "CAAACAAAA": 3864, + "GCAATTTT": 3865, + "CGATTAA": 3866, + "TTAGAGA": 3867, + "CTGATGA": 3868, + "GGAGGAGG": 3869, + "GTCCTGGG": 3870, + "TCATGAAA": 3871, + "GCAACCA": 3872, + "GTTGGCA": 3873, + "GCGGCGG": 3874, + "GTCCCCA": 3875, + "GTAGGGG": 3876, + "GCCATGTT": 3877, + "GTTCGAGA": 3878, + "GCCTATA": 3879, + "TAAATTCA": 3880, + "GGCCATT": 3881, + "GAAAACAA": 3882, + "TGTGTATG": 3883, + "GTACTC": 3884, + "TAGGGAA": 3885, + "CCTTGAA": 3886, + "TCTATTTG": 3887, + "GAGGGCA": 3888, + "GAAACTGA": 3889, + "TACGC": 3890, + "TACAAAAA": 3891, + "TCATTATT": 3892, + "GGAAAATT": 3893, + "TCAATATT": 3894, + "CCCGTA": 3895, + "GGAGAGAA": 3896, + "TTAGTTA": 3897, + "CTCAGAGA": 3898, + "TCGAGC": 3899, + "CTAGTCA": 3900, + "GATGGCA": 3901, + "TGAACATT": 3902, + "CTATGGG": 3903, + "CACACCA": 3904, + "TCAATTAA": 3905, + "GGAACTG": 3906, + "TTACATG": 3907, + "CTTTCATT": 3908, + "CAGCTCTG": 3909, + "TCTTTTTTTT": 3910, + "TAAATCTT": 3911, + "TGATCTA": 3912, + "CATACAA": 3913, + "GCTCAAAA": 3914, + "GCTGTGTG": 3915, + "TCAATCA": 3916, + "GATTTGAA": 3917, + "CCAAGGA": 3918, + "GTCCTCA": 3919, + "GTGCTCC": 3920, + "AAAATAA": 3921, + "GTGACAA": 3922, + "GCTCACGCCTG": 3923, + "CGACGG": 3924, + "TATCCAA": 3925, + "CACACATG": 3926, + "TCTCTCTCC": 3927, + "TGTGGTT": 3928, + "CTTGGTA": 3929, + "TCTGGTT": 3930, + "TTTATAA": 3931, + "CTGCTTTT": 3932, + "TGTGTCA": 3933, + "CACATCA": 3934, + "CCTAATG": 3935, + "CGTTTTTT": 3936, + "GCTGGCA": 3937, + "GACGTC": 3938, + "TATAATTA": 3939, + "TACAGTAA": 3940, + "GAAAGTAA": 3941, + "GTCTGAAA": 3942, + "CCCATTTT": 3943, + "TATATGA": 3944, + "CTTGATA": 3945, + "CTTTATTTT": 3946, + "CTTTATTA": 3947, + "GGCGAA": 3948, + "CCATGCC": 3949, + "CCTGCCTT": 3950, + "GAAGAAGAAGAA": 3951, + "CTGACTGA": 3952, + "GCCCTTA": 3953, + "TATCTAA": 3954, + "GTGTTTTA": 3955, + "TGTGGCA": 3956, + "TATTGTAA": 3957, + "GCCAGAAA": 3958, + "CCCTGTCTC": 3959, + "CACAGGAA": 3960, 
+ "AAAACAA": 3961, + "AAAAAAAAAAAAAAA": 3962, + "TAACTCC": 3963, + "GCCTAAA": 3964, + "CGAGTA": 3965, + "TAGTATT": 3966, + "GTATTTTTAGTAGAGA": 3967, + "GCTGCAGG": 3968, + "TATTGAAA": 3969, + "CCAGCCTGGG": 3970, + "GCTCCAAA": 3971, + "TACGAA": 3972, + "GGCCTCC": 3973, + "TATACAAA": 3974, + "CATGGCA": 3975, + "CATGCAA": 3976, + "TACACCA": 3977, + "CTTTACCA": 3978, + "TACAGAGA": 3979, + "TATTCTTA": 3980, + "TATGTCA": 3981, + "TCAAGCA": 3982, + "TCAATGA": 3983, + "GGCTCTT": 3984, + "GGAAGTT": 3985, + "TCCATGTT": 3986, + "GCTTTCC": 3987, + "TATGTGA": 3988, + "GTGTAGA": 3989, + "TTTTTAAAA": 3990, + "GCTGGAGA": 3991, + "GTGAGAGA": 3992, + "CCTAGAA": 3993, + "CCTCCAAA": 3994, + "CCAATGA": 3995, + "CAGGGCA": 3996, + "CTATGCA": 3997, + "CTTCACC": 3998, + "CTACAAAA": 3999, + "CTCACC": 4000, + "GAGTATG": 4001, + "TAGAAAAA": 4002, + "CTTTTGAA": 4003, + "TAAAGAGA": 4004, + "CATGTCA": 4005, + "TCTTTTAAA": 4006, + "CACAGTGA": 4007, + "GATCTAA": 4008, + "TAAGGTA": 4009, + "CATAGAA": 4010, + "CGCGCC": 4011, + "CAGCTTA": 4012, + "TATAGTT": 4013, + "CGGGCC": 4014, + "TATCCATT": 4015, + "TGTTTGTTTT": 4016, + "GCTGGCTG": 4017, + "TACAGGA": 4018, + "CTCCTTTG": 4019, + "CAATCTA": 4020, + "CCCCCTG": 4021, + "TATACTG": 4022, + "CTGAGCC": 4023, + "CGGTTA": 4024, + "TGAAGTG": 4025, + "GCTTCCTT": 4026, + "TTTTATTTG": 4027, + "TAGTGAA": 4028, + "CTGAGGTG": 4029, + "TCTTCTC": 4030, + "GACAGAAA": 4031, + "CTGAACTGAA": 4032, + "CCTGGGAA": 4033, + "TCCCCAAA": 4034, + "TATGTATT": 4035, + "GATTTCTG": 4036, + "CATTCAAA": 4037, + "CACAGTT": 4038, + "GCTTGAA": 4039, + "GTGGATCA": 4040, + "CTGAGTGA": 4041, + "TGAATTTA": 4042, + "TCAACAAA": 4043, + "GGTCATT": 4044, + "GTAATTTA": 4045, + "GCGACTT": 4046, + "CTGAGAGA": 4047, + "GTGCCCA": 4048, + "CTAGGTT": 4049, + "TCCTGAAA": 4050, + "GTCCACC": 4051, + "TCACAGAA": 4052, + "GCGAAAA": 4053, + "GTATGGG": 4054, + "TGAACAAA": 4055, + "TAAACAAAA": 4056, + "CCGTTTT": 4057, + "TCTCAATT": 4058, + "TCCAGAAA": 4059, + "GTAACAA": 4060, + "GCATTTTA": 4061, + "TCTCCATG": 
4062, + "TTATAAAA": 4063, + "CAGGCAA": 4064, + "CTAAAAAAA": 4065, + "GTTGGGA": 4066, + "TAAAGATT": 4067, + "TGAAGAGA": 4068, + "CCCCTCA": 4069, + "TGTTTATG": 4070, + "TCTACTG": 4071, + "CCAATTTT": 4072, + "GGTGGTG": 4073, + "GGAACAA": 4074, + "TGTGGGA": 4075, + "TCTGCTA": 4076, + "GAACGA": 4077, + "GTAAGTA": 4078, + "GTTGCCA": 4079, + "AAAATTTT": 4080, + "GCGCGA": 4081, + "GAAAGATG": 4082, + "GTCTCTCA": 4083, + "TCCATCAA": 4084, + "GCAGCTA": 4085, + "CACATTTG": 4086, + "CTGACAA": 4087, + "TCCACC": 4088, + "GCT": 4089, + "CCCACTT": 4090, + "GCAGGTA": 4091, + "GAGGCCA": 4092, + "TAAAGTCA": 4093, + "CTGGATA": 4094, + "CGGCAA": 4095 + }, + "merges": [ + "A A", + "T T", + "T G", + "C A", + "C C", + "T A", + "G G", + "T C", + "G A", + "AA A", + "G C", + "T AA", + "TT TT", + "T CA", + "TG A", + "TT A", + "G AA", + "T CC", + "C AA", + "C TG", + "C TT", + "G TG", + "G TT", + "G CA", + "GG A", + "C CA", + "G TA", + "G CC", + "C TA", + "T AAA", + "AA AA", + "C TC", + "G TC", + "TG TG", + "TA TT", + "CA CA", + "G AAA", + "TA TA", + "TC TT", + "TG TT", + "C AAA", + "GA GA", + "CA TT", + "TG AA", + "CA GG", + "TC TG", + "CA GA", + "TC AA", + "GG AA", + "TAA AA", + "C TGA", + "GC TT", + "G TGA", + "GC TG", + "C TCA", + "CC TT", + "CA TG", + "GC AA", + "G TCA", + "G TAA", + "TTTT A", + "TA TG", + "GA GG", + "C GG", + "GA TT", + "CC TG", + "TC TC", + "CC AA", + "G TTA", + "C TCC", + "C TAA", + "TA CA", + "C TTA", + "TC CA", + "GA TG", + "TT AA", + "GAA AA", + "TT TG", + "G TTTT", + "TC TA", + "GC CA", + "G TCC", + "C TTTT", + "GG GG", + "C GA", + "TT TA", + "CC CA", + "CAA AA", + "TG GG", + "TA GA", + "TA GG", + "GA CA", + "GG TT", + "CC CC", + "GG TG", + "CA TA", + "GC TA", + "TG TA", + "TC AAA", + "TG GA", + "TAA TT", + "TTA TT", + "TG CA", + "GG CA", + "GA TA", + "CC TA", + "TT CA", + "TC TCA", + "GG GA", + "C GC", + "CTG AA", + "G TAAA", + "TC TCC", + "TTTT TT", + "C GTG", + "GC AAA", + "TAA AAA", + "TC TGA", + "TCA TT", + "GG AAA", + "TG AAA", + "TCC TT", + "CC AAA", + "GAA 
TT", + "C TAAA", + "C GTT", + "GTG AA", + "GG CC", + "TAA TA", + "GG TA", + "TG CC", + "CA CC", + "TGA TT", + "AAAA AA", + "GC TCA", + "TCC AA", + "GA GAA", + "CTG TT", + "TA TTA", + "CA GCA", + "CTC TT", + "CTT AA", + "CA GAA", + "GC TGA", + "GTT AA", + "TC TTA", + "TA TTTT", + "GCC AA", + "CTT TG", + "GA CC", + "C GCA", + "GTA TT", + "GTC TT", + "CAA TT", + "GTG TT", + "CTC AA", + "GGA GG", + "C GAA", + "TC TTTT", + "GTC AA", + "C GCC", + "TA TAA", + "TA CC", + "TC TAA", + "CCA TT", + "C GGA", + "CAA AAA", + "CA GTG", + "TCC TG", + "CTC TG", + "GAA AAA", + "CTG TG", + "CA GC", + "TTTT AA", + "GCA TT", + "GCC TT", + "TAA TG", + "CTA TT", + "GTT TG", + "TGA TG", + "GG CTG", + "CC TCA", + "GA GGA", + "GCC TG", + "AAA TT", + "C GTA", + "TC AAAA", + "TA CAA", + "CA TCA", + "CA GTT", + "TGA GA", + "GG GAA", + "CA CTG", + "CA CAA", + "CA GGA", + "CC CCA", + "CC CTG", + "TTTT TTTT", + "TA GAA", + "GA GCA", + "CC TCC", + "CA CCA", + "TA TCA", + "GA GC", + "CA TTA", + "CACA CACA", + "GA GTG", + "GGA TT", + "TGTG TGTG", + "TA CTT", + "CA CTT", + "GTC TG", + "TGA GG", + "GA GTT", + "GAA TG", + "TCA TG", + "GA CAA", + "GA CTT", + "TATT AA", + "TAA TAA", + "GG CCA", + "CA TTTT", + "CA GCC", + "CC CTT", + "GC TAA", + "TATA TATA", + "GTG TG", + "TA CTG", + "TA GTT", + "CAA TG", + "GC TC", + "CA GTA", + "GC TCC", + "CA TAA", + "TTA TG", + "TAAA TT", + "GA TGA", + "CA TGA", + "GC GG", + "AAAA AAAA", + "CCA TG", + "GA TAA", + "GA CTG", + "TA TGA", + "GCA GG", + "GA TCA", + "G TTTTA", + "GGA TG", + "CC TGA", + "G TAAAA", + "GAA GG", + "GA TTA", + "CC TC", + "GA CCA", + "GC TTA", + "CC CAA", + "AAA TG", + "GCA TG", + "TA GTA", + "TA CCA", + "GG CTT", + "C GTC", + "TC TCTT", + "GG TCA", + "TTA TTA", + "TA CTA", + "TA GCA", + "TA TC", + "CTG GG", + "CA TC", + "C TTTTA", + "C TAAAA", + "GTG GG", + "GA GTA", + "CCA GG", + "GA TTTT", + "TA GTG", + "GAAA TT", + "CA CTA", + "TC GG", + "TCA GG", + "CAGG AA", + "GC AAAA", + "CC TTA", + "CA TCC", + "CTT GG", + "TGTG AA", + "TATT TG", + "CC 
TAA", + "CTA TG", + "GA GAAA", + "GAGA GAGA", + "GC TTTT", + "TA TAAA", + "CAA GG", + "TC TCTG", + "TGTT AA", + "TGTG TT", + "GA GCC", + "GA CTA", + "TA TATT", + "TAA AAAA", + "TTTT TG", + "GTA TG", + "CATT AA", + "TA GGA", + "TA GC", + "GTT GG", + "GAA GAA", + "TAAA TG", + "TC TGTT", + "CA GAAA", + "CAAA TT", + "TAA TTA", + "TC TGTG", + "TA TCC", + "TGAA TT", + "CTC CA", + "GTG AAA", + "GG CAA", + "GGA GA", + "GAA GA", + "GG TGA", + "GG GCA", + "CC AAAA", + "TCTC TCTC", + "CTG CA", + "CTT CTT", + "TCTT AA", + "CC CTA", + "TGTG TG", + "AAA TA", + "TGTT TG", + "GG GTT", + "GTG CTG", + "GG AAAA", + "GG GGA", + "TCA GA", + "CC TTTT", + "GAAA TG", + "GCA GCA", + "TC TGAA", + "GG GTG", + "CACA TT", + "TCTT TG", + "GG GC", + "TCC CA", + "TC CATT", + "CTG AAA", + "CTT TA", + "TC GA", + "GTT TA", + "CAA CAA", + "CTT CC", + "GCC TCC", + "TT AAA", + "GC TCTG", + "GTT TCA", + "GGA GGA", + "C GTGA", + "CA GTC", + "GAA TA", + "CA GAGA", + "CC CTC", + "CAAA TG", + "CTG CTG", + "GA TCC", + "TTTTA TT", + "AAAA TT", + "TTA TA", + "TCAA TT", + "GG TAA", + "GTTA TT", + "GC CAGG", + "GGA GAA", + "CATT TG", + "TCA CC", + "CTC AAA", + "GG TTA", + "TCC AAA", + "TC TATT", + "GCA GA", + "CTT CA", + "TCA TCA", + "C GAGG", + "TAA CA", + "GTT GTT", + "CTTA TT", + "C GTCA", + "TAA GA", + "TAA TTTT", + "CTG TA", + "TC CACA", + "GC TGTG", + "C GCTG", + "TC TAAA", + "GC GA", + "CAA TA", + "CCA CCA", + "GAA CA", + "C GAAA", + "CAGA TT", + "TCA CA", + "TTA TTTT", + "TC TCAA", + "TGA CA", + "CTCC AA", + "AAAA AAA", + "TATA TG", + "TCC TCC", + "TCA CTT", + "TC CAGG", + "CAA GA", + "GG CTA", + "GTG GTG", + "C GTAA", + "C GAGA", + "TGA TA", + "GGA TTA", + "CAA CA", + "C GATT", + "TGA GAA", + "CTCC TT", + "CTCA TT", + "GTT AAA", + "TCA TA", + "CC TCTG", + "CTC TA", + "GC TGAA", + "CTG GA", + "TAA GG", + "CTT AAA", + "TATT TA", + "CCA CA", + "CC GG", + "GTC AAA", + "TG GAA", + "C GGAA", + "TGA TGA", + "GTT CA", + "TAA CAA", + "GC TGTT", + "TAA GAA", + "CTG CC", + "TTAA TT", + "CCA GA", + "TCA GAA", + 
"GTCA TT", + "C GCTT", + "GATT AA", + "CTGA TT", + "GC CACA", + "GTAA TT", + "TC CAGA", + "GCC AAA", + "GTGA TT", + "TAAAA TT", + "CAA GAA", + "CCA CC", + "TAA TCC", + "GTT CTT", + "TC CATG", + "GC TCTT", + "TG CTG", + "GG GTA", + "TTA CA", + "GC CATT", + "GCA CA", + "GCAA TT", + "TCC CTG", + "TG TGA", + "TC GAA", + "GGA CA", + "GGAA TT", + "GTG GA", + "CTT CTG", + "TCC CC", + "GCC CC", + "CTT GA", + "TAA TGA", + "TAAA TA", + "TATA TA", + "CTG CAA", + "TCA TTA", + "GTA TA", + "TCC CCA", + "C GTTA", + "GCA GAA", + "TGA GTT", + "CTTTT TT", + "C GATG", + "CTT TCA", + "AAAA TG", + "CAGG TT", + "CTAA TT", + "C GCCA", + "TGAA AAA", + "GTT CC", + "GTCC TT", + "GTCC AA", + "GTTTT TT", + "CTC TGA", + "GC GC", + "GTT GA", + "TGAA TG", + "CTA TA", + "GCA GTG", + "CCTT AA", + "TCA CCA", + "TCA CTG", + "GCC CTG", + "TAA CTT", + "CAGA TG", + "GTA GG", + "TC TATA", + "GAGA TT", + "GTC TA", + "TTTT AAA", + "CACA TG", + "TGA CC", + "CA CAAA", + "GTG TA", + "GG GAGG", + "GCTT TG", + "CAA AAAA", + "GA GGAA", + "GTT CTG", + "TTTT TA", + "GTC TCA", + "GTT CAA", + "TC GTG", + "GCTT AA", + "GCA CC", + "CTCC TG", + "TAAA TAAA", + "CTA CA", + "CTT CCA", + "TCC TCA", + "C GCAA", + "GAA AAAA", + "GCC CA", + "TC GTT", + "GTA GA", + "CTC TCA", + "GTC CA", + "TGA CTT", + "TCC CTT", + "GC CATG", + "CACACACA CACACACA", + "GTGA TG", + "CC TCTT", + "GC CAGA", + "TCC TA", + "C GTTTT", + "GTA CA", + "GCA TA", + "GAA TTA", + "TGTGTGTG TGTGTGTG", + "CC CAGG", + "GG TTTT", + "TCAA AAA", + "TC TATG", + "CCA TA", + "TGA CAA", + "GGA TA", + "TCA GTG", + "GTA TTTT", + "GAGA TG", + "GC GTG", + "C GTCC", + "TTAA AAA", + "TAA TCA", + "CAA TTA", + "CCA CTG", + "CGG TT", + "GTT GAA", + "TGA TTA", + "CCTT TG", + "CGG TG", + "CAGG TG", + "TCAA TG", + "CTGA TG", + "TCA GGA", + "GTT TAA", + "TATT AAA", + "CTC TTA", + "GCA GGA", + "CTC TCC", + "GAA CC", + "CTT TAA", + "GG GCC", + "GTA TTA", + "GC GCC", + "CCAA TT", + "GC TAAA", + "TGA CTG", + "GATT TG", + "GA TAAA", + "TCA GCA", + "GTT CCA", + "GAAA TA", + "GA CAAA", 
+ "GA GTC", + "GC TATT", + "TCA CAA", + "GAGG TT", + "TAA CC", + "GAA GGA", + "GC TCAA", + "GAAAA TT", + "CCA GCA", + "GTTTT AA", + "GTG CC", + "TGA GGA", + "CA TAAA", + "GG TCC", + "TCA TTTT", + "TATT TATT", + "TAA TAAA", + "GCC TA", + "CTTTT AA", + "TAA GTG", + "TAA GTA", + "CTG GAA", + "CACA CA", + "GA CAGA", + "CAA CC", + "GG GAAA", + "CCA GAA", + "TCA GTT", + "TAA CTA", + "CTAA AAA", + "TGGG TT", + "TGA GTG", + "TAAAA TG", + "TATATATA TATATATA", + "GCA CTG", + "GA CTC", + "TA CAAA", + "TAAAA AAA", + "TC TACA", + "GTT GTG", + "TC GCC", + "CC CAAA", + "GTCA TG", + "CTG CTT", + "GGAA TG", + "CTA TTA", + "GA TATT", + "TA GAAA", + "GG CAGG", + "GA TGAA", + "GTA GAA", + "TCC TGA", + "TAA CTG", + "GCTG GG", + "GCAA TG", + "GCC CCA", + "GTT TGA", + "CATT TA", + "GTG CA", + "CTT GAA", + "GTG GAA", + "CTT CAA", + "TAAA TTA", + "GTG GCA", + "TCC TTA", + "GGAA AAA", + "TTTT TTA", + "CC TGTG", + "GTAA TG", + "GTG TTA", + "CTA GG", + "CAGG CTG", + "GA CACA", + "GAAAA AAA", + "TC GC", + "GTAA AAA", + "TGTT TA", + "TCTC TA", + "GTCC TG", + "CCA GGA", + "GAA CAA", + "TAA GTT", + "TGA GCA", + "GC TCCA", + "TAA GCA", + "CTCA TG", + "GTC TTA", + "CC CACA", + "CA TATT", + "GCC TCA", + "CA CTC", + "CTT CTA", + "TGA TTTT", + "TC GCA", + "CC TGTT", + "GAA GCA", + "GCAA AAA", + "GC GGA", + "CCA CAA", + "GC GCA", + "CA TATA", + "GA CATT", + "GTT CTA", + "CAAAA TT", + "GAAA GAAA", + "CC CGG", + "TA CACA", + "CCAA AAA", + "GAGG TG", + "GG CTCA", + "CA GTGA", + "TCC CAA", + "TA TCTT", + "TGA GTA", + "TC GTA", + "TTTT CTT", + "GTG GGA", + "GA GCTG", + "CC CTCC", + "TAGG TT", + "TTA GG", + "TAA TATT", + "CCA GCC", + "CA TCTT", + "GTC TGA", + "GTT TCC", + "CC TGAA", + "GGA GCA", + "GAAAA TG", + "TCA GTA", + "TAA CCA", + "GA TGTT", + "CTG TTA", + "CA TGTT", + "GG CGG", + "CA TGTG", + "GG GAGA", + "CTT TGA", + "TCTT TCTT", + "AAAAAA AAA", + "GGGG TG", + "CTT TCC", + "CTT GTT", + "GCA TTA", + "CC CAGA", + "CAAA TA", + "TC GGA", + "CA GCTT", + "TCA CTA", + "TAA TTAA", + "TAA GGA", + "GAA CTG", + 
"GCA CAA", + "GC GTT", + "GG CTC", + "TC TTTTA", + "CC TCCA", + "GG CAAA", + "CA GCTG", + "CTA CAA", + "TA CATT", + "GC TATG", + "CTT GTG", + "GA GTCA", + "GTTA TG", + "CTG CCA", + "GTC TCC", + "TGA CCA", + "CA CCTG", + "TATA TTA", + "TGA TCA", + "CA GCAA", + "GA TGTG", + "GTC TTTT", + "CTA GAA", + "GC TACA", + "CTG GGA", + "GGGG TT", + "CAA GTA", + "CAA GGA", + "CC CTCA", + "TA GCC", + "GTT GGA", + "GC TATA", + "TCTG AAA", + "TA TGTT", + "CC CCTT", + "GTT GTA", + "CC CTGA", + "TGA CTA", + "CAA GCA", + "CAA TAA", + "GAA CTT", + "CA TGAA", + "CTTA TG", + "CTAA TG", + "TC TAAAA", + "CCAA TG", + "GAA GTG", + "CC TCAA", + "CC CATT", + "CA GTCA", + "GAGAGAGA GAGAGAGA", + "TA TGTG", + "GCA GTGA", + "TCTCC TT", + "TCC CAAA", + "CCA TTA", + "CCA GTG", + "GCA TCA", + "TCAAA TT", + "GA TCTT", + "GA CAGG", + "GGA GTG", + "GTA GTA", + "CAA CTT", + "GAA GTT", + "CC CCTG", + "TCTC AAA", + "GG GTC", + "GA GCTT", + "TATG AAA", + "TA TGAA", + "GA CATG", + "CAA GTG", + "GA TATA", + "CA TCTG", + "CTG TGA", + "TAA TTTA", + "GG CAGA", + "GC GAA", + "CC TAAA", + "CCA TCA", + "CA CTGA", + "GGA CTA", + "GA CGG", + "CTC TTTT", + "CTG TCA", + "TCTCTCTC TCTCTCTC", + "TTAA TG", + "GCA GCC", + "CAAAA AAA", + "GCA CCA", + "CTA TTTT", + "GA GCAA", + "CTT GGA", + "CTG GTG", + "GAA TAA", + "TCC TTTT", + "GAA GTA", + "CA GTAA", + "CAA CCA", + "CTG TAA", + "TGA TAA", + "GCA GTT", + "CA CGG", + "TAAA TAA", + "CTG TTTT", + "CTA CTA", + "GC TCTA", + "C GAAAA", + "CAA GTT", + "CTT GTA", + "GAA TGA", + "GA GTGA", + "GCC TGA", + "GG TTTG", + "CC CATG", + "GG GGAA", + "GAA GAAA", + "TG TTA", + "CAA TTTT", + "TATA TTTT", + "CTC AAAA", + "GG TGGG", + "CC GTG", + "TATT TCA", + "CC CCAA", + "TATT TAA", + "GG CTGA", + "GG TGTG", + "CA TCAA", + "CA CTCA", + "TCTCA TT", + "GAA TTTT", + "GAA TCA", + "CAGG AAA", + "CA TACA", + "TA TTTTA", + "TTA TAA", + "GAGG AAA", + "CA TATG", + "CTT TCTT", + "CAA CTG", + "GG GCTG", + "CC CCCA", + "TTTG AAA", + "CATT AAA", + "CTT AAAA", + "GA CTGA", + "CAA TGA", + "GG CACA", + 
"CCA GTA", + "GGA TGA", + "GTTTT TG", + "GCA TTTT", + "GTG CCA", + "GCA GTA", + "GCC CTT", + "TC GTC", + "GAA CTA", + "GTG GTT", + "GTG TGA", + "GTG CTT", + "C GCTA", + "GTG TCA", + "TCTT TA", + "GCC TTA", + "CC TATT", + "CAAAA TG", + "GAA CCA", + "CTC CAGG", + "GA CTCA", + "CATG AAA", + "GC TAGG", + "TGTT AAA", + "GC GTA", + "GCA CTT", + "TCTT AAA", + "TAA GAAA", + "GG CCTG", + "TCC CTA", + "GTG GTA", + "CTG CTA", + "GGA GTT", + "GG TAAA", + "CAAA CAAA", + "GA TATG", + "TCA TGA", + "GA CCTT", + "TAA TATA", + "GC TAGA", + "GGA CTG", + "GG CATT", + "CA GTTA", + "CC CTAA", + "CA CCTT", + "GG TGAA", + "CA GCTA", + "GTG TTTT", + "CAA CTA", + "GA TCAA", + "GA GAAAA", + "TGTG AAA", + "AAAA TA", + "GATG AAA", + "CTC TAA", + "TTA CTT", + "GA TCTG", + "CCA CTT", + "GA GTTA", + "CAA TCA", + "GGATTA CAGG", + "TTTA TTTT", + "TACA TA", + "TTTTA TG", + "GA GTAA", + "GCTG AAA", + "GTA CTG", + "GC TCTC", + "TATG TA", + "TGTG TA", + "TCA TAA", + "GGA CTT", + "TCTCC AA", + "GCA TGA", + "GA CGA", + "CGCC TG", + "GA CCTG", + "GG TCTT", + "CA CCAA", + "GA TC", + "GA CCAA", + "AAAA TTA", + "GTAAA TT", + "CCA GTT", + "CA GAAAA", + "TAA CAAA", + "GG TGTT", + "GAAA TTA", + "TGCC TCA", + "CC GCC", + "CCA TTTT", + "CTT GCC", + "TCTG TA", + "CTG GCA", + "GG GATG", + "CCA TGA", + "CTA CTT", + "TAGG TG", + "TAAAAA TT", + "GAAA GAA", + "TAAAA TA", + "CTTTT TG", + "GTC AAAA", + "GGA CAA", + "TCTGA TT", + "CTC TCTT", + "TAA TTTG", + "CTC TTTG", + "GG CCTT", + "GGA TTTT", + "CTA CTG", + "GTT GCA", + "GG CTCC", + "CTC TGTG", + "CTC CAGCC", + "TTA CAA", + "GGA CCA", + "GGAA GGAA", + "TAAA GAA", + "TTA GAA", + "GTG AAAA", + "CTT GCA", + "TGGG TG", + "GGA GCC", + "CC TCTA", + "C T", + "GG GCTT", + "GG CATG", + "CTG GTT", + "TA CAGA", + "GATT AAA", + "CTC TGTT", + "TTA TCA", + "CTG AAAA", + "GTA GTT", + "GG GTCA", + "G T", + "CA GCCA", + "GC GTC", + "CA CTTA", + "GTG CTA", + "TC TTATT", + "GTA CTT", + "GG TATT", + "TA GAGA", + "TA CATG", + "CCA CTA", + "TGA GAAA", + "CAA TAAA", + "TCC AAAA", + "CGTG 
AA", + "GG TCTG", + "CTGAA TT", + "TCA GCC", + "CC TCTC", + "GTT AAAA", + "GG GATT", + "TCC TAA", + "CA CTAA", + "GGA GAAA", + "CCTT CCTT", + "GTT TCTT", + "TA TCAA", + "GA TACA", + "TAATCC CAGCA", + "CC GCA", + "TGAAA TT", + "C GTAAA", + "CTC TCTG", + "TC TTTTTT", + "GTA CAA", + "CCAAA TT", + "TGTA TTTT", + "TC GCTT", + "GG GTGA", + "GA TAGA", + "CTT TATT", + "TAAA CAA", + "GTT TATT", + "TGAA TA", + "CTA CCA", + "GTG TCC", + "CC CGA", + "TTTA TTA", + "CTCC AAA", + "TTTTTTTT TTTT", + "TCA TCC", + "GAA GCC", + "CTAAA TT", + "CAAA TTA", + "CCCC AAA", + "TCTT CTT", + "TAGG AAA", + "CA CGA", + "CA TTTTA", + "GTG CAA", + "TCTCC TG", + "TATTTT AA", + "GTT TGTT", + "GA GCCA", + "GG CCAA", + "CATT TCA", + "CA TCCA", + "CC TATA", + "GA CTTA", + "TCAAA TG", + "GTA TCA", + "TAAA TTTT", + "CTGA GGCA", + "GCC CAA", + "GG TTAA", + "TA TCTG", + "TGA CAGA", + "GGA GAGA", + "GCTG CTG", + "CC CTTA", + "TCC TCTG", + "GTA GCA", + "CCTG AAA", + "CC GAA", + "TTTT TAA", + "CTA TAA", + "CCTG TA", + "TTA CTG", + "GTA TAA", + "GG CGA", + "GA CTAA", + "TCA GAAA", + "GTG TGTG", + "CAAA GAA", + "CC TATG", + "GCA GAGA", + "CC GTT", + "TTTTA TTTT", + "GGAA GAA", + "TTA CTA", + "GCC TGGG", + "TCC CTC", + "TCC TCTT", + "GGA TCA", + "GG TCAA", + "TC GAGA", + "TATT CTT", + "TA CTC", + "GTTAA TT", + "GC GAGA", + "CTTAA TT", + "TCC TTTG", + "GTC TAA", + "CA CCCA", + "GG GTTA", + "GG GCAA", + "GGAAA TG", + "GCAAA TT", + "TA GATG", + "GCA GAAA", + "AAAAAAAA AAAAAAAA", + "CC TACA", + "GGA GTA", + "TC TAATT", + "CAA CAAA", + "TA GATT", + "GG TTTA", + "CC TAGA", + "CTT TAAA", + "TA CTTA", + "TAA TGAA", + "CTA TCA", + "TA GTAA", + "CAGA GAA", + "CAA GAAA", + "GGGG AAA", + "CGTT AA", + "CGTG TT", + "TCTG TCTG", + "TTTTAA TT", + "CTG GCC", + "TAAA TGA", + "C GTCAA", + "TTA GTA", + "GTC TCTG", + "TTTT AAAA", + "CA GTTTT", + "CTT CCTT", + "TATA TAA", + "GC TTTTA", + "TTTT TCA", + "GG TC", + "TTA TTAA", + "TTTT GTT", + "CA TAGA", + "TA GGAA", + "GAGA GAA", + "GTA GCTG", + "TTA TGA", + "GTA GTG", + "GGA GAGG", + 
"CTC TGAA", + "TA GTC", + "GA CTCC", + "TCC CTCC", + "TAA TGTT", + "CA TCTA", + "GCCA CCA", + "GTA CTA", + "TGGG AAA", + "CGCC TT", + "GCC CGG", + "GGA GGAA", + "GTA CCA", + "CGC AAA", + "CA TAAAA", + "TAA CATT", + "GC TAAAA", + "TCTT CTG", + "GCC AAAA", + "GTA TGA", + "GTC TTTG", + "TA CTGA", + "TCC CAGG", + "TTA TTTA", + "TTA GTT", + "GGA CC", + "TA TAAAA", + "CAAA CAA", + "CTT CTC", + "TCTA TCTA", + "GAAA TAA", + "GTG TAA", + "CTT TGTT", + "GA TAAAA", + "GCC CAGG", + "GC GATT", + "AAAAAA TT", + "TA CAGG", + "GG CTAA", + "TA GCTT", + "GTC TCTA", + "CTCC TGA", + "GAA TAAA", + "TTA CCA", + "GG GACA", + "GCCA CTG", + "GTT TAAA", + "GTC TGTG", + "TGA CAAA", + "TACA TTTT", + "GCCA CC", + "TG TTTT", + "TA GCAA", + "TTA TAAA", + "GA CCCA", + "GCA GC", + "CAGA CAGA", + "CA CAAAA", + "GCC CTA", + "TATT AAAA", + "C GTATT", + "CCA TCC", + "TC GATT", + "GAA GGAA", + "GA TCCA", + "TATT TGA", + "GTGAA TT", + "TA CCTT", + "C GTCTT", + "CC TAGG", + "TC GAAA", + "CTT TCTG", + "TGAA GAA", + "TCTC TCA", + "GTC TCTT", + "GGA GGGG", + "GTC TGTT", + "CTA TGA", + "GGAAA TT", + "GCA CACA", + "GCC TTTT", + "CA GTCC", + "CTG GTA", + "GCA TCC", + "TA GTTA", + "GG CTTA", + "GA GTCC", + "TG AAAA", + "TAGA TAGA", + "TGTT TGTT", + "TA CTCA", + "CATT TAA", + "GA TTTTA", + "CA CTCC", + "GAAA CAA", + "GC GCTG", + "TCTT TCA", + "CTG TCC", + "GAA CTCA", + "CGG AAA", + "TATT GTT", + "GCA CTA", + "TATT CAA", + "GC GGGG", + "GTG GCC", + "TAATT AAA", + "TA CTAA", + "GC GGTG", + "TA CCAA", + "GG TATA", + "CTA GTT", + "GCA GAGG", + "CTTTT TTTT", + "TTTTTTTT TTTTTTTT", + "TACA GTA", + "CCA TGTT", + "TA GTGA", + "CGTG TG", + "GC TCTGA", + "CTT CCTG", + "TC GCTG", + "TAAA TCA", + "TCCAA TT", + "GTT TCTG", + "GAA GAGA", + "GG GTAA", + "CCA TAA", + "TTA TATT", + "C GAATT", + "CC GGA", + "TGA GCC", + "CC GTA", + "CAGA GGA", + "GTG TTTG", + "GA CAAAA", + "TTTTTT AAA", + "GTT GCC", + "GA GTTTT", + "TC AAAAAA", + "TGTT TCA", + "TA TCTA", + "TCTC TCC", + "CTC CACA", + "TAAA TATT", + "TTTT CTG", + "CTC TCAA", + 
"CCTT AAA", + "TCTTTT AA", + "GAA CAAA", + "TTA GCA", + "GCTCA TG", + "TAAA GTA", + "GGA TAA", + "TTATT AAA", + "CTC CATT", + "TCTC TGA", + "TTA TTTG", + "CCTG TAA", + "TTA TATA", + "GA CTTTT", + "TGTT GTT", + "GCAAA TG", + "CTT CAAA", + "GAA TATT", + "GAA TCC", + "CTC TTAA", + "GCA TAA", + "GAA TGAA", + "CTTAA AAA", + "TAAAAA TG", + "TTTTAA AAA", + "CTC TGGG", + "TGA TCC", + "GC TCTCA", + "CTC CAGA", + "GAGTG CAGTG", + "CAA TATT", + "TA GAAAA", + "GTAAA TG", + "TA GCTG", + "GC TCAAA", + "GCA GGAA", + "TA CCTG", + "GG GAAAA", + "TTTT CTA", + "GGGG GGGG", + "CC GA", + "CTT TGAA", + "GGA GGTG", + "TA GTCA", + "GG CCCA", + "TGA TGTT", + "CAAA TAA", + "TCTT CCA", + "GC GCTT", + "GTA TTTG", + "GTC TC", + "GAAA TCA", + "TGA TAAA", + "CATT CTT", + "TA TCCA", + "GCC TCTG", + "TGA GATG", + "C GCCAA", + "GTTTTA TT", + "TATA TATT", + "GTA GGA", + "GACA GAA", + "CTCCAGCC TGGG", + "GC GTGA", + "GG TATG", + "GAGG GAGG", + "TCA TTTG", + "CTA CC", + "TACA GAA", + "GG TAGA", + "GA TCTA", + "GTC CATG", + "TGA GGAA", + "TAA TAAAA", + "TAAA CTT", + "TCA CATT", + "GGA GGCC", + "TCA CAAA", + "CA CTTTT", + "CGG CC", + "CAA CAGA", + "GTA GAGA", + "GTTA TTTT", + "CGTT TG", + "TC GTCA", + "TCTG CTG", + "CAA CACA", + "GG TAGG", + "GCA GCTG", + "TAGTA GAGA", + "CAA GCC", + "GCA TTTG", + "TAA TATG", + "GCTT AAA", + "GCTT CTG", + "CTC TCCA", + "TCA TCTT", + "C GTCTG", + "TCA TTTA", + "CA TAGG", + "GC TCCTT", + "TGTT CTT", + "TACA TTA", + "CACA GAA", + "TAAA TATA", + "TA GAGG", + "GA TAGG", + "TCC TGAA", + "GGA GCTG", + "TGA TATT", + "TCA TTAA", + "CTTTT AAA", + "TC GTTA", + "TAAA CTA", + "GTT TGAA", + "TAAAA TTA", + "CA CCCC", + "TCA GAGA", + "CTCC TGCCTCA", + "TGA CATT", + "GTA TTTA", + "CTT CATT", + "GAAA CTG", + "TAA CACA", + "GTT CAAA", + "GGA GATG", + "TC GGCC", + "CAGCA TT", + "TC GATG", + "TATT CTA", + "CTG TGAA", + "TATT GAA", + "TTTT CCA", + "TATT TCTT", + "GGTG AAA", + "CTGA GAA", + "GCA CAGA", + "GC GAGG", + "CTG TGTG", + "TGAAA TG", + "TGA TGAA", + "GTCC AAA", + "CTCAA TT", + "TCCA 
GAA", + "GTA TATA", + "TAAA GTT", + "TCTC AAAA", + "TCCA TCA", + "GTC TGAA", + "TGA GAGA", + "TGA TTTG", + "TTA GCC", + "CTC CATG", + "TCC CTGA", + "GA GCTA", + "CCCC CCCC", + "GTG GAAA", + "CTG GGAA", + "CAA TGAA", + "CCA CACA", + "CTT TCAA", + "C GGAGG", + "TC GTGA", + "CCA GAAA", + "GTTTT AAA", + "TGTT GAA", + "TCC TGTG", + "CTAAA TG", + "TCC TTTA", + "GTC TGGG", + "TCTC TTTT", + "TA CGG", + "TATT GTA", + "TTA GTG", + "TTA CC", + "TAATCCCAGCA CTTTG", + "TCTG GAA", + "CTT CTCA", + "CGCA TT", + "TATT TAAA", + "TCA CACA", + "TAA TCAA", + "GC GAAA", + "GG GCCA", + "GTT CATT", + "GAGAA AAA", + "TTTT GTA", + "TA CTTTT", + "TC GAGG", + "GTGAA AAA", + "CAA TATA", + "TCC CATG", + "CAA TTAA", + "CTG GAAA", + "CCCA GCA", + "TCC CATT", + "TCC TGTT", + "CTC TTTA", + "TCC CCTT", + "GTT TCAA", + "GTC CAGG", + "GGAA GGA", + "TA GTTTT", + "TGA CCTT", + "GTGCTG GGATTACAGG", + "TATT TATA", + "TCTG CAA", + "CTGAA AAA", + "TATG TTA", + "CTT CACA", + "GCA CAGG", + "CCTG CTG", + "TTTT TTAA", + "GTTA TTA", + "CC CTTTT", + "TGA TTTA", + "TA CAAAA", + "TAA GTAA", + "TTTT TAAA", + "CA TCTC", + "GTG GTGA", + "GTG GAGA", + "CTC TGCA", + "GTTAA AAA", + "TACA TACA", + "CTT TGTG", + "GGA CACA", + "TCTGA TG", + "TA TTATT", + "TCTT CTA", + "CTG TGTT", + "TCA GCTT", + "CTT TATA", + "GG CGC", + "TCC CTCA", + "GTA CC", + "TGGA GAA", + "CAAAAA TT", + "TCTT TAA", + "CTC TCTC", + "TGA GTGA", + "GCA GCTT", + "CGGA TT", + "TA CGA", + "TCTT GTT", + "TC GTAA", + "GCC TGTG", + "TATT CTG", + "GG GATA", + "GG GTCC", + "TGA GATT", + "CTTTTA TT", + "TCC CACA", + "CATG GTG", + "TTA GGA", + "GAA CACA", + "TCA TAAA", + "CAA CATT", + "GG TCCA", + "GAA TTTG", + "TATTAA TT", + "TCC TGGG", + "GCA GCAA", + "CTC TTCA", + "GAA GAGG", + "TCTG TCA", + "CTGAA TG", + "CCA CAAA", + "GTG GAGG", + "TGA TTAA", + "CTCC CTCC", + "CACACACACACACACA CACACACACACACACA", + "GC GATG", + "CATT CTG", + "GTA GAAA", + "TCA TCAA", + "TTTT CAA", + "TATG TATG", + "CCAAA TG", + "TAA TTTTA", + "TAA GGAA", + "CTT GAAA", + "AAAAAAAA AAAA", + "GC 
TCCTG", + "GCA GATG", + "GAAAAA TT", + "GA CGC", + "GTG GGGG", + "GTCAA TT", + "CTT GCTT", + "TGA CACA", + "GTG TGTT", + "CCA GAGA", + "CCCA GCC", + "TAAA GAAA", + "GTC CATT", + "TAAA TTAA", + "CC CAAAA", + "GAA TTAA", + "TGAA TTA", + "TTTT TTTG", + "CCA GCTT", + "CAA TTTG", + "CTG TTTG", + "GTC TCAA", + "GTT TGTG", + "GG CATA", + "GG TACA", + "TGA TGTG", + "GATT TCA", + "TCTG CTT", + "GTAA TTA", + "TAA AAAAAA", + "GCC GCC", + "TGTGTGTGTGTGTGTG TGTGTGTGTGTGTGTG", + "GC GTCA", + "GC TCATT", + "GAA CCTG", + "TAAA CAAA", + "GTG CTGA", + "TCA GGAA", + "TCC TCAA", + "TCTA TTTT", + "TCTG TTTT", + "CAGA GCA", + "CCA GGAA", + "GTC TTTA", + "TCTT CAA", + "TCAAAA TT", + "GC TTATT", + "GTT CCTT", + "CA CCTA", + "TCA CTGA", + "GAA GCAA", + "TAAA GA", + "TCC TTCA", + "TCTCA TG", + "TCA GTGA", + "TACA CAA", + "CA CGTG", + "CC TAAAA", + "GCC TTTG", + "GG CTTTT", + "GTT GAAA", + "GTT CTC", + "CTA GA", + "CTA CAAA", + "GCA CAAA", + "TTA CATT", + "GG CCCC", + "TAA TGTG", + "CTG CCTT", + "TCC CAGA", + "GTGAA TG", + "GGA CAGG", + "GGA TGTG", + "GTT TATA", + "TGA CCAA", + "GTG GCTG", + "GTT CTCA", + "CTTA TTTT", + "CTG GAGA", + "TTA CAAA", + "GTC TTCA", + "CAA GAGA", + "CCA TTTG", + "TCA CAGA", + "CTA GTA", + "CA TTATT", + "TTA GA", + "GC TCTCC", + "GC GCCA", + "TATG TTTT", + "TCC TCCA", + "CAGAA AAA", + "GTG GGAA", + "TAA TCTT", + "TGA GTCA", + "CTG CTC", + "GTC TCCA", + "TCA TGTT", + "GTT TCCA", + "TAA GCAA", + "CTAA AAATA", + "TGA CTGA", + "TC GGTT", + "TTA GAAA", + "TAA GCC", + "TAAA GCA", + "CC TCTCC", + "CC TCCTT", + "TCA GATT", + "TATG AAAA", + "GCTGA TG", + "CATA TTTT", + "GC TCCAA", + "CGG CGG", + "CCA CTGA", + "CA GCAAA", + "CTG TCTT", + "CTA GCA", + "TC GGGG", + "CACA GCA", + "GC TGATT", + "CTA GGA", + "TAA CTC", + "TCA TATT", + "CCTT CTT", + "CTG CAAA", + "CC CGC", + "GG TCTA", + "CCCA GGA", + "GTG TCTG", + "TAATAA TAATAA", + "TCA CATG", + "CAA TTTA", + "TATATATATATATATA TATATATATATATATA", + "CCA CAGA", + "TCAA TTTT", + "GTA TTAA", + "GAA CATT", + "TCTC TTA", + "CTA TTTG", 
+ "TCTT TCC", + "GGTT AAA", + "GC TAATT", + "CTG CTGA", + "TA CCTA", + "CAGG GTT", + "TC GCCA", + "CAAAAA TTA", + "CTT CTGA", + "GCA TGTG", + "CTA TTAA", + "GCA CATG", + "CAA CATG", + "TCA TGAA", + "GAA TGTT", + "GG GTTTT", + "CTG CCTG", + "GTC CACA", + "TAAA CA", + "CTC TGGA", + "GA CCCC", + "GG CAAAA", + "TCTG TTA", + "CTA GTG", + "CTA TATA", + "TCA GTCA", + "TAA CTAA", + "GAA GATG", + "GTC TTAA", + "CAA GGAA", + "GTAA AAAA", + "TCC CCTG", + "TC GCAA", + "TCTG CCTG", + "CC TTTTA", + "GTCC CAGCTA", + "TATA TATG", + "TATT GTG", + "TGTG TTTT", + "GC GCAA", + "CACA GTG", + "TAA GATT", + "CTC TGTA", + "GGAGG CTGA", + "GGA CAAA", + "TATTAA AAA", + "TC GTCC", + "TC GGAA", + "CTA TAAA", + "CTT CAGA", + "CTA GAAA", + "CATT CAA", + "CA CGCA", + "CAGGA TT", + "CCA TCTT", + "GTA GCC", + "GAA TTTA", + "CA CGC", + "CAA TCC", + "TGA GCAA", + "GAA GCTG", + "TCAA TTA", + "GAA GTCA", + "CTG CACA", + "CCA CGG", + "GGA TCTT", + "CTCCTGCCTCA GCCTCC", + "TAAA TGAA", + "CC GTC", + "TC GGTG", + "TTTTA TTA", + "GCA GGGG", + "GCA GGTG", + "TCTA TTA", + "TAA CTTA", + "CTAA TTTT", + "CC CGCC", + "TAA TACA", + "GGATT AAA", + "TCTC TCTG", + "GCTT CTT", + "CATT TATT", + "CCA GAGG", + "GGA CAGA", + "GCCAA TT", + "TCC CCAA", + "GTT GATT", + "GAA GAAAA", + "GCA TTTA", + "CTC TAAA", + "CACACACA CACA", + "CC TCAAA", + "TA TAATT", + "CAA TGTT", + "GCC CAGA", + "GTA TATT", + "CTAA AAAA", + "CCA CAGG", + "TAA GAGA", + "TCC TTAA", + "TA TTTTTT", + "GAA TATA", + "GGA TTTG", + "GTG TGAA", + "CTG GCTT", + "GC GGCA", + "TCC GCC", + "GCA TCTT", + "TC TAATA", + "CTG CATT", + "CTC TGCC", + "TCA CTCA", + "TCA GCAA", + "TATTA TG", + "CCA GCTG", + "GA TCTC", + "GCC TCTT", + "CTT CCAA", + "TCC TAAA", + "TCA TCTG", + "CTA TTTA", + "CTG CAGG", + "CAA GCAA", + "GC GGAA", + "GAAA TAAA", + "TAAAA TAA", + "TCA CCTT", + "CCA TGTG", + "GA CCTA", + "CAGA TGA", + "GTG GCTT", + "TTATTA TTATTA", + "TCC CGG", + "TATT TGTT", + "CTG TAAA", + "TCCA TCCA", + "CTG TATA", + "GTT TCTA", + "GTT GCTT", + "CCA TGAA", + "GC TCTTA", + 
"CTT CATG", + "GTT CCTG", + "GCTG GGA", + "TCA GAGG", + "CATT AAAA", + "TCA GTAA", + "GAA TGTG", + "CTTA TTA", + "GCA CTGA", + "TGA GGTT", + "CA TCAAA", + "CTT CTCC", + "GTT TATG", + "CTT TCCA", + "GTG CCTG", + "GAAA GGA", + "GCA TCTG", + "TA CCCA", + "TAA CAGA", + "AAAAAAAA AAA", + "CTA TGAA", + "CA GTAAA", + "TA GCTA", + "TC GTTTT", + "GTG TCTT", + "GA GCAAA", + "TC TAAAAA", + "GTT CACA", + "GAAA TGA", + "CAAA TGA", + "GCC CTGA", + "GTG TTTA", + "TCA TGTG", + "CATA TTA", + "TCAAAA AAA", + "TAA GTTA", + "TCTC TCTT", + "CCA GTGA", + "CC TCTGA", + "CAA GATG", + "GCC TGTT", + "GTT TGGG", + "CATT CATT", + "GCC CCTG", + "GTT CTGA", + "GC GGCC", + "GC GGTT", + "CAAAA CAAAA", + "TACA TATA", + "GAATT AAA", + "TCAA GAA", + "CTG TATT", + "TTTT TATT", + "GA TTATT", + "TCTAA TG", + "GTT GCTG", + "TGAA TGAA", + "TCA GCTG", + "CTT GATT", + "CAGAA TG", + "CTAA TTA", + "TATAA TG", + "GTTTT GTTTT", + "CCA GCCTG", + "TGA TGGA", + "GCA GATT", + "CTC TATT", + "GCA GTCA", + "TAA GTGA", + "CTA CACA", + "CGCA TG", + "TA GCCA", + "GTG GCTCA", + "CAAA TAAA", + "GTG CTCA", + "TTTT TTTTTT", + "TAA CATG", + "TCCCA GCTA", + "CAAA GTA", + "TCA TATA", + "CAGCA TG", + "TGA TCTT", + "CA TAATT", + "TGTG TTA", + "TTTT GAA", + "TTAA TTA", + "GATA TTA", + "TCA TTCA", + "TGA TATA", + "TGA CTCA", + "GA CGTT", + "TGA CATG", + "GTT GTGA", + "CA TTTTTT", + "GCC TGGA", + "CTA TGTT", + "CTT TGGG", + "GTC TCAAA", + "CTG GCTG", + "CCA CATG", + "GG CGTG", + "CTTAA TG", + "TAA GATG", + "GTA TAAA", + "TGTA TTA", + "TAA CTCA", + "GAGAGAGAGAGAGAGA GAGAGAGAGAGAGAGA", + "GCA TGAA", + "GTTAA TG", + "TCCA GGA", + "GAGA GAAA", + "TCTC TGTG", + "CTC TCTA", + "CCA CCTG", + "GCCA GGA", + "CTG GAGG", + "CCA TTTA", + "GTC TGGA", + "GCC CACA", + "TAGA GAA", + "CAA CTCA", + "GGCA GGA", + "TCTTA TG", + "CAAA GGA", + "GG TAAAA", + "GAGA GGA", + "GTC CAGA", + "GCC CTCA", + "GATA TTTT", + "CAGG GAA", + "CCA CATT", + "GA GGAGG", + "GAAA CTT", + "CA GAATT", + "TCA GATG", + "TATT TCC", + "TACA GTG", + "TGA GCTG", + "CCA TCTG", + 
"GAGAA TG", + "TCAA CAA", + "A TT", + "TAA CTGA", + "TGA GAGG", + "CA CTGAA", + "CCA CCTT", + "CTG CAGA", + "TCA CCAA", + "TGA GCTT", + "CAAA GCA", + "GG TTTTA", + "CGG GGTT", + "TCCAA AAA", + "TATG TATA", + "CCA GATG", + "TCCA TTTT", + "CTG CTCA", + "GA TAATT", + "CCA CCAA", + "CTCC TCC", + "GA GAATT", + "GAAA GTA", + "TAAAA TAAAA", + "CTT CTTA", + "CTG TTTA", + "GAA TCAA", + "GCA TGTT", + "GCA CGG", + "GA CTGAA", + "GTG CACA", + "GA CGTG", + "TATA CAA", + "TC GACA", + "GAA GACA", + "TAAA GGA", + "GA TCAAA", + "CAGTG TG", + "CTA GCC", + "GAGG AAAA", + "TCTG AAAA", + "GAA CCCA", + "GATG GATG", + "GTT CTTA", + "CTA TATT", + "GCA TTAA", + "TCTCTCTCTCTCTCTC TCTCTCTCTCTCTCTC", + "TCA GTC", + "TATTTT TG", + "GAGGA TT", + "GTA TGTG", + "TAA CCAA", + "GTT GTTTT", + "TTTT TCTT", + "GTG TTAA", + "CTT GGAA", + "AAAAAA TG", + "CAA TGTG", + "GTG CCTT", + "GCC TCAA", + "GA GTCTT", + "GCTAA TTTT", + "CGAA AAA", + "GTG TATA", + "GC GTTA", + "CTGCA CTCCAGCCTGGG", + "GTT CATG", + "CAAA GAAA", + "GCA GTAA", + "GGA TGAA", + "CTT TATG", + "CAGG AAAA", + "TCC TGCA", + "CTG TCTG", + "GAA CATG", + "GGA TGGA", + "GCC TGAA", + "CAAAAA TG", + "TCCAA TG", + "CCA GCAA", + "GG CCTA", + "CAA CTGA", + "GCA CCTG", + "GTC TATT", + "CC TCTCA", + "GTG GTCA", + "GTG TAAA", + "GTA CACA", + "GTAAAA TT", + "GTA CATT", + "TATA TAAA", + "CTG TTAA", + "TAA GTCA", + "GCC TCCA", + "AAATT AAA", + "GTG CAGG", + "TCC TGGA", + "GTG CAAA", + "GC GTCC", + "CCA TTAA", + "GGA GGGA", + "TCA CTTA", + "TCATT AAA", + "CAA CATA", + "TAA TAGA", + "TAA TGTA", + "GA TTTTTT", + "GTT GTCA", + "GGA GACA", + "GTG TGGG", + "TCA CAGG", + "TC GGCA", + "CTCC CTG", + "GA CCAAA", + "TGTT TATT", + "CGAA TG", + "CTCAA TG", + "TCA CCTG", + "CA GTGTT", + "TGA GACA", + "TA GGGG", + "GAAAAA TG", + "GTT GAGA", + "TC GATA", + "CTC GGGAGG", + "GTT GTC", + "CCA GTCA", + "GCC CAGGCTG", + "GAA CAGA", + "GGCTCA CTGCAA", + "GCA GACA", + "TGA GGTG", + "CA CGTT", + "TAA GAAAA", + "CCA GGCA", + "GTA TCTT", + "CTTGG GAGG", + "CTT TCTA", + "CC GCTG", + 
"GA GCTCA", + "GAGA CAGA", + "CTT CAGG", + "GCA CATT", + "GTA CAAA", + "CTT GTAA", + "GTG GGTG", + "GAA GTGA", + "GG TCTC", + "GTA TGTT", + "GCA CTCA", + "TTA TGTT", + "CAA GTCA", + "CAA GTGA", + "GAAA CTA", + "TAAA TAAAA", + "TCTT AAAA", + "GTT GGAA", + "GTT CTAA", + "CCA CTC", + "CA GTGAA", + "GAAA GG", + "GCA CGA", + "TAA CTTTT", + "GTT GTTA", + "TCA GTTA", + "CGGA TG", + "TATT TGAA", + "CC CTGAA", + "GCC CTC", + "CTT CTAA", + "TTTG TTTT", + "GA GCTGA", + "CTG TGGG", + "CAA GATT", + "GAA GCTT", + "TGA GTAA", + "CTT GCTG", + "GGA TGGG", + "CGTA TG", + "TCCA TTA", + "GTC TGCA", + "GCCA TTTT", + "GTT GTAA", + "CACA CAA", + "GGACTA CAGG", + "C GTTTTA", + "TCTT CC", + "TAA CCTT", + "CTT TAAAA", + "TGAA TTTT", + "CTA CAGA", + "GCAA GAA", + "TAA CAAAA", + "CAATT AAA", + "CCA CTCA", + "CATG GTGAAA", + "CCCA GAA", + "CTA CATT", + "CC GAGG", + "TCCA GTG", + "TGA GTTA", + "GGA GTCA", + "TAA CGA", + "GA GTAAA", + "GA CTCTG", + "GGA GCTT", + "TA CTCC", + "CTG CATG", + "GC TTTTTT", + "GTC TAAA", + "GTG CGG", + "CA TCTCA", + "TGA TCAA", + "GGA GATT", + "GC AAAAAA", + "CA CCAAA", + "TGA CGG", + "CAGA GG", + "GTT GATG", + "CTT GTCA", + "TCCA CCTG", + "GGA GCAA", + "CAA GTAA", + "CCA TAAA", + "GTG CATG", + "GCA TATT", + "GTA GATT", + "GCC TAA", + "CTCAA AAA", + "GGA GAAAA", + "CTA TCC", + "TAATA TTA", + "GTG CTC", + "CAA TATG", + "TGTG GAA", + "TGA CTC", + "GTG TATG", + "TTTTAA TG", + "GC TCTAA", + "CACAA TG", + "CA GCTCA", + "GTT GGTT", + "CTAAAA TT", + "GTC TATG", + "TGTG AAAA", + "CTG GGTT", + "CCCC TCC", + "CC CTCTT", + "GCA GGGA", + "GAAA CCA", + "CATT TCC", + "GCA GCCA", + "TCA TATG", + "GCA GGCA", + "C GTAAAA", + "TGA CCTG", + "CAGA GGTT", + "CTT GTGA", + "TTA TCTT", + "CTG TATG", + "GTCAA TG", + "GGA CGG", + "GC GTAA", + "CAAA CTA", + "TAAA TGTT", + "CTT CGG", + "CTCC CCA", + "TACAA TG", + "TCTG TAA", + "GAA TATG", + "GC GGGA", + "GGA CATT", + "TTA TGAA", + "GGA TGTT", + "GGA CATG", + "TCA GGTG", + "CAA CAAAA", + "GAAA GAGA", + "GTG GATG", + "GG GCTA", + "CCA TCAA", + "CA 
GCTGA", + "CTC CACC", + "CAA TCAA", + "GTG GTC", + "TGA CAGG", + "CCA TTCA", + "GTCC CTG", + "CAGA CACA", + "GTT GGTG", + "CC TCCTG", + "GAA CTGA", + "TATT CATT", + "GCC CATG", + "CAA TCTT", + "GAAA GCA", + "GAA TCTG", + "TTA TTTTA", + "GTT TGGA", + "TTTT TGTT", + "GGGAA TG", + "GC GACA", + "TAAA CTG", + "CCA TATT", + "GGA TCC", + "CAA GCTT", + "TAAAAAA AAA", + "TCA CTC", + "CA CTGTT", + "TGTTAA TT", + "GGA CTGA", + "GGA GTGA", + "CATA CACA", + "GTT TGTA", + "TCCA GCA", + "GTG CATT", + "GG AAAAAA", + "CCAA GAA", + "TCAA TA", + "CTT CCCA", + "TGA GAAAA", + "GGCC TCCCAAA", + "CAA GCTG", + "GCC CAAA", + "TGA CTTA", + "CA GCCTT", + "CTG GATT", + "TTTT TTTA", + "TCA CGG", + "GCA GTTA", + "TGA CTAA", + "TTA CAGG", + "TGA TATG", + "TAA TTATT", + "TCTT GAA", + "GCC CCTT", + "GTT CAGA", + "CTC TATG", + "CCA TGGA", + "GAGG GAA", + "GGA GGCA", + "CTT TGCA", + "TCTT GG", + "GGA GGTT", + "GCCAA TG", + "CTG GTGA", + "CAA CCAA", + "CCA GTC", + "CTT GAGA", + "TACA GCA", + "CTT GTC", + "GA CGGA", + "CTT CTTTT", + "GTG GC", + "GAGGA TG", + "CAA TAAAA", + "GAAA TTTT", + "AAAA AAAAAA", + "CTC TATA", + "GTA TGAA", + "CTT GTTA", + "TAA CATA", + "CAAA CACA", + "TGATT AAA", + "GCTC TGTT", + "GTG GGTT", + "GTT GGGG", + "GTG TGTA", + "GTAA TTTT", + "GTA TCC", + "TGTGTGTG TGTG", + "TCTT CCTT", + "TCA CTAA", + "TCTCC AAA", + "TA TCAAA", + "TGA TGGG", + "GGA TATT", + "CAAA TTTT", + "GTT CAGG", + "GTG GATT", + "GTG CAGA", + "GCTG CC", + "CTCA GAA", + "GCA GTC", + "GGA TAAA", + "GCC TTCA", + "CCA GGTG", + "TA TCTC", + "CAA TGCA", + "CCCA CTG", + "GTG TATT", + "CGA CAGA", + "TGA GATA", + "CCA GGTT", + "TGTT TAA", + "CATCA TG", + "TGA TTCA", + "GCAA TTA", + "GAAA TGAA", + "CTT GGTT", + "GAA GATT", + "GGA TTAA", + "CC TCATT", + "GGCCA GGCTG", + "GCTA TTA", + "GCCA GCA", + "GAGA CAGG", + "CTT GAGG", + "CA GTCTT", + "GTT CTCC", + "TATT TCAA", + "TGA CGA", + "CATG AAAA", + "CATTA TG", + "TAAA TTTA", + "GA GTGAA", + "CAA CAGG", + "TAA GCTT", + "CACA TTTT", + "GA TCTCA", + "TA GTCC", + "GACC CTG", + 
"TAA TGCA", + "TAA GTC", + "TAA TAATT", + "GAA GTAA", + "CAA CTC", + "CA TCATT", + "GA CGAA", + "GAAA CAAA", + "TATT TCTG", + "CATTAA TT", + "CCA CCCC", + "TAATA TTTT", + "GTT TAAAA", + "GTA TCTG", + "GTCAA AAA", + "GATG CTG", + "TGTT CTG", + "GG TCAAA", + "GTA GGAA", + "GTA TATG", + "TGA TCTG", + "GGGG CTG", + "GCA TCAA", + "GCCAA AAA", + "CCA CGA", + "GC TAATG", + "CAGA GAAA", + "CCTT CTG", + "TCC TCTA", + "GCA GGTT", + "CTCA CTG", + "TAGA TTA", + "GCC GAGA", + "CCA TCCA", + "CTT TACA", + "GTA CATG", + "GCA CCAA", + "CTT TGTA", + "CTA TGTG", + "TCA CTTTT", + "TGA GTC", + "CAA GAAAA", + "CTGA CTG", + "GTTTT TTTT", + "GCA TAAA", + "TAA TCTG", + "GAA AAAAAA", + "CAGGA TG", + "TGA GCCA", + "GAA TTCA", + "TCA GACA", + "GTT CCAA", + "TCA GGTT", + "CAAA CTG", + "CATT TCTT", + "TGTT AAAA", + "CCA GACA", + "CAA GTTA", + "CATG TTA", + "CATT CTA", + "TCTTTT TG", + "TGA GGGG", + "CACA TTA", + "TAAAA TAAA", + "GCA TATA", + "TGTT CTA", + "GAA GGGG", + "GAGTG TG", + "TAA GACA", + "GAA CTC", + "CCA GTAA", + "GAGA GAGG", + "GC GACC", + "CAA TTCA", + "CGG CTG", + "CCA GATT", + "CCTG GG", + "GGAA GAAA", + "GAGA GG", + "TCAAAA TG", + "CCTCA TG", + "TAAA GG", + "CTT TGGA", + "CCA GGGA", + "GTA CAGA", + "CTGAGGCA GGA", + "TGTT TCTT", + "CCA GGCTG", + "CTGA GG", + "GAGG CTG", + "CTCC TGGG", + "GAA GTC", + "CGA CC", + "GGA CTCA", + "GGA GTC", + "CA CAATT", + "GTG TTCA", + "GA CTAAA", + "GTCA TTA", + "CAAAA TTA", + "TGAA GAAA", + "GCA CCTT", + "GTT TGCA", + "TCC TGCC", + "GTA GATG", + "GCC TGCA", + "GA GTTAA", + "TCC CTTA", + "GTG GTTA", + "TC GGGA", + "TACA TAA", + "TCTC TCCA", + "CA CTAAA", + "TATATATA TATA", + "GTG GCAA", + "CACCA TG", + "TTTG AAAA", + "CACA CTG", + "CTT GGTG", + "TACA CTG", + "CC TCCAA", + "CAA CCTT", + "CA GCCAA", + "TTTT CAAA", + "TGA TAGA", + "TACA CTA", + "TCTG GG", + "TCC CAGCA", + "TAGG AAAA", + "CTT GGGG", + "TC TGTGAA", + "CC TTATT", + "CATT TAAA", + "TTTTA TTTTA", + "GCC CTCC", + "CTGA GCA", + "CC CGTG", + "GTA GTGA", + "TCC TATT", + "GAA GGTG", + "TGTG 
CTG", + "TCCA CTG", + "TAA TCTA", + "TGA TGTA", + "GTG GTAA", + "TAA TGGA", + "GATG AAAA", + "GTA GTAA", + "GTG GGGA", + "GTG TCAA", + "CAGA CTG", + "TC GAAAA", + "CTCA TTA", + "TAA TAATA", + "CTCA GAAA", + "CA TCCTT", + "CC GCTT", + "GGAA GG", + "CC GTGA", + "CCA CTCC", + "CTA GAGA", + "TAGAA TG", + "GGA TTTA", + "TTAA TTTT", + "GC TAATA", + "TCC CCCA", + "CAAA TATT", + "GA TCATG", + "TCTTAA TT", + "CA GTATT", + "GTCTT GAA", + "CC GAAA", + "CTA TTCA", + "TAA GATA", + "CTT GCAA", + "GCC CCAA", + "TCC CTAA", + "GAA GTTA", + "GA TGATG", + "CTT GATG", + "CC CTAAA", + "CCTG CCTG", + "GACA TTTT", + "CCA GCCA", + "TGTGTGTG TG", + "GTC TATA", + "TCTC TGTT", + "GTC TGTA", + "TA TAATA", + "CTT GTTTT", + "CGC CATT", + "CTCA GCA", + "TACA GTT", + "CAA GAGG", + "GGAA GCA", + "GCC TTTA", + "CC CCATT", + "CAA CGA", + "GTCA TTTT", + "CC CGCA", + "CA GTTAA", + "GAA TCTT", + "CATG TTTT", + "CC GGGG", + "CTA CTGA", + "TCA CGA", + "TAAA TTTG", + "GCC CATT", + "CTC TAGG", + "GGA CCTG", + "TCA GGGA", + "GAGA CTG", + "CC AAAAAA", + "GCC GG", + "CCA GGGG", + "TCA GAAAA", + "CA TCTGA", + "TCTT CAAA", + "CTA CAGG", + "GAGG CAGG", + "CATT GTA", + "TAAA TCAA", + "GA CTCTT", + "CTGA TTA", + "GCA TATG", + "GGA CCTT", + "CAA GACA", + "TATT TATG", + "TATTTT AAA", + "CC GAGA", + "TCA TTTTA", + "CTCA CTCA", + "CCA CCCA", + "CTC TAGA", + "CTA CATG", + "GTG CTTA", + "CAA CCTG", + "TC TGTGTT", + "TAAA TATG", + "CAAA GG", + "CC CTGTT", + "GTT CGG", + "TGA TAAAA", + "CA CGAA", + "GTT GAGG", + "CAGA GTGA", + "GAAA TTAA", + "CACA TA", + "GAA CAGG", + "TCTCC TGA", + "CC TGAGG", + "GGAGG CCAA", + "GTT TACA", + "TAA CAGG", + "TGTG GTG", + "GCCTCC CAAA", + "CCA TCCTG", + "GATT CTT", + "GAA TGGA", + "GTA GTCA", + "CTCC TCTG", + "GAAAGAAA GAAAGAAA", + "CC CTGTG", + "CAGTA TG", + "GC GATA", + "GGA CTC", + "GAAA GA", + "TGTT GG", + "GTA GCTT", + "CA TTTTAA", + "CC CTCTG", + "GCA TTCA", + "CGA TTA", + "TCA CATA", + "TAA TGAAA", + "GGAA TTA", + "CTG TCAA", + "TAAATT AAA", + "CAA GTC", + "GTA TTCA", + "GGCCA TG", + 
"CTT TAGA", + "TGTT TCC", + "CATG TA", + "GAA TAAAA", + "CAA CTAA", + "TCA TCTA", + "CA CTCTT", + "CA GTTTG", + "CA TAAAAA", + "GCA TGCA", + "GATT TA", + "GAA CCAA", + "TCTG TGA", + "TCA GCCA", + "TCTC CACA", + "TCTCA GCTCA", + "TATCA TG", + "GCA CTTA", + "CGC CAGG", + "CGG GG", + "CATTAA AAA", + "TTTG TTA", + "GGA TATA", + "TC GACC", + "TAA TCCA", + "CC GC", + "CATT GTT", + "CCA GTTA", + "GTA GTTA", + "CTA GGAA", + "CC TAATT", + "TCA TGGG", + "GAA CTAA", + "GCTA TTTT", + "CC GTCA", + "CAGA TTA", + "CCA TATA", + "CAA CTTA", + "TCA GTTTT", + "CTA CCTT", + "GCA CTC", + "GTG TGGA", + "GTG CCAA", + "GACAA TG", + "GA CAATT", + "GTA CCTT", + "TAAA CATT", + "CA GGAGG", + "GTG CGA", + "GAAAA TTA", + "TCTCTT AA", + "CC GATT", + "GA TGATT", + "CCA TGGG", + "TC GGTA", + "CCA TATG", + "CCA GTCC", + "GCC TTAA", + "TGA TCCA", + "GTT GCAA", + "GTA GAGG", + "CAGA TTTT", + "GTA CTTA", + "TCTTTCTT TCTTTCTT", + "GCTC TGTG", + "TCAA TAA", + "GTT TAGA", + "GTT CGA", + "CAA GGTT", + "CTCA TTTT", + "CACA GG", + "CATG CTG", + "GAA CGG", + "TA TAAAAA", + "GAA GGCA", + "GA GCATT", + "TGTT TGTG", + "GCTG TTA", + "GTCA CTG", + "CAAA TGAA", + "GTGA CTG", + "GTT CTTTT", + "CAGGCTG GAGTGCAGTG", + "TGA TGAAA", + "TAA CGG", + "CTA CTAA", + "GACA TTA", + "GGA CGA", + "GAGCA TG", + "GCA TGGG", + "CCA CTTA", + "CTA TCAA", + "GCTG TTTT", + "GTC GTG", + "CCTG GCC", + "TCTC TGAA", + "TGTT GTA", + "CAGC CAGG", + "GTT TAGG", + "CC GCAA", + "GGA GTAA", + "CCAA TTA", + "CAGC AAAA", + "TCA TCCA", + "CA CGTA", + "TCA TAGA", + "TAATT AAAA", + "CA CTTAA", + "TCTT TATT", + "GAGA TTA", + "TAA GAGG", + "CAAA TTAA", + "GA CGCA", + "CA CGGA", + "GTG TGCA", + "TC T", + "TATTA TTA", + "GAAA TATT", + "GGA GTTA", + "TCTT TGA", + "CTGA TTTT", + "TGTGAA TT", + "TCC CACC", + "CC CTTTG", + "CAA GGTG", + "CAGA GTT", + "CCCCA TG", + "CTA CCAA", + "CTCC AAAA", + "CTT CCCC", + "CTG CTAA", + "GATT AAAA", + "GC TTATG", + "CTA CTTA", + "TAAAAAA TT", + "TCA GTCC", + "CTATT AAA", + "GAA TGGG", + "CACA GTA", + "CAA CGG", + "GG 
TTATT", + "TCA CCCA", + "TGA TGCA", + "TAA TTTTTT", + "GTT TGAGA", + "GTATT AAA", + "GCC CCCA", + "TATA GTA", + "TA GTAAA", + "TGA TACA", + "GTG GTTTT", + "CCA CTAA", + "CACA GAGA", + "CCTCTG CCTCC", + "CAA AAAAAA", + "CTC TCTCC", + "CA TAATA", + "GAA GCCA", + "GTT CCCA", + "TGTG TTTG", + "CAA TGGA", + "TGAA GTA", + "CTT CATA", + "CA CTGTG", + "GC TCTTTT", + "TGA CATA", + "TAAA GAAAA", + "GAGAAA TG", + "CAGG GAGG", + "TGTT CAA", + "GA GCCAA", + "GACA GAGA", + "GG CTGAA", + "CAAA TATA", + "GTG GAAAA", + "TAA GGTT", + "GTGA TTA", + "GGA TCTG", + "GATG TTA", + "GACTA CACA", + "TCC TATA", + "CTG CCAA", + "TCC CGA", + "GTGA TTTT", + "GC GTTTT", + "CAGA GTA", + "GAAA GGAA", + "CA CTTTG", + "CCCC AAAA", + "GCAA CCCA", + "TGCA TTTT", + "TCTA GAA", + "TA CTTTG", + "TGA GGCA", + "CA TCTCC", + "TC GCTA", + "TGA CTTTT", + "GA GCCTG", + "CATT TGTT", + "TCTT TGTT", + "GCAAAA TT", + "CC TGATT", + "GA TAAAAA", + "GA GTGTT", + "TCC TGTA", + "TACA GAAA", + "TC CAGGAA", + "GCCA GTG", + "TAGA TTTT", + "TAA TAGG", + "CTCC TCA", + "CATTTT TG", + "CATT TCAA", + "GCCA TCA", + "TAAAA TATA", + "GA CTGTT", + "GCA TGGA", + "CAAA GTT", + "CA TGATT", + "GA GTTTG", + "CTA GCAA", + "CTT CCTA", + "GG GGAGG", + "CTA TATG", + "TATT TATTTT", + "CA CCATT", + "CC CTCAA", + "TTTTTTTT TTTTTT", + "GA TCATT", + "GTA CATA", + "CTC CATA", + "CCCC GTCTCTA", + "GCC TGCC", + "CTA GCTT", + "CC CGGA", + "GATG TTTT", + "GTA TTTTA", + "TCA GATA", + "CCTG GAA", + "TATT CCA", + "GGA CCAA", + "GCCA TTA", + "CGA CTGA", + "TAA GCTG", + "TAAA CACA", + "GTT TCTC", + "CA TCTTA", + "GAAA TTTG", + "TAA TGGG", + "TAAAA TTTT", + "CTG TTCA", + "CCTG TTA", + "TA CTGAA", + "TGA CCCA", + "TGA TTTTA", + "CTCC TTA", + "TATA GAA", + "CTG CGG", + "GC GGTA", + "GTG CTAA", + "CAGA GGAA", + "TACA TCA", + "TCAA TCAA", + "CTG CAGCC", + "TGAA TATT", + "TCTA CAA", + "CCA CATA", + "CC CGTT", + "TATA CACA", + "TCC TCTC", + "TCTA CTT", + "CC GGAA", + "CTTTT TTA", + "GAAA GAAAA", + "CTA TCTT", + "GA CTTTG", + "TGAA CAA", + "GCA GTTTT", + "GC 
TAAAAA", + "GAGG CGG", + "TAA TAAAAA", + "CTG GTCA", + "CAGA CAA", + "GGA TATG", + "TGAA GG", + "GCCA GAA", + "CCA GGCC", + "CCA CCATG", + "CAAA CTT", + "TCA TGTA", + "GCTG CTT", + "GTAA TA", + "CCCC CAA", + "CA GCCTG", + "TCAA CTT", + "TAAAA TTAA", + "GCTG AAAA", + "CGA CGA", + "GTG GGCA", + "TGA GGGA", + "CGC TCC", + "TTTT GTTTT", + "GA GTCAA", + "TCA TGCA", + "CTG CTTA", + "TAA GTTTT", + "GTA GCAA", + "CCTT GG", + "TGA CAAAA", + "CTG GTAA", + "TCTT TATA", + "TGTG TGTT", + "CTG GTC", + "CTG GCAA", + "CATT TCTG", + "CTC TACC", + "CTGA GGA", + "CTAAAA TG", + "CTA GATT", + "GTA TCAA", + "CA GTCAA", + "CTG GGTG", + "CC TCTTA", + "TGA GTTTT", + "TTTTA TTTA", + "CC TTTTTT", + "TATA TACA", + "TA GCAAA", + "AAA TTA", + "CTG GATG", + "GA TAATA", + "GA CAAAAA", + "CCTG GGA", + "GCTT TCA", + "GTA CAGG", + "GCTG GAA", + "CTA CTCA", + "CAA TGTA", + "GC GTGAA", + "GA TCCTT", + "TATTAA TG", + "GCC CGA", + "TAAA GTG", + "GCTT CCA", + "CATG GAA", + "TGAA GTT", + "CTT TCTC", + "TCTGTG TG", + "GTA TGTA", + "CAA TACA", + "TCAA GG", + "CC TCTAA", + "TGTG GG", + "GA TCTGA", + "GTA CTGA", + "TTAA TTAA", + "GCA GAAAA", + "CTA CATA", + "CC GGTG", + "GGGG AAAA", + "TACAA AAAA", + "TTTT GG", + "GTGA GAA", + "TCAA TAAA", + "TCAA GTT", + "CTCA GGA", + "CTA CTC", + "CAAA TCA", + "GGCA GAA", + "CC CGAA", + "TGTT GTG", + "GAGC AAAA", + "TATT TGTG", + "GTA GGTT", + "CTA CCTG", + "CA CAAAAA", + "CTCA GG", + "GCTT TA", + "CAGA GCAA", + "CTCA GTG", + "GGAA GAGA", + "TAA CCTG", + "GAAA TATA", + "CGA GAA", + "GTGA GG", + "CATT TATA", + "GGCA GCA", + "TC TAAATT", + "CCCA GTG", + "GCC TAGG", + "TGCA TTA", + "CC GTAA", + "CATT CCA", + "CTA GTTA", + "GA CTTAA", + "CTA TACA", + "GACA CAA", + "TCTT CACA", + "CC GGTT", + "TAAA GTAA", + "CTG TGGA", + "TAA GGTG", + "TCCA GTA", + "CAAA TTTA", + "AAATT AAAA", + "CCA TCTA", + "CTCC CTT", + "CTCC TTTT", + "GAGAGAGA GAGA", + "GGA GATA", + "CCTA TTA", + "CACC AAAA", + "CC GTTA", + "TGTT TATA", + "CTCA GGAGG", + "GA CGTA", + "GTCC TTA", + "GAAA GTT", + "GCTG GTG", + 
"CTC TACA", + "CAA TAGA", + "TAAAA TATT", + "GTA CCTG", + "GTA CTAA", + "CTT TGAAA", + "CCTT TCC", + "TAAAAA TTA", + "CTC GG", + "CAA GATA", + "CATT TGA", + "CACC TCA", + "GCCA GCC", + "GTC GG", + "GCA CATA", + "CA CTCAA", + "CTTTT AAAA", + "CAGGAA TT", + "GCC TATT", + "TCTT TCTG", + "CTGAGGCA GGAGAA", + "CAGG CAGG", + "CTA GTAA", + "TCCA TA", + "GAA CTTA", + "C G", + "GCTG TGA", + "GAAAA TA", + "TCTT CATT", + "GAGG GAGA", + "CCCA TCC", + "GAGG TGGG", + "GCC TCTA", + "GTA GGTG", + "TAAA CCA", + "GAA GGAAA", + "TATT GG", + "A TG", + "TCCA GTT", + "CCCA CAA", + "GAAA CACA", + "GTC TCAAAA", + "CTTTT CTTTT", + "TGAA GGA", + "TATT GATT", + "CTA TGTA", + "AAAAAAAA AAAAAA", + "TCCTT AAA", + "GC GCTA", + "TCCA CTT", + "GA CTCAA", + "TAAA TACA", + "TCA TGGA", + "TCTG GGA", + "TCC TATG", + "CTG TGCA", + "TCAA GTGA", + "TCA TAAAA", + "CA TCCAA", + "CCTT CCA", + "CTG TACA", + "GAA GGTT", + "CTG TGTA", + "GTCA CTT", + "TCA CAAAA", + "TCA GGCA", + "GTGTT AAA", + "CC CTTAA", + "CAAA GTG", + "GAAA TGTT", + "CTG GGGA", + "GA CGCC", + "TATA TGTG", + "CTA GATG", + "GAAATT AAA", + "GAA TGCA", + "GCA CTAA", + "CGG GAGG", + "GCCA CAA", + "CGC TTA", + "TCCA CAA", + "CAGA TA", + "TC TGAATT", + "TATTA TTTT", + "GC GCGG", + "CTC TGAAA", + "TCTCTT TG", + "TATT TCTA", + "GGGG TGGG", + "GGA TGCA", + "CCA CACC", + "TAAA TGTG", + "TCTT CCTG", + "GCAA GG", + "CTG CTCC", + "CTG GAGTG", + "CTGTT AAA", + "CACA CAAA", + "CTGA CTT", + "GAAAA GAAAA", + "CCTT CTCC", + "GAAA TAAAA", + "CCTCA GGTGA", + "GA TAATG", + "GAATT GCTT", + "CCAAAA TT", + "CGTG AAA", + "CACTG AAA", + "CAGTG AAA", + "GA TCTTA", + "GAGA TGGG", + "TCTG CCA", + "TGA GGTA", + "TATG GAA", + "TATA TTTTA", + "TGAA CTT", + "GCA GATA", + "CTTTT CTT", + "GTAAAA TG", + "TCTC TAA", + "TCTG CAAA", + "GA GCCTT", + "TA TCATT", + "CAA TTTTA", + "CC GCCA", + "TATT TAAAA", + "GAGA GATG", + "GAGA TGGA", + "GCCA GGATG", + "CGA GTAGCTG", + "TTCA TTTT", + "TATA CTT", + "GTC TACA", + "GTGA GTGA", + "GCTA CACA", + "GGGA GGA", + "CAA GGCA", + "GC TTTTAA", 
+ "CA CTATT", + "GTT CATA", + "TCC TC", + "GTG GACA", + "TATT TGGA", + "CTC CAGTA", + "GTT CAGTT", + "CCAA GG", + "CAGA GCC", + "CTC GCC", + "CC GATG", + "GGAA TTTT", + "TCCA GCC", + "CC TCTTTT", + "GAA CCTT", + "CATG CACA", + "GTT TC", + "GAA GATA", + "TA CCCC", + "GCTG CCA", + "GGGG GAGG", + "GCAGTGA GCTGA", + "CTG TCTA", + "CGA GGA", + "CAA TGGG", + "GC TGTGAA", + "GAAA GTG", + "TACC AAAA", + "GTCA GG", + "CAGC TCC", + "TGTG CTT", + "GTC TAGG", + "TTTT TGTA", + "TTA TATG", + "TCA GGGG", + "TATT GTTA", + "CC TGAGA", + "TA TCTCA", + "CAA TCTG", + "CA CTCTG", + "GATT TAA", + "TGAA TAA", + "TCTT GTA", + "TCAA CTG", + "TCTC CAGG", + "CTA GAGG", + "CTGA GAAA", + "CTA GCTG", + "TCCA CCA", + "CGA TTTT", + "CC GGCC", + "GTT GACA", + "CTTA GAA", + "CA TAATG", + "GA GTATT", + "CACA GAAA", + "GA CTGTG", + "CTA TTTTA", + "TGA GGAAA", + "TTATT AAAA", + "CTTA TTTA", + "CAGA CTT", + "CA CGCC", + "GCTT GG", + "CCTG CTT", + "TAAA GCAA", + "CCTC GTGA", + "TA GAATT", + "CTTA CAA", + "TAAA GGAA", + "GTC TAGA", + "GTGA CTT", + "TACA TATG", + "GTCA GGA", + "GCTC CAGG", + "GAA GGGA", + "CA TGATG", + "TCA TCAAA", + "CGTT AAA", + "GTA CTCA", + "CTCC CAA", + "TATA TGTA", + "GGTA TTTT", + "TAA GCCA", + "C GAAATT", + "GTTTG TTTT", + "TCTG TCTT", + "TATA TCA", + "TGTT CATT", + "CAAA CCA", + "TTCA TTA", + "TATT TGTA", + "GATT GAA", + "CTA TAAAA", + "GATTAA TT", + "CCCA CCA", + "TCC TAGG", + "TAAA TGTA", + "CTCTT AAA", + "GCA GTCC", + "GC GGCTG", + "GTC TCGAA", + "TGAA TGA", + "CTG GGGG", + "GTC TCGA", + "GAA CAAAA", + "TGAA TCA", + "TGTATTTT TAGTAGAGA", + "GTTA TTAA", + "TTTTTT AAAA", + "GTCA GTG", + "CCCA TTA", + "CACA GGA", + "TATT CCTT", + "TCTG CCTT", + "CCTG GTG", + "GC GAGC", + "TA CTAAA", + "TACA CAAA", + "CC GTCC", + "GCTT TGTT", + "GCA TCCA", + "CA TCTAA", + "GC TGTGTT", + "GTA GACA", + "GCC TATG", + "TCTT TGTG", + "GATT CTG", + "CGCC CGG", + "GA TGAGA", + "TA TCTGA", + "TGAA TTTG", + "CC TGATG", + "TAAAA CAA", + "CTT TAGG", + "TTTT CCTT", + "TGAA TAAA", + "CGG GGA", + "CAAA CATT", + 
"GTA TGGA", + "GCTT AAAA", + "TA CCAAA", + "CAAA GAGA", + "CTCC TGCC", + "GTAAAA AAA", + "CACA GCC", + "CCA TGCA", + "TA CAATT", + "CTA GTGA", + "CTGA GTT", + "GAGTG AAA", + "TCTGTT TG", + "CTG TAGG", + "TATAA AAAA", + "GCATT AAA", + "GTC CATA", + "TGTTAA AAA", + "TGTT TGA", + "GAA TAGA", + "CTT CAAAA", + "CTG GACA", + "CTG TAGA", + "CCATT AAA", + "CTA TCTG", + "CACTA TG", + "TTA TCAA", + "TAA GTAAA", + "TAATCCCAGCACTTTG GGAGGCC", + "CCA GAAAA", + "TGAA GCA", + "TCC CTTTT", + "TCA TACA", + "TA CGTT", + "GCC GTG", + "GGAA GTG", + "GG CCAAA", + "GTA CCAA", + "TCTCTA CTAAAAATA", + "CATT GTG", + "TGTG TGA", + "GAAA CAGA", + "CTT GACA", + "GA TGAGG", + "GAGA TTTT", + "CCTT CAA", + "GAA TCTA", + "CTC TCCTT", + "GG CGGA", + "TCTATCTA TCTATCTA", + "CACA CAGA", + "TGTG TGTA", + "CAAA GCC", + "TGTG CCA", + "GTT GAAAA", + "CTC CAGCA", + "TCAA GGA", + "TA GCTCA", + "CGC TGA", + "CCTG AAAA", + "GA CTATT", + "GATT CCA", + "GCTT CTA", + "GTC TGCC", + "CTT GGCA", + "TGTG GTA", + "GCTT TGA", + "GCTC TCTG", + "CTCA CAGA", + "TCTT TAAA", + "CAAA GCAA", + "TA CTTAA", + "GCTT CAA", + "CATT GAA", + "GGA GGAAA", + "CTA TAGA", + "CTGA GGAA", + "CCTG GCA", + "CC CTATT", + "CTC GTG", + "TTA CACA", + "TTA GGAA", + "CTG GTTA", + "GTT GTCC", + "TAATG AAAA", + "TATT TACA", + "GG GAATT", + "GTA GTTTT", + "GCTG CAA", + "CTA CGG", + "GCC GGA", + "CTG GGCA", + "CCTT AAAA", + "GATG GAA", + "TAGATAGA TAGATAGA", + "TATG TAA", + "GTA CGG", + "TATT CAAA", + "GA TCTCC", + "CCTG TTTT", + "TATT GCA", + "GGAAGGAA GGAAGGAA", + "GG TAATT", + "TTA CAGA", + "TCA GC", + "GCAAAA TG", + "GAGA GCA", + "GTA GAAAA", + "CATT TGAA", + "TCTT CTTTT", + "TCC CATA", + "GTTA TTTA", + "CTA TCTA", + "CA TCCTG", + "TCTT GTG", + "TTA TTATT", + "CC CGTC", + "TACTA TG", + "TAAA CATA", + "TAA GGAAA", + "GCTT GTG", + "CTC TAAAA", + "GTTTT AAAA", + "GACA GGA", + "TCC TAGA", + "TCCA CCCA", + "GTT TGAAA", + "CCA TCTCA", + "CTAA GAA", + "GTA TCTA", + "GTGA GGA", + "GCTG GAGG", + "CCTGTAA TCCCAGCTA", + "GCAA CAA", + "CTT TCAAA", + "CAAA 
TGTT", + "CTT GTCC", + "TCTCAA AAA", + "TATT TATTA", + "TAA GGCA", + "GAGA GGAA", + "TA TGATT", + "GCA TCTA", + "C GTTATT", + "GCC TGTA", + "GTT TCAAA", + "CCTTCCTT CCTTCCTT", + "GG CTTTG", + "GTCA GAA", + "CATG CATG", + "GTCA TTTA", + "CTG GAAAA", + "CTT CGA", + "CCTA TTTT", + "CCAA CAA", + "TCCA TCC", + "TAAA GTTA", + "GTC TCTC", + "TAA TCAAA", + "GATTTT TG", + "GATT TCTT", + "GG GCTGA", + "GCA TGTA", + "CCTG GGTT", + "GAGA CAA", + "GCTG TCA", + "TGA TAGG", + "GGA GACC", + "CC GGCA", + "TAA TCTCA", + "TGAA TTAA", + "TCTG GTG", + "GCC TC", + "GG CGCA", + "CCA GCTA", + "CA GTCTG", + "TGAA CTA", + "GTAA GAA", + "CCTT TCA", + "TCCA TGA", + "CAAA GGAA", + "CTC TC", + "CTC TCTCA", + "CTC CAGC", + "GTA GATA", + "CCCC CTCC", + "GG CGCC", + "TCTG TCC", + "GA CCATT", + "CTT GAAAA", + "TTA TCC", + "TACA TGTG", + "CAAA TTTG", + "TTTT GTG", + "CAGA GTG", + "GTAA TAA", + "GTGA GTG", + "TTTT TCC", + "GG CTCTG", + "GCC CTAA", + "GG CTGTT", + "CC CAATT", + "CAGA GCTT", + "TATAAA TG", + "GA GTCTG", + "TCTTAA AAA", + "GTTTTA TG", + "GA TCCAA", + "GGCC CTG", + "GA TCCTG", + "TCAA GTG", + "GATT CAA", + "CCTC TCTT", + "GAGA CGG", + "CAGA TCA", + "TAAAA GAA", + "CTGA GCAA", + "CCTG CCA", + "CCTT CTA", + "CGC TCA", + "GG CTGTG", + "TGGG AAAA", + "GGA GCCTG", + "CTGA GTG", + "CGTC AAA", + "TCAA GTA", + "CGTAA TT", + "TTA CTTA", + "TATA CTA", + "GG GCAAA", + "CAA CTTTT", + "CTT TGCC", + "GC CAGGAA", + "CACA CTA", + "GCC CAGC", + "TAAATAAA TAAATAAA", + "CTT TCCTT", + "GGGA GAA", + "TATG GTA", + "CGG CCA", + "CCTC TCTG", + "GAAA GCAA", + "CAA GCCA", + "GG CGTT", + "CTC TTTTA", + "TCGGCC TCCCAAA", + "GATT TATT", + "CAA GTCC", + "TA TCTTA", + "GTTCAA GACCA", + "CTCA CACA", + "GAAA TCAA", + "TGA GACC", + "GG GTAAA", + "GCTT GTT", + "GA TTTTAA", + "TTTT TATA", + "CAGA GCTG", + "TC TGTTAA", + "GTAA TTAA", + "TCTT TGAA", + "CTT GCCA", + "TTTT CATT", + "CCA TGTA", + "TCTC GGCTCACTGCAA", + "GGA TTCA", + "TC TATTAA", + "TACA TAAA", + "GATT GATT", + "GGA GAGGA", + "CGC AAAA", + "GGA CTAA", + "TTA 
TGTG", + "GTCA CTCA", + "GACA GCA", + "CGA GTT", + "GATG GTT", + "GGAA GAGG", + "GCCAA CATGGTGAAA", + "GGA GCCA", + "TGAA CTG", + "CCTC TGTG", + "GTA TAAAA", + "TCC CAGAA", + "CATT TATG", + "GA TTATG", + "TGTT TCTG", + "GAGTG GGTT", + "TACA TATT", + "CTC CAGGA", + "GACA CTG", + "GG TCTCA", + "CC GGGA", + "TGTT TAAA", + "CTCA CCA", + "GGA CTTA", + "GCC CACC", + "CAAA TCAA", + "GAAA TGTG", + "TA GTTAA", + "TCTA TAA", + "TTA GATT", + "GTG TAGG", + "TACTG AAA", + "GCA CCCA", + "GTG GGCTG", + "GAA TGAAA", + "TCTA GTT", + "TCA GGAGA", + "TCCA CTA", + "CTCA GTT", + "TACTT AAA", + "GA CTCCA", + "TCCATT TG", + "CACA GCAA", + "GCTCATG CCTG", + "GGTG CTG", + "GCTT TCTT", + "GTG GCCA", + "TA CGTG", + "GTG CAGTG", + "TGAA GTCA", + "CCTT TAA", + "TCTCAGCTCA CTGCAA", + "GAAA TATG", + "CC TCAAAA", + "GGGG CGG", + "CGA CAA", + "GG TGATG", + "GTCTT AAA", + "CAGAAA TG", + "CGTCA TT", + "CCAA GCA", + "GGA TCAA", + "GTGCTG GGATTA", + "GCTG GCC", + "CGGA GCTT", + "TACA TGA", + "TGTT TGAA", + "TCTC CATT", + "TAA GCAAA", + "CCTT TCTT", + "TA CTGTT", + "TCCA TCTT", + "CTTA CTT", + "CGGA GGTT", + "CAAAA CAA", + "TCA TAGG", + "TTA CTAA", + "CTTA TTTG", + "GAA TGTA", + "CCCCA TGGA", + "TTA CTGA", + "CGG AAAA", + "CTC CAGTG", + "TGTT CCA", + "CAGA TGAA", + "GTT GATA", + "TCC CCCC", + "CATT GCA", + "CTCA GCC", + "CTTA CTG", + "TA TCCTT", + "CTTTTA TG", + "TGAGTA GCTG", + "GACTG AAA", + "CAA TGAAA", + "CGA CTG", + "CTT GGGA", + "GCAA GCA", + "TCA CTCC", + "GATT TGA", + "CATTTT AAA", + "TCAA CTA", + "GTCC AAAA", + "CACC CTG", + "TTA CCTT", + "CAA GGGG", + "TTTT GGA", + "GTTA TTTG", + "GCTA CTG", + "CTGAGGCAGGA GAATG", + "GTGA TGA", + "GTA GTC", + "TAGTA TG", + "GTA TAGA", + "GTG TCTA", + "GCTG CTA", + "TTA GTAA", + "TAAA CATG", + "GTCA CCA", + "CA TCTTTT", + "CATA TAA", + "TCTC TCTA", + "TTTTA TTAA", + "TATT CTAA", + "GAAA TTTA", + "CTT CCCTG", + "TAAA GATG", + "TA CGTA", + "GTT TATTA", + "GAAAA GAA", + "CCCA CCCA", + "CAATT AAAA", + "CC GACA", + "CAAA GTGA", + "CAAA CAAAA", + "GCAA TTTT", + 
"CGATT AA", + "TTA GAGA", + "CTGA TGA", + "GGA GGAGG", + "GTCC TGGG", + "TCA TGAAA", + "GCAA CCA", + "GTT GGCA", + "GCGG CGG", + "GTCC CCA", + "GTA GGGG", + "GCCA TGTT", + "GTT CGAGA", + "GCC TATA", + "TAAA TTCA", + "GG CCATT", + "GAAAA CAA", + "TGTG TATG", + "GTA CTC", + "TAGG GAA", + "CCTT GAA", + "TC TATTTG", + "GAGG GCA", + "GAAA CTGA", + "TA CGC", + "TA CAAAAA", + "TCA TTATT", + "GGAAAA TT", + "TCAA TATT", + "CC CGTA", + "GGA GAGAA", + "TTA GTTA", + "CTCA GAGA", + "TC GAGC", + "CTA GTCA", + "GATG GCA", + "TGAA CATT", + "CTA TGGG", + "CACA CCA", + "TCAA TTAA", + "GGAA CTG", + "TTA CATG", + "CTT TCATT", + "CAGC TCTG", + "TCTTTT TTTT", + "TAAA TCTT", + "TGA TCTA", + "CATA CAA", + "GC TCAAAA", + "GC TGTGTG", + "TCAA TCA", + "GATT TGAA", + "CCAA GGA", + "GTCC TCA", + "GTG CTCC", + "AAAA TAA", + "GTGA CAA", + "GCTCA CGCCTG", + "CGA CGG", + "TA TCCAA", + "CACA CATG", + "TCTC TCTCC", + "TGTG GTT", + "CTT GGTA", + "TCTG GTT", + "TTTA TAA", + "CTG CTTTT", + "TGTG TCA", + "CACA TCA", + "CC TAATG", + "C GTTTTTT", + "GCTG GCA", + "GA CGTC", + "TATAA TTA", + "TACA GTAA", + "GAAA GTAA", + "GTC TGAAA", + "CCCA TTTT", + "TATA TGA", + "CTT GATA", + "CTT TATTTT", + "CTT TATTA", + "GG CGAA", + "CCA TGCC", + "CCTG CCTT", + "GAAGAA GAAGAA", + "CTGA CTGA", + "GCC CTTA", + "TA TCTAA", + "GTG TTTTA", + "TGTG GCA", + "TATT GTAA", + "GCCA GAAA", + "CCCTG TCTC", + "CACA GGAA", + "AAAA CAA", + "AAAAAAAA AAAAAAA", + "TAA CTCC", + "GCC TAAA", + "CGA GTA", + "TA GTATT", + "GTATTTT TAGTAGAGA", + "GCTG CAGG", + "TATT GAAA", + "CCAGCC TGGG", + "GCTCC AAA", + "TA CGAA", + "GGCC TCC", + "TATA CAAA", + "CATG GCA", + "CATG CAA", + "TACA CCA", + "CTT TACCA", + "TACA GAGA", + "TATT CTTA", + "TATG TCA", + "TCAA GCA", + "TCAA TGA", + "GG CTCTT", + "GGAA GTT", + "TCCA TGTT", + "GCTT TCC", + "TATG TGA", + "GTG TAGA", + "TTTT TAAAA", + "GCTG GAGA", + "GTGA GAGA", + "CCTA GAA", + "CCTCC AAA", + "CCAA TGA", + "CAGG GCA", + "CTA TGCA", + "CTT CACC", + "CTA CAAAA", + "CTCA CC", + "GAGTA TG", + "TA GAAAAA", + 
"CTTTT GAA", + "TAAA GAGA", + "CATG TCA", + "TCTTTT AAA", + "CACA GTGA", + "GA TCTAA", + "TAA GGTA", + "CATA GAA", + "CGC GCC", + "CAGC TTA", + "TATA GTT", + "CGG GCC", + "TATC CATT", + "TGTTTG TTTT", + "GCTG GCTG", + "TACA GGA", + "CTCC TTTG", + "CAA TCTA", + "CCCC CTG", + "TATA CTG", + "CTGA GCC", + "CGG TTA", + "TGAA GTG", + "GCTT CCTT", + "TTTTA TTTG", + "TA GTGAA", + "CTGA GGTG", + "TCTT CTC", + "GACA GAAA", + "CTGAA CTGAA", + "CCTG GGAA", + "TCC CCAAA", + "TATG TATT", + "GATT TCTG", + "CATT CAAA", + "CACA GTT", + "GCTT GAA", + "GTG GATCA", + "CTGA GTGA", + "TGAA TTTA", + "TCAA CAAA", + "GG TCATT", + "GTAA TTTA", + "GC GACTT", + "CTGA GAGA", + "GTG CCCA", + "CTA GGTT", + "TCC TGAAA", + "GTC CACC", + "TCA CAGAA", + "GC GAAAA", + "GTA TGGG", + "TGAA CAAA", + "TAAA CAAAA", + "CC GTTTT", + "TC TCAATT", + "TCCA GAAA", + "GTAA CAA", + "GCA TTTTA", + "TCTC CATG", + "TTA TAAAA", + "CAGG CAA", + "CTAAAA AAA", + "GTT GGGA", + "TAAA GATT", + "TGAA GAGA", + "CCCC TCA", + "TGTT TATG", + "TCTA CTG", + "CCAA TTTT", + "GGTG GTG", + "GGAA CAA", + "TGTG GGA", + "TCTG CTA", + "GAA CGA", + "GTAA GTA", + "GTT GCCA", + "AAAA TTTT", + "GC GCGA", + "GAAA GATG", + "GTC TCTCA", + "TCCA TCAA", + "GCA GCTA", + "CACA TTTG", + "CTGA CAA", + "TCCA CC", + "GC T", + "CCCA CTT", + "GCA GGTA", + "GAGG CCA", + "TAAA GTCA", + "CTG GATA", + "CGG CAA" + ] + } +} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42/checkpoint-4840/tokenizer_config.json b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42/checkpoint-4840/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..12cee777f1285b52e37dffd583040cdba7f5a0d3 --- /dev/null +++ 
b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42/checkpoint-4840/tokenizer_config.json @@ -0,0 +1,56 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "[UNK]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "[CLS]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "[SEP]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "[PAD]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "[MASK]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "cache_dir": null, + "clean_up_tokenization_spaces": true, + "cls_token": "[CLS]", + "mask_token": "[MASK]", + "model_max_length": 512, + "pad_token": "[PAD]", + "padding_side": "right", + "sep_token": "[SEP]", + "tokenizer_class": "PreTrainedTokenizerFast", + "trust_remote_code": true, + "unk_token": "[UNK]", + "use_fast": true +} diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42/checkpoint-4840/trainer_state.json b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42/checkpoint-4840/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d62f175e8c3baab75cead4d3df9e789b8fe7c449 --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42/checkpoint-4840/trainer_state.json @@ -0,0 +1,372 @@ +{ + "best_metric": 0.8932781986457534, + "best_model_checkpoint": 
"genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42/checkpoint-4840", + "epoch": 5.0, + "eval_steps": 100, + "global_step": 4840, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.1, + "learning_rate": 1.2396694214876034e-05, + "loss": 0.6293, + "step": 100 + }, + { + "epoch": 0.21, + "learning_rate": 2.479338842975207e-05, + "loss": 0.5214, + "step": 200 + }, + { + "epoch": 0.31, + "learning_rate": 2.9621574597651154e-05, + "loss": 0.4632, + "step": 300 + }, + { + "epoch": 0.41, + "learning_rate": 2.896911700739452e-05, + "loss": 0.4305, + "step": 400 + }, + { + "epoch": 0.52, + "learning_rate": 2.8316659417137886e-05, + "loss": 0.4449, + "step": 500 + }, + { + "epoch": 0.62, + "learning_rate": 2.7664201826881252e-05, + "loss": 0.4143, + "step": 600 + }, + { + "epoch": 0.72, + "learning_rate": 2.701174423662462e-05, + "loss": 0.4093, + "step": 700 + }, + { + "epoch": 0.83, + "learning_rate": 2.6359286646367988e-05, + "loss": 0.3943, + "step": 800 + }, + { + "epoch": 0.93, + "learning_rate": 2.5706829056111354e-05, + "loss": 0.3822, + "step": 900 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.8288555928700594, + "eval_f1": 0.8281272063918453, + "eval_loss": 0.3810736835002899, + "eval_matthews_correlation": 0.6624810694750157, + "eval_precision": 0.8339016038859941, + "eval_recall": 0.8286006733009063, + "eval_runtime": 38.3959, + "eval_samples_per_second": 403.272, + "eval_steps_per_second": 12.606, + "step": 968 + }, + { + "epoch": 1.03, + "learning_rate": 2.505437146585472e-05, + "loss": 0.3789, + "step": 1000 + }, + { + "epoch": 1.14, + "learning_rate": 2.4401913875598086e-05, + "loss": 0.3441, + "step": 1100 + }, + { + "epoch": 1.24, + "learning_rate": 2.3749456285341452e-05, + "loss": 0.339, + "step": 1200 + }, + { + "epoch": 1.34, + "learning_rate": 2.309699869508482e-05, + "loss": 0.3471, + "step": 1300 + }, + { + 
"epoch": 1.45, + "learning_rate": 2.2444541104828188e-05, + "loss": 0.3339, + "step": 1400 + }, + { + "epoch": 1.55, + "learning_rate": 2.1792083514571554e-05, + "loss": 0.3329, + "step": 1500 + }, + { + "epoch": 1.65, + "learning_rate": 2.113962592431492e-05, + "loss": 0.3296, + "step": 1600 + }, + { + "epoch": 1.76, + "learning_rate": 2.0487168334058287e-05, + "loss": 0.3236, + "step": 1700 + }, + { + "epoch": 1.86, + "learning_rate": 1.9834710743801653e-05, + "loss": 0.3212, + "step": 1800 + }, + { + "epoch": 1.96, + "learning_rate": 1.918225315354502e-05, + "loss": 0.3223, + "step": 1900 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.8521053991216739, + "eval_f1": 0.8521051005612541, + "eval_loss": 0.3406522572040558, + "eval_matthews_correlation": 0.7042627748459929, + "eval_precision": 0.8521369452783758, + "eval_recall": 0.8521258296553378, + "eval_runtime": 38.3903, + "eval_samples_per_second": 403.331, + "eval_steps_per_second": 12.607, + "step": 1936 + }, + { + "epoch": 2.07, + "learning_rate": 1.8529795563288385e-05, + "loss": 0.2789, + "step": 2000 + }, + { + "epoch": 2.17, + "learning_rate": 1.7877337973031755e-05, + "loss": 0.26, + "step": 2100 + }, + { + "epoch": 2.27, + "learning_rate": 1.722488038277512e-05, + "loss": 0.2586, + "step": 2200 + }, + { + "epoch": 2.38, + "learning_rate": 1.6572422792518487e-05, + "loss": 0.2591, + "step": 2300 + }, + { + "epoch": 2.48, + "learning_rate": 1.5919965202261853e-05, + "loss": 0.256, + "step": 2400 + }, + { + "epoch": 2.58, + "learning_rate": 1.526750761200522e-05, + "loss": 0.2601, + "step": 2500 + }, + { + "epoch": 2.69, + "learning_rate": 1.4615050021748586e-05, + "loss": 0.2436, + "step": 2600 + }, + { + "epoch": 2.79, + "learning_rate": 1.3962592431491953e-05, + "loss": 0.2538, + "step": 2700 + }, + { + "epoch": 2.89, + "learning_rate": 1.3310134841235321e-05, + "loss": 0.2544, + "step": 2800 + }, + { + "epoch": 3.0, + "learning_rate": 1.2657677250978686e-05, + "loss": 0.2476, + "step": 2900 + }, + { + 
"epoch": 3.0, + "eval_accuracy": 0.8736760526995608, + "eval_f1": 0.8736553931540352, + "eval_loss": 0.309709757566452, + "eval_matthews_correlation": 0.7474416977926268, + "eval_precision": 0.8738035921545504, + "eval_recall": 0.8736381239537026, + "eval_runtime": 38.8875, + "eval_samples_per_second": 398.174, + "eval_steps_per_second": 12.446, + "step": 2904 + }, + { + "epoch": 3.1, + "learning_rate": 1.2005219660722054e-05, + "loss": 0.1833, + "step": 3000 + }, + { + "epoch": 3.2, + "learning_rate": 1.135276207046542e-05, + "loss": 0.1724, + "step": 3100 + }, + { + "epoch": 3.31, + "learning_rate": 1.0700304480208786e-05, + "loss": 0.1752, + "step": 3200 + }, + { + "epoch": 3.41, + "learning_rate": 1.0047846889952154e-05, + "loss": 0.1834, + "step": 3300 + }, + { + "epoch": 3.51, + "learning_rate": 9.39538929969552e-06, + "loss": 0.1786, + "step": 3400 + }, + { + "epoch": 3.62, + "learning_rate": 8.742931709438888e-06, + "loss": 0.1774, + "step": 3500 + }, + { + "epoch": 3.72, + "learning_rate": 8.090474119182252e-06, + "loss": 0.1701, + "step": 3600 + }, + { + "epoch": 3.82, + "learning_rate": 7.43801652892562e-06, + "loss": 0.1724, + "step": 3700 + }, + { + "epoch": 3.93, + "learning_rate": 6.785558938668986e-06, + "loss": 0.1757, + "step": 3800 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.8842676311030742, + "eval_f1": 0.8841256535963352, + "eval_loss": 0.328061044216156, + "eval_matthews_correlation": 0.7709701679609638, + "eval_precision": 0.8865456555886244, + "eval_recall": 0.8844274222799268, + "eval_runtime": 38.8256, + "eval_samples_per_second": 398.809, + "eval_steps_per_second": 12.466, + "step": 3872 + }, + { + "epoch": 4.03, + "learning_rate": 6.1331013484123534e-06, + "loss": 0.1582, + "step": 3900 + }, + { + "epoch": 4.13, + "learning_rate": 5.48064375815572e-06, + "loss": 0.1202, + "step": 4000 + }, + { + "epoch": 4.24, + "learning_rate": 4.828186167899087e-06, + "loss": 0.1205, + "step": 4100 + }, + { + "epoch": 4.34, + "learning_rate": 
4.175728577642454e-06, + "loss": 0.1181, + "step": 4200 + }, + { + "epoch": 4.44, + "learning_rate": 3.5232709873858202e-06, + "loss": 0.1176, + "step": 4300 + }, + { + "epoch": 4.55, + "learning_rate": 2.870813397129187e-06, + "loss": 0.1107, + "step": 4400 + }, + { + "epoch": 4.65, + "learning_rate": 2.218355806872553e-06, + "loss": 0.1246, + "step": 4500 + }, + { + "epoch": 4.75, + "learning_rate": 1.56589821661592e-06, + "loss": 0.1138, + "step": 4600 + }, + { + "epoch": 4.86, + "learning_rate": 9.134406263592866e-07, + "loss": 0.1114, + "step": 4700 + }, + { + "epoch": 4.96, + "learning_rate": 2.6098303610265335e-07, + "loss": 0.1065, + "step": 4800 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.8933092224231465, + "eval_f1": 0.8932781986457534, + "eval_loss": 0.3555919826030731, + "eval_matthews_correlation": 0.7873588707657256, + "eval_precision": 0.8939641189977475, + "eval_recall": 0.8933949574840743, + "eval_runtime": 38.6536, + "eval_samples_per_second": 400.583, + "eval_steps_per_second": 12.521, + "step": 4840 + } + ], + "logging_steps": 100, + "max_steps": 4840, + "num_train_epochs": 5, + "save_steps": 100, + "total_flos": 1.629617788030464e+17, + "trial_name": null, + "trial_params": null +} diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42/checkpoint-4840/training_args.bin b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42/checkpoint-4840/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..209f54a8f6912df58d8af6397a95bf873109fdc0 --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42/checkpoint-4840/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:33ed09620e925a500ce2d3c700b2b5f605d67b0a946bb53717315fe5ab641594 +size 5393 diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42/results/base5120_human_enhancers_ensembl_lr3e-5_wd0.0_wr0.05_ep5_seed42/eval_results.json b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42/results/base5120_human_enhancers_ensembl_lr3e-5_wd0.0_wr0.05_ep5_seed42/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..8ae47f17c3136e62ffab88842c63667f2f318ef2 --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42/results/base5120_human_enhancers_ensembl_lr3e-5_wd0.0_wr0.05_ep5_seed42/eval_results.json @@ -0,0 +1 @@ +{"eval_loss": 0.35072019696235657, "eval_accuracy": 0.8933161123668066, "eval_f1": 0.8931562374251557, "eval_matthews_correlation": 0.7873247541592554, "eval_precision": 0.8943883205492502, "eval_recall": 0.8929377698410588, "eval_runtime": 42.4373, "eval_samples_per_second": 364.891, "eval_steps_per_second": 11.405, "epoch": 5.0} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42/checkpoint-9030/config.json b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42/checkpoint-9030/config.json new file mode 100644 index 0000000000000000000000000000000000000000..4f0427da7d802a654b7134e83949e06e77397301 --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42/checkpoint-9030/config.json @@ -0,0 +1,37 @@ +{ + "_name_or_path": "/data/nanhuang/Nan/models/DNAbert2_Pretrained", + 
"architectures": [ + "BertForSequenceClassification" + ], + "attention_probs_dropout_prob": 0.1, + "classifier_dropout": null, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "id2label": { + "0": "LABEL_0", + "1": "LABEL_1", + "2": "LABEL_2" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1, + "LABEL_2": 2 + }, + "layer_norm_eps": 1e-12, + "max_length": 512, + "max_position_embeddings": 512, + "model_type": "bert", + "num_attention_heads": 12, + "num_hidden_layers": 12, + "pad_token_id": 0, + "position_embedding_type": "absolute", + "problem_type": "single_label_classification", + "torch_dtype": "float32", + "transformers_version": "4.35.2", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 4096 +} diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42/checkpoint-9030/model.safetensors b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42/checkpoint-9030/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8823758b51c2f82f88c51069316dd0bf9a6f4541 --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42/checkpoint-9030/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a0fc27ea3562021af7b9997781dcdd003816af1e85cd852d01e313311dcb9e5 +size 356780956 diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42/checkpoint-9030/optimizer.pt b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42/checkpoint-9030/optimizer.pt new file mode 100644 index 
0000000000000000000000000000000000000000..e424650447a92ddc0305c5a926593495329abb4d --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42/checkpoint-9030/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33b27be8e908f3196a20c9398ab5e68b421c39b20c670a56ee67f68663cc567a +size 713683595 diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42/checkpoint-9030/rng_state.pth b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42/checkpoint-9030/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e51d124c03a35aed4c3b8b81ff08fce908ef28b3 --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42/checkpoint-9030/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90b371ea0c2acba839ba1435c53919aa751ee8182b44821138c052f52a05d411 +size 14645 diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42/checkpoint-9030/scheduler.pt b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42/checkpoint-9030/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..8b8eec79bd6b25b027b808aa28c23aba23020a59 --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42/checkpoint-9030/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8067ed8f3e566cc66acff2a585b587504ac9ed3ca32a343238c065ff17dcca4 +size 1465 diff --git 
a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42/checkpoint-9030/special_tokens_map.json b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42/checkpoint-9030/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..a8b3208c2884c4efb86e49300fdd3dc877220cdf --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42/checkpoint-9030/special_tokens_map.json @@ -0,0 +1,7 @@ +{ + "cls_token": "[CLS]", + "mask_token": "[MASK]", + "pad_token": "[PAD]", + "sep_token": "[SEP]", + "unk_token": "[UNK]" +} diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42/checkpoint-9030/tokenizer.json b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42/checkpoint-9030/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..d56cdca9d1ef6171ba7303ce90306050af4d592a --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42/checkpoint-9030/tokenizer.json @@ -0,0 +1,8340 @@ +{ + "version": "1.0", + "truncation": { + "direction": "Right", + "max_length": 250, + "strategy": "LongestFirst", + "stride": 0 + }, + "padding": { + "strategy": "BatchLongest", + "direction": "Right", + "pad_to_multiple_of": null, + "pad_id": 3, + "pad_type_id": 0, + "pad_token": "[PAD]" + }, + "added_tokens": [ + { + "id": 0, + "content": "[UNK]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "[CLS]", + "single_word": false, + "lstrip": false, + 
"rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "[SEP]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 3, + "content": "[PAD]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 4, + "content": "[MASK]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": null, + "pre_tokenizer": { + "type": "Whitespace" + }, + "post_processor": { + "type": "TemplateProcessing", + "single": [ + { + "SpecialToken": { + "id": "[CLS]", + "type_id": 0 + } + }, + { + "Sequence": { + "id": "A", + "type_id": 0 + } + }, + { + "SpecialToken": { + "id": "[SEP]", + "type_id": 0 + } + } + ], + "pair": [ + { + "SpecialToken": { + "id": "[CLS]", + "type_id": 0 + } + }, + { + "Sequence": { + "id": "A", + "type_id": 0 + } + }, + { + "SpecialToken": { + "id": "[SEP]", + "type_id": 0 + } + }, + { + "Sequence": { + "id": "B", + "type_id": 1 + } + }, + { + "SpecialToken": { + "id": "[SEP]", + "type_id": 1 + } + } + ], + "special_tokens": { + "[CLS]": { + "id": "[CLS]", + "ids": [ + 1 + ], + "tokens": [ + "[CLS]" + ] + }, + "[SEP]": { + "id": "[SEP]", + "ids": [ + 2 + ], + "tokens": [ + "[SEP]" + ] + } + } + }, + "decoder": null, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": "[UNK]", + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "vocab": { + "[UNK]": 0, + "[CLS]": 1, + "[SEP]": 2, + "[PAD]": 3, + "[MASK]": 4, + "A": 5, + "C": 6, + "G": 7, + "T": 8, + "AA": 9, + "TT": 10, + "TG": 11, + "CA": 12, + "CC": 13, + "TA": 14, + "GG": 15, + "TC": 16, + "GA": 17, + "AAA": 18, + "GC": 19, + "TAA": 20, + "TTTT": 21, + "TCA": 22, + "TGA": 23, + "TTA": 24, + "GAA": 25, + "TCC": 26, + "CAA": 27, + "CTG": 28, + "CTT": 29, + "GTG": 30, + "GTT": 31, + "GCA": 32, + "GGA": 33, 
+ "CCA": 34, + "GTA": 35, + "GCC": 36, + "CTA": 37, + "TAAA": 38, + "AAAA": 39, + "CTC": 40, + "GTC": 41, + "TGTG": 42, + "TATT": 43, + "CACA": 44, + "GAAA": 45, + "TATA": 46, + "TCTT": 47, + "TGTT": 48, + "CAAA": 49, + "GAGA": 50, + "CATT": 51, + "TGAA": 52, + "CAGG": 53, + "TCTG": 54, + "CAGA": 55, + "TCAA": 56, + "GGAA": 57, + "TAAAA": 58, + "CTGA": 59, + "GCTT": 60, + "GTGA": 61, + "GCTG": 62, + "CTCA": 63, + "CCTT": 64, + "CATG": 65, + "GCAA": 66, + "GTCA": 67, + "GTAA": 68, + "TTTTA": 69, + "TATG": 70, + "GAGG": 71, + "CGG": 72, + "GATT": 73, + "CCTG": 74, + "TCTC": 75, + "CCAA": 76, + "GTTA": 77, + "CTCC": 78, + "CTAA": 79, + "TACA": 80, + "CTTA": 81, + "TCCA": 82, + "GATG": 83, + "TTAA": 84, + "GAAAA": 85, + "TTTG": 86, + "GTTTT": 87, + "TCTA": 88, + "GCCA": 89, + "GTCC": 90, + "CTTTT": 91, + "GGGG": 92, + "CGA": 93, + "TTTA": 94, + "CCCA": 95, + "CAAAA": 96, + "TGGG": 97, + "TAGA": 98, + "TAGG": 99, + "GACA": 100, + "GGTT": 101, + "CCCC": 102, + "GGTG": 103, + "CATA": 104, + "GCTA": 105, + "TGTA": 106, + "TCAAA": 107, + "TGGA": 108, + "TAATT": 109, + "TTATT": 110, + "TGCA": 111, + "GGCA": 112, + "GATA": 113, + "CCTA": 114, + "TTCA": 115, + "TCTCA": 116, + "GGGA": 117, + "CGC": 118, + "CTGAA": 119, + "GTAAA": 120, + "TCTCC": 121, + "TTTTTT": 122, + "CGTG": 123, + "GCAAA": 124, + "TAAAAA": 125, + "TCTGA": 126, + "TCATT": 127, + "GGAAA": 128, + "TGAAA": 129, + "TCCTT": 130, + "CCAAA": 131, + "GAATT": 132, + "CTAAA": 133, + "CGTT": 134, + "GTGAA": 135, + "GGCC": 136, + "TAATA": 137, + "GGTA": 138, + "TGCC": 139, + "CACC": 140, + "TGATT": 141, + "AAAAAA": 142, + "GCTCA": 143, + "TCCAA": 144, + "GAGAA": 145, + "CTGTT": 146, + "TATTA": 147, + "CAGCA": 148, + "CTCTT": 149, + "CTTAA": 150, + "CAGAA": 151, + "GCTGA": 152, + "GTTAA": 153, + "TCTTA": 154, + "TATTTT": 155, + "GCCAA": 156, + "CTTTG": 157, + "GACC": 158, + "CGCA": 159, + "GTATT": 160, + "GTCTT": 161, + "CAATT": 162, + "GTGTT": 163, + "CTCAA": 164, + "GGAGG": 165, + "CGAA": 166, + "TCTTTT": 167, + 
"GTCAA": 168, + "CGCC": 169, + "TATAA": 170, + "TACC": 171, + "TCTAA": 172, + "CCATT": 173, + "CGGA": 174, + "CAAAAA": 175, + "CAGTG": 176, + "TCCTG": 177, + "CTCTG": 178, + "GAAAAA": 179, + "CTGTG": 180, + "CAGC": 181, + "TTTTAA": 182, + "GCATT": 183, + "GCCTT": 184, + "TAATG": 185, + "CTATT": 186, + "GTTTG": 187, + "TGATG": 188, + "GGCTG": 189, + "CCTCA": 190, + "GAGGA": 191, + "GCCTG": 192, + "AAATT": 193, + "CGTA": 194, + "TCAAAA": 195, + "TACAA": 196, + "CATCA": 197, + "CAGTT": 198, + "TGAGA": 199, + "GGGAA": 200, + "CACTG": 201, + "CACAA": 202, + "CAGGA": 203, + "CCCCA": 204, + "CCCTG": 205, + "TTTTTTTT": 206, + "TAGAA": 207, + "GAGCA": 208, + "CCTCC": 209, + "CACCA": 210, + "TATCA": 211, + "GAGC": 212, + "CATTA": 213, + "CACACACA": 214, + "GAGTG": 215, + "GGATT": 216, + "TGTGTGTG": 217, + "TACTT": 218, + "CACTT": 219, + "GTCTG": 220, + "TGAGG": 221, + "GAGTT": 222, + "GAATG": 223, + "TCATG": 224, + "GACAA": 225, + "GACTT": 226, + "TATTAA": 227, + "TAATAA": 228, + "GGCCA": 229, + "CATTTT": 230, + "CAGCC": 231, + "CCCTT": 232, + "GCTAA": 233, + "TATATATA": 234, + "GTGTG": 235, + "TACTG": 236, + "TAGTT": 237, + "CAATG": 238, + "GCTC": 239, + "CAGTA": 240, + "GCTCC": 241, + "CATAA": 242, + "TTATG": 243, + "TAAATT": 244, + "GATGA": 245, + "CATGA": 246, + "GCGG": 247, + "AAAAAAAA": 248, + "CCATG": 249, + "GATAA": 250, + "GACTG": 251, + "TATGA": 252, + "GCAGG": 253, + "GATCA": 254, + "GTTTTA": 255, + "GGATG": 256, + "CCTGA": 257, + "GTAAAA": 258, + "GAAGG": 259, + "GATTA": 260, + "CCTC": 261, + "GACCA": 262, + "GCTTA": 263, + "CCCAA": 264, + "AAATG": 265, + "GCATG": 266, + "TAGTA": 267, + "TACCA": 268, + "GGCTT": 269, + "CGTC": 270, + "TCTCTT": 271, + "GGTCA": 272, + "TTATTA": 273, + "TACTA": 274, + "TAGCA": 275, + "TATC": 276, + "CTGGG": 277, + "CATC": 278, + "CTTTTA": 279, + "CTAAAA": 280, + "GTGGG": 281, + "GAGTA": 282, + "CCAGG": 283, + "GATTTT": 284, + "TAGTG": 285, + "GAAATT": 286, + "CACTA": 287, + "TCGG": 288, + "TCAGG": 289, + "CAGGAA": 290, + "GCAAAA": 
291, + "CCTTA": 292, + "CATCC": 293, + "CTTGG": 294, + "TGTGAA": 295, + "TATTTG": 296, + "CCTAA": 297, + "CTATG": 298, + "GAGAAA": 299, + "GAGAGAGA": 300, + "GCTTTT": 301, + "TATAAA": 302, + "CAAGG": 303, + "TCTCTG": 304, + "TGTTAA": 305, + "TGTGTT": 306, + "GAGCC": 307, + "GACTA": 308, + "TATATT": 309, + "TAAAAAA": 310, + "TTTTTG": 311, + "GTATG": 312, + "CATTAA": 313, + "TAGGA": 314, + "TAGC": 315, + "GTTGG": 316, + "GAAGAA": 317, + "TAAATG": 318, + "TCTGTT": 319, + "CAGAAA": 320, + "CAAATT": 321, + "TAATTA": 322, + "TCTGTG": 323, + "TATCC": 324, + "TGAATT": 325, + "CTCCA": 326, + "GTGAAA": 327, + "GGCAA": 328, + "GGAGA": 329, + "GAAGA": 330, + "GGTGA": 331, + "GGGCA": 332, + "CCAAAA": 333, + "TCTCTCTC": 334, + "CTGCA": 335, + "CTTCTT": 336, + "TCTTAA": 337, + "CCCTA": 338, + "TGTGTG": 339, + "AAATA": 340, + "TGTTTG": 341, + "GGGTT": 342, + "GTGCTG": 343, + "GGAAAA": 344, + "GGGGA": 345, + "TCAGA": 346, + "CCTTTT": 347, + "GAAATG": 348, + "GCAGCA": 349, + "TCTGAA": 350, + "GGGTG": 351, + "CACATT": 352, + "TCTTTG": 353, + "GGGC": 354, + "TCCCA": 355, + "TCCATT": 356, + "CTGAAA": 357, + "CTTTA": 358, + "TCGA": 359, + "GTTTA": 360, + "CAACAA": 361, + "CTTCC": 362, + "GCCTCC": 363, + "TTAAA": 364, + "GCTCTG": 365, + "GTTTCA": 366, + "GGAGGA": 367, + "CGTGA": 368, + "CAGTC": 369, + "GAATA": 370, + "CAGAGA": 371, + "CCCTC": 372, + "CAAATG": 373, + "CTGCTG": 374, + "GATCC": 375, + "TTTTATT": 376, + "AAAATT": 377, + "TTATA": 378, + "TCAATT": 379, + "GGTAA": 380, + "GTTATT": 381, + "GCCAGG": 382, + "GGAGAA": 383, + "CATTTG": 384, + "TCACC": 385, + "CTCAAA": 386, + "GGTTA": 387, + "TCCAAA": 388, + "TCTATT": 389, + "GCAGA": 390, + "CTTCA": 391, + "TCATCA": 392, + "CGAGG": 393, + "TAACA": 394, + "GTTGTT": 395, + "CTTATT": 396, + "CGTCA": 397, + "TAAGA": 398, + "TAATTTT": 399, + "CTGTA": 400, + "TCCACA": 401, + "GCTGTG": 402, + "CGCTG": 403, + "TCTAAA": 404, + "GCGA": 405, + "CAATA": 406, + "CCACCA": 407, + "GAACA": 408, + "CGAAA": 409, + "CAGATT": 410, + "TCACA": 411, + 
"TTATTTT": 412, + "TCTCAA": 413, + "TGACA": 414, + "CTCCAA": 415, + "AAAAAAA": 416, + "TATATG": 417, + "TCCTCC": 418, + "TCACTT": 419, + "TCCAGG": 420, + "CAAGA": 421, + "GGCTA": 422, + "GTGGTG": 423, + "CGTAA": 424, + "CGAGA": 425, + "TGATA": 426, + "GGATTA": 427, + "CAACA": 428, + "CGATT": 429, + "TGAGAA": 430, + "CTCCTT": 431, + "CTCATT": 432, + "GTTAAA": 433, + "TCATA": 434, + "CCTCTG": 435, + "CTCTA": 436, + "GCTGAA": 437, + "CTGGA": 438, + "TAAGG": 439, + "CTTAAA": 440, + "TATTTA": 441, + "CCACA": 442, + "CCGG": 443, + "GTCAAA": 444, + "TGGAA": 445, + "CGGAA": 446, + "TGATGA": 447, + "GTTCA": 448, + "TAACAA": 449, + "GCTGTT": 450, + "TAAGAA": 451, + "CTGCC": 452, + "TTAATT": 453, + "CCAGA": 454, + "TCAGAA": 455, + "GTCATT": 456, + "CGCTT": 457, + "GATTAA": 458, + "CTGATT": 459, + "GCCACA": 460, + "GTAATT": 461, + "TCCAGA": 462, + "GCCAAA": 463, + "GTGATT": 464, + "TAAAATT": 465, + "CAAGAA": 466, + "CCACC": 467, + "TAATCC": 468, + "GTTCTT": 469, + "TCCATG": 470, + "GCTCTT": 471, + "TGCTG": 472, + "GGGTA": 473, + "TTACA": 474, + "GCCATT": 475, + "GCACA": 476, + "GCAATT": 477, + "TCCCTG": 478, + "TGTGA": 479, + "TCGAA": 480, + "GGACA": 481, + "GGAATT": 482, + "GTGGA": 483, + "CTTCTG": 484, + "TCCCC": 485, + "GCCCC": 486, + "CTTGA": 487, + "TAATGA": 488, + "TAAATA": 489, + "TATATA": 490, + "CTGCAA": 491, + "TCATTA": 492, + "GTATA": 493, + "TCCCCA": 494, + "CGTTA": 495, + "GCAGAA": 496, + "TGAGTT": 497, + "CTTTTTT": 498, + "CGATG": 499, + "CTTTCA": 500, + "AAAATG": 501, + "CAGGTT": 502, + "CTAATT": 503, + "CGCCA": 504, + "TGAAAAA": 505, + "GTTCC": 506, + "GTCCTT": 507, + "GTCCAA": 508, + "GTTTTTT": 509, + "CTCTGA": 510, + "GCGC": 511, + "GTTGA": 512, + "TGAATG": 513, + "CTATA": 514, + "GCAGTG": 515, + "CCTTAA": 516, + "TCACCA": 517, + "TCACTG": 518, + "GCCCTG": 519, + "TAACTT": 520, + "CAGATG": 521, + "GTAGG": 522, + "TCTATA": 523, + "GAGATT": 524, + "GTCTA": 525, + "TTTTAAA": 526, + "CACATG": 527, + "TGACC": 528, + "CACAAA": 529, + "GTGTA": 530, + "GGGAGG": 531, 
+ "GCTTTG": 532, + "CAAAAAA": 533, + "GAGGAA": 534, + "GTTCTG": 535, + "TTTTTA": 536, + "GTCTCA": 537, + "GTTCAA": 538, + "TCGTG": 539, + "GCTTAA": 540, + "GCACC": 541, + "CTCCTG": 542, + "TAAATAAA": 543, + "CTACA": 544, + "CTTCCA": 545, + "TCCTCA": 546, + "CGCAA": 547, + "GAAAAAA": 548, + "GCCCA": 549, + "TCGTT": 550, + "GTAGA": 551, + "CTCTCA": 552, + "GTCCA": 553, + "TGACTT": 554, + "TCCCTT": 555, + "GCCATG": 556, + "CACACACACACACACA": 557, + "GTGATG": 558, + "CCTCTT": 559, + "GCCAGA": 560, + "TCCTA": 561, + "CGTTTT": 562, + "GTACA": 563, + "GCATA": 564, + "GAATTA": 565, + "TGTGTGTGTGTGTGTG": 566, + "CCCAGG": 567, + "GGTTTT": 568, + "TCAAAAA": 569, + "TCTATG": 570, + "CCATA": 571, + "TGACAA": 572, + "GGATA": 573, + "TCAGTG": 574, + "GTATTTT": 575, + "GAGATG": 576, + "GCGTG": 577, + "CGTCC": 578, + "TTAAAAA": 579, + "TAATCA": 580, + "CAATTA": 581, + "CCACTG": 582, + "CGGTT": 583, + "GTTGAA": 584, + "TGATTA": 585, + "CCTTTG": 586, + "CGGTG": 587, + "CAGGTG": 588, + "TCAATG": 589, + "CTGATG": 590, + "TCAGGA": 591, + "GTTTAA": 592, + "TATTAAA": 593, + "CTCTTA": 594, + "GCAGGA": 595, + "CTCTCC": 596, + "GAACC": 597, + "CTTTAA": 598, + "GGGCC": 599, + "GTATTA": 600, + "GCGCC": 601, + "CCAATT": 602, + "GCTAAA": 603, + "TGACTG": 604, + "GATTTG": 605, + "GATAAA": 606, + "TCAGCA": 607, + "GTTCCA": 608, + "GAAATA": 609, + "GACAAA": 610, + "GAGTC": 611, + "GCTATT": 612, + "TCACAA": 613, + "GAGGTT": 614, + "TAACC": 615, + "GAAGGA": 616, + "GCTCAA": 617, + "GAAAATT": 618, + "CCAGCA": 619, + "GTTTTAA": 620, + "GTGCC": 621, + "TGAGGA": 622, + "CATAAA": 623, + "GGTCC": 624, + "TCATTTT": 625, + "TATTTATT": 626, + "TAATAAA": 627, + "GCCTA": 628, + "CTTTTAA": 629, + "TAAGTG": 630, + "TAAGTA": 631, + "CTGGAA": 632, + "CACACA": 633, + "GACAGA": 634, + "CAACC": 635, + "GGGAAA": 636, + "CCAGAA": 637, + "TCAGTT": 638, + "TAACTA": 639, + "CTAAAAA": 640, + "TGGGTT": 641, + "TGAGTG": 642, + "TAAAATG": 643, + "TATATATATATATATA": 644, + "GCACTG": 645, + "GACTC": 646, + "TACAAA": 647, + 
"TAAAAAAA": 648, + "TCTACA": 649, + "GTTGTG": 650, + "TCGCC": 651, + "CCCAAA": 652, + "GTCATG": 653, + "CTGCTT": 654, + "GGAATG": 655, + "CTATTA": 656, + "GATATT": 657, + "TAGAAA": 658, + "GGCAGG": 659, + "GATGAA": 660, + "GTAGAA": 661, + "TCCTGA": 662, + "TAACTG": 663, + "GCTGGG": 664, + "GCAATG": 665, + "GCCCCA": 666, + "GTTTGA": 667, + "CATTTA": 668, + "GTGCA": 669, + "CTTGAA": 670, + "GTGGAA": 671, + "CTTCAA": 672, + "TAAATTA": 673, + "GTGGCA": 674, + "TCCTTA": 675, + "GGAAAAA": 676, + "TTTTTTA": 677, + "CCTGTG": 678, + "GTAATG": 679, + "GTGTTA": 680, + "CTAGG": 681, + "CAGGCTG": 682, + "GACACA": 683, + "GAAAAAAA": 684, + "TCGC": 685, + "GTAAAAA": 686, + "TGTTTA": 687, + "TCTCTA": 688, + "GTCCTG": 689, + "CCAGGA": 690, + "GAACAA": 691, + "TAAGTT": 692, + "TGAGCA": 693, + "GCTCCA": 694, + "TAAGCA": 695, + "CTCATG": 696, + "GTCTTA": 697, + "CCCACA": 698, + "CATATT": 699, + "GCCTCA": 700, + "CACTC": 701, + "CTTCTA": 702, + "TGATTTT": 703, + "TCGCA": 704, + "CCTGTT": 705, + "GAAGCA": 706, + "GCAAAAA": 707, + "GCGGA": 708, + "CCACAA": 709, + "GCGCA": 710, + "CATATA": 711, + "GACATT": 712, + "GTTCTA": 713, + "CAAAATT": 714, + "GAAAGAAA": 715, + "CCCGG": 716, + "TACACA": 717, + "CCAAAAA": 718, + "GAGGTG": 719, + "GGCTCA": 720, + "CAGTGA": 721, + "TCCCAA": 722, + "TATCTT": 723, + "TGAGTA": 724, + "TCGTA": 725, + "TTTTCTT": 726, + "GTGGGA": 727, + "GAGCTG": 728, + "CCCTCC": 729, + "TAGGTT": 730, + "TTAGG": 731, + "TAATATT": 732, + "CCAGCC": 733, + "CATCTT": 734, + "GTCTGA": 735, + "GTTTCC": 736, + "CCTGAA": 737, + "GGAGCA": 738, + "GAAAATG": 739, + "TCAGTA": 740, + "TAACCA": 741, + "GATGTT": 742, + "CTGTTA": 743, + "CATGTT": 744, + "GGCGG": 745, + "CATGTG": 746, + "GGGAGA": 747, + "CTTTGA": 748, + "TCTTTCTT": 749, + "AAAAAAAAA": 750, + "GGGGTG": 751, + "CTTTCC": 752, + "CTTGTT": 753, + "GCATTA": 754, + "CCCAGA": 755, + "CAAATA": 756, + "TCGGA": 757, + "CAGCTT": 758, + "TCACTA": 759, + "TAATTAA": 760, + "TAAGGA": 761, + "GAACTG": 762, + "GCACAA": 763, + "GCGTT": 764, + 
"GGCTC": 765, + "TCTTTTA": 766, + "CCTCCA": 767, + "GGCAAA": 768, + "CAGCTG": 769, + "CTACAA": 770, + "TACATT": 771, + "GCTATG": 772, + "CTTGTG": 773, + "GAGTCA": 774, + "GTTATG": 775, + "CTGCCA": 776, + "GTCTCC": 777, + "TGACCA": 778, + "CACCTG": 779, + "TATATTA": 780, + "TGATCA": 781, + "CAGCAA": 782, + "GATGTG": 783, + "GTCTTTT": 784, + "CTAGAA": 785, + "GCTACA": 786, + "CTGGGA": 787, + "GGGGTT": 788, + "CAAGTA": 789, + "CAAGGA": 790, + "CCCTCA": 791, + "TAGCC": 792, + "GTTGGA": 793, + "GCTATA": 794, + "TCTGAAA": 795, + "TATGTT": 796, + "CCCCTT": 797, + "GTTGTA": 798, + "CCCTGA": 799, + "TGACTA": 800, + "CAAGCA": 801, + "CAATAA": 802, + "GAACTT": 803, + "CATGAA": 804, + "CTTATG": 805, + "CTAATG": 806, + "TCTAAAA": 807, + "CCAATG": 808, + "GAAGTG": 809, + "CCTCAA": 810, + "CCCATT": 811, + "CAGTCA": 812, + "GAGAGAGAGAGAGAGA": 813, + "TATGTG": 814, + "GCAGTGA": 815, + "TCTCCTT": 816, + "TCCCAAA": 817, + "CCATTA": 818, + "CCAGTG": 819, + "GCATCA": 820, + "TCAAATT": 821, + "GATCTT": 822, + "GACAGG": 823, + "GGAGTG": 824, + "GTAGTA": 825, + "CAACTT": 826, + "GAAGTT": 827, + "CCCCTG": 828, + "TCTCAAA": 829, + "GGGTC": 830, + "GAGCTT": 831, + "TATGAAA": 832, + "TATGAA": 833, + "GACATG": 834, + "CAAGTG": 835, + "GATATA": 836, + "CATCTG": 837, + "CTGTGA": 838, + "TAATTTA": 839, + "GGCAGA": 840, + "GCGAA": 841, + "CCTAAA": 842, + "CCATCA": 843, + "CACTGA": 844, + "GGACTA": 845, + "GACGG": 846, + "CTCTTTT": 847, + "CTGTCA": 848, + "TCTCTCTCTCTCTCTC": 849, + "TTAATG": 850, + "GCAGCC": 851, + "CAAAAAAA": 852, + "GCACCA": 853, + "CTATTTT": 854, + "GAGCAA": 855, + "CTTGGA": 856, + "CTGGTG": 857, + "GAATAA": 858, + "TCCTTTT": 859, + "GAAGTA": 860, + "CAGTAA": 861, + "CAACCA": 862, + "CTGTAA": 863, + "TGATAA": 864, + "GCAGTT": 865, + "CACGG": 866, + "TAAATAA": 867, + "CTGTTTT": 868, + "CTACTA": 869, + "GCTCTA": 870, + "CGAAAA": 871, + "CAAGTT": 872, + "CTTGTA": 873, + "GAATGA": 874, + "GAGTGA": 875, + "GCCTGA": 876, + "GGTTTG": 877, + "CCCATG": 878, + "GGGGAA": 879, + "GAAGAAA": 
880, + "TGTTA": 881, + "CAATTTT": 882, + "TATATTTT": 883, + "CTCAAAA": 884, + "GGTGGG": 885, + "CCGTG": 886, + "TATTTCA": 887, + "CCCCAA": 888, + "TATTTAA": 889, + "GGCTGA": 890, + "GGTGTG": 891, + "CATCAA": 892, + "CACTCA": 893, + "TCTCATT": 894, + "GAATTTT": 895, + "GAATCA": 896, + "CAGGAAA": 897, + "CATACA": 898, + "TATTTTA": 899, + "TTATAA": 900, + "GAGGAAA": 901, + "CATATG": 902, + "CTTTCTT": 903, + "CAACTG": 904, + "GGGCTG": 905, + "CCCCCA": 906, + "TTTGAAA": 907, + "CATTAAA": 908, + "CTTAAAA": 909, + "GACTGA": 910, + "CAATGA": 911, + "GGCACA": 912, + "CCAGTA": 913, + "GGATGA": 914, + "GTTTTTG": 915, + "GCATTTT": 916, + "GTGCCA": 917, + "GCAGTA": 918, + "GCCCTT": 919, + "TCGTC": 920, + "GAACTA": 921, + "GTGGTT": 922, + "GTGTGA": 923, + "GTGCTT": 924, + "CGCTA": 925, + "GTGTCA": 926, + "TCTTTA": 927, + "GCCTTA": 928, + "CCTATT": 929, + "CAAAATG": 930, + "GAACCA": 931, + "CTCCAGG": 932, + "GACTCA": 933, + "CATGAAA": 934, + "GCTAGG": 935, + "TGTTAAA": 936, + "GCGTA": 937, + "GCACTT": 938, + "TCTTAAA": 939, + "TAAGAAA": 940, + "GGCCTG": 941, + "TCCCTA": 942, + "GTGGTA": 943, + "CTGCTA": 944, + "GGAGTT": 945, + "GGTAAA": 946, + "CAAACAAA": 947, + "GATATG": 948, + "TCATGA": 949, + "GACCTT": 950, + "TAATATA": 951, + "GCTAGA": 952, + "GGACTG": 953, + "GGCATT": 954, + "CAGTTA": 955, + "CCCTAA": 956, + "CACCTT": 957, + "GGTGAA": 958, + "CAGCTA": 959, + "GTGTTTT": 960, + "CAACTA": 961, + "GATCAA": 962, + "GAGAAAA": 963, + "TGTGAAA": 964, + "AAAATA": 965, + "GATGAAA": 966, + "CTCTAA": 967, + "TTACTT": 968, + "GATCTG": 969, + "CCACTT": 970, + "GAGTTA": 971, + "CAATCA": 972, + "GGATTACAGG": 973, + "TTTATTTT": 974, + "TACATA": 975, + "TTTTATG": 976, + "GAGTAA": 977, + "GCTGAAA": 978, + "GTACTG": 979, + "GCTCTC": 980, + "TATGTA": 981, + "TGTGTA": 982, + "TCATAA": 983, + "GGACTT": 984, + "TCTCCAA": 985, + "GCATGA": 986, + "GACGA": 987, + "CGCCTG": 988, + "GACCTG": 989, + "GGTCTT": 990, + "CACCAA": 991, + "GATC": 992, + "GACCAA": 993, + "AAAATTA": 994, + "GTAAATT": 995, + 
"CCAGTT": 996, + "CAGAAAA": 997, + "TAACAAA": 998, + "GGTGTT": 999, + "GAAATTA": 1000, + "TGCCTCA": 1001, + "CCGCC": 1002, + "CCATTTT": 1003, + "CTTGCC": 1004, + "TCTGTA": 1005, + "CTGGCA": 1006, + "GGGATG": 1007, + "CCATGA": 1008, + "CTACTT": 1009, + "TAGGTG": 1010, + "TAAAAATT": 1011, + "GAAAGAA": 1012, + "TAAAATA": 1013, + "CTTTTTG": 1014, + "GTCAAAA": 1015, + "GGACAA": 1016, + "TCTGATT": 1017, + "CTCTCTT": 1018, + "TAATTTG": 1019, + "CTCTTTG": 1020, + "GGCCTT": 1021, + "GGATTTT": 1022, + "CTACTG": 1023, + "GTTGCA": 1024, + "GGCTCC": 1025, + "CTCTGTG": 1026, + "CTCCAGCC": 1027, + "TTACAA": 1028, + "GGACCA": 1029, + "GGAAGGAA": 1030, + "TAAAGAA": 1031, + "TTAGAA": 1032, + "GTGAAAA": 1033, + "CTTGCA": 1034, + "TGGGTG": 1035, + "GGAGCC": 1036, + "CCTCTA": 1037, + "CT": 1038, + "GGGCTT": 1039, + "GGCATG": 1040, + "CTGGTT": 1041, + "TACAGA": 1042, + "GATTAAA": 1043, + "CTCTGTT": 1044, + "TTATCA": 1045, + "CTGAAAA": 1046, + "GTAGTT": 1047, + "GGGTCA": 1048, + "GT": 1049, + "CAGCCA": 1050, + "GCGTC": 1051, + "CACTTA": 1052, + "GTGCTA": 1053, + "TCTTATT": 1054, + "GTACTT": 1055, + "GGTATT": 1056, + "TAGAGA": 1057, + "TACATG": 1058, + "CCACTA": 1059, + "TGAGAAA": 1060, + "CAATAAA": 1061, + "TCCAAAA": 1062, + "CGTGAA": 1063, + "GGTCTG": 1064, + "CTGAATT": 1065, + "TCAGCC": 1066, + "CCTCTC": 1067, + "GTTAAAA": 1068, + "GGGATT": 1069, + "TCCTAA": 1070, + "CACTAA": 1071, + "GGAGAAA": 1072, + "CCTTCCTT": 1073, + "GTTTCTT": 1074, + "TATCAA": 1075, + "GATACA": 1076, + "TAATCCCAGCA": 1077, + "CCGCA": 1078, + "TGAAATT": 1079, + "CGTAAA": 1080, + "CTCTCTG": 1081, + "TCTTTTTT": 1082, + "GTACAA": 1083, + "CCAAATT": 1084, + "TGTATTTT": 1085, + "TCGCTT": 1086, + "GGGTGA": 1087, + "GATAGA": 1088, + "CTTTATT": 1089, + "TAAACAA": 1090, + "GTTTATT": 1091, + "TGAATA": 1092, + "CTACCA": 1093, + "GTGTCC": 1094, + "CCCGA": 1095, + "TTTATTA": 1096, + "CTCCAAA": 1097, + "TTTTTTTTTTTT": 1098, + "TCATCC": 1099, + "GAAGCC": 1100, + "CTAAATT": 1101, + "CAAATTA": 1102, + "CCCCAAA": 1103, + 
"TCTTCTT": 1104, + "TAGGAAA": 1105, + "CACGA": 1106, + "CATTTTA": 1107, + "GTGCAA": 1108, + "TCTCCTG": 1109, + "TATTTTAA": 1110, + "GTTTGTT": 1111, + "GAGCCA": 1112, + "GGCCAA": 1113, + "CATTTCA": 1114, + "CATCCA": 1115, + "CCTATA": 1116, + "GACTTA": 1117, + "TCAAATG": 1118, + "GTATCA": 1119, + "TAAATTTT": 1120, + "CTGAGGCA": 1121, + "GCCCAA": 1122, + "GGTTAA": 1123, + "TATCTG": 1124, + "TGACAGA": 1125, + "GGAGAGA": 1126, + "GCTGCTG": 1127, + "CCCTTA": 1128, + "TCCTCTG": 1129, + "GTAGCA": 1130, + "CCTGAAA": 1131, + "CCGAA": 1132, + "TTTTTAA": 1133, + "CTATAA": 1134, + "CCTGTA": 1135, + "TTACTG": 1136, + "GTATAA": 1137, + "GGCGA": 1138, + "GACTAA": 1139, + "TCAGAAA": 1140, + "GTGTGTG": 1141, + "CAAAGAA": 1142, + "CCTATG": 1143, + "GCAGAGA": 1144, + "CCGTT": 1145, + "TTTTATTTT": 1146, + "GGAAGAA": 1147, + "TTACTA": 1148, + "GCCTGGG": 1149, + "TCCCTC": 1150, + "TCCTCTT": 1151, + "GGATCA": 1152, + "GGTCAA": 1153, + "TCGAGA": 1154, + "TATTCTT": 1155, + "TACTC": 1156, + "GTTAATT": 1157, + "GCGAGA": 1158, + "CTTAATT": 1159, + "TCCTTTG": 1160, + "GTCTAA": 1161, + "CACCCA": 1162, + "GGGTTA": 1163, + "GGGCAA": 1164, + "GGAAATG": 1165, + "GCAAATT": 1166, + "TAGATG": 1167, + "GCAGAAA": 1168, + "AAAAAAAAAAAAAAAA": 1169, + "CCTACA": 1170, + "GGAGTA": 1171, + "TCTAATT": 1172, + "CAACAAA": 1173, + "TAGATT": 1174, + "GGTTTA": 1175, + "CCTAGA": 1176, + "CTTTAAA": 1177, + "TACTTA": 1178, + "TAATGAA": 1179, + "CTATCA": 1180, + "TAGTAA": 1181, + "CAGAGAA": 1182, + "CAAGAAA": 1183, + "GGGGAAA": 1184, + "CGTTAA": 1185, + "CGTGTT": 1186, + "TCTGTCTG": 1187, + "TTTTAATT": 1188, + "CTGGCC": 1189, + "TAAATGA": 1190, + "CGTCAA": 1191, + "TTAGTA": 1192, + "GTCTCTG": 1193, + "TTTTAAAA": 1194, + "CAGTTTT": 1195, + "CTTCCTT": 1196, + "TATATAA": 1197, + "GCTTTTA": 1198, + "TTTTTCA": 1199, + "GGTC": 1200, + "TTATTAA": 1201, + "TTTTGTT": 1202, + "CATAGA": 1203, + "TAGGAA": 1204, + "GAGAGAA": 1205, + "GTAGCTG": 1206, + "TTATGA": 1207, + "GTAGTG": 1208, + "GGAGAGG": 1209, + "CTCTGAA": 1210, + "TAGTC": 
1211, + "GACTCC": 1212, + "TCCCTCC": 1213, + "TAATGTT": 1214, + "CATCTA": 1215, + "GCCACCA": 1216, + "GTACTA": 1217, + "TGGGAAA": 1218, + "CGCCTT": 1219, + "GCCCGG": 1220, + "GGAGGAA": 1221, + "GTACCA": 1222, + "CGCAAA": 1223, + "CATAAAA": 1224, + "TAACATT": 1225, + "GCTAAAA": 1226, + "TCTTCTG": 1227, + "GCCAAAA": 1228, + "GTATGA": 1229, + "GTCTTTG": 1230, + "TACTGA": 1231, + "TCCCAGG": 1232, + "TTATTTA": 1233, + "TTAGTT": 1234, + "GGACC": 1235, + "TATAAAA": 1236, + "CAAACAA": 1237, + "CTTCTC": 1238, + "TCTATCTA": 1239, + "GAAATAA": 1240, + "GTGTAA": 1241, + "CTTTGTT": 1242, + "GATAAAA": 1243, + "GCCCAGG": 1244, + "GCGATT": 1245, + "AAAAAATT": 1246, + "TACAGG": 1247, + "GGCTAA": 1248, + "TAGCTT": 1249, + "GTCTCTA": 1250, + "CTCCTGA": 1251, + "GAATAAA": 1252, + "TTACCA": 1253, + "GGGACA": 1254, + "GCCACTG": 1255, + "GTTTAAA": 1256, + "GTCTGTG": 1257, + "TGACAAA": 1258, + "TACATTTT": 1259, + "GCCACC": 1260, + "TGTTTT": 1261, + "TAGCAA": 1262, + "TTATAAA": 1263, + "GACCCA": 1264, + "GCAGC": 1265, + "CAGACAGA": 1266, + "CACAAAA": 1267, + "GCCCTA": 1268, + "TATTAAAA": 1269, + "CGTATT": 1270, + "CCATCC": 1271, + "TCGATT": 1272, + "GAAGGAA": 1273, + "GATCCA": 1274, + "TATTTGA": 1275, + "GTGAATT": 1276, + "TACCTT": 1277, + "CGTCTT": 1278, + "CCTAGG": 1279, + "TCGAAA": 1280, + "CTTTCTG": 1281, + "TGAAGAA": 1282, + "TCTCTCA": 1283, + "GTCTCTT": 1284, + "GGAGGGG": 1285, + "GTCTGTT": 1286, + "CTATGA": 1287, + "GGAAATT": 1288, + "GCACACA": 1289, + "GCCTTTT": 1290, + "CAGTCC": 1291, + "CTGGTA": 1292, + "GCATCC": 1293, + "TAGTTA": 1294, + "GGCTTA": 1295, + "GAGTCC": 1296, + "TGAAAA": 1297, + "TAGATAGA": 1298, + "TGTTTGTT": 1299, + "TACTCA": 1300, + "CATTTAA": 1301, + "GATTTTA": 1302, + "CACTCC": 1303, + "GAAACAA": 1304, + "GCGCTG": 1305, + "TCTTTCA": 1306, + "CTGTCC": 1307, + "GAACTCA": 1308, + "CGGAAA": 1309, + "TATTGTT": 1310, + "GCACTA": 1311, + "TATTCAA": 1312, + "GCGGGG": 1313, + "GTGGCC": 1314, + "TAATTAAA": 1315, + "TACTAA": 1316, + "GCGGTG": 1317, + "TACCAA": 1318, + 
"GGTATA": 1319, + "CTAGTT": 1320, + "GCAGAGG": 1321, + "CTTTTTTTT": 1322, + "TTTTTTTTTTTTTTTT": 1323, + "TACAGTA": 1324, + "CCATGTT": 1325, + "TAGTGA": 1326, + "CGTGTG": 1327, + "GCTCTGA": 1328, + "CTTCCTG": 1329, + "TCGCTG": 1330, + "TAAATCA": 1331, + "TCCAATT": 1332, + "GTTTCTG": 1333, + "GAAGAGA": 1334, + "GGGTAA": 1335, + "CCATAA": 1336, + "TTATATT": 1337, + "CGAATT": 1338, + "CCGGA": 1339, + "TGAGCC": 1340, + "CCGTA": 1341, + "CAGAGGA": 1342, + "GTGTTTG": 1343, + "GACAAAA": 1344, + "TTTTTTAAA": 1345, + "GTTGCC": 1346, + "GAGTTTT": 1347, + "TCAAAAAA": 1348, + "TGTTTCA": 1349, + "TATCTA": 1350, + "TCTCTCC": 1351, + "CTCCACA": 1352, + "TAAATATT": 1353, + "TTTTCTG": 1354, + "CTCTCAA": 1355, + "CCTTAAA": 1356, + "TCTTTTAA": 1357, + "GAACAAA": 1358, + "TTAGCA": 1359, + "GCTCATG": 1360, + "TAAAGTA": 1361, + "GGATAA": 1362, + "TTATTAAA": 1363, + "CTCCATT": 1364, + "TCTCTGA": 1365, + "TTATTTG": 1366, + "CCTGTAA": 1367, + "TTATATA": 1368, + "GACTTTT": 1369, + "TGTTGTT": 1370, + "GCAAATG": 1371, + "CTTCAAA": 1372, + "GAATATT": 1373, + "GAATCC": 1374, + "CTCTTAA": 1375, + "GCATAA": 1376, + "GAATGAA": 1377, + "CTTAAAAA": 1378, + "TAAAAATG": 1379, + "TTTTAAAAA": 1380, + "CTCTGGG": 1381, + "TGATCC": 1382, + "GCTCTCA": 1383, + "CTCCAGA": 1384, + "GAGTGCAGTG": 1385, + "CAATATT": 1386, + "TAGAAAA": 1387, + "GTAAATG": 1388, + "TAGCTG": 1389, + "GCTCAAA": 1390, + "GCAGGAA": 1391, + "TACCTG": 1392, + "GGGAAAA": 1393, + "TTTTCTA": 1394, + "GGGGGGGG": 1395, + "CCGA": 1396, + "CTTTGAA": 1397, + "GGAGGTG": 1398, + "TAGTCA": 1399, + "GGCCCA": 1400, + "TGATGTT": 1401, + "CAAATAA": 1402, + "TCTTCCA": 1403, + "GCGCTT": 1404, + "GTATTTG": 1405, + "GTCTC": 1406, + "GAAATCA": 1407, + "TGATAAA": 1408, + "CATTCTT": 1409, + "TATCCA": 1410, + "GCCTCTG": 1411, + "TGAGATG": 1412, + "CGCCAA": 1413, + "GTTTTATT": 1414, + "TATATATT": 1415, + "GTAGGA": 1416, + "GACAGAA": 1417, + "CTCCAGCCTGGG": 1418, + "GCGTGA": 1419, + "GGTATG": 1420, + "GAGGGAGG": 1421, + "TCATTTG": 1422, + "CTACC": 1423, + 
"TACAGAA": 1424, + "GGTAGA": 1425, + "GATCTA": 1426, + "GTCCATG": 1427, + "TGAGGAA": 1428, + "TAATAAAA": 1429, + "TAAACTT": 1430, + "TCACATT": 1431, + "GGAGGCC": 1432, + "TCACAAA": 1433, + "CACTTTT": 1434, + "CGGCC": 1435, + "CAACAGA": 1436, + "GTAGAGA": 1437, + "GTTATTTT": 1438, + "CGTTTG": 1439, + "TCGTCA": 1440, + "TCTGCTG": 1441, + "CAACACA": 1442, + "GGTAGG": 1443, + "GCAGCTG": 1444, + "TAGTAGAGA": 1445, + "CAAGCC": 1446, + "GCATTTG": 1447, + "TAATATG": 1448, + "GCTTAAA": 1449, + "GCTTCTG": 1450, + "CTCTCCA": 1451, + "TCATCTT": 1452, + "CGTCTG": 1453, + "TCATTTA": 1454, + "CATAGG": 1455, + "GCTCCTT": 1456, + "TGTTCTT": 1457, + "TACATTA": 1458, + "CACAGAA": 1459, + "TAAATATA": 1460, + "TAGAGG": 1461, + "GATAGG": 1462, + "TCCTGAA": 1463, + "GGAGCTG": 1464, + "TGATATT": 1465, + "TCATTAA": 1466, + "CTTTTAAA": 1467, + "TCGTTA": 1468, + "TAAACTA": 1469, + "GTTTGAA": 1470, + "TAAAATTA": 1471, + "CACCCC": 1472, + "TCAGAGA": 1473, + "CTCCTGCCTCA": 1474, + "TGACATT": 1475, + "GTATTTA": 1476, + "CTTCATT": 1477, + "GAAACTG": 1478, + "TAACACA": 1479, + "GTTCAAA": 1480, + "GGAGATG": 1481, + "TCGGCC": 1482, + "CAGCATT": 1483, + "TCGATG": 1484, + "TATTCTA": 1485, + "CTGTGAA": 1486, + "TATTGAA": 1487, + "TTTTCCA": 1488, + "TATTTCTT": 1489, + "GGTGAAA": 1490, + "CTGAGAA": 1491, + "GCACAGA": 1492, + "GCGAGG": 1493, + "CTGTGTG": 1494, + "TGAAATG": 1495, + "TGATGAA": 1496, + "GTCCAAA": 1497, + "CTCAATT": 1498, + "TCCAGAA": 1499, + "GTATATA": 1500, + "TAAAGTT": 1501, + "TCTCAAAA": 1502, + "TCCATCA": 1503, + "GTCTGAA": 1504, + "TGAGAGA": 1505, + "TGATTTG": 1506, + "TTAGCC": 1507, + "CTCCATG": 1508, + "TCCCTGA": 1509, + "GAGCTA": 1510, + "CCCCCCCC": 1511, + "GTGGAAA": 1512, + "CTGGGAA": 1513, + "CAATGAA": 1514, + "CCACACA": 1515, + "CTTTCAA": 1516, + "CGGAGG": 1517, + "TCGTGA": 1518, + "CCAGAAA": 1519, + "GTTTTAAA": 1520, + "TGTTGAA": 1521, + "TCCTGTG": 1522, + "CTAAATG": 1523, + "TCCTTTA": 1524, + "GTCTGGG": 1525, + "TCTCTTTT": 1526, + "TACGG": 1527, + "TATTGTA": 1528, + "TTAGTG": 
1529, + "TTACC": 1530, + "TAATCCCAGCACTTTG": 1531, + "TCTGGAA": 1532, + "CTTCTCA": 1533, + "CGCATT": 1534, + "TATTTAAA": 1535, + "TCACACA": 1536, + "TAATCAA": 1537, + "GCGAAA": 1538, + "GGGCCA": 1539, + "GTTCATT": 1540, + "GAGAAAAA": 1541, + "TTTTGTA": 1542, + "TACTTTT": 1543, + "TCGAGG": 1544, + "GTGAAAAA": 1545, + "CAATATA": 1546, + "TCCCATG": 1547, + "CAATTAA": 1548, + "CTGGAAA": 1549, + "CCCAGCA": 1550, + "TCCCATT": 1551, + "TCCTGTT": 1552, + "CTCTTTA": 1553, + "TCCCCTT": 1554, + "GTTTCAA": 1555, + "GTCCAGG": 1556, + "GGAAGGA": 1557, + "TAGTTTT": 1558, + "TGACCTT": 1559, + "GTGCTGGGATTACAGG": 1560, + "TATTTATA": 1561, + "TCTGCAA": 1562, + "CTGAAAAA": 1563, + "TATGTTA": 1564, + "CTTCACA": 1565, + "GCACAGG": 1566, + "CCTGCTG": 1567, + "TTTTTTAA": 1568, + "GTTATTA": 1569, + "CCCTTTT": 1570, + "TGATTTA": 1571, + "TACAAAA": 1572, + "TAAGTAA": 1573, + "TTTTTAAA": 1574, + "CATCTC": 1575, + "GTGGTGA": 1576, + "GTGGAGA": 1577, + "CTCTGCA": 1578, + "GTTAAAAA": 1579, + "TACATACA": 1580, + "CTTTGTG": 1581, + "GGACACA": 1582, + "TCTGATG": 1583, + "TATTATT": 1584, + "TCTTCTA": 1585, + "CTGTGTT": 1586, + "TCAGCTT": 1587, + "CTTTATA": 1588, + "GGCGC": 1589, + "TCCCTCA": 1590, + "GTACC": 1591, + "TGGAGAA": 1592, + "CAAAAATT": 1593, + "TCTTTAA": 1594, + "CTCTCTC": 1595, + "TGAGTGA": 1596, + "GCAGCTT": 1597, + "CGGATT": 1598, + "TACGA": 1599, + "TCTTGTT": 1600, + "TCGTAA": 1601, + "GCCTGTG": 1602, + "TATTCTG": 1603, + "GGGATA": 1604, + "GGGTCC": 1605, + "TGAGATT": 1606, + "CTTTTATT": 1607, + "TCCCACA": 1608, + "CATGGTG": 1609, + "TTAGGA": 1610, + "GAACACA": 1611, + "TCATAAA": 1612, + "CAACATT": 1613, + "GGTCCA": 1614, + "GAATTTG": 1615, + "TATTAATT": 1616, + "TCCTGGG": 1617, + "GCAGCAA": 1618, + "CTCTTCA": 1619, + "GAAGAGG": 1620, + "TCTGTCA": 1621, + "CTGAATG": 1622, + "CCACAAA": 1623, + "GTGGAGG": 1624, + "TGATTAA": 1625, + "CTCCCTCC": 1626, + "CACACACACACACACACACACACACACACACA": 1627, + "GCGATG": 1628, + "CATTCTG": 1629, + "GTAGAAA": 1630, + "TCATCAA": 1631, + "TTTTCAA": 1632, 
+ "TATGTATG": 1633, + "CCAAATG": 1634, + "TAATTTTA": 1635, + "TAAGGAA": 1636, + "CTTGAAA": 1637, + "AAAAAAAAAAAA": 1638, + "GCTCCTG": 1639, + "GCAGATG": 1640, + "GAAAAATT": 1641, + "GACGC": 1642, + "GTGGGGG": 1643, + "GTCAATT": 1644, + "CTTGCTT": 1645, + "TGACACA": 1646, + "GTGTGTT": 1647, + "CCAGAGA": 1648, + "CCCAGCC": 1649, + "TAAAGAAA": 1650, + "GTCCATT": 1651, + "TAAATTAA": 1652, + "CCCAAAA": 1653, + "GAATTAA": 1654, + "TGAATTA": 1655, + "TTTTTTTG": 1656, + "CCAGCTT": 1657, + "CAATTTG": 1658, + "CTGTTTG": 1659, + "GTCTCAA": 1660, + "GTTTGTG": 1661, + "GGCATA": 1662, + "GGTACA": 1663, + "TGATGTG": 1664, + "GATTTCA": 1665, + "TCTGCTT": 1666, + "GTAATTA": 1667, + "TAAAAAAAA": 1668, + "GCCGCC": 1669, + "TGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTG": 1670, + "GCGTCA": 1671, + "GCTCATT": 1672, + "GAACCTG": 1673, + "TAAACAAA": 1674, + "GTGCTGA": 1675, + "TCAGGAA": 1676, + "TCCTCAA": 1677, + "TCTATTTT": 1678, + "TCTGTTTT": 1679, + "CAGAGCA": 1680, + "CCAGGAA": 1681, + "GTCTTTA": 1682, + "TCTTCAA": 1683, + "TCAAAATT": 1684, + "GCTTATT": 1685, + "GTTCCTT": 1686, + "CACCTA": 1687, + "TCACTGA": 1688, + "GAAGCAA": 1689, + "TAAAGA": 1690, + "TCCTTCA": 1691, + "TCTCATG": 1692, + "TCAGTGA": 1693, + "TACACAA": 1694, + "CACGTG": 1695, + "CCTAAAA": 1696, + "GCCTTTG": 1697, + "GGCTTTT": 1698, + "GTTGAAA": 1699, + "GTTCTC": 1700, + "CTAGA": 1701, + "CTACAAA": 1702, + "GCACAAA": 1703, + "TTACATT": 1704, + "GGCCCC": 1705, + "TAATGTG": 1706, + "CTGCCTT": 1707, + "TCCCAGA": 1708, + "GTGAATG": 1709, + "GGACAGG": 1710, + "GGATGTG": 1711, + "GTTTATA": 1712, + "TGACCAA": 1713, + "GTGGCTG": 1714, + "GTTCTCA": 1715, + "CTTATTTT": 1716, + "CTGGAGA": 1717, + "TTACAAA": 1718, + "GTCTTCA": 1719, + "CAAGAGA": 1720, + "CCATTTG": 1721, + "TCACAGA": 1722, + "CTAGTA": 1723, + "CATTATT": 1724, + "TTAGA": 1725, + "GCTCTCC": 1726, + "GCGCCA": 1727, + "TATGTTTT": 1728, + "TCCTCCA": 1729, + "CAGAAAAA": 1730, + "GTGGGAA": 1731, + "TAATCTT": 1732, + "TGAGTCA": 1733, + "CTGCTC": 1734, + "GTCTCCA": 1735, + "TCATGTT": 
1736, + "GTTTCCA": 1737, + "TAAGCAA": 1738, + "CTAAAAATA": 1739, + "TGACTGA": 1740, + "TCGGTT": 1741, + "TTAGAAA": 1742, + "TAAGCC": 1743, + "TAAAGCA": 1744, + "CCTCTCC": 1745, + "CCTCCTT": 1746, + "TCAGATT": 1747, + "TATGAAAA": 1748, + "GCTGATG": 1749, + "CATATTTT": 1750, + "GCTCCAA": 1751, + "CGGCGG": 1752, + "CCACTGA": 1753, + "CAGCAAA": 1754, + "CTGTCTT": 1755, + "CTAGCA": 1756, + "TCGGGG": 1757, + "CACAGCA": 1758, + "GCTGATT": 1759, + "CTAGGA": 1760, + "TAACTC": 1761, + "TCATATT": 1762, + "CCTTCTT": 1763, + "CTGCAAA": 1764, + "CCCGC": 1765, + "GGTCTA": 1766, + "CCCAGGA": 1767, + "GTGTCTG": 1768, + "TAATAATAATAA": 1769, + "TCACATG": 1770, + "CAATTTA": 1771, + "TATATATATATATATATATATATATATATATA": 1772, + "CCACAGA": 1773, + "TCAATTTT": 1774, + "GTATTAA": 1775, + "GAACATT": 1776, + "TCTCTTA": 1777, + "CTATTTG": 1778, + "TCTTTCC": 1779, + "GGTTAAA": 1780, + "GCTAATT": 1781, + "CTGCTGA": 1782, + "TACCTA": 1783, + "CAGGGTT": 1784, + "TCGCCA": 1785, + "CAAAAATTA": 1786, + "CTTCTGA": 1787, + "GCATGTG": 1788, + "CTATTAA": 1789, + "GCACATG": 1790, + "CAACATG": 1791, + "TCATGAA": 1792, + "GAATGTT": 1793, + "GGGTTTT": 1794, + "CTGCCTG": 1795, + "GTCCACA": 1796, + "TAAACA": 1797, + "CTCTGGA": 1798, + "GACCCC": 1799, + "GGCAAAA": 1800, + "TCTGTTA": 1801, + "CTAGTG": 1802, + "CTATATA": 1803, + "TCAGTCA": 1804, + "TAACTAA": 1805, + "GAAGATG": 1806, + "GTCTTAA": 1807, + "CAAGGAA": 1808, + "GTAAAAAA": 1809, + "TCCCCTG": 1810, + "TCGCAA": 1811, + "TCTGCCTG": 1812, + "CCTTTTA": 1813, + "GTCCCAGCTA": 1814, + "TATATATG": 1815, + "TATTGTG": 1816, + "TGTGTTTT": 1817, + "GCGCAA": 1818, + "CACAGTG": 1819, + "TAAGATT": 1820, + "CTCTGTA": 1821, + "GGAGGCTGA": 1822, + "GGACAAA": 1823, + "TATTAAAAA": 1824, + "TCGTCC": 1825, + "TCGGAA": 1826, + "CTATAAA": 1827, + "CTTCAGA": 1828, + "CTAGAAA": 1829, + "CATTCAA": 1830, + "CACGCA": 1831, + "CAGGATT": 1832, + "CCATCTT": 1833, + "GTAGCC": 1834, + "GAATTTA": 1835, + "CACGC": 1836, + "CAATCC": 1837, + "TGAGCAA": 1838, + "GAAGCTG": 1839, + "TCAATTA": 
1840, + "GAAGTCA": 1841, + "CTGCACA": 1842, + "CCACGG": 1843, + "GGATCTT": 1844, + "CTCCTGCCTCAGCCTCC": 1845, + "TAAATGAA": 1846, + "CCGTC": 1847, + "TCGGTG": 1848, + "TTTTATTA": 1849, + "GCAGGGG": 1850, + "GCAGGTG": 1851, + "TCTATTA": 1852, + "TAACTTA": 1853, + "CTAATTTT": 1854, + "CCCGCC": 1855, + "TAATACA": 1856, + "GGATTAAA": 1857, + "TCTCTCTG": 1858, + "GCTTCTT": 1859, + "CATTTATT": 1860, + "CCAGAGG": 1861, + "GGACAGA": 1862, + "GCCAATT": 1863, + "TCCCCAA": 1864, + "GTTGATT": 1865, + "GAAGAAAA": 1866, + "GCATTTA": 1867, + "CTCTAAA": 1868, + "CACACACACACA": 1869, + "CCTCAAA": 1870, + "TATAATT": 1871, + "CAATGTT": 1872, + "GCCCAGA": 1873, + "GTATATT": 1874, + "CTAAAAAA": 1875, + "CCACAGG": 1876, + "TAAGAGA": 1877, + "TCCTTAA": 1878, + "TATTTTTT": 1879, + "GAATATA": 1880, + "GGATTTG": 1881, + "GTGTGAA": 1882, + "CTGGCTT": 1883, + "GCGGCA": 1884, + "TCCGCC": 1885, + "GCATCTT": 1886, + "TCTAATA": 1887, + "CTGCATT": 1888, + "CTCTGCC": 1889, + "TCACTCA": 1890, + "TCAGCAA": 1891, + "TATTATG": 1892, + "CCAGCTG": 1893, + "GATCTC": 1894, + "GCCTCTT": 1895, + "CTTCCAA": 1896, + "TCCTAAA": 1897, + "TCATCTG": 1898, + "CTATTTA": 1899, + "CTGCAGG": 1900, + "CAAGCAA": 1901, + "GCGGAA": 1902, + "GAAATAAA": 1903, + "TAAAATAA": 1904, + "TCACCTT": 1905, + "CCATGTG": 1906, + "GACCTA": 1907, + "CAGATGA": 1908, + "GTGGCTT": 1909, + "TTATTATTATTA": 1910, + "TCCCGG": 1911, + "TATTTGTT": 1912, + "CTGTAAA": 1913, + "TCCATCCA": 1914, + "CTGTATA": 1915, + "GTTTCTA": 1916, + "GTTGCTT": 1917, + "CCATGAA": 1918, + "GCTCTTA": 1919, + "CTTCATG": 1920, + "GTTCCTG": 1921, + "GCTGGGA": 1922, + "TCAGAGG": 1923, + "CATTAAAA": 1924, + "TCAGTAA": 1925, + "GAATGTG": 1926, + "CTTATTA": 1927, + "GCACTGA": 1928, + "TGAGGTT": 1929, + "CATCAAA": 1930, + "CTTCTCC": 1931, + "GTTTATG": 1932, + "CTTTCCA": 1933, + "GTGCCTG": 1934, + "GAAAGGA": 1935, + "GCATCTG": 1936, + "TACCCA": 1937, + "TAACAGA": 1938, + "AAAAAAAAAAA": 1939, + "CTATGAA": 1940, + "CAGTAAA": 1941, + "TAGCTA": 1942, + "TCGTTTT": 1943, + 
"GTGTCTT": 1944, + "GAGCAAA": 1945, + "TCTAAAAA": 1946, + "GTTCACA": 1947, + "GAAATGA": 1948, + "CAAATGA": 1949, + "GCCCTGA": 1950, + "GTGTTTA": 1951, + "TCATGTG": 1952, + "CATATTA": 1953, + "TCAAAAAAA": 1954, + "TAAGTTA": 1955, + "TCTCTCTT": 1956, + "CCAGTGA": 1957, + "CCTCTGA": 1958, + "CAAGATG": 1959, + "GCCTGTT": 1960, + "GTTTGGG": 1961, + "CATTCATT": 1962, + "GCCCCTG": 1963, + "GTTCTGA": 1964, + "GCGGCC": 1965, + "GCGGTT": 1966, + "CAAAACAAAA": 1967, + "TACATATA": 1968, + "GAATTAAA": 1969, + "TCAAGAA": 1970, + "CTGTATT": 1971, + "TTTTTATT": 1972, + "GATTATT": 1973, + "TCTAATG": 1974, + "GTTGCTG": 1975, + "TGAATGAA": 1976, + "TCAGCTG": 1977, + "CTTGATT": 1978, + "CAGAATG": 1979, + "CTAATTA": 1980, + "TATAATG": 1981, + "GTTTTGTTTT": 1982, + "CCAGCCTG": 1983, + "TGATGGA": 1984, + "GCAGATT": 1985, + "CTCTATT": 1986, + "GCAGTCA": 1987, + "TAAGTGA": 1988, + "CTACACA": 1989, + "CGCATG": 1990, + "TAGCCA": 1991, + "GTGGCTCA": 1992, + "CAAATAAA": 1993, + "GTGCTCA": 1994, + "TTTTTTTTTT": 1995, + "TAACATG": 1996, + "TCCCAGCTA": 1997, + "CAAAGTA": 1998, + "TCATATA": 1999, + "CAGCATG": 2000, + "TGATCTT": 2001, + "CATAATT": 2002, + "TGTGTTA": 2003, + "TTTTGAA": 2004, + "TTAATTA": 2005, + "GATATTA": 2006, + "TCATTCA": 2007, + "TGATATA": 2008, + "TGACTCA": 2009, + "GACGTT": 2010, + "TGACATG": 2011, + "GTTGTGA": 2012, + "CATTTTTT": 2013, + "GCCTGGA": 2014, + "CTATGTT": 2015, + "CTTTGGG": 2016, + "GTCTCAAA": 2017, + "CTGGCTG": 2018, + "CCACATG": 2019, + "GGCGTG": 2020, + "CTTAATG": 2021, + "TAAGATG": 2022, + "GTATAAA": 2023, + "TGTATTA": 2024, + "TAACTCA": 2025, + "GAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGA": 2026, + "GCATGAA": 2027, + "GTTAATG": 2028, + "TCCAGGA": 2029, + "GAGAGAAA": 2030, + "TCTCTGTG": 2031, + "CTCTCTA": 2032, + "CCACCTG": 2033, + "GCCAGGA": 2034, + "CTGGAGG": 2035, + "CCATTTA": 2036, + "GTCTGGA": 2037, + "GCCCACA": 2038, + "TAGAGAA": 2039, + "CAACTCA": 2040, + "GGCAGGA": 2041, + "TCTTATG": 2042, + "CAAAGGA": 2043, + "GGTAAAA": 2044, + "GAGAGGA": 2045, + "GTCCAGA": 
2046, + "GCCCTCA": 2047, + "GATATTTT": 2048, + "CAGGGAA": 2049, + "CCACATT": 2050, + "GAGGAGG": 2051, + "GAAACTT": 2052, + "CAGAATT": 2053, + "TCAGATG": 2054, + "TATTTCC": 2055, + "TACAGTG": 2056, + "TGAGCTG": 2057, + "CCATCTG": 2058, + "GAGAATG": 2059, + "TCAACAA": 2060, + "ATT": 2061, + "TAACTGA": 2062, + "TGAGAGG": 2063, + "CACTGAA": 2064, + "CCACCTT": 2065, + "CTGCAGA": 2066, + "TCACCAA": 2067, + "TGAGCTT": 2068, + "CAAAGCA": 2069, + "GGTTTTA": 2070, + "CGGGGTT": 2071, + "TCCAAAAA": 2072, + "TATGTATA": 2073, + "CCAGATG": 2074, + "TCCATTTT": 2075, + "CTGCTCA": 2076, + "GATAATT": 2077, + "CCACCAA": 2078, + "CTCCTCC": 2079, + "GAGAATT": 2080, + "GAAAGTA": 2081, + "TAAAATAAAA": 2082, + "CTTCTTA": 2083, + "CTGTTTA": 2084, + "GAATCAA": 2085, + "GCATGTT": 2086, + "GCACGG": 2087, + "GACTGAA": 2088, + "GTGCACA": 2089, + "GACGTG": 2090, + "TATACAA": 2091, + "TCGACA": 2092, + "GAAGACA": 2093, + "TAAAGGA": 2094, + "GATCAAA": 2095, + "CAGTGTG": 2096, + "CTAGCC": 2097, + "GAGGAAAA": 2098, + "TCTGAAAA": 2099, + "GAACCCA": 2100, + "GATGGATG": 2101, + "GTTCTTA": 2102, + "CTATATT": 2103, + "GCATTAA": 2104, + "TCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTC": 2105, + "TCAGTC": 2106, + "TATTTTTG": 2107, + "GAGGATT": 2108, + "GTATGTG": 2109, + "TAACCAA": 2110, + "GTTGTTTT": 2111, + "TTTTTCTT": 2112, + "GTGTTAA": 2113, + "CTTGGAA": 2114, + "AAAAAATG": 2115, + "CAATGTG": 2116, + "GTGCCTT": 2117, + "GCCTCAA": 2118, + "GAGTCTT": 2119, + "GCTAATTTT": 2120, + "CGAAAAA": 2121, + "GTGTATA": 2122, + "GCGTTA": 2123, + "CTGCACTCCAGCCTGGG": 2124, + "GTTCATG": 2125, + "CAAAGAAA": 2126, + "GCAGTAA": 2127, + "GGATGAA": 2128, + "CTTTATG": 2129, + "CAGGAAAA": 2130, + "TCCTGCA": 2131, + "CTGTCTG": 2132, + "GAACATG": 2133, + "GGATGGA": 2134, + "GCCTGAA": 2135, + "CAAAAATG": 2136, + "TCCAATG": 2137, + "CCAGCAA": 2138, + "GGCCTA": 2139, + "CAACTGA": 2140, + "GCACCTG": 2141, + "GTCTATT": 2142, + "CCTCTCA": 2143, + "GTGGTCA": 2144, + "GTGTAAA": 2145, + "GTACACA": 2146, + "GTAAAATT": 2147, + "GTACATT": 2148, + 
"TATATAAA": 2149, + "CTGTTAA": 2150, + "TAAGTCA": 2151, + "GCCTCCA": 2152, + "AAATTAAA": 2153, + "GTGCAGG": 2154, + "TCCTGGA": 2155, + "GTGCAAA": 2156, + "GCGTCC": 2157, + "CCATTAA": 2158, + "GGAGGGA": 2159, + "TCACTTA": 2160, + "TCATTAAA": 2161, + "CAACATA": 2162, + "TAATAGA": 2163, + "TAATGTA": 2164, + "GATTTTTT": 2165, + "GTTGTCA": 2166, + "GGAGACA": 2167, + "GTGTGGG": 2168, + "TCACAGG": 2169, + "TCGGCA": 2170, + "CTCCCTG": 2171, + "GACCAAA": 2172, + "TGTTTATT": 2173, + "CGAATG": 2174, + "CTCAATG": 2175, + "TCACCTG": 2176, + "CAGTGTT": 2177, + "TGAGACA": 2178, + "TAGGGG": 2179, + "GAAAAATG": 2180, + "GTTGAGA": 2181, + "TCGATA": 2182, + "CTCGGGAGG": 2183, + "GTTGTC": 2184, + "CCAGTCA": 2185, + "GCCCAGGCTG": 2186, + "GAACAGA": 2187, + "GGCTCACTGCAA": 2188, + "GCAGACA": 2189, + "TGAGGTG": 2190, + "CACGTT": 2191, + "TAAGAAAA": 2192, + "CCAGGCA": 2193, + "GTATCTT": 2194, + "CTTGGGAGG": 2195, + "CTTTCTA": 2196, + "CCGCTG": 2197, + "GAGCTCA": 2198, + "GAGACAGA": 2199, + "CTTCAGG": 2200, + "GCACATT": 2201, + "GTACAAA": 2202, + "CTTGTAA": 2203, + "GTGGGTG": 2204, + "GAAGTGA": 2205, + "GGTCTC": 2206, + "GTATGTT": 2207, + "GCACTCA": 2208, + "TTATGTT": 2209, + "CAAGTCA": 2210, + "CAAGTGA": 2211, + "GAAACTA": 2212, + "TAAATAAAA": 2213, + "TCTTAAAA": 2214, + "GTTGGAA": 2215, + "GTTCTAA": 2216, + "CCACTC": 2217, + "CAGTGAA": 2218, + "GAAAGG": 2219, + "GCACGA": 2220, + "TAACTTTT": 2221, + "GTTGTTA": 2222, + "TCAGTTA": 2223, + "CGGATG": 2224, + "TATTTGAA": 2225, + "CCCTGAA": 2226, + "GCCCTC": 2227, + "CTTCTAA": 2228, + "TTTGTTTT": 2229, + "GAGCTGA": 2230, + "CTGTGGG": 2231, + "CAAGATT": 2232, + "GAAGCTT": 2233, + "TGAGTAA": 2234, + "CTTGCTG": 2235, + "GGATGGG": 2236, + "CGTATG": 2237, + "TCCATTA": 2238, + "GTCTGCA": 2239, + "GCCATTTT": 2240, + "GTTGTAA": 2241, + "CACACAA": 2242, + "GGACTACAGG": 2243, + "CGTTTTA": 2244, + "TCTTCC": 2245, + "TAACCTT": 2246, + "CTTTAAAA": 2247, + "TGAATTTT": 2248, + "CTACAGA": 2249, + "GCAAGAA": 2250, + "TAACAAAA": 2251, + "CAATTAAA": 2252, + 
"CCACTCA": 2253, + "CATGGTGAAA": 2254, + "CCCAGAA": 2255, + "CTACATT": 2256, + "CCGAGG": 2257, + "TCCAGTG": 2258, + "TGAGTTA": 2259, + "GGAGTCA": 2260, + "TAACGA": 2261, + "GAGTAAA": 2262, + "GACTCTG": 2263, + "GGAGCTT": 2264, + "TACTCC": 2265, + "CTGCATG": 2266, + "GCTTTTTT": 2267, + "GTCTAAA": 2268, + "GTGCGG": 2269, + "CATCTCA": 2270, + "TGATCAA": 2271, + "GGAGATT": 2272, + "GCAAAAAA": 2273, + "CACCAAA": 2274, + "TGACGG": 2275, + "CAGAGG": 2276, + "GTTGATG": 2277, + "CTTGTCA": 2278, + "TCCACCTG": 2279, + "GGAGCAA": 2280, + "CAAGTAA": 2281, + "CCATAAA": 2282, + "GTGCATG": 2283, + "GCATATT": 2284, + "GTAGATT": 2285, + "GCCTAA": 2286, + "CTCAAAAA": 2287, + "GGAGAAAA": 2288, + "CTATCC": 2289, + "TAATATTA": 2290, + "GTGCTC": 2291, + "CAATATG": 2292, + "TGTGGAA": 2293, + "TGACTC": 2294, + "GTGTATG": 2295, + "TTTTAATG": 2296, + "GCTCTAA": 2297, + "CACAATG": 2298, + "CAGCTCA": 2299, + "GTTGGTT": 2300, + "CTAAAATT": 2301, + "GTCTATG": 2302, + "TGTGAAAA": 2303, + "CTGGGTT": 2304, + "CCCCTCC": 2305, + "CCCTCTT": 2306, + "GCAGGGA": 2307, + "GAAACCA": 2308, + "CATTTCC": 2309, + "GCAGCCA": 2310, + "TCATATG": 2311, + "GCAGGCA": 2312, + "CGTAAAA": 2313, + "TGACCTG": 2314, + "CAGAGGTT": 2315, + "CTTGTGA": 2316, + "TTATCTT": 2317, + "CTGTATG": 2318, + "GTCAATG": 2319, + "GGACGG": 2320, + "GCGTAA": 2321, + "CAAACTA": 2322, + "TAAATGTT": 2323, + "CTTCGG": 2324, + "CTCCCCA": 2325, + "TACAATG": 2326, + "TCTGTAA": 2327, + "GAATATG": 2328, + "GCGGGA": 2329, + "GGACATT": 2330, + "TTATGAA": 2331, + "GGATGTT": 2332, + "GGACATG": 2333, + "TCAGGTG": 2334, + "CAACAAAA": 2335, + "GAAAGAGA": 2336, + "GTGGATG": 2337, + "GGGCTA": 2338, + "CCATCAA": 2339, + "CAGCTGA": 2340, + "CTCCACC": 2341, + "CAATCAA": 2342, + "GTGGTC": 2343, + "TGACAGG": 2344, + "CCATTCA": 2345, + "GTCCCTG": 2346, + "CAGACACA": 2347, + "GTTGGTG": 2348, + "CCTCCTG": 2349, + "GAACTGA": 2350, + "TATTCATT": 2351, + "GCCCATG": 2352, + "CAATCTT": 2353, + "GAAAGCA": 2354, + "GAATCTG": 2355, + "TTATTTTA": 2356, + "GTTTGGA": 2357, + 
"TTTTTGTT": 2358, + "GGGAATG": 2359, + "GCGACA": 2360, + "TAAACTG": 2361, + "CCATATT": 2362, + "GGATCC": 2363, + "CAAGCTT": 2364, + "TAAAAAAAAA": 2365, + "TCACTC": 2366, + "CACTGTT": 2367, + "TGTTAATT": 2368, + "GGACTGA": 2369, + "GGAGTGA": 2370, + "CATACACA": 2371, + "GTTTGTA": 2372, + "TCCAGCA": 2373, + "GTGCATT": 2374, + "GGAAAAAA": 2375, + "CCAAGAA": 2376, + "TCAATA": 2377, + "CTTCCCA": 2378, + "TGAGAAAA": 2379, + "GGCCTCCCAAA": 2380, + "CAAGCTG": 2381, + "GCCCAAA": 2382, + "TGACTTA": 2383, + "CAGCCTT": 2384, + "CTGGATT": 2385, + "TTTTTTTA": 2386, + "TCACGG": 2387, + "GCAGTTA": 2388, + "TGACTAA": 2389, + "TTACAGG": 2390, + "TGATATG": 2391, + "TAATTATT": 2392, + "TCTTGAA": 2393, + "GCCCCTT": 2394, + "GTTCAGA": 2395, + "CTCTATG": 2396, + "CCATGGA": 2397, + "GAGGGAA": 2398, + "GGAGGCA": 2399, + "CTTTGCA": 2400, + "TCTTGG": 2401, + "GGAGGTT": 2402, + "GCCAATG": 2403, + "CTGGTGA": 2404, + "CAACCAA": 2405, + "CCAGTC": 2406, + "CTTGAGA": 2407, + "TACAGCA": 2408, + "CTTGTC": 2409, + "GACGGA": 2410, + "CTTCTTTT": 2411, + "GTGGC": 2412, + "GAGGATG": 2413, + "CAATAAAA": 2414, + "GAAATTTT": 2415, + "AAAAAAAAAA": 2416, + "CTCTATA": 2417, + "GTATGAA": 2418, + "CTTGTTA": 2419, + "TAACATA": 2420, + "CAAACACA": 2421, + "TGATTAAA": 2422, + "GCTCTGTT": 2423, + "GTGGGTT": 2424, + "GTTGGGG": 2425, + "GTGTGTA": 2426, + "GTAATTTT": 2427, + "GTATCC": 2428, + "TGTGTGTGTGTG": 2429, + "TCTTCCTT": 2430, + "TCACTAA": 2431, + "TCTCCAAA": 2432, + "TATCAAA": 2433, + "TGATGGG": 2434, + "GGATATT": 2435, + "CAAATTTT": 2436, + "GTTCAGG": 2437, + "GTGGATT": 2438, + "GTGCAGA": 2439, + "GCTGCC": 2440, + "CTCAGAA": 2441, + "GCAGTC": 2442, + "GGATAAA": 2443, + "GCCTTCA": 2444, + "CCAGGTG": 2445, + "TATCTC": 2446, + "CAATGCA": 2447, + "CCCACTG": 2448, + "GTGTATT": 2449, + "CGACAGA": 2450, + "TGAGATA": 2451, + "CCAGGTT": 2452, + "TGTTTAA": 2453, + "CATCATG": 2454, + "TGATTCA": 2455, + "GCAATTA": 2456, + "GAAATGAA": 2457, + "CTTGGTT": 2458, + "GAAGATT": 2459, + "GGATTAA": 2460, + "CCTCATT": 2461, + 
"GGCCAGGCTG": 2462, + "GCTATTA": 2463, + "GCCAGCA": 2464, + "GAGACAGG": 2465, + "CTTGAGG": 2466, + "CAGTCTT": 2467, + "GTTCTCC": 2468, + "TATTTCAA": 2469, + "TGACGA": 2470, + "CATGAAAA": 2471, + "CATTATG": 2472, + "TAAATTTA": 2473, + "GAGTGAA": 2474, + "CAACAGG": 2475, + "TAAGCTT": 2476, + "CACATTTT": 2477, + "GATCTCA": 2478, + "TAGTCC": 2479, + "GACCCTG": 2480, + "TAATGCA": 2481, + "TAAGTC": 2482, + "TAATAATT": 2483, + "GAAGTAA": 2484, + "CAACTC": 2485, + "CATCATT": 2486, + "GACGAA": 2487, + "GAAACAAA": 2488, + "TATTTCTG": 2489, + "CATTAATT": 2490, + "CCACCCC": 2491, + "TAATATTTT": 2492, + "GTTTAAAA": 2493, + "GTATCTG": 2494, + "GTCAAAAA": 2495, + "GATGCTG": 2496, + "TGTTCTG": 2497, + "GGTCAAA": 2498, + "GTAGGAA": 2499, + "GTATATG": 2500, + "TGATCTG": 2501, + "GGGGCTG": 2502, + "GCATCAA": 2503, + "GCCAAAAA": 2504, + "CCACGA": 2505, + "GCTAATG": 2506, + "CAGAGAAA": 2507, + "CCTTCTG": 2508, + "TCCTCTA": 2509, + "GCAGGTT": 2510, + "CTCACTG": 2511, + "TAGATTA": 2512, + "GCCGAGA": 2513, + "CCATCCA": 2514, + "CTTTACA": 2515, + "GTACATG": 2516, + "GCACCAA": 2517, + "CTTTGTA": 2518, + "CTATGTG": 2519, + "TCACTTTT": 2520, + "TGAGTC": 2521, + "CAAGAAAA": 2522, + "CTGACTG": 2523, + "GTTTTTTTT": 2524, + "GCATAAA": 2525, + "TAATCTG": 2526, + "GAAAAAAAA": 2527, + "CAGGATG": 2528, + "TGAGCCA": 2529, + "GAATTCA": 2530, + "TCAGACA": 2531, + "GTTCCAA": 2532, + "TCAGGTT": 2533, + "CAAACTG": 2534, + "CATTTCTT": 2535, + "TGTTAAAA": 2536, + "CCAGACA": 2537, + "CAAGTTA": 2538, + "CATGTTA": 2539, + "CATTCTA": 2540, + "TCTTTTTG": 2541, + "TGAGGGG": 2542, + "CACATTA": 2543, + "TAAAATAAA": 2544, + "GCATATA": 2545, + "TGTTCTA": 2546, + "GAAGGGG": 2547, + "GAGTGTG": 2548, + "TAAGACA": 2549, + "GAACTC": 2550, + "CCAGTAA": 2551, + "GAGAGAGG": 2552, + "GCGACC": 2553, + "CAATTCA": 2554, + "CGGCTG": 2555, + "CCAGATT": 2556, + "CCTGGG": 2557, + "GGAAGAAA": 2558, + "GAGAGG": 2559, + "TCAAAATG": 2560, + "CCTCATG": 2561, + "TAAAGG": 2562, + "CTTTGGA": 2563, + "CCAGGGA": 2564, + "GTACAGA": 2565, + 
"CTGAGGCAGGA": 2566, + "TGTTTCTT": 2567, + "CCAGGCTG": 2568, + "CTGAGG": 2569, + "GAGGCTG": 2570, + "CTCCTGGG": 2571, + "GAAGTC": 2572, + "CGACC": 2573, + "GGACTCA": 2574, + "GGAGTC": 2575, + "CACAATT": 2576, + "GTGTTCA": 2577, + "GACTAAA": 2578, + "GTCATTA": 2579, + "CAAAATTA": 2580, + "TGAAGAAA": 2581, + "GCACCTT": 2582, + "GTTTGCA": 2583, + "TCCTGCC": 2584, + "GTAGATG": 2585, + "GCCTGCA": 2586, + "GAGTTAA": 2587, + "TCCCTTA": 2588, + "GTGGTTA": 2589, + "TCGGGA": 2590, + "TACATAA": 2591, + "TCTCTCCA": 2592, + "CACTAAA": 2593, + "TATATATATATA": 2594, + "GTGGCAA": 2595, + "CACCATG": 2596, + "TTTGAAAA": 2597, + "CACACTG": 2598, + "CTTGGTG": 2599, + "TACACTG": 2600, + "CCTCCAA": 2601, + "CAACCTT": 2602, + "CAGCCAA": 2603, + "TTTTCAAA": 2604, + "TGATAGA": 2605, + "TACACTA": 2606, + "TCTGGG": 2607, + "TCCCAGCA": 2608, + "TAGGAAAA": 2609, + "CTTGGGG": 2610, + "TCTGTGAA": 2611, + "CCTTATT": 2612, + "CATTTAAA": 2613, + "TTTTATTTTA": 2614, + "GCCCTCC": 2615, + "CTGAGCA": 2616, + "CCCGTG": 2617, + "GTAGTGA": 2618, + "TCCTATT": 2619, + "GAAGGTG": 2620, + "TGTGCTG": 2621, + "TCCACTG": 2622, + "TAATCTA": 2623, + "TGATGTA": 2624, + "GTGGTAA": 2625, + "TAATGGA": 2626, + "GATGAAAA": 2627, + "GTAGTAA": 2628, + "GTGGGGA": 2629, + "GTGTCAA": 2630, + "CAGACTG": 2631, + "TCGAAAA": 2632, + "CTCATTA": 2633, + "TAATAATA": 2634, + "CTCAGAAA": 2635, + "CATCCTT": 2636, + "CCGCTT": 2637, + "GGAAGG": 2638, + "CCGTGA": 2639, + "CCACTCC": 2640, + "CTAGAGA": 2641, + "TAGAATG": 2642, + "GGATTTA": 2643, + "TTAATTTT": 2644, + "GCTAATA": 2645, + "TCCCCCA": 2646, + "CAAATATT": 2647, + "GATCATG": 2648, + "TCTTAATT": 2649, + "CAGTATT": 2650, + "GTCTTGAA": 2651, + "CCGAAA": 2652, + "CTATTCA": 2653, + "TAAGATA": 2654, + "CTTGCAA": 2655, + "GCCCCAA": 2656, + "TCCCTAA": 2657, + "GAAGTTA": 2658, + "GATGATG": 2659, + "CTTGATG": 2660, + "CCCTAAA": 2661, + "CCTGCCTG": 2662, + "GACATTTT": 2663, + "CCAGCCA": 2664, + "TGTGTGTGTG": 2665, + "GTCTATA": 2666, + "TCTCTGTT": 2667, + "GTCTGTA": 2668, + "TATAATA": 2669, 
+ "CTTGTTTT": 2670, + "CGCCATT": 2671, + "CTCAGCA": 2672, + "TACAGTT": 2673, + "CAAGAGG": 2674, + "GGAAGCA": 2675, + "GCCTTTA": 2676, + "CCCCATT": 2677, + "CAACGA": 2678, + "GTCATTTT": 2679, + "CCCGCA": 2680, + "CAGTTAA": 2681, + "GAATCTT": 2682, + "CATGTTTT": 2683, + "CCGGGG": 2684, + "CTACTGA": 2685, + "TCACGA": 2686, + "TAAATTTG": 2687, + "GCCCATT": 2688, + "CTCTAGG": 2689, + "GGACCTG": 2690, + "TCAGGGA": 2691, + "GAGACTG": 2692, + "CCAAAAAA": 2693, + "GCCGG": 2694, + "CCAGGGG": 2695, + "TCAGAAAA": 2696, + "CATCTGA": 2697, + "TCTTCAAA": 2698, + "CTACAGG": 2699, + "GAGGCAGG": 2700, + "CATTGTA": 2701, + "TAAATCAA": 2702, + "GACTCTT": 2703, + "CTGATTA": 2704, + "GCATATG": 2705, + "GGACCTT": 2706, + "CAAGACA": 2707, + "TATTTATG": 2708, + "TATTTTAAA": 2709, + "CCGAGA": 2710, + "TCATTTTA": 2711, + "CTCACTCA": 2712, + "CCACCCA": 2713, + "CTCTAGA": 2714, + "CTACATG": 2715, + "GTGCTTA": 2716, + "CAACCTG": 2717, + "TCTGTGTT": 2718, + "TAAATATG": 2719, + "CAAAGG": 2720, + "CCCTGTT": 2721, + "GTTCGG": 2722, + "TGATAAAA": 2723, + "CACGAA": 2724, + "GTTGAGG": 2725, + "CAGAGTGA": 2726, + "GAAATTAA": 2727, + "CACATA": 2728, + "GAACAGG": 2729, + "TCTCCTGA": 2730, + "CCTGAGG": 2731, + "GGAGGCCAA": 2732, + "GTTTACA": 2733, + "TAACAGG": 2734, + "TGTGGTG": 2735, + "GCCTCCCAAA": 2736, + "CCATCCTG": 2737, + "GATTCTT": 2738, + "GAATGGA": 2739, + "GTAGTCA": 2740, + "CTCCTCTG": 2741, + "GAAAGAAAGAAAGAAA": 2742, + "CCCTGTG": 2743, + "CAGTATG": 2744, + "GCGATA": 2745, + "GGACTC": 2746, + "GAAAGA": 2747, + "TGTTGG": 2748, + "GTAGCTT": 2749, + "CATTTTAA": 2750, + "CCCTCTG": 2751, + "GCATTCA": 2752, + "CGATTA": 2753, + "TCACATA": 2754, + "TAATGAAA": 2755, + "GGAATTA": 2756, + "CTGTCAA": 2757, + "TAAATTAAA": 2758, + "CAAGTC": 2759, + "GTATTCA": 2760, + "GGCCATG": 2761, + "CTTTAGA": 2762, + "TGTTTCC": 2763, + "CATGTA": 2764, + "GAATAAAA": 2765, + "CAACTAA": 2766, + "TCATCTA": 2767, + "CACTCTT": 2768, + "CAGTTTG": 2769, + "CATAAAAA": 2770, + "GCATGCA": 2771, + "GATTTA": 2772, + "GAACCAA": 2773, 
+ "TCTGTGA": 2774, + "TCAGCCA": 2775, + "TCTCCACA": 2776, + "TCTCAGCTCA": 2777, + "TATCATG": 2778, + "GCACTTA": 2779, + "CGCCAGG": 2780, + "CGGGG": 2781, + "CATTAAAAA": 2782, + "TTTGTTA": 2783, + "GGATATA": 2784, + "TCGACC": 2785, + "TAATCCA": 2786, + "CCGC": 2787, + "CATTGTT": 2788, + "CCAGTTA": 2789, + "GTAGTTA": 2790, + "CTAGGAA": 2791, + "CCTAATT": 2792, + "TCATGGG": 2793, + "GAACTAA": 2794, + "GCTATTTT": 2795, + "CCGTCA": 2796, + "CAGATTA": 2797, + "CCATATA": 2798, + "CAACTTA": 2799, + "TCAGTTTT": 2800, + "CTACCTT": 2801, + "GCACTC": 2802, + "GTGTGGA": 2803, + "GTGCCAA": 2804, + "GACAATG": 2805, + "GACAATT": 2806, + "GTACCTT": 2807, + "TAAACATT": 2808, + "CAGGAGG": 2809, + "GTGCGA": 2810, + "GAAAATTA": 2811, + "TCTCTTAA": 2812, + "CCGATT": 2813, + "GATGATT": 2814, + "CCATGGG": 2815, + "TCGGTA": 2816, + "CCATATG": 2817, + "CCAGTCC": 2818, + "GCCTTAA": 2819, + "TGATCCA": 2820, + "GTTGCAA": 2821, + "GTAGAGG": 2822, + "CAGATTTT": 2823, + "GTACTTA": 2824, + "TCTTTCTTTCTTTCTT": 2825, + "GCTCTGTG": 2826, + "TCAATAA": 2827, + "GTTTAGA": 2828, + "GTTCGA": 2829, + "CAAGGTT": 2830, + "CTCATTTT": 2831, + "CACAGG": 2832, + "CATGCTG": 2833, + "GAACGG": 2834, + "TATAAAAA": 2835, + "GAAGGCA": 2836, + "GAGCATT": 2837, + "TGTTTGTG": 2838, + "GCTGTTA": 2839, + "GTCACTG": 2840, + "CAAATGAA": 2841, + "GTGACTG": 2842, + "GTTCTTTT": 2843, + "CAGGCTGGAGTGCAGTG": 2844, + "TGATGAAA": 2845, + "TAACGG": 2846, + "CTACTAA": 2847, + "GACATTA": 2848, + "GGACGA": 2849, + "GAGCATG": 2850, + "GCATGGG": 2851, + "CCACTTA": 2852, + "CTATCAA": 2853, + "GCTGTTTT": 2854, + "GTCGTG": 2855, + "CCTGGCC": 2856, + "TCTCTGAA": 2857, + "TGTTGTA": 2858, + "CAGCCAGG": 2859, + "GTTTAGG": 2860, + "CCGCAA": 2861, + "GGAGTAA": 2862, + "CCAATTA": 2863, + "CAGCAAAA": 2864, + "TCATCCA": 2865, + "CACGTA": 2866, + "TCATAGA": 2867, + "TAATTAAAA": 2868, + "CACTTAA": 2869, + "TCTTTATT": 2870, + "GAGATTA": 2871, + "TAAGAGG": 2872, + "CAAATTAA": 2873, + "GACGCA": 2874, + "CACGGA": 2875, + "GTGTGCA": 2876, + "TCT": 2877, + 
"TATTATTA": 2878, + "GAAATATT": 2879, + "GGAGTTA": 2880, + "TCTTTGA": 2881, + "CTGATTTT": 2882, + "TGTGAATT": 2883, + "TCCCACC": 2884, + "CCCTTTG": 2885, + "CAAGGTG": 2886, + "CAGAGTT": 2887, + "CCCCATG": 2888, + "CTACCAA": 2889, + "CTCCAAAA": 2890, + "CTTCCCC": 2891, + "CTGCTAA": 2892, + "GATTAAAA": 2893, + "GCTTATG": 2894, + "CTACTTA": 2895, + "TAAAAAATT": 2896, + "TCAGTCC": 2897, + "CTATTAAA": 2898, + "GAATGGG": 2899, + "CACAGTA": 2900, + "CAACGG": 2901, + "GGTTATT": 2902, + "TCACCCA": 2903, + "TGATGCA": 2904, + "TAATTTTTT": 2905, + "GTTTGAGA": 2906, + "GTATTAAA": 2907, + "GCCCCCA": 2908, + "TATAGTA": 2909, + "TAGTAAA": 2910, + "TGATACA": 2911, + "GTGGTTTT": 2912, + "CCACTAA": 2913, + "CACAGAGA": 2914, + "CCTCTGCCTCC": 2915, + "CAAAAAAAA": 2916, + "CTCTCTCC": 2917, + "CATAATA": 2918, + "GAAGCCA": 2919, + "GTTCCCA": 2920, + "TGTGTTTG": 2921, + "CAATGGA": 2922, + "TGAAGTA": 2923, + "CTTCATA": 2924, + "CACTGTG": 2925, + "GCTCTTTT": 2926, + "TGACATA": 2927, + "TAAAGAAAA": 2928, + "GAGAAATG": 2929, + "CAGGGAGG": 2930, + "TGTTCAA": 2931, + "GAGCCAA": 2932, + "GACAGAGA": 2933, + "GGCTGAA": 2934, + "CAAATATA": 2935, + "GTGGAAAA": 2936, + "TAAGGTT": 2937, + "GTGATTA": 2938, + "GGATCTG": 2939, + "GATGTTA": 2940, + "GACTACACA": 2941, + "TCCTATA": 2942, + "CTGCCAA": 2943, + "TCCCGA": 2944, + "GTGATTTT": 2945, + "GCGTTTT": 2946, + "CAGAGTA": 2947, + "GAAAGGAA": 2948, + "CACTTTG": 2949, + "CCCCAAAA": 2950, + "GCAACCCA": 2951, + "TGCATTTT": 2952, + "TCTAGAA": 2953, + "TACTTTG": 2954, + "TGAGGCA": 2955, + "CATCTCC": 2956, + "TCGCTA": 2957, + "TGACTTTT": 2958, + "GAGCCTG": 2959, + "CATTTGTT": 2960, + "TCTTTGTT": 2961, + "GCAAAATT": 2962, + "CCTGATT": 2963, + "GATAAAAA": 2964, + "GAGTGTT": 2965, + "TCCTGTA": 2966, + "TACAGAAA": 2967, + "TCCAGGAA": 2968, + "GCCAGTG": 2969, + "TAGATTTT": 2970, + "TAATAGG": 2971, + "CTCCTCA": 2972, + "CATTTTTG": 2973, + "CATTTCAA": 2974, + "GCCATCA": 2975, + "TAAAATATA": 2976, + "GACTGTT": 2977, + "GCATGGA": 2978, + "CAAAGTT": 2979, + "CATGATT": 
2980, + "GAGTTTG": 2981, + "CTAGCAA": 2982, + "CTTCCTA": 2983, + "GGGGAGG": 2984, + "CTATATG": 2985, + "TATTTATTTT": 2986, + "CACCATT": 2987, + "CCCTCAA": 2988, + "TTTTTTTTTTTTTT": 2989, + "GATCATT": 2990, + "GTACATA": 2991, + "CTCCATA": 2992, + "CCCCGTCTCTA": 2993, + "GCCTGCC": 2994, + "CTAGCTT": 2995, + "CCCGGA": 2996, + "GATGTTTT": 2997, + "GTATTTTA": 2998, + "TCAGATA": 2999, + "CCTGGAA": 3000, + "TATTCCA": 3001, + "GGACCAA": 3002, + "GCCATTA": 3003, + "CGACTGA": 3004, + "TAAGCTG": 3005, + "TAAACACA": 3006, + "GTTTCTC": 3007, + "CATCTTA": 3008, + "GAAATTTG": 3009, + "TAATGGG": 3010, + "TAAAATTTT": 3011, + "CTGTTCA": 3012, + "CCTGTTA": 3013, + "TACTGAA": 3014, + "TGACCCA": 3015, + "TGATTTTA": 3016, + "CTCCTTA": 3017, + "TATAGAA": 3018, + "CTGCGG": 3019, + "GCGGTA": 3020, + "GTGCTAA": 3021, + "CAGAGGAA": 3022, + "TACATCA": 3023, + "TCAATCAA": 3024, + "CTGCAGCC": 3025, + "TGAATATT": 3026, + "TCTACAA": 3027, + "CCACATA": 3028, + "CCCGTT": 3029, + "TATACACA": 3030, + "TCCTCTC": 3031, + "TCTACTT": 3032, + "CCGGAA": 3033, + "CTTTTTTA": 3034, + "GAAAGAAAA": 3035, + "CTATCTT": 3036, + "GACTTTG": 3037, + "TGAACAA": 3038, + "GCAGTTTT": 3039, + "GCTAAAAA": 3040, + "GAGGCGG": 3041, + "TAATAAAAA": 3042, + "CTGGTCA": 3043, + "CAGACAA": 3044, + "GGATATG": 3045, + "TGAAGG": 3046, + "GCCAGAA": 3047, + "CCAGGCC": 3048, + "CCACCATG": 3049, + "CAAACTT": 3050, + "TCATGTA": 3051, + "GCTGCTT": 3052, + "GTAATA": 3053, + "CCCCCAA": 3054, + "CAGCCTG": 3055, + "TCAACTT": 3056, + "TAAAATTAA": 3057, + "GCTGAAAA": 3058, + "CGACGA": 3059, + "GTGGGCA": 3060, + "TGAGGGA": 3061, + "CGCTCC": 3062, + "TTTTGTTTT": 3063, + "GAGTCAA": 3064, + "TCATGCA": 3065, + "CTGCTTA": 3066, + "TAAGTTTT": 3067, + "GTAGCAA": 3068, + "CCTTGG": 3069, + "TGACAAAA": 3070, + "CTGGTAA": 3071, + "TCTTTATA": 3072, + "TGTGTGTT": 3073, + "CTGGTC": 3074, + "CTGGCAA": 3075, + "CATTTCTG": 3076, + "CTCTACC": 3077, + "CTGAGGA": 3078, + "CTAAAATG": 3079, + "CTAGATT": 3080, + "GTATCAA": 3081, + "CAGTCAA": 3082, + "CTGGGTG": 3083, + 
"CCTCTTA": 3084, + "TGAGTTTT": 3085, + "TTTTATTTA": 3086, + "CCTTTTTT": 3087, + "TATATACA": 3088, + "TAGCAAA": 3089, + "AAATTA": 3090, + "CTGGATG": 3091, + "GATAATA": 3092, + "GACAAAAA": 3093, + "CCTGGGA": 3094, + "GCTTTCA": 3095, + "GTACAGG": 3096, + "GCTGGAA": 3097, + "CTACTCA": 3098, + "CAATGTA": 3099, + "GCGTGAA": 3100, + "GATCCTT": 3101, + "TATTAATG": 3102, + "GCCCGA": 3103, + "TAAAGTG": 3104, + "GCTTCCA": 3105, + "CATGGAA": 3106, + "TGAAGTT": 3107, + "CTTTCTC": 3108, + "TCTGTGTG": 3109, + "GTATGTA": 3110, + "CAATACA": 3111, + "TCAAGG": 3112, + "CCTCTAA": 3113, + "TGTGGG": 3114, + "GATCTGA": 3115, + "GTACTGA": 3116, + "TTAATTAA": 3117, + "GCAGAAAA": 3118, + "CTACATA": 3119, + "CCGGTG": 3120, + "GGGGAAAA": 3121, + "TACAAAAAA": 3122, + "TTTTGG": 3123, + "GTGAGAA": 3124, + "TCAATAAA": 3125, + "TCAAGTT": 3126, + "CTCAGGA": 3127, + "CTACTC": 3128, + "CAAATCA": 3129, + "GGCAGAA": 3130, + "CCCGAA": 3131, + "TGTTGTG": 3132, + "GAGCAAAA": 3133, + "TATTTGTG": 3134, + "GTAGGTT": 3135, + "CTACCTG": 3136, + "CACAAAAA": 3137, + "CTCAGG": 3138, + "GCTTTA": 3139, + "CAGAGCAA": 3140, + "CTCAGTG": 3141, + "GGAAGAGA": 3142, + "TAACCTG": 3143, + "GAAATATA": 3144, + "CGAGAA": 3145, + "GTGAGG": 3146, + "CATTTATA": 3147, + "GGCAGCA": 3148, + "TCTAAATT": 3149, + "CCCAGTG": 3150, + "GCCTAGG": 3151, + "TGCATTA": 3152, + "CCGTAA": 3153, + "CATTCCA": 3154, + "CTAGTTA": 3155, + "GACTTAA": 3156, + "CTATACA": 3157, + "GACACAA": 3158, + "TCTTCACA": 3159, + "CCGGTT": 3160, + "TAAAGTAA": 3161, + "CTGTGGA": 3162, + "TAAGGTG": 3163, + "TCCAGTA": 3164, + "CAAATTTA": 3165, + "AAATTAAAA": 3166, + "CCATCTA": 3167, + "CTCCCTT": 3168, + "CTCCTTTT": 3169, + "GAGAGAGAGAGA": 3170, + "GGAGATA": 3171, + "CCTATTA": 3172, + "CACCAAAA": 3173, + "CCGTTA": 3174, + "TGTTTATA": 3175, + "CTCAGGAGG": 3176, + "GACGTA": 3177, + "GTCCTTA": 3178, + "GAAAGTT": 3179, + "GCTGGTG": 3180, + "CTCTACA": 3181, + "CAATAGA": 3182, + "TAAAATATT": 3183, + "GTACCTG": 3184, + "GTACTAA": 3185, + "CTTTGAAA": 3186, + "CCTTTCC": 3187, + 
"TAAAAATTA": 3188, + "CTCGG": 3189, + "CAAGATA": 3190, + "CATTTGA": 3191, + "CACCTCA": 3192, + "GCCAGCC": 3193, + "GTCGG": 3194, + "GCACATA": 3195, + "CACTCAA": 3196, + "CTTTTAAAA": 3197, + "CAGGAATT": 3198, + "GCCTATT": 3199, + "TCTTTCTG": 3200, + "CTGAGGCAGGAGAA": 3201, + "CAGGCAGG": 3202, + "CTAGTAA": 3203, + "TCCATA": 3204, + "GAACTTA": 3205, + "CG": 3206, + "GCTGTGA": 3207, + "GAAAATA": 3208, + "TCTTCATT": 3209, + "GAGGGAGA": 3210, + "CCCATCC": 3211, + "GAGGTGGG": 3212, + "GCCTCTA": 3213, + "GTAGGTG": 3214, + "TAAACCA": 3215, + "GAAGGAAA": 3216, + "TATTGG": 3217, + "ATG": 3218, + "TCCAGTT": 3219, + "CCCACAA": 3220, + "GAAACACA": 3221, + "GTCTCAAAA": 3222, + "CTTTTCTTTT": 3223, + "TGAAGGA": 3224, + "TATTGATT": 3225, + "CTATGTA": 3226, + "AAAAAAAAAAAAAA": 3227, + "TCCTTAAA": 3228, + "GCGCTA": 3229, + "TCCACTT": 3230, + "GACTCAA": 3231, + "TAAATACA": 3232, + "TCATGGA": 3233, + "TCTGGGA": 3234, + "TCCTATG": 3235, + "CTGTGCA": 3236, + "TCAAGTGA": 3237, + "TCATAAAA": 3238, + "CATCCAA": 3239, + "CCTTCCA": 3240, + "CTGTACA": 3241, + "GAAGGTT": 3242, + "CTGTGTA": 3243, + "GTCACTT": 3244, + "TCACAAAA": 3245, + "TCAGGCA": 3246, + "GTGTTAAA": 3247, + "CCCTTAA": 3248, + "CAAAGTG": 3249, + "GAAATGTT": 3250, + "CTGGGGA": 3251, + "GACGCC": 3252, + "TATATGTG": 3253, + "CTAGATG": 3254, + "GAAATTAAA": 3255, + "GAATGCA": 3256, + "GCACTAA": 3257, + "CGGGAGG": 3258, + "GCCACAA": 3259, + "CGCTTA": 3260, + "TCCACAA": 3261, + "CAGATA": 3262, + "TCTGAATT": 3263, + "TATTATTTT": 3264, + "GCGCGG": 3265, + "CTCTGAAA": 3266, + "TCTCTTTG": 3267, + "TATTTCTA": 3268, + "GGGGTGGG": 3269, + "GGATGCA": 3270, + "CCACACC": 3271, + "TAAATGTG": 3272, + "TCTTCCTG": 3273, + "GCAAGG": 3274, + "CTGCTCC": 3275, + "CTGGAGTG": 3276, + "CTGTTAAA": 3277, + "CACACAAA": 3278, + "CTGACTT": 3279, + "GAAAAGAAAA": 3280, + "CCTTCTCC": 3281, + "GAAATAAAA": 3282, + "CCTCAGGTGA": 3283, + "GATAATG": 3284, + "GAATTGCTT": 3285, + "CCAAAATT": 3286, + "CGTGAAA": 3287, + "CACTGAAA": 3288, + "CAGTGAAA": 3289, + "GATCTTA": 
3290, + "GAGATGGG": 3291, + "TCTGCCA": 3292, + "TGAGGTA": 3293, + "TATGGAA": 3294, + "TATATTTTA": 3295, + "TGAACTT": 3296, + "GCAGATA": 3297, + "CTTTTCTT": 3298, + "GTAAAATG": 3299, + "TCTCTAA": 3300, + "TCTGCAAA": 3301, + "GAGCCTT": 3302, + "TATCATT": 3303, + "CAATTTTA": 3304, + "CCGCCA": 3305, + "TATTTAAAA": 3306, + "GAGAGATG": 3307, + "GAGATGGA": 3308, + "GCCAGGATG": 3309, + "CGAGTAGCTG": 3310, + "TTCATTTT": 3311, + "TATACTT": 3312, + "GTCTACA": 3313, + "GTGAGTGA": 3314, + "GCTACACA": 3315, + "GGGAGGA": 3316, + "CAAGGCA": 3317, + "GCTTTTAA": 3318, + "CACTATT": 3319, + "GTTCATA": 3320, + "TCCTC": 3321, + "GTGGACA": 3322, + "TATTTGGA": 3323, + "CTCCAGTA": 3324, + "GTTCAGTT": 3325, + "CCAAGG": 3326, + "CAGAGCC": 3327, + "CTCGCC": 3328, + "CCGATG": 3329, + "GGAATTTT": 3330, + "TCCAGCC": 3331, + "CCTCTTTT": 3332, + "GAACCTT": 3333, + "CATGCACA": 3334, + "GTTTC": 3335, + "GAAGATA": 3336, + "TACCCC": 3337, + "GCTGCCA": 3338, + "GGGGGAGG": 3339, + "GCAGTGAGCTGA": 3340, + "CTGTCTA": 3341, + "CGAGGA": 3342, + "CAATGGG": 3343, + "GCTGTGAA": 3344, + "GAAAGTG": 3345, + "TACCAAAA": 3346, + "GTCAGG": 3347, + "CAGCTCC": 3348, + "TGTGCTT": 3349, + "GTCTAGG": 3350, + "TTTTTGTA": 3351, + "TTATATG": 3352, + "TCAGGGG": 3353, + "TATTGTTA": 3354, + "CCTGAGA": 3355, + "TATCTCA": 3356, + "CAATCTG": 3357, + "CACTCTG": 3358, + "GATTTAA": 3359, + "TGAATAA": 3360, + "TCTTGTA": 3361, + "TCAACTG": 3362, + "TCTCCAGG": 3363, + "CTAGAGG": 3364, + "CTGAGAAA": 3365, + "CTAGCTG": 3366, + "TCCACCA": 3367, + "CGATTTT": 3368, + "CCGGCC": 3369, + "GTTGACA": 3370, + "CTTAGAA": 3371, + "CATAATG": 3372, + "GAGTATT": 3373, + "CACAGAAA": 3374, + "GACTGTG": 3375, + "CTATTTTA": 3376, + "TGAGGAAA": 3377, + "TTATTAAAA": 3378, + "CTTATTTA": 3379, + "CAGACTT": 3380, + "CACGCC": 3381, + "GCTTGG": 3382, + "CCTGCTT": 3383, + "TAAAGCAA": 3384, + "CCTCGTGA": 3385, + "TAGAATT": 3386, + "CTTACAA": 3387, + "TAAAGGAA": 3388, + "GTCTAGA": 3389, + "GTGACTT": 3390, + "TACATATG": 3391, + "GTCAGGA": 3392, + "GCTCCAGG": 3393, + 
"GAAGGGA": 3394, + "CATGATG": 3395, + "TCATCAAA": 3396, + "CGTTAAA": 3397, + "GTACTCA": 3398, + "CTCCCAA": 3399, + "TATATGTA": 3400, + "GGTATTTT": 3401, + "TAAGCCA": 3402, + "CGAAATT": 3403, + "GTTTGTTTT": 3404, + "TCTGTCTT": 3405, + "TATATCA": 3406, + "TGTTCATT": 3407, + "CAAACCA": 3408, + "TTCATTA": 3409, + "TATTTGTA": 3410, + "GATTGAA": 3411, + "CTATAAAA": 3412, + "GATTAATT": 3413, + "CCCACCA": 3414, + "TCCTAGG": 3415, + "TAAATGTA": 3416, + "CTCTTAAA": 3417, + "GCAGTCC": 3418, + "GCGGCTG": 3419, + "GTCTCGAA": 3420, + "TGAATGA": 3421, + "CTGGGGG": 3422, + "GTCTCGA": 3423, + "GAACAAAA": 3424, + "TGAATCA": 3425, + "TGTATTTTTAGTAGAGA": 3426, + "GTTATTAA": 3427, + "TTTTTTAAAA": 3428, + "GTCAGTG": 3429, + "CCCATTA": 3430, + "CACAGGA": 3431, + "TATTCCTT": 3432, + "TCTGCCTT": 3433, + "CCTGGTG": 3434, + "GCGAGC": 3435, + "TACTAAA": 3436, + "TACACAAA": 3437, + "CCGTCC": 3438, + "GCTTTGTT": 3439, + "GCATCCA": 3440, + "CATCTAA": 3441, + "GCTGTGTT": 3442, + "GTAGACA": 3443, + "GCCTATG": 3444, + "TCTTTGTG": 3445, + "GATTCTG": 3446, + "CGCCCGG": 3447, + "GATGAGA": 3448, + "TATCTGA": 3449, + "TGAATTTG": 3450, + "CCTGATG": 3451, + "TAAAACAA": 3452, + "CTTTAGG": 3453, + "TTTTCCTT": 3454, + "TGAATAAA": 3455, + "CGGGGA": 3456, + "CAAACATT": 3457, + "GTATGGA": 3458, + "GCTTAAAA": 3459, + "TACCAAA": 3460, + "CAAAGAGA": 3461, + "CTCCTGCC": 3462, + "GTAAAAAAA": 3463, + "CACAGCC": 3464, + "CCATGCA": 3465, + "TACAATT": 3466, + "CTAGTGA": 3467, + "CTGAGTT": 3468, + "GAGTGAAA": 3469, + "TCTGTTTG": 3470, + "CTGTAGG": 3471, + "TATAAAAAA": 3472, + "GCATTAAA": 3473, + "GTCCATA": 3474, + "TGTTAAAAA": 3475, + "TGTTTGA": 3476, + "GAATAGA": 3477, + "CTTCAAAA": 3478, + "CTGGACA": 3479, + "CTGTAGA": 3480, + "CCATTAAA": 3481, + "CTATCTG": 3482, + "CACTATG": 3483, + "TTATCAA": 3484, + "TAAGTAAA": 3485, + "TAATCCCAGCACTTTGGGAGGCC": 3486, + "CCAGAAAA": 3487, + "TGAAGCA": 3488, + "TCCCTTTT": 3489, + "TCATACA": 3490, + "TACGTT": 3491, + "GCCGTG": 3492, + "GGAAGTG": 3493, + "GGCCAAA": 3494, + "GTACCAA": 
3495, + "TCTCTACTAAAAATA": 3496, + "CATTGTG": 3497, + "TGTGTGA": 3498, + "GAAACAGA": 3499, + "CTTGACA": 3500, + "GATGAGG": 3501, + "GAGATTTT": 3502, + "CCTTCAA": 3503, + "GAATCTA": 3504, + "CTCTCCTT": 3505, + "GGCGGA": 3506, + "TCTATCTATCTATCTA": 3507, + "CACACAGA": 3508, + "TGTGTGTA": 3509, + "CAAAGCC": 3510, + "TGTGCCA": 3511, + "GTTGAAAA": 3512, + "CTCCAGCA": 3513, + "TCAAGGA": 3514, + "TAGCTCA": 3515, + "CGCTGA": 3516, + "CCTGAAAA": 3517, + "GACTATT": 3518, + "GATTCCA": 3519, + "GCTTCTA": 3520, + "GTCTGCC": 3521, + "CTTGGCA": 3522, + "TGTGGTA": 3523, + "GCTTTGA": 3524, + "GCTCTCTG": 3525, + "CTCACAGA": 3526, + "TCTTTAAA": 3527, + "CAAAGCAA": 3528, + "TACTTAA": 3529, + "GCTTCAA": 3530, + "CATTGAA": 3531, + "GGAGGAAA": 3532, + "CTATAGA": 3533, + "CTGAGGAA": 3534, + "CCTGGCA": 3535, + "CCCTATT": 3536, + "CTCGTG": 3537, + "TTACACA": 3538, + "TTAGGAA": 3539, + "CTGGTTA": 3540, + "GTTGTCC": 3541, + "TAATGAAAA": 3542, + "TATTTACA": 3543, + "GGGAATT": 3544, + "GTAGTTTT": 3545, + "GCTGCAA": 3546, + "CTACGG": 3547, + "GCCGGA": 3548, + "CTGGGCA": 3549, + "CCTTAAAA": 3550, + "GATGGAA": 3551, + "TAGATAGATAGATAGA": 3552, + "TATGTAA": 3553, + "GTACGG": 3554, + "TATTCAAA": 3555, + "GATCTCC": 3556, + "CCTGTTTT": 3557, + "TATTGCA": 3558, + "GGAAGGAAGGAAGGAA": 3559, + "GGTAATT": 3560, + "TTACAGA": 3561, + "TCAGC": 3562, + "GCAAAATG": 3563, + "GAGAGCA": 3564, + "GTAGAAAA": 3565, + "CATTTGAA": 3566, + "TCTTCTTTT": 3567, + "TCCCATA": 3568, + "GTTATTTA": 3569, + "CTATCTA": 3570, + "CATCCTG": 3571, + "TCTTGTG": 3572, + "TTATTATT": 3573, + "CCCGTC": 3574, + "TACTATG": 3575, + "TAAACATA": 3576, + "TAAGGAAA": 3577, + "GCTTGTG": 3578, + "CTCTAAAA": 3579, + "GTTTTAAAA": 3580, + "GACAGGA": 3581, + "TCCTAGA": 3582, + "TCCACCCA": 3583, + "GTTTGAAA": 3584, + "CCATCTCA": 3585, + "CTAAGAA": 3586, + "GTATCTA": 3587, + "GTGAGGA": 3588, + "GCTGGAGG": 3589, + "CCTGTAATCCCAGCTA": 3590, + "GCAACAA": 3591, + "CTTTCAAA": 3592, + "CAAATGTT": 3593, + "CTTGTCC": 3594, + "TCTCAAAAA": 3595, + "TATTTATTA": 
3596, + "TAAGGCA": 3597, + "GAGAGGAA": 3598, + "TATGATT": 3599, + "GCATCTA": 3600, + "CGTTATT": 3601, + "GCCTGTA": 3602, + "GTTTCAAA": 3603, + "CCTTCCTTCCTTCCTT": 3604, + "GGCTTTG": 3605, + "GTCAGAA": 3606, + "CATGCATG": 3607, + "GTCATTTA": 3608, + "CTGGAAAA": 3609, + "CTTCGA": 3610, + "CCTATTTT": 3611, + "CCAACAA": 3612, + "TCCATCC": 3613, + "TAAAGTTA": 3614, + "GTCTCTC": 3615, + "TAATCAAA": 3616, + "GATTTTTG": 3617, + "GATTTCTT": 3618, + "GGGCTGA": 3619, + "GCATGTA": 3620, + "CCTGGGTT": 3621, + "GAGACAA": 3622, + "GCTGTCA": 3623, + "TGATAGG": 3624, + "GGAGACC": 3625, + "CCGGCA": 3626, + "TAATCTCA": 3627, + "TGAATTAA": 3628, + "TCTGGTG": 3629, + "GCCTC": 3630, + "GGCGCA": 3631, + "CCAGCTA": 3632, + "CAGTCTG": 3633, + "TGAACTA": 3634, + "GTAAGAA": 3635, + "CCTTTCA": 3636, + "TCCATGA": 3637, + "CAAAGGAA": 3638, + "CTCTC": 3639, + "CTCTCTCA": 3640, + "CTCCAGC": 3641, + "GTAGATA": 3642, + "CCCCCTCC": 3643, + "GGCGCC": 3644, + "TCTGTCC": 3645, + "GACCATT": 3646, + "CTTGAAAA": 3647, + "TTATCC": 3648, + "TACATGTG": 3649, + "CAAATTTG": 3650, + "TTTTGTG": 3651, + "CAGAGTG": 3652, + "GTAATAA": 3653, + "GTGAGTG": 3654, + "TTTTTCC": 3655, + "GGCTCTG": 3656, + "GCCCTAA": 3657, + "GGCTGTT": 3658, + "CCCAATT": 3659, + "CAGAGCTT": 3660, + "TATAAATG": 3661, + "GAGTCTG": 3662, + "TCTTAAAAA": 3663, + "GTTTTATG": 3664, + "GATCCAA": 3665, + "GGCCCTG": 3666, + "GATCCTG": 3667, + "TCAAGTG": 3668, + "GATTCAA": 3669, + "CCTCTCTT": 3670, + "GAGACGG": 3671, + "CAGATCA": 3672, + "TAAAAGAA": 3673, + "CTGAGCAA": 3674, + "CCTGCCA": 3675, + "CCTTCTA": 3676, + "CGCTCA": 3677, + "GGCTGTG": 3678, + "TGGGAAAA": 3679, + "GGAGCCTG": 3680, + "CTGAGTG": 3681, + "CGTCAAA": 3682, + "TCAAGTA": 3683, + "CGTAATT": 3684, + "TTACTTA": 3685, + "TATACTA": 3686, + "GGGCAAA": 3687, + "CAACTTTT": 3688, + "CTTTGCC": 3689, + "GCCAGGAA": 3690, + "CACACTA": 3691, + "GCCCAGC": 3692, + "TAAATAAATAAATAAA": 3693, + "CTTTCCTT": 3694, + "GGGAGAA": 3695, + "TATGGTA": 3696, + "CGGCCA": 3697, + "CCTCTCTG": 3698, + "GAAAGCAA": 
3699, + "CAAGCCA": 3700, + "GGCGTT": 3701, + "CTCTTTTA": 3702, + "TCGGCCTCCCAAA": 3703, + "GATTTATT": 3704, + "CAAGTCC": 3705, + "TATCTTA": 3706, + "GTTCAAGACCA": 3707, + "CTCACACA": 3708, + "GAAATCAA": 3709, + "TGAGACC": 3710, + "GGGTAAA": 3711, + "GCTTGTT": 3712, + "GATTTTAA": 3713, + "TTTTTATA": 3714, + "CAGAGCTG": 3715, + "TCTGTTAA": 3716, + "GTAATTAA": 3717, + "TCTTTGAA": 3718, + "CTTGCCA": 3719, + "TTTTCATT": 3720, + "CCATGTA": 3721, + "TCTCGGCTCACTGCAA": 3722, + "GGATTCA": 3723, + "TCTATTAA": 3724, + "TACATAAA": 3725, + "GATTGATT": 3726, + "GGAGAGGA": 3727, + "CGCAAAA": 3728, + "GGACTAA": 3729, + "TTATGTG": 3730, + "GTCACTCA": 3731, + "GACAGCA": 3732, + "CGAGTT": 3733, + "GATGGTT": 3734, + "GGAAGAGG": 3735, + "GCCAACATGGTGAAA": 3736, + "GGAGCCA": 3737, + "TGAACTG": 3738, + "CCTCTGTG": 3739, + "GTATAAAA": 3740, + "TCCCAGAA": 3741, + "CATTTATG": 3742, + "GATTATG": 3743, + "TGTTTCTG": 3744, + "GAGTGGGTT": 3745, + "TACATATT": 3746, + "CTCCAGGA": 3747, + "GACACTG": 3748, + "GGTCTCA": 3749, + "CCGGGA": 3750, + "TGTTTAAA": 3751, + "CTCACCA": 3752, + "GGACTTA": 3753, + "GCCCACC": 3754, + "CAAATCAA": 3755, + "GAAATGTG": 3756, + "TAGTTAA": 3757, + "TCTATAA": 3758, + "TTAGATT": 3759, + "GTGTAGG": 3760, + "TACTGAAA": 3761, + "GCACCCA": 3762, + "GTGGGCTG": 3763, + "GAATGAAA": 3764, + "TCTAGTT": 3765, + "TCAGGAGA": 3766, + "TCCACTA": 3767, + "CTCAGTT": 3768, + "TACTTAAA": 3769, + "GACTCCA": 3770, + "TCCATTTG": 3771, + "CACAGCAA": 3772, + "GCTCATGCCTG": 3773, + "GGTGCTG": 3774, + "GCTTTCTT": 3775, + "GTGGCCA": 3776, + "TACGTG": 3777, + "GTGCAGTG": 3778, + "TGAAGTCA": 3779, + "CCTTTAA": 3780, + "TCTCAGCTCACTGCAA": 3781, + "GAAATATG": 3782, + "CCTCAAAA": 3783, + "GGGGCGG": 3784, + "CGACAA": 3785, + "GGTGATG": 3786, + "GTCTTAAA": 3787, + "CAGAAATG": 3788, + "CGTCATT": 3789, + "CCAAGCA": 3790, + "GGATCAA": 3791, + "GTGCTGGGATTA": 3792, + "GCTGGCC": 3793, + "CGGAGCTT": 3794, + "TACATGA": 3795, + "TGTTTGAA": 3796, + "TCTCCATT": 3797, + "TAAGCAAA": 3798, + "CCTTTCTT": 3799, + 
"TACTGTT": 3800, + "TCCATCTT": 3801, + "CTTACTT": 3802, + "CGGAGGTT": 3803, + "CAAAACAA": 3804, + "TCATAGG": 3805, + "TTACTAA": 3806, + "CTTATTTG": 3807, + "GAATGTA": 3808, + "CCCCATGGA": 3809, + "TTACTGA": 3810, + "CGGAAAA": 3811, + "CTCCAGTG": 3812, + "TGTTCCA": 3813, + "CAGATGAA": 3814, + "GTTGATA": 3815, + "TCCCCCC": 3816, + "CATTGCA": 3817, + "CTCAGCC": 3818, + "CTTACTG": 3819, + "TATCCTT": 3820, + "CTTTTATG": 3821, + "TGAGTAGCTG": 3822, + "GACTGAAA": 3823, + "CAATGAAA": 3824, + "CGACTG": 3825, + "CTTGGGA": 3826, + "GCAAGCA": 3827, + "TCACTCC": 3828, + "GATTTGA": 3829, + "CATTTTAAA": 3830, + "TCAACTA": 3831, + "GTCCAAAA": 3832, + "CACCCTG": 3833, + "TTACCTT": 3834, + "CAAGGGG": 3835, + "TTTTGGA": 3836, + "GTTATTTG": 3837, + "GCTACTG": 3838, + "CTGAGGCAGGAGAATG": 3839, + "GTGATGA": 3840, + "GTAGTC": 3841, + "TAGTATG": 3842, + "GTATAGA": 3843, + "GTGTCTA": 3844, + "GCTGCTA": 3845, + "TTAGTAA": 3846, + "TAAACATG": 3847, + "GTCACCA": 3848, + "CATCTTTT": 3849, + "CATATAA": 3850, + "TCTCTCTA": 3851, + "TTTTATTAA": 3852, + "TATTCTAA": 3853, + "GAAATTTA": 3854, + "CTTCCCTG": 3855, + "TAAAGATG": 3856, + "TACGTA": 3857, + "GTTTATTA": 3858, + "GAAAAGAA": 3859, + "CCCACCCA": 3860, + "CAATTAAAA": 3861, + "CCGACA": 3862, + "CAAAGTGA": 3863, + "CAAACAAAA": 3864, + "GCAATTTT": 3865, + "CGATTAA": 3866, + "TTAGAGA": 3867, + "CTGATGA": 3868, + "GGAGGAGG": 3869, + "GTCCTGGG": 3870, + "TCATGAAA": 3871, + "GCAACCA": 3872, + "GTTGGCA": 3873, + "GCGGCGG": 3874, + "GTCCCCA": 3875, + "GTAGGGG": 3876, + "GCCATGTT": 3877, + "GTTCGAGA": 3878, + "GCCTATA": 3879, + "TAAATTCA": 3880, + "GGCCATT": 3881, + "GAAAACAA": 3882, + "TGTGTATG": 3883, + "GTACTC": 3884, + "TAGGGAA": 3885, + "CCTTGAA": 3886, + "TCTATTTG": 3887, + "GAGGGCA": 3888, + "GAAACTGA": 3889, + "TACGC": 3890, + "TACAAAAA": 3891, + "TCATTATT": 3892, + "GGAAAATT": 3893, + "TCAATATT": 3894, + "CCCGTA": 3895, + "GGAGAGAA": 3896, + "TTAGTTA": 3897, + "CTCAGAGA": 3898, + "TCGAGC": 3899, + "CTAGTCA": 3900, + "GATGGCA": 3901, + 
"TGAACATT": 3902, + "CTATGGG": 3903, + "CACACCA": 3904, + "TCAATTAA": 3905, + "GGAACTG": 3906, + "TTACATG": 3907, + "CTTTCATT": 3908, + "CAGCTCTG": 3909, + "TCTTTTTTTT": 3910, + "TAAATCTT": 3911, + "TGATCTA": 3912, + "CATACAA": 3913, + "GCTCAAAA": 3914, + "GCTGTGTG": 3915, + "TCAATCA": 3916, + "GATTTGAA": 3917, + "CCAAGGA": 3918, + "GTCCTCA": 3919, + "GTGCTCC": 3920, + "AAAATAA": 3921, + "GTGACAA": 3922, + "GCTCACGCCTG": 3923, + "CGACGG": 3924, + "TATCCAA": 3925, + "CACACATG": 3926, + "TCTCTCTCC": 3927, + "TGTGGTT": 3928, + "CTTGGTA": 3929, + "TCTGGTT": 3930, + "TTTATAA": 3931, + "CTGCTTTT": 3932, + "TGTGTCA": 3933, + "CACATCA": 3934, + "CCTAATG": 3935, + "CGTTTTTT": 3936, + "GCTGGCA": 3937, + "GACGTC": 3938, + "TATAATTA": 3939, + "TACAGTAA": 3940, + "GAAAGTAA": 3941, + "GTCTGAAA": 3942, + "CCCATTTT": 3943, + "TATATGA": 3944, + "CTTGATA": 3945, + "CTTTATTTT": 3946, + "CTTTATTA": 3947, + "GGCGAA": 3948, + "CCATGCC": 3949, + "CCTGCCTT": 3950, + "GAAGAAGAAGAA": 3951, + "CTGACTGA": 3952, + "GCCCTTA": 3953, + "TATCTAA": 3954, + "GTGTTTTA": 3955, + "TGTGGCA": 3956, + "TATTGTAA": 3957, + "GCCAGAAA": 3958, + "CCCTGTCTC": 3959, + "CACAGGAA": 3960, + "AAAACAA": 3961, + "AAAAAAAAAAAAAAA": 3962, + "TAACTCC": 3963, + "GCCTAAA": 3964, + "CGAGTA": 3965, + "TAGTATT": 3966, + "GTATTTTTAGTAGAGA": 3967, + "GCTGCAGG": 3968, + "TATTGAAA": 3969, + "CCAGCCTGGG": 3970, + "GCTCCAAA": 3971, + "TACGAA": 3972, + "GGCCTCC": 3973, + "TATACAAA": 3974, + "CATGGCA": 3975, + "CATGCAA": 3976, + "TACACCA": 3977, + "CTTTACCA": 3978, + "TACAGAGA": 3979, + "TATTCTTA": 3980, + "TATGTCA": 3981, + "TCAAGCA": 3982, + "TCAATGA": 3983, + "GGCTCTT": 3984, + "GGAAGTT": 3985, + "TCCATGTT": 3986, + "GCTTTCC": 3987, + "TATGTGA": 3988, + "GTGTAGA": 3989, + "TTTTTAAAA": 3990, + "GCTGGAGA": 3991, + "GTGAGAGA": 3992, + "CCTAGAA": 3993, + "CCTCCAAA": 3994, + "CCAATGA": 3995, + "CAGGGCA": 3996, + "CTATGCA": 3997, + "CTTCACC": 3998, + "CTACAAAA": 3999, + "CTCACC": 4000, + "GAGTATG": 4001, + "TAGAAAAA": 4002, + 
"CTTTTGAA": 4003, + "TAAAGAGA": 4004, + "CATGTCA": 4005, + "TCTTTTAAA": 4006, + "CACAGTGA": 4007, + "GATCTAA": 4008, + "TAAGGTA": 4009, + "CATAGAA": 4010, + "CGCGCC": 4011, + "CAGCTTA": 4012, + "TATAGTT": 4013, + "CGGGCC": 4014, + "TATCCATT": 4015, + "TGTTTGTTTT": 4016, + "GCTGGCTG": 4017, + "TACAGGA": 4018, + "CTCCTTTG": 4019, + "CAATCTA": 4020, + "CCCCCTG": 4021, + "TATACTG": 4022, + "CTGAGCC": 4023, + "CGGTTA": 4024, + "TGAAGTG": 4025, + "GCTTCCTT": 4026, + "TTTTATTTG": 4027, + "TAGTGAA": 4028, + "CTGAGGTG": 4029, + "TCTTCTC": 4030, + "GACAGAAA": 4031, + "CTGAACTGAA": 4032, + "CCTGGGAA": 4033, + "TCCCCAAA": 4034, + "TATGTATT": 4035, + "GATTTCTG": 4036, + "CATTCAAA": 4037, + "CACAGTT": 4038, + "GCTTGAA": 4039, + "GTGGATCA": 4040, + "CTGAGTGA": 4041, + "TGAATTTA": 4042, + "TCAACAAA": 4043, + "GGTCATT": 4044, + "GTAATTTA": 4045, + "GCGACTT": 4046, + "CTGAGAGA": 4047, + "GTGCCCA": 4048, + "CTAGGTT": 4049, + "TCCTGAAA": 4050, + "GTCCACC": 4051, + "TCACAGAA": 4052, + "GCGAAAA": 4053, + "GTATGGG": 4054, + "TGAACAAA": 4055, + "TAAACAAAA": 4056, + "CCGTTTT": 4057, + "TCTCAATT": 4058, + "TCCAGAAA": 4059, + "GTAACAA": 4060, + "GCATTTTA": 4061, + "TCTCCATG": 4062, + "TTATAAAA": 4063, + "CAGGCAA": 4064, + "CTAAAAAAA": 4065, + "GTTGGGA": 4066, + "TAAAGATT": 4067, + "TGAAGAGA": 4068, + "CCCCTCA": 4069, + "TGTTTATG": 4070, + "TCTACTG": 4071, + "CCAATTTT": 4072, + "GGTGGTG": 4073, + "GGAACAA": 4074, + "TGTGGGA": 4075, + "TCTGCTA": 4076, + "GAACGA": 4077, + "GTAAGTA": 4078, + "GTTGCCA": 4079, + "AAAATTTT": 4080, + "GCGCGA": 4081, + "GAAAGATG": 4082, + "GTCTCTCA": 4083, + "TCCATCAA": 4084, + "GCAGCTA": 4085, + "CACATTTG": 4086, + "CTGACAA": 4087, + "TCCACC": 4088, + "GCT": 4089, + "CCCACTT": 4090, + "GCAGGTA": 4091, + "GAGGCCA": 4092, + "TAAAGTCA": 4093, + "CTGGATA": 4094, + "CGGCAA": 4095 + }, + "merges": [ + "A A", + "T T", + "T G", + "C A", + "C C", + "T A", + "G G", + "T C", + "G A", + "AA A", + "G C", + "T AA", + "TT TT", + "T CA", + "TG A", + "TT A", + "G AA", + "T CC", + "C 
AA", + "C TG", + "C TT", + "G TG", + "G TT", + "G CA", + "GG A", + "C CA", + "G TA", + "G CC", + "C TA", + "T AAA", + "AA AA", + "C TC", + "G TC", + "TG TG", + "TA TT", + "CA CA", + "G AAA", + "TA TA", + "TC TT", + "TG TT", + "C AAA", + "GA GA", + "CA TT", + "TG AA", + "CA GG", + "TC TG", + "CA GA", + "TC AA", + "GG AA", + "TAA AA", + "C TGA", + "GC TT", + "G TGA", + "GC TG", + "C TCA", + "CC TT", + "CA TG", + "GC AA", + "G TCA", + "G TAA", + "TTTT A", + "TA TG", + "GA GG", + "C GG", + "GA TT", + "CC TG", + "TC TC", + "CC AA", + "G TTA", + "C TCC", + "C TAA", + "TA CA", + "C TTA", + "TC CA", + "GA TG", + "TT AA", + "GAA AA", + "TT TG", + "G TTTT", + "TC TA", + "GC CA", + "G TCC", + "C TTTT", + "GG GG", + "C GA", + "TT TA", + "CC CA", + "CAA AA", + "TG GG", + "TA GA", + "TA GG", + "GA CA", + "GG TT", + "CC CC", + "GG TG", + "CA TA", + "GC TA", + "TG TA", + "TC AAA", + "TG GA", + "TAA TT", + "TTA TT", + "TG CA", + "GG CA", + "GA TA", + "CC TA", + "TT CA", + "TC TCA", + "GG GA", + "C GC", + "CTG AA", + "G TAAA", + "TC TCC", + "TTTT TT", + "C GTG", + "GC AAA", + "TAA AAA", + "TC TGA", + "TCA TT", + "GG AAA", + "TG AAA", + "TCC TT", + "CC AAA", + "GAA TT", + "C TAAA", + "C GTT", + "GTG AA", + "GG CC", + "TAA TA", + "GG TA", + "TG CC", + "CA CC", + "TGA TT", + "AAAA AA", + "GC TCA", + "TCC AA", + "GA GAA", + "CTG TT", + "TA TTA", + "CA GCA", + "CTC TT", + "CTT AA", + "CA GAA", + "GC TGA", + "GTT AA", + "TC TTA", + "TA TTTT", + "GCC AA", + "CTT TG", + "GA CC", + "C GCA", + "GTA TT", + "GTC TT", + "CAA TT", + "GTG TT", + "CTC AA", + "GGA GG", + "C GAA", + "TC TTTT", + "GTC AA", + "C GCC", + "TA TAA", + "TA CC", + "TC TAA", + "CCA TT", + "C GGA", + "CAA AAA", + "CA GTG", + "TCC TG", + "CTC TG", + "GAA AAA", + "CTG TG", + "CA GC", + "TTTT AA", + "GCA TT", + "GCC TT", + "TAA TG", + "CTA TT", + "GTT TG", + "TGA TG", + "GG CTG", + "CC TCA", + "GA GGA", + "GCC TG", + "AAA TT", + "C GTA", + "TC AAAA", + "TA CAA", + "CA TCA", + "CA GTT", + "TGA GA", + "GG GAA", + "CA CTG", + "CA 
CAA", + "CA GGA", + "CC CCA", + "CC CTG", + "TTTT TTTT", + "TA GAA", + "GA GCA", + "CC TCC", + "CA CCA", + "TA TCA", + "GA GC", + "CA TTA", + "CACA CACA", + "GA GTG", + "GGA TT", + "TGTG TGTG", + "TA CTT", + "CA CTT", + "GTC TG", + "TGA GG", + "GA GTT", + "GAA TG", + "TCA TG", + "GA CAA", + "GA CTT", + "TATT AA", + "TAA TAA", + "GG CCA", + "CA TTTT", + "CA GCC", + "CC CTT", + "GC TAA", + "TATA TATA", + "GTG TG", + "TA CTG", + "TA GTT", + "CAA TG", + "GC TC", + "CA GTA", + "GC TCC", + "CA TAA", + "TTA TG", + "TAAA TT", + "GA TGA", + "CA TGA", + "GC GG", + "AAAA AAAA", + "CCA TG", + "GA TAA", + "GA CTG", + "TA TGA", + "GCA GG", + "GA TCA", + "G TTTTA", + "GGA TG", + "CC TGA", + "G TAAAA", + "GAA GG", + "GA TTA", + "CC TC", + "GA CCA", + "GC TTA", + "CC CAA", + "AAA TG", + "GCA TG", + "TA GTA", + "TA CCA", + "GG CTT", + "C GTC", + "TC TCTT", + "GG TCA", + "TTA TTA", + "TA CTA", + "TA GCA", + "TA TC", + "CTG GG", + "CA TC", + "C TTTTA", + "C TAAAA", + "GTG GG", + "GA GTA", + "CCA GG", + "GA TTTT", + "TA GTG", + "GAAA TT", + "CA CTA", + "TC GG", + "TCA GG", + "CAGG AA", + "GC AAAA", + "CC TTA", + "CA TCC", + "CTT GG", + "TGTG AA", + "TATT TG", + "CC TAA", + "CTA TG", + "GA GAAA", + "GAGA GAGA", + "GC TTTT", + "TA TAAA", + "CAA GG", + "TC TCTG", + "TGTT AA", + "TGTG TT", + "GA GCC", + "GA CTA", + "TA TATT", + "TAA AAAA", + "TTTT TG", + "GTA TG", + "CATT AA", + "TA GGA", + "TA GC", + "GTT GG", + "GAA GAA", + "TAAA TG", + "TC TGTT", + "CA GAAA", + "CAAA TT", + "TAA TTA", + "TC TGTG", + "TA TCC", + "TGAA TT", + "CTC CA", + "GTG AAA", + "GG CAA", + "GGA GA", + "GAA GA", + "GG TGA", + "GG GCA", + "CC AAAA", + "TCTC TCTC", + "CTG CA", + "CTT CTT", + "TCTT AA", + "CC CTA", + "TGTG TG", + "AAA TA", + "TGTT TG", + "GG GTT", + "GTG CTG", + "GG AAAA", + "GG GGA", + "TCA GA", + "CC TTTT", + "GAAA TG", + "GCA GCA", + "TC TGAA", + "GG GTG", + "CACA TT", + "TCTT TG", + "GG GC", + "TCC CA", + "TC CATT", + "CTG AAA", + "CTT TA", + "TC GA", + "GTT TA", + "CAA CAA", + "CTT CC", + "GCC 
TCC", + "TT AAA", + "GC TCTG", + "GTT TCA", + "GGA GGA", + "C GTGA", + "CA GTC", + "GAA TA", + "CA GAGA", + "CC CTC", + "CAAA TG", + "CTG CTG", + "GA TCC", + "TTTTA TT", + "AAAA TT", + "TTA TA", + "TCAA TT", + "GG TAA", + "GTTA TT", + "GC CAGG", + "GGA GAA", + "CATT TG", + "TCA CC", + "CTC AAA", + "GG TTA", + "TCC AAA", + "TC TATT", + "GCA GA", + "CTT CA", + "TCA TCA", + "C GAGG", + "TAA CA", + "GTT GTT", + "CTTA TT", + "C GTCA", + "TAA GA", + "TAA TTTT", + "CTG TA", + "TC CACA", + "GC TGTG", + "C GCTG", + "TC TAAA", + "GC GA", + "CAA TA", + "CCA CCA", + "GAA CA", + "C GAAA", + "CAGA TT", + "TCA CA", + "TTA TTTT", + "TC TCAA", + "TGA CA", + "CTCC AA", + "AAAA AAA", + "TATA TG", + "TCC TCC", + "TCA CTT", + "TC CAGG", + "CAA GA", + "GG CTA", + "GTG GTG", + "C GTAA", + "C GAGA", + "TGA TA", + "GGA TTA", + "CAA CA", + "C GATT", + "TGA GAA", + "CTCC TT", + "CTCA TT", + "GTT AAA", + "TCA TA", + "CC TCTG", + "CTC TA", + "GC TGAA", + "CTG GA", + "TAA GG", + "CTT AAA", + "TATT TA", + "CCA CA", + "CC GG", + "GTC AAA", + "TG GAA", + "C GGAA", + "TGA TGA", + "GTT CA", + "TAA CAA", + "GC TGTT", + "TAA GAA", + "CTG CC", + "TTAA TT", + "CCA GA", + "TCA GAA", + "GTCA TT", + "C GCTT", + "GATT AA", + "CTGA TT", + "GC CACA", + "GTAA TT", + "TC CAGA", + "GCC AAA", + "GTGA TT", + "TAAAA TT", + "CAA GAA", + "CCA CC", + "TAA TCC", + "GTT CTT", + "TC CATG", + "GC TCTT", + "TG CTG", + "GG GTA", + "TTA CA", + "GC CATT", + "GCA CA", + "GCAA TT", + "TCC CTG", + "TG TGA", + "TC GAA", + "GGA CA", + "GGAA TT", + "GTG GA", + "CTT CTG", + "TCC CC", + "GCC CC", + "CTT GA", + "TAA TGA", + "TAAA TA", + "TATA TA", + "CTG CAA", + "TCA TTA", + "GTA TA", + "TCC CCA", + "C GTTA", + "GCA GAA", + "TGA GTT", + "CTTTT TT", + "C GATG", + "CTT TCA", + "AAAA TG", + "CAGG TT", + "CTAA TT", + "C GCCA", + "TGAA AAA", + "GTT CC", + "GTCC TT", + "GTCC AA", + "GTTTT TT", + "CTC TGA", + "GC GC", + "GTT GA", + "TGAA TG", + "CTA TA", + "GCA GTG", + "CCTT AA", + "TCA CCA", + "TCA CTG", + "GCC CTG", + "TAA CTT", + "CAGA 
TG", + "GTA GG", + "TC TATA", + "GAGA TT", + "GTC TA", + "TTTT AAA", + "CACA TG", + "TGA CC", + "CA CAAA", + "GTG TA", + "GG GAGG", + "GCTT TG", + "CAA AAAA", + "GA GGAA", + "GTT CTG", + "TTTT TA", + "GTC TCA", + "GTT CAA", + "TC GTG", + "GCTT AA", + "GCA CC", + "CTCC TG", + "TAAA TAAA", + "CTA CA", + "CTT CCA", + "TCC TCA", + "C GCAA", + "GAA AAAA", + "GCC CA", + "TC GTT", + "GTA GA", + "CTC TCA", + "GTC CA", + "TGA CTT", + "TCC CTT", + "GC CATG", + "CACACACA CACACACA", + "GTGA TG", + "CC TCTT", + "GC CAGA", + "TCC TA", + "C GTTTT", + "GTA CA", + "GCA TA", + "GAA TTA", + "TGTGTGTG TGTGTGTG", + "CC CAGG", + "GG TTTT", + "TCAA AAA", + "TC TATG", + "CCA TA", + "TGA CAA", + "GGA TA", + "TCA GTG", + "GTA TTTT", + "GAGA TG", + "GC GTG", + "C GTCC", + "TTAA AAA", + "TAA TCA", + "CAA TTA", + "CCA CTG", + "CGG TT", + "GTT GAA", + "TGA TTA", + "CCTT TG", + "CGG TG", + "CAGG TG", + "TCAA TG", + "CTGA TG", + "TCA GGA", + "GTT TAA", + "TATT AAA", + "CTC TTA", + "GCA GGA", + "CTC TCC", + "GAA CC", + "CTT TAA", + "GG GCC", + "GTA TTA", + "GC GCC", + "CCAA TT", + "GC TAAA", + "TGA CTG", + "GATT TG", + "GA TAAA", + "TCA GCA", + "GTT CCA", + "GAAA TA", + "GA CAAA", + "GA GTC", + "GC TATT", + "TCA CAA", + "GAGG TT", + "TAA CC", + "GAA GGA", + "GC TCAA", + "GAAAA TT", + "CCA GCA", + "GTTTT AA", + "GTG CC", + "TGA GGA", + "CA TAAA", + "GG TCC", + "TCA TTTT", + "TATT TATT", + "TAA TAAA", + "GCC TA", + "CTTTT AA", + "TAA GTG", + "TAA GTA", + "CTG GAA", + "CACA CA", + "GA CAGA", + "CAA CC", + "GG GAAA", + "CCA GAA", + "TCA GTT", + "TAA CTA", + "CTAA AAA", + "TGGG TT", + "TGA GTG", + "TAAAA TG", + "TATATATA TATATATA", + "GCA CTG", + "GA CTC", + "TA CAAA", + "TAAAA AAA", + "TC TACA", + "GTT GTG", + "TC GCC", + "CC CAAA", + "GTCA TG", + "CTG CTT", + "GGAA TG", + "CTA TTA", + "GA TATT", + "TA GAAA", + "GG CAGG", + "GA TGAA", + "GTA GAA", + "TCC TGA", + "TAA CTG", + "GCTG GG", + "GCAA TG", + "GCC CCA", + "GTT TGA", + "CATT TA", + "GTG CA", + "CTT GAA", + "GTG GAA", + "CTT CAA", + "TAAA TTA", 
+ "GTG GCA", + "TCC TTA", + "GGAA AAA", + "TTTT TTA", + "CC TGTG", + "GTAA TG", + "GTG TTA", + "CTA GG", + "CAGG CTG", + "GA CACA", + "GAAAA AAA", + "TC GC", + "GTAA AAA", + "TGTT TA", + "TCTC TA", + "GTCC TG", + "CCA GGA", + "GAA CAA", + "TAA GTT", + "TGA GCA", + "GC TCCA", + "TAA GCA", + "CTCA TG", + "GTC TTA", + "CC CACA", + "CA TATT", + "GCC TCA", + "CA CTC", + "CTT CTA", + "TGA TTTT", + "TC GCA", + "CC TGTT", + "GAA GCA", + "GCAA AAA", + "GC GGA", + "CCA CAA", + "GC GCA", + "CA TATA", + "GA CATT", + "GTT CTA", + "CAAAA TT", + "GAAA GAAA", + "CC CGG", + "TA CACA", + "CCAA AAA", + "GAGG TG", + "GG CTCA", + "CA GTGA", + "TCC CAA", + "TA TCTT", + "TGA GTA", + "TC GTA", + "TTTT CTT", + "GTG GGA", + "GA GCTG", + "CC CTCC", + "TAGG TT", + "TTA GG", + "TAA TATT", + "CCA GCC", + "CA TCTT", + "GTC TGA", + "GTT TCC", + "CC TGAA", + "GGA GCA", + "GAAAA TG", + "TCA GTA", + "TAA CCA", + "GA TGTT", + "CTG TTA", + "CA TGTT", + "GG CGG", + "CA TGTG", + "GG GAGA", + "CTT TGA", + "TCTT TCTT", + "AAAAAA AAA", + "GGGG TG", + "CTT TCC", + "CTT GTT", + "GCA TTA", + "CC CAGA", + "CAAA TA", + "TC GGA", + "CA GCTT", + "TCA CTA", + "TAA TTAA", + "TAA GGA", + "GAA CTG", + "GCA CAA", + "GC GTT", + "GG CTC", + "TC TTTTA", + "CC TCCA", + "GG CAAA", + "CA GCTG", + "CTA CAA", + "TA CATT", + "GC TATG", + "CTT GTG", + "GA GTCA", + "GTTA TG", + "CTG CCA", + "GTC TCC", + "TGA CCA", + "CA CCTG", + "TATA TTA", + "TGA TCA", + "CA GCAA", + "GA TGTG", + "GTC TTTT", + "CTA GAA", + "GC TACA", + "CTG GGA", + "GGGG TT", + "CAA GTA", + "CAA GGA", + "CC CTCA", + "TA GCC", + "GTT GGA", + "GC TATA", + "TCTG AAA", + "TA TGTT", + "CC CCTT", + "GTT GTA", + "CC CTGA", + "TGA CTA", + "CAA GCA", + "CAA TAA", + "GAA CTT", + "CA TGAA", + "CTTA TG", + "CTAA TG", + "TC TAAAA", + "CCAA TG", + "GAA GTG", + "CC TCAA", + "CC CATT", + "CA GTCA", + "GAGAGAGA GAGAGAGA", + "TA TGTG", + "GCA GTGA", + "TCTCC TT", + "TCC CAAA", + "CCA TTA", + "CCA GTG", + "GCA TCA", + "TCAAA TT", + "GA TCTT", + "GA CAGG", + "GGA GTG", + "GTA 
GTA", + "CAA CTT", + "GAA GTT", + "CC CCTG", + "TCTC AAA", + "GG GTC", + "GA GCTT", + "TATG AAA", + "TA TGAA", + "GA CATG", + "CAA GTG", + "GA TATA", + "CA TCTG", + "CTG TGA", + "TAA TTTA", + "GG CAGA", + "GC GAA", + "CC TAAA", + "CCA TCA", + "CA CTGA", + "GGA CTA", + "GA CGG", + "CTC TTTT", + "CTG TCA", + "TCTCTCTC TCTCTCTC", + "TTAA TG", + "GCA GCC", + "CAAAA AAA", + "GCA CCA", + "CTA TTTT", + "GA GCAA", + "CTT GGA", + "CTG GTG", + "GAA TAA", + "TCC TTTT", + "GAA GTA", + "CA GTAA", + "CAA CCA", + "CTG TAA", + "TGA TAA", + "GCA GTT", + "CA CGG", + "TAAA TAA", + "CTG TTTT", + "CTA CTA", + "GC TCTA", + "C GAAAA", + "CAA GTT", + "CTT GTA", + "GAA TGA", + "GA GTGA", + "GCC TGA", + "GG TTTG", + "CC CATG", + "GG GGAA", + "GAA GAAA", + "TG TTA", + "CAA TTTT", + "TATA TTTT", + "CTC AAAA", + "GG TGGG", + "CC GTG", + "TATT TCA", + "CC CCAA", + "TATT TAA", + "GG CTGA", + "GG TGTG", + "CA TCAA", + "CA CTCA", + "TCTCA TT", + "GAA TTTT", + "GAA TCA", + "CAGG AAA", + "CA TACA", + "TA TTTTA", + "TTA TAA", + "GAGG AAA", + "CA TATG", + "CTT TCTT", + "CAA CTG", + "GG GCTG", + "CC CCCA", + "TTTG AAA", + "CATT AAA", + "CTT AAAA", + "GA CTGA", + "CAA TGA", + "GG CACA", + "CCA GTA", + "GGA TGA", + "GTTTT TG", + "GCA TTTT", + "GTG CCA", + "GCA GTA", + "GCC CTT", + "TC GTC", + "GAA CTA", + "GTG GTT", + "GTG TGA", + "GTG CTT", + "C GCTA", + "GTG TCA", + "TCTT TA", + "GCC TTA", + "CC TATT", + "CAAAA TG", + "GAA CCA", + "CTC CAGG", + "GA CTCA", + "CATG AAA", + "GC TAGG", + "TGTT AAA", + "GC GTA", + "GCA CTT", + "TCTT AAA", + "TAA GAAA", + "GG CCTG", + "TCC CTA", + "GTG GTA", + "CTG CTA", + "GGA GTT", + "GG TAAA", + "CAAA CAAA", + "GA TATG", + "TCA TGA", + "GA CCTT", + "TAA TATA", + "GC TAGA", + "GGA CTG", + "GG CATT", + "CA GTTA", + "CC CTAA", + "CA CCTT", + "GG TGAA", + "CA GCTA", + "GTG TTTT", + "CAA CTA", + "GA TCAA", + "GA GAAAA", + "TGTG AAA", + "AAAA TA", + "GATG AAA", + "CTC TAA", + "TTA CTT", + "GA TCTG", + "CCA CTT", + "GA GTTA", + "CAA TCA", + "GGATTA CAGG", + "TTTA TTTT", + "TACA 
TA", + "TTTTA TG", + "GA GTAA", + "GCTG AAA", + "GTA CTG", + "GC TCTC", + "TATG TA", + "TGTG TA", + "TCA TAA", + "GGA CTT", + "TCTCC AA", + "GCA TGA", + "GA CGA", + "CGCC TG", + "GA CCTG", + "GG TCTT", + "CA CCAA", + "GA TC", + "GA CCAA", + "AAAA TTA", + "GTAAA TT", + "CCA GTT", + "CA GAAAA", + "TAA CAAA", + "GG TGTT", + "GAAA TTA", + "TGCC TCA", + "CC GCC", + "CCA TTTT", + "CTT GCC", + "TCTG TA", + "CTG GCA", + "GG GATG", + "CCA TGA", + "CTA CTT", + "TAGG TG", + "TAAAAA TT", + "GAAA GAA", + "TAAAA TA", + "CTTTT TG", + "GTC AAAA", + "GGA CAA", + "TCTGA TT", + "CTC TCTT", + "TAA TTTG", + "CTC TTTG", + "GG CCTT", + "GGA TTTT", + "CTA CTG", + "GTT GCA", + "GG CTCC", + "CTC TGTG", + "CTC CAGCC", + "TTA CAA", + "GGA CCA", + "GGAA GGAA", + "TAAA GAA", + "TTA GAA", + "GTG AAAA", + "CTT GCA", + "TGGG TG", + "GGA GCC", + "CC TCTA", + "C T", + "GG GCTT", + "GG CATG", + "CTG GTT", + "TA CAGA", + "GATT AAA", + "CTC TGTT", + "TTA TCA", + "CTG AAAA", + "GTA GTT", + "GG GTCA", + "G T", + "CA GCCA", + "GC GTC", + "CA CTTA", + "GTG CTA", + "TC TTATT", + "GTA CTT", + "GG TATT", + "TA GAGA", + "TA CATG", + "CCA CTA", + "TGA GAAA", + "CAA TAAA", + "TCC AAAA", + "CGTG AA", + "GG TCTG", + "CTGAA TT", + "TCA GCC", + "CC TCTC", + "GTT AAAA", + "GG GATT", + "TCC TAA", + "CA CTAA", + "GGA GAAA", + "CCTT CCTT", + "GTT TCTT", + "TA TCAA", + "GA TACA", + "TAATCC CAGCA", + "CC GCA", + "TGAAA TT", + "C GTAAA", + "CTC TCTG", + "TC TTTTTT", + "GTA CAA", + "CCAAA TT", + "TGTA TTTT", + "TC GCTT", + "GG GTGA", + "GA TAGA", + "CTT TATT", + "TAAA CAA", + "GTT TATT", + "TGAA TA", + "CTA CCA", + "GTG TCC", + "CC CGA", + "TTTA TTA", + "CTCC AAA", + "TTTTTTTT TTTT", + "TCA TCC", + "GAA GCC", + "CTAAA TT", + "CAAA TTA", + "CCCC AAA", + "TCTT CTT", + "TAGG AAA", + "CA CGA", + "CA TTTTA", + "GTG CAA", + "TCTCC TG", + "TATTTT AA", + "GTT TGTT", + "GA GCCA", + "GG CCAA", + "CATT TCA", + "CA TCCA", + "CC TATA", + "GA CTTA", + "TCAAA TG", + "GTA TCA", + "TAAA TTTT", + "CTGA GGCA", + "GCC CAA", + "GG TTAA", + "TA 
TCTG", + "TGA CAGA", + "GGA GAGA", + "GCTG CTG", + "CC CTTA", + "TCC TCTG", + "GTA GCA", + "CCTG AAA", + "CC GAA", + "TTTT TAA", + "CTA TAA", + "CCTG TA", + "TTA CTG", + "GTA TAA", + "GG CGA", + "GA CTAA", + "TCA GAAA", + "GTG TGTG", + "CAAA GAA", + "CC TATG", + "GCA GAGA", + "CC GTT", + "TTTTA TTTT", + "GGAA GAA", + "TTA CTA", + "GCC TGGG", + "TCC CTC", + "TCC TCTT", + "GGA TCA", + "GG TCAA", + "TC GAGA", + "TATT CTT", + "TA CTC", + "GTTAA TT", + "GC GAGA", + "CTTAA TT", + "TCC TTTG", + "GTC TAA", + "CA CCCA", + "GG GTTA", + "GG GCAA", + "GGAAA TG", + "GCAAA TT", + "TA GATG", + "GCA GAAA", + "AAAAAAAA AAAAAAAA", + "CC TACA", + "GGA GTA", + "TC TAATT", + "CAA CAAA", + "TA GATT", + "GG TTTA", + "CC TAGA", + "CTT TAAA", + "TA CTTA", + "TAA TGAA", + "CTA TCA", + "TA GTAA", + "CAGA GAA", + "CAA GAAA", + "GGGG AAA", + "CGTT AA", + "CGTG TT", + "TCTG TCTG", + "TTTTAA TT", + "CTG GCC", + "TAAA TGA", + "C GTCAA", + "TTA GTA", + "GTC TCTG", + "TTTT AAAA", + "CA GTTTT", + "CTT CCTT", + "TATA TAA", + "GC TTTTA", + "TTTT TCA", + "GG TC", + "TTA TTAA", + "TTTT GTT", + "CA TAGA", + "TA GGAA", + "GAGA GAA", + "GTA GCTG", + "TTA TGA", + "GTA GTG", + "GGA GAGG", + "CTC TGAA", + "TA GTC", + "GA CTCC", + "TCC CTCC", + "TAA TGTT", + "CA TCTA", + "GCCA CCA", + "GTA CTA", + "TGGG AAA", + "CGCC TT", + "GCC CGG", + "GGA GGAA", + "GTA CCA", + "CGC AAA", + "CA TAAAA", + "TAA CATT", + "GC TAAAA", + "TCTT CTG", + "GCC AAAA", + "GTA TGA", + "GTC TTTG", + "TA CTGA", + "TCC CAGG", + "TTA TTTA", + "TTA GTT", + "GGA CC", + "TA TAAAA", + "CAAA CAA", + "CTT CTC", + "TCTA TCTA", + "GAAA TAA", + "GTG TAA", + "CTT TGTT", + "GA TAAAA", + "GCC CAGG", + "GC GATT", + "AAAAAA TT", + "TA CAGG", + "GG CTAA", + "TA GCTT", + "GTC TCTA", + "CTCC TGA", + "GAA TAAA", + "TTA CCA", + "GG GACA", + "GCCA CTG", + "GTT TAAA", + "GTC TGTG", + "TGA CAAA", + "TACA TTTT", + "GCCA CC", + "TG TTTT", + "TA GCAA", + "TTA TAAA", + "GA CCCA", + "GCA GC", + "CAGA CAGA", + "CA CAAAA", + "GCC CTA", + "TATT AAAA", + "C GTATT", + "CCA 
TCC", + "TC GATT", + "GAA GGAA", + "GA TCCA", + "TATT TGA", + "GTGAA TT", + "TA CCTT", + "C GTCTT", + "CC TAGG", + "TC GAAA", + "CTT TCTG", + "TGAA GAA", + "TCTC TCA", + "GTC TCTT", + "GGA GGGG", + "GTC TGTT", + "CTA TGA", + "GGAAA TT", + "GCA CACA", + "GCC TTTT", + "CA GTCC", + "CTG GTA", + "GCA TCC", + "TA GTTA", + "GG CTTA", + "GA GTCC", + "TG AAAA", + "TAGA TAGA", + "TGTT TGTT", + "TA CTCA", + "CATT TAA", + "GA TTTTA", + "CA CTCC", + "GAAA CAA", + "GC GCTG", + "TCTT TCA", + "CTG TCC", + "GAA CTCA", + "CGG AAA", + "TATT GTT", + "GCA CTA", + "TATT CAA", + "GC GGGG", + "GTG GCC", + "TAATT AAA", + "TA CTAA", + "GC GGTG", + "TA CCAA", + "GG TATA", + "CTA GTT", + "GCA GAGG", + "CTTTT TTTT", + "TTTTTTTT TTTTTTTT", + "TACA GTA", + "CCA TGTT", + "TA GTGA", + "CGTG TG", + "GC TCTGA", + "CTT CCTG", + "TC GCTG", + "TAAA TCA", + "TCCAA TT", + "GTT TCTG", + "GAA GAGA", + "GG GTAA", + "CCA TAA", + "TTA TATT", + "C GAATT", + "CC GGA", + "TGA GCC", + "CC GTA", + "CAGA GGA", + "GTG TTTG", + "GA CAAAA", + "TTTTTT AAA", + "GTT GCC", + "GA GTTTT", + "TC AAAAAA", + "TGTT TCA", + "TA TCTA", + "TCTC TCC", + "CTC CACA", + "TAAA TATT", + "TTTT CTG", + "CTC TCAA", + "CCTT AAA", + "TCTTTT AA", + "GAA CAAA", + "TTA GCA", + "GCTCA TG", + "TAAA GTA", + "GGA TAA", + "TTATT AAA", + "CTC CATT", + "TCTC TGA", + "TTA TTTG", + "CCTG TAA", + "TTA TATA", + "GA CTTTT", + "TGTT GTT", + "GCAAA TG", + "CTT CAAA", + "GAA TATT", + "GAA TCC", + "CTC TTAA", + "GCA TAA", + "GAA TGAA", + "CTTAA AAA", + "TAAAAA TG", + "TTTTAA AAA", + "CTC TGGG", + "TGA TCC", + "GC TCTCA", + "CTC CAGA", + "GAGTG CAGTG", + "CAA TATT", + "TA GAAAA", + "GTAAA TG", + "TA GCTG", + "GC TCAAA", + "GCA GGAA", + "TA CCTG", + "GG GAAAA", + "TTTT CTA", + "GGGG GGGG", + "CC GA", + "CTT TGAA", + "GGA GGTG", + "TA GTCA", + "GG CCCA", + "TGA TGTT", + "CAAA TAA", + "TCTT CCA", + "GC GCTT", + "GTA TTTG", + "GTC TC", + "GAAA TCA", + "TGA TAAA", + "CATT CTT", + "TA TCCA", + "GCC TCTG", + "TGA GATG", + "C GCCAA", + "GTTTTA TT", + "TATA TATT", + 
"GTA GGA", + "GACA GAA", + "CTCCAGCC TGGG", + "GC GTGA", + "GG TATG", + "GAGG GAGG", + "TCA TTTG", + "CTA CC", + "TACA GAA", + "GG TAGA", + "GA TCTA", + "GTC CATG", + "TGA GGAA", + "TAA TAAAA", + "TAAA CTT", + "TCA CATT", + "GGA GGCC", + "TCA CAAA", + "CA CTTTT", + "CGG CC", + "CAA CAGA", + "GTA GAGA", + "GTTA TTTT", + "CGTT TG", + "TC GTCA", + "TCTG CTG", + "CAA CACA", + "GG TAGG", + "GCA GCTG", + "TAGTA GAGA", + "CAA GCC", + "GCA TTTG", + "TAA TATG", + "GCTT AAA", + "GCTT CTG", + "CTC TCCA", + "TCA TCTT", + "C GTCTG", + "TCA TTTA", + "CA TAGG", + "GC TCCTT", + "TGTT CTT", + "TACA TTA", + "CACA GAA", + "TAAA TATA", + "TA GAGG", + "GA TAGG", + "TCC TGAA", + "GGA GCTG", + "TGA TATT", + "TCA TTAA", + "CTTTT AAA", + "TC GTTA", + "TAAA CTA", + "GTT TGAA", + "TAAAA TTA", + "CA CCCC", + "TCA GAGA", + "CTCC TGCCTCA", + "TGA CATT", + "GTA TTTA", + "CTT CATT", + "GAAA CTG", + "TAA CACA", + "GTT CAAA", + "GGA GATG", + "TC GGCC", + "CAGCA TT", + "TC GATG", + "TATT CTA", + "CTG TGAA", + "TATT GAA", + "TTTT CCA", + "TATT TCTT", + "GGTG AAA", + "CTGA GAA", + "GCA CAGA", + "GC GAGG", + "CTG TGTG", + "TGAAA TG", + "TGA TGAA", + "GTCC AAA", + "CTCAA TT", + "TCCA GAA", + "GTA TATA", + "TAAA GTT", + "TCTC AAAA", + "TCCA TCA", + "GTC TGAA", + "TGA GAGA", + "TGA TTTG", + "TTA GCC", + "CTC CATG", + "TCC CTGA", + "GA GCTA", + "CCCC CCCC", + "GTG GAAA", + "CTG GGAA", + "CAA TGAA", + "CCA CACA", + "CTT TCAA", + "C GGAGG", + "TC GTGA", + "CCA GAAA", + "GTTTT AAA", + "TGTT GAA", + "TCC TGTG", + "CTAAA TG", + "TCC TTTA", + "GTC TGGG", + "TCTC TTTT", + "TA CGG", + "TATT GTA", + "TTA GTG", + "TTA CC", + "TAATCCCAGCA CTTTG", + "TCTG GAA", + "CTT CTCA", + "CGCA TT", + "TATT TAAA", + "TCA CACA", + "TAA TCAA", + "GC GAAA", + "GG GCCA", + "GTT CATT", + "GAGAA AAA", + "TTTT GTA", + "TA CTTTT", + "TC GAGG", + "GTGAA AAA", + "CAA TATA", + "TCC CATG", + "CAA TTAA", + "CTG GAAA", + "CCCA GCA", + "TCC CATT", + "TCC TGTT", + "CTC TTTA", + "TCC CCTT", + "GTT TCAA", + "GTC CAGG", + "GGAA GGA", + "TA GTTTT", 
+ "TGA CCTT", + "GTGCTG GGATTACAGG", + "TATT TATA", + "TCTG CAA", + "CTGAA AAA", + "TATG TTA", + "CTT CACA", + "GCA CAGG", + "CCTG CTG", + "TTTT TTAA", + "GTTA TTA", + "CC CTTTT", + "TGA TTTA", + "TA CAAAA", + "TAA GTAA", + "TTTT TAAA", + "CA TCTC", + "GTG GTGA", + "GTG GAGA", + "CTC TGCA", + "GTTAA AAA", + "TACA TACA", + "CTT TGTG", + "GGA CACA", + "TCTGA TG", + "TA TTATT", + "TCTT CTA", + "CTG TGTT", + "TCA GCTT", + "CTT TATA", + "GG CGC", + "TCC CTCA", + "GTA CC", + "TGGA GAA", + "CAAAAA TT", + "TCTT TAA", + "CTC TCTC", + "TGA GTGA", + "GCA GCTT", + "CGGA TT", + "TA CGA", + "TCTT GTT", + "TC GTAA", + "GCC TGTG", + "TATT CTG", + "GG GATA", + "GG GTCC", + "TGA GATT", + "CTTTTA TT", + "TCC CACA", + "CATG GTG", + "TTA GGA", + "GAA CACA", + "TCA TAAA", + "CAA CATT", + "GG TCCA", + "GAA TTTG", + "TATTAA TT", + "TCC TGGG", + "GCA GCAA", + "CTC TTCA", + "GAA GAGG", + "TCTG TCA", + "CTGAA TG", + "CCA CAAA", + "GTG GAGG", + "TGA TTAA", + "CTCC CTCC", + "CACACACACACACACA CACACACACACACACA", + "GC GATG", + "CATT CTG", + "GTA GAAA", + "TCA TCAA", + "TTTT CAA", + "TATG TATG", + "CCAAA TG", + "TAA TTTTA", + "TAA GGAA", + "CTT GAAA", + "AAAAAAAA AAAA", + "GC TCCTG", + "GCA GATG", + "GAAAAA TT", + "GA CGC", + "GTG GGGG", + "GTCAA TT", + "CTT GCTT", + "TGA CACA", + "GTG TGTT", + "CCA GAGA", + "CCCA GCC", + "TAAA GAAA", + "GTC CATT", + "TAAA TTAA", + "CC CAAAA", + "GAA TTAA", + "TGAA TTA", + "TTTT TTTG", + "CCA GCTT", + "CAA TTTG", + "CTG TTTG", + "GTC TCAA", + "GTT TGTG", + "GG CATA", + "GG TACA", + "TGA TGTG", + "GATT TCA", + "TCTG CTT", + "GTAA TTA", + "TAA AAAAAA", + "GCC GCC", + "TGTGTGTGTGTGTGTG TGTGTGTGTGTGTGTG", + "GC GTCA", + "GC TCATT", + "GAA CCTG", + "TAAA CAAA", + "GTG CTGA", + "TCA GGAA", + "TCC TCAA", + "TCTA TTTT", + "TCTG TTTT", + "CAGA GCA", + "CCA GGAA", + "GTC TTTA", + "TCTT CAA", + "TCAAAA TT", + "GC TTATT", + "GTT CCTT", + "CA CCTA", + "TCA CTGA", + "GAA GCAA", + "TAAA GA", + "TCC TTCA", + "TCTCA TG", + "TCA GTGA", + "TACA CAA", + "CA CGTG", + "CC TAAAA", + 
"GCC TTTG", + "GG CTTTT", + "GTT GAAA", + "GTT CTC", + "CTA GA", + "CTA CAAA", + "GCA CAAA", + "TTA CATT", + "GG CCCC", + "TAA TGTG", + "CTG CCTT", + "TCC CAGA", + "GTGAA TG", + "GGA CAGG", + "GGA TGTG", + "GTT TATA", + "TGA CCAA", + "GTG GCTG", + "GTT CTCA", + "CTTA TTTT", + "CTG GAGA", + "TTA CAAA", + "GTC TTCA", + "CAA GAGA", + "CCA TTTG", + "TCA CAGA", + "CTA GTA", + "CA TTATT", + "TTA GA", + "GC TCTCC", + "GC GCCA", + "TATG TTTT", + "TCC TCCA", + "CAGAA AAA", + "GTG GGAA", + "TAA TCTT", + "TGA GTCA", + "CTG CTC", + "GTC TCCA", + "TCA TGTT", + "GTT TCCA", + "TAA GCAA", + "CTAA AAATA", + "TGA CTGA", + "TC GGTT", + "TTA GAAA", + "TAA GCC", + "TAAA GCA", + "CC TCTCC", + "CC TCCTT", + "TCA GATT", + "TATG AAAA", + "GCTGA TG", + "CATA TTTT", + "GC TCCAA", + "CGG CGG", + "CCA CTGA", + "CA GCAAA", + "CTG TCTT", + "CTA GCA", + "TC GGGG", + "CACA GCA", + "GC TGATT", + "CTA GGA", + "TAA CTC", + "TCA TATT", + "CCTT CTT", + "CTG CAAA", + "CC CGC", + "GG TCTA", + "CCCA GGA", + "GTG TCTG", + "TAATAA TAATAA", + "TCA CATG", + "CAA TTTA", + "TATATATATATATATA TATATATATATATATA", + "CCA CAGA", + "TCAA TTTT", + "GTA TTAA", + "GAA CATT", + "TCTC TTA", + "CTA TTTG", + "TCTT TCC", + "GGTT AAA", + "GC TAATT", + "CTG CTGA", + "TA CCTA", + "CAGG GTT", + "TC GCCA", + "CAAAAA TTA", + "CTT CTGA", + "GCA TGTG", + "CTA TTAA", + "GCA CATG", + "CAA CATG", + "TCA TGAA", + "GAA TGTT", + "GG GTTTT", + "CTG CCTG", + "GTC CACA", + "TAAA CA", + "CTC TGGA", + "GA CCCC", + "GG CAAAA", + "TCTG TTA", + "CTA GTG", + "CTA TATA", + "TCA GTCA", + "TAA CTAA", + "GAA GATG", + "GTC TTAA", + "CAA GGAA", + "GTAA AAAA", + "TCC CCTG", + "TC GCAA", + "TCTG CCTG", + "CC TTTTA", + "GTCC CAGCTA", + "TATA TATG", + "TATT GTG", + "TGTG TTTT", + "GC GCAA", + "CACA GTG", + "TAA GATT", + "CTC TGTA", + "GGAGG CTGA", + "GGA CAAA", + "TATTAA AAA", + "TC GTCC", + "TC GGAA", + "CTA TAAA", + "CTT CAGA", + "CTA GAAA", + "CATT CAA", + "CA CGCA", + "CAGGA TT", + "CCA TCTT", + "GTA GCC", + "GAA TTTA", + "CA CGC", + "CAA TCC", + "TGA 
GCAA", + "GAA GCTG", + "TCAA TTA", + "GAA GTCA", + "CTG CACA", + "CCA CGG", + "GGA TCTT", + "CTCCTGCCTCA GCCTCC", + "TAAA TGAA", + "CC GTC", + "TC GGTG", + "TTTTA TTA", + "GCA GGGG", + "GCA GGTG", + "TCTA TTA", + "TAA CTTA", + "CTAA TTTT", + "CC CGCC", + "TAA TACA", + "GGATT AAA", + "TCTC TCTG", + "GCTT CTT", + "CATT TATT", + "CCA GAGG", + "GGA CAGA", + "GCCAA TT", + "TCC CCAA", + "GTT GATT", + "GAA GAAAA", + "GCA TTTA", + "CTC TAAA", + "CACACACA CACA", + "CC TCAAA", + "TA TAATT", + "CAA TGTT", + "GCC CAGA", + "GTA TATT", + "CTAA AAAA", + "CCA CAGG", + "TAA GAGA", + "TCC TTAA", + "TA TTTTTT", + "GAA TATA", + "GGA TTTG", + "GTG TGAA", + "CTG GCTT", + "GC GGCA", + "TCC GCC", + "GCA TCTT", + "TC TAATA", + "CTG CATT", + "CTC TGCC", + "TCA CTCA", + "TCA GCAA", + "TATTA TG", + "CCA GCTG", + "GA TCTC", + "GCC TCTT", + "CTT CCAA", + "TCC TAAA", + "TCA TCTG", + "CTA TTTA", + "CTG CAGG", + "CAA GCAA", + "GC GGAA", + "GAAA TAAA", + "TAAAA TAA", + "TCA CCTT", + "CCA TGTG", + "GA CCTA", + "CAGA TGA", + "GTG GCTT", + "TTATTA TTATTA", + "TCC CGG", + "TATT TGTT", + "CTG TAAA", + "TCCA TCCA", + "CTG TATA", + "GTT TCTA", + "GTT GCTT", + "CCA TGAA", + "GC TCTTA", + "CTT CATG", + "GTT CCTG", + "GCTG GGA", + "TCA GAGG", + "CATT AAAA", + "TCA GTAA", + "GAA TGTG", + "CTTA TTA", + "GCA CTGA", + "TGA GGTT", + "CA TCAAA", + "CTT CTCC", + "GTT TATG", + "CTT TCCA", + "GTG CCTG", + "GAAA GGA", + "GCA TCTG", + "TA CCCA", + "TAA CAGA", + "AAAAAAAA AAA", + "CTA TGAA", + "CA GTAAA", + "TA GCTA", + "TC GTTTT", + "GTG TCTT", + "GA GCAAA", + "TC TAAAAA", + "GTT CACA", + "GAAA TGA", + "CAAA TGA", + "GCC CTGA", + "GTG TTTA", + "TCA TGTG", + "CATA TTA", + "TCAAAA AAA", + "TAA GTTA", + "TCTC TCTT", + "CCA GTGA", + "CC TCTGA", + "CAA GATG", + "GCC TGTT", + "GTT TGGG", + "CATT CATT", + "GCC CCTG", + "GTT CTGA", + "GC GGCC", + "GC GGTT", + "CAAAA CAAAA", + "TACA TATA", + "GAATT AAA", + "TCAA GAA", + "CTG TATT", + "TTTT TATT", + "GA TTATT", + "TCTAA TG", + "GTT GCTG", + "TGAA TGAA", + "TCA GCTG", + "CTT 
GATT", + "CAGAA TG", + "CTAA TTA", + "TATAA TG", + "GTTTT GTTTT", + "CCA GCCTG", + "TGA TGGA", + "GCA GATT", + "CTC TATT", + "GCA GTCA", + "TAA GTGA", + "CTA CACA", + "CGCA TG", + "TA GCCA", + "GTG GCTCA", + "CAAA TAAA", + "GTG CTCA", + "TTTT TTTTTT", + "TAA CATG", + "TCCCA GCTA", + "CAAA GTA", + "TCA TATA", + "CAGCA TG", + "TGA TCTT", + "CA TAATT", + "TGTG TTA", + "TTTT GAA", + "TTAA TTA", + "GATA TTA", + "TCA TTCA", + "TGA TATA", + "TGA CTCA", + "GA CGTT", + "TGA CATG", + "GTT GTGA", + "CA TTTTTT", + "GCC TGGA", + "CTA TGTT", + "CTT TGGG", + "GTC TCAAA", + "CTG GCTG", + "CCA CATG", + "GG CGTG", + "CTTAA TG", + "TAA GATG", + "GTA TAAA", + "TGTA TTA", + "TAA CTCA", + "GAGAGAGAGAGAGAGA GAGAGAGAGAGAGAGA", + "GCA TGAA", + "GTTAA TG", + "TCCA GGA", + "GAGA GAAA", + "TCTC TGTG", + "CTC TCTA", + "CCA CCTG", + "GCCA GGA", + "CTG GAGG", + "CCA TTTA", + "GTC TGGA", + "GCC CACA", + "TAGA GAA", + "CAA CTCA", + "GGCA GGA", + "TCTTA TG", + "CAAA GGA", + "GG TAAAA", + "GAGA GGA", + "GTC CAGA", + "GCC CTCA", + "GATA TTTT", + "CAGG GAA", + "CCA CATT", + "GA GGAGG", + "GAAA CTT", + "CA GAATT", + "TCA GATG", + "TATT TCC", + "TACA GTG", + "TGA GCTG", + "CCA TCTG", + "GAGAA TG", + "TCAA CAA", + "A TT", + "TAA CTGA", + "TGA GAGG", + "CA CTGAA", + "CCA CCTT", + "CTG CAGA", + "TCA CCAA", + "TGA GCTT", + "CAAA GCA", + "GG TTTTA", + "CGG GGTT", + "TCCAA AAA", + "TATG TATA", + "CCA GATG", + "TCCA TTTT", + "CTG CTCA", + "GA TAATT", + "CCA CCAA", + "CTCC TCC", + "GA GAATT", + "GAAA GTA", + "TAAAA TAAAA", + "CTT CTTA", + "CTG TTTA", + "GAA TCAA", + "GCA TGTT", + "GCA CGG", + "GA CTGAA", + "GTG CACA", + "GA CGTG", + "TATA CAA", + "TC GACA", + "GAA GACA", + "TAAA GGA", + "GA TCAAA", + "CAGTG TG", + "CTA GCC", + "GAGG AAAA", + "TCTG AAAA", + "GAA CCCA", + "GATG GATG", + "GTT CTTA", + "CTA TATT", + "GCA TTAA", + "TCTCTCTCTCTCTCTC TCTCTCTCTCTCTCTC", + "TCA GTC", + "TATTTT TG", + "GAGGA TT", + "GTA TGTG", + "TAA CCAA", + "GTT GTTTT", + "TTTT TCTT", + "GTG TTAA", + "CTT GGAA", + "AAAAAA TG", + "CAA 
TGTG", + "GTG CCTT", + "GCC TCAA", + "GA GTCTT", + "GCTAA TTTT", + "CGAA AAA", + "GTG TATA", + "GC GTTA", + "CTGCA CTCCAGCCTGGG", + "GTT CATG", + "CAAA GAAA", + "GCA GTAA", + "GGA TGAA", + "CTT TATG", + "CAGG AAAA", + "TCC TGCA", + "CTG TCTG", + "GAA CATG", + "GGA TGGA", + "GCC TGAA", + "CAAAAA TG", + "TCCAA TG", + "CCA GCAA", + "GG CCTA", + "CAA CTGA", + "GCA CCTG", + "GTC TATT", + "CC TCTCA", + "GTG GTCA", + "GTG TAAA", + "GTA CACA", + "GTAAAA TT", + "GTA CATT", + "TATA TAAA", + "CTG TTAA", + "TAA GTCA", + "GCC TCCA", + "AAATT AAA", + "GTG CAGG", + "TCC TGGA", + "GTG CAAA", + "GC GTCC", + "CCA TTAA", + "GGA GGGA", + "TCA CTTA", + "TCATT AAA", + "CAA CATA", + "TAA TAGA", + "TAA TGTA", + "GA TTTTTT", + "GTT GTCA", + "GGA GACA", + "GTG TGGG", + "TCA CAGG", + "TC GGCA", + "CTCC CTG", + "GA CCAAA", + "TGTT TATT", + "CGAA TG", + "CTCAA TG", + "TCA CCTG", + "CA GTGTT", + "TGA GACA", + "TA GGGG", + "GAAAAA TG", + "GTT GAGA", + "TC GATA", + "CTC GGGAGG", + "GTT GTC", + "CCA GTCA", + "GCC CAGGCTG", + "GAA CAGA", + "GGCTCA CTGCAA", + "GCA GACA", + "TGA GGTG", + "CA CGTT", + "TAA GAAAA", + "CCA GGCA", + "GTA TCTT", + "CTTGG GAGG", + "CTT TCTA", + "CC GCTG", + "GA GCTCA", + "GAGA CAGA", + "CTT CAGG", + "GCA CATT", + "GTA CAAA", + "CTT GTAA", + "GTG GGTG", + "GAA GTGA", + "GG TCTC", + "GTA TGTT", + "GCA CTCA", + "TTA TGTT", + "CAA GTCA", + "CAA GTGA", + "GAAA CTA", + "TAAA TAAAA", + "TCTT AAAA", + "GTT GGAA", + "GTT CTAA", + "CCA CTC", + "CA GTGAA", + "GAAA GG", + "GCA CGA", + "TAA CTTTT", + "GTT GTTA", + "TCA GTTA", + "CGGA TG", + "TATT TGAA", + "CC CTGAA", + "GCC CTC", + "CTT CTAA", + "TTTG TTTT", + "GA GCTGA", + "CTG TGGG", + "CAA GATT", + "GAA GCTT", + "TGA GTAA", + "CTT GCTG", + "GGA TGGG", + "CGTA TG", + "TCCA TTA", + "GTC TGCA", + "GCCA TTTT", + "GTT GTAA", + "CACA CAA", + "GGACTA CAGG", + "C GTTTTA", + "TCTT CC", + "TAA CCTT", + "CTT TAAAA", + "TGAA TTTT", + "CTA CAGA", + "GCAA GAA", + "TAA CAAAA", + "CAATT AAA", + "CCA CTCA", + "CATG GTGAAA", + "CCCA GAA", + "CTA 
CATT", + "CC GAGG", + "TCCA GTG", + "TGA GTTA", + "GGA GTCA", + "TAA CGA", + "GA GTAAA", + "GA CTCTG", + "GGA GCTT", + "TA CTCC", + "CTG CATG", + "GC TTTTTT", + "GTC TAAA", + "GTG CGG", + "CA TCTCA", + "TGA TCAA", + "GGA GATT", + "GC AAAAAA", + "CA CCAAA", + "TGA CGG", + "CAGA GG", + "GTT GATG", + "CTT GTCA", + "TCCA CCTG", + "GGA GCAA", + "CAA GTAA", + "CCA TAAA", + "GTG CATG", + "GCA TATT", + "GTA GATT", + "GCC TAA", + "CTCAA AAA", + "GGA GAAAA", + "CTA TCC", + "TAATA TTA", + "GTG CTC", + "CAA TATG", + "TGTG GAA", + "TGA CTC", + "GTG TATG", + "TTTTAA TG", + "GC TCTAA", + "CACAA TG", + "CA GCTCA", + "GTT GGTT", + "CTAAAA TT", + "GTC TATG", + "TGTG AAAA", + "CTG GGTT", + "CCCC TCC", + "CC CTCTT", + "GCA GGGA", + "GAAA CCA", + "CATT TCC", + "GCA GCCA", + "TCA TATG", + "GCA GGCA", + "C GTAAAA", + "TGA CCTG", + "CAGA GGTT", + "CTT GTGA", + "TTA TCTT", + "CTG TATG", + "GTCAA TG", + "GGA CGG", + "GC GTAA", + "CAAA CTA", + "TAAA TGTT", + "CTT CGG", + "CTCC CCA", + "TACAA TG", + "TCTG TAA", + "GAA TATG", + "GC GGGA", + "GGA CATT", + "TTA TGAA", + "GGA TGTT", + "GGA CATG", + "TCA GGTG", + "CAA CAAAA", + "GAAA GAGA", + "GTG GATG", + "GG GCTA", + "CCA TCAA", + "CA GCTGA", + "CTC CACC", + "CAA TCAA", + "GTG GTC", + "TGA CAGG", + "CCA TTCA", + "GTCC CTG", + "CAGA CACA", + "GTT GGTG", + "CC TCCTG", + "GAA CTGA", + "TATT CATT", + "GCC CATG", + "CAA TCTT", + "GAAA GCA", + "GAA TCTG", + "TTA TTTTA", + "GTT TGGA", + "TTTT TGTT", + "GGGAA TG", + "GC GACA", + "TAAA CTG", + "CCA TATT", + "GGA TCC", + "CAA GCTT", + "TAAAAAA AAA", + "TCA CTC", + "CA CTGTT", + "TGTTAA TT", + "GGA CTGA", + "GGA GTGA", + "CATA CACA", + "GTT TGTA", + "TCCA GCA", + "GTG CATT", + "GG AAAAAA", + "CCAA GAA", + "TCAA TA", + "CTT CCCA", + "TGA GAAAA", + "GGCC TCCCAAA", + "CAA GCTG", + "GCC CAAA", + "TGA CTTA", + "CA GCCTT", + "CTG GATT", + "TTTT TTTA", + "TCA CGG", + "GCA GTTA", + "TGA CTAA", + "TTA CAGG", + "TGA TATG", + "TAA TTATT", + "TCTT GAA", + "GCC CCTT", + "GTT CAGA", + "CTC TATG", + "CCA TGGA", + "GAGG 
GAA", + "GGA GGCA", + "CTT TGCA", + "TCTT GG", + "GGA GGTT", + "GCCAA TG", + "CTG GTGA", + "CAA CCAA", + "CCA GTC", + "CTT GAGA", + "TACA GCA", + "CTT GTC", + "GA CGGA", + "CTT CTTTT", + "GTG GC", + "GAGGA TG", + "CAA TAAAA", + "GAAA TTTT", + "AAAA AAAAAA", + "CTC TATA", + "GTA TGAA", + "CTT GTTA", + "TAA CATA", + "CAAA CACA", + "TGATT AAA", + "GCTC TGTT", + "GTG GGTT", + "GTT GGGG", + "GTG TGTA", + "GTAA TTTT", + "GTA TCC", + "TGTGTGTG TGTG", + "TCTT CCTT", + "TCA CTAA", + "TCTCC AAA", + "TA TCAAA", + "TGA TGGG", + "GGA TATT", + "CAAA TTTT", + "GTT CAGG", + "GTG GATT", + "GTG CAGA", + "GCTG CC", + "CTCA GAA", + "GCA GTC", + "GGA TAAA", + "GCC TTCA", + "CCA GGTG", + "TA TCTC", + "CAA TGCA", + "CCCA CTG", + "GTG TATT", + "CGA CAGA", + "TGA GATA", + "CCA GGTT", + "TGTT TAA", + "CATCA TG", + "TGA TTCA", + "GCAA TTA", + "GAAA TGAA", + "CTT GGTT", + "GAA GATT", + "GGA TTAA", + "CC TCATT", + "GGCCA GGCTG", + "GCTA TTA", + "GCCA GCA", + "GAGA CAGG", + "CTT GAGG", + "CA GTCTT", + "GTT CTCC", + "TATT TCAA", + "TGA CGA", + "CATG AAAA", + "CATTA TG", + "TAAA TTTA", + "GA GTGAA", + "CAA CAGG", + "TAA GCTT", + "CACA TTTT", + "GA TCTCA", + "TA GTCC", + "GACC CTG", + "TAA TGCA", + "TAA GTC", + "TAA TAATT", + "GAA GTAA", + "CAA CTC", + "CA TCATT", + "GA CGAA", + "GAAA CAAA", + "TATT TCTG", + "CATTAA TT", + "CCA CCCC", + "TAATA TTTT", + "GTT TAAAA", + "GTA TCTG", + "GTCAA AAA", + "GATG CTG", + "TGTT CTG", + "GG TCAAA", + "GTA GGAA", + "GTA TATG", + "TGA TCTG", + "GGGG CTG", + "GCA TCAA", + "GCCAA AAA", + "CCA CGA", + "GC TAATG", + "CAGA GAAA", + "CCTT CTG", + "TCC TCTA", + "GCA GGTT", + "CTCA CTG", + "TAGA TTA", + "GCC GAGA", + "CCA TCCA", + "CTT TACA", + "GTA CATG", + "GCA CCAA", + "CTT TGTA", + "CTA TGTG", + "TCA CTTTT", + "TGA GTC", + "CAA GAAAA", + "CTGA CTG", + "GTTTT TTTT", + "GCA TAAA", + "TAA TCTG", + "GAA AAAAAA", + "CAGGA TG", + "TGA GCCA", + "GAA TTCA", + "TCA GACA", + "GTT CCAA", + "TCA GGTT", + "CAAA CTG", + "CATT TCTT", + "TGTT AAAA", + "CCA GACA", + "CAA GTTA", + 
"CATG TTA", + "CATT CTA", + "TCTTTT TG", + "TGA GGGG", + "CACA TTA", + "TAAAA TAAA", + "GCA TATA", + "TGTT CTA", + "GAA GGGG", + "GAGTG TG", + "TAA GACA", + "GAA CTC", + "CCA GTAA", + "GAGA GAGG", + "GC GACC", + "CAA TTCA", + "CGG CTG", + "CCA GATT", + "CCTG GG", + "GGAA GAAA", + "GAGA GG", + "TCAAAA TG", + "CCTCA TG", + "TAAA GG", + "CTT TGGA", + "CCA GGGA", + "GTA CAGA", + "CTGAGGCA GGA", + "TGTT TCTT", + "CCA GGCTG", + "CTGA GG", + "GAGG CTG", + "CTCC TGGG", + "GAA GTC", + "CGA CC", + "GGA CTCA", + "GGA GTC", + "CA CAATT", + "GTG TTCA", + "GA CTAAA", + "GTCA TTA", + "CAAAA TTA", + "TGAA GAAA", + "GCA CCTT", + "GTT TGCA", + "TCC TGCC", + "GTA GATG", + "GCC TGCA", + "GA GTTAA", + "TCC CTTA", + "GTG GTTA", + "TC GGGA", + "TACA TAA", + "TCTC TCCA", + "CA CTAAA", + "TATATATA TATA", + "GTG GCAA", + "CACCA TG", + "TTTG AAAA", + "CACA CTG", + "CTT GGTG", + "TACA CTG", + "CC TCCAA", + "CAA CCTT", + "CA GCCAA", + "TTTT CAAA", + "TGA TAGA", + "TACA CTA", + "TCTG GG", + "TCC CAGCA", + "TAGG AAAA", + "CTT GGGG", + "TC TGTGAA", + "CC TTATT", + "CATT TAAA", + "TTTTA TTTTA", + "GCC CTCC", + "CTGA GCA", + "CC CGTG", + "GTA GTGA", + "TCC TATT", + "GAA GGTG", + "TGTG CTG", + "TCCA CTG", + "TAA TCTA", + "TGA TGTA", + "GTG GTAA", + "TAA TGGA", + "GATG AAAA", + "GTA GTAA", + "GTG GGGA", + "GTG TCAA", + "CAGA CTG", + "TC GAAAA", + "CTCA TTA", + "TAA TAATA", + "CTCA GAAA", + "CA TCCTT", + "CC GCTT", + "GGAA GG", + "CC GTGA", + "CCA CTCC", + "CTA GAGA", + "TAGAA TG", + "GGA TTTA", + "TTAA TTTT", + "GC TAATA", + "TCC CCCA", + "CAAA TATT", + "GA TCATG", + "TCTTAA TT", + "CA GTATT", + "GTCTT GAA", + "CC GAAA", + "CTA TTCA", + "TAA GATA", + "CTT GCAA", + "GCC CCAA", + "TCC CTAA", + "GAA GTTA", + "GA TGATG", + "CTT GATG", + "CC CTAAA", + "CCTG CCTG", + "GACA TTTT", + "CCA GCCA", + "TGTGTGTG TG", + "GTC TATA", + "TCTC TGTT", + "GTC TGTA", + "TA TAATA", + "CTT GTTTT", + "CGC CATT", + "CTCA GCA", + "TACA GTT", + "CAA GAGG", + "GGAA GCA", + "GCC TTTA", + "CC CCATT", + "CAA CGA", + "GTCA TTTT", + 
"CC CGCA", + "CA GTTAA", + "GAA TCTT", + "CATG TTTT", + "CC GGGG", + "CTA CTGA", + "TCA CGA", + "TAAA TTTG", + "GCC CATT", + "CTC TAGG", + "GGA CCTG", + "TCA GGGA", + "GAGA CTG", + "CC AAAAAA", + "GCC GG", + "CCA GGGG", + "TCA GAAAA", + "CA TCTGA", + "TCTT CAAA", + "CTA CAGG", + "GAGG CAGG", + "CATT GTA", + "TAAA TCAA", + "GA CTCTT", + "CTGA TTA", + "GCA TATG", + "GGA CCTT", + "CAA GACA", + "TATT TATG", + "TATTTT AAA", + "CC GAGA", + "TCA TTTTA", + "CTCA CTCA", + "CCA CCCA", + "CTC TAGA", + "CTA CATG", + "GTG CTTA", + "CAA CCTG", + "TC TGTGTT", + "TAAA TATG", + "CAAA GG", + "CC CTGTT", + "GTT CGG", + "TGA TAAAA", + "CA CGAA", + "GTT GAGG", + "CAGA GTGA", + "GAAA TTAA", + "CACA TA", + "GAA CAGG", + "TCTCC TGA", + "CC TGAGG", + "GGAGG CCAA", + "GTT TACA", + "TAA CAGG", + "TGTG GTG", + "GCCTCC CAAA", + "CCA TCCTG", + "GATT CTT", + "GAA TGGA", + "GTA GTCA", + "CTCC TCTG", + "GAAAGAAA GAAAGAAA", + "CC CTGTG", + "CAGTA TG", + "GC GATA", + "GGA CTC", + "GAAA GA", + "TGTT GG", + "GTA GCTT", + "CA TTTTAA", + "CC CTCTG", + "GCA TTCA", + "CGA TTA", + "TCA CATA", + "TAA TGAAA", + "GGAA TTA", + "CTG TCAA", + "TAAATT AAA", + "CAA GTC", + "GTA TTCA", + "GGCCA TG", + "CTT TAGA", + "TGTT TCC", + "CATG TA", + "GAA TAAAA", + "CAA CTAA", + "TCA TCTA", + "CA CTCTT", + "CA GTTTG", + "CA TAAAAA", + "GCA TGCA", + "GATT TA", + "GAA CCAA", + "TCTG TGA", + "TCA GCCA", + "TCTC CACA", + "TCTCA GCTCA", + "TATCA TG", + "GCA CTTA", + "CGC CAGG", + "CGG GG", + "CATTAA AAA", + "TTTG TTA", + "GGA TATA", + "TC GACC", + "TAA TCCA", + "CC GC", + "CATT GTT", + "CCA GTTA", + "GTA GTTA", + "CTA GGAA", + "CC TAATT", + "TCA TGGG", + "GAA CTAA", + "GCTA TTTT", + "CC GTCA", + "CAGA TTA", + "CCA TATA", + "CAA CTTA", + "TCA GTTTT", + "CTA CCTT", + "GCA CTC", + "GTG TGGA", + "GTG CCAA", + "GACAA TG", + "GA CAATT", + "GTA CCTT", + "TAAA CATT", + "CA GGAGG", + "GTG CGA", + "GAAAA TTA", + "TCTCTT AA", + "CC GATT", + "GA TGATT", + "CCA TGGG", + "TC GGTA", + "CCA TATG", + "CCA GTCC", + "GCC TTAA", + "TGA TCCA", + 
"GTT GCAA", + "GTA GAGG", + "CAGA TTTT", + "GTA CTTA", + "TCTTTCTT TCTTTCTT", + "GCTC TGTG", + "TCAA TAA", + "GTT TAGA", + "GTT CGA", + "CAA GGTT", + "CTCA TTTT", + "CACA GG", + "CATG CTG", + "GAA CGG", + "TA TAAAAA", + "GAA GGCA", + "GA GCATT", + "TGTT TGTG", + "GCTG TTA", + "GTCA CTG", + "CAAA TGAA", + "GTGA CTG", + "GTT CTTTT", + "CAGGCTG GAGTGCAGTG", + "TGA TGAAA", + "TAA CGG", + "CTA CTAA", + "GACA TTA", + "GGA CGA", + "GAGCA TG", + "GCA TGGG", + "CCA CTTA", + "CTA TCAA", + "GCTG TTTT", + "GTC GTG", + "CCTG GCC", + "TCTC TGAA", + "TGTT GTA", + "CAGC CAGG", + "GTT TAGG", + "CC GCAA", + "GGA GTAA", + "CCAA TTA", + "CAGC AAAA", + "TCA TCCA", + "CA CGTA", + "TCA TAGA", + "TAATT AAAA", + "CA CTTAA", + "TCTT TATT", + "GAGA TTA", + "TAA GAGG", + "CAAA TTAA", + "GA CGCA", + "CA CGGA", + "GTG TGCA", + "TC T", + "TATTA TTA", + "GAAA TATT", + "GGA GTTA", + "TCTT TGA", + "CTGA TTTT", + "TGTGAA TT", + "TCC CACC", + "CC CTTTG", + "CAA GGTG", + "CAGA GTT", + "CCCCA TG", + "CTA CCAA", + "CTCC AAAA", + "CTT CCCC", + "CTG CTAA", + "GATT AAAA", + "GC TTATG", + "CTA CTTA", + "TAAAAAA TT", + "TCA GTCC", + "CTATT AAA", + "GAA TGGG", + "CACA GTA", + "CAA CGG", + "GG TTATT", + "TCA CCCA", + "TGA TGCA", + "TAA TTTTTT", + "GTT TGAGA", + "GTATT AAA", + "GCC CCCA", + "TATA GTA", + "TA GTAAA", + "TGA TACA", + "GTG GTTTT", + "CCA CTAA", + "CACA GAGA", + "CCTCTG CCTCC", + "CAA AAAAAA", + "CTC TCTCC", + "CA TAATA", + "GAA GCCA", + "GTT CCCA", + "TGTG TTTG", + "CAA TGGA", + "TGAA GTA", + "CTT CATA", + "CA CTGTG", + "GC TCTTTT", + "TGA CATA", + "TAAA GAAAA", + "GAGAAA TG", + "CAGG GAGG", + "TGTT CAA", + "GA GCCAA", + "GACA GAGA", + "GG CTGAA", + "CAAA TATA", + "GTG GAAAA", + "TAA GGTT", + "GTGA TTA", + "GGA TCTG", + "GATG TTA", + "GACTA CACA", + "TCC TATA", + "CTG CCAA", + "TCC CGA", + "GTGA TTTT", + "GC GTTTT", + "CAGA GTA", + "GAAA GGAA", + "CA CTTTG", + "CCCC AAAA", + "GCAA CCCA", + "TGCA TTTT", + "TCTA GAA", + "TA CTTTG", + "TGA GGCA", + "CA TCTCC", + "TC GCTA", + "TGA CTTTT", + "GA 
GCCTG", + "CATT TGTT", + "TCTT TGTT", + "GCAAAA TT", + "CC TGATT", + "GA TAAAAA", + "GA GTGTT", + "TCC TGTA", + "TACA GAAA", + "TC CAGGAA", + "GCCA GTG", + "TAGA TTTT", + "TAA TAGG", + "CTCC TCA", + "CATTTT TG", + "CATT TCAA", + "GCCA TCA", + "TAAAA TATA", + "GA CTGTT", + "GCA TGGA", + "CAAA GTT", + "CA TGATT", + "GA GTTTG", + "CTA GCAA", + "CTT CCTA", + "GG GGAGG", + "CTA TATG", + "TATT TATTTT", + "CA CCATT", + "CC CTCAA", + "TTTTTTTT TTTTTT", + "GA TCATT", + "GTA CATA", + "CTC CATA", + "CCCC GTCTCTA", + "GCC TGCC", + "CTA GCTT", + "CC CGGA", + "GATG TTTT", + "GTA TTTTA", + "TCA GATA", + "CCTG GAA", + "TATT CCA", + "GGA CCAA", + "GCCA TTA", + "CGA CTGA", + "TAA GCTG", + "TAAA CACA", + "GTT TCTC", + "CA TCTTA", + "GAAA TTTG", + "TAA TGGG", + "TAAAA TTTT", + "CTG TTCA", + "CCTG TTA", + "TA CTGAA", + "TGA CCCA", + "TGA TTTTA", + "CTCC TTA", + "TATA GAA", + "CTG CGG", + "GC GGTA", + "GTG CTAA", + "CAGA GGAA", + "TACA TCA", + "TCAA TCAA", + "CTG CAGCC", + "TGAA TATT", + "TCTA CAA", + "CCA CATA", + "CC CGTT", + "TATA CACA", + "TCC TCTC", + "TCTA CTT", + "CC GGAA", + "CTTTT TTA", + "GAAA GAAAA", + "CTA TCTT", + "GA CTTTG", + "TGAA CAA", + "GCA GTTTT", + "GC TAAAAA", + "GAGG CGG", + "TAA TAAAAA", + "CTG GTCA", + "CAGA CAA", + "GGA TATG", + "TGAA GG", + "GCCA GAA", + "CCA GGCC", + "CCA CCATG", + "CAAA CTT", + "TCA TGTA", + "GCTG CTT", + "GTAA TA", + "CCCC CAA", + "CA GCCTG", + "TCAA CTT", + "TAAAA TTAA", + "GCTG AAAA", + "CGA CGA", + "GTG GGCA", + "TGA GGGA", + "CGC TCC", + "TTTT GTTTT", + "GA GTCAA", + "TCA TGCA", + "CTG CTTA", + "TAA GTTTT", + "GTA GCAA", + "CCTT GG", + "TGA CAAAA", + "CTG GTAA", + "TCTT TATA", + "TGTG TGTT", + "CTG GTC", + "CTG GCAA", + "CATT TCTG", + "CTC TACC", + "CTGA GGA", + "CTAAAA TG", + "CTA GATT", + "GTA TCAA", + "CA GTCAA", + "CTG GGTG", + "CC TCTTA", + "TGA GTTTT", + "TTTTA TTTA", + "CC TTTTTT", + "TATA TACA", + "TA GCAAA", + "AAA TTA", + "CTG GATG", + "GA TAATA", + "GA CAAAAA", + "CCTG GGA", + "GCTT TCA", + "GTA CAGG", + "GCTG GAA", + "CTA 
CTCA", + "CAA TGTA", + "GC GTGAA", + "GA TCCTT", + "TATTAA TG", + "GCC CGA", + "TAAA GTG", + "GCTT CCA", + "CATG GAA", + "TGAA GTT", + "CTT TCTC", + "TCTGTG TG", + "GTA TGTA", + "CAA TACA", + "TCAA GG", + "CC TCTAA", + "TGTG GG", + "GA TCTGA", + "GTA CTGA", + "TTAA TTAA", + "GCA GAAAA", + "CTA CATA", + "CC GGTG", + "GGGG AAAA", + "TACAA AAAA", + "TTTT GG", + "GTGA GAA", + "TCAA TAAA", + "TCAA GTT", + "CTCA GGA", + "CTA CTC", + "CAAA TCA", + "GGCA GAA", + "CC CGAA", + "TGTT GTG", + "GAGC AAAA", + "TATT TGTG", + "GTA GGTT", + "CTA CCTG", + "CA CAAAAA", + "CTCA GG", + "GCTT TA", + "CAGA GCAA", + "CTCA GTG", + "GGAA GAGA", + "TAA CCTG", + "GAAA TATA", + "CGA GAA", + "GTGA GG", + "CATT TATA", + "GGCA GCA", + "TC TAAATT", + "CCCA GTG", + "GCC TAGG", + "TGCA TTA", + "CC GTAA", + "CATT CCA", + "CTA GTTA", + "GA CTTAA", + "CTA TACA", + "GACA CAA", + "TCTT CACA", + "CC GGTT", + "TAAA GTAA", + "CTG TGGA", + "TAA GGTG", + "TCCA GTA", + "CAAA TTTA", + "AAATT AAAA", + "CCA TCTA", + "CTCC CTT", + "CTCC TTTT", + "GAGAGAGA GAGA", + "GGA GATA", + "CCTA TTA", + "CACC AAAA", + "CC GTTA", + "TGTT TATA", + "CTCA GGAGG", + "GA CGTA", + "GTCC TTA", + "GAAA GTT", + "GCTG GTG", + "CTC TACA", + "CAA TAGA", + "TAAAA TATT", + "GTA CCTG", + "GTA CTAA", + "CTT TGAAA", + "CCTT TCC", + "TAAAAA TTA", + "CTC GG", + "CAA GATA", + "CATT TGA", + "CACC TCA", + "GCCA GCC", + "GTC GG", + "GCA CATA", + "CA CTCAA", + "CTTTT AAAA", + "CAGGAA TT", + "GCC TATT", + "TCTT TCTG", + "CTGAGGCA GGAGAA", + "CAGG CAGG", + "CTA GTAA", + "TCCA TA", + "GAA CTTA", + "C G", + "GCTG TGA", + "GAAAA TA", + "TCTT CATT", + "GAGG GAGA", + "CCCA TCC", + "GAGG TGGG", + "GCC TCTA", + "GTA GGTG", + "TAAA CCA", + "GAA GGAAA", + "TATT GG", + "A TG", + "TCCA GTT", + "CCCA CAA", + "GAAA CACA", + "GTC TCAAAA", + "CTTTT CTTTT", + "TGAA GGA", + "TATT GATT", + "CTA TGTA", + "AAAAAAAA AAAAAA", + "TCCTT AAA", + "GC GCTA", + "TCCA CTT", + "GA CTCAA", + "TAAA TACA", + "TCA TGGA", + "TCTG GGA", + "TCC TATG", + "CTG TGCA", + "TCAA GTGA", + "TCA 
TAAAA", + "CA TCCAA", + "CCTT CCA", + "CTG TACA", + "GAA GGTT", + "CTG TGTA", + "GTCA CTT", + "TCA CAAAA", + "TCA GGCA", + "GTGTT AAA", + "CC CTTAA", + "CAAA GTG", + "GAAA TGTT", + "CTG GGGA", + "GA CGCC", + "TATA TGTG", + "CTA GATG", + "GAAATT AAA", + "GAA TGCA", + "GCA CTAA", + "CGG GAGG", + "GCCA CAA", + "CGC TTA", + "TCCA CAA", + "CAGA TA", + "TC TGAATT", + "TATTA TTTT", + "GC GCGG", + "CTC TGAAA", + "TCTCTT TG", + "TATT TCTA", + "GGGG TGGG", + "GGA TGCA", + "CCA CACC", + "TAAA TGTG", + "TCTT CCTG", + "GCAA GG", + "CTG CTCC", + "CTG GAGTG", + "CTGTT AAA", + "CACA CAAA", + "CTGA CTT", + "GAAAA GAAAA", + "CCTT CTCC", + "GAAA TAAAA", + "CCTCA GGTGA", + "GA TAATG", + "GAATT GCTT", + "CCAAAA TT", + "CGTG AAA", + "CACTG AAA", + "CAGTG AAA", + "GA TCTTA", + "GAGA TGGG", + "TCTG CCA", + "TGA GGTA", + "TATG GAA", + "TATA TTTTA", + "TGAA CTT", + "GCA GATA", + "CTTTT CTT", + "GTAAAA TG", + "TCTC TAA", + "TCTG CAAA", + "GA GCCTT", + "TA TCATT", + "CAA TTTTA", + "CC GCCA", + "TATT TAAAA", + "GAGA GATG", + "GAGA TGGA", + "GCCA GGATG", + "CGA GTAGCTG", + "TTCA TTTT", + "TATA CTT", + "GTC TACA", + "GTGA GTGA", + "GCTA CACA", + "GGGA GGA", + "CAA GGCA", + "GC TTTTAA", + "CA CTATT", + "GTT CATA", + "TCC TC", + "GTG GACA", + "TATT TGGA", + "CTC CAGTA", + "GTT CAGTT", + "CCAA GG", + "CAGA GCC", + "CTC GCC", + "CC GATG", + "GGAA TTTT", + "TCCA GCC", + "CC TCTTTT", + "GAA CCTT", + "CATG CACA", + "GTT TC", + "GAA GATA", + "TA CCCC", + "GCTG CCA", + "GGGG GAGG", + "GCAGTGA GCTGA", + "CTG TCTA", + "CGA GGA", + "CAA TGGG", + "GC TGTGAA", + "GAAA GTG", + "TACC AAAA", + "GTCA GG", + "CAGC TCC", + "TGTG CTT", + "GTC TAGG", + "TTTT TGTA", + "TTA TATG", + "TCA GGGG", + "TATT GTTA", + "CC TGAGA", + "TA TCTCA", + "CAA TCTG", + "CA CTCTG", + "GATT TAA", + "TGAA TAA", + "TCTT GTA", + "TCAA CTG", + "TCTC CAGG", + "CTA GAGG", + "CTGA GAAA", + "CTA GCTG", + "TCCA CCA", + "CGA TTTT", + "CC GGCC", + "GTT GACA", + "CTTA GAA", + "CA TAATG", + "GA GTATT", + "CACA GAAA", + "GA CTGTG", + "CTA TTTTA", + 
"TGA GGAAA", + "TTATT AAAA", + "CTTA TTTA", + "CAGA CTT", + "CA CGCC", + "GCTT GG", + "CCTG CTT", + "TAAA GCAA", + "CCTC GTGA", + "TA GAATT", + "CTTA CAA", + "TAAA GGAA", + "GTC TAGA", + "GTGA CTT", + "TACA TATG", + "GTCA GGA", + "GCTC CAGG", + "GAA GGGA", + "CA TGATG", + "TCA TCAAA", + "CGTT AAA", + "GTA CTCA", + "CTCC CAA", + "TATA TGTA", + "GGTA TTTT", + "TAA GCCA", + "C GAAATT", + "GTTTG TTTT", + "TCTG TCTT", + "TATA TCA", + "TGTT CATT", + "CAAA CCA", + "TTCA TTA", + "TATT TGTA", + "GATT GAA", + "CTA TAAAA", + "GATTAA TT", + "CCCA CCA", + "TCC TAGG", + "TAAA TGTA", + "CTCTT AAA", + "GCA GTCC", + "GC GGCTG", + "GTC TCGAA", + "TGAA TGA", + "CTG GGGG", + "GTC TCGA", + "GAA CAAAA", + "TGAA TCA", + "TGTATTTT TAGTAGAGA", + "GTTA TTAA", + "TTTTTT AAAA", + "GTCA GTG", + "CCCA TTA", + "CACA GGA", + "TATT CCTT", + "TCTG CCTT", + "CCTG GTG", + "GC GAGC", + "TA CTAAA", + "TACA CAAA", + "CC GTCC", + "GCTT TGTT", + "GCA TCCA", + "CA TCTAA", + "GC TGTGTT", + "GTA GACA", + "GCC TATG", + "TCTT TGTG", + "GATT CTG", + "CGCC CGG", + "GA TGAGA", + "TA TCTGA", + "TGAA TTTG", + "CC TGATG", + "TAAAA CAA", + "CTT TAGG", + "TTTT CCTT", + "TGAA TAAA", + "CGG GGA", + "CAAA CATT", + "GTA TGGA", + "GCTT AAAA", + "TA CCAAA", + "CAAA GAGA", + "CTCC TGCC", + "GTAAAA AAA", + "CACA GCC", + "CCA TGCA", + "TA CAATT", + "CTA GTGA", + "CTGA GTT", + "GAGTG AAA", + "TCTGTT TG", + "CTG TAGG", + "TATAA AAAA", + "GCATT AAA", + "GTC CATA", + "TGTTAA AAA", + "TGTT TGA", + "GAA TAGA", + "CTT CAAAA", + "CTG GACA", + "CTG TAGA", + "CCATT AAA", + "CTA TCTG", + "CACTA TG", + "TTA TCAA", + "TAA GTAAA", + "TAATCCCAGCACTTTG GGAGGCC", + "CCA GAAAA", + "TGAA GCA", + "TCC CTTTT", + "TCA TACA", + "TA CGTT", + "GCC GTG", + "GGAA GTG", + "GG CCAAA", + "GTA CCAA", + "TCTCTA CTAAAAATA", + "CATT GTG", + "TGTG TGA", + "GAAA CAGA", + "CTT GACA", + "GA TGAGG", + "GAGA TTTT", + "CCTT CAA", + "GAA TCTA", + "CTC TCCTT", + "GG CGGA", + "TCTATCTA TCTATCTA", + "CACA CAGA", + "TGTG TGTA", + "CAAA GCC", + "TGTG CCA", + "GTT GAAAA", + 
"CTC CAGCA", + "TCAA GGA", + "TA GCTCA", + "CGC TGA", + "CCTG AAAA", + "GA CTATT", + "GATT CCA", + "GCTT CTA", + "GTC TGCC", + "CTT GGCA", + "TGTG GTA", + "GCTT TGA", + "GCTC TCTG", + "CTCA CAGA", + "TCTT TAAA", + "CAAA GCAA", + "TA CTTAA", + "GCTT CAA", + "CATT GAA", + "GGA GGAAA", + "CTA TAGA", + "CTGA GGAA", + "CCTG GCA", + "CC CTATT", + "CTC GTG", + "TTA CACA", + "TTA GGAA", + "CTG GTTA", + "GTT GTCC", + "TAATG AAAA", + "TATT TACA", + "GG GAATT", + "GTA GTTTT", + "GCTG CAA", + "CTA CGG", + "GCC GGA", + "CTG GGCA", + "CCTT AAAA", + "GATG GAA", + "TAGATAGA TAGATAGA", + "TATG TAA", + "GTA CGG", + "TATT CAAA", + "GA TCTCC", + "CCTG TTTT", + "TATT GCA", + "GGAAGGAA GGAAGGAA", + "GG TAATT", + "TTA CAGA", + "TCA GC", + "GCAAAA TG", + "GAGA GCA", + "GTA GAAAA", + "CATT TGAA", + "TCTT CTTTT", + "TCC CATA", + "GTTA TTTA", + "CTA TCTA", + "CA TCCTG", + "TCTT GTG", + "TTA TTATT", + "CC CGTC", + "TACTA TG", + "TAAA CATA", + "TAA GGAAA", + "GCTT GTG", + "CTC TAAAA", + "GTTTT AAAA", + "GACA GGA", + "TCC TAGA", + "TCCA CCCA", + "GTT TGAAA", + "CCA TCTCA", + "CTAA GAA", + "GTA TCTA", + "GTGA GGA", + "GCTG GAGG", + "CCTGTAA TCCCAGCTA", + "GCAA CAA", + "CTT TCAAA", + "CAAA TGTT", + "CTT GTCC", + "TCTCAA AAA", + "TATT TATTA", + "TAA GGCA", + "GAGA GGAA", + "TA TGATT", + "GCA TCTA", + "C GTTATT", + "GCC TGTA", + "GTT TCAAA", + "CCTTCCTT CCTTCCTT", + "GG CTTTG", + "GTCA GAA", + "CATG CATG", + "GTCA TTTA", + "CTG GAAAA", + "CTT CGA", + "CCTA TTTT", + "CCAA CAA", + "TCCA TCC", + "TAAA GTTA", + "GTC TCTC", + "TAA TCAAA", + "GATTTT TG", + "GATT TCTT", + "GG GCTGA", + "GCA TGTA", + "CCTG GGTT", + "GAGA CAA", + "GCTG TCA", + "TGA TAGG", + "GGA GACC", + "CC GGCA", + "TAA TCTCA", + "TGAA TTAA", + "TCTG GTG", + "GCC TC", + "GG CGCA", + "CCA GCTA", + "CA GTCTG", + "TGAA CTA", + "GTAA GAA", + "CCTT TCA", + "TCCA TGA", + "CAAA GGAA", + "CTC TC", + "CTC TCTCA", + "CTC CAGC", + "GTA GATA", + "CCCC CTCC", + "GG CGCC", + "TCTG TCC", + "GA CCATT", + "CTT GAAAA", + "TTA TCC", + "TACA TGTG", + "CAAA 
TTTG", + "TTTT GTG", + "CAGA GTG", + "GTAA TAA", + "GTGA GTG", + "TTTT TCC", + "GG CTCTG", + "GCC CTAA", + "GG CTGTT", + "CC CAATT", + "CAGA GCTT", + "TATAAA TG", + "GA GTCTG", + "TCTTAA AAA", + "GTTTTA TG", + "GA TCCAA", + "GGCC CTG", + "GA TCCTG", + "TCAA GTG", + "GATT CAA", + "CCTC TCTT", + "GAGA CGG", + "CAGA TCA", + "TAAAA GAA", + "CTGA GCAA", + "CCTG CCA", + "CCTT CTA", + "CGC TCA", + "GG CTGTG", + "TGGG AAAA", + "GGA GCCTG", + "CTGA GTG", + "CGTC AAA", + "TCAA GTA", + "CGTAA TT", + "TTA CTTA", + "TATA CTA", + "GG GCAAA", + "CAA CTTTT", + "CTT TGCC", + "GC CAGGAA", + "CACA CTA", + "GCC CAGC", + "TAAATAAA TAAATAAA", + "CTT TCCTT", + "GGGA GAA", + "TATG GTA", + "CGG CCA", + "CCTC TCTG", + "GAAA GCAA", + "CAA GCCA", + "GG CGTT", + "CTC TTTTA", + "TCGGCC TCCCAAA", + "GATT TATT", + "CAA GTCC", + "TA TCTTA", + "GTTCAA GACCA", + "CTCA CACA", + "GAAA TCAA", + "TGA GACC", + "GG GTAAA", + "GCTT GTT", + "GA TTTTAA", + "TTTT TATA", + "CAGA GCTG", + "TC TGTTAA", + "GTAA TTAA", + "TCTT TGAA", + "CTT GCCA", + "TTTT CATT", + "CCA TGTA", + "TCTC GGCTCACTGCAA", + "GGA TTCA", + "TC TATTAA", + "TACA TAAA", + "GATT GATT", + "GGA GAGGA", + "CGC AAAA", + "GGA CTAA", + "TTA TGTG", + "GTCA CTCA", + "GACA GCA", + "CGA GTT", + "GATG GTT", + "GGAA GAGG", + "GCCAA CATGGTGAAA", + "GGA GCCA", + "TGAA CTG", + "CCTC TGTG", + "GTA TAAAA", + "TCC CAGAA", + "CATT TATG", + "GA TTATG", + "TGTT TCTG", + "GAGTG GGTT", + "TACA TATT", + "CTC CAGGA", + "GACA CTG", + "GG TCTCA", + "CC GGGA", + "TGTT TAAA", + "CTCA CCA", + "GGA CTTA", + "GCC CACC", + "CAAA TCAA", + "GAAA TGTG", + "TA GTTAA", + "TCTA TAA", + "TTA GATT", + "GTG TAGG", + "TACTG AAA", + "GCA CCCA", + "GTG GGCTG", + "GAA TGAAA", + "TCTA GTT", + "TCA GGAGA", + "TCCA CTA", + "CTCA GTT", + "TACTT AAA", + "GA CTCCA", + "TCCATT TG", + "CACA GCAA", + "GCTCATG CCTG", + "GGTG CTG", + "GCTT TCTT", + "GTG GCCA", + "TA CGTG", + "GTG CAGTG", + "TGAA GTCA", + "CCTT TAA", + "TCTCAGCTCA CTGCAA", + "GAAA TATG", + "CC TCAAAA", + "GGGG CGG", + "CGA CAA", + 
"GG TGATG", + "GTCTT AAA", + "CAGAAA TG", + "CGTCA TT", + "CCAA GCA", + "GGA TCAA", + "GTGCTG GGATTA", + "GCTG GCC", + "CGGA GCTT", + "TACA TGA", + "TGTT TGAA", + "TCTC CATT", + "TAA GCAAA", + "CCTT TCTT", + "TA CTGTT", + "TCCA TCTT", + "CTTA CTT", + "CGGA GGTT", + "CAAAA CAA", + "TCA TAGG", + "TTA CTAA", + "CTTA TTTG", + "GAA TGTA", + "CCCCA TGGA", + "TTA CTGA", + "CGG AAAA", + "CTC CAGTG", + "TGTT CCA", + "CAGA TGAA", + "GTT GATA", + "TCC CCCC", + "CATT GCA", + "CTCA GCC", + "CTTA CTG", + "TA TCCTT", + "CTTTTA TG", + "TGAGTA GCTG", + "GACTG AAA", + "CAA TGAAA", + "CGA CTG", + "CTT GGGA", + "GCAA GCA", + "TCA CTCC", + "GATT TGA", + "CATTTT AAA", + "TCAA CTA", + "GTCC AAAA", + "CACC CTG", + "TTA CCTT", + "CAA GGGG", + "TTTT GGA", + "GTTA TTTG", + "GCTA CTG", + "CTGAGGCAGGA GAATG", + "GTGA TGA", + "GTA GTC", + "TAGTA TG", + "GTA TAGA", + "GTG TCTA", + "GCTG CTA", + "TTA GTAA", + "TAAA CATG", + "GTCA CCA", + "CA TCTTTT", + "CATA TAA", + "TCTC TCTA", + "TTTTA TTAA", + "TATT CTAA", + "GAAA TTTA", + "CTT CCCTG", + "TAAA GATG", + "TA CGTA", + "GTT TATTA", + "GAAAA GAA", + "CCCA CCCA", + "CAATT AAAA", + "CC GACA", + "CAAA GTGA", + "CAAA CAAAA", + "GCAA TTTT", + "CGATT AA", + "TTA GAGA", + "CTGA TGA", + "GGA GGAGG", + "GTCC TGGG", + "TCA TGAAA", + "GCAA CCA", + "GTT GGCA", + "GCGG CGG", + "GTCC CCA", + "GTA GGGG", + "GCCA TGTT", + "GTT CGAGA", + "GCC TATA", + "TAAA TTCA", + "GG CCATT", + "GAAAA CAA", + "TGTG TATG", + "GTA CTC", + "TAGG GAA", + "CCTT GAA", + "TC TATTTG", + "GAGG GCA", + "GAAA CTGA", + "TA CGC", + "TA CAAAAA", + "TCA TTATT", + "GGAAAA TT", + "TCAA TATT", + "CC CGTA", + "GGA GAGAA", + "TTA GTTA", + "CTCA GAGA", + "TC GAGC", + "CTA GTCA", + "GATG GCA", + "TGAA CATT", + "CTA TGGG", + "CACA CCA", + "TCAA TTAA", + "GGAA CTG", + "TTA CATG", + "CTT TCATT", + "CAGC TCTG", + "TCTTTT TTTT", + "TAAA TCTT", + "TGA TCTA", + "CATA CAA", + "GC TCAAAA", + "GC TGTGTG", + "TCAA TCA", + "GATT TGAA", + "CCAA GGA", + "GTCC TCA", + "GTG CTCC", + "AAAA TAA", + "GTGA CAA", + "GCTCA 
CGCCTG", + "CGA CGG", + "TA TCCAA", + "CACA CATG", + "TCTC TCTCC", + "TGTG GTT", + "CTT GGTA", + "TCTG GTT", + "TTTA TAA", + "CTG CTTTT", + "TGTG TCA", + "CACA TCA", + "CC TAATG", + "C GTTTTTT", + "GCTG GCA", + "GA CGTC", + "TATAA TTA", + "TACA GTAA", + "GAAA GTAA", + "GTC TGAAA", + "CCCA TTTT", + "TATA TGA", + "CTT GATA", + "CTT TATTTT", + "CTT TATTA", + "GG CGAA", + "CCA TGCC", + "CCTG CCTT", + "GAAGAA GAAGAA", + "CTGA CTGA", + "GCC CTTA", + "TA TCTAA", + "GTG TTTTA", + "TGTG GCA", + "TATT GTAA", + "GCCA GAAA", + "CCCTG TCTC", + "CACA GGAA", + "AAAA CAA", + "AAAAAAAA AAAAAAA", + "TAA CTCC", + "GCC TAAA", + "CGA GTA", + "TA GTATT", + "GTATTTT TAGTAGAGA", + "GCTG CAGG", + "TATT GAAA", + "CCAGCC TGGG", + "GCTCC AAA", + "TA CGAA", + "GGCC TCC", + "TATA CAAA", + "CATG GCA", + "CATG CAA", + "TACA CCA", + "CTT TACCA", + "TACA GAGA", + "TATT CTTA", + "TATG TCA", + "TCAA GCA", + "TCAA TGA", + "GG CTCTT", + "GGAA GTT", + "TCCA TGTT", + "GCTT TCC", + "TATG TGA", + "GTG TAGA", + "TTTT TAAAA", + "GCTG GAGA", + "GTGA GAGA", + "CCTA GAA", + "CCTCC AAA", + "CCAA TGA", + "CAGG GCA", + "CTA TGCA", + "CTT CACC", + "CTA CAAAA", + "CTCA CC", + "GAGTA TG", + "TA GAAAAA", + "CTTTT GAA", + "TAAA GAGA", + "CATG TCA", + "TCTTTT AAA", + "CACA GTGA", + "GA TCTAA", + "TAA GGTA", + "CATA GAA", + "CGC GCC", + "CAGC TTA", + "TATA GTT", + "CGG GCC", + "TATC CATT", + "TGTTTG TTTT", + "GCTG GCTG", + "TACA GGA", + "CTCC TTTG", + "CAA TCTA", + "CCCC CTG", + "TATA CTG", + "CTGA GCC", + "CGG TTA", + "TGAA GTG", + "GCTT CCTT", + "TTTTA TTTG", + "TA GTGAA", + "CTGA GGTG", + "TCTT CTC", + "GACA GAAA", + "CTGAA CTGAA", + "CCTG GGAA", + "TCC CCAAA", + "TATG TATT", + "GATT TCTG", + "CATT CAAA", + "CACA GTT", + "GCTT GAA", + "GTG GATCA", + "CTGA GTGA", + "TGAA TTTA", + "TCAA CAAA", + "GG TCATT", + "GTAA TTTA", + "GC GACTT", + "CTGA GAGA", + "GTG CCCA", + "CTA GGTT", + "TCC TGAAA", + "GTC CACC", + "TCA CAGAA", + "GC GAAAA", + "GTA TGGG", + "TGAA CAAA", + "TAAA CAAAA", + "CC GTTTT", + "TC TCAATT", + "TCCA 
GAAA", + "GTAA CAA", + "GCA TTTTA", + "TCTC CATG", + "TTA TAAAA", + "CAGG CAA", + "CTAAAA AAA", + "GTT GGGA", + "TAAA GATT", + "TGAA GAGA", + "CCCC TCA", + "TGTT TATG", + "TCTA CTG", + "CCAA TTTT", + "GGTG GTG", + "GGAA CAA", + "TGTG GGA", + "TCTG CTA", + "GAA CGA", + "GTAA GTA", + "GTT GCCA", + "AAAA TTTT", + "GC GCGA", + "GAAA GATG", + "GTC TCTCA", + "TCCA TCAA", + "GCA GCTA", + "CACA TTTG", + "CTGA CAA", + "TCCA CC", + "GC T", + "CCCA CTT", + "GCA GGTA", + "GAGG CCA", + "TAAA GTCA", + "CTG GATA", + "CGG CAA" + ] + } +} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42/checkpoint-9030/tokenizer_config.json b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42/checkpoint-9030/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4e23b345b1406f6678be487d2c169e05d58c0487 --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42/checkpoint-9030/tokenizer_config.json @@ -0,0 +1,56 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "[UNK]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "[CLS]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "[SEP]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "[PAD]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "[MASK]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "cache_dir": null, + 
"clean_up_tokenization_spaces": true, + "cls_token": "[CLS]", + "mask_token": "[MASK]", + "model_max_length": 250, + "pad_token": "[PAD]", + "padding_side": "right", + "sep_token": "[SEP]", + "tokenizer_class": "PreTrainedTokenizerFast", + "trust_remote_code": true, + "unk_token": "[UNK]", + "use_fast": true +} diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42/checkpoint-9030/trainer_state.json b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42/checkpoint-9030/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..2d127a7a0aaf7b2409489806d18dd7dcc473d1de --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42/checkpoint-9030/trainer_state.json @@ -0,0 +1,624 @@ +{ + "best_metric": 0.8996340973968899, + "best_model_checkpoint": "genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42/checkpoint-9030", + "epoch": 4.997924449979244, + "eval_steps": 100, + "global_step": 9030, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.06, + "learning_rate": 1.107011070110701e-05, + "loss": 0.9622, + "step": 100 + }, + { + "epoch": 0.11, + "learning_rate": 2.214022140221402e-05, + "loss": 0.7009, + "step": 200 + }, + { + "epoch": 0.17, + "learning_rate": 2.99006735928759e-05, + "loss": 0.5302, + "step": 300 + }, + { + "epoch": 0.22, + "learning_rate": 2.9558168740723828e-05, + "loss": 0.4629, + "step": 400 + }, + { + "epoch": 0.28, + "learning_rate": 2.9215663888571756e-05, + "loss": 0.4315, + "step": 500 + }, + { + "epoch": 0.33, + "learning_rate": 2.887315903641968e-05, + "loss": 0.3898, + "step": 600 + }, + { + "epoch": 0.39, + 
"learning_rate": 2.853065418426761e-05, + "loss": 0.3906, + "step": 700 + }, + { + "epoch": 0.44, + "learning_rate": 2.818814933211554e-05, + "loss": 0.3697, + "step": 800 + }, + { + "epoch": 0.5, + "learning_rate": 2.7845644479963468e-05, + "loss": 0.3494, + "step": 900 + }, + { + "epoch": 0.55, + "learning_rate": 2.7503139627811396e-05, + "loss": 0.3635, + "step": 1000 + }, + { + "epoch": 0.61, + "learning_rate": 2.7160634775659324e-05, + "loss": 0.3492, + "step": 1100 + }, + { + "epoch": 0.66, + "learning_rate": 2.6818129923507252e-05, + "loss": 0.3424, + "step": 1200 + }, + { + "epoch": 0.72, + "learning_rate": 2.647562507135518e-05, + "loss": 0.3311, + "step": 1300 + }, + { + "epoch": 0.77, + "learning_rate": 2.6133120219203104e-05, + "loss": 0.3341, + "step": 1400 + }, + { + "epoch": 0.83, + "learning_rate": 2.5790615367051032e-05, + "loss": 0.3222, + "step": 1500 + }, + { + "epoch": 0.89, + "learning_rate": 2.544811051489896e-05, + "loss": 0.3196, + "step": 1600 + }, + { + "epoch": 0.94, + "learning_rate": 2.510560566274689e-05, + "loss": 0.3258, + "step": 1700 + }, + { + "epoch": 1.0, + "learning_rate": 2.4763100810594816e-05, + "loss": 0.3128, + "step": 1800 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.8786757074655781, + "eval_f1": 0.8800256036767294, + "eval_loss": 0.30727851390838623, + "eval_matthews_correlation": 0.8180828481018108, + "eval_precision": 0.8840421410896541, + "eval_recall": 0.8781635253112077, + "eval_runtime": 26.4413, + "eval_samples_per_second": 1093.212, + "eval_steps_per_second": 34.189, + "step": 1806 + }, + { + "epoch": 1.05, + "learning_rate": 2.4420595958442745e-05, + "loss": 0.3041, + "step": 1900 + }, + { + "epoch": 1.11, + "learning_rate": 2.4078091106290673e-05, + "loss": 0.3024, + "step": 2000 + }, + { + "epoch": 1.16, + "learning_rate": 2.37355862541386e-05, + "loss": 0.3024, + "step": 2100 + }, + { + "epoch": 1.22, + "learning_rate": 2.339308140198653e-05, + "loss": 0.3037, + "step": 2200 + }, + { + "epoch": 1.27, + 
"learning_rate": 2.3050576549834457e-05, + "loss": 0.3018, + "step": 2300 + }, + { + "epoch": 1.33, + "learning_rate": 2.2708071697682385e-05, + "loss": 0.3007, + "step": 2400 + }, + { + "epoch": 1.38, + "learning_rate": 2.2365566845530313e-05, + "loss": 0.2866, + "step": 2500 + }, + { + "epoch": 1.44, + "learning_rate": 2.202306199337824e-05, + "loss": 0.2889, + "step": 2600 + }, + { + "epoch": 1.49, + "learning_rate": 2.168055714122617e-05, + "loss": 0.2937, + "step": 2700 + }, + { + "epoch": 1.55, + "learning_rate": 2.1338052289074097e-05, + "loss": 0.2936, + "step": 2800 + }, + { + "epoch": 1.61, + "learning_rate": 2.0995547436922025e-05, + "loss": 0.2854, + "step": 2900 + }, + { + "epoch": 1.66, + "learning_rate": 2.0653042584769953e-05, + "loss": 0.2824, + "step": 3000 + }, + { + "epoch": 1.72, + "learning_rate": 2.0310537732617877e-05, + "loss": 0.2796, + "step": 3100 + }, + { + "epoch": 1.77, + "learning_rate": 1.9968032880465805e-05, + "loss": 0.2862, + "step": 3200 + }, + { + "epoch": 1.83, + "learning_rate": 1.9625528028313733e-05, + "loss": 0.2718, + "step": 3300 + }, + { + "epoch": 1.88, + "learning_rate": 1.928302317616166e-05, + "loss": 0.2792, + "step": 3400 + }, + { + "epoch": 1.94, + "learning_rate": 1.894051832400959e-05, + "loss": 0.2864, + "step": 3500 + }, + { + "epoch": 1.99, + "learning_rate": 1.859801347185752e-05, + "loss": 0.2852, + "step": 3600 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.8898844530547291, + "eval_f1": 0.8911132938895318, + "eval_loss": 0.27124258875846863, + "eval_matthews_correlation": 0.8365873619656282, + "eval_precision": 0.8999899065715472, + "eval_recall": 0.8877519423163003, + "eval_runtime": 26.2697, + "eval_samples_per_second": 1100.356, + "eval_steps_per_second": 34.412, + "step": 3613 + }, + { + "epoch": 2.05, + "learning_rate": 1.825550861970545e-05, + "loss": 0.2613, + "step": 3700 + }, + { + "epoch": 2.1, + "learning_rate": 1.7913003767553377e-05, + "loss": 0.2573, + "step": 3800 + }, + { + "epoch": 2.16, 
+ "learning_rate": 1.75704989154013e-05, + "loss": 0.2622, + "step": 3900 + }, + { + "epoch": 2.21, + "learning_rate": 1.722799406324923e-05, + "loss": 0.25, + "step": 4000 + }, + { + "epoch": 2.27, + "learning_rate": 1.6885489211097158e-05, + "loss": 0.256, + "step": 4100 + }, + { + "epoch": 2.32, + "learning_rate": 1.6542984358945086e-05, + "loss": 0.2546, + "step": 4200 + }, + { + "epoch": 2.38, + "learning_rate": 1.6200479506793014e-05, + "loss": 0.2561, + "step": 4300 + }, + { + "epoch": 2.44, + "learning_rate": 1.585797465464094e-05, + "loss": 0.267, + "step": 4400 + }, + { + "epoch": 2.49, + "learning_rate": 1.551546980248887e-05, + "loss": 0.2591, + "step": 4500 + }, + { + "epoch": 2.55, + "learning_rate": 1.5172964950336796e-05, + "loss": 0.2566, + "step": 4600 + }, + { + "epoch": 2.6, + "learning_rate": 1.4830460098184726e-05, + "loss": 0.2609, + "step": 4700 + }, + { + "epoch": 2.66, + "learning_rate": 1.4487955246032654e-05, + "loss": 0.2562, + "step": 4800 + }, + { + "epoch": 2.71, + "learning_rate": 1.414545039388058e-05, + "loss": 0.2495, + "step": 4900 + }, + { + "epoch": 2.77, + "learning_rate": 1.3802945541728508e-05, + "loss": 0.2497, + "step": 5000 + }, + { + "epoch": 2.82, + "learning_rate": 1.3460440689576436e-05, + "loss": 0.2578, + "step": 5100 + }, + { + "epoch": 2.88, + "learning_rate": 1.3117935837424364e-05, + "loss": 0.256, + "step": 5200 + }, + { + "epoch": 2.93, + "learning_rate": 1.277543098527229e-05, + "loss": 0.2459, + "step": 5300 + }, + { + "epoch": 2.99, + "learning_rate": 1.2432926133120218e-05, + "loss": 0.2565, + "step": 5400 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.8888120113471252, + "eval_f1": 0.889180642434028, + "eval_loss": 0.27161338925361633, + "eval_matthews_correlation": 0.8362364396798708, + "eval_precision": 0.8995711757292421, + "eval_recall": 0.886977935999356, + "eval_runtime": 26.4679, + "eval_samples_per_second": 1092.114, + "eval_steps_per_second": 34.155, + "step": 5420 + }, + { + "epoch": 3.04, + 
"learning_rate": 1.2090421280968148e-05, + "loss": 0.2428, + "step": 5500 + }, + { + "epoch": 3.1, + "learning_rate": 1.1747916428816076e-05, + "loss": 0.2235, + "step": 5600 + }, + { + "epoch": 3.15, + "learning_rate": 1.1405411576664002e-05, + "loss": 0.2317, + "step": 5700 + }, + { + "epoch": 3.21, + "learning_rate": 1.106290672451193e-05, + "loss": 0.2369, + "step": 5800 + }, + { + "epoch": 3.27, + "learning_rate": 1.0720401872359858e-05, + "loss": 0.2346, + "step": 5900 + }, + { + "epoch": 3.32, + "learning_rate": 1.0377897020207786e-05, + "loss": 0.234, + "step": 6000 + }, + { + "epoch": 3.38, + "learning_rate": 1.0038817216577235e-05, + "loss": 0.2289, + "step": 6100 + }, + { + "epoch": 3.43, + "learning_rate": 9.696312364425162e-06, + "loss": 0.2316, + "step": 6200 + }, + { + "epoch": 3.49, + "learning_rate": 9.353807512273091e-06, + "loss": 0.2293, + "step": 6300 + }, + { + "epoch": 3.54, + "learning_rate": 9.01130266012102e-06, + "loss": 0.2288, + "step": 6400 + }, + { + "epoch": 3.6, + "learning_rate": 8.668797807968947e-06, + "loss": 0.2191, + "step": 6500 + }, + { + "epoch": 3.65, + "learning_rate": 8.326292955816874e-06, + "loss": 0.2274, + "step": 6600 + }, + { + "epoch": 3.71, + "learning_rate": 7.983788103664802e-06, + "loss": 0.2249, + "step": 6700 + }, + { + "epoch": 3.76, + "learning_rate": 7.64128325151273e-06, + "loss": 0.224, + "step": 6800 + }, + { + "epoch": 3.82, + "learning_rate": 7.298778399360658e-06, + "loss": 0.2292, + "step": 6900 + }, + { + "epoch": 3.87, + "learning_rate": 6.956273547208586e-06, + "loss": 0.2235, + "step": 7000 + }, + { + "epoch": 3.93, + "learning_rate": 6.613768695056513e-06, + "loss": 0.2267, + "step": 7100 + }, + { + "epoch": 3.99, + "learning_rate": 6.271263842904442e-06, + "loss": 0.2097, + "step": 7200 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.8907147305057773, + "eval_f1": 0.8926286555871359, + "eval_loss": 0.28062257170677185, + "eval_matthews_correlation": 0.8357783119636794, + "eval_precision": 
0.8957037127875959, + "eval_recall": 0.8907174550142468, + "eval_runtime": 26.3462, + "eval_samples_per_second": 1097.162, + "eval_steps_per_second": 34.312, + "step": 7227 + }, + { + "epoch": 4.04, + "learning_rate": 5.928758990752369e-06, + "loss": 0.1994, + "step": 7300 + }, + { + "epoch": 4.1, + "learning_rate": 5.586254138600297e-06, + "loss": 0.2056, + "step": 7400 + }, + { + "epoch": 4.15, + "learning_rate": 5.243749286448224e-06, + "loss": 0.2071, + "step": 7500 + }, + { + "epoch": 4.21, + "learning_rate": 4.901244434296153e-06, + "loss": 0.2004, + "step": 7600 + }, + { + "epoch": 4.26, + "learning_rate": 4.55873958214408e-06, + "loss": 0.199, + "step": 7700 + }, + { + "epoch": 4.32, + "learning_rate": 4.216234729992008e-06, + "loss": 0.2007, + "step": 7800 + }, + { + "epoch": 4.37, + "learning_rate": 3.873729877839935e-06, + "loss": 0.1982, + "step": 7900 + }, + { + "epoch": 4.43, + "learning_rate": 3.5312250256878642e-06, + "loss": 0.2004, + "step": 8000 + }, + { + "epoch": 4.48, + "learning_rate": 3.188720173535792e-06, + "loss": 0.1988, + "step": 8100 + }, + { + "epoch": 4.54, + "learning_rate": 2.84621532138372e-06, + "loss": 0.1952, + "step": 8200 + }, + { + "epoch": 4.59, + "learning_rate": 2.5037104692316474e-06, + "loss": 0.1938, + "step": 8300 + }, + { + "epoch": 4.65, + "learning_rate": 2.1612056170795755e-06, + "loss": 0.1969, + "step": 8400 + }, + { + "epoch": 4.7, + "learning_rate": 1.818700764927503e-06, + "loss": 0.2002, + "step": 8500 + }, + { + "epoch": 4.76, + "learning_rate": 1.476195912775431e-06, + "loss": 0.1922, + "step": 8600 + }, + { + "epoch": 4.82, + "learning_rate": 1.1336910606233589e-06, + "loss": 0.1959, + "step": 8700 + }, + { + "epoch": 4.87, + "learning_rate": 7.911862084712867e-07, + "loss": 0.1964, + "step": 8800 + }, + { + "epoch": 4.93, + "learning_rate": 4.4868135631921454e-07, + "loss": 0.1941, + "step": 8900 + }, + { + "epoch": 4.98, + "learning_rate": 1.0617650416714236e-07, + "loss": 0.1984, + "step": 9000 + }, + 
{ + "epoch": 5.0, + "eval_accuracy": 0.8981526326714177, + "eval_f1": 0.8996340973968899, + "eval_loss": 0.2620416581630707, + "eval_matthews_correlation": 0.8478487817413687, + "eval_precision": 0.9051439118111951, + "eval_recall": 0.8970906218119197, + "eval_runtime": 26.209, + "eval_samples_per_second": 1102.904, + "eval_steps_per_second": 34.492, + "step": 9030 + } + ], + "logging_steps": 100, + "max_steps": 9030, + "num_train_epochs": 5, + "save_steps": 100, + "total_flos": 1.0512838028784269e+17, + "trial_name": null, + "trial_params": null +} diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42/checkpoint-9030/training_args.bin b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42/checkpoint-9030/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..085d299b90fd70ae38f32f753ce9774d8b4edaf2 --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42/checkpoint-9030/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca985c6ca3bb14bcbf5dacf79ec4b721a7439b13a6943d62eb17720be9bae9cc +size 5393 diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42/results/base5120_human_ensembl_regulatory_lr3e-5_wd0.0_wr0.03_ep5_seed42/eval_results.json b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42/results/base5120_human_ensembl_regulatory_lr3e-5_wd0.0_wr0.03_ep5_seed42/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..0cb36b7b9ca6415986ddfdf449c527f219c2fba3 --- /dev/null +++ 
b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42/results/base5120_human_ensembl_regulatory_lr3e-5_wd0.0_wr0.03_ep5_seed42/eval_results.json @@ -0,0 +1 @@ +{"eval_loss": 0.25783056020736694, "eval_accuracy": 0.8992285605562667, "eval_f1": 0.9006401658314979, "eval_matthews_correlation": 0.8490984535285055, "eval_precision": 0.9055273184685374, "eval_recall": 0.8980398114829752, "eval_runtime": 26.0552, "eval_samples_per_second": 1109.452, "eval_steps_per_second": 34.696, "epoch": 5.0} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-2260/config.json b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-2260/config.json new file mode 100644 index 0000000000000000000000000000000000000000..45e4c6c10a6211acf374c78e8078ab7ac74985f9 --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-2260/config.json @@ -0,0 +1,27 @@ +{ + "_name_or_path": "/data/nanhuang/Nan/models/DNAbert2_Pretrained", + "architectures": [ + "BertForSequenceClassification" + ], + "attention_probs_dropout_prob": 0.1, + "classifier_dropout": null, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_norm_eps": 1e-12, + "max_length": 512, + "max_position_embeddings": 512, + "model_type": "bert", + "num_attention_heads": 12, + "num_hidden_layers": 12, + "pad_token_id": 0, + "position_embedding_type": "absolute", + "problem_type": "single_label_classification", + "torch_dtype": "float32", + "transformers_version": "4.35.2", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 4096 +} diff 
--git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-2260/model.safetensors b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-2260/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0bbb8c34248c02bcfbb03397440a772eee71605f --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-2260/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f46ef40da8b316c1d0eff5cc1fe9f6ae4c16e9d06990c10ff339a70a48e0516 +size 356777880 diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-2260/optimizer.pt b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-2260/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..a798125d84c37c7c0c9ecc0ec17d06077ccab301 --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-2260/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5cd4298b112e100db7feb6eada1b39722118797c5b554ed940c678792f4954a1 +size 713677451 diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-2260/rng_state.pth b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-2260/rng_state.pth new file mode 100644 index 
0000000000000000000000000000000000000000..379ff28f5b0fcc6317320d86d178405ac69fba64 --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-2260/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88f852a7e0cfc8cc2be2584732067240012bcbc7bf0fa2bcc717cab4b62c42a6 +size 14645 diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-2260/scheduler.pt b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-2260/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b143e177792bf838055e3669fb3f2a606fa73a81 --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-2260/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:220cfec5dcb75f6dbee6f4fd1cb3df10c70bed9be730045a710272b60af06be5 +size 1465 diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-2260/special_tokens_map.json b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-2260/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..a8b3208c2884c4efb86e49300fdd3dc877220cdf --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-2260/special_tokens_map.json @@ -0,0 +1,7 @@ +{ + "cls_token": "[CLS]", + "mask_token": "[MASK]", + "pad_token": "[PAD]", + "sep_token": "[SEP]", + "unk_token": 
"[UNK]" +} diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-2260/tokenizer.json b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-2260/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c390760bdfa97b696a762628a15dd3bf7932038a --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-2260/tokenizer.json @@ -0,0 +1,8340 @@ +{ + "version": "1.0", + "truncation": { + "direction": "Right", + "max_length": 100, + "strategy": "LongestFirst", + "stride": 0 + }, + "padding": { + "strategy": "BatchLongest", + "direction": "Right", + "pad_to_multiple_of": null, + "pad_id": 3, + "pad_type_id": 0, + "pad_token": "[PAD]" + }, + "added_tokens": [ + { + "id": 0, + "content": "[UNK]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "[CLS]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "[SEP]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 3, + "content": "[PAD]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 4, + "content": "[MASK]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": null, + "pre_tokenizer": { + "type": "Whitespace" + }, + "post_processor": { + "type": "TemplateProcessing", + "single": [ + { + "SpecialToken": { + "id": "[CLS]", + "type_id": 0 + } + }, + { + "Sequence": { + "id": "A", + "type_id": 0 + } + }, + { + "SpecialToken": 
{ + "id": "[SEP]", + "type_id": 0 + } + } + ], + "pair": [ + { + "SpecialToken": { + "id": "[CLS]", + "type_id": 0 + } + }, + { + "Sequence": { + "id": "A", + "type_id": 0 + } + }, + { + "SpecialToken": { + "id": "[SEP]", + "type_id": 0 + } + }, + { + "Sequence": { + "id": "B", + "type_id": 1 + } + }, + { + "SpecialToken": { + "id": "[SEP]", + "type_id": 1 + } + } + ], + "special_tokens": { + "[CLS]": { + "id": "[CLS]", + "ids": [ + 1 + ], + "tokens": [ + "[CLS]" + ] + }, + "[SEP]": { + "id": "[SEP]", + "ids": [ + 2 + ], + "tokens": [ + "[SEP]" + ] + } + } + }, + "decoder": null, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": "[UNK]", + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "vocab": { + "[UNK]": 0, + "[CLS]": 1, + "[SEP]": 2, + "[PAD]": 3, + "[MASK]": 4, + "A": 5, + "C": 6, + "G": 7, + "T": 8, + "AA": 9, + "TT": 10, + "TG": 11, + "CA": 12, + "CC": 13, + "TA": 14, + "GG": 15, + "TC": 16, + "GA": 17, + "AAA": 18, + "GC": 19, + "TAA": 20, + "TTTT": 21, + "TCA": 22, + "TGA": 23, + "TTA": 24, + "GAA": 25, + "TCC": 26, + "CAA": 27, + "CTG": 28, + "CTT": 29, + "GTG": 30, + "GTT": 31, + "GCA": 32, + "GGA": 33, + "CCA": 34, + "GTA": 35, + "GCC": 36, + "CTA": 37, + "TAAA": 38, + "AAAA": 39, + "CTC": 40, + "GTC": 41, + "TGTG": 42, + "TATT": 43, + "CACA": 44, + "GAAA": 45, + "TATA": 46, + "TCTT": 47, + "TGTT": 48, + "CAAA": 49, + "GAGA": 50, + "CATT": 51, + "TGAA": 52, + "CAGG": 53, + "TCTG": 54, + "CAGA": 55, + "TCAA": 56, + "GGAA": 57, + "TAAAA": 58, + "CTGA": 59, + "GCTT": 60, + "GTGA": 61, + "GCTG": 62, + "CTCA": 63, + "CCTT": 64, + "CATG": 65, + "GCAA": 66, + "GTCA": 67, + "GTAA": 68, + "TTTTA": 69, + "TATG": 70, + "GAGG": 71, + "CGG": 72, + "GATT": 73, + "CCTG": 74, + "TCTC": 75, + "CCAA": 76, + "GTTA": 77, + "CTCC": 78, + "CTAA": 79, + "TACA": 80, + "CTTA": 81, + "TCCA": 82, + "GATG": 83, + "TTAA": 84, + "GAAAA": 85, + "TTTG": 86, + "GTTTT": 87, + "TCTA": 88, + "GCCA": 89, + 
"GTCC": 90, + "CTTTT": 91, + "GGGG": 92, + "CGA": 93, + "TTTA": 94, + "CCCA": 95, + "CAAAA": 96, + "TGGG": 97, + "TAGA": 98, + "TAGG": 99, + "GACA": 100, + "GGTT": 101, + "CCCC": 102, + "GGTG": 103, + "CATA": 104, + "GCTA": 105, + "TGTA": 106, + "TCAAA": 107, + "TGGA": 108, + "TAATT": 109, + "TTATT": 110, + "TGCA": 111, + "GGCA": 112, + "GATA": 113, + "CCTA": 114, + "TTCA": 115, + "TCTCA": 116, + "GGGA": 117, + "CGC": 118, + "CTGAA": 119, + "GTAAA": 120, + "TCTCC": 121, + "TTTTTT": 122, + "CGTG": 123, + "GCAAA": 124, + "TAAAAA": 125, + "TCTGA": 126, + "TCATT": 127, + "GGAAA": 128, + "TGAAA": 129, + "TCCTT": 130, + "CCAAA": 131, + "GAATT": 132, + "CTAAA": 133, + "CGTT": 134, + "GTGAA": 135, + "GGCC": 136, + "TAATA": 137, + "GGTA": 138, + "TGCC": 139, + "CACC": 140, + "TGATT": 141, + "AAAAAA": 142, + "GCTCA": 143, + "TCCAA": 144, + "GAGAA": 145, + "CTGTT": 146, + "TATTA": 147, + "CAGCA": 148, + "CTCTT": 149, + "CTTAA": 150, + "CAGAA": 151, + "GCTGA": 152, + "GTTAA": 153, + "TCTTA": 154, + "TATTTT": 155, + "GCCAA": 156, + "CTTTG": 157, + "GACC": 158, + "CGCA": 159, + "GTATT": 160, + "GTCTT": 161, + "CAATT": 162, + "GTGTT": 163, + "CTCAA": 164, + "GGAGG": 165, + "CGAA": 166, + "TCTTTT": 167, + "GTCAA": 168, + "CGCC": 169, + "TATAA": 170, + "TACC": 171, + "TCTAA": 172, + "CCATT": 173, + "CGGA": 174, + "CAAAAA": 175, + "CAGTG": 176, + "TCCTG": 177, + "CTCTG": 178, + "GAAAAA": 179, + "CTGTG": 180, + "CAGC": 181, + "TTTTAA": 182, + "GCATT": 183, + "GCCTT": 184, + "TAATG": 185, + "CTATT": 186, + "GTTTG": 187, + "TGATG": 188, + "GGCTG": 189, + "CCTCA": 190, + "GAGGA": 191, + "GCCTG": 192, + "AAATT": 193, + "CGTA": 194, + "TCAAAA": 195, + "TACAA": 196, + "CATCA": 197, + "CAGTT": 198, + "TGAGA": 199, + "GGGAA": 200, + "CACTG": 201, + "CACAA": 202, + "CAGGA": 203, + "CCCCA": 204, + "CCCTG": 205, + "TTTTTTTT": 206, + "TAGAA": 207, + "GAGCA": 208, + "CCTCC": 209, + "CACCA": 210, + "TATCA": 211, + "GAGC": 212, + "CATTA": 213, + "CACACACA": 214, + "GAGTG": 215, + "GGATT": 216, + 
"TGTGTGTG": 217, + "TACTT": 218, + "CACTT": 219, + "GTCTG": 220, + "TGAGG": 221, + "GAGTT": 222, + "GAATG": 223, + "TCATG": 224, + "GACAA": 225, + "GACTT": 226, + "TATTAA": 227, + "TAATAA": 228, + "GGCCA": 229, + "CATTTT": 230, + "CAGCC": 231, + "CCCTT": 232, + "GCTAA": 233, + "TATATATA": 234, + "GTGTG": 235, + "TACTG": 236, + "TAGTT": 237, + "CAATG": 238, + "GCTC": 239, + "CAGTA": 240, + "GCTCC": 241, + "CATAA": 242, + "TTATG": 243, + "TAAATT": 244, + "GATGA": 245, + "CATGA": 246, + "GCGG": 247, + "AAAAAAAA": 248, + "CCATG": 249, + "GATAA": 250, + "GACTG": 251, + "TATGA": 252, + "GCAGG": 253, + "GATCA": 254, + "GTTTTA": 255, + "GGATG": 256, + "CCTGA": 257, + "GTAAAA": 258, + "GAAGG": 259, + "GATTA": 260, + "CCTC": 261, + "GACCA": 262, + "GCTTA": 263, + "CCCAA": 264, + "AAATG": 265, + "GCATG": 266, + "TAGTA": 267, + "TACCA": 268, + "GGCTT": 269, + "CGTC": 270, + "TCTCTT": 271, + "GGTCA": 272, + "TTATTA": 273, + "TACTA": 274, + "TAGCA": 275, + "TATC": 276, + "CTGGG": 277, + "CATC": 278, + "CTTTTA": 279, + "CTAAAA": 280, + "GTGGG": 281, + "GAGTA": 282, + "CCAGG": 283, + "GATTTT": 284, + "TAGTG": 285, + "GAAATT": 286, + "CACTA": 287, + "TCGG": 288, + "TCAGG": 289, + "CAGGAA": 290, + "GCAAAA": 291, + "CCTTA": 292, + "CATCC": 293, + "CTTGG": 294, + "TGTGAA": 295, + "TATTTG": 296, + "CCTAA": 297, + "CTATG": 298, + "GAGAAA": 299, + "GAGAGAGA": 300, + "GCTTTT": 301, + "TATAAA": 302, + "CAAGG": 303, + "TCTCTG": 304, + "TGTTAA": 305, + "TGTGTT": 306, + "GAGCC": 307, + "GACTA": 308, + "TATATT": 309, + "TAAAAAA": 310, + "TTTTTG": 311, + "GTATG": 312, + "CATTAA": 313, + "TAGGA": 314, + "TAGC": 315, + "GTTGG": 316, + "GAAGAA": 317, + "TAAATG": 318, + "TCTGTT": 319, + "CAGAAA": 320, + "CAAATT": 321, + "TAATTA": 322, + "TCTGTG": 323, + "TATCC": 324, + "TGAATT": 325, + "CTCCA": 326, + "GTGAAA": 327, + "GGCAA": 328, + "GGAGA": 329, + "GAAGA": 330, + "GGTGA": 331, + "GGGCA": 332, + "CCAAAA": 333, + "TCTCTCTC": 334, + "CTGCA": 335, + "CTTCTT": 336, + "TCTTAA": 337, + "CCCTA": 338, + 
"TGTGTG": 339, + "AAATA": 340, + "TGTTTG": 341, + "GGGTT": 342, + "GTGCTG": 343, + "GGAAAA": 344, + "GGGGA": 345, + "TCAGA": 346, + "CCTTTT": 347, + "GAAATG": 348, + "GCAGCA": 349, + "TCTGAA": 350, + "GGGTG": 351, + "CACATT": 352, + "TCTTTG": 353, + "GGGC": 354, + "TCCCA": 355, + "TCCATT": 356, + "CTGAAA": 357, + "CTTTA": 358, + "TCGA": 359, + "GTTTA": 360, + "CAACAA": 361, + "CTTCC": 362, + "GCCTCC": 363, + "TTAAA": 364, + "GCTCTG": 365, + "GTTTCA": 366, + "GGAGGA": 367, + "CGTGA": 368, + "CAGTC": 369, + "GAATA": 370, + "CAGAGA": 371, + "CCCTC": 372, + "CAAATG": 373, + "CTGCTG": 374, + "GATCC": 375, + "TTTTATT": 376, + "AAAATT": 377, + "TTATA": 378, + "TCAATT": 379, + "GGTAA": 380, + "GTTATT": 381, + "GCCAGG": 382, + "GGAGAA": 383, + "CATTTG": 384, + "TCACC": 385, + "CTCAAA": 386, + "GGTTA": 387, + "TCCAAA": 388, + "TCTATT": 389, + "GCAGA": 390, + "CTTCA": 391, + "TCATCA": 392, + "CGAGG": 393, + "TAACA": 394, + "GTTGTT": 395, + "CTTATT": 396, + "CGTCA": 397, + "TAAGA": 398, + "TAATTTT": 399, + "CTGTA": 400, + "TCCACA": 401, + "GCTGTG": 402, + "CGCTG": 403, + "TCTAAA": 404, + "GCGA": 405, + "CAATA": 406, + "CCACCA": 407, + "GAACA": 408, + "CGAAA": 409, + "CAGATT": 410, + "TCACA": 411, + "TTATTTT": 412, + "TCTCAA": 413, + "TGACA": 414, + "CTCCAA": 415, + "AAAAAAA": 416, + "TATATG": 417, + "TCCTCC": 418, + "TCACTT": 419, + "TCCAGG": 420, + "CAAGA": 421, + "GGCTA": 422, + "GTGGTG": 423, + "CGTAA": 424, + "CGAGA": 425, + "TGATA": 426, + "GGATTA": 427, + "CAACA": 428, + "CGATT": 429, + "TGAGAA": 430, + "CTCCTT": 431, + "CTCATT": 432, + "GTTAAA": 433, + "TCATA": 434, + "CCTCTG": 435, + "CTCTA": 436, + "GCTGAA": 437, + "CTGGA": 438, + "TAAGG": 439, + "CTTAAA": 440, + "TATTTA": 441, + "CCACA": 442, + "CCGG": 443, + "GTCAAA": 444, + "TGGAA": 445, + "CGGAA": 446, + "TGATGA": 447, + "GTTCA": 448, + "TAACAA": 449, + "GCTGTT": 450, + "TAAGAA": 451, + "CTGCC": 452, + "TTAATT": 453, + "CCAGA": 454, + "TCAGAA": 455, + "GTCATT": 456, + "CGCTT": 457, + "GATTAA": 458, + "CTGATT": 
459, + "GCCACA": 460, + "GTAATT": 461, + "TCCAGA": 462, + "GCCAAA": 463, + "GTGATT": 464, + "TAAAATT": 465, + "CAAGAA": 466, + "CCACC": 467, + "TAATCC": 468, + "GTTCTT": 469, + "TCCATG": 470, + "GCTCTT": 471, + "TGCTG": 472, + "GGGTA": 473, + "TTACA": 474, + "GCCATT": 475, + "GCACA": 476, + "GCAATT": 477, + "TCCCTG": 478, + "TGTGA": 479, + "TCGAA": 480, + "GGACA": 481, + "GGAATT": 482, + "GTGGA": 483, + "CTTCTG": 484, + "TCCCC": 485, + "GCCCC": 486, + "CTTGA": 487, + "TAATGA": 488, + "TAAATA": 489, + "TATATA": 490, + "CTGCAA": 491, + "TCATTA": 492, + "GTATA": 493, + "TCCCCA": 494, + "CGTTA": 495, + "GCAGAA": 496, + "TGAGTT": 497, + "CTTTTTT": 498, + "CGATG": 499, + "CTTTCA": 500, + "AAAATG": 501, + "CAGGTT": 502, + "CTAATT": 503, + "CGCCA": 504, + "TGAAAAA": 505, + "GTTCC": 506, + "GTCCTT": 507, + "GTCCAA": 508, + "GTTTTTT": 509, + "CTCTGA": 510, + "GCGC": 511, + "GTTGA": 512, + "TGAATG": 513, + "CTATA": 514, + "GCAGTG": 515, + "CCTTAA": 516, + "TCACCA": 517, + "TCACTG": 518, + "GCCCTG": 519, + "TAACTT": 520, + "CAGATG": 521, + "GTAGG": 522, + "TCTATA": 523, + "GAGATT": 524, + "GTCTA": 525, + "TTTTAAA": 526, + "CACATG": 527, + "TGACC": 528, + "CACAAA": 529, + "GTGTA": 530, + "GGGAGG": 531, + "GCTTTG": 532, + "CAAAAAA": 533, + "GAGGAA": 534, + "GTTCTG": 535, + "TTTTTA": 536, + "GTCTCA": 537, + "GTTCAA": 538, + "TCGTG": 539, + "GCTTAA": 540, + "GCACC": 541, + "CTCCTG": 542, + "TAAATAAA": 543, + "CTACA": 544, + "CTTCCA": 545, + "TCCTCA": 546, + "CGCAA": 547, + "GAAAAAA": 548, + "GCCCA": 549, + "TCGTT": 550, + "GTAGA": 551, + "CTCTCA": 552, + "GTCCA": 553, + "TGACTT": 554, + "TCCCTT": 555, + "GCCATG": 556, + "CACACACACACACACA": 557, + "GTGATG": 558, + "CCTCTT": 559, + "GCCAGA": 560, + "TCCTA": 561, + "CGTTTT": 562, + "GTACA": 563, + "GCATA": 564, + "GAATTA": 565, + "TGTGTGTGTGTGTGTG": 566, + "CCCAGG": 567, + "GGTTTT": 568, + "TCAAAAA": 569, + "TCTATG": 570, + "CCATA": 571, + "TGACAA": 572, + "GGATA": 573, + "TCAGTG": 574, + "GTATTTT": 575, + "GAGATG": 576, + "GCGTG": 
577, + "CGTCC": 578, + "TTAAAAA": 579, + "TAATCA": 580, + "CAATTA": 581, + "CCACTG": 582, + "CGGTT": 583, + "GTTGAA": 584, + "TGATTA": 585, + "CCTTTG": 586, + "CGGTG": 587, + "CAGGTG": 588, + "TCAATG": 589, + "CTGATG": 590, + "TCAGGA": 591, + "GTTTAA": 592, + "TATTAAA": 593, + "CTCTTA": 594, + "GCAGGA": 595, + "CTCTCC": 596, + "GAACC": 597, + "CTTTAA": 598, + "GGGCC": 599, + "GTATTA": 600, + "GCGCC": 601, + "CCAATT": 602, + "GCTAAA": 603, + "TGACTG": 604, + "GATTTG": 605, + "GATAAA": 606, + "TCAGCA": 607, + "GTTCCA": 608, + "GAAATA": 609, + "GACAAA": 610, + "GAGTC": 611, + "GCTATT": 612, + "TCACAA": 613, + "GAGGTT": 614, + "TAACC": 615, + "GAAGGA": 616, + "GCTCAA": 617, + "GAAAATT": 618, + "CCAGCA": 619, + "GTTTTAA": 620, + "GTGCC": 621, + "TGAGGA": 622, + "CATAAA": 623, + "GGTCC": 624, + "TCATTTT": 625, + "TATTTATT": 626, + "TAATAAA": 627, + "GCCTA": 628, + "CTTTTAA": 629, + "TAAGTG": 630, + "TAAGTA": 631, + "CTGGAA": 632, + "CACACA": 633, + "GACAGA": 634, + "CAACC": 635, + "GGGAAA": 636, + "CCAGAA": 637, + "TCAGTT": 638, + "TAACTA": 639, + "CTAAAAA": 640, + "TGGGTT": 641, + "TGAGTG": 642, + "TAAAATG": 643, + "TATATATATATATATA": 644, + "GCACTG": 645, + "GACTC": 646, + "TACAAA": 647, + "TAAAAAAA": 648, + "TCTACA": 649, + "GTTGTG": 650, + "TCGCC": 651, + "CCCAAA": 652, + "GTCATG": 653, + "CTGCTT": 654, + "GGAATG": 655, + "CTATTA": 656, + "GATATT": 657, + "TAGAAA": 658, + "GGCAGG": 659, + "GATGAA": 660, + "GTAGAA": 661, + "TCCTGA": 662, + "TAACTG": 663, + "GCTGGG": 664, + "GCAATG": 665, + "GCCCCA": 666, + "GTTTGA": 667, + "CATTTA": 668, + "GTGCA": 669, + "CTTGAA": 670, + "GTGGAA": 671, + "CTTCAA": 672, + "TAAATTA": 673, + "GTGGCA": 674, + "TCCTTA": 675, + "GGAAAAA": 676, + "TTTTTTA": 677, + "CCTGTG": 678, + "GTAATG": 679, + "GTGTTA": 680, + "CTAGG": 681, + "CAGGCTG": 682, + "GACACA": 683, + "GAAAAAAA": 684, + "TCGC": 685, + "GTAAAAA": 686, + "TGTTTA": 687, + "TCTCTA": 688, + "GTCCTG": 689, + "CCAGGA": 690, + "GAACAA": 691, + "TAAGTT": 692, + "TGAGCA": 693, + 
"GCTCCA": 694, + "TAAGCA": 695, + "CTCATG": 696, + "GTCTTA": 697, + "CCCACA": 698, + "CATATT": 699, + "GCCTCA": 700, + "CACTC": 701, + "CTTCTA": 702, + "TGATTTT": 703, + "TCGCA": 704, + "CCTGTT": 705, + "GAAGCA": 706, + "GCAAAAA": 707, + "GCGGA": 708, + "CCACAA": 709, + "GCGCA": 710, + "CATATA": 711, + "GACATT": 712, + "GTTCTA": 713, + "CAAAATT": 714, + "GAAAGAAA": 715, + "CCCGG": 716, + "TACACA": 717, + "CCAAAAA": 718, + "GAGGTG": 719, + "GGCTCA": 720, + "CAGTGA": 721, + "TCCCAA": 722, + "TATCTT": 723, + "TGAGTA": 724, + "TCGTA": 725, + "TTTTCTT": 726, + "GTGGGA": 727, + "GAGCTG": 728, + "CCCTCC": 729, + "TAGGTT": 730, + "TTAGG": 731, + "TAATATT": 732, + "CCAGCC": 733, + "CATCTT": 734, + "GTCTGA": 735, + "GTTTCC": 736, + "CCTGAA": 737, + "GGAGCA": 738, + "GAAAATG": 739, + "TCAGTA": 740, + "TAACCA": 741, + "GATGTT": 742, + "CTGTTA": 743, + "CATGTT": 744, + "GGCGG": 745, + "CATGTG": 746, + "GGGAGA": 747, + "CTTTGA": 748, + "TCTTTCTT": 749, + "AAAAAAAAA": 750, + "GGGGTG": 751, + "CTTTCC": 752, + "CTTGTT": 753, + "GCATTA": 754, + "CCCAGA": 755, + "CAAATA": 756, + "TCGGA": 757, + "CAGCTT": 758, + "TCACTA": 759, + "TAATTAA": 760, + "TAAGGA": 761, + "GAACTG": 762, + "GCACAA": 763, + "GCGTT": 764, + "GGCTC": 765, + "TCTTTTA": 766, + "CCTCCA": 767, + "GGCAAA": 768, + "CAGCTG": 769, + "CTACAA": 770, + "TACATT": 771, + "GCTATG": 772, + "CTTGTG": 773, + "GAGTCA": 774, + "GTTATG": 775, + "CTGCCA": 776, + "GTCTCC": 777, + "TGACCA": 778, + "CACCTG": 779, + "TATATTA": 780, + "TGATCA": 781, + "CAGCAA": 782, + "GATGTG": 783, + "GTCTTTT": 784, + "CTAGAA": 785, + "GCTACA": 786, + "CTGGGA": 787, + "GGGGTT": 788, + "CAAGTA": 789, + "CAAGGA": 790, + "CCCTCA": 791, + "TAGCC": 792, + "GTTGGA": 793, + "GCTATA": 794, + "TCTGAAA": 795, + "TATGTT": 796, + "CCCCTT": 797, + "GTTGTA": 798, + "CCCTGA": 799, + "TGACTA": 800, + "CAAGCA": 801, + "CAATAA": 802, + "GAACTT": 803, + "CATGAA": 804, + "CTTATG": 805, + "CTAATG": 806, + "TCTAAAA": 807, + "CCAATG": 808, + "GAAGTG": 809, + "CCTCAA": 810, + 
"CCCATT": 811, + "CAGTCA": 812, + "GAGAGAGAGAGAGAGA": 813, + "TATGTG": 814, + "GCAGTGA": 815, + "TCTCCTT": 816, + "TCCCAAA": 817, + "CCATTA": 818, + "CCAGTG": 819, + "GCATCA": 820, + "TCAAATT": 821, + "GATCTT": 822, + "GACAGG": 823, + "GGAGTG": 824, + "GTAGTA": 825, + "CAACTT": 826, + "GAAGTT": 827, + "CCCCTG": 828, + "TCTCAAA": 829, + "GGGTC": 830, + "GAGCTT": 831, + "TATGAAA": 832, + "TATGAA": 833, + "GACATG": 834, + "CAAGTG": 835, + "GATATA": 836, + "CATCTG": 837, + "CTGTGA": 838, + "TAATTTA": 839, + "GGCAGA": 840, + "GCGAA": 841, + "CCTAAA": 842, + "CCATCA": 843, + "CACTGA": 844, + "GGACTA": 845, + "GACGG": 846, + "CTCTTTT": 847, + "CTGTCA": 848, + "TCTCTCTCTCTCTCTC": 849, + "TTAATG": 850, + "GCAGCC": 851, + "CAAAAAAA": 852, + "GCACCA": 853, + "CTATTTT": 854, + "GAGCAA": 855, + "CTTGGA": 856, + "CTGGTG": 857, + "GAATAA": 858, + "TCCTTTT": 859, + "GAAGTA": 860, + "CAGTAA": 861, + "CAACCA": 862, + "CTGTAA": 863, + "TGATAA": 864, + "GCAGTT": 865, + "CACGG": 866, + "TAAATAA": 867, + "CTGTTTT": 868, + "CTACTA": 869, + "GCTCTA": 870, + "CGAAAA": 871, + "CAAGTT": 872, + "CTTGTA": 873, + "GAATGA": 874, + "GAGTGA": 875, + "GCCTGA": 876, + "GGTTTG": 877, + "CCCATG": 878, + "GGGGAA": 879, + "GAAGAAA": 880, + "TGTTA": 881, + "CAATTTT": 882, + "TATATTTT": 883, + "CTCAAAA": 884, + "GGTGGG": 885, + "CCGTG": 886, + "TATTTCA": 887, + "CCCCAA": 888, + "TATTTAA": 889, + "GGCTGA": 890, + "GGTGTG": 891, + "CATCAA": 892, + "CACTCA": 893, + "TCTCATT": 894, + "GAATTTT": 895, + "GAATCA": 896, + "CAGGAAA": 897, + "CATACA": 898, + "TATTTTA": 899, + "TTATAA": 900, + "GAGGAAA": 901, + "CATATG": 902, + "CTTTCTT": 903, + "CAACTG": 904, + "GGGCTG": 905, + "CCCCCA": 906, + "TTTGAAA": 907, + "CATTAAA": 908, + "CTTAAAA": 909, + "GACTGA": 910, + "CAATGA": 911, + "GGCACA": 912, + "CCAGTA": 913, + "GGATGA": 914, + "GTTTTTG": 915, + "GCATTTT": 916, + "GTGCCA": 917, + "GCAGTA": 918, + "GCCCTT": 919, + "TCGTC": 920, + "GAACTA": 921, + "GTGGTT": 922, + "GTGTGA": 923, + "GTGCTT": 924, + "CGCTA": 925, + 
"GTGTCA": 926, + "TCTTTA": 927, + "GCCTTA": 928, + "CCTATT": 929, + "CAAAATG": 930, + "GAACCA": 931, + "CTCCAGG": 932, + "GACTCA": 933, + "CATGAAA": 934, + "GCTAGG": 935, + "TGTTAAA": 936, + "GCGTA": 937, + "GCACTT": 938, + "TCTTAAA": 939, + "TAAGAAA": 940, + "GGCCTG": 941, + "TCCCTA": 942, + "GTGGTA": 943, + "CTGCTA": 944, + "GGAGTT": 945, + "GGTAAA": 946, + "CAAACAAA": 947, + "GATATG": 948, + "TCATGA": 949, + "GACCTT": 950, + "TAATATA": 951, + "GCTAGA": 952, + "GGACTG": 953, + "GGCATT": 954, + "CAGTTA": 955, + "CCCTAA": 956, + "CACCTT": 957, + "GGTGAA": 958, + "CAGCTA": 959, + "GTGTTTT": 960, + "CAACTA": 961, + "GATCAA": 962, + "GAGAAAA": 963, + "TGTGAAA": 964, + "AAAATA": 965, + "GATGAAA": 966, + "CTCTAA": 967, + "TTACTT": 968, + "GATCTG": 969, + "CCACTT": 970, + "GAGTTA": 971, + "CAATCA": 972, + "GGATTACAGG": 973, + "TTTATTTT": 974, + "TACATA": 975, + "TTTTATG": 976, + "GAGTAA": 977, + "GCTGAAA": 978, + "GTACTG": 979, + "GCTCTC": 980, + "TATGTA": 981, + "TGTGTA": 982, + "TCATAA": 983, + "GGACTT": 984, + "TCTCCAA": 985, + "GCATGA": 986, + "GACGA": 987, + "CGCCTG": 988, + "GACCTG": 989, + "GGTCTT": 990, + "CACCAA": 991, + "GATC": 992, + "GACCAA": 993, + "AAAATTA": 994, + "GTAAATT": 995, + "CCAGTT": 996, + "CAGAAAA": 997, + "TAACAAA": 998, + "GGTGTT": 999, + "GAAATTA": 1000, + "TGCCTCA": 1001, + "CCGCC": 1002, + "CCATTTT": 1003, + "CTTGCC": 1004, + "TCTGTA": 1005, + "CTGGCA": 1006, + "GGGATG": 1007, + "CCATGA": 1008, + "CTACTT": 1009, + "TAGGTG": 1010, + "TAAAAATT": 1011, + "GAAAGAA": 1012, + "TAAAATA": 1013, + "CTTTTTG": 1014, + "GTCAAAA": 1015, + "GGACAA": 1016, + "TCTGATT": 1017, + "CTCTCTT": 1018, + "TAATTTG": 1019, + "CTCTTTG": 1020, + "GGCCTT": 1021, + "GGATTTT": 1022, + "CTACTG": 1023, + "GTTGCA": 1024, + "GGCTCC": 1025, + "CTCTGTG": 1026, + "CTCCAGCC": 1027, + "TTACAA": 1028, + "GGACCA": 1029, + "GGAAGGAA": 1030, + "TAAAGAA": 1031, + "TTAGAA": 1032, + "GTGAAAA": 1033, + "CTTGCA": 1034, + "TGGGTG": 1035, + "GGAGCC": 1036, + "CCTCTA": 1037, + "CT": 1038, + 
"GGGCTT": 1039, + "GGCATG": 1040, + "CTGGTT": 1041, + "TACAGA": 1042, + "GATTAAA": 1043, + "CTCTGTT": 1044, + "TTATCA": 1045, + "CTGAAAA": 1046, + "GTAGTT": 1047, + "GGGTCA": 1048, + "GT": 1049, + "CAGCCA": 1050, + "GCGTC": 1051, + "CACTTA": 1052, + "GTGCTA": 1053, + "TCTTATT": 1054, + "GTACTT": 1055, + "GGTATT": 1056, + "TAGAGA": 1057, + "TACATG": 1058, + "CCACTA": 1059, + "TGAGAAA": 1060, + "CAATAAA": 1061, + "TCCAAAA": 1062, + "CGTGAA": 1063, + "GGTCTG": 1064, + "CTGAATT": 1065, + "TCAGCC": 1066, + "CCTCTC": 1067, + "GTTAAAA": 1068, + "GGGATT": 1069, + "TCCTAA": 1070, + "CACTAA": 1071, + "GGAGAAA": 1072, + "CCTTCCTT": 1073, + "GTTTCTT": 1074, + "TATCAA": 1075, + "GATACA": 1076, + "TAATCCCAGCA": 1077, + "CCGCA": 1078, + "TGAAATT": 1079, + "CGTAAA": 1080, + "CTCTCTG": 1081, + "TCTTTTTT": 1082, + "GTACAA": 1083, + "CCAAATT": 1084, + "TGTATTTT": 1085, + "TCGCTT": 1086, + "GGGTGA": 1087, + "GATAGA": 1088, + "CTTTATT": 1089, + "TAAACAA": 1090, + "GTTTATT": 1091, + "TGAATA": 1092, + "CTACCA": 1093, + "GTGTCC": 1094, + "CCCGA": 1095, + "TTTATTA": 1096, + "CTCCAAA": 1097, + "TTTTTTTTTTTT": 1098, + "TCATCC": 1099, + "GAAGCC": 1100, + "CTAAATT": 1101, + "CAAATTA": 1102, + "CCCCAAA": 1103, + "TCTTCTT": 1104, + "TAGGAAA": 1105, + "CACGA": 1106, + "CATTTTA": 1107, + "GTGCAA": 1108, + "TCTCCTG": 1109, + "TATTTTAA": 1110, + "GTTTGTT": 1111, + "GAGCCA": 1112, + "GGCCAA": 1113, + "CATTTCA": 1114, + "CATCCA": 1115, + "CCTATA": 1116, + "GACTTA": 1117, + "TCAAATG": 1118, + "GTATCA": 1119, + "TAAATTTT": 1120, + "CTGAGGCA": 1121, + "GCCCAA": 1122, + "GGTTAA": 1123, + "TATCTG": 1124, + "TGACAGA": 1125, + "GGAGAGA": 1126, + "GCTGCTG": 1127, + "CCCTTA": 1128, + "TCCTCTG": 1129, + "GTAGCA": 1130, + "CCTGAAA": 1131, + "CCGAA": 1132, + "TTTTTAA": 1133, + "CTATAA": 1134, + "CCTGTA": 1135, + "TTACTG": 1136, + "GTATAA": 1137, + "GGCGA": 1138, + "GACTAA": 1139, + "TCAGAAA": 1140, + "GTGTGTG": 1141, + "CAAAGAA": 1142, + "CCTATG": 1143, + "GCAGAGA": 1144, + "CCGTT": 1145, + "TTTTATTTT": 1146, + 
"GGAAGAA": 1147, + "TTACTA": 1148, + "GCCTGGG": 1149, + "TCCCTC": 1150, + "TCCTCTT": 1151, + "GGATCA": 1152, + "GGTCAA": 1153, + "TCGAGA": 1154, + "TATTCTT": 1155, + "TACTC": 1156, + "GTTAATT": 1157, + "GCGAGA": 1158, + "CTTAATT": 1159, + "TCCTTTG": 1160, + "GTCTAA": 1161, + "CACCCA": 1162, + "GGGTTA": 1163, + "GGGCAA": 1164, + "GGAAATG": 1165, + "GCAAATT": 1166, + "TAGATG": 1167, + "GCAGAAA": 1168, + "AAAAAAAAAAAAAAAA": 1169, + "CCTACA": 1170, + "GGAGTA": 1171, + "TCTAATT": 1172, + "CAACAAA": 1173, + "TAGATT": 1174, + "GGTTTA": 1175, + "CCTAGA": 1176, + "CTTTAAA": 1177, + "TACTTA": 1178, + "TAATGAA": 1179, + "CTATCA": 1180, + "TAGTAA": 1181, + "CAGAGAA": 1182, + "CAAGAAA": 1183, + "GGGGAAA": 1184, + "CGTTAA": 1185, + "CGTGTT": 1186, + "TCTGTCTG": 1187, + "TTTTAATT": 1188, + "CTGGCC": 1189, + "TAAATGA": 1190, + "CGTCAA": 1191, + "TTAGTA": 1192, + "GTCTCTG": 1193, + "TTTTAAAA": 1194, + "CAGTTTT": 1195, + "CTTCCTT": 1196, + "TATATAA": 1197, + "GCTTTTA": 1198, + "TTTTTCA": 1199, + "GGTC": 1200, + "TTATTAA": 1201, + "TTTTGTT": 1202, + "CATAGA": 1203, + "TAGGAA": 1204, + "GAGAGAA": 1205, + "GTAGCTG": 1206, + "TTATGA": 1207, + "GTAGTG": 1208, + "GGAGAGG": 1209, + "CTCTGAA": 1210, + "TAGTC": 1211, + "GACTCC": 1212, + "TCCCTCC": 1213, + "TAATGTT": 1214, + "CATCTA": 1215, + "GCCACCA": 1216, + "GTACTA": 1217, + "TGGGAAA": 1218, + "CGCCTT": 1219, + "GCCCGG": 1220, + "GGAGGAA": 1221, + "GTACCA": 1222, + "CGCAAA": 1223, + "CATAAAA": 1224, + "TAACATT": 1225, + "GCTAAAA": 1226, + "TCTTCTG": 1227, + "GCCAAAA": 1228, + "GTATGA": 1229, + "GTCTTTG": 1230, + "TACTGA": 1231, + "TCCCAGG": 1232, + "TTATTTA": 1233, + "TTAGTT": 1234, + "GGACC": 1235, + "TATAAAA": 1236, + "CAAACAA": 1237, + "CTTCTC": 1238, + "TCTATCTA": 1239, + "GAAATAA": 1240, + "GTGTAA": 1241, + "CTTTGTT": 1242, + "GATAAAA": 1243, + "GCCCAGG": 1244, + "GCGATT": 1245, + "AAAAAATT": 1246, + "TACAGG": 1247, + "GGCTAA": 1248, + "TAGCTT": 1249, + "GTCTCTA": 1250, + "CTCCTGA": 1251, + "GAATAAA": 1252, + "TTACCA": 1253, + 
"GGGACA": 1254, + "GCCACTG": 1255, + "GTTTAAA": 1256, + "GTCTGTG": 1257, + "TGACAAA": 1258, + "TACATTTT": 1259, + "GCCACC": 1260, + "TGTTTT": 1261, + "TAGCAA": 1262, + "TTATAAA": 1263, + "GACCCA": 1264, + "GCAGC": 1265, + "CAGACAGA": 1266, + "CACAAAA": 1267, + "GCCCTA": 1268, + "TATTAAAA": 1269, + "CGTATT": 1270, + "CCATCC": 1271, + "TCGATT": 1272, + "GAAGGAA": 1273, + "GATCCA": 1274, + "TATTTGA": 1275, + "GTGAATT": 1276, + "TACCTT": 1277, + "CGTCTT": 1278, + "CCTAGG": 1279, + "TCGAAA": 1280, + "CTTTCTG": 1281, + "TGAAGAA": 1282, + "TCTCTCA": 1283, + "GTCTCTT": 1284, + "GGAGGGG": 1285, + "GTCTGTT": 1286, + "CTATGA": 1287, + "GGAAATT": 1288, + "GCACACA": 1289, + "GCCTTTT": 1290, + "CAGTCC": 1291, + "CTGGTA": 1292, + "GCATCC": 1293, + "TAGTTA": 1294, + "GGCTTA": 1295, + "GAGTCC": 1296, + "TGAAAA": 1297, + "TAGATAGA": 1298, + "TGTTTGTT": 1299, + "TACTCA": 1300, + "CATTTAA": 1301, + "GATTTTA": 1302, + "CACTCC": 1303, + "GAAACAA": 1304, + "GCGCTG": 1305, + "TCTTTCA": 1306, + "CTGTCC": 1307, + "GAACTCA": 1308, + "CGGAAA": 1309, + "TATTGTT": 1310, + "GCACTA": 1311, + "TATTCAA": 1312, + "GCGGGG": 1313, + "GTGGCC": 1314, + "TAATTAAA": 1315, + "TACTAA": 1316, + "GCGGTG": 1317, + "TACCAA": 1318, + "GGTATA": 1319, + "CTAGTT": 1320, + "GCAGAGG": 1321, + "CTTTTTTTT": 1322, + "TTTTTTTTTTTTTTTT": 1323, + "TACAGTA": 1324, + "CCATGTT": 1325, + "TAGTGA": 1326, + "CGTGTG": 1327, + "GCTCTGA": 1328, + "CTTCCTG": 1329, + "TCGCTG": 1330, + "TAAATCA": 1331, + "TCCAATT": 1332, + "GTTTCTG": 1333, + "GAAGAGA": 1334, + "GGGTAA": 1335, + "CCATAA": 1336, + "TTATATT": 1337, + "CGAATT": 1338, + "CCGGA": 1339, + "TGAGCC": 1340, + "CCGTA": 1341, + "CAGAGGA": 1342, + "GTGTTTG": 1343, + "GACAAAA": 1344, + "TTTTTTAAA": 1345, + "GTTGCC": 1346, + "GAGTTTT": 1347, + "TCAAAAAA": 1348, + "TGTTTCA": 1349, + "TATCTA": 1350, + "TCTCTCC": 1351, + "CTCCACA": 1352, + "TAAATATT": 1353, + "TTTTCTG": 1354, + "CTCTCAA": 1355, + "CCTTAAA": 1356, + "TCTTTTAA": 1357, + "GAACAAA": 1358, + "TTAGCA": 1359, + "GCTCATG": 
1360, + "TAAAGTA": 1361, + "GGATAA": 1362, + "TTATTAAA": 1363, + "CTCCATT": 1364, + "TCTCTGA": 1365, + "TTATTTG": 1366, + "CCTGTAA": 1367, + "TTATATA": 1368, + "GACTTTT": 1369, + "TGTTGTT": 1370, + "GCAAATG": 1371, + "CTTCAAA": 1372, + "GAATATT": 1373, + "GAATCC": 1374, + "CTCTTAA": 1375, + "GCATAA": 1376, + "GAATGAA": 1377, + "CTTAAAAA": 1378, + "TAAAAATG": 1379, + "TTTTAAAAA": 1380, + "CTCTGGG": 1381, + "TGATCC": 1382, + "GCTCTCA": 1383, + "CTCCAGA": 1384, + "GAGTGCAGTG": 1385, + "CAATATT": 1386, + "TAGAAAA": 1387, + "GTAAATG": 1388, + "TAGCTG": 1389, + "GCTCAAA": 1390, + "GCAGGAA": 1391, + "TACCTG": 1392, + "GGGAAAA": 1393, + "TTTTCTA": 1394, + "GGGGGGGG": 1395, + "CCGA": 1396, + "CTTTGAA": 1397, + "GGAGGTG": 1398, + "TAGTCA": 1399, + "GGCCCA": 1400, + "TGATGTT": 1401, + "CAAATAA": 1402, + "TCTTCCA": 1403, + "GCGCTT": 1404, + "GTATTTG": 1405, + "GTCTC": 1406, + "GAAATCA": 1407, + "TGATAAA": 1408, + "CATTCTT": 1409, + "TATCCA": 1410, + "GCCTCTG": 1411, + "TGAGATG": 1412, + "CGCCAA": 1413, + "GTTTTATT": 1414, + "TATATATT": 1415, + "GTAGGA": 1416, + "GACAGAA": 1417, + "CTCCAGCCTGGG": 1418, + "GCGTGA": 1419, + "GGTATG": 1420, + "GAGGGAGG": 1421, + "TCATTTG": 1422, + "CTACC": 1423, + "TACAGAA": 1424, + "GGTAGA": 1425, + "GATCTA": 1426, + "GTCCATG": 1427, + "TGAGGAA": 1428, + "TAATAAAA": 1429, + "TAAACTT": 1430, + "TCACATT": 1431, + "GGAGGCC": 1432, + "TCACAAA": 1433, + "CACTTTT": 1434, + "CGGCC": 1435, + "CAACAGA": 1436, + "GTAGAGA": 1437, + "GTTATTTT": 1438, + "CGTTTG": 1439, + "TCGTCA": 1440, + "TCTGCTG": 1441, + "CAACACA": 1442, + "GGTAGG": 1443, + "GCAGCTG": 1444, + "TAGTAGAGA": 1445, + "CAAGCC": 1446, + "GCATTTG": 1447, + "TAATATG": 1448, + "GCTTAAA": 1449, + "GCTTCTG": 1450, + "CTCTCCA": 1451, + "TCATCTT": 1452, + "CGTCTG": 1453, + "TCATTTA": 1454, + "CATAGG": 1455, + "GCTCCTT": 1456, + "TGTTCTT": 1457, + "TACATTA": 1458, + "CACAGAA": 1459, + "TAAATATA": 1460, + "TAGAGG": 1461, + "GATAGG": 1462, + "TCCTGAA": 1463, + "GGAGCTG": 1464, + "TGATATT": 1465, + 
"TCATTAA": 1466, + "CTTTTAAA": 1467, + "TCGTTA": 1468, + "TAAACTA": 1469, + "GTTTGAA": 1470, + "TAAAATTA": 1471, + "CACCCC": 1472, + "TCAGAGA": 1473, + "CTCCTGCCTCA": 1474, + "TGACATT": 1475, + "GTATTTA": 1476, + "CTTCATT": 1477, + "GAAACTG": 1478, + "TAACACA": 1479, + "GTTCAAA": 1480, + "GGAGATG": 1481, + "TCGGCC": 1482, + "CAGCATT": 1483, + "TCGATG": 1484, + "TATTCTA": 1485, + "CTGTGAA": 1486, + "TATTGAA": 1487, + "TTTTCCA": 1488, + "TATTTCTT": 1489, + "GGTGAAA": 1490, + "CTGAGAA": 1491, + "GCACAGA": 1492, + "GCGAGG": 1493, + "CTGTGTG": 1494, + "TGAAATG": 1495, + "TGATGAA": 1496, + "GTCCAAA": 1497, + "CTCAATT": 1498, + "TCCAGAA": 1499, + "GTATATA": 1500, + "TAAAGTT": 1501, + "TCTCAAAA": 1502, + "TCCATCA": 1503, + "GTCTGAA": 1504, + "TGAGAGA": 1505, + "TGATTTG": 1506, + "TTAGCC": 1507, + "CTCCATG": 1508, + "TCCCTGA": 1509, + "GAGCTA": 1510, + "CCCCCCCC": 1511, + "GTGGAAA": 1512, + "CTGGGAA": 1513, + "CAATGAA": 1514, + "CCACACA": 1515, + "CTTTCAA": 1516, + "CGGAGG": 1517, + "TCGTGA": 1518, + "CCAGAAA": 1519, + "GTTTTAAA": 1520, + "TGTTGAA": 1521, + "TCCTGTG": 1522, + "CTAAATG": 1523, + "TCCTTTA": 1524, + "GTCTGGG": 1525, + "TCTCTTTT": 1526, + "TACGG": 1527, + "TATTGTA": 1528, + "TTAGTG": 1529, + "TTACC": 1530, + "TAATCCCAGCACTTTG": 1531, + "TCTGGAA": 1532, + "CTTCTCA": 1533, + "CGCATT": 1534, + "TATTTAAA": 1535, + "TCACACA": 1536, + "TAATCAA": 1537, + "GCGAAA": 1538, + "GGGCCA": 1539, + "GTTCATT": 1540, + "GAGAAAAA": 1541, + "TTTTGTA": 1542, + "TACTTTT": 1543, + "TCGAGG": 1544, + "GTGAAAAA": 1545, + "CAATATA": 1546, + "TCCCATG": 1547, + "CAATTAA": 1548, + "CTGGAAA": 1549, + "CCCAGCA": 1550, + "TCCCATT": 1551, + "TCCTGTT": 1552, + "CTCTTTA": 1553, + "TCCCCTT": 1554, + "GTTTCAA": 1555, + "GTCCAGG": 1556, + "GGAAGGA": 1557, + "TAGTTTT": 1558, + "TGACCTT": 1559, + "GTGCTGGGATTACAGG": 1560, + "TATTTATA": 1561, + "TCTGCAA": 1562, + "CTGAAAAA": 1563, + "TATGTTA": 1564, + "CTTCACA": 1565, + "GCACAGG": 1566, + "CCTGCTG": 1567, + "TTTTTTAA": 1568, + "GTTATTA": 1569, + 
"CCCTTTT": 1570, + "TGATTTA": 1571, + "TACAAAA": 1572, + "TAAGTAA": 1573, + "TTTTTAAA": 1574, + "CATCTC": 1575, + "GTGGTGA": 1576, + "GTGGAGA": 1577, + "CTCTGCA": 1578, + "GTTAAAAA": 1579, + "TACATACA": 1580, + "CTTTGTG": 1581, + "GGACACA": 1582, + "TCTGATG": 1583, + "TATTATT": 1584, + "TCTTCTA": 1585, + "CTGTGTT": 1586, + "TCAGCTT": 1587, + "CTTTATA": 1588, + "GGCGC": 1589, + "TCCCTCA": 1590, + "GTACC": 1591, + "TGGAGAA": 1592, + "CAAAAATT": 1593, + "TCTTTAA": 1594, + "CTCTCTC": 1595, + "TGAGTGA": 1596, + "GCAGCTT": 1597, + "CGGATT": 1598, + "TACGA": 1599, + "TCTTGTT": 1600, + "TCGTAA": 1601, + "GCCTGTG": 1602, + "TATTCTG": 1603, + "GGGATA": 1604, + "GGGTCC": 1605, + "TGAGATT": 1606, + "CTTTTATT": 1607, + "TCCCACA": 1608, + "CATGGTG": 1609, + "TTAGGA": 1610, + "GAACACA": 1611, + "TCATAAA": 1612, + "CAACATT": 1613, + "GGTCCA": 1614, + "GAATTTG": 1615, + "TATTAATT": 1616, + "TCCTGGG": 1617, + "GCAGCAA": 1618, + "CTCTTCA": 1619, + "GAAGAGG": 1620, + "TCTGTCA": 1621, + "CTGAATG": 1622, + "CCACAAA": 1623, + "GTGGAGG": 1624, + "TGATTAA": 1625, + "CTCCCTCC": 1626, + "CACACACACACACACACACACACACACACACA": 1627, + "GCGATG": 1628, + "CATTCTG": 1629, + "GTAGAAA": 1630, + "TCATCAA": 1631, + "TTTTCAA": 1632, + "TATGTATG": 1633, + "CCAAATG": 1634, + "TAATTTTA": 1635, + "TAAGGAA": 1636, + "CTTGAAA": 1637, + "AAAAAAAAAAAA": 1638, + "GCTCCTG": 1639, + "GCAGATG": 1640, + "GAAAAATT": 1641, + "GACGC": 1642, + "GTGGGGG": 1643, + "GTCAATT": 1644, + "CTTGCTT": 1645, + "TGACACA": 1646, + "GTGTGTT": 1647, + "CCAGAGA": 1648, + "CCCAGCC": 1649, + "TAAAGAAA": 1650, + "GTCCATT": 1651, + "TAAATTAA": 1652, + "CCCAAAA": 1653, + "GAATTAA": 1654, + "TGAATTA": 1655, + "TTTTTTTG": 1656, + "CCAGCTT": 1657, + "CAATTTG": 1658, + "CTGTTTG": 1659, + "GTCTCAA": 1660, + "GTTTGTG": 1661, + "GGCATA": 1662, + "GGTACA": 1663, + "TGATGTG": 1664, + "GATTTCA": 1665, + "TCTGCTT": 1666, + "GTAATTA": 1667, + "TAAAAAAAA": 1668, + "GCCGCC": 1669, + "TGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTG": 1670, + "GCGTCA": 1671, + "GCTCATT": 
1672, + "GAACCTG": 1673, + "TAAACAAA": 1674, + "GTGCTGA": 1675, + "TCAGGAA": 1676, + "TCCTCAA": 1677, + "TCTATTTT": 1678, + "TCTGTTTT": 1679, + "CAGAGCA": 1680, + "CCAGGAA": 1681, + "GTCTTTA": 1682, + "TCTTCAA": 1683, + "TCAAAATT": 1684, + "GCTTATT": 1685, + "GTTCCTT": 1686, + "CACCTA": 1687, + "TCACTGA": 1688, + "GAAGCAA": 1689, + "TAAAGA": 1690, + "TCCTTCA": 1691, + "TCTCATG": 1692, + "TCAGTGA": 1693, + "TACACAA": 1694, + "CACGTG": 1695, + "CCTAAAA": 1696, + "GCCTTTG": 1697, + "GGCTTTT": 1698, + "GTTGAAA": 1699, + "GTTCTC": 1700, + "CTAGA": 1701, + "CTACAAA": 1702, + "GCACAAA": 1703, + "TTACATT": 1704, + "GGCCCC": 1705, + "TAATGTG": 1706, + "CTGCCTT": 1707, + "TCCCAGA": 1708, + "GTGAATG": 1709, + "GGACAGG": 1710, + "GGATGTG": 1711, + "GTTTATA": 1712, + "TGACCAA": 1713, + "GTGGCTG": 1714, + "GTTCTCA": 1715, + "CTTATTTT": 1716, + "CTGGAGA": 1717, + "TTACAAA": 1718, + "GTCTTCA": 1719, + "CAAGAGA": 1720, + "CCATTTG": 1721, + "TCACAGA": 1722, + "CTAGTA": 1723, + "CATTATT": 1724, + "TTAGA": 1725, + "GCTCTCC": 1726, + "GCGCCA": 1727, + "TATGTTTT": 1728, + "TCCTCCA": 1729, + "CAGAAAAA": 1730, + "GTGGGAA": 1731, + "TAATCTT": 1732, + "TGAGTCA": 1733, + "CTGCTC": 1734, + "GTCTCCA": 1735, + "TCATGTT": 1736, + "GTTTCCA": 1737, + "TAAGCAA": 1738, + "CTAAAAATA": 1739, + "TGACTGA": 1740, + "TCGGTT": 1741, + "TTAGAAA": 1742, + "TAAGCC": 1743, + "TAAAGCA": 1744, + "CCTCTCC": 1745, + "CCTCCTT": 1746, + "TCAGATT": 1747, + "TATGAAAA": 1748, + "GCTGATG": 1749, + "CATATTTT": 1750, + "GCTCCAA": 1751, + "CGGCGG": 1752, + "CCACTGA": 1753, + "CAGCAAA": 1754, + "CTGTCTT": 1755, + "CTAGCA": 1756, + "TCGGGG": 1757, + "CACAGCA": 1758, + "GCTGATT": 1759, + "CTAGGA": 1760, + "TAACTC": 1761, + "TCATATT": 1762, + "CCTTCTT": 1763, + "CTGCAAA": 1764, + "CCCGC": 1765, + "GGTCTA": 1766, + "CCCAGGA": 1767, + "GTGTCTG": 1768, + "TAATAATAATAA": 1769, + "TCACATG": 1770, + "CAATTTA": 1771, + "TATATATATATATATATATATATATATATATA": 1772, + "CCACAGA": 1773, + "TCAATTTT": 1774, + "GTATTAA": 1775, + "GAACATT": 
1776, + "TCTCTTA": 1777, + "CTATTTG": 1778, + "TCTTTCC": 1779, + "GGTTAAA": 1780, + "GCTAATT": 1781, + "CTGCTGA": 1782, + "TACCTA": 1783, + "CAGGGTT": 1784, + "TCGCCA": 1785, + "CAAAAATTA": 1786, + "CTTCTGA": 1787, + "GCATGTG": 1788, + "CTATTAA": 1789, + "GCACATG": 1790, + "CAACATG": 1791, + "TCATGAA": 1792, + "GAATGTT": 1793, + "GGGTTTT": 1794, + "CTGCCTG": 1795, + "GTCCACA": 1796, + "TAAACA": 1797, + "CTCTGGA": 1798, + "GACCCC": 1799, + "GGCAAAA": 1800, + "TCTGTTA": 1801, + "CTAGTG": 1802, + "CTATATA": 1803, + "TCAGTCA": 1804, + "TAACTAA": 1805, + "GAAGATG": 1806, + "GTCTTAA": 1807, + "CAAGGAA": 1808, + "GTAAAAAA": 1809, + "TCCCCTG": 1810, + "TCGCAA": 1811, + "TCTGCCTG": 1812, + "CCTTTTA": 1813, + "GTCCCAGCTA": 1814, + "TATATATG": 1815, + "TATTGTG": 1816, + "TGTGTTTT": 1817, + "GCGCAA": 1818, + "CACAGTG": 1819, + "TAAGATT": 1820, + "CTCTGTA": 1821, + "GGAGGCTGA": 1822, + "GGACAAA": 1823, + "TATTAAAAA": 1824, + "TCGTCC": 1825, + "TCGGAA": 1826, + "CTATAAA": 1827, + "CTTCAGA": 1828, + "CTAGAAA": 1829, + "CATTCAA": 1830, + "CACGCA": 1831, + "CAGGATT": 1832, + "CCATCTT": 1833, + "GTAGCC": 1834, + "GAATTTA": 1835, + "CACGC": 1836, + "CAATCC": 1837, + "TGAGCAA": 1838, + "GAAGCTG": 1839, + "TCAATTA": 1840, + "GAAGTCA": 1841, + "CTGCACA": 1842, + "CCACGG": 1843, + "GGATCTT": 1844, + "CTCCTGCCTCAGCCTCC": 1845, + "TAAATGAA": 1846, + "CCGTC": 1847, + "TCGGTG": 1848, + "TTTTATTA": 1849, + "GCAGGGG": 1850, + "GCAGGTG": 1851, + "TCTATTA": 1852, + "TAACTTA": 1853, + "CTAATTTT": 1854, + "CCCGCC": 1855, + "TAATACA": 1856, + "GGATTAAA": 1857, + "TCTCTCTG": 1858, + "GCTTCTT": 1859, + "CATTTATT": 1860, + "CCAGAGG": 1861, + "GGACAGA": 1862, + "GCCAATT": 1863, + "TCCCCAA": 1864, + "GTTGATT": 1865, + "GAAGAAAA": 1866, + "GCATTTA": 1867, + "CTCTAAA": 1868, + "CACACACACACA": 1869, + "CCTCAAA": 1870, + "TATAATT": 1871, + "CAATGTT": 1872, + "GCCCAGA": 1873, + "GTATATT": 1874, + "CTAAAAAA": 1875, + "CCACAGG": 1876, + "TAAGAGA": 1877, + "TCCTTAA": 1878, + "TATTTTTT": 1879, + "GAATATA": 1880, 
+ "GGATTTG": 1881, + "GTGTGAA": 1882, + "CTGGCTT": 1883, + "GCGGCA": 1884, + "TCCGCC": 1885, + "GCATCTT": 1886, + "TCTAATA": 1887, + "CTGCATT": 1888, + "CTCTGCC": 1889, + "TCACTCA": 1890, + "TCAGCAA": 1891, + "TATTATG": 1892, + "CCAGCTG": 1893, + "GATCTC": 1894, + "GCCTCTT": 1895, + "CTTCCAA": 1896, + "TCCTAAA": 1897, + "TCATCTG": 1898, + "CTATTTA": 1899, + "CTGCAGG": 1900, + "CAAGCAA": 1901, + "GCGGAA": 1902, + "GAAATAAA": 1903, + "TAAAATAA": 1904, + "TCACCTT": 1905, + "CCATGTG": 1906, + "GACCTA": 1907, + "CAGATGA": 1908, + "GTGGCTT": 1909, + "TTATTATTATTA": 1910, + "TCCCGG": 1911, + "TATTTGTT": 1912, + "CTGTAAA": 1913, + "TCCATCCA": 1914, + "CTGTATA": 1915, + "GTTTCTA": 1916, + "GTTGCTT": 1917, + "CCATGAA": 1918, + "GCTCTTA": 1919, + "CTTCATG": 1920, + "GTTCCTG": 1921, + "GCTGGGA": 1922, + "TCAGAGG": 1923, + "CATTAAAA": 1924, + "TCAGTAA": 1925, + "GAATGTG": 1926, + "CTTATTA": 1927, + "GCACTGA": 1928, + "TGAGGTT": 1929, + "CATCAAA": 1930, + "CTTCTCC": 1931, + "GTTTATG": 1932, + "CTTTCCA": 1933, + "GTGCCTG": 1934, + "GAAAGGA": 1935, + "GCATCTG": 1936, + "TACCCA": 1937, + "TAACAGA": 1938, + "AAAAAAAAAAA": 1939, + "CTATGAA": 1940, + "CAGTAAA": 1941, + "TAGCTA": 1942, + "TCGTTTT": 1943, + "GTGTCTT": 1944, + "GAGCAAA": 1945, + "TCTAAAAA": 1946, + "GTTCACA": 1947, + "GAAATGA": 1948, + "CAAATGA": 1949, + "GCCCTGA": 1950, + "GTGTTTA": 1951, + "TCATGTG": 1952, + "CATATTA": 1953, + "TCAAAAAAA": 1954, + "TAAGTTA": 1955, + "TCTCTCTT": 1956, + "CCAGTGA": 1957, + "CCTCTGA": 1958, + "CAAGATG": 1959, + "GCCTGTT": 1960, + "GTTTGGG": 1961, + "CATTCATT": 1962, + "GCCCCTG": 1963, + "GTTCTGA": 1964, + "GCGGCC": 1965, + "GCGGTT": 1966, + "CAAAACAAAA": 1967, + "TACATATA": 1968, + "GAATTAAA": 1969, + "TCAAGAA": 1970, + "CTGTATT": 1971, + "TTTTTATT": 1972, + "GATTATT": 1973, + "TCTAATG": 1974, + "GTTGCTG": 1975, + "TGAATGAA": 1976, + "TCAGCTG": 1977, + "CTTGATT": 1978, + "CAGAATG": 1979, + "CTAATTA": 1980, + "TATAATG": 1981, + "GTTTTGTTTT": 1982, + "CCAGCCTG": 1983, + "TGATGGA": 1984, + 
"GCAGATT": 1985, + "CTCTATT": 1986, + "GCAGTCA": 1987, + "TAAGTGA": 1988, + "CTACACA": 1989, + "CGCATG": 1990, + "TAGCCA": 1991, + "GTGGCTCA": 1992, + "CAAATAAA": 1993, + "GTGCTCA": 1994, + "TTTTTTTTTT": 1995, + "TAACATG": 1996, + "TCCCAGCTA": 1997, + "CAAAGTA": 1998, + "TCATATA": 1999, + "CAGCATG": 2000, + "TGATCTT": 2001, + "CATAATT": 2002, + "TGTGTTA": 2003, + "TTTTGAA": 2004, + "TTAATTA": 2005, + "GATATTA": 2006, + "TCATTCA": 2007, + "TGATATA": 2008, + "TGACTCA": 2009, + "GACGTT": 2010, + "TGACATG": 2011, + "GTTGTGA": 2012, + "CATTTTTT": 2013, + "GCCTGGA": 2014, + "CTATGTT": 2015, + "CTTTGGG": 2016, + "GTCTCAAA": 2017, + "CTGGCTG": 2018, + "CCACATG": 2019, + "GGCGTG": 2020, + "CTTAATG": 2021, + "TAAGATG": 2022, + "GTATAAA": 2023, + "TGTATTA": 2024, + "TAACTCA": 2025, + "GAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGA": 2026, + "GCATGAA": 2027, + "GTTAATG": 2028, + "TCCAGGA": 2029, + "GAGAGAAA": 2030, + "TCTCTGTG": 2031, + "CTCTCTA": 2032, + "CCACCTG": 2033, + "GCCAGGA": 2034, + "CTGGAGG": 2035, + "CCATTTA": 2036, + "GTCTGGA": 2037, + "GCCCACA": 2038, + "TAGAGAA": 2039, + "CAACTCA": 2040, + "GGCAGGA": 2041, + "TCTTATG": 2042, + "CAAAGGA": 2043, + "GGTAAAA": 2044, + "GAGAGGA": 2045, + "GTCCAGA": 2046, + "GCCCTCA": 2047, + "GATATTTT": 2048, + "CAGGGAA": 2049, + "CCACATT": 2050, + "GAGGAGG": 2051, + "GAAACTT": 2052, + "CAGAATT": 2053, + "TCAGATG": 2054, + "TATTTCC": 2055, + "TACAGTG": 2056, + "TGAGCTG": 2057, + "CCATCTG": 2058, + "GAGAATG": 2059, + "TCAACAA": 2060, + "ATT": 2061, + "TAACTGA": 2062, + "TGAGAGG": 2063, + "CACTGAA": 2064, + "CCACCTT": 2065, + "CTGCAGA": 2066, + "TCACCAA": 2067, + "TGAGCTT": 2068, + "CAAAGCA": 2069, + "GGTTTTA": 2070, + "CGGGGTT": 2071, + "TCCAAAAA": 2072, + "TATGTATA": 2073, + "CCAGATG": 2074, + "TCCATTTT": 2075, + "CTGCTCA": 2076, + "GATAATT": 2077, + "CCACCAA": 2078, + "CTCCTCC": 2079, + "GAGAATT": 2080, + "GAAAGTA": 2081, + "TAAAATAAAA": 2082, + "CTTCTTA": 2083, + "CTGTTTA": 2084, + "GAATCAA": 2085, + "GCATGTT": 2086, + "GCACGG": 2087, + 
"GACTGAA": 2088, + "GTGCACA": 2089, + "GACGTG": 2090, + "TATACAA": 2091, + "TCGACA": 2092, + "GAAGACA": 2093, + "TAAAGGA": 2094, + "GATCAAA": 2095, + "CAGTGTG": 2096, + "CTAGCC": 2097, + "GAGGAAAA": 2098, + "TCTGAAAA": 2099, + "GAACCCA": 2100, + "GATGGATG": 2101, + "GTTCTTA": 2102, + "CTATATT": 2103, + "GCATTAA": 2104, + "TCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTC": 2105, + "TCAGTC": 2106, + "TATTTTTG": 2107, + "GAGGATT": 2108, + "GTATGTG": 2109, + "TAACCAA": 2110, + "GTTGTTTT": 2111, + "TTTTTCTT": 2112, + "GTGTTAA": 2113, + "CTTGGAA": 2114, + "AAAAAATG": 2115, + "CAATGTG": 2116, + "GTGCCTT": 2117, + "GCCTCAA": 2118, + "GAGTCTT": 2119, + "GCTAATTTT": 2120, + "CGAAAAA": 2121, + "GTGTATA": 2122, + "GCGTTA": 2123, + "CTGCACTCCAGCCTGGG": 2124, + "GTTCATG": 2125, + "CAAAGAAA": 2126, + "GCAGTAA": 2127, + "GGATGAA": 2128, + "CTTTATG": 2129, + "CAGGAAAA": 2130, + "TCCTGCA": 2131, + "CTGTCTG": 2132, + "GAACATG": 2133, + "GGATGGA": 2134, + "GCCTGAA": 2135, + "CAAAAATG": 2136, + "TCCAATG": 2137, + "CCAGCAA": 2138, + "GGCCTA": 2139, + "CAACTGA": 2140, + "GCACCTG": 2141, + "GTCTATT": 2142, + "CCTCTCA": 2143, + "GTGGTCA": 2144, + "GTGTAAA": 2145, + "GTACACA": 2146, + "GTAAAATT": 2147, + "GTACATT": 2148, + "TATATAAA": 2149, + "CTGTTAA": 2150, + "TAAGTCA": 2151, + "GCCTCCA": 2152, + "AAATTAAA": 2153, + "GTGCAGG": 2154, + "TCCTGGA": 2155, + "GTGCAAA": 2156, + "GCGTCC": 2157, + "CCATTAA": 2158, + "GGAGGGA": 2159, + "TCACTTA": 2160, + "TCATTAAA": 2161, + "CAACATA": 2162, + "TAATAGA": 2163, + "TAATGTA": 2164, + "GATTTTTT": 2165, + "GTTGTCA": 2166, + "GGAGACA": 2167, + "GTGTGGG": 2168, + "TCACAGG": 2169, + "TCGGCA": 2170, + "CTCCCTG": 2171, + "GACCAAA": 2172, + "TGTTTATT": 2173, + "CGAATG": 2174, + "CTCAATG": 2175, + "TCACCTG": 2176, + "CAGTGTT": 2177, + "TGAGACA": 2178, + "TAGGGG": 2179, + "GAAAAATG": 2180, + "GTTGAGA": 2181, + "TCGATA": 2182, + "CTCGGGAGG": 2183, + "GTTGTC": 2184, + "CCAGTCA": 2185, + "GCCCAGGCTG": 2186, + "GAACAGA": 2187, + "GGCTCACTGCAA": 2188, + "GCAGACA": 2189, + 
"TGAGGTG": 2190, + "CACGTT": 2191, + "TAAGAAAA": 2192, + "CCAGGCA": 2193, + "GTATCTT": 2194, + "CTTGGGAGG": 2195, + "CTTTCTA": 2196, + "CCGCTG": 2197, + "GAGCTCA": 2198, + "GAGACAGA": 2199, + "CTTCAGG": 2200, + "GCACATT": 2201, + "GTACAAA": 2202, + "CTTGTAA": 2203, + "GTGGGTG": 2204, + "GAAGTGA": 2205, + "GGTCTC": 2206, + "GTATGTT": 2207, + "GCACTCA": 2208, + "TTATGTT": 2209, + "CAAGTCA": 2210, + "CAAGTGA": 2211, + "GAAACTA": 2212, + "TAAATAAAA": 2213, + "TCTTAAAA": 2214, + "GTTGGAA": 2215, + "GTTCTAA": 2216, + "CCACTC": 2217, + "CAGTGAA": 2218, + "GAAAGG": 2219, + "GCACGA": 2220, + "TAACTTTT": 2221, + "GTTGTTA": 2222, + "TCAGTTA": 2223, + "CGGATG": 2224, + "TATTTGAA": 2225, + "CCCTGAA": 2226, + "GCCCTC": 2227, + "CTTCTAA": 2228, + "TTTGTTTT": 2229, + "GAGCTGA": 2230, + "CTGTGGG": 2231, + "CAAGATT": 2232, + "GAAGCTT": 2233, + "TGAGTAA": 2234, + "CTTGCTG": 2235, + "GGATGGG": 2236, + "CGTATG": 2237, + "TCCATTA": 2238, + "GTCTGCA": 2239, + "GCCATTTT": 2240, + "GTTGTAA": 2241, + "CACACAA": 2242, + "GGACTACAGG": 2243, + "CGTTTTA": 2244, + "TCTTCC": 2245, + "TAACCTT": 2246, + "CTTTAAAA": 2247, + "TGAATTTT": 2248, + "CTACAGA": 2249, + "GCAAGAA": 2250, + "TAACAAAA": 2251, + "CAATTAAA": 2252, + "CCACTCA": 2253, + "CATGGTGAAA": 2254, + "CCCAGAA": 2255, + "CTACATT": 2256, + "CCGAGG": 2257, + "TCCAGTG": 2258, + "TGAGTTA": 2259, + "GGAGTCA": 2260, + "TAACGA": 2261, + "GAGTAAA": 2262, + "GACTCTG": 2263, + "GGAGCTT": 2264, + "TACTCC": 2265, + "CTGCATG": 2266, + "GCTTTTTT": 2267, + "GTCTAAA": 2268, + "GTGCGG": 2269, + "CATCTCA": 2270, + "TGATCAA": 2271, + "GGAGATT": 2272, + "GCAAAAAA": 2273, + "CACCAAA": 2274, + "TGACGG": 2275, + "CAGAGG": 2276, + "GTTGATG": 2277, + "CTTGTCA": 2278, + "TCCACCTG": 2279, + "GGAGCAA": 2280, + "CAAGTAA": 2281, + "CCATAAA": 2282, + "GTGCATG": 2283, + "GCATATT": 2284, + "GTAGATT": 2285, + "GCCTAA": 2286, + "CTCAAAAA": 2287, + "GGAGAAAA": 2288, + "CTATCC": 2289, + "TAATATTA": 2290, + "GTGCTC": 2291, + "CAATATG": 2292, + "TGTGGAA": 2293, + "TGACTC": 2294, 
+ "GTGTATG": 2295, + "TTTTAATG": 2296, + "GCTCTAA": 2297, + "CACAATG": 2298, + "CAGCTCA": 2299, + "GTTGGTT": 2300, + "CTAAAATT": 2301, + "GTCTATG": 2302, + "TGTGAAAA": 2303, + "CTGGGTT": 2304, + "CCCCTCC": 2305, + "CCCTCTT": 2306, + "GCAGGGA": 2307, + "GAAACCA": 2308, + "CATTTCC": 2309, + "GCAGCCA": 2310, + "TCATATG": 2311, + "GCAGGCA": 2312, + "CGTAAAA": 2313, + "TGACCTG": 2314, + "CAGAGGTT": 2315, + "CTTGTGA": 2316, + "TTATCTT": 2317, + "CTGTATG": 2318, + "GTCAATG": 2319, + "GGACGG": 2320, + "GCGTAA": 2321, + "CAAACTA": 2322, + "TAAATGTT": 2323, + "CTTCGG": 2324, + "CTCCCCA": 2325, + "TACAATG": 2326, + "TCTGTAA": 2327, + "GAATATG": 2328, + "GCGGGA": 2329, + "GGACATT": 2330, + "TTATGAA": 2331, + "GGATGTT": 2332, + "GGACATG": 2333, + "TCAGGTG": 2334, + "CAACAAAA": 2335, + "GAAAGAGA": 2336, + "GTGGATG": 2337, + "GGGCTA": 2338, + "CCATCAA": 2339, + "CAGCTGA": 2340, + "CTCCACC": 2341, + "CAATCAA": 2342, + "GTGGTC": 2343, + "TGACAGG": 2344, + "CCATTCA": 2345, + "GTCCCTG": 2346, + "CAGACACA": 2347, + "GTTGGTG": 2348, + "CCTCCTG": 2349, + "GAACTGA": 2350, + "TATTCATT": 2351, + "GCCCATG": 2352, + "CAATCTT": 2353, + "GAAAGCA": 2354, + "GAATCTG": 2355, + "TTATTTTA": 2356, + "GTTTGGA": 2357, + "TTTTTGTT": 2358, + "GGGAATG": 2359, + "GCGACA": 2360, + "TAAACTG": 2361, + "CCATATT": 2362, + "GGATCC": 2363, + "CAAGCTT": 2364, + "TAAAAAAAAA": 2365, + "TCACTC": 2366, + "CACTGTT": 2367, + "TGTTAATT": 2368, + "GGACTGA": 2369, + "GGAGTGA": 2370, + "CATACACA": 2371, + "GTTTGTA": 2372, + "TCCAGCA": 2373, + "GTGCATT": 2374, + "GGAAAAAA": 2375, + "CCAAGAA": 2376, + "TCAATA": 2377, + "CTTCCCA": 2378, + "TGAGAAAA": 2379, + "GGCCTCCCAAA": 2380, + "CAAGCTG": 2381, + "GCCCAAA": 2382, + "TGACTTA": 2383, + "CAGCCTT": 2384, + "CTGGATT": 2385, + "TTTTTTTA": 2386, + "TCACGG": 2387, + "GCAGTTA": 2388, + "TGACTAA": 2389, + "TTACAGG": 2390, + "TGATATG": 2391, + "TAATTATT": 2392, + "TCTTGAA": 2393, + "GCCCCTT": 2394, + "GTTCAGA": 2395, + "CTCTATG": 2396, + "CCATGGA": 2397, + "GAGGGAA": 2398, + 
"GGAGGCA": 2399, + "CTTTGCA": 2400, + "TCTTGG": 2401, + "GGAGGTT": 2402, + "GCCAATG": 2403, + "CTGGTGA": 2404, + "CAACCAA": 2405, + "CCAGTC": 2406, + "CTTGAGA": 2407, + "TACAGCA": 2408, + "CTTGTC": 2409, + "GACGGA": 2410, + "CTTCTTTT": 2411, + "GTGGC": 2412, + "GAGGATG": 2413, + "CAATAAAA": 2414, + "GAAATTTT": 2415, + "AAAAAAAAAA": 2416, + "CTCTATA": 2417, + "GTATGAA": 2418, + "CTTGTTA": 2419, + "TAACATA": 2420, + "CAAACACA": 2421, + "TGATTAAA": 2422, + "GCTCTGTT": 2423, + "GTGGGTT": 2424, + "GTTGGGG": 2425, + "GTGTGTA": 2426, + "GTAATTTT": 2427, + "GTATCC": 2428, + "TGTGTGTGTGTG": 2429, + "TCTTCCTT": 2430, + "TCACTAA": 2431, + "TCTCCAAA": 2432, + "TATCAAA": 2433, + "TGATGGG": 2434, + "GGATATT": 2435, + "CAAATTTT": 2436, + "GTTCAGG": 2437, + "GTGGATT": 2438, + "GTGCAGA": 2439, + "GCTGCC": 2440, + "CTCAGAA": 2441, + "GCAGTC": 2442, + "GGATAAA": 2443, + "GCCTTCA": 2444, + "CCAGGTG": 2445, + "TATCTC": 2446, + "CAATGCA": 2447, + "CCCACTG": 2448, + "GTGTATT": 2449, + "CGACAGA": 2450, + "TGAGATA": 2451, + "CCAGGTT": 2452, + "TGTTTAA": 2453, + "CATCATG": 2454, + "TGATTCA": 2455, + "GCAATTA": 2456, + "GAAATGAA": 2457, + "CTTGGTT": 2458, + "GAAGATT": 2459, + "GGATTAA": 2460, + "CCTCATT": 2461, + "GGCCAGGCTG": 2462, + "GCTATTA": 2463, + "GCCAGCA": 2464, + "GAGACAGG": 2465, + "CTTGAGG": 2466, + "CAGTCTT": 2467, + "GTTCTCC": 2468, + "TATTTCAA": 2469, + "TGACGA": 2470, + "CATGAAAA": 2471, + "CATTATG": 2472, + "TAAATTTA": 2473, + "GAGTGAA": 2474, + "CAACAGG": 2475, + "TAAGCTT": 2476, + "CACATTTT": 2477, + "GATCTCA": 2478, + "TAGTCC": 2479, + "GACCCTG": 2480, + "TAATGCA": 2481, + "TAAGTC": 2482, + "TAATAATT": 2483, + "GAAGTAA": 2484, + "CAACTC": 2485, + "CATCATT": 2486, + "GACGAA": 2487, + "GAAACAAA": 2488, + "TATTTCTG": 2489, + "CATTAATT": 2490, + "CCACCCC": 2491, + "TAATATTTT": 2492, + "GTTTAAAA": 2493, + "GTATCTG": 2494, + "GTCAAAAA": 2495, + "GATGCTG": 2496, + "TGTTCTG": 2497, + "GGTCAAA": 2498, + "GTAGGAA": 2499, + "GTATATG": 2500, + "TGATCTG": 2501, + "GGGGCTG": 2502, + 
"GCATCAA": 2503, + "GCCAAAAA": 2504, + "CCACGA": 2505, + "GCTAATG": 2506, + "CAGAGAAA": 2507, + "CCTTCTG": 2508, + "TCCTCTA": 2509, + "GCAGGTT": 2510, + "CTCACTG": 2511, + "TAGATTA": 2512, + "GCCGAGA": 2513, + "CCATCCA": 2514, + "CTTTACA": 2515, + "GTACATG": 2516, + "GCACCAA": 2517, + "CTTTGTA": 2518, + "CTATGTG": 2519, + "TCACTTTT": 2520, + "TGAGTC": 2521, + "CAAGAAAA": 2522, + "CTGACTG": 2523, + "GTTTTTTTT": 2524, + "GCATAAA": 2525, + "TAATCTG": 2526, + "GAAAAAAAA": 2527, + "CAGGATG": 2528, + "TGAGCCA": 2529, + "GAATTCA": 2530, + "TCAGACA": 2531, + "GTTCCAA": 2532, + "TCAGGTT": 2533, + "CAAACTG": 2534, + "CATTTCTT": 2535, + "TGTTAAAA": 2536, + "CCAGACA": 2537, + "CAAGTTA": 2538, + "CATGTTA": 2539, + "CATTCTA": 2540, + "TCTTTTTG": 2541, + "TGAGGGG": 2542, + "CACATTA": 2543, + "TAAAATAAA": 2544, + "GCATATA": 2545, + "TGTTCTA": 2546, + "GAAGGGG": 2547, + "GAGTGTG": 2548, + "TAAGACA": 2549, + "GAACTC": 2550, + "CCAGTAA": 2551, + "GAGAGAGG": 2552, + "GCGACC": 2553, + "CAATTCA": 2554, + "CGGCTG": 2555, + "CCAGATT": 2556, + "CCTGGG": 2557, + "GGAAGAAA": 2558, + "GAGAGG": 2559, + "TCAAAATG": 2560, + "CCTCATG": 2561, + "TAAAGG": 2562, + "CTTTGGA": 2563, + "CCAGGGA": 2564, + "GTACAGA": 2565, + "CTGAGGCAGGA": 2566, + "TGTTTCTT": 2567, + "CCAGGCTG": 2568, + "CTGAGG": 2569, + "GAGGCTG": 2570, + "CTCCTGGG": 2571, + "GAAGTC": 2572, + "CGACC": 2573, + "GGACTCA": 2574, + "GGAGTC": 2575, + "CACAATT": 2576, + "GTGTTCA": 2577, + "GACTAAA": 2578, + "GTCATTA": 2579, + "CAAAATTA": 2580, + "TGAAGAAA": 2581, + "GCACCTT": 2582, + "GTTTGCA": 2583, + "TCCTGCC": 2584, + "GTAGATG": 2585, + "GCCTGCA": 2586, + "GAGTTAA": 2587, + "TCCCTTA": 2588, + "GTGGTTA": 2589, + "TCGGGA": 2590, + "TACATAA": 2591, + "TCTCTCCA": 2592, + "CACTAAA": 2593, + "TATATATATATA": 2594, + "GTGGCAA": 2595, + "CACCATG": 2596, + "TTTGAAAA": 2597, + "CACACTG": 2598, + "CTTGGTG": 2599, + "TACACTG": 2600, + "CCTCCAA": 2601, + "CAACCTT": 2602, + "CAGCCAA": 2603, + "TTTTCAAA": 2604, + "TGATAGA": 2605, + "TACACTA": 2606, + 
"TCTGGG": 2607, + "TCCCAGCA": 2608, + "TAGGAAAA": 2609, + "CTTGGGG": 2610, + "TCTGTGAA": 2611, + "CCTTATT": 2612, + "CATTTAAA": 2613, + "TTTTATTTTA": 2614, + "GCCCTCC": 2615, + "CTGAGCA": 2616, + "CCCGTG": 2617, + "GTAGTGA": 2618, + "TCCTATT": 2619, + "GAAGGTG": 2620, + "TGTGCTG": 2621, + "TCCACTG": 2622, + "TAATCTA": 2623, + "TGATGTA": 2624, + "GTGGTAA": 2625, + "TAATGGA": 2626, + "GATGAAAA": 2627, + "GTAGTAA": 2628, + "GTGGGGA": 2629, + "GTGTCAA": 2630, + "CAGACTG": 2631, + "TCGAAAA": 2632, + "CTCATTA": 2633, + "TAATAATA": 2634, + "CTCAGAAA": 2635, + "CATCCTT": 2636, + "CCGCTT": 2637, + "GGAAGG": 2638, + "CCGTGA": 2639, + "CCACTCC": 2640, + "CTAGAGA": 2641, + "TAGAATG": 2642, + "GGATTTA": 2643, + "TTAATTTT": 2644, + "GCTAATA": 2645, + "TCCCCCA": 2646, + "CAAATATT": 2647, + "GATCATG": 2648, + "TCTTAATT": 2649, + "CAGTATT": 2650, + "GTCTTGAA": 2651, + "CCGAAA": 2652, + "CTATTCA": 2653, + "TAAGATA": 2654, + "CTTGCAA": 2655, + "GCCCCAA": 2656, + "TCCCTAA": 2657, + "GAAGTTA": 2658, + "GATGATG": 2659, + "CTTGATG": 2660, + "CCCTAAA": 2661, + "CCTGCCTG": 2662, + "GACATTTT": 2663, + "CCAGCCA": 2664, + "TGTGTGTGTG": 2665, + "GTCTATA": 2666, + "TCTCTGTT": 2667, + "GTCTGTA": 2668, + "TATAATA": 2669, + "CTTGTTTT": 2670, + "CGCCATT": 2671, + "CTCAGCA": 2672, + "TACAGTT": 2673, + "CAAGAGG": 2674, + "GGAAGCA": 2675, + "GCCTTTA": 2676, + "CCCCATT": 2677, + "CAACGA": 2678, + "GTCATTTT": 2679, + "CCCGCA": 2680, + "CAGTTAA": 2681, + "GAATCTT": 2682, + "CATGTTTT": 2683, + "CCGGGG": 2684, + "CTACTGA": 2685, + "TCACGA": 2686, + "TAAATTTG": 2687, + "GCCCATT": 2688, + "CTCTAGG": 2689, + "GGACCTG": 2690, + "TCAGGGA": 2691, + "GAGACTG": 2692, + "CCAAAAAA": 2693, + "GCCGG": 2694, + "CCAGGGG": 2695, + "TCAGAAAA": 2696, + "CATCTGA": 2697, + "TCTTCAAA": 2698, + "CTACAGG": 2699, + "GAGGCAGG": 2700, + "CATTGTA": 2701, + "TAAATCAA": 2702, + "GACTCTT": 2703, + "CTGATTA": 2704, + "GCATATG": 2705, + "GGACCTT": 2706, + "CAAGACA": 2707, + "TATTTATG": 2708, + "TATTTTAAA": 2709, + "CCGAGA": 2710, + 
"TCATTTTA": 2711, + "CTCACTCA": 2712, + "CCACCCA": 2713, + "CTCTAGA": 2714, + "CTACATG": 2715, + "GTGCTTA": 2716, + "CAACCTG": 2717, + "TCTGTGTT": 2718, + "TAAATATG": 2719, + "CAAAGG": 2720, + "CCCTGTT": 2721, + "GTTCGG": 2722, + "TGATAAAA": 2723, + "CACGAA": 2724, + "GTTGAGG": 2725, + "CAGAGTGA": 2726, + "GAAATTAA": 2727, + "CACATA": 2728, + "GAACAGG": 2729, + "TCTCCTGA": 2730, + "CCTGAGG": 2731, + "GGAGGCCAA": 2732, + "GTTTACA": 2733, + "TAACAGG": 2734, + "TGTGGTG": 2735, + "GCCTCCCAAA": 2736, + "CCATCCTG": 2737, + "GATTCTT": 2738, + "GAATGGA": 2739, + "GTAGTCA": 2740, + "CTCCTCTG": 2741, + "GAAAGAAAGAAAGAAA": 2742, + "CCCTGTG": 2743, + "CAGTATG": 2744, + "GCGATA": 2745, + "GGACTC": 2746, + "GAAAGA": 2747, + "TGTTGG": 2748, + "GTAGCTT": 2749, + "CATTTTAA": 2750, + "CCCTCTG": 2751, + "GCATTCA": 2752, + "CGATTA": 2753, + "TCACATA": 2754, + "TAATGAAA": 2755, + "GGAATTA": 2756, + "CTGTCAA": 2757, + "TAAATTAAA": 2758, + "CAAGTC": 2759, + "GTATTCA": 2760, + "GGCCATG": 2761, + "CTTTAGA": 2762, + "TGTTTCC": 2763, + "CATGTA": 2764, + "GAATAAAA": 2765, + "CAACTAA": 2766, + "TCATCTA": 2767, + "CACTCTT": 2768, + "CAGTTTG": 2769, + "CATAAAAA": 2770, + "GCATGCA": 2771, + "GATTTA": 2772, + "GAACCAA": 2773, + "TCTGTGA": 2774, + "TCAGCCA": 2775, + "TCTCCACA": 2776, + "TCTCAGCTCA": 2777, + "TATCATG": 2778, + "GCACTTA": 2779, + "CGCCAGG": 2780, + "CGGGG": 2781, + "CATTAAAAA": 2782, + "TTTGTTA": 2783, + "GGATATA": 2784, + "TCGACC": 2785, + "TAATCCA": 2786, + "CCGC": 2787, + "CATTGTT": 2788, + "CCAGTTA": 2789, + "GTAGTTA": 2790, + "CTAGGAA": 2791, + "CCTAATT": 2792, + "TCATGGG": 2793, + "GAACTAA": 2794, + "GCTATTTT": 2795, + "CCGTCA": 2796, + "CAGATTA": 2797, + "CCATATA": 2798, + "CAACTTA": 2799, + "TCAGTTTT": 2800, + "CTACCTT": 2801, + "GCACTC": 2802, + "GTGTGGA": 2803, + "GTGCCAA": 2804, + "GACAATG": 2805, + "GACAATT": 2806, + "GTACCTT": 2807, + "TAAACATT": 2808, + "CAGGAGG": 2809, + "GTGCGA": 2810, + "GAAAATTA": 2811, + "TCTCTTAA": 2812, + "CCGATT": 2813, + "GATGATT": 2814, + 
"CCATGGG": 2815, + "TCGGTA": 2816, + "CCATATG": 2817, + "CCAGTCC": 2818, + "GCCTTAA": 2819, + "TGATCCA": 2820, + "GTTGCAA": 2821, + "GTAGAGG": 2822, + "CAGATTTT": 2823, + "GTACTTA": 2824, + "TCTTTCTTTCTTTCTT": 2825, + "GCTCTGTG": 2826, + "TCAATAA": 2827, + "GTTTAGA": 2828, + "GTTCGA": 2829, + "CAAGGTT": 2830, + "CTCATTTT": 2831, + "CACAGG": 2832, + "CATGCTG": 2833, + "GAACGG": 2834, + "TATAAAAA": 2835, + "GAAGGCA": 2836, + "GAGCATT": 2837, + "TGTTTGTG": 2838, + "GCTGTTA": 2839, + "GTCACTG": 2840, + "CAAATGAA": 2841, + "GTGACTG": 2842, + "GTTCTTTT": 2843, + "CAGGCTGGAGTGCAGTG": 2844, + "TGATGAAA": 2845, + "TAACGG": 2846, + "CTACTAA": 2847, + "GACATTA": 2848, + "GGACGA": 2849, + "GAGCATG": 2850, + "GCATGGG": 2851, + "CCACTTA": 2852, + "CTATCAA": 2853, + "GCTGTTTT": 2854, + "GTCGTG": 2855, + "CCTGGCC": 2856, + "TCTCTGAA": 2857, + "TGTTGTA": 2858, + "CAGCCAGG": 2859, + "GTTTAGG": 2860, + "CCGCAA": 2861, + "GGAGTAA": 2862, + "CCAATTA": 2863, + "CAGCAAAA": 2864, + "TCATCCA": 2865, + "CACGTA": 2866, + "TCATAGA": 2867, + "TAATTAAAA": 2868, + "CACTTAA": 2869, + "TCTTTATT": 2870, + "GAGATTA": 2871, + "TAAGAGG": 2872, + "CAAATTAA": 2873, + "GACGCA": 2874, + "CACGGA": 2875, + "GTGTGCA": 2876, + "TCT": 2877, + "TATTATTA": 2878, + "GAAATATT": 2879, + "GGAGTTA": 2880, + "TCTTTGA": 2881, + "CTGATTTT": 2882, + "TGTGAATT": 2883, + "TCCCACC": 2884, + "CCCTTTG": 2885, + "CAAGGTG": 2886, + "CAGAGTT": 2887, + "CCCCATG": 2888, + "CTACCAA": 2889, + "CTCCAAAA": 2890, + "CTTCCCC": 2891, + "CTGCTAA": 2892, + "GATTAAAA": 2893, + "GCTTATG": 2894, + "CTACTTA": 2895, + "TAAAAAATT": 2896, + "TCAGTCC": 2897, + "CTATTAAA": 2898, + "GAATGGG": 2899, + "CACAGTA": 2900, + "CAACGG": 2901, + "GGTTATT": 2902, + "TCACCCA": 2903, + "TGATGCA": 2904, + "TAATTTTTT": 2905, + "GTTTGAGA": 2906, + "GTATTAAA": 2907, + "GCCCCCA": 2908, + "TATAGTA": 2909, + "TAGTAAA": 2910, + "TGATACA": 2911, + "GTGGTTTT": 2912, + "CCACTAA": 2913, + "CACAGAGA": 2914, + "CCTCTGCCTCC": 2915, + "CAAAAAAAA": 2916, + "CTCTCTCC": 2917, + 
"CATAATA": 2918, + "GAAGCCA": 2919, + "GTTCCCA": 2920, + "TGTGTTTG": 2921, + "CAATGGA": 2922, + "TGAAGTA": 2923, + "CTTCATA": 2924, + "CACTGTG": 2925, + "GCTCTTTT": 2926, + "TGACATA": 2927, + "TAAAGAAAA": 2928, + "GAGAAATG": 2929, + "CAGGGAGG": 2930, + "TGTTCAA": 2931, + "GAGCCAA": 2932, + "GACAGAGA": 2933, + "GGCTGAA": 2934, + "CAAATATA": 2935, + "GTGGAAAA": 2936, + "TAAGGTT": 2937, + "GTGATTA": 2938, + "GGATCTG": 2939, + "GATGTTA": 2940, + "GACTACACA": 2941, + "TCCTATA": 2942, + "CTGCCAA": 2943, + "TCCCGA": 2944, + "GTGATTTT": 2945, + "GCGTTTT": 2946, + "CAGAGTA": 2947, + "GAAAGGAA": 2948, + "CACTTTG": 2949, + "CCCCAAAA": 2950, + "GCAACCCA": 2951, + "TGCATTTT": 2952, + "TCTAGAA": 2953, + "TACTTTG": 2954, + "TGAGGCA": 2955, + "CATCTCC": 2956, + "TCGCTA": 2957, + "TGACTTTT": 2958, + "GAGCCTG": 2959, + "CATTTGTT": 2960, + "TCTTTGTT": 2961, + "GCAAAATT": 2962, + "CCTGATT": 2963, + "GATAAAAA": 2964, + "GAGTGTT": 2965, + "TCCTGTA": 2966, + "TACAGAAA": 2967, + "TCCAGGAA": 2968, + "GCCAGTG": 2969, + "TAGATTTT": 2970, + "TAATAGG": 2971, + "CTCCTCA": 2972, + "CATTTTTG": 2973, + "CATTTCAA": 2974, + "GCCATCA": 2975, + "TAAAATATA": 2976, + "GACTGTT": 2977, + "GCATGGA": 2978, + "CAAAGTT": 2979, + "CATGATT": 2980, + "GAGTTTG": 2981, + "CTAGCAA": 2982, + "CTTCCTA": 2983, + "GGGGAGG": 2984, + "CTATATG": 2985, + "TATTTATTTT": 2986, + "CACCATT": 2987, + "CCCTCAA": 2988, + "TTTTTTTTTTTTTT": 2989, + "GATCATT": 2990, + "GTACATA": 2991, + "CTCCATA": 2992, + "CCCCGTCTCTA": 2993, + "GCCTGCC": 2994, + "CTAGCTT": 2995, + "CCCGGA": 2996, + "GATGTTTT": 2997, + "GTATTTTA": 2998, + "TCAGATA": 2999, + "CCTGGAA": 3000, + "TATTCCA": 3001, + "GGACCAA": 3002, + "GCCATTA": 3003, + "CGACTGA": 3004, + "TAAGCTG": 3005, + "TAAACACA": 3006, + "GTTTCTC": 3007, + "CATCTTA": 3008, + "GAAATTTG": 3009, + "TAATGGG": 3010, + "TAAAATTTT": 3011, + "CTGTTCA": 3012, + "CCTGTTA": 3013, + "TACTGAA": 3014, + "TGACCCA": 3015, + "TGATTTTA": 3016, + "CTCCTTA": 3017, + "TATAGAA": 3018, + "CTGCGG": 3019, + "GCGGTA": 3020, 
+ "GTGCTAA": 3021, + "CAGAGGAA": 3022, + "TACATCA": 3023, + "TCAATCAA": 3024, + "CTGCAGCC": 3025, + "TGAATATT": 3026, + "TCTACAA": 3027, + "CCACATA": 3028, + "CCCGTT": 3029, + "TATACACA": 3030, + "TCCTCTC": 3031, + "TCTACTT": 3032, + "CCGGAA": 3033, + "CTTTTTTA": 3034, + "GAAAGAAAA": 3035, + "CTATCTT": 3036, + "GACTTTG": 3037, + "TGAACAA": 3038, + "GCAGTTTT": 3039, + "GCTAAAAA": 3040, + "GAGGCGG": 3041, + "TAATAAAAA": 3042, + "CTGGTCA": 3043, + "CAGACAA": 3044, + "GGATATG": 3045, + "TGAAGG": 3046, + "GCCAGAA": 3047, + "CCAGGCC": 3048, + "CCACCATG": 3049, + "CAAACTT": 3050, + "TCATGTA": 3051, + "GCTGCTT": 3052, + "GTAATA": 3053, + "CCCCCAA": 3054, + "CAGCCTG": 3055, + "TCAACTT": 3056, + "TAAAATTAA": 3057, + "GCTGAAAA": 3058, + "CGACGA": 3059, + "GTGGGCA": 3060, + "TGAGGGA": 3061, + "CGCTCC": 3062, + "TTTTGTTTT": 3063, + "GAGTCAA": 3064, + "TCATGCA": 3065, + "CTGCTTA": 3066, + "TAAGTTTT": 3067, + "GTAGCAA": 3068, + "CCTTGG": 3069, + "TGACAAAA": 3070, + "CTGGTAA": 3071, + "TCTTTATA": 3072, + "TGTGTGTT": 3073, + "CTGGTC": 3074, + "CTGGCAA": 3075, + "CATTTCTG": 3076, + "CTCTACC": 3077, + "CTGAGGA": 3078, + "CTAAAATG": 3079, + "CTAGATT": 3080, + "GTATCAA": 3081, + "CAGTCAA": 3082, + "CTGGGTG": 3083, + "CCTCTTA": 3084, + "TGAGTTTT": 3085, + "TTTTATTTA": 3086, + "CCTTTTTT": 3087, + "TATATACA": 3088, + "TAGCAAA": 3089, + "AAATTA": 3090, + "CTGGATG": 3091, + "GATAATA": 3092, + "GACAAAAA": 3093, + "CCTGGGA": 3094, + "GCTTTCA": 3095, + "GTACAGG": 3096, + "GCTGGAA": 3097, + "CTACTCA": 3098, + "CAATGTA": 3099, + "GCGTGAA": 3100, + "GATCCTT": 3101, + "TATTAATG": 3102, + "GCCCGA": 3103, + "TAAAGTG": 3104, + "GCTTCCA": 3105, + "CATGGAA": 3106, + "TGAAGTT": 3107, + "CTTTCTC": 3108, + "TCTGTGTG": 3109, + "GTATGTA": 3110, + "CAATACA": 3111, + "TCAAGG": 3112, + "CCTCTAA": 3113, + "TGTGGG": 3114, + "GATCTGA": 3115, + "GTACTGA": 3116, + "TTAATTAA": 3117, + "GCAGAAAA": 3118, + "CTACATA": 3119, + "CCGGTG": 3120, + "GGGGAAAA": 3121, + "TACAAAAAA": 3122, + "TTTTGG": 3123, + "GTGAGAA": 3124, 
+ "TCAATAAA": 3125, + "TCAAGTT": 3126, + "CTCAGGA": 3127, + "CTACTC": 3128, + "CAAATCA": 3129, + "GGCAGAA": 3130, + "CCCGAA": 3131, + "TGTTGTG": 3132, + "GAGCAAAA": 3133, + "TATTTGTG": 3134, + "GTAGGTT": 3135, + "CTACCTG": 3136, + "CACAAAAA": 3137, + "CTCAGG": 3138, + "GCTTTA": 3139, + "CAGAGCAA": 3140, + "CTCAGTG": 3141, + "GGAAGAGA": 3142, + "TAACCTG": 3143, + "GAAATATA": 3144, + "CGAGAA": 3145, + "GTGAGG": 3146, + "CATTTATA": 3147, + "GGCAGCA": 3148, + "TCTAAATT": 3149, + "CCCAGTG": 3150, + "GCCTAGG": 3151, + "TGCATTA": 3152, + "CCGTAA": 3153, + "CATTCCA": 3154, + "CTAGTTA": 3155, + "GACTTAA": 3156, + "CTATACA": 3157, + "GACACAA": 3158, + "TCTTCACA": 3159, + "CCGGTT": 3160, + "TAAAGTAA": 3161, + "CTGTGGA": 3162, + "TAAGGTG": 3163, + "TCCAGTA": 3164, + "CAAATTTA": 3165, + "AAATTAAAA": 3166, + "CCATCTA": 3167, + "CTCCCTT": 3168, + "CTCCTTTT": 3169, + "GAGAGAGAGAGA": 3170, + "GGAGATA": 3171, + "CCTATTA": 3172, + "CACCAAAA": 3173, + "CCGTTA": 3174, + "TGTTTATA": 3175, + "CTCAGGAGG": 3176, + "GACGTA": 3177, + "GTCCTTA": 3178, + "GAAAGTT": 3179, + "GCTGGTG": 3180, + "CTCTACA": 3181, + "CAATAGA": 3182, + "TAAAATATT": 3183, + "GTACCTG": 3184, + "GTACTAA": 3185, + "CTTTGAAA": 3186, + "CCTTTCC": 3187, + "TAAAAATTA": 3188, + "CTCGG": 3189, + "CAAGATA": 3190, + "CATTTGA": 3191, + "CACCTCA": 3192, + "GCCAGCC": 3193, + "GTCGG": 3194, + "GCACATA": 3195, + "CACTCAA": 3196, + "CTTTTAAAA": 3197, + "CAGGAATT": 3198, + "GCCTATT": 3199, + "TCTTTCTG": 3200, + "CTGAGGCAGGAGAA": 3201, + "CAGGCAGG": 3202, + "CTAGTAA": 3203, + "TCCATA": 3204, + "GAACTTA": 3205, + "CG": 3206, + "GCTGTGA": 3207, + "GAAAATA": 3208, + "TCTTCATT": 3209, + "GAGGGAGA": 3210, + "CCCATCC": 3211, + "GAGGTGGG": 3212, + "GCCTCTA": 3213, + "GTAGGTG": 3214, + "TAAACCA": 3215, + "GAAGGAAA": 3216, + "TATTGG": 3217, + "ATG": 3218, + "TCCAGTT": 3219, + "CCCACAA": 3220, + "GAAACACA": 3221, + "GTCTCAAAA": 3222, + "CTTTTCTTTT": 3223, + "TGAAGGA": 3224, + "TATTGATT": 3225, + "CTATGTA": 3226, + "AAAAAAAAAAAAAA": 3227, + 
"TCCTTAAA": 3228, + "GCGCTA": 3229, + "TCCACTT": 3230, + "GACTCAA": 3231, + "TAAATACA": 3232, + "TCATGGA": 3233, + "TCTGGGA": 3234, + "TCCTATG": 3235, + "CTGTGCA": 3236, + "TCAAGTGA": 3237, + "TCATAAAA": 3238, + "CATCCAA": 3239, + "CCTTCCA": 3240, + "CTGTACA": 3241, + "GAAGGTT": 3242, + "CTGTGTA": 3243, + "GTCACTT": 3244, + "TCACAAAA": 3245, + "TCAGGCA": 3246, + "GTGTTAAA": 3247, + "CCCTTAA": 3248, + "CAAAGTG": 3249, + "GAAATGTT": 3250, + "CTGGGGA": 3251, + "GACGCC": 3252, + "TATATGTG": 3253, + "CTAGATG": 3254, + "GAAATTAAA": 3255, + "GAATGCA": 3256, + "GCACTAA": 3257, + "CGGGAGG": 3258, + "GCCACAA": 3259, + "CGCTTA": 3260, + "TCCACAA": 3261, + "CAGATA": 3262, + "TCTGAATT": 3263, + "TATTATTTT": 3264, + "GCGCGG": 3265, + "CTCTGAAA": 3266, + "TCTCTTTG": 3267, + "TATTTCTA": 3268, + "GGGGTGGG": 3269, + "GGATGCA": 3270, + "CCACACC": 3271, + "TAAATGTG": 3272, + "TCTTCCTG": 3273, + "GCAAGG": 3274, + "CTGCTCC": 3275, + "CTGGAGTG": 3276, + "CTGTTAAA": 3277, + "CACACAAA": 3278, + "CTGACTT": 3279, + "GAAAAGAAAA": 3280, + "CCTTCTCC": 3281, + "GAAATAAAA": 3282, + "CCTCAGGTGA": 3283, + "GATAATG": 3284, + "GAATTGCTT": 3285, + "CCAAAATT": 3286, + "CGTGAAA": 3287, + "CACTGAAA": 3288, + "CAGTGAAA": 3289, + "GATCTTA": 3290, + "GAGATGGG": 3291, + "TCTGCCA": 3292, + "TGAGGTA": 3293, + "TATGGAA": 3294, + "TATATTTTA": 3295, + "TGAACTT": 3296, + "GCAGATA": 3297, + "CTTTTCTT": 3298, + "GTAAAATG": 3299, + "TCTCTAA": 3300, + "TCTGCAAA": 3301, + "GAGCCTT": 3302, + "TATCATT": 3303, + "CAATTTTA": 3304, + "CCGCCA": 3305, + "TATTTAAAA": 3306, + "GAGAGATG": 3307, + "GAGATGGA": 3308, + "GCCAGGATG": 3309, + "CGAGTAGCTG": 3310, + "TTCATTTT": 3311, + "TATACTT": 3312, + "GTCTACA": 3313, + "GTGAGTGA": 3314, + "GCTACACA": 3315, + "GGGAGGA": 3316, + "CAAGGCA": 3317, + "GCTTTTAA": 3318, + "CACTATT": 3319, + "GTTCATA": 3320, + "TCCTC": 3321, + "GTGGACA": 3322, + "TATTTGGA": 3323, + "CTCCAGTA": 3324, + "GTTCAGTT": 3325, + "CCAAGG": 3326, + "CAGAGCC": 3327, + "CTCGCC": 3328, + "CCGATG": 3329, + "GGAATTTT": 
3330, + "TCCAGCC": 3331, + "CCTCTTTT": 3332, + "GAACCTT": 3333, + "CATGCACA": 3334, + "GTTTC": 3335, + "GAAGATA": 3336, + "TACCCC": 3337, + "GCTGCCA": 3338, + "GGGGGAGG": 3339, + "GCAGTGAGCTGA": 3340, + "CTGTCTA": 3341, + "CGAGGA": 3342, + "CAATGGG": 3343, + "GCTGTGAA": 3344, + "GAAAGTG": 3345, + "TACCAAAA": 3346, + "GTCAGG": 3347, + "CAGCTCC": 3348, + "TGTGCTT": 3349, + "GTCTAGG": 3350, + "TTTTTGTA": 3351, + "TTATATG": 3352, + "TCAGGGG": 3353, + "TATTGTTA": 3354, + "CCTGAGA": 3355, + "TATCTCA": 3356, + "CAATCTG": 3357, + "CACTCTG": 3358, + "GATTTAA": 3359, + "TGAATAA": 3360, + "TCTTGTA": 3361, + "TCAACTG": 3362, + "TCTCCAGG": 3363, + "CTAGAGG": 3364, + "CTGAGAAA": 3365, + "CTAGCTG": 3366, + "TCCACCA": 3367, + "CGATTTT": 3368, + "CCGGCC": 3369, + "GTTGACA": 3370, + "CTTAGAA": 3371, + "CATAATG": 3372, + "GAGTATT": 3373, + "CACAGAAA": 3374, + "GACTGTG": 3375, + "CTATTTTA": 3376, + "TGAGGAAA": 3377, + "TTATTAAAA": 3378, + "CTTATTTA": 3379, + "CAGACTT": 3380, + "CACGCC": 3381, + "GCTTGG": 3382, + "CCTGCTT": 3383, + "TAAAGCAA": 3384, + "CCTCGTGA": 3385, + "TAGAATT": 3386, + "CTTACAA": 3387, + "TAAAGGAA": 3388, + "GTCTAGA": 3389, + "GTGACTT": 3390, + "TACATATG": 3391, + "GTCAGGA": 3392, + "GCTCCAGG": 3393, + "GAAGGGA": 3394, + "CATGATG": 3395, + "TCATCAAA": 3396, + "CGTTAAA": 3397, + "GTACTCA": 3398, + "CTCCCAA": 3399, + "TATATGTA": 3400, + "GGTATTTT": 3401, + "TAAGCCA": 3402, + "CGAAATT": 3403, + "GTTTGTTTT": 3404, + "TCTGTCTT": 3405, + "TATATCA": 3406, + "TGTTCATT": 3407, + "CAAACCA": 3408, + "TTCATTA": 3409, + "TATTTGTA": 3410, + "GATTGAA": 3411, + "CTATAAAA": 3412, + "GATTAATT": 3413, + "CCCACCA": 3414, + "TCCTAGG": 3415, + "TAAATGTA": 3416, + "CTCTTAAA": 3417, + "GCAGTCC": 3418, + "GCGGCTG": 3419, + "GTCTCGAA": 3420, + "TGAATGA": 3421, + "CTGGGGG": 3422, + "GTCTCGA": 3423, + "GAACAAAA": 3424, + "TGAATCA": 3425, + "TGTATTTTTAGTAGAGA": 3426, + "GTTATTAA": 3427, + "TTTTTTAAAA": 3428, + "GTCAGTG": 3429, + "CCCATTA": 3430, + "CACAGGA": 3431, + "TATTCCTT": 3432, + 
"TCTGCCTT": 3433, + "CCTGGTG": 3434, + "GCGAGC": 3435, + "TACTAAA": 3436, + "TACACAAA": 3437, + "CCGTCC": 3438, + "GCTTTGTT": 3439, + "GCATCCA": 3440, + "CATCTAA": 3441, + "GCTGTGTT": 3442, + "GTAGACA": 3443, + "GCCTATG": 3444, + "TCTTTGTG": 3445, + "GATTCTG": 3446, + "CGCCCGG": 3447, + "GATGAGA": 3448, + "TATCTGA": 3449, + "TGAATTTG": 3450, + "CCTGATG": 3451, + "TAAAACAA": 3452, + "CTTTAGG": 3453, + "TTTTCCTT": 3454, + "TGAATAAA": 3455, + "CGGGGA": 3456, + "CAAACATT": 3457, + "GTATGGA": 3458, + "GCTTAAAA": 3459, + "TACCAAA": 3460, + "CAAAGAGA": 3461, + "CTCCTGCC": 3462, + "GTAAAAAAA": 3463, + "CACAGCC": 3464, + "CCATGCA": 3465, + "TACAATT": 3466, + "CTAGTGA": 3467, + "CTGAGTT": 3468, + "GAGTGAAA": 3469, + "TCTGTTTG": 3470, + "CTGTAGG": 3471, + "TATAAAAAA": 3472, + "GCATTAAA": 3473, + "GTCCATA": 3474, + "TGTTAAAAA": 3475, + "TGTTTGA": 3476, + "GAATAGA": 3477, + "CTTCAAAA": 3478, + "CTGGACA": 3479, + "CTGTAGA": 3480, + "CCATTAAA": 3481, + "CTATCTG": 3482, + "CACTATG": 3483, + "TTATCAA": 3484, + "TAAGTAAA": 3485, + "TAATCCCAGCACTTTGGGAGGCC": 3486, + "CCAGAAAA": 3487, + "TGAAGCA": 3488, + "TCCCTTTT": 3489, + "TCATACA": 3490, + "TACGTT": 3491, + "GCCGTG": 3492, + "GGAAGTG": 3493, + "GGCCAAA": 3494, + "GTACCAA": 3495, + "TCTCTACTAAAAATA": 3496, + "CATTGTG": 3497, + "TGTGTGA": 3498, + "GAAACAGA": 3499, + "CTTGACA": 3500, + "GATGAGG": 3501, + "GAGATTTT": 3502, + "CCTTCAA": 3503, + "GAATCTA": 3504, + "CTCTCCTT": 3505, + "GGCGGA": 3506, + "TCTATCTATCTATCTA": 3507, + "CACACAGA": 3508, + "TGTGTGTA": 3509, + "CAAAGCC": 3510, + "TGTGCCA": 3511, + "GTTGAAAA": 3512, + "CTCCAGCA": 3513, + "TCAAGGA": 3514, + "TAGCTCA": 3515, + "CGCTGA": 3516, + "CCTGAAAA": 3517, + "GACTATT": 3518, + "GATTCCA": 3519, + "GCTTCTA": 3520, + "GTCTGCC": 3521, + "CTTGGCA": 3522, + "TGTGGTA": 3523, + "GCTTTGA": 3524, + "GCTCTCTG": 3525, + "CTCACAGA": 3526, + "TCTTTAAA": 3527, + "CAAAGCAA": 3528, + "TACTTAA": 3529, + "GCTTCAA": 3530, + "CATTGAA": 3531, + "GGAGGAAA": 3532, + "CTATAGA": 3533, + "CTGAGGAA": 
3534, + "CCTGGCA": 3535, + "CCCTATT": 3536, + "CTCGTG": 3537, + "TTACACA": 3538, + "TTAGGAA": 3539, + "CTGGTTA": 3540, + "GTTGTCC": 3541, + "TAATGAAAA": 3542, + "TATTTACA": 3543, + "GGGAATT": 3544, + "GTAGTTTT": 3545, + "GCTGCAA": 3546, + "CTACGG": 3547, + "GCCGGA": 3548, + "CTGGGCA": 3549, + "CCTTAAAA": 3550, + "GATGGAA": 3551, + "TAGATAGATAGATAGA": 3552, + "TATGTAA": 3553, + "GTACGG": 3554, + "TATTCAAA": 3555, + "GATCTCC": 3556, + "CCTGTTTT": 3557, + "TATTGCA": 3558, + "GGAAGGAAGGAAGGAA": 3559, + "GGTAATT": 3560, + "TTACAGA": 3561, + "TCAGC": 3562, + "GCAAAATG": 3563, + "GAGAGCA": 3564, + "GTAGAAAA": 3565, + "CATTTGAA": 3566, + "TCTTCTTTT": 3567, + "TCCCATA": 3568, + "GTTATTTA": 3569, + "CTATCTA": 3570, + "CATCCTG": 3571, + "TCTTGTG": 3572, + "TTATTATT": 3573, + "CCCGTC": 3574, + "TACTATG": 3575, + "TAAACATA": 3576, + "TAAGGAAA": 3577, + "GCTTGTG": 3578, + "CTCTAAAA": 3579, + "GTTTTAAAA": 3580, + "GACAGGA": 3581, + "TCCTAGA": 3582, + "TCCACCCA": 3583, + "GTTTGAAA": 3584, + "CCATCTCA": 3585, + "CTAAGAA": 3586, + "GTATCTA": 3587, + "GTGAGGA": 3588, + "GCTGGAGG": 3589, + "CCTGTAATCCCAGCTA": 3590, + "GCAACAA": 3591, + "CTTTCAAA": 3592, + "CAAATGTT": 3593, + "CTTGTCC": 3594, + "TCTCAAAAA": 3595, + "TATTTATTA": 3596, + "TAAGGCA": 3597, + "GAGAGGAA": 3598, + "TATGATT": 3599, + "GCATCTA": 3600, + "CGTTATT": 3601, + "GCCTGTA": 3602, + "GTTTCAAA": 3603, + "CCTTCCTTCCTTCCTT": 3604, + "GGCTTTG": 3605, + "GTCAGAA": 3606, + "CATGCATG": 3607, + "GTCATTTA": 3608, + "CTGGAAAA": 3609, + "CTTCGA": 3610, + "CCTATTTT": 3611, + "CCAACAA": 3612, + "TCCATCC": 3613, + "TAAAGTTA": 3614, + "GTCTCTC": 3615, + "TAATCAAA": 3616, + "GATTTTTG": 3617, + "GATTTCTT": 3618, + "GGGCTGA": 3619, + "GCATGTA": 3620, + "CCTGGGTT": 3621, + "GAGACAA": 3622, + "GCTGTCA": 3623, + "TGATAGG": 3624, + "GGAGACC": 3625, + "CCGGCA": 3626, + "TAATCTCA": 3627, + "TGAATTAA": 3628, + "TCTGGTG": 3629, + "GCCTC": 3630, + "GGCGCA": 3631, + "CCAGCTA": 3632, + "CAGTCTG": 3633, + "TGAACTA": 3634, + "GTAAGAA": 3635, + 
"CCTTTCA": 3636, + "TCCATGA": 3637, + "CAAAGGAA": 3638, + "CTCTC": 3639, + "CTCTCTCA": 3640, + "CTCCAGC": 3641, + "GTAGATA": 3642, + "CCCCCTCC": 3643, + "GGCGCC": 3644, + "TCTGTCC": 3645, + "GACCATT": 3646, + "CTTGAAAA": 3647, + "TTATCC": 3648, + "TACATGTG": 3649, + "CAAATTTG": 3650, + "TTTTGTG": 3651, + "CAGAGTG": 3652, + "GTAATAA": 3653, + "GTGAGTG": 3654, + "TTTTTCC": 3655, + "GGCTCTG": 3656, + "GCCCTAA": 3657, + "GGCTGTT": 3658, + "CCCAATT": 3659, + "CAGAGCTT": 3660, + "TATAAATG": 3661, + "GAGTCTG": 3662, + "TCTTAAAAA": 3663, + "GTTTTATG": 3664, + "GATCCAA": 3665, + "GGCCCTG": 3666, + "GATCCTG": 3667, + "TCAAGTG": 3668, + "GATTCAA": 3669, + "CCTCTCTT": 3670, + "GAGACGG": 3671, + "CAGATCA": 3672, + "TAAAAGAA": 3673, + "CTGAGCAA": 3674, + "CCTGCCA": 3675, + "CCTTCTA": 3676, + "CGCTCA": 3677, + "GGCTGTG": 3678, + "TGGGAAAA": 3679, + "GGAGCCTG": 3680, + "CTGAGTG": 3681, + "CGTCAAA": 3682, + "TCAAGTA": 3683, + "CGTAATT": 3684, + "TTACTTA": 3685, + "TATACTA": 3686, + "GGGCAAA": 3687, + "CAACTTTT": 3688, + "CTTTGCC": 3689, + "GCCAGGAA": 3690, + "CACACTA": 3691, + "GCCCAGC": 3692, + "TAAATAAATAAATAAA": 3693, + "CTTTCCTT": 3694, + "GGGAGAA": 3695, + "TATGGTA": 3696, + "CGGCCA": 3697, + "CCTCTCTG": 3698, + "GAAAGCAA": 3699, + "CAAGCCA": 3700, + "GGCGTT": 3701, + "CTCTTTTA": 3702, + "TCGGCCTCCCAAA": 3703, + "GATTTATT": 3704, + "CAAGTCC": 3705, + "TATCTTA": 3706, + "GTTCAAGACCA": 3707, + "CTCACACA": 3708, + "GAAATCAA": 3709, + "TGAGACC": 3710, + "GGGTAAA": 3711, + "GCTTGTT": 3712, + "GATTTTAA": 3713, + "TTTTTATA": 3714, + "CAGAGCTG": 3715, + "TCTGTTAA": 3716, + "GTAATTAA": 3717, + "TCTTTGAA": 3718, + "CTTGCCA": 3719, + "TTTTCATT": 3720, + "CCATGTA": 3721, + "TCTCGGCTCACTGCAA": 3722, + "GGATTCA": 3723, + "TCTATTAA": 3724, + "TACATAAA": 3725, + "GATTGATT": 3726, + "GGAGAGGA": 3727, + "CGCAAAA": 3728, + "GGACTAA": 3729, + "TTATGTG": 3730, + "GTCACTCA": 3731, + "GACAGCA": 3732, + "CGAGTT": 3733, + "GATGGTT": 3734, + "GGAAGAGG": 3735, + "GCCAACATGGTGAAA": 3736, + "GGAGCCA": 
3737, + "TGAACTG": 3738, + "CCTCTGTG": 3739, + "GTATAAAA": 3740, + "TCCCAGAA": 3741, + "CATTTATG": 3742, + "GATTATG": 3743, + "TGTTTCTG": 3744, + "GAGTGGGTT": 3745, + "TACATATT": 3746, + "CTCCAGGA": 3747, + "GACACTG": 3748, + "GGTCTCA": 3749, + "CCGGGA": 3750, + "TGTTTAAA": 3751, + "CTCACCA": 3752, + "GGACTTA": 3753, + "GCCCACC": 3754, + "CAAATCAA": 3755, + "GAAATGTG": 3756, + "TAGTTAA": 3757, + "TCTATAA": 3758, + "TTAGATT": 3759, + "GTGTAGG": 3760, + "TACTGAAA": 3761, + "GCACCCA": 3762, + "GTGGGCTG": 3763, + "GAATGAAA": 3764, + "TCTAGTT": 3765, + "TCAGGAGA": 3766, + "TCCACTA": 3767, + "CTCAGTT": 3768, + "TACTTAAA": 3769, + "GACTCCA": 3770, + "TCCATTTG": 3771, + "CACAGCAA": 3772, + "GCTCATGCCTG": 3773, + "GGTGCTG": 3774, + "GCTTTCTT": 3775, + "GTGGCCA": 3776, + "TACGTG": 3777, + "GTGCAGTG": 3778, + "TGAAGTCA": 3779, + "CCTTTAA": 3780, + "TCTCAGCTCACTGCAA": 3781, + "GAAATATG": 3782, + "CCTCAAAA": 3783, + "GGGGCGG": 3784, + "CGACAA": 3785, + "GGTGATG": 3786, + "GTCTTAAA": 3787, + "CAGAAATG": 3788, + "CGTCATT": 3789, + "CCAAGCA": 3790, + "GGATCAA": 3791, + "GTGCTGGGATTA": 3792, + "GCTGGCC": 3793, + "CGGAGCTT": 3794, + "TACATGA": 3795, + "TGTTTGAA": 3796, + "TCTCCATT": 3797, + "TAAGCAAA": 3798, + "CCTTTCTT": 3799, + "TACTGTT": 3800, + "TCCATCTT": 3801, + "CTTACTT": 3802, + "CGGAGGTT": 3803, + "CAAAACAA": 3804, + "TCATAGG": 3805, + "TTACTAA": 3806, + "CTTATTTG": 3807, + "GAATGTA": 3808, + "CCCCATGGA": 3809, + "TTACTGA": 3810, + "CGGAAAA": 3811, + "CTCCAGTG": 3812, + "TGTTCCA": 3813, + "CAGATGAA": 3814, + "GTTGATA": 3815, + "TCCCCCC": 3816, + "CATTGCA": 3817, + "CTCAGCC": 3818, + "CTTACTG": 3819, + "TATCCTT": 3820, + "CTTTTATG": 3821, + "TGAGTAGCTG": 3822, + "GACTGAAA": 3823, + "CAATGAAA": 3824, + "CGACTG": 3825, + "CTTGGGA": 3826, + "GCAAGCA": 3827, + "TCACTCC": 3828, + "GATTTGA": 3829, + "CATTTTAAA": 3830, + "TCAACTA": 3831, + "GTCCAAAA": 3832, + "CACCCTG": 3833, + "TTACCTT": 3834, + "CAAGGGG": 3835, + "TTTTGGA": 3836, + "GTTATTTG": 3837, + "GCTACTG": 3838, + 
"CTGAGGCAGGAGAATG": 3839, + "GTGATGA": 3840, + "GTAGTC": 3841, + "TAGTATG": 3842, + "GTATAGA": 3843, + "GTGTCTA": 3844, + "GCTGCTA": 3845, + "TTAGTAA": 3846, + "TAAACATG": 3847, + "GTCACCA": 3848, + "CATCTTTT": 3849, + "CATATAA": 3850, + "TCTCTCTA": 3851, + "TTTTATTAA": 3852, + "TATTCTAA": 3853, + "GAAATTTA": 3854, + "CTTCCCTG": 3855, + "TAAAGATG": 3856, + "TACGTA": 3857, + "GTTTATTA": 3858, + "GAAAAGAA": 3859, + "CCCACCCA": 3860, + "CAATTAAAA": 3861, + "CCGACA": 3862, + "CAAAGTGA": 3863, + "CAAACAAAA": 3864, + "GCAATTTT": 3865, + "CGATTAA": 3866, + "TTAGAGA": 3867, + "CTGATGA": 3868, + "GGAGGAGG": 3869, + "GTCCTGGG": 3870, + "TCATGAAA": 3871, + "GCAACCA": 3872, + "GTTGGCA": 3873, + "GCGGCGG": 3874, + "GTCCCCA": 3875, + "GTAGGGG": 3876, + "GCCATGTT": 3877, + "GTTCGAGA": 3878, + "GCCTATA": 3879, + "TAAATTCA": 3880, + "GGCCATT": 3881, + "GAAAACAA": 3882, + "TGTGTATG": 3883, + "GTACTC": 3884, + "TAGGGAA": 3885, + "CCTTGAA": 3886, + "TCTATTTG": 3887, + "GAGGGCA": 3888, + "GAAACTGA": 3889, + "TACGC": 3890, + "TACAAAAA": 3891, + "TCATTATT": 3892, + "GGAAAATT": 3893, + "TCAATATT": 3894, + "CCCGTA": 3895, + "GGAGAGAA": 3896, + "TTAGTTA": 3897, + "CTCAGAGA": 3898, + "TCGAGC": 3899, + "CTAGTCA": 3900, + "GATGGCA": 3901, + "TGAACATT": 3902, + "CTATGGG": 3903, + "CACACCA": 3904, + "TCAATTAA": 3905, + "GGAACTG": 3906, + "TTACATG": 3907, + "CTTTCATT": 3908, + "CAGCTCTG": 3909, + "TCTTTTTTTT": 3910, + "TAAATCTT": 3911, + "TGATCTA": 3912, + "CATACAA": 3913, + "GCTCAAAA": 3914, + "GCTGTGTG": 3915, + "TCAATCA": 3916, + "GATTTGAA": 3917, + "CCAAGGA": 3918, + "GTCCTCA": 3919, + "GTGCTCC": 3920, + "AAAATAA": 3921, + "GTGACAA": 3922, + "GCTCACGCCTG": 3923, + "CGACGG": 3924, + "TATCCAA": 3925, + "CACACATG": 3926, + "TCTCTCTCC": 3927, + "TGTGGTT": 3928, + "CTTGGTA": 3929, + "TCTGGTT": 3930, + "TTTATAA": 3931, + "CTGCTTTT": 3932, + "TGTGTCA": 3933, + "CACATCA": 3934, + "CCTAATG": 3935, + "CGTTTTTT": 3936, + "GCTGGCA": 3937, + "GACGTC": 3938, + "TATAATTA": 3939, + "TACAGTAA": 3940, + 
"GAAAGTAA": 3941, + "GTCTGAAA": 3942, + "CCCATTTT": 3943, + "TATATGA": 3944, + "CTTGATA": 3945, + "CTTTATTTT": 3946, + "CTTTATTA": 3947, + "GGCGAA": 3948, + "CCATGCC": 3949, + "CCTGCCTT": 3950, + "GAAGAAGAAGAA": 3951, + "CTGACTGA": 3952, + "GCCCTTA": 3953, + "TATCTAA": 3954, + "GTGTTTTA": 3955, + "TGTGGCA": 3956, + "TATTGTAA": 3957, + "GCCAGAAA": 3958, + "CCCTGTCTC": 3959, + "CACAGGAA": 3960, + "AAAACAA": 3961, + "AAAAAAAAAAAAAAA": 3962, + "TAACTCC": 3963, + "GCCTAAA": 3964, + "CGAGTA": 3965, + "TAGTATT": 3966, + "GTATTTTTAGTAGAGA": 3967, + "GCTGCAGG": 3968, + "TATTGAAA": 3969, + "CCAGCCTGGG": 3970, + "GCTCCAAA": 3971, + "TACGAA": 3972, + "GGCCTCC": 3973, + "TATACAAA": 3974, + "CATGGCA": 3975, + "CATGCAA": 3976, + "TACACCA": 3977, + "CTTTACCA": 3978, + "TACAGAGA": 3979, + "TATTCTTA": 3980, + "TATGTCA": 3981, + "TCAAGCA": 3982, + "TCAATGA": 3983, + "GGCTCTT": 3984, + "GGAAGTT": 3985, + "TCCATGTT": 3986, + "GCTTTCC": 3987, + "TATGTGA": 3988, + "GTGTAGA": 3989, + "TTTTTAAAA": 3990, + "GCTGGAGA": 3991, + "GTGAGAGA": 3992, + "CCTAGAA": 3993, + "CCTCCAAA": 3994, + "CCAATGA": 3995, + "CAGGGCA": 3996, + "CTATGCA": 3997, + "CTTCACC": 3998, + "CTACAAAA": 3999, + "CTCACC": 4000, + "GAGTATG": 4001, + "TAGAAAAA": 4002, + "CTTTTGAA": 4003, + "TAAAGAGA": 4004, + "CATGTCA": 4005, + "TCTTTTAAA": 4006, + "CACAGTGA": 4007, + "GATCTAA": 4008, + "TAAGGTA": 4009, + "CATAGAA": 4010, + "CGCGCC": 4011, + "CAGCTTA": 4012, + "TATAGTT": 4013, + "CGGGCC": 4014, + "TATCCATT": 4015, + "TGTTTGTTTT": 4016, + "GCTGGCTG": 4017, + "TACAGGA": 4018, + "CTCCTTTG": 4019, + "CAATCTA": 4020, + "CCCCCTG": 4021, + "TATACTG": 4022, + "CTGAGCC": 4023, + "CGGTTA": 4024, + "TGAAGTG": 4025, + "GCTTCCTT": 4026, + "TTTTATTTG": 4027, + "TAGTGAA": 4028, + "CTGAGGTG": 4029, + "TCTTCTC": 4030, + "GACAGAAA": 4031, + "CTGAACTGAA": 4032, + "CCTGGGAA": 4033, + "TCCCCAAA": 4034, + "TATGTATT": 4035, + "GATTTCTG": 4036, + "CATTCAAA": 4037, + "CACAGTT": 4038, + "GCTTGAA": 4039, + "GTGGATCA": 4040, + "CTGAGTGA": 4041, + 
"TGAATTTA": 4042, + "TCAACAAA": 4043, + "GGTCATT": 4044, + "GTAATTTA": 4045, + "GCGACTT": 4046, + "CTGAGAGA": 4047, + "GTGCCCA": 4048, + "CTAGGTT": 4049, + "TCCTGAAA": 4050, + "GTCCACC": 4051, + "TCACAGAA": 4052, + "GCGAAAA": 4053, + "GTATGGG": 4054, + "TGAACAAA": 4055, + "TAAACAAAA": 4056, + "CCGTTTT": 4057, + "TCTCAATT": 4058, + "TCCAGAAA": 4059, + "GTAACAA": 4060, + "GCATTTTA": 4061, + "TCTCCATG": 4062, + "TTATAAAA": 4063, + "CAGGCAA": 4064, + "CTAAAAAAA": 4065, + "GTTGGGA": 4066, + "TAAAGATT": 4067, + "TGAAGAGA": 4068, + "CCCCTCA": 4069, + "TGTTTATG": 4070, + "TCTACTG": 4071, + "CCAATTTT": 4072, + "GGTGGTG": 4073, + "GGAACAA": 4074, + "TGTGGGA": 4075, + "TCTGCTA": 4076, + "GAACGA": 4077, + "GTAAGTA": 4078, + "GTTGCCA": 4079, + "AAAATTTT": 4080, + "GCGCGA": 4081, + "GAAAGATG": 4082, + "GTCTCTCA": 4083, + "TCCATCAA": 4084, + "GCAGCTA": 4085, + "CACATTTG": 4086, + "CTGACAA": 4087, + "TCCACC": 4088, + "GCT": 4089, + "CCCACTT": 4090, + "GCAGGTA": 4091, + "GAGGCCA": 4092, + "TAAAGTCA": 4093, + "CTGGATA": 4094, + "CGGCAA": 4095 + }, + "merges": [ + "A A", + "T T", + "T G", + "C A", + "C C", + "T A", + "G G", + "T C", + "G A", + "AA A", + "G C", + "T AA", + "TT TT", + "T CA", + "TG A", + "TT A", + "G AA", + "T CC", + "C AA", + "C TG", + "C TT", + "G TG", + "G TT", + "G CA", + "GG A", + "C CA", + "G TA", + "G CC", + "C TA", + "T AAA", + "AA AA", + "C TC", + "G TC", + "TG TG", + "TA TT", + "CA CA", + "G AAA", + "TA TA", + "TC TT", + "TG TT", + "C AAA", + "GA GA", + "CA TT", + "TG AA", + "CA GG", + "TC TG", + "CA GA", + "TC AA", + "GG AA", + "TAA AA", + "C TGA", + "GC TT", + "G TGA", + "GC TG", + "C TCA", + "CC TT", + "CA TG", + "GC AA", + "G TCA", + "G TAA", + "TTTT A", + "TA TG", + "GA GG", + "C GG", + "GA TT", + "CC TG", + "TC TC", + "CC AA", + "G TTA", + "C TCC", + "C TAA", + "TA CA", + "C TTA", + "TC CA", + "GA TG", + "TT AA", + "GAA AA", + "TT TG", + "G TTTT", + "TC TA", + "GC CA", + "G TCC", + "C TTTT", + "GG GG", + "C GA", + "TT TA", + "CC CA", + "CAA AA", + "TG 
GG", + "TA GA", + "TA GG", + "GA CA", + "GG TT", + "CC CC", + "GG TG", + "CA TA", + "GC TA", + "TG TA", + "TC AAA", + "TG GA", + "TAA TT", + "TTA TT", + "TG CA", + "GG CA", + "GA TA", + "CC TA", + "TT CA", + "TC TCA", + "GG GA", + "C GC", + "CTG AA", + "G TAAA", + "TC TCC", + "TTTT TT", + "C GTG", + "GC AAA", + "TAA AAA", + "TC TGA", + "TCA TT", + "GG AAA", + "TG AAA", + "TCC TT", + "CC AAA", + "GAA TT", + "C TAAA", + "C GTT", + "GTG AA", + "GG CC", + "TAA TA", + "GG TA", + "TG CC", + "CA CC", + "TGA TT", + "AAAA AA", + "GC TCA", + "TCC AA", + "GA GAA", + "CTG TT", + "TA TTA", + "CA GCA", + "CTC TT", + "CTT AA", + "CA GAA", + "GC TGA", + "GTT AA", + "TC TTA", + "TA TTTT", + "GCC AA", + "CTT TG", + "GA CC", + "C GCA", + "GTA TT", + "GTC TT", + "CAA TT", + "GTG TT", + "CTC AA", + "GGA GG", + "C GAA", + "TC TTTT", + "GTC AA", + "C GCC", + "TA TAA", + "TA CC", + "TC TAA", + "CCA TT", + "C GGA", + "CAA AAA", + "CA GTG", + "TCC TG", + "CTC TG", + "GAA AAA", + "CTG TG", + "CA GC", + "TTTT AA", + "GCA TT", + "GCC TT", + "TAA TG", + "CTA TT", + "GTT TG", + "TGA TG", + "GG CTG", + "CC TCA", + "GA GGA", + "GCC TG", + "AAA TT", + "C GTA", + "TC AAAA", + "TA CAA", + "CA TCA", + "CA GTT", + "TGA GA", + "GG GAA", + "CA CTG", + "CA CAA", + "CA GGA", + "CC CCA", + "CC CTG", + "TTTT TTTT", + "TA GAA", + "GA GCA", + "CC TCC", + "CA CCA", + "TA TCA", + "GA GC", + "CA TTA", + "CACA CACA", + "GA GTG", + "GGA TT", + "TGTG TGTG", + "TA CTT", + "CA CTT", + "GTC TG", + "TGA GG", + "GA GTT", + "GAA TG", + "TCA TG", + "GA CAA", + "GA CTT", + "TATT AA", + "TAA TAA", + "GG CCA", + "CA TTTT", + "CA GCC", + "CC CTT", + "GC TAA", + "TATA TATA", + "GTG TG", + "TA CTG", + "TA GTT", + "CAA TG", + "GC TC", + "CA GTA", + "GC TCC", + "CA TAA", + "TTA TG", + "TAAA TT", + "GA TGA", + "CA TGA", + "GC GG", + "AAAA AAAA", + "CCA TG", + "GA TAA", + "GA CTG", + "TA TGA", + "GCA GG", + "GA TCA", + "G TTTTA", + "GGA TG", + "CC TGA", + "G TAAAA", + "GAA GG", + "GA TTA", + "CC TC", + "GA CCA", + "GC TTA", + "CC 
CAA", + "AAA TG", + "GCA TG", + "TA GTA", + "TA CCA", + "GG CTT", + "C GTC", + "TC TCTT", + "GG TCA", + "TTA TTA", + "TA CTA", + "TA GCA", + "TA TC", + "CTG GG", + "CA TC", + "C TTTTA", + "C TAAAA", + "GTG GG", + "GA GTA", + "CCA GG", + "GA TTTT", + "TA GTG", + "GAAA TT", + "CA CTA", + "TC GG", + "TCA GG", + "CAGG AA", + "GC AAAA", + "CC TTA", + "CA TCC", + "CTT GG", + "TGTG AA", + "TATT TG", + "CC TAA", + "CTA TG", + "GA GAAA", + "GAGA GAGA", + "GC TTTT", + "TA TAAA", + "CAA GG", + "TC TCTG", + "TGTT AA", + "TGTG TT", + "GA GCC", + "GA CTA", + "TA TATT", + "TAA AAAA", + "TTTT TG", + "GTA TG", + "CATT AA", + "TA GGA", + "TA GC", + "GTT GG", + "GAA GAA", + "TAAA TG", + "TC TGTT", + "CA GAAA", + "CAAA TT", + "TAA TTA", + "TC TGTG", + "TA TCC", + "TGAA TT", + "CTC CA", + "GTG AAA", + "GG CAA", + "GGA GA", + "GAA GA", + "GG TGA", + "GG GCA", + "CC AAAA", + "TCTC TCTC", + "CTG CA", + "CTT CTT", + "TCTT AA", + "CC CTA", + "TGTG TG", + "AAA TA", + "TGTT TG", + "GG GTT", + "GTG CTG", + "GG AAAA", + "GG GGA", + "TCA GA", + "CC TTTT", + "GAAA TG", + "GCA GCA", + "TC TGAA", + "GG GTG", + "CACA TT", + "TCTT TG", + "GG GC", + "TCC CA", + "TC CATT", + "CTG AAA", + "CTT TA", + "TC GA", + "GTT TA", + "CAA CAA", + "CTT CC", + "GCC TCC", + "TT AAA", + "GC TCTG", + "GTT TCA", + "GGA GGA", + "C GTGA", + "CA GTC", + "GAA TA", + "CA GAGA", + "CC CTC", + "CAAA TG", + "CTG CTG", + "GA TCC", + "TTTTA TT", + "AAAA TT", + "TTA TA", + "TCAA TT", + "GG TAA", + "GTTA TT", + "GC CAGG", + "GGA GAA", + "CATT TG", + "TCA CC", + "CTC AAA", + "GG TTA", + "TCC AAA", + "TC TATT", + "GCA GA", + "CTT CA", + "TCA TCA", + "C GAGG", + "TAA CA", + "GTT GTT", + "CTTA TT", + "C GTCA", + "TAA GA", + "TAA TTTT", + "CTG TA", + "TC CACA", + "GC TGTG", + "C GCTG", + "TC TAAA", + "GC GA", + "CAA TA", + "CCA CCA", + "GAA CA", + "C GAAA", + "CAGA TT", + "TCA CA", + "TTA TTTT", + "TC TCAA", + "TGA CA", + "CTCC AA", + "AAAA AAA", + "TATA TG", + "TCC TCC", + "TCA CTT", + "TC CAGG", + "CAA GA", + "GG CTA", + "GTG GTG", + 
"C GTAA", + "C GAGA", + "TGA TA", + "GGA TTA", + "CAA CA", + "C GATT", + "TGA GAA", + "CTCC TT", + "CTCA TT", + "GTT AAA", + "TCA TA", + "CC TCTG", + "CTC TA", + "GC TGAA", + "CTG GA", + "TAA GG", + "CTT AAA", + "TATT TA", + "CCA CA", + "CC GG", + "GTC AAA", + "TG GAA", + "C GGAA", + "TGA TGA", + "GTT CA", + "TAA CAA", + "GC TGTT", + "TAA GAA", + "CTG CC", + "TTAA TT", + "CCA GA", + "TCA GAA", + "GTCA TT", + "C GCTT", + "GATT AA", + "CTGA TT", + "GC CACA", + "GTAA TT", + "TC CAGA", + "GCC AAA", + "GTGA TT", + "TAAAA TT", + "CAA GAA", + "CCA CC", + "TAA TCC", + "GTT CTT", + "TC CATG", + "GC TCTT", + "TG CTG", + "GG GTA", + "TTA CA", + "GC CATT", + "GCA CA", + "GCAA TT", + "TCC CTG", + "TG TGA", + "TC GAA", + "GGA CA", + "GGAA TT", + "GTG GA", + "CTT CTG", + "TCC CC", + "GCC CC", + "CTT GA", + "TAA TGA", + "TAAA TA", + "TATA TA", + "CTG CAA", + "TCA TTA", + "GTA TA", + "TCC CCA", + "C GTTA", + "GCA GAA", + "TGA GTT", + "CTTTT TT", + "C GATG", + "CTT TCA", + "AAAA TG", + "CAGG TT", + "CTAA TT", + "C GCCA", + "TGAA AAA", + "GTT CC", + "GTCC TT", + "GTCC AA", + "GTTTT TT", + "CTC TGA", + "GC GC", + "GTT GA", + "TGAA TG", + "CTA TA", + "GCA GTG", + "CCTT AA", + "TCA CCA", + "TCA CTG", + "GCC CTG", + "TAA CTT", + "CAGA TG", + "GTA GG", + "TC TATA", + "GAGA TT", + "GTC TA", + "TTTT AAA", + "CACA TG", + "TGA CC", + "CA CAAA", + "GTG TA", + "GG GAGG", + "GCTT TG", + "CAA AAAA", + "GA GGAA", + "GTT CTG", + "TTTT TA", + "GTC TCA", + "GTT CAA", + "TC GTG", + "GCTT AA", + "GCA CC", + "CTCC TG", + "TAAA TAAA", + "CTA CA", + "CTT CCA", + "TCC TCA", + "C GCAA", + "GAA AAAA", + "GCC CA", + "TC GTT", + "GTA GA", + "CTC TCA", + "GTC CA", + "TGA CTT", + "TCC CTT", + "GC CATG", + "CACACACA CACACACA", + "GTGA TG", + "CC TCTT", + "GC CAGA", + "TCC TA", + "C GTTTT", + "GTA CA", + "GCA TA", + "GAA TTA", + "TGTGTGTG TGTGTGTG", + "CC CAGG", + "GG TTTT", + "TCAA AAA", + "TC TATG", + "CCA TA", + "TGA CAA", + "GGA TA", + "TCA GTG", + "GTA TTTT", + "GAGA TG", + "GC GTG", + "C GTCC", + "TTAA AAA", 
+ "TAA TCA", + "CAA TTA", + "CCA CTG", + "CGG TT", + "GTT GAA", + "TGA TTA", + "CCTT TG", + "CGG TG", + "CAGG TG", + "TCAA TG", + "CTGA TG", + "TCA GGA", + "GTT TAA", + "TATT AAA", + "CTC TTA", + "GCA GGA", + "CTC TCC", + "GAA CC", + "CTT TAA", + "GG GCC", + "GTA TTA", + "GC GCC", + "CCAA TT", + "GC TAAA", + "TGA CTG", + "GATT TG", + "GA TAAA", + "TCA GCA", + "GTT CCA", + "GAAA TA", + "GA CAAA", + "GA GTC", + "GC TATT", + "TCA CAA", + "GAGG TT", + "TAA CC", + "GAA GGA", + "GC TCAA", + "GAAAA TT", + "CCA GCA", + "GTTTT AA", + "GTG CC", + "TGA GGA", + "CA TAAA", + "GG TCC", + "TCA TTTT", + "TATT TATT", + "TAA TAAA", + "GCC TA", + "CTTTT AA", + "TAA GTG", + "TAA GTA", + "CTG GAA", + "CACA CA", + "GA CAGA", + "CAA CC", + "GG GAAA", + "CCA GAA", + "TCA GTT", + "TAA CTA", + "CTAA AAA", + "TGGG TT", + "TGA GTG", + "TAAAA TG", + "TATATATA TATATATA", + "GCA CTG", + "GA CTC", + "TA CAAA", + "TAAAA AAA", + "TC TACA", + "GTT GTG", + "TC GCC", + "CC CAAA", + "GTCA TG", + "CTG CTT", + "GGAA TG", + "CTA TTA", + "GA TATT", + "TA GAAA", + "GG CAGG", + "GA TGAA", + "GTA GAA", + "TCC TGA", + "TAA CTG", + "GCTG GG", + "GCAA TG", + "GCC CCA", + "GTT TGA", + "CATT TA", + "GTG CA", + "CTT GAA", + "GTG GAA", + "CTT CAA", + "TAAA TTA", + "GTG GCA", + "TCC TTA", + "GGAA AAA", + "TTTT TTA", + "CC TGTG", + "GTAA TG", + "GTG TTA", + "CTA GG", + "CAGG CTG", + "GA CACA", + "GAAAA AAA", + "TC GC", + "GTAA AAA", + "TGTT TA", + "TCTC TA", + "GTCC TG", + "CCA GGA", + "GAA CAA", + "TAA GTT", + "TGA GCA", + "GC TCCA", + "TAA GCA", + "CTCA TG", + "GTC TTA", + "CC CACA", + "CA TATT", + "GCC TCA", + "CA CTC", + "CTT CTA", + "TGA TTTT", + "TC GCA", + "CC TGTT", + "GAA GCA", + "GCAA AAA", + "GC GGA", + "CCA CAA", + "GC GCA", + "CA TATA", + "GA CATT", + "GTT CTA", + "CAAAA TT", + "GAAA GAAA", + "CC CGG", + "TA CACA", + "CCAA AAA", + "GAGG TG", + "GG CTCA", + "CA GTGA", + "TCC CAA", + "TA TCTT", + "TGA GTA", + "TC GTA", + "TTTT CTT", + "GTG GGA", + "GA GCTG", + "CC CTCC", + "TAGG TT", + "TTA GG", + "TAA 
TATT", + "CCA GCC", + "CA TCTT", + "GTC TGA", + "GTT TCC", + "CC TGAA", + "GGA GCA", + "GAAAA TG", + "TCA GTA", + "TAA CCA", + "GA TGTT", + "CTG TTA", + "CA TGTT", + "GG CGG", + "CA TGTG", + "GG GAGA", + "CTT TGA", + "TCTT TCTT", + "AAAAAA AAA", + "GGGG TG", + "CTT TCC", + "CTT GTT", + "GCA TTA", + "CC CAGA", + "CAAA TA", + "TC GGA", + "CA GCTT", + "TCA CTA", + "TAA TTAA", + "TAA GGA", + "GAA CTG", + "GCA CAA", + "GC GTT", + "GG CTC", + "TC TTTTA", + "CC TCCA", + "GG CAAA", + "CA GCTG", + "CTA CAA", + "TA CATT", + "GC TATG", + "CTT GTG", + "GA GTCA", + "GTTA TG", + "CTG CCA", + "GTC TCC", + "TGA CCA", + "CA CCTG", + "TATA TTA", + "TGA TCA", + "CA GCAA", + "GA TGTG", + "GTC TTTT", + "CTA GAA", + "GC TACA", + "CTG GGA", + "GGGG TT", + "CAA GTA", + "CAA GGA", + "CC CTCA", + "TA GCC", + "GTT GGA", + "GC TATA", + "TCTG AAA", + "TA TGTT", + "CC CCTT", + "GTT GTA", + "CC CTGA", + "TGA CTA", + "CAA GCA", + "CAA TAA", + "GAA CTT", + "CA TGAA", + "CTTA TG", + "CTAA TG", + "TC TAAAA", + "CCAA TG", + "GAA GTG", + "CC TCAA", + "CC CATT", + "CA GTCA", + "GAGAGAGA GAGAGAGA", + "TA TGTG", + "GCA GTGA", + "TCTCC TT", + "TCC CAAA", + "CCA TTA", + "CCA GTG", + "GCA TCA", + "TCAAA TT", + "GA TCTT", + "GA CAGG", + "GGA GTG", + "GTA GTA", + "CAA CTT", + "GAA GTT", + "CC CCTG", + "TCTC AAA", + "GG GTC", + "GA GCTT", + "TATG AAA", + "TA TGAA", + "GA CATG", + "CAA GTG", + "GA TATA", + "CA TCTG", + "CTG TGA", + "TAA TTTA", + "GG CAGA", + "GC GAA", + "CC TAAA", + "CCA TCA", + "CA CTGA", + "GGA CTA", + "GA CGG", + "CTC TTTT", + "CTG TCA", + "TCTCTCTC TCTCTCTC", + "TTAA TG", + "GCA GCC", + "CAAAA AAA", + "GCA CCA", + "CTA TTTT", + "GA GCAA", + "CTT GGA", + "CTG GTG", + "GAA TAA", + "TCC TTTT", + "GAA GTA", + "CA GTAA", + "CAA CCA", + "CTG TAA", + "TGA TAA", + "GCA GTT", + "CA CGG", + "TAAA TAA", + "CTG TTTT", + "CTA CTA", + "GC TCTA", + "C GAAAA", + "CAA GTT", + "CTT GTA", + "GAA TGA", + "GA GTGA", + "GCC TGA", + "GG TTTG", + "CC CATG", + "GG GGAA", + "GAA GAAA", + "TG TTA", + "CAA TTTT", + 
"TATA TTTT", + "CTC AAAA", + "GG TGGG", + "CC GTG", + "TATT TCA", + "CC CCAA", + "TATT TAA", + "GG CTGA", + "GG TGTG", + "CA TCAA", + "CA CTCA", + "TCTCA TT", + "GAA TTTT", + "GAA TCA", + "CAGG AAA", + "CA TACA", + "TA TTTTA", + "TTA TAA", + "GAGG AAA", + "CA TATG", + "CTT TCTT", + "CAA CTG", + "GG GCTG", + "CC CCCA", + "TTTG AAA", + "CATT AAA", + "CTT AAAA", + "GA CTGA", + "CAA TGA", + "GG CACA", + "CCA GTA", + "GGA TGA", + "GTTTT TG", + "GCA TTTT", + "GTG CCA", + "GCA GTA", + "GCC CTT", + "TC GTC", + "GAA CTA", + "GTG GTT", + "GTG TGA", + "GTG CTT", + "C GCTA", + "GTG TCA", + "TCTT TA", + "GCC TTA", + "CC TATT", + "CAAAA TG", + "GAA CCA", + "CTC CAGG", + "GA CTCA", + "CATG AAA", + "GC TAGG", + "TGTT AAA", + "GC GTA", + "GCA CTT", + "TCTT AAA", + "TAA GAAA", + "GG CCTG", + "TCC CTA", + "GTG GTA", + "CTG CTA", + "GGA GTT", + "GG TAAA", + "CAAA CAAA", + "GA TATG", + "TCA TGA", + "GA CCTT", + "TAA TATA", + "GC TAGA", + "GGA CTG", + "GG CATT", + "CA GTTA", + "CC CTAA", + "CA CCTT", + "GG TGAA", + "CA GCTA", + "GTG TTTT", + "CAA CTA", + "GA TCAA", + "GA GAAAA", + "TGTG AAA", + "AAAA TA", + "GATG AAA", + "CTC TAA", + "TTA CTT", + "GA TCTG", + "CCA CTT", + "GA GTTA", + "CAA TCA", + "GGATTA CAGG", + "TTTA TTTT", + "TACA TA", + "TTTTA TG", + "GA GTAA", + "GCTG AAA", + "GTA CTG", + "GC TCTC", + "TATG TA", + "TGTG TA", + "TCA TAA", + "GGA CTT", + "TCTCC AA", + "GCA TGA", + "GA CGA", + "CGCC TG", + "GA CCTG", + "GG TCTT", + "CA CCAA", + "GA TC", + "GA CCAA", + "AAAA TTA", + "GTAAA TT", + "CCA GTT", + "CA GAAAA", + "TAA CAAA", + "GG TGTT", + "GAAA TTA", + "TGCC TCA", + "CC GCC", + "CCA TTTT", + "CTT GCC", + "TCTG TA", + "CTG GCA", + "GG GATG", + "CCA TGA", + "CTA CTT", + "TAGG TG", + "TAAAAA TT", + "GAAA GAA", + "TAAAA TA", + "CTTTT TG", + "GTC AAAA", + "GGA CAA", + "TCTGA TT", + "CTC TCTT", + "TAA TTTG", + "CTC TTTG", + "GG CCTT", + "GGA TTTT", + "CTA CTG", + "GTT GCA", + "GG CTCC", + "CTC TGTG", + "CTC CAGCC", + "TTA CAA", + "GGA CCA", + "GGAA GGAA", + "TAAA GAA", + "TTA 
GAA", + "GTG AAAA", + "CTT GCA", + "TGGG TG", + "GGA GCC", + "CC TCTA", + "C T", + "GG GCTT", + "GG CATG", + "CTG GTT", + "TA CAGA", + "GATT AAA", + "CTC TGTT", + "TTA TCA", + "CTG AAAA", + "GTA GTT", + "GG GTCA", + "G T", + "CA GCCA", + "GC GTC", + "CA CTTA", + "GTG CTA", + "TC TTATT", + "GTA CTT", + "GG TATT", + "TA GAGA", + "TA CATG", + "CCA CTA", + "TGA GAAA", + "CAA TAAA", + "TCC AAAA", + "CGTG AA", + "GG TCTG", + "CTGAA TT", + "TCA GCC", + "CC TCTC", + "GTT AAAA", + "GG GATT", + "TCC TAA", + "CA CTAA", + "GGA GAAA", + "CCTT CCTT", + "GTT TCTT", + "TA TCAA", + "GA TACA", + "TAATCC CAGCA", + "CC GCA", + "TGAAA TT", + "C GTAAA", + "CTC TCTG", + "TC TTTTTT", + "GTA CAA", + "CCAAA TT", + "TGTA TTTT", + "TC GCTT", + "GG GTGA", + "GA TAGA", + "CTT TATT", + "TAAA CAA", + "GTT TATT", + "TGAA TA", + "CTA CCA", + "GTG TCC", + "CC CGA", + "TTTA TTA", + "CTCC AAA", + "TTTTTTTT TTTT", + "TCA TCC", + "GAA GCC", + "CTAAA TT", + "CAAA TTA", + "CCCC AAA", + "TCTT CTT", + "TAGG AAA", + "CA CGA", + "CA TTTTA", + "GTG CAA", + "TCTCC TG", + "TATTTT AA", + "GTT TGTT", + "GA GCCA", + "GG CCAA", + "CATT TCA", + "CA TCCA", + "CC TATA", + "GA CTTA", + "TCAAA TG", + "GTA TCA", + "TAAA TTTT", + "CTGA GGCA", + "GCC CAA", + "GG TTAA", + "TA TCTG", + "TGA CAGA", + "GGA GAGA", + "GCTG CTG", + "CC CTTA", + "TCC TCTG", + "GTA GCA", + "CCTG AAA", + "CC GAA", + "TTTT TAA", + "CTA TAA", + "CCTG TA", + "TTA CTG", + "GTA TAA", + "GG CGA", + "GA CTAA", + "TCA GAAA", + "GTG TGTG", + "CAAA GAA", + "CC TATG", + "GCA GAGA", + "CC GTT", + "TTTTA TTTT", + "GGAA GAA", + "TTA CTA", + "GCC TGGG", + "TCC CTC", + "TCC TCTT", + "GGA TCA", + "GG TCAA", + "TC GAGA", + "TATT CTT", + "TA CTC", + "GTTAA TT", + "GC GAGA", + "CTTAA TT", + "TCC TTTG", + "GTC TAA", + "CA CCCA", + "GG GTTA", + "GG GCAA", + "GGAAA TG", + "GCAAA TT", + "TA GATG", + "GCA GAAA", + "AAAAAAAA AAAAAAAA", + "CC TACA", + "GGA GTA", + "TC TAATT", + "CAA CAAA", + "TA GATT", + "GG TTTA", + "CC TAGA", + "CTT TAAA", + "TA CTTA", + "TAA TGAA", + "CTA 
TCA", + "TA GTAA", + "CAGA GAA", + "CAA GAAA", + "GGGG AAA", + "CGTT AA", + "CGTG TT", + "TCTG TCTG", + "TTTTAA TT", + "CTG GCC", + "TAAA TGA", + "C GTCAA", + "TTA GTA", + "GTC TCTG", + "TTTT AAAA", + "CA GTTTT", + "CTT CCTT", + "TATA TAA", + "GC TTTTA", + "TTTT TCA", + "GG TC", + "TTA TTAA", + "TTTT GTT", + "CA TAGA", + "TA GGAA", + "GAGA GAA", + "GTA GCTG", + "TTA TGA", + "GTA GTG", + "GGA GAGG", + "CTC TGAA", + "TA GTC", + "GA CTCC", + "TCC CTCC", + "TAA TGTT", + "CA TCTA", + "GCCA CCA", + "GTA CTA", + "TGGG AAA", + "CGCC TT", + "GCC CGG", + "GGA GGAA", + "GTA CCA", + "CGC AAA", + "CA TAAAA", + "TAA CATT", + "GC TAAAA", + "TCTT CTG", + "GCC AAAA", + "GTA TGA", + "GTC TTTG", + "TA CTGA", + "TCC CAGG", + "TTA TTTA", + "TTA GTT", + "GGA CC", + "TA TAAAA", + "CAAA CAA", + "CTT CTC", + "TCTA TCTA", + "GAAA TAA", + "GTG TAA", + "CTT TGTT", + "GA TAAAA", + "GCC CAGG", + "GC GATT", + "AAAAAA TT", + "TA CAGG", + "GG CTAA", + "TA GCTT", + "GTC TCTA", + "CTCC TGA", + "GAA TAAA", + "TTA CCA", + "GG GACA", + "GCCA CTG", + "GTT TAAA", + "GTC TGTG", + "TGA CAAA", + "TACA TTTT", + "GCCA CC", + "TG TTTT", + "TA GCAA", + "TTA TAAA", + "GA CCCA", + "GCA GC", + "CAGA CAGA", + "CA CAAAA", + "GCC CTA", + "TATT AAAA", + "C GTATT", + "CCA TCC", + "TC GATT", + "GAA GGAA", + "GA TCCA", + "TATT TGA", + "GTGAA TT", + "TA CCTT", + "C GTCTT", + "CC TAGG", + "TC GAAA", + "CTT TCTG", + "TGAA GAA", + "TCTC TCA", + "GTC TCTT", + "GGA GGGG", + "GTC TGTT", + "CTA TGA", + "GGAAA TT", + "GCA CACA", + "GCC TTTT", + "CA GTCC", + "CTG GTA", + "GCA TCC", + "TA GTTA", + "GG CTTA", + "GA GTCC", + "TG AAAA", + "TAGA TAGA", + "TGTT TGTT", + "TA CTCA", + "CATT TAA", + "GA TTTTA", + "CA CTCC", + "GAAA CAA", + "GC GCTG", + "TCTT TCA", + "CTG TCC", + "GAA CTCA", + "CGG AAA", + "TATT GTT", + "GCA CTA", + "TATT CAA", + "GC GGGG", + "GTG GCC", + "TAATT AAA", + "TA CTAA", + "GC GGTG", + "TA CCAA", + "GG TATA", + "CTA GTT", + "GCA GAGG", + "CTTTT TTTT", + "TTTTTTTT TTTTTTTT", + "TACA GTA", + "CCA TGTT", + "TA GTGA", 
+ "CGTG TG", + "GC TCTGA", + "CTT CCTG", + "TC GCTG", + "TAAA TCA", + "TCCAA TT", + "GTT TCTG", + "GAA GAGA", + "GG GTAA", + "CCA TAA", + "TTA TATT", + "C GAATT", + "CC GGA", + "TGA GCC", + "CC GTA", + "CAGA GGA", + "GTG TTTG", + "GA CAAAA", + "TTTTTT AAA", + "GTT GCC", + "GA GTTTT", + "TC AAAAAA", + "TGTT TCA", + "TA TCTA", + "TCTC TCC", + "CTC CACA", + "TAAA TATT", + "TTTT CTG", + "CTC TCAA", + "CCTT AAA", + "TCTTTT AA", + "GAA CAAA", + "TTA GCA", + "GCTCA TG", + "TAAA GTA", + "GGA TAA", + "TTATT AAA", + "CTC CATT", + "TCTC TGA", + "TTA TTTG", + "CCTG TAA", + "TTA TATA", + "GA CTTTT", + "TGTT GTT", + "GCAAA TG", + "CTT CAAA", + "GAA TATT", + "GAA TCC", + "CTC TTAA", + "GCA TAA", + "GAA TGAA", + "CTTAA AAA", + "TAAAAA TG", + "TTTTAA AAA", + "CTC TGGG", + "TGA TCC", + "GC TCTCA", + "CTC CAGA", + "GAGTG CAGTG", + "CAA TATT", + "TA GAAAA", + "GTAAA TG", + "TA GCTG", + "GC TCAAA", + "GCA GGAA", + "TA CCTG", + "GG GAAAA", + "TTTT CTA", + "GGGG GGGG", + "CC GA", + "CTT TGAA", + "GGA GGTG", + "TA GTCA", + "GG CCCA", + "TGA TGTT", + "CAAA TAA", + "TCTT CCA", + "GC GCTT", + "GTA TTTG", + "GTC TC", + "GAAA TCA", + "TGA TAAA", + "CATT CTT", + "TA TCCA", + "GCC TCTG", + "TGA GATG", + "C GCCAA", + "GTTTTA TT", + "TATA TATT", + "GTA GGA", + "GACA GAA", + "CTCCAGCC TGGG", + "GC GTGA", + "GG TATG", + "GAGG GAGG", + "TCA TTTG", + "CTA CC", + "TACA GAA", + "GG TAGA", + "GA TCTA", + "GTC CATG", + "TGA GGAA", + "TAA TAAAA", + "TAAA CTT", + "TCA CATT", + "GGA GGCC", + "TCA CAAA", + "CA CTTTT", + "CGG CC", + "CAA CAGA", + "GTA GAGA", + "GTTA TTTT", + "CGTT TG", + "TC GTCA", + "TCTG CTG", + "CAA CACA", + "GG TAGG", + "GCA GCTG", + "TAGTA GAGA", + "CAA GCC", + "GCA TTTG", + "TAA TATG", + "GCTT AAA", + "GCTT CTG", + "CTC TCCA", + "TCA TCTT", + "C GTCTG", + "TCA TTTA", + "CA TAGG", + "GC TCCTT", + "TGTT CTT", + "TACA TTA", + "CACA GAA", + "TAAA TATA", + "TA GAGG", + "GA TAGG", + "TCC TGAA", + "GGA GCTG", + "TGA TATT", + "TCA TTAA", + "CTTTT AAA", + "TC GTTA", + "TAAA CTA", + "GTT TGAA", + 
"TAAAA TTA", + "CA CCCC", + "TCA GAGA", + "CTCC TGCCTCA", + "TGA CATT", + "GTA TTTA", + "CTT CATT", + "GAAA CTG", + "TAA CACA", + "GTT CAAA", + "GGA GATG", + "TC GGCC", + "CAGCA TT", + "TC GATG", + "TATT CTA", + "CTG TGAA", + "TATT GAA", + "TTTT CCA", + "TATT TCTT", + "GGTG AAA", + "CTGA GAA", + "GCA CAGA", + "GC GAGG", + "CTG TGTG", + "TGAAA TG", + "TGA TGAA", + "GTCC AAA", + "CTCAA TT", + "TCCA GAA", + "GTA TATA", + "TAAA GTT", + "TCTC AAAA", + "TCCA TCA", + "GTC TGAA", + "TGA GAGA", + "TGA TTTG", + "TTA GCC", + "CTC CATG", + "TCC CTGA", + "GA GCTA", + "CCCC CCCC", + "GTG GAAA", + "CTG GGAA", + "CAA TGAA", + "CCA CACA", + "CTT TCAA", + "C GGAGG", + "TC GTGA", + "CCA GAAA", + "GTTTT AAA", + "TGTT GAA", + "TCC TGTG", + "CTAAA TG", + "TCC TTTA", + "GTC TGGG", + "TCTC TTTT", + "TA CGG", + "TATT GTA", + "TTA GTG", + "TTA CC", + "TAATCCCAGCA CTTTG", + "TCTG GAA", + "CTT CTCA", + "CGCA TT", + "TATT TAAA", + "TCA CACA", + "TAA TCAA", + "GC GAAA", + "GG GCCA", + "GTT CATT", + "GAGAA AAA", + "TTTT GTA", + "TA CTTTT", + "TC GAGG", + "GTGAA AAA", + "CAA TATA", + "TCC CATG", + "CAA TTAA", + "CTG GAAA", + "CCCA GCA", + "TCC CATT", + "TCC TGTT", + "CTC TTTA", + "TCC CCTT", + "GTT TCAA", + "GTC CAGG", + "GGAA GGA", + "TA GTTTT", + "TGA CCTT", + "GTGCTG GGATTACAGG", + "TATT TATA", + "TCTG CAA", + "CTGAA AAA", + "TATG TTA", + "CTT CACA", + "GCA CAGG", + "CCTG CTG", + "TTTT TTAA", + "GTTA TTA", + "CC CTTTT", + "TGA TTTA", + "TA CAAAA", + "TAA GTAA", + "TTTT TAAA", + "CA TCTC", + "GTG GTGA", + "GTG GAGA", + "CTC TGCA", + "GTTAA AAA", + "TACA TACA", + "CTT TGTG", + "GGA CACA", + "TCTGA TG", + "TA TTATT", + "TCTT CTA", + "CTG TGTT", + "TCA GCTT", + "CTT TATA", + "GG CGC", + "TCC CTCA", + "GTA CC", + "TGGA GAA", + "CAAAAA TT", + "TCTT TAA", + "CTC TCTC", + "TGA GTGA", + "GCA GCTT", + "CGGA TT", + "TA CGA", + "TCTT GTT", + "TC GTAA", + "GCC TGTG", + "TATT CTG", + "GG GATA", + "GG GTCC", + "TGA GATT", + "CTTTTA TT", + "TCC CACA", + "CATG GTG", + "TTA GGA", + "GAA CACA", + "TCA TAAA", + 
"CAA CATT", + "GG TCCA", + "GAA TTTG", + "TATTAA TT", + "TCC TGGG", + "GCA GCAA", + "CTC TTCA", + "GAA GAGG", + "TCTG TCA", + "CTGAA TG", + "CCA CAAA", + "GTG GAGG", + "TGA TTAA", + "CTCC CTCC", + "CACACACACACACACA CACACACACACACACA", + "GC GATG", + "CATT CTG", + "GTA GAAA", + "TCA TCAA", + "TTTT CAA", + "TATG TATG", + "CCAAA TG", + "TAA TTTTA", + "TAA GGAA", + "CTT GAAA", + "AAAAAAAA AAAA", + "GC TCCTG", + "GCA GATG", + "GAAAAA TT", + "GA CGC", + "GTG GGGG", + "GTCAA TT", + "CTT GCTT", + "TGA CACA", + "GTG TGTT", + "CCA GAGA", + "CCCA GCC", + "TAAA GAAA", + "GTC CATT", + "TAAA TTAA", + "CC CAAAA", + "GAA TTAA", + "TGAA TTA", + "TTTT TTTG", + "CCA GCTT", + "CAA TTTG", + "CTG TTTG", + "GTC TCAA", + "GTT TGTG", + "GG CATA", + "GG TACA", + "TGA TGTG", + "GATT TCA", + "TCTG CTT", + "GTAA TTA", + "TAA AAAAAA", + "GCC GCC", + "TGTGTGTGTGTGTGTG TGTGTGTGTGTGTGTG", + "GC GTCA", + "GC TCATT", + "GAA CCTG", + "TAAA CAAA", + "GTG CTGA", + "TCA GGAA", + "TCC TCAA", + "TCTA TTTT", + "TCTG TTTT", + "CAGA GCA", + "CCA GGAA", + "GTC TTTA", + "TCTT CAA", + "TCAAAA TT", + "GC TTATT", + "GTT CCTT", + "CA CCTA", + "TCA CTGA", + "GAA GCAA", + "TAAA GA", + "TCC TTCA", + "TCTCA TG", + "TCA GTGA", + "TACA CAA", + "CA CGTG", + "CC TAAAA", + "GCC TTTG", + "GG CTTTT", + "GTT GAAA", + "GTT CTC", + "CTA GA", + "CTA CAAA", + "GCA CAAA", + "TTA CATT", + "GG CCCC", + "TAA TGTG", + "CTG CCTT", + "TCC CAGA", + "GTGAA TG", + "GGA CAGG", + "GGA TGTG", + "GTT TATA", + "TGA CCAA", + "GTG GCTG", + "GTT CTCA", + "CTTA TTTT", + "CTG GAGA", + "TTA CAAA", + "GTC TTCA", + "CAA GAGA", + "CCA TTTG", + "TCA CAGA", + "CTA GTA", + "CA TTATT", + "TTA GA", + "GC TCTCC", + "GC GCCA", + "TATG TTTT", + "TCC TCCA", + "CAGAA AAA", + "GTG GGAA", + "TAA TCTT", + "TGA GTCA", + "CTG CTC", + "GTC TCCA", + "TCA TGTT", + "GTT TCCA", + "TAA GCAA", + "CTAA AAATA", + "TGA CTGA", + "TC GGTT", + "TTA GAAA", + "TAA GCC", + "TAAA GCA", + "CC TCTCC", + "CC TCCTT", + "TCA GATT", + "TATG AAAA", + "GCTGA TG", + "CATA TTTT", + "GC TCCAA", + 
"CGG CGG", + "CCA CTGA", + "CA GCAAA", + "CTG TCTT", + "CTA GCA", + "TC GGGG", + "CACA GCA", + "GC TGATT", + "CTA GGA", + "TAA CTC", + "TCA TATT", + "CCTT CTT", + "CTG CAAA", + "CC CGC", + "GG TCTA", + "CCCA GGA", + "GTG TCTG", + "TAATAA TAATAA", + "TCA CATG", + "CAA TTTA", + "TATATATATATATATA TATATATATATATATA", + "CCA CAGA", + "TCAA TTTT", + "GTA TTAA", + "GAA CATT", + "TCTC TTA", + "CTA TTTG", + "TCTT TCC", + "GGTT AAA", + "GC TAATT", + "CTG CTGA", + "TA CCTA", + "CAGG GTT", + "TC GCCA", + "CAAAAA TTA", + "CTT CTGA", + "GCA TGTG", + "CTA TTAA", + "GCA CATG", + "CAA CATG", + "TCA TGAA", + "GAA TGTT", + "GG GTTTT", + "CTG CCTG", + "GTC CACA", + "TAAA CA", + "CTC TGGA", + "GA CCCC", + "GG CAAAA", + "TCTG TTA", + "CTA GTG", + "CTA TATA", + "TCA GTCA", + "TAA CTAA", + "GAA GATG", + "GTC TTAA", + "CAA GGAA", + "GTAA AAAA", + "TCC CCTG", + "TC GCAA", + "TCTG CCTG", + "CC TTTTA", + "GTCC CAGCTA", + "TATA TATG", + "TATT GTG", + "TGTG TTTT", + "GC GCAA", + "CACA GTG", + "TAA GATT", + "CTC TGTA", + "GGAGG CTGA", + "GGA CAAA", + "TATTAA AAA", + "TC GTCC", + "TC GGAA", + "CTA TAAA", + "CTT CAGA", + "CTA GAAA", + "CATT CAA", + "CA CGCA", + "CAGGA TT", + "CCA TCTT", + "GTA GCC", + "GAA TTTA", + "CA CGC", + "CAA TCC", + "TGA GCAA", + "GAA GCTG", + "TCAA TTA", + "GAA GTCA", + "CTG CACA", + "CCA CGG", + "GGA TCTT", + "CTCCTGCCTCA GCCTCC", + "TAAA TGAA", + "CC GTC", + "TC GGTG", + "TTTTA TTA", + "GCA GGGG", + "GCA GGTG", + "TCTA TTA", + "TAA CTTA", + "CTAA TTTT", + "CC CGCC", + "TAA TACA", + "GGATT AAA", + "TCTC TCTG", + "GCTT CTT", + "CATT TATT", + "CCA GAGG", + "GGA CAGA", + "GCCAA TT", + "TCC CCAA", + "GTT GATT", + "GAA GAAAA", + "GCA TTTA", + "CTC TAAA", + "CACACACA CACA", + "CC TCAAA", + "TA TAATT", + "CAA TGTT", + "GCC CAGA", + "GTA TATT", + "CTAA AAAA", + "CCA CAGG", + "TAA GAGA", + "TCC TTAA", + "TA TTTTTT", + "GAA TATA", + "GGA TTTG", + "GTG TGAA", + "CTG GCTT", + "GC GGCA", + "TCC GCC", + "GCA TCTT", + "TC TAATA", + "CTG CATT", + "CTC TGCC", + "TCA CTCA", + "TCA GCAA", + 
"TATTA TG", + "CCA GCTG", + "GA TCTC", + "GCC TCTT", + "CTT CCAA", + "TCC TAAA", + "TCA TCTG", + "CTA TTTA", + "CTG CAGG", + "CAA GCAA", + "GC GGAA", + "GAAA TAAA", + "TAAAA TAA", + "TCA CCTT", + "CCA TGTG", + "GA CCTA", + "CAGA TGA", + "GTG GCTT", + "TTATTA TTATTA", + "TCC CGG", + "TATT TGTT", + "CTG TAAA", + "TCCA TCCA", + "CTG TATA", + "GTT TCTA", + "GTT GCTT", + "CCA TGAA", + "GC TCTTA", + "CTT CATG", + "GTT CCTG", + "GCTG GGA", + "TCA GAGG", + "CATT AAAA", + "TCA GTAA", + "GAA TGTG", + "CTTA TTA", + "GCA CTGA", + "TGA GGTT", + "CA TCAAA", + "CTT CTCC", + "GTT TATG", + "CTT TCCA", + "GTG CCTG", + "GAAA GGA", + "GCA TCTG", + "TA CCCA", + "TAA CAGA", + "AAAAAAAA AAA", + "CTA TGAA", + "CA GTAAA", + "TA GCTA", + "TC GTTTT", + "GTG TCTT", + "GA GCAAA", + "TC TAAAAA", + "GTT CACA", + "GAAA TGA", + "CAAA TGA", + "GCC CTGA", + "GTG TTTA", + "TCA TGTG", + "CATA TTA", + "TCAAAA AAA", + "TAA GTTA", + "TCTC TCTT", + "CCA GTGA", + "CC TCTGA", + "CAA GATG", + "GCC TGTT", + "GTT TGGG", + "CATT CATT", + "GCC CCTG", + "GTT CTGA", + "GC GGCC", + "GC GGTT", + "CAAAA CAAAA", + "TACA TATA", + "GAATT AAA", + "TCAA GAA", + "CTG TATT", + "TTTT TATT", + "GA TTATT", + "TCTAA TG", + "GTT GCTG", + "TGAA TGAA", + "TCA GCTG", + "CTT GATT", + "CAGAA TG", + "CTAA TTA", + "TATAA TG", + "GTTTT GTTTT", + "CCA GCCTG", + "TGA TGGA", + "GCA GATT", + "CTC TATT", + "GCA GTCA", + "TAA GTGA", + "CTA CACA", + "CGCA TG", + "TA GCCA", + "GTG GCTCA", + "CAAA TAAA", + "GTG CTCA", + "TTTT TTTTTT", + "TAA CATG", + "TCCCA GCTA", + "CAAA GTA", + "TCA TATA", + "CAGCA TG", + "TGA TCTT", + "CA TAATT", + "TGTG TTA", + "TTTT GAA", + "TTAA TTA", + "GATA TTA", + "TCA TTCA", + "TGA TATA", + "TGA CTCA", + "GA CGTT", + "TGA CATG", + "GTT GTGA", + "CA TTTTTT", + "GCC TGGA", + "CTA TGTT", + "CTT TGGG", + "GTC TCAAA", + "CTG GCTG", + "CCA CATG", + "GG CGTG", + "CTTAA TG", + "TAA GATG", + "GTA TAAA", + "TGTA TTA", + "TAA CTCA", + "GAGAGAGAGAGAGAGA GAGAGAGAGAGAGAGA", + "GCA TGAA", + "GTTAA TG", + "TCCA GGA", + "GAGA GAAA", + 
"TCTC TGTG", + "CTC TCTA", + "CCA CCTG", + "GCCA GGA", + "CTG GAGG", + "CCA TTTA", + "GTC TGGA", + "GCC CACA", + "TAGA GAA", + "CAA CTCA", + "GGCA GGA", + "TCTTA TG", + "CAAA GGA", + "GG TAAAA", + "GAGA GGA", + "GTC CAGA", + "GCC CTCA", + "GATA TTTT", + "CAGG GAA", + "CCA CATT", + "GA GGAGG", + "GAAA CTT", + "CA GAATT", + "TCA GATG", + "TATT TCC", + "TACA GTG", + "TGA GCTG", + "CCA TCTG", + "GAGAA TG", + "TCAA CAA", + "A TT", + "TAA CTGA", + "TGA GAGG", + "CA CTGAA", + "CCA CCTT", + "CTG CAGA", + "TCA CCAA", + "TGA GCTT", + "CAAA GCA", + "GG TTTTA", + "CGG GGTT", + "TCCAA AAA", + "TATG TATA", + "CCA GATG", + "TCCA TTTT", + "CTG CTCA", + "GA TAATT", + "CCA CCAA", + "CTCC TCC", + "GA GAATT", + "GAAA GTA", + "TAAAA TAAAA", + "CTT CTTA", + "CTG TTTA", + "GAA TCAA", + "GCA TGTT", + "GCA CGG", + "GA CTGAA", + "GTG CACA", + "GA CGTG", + "TATA CAA", + "TC GACA", + "GAA GACA", + "TAAA GGA", + "GA TCAAA", + "CAGTG TG", + "CTA GCC", + "GAGG AAAA", + "TCTG AAAA", + "GAA CCCA", + "GATG GATG", + "GTT CTTA", + "CTA TATT", + "GCA TTAA", + "TCTCTCTCTCTCTCTC TCTCTCTCTCTCTCTC", + "TCA GTC", + "TATTTT TG", + "GAGGA TT", + "GTA TGTG", + "TAA CCAA", + "GTT GTTTT", + "TTTT TCTT", + "GTG TTAA", + "CTT GGAA", + "AAAAAA TG", + "CAA TGTG", + "GTG CCTT", + "GCC TCAA", + "GA GTCTT", + "GCTAA TTTT", + "CGAA AAA", + "GTG TATA", + "GC GTTA", + "CTGCA CTCCAGCCTGGG", + "GTT CATG", + "CAAA GAAA", + "GCA GTAA", + "GGA TGAA", + "CTT TATG", + "CAGG AAAA", + "TCC TGCA", + "CTG TCTG", + "GAA CATG", + "GGA TGGA", + "GCC TGAA", + "CAAAAA TG", + "TCCAA TG", + "CCA GCAA", + "GG CCTA", + "CAA CTGA", + "GCA CCTG", + "GTC TATT", + "CC TCTCA", + "GTG GTCA", + "GTG TAAA", + "GTA CACA", + "GTAAAA TT", + "GTA CATT", + "TATA TAAA", + "CTG TTAA", + "TAA GTCA", + "GCC TCCA", + "AAATT AAA", + "GTG CAGG", + "TCC TGGA", + "GTG CAAA", + "GC GTCC", + "CCA TTAA", + "GGA GGGA", + "TCA CTTA", + "TCATT AAA", + "CAA CATA", + "TAA TAGA", + "TAA TGTA", + "GA TTTTTT", + "GTT GTCA", + "GGA GACA", + "GTG TGGG", + "TCA CAGG", + "TC 
GGCA", + "CTCC CTG", + "GA CCAAA", + "TGTT TATT", + "CGAA TG", + "CTCAA TG", + "TCA CCTG", + "CA GTGTT", + "TGA GACA", + "TA GGGG", + "GAAAAA TG", + "GTT GAGA", + "TC GATA", + "CTC GGGAGG", + "GTT GTC", + "CCA GTCA", + "GCC CAGGCTG", + "GAA CAGA", + "GGCTCA CTGCAA", + "GCA GACA", + "TGA GGTG", + "CA CGTT", + "TAA GAAAA", + "CCA GGCA", + "GTA TCTT", + "CTTGG GAGG", + "CTT TCTA", + "CC GCTG", + "GA GCTCA", + "GAGA CAGA", + "CTT CAGG", + "GCA CATT", + "GTA CAAA", + "CTT GTAA", + "GTG GGTG", + "GAA GTGA", + "GG TCTC", + "GTA TGTT", + "GCA CTCA", + "TTA TGTT", + "CAA GTCA", + "CAA GTGA", + "GAAA CTA", + "TAAA TAAAA", + "TCTT AAAA", + "GTT GGAA", + "GTT CTAA", + "CCA CTC", + "CA GTGAA", + "GAAA GG", + "GCA CGA", + "TAA CTTTT", + "GTT GTTA", + "TCA GTTA", + "CGGA TG", + "TATT TGAA", + "CC CTGAA", + "GCC CTC", + "CTT CTAA", + "TTTG TTTT", + "GA GCTGA", + "CTG TGGG", + "CAA GATT", + "GAA GCTT", + "TGA GTAA", + "CTT GCTG", + "GGA TGGG", + "CGTA TG", + "TCCA TTA", + "GTC TGCA", + "GCCA TTTT", + "GTT GTAA", + "CACA CAA", + "GGACTA CAGG", + "C GTTTTA", + "TCTT CC", + "TAA CCTT", + "CTT TAAAA", + "TGAA TTTT", + "CTA CAGA", + "GCAA GAA", + "TAA CAAAA", + "CAATT AAA", + "CCA CTCA", + "CATG GTGAAA", + "CCCA GAA", + "CTA CATT", + "CC GAGG", + "TCCA GTG", + "TGA GTTA", + "GGA GTCA", + "TAA CGA", + "GA GTAAA", + "GA CTCTG", + "GGA GCTT", + "TA CTCC", + "CTG CATG", + "GC TTTTTT", + "GTC TAAA", + "GTG CGG", + "CA TCTCA", + "TGA TCAA", + "GGA GATT", + "GC AAAAAA", + "CA CCAAA", + "TGA CGG", + "CAGA GG", + "GTT GATG", + "CTT GTCA", + "TCCA CCTG", + "GGA GCAA", + "CAA GTAA", + "CCA TAAA", + "GTG CATG", + "GCA TATT", + "GTA GATT", + "GCC TAA", + "CTCAA AAA", + "GGA GAAAA", + "CTA TCC", + "TAATA TTA", + "GTG CTC", + "CAA TATG", + "TGTG GAA", + "TGA CTC", + "GTG TATG", + "TTTTAA TG", + "GC TCTAA", + "CACAA TG", + "CA GCTCA", + "GTT GGTT", + "CTAAAA TT", + "GTC TATG", + "TGTG AAAA", + "CTG GGTT", + "CCCC TCC", + "CC CTCTT", + "GCA GGGA", + "GAAA CCA", + "CATT TCC", + "GCA GCCA", + "TCA TATG", 
+ "GCA GGCA", + "C GTAAAA", + "TGA CCTG", + "CAGA GGTT", + "CTT GTGA", + "TTA TCTT", + "CTG TATG", + "GTCAA TG", + "GGA CGG", + "GC GTAA", + "CAAA CTA", + "TAAA TGTT", + "CTT CGG", + "CTCC CCA", + "TACAA TG", + "TCTG TAA", + "GAA TATG", + "GC GGGA", + "GGA CATT", + "TTA TGAA", + "GGA TGTT", + "GGA CATG", + "TCA GGTG", + "CAA CAAAA", + "GAAA GAGA", + "GTG GATG", + "GG GCTA", + "CCA TCAA", + "CA GCTGA", + "CTC CACC", + "CAA TCAA", + "GTG GTC", + "TGA CAGG", + "CCA TTCA", + "GTCC CTG", + "CAGA CACA", + "GTT GGTG", + "CC TCCTG", + "GAA CTGA", + "TATT CATT", + "GCC CATG", + "CAA TCTT", + "GAAA GCA", + "GAA TCTG", + "TTA TTTTA", + "GTT TGGA", + "TTTT TGTT", + "GGGAA TG", + "GC GACA", + "TAAA CTG", + "CCA TATT", + "GGA TCC", + "CAA GCTT", + "TAAAAAA AAA", + "TCA CTC", + "CA CTGTT", + "TGTTAA TT", + "GGA CTGA", + "GGA GTGA", + "CATA CACA", + "GTT TGTA", + "TCCA GCA", + "GTG CATT", + "GG AAAAAA", + "CCAA GAA", + "TCAA TA", + "CTT CCCA", + "TGA GAAAA", + "GGCC TCCCAAA", + "CAA GCTG", + "GCC CAAA", + "TGA CTTA", + "CA GCCTT", + "CTG GATT", + "TTTT TTTA", + "TCA CGG", + "GCA GTTA", + "TGA CTAA", + "TTA CAGG", + "TGA TATG", + "TAA TTATT", + "TCTT GAA", + "GCC CCTT", + "GTT CAGA", + "CTC TATG", + "CCA TGGA", + "GAGG GAA", + "GGA GGCA", + "CTT TGCA", + "TCTT GG", + "GGA GGTT", + "GCCAA TG", + "CTG GTGA", + "CAA CCAA", + "CCA GTC", + "CTT GAGA", + "TACA GCA", + "CTT GTC", + "GA CGGA", + "CTT CTTTT", + "GTG GC", + "GAGGA TG", + "CAA TAAAA", + "GAAA TTTT", + "AAAA AAAAAA", + "CTC TATA", + "GTA TGAA", + "CTT GTTA", + "TAA CATA", + "CAAA CACA", + "TGATT AAA", + "GCTC TGTT", + "GTG GGTT", + "GTT GGGG", + "GTG TGTA", + "GTAA TTTT", + "GTA TCC", + "TGTGTGTG TGTG", + "TCTT CCTT", + "TCA CTAA", + "TCTCC AAA", + "TA TCAAA", + "TGA TGGG", + "GGA TATT", + "CAAA TTTT", + "GTT CAGG", + "GTG GATT", + "GTG CAGA", + "GCTG CC", + "CTCA GAA", + "GCA GTC", + "GGA TAAA", + "GCC TTCA", + "CCA GGTG", + "TA TCTC", + "CAA TGCA", + "CCCA CTG", + "GTG TATT", + "CGA CAGA", + "TGA GATA", + "CCA GGTT", + "TGTT 
TAA", + "CATCA TG", + "TGA TTCA", + "GCAA TTA", + "GAAA TGAA", + "CTT GGTT", + "GAA GATT", + "GGA TTAA", + "CC TCATT", + "GGCCA GGCTG", + "GCTA TTA", + "GCCA GCA", + "GAGA CAGG", + "CTT GAGG", + "CA GTCTT", + "GTT CTCC", + "TATT TCAA", + "TGA CGA", + "CATG AAAA", + "CATTA TG", + "TAAA TTTA", + "GA GTGAA", + "CAA CAGG", + "TAA GCTT", + "CACA TTTT", + "GA TCTCA", + "TA GTCC", + "GACC CTG", + "TAA TGCA", + "TAA GTC", + "TAA TAATT", + "GAA GTAA", + "CAA CTC", + "CA TCATT", + "GA CGAA", + "GAAA CAAA", + "TATT TCTG", + "CATTAA TT", + "CCA CCCC", + "TAATA TTTT", + "GTT TAAAA", + "GTA TCTG", + "GTCAA AAA", + "GATG CTG", + "TGTT CTG", + "GG TCAAA", + "GTA GGAA", + "GTA TATG", + "TGA TCTG", + "GGGG CTG", + "GCA TCAA", + "GCCAA AAA", + "CCA CGA", + "GC TAATG", + "CAGA GAAA", + "CCTT CTG", + "TCC TCTA", + "GCA GGTT", + "CTCA CTG", + "TAGA TTA", + "GCC GAGA", + "CCA TCCA", + "CTT TACA", + "GTA CATG", + "GCA CCAA", + "CTT TGTA", + "CTA TGTG", + "TCA CTTTT", + "TGA GTC", + "CAA GAAAA", + "CTGA CTG", + "GTTTT TTTT", + "GCA TAAA", + "TAA TCTG", + "GAA AAAAAA", + "CAGGA TG", + "TGA GCCA", + "GAA TTCA", + "TCA GACA", + "GTT CCAA", + "TCA GGTT", + "CAAA CTG", + "CATT TCTT", + "TGTT AAAA", + "CCA GACA", + "CAA GTTA", + "CATG TTA", + "CATT CTA", + "TCTTTT TG", + "TGA GGGG", + "CACA TTA", + "TAAAA TAAA", + "GCA TATA", + "TGTT CTA", + "GAA GGGG", + "GAGTG TG", + "TAA GACA", + "GAA CTC", + "CCA GTAA", + "GAGA GAGG", + "GC GACC", + "CAA TTCA", + "CGG CTG", + "CCA GATT", + "CCTG GG", + "GGAA GAAA", + "GAGA GG", + "TCAAAA TG", + "CCTCA TG", + "TAAA GG", + "CTT TGGA", + "CCA GGGA", + "GTA CAGA", + "CTGAGGCA GGA", + "TGTT TCTT", + "CCA GGCTG", + "CTGA GG", + "GAGG CTG", + "CTCC TGGG", + "GAA GTC", + "CGA CC", + "GGA CTCA", + "GGA GTC", + "CA CAATT", + "GTG TTCA", + "GA CTAAA", + "GTCA TTA", + "CAAAA TTA", + "TGAA GAAA", + "GCA CCTT", + "GTT TGCA", + "TCC TGCC", + "GTA GATG", + "GCC TGCA", + "GA GTTAA", + "TCC CTTA", + "GTG GTTA", + "TC GGGA", + "TACA TAA", + "TCTC TCCA", + "CA CTAAA", + 
"TATATATA TATA", + "GTG GCAA", + "CACCA TG", + "TTTG AAAA", + "CACA CTG", + "CTT GGTG", + "TACA CTG", + "CC TCCAA", + "CAA CCTT", + "CA GCCAA", + "TTTT CAAA", + "TGA TAGA", + "TACA CTA", + "TCTG GG", + "TCC CAGCA", + "TAGG AAAA", + "CTT GGGG", + "TC TGTGAA", + "CC TTATT", + "CATT TAAA", + "TTTTA TTTTA", + "GCC CTCC", + "CTGA GCA", + "CC CGTG", + "GTA GTGA", + "TCC TATT", + "GAA GGTG", + "TGTG CTG", + "TCCA CTG", + "TAA TCTA", + "TGA TGTA", + "GTG GTAA", + "TAA TGGA", + "GATG AAAA", + "GTA GTAA", + "GTG GGGA", + "GTG TCAA", + "CAGA CTG", + "TC GAAAA", + "CTCA TTA", + "TAA TAATA", + "CTCA GAAA", + "CA TCCTT", + "CC GCTT", + "GGAA GG", + "CC GTGA", + "CCA CTCC", + "CTA GAGA", + "TAGAA TG", + "GGA TTTA", + "TTAA TTTT", + "GC TAATA", + "TCC CCCA", + "CAAA TATT", + "GA TCATG", + "TCTTAA TT", + "CA GTATT", + "GTCTT GAA", + "CC GAAA", + "CTA TTCA", + "TAA GATA", + "CTT GCAA", + "GCC CCAA", + "TCC CTAA", + "GAA GTTA", + "GA TGATG", + "CTT GATG", + "CC CTAAA", + "CCTG CCTG", + "GACA TTTT", + "CCA GCCA", + "TGTGTGTG TG", + "GTC TATA", + "TCTC TGTT", + "GTC TGTA", + "TA TAATA", + "CTT GTTTT", + "CGC CATT", + "CTCA GCA", + "TACA GTT", + "CAA GAGG", + "GGAA GCA", + "GCC TTTA", + "CC CCATT", + "CAA CGA", + "GTCA TTTT", + "CC CGCA", + "CA GTTAA", + "GAA TCTT", + "CATG TTTT", + "CC GGGG", + "CTA CTGA", + "TCA CGA", + "TAAA TTTG", + "GCC CATT", + "CTC TAGG", + "GGA CCTG", + "TCA GGGA", + "GAGA CTG", + "CC AAAAAA", + "GCC GG", + "CCA GGGG", + "TCA GAAAA", + "CA TCTGA", + "TCTT CAAA", + "CTA CAGG", + "GAGG CAGG", + "CATT GTA", + "TAAA TCAA", + "GA CTCTT", + "CTGA TTA", + "GCA TATG", + "GGA CCTT", + "CAA GACA", + "TATT TATG", + "TATTTT AAA", + "CC GAGA", + "TCA TTTTA", + "CTCA CTCA", + "CCA CCCA", + "CTC TAGA", + "CTA CATG", + "GTG CTTA", + "CAA CCTG", + "TC TGTGTT", + "TAAA TATG", + "CAAA GG", + "CC CTGTT", + "GTT CGG", + "TGA TAAAA", + "CA CGAA", + "GTT GAGG", + "CAGA GTGA", + "GAAA TTAA", + "CACA TA", + "GAA CAGG", + "TCTCC TGA", + "CC TGAGG", + "GGAGG CCAA", + "GTT TACA", + "TAA 
CAGG", + "TGTG GTG", + "GCCTCC CAAA", + "CCA TCCTG", + "GATT CTT", + "GAA TGGA", + "GTA GTCA", + "CTCC TCTG", + "GAAAGAAA GAAAGAAA", + "CC CTGTG", + "CAGTA TG", + "GC GATA", + "GGA CTC", + "GAAA GA", + "TGTT GG", + "GTA GCTT", + "CA TTTTAA", + "CC CTCTG", + "GCA TTCA", + "CGA TTA", + "TCA CATA", + "TAA TGAAA", + "GGAA TTA", + "CTG TCAA", + "TAAATT AAA", + "CAA GTC", + "GTA TTCA", + "GGCCA TG", + "CTT TAGA", + "TGTT TCC", + "CATG TA", + "GAA TAAAA", + "CAA CTAA", + "TCA TCTA", + "CA CTCTT", + "CA GTTTG", + "CA TAAAAA", + "GCA TGCA", + "GATT TA", + "GAA CCAA", + "TCTG TGA", + "TCA GCCA", + "TCTC CACA", + "TCTCA GCTCA", + "TATCA TG", + "GCA CTTA", + "CGC CAGG", + "CGG GG", + "CATTAA AAA", + "TTTG TTA", + "GGA TATA", + "TC GACC", + "TAA TCCA", + "CC GC", + "CATT GTT", + "CCA GTTA", + "GTA GTTA", + "CTA GGAA", + "CC TAATT", + "TCA TGGG", + "GAA CTAA", + "GCTA TTTT", + "CC GTCA", + "CAGA TTA", + "CCA TATA", + "CAA CTTA", + "TCA GTTTT", + "CTA CCTT", + "GCA CTC", + "GTG TGGA", + "GTG CCAA", + "GACAA TG", + "GA CAATT", + "GTA CCTT", + "TAAA CATT", + "CA GGAGG", + "GTG CGA", + "GAAAA TTA", + "TCTCTT AA", + "CC GATT", + "GA TGATT", + "CCA TGGG", + "TC GGTA", + "CCA TATG", + "CCA GTCC", + "GCC TTAA", + "TGA TCCA", + "GTT GCAA", + "GTA GAGG", + "CAGA TTTT", + "GTA CTTA", + "TCTTTCTT TCTTTCTT", + "GCTC TGTG", + "TCAA TAA", + "GTT TAGA", + "GTT CGA", + "CAA GGTT", + "CTCA TTTT", + "CACA GG", + "CATG CTG", + "GAA CGG", + "TA TAAAAA", + "GAA GGCA", + "GA GCATT", + "TGTT TGTG", + "GCTG TTA", + "GTCA CTG", + "CAAA TGAA", + "GTGA CTG", + "GTT CTTTT", + "CAGGCTG GAGTGCAGTG", + "TGA TGAAA", + "TAA CGG", + "CTA CTAA", + "GACA TTA", + "GGA CGA", + "GAGCA TG", + "GCA TGGG", + "CCA CTTA", + "CTA TCAA", + "GCTG TTTT", + "GTC GTG", + "CCTG GCC", + "TCTC TGAA", + "TGTT GTA", + "CAGC CAGG", + "GTT TAGG", + "CC GCAA", + "GGA GTAA", + "CCAA TTA", + "CAGC AAAA", + "TCA TCCA", + "CA CGTA", + "TCA TAGA", + "TAATT AAAA", + "CA CTTAA", + "TCTT TATT", + "GAGA TTA", + "TAA GAGG", + "CAAA TTAA", + "GA 
CGCA", + "CA CGGA", + "GTG TGCA", + "TC T", + "TATTA TTA", + "GAAA TATT", + "GGA GTTA", + "TCTT TGA", + "CTGA TTTT", + "TGTGAA TT", + "TCC CACC", + "CC CTTTG", + "CAA GGTG", + "CAGA GTT", + "CCCCA TG", + "CTA CCAA", + "CTCC AAAA", + "CTT CCCC", + "CTG CTAA", + "GATT AAAA", + "GC TTATG", + "CTA CTTA", + "TAAAAAA TT", + "TCA GTCC", + "CTATT AAA", + "GAA TGGG", + "CACA GTA", + "CAA CGG", + "GG TTATT", + "TCA CCCA", + "TGA TGCA", + "TAA TTTTTT", + "GTT TGAGA", + "GTATT AAA", + "GCC CCCA", + "TATA GTA", + "TA GTAAA", + "TGA TACA", + "GTG GTTTT", + "CCA CTAA", + "CACA GAGA", + "CCTCTG CCTCC", + "CAA AAAAAA", + "CTC TCTCC", + "CA TAATA", + "GAA GCCA", + "GTT CCCA", + "TGTG TTTG", + "CAA TGGA", + "TGAA GTA", + "CTT CATA", + "CA CTGTG", + "GC TCTTTT", + "TGA CATA", + "TAAA GAAAA", + "GAGAAA TG", + "CAGG GAGG", + "TGTT CAA", + "GA GCCAA", + "GACA GAGA", + "GG CTGAA", + "CAAA TATA", + "GTG GAAAA", + "TAA GGTT", + "GTGA TTA", + "GGA TCTG", + "GATG TTA", + "GACTA CACA", + "TCC TATA", + "CTG CCAA", + "TCC CGA", + "GTGA TTTT", + "GC GTTTT", + "CAGA GTA", + "GAAA GGAA", + "CA CTTTG", + "CCCC AAAA", + "GCAA CCCA", + "TGCA TTTT", + "TCTA GAA", + "TA CTTTG", + "TGA GGCA", + "CA TCTCC", + "TC GCTA", + "TGA CTTTT", + "GA GCCTG", + "CATT TGTT", + "TCTT TGTT", + "GCAAAA TT", + "CC TGATT", + "GA TAAAAA", + "GA GTGTT", + "TCC TGTA", + "TACA GAAA", + "TC CAGGAA", + "GCCA GTG", + "TAGA TTTT", + "TAA TAGG", + "CTCC TCA", + "CATTTT TG", + "CATT TCAA", + "GCCA TCA", + "TAAAA TATA", + "GA CTGTT", + "GCA TGGA", + "CAAA GTT", + "CA TGATT", + "GA GTTTG", + "CTA GCAA", + "CTT CCTA", + "GG GGAGG", + "CTA TATG", + "TATT TATTTT", + "CA CCATT", + "CC CTCAA", + "TTTTTTTT TTTTTT", + "GA TCATT", + "GTA CATA", + "CTC CATA", + "CCCC GTCTCTA", + "GCC TGCC", + "CTA GCTT", + "CC CGGA", + "GATG TTTT", + "GTA TTTTA", + "TCA GATA", + "CCTG GAA", + "TATT CCA", + "GGA CCAA", + "GCCA TTA", + "CGA CTGA", + "TAA GCTG", + "TAAA CACA", + "GTT TCTC", + "CA TCTTA", + "GAAA TTTG", + "TAA TGGG", + "TAAAA TTTT", + "CTG TTCA", 
+ "CCTG TTA", + "TA CTGAA", + "TGA CCCA", + "TGA TTTTA", + "CTCC TTA", + "TATA GAA", + "CTG CGG", + "GC GGTA", + "GTG CTAA", + "CAGA GGAA", + "TACA TCA", + "TCAA TCAA", + "CTG CAGCC", + "TGAA TATT", + "TCTA CAA", + "CCA CATA", + "CC CGTT", + "TATA CACA", + "TCC TCTC", + "TCTA CTT", + "CC GGAA", + "CTTTT TTA", + "GAAA GAAAA", + "CTA TCTT", + "GA CTTTG", + "TGAA CAA", + "GCA GTTTT", + "GC TAAAAA", + "GAGG CGG", + "TAA TAAAAA", + "CTG GTCA", + "CAGA CAA", + "GGA TATG", + "TGAA GG", + "GCCA GAA", + "CCA GGCC", + "CCA CCATG", + "CAAA CTT", + "TCA TGTA", + "GCTG CTT", + "GTAA TA", + "CCCC CAA", + "CA GCCTG", + "TCAA CTT", + "TAAAA TTAA", + "GCTG AAAA", + "CGA CGA", + "GTG GGCA", + "TGA GGGA", + "CGC TCC", + "TTTT GTTTT", + "GA GTCAA", + "TCA TGCA", + "CTG CTTA", + "TAA GTTTT", + "GTA GCAA", + "CCTT GG", + "TGA CAAAA", + "CTG GTAA", + "TCTT TATA", + "TGTG TGTT", + "CTG GTC", + "CTG GCAA", + "CATT TCTG", + "CTC TACC", + "CTGA GGA", + "CTAAAA TG", + "CTA GATT", + "GTA TCAA", + "CA GTCAA", + "CTG GGTG", + "CC TCTTA", + "TGA GTTTT", + "TTTTA TTTA", + "CC TTTTTT", + "TATA TACA", + "TA GCAAA", + "AAA TTA", + "CTG GATG", + "GA TAATA", + "GA CAAAAA", + "CCTG GGA", + "GCTT TCA", + "GTA CAGG", + "GCTG GAA", + "CTA CTCA", + "CAA TGTA", + "GC GTGAA", + "GA TCCTT", + "TATTAA TG", + "GCC CGA", + "TAAA GTG", + "GCTT CCA", + "CATG GAA", + "TGAA GTT", + "CTT TCTC", + "TCTGTG TG", + "GTA TGTA", + "CAA TACA", + "TCAA GG", + "CC TCTAA", + "TGTG GG", + "GA TCTGA", + "GTA CTGA", + "TTAA TTAA", + "GCA GAAAA", + "CTA CATA", + "CC GGTG", + "GGGG AAAA", + "TACAA AAAA", + "TTTT GG", + "GTGA GAA", + "TCAA TAAA", + "TCAA GTT", + "CTCA GGA", + "CTA CTC", + "CAAA TCA", + "GGCA GAA", + "CC CGAA", + "TGTT GTG", + "GAGC AAAA", + "TATT TGTG", + "GTA GGTT", + "CTA CCTG", + "CA CAAAAA", + "CTCA GG", + "GCTT TA", + "CAGA GCAA", + "CTCA GTG", + "GGAA GAGA", + "TAA CCTG", + "GAAA TATA", + "CGA GAA", + "GTGA GG", + "CATT TATA", + "GGCA GCA", + "TC TAAATT", + "CCCA GTG", + "GCC TAGG", + "TGCA TTA", + "CC GTAA", + 
"CATT CCA", + "CTA GTTA", + "GA CTTAA", + "CTA TACA", + "GACA CAA", + "TCTT CACA", + "CC GGTT", + "TAAA GTAA", + "CTG TGGA", + "TAA GGTG", + "TCCA GTA", + "CAAA TTTA", + "AAATT AAAA", + "CCA TCTA", + "CTCC CTT", + "CTCC TTTT", + "GAGAGAGA GAGA", + "GGA GATA", + "CCTA TTA", + "CACC AAAA", + "CC GTTA", + "TGTT TATA", + "CTCA GGAGG", + "GA CGTA", + "GTCC TTA", + "GAAA GTT", + "GCTG GTG", + "CTC TACA", + "CAA TAGA", + "TAAAA TATT", + "GTA CCTG", + "GTA CTAA", + "CTT TGAAA", + "CCTT TCC", + "TAAAAA TTA", + "CTC GG", + "CAA GATA", + "CATT TGA", + "CACC TCA", + "GCCA GCC", + "GTC GG", + "GCA CATA", + "CA CTCAA", + "CTTTT AAAA", + "CAGGAA TT", + "GCC TATT", + "TCTT TCTG", + "CTGAGGCA GGAGAA", + "CAGG CAGG", + "CTA GTAA", + "TCCA TA", + "GAA CTTA", + "C G", + "GCTG TGA", + "GAAAA TA", + "TCTT CATT", + "GAGG GAGA", + "CCCA TCC", + "GAGG TGGG", + "GCC TCTA", + "GTA GGTG", + "TAAA CCA", + "GAA GGAAA", + "TATT GG", + "A TG", + "TCCA GTT", + "CCCA CAA", + "GAAA CACA", + "GTC TCAAAA", + "CTTTT CTTTT", + "TGAA GGA", + "TATT GATT", + "CTA TGTA", + "AAAAAAAA AAAAAA", + "TCCTT AAA", + "GC GCTA", + "TCCA CTT", + "GA CTCAA", + "TAAA TACA", + "TCA TGGA", + "TCTG GGA", + "TCC TATG", + "CTG TGCA", + "TCAA GTGA", + "TCA TAAAA", + "CA TCCAA", + "CCTT CCA", + "CTG TACA", + "GAA GGTT", + "CTG TGTA", + "GTCA CTT", + "TCA CAAAA", + "TCA GGCA", + "GTGTT AAA", + "CC CTTAA", + "CAAA GTG", + "GAAA TGTT", + "CTG GGGA", + "GA CGCC", + "TATA TGTG", + "CTA GATG", + "GAAATT AAA", + "GAA TGCA", + "GCA CTAA", + "CGG GAGG", + "GCCA CAA", + "CGC TTA", + "TCCA CAA", + "CAGA TA", + "TC TGAATT", + "TATTA TTTT", + "GC GCGG", + "CTC TGAAA", + "TCTCTT TG", + "TATT TCTA", + "GGGG TGGG", + "GGA TGCA", + "CCA CACC", + "TAAA TGTG", + "TCTT CCTG", + "GCAA GG", + "CTG CTCC", + "CTG GAGTG", + "CTGTT AAA", + "CACA CAAA", + "CTGA CTT", + "GAAAA GAAAA", + "CCTT CTCC", + "GAAA TAAAA", + "CCTCA GGTGA", + "GA TAATG", + "GAATT GCTT", + "CCAAAA TT", + "CGTG AAA", + "CACTG AAA", + "CAGTG AAA", + "GA TCTTA", + "GAGA TGGG", + 
"TCTG CCA", + "TGA GGTA", + "TATG GAA", + "TATA TTTTA", + "TGAA CTT", + "GCA GATA", + "CTTTT CTT", + "GTAAAA TG", + "TCTC TAA", + "TCTG CAAA", + "GA GCCTT", + "TA TCATT", + "CAA TTTTA", + "CC GCCA", + "TATT TAAAA", + "GAGA GATG", + "GAGA TGGA", + "GCCA GGATG", + "CGA GTAGCTG", + "TTCA TTTT", + "TATA CTT", + "GTC TACA", + "GTGA GTGA", + "GCTA CACA", + "GGGA GGA", + "CAA GGCA", + "GC TTTTAA", + "CA CTATT", + "GTT CATA", + "TCC TC", + "GTG GACA", + "TATT TGGA", + "CTC CAGTA", + "GTT CAGTT", + "CCAA GG", + "CAGA GCC", + "CTC GCC", + "CC GATG", + "GGAA TTTT", + "TCCA GCC", + "CC TCTTTT", + "GAA CCTT", + "CATG CACA", + "GTT TC", + "GAA GATA", + "TA CCCC", + "GCTG CCA", + "GGGG GAGG", + "GCAGTGA GCTGA", + "CTG TCTA", + "CGA GGA", + "CAA TGGG", + "GC TGTGAA", + "GAAA GTG", + "TACC AAAA", + "GTCA GG", + "CAGC TCC", + "TGTG CTT", + "GTC TAGG", + "TTTT TGTA", + "TTA TATG", + "TCA GGGG", + "TATT GTTA", + "CC TGAGA", + "TA TCTCA", + "CAA TCTG", + "CA CTCTG", + "GATT TAA", + "TGAA TAA", + "TCTT GTA", + "TCAA CTG", + "TCTC CAGG", + "CTA GAGG", + "CTGA GAAA", + "CTA GCTG", + "TCCA CCA", + "CGA TTTT", + "CC GGCC", + "GTT GACA", + "CTTA GAA", + "CA TAATG", + "GA GTATT", + "CACA GAAA", + "GA CTGTG", + "CTA TTTTA", + "TGA GGAAA", + "TTATT AAAA", + "CTTA TTTA", + "CAGA CTT", + "CA CGCC", + "GCTT GG", + "CCTG CTT", + "TAAA GCAA", + "CCTC GTGA", + "TA GAATT", + "CTTA CAA", + "TAAA GGAA", + "GTC TAGA", + "GTGA CTT", + "TACA TATG", + "GTCA GGA", + "GCTC CAGG", + "GAA GGGA", + "CA TGATG", + "TCA TCAAA", + "CGTT AAA", + "GTA CTCA", + "CTCC CAA", + "TATA TGTA", + "GGTA TTTT", + "TAA GCCA", + "C GAAATT", + "GTTTG TTTT", + "TCTG TCTT", + "TATA TCA", + "TGTT CATT", + "CAAA CCA", + "TTCA TTA", + "TATT TGTA", + "GATT GAA", + "CTA TAAAA", + "GATTAA TT", + "CCCA CCA", + "TCC TAGG", + "TAAA TGTA", + "CTCTT AAA", + "GCA GTCC", + "GC GGCTG", + "GTC TCGAA", + "TGAA TGA", + "CTG GGGG", + "GTC TCGA", + "GAA CAAAA", + "TGAA TCA", + "TGTATTTT TAGTAGAGA", + "GTTA TTAA", + "TTTTTT AAAA", + "GTCA GTG", + "CCCA 
TTA", + "CACA GGA", + "TATT CCTT", + "TCTG CCTT", + "CCTG GTG", + "GC GAGC", + "TA CTAAA", + "TACA CAAA", + "CC GTCC", + "GCTT TGTT", + "GCA TCCA", + "CA TCTAA", + "GC TGTGTT", + "GTA GACA", + "GCC TATG", + "TCTT TGTG", + "GATT CTG", + "CGCC CGG", + "GA TGAGA", + "TA TCTGA", + "TGAA TTTG", + "CC TGATG", + "TAAAA CAA", + "CTT TAGG", + "TTTT CCTT", + "TGAA TAAA", + "CGG GGA", + "CAAA CATT", + "GTA TGGA", + "GCTT AAAA", + "TA CCAAA", + "CAAA GAGA", + "CTCC TGCC", + "GTAAAA AAA", + "CACA GCC", + "CCA TGCA", + "TA CAATT", + "CTA GTGA", + "CTGA GTT", + "GAGTG AAA", + "TCTGTT TG", + "CTG TAGG", + "TATAA AAAA", + "GCATT AAA", + "GTC CATA", + "TGTTAA AAA", + "TGTT TGA", + "GAA TAGA", + "CTT CAAAA", + "CTG GACA", + "CTG TAGA", + "CCATT AAA", + "CTA TCTG", + "CACTA TG", + "TTA TCAA", + "TAA GTAAA", + "TAATCCCAGCACTTTG GGAGGCC", + "CCA GAAAA", + "TGAA GCA", + "TCC CTTTT", + "TCA TACA", + "TA CGTT", + "GCC GTG", + "GGAA GTG", + "GG CCAAA", + "GTA CCAA", + "TCTCTA CTAAAAATA", + "CATT GTG", + "TGTG TGA", + "GAAA CAGA", + "CTT GACA", + "GA TGAGG", + "GAGA TTTT", + "CCTT CAA", + "GAA TCTA", + "CTC TCCTT", + "GG CGGA", + "TCTATCTA TCTATCTA", + "CACA CAGA", + "TGTG TGTA", + "CAAA GCC", + "TGTG CCA", + "GTT GAAAA", + "CTC CAGCA", + "TCAA GGA", + "TA GCTCA", + "CGC TGA", + "CCTG AAAA", + "GA CTATT", + "GATT CCA", + "GCTT CTA", + "GTC TGCC", + "CTT GGCA", + "TGTG GTA", + "GCTT TGA", + "GCTC TCTG", + "CTCA CAGA", + "TCTT TAAA", + "CAAA GCAA", + "TA CTTAA", + "GCTT CAA", + "CATT GAA", + "GGA GGAAA", + "CTA TAGA", + "CTGA GGAA", + "CCTG GCA", + "CC CTATT", + "CTC GTG", + "TTA CACA", + "TTA GGAA", + "CTG GTTA", + "GTT GTCC", + "TAATG AAAA", + "TATT TACA", + "GG GAATT", + "GTA GTTTT", + "GCTG CAA", + "CTA CGG", + "GCC GGA", + "CTG GGCA", + "CCTT AAAA", + "GATG GAA", + "TAGATAGA TAGATAGA", + "TATG TAA", + "GTA CGG", + "TATT CAAA", + "GA TCTCC", + "CCTG TTTT", + "TATT GCA", + "GGAAGGAA GGAAGGAA", + "GG TAATT", + "TTA CAGA", + "TCA GC", + "GCAAAA TG", + "GAGA GCA", + "GTA GAAAA", + "CATT TGAA", 
+ "TCTT CTTTT", + "TCC CATA", + "GTTA TTTA", + "CTA TCTA", + "CA TCCTG", + "TCTT GTG", + "TTA TTATT", + "CC CGTC", + "TACTA TG", + "TAAA CATA", + "TAA GGAAA", + "GCTT GTG", + "CTC TAAAA", + "GTTTT AAAA", + "GACA GGA", + "TCC TAGA", + "TCCA CCCA", + "GTT TGAAA", + "CCA TCTCA", + "CTAA GAA", + "GTA TCTA", + "GTGA GGA", + "GCTG GAGG", + "CCTGTAA TCCCAGCTA", + "GCAA CAA", + "CTT TCAAA", + "CAAA TGTT", + "CTT GTCC", + "TCTCAA AAA", + "TATT TATTA", + "TAA GGCA", + "GAGA GGAA", + "TA TGATT", + "GCA TCTA", + "C GTTATT", + "GCC TGTA", + "GTT TCAAA", + "CCTTCCTT CCTTCCTT", + "GG CTTTG", + "GTCA GAA", + "CATG CATG", + "GTCA TTTA", + "CTG GAAAA", + "CTT CGA", + "CCTA TTTT", + "CCAA CAA", + "TCCA TCC", + "TAAA GTTA", + "GTC TCTC", + "TAA TCAAA", + "GATTTT TG", + "GATT TCTT", + "GG GCTGA", + "GCA TGTA", + "CCTG GGTT", + "GAGA CAA", + "GCTG TCA", + "TGA TAGG", + "GGA GACC", + "CC GGCA", + "TAA TCTCA", + "TGAA TTAA", + "TCTG GTG", + "GCC TC", + "GG CGCA", + "CCA GCTA", + "CA GTCTG", + "TGAA CTA", + "GTAA GAA", + "CCTT TCA", + "TCCA TGA", + "CAAA GGAA", + "CTC TC", + "CTC TCTCA", + "CTC CAGC", + "GTA GATA", + "CCCC CTCC", + "GG CGCC", + "TCTG TCC", + "GA CCATT", + "CTT GAAAA", + "TTA TCC", + "TACA TGTG", + "CAAA TTTG", + "TTTT GTG", + "CAGA GTG", + "GTAA TAA", + "GTGA GTG", + "TTTT TCC", + "GG CTCTG", + "GCC CTAA", + "GG CTGTT", + "CC CAATT", + "CAGA GCTT", + "TATAAA TG", + "GA GTCTG", + "TCTTAA AAA", + "GTTTTA TG", + "GA TCCAA", + "GGCC CTG", + "GA TCCTG", + "TCAA GTG", + "GATT CAA", + "CCTC TCTT", + "GAGA CGG", + "CAGA TCA", + "TAAAA GAA", + "CTGA GCAA", + "CCTG CCA", + "CCTT CTA", + "CGC TCA", + "GG CTGTG", + "TGGG AAAA", + "GGA GCCTG", + "CTGA GTG", + "CGTC AAA", + "TCAA GTA", + "CGTAA TT", + "TTA CTTA", + "TATA CTA", + "GG GCAAA", + "CAA CTTTT", + "CTT TGCC", + "GC CAGGAA", + "CACA CTA", + "GCC CAGC", + "TAAATAAA TAAATAAA", + "CTT TCCTT", + "GGGA GAA", + "TATG GTA", + "CGG CCA", + "CCTC TCTG", + "GAAA GCAA", + "CAA GCCA", + "GG CGTT", + "CTC TTTTA", + "TCGGCC TCCCAAA", + "GATT 
TATT", + "CAA GTCC", + "TA TCTTA", + "GTTCAA GACCA", + "CTCA CACA", + "GAAA TCAA", + "TGA GACC", + "GG GTAAA", + "GCTT GTT", + "GA TTTTAA", + "TTTT TATA", + "CAGA GCTG", + "TC TGTTAA", + "GTAA TTAA", + "TCTT TGAA", + "CTT GCCA", + "TTTT CATT", + "CCA TGTA", + "TCTC GGCTCACTGCAA", + "GGA TTCA", + "TC TATTAA", + "TACA TAAA", + "GATT GATT", + "GGA GAGGA", + "CGC AAAA", + "GGA CTAA", + "TTA TGTG", + "GTCA CTCA", + "GACA GCA", + "CGA GTT", + "GATG GTT", + "GGAA GAGG", + "GCCAA CATGGTGAAA", + "GGA GCCA", + "TGAA CTG", + "CCTC TGTG", + "GTA TAAAA", + "TCC CAGAA", + "CATT TATG", + "GA TTATG", + "TGTT TCTG", + "GAGTG GGTT", + "TACA TATT", + "CTC CAGGA", + "GACA CTG", + "GG TCTCA", + "CC GGGA", + "TGTT TAAA", + "CTCA CCA", + "GGA CTTA", + "GCC CACC", + "CAAA TCAA", + "GAAA TGTG", + "TA GTTAA", + "TCTA TAA", + "TTA GATT", + "GTG TAGG", + "TACTG AAA", + "GCA CCCA", + "GTG GGCTG", + "GAA TGAAA", + "TCTA GTT", + "TCA GGAGA", + "TCCA CTA", + "CTCA GTT", + "TACTT AAA", + "GA CTCCA", + "TCCATT TG", + "CACA GCAA", + "GCTCATG CCTG", + "GGTG CTG", + "GCTT TCTT", + "GTG GCCA", + "TA CGTG", + "GTG CAGTG", + "TGAA GTCA", + "CCTT TAA", + "TCTCAGCTCA CTGCAA", + "GAAA TATG", + "CC TCAAAA", + "GGGG CGG", + "CGA CAA", + "GG TGATG", + "GTCTT AAA", + "CAGAAA TG", + "CGTCA TT", + "CCAA GCA", + "GGA TCAA", + "GTGCTG GGATTA", + "GCTG GCC", + "CGGA GCTT", + "TACA TGA", + "TGTT TGAA", + "TCTC CATT", + "TAA GCAAA", + "CCTT TCTT", + "TA CTGTT", + "TCCA TCTT", + "CTTA CTT", + "CGGA GGTT", + "CAAAA CAA", + "TCA TAGG", + "TTA CTAA", + "CTTA TTTG", + "GAA TGTA", + "CCCCA TGGA", + "TTA CTGA", + "CGG AAAA", + "CTC CAGTG", + "TGTT CCA", + "CAGA TGAA", + "GTT GATA", + "TCC CCCC", + "CATT GCA", + "CTCA GCC", + "CTTA CTG", + "TA TCCTT", + "CTTTTA TG", + "TGAGTA GCTG", + "GACTG AAA", + "CAA TGAAA", + "CGA CTG", + "CTT GGGA", + "GCAA GCA", + "TCA CTCC", + "GATT TGA", + "CATTTT AAA", + "TCAA CTA", + "GTCC AAAA", + "CACC CTG", + "TTA CCTT", + "CAA GGGG", + "TTTT GGA", + "GTTA TTTG", + "GCTA CTG", + "CTGAGGCAGGA 
GAATG", + "GTGA TGA", + "GTA GTC", + "TAGTA TG", + "GTA TAGA", + "GTG TCTA", + "GCTG CTA", + "TTA GTAA", + "TAAA CATG", + "GTCA CCA", + "CA TCTTTT", + "CATA TAA", + "TCTC TCTA", + "TTTTA TTAA", + "TATT CTAA", + "GAAA TTTA", + "CTT CCCTG", + "TAAA GATG", + "TA CGTA", + "GTT TATTA", + "GAAAA GAA", + "CCCA CCCA", + "CAATT AAAA", + "CC GACA", + "CAAA GTGA", + "CAAA CAAAA", + "GCAA TTTT", + "CGATT AA", + "TTA GAGA", + "CTGA TGA", + "GGA GGAGG", + "GTCC TGGG", + "TCA TGAAA", + "GCAA CCA", + "GTT GGCA", + "GCGG CGG", + "GTCC CCA", + "GTA GGGG", + "GCCA TGTT", + "GTT CGAGA", + "GCC TATA", + "TAAA TTCA", + "GG CCATT", + "GAAAA CAA", + "TGTG TATG", + "GTA CTC", + "TAGG GAA", + "CCTT GAA", + "TC TATTTG", + "GAGG GCA", + "GAAA CTGA", + "TA CGC", + "TA CAAAAA", + "TCA TTATT", + "GGAAAA TT", + "TCAA TATT", + "CC CGTA", + "GGA GAGAA", + "TTA GTTA", + "CTCA GAGA", + "TC GAGC", + "CTA GTCA", + "GATG GCA", + "TGAA CATT", + "CTA TGGG", + "CACA CCA", + "TCAA TTAA", + "GGAA CTG", + "TTA CATG", + "CTT TCATT", + "CAGC TCTG", + "TCTTTT TTTT", + "TAAA TCTT", + "TGA TCTA", + "CATA CAA", + "GC TCAAAA", + "GC TGTGTG", + "TCAA TCA", + "GATT TGAA", + "CCAA GGA", + "GTCC TCA", + "GTG CTCC", + "AAAA TAA", + "GTGA CAA", + "GCTCA CGCCTG", + "CGA CGG", + "TA TCCAA", + "CACA CATG", + "TCTC TCTCC", + "TGTG GTT", + "CTT GGTA", + "TCTG GTT", + "TTTA TAA", + "CTG CTTTT", + "TGTG TCA", + "CACA TCA", + "CC TAATG", + "C GTTTTTT", + "GCTG GCA", + "GA CGTC", + "TATAA TTA", + "TACA GTAA", + "GAAA GTAA", + "GTC TGAAA", + "CCCA TTTT", + "TATA TGA", + "CTT GATA", + "CTT TATTTT", + "CTT TATTA", + "GG CGAA", + "CCA TGCC", + "CCTG CCTT", + "GAAGAA GAAGAA", + "CTGA CTGA", + "GCC CTTA", + "TA TCTAA", + "GTG TTTTA", + "TGTG GCA", + "TATT GTAA", + "GCCA GAAA", + "CCCTG TCTC", + "CACA GGAA", + "AAAA CAA", + "AAAAAAAA AAAAAAA", + "TAA CTCC", + "GCC TAAA", + "CGA GTA", + "TA GTATT", + "GTATTTT TAGTAGAGA", + "GCTG CAGG", + "TATT GAAA", + "CCAGCC TGGG", + "GCTCC AAA", + "TA CGAA", + "GGCC TCC", + "TATA CAAA", + "CATG GCA", + 
"CATG CAA", + "TACA CCA", + "CTT TACCA", + "TACA GAGA", + "TATT CTTA", + "TATG TCA", + "TCAA GCA", + "TCAA TGA", + "GG CTCTT", + "GGAA GTT", + "TCCA TGTT", + "GCTT TCC", + "TATG TGA", + "GTG TAGA", + "TTTT TAAAA", + "GCTG GAGA", + "GTGA GAGA", + "CCTA GAA", + "CCTCC AAA", + "CCAA TGA", + "CAGG GCA", + "CTA TGCA", + "CTT CACC", + "CTA CAAAA", + "CTCA CC", + "GAGTA TG", + "TA GAAAAA", + "CTTTT GAA", + "TAAA GAGA", + "CATG TCA", + "TCTTTT AAA", + "CACA GTGA", + "GA TCTAA", + "TAA GGTA", + "CATA GAA", + "CGC GCC", + "CAGC TTA", + "TATA GTT", + "CGG GCC", + "TATC CATT", + "TGTTTG TTTT", + "GCTG GCTG", + "TACA GGA", + "CTCC TTTG", + "CAA TCTA", + "CCCC CTG", + "TATA CTG", + "CTGA GCC", + "CGG TTA", + "TGAA GTG", + "GCTT CCTT", + "TTTTA TTTG", + "TA GTGAA", + "CTGA GGTG", + "TCTT CTC", + "GACA GAAA", + "CTGAA CTGAA", + "CCTG GGAA", + "TCC CCAAA", + "TATG TATT", + "GATT TCTG", + "CATT CAAA", + "CACA GTT", + "GCTT GAA", + "GTG GATCA", + "CTGA GTGA", + "TGAA TTTA", + "TCAA CAAA", + "GG TCATT", + "GTAA TTTA", + "GC GACTT", + "CTGA GAGA", + "GTG CCCA", + "CTA GGTT", + "TCC TGAAA", + "GTC CACC", + "TCA CAGAA", + "GC GAAAA", + "GTA TGGG", + "TGAA CAAA", + "TAAA CAAAA", + "CC GTTTT", + "TC TCAATT", + "TCCA GAAA", + "GTAA CAA", + "GCA TTTTA", + "TCTC CATG", + "TTA TAAAA", + "CAGG CAA", + "CTAAAA AAA", + "GTT GGGA", + "TAAA GATT", + "TGAA GAGA", + "CCCC TCA", + "TGTT TATG", + "TCTA CTG", + "CCAA TTTT", + "GGTG GTG", + "GGAA CAA", + "TGTG GGA", + "TCTG CTA", + "GAA CGA", + "GTAA GTA", + "GTT GCCA", + "AAAA TTTT", + "GC GCGA", + "GAAA GATG", + "GTC TCTCA", + "TCCA TCAA", + "GCA GCTA", + "CACA TTTG", + "CTGA CAA", + "TCCA CC", + "GC T", + "CCCA CTT", + "GCA GGTA", + "GAGG CCA", + "TAAA GTCA", + "CTG GATA", + "CGG CAA" + ] + } +} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-2260/tokenizer_config.json 
b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-2260/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e926cbe05847fc6887cfc8a947900d32ebe68356 --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-2260/tokenizer_config.json @@ -0,0 +1,56 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "[UNK]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "[CLS]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "[SEP]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "[PAD]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "[MASK]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "cache_dir": null, + "clean_up_tokenization_spaces": true, + "cls_token": "[CLS]", + "mask_token": "[MASK]", + "model_max_length": 100, + "pad_token": "[PAD]", + "padding_side": "right", + "sep_token": "[SEP]", + "tokenizer_class": "PreTrainedTokenizerFast", + "trust_remote_code": true, + "unk_token": "[UNK]", + "use_fast": true +} diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-2260/trainer_state.json b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-2260/trainer_state.json new file mode 100644 index 
0000000000000000000000000000000000000000..f28e660d26ee6895e5772af204cab811f543e314 --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-2260/trainer_state.json @@ -0,0 +1,281 @@ +{ + "best_metric": 0.930230965774696, + "best_model_checkpoint": "genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-2260", + "epoch": 10.0, + "eval_steps": 100, + "global_step": 2260, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.44, + "learning_rate": 2.2058823529411766e-05, + "loss": 0.4646, + "step": 100 + }, + { + "epoch": 0.88, + "learning_rate": 2.9096045197740113e-05, + "loss": 0.3866, + "step": 200 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.8425131469692776, + "eval_f1": 0.8402503747609833, + "eval_loss": 0.3561484217643738, + "eval_matthews_correlation": 0.6806277133135248, + "eval_precision": 0.8410196579024094, + "eval_recall": 0.8396095161931753, + "eval_runtime": 1.0703, + "eval_samples_per_second": 3375.797, + "eval_steps_per_second": 27.096, + "step": 226 + }, + { + "epoch": 1.33, + "learning_rate": 2.7683615819209038e-05, + "loss": 0.3305, + "step": 300 + }, + { + "epoch": 1.77, + "learning_rate": 2.627118644067797e-05, + "loss": 0.2826, + "step": 400 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.8782175477442569, + "eval_f1": 0.8781592957685165, + "eval_loss": 0.2907102406024933, + "eval_matthews_correlation": 0.7702174940935076, + "eval_precision": 0.8836075473649414, + "eval_recall": 0.8866158214838993, + "eval_runtime": 1.0555, + "eval_samples_per_second": 3422.906, + "eval_steps_per_second": 27.474, + "step": 452 + }, + { + "epoch": 2.21, + "learning_rate": 2.4858757062146894e-05, + "loss": 0.2378, + "step": 500 + }, + { + "epoch": 2.65, + "learning_rate": 2.3446327683615823e-05, + "loss": 0.2001, + 
"step": 600 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.9031275947965679, + "eval_f1": 0.9024774453787319, + "eval_loss": 0.24181613326072693, + "eval_matthews_correlation": 0.806470766326512, + "eval_precision": 0.901330138812324, + "eval_recall": 0.905149672327138, + "eval_runtime": 1.0568, + "eval_samples_per_second": 3418.949, + "eval_steps_per_second": 27.442, + "step": 678 + }, + { + "epoch": 3.1, + "learning_rate": 2.2033898305084748e-05, + "loss": 0.1843, + "step": 700 + }, + { + "epoch": 3.54, + "learning_rate": 2.0621468926553672e-05, + "loss": 0.1338, + "step": 800 + }, + { + "epoch": 3.98, + "learning_rate": 1.92090395480226e-05, + "loss": 0.1218, + "step": 900 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9097702740105176, + "eval_f1": 0.9089688664709563, + "eval_loss": 0.24736538529396057, + "eval_matthews_correlation": 0.8184477293295029, + "eval_precision": 0.9079389158139595, + "eval_recall": 0.9105128609194197, + "eval_runtime": 1.0552, + "eval_samples_per_second": 3424.113, + "eval_steps_per_second": 27.484, + "step": 904 + }, + { + "epoch": 4.42, + "learning_rate": 1.7796610169491526e-05, + "loss": 0.0768, + "step": 1000 + }, + { + "epoch": 4.87, + "learning_rate": 1.6384180790960454e-05, + "loss": 0.0838, + "step": 1100 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.9070024910047052, + "eval_f1": 0.9069014958118373, + "eval_loss": 0.3227614760398865, + "eval_matthews_correlation": 0.8247852591155882, + "eval_precision": 0.9102777373515565, + "eval_recall": 0.9145184235718482, + "eval_runtime": 1.0539, + "eval_samples_per_second": 3428.159, + "eval_steps_per_second": 27.516, + "step": 1130 + }, + { + "epoch": 5.31, + "learning_rate": 1.4971751412429379e-05, + "loss": 0.0617, + "step": 1200 + }, + { + "epoch": 5.75, + "learning_rate": 1.3559322033898305e-05, + "loss": 0.0497, + "step": 1300 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.9255466371436479, + "eval_f1": 0.9252111067120526, + "eval_loss": 0.31701555848121643, + 
"eval_matthews_correlation": 0.8539533757280905, + "eval_precision": 0.9244825296262367, + "eval_recall": 0.9294855011815828, + "eval_runtime": 1.0546, + "eval_samples_per_second": 3425.911, + "eval_steps_per_second": 27.498, + "step": 1356 + }, + { + "epoch": 6.19, + "learning_rate": 1.2146892655367232e-05, + "loss": 0.0382, + "step": 1400 + }, + { + "epoch": 6.64, + "learning_rate": 1.0734463276836158e-05, + "loss": 0.0357, + "step": 1500 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.9266537503459729, + "eval_f1": 0.9263500730195398, + "eval_loss": 0.37326350808143616, + "eval_matthews_correlation": 0.8566986044688514, + "eval_precision": 0.9257981734840697, + "eval_recall": 0.9309157158270056, + "eval_runtime": 1.0553, + "eval_samples_per_second": 3423.551, + "eval_steps_per_second": 27.479, + "step": 1582 + }, + { + "epoch": 7.08, + "learning_rate": 9.322033898305085e-06, + "loss": 0.0248, + "step": 1600 + }, + { + "epoch": 7.52, + "learning_rate": 7.909604519774012e-06, + "loss": 0.0195, + "step": 1700 + }, + { + "epoch": 7.96, + "learning_rate": 6.497175141242938e-06, + "loss": 0.016, + "step": 1800 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9305286465541102, + "eval_f1": 0.9302050841309717, + "eval_loss": 0.4228571653366089, + "eval_matthews_correlation": 0.8637825322542529, + "eval_precision": 0.9293918338811755, + "eval_recall": 0.9344052472327883, + "eval_runtime": 1.0668, + "eval_samples_per_second": 3386.606, + "eval_steps_per_second": 27.183, + "step": 1808 + }, + { + "epoch": 8.41, + "learning_rate": 5.084745762711865e-06, + "loss": 0.009, + "step": 1900 + }, + { + "epoch": 8.85, + "learning_rate": 3.6723163841807913e-06, + "loss": 0.0132, + "step": 2000 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.928037641848879, + "eval_f1": 0.927772269127762, + "eval_loss": 0.479954332113266, + "eval_matthews_correlation": 0.8601944891930503, + "eval_precision": 0.9274914175703679, + "eval_recall": 0.9327189557649391, + "eval_runtime": 1.0529, + 
"eval_samples_per_second": 3431.362, + "eval_steps_per_second": 27.542, + "step": 2034 + }, + { + "epoch": 9.29, + "learning_rate": 2.2598870056497174e-06, + "loss": 0.0088, + "step": 2100 + }, + { + "epoch": 9.73, + "learning_rate": 8.474576271186441e-07, + "loss": 0.004, + "step": 2200 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.9305286465541102, + "eval_f1": 0.930230965774696, + "eval_loss": 0.4578400254249573, + "eval_matthews_correlation": 0.8642878418232358, + "eval_precision": 0.9295883645861114, + "eval_recall": 0.9347146798558896, + "eval_runtime": 1.0531, + "eval_samples_per_second": 3430.884, + "eval_steps_per_second": 27.538, + "step": 2260 + } + ], + "logging_steps": 100, + "max_steps": 2260, + "num_train_epochs": 10, + "save_steps": 100, + "total_flos": 9803271256099200.0, + "trial_name": null, + "trial_params": null +} diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-2260/training_args.bin b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-2260/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..e28a83f2ce6797d3a0acfb029a1739a311f2ebcf --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-2260/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f39397f6cff29396a71264607ab5334185bc9975c37dd59d2b1896724d64a5d2 +size 5393 diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/results/base5120_human_nontata_promoters_lr3e-5_wd0.03_wr0.06_ep10_seed42/eval_results.json 
b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/results/base5120_human_nontata_promoters_lr3e-5_wd0.03_wr0.06_ep10_seed42/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..2264ab25b7b88aca388d59bbaa980e4486e0d745 --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/results/base5120_human_nontata_promoters_lr3e-5_wd0.03_wr0.06_ep10_seed42/eval_results.json @@ -0,0 +1 @@ +{"eval_loss": 0.4450441300868988, "eval_accuracy": 0.9297177642501383, "eval_f1": 0.9295506837288048, "eval_matthews_correlation": 0.8625877727464153, "eval_precision": 0.9294767252581735, "eval_recall": 0.933118736087702, "eval_runtime": 1.0754, "eval_samples_per_second": 3360.593, "eval_steps_per_second": 26.967, "epoch": 10.0} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_ocr_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.05_ep3_seed42/checkpoint-3279/config.json b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_ocr_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.05_ep3_seed42/checkpoint-3279/config.json new file mode 100644 index 0000000000000000000000000000000000000000..45e4c6c10a6211acf374c78e8078ab7ac74985f9 --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_ocr_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.05_ep3_seed42/checkpoint-3279/config.json @@ -0,0 +1,27 @@ +{ + "_name_or_path": "/data/nanhuang/Nan/models/DNAbert2_Pretrained", + "architectures": [ + "BertForSequenceClassification" + ], + "attention_probs_dropout_prob": 0.1, + "classifier_dropout": null, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_norm_eps": 1e-12, + "max_length": 512, + "max_position_embeddings": 
512, + "model_type": "bert", + "num_attention_heads": 12, + "num_hidden_layers": 12, + "pad_token_id": 0, + "position_embedding_type": "absolute", + "problem_type": "single_label_classification", + "torch_dtype": "float32", + "transformers_version": "4.35.2", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 4096 +} diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_ocr_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.05_ep3_seed42/checkpoint-3279/model.safetensors b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_ocr_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.05_ep3_seed42/checkpoint-3279/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7640ee1fc3e037cc90a45656fe0f4c61283b155f --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_ocr_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.05_ep3_seed42/checkpoint-3279/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16cb3885308f4f203d15b8840a6261817743ba63ff5c4f70a1e7fbab89891b97 +size 356777880 diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_ocr_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.05_ep3_seed42/checkpoint-3279/optimizer.pt b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_ocr_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.05_ep3_seed42/checkpoint-3279/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..5f4b1cf57e6bc46675bd890ceba626c3ccaeed7a --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_ocr_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.05_ep3_seed42/checkpoint-3279/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d4ae511c6d76954c0b2b4accd8adf71c0ac03a393037f072c7975028fa7cd0f +size 713677451 diff --git 
a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_ocr_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.05_ep3_seed42/checkpoint-3279/rng_state.pth b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_ocr_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.05_ep3_seed42/checkpoint-3279/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..271698ebb81246742521698898d87192c446bbab --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_ocr_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.05_ep3_seed42/checkpoint-3279/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b50f32e04470e65dd9dd07d5d4a7a178819433db2a005e51da0f1b1484c4aaef +size 14645 diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_ocr_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.05_ep3_seed42/checkpoint-3279/scheduler.pt b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_ocr_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.05_ep3_seed42/checkpoint-3279/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..739ce0899cc86cdd8ebd4d86ba71125919aebd3f --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_ocr_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.05_ep3_seed42/checkpoint-3279/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d4a500d1fb823c32f90422519a66e773684b10f5ac6a0dbca9e4ccc7773a49c +size 1465 diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_ocr_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.05_ep3_seed42/checkpoint-3279/special_tokens_map.json b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_ocr_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.05_ep3_seed42/checkpoint-3279/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..a8b3208c2884c4efb86e49300fdd3dc877220cdf --- /dev/null +++ 
b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_ocr_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.05_ep3_seed42/checkpoint-3279/special_tokens_map.json @@ -0,0 +1,7 @@ +{ + "cls_token": "[CLS]", + "mask_token": "[MASK]", + "pad_token": "[PAD]", + "sep_token": "[SEP]", + "unk_token": "[UNK]" +} diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_ocr_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.05_ep3_seed42/checkpoint-3279/tokenizer.json b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_ocr_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.05_ep3_seed42/checkpoint-3279/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c390760bdfa97b696a762628a15dd3bf7932038a --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_ocr_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.05_ep3_seed42/checkpoint-3279/tokenizer.json @@ -0,0 +1,8340 @@ +{ + "version": "1.0", + "truncation": { + "direction": "Right", + "max_length": 100, + "strategy": "LongestFirst", + "stride": 0 + }, + "padding": { + "strategy": "BatchLongest", + "direction": "Right", + "pad_to_multiple_of": null, + "pad_id": 3, + "pad_type_id": 0, + "pad_token": "[PAD]" + }, + "added_tokens": [ + { + "id": 0, + "content": "[UNK]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "[CLS]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "[SEP]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 3, + "content": "[PAD]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 4, + "content": "[MASK]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + 
"normalizer": null, + "pre_tokenizer": { + "type": "Whitespace" + }, + "post_processor": { + "type": "TemplateProcessing", + "single": [ + { + "SpecialToken": { + "id": "[CLS]", + "type_id": 0 + } + }, + { + "Sequence": { + "id": "A", + "type_id": 0 + } + }, + { + "SpecialToken": { + "id": "[SEP]", + "type_id": 0 + } + } + ], + "pair": [ + { + "SpecialToken": { + "id": "[CLS]", + "type_id": 0 + } + }, + { + "Sequence": { + "id": "A", + "type_id": 0 + } + }, + { + "SpecialToken": { + "id": "[SEP]", + "type_id": 0 + } + }, + { + "Sequence": { + "id": "B", + "type_id": 1 + } + }, + { + "SpecialToken": { + "id": "[SEP]", + "type_id": 1 + } + } + ], + "special_tokens": { + "[CLS]": { + "id": "[CLS]", + "ids": [ + 1 + ], + "tokens": [ + "[CLS]" + ] + }, + "[SEP]": { + "id": "[SEP]", + "ids": [ + 2 + ], + "tokens": [ + "[SEP]" + ] + } + } + }, + "decoder": null, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": "[UNK]", + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "vocab": { + "[UNK]": 0, + "[CLS]": 1, + "[SEP]": 2, + "[PAD]": 3, + "[MASK]": 4, + "A": 5, + "C": 6, + "G": 7, + "T": 8, + "AA": 9, + "TT": 10, + "TG": 11, + "CA": 12, + "CC": 13, + "TA": 14, + "GG": 15, + "TC": 16, + "GA": 17, + "AAA": 18, + "GC": 19, + "TAA": 20, + "TTTT": 21, + "TCA": 22, + "TGA": 23, + "TTA": 24, + "GAA": 25, + "TCC": 26, + "CAA": 27, + "CTG": 28, + "CTT": 29, + "GTG": 30, + "GTT": 31, + "GCA": 32, + "GGA": 33, + "CCA": 34, + "GTA": 35, + "GCC": 36, + "CTA": 37, + "TAAA": 38, + "AAAA": 39, + "CTC": 40, + "GTC": 41, + "TGTG": 42, + "TATT": 43, + "CACA": 44, + "GAAA": 45, + "TATA": 46, + "TCTT": 47, + "TGTT": 48, + "CAAA": 49, + "GAGA": 50, + "CATT": 51, + "TGAA": 52, + "CAGG": 53, + "TCTG": 54, + "CAGA": 55, + "TCAA": 56, + "GGAA": 57, + "TAAAA": 58, + "CTGA": 59, + "GCTT": 60, + "GTGA": 61, + "GCTG": 62, + "CTCA": 63, + "CCTT": 64, + "CATG": 65, + "GCAA": 66, + "GTCA": 67, + "GTAA": 68, + "TTTTA": 69, + 
"TATG": 70, + "GAGG": 71, + "CGG": 72, + "GATT": 73, + "CCTG": 74, + "TCTC": 75, + "CCAA": 76, + "GTTA": 77, + "CTCC": 78, + "CTAA": 79, + "TACA": 80, + "CTTA": 81, + "TCCA": 82, + "GATG": 83, + "TTAA": 84, + "GAAAA": 85, + "TTTG": 86, + "GTTTT": 87, + "TCTA": 88, + "GCCA": 89, + "GTCC": 90, + "CTTTT": 91, + "GGGG": 92, + "CGA": 93, + "TTTA": 94, + "CCCA": 95, + "CAAAA": 96, + "TGGG": 97, + "TAGA": 98, + "TAGG": 99, + "GACA": 100, + "GGTT": 101, + "CCCC": 102, + "GGTG": 103, + "CATA": 104, + "GCTA": 105, + "TGTA": 106, + "TCAAA": 107, + "TGGA": 108, + "TAATT": 109, + "TTATT": 110, + "TGCA": 111, + "GGCA": 112, + "GATA": 113, + "CCTA": 114, + "TTCA": 115, + "TCTCA": 116, + "GGGA": 117, + "CGC": 118, + "CTGAA": 119, + "GTAAA": 120, + "TCTCC": 121, + "TTTTTT": 122, + "CGTG": 123, + "GCAAA": 124, + "TAAAAA": 125, + "TCTGA": 126, + "TCATT": 127, + "GGAAA": 128, + "TGAAA": 129, + "TCCTT": 130, + "CCAAA": 131, + "GAATT": 132, + "CTAAA": 133, + "CGTT": 134, + "GTGAA": 135, + "GGCC": 136, + "TAATA": 137, + "GGTA": 138, + "TGCC": 139, + "CACC": 140, + "TGATT": 141, + "AAAAAA": 142, + "GCTCA": 143, + "TCCAA": 144, + "GAGAA": 145, + "CTGTT": 146, + "TATTA": 147, + "CAGCA": 148, + "CTCTT": 149, + "CTTAA": 150, + "CAGAA": 151, + "GCTGA": 152, + "GTTAA": 153, + "TCTTA": 154, + "TATTTT": 155, + "GCCAA": 156, + "CTTTG": 157, + "GACC": 158, + "CGCA": 159, + "GTATT": 160, + "GTCTT": 161, + "CAATT": 162, + "GTGTT": 163, + "CTCAA": 164, + "GGAGG": 165, + "CGAA": 166, + "TCTTTT": 167, + "GTCAA": 168, + "CGCC": 169, + "TATAA": 170, + "TACC": 171, + "TCTAA": 172, + "CCATT": 173, + "CGGA": 174, + "CAAAAA": 175, + "CAGTG": 176, + "TCCTG": 177, + "CTCTG": 178, + "GAAAAA": 179, + "CTGTG": 180, + "CAGC": 181, + "TTTTAA": 182, + "GCATT": 183, + "GCCTT": 184, + "TAATG": 185, + "CTATT": 186, + "GTTTG": 187, + "TGATG": 188, + "GGCTG": 189, + "CCTCA": 190, + "GAGGA": 191, + "GCCTG": 192, + "AAATT": 193, + "CGTA": 194, + "TCAAAA": 195, + "TACAA": 196, + "CATCA": 197, + "CAGTT": 198, + "TGAGA": 199, 
+ "GGGAA": 200, + "CACTG": 201, + "CACAA": 202, + "CAGGA": 203, + "CCCCA": 204, + "CCCTG": 205, + "TTTTTTTT": 206, + "TAGAA": 207, + "GAGCA": 208, + "CCTCC": 209, + "CACCA": 210, + "TATCA": 211, + "GAGC": 212, + "CATTA": 213, + "CACACACA": 214, + "GAGTG": 215, + "GGATT": 216, + "TGTGTGTG": 217, + "TACTT": 218, + "CACTT": 219, + "GTCTG": 220, + "TGAGG": 221, + "GAGTT": 222, + "GAATG": 223, + "TCATG": 224, + "GACAA": 225, + "GACTT": 226, + "TATTAA": 227, + "TAATAA": 228, + "GGCCA": 229, + "CATTTT": 230, + "CAGCC": 231, + "CCCTT": 232, + "GCTAA": 233, + "TATATATA": 234, + "GTGTG": 235, + "TACTG": 236, + "TAGTT": 237, + "CAATG": 238, + "GCTC": 239, + "CAGTA": 240, + "GCTCC": 241, + "CATAA": 242, + "TTATG": 243, + "TAAATT": 244, + "GATGA": 245, + "CATGA": 246, + "GCGG": 247, + "AAAAAAAA": 248, + "CCATG": 249, + "GATAA": 250, + "GACTG": 251, + "TATGA": 252, + "GCAGG": 253, + "GATCA": 254, + "GTTTTA": 255, + "GGATG": 256, + "CCTGA": 257, + "GTAAAA": 258, + "GAAGG": 259, + "GATTA": 260, + "CCTC": 261, + "GACCA": 262, + "GCTTA": 263, + "CCCAA": 264, + "AAATG": 265, + "GCATG": 266, + "TAGTA": 267, + "TACCA": 268, + "GGCTT": 269, + "CGTC": 270, + "TCTCTT": 271, + "GGTCA": 272, + "TTATTA": 273, + "TACTA": 274, + "TAGCA": 275, + "TATC": 276, + "CTGGG": 277, + "CATC": 278, + "CTTTTA": 279, + "CTAAAA": 280, + "GTGGG": 281, + "GAGTA": 282, + "CCAGG": 283, + "GATTTT": 284, + "TAGTG": 285, + "GAAATT": 286, + "CACTA": 287, + "TCGG": 288, + "TCAGG": 289, + "CAGGAA": 290, + "GCAAAA": 291, + "CCTTA": 292, + "CATCC": 293, + "CTTGG": 294, + "TGTGAA": 295, + "TATTTG": 296, + "CCTAA": 297, + "CTATG": 298, + "GAGAAA": 299, + "GAGAGAGA": 300, + "GCTTTT": 301, + "TATAAA": 302, + "CAAGG": 303, + "TCTCTG": 304, + "TGTTAA": 305, + "TGTGTT": 306, + "GAGCC": 307, + "GACTA": 308, + "TATATT": 309, + "TAAAAAA": 310, + "TTTTTG": 311, + "GTATG": 312, + "CATTAA": 313, + "TAGGA": 314, + "TAGC": 315, + "GTTGG": 316, + "GAAGAA": 317, + "TAAATG": 318, + "TCTGTT": 319, + "CAGAAA": 320, + "CAAATT": 321, + 
"TAATTA": 322, + "TCTGTG": 323, + "TATCC": 324, + "TGAATT": 325, + "CTCCA": 326, + "GTGAAA": 327, + "GGCAA": 328, + "GGAGA": 329, + "GAAGA": 330, + "GGTGA": 331, + "GGGCA": 332, + "CCAAAA": 333, + "TCTCTCTC": 334, + "CTGCA": 335, + "CTTCTT": 336, + "TCTTAA": 337, + "CCCTA": 338, + "TGTGTG": 339, + "AAATA": 340, + "TGTTTG": 341, + "GGGTT": 342, + "GTGCTG": 343, + "GGAAAA": 344, + "GGGGA": 345, + "TCAGA": 346, + "CCTTTT": 347, + "GAAATG": 348, + "GCAGCA": 349, + "TCTGAA": 350, + "GGGTG": 351, + "CACATT": 352, + "TCTTTG": 353, + "GGGC": 354, + "TCCCA": 355, + "TCCATT": 356, + "CTGAAA": 357, + "CTTTA": 358, + "TCGA": 359, + "GTTTA": 360, + "CAACAA": 361, + "CTTCC": 362, + "GCCTCC": 363, + "TTAAA": 364, + "GCTCTG": 365, + "GTTTCA": 366, + "GGAGGA": 367, + "CGTGA": 368, + "CAGTC": 369, + "GAATA": 370, + "CAGAGA": 371, + "CCCTC": 372, + "CAAATG": 373, + "CTGCTG": 374, + "GATCC": 375, + "TTTTATT": 376, + "AAAATT": 377, + "TTATA": 378, + "TCAATT": 379, + "GGTAA": 380, + "GTTATT": 381, + "GCCAGG": 382, + "GGAGAA": 383, + "CATTTG": 384, + "TCACC": 385, + "CTCAAA": 386, + "GGTTA": 387, + "TCCAAA": 388, + "TCTATT": 389, + "GCAGA": 390, + "CTTCA": 391, + "TCATCA": 392, + "CGAGG": 393, + "TAACA": 394, + "GTTGTT": 395, + "CTTATT": 396, + "CGTCA": 397, + "TAAGA": 398, + "TAATTTT": 399, + "CTGTA": 400, + "TCCACA": 401, + "GCTGTG": 402, + "CGCTG": 403, + "TCTAAA": 404, + "GCGA": 405, + "CAATA": 406, + "CCACCA": 407, + "GAACA": 408, + "CGAAA": 409, + "CAGATT": 410, + "TCACA": 411, + "TTATTTT": 412, + "TCTCAA": 413, + "TGACA": 414, + "CTCCAA": 415, + "AAAAAAA": 416, + "TATATG": 417, + "TCCTCC": 418, + "TCACTT": 419, + "TCCAGG": 420, + "CAAGA": 421, + "GGCTA": 422, + "GTGGTG": 423, + "CGTAA": 424, + "CGAGA": 425, + "TGATA": 426, + "GGATTA": 427, + "CAACA": 428, + "CGATT": 429, + "TGAGAA": 430, + "CTCCTT": 431, + "CTCATT": 432, + "GTTAAA": 433, + "TCATA": 434, + "CCTCTG": 435, + "CTCTA": 436, + "GCTGAA": 437, + "CTGGA": 438, + "TAAGG": 439, + "CTTAAA": 440, + "TATTTA": 441, + "CCACA": 
442, + "CCGG": 443, + "GTCAAA": 444, + "TGGAA": 445, + "CGGAA": 446, + "TGATGA": 447, + "GTTCA": 448, + "TAACAA": 449, + "GCTGTT": 450, + "TAAGAA": 451, + "CTGCC": 452, + "TTAATT": 453, + "CCAGA": 454, + "TCAGAA": 455, + "GTCATT": 456, + "CGCTT": 457, + "GATTAA": 458, + "CTGATT": 459, + "GCCACA": 460, + "GTAATT": 461, + "TCCAGA": 462, + "GCCAAA": 463, + "GTGATT": 464, + "TAAAATT": 465, + "CAAGAA": 466, + "CCACC": 467, + "TAATCC": 468, + "GTTCTT": 469, + "TCCATG": 470, + "GCTCTT": 471, + "TGCTG": 472, + "GGGTA": 473, + "TTACA": 474, + "GCCATT": 475, + "GCACA": 476, + "GCAATT": 477, + "TCCCTG": 478, + "TGTGA": 479, + "TCGAA": 480, + "GGACA": 481, + "GGAATT": 482, + "GTGGA": 483, + "CTTCTG": 484, + "TCCCC": 485, + "GCCCC": 486, + "CTTGA": 487, + "TAATGA": 488, + "TAAATA": 489, + "TATATA": 490, + "CTGCAA": 491, + "TCATTA": 492, + "GTATA": 493, + "TCCCCA": 494, + "CGTTA": 495, + "GCAGAA": 496, + "TGAGTT": 497, + "CTTTTTT": 498, + "CGATG": 499, + "CTTTCA": 500, + "AAAATG": 501, + "CAGGTT": 502, + "CTAATT": 503, + "CGCCA": 504, + "TGAAAAA": 505, + "GTTCC": 506, + "GTCCTT": 507, + "GTCCAA": 508, + "GTTTTTT": 509, + "CTCTGA": 510, + "GCGC": 511, + "GTTGA": 512, + "TGAATG": 513, + "CTATA": 514, + "GCAGTG": 515, + "CCTTAA": 516, + "TCACCA": 517, + "TCACTG": 518, + "GCCCTG": 519, + "TAACTT": 520, + "CAGATG": 521, + "GTAGG": 522, + "TCTATA": 523, + "GAGATT": 524, + "GTCTA": 525, + "TTTTAAA": 526, + "CACATG": 527, + "TGACC": 528, + "CACAAA": 529, + "GTGTA": 530, + "GGGAGG": 531, + "GCTTTG": 532, + "CAAAAAA": 533, + "GAGGAA": 534, + "GTTCTG": 535, + "TTTTTA": 536, + "GTCTCA": 537, + "GTTCAA": 538, + "TCGTG": 539, + "GCTTAA": 540, + "GCACC": 541, + "CTCCTG": 542, + "TAAATAAA": 543, + "CTACA": 544, + "CTTCCA": 545, + "TCCTCA": 546, + "CGCAA": 547, + "GAAAAAA": 548, + "GCCCA": 549, + "TCGTT": 550, + "GTAGA": 551, + "CTCTCA": 552, + "GTCCA": 553, + "TGACTT": 554, + "TCCCTT": 555, + "GCCATG": 556, + "CACACACACACACACA": 557, + "GTGATG": 558, + "CCTCTT": 559, + "GCCAGA": 560, + "TCCTA": 
561, + "CGTTTT": 562, + "GTACA": 563, + "GCATA": 564, + "GAATTA": 565, + "TGTGTGTGTGTGTGTG": 566, + "CCCAGG": 567, + "GGTTTT": 568, + "TCAAAAA": 569, + "TCTATG": 570, + "CCATA": 571, + "TGACAA": 572, + "GGATA": 573, + "TCAGTG": 574, + "GTATTTT": 575, + "GAGATG": 576, + "GCGTG": 577, + "CGTCC": 578, + "TTAAAAA": 579, + "TAATCA": 580, + "CAATTA": 581, + "CCACTG": 582, + "CGGTT": 583, + "GTTGAA": 584, + "TGATTA": 585, + "CCTTTG": 586, + "CGGTG": 587, + "CAGGTG": 588, + "TCAATG": 589, + "CTGATG": 590, + "TCAGGA": 591, + "GTTTAA": 592, + "TATTAAA": 593, + "CTCTTA": 594, + "GCAGGA": 595, + "CTCTCC": 596, + "GAACC": 597, + "CTTTAA": 598, + "GGGCC": 599, + "GTATTA": 600, + "GCGCC": 601, + "CCAATT": 602, + "GCTAAA": 603, + "TGACTG": 604, + "GATTTG": 605, + "GATAAA": 606, + "TCAGCA": 607, + "GTTCCA": 608, + "GAAATA": 609, + "GACAAA": 610, + "GAGTC": 611, + "GCTATT": 612, + "TCACAA": 613, + "GAGGTT": 614, + "TAACC": 615, + "GAAGGA": 616, + "GCTCAA": 617, + "GAAAATT": 618, + "CCAGCA": 619, + "GTTTTAA": 620, + "GTGCC": 621, + "TGAGGA": 622, + "CATAAA": 623, + "GGTCC": 624, + "TCATTTT": 625, + "TATTTATT": 626, + "TAATAAA": 627, + "GCCTA": 628, + "CTTTTAA": 629, + "TAAGTG": 630, + "TAAGTA": 631, + "CTGGAA": 632, + "CACACA": 633, + "GACAGA": 634, + "CAACC": 635, + "GGGAAA": 636, + "CCAGAA": 637, + "TCAGTT": 638, + "TAACTA": 639, + "CTAAAAA": 640, + "TGGGTT": 641, + "TGAGTG": 642, + "TAAAATG": 643, + "TATATATATATATATA": 644, + "GCACTG": 645, + "GACTC": 646, + "TACAAA": 647, + "TAAAAAAA": 648, + "TCTACA": 649, + "GTTGTG": 650, + "TCGCC": 651, + "CCCAAA": 652, + "GTCATG": 653, + "CTGCTT": 654, + "GGAATG": 655, + "CTATTA": 656, + "GATATT": 657, + "TAGAAA": 658, + "GGCAGG": 659, + "GATGAA": 660, + "GTAGAA": 661, + "TCCTGA": 662, + "TAACTG": 663, + "GCTGGG": 664, + "GCAATG": 665, + "GCCCCA": 666, + "GTTTGA": 667, + "CATTTA": 668, + "GTGCA": 669, + "CTTGAA": 670, + "GTGGAA": 671, + "CTTCAA": 672, + "TAAATTA": 673, + "GTGGCA": 674, + "TCCTTA": 675, + "GGAAAAA": 676, + "TTTTTTA": 677, + 
"CCTGTG": 678, + "GTAATG": 679, + "GTGTTA": 680, + "CTAGG": 681, + "CAGGCTG": 682, + "GACACA": 683, + "GAAAAAAA": 684, + "TCGC": 685, + "GTAAAAA": 686, + "TGTTTA": 687, + "TCTCTA": 688, + "GTCCTG": 689, + "CCAGGA": 690, + "GAACAA": 691, + "TAAGTT": 692, + "TGAGCA": 693, + "GCTCCA": 694, + "TAAGCA": 695, + "CTCATG": 696, + "GTCTTA": 697, + "CCCACA": 698, + "CATATT": 699, + "GCCTCA": 700, + "CACTC": 701, + "CTTCTA": 702, + "TGATTTT": 703, + "TCGCA": 704, + "CCTGTT": 705, + "GAAGCA": 706, + "GCAAAAA": 707, + "GCGGA": 708, + "CCACAA": 709, + "GCGCA": 710, + "CATATA": 711, + "GACATT": 712, + "GTTCTA": 713, + "CAAAATT": 714, + "GAAAGAAA": 715, + "CCCGG": 716, + "TACACA": 717, + "CCAAAAA": 718, + "GAGGTG": 719, + "GGCTCA": 720, + "CAGTGA": 721, + "TCCCAA": 722, + "TATCTT": 723, + "TGAGTA": 724, + "TCGTA": 725, + "TTTTCTT": 726, + "GTGGGA": 727, + "GAGCTG": 728, + "CCCTCC": 729, + "TAGGTT": 730, + "TTAGG": 731, + "TAATATT": 732, + "CCAGCC": 733, + "CATCTT": 734, + "GTCTGA": 735, + "GTTTCC": 736, + "CCTGAA": 737, + "GGAGCA": 738, + "GAAAATG": 739, + "TCAGTA": 740, + "TAACCA": 741, + "GATGTT": 742, + "CTGTTA": 743, + "CATGTT": 744, + "GGCGG": 745, + "CATGTG": 746, + "GGGAGA": 747, + "CTTTGA": 748, + "TCTTTCTT": 749, + "AAAAAAAAA": 750, + "GGGGTG": 751, + "CTTTCC": 752, + "CTTGTT": 753, + "GCATTA": 754, + "CCCAGA": 755, + "CAAATA": 756, + "TCGGA": 757, + "CAGCTT": 758, + "TCACTA": 759, + "TAATTAA": 760, + "TAAGGA": 761, + "GAACTG": 762, + "GCACAA": 763, + "GCGTT": 764, + "GGCTC": 765, + "TCTTTTA": 766, + "CCTCCA": 767, + "GGCAAA": 768, + "CAGCTG": 769, + "CTACAA": 770, + "TACATT": 771, + "GCTATG": 772, + "CTTGTG": 773, + "GAGTCA": 774, + "GTTATG": 775, + "CTGCCA": 776, + "GTCTCC": 777, + "TGACCA": 778, + "CACCTG": 779, + "TATATTA": 780, + "TGATCA": 781, + "CAGCAA": 782, + "GATGTG": 783, + "GTCTTTT": 784, + "CTAGAA": 785, + "GCTACA": 786, + "CTGGGA": 787, + "GGGGTT": 788, + "CAAGTA": 789, + "CAAGGA": 790, + "CCCTCA": 791, + "TAGCC": 792, + "GTTGGA": 793, + "GCTATA": 794, + 
"TCTGAAA": 795, + "TATGTT": 796, + "CCCCTT": 797, + "GTTGTA": 798, + "CCCTGA": 799, + "TGACTA": 800, + "CAAGCA": 801, + "CAATAA": 802, + "GAACTT": 803, + "CATGAA": 804, + "CTTATG": 805, + "CTAATG": 806, + "TCTAAAA": 807, + "CCAATG": 808, + "GAAGTG": 809, + "CCTCAA": 810, + "CCCATT": 811, + "CAGTCA": 812, + "GAGAGAGAGAGAGAGA": 813, + "TATGTG": 814, + "GCAGTGA": 815, + "TCTCCTT": 816, + "TCCCAAA": 817, + "CCATTA": 818, + "CCAGTG": 819, + "GCATCA": 820, + "TCAAATT": 821, + "GATCTT": 822, + "GACAGG": 823, + "GGAGTG": 824, + "GTAGTA": 825, + "CAACTT": 826, + "GAAGTT": 827, + "CCCCTG": 828, + "TCTCAAA": 829, + "GGGTC": 830, + "GAGCTT": 831, + "TATGAAA": 832, + "TATGAA": 833, + "GACATG": 834, + "CAAGTG": 835, + "GATATA": 836, + "CATCTG": 837, + "CTGTGA": 838, + "TAATTTA": 839, + "GGCAGA": 840, + "GCGAA": 841, + "CCTAAA": 842, + "CCATCA": 843, + "CACTGA": 844, + "GGACTA": 845, + "GACGG": 846, + "CTCTTTT": 847, + "CTGTCA": 848, + "TCTCTCTCTCTCTCTC": 849, + "TTAATG": 850, + "GCAGCC": 851, + "CAAAAAAA": 852, + "GCACCA": 853, + "CTATTTT": 854, + "GAGCAA": 855, + "CTTGGA": 856, + "CTGGTG": 857, + "GAATAA": 858, + "TCCTTTT": 859, + "GAAGTA": 860, + "CAGTAA": 861, + "CAACCA": 862, + "CTGTAA": 863, + "TGATAA": 864, + "GCAGTT": 865, + "CACGG": 866, + "TAAATAA": 867, + "CTGTTTT": 868, + "CTACTA": 869, + "GCTCTA": 870, + "CGAAAA": 871, + "CAAGTT": 872, + "CTTGTA": 873, + "GAATGA": 874, + "GAGTGA": 875, + "GCCTGA": 876, + "GGTTTG": 877, + "CCCATG": 878, + "GGGGAA": 879, + "GAAGAAA": 880, + "TGTTA": 881, + "CAATTTT": 882, + "TATATTTT": 883, + "CTCAAAA": 884, + "GGTGGG": 885, + "CCGTG": 886, + "TATTTCA": 887, + "CCCCAA": 888, + "TATTTAA": 889, + "GGCTGA": 890, + "GGTGTG": 891, + "CATCAA": 892, + "CACTCA": 893, + "TCTCATT": 894, + "GAATTTT": 895, + "GAATCA": 896, + "CAGGAAA": 897, + "CATACA": 898, + "TATTTTA": 899, + "TTATAA": 900, + "GAGGAAA": 901, + "CATATG": 902, + "CTTTCTT": 903, + "CAACTG": 904, + "GGGCTG": 905, + "CCCCCA": 906, + "TTTGAAA": 907, + "CATTAAA": 908, + "CTTAAAA": 909, 
+ "GACTGA": 910, + "CAATGA": 911, + "GGCACA": 912, + "CCAGTA": 913, + "GGATGA": 914, + "GTTTTTG": 915, + "GCATTTT": 916, + "GTGCCA": 917, + "GCAGTA": 918, + "GCCCTT": 919, + "TCGTC": 920, + "GAACTA": 921, + "GTGGTT": 922, + "GTGTGA": 923, + "GTGCTT": 924, + "CGCTA": 925, + "GTGTCA": 926, + "TCTTTA": 927, + "GCCTTA": 928, + "CCTATT": 929, + "CAAAATG": 930, + "GAACCA": 931, + "CTCCAGG": 932, + "GACTCA": 933, + "CATGAAA": 934, + "GCTAGG": 935, + "TGTTAAA": 936, + "GCGTA": 937, + "GCACTT": 938, + "TCTTAAA": 939, + "TAAGAAA": 940, + "GGCCTG": 941, + "TCCCTA": 942, + "GTGGTA": 943, + "CTGCTA": 944, + "GGAGTT": 945, + "GGTAAA": 946, + "CAAACAAA": 947, + "GATATG": 948, + "TCATGA": 949, + "GACCTT": 950, + "TAATATA": 951, + "GCTAGA": 952, + "GGACTG": 953, + "GGCATT": 954, + "CAGTTA": 955, + "CCCTAA": 956, + "CACCTT": 957, + "GGTGAA": 958, + "CAGCTA": 959, + "GTGTTTT": 960, + "CAACTA": 961, + "GATCAA": 962, + "GAGAAAA": 963, + "TGTGAAA": 964, + "AAAATA": 965, + "GATGAAA": 966, + "CTCTAA": 967, + "TTACTT": 968, + "GATCTG": 969, + "CCACTT": 970, + "GAGTTA": 971, + "CAATCA": 972, + "GGATTACAGG": 973, + "TTTATTTT": 974, + "TACATA": 975, + "TTTTATG": 976, + "GAGTAA": 977, + "GCTGAAA": 978, + "GTACTG": 979, + "GCTCTC": 980, + "TATGTA": 981, + "TGTGTA": 982, + "TCATAA": 983, + "GGACTT": 984, + "TCTCCAA": 985, + "GCATGA": 986, + "GACGA": 987, + "CGCCTG": 988, + "GACCTG": 989, + "GGTCTT": 990, + "CACCAA": 991, + "GATC": 992, + "GACCAA": 993, + "AAAATTA": 994, + "GTAAATT": 995, + "CCAGTT": 996, + "CAGAAAA": 997, + "TAACAAA": 998, + "GGTGTT": 999, + "GAAATTA": 1000, + "TGCCTCA": 1001, + "CCGCC": 1002, + "CCATTTT": 1003, + "CTTGCC": 1004, + "TCTGTA": 1005, + "CTGGCA": 1006, + "GGGATG": 1007, + "CCATGA": 1008, + "CTACTT": 1009, + "TAGGTG": 1010, + "TAAAAATT": 1011, + "GAAAGAA": 1012, + "TAAAATA": 1013, + "CTTTTTG": 1014, + "GTCAAAA": 1015, + "GGACAA": 1016, + "TCTGATT": 1017, + "CTCTCTT": 1018, + "TAATTTG": 1019, + "CTCTTTG": 1020, + "GGCCTT": 1021, + "GGATTTT": 1022, + "CTACTG": 1023, + 
"GTTGCA": 1024, + "GGCTCC": 1025, + "CTCTGTG": 1026, + "CTCCAGCC": 1027, + "TTACAA": 1028, + "GGACCA": 1029, + "GGAAGGAA": 1030, + "TAAAGAA": 1031, + "TTAGAA": 1032, + "GTGAAAA": 1033, + "CTTGCA": 1034, + "TGGGTG": 1035, + "GGAGCC": 1036, + "CCTCTA": 1037, + "CT": 1038, + "GGGCTT": 1039, + "GGCATG": 1040, + "CTGGTT": 1041, + "TACAGA": 1042, + "GATTAAA": 1043, + "CTCTGTT": 1044, + "TTATCA": 1045, + "CTGAAAA": 1046, + "GTAGTT": 1047, + "GGGTCA": 1048, + "GT": 1049, + "CAGCCA": 1050, + "GCGTC": 1051, + "CACTTA": 1052, + "GTGCTA": 1053, + "TCTTATT": 1054, + "GTACTT": 1055, + "GGTATT": 1056, + "TAGAGA": 1057, + "TACATG": 1058, + "CCACTA": 1059, + "TGAGAAA": 1060, + "CAATAAA": 1061, + "TCCAAAA": 1062, + "CGTGAA": 1063, + "GGTCTG": 1064, + "CTGAATT": 1065, + "TCAGCC": 1066, + "CCTCTC": 1067, + "GTTAAAA": 1068, + "GGGATT": 1069, + "TCCTAA": 1070, + "CACTAA": 1071, + "GGAGAAA": 1072, + "CCTTCCTT": 1073, + "GTTTCTT": 1074, + "TATCAA": 1075, + "GATACA": 1076, + "TAATCCCAGCA": 1077, + "CCGCA": 1078, + "TGAAATT": 1079, + "CGTAAA": 1080, + "CTCTCTG": 1081, + "TCTTTTTT": 1082, + "GTACAA": 1083, + "CCAAATT": 1084, + "TGTATTTT": 1085, + "TCGCTT": 1086, + "GGGTGA": 1087, + "GATAGA": 1088, + "CTTTATT": 1089, + "TAAACAA": 1090, + "GTTTATT": 1091, + "TGAATA": 1092, + "CTACCA": 1093, + "GTGTCC": 1094, + "CCCGA": 1095, + "TTTATTA": 1096, + "CTCCAAA": 1097, + "TTTTTTTTTTTT": 1098, + "TCATCC": 1099, + "GAAGCC": 1100, + "CTAAATT": 1101, + "CAAATTA": 1102, + "CCCCAAA": 1103, + "TCTTCTT": 1104, + "TAGGAAA": 1105, + "CACGA": 1106, + "CATTTTA": 1107, + "GTGCAA": 1108, + "TCTCCTG": 1109, + "TATTTTAA": 1110, + "GTTTGTT": 1111, + "GAGCCA": 1112, + "GGCCAA": 1113, + "CATTTCA": 1114, + "CATCCA": 1115, + "CCTATA": 1116, + "GACTTA": 1117, + "TCAAATG": 1118, + "GTATCA": 1119, + "TAAATTTT": 1120, + "CTGAGGCA": 1121, + "GCCCAA": 1122, + "GGTTAA": 1123, + "TATCTG": 1124, + "TGACAGA": 1125, + "GGAGAGA": 1126, + "GCTGCTG": 1127, + "CCCTTA": 1128, + "TCCTCTG": 1129, + "GTAGCA": 1130, + "CCTGAAA": 1131, + 
"CCGAA": 1132, + "TTTTTAA": 1133, + "CTATAA": 1134, + "CCTGTA": 1135, + "TTACTG": 1136, + "GTATAA": 1137, + "GGCGA": 1138, + "GACTAA": 1139, + "TCAGAAA": 1140, + "GTGTGTG": 1141, + "CAAAGAA": 1142, + "CCTATG": 1143, + "GCAGAGA": 1144, + "CCGTT": 1145, + "TTTTATTTT": 1146, + "GGAAGAA": 1147, + "TTACTA": 1148, + "GCCTGGG": 1149, + "TCCCTC": 1150, + "TCCTCTT": 1151, + "GGATCA": 1152, + "GGTCAA": 1153, + "TCGAGA": 1154, + "TATTCTT": 1155, + "TACTC": 1156, + "GTTAATT": 1157, + "GCGAGA": 1158, + "CTTAATT": 1159, + "TCCTTTG": 1160, + "GTCTAA": 1161, + "CACCCA": 1162, + "GGGTTA": 1163, + "GGGCAA": 1164, + "GGAAATG": 1165, + "GCAAATT": 1166, + "TAGATG": 1167, + "GCAGAAA": 1168, + "AAAAAAAAAAAAAAAA": 1169, + "CCTACA": 1170, + "GGAGTA": 1171, + "TCTAATT": 1172, + "CAACAAA": 1173, + "TAGATT": 1174, + "GGTTTA": 1175, + "CCTAGA": 1176, + "CTTTAAA": 1177, + "TACTTA": 1178, + "TAATGAA": 1179, + "CTATCA": 1180, + "TAGTAA": 1181, + "CAGAGAA": 1182, + "CAAGAAA": 1183, + "GGGGAAA": 1184, + "CGTTAA": 1185, + "CGTGTT": 1186, + "TCTGTCTG": 1187, + "TTTTAATT": 1188, + "CTGGCC": 1189, + "TAAATGA": 1190, + "CGTCAA": 1191, + "TTAGTA": 1192, + "GTCTCTG": 1193, + "TTTTAAAA": 1194, + "CAGTTTT": 1195, + "CTTCCTT": 1196, + "TATATAA": 1197, + "GCTTTTA": 1198, + "TTTTTCA": 1199, + "GGTC": 1200, + "TTATTAA": 1201, + "TTTTGTT": 1202, + "CATAGA": 1203, + "TAGGAA": 1204, + "GAGAGAA": 1205, + "GTAGCTG": 1206, + "TTATGA": 1207, + "GTAGTG": 1208, + "GGAGAGG": 1209, + "CTCTGAA": 1210, + "TAGTC": 1211, + "GACTCC": 1212, + "TCCCTCC": 1213, + "TAATGTT": 1214, + "CATCTA": 1215, + "GCCACCA": 1216, + "GTACTA": 1217, + "TGGGAAA": 1218, + "CGCCTT": 1219, + "GCCCGG": 1220, + "GGAGGAA": 1221, + "GTACCA": 1222, + "CGCAAA": 1223, + "CATAAAA": 1224, + "TAACATT": 1225, + "GCTAAAA": 1226, + "TCTTCTG": 1227, + "GCCAAAA": 1228, + "GTATGA": 1229, + "GTCTTTG": 1230, + "TACTGA": 1231, + "TCCCAGG": 1232, + "TTATTTA": 1233, + "TTAGTT": 1234, + "GGACC": 1235, + "TATAAAA": 1236, + "CAAACAA": 1237, + "CTTCTC": 1238, + "TCTATCTA": 
1239, + "GAAATAA": 1240, + "GTGTAA": 1241, + "CTTTGTT": 1242, + "GATAAAA": 1243, + "GCCCAGG": 1244, + "GCGATT": 1245, + "AAAAAATT": 1246, + "TACAGG": 1247, + "GGCTAA": 1248, + "TAGCTT": 1249, + "GTCTCTA": 1250, + "CTCCTGA": 1251, + "GAATAAA": 1252, + "TTACCA": 1253, + "GGGACA": 1254, + "GCCACTG": 1255, + "GTTTAAA": 1256, + "GTCTGTG": 1257, + "TGACAAA": 1258, + "TACATTTT": 1259, + "GCCACC": 1260, + "TGTTTT": 1261, + "TAGCAA": 1262, + "TTATAAA": 1263, + "GACCCA": 1264, + "GCAGC": 1265, + "CAGACAGA": 1266, + "CACAAAA": 1267, + "GCCCTA": 1268, + "TATTAAAA": 1269, + "CGTATT": 1270, + "CCATCC": 1271, + "TCGATT": 1272, + "GAAGGAA": 1273, + "GATCCA": 1274, + "TATTTGA": 1275, + "GTGAATT": 1276, + "TACCTT": 1277, + "CGTCTT": 1278, + "CCTAGG": 1279, + "TCGAAA": 1280, + "CTTTCTG": 1281, + "TGAAGAA": 1282, + "TCTCTCA": 1283, + "GTCTCTT": 1284, + "GGAGGGG": 1285, + "GTCTGTT": 1286, + "CTATGA": 1287, + "GGAAATT": 1288, + "GCACACA": 1289, + "GCCTTTT": 1290, + "CAGTCC": 1291, + "CTGGTA": 1292, + "GCATCC": 1293, + "TAGTTA": 1294, + "GGCTTA": 1295, + "GAGTCC": 1296, + "TGAAAA": 1297, + "TAGATAGA": 1298, + "TGTTTGTT": 1299, + "TACTCA": 1300, + "CATTTAA": 1301, + "GATTTTA": 1302, + "CACTCC": 1303, + "GAAACAA": 1304, + "GCGCTG": 1305, + "TCTTTCA": 1306, + "CTGTCC": 1307, + "GAACTCA": 1308, + "CGGAAA": 1309, + "TATTGTT": 1310, + "GCACTA": 1311, + "TATTCAA": 1312, + "GCGGGG": 1313, + "GTGGCC": 1314, + "TAATTAAA": 1315, + "TACTAA": 1316, + "GCGGTG": 1317, + "TACCAA": 1318, + "GGTATA": 1319, + "CTAGTT": 1320, + "GCAGAGG": 1321, + "CTTTTTTTT": 1322, + "TTTTTTTTTTTTTTTT": 1323, + "TACAGTA": 1324, + "CCATGTT": 1325, + "TAGTGA": 1326, + "CGTGTG": 1327, + "GCTCTGA": 1328, + "CTTCCTG": 1329, + "TCGCTG": 1330, + "TAAATCA": 1331, + "TCCAATT": 1332, + "GTTTCTG": 1333, + "GAAGAGA": 1334, + "GGGTAA": 1335, + "CCATAA": 1336, + "TTATATT": 1337, + "CGAATT": 1338, + "CCGGA": 1339, + "TGAGCC": 1340, + "CCGTA": 1341, + "CAGAGGA": 1342, + "GTGTTTG": 1343, + "GACAAAA": 1344, + "TTTTTTAAA": 1345, + "GTTGCC": 
1346, + "GAGTTTT": 1347, + "TCAAAAAA": 1348, + "TGTTTCA": 1349, + "TATCTA": 1350, + "TCTCTCC": 1351, + "CTCCACA": 1352, + "TAAATATT": 1353, + "TTTTCTG": 1354, + "CTCTCAA": 1355, + "CCTTAAA": 1356, + "TCTTTTAA": 1357, + "GAACAAA": 1358, + "TTAGCA": 1359, + "GCTCATG": 1360, + "TAAAGTA": 1361, + "GGATAA": 1362, + "TTATTAAA": 1363, + "CTCCATT": 1364, + "TCTCTGA": 1365, + "TTATTTG": 1366, + "CCTGTAA": 1367, + "TTATATA": 1368, + "GACTTTT": 1369, + "TGTTGTT": 1370, + "GCAAATG": 1371, + "CTTCAAA": 1372, + "GAATATT": 1373, + "GAATCC": 1374, + "CTCTTAA": 1375, + "GCATAA": 1376, + "GAATGAA": 1377, + "CTTAAAAA": 1378, + "TAAAAATG": 1379, + "TTTTAAAAA": 1380, + "CTCTGGG": 1381, + "TGATCC": 1382, + "GCTCTCA": 1383, + "CTCCAGA": 1384, + "GAGTGCAGTG": 1385, + "CAATATT": 1386, + "TAGAAAA": 1387, + "GTAAATG": 1388, + "TAGCTG": 1389, + "GCTCAAA": 1390, + "GCAGGAA": 1391, + "TACCTG": 1392, + "GGGAAAA": 1393, + "TTTTCTA": 1394, + "GGGGGGGG": 1395, + "CCGA": 1396, + "CTTTGAA": 1397, + "GGAGGTG": 1398, + "TAGTCA": 1399, + "GGCCCA": 1400, + "TGATGTT": 1401, + "CAAATAA": 1402, + "TCTTCCA": 1403, + "GCGCTT": 1404, + "GTATTTG": 1405, + "GTCTC": 1406, + "GAAATCA": 1407, + "TGATAAA": 1408, + "CATTCTT": 1409, + "TATCCA": 1410, + "GCCTCTG": 1411, + "TGAGATG": 1412, + "CGCCAA": 1413, + "GTTTTATT": 1414, + "TATATATT": 1415, + "GTAGGA": 1416, + "GACAGAA": 1417, + "CTCCAGCCTGGG": 1418, + "GCGTGA": 1419, + "GGTATG": 1420, + "GAGGGAGG": 1421, + "TCATTTG": 1422, + "CTACC": 1423, + "TACAGAA": 1424, + "GGTAGA": 1425, + "GATCTA": 1426, + "GTCCATG": 1427, + "TGAGGAA": 1428, + "TAATAAAA": 1429, + "TAAACTT": 1430, + "TCACATT": 1431, + "GGAGGCC": 1432, + "TCACAAA": 1433, + "CACTTTT": 1434, + "CGGCC": 1435, + "CAACAGA": 1436, + "GTAGAGA": 1437, + "GTTATTTT": 1438, + "CGTTTG": 1439, + "TCGTCA": 1440, + "TCTGCTG": 1441, + "CAACACA": 1442, + "GGTAGG": 1443, + "GCAGCTG": 1444, + "TAGTAGAGA": 1445, + "CAAGCC": 1446, + "GCATTTG": 1447, + "TAATATG": 1448, + "GCTTAAA": 1449, + "GCTTCTG": 1450, + "CTCTCCA": 1451, + 
"TCATCTT": 1452, + "CGTCTG": 1453, + "TCATTTA": 1454, + "CATAGG": 1455, + "GCTCCTT": 1456, + "TGTTCTT": 1457, + "TACATTA": 1458, + "CACAGAA": 1459, + "TAAATATA": 1460, + "TAGAGG": 1461, + "GATAGG": 1462, + "TCCTGAA": 1463, + "GGAGCTG": 1464, + "TGATATT": 1465, + "TCATTAA": 1466, + "CTTTTAAA": 1467, + "TCGTTA": 1468, + "TAAACTA": 1469, + "GTTTGAA": 1470, + "TAAAATTA": 1471, + "CACCCC": 1472, + "TCAGAGA": 1473, + "CTCCTGCCTCA": 1474, + "TGACATT": 1475, + "GTATTTA": 1476, + "CTTCATT": 1477, + "GAAACTG": 1478, + "TAACACA": 1479, + "GTTCAAA": 1480, + "GGAGATG": 1481, + "TCGGCC": 1482, + "CAGCATT": 1483, + "TCGATG": 1484, + "TATTCTA": 1485, + "CTGTGAA": 1486, + "TATTGAA": 1487, + "TTTTCCA": 1488, + "TATTTCTT": 1489, + "GGTGAAA": 1490, + "CTGAGAA": 1491, + "GCACAGA": 1492, + "GCGAGG": 1493, + "CTGTGTG": 1494, + "TGAAATG": 1495, + "TGATGAA": 1496, + "GTCCAAA": 1497, + "CTCAATT": 1498, + "TCCAGAA": 1499, + "GTATATA": 1500, + "TAAAGTT": 1501, + "TCTCAAAA": 1502, + "TCCATCA": 1503, + "GTCTGAA": 1504, + "TGAGAGA": 1505, + "TGATTTG": 1506, + "TTAGCC": 1507, + "CTCCATG": 1508, + "TCCCTGA": 1509, + "GAGCTA": 1510, + "CCCCCCCC": 1511, + "GTGGAAA": 1512, + "CTGGGAA": 1513, + "CAATGAA": 1514, + "CCACACA": 1515, + "CTTTCAA": 1516, + "CGGAGG": 1517, + "TCGTGA": 1518, + "CCAGAAA": 1519, + "GTTTTAAA": 1520, + "TGTTGAA": 1521, + "TCCTGTG": 1522, + "CTAAATG": 1523, + "TCCTTTA": 1524, + "GTCTGGG": 1525, + "TCTCTTTT": 1526, + "TACGG": 1527, + "TATTGTA": 1528, + "TTAGTG": 1529, + "TTACC": 1530, + "TAATCCCAGCACTTTG": 1531, + "TCTGGAA": 1532, + "CTTCTCA": 1533, + "CGCATT": 1534, + "TATTTAAA": 1535, + "TCACACA": 1536, + "TAATCAA": 1537, + "GCGAAA": 1538, + "GGGCCA": 1539, + "GTTCATT": 1540, + "GAGAAAAA": 1541, + "TTTTGTA": 1542, + "TACTTTT": 1543, + "TCGAGG": 1544, + "GTGAAAAA": 1545, + "CAATATA": 1546, + "TCCCATG": 1547, + "CAATTAA": 1548, + "CTGGAAA": 1549, + "CCCAGCA": 1550, + "TCCCATT": 1551, + "TCCTGTT": 1552, + "CTCTTTA": 1553, + "TCCCCTT": 1554, + "GTTTCAA": 1555, + "GTCCAGG": 1556, + 
"GGAAGGA": 1557, + "TAGTTTT": 1558, + "TGACCTT": 1559, + "GTGCTGGGATTACAGG": 1560, + "TATTTATA": 1561, + "TCTGCAA": 1562, + "CTGAAAAA": 1563, + "TATGTTA": 1564, + "CTTCACA": 1565, + "GCACAGG": 1566, + "CCTGCTG": 1567, + "TTTTTTAA": 1568, + "GTTATTA": 1569, + "CCCTTTT": 1570, + "TGATTTA": 1571, + "TACAAAA": 1572, + "TAAGTAA": 1573, + "TTTTTAAA": 1574, + "CATCTC": 1575, + "GTGGTGA": 1576, + "GTGGAGA": 1577, + "CTCTGCA": 1578, + "GTTAAAAA": 1579, + "TACATACA": 1580, + "CTTTGTG": 1581, + "GGACACA": 1582, + "TCTGATG": 1583, + "TATTATT": 1584, + "TCTTCTA": 1585, + "CTGTGTT": 1586, + "TCAGCTT": 1587, + "CTTTATA": 1588, + "GGCGC": 1589, + "TCCCTCA": 1590, + "GTACC": 1591, + "TGGAGAA": 1592, + "CAAAAATT": 1593, + "TCTTTAA": 1594, + "CTCTCTC": 1595, + "TGAGTGA": 1596, + "GCAGCTT": 1597, + "CGGATT": 1598, + "TACGA": 1599, + "TCTTGTT": 1600, + "TCGTAA": 1601, + "GCCTGTG": 1602, + "TATTCTG": 1603, + "GGGATA": 1604, + "GGGTCC": 1605, + "TGAGATT": 1606, + "CTTTTATT": 1607, + "TCCCACA": 1608, + "CATGGTG": 1609, + "TTAGGA": 1610, + "GAACACA": 1611, + "TCATAAA": 1612, + "CAACATT": 1613, + "GGTCCA": 1614, + "GAATTTG": 1615, + "TATTAATT": 1616, + "TCCTGGG": 1617, + "GCAGCAA": 1618, + "CTCTTCA": 1619, + "GAAGAGG": 1620, + "TCTGTCA": 1621, + "CTGAATG": 1622, + "CCACAAA": 1623, + "GTGGAGG": 1624, + "TGATTAA": 1625, + "CTCCCTCC": 1626, + "CACACACACACACACACACACACACACACACA": 1627, + "GCGATG": 1628, + "CATTCTG": 1629, + "GTAGAAA": 1630, + "TCATCAA": 1631, + "TTTTCAA": 1632, + "TATGTATG": 1633, + "CCAAATG": 1634, + "TAATTTTA": 1635, + "TAAGGAA": 1636, + "CTTGAAA": 1637, + "AAAAAAAAAAAA": 1638, + "GCTCCTG": 1639, + "GCAGATG": 1640, + "GAAAAATT": 1641, + "GACGC": 1642, + "GTGGGGG": 1643, + "GTCAATT": 1644, + "CTTGCTT": 1645, + "TGACACA": 1646, + "GTGTGTT": 1647, + "CCAGAGA": 1648, + "CCCAGCC": 1649, + "TAAAGAAA": 1650, + "GTCCATT": 1651, + "TAAATTAA": 1652, + "CCCAAAA": 1653, + "GAATTAA": 1654, + "TGAATTA": 1655, + "TTTTTTTG": 1656, + "CCAGCTT": 1657, + "CAATTTG": 1658, + "CTGTTTG": 1659, + 
"GTCTCAA": 1660, + "GTTTGTG": 1661, + "GGCATA": 1662, + "GGTACA": 1663, + "TGATGTG": 1664, + "GATTTCA": 1665, + "TCTGCTT": 1666, + "GTAATTA": 1667, + "TAAAAAAAA": 1668, + "GCCGCC": 1669, + "TGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTG": 1670, + "GCGTCA": 1671, + "GCTCATT": 1672, + "GAACCTG": 1673, + "TAAACAAA": 1674, + "GTGCTGA": 1675, + "TCAGGAA": 1676, + "TCCTCAA": 1677, + "TCTATTTT": 1678, + "TCTGTTTT": 1679, + "CAGAGCA": 1680, + "CCAGGAA": 1681, + "GTCTTTA": 1682, + "TCTTCAA": 1683, + "TCAAAATT": 1684, + "GCTTATT": 1685, + "GTTCCTT": 1686, + "CACCTA": 1687, + "TCACTGA": 1688, + "GAAGCAA": 1689, + "TAAAGA": 1690, + "TCCTTCA": 1691, + "TCTCATG": 1692, + "TCAGTGA": 1693, + "TACACAA": 1694, + "CACGTG": 1695, + "CCTAAAA": 1696, + "GCCTTTG": 1697, + "GGCTTTT": 1698, + "GTTGAAA": 1699, + "GTTCTC": 1700, + "CTAGA": 1701, + "CTACAAA": 1702, + "GCACAAA": 1703, + "TTACATT": 1704, + "GGCCCC": 1705, + "TAATGTG": 1706, + "CTGCCTT": 1707, + "TCCCAGA": 1708, + "GTGAATG": 1709, + "GGACAGG": 1710, + "GGATGTG": 1711, + "GTTTATA": 1712, + "TGACCAA": 1713, + "GTGGCTG": 1714, + "GTTCTCA": 1715, + "CTTATTTT": 1716, + "CTGGAGA": 1717, + "TTACAAA": 1718, + "GTCTTCA": 1719, + "CAAGAGA": 1720, + "CCATTTG": 1721, + "TCACAGA": 1722, + "CTAGTA": 1723, + "CATTATT": 1724, + "TTAGA": 1725, + "GCTCTCC": 1726, + "GCGCCA": 1727, + "TATGTTTT": 1728, + "TCCTCCA": 1729, + "CAGAAAAA": 1730, + "GTGGGAA": 1731, + "TAATCTT": 1732, + "TGAGTCA": 1733, + "CTGCTC": 1734, + "GTCTCCA": 1735, + "TCATGTT": 1736, + "GTTTCCA": 1737, + "TAAGCAA": 1738, + "CTAAAAATA": 1739, + "TGACTGA": 1740, + "TCGGTT": 1741, + "TTAGAAA": 1742, + "TAAGCC": 1743, + "TAAAGCA": 1744, + "CCTCTCC": 1745, + "CCTCCTT": 1746, + "TCAGATT": 1747, + "TATGAAAA": 1748, + "GCTGATG": 1749, + "CATATTTT": 1750, + "GCTCCAA": 1751, + "CGGCGG": 1752, + "CCACTGA": 1753, + "CAGCAAA": 1754, + "CTGTCTT": 1755, + "CTAGCA": 1756, + "TCGGGG": 1757, + "CACAGCA": 1758, + "GCTGATT": 1759, + "CTAGGA": 1760, + "TAACTC": 1761, + "TCATATT": 1762, + "CCTTCTT": 1763, + 
"CTGCAAA": 1764, + "CCCGC": 1765, + "GGTCTA": 1766, + "CCCAGGA": 1767, + "GTGTCTG": 1768, + "TAATAATAATAA": 1769, + "TCACATG": 1770, + "CAATTTA": 1771, + "TATATATATATATATATATATATATATATATA": 1772, + "CCACAGA": 1773, + "TCAATTTT": 1774, + "GTATTAA": 1775, + "GAACATT": 1776, + "TCTCTTA": 1777, + "CTATTTG": 1778, + "TCTTTCC": 1779, + "GGTTAAA": 1780, + "GCTAATT": 1781, + "CTGCTGA": 1782, + "TACCTA": 1783, + "CAGGGTT": 1784, + "TCGCCA": 1785, + "CAAAAATTA": 1786, + "CTTCTGA": 1787, + "GCATGTG": 1788, + "CTATTAA": 1789, + "GCACATG": 1790, + "CAACATG": 1791, + "TCATGAA": 1792, + "GAATGTT": 1793, + "GGGTTTT": 1794, + "CTGCCTG": 1795, + "GTCCACA": 1796, + "TAAACA": 1797, + "CTCTGGA": 1798, + "GACCCC": 1799, + "GGCAAAA": 1800, + "TCTGTTA": 1801, + "CTAGTG": 1802, + "CTATATA": 1803, + "TCAGTCA": 1804, + "TAACTAA": 1805, + "GAAGATG": 1806, + "GTCTTAA": 1807, + "CAAGGAA": 1808, + "GTAAAAAA": 1809, + "TCCCCTG": 1810, + "TCGCAA": 1811, + "TCTGCCTG": 1812, + "CCTTTTA": 1813, + "GTCCCAGCTA": 1814, + "TATATATG": 1815, + "TATTGTG": 1816, + "TGTGTTTT": 1817, + "GCGCAA": 1818, + "CACAGTG": 1819, + "TAAGATT": 1820, + "CTCTGTA": 1821, + "GGAGGCTGA": 1822, + "GGACAAA": 1823, + "TATTAAAAA": 1824, + "TCGTCC": 1825, + "TCGGAA": 1826, + "CTATAAA": 1827, + "CTTCAGA": 1828, + "CTAGAAA": 1829, + "CATTCAA": 1830, + "CACGCA": 1831, + "CAGGATT": 1832, + "CCATCTT": 1833, + "GTAGCC": 1834, + "GAATTTA": 1835, + "CACGC": 1836, + "CAATCC": 1837, + "TGAGCAA": 1838, + "GAAGCTG": 1839, + "TCAATTA": 1840, + "GAAGTCA": 1841, + "CTGCACA": 1842, + "CCACGG": 1843, + "GGATCTT": 1844, + "CTCCTGCCTCAGCCTCC": 1845, + "TAAATGAA": 1846, + "CCGTC": 1847, + "TCGGTG": 1848, + "TTTTATTA": 1849, + "GCAGGGG": 1850, + "GCAGGTG": 1851, + "TCTATTA": 1852, + "TAACTTA": 1853, + "CTAATTTT": 1854, + "CCCGCC": 1855, + "TAATACA": 1856, + "GGATTAAA": 1857, + "TCTCTCTG": 1858, + "GCTTCTT": 1859, + "CATTTATT": 1860, + "CCAGAGG": 1861, + "GGACAGA": 1862, + "GCCAATT": 1863, + "TCCCCAA": 1864, + "GTTGATT": 1865, + "GAAGAAAA": 1866, + 
"GCATTTA": 1867, + "CTCTAAA": 1868, + "CACACACACACA": 1869, + "CCTCAAA": 1870, + "TATAATT": 1871, + "CAATGTT": 1872, + "GCCCAGA": 1873, + "GTATATT": 1874, + "CTAAAAAA": 1875, + "CCACAGG": 1876, + "TAAGAGA": 1877, + "TCCTTAA": 1878, + "TATTTTTT": 1879, + "GAATATA": 1880, + "GGATTTG": 1881, + "GTGTGAA": 1882, + "CTGGCTT": 1883, + "GCGGCA": 1884, + "TCCGCC": 1885, + "GCATCTT": 1886, + "TCTAATA": 1887, + "CTGCATT": 1888, + "CTCTGCC": 1889, + "TCACTCA": 1890, + "TCAGCAA": 1891, + "TATTATG": 1892, + "CCAGCTG": 1893, + "GATCTC": 1894, + "GCCTCTT": 1895, + "CTTCCAA": 1896, + "TCCTAAA": 1897, + "TCATCTG": 1898, + "CTATTTA": 1899, + "CTGCAGG": 1900, + "CAAGCAA": 1901, + "GCGGAA": 1902, + "GAAATAAA": 1903, + "TAAAATAA": 1904, + "TCACCTT": 1905, + "CCATGTG": 1906, + "GACCTA": 1907, + "CAGATGA": 1908, + "GTGGCTT": 1909, + "TTATTATTATTA": 1910, + "TCCCGG": 1911, + "TATTTGTT": 1912, + "CTGTAAA": 1913, + "TCCATCCA": 1914, + "CTGTATA": 1915, + "GTTTCTA": 1916, + "GTTGCTT": 1917, + "CCATGAA": 1918, + "GCTCTTA": 1919, + "CTTCATG": 1920, + "GTTCCTG": 1921, + "GCTGGGA": 1922, + "TCAGAGG": 1923, + "CATTAAAA": 1924, + "TCAGTAA": 1925, + "GAATGTG": 1926, + "CTTATTA": 1927, + "GCACTGA": 1928, + "TGAGGTT": 1929, + "CATCAAA": 1930, + "CTTCTCC": 1931, + "GTTTATG": 1932, + "CTTTCCA": 1933, + "GTGCCTG": 1934, + "GAAAGGA": 1935, + "GCATCTG": 1936, + "TACCCA": 1937, + "TAACAGA": 1938, + "AAAAAAAAAAA": 1939, + "CTATGAA": 1940, + "CAGTAAA": 1941, + "TAGCTA": 1942, + "TCGTTTT": 1943, + "GTGTCTT": 1944, + "GAGCAAA": 1945, + "TCTAAAAA": 1946, + "GTTCACA": 1947, + "GAAATGA": 1948, + "CAAATGA": 1949, + "GCCCTGA": 1950, + "GTGTTTA": 1951, + "TCATGTG": 1952, + "CATATTA": 1953, + "TCAAAAAAA": 1954, + "TAAGTTA": 1955, + "TCTCTCTT": 1956, + "CCAGTGA": 1957, + "CCTCTGA": 1958, + "CAAGATG": 1959, + "GCCTGTT": 1960, + "GTTTGGG": 1961, + "CATTCATT": 1962, + "GCCCCTG": 1963, + "GTTCTGA": 1964, + "GCGGCC": 1965, + "GCGGTT": 1966, + "CAAAACAAAA": 1967, + "TACATATA": 1968, + "GAATTAAA": 1969, + "TCAAGAA": 1970, + 
"CTGTATT": 1971, + "TTTTTATT": 1972, + "GATTATT": 1973, + "TCTAATG": 1974, + "GTTGCTG": 1975, + "TGAATGAA": 1976, + "TCAGCTG": 1977, + "CTTGATT": 1978, + "CAGAATG": 1979, + "CTAATTA": 1980, + "TATAATG": 1981, + "GTTTTGTTTT": 1982, + "CCAGCCTG": 1983, + "TGATGGA": 1984, + "GCAGATT": 1985, + "CTCTATT": 1986, + "GCAGTCA": 1987, + "TAAGTGA": 1988, + "CTACACA": 1989, + "CGCATG": 1990, + "TAGCCA": 1991, + "GTGGCTCA": 1992, + "CAAATAAA": 1993, + "GTGCTCA": 1994, + "TTTTTTTTTT": 1995, + "TAACATG": 1996, + "TCCCAGCTA": 1997, + "CAAAGTA": 1998, + "TCATATA": 1999, + "CAGCATG": 2000, + "TGATCTT": 2001, + "CATAATT": 2002, + "TGTGTTA": 2003, + "TTTTGAA": 2004, + "TTAATTA": 2005, + "GATATTA": 2006, + "TCATTCA": 2007, + "TGATATA": 2008, + "TGACTCA": 2009, + "GACGTT": 2010, + "TGACATG": 2011, + "GTTGTGA": 2012, + "CATTTTTT": 2013, + "GCCTGGA": 2014, + "CTATGTT": 2015, + "CTTTGGG": 2016, + "GTCTCAAA": 2017, + "CTGGCTG": 2018, + "CCACATG": 2019, + "GGCGTG": 2020, + "CTTAATG": 2021, + "TAAGATG": 2022, + "GTATAAA": 2023, + "TGTATTA": 2024, + "TAACTCA": 2025, + "GAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGA": 2026, + "GCATGAA": 2027, + "GTTAATG": 2028, + "TCCAGGA": 2029, + "GAGAGAAA": 2030, + "TCTCTGTG": 2031, + "CTCTCTA": 2032, + "CCACCTG": 2033, + "GCCAGGA": 2034, + "CTGGAGG": 2035, + "CCATTTA": 2036, + "GTCTGGA": 2037, + "GCCCACA": 2038, + "TAGAGAA": 2039, + "CAACTCA": 2040, + "GGCAGGA": 2041, + "TCTTATG": 2042, + "CAAAGGA": 2043, + "GGTAAAA": 2044, + "GAGAGGA": 2045, + "GTCCAGA": 2046, + "GCCCTCA": 2047, + "GATATTTT": 2048, + "CAGGGAA": 2049, + "CCACATT": 2050, + "GAGGAGG": 2051, + "GAAACTT": 2052, + "CAGAATT": 2053, + "TCAGATG": 2054, + "TATTTCC": 2055, + "TACAGTG": 2056, + "TGAGCTG": 2057, + "CCATCTG": 2058, + "GAGAATG": 2059, + "TCAACAA": 2060, + "ATT": 2061, + "TAACTGA": 2062, + "TGAGAGG": 2063, + "CACTGAA": 2064, + "CCACCTT": 2065, + "CTGCAGA": 2066, + "TCACCAA": 2067, + "TGAGCTT": 2068, + "CAAAGCA": 2069, + "GGTTTTA": 2070, + "CGGGGTT": 2071, + "TCCAAAAA": 2072, + "TATGTATA": 2073, + 
"CCAGATG": 2074, + "TCCATTTT": 2075, + "CTGCTCA": 2076, + "GATAATT": 2077, + "CCACCAA": 2078, + "CTCCTCC": 2079, + "GAGAATT": 2080, + "GAAAGTA": 2081, + "TAAAATAAAA": 2082, + "CTTCTTA": 2083, + "CTGTTTA": 2084, + "GAATCAA": 2085, + "GCATGTT": 2086, + "GCACGG": 2087, + "GACTGAA": 2088, + "GTGCACA": 2089, + "GACGTG": 2090, + "TATACAA": 2091, + "TCGACA": 2092, + "GAAGACA": 2093, + "TAAAGGA": 2094, + "GATCAAA": 2095, + "CAGTGTG": 2096, + "CTAGCC": 2097, + "GAGGAAAA": 2098, + "TCTGAAAA": 2099, + "GAACCCA": 2100, + "GATGGATG": 2101, + "GTTCTTA": 2102, + "CTATATT": 2103, + "GCATTAA": 2104, + "TCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTC": 2105, + "TCAGTC": 2106, + "TATTTTTG": 2107, + "GAGGATT": 2108, + "GTATGTG": 2109, + "TAACCAA": 2110, + "GTTGTTTT": 2111, + "TTTTTCTT": 2112, + "GTGTTAA": 2113, + "CTTGGAA": 2114, + "AAAAAATG": 2115, + "CAATGTG": 2116, + "GTGCCTT": 2117, + "GCCTCAA": 2118, + "GAGTCTT": 2119, + "GCTAATTTT": 2120, + "CGAAAAA": 2121, + "GTGTATA": 2122, + "GCGTTA": 2123, + "CTGCACTCCAGCCTGGG": 2124, + "GTTCATG": 2125, + "CAAAGAAA": 2126, + "GCAGTAA": 2127, + "GGATGAA": 2128, + "CTTTATG": 2129, + "CAGGAAAA": 2130, + "TCCTGCA": 2131, + "CTGTCTG": 2132, + "GAACATG": 2133, + "GGATGGA": 2134, + "GCCTGAA": 2135, + "CAAAAATG": 2136, + "TCCAATG": 2137, + "CCAGCAA": 2138, + "GGCCTA": 2139, + "CAACTGA": 2140, + "GCACCTG": 2141, + "GTCTATT": 2142, + "CCTCTCA": 2143, + "GTGGTCA": 2144, + "GTGTAAA": 2145, + "GTACACA": 2146, + "GTAAAATT": 2147, + "GTACATT": 2148, + "TATATAAA": 2149, + "CTGTTAA": 2150, + "TAAGTCA": 2151, + "GCCTCCA": 2152, + "AAATTAAA": 2153, + "GTGCAGG": 2154, + "TCCTGGA": 2155, + "GTGCAAA": 2156, + "GCGTCC": 2157, + "CCATTAA": 2158, + "GGAGGGA": 2159, + "TCACTTA": 2160, + "TCATTAAA": 2161, + "CAACATA": 2162, + "TAATAGA": 2163, + "TAATGTA": 2164, + "GATTTTTT": 2165, + "GTTGTCA": 2166, + "GGAGACA": 2167, + "GTGTGGG": 2168, + "TCACAGG": 2169, + "TCGGCA": 2170, + "CTCCCTG": 2171, + "GACCAAA": 2172, + "TGTTTATT": 2173, + "CGAATG": 2174, + "CTCAATG": 2175, + "TCACCTG": 
2176, + "CAGTGTT": 2177, + "TGAGACA": 2178, + "TAGGGG": 2179, + "GAAAAATG": 2180, + "GTTGAGA": 2181, + "TCGATA": 2182, + "CTCGGGAGG": 2183, + "GTTGTC": 2184, + "CCAGTCA": 2185, + "GCCCAGGCTG": 2186, + "GAACAGA": 2187, + "GGCTCACTGCAA": 2188, + "GCAGACA": 2189, + "TGAGGTG": 2190, + "CACGTT": 2191, + "TAAGAAAA": 2192, + "CCAGGCA": 2193, + "GTATCTT": 2194, + "CTTGGGAGG": 2195, + "CTTTCTA": 2196, + "CCGCTG": 2197, + "GAGCTCA": 2198, + "GAGACAGA": 2199, + "CTTCAGG": 2200, + "GCACATT": 2201, + "GTACAAA": 2202, + "CTTGTAA": 2203, + "GTGGGTG": 2204, + "GAAGTGA": 2205, + "GGTCTC": 2206, + "GTATGTT": 2207, + "GCACTCA": 2208, + "TTATGTT": 2209, + "CAAGTCA": 2210, + "CAAGTGA": 2211, + "GAAACTA": 2212, + "TAAATAAAA": 2213, + "TCTTAAAA": 2214, + "GTTGGAA": 2215, + "GTTCTAA": 2216, + "CCACTC": 2217, + "CAGTGAA": 2218, + "GAAAGG": 2219, + "GCACGA": 2220, + "TAACTTTT": 2221, + "GTTGTTA": 2222, + "TCAGTTA": 2223, + "CGGATG": 2224, + "TATTTGAA": 2225, + "CCCTGAA": 2226, + "GCCCTC": 2227, + "CTTCTAA": 2228, + "TTTGTTTT": 2229, + "GAGCTGA": 2230, + "CTGTGGG": 2231, + "CAAGATT": 2232, + "GAAGCTT": 2233, + "TGAGTAA": 2234, + "CTTGCTG": 2235, + "GGATGGG": 2236, + "CGTATG": 2237, + "TCCATTA": 2238, + "GTCTGCA": 2239, + "GCCATTTT": 2240, + "GTTGTAA": 2241, + "CACACAA": 2242, + "GGACTACAGG": 2243, + "CGTTTTA": 2244, + "TCTTCC": 2245, + "TAACCTT": 2246, + "CTTTAAAA": 2247, + "TGAATTTT": 2248, + "CTACAGA": 2249, + "GCAAGAA": 2250, + "TAACAAAA": 2251, + "CAATTAAA": 2252, + "CCACTCA": 2253, + "CATGGTGAAA": 2254, + "CCCAGAA": 2255, + "CTACATT": 2256, + "CCGAGG": 2257, + "TCCAGTG": 2258, + "TGAGTTA": 2259, + "GGAGTCA": 2260, + "TAACGA": 2261, + "GAGTAAA": 2262, + "GACTCTG": 2263, + "GGAGCTT": 2264, + "TACTCC": 2265, + "CTGCATG": 2266, + "GCTTTTTT": 2267, + "GTCTAAA": 2268, + "GTGCGG": 2269, + "CATCTCA": 2270, + "TGATCAA": 2271, + "GGAGATT": 2272, + "GCAAAAAA": 2273, + "CACCAAA": 2274, + "TGACGG": 2275, + "CAGAGG": 2276, + "GTTGATG": 2277, + "CTTGTCA": 2278, + "TCCACCTG": 2279, + "GGAGCAA": 2280, + 
"CAAGTAA": 2281, + "CCATAAA": 2282, + "GTGCATG": 2283, + "GCATATT": 2284, + "GTAGATT": 2285, + "GCCTAA": 2286, + "CTCAAAAA": 2287, + "GGAGAAAA": 2288, + "CTATCC": 2289, + "TAATATTA": 2290, + "GTGCTC": 2291, + "CAATATG": 2292, + "TGTGGAA": 2293, + "TGACTC": 2294, + "GTGTATG": 2295, + "TTTTAATG": 2296, + "GCTCTAA": 2297, + "CACAATG": 2298, + "CAGCTCA": 2299, + "GTTGGTT": 2300, + "CTAAAATT": 2301, + "GTCTATG": 2302, + "TGTGAAAA": 2303, + "CTGGGTT": 2304, + "CCCCTCC": 2305, + "CCCTCTT": 2306, + "GCAGGGA": 2307, + "GAAACCA": 2308, + "CATTTCC": 2309, + "GCAGCCA": 2310, + "TCATATG": 2311, + "GCAGGCA": 2312, + "CGTAAAA": 2313, + "TGACCTG": 2314, + "CAGAGGTT": 2315, + "CTTGTGA": 2316, + "TTATCTT": 2317, + "CTGTATG": 2318, + "GTCAATG": 2319, + "GGACGG": 2320, + "GCGTAA": 2321, + "CAAACTA": 2322, + "TAAATGTT": 2323, + "CTTCGG": 2324, + "CTCCCCA": 2325, + "TACAATG": 2326, + "TCTGTAA": 2327, + "GAATATG": 2328, + "GCGGGA": 2329, + "GGACATT": 2330, + "TTATGAA": 2331, + "GGATGTT": 2332, + "GGACATG": 2333, + "TCAGGTG": 2334, + "CAACAAAA": 2335, + "GAAAGAGA": 2336, + "GTGGATG": 2337, + "GGGCTA": 2338, + "CCATCAA": 2339, + "CAGCTGA": 2340, + "CTCCACC": 2341, + "CAATCAA": 2342, + "GTGGTC": 2343, + "TGACAGG": 2344, + "CCATTCA": 2345, + "GTCCCTG": 2346, + "CAGACACA": 2347, + "GTTGGTG": 2348, + "CCTCCTG": 2349, + "GAACTGA": 2350, + "TATTCATT": 2351, + "GCCCATG": 2352, + "CAATCTT": 2353, + "GAAAGCA": 2354, + "GAATCTG": 2355, + "TTATTTTA": 2356, + "GTTTGGA": 2357, + "TTTTTGTT": 2358, + "GGGAATG": 2359, + "GCGACA": 2360, + "TAAACTG": 2361, + "CCATATT": 2362, + "GGATCC": 2363, + "CAAGCTT": 2364, + "TAAAAAAAAA": 2365, + "TCACTC": 2366, + "CACTGTT": 2367, + "TGTTAATT": 2368, + "GGACTGA": 2369, + "GGAGTGA": 2370, + "CATACACA": 2371, + "GTTTGTA": 2372, + "TCCAGCA": 2373, + "GTGCATT": 2374, + "GGAAAAAA": 2375, + "CCAAGAA": 2376, + "TCAATA": 2377, + "CTTCCCA": 2378, + "TGAGAAAA": 2379, + "GGCCTCCCAAA": 2380, + "CAAGCTG": 2381, + "GCCCAAA": 2382, + "TGACTTA": 2383, + "CAGCCTT": 2384, + "CTGGATT": 
2385, + "TTTTTTTA": 2386, + "TCACGG": 2387, + "GCAGTTA": 2388, + "TGACTAA": 2389, + "TTACAGG": 2390, + "TGATATG": 2391, + "TAATTATT": 2392, + "TCTTGAA": 2393, + "GCCCCTT": 2394, + "GTTCAGA": 2395, + "CTCTATG": 2396, + "CCATGGA": 2397, + "GAGGGAA": 2398, + "GGAGGCA": 2399, + "CTTTGCA": 2400, + "TCTTGG": 2401, + "GGAGGTT": 2402, + "GCCAATG": 2403, + "CTGGTGA": 2404, + "CAACCAA": 2405, + "CCAGTC": 2406, + "CTTGAGA": 2407, + "TACAGCA": 2408, + "CTTGTC": 2409, + "GACGGA": 2410, + "CTTCTTTT": 2411, + "GTGGC": 2412, + "GAGGATG": 2413, + "CAATAAAA": 2414, + "GAAATTTT": 2415, + "AAAAAAAAAA": 2416, + "CTCTATA": 2417, + "GTATGAA": 2418, + "CTTGTTA": 2419, + "TAACATA": 2420, + "CAAACACA": 2421, + "TGATTAAA": 2422, + "GCTCTGTT": 2423, + "GTGGGTT": 2424, + "GTTGGGG": 2425, + "GTGTGTA": 2426, + "GTAATTTT": 2427, + "GTATCC": 2428, + "TGTGTGTGTGTG": 2429, + "TCTTCCTT": 2430, + "TCACTAA": 2431, + "TCTCCAAA": 2432, + "TATCAAA": 2433, + "TGATGGG": 2434, + "GGATATT": 2435, + "CAAATTTT": 2436, + "GTTCAGG": 2437, + "GTGGATT": 2438, + "GTGCAGA": 2439, + "GCTGCC": 2440, + "CTCAGAA": 2441, + "GCAGTC": 2442, + "GGATAAA": 2443, + "GCCTTCA": 2444, + "CCAGGTG": 2445, + "TATCTC": 2446, + "CAATGCA": 2447, + "CCCACTG": 2448, + "GTGTATT": 2449, + "CGACAGA": 2450, + "TGAGATA": 2451, + "CCAGGTT": 2452, + "TGTTTAA": 2453, + "CATCATG": 2454, + "TGATTCA": 2455, + "GCAATTA": 2456, + "GAAATGAA": 2457, + "CTTGGTT": 2458, + "GAAGATT": 2459, + "GGATTAA": 2460, + "CCTCATT": 2461, + "GGCCAGGCTG": 2462, + "GCTATTA": 2463, + "GCCAGCA": 2464, + "GAGACAGG": 2465, + "CTTGAGG": 2466, + "CAGTCTT": 2467, + "GTTCTCC": 2468, + "TATTTCAA": 2469, + "TGACGA": 2470, + "CATGAAAA": 2471, + "CATTATG": 2472, + "TAAATTTA": 2473, + "GAGTGAA": 2474, + "CAACAGG": 2475, + "TAAGCTT": 2476, + "CACATTTT": 2477, + "GATCTCA": 2478, + "TAGTCC": 2479, + "GACCCTG": 2480, + "TAATGCA": 2481, + "TAAGTC": 2482, + "TAATAATT": 2483, + "GAAGTAA": 2484, + "CAACTC": 2485, + "CATCATT": 2486, + "GACGAA": 2487, + "GAAACAAA": 2488, + "TATTTCTG": 2489, + 
"CATTAATT": 2490, + "CCACCCC": 2491, + "TAATATTTT": 2492, + "GTTTAAAA": 2493, + "GTATCTG": 2494, + "GTCAAAAA": 2495, + "GATGCTG": 2496, + "TGTTCTG": 2497, + "GGTCAAA": 2498, + "GTAGGAA": 2499, + "GTATATG": 2500, + "TGATCTG": 2501, + "GGGGCTG": 2502, + "GCATCAA": 2503, + "GCCAAAAA": 2504, + "CCACGA": 2505, + "GCTAATG": 2506, + "CAGAGAAA": 2507, + "CCTTCTG": 2508, + "TCCTCTA": 2509, + "GCAGGTT": 2510, + "CTCACTG": 2511, + "TAGATTA": 2512, + "GCCGAGA": 2513, + "CCATCCA": 2514, + "CTTTACA": 2515, + "GTACATG": 2516, + "GCACCAA": 2517, + "CTTTGTA": 2518, + "CTATGTG": 2519, + "TCACTTTT": 2520, + "TGAGTC": 2521, + "CAAGAAAA": 2522, + "CTGACTG": 2523, + "GTTTTTTTT": 2524, + "GCATAAA": 2525, + "TAATCTG": 2526, + "GAAAAAAAA": 2527, + "CAGGATG": 2528, + "TGAGCCA": 2529, + "GAATTCA": 2530, + "TCAGACA": 2531, + "GTTCCAA": 2532, + "TCAGGTT": 2533, + "CAAACTG": 2534, + "CATTTCTT": 2535, + "TGTTAAAA": 2536, + "CCAGACA": 2537, + "CAAGTTA": 2538, + "CATGTTA": 2539, + "CATTCTA": 2540, + "TCTTTTTG": 2541, + "TGAGGGG": 2542, + "CACATTA": 2543, + "TAAAATAAA": 2544, + "GCATATA": 2545, + "TGTTCTA": 2546, + "GAAGGGG": 2547, + "GAGTGTG": 2548, + "TAAGACA": 2549, + "GAACTC": 2550, + "CCAGTAA": 2551, + "GAGAGAGG": 2552, + "GCGACC": 2553, + "CAATTCA": 2554, + "CGGCTG": 2555, + "CCAGATT": 2556, + "CCTGGG": 2557, + "GGAAGAAA": 2558, + "GAGAGG": 2559, + "TCAAAATG": 2560, + "CCTCATG": 2561, + "TAAAGG": 2562, + "CTTTGGA": 2563, + "CCAGGGA": 2564, + "GTACAGA": 2565, + "CTGAGGCAGGA": 2566, + "TGTTTCTT": 2567, + "CCAGGCTG": 2568, + "CTGAGG": 2569, + "GAGGCTG": 2570, + "CTCCTGGG": 2571, + "GAAGTC": 2572, + "CGACC": 2573, + "GGACTCA": 2574, + "GGAGTC": 2575, + "CACAATT": 2576, + "GTGTTCA": 2577, + "GACTAAA": 2578, + "GTCATTA": 2579, + "CAAAATTA": 2580, + "TGAAGAAA": 2581, + "GCACCTT": 2582, + "GTTTGCA": 2583, + "TCCTGCC": 2584, + "GTAGATG": 2585, + "GCCTGCA": 2586, + "GAGTTAA": 2587, + "TCCCTTA": 2588, + "GTGGTTA": 2589, + "TCGGGA": 2590, + "TACATAA": 2591, + "TCTCTCCA": 2592, + "CACTAAA": 2593, + 
"TATATATATATA": 2594, + "GTGGCAA": 2595, + "CACCATG": 2596, + "TTTGAAAA": 2597, + "CACACTG": 2598, + "CTTGGTG": 2599, + "TACACTG": 2600, + "CCTCCAA": 2601, + "CAACCTT": 2602, + "CAGCCAA": 2603, + "TTTTCAAA": 2604, + "TGATAGA": 2605, + "TACACTA": 2606, + "TCTGGG": 2607, + "TCCCAGCA": 2608, + "TAGGAAAA": 2609, + "CTTGGGG": 2610, + "TCTGTGAA": 2611, + "CCTTATT": 2612, + "CATTTAAA": 2613, + "TTTTATTTTA": 2614, + "GCCCTCC": 2615, + "CTGAGCA": 2616, + "CCCGTG": 2617, + "GTAGTGA": 2618, + "TCCTATT": 2619, + "GAAGGTG": 2620, + "TGTGCTG": 2621, + "TCCACTG": 2622, + "TAATCTA": 2623, + "TGATGTA": 2624, + "GTGGTAA": 2625, + "TAATGGA": 2626, + "GATGAAAA": 2627, + "GTAGTAA": 2628, + "GTGGGGA": 2629, + "GTGTCAA": 2630, + "CAGACTG": 2631, + "TCGAAAA": 2632, + "CTCATTA": 2633, + "TAATAATA": 2634, + "CTCAGAAA": 2635, + "CATCCTT": 2636, + "CCGCTT": 2637, + "GGAAGG": 2638, + "CCGTGA": 2639, + "CCACTCC": 2640, + "CTAGAGA": 2641, + "TAGAATG": 2642, + "GGATTTA": 2643, + "TTAATTTT": 2644, + "GCTAATA": 2645, + "TCCCCCA": 2646, + "CAAATATT": 2647, + "GATCATG": 2648, + "TCTTAATT": 2649, + "CAGTATT": 2650, + "GTCTTGAA": 2651, + "CCGAAA": 2652, + "CTATTCA": 2653, + "TAAGATA": 2654, + "CTTGCAA": 2655, + "GCCCCAA": 2656, + "TCCCTAA": 2657, + "GAAGTTA": 2658, + "GATGATG": 2659, + "CTTGATG": 2660, + "CCCTAAA": 2661, + "CCTGCCTG": 2662, + "GACATTTT": 2663, + "CCAGCCA": 2664, + "TGTGTGTGTG": 2665, + "GTCTATA": 2666, + "TCTCTGTT": 2667, + "GTCTGTA": 2668, + "TATAATA": 2669, + "CTTGTTTT": 2670, + "CGCCATT": 2671, + "CTCAGCA": 2672, + "TACAGTT": 2673, + "CAAGAGG": 2674, + "GGAAGCA": 2675, + "GCCTTTA": 2676, + "CCCCATT": 2677, + "CAACGA": 2678, + "GTCATTTT": 2679, + "CCCGCA": 2680, + "CAGTTAA": 2681, + "GAATCTT": 2682, + "CATGTTTT": 2683, + "CCGGGG": 2684, + "CTACTGA": 2685, + "TCACGA": 2686, + "TAAATTTG": 2687, + "GCCCATT": 2688, + "CTCTAGG": 2689, + "GGACCTG": 2690, + "TCAGGGA": 2691, + "GAGACTG": 2692, + "CCAAAAAA": 2693, + "GCCGG": 2694, + "CCAGGGG": 2695, + "TCAGAAAA": 2696, + "CATCTGA": 2697, + 
"TCTTCAAA": 2698, + "CTACAGG": 2699, + "GAGGCAGG": 2700, + "CATTGTA": 2701, + "TAAATCAA": 2702, + "GACTCTT": 2703, + "CTGATTA": 2704, + "GCATATG": 2705, + "GGACCTT": 2706, + "CAAGACA": 2707, + "TATTTATG": 2708, + "TATTTTAAA": 2709, + "CCGAGA": 2710, + "TCATTTTA": 2711, + "CTCACTCA": 2712, + "CCACCCA": 2713, + "CTCTAGA": 2714, + "CTACATG": 2715, + "GTGCTTA": 2716, + "CAACCTG": 2717, + "TCTGTGTT": 2718, + "TAAATATG": 2719, + "CAAAGG": 2720, + "CCCTGTT": 2721, + "GTTCGG": 2722, + "TGATAAAA": 2723, + "CACGAA": 2724, + "GTTGAGG": 2725, + "CAGAGTGA": 2726, + "GAAATTAA": 2727, + "CACATA": 2728, + "GAACAGG": 2729, + "TCTCCTGA": 2730, + "CCTGAGG": 2731, + "GGAGGCCAA": 2732, + "GTTTACA": 2733, + "TAACAGG": 2734, + "TGTGGTG": 2735, + "GCCTCCCAAA": 2736, + "CCATCCTG": 2737, + "GATTCTT": 2738, + "GAATGGA": 2739, + "GTAGTCA": 2740, + "CTCCTCTG": 2741, + "GAAAGAAAGAAAGAAA": 2742, + "CCCTGTG": 2743, + "CAGTATG": 2744, + "GCGATA": 2745, + "GGACTC": 2746, + "GAAAGA": 2747, + "TGTTGG": 2748, + "GTAGCTT": 2749, + "CATTTTAA": 2750, + "CCCTCTG": 2751, + "GCATTCA": 2752, + "CGATTA": 2753, + "TCACATA": 2754, + "TAATGAAA": 2755, + "GGAATTA": 2756, + "CTGTCAA": 2757, + "TAAATTAAA": 2758, + "CAAGTC": 2759, + "GTATTCA": 2760, + "GGCCATG": 2761, + "CTTTAGA": 2762, + "TGTTTCC": 2763, + "CATGTA": 2764, + "GAATAAAA": 2765, + "CAACTAA": 2766, + "TCATCTA": 2767, + "CACTCTT": 2768, + "CAGTTTG": 2769, + "CATAAAAA": 2770, + "GCATGCA": 2771, + "GATTTA": 2772, + "GAACCAA": 2773, + "TCTGTGA": 2774, + "TCAGCCA": 2775, + "TCTCCACA": 2776, + "TCTCAGCTCA": 2777, + "TATCATG": 2778, + "GCACTTA": 2779, + "CGCCAGG": 2780, + "CGGGG": 2781, + "CATTAAAAA": 2782, + "TTTGTTA": 2783, + "GGATATA": 2784, + "TCGACC": 2785, + "TAATCCA": 2786, + "CCGC": 2787, + "CATTGTT": 2788, + "CCAGTTA": 2789, + "GTAGTTA": 2790, + "CTAGGAA": 2791, + "CCTAATT": 2792, + "TCATGGG": 2793, + "GAACTAA": 2794, + "GCTATTTT": 2795, + "CCGTCA": 2796, + "CAGATTA": 2797, + "CCATATA": 2798, + "CAACTTA": 2799, + "TCAGTTTT": 2800, + "CTACCTT": 2801, + 
"GCACTC": 2802, + "GTGTGGA": 2803, + "GTGCCAA": 2804, + "GACAATG": 2805, + "GACAATT": 2806, + "GTACCTT": 2807, + "TAAACATT": 2808, + "CAGGAGG": 2809, + "GTGCGA": 2810, + "GAAAATTA": 2811, + "TCTCTTAA": 2812, + "CCGATT": 2813, + "GATGATT": 2814, + "CCATGGG": 2815, + "TCGGTA": 2816, + "CCATATG": 2817, + "CCAGTCC": 2818, + "GCCTTAA": 2819, + "TGATCCA": 2820, + "GTTGCAA": 2821, + "GTAGAGG": 2822, + "CAGATTTT": 2823, + "GTACTTA": 2824, + "TCTTTCTTTCTTTCTT": 2825, + "GCTCTGTG": 2826, + "TCAATAA": 2827, + "GTTTAGA": 2828, + "GTTCGA": 2829, + "CAAGGTT": 2830, + "CTCATTTT": 2831, + "CACAGG": 2832, + "CATGCTG": 2833, + "GAACGG": 2834, + "TATAAAAA": 2835, + "GAAGGCA": 2836, + "GAGCATT": 2837, + "TGTTTGTG": 2838, + "GCTGTTA": 2839, + "GTCACTG": 2840, + "CAAATGAA": 2841, + "GTGACTG": 2842, + "GTTCTTTT": 2843, + "CAGGCTGGAGTGCAGTG": 2844, + "TGATGAAA": 2845, + "TAACGG": 2846, + "CTACTAA": 2847, + "GACATTA": 2848, + "GGACGA": 2849, + "GAGCATG": 2850, + "GCATGGG": 2851, + "CCACTTA": 2852, + "CTATCAA": 2853, + "GCTGTTTT": 2854, + "GTCGTG": 2855, + "CCTGGCC": 2856, + "TCTCTGAA": 2857, + "TGTTGTA": 2858, + "CAGCCAGG": 2859, + "GTTTAGG": 2860, + "CCGCAA": 2861, + "GGAGTAA": 2862, + "CCAATTA": 2863, + "CAGCAAAA": 2864, + "TCATCCA": 2865, + "CACGTA": 2866, + "TCATAGA": 2867, + "TAATTAAAA": 2868, + "CACTTAA": 2869, + "TCTTTATT": 2870, + "GAGATTA": 2871, + "TAAGAGG": 2872, + "CAAATTAA": 2873, + "GACGCA": 2874, + "CACGGA": 2875, + "GTGTGCA": 2876, + "TCT": 2877, + "TATTATTA": 2878, + "GAAATATT": 2879, + "GGAGTTA": 2880, + "TCTTTGA": 2881, + "CTGATTTT": 2882, + "TGTGAATT": 2883, + "TCCCACC": 2884, + "CCCTTTG": 2885, + "CAAGGTG": 2886, + "CAGAGTT": 2887, + "CCCCATG": 2888, + "CTACCAA": 2889, + "CTCCAAAA": 2890, + "CTTCCCC": 2891, + "CTGCTAA": 2892, + "GATTAAAA": 2893, + "GCTTATG": 2894, + "CTACTTA": 2895, + "TAAAAAATT": 2896, + "TCAGTCC": 2897, + "CTATTAAA": 2898, + "GAATGGG": 2899, + "CACAGTA": 2900, + "CAACGG": 2901, + "GGTTATT": 2902, + "TCACCCA": 2903, + "TGATGCA": 2904, + "TAATTTTTT": 
2905, + "GTTTGAGA": 2906, + "GTATTAAA": 2907, + "GCCCCCA": 2908, + "TATAGTA": 2909, + "TAGTAAA": 2910, + "TGATACA": 2911, + "GTGGTTTT": 2912, + "CCACTAA": 2913, + "CACAGAGA": 2914, + "CCTCTGCCTCC": 2915, + "CAAAAAAAA": 2916, + "CTCTCTCC": 2917, + "CATAATA": 2918, + "GAAGCCA": 2919, + "GTTCCCA": 2920, + "TGTGTTTG": 2921, + "CAATGGA": 2922, + "TGAAGTA": 2923, + "CTTCATA": 2924, + "CACTGTG": 2925, + "GCTCTTTT": 2926, + "TGACATA": 2927, + "TAAAGAAAA": 2928, + "GAGAAATG": 2929, + "CAGGGAGG": 2930, + "TGTTCAA": 2931, + "GAGCCAA": 2932, + "GACAGAGA": 2933, + "GGCTGAA": 2934, + "CAAATATA": 2935, + "GTGGAAAA": 2936, + "TAAGGTT": 2937, + "GTGATTA": 2938, + "GGATCTG": 2939, + "GATGTTA": 2940, + "GACTACACA": 2941, + "TCCTATA": 2942, + "CTGCCAA": 2943, + "TCCCGA": 2944, + "GTGATTTT": 2945, + "GCGTTTT": 2946, + "CAGAGTA": 2947, + "GAAAGGAA": 2948, + "CACTTTG": 2949, + "CCCCAAAA": 2950, + "GCAACCCA": 2951, + "TGCATTTT": 2952, + "TCTAGAA": 2953, + "TACTTTG": 2954, + "TGAGGCA": 2955, + "CATCTCC": 2956, + "TCGCTA": 2957, + "TGACTTTT": 2958, + "GAGCCTG": 2959, + "CATTTGTT": 2960, + "TCTTTGTT": 2961, + "GCAAAATT": 2962, + "CCTGATT": 2963, + "GATAAAAA": 2964, + "GAGTGTT": 2965, + "TCCTGTA": 2966, + "TACAGAAA": 2967, + "TCCAGGAA": 2968, + "GCCAGTG": 2969, + "TAGATTTT": 2970, + "TAATAGG": 2971, + "CTCCTCA": 2972, + "CATTTTTG": 2973, + "CATTTCAA": 2974, + "GCCATCA": 2975, + "TAAAATATA": 2976, + "GACTGTT": 2977, + "GCATGGA": 2978, + "CAAAGTT": 2979, + "CATGATT": 2980, + "GAGTTTG": 2981, + "CTAGCAA": 2982, + "CTTCCTA": 2983, + "GGGGAGG": 2984, + "CTATATG": 2985, + "TATTTATTTT": 2986, + "CACCATT": 2987, + "CCCTCAA": 2988, + "TTTTTTTTTTTTTT": 2989, + "GATCATT": 2990, + "GTACATA": 2991, + "CTCCATA": 2992, + "CCCCGTCTCTA": 2993, + "GCCTGCC": 2994, + "CTAGCTT": 2995, + "CCCGGA": 2996, + "GATGTTTT": 2997, + "GTATTTTA": 2998, + "TCAGATA": 2999, + "CCTGGAA": 3000, + "TATTCCA": 3001, + "GGACCAA": 3002, + "GCCATTA": 3003, + "CGACTGA": 3004, + "TAAGCTG": 3005, + "TAAACACA": 3006, + "GTTTCTC": 3007, + 
"CATCTTA": 3008, + "GAAATTTG": 3009, + "TAATGGG": 3010, + "TAAAATTTT": 3011, + "CTGTTCA": 3012, + "CCTGTTA": 3013, + "TACTGAA": 3014, + "TGACCCA": 3015, + "TGATTTTA": 3016, + "CTCCTTA": 3017, + "TATAGAA": 3018, + "CTGCGG": 3019, + "GCGGTA": 3020, + "GTGCTAA": 3021, + "CAGAGGAA": 3022, + "TACATCA": 3023, + "TCAATCAA": 3024, + "CTGCAGCC": 3025, + "TGAATATT": 3026, + "TCTACAA": 3027, + "CCACATA": 3028, + "CCCGTT": 3029, + "TATACACA": 3030, + "TCCTCTC": 3031, + "TCTACTT": 3032, + "CCGGAA": 3033, + "CTTTTTTA": 3034, + "GAAAGAAAA": 3035, + "CTATCTT": 3036, + "GACTTTG": 3037, + "TGAACAA": 3038, + "GCAGTTTT": 3039, + "GCTAAAAA": 3040, + "GAGGCGG": 3041, + "TAATAAAAA": 3042, + "CTGGTCA": 3043, + "CAGACAA": 3044, + "GGATATG": 3045, + "TGAAGG": 3046, + "GCCAGAA": 3047, + "CCAGGCC": 3048, + "CCACCATG": 3049, + "CAAACTT": 3050, + "TCATGTA": 3051, + "GCTGCTT": 3052, + "GTAATA": 3053, + "CCCCCAA": 3054, + "CAGCCTG": 3055, + "TCAACTT": 3056, + "TAAAATTAA": 3057, + "GCTGAAAA": 3058, + "CGACGA": 3059, + "GTGGGCA": 3060, + "TGAGGGA": 3061, + "CGCTCC": 3062, + "TTTTGTTTT": 3063, + "GAGTCAA": 3064, + "TCATGCA": 3065, + "CTGCTTA": 3066, + "TAAGTTTT": 3067, + "GTAGCAA": 3068, + "CCTTGG": 3069, + "TGACAAAA": 3070, + "CTGGTAA": 3071, + "TCTTTATA": 3072, + "TGTGTGTT": 3073, + "CTGGTC": 3074, + "CTGGCAA": 3075, + "CATTTCTG": 3076, + "CTCTACC": 3077, + "CTGAGGA": 3078, + "CTAAAATG": 3079, + "CTAGATT": 3080, + "GTATCAA": 3081, + "CAGTCAA": 3082, + "CTGGGTG": 3083, + "CCTCTTA": 3084, + "TGAGTTTT": 3085, + "TTTTATTTA": 3086, + "CCTTTTTT": 3087, + "TATATACA": 3088, + "TAGCAAA": 3089, + "AAATTA": 3090, + "CTGGATG": 3091, + "GATAATA": 3092, + "GACAAAAA": 3093, + "CCTGGGA": 3094, + "GCTTTCA": 3095, + "GTACAGG": 3096, + "GCTGGAA": 3097, + "CTACTCA": 3098, + "CAATGTA": 3099, + "GCGTGAA": 3100, + "GATCCTT": 3101, + "TATTAATG": 3102, + "GCCCGA": 3103, + "TAAAGTG": 3104, + "GCTTCCA": 3105, + "CATGGAA": 3106, + "TGAAGTT": 3107, + "CTTTCTC": 3108, + "TCTGTGTG": 3109, + "GTATGTA": 3110, + "CAATACA": 3111, + 
"TCAAGG": 3112, + "CCTCTAA": 3113, + "TGTGGG": 3114, + "GATCTGA": 3115, + "GTACTGA": 3116, + "TTAATTAA": 3117, + "GCAGAAAA": 3118, + "CTACATA": 3119, + "CCGGTG": 3120, + "GGGGAAAA": 3121, + "TACAAAAAA": 3122, + "TTTTGG": 3123, + "GTGAGAA": 3124, + "TCAATAAA": 3125, + "TCAAGTT": 3126, + "CTCAGGA": 3127, + "CTACTC": 3128, + "CAAATCA": 3129, + "GGCAGAA": 3130, + "CCCGAA": 3131, + "TGTTGTG": 3132, + "GAGCAAAA": 3133, + "TATTTGTG": 3134, + "GTAGGTT": 3135, + "CTACCTG": 3136, + "CACAAAAA": 3137, + "CTCAGG": 3138, + "GCTTTA": 3139, + "CAGAGCAA": 3140, + "CTCAGTG": 3141, + "GGAAGAGA": 3142, + "TAACCTG": 3143, + "GAAATATA": 3144, + "CGAGAA": 3145, + "GTGAGG": 3146, + "CATTTATA": 3147, + "GGCAGCA": 3148, + "TCTAAATT": 3149, + "CCCAGTG": 3150, + "GCCTAGG": 3151, + "TGCATTA": 3152, + "CCGTAA": 3153, + "CATTCCA": 3154, + "CTAGTTA": 3155, + "GACTTAA": 3156, + "CTATACA": 3157, + "GACACAA": 3158, + "TCTTCACA": 3159, + "CCGGTT": 3160, + "TAAAGTAA": 3161, + "CTGTGGA": 3162, + "TAAGGTG": 3163, + "TCCAGTA": 3164, + "CAAATTTA": 3165, + "AAATTAAAA": 3166, + "CCATCTA": 3167, + "CTCCCTT": 3168, + "CTCCTTTT": 3169, + "GAGAGAGAGAGA": 3170, + "GGAGATA": 3171, + "CCTATTA": 3172, + "CACCAAAA": 3173, + "CCGTTA": 3174, + "TGTTTATA": 3175, + "CTCAGGAGG": 3176, + "GACGTA": 3177, + "GTCCTTA": 3178, + "GAAAGTT": 3179, + "GCTGGTG": 3180, + "CTCTACA": 3181, + "CAATAGA": 3182, + "TAAAATATT": 3183, + "GTACCTG": 3184, + "GTACTAA": 3185, + "CTTTGAAA": 3186, + "CCTTTCC": 3187, + "TAAAAATTA": 3188, + "CTCGG": 3189, + "CAAGATA": 3190, + "CATTTGA": 3191, + "CACCTCA": 3192, + "GCCAGCC": 3193, + "GTCGG": 3194, + "GCACATA": 3195, + "CACTCAA": 3196, + "CTTTTAAAA": 3197, + "CAGGAATT": 3198, + "GCCTATT": 3199, + "TCTTTCTG": 3200, + "CTGAGGCAGGAGAA": 3201, + "CAGGCAGG": 3202, + "CTAGTAA": 3203, + "TCCATA": 3204, + "GAACTTA": 3205, + "CG": 3206, + "GCTGTGA": 3207, + "GAAAATA": 3208, + "TCTTCATT": 3209, + "GAGGGAGA": 3210, + "CCCATCC": 3211, + "GAGGTGGG": 3212, + "GCCTCTA": 3213, + "GTAGGTG": 3214, + "TAAACCA": 3215, 
+ "GAAGGAAA": 3216, + "TATTGG": 3217, + "ATG": 3218, + "TCCAGTT": 3219, + "CCCACAA": 3220, + "GAAACACA": 3221, + "GTCTCAAAA": 3222, + "CTTTTCTTTT": 3223, + "TGAAGGA": 3224, + "TATTGATT": 3225, + "CTATGTA": 3226, + "AAAAAAAAAAAAAA": 3227, + "TCCTTAAA": 3228, + "GCGCTA": 3229, + "TCCACTT": 3230, + "GACTCAA": 3231, + "TAAATACA": 3232, + "TCATGGA": 3233, + "TCTGGGA": 3234, + "TCCTATG": 3235, + "CTGTGCA": 3236, + "TCAAGTGA": 3237, + "TCATAAAA": 3238, + "CATCCAA": 3239, + "CCTTCCA": 3240, + "CTGTACA": 3241, + "GAAGGTT": 3242, + "CTGTGTA": 3243, + "GTCACTT": 3244, + "TCACAAAA": 3245, + "TCAGGCA": 3246, + "GTGTTAAA": 3247, + "CCCTTAA": 3248, + "CAAAGTG": 3249, + "GAAATGTT": 3250, + "CTGGGGA": 3251, + "GACGCC": 3252, + "TATATGTG": 3253, + "CTAGATG": 3254, + "GAAATTAAA": 3255, + "GAATGCA": 3256, + "GCACTAA": 3257, + "CGGGAGG": 3258, + "GCCACAA": 3259, + "CGCTTA": 3260, + "TCCACAA": 3261, + "CAGATA": 3262, + "TCTGAATT": 3263, + "TATTATTTT": 3264, + "GCGCGG": 3265, + "CTCTGAAA": 3266, + "TCTCTTTG": 3267, + "TATTTCTA": 3268, + "GGGGTGGG": 3269, + "GGATGCA": 3270, + "CCACACC": 3271, + "TAAATGTG": 3272, + "TCTTCCTG": 3273, + "GCAAGG": 3274, + "CTGCTCC": 3275, + "CTGGAGTG": 3276, + "CTGTTAAA": 3277, + "CACACAAA": 3278, + "CTGACTT": 3279, + "GAAAAGAAAA": 3280, + "CCTTCTCC": 3281, + "GAAATAAAA": 3282, + "CCTCAGGTGA": 3283, + "GATAATG": 3284, + "GAATTGCTT": 3285, + "CCAAAATT": 3286, + "CGTGAAA": 3287, + "CACTGAAA": 3288, + "CAGTGAAA": 3289, + "GATCTTA": 3290, + "GAGATGGG": 3291, + "TCTGCCA": 3292, + "TGAGGTA": 3293, + "TATGGAA": 3294, + "TATATTTTA": 3295, + "TGAACTT": 3296, + "GCAGATA": 3297, + "CTTTTCTT": 3298, + "GTAAAATG": 3299, + "TCTCTAA": 3300, + "TCTGCAAA": 3301, + "GAGCCTT": 3302, + "TATCATT": 3303, + "CAATTTTA": 3304, + "CCGCCA": 3305, + "TATTTAAAA": 3306, + "GAGAGATG": 3307, + "GAGATGGA": 3308, + "GCCAGGATG": 3309, + "CGAGTAGCTG": 3310, + "TTCATTTT": 3311, + "TATACTT": 3312, + "GTCTACA": 3313, + "GTGAGTGA": 3314, + "GCTACACA": 3315, + "GGGAGGA": 3316, + "CAAGGCA": 3317, + 
"GCTTTTAA": 3318, + "CACTATT": 3319, + "GTTCATA": 3320, + "TCCTC": 3321, + "GTGGACA": 3322, + "TATTTGGA": 3323, + "CTCCAGTA": 3324, + "GTTCAGTT": 3325, + "CCAAGG": 3326, + "CAGAGCC": 3327, + "CTCGCC": 3328, + "CCGATG": 3329, + "GGAATTTT": 3330, + "TCCAGCC": 3331, + "CCTCTTTT": 3332, + "GAACCTT": 3333, + "CATGCACA": 3334, + "GTTTC": 3335, + "GAAGATA": 3336, + "TACCCC": 3337, + "GCTGCCA": 3338, + "GGGGGAGG": 3339, + "GCAGTGAGCTGA": 3340, + "CTGTCTA": 3341, + "CGAGGA": 3342, + "CAATGGG": 3343, + "GCTGTGAA": 3344, + "GAAAGTG": 3345, + "TACCAAAA": 3346, + "GTCAGG": 3347, + "CAGCTCC": 3348, + "TGTGCTT": 3349, + "GTCTAGG": 3350, + "TTTTTGTA": 3351, + "TTATATG": 3352, + "TCAGGGG": 3353, + "TATTGTTA": 3354, + "CCTGAGA": 3355, + "TATCTCA": 3356, + "CAATCTG": 3357, + "CACTCTG": 3358, + "GATTTAA": 3359, + "TGAATAA": 3360, + "TCTTGTA": 3361, + "TCAACTG": 3362, + "TCTCCAGG": 3363, + "CTAGAGG": 3364, + "CTGAGAAA": 3365, + "CTAGCTG": 3366, + "TCCACCA": 3367, + "CGATTTT": 3368, + "CCGGCC": 3369, + "GTTGACA": 3370, + "CTTAGAA": 3371, + "CATAATG": 3372, + "GAGTATT": 3373, + "CACAGAAA": 3374, + "GACTGTG": 3375, + "CTATTTTA": 3376, + "TGAGGAAA": 3377, + "TTATTAAAA": 3378, + "CTTATTTA": 3379, + "CAGACTT": 3380, + "CACGCC": 3381, + "GCTTGG": 3382, + "CCTGCTT": 3383, + "TAAAGCAA": 3384, + "CCTCGTGA": 3385, + "TAGAATT": 3386, + "CTTACAA": 3387, + "TAAAGGAA": 3388, + "GTCTAGA": 3389, + "GTGACTT": 3390, + "TACATATG": 3391, + "GTCAGGA": 3392, + "GCTCCAGG": 3393, + "GAAGGGA": 3394, + "CATGATG": 3395, + "TCATCAAA": 3396, + "CGTTAAA": 3397, + "GTACTCA": 3398, + "CTCCCAA": 3399, + "TATATGTA": 3400, + "GGTATTTT": 3401, + "TAAGCCA": 3402, + "CGAAATT": 3403, + "GTTTGTTTT": 3404, + "TCTGTCTT": 3405, + "TATATCA": 3406, + "TGTTCATT": 3407, + "CAAACCA": 3408, + "TTCATTA": 3409, + "TATTTGTA": 3410, + "GATTGAA": 3411, + "CTATAAAA": 3412, + "GATTAATT": 3413, + "CCCACCA": 3414, + "TCCTAGG": 3415, + "TAAATGTA": 3416, + "CTCTTAAA": 3417, + "GCAGTCC": 3418, + "GCGGCTG": 3419, + "GTCTCGAA": 3420, + "TGAATGA": 
3421, + "CTGGGGG": 3422, + "GTCTCGA": 3423, + "GAACAAAA": 3424, + "TGAATCA": 3425, + "TGTATTTTTAGTAGAGA": 3426, + "GTTATTAA": 3427, + "TTTTTTAAAA": 3428, + "GTCAGTG": 3429, + "CCCATTA": 3430, + "CACAGGA": 3431, + "TATTCCTT": 3432, + "TCTGCCTT": 3433, + "CCTGGTG": 3434, + "GCGAGC": 3435, + "TACTAAA": 3436, + "TACACAAA": 3437, + "CCGTCC": 3438, + "GCTTTGTT": 3439, + "GCATCCA": 3440, + "CATCTAA": 3441, + "GCTGTGTT": 3442, + "GTAGACA": 3443, + "GCCTATG": 3444, + "TCTTTGTG": 3445, + "GATTCTG": 3446, + "CGCCCGG": 3447, + "GATGAGA": 3448, + "TATCTGA": 3449, + "TGAATTTG": 3450, + "CCTGATG": 3451, + "TAAAACAA": 3452, + "CTTTAGG": 3453, + "TTTTCCTT": 3454, + "TGAATAAA": 3455, + "CGGGGA": 3456, + "CAAACATT": 3457, + "GTATGGA": 3458, + "GCTTAAAA": 3459, + "TACCAAA": 3460, + "CAAAGAGA": 3461, + "CTCCTGCC": 3462, + "GTAAAAAAA": 3463, + "CACAGCC": 3464, + "CCATGCA": 3465, + "TACAATT": 3466, + "CTAGTGA": 3467, + "CTGAGTT": 3468, + "GAGTGAAA": 3469, + "TCTGTTTG": 3470, + "CTGTAGG": 3471, + "TATAAAAAA": 3472, + "GCATTAAA": 3473, + "GTCCATA": 3474, + "TGTTAAAAA": 3475, + "TGTTTGA": 3476, + "GAATAGA": 3477, + "CTTCAAAA": 3478, + "CTGGACA": 3479, + "CTGTAGA": 3480, + "CCATTAAA": 3481, + "CTATCTG": 3482, + "CACTATG": 3483, + "TTATCAA": 3484, + "TAAGTAAA": 3485, + "TAATCCCAGCACTTTGGGAGGCC": 3486, + "CCAGAAAA": 3487, + "TGAAGCA": 3488, + "TCCCTTTT": 3489, + "TCATACA": 3490, + "TACGTT": 3491, + "GCCGTG": 3492, + "GGAAGTG": 3493, + "GGCCAAA": 3494, + "GTACCAA": 3495, + "TCTCTACTAAAAATA": 3496, + "CATTGTG": 3497, + "TGTGTGA": 3498, + "GAAACAGA": 3499, + "CTTGACA": 3500, + "GATGAGG": 3501, + "GAGATTTT": 3502, + "CCTTCAA": 3503, + "GAATCTA": 3504, + "CTCTCCTT": 3505, + "GGCGGA": 3506, + "TCTATCTATCTATCTA": 3507, + "CACACAGA": 3508, + "TGTGTGTA": 3509, + "CAAAGCC": 3510, + "TGTGCCA": 3511, + "GTTGAAAA": 3512, + "CTCCAGCA": 3513, + "TCAAGGA": 3514, + "TAGCTCA": 3515, + "CGCTGA": 3516, + "CCTGAAAA": 3517, + "GACTATT": 3518, + "GATTCCA": 3519, + "GCTTCTA": 3520, + "GTCTGCC": 3521, + "CTTGGCA": 
3522, + "TGTGGTA": 3523, + "GCTTTGA": 3524, + "GCTCTCTG": 3525, + "CTCACAGA": 3526, + "TCTTTAAA": 3527, + "CAAAGCAA": 3528, + "TACTTAA": 3529, + "GCTTCAA": 3530, + "CATTGAA": 3531, + "GGAGGAAA": 3532, + "CTATAGA": 3533, + "CTGAGGAA": 3534, + "CCTGGCA": 3535, + "CCCTATT": 3536, + "CTCGTG": 3537, + "TTACACA": 3538, + "TTAGGAA": 3539, + "CTGGTTA": 3540, + "GTTGTCC": 3541, + "TAATGAAAA": 3542, + "TATTTACA": 3543, + "GGGAATT": 3544, + "GTAGTTTT": 3545, + "GCTGCAA": 3546, + "CTACGG": 3547, + "GCCGGA": 3548, + "CTGGGCA": 3549, + "CCTTAAAA": 3550, + "GATGGAA": 3551, + "TAGATAGATAGATAGA": 3552, + "TATGTAA": 3553, + "GTACGG": 3554, + "TATTCAAA": 3555, + "GATCTCC": 3556, + "CCTGTTTT": 3557, + "TATTGCA": 3558, + "GGAAGGAAGGAAGGAA": 3559, + "GGTAATT": 3560, + "TTACAGA": 3561, + "TCAGC": 3562, + "GCAAAATG": 3563, + "GAGAGCA": 3564, + "GTAGAAAA": 3565, + "CATTTGAA": 3566, + "TCTTCTTTT": 3567, + "TCCCATA": 3568, + "GTTATTTA": 3569, + "CTATCTA": 3570, + "CATCCTG": 3571, + "TCTTGTG": 3572, + "TTATTATT": 3573, + "CCCGTC": 3574, + "TACTATG": 3575, + "TAAACATA": 3576, + "TAAGGAAA": 3577, + "GCTTGTG": 3578, + "CTCTAAAA": 3579, + "GTTTTAAAA": 3580, + "GACAGGA": 3581, + "TCCTAGA": 3582, + "TCCACCCA": 3583, + "GTTTGAAA": 3584, + "CCATCTCA": 3585, + "CTAAGAA": 3586, + "GTATCTA": 3587, + "GTGAGGA": 3588, + "GCTGGAGG": 3589, + "CCTGTAATCCCAGCTA": 3590, + "GCAACAA": 3591, + "CTTTCAAA": 3592, + "CAAATGTT": 3593, + "CTTGTCC": 3594, + "TCTCAAAAA": 3595, + "TATTTATTA": 3596, + "TAAGGCA": 3597, + "GAGAGGAA": 3598, + "TATGATT": 3599, + "GCATCTA": 3600, + "CGTTATT": 3601, + "GCCTGTA": 3602, + "GTTTCAAA": 3603, + "CCTTCCTTCCTTCCTT": 3604, + "GGCTTTG": 3605, + "GTCAGAA": 3606, + "CATGCATG": 3607, + "GTCATTTA": 3608, + "CTGGAAAA": 3609, + "CTTCGA": 3610, + "CCTATTTT": 3611, + "CCAACAA": 3612, + "TCCATCC": 3613, + "TAAAGTTA": 3614, + "GTCTCTC": 3615, + "TAATCAAA": 3616, + "GATTTTTG": 3617, + "GATTTCTT": 3618, + "GGGCTGA": 3619, + "GCATGTA": 3620, + "CCTGGGTT": 3621, + "GAGACAA": 3622, + "GCTGTCA": 3623, 
+ "TGATAGG": 3624, + "GGAGACC": 3625, + "CCGGCA": 3626, + "TAATCTCA": 3627, + "TGAATTAA": 3628, + "TCTGGTG": 3629, + "GCCTC": 3630, + "GGCGCA": 3631, + "CCAGCTA": 3632, + "CAGTCTG": 3633, + "TGAACTA": 3634, + "GTAAGAA": 3635, + "CCTTTCA": 3636, + "TCCATGA": 3637, + "CAAAGGAA": 3638, + "CTCTC": 3639, + "CTCTCTCA": 3640, + "CTCCAGC": 3641, + "GTAGATA": 3642, + "CCCCCTCC": 3643, + "GGCGCC": 3644, + "TCTGTCC": 3645, + "GACCATT": 3646, + "CTTGAAAA": 3647, + "TTATCC": 3648, + "TACATGTG": 3649, + "CAAATTTG": 3650, + "TTTTGTG": 3651, + "CAGAGTG": 3652, + "GTAATAA": 3653, + "GTGAGTG": 3654, + "TTTTTCC": 3655, + "GGCTCTG": 3656, + "GCCCTAA": 3657, + "GGCTGTT": 3658, + "CCCAATT": 3659, + "CAGAGCTT": 3660, + "TATAAATG": 3661, + "GAGTCTG": 3662, + "TCTTAAAAA": 3663, + "GTTTTATG": 3664, + "GATCCAA": 3665, + "GGCCCTG": 3666, + "GATCCTG": 3667, + "TCAAGTG": 3668, + "GATTCAA": 3669, + "CCTCTCTT": 3670, + "GAGACGG": 3671, + "CAGATCA": 3672, + "TAAAAGAA": 3673, + "CTGAGCAA": 3674, + "CCTGCCA": 3675, + "CCTTCTA": 3676, + "CGCTCA": 3677, + "GGCTGTG": 3678, + "TGGGAAAA": 3679, + "GGAGCCTG": 3680, + "CTGAGTG": 3681, + "CGTCAAA": 3682, + "TCAAGTA": 3683, + "CGTAATT": 3684, + "TTACTTA": 3685, + "TATACTA": 3686, + "GGGCAAA": 3687, + "CAACTTTT": 3688, + "CTTTGCC": 3689, + "GCCAGGAA": 3690, + "CACACTA": 3691, + "GCCCAGC": 3692, + "TAAATAAATAAATAAA": 3693, + "CTTTCCTT": 3694, + "GGGAGAA": 3695, + "TATGGTA": 3696, + "CGGCCA": 3697, + "CCTCTCTG": 3698, + "GAAAGCAA": 3699, + "CAAGCCA": 3700, + "GGCGTT": 3701, + "CTCTTTTA": 3702, + "TCGGCCTCCCAAA": 3703, + "GATTTATT": 3704, + "CAAGTCC": 3705, + "TATCTTA": 3706, + "GTTCAAGACCA": 3707, + "CTCACACA": 3708, + "GAAATCAA": 3709, + "TGAGACC": 3710, + "GGGTAAA": 3711, + "GCTTGTT": 3712, + "GATTTTAA": 3713, + "TTTTTATA": 3714, + "CAGAGCTG": 3715, + "TCTGTTAA": 3716, + "GTAATTAA": 3717, + "TCTTTGAA": 3718, + "CTTGCCA": 3719, + "TTTTCATT": 3720, + "CCATGTA": 3721, + "TCTCGGCTCACTGCAA": 3722, + "GGATTCA": 3723, + "TCTATTAA": 3724, + "TACATAAA": 3725, + 
"GATTGATT": 3726, + "GGAGAGGA": 3727, + "CGCAAAA": 3728, + "GGACTAA": 3729, + "TTATGTG": 3730, + "GTCACTCA": 3731, + "GACAGCA": 3732, + "CGAGTT": 3733, + "GATGGTT": 3734, + "GGAAGAGG": 3735, + "GCCAACATGGTGAAA": 3736, + "GGAGCCA": 3737, + "TGAACTG": 3738, + "CCTCTGTG": 3739, + "GTATAAAA": 3740, + "TCCCAGAA": 3741, + "CATTTATG": 3742, + "GATTATG": 3743, + "TGTTTCTG": 3744, + "GAGTGGGTT": 3745, + "TACATATT": 3746, + "CTCCAGGA": 3747, + "GACACTG": 3748, + "GGTCTCA": 3749, + "CCGGGA": 3750, + "TGTTTAAA": 3751, + "CTCACCA": 3752, + "GGACTTA": 3753, + "GCCCACC": 3754, + "CAAATCAA": 3755, + "GAAATGTG": 3756, + "TAGTTAA": 3757, + "TCTATAA": 3758, + "TTAGATT": 3759, + "GTGTAGG": 3760, + "TACTGAAA": 3761, + "GCACCCA": 3762, + "GTGGGCTG": 3763, + "GAATGAAA": 3764, + "TCTAGTT": 3765, + "TCAGGAGA": 3766, + "TCCACTA": 3767, + "CTCAGTT": 3768, + "TACTTAAA": 3769, + "GACTCCA": 3770, + "TCCATTTG": 3771, + "CACAGCAA": 3772, + "GCTCATGCCTG": 3773, + "GGTGCTG": 3774, + "GCTTTCTT": 3775, + "GTGGCCA": 3776, + "TACGTG": 3777, + "GTGCAGTG": 3778, + "TGAAGTCA": 3779, + "CCTTTAA": 3780, + "TCTCAGCTCACTGCAA": 3781, + "GAAATATG": 3782, + "CCTCAAAA": 3783, + "GGGGCGG": 3784, + "CGACAA": 3785, + "GGTGATG": 3786, + "GTCTTAAA": 3787, + "CAGAAATG": 3788, + "CGTCATT": 3789, + "CCAAGCA": 3790, + "GGATCAA": 3791, + "GTGCTGGGATTA": 3792, + "GCTGGCC": 3793, + "CGGAGCTT": 3794, + "TACATGA": 3795, + "TGTTTGAA": 3796, + "TCTCCATT": 3797, + "TAAGCAAA": 3798, + "CCTTTCTT": 3799, + "TACTGTT": 3800, + "TCCATCTT": 3801, + "CTTACTT": 3802, + "CGGAGGTT": 3803, + "CAAAACAA": 3804, + "TCATAGG": 3805, + "TTACTAA": 3806, + "CTTATTTG": 3807, + "GAATGTA": 3808, + "CCCCATGGA": 3809, + "TTACTGA": 3810, + "CGGAAAA": 3811, + "CTCCAGTG": 3812, + "TGTTCCA": 3813, + "CAGATGAA": 3814, + "GTTGATA": 3815, + "TCCCCCC": 3816, + "CATTGCA": 3817, + "CTCAGCC": 3818, + "CTTACTG": 3819, + "TATCCTT": 3820, + "CTTTTATG": 3821, + "TGAGTAGCTG": 3822, + "GACTGAAA": 3823, + "CAATGAAA": 3824, + "CGACTG": 3825, + "CTTGGGA": 3826, + "GCAAGCA": 
3827, + "TCACTCC": 3828, + "GATTTGA": 3829, + "CATTTTAAA": 3830, + "TCAACTA": 3831, + "GTCCAAAA": 3832, + "CACCCTG": 3833, + "TTACCTT": 3834, + "CAAGGGG": 3835, + "TTTTGGA": 3836, + "GTTATTTG": 3837, + "GCTACTG": 3838, + "CTGAGGCAGGAGAATG": 3839, + "GTGATGA": 3840, + "GTAGTC": 3841, + "TAGTATG": 3842, + "GTATAGA": 3843, + "GTGTCTA": 3844, + "GCTGCTA": 3845, + "TTAGTAA": 3846, + "TAAACATG": 3847, + "GTCACCA": 3848, + "CATCTTTT": 3849, + "CATATAA": 3850, + "TCTCTCTA": 3851, + "TTTTATTAA": 3852, + "TATTCTAA": 3853, + "GAAATTTA": 3854, + "CTTCCCTG": 3855, + "TAAAGATG": 3856, + "TACGTA": 3857, + "GTTTATTA": 3858, + "GAAAAGAA": 3859, + "CCCACCCA": 3860, + "CAATTAAAA": 3861, + "CCGACA": 3862, + "CAAAGTGA": 3863, + "CAAACAAAA": 3864, + "GCAATTTT": 3865, + "CGATTAA": 3866, + "TTAGAGA": 3867, + "CTGATGA": 3868, + "GGAGGAGG": 3869, + "GTCCTGGG": 3870, + "TCATGAAA": 3871, + "GCAACCA": 3872, + "GTTGGCA": 3873, + "GCGGCGG": 3874, + "GTCCCCA": 3875, + "GTAGGGG": 3876, + "GCCATGTT": 3877, + "GTTCGAGA": 3878, + "GCCTATA": 3879, + "TAAATTCA": 3880, + "GGCCATT": 3881, + "GAAAACAA": 3882, + "TGTGTATG": 3883, + "GTACTC": 3884, + "TAGGGAA": 3885, + "CCTTGAA": 3886, + "TCTATTTG": 3887, + "GAGGGCA": 3888, + "GAAACTGA": 3889, + "TACGC": 3890, + "TACAAAAA": 3891, + "TCATTATT": 3892, + "GGAAAATT": 3893, + "TCAATATT": 3894, + "CCCGTA": 3895, + "GGAGAGAA": 3896, + "TTAGTTA": 3897, + "CTCAGAGA": 3898, + "TCGAGC": 3899, + "CTAGTCA": 3900, + "GATGGCA": 3901, + "TGAACATT": 3902, + "CTATGGG": 3903, + "CACACCA": 3904, + "TCAATTAA": 3905, + "GGAACTG": 3906, + "TTACATG": 3907, + "CTTTCATT": 3908, + "CAGCTCTG": 3909, + "TCTTTTTTTT": 3910, + "TAAATCTT": 3911, + "TGATCTA": 3912, + "CATACAA": 3913, + "GCTCAAAA": 3914, + "GCTGTGTG": 3915, + "TCAATCA": 3916, + "GATTTGAA": 3917, + "CCAAGGA": 3918, + "GTCCTCA": 3919, + "GTGCTCC": 3920, + "AAAATAA": 3921, + "GTGACAA": 3922, + "GCTCACGCCTG": 3923, + "CGACGG": 3924, + "TATCCAA": 3925, + "CACACATG": 3926, + "TCTCTCTCC": 3927, + "TGTGGTT": 3928, + "CTTGGTA": 3929, 
+ "TCTGGTT": 3930, + "TTTATAA": 3931, + "CTGCTTTT": 3932, + "TGTGTCA": 3933, + "CACATCA": 3934, + "CCTAATG": 3935, + "CGTTTTTT": 3936, + "GCTGGCA": 3937, + "GACGTC": 3938, + "TATAATTA": 3939, + "TACAGTAA": 3940, + "GAAAGTAA": 3941, + "GTCTGAAA": 3942, + "CCCATTTT": 3943, + "TATATGA": 3944, + "CTTGATA": 3945, + "CTTTATTTT": 3946, + "CTTTATTA": 3947, + "GGCGAA": 3948, + "CCATGCC": 3949, + "CCTGCCTT": 3950, + "GAAGAAGAAGAA": 3951, + "CTGACTGA": 3952, + "GCCCTTA": 3953, + "TATCTAA": 3954, + "GTGTTTTA": 3955, + "TGTGGCA": 3956, + "TATTGTAA": 3957, + "GCCAGAAA": 3958, + "CCCTGTCTC": 3959, + "CACAGGAA": 3960, + "AAAACAA": 3961, + "AAAAAAAAAAAAAAA": 3962, + "TAACTCC": 3963, + "GCCTAAA": 3964, + "CGAGTA": 3965, + "TAGTATT": 3966, + "GTATTTTTAGTAGAGA": 3967, + "GCTGCAGG": 3968, + "TATTGAAA": 3969, + "CCAGCCTGGG": 3970, + "GCTCCAAA": 3971, + "TACGAA": 3972, + "GGCCTCC": 3973, + "TATACAAA": 3974, + "CATGGCA": 3975, + "CATGCAA": 3976, + "TACACCA": 3977, + "CTTTACCA": 3978, + "TACAGAGA": 3979, + "TATTCTTA": 3980, + "TATGTCA": 3981, + "TCAAGCA": 3982, + "TCAATGA": 3983, + "GGCTCTT": 3984, + "GGAAGTT": 3985, + "TCCATGTT": 3986, + "GCTTTCC": 3987, + "TATGTGA": 3988, + "GTGTAGA": 3989, + "TTTTTAAAA": 3990, + "GCTGGAGA": 3991, + "GTGAGAGA": 3992, + "CCTAGAA": 3993, + "CCTCCAAA": 3994, + "CCAATGA": 3995, + "CAGGGCA": 3996, + "CTATGCA": 3997, + "CTTCACC": 3998, + "CTACAAAA": 3999, + "CTCACC": 4000, + "GAGTATG": 4001, + "TAGAAAAA": 4002, + "CTTTTGAA": 4003, + "TAAAGAGA": 4004, + "CATGTCA": 4005, + "TCTTTTAAA": 4006, + "CACAGTGA": 4007, + "GATCTAA": 4008, + "TAAGGTA": 4009, + "CATAGAA": 4010, + "CGCGCC": 4011, + "CAGCTTA": 4012, + "TATAGTT": 4013, + "CGGGCC": 4014, + "TATCCATT": 4015, + "TGTTTGTTTT": 4016, + "GCTGGCTG": 4017, + "TACAGGA": 4018, + "CTCCTTTG": 4019, + "CAATCTA": 4020, + "CCCCCTG": 4021, + "TATACTG": 4022, + "CTGAGCC": 4023, + "CGGTTA": 4024, + "TGAAGTG": 4025, + "GCTTCCTT": 4026, + "TTTTATTTG": 4027, + "TAGTGAA": 4028, + "CTGAGGTG": 4029, + "TCTTCTC": 4030, + "GACAGAAA": 
4031, + "CTGAACTGAA": 4032, + "CCTGGGAA": 4033, + "TCCCCAAA": 4034, + "TATGTATT": 4035, + "GATTTCTG": 4036, + "CATTCAAA": 4037, + "CACAGTT": 4038, + "GCTTGAA": 4039, + "GTGGATCA": 4040, + "CTGAGTGA": 4041, + "TGAATTTA": 4042, + "TCAACAAA": 4043, + "GGTCATT": 4044, + "GTAATTTA": 4045, + "GCGACTT": 4046, + "CTGAGAGA": 4047, + "GTGCCCA": 4048, + "CTAGGTT": 4049, + "TCCTGAAA": 4050, + "GTCCACC": 4051, + "TCACAGAA": 4052, + "GCGAAAA": 4053, + "GTATGGG": 4054, + "TGAACAAA": 4055, + "TAAACAAAA": 4056, + "CCGTTTT": 4057, + "TCTCAATT": 4058, + "TCCAGAAA": 4059, + "GTAACAA": 4060, + "GCATTTTA": 4061, + "TCTCCATG": 4062, + "TTATAAAA": 4063, + "CAGGCAA": 4064, + "CTAAAAAAA": 4065, + "GTTGGGA": 4066, + "TAAAGATT": 4067, + "TGAAGAGA": 4068, + "CCCCTCA": 4069, + "TGTTTATG": 4070, + "TCTACTG": 4071, + "CCAATTTT": 4072, + "GGTGGTG": 4073, + "GGAACAA": 4074, + "TGTGGGA": 4075, + "TCTGCTA": 4076, + "GAACGA": 4077, + "GTAAGTA": 4078, + "GTTGCCA": 4079, + "AAAATTTT": 4080, + "GCGCGA": 4081, + "GAAAGATG": 4082, + "GTCTCTCA": 4083, + "TCCATCAA": 4084, + "GCAGCTA": 4085, + "CACATTTG": 4086, + "CTGACAA": 4087, + "TCCACC": 4088, + "GCT": 4089, + "CCCACTT": 4090, + "GCAGGTA": 4091, + "GAGGCCA": 4092, + "TAAAGTCA": 4093, + "CTGGATA": 4094, + "CGGCAA": 4095 + }, + "merges": [ + "A A", + "T T", + "T G", + "C A", + "C C", + "T A", + "G G", + "T C", + "G A", + "AA A", + "G C", + "T AA", + "TT TT", + "T CA", + "TG A", + "TT A", + "G AA", + "T CC", + "C AA", + "C TG", + "C TT", + "G TG", + "G TT", + "G CA", + "GG A", + "C CA", + "G TA", + "G CC", + "C TA", + "T AAA", + "AA AA", + "C TC", + "G TC", + "TG TG", + "TA TT", + "CA CA", + "G AAA", + "TA TA", + "TC TT", + "TG TT", + "C AAA", + "GA GA", + "CA TT", + "TG AA", + "CA GG", + "TC TG", + "CA GA", + "TC AA", + "GG AA", + "TAA AA", + "C TGA", + "GC TT", + "G TGA", + "GC TG", + "C TCA", + "CC TT", + "CA TG", + "GC AA", + "G TCA", + "G TAA", + "TTTT A", + "TA TG", + "GA GG", + "C GG", + "GA TT", + "CC TG", + "TC TC", + "CC AA", + "G TTA", + "C TCC", 
+ "C TAA", + "TA CA", + "C TTA", + "TC CA", + "GA TG", + "TT AA", + "GAA AA", + "TT TG", + "G TTTT", + "TC TA", + "GC CA", + "G TCC", + "C TTTT", + "GG GG", + "C GA", + "TT TA", + "CC CA", + "CAA AA", + "TG GG", + "TA GA", + "TA GG", + "GA CA", + "GG TT", + "CC CC", + "GG TG", + "CA TA", + "GC TA", + "TG TA", + "TC AAA", + "TG GA", + "TAA TT", + "TTA TT", + "TG CA", + "GG CA", + "GA TA", + "CC TA", + "TT CA", + "TC TCA", + "GG GA", + "C GC", + "CTG AA", + "G TAAA", + "TC TCC", + "TTTT TT", + "C GTG", + "GC AAA", + "TAA AAA", + "TC TGA", + "TCA TT", + "GG AAA", + "TG AAA", + "TCC TT", + "CC AAA", + "GAA TT", + "C TAAA", + "C GTT", + "GTG AA", + "GG CC", + "TAA TA", + "GG TA", + "TG CC", + "CA CC", + "TGA TT", + "AAAA AA", + "GC TCA", + "TCC AA", + "GA GAA", + "CTG TT", + "TA TTA", + "CA GCA", + "CTC TT", + "CTT AA", + "CA GAA", + "GC TGA", + "GTT AA", + "TC TTA", + "TA TTTT", + "GCC AA", + "CTT TG", + "GA CC", + "C GCA", + "GTA TT", + "GTC TT", + "CAA TT", + "GTG TT", + "CTC AA", + "GGA GG", + "C GAA", + "TC TTTT", + "GTC AA", + "C GCC", + "TA TAA", + "TA CC", + "TC TAA", + "CCA TT", + "C GGA", + "CAA AAA", + "CA GTG", + "TCC TG", + "CTC TG", + "GAA AAA", + "CTG TG", + "CA GC", + "TTTT AA", + "GCA TT", + "GCC TT", + "TAA TG", + "CTA TT", + "GTT TG", + "TGA TG", + "GG CTG", + "CC TCA", + "GA GGA", + "GCC TG", + "AAA TT", + "C GTA", + "TC AAAA", + "TA CAA", + "CA TCA", + "CA GTT", + "TGA GA", + "GG GAA", + "CA CTG", + "CA CAA", + "CA GGA", + "CC CCA", + "CC CTG", + "TTTT TTTT", + "TA GAA", + "GA GCA", + "CC TCC", + "CA CCA", + "TA TCA", + "GA GC", + "CA TTA", + "CACA CACA", + "GA GTG", + "GGA TT", + "TGTG TGTG", + "TA CTT", + "CA CTT", + "GTC TG", + "TGA GG", + "GA GTT", + "GAA TG", + "TCA TG", + "GA CAA", + "GA CTT", + "TATT AA", + "TAA TAA", + "GG CCA", + "CA TTTT", + "CA GCC", + "CC CTT", + "GC TAA", + "TATA TATA", + "GTG TG", + "TA CTG", + "TA GTT", + "CAA TG", + "GC TC", + "CA GTA", + "GC TCC", + "CA TAA", + "TTA TG", + "TAAA TT", + "GA TGA", + "CA TGA", + "GC 
GG", + "AAAA AAAA", + "CCA TG", + "GA TAA", + "GA CTG", + "TA TGA", + "GCA GG", + "GA TCA", + "G TTTTA", + "GGA TG", + "CC TGA", + "G TAAAA", + "GAA GG", + "GA TTA", + "CC TC", + "GA CCA", + "GC TTA", + "CC CAA", + "AAA TG", + "GCA TG", + "TA GTA", + "TA CCA", + "GG CTT", + "C GTC", + "TC TCTT", + "GG TCA", + "TTA TTA", + "TA CTA", + "TA GCA", + "TA TC", + "CTG GG", + "CA TC", + "C TTTTA", + "C TAAAA", + "GTG GG", + "GA GTA", + "CCA GG", + "GA TTTT", + "TA GTG", + "GAAA TT", + "CA CTA", + "TC GG", + "TCA GG", + "CAGG AA", + "GC AAAA", + "CC TTA", + "CA TCC", + "CTT GG", + "TGTG AA", + "TATT TG", + "CC TAA", + "CTA TG", + "GA GAAA", + "GAGA GAGA", + "GC TTTT", + "TA TAAA", + "CAA GG", + "TC TCTG", + "TGTT AA", + "TGTG TT", + "GA GCC", + "GA CTA", + "TA TATT", + "TAA AAAA", + "TTTT TG", + "GTA TG", + "CATT AA", + "TA GGA", + "TA GC", + "GTT GG", + "GAA GAA", + "TAAA TG", + "TC TGTT", + "CA GAAA", + "CAAA TT", + "TAA TTA", + "TC TGTG", + "TA TCC", + "TGAA TT", + "CTC CA", + "GTG AAA", + "GG CAA", + "GGA GA", + "GAA GA", + "GG TGA", + "GG GCA", + "CC AAAA", + "TCTC TCTC", + "CTG CA", + "CTT CTT", + "TCTT AA", + "CC CTA", + "TGTG TG", + "AAA TA", + "TGTT TG", + "GG GTT", + "GTG CTG", + "GG AAAA", + "GG GGA", + "TCA GA", + "CC TTTT", + "GAAA TG", + "GCA GCA", + "TC TGAA", + "GG GTG", + "CACA TT", + "TCTT TG", + "GG GC", + "TCC CA", + "TC CATT", + "CTG AAA", + "CTT TA", + "TC GA", + "GTT TA", + "CAA CAA", + "CTT CC", + "GCC TCC", + "TT AAA", + "GC TCTG", + "GTT TCA", + "GGA GGA", + "C GTGA", + "CA GTC", + "GAA TA", + "CA GAGA", + "CC CTC", + "CAAA TG", + "CTG CTG", + "GA TCC", + "TTTTA TT", + "AAAA TT", + "TTA TA", + "TCAA TT", + "GG TAA", + "GTTA TT", + "GC CAGG", + "GGA GAA", + "CATT TG", + "TCA CC", + "CTC AAA", + "GG TTA", + "TCC AAA", + "TC TATT", + "GCA GA", + "CTT CA", + "TCA TCA", + "C GAGG", + "TAA CA", + "GTT GTT", + "CTTA TT", + "C GTCA", + "TAA GA", + "TAA TTTT", + "CTG TA", + "TC CACA", + "GC TGTG", + "C GCTG", + "TC TAAA", + "GC GA", + "CAA TA", + "CCA CCA", 
+ "GAA CA", + "C GAAA", + "CAGA TT", + "TCA CA", + "TTA TTTT", + "TC TCAA", + "TGA CA", + "CTCC AA", + "AAAA AAA", + "TATA TG", + "TCC TCC", + "TCA CTT", + "TC CAGG", + "CAA GA", + "GG CTA", + "GTG GTG", + "C GTAA", + "C GAGA", + "TGA TA", + "GGA TTA", + "CAA CA", + "C GATT", + "TGA GAA", + "CTCC TT", + "CTCA TT", + "GTT AAA", + "TCA TA", + "CC TCTG", + "CTC TA", + "GC TGAA", + "CTG GA", + "TAA GG", + "CTT AAA", + "TATT TA", + "CCA CA", + "CC GG", + "GTC AAA", + "TG GAA", + "C GGAA", + "TGA TGA", + "GTT CA", + "TAA CAA", + "GC TGTT", + "TAA GAA", + "CTG CC", + "TTAA TT", + "CCA GA", + "TCA GAA", + "GTCA TT", + "C GCTT", + "GATT AA", + "CTGA TT", + "GC CACA", + "GTAA TT", + "TC CAGA", + "GCC AAA", + "GTGA TT", + "TAAAA TT", + "CAA GAA", + "CCA CC", + "TAA TCC", + "GTT CTT", + "TC CATG", + "GC TCTT", + "TG CTG", + "GG GTA", + "TTA CA", + "GC CATT", + "GCA CA", + "GCAA TT", + "TCC CTG", + "TG TGA", + "TC GAA", + "GGA CA", + "GGAA TT", + "GTG GA", + "CTT CTG", + "TCC CC", + "GCC CC", + "CTT GA", + "TAA TGA", + "TAAA TA", + "TATA TA", + "CTG CAA", + "TCA TTA", + "GTA TA", + "TCC CCA", + "C GTTA", + "GCA GAA", + "TGA GTT", + "CTTTT TT", + "C GATG", + "CTT TCA", + "AAAA TG", + "CAGG TT", + "CTAA TT", + "C GCCA", + "TGAA AAA", + "GTT CC", + "GTCC TT", + "GTCC AA", + "GTTTT TT", + "CTC TGA", + "GC GC", + "GTT GA", + "TGAA TG", + "CTA TA", + "GCA GTG", + "CCTT AA", + "TCA CCA", + "TCA CTG", + "GCC CTG", + "TAA CTT", + "CAGA TG", + "GTA GG", + "TC TATA", + "GAGA TT", + "GTC TA", + "TTTT AAA", + "CACA TG", + "TGA CC", + "CA CAAA", + "GTG TA", + "GG GAGG", + "GCTT TG", + "CAA AAAA", + "GA GGAA", + "GTT CTG", + "TTTT TA", + "GTC TCA", + "GTT CAA", + "TC GTG", + "GCTT AA", + "GCA CC", + "CTCC TG", + "TAAA TAAA", + "CTA CA", + "CTT CCA", + "TCC TCA", + "C GCAA", + "GAA AAAA", + "GCC CA", + "TC GTT", + "GTA GA", + "CTC TCA", + "GTC CA", + "TGA CTT", + "TCC CTT", + "GC CATG", + "CACACACA CACACACA", + "GTGA TG", + "CC TCTT", + "GC CAGA", + "TCC TA", + "C GTTTT", + "GTA CA", + "GCA 
TA", + "GAA TTA", + "TGTGTGTG TGTGTGTG", + "CC CAGG", + "GG TTTT", + "TCAA AAA", + "TC TATG", + "CCA TA", + "TGA CAA", + "GGA TA", + "TCA GTG", + "GTA TTTT", + "GAGA TG", + "GC GTG", + "C GTCC", + "TTAA AAA", + "TAA TCA", + "CAA TTA", + "CCA CTG", + "CGG TT", + "GTT GAA", + "TGA TTA", + "CCTT TG", + "CGG TG", + "CAGG TG", + "TCAA TG", + "CTGA TG", + "TCA GGA", + "GTT TAA", + "TATT AAA", + "CTC TTA", + "GCA GGA", + "CTC TCC", + "GAA CC", + "CTT TAA", + "GG GCC", + "GTA TTA", + "GC GCC", + "CCAA TT", + "GC TAAA", + "TGA CTG", + "GATT TG", + "GA TAAA", + "TCA GCA", + "GTT CCA", + "GAAA TA", + "GA CAAA", + "GA GTC", + "GC TATT", + "TCA CAA", + "GAGG TT", + "TAA CC", + "GAA GGA", + "GC TCAA", + "GAAAA TT", + "CCA GCA", + "GTTTT AA", + "GTG CC", + "TGA GGA", + "CA TAAA", + "GG TCC", + "TCA TTTT", + "TATT TATT", + "TAA TAAA", + "GCC TA", + "CTTTT AA", + "TAA GTG", + "TAA GTA", + "CTG GAA", + "CACA CA", + "GA CAGA", + "CAA CC", + "GG GAAA", + "CCA GAA", + "TCA GTT", + "TAA CTA", + "CTAA AAA", + "TGGG TT", + "TGA GTG", + "TAAAA TG", + "TATATATA TATATATA", + "GCA CTG", + "GA CTC", + "TA CAAA", + "TAAAA AAA", + "TC TACA", + "GTT GTG", + "TC GCC", + "CC CAAA", + "GTCA TG", + "CTG CTT", + "GGAA TG", + "CTA TTA", + "GA TATT", + "TA GAAA", + "GG CAGG", + "GA TGAA", + "GTA GAA", + "TCC TGA", + "TAA CTG", + "GCTG GG", + "GCAA TG", + "GCC CCA", + "GTT TGA", + "CATT TA", + "GTG CA", + "CTT GAA", + "GTG GAA", + "CTT CAA", + "TAAA TTA", + "GTG GCA", + "TCC TTA", + "GGAA AAA", + "TTTT TTA", + "CC TGTG", + "GTAA TG", + "GTG TTA", + "CTA GG", + "CAGG CTG", + "GA CACA", + "GAAAA AAA", + "TC GC", + "GTAA AAA", + "TGTT TA", + "TCTC TA", + "GTCC TG", + "CCA GGA", + "GAA CAA", + "TAA GTT", + "TGA GCA", + "GC TCCA", + "TAA GCA", + "CTCA TG", + "GTC TTA", + "CC CACA", + "CA TATT", + "GCC TCA", + "CA CTC", + "CTT CTA", + "TGA TTTT", + "TC GCA", + "CC TGTT", + "GAA GCA", + "GCAA AAA", + "GC GGA", + "CCA CAA", + "GC GCA", + "CA TATA", + "GA CATT", + "GTT CTA", + "CAAAA TT", + "GAAA GAAA", + "CC 
CGG", + "TA CACA", + "CCAA AAA", + "GAGG TG", + "GG CTCA", + "CA GTGA", + "TCC CAA", + "TA TCTT", + "TGA GTA", + "TC GTA", + "TTTT CTT", + "GTG GGA", + "GA GCTG", + "CC CTCC", + "TAGG TT", + "TTA GG", + "TAA TATT", + "CCA GCC", + "CA TCTT", + "GTC TGA", + "GTT TCC", + "CC TGAA", + "GGA GCA", + "GAAAA TG", + "TCA GTA", + "TAA CCA", + "GA TGTT", + "CTG TTA", + "CA TGTT", + "GG CGG", + "CA TGTG", + "GG GAGA", + "CTT TGA", + "TCTT TCTT", + "AAAAAA AAA", + "GGGG TG", + "CTT TCC", + "CTT GTT", + "GCA TTA", + "CC CAGA", + "CAAA TA", + "TC GGA", + "CA GCTT", + "TCA CTA", + "TAA TTAA", + "TAA GGA", + "GAA CTG", + "GCA CAA", + "GC GTT", + "GG CTC", + "TC TTTTA", + "CC TCCA", + "GG CAAA", + "CA GCTG", + "CTA CAA", + "TA CATT", + "GC TATG", + "CTT GTG", + "GA GTCA", + "GTTA TG", + "CTG CCA", + "GTC TCC", + "TGA CCA", + "CA CCTG", + "TATA TTA", + "TGA TCA", + "CA GCAA", + "GA TGTG", + "GTC TTTT", + "CTA GAA", + "GC TACA", + "CTG GGA", + "GGGG TT", + "CAA GTA", + "CAA GGA", + "CC CTCA", + "TA GCC", + "GTT GGA", + "GC TATA", + "TCTG AAA", + "TA TGTT", + "CC CCTT", + "GTT GTA", + "CC CTGA", + "TGA CTA", + "CAA GCA", + "CAA TAA", + "GAA CTT", + "CA TGAA", + "CTTA TG", + "CTAA TG", + "TC TAAAA", + "CCAA TG", + "GAA GTG", + "CC TCAA", + "CC CATT", + "CA GTCA", + "GAGAGAGA GAGAGAGA", + "TA TGTG", + "GCA GTGA", + "TCTCC TT", + "TCC CAAA", + "CCA TTA", + "CCA GTG", + "GCA TCA", + "TCAAA TT", + "GA TCTT", + "GA CAGG", + "GGA GTG", + "GTA GTA", + "CAA CTT", + "GAA GTT", + "CC CCTG", + "TCTC AAA", + "GG GTC", + "GA GCTT", + "TATG AAA", + "TA TGAA", + "GA CATG", + "CAA GTG", + "GA TATA", + "CA TCTG", + "CTG TGA", + "TAA TTTA", + "GG CAGA", + "GC GAA", + "CC TAAA", + "CCA TCA", + "CA CTGA", + "GGA CTA", + "GA CGG", + "CTC TTTT", + "CTG TCA", + "TCTCTCTC TCTCTCTC", + "TTAA TG", + "GCA GCC", + "CAAAA AAA", + "GCA CCA", + "CTA TTTT", + "GA GCAA", + "CTT GGA", + "CTG GTG", + "GAA TAA", + "TCC TTTT", + "GAA GTA", + "CA GTAA", + "CAA CCA", + "CTG TAA", + "TGA TAA", + "GCA GTT", + "CA CGG", + "TAAA 
TAA", + "CTG TTTT", + "CTA CTA", + "GC TCTA", + "C GAAAA", + "CAA GTT", + "CTT GTA", + "GAA TGA", + "GA GTGA", + "GCC TGA", + "GG TTTG", + "CC CATG", + "GG GGAA", + "GAA GAAA", + "TG TTA", + "CAA TTTT", + "TATA TTTT", + "CTC AAAA", + "GG TGGG", + "CC GTG", + "TATT TCA", + "CC CCAA", + "TATT TAA", + "GG CTGA", + "GG TGTG", + "CA TCAA", + "CA CTCA", + "TCTCA TT", + "GAA TTTT", + "GAA TCA", + "CAGG AAA", + "CA TACA", + "TA TTTTA", + "TTA TAA", + "GAGG AAA", + "CA TATG", + "CTT TCTT", + "CAA CTG", + "GG GCTG", + "CC CCCA", + "TTTG AAA", + "CATT AAA", + "CTT AAAA", + "GA CTGA", + "CAA TGA", + "GG CACA", + "CCA GTA", + "GGA TGA", + "GTTTT TG", + "GCA TTTT", + "GTG CCA", + "GCA GTA", + "GCC CTT", + "TC GTC", + "GAA CTA", + "GTG GTT", + "GTG TGA", + "GTG CTT", + "C GCTA", + "GTG TCA", + "TCTT TA", + "GCC TTA", + "CC TATT", + "CAAAA TG", + "GAA CCA", + "CTC CAGG", + "GA CTCA", + "CATG AAA", + "GC TAGG", + "TGTT AAA", + "GC GTA", + "GCA CTT", + "TCTT AAA", + "TAA GAAA", + "GG CCTG", + "TCC CTA", + "GTG GTA", + "CTG CTA", + "GGA GTT", + "GG TAAA", + "CAAA CAAA", + "GA TATG", + "TCA TGA", + "GA CCTT", + "TAA TATA", + "GC TAGA", + "GGA CTG", + "GG CATT", + "CA GTTA", + "CC CTAA", + "CA CCTT", + "GG TGAA", + "CA GCTA", + "GTG TTTT", + "CAA CTA", + "GA TCAA", + "GA GAAAA", + "TGTG AAA", + "AAAA TA", + "GATG AAA", + "CTC TAA", + "TTA CTT", + "GA TCTG", + "CCA CTT", + "GA GTTA", + "CAA TCA", + "GGATTA CAGG", + "TTTA TTTT", + "TACA TA", + "TTTTA TG", + "GA GTAA", + "GCTG AAA", + "GTA CTG", + "GC TCTC", + "TATG TA", + "TGTG TA", + "TCA TAA", + "GGA CTT", + "TCTCC AA", + "GCA TGA", + "GA CGA", + "CGCC TG", + "GA CCTG", + "GG TCTT", + "CA CCAA", + "GA TC", + "GA CCAA", + "AAAA TTA", + "GTAAA TT", + "CCA GTT", + "CA GAAAA", + "TAA CAAA", + "GG TGTT", + "GAAA TTA", + "TGCC TCA", + "CC GCC", + "CCA TTTT", + "CTT GCC", + "TCTG TA", + "CTG GCA", + "GG GATG", + "CCA TGA", + "CTA CTT", + "TAGG TG", + "TAAAAA TT", + "GAAA GAA", + "TAAAA TA", + "CTTTT TG", + "GTC AAAA", + "GGA CAA", + "TCTGA 
TT", + "CTC TCTT", + "TAA TTTG", + "CTC TTTG", + "GG CCTT", + "GGA TTTT", + "CTA CTG", + "GTT GCA", + "GG CTCC", + "CTC TGTG", + "CTC CAGCC", + "TTA CAA", + "GGA CCA", + "GGAA GGAA", + "TAAA GAA", + "TTA GAA", + "GTG AAAA", + "CTT GCA", + "TGGG TG", + "GGA GCC", + "CC TCTA", + "C T", + "GG GCTT", + "GG CATG", + "CTG GTT", + "TA CAGA", + "GATT AAA", + "CTC TGTT", + "TTA TCA", + "CTG AAAA", + "GTA GTT", + "GG GTCA", + "G T", + "CA GCCA", + "GC GTC", + "CA CTTA", + "GTG CTA", + "TC TTATT", + "GTA CTT", + "GG TATT", + "TA GAGA", + "TA CATG", + "CCA CTA", + "TGA GAAA", + "CAA TAAA", + "TCC AAAA", + "CGTG AA", + "GG TCTG", + "CTGAA TT", + "TCA GCC", + "CC TCTC", + "GTT AAAA", + "GG GATT", + "TCC TAA", + "CA CTAA", + "GGA GAAA", + "CCTT CCTT", + "GTT TCTT", + "TA TCAA", + "GA TACA", + "TAATCC CAGCA", + "CC GCA", + "TGAAA TT", + "C GTAAA", + "CTC TCTG", + "TC TTTTTT", + "GTA CAA", + "CCAAA TT", + "TGTA TTTT", + "TC GCTT", + "GG GTGA", + "GA TAGA", + "CTT TATT", + "TAAA CAA", + "GTT TATT", + "TGAA TA", + "CTA CCA", + "GTG TCC", + "CC CGA", + "TTTA TTA", + "CTCC AAA", + "TTTTTTTT TTTT", + "TCA TCC", + "GAA GCC", + "CTAAA TT", + "CAAA TTA", + "CCCC AAA", + "TCTT CTT", + "TAGG AAA", + "CA CGA", + "CA TTTTA", + "GTG CAA", + "TCTCC TG", + "TATTTT AA", + "GTT TGTT", + "GA GCCA", + "GG CCAA", + "CATT TCA", + "CA TCCA", + "CC TATA", + "GA CTTA", + "TCAAA TG", + "GTA TCA", + "TAAA TTTT", + "CTGA GGCA", + "GCC CAA", + "GG TTAA", + "TA TCTG", + "TGA CAGA", + "GGA GAGA", + "GCTG CTG", + "CC CTTA", + "TCC TCTG", + "GTA GCA", + "CCTG AAA", + "CC GAA", + "TTTT TAA", + "CTA TAA", + "CCTG TA", + "TTA CTG", + "GTA TAA", + "GG CGA", + "GA CTAA", + "TCA GAAA", + "GTG TGTG", + "CAAA GAA", + "CC TATG", + "GCA GAGA", + "CC GTT", + "TTTTA TTTT", + "GGAA GAA", + "TTA CTA", + "GCC TGGG", + "TCC CTC", + "TCC TCTT", + "GGA TCA", + "GG TCAA", + "TC GAGA", + "TATT CTT", + "TA CTC", + "GTTAA TT", + "GC GAGA", + "CTTAA TT", + "TCC TTTG", + "GTC TAA", + "CA CCCA", + "GG GTTA", + "GG GCAA", + "GGAAA TG", + 
"GCAAA TT", + "TA GATG", + "GCA GAAA", + "AAAAAAAA AAAAAAAA", + "CC TACA", + "GGA GTA", + "TC TAATT", + "CAA CAAA", + "TA GATT", + "GG TTTA", + "CC TAGA", + "CTT TAAA", + "TA CTTA", + "TAA TGAA", + "CTA TCA", + "TA GTAA", + "CAGA GAA", + "CAA GAAA", + "GGGG AAA", + "CGTT AA", + "CGTG TT", + "TCTG TCTG", + "TTTTAA TT", + "CTG GCC", + "TAAA TGA", + "C GTCAA", + "TTA GTA", + "GTC TCTG", + "TTTT AAAA", + "CA GTTTT", + "CTT CCTT", + "TATA TAA", + "GC TTTTA", + "TTTT TCA", + "GG TC", + "TTA TTAA", + "TTTT GTT", + "CA TAGA", + "TA GGAA", + "GAGA GAA", + "GTA GCTG", + "TTA TGA", + "GTA GTG", + "GGA GAGG", + "CTC TGAA", + "TA GTC", + "GA CTCC", + "TCC CTCC", + "TAA TGTT", + "CA TCTA", + "GCCA CCA", + "GTA CTA", + "TGGG AAA", + "CGCC TT", + "GCC CGG", + "GGA GGAA", + "GTA CCA", + "CGC AAA", + "CA TAAAA", + "TAA CATT", + "GC TAAAA", + "TCTT CTG", + "GCC AAAA", + "GTA TGA", + "GTC TTTG", + "TA CTGA", + "TCC CAGG", + "TTA TTTA", + "TTA GTT", + "GGA CC", + "TA TAAAA", + "CAAA CAA", + "CTT CTC", + "TCTA TCTA", + "GAAA TAA", + "GTG TAA", + "CTT TGTT", + "GA TAAAA", + "GCC CAGG", + "GC GATT", + "AAAAAA TT", + "TA CAGG", + "GG CTAA", + "TA GCTT", + "GTC TCTA", + "CTCC TGA", + "GAA TAAA", + "TTA CCA", + "GG GACA", + "GCCA CTG", + "GTT TAAA", + "GTC TGTG", + "TGA CAAA", + "TACA TTTT", + "GCCA CC", + "TG TTTT", + "TA GCAA", + "TTA TAAA", + "GA CCCA", + "GCA GC", + "CAGA CAGA", + "CA CAAAA", + "GCC CTA", + "TATT AAAA", + "C GTATT", + "CCA TCC", + "TC GATT", + "GAA GGAA", + "GA TCCA", + "TATT TGA", + "GTGAA TT", + "TA CCTT", + "C GTCTT", + "CC TAGG", + "TC GAAA", + "CTT TCTG", + "TGAA GAA", + "TCTC TCA", + "GTC TCTT", + "GGA GGGG", + "GTC TGTT", + "CTA TGA", + "GGAAA TT", + "GCA CACA", + "GCC TTTT", + "CA GTCC", + "CTG GTA", + "GCA TCC", + "TA GTTA", + "GG CTTA", + "GA GTCC", + "TG AAAA", + "TAGA TAGA", + "TGTT TGTT", + "TA CTCA", + "CATT TAA", + "GA TTTTA", + "CA CTCC", + "GAAA CAA", + "GC GCTG", + "TCTT TCA", + "CTG TCC", + "GAA CTCA", + "CGG AAA", + "TATT GTT", + "GCA CTA", + "TATT 
CAA", + "GC GGGG", + "GTG GCC", + "TAATT AAA", + "TA CTAA", + "GC GGTG", + "TA CCAA", + "GG TATA", + "CTA GTT", + "GCA GAGG", + "CTTTT TTTT", + "TTTTTTTT TTTTTTTT", + "TACA GTA", + "CCA TGTT", + "TA GTGA", + "CGTG TG", + "GC TCTGA", + "CTT CCTG", + "TC GCTG", + "TAAA TCA", + "TCCAA TT", + "GTT TCTG", + "GAA GAGA", + "GG GTAA", + "CCA TAA", + "TTA TATT", + "C GAATT", + "CC GGA", + "TGA GCC", + "CC GTA", + "CAGA GGA", + "GTG TTTG", + "GA CAAAA", + "TTTTTT AAA", + "GTT GCC", + "GA GTTTT", + "TC AAAAAA", + "TGTT TCA", + "TA TCTA", + "TCTC TCC", + "CTC CACA", + "TAAA TATT", + "TTTT CTG", + "CTC TCAA", + "CCTT AAA", + "TCTTTT AA", + "GAA CAAA", + "TTA GCA", + "GCTCA TG", + "TAAA GTA", + "GGA TAA", + "TTATT AAA", + "CTC CATT", + "TCTC TGA", + "TTA TTTG", + "CCTG TAA", + "TTA TATA", + "GA CTTTT", + "TGTT GTT", + "GCAAA TG", + "CTT CAAA", + "GAA TATT", + "GAA TCC", + "CTC TTAA", + "GCA TAA", + "GAA TGAA", + "CTTAA AAA", + "TAAAAA TG", + "TTTTAA AAA", + "CTC TGGG", + "TGA TCC", + "GC TCTCA", + "CTC CAGA", + "GAGTG CAGTG", + "CAA TATT", + "TA GAAAA", + "GTAAA TG", + "TA GCTG", + "GC TCAAA", + "GCA GGAA", + "TA CCTG", + "GG GAAAA", + "TTTT CTA", + "GGGG GGGG", + "CC GA", + "CTT TGAA", + "GGA GGTG", + "TA GTCA", + "GG CCCA", + "TGA TGTT", + "CAAA TAA", + "TCTT CCA", + "GC GCTT", + "GTA TTTG", + "GTC TC", + "GAAA TCA", + "TGA TAAA", + "CATT CTT", + "TA TCCA", + "GCC TCTG", + "TGA GATG", + "C GCCAA", + "GTTTTA TT", + "TATA TATT", + "GTA GGA", + "GACA GAA", + "CTCCAGCC TGGG", + "GC GTGA", + "GG TATG", + "GAGG GAGG", + "TCA TTTG", + "CTA CC", + "TACA GAA", + "GG TAGA", + "GA TCTA", + "GTC CATG", + "TGA GGAA", + "TAA TAAAA", + "TAAA CTT", + "TCA CATT", + "GGA GGCC", + "TCA CAAA", + "CA CTTTT", + "CGG CC", + "CAA CAGA", + "GTA GAGA", + "GTTA TTTT", + "CGTT TG", + "TC GTCA", + "TCTG CTG", + "CAA CACA", + "GG TAGG", + "GCA GCTG", + "TAGTA GAGA", + "CAA GCC", + "GCA TTTG", + "TAA TATG", + "GCTT AAA", + "GCTT CTG", + "CTC TCCA", + "TCA TCTT", + "C GTCTG", + "TCA TTTA", + "CA TAGG", + "GC 
TCCTT", + "TGTT CTT", + "TACA TTA", + "CACA GAA", + "TAAA TATA", + "TA GAGG", + "GA TAGG", + "TCC TGAA", + "GGA GCTG", + "TGA TATT", + "TCA TTAA", + "CTTTT AAA", + "TC GTTA", + "TAAA CTA", + "GTT TGAA", + "TAAAA TTA", + "CA CCCC", + "TCA GAGA", + "CTCC TGCCTCA", + "TGA CATT", + "GTA TTTA", + "CTT CATT", + "GAAA CTG", + "TAA CACA", + "GTT CAAA", + "GGA GATG", + "TC GGCC", + "CAGCA TT", + "TC GATG", + "TATT CTA", + "CTG TGAA", + "TATT GAA", + "TTTT CCA", + "TATT TCTT", + "GGTG AAA", + "CTGA GAA", + "GCA CAGA", + "GC GAGG", + "CTG TGTG", + "TGAAA TG", + "TGA TGAA", + "GTCC AAA", + "CTCAA TT", + "TCCA GAA", + "GTA TATA", + "TAAA GTT", + "TCTC AAAA", + "TCCA TCA", + "GTC TGAA", + "TGA GAGA", + "TGA TTTG", + "TTA GCC", + "CTC CATG", + "TCC CTGA", + "GA GCTA", + "CCCC CCCC", + "GTG GAAA", + "CTG GGAA", + "CAA TGAA", + "CCA CACA", + "CTT TCAA", + "C GGAGG", + "TC GTGA", + "CCA GAAA", + "GTTTT AAA", + "TGTT GAA", + "TCC TGTG", + "CTAAA TG", + "TCC TTTA", + "GTC TGGG", + "TCTC TTTT", + "TA CGG", + "TATT GTA", + "TTA GTG", + "TTA CC", + "TAATCCCAGCA CTTTG", + "TCTG GAA", + "CTT CTCA", + "CGCA TT", + "TATT TAAA", + "TCA CACA", + "TAA TCAA", + "GC GAAA", + "GG GCCA", + "GTT CATT", + "GAGAA AAA", + "TTTT GTA", + "TA CTTTT", + "TC GAGG", + "GTGAA AAA", + "CAA TATA", + "TCC CATG", + "CAA TTAA", + "CTG GAAA", + "CCCA GCA", + "TCC CATT", + "TCC TGTT", + "CTC TTTA", + "TCC CCTT", + "GTT TCAA", + "GTC CAGG", + "GGAA GGA", + "TA GTTTT", + "TGA CCTT", + "GTGCTG GGATTACAGG", + "TATT TATA", + "TCTG CAA", + "CTGAA AAA", + "TATG TTA", + "CTT CACA", + "GCA CAGG", + "CCTG CTG", + "TTTT TTAA", + "GTTA TTA", + "CC CTTTT", + "TGA TTTA", + "TA CAAAA", + "TAA GTAA", + "TTTT TAAA", + "CA TCTC", + "GTG GTGA", + "GTG GAGA", + "CTC TGCA", + "GTTAA AAA", + "TACA TACA", + "CTT TGTG", + "GGA CACA", + "TCTGA TG", + "TA TTATT", + "TCTT CTA", + "CTG TGTT", + "TCA GCTT", + "CTT TATA", + "GG CGC", + "TCC CTCA", + "GTA CC", + "TGGA GAA", + "CAAAAA TT", + "TCTT TAA", + "CTC TCTC", + "TGA GTGA", + "GCA GCTT", + 
"CGGA TT", + "TA CGA", + "TCTT GTT", + "TC GTAA", + "GCC TGTG", + "TATT CTG", + "GG GATA", + "GG GTCC", + "TGA GATT", + "CTTTTA TT", + "TCC CACA", + "CATG GTG", + "TTA GGA", + "GAA CACA", + "TCA TAAA", + "CAA CATT", + "GG TCCA", + "GAA TTTG", + "TATTAA TT", + "TCC TGGG", + "GCA GCAA", + "CTC TTCA", + "GAA GAGG", + "TCTG TCA", + "CTGAA TG", + "CCA CAAA", + "GTG GAGG", + "TGA TTAA", + "CTCC CTCC", + "CACACACACACACACA CACACACACACACACA", + "GC GATG", + "CATT CTG", + "GTA GAAA", + "TCA TCAA", + "TTTT CAA", + "TATG TATG", + "CCAAA TG", + "TAA TTTTA", + "TAA GGAA", + "CTT GAAA", + "AAAAAAAA AAAA", + "GC TCCTG", + "GCA GATG", + "GAAAAA TT", + "GA CGC", + "GTG GGGG", + "GTCAA TT", + "CTT GCTT", + "TGA CACA", + "GTG TGTT", + "CCA GAGA", + "CCCA GCC", + "TAAA GAAA", + "GTC CATT", + "TAAA TTAA", + "CC CAAAA", + "GAA TTAA", + "TGAA TTA", + "TTTT TTTG", + "CCA GCTT", + "CAA TTTG", + "CTG TTTG", + "GTC TCAA", + "GTT TGTG", + "GG CATA", + "GG TACA", + "TGA TGTG", + "GATT TCA", + "TCTG CTT", + "GTAA TTA", + "TAA AAAAAA", + "GCC GCC", + "TGTGTGTGTGTGTGTG TGTGTGTGTGTGTGTG", + "GC GTCA", + "GC TCATT", + "GAA CCTG", + "TAAA CAAA", + "GTG CTGA", + "TCA GGAA", + "TCC TCAA", + "TCTA TTTT", + "TCTG TTTT", + "CAGA GCA", + "CCA GGAA", + "GTC TTTA", + "TCTT CAA", + "TCAAAA TT", + "GC TTATT", + "GTT CCTT", + "CA CCTA", + "TCA CTGA", + "GAA GCAA", + "TAAA GA", + "TCC TTCA", + "TCTCA TG", + "TCA GTGA", + "TACA CAA", + "CA CGTG", + "CC TAAAA", + "GCC TTTG", + "GG CTTTT", + "GTT GAAA", + "GTT CTC", + "CTA GA", + "CTA CAAA", + "GCA CAAA", + "TTA CATT", + "GG CCCC", + "TAA TGTG", + "CTG CCTT", + "TCC CAGA", + "GTGAA TG", + "GGA CAGG", + "GGA TGTG", + "GTT TATA", + "TGA CCAA", + "GTG GCTG", + "GTT CTCA", + "CTTA TTTT", + "CTG GAGA", + "TTA CAAA", + "GTC TTCA", + "CAA GAGA", + "CCA TTTG", + "TCA CAGA", + "CTA GTA", + "CA TTATT", + "TTA GA", + "GC TCTCC", + "GC GCCA", + "TATG TTTT", + "TCC TCCA", + "CAGAA AAA", + "GTG GGAA", + "TAA TCTT", + "TGA GTCA", + "CTG CTC", + "GTC TCCA", + "TCA TGTT", + "GTT 
TCCA", + "TAA GCAA", + "CTAA AAATA", + "TGA CTGA", + "TC GGTT", + "TTA GAAA", + "TAA GCC", + "TAAA GCA", + "CC TCTCC", + "CC TCCTT", + "TCA GATT", + "TATG AAAA", + "GCTGA TG", + "CATA TTTT", + "GC TCCAA", + "CGG CGG", + "CCA CTGA", + "CA GCAAA", + "CTG TCTT", + "CTA GCA", + "TC GGGG", + "CACA GCA", + "GC TGATT", + "CTA GGA", + "TAA CTC", + "TCA TATT", + "CCTT CTT", + "CTG CAAA", + "CC CGC", + "GG TCTA", + "CCCA GGA", + "GTG TCTG", + "TAATAA TAATAA", + "TCA CATG", + "CAA TTTA", + "TATATATATATATATA TATATATATATATATA", + "CCA CAGA", + "TCAA TTTT", + "GTA TTAA", + "GAA CATT", + "TCTC TTA", + "CTA TTTG", + "TCTT TCC", + "GGTT AAA", + "GC TAATT", + "CTG CTGA", + "TA CCTA", + "CAGG GTT", + "TC GCCA", + "CAAAAA TTA", + "CTT CTGA", + "GCA TGTG", + "CTA TTAA", + "GCA CATG", + "CAA CATG", + "TCA TGAA", + "GAA TGTT", + "GG GTTTT", + "CTG CCTG", + "GTC CACA", + "TAAA CA", + "CTC TGGA", + "GA CCCC", + "GG CAAAA", + "TCTG TTA", + "CTA GTG", + "CTA TATA", + "TCA GTCA", + "TAA CTAA", + "GAA GATG", + "GTC TTAA", + "CAA GGAA", + "GTAA AAAA", + "TCC CCTG", + "TC GCAA", + "TCTG CCTG", + "CC TTTTA", + "GTCC CAGCTA", + "TATA TATG", + "TATT GTG", + "TGTG TTTT", + "GC GCAA", + "CACA GTG", + "TAA GATT", + "CTC TGTA", + "GGAGG CTGA", + "GGA CAAA", + "TATTAA AAA", + "TC GTCC", + "TC GGAA", + "CTA TAAA", + "CTT CAGA", + "CTA GAAA", + "CATT CAA", + "CA CGCA", + "CAGGA TT", + "CCA TCTT", + "GTA GCC", + "GAA TTTA", + "CA CGC", + "CAA TCC", + "TGA GCAA", + "GAA GCTG", + "TCAA TTA", + "GAA GTCA", + "CTG CACA", + "CCA CGG", + "GGA TCTT", + "CTCCTGCCTCA GCCTCC", + "TAAA TGAA", + "CC GTC", + "TC GGTG", + "TTTTA TTA", + "GCA GGGG", + "GCA GGTG", + "TCTA TTA", + "TAA CTTA", + "CTAA TTTT", + "CC CGCC", + "TAA TACA", + "GGATT AAA", + "TCTC TCTG", + "GCTT CTT", + "CATT TATT", + "CCA GAGG", + "GGA CAGA", + "GCCAA TT", + "TCC CCAA", + "GTT GATT", + "GAA GAAAA", + "GCA TTTA", + "CTC TAAA", + "CACACACA CACA", + "CC TCAAA", + "TA TAATT", + "CAA TGTT", + "GCC CAGA", + "GTA TATT", + "CTAA AAAA", + "CCA CAGG", + 
"TAA GAGA", + "TCC TTAA", + "TA TTTTTT", + "GAA TATA", + "GGA TTTG", + "GTG TGAA", + "CTG GCTT", + "GC GGCA", + "TCC GCC", + "GCA TCTT", + "TC TAATA", + "CTG CATT", + "CTC TGCC", + "TCA CTCA", + "TCA GCAA", + "TATTA TG", + "CCA GCTG", + "GA TCTC", + "GCC TCTT", + "CTT CCAA", + "TCC TAAA", + "TCA TCTG", + "CTA TTTA", + "CTG CAGG", + "CAA GCAA", + "GC GGAA", + "GAAA TAAA", + "TAAAA TAA", + "TCA CCTT", + "CCA TGTG", + "GA CCTA", + "CAGA TGA", + "GTG GCTT", + "TTATTA TTATTA", + "TCC CGG", + "TATT TGTT", + "CTG TAAA", + "TCCA TCCA", + "CTG TATA", + "GTT TCTA", + "GTT GCTT", + "CCA TGAA", + "GC TCTTA", + "CTT CATG", + "GTT CCTG", + "GCTG GGA", + "TCA GAGG", + "CATT AAAA", + "TCA GTAA", + "GAA TGTG", + "CTTA TTA", + "GCA CTGA", + "TGA GGTT", + "CA TCAAA", + "CTT CTCC", + "GTT TATG", + "CTT TCCA", + "GTG CCTG", + "GAAA GGA", + "GCA TCTG", + "TA CCCA", + "TAA CAGA", + "AAAAAAAA AAA", + "CTA TGAA", + "CA GTAAA", + "TA GCTA", + "TC GTTTT", + "GTG TCTT", + "GA GCAAA", + "TC TAAAAA", + "GTT CACA", + "GAAA TGA", + "CAAA TGA", + "GCC CTGA", + "GTG TTTA", + "TCA TGTG", + "CATA TTA", + "TCAAAA AAA", + "TAA GTTA", + "TCTC TCTT", + "CCA GTGA", + "CC TCTGA", + "CAA GATG", + "GCC TGTT", + "GTT TGGG", + "CATT CATT", + "GCC CCTG", + "GTT CTGA", + "GC GGCC", + "GC GGTT", + "CAAAA CAAAA", + "TACA TATA", + "GAATT AAA", + "TCAA GAA", + "CTG TATT", + "TTTT TATT", + "GA TTATT", + "TCTAA TG", + "GTT GCTG", + "TGAA TGAA", + "TCA GCTG", + "CTT GATT", + "CAGAA TG", + "CTAA TTA", + "TATAA TG", + "GTTTT GTTTT", + "CCA GCCTG", + "TGA TGGA", + "GCA GATT", + "CTC TATT", + "GCA GTCA", + "TAA GTGA", + "CTA CACA", + "CGCA TG", + "TA GCCA", + "GTG GCTCA", + "CAAA TAAA", + "GTG CTCA", + "TTTT TTTTTT", + "TAA CATG", + "TCCCA GCTA", + "CAAA GTA", + "TCA TATA", + "CAGCA TG", + "TGA TCTT", + "CA TAATT", + "TGTG TTA", + "TTTT GAA", + "TTAA TTA", + "GATA TTA", + "TCA TTCA", + "TGA TATA", + "TGA CTCA", + "GA CGTT", + "TGA CATG", + "GTT GTGA", + "CA TTTTTT", + "GCC TGGA", + "CTA TGTT", + "CTT TGGG", + "GTC TCAAA", 
+ "CTG GCTG", + "CCA CATG", + "GG CGTG", + "CTTAA TG", + "TAA GATG", + "GTA TAAA", + "TGTA TTA", + "TAA CTCA", + "GAGAGAGAGAGAGAGA GAGAGAGAGAGAGAGA", + "GCA TGAA", + "GTTAA TG", + "TCCA GGA", + "GAGA GAAA", + "TCTC TGTG", + "CTC TCTA", + "CCA CCTG", + "GCCA GGA", + "CTG GAGG", + "CCA TTTA", + "GTC TGGA", + "GCC CACA", + "TAGA GAA", + "CAA CTCA", + "GGCA GGA", + "TCTTA TG", + "CAAA GGA", + "GG TAAAA", + "GAGA GGA", + "GTC CAGA", + "GCC CTCA", + "GATA TTTT", + "CAGG GAA", + "CCA CATT", + "GA GGAGG", + "GAAA CTT", + "CA GAATT", + "TCA GATG", + "TATT TCC", + "TACA GTG", + "TGA GCTG", + "CCA TCTG", + "GAGAA TG", + "TCAA CAA", + "A TT", + "TAA CTGA", + "TGA GAGG", + "CA CTGAA", + "CCA CCTT", + "CTG CAGA", + "TCA CCAA", + "TGA GCTT", + "CAAA GCA", + "GG TTTTA", + "CGG GGTT", + "TCCAA AAA", + "TATG TATA", + "CCA GATG", + "TCCA TTTT", + "CTG CTCA", + "GA TAATT", + "CCA CCAA", + "CTCC TCC", + "GA GAATT", + "GAAA GTA", + "TAAAA TAAAA", + "CTT CTTA", + "CTG TTTA", + "GAA TCAA", + "GCA TGTT", + "GCA CGG", + "GA CTGAA", + "GTG CACA", + "GA CGTG", + "TATA CAA", + "TC GACA", + "GAA GACA", + "TAAA GGA", + "GA TCAAA", + "CAGTG TG", + "CTA GCC", + "GAGG AAAA", + "TCTG AAAA", + "GAA CCCA", + "GATG GATG", + "GTT CTTA", + "CTA TATT", + "GCA TTAA", + "TCTCTCTCTCTCTCTC TCTCTCTCTCTCTCTC", + "TCA GTC", + "TATTTT TG", + "GAGGA TT", + "GTA TGTG", + "TAA CCAA", + "GTT GTTTT", + "TTTT TCTT", + "GTG TTAA", + "CTT GGAA", + "AAAAAA TG", + "CAA TGTG", + "GTG CCTT", + "GCC TCAA", + "GA GTCTT", + "GCTAA TTTT", + "CGAA AAA", + "GTG TATA", + "GC GTTA", + "CTGCA CTCCAGCCTGGG", + "GTT CATG", + "CAAA GAAA", + "GCA GTAA", + "GGA TGAA", + "CTT TATG", + "CAGG AAAA", + "TCC TGCA", + "CTG TCTG", + "GAA CATG", + "GGA TGGA", + "GCC TGAA", + "CAAAAA TG", + "TCCAA TG", + "CCA GCAA", + "GG CCTA", + "CAA CTGA", + "GCA CCTG", + "GTC TATT", + "CC TCTCA", + "GTG GTCA", + "GTG TAAA", + "GTA CACA", + "GTAAAA TT", + "GTA CATT", + "TATA TAAA", + "CTG TTAA", + "TAA GTCA", + "GCC TCCA", + "AAATT AAA", + "GTG CAGG", + "TCC 
TGGA", + "GTG CAAA", + "GC GTCC", + "CCA TTAA", + "GGA GGGA", + "TCA CTTA", + "TCATT AAA", + "CAA CATA", + "TAA TAGA", + "TAA TGTA", + "GA TTTTTT", + "GTT GTCA", + "GGA GACA", + "GTG TGGG", + "TCA CAGG", + "TC GGCA", + "CTCC CTG", + "GA CCAAA", + "TGTT TATT", + "CGAA TG", + "CTCAA TG", + "TCA CCTG", + "CA GTGTT", + "TGA GACA", + "TA GGGG", + "GAAAAA TG", + "GTT GAGA", + "TC GATA", + "CTC GGGAGG", + "GTT GTC", + "CCA GTCA", + "GCC CAGGCTG", + "GAA CAGA", + "GGCTCA CTGCAA", + "GCA GACA", + "TGA GGTG", + "CA CGTT", + "TAA GAAAA", + "CCA GGCA", + "GTA TCTT", + "CTTGG GAGG", + "CTT TCTA", + "CC GCTG", + "GA GCTCA", + "GAGA CAGA", + "CTT CAGG", + "GCA CATT", + "GTA CAAA", + "CTT GTAA", + "GTG GGTG", + "GAA GTGA", + "GG TCTC", + "GTA TGTT", + "GCA CTCA", + "TTA TGTT", + "CAA GTCA", + "CAA GTGA", + "GAAA CTA", + "TAAA TAAAA", + "TCTT AAAA", + "GTT GGAA", + "GTT CTAA", + "CCA CTC", + "CA GTGAA", + "GAAA GG", + "GCA CGA", + "TAA CTTTT", + "GTT GTTA", + "TCA GTTA", + "CGGA TG", + "TATT TGAA", + "CC CTGAA", + "GCC CTC", + "CTT CTAA", + "TTTG TTTT", + "GA GCTGA", + "CTG TGGG", + "CAA GATT", + "GAA GCTT", + "TGA GTAA", + "CTT GCTG", + "GGA TGGG", + "CGTA TG", + "TCCA TTA", + "GTC TGCA", + "GCCA TTTT", + "GTT GTAA", + "CACA CAA", + "GGACTA CAGG", + "C GTTTTA", + "TCTT CC", + "TAA CCTT", + "CTT TAAAA", + "TGAA TTTT", + "CTA CAGA", + "GCAA GAA", + "TAA CAAAA", + "CAATT AAA", + "CCA CTCA", + "CATG GTGAAA", + "CCCA GAA", + "CTA CATT", + "CC GAGG", + "TCCA GTG", + "TGA GTTA", + "GGA GTCA", + "TAA CGA", + "GA GTAAA", + "GA CTCTG", + "GGA GCTT", + "TA CTCC", + "CTG CATG", + "GC TTTTTT", + "GTC TAAA", + "GTG CGG", + "CA TCTCA", + "TGA TCAA", + "GGA GATT", + "GC AAAAAA", + "CA CCAAA", + "TGA CGG", + "CAGA GG", + "GTT GATG", + "CTT GTCA", + "TCCA CCTG", + "GGA GCAA", + "CAA GTAA", + "CCA TAAA", + "GTG CATG", + "GCA TATT", + "GTA GATT", + "GCC TAA", + "CTCAA AAA", + "GGA GAAAA", + "CTA TCC", + "TAATA TTA", + "GTG CTC", + "CAA TATG", + "TGTG GAA", + "TGA CTC", + "GTG TATG", + "TTTTAA TG", + 
"GC TCTAA", + "CACAA TG", + "CA GCTCA", + "GTT GGTT", + "CTAAAA TT", + "GTC TATG", + "TGTG AAAA", + "CTG GGTT", + "CCCC TCC", + "CC CTCTT", + "GCA GGGA", + "GAAA CCA", + "CATT TCC", + "GCA GCCA", + "TCA TATG", + "GCA GGCA", + "C GTAAAA", + "TGA CCTG", + "CAGA GGTT", + "CTT GTGA", + "TTA TCTT", + "CTG TATG", + "GTCAA TG", + "GGA CGG", + "GC GTAA", + "CAAA CTA", + "TAAA TGTT", + "CTT CGG", + "CTCC CCA", + "TACAA TG", + "TCTG TAA", + "GAA TATG", + "GC GGGA", + "GGA CATT", + "TTA TGAA", + "GGA TGTT", + "GGA CATG", + "TCA GGTG", + "CAA CAAAA", + "GAAA GAGA", + "GTG GATG", + "GG GCTA", + "CCA TCAA", + "CA GCTGA", + "CTC CACC", + "CAA TCAA", + "GTG GTC", + "TGA CAGG", + "CCA TTCA", + "GTCC CTG", + "CAGA CACA", + "GTT GGTG", + "CC TCCTG", + "GAA CTGA", + "TATT CATT", + "GCC CATG", + "CAA TCTT", + "GAAA GCA", + "GAA TCTG", + "TTA TTTTA", + "GTT TGGA", + "TTTT TGTT", + "GGGAA TG", + "GC GACA", + "TAAA CTG", + "CCA TATT", + "GGA TCC", + "CAA GCTT", + "TAAAAAA AAA", + "TCA CTC", + "CA CTGTT", + "TGTTAA TT", + "GGA CTGA", + "GGA GTGA", + "CATA CACA", + "GTT TGTA", + "TCCA GCA", + "GTG CATT", + "GG AAAAAA", + "CCAA GAA", + "TCAA TA", + "CTT CCCA", + "TGA GAAAA", + "GGCC TCCCAAA", + "CAA GCTG", + "GCC CAAA", + "TGA CTTA", + "CA GCCTT", + "CTG GATT", + "TTTT TTTA", + "TCA CGG", + "GCA GTTA", + "TGA CTAA", + "TTA CAGG", + "TGA TATG", + "TAA TTATT", + "TCTT GAA", + "GCC CCTT", + "GTT CAGA", + "CTC TATG", + "CCA TGGA", + "GAGG GAA", + "GGA GGCA", + "CTT TGCA", + "TCTT GG", + "GGA GGTT", + "GCCAA TG", + "CTG GTGA", + "CAA CCAA", + "CCA GTC", + "CTT GAGA", + "TACA GCA", + "CTT GTC", + "GA CGGA", + "CTT CTTTT", + "GTG GC", + "GAGGA TG", + "CAA TAAAA", + "GAAA TTTT", + "AAAA AAAAAA", + "CTC TATA", + "GTA TGAA", + "CTT GTTA", + "TAA CATA", + "CAAA CACA", + "TGATT AAA", + "GCTC TGTT", + "GTG GGTT", + "GTT GGGG", + "GTG TGTA", + "GTAA TTTT", + "GTA TCC", + "TGTGTGTG TGTG", + "TCTT CCTT", + "TCA CTAA", + "TCTCC AAA", + "TA TCAAA", + "TGA TGGG", + "GGA TATT", + "CAAA TTTT", + "GTT CAGG", + 
"GTG GATT", + "GTG CAGA", + "GCTG CC", + "CTCA GAA", + "GCA GTC", + "GGA TAAA", + "GCC TTCA", + "CCA GGTG", + "TA TCTC", + "CAA TGCA", + "CCCA CTG", + "GTG TATT", + "CGA CAGA", + "TGA GATA", + "CCA GGTT", + "TGTT TAA", + "CATCA TG", + "TGA TTCA", + "GCAA TTA", + "GAAA TGAA", + "CTT GGTT", + "GAA GATT", + "GGA TTAA", + "CC TCATT", + "GGCCA GGCTG", + "GCTA TTA", + "GCCA GCA", + "GAGA CAGG", + "CTT GAGG", + "CA GTCTT", + "GTT CTCC", + "TATT TCAA", + "TGA CGA", + "CATG AAAA", + "CATTA TG", + "TAAA TTTA", + "GA GTGAA", + "CAA CAGG", + "TAA GCTT", + "CACA TTTT", + "GA TCTCA", + "TA GTCC", + "GACC CTG", + "TAA TGCA", + "TAA GTC", + "TAA TAATT", + "GAA GTAA", + "CAA CTC", + "CA TCATT", + "GA CGAA", + "GAAA CAAA", + "TATT TCTG", + "CATTAA TT", + "CCA CCCC", + "TAATA TTTT", + "GTT TAAAA", + "GTA TCTG", + "GTCAA AAA", + "GATG CTG", + "TGTT CTG", + "GG TCAAA", + "GTA GGAA", + "GTA TATG", + "TGA TCTG", + "GGGG CTG", + "GCA TCAA", + "GCCAA AAA", + "CCA CGA", + "GC TAATG", + "CAGA GAAA", + "CCTT CTG", + "TCC TCTA", + "GCA GGTT", + "CTCA CTG", + "TAGA TTA", + "GCC GAGA", + "CCA TCCA", + "CTT TACA", + "GTA CATG", + "GCA CCAA", + "CTT TGTA", + "CTA TGTG", + "TCA CTTTT", + "TGA GTC", + "CAA GAAAA", + "CTGA CTG", + "GTTTT TTTT", + "GCA TAAA", + "TAA TCTG", + "GAA AAAAAA", + "CAGGA TG", + "TGA GCCA", + "GAA TTCA", + "TCA GACA", + "GTT CCAA", + "TCA GGTT", + "CAAA CTG", + "CATT TCTT", + "TGTT AAAA", + "CCA GACA", + "CAA GTTA", + "CATG TTA", + "CATT CTA", + "TCTTTT TG", + "TGA GGGG", + "CACA TTA", + "TAAAA TAAA", + "GCA TATA", + "TGTT CTA", + "GAA GGGG", + "GAGTG TG", + "TAA GACA", + "GAA CTC", + "CCA GTAA", + "GAGA GAGG", + "GC GACC", + "CAA TTCA", + "CGG CTG", + "CCA GATT", + "CCTG GG", + "GGAA GAAA", + "GAGA GG", + "TCAAAA TG", + "CCTCA TG", + "TAAA GG", + "CTT TGGA", + "CCA GGGA", + "GTA CAGA", + "CTGAGGCA GGA", + "TGTT TCTT", + "CCA GGCTG", + "CTGA GG", + "GAGG CTG", + "CTCC TGGG", + "GAA GTC", + "CGA CC", + "GGA CTCA", + "GGA GTC", + "CA CAATT", + "GTG TTCA", + "GA CTAAA", + "GTCA 
TTA", + "CAAAA TTA", + "TGAA GAAA", + "GCA CCTT", + "GTT TGCA", + "TCC TGCC", + "GTA GATG", + "GCC TGCA", + "GA GTTAA", + "TCC CTTA", + "GTG GTTA", + "TC GGGA", + "TACA TAA", + "TCTC TCCA", + "CA CTAAA", + "TATATATA TATA", + "GTG GCAA", + "CACCA TG", + "TTTG AAAA", + "CACA CTG", + "CTT GGTG", + "TACA CTG", + "CC TCCAA", + "CAA CCTT", + "CA GCCAA", + "TTTT CAAA", + "TGA TAGA", + "TACA CTA", + "TCTG GG", + "TCC CAGCA", + "TAGG AAAA", + "CTT GGGG", + "TC TGTGAA", + "CC TTATT", + "CATT TAAA", + "TTTTA TTTTA", + "GCC CTCC", + "CTGA GCA", + "CC CGTG", + "GTA GTGA", + "TCC TATT", + "GAA GGTG", + "TGTG CTG", + "TCCA CTG", + "TAA TCTA", + "TGA TGTA", + "GTG GTAA", + "TAA TGGA", + "GATG AAAA", + "GTA GTAA", + "GTG GGGA", + "GTG TCAA", + "CAGA CTG", + "TC GAAAA", + "CTCA TTA", + "TAA TAATA", + "CTCA GAAA", + "CA TCCTT", + "CC GCTT", + "GGAA GG", + "CC GTGA", + "CCA CTCC", + "CTA GAGA", + "TAGAA TG", + "GGA TTTA", + "TTAA TTTT", + "GC TAATA", + "TCC CCCA", + "CAAA TATT", + "GA TCATG", + "TCTTAA TT", + "CA GTATT", + "GTCTT GAA", + "CC GAAA", + "CTA TTCA", + "TAA GATA", + "CTT GCAA", + "GCC CCAA", + "TCC CTAA", + "GAA GTTA", + "GA TGATG", + "CTT GATG", + "CC CTAAA", + "CCTG CCTG", + "GACA TTTT", + "CCA GCCA", + "TGTGTGTG TG", + "GTC TATA", + "TCTC TGTT", + "GTC TGTA", + "TA TAATA", + "CTT GTTTT", + "CGC CATT", + "CTCA GCA", + "TACA GTT", + "CAA GAGG", + "GGAA GCA", + "GCC TTTA", + "CC CCATT", + "CAA CGA", + "GTCA TTTT", + "CC CGCA", + "CA GTTAA", + "GAA TCTT", + "CATG TTTT", + "CC GGGG", + "CTA CTGA", + "TCA CGA", + "TAAA TTTG", + "GCC CATT", + "CTC TAGG", + "GGA CCTG", + "TCA GGGA", + "GAGA CTG", + "CC AAAAAA", + "GCC GG", + "CCA GGGG", + "TCA GAAAA", + "CA TCTGA", + "TCTT CAAA", + "CTA CAGG", + "GAGG CAGG", + "CATT GTA", + "TAAA TCAA", + "GA CTCTT", + "CTGA TTA", + "GCA TATG", + "GGA CCTT", + "CAA GACA", + "TATT TATG", + "TATTTT AAA", + "CC GAGA", + "TCA TTTTA", + "CTCA CTCA", + "CCA CCCA", + "CTC TAGA", + "CTA CATG", + "GTG CTTA", + "CAA CCTG", + "TC TGTGTT", + "TAAA TATG", + 
"CAAA GG", + "CC CTGTT", + "GTT CGG", + "TGA TAAAA", + "CA CGAA", + "GTT GAGG", + "CAGA GTGA", + "GAAA TTAA", + "CACA TA", + "GAA CAGG", + "TCTCC TGA", + "CC TGAGG", + "GGAGG CCAA", + "GTT TACA", + "TAA CAGG", + "TGTG GTG", + "GCCTCC CAAA", + "CCA TCCTG", + "GATT CTT", + "GAA TGGA", + "GTA GTCA", + "CTCC TCTG", + "GAAAGAAA GAAAGAAA", + "CC CTGTG", + "CAGTA TG", + "GC GATA", + "GGA CTC", + "GAAA GA", + "TGTT GG", + "GTA GCTT", + "CA TTTTAA", + "CC CTCTG", + "GCA TTCA", + "CGA TTA", + "TCA CATA", + "TAA TGAAA", + "GGAA TTA", + "CTG TCAA", + "TAAATT AAA", + "CAA GTC", + "GTA TTCA", + "GGCCA TG", + "CTT TAGA", + "TGTT TCC", + "CATG TA", + "GAA TAAAA", + "CAA CTAA", + "TCA TCTA", + "CA CTCTT", + "CA GTTTG", + "CA TAAAAA", + "GCA TGCA", + "GATT TA", + "GAA CCAA", + "TCTG TGA", + "TCA GCCA", + "TCTC CACA", + "TCTCA GCTCA", + "TATCA TG", + "GCA CTTA", + "CGC CAGG", + "CGG GG", + "CATTAA AAA", + "TTTG TTA", + "GGA TATA", + "TC GACC", + "TAA TCCA", + "CC GC", + "CATT GTT", + "CCA GTTA", + "GTA GTTA", + "CTA GGAA", + "CC TAATT", + "TCA TGGG", + "GAA CTAA", + "GCTA TTTT", + "CC GTCA", + "CAGA TTA", + "CCA TATA", + "CAA CTTA", + "TCA GTTTT", + "CTA CCTT", + "GCA CTC", + "GTG TGGA", + "GTG CCAA", + "GACAA TG", + "GA CAATT", + "GTA CCTT", + "TAAA CATT", + "CA GGAGG", + "GTG CGA", + "GAAAA TTA", + "TCTCTT AA", + "CC GATT", + "GA TGATT", + "CCA TGGG", + "TC GGTA", + "CCA TATG", + "CCA GTCC", + "GCC TTAA", + "TGA TCCA", + "GTT GCAA", + "GTA GAGG", + "CAGA TTTT", + "GTA CTTA", + "TCTTTCTT TCTTTCTT", + "GCTC TGTG", + "TCAA TAA", + "GTT TAGA", + "GTT CGA", + "CAA GGTT", + "CTCA TTTT", + "CACA GG", + "CATG CTG", + "GAA CGG", + "TA TAAAAA", + "GAA GGCA", + "GA GCATT", + "TGTT TGTG", + "GCTG TTA", + "GTCA CTG", + "CAAA TGAA", + "GTGA CTG", + "GTT CTTTT", + "CAGGCTG GAGTGCAGTG", + "TGA TGAAA", + "TAA CGG", + "CTA CTAA", + "GACA TTA", + "GGA CGA", + "GAGCA TG", + "GCA TGGG", + "CCA CTTA", + "CTA TCAA", + "GCTG TTTT", + "GTC GTG", + "CCTG GCC", + "TCTC TGAA", + "TGTT GTA", + "CAGC CAGG", + 
"GTT TAGG", + "CC GCAA", + "GGA GTAA", + "CCAA TTA", + "CAGC AAAA", + "TCA TCCA", + "CA CGTA", + "TCA TAGA", + "TAATT AAAA", + "CA CTTAA", + "TCTT TATT", + "GAGA TTA", + "TAA GAGG", + "CAAA TTAA", + "GA CGCA", + "CA CGGA", + "GTG TGCA", + "TC T", + "TATTA TTA", + "GAAA TATT", + "GGA GTTA", + "TCTT TGA", + "CTGA TTTT", + "TGTGAA TT", + "TCC CACC", + "CC CTTTG", + "CAA GGTG", + "CAGA GTT", + "CCCCA TG", + "CTA CCAA", + "CTCC AAAA", + "CTT CCCC", + "CTG CTAA", + "GATT AAAA", + "GC TTATG", + "CTA CTTA", + "TAAAAAA TT", + "TCA GTCC", + "CTATT AAA", + "GAA TGGG", + "CACA GTA", + "CAA CGG", + "GG TTATT", + "TCA CCCA", + "TGA TGCA", + "TAA TTTTTT", + "GTT TGAGA", + "GTATT AAA", + "GCC CCCA", + "TATA GTA", + "TA GTAAA", + "TGA TACA", + "GTG GTTTT", + "CCA CTAA", + "CACA GAGA", + "CCTCTG CCTCC", + "CAA AAAAAA", + "CTC TCTCC", + "CA TAATA", + "GAA GCCA", + "GTT CCCA", + "TGTG TTTG", + "CAA TGGA", + "TGAA GTA", + "CTT CATA", + "CA CTGTG", + "GC TCTTTT", + "TGA CATA", + "TAAA GAAAA", + "GAGAAA TG", + "CAGG GAGG", + "TGTT CAA", + "GA GCCAA", + "GACA GAGA", + "GG CTGAA", + "CAAA TATA", + "GTG GAAAA", + "TAA GGTT", + "GTGA TTA", + "GGA TCTG", + "GATG TTA", + "GACTA CACA", + "TCC TATA", + "CTG CCAA", + "TCC CGA", + "GTGA TTTT", + "GC GTTTT", + "CAGA GTA", + "GAAA GGAA", + "CA CTTTG", + "CCCC AAAA", + "GCAA CCCA", + "TGCA TTTT", + "TCTA GAA", + "TA CTTTG", + "TGA GGCA", + "CA TCTCC", + "TC GCTA", + "TGA CTTTT", + "GA GCCTG", + "CATT TGTT", + "TCTT TGTT", + "GCAAAA TT", + "CC TGATT", + "GA TAAAAA", + "GA GTGTT", + "TCC TGTA", + "TACA GAAA", + "TC CAGGAA", + "GCCA GTG", + "TAGA TTTT", + "TAA TAGG", + "CTCC TCA", + "CATTTT TG", + "CATT TCAA", + "GCCA TCA", + "TAAAA TATA", + "GA CTGTT", + "GCA TGGA", + "CAAA GTT", + "CA TGATT", + "GA GTTTG", + "CTA GCAA", + "CTT CCTA", + "GG GGAGG", + "CTA TATG", + "TATT TATTTT", + "CA CCATT", + "CC CTCAA", + "TTTTTTTT TTTTTT", + "GA TCATT", + "GTA CATA", + "CTC CATA", + "CCCC GTCTCTA", + "GCC TGCC", + "CTA GCTT", + "CC CGGA", + "GATG TTTT", + "GTA 
TTTTA", + "TCA GATA", + "CCTG GAA", + "TATT CCA", + "GGA CCAA", + "GCCA TTA", + "CGA CTGA", + "TAA GCTG", + "TAAA CACA", + "GTT TCTC", + "CA TCTTA", + "GAAA TTTG", + "TAA TGGG", + "TAAAA TTTT", + "CTG TTCA", + "CCTG TTA", + "TA CTGAA", + "TGA CCCA", + "TGA TTTTA", + "CTCC TTA", + "TATA GAA", + "CTG CGG", + "GC GGTA", + "GTG CTAA", + "CAGA GGAA", + "TACA TCA", + "TCAA TCAA", + "CTG CAGCC", + "TGAA TATT", + "TCTA CAA", + "CCA CATA", + "CC CGTT", + "TATA CACA", + "TCC TCTC", + "TCTA CTT", + "CC GGAA", + "CTTTT TTA", + "GAAA GAAAA", + "CTA TCTT", + "GA CTTTG", + "TGAA CAA", + "GCA GTTTT", + "GC TAAAAA", + "GAGG CGG", + "TAA TAAAAA", + "CTG GTCA", + "CAGA CAA", + "GGA TATG", + "TGAA GG", + "GCCA GAA", + "CCA GGCC", + "CCA CCATG", + "CAAA CTT", + "TCA TGTA", + "GCTG CTT", + "GTAA TA", + "CCCC CAA", + "CA GCCTG", + "TCAA CTT", + "TAAAA TTAA", + "GCTG AAAA", + "CGA CGA", + "GTG GGCA", + "TGA GGGA", + "CGC TCC", + "TTTT GTTTT", + "GA GTCAA", + "TCA TGCA", + "CTG CTTA", + "TAA GTTTT", + "GTA GCAA", + "CCTT GG", + "TGA CAAAA", + "CTG GTAA", + "TCTT TATA", + "TGTG TGTT", + "CTG GTC", + "CTG GCAA", + "CATT TCTG", + "CTC TACC", + "CTGA GGA", + "CTAAAA TG", + "CTA GATT", + "GTA TCAA", + "CA GTCAA", + "CTG GGTG", + "CC TCTTA", + "TGA GTTTT", + "TTTTA TTTA", + "CC TTTTTT", + "TATA TACA", + "TA GCAAA", + "AAA TTA", + "CTG GATG", + "GA TAATA", + "GA CAAAAA", + "CCTG GGA", + "GCTT TCA", + "GTA CAGG", + "GCTG GAA", + "CTA CTCA", + "CAA TGTA", + "GC GTGAA", + "GA TCCTT", + "TATTAA TG", + "GCC CGA", + "TAAA GTG", + "GCTT CCA", + "CATG GAA", + "TGAA GTT", + "CTT TCTC", + "TCTGTG TG", + "GTA TGTA", + "CAA TACA", + "TCAA GG", + "CC TCTAA", + "TGTG GG", + "GA TCTGA", + "GTA CTGA", + "TTAA TTAA", + "GCA GAAAA", + "CTA CATA", + "CC GGTG", + "GGGG AAAA", + "TACAA AAAA", + "TTTT GG", + "GTGA GAA", + "TCAA TAAA", + "TCAA GTT", + "CTCA GGA", + "CTA CTC", + "CAAA TCA", + "GGCA GAA", + "CC CGAA", + "TGTT GTG", + "GAGC AAAA", + "TATT TGTG", + "GTA GGTT", + "CTA CCTG", + "CA CAAAAA", + "CTCA GG", + 
"GCTT TA", + "CAGA GCAA", + "CTCA GTG", + "GGAA GAGA", + "TAA CCTG", + "GAAA TATA", + "CGA GAA", + "GTGA GG", + "CATT TATA", + "GGCA GCA", + "TC TAAATT", + "CCCA GTG", + "GCC TAGG", + "TGCA TTA", + "CC GTAA", + "CATT CCA", + "CTA GTTA", + "GA CTTAA", + "CTA TACA", + "GACA CAA", + "TCTT CACA", + "CC GGTT", + "TAAA GTAA", + "CTG TGGA", + "TAA GGTG", + "TCCA GTA", + "CAAA TTTA", + "AAATT AAAA", + "CCA TCTA", + "CTCC CTT", + "CTCC TTTT", + "GAGAGAGA GAGA", + "GGA GATA", + "CCTA TTA", + "CACC AAAA", + "CC GTTA", + "TGTT TATA", + "CTCA GGAGG", + "GA CGTA", + "GTCC TTA", + "GAAA GTT", + "GCTG GTG", + "CTC TACA", + "CAA TAGA", + "TAAAA TATT", + "GTA CCTG", + "GTA CTAA", + "CTT TGAAA", + "CCTT TCC", + "TAAAAA TTA", + "CTC GG", + "CAA GATA", + "CATT TGA", + "CACC TCA", + "GCCA GCC", + "GTC GG", + "GCA CATA", + "CA CTCAA", + "CTTTT AAAA", + "CAGGAA TT", + "GCC TATT", + "TCTT TCTG", + "CTGAGGCA GGAGAA", + "CAGG CAGG", + "CTA GTAA", + "TCCA TA", + "GAA CTTA", + "C G", + "GCTG TGA", + "GAAAA TA", + "TCTT CATT", + "GAGG GAGA", + "CCCA TCC", + "GAGG TGGG", + "GCC TCTA", + "GTA GGTG", + "TAAA CCA", + "GAA GGAAA", + "TATT GG", + "A TG", + "TCCA GTT", + "CCCA CAA", + "GAAA CACA", + "GTC TCAAAA", + "CTTTT CTTTT", + "TGAA GGA", + "TATT GATT", + "CTA TGTA", + "AAAAAAAA AAAAAA", + "TCCTT AAA", + "GC GCTA", + "TCCA CTT", + "GA CTCAA", + "TAAA TACA", + "TCA TGGA", + "TCTG GGA", + "TCC TATG", + "CTG TGCA", + "TCAA GTGA", + "TCA TAAAA", + "CA TCCAA", + "CCTT CCA", + "CTG TACA", + "GAA GGTT", + "CTG TGTA", + "GTCA CTT", + "TCA CAAAA", + "TCA GGCA", + "GTGTT AAA", + "CC CTTAA", + "CAAA GTG", + "GAAA TGTT", + "CTG GGGA", + "GA CGCC", + "TATA TGTG", + "CTA GATG", + "GAAATT AAA", + "GAA TGCA", + "GCA CTAA", + "CGG GAGG", + "GCCA CAA", + "CGC TTA", + "TCCA CAA", + "CAGA TA", + "TC TGAATT", + "TATTA TTTT", + "GC GCGG", + "CTC TGAAA", + "TCTCTT TG", + "TATT TCTA", + "GGGG TGGG", + "GGA TGCA", + "CCA CACC", + "TAAA TGTG", + "TCTT CCTG", + "GCAA GG", + "CTG CTCC", + "CTG GAGTG", + "CTGTT AAA", + "CACA 
CAAA", + "CTGA CTT", + "GAAAA GAAAA", + "CCTT CTCC", + "GAAA TAAAA", + "CCTCA GGTGA", + "GA TAATG", + "GAATT GCTT", + "CCAAAA TT", + "CGTG AAA", + "CACTG AAA", + "CAGTG AAA", + "GA TCTTA", + "GAGA TGGG", + "TCTG CCA", + "TGA GGTA", + "TATG GAA", + "TATA TTTTA", + "TGAA CTT", + "GCA GATA", + "CTTTT CTT", + "GTAAAA TG", + "TCTC TAA", + "TCTG CAAA", + "GA GCCTT", + "TA TCATT", + "CAA TTTTA", + "CC GCCA", + "TATT TAAAA", + "GAGA GATG", + "GAGA TGGA", + "GCCA GGATG", + "CGA GTAGCTG", + "TTCA TTTT", + "TATA CTT", + "GTC TACA", + "GTGA GTGA", + "GCTA CACA", + "GGGA GGA", + "CAA GGCA", + "GC TTTTAA", + "CA CTATT", + "GTT CATA", + "TCC TC", + "GTG GACA", + "TATT TGGA", + "CTC CAGTA", + "GTT CAGTT", + "CCAA GG", + "CAGA GCC", + "CTC GCC", + "CC GATG", + "GGAA TTTT", + "TCCA GCC", + "CC TCTTTT", + "GAA CCTT", + "CATG CACA", + "GTT TC", + "GAA GATA", + "TA CCCC", + "GCTG CCA", + "GGGG GAGG", + "GCAGTGA GCTGA", + "CTG TCTA", + "CGA GGA", + "CAA TGGG", + "GC TGTGAA", + "GAAA GTG", + "TACC AAAA", + "GTCA GG", + "CAGC TCC", + "TGTG CTT", + "GTC TAGG", + "TTTT TGTA", + "TTA TATG", + "TCA GGGG", + "TATT GTTA", + "CC TGAGA", + "TA TCTCA", + "CAA TCTG", + "CA CTCTG", + "GATT TAA", + "TGAA TAA", + "TCTT GTA", + "TCAA CTG", + "TCTC CAGG", + "CTA GAGG", + "CTGA GAAA", + "CTA GCTG", + "TCCA CCA", + "CGA TTTT", + "CC GGCC", + "GTT GACA", + "CTTA GAA", + "CA TAATG", + "GA GTATT", + "CACA GAAA", + "GA CTGTG", + "CTA TTTTA", + "TGA GGAAA", + "TTATT AAAA", + "CTTA TTTA", + "CAGA CTT", + "CA CGCC", + "GCTT GG", + "CCTG CTT", + "TAAA GCAA", + "CCTC GTGA", + "TA GAATT", + "CTTA CAA", + "TAAA GGAA", + "GTC TAGA", + "GTGA CTT", + "TACA TATG", + "GTCA GGA", + "GCTC CAGG", + "GAA GGGA", + "CA TGATG", + "TCA TCAAA", + "CGTT AAA", + "GTA CTCA", + "CTCC CAA", + "TATA TGTA", + "GGTA TTTT", + "TAA GCCA", + "C GAAATT", + "GTTTG TTTT", + "TCTG TCTT", + "TATA TCA", + "TGTT CATT", + "CAAA CCA", + "TTCA TTA", + "TATT TGTA", + "GATT GAA", + "CTA TAAAA", + "GATTAA TT", + "CCCA CCA", + "TCC TAGG", + "TAAA TGTA", 
+ "CTCTT AAA", + "GCA GTCC", + "GC GGCTG", + "GTC TCGAA", + "TGAA TGA", + "CTG GGGG", + "GTC TCGA", + "GAA CAAAA", + "TGAA TCA", + "TGTATTTT TAGTAGAGA", + "GTTA TTAA", + "TTTTTT AAAA", + "GTCA GTG", + "CCCA TTA", + "CACA GGA", + "TATT CCTT", + "TCTG CCTT", + "CCTG GTG", + "GC GAGC", + "TA CTAAA", + "TACA CAAA", + "CC GTCC", + "GCTT TGTT", + "GCA TCCA", + "CA TCTAA", + "GC TGTGTT", + "GTA GACA", + "GCC TATG", + "TCTT TGTG", + "GATT CTG", + "CGCC CGG", + "GA TGAGA", + "TA TCTGA", + "TGAA TTTG", + "CC TGATG", + "TAAAA CAA", + "CTT TAGG", + "TTTT CCTT", + "TGAA TAAA", + "CGG GGA", + "CAAA CATT", + "GTA TGGA", + "GCTT AAAA", + "TA CCAAA", + "CAAA GAGA", + "CTCC TGCC", + "GTAAAA AAA", + "CACA GCC", + "CCA TGCA", + "TA CAATT", + "CTA GTGA", + "CTGA GTT", + "GAGTG AAA", + "TCTGTT TG", + "CTG TAGG", + "TATAA AAAA", + "GCATT AAA", + "GTC CATA", + "TGTTAA AAA", + "TGTT TGA", + "GAA TAGA", + "CTT CAAAA", + "CTG GACA", + "CTG TAGA", + "CCATT AAA", + "CTA TCTG", + "CACTA TG", + "TTA TCAA", + "TAA GTAAA", + "TAATCCCAGCACTTTG GGAGGCC", + "CCA GAAAA", + "TGAA GCA", + "TCC CTTTT", + "TCA TACA", + "TA CGTT", + "GCC GTG", + "GGAA GTG", + "GG CCAAA", + "GTA CCAA", + "TCTCTA CTAAAAATA", + "CATT GTG", + "TGTG TGA", + "GAAA CAGA", + "CTT GACA", + "GA TGAGG", + "GAGA TTTT", + "CCTT CAA", + "GAA TCTA", + "CTC TCCTT", + "GG CGGA", + "TCTATCTA TCTATCTA", + "CACA CAGA", + "TGTG TGTA", + "CAAA GCC", + "TGTG CCA", + "GTT GAAAA", + "CTC CAGCA", + "TCAA GGA", + "TA GCTCA", + "CGC TGA", + "CCTG AAAA", + "GA CTATT", + "GATT CCA", + "GCTT CTA", + "GTC TGCC", + "CTT GGCA", + "TGTG GTA", + "GCTT TGA", + "GCTC TCTG", + "CTCA CAGA", + "TCTT TAAA", + "CAAA GCAA", + "TA CTTAA", + "GCTT CAA", + "CATT GAA", + "GGA GGAAA", + "CTA TAGA", + "CTGA GGAA", + "CCTG GCA", + "CC CTATT", + "CTC GTG", + "TTA CACA", + "TTA GGAA", + "CTG GTTA", + "GTT GTCC", + "TAATG AAAA", + "TATT TACA", + "GG GAATT", + "GTA GTTTT", + "GCTG CAA", + "CTA CGG", + "GCC GGA", + "CTG GGCA", + "CCTT AAAA", + "GATG GAA", + "TAGATAGA TAGATAGA", 
+ "TATG TAA", + "GTA CGG", + "TATT CAAA", + "GA TCTCC", + "CCTG TTTT", + "TATT GCA", + "GGAAGGAA GGAAGGAA", + "GG TAATT", + "TTA CAGA", + "TCA GC", + "GCAAAA TG", + "GAGA GCA", + "GTA GAAAA", + "CATT TGAA", + "TCTT CTTTT", + "TCC CATA", + "GTTA TTTA", + "CTA TCTA", + "CA TCCTG", + "TCTT GTG", + "TTA TTATT", + "CC CGTC", + "TACTA TG", + "TAAA CATA", + "TAA GGAAA", + "GCTT GTG", + "CTC TAAAA", + "GTTTT AAAA", + "GACA GGA", + "TCC TAGA", + "TCCA CCCA", + "GTT TGAAA", + "CCA TCTCA", + "CTAA GAA", + "GTA TCTA", + "GTGA GGA", + "GCTG GAGG", + "CCTGTAA TCCCAGCTA", + "GCAA CAA", + "CTT TCAAA", + "CAAA TGTT", + "CTT GTCC", + "TCTCAA AAA", + "TATT TATTA", + "TAA GGCA", + "GAGA GGAA", + "TA TGATT", + "GCA TCTA", + "C GTTATT", + "GCC TGTA", + "GTT TCAAA", + "CCTTCCTT CCTTCCTT", + "GG CTTTG", + "GTCA GAA", + "CATG CATG", + "GTCA TTTA", + "CTG GAAAA", + "CTT CGA", + "CCTA TTTT", + "CCAA CAA", + "TCCA TCC", + "TAAA GTTA", + "GTC TCTC", + "TAA TCAAA", + "GATTTT TG", + "GATT TCTT", + "GG GCTGA", + "GCA TGTA", + "CCTG GGTT", + "GAGA CAA", + "GCTG TCA", + "TGA TAGG", + "GGA GACC", + "CC GGCA", + "TAA TCTCA", + "TGAA TTAA", + "TCTG GTG", + "GCC TC", + "GG CGCA", + "CCA GCTA", + "CA GTCTG", + "TGAA CTA", + "GTAA GAA", + "CCTT TCA", + "TCCA TGA", + "CAAA GGAA", + "CTC TC", + "CTC TCTCA", + "CTC CAGC", + "GTA GATA", + "CCCC CTCC", + "GG CGCC", + "TCTG TCC", + "GA CCATT", + "CTT GAAAA", + "TTA TCC", + "TACA TGTG", + "CAAA TTTG", + "TTTT GTG", + "CAGA GTG", + "GTAA TAA", + "GTGA GTG", + "TTTT TCC", + "GG CTCTG", + "GCC CTAA", + "GG CTGTT", + "CC CAATT", + "CAGA GCTT", + "TATAAA TG", + "GA GTCTG", + "TCTTAA AAA", + "GTTTTA TG", + "GA TCCAA", + "GGCC CTG", + "GA TCCTG", + "TCAA GTG", + "GATT CAA", + "CCTC TCTT", + "GAGA CGG", + "CAGA TCA", + "TAAAA GAA", + "CTGA GCAA", + "CCTG CCA", + "CCTT CTA", + "CGC TCA", + "GG CTGTG", + "TGGG AAAA", + "GGA GCCTG", + "CTGA GTG", + "CGTC AAA", + "TCAA GTA", + "CGTAA TT", + "TTA CTTA", + "TATA CTA", + "GG GCAAA", + "CAA CTTTT", + "CTT TGCC", + "GC CAGGAA", 
+ "CACA CTA", + "GCC CAGC", + "TAAATAAA TAAATAAA", + "CTT TCCTT", + "GGGA GAA", + "TATG GTA", + "CGG CCA", + "CCTC TCTG", + "GAAA GCAA", + "CAA GCCA", + "GG CGTT", + "CTC TTTTA", + "TCGGCC TCCCAAA", + "GATT TATT", + "CAA GTCC", + "TA TCTTA", + "GTTCAA GACCA", + "CTCA CACA", + "GAAA TCAA", + "TGA GACC", + "GG GTAAA", + "GCTT GTT", + "GA TTTTAA", + "TTTT TATA", + "CAGA GCTG", + "TC TGTTAA", + "GTAA TTAA", + "TCTT TGAA", + "CTT GCCA", + "TTTT CATT", + "CCA TGTA", + "TCTC GGCTCACTGCAA", + "GGA TTCA", + "TC TATTAA", + "TACA TAAA", + "GATT GATT", + "GGA GAGGA", + "CGC AAAA", + "GGA CTAA", + "TTA TGTG", + "GTCA CTCA", + "GACA GCA", + "CGA GTT", + "GATG GTT", + "GGAA GAGG", + "GCCAA CATGGTGAAA", + "GGA GCCA", + "TGAA CTG", + "CCTC TGTG", + "GTA TAAAA", + "TCC CAGAA", + "CATT TATG", + "GA TTATG", + "TGTT TCTG", + "GAGTG GGTT", + "TACA TATT", + "CTC CAGGA", + "GACA CTG", + "GG TCTCA", + "CC GGGA", + "TGTT TAAA", + "CTCA CCA", + "GGA CTTA", + "GCC CACC", + "CAAA TCAA", + "GAAA TGTG", + "TA GTTAA", + "TCTA TAA", + "TTA GATT", + "GTG TAGG", + "TACTG AAA", + "GCA CCCA", + "GTG GGCTG", + "GAA TGAAA", + "TCTA GTT", + "TCA GGAGA", + "TCCA CTA", + "CTCA GTT", + "TACTT AAA", + "GA CTCCA", + "TCCATT TG", + "CACA GCAA", + "GCTCATG CCTG", + "GGTG CTG", + "GCTT TCTT", + "GTG GCCA", + "TA CGTG", + "GTG CAGTG", + "TGAA GTCA", + "CCTT TAA", + "TCTCAGCTCA CTGCAA", + "GAAA TATG", + "CC TCAAAA", + "GGGG CGG", + "CGA CAA", + "GG TGATG", + "GTCTT AAA", + "CAGAAA TG", + "CGTCA TT", + "CCAA GCA", + "GGA TCAA", + "GTGCTG GGATTA", + "GCTG GCC", + "CGGA GCTT", + "TACA TGA", + "TGTT TGAA", + "TCTC CATT", + "TAA GCAAA", + "CCTT TCTT", + "TA CTGTT", + "TCCA TCTT", + "CTTA CTT", + "CGGA GGTT", + "CAAAA CAA", + "TCA TAGG", + "TTA CTAA", + "CTTA TTTG", + "GAA TGTA", + "CCCCA TGGA", + "TTA CTGA", + "CGG AAAA", + "CTC CAGTG", + "TGTT CCA", + "CAGA TGAA", + "GTT GATA", + "TCC CCCC", + "CATT GCA", + "CTCA GCC", + "CTTA CTG", + "TA TCCTT", + "CTTTTA TG", + "TGAGTA GCTG", + "GACTG AAA", + "CAA TGAAA", + "CGA 
CTG", + "CTT GGGA", + "GCAA GCA", + "TCA CTCC", + "GATT TGA", + "CATTTT AAA", + "TCAA CTA", + "GTCC AAAA", + "CACC CTG", + "TTA CCTT", + "CAA GGGG", + "TTTT GGA", + "GTTA TTTG", + "GCTA CTG", + "CTGAGGCAGGA GAATG", + "GTGA TGA", + "GTA GTC", + "TAGTA TG", + "GTA TAGA", + "GTG TCTA", + "GCTG CTA", + "TTA GTAA", + "TAAA CATG", + "GTCA CCA", + "CA TCTTTT", + "CATA TAA", + "TCTC TCTA", + "TTTTA TTAA", + "TATT CTAA", + "GAAA TTTA", + "CTT CCCTG", + "TAAA GATG", + "TA CGTA", + "GTT TATTA", + "GAAAA GAA", + "CCCA CCCA", + "CAATT AAAA", + "CC GACA", + "CAAA GTGA", + "CAAA CAAAA", + "GCAA TTTT", + "CGATT AA", + "TTA GAGA", + "CTGA TGA", + "GGA GGAGG", + "GTCC TGGG", + "TCA TGAAA", + "GCAA CCA", + "GTT GGCA", + "GCGG CGG", + "GTCC CCA", + "GTA GGGG", + "GCCA TGTT", + "GTT CGAGA", + "GCC TATA", + "TAAA TTCA", + "GG CCATT", + "GAAAA CAA", + "TGTG TATG", + "GTA CTC", + "TAGG GAA", + "CCTT GAA", + "TC TATTTG", + "GAGG GCA", + "GAAA CTGA", + "TA CGC", + "TA CAAAAA", + "TCA TTATT", + "GGAAAA TT", + "TCAA TATT", + "CC CGTA", + "GGA GAGAA", + "TTA GTTA", + "CTCA GAGA", + "TC GAGC", + "CTA GTCA", + "GATG GCA", + "TGAA CATT", + "CTA TGGG", + "CACA CCA", + "TCAA TTAA", + "GGAA CTG", + "TTA CATG", + "CTT TCATT", + "CAGC TCTG", + "TCTTTT TTTT", + "TAAA TCTT", + "TGA TCTA", + "CATA CAA", + "GC TCAAAA", + "GC TGTGTG", + "TCAA TCA", + "GATT TGAA", + "CCAA GGA", + "GTCC TCA", + "GTG CTCC", + "AAAA TAA", + "GTGA CAA", + "GCTCA CGCCTG", + "CGA CGG", + "TA TCCAA", + "CACA CATG", + "TCTC TCTCC", + "TGTG GTT", + "CTT GGTA", + "TCTG GTT", + "TTTA TAA", + "CTG CTTTT", + "TGTG TCA", + "CACA TCA", + "CC TAATG", + "C GTTTTTT", + "GCTG GCA", + "GA CGTC", + "TATAA TTA", + "TACA GTAA", + "GAAA GTAA", + "GTC TGAAA", + "CCCA TTTT", + "TATA TGA", + "CTT GATA", + "CTT TATTTT", + "CTT TATTA", + "GG CGAA", + "CCA TGCC", + "CCTG CCTT", + "GAAGAA GAAGAA", + "CTGA CTGA", + "GCC CTTA", + "TA TCTAA", + "GTG TTTTA", + "TGTG GCA", + "TATT GTAA", + "GCCA GAAA", + "CCCTG TCTC", + "CACA GGAA", + "AAAA CAA", + "AAAAAAAA 
AAAAAAA", + "TAA CTCC", + "GCC TAAA", + "CGA GTA", + "TA GTATT", + "GTATTTT TAGTAGAGA", + "GCTG CAGG", + "TATT GAAA", + "CCAGCC TGGG", + "GCTCC AAA", + "TA CGAA", + "GGCC TCC", + "TATA CAAA", + "CATG GCA", + "CATG CAA", + "TACA CCA", + "CTT TACCA", + "TACA GAGA", + "TATT CTTA", + "TATG TCA", + "TCAA GCA", + "TCAA TGA", + "GG CTCTT", + "GGAA GTT", + "TCCA TGTT", + "GCTT TCC", + "TATG TGA", + "GTG TAGA", + "TTTT TAAAA", + "GCTG GAGA", + "GTGA GAGA", + "CCTA GAA", + "CCTCC AAA", + "CCAA TGA", + "CAGG GCA", + "CTA TGCA", + "CTT CACC", + "CTA CAAAA", + "CTCA CC", + "GAGTA TG", + "TA GAAAAA", + "CTTTT GAA", + "TAAA GAGA", + "CATG TCA", + "TCTTTT AAA", + "CACA GTGA", + "GA TCTAA", + "TAA GGTA", + "CATA GAA", + "CGC GCC", + "CAGC TTA", + "TATA GTT", + "CGG GCC", + "TATC CATT", + "TGTTTG TTTT", + "GCTG GCTG", + "TACA GGA", + "CTCC TTTG", + "CAA TCTA", + "CCCC CTG", + "TATA CTG", + "CTGA GCC", + "CGG TTA", + "TGAA GTG", + "GCTT CCTT", + "TTTTA TTTG", + "TA GTGAA", + "CTGA GGTG", + "TCTT CTC", + "GACA GAAA", + "CTGAA CTGAA", + "CCTG GGAA", + "TCC CCAAA", + "TATG TATT", + "GATT TCTG", + "CATT CAAA", + "CACA GTT", + "GCTT GAA", + "GTG GATCA", + "CTGA GTGA", + "TGAA TTTA", + "TCAA CAAA", + "GG TCATT", + "GTAA TTTA", + "GC GACTT", + "CTGA GAGA", + "GTG CCCA", + "CTA GGTT", + "TCC TGAAA", + "GTC CACC", + "TCA CAGAA", + "GC GAAAA", + "GTA TGGG", + "TGAA CAAA", + "TAAA CAAAA", + "CC GTTTT", + "TC TCAATT", + "TCCA GAAA", + "GTAA CAA", + "GCA TTTTA", + "TCTC CATG", + "TTA TAAAA", + "CAGG CAA", + "CTAAAA AAA", + "GTT GGGA", + "TAAA GATT", + "TGAA GAGA", + "CCCC TCA", + "TGTT TATG", + "TCTA CTG", + "CCAA TTTT", + "GGTG GTG", + "GGAA CAA", + "TGTG GGA", + "TCTG CTA", + "GAA CGA", + "GTAA GTA", + "GTT GCCA", + "AAAA TTTT", + "GC GCGA", + "GAAA GATG", + "GTC TCTCA", + "TCCA TCAA", + "GCA GCTA", + "CACA TTTG", + "CTGA CAA", + "TCCA CC", + "GC T", + "CCCA CTT", + "GCA GGTA", + "GAGG CCA", + "TAAA GTCA", + "CTG GATA", + "CGG CAA" + ] + } +} \ No newline at end of file diff --git 
a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_ocr_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.05_ep3_seed42/checkpoint-3279/tokenizer_config.json b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_ocr_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.05_ep3_seed42/checkpoint-3279/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e926cbe05847fc6887cfc8a947900d32ebe68356 --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_ocr_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.05_ep3_seed42/checkpoint-3279/tokenizer_config.json @@ -0,0 +1,56 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "[UNK]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "[CLS]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "[SEP]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "[PAD]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "[MASK]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "cache_dir": null, + "clean_up_tokenization_spaces": true, + "cls_token": "[CLS]", + "mask_token": "[MASK]", + "model_max_length": 100, + "pad_token": "[PAD]", + "padding_side": "right", + "sep_token": "[SEP]", + "tokenizer_class": "PreTrainedTokenizerFast", + "trust_remote_code": true, + "unk_token": "[UNK]", + "use_fast": true +} diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_ocr_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.05_ep3_seed42/checkpoint-3279/trainer_state.json 
b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_ocr_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.05_ep3_seed42/checkpoint-3279/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..bd7d947b54847bb6bdc9c30b395abccc8c867f1a --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_ocr_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.05_ep3_seed42/checkpoint-3279/trainer_state.json @@ -0,0 +1,250 @@ +{ + "best_metric": 0.7358749762534129, + "best_model_checkpoint": "genomic_bench_DNAbert2_output/human_ocr_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.05_ep3_seed42/checkpoint-3279", + "epoch": 3.0, + "eval_steps": 100, + "global_step": 3279, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.09, + "learning_rate": 1.8292682926829268e-05, + "loss": 0.6694, + "step": 100 + }, + { + "epoch": 0.18, + "learning_rate": 2.9653290529695024e-05, + "loss": 0.6102, + "step": 200 + }, + { + "epoch": 0.27, + "learning_rate": 2.8690208667736757e-05, + "loss": 0.5807, + "step": 300 + }, + { + "epoch": 0.37, + "learning_rate": 2.772712680577849e-05, + "loss": 0.5773, + "step": 400 + }, + { + "epoch": 0.46, + "learning_rate": 2.6764044943820228e-05, + "loss": 0.5638, + "step": 500 + }, + { + "epoch": 0.55, + "learning_rate": 2.580096308186196e-05, + "loss": 0.5533, + "step": 600 + }, + { + "epoch": 0.64, + "learning_rate": 2.483788121990369e-05, + "loss": 0.5585, + "step": 700 + }, + { + "epoch": 0.73, + "learning_rate": 2.3874799357945425e-05, + "loss": 0.5493, + "step": 800 + }, + { + "epoch": 0.82, + "learning_rate": 2.291171749598716e-05, + "loss": 0.5406, + "step": 900 + }, + { + "epoch": 0.91, + "learning_rate": 2.1948635634028892e-05, + "loss": 0.5343, + "step": 1000 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.7294575417715724, + "eval_f1": 0.7291296847420912, + "eval_loss": 0.5247864723205566, + 
"eval_matthews_correlation": 0.45931696274160566, + "eval_precision": 0.7300894822512383, + "eval_recall": 0.7292282878360884, + "eval_runtime": 7.2288, + "eval_samples_per_second": 2417.545, + "eval_steps_per_second": 18.952, + "step": 1093 + }, + { + "epoch": 1.01, + "learning_rate": 2.0985553772070626e-05, + "loss": 0.5383, + "step": 1100 + }, + { + "epoch": 1.1, + "learning_rate": 2.0022471910112362e-05, + "loss": 0.5082, + "step": 1200 + }, + { + "epoch": 1.19, + "learning_rate": 1.9059390048154096e-05, + "loss": 0.5053, + "step": 1300 + }, + { + "epoch": 1.28, + "learning_rate": 1.8096308186195826e-05, + "loss": 0.5087, + "step": 1400 + }, + { + "epoch": 1.37, + "learning_rate": 1.713322632423756e-05, + "loss": 0.5081, + "step": 1500 + }, + { + "epoch": 1.46, + "learning_rate": 1.6170144462279293e-05, + "loss": 0.5005, + "step": 1600 + }, + { + "epoch": 1.56, + "learning_rate": 1.5207062600321028e-05, + "loss": 0.4988, + "step": 1700 + }, + { + "epoch": 1.65, + "learning_rate": 1.4243980738362762e-05, + "loss": 0.4994, + "step": 1800 + }, + { + "epoch": 1.74, + "learning_rate": 1.3280898876404494e-05, + "loss": 0.4923, + "step": 1900 + }, + { + "epoch": 1.83, + "learning_rate": 1.2317817014446229e-05, + "loss": 0.4969, + "step": 2000 + }, + { + "epoch": 1.92, + "learning_rate": 1.1354735152487962e-05, + "loss": 0.4998, + "step": 2100 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.7334630350194552, + "eval_f1": 0.7331711549902049, + "eval_loss": 0.5253973603248596, + "eval_matthews_correlation": 0.468812256718811, + "eval_precision": 0.7350440286756788, + "eval_recall": 0.7337699592796829, + "eval_runtime": 7.2126, + "eval_samples_per_second": 2422.974, + "eval_steps_per_second": 18.994, + "step": 2186 + }, + { + "epoch": 2.01, + "learning_rate": 1.0391653290529694e-05, + "loss": 0.4906, + "step": 2200 + }, + { + "epoch": 2.1, + "learning_rate": 9.428571428571428e-06, + "loss": 0.4667, + "step": 2300 + }, + { + "epoch": 2.2, + "learning_rate": 
8.465489566613163e-06, + "loss": 0.4585, + "step": 2400 + }, + { + "epoch": 2.29, + "learning_rate": 7.502407704654897e-06, + "loss": 0.4587, + "step": 2500 + }, + { + "epoch": 2.38, + "learning_rate": 6.539325842696629e-06, + "loss": 0.4511, + "step": 2600 + }, + { + "epoch": 2.47, + "learning_rate": 5.576243980738363e-06, + "loss": 0.4531, + "step": 2700 + }, + { + "epoch": 2.56, + "learning_rate": 4.613162118780096e-06, + "loss": 0.4514, + "step": 2800 + }, + { + "epoch": 2.65, + "learning_rate": 3.65008025682183e-06, + "loss": 0.452, + "step": 2900 + }, + { + "epoch": 2.74, + "learning_rate": 2.6869983948635634e-06, + "loss": 0.4472, + "step": 3000 + }, + { + "epoch": 2.84, + "learning_rate": 1.7239165329052971e-06, + "loss": 0.45, + "step": 3100 + }, + { + "epoch": 2.93, + "learning_rate": 7.608346709470304e-07, + "loss": 0.4487, + "step": 3200 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.7359235523002976, + "eval_f1": 0.7358749762534129, + "eval_loss": 0.5309367179870605, + "eval_matthews_correlation": 0.4717971985436896, + "eval_precision": 0.7359348554499333, + "eval_recall": 0.7358623486652538, + "eval_runtime": 7.2043, + "eval_samples_per_second": 2425.787, + "eval_steps_per_second": 19.017, + "step": 3279 + } + ], + "logging_steps": 100, + "max_steps": 3279, + "num_train_epochs": 3, + "save_steps": 100, + "total_flos": 2.1553112099736e+16, + "trial_name": null, + "trial_params": null +} diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_ocr_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.05_ep3_seed42/checkpoint-3279/training_args.bin b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_ocr_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.05_ep3_seed42/checkpoint-3279/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..ae615e84e50905f14c778d1f12f461b6a96e7cf6 --- /dev/null +++ 
b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_ocr_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.05_ep3_seed42/checkpoint-3279/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32cf507043abacf14daadc252f8e254df28139532f88a4f889921c6c456b5433 +size 5393 diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_ocr_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.05_ep3_seed42/results/base5120_human_ocr_ensembl_lr3e-5_wd0.01_wr0.05_ep3_seed42/eval_results.json b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_ocr_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.05_ep3_seed42/results/base5120_human_ocr_ensembl_lr3e-5_wd0.01_wr0.05_ep3_seed42/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..25f2b6e3d2a1cc6190a851c31f00c382110a5739 --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_output/human_ocr_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.05_ep3_seed42/results/base5120_human_ocr_ensembl_lr3e-5_wd0.01_wr0.05_ep3_seed42/eval_results.json @@ -0,0 +1 @@ +{"eval_loss": 0.5353624820709229, "eval_accuracy": 0.7384985122453651, "eval_f1": 0.7384938234495757, "eval_matthews_correlation": 0.4769881305208183, "eval_precision": 0.7384952210112051, "eval_recall": 0.738492909515214, "eval_runtime": 7.2139, "eval_samples_per_second": 2422.552, "eval_steps_per_second": 18.991, "epoch": 3.0} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_re_run.log b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_re_run.log new file mode 100644 index 0000000000000000000000000000000000000000..af7506882d2cab3b7afa24996360c9275162de3a --- /dev/null +++ b/Finetune-GenomicBenchmarks/genomic_bench_DNAbert2_re_run.log @@ -0,0 +1,3042 @@ +nohup: ignoring input +[SKIP] base5120_demo_coding_vs_intergenomic_seqs_lr3e-5_wd0.0_wr0.06_ep3_seed42 +[SKIP] base5120_human_nontata_promoters_lr3e-5_wd0.03_wr0.06_ep10_seed42 +[SKIP] 
base5120_human_enhancers_cohn_lr3e-5_wd0.0_wr0.03_ep1_seed42 +[SKIP] base5120_human_ocr_ensembl_lr3e-5_wd0.01_wr0.05_ep3_seed42 +[SKIP] base5120_demo_human_or_worm_lr3e-5_wd0.03_wr0.06_ep8_seed42 +[RUN ] base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42 +wandb: Appending key for api.wandb.ai to your netrc file: /home/nanhuang/.netrc +wandb: Currently logged in as: n5huang (n5huang-uc-san-diego) to https://api.wandb.ai. Use `wandb login --relogin` to force relogin +/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/utils/generic.py:441: FutureWarning: `torch.utils._pytree._register_pytree_node` is deprecated. Please use `torch.utils._pytree.register_pytree_node` instead. + _torch_pytree._register_pytree_node( +/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/utils/generic.py:309: FutureWarning: `torch.utils._pytree._register_pytree_node` is deprecated. Please use `torch.utils._pytree.register_pytree_node` instead. + _torch_pytree._register_pytree_node( +/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/utils/generic.py:309: FutureWarning: `torch.utils._pytree._register_pytree_node` is deprecated. Please use `torch.utils._pytree.register_pytree_node` instead. + _torch_pytree._register_pytree_node( +wandb: setting up run pfwf0zeu +wandb: Tracking run with wandb version 0.23.1 +wandb: Run data is saved locally in /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_133444-pfwf0zeu +wandb: Run `wandb offline` to turn off syncing. +wandb: Syncing run vibrant-glitter-37 +wandb: ⭐️ View project at https://wandb.ai/n5huang-uc-san-diego/genomic_bench_DNAbert2 +wandb: 🚀 View run at https://wandb.ai/n5huang-uc-san-diego/genomic_bench_DNAbert2/runs/pfwf0zeu +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... 
+Some weights of BertForSequenceClassification were not initialized from the model checkpoint at /data/nanhuang/Nan/models/DNAbert2_Pretrained and are newly initialized: ['classifier.weight', 'bert.pooler.dense.weight', 'bert.pooler.dense.bias', 'classifier.bias'] +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. +/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/accelerate/accelerator.py:439: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead. + self.scaler = torch.cuda.amp.GradScaler(**kwargs) +Using auto half precision backend +***** Running training ***** + Num examples = 5,531 + Num Epochs = 4 + Instantaneous batch size per device = 32 + Total train batch size (w. parallel, distributed & accumulation) = 128 + Gradient Accumulation steps = 4 + Total optimization steps = 172 + Number of trainable parameters = 89,188,610 +Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true" + 0%| | 0/172 [00:00&2 + exit 1 +fi + +# export CUDA_VISIBLE_DEVICES=5 +export CUDA_VISIBLE_DEVICES=0, 1, 6, 7 + + +echo "The provided data_path is $data_path" +echo "Output root: $output_root" + +# datasets=( +# demo_human_or_worm +# dummy_mouse_enhancers_ensembl +# human_enhancers_ensembl +# human_nontata_promoters +# demo_coding_vs_intergenomic_seqs +# drosophila_enhancers_stark +# human_enhancers_cohn +# human_ensembl_regulatory +# human_ocr_ensembl +# ) + +for seed in 42; do + for idx in "${!MODELS[@]}"; do + model=${MODELS[$idx]} + tokenizer=${TOKENIZERS[$idx]} + model_name=${MODEL_NAMES[$idx]} + + # for data in demo_coding_vs_intergenomic_seqs human_nontata_promoters human_enhancers_cohn human_ocr_ensembl; do # length ~200 + # run_output_dir=${output_root}/${data}/${model_name} + # mkdir -p "${run_output_dir}" + # echo "Running ${model_name} on ${data}, seed ${seed}, lr ${lr}, output 
${run_output_dir}" + # python train.py \ + # --model_name_or_path ${model} \ + # --tokenizer_path ${tokenizer} \ + # --trust_remote_code True \ + # --data_path $data_path/$data/split \ + # --kmer -1 \ + # --run_name ${model_name}_hg38_BPE_${lr}_${data}_seed${seed} \ + # --model_max_length 100 \ + # --per_device_train_batch_size 128 \ + # --per_device_eval_batch_size 128 \ + # --gradient_accumulation_steps 1 \ + # --learning_rate ${lr} \ + # --num_train_epochs 3 \ + # --fp16 \ + # --save_steps 200 \ + # --output_dir ${run_output_dir} \ + # --evaluation_strategy steps \ + # --eval_steps 200 \ + # --warmup_steps 30 \ + # --logging_steps 100000 \ + # --overwrite_output_dir True \ + # --log_level info \ + # --seed ${seed} \ + # --find_unused_parameters False \ + # --project_name ${project_name} + # done + + # for data in demo_human_or_worm drosophila_enhancers_stark dummy_mouse_enhancers_ensembl human_enhancers_ensembl; do # length mostly 2000+ + # run_output_dir=${output_root}/${data}/${model_name} + # mkdir -p "${run_output_dir}" + # echo "Running ${model_name} on ${data}, seed ${seed}, lr ${lr}, output ${run_output_dir}" + # python train.py \ + # --model_name_or_path ${model} \ + # --tokenizer_path ${tokenizer} \ + # --trust_remote_code True \ + # --data_path $data_path/$data/split \ + # --kmer -1 \ + # --run_name ${model_name}_hg38_BPE_${lr}_${data}_seed${seed} \ + # --model_max_length 512 \ + # --per_device_train_batch_size 128 \ + # --per_device_eval_batch_size 128 \ + # --gradient_accumulation_steps 1 \ + # --learning_rate ${lr} \ + # --num_train_epochs 5 \ + # --fp16 \ + # --save_steps 200 \ + # --output_dir ${run_output_dir} \ + # --evaluation_strategy steps \ + # --eval_steps 200 \ + # --warmup_steps 30 \ + # --logging_steps 100000 \ + # --overwrite_output_dir True \ + # --log_level info \ + # --seed ${seed} \ + # --find_unused_parameters False \ + # --project_name ${project_name} + # done + + for data in drosophila_enhancers_stark 
dummy_mouse_enhancers_ensembl human_enhancers_ensembl; do # length mostly 2000+ + run_output_dir=${output_root}/${data}/${model_name} + mkdir -p "${run_output_dir}" + echo "Running ${model_name} on ${data}, seed ${seed}, lr ${lr}, output ${run_output_dir}" + python train.py \ + --model_name_or_path ${model} \ + --tokenizer_path ${tokenizer} \ + --trust_remote_code True \ + --data_path $data_path/$data/split \ + --kmer -1 \ + --run_name ${model_name}_hg38_BPE_${lr}_${data}_seed${seed} \ + --model_max_lenFinetune-species/super_all/2e-5/base/results/hg38_base_2e-5_human_mouse_superclass_allchr_seed42gth 512 \ + --per_device_train_batch_size 128 \ + --per_device_eval_batch_size 128 \ + --gradient_accumulation_steps 1 \ + --learning_rate ${lr} \ + --num_train_epochs 5 \ + --fp16 \ + --save_steps 200 \ + --output_dir ${run_output_dir} \ + --evaluation_strategy steps \ + --eval_steps 200 \ + --warmup_steps 30 \ + --logging_steps 100000 \ + --overwrite_output_dir True \ + --log_level info \ + --seed ${seed} \ + --find_unused_parameters False \ + --project_name ${project_name} + done + + # for data in human_ensembl_regulatory; do # length ~200-700 + # run_output_dir=${output_root}/${data}/${model_name} +# mkdir -p "${run_output_dir}" +# echo "Running ${model_name} on ${data}, seed ${seed}, lr ${lr}, output ${run_output_dir}" +# python train.py \ +# --model_name_or_path ${model} \ +# --tokenizer_path ${tokenizer} \ +# --trust_remote_code True \ +# --data_path $data_path/$data/split \ +# --kmer -1 \ +# --run_name ${model_name}_hg38_BPE_${lr}_${data}_seed${seed} \ +# --model_max_length 250 \ +# --per_device_train_batch_size 128 \ +# --per_device_eval_batch_size 128 \ +# --gradient_accumulation_steps 1 \ +# --learning_rate ${lr} \ +# --num_train_epochs 8 \ +# --fp16 \ +# --save_steps 200 \ +# --output_dir ${run_output_dir} \ +# --evaluation_strategy steps \ +# --eval_steps 200 \ +# --warmup_steps 30 \ +# --logging_steps 100000 \ +# --overwrite_output_dir True \ +# --log_level 
info \ +# --seed ${seed} \ +# --find_unused_parameters False \ +# --project_name ${project_name} +# done + +# done +# done +Finetune-species/super_all/2e-5/base/results/hg38_base_2e-5_human_mouse_superclass_allchr_seed42 \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/run_dnabert2_2048.sh b/Finetune-GenomicBenchmarks/run_dnabert2_2048.sh new file mode 100644 index 0000000000000000000000000000000000000000..cbbd31c6d17b09be5e8bec56b1fa849b49364770 --- /dev/null +++ b/Finetune-GenomicBenchmarks/run_dnabert2_2048.sh @@ -0,0 +1,115 @@ +#!/bin/bash + +data_path=$1 +lr=$2 +output_path=$3 +project_name=$4 +vocab=117M + +model=/storage2/fs1/btc/Active/yeli/xiaoxiao.zhou/tokenize/tokenizers/DNAbert2/pretrain/models_2048/model_1/checkpoint-200000 +tokenizer=/storage2/fs1/btc/Active/yeli/xiaoxiao.zhou/tokenize/tokenizers/DNAbert2/hg38_2048/tokenizer.json + +echo "The provided data_path is $data_path" + +datasets=( + demo_human_or_worm + dummy_mouse_enhancers_ensembl + human_enhancers_ensembl + human_nontata_promoters + demo_coding_vs_intergenomic_seqs + drosophila_enhancers_stark + human_enhancers_cohn + human_ensembl_regulatory + human_ocr_ensembl +) + + +for seed in 42 +do + for data in demo_human_or_worm demo_coding_vs_intergenomic_seqs human_nontata_promoters # length all 200, 251 + do + python train.py \ + --model_name_or_path ${model} \ + --tokenizer_path ${tokenizer} \ + --trust_remote_code True \ + --data_path $data_path/$data/split \ + --kmer -1 \ + --run_name hg38_BPE_${lr}_${data}_seed${seed} \ + --model_max_length 100 \ + --per_device_train_batch_size 128 \ + --per_device_eval_batch_size 128 \ + --gradient_accumulation_steps 1 \ + --learning_rate ${lr} \ + --num_train_epochs 3 \ + --fp16 \ + --save_steps 200 \ + --output_dir ${output_path} \ + --evaluation_strategy steps \ + --eval_steps 200 \ + --warmup_steps 30 \ + --logging_steps 100000 \ + --overwrite_output_dir True \ + --log_level info \ + --seed ${seed} \ + --find_unused_parameters False 
\ + --project_name ${project_name} + done + + for data in drosophila_enhancers_stark dummy_mouse_enhancers_ensembl # length mostly 2000, 3000~4000 + do + python train.py \ + --model_name_or_path ${model} \ + --tokenizer_path ${tokenizer} \ + --trust_remote_code True \ + --data_path $data_path/$data/split \ + --kmer -1 \ + --run_name hg38_BPE_${lr}_${data}_seed${seed} \ + --model_max_length 512 \ + --per_device_train_batch_size 128 \ + --per_device_eval_batch_size 128 \ + --gradient_accumulation_steps 1 \ + --learning_rate ${lr} \ + --num_train_epochs 3 \ + --fp16 \ + --save_steps 200 \ + --output_dir ${output_path} \ + --evaluation_strategy steps \ + --eval_steps 200 \ + --warmup_steps 30 \ + --logging_steps 100000 \ + --overwrite_output_dir True \ + --log_level info \ + --seed ${seed} \ + --find_unused_parameters False \ + --project_name ${project_name} + done + + for data in human_enhancers_ensembl human_enhancers_cohn human_ensembl_regulatory human_ocr_ensembl # length usually 200~700 + do + python train.py \ + --model_name_or_path ${model} \ + --tokenizer_path ${tokenizer} \ + --trust_remote_code True \ + --data_path $data_path/$data/split \ + --kmer -1 \ + --run_name hg38_BPE_${lr}_${data}_seed${seed} \ + --model_max_length 250 \ + --per_device_train_batch_size 128 \ + --per_device_eval_batch_size 128 \ + --gradient_accumulation_steps 1 \ + --learning_rate ${lr} \ + --num_train_epochs 3 \ + --fp16 \ + --save_steps 200 \ + --output_dir ${output_path} \ + --evaluation_strategy steps \ + --eval_steps 200 \ + --warmup_steps 30 \ + --logging_steps 100000 \ + --overwrite_output_dir True \ + --log_level info \ + --seed ${seed} \ + --find_unused_parameters False \ + --project_name ${project_name} + done +done diff --git a/Finetune-GenomicBenchmarks/run_gmb.sh b/Finetune-GenomicBenchmarks/run_gmb.sh new file mode 100644 index 0000000000000000000000000000000000000000..0e46cbd2c91cbb859570ee0b5205e1120179f841 --- /dev/null +++ b/Finetune-GenomicBenchmarks/run_gmb.sh @@ 
-0,0 +1,145 @@ +#!/bin/bash +set -euo pipefail + +# Usage: +# nohup bash run_gmb.sh \ +# /data/nanhuang/Nan/ft_data \ +# genomic_bench_DNAbert2_output \ +# genomic_bench_DNAbert2 \ +# 7 > genomic_bench_DNAbert2_re_run.log 2>&1 & +# +# Args: +# 1) data_path (e.g., ft_data) +# 2) output_root +# 3) project_name +# 4) gpu_id (optional, default: 0) + +source /data/nanhuang/miniconda3/etc/profile.d/conda.sh +conda activate bpe_v2 + + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +data_path=${1:?"Missing data_path"} +output_path=${2:?"Missing output_path"} +project_name=${3:?"Missing project_name"} +gpu_id=${4:-0} + +export CUDA_VISIBLE_DEVICES="${gpu_id}" + +BEST_PARAMS_CSV="/data/nanhuang/Nan/best_params_len2_5120_by_task.csv" + +MODEL="/data/nanhuang/Nan/models/DNAbert2_Pretrained" +TOKENIZER="/data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json" +MODEL_NAME="DNAbert2_Pretrained" + + + +if [[ ! -d "${data_path}" && -d "${SCRIPT_DIR}/${data_path}" ]]; then + data_path="${SCRIPT_DIR}/${data_path}" +fi + +if [[ ! -d "${data_path}" ]]; then + echo "data_path does not exist: ${data_path}" >&2 + exit 1 +fi + +declare -A TASK_LR +declare -A TASK_WD +declare -A TASK_WR +declare -A TASK_EP +declare -A TASK_SEED + +while IFS=, read -r benchmark task metric best_score lr weight_decay warmup_ratio num_train_epochs selected_epoch seed run_name; do + [[ "${benchmark}" == "benchmark" ]] && continue + [[ "${benchmark}" != "GENOMIC_BENCH" ]] && continue + + TASK_LR["${task}"]="${lr}" + TASK_WD["${task}"]="${weight_decay}" + TASK_WR["${task}"]="${warmup_ratio}" + TASK_EP["${task}"]="${selected_epoch}" + TASK_SEED["${task}"]="${seed}" +done < "${BEST_PARAMS_CSV}" + +run_task() { + local task="$1" + local model_max_length="$2" + + local split_dir="${data_path}/${task}/split" + local train_csv="${split_dir}/train.csv" + + if [[ ! 
-f "${train_csv}" ]]; then + echo "[WARN] Missing ${train_csv}, skip ${task}" + return + fi + + local best_lr="${TASK_LR[$task]}" + local best_wd="${TASK_WD[$task]}" + local best_wr="${TASK_WR[$task]}" + local best_ep="${TASK_EP[$task]}" + local best_seed="${TASK_SEED[$task]}" + + if [[ -z "${best_lr:-}" ]]; then + echo "[WARN] No best params found in CSV for task ${task}, skip" + return + fi + + hp_tag="lr${best_lr}_wd${best_wd}_wr${best_wr}_ep${best_ep}_seed${best_seed}" + run_name="base5120_${task}_${hp_tag}" + run_output_dir="${output_path}/${task}/${MODEL_NAME}/${hp_tag}" + result_json="${run_output_dir}/results/${run_name}/eval_results.json" + + if [[ -f "${result_json}" ]]; then + echo "[SKIP] ${run_name}" + return + fi + + mkdir -p "${run_output_dir}" + echo "[RUN ] ${run_name}" + + cmd=( + python /data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py + --model_name_or_path "${MODEL}" + --tokenizer_path "${TOKENIZER}" + --trust_remote_code True + --data_path "${split_dir}" + --kmer -1 + --run_name "${run_name}" + --model_max_length "${model_max_length}" + --per_device_train_batch_size 32 + --per_device_eval_batch_size 32 + --gradient_accumulation_steps 4 + --learning_rate "${best_lr}" + --weight_decay "${best_wd}" + --num_train_epochs "${best_ep}" + --lr_scheduler_type linear + --warmup_steps 0 + --warmup_ratio "${best_wr}" + --fp16 + --output_dir "${run_output_dir}" + --evaluation_strategy epoch + --save_strategy epoch + --load_best_model_at_end True + --metric_for_best_model eval_f1 + --greater_is_better True + --save_total_limit 1 + --logging_steps 100 + --overwrite_output_dir True + --log_level info + --seed "${best_seed}" + --find_unused_parameters False + --project_name "${project_name}" + ) + "${cmd[@]}" +} + +# Keep the same active tasks as run_dnabert2_1024_multi.sh +for task in demo_coding_vs_intergenomic_seqs human_nontata_promoters human_enhancers_cohn human_ocr_ensembl; do + run_task "${task}" 100 +done + +for task in demo_human_or_worm 
drosophila_enhancers_stark dummy_mouse_enhancers_ensembl human_enhancers_ensembl; do + run_task "${task}" 512 +done + +run_task "human_ensembl_regulatory" 250 diff --git a/Finetune-GenomicBenchmarks/tokenization_dna.py b/Finetune-GenomicBenchmarks/tokenization_dna.py new file mode 100644 index 0000000000000000000000000000000000000000..e28717404d9ffded8e243dfc9e4dd5595c6aefce --- /dev/null +++ b/Finetune-GenomicBenchmarks/tokenization_dna.py @@ -0,0 +1,394 @@ +# coding=utf-8 +# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Tokenization classes.""" + + +import collections +import logging +import os +import math +import unicodedata + + +from transformers import PreTrainedTokenizer, PreTrainedTokenizerFast + + +logger = logging.getLogger(__name__) + +VOCAB_FILES_NAMES = {"vocab_file": os.getenv("VOCAB_NAME")} + +PRETRAINED_VOCAB_FILES_MAP = {"vocab_file": { + 'dna' : os.getenv("VOCAB_PATH") + } + } + + +PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {'dna': os.getenv("POSITIONAL_EMBEDDINGS_SIZE")} +PRETRAINED_INIT_CONFIGURATION = {'dna': {"do_lower_case": False}} + +def load_vocab(vocab_file): + """Loads a vocabulary file into a dictionary.""" + vocab = collections.OrderedDict() + with open(vocab_file, "r", encoding="utf-8") as reader: + tokens = reader.readlines() + for index, token in enumerate(tokens): + token = token.rstrip("\n") + vocab[token] = index + return vocab + + +def whitespace_tokenize(text): + """Runs basic whitespace cleaning and splitting on a piece of text.""" + text = text.strip() + if not text: + return [] + tokens = text.split() + return tokens + + +class DNATokenizer(PreTrainedTokenizer): + r""" + Constructs a BertTokenizer. + :class:`~transformers.BertTokenizer` runs end-to-end tokenization: punctuation splitting + wordpiece + + Args: + vocab_file: Path to a one-wordpiece-per-line vocabulary file + do_lower_case: Whether to lower case the input. Only has an effect when do_basic_tokenize=True + do_basic_tokenize: Whether to do basic tokenization before wordpiece. + max_len: An artificial maximum length to truncate tokenized sequences to; Effective maximum length is always the + minimum of this value (if specified) and the underlying BERT model's sequence length. + never_split: List of tokens which will never be split during tokenization. 
Only has an effect when + do_basic_tokenize=True + """ + + vocab_files_names = VOCAB_FILES_NAMES + pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP + pretrained_init_configuration = PRETRAINED_INIT_CONFIGURATION + max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES + + + def __init__( + self, + vocab_file, + do_lower_case=False, + never_split=None, + unk_token="[UNK]", + sep_token="[SEP]", + pad_token="[PAD]", + cls_token="[CLS]", + mask_token="[MASK]", + tokenize_chinese_chars=True, + **kwargs + ): + """Constructs a BertTokenizer. + + Args: + **vocab_file**: Path to a one-wordpiece-per-line vocabulary file + **do_lower_case**: (`optional`) boolean (default True) + Whether to lower case the input + Only has an effect when do_basic_tokenize=True + **do_basic_tokenize**: (`optional`) boolean (default True) + Whether to do basic tokenization before wordpiece. + **never_split**: (`optional`) list of string + List of tokens which will never be split during tokenization. + Only has an effect when do_basic_tokenize=True + **tokenize_chinese_chars**: (`optional`) boolean (default True) + Whether to tokenize Chinese characters. + This should likely be deactivated for Japanese: + see: https://github.com/huggingface/pytorch-pretrained-BERT/issues/328 + """ + super().__init__( + unk_token=unk_token, + sep_token=sep_token, + pad_token=pad_token, + cls_token=cls_token, + mask_token=mask_token, + **kwargs, + ) + self.max_len_single_sentence = self.max_len - 2 # take into account special tokens + self.max_len_sentences_pair = self.max_len - 3 # take into account special tokens + + if not os.path.isfile(vocab_file): + raise ValueError( + "Can't find a vocabulary file at path '{}'. 
To load the vocabulary from a Google pretrained " + "model use `tokenizer = BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`".format(vocab_file) + ) + self.vocab = load_vocab(vocab_file) + self.kmer = VOCAB_KMER[str(len(self.vocab))] + self.ids_to_tokens = collections.OrderedDict([(ids, tok) for tok, ids in self.vocab.items()]) + self.basic_tokenizer = BasicTokenizer( + do_lower_case=do_lower_case, never_split=never_split, tokenize_chinese_chars=tokenize_chinese_chars + ) + + @property + def vocab_size(self): + return len(self.vocab) + + def _tokenize(self, text): + split_tokens = [] + for token in self.basic_tokenizer.tokenize(text, never_split=self.all_special_tokens): + split_tokens.append(token) + # print(split_tokens) + return split_tokens + + def _convert_token_to_id(self, token): + """ Converts a token (str) in an id using the vocab. """ + return self.vocab.get(token, self.vocab.get(self.unk_token)) + + def _convert_id_to_token(self, index): + """Converts an index (integer) in a token (str) using the vocab.""" + return self.ids_to_tokens.get(index, self.unk_token) + + def convert_tokens_to_string(self, tokens): + """ Converts a sequence of tokens (string) in a single string. """ + out_string = " ".join(tokens).replace(" ##", "").strip() + return out_string + + def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None): + """ + Build model inputs from a sequence or a pair of sequence for sequence classification tasks + by concatenating and adding special tokens. 
+ A BERT sequence has the following format: + single sequence: [CLS] X [SEP] + pair of sequences: [CLS] A [SEP] B [SEP] + """ + cls = [self.cls_token_id] + sep = [self.sep_token_id] + + if token_ids_1 is None: + if len(token_ids_0) < 510: + return cls + token_ids_0 + sep + else: + output = [] + num_pieces = int(len(token_ids_0)//510) + 1 + for i in range(num_pieces): + output.extend(cls + token_ids_0[510*i:min(len(token_ids_0), 510*(i+1))] + sep) + return output + + return cls + token_ids_0 + sep + token_ids_1 + sep + + def get_special_tokens_mask(self, token_ids_0, token_ids_1=None, already_has_special_tokens=False): + """ + Retrieves sequence ids from a token list that has no special tokens added. This method is called when adding + special tokens using the tokenizer ``prepare_for_model`` or ``encode_plus`` methods. + + Args: + token_ids_0: list of ids (must not contain special tokens) + token_ids_1: Optional list of ids (must not contain special tokens), necessary when fetching sequence ids + for sequence pairs + already_has_special_tokens: (default False) Set to True if the token list is already formated with + special tokens for the model + + Returns: + A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token. + """ + + if already_has_special_tokens: + if token_ids_1 is not None: + raise ValueError( + "You should not supply a second sequence if the provided sequence of " + "ids is already formated with special tokens for the model." 
+ ) + return list(map(lambda x: 1 if x in [self.sep_token_id, self.cls_token_id] else 0, token_ids_0)) + + if token_ids_1 is not None: + return [1] + ([0] * len(token_ids_0)) + [1] + ([0] * len(token_ids_1)) + [1] + + if len(token_ids_0) < 510: + return [1] + ([0] * len(token_ids_0)) + [1] + else: + output = [] + num_pieces = int(len(token_ids_0)//510) + 1 + for i in range(num_pieces): + output.extend([1] + ([0] * (min(len(token_ids_0), 510*(i+1))-510*i)) + [1]) + return output + return [1] + ([0] * len(token_ids_0)) + [1] + + def create_token_type_ids_from_sequences(self, token_ids_0, token_ids_1=None): + """ + Creates a mask from the two sequences passed to be used in a sequence-pair classification task. + A BERT sequence pair mask has the following format: + 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 + | first sequence | second sequence + + if token_ids_1 is None, only returns the first portion of the mask (0's). + """ + sep = [self.sep_token_id] + cls = [self.cls_token_id] + if token_ids_1 is None: + if len(token_ids_0) < 510: + return len(cls + token_ids_0 + sep) * [0] + else: + num_pieces = int(len(token_ids_0)//510) + 1 + return (len(cls + token_ids_0 + sep) + 2*(num_pieces-1)) * [0] + return len(cls + token_ids_0 + sep) * [0] + len(token_ids_1 + sep) * [1] + + def save_vocabulary(self, vocab_path): + """Save the tokenizer vocabulary to a directory or file.""" + index = 0 + if os.path.isdir(vocab_path): + vocab_file = os.path.join(vocab_path, VOCAB_FILES_NAMES["vocab_file"]) + else: + vocab_file = vocab_path + with open(vocab_file, "w", encoding="utf-8") as writer: + for token, token_index in sorted(self.vocab.items(), key=lambda kv: kv[1]): + if index != token_index: + logger.warning( + "Saving vocabulary to {}: vocabulary indices are not consecutive." 
+ " Please check that the vocabulary is not corrupted!".format(vocab_file) + ) + index = token_index + writer.write(token + "\n") + index += 1 + return (vocab_file,) + + +class BasicTokenizer(object): + """Runs basic tokenization (punctuation splitting, lower casing, etc.).""" + + def __init__(self, do_lower_case=False, never_split=None, tokenize_chinese_chars=True): + """ Constructs a BasicTokenizer. + + Args: + **do_lower_case**: Whether to lower case the input. + **never_split**: (`optional`) list of str + Kept for backward compatibility purposes. + Now implemented directly at the base class level (see :func:`PreTrainedTokenizer.tokenize`) + List of token not to split. + **tokenize_chinese_chars**: (`optional`) boolean (default True) + Whether to tokenize Chinese characters. + This should likely be deactivated for Japanese: + see: https://github.com/huggingface/pytorch-pretrained-BERT/issues/328 + """ + if never_split is None: + never_split = [] + self.do_lower_case = do_lower_case + self.never_split = never_split + self.tokenize_chinese_chars = tokenize_chinese_chars + + def tokenize(self, text, never_split=None): + """ Basic Tokenization of a piece of text. + Split on "white spaces" only, for sub-word tokenization, see WordPieceTokenizer. + + Args: + **never_split**: (`optional`) list of str + Kept for backward compatibility purposes. + Now implemented directly at the base class level (see :func:`PreTrainedTokenizer.tokenize`) + List of token not to split. + """ + never_split = self.never_split + (never_split if never_split is not None else []) + text = self._clean_text(text) + # This was added on November 1st, 2018 for the multilingual and Chinese + # models. 
This is also applied to the English models now, but it doesn't + # matter since the English models were not trained on any Chinese data + # and generally don't have any Chinese data in them (there are Chinese + # characters in the vocabulary because Wikipedia does have some Chinese + # words in the English Wikipedia.). + orig_tokens = whitespace_tokenize(text) + split_tokens = [] + for token in orig_tokens: + if token not in never_split: + token = self._run_strip_accents(token) + split_tokens.extend(self._run_split_on_punc(token, never_split)) + + output_tokens = whitespace_tokenize(" ".join(split_tokens)) + return output_tokens + + def _run_strip_accents(self, text): + """Strips accents from a piece of text.""" + text = unicodedata.normalize("NFD", text) + output = [] + for char in text: + cat = unicodedata.category(char) + if cat == "Mn": + continue + output.append(char) + return "".join(output) + + def _run_split_on_punc(self, text, never_split=None): + """Splits punctuation on a piece of text.""" + if never_split is not None and text in never_split: + return [text] + chars = list(text) + i = 0 + start_new_word = True + output = [] + while i < len(chars): + char = chars[i] + if _is_punctuation(char): + output.append([char]) + start_new_word = True + else: + if start_new_word: + output.append([]) + start_new_word = False + output[-1].append(char) + i += 1 + + return ["".join(x) for x in output] + + + + def _clean_text(self, text): + """Performs invalid character removal and whitespace cleanup on text.""" + output = [] + for char in text: + cp = ord(char) + if cp == 0 or cp == 0xFFFD or _is_control(char): + continue + if _is_whitespace(char): + output.append(" ") + else: + output.append(char) + return "".join(output) + + +def _is_whitespace(char): + """Checks whether `chars` is a whitespace character.""" + # \t, \n, and \r are technically contorl characters but we treat them + # as whitespace since they are generally considered as such. 
+ if char == " " or char == "\t" or char == "\n" or char == "\r": + return True + cat = unicodedata.category(char) + if cat == "Zs": + return True + return False + + +def _is_control(char): + """Checks whether `chars` is a control character.""" + # These are technically control characters but we count them as whitespace + # characters. + if char == "\t" or char == "\n" or char == "\r": + return False + cat = unicodedata.category(char) + if cat.startswith("C"): + return True + return False + + +def _is_punctuation(char): + """Checks whether `chars` is a punctuation character.""" + cp = ord(char) + # We treat all non-letter/number ASCII as punctuation. + # Characters such as "^", "$", and "`" are not in the Unicode + # Punctuation class but we treat them as punctuation anyways, for + # consistency. + if (cp >= 33 and cp <= 47) or (cp >= 58 and cp <= 64) or (cp >= 91 and cp <= 96) or (cp >= 123 and cp <= 126): + return True + cat = unicodedata.category(char) + if cat.startswith("P"): + return True + return False diff --git a/Finetune-GenomicBenchmarks/tokenization_motif.py b/Finetune-GenomicBenchmarks/tokenization_motif.py new file mode 100644 index 0000000000000000000000000000000000000000..61a20a9f299a6bed526d44f98809d125025cec21 --- /dev/null +++ b/Finetune-GenomicBenchmarks/tokenization_motif.py @@ -0,0 +1,406 @@ +# coding=utf-8 +# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Tokenization classes.""" + + +import collections +import logging +import os +import math +import unicodedata +import json + +from transformers import PreTrainedTokenizer, PreTrainedTokenizerFast + + +logger = logging.getLogger(__name__) + +VOCAB_FILES_NAMES = {"vocab_file": os.getenv("VOCAB_NAME")} + +PRETRAINED_VOCAB_FILES_MAP = {"vocab_file": { + 'motif' : os.getenv("VOCAB_PATH") + } + } + +PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {'motif': os.getenv("POSITIONAL_EMBEDDINGS_SIZE")} +PRETRAINED_INIT_CONFIGURATION = {'motif': {"do_lower_case": False}} + + +def load_vocab(vocab_file): + """Loads a vocabulary file into a dictionary.""" + vocab = collections.OrderedDict() + with open(vocab_file, "r", encoding="utf-8") as reader: + tokens = reader.readlines() + for index, token in enumerate(tokens): + token = token.rstrip("\n") + vocab[token] = index + return vocab + + +def whitespace_tokenize(text): + """Runs basic whitespace cleaning and splitting on a piece of text.""" + text = text.strip() + if not text: + return [] + tokens = text.split() + return tokens + + +class MotifTokenizer(PreTrainedTokenizer): + r""" + Constructs a BertTokenizer. + :class:`~transformers.BertTokenizer` runs end-to-end tokenization: punctuation splitting + wordpiece + + Args: + vocab_file: Path to a one-wordpiece-per-line vocabulary file + do_lower_case: Whether to lower case the input. Only has an effect when do_basic_tokenize=True + do_basic_tokenize: Whether to do basic tokenization before wordpiece. + max_len: An artificial maximum length to truncate tokenized sequences to; Effective maximum length is always the + minimum of this value (if specified) and the underlying BERT model's sequence length. + never_split: List of tokens which will never be split during tokenization. 
Only has an effect when + do_basic_tokenize=True + """ + vocab_files_names = VOCAB_FILES_NAMES + pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP + pretrained_init_configuration = PRETRAINED_INIT_CONFIGURATION + max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES + + def __init__( + self, + vocab_file, + do_lower_case=False, + never_split=None, + unk_token="[UNK]", + sep_token="[SEP]", + pad_token="[PAD]", + cls_token="[CLS]", + mask_token="[MASK]", + tokenize_chinese_chars=False, + additional_special_tokens = None, + **kwargs + ): + """Constructs a BertTokenizer. + Args: + **vocab_file**: Path to a one-wordpiece-per-line vocabulary file + **do_lower_case**: (`optional`) boolean (default True) + Whether to lower case the input + Only has an effect when do_basic_tokenize=True + **do_basic_tokenize**: (`optional`) boolean (default True) + Whether to do basic tokenization before wordpiece. + **never_split**: (`optional`) list of string + List of tokens which will never be split during tokenization. + Only has an effect when do_basic_tokenize=True + **tokenize_chinese_chars**: (`optional`) boolean (default True) + Whether to tokenize Chinese characters. + This should likely be deactivated for Japanese: + see: https://github.com/huggingface/pytorch-pretrained-BERT/issues/328 + """ + super().__init__( + unk_token=unk_token, + sep_token=sep_token, + pad_token=pad_token, + cls_token=cls_token, + mask_token=mask_token, + **kwargs, + ) + self.vocab = load_vocab(vocab_file) + self.max_len_single_sentence = self.model_max_length - 2 # take into account special tokens + self.max_len_sentences_pair = self.model_max_length - 3 # take into account special tokens + if not os.path.isfile(vocab_file): + raise ValueError( + "Can't find a vocabulary file at path '{}'. 
To load the vocabulary from a Google pretrained " + "model use `tokenizer = BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`".format(vocab_file) + ) + self.ids_to_tokens = collections.OrderedDict([(ids, tok) for tok, ids in self.vocab.items()]) + self._additional_special_tokens = additional_special_tokens or [] + self.basic_tokenizer = BasicTokenizer( + do_lower_case=do_lower_case, never_split=never_split, tokenize_chinese_chars=tokenize_chinese_chars + ) + + def add_tokens(self, new_tokens): + """Method to add custom tokens to the tokenizer.""" + self._additional_special_tokens.extend(new_tokens) + self._additional_special_tokens = list(set(self._additional_special_tokens)) # Remove duplicates + print(f"Custom tokens added: {new_tokens}") + print(f"Updated additional_special_tokens: {self._additional_special_tokens}") + + @property + def all_special_tokens(self): + """ List all the special tokens ('', ''...) mapped to class attributes + (cls_token, unk_token...) and custom special tokens (additional_special_tokens). + """ + set_attr = self.special_tokens_map + all_toks = [] + + # Add standard special tokens + for attr_value in set_attr.values(): + all_toks += (list(attr_value) if isinstance(attr_value, (list, tuple)) else [attr_value]) + + # Add custom special tokens + all_toks += self._additional_special_tokens + + # Remove duplicates by converting to a set and back to a list + all_toks = list(set(all_toks)) + + return all_toks + + @property + def vocab_size(self): + return len(self.vocab) + + def _tokenize(self, text): + split_tokens = [] + for token in self.basic_tokenizer.tokenize(text, never_split=self.all_special_tokens): + split_tokens.append(token) + # print(split_tokens) + return split_tokens + def _convert_token_to_id(self, token): + """ Converts a token (str) in an id using the vocab. 
""" + return self.vocab.get(token, self.vocab.get(self.unk_token)) + def _convert_id_to_token(self, index): + """Converts an index (integer) in a token (str) using the vocab.""" + return self.ids_to_tokens.get(index, self.unk_token) + def convert_tokens_to_string(self, tokens): + """ Converts a sequence of tokens (string) in a single string. """ + out_string = " ".join(tokens).replace(" ##", "").strip() + return out_string + def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None): + """ + Build model inputs from a sequence or a pair of sequence for sequence classification tasks + by concatenating and adding special tokens. + A BERT sequence has the following format: + single sequence: [CLS] X [SEP] + pair of sequences: [CLS] A [SEP] B [SEP] + """ + cls = [self.cls_token_id] + sep = [self.sep_token_id] + if token_ids_1 is None: + if len(token_ids_0) < 510: + return cls + token_ids_0 + sep + else: + output = [] + num_pieces = int(len(token_ids_0)//510) + 1 + for i in range(num_pieces): + output.extend(cls + token_ids_0[510*i:min(len(token_ids_0), 510*(i+1))] + sep) + return output + return cls + token_ids_0 + sep + token_ids_1 + sep + def get_special_tokens_mask(self, token_ids_0, token_ids_1=None, already_has_special_tokens=False): + """ + Retrieves sequence ids from a token list that has no special tokens added. This method is called when adding + special tokens using the tokenizer ``prepare_for_model`` or ``encode_plus`` methods. + + Args: + token_ids_0: list of ids (must not contain special tokens) + token_ids_1: Optional list of ids (must not contain special tokens), necessary when fetching sequence ids + for sequence pairs + already_has_special_tokens: (default False) Set to True if the token list is already formated with + special tokens for the model + + Returns: + A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token. 
+ """ + if already_has_special_tokens: + if token_ids_1 is not None: + raise ValueError( + "You should not supply a second sequence if the provided sequence of " + "ids is already formated with special tokens for the model." + ) + return list(map(lambda x: 1 if x in [self.sep_token_id, self.cls_token_id] else 0, token_ids_0)) + if token_ids_1 is not None: + return [1] + ([0] * len(token_ids_0)) + [1] + ([0] * len(token_ids_1)) + [1] + if len(token_ids_0) < 510: + return [1] + ([0] * len(token_ids_0)) + [1] + else: + output = [] + num_pieces = int(len(token_ids_0)//510) + 1 + for i in range(num_pieces): + output.extend([1] + ([0] * (min(len(token_ids_0), 510*(i+1))-510*i)) + [1]) + return output + return [1] + ([0] * len(token_ids_0)) + [1] + def create_token_type_ids_from_sequences(self, token_ids_0, token_ids_1=None): + """ + Creates a mask from the two sequences passed to be used in a sequence-pair classification task. + A BERT sequence pair mask has the following format: + 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 + | first sequence | second sequence + + if token_ids_1 is None, only returns the first portion of the mask (0's). 
+ """ + sep = [self.sep_token_id] + cls = [self.cls_token_id] + if token_ids_1 is None: + if len(token_ids_0) < 510: + return len(cls + token_ids_0 + sep) * [0] + else: + num_pieces = int(len(token_ids_0)//510) + 1 + return (len(cls + token_ids_0 + sep) + 2*(num_pieces-1)) * [0] + return len(cls + token_ids_0 + sep) * [0] + len(token_ids_1 + sep) * [1] + def save_vocabulary(self, vocab_path, filename_prefix=None): + """Save the tokenizer vocabulary to a directory or file.""" + index = 0 + if os.path.isdir(vocab_path): + vocab_file = os.path.join(vocab_path, VOCAB_FILES_NAMES["vocab_file"]) + else: + vocab_file = vocab_path + with open(vocab_file, "w", encoding="utf-8") as writer: + for token, token_index in sorted(self.vocab.items(), key=lambda kv: kv[1]): + if index != token_index: + logger.warning( + "Saving vocabulary to {}: vocabulary indices are not consecutive." + " Please check that the vocabulary is not corrupted!".format(vocab_file) + ) + index = token_index + writer.write(token + "\n") + index += 1 + return (vocab_file,) + + +class BasicTokenizer(object): + """Runs basic tokenization (punctuation splitting, lower casing, etc.).""" + + def __init__(self, do_lower_case=False, never_split=None, tokenize_chinese_chars=True): + """ Constructs a BasicTokenizer. + + Args: + **do_lower_case**: Whether to lower case the input. + **never_split**: (`optional`) list of str + Kept for backward compatibility purposes. + Now implemented directly at the base class level (see :func:`PreTrainedTokenizer.tokenize`) + List of token not to split. + **tokenize_chinese_chars**: (`optional`) boolean (default True) + Whether to tokenize Chinese characters. 
+ This should likely be deactivated for Japanese: + see: https://github.com/huggingface/pytorch-pretrained-BERT/issues/328 + """ + if never_split is None: + never_split = [] + self.do_lower_case = do_lower_case + self.never_split = never_split + self.tokenize_chinese_chars = tokenize_chinese_chars + + def tokenize(self, text, never_split=None): + """ Basic Tokenization of a piece of text. + Split on "white spaces" only, for sub-word tokenization, see WordPieceTokenizer. + + Args: + **never_split**: (`optional`) list of str + Kept for backward compatibility purposes. + Now implemented directly at the base class level (see :func:`PreTrainedTokenizer.tokenize`) + List of token not to split. + """ + never_split = self.never_split + (never_split if never_split is not None else []) + text = self._clean_text(text) + # This was added on November 1st, 2018 for the multilingual and Chinese + # models. This is also applied to the English models now, but it doesn't + # matter since the English models were not trained on any Chinese data + # and generally don't have any Chinese data in them (there are Chinese + # characters in the vocabulary because Wikipedia does have some Chinese + # words in the English Wikipedia.). 
+ orig_tokens = whitespace_tokenize(text) + split_tokens = [] + for token in orig_tokens: + if token not in never_split: + token = self._run_strip_accents(token) + split_tokens.extend(self._run_split_on_punc(token, never_split)) + + output_tokens = whitespace_tokenize(" ".join(split_tokens)) + return output_tokens + + def _run_strip_accents(self, text): + """Strips accents from a piece of text.""" + text = unicodedata.normalize("NFD", text) + output = [] + for char in text: + cat = unicodedata.category(char) + if cat == "Mn": + continue + output.append(char) + return "".join(output) + + def _run_split_on_punc(self, text, never_split=None): + """Splits punctuation on a piece of text.""" + if never_split is not None and text in never_split: + return [text] + chars = list(text) + i = 0 + start_new_word = True + output = [] + while i < len(chars): + char = chars[i] + if _is_punctuation(char): + output.append([char]) + start_new_word = True + else: + if start_new_word: + output.append([]) + start_new_word = False + output[-1].append(char) + i += 1 + + return ["".join(x) for x in output] + + + + def _clean_text(self, text): + """Performs invalid character removal and whitespace cleanup on text.""" + output = [] + for char in text: + cp = ord(char) + if cp == 0 or cp == 0xFFFD or _is_control(char): + continue + if _is_whitespace(char): + output.append(" ") + else: + output.append(char) + return "".join(output) + + +def _is_whitespace(char): + """Checks whether `chars` is a whitespace character.""" + # \t, \n, and \r are technically contorl characters but we treat them + # as whitespace since they are generally considered as such. + if char == " " or char == "\t" or char == "\n" or char == "\r": + return True + cat = unicodedata.category(char) + if cat == "Zs": + return True + return False + + +def _is_control(char): + """Checks whether `chars` is a control character.""" + # These are technically control characters but we count them as whitespace + # characters. 
+ if char == "\t" or char == "\n" or char == "\r": + return False + cat = unicodedata.category(char) + if cat.startswith("C"): + return True + return False + + +def _is_punctuation(char): + """Checks whether `chars` is a punctuation character.""" + cp = ord(char) + # We treat all non-letter/number ASCII as punctuation. + # Characters such as "^", "$", and "`" are not in the Unicode + # Punctuation class but we treat them as punctuation anyways, for + # consistency. + if (cp >= 33 and cp <= 47) or (cp >= 58 and cp <= 64) or (cp >= 91 and cp <= 96) or (cp >= 123 and cp <= 126): + return True + cat = unicodedata.category(char) + if cat.startswith("P"): + return True + return False diff --git a/Finetune-GenomicBenchmarks/tokenize_dnabert2.py b/Finetune-GenomicBenchmarks/tokenize_dnabert2.py new file mode 100644 index 0000000000000000000000000000000000000000..081fc3b6cda8c66524d39d7796d3dc2253f6832d --- /dev/null +++ b/Finetune-GenomicBenchmarks/tokenize_dnabert2.py @@ -0,0 +1,136 @@ +import os +import sys +import numpy as np +import pandas as pd +from os.path import join + +import argparse +import glob +import logging +import os +import pickle +import random +import re +import shutil +from typing import Dict, List, Tuple +from copy import deepcopy +from multiprocessing import Pool + +import numpy as np +import torch +from torch.nn.utils.rnn import pad_sequence +from torch.utils.data import DataLoader, Dataset, RandomSampler, SequentialSampler +from torch.utils.data.distributed import DistributedSampler +from tqdm import tqdm, trange +import collections +import itertools + +from transformers import ( + WEIGHTS_NAME, + AdamW, + BertConfig, + BertForMaskedLM, + BertTokenizer, + CamembertConfig, + CamembertForMaskedLM, + CamembertTokenizer, + DistilBertConfig, + DistilBertForMaskedLM, + DistilBertTokenizer, + GPT2Config, + GPT2LMHeadModel, + GPT2Tokenizer, + OpenAIGPTConfig, + OpenAIGPTLMHeadModel, + OpenAIGPTTokenizer, + PreTrainedModel, + PreTrainedTokenizer, + 
def main():
    """Tokenize every benchmark split with the DNABERT-2 tokenizer.

    Walks ``args.data_dir/<folder>/<subfolder>/{test,dev,train}.csv`` (entries
    whose name starts with "." are skipped), encodes the ``sequence`` column
    and writes a JSON list with one space-separated string of token ids per
    sequence next to the CSV:

    * ``<split>_DNAbert2.json`` for all rows, or
    * ``<split>_DNAbert2_only_POS.json`` for rows whose ``label`` equals 1
      when ``--only_positive`` is given.

    Reads the module-level ``args`` namespace created in the ``__main__``
    guard. Missing split files are reported and skipped.
    """
    model_name_or_path = 'zhihan1996/DNABERT-2-117M'
    cache_dir = '/storage2/fs1/btc/Active/yeli/xiaoxiao.zhou/apps/transformers_cache'

    # cache_dir has to be a keyword argument. The previous
    # `"cache_dir" == cache_dir` expression evaluated to False and was passed
    # as a stray positional argument, so the cache directory was never used.
    tokenizer = transformers.AutoTokenizer.from_pretrained(
        model_name_or_path,
        cache_dir=cache_dir,
    )

    suffix = '_DNAbert2_only_POS.json' if args.only_positive else '_DNAbert2.json'

    for folder in os.listdir(args.data_dir):
        if folder.startswith('.'):
            continue
        for f in os.listdir(os.path.join(args.data_dir, folder)):
            if f.startswith('.'):
                continue

            for name in ['test', 'dev', 'train']:
                data = join(args.data_dir, folder, f, name + '.csv')

                if not os.path.exists(data):
                    print(f"File {data} does not exist, skipping...")
                    continue

                df = pd.read_csv(data)
                print('Processing ' + folder + ' ' + f)

                if args.only_positive:
                    sequences = df.loc[df['label'] == 1, 'sequence']
                else:
                    sequences = df['sequence']

                df_ = []
                for seg in sequences:
                    output = tokenizer.encode_plus(seg, return_tensors="pt")
                    # input_ids has shape (1, n_tokens); take the single row.
                    ids = output['input_ids'][0].tolist()
                    df_.append(" ".join(str(token) for token in ids))

                f_ = join(args.data_dir, folder, f, name + suffix)
                with open(f_, 'w') as file:
                    logging.warning(f"Saving tokenized results to {f_}...")
                    json.dump(df_, file)
AutoModelForMaskedLM.from_pretrained("InstaDeepAI/nucleotide-transformer-500m-human-ref") + + for folder in os.listdir(args.data_dir): + if not folder.startswith('.'): + for f in os.listdir(os.path.join(args.data_dir, folder)): + if not f.startswith('.'): + + for name in ['test', 'dev', 'train']: + data = join(args.data_dir, folder, f, name + '.csv') + + if not os.path.exists(data): + print(f"File {data} does not exist, skipping...") + continue + + df = pd.read_csv(data, sep = '\t') + print('Processing ' + folder + ' ' + f) + df_tokenized = [] + + if args.only_positive: + for i in range(len(df['sequence'])): + if df['label'][i] == 1: + seg = df['sequence'][i] + output = tokenizer.encode_plus(seg, return_tensors="pt") + df_tokenized.append(output['input_ids'].cpu()) + + df_ = [" ".join(str(token.item()) for token in line.squeeze()) for line in df_tokenized] + f_ = join(args.data_dir, folder, f, name + '_NT_only_POS.json') + with open(f_, 'w') as file: + logging.warning(f"Saving tokenized results to {f_}...") + json.dump(df_, file) + + else: + for i in range(len(df['sequence'])): + seg = df['sequence'][i] + output = tokenizer.encode_plus(seg, return_tensors="pt") + df_tokenized.append(output['input_ids'].cpu()) + + df_ = [" ".join(str(token.item()) for token in line.squeeze()) for line in df_tokenized] + f_ = join(args.data_dir, folder, f, name + '_NT.json') + with open(f_, 'w') as file: + logging.warning(f"Saving tokenized results to {f_}...") + json.dump(df_, file) + + +if __name__ == "__main__": + + parser = argparse.ArgumentParser() + + parser.add_argument("--data_dir", type=str, required=True) + parser.add_argument("--only_positive", action="store_true") + + args = parser.parse_args() + + main() diff --git a/Finetune-GenomicBenchmarks/tokenize_v4.py b/Finetune-GenomicBenchmarks/tokenize_v4.py new file mode 100644 index 0000000000000000000000000000000000000000..c116cf01a43700d1bd346813e830cdc245dd3a3c --- /dev/null +++ b/Finetune-GenomicBenchmarks/tokenize_v4.py 
@@ -0,0 +1,210 @@ +import numpy as np +import pandas as pd +from os.path import join + +import argparse +import glob +import logging +import os +import pickle +import random +import re +import shutil +from typing import Dict, List, Tuple +from copy import deepcopy +from multiprocessing import Pool +import sys +import importlib +from pathlib import Path + +import numpy as np +import torch +from torch.nn.utils.rnn import pad_sequence +from torch.utils.data import DataLoader, Dataset, RandomSampler, SequentialSampler +from torch.utils.data.distributed import DistributedSampler +from tqdm import tqdm, trange +import collections +import itertools +import json + +from transformers import ( + WEIGHTS_NAME, + AdamW, + BertConfig, + BertForMaskedLM, + BertTokenizer, + DNATokenizer, + myTokenizer, + MotifTokenizer, + CamembertConfig, + CamembertForMaskedLM, + CamembertTokenizer, + DistilBertConfig, + DistilBertForMaskedLM, + DistilBertTokenizer, + GPT2Config, + GPT2LMHeadModel, + GPT2Tokenizer, + OpenAIGPTConfig, + OpenAIGPTLMHeadModel, + OpenAIGPTTokenizer, + PreTrainedModel, + PreTrainedTokenizer, + RobertaConfig, + RobertaForMaskedLM, + RobertaTokenizer, + get_linear_schedule_with_warmup, +) + + +MODEL_CLASSES = { + "gpt2": (GPT2Config, GPT2LMHeadModel, GPT2Tokenizer), + "openai-gpt": (OpenAIGPTConfig, OpenAIGPTLMHeadModel, OpenAIGPTTokenizer), + "dna": (BertConfig, BertForMaskedLM, DNATokenizer), + "bert": (BertConfig, BertForMaskedLM, BertTokenizer), + "roberta": (RobertaConfig, RobertaForMaskedLM, RobertaTokenizer), + "distilbert": (DistilBertConfig, DistilBertForMaskedLM, DistilBertTokenizer), + "camembert": (CamembertConfig, CamembertForMaskedLM, CamembertTokenizer), + "myBert": (BertConfig, BertForMaskedLM, myTokenizer), + "motifBert": (BertConfig, BertForMaskedLM, MotifTokenizer) +} + +MASK_LIST = { + "3mer_stride1": [-1, 1], + "3mer_stride3": [0], + "6mer_stride1": [-2, -1, 1, 2, 3], + "6mer_stride6": [0], + "motif": [0] +} + +# Setting environment variables 
def tokenize_seq(seg, vocabs, maxlen, motif_hardcoded_sorted, motif_wildcarded_sorted, motif_variations_sorted, k3, k1):
    """Greedy longest-match tokenization of one DNA sequence.

    At every position the longest window (at most `maxlen` characters) that is
    found in one of the vocabularies becomes the next token. For a given
    window the lookup priority is: hard-coded motif, wild-carded motif, motif
    variation, 3-mer, 1-mer.

    Args:
        seg: the sequence to tokenize.
        vocabs: unused; kept for call compatibility.
        maxlen: longest window to try.
        motif_hardcoded_sorted: container of motif strings; only ``in`` is used.
        motif_wildcarded_sorted: mapping ``sequence -> list of token strings``.
        motif_variations_sorted: mapping ``sequence -> list of token strings``.
        k3: container of 3-mers.
        k1: container of single bases.

    Returns:
        ``(tokens, coordinates, names)``. Only ``tokens`` is populated; the
        other two lists are always empty.

    A character that matches no vocabulary at all is emitted unchanged as its
    own token. Previously such a character never advanced the cursor and the
    function looped forever.
    """
    tokens = []
    coordinates = []
    names = []

    i = 0  # start position of the next token
    while i < len(seg):
        matched = False
        # Windows longer than the remaining sequence would all slice to the
        # same string, so start at the remaining length.
        longest = min(maxlen, len(seg) - i)

        for l in range(longest, 0, -1):
            piece = seg[i:i + l]
            if piece in motif_hardcoded_sorted:
                candidates = [piece]
            elif piece in motif_wildcarded_sorted:
                candidates = motif_wildcarded_sorted[piece]
            elif piece in motif_variations_sorted:
                candidates = motif_variations_sorted[piece]
            elif piece in k3 or piece in k1:
                candidates = [piece]
            else:
                continue

            if not candidates:
                continue

            # Several encodings of the same sequence: pick one at random.
            if len(candidates) > 1:
                tokens.append(random.choice(candidates))
            else:
                tokens.append(candidates[0])
            i += l
            matched = True
            break

        if not matched:
            tokens.append(seg[i])
            i += 1

    return tokens, coordinates, names


def _load_motif_operations(path):
    """Read a ``<sequence> <operations>`` file into ``{sequence: [operations, ...]}``."""
    table = collections.defaultdict(list)
    with open(path, "r") as file:
        for line in file:
            line = line.strip()
            if not line:
                continue
            seq, operations = line.split(maxsplit=1)  # split only on the first space
            table[seq].append(operations)
    return dict(table)


def main():
    """Tokenize every benchmark split with the v4 motif vocabulary.

    For each non-hidden folder in ``args.data_dir`` the tab-separated files
    ``<folder>/split/{test,dev,train}.csv`` are read, the ``sequence`` column
    is tokenized with `tokenize_seq`, and a JSON list of space-joined token
    strings is written to ``<split>_token_v4.json`` or, with
    ``--only_positive`` (rows whose ``label`` equals 1 only), to
    ``<split>_token_v4_only_POS.json`` in the same directory.

    Reads the module-level ``args`` namespace created in the ``__main__``
    guard. ``motifs_dedup.txt`` (motif names) is not loaded because names are
    not emitted by this script.
    """
    motif_hardcoded = pd.read_csv(join(args.tokenizer_dir, 'motifs_hardcode.txt'), header=None, names=['column'])
    # Membership must be tested against the motif strings themselves. Passing
    # the DataFrame made `x in frame` test the column labels, so hard-coded
    # motifs were never matched.
    motif_hardcoded_set = set(motif_hardcoded['column'].dropna().astype(str))

    motif_wildcarded = _load_motif_operations(join(args.tokenizer_dir, "motifs_wildcard.txt"))
    motif_variations = _load_motif_operations(join(args.tokenizer_dir, "motifs_variations.txt"))

    k1 = {'A', 'T', 'C', 'G', 'N'}
    k3 = {''.join(term) for term in itertools.product(['A', 'T', 'C', 'G'], repeat=3)}

    suffix = '_token_v4_only_POS.json' if args.only_positive else '_token_v4.json'

    for folder in os.listdir(args.data_dir):
        if folder.startswith('.'):
            continue
        for f in ['test', 'dev', 'train']:
            data = join(args.data_dir, folder, 'split', f + '.csv')
            print('process file: ' + data)

            if not os.path.exists(data):
                print(f"File {data} does not exist, skipping...")
                continue

            df = pd.read_csv(data, sep='\t')
            print('Processing ' + folder + ' ' + f)

            if args.only_positive:
                sequences = df.loc[df['label'] == 1, 'sequence']
            else:
                sequences = df['sequence']

            df_ = []
            for seg in sequences:
                t, _, _ = tokenize_seq(seg, args.tokenizer_dir, 12, motif_hardcoded_set, motif_wildcarded, motif_variations, k3, k1)
                df_.append(" ".join(t))

            # The positive-only path used to be built from a stale `name`
            # variable with the split name as a directory; both outputs now
            # live next to the input CSV.
            f_ = join(args.data_dir, folder, 'split', f + suffix)
            with open(f_, 'w') as file:
                json.dump(df_, file)
class TrieNode:
    """One node of the motif trie."""

    def __init__(self):
        self.children = {}           # next character -> TrieNode
        self.is_end_of_word = False  # True when a stored word ends here
        self.features = []           # feature entries attached to that word


class Trie:
    """Character trie mapping stored words to optional feature entries."""

    def __init__(self):
        self.root = TrieNode()
        self.lookup_table = {}

    def insert(self, word, features=None):
        """Store `word`; a truthy `features` value is appended to its feature list."""
        current_node = self.root
        for char in word:
            if char not in current_node.children:
                current_node.children[char] = TrieNode()
            current_node = current_node.children[char]
        current_node.is_end_of_word = True
        if features:
            current_node.features.append(features)

    def print_trie(self, node=None, prefix="", level=0):
        """Print the trie, one character per line, indented by depth."""
        if node is None:
            node = self.root
        for char, child_node in node.children.items():
            print(" " * level + "'{}'{}".format(char, " (end)" if child_node.is_end_of_word else ""))
            self.print_trie(child_node, prefix + char, level + 1)

    def search(self, word):
        """Look up `word` as an exact match.

        Returns:
            The word's feature list when it is stored with features, ``True``
            when it is stored without any, and ``False`` when it is not stored.
        """
        current_node = self.root
        for char in word:
            current_node = current_node.children.get(char)
            if current_node is None:
                return False  # word not found
        if not current_node.is_end_of_word:
            return False  # only a prefix of a stored word
        return current_node.features if current_node.features else True


def load_trie_from_file(filename):
    """Unpickle and return a trie stored at `filename`.

    NOTE(review): `pickle.load` executes arbitrary code from the file; only
    load trie files that come from a trusted source.
    """
    with open(filename, 'rb') as file:
        return pickle.load(file)


def load_tokenizer5_1():
    """Build the 'motifBert' tokenizer and register the v5.1 extra tokens.

    The extra tokens are the ``WC_POS_<i>_*_<base>`` wildcard tokens for
    positions 0-11 plus the first operation word of every line in the
    hard-coded ``motifs_wildcard.txt`` file.
    """
    config_class, model_class, tokenizer_class = MODEL_CLASSES['motifBert']
    tokenizer = tokenizer_class.from_pretrained('motif', cache_dir=None)

    bases = ['A', 'T', 'C', 'G']

    token_wc = [
        f"{operator}_POS_{i}_*_{char}"
        for operator, i, char in itertools.product(['WC'], range(12), bases)
    ]

    motif_wildcarded = []
    with open(os.path.join('/storage2/fs1/btc/Active/yeli/xiaoxiao.zhou/tokenize/tokenizers/tokenizer_v5.1/hg38_NOOP', "motifs_wildcard.txt"), "r") as file:
        for line in file:
            seq, operations = line.strip().split(maxsplit=1)  # split only on the first space
            motif_wildcarded.append(operations.split()[0])

    tokenizer.add_tokens(token_wc + motif_wildcarded)
    return tokenizer


def tokenize(seg, i, maxlen, motif_hardcoded_trie, motif_wildcarded_trie, k3, k1, lookup_table):
    '''
    Pick the best-scoring token that starts at position `i` of `seg`.

    Parameters:
        seg: a sequence chunk from the chromosome
        i: the start position in this segment (expected to be < len(seg))
        maxlen: the longest distance considered to find a motif, should be the
            longest word in the vocabulary
        motif_hardcoded_trie: trie of hard-coded motifs
        motif_wildcarded_trie: trie of sequences whose features are
            "<motif> <wildcard operations...>" token strings
        k3: container of 3-mers
        k1: container of single bases
        lookup_table: mapping from motif to its name

    Returns:
        (token, name, score, next_pos). `name` is '-' for tokens without an
        entry in `lookup_table`; `next_pos` is where tokenization continues.

    Score design rule:
        reward the length of the underlying sequence (a hard-coded motif of
        length l scores l); a wild-carded motif scores l - exp(wd / l), where
        wd is its number of wildcard operations. Ties keep the candidate
        found first, i.e. the longer window and hard-coded before wild-carded.

    Windows are limited to the characters that are actually left, so a match
    that is cut short by the end of the sequence is scored by its real length.
    A position where no motif, 3-mer or 1-mer matches yields that single
    character as a token with score 0 instead of raising TypeError.
    '''
    best_token = None
    best_score = -float('inf')

    longest = min(maxlen, len(seg) - i)
    for l in range(longest, 3, -1):
        segment = seg[i:i + l]

        if motif_hardcoded_trie.search(segment):
            if l > best_score:
                best_token, best_score = [segment], l

        # search() returns True for an entry without features; only a
        # non-empty list offers something to choose from.
        features = motif_wildcarded_trie.search(segment)
        if isinstance(features, (list, tuple)) and features:
            choice = random.choice(features)
            wd = len(choice.split()) - 1  # the number of wildcards
            score = l - np.exp(wd / l)    # fewer wildcards -> smaller penalty
            if score > best_score:
                best_token, best_score = [choice], score

    # if no motif was found, tokenize with a 3-mer, then a 1-mer
    if best_token is None:
        for l in range(3, 0, -1):
            segment = seg[i:i + l]
            if segment in k3:
                best_token, best_score = [segment], 3
                break
            if segment in k1:
                best_token, best_score = [segment], 1
                break

    # character outside every vocabulary: pass it through unchanged
    if best_token is None:
        best_token, best_score = [seg[i:i + 1]], 0

    words = best_token[0].split()
    motif = words[0] if words else best_token[0]
    name = lookup_table.get(motif, '-')  # '-' is the name given to non-motif tokens
    next_pos = i + max(len(motif), 1)    # always move forward

    return best_token[0], name, best_score, next_pos


def tokenize_seq(seg, vocab_path, maxlen, motif_hardcoded_trie, motif_wildcarded_trie, k1, k3, lookup_table):
    """Tokenize a whole sequence, allowing a small look-ahead.

    At each position the best token is computed with `tokenize`. When that
    token is longer than one character, the positions one and two characters
    to the right are scored as well and the highest-scoring start wins (the
    earlier start is kept on ties). Characters skipped by a shifted start are
    emitted as single-character tokens.

    `vocab_path` is unused and kept for call compatibility.

    Returns:
        ``(tokens, coordinates, names)``. Only ``tokens`` is populated; the
        other two lists are always empty.
    """
    tokens = []
    names = []
    coordinates = []

    i = 0  # start position
    while i < len(seg):
        best_token, best_name, best_score, next_pos = tokenize(seg, i, maxlen, motif_hardcoded_trie, motif_wildcarded_trie, k3, k1, lookup_table)
        best_i = i

        # Tokens have length 1, 3 or >= 5. Shifting right only makes sense
        # when the current token is not a 1-mer; otherwise it is the same as
        # tokenizing from the next position.
        if len(best_token) > 1:
            for shift in (1, 2):
                i_shifted = i + shift
                if i_shifted >= len(seg):
                    break
                token_, name_, score_, next_pos_ = tokenize(seg, i_shifted, maxlen, motif_hardcoded_trie, motif_wildcarded_trie, k3, k1, lookup_table)
                if score_ > best_score:
                    best_token, best_name, best_i, next_pos, best_score = token_, name_, i_shifted, next_pos_, score_

        tokens.extend(seg[i:best_i])  # characters skipped by a shifted start
        tokens.append(best_token)

        i = next_pos

    return tokens, coordinates, names
import os
import sys

import wandb

# SECURITY: API keys must never be committed to source control. The key is
# taken from the WANDB_API_KEY environment variable; when it is unset,
# key=None lets wandb fall back to its own credential lookup.
# The keys that were previously hard-coded here (one of them in a
# commented-out line) are exposed in the repository history and should be
# revoked.
wandb.login(key=os.environ.get("WANDB_API_KEY"))

# Mark the triton modules as unavailable before anything tries to import them.
os.environ["DISABLE_TRITON"] = "1"
sys.modules['triton'] = None
sys.modules['flash_attn_triton'] = None
@dataclass
class ModelArguments:
    """Command-line options describing the model and the optional LoRA setup."""

    model_name_or_path: Optional[str] = field(default="facebook/opt-125m")
    trust_remote_code: bool = field(default=False, metadata={"help": "for custom models(has custom code that needs to be executed (e.g., custom architectures, tokenizers, or modeling files)), whether local or from the Hub"})
    use_lora: bool = field(default=False, metadata={"help": "whether to use LoRA"})
    lora_r: int = field(default=8, metadata={"help": "hidden dimension for LoRA"})
    lora_alpha: int = field(default=32, metadata={"help": "alpha for LoRA"})
    lora_dropout: float = field(default=0.05, metadata={"help": "dropout rate for LoRA"})
    lora_target_modules: str = field(default="query,value", metadata={"help": "where to perform LoRA"})
    tokenizer_path: Optional[str] = field(default="facebook/opt-125m")


@dataclass
class DataArguments:
    """Command-line options describing the input data and its pre-tokenization."""

    data_path: str = field(default=None, metadata={"help": "Path to the training data."})
    kmer: int = field(default=-1, metadata={"help": "k-mer for input sequence. -1 means not using k-mer."})
    customized_tokenizer: Optional[str] = field(default=None)


@dataclass
class TrainingArguments(transformers.TrainingArguments):
    """`transformers.TrainingArguments` with project defaults and extra options."""

    vocab_file: Optional[str] = field(
        default=None,
        metadata={"help": "Path to custom vocabulary file (overrides Hugging Face default)"}
    )
    cache_dir: Optional[str] = field(default=None)
    run_name: str = field(default="run")
    optim: str = field(default="adamw_torch")
    model_max_length: int = field(default=512, metadata={"help": "Maximum sequence length."})
    gradient_accumulation_steps: int = field(default=1)
    per_device_train_batch_size: int = field(default=1)
    per_device_eval_batch_size: int = field(default=1)
    num_train_epochs: int = field(default=1)
    fp16: bool = field(default=False)
    logging_steps: int = field(default=100)
    save_steps: int = field(default=100)
    eval_steps: int = field(default=100)
    # No trailing comma here: `field(...),` made the class attribute a
    # one-element tuple, so the default was a tuple instead of "steps".
    evaluation_strategy: str = field(default="steps")
    warmup_steps: int = field(default=50)
    weight_decay: float = field(default=0.01)
    learning_rate: float = field(default=1e-4)
    save_total_limit: int = field(default=3)
    load_best_model_at_end: bool = field(default=False)
    output_dir: str = field(default="output")
    find_unused_parameters: bool = field(default=False)
    checkpointing: bool = field(default=False)
    dataloader_pin_memory: bool = field(default=False)
    eval_and_save_results: bool = field(default=True)
    save_model: bool = field(default=False)
    seed: int = field(default=42)
    project_name: str = field(default=None)


def safe_save_model_for_hf_trainer(trainer: transformers.Trainer, output_dir: str):
    """Collect the model's state dict and, if this process should save, dump it to disk.

    The tensors are moved to CPU before being handed to ``trainer._save``.
    Nothing is written when ``trainer.args.should_save`` is false.
    """
    state_dict = trainer.model.state_dict()
    if trainer.args.should_save:
        cpu_state_dict = {key: value.cpu() for key, value in state_dict.items()}
        del state_dict
        trainer._save(output_dir, state_dict=cpu_state_dict)  # noqa
def get_alter_of_dna_sequence(sequence: str):
    """Return the base-wise complement of `sequence` (A<->T, C<->G).

    The sequence is not reversed, so the result is the complement read in the
    same direction, not the reverse complement.

    Raises:
        KeyError: if `sequence` contains a character other than A, T, C or G.
    """
    MAP = {"A": "T", "T": "A", "C": "G", "G": "C"}
    return "".join([MAP[c] for c in sequence])


def generate_kmer_str(sequence: str, k: int) -> str:
    """Return the overlapping k-mers of `sequence` (stride 1) joined by spaces."""
    return " ".join([sequence[i:i+k] for i in range(len(sequence) - k + 1)])


def load_or_generate_kmer(data_path: str, texts: List[str], k: int) -> List[str]:
    """Load the cached k-mer strings for `data_path`, or generate and cache them.

    The cache lives next to the data file under the same name with ".csv"
    replaced by "_{k}mer.json". When it does not exist, one k-mer string is
    generated per entry of `texts` and written there.
    """
    kmer_path = data_path.replace(".csv", f"_{k}mer.json")
    if os.path.exists(kmer_path):
        logging.warning(f"Loading k-mer from {kmer_path}...")
        with open(kmer_path, "r") as f:
            kmer = json.load(f)
    else:
        logging.warning(f"Generating k-mer...")
        kmer = [generate_kmer_str(text, k) for text in texts]
        with open(kmer_path, "w") as f:
            logging.warning(f"Saving k-mer to {kmer_path}...")
            json.dump(kmer, f)

    return kmer


def load_customized_data(data_path: str, texts: List[str], customized_tokenizer: str) -> List[str]:
    """Load sequences that were pre-tokenized by a customized tokenizer.

    The file is expected next to the data file under the same name with
    ".csv" replaced by "_{customized_tokenizer}.json". `texts` is unused and
    kept for call compatibility.

    Raises:
        FileNotFoundError: if the pre-tokenized file does not exist
            (previously this surfaced as an UnboundLocalError).
    """
    customize_path = data_path.replace(".csv", f"_{customized_tokenizer}.json")
    print(customize_path)
    if not os.path.exists(customize_path):
        raise FileNotFoundError(
            f"Pre-tokenized file not found: {customize_path}"
        )

    logging.warning(f"Loading data by customized tokenizer from {customize_path}...")
    with open(customize_path, "r") as f:
        return json.load(f)


class SupervisedDataset(Dataset):
    """Dataset for supervised fine-tuning.

    Reads a tab-separated file (the first row is skipped as a header), maps
    the distinct label strings to consecutive integers in sorted order and
    tokenizes all texts up front.

    Supported row layouts, chosen by the column count of the first data row:

    * 2 columns: ``[text, label]``
    * 3 columns: ``[text1, text2, label]`` (sequence pairs)
    * 5 columns: Genomic Benchmarks layout, text in column 4

    Raises:
        ValueError: if the file has no data rows or an unsupported layout.
    """

    def __init__(self,
                 data_path: str,
                 tokenizer: "transformers.PreTrainedTokenizer",
                 kmer: int = -1,
                 customized_tokenizer=None):

        super(SupervisedDataset, self).__init__()

        # load data from the disk
        with open(data_path, "r") as f:
            data = list(csv.reader(f, delimiter='\t'))[1:]
        if not data:
            raise ValueError(f"No data rows found in {data_path}.")

        n_columns = len(data[0])
        if n_columns == 2:
            # data is in the format of [text, label]
            logging.warning("Perform single sequence classification...")
            texts = [d[0] for d in data]
            raw_labels = [d[1] for d in data]
        elif n_columns == 3:
            # data is in the format of [text1, text2, label]
            logging.warning("Perform sequence-pair classification...")
            texts = [[d[0], d[1]] for d in data]
            # The label is the third column; column 1 is the second text.
            raw_labels = [d[2] for d in data]
        elif n_columns == 5:
            logging.warning("Perform single sequence classification on Genomic Benchmarks...")
            texts = [d[4] for d in data]
            # NOTE(review): labels are read from column 1 as before; an older
            # commented-out line used column 0 - confirm against the data files.
            raw_labels = [d[1] for d in data]
        else:
            raise ValueError("Data format not supported.")

        # map the unique label strings to integers
        label2id = {label: idx for idx, label in enumerate(sorted(set(raw_labels)))}
        labels = [label2id[label] for label in raw_labels]

        if kmer != -1:
            logging.warning(f"Using {kmer}-mer as input...")
            texts = load_or_generate_kmer(data_path, texts, kmer)
        elif customized_tokenizer:
            logging.warning(f"Using {customized_tokenizer} as input...")
            texts = load_customized_data(data_path, texts, customized_tokenizer)

        output = tokenizer(
            texts,
            return_tensors="pt",
            padding="longest",
            max_length=tokenizer.model_max_length,
            truncation=True,
        )

        self.input_ids = output["input_ids"]
        self.attention_mask = output["attention_mask"]
        self.labels = labels
        self.num_labels = len(set(labels))

    def __len__(self):
        return len(self.input_ids)

    def __getitem__(self, i) -> Dict[str, torch.Tensor]:
        return dict(input_ids=self.input_ids[i], labels=self.labels[i])
needed + logits = logits.reshape(-1, logits.shape[-1]) + + return torch.argmax(logits, dim=-1) + + +""" +Compute metrics used for huggingface trainer. +""" +def compute_metrics(eval_pred): + predictions, labels = eval_pred + return calculate_metric_with_sklearn(predictions, labels) + +def load_token_v5_1(tokenizer_kwargs): + config_class, model_class, tokenizer_class = MODEL_CLASSES['motifBert'] + tokenizer = MotifTokenizer(**tokenizer_kwargs) + + bases = ['A', 'T', 'C', 'G'] + + token_wc = [ + f"{operator}_POS_{i}_*_{char}" + for operator, i, char in itertools.product(['WC'], range(12), bases) + ] + + motif_wildcarded = [] + with open(os.path.join('/storage2/fs1/btc/Active/yeli/xiaoxiao.zhou/tokenize/tokenizers/tokenizer_v5.1/hg38_NOOP', "motifs_wildcard.txt"), "r") as file: + for line in file: + seq, operations = line.strip().split(maxsplit=1) # Split only on the first space + motif_wildcarded.append(operations.split()[0]) # Store in dictionary + + tokenizer.add_tokens(token_wc + motif_wildcarded) + return tokenizer + +def load_token_v4(tokenizer_kwargs): + config_class, model_class, tokenizer_class = MODEL_CLASSES['motifBert'] + tokenizer = MotifTokenizer(**tokenizer_kwargs) + + bases = ['A', 'T', 'C', 'G'] + token_del = [ + f"{operator}_POS_{i}_{char}" + for operator, i, char in itertools.product(['DEL'], range(12), bases) + ] + token_rep = [ + f"{operator}_POS_{i}_{char1}_{char2}" + for operator, i, char1, char2 in itertools.product(['SUB'], range(12), bases, bases) + if char1 != char2 + ] + + token_wc = [ + f"{operator}_POS_{i}_*_{char}" + for operator, i, char in itertools.product(['WC'], range(12), bases) + ] + + token_ins = [ + f"{operator}_POS_{i}_{char}" + for operator, i, char in itertools.product(['INS'], range(13), bases) + ] + + motif_wildcarded = [] + with open(os.path.join('/storage2/fs1/btc/Active/yeli/xiaoxiao.zhou/tokenize/tokenizers/tokenizer_v4/hg38', "motifs_wildcard.txt"), "r") as file: + for line in file: + seq, operations = 
line.strip().split(maxsplit=1) # Split only on the first space + motif_wildcarded.append(operations.split()[0]) # Store in dictionary + + tokenizer.add_tokens(token_del + token_rep + token_wc + token_ins + motif_wildcarded) + return tokenizer + +def train(): + + parser = transformers.HfArgumentParser((ModelArguments, DataArguments, TrainingArguments)) + model_args, data_args, training_args = parser.parse_args_into_dataclasses() + + wandb.init( + project=training_args.project_name, + ) + + tokenizer_kwargs = { + "cache_dir": training_args.cache_dir, + "model_max_length": training_args.model_max_length, + "padding_side": "right", + "use_fast": True, + "trust_remote_code": model_args.trust_remote_code # 除非必要否则建议保持False + } + + if training_args.vocab_file is not None: + if not os.path.exists(training_args.vocab_file): + raise ValueError(f"Vocab file not found at: {training_args.vocab_file}") + tokenizer_kwargs["vocab_file"] = training_args.vocab_file + + if data_args.customized_tokenizer == 'token_v4': + tokenizer = load_token_v4(tokenizer_kwargs) + + elif data_args.customized_tokenizer == 'token_v5_1': + tokenizer = load_token_v5_1(tokenizer_kwargs) + + else: + tokenizer = transformers.PreTrainedTokenizerFast( + tokenizer_file=model_args.tokenizer_path, + **tokenizer_kwargs + ) + + tokenizer.pad_token = "[PAD]" + tokenizer.unk_token = "[UNK]" + tokenizer.cls_token = "[CLS]" + tokenizer.sep_token = "[SEP]" + tokenizer.mask_token = "[MASK]" + + if "InstaDeepAI" in model_args.model_name_or_path: + tokenizer.eos_token = tokenizer.pad_token + + # define datasets and data collator + train_dataset = SupervisedDataset(tokenizer=tokenizer, + data_path=os.path.join(data_args.data_path, "train.csv"), + kmer=data_args.kmer, + customized_tokenizer=data_args.customized_tokenizer) + val_dataset = SupervisedDataset(tokenizer=tokenizer, + data_path=os.path.join(data_args.data_path, "dev.csv"), + kmer=data_args.kmer, + customized_tokenizer=data_args.customized_tokenizer) + test_dataset 
= SupervisedDataset(tokenizer=tokenizer, + data_path=os.path.join(data_args.data_path, "test.csv"), + kmer=data_args.kmer, + customized_tokenizer=data_args.customized_tokenizer) + data_collator = DataCollatorForSupervisedDataset(tokenizer=tokenizer) + + + config = transformers.AutoConfig.from_pretrained( + model_args.model_name_or_path, + num_labels = train_dataset.num_labels, + trust_remote_code=model_args.trust_remote_code + ) + + model = transformers.AutoModelForSequenceClassification.from_pretrained( + model_args.model_name_or_path, + cache_dir=training_args.cache_dir, + config=config, # pass the adjusted config + trust_remote_code=model_args.trust_remote_code + ).to("cuda") + + # configure LoRA + if model_args.use_lora: + lora_config = LoraConfig( + r=model_args.lora_r, + lora_alpha=model_args.lora_alpha, + target_modules=list(model_args.lora_target_modules.split(",")), + lora_dropout=model_args.lora_dropout, + bias="none", + task_type="SEQ_CLS", + inference_mode=False, + ) + model = get_peft_model(model, lora_config) + model.print_trainable_parameters() + + # define trainer + trainer = transformers.Trainer(model=model, + tokenizer=tokenizer, + args=training_args, + preprocess_logits_for_metrics=preprocess_logits_for_metrics, + compute_metrics=compute_metrics, + train_dataset=train_dataset, + eval_dataset=val_dataset, + data_collator=data_collator) + trainer.train() + + if training_args.save_model: + trainer.save_state() + safe_save_model_for_hf_trainer(trainer=trainer, output_dir=training_args.output_dir) + + # get the evaluation results from trainer + if training_args.eval_and_save_results: + results_path = os.path.join(training_args.output_dir, "results", training_args.run_name) + results = trainer.evaluate(eval_dataset=test_dataset) + os.makedirs(results_path, exist_ok=True) + with open(os.path.join(results_path, "eval_results.json"), "w") as f: + json.dump(results, f) + + + + +if __name__ == "__main__": + + train() \ No newline at end of file diff --git 
a/Finetune-GenomicBenchmarks/wandb/debug-internal.log b/Finetune-GenomicBenchmarks/wandb/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..c63637f7a5514a3bf5bc320e1d1ec861a78452b9 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/debug-internal.log @@ -0,0 +1,11 @@ +{"time":"2026-03-24T15:34:02.137925903-07:00","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2026-03-24T15:34:02.619476859-07:00","level":"INFO","msg":"stream: created new stream","id":"oww9zr78"} +{"time":"2026-03-24T15:34:02.619722338-07:00","level":"INFO","msg":"handler: started","stream_id":"oww9zr78"} +{"time":"2026-03-24T15:34:02.619835137-07:00","level":"INFO","msg":"stream: started","id":"oww9zr78"} +{"time":"2026-03-24T15:34:02.619904927-07:00","level":"INFO","msg":"sender: started","stream_id":"oww9zr78"} +{"time":"2026-03-24T15:34:02.619950787-07:00","level":"INFO","msg":"writer: started","stream_id":"oww9zr78"} +{"time":"2026-03-24T16:36:35.530641442-07:00","level":"INFO","msg":"stream: closing","id":"oww9zr78"} +{"time":"2026-03-24T16:36:36.04031333-07:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2026-03-24T16:36:52.13435371-07:00","level":"INFO","msg":"handler: closed","stream_id":"oww9zr78"} +{"time":"2026-03-24T16:36:52.134643249-07:00","level":"INFO","msg":"sender: closed","stream_id":"oww9zr78"} +{"time":"2026-03-24T16:36:52.134701719-07:00","level":"INFO","msg":"stream: closed","id":"oww9zr78"} diff --git a/Finetune-GenomicBenchmarks/wandb/debug.log b/Finetune-GenomicBenchmarks/wandb/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..c00e28eb09a0465fc930d44c34c6ead5af06b7ac --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/debug.log @@ -0,0 +1,24 @@ +2026-03-24 15:34:01,646 INFO MainThread:3456373 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1 +2026-03-24 15:34:01,646 INFO MainThread:3456373 [wandb_setup.py:_flush():80] Configure 
stats pid to 3456373 +2026-03-24 15:34:01,646 INFO MainThread:3456373 [wandb_setup.py:_flush():80] Loading settings from /home/nanhuang/.config/wandb/settings +2026-03-24 15:34:01,646 INFO MainThread:3456373 [wandb_setup.py:_flush():80] Loading settings from /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/settings +2026-03-24 15:34:01,646 INFO MainThread:3456373 [wandb_setup.py:_flush():80] Loading settings from environment variables +2026-03-24 15:34:01,646 INFO MainThread:3456373 [wandb_init.py:setup_run_log_directory():714] Logging user logs to /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_153401-oww9zr78/logs/debug.log +2026-03-24 15:34:01,646 INFO MainThread:3456373 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_153401-oww9zr78/logs/debug-internal.log +2026-03-24 15:34:01,646 INFO MainThread:3456373 [wandb_init.py:init():841] calling init triggers +2026-03-24 15:34:01,646 INFO MainThread:3456373 [wandb_init.py:init():846] wandb.init called with sweep_config: {} +config: {'_wandb': {}} +2026-03-24 15:34:01,646 INFO MainThread:3456373 [wandb_init.py:init():889] starting backend +2026-03-24 15:34:02,030 INFO MainThread:3456373 [wandb_init.py:init():892] sending inform_init request +2026-03-24 15:34:02,134 INFO MainThread:3456373 [wandb_init.py:init():900] backend started and connected +2026-03-24 15:34:02,149 INFO MainThread:3456373 [wandb_init.py:init():970] updated telemetry +2026-03-24 15:34:02,152 INFO MainThread:3456373 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout +2026-03-24 15:34:03,352 INFO MainThread:3456373 [wandb_init.py:init():1041] starting run threads in backend +2026-03-24 15:34:03,705 INFO MainThread:3456373 [wandb_run.py:_console_start():2521] atexit reg +2026-03-24 15:34:03,705 INFO MainThread:3456373 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2026-03-24 15:34:03,705 INFO MainThread:3456373 
[wandb_run.py:_redirect():2438] Wrapping output streams. +2026-03-24 15:34:03,706 INFO MainThread:3456373 [wandb_run.py:_redirect():2461] Redirects installed. +2026-03-24 15:34:03,713 INFO MainThread:3456373 [wandb_init.py:init():1081] run started, returning control to user process +2026-03-24 15:36:13,359 INFO MainThread:3456373 [wandb_run.py:_config_callback():1396] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 512, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['BertForMaskedLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1', 2: 'LABEL_2'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1, 'LABEL_2': 2}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': '/data/nanhuang/Nan/models/DNAbert2_Pretrained', 'transformers_version': '4.35.2', 'model_type': 'bert', 'vocab_size': 4096, 'hidden_size': 768, 'num_hidden_layers': 12, 
'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 512, 'type_vocab_size': 2, 'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'classifier_dropout': None, 'output_dir': 'genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 32, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 3e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'warmup_ratio': 0.03, 'warmup_steps': 0, 'log_level': 'info', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 'genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42/runs/Mar24_15-34-01_u112222', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 100, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 100, 'save_total_limit': 1, 'save_safetensors': True, 'save_on_each_node': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 100, 'dataloader_num_workers': 0, 
'past_index': -1, 'run_name': 'base5120_human_ensembl_regulatory_lr3e-5_wd0.0_wr0.03_ep5_seed42', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'eval_f1', 'greater_is_better': True, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': False, 'include_tokens_per_second': False, 'neftune_noise_alpha': None, 'vocab_file': None, 'cache_dir': None, 'model_max_length': 250, 'find_unused_parameters': False, 'checkpointing': False, 'eval_and_save_results': True, 'save_model': False, 'project_name': 'genomic_bench_DNAbert2'} +2026-03-24 16:36:35,531 INFO wandb-AsyncioManager-main:3456373 [service_client.py:_forward_responses():80] Reached EOF. 
+2026-03-24 16:36:35,531 INFO wandb-AsyncioManager-main:3456373 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles. diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_043205-qaue51mb/files/config.yaml b/Finetune-GenomicBenchmarks/wandb/run-20260324_043205-qaue51mb/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fdd1de9e1771cc1c2a0aea589187fe20a39918b0 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_043205-qaue51mb/files/config.yaml @@ -0,0 +1,559 @@ +_name_or_path: + value: /data/nanhuang/Nan/models/DNAbert2_Pretrained +_wandb: + value: + cli_version: 0.23.1 + e: + 9vfj87f0r8x080uqa5hkz0vumjuqralh: + args: + - --model_name_or_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained + - --tokenizer_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json + - --trust_remote_code + - "True" + - --data_path + - /data/nanhuang/Nan/ft_data/demo_coding_vs_intergenomic_seqs/split + - --kmer + - "-1" + - --run_name + - base5120_demo_coding_vs_intergenomic_seqs_lr3e-5_wd0.0_wr0.06_ep3_seed42 + - --model_max_length + - "100" + - --per_device_train_batch_size + - "128" + - --per_device_eval_batch_size + - "128" + - --gradient_accumulation_steps + - "1" + - --learning_rate + - "3e-5" + - --weight_decay + - "0.0" + - --num_train_epochs + - "3" + - --lr_scheduler_type + - linear + - --warmup_steps + - "0" + - --warmup_ratio + - "0.06" + - --fp16 + - --output_dir + - genomic_bench_DNAbert2_output/demo_coding_vs_intergenomic_seqs/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.06_ep3_seed42 + - --evaluation_strategy + - epoch + - --save_strategy + - epoch + - --load_best_model_at_end + - "True" + - --metric_for_best_model + - eval_f1 + - --greater_is_better + - "True" + - --save_total_limit + - "1" + - --logging_steps + - "100" + - --overwrite_output_dir + - "True" + - --log_level + - info + - --seed + - "42" + - --find_unused_parameters + - "False" + - --project_name + - genomic_bench_DNAbert2 + codePath: 
train.py + codePathLocal: train.py + cpu_count: 64 + cpu_count_logical: 128 + cudaVersion: "12.4" + disk: + /: + total: "3768964489216" + used: "3557696446464" + email: n5huang@ucsd.edu + executable: /data/nanhuang/miniconda3/envs/bpe_v2/bin/python + gpu: NVIDIA RTX A6000 + gpu_count: 8 + gpu_nvidia: + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-5a3d8a94-f380-da39-63d2-4cae98f5c2ae + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-cf8724bd-d619-7916-ee26-88d517a20c47 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-48b494ab-4a63-ff4c-5cc8-746af5d27310 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-968c7ea7-97bf-416a-7689-72c141cfc2bb + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-d53c626b-860f-1dec-1cfa-1dfcde78bc88 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-caa40ec7-afcb-5fe0-c53a-85eb54152941 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-18ee7a7f-1bbe-edef-c72c-3abed60917b2 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-a8757d5a-c26e-48c6-a704-dfe62167fc81 + host: u112222 + memory: + total: "1082030182400" + os: Linux-5.15.0-126-generic-x86_64-with-glibc2.35 + program: /data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py + python: CPython 3.9.18 + root: /data/nanhuang/Nan/Finetune-GenomicBenchmarks + startedAt: "2026-03-24T11:32:05.500129Z" + writerId: 9vfj87f0r8x080uqa5hkz0vumjuqralh + m: + - "1": train/global_step + "6": + - 3 + "7": [] + - "2": '*' + "5": 1 + "6": + - 1 + "7": [] + python_version: 3.9.18 + t: + "1": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 
+ "2": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "3": + - 7 + - 66 + "4": 3.9.18 + "5": 0.23.1 + "6": 4.35.2 + "9": + "1": transformers_trainer + "12": 0.23.1 + "13": linux-x86_64 +adafactor: + value: false +adam_beta1: + value: 0.9 +adam_beta2: + value: 0.999 +adam_epsilon: + value: 1e-08 +add_cross_attention: + value: false +architectures: + value: + - BertForMaskedLM +attention_probs_dropout_prob: + value: 0.1 +auto_find_batch_size: + value: false +bad_words_ids: + value: null +begin_suppress_tokens: + value: null +bf16: + value: false +bf16_full_eval: + value: false +bos_token_id: + value: null +cache_dir: + value: null +checkpointing: + value: false +chunk_size_feed_forward: + value: 0 +classifier_dropout: + value: null +cross_attention_hidden_size: + value: null +data_seed: + value: null +dataloader_drop_last: + value: false +dataloader_num_workers: + value: 0 +dataloader_pin_memory: + value: false +ddp_backend: + value: null +ddp_broadcast_buffers: + value: null +ddp_bucket_cap_mb: + value: null +ddp_find_unused_parameters: + value: null +ddp_timeout: + value: 1800 +debug: + value: [] +decoder_start_token_id: + value: null +deepspeed: + value: null +disable_tqdm: + value: false +dispatch_batches: + value: null +diversity_penalty: + value: 0 +do_eval: + value: true +do_predict: + value: false +do_sample: + value: false +do_train: + value: false +early_stopping: + value: false +encoder_no_repeat_ngram_size: + value: 0 +eos_token_id: + value: null +eval_accumulation_steps: + value: null +eval_and_save_results: + value: true +eval_delay: + value: 0 +eval_steps: + value: 100 +evaluation_strategy: + value: epoch +exponential_decay_length_penalty: + value: null +find_unused_parameters: + value: false +finetuning_task: + value: null +forced_bos_token_id: + value: null +forced_eos_token_id: + value: null +fp16: + value: true +fp16_backend: + value: auto +fp16_full_eval: + value: false +fp16_opt_level: + value: O1 +fsdp: + value: [] +fsdp_config: + value: + 
min_num_params: 0 + xla: false + xla_fsdp_grad_ckpt: false +fsdp_min_num_params: + value: 0 +fsdp_transformer_layer_cls_to_wrap: + value: null +full_determinism: + value: false +gradient_accumulation_steps: + value: 1 +gradient_checkpointing: + value: false +gradient_checkpointing_kwargs: + value: null +greater_is_better: + value: true +group_by_length: + value: false +half_precision_backend: + value: auto +hidden_act: + value: gelu +hidden_dropout_prob: + value: 0.1 +hidden_size: + value: 768 +hub_always_push: + value: false +hub_model_id: + value: null +hub_private_repo: + value: false +hub_strategy: + value: every_save +hub_token: + value: +id2label: + value: + "0": LABEL_0 + "1": LABEL_1 +ignore_data_skip: + value: false +include_inputs_for_metrics: + value: false +include_tokens_per_second: + value: false +initializer_range: + value: 0.02 +intermediate_size: + value: 3072 +is_decoder: + value: false +is_encoder_decoder: + value: false +jit_mode_eval: + value: false +label_names: + value: null +label_smoothing_factor: + value: 0 +label2id: + value: + LABEL_0: 0 + LABEL_1: 1 +layer_norm_eps: + value: 1e-12 +learning_rate: + value: 3e-05 +length_column_name: + value: length +length_penalty: + value: 1 +load_best_model_at_end: + value: true +local_rank: + value: 0 +log_level: + value: info +log_level_replica: + value: warning +log_on_each_node: + value: true +logging_dir: + value: genomic_bench_DNAbert2_output/demo_coding_vs_intergenomic_seqs/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.06_ep3_seed42/runs/Mar24_04-32-04_u112222 +logging_first_step: + value: false +logging_nan_inf_filter: + value: true +logging_steps: + value: 100 +logging_strategy: + value: steps +lr_scheduler_type: + value: linear +max_grad_norm: + value: 1 +max_length: + value: 512 +max_position_embeddings: + value: 512 +max_steps: + value: -1 +metric_for_best_model: + value: eval_f1 +min_length: + value: 0 +model_max_length: + value: 100 +model_type: + value: bert +mp_parameters: + value: "" 
+neftune_noise_alpha: + value: null +no_cuda: + value: false +no_repeat_ngram_size: + value: 0 +num_attention_heads: + value: 12 +num_beam_groups: + value: 1 +num_beams: + value: 1 +num_hidden_layers: + value: 12 +num_return_sequences: + value: 1 +num_train_epochs: + value: 3 +optim: + value: adamw_torch +optim_args: + value: null +output_attentions: + value: false +output_dir: + value: genomic_bench_DNAbert2_output/demo_coding_vs_intergenomic_seqs/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.06_ep3_seed42 +output_hidden_states: + value: false +output_scores: + value: false +overwrite_output_dir: + value: true +pad_token_id: + value: 0 +past_index: + value: -1 +per_device_eval_batch_size: + value: 128 +per_device_train_batch_size: + value: 128 +per_gpu_eval_batch_size: + value: null +per_gpu_train_batch_size: + value: null +position_embedding_type: + value: absolute +prediction_loss_only: + value: false +prefix: + value: null +problem_type: + value: null +project_name: + value: genomic_bench_DNAbert2 +push_to_hub: + value: false +push_to_hub_model_id: + value: null +push_to_hub_organization: + value: null +push_to_hub_token: + value: +ray_scope: + value: last +remove_invalid_values: + value: false +remove_unused_columns: + value: true +repetition_penalty: + value: 1 +report_to: + value: + - wandb +resume_from_checkpoint: + value: null +return_dict: + value: true +return_dict_in_generate: + value: false +run_name: + value: base5120_demo_coding_vs_intergenomic_seqs_lr3e-5_wd0.0_wr0.06_ep3_seed42 +save_model: + value: false +save_on_each_node: + value: false +save_safetensors: + value: true +save_steps: + value: 100 +save_strategy: + value: epoch +save_total_limit: + value: 1 +seed: + value: 42 +sep_token_id: + value: null +skip_memory_metrics: + value: true +split_batches: + value: false +suppress_tokens: + value: null +task_specific_params: + value: null +temperature: + value: 1 +tf_legacy_loss: + value: false +tf32: + value: null +tie_encoder_decoder: + value: false 
+tie_word_embeddings: + value: true +tokenizer_class: + value: null +top_k: + value: 50 +top_p: + value: 1 +torch_compile: + value: false +torch_compile_backend: + value: null +torch_compile_mode: + value: null +torch_dtype: + value: float32 +torchdynamo: + value: null +torchscript: + value: false +tpu_metrics_debug: + value: false +tpu_num_cores: + value: null +transformers_version: + value: 4.35.2 +type_vocab_size: + value: 2 +typical_p: + value: 1 +use_bfloat16: + value: false +use_cache: + value: true +use_cpu: + value: false +use_ipex: + value: false +use_legacy_prediction_loop: + value: false +use_mps_device: + value: false +vocab_file: + value: null +vocab_size: + value: 4096 +warmup_ratio: + value: 0.06 +warmup_steps: + value: 0 +weight_decay: + value: 0 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_043205-qaue51mb/files/output.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_043205-qaue51mb/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..e24ff5d75d812f1f116bade909648d3600fe9093 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_043205-qaue51mb/files/output.log @@ -0,0 +1,76 @@ +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... +Some weights of BertForSequenceClassification were not initialized from the model checkpoint at /data/nanhuang/Nan/models/DNAbert2_Pretrained and are newly initialized: ['bert.pooler.dense.weight', 'classifier.bias', 'bert.pooler.dense.bias', 'classifier.weight'] +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. +/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/accelerate/accelerator.py:439: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead. 
+ self.scaler = torch.cuda.amp.GradScaler(**kwargs) +Using auto half precision backend +***** Running training ***** + Num examples = 80,000 + Num Epochs = 3 + Instantaneous batch size per device = 128 + Total train batch size (w. parallel, distributed & accumulation) = 128 + Gradient Accumulation steps = 1 + Total optimization steps = 1,875 + Number of trainable parameters = 89,188,610 +Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true" + 33%|███▎ | 624/1875 [01:01<02:03, 10.13it/s]***** Running Evaluation ***** +{'loss': 0.4279, 'learning_rate': 2.6548672566371683e-05, 'epoch': 0.16} +{'loss': 0.2941, 'learning_rate': 2.851872871736663e-05, 'epoch': 0.32} +{'loss': 0.2896, 'learning_rate': 2.68161180476731e-05, 'epoch': 0.48} +{'loss': 0.2724, 'learning_rate': 2.511350737797957e-05, 'epoch': 0.64} +{'loss': 0.2618, 'learning_rate': 2.341089670828604e-05, 'epoch': 0.8} +{'loss': 0.2538, 'learning_rate': 2.170828603859251e-05, 'epoch': 0.96} + Num examples = 10000 + Batch size = 128 + 33%|███▎ | 625/1875 [01:04<02:03, 10.13itSaving model checkpoint to genomic_bench_DNAbert2_output/demo_coding_vs_intergenomic_seqs/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.06_ep3_seed42/checkpoint-625 +Configuration saved in genomic_bench_DNAbert2_output/demo_coding_vs_intergenomic_seqs/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.06_ep3_seed42/checkpoint-625/config.json +{'eval_loss': 0.24450437724590302, 'eval_accuracy': 0.8981, 'eval_f1': 0.8980013458817503, 'eval_matthews_correlation': 0.7970483436740018, 'eval_precision': 0.8991229435739876, 'eval_recall': 0.897926298388621, 'eval_runtime': 2.2999, 'eval_samples_per_second': 4347.921, 'eval_steps_per_second': 34.349, 'epoch': 1.0} +Model weights saved in genomic_bench_DNAbert2_output/demo_coding_vs_intergenomic_seqs/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.06_ep3_seed42/checkpoint-625/pytorch_model.bin +tokenizer config file saved in 
genomic_bench_DNAbert2_output/demo_coding_vs_intergenomic_seqs/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.06_ep3_seed42/checkpoint-625/tokenizer_config.json +Special tokens file saved in genomic_bench_DNAbert2_output/demo_coding_vs_intergenomic_seqs/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.06_ep3_seed42/checkpoint-625/special_tokens_map.json + 67%|██████▋ | 1250/1875 [02:10<01:03, 9.86it/s]***** Running Evaluation ***** +{'loss': 0.2381, 'learning_rate': 2.0005675368898978e-05, 'epoch': 1.12} +{'loss': 0.2257, 'learning_rate': 1.830306469920545e-05, 'epoch': 1.28} +{'loss': 0.2222, 'learning_rate': 1.6600454029511918e-05, 'epoch': 1.44} +{'loss': 0.2267, 'learning_rate': 1.4897843359818387e-05, 'epoch': 1.6} +{'loss': 0.2286, 'learning_rate': 1.3195232690124857e-05, 'epoch': 1.76} +{'loss': 0.2205, 'learning_rate': 1.1492622020431328e-05, 'epoch': 1.92} + Num examples = 10000 + Batch size = 128 + 67%|██████▋ | 1250/1875 [02:12<01:03, 9.86iSaving model checkpoint to genomic_bench_DNAbert2_output/demo_coding_vs_intergenomic_seqs/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.06_ep3_seed42/checkpoint-1250 +Configuration saved in genomic_bench_DNAbert2_output/demo_coding_vs_intergenomic_seqs/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.06_ep3_seed42/checkpoint-1250/config.json +{'eval_loss': 0.23238573968410492, 'eval_accuracy': 0.9036, 'eval_f1': 0.9035999035999036, 'eval_matthews_correlation': 0.8073154756092576, 'eval_precision': 0.9036641967999408, 'eval_recall': 0.9036512789126667, 'eval_runtime': 2.312, 'eval_samples_per_second': 4325.297, 'eval_steps_per_second': 34.17, 'epoch': 2.0} +Model weights saved in genomic_bench_DNAbert2_output/demo_coding_vs_intergenomic_seqs/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.06_ep3_seed42/checkpoint-1250/pytorch_model.bin +tokenizer config file saved in genomic_bench_DNAbert2_output/demo_coding_vs_intergenomic_seqs/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.06_ep3_seed42/checkpoint-1250/tokenizer_config.json +Special tokens file saved in 
genomic_bench_DNAbert2_output/demo_coding_vs_intergenomic_seqs/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.06_ep3_seed42/checkpoint-1250/special_tokens_map.json +Deleting older checkpoint [genomic_bench_DNAbert2_output/demo_coding_vs_intergenomic_seqs/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.06_ep3_seed42/checkpoint-625] due to args.save_total_limit +100%|██████████| 1875/1875 [03:19<00:00, 9.86it/s]***** Running Evaluation ***** +{'loss': 0.1951, 'learning_rate': 9.790011350737798e-06, 'epoch': 2.08} +{'loss': 0.1857, 'learning_rate': 8.087400681044268e-06, 'epoch': 2.24} +{'loss': 0.1822, 'learning_rate': 6.384790011350738e-06, 'epoch': 2.4} +{'loss': 0.175, 'learning_rate': 4.682179341657208e-06, 'epoch': 2.56} +{'loss': 0.1706, 'learning_rate': 2.9795686719636776e-06, 'epoch': 2.72} +{'loss': 0.1733, 'learning_rate': 1.2769580022701475e-06, 'epoch': 2.88} + Num examples = 10000 + Batch size = 128 +100%|██████████| 1875/1875 [03:22<00:00, 9.86iSaving model checkpoint to genomic_bench_DNAbert2_output/demo_coding_vs_intergenomic_seqs/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.06_ep3_seed42/checkpoint-1875 +Configuration saved in genomic_bench_DNAbert2_output/demo_coding_vs_intergenomic_seqs/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.06_ep3_seed42/checkpoint-1875/config.json +{'eval_loss': 0.24301454424858093, 'eval_accuracy': 0.9064, 'eval_f1': 0.9063933951179595, 'eval_matthews_correlation': 0.8132168703204711, 'eval_precision': 0.906713678896274, 'eval_recall': 0.9065032186577142, 'eval_runtime': 2.322, 'eval_samples_per_second': 4306.719, 'eval_steps_per_second': 34.023, 'epoch': 3.0} +Model weights saved in genomic_bench_DNAbert2_output/demo_coding_vs_intergenomic_seqs/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.06_ep3_seed42/checkpoint-1875/pytorch_model.bin +tokenizer config file saved in genomic_bench_DNAbert2_output/demo_coding_vs_intergenomic_seqs/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.06_ep3_seed42/checkpoint-1875/tokenizer_config.json +Special tokens file saved in 
genomic_bench_DNAbert2_output/demo_coding_vs_intergenomic_seqs/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.06_ep3_seed42/checkpoint-1875/special_tokens_map.json +Deleting older checkpoint [genomic_bench_DNAbert2_output/demo_coding_vs_intergenomic_seqs/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.06_ep3_seed42/checkpoint-1250] due to args.save_total_limit + + +Training completed. Do not forget to share your model on huggingface.co/models =) + + +Loading best model from genomic_bench_DNAbert2_output/demo_coding_vs_intergenomic_seqs/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.06_ep3_seed42/checkpoint-1875 (score: 0.9063933951179595). +100%|██████████| 1875/1875 [03:26<00:00, 9.08it/s] +{'train_runtime': 206.5926, 'train_samples_per_second': 1161.707, 'train_steps_per_second': 9.076, 'train_loss': 0.23328233184814454, 'epoch': 3.0} +***** Running Evaluation ***** + Num examples = 10000 + Batch size = 128 +100%|██████████| 79/79 [00:02<00:00, 36.49it/s] diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_043205-qaue51mb/files/requirements.txt b/Finetune-GenomicBenchmarks/wandb/run-20260324_043205-qaue51mb/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..44d18d32ec4648cd530877d7c8c4758d5e887b9c --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_043205-qaue51mb/files/requirements.txt @@ -0,0 +1,144 @@ +scipy==1.13.1 +jupyter_core==5.8.1 +smmap==5.0.2 +yarl==1.22.0 +executing==2.2.0 +cffi==2.0.0 +mkl_random==1.2.8 +traitlets==5.14.3 +wandb==0.23.1 +annotated-types==0.7.0 +evaluate==0.4.6 +kiwisolver==1.4.4 +Jinja2==3.1.6 +pyparsing==3.2.0 +mpmath==1.3.0 +debugpy==1.8.16 +nvidia-cuda-nvrtc-cu12==12.8.93 +docker-pycreds==0.4.0 +pycparser==2.23 +anyio==4.12.0 +safetensors==0.7.0 +matplotlib-inline==0.1.7 +Pygments==2.19.2 +numpy==2.0.2 +nvidia-cuda-cupti-cu12==12.8.90 +Bottleneck==1.4.2 +matplotlib==3.9.2 +numexpr==2.10.1 +sip==6.7.12 +aiohappyeyeballs==2.6.1 +ptyprocess==0.7.0 +fsspec==2025.7.0 +accelerate==0.25.0 +zipp==3.23.0 
+PyQt5_sip==12.13.0 +pure_eval==0.2.3 +regex==2025.11.3 +aiosignal==1.4.0 +certifi==2025.10.5 +transformers==4.35.2 +mkl-service==2.4.0 +httpx==0.28.1 +mkl_fft==1.3.11 +pickleshare==0.7.5 +ipykernel==6.30.1 +nvidia-nvtx-cu12==12.8.90 +nvidia-cufft-cu12==11.3.3.83 +triton==3.4.0 +numba==0.60.0 +psutil==7.0.0 +contourpy==1.2.1 +PyQt5==5.15.10 +packaging==25.0 +datasets==4.4.1 +ipython==8.18.1 +sympy==1.14.0 +nvidia-cusolver-cu12==11.7.3.90 +multidict==6.7.0 +jupyter_client==8.6.3 +setuptools==80.9.0 +prompt_toolkit==3.0.51 +six==1.17.0 +GitPython==3.1.45 +pydantic==2.11.7 +nvidia-cublas-cu12==12.8.4.1 +aiohttp==3.13.2 +tzdata==2025.2 +importlib_metadata==8.7.0 +biopython==1.85 +httpcore==1.0.9 +python-dateutil==2.9.0.post0 +llvmlite==0.43.0 +pandas==2.3.3 +scikit-learn==1.6.1 +asttokens==3.0.0 +joblib==1.5.3 +h11==0.16.0 +charset-normalizer==3.4.4 +pyzmq==27.0.2 +multiprocess==0.70.18 +nvidia-nvjitlink-cu12==12.8.93 +sentry-sdk==2.35.0 +pytz==2025.2 +pydantic_core==2.33.2 +MarkupSafe==3.0.3 +brotlicffi==1.0.9.2 +stack_data==0.6.3 +tqdm==4.67.1 +pynndescent==0.5.13 +importlib_resources==6.5.2 +ply==3.11 +pyarrow==21.0.0 +tokenizers==0.15.2 +exceptiongroup==1.3.1 +nvidia-cusparse-cu12==12.5.8.93 +comm==0.2.3 +pillow==11.3.0 +nvidia-cusparselt-cu12==0.7.1 +protobuf==3.20.3 +urllib3==2.5.0 +wheel==0.45.1 +wcwidth==0.2.13 +appdirs==1.4.4 +PySocks==1.7.1 +PyQt6_sip==13.10.2 +umap-learn==0.5.9.post2 +attrs==25.4.0 +platformdirs==4.3.8 +nvidia-cuda-runtime-cu12==12.8.90 +typing-inspection==0.4.1 +huggingface_hub==0.34.4 +decorator==5.2.1 +filelock==3.17.0 +nvidia-nccl-cu12==2.27.3 +fonttools==4.60.1 +xxhash==3.6.0 +dill==0.4.0 +threadpoolctl==3.6.0 +parso==0.8.4 +pysam==0.9.1 +frozenlist==1.8.0 +typing_extensions==4.15.0 +propcache==0.4.1 +tomli==2.2.1 +click==8.1.8 +nvidia-cudnn-cu12==9.10.2.21 +gitdb==4.0.12 +pip==25.3 +tornado==6.5.2 +networkx==3.2.1 +jedi==0.19.2 +idna==3.11 +pexpect==4.9.0 +async-timeout==5.0.1 +hf-xet==1.1.8 +nvidia-curand-cu12==10.3.9.90 
+PyYAML==6.0.2 +nvidia-cufile-cu12==1.13.1.3 +setproctitle==1.3.6 +eval_type_backport==0.2.2 +requests==2.32.5 +nest-asyncio==1.6.0 +torch==2.8.0 +cycler==0.11.0 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_043205-qaue51mb/files/wandb-metadata.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_043205-qaue51mb/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..9c2a6bd65979a766c9387a3f504cd63f16cd1c3c --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_043205-qaue51mb/files/wandb-metadata.json @@ -0,0 +1,146 @@ +{ + "os": "Linux-5.15.0-126-generic-x86_64-with-glibc2.35", + "python": "CPython 3.9.18", + "startedAt": "2026-03-24T11:32:05.500129Z", + "args": [ + "--model_name_or_path", + "/data/nanhuang/Nan/models/DNAbert2_Pretrained", + "--tokenizer_path", + "/data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json", + "--trust_remote_code", + "True", + "--data_path", + "/data/nanhuang/Nan/ft_data/demo_coding_vs_intergenomic_seqs/split", + "--kmer", + "-1", + "--run_name", + "base5120_demo_coding_vs_intergenomic_seqs_lr3e-5_wd0.0_wr0.06_ep3_seed42", + "--model_max_length", + "100", + "--per_device_train_batch_size", + "128", + "--per_device_eval_batch_size", + "128", + "--gradient_accumulation_steps", + "1", + "--learning_rate", + "3e-5", + "--weight_decay", + "0.0", + "--num_train_epochs", + "3", + "--lr_scheduler_type", + "linear", + "--warmup_steps", + "0", + "--warmup_ratio", + "0.06", + "--fp16", + "--output_dir", + "genomic_bench_DNAbert2_output/demo_coding_vs_intergenomic_seqs/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.06_ep3_seed42", + "--evaluation_strategy", + "epoch", + "--save_strategy", + "epoch", + "--load_best_model_at_end", + "True", + "--metric_for_best_model", + "eval_f1", + "--greater_is_better", + "True", + "--save_total_limit", + "1", + "--logging_steps", + "100", + "--overwrite_output_dir", + "True", + "--log_level", + "info", + "--seed", + "42", + "--find_unused_parameters", 
+ "False", + "--project_name", + "genomic_bench_DNAbert2" + ], + "program": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", + "codePath": "train.py", + "codePathLocal": "train.py", + "email": "n5huang@ucsd.edu", + "root": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks", + "host": "u112222", + "executable": "/data/nanhuang/miniconda3/envs/bpe_v2/bin/python", + "cpu_count": 64, + "cpu_count_logical": 128, + "gpu": "NVIDIA RTX A6000", + "gpu_count": 8, + "disk": { + "/": { + "total": "3768964489216", + "used": "3557696446464" + } + }, + "memory": { + "total": "1082030182400" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-5a3d8a94-f380-da39-63d2-4cae98f5c2ae" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-cf8724bd-d619-7916-ee26-88d517a20c47" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-48b494ab-4a63-ff4c-5cc8-746af5d27310" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-968c7ea7-97bf-416a-7689-72c141cfc2bb" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-d53c626b-860f-1dec-1cfa-1dfcde78bc88" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-caa40ec7-afcb-5fe0-c53a-85eb54152941" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-18ee7a7f-1bbe-edef-c72c-3abed60917b2" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-a8757d5a-c26e-48c6-a704-dfe62167fc81" 
+ } + ], + "cudaVersion": "12.4", + "writerId": "9vfj87f0r8x080uqa5hkz0vumjuqralh" +} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_043205-qaue51mb/files/wandb-summary.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_043205-qaue51mb/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..fbb6e94ef253daaaeaa5fb7fa8bc3b9ef0c2c62c --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_043205-qaue51mb/files/wandb-summary.json @@ -0,0 +1 @@ +{"train/train_steps_per_second":9.076,"_step":22,"train/train_samples_per_second":1161.707,"_wandb":{"runtime":229},"_timestamp":1.7743521568013425e+09,"train/epoch":3,"train/loss":0.1733,"train/global_step":1875,"train/learning_rate":1.2769580022701475e-06,"eval/f1":0.9020706907370306,"eval/precision":0.9027773220988344,"eval/loss":0.2542661130428314,"train/total_flos":6.4133319744e+15,"train/train_runtime":206.5926,"eval/recall":0.902183634938159,"eval/accuracy":0.9021,"eval/samples_per_second":4551.358,"eval/steps_per_second":35.956,"eval/runtime":2.1971,"eval/matthews_correlation":0.8049607381043299,"_runtime":229,"train/train_loss":0.23328233184814454} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_043205-qaue51mb/logs/debug-core.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_043205-qaue51mb/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..1f1ac4b6a1752afefe9bb2a26139bd0debff0591 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_043205-qaue51mb/logs/debug-core.log @@ -0,0 +1,14 @@ +{"time":"2026-03-24T04:32:05.717472758-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpiez6vypt/port-2759327.txt","pid":2759327,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T04:32:05.718658182-07:00","level":"INFO","msg":"server: will exit if parent 
process dies","ppid":2759327} +{"time":"2026-03-24T04:32:05.718625092-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2759327-2759718-3840243461/socket","Net":"unix"}} +{"time":"2026-03-24T04:32:05.889425932-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T04:32:05.991769711-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"qaue51mb","id":"1(@)"} +{"time":"2026-03-24T04:32:06.387766662-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"qaue51mb","id":"1(@)"} +{"time":"2026-03-24T04:35:56.808565321-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T04:35:56.80871791-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T04:35:56.808836749-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T04:35:56.808853579-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T04:35:56.808998018-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2759327-2759718-3840243461/socket","Net":"unix"}} +{"time":"2026-03-24T04:35:57.547658309-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T04:35:57.547707079-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T04:35:57.547726989-07:00","level":"INFO","msg":"server is closed"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_043205-qaue51mb/logs/debug-internal.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_043205-qaue51mb/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..7889d5c6ef7fd04997eadc3980c98b3fb3316b87 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_043205-qaue51mb/logs/debug-internal.log @@ -0,0 
+1,11 @@ +{"time":"2026-03-24T04:32:05.992021249-07:00","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2026-03-24T04:32:06.387105076-07:00","level":"INFO","msg":"stream: created new stream","id":"qaue51mb"} +{"time":"2026-03-24T04:32:06.387362505-07:00","level":"INFO","msg":"handler: started","stream_id":"qaue51mb"} +{"time":"2026-03-24T04:32:06.387733172-07:00","level":"INFO","msg":"stream: started","id":"qaue51mb"} +{"time":"2026-03-24T04:32:06.387805752-07:00","level":"INFO","msg":"writer: started","stream_id":"qaue51mb"} +{"time":"2026-03-24T04:32:06.387860902-07:00","level":"INFO","msg":"sender: started","stream_id":"qaue51mb"} +{"time":"2026-03-24T04:35:56.80871203-07:00","level":"INFO","msg":"stream: closing","id":"qaue51mb"} +{"time":"2026-03-24T04:35:57.305162499-07:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2026-03-24T04:35:57.539029456-07:00","level":"INFO","msg":"handler: closed","stream_id":"qaue51mb"} +{"time":"2026-03-24T04:35:57.539255525-07:00","level":"INFO","msg":"sender: closed","stream_id":"qaue51mb"} +{"time":"2026-03-24T04:35:57.539294235-07:00","level":"INFO","msg":"stream: closed","id":"qaue51mb"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_043205-qaue51mb/logs/debug.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_043205-qaue51mb/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..ffd2bd539de80ecfa84c27cb8202d874bde88b41 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_043205-qaue51mb/logs/debug.log @@ -0,0 +1,24 @@ +2026-03-24 04:32:05,506 INFO MainThread:2759327 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1 +2026-03-24 04:32:05,506 INFO MainThread:2759327 [wandb_setup.py:_flush():80] Configure stats pid to 2759327 +2026-03-24 04:32:05,506 INFO MainThread:2759327 [wandb_setup.py:_flush():80] Loading settings from /home/nanhuang/.config/wandb/settings +2026-03-24 04:32:05,507 INFO 
MainThread:2759327 [wandb_setup.py:_flush():80] Loading settings from /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/settings +2026-03-24 04:32:05,507 INFO MainThread:2759327 [wandb_setup.py:_flush():80] Loading settings from environment variables +2026-03-24 04:32:05,507 INFO MainThread:2759327 [wandb_init.py:setup_run_log_directory():714] Logging user logs to /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_043205-qaue51mb/logs/debug.log +2026-03-24 04:32:05,507 INFO MainThread:2759327 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_043205-qaue51mb/logs/debug-internal.log +2026-03-24 04:32:05,507 INFO MainThread:2759327 [wandb_init.py:init():841] calling init triggers +2026-03-24 04:32:05,507 INFO MainThread:2759327 [wandb_init.py:init():846] wandb.init called with sweep_config: {} +config: {'_wandb': {}} +2026-03-24 04:32:05,507 INFO MainThread:2759327 [wandb_init.py:init():889] starting backend +2026-03-24 04:32:05,890 INFO MainThread:2759327 [wandb_init.py:init():892] sending inform_init request +2026-03-24 04:32:05,988 INFO MainThread:2759327 [wandb_init.py:init():900] backend started and connected +2026-03-24 04:32:06,001 INFO MainThread:2759327 [wandb_init.py:init():970] updated telemetry +2026-03-24 04:32:06,004 INFO MainThread:2759327 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout +2026-03-24 04:32:07,342 INFO MainThread:2759327 [wandb_init.py:init():1041] starting run threads in backend +2026-03-24 04:32:07,694 INFO MainThread:2759327 [wandb_run.py:_console_start():2521] atexit reg +2026-03-24 04:32:07,695 INFO MainThread:2759327 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2026-03-24 04:32:07,695 INFO MainThread:2759327 [wandb_run.py:_redirect():2438] Wrapping output streams. +2026-03-24 04:32:07,695 INFO MainThread:2759327 [wandb_run.py:_redirect():2461] Redirects installed. 
+2026-03-24 04:32:07,703 INFO MainThread:2759327 [wandb_init.py:init():1081] run started, returning control to user process +2026-03-24 04:32:28,013 INFO MainThread:2759327 [wandb_run.py:_config_callback():1396] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 512, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['BertForMaskedLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': '/data/nanhuang/Nan/models/DNAbert2_Pretrained', 'transformers_version': '4.35.2', 'model_type': 'bert', 'vocab_size': 4096, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 512, 'type_vocab_size': 2, 
'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'classifier_dropout': None, 'output_dir': 'genomic_bench_DNAbert2_output/demo_coding_vs_intergenomic_seqs/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.06_ep3_seed42', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 128, 'per_device_eval_batch_size': 128, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 3e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'warmup_ratio': 0.06, 'warmup_steps': 0, 'log_level': 'info', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 'genomic_bench_DNAbert2_output/demo_coding_vs_intergenomic_seqs/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.06_ep3_seed42/runs/Mar24_04-32-04_u112222', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 100, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 100, 'save_total_limit': 1, 'save_safetensors': True, 'save_on_each_node': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 100, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': 'base5120_demo_coding_vs_intergenomic_seqs_lr3e-5_wd0.0_wr0.06_ep3_seed42', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': 
None, 'load_best_model_at_end': True, 'metric_for_best_model': 'eval_f1', 'greater_is_better': True, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': False, 'include_tokens_per_second': False, 'neftune_noise_alpha': None, 'vocab_file': None, 'cache_dir': None, 'model_max_length': 100, 'find_unused_parameters': False, 'checkpointing': False, 'eval_and_save_results': True, 'save_model': False, 'project_name': 'genomic_bench_DNAbert2'} +2026-03-24 04:35:56,808 INFO wandb-AsyncioManager-main:2759327 [service_client.py:_forward_responses():80] Reached EOF. +2026-03-24 04:35:56,809 INFO wandb-AsyncioManager-main:2759327 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles. 
diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_043205-qaue51mb/run-qaue51mb.wandb b/Finetune-GenomicBenchmarks/wandb/run-20260324_043205-qaue51mb/run-qaue51mb.wandb new file mode 100644 index 0000000000000000000000000000000000000000..9e1399eec37e65aba27994e7ff833e796e2c6877 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_043205-qaue51mb/run-qaue51mb.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21b0c1410d10b5eb0bc702cc8bdb8344ba43145e368898f1a3803a39785e9d7f +size 215344 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_043608-md2c6b0g/files/config.yaml b/Finetune-GenomicBenchmarks/wandb/run-20260324_043608-md2c6b0g/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a68788d715bf3596b6b8c9822a153b44387f7ce0 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_043608-md2c6b0g/files/config.yaml @@ -0,0 +1,559 @@ +_name_or_path: + value: /data/nanhuang/Nan/models/DNAbert2_Pretrained +_wandb: + value: + cli_version: 0.23.1 + e: + jraxqt05f0v57ynpiyiipxk9x91p9vz0: + args: + - --model_name_or_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained + - --tokenizer_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json + - --trust_remote_code + - "True" + - --data_path + - /data/nanhuang/Nan/ft_data/human_nontata_promoters/split + - --kmer + - "-1" + - --run_name + - base5120_human_nontata_promoters_lr3e-5_wd0.03_wr0.06_ep10_seed42 + - --model_max_length + - "100" + - --per_device_train_batch_size + - "128" + - --per_device_eval_batch_size + - "128" + - --gradient_accumulation_steps + - "1" + - --learning_rate + - "3e-5" + - --weight_decay + - "0.03" + - --num_train_epochs + - "10" + - --lr_scheduler_type + - linear + - --warmup_steps + - "0" + - --warmup_ratio + - "0.06" + - --fp16 + - --output_dir + - genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42 + - --evaluation_strategy + - 
epoch + - --save_strategy + - epoch + - --load_best_model_at_end + - "True" + - --metric_for_best_model + - eval_f1 + - --greater_is_better + - "True" + - --save_total_limit + - "1" + - --logging_steps + - "100" + - --overwrite_output_dir + - "True" + - --log_level + - info + - --seed + - "42" + - --find_unused_parameters + - "False" + - --project_name + - genomic_bench_DNAbert2 + codePath: train.py + codePathLocal: train.py + cpu_count: 64 + cpu_count_logical: 128 + cudaVersion: "12.4" + disk: + /: + total: "3768964489216" + used: "3558776877056" + email: n5huang@ucsd.edu + executable: /data/nanhuang/miniconda3/envs/bpe_v2/bin/python + gpu: NVIDIA RTX A6000 + gpu_count: 8 + gpu_nvidia: + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-5a3d8a94-f380-da39-63d2-4cae98f5c2ae + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-cf8724bd-d619-7916-ee26-88d517a20c47 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-48b494ab-4a63-ff4c-5cc8-746af5d27310 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-968c7ea7-97bf-416a-7689-72c141cfc2bb + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-d53c626b-860f-1dec-1cfa-1dfcde78bc88 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-caa40ec7-afcb-5fe0-c53a-85eb54152941 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-18ee7a7f-1bbe-edef-c72c-3abed60917b2 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-a8757d5a-c26e-48c6-a704-dfe62167fc81 + host: u112222 + memory: + total: "1082030182400" + os: Linux-5.15.0-126-generic-x86_64-with-glibc2.35 + program: 
/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py + python: CPython 3.9.18 + root: /data/nanhuang/Nan/Finetune-GenomicBenchmarks + startedAt: "2026-03-24T11:36:08.792295Z" + writerId: jraxqt05f0v57ynpiyiipxk9x91p9vz0 + m: + - "1": train/global_step + "6": + - 3 + "7": [] + - "2": '*' + "5": 1 + "6": + - 1 + "7": [] + python_version: 3.9.18 + t: + "1": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "2": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "3": + - 7 + - 66 + "4": 3.9.18 + "5": 0.23.1 + "6": 4.35.2 + "9": + "1": transformers_trainer + "12": 0.23.1 + "13": linux-x86_64 +adafactor: + value: false +adam_beta1: + value: 0.9 +adam_beta2: + value: 0.999 +adam_epsilon: + value: 1e-08 +add_cross_attention: + value: false +architectures: + value: + - BertForMaskedLM +attention_probs_dropout_prob: + value: 0.1 +auto_find_batch_size: + value: false +bad_words_ids: + value: null +begin_suppress_tokens: + value: null +bf16: + value: false +bf16_full_eval: + value: false +bos_token_id: + value: null +cache_dir: + value: null +checkpointing: + value: false +chunk_size_feed_forward: + value: 0 +classifier_dropout: + value: null +cross_attention_hidden_size: + value: null +data_seed: + value: null +dataloader_drop_last: + value: false +dataloader_num_workers: + value: 0 +dataloader_pin_memory: + value: false +ddp_backend: + value: null +ddp_broadcast_buffers: + value: null +ddp_bucket_cap_mb: + value: null +ddp_find_unused_parameters: + value: null +ddp_timeout: + value: 1800 +debug: + value: [] +decoder_start_token_id: + value: null +deepspeed: + value: null +disable_tqdm: + value: false +dispatch_batches: + value: null +diversity_penalty: + value: 0 +do_eval: + value: true +do_predict: + value: false +do_sample: + value: false +do_train: + value: false +early_stopping: + value: false +encoder_no_repeat_ngram_size: + value: 0 +eos_token_id: + value: null +eval_accumulation_steps: + value: null +eval_and_save_results: + value: true +eval_delay: + value: 0 
+eval_steps: + value: 100 +evaluation_strategy: + value: epoch +exponential_decay_length_penalty: + value: null +find_unused_parameters: + value: false +finetuning_task: + value: null +forced_bos_token_id: + value: null +forced_eos_token_id: + value: null +fp16: + value: true +fp16_backend: + value: auto +fp16_full_eval: + value: false +fp16_opt_level: + value: O1 +fsdp: + value: [] +fsdp_config: + value: + min_num_params: 0 + xla: false + xla_fsdp_grad_ckpt: false +fsdp_min_num_params: + value: 0 +fsdp_transformer_layer_cls_to_wrap: + value: null +full_determinism: + value: false +gradient_accumulation_steps: + value: 1 +gradient_checkpointing: + value: false +gradient_checkpointing_kwargs: + value: null +greater_is_better: + value: true +group_by_length: + value: false +half_precision_backend: + value: auto +hidden_act: + value: gelu +hidden_dropout_prob: + value: 0.1 +hidden_size: + value: 768 +hub_always_push: + value: false +hub_model_id: + value: null +hub_private_repo: + value: false +hub_strategy: + value: every_save +hub_token: + value: +id2label: + value: + "0": LABEL_0 + "1": LABEL_1 +ignore_data_skip: + value: false +include_inputs_for_metrics: + value: false +include_tokens_per_second: + value: false +initializer_range: + value: 0.02 +intermediate_size: + value: 3072 +is_decoder: + value: false +is_encoder_decoder: + value: false +jit_mode_eval: + value: false +label_names: + value: null +label_smoothing_factor: + value: 0 +label2id: + value: + LABEL_0: 0 + LABEL_1: 1 +layer_norm_eps: + value: 1e-12 +learning_rate: + value: 3e-05 +length_column_name: + value: length +length_penalty: + value: 1 +load_best_model_at_end: + value: true +local_rank: + value: 0 +log_level: + value: info +log_level_replica: + value: warning +log_on_each_node: + value: true +logging_dir: + value: genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/runs/Mar24_04-36-08_u112222 +logging_first_step: + value: false 
+logging_nan_inf_filter: + value: true +logging_steps: + value: 100 +logging_strategy: + value: steps +lr_scheduler_type: + value: linear +max_grad_norm: + value: 1 +max_length: + value: 512 +max_position_embeddings: + value: 512 +max_steps: + value: -1 +metric_for_best_model: + value: eval_f1 +min_length: + value: 0 +model_max_length: + value: 100 +model_type: + value: bert +mp_parameters: + value: "" +neftune_noise_alpha: + value: null +no_cuda: + value: false +no_repeat_ngram_size: + value: 0 +num_attention_heads: + value: 12 +num_beam_groups: + value: 1 +num_beams: + value: 1 +num_hidden_layers: + value: 12 +num_return_sequences: + value: 1 +num_train_epochs: + value: 10 +optim: + value: adamw_torch +optim_args: + value: null +output_attentions: + value: false +output_dir: + value: genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42 +output_hidden_states: + value: false +output_scores: + value: false +overwrite_output_dir: + value: true +pad_token_id: + value: 0 +past_index: + value: -1 +per_device_eval_batch_size: + value: 128 +per_device_train_batch_size: + value: 128 +per_gpu_eval_batch_size: + value: null +per_gpu_train_batch_size: + value: null +position_embedding_type: + value: absolute +prediction_loss_only: + value: false +prefix: + value: null +problem_type: + value: null +project_name: + value: genomic_bench_DNAbert2 +push_to_hub: + value: false +push_to_hub_model_id: + value: null +push_to_hub_organization: + value: null +push_to_hub_token: + value: +ray_scope: + value: last +remove_invalid_values: + value: false +remove_unused_columns: + value: true +repetition_penalty: + value: 1 +report_to: + value: + - wandb +resume_from_checkpoint: + value: null +return_dict: + value: true +return_dict_in_generate: + value: false +run_name: + value: base5120_human_nontata_promoters_lr3e-5_wd0.03_wr0.06_ep10_seed42 +save_model: + value: false +save_on_each_node: + value: false +save_safetensors: + value: true 
+save_steps: + value: 100 +save_strategy: + value: epoch +save_total_limit: + value: 1 +seed: + value: 42 +sep_token_id: + value: null +skip_memory_metrics: + value: true +split_batches: + value: false +suppress_tokens: + value: null +task_specific_params: + value: null +temperature: + value: 1 +tf_legacy_loss: + value: false +tf32: + value: null +tie_encoder_decoder: + value: false +tie_word_embeddings: + value: true +tokenizer_class: + value: null +top_k: + value: 50 +top_p: + value: 1 +torch_compile: + value: false +torch_compile_backend: + value: null +torch_compile_mode: + value: null +torch_dtype: + value: float32 +torchdynamo: + value: null +torchscript: + value: false +tpu_metrics_debug: + value: false +tpu_num_cores: + value: null +transformers_version: + value: 4.35.2 +type_vocab_size: + value: 2 +typical_p: + value: 1 +use_bfloat16: + value: false +use_cache: + value: true +use_cpu: + value: false +use_ipex: + value: false +use_legacy_prediction_loop: + value: false +use_mps_device: + value: false +vocab_file: + value: null +vocab_size: + value: 4096 +warmup_ratio: + value: 0.06 +warmup_steps: + value: 0 +weight_decay: + value: 0.03 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_043608-md2c6b0g/files/output.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_043608-md2c6b0g/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..e29cee25b3313b88a13b681f3d8c745b4e4b0124 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_043608-md2c6b0g/files/output.log @@ -0,0 +1,150 @@ +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... 
+Some weights of BertForSequenceClassification were not initialized from the model checkpoint at /data/nanhuang/Nan/models/DNAbert2_Pretrained and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight'] +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. +/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/accelerate/accelerator.py:439: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead. + self.scaler = torch.cuda.amp.GradScaler(**kwargs) +Using auto half precision backend +***** Running training ***** + Num examples = 28,904 + Num Epochs = 10 + Instantaneous batch size per device = 128 + Total train batch size (w. parallel, distributed & accumulation) = 128 + Gradient Accumulation steps = 1 + Total optimization steps = 2,260 + Number of trainable parameters = 89,188,610 +Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true" + 10%|█ | 226/2260 [00:28<04:02, 8.37it/s]***** Running Evaluation ***** +{'loss': 0.4646, 'learning_rate': 2.2058823529411766e-05, 'epoch': 0.44} +{'loss': 0.3866, 'learning_rate': 2.9096045197740113e-05, 'epoch': 0.88} + Num examples = 3613 + Batch size = 128 + 10%|█ | 226/2260 [00:29<04:02, 8.37itSaving model checkpoint to genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-226 +Configuration saved in genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-226/config.json +{'eval_loss': 0.3561484217643738, 'eval_accuracy': 0.8425131469692776, 'eval_f1': 0.8402503747609833, 'eval_matthews_correlation': 0.6806277133135248, 'eval_precision': 0.8410196579024094, 'eval_recall': 0.8396095161931753, 'eval_runtime': 1.0703, 'eval_samples_per_second': 3375.797, 
'eval_steps_per_second': 27.096, 'epoch': 1.0} +Model weights saved in genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-226/pytorch_model.bin +tokenizer config file saved in genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-226/tokenizer_config.json +Special tokens file saved in genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-226/special_tokens_map.json + 20%|██ | 452/2260 [01:01<03:35, 8.38it/s]***** Running Evaluation ***** +{'loss': 0.3305, 'learning_rate': 2.7683615819209038e-05, 'epoch': 1.33} +{'loss': 0.2826, 'learning_rate': 2.627118644067797e-05, 'epoch': 1.77} + Num examples = 3613 + Batch size = 128 + 20%|██ | 452/2260 [01:03<03:35, 8.38itSaving model checkpoint to genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-452 +Configuration saved in genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-452/config.json +{'eval_loss': 0.2907102406024933, 'eval_accuracy': 0.8782175477442569, 'eval_f1': 0.8781592957685165, 'eval_matthews_correlation': 0.7702174940935076, 'eval_precision': 0.8836075473649414, 'eval_recall': 0.8866158214838993, 'eval_runtime': 1.0555, 'eval_samples_per_second': 3422.906, 'eval_steps_per_second': 27.474, 'epoch': 2.0} +Model weights saved in genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-452/pytorch_model.bin +tokenizer config file saved in genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-452/tokenizer_config.json +Special tokens file saved in 
genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-452/special_tokens_map.json +Deleting older checkpoint [genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-226] due to args.save_total_limit + 30%|███ | 678/2260 [01:35<03:09, 8.34it/s]***** Running Evaluation ***** +{'loss': 0.2378, 'learning_rate': 2.4858757062146894e-05, 'epoch': 2.21} +{'loss': 0.2001, 'learning_rate': 2.3446327683615823e-05, 'epoch': 2.65} + Num examples = 3613 + Batch size = 128 + 30%|███ | 678/2260 [01:36<03:09, 8.34itSaving model checkpoint to genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-678 +Configuration saved in genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-678/config.json +{'eval_loss': 0.24181613326072693, 'eval_accuracy': 0.9031275947965679, 'eval_f1': 0.9024774453787319, 'eval_matthews_correlation': 0.806470766326512, 'eval_precision': 0.901330138812324, 'eval_recall': 0.905149672327138, 'eval_runtime': 1.0568, 'eval_samples_per_second': 3418.949, 'eval_steps_per_second': 27.442, 'epoch': 3.0} +Model weights saved in genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-678/pytorch_model.bin +tokenizer config file saved in genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-678/tokenizer_config.json +Special tokens file saved in genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-678/special_tokens_map.json +Deleting older checkpoint [genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-452] due to args.save_total_limit + 40%|████ | 904/2260 
[02:09<02:42, 8.36it/s]***** Running Evaluation ***** +{'loss': 0.1843, 'learning_rate': 2.2033898305084748e-05, 'epoch': 3.1} +{'loss': 0.1338, 'learning_rate': 2.0621468926553672e-05, 'epoch': 3.54} +{'loss': 0.1218, 'learning_rate': 1.92090395480226e-05, 'epoch': 3.98} + Num examples = 3613 + Batch size = 128 + 40%|████ | 904/2260 [02:10<02:42, 8.36itSaving model checkpoint to genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-904 +Configuration saved in genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-904/config.json +{'eval_loss': 0.24736538529396057, 'eval_accuracy': 0.9097702740105176, 'eval_f1': 0.9089688664709563, 'eval_matthews_correlation': 0.8184477293295029, 'eval_precision': 0.9079389158139595, 'eval_recall': 0.9105128609194197, 'eval_runtime': 1.0552, 'eval_samples_per_second': 3424.113, 'eval_steps_per_second': 27.484, 'epoch': 4.0} +Model weights saved in genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-904/pytorch_model.bin +tokenizer config file saved in genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-904/tokenizer_config.json +Special tokens file saved in genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-904/special_tokens_map.json +Deleting older checkpoint [genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-678] due to args.save_total_limit + 50%|█████ | 1130/2260 [02:43<02:15, 8.37it/s]***** Running Evaluation ***** +{'loss': 0.0768, 'learning_rate': 1.7796610169491526e-05, 'epoch': 4.42} +{'loss': 0.0838, 'learning_rate': 1.6384180790960454e-05, 'epoch': 4.87} + Num examples = 3613 + Batch size = 128 + 50%|█████ | 1130/2260 [02:44<02:15, 
8.37iSaving model checkpoint to genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-1130 +Configuration saved in genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-1130/config.json +{'eval_loss': 0.3227614760398865, 'eval_accuracy': 0.9070024910047052, 'eval_f1': 0.9069014958118373, 'eval_matthews_correlation': 0.8247852591155882, 'eval_precision': 0.9102777373515565, 'eval_recall': 0.9145184235718482, 'eval_runtime': 1.0539, 'eval_samples_per_second': 3428.159, 'eval_steps_per_second': 27.516, 'epoch': 5.0} +Model weights saved in genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-1130/pytorch_model.bin +tokenizer config file saved in genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-1130/tokenizer_config.json +Special tokens file saved in genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-1130/special_tokens_map.json + 60%|██████ | 1356/2260 [03:16<01:52, 8.05it/s]***** Running Evaluation ***** +{'loss': 0.0617, 'learning_rate': 1.4971751412429379e-05, 'epoch': 5.31} +{'loss': 0.0497, 'learning_rate': 1.3559322033898305e-05, 'epoch': 5.75} + Num examples = 3613 + Batch size = 128 + 60%|██████ | 1356/2260 [03:17<01:52, 8.05iSaving model checkpoint to genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-1356 +Configuration saved in genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-1356/config.json +{'eval_loss': 0.31701555848121643, 'eval_accuracy': 0.9255466371436479, 'eval_f1': 0.9252111067120526, 'eval_matthews_correlation': 0.8539533757280905, 'eval_precision': 0.9244825296262367, 'eval_recall': 
0.9294855011815828, 'eval_runtime': 1.0546, 'eval_samples_per_second': 3425.911, 'eval_steps_per_second': 27.498, 'epoch': 6.0} +Model weights saved in genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-1356/pytorch_model.bin +tokenizer config file saved in genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-1356/tokenizer_config.json +Special tokens file saved in genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-1356/special_tokens_map.json +Deleting older checkpoint [genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-904] due to args.save_total_limit +Deleting older checkpoint [genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-1130] due to args.save_total_limit + 70%|███████ | 1582/2260 [03:50<01:20, 8.38it/s]***** Running Evaluation ***** +{'loss': 0.0382, 'learning_rate': 1.2146892655367232e-05, 'epoch': 6.19} +{'loss': 0.0357, 'learning_rate': 1.0734463276836158e-05, 'epoch': 6.64} + Num examples = 3613 + Batch size = 128 + 70%|███████ | 1582/2260 [03:51<01:20, 8.38iSaving model checkpoint to genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-1582 +Configuration saved in genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-1582/config.json +{'eval_loss': 0.37326350808143616, 'eval_accuracy': 0.9266537503459729, 'eval_f1': 0.9263500730195398, 'eval_matthews_correlation': 0.8566986044688514, 'eval_precision': 0.9257981734840697, 'eval_recall': 0.9309157158270056, 'eval_runtime': 1.0553, 'eval_samples_per_second': 3423.551, 'eval_steps_per_second': 27.479, 'epoch': 7.0} +Model weights saved in 
genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-1582/pytorch_model.bin +tokenizer config file saved in genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-1582/tokenizer_config.json +Special tokens file saved in genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-1582/special_tokens_map.json +Deleting older checkpoint [genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-1356] due to args.save_total_limit + 80%|████████ | 1808/2260 [04:23<00:53, 8.37it/s]***** Running Evaluation ***** +{'loss': 0.0248, 'learning_rate': 9.322033898305085e-06, 'epoch': 7.08} +{'loss': 0.0195, 'learning_rate': 7.909604519774012e-06, 'epoch': 7.52} +{'loss': 0.016, 'learning_rate': 6.497175141242938e-06, 'epoch': 7.96} + Num examples = 3613 + Batch size = 128 + 80%|████████ | 1808/2260 [04:25<00:53, 8.37iSaving model checkpoint to genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-1808 +Configuration saved in genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-1808/config.json +{'eval_loss': 0.4228571653366089, 'eval_accuracy': 0.9305286465541102, 'eval_f1': 0.9302050841309717, 'eval_matthews_correlation': 0.8637825322542529, 'eval_precision': 0.9293918338811755, 'eval_recall': 0.9344052472327883, 'eval_runtime': 1.0668, 'eval_samples_per_second': 3386.606, 'eval_steps_per_second': 27.183, 'epoch': 8.0} +Model weights saved in genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-1808/pytorch_model.bin +tokenizer config file saved in 
genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-1808/tokenizer_config.json +Special tokens file saved in genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-1808/special_tokens_map.json +Deleting older checkpoint [genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-1582] due to args.save_total_limit + 90%|█████████ | 2034/2260 [04:57<00:26, 8.38it/s]***** Running Evaluation ***** +{'loss': 0.009, 'learning_rate': 5.084745762711865e-06, 'epoch': 8.41} +{'loss': 0.0132, 'learning_rate': 3.6723163841807913e-06, 'epoch': 8.85} + Num examples = 3613 + Batch size = 128 + 90%|█████████ | 2034/2260 [04:58<00:26, 8.38iSaving model checkpoint to genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-2034 +Configuration saved in genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-2034/config.json +{'eval_loss': 0.479954332113266, 'eval_accuracy': 0.928037641848879, 'eval_f1': 0.927772269127762, 'eval_matthews_correlation': 0.8601944891930503, 'eval_precision': 0.9274914175703679, 'eval_recall': 0.9327189557649391, 'eval_runtime': 1.0529, 'eval_samples_per_second': 3431.362, 'eval_steps_per_second': 27.542, 'epoch': 9.0} +Model weights saved in genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-2034/pytorch_model.bin +tokenizer config file saved in genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-2034/tokenizer_config.json +Special tokens file saved in genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-2034/special_tokens_map.json +100%|██████████| 
2260/2260 [05:30<00:00, 8.40it/s]***** Running Evaluation ***** +{'loss': 0.0088, 'learning_rate': 2.2598870056497174e-06, 'epoch': 9.29} +{'loss': 0.004, 'learning_rate': 8.474576271186441e-07, 'epoch': 9.73} + Num examples = 3613 + Batch size = 128 +100%|██████████| 2260/2260 [05:31<00:00, 8.40iSaving model checkpoint to genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-2260 +Configuration saved in genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-2260/config.json +{'eval_loss': 0.4578400254249573, 'eval_accuracy': 0.9305286465541102, 'eval_f1': 0.930230965774696, 'eval_matthews_correlation': 0.8642878418232358, 'eval_precision': 0.9295883645861114, 'eval_recall': 0.9347146798558896, 'eval_runtime': 1.0531, 'eval_samples_per_second': 3430.884, 'eval_steps_per_second': 27.538, 'epoch': 10.0} +Model weights saved in genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-2260/pytorch_model.bin +tokenizer config file saved in genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-2260/tokenizer_config.json +Special tokens file saved in genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-2260/special_tokens_map.json +Deleting older checkpoint [genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-1808] due to args.save_total_limit +Deleting older checkpoint [genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-2034] due to args.save_total_limit + + +Training completed. 
Do not forget to share your model on huggingface.co/models =) + + +Loading best model from genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/checkpoint-2260 (score: 0.930230965774696). +100%|██████████| 2260/2260 [05:36<00:00, 6.71it/s] +{'train_runtime': 336.6802, 'train_samples_per_second': 858.5, 'train_steps_per_second': 6.713, 'train_loss': 0.12328030103075821, 'epoch': 10.0} +***** Running Evaluation ***** + Num examples = 3614 + Batch size = 128 +100%|██████████| 29/29 [00:01<00:00, 28.09it/s] diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_043608-md2c6b0g/files/requirements.txt b/Finetune-GenomicBenchmarks/wandb/run-20260324_043608-md2c6b0g/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..44d18d32ec4648cd530877d7c8c4758d5e887b9c --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_043608-md2c6b0g/files/requirements.txt @@ -0,0 +1,144 @@ +scipy==1.13.1 +jupyter_core==5.8.1 +smmap==5.0.2 +yarl==1.22.0 +executing==2.2.0 +cffi==2.0.0 +mkl_random==1.2.8 +traitlets==5.14.3 +wandb==0.23.1 +annotated-types==0.7.0 +evaluate==0.4.6 +kiwisolver==1.4.4 +Jinja2==3.1.6 +pyparsing==3.2.0 +mpmath==1.3.0 +debugpy==1.8.16 +nvidia-cuda-nvrtc-cu12==12.8.93 +docker-pycreds==0.4.0 +pycparser==2.23 +anyio==4.12.0 +safetensors==0.7.0 +matplotlib-inline==0.1.7 +Pygments==2.19.2 +numpy==2.0.2 +nvidia-cuda-cupti-cu12==12.8.90 +Bottleneck==1.4.2 +matplotlib==3.9.2 +numexpr==2.10.1 +sip==6.7.12 +aiohappyeyeballs==2.6.1 +ptyprocess==0.7.0 +fsspec==2025.7.0 +accelerate==0.25.0 +zipp==3.23.0 +PyQt5_sip==12.13.0 +pure_eval==0.2.3 +regex==2025.11.3 +aiosignal==1.4.0 +certifi==2025.10.5 +transformers==4.35.2 +mkl-service==2.4.0 +httpx==0.28.1 +mkl_fft==1.3.11 +pickleshare==0.7.5 +ipykernel==6.30.1 +nvidia-nvtx-cu12==12.8.90 +nvidia-cufft-cu12==11.3.3.83 +triton==3.4.0 +numba==0.60.0 +psutil==7.0.0 +contourpy==1.2.1 +PyQt5==5.15.10 +packaging==25.0 +datasets==4.4.1 
+ipython==8.18.1 +sympy==1.14.0 +nvidia-cusolver-cu12==11.7.3.90 +multidict==6.7.0 +jupyter_client==8.6.3 +setuptools==80.9.0 +prompt_toolkit==3.0.51 +six==1.17.0 +GitPython==3.1.45 +pydantic==2.11.7 +nvidia-cublas-cu12==12.8.4.1 +aiohttp==3.13.2 +tzdata==2025.2 +importlib_metadata==8.7.0 +biopython==1.85 +httpcore==1.0.9 +python-dateutil==2.9.0.post0 +llvmlite==0.43.0 +pandas==2.3.3 +scikit-learn==1.6.1 +asttokens==3.0.0 +joblib==1.5.3 +h11==0.16.0 +charset-normalizer==3.4.4 +pyzmq==27.0.2 +multiprocess==0.70.18 +nvidia-nvjitlink-cu12==12.8.93 +sentry-sdk==2.35.0 +pytz==2025.2 +pydantic_core==2.33.2 +MarkupSafe==3.0.3 +brotlicffi==1.0.9.2 +stack_data==0.6.3 +tqdm==4.67.1 +pynndescent==0.5.13 +importlib_resources==6.5.2 +ply==3.11 +pyarrow==21.0.0 +tokenizers==0.15.2 +exceptiongroup==1.3.1 +nvidia-cusparse-cu12==12.5.8.93 +comm==0.2.3 +pillow==11.3.0 +nvidia-cusparselt-cu12==0.7.1 +protobuf==3.20.3 +urllib3==2.5.0 +wheel==0.45.1 +wcwidth==0.2.13 +appdirs==1.4.4 +PySocks==1.7.1 +PyQt6_sip==13.10.2 +umap-learn==0.5.9.post2 +attrs==25.4.0 +platformdirs==4.3.8 +nvidia-cuda-runtime-cu12==12.8.90 +typing-inspection==0.4.1 +huggingface_hub==0.34.4 +decorator==5.2.1 +filelock==3.17.0 +nvidia-nccl-cu12==2.27.3 +fonttools==4.60.1 +xxhash==3.6.0 +dill==0.4.0 +threadpoolctl==3.6.0 +parso==0.8.4 +pysam==0.9.1 +frozenlist==1.8.0 +typing_extensions==4.15.0 +propcache==0.4.1 +tomli==2.2.1 +click==8.1.8 +nvidia-cudnn-cu12==9.10.2.21 +gitdb==4.0.12 +pip==25.3 +tornado==6.5.2 +networkx==3.2.1 +jedi==0.19.2 +idna==3.11 +pexpect==4.9.0 +async-timeout==5.0.1 +hf-xet==1.1.8 +nvidia-curand-cu12==10.3.9.90 +PyYAML==6.0.2 +nvidia-cufile-cu12==1.13.1.3 +setproctitle==1.3.6 +eval_type_backport==0.2.2 +requests==2.32.5 +nest-asyncio==1.6.0 +torch==2.8.0 +cycler==0.11.0 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_043608-md2c6b0g/files/wandb-metadata.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_043608-md2c6b0g/files/wandb-metadata.json new file mode 100644 index 
0000000000000000000000000000000000000000..a0f68455f35140e5136dee29425f4b2754284f27 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_043608-md2c6b0g/files/wandb-metadata.json @@ -0,0 +1,146 @@ +{ + "os": "Linux-5.15.0-126-generic-x86_64-with-glibc2.35", + "python": "CPython 3.9.18", + "startedAt": "2026-03-24T11:36:08.792295Z", + "args": [ + "--model_name_or_path", + "/data/nanhuang/Nan/models/DNAbert2_Pretrained", + "--tokenizer_path", + "/data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json", + "--trust_remote_code", + "True", + "--data_path", + "/data/nanhuang/Nan/ft_data/human_nontata_promoters/split", + "--kmer", + "-1", + "--run_name", + "base5120_human_nontata_promoters_lr3e-5_wd0.03_wr0.06_ep10_seed42", + "--model_max_length", + "100", + "--per_device_train_batch_size", + "128", + "--per_device_eval_batch_size", + "128", + "--gradient_accumulation_steps", + "1", + "--learning_rate", + "3e-5", + "--weight_decay", + "0.03", + "--num_train_epochs", + "10", + "--lr_scheduler_type", + "linear", + "--warmup_steps", + "0", + "--warmup_ratio", + "0.06", + "--fp16", + "--output_dir", + "genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42", + "--evaluation_strategy", + "epoch", + "--save_strategy", + "epoch", + "--load_best_model_at_end", + "True", + "--metric_for_best_model", + "eval_f1", + "--greater_is_better", + "True", + "--save_total_limit", + "1", + "--logging_steps", + "100", + "--overwrite_output_dir", + "True", + "--log_level", + "info", + "--seed", + "42", + "--find_unused_parameters", + "False", + "--project_name", + "genomic_bench_DNAbert2" + ], + "program": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", + "codePath": "train.py", + "codePathLocal": "train.py", + "email": "n5huang@ucsd.edu", + "root": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks", + "host": "u112222", + "executable": "/data/nanhuang/miniconda3/envs/bpe_v2/bin/python", + "cpu_count": 64, + 
"cpu_count_logical": 128, + "gpu": "NVIDIA RTX A6000", + "gpu_count": 8, + "disk": { + "/": { + "total": "3768964489216", + "used": "3558776877056" + } + }, + "memory": { + "total": "1082030182400" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-5a3d8a94-f380-da39-63d2-4cae98f5c2ae" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-cf8724bd-d619-7916-ee26-88d517a20c47" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-48b494ab-4a63-ff4c-5cc8-746af5d27310" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-968c7ea7-97bf-416a-7689-72c141cfc2bb" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-d53c626b-860f-1dec-1cfa-1dfcde78bc88" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-caa40ec7-afcb-5fe0-c53a-85eb54152941" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-18ee7a7f-1bbe-edef-c72c-3abed60917b2" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-a8757d5a-c26e-48c6-a704-dfe62167fc81" + } + ], + "cudaVersion": "12.4", + "writerId": "jraxqt05f0v57ynpiyiipxk9x91p9vz0" +} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_043608-md2c6b0g/files/wandb-summary.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_043608-md2c6b0g/files/wandb-summary.json new file mode 100644 index 
0000000000000000000000000000000000000000..fe07fd7bc74a0ef63b42f282f1f3a77a82332db3 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_043608-md2c6b0g/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb":{"runtime":349},"_step":33,"train/train_loss":0.12328030103075821,"train/learning_rate":8.474576271186441e-07,"eval/accuracy":0.9297177642501383,"eval/precision":0.9294767252581735,"eval/samples_per_second":3360.593,"eval/steps_per_second":26.967,"train/train_steps_per_second":6.713,"train/loss":0.004,"train/train_samples_per_second":858.5,"eval/runtime":1.0754,"train/epoch":10,"eval/matthews_correlation":0.8625877727464153,"eval/f1":0.9295506837288048,"_runtime":349,"eval/loss":0.4450441300868988,"train/train_runtime":336.6802,"train/total_flos":9.8032712560992e+15,"train/global_step":2260,"eval/recall":0.933118736087702,"_timestamp":1.7743525192218764e+09} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_043608-md2c6b0g/logs/debug-core.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_043608-md2c6b0g/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..10ac3d145e9f705c3b9c67f6fb5ec53261d457a9 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_043608-md2c6b0g/logs/debug-core.log @@ -0,0 +1,14 @@ +{"time":"2026-03-24T04:36:09.006778067-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmp7ngm75wb/port-2761692.txt","pid":2761692,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T04:36:09.008331619-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2761692} +{"time":"2026-03-24T04:36:09.00817599-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2761692-2761745-363450788/socket","Net":"unix"}} +{"time":"2026-03-24T04:36:09.178851503-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection 
created","id":"1(@)"} +{"time":"2026-03-24T04:36:09.298474053-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"md2c6b0g","id":"1(@)"} +{"time":"2026-03-24T04:36:09.704847206-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"md2c6b0g","id":"1(@)"} +{"time":"2026-03-24T04:41:59.228333217-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T04:41:59.228471726-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T04:41:59.228534146-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T04:41:59.228648966-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T04:41:59.228828895-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2761692-2761745-363450788/socket","Net":"unix"}} +{"time":"2026-03-24T04:42:00.043621173-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T04:42:00.043690493-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T04:42:00.043726032-07:00","level":"INFO","msg":"server is closed"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_043608-md2c6b0g/logs/debug-internal.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_043608-md2c6b0g/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..690d67e8a029ba2dfb7fec19eaf7025c5dc212e4 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_043608-md2c6b0g/logs/debug-internal.log @@ -0,0 +1,11 @@ +{"time":"2026-03-24T04:36:09.298796291-07:00","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2026-03-24T04:36:09.704387118-07:00","level":"INFO","msg":"stream: created new stream","id":"md2c6b0g"} +{"time":"2026-03-24T04:36:09.704611167-07:00","level":"INFO","msg":"handler: 
started","stream_id":"md2c6b0g"} +{"time":"2026-03-24T04:36:09.704823396-07:00","level":"INFO","msg":"stream: started","id":"md2c6b0g"} +{"time":"2026-03-24T04:36:09.704880976-07:00","level":"INFO","msg":"writer: started","stream_id":"md2c6b0g"} +{"time":"2026-03-24T04:36:09.704890566-07:00","level":"INFO","msg":"sender: started","stream_id":"md2c6b0g"} +{"time":"2026-03-24T04:41:59.228483497-07:00","level":"INFO","msg":"stream: closing","id":"md2c6b0g"} +{"time":"2026-03-24T04:41:59.635869846-07:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2026-03-24T04:42:00.033638315-07:00","level":"INFO","msg":"handler: closed","stream_id":"md2c6b0g"} +{"time":"2026-03-24T04:42:00.033940173-07:00","level":"INFO","msg":"sender: closed","stream_id":"md2c6b0g"} +{"time":"2026-03-24T04:42:00.033972273-07:00","level":"INFO","msg":"stream: closed","id":"md2c6b0g"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_043608-md2c6b0g/logs/debug.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_043608-md2c6b0g/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..0571f4a8fe754f785da914b359d8ab34708ae4a3 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_043608-md2c6b0g/logs/debug.log @@ -0,0 +1,24 @@ +2026-03-24 04:36:08,798 INFO MainThread:2761692 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1 +2026-03-24 04:36:08,798 INFO MainThread:2761692 [wandb_setup.py:_flush():80] Configure stats pid to 2761692 +2026-03-24 04:36:08,798 INFO MainThread:2761692 [wandb_setup.py:_flush():80] Loading settings from /home/nanhuang/.config/wandb/settings +2026-03-24 04:36:08,798 INFO MainThread:2761692 [wandb_setup.py:_flush():80] Loading settings from /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/settings +2026-03-24 04:36:08,798 INFO MainThread:2761692 [wandb_setup.py:_flush():80] Loading settings from environment variables +2026-03-24 04:36:08,798 INFO MainThread:2761692 
[wandb_init.py:setup_run_log_directory():714] Logging user logs to /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_043608-md2c6b0g/logs/debug.log +2026-03-24 04:36:08,798 INFO MainThread:2761692 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_043608-md2c6b0g/logs/debug-internal.log +2026-03-24 04:36:08,798 INFO MainThread:2761692 [wandb_init.py:init():841] calling init triggers +2026-03-24 04:36:08,798 INFO MainThread:2761692 [wandb_init.py:init():846] wandb.init called with sweep_config: {} +config: {'_wandb': {}} +2026-03-24 04:36:08,798 INFO MainThread:2761692 [wandb_init.py:init():889] starting backend +2026-03-24 04:36:09,180 INFO MainThread:2761692 [wandb_init.py:init():892] sending inform_init request +2026-03-24 04:36:09,291 INFO MainThread:2761692 [wandb_init.py:init():900] backend started and connected +2026-03-24 04:36:09,304 INFO MainThread:2761692 [wandb_init.py:init():970] updated telemetry +2026-03-24 04:36:09,307 INFO MainThread:2761692 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout +2026-03-24 04:36:10,052 INFO MainThread:2761692 [wandb_init.py:init():1041] starting run threads in backend +2026-03-24 04:36:10,409 INFO MainThread:2761692 [wandb_run.py:_console_start():2521] atexit reg +2026-03-24 04:36:10,409 INFO MainThread:2761692 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2026-03-24 04:36:10,409 INFO MainThread:2761692 [wandb_run.py:_redirect():2438] Wrapping output streams. +2026-03-24 04:36:10,410 INFO MainThread:2761692 [wandb_run.py:_redirect():2461] Redirects installed. 
+2026-03-24 04:36:10,419 INFO MainThread:2761692 [wandb_init.py:init():1081] run started, returning control to user process +2026-03-24 04:36:21,466 INFO MainThread:2761692 [wandb_run.py:_config_callback():1396] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 512, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['BertForMaskedLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': '/data/nanhuang/Nan/models/DNAbert2_Pretrained', 'transformers_version': '4.35.2', 'model_type': 'bert', 'vocab_size': 4096, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 512, 'type_vocab_size': 2, 
'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'classifier_dropout': None, 'output_dir': 'genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 128, 'per_device_eval_batch_size': 128, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 3e-05, 'weight_decay': 0.03, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 10, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'warmup_ratio': 0.06, 'warmup_steps': 0, 'log_level': 'info', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 'genomic_bench_DNAbert2_output/human_nontata_promoters/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep10_seed42/runs/Mar24_04-36-08_u112222', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 100, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 100, 'save_total_limit': 1, 'save_safetensors': True, 'save_on_each_node': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 100, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': 'base5120_human_nontata_promoters_lr3e-5_wd0.03_wr0.06_ep10_seed42', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 
'load_best_model_at_end': True, 'metric_for_best_model': 'eval_f1', 'greater_is_better': True, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': False, 'include_tokens_per_second': False, 'neftune_noise_alpha': None, 'vocab_file': None, 'cache_dir': None, 'model_max_length': 100, 'find_unused_parameters': False, 'checkpointing': False, 'eval_and_save_results': True, 'save_model': False, 'project_name': 'genomic_bench_DNAbert2'} +2026-03-24 04:41:59,228 INFO wandb-AsyncioManager-main:2761692 [service_client.py:_forward_responses():80] Reached EOF. +2026-03-24 04:41:59,229 INFO wandb-AsyncioManager-main:2761692 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles. 
diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_043608-md2c6b0g/run-md2c6b0g.wandb b/Finetune-GenomicBenchmarks/wandb/run-20260324_043608-md2c6b0g/run-md2c6b0g.wandb new file mode 100644 index 0000000000000000000000000000000000000000..064504cefd9de3a3cf5500ac25ab2a357b65262d --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_043608-md2c6b0g/run-md2c6b0g.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a64b7ed69efe90385cd61d126df4e31b2c8aa355a139d7c274c46248ef65fdf8 +size 423703 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_044207-p84ny1bo/files/config.yaml b/Finetune-GenomicBenchmarks/wandb/run-20260324_044207-p84ny1bo/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fed88b078b315a9453ffe250ea298b78f2e9faa0 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_044207-p84ny1bo/files/config.yaml @@ -0,0 +1,559 @@ +_name_or_path: + value: /data/nanhuang/Nan/models/DNAbert2_Pretrained +_wandb: + value: + cli_version: 0.23.1 + e: + 5edhqpelgdcrkprudgwljs1kf7g2x14s: + args: + - --model_name_or_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained + - --tokenizer_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json + - --trust_remote_code + - "True" + - --data_path + - /data/nanhuang/Nan/ft_data/human_enhancers_cohn/split + - --kmer + - "-1" + - --run_name + - base5120_human_enhancers_cohn_lr3e-5_wd0.0_wr0.03_ep1_seed42 + - --model_max_length + - "100" + - --per_device_train_batch_size + - "128" + - --per_device_eval_batch_size + - "128" + - --gradient_accumulation_steps + - "1" + - --learning_rate + - "3e-5" + - --weight_decay + - "0.0" + - --num_train_epochs + - "1" + - --lr_scheduler_type + - linear + - --warmup_steps + - "0" + - --warmup_ratio + - "0.03" + - --fp16 + - --output_dir + - genomic_bench_DNAbert2_output/human_enhancers_cohn/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep1_seed42 + - --evaluation_strategy + - epoch + - 
--save_strategy + - epoch + - --load_best_model_at_end + - "True" + - --metric_for_best_model + - eval_f1 + - --greater_is_better + - "True" + - --save_total_limit + - "1" + - --logging_steps + - "100" + - --overwrite_output_dir + - "True" + - --log_level + - info + - --seed + - "42" + - --find_unused_parameters + - "False" + - --project_name + - genomic_bench_DNAbert2 + codePath: train.py + codePathLocal: train.py + cpu_count: 64 + cpu_count_logical: 128 + cudaVersion: "12.4" + disk: + /: + total: "3768964489216" + used: "3558777057280" + email: n5huang@ucsd.edu + executable: /data/nanhuang/miniconda3/envs/bpe_v2/bin/python + gpu: NVIDIA RTX A6000 + gpu_count: 8 + gpu_nvidia: + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-5a3d8a94-f380-da39-63d2-4cae98f5c2ae + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-cf8724bd-d619-7916-ee26-88d517a20c47 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-48b494ab-4a63-ff4c-5cc8-746af5d27310 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-968c7ea7-97bf-416a-7689-72c141cfc2bb + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-d53c626b-860f-1dec-1cfa-1dfcde78bc88 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-caa40ec7-afcb-5fe0-c53a-85eb54152941 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-18ee7a7f-1bbe-edef-c72c-3abed60917b2 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-a8757d5a-c26e-48c6-a704-dfe62167fc81 + host: u112222 + memory: + total: "1082030182400" + os: Linux-5.15.0-126-generic-x86_64-with-glibc2.35 + program: 
/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py + python: CPython 3.9.18 + root: /data/nanhuang/Nan/Finetune-GenomicBenchmarks + startedAt: "2026-03-24T11:42:07.657894Z" + writerId: 5edhqpelgdcrkprudgwljs1kf7g2x14s + m: + - "1": train/global_step + "6": + - 3 + "7": [] + - "2": '*' + "5": 1 + "6": + - 1 + "7": [] + python_version: 3.9.18 + t: + "1": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "2": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "3": + - 7 + - 66 + "4": 3.9.18 + "5": 0.23.1 + "6": 4.35.2 + "9": + "1": transformers_trainer + "12": 0.23.1 + "13": linux-x86_64 +adafactor: + value: false +adam_beta1: + value: 0.9 +adam_beta2: + value: 0.999 +adam_epsilon: + value: 1e-08 +add_cross_attention: + value: false +architectures: + value: + - BertForMaskedLM +attention_probs_dropout_prob: + value: 0.1 +auto_find_batch_size: + value: false +bad_words_ids: + value: null +begin_suppress_tokens: + value: null +bf16: + value: false +bf16_full_eval: + value: false +bos_token_id: + value: null +cache_dir: + value: null +checkpointing: + value: false +chunk_size_feed_forward: + value: 0 +classifier_dropout: + value: null +cross_attention_hidden_size: + value: null +data_seed: + value: null +dataloader_drop_last: + value: false +dataloader_num_workers: + value: 0 +dataloader_pin_memory: + value: false +ddp_backend: + value: null +ddp_broadcast_buffers: + value: null +ddp_bucket_cap_mb: + value: null +ddp_find_unused_parameters: + value: null +ddp_timeout: + value: 1800 +debug: + value: [] +decoder_start_token_id: + value: null +deepspeed: + value: null +disable_tqdm: + value: false +dispatch_batches: + value: null +diversity_penalty: + value: 0 +do_eval: + value: true +do_predict: + value: false +do_sample: + value: false +do_train: + value: false +early_stopping: + value: false +encoder_no_repeat_ngram_size: + value: 0 +eos_token_id: + value: null +eval_accumulation_steps: + value: null +eval_and_save_results: + value: true +eval_delay: + value: 0 
+eval_steps: + value: 100 +evaluation_strategy: + value: epoch +exponential_decay_length_penalty: + value: null +find_unused_parameters: + value: false +finetuning_task: + value: null +forced_bos_token_id: + value: null +forced_eos_token_id: + value: null +fp16: + value: true +fp16_backend: + value: auto +fp16_full_eval: + value: false +fp16_opt_level: + value: O1 +fsdp: + value: [] +fsdp_config: + value: + min_num_params: 0 + xla: false + xla_fsdp_grad_ckpt: false +fsdp_min_num_params: + value: 0 +fsdp_transformer_layer_cls_to_wrap: + value: null +full_determinism: + value: false +gradient_accumulation_steps: + value: 1 +gradient_checkpointing: + value: false +gradient_checkpointing_kwargs: + value: null +greater_is_better: + value: true +group_by_length: + value: false +half_precision_backend: + value: auto +hidden_act: + value: gelu +hidden_dropout_prob: + value: 0.1 +hidden_size: + value: 768 +hub_always_push: + value: false +hub_model_id: + value: null +hub_private_repo: + value: false +hub_strategy: + value: every_save +hub_token: + value: +id2label: + value: + "0": LABEL_0 + "1": LABEL_1 +ignore_data_skip: + value: false +include_inputs_for_metrics: + value: false +include_tokens_per_second: + value: false +initializer_range: + value: 0.02 +intermediate_size: + value: 3072 +is_decoder: + value: false +is_encoder_decoder: + value: false +jit_mode_eval: + value: false +label_names: + value: null +label_smoothing_factor: + value: 0 +label2id: + value: + LABEL_0: 0 + LABEL_1: 1 +layer_norm_eps: + value: 1e-12 +learning_rate: + value: 3e-05 +length_column_name: + value: length +length_penalty: + value: 1 +load_best_model_at_end: + value: true +local_rank: + value: 0 +log_level: + value: info +log_level_replica: + value: warning +log_on_each_node: + value: true +logging_dir: + value: genomic_bench_DNAbert2_output/human_enhancers_cohn/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep1_seed42/runs/Mar24_04-42-07_u112222 +logging_first_step: + value: false 
+logging_nan_inf_filter: + value: true +logging_steps: + value: 100 +logging_strategy: + value: steps +lr_scheduler_type: + value: linear +max_grad_norm: + value: 1 +max_length: + value: 512 +max_position_embeddings: + value: 512 +max_steps: + value: -1 +metric_for_best_model: + value: eval_f1 +min_length: + value: 0 +model_max_length: + value: 100 +model_type: + value: bert +mp_parameters: + value: "" +neftune_noise_alpha: + value: null +no_cuda: + value: false +no_repeat_ngram_size: + value: 0 +num_attention_heads: + value: 12 +num_beam_groups: + value: 1 +num_beams: + value: 1 +num_hidden_layers: + value: 12 +num_return_sequences: + value: 1 +num_train_epochs: + value: 1 +optim: + value: adamw_torch +optim_args: + value: null +output_attentions: + value: false +output_dir: + value: genomic_bench_DNAbert2_output/human_enhancers_cohn/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep1_seed42 +output_hidden_states: + value: false +output_scores: + value: false +overwrite_output_dir: + value: true +pad_token_id: + value: 0 +past_index: + value: -1 +per_device_eval_batch_size: + value: 128 +per_device_train_batch_size: + value: 128 +per_gpu_eval_batch_size: + value: null +per_gpu_train_batch_size: + value: null +position_embedding_type: + value: absolute +prediction_loss_only: + value: false +prefix: + value: null +problem_type: + value: null +project_name: + value: genomic_bench_DNAbert2 +push_to_hub: + value: false +push_to_hub_model_id: + value: null +push_to_hub_organization: + value: null +push_to_hub_token: + value: +ray_scope: + value: last +remove_invalid_values: + value: false +remove_unused_columns: + value: true +repetition_penalty: + value: 1 +report_to: + value: + - wandb +resume_from_checkpoint: + value: null +return_dict: + value: true +return_dict_in_generate: + value: false +run_name: + value: base5120_human_enhancers_cohn_lr3e-5_wd0.0_wr0.03_ep1_seed42 +save_model: + value: false +save_on_each_node: + value: false +save_safetensors: + value: true 
+save_steps: + value: 100 +save_strategy: + value: epoch +save_total_limit: + value: 1 +seed: + value: 42 +sep_token_id: + value: null +skip_memory_metrics: + value: true +split_batches: + value: false +suppress_tokens: + value: null +task_specific_params: + value: null +temperature: + value: 1 +tf_legacy_loss: + value: false +tf32: + value: null +tie_encoder_decoder: + value: false +tie_word_embeddings: + value: true +tokenizer_class: + value: null +top_k: + value: 50 +top_p: + value: 1 +torch_compile: + value: false +torch_compile_backend: + value: null +torch_compile_mode: + value: null +torch_dtype: + value: float32 +torchdynamo: + value: null +torchscript: + value: false +tpu_metrics_debug: + value: false +tpu_num_cores: + value: null +transformers_version: + value: 4.35.2 +type_vocab_size: + value: 2 +typical_p: + value: 1 +use_bfloat16: + value: false +use_cache: + value: true +use_cpu: + value: false +use_ipex: + value: false +use_legacy_prediction_loop: + value: false +use_mps_device: + value: false +vocab_file: + value: null +vocab_size: + value: 4096 +warmup_ratio: + value: 0.03 +warmup_steps: + value: 0 +weight_decay: + value: 0 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_044207-p84ny1bo/files/output.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_044207-p84ny1bo/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..11baf4a2f2dce62943c474a44db00f610eb08bc9 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_044207-p84ny1bo/files/output.log @@ -0,0 +1,39 @@ +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... 
+Some weights of BertForSequenceClassification were not initialized from the model checkpoint at /data/nanhuang/Nan/models/DNAbert2_Pretrained and are newly initialized: ['bert.pooler.dense.weight', 'classifier.weight', 'classifier.bias', 'bert.pooler.dense.bias'] +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. +/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/accelerate/accelerator.py:439: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead. + self.scaler = torch.cuda.amp.GradScaler(**kwargs) +Using auto half precision backend +***** Running training ***** + Num examples = 22,232 + Num Epochs = 1 + Instantaneous batch size per device = 128 + Total train batch size (w. parallel, distributed & accumulation) = 128 + Gradient Accumulation steps = 1 + Total optimization steps = 174 + Number of trainable parameters = 89,188,610 +Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true" +100%|██████████| 174/174 [00:30<00:00, 6.31it/s]***** Running Evaluation ***** +{'loss': 0.5599, 'learning_rate': 1.3214285714285714e-05, 'epoch': 0.57} + Num examples = 2779 + Batch size = 128 +100%|██████████| 174/174 [00:31<00:00, 6.31it/Saving model checkpoint to genomic_bench_DNAbert2_output/human_enhancers_cohn/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep1_seed42/checkpoint-174 +Configuration saved in genomic_bench_DNAbert2_output/human_enhancers_cohn/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep1_seed42/checkpoint-174/config.json +{'eval_loss': 0.5351111888885498, 'eval_accuracy': 0.7229219143576826, 'eval_f1': 0.7227702479304581, 'eval_matthews_correlation': 0.4478109097894003, 'eval_precision': 0.7243213139100618, 'eval_recall': 0.7234903668213308, 'eval_runtime': 1.1662, 'eval_samples_per_second': 2383.002, 'eval_steps_per_second': 18.865, 'epoch': 1.0} +Model weights saved in 
genomic_bench_DNAbert2_output/human_enhancers_cohn/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep1_seed42/checkpoint-174/pytorch_model.bin +tokenizer config file saved in genomic_bench_DNAbert2_output/human_enhancers_cohn/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep1_seed42/checkpoint-174/tokenizer_config.json +Special tokens file saved in genomic_bench_DNAbert2_output/human_enhancers_cohn/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep1_seed42/checkpoint-174/special_tokens_map.json + + +Training completed. Do not forget to share your model on huggingface.co/models =) + + +Loading best model from genomic_bench_DNAbert2_output/human_enhancers_cohn/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep1_seed42/checkpoint-174 (score: 0.7227702479304581). +100%|██████████| 174/174 [00:33<00:00, 5.24it/s] +{'train_runtime': 33.211, 'train_samples_per_second': 669.417, 'train_steps_per_second': 5.239, 'train_loss': 0.5500602722167969, 'epoch': 1.0} +***** Running Evaluation ***** + Num examples = 2780 + Batch size = 128 +100%|██████████| 22/22 [00:01<00:00, 20.14it/s] diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_044207-p84ny1bo/files/requirements.txt b/Finetune-GenomicBenchmarks/wandb/run-20260324_044207-p84ny1bo/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..44d18d32ec4648cd530877d7c8c4758d5e887b9c --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_044207-p84ny1bo/files/requirements.txt @@ -0,0 +1,144 @@ +scipy==1.13.1 +jupyter_core==5.8.1 +smmap==5.0.2 +yarl==1.22.0 +executing==2.2.0 +cffi==2.0.0 +mkl_random==1.2.8 +traitlets==5.14.3 +wandb==0.23.1 +annotated-types==0.7.0 +evaluate==0.4.6 +kiwisolver==1.4.4 +Jinja2==3.1.6 +pyparsing==3.2.0 +mpmath==1.3.0 +debugpy==1.8.16 +nvidia-cuda-nvrtc-cu12==12.8.93 +docker-pycreds==0.4.0 +pycparser==2.23 +anyio==4.12.0 +safetensors==0.7.0 +matplotlib-inline==0.1.7 +Pygments==2.19.2 +numpy==2.0.2 +nvidia-cuda-cupti-cu12==12.8.90 +Bottleneck==1.4.2 +matplotlib==3.9.2 
+numexpr==2.10.1 +sip==6.7.12 +aiohappyeyeballs==2.6.1 +ptyprocess==0.7.0 +fsspec==2025.7.0 +accelerate==0.25.0 +zipp==3.23.0 +PyQt5_sip==12.13.0 +pure_eval==0.2.3 +regex==2025.11.3 +aiosignal==1.4.0 +certifi==2025.10.5 +transformers==4.35.2 +mkl-service==2.4.0 +httpx==0.28.1 +mkl_fft==1.3.11 +pickleshare==0.7.5 +ipykernel==6.30.1 +nvidia-nvtx-cu12==12.8.90 +nvidia-cufft-cu12==11.3.3.83 +triton==3.4.0 +numba==0.60.0 +psutil==7.0.0 +contourpy==1.2.1 +PyQt5==5.15.10 +packaging==25.0 +datasets==4.4.1 +ipython==8.18.1 +sympy==1.14.0 +nvidia-cusolver-cu12==11.7.3.90 +multidict==6.7.0 +jupyter_client==8.6.3 +setuptools==80.9.0 +prompt_toolkit==3.0.51 +six==1.17.0 +GitPython==3.1.45 +pydantic==2.11.7 +nvidia-cublas-cu12==12.8.4.1 +aiohttp==3.13.2 +tzdata==2025.2 +importlib_metadata==8.7.0 +biopython==1.85 +httpcore==1.0.9 +python-dateutil==2.9.0.post0 +llvmlite==0.43.0 +pandas==2.3.3 +scikit-learn==1.6.1 +asttokens==3.0.0 +joblib==1.5.3 +h11==0.16.0 +charset-normalizer==3.4.4 +pyzmq==27.0.2 +multiprocess==0.70.18 +nvidia-nvjitlink-cu12==12.8.93 +sentry-sdk==2.35.0 +pytz==2025.2 +pydantic_core==2.33.2 +MarkupSafe==3.0.3 +brotlicffi==1.0.9.2 +stack_data==0.6.3 +tqdm==4.67.1 +pynndescent==0.5.13 +importlib_resources==6.5.2 +ply==3.11 +pyarrow==21.0.0 +tokenizers==0.15.2 +exceptiongroup==1.3.1 +nvidia-cusparse-cu12==12.5.8.93 +comm==0.2.3 +pillow==11.3.0 +nvidia-cusparselt-cu12==0.7.1 +protobuf==3.20.3 +urllib3==2.5.0 +wheel==0.45.1 +wcwidth==0.2.13 +appdirs==1.4.4 +PySocks==1.7.1 +PyQt6_sip==13.10.2 +umap-learn==0.5.9.post2 +attrs==25.4.0 +platformdirs==4.3.8 +nvidia-cuda-runtime-cu12==12.8.90 +typing-inspection==0.4.1 +huggingface_hub==0.34.4 +decorator==5.2.1 +filelock==3.17.0 +nvidia-nccl-cu12==2.27.3 +fonttools==4.60.1 +xxhash==3.6.0 +dill==0.4.0 +threadpoolctl==3.6.0 +parso==0.8.4 +pysam==0.9.1 +frozenlist==1.8.0 +typing_extensions==4.15.0 +propcache==0.4.1 +tomli==2.2.1 +click==8.1.8 +nvidia-cudnn-cu12==9.10.2.21 +gitdb==4.0.12 +pip==25.3 +tornado==6.5.2 
+networkx==3.2.1 +jedi==0.19.2 +idna==3.11 +pexpect==4.9.0 +async-timeout==5.0.1 +hf-xet==1.1.8 +nvidia-curand-cu12==10.3.9.90 +PyYAML==6.0.2 +nvidia-cufile-cu12==1.13.1.3 +setproctitle==1.3.6 +eval_type_backport==0.2.2 +requests==2.32.5 +nest-asyncio==1.6.0 +torch==2.8.0 +cycler==0.11.0 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_044207-p84ny1bo/files/wandb-metadata.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_044207-p84ny1bo/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..f73c44273c14207c103a9707cd62aee4af195d4c --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_044207-p84ny1bo/files/wandb-metadata.json @@ -0,0 +1,146 @@ +{ + "os": "Linux-5.15.0-126-generic-x86_64-with-glibc2.35", + "python": "CPython 3.9.18", + "startedAt": "2026-03-24T11:42:07.657894Z", + "args": [ + "--model_name_or_path", + "/data/nanhuang/Nan/models/DNAbert2_Pretrained", + "--tokenizer_path", + "/data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json", + "--trust_remote_code", + "True", + "--data_path", + "/data/nanhuang/Nan/ft_data/human_enhancers_cohn/split", + "--kmer", + "-1", + "--run_name", + "base5120_human_enhancers_cohn_lr3e-5_wd0.0_wr0.03_ep1_seed42", + "--model_max_length", + "100", + "--per_device_train_batch_size", + "128", + "--per_device_eval_batch_size", + "128", + "--gradient_accumulation_steps", + "1", + "--learning_rate", + "3e-5", + "--weight_decay", + "0.0", + "--num_train_epochs", + "1", + "--lr_scheduler_type", + "linear", + "--warmup_steps", + "0", + "--warmup_ratio", + "0.03", + "--fp16", + "--output_dir", + "genomic_bench_DNAbert2_output/human_enhancers_cohn/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep1_seed42", + "--evaluation_strategy", + "epoch", + "--save_strategy", + "epoch", + "--load_best_model_at_end", + "True", + "--metric_for_best_model", + "eval_f1", + "--greater_is_better", + "True", + "--save_total_limit", + "1", + "--logging_steps", + "100", + 
"--overwrite_output_dir", + "True", + "--log_level", + "info", + "--seed", + "42", + "--find_unused_parameters", + "False", + "--project_name", + "genomic_bench_DNAbert2" + ], + "program": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", + "codePath": "train.py", + "codePathLocal": "train.py", + "email": "n5huang@ucsd.edu", + "root": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks", + "host": "u112222", + "executable": "/data/nanhuang/miniconda3/envs/bpe_v2/bin/python", + "cpu_count": 64, + "cpu_count_logical": 128, + "gpu": "NVIDIA RTX A6000", + "gpu_count": 8, + "disk": { + "/": { + "total": "3768964489216", + "used": "3558777057280" + } + }, + "memory": { + "total": "1082030182400" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-5a3d8a94-f380-da39-63d2-4cae98f5c2ae" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-cf8724bd-d619-7916-ee26-88d517a20c47" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-48b494ab-4a63-ff4c-5cc8-746af5d27310" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-968c7ea7-97bf-416a-7689-72c141cfc2bb" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-d53c626b-860f-1dec-1cfa-1dfcde78bc88" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-caa40ec7-afcb-5fe0-c53a-85eb54152941" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-18ee7a7f-1bbe-edef-c72c-3abed60917b2" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": 
"51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-a8757d5a-c26e-48c6-a704-dfe62167fc81" + } + ], + "cudaVersion": "12.4", + "writerId": "5edhqpelgdcrkprudgwljs1kf7g2x14s" +} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_044207-p84ny1bo/files/wandb-summary.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_044207-p84ny1bo/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..c3cc23178c3fdeb73fa2379eb93a8bd69171c02f --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_044207-p84ny1bo/files/wandb-summary.json @@ -0,0 +1 @@ +{"eval/steps_per_second":19.207,"_timestamp":1.7743525685485399e+09,"train/global_step":174,"train/train_samples_per_second":669.417,"train/train_runtime":33.211,"train/train_loss":0.5500602722167969,"train/train_steps_per_second":5.239,"eval/f1":0.7276967640297661,"train/learning_rate":1.3214285714285714e-05,"eval/matthews_correlation":0.4587468093902783,"eval/samples_per_second":2427.082,"eval/precision":0.7305107639295452,"train/loss":0.5599,"eval/loss":0.5238381624221802,"_runtime":39,"_step":3,"train/total_flos":1.142477535696e+15,"eval/accuracy":0.7284172661870504,"_wandb":{"runtime":39},"train/epoch":1,"eval/runtime":1.1454,"eval/recall":0.7282416572855608} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_044207-p84ny1bo/logs/debug-core.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_044207-p84ny1bo/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..52c8498e0f51ec0cbef4339f88d4125482f51364 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_044207-p84ny1bo/logs/debug-core.log @@ -0,0 +1,14 @@ +{"time":"2026-03-24T04:42:07.775684704-07:00","level":"INFO","msg":"main: starting 
server","port-filename":"/tmp/tmp6bapby34/port-2764321.txt","pid":2764321,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T04:42:07.777341525-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2764321} +{"time":"2026-03-24T04:42:07.777336015-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2764321-2764360-1253215542/socket","Net":"unix"}} +{"time":"2026-03-24T04:42:07.960523088-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T04:42:08.022039816-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"p84ny1bo","id":"1(@)"} +{"time":"2026-03-24T04:42:08.414138007-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"p84ny1bo","id":"1(@)"} +{"time":"2026-03-24T04:42:48.553398282-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T04:42:48.553567071-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T04:42:48.553631961-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T04:42:48.553673631-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T04:42:48.553945869-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2764321-2764360-1253215542/socket","Net":"unix"}} +{"time":"2026-03-24T04:42:49.514536213-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T04:42:49.514604113-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T04:42:49.514638923-07:00","level":"INFO","msg":"server is closed"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_044207-p84ny1bo/logs/debug-internal.log 
b/Finetune-GenomicBenchmarks/wandb/run-20260324_044207-p84ny1bo/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..79bbdb36a25cf4732afc1364de46c9ed0f626430 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_044207-p84ny1bo/logs/debug-internal.log @@ -0,0 +1,11 @@ +{"time":"2026-03-24T04:42:08.022330784-07:00","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2026-03-24T04:42:08.413726439-07:00","level":"INFO","msg":"stream: created new stream","id":"p84ny1bo"} +{"time":"2026-03-24T04:42:08.413902248-07:00","level":"INFO","msg":"handler: started","stream_id":"p84ny1bo"} +{"time":"2026-03-24T04:42:08.414119017-07:00","level":"INFO","msg":"stream: started","id":"p84ny1bo"} +{"time":"2026-03-24T04:42:08.414133567-07:00","level":"INFO","msg":"writer: started","stream_id":"p84ny1bo"} +{"time":"2026-03-24T04:42:08.414191056-07:00","level":"INFO","msg":"sender: started","stream_id":"p84ny1bo"} +{"time":"2026-03-24T04:42:48.553594511-07:00","level":"INFO","msg":"stream: closing","id":"p84ny1bo"} +{"time":"2026-03-24T04:42:48.922428609-07:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2026-03-24T04:42:49.500484986-07:00","level":"INFO","msg":"handler: closed","stream_id":"p84ny1bo"} +{"time":"2026-03-24T04:42:49.500686495-07:00","level":"INFO","msg":"sender: closed","stream_id":"p84ny1bo"} +{"time":"2026-03-24T04:42:49.500723155-07:00","level":"INFO","msg":"stream: closed","id":"p84ny1bo"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_044207-p84ny1bo/logs/debug.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_044207-p84ny1bo/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..4b3dfe9fb4e89c0841c3caffa7d5da5d6ec12c7e --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_044207-p84ny1bo/logs/debug.log @@ -0,0 +1,24 @@ +2026-03-24 04:42:07,661 INFO MainThread:2764321 [wandb_setup.py:_flush():80] 
Current SDK version is 0.23.1 +2026-03-24 04:42:07,661 INFO MainThread:2764321 [wandb_setup.py:_flush():80] Configure stats pid to 2764321 +2026-03-24 04:42:07,661 INFO MainThread:2764321 [wandb_setup.py:_flush():80] Loading settings from /home/nanhuang/.config/wandb/settings +2026-03-24 04:42:07,661 INFO MainThread:2764321 [wandb_setup.py:_flush():80] Loading settings from /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/settings +2026-03-24 04:42:07,661 INFO MainThread:2764321 [wandb_setup.py:_flush():80] Loading settings from environment variables +2026-03-24 04:42:07,662 INFO MainThread:2764321 [wandb_init.py:setup_run_log_directory():714] Logging user logs to /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_044207-p84ny1bo/logs/debug.log +2026-03-24 04:42:07,662 INFO MainThread:2764321 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_044207-p84ny1bo/logs/debug-internal.log +2026-03-24 04:42:07,662 INFO MainThread:2764321 [wandb_init.py:init():841] calling init triggers +2026-03-24 04:42:07,662 INFO MainThread:2764321 [wandb_init.py:init():846] wandb.init called with sweep_config: {} +config: {'_wandb': {}} +2026-03-24 04:42:07,662 INFO MainThread:2764321 [wandb_init.py:init():889] starting backend +2026-03-24 04:42:07,961 INFO MainThread:2764321 [wandb_init.py:init():892] sending inform_init request +2026-03-24 04:42:08,019 INFO MainThread:2764321 [wandb_init.py:init():900] backend started and connected +2026-03-24 04:42:08,026 INFO MainThread:2764321 [wandb_init.py:init():970] updated telemetry +2026-03-24 04:42:08,028 INFO MainThread:2764321 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout +2026-03-24 04:42:09,139 INFO MainThread:2764321 [wandb_init.py:init():1041] starting run threads in backend +2026-03-24 04:42:09,281 INFO MainThread:2764321 [wandb_run.py:_console_start():2521] atexit reg +2026-03-24 04:42:09,281 INFO 
MainThread:2764321 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2026-03-24 04:42:09,281 INFO MainThread:2764321 [wandb_run.py:_redirect():2438] Wrapping output streams. +2026-03-24 04:42:09,281 INFO MainThread:2764321 [wandb_run.py:_redirect():2461] Redirects installed. +2026-03-24 04:42:09,286 INFO MainThread:2764321 [wandb_init.py:init():1081] run started, returning control to user process +2026-03-24 04:42:14,190 INFO MainThread:2764321 [wandb_run.py:_config_callback():1396] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 512, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['BertForMaskedLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': '/data/nanhuang/Nan/models/DNAbert2_Pretrained', 'transformers_version': '4.35.2', 
'model_type': 'bert', 'vocab_size': 4096, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 512, 'type_vocab_size': 2, 'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'classifier_dropout': None, 'output_dir': 'genomic_bench_DNAbert2_output/human_enhancers_cohn/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep1_seed42', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 128, 'per_device_eval_batch_size': 128, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 3e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 1, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'warmup_ratio': 0.03, 'warmup_steps': 0, 'log_level': 'info', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 'genomic_bench_DNAbert2_output/human_enhancers_cohn/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep1_seed42/runs/Mar24_04-42-07_u112222', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 100, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 100, 'save_total_limit': 1, 'save_safetensors': True, 'save_on_each_node': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': 
[], 'dataloader_drop_last': False, 'eval_steps': 100, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': 'base5120_human_enhancers_cohn_lr3e-5_wd0.0_wr0.03_ep1_seed42', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'eval_f1', 'greater_is_better': True, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': False, 'include_tokens_per_second': False, 'neftune_noise_alpha': None, 'vocab_file': None, 'cache_dir': None, 'model_max_length': 100, 'find_unused_parameters': False, 'checkpointing': False, 'eval_and_save_results': True, 'save_model': False, 'project_name': 'genomic_bench_DNAbert2'} +2026-03-24 04:42:48,553 INFO wandb-AsyncioManager-main:2764321 [service_client.py:_forward_responses():80] Reached EOF. 
+2026-03-24 04:42:48,553 INFO wandb-AsyncioManager-main:2764321 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles. diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_044207-p84ny1bo/run-p84ny1bo.wandb b/Finetune-GenomicBenchmarks/wandb/run-20260324_044207-p84ny1bo/run-p84ny1bo.wandb new file mode 100644 index 0000000000000000000000000000000000000000..a947d7643e84abda226bb0461a1cc012a6151037 Binary files /dev/null and b/Finetune-GenomicBenchmarks/wandb/run-20260324_044207-p84ny1bo/run-p84ny1bo.wandb differ diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_044256-f0nbbv1z/files/config.yaml b/Finetune-GenomicBenchmarks/wandb/run-20260324_044256-f0nbbv1z/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2daa40d970d30f0d8757a45c02e56c2a515ae296 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_044256-f0nbbv1z/files/config.yaml @@ -0,0 +1,559 @@ +_name_or_path: + value: /data/nanhuang/Nan/models/DNAbert2_Pretrained +_wandb: + value: + cli_version: 0.23.1 + e: + kl8qvkcgkhkznxuscjqyvcx9qdzps5co: + args: + - --model_name_or_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained + - --tokenizer_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json + - --trust_remote_code + - "True" + - --data_path + - /data/nanhuang/Nan/ft_data/human_ocr_ensembl/split + - --kmer + - "-1" + - --run_name + - base5120_human_ocr_ensembl_lr3e-5_wd0.01_wr0.05_ep3_seed42 + - --model_max_length + - "100" + - --per_device_train_batch_size + - "128" + - --per_device_eval_batch_size + - "128" + - --gradient_accumulation_steps + - "1" + - --learning_rate + - "3e-5" + - --weight_decay + - "0.01" + - --num_train_epochs + - "3" + - --lr_scheduler_type + - linear + - --warmup_steps + - "0" + - --warmup_ratio + - "0.05" + - --fp16 + - --output_dir + - genomic_bench_DNAbert2_output/human_ocr_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.05_ep3_seed42 + - --evaluation_strategy + - epoch + - --save_strategy + - 
epoch + - --load_best_model_at_end + - "True" + - --metric_for_best_model + - eval_f1 + - --greater_is_better + - "True" + - --save_total_limit + - "1" + - --logging_steps + - "100" + - --overwrite_output_dir + - "True" + - --log_level + - info + - --seed + - "42" + - --find_unused_parameters + - "False" + - --project_name + - genomic_bench_DNAbert2 + codePath: train.py + codePathLocal: train.py + cpu_count: 64 + cpu_count_logical: 128 + cudaVersion: "12.4" + disk: + /: + total: "3768964489216" + used: "3558777077760" + email: n5huang@ucsd.edu + executable: /data/nanhuang/miniconda3/envs/bpe_v2/bin/python + gpu: NVIDIA RTX A6000 + gpu_count: 8 + gpu_nvidia: + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-5a3d8a94-f380-da39-63d2-4cae98f5c2ae + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-cf8724bd-d619-7916-ee26-88d517a20c47 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-48b494ab-4a63-ff4c-5cc8-746af5d27310 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-968c7ea7-97bf-416a-7689-72c141cfc2bb + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-d53c626b-860f-1dec-1cfa-1dfcde78bc88 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-caa40ec7-afcb-5fe0-c53a-85eb54152941 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-18ee7a7f-1bbe-edef-c72c-3abed60917b2 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-a8757d5a-c26e-48c6-a704-dfe62167fc81 + host: u112222 + memory: + total: "1082030182400" + os: Linux-5.15.0-126-generic-x86_64-with-glibc2.35 + program: 
/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py + python: CPython 3.9.18 + root: /data/nanhuang/Nan/Finetune-GenomicBenchmarks + startedAt: "2026-03-24T11:42:56.113209Z" + writerId: kl8qvkcgkhkznxuscjqyvcx9qdzps5co + m: + - "1": train/global_step + "6": + - 3 + "7": [] + - "2": '*' + "5": 1 + "6": + - 1 + "7": [] + python_version: 3.9.18 + t: + "1": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "2": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "3": + - 7 + - 66 + "4": 3.9.18 + "5": 0.23.1 + "6": 4.35.2 + "9": + "1": transformers_trainer + "12": 0.23.1 + "13": linux-x86_64 +adafactor: + value: false +adam_beta1: + value: 0.9 +adam_beta2: + value: 0.999 +adam_epsilon: + value: 1e-08 +add_cross_attention: + value: false +architectures: + value: + - BertForMaskedLM +attention_probs_dropout_prob: + value: 0.1 +auto_find_batch_size: + value: false +bad_words_ids: + value: null +begin_suppress_tokens: + value: null +bf16: + value: false +bf16_full_eval: + value: false +bos_token_id: + value: null +cache_dir: + value: null +checkpointing: + value: false +chunk_size_feed_forward: + value: 0 +classifier_dropout: + value: null +cross_attention_hidden_size: + value: null +data_seed: + value: null +dataloader_drop_last: + value: false +dataloader_num_workers: + value: 0 +dataloader_pin_memory: + value: false +ddp_backend: + value: null +ddp_broadcast_buffers: + value: null +ddp_bucket_cap_mb: + value: null +ddp_find_unused_parameters: + value: null +ddp_timeout: + value: 1800 +debug: + value: [] +decoder_start_token_id: + value: null +deepspeed: + value: null +disable_tqdm: + value: false +dispatch_batches: + value: null +diversity_penalty: + value: 0 +do_eval: + value: true +do_predict: + value: false +do_sample: + value: false +do_train: + value: false +early_stopping: + value: false +encoder_no_repeat_ngram_size: + value: 0 +eos_token_id: + value: null +eval_accumulation_steps: + value: null +eval_and_save_results: + value: true +eval_delay: + value: 0 
+eval_steps: + value: 100 +evaluation_strategy: + value: epoch +exponential_decay_length_penalty: + value: null +find_unused_parameters: + value: false +finetuning_task: + value: null +forced_bos_token_id: + value: null +forced_eos_token_id: + value: null +fp16: + value: true +fp16_backend: + value: auto +fp16_full_eval: + value: false +fp16_opt_level: + value: O1 +fsdp: + value: [] +fsdp_config: + value: + min_num_params: 0 + xla: false + xla_fsdp_grad_ckpt: false +fsdp_min_num_params: + value: 0 +fsdp_transformer_layer_cls_to_wrap: + value: null +full_determinism: + value: false +gradient_accumulation_steps: + value: 1 +gradient_checkpointing: + value: false +gradient_checkpointing_kwargs: + value: null +greater_is_better: + value: true +group_by_length: + value: false +half_precision_backend: + value: auto +hidden_act: + value: gelu +hidden_dropout_prob: + value: 0.1 +hidden_size: + value: 768 +hub_always_push: + value: false +hub_model_id: + value: null +hub_private_repo: + value: false +hub_strategy: + value: every_save +hub_token: + value: +id2label: + value: + "0": LABEL_0 + "1": LABEL_1 +ignore_data_skip: + value: false +include_inputs_for_metrics: + value: false +include_tokens_per_second: + value: false +initializer_range: + value: 0.02 +intermediate_size: + value: 3072 +is_decoder: + value: false +is_encoder_decoder: + value: false +jit_mode_eval: + value: false +label_names: + value: null +label_smoothing_factor: + value: 0 +label2id: + value: + LABEL_0: 0 + LABEL_1: 1 +layer_norm_eps: + value: 1e-12 +learning_rate: + value: 3e-05 +length_column_name: + value: length +length_penalty: + value: 1 +load_best_model_at_end: + value: true +local_rank: + value: 0 +log_level: + value: info +log_level_replica: + value: warning +log_on_each_node: + value: true +logging_dir: + value: genomic_bench_DNAbert2_output/human_ocr_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.05_ep3_seed42/runs/Mar24_04-42-55_u112222 +logging_first_step: + value: false 
+logging_nan_inf_filter: + value: true +logging_steps: + value: 100 +logging_strategy: + value: steps +lr_scheduler_type: + value: linear +max_grad_norm: + value: 1 +max_length: + value: 512 +max_position_embeddings: + value: 512 +max_steps: + value: -1 +metric_for_best_model: + value: eval_f1 +min_length: + value: 0 +model_max_length: + value: 100 +model_type: + value: bert +mp_parameters: + value: "" +neftune_noise_alpha: + value: null +no_cuda: + value: false +no_repeat_ngram_size: + value: 0 +num_attention_heads: + value: 12 +num_beam_groups: + value: 1 +num_beams: + value: 1 +num_hidden_layers: + value: 12 +num_return_sequences: + value: 1 +num_train_epochs: + value: 3 +optim: + value: adamw_torch +optim_args: + value: null +output_attentions: + value: false +output_dir: + value: genomic_bench_DNAbert2_output/human_ocr_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.05_ep3_seed42 +output_hidden_states: + value: false +output_scores: + value: false +overwrite_output_dir: + value: true +pad_token_id: + value: 0 +past_index: + value: -1 +per_device_eval_batch_size: + value: 128 +per_device_train_batch_size: + value: 128 +per_gpu_eval_batch_size: + value: null +per_gpu_train_batch_size: + value: null +position_embedding_type: + value: absolute +prediction_loss_only: + value: false +prefix: + value: null +problem_type: + value: null +project_name: + value: genomic_bench_DNAbert2 +push_to_hub: + value: false +push_to_hub_model_id: + value: null +push_to_hub_organization: + value: null +push_to_hub_token: + value: +ray_scope: + value: last +remove_invalid_values: + value: false +remove_unused_columns: + value: true +repetition_penalty: + value: 1 +report_to: + value: + - wandb +resume_from_checkpoint: + value: null +return_dict: + value: true +return_dict_in_generate: + value: false +run_name: + value: base5120_human_ocr_ensembl_lr3e-5_wd0.01_wr0.05_ep3_seed42 +save_model: + value: false +save_on_each_node: + value: false +save_safetensors: + value: true +save_steps: + 
value: 100 +save_strategy: + value: epoch +save_total_limit: + value: 1 +seed: + value: 42 +sep_token_id: + value: null +skip_memory_metrics: + value: true +split_batches: + value: false +suppress_tokens: + value: null +task_specific_params: + value: null +temperature: + value: 1 +tf_legacy_loss: + value: false +tf32: + value: null +tie_encoder_decoder: + value: false +tie_word_embeddings: + value: true +tokenizer_class: + value: null +top_k: + value: 50 +top_p: + value: 1 +torch_compile: + value: false +torch_compile_backend: + value: null +torch_compile_mode: + value: null +torch_dtype: + value: float32 +torchdynamo: + value: null +torchscript: + value: false +tpu_metrics_debug: + value: false +tpu_num_cores: + value: null +transformers_version: + value: 4.35.2 +type_vocab_size: + value: 2 +typical_p: + value: 1 +use_bfloat16: + value: false +use_cache: + value: true +use_cpu: + value: false +use_ipex: + value: false +use_legacy_prediction_loop: + value: false +use_mps_device: + value: false +vocab_file: + value: null +vocab_size: + value: 4096 +warmup_ratio: + value: 0.05 +warmup_steps: + value: 0 +weight_decay: + value: 0.01 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_044256-f0nbbv1z/files/output.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_044256-f0nbbv1z/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..f9c2da368bc62d600bd778e81f9938709786f38a --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_044256-f0nbbv1z/files/output.log @@ -0,0 +1,90 @@ +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... 
+Some weights of BertForSequenceClassification were not initialized from the model checkpoint at /data/nanhuang/Nan/models/DNAbert2_Pretrained and are newly initialized: ['bert.pooler.dense.bias', 'classifier.bias', 'classifier.weight', 'bert.pooler.dense.weight'] +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. +/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/accelerate/accelerator.py:439: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead. + self.scaler = torch.cuda.amp.GradScaler(**kwargs) +Using auto half precision backend +***** Running training ***** + Num examples = 139,804 + Num Epochs = 3 + Instantaneous batch size per device = 128 + Total train batch size (w. parallel, distributed & accumulation) = 128 + Gradient Accumulation steps = 1 + Total optimization steps = 3,279 + Number of trainable parameters = 89,188,610 +Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true" + 33%|███▎ | 1092/3279 [03:08<06:22, 5.72it/s]***** Running Evaluation ***** +{'loss': 0.6694, 'learning_rate': 1.8292682926829268e-05, 'epoch': 0.09} +{'loss': 0.6102, 'learning_rate': 2.9653290529695024e-05, 'epoch': 0.18} +{'loss': 0.5807, 'learning_rate': 2.8690208667736757e-05, 'epoch': 0.27} +{'loss': 0.5773, 'learning_rate': 2.772712680577849e-05, 'epoch': 0.37} +{'loss': 0.5638, 'learning_rate': 2.6764044943820228e-05, 'epoch': 0.46} +{'loss': 0.5533, 'learning_rate': 2.580096308186196e-05, 'epoch': 0.55} +{'loss': 0.5585, 'learning_rate': 2.483788121990369e-05, 'epoch': 0.64} +{'loss': 0.5493, 'learning_rate': 2.3874799357945425e-05, 'epoch': 0.73} +{'loss': 0.5406, 'learning_rate': 2.291171749598716e-05, 'epoch': 0.82} +{'loss': 0.5343, 'learning_rate': 2.1948635634028892e-05, 'epoch': 0.91} + Num examples = 17476 + Batch size = 128 + 33%|███▎ | 1093/3279 [03:16<06:22, 5.72it/Saving 
model checkpoint to genomic_bench_DNAbert2_output/human_ocr_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.05_ep3_seed42/checkpoint-1093 +Configuration saved in genomic_bench_DNAbert2_output/human_ocr_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.05_ep3_seed42/checkpoint-1093/config.json +{'eval_loss': 0.5247864723205566, 'eval_accuracy': 0.7294575417715724, 'eval_f1': 0.7291296847420912, 'eval_matthews_correlation': 0.45931696274160566, 'eval_precision': 0.7300894822512383, 'eval_recall': 0.7292282878360884, 'eval_runtime': 7.2288, 'eval_samples_per_second': 2417.545, 'eval_steps_per_second': 18.952, 'epoch': 1.0} +Model weights saved in genomic_bench_DNAbert2_output/human_ocr_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.05_ep3_seed42/checkpoint-1093/pytorch_model.bin +tokenizer config file saved in genomic_bench_DNAbert2_output/human_ocr_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.05_ep3_seed42/checkpoint-1093/tokenizer_config.json +Special tokens file saved in genomic_bench_DNAbert2_output/human_ocr_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.05_ep3_seed42/checkpoint-1093/special_tokens_map.json + 67%|██████▋ | 2185/3279 [06:26<03:11, 5.73it/s]***** Running Evaluation ***** +{'loss': 0.5383, 'learning_rate': 2.0985553772070626e-05, 'epoch': 1.01} +{'loss': 0.5082, 'learning_rate': 2.0022471910112362e-05, 'epoch': 1.1} +{'loss': 0.5053, 'learning_rate': 1.9059390048154096e-05, 'epoch': 1.19} +{'loss': 0.5087, 'learning_rate': 1.8096308186195826e-05, 'epoch': 1.28} +{'loss': 0.5081, 'learning_rate': 1.713322632423756e-05, 'epoch': 1.37} +{'loss': 0.5005, 'learning_rate': 1.6170144462279293e-05, 'epoch': 1.46} +{'loss': 0.4988, 'learning_rate': 1.5207062600321028e-05, 'epoch': 1.56} +{'loss': 0.4994, 'learning_rate': 1.4243980738362762e-05, 'epoch': 1.65} +{'loss': 0.4923, 'learning_rate': 1.3280898876404494e-05, 'epoch': 1.74} +{'loss': 0.4969, 'learning_rate': 1.2317817014446229e-05, 'epoch': 1.83} +{'loss': 0.4998, 'learning_rate': 1.1354735152487962e-05, 
'epoch': 1.92} + Num examples = 17476 + Batch size = 128 + 67%|██████▋ | 2186/3279 [06:33<03:10, 5.73it/Saving model checkpoint to genomic_bench_DNAbert2_output/human_ocr_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.05_ep3_seed42/checkpoint-2186 +Configuration saved in genomic_bench_DNAbert2_output/human_ocr_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.05_ep3_seed42/checkpoint-2186/config.json +{'eval_loss': 0.5253973603248596, 'eval_accuracy': 0.7334630350194552, 'eval_f1': 0.7331711549902049, 'eval_matthews_correlation': 0.468812256718811, 'eval_precision': 0.7350440286756788, 'eval_recall': 0.7337699592796829, 'eval_runtime': 7.2126, 'eval_samples_per_second': 2422.974, 'eval_steps_per_second': 18.994, 'epoch': 2.0} +Model weights saved in genomic_bench_DNAbert2_output/human_ocr_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.05_ep3_seed42/checkpoint-2186/pytorch_model.bin +tokenizer config file saved in genomic_bench_DNAbert2_output/human_ocr_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.05_ep3_seed42/checkpoint-2186/tokenizer_config.json +Special tokens file saved in genomic_bench_DNAbert2_output/human_ocr_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.05_ep3_seed42/checkpoint-2186/special_tokens_map.json +Deleting older checkpoint [genomic_bench_DNAbert2_output/human_ocr_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.05_ep3_seed42/checkpoint-1093] due to args.save_total_limit +100%|█████████▉| 3278/3279 [09:44<00:00, 5.72it/s]***** Running Evaluation ***** +{'loss': 0.4906, 'learning_rate': 1.0391653290529694e-05, 'epoch': 2.01} +{'loss': 0.4667, 'learning_rate': 9.428571428571428e-06, 'epoch': 2.1} +{'loss': 0.4585, 'learning_rate': 8.465489566613163e-06, 'epoch': 2.2} +{'loss': 0.4587, 'learning_rate': 7.502407704654897e-06, 'epoch': 2.29} +{'loss': 0.4511, 'learning_rate': 6.539325842696629e-06, 'epoch': 2.38} +{'loss': 0.4531, 'learning_rate': 5.576243980738363e-06, 'epoch': 2.47} +{'loss': 0.4514, 'learning_rate': 4.613162118780096e-06, 'epoch': 2.56} 
+{'loss': 0.452, 'learning_rate': 3.65008025682183e-06, 'epoch': 2.65} +{'loss': 0.4472, 'learning_rate': 2.6869983948635634e-06, 'epoch': 2.74} +{'loss': 0.45, 'learning_rate': 1.7239165329052971e-06, 'epoch': 2.84} +{'loss': 0.4487, 'learning_rate': 7.608346709470304e-07, 'epoch': 2.93} + Num examples = 17476 + Batch size = 128 +100%|██████████| 3279/3279 [09:51<00:00, 5.72it/Saving model checkpoint to genomic_bench_DNAbert2_output/human_ocr_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.05_ep3_seed42/checkpoint-3279 +Configuration saved in genomic_bench_DNAbert2_output/human_ocr_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.05_ep3_seed42/checkpoint-3279/config.json +{'eval_loss': 0.5309367179870605, 'eval_accuracy': 0.7359235523002976, 'eval_f1': 0.7358749762534129, 'eval_matthews_correlation': 0.4717971985436896, 'eval_precision': 0.7359348554499333, 'eval_recall': 0.7358623486652538, 'eval_runtime': 7.2043, 'eval_samples_per_second': 2425.787, 'eval_steps_per_second': 19.017, 'epoch': 3.0} +Model weights saved in genomic_bench_DNAbert2_output/human_ocr_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.05_ep3_seed42/checkpoint-3279/pytorch_model.bin +tokenizer config file saved in genomic_bench_DNAbert2_output/human_ocr_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.05_ep3_seed42/checkpoint-3279/tokenizer_config.json +Special tokens file saved in genomic_bench_DNAbert2_output/human_ocr_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.05_ep3_seed42/checkpoint-3279/special_tokens_map.json +Deleting older checkpoint [genomic_bench_DNAbert2_output/human_ocr_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.05_ep3_seed42/checkpoint-2186] due to args.save_total_limit + + +Training completed. Do not forget to share your model on huggingface.co/models =) + + +Loading best model from genomic_bench_DNAbert2_output/human_ocr_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.05_ep3_seed42/checkpoint-3279 (score: 0.7358749762534129). 
+100%|██████████| 3279/3279 [09:53<00:00, 5.52it/s] +{'train_runtime': 593.8685, 'train_samples_per_second': 706.237, 'train_steps_per_second': 5.521, 'train_loss': 0.5085168427248923, 'epoch': 3.0} +***** Running Evaluation ***** + Num examples = 17476 + Batch size = 128 +100%|██████████| 137/137 [00:07<00:00, 19.23it/s] diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_044256-f0nbbv1z/files/requirements.txt b/Finetune-GenomicBenchmarks/wandb/run-20260324_044256-f0nbbv1z/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..44d18d32ec4648cd530877d7c8c4758d5e887b9c --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_044256-f0nbbv1z/files/requirements.txt @@ -0,0 +1,144 @@ +scipy==1.13.1 +jupyter_core==5.8.1 +smmap==5.0.2 +yarl==1.22.0 +executing==2.2.0 +cffi==2.0.0 +mkl_random==1.2.8 +traitlets==5.14.3 +wandb==0.23.1 +annotated-types==0.7.0 +evaluate==0.4.6 +kiwisolver==1.4.4 +Jinja2==3.1.6 +pyparsing==3.2.0 +mpmath==1.3.0 +debugpy==1.8.16 +nvidia-cuda-nvrtc-cu12==12.8.93 +docker-pycreds==0.4.0 +pycparser==2.23 +anyio==4.12.0 +safetensors==0.7.0 +matplotlib-inline==0.1.7 +Pygments==2.19.2 +numpy==2.0.2 +nvidia-cuda-cupti-cu12==12.8.90 +Bottleneck==1.4.2 +matplotlib==3.9.2 +numexpr==2.10.1 +sip==6.7.12 +aiohappyeyeballs==2.6.1 +ptyprocess==0.7.0 +fsspec==2025.7.0 +accelerate==0.25.0 +zipp==3.23.0 +PyQt5_sip==12.13.0 +pure_eval==0.2.3 +regex==2025.11.3 +aiosignal==1.4.0 +certifi==2025.10.5 +transformers==4.35.2 +mkl-service==2.4.0 +httpx==0.28.1 +mkl_fft==1.3.11 +pickleshare==0.7.5 +ipykernel==6.30.1 +nvidia-nvtx-cu12==12.8.90 +nvidia-cufft-cu12==11.3.3.83 +triton==3.4.0 +numba==0.60.0 +psutil==7.0.0 +contourpy==1.2.1 +PyQt5==5.15.10 +packaging==25.0 +datasets==4.4.1 +ipython==8.18.1 +sympy==1.14.0 +nvidia-cusolver-cu12==11.7.3.90 +multidict==6.7.0 +jupyter_client==8.6.3 +setuptools==80.9.0 +prompt_toolkit==3.0.51 +six==1.17.0 +GitPython==3.1.45 +pydantic==2.11.7 +nvidia-cublas-cu12==12.8.4.1 
+aiohttp==3.13.2 +tzdata==2025.2 +importlib_metadata==8.7.0 +biopython==1.85 +httpcore==1.0.9 +python-dateutil==2.9.0.post0 +llvmlite==0.43.0 +pandas==2.3.3 +scikit-learn==1.6.1 +asttokens==3.0.0 +joblib==1.5.3 +h11==0.16.0 +charset-normalizer==3.4.4 +pyzmq==27.0.2 +multiprocess==0.70.18 +nvidia-nvjitlink-cu12==12.8.93 +sentry-sdk==2.35.0 +pytz==2025.2 +pydantic_core==2.33.2 +MarkupSafe==3.0.3 +brotlicffi==1.0.9.2 +stack_data==0.6.3 +tqdm==4.67.1 +pynndescent==0.5.13 +importlib_resources==6.5.2 +ply==3.11 +pyarrow==21.0.0 +tokenizers==0.15.2 +exceptiongroup==1.3.1 +nvidia-cusparse-cu12==12.5.8.93 +comm==0.2.3 +pillow==11.3.0 +nvidia-cusparselt-cu12==0.7.1 +protobuf==3.20.3 +urllib3==2.5.0 +wheel==0.45.1 +wcwidth==0.2.13 +appdirs==1.4.4 +PySocks==1.7.1 +PyQt6_sip==13.10.2 +umap-learn==0.5.9.post2 +attrs==25.4.0 +platformdirs==4.3.8 +nvidia-cuda-runtime-cu12==12.8.90 +typing-inspection==0.4.1 +huggingface_hub==0.34.4 +decorator==5.2.1 +filelock==3.17.0 +nvidia-nccl-cu12==2.27.3 +fonttools==4.60.1 +xxhash==3.6.0 +dill==0.4.0 +threadpoolctl==3.6.0 +parso==0.8.4 +pysam==0.9.1 +frozenlist==1.8.0 +typing_extensions==4.15.0 +propcache==0.4.1 +tomli==2.2.1 +click==8.1.8 +nvidia-cudnn-cu12==9.10.2.21 +gitdb==4.0.12 +pip==25.3 +tornado==6.5.2 +networkx==3.2.1 +jedi==0.19.2 +idna==3.11 +pexpect==4.9.0 +async-timeout==5.0.1 +hf-xet==1.1.8 +nvidia-curand-cu12==10.3.9.90 +PyYAML==6.0.2 +nvidia-cufile-cu12==1.13.1.3 +setproctitle==1.3.6 +eval_type_backport==0.2.2 +requests==2.32.5 +nest-asyncio==1.6.0 +torch==2.8.0 +cycler==0.11.0 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_044256-f0nbbv1z/files/wandb-metadata.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_044256-f0nbbv1z/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..eef4472f821221b41fd5761f6b66fe460535d653 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_044256-f0nbbv1z/files/wandb-metadata.json @@ -0,0 +1,146 @@ +{ + "os": 
"Linux-5.15.0-126-generic-x86_64-with-glibc2.35", + "python": "CPython 3.9.18", + "startedAt": "2026-03-24T11:42:56.113209Z", + "args": [ + "--model_name_or_path", + "/data/nanhuang/Nan/models/DNAbert2_Pretrained", + "--tokenizer_path", + "/data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json", + "--trust_remote_code", + "True", + "--data_path", + "/data/nanhuang/Nan/ft_data/human_ocr_ensembl/split", + "--kmer", + "-1", + "--run_name", + "base5120_human_ocr_ensembl_lr3e-5_wd0.01_wr0.05_ep3_seed42", + "--model_max_length", + "100", + "--per_device_train_batch_size", + "128", + "--per_device_eval_batch_size", + "128", + "--gradient_accumulation_steps", + "1", + "--learning_rate", + "3e-5", + "--weight_decay", + "0.01", + "--num_train_epochs", + "3", + "--lr_scheduler_type", + "linear", + "--warmup_steps", + "0", + "--warmup_ratio", + "0.05", + "--fp16", + "--output_dir", + "genomic_bench_DNAbert2_output/human_ocr_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.05_ep3_seed42", + "--evaluation_strategy", + "epoch", + "--save_strategy", + "epoch", + "--load_best_model_at_end", + "True", + "--metric_for_best_model", + "eval_f1", + "--greater_is_better", + "True", + "--save_total_limit", + "1", + "--logging_steps", + "100", + "--overwrite_output_dir", + "True", + "--log_level", + "info", + "--seed", + "42", + "--find_unused_parameters", + "False", + "--project_name", + "genomic_bench_DNAbert2" + ], + "program": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", + "codePath": "train.py", + "codePathLocal": "train.py", + "email": "n5huang@ucsd.edu", + "root": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks", + "host": "u112222", + "executable": "/data/nanhuang/miniconda3/envs/bpe_v2/bin/python", + "cpu_count": 64, + "cpu_count_logical": 128, + "gpu": "NVIDIA RTX A6000", + "gpu_count": 8, + "disk": { + "/": { + "total": "3768964489216", + "used": "3558777077760" + } + }, + "memory": { + "total": "1082030182400" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA RTX 
A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-5a3d8a94-f380-da39-63d2-4cae98f5c2ae" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-cf8724bd-d619-7916-ee26-88d517a20c47" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-48b494ab-4a63-ff4c-5cc8-746af5d27310" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-968c7ea7-97bf-416a-7689-72c141cfc2bb" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-d53c626b-860f-1dec-1cfa-1dfcde78bc88" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-caa40ec7-afcb-5fe0-c53a-85eb54152941" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-18ee7a7f-1bbe-edef-c72c-3abed60917b2" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-a8757d5a-c26e-48c6-a704-dfe62167fc81" + } + ], + "cudaVersion": "12.4", + "writerId": "kl8qvkcgkhkznxuscjqyvcx9qdzps5co" +} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_044256-f0nbbv1z/files/wandb-summary.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_044256-f0nbbv1z/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..4bc0921c9f93fc9d9cd5024bfaaad6e41663cff4 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_044256-f0nbbv1z/files/wandb-summary.json @@ -0,0 +1 @@ 
+{"train/global_step":3279,"train/train_samples_per_second":706.237,"train/total_flos":2.1553112099736e+16,"eval/f1":0.7384938234495757,"train/train_loss":0.5085168427248923,"train/train_steps_per_second":5.521,"eval/runtime":7.2139,"train/learning_rate":7.608346709470304e-07,"train/epoch":3,"eval/accuracy":0.7384985122453651,"eval/steps_per_second":18.991,"_timestamp":1.7743531997533135e+09,"eval/samples_per_second":2422.552,"_wandb":{"runtime":622},"eval/recall":0.738492909515214,"eval/matthews_correlation":0.4769881305208183,"eval/loss":0.5353624820709229,"_runtime":622,"train/loss":0.4487,"train/train_runtime":593.8685,"eval/precision":0.7384952210112051,"_step":36} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_044256-f0nbbv1z/logs/debug-core.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_044256-f0nbbv1z/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..b70288a4fd9fda0d402dd9c6b8f3f5291f6272ec --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_044256-f0nbbv1z/logs/debug-core.log @@ -0,0 +1,14 @@ +{"time":"2026-03-24T04:42:56.23658948-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmp_mv5s5nm/port-2764799.txt","pid":2764799,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T04:42:56.237898993-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2764799} +{"time":"2026-03-24T04:42:56.237867613-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2764799-2765114-2895144528/socket","Net":"unix"}} +{"time":"2026-03-24T04:42:56.422681961-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T04:42:56.4977444-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"f0nbbv1z","id":"1(@)"} 
+{"time":"2026-03-24T04:42:56.884598754-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"f0nbbv1z","id":"1(@)"} +{"time":"2026-03-24T04:53:19.757941901-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T04:53:19.760292949-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T04:53:19.760336329-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T04:53:19.760407558-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T04:53:19.760663727-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2764799-2765114-2895144528/socket","Net":"unix"}} +{"time":"2026-03-24T04:53:20.5316482-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T04:53:20.53173812-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T04:53:20.5317706-07:00","level":"INFO","msg":"server is closed"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_044256-f0nbbv1z/logs/debug-internal.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_044256-f0nbbv1z/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..45f2800f9ac1e1d67d51f50bcab5d046bab399cc --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_044256-f0nbbv1z/logs/debug-internal.log @@ -0,0 +1,11 @@ +{"time":"2026-03-24T04:42:56.497923788-07:00","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2026-03-24T04:42:56.884287646-07:00","level":"INFO","msg":"stream: created new stream","id":"f0nbbv1z"} +{"time":"2026-03-24T04:42:56.884421726-07:00","level":"INFO","msg":"handler: started","stream_id":"f0nbbv1z"} +{"time":"2026-03-24T04:42:56.884580115-07:00","level":"INFO","msg":"stream: started","id":"f0nbbv1z"} 
+{"time":"2026-03-24T04:42:56.884632625-07:00","level":"INFO","msg":"writer: started","stream_id":"f0nbbv1z"} +{"time":"2026-03-24T04:42:56.884667114-07:00","level":"INFO","msg":"sender: started","stream_id":"f0nbbv1z"} +{"time":"2026-03-24T04:53:19.760281649-07:00","level":"INFO","msg":"stream: closing","id":"f0nbbv1z"} +{"time":"2026-03-24T04:53:20.217401487-07:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2026-03-24T04:53:20.514560567-07:00","level":"INFO","msg":"handler: closed","stream_id":"f0nbbv1z"} +{"time":"2026-03-24T04:53:20.514787686-07:00","level":"INFO","msg":"sender: closed","stream_id":"f0nbbv1z"} +{"time":"2026-03-24T04:53:20.514816376-07:00","level":"INFO","msg":"stream: closed","id":"f0nbbv1z"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_044256-f0nbbv1z/logs/debug.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_044256-f0nbbv1z/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..14500077240de29814129d8338b1cddbb8b8b960 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_044256-f0nbbv1z/logs/debug.log @@ -0,0 +1,24 @@ +2026-03-24 04:42:56,117 INFO MainThread:2764799 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1 +2026-03-24 04:42:56,117 INFO MainThread:2764799 [wandb_setup.py:_flush():80] Configure stats pid to 2764799 +2026-03-24 04:42:56,117 INFO MainThread:2764799 [wandb_setup.py:_flush():80] Loading settings from /home/nanhuang/.config/wandb/settings +2026-03-24 04:42:56,117 INFO MainThread:2764799 [wandb_setup.py:_flush():80] Loading settings from /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/settings +2026-03-24 04:42:56,117 INFO MainThread:2764799 [wandb_setup.py:_flush():80] Loading settings from environment variables +2026-03-24 04:42:56,117 INFO MainThread:2764799 [wandb_init.py:setup_run_log_directory():714] Logging user logs to 
/data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_044256-f0nbbv1z/logs/debug.log +2026-03-24 04:42:56,117 INFO MainThread:2764799 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_044256-f0nbbv1z/logs/debug-internal.log +2026-03-24 04:42:56,117 INFO MainThread:2764799 [wandb_init.py:init():841] calling init triggers +2026-03-24 04:42:56,117 INFO MainThread:2764799 [wandb_init.py:init():846] wandb.init called with sweep_config: {} +config: {'_wandb': {}} +2026-03-24 04:42:56,117 INFO MainThread:2764799 [wandb_init.py:init():889] starting backend +2026-03-24 04:42:56,423 INFO MainThread:2764799 [wandb_init.py:init():892] sending inform_init request +2026-03-24 04:42:56,495 INFO MainThread:2764799 [wandb_init.py:init():900] backend started and connected +2026-03-24 04:42:56,501 INFO MainThread:2764799 [wandb_init.py:init():970] updated telemetry +2026-03-24 04:42:56,503 INFO MainThread:2764799 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout +2026-03-24 04:42:57,246 INFO MainThread:2764799 [wandb_init.py:init():1041] starting run threads in backend +2026-03-24 04:42:57,390 INFO MainThread:2764799 [wandb_run.py:_console_start():2521] atexit reg +2026-03-24 04:42:57,390 INFO MainThread:2764799 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2026-03-24 04:42:57,390 INFO MainThread:2764799 [wandb_run.py:_redirect():2438] Wrapping output streams. +2026-03-24 04:42:57,390 INFO MainThread:2764799 [wandb_run.py:_redirect():2461] Redirects installed. 
+2026-03-24 04:42:57,395 INFO MainThread:2764799 [wandb_init.py:init():1081] run started, returning control to user process +2026-03-24 04:43:18,670 INFO MainThread:2764799 [wandb_run.py:_config_callback():1396] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 512, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['BertForMaskedLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': '/data/nanhuang/Nan/models/DNAbert2_Pretrained', 'transformers_version': '4.35.2', 'model_type': 'bert', 'vocab_size': 4096, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 512, 'type_vocab_size': 2, 
'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'classifier_dropout': None, 'output_dir': 'genomic_bench_DNAbert2_output/human_ocr_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.05_ep3_seed42', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 128, 'per_device_eval_batch_size': 128, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 3e-05, 'weight_decay': 0.01, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'warmup_ratio': 0.05, 'warmup_steps': 0, 'log_level': 'info', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 'genomic_bench_DNAbert2_output/human_ocr_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.05_ep3_seed42/runs/Mar24_04-42-55_u112222', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 100, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 100, 'save_total_limit': 1, 'save_safetensors': True, 'save_on_each_node': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 100, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': 'base5120_human_ocr_ensembl_lr3e-5_wd0.01_wr0.05_ep3_seed42', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 
'metric_for_best_model': 'eval_f1', 'greater_is_better': True, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': False, 'include_tokens_per_second': False, 'neftune_noise_alpha': None, 'vocab_file': None, 'cache_dir': None, 'model_max_length': 100, 'find_unused_parameters': False, 'checkpointing': False, 'eval_and_save_results': True, 'save_model': False, 'project_name': 'genomic_bench_DNAbert2'} +2026-03-24 04:53:19,758 INFO wandb-AsyncioManager-main:2764799 [service_client.py:_forward_responses():80] Reached EOF. +2026-03-24 04:53:19,758 INFO wandb-AsyncioManager-main:2764799 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles. 
diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_044256-f0nbbv1z/run-f0nbbv1z.wandb b/Finetune-GenomicBenchmarks/wandb/run-20260324_044256-f0nbbv1z/run-f0nbbv1z.wandb new file mode 100644 index 0000000000000000000000000000000000000000..b57269c79753907e11299a55133d8bbe772c7baa --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_044256-f0nbbv1z/run-f0nbbv1z.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbc9ace84e32428c5eb40c7d4fde5efb1a73dd56069acbc2b7fd27065d53b1fc +size 606236 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_045330-ljb653t8/files/config.yaml b/Finetune-GenomicBenchmarks/wandb/run-20260324_045330-ljb653t8/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..203de769686f6e7b699373f6591b555f767f955b --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_045330-ljb653t8/files/config.yaml @@ -0,0 +1,559 @@ +_name_or_path: + value: /data/nanhuang/Nan/models/DNAbert2_Pretrained +_wandb: + value: + cli_version: 0.23.1 + e: + r8tps14oz8e5hqpmh839wmtf0rirq7m7: + args: + - --model_name_or_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained + - --tokenizer_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json + - --trust_remote_code + - "True" + - --data_path + - /data/nanhuang/Nan/ft_data/demo_human_or_worm/split + - --kmer + - "-1" + - --run_name + - base5120_demo_human_or_worm_lr3e-5_wd0.03_wr0.06_ep8_seed42 + - --model_max_length + - "512" + - --per_device_train_batch_size + - "128" + - --per_device_eval_batch_size + - "128" + - --gradient_accumulation_steps + - "1" + - --learning_rate + - "3e-5" + - --weight_decay + - "0.03" + - --num_train_epochs + - "8" + - --lr_scheduler_type + - linear + - --warmup_steps + - "0" + - --warmup_ratio + - "0.06" + - --fp16 + - --output_dir + - genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42 + - --evaluation_strategy + - epoch + - 
--save_strategy + - epoch + - --load_best_model_at_end + - "True" + - --metric_for_best_model + - eval_f1 + - --greater_is_better + - "True" + - --save_total_limit + - "1" + - --logging_steps + - "100" + - --overwrite_output_dir + - "True" + - --log_level + - info + - --seed + - "42" + - --find_unused_parameters + - "False" + - --project_name + - genomic_bench_DNAbert2 + codePath: train.py + codePathLocal: train.py + cpu_count: 64 + cpu_count_logical: 128 + cudaVersion: "12.4" + disk: + /: + total: "3768964489216" + used: "3558777335808" + email: n5huang@ucsd.edu + executable: /data/nanhuang/miniconda3/envs/bpe_v2/bin/python + gpu: NVIDIA RTX A6000 + gpu_count: 8 + gpu_nvidia: + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-5a3d8a94-f380-da39-63d2-4cae98f5c2ae + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-cf8724bd-d619-7916-ee26-88d517a20c47 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-48b494ab-4a63-ff4c-5cc8-746af5d27310 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-968c7ea7-97bf-416a-7689-72c141cfc2bb + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-d53c626b-860f-1dec-1cfa-1dfcde78bc88 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-caa40ec7-afcb-5fe0-c53a-85eb54152941 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-18ee7a7f-1bbe-edef-c72c-3abed60917b2 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-a8757d5a-c26e-48c6-a704-dfe62167fc81 + host: u112222 + memory: + total: "1082030182400" + os: Linux-5.15.0-126-generic-x86_64-with-glibc2.35 + program: 
/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py + python: CPython 3.9.18 + root: /data/nanhuang/Nan/Finetune-GenomicBenchmarks + startedAt: "2026-03-24T11:53:30.995334Z" + writerId: r8tps14oz8e5hqpmh839wmtf0rirq7m7 + m: + - "1": train/global_step + "6": + - 3 + "7": [] + - "2": '*' + "5": 1 + "6": + - 1 + "7": [] + python_version: 3.9.18 + t: + "1": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "2": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "3": + - 7 + - 66 + "4": 3.9.18 + "5": 0.23.1 + "6": 4.35.2 + "9": + "1": transformers_trainer + "12": 0.23.1 + "13": linux-x86_64 +adafactor: + value: false +adam_beta1: + value: 0.9 +adam_beta2: + value: 0.999 +adam_epsilon: + value: 1e-08 +add_cross_attention: + value: false +architectures: + value: + - BertForMaskedLM +attention_probs_dropout_prob: + value: 0.1 +auto_find_batch_size: + value: false +bad_words_ids: + value: null +begin_suppress_tokens: + value: null +bf16: + value: false +bf16_full_eval: + value: false +bos_token_id: + value: null +cache_dir: + value: null +checkpointing: + value: false +chunk_size_feed_forward: + value: 0 +classifier_dropout: + value: null +cross_attention_hidden_size: + value: null +data_seed: + value: null +dataloader_drop_last: + value: false +dataloader_num_workers: + value: 0 +dataloader_pin_memory: + value: false +ddp_backend: + value: null +ddp_broadcast_buffers: + value: null +ddp_bucket_cap_mb: + value: null +ddp_find_unused_parameters: + value: null +ddp_timeout: + value: 1800 +debug: + value: [] +decoder_start_token_id: + value: null +deepspeed: + value: null +disable_tqdm: + value: false +dispatch_batches: + value: null +diversity_penalty: + value: 0 +do_eval: + value: true +do_predict: + value: false +do_sample: + value: false +do_train: + value: false +early_stopping: + value: false +encoder_no_repeat_ngram_size: + value: 0 +eos_token_id: + value: null +eval_accumulation_steps: + value: null +eval_and_save_results: + value: true +eval_delay: + value: 0 
+eval_steps: + value: 100 +evaluation_strategy: + value: epoch +exponential_decay_length_penalty: + value: null +find_unused_parameters: + value: false +finetuning_task: + value: null +forced_bos_token_id: + value: null +forced_eos_token_id: + value: null +fp16: + value: true +fp16_backend: + value: auto +fp16_full_eval: + value: false +fp16_opt_level: + value: O1 +fsdp: + value: [] +fsdp_config: + value: + min_num_params: 0 + xla: false + xla_fsdp_grad_ckpt: false +fsdp_min_num_params: + value: 0 +fsdp_transformer_layer_cls_to_wrap: + value: null +full_determinism: + value: false +gradient_accumulation_steps: + value: 1 +gradient_checkpointing: + value: false +gradient_checkpointing_kwargs: + value: null +greater_is_better: + value: true +group_by_length: + value: false +half_precision_backend: + value: auto +hidden_act: + value: gelu +hidden_dropout_prob: + value: 0.1 +hidden_size: + value: 768 +hub_always_push: + value: false +hub_model_id: + value: null +hub_private_repo: + value: false +hub_strategy: + value: every_save +hub_token: + value: +id2label: + value: + "0": LABEL_0 + "1": LABEL_1 +ignore_data_skip: + value: false +include_inputs_for_metrics: + value: false +include_tokens_per_second: + value: false +initializer_range: + value: 0.02 +intermediate_size: + value: 3072 +is_decoder: + value: false +is_encoder_decoder: + value: false +jit_mode_eval: + value: false +label_names: + value: null +label_smoothing_factor: + value: 0 +label2id: + value: + LABEL_0: 0 + LABEL_1: 1 +layer_norm_eps: + value: 1e-12 +learning_rate: + value: 3e-05 +length_column_name: + value: length +length_penalty: + value: 1 +load_best_model_at_end: + value: true +local_rank: + value: 0 +log_level: + value: info +log_level_replica: + value: warning +log_on_each_node: + value: true +logging_dir: + value: genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/runs/Mar24_04-53-30_u112222 +logging_first_step: + value: false 
+logging_nan_inf_filter: + value: true +logging_steps: + value: 100 +logging_strategy: + value: steps +lr_scheduler_type: + value: linear +max_grad_norm: + value: 1 +max_length: + value: 512 +max_position_embeddings: + value: 512 +max_steps: + value: -1 +metric_for_best_model: + value: eval_f1 +min_length: + value: 0 +model_max_length: + value: 512 +model_type: + value: bert +mp_parameters: + value: "" +neftune_noise_alpha: + value: null +no_cuda: + value: false +no_repeat_ngram_size: + value: 0 +num_attention_heads: + value: 12 +num_beam_groups: + value: 1 +num_beams: + value: 1 +num_hidden_layers: + value: 12 +num_return_sequences: + value: 1 +num_train_epochs: + value: 8 +optim: + value: adamw_torch +optim_args: + value: null +output_attentions: + value: false +output_dir: + value: genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42 +output_hidden_states: + value: false +output_scores: + value: false +overwrite_output_dir: + value: true +pad_token_id: + value: 0 +past_index: + value: -1 +per_device_eval_batch_size: + value: 128 +per_device_train_batch_size: + value: 128 +per_gpu_eval_batch_size: + value: null +per_gpu_train_batch_size: + value: null +position_embedding_type: + value: absolute +prediction_loss_only: + value: false +prefix: + value: null +problem_type: + value: null +project_name: + value: genomic_bench_DNAbert2 +push_to_hub: + value: false +push_to_hub_model_id: + value: null +push_to_hub_organization: + value: null +push_to_hub_token: + value: +ray_scope: + value: last +remove_invalid_values: + value: false +remove_unused_columns: + value: true +repetition_penalty: + value: 1 +report_to: + value: + - wandb +resume_from_checkpoint: + value: null +return_dict: + value: true +return_dict_in_generate: + value: false +run_name: + value: base5120_demo_human_or_worm_lr3e-5_wd0.03_wr0.06_ep8_seed42 +save_model: + value: false +save_on_each_node: + value: false +save_safetensors: + value: true +save_steps: 
+ value: 100 +save_strategy: + value: epoch +save_total_limit: + value: 1 +seed: + value: 42 +sep_token_id: + value: null +skip_memory_metrics: + value: true +split_batches: + value: false +suppress_tokens: + value: null +task_specific_params: + value: null +temperature: + value: 1 +tf_legacy_loss: + value: false +tf32: + value: null +tie_encoder_decoder: + value: false +tie_word_embeddings: + value: true +tokenizer_class: + value: null +top_k: + value: 50 +top_p: + value: 1 +torch_compile: + value: false +torch_compile_backend: + value: null +torch_compile_mode: + value: null +torch_dtype: + value: float32 +torchdynamo: + value: null +torchscript: + value: false +tpu_metrics_debug: + value: false +tpu_num_cores: + value: null +transformers_version: + value: 4.35.2 +type_vocab_size: + value: 2 +typical_p: + value: 1 +use_bfloat16: + value: false +use_cache: + value: true +use_cpu: + value: false +use_ipex: + value: false +use_legacy_prediction_loop: + value: false +use_mps_device: + value: false +vocab_file: + value: null +vocab_size: + value: 4096 +warmup_ratio: + value: 0.06 +warmup_steps: + value: 0 +weight_decay: + value: 0.03 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_045330-ljb653t8/files/output.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_045330-ljb653t8/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..7e1576626b525a3d300a20f50024697cfb27e68a --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_045330-ljb653t8/files/output.log @@ -0,0 +1,158 @@ +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... 
+Some weights of BertForSequenceClassification were not initialized from the model checkpoint at /data/nanhuang/Nan/models/DNAbert2_Pretrained and are newly initialized: ['bert.pooler.dense.weight', 'classifier.weight', 'bert.pooler.dense.bias', 'classifier.bias'] +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. +/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/accelerate/accelerator.py:439: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead. + self.scaler = torch.cuda.amp.GradScaler(**kwargs) +Using auto half precision backend +***** Running training ***** + Num examples = 80,000 + Num Epochs = 8 + Instantaneous batch size per device = 128 + Total train batch size (w. parallel, distributed & accumulation) = 128 + Gradient Accumulation steps = 1 + Total optimization steps = 5,000 + Number of trainable parameters = 89,188,610 +Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true" + 12%|█▎ | 625/5000 [03:54<27:34, 2.64it/s]***** Running Evaluation ***** +{'loss': 0.4352, 'learning_rate': 9.999999999999999e-06, 'epoch': 0.16} +{'loss': 0.1944, 'learning_rate': 1.9999999999999998e-05, 'epoch': 0.32} +{'loss': 0.1817, 'learning_rate': 3e-05, 'epoch': 0.48} +{'loss': 0.1653, 'learning_rate': 2.9361702127659574e-05, 'epoch': 0.64} +{'loss': 0.15, 'learning_rate': 2.872340425531915e-05, 'epoch': 0.8} +{'loss': 0.1423, 'learning_rate': 2.8085106382978723e-05, 'epoch': 0.96} + Num examples = 10000 + Batch size = 128 + 12%|█▎ | 625/5000 [04:04<27:34, 2.64itSaving model checkpoint to genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-625 +Configuration saved in genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-625/config.json +{'eval_loss': 0.14096176624298096, 
'eval_accuracy': 0.9492, 'eval_f1': 0.9491973663914737, 'eval_matthews_correlation': 0.8987776409510696, 'eval_precision': 0.9494807442016145, 'eval_recall': 0.949296915548862, 'eval_runtime': 9.3786, 'eval_samples_per_second': 1066.258, 'eval_steps_per_second': 8.423, 'epoch': 1.0} +Model weights saved in genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-625/pytorch_model.bin +tokenizer config file saved in genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-625/tokenizer_config.json +Special tokens file saved in genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-625/special_tokens_map.json + 25%|██▌ | 1250/5000 [08:02<23:42, 2.64it/s]***** Running Evaluation ***** +{'loss': 0.1286, 'learning_rate': 2.74468085106383e-05, 'epoch': 1.12} +{'loss': 0.1227, 'learning_rate': 2.6808510638297873e-05, 'epoch': 1.28} +{'loss': 0.1128, 'learning_rate': 2.617021276595745e-05, 'epoch': 1.44} +{'loss': 0.1133, 'learning_rate': 2.5531914893617022e-05, 'epoch': 1.6} +{'loss': 0.1108, 'learning_rate': 2.4893617021276595e-05, 'epoch': 1.76} +{'loss': 0.1174, 'learning_rate': 2.4255319148936168e-05, 'epoch': 1.92} + Num examples = 10000 + Batch size = 128 + 25%|██▌ | 1250/5000 [08:12<23:42, 2.64iSaving model checkpoint to genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-1250 +Configuration saved in genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-1250/config.json +{'eval_loss': 0.14714133739471436, 'eval_accuracy': 0.9448, 'eval_f1': 0.9447548116457338, 'eval_matthews_correlation': 0.8919111732730618, 'eval_precision': 0.9468665229355923, 'eval_recall': 0.9450465072788566, 'eval_runtime': 9.4606, 'eval_samples_per_second': 1057.01, 'eval_steps_per_second': 8.35, 'epoch': 2.0} 
+Model weights saved in genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-1250/pytorch_model.bin +tokenizer config file saved in genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-1250/tokenizer_config.json +Special tokens file saved in genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-1250/special_tokens_map.json + 38%|███▊ | 1875/5000 [12:09<19:38, 2.65it/s]***** Running Evaluation ***** +{'loss': 0.0896, 'learning_rate': 2.3617021276595744e-05, 'epoch': 2.08} +{'loss': 0.0771, 'learning_rate': 2.297872340425532e-05, 'epoch': 2.24} +{'loss': 0.0849, 'learning_rate': 2.2340425531914894e-05, 'epoch': 2.4} +{'loss': 0.0786, 'learning_rate': 2.170212765957447e-05, 'epoch': 2.56} +{'loss': 0.0872, 'learning_rate': 2.1063829787234043e-05, 'epoch': 2.72} +{'loss': 0.0827, 'learning_rate': 2.0425531914893616e-05, 'epoch': 2.88} + Num examples = 10000 + Batch size = 128 + 38%|███▊ | 1875/5000 [12:19<19:38, 2.65iSaving model checkpoint to genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-1875 +Configuration saved in genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-1875/config.json +{'eval_loss': 0.12726238369941711, 'eval_accuracy': 0.9533, 'eval_f1': 0.9532990543058497, 'eval_matthews_correlation': 0.9066100253951647, 'eval_precision': 0.9532948131792527, 'eval_recall': 0.9533152124454098, 'eval_runtime': 9.3211, 'eval_samples_per_second': 1072.834, 'eval_steps_per_second': 8.475, 'epoch': 3.0} +Model weights saved in genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-1875/pytorch_model.bin +tokenizer config file saved in 
genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-1875/tokenizer_config.json +Special tokens file saved in genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-1875/special_tokens_map.json +Deleting older checkpoint [genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-625] due to args.save_total_limit +Deleting older checkpoint [genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-1250] due to args.save_total_limit + 50%|█████ | 2500/5000 [16:17<15:41, 2.65it/s]***** Running Evaluation ***** +{'loss': 0.0769, 'learning_rate': 1.978723404255319e-05, 'epoch': 3.04} +{'loss': 0.0553, 'learning_rate': 1.914893617021277e-05, 'epoch': 3.2} +{'loss': 0.0561, 'learning_rate': 1.8510638297872342e-05, 'epoch': 3.36} +{'loss': 0.0551, 'learning_rate': 1.7872340425531915e-05, 'epoch': 3.52} +{'loss': 0.0555, 'learning_rate': 1.723404255319149e-05, 'epoch': 3.68} +{'loss': 0.0574, 'learning_rate': 1.6595744680851064e-05, 'epoch': 3.84} +{'loss': 0.0562, 'learning_rate': 1.5957446808510637e-05, 'epoch': 4.0} + Num examples = 10000 + Batch size = 128 + 50%|█████ | 2500/5000 [16:27<15:41, 2.65iSaving model checkpoint to genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-2500 +Configuration saved in genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-2500/config.json +{'eval_loss': 0.16251493990421295, 'eval_accuracy': 0.9532, 'eval_f1': 0.9531823797204316, 'eval_matthews_correlation': 0.9066581426052277, 'eval_precision': 0.9535473452173776, 'eval_recall': 0.9531109024342193, 'eval_runtime': 9.3211, 'eval_samples_per_second': 1072.831, 'eval_steps_per_second': 8.475, 'epoch': 4.0} +Model weights saved in 
genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-2500/pytorch_model.bin +tokenizer config file saved in genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-2500/tokenizer_config.json +Special tokens file saved in genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-2500/special_tokens_map.json + 62%|██████▎ | 3125/5000 [20:25<11:53, 2.63it/s]***** Running Evaluation ***** +{'loss': 0.0343, 'learning_rate': 1.531914893617021e-05, 'epoch': 4.16} +{'loss': 0.0289, 'learning_rate': 1.4680851063829787e-05, 'epoch': 4.32} +{'loss': 0.0365, 'learning_rate': 1.4042553191489362e-05, 'epoch': 4.48} +{'loss': 0.0336, 'learning_rate': 1.3404255319148936e-05, 'epoch': 4.64} +{'loss': 0.0362, 'learning_rate': 1.2765957446808511e-05, 'epoch': 4.8} +{'loss': 0.0355, 'learning_rate': 1.2127659574468084e-05, 'epoch': 4.96} + Num examples = 10000 + Batch size = 128 + 62%|██████▎ | 3125/5000 [20:34<11:53, 2.63iSaving model checkpoint to genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-3125 +Configuration saved in genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-3125/config.json +{'eval_loss': 0.17249269783496857, 'eval_accuracy': 0.9553, 'eval_f1': 0.9552997206232539, 'eval_matthews_correlation': 0.9106381321410131, 'eval_precision': 0.9553088212352849, 'eval_recall': 0.9553293111362456, 'eval_runtime': 9.36, 'eval_samples_per_second': 1068.377, 'eval_steps_per_second': 8.44, 'epoch': 5.0} +Model weights saved in genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-3125/pytorch_model.bin +tokenizer config file saved in 
genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-3125/tokenizer_config.json +Special tokens file saved in genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-3125/special_tokens_map.json +Deleting older checkpoint [genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-1875] due to args.save_total_limit +Deleting older checkpoint [genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-2500] due to args.save_total_limit + 75%|███████▌ | 3750/5000 [24:33<07:51, 2.65it/s]***** Running Evaluation ***** +{'loss': 0.0224, 'learning_rate': 1.148936170212766e-05, 'epoch': 5.12} +{'loss': 0.0207, 'learning_rate': 1.0851063829787235e-05, 'epoch': 5.28} +{'loss': 0.0194, 'learning_rate': 1.0212765957446808e-05, 'epoch': 5.44} +{'loss': 0.0188, 'learning_rate': 9.574468085106385e-06, 'epoch': 5.6} +{'loss': 0.0197, 'learning_rate': 8.936170212765958e-06, 'epoch': 5.76} +{'loss': 0.0183, 'learning_rate': 8.297872340425532e-06, 'epoch': 5.92} + Num examples = 10000 + Batch size = 128 + 75%|███████▌ | 3750/5000 [24:42<07:51, 2.65iSaving model checkpoint to genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-3750 +Configuration saved in genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-3750/config.json +{'eval_loss': 0.24090713262557983, 'eval_accuracy': 0.9552, 'eval_f1': 0.9551867424458852, 'eval_matthews_correlation': 0.91057237575741, 'eval_precision': 0.9554460306619799, 'eval_recall': 0.9551264011936584, 'eval_runtime': 9.3493, 'eval_samples_per_second': 1069.598, 'eval_steps_per_second': 8.45, 'epoch': 6.0} +Model weights saved in 
genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-3750/pytorch_model.bin +tokenizer config file saved in genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-3750/tokenizer_config.json +Special tokens file saved in genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-3750/special_tokens_map.json + 88%|████████▊ | 4375/5000 [28:40<03:55, 2.66it/s]***** Running Evaluation ***** +{'loss': 0.0129, 'learning_rate': 7.659574468085105e-06, 'epoch': 6.08} +{'loss': 0.0136, 'learning_rate': 7.021276595744681e-06, 'epoch': 6.24} +{'loss': 0.0076, 'learning_rate': 6.3829787234042555e-06, 'epoch': 6.4} +{'loss': 0.0079, 'learning_rate': 5.74468085106383e-06, 'epoch': 6.56} +{'loss': 0.0105, 'learning_rate': 5.106382978723404e-06, 'epoch': 6.72} +{'loss': 0.0083, 'learning_rate': 4.468085106382979e-06, 'epoch': 6.88} + Num examples = 10000 + Batch size = 128 + 88%|████████▊ | 4375/5000 [28:49<03:55, 2.66iSaving model checkpoint to genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-4375 +Configuration saved in genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-4375/config.json +{'eval_loss': 0.2684195339679718, 'eval_accuracy': 0.9557, 'eval_f1': 0.9556951153864713, 'eval_matthews_correlation': 0.9114136506343465, 'eval_precision': 0.9557403251037204, 'eval_recall': 0.9556733279930717, 'eval_runtime': 9.3532, 'eval_samples_per_second': 1069.157, 'eval_steps_per_second': 8.446, 'epoch': 7.0} +Model weights saved in genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-4375/pytorch_model.bin +tokenizer config file saved in 
genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-4375/tokenizer_config.json +Special tokens file saved in genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-4375/special_tokens_map.json +Deleting older checkpoint [genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-3125] due to args.save_total_limit +Deleting older checkpoint [genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-3750] due to args.save_total_limit +100%|██████████| 5000/5000 [32:47<00:00, 2.66it/s]***** Running Evaluation ***** +{'loss': 0.0094, 'learning_rate': 3.829787234042553e-06, 'epoch': 7.04} +{'loss': 0.0062, 'learning_rate': 3.1914893617021277e-06, 'epoch': 7.2} +{'loss': 0.0035, 'learning_rate': 2.5595744680851062e-06, 'epoch': 7.36} +{'loss': 0.0047, 'learning_rate': 1.921276595744681e-06, 'epoch': 7.52} +{'loss': 0.0048, 'learning_rate': 1.2829787234042554e-06, 'epoch': 7.68} +{'loss': 0.0047, 'learning_rate': 6.446808510638298e-07, 'epoch': 7.84} +{'loss': 0.0061, 'learning_rate': 6.3829787234042555e-09, 'epoch': 8.0} + Num examples = 10000 + Batch size = 128 +100%|██████████| 5000/5000 [32:57<00:00, 2.66iSaving model checkpoint to genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-5000 +Configuration saved in genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-5000/config.json +{'eval_loss': 0.2790652811527252, 'eval_accuracy': 0.9548, 'eval_f1': 0.9547964994409167, 'eval_matthews_correlation': 0.9095991869188845, 'eval_precision': 0.9548140029013661, 'eval_recall': 0.9547851844740392, 'eval_runtime': 9.3116, 'eval_samples_per_second': 1073.93, 'eval_steps_per_second': 8.484, 'epoch': 8.0} +Model weights saved in 
genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-5000/pytorch_model.bin +tokenizer config file saved in genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-5000/tokenizer_config.json +Special tokens file saved in genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-5000/special_tokens_map.json + + +Training completed. Do not forget to share your model on huggingface.co/models =) + + +Loading best model from genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-4375 (score: 0.9556951153864713). +100%|██████████| 5000/5000 [33:01<00:00, 2.66it/s]Deleting older checkpoint [genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/checkpoint-5000] due to args.save_total_limit +{'train_runtime': 1981.3756, 'train_samples_per_second': 323.008, 'train_steps_per_second': 2.523, 'train_loss': 0.06623276020288467, 'epoch': 8.0} +100%|██████████| 5000/5000 [33:01<00:00, 2.52it/s] +***** Running Evaluation ***** + Num examples = 10000 + Batch size = 128 +100%|██████████| 79/79 [00:09<00:00, 8.65it/s] diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_045330-ljb653t8/files/requirements.txt b/Finetune-GenomicBenchmarks/wandb/run-20260324_045330-ljb653t8/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..44d18d32ec4648cd530877d7c8c4758d5e887b9c --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_045330-ljb653t8/files/requirements.txt @@ -0,0 +1,144 @@ +scipy==1.13.1 +jupyter_core==5.8.1 +smmap==5.0.2 +yarl==1.22.0 +executing==2.2.0 +cffi==2.0.0 +mkl_random==1.2.8 +traitlets==5.14.3 +wandb==0.23.1 +annotated-types==0.7.0 +evaluate==0.4.6 +kiwisolver==1.4.4 +Jinja2==3.1.6 +pyparsing==3.2.0 +mpmath==1.3.0 +debugpy==1.8.16 
+nvidia-cuda-nvrtc-cu12==12.8.93 +docker-pycreds==0.4.0 +pycparser==2.23 +anyio==4.12.0 +safetensors==0.7.0 +matplotlib-inline==0.1.7 +Pygments==2.19.2 +numpy==2.0.2 +nvidia-cuda-cupti-cu12==12.8.90 +Bottleneck==1.4.2 +matplotlib==3.9.2 +numexpr==2.10.1 +sip==6.7.12 +aiohappyeyeballs==2.6.1 +ptyprocess==0.7.0 +fsspec==2025.7.0 +accelerate==0.25.0 +zipp==3.23.0 +PyQt5_sip==12.13.0 +pure_eval==0.2.3 +regex==2025.11.3 +aiosignal==1.4.0 +certifi==2025.10.5 +transformers==4.35.2 +mkl-service==2.4.0 +httpx==0.28.1 +mkl_fft==1.3.11 +pickleshare==0.7.5 +ipykernel==6.30.1 +nvidia-nvtx-cu12==12.8.90 +nvidia-cufft-cu12==11.3.3.83 +triton==3.4.0 +numba==0.60.0 +psutil==7.0.0 +contourpy==1.2.1 +PyQt5==5.15.10 +packaging==25.0 +datasets==4.4.1 +ipython==8.18.1 +sympy==1.14.0 +nvidia-cusolver-cu12==11.7.3.90 +multidict==6.7.0 +jupyter_client==8.6.3 +setuptools==80.9.0 +prompt_toolkit==3.0.51 +six==1.17.0 +GitPython==3.1.45 +pydantic==2.11.7 +nvidia-cublas-cu12==12.8.4.1 +aiohttp==3.13.2 +tzdata==2025.2 +importlib_metadata==8.7.0 +biopython==1.85 +httpcore==1.0.9 +python-dateutil==2.9.0.post0 +llvmlite==0.43.0 +pandas==2.3.3 +scikit-learn==1.6.1 +asttokens==3.0.0 +joblib==1.5.3 +h11==0.16.0 +charset-normalizer==3.4.4 +pyzmq==27.0.2 +multiprocess==0.70.18 +nvidia-nvjitlink-cu12==12.8.93 +sentry-sdk==2.35.0 +pytz==2025.2 +pydantic_core==2.33.2 +MarkupSafe==3.0.3 +brotlicffi==1.0.9.2 +stack_data==0.6.3 +tqdm==4.67.1 +pynndescent==0.5.13 +importlib_resources==6.5.2 +ply==3.11 +pyarrow==21.0.0 +tokenizers==0.15.2 +exceptiongroup==1.3.1 +nvidia-cusparse-cu12==12.5.8.93 +comm==0.2.3 +pillow==11.3.0 +nvidia-cusparselt-cu12==0.7.1 +protobuf==3.20.3 +urllib3==2.5.0 +wheel==0.45.1 +wcwidth==0.2.13 +appdirs==1.4.4 +PySocks==1.7.1 +PyQt6_sip==13.10.2 +umap-learn==0.5.9.post2 +attrs==25.4.0 +platformdirs==4.3.8 +nvidia-cuda-runtime-cu12==12.8.90 +typing-inspection==0.4.1 +huggingface_hub==0.34.4 +decorator==5.2.1 +filelock==3.17.0 +nvidia-nccl-cu12==2.27.3 +fonttools==4.60.1 +xxhash==3.6.0 
+dill==0.4.0 +threadpoolctl==3.6.0 +parso==0.8.4 +pysam==0.9.1 +frozenlist==1.8.0 +typing_extensions==4.15.0 +propcache==0.4.1 +tomli==2.2.1 +click==8.1.8 +nvidia-cudnn-cu12==9.10.2.21 +gitdb==4.0.12 +pip==25.3 +tornado==6.5.2 +networkx==3.2.1 +jedi==0.19.2 +idna==3.11 +pexpect==4.9.0 +async-timeout==5.0.1 +hf-xet==1.1.8 +nvidia-curand-cu12==10.3.9.90 +PyYAML==6.0.2 +nvidia-cufile-cu12==1.13.1.3 +setproctitle==1.3.6 +eval_type_backport==0.2.2 +requests==2.32.5 +nest-asyncio==1.6.0 +torch==2.8.0 +cycler==0.11.0 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_045330-ljb653t8/files/wandb-metadata.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_045330-ljb653t8/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..b7a4af6560c20b38c7e323501cf88807fa0b582c --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_045330-ljb653t8/files/wandb-metadata.json @@ -0,0 +1,146 @@ +{ + "os": "Linux-5.15.0-126-generic-x86_64-with-glibc2.35", + "python": "CPython 3.9.18", + "startedAt": "2026-03-24T11:53:30.995334Z", + "args": [ + "--model_name_or_path", + "/data/nanhuang/Nan/models/DNAbert2_Pretrained", + "--tokenizer_path", + "/data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json", + "--trust_remote_code", + "True", + "--data_path", + "/data/nanhuang/Nan/ft_data/demo_human_or_worm/split", + "--kmer", + "-1", + "--run_name", + "base5120_demo_human_or_worm_lr3e-5_wd0.03_wr0.06_ep8_seed42", + "--model_max_length", + "512", + "--per_device_train_batch_size", + "128", + "--per_device_eval_batch_size", + "128", + "--gradient_accumulation_steps", + "1", + "--learning_rate", + "3e-5", + "--weight_decay", + "0.03", + "--num_train_epochs", + "8", + "--lr_scheduler_type", + "linear", + "--warmup_steps", + "0", + "--warmup_ratio", + "0.06", + "--fp16", + "--output_dir", + "genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42", + "--evaluation_strategy", + "epoch", + 
"--save_strategy", + "epoch", + "--load_best_model_at_end", + "True", + "--metric_for_best_model", + "eval_f1", + "--greater_is_better", + "True", + "--save_total_limit", + "1", + "--logging_steps", + "100", + "--overwrite_output_dir", + "True", + "--log_level", + "info", + "--seed", + "42", + "--find_unused_parameters", + "False", + "--project_name", + "genomic_bench_DNAbert2" + ], + "program": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", + "codePath": "train.py", + "codePathLocal": "train.py", + "email": "n5huang@ucsd.edu", + "root": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks", + "host": "u112222", + "executable": "/data/nanhuang/miniconda3/envs/bpe_v2/bin/python", + "cpu_count": 64, + "cpu_count_logical": 128, + "gpu": "NVIDIA RTX A6000", + "gpu_count": 8, + "disk": { + "/": { + "total": "3768964489216", + "used": "3558777335808" + } + }, + "memory": { + "total": "1082030182400" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-5a3d8a94-f380-da39-63d2-4cae98f5c2ae" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-cf8724bd-d619-7916-ee26-88d517a20c47" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-48b494ab-4a63-ff4c-5cc8-746af5d27310" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-968c7ea7-97bf-416a-7689-72c141cfc2bb" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-d53c626b-860f-1dec-1cfa-1dfcde78bc88" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-caa40ec7-afcb-5fe0-c53a-85eb54152941" + }, + { + "name": 
"NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-18ee7a7f-1bbe-edef-c72c-3abed60917b2" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-a8757d5a-c26e-48c6-a704-dfe62167fc81" + } + ], + "cudaVersion": "12.4", + "writerId": "r8tps14oz8e5hqpmh839wmtf0rirq7m7" +} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_045330-ljb653t8/files/wandb-summary.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_045330-ljb653t8/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..fcb779a62fb0ee222cbe069a47864f566f7decf5 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_045330-ljb653t8/files/wandb-summary.json @@ -0,0 +1 @@ +{"train/train_runtime":1981.3756,"_timestamp":1.7743552575207174e+09,"train/train_loss":0.06623276020288467,"eval/samples_per_second":1080.186,"train/global_step":5000,"_wandb":{"runtime":2044},"eval/steps_per_second":8.533,"train/epoch":8,"eval/precision":0.957997831991328,"eval/matthews_correlation":0.9160011600280884,"_runtime":2044,"train/learning_rate":6.3829787234042555e-09,"train/loss":0.0061,"eval/loss":0.24703867733478546,"eval/runtime":9.2577,"eval/accuracy":0.958,"_step":59,"eval/f1":0.957999621996598,"train/train_samples_per_second":323.008,"train/train_steps_per_second":2.523,"eval/recall":0.958003328053249,"train/total_flos":6.64355414784e+16} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_045330-ljb653t8/logs/debug-core.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_045330-ljb653t8/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..7f693df1f1fd2473bd79131562c86b467ee690ee --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_045330-ljb653t8/logs/debug-core.log @@ -0,0 +1,14 @@ 
+{"time":"2026-03-24T04:53:31.201890171-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpgvhaf3cb/port-2769138.txt","pid":2769138,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T04:53:31.203589232-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2769138} +{"time":"2026-03-24T04:53:31.203469202-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2769138-2769191-3483499222/socket","Net":"unix"}} +{"time":"2026-03-24T04:53:31.374850432-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T04:53:31.463378293-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"ljb653t8","id":"1(@)"} +{"time":"2026-03-24T04:53:31.900643051-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"ljb653t8","id":"1(@)"} +{"time":"2026-03-24T05:27:37.52697894-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T05:27:37.527178589-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T05:27:37.527211599-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T05:27:37.527293379-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T05:27:37.527507208-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2769138-2769191-3483499222/socket","Net":"unix"}} +{"time":"2026-03-24T05:27:38.947853214-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T05:27:38.947942773-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T05:27:38.947998453-07:00","level":"INFO","msg":"server is closed"} diff --git 
a/Finetune-GenomicBenchmarks/wandb/run-20260324_045330-ljb653t8/logs/debug-internal.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_045330-ljb653t8/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..516e9de15c4d28659833999604720f998df5cb9b --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_045330-ljb653t8/logs/debug-internal.log @@ -0,0 +1,11 @@ +{"time":"2026-03-24T04:53:31.463766361-07:00","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2026-03-24T04:53:31.900250733-07:00","level":"INFO","msg":"stream: created new stream","id":"ljb653t8"} +{"time":"2026-03-24T04:53:31.900454092-07:00","level":"INFO","msg":"handler: started","stream_id":"ljb653t8"} +{"time":"2026-03-24T04:53:31.900620662-07:00","level":"INFO","msg":"stream: started","id":"ljb653t8"} +{"time":"2026-03-24T04:53:31.900652502-07:00","level":"INFO","msg":"writer: started","stream_id":"ljb653t8"} +{"time":"2026-03-24T04:53:31.900673842-07:00","level":"INFO","msg":"sender: started","stream_id":"ljb653t8"} +{"time":"2026-03-24T05:27:37.527195409-07:00","level":"INFO","msg":"stream: closing","id":"ljb653t8"} +{"time":"2026-03-24T05:27:37.961432332-07:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2026-03-24T05:27:38.937926804-07:00","level":"INFO","msg":"handler: closed","stream_id":"ljb653t8"} +{"time":"2026-03-24T05:27:38.938264512-07:00","level":"INFO","msg":"sender: closed","stream_id":"ljb653t8"} +{"time":"2026-03-24T05:27:38.938315812-07:00","level":"INFO","msg":"stream: closed","id":"ljb653t8"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_045330-ljb653t8/logs/debug.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_045330-ljb653t8/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..f03f2cac6af40def3f19ae45d823bcda25f8f4ca --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_045330-ljb653t8/logs/debug.log @@ -0,0 
+1,24 @@ +2026-03-24 04:53:31,001 INFO MainThread:2769138 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1 +2026-03-24 04:53:31,001 INFO MainThread:2769138 [wandb_setup.py:_flush():80] Configure stats pid to 2769138 +2026-03-24 04:53:31,001 INFO MainThread:2769138 [wandb_setup.py:_flush():80] Loading settings from /home/nanhuang/.config/wandb/settings +2026-03-24 04:53:31,001 INFO MainThread:2769138 [wandb_setup.py:_flush():80] Loading settings from /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/settings +2026-03-24 04:53:31,001 INFO MainThread:2769138 [wandb_setup.py:_flush():80] Loading settings from environment variables +2026-03-24 04:53:31,001 INFO MainThread:2769138 [wandb_init.py:setup_run_log_directory():714] Logging user logs to /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_045330-ljb653t8/logs/debug.log +2026-03-24 04:53:31,001 INFO MainThread:2769138 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_045330-ljb653t8/logs/debug-internal.log +2026-03-24 04:53:31,001 INFO MainThread:2769138 [wandb_init.py:init():841] calling init triggers +2026-03-24 04:53:31,001 INFO MainThread:2769138 [wandb_init.py:init():846] wandb.init called with sweep_config: {} +config: {'_wandb': {}} +2026-03-24 04:53:31,001 INFO MainThread:2769138 [wandb_init.py:init():889] starting backend +2026-03-24 04:53:31,376 INFO MainThread:2769138 [wandb_init.py:init():892] sending inform_init request +2026-03-24 04:53:31,456 INFO MainThread:2769138 [wandb_init.py:init():900] backend started and connected +2026-03-24 04:53:31,467 INFO MainThread:2769138 [wandb_init.py:init():970] updated telemetry +2026-03-24 04:53:31,468 INFO MainThread:2769138 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout +2026-03-24 04:53:32,554 INFO MainThread:2769138 [wandb_init.py:init():1041] starting run threads in backend +2026-03-24 04:53:32,869 INFO 
MainThread:2769138 [wandb_run.py:_console_start():2521] atexit reg +2026-03-24 04:53:32,869 INFO MainThread:2769138 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2026-03-24 04:53:32,869 INFO MainThread:2769138 [wandb_run.py:_redirect():2438] Wrapping output streams. +2026-03-24 04:53:32,869 INFO MainThread:2769138 [wandb_run.py:_redirect():2461] Redirects installed. +2026-03-24 04:53:32,878 INFO MainThread:2769138 [wandb_init.py:init():1081] run started, returning control to user process +2026-03-24 04:54:26,322 INFO MainThread:2769138 [wandb_run.py:_config_callback():1396] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 512, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['BertForMaskedLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, 
'_name_or_path': '/data/nanhuang/Nan/models/DNAbert2_Pretrained', 'transformers_version': '4.35.2', 'model_type': 'bert', 'vocab_size': 4096, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 512, 'type_vocab_size': 2, 'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'classifier_dropout': None, 'output_dir': 'genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 128, 'per_device_eval_batch_size': 128, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 3e-05, 'weight_decay': 0.03, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 8, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'warmup_ratio': 0.06, 'warmup_steps': 0, 'log_level': 'info', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 'genomic_bench_DNAbert2_output/demo_human_or_worm/DNAbert2_Pretrained/lr3e-5_wd0.03_wr0.06_ep8_seed42/runs/Mar24_04-53-30_u112222', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 100, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 100, 'save_total_limit': 1, 'save_safetensors': True, 'save_on_each_node': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 
'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 100, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': 'base5120_demo_human_or_worm_lr3e-5_wd0.03_wr0.06_ep8_seed42', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'eval_f1', 'greater_is_better': True, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': False, 'include_tokens_per_second': False, 'neftune_noise_alpha': None, 'vocab_file': None, 'cache_dir': None, 'model_max_length': 512, 'find_unused_parameters': False, 'checkpointing': False, 'eval_and_save_results': True, 'save_model': False, 'project_name': 'genomic_bench_DNAbert2'} +2026-03-24 05:27:37,527 
INFO wandb-AsyncioManager-main:2769138 [service_client.py:_forward_responses():80] Reached EOF. +2026-03-24 05:27:37,528 INFO wandb-AsyncioManager-main:2769138 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles. diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_045330-ljb653t8/run-ljb653t8.wandb b/Finetune-GenomicBenchmarks/wandb/run-20260324_045330-ljb653t8/run-ljb653t8.wandb new file mode 100644 index 0000000000000000000000000000000000000000..5c328b5d76c92edeae2cebdbc42b0d0089cac156 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_045330-ljb653t8/run-ljb653t8.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e69337db50d25f3c650b7e5520e1ad22944daf53535db9810f35329c504a2108 +size 1225798 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_052750-4zki9d15/files/config.yaml b/Finetune-GenomicBenchmarks/wandb/run-20260324_052750-4zki9d15/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a1a65fc75ed29453bde8ad32893850a2bd48076e --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_052750-4zki9d15/files/config.yaml @@ -0,0 +1,559 @@ +_name_or_path: + value: /data/nanhuang/Nan/models/DNAbert2_Pretrained +_wandb: + value: + cli_version: 0.23.1 + e: + 8kxy12xwhax2huanedquf3eavd1b3gai: + args: + - --model_name_or_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained + - --tokenizer_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json + - --trust_remote_code + - "True" + - --data_path + - /data/nanhuang/Nan/ft_data/drosophila_enhancers_stark/split + - --kmer + - "-1" + - --run_name + - base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42 + - --model_max_length + - "512" + - --per_device_train_batch_size + - "128" + - --per_device_eval_batch_size + - "128" + - --gradient_accumulation_steps + - "1" + - --learning_rate + - "3e-5" + - --weight_decay + - "0.05" + - --num_train_epochs + - "4" + - --lr_scheduler_type + - linear + 
- --warmup_steps + - "0" + - --warmup_ratio + - "0.15" + - --fp16 + - --output_dir + - genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42 + - --evaluation_strategy + - epoch + - --save_strategy + - epoch + - --load_best_model_at_end + - "True" + - --metric_for_best_model + - eval_f1 + - --greater_is_better + - "True" + - --save_total_limit + - "1" + - --logging_steps + - "100" + - --overwrite_output_dir + - "True" + - --log_level + - info + - --seed + - "42" + - --find_unused_parameters + - "False" + - --project_name + - genomic_bench_DNAbert2 + codePath: train.py + codePathLocal: train.py + cpu_count: 64 + cpu_count_logical: 128 + cudaVersion: "12.4" + disk: + /: + total: "3768964489216" + used: "3558778142720" + email: n5huang@ucsd.edu + executable: /data/nanhuang/miniconda3/envs/bpe_v2/bin/python + gpu: NVIDIA RTX A6000 + gpu_count: 8 + gpu_nvidia: + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-5a3d8a94-f380-da39-63d2-4cae98f5c2ae + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-cf8724bd-d619-7916-ee26-88d517a20c47 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-48b494ab-4a63-ff4c-5cc8-746af5d27310 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-968c7ea7-97bf-416a-7689-72c141cfc2bb + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-d53c626b-860f-1dec-1cfa-1dfcde78bc88 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-caa40ec7-afcb-5fe0-c53a-85eb54152941 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-18ee7a7f-1bbe-edef-c72c-3abed60917b2 + - architecture: Ampere + cudaCores: 10752 + 
memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-a8757d5a-c26e-48c6-a704-dfe62167fc81 + host: u112222 + memory: + total: "1082030182400" + os: Linux-5.15.0-126-generic-x86_64-with-glibc2.35 + program: /data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py + python: CPython 3.9.18 + root: /data/nanhuang/Nan/Finetune-GenomicBenchmarks + startedAt: "2026-03-24T12:27:50.443479Z" + writerId: 8kxy12xwhax2huanedquf3eavd1b3gai + m: + - "1": train/global_step + "6": + - 3 + "7": [] + - "2": '*' + "5": 1 + "6": + - 1 + "7": [] + python_version: 3.9.18 + t: + "1": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "2": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "3": + - 7 + - 66 + "4": 3.9.18 + "5": 0.23.1 + "6": 4.35.2 + "9": + "1": transformers_trainer + "12": 0.23.1 + "13": linux-x86_64 +adafactor: + value: false +adam_beta1: + value: 0.9 +adam_beta2: + value: 0.999 +adam_epsilon: + value: 1e-08 +add_cross_attention: + value: false +architectures: + value: + - BertForMaskedLM +attention_probs_dropout_prob: + value: 0.1 +auto_find_batch_size: + value: false +bad_words_ids: + value: null +begin_suppress_tokens: + value: null +bf16: + value: false +bf16_full_eval: + value: false +bos_token_id: + value: null +cache_dir: + value: null +checkpointing: + value: false +chunk_size_feed_forward: + value: 0 +classifier_dropout: + value: null +cross_attention_hidden_size: + value: null +data_seed: + value: null +dataloader_drop_last: + value: false +dataloader_num_workers: + value: 0 +dataloader_pin_memory: + value: false +ddp_backend: + value: null +ddp_broadcast_buffers: + value: null +ddp_bucket_cap_mb: + value: null +ddp_find_unused_parameters: + value: null +ddp_timeout: + value: 1800 +debug: + value: [] +decoder_start_token_id: + value: null +deepspeed: + value: null +disable_tqdm: + value: false +dispatch_batches: + value: null +diversity_penalty: + value: 0 +do_eval: + value: true +do_predict: + value: false +do_sample: + value: false +do_train: + value: 
false +early_stopping: + value: false +encoder_no_repeat_ngram_size: + value: 0 +eos_token_id: + value: null +eval_accumulation_steps: + value: null +eval_and_save_results: + value: true +eval_delay: + value: 0 +eval_steps: + value: 100 +evaluation_strategy: + value: epoch +exponential_decay_length_penalty: + value: null +find_unused_parameters: + value: false +finetuning_task: + value: null +forced_bos_token_id: + value: null +forced_eos_token_id: + value: null +fp16: + value: true +fp16_backend: + value: auto +fp16_full_eval: + value: false +fp16_opt_level: + value: O1 +fsdp: + value: [] +fsdp_config: + value: + min_num_params: 0 + xla: false + xla_fsdp_grad_ckpt: false +fsdp_min_num_params: + value: 0 +fsdp_transformer_layer_cls_to_wrap: + value: null +full_determinism: + value: false +gradient_accumulation_steps: + value: 1 +gradient_checkpointing: + value: false +gradient_checkpointing_kwargs: + value: null +greater_is_better: + value: true +group_by_length: + value: false +half_precision_backend: + value: auto +hidden_act: + value: gelu +hidden_dropout_prob: + value: 0.1 +hidden_size: + value: 768 +hub_always_push: + value: false +hub_model_id: + value: null +hub_private_repo: + value: false +hub_strategy: + value: every_save +hub_token: + value: +id2label: + value: + "0": LABEL_0 + "1": LABEL_1 +ignore_data_skip: + value: false +include_inputs_for_metrics: + value: false +include_tokens_per_second: + value: false +initializer_range: + value: 0.02 +intermediate_size: + value: 3072 +is_decoder: + value: false +is_encoder_decoder: + value: false +jit_mode_eval: + value: false +label_names: + value: null +label_smoothing_factor: + value: 0 +label2id: + value: + LABEL_0: 0 + LABEL_1: 1 +layer_norm_eps: + value: 1e-12 +learning_rate: + value: 3e-05 +length_column_name: + value: length +length_penalty: + value: 1 +load_best_model_at_end: + value: true +local_rank: + value: 0 +log_level: + value: info +log_level_replica: + value: warning +log_on_each_node: + value: 
true +logging_dir: + value: genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/runs/Mar24_05-27-49_u112222 +logging_first_step: + value: false +logging_nan_inf_filter: + value: true +logging_steps: + value: 100 +logging_strategy: + value: steps +lr_scheduler_type: + value: linear +max_grad_norm: + value: 1 +max_length: + value: 512 +max_position_embeddings: + value: 512 +max_steps: + value: -1 +metric_for_best_model: + value: eval_f1 +min_length: + value: 0 +model_max_length: + value: 512 +model_type: + value: bert +mp_parameters: + value: "" +neftune_noise_alpha: + value: null +no_cuda: + value: false +no_repeat_ngram_size: + value: 0 +num_attention_heads: + value: 12 +num_beam_groups: + value: 1 +num_beams: + value: 1 +num_hidden_layers: + value: 12 +num_return_sequences: + value: 1 +num_train_epochs: + value: 4 +optim: + value: adamw_torch +optim_args: + value: null +output_attentions: + value: false +output_dir: + value: genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42 +output_hidden_states: + value: false +output_scores: + value: false +overwrite_output_dir: + value: true +pad_token_id: + value: 0 +past_index: + value: -1 +per_device_eval_batch_size: + value: 128 +per_device_train_batch_size: + value: 128 +per_gpu_eval_batch_size: + value: null +per_gpu_train_batch_size: + value: null +position_embedding_type: + value: absolute +prediction_loss_only: + value: false +prefix: + value: null +problem_type: + value: null +project_name: + value: genomic_bench_DNAbert2 +push_to_hub: + value: false +push_to_hub_model_id: + value: null +push_to_hub_organization: + value: null +push_to_hub_token: + value: +ray_scope: + value: last +remove_invalid_values: + value: false +remove_unused_columns: + value: true +repetition_penalty: + value: 1 +report_to: + value: + - wandb +resume_from_checkpoint: + value: null +return_dict: + value: true 
+return_dict_in_generate: + value: false +run_name: + value: base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42 +save_model: + value: false +save_on_each_node: + value: false +save_safetensors: + value: true +save_steps: + value: 100 +save_strategy: + value: epoch +save_total_limit: + value: 1 +seed: + value: 42 +sep_token_id: + value: null +skip_memory_metrics: + value: true +split_batches: + value: false +suppress_tokens: + value: null +task_specific_params: + value: null +temperature: + value: 1 +tf_legacy_loss: + value: false +tf32: + value: null +tie_encoder_decoder: + value: false +tie_word_embeddings: + value: true +tokenizer_class: + value: null +top_k: + value: 50 +top_p: + value: 1 +torch_compile: + value: false +torch_compile_backend: + value: null +torch_compile_mode: + value: null +torch_dtype: + value: float32 +torchdynamo: + value: null +torchscript: + value: false +tpu_metrics_debug: + value: false +tpu_num_cores: + value: null +transformers_version: + value: 4.35.2 +type_vocab_size: + value: 2 +typical_p: + value: 1 +use_bfloat16: + value: false +use_cache: + value: true +use_cpu: + value: false +use_ipex: + value: false +use_legacy_prediction_loop: + value: false +use_mps_device: + value: false +vocab_file: + value: null +vocab_size: + value: 4096 +warmup_ratio: + value: 0.15 +warmup_steps: + value: 0 +weight_decay: + value: 0.05 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_052750-4zki9d15/files/output.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_052750-4zki9d15/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..ed048435ad336e758bd5e316bcff326a441edd6f --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_052750-4zki9d15/files/output.log @@ -0,0 +1,133 @@ +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... 
+Some weights of BertForSequenceClassification were not initialized from the model checkpoint at /data/nanhuang/Nan/models/DNAbert2_Pretrained and are newly initialized: ['classifier.bias', 'bert.pooler.dense.bias', 'classifier.weight', 'bert.pooler.dense.weight'] +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. +/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/accelerate/accelerator.py:439: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead. + self.scaler = torch.cuda.amp.GradScaler(**kwargs) +Using auto half precision backend +***** Running training ***** + Num examples = 5,531 + Num Epochs = 4 + Instantaneous batch size per device = 128 + Total train batch size (w. parallel, distributed & accumulation) = 128 + Gradient Accumulation steps = 1 + Total optimization steps = 176 + Number of trainable parameters = 89,188,610 +Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true" + 0%| | 0/176 [00:00 + train() + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 454, in train + trainer.train() + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 1555, in train + return inner_training_loop( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 1860, in _inner_training_loop + tr_loss_step = self.training_step(model, inputs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 2725, in training_step + loss = self.compute_loss(model, inputs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 2748, in compute_loss + outputs = model(**inputs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in 
_wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/accelerate/utils/operations.py", line 680, in forward + return model_forward(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/accelerate/utils/operations.py", line 668, in __call__ + return convert_to_fp32(self.model_forward(*args, **kwargs)) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/amp/autocast_mode.py", line 44, in decorate_autocast + return func(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 1564, in forward + outputs = self.bert( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 1013, in forward + encoder_outputs = self.encoder( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 607, in forward + layer_outputs = layer_module( + File 
"/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 497, in forward + self_attention_outputs = self.attention( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 427, in forward + self_outputs = self.self( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 355, in forward + attention_probs = nn.functional.softmax(attention_scores, dim=-1) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/functional.py", line 2137, in softmax + ret = input.softmax(dim) +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.50 GiB. GPU 0 has a total capacity of 47.53 GiB of which 1.18 GiB is free. Including non-PyTorch memory, this process has 46.32 GiB memory in use. 
Of the allocated memory 44.69 GiB is allocated by PyTorch, and 1.31 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) +Traceback (most recent call last): + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 473, in + train() + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 454, in train + trainer.train() + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 1555, in train + return inner_training_loop( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 1860, in _inner_training_loop + tr_loss_step = self.training_step(model, inputs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 2725, in training_step + loss = self.compute_loss(model, inputs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 2748, in compute_loss + outputs = model(**inputs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/accelerate/utils/operations.py", line 680, in forward + return model_forward(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/accelerate/utils/operations.py", line 668, in __call__ + return convert_to_fp32(self.model_forward(*args, **kwargs)) + File 
"/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/amp/autocast_mode.py", line 44, in decorate_autocast + return func(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 1564, in forward + outputs = self.bert( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 1013, in forward + encoder_outputs = self.encoder( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 607, in forward + layer_outputs = layer_module( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 497, in forward + self_attention_outputs = self.attention( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in 
_wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 427, in forward + self_outputs = self.self( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 355, in forward + attention_probs = nn.functional.softmax(attention_scores, dim=-1) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/functional.py", line 2137, in softmax + ret = input.softmax(dim) +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.50 GiB. GPU 0 has a total capacity of 47.53 GiB of which 1.18 GiB is free. Including non-PyTorch memory, this process has 46.32 GiB memory in use. Of the allocated memory 44.69 GiB is allocated by PyTorch, and 1.31 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. 
See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_052750-4zki9d15/files/requirements.txt b/Finetune-GenomicBenchmarks/wandb/run-20260324_052750-4zki9d15/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..44d18d32ec4648cd530877d7c8c4758d5e887b9c --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_052750-4zki9d15/files/requirements.txt @@ -0,0 +1,144 @@ +scipy==1.13.1 +jupyter_core==5.8.1 +smmap==5.0.2 +yarl==1.22.0 +executing==2.2.0 +cffi==2.0.0 +mkl_random==1.2.8 +traitlets==5.14.3 +wandb==0.23.1 +annotated-types==0.7.0 +evaluate==0.4.6 +kiwisolver==1.4.4 +Jinja2==3.1.6 +pyparsing==3.2.0 +mpmath==1.3.0 +debugpy==1.8.16 +nvidia-cuda-nvrtc-cu12==12.8.93 +docker-pycreds==0.4.0 +pycparser==2.23 +anyio==4.12.0 +safetensors==0.7.0 +matplotlib-inline==0.1.7 +Pygments==2.19.2 +numpy==2.0.2 +nvidia-cuda-cupti-cu12==12.8.90 +Bottleneck==1.4.2 +matplotlib==3.9.2 +numexpr==2.10.1 +sip==6.7.12 +aiohappyeyeballs==2.6.1 +ptyprocess==0.7.0 +fsspec==2025.7.0 +accelerate==0.25.0 +zipp==3.23.0 +PyQt5_sip==12.13.0 +pure_eval==0.2.3 +regex==2025.11.3 +aiosignal==1.4.0 +certifi==2025.10.5 +transformers==4.35.2 +mkl-service==2.4.0 +httpx==0.28.1 +mkl_fft==1.3.11 +pickleshare==0.7.5 +ipykernel==6.30.1 +nvidia-nvtx-cu12==12.8.90 +nvidia-cufft-cu12==11.3.3.83 +triton==3.4.0 +numba==0.60.0 +psutil==7.0.0 +contourpy==1.2.1 +PyQt5==5.15.10 +packaging==25.0 +datasets==4.4.1 +ipython==8.18.1 +sympy==1.14.0 +nvidia-cusolver-cu12==11.7.3.90 +multidict==6.7.0 +jupyter_client==8.6.3 +setuptools==80.9.0 +prompt_toolkit==3.0.51 +six==1.17.0 +GitPython==3.1.45 +pydantic==2.11.7 +nvidia-cublas-cu12==12.8.4.1 +aiohttp==3.13.2 +tzdata==2025.2 +importlib_metadata==8.7.0 +biopython==1.85 +httpcore==1.0.9 +python-dateutil==2.9.0.post0 +llvmlite==0.43.0 +pandas==2.3.3 +scikit-learn==1.6.1 +asttokens==3.0.0 +joblib==1.5.3 +h11==0.16.0 
+charset-normalizer==3.4.4 +pyzmq==27.0.2 +multiprocess==0.70.18 +nvidia-nvjitlink-cu12==12.8.93 +sentry-sdk==2.35.0 +pytz==2025.2 +pydantic_core==2.33.2 +MarkupSafe==3.0.3 +brotlicffi==1.0.9.2 +stack_data==0.6.3 +tqdm==4.67.1 +pynndescent==0.5.13 +importlib_resources==6.5.2 +ply==3.11 +pyarrow==21.0.0 +tokenizers==0.15.2 +exceptiongroup==1.3.1 +nvidia-cusparse-cu12==12.5.8.93 +comm==0.2.3 +pillow==11.3.0 +nvidia-cusparselt-cu12==0.7.1 +protobuf==3.20.3 +urllib3==2.5.0 +wheel==0.45.1 +wcwidth==0.2.13 +appdirs==1.4.4 +PySocks==1.7.1 +PyQt6_sip==13.10.2 +umap-learn==0.5.9.post2 +attrs==25.4.0 +platformdirs==4.3.8 +nvidia-cuda-runtime-cu12==12.8.90 +typing-inspection==0.4.1 +huggingface_hub==0.34.4 +decorator==5.2.1 +filelock==3.17.0 +nvidia-nccl-cu12==2.27.3 +fonttools==4.60.1 +xxhash==3.6.0 +dill==0.4.0 +threadpoolctl==3.6.0 +parso==0.8.4 +pysam==0.9.1 +frozenlist==1.8.0 +typing_extensions==4.15.0 +propcache==0.4.1 +tomli==2.2.1 +click==8.1.8 +nvidia-cudnn-cu12==9.10.2.21 +gitdb==4.0.12 +pip==25.3 +tornado==6.5.2 +networkx==3.2.1 +jedi==0.19.2 +idna==3.11 +pexpect==4.9.0 +async-timeout==5.0.1 +hf-xet==1.1.8 +nvidia-curand-cu12==10.3.9.90 +PyYAML==6.0.2 +nvidia-cufile-cu12==1.13.1.3 +setproctitle==1.3.6 +eval_type_backport==0.2.2 +requests==2.32.5 +nest-asyncio==1.6.0 +torch==2.8.0 +cycler==0.11.0 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_052750-4zki9d15/files/wandb-metadata.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_052750-4zki9d15/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..3c0bd3c1718721df407d13c1de5fdbebeec54ee9 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_052750-4zki9d15/files/wandb-metadata.json @@ -0,0 +1,146 @@ +{ + "os": "Linux-5.15.0-126-generic-x86_64-with-glibc2.35", + "python": "CPython 3.9.18", + "startedAt": "2026-03-24T12:27:50.443479Z", + "args": [ + "--model_name_or_path", + "/data/nanhuang/Nan/models/DNAbert2_Pretrained", + "--tokenizer_path", + 
"/data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json", + "--trust_remote_code", + "True", + "--data_path", + "/data/nanhuang/Nan/ft_data/drosophila_enhancers_stark/split", + "--kmer", + "-1", + "--run_name", + "base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42", + "--model_max_length", + "512", + "--per_device_train_batch_size", + "128", + "--per_device_eval_batch_size", + "128", + "--gradient_accumulation_steps", + "1", + "--learning_rate", + "3e-5", + "--weight_decay", + "0.05", + "--num_train_epochs", + "4", + "--lr_scheduler_type", + "linear", + "--warmup_steps", + "0", + "--warmup_ratio", + "0.15", + "--fp16", + "--output_dir", + "genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42", + "--evaluation_strategy", + "epoch", + "--save_strategy", + "epoch", + "--load_best_model_at_end", + "True", + "--metric_for_best_model", + "eval_f1", + "--greater_is_better", + "True", + "--save_total_limit", + "1", + "--logging_steps", + "100", + "--overwrite_output_dir", + "True", + "--log_level", + "info", + "--seed", + "42", + "--find_unused_parameters", + "False", + "--project_name", + "genomic_bench_DNAbert2" + ], + "program": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", + "codePath": "train.py", + "codePathLocal": "train.py", + "email": "n5huang@ucsd.edu", + "root": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks", + "host": "u112222", + "executable": "/data/nanhuang/miniconda3/envs/bpe_v2/bin/python", + "cpu_count": 64, + "cpu_count_logical": 128, + "gpu": "NVIDIA RTX A6000", + "gpu_count": 8, + "disk": { + "/": { + "total": "3768964489216", + "used": "3558778142720" + } + }, + "memory": { + "total": "1082030182400" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-5a3d8a94-f380-da39-63d2-4cae98f5c2ae" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + 
"cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-cf8724bd-d619-7916-ee26-88d517a20c47" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-48b494ab-4a63-ff4c-5cc8-746af5d27310" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-968c7ea7-97bf-416a-7689-72c141cfc2bb" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-d53c626b-860f-1dec-1cfa-1dfcde78bc88" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-caa40ec7-afcb-5fe0-c53a-85eb54152941" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-18ee7a7f-1bbe-edef-c72c-3abed60917b2" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-a8757d5a-c26e-48c6-a704-dfe62167fc81" + } + ], + "cudaVersion": "12.4", + "writerId": "8kxy12xwhax2huanedquf3eavd1b3gai" +} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_052750-4zki9d15/files/wandb-summary.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_052750-4zki9d15/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..3756af571931fec44efdaa1ef6738731a7dc107b --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_052750-4zki9d15/files/wandb-summary.json @@ -0,0 +1 @@ +{"_runtime":12,"_wandb":{"runtime":12}} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_052750-4zki9d15/logs/debug-core.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_052750-4zki9d15/logs/debug-core.log new file mode 100644 index 
0000000000000000000000000000000000000000..30205e0f234f9f0073f4955b56efb0b08cf3a143 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_052750-4zki9d15/logs/debug-core.log @@ -0,0 +1,14 @@ +{"time":"2026-03-24T05:27:50.650648219-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpq1e6fu96/port-2782250.txt","pid":2782250,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T05:27:50.651782483-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2782250} +{"time":"2026-03-24T05:27:50.651717413-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2782250-2782295-46840068/socket","Net":"unix"}} +{"time":"2026-03-24T05:27:50.822724772-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T05:27:50.924078422-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"4zki9d15","id":"1(@)"} +{"time":"2026-03-24T05:27:51.361686557-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"4zki9d15","id":"1(@)"} +{"time":"2026-03-24T05:28:04.901191322-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T05:28:04.901352371-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T05:28:04.901337231-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T05:28:04.9015683-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T05:28:04.90156421-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2782250-2782295-46840068/socket","Net":"unix"}} +{"time":"2026-03-24T05:28:06.183834592-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T05:28:06.183884322-07:00","level":"INFO","msg":"connection: 
ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T05:28:06.183936821-07:00","level":"INFO","msg":"server is closed"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_052750-4zki9d15/logs/debug-internal.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_052750-4zki9d15/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..40c6792ee2cd66463373dc737a55d901f368b8a2 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_052750-4zki9d15/logs/debug-internal.log @@ -0,0 +1,11 @@ +{"time":"2026-03-24T05:27:50.92443321-07:00","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2026-03-24T05:27:51.360630943-07:00","level":"INFO","msg":"stream: created new stream","id":"4zki9d15"} +{"time":"2026-03-24T05:27:51.361423189-07:00","level":"INFO","msg":"handler: started","stream_id":"4zki9d15"} +{"time":"2026-03-24T05:27:51.361600998-07:00","level":"INFO","msg":"sender: started","stream_id":"4zki9d15"} +{"time":"2026-03-24T05:27:51.361554238-07:00","level":"INFO","msg":"stream: started","id":"4zki9d15"} +{"time":"2026-03-24T05:27:51.361614988-07:00","level":"INFO","msg":"writer: started","stream_id":"4zki9d15"} +{"time":"2026-03-24T05:28:04.901379541-07:00","level":"INFO","msg":"stream: closing","id":"4zki9d15"} +{"time":"2026-03-24T05:28:05.725956988-07:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2026-03-24T05:28:06.119559966-07:00","level":"INFO","msg":"handler: closed","stream_id":"4zki9d15"} +{"time":"2026-03-24T05:28:06.119764155-07:00","level":"INFO","msg":"sender: closed","stream_id":"4zki9d15"} +{"time":"2026-03-24T05:28:06.119784745-07:00","level":"INFO","msg":"stream: closed","id":"4zki9d15"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_052750-4zki9d15/logs/debug.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_052750-4zki9d15/logs/debug.log new file mode 100644 index 
0000000000000000000000000000000000000000..53b0ef434a7203ebcee5f5d8ca37b8043cdb24e5 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_052750-4zki9d15/logs/debug.log @@ -0,0 +1,24 @@ +2026-03-24 05:27:50,449 INFO MainThread:2782250 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1 +2026-03-24 05:27:50,449 INFO MainThread:2782250 [wandb_setup.py:_flush():80] Configure stats pid to 2782250 +2026-03-24 05:27:50,449 INFO MainThread:2782250 [wandb_setup.py:_flush():80] Loading settings from /home/nanhuang/.config/wandb/settings +2026-03-24 05:27:50,449 INFO MainThread:2782250 [wandb_setup.py:_flush():80] Loading settings from /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/settings +2026-03-24 05:27:50,449 INFO MainThread:2782250 [wandb_setup.py:_flush():80] Loading settings from environment variables +2026-03-24 05:27:50,449 INFO MainThread:2782250 [wandb_init.py:setup_run_log_directory():714] Logging user logs to /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_052750-4zki9d15/logs/debug.log +2026-03-24 05:27:50,449 INFO MainThread:2782250 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_052750-4zki9d15/logs/debug-internal.log +2026-03-24 05:27:50,449 INFO MainThread:2782250 [wandb_init.py:init():841] calling init triggers +2026-03-24 05:27:50,449 INFO MainThread:2782250 [wandb_init.py:init():846] wandb.init called with sweep_config: {} +config: {'_wandb': {}} +2026-03-24 05:27:50,450 INFO MainThread:2782250 [wandb_init.py:init():889] starting backend +2026-03-24 05:27:50,824 INFO MainThread:2782250 [wandb_init.py:init():892] sending inform_init request +2026-03-24 05:27:50,917 INFO MainThread:2782250 [wandb_init.py:init():900] backend started and connected +2026-03-24 05:27:50,926 INFO MainThread:2782250 [wandb_init.py:init():970] updated telemetry +2026-03-24 05:27:50,928 INFO MainThread:2782250 [wandb_init.py:init():994] communicating 
run to backend with 90.0 second timeout +2026-03-24 05:27:51,902 INFO MainThread:2782250 [wandb_init.py:init():1041] starting run threads in backend +2026-03-24 05:27:52,251 INFO MainThread:2782250 [wandb_run.py:_console_start():2521] atexit reg +2026-03-24 05:27:52,251 INFO MainThread:2782250 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2026-03-24 05:27:52,251 INFO MainThread:2782250 [wandb_run.py:_redirect():2438] Wrapping output streams. +2026-03-24 05:27:52,251 INFO MainThread:2782250 [wandb_run.py:_redirect():2461] Redirects installed. +2026-03-24 05:27:52,259 INFO MainThread:2782250 [wandb_init.py:init():1081] run started, returning control to user process +2026-03-24 05:28:04,221 INFO MainThread:2782250 [wandb_run.py:_config_callback():1396] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 512, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['BertForMaskedLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 
'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': '/data/nanhuang/Nan/models/DNAbert2_Pretrained', 'transformers_version': '4.35.2', 'model_type': 'bert', 'vocab_size': 4096, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 512, 'type_vocab_size': 2, 'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'classifier_dropout': None, 'output_dir': 'genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 128, 'per_device_eval_batch_size': 128, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 3e-05, 'weight_decay': 0.05, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 4, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'warmup_ratio': 0.15, 'warmup_steps': 0, 'log_level': 'info', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 'genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/runs/Mar24_05-27-49_u112222', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 100, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 100, 'save_total_limit': 1, 'save_safetensors': True, 'save_on_each_node': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 
'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 100, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': 'base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'eval_f1', 'greater_is_better': True, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': False, 'include_tokens_per_second': False, 'neftune_noise_alpha': None, 'vocab_file': None, 'cache_dir': 
None, 'model_max_length': 512, 'find_unused_parameters': False, 'checkpointing': False, 'eval_and_save_results': True, 'save_model': False, 'project_name': 'genomic_bench_DNAbert2'} +2026-03-24 05:28:04,902 INFO wandb-AsyncioManager-main:2782250 [service_client.py:_forward_responses():80] Reached EOF. +2026-03-24 05:28:04,902 INFO wandb-AsyncioManager-main:2782250 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles. diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_052750-4zki9d15/run-4zki9d15.wandb b/Finetune-GenomicBenchmarks/wandb/run-20260324_052750-4zki9d15/run-4zki9d15.wandb new file mode 100644 index 0000000000000000000000000000000000000000..8f30c65dc077dcf1a432413bed3a42e3c3af829c Binary files /dev/null and b/Finetune-GenomicBenchmarks/wandb/run-20260324_052750-4zki9d15/run-4zki9d15.wandb differ diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-2rwws7uq/files/config.yaml b/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-2rwws7uq/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a8137fca029aba45f45793115e5cf8bc6d36b881 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-2rwws7uq/files/config.yaml @@ -0,0 +1,559 @@ +_name_or_path: + value: /data/nanhuang/Nan/models/DNAbert2_Pretrained +_wandb: + value: + cli_version: 0.23.1 + e: + cv0r89g9ise0nvsmwie3e27sj6oi8rnx: + args: + - --model_name_or_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained + - --tokenizer_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json + - --trust_remote_code + - "True" + - --data_path + - /data/nanhuang/Nan/ft_data/drosophila_enhancers_stark/split + - --kmer + - "-1" + - --run_name + - base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42 + - --model_max_length + - "512" + - --per_device_train_batch_size + - "128" + - --per_device_eval_batch_size + - "128" + - --gradient_accumulation_steps + - "1" + - --learning_rate + - "3e-5" + - --weight_decay + - 
"0.05" + - --num_train_epochs + - "4" + - --lr_scheduler_type + - linear + - --warmup_steps + - "0" + - --warmup_ratio + - "0.15" + - --fp16 + - --output_dir + - genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42 + - --evaluation_strategy + - epoch + - --save_strategy + - epoch + - --load_best_model_at_end + - "True" + - --metric_for_best_model + - eval_f1 + - --greater_is_better + - "True" + - --save_total_limit + - "1" + - --logging_steps + - "100" + - --overwrite_output_dir + - "True" + - --log_level + - info + - --seed + - "42" + - --find_unused_parameters + - "False" + - --project_name + - genomic_bench_DNAbert2 + codePath: train.py + codePathLocal: train.py + cpu_count: 64 + cpu_count_logical: 128 + cudaVersion: "12.4" + disk: + /: + total: "3768964489216" + used: "3559220674560" + email: n5huang@ucsd.edu + executable: /data/nanhuang/miniconda3/envs/bpe_v2/bin/python + gpu: NVIDIA RTX A6000 + gpu_count: 8 + gpu_nvidia: + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-5a3d8a94-f380-da39-63d2-4cae98f5c2ae + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-cf8724bd-d619-7916-ee26-88d517a20c47 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-48b494ab-4a63-ff4c-5cc8-746af5d27310 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-968c7ea7-97bf-416a-7689-72c141cfc2bb + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-d53c626b-860f-1dec-1cfa-1dfcde78bc88 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-caa40ec7-afcb-5fe0-c53a-85eb54152941 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: 
GPU-18ee7a7f-1bbe-edef-c72c-3abed60917b2 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-a8757d5a-c26e-48c6-a704-dfe62167fc81 + host: u112222 + memory: + total: "1082030182400" + os: Linux-5.15.0-126-generic-x86_64-with-glibc2.35 + program: /data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py + python: CPython 3.9.18 + root: /data/nanhuang/Nan/Finetune-GenomicBenchmarks + startedAt: "2026-03-24T20:29:12.759024Z" + writerId: cv0r89g9ise0nvsmwie3e27sj6oi8rnx + m: + - "1": train/global_step + "6": + - 3 + "7": [] + - "2": '*' + "5": 1 + "6": + - 1 + "7": [] + python_version: 3.9.18 + t: + "1": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "2": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "3": + - 7 + - 66 + "4": 3.9.18 + "5": 0.23.1 + "6": 4.35.2 + "9": + "1": transformers_trainer + "12": 0.23.1 + "13": linux-x86_64 +adafactor: + value: false +adam_beta1: + value: 0.9 +adam_beta2: + value: 0.999 +adam_epsilon: + value: 1e-08 +add_cross_attention: + value: false +architectures: + value: + - BertForMaskedLM +attention_probs_dropout_prob: + value: 0.1 +auto_find_batch_size: + value: false +bad_words_ids: + value: null +begin_suppress_tokens: + value: null +bf16: + value: false +bf16_full_eval: + value: false +bos_token_id: + value: null +cache_dir: + value: null +checkpointing: + value: false +chunk_size_feed_forward: + value: 0 +classifier_dropout: + value: null +cross_attention_hidden_size: + value: null +data_seed: + value: null +dataloader_drop_last: + value: false +dataloader_num_workers: + value: 0 +dataloader_pin_memory: + value: false +ddp_backend: + value: null +ddp_broadcast_buffers: + value: null +ddp_bucket_cap_mb: + value: null +ddp_find_unused_parameters: + value: null +ddp_timeout: + value: 1800 +debug: + value: [] +decoder_start_token_id: + value: null +deepspeed: + value: null +disable_tqdm: + value: false +dispatch_batches: + value: null +diversity_penalty: + value: 0 +do_eval: + 
value: true +do_predict: + value: false +do_sample: + value: false +do_train: + value: false +early_stopping: + value: false +encoder_no_repeat_ngram_size: + value: 0 +eos_token_id: + value: null +eval_accumulation_steps: + value: null +eval_and_save_results: + value: true +eval_delay: + value: 0 +eval_steps: + value: 100 +evaluation_strategy: + value: epoch +exponential_decay_length_penalty: + value: null +find_unused_parameters: + value: false +finetuning_task: + value: null +forced_bos_token_id: + value: null +forced_eos_token_id: + value: null +fp16: + value: true +fp16_backend: + value: auto +fp16_full_eval: + value: false +fp16_opt_level: + value: O1 +fsdp: + value: [] +fsdp_config: + value: + min_num_params: 0 + xla: false + xla_fsdp_grad_ckpt: false +fsdp_min_num_params: + value: 0 +fsdp_transformer_layer_cls_to_wrap: + value: null +full_determinism: + value: false +gradient_accumulation_steps: + value: 1 +gradient_checkpointing: + value: false +gradient_checkpointing_kwargs: + value: null +greater_is_better: + value: true +group_by_length: + value: false +half_precision_backend: + value: auto +hidden_act: + value: gelu +hidden_dropout_prob: + value: 0.1 +hidden_size: + value: 768 +hub_always_push: + value: false +hub_model_id: + value: null +hub_private_repo: + value: false +hub_strategy: + value: every_save +hub_token: + value: +id2label: + value: + "0": LABEL_0 + "1": LABEL_1 +ignore_data_skip: + value: false +include_inputs_for_metrics: + value: false +include_tokens_per_second: + value: false +initializer_range: + value: 0.02 +intermediate_size: + value: 3072 +is_decoder: + value: false +is_encoder_decoder: + value: false +jit_mode_eval: + value: false +label_names: + value: null +label_smoothing_factor: + value: 0 +label2id: + value: + LABEL_0: 0 + LABEL_1: 1 +layer_norm_eps: + value: 1e-12 +learning_rate: + value: 3e-05 +length_column_name: + value: length +length_penalty: + value: 1 +load_best_model_at_end: + value: true +local_rank: + value: 0 
+log_level: + value: info +log_level_replica: + value: warning +log_on_each_node: + value: true +logging_dir: + value: genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/runs/Mar24_13-29-10_u112222 +logging_first_step: + value: false +logging_nan_inf_filter: + value: true +logging_steps: + value: 100 +logging_strategy: + value: steps +lr_scheduler_type: + value: linear +max_grad_norm: + value: 1 +max_length: + value: 512 +max_position_embeddings: + value: 512 +max_steps: + value: -1 +metric_for_best_model: + value: eval_f1 +min_length: + value: 0 +model_max_length: + value: 512 +model_type: + value: bert +mp_parameters: + value: "" +neftune_noise_alpha: + value: null +no_cuda: + value: false +no_repeat_ngram_size: + value: 0 +num_attention_heads: + value: 12 +num_beam_groups: + value: 1 +num_beams: + value: 1 +num_hidden_layers: + value: 12 +num_return_sequences: + value: 1 +num_train_epochs: + value: 4 +optim: + value: adamw_torch +optim_args: + value: null +output_attentions: + value: false +output_dir: + value: genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42 +output_hidden_states: + value: false +output_scores: + value: false +overwrite_output_dir: + value: true +pad_token_id: + value: 0 +past_index: + value: -1 +per_device_eval_batch_size: + value: 128 +per_device_train_batch_size: + value: 128 +per_gpu_eval_batch_size: + value: null +per_gpu_train_batch_size: + value: null +position_embedding_type: + value: absolute +prediction_loss_only: + value: false +prefix: + value: null +problem_type: + value: null +project_name: + value: genomic_bench_DNAbert2 +push_to_hub: + value: false +push_to_hub_model_id: + value: null +push_to_hub_organization: + value: null +push_to_hub_token: + value: +ray_scope: + value: last +remove_invalid_values: + value: false +remove_unused_columns: + value: true +repetition_penalty: + value: 1 +report_to: + value: + - 
wandb +resume_from_checkpoint: + value: null +return_dict: + value: true +return_dict_in_generate: + value: false +run_name: + value: base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42 +save_model: + value: false +save_on_each_node: + value: false +save_safetensors: + value: true +save_steps: + value: 100 +save_strategy: + value: epoch +save_total_limit: + value: 1 +seed: + value: 42 +sep_token_id: + value: null +skip_memory_metrics: + value: true +split_batches: + value: false +suppress_tokens: + value: null +task_specific_params: + value: null +temperature: + value: 1 +tf_legacy_loss: + value: false +tf32: + value: null +tie_encoder_decoder: + value: false +tie_word_embeddings: + value: true +tokenizer_class: + value: null +top_k: + value: 50 +top_p: + value: 1 +torch_compile: + value: false +torch_compile_backend: + value: null +torch_compile_mode: + value: null +torch_dtype: + value: float32 +torchdynamo: + value: null +torchscript: + value: false +tpu_metrics_debug: + value: false +tpu_num_cores: + value: null +transformers_version: + value: 4.35.2 +type_vocab_size: + value: 2 +typical_p: + value: 1 +use_bfloat16: + value: false +use_cache: + value: true +use_cpu: + value: false +use_ipex: + value: false +use_legacy_prediction_loop: + value: false +use_mps_device: + value: false +vocab_file: + value: null +vocab_size: + value: 4096 +warmup_ratio: + value: 0.15 +warmup_steps: + value: 0 +weight_decay: + value: 0.05 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-2rwws7uq/files/output.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-2rwws7uq/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..0f21f4e1bc71ce057dd8f981d07f4f8f597d8e3f --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-2rwws7uq/files/output.log @@ -0,0 +1,129 @@ +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... 
+WARNING:root:Perform single sequence classification... +Some weights of BertForSequenceClassification were not initialized from the model checkpoint at /data/nanhuang/Nan/models/DNAbert2_Pretrained and are newly initialized: ['classifier.bias', 'classifier.weight', 'bert.pooler.dense.bias', 'bert.pooler.dense.weight'] +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. +/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/accelerate/accelerator.py:439: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead. + self.scaler = torch.cuda.amp.GradScaler(**kwargs) +Using auto half precision backend +***** Running training ***** + Num examples = 5,531 + Num Epochs = 4 + Instantaneous batch size per device = 128 + Total train batch size (w. parallel, distributed & accumulation) = 128 + Gradient Accumulation steps = 1 + Total optimization steps = 176 + Number of trainable parameters = 89,188,610 +Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true" + 0%| | 0/176 [00:00 + train() + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 454, in train + trainer.train() + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 1555, in train + return inner_training_loop( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 1860, in _inner_training_loop + tr_loss_step = self.training_step(model, inputs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 2725, in training_step + loss = self.compute_loss(model, inputs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 2748, in compute_loss + outputs = model(**inputs) + File 
"/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/accelerate/utils/operations.py", line 680, in forward + return model_forward(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/accelerate/utils/operations.py", line 668, in __call__ + return convert_to_fp32(self.model_forward(*args, **kwargs)) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/amp/autocast_mode.py", line 44, in decorate_autocast + return func(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 1564, in forward + outputs = self.bert( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 1013, in forward + encoder_outputs = self.encoder( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File 
"/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 607, in forward + layer_outputs = layer_module( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 497, in forward + self_attention_outputs = self.attention( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 427, in forward + self_outputs = self.self( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 352, in forward + attention_scores = attention_scores + attention_mask +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.50 GiB. GPU 0 has a total capacity of 47.53 GiB of which 1.42 GiB is free. Process 2988293 has 21.00 GiB memory in use. Including non-PyTorch memory, this process has 13.12 GiB memory in use. 
Process 2988061 has 5.62 GiB memory in use. Process 2988353 has 474.00 MiB memory in use. Process 2988034 has 260.00 MiB memory in use. Process 2988245 has 260.00 MiB memory in use. Process 2988374 has 260.00 MiB memory in use. Process 2988531 has 260.00 MiB memory in use. Process 2988064 has 260.00 MiB memory in use. Process 2988566 has 260.00 MiB memory in use. Process 2988668 has 260.00 MiB memory in use. Process 2988116 has 260.00 MiB memory in use. Process 2988093 has 260.00 MiB memory in use. Process 2988637 has 260.00 MiB memory in use. Process 2988554 has 260.00 MiB memory in use. Process 2988906 has 260.00 MiB memory in use. Process 2988874 has 260.00 MiB memory in use. Process 2988923 has 260.00 MiB memory in use. Process 2988286 has 260.00 MiB memory in use. Process 2989072 has 260.00 MiB memory in use. Process 2988448 has 260.00 MiB memory in use. Process 2988348 has 62.00 MiB memory in use. Process 2988736 has 260.00 MiB memory in use. Process 2988855 has 62.00 MiB memory in use. Process 2988636 has 62.00 MiB memory in use. Process 2988454 has 62.00 MiB memory in use. Process 2988549 has 62.00 MiB memory in use. Process 2988575 has 18.00 MiB memory in use. Process 2988225 has 18.00 MiB memory in use. Process 2988538 has 18.00 MiB memory in use. Process 2989222 has 10.00 MiB memory in use. Process 2988129 has 10.00 MiB memory in use. Process 2989200 has 10.00 MiB memory in use. Process 2988790 has 10.00 MiB memory in use. Process 2989488 has 10.00 MiB memory in use. Process 2988709 has 16.00 MiB memory in use. Process 2988791 has 10.00 MiB memory in use. Process 2989081 has 10.00 MiB memory in use. Process 2988814 has 10.00 MiB memory in use. Process 2988511 has 10.00 MiB memory in use. Process 2988942 has 10.00 MiB memory in use. Process 2989897 has 10.00 MiB memory in use. Process 2989914 has 10.00 MiB memory in use. Process 2989070 has 10.00 MiB memory in use. Process 2989165 has 10.00 MiB memory in use. Process 2989068 has 10.00 MiB memory in use. 
Process 2990500 has 10.00 MiB memory in use. Process 2989449 has 10.00 MiB memory in use. Process 2988447 has 10.00 MiB memory in use. Process 2988379 has 10.00 MiB memory in use. Of the allocated memory 11.68 GiB is allocated by PyTorch, and 1.13 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) +Traceback (most recent call last): + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 473, in + train() + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 454, in train + trainer.train() + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 1555, in train + return inner_training_loop( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 1860, in _inner_training_loop + tr_loss_step = self.training_step(model, inputs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 2725, in training_step + loss = self.compute_loss(model, inputs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 2748, in compute_loss + outputs = model(**inputs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/accelerate/utils/operations.py", line 680, in forward + return model_forward(*args, **kwargs) + File 
"/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/accelerate/utils/operations.py", line 668, in __call__ + return convert_to_fp32(self.model_forward(*args, **kwargs)) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/amp/autocast_mode.py", line 44, in decorate_autocast + return func(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 1564, in forward + outputs = self.bert( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 1013, in forward + encoder_outputs = self.encoder( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 607, in forward + layer_outputs = layer_module( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File 
"/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 497, in forward + self_attention_outputs = self.attention( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 427, in forward + self_outputs = self.self( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 352, in forward + attention_scores = attention_scores + attention_mask +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.50 GiB. GPU 0 has a total capacity of 47.53 GiB of which 1.42 GiB is free. Process 2988293 has 21.00 GiB memory in use. Including non-PyTorch memory, this process has 13.12 GiB memory in use. Process 2988061 has 5.62 GiB memory in use. Process 2988353 has 474.00 MiB memory in use. Process 2988034 has 260.00 MiB memory in use. Process 2988245 has 260.00 MiB memory in use. Process 2988374 has 260.00 MiB memory in use. Process 2988531 has 260.00 MiB memory in use. Process 2988064 has 260.00 MiB memory in use. Process 2988566 has 260.00 MiB memory in use. Process 2988668 has 260.00 MiB memory in use. Process 2988116 has 260.00 MiB memory in use. Process 2988093 has 260.00 MiB memory in use. 
Process 2988637 has 260.00 MiB memory in use. Process 2988554 has 260.00 MiB memory in use. Process 2988906 has 260.00 MiB memory in use. Process 2988874 has 260.00 MiB memory in use. Process 2988923 has 260.00 MiB memory in use. Process 2988286 has 260.00 MiB memory in use. Process 2989072 has 260.00 MiB memory in use. Process 2988448 has 260.00 MiB memory in use. Process 2988348 has 62.00 MiB memory in use. Process 2988736 has 260.00 MiB memory in use. Process 2988855 has 62.00 MiB memory in use. Process 2988636 has 62.00 MiB memory in use. Process 2988454 has 62.00 MiB memory in use. Process 2988549 has 62.00 MiB memory in use. Process 2988575 has 18.00 MiB memory in use. Process 2988225 has 18.00 MiB memory in use. Process 2988538 has 18.00 MiB memory in use. Process 2989222 has 10.00 MiB memory in use. Process 2988129 has 10.00 MiB memory in use. Process 2989200 has 10.00 MiB memory in use. Process 2988790 has 10.00 MiB memory in use. Process 2989488 has 10.00 MiB memory in use. Process 2988709 has 16.00 MiB memory in use. Process 2988791 has 10.00 MiB memory in use. Process 2989081 has 10.00 MiB memory in use. Process 2988814 has 10.00 MiB memory in use. Process 2988511 has 10.00 MiB memory in use. Process 2988942 has 10.00 MiB memory in use. Process 2989897 has 10.00 MiB memory in use. Process 2989914 has 10.00 MiB memory in use. Process 2989070 has 10.00 MiB memory in use. Process 2989165 has 10.00 MiB memory in use. Process 2989068 has 10.00 MiB memory in use. Process 2990500 has 10.00 MiB memory in use. Process 2989449 has 10.00 MiB memory in use. Process 2988447 has 10.00 MiB memory in use. Process 2988379 has 10.00 MiB memory in use. Of the allocated memory 11.68 GiB is allocated by PyTorch, and 1.13 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. 
See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-2rwws7uq/files/requirements.txt b/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-2rwws7uq/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..44d18d32ec4648cd530877d7c8c4758d5e887b9c --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-2rwws7uq/files/requirements.txt @@ -0,0 +1,144 @@ +scipy==1.13.1 +jupyter_core==5.8.1 +smmap==5.0.2 +yarl==1.22.0 +executing==2.2.0 +cffi==2.0.0 +mkl_random==1.2.8 +traitlets==5.14.3 +wandb==0.23.1 +annotated-types==0.7.0 +evaluate==0.4.6 +kiwisolver==1.4.4 +Jinja2==3.1.6 +pyparsing==3.2.0 +mpmath==1.3.0 +debugpy==1.8.16 +nvidia-cuda-nvrtc-cu12==12.8.93 +docker-pycreds==0.4.0 +pycparser==2.23 +anyio==4.12.0 +safetensors==0.7.0 +matplotlib-inline==0.1.7 +Pygments==2.19.2 +numpy==2.0.2 +nvidia-cuda-cupti-cu12==12.8.90 +Bottleneck==1.4.2 +matplotlib==3.9.2 +numexpr==2.10.1 +sip==6.7.12 +aiohappyeyeballs==2.6.1 +ptyprocess==0.7.0 +fsspec==2025.7.0 +accelerate==0.25.0 +zipp==3.23.0 +PyQt5_sip==12.13.0 +pure_eval==0.2.3 +regex==2025.11.3 +aiosignal==1.4.0 +certifi==2025.10.5 +transformers==4.35.2 +mkl-service==2.4.0 +httpx==0.28.1 +mkl_fft==1.3.11 +pickleshare==0.7.5 +ipykernel==6.30.1 +nvidia-nvtx-cu12==12.8.90 +nvidia-cufft-cu12==11.3.3.83 +triton==3.4.0 +numba==0.60.0 +psutil==7.0.0 +contourpy==1.2.1 +PyQt5==5.15.10 +packaging==25.0 +datasets==4.4.1 +ipython==8.18.1 +sympy==1.14.0 +nvidia-cusolver-cu12==11.7.3.90 +multidict==6.7.0 +jupyter_client==8.6.3 +setuptools==80.9.0 +prompt_toolkit==3.0.51 +six==1.17.0 +GitPython==3.1.45 +pydantic==2.11.7 +nvidia-cublas-cu12==12.8.4.1 +aiohttp==3.13.2 +tzdata==2025.2 +importlib_metadata==8.7.0 +biopython==1.85 +httpcore==1.0.9 +python-dateutil==2.9.0.post0 +llvmlite==0.43.0 +pandas==2.3.3 +scikit-learn==1.6.1 +asttokens==3.0.0 +joblib==1.5.3 +h11==0.16.0 
+charset-normalizer==3.4.4 +pyzmq==27.0.2 +multiprocess==0.70.18 +nvidia-nvjitlink-cu12==12.8.93 +sentry-sdk==2.35.0 +pytz==2025.2 +pydantic_core==2.33.2 +MarkupSafe==3.0.3 +brotlicffi==1.0.9.2 +stack_data==0.6.3 +tqdm==4.67.1 +pynndescent==0.5.13 +importlib_resources==6.5.2 +ply==3.11 +pyarrow==21.0.0 +tokenizers==0.15.2 +exceptiongroup==1.3.1 +nvidia-cusparse-cu12==12.5.8.93 +comm==0.2.3 +pillow==11.3.0 +nvidia-cusparselt-cu12==0.7.1 +protobuf==3.20.3 +urllib3==2.5.0 +wheel==0.45.1 +wcwidth==0.2.13 +appdirs==1.4.4 +PySocks==1.7.1 +PyQt6_sip==13.10.2 +umap-learn==0.5.9.post2 +attrs==25.4.0 +platformdirs==4.3.8 +nvidia-cuda-runtime-cu12==12.8.90 +typing-inspection==0.4.1 +huggingface_hub==0.34.4 +decorator==5.2.1 +filelock==3.17.0 +nvidia-nccl-cu12==2.27.3 +fonttools==4.60.1 +xxhash==3.6.0 +dill==0.4.0 +threadpoolctl==3.6.0 +parso==0.8.4 +pysam==0.9.1 +frozenlist==1.8.0 +typing_extensions==4.15.0 +propcache==0.4.1 +tomli==2.2.1 +click==8.1.8 +nvidia-cudnn-cu12==9.10.2.21 +gitdb==4.0.12 +pip==25.3 +tornado==6.5.2 +networkx==3.2.1 +jedi==0.19.2 +idna==3.11 +pexpect==4.9.0 +async-timeout==5.0.1 +hf-xet==1.1.8 +nvidia-curand-cu12==10.3.9.90 +PyYAML==6.0.2 +nvidia-cufile-cu12==1.13.1.3 +setproctitle==1.3.6 +eval_type_backport==0.2.2 +requests==2.32.5 +nest-asyncio==1.6.0 +torch==2.8.0 +cycler==0.11.0 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-2rwws7uq/files/wandb-metadata.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-2rwws7uq/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..bdb5300420a12913f14ebbbc1f2f448ce492ef6e --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-2rwws7uq/files/wandb-metadata.json @@ -0,0 +1,146 @@ +{ + "os": "Linux-5.15.0-126-generic-x86_64-with-glibc2.35", + "python": "CPython 3.9.18", + "startedAt": "2026-03-24T20:29:12.759024Z", + "args": [ + "--model_name_or_path", + "/data/nanhuang/Nan/models/DNAbert2_Pretrained", + "--tokenizer_path", + 
"/data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json", + "--trust_remote_code", + "True", + "--data_path", + "/data/nanhuang/Nan/ft_data/drosophila_enhancers_stark/split", + "--kmer", + "-1", + "--run_name", + "base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42", + "--model_max_length", + "512", + "--per_device_train_batch_size", + "128", + "--per_device_eval_batch_size", + "128", + "--gradient_accumulation_steps", + "1", + "--learning_rate", + "3e-5", + "--weight_decay", + "0.05", + "--num_train_epochs", + "4", + "--lr_scheduler_type", + "linear", + "--warmup_steps", + "0", + "--warmup_ratio", + "0.15", + "--fp16", + "--output_dir", + "genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42", + "--evaluation_strategy", + "epoch", + "--save_strategy", + "epoch", + "--load_best_model_at_end", + "True", + "--metric_for_best_model", + "eval_f1", + "--greater_is_better", + "True", + "--save_total_limit", + "1", + "--logging_steps", + "100", + "--overwrite_output_dir", + "True", + "--log_level", + "info", + "--seed", + "42", + "--find_unused_parameters", + "False", + "--project_name", + "genomic_bench_DNAbert2" + ], + "program": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", + "codePath": "train.py", + "codePathLocal": "train.py", + "email": "n5huang@ucsd.edu", + "root": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks", + "host": "u112222", + "executable": "/data/nanhuang/miniconda3/envs/bpe_v2/bin/python", + "cpu_count": 64, + "cpu_count_logical": 128, + "gpu": "NVIDIA RTX A6000", + "gpu_count": 8, + "disk": { + "/": { + "total": "3768964489216", + "used": "3559220674560" + } + }, + "memory": { + "total": "1082030182400" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-5a3d8a94-f380-da39-63d2-4cae98f5c2ae" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + 
"cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-cf8724bd-d619-7916-ee26-88d517a20c47" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-48b494ab-4a63-ff4c-5cc8-746af5d27310" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-968c7ea7-97bf-416a-7689-72c141cfc2bb" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-d53c626b-860f-1dec-1cfa-1dfcde78bc88" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-caa40ec7-afcb-5fe0-c53a-85eb54152941" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-18ee7a7f-1bbe-edef-c72c-3abed60917b2" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-a8757d5a-c26e-48c6-a704-dfe62167fc81" + } + ], + "cudaVersion": "12.4", + "writerId": "cv0r89g9ise0nvsmwie3e27sj6oi8rnx" +} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-2rwws7uq/files/wandb-summary.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-2rwws7uq/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..20b4385da45dc73f28a83368c93d1645d8267473 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-2rwws7uq/files/wandb-summary.json @@ -0,0 +1 @@ +{"_runtime":8,"_wandb":{"runtime":8}} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-2rwws7uq/logs/debug-core.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-2rwws7uq/logs/debug-core.log new file mode 100644 index 
0000000000000000000000000000000000000000..d1fe3a988651d7f6933636186792692b73537736 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-2rwws7uq/logs/debug-core.log @@ -0,0 +1,28 @@ +{"time":"2026-03-24T13:29:12.578343138-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpb0ag5brl/port-2988293.txt","pid":2988293,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:12.579814699-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988293} +{"time":"2026-03-24T13:29:12.579807359-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988293-2993387-3073390821/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:12.730261372-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:12.80843658-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"jif3dch5","id":"1(@)"} +{"time":"2026-03-24T13:29:12.971478289-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmp3ygkt1nk/port-2988247.txt","pid":2988247,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:12.97296959-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988247} +{"time":"2026-03-24T13:29:12.972962081-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988247-2993683-2593588118/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:13.126772563-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:13.207244979-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"2rwws7uq","id":"1(@)"} +{"time":"2026-03-24T13:29:16.012298206-07:00","level":"INFO","msg":"handleInformInit: stream 
started","streamId":"jif3dch5","id":"1(@)"} +{"time":"2026-03-24T13:29:16.629960644-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"2rwws7uq","id":"1(@)"} +{"time":"2026-03-24T13:29:25.982411196-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:25.982531865-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:25.982529585-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:25.982635544-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:25.982749004-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988293-2993387-3073390821/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:26.007841116-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:26.008091164-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:26.008155894-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:26.008165324-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:26.008337723-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988247-2993683-2593588118/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:27.210320586-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:27.210381796-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:27.210401286-07:00","level":"INFO","msg":"server is closed"} +{"time":"2026-03-24T13:29:30.367245973-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:30.367296483-07:00","level":"INFO","msg":"connection: ManageConnectionData: 
connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:30.367315263-07:00","level":"INFO","msg":"server is closed"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-2rwws7uq/logs/debug-internal.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-2rwws7uq/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..0e040ccd354ce023c56e2866c10e6ee714189986 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-2rwws7uq/logs/debug-internal.log @@ -0,0 +1,11 @@ +{"time":"2026-03-24T13:29:13.207596797-07:00","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2026-03-24T13:29:16.629512117-07:00","level":"INFO","msg":"stream: created new stream","id":"2rwws7uq"} +{"time":"2026-03-24T13:29:16.629731305-07:00","level":"INFO","msg":"handler: started","stream_id":"2rwws7uq"} +{"time":"2026-03-24T13:29:16.629933364-07:00","level":"INFO","msg":"stream: started","id":"2rwws7uq"} +{"time":"2026-03-24T13:29:16.629979784-07:00","level":"INFO","msg":"sender: started","stream_id":"2rwws7uq"} +{"time":"2026-03-24T13:29:16.629979214-07:00","level":"INFO","msg":"writer: started","stream_id":"2rwws7uq"} +{"time":"2026-03-24T13:29:26.007967875-07:00","level":"INFO","msg":"stream: closing","id":"2rwws7uq"} +{"time":"2026-03-24T13:29:26.819140052-07:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2026-03-24T13:29:27.181689784-07:00","level":"INFO","msg":"handler: closed","stream_id":"2rwws7uq"} +{"time":"2026-03-24T13:29:27.181959743-07:00","level":"INFO","msg":"sender: closed","stream_id":"2rwws7uq"} +{"time":"2026-03-24T13:29:27.181985703-07:00","level":"INFO","msg":"stream: closed","id":"2rwws7uq"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-2rwws7uq/logs/debug.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-2rwws7uq/logs/debug.log new file mode 100644 index 
0000000000000000000000000000000000000000..d955dff8fdce0a9042e3565b74db8403b7ab8812 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-2rwws7uq/logs/debug.log @@ -0,0 +1,24 @@ +2026-03-24 13:29:12,768 INFO MainThread:2988247 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1 +2026-03-24 13:29:12,768 INFO MainThread:2988247 [wandb_setup.py:_flush():80] Configure stats pid to 2988247 +2026-03-24 13:29:12,768 INFO MainThread:2988247 [wandb_setup.py:_flush():80] Loading settings from /home/nanhuang/.config/wandb/settings +2026-03-24 13:29:12,768 INFO MainThread:2988247 [wandb_setup.py:_flush():80] Loading settings from /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/settings +2026-03-24 13:29:12,769 INFO MainThread:2988247 [wandb_setup.py:_flush():80] Loading settings from environment variables +2026-03-24 13:29:12,769 INFO MainThread:2988247 [wandb_init.py:setup_run_log_directory():714] Logging user logs to /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-2rwws7uq/logs/debug.log +2026-03-24 13:29:12,769 INFO MainThread:2988247 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-2rwws7uq/logs/debug-internal.log +2026-03-24 13:29:12,769 INFO MainThread:2988247 [wandb_init.py:init():841] calling init triggers +2026-03-24 13:29:12,770 INFO MainThread:2988247 [wandb_init.py:init():846] wandb.init called with sweep_config: {} +config: {'_wandb': {}} +2026-03-24 13:29:12,770 INFO MainThread:2988247 [wandb_init.py:init():889] starting backend +2026-03-24 13:29:13,126 INFO MainThread:2988247 [wandb_init.py:init():892] sending inform_init request +2026-03-24 13:29:13,211 INFO MainThread:2988247 [wandb_init.py:init():900] backend started and connected +2026-03-24 13:29:13,219 INFO MainThread:2988247 [wandb_init.py:init():970] updated telemetry +2026-03-24 13:29:13,221 INFO MainThread:2988247 [wandb_init.py:init():994] communicating 
run to backend with 90.0 second timeout +2026-03-24 13:29:17,241 INFO MainThread:2988247 [wandb_init.py:init():1041] starting run threads in backend +2026-03-24 13:29:17,392 INFO MainThread:2988247 [wandb_run.py:_console_start():2521] atexit reg +2026-03-24 13:29:17,393 INFO MainThread:2988247 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2026-03-24 13:29:17,393 INFO MainThread:2988247 [wandb_run.py:_redirect():2438] Wrapping output streams. +2026-03-24 13:29:17,393 INFO MainThread:2988247 [wandb_run.py:_redirect():2461] Redirects installed. +2026-03-24 13:29:17,397 INFO MainThread:2988247 [wandb_init.py:init():1081] run started, returning control to user process +2026-03-24 13:29:23,501 INFO MainThread:2988247 [wandb_run.py:_config_callback():1396] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 512, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['BertForMaskedLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 
'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': '/data/nanhuang/Nan/models/DNAbert2_Pretrained', 'transformers_version': '4.35.2', 'model_type': 'bert', 'vocab_size': 4096, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 512, 'type_vocab_size': 2, 'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'classifier_dropout': None, 'output_dir': 'genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 128, 'per_device_eval_batch_size': 128, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 3e-05, 'weight_decay': 0.05, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 4, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'warmup_ratio': 0.15, 'warmup_steps': 0, 'log_level': 'info', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 'genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/runs/Mar24_13-29-10_u112222', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 100, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 100, 'save_total_limit': 1, 'save_safetensors': True, 'save_on_each_node': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 
'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 100, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': 'base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'eval_f1', 'greater_is_better': True, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': False, 'include_tokens_per_second': False, 'neftune_noise_alpha': None, 'vocab_file': None, 'cache_dir': 
None, 'model_max_length': 512, 'find_unused_parameters': False, 'checkpointing': False, 'eval_and_save_results': True, 'save_model': False, 'project_name': 'genomic_bench_DNAbert2'} +2026-03-24 13:29:26,007 INFO wandb-AsyncioManager-main:2988247 [service_client.py:_forward_responses():80] Reached EOF. +2026-03-24 13:29:26,007 INFO wandb-AsyncioManager-main:2988247 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles. diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-2rwws7uq/run-2rwws7uq.wandb b/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-2rwws7uq/run-2rwws7uq.wandb new file mode 100644 index 0000000000000000000000000000000000000000..843d807e4c02ca097ad3c5aade122aede6df3e0c Binary files /dev/null and b/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-2rwws7uq/run-2rwws7uq.wandb differ diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-jif3dch5/files/config.yaml b/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-jif3dch5/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1f5644371218cc16b032f14fc2d56dfeef2f5b82 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-jif3dch5/files/config.yaml @@ -0,0 +1,559 @@ +_name_or_path: + value: /data/nanhuang/Nan/models/DNAbert2_Pretrained +_wandb: + value: + cli_version: 0.23.1 + e: + e7bn866j81j8lfrnwtqn9u5ggx3sfiod: + args: + - --model_name_or_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained + - --tokenizer_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json + - --trust_remote_code + - "True" + - --data_path + - /data/nanhuang/Nan/ft_data/drosophila_enhancers_stark/split + - --kmer + - "-1" + - --run_name + - base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42 + - --model_max_length + - "512" + - --per_device_train_batch_size + - "128" + - --per_device_eval_batch_size + - "128" + - --gradient_accumulation_steps + - "1" + - --learning_rate + - "3e-5" + - --weight_decay + - 
"0.05" + - --num_train_epochs + - "4" + - --lr_scheduler_type + - linear + - --warmup_steps + - "0" + - --warmup_ratio + - "0.15" + - --fp16 + - --output_dir + - genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42 + - --evaluation_strategy + - epoch + - --save_strategy + - epoch + - --load_best_model_at_end + - "True" + - --metric_for_best_model + - eval_f1 + - --greater_is_better + - "True" + - --save_total_limit + - "1" + - --logging_steps + - "100" + - --overwrite_output_dir + - "True" + - --log_level + - info + - --seed + - "42" + - --find_unused_parameters + - "False" + - --project_name + - genomic_bench_DNAbert2 + codePath: train.py + codePathLocal: train.py + cpu_count: 64 + cpu_count_logical: 128 + cudaVersion: "12.4" + disk: + /: + total: "3768964489216" + used: "3559220678656" + email: n5huang@ucsd.edu + executable: /data/nanhuang/miniconda3/envs/bpe_v2/bin/python + gpu: NVIDIA RTX A6000 + gpu_count: 8 + gpu_nvidia: + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-5a3d8a94-f380-da39-63d2-4cae98f5c2ae + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-cf8724bd-d619-7916-ee26-88d517a20c47 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-48b494ab-4a63-ff4c-5cc8-746af5d27310 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-968c7ea7-97bf-416a-7689-72c141cfc2bb + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-d53c626b-860f-1dec-1cfa-1dfcde78bc88 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-caa40ec7-afcb-5fe0-c53a-85eb54152941 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: 
GPU-18ee7a7f-1bbe-edef-c72c-3abed60917b2 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-a8757d5a-c26e-48c6-a704-dfe62167fc81 + host: u112222 + memory: + total: "1082030182400" + os: Linux-5.15.0-126-generic-x86_64-with-glibc2.35 + program: /data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py + python: CPython 3.9.18 + root: /data/nanhuang/Nan/Finetune-GenomicBenchmarks + startedAt: "2026-03-24T20:29:12.235322Z" + writerId: e7bn866j81j8lfrnwtqn9u5ggx3sfiod + m: + - "1": train/global_step + "6": + - 3 + "7": [] + - "2": '*' + "5": 1 + "6": + - 1 + "7": [] + python_version: 3.9.18 + t: + "1": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "2": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "3": + - 7 + - 66 + "4": 3.9.18 + "5": 0.23.1 + "6": 4.35.2 + "9": + "1": transformers_trainer + "12": 0.23.1 + "13": linux-x86_64 +adafactor: + value: false +adam_beta1: + value: 0.9 +adam_beta2: + value: 0.999 +adam_epsilon: + value: 1e-08 +add_cross_attention: + value: false +architectures: + value: + - BertForMaskedLM +attention_probs_dropout_prob: + value: 0.1 +auto_find_batch_size: + value: false +bad_words_ids: + value: null +begin_suppress_tokens: + value: null +bf16: + value: false +bf16_full_eval: + value: false +bos_token_id: + value: null +cache_dir: + value: null +checkpointing: + value: false +chunk_size_feed_forward: + value: 0 +classifier_dropout: + value: null +cross_attention_hidden_size: + value: null +data_seed: + value: null +dataloader_drop_last: + value: false +dataloader_num_workers: + value: 0 +dataloader_pin_memory: + value: false +ddp_backend: + value: null +ddp_broadcast_buffers: + value: null +ddp_bucket_cap_mb: + value: null +ddp_find_unused_parameters: + value: null +ddp_timeout: + value: 1800 +debug: + value: [] +decoder_start_token_id: + value: null +deepspeed: + value: null +disable_tqdm: + value: false +dispatch_batches: + value: null +diversity_penalty: + value: 0 +do_eval: + 
value: true +do_predict: + value: false +do_sample: + value: false +do_train: + value: false +early_stopping: + value: false +encoder_no_repeat_ngram_size: + value: 0 +eos_token_id: + value: null +eval_accumulation_steps: + value: null +eval_and_save_results: + value: true +eval_delay: + value: 0 +eval_steps: + value: 100 +evaluation_strategy: + value: epoch +exponential_decay_length_penalty: + value: null +find_unused_parameters: + value: false +finetuning_task: + value: null +forced_bos_token_id: + value: null +forced_eos_token_id: + value: null +fp16: + value: true +fp16_backend: + value: auto +fp16_full_eval: + value: false +fp16_opt_level: + value: O1 +fsdp: + value: [] +fsdp_config: + value: + min_num_params: 0 + xla: false + xla_fsdp_grad_ckpt: false +fsdp_min_num_params: + value: 0 +fsdp_transformer_layer_cls_to_wrap: + value: null +full_determinism: + value: false +gradient_accumulation_steps: + value: 1 +gradient_checkpointing: + value: false +gradient_checkpointing_kwargs: + value: null +greater_is_better: + value: true +group_by_length: + value: false +half_precision_backend: + value: auto +hidden_act: + value: gelu +hidden_dropout_prob: + value: 0.1 +hidden_size: + value: 768 +hub_always_push: + value: false +hub_model_id: + value: null +hub_private_repo: + value: false +hub_strategy: + value: every_save +hub_token: + value: +id2label: + value: + "0": LABEL_0 + "1": LABEL_1 +ignore_data_skip: + value: false +include_inputs_for_metrics: + value: false +include_tokens_per_second: + value: false +initializer_range: + value: 0.02 +intermediate_size: + value: 3072 +is_decoder: + value: false +is_encoder_decoder: + value: false +jit_mode_eval: + value: false +label_names: + value: null +label_smoothing_factor: + value: 0 +label2id: + value: + LABEL_0: 0 + LABEL_1: 1 +layer_norm_eps: + value: 1e-12 +learning_rate: + value: 3e-05 +length_column_name: + value: length +length_penalty: + value: 1 +load_best_model_at_end: + value: true +local_rank: + value: 0 
+log_level: + value: info +log_level_replica: + value: warning +log_on_each_node: + value: true +logging_dir: + value: genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/runs/Mar24_13-29-10_u112222 +logging_first_step: + value: false +logging_nan_inf_filter: + value: true +logging_steps: + value: 100 +logging_strategy: + value: steps +lr_scheduler_type: + value: linear +max_grad_norm: + value: 1 +max_length: + value: 512 +max_position_embeddings: + value: 512 +max_steps: + value: -1 +metric_for_best_model: + value: eval_f1 +min_length: + value: 0 +model_max_length: + value: 512 +model_type: + value: bert +mp_parameters: + value: "" +neftune_noise_alpha: + value: null +no_cuda: + value: false +no_repeat_ngram_size: + value: 0 +num_attention_heads: + value: 12 +num_beam_groups: + value: 1 +num_beams: + value: 1 +num_hidden_layers: + value: 12 +num_return_sequences: + value: 1 +num_train_epochs: + value: 4 +optim: + value: adamw_torch +optim_args: + value: null +output_attentions: + value: false +output_dir: + value: genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42 +output_hidden_states: + value: false +output_scores: + value: false +overwrite_output_dir: + value: true +pad_token_id: + value: 0 +past_index: + value: -1 +per_device_eval_batch_size: + value: 128 +per_device_train_batch_size: + value: 128 +per_gpu_eval_batch_size: + value: null +per_gpu_train_batch_size: + value: null +position_embedding_type: + value: absolute +prediction_loss_only: + value: false +prefix: + value: null +problem_type: + value: null +project_name: + value: genomic_bench_DNAbert2 +push_to_hub: + value: false +push_to_hub_model_id: + value: null +push_to_hub_organization: + value: null +push_to_hub_token: + value: +ray_scope: + value: last +remove_invalid_values: + value: false +remove_unused_columns: + value: true +repetition_penalty: + value: 1 +report_to: + value: + - 
wandb +resume_from_checkpoint: + value: null +return_dict: + value: true +return_dict_in_generate: + value: false +run_name: + value: base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42 +save_model: + value: false +save_on_each_node: + value: false +save_safetensors: + value: true +save_steps: + value: 100 +save_strategy: + value: epoch +save_total_limit: + value: 1 +seed: + value: 42 +sep_token_id: + value: null +skip_memory_metrics: + value: true +split_batches: + value: false +suppress_tokens: + value: null +task_specific_params: + value: null +temperature: + value: 1 +tf_legacy_loss: + value: false +tf32: + value: null +tie_encoder_decoder: + value: false +tie_word_embeddings: + value: true +tokenizer_class: + value: null +top_k: + value: 50 +top_p: + value: 1 +torch_compile: + value: false +torch_compile_backend: + value: null +torch_compile_mode: + value: null +torch_dtype: + value: float32 +torchdynamo: + value: null +torchscript: + value: false +tpu_metrics_debug: + value: false +tpu_num_cores: + value: null +transformers_version: + value: 4.35.2 +type_vocab_size: + value: 2 +typical_p: + value: 1 +use_bfloat16: + value: false +use_cache: + value: true +use_cpu: + value: false +use_ipex: + value: false +use_legacy_prediction_loop: + value: false +use_mps_device: + value: false +vocab_file: + value: null +vocab_size: + value: 4096 +warmup_ratio: + value: 0.15 +warmup_steps: + value: 0 +weight_decay: + value: 0.05 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-jif3dch5/files/output.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-jif3dch5/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..506a00e4d6849573791dc39d994e41f14de22aa9 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-jif3dch5/files/output.log @@ -0,0 +1,145 @@ +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... 
+WARNING:root:Perform single sequence classification... +Some weights of BertForSequenceClassification were not initialized from the model checkpoint at /data/nanhuang/Nan/models/DNAbert2_Pretrained and are newly initialized: ['classifier.weight', 'bert.pooler.dense.weight', 'classifier.bias', 'bert.pooler.dense.bias'] +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. +/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/accelerate/accelerator.py:439: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead. + self.scaler = torch.cuda.amp.GradScaler(**kwargs) +Using auto half precision backend +***** Running training ***** + Num examples = 5,531 + Num Epochs = 4 + Instantaneous batch size per device = 128 + Total train batch size (w. parallel, distributed & accumulation) = 128 + Gradient Accumulation steps = 1 + Total optimization steps = 176 + Number of trainable parameters = 89,188,610 +Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true" + 0%| | 0/176 [00:00 + train() + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 454, in train + trainer.train() + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 1555, in train + return inner_training_loop( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 1860, in _inner_training_loop + tr_loss_step = self.training_step(model, inputs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 2725, in training_step + loss = self.compute_loss(model, inputs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 2748, in compute_loss + outputs = model(**inputs) + File 
"/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/accelerate/utils/operations.py", line 680, in forward + return model_forward(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/accelerate/utils/operations.py", line 668, in __call__ + return convert_to_fp32(self.model_forward(*args, **kwargs)) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/amp/autocast_mode.py", line 44, in decorate_autocast + return func(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 1564, in forward + outputs = self.bert( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 1013, in forward + encoder_outputs = self.encoder( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File 
"/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 607, in forward + layer_outputs = layer_module( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 497, in forward + self_attention_outputs = self.attention( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 427, in forward + self_outputs = self.self( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 359, in forward + attention_probs = self.dropout(attention_probs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File 
"/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/dropout.py", line 70, in forward + return F.dropout(input, self.p, self.training, self.inplace) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/functional.py", line 1422, in dropout + _VF.dropout_(input, p, training) if inplace else _VF.dropout(input, p, training) +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.50 GiB. GPU 0 has a total capacity of 47.53 GiB of which 1.42 GiB is free. Including non-PyTorch memory, this process has 21.00 GiB memory in use. Process 2988247 has 13.12 GiB memory in use. Process 2988061 has 5.62 GiB memory in use. Process 2988353 has 474.00 MiB memory in use. Process 2988034 has 260.00 MiB memory in use. Process 2988245 has 260.00 MiB memory in use. Process 2988374 has 260.00 MiB memory in use. Process 2988531 has 260.00 MiB memory in use. Process 2988064 has 260.00 MiB memory in use. Process 2988566 has 260.00 MiB memory in use. Process 2988668 has 260.00 MiB memory in use. Process 2988116 has 260.00 MiB memory in use. Process 2988093 has 260.00 MiB memory in use. Process 2988637 has 260.00 MiB memory in use. Process 2988554 has 260.00 MiB memory in use. Process 2988906 has 260.00 MiB memory in use. Process 2988874 has 260.00 MiB memory in use. Process 2988923 has 260.00 MiB memory in use. Process 2988286 has 260.00 MiB memory in use. Process 2989072 has 260.00 MiB memory in use. Process 2988448 has 260.00 MiB memory in use. Process 2988348 has 62.00 MiB memory in use. Process 2988736 has 260.00 MiB memory in use. Process 2988855 has 62.00 MiB memory in use. Process 2988636 has 62.00 MiB memory in use. Process 2988454 has 62.00 MiB memory in use. Process 2988549 has 62.00 MiB memory in use. Process 2988575 has 18.00 MiB memory in use. 
Process 2988225 has 18.00 MiB memory in use. Process 2988538 has 18.00 MiB memory in use. Process 2989222 has 10.00 MiB memory in use. Process 2988129 has 10.00 MiB memory in use. Process 2989200 has 10.00 MiB memory in use. Process 2988790 has 10.00 MiB memory in use. Process 2989488 has 10.00 MiB memory in use. Process 2988709 has 16.00 MiB memory in use. Process 2988791 has 10.00 MiB memory in use. Process 2989081 has 10.00 MiB memory in use. Process 2988814 has 10.00 MiB memory in use. Process 2988511 has 10.00 MiB memory in use. Process 2988942 has 10.00 MiB memory in use. Process 2989897 has 10.00 MiB memory in use. Process 2989914 has 10.00 MiB memory in use. Process 2989070 has 10.00 MiB memory in use. Process 2989165 has 10.00 MiB memory in use. Process 2989068 has 10.00 MiB memory in use. Process 2990500 has 10.00 MiB memory in use. Process 2989449 has 10.00 MiB memory in use. Process 2988447 has 10.00 MiB memory in use. Process 2988379 has 10.00 MiB memory in use. Of the allocated memory 18.92 GiB is allocated by PyTorch, and 1.77 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. 
See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) +Traceback (most recent call last): + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 473, in + train() + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 454, in train + trainer.train() + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 1555, in train + return inner_training_loop( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 1860, in _inner_training_loop + tr_loss_step = self.training_step(model, inputs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 2725, in training_step + loss = self.compute_loss(model, inputs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 2748, in compute_loss + outputs = model(**inputs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/accelerate/utils/operations.py", line 680, in forward + return model_forward(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/accelerate/utils/operations.py", line 668, in __call__ + return convert_to_fp32(self.model_forward(*args, **kwargs)) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/amp/autocast_mode.py", line 44, in decorate_autocast + return func(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 1564, in 
forward + outputs = self.bert( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 1013, in forward + encoder_outputs = self.encoder( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 607, in forward + layer_outputs = layer_module( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 497, in forward + self_attention_outputs = self.attention( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File 
"/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 427, in forward + self_outputs = self.self( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 359, in forward + attention_probs = self.dropout(attention_probs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/dropout.py", line 70, in forward + return F.dropout(input, self.p, self.training, self.inplace) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/functional.py", line 1422, in dropout + _VF.dropout_(input, p, training) if inplace else _VF.dropout(input, p, training) +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.50 GiB. GPU 0 has a total capacity of 47.53 GiB of which 1.42 GiB is free. Including non-PyTorch memory, this process has 21.00 GiB memory in use. Process 2988247 has 13.12 GiB memory in use. Process 2988061 has 5.62 GiB memory in use. Process 2988353 has 474.00 MiB memory in use. Process 2988034 has 260.00 MiB memory in use. Process 2988245 has 260.00 MiB memory in use. Process 2988374 has 260.00 MiB memory in use. Process 2988531 has 260.00 MiB memory in use. 
Process 2988064 has 260.00 MiB memory in use. Process 2988566 has 260.00 MiB memory in use. Process 2988668 has 260.00 MiB memory in use. Process 2988116 has 260.00 MiB memory in use. Process 2988093 has 260.00 MiB memory in use. Process 2988637 has 260.00 MiB memory in use. Process 2988554 has 260.00 MiB memory in use. Process 2988906 has 260.00 MiB memory in use. Process 2988874 has 260.00 MiB memory in use. Process 2988923 has 260.00 MiB memory in use. Process 2988286 has 260.00 MiB memory in use. Process 2989072 has 260.00 MiB memory in use. Process 2988448 has 260.00 MiB memory in use. Process 2988348 has 62.00 MiB memory in use. Process 2988736 has 260.00 MiB memory in use. Process 2988855 has 62.00 MiB memory in use. Process 2988636 has 62.00 MiB memory in use. Process 2988454 has 62.00 MiB memory in use. Process 2988549 has 62.00 MiB memory in use. Process 2988575 has 18.00 MiB memory in use. Process 2988225 has 18.00 MiB memory in use. Process 2988538 has 18.00 MiB memory in use. Process 2989222 has 10.00 MiB memory in use. Process 2988129 has 10.00 MiB memory in use. Process 2989200 has 10.00 MiB memory in use. Process 2988790 has 10.00 MiB memory in use. Process 2989488 has 10.00 MiB memory in use. Process 2988709 has 16.00 MiB memory in use. Process 2988791 has 10.00 MiB memory in use. Process 2989081 has 10.00 MiB memory in use. Process 2988814 has 10.00 MiB memory in use. Process 2988511 has 10.00 MiB memory in use. Process 2988942 has 10.00 MiB memory in use. Process 2989897 has 10.00 MiB memory in use. Process 2989914 has 10.00 MiB memory in use. Process 2989070 has 10.00 MiB memory in use. Process 2989165 has 10.00 MiB memory in use. Process 2989068 has 10.00 MiB memory in use. Process 2990500 has 10.00 MiB memory in use. Process 2989449 has 10.00 MiB memory in use. Process 2988447 has 10.00 MiB memory in use. Process 2988379 has 10.00 MiB memory in use. 
Of the allocated memory 18.92 GiB is allocated by PyTorch, and 1.77 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-jif3dch5/files/requirements.txt b/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-jif3dch5/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..44d18d32ec4648cd530877d7c8c4758d5e887b9c --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-jif3dch5/files/requirements.txt @@ -0,0 +1,144 @@ +scipy==1.13.1 +jupyter_core==5.8.1 +smmap==5.0.2 +yarl==1.22.0 +executing==2.2.0 +cffi==2.0.0 +mkl_random==1.2.8 +traitlets==5.14.3 +wandb==0.23.1 +annotated-types==0.7.0 +evaluate==0.4.6 +kiwisolver==1.4.4 +Jinja2==3.1.6 +pyparsing==3.2.0 +mpmath==1.3.0 +debugpy==1.8.16 +nvidia-cuda-nvrtc-cu12==12.8.93 +docker-pycreds==0.4.0 +pycparser==2.23 +anyio==4.12.0 +safetensors==0.7.0 +matplotlib-inline==0.1.7 +Pygments==2.19.2 +numpy==2.0.2 +nvidia-cuda-cupti-cu12==12.8.90 +Bottleneck==1.4.2 +matplotlib==3.9.2 +numexpr==2.10.1 +sip==6.7.12 +aiohappyeyeballs==2.6.1 +ptyprocess==0.7.0 +fsspec==2025.7.0 +accelerate==0.25.0 +zipp==3.23.0 +PyQt5_sip==12.13.0 +pure_eval==0.2.3 +regex==2025.11.3 +aiosignal==1.4.0 +certifi==2025.10.5 +transformers==4.35.2 +mkl-service==2.4.0 +httpx==0.28.1 +mkl_fft==1.3.11 +pickleshare==0.7.5 +ipykernel==6.30.1 +nvidia-nvtx-cu12==12.8.90 +nvidia-cufft-cu12==11.3.3.83 +triton==3.4.0 +numba==0.60.0 +psutil==7.0.0 +contourpy==1.2.1 +PyQt5==5.15.10 +packaging==25.0 +datasets==4.4.1 +ipython==8.18.1 +sympy==1.14.0 +nvidia-cusolver-cu12==11.7.3.90 +multidict==6.7.0 +jupyter_client==8.6.3 +setuptools==80.9.0 +prompt_toolkit==3.0.51 +six==1.17.0 +GitPython==3.1.45 +pydantic==2.11.7 
+nvidia-cublas-cu12==12.8.4.1 +aiohttp==3.13.2 +tzdata==2025.2 +importlib_metadata==8.7.0 +biopython==1.85 +httpcore==1.0.9 +python-dateutil==2.9.0.post0 +llvmlite==0.43.0 +pandas==2.3.3 +scikit-learn==1.6.1 +asttokens==3.0.0 +joblib==1.5.3 +h11==0.16.0 +charset-normalizer==3.4.4 +pyzmq==27.0.2 +multiprocess==0.70.18 +nvidia-nvjitlink-cu12==12.8.93 +sentry-sdk==2.35.0 +pytz==2025.2 +pydantic_core==2.33.2 +MarkupSafe==3.0.3 +brotlicffi==1.0.9.2 +stack_data==0.6.3 +tqdm==4.67.1 +pynndescent==0.5.13 +importlib_resources==6.5.2 +ply==3.11 +pyarrow==21.0.0 +tokenizers==0.15.2 +exceptiongroup==1.3.1 +nvidia-cusparse-cu12==12.5.8.93 +comm==0.2.3 +pillow==11.3.0 +nvidia-cusparselt-cu12==0.7.1 +protobuf==3.20.3 +urllib3==2.5.0 +wheel==0.45.1 +wcwidth==0.2.13 +appdirs==1.4.4 +PySocks==1.7.1 +PyQt6_sip==13.10.2 +umap-learn==0.5.9.post2 +attrs==25.4.0 +platformdirs==4.3.8 +nvidia-cuda-runtime-cu12==12.8.90 +typing-inspection==0.4.1 +huggingface_hub==0.34.4 +decorator==5.2.1 +filelock==3.17.0 +nvidia-nccl-cu12==2.27.3 +fonttools==4.60.1 +xxhash==3.6.0 +dill==0.4.0 +threadpoolctl==3.6.0 +parso==0.8.4 +pysam==0.9.1 +frozenlist==1.8.0 +typing_extensions==4.15.0 +propcache==0.4.1 +tomli==2.2.1 +click==8.1.8 +nvidia-cudnn-cu12==9.10.2.21 +gitdb==4.0.12 +pip==25.3 +tornado==6.5.2 +networkx==3.2.1 +jedi==0.19.2 +idna==3.11 +pexpect==4.9.0 +async-timeout==5.0.1 +hf-xet==1.1.8 +nvidia-curand-cu12==10.3.9.90 +PyYAML==6.0.2 +nvidia-cufile-cu12==1.13.1.3 +setproctitle==1.3.6 +eval_type_backport==0.2.2 +requests==2.32.5 +nest-asyncio==1.6.0 +torch==2.8.0 +cycler==0.11.0 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-jif3dch5/files/wandb-metadata.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-jif3dch5/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..5b31e685801d21f5abaf68e741b3fd50421c4708 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-jif3dch5/files/wandb-metadata.json @@ -0,0 +1,146 @@ 
+{ + "os": "Linux-5.15.0-126-generic-x86_64-with-glibc2.35", + "python": "CPython 3.9.18", + "startedAt": "2026-03-24T20:29:12.235322Z", + "args": [ + "--model_name_or_path", + "/data/nanhuang/Nan/models/DNAbert2_Pretrained", + "--tokenizer_path", + "/data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json", + "--trust_remote_code", + "True", + "--data_path", + "/data/nanhuang/Nan/ft_data/drosophila_enhancers_stark/split", + "--kmer", + "-1", + "--run_name", + "base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42", + "--model_max_length", + "512", + "--per_device_train_batch_size", + "128", + "--per_device_eval_batch_size", + "128", + "--gradient_accumulation_steps", + "1", + "--learning_rate", + "3e-5", + "--weight_decay", + "0.05", + "--num_train_epochs", + "4", + "--lr_scheduler_type", + "linear", + "--warmup_steps", + "0", + "--warmup_ratio", + "0.15", + "--fp16", + "--output_dir", + "genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42", + "--evaluation_strategy", + "epoch", + "--save_strategy", + "epoch", + "--load_best_model_at_end", + "True", + "--metric_for_best_model", + "eval_f1", + "--greater_is_better", + "True", + "--save_total_limit", + "1", + "--logging_steps", + "100", + "--overwrite_output_dir", + "True", + "--log_level", + "info", + "--seed", + "42", + "--find_unused_parameters", + "False", + "--project_name", + "genomic_bench_DNAbert2" + ], + "program": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", + "codePath": "train.py", + "codePathLocal": "train.py", + "email": "n5huang@ucsd.edu", + "root": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks", + "host": "u112222", + "executable": "/data/nanhuang/miniconda3/envs/bpe_v2/bin/python", + "cpu_count": 64, + "cpu_count_logical": 128, + "gpu": "NVIDIA RTX A6000", + "gpu_count": 8, + "disk": { + "/": { + "total": "3768964489216", + "used": "3559220678656" + } + }, + "memory": { + "total": "1082030182400" + }, + 
"gpu_nvidia": [ + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-5a3d8a94-f380-da39-63d2-4cae98f5c2ae" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-cf8724bd-d619-7916-ee26-88d517a20c47" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-48b494ab-4a63-ff4c-5cc8-746af5d27310" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-968c7ea7-97bf-416a-7689-72c141cfc2bb" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-d53c626b-860f-1dec-1cfa-1dfcde78bc88" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-caa40ec7-afcb-5fe0-c53a-85eb54152941" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-18ee7a7f-1bbe-edef-c72c-3abed60917b2" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-a8757d5a-c26e-48c6-a704-dfe62167fc81" + } + ], + "cudaVersion": "12.4", + "writerId": "e7bn866j81j8lfrnwtqn9u5ggx3sfiod" +} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-jif3dch5/files/wandb-summary.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-jif3dch5/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..49f1c97bdc4f4b441dcae913d01d8e867efca440 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-jif3dch5/files/wandb-summary.json @@ -0,0 +1 @@ +{"_runtime":9,"_wandb":{"runtime":9}} \ No newline at end of 
file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-jif3dch5/logs/debug-core.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-jif3dch5/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..d1fe3a988651d7f6933636186792692b73537736 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-jif3dch5/logs/debug-core.log @@ -0,0 +1,28 @@ +{"time":"2026-03-24T13:29:12.578343138-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpb0ag5brl/port-2988293.txt","pid":2988293,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:12.579814699-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988293} +{"time":"2026-03-24T13:29:12.579807359-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988293-2993387-3073390821/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:12.730261372-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:12.80843658-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"jif3dch5","id":"1(@)"} +{"time":"2026-03-24T13:29:12.971478289-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmp3ygkt1nk/port-2988247.txt","pid":2988247,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:12.97296959-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988247} +{"time":"2026-03-24T13:29:12.972962081-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988247-2993683-2593588118/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:13.126772563-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} 
+{"time":"2026-03-24T13:29:13.207244979-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"2rwws7uq","id":"1(@)"} +{"time":"2026-03-24T13:29:16.012298206-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"jif3dch5","id":"1(@)"} +{"time":"2026-03-24T13:29:16.629960644-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"2rwws7uq","id":"1(@)"} +{"time":"2026-03-24T13:29:25.982411196-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:25.982531865-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:25.982529585-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:25.982635544-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:25.982749004-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988293-2993387-3073390821/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:26.007841116-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:26.008091164-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:26.008155894-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:26.008165324-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:26.008337723-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988247-2993683-2593588118/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:27.210320586-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:27.210381796-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:27.210401286-07:00","level":"INFO","msg":"server is closed"} 
+{"time":"2026-03-24T13:29:30.367245973-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:30.367296483-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:30.367315263-07:00","level":"INFO","msg":"server is closed"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-jif3dch5/logs/debug-internal.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-jif3dch5/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..93445be4874bebfe5435194f61b9894dadc4553f --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-jif3dch5/logs/debug-internal.log @@ -0,0 +1,11 @@ +{"time":"2026-03-24T13:29:12.81200342-07:00","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2026-03-24T13:29:16.011784719-07:00","level":"INFO","msg":"stream: created new stream","id":"jif3dch5"} +{"time":"2026-03-24T13:29:16.012103797-07:00","level":"INFO","msg":"handler: started","stream_id":"jif3dch5"} +{"time":"2026-03-24T13:29:16.012271716-07:00","level":"INFO","msg":"stream: started","id":"jif3dch5"} +{"time":"2026-03-24T13:29:16.012757123-07:00","level":"INFO","msg":"writer: started","stream_id":"jif3dch5"} +{"time":"2026-03-24T13:29:16.012833623-07:00","level":"INFO","msg":"sender: started","stream_id":"jif3dch5"} +{"time":"2026-03-24T13:29:25.982555385-07:00","level":"INFO","msg":"stream: closing","id":"jif3dch5"} +{"time":"2026-03-24T13:29:26.690890748-07:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2026-03-24T13:29:30.36268209-07:00","level":"INFO","msg":"handler: closed","stream_id":"jif3dch5"} +{"time":"2026-03-24T13:29:30.363082898-07:00","level":"INFO","msg":"sender: closed","stream_id":"jif3dch5"} +{"time":"2026-03-24T13:29:30.363879673-07:00","level":"INFO","msg":"stream: closed","id":"jif3dch5"} diff --git 
a/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-jif3dch5/logs/debug.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-jif3dch5/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..ae9c32dfe56cbbd955f4d76f178669aa44064f35 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-jif3dch5/logs/debug.log @@ -0,0 +1,24 @@ +2026-03-24 13:29:12,244 INFO MainThread:2988293 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1 +2026-03-24 13:29:12,244 INFO MainThread:2988293 [wandb_setup.py:_flush():80] Configure stats pid to 2988293 +2026-03-24 13:29:12,244 INFO MainThread:2988293 [wandb_setup.py:_flush():80] Loading settings from /home/nanhuang/.config/wandb/settings +2026-03-24 13:29:12,244 INFO MainThread:2988293 [wandb_setup.py:_flush():80] Loading settings from /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/settings +2026-03-24 13:29:12,245 INFO MainThread:2988293 [wandb_setup.py:_flush():80] Loading settings from environment variables +2026-03-24 13:29:12,245 INFO MainThread:2988293 [wandb_init.py:setup_run_log_directory():714] Logging user logs to /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-jif3dch5/logs/debug.log +2026-03-24 13:29:12,245 INFO MainThread:2988293 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-jif3dch5/logs/debug-internal.log +2026-03-24 13:29:12,245 INFO MainThread:2988293 [wandb_init.py:init():841] calling init triggers +2026-03-24 13:29:12,246 INFO MainThread:2988293 [wandb_init.py:init():846] wandb.init called with sweep_config: {} +config: {'_wandb': {}} +2026-03-24 13:29:12,246 INFO MainThread:2988293 [wandb_init.py:init():889] starting backend +2026-03-24 13:29:12,731 INFO MainThread:2988293 [wandb_init.py:init():892] sending inform_init request +2026-03-24 13:29:12,804 INFO MainThread:2988293 [wandb_init.py:init():900] backend started and connected 
+2026-03-24 13:29:12,817 INFO MainThread:2988293 [wandb_init.py:init():970] updated telemetry +2026-03-24 13:29:12,822 INFO MainThread:2988293 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout +2026-03-24 13:29:16,695 INFO MainThread:2988293 [wandb_init.py:init():1041] starting run threads in backend +2026-03-24 13:29:16,855 INFO MainThread:2988293 [wandb_run.py:_console_start():2521] atexit reg +2026-03-24 13:29:16,855 INFO MainThread:2988293 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2026-03-24 13:29:16,855 INFO MainThread:2988293 [wandb_run.py:_redirect():2438] Wrapping output streams. +2026-03-24 13:29:16,855 INFO MainThread:2988293 [wandb_run.py:_redirect():2461] Redirects installed. +2026-03-24 13:29:16,860 INFO MainThread:2988293 [wandb_init.py:init():1081] run started, returning control to user process +2026-03-24 13:29:22,999 INFO MainThread:2988293 [wandb_run.py:_config_callback():1396] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 512, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 
'architectures': ['BertForMaskedLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': '/data/nanhuang/Nan/models/DNAbert2_Pretrained', 'transformers_version': '4.35.2', 'model_type': 'bert', 'vocab_size': 4096, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 512, 'type_vocab_size': 2, 'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'classifier_dropout': None, 'output_dir': 'genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 128, 'per_device_eval_batch_size': 128, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 3e-05, 'weight_decay': 0.05, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 4, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'warmup_ratio': 0.15, 'warmup_steps': 0, 'log_level': 'info', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 'genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/runs/Mar24_13-29-10_u112222', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 100, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 
'save_steps': 100, 'save_total_limit': 1, 'save_safetensors': True, 'save_on_each_node': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 100, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': 'base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'eval_f1', 'greater_is_better': True, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 
'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': False, 'include_tokens_per_second': False, 'neftune_noise_alpha': None, 'vocab_file': None, 'cache_dir': None, 'model_max_length': 512, 'find_unused_parameters': False, 'checkpointing': False, 'eval_and_save_results': True, 'save_model': False, 'project_name': 'genomic_bench_DNAbert2'} +2026-03-24 13:29:25,982 INFO wandb-AsyncioManager-main:2988293 [service_client.py:_forward_responses():80] Reached EOF. +2026-03-24 13:29:25,982 INFO wandb-AsyncioManager-main:2988293 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles. diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-jif3dch5/run-jif3dch5.wandb b/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-jif3dch5/run-jif3dch5.wandb new file mode 100644 index 0000000000000000000000000000000000000000..eecf684ba1e3e83e034e2c51e6dd3eeb3510e036 Binary files /dev/null and b/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-jif3dch5/run-jif3dch5.wandb differ diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-uz5f21vb/files/config.yaml b/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-uz5f21vb/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6d0f50ae7f12b25a848648ee3879e717d2860baa --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-uz5f21vb/files/config.yaml @@ -0,0 +1,559 @@ +_name_or_path: + value: /data/nanhuang/Nan/models/DNAbert2_Pretrained +_wandb: + value: + cli_version: 0.23.1 + e: + 8ty4kccz1kpmldgk0qlsw83bjiyrms6b: + args: + - --model_name_or_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained + - --tokenizer_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json + - --trust_remote_code + - "True" + - --data_path + - /data/nanhuang/Nan/ft_data/drosophila_enhancers_stark/split + - --kmer + - "-1" + - --run_name + - base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42 + - 
--model_max_length + - "512" + - --per_device_train_batch_size + - "128" + - --per_device_eval_batch_size + - "128" + - --gradient_accumulation_steps + - "1" + - --learning_rate + - "3e-5" + - --weight_decay + - "0.05" + - --num_train_epochs + - "4" + - --lr_scheduler_type + - linear + - --warmup_steps + - "0" + - --warmup_ratio + - "0.15" + - --fp16 + - --output_dir + - genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42 + - --evaluation_strategy + - epoch + - --save_strategy + - epoch + - --load_best_model_at_end + - "True" + - --metric_for_best_model + - eval_f1 + - --greater_is_better + - "True" + - --save_total_limit + - "1" + - --logging_steps + - "100" + - --overwrite_output_dir + - "True" + - --log_level + - info + - --seed + - "42" + - --find_unused_parameters + - "False" + - --project_name + - genomic_bench_DNAbert2 + codePath: train.py + codePathLocal: train.py + cpu_count: 64 + cpu_count_logical: 128 + cudaVersion: "12.4" + disk: + /: + total: "3768964489216" + used: "3559220690944" + email: n5huang@ucsd.edu + executable: /data/nanhuang/miniconda3/envs/bpe_v2/bin/python + gpu: NVIDIA RTX A6000 + gpu_count: 8 + gpu_nvidia: + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-5a3d8a94-f380-da39-63d2-4cae98f5c2ae + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-cf8724bd-d619-7916-ee26-88d517a20c47 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-48b494ab-4a63-ff4c-5cc8-746af5d27310 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-968c7ea7-97bf-416a-7689-72c141cfc2bb + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-d53c626b-860f-1dec-1cfa-1dfcde78bc88 + - architecture: Ampere + cudaCores: 10752 + 
memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-caa40ec7-afcb-5fe0-c53a-85eb54152941 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-18ee7a7f-1bbe-edef-c72c-3abed60917b2 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-a8757d5a-c26e-48c6-a704-dfe62167fc81 + host: u112222 + memory: + total: "1082030182400" + os: Linux-5.15.0-126-generic-x86_64-with-glibc2.35 + program: /data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py + python: CPython 3.9.18 + root: /data/nanhuang/Nan/Finetune-GenomicBenchmarks + startedAt: "2026-03-24T20:29:12.803506Z" + writerId: 8ty4kccz1kpmldgk0qlsw83bjiyrms6b + m: + - "1": train/global_step + "6": + - 3 + "7": [] + - "2": '*' + "5": 1 + "6": + - 1 + "7": [] + python_version: 3.9.18 + t: + "1": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "2": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "3": + - 7 + - 66 + "4": 3.9.18 + "5": 0.23.1 + "6": 4.35.2 + "9": + "1": transformers_trainer + "12": 0.23.1 + "13": linux-x86_64 +adafactor: + value: false +adam_beta1: + value: 0.9 +adam_beta2: + value: 0.999 +adam_epsilon: + value: 1e-08 +add_cross_attention: + value: false +architectures: + value: + - BertForMaskedLM +attention_probs_dropout_prob: + value: 0.1 +auto_find_batch_size: + value: false +bad_words_ids: + value: null +begin_suppress_tokens: + value: null +bf16: + value: false +bf16_full_eval: + value: false +bos_token_id: + value: null +cache_dir: + value: null +checkpointing: + value: false +chunk_size_feed_forward: + value: 0 +classifier_dropout: + value: null +cross_attention_hidden_size: + value: null +data_seed: + value: null +dataloader_drop_last: + value: false +dataloader_num_workers: + value: 0 +dataloader_pin_memory: + value: false +ddp_backend: + value: null +ddp_broadcast_buffers: + value: null +ddp_bucket_cap_mb: + value: null +ddp_find_unused_parameters: + value: null +ddp_timeout: + 
value: 1800 +debug: + value: [] +decoder_start_token_id: + value: null +deepspeed: + value: null +disable_tqdm: + value: false +dispatch_batches: + value: null +diversity_penalty: + value: 0 +do_eval: + value: true +do_predict: + value: false +do_sample: + value: false +do_train: + value: false +early_stopping: + value: false +encoder_no_repeat_ngram_size: + value: 0 +eos_token_id: + value: null +eval_accumulation_steps: + value: null +eval_and_save_results: + value: true +eval_delay: + value: 0 +eval_steps: + value: 100 +evaluation_strategy: + value: epoch +exponential_decay_length_penalty: + value: null +find_unused_parameters: + value: false +finetuning_task: + value: null +forced_bos_token_id: + value: null +forced_eos_token_id: + value: null +fp16: + value: true +fp16_backend: + value: auto +fp16_full_eval: + value: false +fp16_opt_level: + value: O1 +fsdp: + value: [] +fsdp_config: + value: + min_num_params: 0 + xla: false + xla_fsdp_grad_ckpt: false +fsdp_min_num_params: + value: 0 +fsdp_transformer_layer_cls_to_wrap: + value: null +full_determinism: + value: false +gradient_accumulation_steps: + value: 1 +gradient_checkpointing: + value: false +gradient_checkpointing_kwargs: + value: null +greater_is_better: + value: true +group_by_length: + value: false +half_precision_backend: + value: auto +hidden_act: + value: gelu +hidden_dropout_prob: + value: 0.1 +hidden_size: + value: 768 +hub_always_push: + value: false +hub_model_id: + value: null +hub_private_repo: + value: false +hub_strategy: + value: every_save +hub_token: + value: +id2label: + value: + "0": LABEL_0 + "1": LABEL_1 +ignore_data_skip: + value: false +include_inputs_for_metrics: + value: false +include_tokens_per_second: + value: false +initializer_range: + value: 0.02 +intermediate_size: + value: 3072 +is_decoder: + value: false +is_encoder_decoder: + value: false +jit_mode_eval: + value: false +label_names: + value: null +label_smoothing_factor: + value: 0 +label2id: + value: + LABEL_0: 0 + 
LABEL_1: 1 +layer_norm_eps: + value: 1e-12 +learning_rate: + value: 3e-05 +length_column_name: + value: length +length_penalty: + value: 1 +load_best_model_at_end: + value: true +local_rank: + value: 0 +log_level: + value: info +log_level_replica: + value: warning +log_on_each_node: + value: true +logging_dir: + value: genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/runs/Mar24_13-29-10_u112222 +logging_first_step: + value: false +logging_nan_inf_filter: + value: true +logging_steps: + value: 100 +logging_strategy: + value: steps +lr_scheduler_type: + value: linear +max_grad_norm: + value: 1 +max_length: + value: 512 +max_position_embeddings: + value: 512 +max_steps: + value: -1 +metric_for_best_model: + value: eval_f1 +min_length: + value: 0 +model_max_length: + value: 512 +model_type: + value: bert +mp_parameters: + value: "" +neftune_noise_alpha: + value: null +no_cuda: + value: false +no_repeat_ngram_size: + value: 0 +num_attention_heads: + value: 12 +num_beam_groups: + value: 1 +num_beams: + value: 1 +num_hidden_layers: + value: 12 +num_return_sequences: + value: 1 +num_train_epochs: + value: 4 +optim: + value: adamw_torch +optim_args: + value: null +output_attentions: + value: false +output_dir: + value: genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42 +output_hidden_states: + value: false +output_scores: + value: false +overwrite_output_dir: + value: true +pad_token_id: + value: 0 +past_index: + value: -1 +per_device_eval_batch_size: + value: 128 +per_device_train_batch_size: + value: 128 +per_gpu_eval_batch_size: + value: null +per_gpu_train_batch_size: + value: null +position_embedding_type: + value: absolute +prediction_loss_only: + value: false +prefix: + value: null +problem_type: + value: null +project_name: + value: genomic_bench_DNAbert2 +push_to_hub: + value: false +push_to_hub_model_id: + value: null +push_to_hub_organization: + 
value: null +push_to_hub_token: + value: +ray_scope: + value: last +remove_invalid_values: + value: false +remove_unused_columns: + value: true +repetition_penalty: + value: 1 +report_to: + value: + - wandb +resume_from_checkpoint: + value: null +return_dict: + value: true +return_dict_in_generate: + value: false +run_name: + value: base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42 +save_model: + value: false +save_on_each_node: + value: false +save_safetensors: + value: true +save_steps: + value: 100 +save_strategy: + value: epoch +save_total_limit: + value: 1 +seed: + value: 42 +sep_token_id: + value: null +skip_memory_metrics: + value: true +split_batches: + value: false +suppress_tokens: + value: null +task_specific_params: + value: null +temperature: + value: 1 +tf_legacy_loss: + value: false +tf32: + value: null +tie_encoder_decoder: + value: false +tie_word_embeddings: + value: true +tokenizer_class: + value: null +top_k: + value: 50 +top_p: + value: 1 +torch_compile: + value: false +torch_compile_backend: + value: null +torch_compile_mode: + value: null +torch_dtype: + value: float32 +torchdynamo: + value: null +torchscript: + value: false +tpu_metrics_debug: + value: false +tpu_num_cores: + value: null +transformers_version: + value: 4.35.2 +type_vocab_size: + value: 2 +typical_p: + value: 1 +use_bfloat16: + value: false +use_cache: + value: true +use_cpu: + value: false +use_ipex: + value: false +use_legacy_prediction_loop: + value: false +use_mps_device: + value: false +vocab_file: + value: null +vocab_size: + value: 4096 +warmup_ratio: + value: 0.15 +warmup_steps: + value: 0 +weight_decay: + value: 0.05 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-uz5f21vb/files/output.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-uz5f21vb/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..24617023621e7f9e8cd33f3045345d53655d7d4c --- /dev/null +++ 
b/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-uz5f21vb/files/output.log @@ -0,0 +1,129 @@ +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... +Some weights of BertForSequenceClassification were not initialized from the model checkpoint at /data/nanhuang/Nan/models/DNAbert2_Pretrained and are newly initialized: ['classifier.bias', 'classifier.weight', 'bert.pooler.dense.bias', 'bert.pooler.dense.weight'] +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. +/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/accelerate/accelerator.py:439: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead. + self.scaler = torch.cuda.amp.GradScaler(**kwargs) +Using auto half precision backend +***** Running training ***** + Num examples = 5,531 + Num Epochs = 4 + Instantaneous batch size per device = 128 + Total train batch size (w. 
parallel, distributed & accumulation) = 128 + Gradient Accumulation steps = 1 + Total optimization steps = 176 + Number of trainable parameters = 89,188,610 +Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true" + 0%| | 0/176 [00:00 + train() + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 454, in train + trainer.train() + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 1555, in train + return inner_training_loop( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 1860, in _inner_training_loop + tr_loss_step = self.training_step(model, inputs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 2725, in training_step + loss = self.compute_loss(model, inputs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 2748, in compute_loss + outputs = model(**inputs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/accelerate/utils/operations.py", line 680, in forward + return model_forward(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/accelerate/utils/operations.py", line 668, in __call__ + return convert_to_fp32(self.model_forward(*args, **kwargs)) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/amp/autocast_mode.py", line 44, in decorate_autocast + return func(*args, **kwargs) + File 
"/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 1564, in forward + outputs = self.bert( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 1013, in forward + encoder_outputs = self.encoder( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 607, in forward + layer_outputs = layer_module( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 497, in forward + self_attention_outputs = self.attention( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", 
line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 427, in forward + self_outputs = self.self( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 365, in forward + context_layer = torch.matmul(attention_probs, value_layer) +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 768.00 MiB. GPU 0 has a total capacity of 47.53 GiB of which 51.00 MiB is free. Process 2988293 has 21.00 GiB memory in use. Process 2988247 has 13.12 GiB memory in use. Including non-PyTorch memory, this process has 7.12 GiB memory in use. Process 2988353 has 574.00 MiB memory in use. Process 2988034 has 260.00 MiB memory in use. Process 2988245 has 260.00 MiB memory in use. Process 2988374 has 260.00 MiB memory in use. Process 2988531 has 260.00 MiB memory in use. Process 2988064 has 260.00 MiB memory in use. Process 2988566 has 260.00 MiB memory in use. Process 2988668 has 260.00 MiB memory in use. Process 2988116 has 260.00 MiB memory in use. Process 2988093 has 260.00 MiB memory in use. Process 2988637 has 260.00 MiB memory in use. Process 2988554 has 260.00 MiB memory in use. Process 2988906 has 260.00 MiB memory in use. Process 2988874 has 260.00 MiB memory in use. Process 2988923 has 260.00 MiB memory in use. Process 2988286 has 260.00 MiB memory in use. Process 2989072 has 260.00 MiB memory in use. Process 2988448 has 260.00 MiB memory in use. Process 2988348 has 260.00 MiB memory in use. Process 2988736 has 260.00 MiB memory in use. 
Process 2988855 has 260.00 MiB memory in use. Process 2988636 has 62.00 MiB memory in use. Process 2988454 has 60.00 MiB memory in use. Process 2988549 has 62.00 MiB memory in use. Process 2988575 has 20.00 MiB memory in use. Process 2988225 has 20.00 MiB memory in use. Process 2988538 has 20.00 MiB memory in use. Process 2989222 has 10.00 MiB memory in use. Process 2988129 has 12.00 MiB memory in use. Process 2989200 has 12.00 MiB memory in use. Process 2988790 has 10.00 MiB memory in use. Process 2989488 has 10.00 MiB memory in use. Process 2988709 has 20.00 MiB memory in use. Process 2988791 has 12.00 MiB memory in use. Process 2989081 has 12.00 MiB memory in use. Process 2988814 has 10.00 MiB memory in use. Process 2988511 has 10.00 MiB memory in use. Process 2988942 has 10.00 MiB memory in use. Process 2989897 has 10.00 MiB memory in use. Process 2989914 has 10.00 MiB memory in use. Process 2989070 has 10.00 MiB memory in use. Process 2989165 has 10.00 MiB memory in use. Process 2989068 has 10.00 MiB memory in use. Process 2990500 has 10.00 MiB memory in use. Process 2989449 has 10.00 MiB memory in use. Process 2988447 has 10.00 MiB memory in use. Process 2988379 has 10.00 MiB memory in use. Process 2988518 has 10.00 MiB memory in use. Process 2988326 has 10.00 MiB memory in use. Of the allocated memory 6.39 GiB is allocated by PyTorch, and 421.82 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. 
See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) +Traceback (most recent call last): + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 473, in + train() + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 454, in train + trainer.train() + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 1555, in train + return inner_training_loop( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 1860, in _inner_training_loop + tr_loss_step = self.training_step(model, inputs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 2725, in training_step + loss = self.compute_loss(model, inputs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 2748, in compute_loss + outputs = model(**inputs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/accelerate/utils/operations.py", line 680, in forward + return model_forward(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/accelerate/utils/operations.py", line 668, in __call__ + return convert_to_fp32(self.model_forward(*args, **kwargs)) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/amp/autocast_mode.py", line 44, in decorate_autocast + return func(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 1564, in 
forward + outputs = self.bert( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 1013, in forward + encoder_outputs = self.encoder( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 607, in forward + layer_outputs = layer_module( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 497, in forward + self_attention_outputs = self.attention( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File 
"/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 427, in forward + self_outputs = self.self( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 365, in forward + context_layer = torch.matmul(attention_probs, value_layer) +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 768.00 MiB. GPU 0 has a total capacity of 47.53 GiB of which 51.00 MiB is free. Process 2988293 has 21.00 GiB memory in use. Process 2988247 has 13.12 GiB memory in use. Including non-PyTorch memory, this process has 7.12 GiB memory in use. Process 2988353 has 574.00 MiB memory in use. Process 2988034 has 260.00 MiB memory in use. Process 2988245 has 260.00 MiB memory in use. Process 2988374 has 260.00 MiB memory in use. Process 2988531 has 260.00 MiB memory in use. Process 2988064 has 260.00 MiB memory in use. Process 2988566 has 260.00 MiB memory in use. Process 2988668 has 260.00 MiB memory in use. Process 2988116 has 260.00 MiB memory in use. Process 2988093 has 260.00 MiB memory in use. Process 2988637 has 260.00 MiB memory in use. Process 2988554 has 260.00 MiB memory in use. Process 2988906 has 260.00 MiB memory in use. Process 2988874 has 260.00 MiB memory in use. Process 2988923 has 260.00 MiB memory in use. Process 2988286 has 260.00 MiB memory in use. Process 2989072 has 260.00 MiB memory in use. Process 2988448 has 260.00 MiB memory in use. Process 2988348 has 260.00 MiB memory in use. Process 2988736 has 260.00 MiB memory in use. Process 2988855 has 260.00 MiB memory in use. 
Process 2988636 has 62.00 MiB memory in use. Process 2988454 has 60.00 MiB memory in use. Process 2988549 has 62.00 MiB memory in use. Process 2988575 has 20.00 MiB memory in use. Process 2988225 has 20.00 MiB memory in use. Process 2988538 has 20.00 MiB memory in use. Process 2989222 has 10.00 MiB memory in use. Process 2988129 has 12.00 MiB memory in use. Process 2989200 has 12.00 MiB memory in use. Process 2988790 has 10.00 MiB memory in use. Process 2989488 has 10.00 MiB memory in use. Process 2988709 has 20.00 MiB memory in use. Process 2988791 has 12.00 MiB memory in use. Process 2989081 has 12.00 MiB memory in use. Process 2988814 has 10.00 MiB memory in use. Process 2988511 has 10.00 MiB memory in use. Process 2988942 has 10.00 MiB memory in use. Process 2989897 has 10.00 MiB memory in use. Process 2989914 has 10.00 MiB memory in use. Process 2989070 has 10.00 MiB memory in use. Process 2989165 has 10.00 MiB memory in use. Process 2989068 has 10.00 MiB memory in use. Process 2990500 has 10.00 MiB memory in use. Process 2989449 has 10.00 MiB memory in use. Process 2988447 has 10.00 MiB memory in use. Process 2988379 has 10.00 MiB memory in use. Process 2988518 has 10.00 MiB memory in use. Process 2988326 has 10.00 MiB memory in use. Of the allocated memory 6.39 GiB is allocated by PyTorch, and 421.82 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. 
See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-uz5f21vb/files/requirements.txt b/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-uz5f21vb/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..44d18d32ec4648cd530877d7c8c4758d5e887b9c --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-uz5f21vb/files/requirements.txt @@ -0,0 +1,144 @@ +scipy==1.13.1 +jupyter_core==5.8.1 +smmap==5.0.2 +yarl==1.22.0 +executing==2.2.0 +cffi==2.0.0 +mkl_random==1.2.8 +traitlets==5.14.3 +wandb==0.23.1 +annotated-types==0.7.0 +evaluate==0.4.6 +kiwisolver==1.4.4 +Jinja2==3.1.6 +pyparsing==3.2.0 +mpmath==1.3.0 +debugpy==1.8.16 +nvidia-cuda-nvrtc-cu12==12.8.93 +docker-pycreds==0.4.0 +pycparser==2.23 +anyio==4.12.0 +safetensors==0.7.0 +matplotlib-inline==0.1.7 +Pygments==2.19.2 +numpy==2.0.2 +nvidia-cuda-cupti-cu12==12.8.90 +Bottleneck==1.4.2 +matplotlib==3.9.2 +numexpr==2.10.1 +sip==6.7.12 +aiohappyeyeballs==2.6.1 +ptyprocess==0.7.0 +fsspec==2025.7.0 +accelerate==0.25.0 +zipp==3.23.0 +PyQt5_sip==12.13.0 +pure_eval==0.2.3 +regex==2025.11.3 +aiosignal==1.4.0 +certifi==2025.10.5 +transformers==4.35.2 +mkl-service==2.4.0 +httpx==0.28.1 +mkl_fft==1.3.11 +pickleshare==0.7.5 +ipykernel==6.30.1 +nvidia-nvtx-cu12==12.8.90 +nvidia-cufft-cu12==11.3.3.83 +triton==3.4.0 +numba==0.60.0 +psutil==7.0.0 +contourpy==1.2.1 +PyQt5==5.15.10 +packaging==25.0 +datasets==4.4.1 +ipython==8.18.1 +sympy==1.14.0 +nvidia-cusolver-cu12==11.7.3.90 +multidict==6.7.0 +jupyter_client==8.6.3 +setuptools==80.9.0 +prompt_toolkit==3.0.51 +six==1.17.0 +GitPython==3.1.45 +pydantic==2.11.7 +nvidia-cublas-cu12==12.8.4.1 +aiohttp==3.13.2 +tzdata==2025.2 +importlib_metadata==8.7.0 +biopython==1.85 +httpcore==1.0.9 +python-dateutil==2.9.0.post0 +llvmlite==0.43.0 +pandas==2.3.3 +scikit-learn==1.6.1 +asttokens==3.0.0 +joblib==1.5.3 +h11==0.16.0 
+charset-normalizer==3.4.4 +pyzmq==27.0.2 +multiprocess==0.70.18 +nvidia-nvjitlink-cu12==12.8.93 +sentry-sdk==2.35.0 +pytz==2025.2 +pydantic_core==2.33.2 +MarkupSafe==3.0.3 +brotlicffi==1.0.9.2 +stack_data==0.6.3 +tqdm==4.67.1 +pynndescent==0.5.13 +importlib_resources==6.5.2 +ply==3.11 +pyarrow==21.0.0 +tokenizers==0.15.2 +exceptiongroup==1.3.1 +nvidia-cusparse-cu12==12.5.8.93 +comm==0.2.3 +pillow==11.3.0 +nvidia-cusparselt-cu12==0.7.1 +protobuf==3.20.3 +urllib3==2.5.0 +wheel==0.45.1 +wcwidth==0.2.13 +appdirs==1.4.4 +PySocks==1.7.1 +PyQt6_sip==13.10.2 +umap-learn==0.5.9.post2 +attrs==25.4.0 +platformdirs==4.3.8 +nvidia-cuda-runtime-cu12==12.8.90 +typing-inspection==0.4.1 +huggingface_hub==0.34.4 +decorator==5.2.1 +filelock==3.17.0 +nvidia-nccl-cu12==2.27.3 +fonttools==4.60.1 +xxhash==3.6.0 +dill==0.4.0 +threadpoolctl==3.6.0 +parso==0.8.4 +pysam==0.9.1 +frozenlist==1.8.0 +typing_extensions==4.15.0 +propcache==0.4.1 +tomli==2.2.1 +click==8.1.8 +nvidia-cudnn-cu12==9.10.2.21 +gitdb==4.0.12 +pip==25.3 +tornado==6.5.2 +networkx==3.2.1 +jedi==0.19.2 +idna==3.11 +pexpect==4.9.0 +async-timeout==5.0.1 +hf-xet==1.1.8 +nvidia-curand-cu12==10.3.9.90 +PyYAML==6.0.2 +nvidia-cufile-cu12==1.13.1.3 +setproctitle==1.3.6 +eval_type_backport==0.2.2 +requests==2.32.5 +nest-asyncio==1.6.0 +torch==2.8.0 +cycler==0.11.0 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-uz5f21vb/files/wandb-metadata.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-uz5f21vb/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..6f9420b54c1931623156a5f721f138b030b080ce --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-uz5f21vb/files/wandb-metadata.json @@ -0,0 +1,146 @@ +{ + "os": "Linux-5.15.0-126-generic-x86_64-with-glibc2.35", + "python": "CPython 3.9.18", + "startedAt": "2026-03-24T20:29:12.803506Z", + "args": [ + "--model_name_or_path", + "/data/nanhuang/Nan/models/DNAbert2_Pretrained", + "--tokenizer_path", + 
"/data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json", + "--trust_remote_code", + "True", + "--data_path", + "/data/nanhuang/Nan/ft_data/drosophila_enhancers_stark/split", + "--kmer", + "-1", + "--run_name", + "base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42", + "--model_max_length", + "512", + "--per_device_train_batch_size", + "128", + "--per_device_eval_batch_size", + "128", + "--gradient_accumulation_steps", + "1", + "--learning_rate", + "3e-5", + "--weight_decay", + "0.05", + "--num_train_epochs", + "4", + "--lr_scheduler_type", + "linear", + "--warmup_steps", + "0", + "--warmup_ratio", + "0.15", + "--fp16", + "--output_dir", + "genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42", + "--evaluation_strategy", + "epoch", + "--save_strategy", + "epoch", + "--load_best_model_at_end", + "True", + "--metric_for_best_model", + "eval_f1", + "--greater_is_better", + "True", + "--save_total_limit", + "1", + "--logging_steps", + "100", + "--overwrite_output_dir", + "True", + "--log_level", + "info", + "--seed", + "42", + "--find_unused_parameters", + "False", + "--project_name", + "genomic_bench_DNAbert2" + ], + "program": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", + "codePath": "train.py", + "codePathLocal": "train.py", + "email": "n5huang@ucsd.edu", + "root": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks", + "host": "u112222", + "executable": "/data/nanhuang/miniconda3/envs/bpe_v2/bin/python", + "cpu_count": 64, + "cpu_count_logical": 128, + "gpu": "NVIDIA RTX A6000", + "gpu_count": 8, + "disk": { + "/": { + "total": "3768964489216", + "used": "3559220690944" + } + }, + "memory": { + "total": "1082030182400" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-5a3d8a94-f380-da39-63d2-4cae98f5c2ae" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + 
"cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-cf8724bd-d619-7916-ee26-88d517a20c47" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-48b494ab-4a63-ff4c-5cc8-746af5d27310" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-968c7ea7-97bf-416a-7689-72c141cfc2bb" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-d53c626b-860f-1dec-1cfa-1dfcde78bc88" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-caa40ec7-afcb-5fe0-c53a-85eb54152941" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-18ee7a7f-1bbe-edef-c72c-3abed60917b2" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-a8757d5a-c26e-48c6-a704-dfe62167fc81" + } + ], + "cudaVersion": "12.4", + "writerId": "8ty4kccz1kpmldgk0qlsw83bjiyrms6b" +} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-uz5f21vb/files/wandb-summary.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-uz5f21vb/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..f4ec3806c8e5abda8d70342c64624c8cf8f81552 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-uz5f21vb/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb":{"runtime":9},"_runtime":9} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-uz5f21vb/logs/debug-core.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-uz5f21vb/logs/debug-core.log new file mode 100644 index 
0000000000000000000000000000000000000000..3b1d4cb1d89894f47d0826f2431dab62925194b3 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-uz5f21vb/logs/debug-core.log @@ -0,0 +1,14 @@ +{"time":"2026-03-24T13:29:13.0119669-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmphn4qq5ub/port-2988061.txt","pid":2988061,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:13.014036208-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988061} +{"time":"2026-03-24T13:29:13.014053458-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988061-2993719-3992670385/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:13.187388336-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:13.258585396-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"uz5f21vb","id":"1(@)"} +{"time":"2026-03-24T13:29:16.766932206-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"uz5f21vb","id":"1(@)"} +{"time":"2026-03-24T13:29:26.554182724-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:26.554358443-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:26.554403313-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:26.554502722-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:26.557916432-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988061-2993719-3992670385/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:30.08121281-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:30.081273239-07:00","level":"INFO","msg":"connection: 
ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:30.081293719-07:00","level":"INFO","msg":"server is closed"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-uz5f21vb/logs/debug-internal.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-uz5f21vb/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..561795ff657e21115df1bd51b86c1aafae733b5a --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-uz5f21vb/logs/debug-internal.log @@ -0,0 +1,11 @@ +{"time":"2026-03-24T13:29:13.258948224-07:00","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2026-03-24T13:29:16.766358359-07:00","level":"INFO","msg":"stream: created new stream","id":"uz5f21vb"} +{"time":"2026-03-24T13:29:16.766587508-07:00","level":"INFO","msg":"handler: started","stream_id":"uz5f21vb"} +{"time":"2026-03-24T13:29:16.766865346-07:00","level":"INFO","msg":"stream: started","id":"uz5f21vb"} +{"time":"2026-03-24T13:29:16.766886536-07:00","level":"INFO","msg":"writer: started","stream_id":"uz5f21vb"} +{"time":"2026-03-24T13:29:16.766941746-07:00","level":"INFO","msg":"sender: started","stream_id":"uz5f21vb"} +{"time":"2026-03-24T13:29:26.554426653-07:00","level":"INFO","msg":"stream: closing","id":"uz5f21vb"} +{"time":"2026-03-24T13:29:27.273598343-07:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2026-03-24T13:29:30.07275273-07:00","level":"INFO","msg":"handler: closed","stream_id":"uz5f21vb"} +{"time":"2026-03-24T13:29:30.072972708-07:00","level":"INFO","msg":"sender: closed","stream_id":"uz5f21vb"} +{"time":"2026-03-24T13:29:30.072990458-07:00","level":"INFO","msg":"stream: closed","id":"uz5f21vb"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-uz5f21vb/logs/debug.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-uz5f21vb/logs/debug.log new file mode 100644 index 
0000000000000000000000000000000000000000..6722de12390ab2be2ea6a83eec258dcd0266641d --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-uz5f21vb/logs/debug.log @@ -0,0 +1,24 @@ +2026-03-24 13:29:12,811 INFO MainThread:2988061 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1 +2026-03-24 13:29:12,811 INFO MainThread:2988061 [wandb_setup.py:_flush():80] Configure stats pid to 2988061 +2026-03-24 13:29:12,811 INFO MainThread:2988061 [wandb_setup.py:_flush():80] Loading settings from /home/nanhuang/.config/wandb/settings +2026-03-24 13:29:12,811 INFO MainThread:2988061 [wandb_setup.py:_flush():80] Loading settings from /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/settings +2026-03-24 13:29:12,811 INFO MainThread:2988061 [wandb_setup.py:_flush():80] Loading settings from environment variables +2026-03-24 13:29:12,811 INFO MainThread:2988061 [wandb_init.py:setup_run_log_directory():714] Logging user logs to /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-uz5f21vb/logs/debug.log +2026-03-24 13:29:12,812 INFO MainThread:2988061 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-uz5f21vb/logs/debug-internal.log +2026-03-24 13:29:12,812 INFO MainThread:2988061 [wandb_init.py:init():841] calling init triggers +2026-03-24 13:29:12,812 INFO MainThread:2988061 [wandb_init.py:init():846] wandb.init called with sweep_config: {} +config: {'_wandb': {}} +2026-03-24 13:29:12,813 INFO MainThread:2988061 [wandb_init.py:init():889] starting backend +2026-03-24 13:29:13,188 INFO MainThread:2988061 [wandb_init.py:init():892] sending inform_init request +2026-03-24 13:29:13,256 INFO MainThread:2988061 [wandb_init.py:init():900] backend started and connected +2026-03-24 13:29:13,267 INFO MainThread:2988061 [wandb_init.py:init():970] updated telemetry +2026-03-24 13:29:13,269 INFO MainThread:2988061 [wandb_init.py:init():994] communicating 
run to backend with 90.0 second timeout +2026-03-24 13:29:17,443 INFO MainThread:2988061 [wandb_init.py:init():1041] starting run threads in backend +2026-03-24 13:29:17,574 INFO MainThread:2988061 [wandb_run.py:_console_start():2521] atexit reg +2026-03-24 13:29:17,574 INFO MainThread:2988061 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2026-03-24 13:29:17,574 INFO MainThread:2988061 [wandb_run.py:_redirect():2438] Wrapping output streams. +2026-03-24 13:29:17,574 INFO MainThread:2988061 [wandb_run.py:_redirect():2461] Redirects installed. +2026-03-24 13:29:17,578 INFO MainThread:2988061 [wandb_init.py:init():1081] run started, returning control to user process +2026-03-24 13:29:23,899 INFO MainThread:2988061 [wandb_run.py:_config_callback():1396] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 512, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['BertForMaskedLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 
'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': '/data/nanhuang/Nan/models/DNAbert2_Pretrained', 'transformers_version': '4.35.2', 'model_type': 'bert', 'vocab_size': 4096, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 512, 'type_vocab_size': 2, 'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'classifier_dropout': None, 'output_dir': 'genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 128, 'per_device_eval_batch_size': 128, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 3e-05, 'weight_decay': 0.05, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 4, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'warmup_ratio': 0.15, 'warmup_steps': 0, 'log_level': 'info', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 'genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/runs/Mar24_13-29-10_u112222', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 100, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 100, 'save_total_limit': 1, 'save_safetensors': True, 'save_on_each_node': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 
'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 100, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': 'base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'eval_f1', 'greater_is_better': True, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': False, 'include_tokens_per_second': False, 'neftune_noise_alpha': None, 'vocab_file': None, 'cache_dir': 
None, 'model_max_length': 512, 'find_unused_parameters': False, 'checkpointing': False, 'eval_and_save_results': True, 'save_model': False, 'project_name': 'genomic_bench_DNAbert2'} +2026-03-24 13:29:26,554 INFO wandb-AsyncioManager-main:2988061 [service_client.py:_forward_responses():80] Reached EOF. +2026-03-24 13:29:26,554 INFO wandb-AsyncioManager-main:2988061 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles. diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-uz5f21vb/run-uz5f21vb.wandb b/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-uz5f21vb/run-uz5f21vb.wandb new file mode 100644 index 0000000000000000000000000000000000000000..4219a33d0b52cad58f6ae36b7cd92b79004bea8f Binary files /dev/null and b/Finetune-GenomicBenchmarks/wandb/run-20260324_132912-uz5f21vb/run-uz5f21vb.wandb differ diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132914-hfm6xj8d/files/config.yaml b/Finetune-GenomicBenchmarks/wandb/run-20260324_132914-hfm6xj8d/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5e5d63b40f8daaa9a05d14c3b4da345ab93b7c24 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132914-hfm6xj8d/files/config.yaml @@ -0,0 +1,152 @@ +_wandb: + value: + cli_version: 0.23.1 + e: + 43cxjmjk535bx1ftco7trn70rsww4w6a: + args: + - --model_name_or_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained + - --tokenizer_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json + - --trust_remote_code + - "True" + - --data_path + - /data/nanhuang/Nan/ft_data/drosophila_enhancers_stark/split + - --kmer + - "-1" + - --run_name + - base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42 + - --model_max_length + - "512" + - --per_device_train_batch_size + - "128" + - --per_device_eval_batch_size + - "128" + - --gradient_accumulation_steps + - "1" + - --learning_rate + - "3e-5" + - --weight_decay + - "0.05" + - --num_train_epochs + - "4" + - --lr_scheduler_type + - linear 
+ - --warmup_steps + - "0" + - --warmup_ratio + - "0.15" + - --fp16 + - --output_dir + - genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42 + - --evaluation_strategy + - epoch + - --save_strategy + - epoch + - --load_best_model_at_end + - "True" + - --metric_for_best_model + - eval_f1 + - --greater_is_better + - "True" + - --save_total_limit + - "1" + - --logging_steps + - "100" + - --overwrite_output_dir + - "True" + - --log_level + - info + - --seed + - "42" + - --find_unused_parameters + - "False" + - --project_name + - genomic_bench_DNAbert2 + codePath: train.py + codePathLocal: train.py + cpu_count: 64 + cpu_count_logical: 128 + cudaVersion: "12.4" + disk: + /: + total: "3768964489216" + used: "3559220899840" + email: n5huang@ucsd.edu + executable: /data/nanhuang/miniconda3/envs/bpe_v2/bin/python + gpu: NVIDIA RTX A6000 + gpu_count: 8 + gpu_nvidia: + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-5a3d8a94-f380-da39-63d2-4cae98f5c2ae + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-cf8724bd-d619-7916-ee26-88d517a20c47 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-48b494ab-4a63-ff4c-5cc8-746af5d27310 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-968c7ea7-97bf-416a-7689-72c141cfc2bb + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-d53c626b-860f-1dec-1cfa-1dfcde78bc88 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-caa40ec7-afcb-5fe0-c53a-85eb54152941 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-18ee7a7f-1bbe-edef-c72c-3abed60917b2 + - architecture: Ampere + cudaCores: 10752 + 
memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-a8757d5a-c26e-48c6-a704-dfe62167fc81 + host: u112222 + memory: + total: "1082030182400" + os: Linux-5.15.0-126-generic-x86_64-with-glibc2.35 + program: /data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py + python: CPython 3.9.18 + root: /data/nanhuang/Nan/Finetune-GenomicBenchmarks + startedAt: "2026-03-24T20:29:14.032841Z" + writerId: 43cxjmjk535bx1ftco7trn70rsww4w6a + m: [] + python_version: 3.9.18 + t: + "1": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "2": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "4": 3.9.18 + "5": 0.23.1 + "6": 4.35.2 + "12": 0.23.1 + "13": linux-x86_64 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132914-hfm6xj8d/files/output.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132914-hfm6xj8d/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..561bd22816cfa92b4564a475c545d0ce53d8fca3 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132914-hfm6xj8d/files/output.log @@ -0,0 +1,47 @@ +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... +Some weights of BertForSequenceClassification were not initialized from the model checkpoint at /data/nanhuang/Nan/models/DNAbert2_Pretrained and are newly initialized: ['bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight', 'bert.pooler.dense.bias'] +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. 
+Traceback (most recent call last): + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 473, in + train() + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 424, in train + model = transformers.AutoModelForSequenceClassification.from_pretrained( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/modeling_utils.py", line 2271, in to + return super().to(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1369, in to + return self._apply(convert) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + [Previous line repeated 4 more times] + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 955, in _apply + param_applied = fn(param) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1355, in convert + return t.to( +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 0 has a total capacity of 47.53 GiB of which 10.69 MiB is free. Process 2988293 has 21.00 GiB memory in use. Process 2988247 has 13.12 GiB memory in use. Process 2988061 has 7.12 GiB memory in use. Including non-PyTorch memory, this process has 574.00 MiB memory in use. Process 2988034 has 260.00 MiB memory in use. Process 2988245 has 260.00 MiB memory in use. Process 2988374 has 260.00 MiB memory in use. Process 2988531 has 260.00 MiB memory in use. Process 2988064 has 260.00 MiB memory in use. Process 2988566 has 260.00 MiB memory in use. 
Process 2988668 has 260.00 MiB memory in use. Process 2988116 has 260.00 MiB memory in use. Process 2988093 has 260.00 MiB memory in use. Process 2988637 has 260.00 MiB memory in use. Process 2988554 has 260.00 MiB memory in use. Process 2988906 has 260.00 MiB memory in use. Process 2988874 has 260.00 MiB memory in use. Process 2988923 has 260.00 MiB memory in use. Process 2988286 has 260.00 MiB memory in use. Process 2989072 has 260.00 MiB memory in use. Process 2988448 has 260.00 MiB memory in use. Process 2988348 has 260.00 MiB memory in use. Process 2988736 has 260.00 MiB memory in use. Process 2988855 has 260.00 MiB memory in use. Process 2988636 has 62.00 MiB memory in use. Process 2988454 has 60.00 MiB memory in use. Process 2988549 has 62.00 MiB memory in use. Process 2988575 has 20.00 MiB memory in use. Process 2988225 has 20.00 MiB memory in use. Process 2988538 has 20.00 MiB memory in use. Process 2989222 has 10.00 MiB memory in use. Process 2988129 has 12.00 MiB memory in use. Process 2989200 has 12.00 MiB memory in use. Process 2988790 has 10.00 MiB memory in use. Process 2989488 has 10.00 MiB memory in use. Process 2988709 has 20.00 MiB memory in use. Process 2988791 has 12.00 MiB memory in use. Process 2989081 has 12.00 MiB memory in use. Process 2988814 has 10.00 MiB memory in use. Process 2988511 has 10.00 MiB memory in use. Process 2988942 has 10.00 MiB memory in use. Process 2989897 has 10.00 MiB memory in use. Process 2989914 has 10.00 MiB memory in use. Process 2989070 has 10.00 MiB memory in use. Process 2989165 has 10.00 MiB memory in use. Process 2989068 has 10.00 MiB memory in use. Process 2990500 has 10.00 MiB memory in use. Process 2989449 has 10.00 MiB memory in use. Process 2988447 has 10.00 MiB memory in use. Process 2988379 has 10.00 MiB memory in use. Process 2988518 has 10.00 MiB memory in use. Process 2988326 has 10.00 MiB memory in use. 
Of the allocated memory 284.40 MiB is allocated by PyTorch, and 29.60 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) +Traceback (most recent call last): + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 473, in + train() + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 424, in train + model = transformers.AutoModelForSequenceClassification.from_pretrained( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/modeling_utils.py", line 2271, in to + return super().to(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1369, in to + return self._apply(convert) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + [Previous line repeated 4 more times] + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 955, in _apply + param_applied = fn(param) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1355, in convert + return t.to( +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 0 has a total capacity of 47.53 GiB of which 10.69 MiB is free. Process 2988293 has 21.00 GiB memory in use. Process 2988247 has 13.12 GiB memory in use. Process 2988061 has 7.12 GiB memory in use. 
Including non-PyTorch memory, this process has 574.00 MiB memory in use. Process 2988034 has 260.00 MiB memory in use. Process 2988245 has 260.00 MiB memory in use. Process 2988374 has 260.00 MiB memory in use. Process 2988531 has 260.00 MiB memory in use. Process 2988064 has 260.00 MiB memory in use. Process 2988566 has 260.00 MiB memory in use. Process 2988668 has 260.00 MiB memory in use. Process 2988116 has 260.00 MiB memory in use. Process 2988093 has 260.00 MiB memory in use. Process 2988637 has 260.00 MiB memory in use. Process 2988554 has 260.00 MiB memory in use. Process 2988906 has 260.00 MiB memory in use. Process 2988874 has 260.00 MiB memory in use. Process 2988923 has 260.00 MiB memory in use. Process 2988286 has 260.00 MiB memory in use. Process 2989072 has 260.00 MiB memory in use. Process 2988448 has 260.00 MiB memory in use. Process 2988348 has 260.00 MiB memory in use. Process 2988736 has 260.00 MiB memory in use. Process 2988855 has 260.00 MiB memory in use. Process 2988636 has 62.00 MiB memory in use. Process 2988454 has 60.00 MiB memory in use. Process 2988549 has 62.00 MiB memory in use. Process 2988575 has 20.00 MiB memory in use. Process 2988225 has 20.00 MiB memory in use. Process 2988538 has 20.00 MiB memory in use. Process 2989222 has 10.00 MiB memory in use. Process 2988129 has 12.00 MiB memory in use. Process 2989200 has 12.00 MiB memory in use. Process 2988790 has 10.00 MiB memory in use. Process 2989488 has 10.00 MiB memory in use. Process 2988709 has 20.00 MiB memory in use. Process 2988791 has 12.00 MiB memory in use. Process 2989081 has 12.00 MiB memory in use. Process 2988814 has 10.00 MiB memory in use. Process 2988511 has 10.00 MiB memory in use. Process 2988942 has 10.00 MiB memory in use. Process 2989897 has 10.00 MiB memory in use. Process 2989914 has 10.00 MiB memory in use. Process 2989070 has 10.00 MiB memory in use. Process 2989165 has 10.00 MiB memory in use. Process 2989068 has 10.00 MiB memory in use. 
Process 2990500 has 10.00 MiB memory in use. Process 2989449 has 10.00 MiB memory in use. Process 2988447 has 10.00 MiB memory in use. Process 2988379 has 10.00 MiB memory in use. Process 2988518 has 10.00 MiB memory in use. Process 2988326 has 10.00 MiB memory in use. Of the allocated memory 284.40 MiB is allocated by PyTorch, and 29.60 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132914-hfm6xj8d/files/requirements.txt b/Finetune-GenomicBenchmarks/wandb/run-20260324_132914-hfm6xj8d/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..44d18d32ec4648cd530877d7c8c4758d5e887b9c --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132914-hfm6xj8d/files/requirements.txt @@ -0,0 +1,144 @@ +scipy==1.13.1 +jupyter_core==5.8.1 +smmap==5.0.2 +yarl==1.22.0 +executing==2.2.0 +cffi==2.0.0 +mkl_random==1.2.8 +traitlets==5.14.3 +wandb==0.23.1 +annotated-types==0.7.0 +evaluate==0.4.6 +kiwisolver==1.4.4 +Jinja2==3.1.6 +pyparsing==3.2.0 +mpmath==1.3.0 +debugpy==1.8.16 +nvidia-cuda-nvrtc-cu12==12.8.93 +docker-pycreds==0.4.0 +pycparser==2.23 +anyio==4.12.0 +safetensors==0.7.0 +matplotlib-inline==0.1.7 +Pygments==2.19.2 +numpy==2.0.2 +nvidia-cuda-cupti-cu12==12.8.90 +Bottleneck==1.4.2 +matplotlib==3.9.2 +numexpr==2.10.1 +sip==6.7.12 +aiohappyeyeballs==2.6.1 +ptyprocess==0.7.0 +fsspec==2025.7.0 +accelerate==0.25.0 +zipp==3.23.0 +PyQt5_sip==12.13.0 +pure_eval==0.2.3 +regex==2025.11.3 +aiosignal==1.4.0 +certifi==2025.10.5 +transformers==4.35.2 +mkl-service==2.4.0 +httpx==0.28.1 +mkl_fft==1.3.11 +pickleshare==0.7.5 +ipykernel==6.30.1 +nvidia-nvtx-cu12==12.8.90 +nvidia-cufft-cu12==11.3.3.83 +triton==3.4.0 +numba==0.60.0 +psutil==7.0.0 
+contourpy==1.2.1 +PyQt5==5.15.10 +packaging==25.0 +datasets==4.4.1 +ipython==8.18.1 +sympy==1.14.0 +nvidia-cusolver-cu12==11.7.3.90 +multidict==6.7.0 +jupyter_client==8.6.3 +setuptools==80.9.0 +prompt_toolkit==3.0.51 +six==1.17.0 +GitPython==3.1.45 +pydantic==2.11.7 +nvidia-cublas-cu12==12.8.4.1 +aiohttp==3.13.2 +tzdata==2025.2 +importlib_metadata==8.7.0 +biopython==1.85 +httpcore==1.0.9 +python-dateutil==2.9.0.post0 +llvmlite==0.43.0 +pandas==2.3.3 +scikit-learn==1.6.1 +asttokens==3.0.0 +joblib==1.5.3 +h11==0.16.0 +charset-normalizer==3.4.4 +pyzmq==27.0.2 +multiprocess==0.70.18 +nvidia-nvjitlink-cu12==12.8.93 +sentry-sdk==2.35.0 +pytz==2025.2 +pydantic_core==2.33.2 +MarkupSafe==3.0.3 +brotlicffi==1.0.9.2 +stack_data==0.6.3 +tqdm==4.67.1 +pynndescent==0.5.13 +importlib_resources==6.5.2 +ply==3.11 +pyarrow==21.0.0 +tokenizers==0.15.2 +exceptiongroup==1.3.1 +nvidia-cusparse-cu12==12.5.8.93 +comm==0.2.3 +pillow==11.3.0 +nvidia-cusparselt-cu12==0.7.1 +protobuf==3.20.3 +urllib3==2.5.0 +wheel==0.45.1 +wcwidth==0.2.13 +appdirs==1.4.4 +PySocks==1.7.1 +PyQt6_sip==13.10.2 +umap-learn==0.5.9.post2 +attrs==25.4.0 +platformdirs==4.3.8 +nvidia-cuda-runtime-cu12==12.8.90 +typing-inspection==0.4.1 +huggingface_hub==0.34.4 +decorator==5.2.1 +filelock==3.17.0 +nvidia-nccl-cu12==2.27.3 +fonttools==4.60.1 +xxhash==3.6.0 +dill==0.4.0 +threadpoolctl==3.6.0 +parso==0.8.4 +pysam==0.9.1 +frozenlist==1.8.0 +typing_extensions==4.15.0 +propcache==0.4.1 +tomli==2.2.1 +click==8.1.8 +nvidia-cudnn-cu12==9.10.2.21 +gitdb==4.0.12 +pip==25.3 +tornado==6.5.2 +networkx==3.2.1 +jedi==0.19.2 +idna==3.11 +pexpect==4.9.0 +async-timeout==5.0.1 +hf-xet==1.1.8 +nvidia-curand-cu12==10.3.9.90 +PyYAML==6.0.2 +nvidia-cufile-cu12==1.13.1.3 +setproctitle==1.3.6 +eval_type_backport==0.2.2 +requests==2.32.5 +nest-asyncio==1.6.0 +torch==2.8.0 +cycler==0.11.0 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132914-hfm6xj8d/files/wandb-metadata.json 
b/Finetune-GenomicBenchmarks/wandb/run-20260324_132914-hfm6xj8d/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..3d77c299d81894df04b9dbe586aba6b53e5fab80 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132914-hfm6xj8d/files/wandb-metadata.json @@ -0,0 +1,146 @@ +{ + "os": "Linux-5.15.0-126-generic-x86_64-with-glibc2.35", + "python": "CPython 3.9.18", + "startedAt": "2026-03-24T20:29:14.032841Z", + "args": [ + "--model_name_or_path", + "/data/nanhuang/Nan/models/DNAbert2_Pretrained", + "--tokenizer_path", + "/data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json", + "--trust_remote_code", + "True", + "--data_path", + "/data/nanhuang/Nan/ft_data/drosophila_enhancers_stark/split", + "--kmer", + "-1", + "--run_name", + "base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42", + "--model_max_length", + "512", + "--per_device_train_batch_size", + "128", + "--per_device_eval_batch_size", + "128", + "--gradient_accumulation_steps", + "1", + "--learning_rate", + "3e-5", + "--weight_decay", + "0.05", + "--num_train_epochs", + "4", + "--lr_scheduler_type", + "linear", + "--warmup_steps", + "0", + "--warmup_ratio", + "0.15", + "--fp16", + "--output_dir", + "genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42", + "--evaluation_strategy", + "epoch", + "--save_strategy", + "epoch", + "--load_best_model_at_end", + "True", + "--metric_for_best_model", + "eval_f1", + "--greater_is_better", + "True", + "--save_total_limit", + "1", + "--logging_steps", + "100", + "--overwrite_output_dir", + "True", + "--log_level", + "info", + "--seed", + "42", + "--find_unused_parameters", + "False", + "--project_name", + "genomic_bench_DNAbert2" + ], + "program": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", + "codePath": "train.py", + "codePathLocal": "train.py", + "email": "n5huang@ucsd.edu", + "root": 
"/data/nanhuang/Nan/Finetune-GenomicBenchmarks", + "host": "u112222", + "executable": "/data/nanhuang/miniconda3/envs/bpe_v2/bin/python", + "cpu_count": 64, + "cpu_count_logical": 128, + "gpu": "NVIDIA RTX A6000", + "gpu_count": 8, + "disk": { + "/": { + "total": "3768964489216", + "used": "3559220899840" + } + }, + "memory": { + "total": "1082030182400" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-5a3d8a94-f380-da39-63d2-4cae98f5c2ae" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-cf8724bd-d619-7916-ee26-88d517a20c47" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-48b494ab-4a63-ff4c-5cc8-746af5d27310" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-968c7ea7-97bf-416a-7689-72c141cfc2bb" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-d53c626b-860f-1dec-1cfa-1dfcde78bc88" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-caa40ec7-afcb-5fe0-c53a-85eb54152941" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-18ee7a7f-1bbe-edef-c72c-3abed60917b2" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-a8757d5a-c26e-48c6-a704-dfe62167fc81" + } + ], + "cudaVersion": "12.4", + "writerId": "43cxjmjk535bx1ftco7trn70rsww4w6a" +} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132914-hfm6xj8d/files/wandb-summary.json 
b/Finetune-GenomicBenchmarks/wandb/run-20260324_132914-hfm6xj8d/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..f60bccc799a769e529cf191d8e66da19a5816ef1 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132914-hfm6xj8d/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb":{"runtime":6},"_runtime":6} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132914-hfm6xj8d/logs/debug-core.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132914-hfm6xj8d/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..5e0ba834891dd94ab096d5118ce296ef4f009eee --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132914-hfm6xj8d/logs/debug-core.log @@ -0,0 +1,14 @@ +{"time":"2026-03-24T13:29:14.25673295-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpv07ld_73/port-2988353.txt","pid":2988353,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:14.258544659-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988353-2994382-4178059212/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:14.258759978-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988353} +{"time":"2026-03-24T13:29:14.418280517-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:14.493184795-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"hfm6xj8d","id":"1(@)"} +{"time":"2026-03-24T13:29:19.334976392-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"hfm6xj8d","id":"1(@)"} +{"time":"2026-03-24T13:29:26.686961841-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:26.68718629-07:00","level":"INFO","msg":"connection: 
closing","id":"1(@)"} +{"time":"2026-03-24T13:29:26.68721227-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:26.6872579-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:26.687454538-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988353-2994382-4178059212/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:27.396127691-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:27.39625297-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:27.396309699-07:00","level":"INFO","msg":"server is closed"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132914-hfm6xj8d/logs/debug-internal.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132914-hfm6xj8d/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..c6d935929ec60cbce0010635f422f7bd9e138866 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132914-hfm6xj8d/logs/debug-internal.log @@ -0,0 +1,11 @@ +{"time":"2026-03-24T13:29:14.493571103-07:00","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2026-03-24T13:29:19.334591864-07:00","level":"INFO","msg":"stream: created new stream","id":"hfm6xj8d"} +{"time":"2026-03-24T13:29:19.334751333-07:00","level":"INFO","msg":"handler: started","stream_id":"hfm6xj8d"} +{"time":"2026-03-24T13:29:19.334950202-07:00","level":"INFO","msg":"stream: started","id":"hfm6xj8d"} +{"time":"2026-03-24T13:29:19.334973222-07:00","level":"INFO","msg":"writer: started","stream_id":"hfm6xj8d"} +{"time":"2026-03-24T13:29:19.334989892-07:00","level":"INFO","msg":"sender: started","stream_id":"hfm6xj8d"} +{"time":"2026-03-24T13:29:26.687064201-07:00","level":"INFO","msg":"stream: closing","id":"hfm6xj8d"} 
+{"time":"2026-03-24T13:29:27.136368002-07:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2026-03-24T13:29:27.392485962-07:00","level":"INFO","msg":"handler: closed","stream_id":"hfm6xj8d"} +{"time":"2026-03-24T13:29:27.392719551-07:00","level":"INFO","msg":"sender: closed","stream_id":"hfm6xj8d"} +{"time":"2026-03-24T13:29:27.39273202-07:00","level":"INFO","msg":"stream: closed","id":"hfm6xj8d"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132914-hfm6xj8d/logs/debug.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132914-hfm6xj8d/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..fa0eb5d0008ddffb5c639e501224ab9a7879a0f5 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132914-hfm6xj8d/logs/debug.log @@ -0,0 +1,23 @@ +2026-03-24 13:29:14,042 INFO MainThread:2988353 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1 +2026-03-24 13:29:14,043 INFO MainThread:2988353 [wandb_setup.py:_flush():80] Configure stats pid to 2988353 +2026-03-24 13:29:14,043 INFO MainThread:2988353 [wandb_setup.py:_flush():80] Loading settings from /home/nanhuang/.config/wandb/settings +2026-03-24 13:29:14,043 INFO MainThread:2988353 [wandb_setup.py:_flush():80] Loading settings from /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/settings +2026-03-24 13:29:14,043 INFO MainThread:2988353 [wandb_setup.py:_flush():80] Loading settings from environment variables +2026-03-24 13:29:14,044 INFO MainThread:2988353 [wandb_init.py:setup_run_log_directory():714] Logging user logs to /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_132914-hfm6xj8d/logs/debug.log +2026-03-24 13:29:14,044 INFO MainThread:2988353 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_132914-hfm6xj8d/logs/debug-internal.log +2026-03-24 13:29:14,044 INFO MainThread:2988353 [wandb_init.py:init():841] calling init 
triggers +2026-03-24 13:29:14,044 INFO MainThread:2988353 [wandb_init.py:init():846] wandb.init called with sweep_config: {} +config: {'_wandb': {}} +2026-03-24 13:29:14,045 INFO MainThread:2988353 [wandb_init.py:init():889] starting backend +2026-03-24 13:29:14,419 INFO MainThread:2988353 [wandb_init.py:init():892] sending inform_init request +2026-03-24 13:29:14,499 INFO MainThread:2988353 [wandb_init.py:init():900] backend started and connected +2026-03-24 13:29:14,507 INFO MainThread:2988353 [wandb_init.py:init():970] updated telemetry +2026-03-24 13:29:14,509 INFO MainThread:2988353 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout +2026-03-24 13:29:19,996 INFO MainThread:2988353 [wandb_init.py:init():1041] starting run threads in backend +2026-03-24 13:29:20,146 INFO MainThread:2988353 [wandb_run.py:_console_start():2521] atexit reg +2026-03-24 13:29:20,146 INFO MainThread:2988353 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2026-03-24 13:29:20,146 INFO MainThread:2988353 [wandb_run.py:_redirect():2438] Wrapping output streams. +2026-03-24 13:29:20,146 INFO MainThread:2988353 [wandb_run.py:_redirect():2461] Redirects installed. +2026-03-24 13:29:20,153 INFO MainThread:2988353 [wandb_init.py:init():1081] run started, returning control to user process +2026-03-24 13:29:26,687 INFO wandb-AsyncioManager-main:2988353 [service_client.py:_forward_responses():80] Reached EOF. +2026-03-24 13:29:26,687 INFO wandb-AsyncioManager-main:2988353 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles. 
diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132914-hfm6xj8d/run-hfm6xj8d.wandb b/Finetune-GenomicBenchmarks/wandb/run-20260324_132914-hfm6xj8d/run-hfm6xj8d.wandb new file mode 100644 index 0000000000000000000000000000000000000000..90075ceda80472feba91f7e22bfbdb1353cbd129 Binary files /dev/null and b/Finetune-GenomicBenchmarks/wandb/run-20260324_132914-hfm6xj8d/run-hfm6xj8d.wandb differ diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132914-m4665xz5/files/config.yaml b/Finetune-GenomicBenchmarks/wandb/run-20260324_132914-m4665xz5/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe569fe6fd7b6cecdae076ee255b6e74cafb3f2c --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132914-m4665xz5/files/config.yaml @@ -0,0 +1,108 @@ +_wandb: + value: + cli_version: 0.23.1 + e: + 5iima4ou6sr8uyegzfl1fejkk2foxibh: + args: + - --model_name_or_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained + - --tokenizer_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json + - --trust_remote_code + - "True" + - --data_path + - /data/nanhuang/Nan/ft_data/drosophila_enhancers_stark/split + - --kmer + - "-1" + - --run_name + - base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42 + - --model_max_length + - "512" + - --per_device_train_batch_size + - "128" + - --per_device_eval_batch_size + - "128" + - --gradient_accumulation_steps + - "1" + - --learning_rate + - "3e-5" + - --weight_decay + - "0.05" + - --num_train_epochs + - "4" + - --lr_scheduler_type + - linear + - --warmup_steps + - "0" + - --warmup_ratio + - "0.15" + - --fp16 + - --output_dir + - genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42 + - --evaluation_strategy + - epoch + - --save_strategy + - epoch + - --load_best_model_at_end + - "True" + - --metric_for_best_model + - eval_f1 + - --greater_is_better + - "True" + - --save_total_limit + - "1" + - --logging_steps + 
- "100" + - --overwrite_output_dir + - "True" + - --log_level + - info + - --seed + - "42" + - --find_unused_parameters + - "False" + - --project_name + - genomic_bench_DNAbert2 + codePath: train.py + codePathLocal: train.py + cpu_count: 64 + cpu_count_logical: 128 + disk: + /: + total: "3768964489216" + used: "3559220994048" + email: n5huang@ucsd.edu + executable: /data/nanhuang/miniconda3/envs/bpe_v2/bin/python + host: u112222 + memory: + total: "1082030182400" + os: Linux-5.15.0-126-generic-x86_64-with-glibc2.35 + program: /data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py + python: CPython 3.9.18 + root: /data/nanhuang/Nan/Finetune-GenomicBenchmarks + startedAt: "2026-03-24T20:29:14.866221Z" + writerId: 5iima4ou6sr8uyegzfl1fejkk2foxibh + m: [] + python_version: 3.9.18 + t: + "1": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "2": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "4": 3.9.18 + "5": 0.23.1 + "6": 4.35.2 + "12": 0.23.1 + "13": linux-x86_64 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132914-m4665xz5/files/output.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132914-m4665xz5/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..d6ade975319961daab03095b15b10dde95c02d4d --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132914-m4665xz5/files/output.log @@ -0,0 +1,47 @@ +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... +Some weights of BertForSequenceClassification were not initialized from the model checkpoint at /data/nanhuang/Nan/models/DNAbert2_Pretrained and are newly initialized: ['bert.pooler.dense.weight', 'classifier.weight', 'classifier.bias', 'bert.pooler.dense.bias'] +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. 
+Traceback (most recent call last): + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 473, in + train() + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 424, in train + model = transformers.AutoModelForSequenceClassification.from_pretrained( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/modeling_utils.py", line 2271, in to + return super().to(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1369, in to + return self._apply(convert) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + [Previous line repeated 3 more times] + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 955, in _apply + param_applied = fn(param) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1355, in convert + return t.to( +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 0 has a total capacity of 47.53 GiB of which 22.31 MiB is free. Process 2988293 has 21.00 GiB memory in use. Process 2988247 has 13.12 GiB memory in use. Process 2988061 has 7.12 GiB memory in use. Process 2988353 has 574.00 MiB memory in use. Including non-PyTorch memory, this process has 434.00 MiB memory in use. Process 2988245 has 260.00 MiB memory in use. Process 2988374 has 260.00 MiB memory in use. Process 2988531 has 260.00 MiB memory in use. Process 2988064 has 260.00 MiB memory in use. Process 2988566 has 260.00 MiB memory in use. 
Process 2988668 has 260.00 MiB memory in use. Process 2988116 has 260.00 MiB memory in use. Process 2988093 has 260.00 MiB memory in use. Process 2988637 has 260.00 MiB memory in use. Process 2988554 has 260.00 MiB memory in use. Process 2988906 has 260.00 MiB memory in use. Process 2988874 has 260.00 MiB memory in use. Process 2988923 has 260.00 MiB memory in use. Process 2988286 has 260.00 MiB memory in use. Process 2989072 has 260.00 MiB memory in use. Process 2988448 has 260.00 MiB memory in use. Process 2988348 has 260.00 MiB memory in use. Process 2988736 has 260.00 MiB memory in use. Process 2988855 has 260.00 MiB memory in use. Process 2988454 has 10.00 MiB memory in use. Process 2988549 has 10.00 MiB memory in use. Process 2988814 has 14.00 MiB memory in use. Process 2988511 has 10.00 MiB memory in use. Process 2988942 has 14.00 MiB memory in use. Process 2989897 has 10.00 MiB memory in use. Process 2989914 has 16.00 MiB memory in use. Process 2989070 has 10.00 MiB memory in use. Process 2989165 has 10.00 MiB memory in use. Process 2989068 has 10.00 MiB memory in use. Process 2990500 has 10.00 MiB memory in use. Process 2989449 has 10.00 MiB memory in use. Process 2988447 has 20.00 MiB memory in use. Process 2988379 has 10.00 MiB memory in use. Process 2988518 has 10.00 MiB memory in use. Process 2988326 has 10.00 MiB memory in use. Process 2988462 has 10.00 MiB memory in use. Process 2989297 has 10.00 MiB memory in use. Process 2988927 has 10.00 MiB memory in use. Process 2988994 has 10.00 MiB memory in use. Of the allocated memory 158.23 MiB is allocated by PyTorch, and 15.77 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. 
See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) +Traceback (most recent call last): + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 473, in + train() + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 424, in train + model = transformers.AutoModelForSequenceClassification.from_pretrained( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/modeling_utils.py", line 2271, in to + return super().to(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1369, in to + return self._apply(convert) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + [Previous line repeated 3 more times] + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 955, in _apply + param_applied = fn(param) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1355, in convert + return t.to( +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 0 has a total capacity of 47.53 GiB of which 22.31 MiB is free. Process 2988293 has 21.00 GiB memory in use. Process 2988247 has 13.12 GiB memory in use. Process 2988061 has 7.12 GiB memory in use. Process 2988353 has 574.00 MiB memory in use. Including non-PyTorch memory, this process has 434.00 MiB memory in use. Process 2988245 has 260.00 MiB memory in use. Process 2988374 has 260.00 MiB memory in use. Process 2988531 has 260.00 MiB memory in use. 
Process 2988064 has 260.00 MiB memory in use. Process 2988566 has 260.00 MiB memory in use. Process 2988668 has 260.00 MiB memory in use. Process 2988116 has 260.00 MiB memory in use. Process 2988093 has 260.00 MiB memory in use. Process 2988637 has 260.00 MiB memory in use. Process 2988554 has 260.00 MiB memory in use. Process 2988906 has 260.00 MiB memory in use. Process 2988874 has 260.00 MiB memory in use. Process 2988923 has 260.00 MiB memory in use. Process 2988286 has 260.00 MiB memory in use. Process 2989072 has 260.00 MiB memory in use. Process 2988448 has 260.00 MiB memory in use. Process 2988348 has 260.00 MiB memory in use. Process 2988736 has 260.00 MiB memory in use. Process 2988855 has 260.00 MiB memory in use. Process 2988454 has 10.00 MiB memory in use. Process 2988549 has 10.00 MiB memory in use. Process 2988814 has 14.00 MiB memory in use. Process 2988511 has 10.00 MiB memory in use. Process 2988942 has 14.00 MiB memory in use. Process 2989897 has 10.00 MiB memory in use. Process 2989914 has 16.00 MiB memory in use. Process 2989070 has 10.00 MiB memory in use. Process 2989165 has 10.00 MiB memory in use. Process 2989068 has 10.00 MiB memory in use. Process 2990500 has 10.00 MiB memory in use. Process 2989449 has 10.00 MiB memory in use. Process 2988447 has 20.00 MiB memory in use. Process 2988379 has 10.00 MiB memory in use. Process 2988518 has 10.00 MiB memory in use. Process 2988326 has 10.00 MiB memory in use. Process 2988462 has 10.00 MiB memory in use. Process 2989297 has 10.00 MiB memory in use. Process 2988927 has 10.00 MiB memory in use. Process 2988994 has 10.00 MiB memory in use. Of the allocated memory 158.23 MiB is allocated by PyTorch, and 15.77 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. 
See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132914-m4665xz5/files/requirements.txt b/Finetune-GenomicBenchmarks/wandb/run-20260324_132914-m4665xz5/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..44d18d32ec4648cd530877d7c8c4758d5e887b9c --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132914-m4665xz5/files/requirements.txt @@ -0,0 +1,144 @@ +scipy==1.13.1 +jupyter_core==5.8.1 +smmap==5.0.2 +yarl==1.22.0 +executing==2.2.0 +cffi==2.0.0 +mkl_random==1.2.8 +traitlets==5.14.3 +wandb==0.23.1 +annotated-types==0.7.0 +evaluate==0.4.6 +kiwisolver==1.4.4 +Jinja2==3.1.6 +pyparsing==3.2.0 +mpmath==1.3.0 +debugpy==1.8.16 +nvidia-cuda-nvrtc-cu12==12.8.93 +docker-pycreds==0.4.0 +pycparser==2.23 +anyio==4.12.0 +safetensors==0.7.0 +matplotlib-inline==0.1.7 +Pygments==2.19.2 +numpy==2.0.2 +nvidia-cuda-cupti-cu12==12.8.90 +Bottleneck==1.4.2 +matplotlib==3.9.2 +numexpr==2.10.1 +sip==6.7.12 +aiohappyeyeballs==2.6.1 +ptyprocess==0.7.0 +fsspec==2025.7.0 +accelerate==0.25.0 +zipp==3.23.0 +PyQt5_sip==12.13.0 +pure_eval==0.2.3 +regex==2025.11.3 +aiosignal==1.4.0 +certifi==2025.10.5 +transformers==4.35.2 +mkl-service==2.4.0 +httpx==0.28.1 +mkl_fft==1.3.11 +pickleshare==0.7.5 +ipykernel==6.30.1 +nvidia-nvtx-cu12==12.8.90 +nvidia-cufft-cu12==11.3.3.83 +triton==3.4.0 +numba==0.60.0 +psutil==7.0.0 +contourpy==1.2.1 +PyQt5==5.15.10 +packaging==25.0 +datasets==4.4.1 +ipython==8.18.1 +sympy==1.14.0 +nvidia-cusolver-cu12==11.7.3.90 +multidict==6.7.0 +jupyter_client==8.6.3 +setuptools==80.9.0 +prompt_toolkit==3.0.51 +six==1.17.0 +GitPython==3.1.45 +pydantic==2.11.7 +nvidia-cublas-cu12==12.8.4.1 +aiohttp==3.13.2 +tzdata==2025.2 +importlib_metadata==8.7.0 +biopython==1.85 +httpcore==1.0.9 +python-dateutil==2.9.0.post0 +llvmlite==0.43.0 +pandas==2.3.3 +scikit-learn==1.6.1 +asttokens==3.0.0 +joblib==1.5.3 +h11==0.16.0 
+charset-normalizer==3.4.4 +pyzmq==27.0.2 +multiprocess==0.70.18 +nvidia-nvjitlink-cu12==12.8.93 +sentry-sdk==2.35.0 +pytz==2025.2 +pydantic_core==2.33.2 +MarkupSafe==3.0.3 +brotlicffi==1.0.9.2 +stack_data==0.6.3 +tqdm==4.67.1 +pynndescent==0.5.13 +importlib_resources==6.5.2 +ply==3.11 +pyarrow==21.0.0 +tokenizers==0.15.2 +exceptiongroup==1.3.1 +nvidia-cusparse-cu12==12.5.8.93 +comm==0.2.3 +pillow==11.3.0 +nvidia-cusparselt-cu12==0.7.1 +protobuf==3.20.3 +urllib3==2.5.0 +wheel==0.45.1 +wcwidth==0.2.13 +appdirs==1.4.4 +PySocks==1.7.1 +PyQt6_sip==13.10.2 +umap-learn==0.5.9.post2 +attrs==25.4.0 +platformdirs==4.3.8 +nvidia-cuda-runtime-cu12==12.8.90 +typing-inspection==0.4.1 +huggingface_hub==0.34.4 +decorator==5.2.1 +filelock==3.17.0 +nvidia-nccl-cu12==2.27.3 +fonttools==4.60.1 +xxhash==3.6.0 +dill==0.4.0 +threadpoolctl==3.6.0 +parso==0.8.4 +pysam==0.9.1 +frozenlist==1.8.0 +typing_extensions==4.15.0 +propcache==0.4.1 +tomli==2.2.1 +click==8.1.8 +nvidia-cudnn-cu12==9.10.2.21 +gitdb==4.0.12 +pip==25.3 +tornado==6.5.2 +networkx==3.2.1 +jedi==0.19.2 +idna==3.11 +pexpect==4.9.0 +async-timeout==5.0.1 +hf-xet==1.1.8 +nvidia-curand-cu12==10.3.9.90 +PyYAML==6.0.2 +nvidia-cufile-cu12==1.13.1.3 +setproctitle==1.3.6 +eval_type_backport==0.2.2 +requests==2.32.5 +nest-asyncio==1.6.0 +torch==2.8.0 +cycler==0.11.0 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132914-m4665xz5/files/wandb-metadata.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_132914-m4665xz5/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..a296058be06e0fb7bb904e16e00511a635e6612d --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132914-m4665xz5/files/wandb-metadata.json @@ -0,0 +1,85 @@ +{ + "os": "Linux-5.15.0-126-generic-x86_64-with-glibc2.35", + "python": "CPython 3.9.18", + "startedAt": "2026-03-24T20:29:14.866221Z", + "args": [ + "--model_name_or_path", + "/data/nanhuang/Nan/models/DNAbert2_Pretrained", + "--tokenizer_path", + 
"/data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json", + "--trust_remote_code", + "True", + "--data_path", + "/data/nanhuang/Nan/ft_data/drosophila_enhancers_stark/split", + "--kmer", + "-1", + "--run_name", + "base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42", + "--model_max_length", + "512", + "--per_device_train_batch_size", + "128", + "--per_device_eval_batch_size", + "128", + "--gradient_accumulation_steps", + "1", + "--learning_rate", + "3e-5", + "--weight_decay", + "0.05", + "--num_train_epochs", + "4", + "--lr_scheduler_type", + "linear", + "--warmup_steps", + "0", + "--warmup_ratio", + "0.15", + "--fp16", + "--output_dir", + "genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42", + "--evaluation_strategy", + "epoch", + "--save_strategy", + "epoch", + "--load_best_model_at_end", + "True", + "--metric_for_best_model", + "eval_f1", + "--greater_is_better", + "True", + "--save_total_limit", + "1", + "--logging_steps", + "100", + "--overwrite_output_dir", + "True", + "--log_level", + "info", + "--seed", + "42", + "--find_unused_parameters", + "False", + "--project_name", + "genomic_bench_DNAbert2" + ], + "program": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", + "codePath": "train.py", + "codePathLocal": "train.py", + "email": "n5huang@ucsd.edu", + "root": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks", + "host": "u112222", + "executable": "/data/nanhuang/miniconda3/envs/bpe_v2/bin/python", + "cpu_count": 64, + "cpu_count_logical": 128, + "disk": { + "/": { + "total": "3768964489216", + "used": "3559220994048" + } + }, + "memory": { + "total": "1082030182400" + }, + "writerId": "5iima4ou6sr8uyegzfl1fejkk2foxibh" +} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132914-m4665xz5/files/wandb-summary.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_132914-m4665xz5/files/wandb-summary.json new file mode 100644 index 
0000000000000000000000000000000000000000..5b78f04402bed59a657ef9c59f844f15fd8567dc --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132914-m4665xz5/files/wandb-summary.json @@ -0,0 +1 @@ +{"_runtime":7,"_wandb":{"runtime":7}} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132914-m4665xz5/logs/debug-core.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132914-m4665xz5/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..abd685acca95bcf5f1692f9ac5a8ad0c430d49cb --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132914-m4665xz5/logs/debug-core.log @@ -0,0 +1,28 @@ +{"time":"2026-03-24T13:29:15.066821952-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpff2f6npc/port-2988034.txt","pid":2988034,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:15.068377703-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988034-2994830-2817754851/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:15.068591322-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988034} +{"time":"2026-03-24T13:29:15.230481527-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:15.31643103-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"m4665xz5","id":"1(@)"} +{"time":"2026-03-24T13:29:15.73019138-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpv8bdn478/port-2988245.txt","pid":2988245,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:15.738448561-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988245-2995250-1405629143/socket","Net":"unix"}} 
+{"time":"2026-03-24T13:29:15.738690999-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988245} +{"time":"2026-03-24T13:29:15.891072311-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:15.954890245-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"iiybukud","id":"1(@)"} +{"time":"2026-03-24T13:29:20.538540835-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"m4665xz5","id":"1(@)"} +{"time":"2026-03-24T13:29:21.131866086-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"iiybukud","id":"1(@)"} +{"time":"2026-03-24T13:29:28.620377373-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:28.62070372-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:28.62080761-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:28.6208179-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:28.620999869-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988034-2994830-2817754851/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:28.727113743-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:28.727225113-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:28.727337612-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:28.727310602-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:28.728156397-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988245-2995250-1405629143/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:29.388991541-07:00","level":"INFO","msg":"handleInformTeardown: server 
shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:29.38906443-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:29.38909627-07:00","level":"INFO","msg":"server is closed"} +{"time":"2026-03-24T13:29:32.478018119-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:32.478092039-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:32.478117279-07:00","level":"INFO","msg":"server is closed"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132914-m4665xz5/logs/debug-internal.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132914-m4665xz5/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..3a8d8a9fd9698ce7ceb403b1ea8f682f3220e116 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132914-m4665xz5/logs/debug-internal.log @@ -0,0 +1,12 @@ +{"time":"2026-03-24T13:29:15.316768988-07:00","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2026-03-24T13:29:20.395058851-07:00","level":"ERROR","msg":"monitor: failed to initialize GPU resource: monitor: could not get GPU binary port: timeout reading portfile /tmp/wandb-system-monitor-portfile-756527432"} +{"time":"2026-03-24T13:29:20.538246957-07:00","level":"INFO","msg":"stream: created new stream","id":"m4665xz5"} +{"time":"2026-03-24T13:29:20.538387826-07:00","level":"INFO","msg":"handler: started","stream_id":"m4665xz5"} +{"time":"2026-03-24T13:29:20.538521445-07:00","level":"INFO","msg":"stream: started","id":"m4665xz5"} +{"time":"2026-03-24T13:29:20.538558525-07:00","level":"INFO","msg":"sender: started","stream_id":"m4665xz5"} +{"time":"2026-03-24T13:29:20.538565255-07:00","level":"INFO","msg":"writer: started","stream_id":"m4665xz5"} +{"time":"2026-03-24T13:29:28.620548232-07:00","level":"INFO","msg":"stream: 
closing","id":"m4665xz5"} +{"time":"2026-03-24T13:29:29.088412713-07:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2026-03-24T13:29:29.336999257-07:00","level":"INFO","msg":"handler: closed","stream_id":"m4665xz5"} +{"time":"2026-03-24T13:29:29.337247336-07:00","level":"INFO","msg":"sender: closed","stream_id":"m4665xz5"} +{"time":"2026-03-24T13:29:29.337271526-07:00","level":"INFO","msg":"stream: closed","id":"m4665xz5"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132914-m4665xz5/logs/debug.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132914-m4665xz5/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..7723e4544365cef3ef870363c0df740abb134a92 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132914-m4665xz5/logs/debug.log @@ -0,0 +1,23 @@ +2026-03-24 13:29:14,873 INFO MainThread:2988034 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1 +2026-03-24 13:29:14,873 INFO MainThread:2988034 [wandb_setup.py:_flush():80] Configure stats pid to 2988034 +2026-03-24 13:29:14,873 INFO MainThread:2988034 [wandb_setup.py:_flush():80] Loading settings from /home/nanhuang/.config/wandb/settings +2026-03-24 13:29:14,873 INFO MainThread:2988034 [wandb_setup.py:_flush():80] Loading settings from /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/settings +2026-03-24 13:29:14,873 INFO MainThread:2988034 [wandb_setup.py:_flush():80] Loading settings from environment variables +2026-03-24 13:29:14,873 INFO MainThread:2988034 [wandb_init.py:setup_run_log_directory():714] Logging user logs to /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_132914-m4665xz5/logs/debug.log +2026-03-24 13:29:14,874 INFO MainThread:2988034 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_132914-m4665xz5/logs/debug-internal.log +2026-03-24 13:29:14,874 INFO MainThread:2988034 
[wandb_init.py:init():841] calling init triggers +2026-03-24 13:29:14,874 INFO MainThread:2988034 [wandb_init.py:init():846] wandb.init called with sweep_config: {} +config: {'_wandb': {}} +2026-03-24 13:29:14,874 INFO MainThread:2988034 [wandb_init.py:init():889] starting backend +2026-03-24 13:29:15,231 INFO MainThread:2988034 [wandb_init.py:init():892] sending inform_init request +2026-03-24 13:29:15,314 INFO MainThread:2988034 [wandb_init.py:init():900] backend started and connected +2026-03-24 13:29:15,322 INFO MainThread:2988034 [wandb_init.py:init():970] updated telemetry +2026-03-24 13:29:15,324 INFO MainThread:2988034 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout +2026-03-24 13:29:21,084 INFO MainThread:2988034 [wandb_init.py:init():1041] starting run threads in backend +2026-03-24 13:29:21,222 INFO MainThread:2988034 [wandb_run.py:_console_start():2521] atexit reg +2026-03-24 13:29:21,222 INFO MainThread:2988034 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2026-03-24 13:29:21,235 INFO MainThread:2988034 [wandb_run.py:_redirect():2438] Wrapping output streams. +2026-03-24 13:29:21,236 INFO MainThread:2988034 [wandb_run.py:_redirect():2461] Redirects installed. +2026-03-24 13:29:21,251 INFO MainThread:2988034 [wandb_init.py:init():1081] run started, returning control to user process +2026-03-24 13:29:28,620 INFO wandb-AsyncioManager-main:2988034 [service_client.py:_forward_responses():80] Reached EOF. +2026-03-24 13:29:28,620 INFO wandb-AsyncioManager-main:2988034 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles. 
diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132914-m4665xz5/run-m4665xz5.wandb b/Finetune-GenomicBenchmarks/wandb/run-20260324_132914-m4665xz5/run-m4665xz5.wandb new file mode 100644 index 0000000000000000000000000000000000000000..7a3f77b9d2134b7a724604b60d18892723cea3d8 Binary files /dev/null and b/Finetune-GenomicBenchmarks/wandb/run-20260324_132914-m4665xz5/run-m4665xz5.wandb differ diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132915-iiybukud/files/config.yaml b/Finetune-GenomicBenchmarks/wandb/run-20260324_132915-iiybukud/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..be3ddf01308286bb50566b588f53ec02ba3340aa --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132915-iiybukud/files/config.yaml @@ -0,0 +1,108 @@ +_wandb: + value: + cli_version: 0.23.1 + e: + 1ypdg50fwinzh29azprzddew9yncemlr: + args: + - --model_name_or_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained + - --tokenizer_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json + - --trust_remote_code + - "True" + - --data_path + - /data/nanhuang/Nan/ft_data/drosophila_enhancers_stark/split + - --kmer + - "-1" + - --run_name + - base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42 + - --model_max_length + - "512" + - --per_device_train_batch_size + - "128" + - --per_device_eval_batch_size + - "128" + - --gradient_accumulation_steps + - "1" + - --learning_rate + - "3e-5" + - --weight_decay + - "0.05" + - --num_train_epochs + - "4" + - --lr_scheduler_type + - linear + - --warmup_steps + - "0" + - --warmup_ratio + - "0.15" + - --fp16 + - --output_dir + - genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42 + - --evaluation_strategy + - epoch + - --save_strategy + - epoch + - --load_best_model_at_end + - "True" + - --metric_for_best_model + - eval_f1 + - --greater_is_better + - "True" + - --save_total_limit + - "1" + - --logging_steps + 
- "100" + - --overwrite_output_dir + - "True" + - --log_level + - info + - --seed + - "42" + - --find_unused_parameters + - "False" + - --project_name + - genomic_bench_DNAbert2 + codePath: train.py + codePathLocal: train.py + cpu_count: 64 + cpu_count_logical: 128 + disk: + /: + total: "3768964489216" + used: "3559221207040" + email: n5huang@ucsd.edu + executable: /data/nanhuang/miniconda3/envs/bpe_v2/bin/python + host: u112222 + memory: + total: "1082030182400" + os: Linux-5.15.0-126-generic-x86_64-with-glibc2.35 + program: /data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py + python: CPython 3.9.18 + root: /data/nanhuang/Nan/Finetune-GenomicBenchmarks + startedAt: "2026-03-24T20:29:15.512358Z" + writerId: 1ypdg50fwinzh29azprzddew9yncemlr + m: [] + python_version: 3.9.18 + t: + "1": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "2": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "4": 3.9.18 + "5": 0.23.1 + "6": 4.35.2 + "12": 0.23.1 + "13": linux-x86_64 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132915-iiybukud/files/output.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132915-iiybukud/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..91efb1b66915f54270bde37d70d8152258aa5280 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132915-iiybukud/files/output.log @@ -0,0 +1,45 @@ +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... +Some weights of BertForSequenceClassification were not initialized from the model checkpoint at /data/nanhuang/Nan/models/DNAbert2_Pretrained and are newly initialized: ['classifier.weight', 'classifier.bias', 'bert.pooler.dense.bias', 'bert.pooler.dense.weight'] +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. 
+Traceback (most recent call last): + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 473, in + train() + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 424, in train + model = transformers.AutoModelForSequenceClassification.from_pretrained( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/modeling_utils.py", line 2271, in to + return super().to(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1369, in to + return self._apply(convert) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 955, in _apply + param_applied = fn(param) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1355, in convert + return t.to( +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 12.00 MiB. GPU 0 has a total capacity of 47.53 GiB of which 26.38 MiB is free. Process 2988293 has 21.00 GiB memory in use. Process 2988247 has 13.12 GiB memory in use. Process 2988061 has 7.12 GiB memory in use. Process 2988353 has 574.00 MiB memory in use. Process 2988034 has 434.00 MiB memory in use. Including non-PyTorch memory, this process has 260.00 MiB memory in use. Process 2988374 has 260.00 MiB memory in use. Process 2988531 has 260.00 MiB memory in use. Process 2988064 has 260.00 MiB memory in use. Process 2988566 has 260.00 MiB memory in use. Process 2988668 has 260.00 MiB memory in use. 
Process 2988116 has 260.00 MiB memory in use. Process 2988093 has 260.00 MiB memory in use. Process 2988637 has 260.00 MiB memory in use. Process 2988554 has 260.00 MiB memory in use. Process 2988906 has 260.00 MiB memory in use. Process 2988874 has 260.00 MiB memory in use. Process 2988923 has 260.00 MiB memory in use. Process 2988286 has 260.00 MiB memory in use. Process 2989072 has 260.00 MiB memory in use. Process 2988448 has 260.00 MiB memory in use. Process 2988348 has 260.00 MiB memory in use. Process 2988736 has 260.00 MiB memory in use. Process 2988855 has 260.00 MiB memory in use. Process 2988549 has 10.00 MiB memory in use. Process 2988814 has 14.00 MiB memory in use. Process 2988511 has 10.00 MiB memory in use. Process 2988942 has 14.00 MiB memory in use. Process 2989897 has 10.00 MiB memory in use. Process 2989914 has 16.00 MiB memory in use. Process 2989070 has 10.00 MiB memory in use. Process 2989165 has 10.00 MiB memory in use. Process 2989068 has 10.00 MiB memory in use. Process 2990500 has 10.00 MiB memory in use. Process 2989449 has 10.00 MiB memory in use. Process 2988447 has 20.00 MiB memory in use. Process 2988379 has 10.00 MiB memory in use. Process 2988518 has 10.00 MiB memory in use. Process 2988326 has 10.00 MiB memory in use. Process 2988462 has 10.00 MiB memory in use. Process 2989297 has 10.00 MiB memory in use. Process 2988927 has 10.00 MiB memory in use. Process 2988994 has 10.00 MiB memory in use. Of the allocated memory 0 bytes is allocated by PyTorch, and 0 bytes is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. 
See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) +Traceback (most recent call last): + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 473, in + train() + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 424, in train + model = transformers.AutoModelForSequenceClassification.from_pretrained( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/modeling_utils.py", line 2271, in to + return super().to(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1369, in to + return self._apply(convert) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 955, in _apply + param_applied = fn(param) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1355, in convert + return t.to( +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 12.00 MiB. GPU 0 has a total capacity of 47.53 GiB of which 26.38 MiB is free. Process 2988293 has 21.00 GiB memory in use. Process 2988247 has 13.12 GiB memory in use. Process 2988061 has 7.12 GiB memory in use. Process 2988353 has 574.00 MiB memory in use. Process 2988034 has 434.00 MiB memory in use. Including non-PyTorch memory, this process has 260.00 MiB memory in use. Process 2988374 has 260.00 MiB memory in use. Process 2988531 has 260.00 MiB memory in use. 
Process 2988064 has 260.00 MiB memory in use. Process 2988566 has 260.00 MiB memory in use. Process 2988668 has 260.00 MiB memory in use. Process 2988116 has 260.00 MiB memory in use. Process 2988093 has 260.00 MiB memory in use. Process 2988637 has 260.00 MiB memory in use. Process 2988554 has 260.00 MiB memory in use. Process 2988906 has 260.00 MiB memory in use. Process 2988874 has 260.00 MiB memory in use. Process 2988923 has 260.00 MiB memory in use. Process 2988286 has 260.00 MiB memory in use. Process 2989072 has 260.00 MiB memory in use. Process 2988448 has 260.00 MiB memory in use. Process 2988348 has 260.00 MiB memory in use. Process 2988736 has 260.00 MiB memory in use. Process 2988855 has 260.00 MiB memory in use. Process 2988549 has 10.00 MiB memory in use. Process 2988814 has 14.00 MiB memory in use. Process 2988511 has 10.00 MiB memory in use. Process 2988942 has 14.00 MiB memory in use. Process 2989897 has 10.00 MiB memory in use. Process 2989914 has 16.00 MiB memory in use. Process 2989070 has 10.00 MiB memory in use. Process 2989165 has 10.00 MiB memory in use. Process 2989068 has 10.00 MiB memory in use. Process 2990500 has 10.00 MiB memory in use. Process 2989449 has 10.00 MiB memory in use. Process 2988447 has 20.00 MiB memory in use. Process 2988379 has 10.00 MiB memory in use. Process 2988518 has 10.00 MiB memory in use. Process 2988326 has 10.00 MiB memory in use. Process 2988462 has 10.00 MiB memory in use. Process 2989297 has 10.00 MiB memory in use. Process 2988927 has 10.00 MiB memory in use. Process 2988994 has 10.00 MiB memory in use. Of the allocated memory 0 bytes is allocated by PyTorch, and 0 bytes is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. 
See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132915-iiybukud/files/requirements.txt b/Finetune-GenomicBenchmarks/wandb/run-20260324_132915-iiybukud/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..44d18d32ec4648cd530877d7c8c4758d5e887b9c --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132915-iiybukud/files/requirements.txt @@ -0,0 +1,144 @@ +scipy==1.13.1 +jupyter_core==5.8.1 +smmap==5.0.2 +yarl==1.22.0 +executing==2.2.0 +cffi==2.0.0 +mkl_random==1.2.8 +traitlets==5.14.3 +wandb==0.23.1 +annotated-types==0.7.0 +evaluate==0.4.6 +kiwisolver==1.4.4 +Jinja2==3.1.6 +pyparsing==3.2.0 +mpmath==1.3.0 +debugpy==1.8.16 +nvidia-cuda-nvrtc-cu12==12.8.93 +docker-pycreds==0.4.0 +pycparser==2.23 +anyio==4.12.0 +safetensors==0.7.0 +matplotlib-inline==0.1.7 +Pygments==2.19.2 +numpy==2.0.2 +nvidia-cuda-cupti-cu12==12.8.90 +Bottleneck==1.4.2 +matplotlib==3.9.2 +numexpr==2.10.1 +sip==6.7.12 +aiohappyeyeballs==2.6.1 +ptyprocess==0.7.0 +fsspec==2025.7.0 +accelerate==0.25.0 +zipp==3.23.0 +PyQt5_sip==12.13.0 +pure_eval==0.2.3 +regex==2025.11.3 +aiosignal==1.4.0 +certifi==2025.10.5 +transformers==4.35.2 +mkl-service==2.4.0 +httpx==0.28.1 +mkl_fft==1.3.11 +pickleshare==0.7.5 +ipykernel==6.30.1 +nvidia-nvtx-cu12==12.8.90 +nvidia-cufft-cu12==11.3.3.83 +triton==3.4.0 +numba==0.60.0 +psutil==7.0.0 +contourpy==1.2.1 +PyQt5==5.15.10 +packaging==25.0 +datasets==4.4.1 +ipython==8.18.1 +sympy==1.14.0 +nvidia-cusolver-cu12==11.7.3.90 +multidict==6.7.0 +jupyter_client==8.6.3 +setuptools==80.9.0 +prompt_toolkit==3.0.51 +six==1.17.0 +GitPython==3.1.45 +pydantic==2.11.7 +nvidia-cublas-cu12==12.8.4.1 +aiohttp==3.13.2 +tzdata==2025.2 +importlib_metadata==8.7.0 +biopython==1.85 +httpcore==1.0.9 +python-dateutil==2.9.0.post0 +llvmlite==0.43.0 +pandas==2.3.3 +scikit-learn==1.6.1 +asttokens==3.0.0 +joblib==1.5.3 +h11==0.16.0 
+charset-normalizer==3.4.4 +pyzmq==27.0.2 +multiprocess==0.70.18 +nvidia-nvjitlink-cu12==12.8.93 +sentry-sdk==2.35.0 +pytz==2025.2 +pydantic_core==2.33.2 +MarkupSafe==3.0.3 +brotlicffi==1.0.9.2 +stack_data==0.6.3 +tqdm==4.67.1 +pynndescent==0.5.13 +importlib_resources==6.5.2 +ply==3.11 +pyarrow==21.0.0 +tokenizers==0.15.2 +exceptiongroup==1.3.1 +nvidia-cusparse-cu12==12.5.8.93 +comm==0.2.3 +pillow==11.3.0 +nvidia-cusparselt-cu12==0.7.1 +protobuf==3.20.3 +urllib3==2.5.0 +wheel==0.45.1 +wcwidth==0.2.13 +appdirs==1.4.4 +PySocks==1.7.1 +PyQt6_sip==13.10.2 +umap-learn==0.5.9.post2 +attrs==25.4.0 +platformdirs==4.3.8 +nvidia-cuda-runtime-cu12==12.8.90 +typing-inspection==0.4.1 +huggingface_hub==0.34.4 +decorator==5.2.1 +filelock==3.17.0 +nvidia-nccl-cu12==2.27.3 +fonttools==4.60.1 +xxhash==3.6.0 +dill==0.4.0 +threadpoolctl==3.6.0 +parso==0.8.4 +pysam==0.9.1 +frozenlist==1.8.0 +typing_extensions==4.15.0 +propcache==0.4.1 +tomli==2.2.1 +click==8.1.8 +nvidia-cudnn-cu12==9.10.2.21 +gitdb==4.0.12 +pip==25.3 +tornado==6.5.2 +networkx==3.2.1 +jedi==0.19.2 +idna==3.11 +pexpect==4.9.0 +async-timeout==5.0.1 +hf-xet==1.1.8 +nvidia-curand-cu12==10.3.9.90 +PyYAML==6.0.2 +nvidia-cufile-cu12==1.13.1.3 +setproctitle==1.3.6 +eval_type_backport==0.2.2 +requests==2.32.5 +nest-asyncio==1.6.0 +torch==2.8.0 +cycler==0.11.0 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132915-iiybukud/files/wandb-metadata.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_132915-iiybukud/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..11e2957a8a39fea89f1c2c1a82cb0eda75926246 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132915-iiybukud/files/wandb-metadata.json @@ -0,0 +1,85 @@ +{ + "os": "Linux-5.15.0-126-generic-x86_64-with-glibc2.35", + "python": "CPython 3.9.18", + "startedAt": "2026-03-24T20:29:15.512358Z", + "args": [ + "--model_name_or_path", + "/data/nanhuang/Nan/models/DNAbert2_Pretrained", + "--tokenizer_path", + 
"/data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json", + "--trust_remote_code", + "True", + "--data_path", + "/data/nanhuang/Nan/ft_data/drosophila_enhancers_stark/split", + "--kmer", + "-1", + "--run_name", + "base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42", + "--model_max_length", + "512", + "--per_device_train_batch_size", + "128", + "--per_device_eval_batch_size", + "128", + "--gradient_accumulation_steps", + "1", + "--learning_rate", + "3e-5", + "--weight_decay", + "0.05", + "--num_train_epochs", + "4", + "--lr_scheduler_type", + "linear", + "--warmup_steps", + "0", + "--warmup_ratio", + "0.15", + "--fp16", + "--output_dir", + "genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42", + "--evaluation_strategy", + "epoch", + "--save_strategy", + "epoch", + "--load_best_model_at_end", + "True", + "--metric_for_best_model", + "eval_f1", + "--greater_is_better", + "True", + "--save_total_limit", + "1", + "--logging_steps", + "100", + "--overwrite_output_dir", + "True", + "--log_level", + "info", + "--seed", + "42", + "--find_unused_parameters", + "False", + "--project_name", + "genomic_bench_DNAbert2" + ], + "program": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", + "codePath": "train.py", + "codePathLocal": "train.py", + "email": "n5huang@ucsd.edu", + "root": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks", + "host": "u112222", + "executable": "/data/nanhuang/miniconda3/envs/bpe_v2/bin/python", + "cpu_count": 64, + "cpu_count_logical": 128, + "disk": { + "/": { + "total": "3768964489216", + "used": "3559221207040" + } + }, + "memory": { + "total": "1082030182400" + }, + "writerId": "1ypdg50fwinzh29azprzddew9yncemlr" +} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132915-iiybukud/files/wandb-summary.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_132915-iiybukud/files/wandb-summary.json new file mode 100644 index 
0000000000000000000000000000000000000000..533452d4a934da3482e9f08995d671c42966eba9 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132915-iiybukud/files/wandb-summary.json @@ -0,0 +1 @@ +{"_runtime":6,"_wandb":{"runtime":6}} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132915-iiybukud/logs/debug-core.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132915-iiybukud/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..abd685acca95bcf5f1692f9ac5a8ad0c430d49cb --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132915-iiybukud/logs/debug-core.log @@ -0,0 +1,28 @@ +{"time":"2026-03-24T13:29:15.066821952-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpff2f6npc/port-2988034.txt","pid":2988034,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:15.068377703-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988034-2994830-2817754851/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:15.068591322-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988034} +{"time":"2026-03-24T13:29:15.230481527-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:15.31643103-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"m4665xz5","id":"1(@)"} +{"time":"2026-03-24T13:29:15.73019138-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpv8bdn478/port-2988245.txt","pid":2988245,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:15.738448561-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988245-2995250-1405629143/socket","Net":"unix"}} 
+{"time":"2026-03-24T13:29:15.738690999-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988245} +{"time":"2026-03-24T13:29:15.891072311-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:15.954890245-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"iiybukud","id":"1(@)"} +{"time":"2026-03-24T13:29:20.538540835-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"m4665xz5","id":"1(@)"} +{"time":"2026-03-24T13:29:21.131866086-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"iiybukud","id":"1(@)"} +{"time":"2026-03-24T13:29:28.620377373-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:28.62070372-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:28.62080761-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:28.6208179-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:28.620999869-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988034-2994830-2817754851/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:28.727113743-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:28.727225113-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:28.727337612-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:28.727310602-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:28.728156397-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988245-2995250-1405629143/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:29.388991541-07:00","level":"INFO","msg":"handleInformTeardown: server 
shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:29.38906443-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:29.38909627-07:00","level":"INFO","msg":"server is closed"} +{"time":"2026-03-24T13:29:32.478018119-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:32.478092039-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:32.478117279-07:00","level":"INFO","msg":"server is closed"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132915-iiybukud/logs/debug-internal.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132915-iiybukud/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..034f4d57925e83e70ff59833bbdc2d7fbaa490fb --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132915-iiybukud/logs/debug-internal.log @@ -0,0 +1,12 @@ +{"time":"2026-03-24T13:29:15.955334492-07:00","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2026-03-24T13:29:21.018516635-07:00","level":"ERROR","msg":"monitor: failed to initialize GPU resource: monitor: could not get GPU binary port: timeout reading portfile /tmp/wandb-system-monitor-portfile-3552993596"} +{"time":"2026-03-24T13:29:21.131490468-07:00","level":"INFO","msg":"stream: created new stream","id":"iiybukud"} +{"time":"2026-03-24T13:29:21.131638548-07:00","level":"INFO","msg":"handler: started","stream_id":"iiybukud"} +{"time":"2026-03-24T13:29:21.131853876-07:00","level":"INFO","msg":"stream: started","id":"iiybukud"} +{"time":"2026-03-24T13:29:21.131867706-07:00","level":"INFO","msg":"writer: started","stream_id":"iiybukud"} +{"time":"2026-03-24T13:29:21.131872636-07:00","level":"INFO","msg":"sender: started","stream_id":"iiybukud"} +{"time":"2026-03-24T13:29:28.727252732-07:00","level":"INFO","msg":"stream: 
closing","id":"iiybukud"} +{"time":"2026-03-24T13:29:29.142118897-07:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2026-03-24T13:29:32.474060483-07:00","level":"INFO","msg":"handler: closed","stream_id":"iiybukud"} +{"time":"2026-03-24T13:29:32.474266932-07:00","level":"INFO","msg":"sender: closed","stream_id":"iiybukud"} +{"time":"2026-03-24T13:29:32.474281421-07:00","level":"INFO","msg":"stream: closed","id":"iiybukud"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132915-iiybukud/logs/debug.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132915-iiybukud/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..aac47db4264144317fd6cb3574d3c7da43e016ac --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132915-iiybukud/logs/debug.log @@ -0,0 +1,23 @@ +2026-03-24 13:29:15,523 INFO MainThread:2988245 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1 +2026-03-24 13:29:15,523 INFO MainThread:2988245 [wandb_setup.py:_flush():80] Configure stats pid to 2988245 +2026-03-24 13:29:15,523 INFO MainThread:2988245 [wandb_setup.py:_flush():80] Loading settings from /home/nanhuang/.config/wandb/settings +2026-03-24 13:29:15,523 INFO MainThread:2988245 [wandb_setup.py:_flush():80] Loading settings from /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/settings +2026-03-24 13:29:15,524 INFO MainThread:2988245 [wandb_setup.py:_flush():80] Loading settings from environment variables +2026-03-24 13:29:15,524 INFO MainThread:2988245 [wandb_init.py:setup_run_log_directory():714] Logging user logs to /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_132915-iiybukud/logs/debug.log +2026-03-24 13:29:15,524 INFO MainThread:2988245 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_132915-iiybukud/logs/debug-internal.log +2026-03-24 13:29:15,525 INFO MainThread:2988245 
[wandb_init.py:init():841] calling init triggers +2026-03-24 13:29:15,525 INFO MainThread:2988245 [wandb_init.py:init():846] wandb.init called with sweep_config: {} +config: {'_wandb': {}} +2026-03-24 13:29:15,525 INFO MainThread:2988245 [wandb_init.py:init():889] starting backend +2026-03-24 13:29:15,891 INFO MainThread:2988245 [wandb_init.py:init():892] sending inform_init request +2026-03-24 13:29:15,952 INFO MainThread:2988245 [wandb_init.py:init():900] backend started and connected +2026-03-24 13:29:15,960 INFO MainThread:2988245 [wandb_init.py:init():970] updated telemetry +2026-03-24 13:29:15,961 INFO MainThread:2988245 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout +2026-03-24 13:29:21,830 INFO MainThread:2988245 [wandb_init.py:init():1041] starting run threads in backend +2026-03-24 13:29:21,981 INFO MainThread:2988245 [wandb_run.py:_console_start():2521] atexit reg +2026-03-24 13:29:21,981 INFO MainThread:2988245 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2026-03-24 13:29:21,981 INFO MainThread:2988245 [wandb_run.py:_redirect():2438] Wrapping output streams. +2026-03-24 13:29:21,981 INFO MainThread:2988245 [wandb_run.py:_redirect():2461] Redirects installed. +2026-03-24 13:29:21,986 INFO MainThread:2988245 [wandb_init.py:init():1081] run started, returning control to user process +2026-03-24 13:29:28,727 INFO wandb-AsyncioManager-main:2988245 [service_client.py:_forward_responses():80] Reached EOF. +2026-03-24 13:29:28,727 INFO wandb-AsyncioManager-main:2988245 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles. 
diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132915-iiybukud/run-iiybukud.wandb b/Finetune-GenomicBenchmarks/wandb/run-20260324_132915-iiybukud/run-iiybukud.wandb new file mode 100644 index 0000000000000000000000000000000000000000..c1b70c5b106e9ae05421bd3b0700d95e3b0ec92d Binary files /dev/null and b/Finetune-GenomicBenchmarks/wandb/run-20260324_132915-iiybukud/run-iiybukud.wandb differ diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-ojs217tb/files/config.yaml b/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-ojs217tb/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..899f1f2f9606ff6a802ca988728975b51fe8dafc --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-ojs217tb/files/config.yaml @@ -0,0 +1,108 @@ +_wandb: + value: + cli_version: 0.23.1 + e: + r1aqz7soib7wtt5oka5d7ppe6adxqc5s: + args: + - --model_name_or_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained + - --tokenizer_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json + - --trust_remote_code + - "True" + - --data_path + - /data/nanhuang/Nan/ft_data/drosophila_enhancers_stark/split + - --kmer + - "-1" + - --run_name + - base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42 + - --model_max_length + - "512" + - --per_device_train_batch_size + - "128" + - --per_device_eval_batch_size + - "128" + - --gradient_accumulation_steps + - "1" + - --learning_rate + - "3e-5" + - --weight_decay + - "0.05" + - --num_train_epochs + - "4" + - --lr_scheduler_type + - linear + - --warmup_steps + - "0" + - --warmup_ratio + - "0.15" + - --fp16 + - --output_dir + - genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42 + - --evaluation_strategy + - epoch + - --save_strategy + - epoch + - --load_best_model_at_end + - "True" + - --metric_for_best_model + - eval_f1 + - --greater_is_better + - "True" + - --save_total_limit + - "1" + - --logging_steps + 
- "100" + - --overwrite_output_dir + - "True" + - --log_level + - info + - --seed + - "42" + - --find_unused_parameters + - "False" + - --project_name + - genomic_bench_DNAbert2 + codePath: train.py + codePathLocal: train.py + cpu_count: 64 + cpu_count_logical: 128 + disk: + /: + total: "3768964489216" + used: "3559221428224" + email: n5huang@ucsd.edu + executable: /data/nanhuang/miniconda3/envs/bpe_v2/bin/python + host: u112222 + memory: + total: "1082030182400" + os: Linux-5.15.0-126-generic-x86_64-with-glibc2.35 + program: /data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py + python: CPython 3.9.18 + root: /data/nanhuang/Nan/Finetune-GenomicBenchmarks + startedAt: "2026-03-24T20:29:16.675864Z" + writerId: r1aqz7soib7wtt5oka5d7ppe6adxqc5s + m: [] + python_version: 3.9.18 + t: + "1": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "2": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "4": 3.9.18 + "5": 0.23.1 + "6": 4.35.2 + "12": 0.23.1 + "13": linux-x86_64 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-ojs217tb/files/output.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-ojs217tb/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..9d552495ce89088729a117d69c79c28480d7992d --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-ojs217tb/files/output.log @@ -0,0 +1,45 @@ +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... +Some weights of BertForSequenceClassification were not initialized from the model checkpoint at /data/nanhuang/Nan/models/DNAbert2_Pretrained and are newly initialized: ['classifier.weight', 'bert.pooler.dense.weight', 'bert.pooler.dense.bias', 'classifier.bias'] +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. 
+Traceback (most recent call last): + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 473, in + train() + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 424, in train + model = transformers.AutoModelForSequenceClassification.from_pretrained( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/modeling_utils.py", line 2271, in to + return super().to(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1369, in to + return self._apply(convert) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 955, in _apply + param_applied = fn(param) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1355, in convert + return t.to( +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 12.00 MiB. GPU 0 has a total capacity of 47.53 GiB of which 23.88 MiB is free. Process 2988293 has 21.00 GiB memory in use. Process 2988247 has 13.12 GiB memory in use. Process 2988061 has 7.12 GiB memory in use. Process 2988353 has 574.00 MiB memory in use. Process 2988034 has 434.00 MiB memory in use. Process 2988245 has 260.00 MiB memory in use. Process 2988374 has 260.00 MiB memory in use. Including non-PyTorch memory, this process has 260.00 MiB memory in use. Process 2988064 has 260.00 MiB memory in use. Process 2988566 has 260.00 MiB memory in use. Process 2988668 has 260.00 MiB memory in use. 
Process 2988116 has 260.00 MiB memory in use. Process 2988093 has 260.00 MiB memory in use. Process 2988637 has 260.00 MiB memory in use. Process 2988554 has 260.00 MiB memory in use. Process 2988906 has 260.00 MiB memory in use. Process 2988874 has 260.00 MiB memory in use. Process 2988923 has 260.00 MiB memory in use. Process 2988286 has 260.00 MiB memory in use. Process 2989072 has 260.00 MiB memory in use. Process 2988448 has 260.00 MiB memory in use. Process 2988348 has 260.00 MiB memory in use. Process 2988736 has 260.00 MiB memory in use. Process 2988855 has 260.00 MiB memory in use. Process 2988814 has 12.00 MiB memory in use. Process 2988511 has 8.00 MiB memory in use. Process 2988942 has 12.00 MiB memory in use. Process 2989897 has 8.00 MiB memory in use. Process 2989914 has 12.00 MiB memory in use. Process 2989070 has 10.00 MiB memory in use. Process 2989165 has 10.00 MiB memory in use. Process 2989068 has 10.00 MiB memory in use. Process 2990500 has 10.00 MiB memory in use. Process 2989449 has 10.00 MiB memory in use. Process 2988447 has 18.00 MiB memory in use. Process 2988379 has 10.00 MiB memory in use. Process 2988518 has 10.00 MiB memory in use. Process 2988326 has 10.00 MiB memory in use. Process 2988462 has 10.00 MiB memory in use. Process 2989297 has 10.00 MiB memory in use. Process 2988927 has 10.00 MiB memory in use. Process 2988994 has 10.00 MiB memory in use. Process 2988205 has 10.00 MiB memory in use. Of the allocated memory 0 bytes is allocated by PyTorch, and 0 bytes is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. 
See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) +Traceback (most recent call last): + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 473, in + train() + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 424, in train + model = transformers.AutoModelForSequenceClassification.from_pretrained( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/modeling_utils.py", line 2271, in to + return super().to(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1369, in to + return self._apply(convert) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 955, in _apply + param_applied = fn(param) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1355, in convert + return t.to( +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 12.00 MiB. GPU 0 has a total capacity of 47.53 GiB of which 23.88 MiB is free. Process 2988293 has 21.00 GiB memory in use. Process 2988247 has 13.12 GiB memory in use. Process 2988061 has 7.12 GiB memory in use. Process 2988353 has 574.00 MiB memory in use. Process 2988034 has 434.00 MiB memory in use. Process 2988245 has 260.00 MiB memory in use. Process 2988374 has 260.00 MiB memory in use. Including non-PyTorch memory, this process has 260.00 MiB memory in use. 
Process 2988064 has 260.00 MiB memory in use. Process 2988566 has 260.00 MiB memory in use. Process 2988668 has 260.00 MiB memory in use. Process 2988116 has 260.00 MiB memory in use. Process 2988093 has 260.00 MiB memory in use. Process 2988637 has 260.00 MiB memory in use. Process 2988554 has 260.00 MiB memory in use. Process 2988906 has 260.00 MiB memory in use. Process 2988874 has 260.00 MiB memory in use. Process 2988923 has 260.00 MiB memory in use. Process 2988286 has 260.00 MiB memory in use. Process 2989072 has 260.00 MiB memory in use. Process 2988448 has 260.00 MiB memory in use. Process 2988348 has 260.00 MiB memory in use. Process 2988736 has 260.00 MiB memory in use. Process 2988855 has 260.00 MiB memory in use. Process 2988814 has 12.00 MiB memory in use. Process 2988511 has 8.00 MiB memory in use. Process 2988942 has 12.00 MiB memory in use. Process 2989897 has 8.00 MiB memory in use. Process 2989914 has 12.00 MiB memory in use. Process 2989070 has 10.00 MiB memory in use. Process 2989165 has 10.00 MiB memory in use. Process 2989068 has 10.00 MiB memory in use. Process 2990500 has 10.00 MiB memory in use. Process 2989449 has 10.00 MiB memory in use. Process 2988447 has 18.00 MiB memory in use. Process 2988379 has 10.00 MiB memory in use. Process 2988518 has 10.00 MiB memory in use. Process 2988326 has 10.00 MiB memory in use. Process 2988462 has 10.00 MiB memory in use. Process 2989297 has 10.00 MiB memory in use. Process 2988927 has 10.00 MiB memory in use. Process 2988994 has 10.00 MiB memory in use. Process 2988205 has 10.00 MiB memory in use. Of the allocated memory 0 bytes is allocated by PyTorch, and 0 bytes is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. 
See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-ojs217tb/files/requirements.txt b/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-ojs217tb/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..44d18d32ec4648cd530877d7c8c4758d5e887b9c --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-ojs217tb/files/requirements.txt @@ -0,0 +1,144 @@ +scipy==1.13.1 +jupyter_core==5.8.1 +smmap==5.0.2 +yarl==1.22.0 +executing==2.2.0 +cffi==2.0.0 +mkl_random==1.2.8 +traitlets==5.14.3 +wandb==0.23.1 +annotated-types==0.7.0 +evaluate==0.4.6 +kiwisolver==1.4.4 +Jinja2==3.1.6 +pyparsing==3.2.0 +mpmath==1.3.0 +debugpy==1.8.16 +nvidia-cuda-nvrtc-cu12==12.8.93 +docker-pycreds==0.4.0 +pycparser==2.23 +anyio==4.12.0 +safetensors==0.7.0 +matplotlib-inline==0.1.7 +Pygments==2.19.2 +numpy==2.0.2 +nvidia-cuda-cupti-cu12==12.8.90 +Bottleneck==1.4.2 +matplotlib==3.9.2 +numexpr==2.10.1 +sip==6.7.12 +aiohappyeyeballs==2.6.1 +ptyprocess==0.7.0 +fsspec==2025.7.0 +accelerate==0.25.0 +zipp==3.23.0 +PyQt5_sip==12.13.0 +pure_eval==0.2.3 +regex==2025.11.3 +aiosignal==1.4.0 +certifi==2025.10.5 +transformers==4.35.2 +mkl-service==2.4.0 +httpx==0.28.1 +mkl_fft==1.3.11 +pickleshare==0.7.5 +ipykernel==6.30.1 +nvidia-nvtx-cu12==12.8.90 +nvidia-cufft-cu12==11.3.3.83 +triton==3.4.0 +numba==0.60.0 +psutil==7.0.0 +contourpy==1.2.1 +PyQt5==5.15.10 +packaging==25.0 +datasets==4.4.1 +ipython==8.18.1 +sympy==1.14.0 +nvidia-cusolver-cu12==11.7.3.90 +multidict==6.7.0 +jupyter_client==8.6.3 +setuptools==80.9.0 +prompt_toolkit==3.0.51 +six==1.17.0 +GitPython==3.1.45 +pydantic==2.11.7 +nvidia-cublas-cu12==12.8.4.1 +aiohttp==3.13.2 +tzdata==2025.2 +importlib_metadata==8.7.0 +biopython==1.85 +httpcore==1.0.9 +python-dateutil==2.9.0.post0 +llvmlite==0.43.0 +pandas==2.3.3 +scikit-learn==1.6.1 +asttokens==3.0.0 +joblib==1.5.3 +h11==0.16.0 
+charset-normalizer==3.4.4 +pyzmq==27.0.2 +multiprocess==0.70.18 +nvidia-nvjitlink-cu12==12.8.93 +sentry-sdk==2.35.0 +pytz==2025.2 +pydantic_core==2.33.2 +MarkupSafe==3.0.3 +brotlicffi==1.0.9.2 +stack_data==0.6.3 +tqdm==4.67.1 +pynndescent==0.5.13 +importlib_resources==6.5.2 +ply==3.11 +pyarrow==21.0.0 +tokenizers==0.15.2 +exceptiongroup==1.3.1 +nvidia-cusparse-cu12==12.5.8.93 +comm==0.2.3 +pillow==11.3.0 +nvidia-cusparselt-cu12==0.7.1 +protobuf==3.20.3 +urllib3==2.5.0 +wheel==0.45.1 +wcwidth==0.2.13 +appdirs==1.4.4 +PySocks==1.7.1 +PyQt6_sip==13.10.2 +umap-learn==0.5.9.post2 +attrs==25.4.0 +platformdirs==4.3.8 +nvidia-cuda-runtime-cu12==12.8.90 +typing-inspection==0.4.1 +huggingface_hub==0.34.4 +decorator==5.2.1 +filelock==3.17.0 +nvidia-nccl-cu12==2.27.3 +fonttools==4.60.1 +xxhash==3.6.0 +dill==0.4.0 +threadpoolctl==3.6.0 +parso==0.8.4 +pysam==0.9.1 +frozenlist==1.8.0 +typing_extensions==4.15.0 +propcache==0.4.1 +tomli==2.2.1 +click==8.1.8 +nvidia-cudnn-cu12==9.10.2.21 +gitdb==4.0.12 +pip==25.3 +tornado==6.5.2 +networkx==3.2.1 +jedi==0.19.2 +idna==3.11 +pexpect==4.9.0 +async-timeout==5.0.1 +hf-xet==1.1.8 +nvidia-curand-cu12==10.3.9.90 +PyYAML==6.0.2 +nvidia-cufile-cu12==1.13.1.3 +setproctitle==1.3.6 +eval_type_backport==0.2.2 +requests==2.32.5 +nest-asyncio==1.6.0 +torch==2.8.0 +cycler==0.11.0 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-ojs217tb/files/wandb-metadata.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-ojs217tb/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..b0a1d44c969155d924e339329c9044d2a48b5ea1 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-ojs217tb/files/wandb-metadata.json @@ -0,0 +1,85 @@ +{ + "os": "Linux-5.15.0-126-generic-x86_64-with-glibc2.35", + "python": "CPython 3.9.18", + "startedAt": "2026-03-24T20:29:16.675864Z", + "args": [ + "--model_name_or_path", + "/data/nanhuang/Nan/models/DNAbert2_Pretrained", + "--tokenizer_path", + 
"/data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json", + "--trust_remote_code", + "True", + "--data_path", + "/data/nanhuang/Nan/ft_data/drosophila_enhancers_stark/split", + "--kmer", + "-1", + "--run_name", + "base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42", + "--model_max_length", + "512", + "--per_device_train_batch_size", + "128", + "--per_device_eval_batch_size", + "128", + "--gradient_accumulation_steps", + "1", + "--learning_rate", + "3e-5", + "--weight_decay", + "0.05", + "--num_train_epochs", + "4", + "--lr_scheduler_type", + "linear", + "--warmup_steps", + "0", + "--warmup_ratio", + "0.15", + "--fp16", + "--output_dir", + "genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42", + "--evaluation_strategy", + "epoch", + "--save_strategy", + "epoch", + "--load_best_model_at_end", + "True", + "--metric_for_best_model", + "eval_f1", + "--greater_is_better", + "True", + "--save_total_limit", + "1", + "--logging_steps", + "100", + "--overwrite_output_dir", + "True", + "--log_level", + "info", + "--seed", + "42", + "--find_unused_parameters", + "False", + "--project_name", + "genomic_bench_DNAbert2" + ], + "program": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", + "codePath": "train.py", + "codePathLocal": "train.py", + "email": "n5huang@ucsd.edu", + "root": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks", + "host": "u112222", + "executable": "/data/nanhuang/miniconda3/envs/bpe_v2/bin/python", + "cpu_count": 64, + "cpu_count_logical": 128, + "disk": { + "/": { + "total": "3768964489216", + "used": "3559221428224" + } + }, + "memory": { + "total": "1082030182400" + }, + "writerId": "r1aqz7soib7wtt5oka5d7ppe6adxqc5s" +} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-ojs217tb/files/wandb-summary.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-ojs217tb/files/wandb-summary.json new file mode 100644 index 
0000000000000000000000000000000000000000..552f28b1fa81611c547966c0c3246624ef909040 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-ojs217tb/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb":{"runtime":7},"_runtime":7} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-ojs217tb/logs/debug-core.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-ojs217tb/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..2b6a85b0e853b1682c5df77cf4db355cd76772ba --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-ojs217tb/logs/debug-core.log @@ -0,0 +1,42 @@ +{"time":"2026-03-24T13:29:16.467389853-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpix_wtij8/port-2988566.txt","pid":2988566,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:16.469117702-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988566} +{"time":"2026-03-24T13:29:16.469274242-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988566-2995635-692076886/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:16.601892319-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpvefi3x8w/port-2988374.txt","pid":2988374,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:16.604819772-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988374-2995683-4098894380/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:16.604916821-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988374} +{"time":"2026-03-24T13:29:16.646426377-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} 
+{"time":"2026-03-24T13:29:16.727079501-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"p411id0n","id":"1(@)"} +{"time":"2026-03-24T13:29:16.780531446-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:16.83928112-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"qpe0mrgb","id":"1(@)"} +{"time":"2026-03-24T13:29:16.848079187-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpy43es8c3/port-2988531.txt","pid":2988531,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:16.849698988-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988531} +{"time":"2026-03-24T13:29:16.849672968-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988531-2995862-1613203522/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:17.030147894-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:17.08164246-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"ojs217tb","id":"1(@)"} +{"time":"2026-03-24T13:29:21.886641436-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"p411id0n","id":"1(@)"} +{"time":"2026-03-24T13:29:22.056817892-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"qpe0mrgb","id":"1(@)"} +{"time":"2026-03-24T13:29:22.253207564-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"ojs217tb","id":"1(@)"} +{"time":"2026-03-24T13:29:30.404370555-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:30.404680562-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:30.404681693-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} 
+{"time":"2026-03-24T13:29:30.404989631-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988531-2995862-1613203522/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:30.40508845-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:30.597205328-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:30.597398797-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:30.597456076-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:30.597466376-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:30.597581876-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988374-2995683-4098894380/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:30.623345404-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:30.623814461-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:30.62389704-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:30.62391269-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:30.624242268-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988566-2995635-692076886/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:38.306439891-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:38.30652834-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:38.30655151-07:00","level":"INFO","msg":"server is closed"} +{"time":"2026-03-24T13:29:38.577463523-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} 
+{"time":"2026-03-24T13:29:38.577660952-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:38.577752212-07:00","level":"INFO","msg":"server is closed"} +{"time":"2026-03-24T13:29:42.462672972-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:42.462767642-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:42.462789241-07:00","level":"INFO","msg":"server is closed"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-ojs217tb/logs/debug-internal.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-ojs217tb/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..ce4eeece4368a6bcfd25d1d2e81b968a7db87898 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-ojs217tb/logs/debug-internal.log @@ -0,0 +1,12 @@ +{"time":"2026-03-24T13:29:17.081886058-07:00","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2026-03-24T13:29:22.132088898-07:00","level":"ERROR","msg":"monitor: failed to initialize GPU resource: monitor: could not get GPU binary port: timeout reading portfile /tmp/wandb-system-monitor-portfile-3431216114"} +{"time":"2026-03-24T13:29:22.252774707-07:00","level":"INFO","msg":"stream: created new stream","id":"ojs217tb"} +{"time":"2026-03-24T13:29:22.252957156-07:00","level":"INFO","msg":"handler: started","stream_id":"ojs217tb"} +{"time":"2026-03-24T13:29:22.253193924-07:00","level":"INFO","msg":"stream: started","id":"ojs217tb"} +{"time":"2026-03-24T13:29:22.253216104-07:00","level":"INFO","msg":"writer: started","stream_id":"ojs217tb"} +{"time":"2026-03-24T13:29:22.253223304-07:00","level":"INFO","msg":"sender: started","stream_id":"ojs217tb"} +{"time":"2026-03-24T13:29:30.404727012-07:00","level":"INFO","msg":"stream: closing","id":"ojs217tb"} 
+{"time":"2026-03-24T13:29:30.88678123-07:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2026-03-24T13:29:38.570646564-07:00","level":"INFO","msg":"handler: closed","stream_id":"ojs217tb"} +{"time":"2026-03-24T13:29:38.571916896-07:00","level":"INFO","msg":"sender: closed","stream_id":"ojs217tb"} +{"time":"2026-03-24T13:29:38.571983036-07:00","level":"INFO","msg":"stream: closed","id":"ojs217tb"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-ojs217tb/logs/debug.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-ojs217tb/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..052093e0dc1283265864b2312facafaa42f9e09c --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-ojs217tb/logs/debug.log @@ -0,0 +1,23 @@ +2026-03-24 13:29:16,683 INFO MainThread:2988531 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1 +2026-03-24 13:29:16,683 INFO MainThread:2988531 [wandb_setup.py:_flush():80] Configure stats pid to 2988531 +2026-03-24 13:29:16,683 INFO MainThread:2988531 [wandb_setup.py:_flush():80] Loading settings from /home/nanhuang/.config/wandb/settings +2026-03-24 13:29:16,683 INFO MainThread:2988531 [wandb_setup.py:_flush():80] Loading settings from /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/settings +2026-03-24 13:29:16,683 INFO MainThread:2988531 [wandb_setup.py:_flush():80] Loading settings from environment variables +2026-03-24 13:29:16,683 INFO MainThread:2988531 [wandb_init.py:setup_run_log_directory():714] Logging user logs to /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-ojs217tb/logs/debug.log +2026-03-24 13:29:16,683 INFO MainThread:2988531 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-ojs217tb/logs/debug-internal.log +2026-03-24 13:29:16,683 INFO MainThread:2988531 [wandb_init.py:init():841] calling init 
triggers +2026-03-24 13:29:16,683 INFO MainThread:2988531 [wandb_init.py:init():846] wandb.init called with sweep_config: {} +config: {'_wandb': {}} +2026-03-24 13:29:16,684 INFO MainThread:2988531 [wandb_init.py:init():889] starting backend +2026-03-24 13:29:17,030 INFO MainThread:2988531 [wandb_init.py:init():892] sending inform_init request +2026-03-24 13:29:17,079 INFO MainThread:2988531 [wandb_init.py:init():900] backend started and connected +2026-03-24 13:29:17,085 INFO MainThread:2988531 [wandb_init.py:init():970] updated telemetry +2026-03-24 13:29:17,087 INFO MainThread:2988531 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout +2026-03-24 13:29:22,848 INFO MainThread:2988531 [wandb_init.py:init():1041] starting run threads in backend +2026-03-24 13:29:23,008 INFO MainThread:2988531 [wandb_run.py:_console_start():2521] atexit reg +2026-03-24 13:29:23,008 INFO MainThread:2988531 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2026-03-24 13:29:23,009 INFO MainThread:2988531 [wandb_run.py:_redirect():2438] Wrapping output streams. +2026-03-24 13:29:23,009 INFO MainThread:2988531 [wandb_run.py:_redirect():2461] Redirects installed. +2026-03-24 13:29:23,013 INFO MainThread:2988531 [wandb_init.py:init():1081] run started, returning control to user process +2026-03-24 13:29:30,404 INFO wandb-AsyncioManager-main:2988531 [service_client.py:_forward_responses():80] Reached EOF. +2026-03-24 13:29:30,404 INFO wandb-AsyncioManager-main:2988531 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles. 
diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-ojs217tb/run-ojs217tb.wandb b/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-ojs217tb/run-ojs217tb.wandb new file mode 100644 index 0000000000000000000000000000000000000000..bed21a83b5e4d09a2915cc1db2ec5875eaab3d2b Binary files /dev/null and b/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-ojs217tb/run-ojs217tb.wandb differ diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-p411id0n/files/config.yaml b/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-p411id0n/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..501e2655bba7a3c387383583da83ed5a5bee107f --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-p411id0n/files/config.yaml @@ -0,0 +1,108 @@ +_wandb: + value: + cli_version: 0.23.1 + e: + qfst0fl8eyczy374wt8ashnpo6kzhsxx: + args: + - --model_name_or_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained + - --tokenizer_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json + - --trust_remote_code + - "True" + - --data_path + - /data/nanhuang/Nan/ft_data/drosophila_enhancers_stark/split + - --kmer + - "-1" + - --run_name + - base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42 + - --model_max_length + - "512" + - --per_device_train_batch_size + - "128" + - --per_device_eval_batch_size + - "128" + - --gradient_accumulation_steps + - "1" + - --learning_rate + - "3e-5" + - --weight_decay + - "0.05" + - --num_train_epochs + - "4" + - --lr_scheduler_type + - linear + - --warmup_steps + - "0" + - --warmup_ratio + - "0.15" + - --fp16 + - --output_dir + - genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42 + - --evaluation_strategy + - epoch + - --save_strategy + - epoch + - --load_best_model_at_end + - "True" + - --metric_for_best_model + - eval_f1 + - --greater_is_better + - "True" + - --save_total_limit + - "1" + - --logging_steps + 
- "100" + - --overwrite_output_dir + - "True" + - --log_level + - info + - --seed + - "42" + - --find_unused_parameters + - "False" + - --project_name + - genomic_bench_DNAbert2 + codePath: train.py + codePathLocal: train.py + cpu_count: 64 + cpu_count_logical: 128 + disk: + /: + total: "3768964489216" + used: "3559221239808" + email: n5huang@ucsd.edu + executable: /data/nanhuang/miniconda3/envs/bpe_v2/bin/python + host: u112222 + memory: + total: "1082030182400" + os: Linux-5.15.0-126-generic-x86_64-with-glibc2.35 + program: /data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py + python: CPython 3.9.18 + root: /data/nanhuang/Nan/Finetune-GenomicBenchmarks + startedAt: "2026-03-24T20:29:16.298238Z" + writerId: qfst0fl8eyczy374wt8ashnpo6kzhsxx + m: [] + python_version: 3.9.18 + t: + "1": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "2": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "4": 3.9.18 + "5": 0.23.1 + "6": 4.35.2 + "12": 0.23.1 + "13": linux-x86_64 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-p411id0n/files/output.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-p411id0n/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..dcf1d2ccdaa5df04c65e75119393079ee7fc41b4 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-p411id0n/files/output.log @@ -0,0 +1,45 @@ +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... +Some weights of BertForSequenceClassification were not initialized from the model checkpoint at /data/nanhuang/Nan/models/DNAbert2_Pretrained and are newly initialized: ['bert.pooler.dense.weight', 'classifier.weight', 'classifier.bias', 'bert.pooler.dense.bias'] +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. 
+Traceback (most recent call last): + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 473, in + train() + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 424, in train + model = transformers.AutoModelForSequenceClassification.from_pretrained( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/modeling_utils.py", line 2271, in to + return super().to(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1369, in to + return self._apply(convert) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 955, in _apply + param_applied = fn(param) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1355, in convert + return t.to( +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 12.00 MiB. GPU 0 has a total capacity of 47.53 GiB of which 23.62 MiB is free. Process 2988293 has 21.00 GiB memory in use. Process 2988247 has 13.12 GiB memory in use. Process 2988061 has 7.12 GiB memory in use. Process 2988353 has 574.00 MiB memory in use. Process 2988034 has 434.00 MiB memory in use. Process 2988245 has 260.00 MiB memory in use. Process 2988374 has 260.00 MiB memory in use. Process 2988531 has 260.00 MiB memory in use. Process 2988064 has 260.00 MiB memory in use. Including non-PyTorch memory, this process has 260.00 MiB memory in use. Process 2988668 has 260.00 MiB memory in use. 
Process 2988116 has 260.00 MiB memory in use. Process 2988093 has 260.00 MiB memory in use. Process 2988637 has 260.00 MiB memory in use. Process 2988554 has 260.00 MiB memory in use. Process 2988906 has 260.00 MiB memory in use. Process 2988874 has 260.00 MiB memory in use. Process 2988923 has 260.00 MiB memory in use. Process 2988286 has 260.00 MiB memory in use. Process 2989072 has 260.00 MiB memory in use. Process 2988448 has 260.00 MiB memory in use. Process 2988348 has 260.00 MiB memory in use. Process 2988736 has 260.00 MiB memory in use. Process 2988855 has 260.00 MiB memory in use. Process 2988814 has 12.00 MiB memory in use. Process 2988511 has 8.00 MiB memory in use. Process 2988942 has 12.00 MiB memory in use. Process 2989897 has 8.00 MiB memory in use. Process 2989914 has 12.00 MiB memory in use. Process 2989070 has 10.00 MiB memory in use. Process 2989165 has 10.00 MiB memory in use. Process 2989068 has 10.00 MiB memory in use. Process 2990500 has 10.00 MiB memory in use. Process 2989449 has 10.00 MiB memory in use. Process 2988447 has 18.00 MiB memory in use. Process 2988379 has 10.00 MiB memory in use. Process 2988518 has 10.00 MiB memory in use. Process 2988326 has 10.00 MiB memory in use. Process 2988462 has 10.00 MiB memory in use. Process 2989297 has 10.00 MiB memory in use. Process 2988927 has 10.00 MiB memory in use. Process 2988994 has 10.00 MiB memory in use. Process 2988205 has 10.00 MiB memory in use. Of the allocated memory 0 bytes is allocated by PyTorch, and 0 bytes is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. 
See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) +Traceback (most recent call last): + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 473, in + train() + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 424, in train + model = transformers.AutoModelForSequenceClassification.from_pretrained( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/modeling_utils.py", line 2271, in to + return super().to(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1369, in to + return self._apply(convert) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 955, in _apply + param_applied = fn(param) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1355, in convert + return t.to( +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 12.00 MiB. GPU 0 has a total capacity of 47.53 GiB of which 23.62 MiB is free. Process 2988293 has 21.00 GiB memory in use. Process 2988247 has 13.12 GiB memory in use. Process 2988061 has 7.12 GiB memory in use. Process 2988353 has 574.00 MiB memory in use. Process 2988034 has 434.00 MiB memory in use. Process 2988245 has 260.00 MiB memory in use. Process 2988374 has 260.00 MiB memory in use. Process 2988531 has 260.00 MiB memory in use. Process 2988064 has 260.00 MiB memory in use. 
Including non-PyTorch memory, this process has 260.00 MiB memory in use. Process 2988668 has 260.00 MiB memory in use. Process 2988116 has 260.00 MiB memory in use. Process 2988093 has 260.00 MiB memory in use. Process 2988637 has 260.00 MiB memory in use. Process 2988554 has 260.00 MiB memory in use. Process 2988906 has 260.00 MiB memory in use. Process 2988874 has 260.00 MiB memory in use. Process 2988923 has 260.00 MiB memory in use. Process 2988286 has 260.00 MiB memory in use. Process 2989072 has 260.00 MiB memory in use. Process 2988448 has 260.00 MiB memory in use. Process 2988348 has 260.00 MiB memory in use. Process 2988736 has 260.00 MiB memory in use. Process 2988855 has 260.00 MiB memory in use. Process 2988814 has 12.00 MiB memory in use. Process 2988511 has 8.00 MiB memory in use. Process 2988942 has 12.00 MiB memory in use. Process 2989897 has 8.00 MiB memory in use. Process 2989914 has 12.00 MiB memory in use. Process 2989070 has 10.00 MiB memory in use. Process 2989165 has 10.00 MiB memory in use. Process 2989068 has 10.00 MiB memory in use. Process 2990500 has 10.00 MiB memory in use. Process 2989449 has 10.00 MiB memory in use. Process 2988447 has 18.00 MiB memory in use. Process 2988379 has 10.00 MiB memory in use. Process 2988518 has 10.00 MiB memory in use. Process 2988326 has 10.00 MiB memory in use. Process 2988462 has 10.00 MiB memory in use. Process 2989297 has 10.00 MiB memory in use. Process 2988927 has 10.00 MiB memory in use. Process 2988994 has 10.00 MiB memory in use. Process 2988205 has 10.00 MiB memory in use. Of the allocated memory 0 bytes is allocated by PyTorch, and 0 bytes is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. 
See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-p411id0n/files/requirements.txt b/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-p411id0n/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..44d18d32ec4648cd530877d7c8c4758d5e887b9c --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-p411id0n/files/requirements.txt @@ -0,0 +1,144 @@ +scipy==1.13.1 +jupyter_core==5.8.1 +smmap==5.0.2 +yarl==1.22.0 +executing==2.2.0 +cffi==2.0.0 +mkl_random==1.2.8 +traitlets==5.14.3 +wandb==0.23.1 +annotated-types==0.7.0 +evaluate==0.4.6 +kiwisolver==1.4.4 +Jinja2==3.1.6 +pyparsing==3.2.0 +mpmath==1.3.0 +debugpy==1.8.16 +nvidia-cuda-nvrtc-cu12==12.8.93 +docker-pycreds==0.4.0 +pycparser==2.23 +anyio==4.12.0 +safetensors==0.7.0 +matplotlib-inline==0.1.7 +Pygments==2.19.2 +numpy==2.0.2 +nvidia-cuda-cupti-cu12==12.8.90 +Bottleneck==1.4.2 +matplotlib==3.9.2 +numexpr==2.10.1 +sip==6.7.12 +aiohappyeyeballs==2.6.1 +ptyprocess==0.7.0 +fsspec==2025.7.0 +accelerate==0.25.0 +zipp==3.23.0 +PyQt5_sip==12.13.0 +pure_eval==0.2.3 +regex==2025.11.3 +aiosignal==1.4.0 +certifi==2025.10.5 +transformers==4.35.2 +mkl-service==2.4.0 +httpx==0.28.1 +mkl_fft==1.3.11 +pickleshare==0.7.5 +ipykernel==6.30.1 +nvidia-nvtx-cu12==12.8.90 +nvidia-cufft-cu12==11.3.3.83 +triton==3.4.0 +numba==0.60.0 +psutil==7.0.0 +contourpy==1.2.1 +PyQt5==5.15.10 +packaging==25.0 +datasets==4.4.1 +ipython==8.18.1 +sympy==1.14.0 +nvidia-cusolver-cu12==11.7.3.90 +multidict==6.7.0 +jupyter_client==8.6.3 +setuptools==80.9.0 +prompt_toolkit==3.0.51 +six==1.17.0 +GitPython==3.1.45 +pydantic==2.11.7 +nvidia-cublas-cu12==12.8.4.1 +aiohttp==3.13.2 +tzdata==2025.2 +importlib_metadata==8.7.0 +biopython==1.85 +httpcore==1.0.9 +python-dateutil==2.9.0.post0 +llvmlite==0.43.0 +pandas==2.3.3 +scikit-learn==1.6.1 +asttokens==3.0.0 +joblib==1.5.3 +h11==0.16.0 
+charset-normalizer==3.4.4 +pyzmq==27.0.2 +multiprocess==0.70.18 +nvidia-nvjitlink-cu12==12.8.93 +sentry-sdk==2.35.0 +pytz==2025.2 +pydantic_core==2.33.2 +MarkupSafe==3.0.3 +brotlicffi==1.0.9.2 +stack_data==0.6.3 +tqdm==4.67.1 +pynndescent==0.5.13 +importlib_resources==6.5.2 +ply==3.11 +pyarrow==21.0.0 +tokenizers==0.15.2 +exceptiongroup==1.3.1 +nvidia-cusparse-cu12==12.5.8.93 +comm==0.2.3 +pillow==11.3.0 +nvidia-cusparselt-cu12==0.7.1 +protobuf==3.20.3 +urllib3==2.5.0 +wheel==0.45.1 +wcwidth==0.2.13 +appdirs==1.4.4 +PySocks==1.7.1 +PyQt6_sip==13.10.2 +umap-learn==0.5.9.post2 +attrs==25.4.0 +platformdirs==4.3.8 +nvidia-cuda-runtime-cu12==12.8.90 +typing-inspection==0.4.1 +huggingface_hub==0.34.4 +decorator==5.2.1 +filelock==3.17.0 +nvidia-nccl-cu12==2.27.3 +fonttools==4.60.1 +xxhash==3.6.0 +dill==0.4.0 +threadpoolctl==3.6.0 +parso==0.8.4 +pysam==0.9.1 +frozenlist==1.8.0 +typing_extensions==4.15.0 +propcache==0.4.1 +tomli==2.2.1 +click==8.1.8 +nvidia-cudnn-cu12==9.10.2.21 +gitdb==4.0.12 +pip==25.3 +tornado==6.5.2 +networkx==3.2.1 +jedi==0.19.2 +idna==3.11 +pexpect==4.9.0 +async-timeout==5.0.1 +hf-xet==1.1.8 +nvidia-curand-cu12==10.3.9.90 +PyYAML==6.0.2 +nvidia-cufile-cu12==1.13.1.3 +setproctitle==1.3.6 +eval_type_backport==0.2.2 +requests==2.32.5 +nest-asyncio==1.6.0 +torch==2.8.0 +cycler==0.11.0 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-p411id0n/files/wandb-metadata.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-p411id0n/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..9ac6ba292fa6a25d010eb60783904610c2fc28a0 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-p411id0n/files/wandb-metadata.json @@ -0,0 +1,85 @@ +{ + "os": "Linux-5.15.0-126-generic-x86_64-with-glibc2.35", + "python": "CPython 3.9.18", + "startedAt": "2026-03-24T20:29:16.298238Z", + "args": [ + "--model_name_or_path", + "/data/nanhuang/Nan/models/DNAbert2_Pretrained", + "--tokenizer_path", + 
"/data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json", + "--trust_remote_code", + "True", + "--data_path", + "/data/nanhuang/Nan/ft_data/drosophila_enhancers_stark/split", + "--kmer", + "-1", + "--run_name", + "base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42", + "--model_max_length", + "512", + "--per_device_train_batch_size", + "128", + "--per_device_eval_batch_size", + "128", + "--gradient_accumulation_steps", + "1", + "--learning_rate", + "3e-5", + "--weight_decay", + "0.05", + "--num_train_epochs", + "4", + "--lr_scheduler_type", + "linear", + "--warmup_steps", + "0", + "--warmup_ratio", + "0.15", + "--fp16", + "--output_dir", + "genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42", + "--evaluation_strategy", + "epoch", + "--save_strategy", + "epoch", + "--load_best_model_at_end", + "True", + "--metric_for_best_model", + "eval_f1", + "--greater_is_better", + "True", + "--save_total_limit", + "1", + "--logging_steps", + "100", + "--overwrite_output_dir", + "True", + "--log_level", + "info", + "--seed", + "42", + "--find_unused_parameters", + "False", + "--project_name", + "genomic_bench_DNAbert2" + ], + "program": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", + "codePath": "train.py", + "codePathLocal": "train.py", + "email": "n5huang@ucsd.edu", + "root": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks", + "host": "u112222", + "executable": "/data/nanhuang/miniconda3/envs/bpe_v2/bin/python", + "cpu_count": 64, + "cpu_count_logical": 128, + "disk": { + "/": { + "total": "3768964489216", + "used": "3559221239808" + } + }, + "memory": { + "total": "1082030182400" + }, + "writerId": "qfst0fl8eyczy374wt8ashnpo6kzhsxx" +} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-p411id0n/files/wandb-summary.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-p411id0n/files/wandb-summary.json new file mode 100644 index 
0000000000000000000000000000000000000000..2320ce95111d4a7914cdfba4a4e05238d5d8f179 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-p411id0n/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb":{"runtime":8},"_runtime":8} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-p411id0n/logs/debug-core.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-p411id0n/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..2b6a85b0e853b1682c5df77cf4db355cd76772ba --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-p411id0n/logs/debug-core.log @@ -0,0 +1,42 @@ +{"time":"2026-03-24T13:29:16.467389853-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpix_wtij8/port-2988566.txt","pid":2988566,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:16.469117702-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988566} +{"time":"2026-03-24T13:29:16.469274242-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988566-2995635-692076886/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:16.601892319-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpvefi3x8w/port-2988374.txt","pid":2988374,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:16.604819772-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988374-2995683-4098894380/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:16.604916821-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988374} +{"time":"2026-03-24T13:29:16.646426377-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} 
+{"time":"2026-03-24T13:29:16.727079501-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"p411id0n","id":"1(@)"} +{"time":"2026-03-24T13:29:16.780531446-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:16.83928112-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"qpe0mrgb","id":"1(@)"} +{"time":"2026-03-24T13:29:16.848079187-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpy43es8c3/port-2988531.txt","pid":2988531,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:16.849698988-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988531} +{"time":"2026-03-24T13:29:16.849672968-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988531-2995862-1613203522/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:17.030147894-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:17.08164246-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"ojs217tb","id":"1(@)"} +{"time":"2026-03-24T13:29:21.886641436-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"p411id0n","id":"1(@)"} +{"time":"2026-03-24T13:29:22.056817892-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"qpe0mrgb","id":"1(@)"} +{"time":"2026-03-24T13:29:22.253207564-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"ojs217tb","id":"1(@)"} +{"time":"2026-03-24T13:29:30.404370555-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:30.404680562-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:30.404681693-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} 
+{"time":"2026-03-24T13:29:30.404989631-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988531-2995862-1613203522/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:30.40508845-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:30.597205328-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:30.597398797-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:30.597456076-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:30.597466376-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:30.597581876-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988374-2995683-4098894380/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:30.623345404-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:30.623814461-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:30.62389704-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:30.62391269-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:30.624242268-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988566-2995635-692076886/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:38.306439891-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:38.30652834-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:38.30655151-07:00","level":"INFO","msg":"server is closed"} +{"time":"2026-03-24T13:29:38.577463523-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} 
+{"time":"2026-03-24T13:29:38.577660952-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:38.577752212-07:00","level":"INFO","msg":"server is closed"} +{"time":"2026-03-24T13:29:42.462672972-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:42.462767642-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:42.462789241-07:00","level":"INFO","msg":"server is closed"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-p411id0n/logs/debug-internal.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-p411id0n/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..0b516bda8b3c1d2112d96030908045eb5ee68678 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-p411id0n/logs/debug-internal.log @@ -0,0 +1,12 @@ +{"time":"2026-03-24T13:29:16.72733834-07:00","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2026-03-24T13:29:21.782726238-07:00","level":"ERROR","msg":"monitor: failed to initialize GPU resource: monitor: could not get GPU binary port: timeout reading portfile /tmp/wandb-system-monitor-portfile-2152822616"} +{"time":"2026-03-24T13:29:21.886213478-07:00","level":"INFO","msg":"stream: created new stream","id":"p411id0n"} +{"time":"2026-03-24T13:29:21.886439047-07:00","level":"INFO","msg":"handler: started","stream_id":"p411id0n"} +{"time":"2026-03-24T13:29:21.886619116-07:00","level":"INFO","msg":"stream: started","id":"p411id0n"} +{"time":"2026-03-24T13:29:21.886670136-07:00","level":"INFO","msg":"writer: started","stream_id":"p411id0n"} +{"time":"2026-03-24T13:29:21.886669765-07:00","level":"INFO","msg":"sender: started","stream_id":"p411id0n"} +{"time":"2026-03-24T13:29:30.623534762-07:00","level":"INFO","msg":"stream: closing","id":"p411id0n"} 
+{"time":"2026-03-24T13:29:31.102454159-07:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2026-03-24T13:29:38.180815651-07:00","level":"INFO","msg":"handler: closed","stream_id":"p411id0n"} +{"time":"2026-03-24T13:29:38.18108178-07:00","level":"INFO","msg":"sender: closed","stream_id":"p411id0n"} +{"time":"2026-03-24T13:29:38.18109567-07:00","level":"INFO","msg":"stream: closed","id":"p411id0n"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-p411id0n/logs/debug.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-p411id0n/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..dbb2dfd410981535ca6790d7d0073e3b000df302 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-p411id0n/logs/debug.log @@ -0,0 +1,23 @@ +2026-03-24 13:29:16,304 INFO MainThread:2988566 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1 +2026-03-24 13:29:16,305 INFO MainThread:2988566 [wandb_setup.py:_flush():80] Configure stats pid to 2988566 +2026-03-24 13:29:16,305 INFO MainThread:2988566 [wandb_setup.py:_flush():80] Loading settings from /home/nanhuang/.config/wandb/settings +2026-03-24 13:29:16,305 INFO MainThread:2988566 [wandb_setup.py:_flush():80] Loading settings from /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/settings +2026-03-24 13:29:16,305 INFO MainThread:2988566 [wandb_setup.py:_flush():80] Loading settings from environment variables +2026-03-24 13:29:16,305 INFO MainThread:2988566 [wandb_init.py:setup_run_log_directory():714] Logging user logs to /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-p411id0n/logs/debug.log +2026-03-24 13:29:16,305 INFO MainThread:2988566 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-p411id0n/logs/debug-internal.log +2026-03-24 13:29:16,305 INFO MainThread:2988566 [wandb_init.py:init():841] calling init 
triggers +2026-03-24 13:29:16,305 INFO MainThread:2988566 [wandb_init.py:init():846] wandb.init called with sweep_config: {} +config: {'_wandb': {}} +2026-03-24 13:29:16,305 INFO MainThread:2988566 [wandb_init.py:init():889] starting backend +2026-03-24 13:29:16,647 INFO MainThread:2988566 [wandb_init.py:init():892] sending inform_init request +2026-03-24 13:29:16,724 INFO MainThread:2988566 [wandb_init.py:init():900] backend started and connected +2026-03-24 13:29:16,733 INFO MainThread:2988566 [wandb_init.py:init():970] updated telemetry +2026-03-24 13:29:16,735 INFO MainThread:2988566 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout +2026-03-24 13:29:22,377 INFO MainThread:2988566 [wandb_init.py:init():1041] starting run threads in backend +2026-03-24 13:29:22,521 INFO MainThread:2988566 [wandb_run.py:_console_start():2521] atexit reg +2026-03-24 13:29:22,521 INFO MainThread:2988566 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2026-03-24 13:29:22,521 INFO MainThread:2988566 [wandb_run.py:_redirect():2438] Wrapping output streams. +2026-03-24 13:29:22,521 INFO MainThread:2988566 [wandb_run.py:_redirect():2461] Redirects installed. +2026-03-24 13:29:22,527 INFO MainThread:2988566 [wandb_init.py:init():1081] run started, returning control to user process +2026-03-24 13:29:30,622 INFO wandb-AsyncioManager-main:2988566 [service_client.py:_forward_responses():80] Reached EOF. +2026-03-24 13:29:30,622 INFO wandb-AsyncioManager-main:2988566 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles. 
diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-p411id0n/run-p411id0n.wandb b/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-p411id0n/run-p411id0n.wandb new file mode 100644 index 0000000000000000000000000000000000000000..bfc90e164df490bedb6eaef9c67e333793eb7ff9 Binary files /dev/null and b/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-p411id0n/run-p411id0n.wandb differ diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-qpe0mrgb/files/config.yaml b/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-qpe0mrgb/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c1503f7015264fceef627996b09aa9dbbea6beb2 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-qpe0mrgb/files/config.yaml @@ -0,0 +1,108 @@ +_wandb: + value: + cli_version: 0.23.1 + e: + vd5bpi3fhl8l55m986ydepipay9xuezv: + args: + - --model_name_or_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained + - --tokenizer_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json + - --trust_remote_code + - "True" + - --data_path + - /data/nanhuang/Nan/ft_data/drosophila_enhancers_stark/split + - --kmer + - "-1" + - --run_name + - base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42 + - --model_max_length + - "512" + - --per_device_train_batch_size + - "128" + - --per_device_eval_batch_size + - "128" + - --gradient_accumulation_steps + - "1" + - --learning_rate + - "3e-5" + - --weight_decay + - "0.05" + - --num_train_epochs + - "4" + - --lr_scheduler_type + - linear + - --warmup_steps + - "0" + - --warmup_ratio + - "0.15" + - --fp16 + - --output_dir + - genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42 + - --evaluation_strategy + - epoch + - --save_strategy + - epoch + - --load_best_model_at_end + - "True" + - --metric_for_best_model + - eval_f1 + - --greater_is_better + - "True" + - --save_total_limit + - "1" + - --logging_steps + 
- "100" + - --overwrite_output_dir + - "True" + - --log_level + - info + - --seed + - "42" + - --find_unused_parameters + - "False" + - --project_name + - genomic_bench_DNAbert2 + codePath: train.py + codePathLocal: train.py + cpu_count: 64 + cpu_count_logical: 128 + disk: + /: + total: "3768964489216" + used: "3559221387264" + email: n5huang@ucsd.edu + executable: /data/nanhuang/miniconda3/envs/bpe_v2/bin/python + host: u112222 + memory: + total: "1082030182400" + os: Linux-5.15.0-126-generic-x86_64-with-glibc2.35 + program: /data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py + python: CPython 3.9.18 + root: /data/nanhuang/Nan/Finetune-GenomicBenchmarks + startedAt: "2026-03-24T20:29:16.448377Z" + writerId: vd5bpi3fhl8l55m986ydepipay9xuezv + m: [] + python_version: 3.9.18 + t: + "1": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "2": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "4": 3.9.18 + "5": 0.23.1 + "6": 4.35.2 + "12": 0.23.1 + "13": linux-x86_64 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-qpe0mrgb/files/output.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-qpe0mrgb/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..4700a5b1980f3d7ba681fc4c410fa2a6b8409938 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-qpe0mrgb/files/output.log @@ -0,0 +1,45 @@ +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... +Some weights of BertForSequenceClassification were not initialized from the model checkpoint at /data/nanhuang/Nan/models/DNAbert2_Pretrained and are newly initialized: ['classifier.weight', 'bert.pooler.dense.weight', 'bert.pooler.dense.bias', 'classifier.bias'] +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. 
+Traceback (most recent call last): + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 473, in + train() + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 424, in train + model = transformers.AutoModelForSequenceClassification.from_pretrained( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/modeling_utils.py", line 2271, in to + return super().to(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1369, in to + return self._apply(convert) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 955, in _apply + param_applied = fn(param) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1355, in convert + return t.to( +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 12.00 MiB. GPU 0 has a total capacity of 47.53 GiB of which 23.62 MiB is free. Process 2988293 has 21.00 GiB memory in use. Process 2988247 has 13.12 GiB memory in use. Process 2988061 has 7.12 GiB memory in use. Process 2988353 has 574.00 MiB memory in use. Process 2988034 has 434.00 MiB memory in use. Process 2988245 has 260.00 MiB memory in use. Including non-PyTorch memory, this process has 260.00 MiB memory in use. Process 2988531 has 260.00 MiB memory in use. Process 2988064 has 260.00 MiB memory in use. Process 2988566 has 260.00 MiB memory in use. Process 2988668 has 260.00 MiB memory in use. 
Process 2988116 has 260.00 MiB memory in use. Process 2988093 has 260.00 MiB memory in use. Process 2988637 has 260.00 MiB memory in use. Process 2988554 has 260.00 MiB memory in use. Process 2988906 has 260.00 MiB memory in use. Process 2988874 has 260.00 MiB memory in use. Process 2988923 has 260.00 MiB memory in use. Process 2988286 has 260.00 MiB memory in use. Process 2989072 has 260.00 MiB memory in use. Process 2988448 has 260.00 MiB memory in use. Process 2988348 has 260.00 MiB memory in use. Process 2988736 has 260.00 MiB memory in use. Process 2988855 has 260.00 MiB memory in use. Process 2988814 has 12.00 MiB memory in use. Process 2988511 has 8.00 MiB memory in use. Process 2988942 has 12.00 MiB memory in use. Process 2989897 has 8.00 MiB memory in use. Process 2989914 has 12.00 MiB memory in use. Process 2989070 has 10.00 MiB memory in use. Process 2989165 has 10.00 MiB memory in use. Process 2989068 has 10.00 MiB memory in use. Process 2990500 has 10.00 MiB memory in use. Process 2989449 has 10.00 MiB memory in use. Process 2988447 has 18.00 MiB memory in use. Process 2988379 has 10.00 MiB memory in use. Process 2988518 has 10.00 MiB memory in use. Process 2988326 has 10.00 MiB memory in use. Process 2988462 has 10.00 MiB memory in use. Process 2989297 has 10.00 MiB memory in use. Process 2988927 has 10.00 MiB memory in use. Process 2988994 has 10.00 MiB memory in use. Process 2988205 has 10.00 MiB memory in use. Of the allocated memory 0 bytes is allocated by PyTorch, and 0 bytes is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. 
See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) +Traceback (most recent call last): + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 473, in + train() + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 424, in train + model = transformers.AutoModelForSequenceClassification.from_pretrained( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/modeling_utils.py", line 2271, in to + return super().to(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1369, in to + return self._apply(convert) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 955, in _apply + param_applied = fn(param) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1355, in convert + return t.to( +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 12.00 MiB. GPU 0 has a total capacity of 47.53 GiB of which 23.62 MiB is free. Process 2988293 has 21.00 GiB memory in use. Process 2988247 has 13.12 GiB memory in use. Process 2988061 has 7.12 GiB memory in use. Process 2988353 has 574.00 MiB memory in use. Process 2988034 has 434.00 MiB memory in use. Process 2988245 has 260.00 MiB memory in use. Including non-PyTorch memory, this process has 260.00 MiB memory in use. Process 2988531 has 260.00 MiB memory in use. 
Process 2988064 has 260.00 MiB memory in use. Process 2988566 has 260.00 MiB memory in use. Process 2988668 has 260.00 MiB memory in use. Process 2988116 has 260.00 MiB memory in use. Process 2988093 has 260.00 MiB memory in use. Process 2988637 has 260.00 MiB memory in use. Process 2988554 has 260.00 MiB memory in use. Process 2988906 has 260.00 MiB memory in use. Process 2988874 has 260.00 MiB memory in use. Process 2988923 has 260.00 MiB memory in use. Process 2988286 has 260.00 MiB memory in use. Process 2989072 has 260.00 MiB memory in use. Process 2988448 has 260.00 MiB memory in use. Process 2988348 has 260.00 MiB memory in use. Process 2988736 has 260.00 MiB memory in use. Process 2988855 has 260.00 MiB memory in use. Process 2988814 has 12.00 MiB memory in use. Process 2988511 has 8.00 MiB memory in use. Process 2988942 has 12.00 MiB memory in use. Process 2989897 has 8.00 MiB memory in use. Process 2989914 has 12.00 MiB memory in use. Process 2989070 has 10.00 MiB memory in use. Process 2989165 has 10.00 MiB memory in use. Process 2989068 has 10.00 MiB memory in use. Process 2990500 has 10.00 MiB memory in use. Process 2989449 has 10.00 MiB memory in use. Process 2988447 has 18.00 MiB memory in use. Process 2988379 has 10.00 MiB memory in use. Process 2988518 has 10.00 MiB memory in use. Process 2988326 has 10.00 MiB memory in use. Process 2988462 has 10.00 MiB memory in use. Process 2989297 has 10.00 MiB memory in use. Process 2988927 has 10.00 MiB memory in use. Process 2988994 has 10.00 MiB memory in use. Process 2988205 has 10.00 MiB memory in use. Of the allocated memory 0 bytes is allocated by PyTorch, and 0 bytes is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. 
See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-qpe0mrgb/files/requirements.txt b/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-qpe0mrgb/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..44d18d32ec4648cd530877d7c8c4758d5e887b9c --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-qpe0mrgb/files/requirements.txt @@ -0,0 +1,144 @@ +scipy==1.13.1 +jupyter_core==5.8.1 +smmap==5.0.2 +yarl==1.22.0 +executing==2.2.0 +cffi==2.0.0 +mkl_random==1.2.8 +traitlets==5.14.3 +wandb==0.23.1 +annotated-types==0.7.0 +evaluate==0.4.6 +kiwisolver==1.4.4 +Jinja2==3.1.6 +pyparsing==3.2.0 +mpmath==1.3.0 +debugpy==1.8.16 +nvidia-cuda-nvrtc-cu12==12.8.93 +docker-pycreds==0.4.0 +pycparser==2.23 +anyio==4.12.0 +safetensors==0.7.0 +matplotlib-inline==0.1.7 +Pygments==2.19.2 +numpy==2.0.2 +nvidia-cuda-cupti-cu12==12.8.90 +Bottleneck==1.4.2 +matplotlib==3.9.2 +numexpr==2.10.1 +sip==6.7.12 +aiohappyeyeballs==2.6.1 +ptyprocess==0.7.0 +fsspec==2025.7.0 +accelerate==0.25.0 +zipp==3.23.0 +PyQt5_sip==12.13.0 +pure_eval==0.2.3 +regex==2025.11.3 +aiosignal==1.4.0 +certifi==2025.10.5 +transformers==4.35.2 +mkl-service==2.4.0 +httpx==0.28.1 +mkl_fft==1.3.11 +pickleshare==0.7.5 +ipykernel==6.30.1 +nvidia-nvtx-cu12==12.8.90 +nvidia-cufft-cu12==11.3.3.83 +triton==3.4.0 +numba==0.60.0 +psutil==7.0.0 +contourpy==1.2.1 +PyQt5==5.15.10 +packaging==25.0 +datasets==4.4.1 +ipython==8.18.1 +sympy==1.14.0 +nvidia-cusolver-cu12==11.7.3.90 +multidict==6.7.0 +jupyter_client==8.6.3 +setuptools==80.9.0 +prompt_toolkit==3.0.51 +six==1.17.0 +GitPython==3.1.45 +pydantic==2.11.7 +nvidia-cublas-cu12==12.8.4.1 +aiohttp==3.13.2 +tzdata==2025.2 +importlib_metadata==8.7.0 +biopython==1.85 +httpcore==1.0.9 +python-dateutil==2.9.0.post0 +llvmlite==0.43.0 +pandas==2.3.3 +scikit-learn==1.6.1 +asttokens==3.0.0 +joblib==1.5.3 +h11==0.16.0 
+charset-normalizer==3.4.4 +pyzmq==27.0.2 +multiprocess==0.70.18 +nvidia-nvjitlink-cu12==12.8.93 +sentry-sdk==2.35.0 +pytz==2025.2 +pydantic_core==2.33.2 +MarkupSafe==3.0.3 +brotlicffi==1.0.9.2 +stack_data==0.6.3 +tqdm==4.67.1 +pynndescent==0.5.13 +importlib_resources==6.5.2 +ply==3.11 +pyarrow==21.0.0 +tokenizers==0.15.2 +exceptiongroup==1.3.1 +nvidia-cusparse-cu12==12.5.8.93 +comm==0.2.3 +pillow==11.3.0 +nvidia-cusparselt-cu12==0.7.1 +protobuf==3.20.3 +urllib3==2.5.0 +wheel==0.45.1 +wcwidth==0.2.13 +appdirs==1.4.4 +PySocks==1.7.1 +PyQt6_sip==13.10.2 +umap-learn==0.5.9.post2 +attrs==25.4.0 +platformdirs==4.3.8 +nvidia-cuda-runtime-cu12==12.8.90 +typing-inspection==0.4.1 +huggingface_hub==0.34.4 +decorator==5.2.1 +filelock==3.17.0 +nvidia-nccl-cu12==2.27.3 +fonttools==4.60.1 +xxhash==3.6.0 +dill==0.4.0 +threadpoolctl==3.6.0 +parso==0.8.4 +pysam==0.9.1 +frozenlist==1.8.0 +typing_extensions==4.15.0 +propcache==0.4.1 +tomli==2.2.1 +click==8.1.8 +nvidia-cudnn-cu12==9.10.2.21 +gitdb==4.0.12 +pip==25.3 +tornado==6.5.2 +networkx==3.2.1 +jedi==0.19.2 +idna==3.11 +pexpect==4.9.0 +async-timeout==5.0.1 +hf-xet==1.1.8 +nvidia-curand-cu12==10.3.9.90 +PyYAML==6.0.2 +nvidia-cufile-cu12==1.13.1.3 +setproctitle==1.3.6 +eval_type_backport==0.2.2 +requests==2.32.5 +nest-asyncio==1.6.0 +torch==2.8.0 +cycler==0.11.0 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-qpe0mrgb/files/wandb-metadata.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-qpe0mrgb/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..7b9058bc18d35f9de4830b39043bd99fb8a45df7 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-qpe0mrgb/files/wandb-metadata.json @@ -0,0 +1,85 @@ +{ + "os": "Linux-5.15.0-126-generic-x86_64-with-glibc2.35", + "python": "CPython 3.9.18", + "startedAt": "2026-03-24T20:29:16.448377Z", + "args": [ + "--model_name_or_path", + "/data/nanhuang/Nan/models/DNAbert2_Pretrained", + "--tokenizer_path", + 
"/data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json", + "--trust_remote_code", + "True", + "--data_path", + "/data/nanhuang/Nan/ft_data/drosophila_enhancers_stark/split", + "--kmer", + "-1", + "--run_name", + "base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42", + "--model_max_length", + "512", + "--per_device_train_batch_size", + "128", + "--per_device_eval_batch_size", + "128", + "--gradient_accumulation_steps", + "1", + "--learning_rate", + "3e-5", + "--weight_decay", + "0.05", + "--num_train_epochs", + "4", + "--lr_scheduler_type", + "linear", + "--warmup_steps", + "0", + "--warmup_ratio", + "0.15", + "--fp16", + "--output_dir", + "genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42", + "--evaluation_strategy", + "epoch", + "--save_strategy", + "epoch", + "--load_best_model_at_end", + "True", + "--metric_for_best_model", + "eval_f1", + "--greater_is_better", + "True", + "--save_total_limit", + "1", + "--logging_steps", + "100", + "--overwrite_output_dir", + "True", + "--log_level", + "info", + "--seed", + "42", + "--find_unused_parameters", + "False", + "--project_name", + "genomic_bench_DNAbert2" + ], + "program": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", + "codePath": "train.py", + "codePathLocal": "train.py", + "email": "n5huang@ucsd.edu", + "root": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks", + "host": "u112222", + "executable": "/data/nanhuang/miniconda3/envs/bpe_v2/bin/python", + "cpu_count": 64, + "cpu_count_logical": 128, + "disk": { + "/": { + "total": "3768964489216", + "used": "3559221387264" + } + }, + "memory": { + "total": "1082030182400" + }, + "writerId": "vd5bpi3fhl8l55m986ydepipay9xuezv" +} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-qpe0mrgb/files/wandb-summary.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-qpe0mrgb/files/wandb-summary.json new file mode 100644 index 
0000000000000000000000000000000000000000..552f28b1fa81611c547966c0c3246624ef909040 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-qpe0mrgb/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb":{"runtime":7},"_runtime":7} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-qpe0mrgb/logs/debug-core.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-qpe0mrgb/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..2b6a85b0e853b1682c5df77cf4db355cd76772ba --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-qpe0mrgb/logs/debug-core.log @@ -0,0 +1,42 @@ +{"time":"2026-03-24T13:29:16.467389853-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpix_wtij8/port-2988566.txt","pid":2988566,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:16.469117702-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988566} +{"time":"2026-03-24T13:29:16.469274242-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988566-2995635-692076886/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:16.601892319-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpvefi3x8w/port-2988374.txt","pid":2988374,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:16.604819772-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988374-2995683-4098894380/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:16.604916821-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988374} +{"time":"2026-03-24T13:29:16.646426377-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} 
+{"time":"2026-03-24T13:29:16.727079501-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"p411id0n","id":"1(@)"} +{"time":"2026-03-24T13:29:16.780531446-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:16.83928112-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"qpe0mrgb","id":"1(@)"} +{"time":"2026-03-24T13:29:16.848079187-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpy43es8c3/port-2988531.txt","pid":2988531,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:16.849698988-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988531} +{"time":"2026-03-24T13:29:16.849672968-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988531-2995862-1613203522/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:17.030147894-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:17.08164246-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"ojs217tb","id":"1(@)"} +{"time":"2026-03-24T13:29:21.886641436-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"p411id0n","id":"1(@)"} +{"time":"2026-03-24T13:29:22.056817892-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"qpe0mrgb","id":"1(@)"} +{"time":"2026-03-24T13:29:22.253207564-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"ojs217tb","id":"1(@)"} +{"time":"2026-03-24T13:29:30.404370555-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:30.404680562-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:30.404681693-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} 
+{"time":"2026-03-24T13:29:30.404989631-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988531-2995862-1613203522/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:30.40508845-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:30.597205328-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:30.597398797-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:30.597456076-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:30.597466376-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:30.597581876-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988374-2995683-4098894380/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:30.623345404-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:30.623814461-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:30.62389704-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:30.62391269-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:30.624242268-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988566-2995635-692076886/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:38.306439891-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:38.30652834-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:38.30655151-07:00","level":"INFO","msg":"server is closed"} +{"time":"2026-03-24T13:29:38.577463523-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} 
+{"time":"2026-03-24T13:29:38.577660952-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:38.577752212-07:00","level":"INFO","msg":"server is closed"} +{"time":"2026-03-24T13:29:42.462672972-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:42.462767642-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:42.462789241-07:00","level":"INFO","msg":"server is closed"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-qpe0mrgb/logs/debug-internal.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-qpe0mrgb/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..45b4dc26330bee359fd21484b3c9290267983a06 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-qpe0mrgb/logs/debug-internal.log @@ -0,0 +1,12 @@ +{"time":"2026-03-24T13:29:16.839659327-07:00","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2026-03-24T13:29:21.934767472-07:00","level":"ERROR","msg":"monitor: failed to initialize GPU resource: monitor: could not get GPU binary port: timeout reading portfile /tmp/wandb-system-monitor-portfile-3318688956"} +{"time":"2026-03-24T13:29:22.056318705-07:00","level":"INFO","msg":"stream: created new stream","id":"qpe0mrgb"} +{"time":"2026-03-24T13:29:22.056504104-07:00","level":"INFO","msg":"handler: started","stream_id":"qpe0mrgb"} +{"time":"2026-03-24T13:29:22.056781953-07:00","level":"INFO","msg":"stream: started","id":"qpe0mrgb"} +{"time":"2026-03-24T13:29:22.056801092-07:00","level":"INFO","msg":"writer: started","stream_id":"qpe0mrgb"} +{"time":"2026-03-24T13:29:22.056798862-07:00","level":"INFO","msg":"sender: started","stream_id":"qpe0mrgb"} +{"time":"2026-03-24T13:29:30.597285327-07:00","level":"INFO","msg":"stream: closing","id":"qpe0mrgb"} 
+{"time":"2026-03-24T13:29:31.026502967-07:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2026-03-24T13:29:42.458639626-07:00","level":"INFO","msg":"handler: closed","stream_id":"qpe0mrgb"} +{"time":"2026-03-24T13:29:42.459127534-07:00","level":"INFO","msg":"sender: closed","stream_id":"qpe0mrgb"} +{"time":"2026-03-24T13:29:42.459143553-07:00","level":"INFO","msg":"stream: closed","id":"qpe0mrgb"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-qpe0mrgb/logs/debug.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-qpe0mrgb/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..e4d6dc157ceb58861d17e340a822e9f5c5ac8f86 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-qpe0mrgb/logs/debug.log @@ -0,0 +1,23 @@ +2026-03-24 13:29:16,453 INFO MainThread:2988374 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1 +2026-03-24 13:29:16,453 INFO MainThread:2988374 [wandb_setup.py:_flush():80] Configure stats pid to 2988374 +2026-03-24 13:29:16,453 INFO MainThread:2988374 [wandb_setup.py:_flush():80] Loading settings from /home/nanhuang/.config/wandb/settings +2026-03-24 13:29:16,453 INFO MainThread:2988374 [wandb_setup.py:_flush():80] Loading settings from /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/settings +2026-03-24 13:29:16,454 INFO MainThread:2988374 [wandb_setup.py:_flush():80] Loading settings from environment variables +2026-03-24 13:29:16,454 INFO MainThread:2988374 [wandb_init.py:setup_run_log_directory():714] Logging user logs to /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-qpe0mrgb/logs/debug.log +2026-03-24 13:29:16,454 INFO MainThread:2988374 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-qpe0mrgb/logs/debug-internal.log +2026-03-24 13:29:16,454 INFO MainThread:2988374 [wandb_init.py:init():841] calling init 
triggers +2026-03-24 13:29:16,454 INFO MainThread:2988374 [wandb_init.py:init():846] wandb.init called with sweep_config: {} +config: {'_wandb': {}} +2026-03-24 13:29:16,454 INFO MainThread:2988374 [wandb_init.py:init():889] starting backend +2026-03-24 13:29:16,781 INFO MainThread:2988374 [wandb_init.py:init():892] sending inform_init request +2026-03-24 13:29:16,832 INFO MainThread:2988374 [wandb_init.py:init():900] backend started and connected +2026-03-24 13:29:16,838 INFO MainThread:2988374 [wandb_init.py:init():970] updated telemetry +2026-03-24 13:29:16,840 INFO MainThread:2988374 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout +2026-03-24 13:29:22,760 INFO MainThread:2988374 [wandb_init.py:init():1041] starting run threads in backend +2026-03-24 13:29:22,898 INFO MainThread:2988374 [wandb_run.py:_console_start():2521] atexit reg +2026-03-24 13:29:22,899 INFO MainThread:2988374 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2026-03-24 13:29:22,899 INFO MainThread:2988374 [wandb_run.py:_redirect():2438] Wrapping output streams. +2026-03-24 13:29:22,899 INFO MainThread:2988374 [wandb_run.py:_redirect():2461] Redirects installed. +2026-03-24 13:29:22,905 INFO MainThread:2988374 [wandb_init.py:init():1081] run started, returning control to user process +2026-03-24 13:29:30,597 INFO wandb-AsyncioManager-main:2988374 [service_client.py:_forward_responses():80] Reached EOF. +2026-03-24 13:29:30,597 INFO wandb-AsyncioManager-main:2988374 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles. 
diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-qpe0mrgb/run-qpe0mrgb.wandb b/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-qpe0mrgb/run-qpe0mrgb.wandb new file mode 100644 index 0000000000000000000000000000000000000000..66ec6c3fcbaa154e41f97440c1b2742aa816942c Binary files /dev/null and b/Finetune-GenomicBenchmarks/wandb/run-20260324_132916-qpe0mrgb/run-qpe0mrgb.wandb differ diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132917-vaw9fw15/files/config.yaml b/Finetune-GenomicBenchmarks/wandb/run-20260324_132917-vaw9fw15/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fa9db261180f0fa2170a032c9e04614161b9687a --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132917-vaw9fw15/files/config.yaml @@ -0,0 +1,108 @@ +_wandb: + value: + cli_version: 0.23.1 + e: + i7gnfyji10txnwqz6c2z5nxdqhjbqsps: + args: + - --model_name_or_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained + - --tokenizer_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json + - --trust_remote_code + - "True" + - --data_path + - /data/nanhuang/Nan/ft_data/drosophila_enhancers_stark/split + - --kmer + - "-1" + - --run_name + - base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42 + - --model_max_length + - "512" + - --per_device_train_batch_size + - "128" + - --per_device_eval_batch_size + - "128" + - --gradient_accumulation_steps + - "1" + - --learning_rate + - "3e-5" + - --weight_decay + - "0.05" + - --num_train_epochs + - "4" + - --lr_scheduler_type + - linear + - --warmup_steps + - "0" + - --warmup_ratio + - "0.15" + - --fp16 + - --output_dir + - genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42 + - --evaluation_strategy + - epoch + - --save_strategy + - epoch + - --load_best_model_at_end + - "True" + - --metric_for_best_model + - eval_f1 + - --greater_is_better + - "True" + - --save_total_limit + - "1" + - --logging_steps + 
- "100" + - --overwrite_output_dir + - "True" + - --log_level + - info + - --seed + - "42" + - --find_unused_parameters + - "False" + - --project_name + - genomic_bench_DNAbert2 + codePath: train.py + codePathLocal: train.py + cpu_count: 64 + cpu_count_logical: 128 + disk: + /: + total: "3768964489216" + used: "3559221878784" + email: n5huang@ucsd.edu + executable: /data/nanhuang/miniconda3/envs/bpe_v2/bin/python + host: u112222 + memory: + total: "1082030182400" + os: Linux-5.15.0-126-generic-x86_64-with-glibc2.35 + program: /data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py + python: CPython 3.9.18 + root: /data/nanhuang/Nan/Finetune-GenomicBenchmarks + startedAt: "2026-03-24T20:29:17.731413Z" + writerId: i7gnfyji10txnwqz6c2z5nxdqhjbqsps + m: [] + python_version: 3.9.18 + t: + "1": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "2": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "4": 3.9.18 + "5": 0.23.1 + "6": 4.35.2 + "12": 0.23.1 + "13": linux-x86_64 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132917-vaw9fw15/files/output.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132917-vaw9fw15/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..f887c932b09df6fb3cd360d17bb92f3a631f7071 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132917-vaw9fw15/files/output.log @@ -0,0 +1,45 @@ +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... +Some weights of BertForSequenceClassification were not initialized from the model checkpoint at /data/nanhuang/Nan/models/DNAbert2_Pretrained and are newly initialized: ['bert.pooler.dense.bias', 'classifier.bias', 'bert.pooler.dense.weight', 'classifier.weight'] +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. 
+Traceback (most recent call last): + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 473, in + train() + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 424, in train + model = transformers.AutoModelForSequenceClassification.from_pretrained( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/modeling_utils.py", line 2271, in to + return super().to(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1369, in to + return self._apply(convert) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 955, in _apply + param_applied = fn(param) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1355, in convert + return t.to( +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 12.00 MiB. GPU 0 has a total capacity of 47.53 GiB of which 27.25 MiB is free. Process 2988293 has 21.00 GiB memory in use. Process 2988247 has 13.12 GiB memory in use. Process 2988061 has 7.12 GiB memory in use. Process 2988353 has 574.00 MiB memory in use. Process 2988034 has 434.00 MiB memory in use. Process 2988245 has 260.00 MiB memory in use. Process 2988374 has 260.00 MiB memory in use. Process 2988531 has 260.00 MiB memory in use. Including non-PyTorch memory, this process has 260.00 MiB memory in use. Process 2988566 has 260.00 MiB memory in use. Process 2988668 has 260.00 MiB memory in use. 
Process 2988116 has 260.00 MiB memory in use. Process 2988093 has 260.00 MiB memory in use. Process 2988637 has 260.00 MiB memory in use. Process 2988554 has 260.00 MiB memory in use. Process 2988906 has 260.00 MiB memory in use. Process 2988874 has 260.00 MiB memory in use. Process 2988923 has 260.00 MiB memory in use. Process 2988286 has 260.00 MiB memory in use. Process 2989072 has 260.00 MiB memory in use. Process 2988448 has 260.00 MiB memory in use. Process 2988348 has 260.00 MiB memory in use. Process 2988736 has 260.00 MiB memory in use. Process 2988855 has 260.00 MiB memory in use. Process 2988814 has 10.00 MiB memory in use. Process 2988511 has 6.00 MiB memory in use. Process 2988942 has 10.00 MiB memory in use. Process 2989897 has 4.00 MiB memory in use. Process 2989914 has 12.00 MiB memory in use. Process 2989070 has 10.00 MiB memory in use. Process 2989165 has 10.00 MiB memory in use. Process 2989068 has 10.00 MiB memory in use. Process 2990500 has 10.00 MiB memory in use. Process 2989449 has 10.00 MiB memory in use. Process 2988447 has 16.00 MiB memory in use. Process 2988379 has 10.00 MiB memory in use. Process 2988518 has 10.00 MiB memory in use. Process 2988326 has 10.00 MiB memory in use. Process 2988462 has 10.00 MiB memory in use. Process 2989297 has 10.00 MiB memory in use. Process 2988927 has 10.00 MiB memory in use. Process 2988994 has 10.00 MiB memory in use. Process 2988205 has 10.00 MiB memory in use. Of the allocated memory 0 bytes is allocated by PyTorch, and 0 bytes is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. 
See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) +Traceback (most recent call last): + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 473, in + train() + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 424, in train + model = transformers.AutoModelForSequenceClassification.from_pretrained( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/modeling_utils.py", line 2271, in to + return super().to(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1369, in to + return self._apply(convert) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 955, in _apply + param_applied = fn(param) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1355, in convert + return t.to( +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 12.00 MiB. GPU 0 has a total capacity of 47.53 GiB of which 27.25 MiB is free. Process 2988293 has 21.00 GiB memory in use. Process 2988247 has 13.12 GiB memory in use. Process 2988061 has 7.12 GiB memory in use. Process 2988353 has 574.00 MiB memory in use. Process 2988034 has 434.00 MiB memory in use. Process 2988245 has 260.00 MiB memory in use. Process 2988374 has 260.00 MiB memory in use. Process 2988531 has 260.00 MiB memory in use. 
Including non-PyTorch memory, this process has 260.00 MiB memory in use. Process 2988566 has 260.00 MiB memory in use. Process 2988668 has 260.00 MiB memory in use. Process 2988116 has 260.00 MiB memory in use. Process 2988093 has 260.00 MiB memory in use. Process 2988637 has 260.00 MiB memory in use. Process 2988554 has 260.00 MiB memory in use. Process 2988906 has 260.00 MiB memory in use. Process 2988874 has 260.00 MiB memory in use. Process 2988923 has 260.00 MiB memory in use. Process 2988286 has 260.00 MiB memory in use. Process 2989072 has 260.00 MiB memory in use. Process 2988448 has 260.00 MiB memory in use. Process 2988348 has 260.00 MiB memory in use. Process 2988736 has 260.00 MiB memory in use. Process 2988855 has 260.00 MiB memory in use. Process 2988814 has 10.00 MiB memory in use. Process 2988511 has 6.00 MiB memory in use. Process 2988942 has 10.00 MiB memory in use. Process 2989897 has 4.00 MiB memory in use. Process 2989914 has 12.00 MiB memory in use. Process 2989070 has 10.00 MiB memory in use. Process 2989165 has 10.00 MiB memory in use. Process 2989068 has 10.00 MiB memory in use. Process 2990500 has 10.00 MiB memory in use. Process 2989449 has 10.00 MiB memory in use. Process 2988447 has 16.00 MiB memory in use. Process 2988379 has 10.00 MiB memory in use. Process 2988518 has 10.00 MiB memory in use. Process 2988326 has 10.00 MiB memory in use. Process 2988462 has 10.00 MiB memory in use. Process 2989297 has 10.00 MiB memory in use. Process 2988927 has 10.00 MiB memory in use. Process 2988994 has 10.00 MiB memory in use. Process 2988205 has 10.00 MiB memory in use. Of the allocated memory 0 bytes is allocated by PyTorch, and 0 bytes is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. 
See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132917-vaw9fw15/files/requirements.txt b/Finetune-GenomicBenchmarks/wandb/run-20260324_132917-vaw9fw15/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..44d18d32ec4648cd530877d7c8c4758d5e887b9c --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132917-vaw9fw15/files/requirements.txt @@ -0,0 +1,144 @@ +scipy==1.13.1 +jupyter_core==5.8.1 +smmap==5.0.2 +yarl==1.22.0 +executing==2.2.0 +cffi==2.0.0 +mkl_random==1.2.8 +traitlets==5.14.3 +wandb==0.23.1 +annotated-types==0.7.0 +evaluate==0.4.6 +kiwisolver==1.4.4 +Jinja2==3.1.6 +pyparsing==3.2.0 +mpmath==1.3.0 +debugpy==1.8.16 +nvidia-cuda-nvrtc-cu12==12.8.93 +docker-pycreds==0.4.0 +pycparser==2.23 +anyio==4.12.0 +safetensors==0.7.0 +matplotlib-inline==0.1.7 +Pygments==2.19.2 +numpy==2.0.2 +nvidia-cuda-cupti-cu12==12.8.90 +Bottleneck==1.4.2 +matplotlib==3.9.2 +numexpr==2.10.1 +sip==6.7.12 +aiohappyeyeballs==2.6.1 +ptyprocess==0.7.0 +fsspec==2025.7.0 +accelerate==0.25.0 +zipp==3.23.0 +PyQt5_sip==12.13.0 +pure_eval==0.2.3 +regex==2025.11.3 +aiosignal==1.4.0 +certifi==2025.10.5 +transformers==4.35.2 +mkl-service==2.4.0 +httpx==0.28.1 +mkl_fft==1.3.11 +pickleshare==0.7.5 +ipykernel==6.30.1 +nvidia-nvtx-cu12==12.8.90 +nvidia-cufft-cu12==11.3.3.83 +triton==3.4.0 +numba==0.60.0 +psutil==7.0.0 +contourpy==1.2.1 +PyQt5==5.15.10 +packaging==25.0 +datasets==4.4.1 +ipython==8.18.1 +sympy==1.14.0 +nvidia-cusolver-cu12==11.7.3.90 +multidict==6.7.0 +jupyter_client==8.6.3 +setuptools==80.9.0 +prompt_toolkit==3.0.51 +six==1.17.0 +GitPython==3.1.45 +pydantic==2.11.7 +nvidia-cublas-cu12==12.8.4.1 +aiohttp==3.13.2 +tzdata==2025.2 +importlib_metadata==8.7.0 +biopython==1.85 +httpcore==1.0.9 +python-dateutil==2.9.0.post0 +llvmlite==0.43.0 +pandas==2.3.3 +scikit-learn==1.6.1 +asttokens==3.0.0 +joblib==1.5.3 +h11==0.16.0 
+charset-normalizer==3.4.4 +pyzmq==27.0.2 +multiprocess==0.70.18 +nvidia-nvjitlink-cu12==12.8.93 +sentry-sdk==2.35.0 +pytz==2025.2 +pydantic_core==2.33.2 +MarkupSafe==3.0.3 +brotlicffi==1.0.9.2 +stack_data==0.6.3 +tqdm==4.67.1 +pynndescent==0.5.13 +importlib_resources==6.5.2 +ply==3.11 +pyarrow==21.0.0 +tokenizers==0.15.2 +exceptiongroup==1.3.1 +nvidia-cusparse-cu12==12.5.8.93 +comm==0.2.3 +pillow==11.3.0 +nvidia-cusparselt-cu12==0.7.1 +protobuf==3.20.3 +urllib3==2.5.0 +wheel==0.45.1 +wcwidth==0.2.13 +appdirs==1.4.4 +PySocks==1.7.1 +PyQt6_sip==13.10.2 +umap-learn==0.5.9.post2 +attrs==25.4.0 +platformdirs==4.3.8 +nvidia-cuda-runtime-cu12==12.8.90 +typing-inspection==0.4.1 +huggingface_hub==0.34.4 +decorator==5.2.1 +filelock==3.17.0 +nvidia-nccl-cu12==2.27.3 +fonttools==4.60.1 +xxhash==3.6.0 +dill==0.4.0 +threadpoolctl==3.6.0 +parso==0.8.4 +pysam==0.9.1 +frozenlist==1.8.0 +typing_extensions==4.15.0 +propcache==0.4.1 +tomli==2.2.1 +click==8.1.8 +nvidia-cudnn-cu12==9.10.2.21 +gitdb==4.0.12 +pip==25.3 +tornado==6.5.2 +networkx==3.2.1 +jedi==0.19.2 +idna==3.11 +pexpect==4.9.0 +async-timeout==5.0.1 +hf-xet==1.1.8 +nvidia-curand-cu12==10.3.9.90 +PyYAML==6.0.2 +nvidia-cufile-cu12==1.13.1.3 +setproctitle==1.3.6 +eval_type_backport==0.2.2 +requests==2.32.5 +nest-asyncio==1.6.0 +torch==2.8.0 +cycler==0.11.0 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132917-vaw9fw15/files/wandb-metadata.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_132917-vaw9fw15/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..2be4e024b325c014269deef437b4d54bd56c125b --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132917-vaw9fw15/files/wandb-metadata.json @@ -0,0 +1,85 @@ +{ + "os": "Linux-5.15.0-126-generic-x86_64-with-glibc2.35", + "python": "CPython 3.9.18", + "startedAt": "2026-03-24T20:29:17.731413Z", + "args": [ + "--model_name_or_path", + "/data/nanhuang/Nan/models/DNAbert2_Pretrained", + "--tokenizer_path", + 
"/data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json", + "--trust_remote_code", + "True", + "--data_path", + "/data/nanhuang/Nan/ft_data/drosophila_enhancers_stark/split", + "--kmer", + "-1", + "--run_name", + "base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42", + "--model_max_length", + "512", + "--per_device_train_batch_size", + "128", + "--per_device_eval_batch_size", + "128", + "--gradient_accumulation_steps", + "1", + "--learning_rate", + "3e-5", + "--weight_decay", + "0.05", + "--num_train_epochs", + "4", + "--lr_scheduler_type", + "linear", + "--warmup_steps", + "0", + "--warmup_ratio", + "0.15", + "--fp16", + "--output_dir", + "genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42", + "--evaluation_strategy", + "epoch", + "--save_strategy", + "epoch", + "--load_best_model_at_end", + "True", + "--metric_for_best_model", + "eval_f1", + "--greater_is_better", + "True", + "--save_total_limit", + "1", + "--logging_steps", + "100", + "--overwrite_output_dir", + "True", + "--log_level", + "info", + "--seed", + "42", + "--find_unused_parameters", + "False", + "--project_name", + "genomic_bench_DNAbert2" + ], + "program": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", + "codePath": "train.py", + "codePathLocal": "train.py", + "email": "n5huang@ucsd.edu", + "root": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks", + "host": "u112222", + "executable": "/data/nanhuang/miniconda3/envs/bpe_v2/bin/python", + "cpu_count": 64, + "cpu_count_logical": 128, + "disk": { + "/": { + "total": "3768964489216", + "used": "3559221878784" + } + }, + "memory": { + "total": "1082030182400" + }, + "writerId": "i7gnfyji10txnwqz6c2z5nxdqhjbqsps" +} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132917-vaw9fw15/files/wandb-summary.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_132917-vaw9fw15/files/wandb-summary.json new file mode 100644 index 
0000000000000000000000000000000000000000..20b4385da45dc73f28a83368c93d1645d8267473 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132917-vaw9fw15/files/wandb-summary.json @@ -0,0 +1 @@ +{"_runtime":8,"_wandb":{"runtime":8}} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132917-vaw9fw15/logs/debug-core.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132917-vaw9fw15/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..59bbe94b6e715ec1093aa654477e4feed2fa4530 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132917-vaw9fw15/logs/debug-core.log @@ -0,0 +1,14 @@ +{"time":"2026-03-24T13:29:17.854355073-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpz_mwzrmw/port-2988064.txt","pid":2988064,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:17.855370817-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988064} +{"time":"2026-03-24T13:29:17.855344327-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988064-2996648-3314653201/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:18.036929677-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:18.092681008-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"vaw9fw15","id":"1(@)"} +{"time":"2026-03-24T13:29:23.320919828-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"vaw9fw15","id":"1(@)"} +{"time":"2026-03-24T13:29:32.609434465-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:32.609708653-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:32.609770163-07:00","level":"INFO","msg":"connection: closed 
successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:32.609782933-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:32.609956532-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988064-2996648-3314653201/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:37.101436474-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:37.101529623-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:37.101581233-07:00","level":"INFO","msg":"server is closed"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132917-vaw9fw15/logs/debug-internal.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132917-vaw9fw15/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..cdc6553ee6cfad8c63b2fdf343992162ca80dd0a --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132917-vaw9fw15/logs/debug-internal.log @@ -0,0 +1,12 @@ +{"time":"2026-03-24T13:29:18.093159255-07:00","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2026-03-24T13:29:23.176377541-07:00","level":"ERROR","msg":"monitor: failed to initialize GPU resource: monitor: could not get GPU binary port: timeout reading portfile /tmp/wandb-system-monitor-portfile-795424768"} +{"time":"2026-03-24T13:29:23.320465521-07:00","level":"INFO","msg":"stream: created new stream","id":"vaw9fw15"} +{"time":"2026-03-24T13:29:23.32066096-07:00","level":"INFO","msg":"handler: started","stream_id":"vaw9fw15"} +{"time":"2026-03-24T13:29:23.320895859-07:00","level":"INFO","msg":"stream: started","id":"vaw9fw15"} +{"time":"2026-03-24T13:29:23.320946948-07:00","level":"INFO","msg":"sender: started","stream_id":"vaw9fw15"} +{"time":"2026-03-24T13:29:23.320949268-07:00","level":"INFO","msg":"writer: started","stream_id":"vaw9fw15"} 
+{"time":"2026-03-24T13:29:32.609545824-07:00","level":"INFO","msg":"stream: closing","id":"vaw9fw15"} +{"time":"2026-03-24T13:29:33.039031332-07:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2026-03-24T13:29:37.093291192-07:00","level":"INFO","msg":"handler: closed","stream_id":"vaw9fw15"} +{"time":"2026-03-24T13:29:37.093610281-07:00","level":"INFO","msg":"sender: closed","stream_id":"vaw9fw15"} +{"time":"2026-03-24T13:29:37.09365893-07:00","level":"INFO","msg":"stream: closed","id":"vaw9fw15"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132917-vaw9fw15/logs/debug.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132917-vaw9fw15/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..aebbe70841943834f47febb33d704ccf9d53b6b1 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132917-vaw9fw15/logs/debug.log @@ -0,0 +1,23 @@ +2026-03-24 13:29:17,736 INFO MainThread:2988064 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1 +2026-03-24 13:29:17,736 INFO MainThread:2988064 [wandb_setup.py:_flush():80] Configure stats pid to 2988064 +2026-03-24 13:29:17,736 INFO MainThread:2988064 [wandb_setup.py:_flush():80] Loading settings from /home/nanhuang/.config/wandb/settings +2026-03-24 13:29:17,736 INFO MainThread:2988064 [wandb_setup.py:_flush():80] Loading settings from /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/settings +2026-03-24 13:29:17,736 INFO MainThread:2988064 [wandb_setup.py:_flush():80] Loading settings from environment variables +2026-03-24 13:29:17,736 INFO MainThread:2988064 [wandb_init.py:setup_run_log_directory():714] Logging user logs to /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_132917-vaw9fw15/logs/debug.log +2026-03-24 13:29:17,736 INFO MainThread:2988064 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to 
/data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_132917-vaw9fw15/logs/debug-internal.log +2026-03-24 13:29:17,736 INFO MainThread:2988064 [wandb_init.py:init():841] calling init triggers +2026-03-24 13:29:17,736 INFO MainThread:2988064 [wandb_init.py:init():846] wandb.init called with sweep_config: {} +config: {'_wandb': {}} +2026-03-24 13:29:17,737 INFO MainThread:2988064 [wandb_init.py:init():889] starting backend +2026-03-24 13:29:18,037 INFO MainThread:2988064 [wandb_init.py:init():892] sending inform_init request +2026-03-24 13:29:18,087 INFO MainThread:2988064 [wandb_init.py:init():900] backend started and connected +2026-03-24 13:29:18,093 INFO MainThread:2988064 [wandb_init.py:init():970] updated telemetry +2026-03-24 13:29:18,094 INFO MainThread:2988064 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout +2026-03-24 13:29:23,958 INFO MainThread:2988064 [wandb_init.py:init():1041] starting run threads in backend +2026-03-24 13:29:24,115 INFO MainThread:2988064 [wandb_run.py:_console_start():2521] atexit reg +2026-03-24 13:29:24,116 INFO MainThread:2988064 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2026-03-24 13:29:24,117 INFO MainThread:2988064 [wandb_run.py:_redirect():2438] Wrapping output streams. +2026-03-24 13:29:24,120 INFO MainThread:2988064 [wandb_run.py:_redirect():2461] Redirects installed. +2026-03-24 13:29:24,129 INFO MainThread:2988064 [wandb_init.py:init():1081] run started, returning control to user process +2026-03-24 13:29:32,608 INFO wandb-AsyncioManager-main:2988064 [service_client.py:_forward_responses():80] Reached EOF. +2026-03-24 13:29:32,608 INFO wandb-AsyncioManager-main:2988064 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles. 
diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132917-vaw9fw15/run-vaw9fw15.wandb b/Finetune-GenomicBenchmarks/wandb/run-20260324_132917-vaw9fw15/run-vaw9fw15.wandb new file mode 100644 index 0000000000000000000000000000000000000000..988d1c801cb6e952a7921989b9eb3bd02a0ea84d Binary files /dev/null and b/Finetune-GenomicBenchmarks/wandb/run-20260324_132917-vaw9fw15/run-vaw9fw15.wandb differ diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132918-kt9ngt4v/files/config.yaml b/Finetune-GenomicBenchmarks/wandb/run-20260324_132918-kt9ngt4v/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0836f9b2acd1390a7780aa6d261355941984aa2e --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132918-kt9ngt4v/files/config.yaml @@ -0,0 +1,108 @@ +_wandb: + value: + cli_version: 0.23.1 + e: + q0ibufu3q7039go7y2jpncci4f0xftdg: + args: + - --model_name_or_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained + - --tokenizer_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json + - --trust_remote_code + - "True" + - --data_path + - /data/nanhuang/Nan/ft_data/drosophila_enhancers_stark/split + - --kmer + - "-1" + - --run_name + - base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42 + - --model_max_length + - "512" + - --per_device_train_batch_size + - "128" + - --per_device_eval_batch_size + - "128" + - --gradient_accumulation_steps + - "1" + - --learning_rate + - "3e-5" + - --weight_decay + - "0.05" + - --num_train_epochs + - "4" + - --lr_scheduler_type + - linear + - --warmup_steps + - "0" + - --warmup_ratio + - "0.15" + - --fp16 + - --output_dir + - genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42 + - --evaluation_strategy + - epoch + - --save_strategy + - epoch + - --load_best_model_at_end + - "True" + - --metric_for_best_model + - eval_f1 + - --greater_is_better + - "True" + - --save_total_limit + - "1" + - --logging_steps + 
- "100" + - --overwrite_output_dir + - "True" + - --log_level + - info + - --seed + - "42" + - --find_unused_parameters + - "False" + - --project_name + - genomic_bench_DNAbert2 + codePath: train.py + codePathLocal: train.py + cpu_count: 64 + cpu_count_logical: 128 + disk: + /: + total: "3768964489216" + used: "3559221993472" + email: n5huang@ucsd.edu + executable: /data/nanhuang/miniconda3/envs/bpe_v2/bin/python + host: u112222 + memory: + total: "1082030182400" + os: Linux-5.15.0-126-generic-x86_64-with-glibc2.35 + program: /data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py + python: CPython 3.9.18 + root: /data/nanhuang/Nan/Finetune-GenomicBenchmarks + startedAt: "2026-03-24T20:29:18.562050Z" + writerId: q0ibufu3q7039go7y2jpncci4f0xftdg + m: [] + python_version: 3.9.18 + t: + "1": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "2": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "4": 3.9.18 + "5": 0.23.1 + "6": 4.35.2 + "12": 0.23.1 + "13": linux-x86_64 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132918-kt9ngt4v/files/output.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132918-kt9ngt4v/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..ad00a1b425b09e1d0fb91e26c244fd906ed51ba4 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132918-kt9ngt4v/files/output.log @@ -0,0 +1,45 @@ +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... +Some weights of BertForSequenceClassification were not initialized from the model checkpoint at /data/nanhuang/Nan/models/DNAbert2_Pretrained and are newly initialized: ['classifier.weight', 'classifier.bias', 'bert.pooler.dense.bias', 'bert.pooler.dense.weight'] +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. 
+Traceback (most recent call last): + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 473, in + train() + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 424, in train + model = transformers.AutoModelForSequenceClassification.from_pretrained( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/modeling_utils.py", line 2271, in to + return super().to(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1369, in to + return self._apply(convert) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 955, in _apply + param_applied = fn(param) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1355, in convert + return t.to( +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 12.00 MiB. GPU 0 has a total capacity of 47.53 GiB of which 29.12 MiB is free. Process 2988293 has 21.00 GiB memory in use. Process 2988247 has 13.12 GiB memory in use. Process 2988061 has 7.12 GiB memory in use. Process 2988353 has 574.00 MiB memory in use. Process 2988034 has 434.00 MiB memory in use. Process 2988245 has 260.00 MiB memory in use. Process 2988374 has 260.00 MiB memory in use. Process 2988531 has 260.00 MiB memory in use. Process 2988064 has 260.00 MiB memory in use. Process 2988566 has 260.00 MiB memory in use. Including non-PyTorch memory, this process has 260.00 MiB memory in use. 
Process 2988116 has 260.00 MiB memory in use. Process 2988093 has 260.00 MiB memory in use. Process 2988637 has 260.00 MiB memory in use. Process 2988554 has 260.00 MiB memory in use. Process 2988906 has 260.00 MiB memory in use. Process 2988874 has 260.00 MiB memory in use. Process 2988923 has 260.00 MiB memory in use. Process 2988286 has 260.00 MiB memory in use. Process 2989072 has 260.00 MiB memory in use. Process 2988448 has 260.00 MiB memory in use. Process 2988348 has 260.00 MiB memory in use. Process 2988736 has 260.00 MiB memory in use. Process 2988855 has 260.00 MiB memory in use. Process 2988814 has 10.00 MiB memory in use. Process 2988511 has 4.00 MiB memory in use. Process 2988942 has 10.00 MiB memory in use. Process 2989897 has 4.00 MiB memory in use. Process 2989914 has 12.00 MiB memory in use. Process 2989070 has 10.00 MiB memory in use. Process 2989165 has 10.00 MiB memory in use. Process 2989068 has 10.00 MiB memory in use. Process 2990500 has 10.00 MiB memory in use. Process 2989449 has 10.00 MiB memory in use. Process 2988447 has 16.00 MiB memory in use. Process 2988379 has 10.00 MiB memory in use. Process 2988518 has 10.00 MiB memory in use. Process 2988326 has 10.00 MiB memory in use. Process 2988462 has 10.00 MiB memory in use. Process 2989297 has 10.00 MiB memory in use. Process 2988927 has 10.00 MiB memory in use. Process 2988994 has 10.00 MiB memory in use. Process 2988205 has 10.00 MiB memory in use. Of the allocated memory 0 bytes is allocated by PyTorch, and 0 bytes is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. 
See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) +Traceback (most recent call last): + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 473, in + train() + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 424, in train + model = transformers.AutoModelForSequenceClassification.from_pretrained( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/modeling_utils.py", line 2271, in to + return super().to(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1369, in to + return self._apply(convert) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 955, in _apply + param_applied = fn(param) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1355, in convert + return t.to( +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 12.00 MiB. GPU 0 has a total capacity of 47.53 GiB of which 29.12 MiB is free. Process 2988293 has 21.00 GiB memory in use. Process 2988247 has 13.12 GiB memory in use. Process 2988061 has 7.12 GiB memory in use. Process 2988353 has 574.00 MiB memory in use. Process 2988034 has 434.00 MiB memory in use. Process 2988245 has 260.00 MiB memory in use. Process 2988374 has 260.00 MiB memory in use. Process 2988531 has 260.00 MiB memory in use. Process 2988064 has 260.00 MiB memory in use. 
Process 2988566 has 260.00 MiB memory in use. Including non-PyTorch memory, this process has 260.00 MiB memory in use. Process 2988116 has 260.00 MiB memory in use. Process 2988093 has 260.00 MiB memory in use. Process 2988637 has 260.00 MiB memory in use. Process 2988554 has 260.00 MiB memory in use. Process 2988906 has 260.00 MiB memory in use. Process 2988874 has 260.00 MiB memory in use. Process 2988923 has 260.00 MiB memory in use. Process 2988286 has 260.00 MiB memory in use. Process 2989072 has 260.00 MiB memory in use. Process 2988448 has 260.00 MiB memory in use. Process 2988348 has 260.00 MiB memory in use. Process 2988736 has 260.00 MiB memory in use. Process 2988855 has 260.00 MiB memory in use. Process 2988814 has 10.00 MiB memory in use. Process 2988511 has 4.00 MiB memory in use. Process 2988942 has 10.00 MiB memory in use. Process 2989897 has 4.00 MiB memory in use. Process 2989914 has 12.00 MiB memory in use. Process 2989070 has 10.00 MiB memory in use. Process 2989165 has 10.00 MiB memory in use. Process 2989068 has 10.00 MiB memory in use. Process 2990500 has 10.00 MiB memory in use. Process 2989449 has 10.00 MiB memory in use. Process 2988447 has 16.00 MiB memory in use. Process 2988379 has 10.00 MiB memory in use. Process 2988518 has 10.00 MiB memory in use. Process 2988326 has 10.00 MiB memory in use. Process 2988462 has 10.00 MiB memory in use. Process 2989297 has 10.00 MiB memory in use. Process 2988927 has 10.00 MiB memory in use. Process 2988994 has 10.00 MiB memory in use. Process 2988205 has 10.00 MiB memory in use. Of the allocated memory 0 bytes is allocated by PyTorch, and 0 bytes is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. 
See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132918-kt9ngt4v/files/requirements.txt b/Finetune-GenomicBenchmarks/wandb/run-20260324_132918-kt9ngt4v/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..44d18d32ec4648cd530877d7c8c4758d5e887b9c --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132918-kt9ngt4v/files/requirements.txt @@ -0,0 +1,144 @@ +scipy==1.13.1 +jupyter_core==5.8.1 +smmap==5.0.2 +yarl==1.22.0 +executing==2.2.0 +cffi==2.0.0 +mkl_random==1.2.8 +traitlets==5.14.3 +wandb==0.23.1 +annotated-types==0.7.0 +evaluate==0.4.6 +kiwisolver==1.4.4 +Jinja2==3.1.6 +pyparsing==3.2.0 +mpmath==1.3.0 +debugpy==1.8.16 +nvidia-cuda-nvrtc-cu12==12.8.93 +docker-pycreds==0.4.0 +pycparser==2.23 +anyio==4.12.0 +safetensors==0.7.0 +matplotlib-inline==0.1.7 +Pygments==2.19.2 +numpy==2.0.2 +nvidia-cuda-cupti-cu12==12.8.90 +Bottleneck==1.4.2 +matplotlib==3.9.2 +numexpr==2.10.1 +sip==6.7.12 +aiohappyeyeballs==2.6.1 +ptyprocess==0.7.0 +fsspec==2025.7.0 +accelerate==0.25.0 +zipp==3.23.0 +PyQt5_sip==12.13.0 +pure_eval==0.2.3 +regex==2025.11.3 +aiosignal==1.4.0 +certifi==2025.10.5 +transformers==4.35.2 +mkl-service==2.4.0 +httpx==0.28.1 +mkl_fft==1.3.11 +pickleshare==0.7.5 +ipykernel==6.30.1 +nvidia-nvtx-cu12==12.8.90 +nvidia-cufft-cu12==11.3.3.83 +triton==3.4.0 +numba==0.60.0 +psutil==7.0.0 +contourpy==1.2.1 +PyQt5==5.15.10 +packaging==25.0 +datasets==4.4.1 +ipython==8.18.1 +sympy==1.14.0 +nvidia-cusolver-cu12==11.7.3.90 +multidict==6.7.0 +jupyter_client==8.6.3 +setuptools==80.9.0 +prompt_toolkit==3.0.51 +six==1.17.0 +GitPython==3.1.45 +pydantic==2.11.7 +nvidia-cublas-cu12==12.8.4.1 +aiohttp==3.13.2 +tzdata==2025.2 +importlib_metadata==8.7.0 +biopython==1.85 +httpcore==1.0.9 +python-dateutil==2.9.0.post0 +llvmlite==0.43.0 +pandas==2.3.3 +scikit-learn==1.6.1 +asttokens==3.0.0 +joblib==1.5.3 +h11==0.16.0 
+charset-normalizer==3.4.4 +pyzmq==27.0.2 +multiprocess==0.70.18 +nvidia-nvjitlink-cu12==12.8.93 +sentry-sdk==2.35.0 +pytz==2025.2 +pydantic_core==2.33.2 +MarkupSafe==3.0.3 +brotlicffi==1.0.9.2 +stack_data==0.6.3 +tqdm==4.67.1 +pynndescent==0.5.13 +importlib_resources==6.5.2 +ply==3.11 +pyarrow==21.0.0 +tokenizers==0.15.2 +exceptiongroup==1.3.1 +nvidia-cusparse-cu12==12.5.8.93 +comm==0.2.3 +pillow==11.3.0 +nvidia-cusparselt-cu12==0.7.1 +protobuf==3.20.3 +urllib3==2.5.0 +wheel==0.45.1 +wcwidth==0.2.13 +appdirs==1.4.4 +PySocks==1.7.1 +PyQt6_sip==13.10.2 +umap-learn==0.5.9.post2 +attrs==25.4.0 +platformdirs==4.3.8 +nvidia-cuda-runtime-cu12==12.8.90 +typing-inspection==0.4.1 +huggingface_hub==0.34.4 +decorator==5.2.1 +filelock==3.17.0 +nvidia-nccl-cu12==2.27.3 +fonttools==4.60.1 +xxhash==3.6.0 +dill==0.4.0 +threadpoolctl==3.6.0 +parso==0.8.4 +pysam==0.9.1 +frozenlist==1.8.0 +typing_extensions==4.15.0 +propcache==0.4.1 +tomli==2.2.1 +click==8.1.8 +nvidia-cudnn-cu12==9.10.2.21 +gitdb==4.0.12 +pip==25.3 +tornado==6.5.2 +networkx==3.2.1 +jedi==0.19.2 +idna==3.11 +pexpect==4.9.0 +async-timeout==5.0.1 +hf-xet==1.1.8 +nvidia-curand-cu12==10.3.9.90 +PyYAML==6.0.2 +nvidia-cufile-cu12==1.13.1.3 +setproctitle==1.3.6 +eval_type_backport==0.2.2 +requests==2.32.5 +nest-asyncio==1.6.0 +torch==2.8.0 +cycler==0.11.0 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132918-kt9ngt4v/files/wandb-metadata.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_132918-kt9ngt4v/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..5e29549db16a7fa7e489d1705a7f0d7c8e317684 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132918-kt9ngt4v/files/wandb-metadata.json @@ -0,0 +1,85 @@ +{ + "os": "Linux-5.15.0-126-generic-x86_64-with-glibc2.35", + "python": "CPython 3.9.18", + "startedAt": "2026-03-24T20:29:18.562050Z", + "args": [ + "--model_name_or_path", + "/data/nanhuang/Nan/models/DNAbert2_Pretrained", + "--tokenizer_path", + 
"/data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json", + "--trust_remote_code", + "True", + "--data_path", + "/data/nanhuang/Nan/ft_data/drosophila_enhancers_stark/split", + "--kmer", + "-1", + "--run_name", + "base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42", + "--model_max_length", + "512", + "--per_device_train_batch_size", + "128", + "--per_device_eval_batch_size", + "128", + "--gradient_accumulation_steps", + "1", + "--learning_rate", + "3e-5", + "--weight_decay", + "0.05", + "--num_train_epochs", + "4", + "--lr_scheduler_type", + "linear", + "--warmup_steps", + "0", + "--warmup_ratio", + "0.15", + "--fp16", + "--output_dir", + "genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42", + "--evaluation_strategy", + "epoch", + "--save_strategy", + "epoch", + "--load_best_model_at_end", + "True", + "--metric_for_best_model", + "eval_f1", + "--greater_is_better", + "True", + "--save_total_limit", + "1", + "--logging_steps", + "100", + "--overwrite_output_dir", + "True", + "--log_level", + "info", + "--seed", + "42", + "--find_unused_parameters", + "False", + "--project_name", + "genomic_bench_DNAbert2" + ], + "program": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", + "codePath": "train.py", + "codePathLocal": "train.py", + "email": "n5huang@ucsd.edu", + "root": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks", + "host": "u112222", + "executable": "/data/nanhuang/miniconda3/envs/bpe_v2/bin/python", + "cpu_count": 64, + "cpu_count_logical": 128, + "disk": { + "/": { + "total": "3768964489216", + "used": "3559221993472" + } + }, + "memory": { + "total": "1082030182400" + }, + "writerId": "q0ibufu3q7039go7y2jpncci4f0xftdg" +} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132918-kt9ngt4v/files/wandb-summary.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_132918-kt9ngt4v/files/wandb-summary.json new file mode 100644 index 
0000000000000000000000000000000000000000..f4ec3806c8e5abda8d70342c64624c8cf8f81552 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132918-kt9ngt4v/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb":{"runtime":9},"_runtime":9} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132918-kt9ngt4v/logs/debug-core.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132918-kt9ngt4v/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..d664c10dcfb98d17deb4b3f5dd9ba60c582b4a92 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132918-kt9ngt4v/logs/debug-core.log @@ -0,0 +1,14 @@ +{"time":"2026-03-24T13:29:18.734149815-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpe02wj92j/port-2988668.txt","pid":2988668,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:18.737765004-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988668} +{"time":"2026-03-24T13:29:18.737720584-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988668-2996881-1202996963/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:18.898744454-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:18.945864026-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"kt9ngt4v","id":"1(@)"} +{"time":"2026-03-24T13:29:24.12288797-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"kt9ngt4v","id":"1(@)"} +{"time":"2026-03-24T13:29:34.176775025-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:34.176930264-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:34.176990283-07:00","level":"INFO","msg":"server is shutting down"} 
+{"time":"2026-03-24T13:29:34.177141683-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:34.177418481-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988668-2996881-1202996963/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:48.401165272-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:48.401274251-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:48.401326111-07:00","level":"INFO","msg":"server is closed"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132918-kt9ngt4v/logs/debug-internal.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132918-kt9ngt4v/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..0fde700f55cc4a0d0e706237b0930b11e4b173ed --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132918-kt9ngt4v/logs/debug-internal.log @@ -0,0 +1,12 @@ +{"time":"2026-03-24T13:29:18.946195844-07:00","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2026-03-24T13:29:24.0006937-07:00","level":"ERROR","msg":"monitor: failed to initialize GPU resource: monitor: could not get GPU binary port: timeout reading portfile /tmp/wandb-system-monitor-portfile-2731731584"} +{"time":"2026-03-24T13:29:24.122538332-07:00","level":"INFO","msg":"stream: created new stream","id":"kt9ngt4v"} +{"time":"2026-03-24T13:29:24.12287708-07:00","level":"INFO","msg":"stream: started","id":"kt9ngt4v"} +{"time":"2026-03-24T13:29:24.123053049-07:00","level":"INFO","msg":"writer: started","stream_id":"kt9ngt4v"} +{"time":"2026-03-24T13:29:24.123198728-07:00","level":"INFO","msg":"handler: started","stream_id":"kt9ngt4v"} +{"time":"2026-03-24T13:29:24.123222578-07:00","level":"INFO","msg":"sender: started","stream_id":"kt9ngt4v"} 
+{"time":"2026-03-24T13:29:34.176978914-07:00","level":"INFO","msg":"stream: closing","id":"kt9ngt4v"} +{"time":"2026-03-24T13:29:34.592213086-07:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2026-03-24T13:29:48.390095407-07:00","level":"INFO","msg":"handler: closed","stream_id":"kt9ngt4v"} +{"time":"2026-03-24T13:29:48.390383925-07:00","level":"INFO","msg":"sender: closed","stream_id":"kt9ngt4v"} +{"time":"2026-03-24T13:29:48.390410075-07:00","level":"INFO","msg":"stream: closed","id":"kt9ngt4v"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132918-kt9ngt4v/logs/debug.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132918-kt9ngt4v/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..bc86a6b300452d4083de5199f6e576e6d89436c0 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132918-kt9ngt4v/logs/debug.log @@ -0,0 +1,23 @@ +2026-03-24 13:29:18,566 INFO MainThread:2988668 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1 +2026-03-24 13:29:18,567 INFO MainThread:2988668 [wandb_setup.py:_flush():80] Configure stats pid to 2988668 +2026-03-24 13:29:18,567 INFO MainThread:2988668 [wandb_setup.py:_flush():80] Loading settings from /home/nanhuang/.config/wandb/settings +2026-03-24 13:29:18,567 INFO MainThread:2988668 [wandb_setup.py:_flush():80] Loading settings from /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/settings +2026-03-24 13:29:18,567 INFO MainThread:2988668 [wandb_setup.py:_flush():80] Loading settings from environment variables +2026-03-24 13:29:18,567 INFO MainThread:2988668 [wandb_init.py:setup_run_log_directory():714] Logging user logs to /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_132918-kt9ngt4v/logs/debug.log +2026-03-24 13:29:18,567 INFO MainThread:2988668 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to 
/data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_132918-kt9ngt4v/logs/debug-internal.log +2026-03-24 13:29:18,567 INFO MainThread:2988668 [wandb_init.py:init():841] calling init triggers +2026-03-24 13:29:18,567 INFO MainThread:2988668 [wandb_init.py:init():846] wandb.init called with sweep_config: {} +config: {'_wandb': {}} +2026-03-24 13:29:18,567 INFO MainThread:2988668 [wandb_init.py:init():889] starting backend +2026-03-24 13:29:18,899 INFO MainThread:2988668 [wandb_init.py:init():892] sending inform_init request +2026-03-24 13:29:18,943 INFO MainThread:2988668 [wandb_init.py:init():900] backend started and connected +2026-03-24 13:29:18,948 INFO MainThread:2988668 [wandb_init.py:init():970] updated telemetry +2026-03-24 13:29:18,950 INFO MainThread:2988668 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout +2026-03-24 13:29:24,978 INFO MainThread:2988668 [wandb_init.py:init():1041] starting run threads in backend +2026-03-24 13:29:25,123 INFO MainThread:2988668 [wandb_run.py:_console_start():2521] atexit reg +2026-03-24 13:29:25,123 INFO MainThread:2988668 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2026-03-24 13:29:25,123 INFO MainThread:2988668 [wandb_run.py:_redirect():2438] Wrapping output streams. +2026-03-24 13:29:25,123 INFO MainThread:2988668 [wandb_run.py:_redirect():2461] Redirects installed. +2026-03-24 13:29:25,128 INFO MainThread:2988668 [wandb_init.py:init():1081] run started, returning control to user process +2026-03-24 13:29:34,176 INFO wandb-AsyncioManager-main:2988668 [service_client.py:_forward_responses():80] Reached EOF. +2026-03-24 13:29:34,177 INFO wandb-AsyncioManager-main:2988668 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles. 
diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132918-kt9ngt4v/run-kt9ngt4v.wandb b/Finetune-GenomicBenchmarks/wandb/run-20260324_132918-kt9ngt4v/run-kt9ngt4v.wandb new file mode 100644 index 0000000000000000000000000000000000000000..5fc33a32a25ea4cbf5d0b1dec6091765c2a3cc3b Binary files /dev/null and b/Finetune-GenomicBenchmarks/wandb/run-20260324_132918-kt9ngt4v/run-kt9ngt4v.wandb differ diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-0pp4rdwo/files/config.yaml b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-0pp4rdwo/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2e182545afc49ec8d2d4cb43ac2d0598fb5c5c2e --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-0pp4rdwo/files/config.yaml @@ -0,0 +1,108 @@ +_wandb: + value: + cli_version: 0.23.1 + e: + gfhdaiogyv661d0m6c02g97a4gh4csuu: + args: + - --model_name_or_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained + - --tokenizer_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json + - --trust_remote_code + - "True" + - --data_path + - /data/nanhuang/Nan/ft_data/drosophila_enhancers_stark/split + - --kmer + - "-1" + - --run_name + - base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42 + - --model_max_length + - "512" + - --per_device_train_batch_size + - "128" + - --per_device_eval_batch_size + - "128" + - --gradient_accumulation_steps + - "1" + - --learning_rate + - "3e-5" + - --weight_decay + - "0.05" + - --num_train_epochs + - "4" + - --lr_scheduler_type + - linear + - --warmup_steps + - "0" + - --warmup_ratio + - "0.15" + - --fp16 + - --output_dir + - genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42 + - --evaluation_strategy + - epoch + - --save_strategy + - epoch + - --load_best_model_at_end + - "True" + - --metric_for_best_model + - eval_f1 + - --greater_is_better + - "True" + - --save_total_limit + - "1" + - --logging_steps + 
- "100" + - --overwrite_output_dir + - "True" + - --log_level + - info + - --seed + - "42" + - --find_unused_parameters + - "False" + - --project_name + - genomic_bench_DNAbert2 + codePath: train.py + codePathLocal: train.py + cpu_count: 64 + cpu_count_logical: 128 + disk: + /: + total: "3768964489216" + used: "3559221993472" + email: n5huang@ucsd.edu + executable: /data/nanhuang/miniconda3/envs/bpe_v2/bin/python + host: u112222 + memory: + total: "1082030182400" + os: Linux-5.15.0-126-generic-x86_64-with-glibc2.35 + program: /data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py + python: CPython 3.9.18 + root: /data/nanhuang/Nan/Finetune-GenomicBenchmarks + startedAt: "2026-03-24T20:29:19.049919Z" + writerId: gfhdaiogyv661d0m6c02g97a4gh4csuu + m: [] + python_version: 3.9.18 + t: + "1": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "2": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "4": 3.9.18 + "5": 0.23.1 + "6": 4.35.2 + "12": 0.23.1 + "13": linux-x86_64 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-0pp4rdwo/files/output.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-0pp4rdwo/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..6e6881ad7996f2820e93b3a7ccff2bd5410f15d6 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-0pp4rdwo/files/output.log @@ -0,0 +1,45 @@ +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... +Some weights of BertForSequenceClassification were not initialized from the model checkpoint at /data/nanhuang/Nan/models/DNAbert2_Pretrained and are newly initialized: ['bert.pooler.dense.bias', 'classifier.weight', 'classifier.bias', 'bert.pooler.dense.weight'] +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. 
+Traceback (most recent call last): + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 473, in + train() + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 424, in train + model = transformers.AutoModelForSequenceClassification.from_pretrained( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/modeling_utils.py", line 2271, in to + return super().to(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1369, in to + return self._apply(convert) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 955, in _apply + param_applied = fn(param) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1355, in convert + return t.to( +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 12.00 MiB. GPU 0 has a total capacity of 47.53 GiB of which 27.12 MiB is free. Process 2988293 has 21.00 GiB memory in use. Process 2988247 has 13.12 GiB memory in use. Process 2988061 has 7.12 GiB memory in use. Process 2988353 has 574.00 MiB memory in use. Process 2988034 has 434.00 MiB memory in use. Process 2988245 has 260.00 MiB memory in use. Process 2988374 has 260.00 MiB memory in use. Process 2988531 has 260.00 MiB memory in use. Process 2988064 has 260.00 MiB memory in use. Process 2988566 has 260.00 MiB memory in use. Process 2988668 has 260.00 MiB memory in use. 
Including non-PyTorch memory, this process has 260.00 MiB memory in use. Process 2988093 has 260.00 MiB memory in use. Process 2988637 has 260.00 MiB memory in use. Process 2988554 has 260.00 MiB memory in use. Process 2988906 has 260.00 MiB memory in use. Process 2988874 has 260.00 MiB memory in use. Process 2988923 has 260.00 MiB memory in use. Process 2988286 has 260.00 MiB memory in use. Process 2989072 has 260.00 MiB memory in use. Process 2988448 has 260.00 MiB memory in use. Process 2988348 has 260.00 MiB memory in use. Process 2988736 has 260.00 MiB memory in use. Process 2988855 has 260.00 MiB memory in use. Process 2988814 has 10.00 MiB memory in use. Process 2988511 has 4.00 MiB memory in use. Process 2988942 has 10.00 MiB memory in use. Process 2989897 has 4.00 MiB memory in use. Process 2989914 has 12.00 MiB memory in use. Process 2989070 has 10.00 MiB memory in use. Process 2989165 has 10.00 MiB memory in use. Process 2989068 has 10.00 MiB memory in use. Process 2990500 has 10.00 MiB memory in use. Process 2989449 has 10.00 MiB memory in use. Process 2988447 has 16.00 MiB memory in use. Process 2988379 has 10.00 MiB memory in use. Process 2988518 has 10.00 MiB memory in use. Process 2988326 has 10.00 MiB memory in use. Process 2988462 has 10.00 MiB memory in use. Process 2989297 has 10.00 MiB memory in use. Process 2988927 has 10.00 MiB memory in use. Process 2988994 has 10.00 MiB memory in use. Process 2988205 has 10.00 MiB memory in use. Of the allocated memory 0 bytes is allocated by PyTorch, and 0 bytes is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. 
See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) +Traceback (most recent call last): + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 473, in + train() + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 424, in train + model = transformers.AutoModelForSequenceClassification.from_pretrained( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/modeling_utils.py", line 2271, in to + return super().to(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1369, in to + return self._apply(convert) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 955, in _apply + param_applied = fn(param) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1355, in convert + return t.to( +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 12.00 MiB. GPU 0 has a total capacity of 47.53 GiB of which 27.12 MiB is free. Process 2988293 has 21.00 GiB memory in use. Process 2988247 has 13.12 GiB memory in use. Process 2988061 has 7.12 GiB memory in use. Process 2988353 has 574.00 MiB memory in use. Process 2988034 has 434.00 MiB memory in use. Process 2988245 has 260.00 MiB memory in use. Process 2988374 has 260.00 MiB memory in use. Process 2988531 has 260.00 MiB memory in use. Process 2988064 has 260.00 MiB memory in use. 
Process 2988566 has 260.00 MiB memory in use. Process 2988668 has 260.00 MiB memory in use. Including non-PyTorch memory, this process has 260.00 MiB memory in use. Process 2988093 has 260.00 MiB memory in use. Process 2988637 has 260.00 MiB memory in use. Process 2988554 has 260.00 MiB memory in use. Process 2988906 has 260.00 MiB memory in use. Process 2988874 has 260.00 MiB memory in use. Process 2988923 has 260.00 MiB memory in use. Process 2988286 has 260.00 MiB memory in use. Process 2989072 has 260.00 MiB memory in use. Process 2988448 has 260.00 MiB memory in use. Process 2988348 has 260.00 MiB memory in use. Process 2988736 has 260.00 MiB memory in use. Process 2988855 has 260.00 MiB memory in use. Process 2988814 has 10.00 MiB memory in use. Process 2988511 has 4.00 MiB memory in use. Process 2988942 has 10.00 MiB memory in use. Process 2989897 has 4.00 MiB memory in use. Process 2989914 has 12.00 MiB memory in use. Process 2989070 has 10.00 MiB memory in use. Process 2989165 has 10.00 MiB memory in use. Process 2989068 has 10.00 MiB memory in use. Process 2990500 has 10.00 MiB memory in use. Process 2989449 has 10.00 MiB memory in use. Process 2988447 has 16.00 MiB memory in use. Process 2988379 has 10.00 MiB memory in use. Process 2988518 has 10.00 MiB memory in use. Process 2988326 has 10.00 MiB memory in use. Process 2988462 has 10.00 MiB memory in use. Process 2989297 has 10.00 MiB memory in use. Process 2988927 has 10.00 MiB memory in use. Process 2988994 has 10.00 MiB memory in use. Process 2988205 has 10.00 MiB memory in use. Of the allocated memory 0 bytes is allocated by PyTorch, and 0 bytes is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. 
See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-0pp4rdwo/files/requirements.txt b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-0pp4rdwo/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..44d18d32ec4648cd530877d7c8c4758d5e887b9c --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-0pp4rdwo/files/requirements.txt @@ -0,0 +1,144 @@ +scipy==1.13.1 +jupyter_core==5.8.1 +smmap==5.0.2 +yarl==1.22.0 +executing==2.2.0 +cffi==2.0.0 +mkl_random==1.2.8 +traitlets==5.14.3 +wandb==0.23.1 +annotated-types==0.7.0 +evaluate==0.4.6 +kiwisolver==1.4.4 +Jinja2==3.1.6 +pyparsing==3.2.0 +mpmath==1.3.0 +debugpy==1.8.16 +nvidia-cuda-nvrtc-cu12==12.8.93 +docker-pycreds==0.4.0 +pycparser==2.23 +anyio==4.12.0 +safetensors==0.7.0 +matplotlib-inline==0.1.7 +Pygments==2.19.2 +numpy==2.0.2 +nvidia-cuda-cupti-cu12==12.8.90 +Bottleneck==1.4.2 +matplotlib==3.9.2 +numexpr==2.10.1 +sip==6.7.12 +aiohappyeyeballs==2.6.1 +ptyprocess==0.7.0 +fsspec==2025.7.0 +accelerate==0.25.0 +zipp==3.23.0 +PyQt5_sip==12.13.0 +pure_eval==0.2.3 +regex==2025.11.3 +aiosignal==1.4.0 +certifi==2025.10.5 +transformers==4.35.2 +mkl-service==2.4.0 +httpx==0.28.1 +mkl_fft==1.3.11 +pickleshare==0.7.5 +ipykernel==6.30.1 +nvidia-nvtx-cu12==12.8.90 +nvidia-cufft-cu12==11.3.3.83 +triton==3.4.0 +numba==0.60.0 +psutil==7.0.0 +contourpy==1.2.1 +PyQt5==5.15.10 +packaging==25.0 +datasets==4.4.1 +ipython==8.18.1 +sympy==1.14.0 +nvidia-cusolver-cu12==11.7.3.90 +multidict==6.7.0 +jupyter_client==8.6.3 +setuptools==80.9.0 +prompt_toolkit==3.0.51 +six==1.17.0 +GitPython==3.1.45 +pydantic==2.11.7 +nvidia-cublas-cu12==12.8.4.1 +aiohttp==3.13.2 +tzdata==2025.2 +importlib_metadata==8.7.0 +biopython==1.85 +httpcore==1.0.9 +python-dateutil==2.9.0.post0 +llvmlite==0.43.0 +pandas==2.3.3 +scikit-learn==1.6.1 +asttokens==3.0.0 +joblib==1.5.3 +h11==0.16.0 
+charset-normalizer==3.4.4 +pyzmq==27.0.2 +multiprocess==0.70.18 +nvidia-nvjitlink-cu12==12.8.93 +sentry-sdk==2.35.0 +pytz==2025.2 +pydantic_core==2.33.2 +MarkupSafe==3.0.3 +brotlicffi==1.0.9.2 +stack_data==0.6.3 +tqdm==4.67.1 +pynndescent==0.5.13 +importlib_resources==6.5.2 +ply==3.11 +pyarrow==21.0.0 +tokenizers==0.15.2 +exceptiongroup==1.3.1 +nvidia-cusparse-cu12==12.5.8.93 +comm==0.2.3 +pillow==11.3.0 +nvidia-cusparselt-cu12==0.7.1 +protobuf==3.20.3 +urllib3==2.5.0 +wheel==0.45.1 +wcwidth==0.2.13 +appdirs==1.4.4 +PySocks==1.7.1 +PyQt6_sip==13.10.2 +umap-learn==0.5.9.post2 +attrs==25.4.0 +platformdirs==4.3.8 +nvidia-cuda-runtime-cu12==12.8.90 +typing-inspection==0.4.1 +huggingface_hub==0.34.4 +decorator==5.2.1 +filelock==3.17.0 +nvidia-nccl-cu12==2.27.3 +fonttools==4.60.1 +xxhash==3.6.0 +dill==0.4.0 +threadpoolctl==3.6.0 +parso==0.8.4 +pysam==0.9.1 +frozenlist==1.8.0 +typing_extensions==4.15.0 +propcache==0.4.1 +tomli==2.2.1 +click==8.1.8 +nvidia-cudnn-cu12==9.10.2.21 +gitdb==4.0.12 +pip==25.3 +tornado==6.5.2 +networkx==3.2.1 +jedi==0.19.2 +idna==3.11 +pexpect==4.9.0 +async-timeout==5.0.1 +hf-xet==1.1.8 +nvidia-curand-cu12==10.3.9.90 +PyYAML==6.0.2 +nvidia-cufile-cu12==1.13.1.3 +setproctitle==1.3.6 +eval_type_backport==0.2.2 +requests==2.32.5 +nest-asyncio==1.6.0 +torch==2.8.0 +cycler==0.11.0 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-0pp4rdwo/files/wandb-metadata.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-0pp4rdwo/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..b6a4f466881958d15c0b9c62d898ceb4307accb8 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-0pp4rdwo/files/wandb-metadata.json @@ -0,0 +1,85 @@ +{ + "os": "Linux-5.15.0-126-generic-x86_64-with-glibc2.35", + "python": "CPython 3.9.18", + "startedAt": "2026-03-24T20:29:19.049919Z", + "args": [ + "--model_name_or_path", + "/data/nanhuang/Nan/models/DNAbert2_Pretrained", + "--tokenizer_path", + 
"/data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json", + "--trust_remote_code", + "True", + "--data_path", + "/data/nanhuang/Nan/ft_data/drosophila_enhancers_stark/split", + "--kmer", + "-1", + "--run_name", + "base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42", + "--model_max_length", + "512", + "--per_device_train_batch_size", + "128", + "--per_device_eval_batch_size", + "128", + "--gradient_accumulation_steps", + "1", + "--learning_rate", + "3e-5", + "--weight_decay", + "0.05", + "--num_train_epochs", + "4", + "--lr_scheduler_type", + "linear", + "--warmup_steps", + "0", + "--warmup_ratio", + "0.15", + "--fp16", + "--output_dir", + "genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42", + "--evaluation_strategy", + "epoch", + "--save_strategy", + "epoch", + "--load_best_model_at_end", + "True", + "--metric_for_best_model", + "eval_f1", + "--greater_is_better", + "True", + "--save_total_limit", + "1", + "--logging_steps", + "100", + "--overwrite_output_dir", + "True", + "--log_level", + "info", + "--seed", + "42", + "--find_unused_parameters", + "False", + "--project_name", + "genomic_bench_DNAbert2" + ], + "program": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", + "codePath": "train.py", + "codePathLocal": "train.py", + "email": "n5huang@ucsd.edu", + "root": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks", + "host": "u112222", + "executable": "/data/nanhuang/miniconda3/envs/bpe_v2/bin/python", + "cpu_count": 64, + "cpu_count_logical": 128, + "disk": { + "/": { + "total": "3768964489216", + "used": "3559221993472" + } + }, + "memory": { + "total": "1082030182400" + }, + "writerId": "gfhdaiogyv661d0m6c02g97a4gh4csuu" +} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-0pp4rdwo/files/wandb-summary.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-0pp4rdwo/files/wandb-summary.json new file mode 100644 index 
0000000000000000000000000000000000000000..49f1c97bdc4f4b441dcae913d01d8e867efca440 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-0pp4rdwo/files/wandb-summary.json @@ -0,0 +1 @@ +{"_runtime":9,"_wandb":{"runtime":9}} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-0pp4rdwo/logs/debug-core.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-0pp4rdwo/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..a7a2d763b70cfa885ae73c0a7ddd0be9aa630b62 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-0pp4rdwo/logs/debug-core.log @@ -0,0 +1,28 @@ +{"time":"2026-03-24T13:29:19.254348558-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpcvvxi5ex/port-2988116.txt","pid":2988116,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:19.257559219-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988116} +{"time":"2026-03-24T13:29:19.257518149-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988116-2997057-152810899/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:19.414838851-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:19.488163869-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"0pp4rdwo","id":"1(@)"} +{"time":"2026-03-24T13:29:19.887844472-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpe35592t4/port-2988554.txt","pid":2988554,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:19.891071573-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988554} +{"time":"2026-03-24T13:29:19.890998613-07:00","level":"INFO","msg":"server: accepting 
connections","addr":{"Name":"/tmp/wandb-2988554-2997257-2276799086/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:20.056938925-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:20.129332018-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"foihfh1b","id":"1(@)"} +{"time":"2026-03-24T13:29:24.703840434-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"0pp4rdwo","id":"1(@)"} +{"time":"2026-03-24T13:29:25.406331122-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"foihfh1b","id":"1(@)"} +{"time":"2026-03-24T13:29:34.402875462-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:34.403062891-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:34.403069251-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:34.403293869-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988116-2997057-152810899/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:34.403326389-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:35.13780543-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:35.137881829-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:35.137912439-07:00","level":"INFO","msg":"server is closed"} +{"time":"2026-03-24T13:29:40.902553438-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:40.902795807-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:40.902830916-07:00","level":"INFO","msg":"server is shutting down"} 
+{"time":"2026-03-24T13:29:40.903034885-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:40.903514643-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988554-2997257-2276799086/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:44.973887931-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:44.97407156-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:44.97412328-07:00","level":"INFO","msg":"server is closed"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-0pp4rdwo/logs/debug-internal.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-0pp4rdwo/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..243d1520500ea69e983b738aa0b5edd41b348915 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-0pp4rdwo/logs/debug-internal.log @@ -0,0 +1,12 @@ +{"time":"2026-03-24T13:29:19.488464287-07:00","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2026-03-24T13:29:24.574188939-07:00","level":"ERROR","msg":"monitor: failed to initialize GPU resource: monitor: could not get GPU binary port: timeout reading portfile /tmp/wandb-system-monitor-portfile-2816003323"} +{"time":"2026-03-24T13:29:24.703094619-07:00","level":"INFO","msg":"stream: created new stream","id":"0pp4rdwo"} +{"time":"2026-03-24T13:29:24.703368487-07:00","level":"INFO","msg":"handler: started","stream_id":"0pp4rdwo"} +{"time":"2026-03-24T13:29:24.703809654-07:00","level":"INFO","msg":"stream: started","id":"0pp4rdwo"} +{"time":"2026-03-24T13:29:24.703827734-07:00","level":"INFO","msg":"writer: started","stream_id":"0pp4rdwo"} +{"time":"2026-03-24T13:29:24.703862214-07:00","level":"INFO","msg":"sender: started","stream_id":"0pp4rdwo"} 
+{"time":"2026-03-24T13:29:34.403059881-07:00","level":"INFO","msg":"stream: closing","id":"0pp4rdwo"} +{"time":"2026-03-24T13:29:34.822831696-07:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2026-03-24T13:29:35.132030793-07:00","level":"INFO","msg":"handler: closed","stream_id":"0pp4rdwo"} +{"time":"2026-03-24T13:29:35.132287102-07:00","level":"INFO","msg":"sender: closed","stream_id":"0pp4rdwo"} +{"time":"2026-03-24T13:29:35.132310402-07:00","level":"INFO","msg":"stream: closed","id":"0pp4rdwo"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-0pp4rdwo/logs/debug.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-0pp4rdwo/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..3387e4f4ece83688bf91b8a7cc4a6b5351179849 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-0pp4rdwo/logs/debug.log @@ -0,0 +1,23 @@ +2026-03-24 13:29:19,060 INFO MainThread:2988116 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1 +2026-03-24 13:29:19,060 INFO MainThread:2988116 [wandb_setup.py:_flush():80] Configure stats pid to 2988116 +2026-03-24 13:29:19,060 INFO MainThread:2988116 [wandb_setup.py:_flush():80] Loading settings from /home/nanhuang/.config/wandb/settings +2026-03-24 13:29:19,061 INFO MainThread:2988116 [wandb_setup.py:_flush():80] Loading settings from /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/settings +2026-03-24 13:29:19,061 INFO MainThread:2988116 [wandb_setup.py:_flush():80] Loading settings from environment variables +2026-03-24 13:29:19,061 INFO MainThread:2988116 [wandb_init.py:setup_run_log_directory():714] Logging user logs to /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-0pp4rdwo/logs/debug.log +2026-03-24 13:29:19,061 INFO MainThread:2988116 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to 
/data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-0pp4rdwo/logs/debug-internal.log +2026-03-24 13:29:19,061 INFO MainThread:2988116 [wandb_init.py:init():841] calling init triggers +2026-03-24 13:29:19,061 INFO MainThread:2988116 [wandb_init.py:init():846] wandb.init called with sweep_config: {} +config: {'_wandb': {}} +2026-03-24 13:29:19,061 INFO MainThread:2988116 [wandb_init.py:init():889] starting backend +2026-03-24 13:29:19,415 INFO MainThread:2988116 [wandb_init.py:init():892] sending inform_init request +2026-03-24 13:29:19,480 INFO MainThread:2988116 [wandb_init.py:init():900] backend started and connected +2026-03-24 13:29:19,491 INFO MainThread:2988116 [wandb_init.py:init():970] updated telemetry +2026-03-24 13:29:19,492 INFO MainThread:2988116 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout +2026-03-24 13:29:25,126 INFO MainThread:2988116 [wandb_init.py:init():1041] starting run threads in backend +2026-03-24 13:29:25,272 INFO MainThread:2988116 [wandb_run.py:_console_start():2521] atexit reg +2026-03-24 13:29:25,272 INFO MainThread:2988116 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2026-03-24 13:29:25,272 INFO MainThread:2988116 [wandb_run.py:_redirect():2438] Wrapping output streams. +2026-03-24 13:29:25,272 INFO MainThread:2988116 [wandb_run.py:_redirect():2461] Redirects installed. +2026-03-24 13:29:25,278 INFO MainThread:2988116 [wandb_init.py:init():1081] run started, returning control to user process +2026-03-24 13:29:34,402 INFO wandb-AsyncioManager-main:2988116 [service_client.py:_forward_responses():80] Reached EOF. +2026-03-24 13:29:34,402 INFO wandb-AsyncioManager-main:2988116 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles. 
diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-0pp4rdwo/run-0pp4rdwo.wandb b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-0pp4rdwo/run-0pp4rdwo.wandb new file mode 100644 index 0000000000000000000000000000000000000000..6b4f3e13d3a280992668d7e80837801e4efad274 Binary files /dev/null and b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-0pp4rdwo/run-0pp4rdwo.wandb differ diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-eliuv43y/files/config.yaml b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-eliuv43y/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e0cdee7cdee2c35da9ab34e8db141eed6f567b60 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-eliuv43y/files/config.yaml @@ -0,0 +1,108 @@ +_wandb: + value: + cli_version: 0.23.1 + e: + ykepbu1ceialp3a61ufms96hbxiwnass: + args: + - --model_name_or_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained + - --tokenizer_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json + - --trust_remote_code + - "True" + - --data_path + - /data/nanhuang/Nan/ft_data/drosophila_enhancers_stark/split + - --kmer + - "-1" + - --run_name + - base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42 + - --model_max_length + - "512" + - --per_device_train_batch_size + - "128" + - --per_device_eval_batch_size + - "128" + - --gradient_accumulation_steps + - "1" + - --learning_rate + - "3e-5" + - --weight_decay + - "0.05" + - --num_train_epochs + - "4" + - --lr_scheduler_type + - linear + - --warmup_steps + - "0" + - --warmup_ratio + - "0.15" + - --fp16 + - --output_dir + - genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42 + - --evaluation_strategy + - epoch + - --save_strategy + - epoch + - --load_best_model_at_end + - "True" + - --metric_for_best_model + - eval_f1 + - --greater_is_better + - "True" + - --save_total_limit + - "1" + - --logging_steps + 
- "100" + - --overwrite_output_dir + - "True" + - --log_level + - info + - --seed + - "42" + - --find_unused_parameters + - "False" + - --project_name + - genomic_bench_DNAbert2 + codePath: train.py + codePathLocal: train.py + cpu_count: 64 + cpu_count_logical: 128 + disk: + /: + total: "3768964489216" + used: "3559222091776" + email: n5huang@ucsd.edu + executable: /data/nanhuang/miniconda3/envs/bpe_v2/bin/python + host: u112222 + memory: + total: "1082030182400" + os: Linux-5.15.0-126-generic-x86_64-with-glibc2.35 + program: /data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py + python: CPython 3.9.18 + root: /data/nanhuang/Nan/Finetune-GenomicBenchmarks + startedAt: "2026-03-24T20:29:19.850645Z" + writerId: ykepbu1ceialp3a61ufms96hbxiwnass + m: [] + python_version: 3.9.18 + t: + "1": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "2": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "4": 3.9.18 + "5": 0.23.1 + "6": 4.35.2 + "12": 0.23.1 + "13": linux-x86_64 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-eliuv43y/files/output.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-eliuv43y/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..3e0b6f3021b23ccf73278df3622c961fdf5bd41d --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-eliuv43y/files/output.log @@ -0,0 +1,45 @@ +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... +Some weights of BertForSequenceClassification were not initialized from the model checkpoint at /data/nanhuang/Nan/models/DNAbert2_Pretrained and are newly initialized: ['classifier.weight', 'bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias'] +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. 
+Traceback (most recent call last): + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 473, in + train() + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 424, in train + model = transformers.AutoModelForSequenceClassification.from_pretrained( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/modeling_utils.py", line 2271, in to + return super().to(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1369, in to + return self._apply(convert) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 955, in _apply + param_applied = fn(param) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1355, in convert + return t.to( +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 12.00 MiB. GPU 0 has a total capacity of 47.53 GiB of which 29.38 MiB is free. Process 2988293 has 21.00 GiB memory in use. Process 2988247 has 13.12 GiB memory in use. Process 2988061 has 7.12 GiB memory in use. Process 2988353 has 574.00 MiB memory in use. Process 2988034 has 434.00 MiB memory in use. Process 2988245 has 260.00 MiB memory in use. Process 2988374 has 260.00 MiB memory in use. Process 2988531 has 260.00 MiB memory in use. Process 2988064 has 260.00 MiB memory in use. Process 2988566 has 260.00 MiB memory in use. Process 2988668 has 260.00 MiB memory in use. 
Process 2988116 has 260.00 MiB memory in use. Process 2988093 has 260.00 MiB memory in use. Process 2988637 has 260.00 MiB memory in use. Process 2988554 has 260.00 MiB memory in use. Process 2988906 has 260.00 MiB memory in use. Including non-PyTorch memory, this process has 260.00 MiB memory in use. Process 2988923 has 260.00 MiB memory in use. Process 2988286 has 260.00 MiB memory in use. Process 2989072 has 260.00 MiB memory in use. Process 2988448 has 260.00 MiB memory in use. Process 2988348 has 260.00 MiB memory in use. Process 2988736 has 260.00 MiB memory in use. Process 2988855 has 260.00 MiB memory in use. Process 2988814 has 8.00 MiB memory in use. Process 2988511 has 4.00 MiB memory in use. Process 2988942 has 8.00 MiB memory in use. Process 2989897 has 4.00 MiB memory in use. Process 2989914 has 10.00 MiB memory in use. Process 2989070 has 10.00 MiB memory in use. Process 2989165 has 10.00 MiB memory in use. Process 2989068 has 10.00 MiB memory in use. Process 2990500 has 10.00 MiB memory in use. Process 2989449 has 10.00 MiB memory in use. Process 2988447 has 14.00 MiB memory in use. Process 2988379 has 10.00 MiB memory in use. Process 2988518 has 10.00 MiB memory in use. Process 2988326 has 10.00 MiB memory in use. Process 2988462 has 10.00 MiB memory in use. Process 2989297 has 10.00 MiB memory in use. Process 2988927 has 10.00 MiB memory in use. Process 2988994 has 10.00 MiB memory in use. Process 2988205 has 10.00 MiB memory in use. Of the allocated memory 0 bytes is allocated by PyTorch, and 0 bytes is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. 
See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) +Traceback (most recent call last): + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 473, in + train() + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 424, in train + model = transformers.AutoModelForSequenceClassification.from_pretrained( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/modeling_utils.py", line 2271, in to + return super().to(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1369, in to + return self._apply(convert) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 955, in _apply + param_applied = fn(param) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1355, in convert + return t.to( +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 12.00 MiB. GPU 0 has a total capacity of 47.53 GiB of which 29.38 MiB is free. Process 2988293 has 21.00 GiB memory in use. Process 2988247 has 13.12 GiB memory in use. Process 2988061 has 7.12 GiB memory in use. Process 2988353 has 574.00 MiB memory in use. Process 2988034 has 434.00 MiB memory in use. Process 2988245 has 260.00 MiB memory in use. Process 2988374 has 260.00 MiB memory in use. Process 2988531 has 260.00 MiB memory in use. Process 2988064 has 260.00 MiB memory in use. 
Process 2988566 has 260.00 MiB memory in use. Process 2988668 has 260.00 MiB memory in use. Process 2988116 has 260.00 MiB memory in use. Process 2988093 has 260.00 MiB memory in use. Process 2988637 has 260.00 MiB memory in use. Process 2988554 has 260.00 MiB memory in use. Process 2988906 has 260.00 MiB memory in use. Including non-PyTorch memory, this process has 260.00 MiB memory in use. Process 2988923 has 260.00 MiB memory in use. Process 2988286 has 260.00 MiB memory in use. Process 2989072 has 260.00 MiB memory in use. Process 2988448 has 260.00 MiB memory in use. Process 2988348 has 260.00 MiB memory in use. Process 2988736 has 260.00 MiB memory in use. Process 2988855 has 260.00 MiB memory in use. Process 2988814 has 8.00 MiB memory in use. Process 2988511 has 4.00 MiB memory in use. Process 2988942 has 8.00 MiB memory in use. Process 2989897 has 4.00 MiB memory in use. Process 2989914 has 10.00 MiB memory in use. Process 2989070 has 10.00 MiB memory in use. Process 2989165 has 10.00 MiB memory in use. Process 2989068 has 10.00 MiB memory in use. Process 2990500 has 10.00 MiB memory in use. Process 2989449 has 10.00 MiB memory in use. Process 2988447 has 14.00 MiB memory in use. Process 2988379 has 10.00 MiB memory in use. Process 2988518 has 10.00 MiB memory in use. Process 2988326 has 10.00 MiB memory in use. Process 2988462 has 10.00 MiB memory in use. Process 2989297 has 10.00 MiB memory in use. Process 2988927 has 10.00 MiB memory in use. Process 2988994 has 10.00 MiB memory in use. Process 2988205 has 10.00 MiB memory in use. Of the allocated memory 0 bytes is allocated by PyTorch, and 0 bytes is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. 
See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-eliuv43y/files/requirements.txt b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-eliuv43y/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..44d18d32ec4648cd530877d7c8c4758d5e887b9c --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-eliuv43y/files/requirements.txt @@ -0,0 +1,144 @@ +scipy==1.13.1 +jupyter_core==5.8.1 +smmap==5.0.2 +yarl==1.22.0 +executing==2.2.0 +cffi==2.0.0 +mkl_random==1.2.8 +traitlets==5.14.3 +wandb==0.23.1 +annotated-types==0.7.0 +evaluate==0.4.6 +kiwisolver==1.4.4 +Jinja2==3.1.6 +pyparsing==3.2.0 +mpmath==1.3.0 +debugpy==1.8.16 +nvidia-cuda-nvrtc-cu12==12.8.93 +docker-pycreds==0.4.0 +pycparser==2.23 +anyio==4.12.0 +safetensors==0.7.0 +matplotlib-inline==0.1.7 +Pygments==2.19.2 +numpy==2.0.2 +nvidia-cuda-cupti-cu12==12.8.90 +Bottleneck==1.4.2 +matplotlib==3.9.2 +numexpr==2.10.1 +sip==6.7.12 +aiohappyeyeballs==2.6.1 +ptyprocess==0.7.0 +fsspec==2025.7.0 +accelerate==0.25.0 +zipp==3.23.0 +PyQt5_sip==12.13.0 +pure_eval==0.2.3 +regex==2025.11.3 +aiosignal==1.4.0 +certifi==2025.10.5 +transformers==4.35.2 +mkl-service==2.4.0 +httpx==0.28.1 +mkl_fft==1.3.11 +pickleshare==0.7.5 +ipykernel==6.30.1 +nvidia-nvtx-cu12==12.8.90 +nvidia-cufft-cu12==11.3.3.83 +triton==3.4.0 +numba==0.60.0 +psutil==7.0.0 +contourpy==1.2.1 +PyQt5==5.15.10 +packaging==25.0 +datasets==4.4.1 +ipython==8.18.1 +sympy==1.14.0 +nvidia-cusolver-cu12==11.7.3.90 +multidict==6.7.0 +jupyter_client==8.6.3 +setuptools==80.9.0 +prompt_toolkit==3.0.51 +six==1.17.0 +GitPython==3.1.45 +pydantic==2.11.7 +nvidia-cublas-cu12==12.8.4.1 +aiohttp==3.13.2 +tzdata==2025.2 +importlib_metadata==8.7.0 +biopython==1.85 +httpcore==1.0.9 +python-dateutil==2.9.0.post0 +llvmlite==0.43.0 +pandas==2.3.3 +scikit-learn==1.6.1 +asttokens==3.0.0 +joblib==1.5.3 +h11==0.16.0 
+charset-normalizer==3.4.4 +pyzmq==27.0.2 +multiprocess==0.70.18 +nvidia-nvjitlink-cu12==12.8.93 +sentry-sdk==2.35.0 +pytz==2025.2 +pydantic_core==2.33.2 +MarkupSafe==3.0.3 +brotlicffi==1.0.9.2 +stack_data==0.6.3 +tqdm==4.67.1 +pynndescent==0.5.13 +importlib_resources==6.5.2 +ply==3.11 +pyarrow==21.0.0 +tokenizers==0.15.2 +exceptiongroup==1.3.1 +nvidia-cusparse-cu12==12.5.8.93 +comm==0.2.3 +pillow==11.3.0 +nvidia-cusparselt-cu12==0.7.1 +protobuf==3.20.3 +urllib3==2.5.0 +wheel==0.45.1 +wcwidth==0.2.13 +appdirs==1.4.4 +PySocks==1.7.1 +PyQt6_sip==13.10.2 +umap-learn==0.5.9.post2 +attrs==25.4.0 +platformdirs==4.3.8 +nvidia-cuda-runtime-cu12==12.8.90 +typing-inspection==0.4.1 +huggingface_hub==0.34.4 +decorator==5.2.1 +filelock==3.17.0 +nvidia-nccl-cu12==2.27.3 +fonttools==4.60.1 +xxhash==3.6.0 +dill==0.4.0 +threadpoolctl==3.6.0 +parso==0.8.4 +pysam==0.9.1 +frozenlist==1.8.0 +typing_extensions==4.15.0 +propcache==0.4.1 +tomli==2.2.1 +click==8.1.8 +nvidia-cudnn-cu12==9.10.2.21 +gitdb==4.0.12 +pip==25.3 +tornado==6.5.2 +networkx==3.2.1 +jedi==0.19.2 +idna==3.11 +pexpect==4.9.0 +async-timeout==5.0.1 +hf-xet==1.1.8 +nvidia-curand-cu12==10.3.9.90 +PyYAML==6.0.2 +nvidia-cufile-cu12==1.13.1.3 +setproctitle==1.3.6 +eval_type_backport==0.2.2 +requests==2.32.5 +nest-asyncio==1.6.0 +torch==2.8.0 +cycler==0.11.0 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-eliuv43y/files/wandb-metadata.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-eliuv43y/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..1958c2c934dd69b187dbc828b932b8131677c5d9 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-eliuv43y/files/wandb-metadata.json @@ -0,0 +1,85 @@ +{ + "os": "Linux-5.15.0-126-generic-x86_64-with-glibc2.35", + "python": "CPython 3.9.18", + "startedAt": "2026-03-24T20:29:19.850645Z", + "args": [ + "--model_name_or_path", + "/data/nanhuang/Nan/models/DNAbert2_Pretrained", + "--tokenizer_path", + 
"/data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json", + "--trust_remote_code", + "True", + "--data_path", + "/data/nanhuang/Nan/ft_data/drosophila_enhancers_stark/split", + "--kmer", + "-1", + "--run_name", + "base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42", + "--model_max_length", + "512", + "--per_device_train_batch_size", + "128", + "--per_device_eval_batch_size", + "128", + "--gradient_accumulation_steps", + "1", + "--learning_rate", + "3e-5", + "--weight_decay", + "0.05", + "--num_train_epochs", + "4", + "--lr_scheduler_type", + "linear", + "--warmup_steps", + "0", + "--warmup_ratio", + "0.15", + "--fp16", + "--output_dir", + "genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42", + "--evaluation_strategy", + "epoch", + "--save_strategy", + "epoch", + "--load_best_model_at_end", + "True", + "--metric_for_best_model", + "eval_f1", + "--greater_is_better", + "True", + "--save_total_limit", + "1", + "--logging_steps", + "100", + "--overwrite_output_dir", + "True", + "--log_level", + "info", + "--seed", + "42", + "--find_unused_parameters", + "False", + "--project_name", + "genomic_bench_DNAbert2" + ], + "program": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", + "codePath": "train.py", + "codePathLocal": "train.py", + "email": "n5huang@ucsd.edu", + "root": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks", + "host": "u112222", + "executable": "/data/nanhuang/miniconda3/envs/bpe_v2/bin/python", + "cpu_count": 64, + "cpu_count_logical": 128, + "disk": { + "/": { + "total": "3768964489216", + "used": "3559222091776" + } + }, + "memory": { + "total": "1082030182400" + }, + "writerId": "ykepbu1ceialp3a61ufms96hbxiwnass" +} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-eliuv43y/files/wandb-summary.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-eliuv43y/files/wandb-summary.json new file mode 100644 index 
0000000000000000000000000000000000000000..49f1c97bdc4f4b441dcae913d01d8e867efca440 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-eliuv43y/files/wandb-summary.json @@ -0,0 +1 @@ +{"_runtime":9,"_wandb":{"runtime":9}} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-eliuv43y/logs/debug-core.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-eliuv43y/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..c6a849eb6203bbca1b49abeeb05546472054fccd --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-eliuv43y/logs/debug-core.log @@ -0,0 +1,70 @@ +{"time":"2026-03-24T13:29:20.03742456-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpoc28hlmr/port-2988093.txt","pid":2988093,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:20.038738382-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmp9uquarlq/port-2988874.txt","pid":2988874,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:20.039003841-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988093} +{"time":"2026-03-24T13:29:20.039025901-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988093-2997274-3761623073/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:20.04069919-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988874} +{"time":"2026-03-24T13:29:20.040718421-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988874-2997275-4219790605/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:20.124433377-07:00","level":"INFO","msg":"main: starting 
server","port-filename":"/tmp/tmpze5mtr29/port-2988637.txt","pid":2988637,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:20.128053085-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988637} +{"time":"2026-03-24T13:29:20.128019566-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988637-2997300-777241408/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:20.205079181-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:20.213519842-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:20.266985147-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"ku0mi14u","id":"1(@)"} +{"time":"2026-03-24T13:29:20.295658037-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:20.3172018-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"eliuv43y","id":"1(@)"} +{"time":"2026-03-24T13:29:20.379313944-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"yjc1lhkl","id":"1(@)"} +{"time":"2026-03-24T13:29:20.444005412-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmplbn8udml/port-2988906.txt","pid":2988906,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:20.445893061-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988906} +{"time":"2026-03-24T13:29:20.445883732-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988906-2997875-3091809193/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:20.630082905-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} 
+{"time":"2026-03-24T13:29:20.648636296-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpqmfui_f4/port-2988923.txt","pid":2988923,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:20.650975252-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988923} +{"time":"2026-03-24T13:29:20.650965532-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988923-2998027-2125257262/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:20.677245977-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"5uhcp1bu","id":"1(@)"} +{"time":"2026-03-24T13:29:20.83285716-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:20.881265634-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"zrhz20dw","id":"1(@)"} +{"time":"2026-03-24T13:29:25.468460486-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"ku0mi14u","id":"1(@)"} +{"time":"2026-03-24T13:29:25.562825929-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"eliuv43y","id":"1(@)"} +{"time":"2026-03-24T13:29:25.589227984-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"yjc1lhkl","id":"1(@)"} +{"time":"2026-03-24T13:29:25.872707423-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"5uhcp1bu","id":"1(@)"} +{"time":"2026-03-24T13:29:26.077784103-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"zrhz20dw","id":"1(@)"} +{"time":"2026-03-24T13:29:35.347215345-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:35.347334264-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:35.347311435-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} 
+{"time":"2026-03-24T13:29:35.347442434-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:35.347525883-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988874-2997275-4219790605/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:35.485867918-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:35.486051637-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:35.486311015-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988093-2997274-3761623073/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:35.486655283-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:35.486845412-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:36.037426476-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:36.037580545-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:36.037557775-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:36.037864853-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988923-2998027-2125257262/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:36.037964663-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:36.180693602-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:36.180838011-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:36.180819861-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:36.18100239-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} 
+{"time":"2026-03-24T13:29:36.18108012-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988906-2997875-3091809193/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:36.251602234-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:36.251670873-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:36.251692473-07:00","level":"INFO","msg":"server is closed"} +{"time":"2026-03-24T13:29:36.372019764-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:36.372219503-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:36.372213063-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:36.372521501-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:36.372582451-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988637-2997300-777241408/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:37.032919698-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:37.033011367-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:37.033062727-07:00","level":"INFO","msg":"server is closed"} +{"time":"2026-03-24T13:29:37.107021871-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:37.107098891-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:37.107145341-07:00","level":"INFO","msg":"server is closed"} +{"time":"2026-03-24T13:29:38.306457371-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} 
+{"time":"2026-03-24T13:29:38.30656243-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:38.30658601-07:00","level":"INFO","msg":"server is closed"} +{"time":"2026-03-24T13:29:42.21708758-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:42.217248489-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:42.217275609-07:00","level":"INFO","msg":"server is closed"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-eliuv43y/logs/debug-internal.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-eliuv43y/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..57c91dde7bf58531ebd7c33bb2991e99cc348d8c --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-eliuv43y/logs/debug-internal.log @@ -0,0 +1,12 @@ +{"time":"2026-03-24T13:29:20.317472038-07:00","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2026-03-24T13:29:25.406009304-07:00","level":"ERROR","msg":"monitor: failed to initialize GPU resource: monitor: could not get GPU binary port: timeout reading portfile /tmp/wandb-system-monitor-portfile-776866033"} +{"time":"2026-03-24T13:29:25.562337432-07:00","level":"INFO","msg":"stream: created new stream","id":"eliuv43y"} +{"time":"2026-03-24T13:29:25.562618721-07:00","level":"INFO","msg":"handler: started","stream_id":"eliuv43y"} +{"time":"2026-03-24T13:29:25.562806969-07:00","level":"INFO","msg":"stream: started","id":"eliuv43y"} +{"time":"2026-03-24T13:29:25.563247407-07:00","level":"INFO","msg":"writer: started","stream_id":"eliuv43y"} +{"time":"2026-03-24T13:29:25.563433066-07:00","level":"INFO","msg":"sender: started","stream_id":"eliuv43y"} +{"time":"2026-03-24T13:29:35.347319475-07:00","level":"INFO","msg":"stream: closing","id":"eliuv43y"} 
+{"time":"2026-03-24T13:29:35.796150939-07:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2026-03-24T13:29:36.240914087-07:00","level":"INFO","msg":"handler: closed","stream_id":"eliuv43y"} +{"time":"2026-03-24T13:29:36.241151635-07:00","level":"INFO","msg":"sender: closed","stream_id":"eliuv43y"} +{"time":"2026-03-24T13:29:36.241170135-07:00","level":"INFO","msg":"stream: closed","id":"eliuv43y"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-eliuv43y/logs/debug.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-eliuv43y/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..00ab396de25f89ef1488fad31e2c0da09f7ddad3 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-eliuv43y/logs/debug.log @@ -0,0 +1,23 @@ +2026-03-24 13:29:19,858 INFO MainThread:2988874 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1 +2026-03-24 13:29:19,858 INFO MainThread:2988874 [wandb_setup.py:_flush():80] Configure stats pid to 2988874 +2026-03-24 13:29:19,858 INFO MainThread:2988874 [wandb_setup.py:_flush():80] Loading settings from /home/nanhuang/.config/wandb/settings +2026-03-24 13:29:19,858 INFO MainThread:2988874 [wandb_setup.py:_flush():80] Loading settings from /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/settings +2026-03-24 13:29:19,859 INFO MainThread:2988874 [wandb_setup.py:_flush():80] Loading settings from environment variables +2026-03-24 13:29:19,859 INFO MainThread:2988874 [wandb_init.py:setup_run_log_directory():714] Logging user logs to /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-eliuv43y/logs/debug.log +2026-03-24 13:29:19,859 INFO MainThread:2988874 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-eliuv43y/logs/debug-internal.log +2026-03-24 13:29:19,859 INFO MainThread:2988874 [wandb_init.py:init():841] calling init 
triggers +2026-03-24 13:29:19,859 INFO MainThread:2988874 [wandb_init.py:init():846] wandb.init called with sweep_config: {} +config: {'_wandb': {}} +2026-03-24 13:29:19,859 INFO MainThread:2988874 [wandb_init.py:init():889] starting backend +2026-03-24 13:29:20,214 INFO MainThread:2988874 [wandb_init.py:init():892] sending inform_init request +2026-03-24 13:29:20,315 INFO MainThread:2988874 [wandb_init.py:init():900] backend started and connected +2026-03-24 13:29:20,321 INFO MainThread:2988874 [wandb_init.py:init():970] updated telemetry +2026-03-24 13:29:20,323 INFO MainThread:2988874 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout +2026-03-24 13:29:26,289 INFO MainThread:2988874 [wandb_init.py:init():1041] starting run threads in backend +2026-03-24 13:29:26,431 INFO MainThread:2988874 [wandb_run.py:_console_start():2521] atexit reg +2026-03-24 13:29:26,431 INFO MainThread:2988874 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2026-03-24 13:29:26,431 INFO MainThread:2988874 [wandb_run.py:_redirect():2438] Wrapping output streams. +2026-03-24 13:29:26,431 INFO MainThread:2988874 [wandb_run.py:_redirect():2461] Redirects installed. +2026-03-24 13:29:26,437 INFO MainThread:2988874 [wandb_init.py:init():1081] run started, returning control to user process +2026-03-24 13:29:35,347 INFO wandb-AsyncioManager-main:2988874 [service_client.py:_forward_responses():80] Reached EOF. +2026-03-24 13:29:35,347 INFO wandb-AsyncioManager-main:2988874 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles. 
diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-eliuv43y/run-eliuv43y.wandb b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-eliuv43y/run-eliuv43y.wandb new file mode 100644 index 0000000000000000000000000000000000000000..aac6c86e39ff0441ffe68e140db48ec52767debd Binary files /dev/null and b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-eliuv43y/run-eliuv43y.wandb differ diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-foihfh1b/files/config.yaml b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-foihfh1b/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cf3a7c55d499a24562e768f60289fd62d84a32a2 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-foihfh1b/files/config.yaml @@ -0,0 +1,108 @@ +_wandb: + value: + cli_version: 0.23.1 + e: + zlmu8imsoiyd9zzdjup8gvxbk7dvfypv: + args: + - --model_name_or_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained + - --tokenizer_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json + - --trust_remote_code + - "True" + - --data_path + - /data/nanhuang/Nan/ft_data/drosophila_enhancers_stark/split + - --kmer + - "-1" + - --run_name + - base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42 + - --model_max_length + - "512" + - --per_device_train_batch_size + - "128" + - --per_device_eval_batch_size + - "128" + - --gradient_accumulation_steps + - "1" + - --learning_rate + - "3e-5" + - --weight_decay + - "0.05" + - --num_train_epochs + - "4" + - --lr_scheduler_type + - linear + - --warmup_steps + - "0" + - --warmup_ratio + - "0.15" + - --fp16 + - --output_dir + - genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42 + - --evaluation_strategy + - epoch + - --save_strategy + - epoch + - --load_best_model_at_end + - "True" + - --metric_for_best_model + - eval_f1 + - --greater_is_better + - "True" + - --save_total_limit + - "1" + - --logging_steps + 
- "100" + - --overwrite_output_dir + - "True" + - --log_level + - info + - --seed + - "42" + - --find_unused_parameters + - "False" + - --project_name + - genomic_bench_DNAbert2 + codePath: train.py + codePathLocal: train.py + cpu_count: 64 + cpu_count_logical: 128 + disk: + /: + total: "3768964489216" + used: "3559222042624" + email: n5huang@ucsd.edu + executable: /data/nanhuang/miniconda3/envs/bpe_v2/bin/python + host: u112222 + memory: + total: "1082030182400" + os: Linux-5.15.0-126-generic-x86_64-with-glibc2.35 + program: /data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py + python: CPython 3.9.18 + root: /data/nanhuang/Nan/Finetune-GenomicBenchmarks + startedAt: "2026-03-24T20:29:19.732396Z" + writerId: zlmu8imsoiyd9zzdjup8gvxbk7dvfypv + m: [] + python_version: 3.9.18 + t: + "1": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "2": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "4": 3.9.18 + "5": 0.23.1 + "6": 4.35.2 + "12": 0.23.1 + "13": linux-x86_64 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-foihfh1b/files/output.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-foihfh1b/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..cf9386400ce2c7a439ede5f28de4d7ae148e3b6b --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-foihfh1b/files/output.log @@ -0,0 +1,45 @@ +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... +Some weights of BertForSequenceClassification were not initialized from the model checkpoint at /data/nanhuang/Nan/models/DNAbert2_Pretrained and are newly initialized: ['bert.pooler.dense.bias', 'classifier.bias', 'bert.pooler.dense.weight', 'classifier.weight'] +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. 
+Traceback (most recent call last): + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 473, in + train() + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 424, in train + model = transformers.AutoModelForSequenceClassification.from_pretrained( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/modeling_utils.py", line 2271, in to + return super().to(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1369, in to + return self._apply(convert) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 955, in _apply + param_applied = fn(param) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1355, in convert + return t.to( +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 0 has a total capacity of 47.53 GiB of which 44.00 MiB is free. Process 2988293 has 21.00 GiB memory in use. Process 2988247 has 13.12 GiB memory in use. Process 2988061 has 7.12 GiB memory in use. Process 2988245 has 0 bytes memory in use. Process 2988374 has 260.00 MiB memory in use. Process 2988531 has 260.00 MiB memory in use. Process 2988064 has 260.00 MiB memory in use. Process 2988566 has 260.00 MiB memory in use. Process 2988668 has 260.00 MiB memory in use. Process 2988093 has 260.00 MiB memory in use. Process 2988637 has 260.00 MiB memory in use. 
Including non-PyTorch memory, this process has 272.00 MiB memory in use. Process 2988906 has 260.00 MiB memory in use. Process 2988874 has 260.00 MiB memory in use. Process 2988923 has 272.00 MiB memory in use. Process 2988286 has 260.00 MiB memory in use. Process 2989072 has 260.00 MiB memory in use. Process 2988448 has 260.00 MiB memory in use. Process 2988348 has 294.00 MiB memory in use. Process 2988736 has 260.00 MiB memory in use. Process 2988855 has 374.00 MiB memory in use. Process 2988814 has 6.00 MiB memory in use. Process 2988511 has 4.00 MiB memory in use. Process 2988942 has 6.00 MiB memory in use. Process 2989897 has 4.00 MiB memory in use. Process 2989914 has 8.00 MiB memory in use. Process 2989070 has 6.00 MiB memory in use. Process 2989165 has 4.00 MiB memory in use. Process 2989068 has 4.00 MiB memory in use. Process 2990500 has 4.00 MiB memory in use. Process 2989449 has 4.00 MiB memory in use. Process 2988447 has 10.00 MiB memory in use. Process 2988379 has 4.00 MiB memory in use. Process 2988518 has 4.00 MiB memory in use. Process 2988326 has 4.00 MiB memory in use. Process 2988462 has 10.00 MiB memory in use. Process 2989297 has 10.00 MiB memory in use. Process 2988927 has 10.00 MiB memory in use. Process 2988994 has 10.00 MiB memory in use. Process 2988205 has 10.00 MiB memory in use. Process 2988861 has 10.00 MiB memory in use. Of the allocated memory 12.00 MiB is allocated by PyTorch, and 0 bytes is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. 
See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) +Traceback (most recent call last): + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 473, in + train() + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 424, in train + model = transformers.AutoModelForSequenceClassification.from_pretrained( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/modeling_utils.py", line 2271, in to + return super().to(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1369, in to + return self._apply(convert) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 955, in _apply + param_applied = fn(param) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1355, in convert + return t.to( +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 0 has a total capacity of 47.53 GiB of which 44.00 MiB is free. Process 2988293 has 21.00 GiB memory in use. Process 2988247 has 13.12 GiB memory in use. Process 2988061 has 7.12 GiB memory in use. Process 2988245 has 0 bytes memory in use. Process 2988374 has 260.00 MiB memory in use. Process 2988531 has 260.00 MiB memory in use. Process 2988064 has 260.00 MiB memory in use. Process 2988566 has 260.00 MiB memory in use. Process 2988668 has 260.00 MiB memory in use. 
Process 2988093 has 260.00 MiB memory in use. Process 2988637 has 260.00 MiB memory in use. Including non-PyTorch memory, this process has 272.00 MiB memory in use. Process 2988906 has 260.00 MiB memory in use. Process 2988874 has 260.00 MiB memory in use. Process 2988923 has 272.00 MiB memory in use. Process 2988286 has 260.00 MiB memory in use. Process 2989072 has 260.00 MiB memory in use. Process 2988448 has 260.00 MiB memory in use. Process 2988348 has 294.00 MiB memory in use. Process 2988736 has 260.00 MiB memory in use. Process 2988855 has 374.00 MiB memory in use. Process 2988814 has 6.00 MiB memory in use. Process 2988511 has 4.00 MiB memory in use. Process 2988942 has 6.00 MiB memory in use. Process 2989897 has 4.00 MiB memory in use. Process 2989914 has 8.00 MiB memory in use. Process 2989070 has 6.00 MiB memory in use. Process 2989165 has 4.00 MiB memory in use. Process 2989068 has 4.00 MiB memory in use. Process 2990500 has 4.00 MiB memory in use. Process 2989449 has 4.00 MiB memory in use. Process 2988447 has 10.00 MiB memory in use. Process 2988379 has 4.00 MiB memory in use. Process 2988518 has 4.00 MiB memory in use. Process 2988326 has 4.00 MiB memory in use. Process 2988462 has 10.00 MiB memory in use. Process 2989297 has 10.00 MiB memory in use. Process 2988927 has 10.00 MiB memory in use. Process 2988994 has 10.00 MiB memory in use. Process 2988205 has 10.00 MiB memory in use. Process 2988861 has 10.00 MiB memory in use. Of the allocated memory 12.00 MiB is allocated by PyTorch, and 0 bytes is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. 
See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-foihfh1b/files/requirements.txt b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-foihfh1b/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..44d18d32ec4648cd530877d7c8c4758d5e887b9c --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-foihfh1b/files/requirements.txt @@ -0,0 +1,144 @@ +scipy==1.13.1 +jupyter_core==5.8.1 +smmap==5.0.2 +yarl==1.22.0 +executing==2.2.0 +cffi==2.0.0 +mkl_random==1.2.8 +traitlets==5.14.3 +wandb==0.23.1 +annotated-types==0.7.0 +evaluate==0.4.6 +kiwisolver==1.4.4 +Jinja2==3.1.6 +pyparsing==3.2.0 +mpmath==1.3.0 +debugpy==1.8.16 +nvidia-cuda-nvrtc-cu12==12.8.93 +docker-pycreds==0.4.0 +pycparser==2.23 +anyio==4.12.0 +safetensors==0.7.0 +matplotlib-inline==0.1.7 +Pygments==2.19.2 +numpy==2.0.2 +nvidia-cuda-cupti-cu12==12.8.90 +Bottleneck==1.4.2 +matplotlib==3.9.2 +numexpr==2.10.1 +sip==6.7.12 +aiohappyeyeballs==2.6.1 +ptyprocess==0.7.0 +fsspec==2025.7.0 +accelerate==0.25.0 +zipp==3.23.0 +PyQt5_sip==12.13.0 +pure_eval==0.2.3 +regex==2025.11.3 +aiosignal==1.4.0 +certifi==2025.10.5 +transformers==4.35.2 +mkl-service==2.4.0 +httpx==0.28.1 +mkl_fft==1.3.11 +pickleshare==0.7.5 +ipykernel==6.30.1 +nvidia-nvtx-cu12==12.8.90 +nvidia-cufft-cu12==11.3.3.83 +triton==3.4.0 +numba==0.60.0 +psutil==7.0.0 +contourpy==1.2.1 +PyQt5==5.15.10 +packaging==25.0 +datasets==4.4.1 +ipython==8.18.1 +sympy==1.14.0 +nvidia-cusolver-cu12==11.7.3.90 +multidict==6.7.0 +jupyter_client==8.6.3 +setuptools==80.9.0 +prompt_toolkit==3.0.51 +six==1.17.0 +GitPython==3.1.45 +pydantic==2.11.7 +nvidia-cublas-cu12==12.8.4.1 +aiohttp==3.13.2 +tzdata==2025.2 +importlib_metadata==8.7.0 +biopython==1.85 +httpcore==1.0.9 +python-dateutil==2.9.0.post0 +llvmlite==0.43.0 +pandas==2.3.3 +scikit-learn==1.6.1 +asttokens==3.0.0 +joblib==1.5.3 +h11==0.16.0 
+charset-normalizer==3.4.4 +pyzmq==27.0.2 +multiprocess==0.70.18 +nvidia-nvjitlink-cu12==12.8.93 +sentry-sdk==2.35.0 +pytz==2025.2 +pydantic_core==2.33.2 +MarkupSafe==3.0.3 +brotlicffi==1.0.9.2 +stack_data==0.6.3 +tqdm==4.67.1 +pynndescent==0.5.13 +importlib_resources==6.5.2 +ply==3.11 +pyarrow==21.0.0 +tokenizers==0.15.2 +exceptiongroup==1.3.1 +nvidia-cusparse-cu12==12.5.8.93 +comm==0.2.3 +pillow==11.3.0 +nvidia-cusparselt-cu12==0.7.1 +protobuf==3.20.3 +urllib3==2.5.0 +wheel==0.45.1 +wcwidth==0.2.13 +appdirs==1.4.4 +PySocks==1.7.1 +PyQt6_sip==13.10.2 +umap-learn==0.5.9.post2 +attrs==25.4.0 +platformdirs==4.3.8 +nvidia-cuda-runtime-cu12==12.8.90 +typing-inspection==0.4.1 +huggingface_hub==0.34.4 +decorator==5.2.1 +filelock==3.17.0 +nvidia-nccl-cu12==2.27.3 +fonttools==4.60.1 +xxhash==3.6.0 +dill==0.4.0 +threadpoolctl==3.6.0 +parso==0.8.4 +pysam==0.9.1 +frozenlist==1.8.0 +typing_extensions==4.15.0 +propcache==0.4.1 +tomli==2.2.1 +click==8.1.8 +nvidia-cudnn-cu12==9.10.2.21 +gitdb==4.0.12 +pip==25.3 +tornado==6.5.2 +networkx==3.2.1 +jedi==0.19.2 +idna==3.11 +pexpect==4.9.0 +async-timeout==5.0.1 +hf-xet==1.1.8 +nvidia-curand-cu12==10.3.9.90 +PyYAML==6.0.2 +nvidia-cufile-cu12==1.13.1.3 +setproctitle==1.3.6 +eval_type_backport==0.2.2 +requests==2.32.5 +nest-asyncio==1.6.0 +torch==2.8.0 +cycler==0.11.0 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-foihfh1b/files/wandb-metadata.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-foihfh1b/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..76a2e1f8b6fe13f6233a4fe783f2841e5f5b89dd --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-foihfh1b/files/wandb-metadata.json @@ -0,0 +1,85 @@ +{ + "os": "Linux-5.15.0-126-generic-x86_64-with-glibc2.35", + "python": "CPython 3.9.18", + "startedAt": "2026-03-24T20:29:19.732396Z", + "args": [ + "--model_name_or_path", + "/data/nanhuang/Nan/models/DNAbert2_Pretrained", + "--tokenizer_path", + 
"/data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json", + "--trust_remote_code", + "True", + "--data_path", + "/data/nanhuang/Nan/ft_data/drosophila_enhancers_stark/split", + "--kmer", + "-1", + "--run_name", + "base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42", + "--model_max_length", + "512", + "--per_device_train_batch_size", + "128", + "--per_device_eval_batch_size", + "128", + "--gradient_accumulation_steps", + "1", + "--learning_rate", + "3e-5", + "--weight_decay", + "0.05", + "--num_train_epochs", + "4", + "--lr_scheduler_type", + "linear", + "--warmup_steps", + "0", + "--warmup_ratio", + "0.15", + "--fp16", + "--output_dir", + "genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42", + "--evaluation_strategy", + "epoch", + "--save_strategy", + "epoch", + "--load_best_model_at_end", + "True", + "--metric_for_best_model", + "eval_f1", + "--greater_is_better", + "True", + "--save_total_limit", + "1", + "--logging_steps", + "100", + "--overwrite_output_dir", + "True", + "--log_level", + "info", + "--seed", + "42", + "--find_unused_parameters", + "False", + "--project_name", + "genomic_bench_DNAbert2" + ], + "program": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", + "codePath": "train.py", + "codePathLocal": "train.py", + "email": "n5huang@ucsd.edu", + "root": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks", + "host": "u112222", + "executable": "/data/nanhuang/miniconda3/envs/bpe_v2/bin/python", + "cpu_count": 64, + "cpu_count_logical": 128, + "disk": { + "/": { + "total": "3768964489216", + "used": "3559222042624" + } + }, + "memory": { + "total": "1082030182400" + }, + "writerId": "zlmu8imsoiyd9zzdjup8gvxbk7dvfypv" +} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-foihfh1b/files/wandb-summary.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-foihfh1b/files/wandb-summary.json new file mode 100644 index 
0000000000000000000000000000000000000000..b83728ff622654da73d628f469e3fbb77e031a30 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-foihfh1b/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb":{"runtime":14},"_runtime":14} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-foihfh1b/logs/debug-core.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-foihfh1b/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..a7a2d763b70cfa885ae73c0a7ddd0be9aa630b62 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-foihfh1b/logs/debug-core.log @@ -0,0 +1,28 @@ +{"time":"2026-03-24T13:29:19.254348558-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpcvvxi5ex/port-2988116.txt","pid":2988116,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:19.257559219-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988116} +{"time":"2026-03-24T13:29:19.257518149-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988116-2997057-152810899/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:19.414838851-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:19.488163869-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"0pp4rdwo","id":"1(@)"} +{"time":"2026-03-24T13:29:19.887844472-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpe35592t4/port-2988554.txt","pid":2988554,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:19.891071573-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988554} +{"time":"2026-03-24T13:29:19.890998613-07:00","level":"INFO","msg":"server: accepting 
connections","addr":{"Name":"/tmp/wandb-2988554-2997257-2276799086/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:20.056938925-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:20.129332018-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"foihfh1b","id":"1(@)"} +{"time":"2026-03-24T13:29:24.703840434-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"0pp4rdwo","id":"1(@)"} +{"time":"2026-03-24T13:29:25.406331122-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"foihfh1b","id":"1(@)"} +{"time":"2026-03-24T13:29:34.402875462-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:34.403062891-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:34.403069251-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:34.403293869-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988116-2997057-152810899/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:34.403326389-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:35.13780543-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:35.137881829-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:35.137912439-07:00","level":"INFO","msg":"server is closed"} +{"time":"2026-03-24T13:29:40.902553438-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:40.902795807-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:40.902830916-07:00","level":"INFO","msg":"server is shutting down"} 
+{"time":"2026-03-24T13:29:40.903034885-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:40.903514643-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988554-2997257-2276799086/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:44.973887931-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:44.97407156-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:44.97412328-07:00","level":"INFO","msg":"server is closed"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-foihfh1b/logs/debug-internal.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-foihfh1b/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..8f0127daed00508f7350c9c6cdb692811c90b660 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-foihfh1b/logs/debug-internal.log @@ -0,0 +1,12 @@ +{"time":"2026-03-24T13:29:20.129496787-07:00","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2026-03-24T13:29:25.212055588-07:00","level":"ERROR","msg":"monitor: failed to initialize GPU resource: monitor: could not get GPU binary port: timeout reading portfile /tmp/wandb-system-monitor-portfile-4164733909"} +{"time":"2026-03-24T13:29:25.405550567-07:00","level":"INFO","msg":"stream: created new stream","id":"foihfh1b"} +{"time":"2026-03-24T13:29:25.406112053-07:00","level":"INFO","msg":"handler: started","stream_id":"foihfh1b"} +{"time":"2026-03-24T13:29:25.406305752-07:00","level":"INFO","msg":"stream: started","id":"foihfh1b"} +{"time":"2026-03-24T13:29:25.406313812-07:00","level":"INFO","msg":"writer: started","stream_id":"foihfh1b"} +{"time":"2026-03-24T13:29:25.406333102-07:00","level":"INFO","msg":"sender: started","stream_id":"foihfh1b"} 
+{"time":"2026-03-24T13:29:40.902838256-07:00","level":"INFO","msg":"stream: closing","id":"foihfh1b"} +{"time":"2026-03-24T13:29:41.312053444-07:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2026-03-24T13:29:44.961818783-07:00","level":"INFO","msg":"handler: closed","stream_id":"foihfh1b"} +{"time":"2026-03-24T13:29:44.962380489-07:00","level":"INFO","msg":"sender: closed","stream_id":"foihfh1b"} +{"time":"2026-03-24T13:29:44.962414049-07:00","level":"INFO","msg":"stream: closed","id":"foihfh1b"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-foihfh1b/logs/debug.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-foihfh1b/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..23b124b10edf8c18571416c9ad76c0e1416da8c2 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-foihfh1b/logs/debug.log @@ -0,0 +1,23 @@ +2026-03-24 13:29:19,736 INFO MainThread:2988554 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1 +2026-03-24 13:29:19,736 INFO MainThread:2988554 [wandb_setup.py:_flush():80] Configure stats pid to 2988554 +2026-03-24 13:29:19,736 INFO MainThread:2988554 [wandb_setup.py:_flush():80] Loading settings from /home/nanhuang/.config/wandb/settings +2026-03-24 13:29:19,736 INFO MainThread:2988554 [wandb_setup.py:_flush():80] Loading settings from /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/settings +2026-03-24 13:29:19,736 INFO MainThread:2988554 [wandb_setup.py:_flush():80] Loading settings from environment variables +2026-03-24 13:29:19,736 INFO MainThread:2988554 [wandb_init.py:setup_run_log_directory():714] Logging user logs to /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-foihfh1b/logs/debug.log +2026-03-24 13:29:19,736 INFO MainThread:2988554 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to 
/data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-foihfh1b/logs/debug-internal.log +2026-03-24 13:29:19,736 INFO MainThread:2988554 [wandb_init.py:init():841] calling init triggers +2026-03-24 13:29:19,736 INFO MainThread:2988554 [wandb_init.py:init():846] wandb.init called with sweep_config: {} +config: {'_wandb': {}} +2026-03-24 13:29:19,736 INFO MainThread:2988554 [wandb_init.py:init():889] starting backend +2026-03-24 13:29:20,057 INFO MainThread:2988554 [wandb_init.py:init():892] sending inform_init request +2026-03-24 13:29:20,125 INFO MainThread:2988554 [wandb_init.py:init():900] backend started and connected +2026-03-24 13:29:20,136 INFO MainThread:2988554 [wandb_init.py:init():970] updated telemetry +2026-03-24 13:29:20,137 INFO MainThread:2988554 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout +2026-03-24 13:29:26,049 INFO MainThread:2988554 [wandb_init.py:init():1041] starting run threads in backend +2026-03-24 13:29:26,196 INFO MainThread:2988554 [wandb_run.py:_console_start():2521] atexit reg +2026-03-24 13:29:26,197 INFO MainThread:2988554 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2026-03-24 13:29:26,197 INFO MainThread:2988554 [wandb_run.py:_redirect():2438] Wrapping output streams. +2026-03-24 13:29:26,197 INFO MainThread:2988554 [wandb_run.py:_redirect():2461] Redirects installed. +2026-03-24 13:29:26,203 INFO MainThread:2988554 [wandb_init.py:init():1081] run started, returning control to user process +2026-03-24 13:29:40,902 INFO wandb-AsyncioManager-main:2988554 [service_client.py:_forward_responses():80] Reached EOF. +2026-03-24 13:29:40,903 INFO wandb-AsyncioManager-main:2988554 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles. 
diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-foihfh1b/run-foihfh1b.wandb b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-foihfh1b/run-foihfh1b.wandb new file mode 100644 index 0000000000000000000000000000000000000000..6d98d8f15742f89487285ceb9553660c508f5b6a Binary files /dev/null and b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-foihfh1b/run-foihfh1b.wandb differ diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-ku0mi14u/files/config.yaml b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-ku0mi14u/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f61e38b9ac28ccd8a2977ba6b95f7fdbbeb94bac --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-ku0mi14u/files/config.yaml @@ -0,0 +1,108 @@ +_wandb: + value: + cli_version: 0.23.1 + e: + 3zvyhqg5u68f6xhg5o50v2ll7xp1vsx5: + args: + - --model_name_or_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained + - --tokenizer_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json + - --trust_remote_code + - "True" + - --data_path + - /data/nanhuang/Nan/ft_data/drosophila_enhancers_stark/split + - --kmer + - "-1" + - --run_name + - base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42 + - --model_max_length + - "512" + - --per_device_train_batch_size + - "128" + - --per_device_eval_batch_size + - "128" + - --gradient_accumulation_steps + - "1" + - --learning_rate + - "3e-5" + - --weight_decay + - "0.05" + - --num_train_epochs + - "4" + - --lr_scheduler_type + - linear + - --warmup_steps + - "0" + - --warmup_ratio + - "0.15" + - --fp16 + - --output_dir + - genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42 + - --evaluation_strategy + - epoch + - --save_strategy + - epoch + - --load_best_model_at_end + - "True" + - --metric_for_best_model + - eval_f1 + - --greater_is_better + - "True" + - --save_total_limit + - "1" + - --logging_steps + 
- "100" + - --overwrite_output_dir + - "True" + - --log_level + - info + - --seed + - "42" + - --find_unused_parameters + - "False" + - --project_name + - genomic_bench_DNAbert2 + codePath: train.py + codePathLocal: train.py + cpu_count: 64 + cpu_count_logical: 128 + disk: + /: + total: "3768964489216" + used: "3559222075392" + email: n5huang@ucsd.edu + executable: /data/nanhuang/miniconda3/envs/bpe_v2/bin/python + host: u112222 + memory: + total: "1082030182400" + os: Linux-5.15.0-126-generic-x86_64-with-glibc2.35 + program: /data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py + python: CPython 3.9.18 + root: /data/nanhuang/Nan/Finetune-GenomicBenchmarks + startedAt: "2026-03-24T20:29:19.846430Z" + writerId: 3zvyhqg5u68f6xhg5o50v2ll7xp1vsx5 + m: [] + python_version: 3.9.18 + t: + "1": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "2": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "4": 3.9.18 + "5": 0.23.1 + "6": 4.35.2 + "12": 0.23.1 + "13": linux-x86_64 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-ku0mi14u/files/output.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-ku0mi14u/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..cc8d25437b36bfd1cfc31552ac2ae16e1c19cd59 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-ku0mi14u/files/output.log @@ -0,0 +1,45 @@ +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... +Some weights of BertForSequenceClassification were not initialized from the model checkpoint at /data/nanhuang/Nan/models/DNAbert2_Pretrained and are newly initialized: ['classifier.weight', 'bert.pooler.dense.bias', 'classifier.bias', 'bert.pooler.dense.weight'] +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. 
+Traceback (most recent call last): + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 473, in + train() + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 424, in train + model = transformers.AutoModelForSequenceClassification.from_pretrained( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/modeling_utils.py", line 2271, in to + return super().to(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1369, in to + return self._apply(convert) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 955, in _apply + param_applied = fn(param) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1355, in convert + return t.to( +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 12.00 MiB. GPU 0 has a total capacity of 47.53 GiB of which 29.38 MiB is free. Process 2988293 has 21.00 GiB memory in use. Process 2988247 has 13.12 GiB memory in use. Process 2988061 has 7.12 GiB memory in use. Process 2988353 has 574.00 MiB memory in use. Process 2988034 has 434.00 MiB memory in use. Process 2988245 has 260.00 MiB memory in use. Process 2988374 has 260.00 MiB memory in use. Process 2988531 has 260.00 MiB memory in use. Process 2988064 has 260.00 MiB memory in use. Process 2988566 has 260.00 MiB memory in use. Process 2988668 has 260.00 MiB memory in use. 
Process 2988116 has 260.00 MiB memory in use. Including non-PyTorch memory, this process has 260.00 MiB memory in use. Process 2988637 has 260.00 MiB memory in use. Process 2988554 has 260.00 MiB memory in use. Process 2988906 has 260.00 MiB memory in use. Process 2988874 has 260.00 MiB memory in use. Process 2988923 has 260.00 MiB memory in use. Process 2988286 has 260.00 MiB memory in use. Process 2989072 has 260.00 MiB memory in use. Process 2988448 has 260.00 MiB memory in use. Process 2988348 has 260.00 MiB memory in use. Process 2988736 has 260.00 MiB memory in use. Process 2988855 has 260.00 MiB memory in use. Process 2988814 has 8.00 MiB memory in use. Process 2988511 has 4.00 MiB memory in use. Process 2988942 has 8.00 MiB memory in use. Process 2989897 has 4.00 MiB memory in use. Process 2989914 has 10.00 MiB memory in use. Process 2989070 has 10.00 MiB memory in use. Process 2989165 has 10.00 MiB memory in use. Process 2989068 has 10.00 MiB memory in use. Process 2990500 has 10.00 MiB memory in use. Process 2989449 has 10.00 MiB memory in use. Process 2988447 has 14.00 MiB memory in use. Process 2988379 has 10.00 MiB memory in use. Process 2988518 has 10.00 MiB memory in use. Process 2988326 has 10.00 MiB memory in use. Process 2988462 has 10.00 MiB memory in use. Process 2989297 has 10.00 MiB memory in use. Process 2988927 has 10.00 MiB memory in use. Process 2988994 has 10.00 MiB memory in use. Process 2988205 has 10.00 MiB memory in use. Of the allocated memory 0 bytes is allocated by PyTorch, and 0 bytes is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. 
See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) +Traceback (most recent call last): + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 473, in + train() + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 424, in train + model = transformers.AutoModelForSequenceClassification.from_pretrained( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/modeling_utils.py", line 2271, in to + return super().to(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1369, in to + return self._apply(convert) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 955, in _apply + param_applied = fn(param) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1355, in convert + return t.to( +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 12.00 MiB. GPU 0 has a total capacity of 47.53 GiB of which 29.38 MiB is free. Process 2988293 has 21.00 GiB memory in use. Process 2988247 has 13.12 GiB memory in use. Process 2988061 has 7.12 GiB memory in use. Process 2988353 has 574.00 MiB memory in use. Process 2988034 has 434.00 MiB memory in use. Process 2988245 has 260.00 MiB memory in use. Process 2988374 has 260.00 MiB memory in use. Process 2988531 has 260.00 MiB memory in use. Process 2988064 has 260.00 MiB memory in use. 
Process 2988566 has 260.00 MiB memory in use. Process 2988668 has 260.00 MiB memory in use. Process 2988116 has 260.00 MiB memory in use. Including non-PyTorch memory, this process has 260.00 MiB memory in use. Process 2988637 has 260.00 MiB memory in use. Process 2988554 has 260.00 MiB memory in use. Process 2988906 has 260.00 MiB memory in use. Process 2988874 has 260.00 MiB memory in use. Process 2988923 has 260.00 MiB memory in use. Process 2988286 has 260.00 MiB memory in use. Process 2989072 has 260.00 MiB memory in use. Process 2988448 has 260.00 MiB memory in use. Process 2988348 has 260.00 MiB memory in use. Process 2988736 has 260.00 MiB memory in use. Process 2988855 has 260.00 MiB memory in use. Process 2988814 has 8.00 MiB memory in use. Process 2988511 has 4.00 MiB memory in use. Process 2988942 has 8.00 MiB memory in use. Process 2989897 has 4.00 MiB memory in use. Process 2989914 has 10.00 MiB memory in use. Process 2989070 has 10.00 MiB memory in use. Process 2989165 has 10.00 MiB memory in use. Process 2989068 has 10.00 MiB memory in use. Process 2990500 has 10.00 MiB memory in use. Process 2989449 has 10.00 MiB memory in use. Process 2988447 has 14.00 MiB memory in use. Process 2988379 has 10.00 MiB memory in use. Process 2988518 has 10.00 MiB memory in use. Process 2988326 has 10.00 MiB memory in use. Process 2988462 has 10.00 MiB memory in use. Process 2989297 has 10.00 MiB memory in use. Process 2988927 has 10.00 MiB memory in use. Process 2988994 has 10.00 MiB memory in use. Process 2988205 has 10.00 MiB memory in use. Of the allocated memory 0 bytes is allocated by PyTorch, and 0 bytes is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. 
See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-ku0mi14u/files/requirements.txt b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-ku0mi14u/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..44d18d32ec4648cd530877d7c8c4758d5e887b9c --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-ku0mi14u/files/requirements.txt @@ -0,0 +1,144 @@ +scipy==1.13.1 +jupyter_core==5.8.1 +smmap==5.0.2 +yarl==1.22.0 +executing==2.2.0 +cffi==2.0.0 +mkl_random==1.2.8 +traitlets==5.14.3 +wandb==0.23.1 +annotated-types==0.7.0 +evaluate==0.4.6 +kiwisolver==1.4.4 +Jinja2==3.1.6 +pyparsing==3.2.0 +mpmath==1.3.0 +debugpy==1.8.16 +nvidia-cuda-nvrtc-cu12==12.8.93 +docker-pycreds==0.4.0 +pycparser==2.23 +anyio==4.12.0 +safetensors==0.7.0 +matplotlib-inline==0.1.7 +Pygments==2.19.2 +numpy==2.0.2 +nvidia-cuda-cupti-cu12==12.8.90 +Bottleneck==1.4.2 +matplotlib==3.9.2 +numexpr==2.10.1 +sip==6.7.12 +aiohappyeyeballs==2.6.1 +ptyprocess==0.7.0 +fsspec==2025.7.0 +accelerate==0.25.0 +zipp==3.23.0 +PyQt5_sip==12.13.0 +pure_eval==0.2.3 +regex==2025.11.3 +aiosignal==1.4.0 +certifi==2025.10.5 +transformers==4.35.2 +mkl-service==2.4.0 +httpx==0.28.1 +mkl_fft==1.3.11 +pickleshare==0.7.5 +ipykernel==6.30.1 +nvidia-nvtx-cu12==12.8.90 +nvidia-cufft-cu12==11.3.3.83 +triton==3.4.0 +numba==0.60.0 +psutil==7.0.0 +contourpy==1.2.1 +PyQt5==5.15.10 +packaging==25.0 +datasets==4.4.1 +ipython==8.18.1 +sympy==1.14.0 +nvidia-cusolver-cu12==11.7.3.90 +multidict==6.7.0 +jupyter_client==8.6.3 +setuptools==80.9.0 +prompt_toolkit==3.0.51 +six==1.17.0 +GitPython==3.1.45 +pydantic==2.11.7 +nvidia-cublas-cu12==12.8.4.1 +aiohttp==3.13.2 +tzdata==2025.2 +importlib_metadata==8.7.0 +biopython==1.85 +httpcore==1.0.9 +python-dateutil==2.9.0.post0 +llvmlite==0.43.0 +pandas==2.3.3 +scikit-learn==1.6.1 +asttokens==3.0.0 +joblib==1.5.3 +h11==0.16.0 
+charset-normalizer==3.4.4 +pyzmq==27.0.2 +multiprocess==0.70.18 +nvidia-nvjitlink-cu12==12.8.93 +sentry-sdk==2.35.0 +pytz==2025.2 +pydantic_core==2.33.2 +MarkupSafe==3.0.3 +brotlicffi==1.0.9.2 +stack_data==0.6.3 +tqdm==4.67.1 +pynndescent==0.5.13 +importlib_resources==6.5.2 +ply==3.11 +pyarrow==21.0.0 +tokenizers==0.15.2 +exceptiongroup==1.3.1 +nvidia-cusparse-cu12==12.5.8.93 +comm==0.2.3 +pillow==11.3.0 +nvidia-cusparselt-cu12==0.7.1 +protobuf==3.20.3 +urllib3==2.5.0 +wheel==0.45.1 +wcwidth==0.2.13 +appdirs==1.4.4 +PySocks==1.7.1 +PyQt6_sip==13.10.2 +umap-learn==0.5.9.post2 +attrs==25.4.0 +platformdirs==4.3.8 +nvidia-cuda-runtime-cu12==12.8.90 +typing-inspection==0.4.1 +huggingface_hub==0.34.4 +decorator==5.2.1 +filelock==3.17.0 +nvidia-nccl-cu12==2.27.3 +fonttools==4.60.1 +xxhash==3.6.0 +dill==0.4.0 +threadpoolctl==3.6.0 +parso==0.8.4 +pysam==0.9.1 +frozenlist==1.8.0 +typing_extensions==4.15.0 +propcache==0.4.1 +tomli==2.2.1 +click==8.1.8 +nvidia-cudnn-cu12==9.10.2.21 +gitdb==4.0.12 +pip==25.3 +tornado==6.5.2 +networkx==3.2.1 +jedi==0.19.2 +idna==3.11 +pexpect==4.9.0 +async-timeout==5.0.1 +hf-xet==1.1.8 +nvidia-curand-cu12==10.3.9.90 +PyYAML==6.0.2 +nvidia-cufile-cu12==1.13.1.3 +setproctitle==1.3.6 +eval_type_backport==0.2.2 +requests==2.32.5 +nest-asyncio==1.6.0 +torch==2.8.0 +cycler==0.11.0 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-ku0mi14u/files/wandb-metadata.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-ku0mi14u/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..4e7c96ef3ec16f28b38d5074fc715af6d55df636 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-ku0mi14u/files/wandb-metadata.json @@ -0,0 +1,85 @@ +{ + "os": "Linux-5.15.0-126-generic-x86_64-with-glibc2.35", + "python": "CPython 3.9.18", + "startedAt": "2026-03-24T20:29:19.846430Z", + "args": [ + "--model_name_or_path", + "/data/nanhuang/Nan/models/DNAbert2_Pretrained", + "--tokenizer_path", + 
"/data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json", + "--trust_remote_code", + "True", + "--data_path", + "/data/nanhuang/Nan/ft_data/drosophila_enhancers_stark/split", + "--kmer", + "-1", + "--run_name", + "base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42", + "--model_max_length", + "512", + "--per_device_train_batch_size", + "128", + "--per_device_eval_batch_size", + "128", + "--gradient_accumulation_steps", + "1", + "--learning_rate", + "3e-5", + "--weight_decay", + "0.05", + "--num_train_epochs", + "4", + "--lr_scheduler_type", + "linear", + "--warmup_steps", + "0", + "--warmup_ratio", + "0.15", + "--fp16", + "--output_dir", + "genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42", + "--evaluation_strategy", + "epoch", + "--save_strategy", + "epoch", + "--load_best_model_at_end", + "True", + "--metric_for_best_model", + "eval_f1", + "--greater_is_better", + "True", + "--save_total_limit", + "1", + "--logging_steps", + "100", + "--overwrite_output_dir", + "True", + "--log_level", + "info", + "--seed", + "42", + "--find_unused_parameters", + "False", + "--project_name", + "genomic_bench_DNAbert2" + ], + "program": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", + "codePath": "train.py", + "codePathLocal": "train.py", + "email": "n5huang@ucsd.edu", + "root": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks", + "host": "u112222", + "executable": "/data/nanhuang/miniconda3/envs/bpe_v2/bin/python", + "cpu_count": 64, + "cpu_count_logical": 128, + "disk": { + "/": { + "total": "3768964489216", + "used": "3559222075392" + } + }, + "memory": { + "total": "1082030182400" + }, + "writerId": "3zvyhqg5u68f6xhg5o50v2ll7xp1vsx5" +} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-ku0mi14u/files/wandb-summary.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-ku0mi14u/files/wandb-summary.json new file mode 100644 index 
0000000000000000000000000000000000000000..49f1c97bdc4f4b441dcae913d01d8e867efca440 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-ku0mi14u/files/wandb-summary.json @@ -0,0 +1 @@ +{"_runtime":9,"_wandb":{"runtime":9}} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-ku0mi14u/logs/debug-core.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-ku0mi14u/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..c6a849eb6203bbca1b49abeeb05546472054fccd --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-ku0mi14u/logs/debug-core.log @@ -0,0 +1,70 @@ +{"time":"2026-03-24T13:29:20.03742456-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpoc28hlmr/port-2988093.txt","pid":2988093,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:20.038738382-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmp9uquarlq/port-2988874.txt","pid":2988874,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:20.039003841-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988093} +{"time":"2026-03-24T13:29:20.039025901-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988093-2997274-3761623073/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:20.04069919-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988874} +{"time":"2026-03-24T13:29:20.040718421-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988874-2997275-4219790605/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:20.124433377-07:00","level":"INFO","msg":"main: starting 
server","port-filename":"/tmp/tmpze5mtr29/port-2988637.txt","pid":2988637,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:20.128053085-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988637} +{"time":"2026-03-24T13:29:20.128019566-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988637-2997300-777241408/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:20.205079181-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:20.213519842-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:20.266985147-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"ku0mi14u","id":"1(@)"} +{"time":"2026-03-24T13:29:20.295658037-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:20.3172018-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"eliuv43y","id":"1(@)"} +{"time":"2026-03-24T13:29:20.379313944-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"yjc1lhkl","id":"1(@)"} +{"time":"2026-03-24T13:29:20.444005412-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmplbn8udml/port-2988906.txt","pid":2988906,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:20.445893061-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988906} +{"time":"2026-03-24T13:29:20.445883732-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988906-2997875-3091809193/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:20.630082905-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} 
+{"time":"2026-03-24T13:29:20.648636296-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpqmfui_f4/port-2988923.txt","pid":2988923,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:20.650975252-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988923} +{"time":"2026-03-24T13:29:20.650965532-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988923-2998027-2125257262/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:20.677245977-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"5uhcp1bu","id":"1(@)"} +{"time":"2026-03-24T13:29:20.83285716-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:20.881265634-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"zrhz20dw","id":"1(@)"} +{"time":"2026-03-24T13:29:25.468460486-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"ku0mi14u","id":"1(@)"} +{"time":"2026-03-24T13:29:25.562825929-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"eliuv43y","id":"1(@)"} +{"time":"2026-03-24T13:29:25.589227984-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"yjc1lhkl","id":"1(@)"} +{"time":"2026-03-24T13:29:25.872707423-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"5uhcp1bu","id":"1(@)"} +{"time":"2026-03-24T13:29:26.077784103-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"zrhz20dw","id":"1(@)"} +{"time":"2026-03-24T13:29:35.347215345-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:35.347334264-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:35.347311435-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} 
+{"time":"2026-03-24T13:29:35.347442434-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:35.347525883-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988874-2997275-4219790605/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:35.485867918-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:35.486051637-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:35.486311015-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988093-2997274-3761623073/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:35.486655283-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:35.486845412-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:36.037426476-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:36.037580545-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:36.037557775-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:36.037864853-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988923-2998027-2125257262/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:36.037964663-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:36.180693602-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:36.180838011-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:36.180819861-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:36.18100239-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} 
+{"time":"2026-03-24T13:29:36.18108012-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988906-2997875-3091809193/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:36.251602234-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:36.251670873-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:36.251692473-07:00","level":"INFO","msg":"server is closed"} +{"time":"2026-03-24T13:29:36.372019764-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:36.372219503-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:36.372213063-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:36.372521501-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:36.372582451-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988637-2997300-777241408/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:37.032919698-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:37.033011367-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:37.033062727-07:00","level":"INFO","msg":"server is closed"} +{"time":"2026-03-24T13:29:37.107021871-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:37.107098891-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:37.107145341-07:00","level":"INFO","msg":"server is closed"} +{"time":"2026-03-24T13:29:38.306457371-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} 
+{"time":"2026-03-24T13:29:38.30656243-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:38.30658601-07:00","level":"INFO","msg":"server is closed"} +{"time":"2026-03-24T13:29:42.21708758-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:42.217248489-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:42.217275609-07:00","level":"INFO","msg":"server is closed"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-ku0mi14u/logs/debug-internal.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-ku0mi14u/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..79c5753c37722ad365569003a0a9ce6bf8ceb8ee --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-ku0mi14u/logs/debug-internal.log @@ -0,0 +1,12 @@ +{"time":"2026-03-24T13:29:20.267451384-07:00","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2026-03-24T13:29:25.36097182-07:00","level":"ERROR","msg":"monitor: failed to initialize GPU resource: monitor: could not get GPU binary port: timeout reading portfile /tmp/wandb-system-monitor-portfile-3942236552"} +{"time":"2026-03-24T13:29:25.468057748-07:00","level":"INFO","msg":"stream: created new stream","id":"ku0mi14u"} +{"time":"2026-03-24T13:29:25.468184127-07:00","level":"INFO","msg":"handler: started","stream_id":"ku0mi14u"} +{"time":"2026-03-24T13:29:25.468448076-07:00","level":"INFO","msg":"stream: started","id":"ku0mi14u"} +{"time":"2026-03-24T13:29:25.468475806-07:00","level":"INFO","msg":"writer: started","stream_id":"ku0mi14u"} +{"time":"2026-03-24T13:29:25.468474616-07:00","level":"INFO","msg":"sender: started","stream_id":"ku0mi14u"} +{"time":"2026-03-24T13:29:35.486015007-07:00","level":"INFO","msg":"stream: closing","id":"ku0mi14u"} 
+{"time":"2026-03-24T13:29:35.939735432-07:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2026-03-24T13:29:38.25564508-07:00","level":"INFO","msg":"handler: closed","stream_id":"ku0mi14u"} +{"time":"2026-03-24T13:29:38.255883329-07:00","level":"INFO","msg":"sender: closed","stream_id":"ku0mi14u"} +{"time":"2026-03-24T13:29:38.255904029-07:00","level":"INFO","msg":"stream: closed","id":"ku0mi14u"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-ku0mi14u/logs/debug.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-ku0mi14u/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..8aa5162632ecff3eadedc2f474dccff00b69ebf7 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-ku0mi14u/logs/debug.log @@ -0,0 +1,23 @@ +2026-03-24 13:29:19,854 INFO MainThread:2988093 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1 +2026-03-24 13:29:19,854 INFO MainThread:2988093 [wandb_setup.py:_flush():80] Configure stats pid to 2988093 +2026-03-24 13:29:19,854 INFO MainThread:2988093 [wandb_setup.py:_flush():80] Loading settings from /home/nanhuang/.config/wandb/settings +2026-03-24 13:29:19,854 INFO MainThread:2988093 [wandb_setup.py:_flush():80] Loading settings from /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/settings +2026-03-24 13:29:19,854 INFO MainThread:2988093 [wandb_setup.py:_flush():80] Loading settings from environment variables +2026-03-24 13:29:19,854 INFO MainThread:2988093 [wandb_init.py:setup_run_log_directory():714] Logging user logs to /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-ku0mi14u/logs/debug.log +2026-03-24 13:29:19,855 INFO MainThread:2988093 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-ku0mi14u/logs/debug-internal.log +2026-03-24 13:29:19,855 INFO MainThread:2988093 [wandb_init.py:init():841] calling init 
triggers +2026-03-24 13:29:19,855 INFO MainThread:2988093 [wandb_init.py:init():846] wandb.init called with sweep_config: {} +config: {'_wandb': {}} +2026-03-24 13:29:19,855 INFO MainThread:2988093 [wandb_init.py:init():889] starting backend +2026-03-24 13:29:20,205 INFO MainThread:2988093 [wandb_init.py:init():892] sending inform_init request +2026-03-24 13:29:20,255 INFO MainThread:2988093 [wandb_init.py:init():900] backend started and connected +2026-03-24 13:29:20,263 INFO MainThread:2988093 [wandb_init.py:init():970] updated telemetry +2026-03-24 13:29:20,264 INFO MainThread:2988093 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout +2026-03-24 13:29:26,101 INFO MainThread:2988093 [wandb_init.py:init():1041] starting run threads in backend +2026-03-24 13:29:26,253 INFO MainThread:2988093 [wandb_run.py:_console_start():2521] atexit reg +2026-03-24 13:29:26,253 INFO MainThread:2988093 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2026-03-24 13:29:26,253 INFO MainThread:2988093 [wandb_run.py:_redirect():2438] Wrapping output streams. +2026-03-24 13:29:26,254 INFO MainThread:2988093 [wandb_run.py:_redirect():2461] Redirects installed. +2026-03-24 13:29:26,262 INFO MainThread:2988093 [wandb_init.py:init():1081] run started, returning control to user process +2026-03-24 13:29:35,481 INFO wandb-AsyncioManager-main:2988093 [service_client.py:_forward_responses():80] Reached EOF. +2026-03-24 13:29:35,482 INFO wandb-AsyncioManager-main:2988093 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles. 
diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-ku0mi14u/run-ku0mi14u.wandb b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-ku0mi14u/run-ku0mi14u.wandb new file mode 100644 index 0000000000000000000000000000000000000000..ab77566701f8cf8ee525c312189e22625a09e056 Binary files /dev/null and b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-ku0mi14u/run-ku0mi14u.wandb differ diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-yjc1lhkl/files/config.yaml b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-yjc1lhkl/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4131317e25886e82aa27c2358d043ce6ed14df7b --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-yjc1lhkl/files/config.yaml @@ -0,0 +1,108 @@ +_wandb: + value: + cli_version: 0.23.1 + e: + 9dxh2vckm0wurdnq6coiu2igm9lk14np: + args: + - --model_name_or_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained + - --tokenizer_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json + - --trust_remote_code + - "True" + - --data_path + - /data/nanhuang/Nan/ft_data/drosophila_enhancers_stark/split + - --kmer + - "-1" + - --run_name + - base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42 + - --model_max_length + - "512" + - --per_device_train_batch_size + - "128" + - --per_device_eval_batch_size + - "128" + - --gradient_accumulation_steps + - "1" + - --learning_rate + - "3e-5" + - --weight_decay + - "0.05" + - --num_train_epochs + - "4" + - --lr_scheduler_type + - linear + - --warmup_steps + - "0" + - --warmup_ratio + - "0.15" + - --fp16 + - --output_dir + - genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42 + - --evaluation_strategy + - epoch + - --save_strategy + - epoch + - --load_best_model_at_end + - "True" + - --metric_for_best_model + - eval_f1 + - --greater_is_better + - "True" + - --save_total_limit + - "1" + - --logging_steps + 
- "100" + - --overwrite_output_dir + - "True" + - --log_level + - info + - --seed + - "42" + - --find_unused_parameters + - "False" + - --project_name + - genomic_bench_DNAbert2 + codePath: train.py + codePathLocal: train.py + cpu_count: 64 + cpu_count_logical: 128 + disk: + /: + total: "3768964489216" + used: "3559222075392" + email: n5huang@ucsd.edu + executable: /data/nanhuang/miniconda3/envs/bpe_v2/bin/python + host: u112222 + memory: + total: "1082030182400" + os: Linux-5.15.0-126-generic-x86_64-with-glibc2.35 + program: /data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py + python: CPython 3.9.18 + root: /data/nanhuang/Nan/Finetune-GenomicBenchmarks + startedAt: "2026-03-24T20:29:19.942497Z" + writerId: 9dxh2vckm0wurdnq6coiu2igm9lk14np + m: [] + python_version: 3.9.18 + t: + "1": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "2": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "4": 3.9.18 + "5": 0.23.1 + "6": 4.35.2 + "12": 0.23.1 + "13": linux-x86_64 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-yjc1lhkl/files/output.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-yjc1lhkl/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..c00e49dbf1d60af07d2f12ab83dd04ea69548787 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-yjc1lhkl/files/output.log @@ -0,0 +1,45 @@ +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... +Some weights of BertForSequenceClassification were not initialized from the model checkpoint at /data/nanhuang/Nan/models/DNAbert2_Pretrained and are newly initialized: ['bert.pooler.dense.weight', 'classifier.weight', 'classifier.bias', 'bert.pooler.dense.bias'] +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. 
+Traceback (most recent call last): + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 473, in + train() + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 424, in train + model = transformers.AutoModelForSequenceClassification.from_pretrained( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/modeling_utils.py", line 2271, in to + return super().to(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1369, in to + return self._apply(convert) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 955, in _apply + param_applied = fn(param) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1355, in convert + return t.to( +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 12.00 MiB. GPU 0 has a total capacity of 47.53 GiB of which 23.38 MiB is free. Process 2988293 has 21.00 GiB memory in use. Process 2988247 has 13.12 GiB memory in use. Process 2988061 has 7.12 GiB memory in use. Process 2988353 has 574.00 MiB memory in use. Process 2988034 has 434.00 MiB memory in use. Process 2988245 has 260.00 MiB memory in use. Process 2988374 has 260.00 MiB memory in use. Process 2988531 has 260.00 MiB memory in use. Process 2988064 has 260.00 MiB memory in use. Process 2988566 has 260.00 MiB memory in use. Process 2988668 has 260.00 MiB memory in use. 
Process 2988116 has 260.00 MiB memory in use. Process 2988093 has 260.00 MiB memory in use. Including non-PyTorch memory, this process has 260.00 MiB memory in use. Process 2988554 has 260.00 MiB memory in use. Process 2988906 has 260.00 MiB memory in use. Process 2988874 has 260.00 MiB memory in use. Process 2988923 has 272.00 MiB memory in use. Process 2988286 has 260.00 MiB memory in use. Process 2989072 has 260.00 MiB memory in use. Process 2988448 has 260.00 MiB memory in use. Process 2988348 has 260.00 MiB memory in use. Process 2988736 has 260.00 MiB memory in use. Process 2988855 has 260.00 MiB memory in use. Process 2988814 has 8.00 MiB memory in use. Process 2988511 has 4.00 MiB memory in use. Process 2988942 has 8.00 MiB memory in use. Process 2989897 has 4.00 MiB memory in use. Process 2989914 has 10.00 MiB memory in use. Process 2989070 has 14.00 MiB memory in use. Process 2989165 has 10.00 MiB memory in use. Process 2989068 has 10.00 MiB memory in use. Process 2990500 has 10.00 MiB memory in use. Process 2989449 has 10.00 MiB memory in use. Process 2988447 has 14.00 MiB memory in use. Process 2988379 has 10.00 MiB memory in use. Process 2988518 has 10.00 MiB memory in use. Process 2988326 has 10.00 MiB memory in use. Process 2988462 has 10.00 MiB memory in use. Process 2989297 has 10.00 MiB memory in use. Process 2988927 has 10.00 MiB memory in use. Process 2988994 has 10.00 MiB memory in use. Process 2988205 has 10.00 MiB memory in use. Of the allocated memory 0 bytes is allocated by PyTorch, and 0 bytes is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. 
See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) +Traceback (most recent call last): + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 473, in + train() + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 424, in train + model = transformers.AutoModelForSequenceClassification.from_pretrained( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/modeling_utils.py", line 2271, in to + return super().to(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1369, in to + return self._apply(convert) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 955, in _apply + param_applied = fn(param) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1355, in convert + return t.to( +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 12.00 MiB. GPU 0 has a total capacity of 47.53 GiB of which 23.38 MiB is free. Process 2988293 has 21.00 GiB memory in use. Process 2988247 has 13.12 GiB memory in use. Process 2988061 has 7.12 GiB memory in use. Process 2988353 has 574.00 MiB memory in use. Process 2988034 has 434.00 MiB memory in use. Process 2988245 has 260.00 MiB memory in use. Process 2988374 has 260.00 MiB memory in use. Process 2988531 has 260.00 MiB memory in use. Process 2988064 has 260.00 MiB memory in use. 
Process 2988566 has 260.00 MiB memory in use. Process 2988668 has 260.00 MiB memory in use. Process 2988116 has 260.00 MiB memory in use. Process 2988093 has 260.00 MiB memory in use. Including non-PyTorch memory, this process has 260.00 MiB memory in use. Process 2988554 has 260.00 MiB memory in use. Process 2988906 has 260.00 MiB memory in use. Process 2988874 has 260.00 MiB memory in use. Process 2988923 has 272.00 MiB memory in use. Process 2988286 has 260.00 MiB memory in use. Process 2989072 has 260.00 MiB memory in use. Process 2988448 has 260.00 MiB memory in use. Process 2988348 has 260.00 MiB memory in use. Process 2988736 has 260.00 MiB memory in use. Process 2988855 has 260.00 MiB memory in use. Process 2988814 has 8.00 MiB memory in use. Process 2988511 has 4.00 MiB memory in use. Process 2988942 has 8.00 MiB memory in use. Process 2989897 has 4.00 MiB memory in use. Process 2989914 has 10.00 MiB memory in use. Process 2989070 has 14.00 MiB memory in use. Process 2989165 has 10.00 MiB memory in use. Process 2989068 has 10.00 MiB memory in use. Process 2990500 has 10.00 MiB memory in use. Process 2989449 has 10.00 MiB memory in use. Process 2988447 has 14.00 MiB memory in use. Process 2988379 has 10.00 MiB memory in use. Process 2988518 has 10.00 MiB memory in use. Process 2988326 has 10.00 MiB memory in use. Process 2988462 has 10.00 MiB memory in use. Process 2989297 has 10.00 MiB memory in use. Process 2988927 has 10.00 MiB memory in use. Process 2988994 has 10.00 MiB memory in use. Process 2988205 has 10.00 MiB memory in use. Of the allocated memory 0 bytes is allocated by PyTorch, and 0 bytes is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. 
See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-yjc1lhkl/files/requirements.txt b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-yjc1lhkl/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..44d18d32ec4648cd530877d7c8c4758d5e887b9c --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-yjc1lhkl/files/requirements.txt @@ -0,0 +1,144 @@ +scipy==1.13.1 +jupyter_core==5.8.1 +smmap==5.0.2 +yarl==1.22.0 +executing==2.2.0 +cffi==2.0.0 +mkl_random==1.2.8 +traitlets==5.14.3 +wandb==0.23.1 +annotated-types==0.7.0 +evaluate==0.4.6 +kiwisolver==1.4.4 +Jinja2==3.1.6 +pyparsing==3.2.0 +mpmath==1.3.0 +debugpy==1.8.16 +nvidia-cuda-nvrtc-cu12==12.8.93 +docker-pycreds==0.4.0 +pycparser==2.23 +anyio==4.12.0 +safetensors==0.7.0 +matplotlib-inline==0.1.7 +Pygments==2.19.2 +numpy==2.0.2 +nvidia-cuda-cupti-cu12==12.8.90 +Bottleneck==1.4.2 +matplotlib==3.9.2 +numexpr==2.10.1 +sip==6.7.12 +aiohappyeyeballs==2.6.1 +ptyprocess==0.7.0 +fsspec==2025.7.0 +accelerate==0.25.0 +zipp==3.23.0 +PyQt5_sip==12.13.0 +pure_eval==0.2.3 +regex==2025.11.3 +aiosignal==1.4.0 +certifi==2025.10.5 +transformers==4.35.2 +mkl-service==2.4.0 +httpx==0.28.1 +mkl_fft==1.3.11 +pickleshare==0.7.5 +ipykernel==6.30.1 +nvidia-nvtx-cu12==12.8.90 +nvidia-cufft-cu12==11.3.3.83 +triton==3.4.0 +numba==0.60.0 +psutil==7.0.0 +contourpy==1.2.1 +PyQt5==5.15.10 +packaging==25.0 +datasets==4.4.1 +ipython==8.18.1 +sympy==1.14.0 +nvidia-cusolver-cu12==11.7.3.90 +multidict==6.7.0 +jupyter_client==8.6.3 +setuptools==80.9.0 +prompt_toolkit==3.0.51 +six==1.17.0 +GitPython==3.1.45 +pydantic==2.11.7 +nvidia-cublas-cu12==12.8.4.1 +aiohttp==3.13.2 +tzdata==2025.2 +importlib_metadata==8.7.0 +biopython==1.85 +httpcore==1.0.9 +python-dateutil==2.9.0.post0 +llvmlite==0.43.0 +pandas==2.3.3 +scikit-learn==1.6.1 +asttokens==3.0.0 +joblib==1.5.3 +h11==0.16.0 
+charset-normalizer==3.4.4 +pyzmq==27.0.2 +multiprocess==0.70.18 +nvidia-nvjitlink-cu12==12.8.93 +sentry-sdk==2.35.0 +pytz==2025.2 +pydantic_core==2.33.2 +MarkupSafe==3.0.3 +brotlicffi==1.0.9.2 +stack_data==0.6.3 +tqdm==4.67.1 +pynndescent==0.5.13 +importlib_resources==6.5.2 +ply==3.11 +pyarrow==21.0.0 +tokenizers==0.15.2 +exceptiongroup==1.3.1 +nvidia-cusparse-cu12==12.5.8.93 +comm==0.2.3 +pillow==11.3.0 +nvidia-cusparselt-cu12==0.7.1 +protobuf==3.20.3 +urllib3==2.5.0 +wheel==0.45.1 +wcwidth==0.2.13 +appdirs==1.4.4 +PySocks==1.7.1 +PyQt6_sip==13.10.2 +umap-learn==0.5.9.post2 +attrs==25.4.0 +platformdirs==4.3.8 +nvidia-cuda-runtime-cu12==12.8.90 +typing-inspection==0.4.1 +huggingface_hub==0.34.4 +decorator==5.2.1 +filelock==3.17.0 +nvidia-nccl-cu12==2.27.3 +fonttools==4.60.1 +xxhash==3.6.0 +dill==0.4.0 +threadpoolctl==3.6.0 +parso==0.8.4 +pysam==0.9.1 +frozenlist==1.8.0 +typing_extensions==4.15.0 +propcache==0.4.1 +tomli==2.2.1 +click==8.1.8 +nvidia-cudnn-cu12==9.10.2.21 +gitdb==4.0.12 +pip==25.3 +tornado==6.5.2 +networkx==3.2.1 +jedi==0.19.2 +idna==3.11 +pexpect==4.9.0 +async-timeout==5.0.1 +hf-xet==1.1.8 +nvidia-curand-cu12==10.3.9.90 +PyYAML==6.0.2 +nvidia-cufile-cu12==1.13.1.3 +setproctitle==1.3.6 +eval_type_backport==0.2.2 +requests==2.32.5 +nest-asyncio==1.6.0 +torch==2.8.0 +cycler==0.11.0 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-yjc1lhkl/files/wandb-metadata.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-yjc1lhkl/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..748d63552f30c721084bed228f43d190dc930923 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-yjc1lhkl/files/wandb-metadata.json @@ -0,0 +1,85 @@ +{ + "os": "Linux-5.15.0-126-generic-x86_64-with-glibc2.35", + "python": "CPython 3.9.18", + "startedAt": "2026-03-24T20:29:19.942497Z", + "args": [ + "--model_name_or_path", + "/data/nanhuang/Nan/models/DNAbert2_Pretrained", + "--tokenizer_path", + 
"/data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json", + "--trust_remote_code", + "True", + "--data_path", + "/data/nanhuang/Nan/ft_data/drosophila_enhancers_stark/split", + "--kmer", + "-1", + "--run_name", + "base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42", + "--model_max_length", + "512", + "--per_device_train_batch_size", + "128", + "--per_device_eval_batch_size", + "128", + "--gradient_accumulation_steps", + "1", + "--learning_rate", + "3e-5", + "--weight_decay", + "0.05", + "--num_train_epochs", + "4", + "--lr_scheduler_type", + "linear", + "--warmup_steps", + "0", + "--warmup_ratio", + "0.15", + "--fp16", + "--output_dir", + "genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42", + "--evaluation_strategy", + "epoch", + "--save_strategy", + "epoch", + "--load_best_model_at_end", + "True", + "--metric_for_best_model", + "eval_f1", + "--greater_is_better", + "True", + "--save_total_limit", + "1", + "--logging_steps", + "100", + "--overwrite_output_dir", + "True", + "--log_level", + "info", + "--seed", + "42", + "--find_unused_parameters", + "False", + "--project_name", + "genomic_bench_DNAbert2" + ], + "program": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", + "codePath": "train.py", + "codePathLocal": "train.py", + "email": "n5huang@ucsd.edu", + "root": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks", + "host": "u112222", + "executable": "/data/nanhuang/miniconda3/envs/bpe_v2/bin/python", + "cpu_count": 64, + "cpu_count_logical": 128, + "disk": { + "/": { + "total": "3768964489216", + "used": "3559222075392" + } + }, + "memory": { + "total": "1082030182400" + }, + "writerId": "9dxh2vckm0wurdnq6coiu2igm9lk14np" +} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-yjc1lhkl/files/wandb-summary.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-yjc1lhkl/files/wandb-summary.json new file mode 100644 index 
0000000000000000000000000000000000000000..379fe911e23a828fc54c3b9736a610509e9c45f0 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-yjc1lhkl/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb":{"runtime":10},"_runtime":10} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-yjc1lhkl/logs/debug-core.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-yjc1lhkl/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..c6a849eb6203bbca1b49abeeb05546472054fccd --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-yjc1lhkl/logs/debug-core.log @@ -0,0 +1,70 @@ +{"time":"2026-03-24T13:29:20.03742456-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpoc28hlmr/port-2988093.txt","pid":2988093,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:20.038738382-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmp9uquarlq/port-2988874.txt","pid":2988874,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:20.039003841-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988093} +{"time":"2026-03-24T13:29:20.039025901-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988093-2997274-3761623073/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:20.04069919-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988874} +{"time":"2026-03-24T13:29:20.040718421-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988874-2997275-4219790605/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:20.124433377-07:00","level":"INFO","msg":"main: starting 
server","port-filename":"/tmp/tmpze5mtr29/port-2988637.txt","pid":2988637,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:20.128053085-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988637} +{"time":"2026-03-24T13:29:20.128019566-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988637-2997300-777241408/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:20.205079181-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:20.213519842-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:20.266985147-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"ku0mi14u","id":"1(@)"} +{"time":"2026-03-24T13:29:20.295658037-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:20.3172018-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"eliuv43y","id":"1(@)"} +{"time":"2026-03-24T13:29:20.379313944-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"yjc1lhkl","id":"1(@)"} +{"time":"2026-03-24T13:29:20.444005412-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmplbn8udml/port-2988906.txt","pid":2988906,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:20.445893061-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988906} +{"time":"2026-03-24T13:29:20.445883732-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988906-2997875-3091809193/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:20.630082905-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} 
+{"time":"2026-03-24T13:29:20.648636296-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpqmfui_f4/port-2988923.txt","pid":2988923,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:20.650975252-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988923} +{"time":"2026-03-24T13:29:20.650965532-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988923-2998027-2125257262/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:20.677245977-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"5uhcp1bu","id":"1(@)"} +{"time":"2026-03-24T13:29:20.83285716-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:20.881265634-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"zrhz20dw","id":"1(@)"} +{"time":"2026-03-24T13:29:25.468460486-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"ku0mi14u","id":"1(@)"} +{"time":"2026-03-24T13:29:25.562825929-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"eliuv43y","id":"1(@)"} +{"time":"2026-03-24T13:29:25.589227984-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"yjc1lhkl","id":"1(@)"} +{"time":"2026-03-24T13:29:25.872707423-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"5uhcp1bu","id":"1(@)"} +{"time":"2026-03-24T13:29:26.077784103-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"zrhz20dw","id":"1(@)"} +{"time":"2026-03-24T13:29:35.347215345-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:35.347334264-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:35.347311435-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} 
+{"time":"2026-03-24T13:29:35.347442434-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:35.347525883-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988874-2997275-4219790605/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:35.485867918-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:35.486051637-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:35.486311015-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988093-2997274-3761623073/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:35.486655283-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:35.486845412-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:36.037426476-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:36.037580545-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:36.037557775-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:36.037864853-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988923-2998027-2125257262/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:36.037964663-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:36.180693602-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:36.180838011-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:36.180819861-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:36.18100239-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} 
+{"time":"2026-03-24T13:29:36.18108012-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988906-2997875-3091809193/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:36.251602234-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:36.251670873-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:36.251692473-07:00","level":"INFO","msg":"server is closed"} +{"time":"2026-03-24T13:29:36.372019764-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:36.372219503-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:36.372213063-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:36.372521501-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:36.372582451-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988637-2997300-777241408/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:37.032919698-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:37.033011367-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:37.033062727-07:00","level":"INFO","msg":"server is closed"} +{"time":"2026-03-24T13:29:37.107021871-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:37.107098891-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:37.107145341-07:00","level":"INFO","msg":"server is closed"} +{"time":"2026-03-24T13:29:38.306457371-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} 
+{"time":"2026-03-24T13:29:38.30656243-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:38.30658601-07:00","level":"INFO","msg":"server is closed"} +{"time":"2026-03-24T13:29:42.21708758-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:42.217248489-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:42.217275609-07:00","level":"INFO","msg":"server is closed"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-yjc1lhkl/logs/debug-internal.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-yjc1lhkl/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..19ce5dd17f2c8a46544d2214ae07ad67c5308cb3 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-yjc1lhkl/logs/debug-internal.log @@ -0,0 +1,12 @@ +{"time":"2026-03-24T13:29:20.379603162-07:00","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2026-03-24T13:29:25.459603878-07:00","level":"ERROR","msg":"monitor: failed to initialize GPU resource: monitor: could not get GPU binary port: timeout reading portfile /tmp/wandb-system-monitor-portfile-4006030205"} +{"time":"2026-03-24T13:29:25.588504298-07:00","level":"INFO","msg":"stream: created new stream","id":"yjc1lhkl"} +{"time":"2026-03-24T13:29:25.588778627-07:00","level":"INFO","msg":"handler: started","stream_id":"yjc1lhkl"} +{"time":"2026-03-24T13:29:25.589200234-07:00","level":"INFO","msg":"stream: started","id":"yjc1lhkl"} +{"time":"2026-03-24T13:29:25.590137209-07:00","level":"INFO","msg":"writer: started","stream_id":"yjc1lhkl"} +{"time":"2026-03-24T13:29:25.590292338-07:00","level":"INFO","msg":"sender: started","stream_id":"yjc1lhkl"} +{"time":"2026-03-24T13:29:36.372410932-07:00","level":"INFO","msg":"stream: closing","id":"yjc1lhkl"} 
+{"time":"2026-03-24T13:29:36.804218056-07:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2026-03-24T13:29:42.090099158-07:00","level":"INFO","msg":"handler: closed","stream_id":"yjc1lhkl"} +{"time":"2026-03-24T13:29:42.090638935-07:00","level":"INFO","msg":"sender: closed","stream_id":"yjc1lhkl"} +{"time":"2026-03-24T13:29:42.090660045-07:00","level":"INFO","msg":"stream: closed","id":"yjc1lhkl"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-yjc1lhkl/logs/debug.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-yjc1lhkl/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..aaea57304602dc3126764f5b88e8c45b8ce76f6c --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-yjc1lhkl/logs/debug.log @@ -0,0 +1,23 @@ +2026-03-24 13:29:19,949 INFO MainThread:2988637 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1 +2026-03-24 13:29:19,949 INFO MainThread:2988637 [wandb_setup.py:_flush():80] Configure stats pid to 2988637 +2026-03-24 13:29:19,950 INFO MainThread:2988637 [wandb_setup.py:_flush():80] Loading settings from /home/nanhuang/.config/wandb/settings +2026-03-24 13:29:19,950 INFO MainThread:2988637 [wandb_setup.py:_flush():80] Loading settings from /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/settings +2026-03-24 13:29:19,950 INFO MainThread:2988637 [wandb_setup.py:_flush():80] Loading settings from environment variables +2026-03-24 13:29:19,950 INFO MainThread:2988637 [wandb_init.py:setup_run_log_directory():714] Logging user logs to /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-yjc1lhkl/logs/debug.log +2026-03-24 13:29:19,950 INFO MainThread:2988637 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-yjc1lhkl/logs/debug-internal.log +2026-03-24 13:29:19,950 INFO MainThread:2988637 [wandb_init.py:init():841] calling init 
triggers +2026-03-24 13:29:19,950 INFO MainThread:2988637 [wandb_init.py:init():846] wandb.init called with sweep_config: {} +config: {'_wandb': {}} +2026-03-24 13:29:19,950 INFO MainThread:2988637 [wandb_init.py:init():889] starting backend +2026-03-24 13:29:20,296 INFO MainThread:2988637 [wandb_init.py:init():892] sending inform_init request +2026-03-24 13:29:20,370 INFO MainThread:2988637 [wandb_init.py:init():900] backend started and connected +2026-03-24 13:29:20,378 INFO MainThread:2988637 [wandb_init.py:init():970] updated telemetry +2026-03-24 13:29:20,380 INFO MainThread:2988637 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout +2026-03-24 13:29:26,167 INFO MainThread:2988637 [wandb_init.py:init():1041] starting run threads in backend +2026-03-24 13:29:26,308 INFO MainThread:2988637 [wandb_run.py:_console_start():2521] atexit reg +2026-03-24 13:29:26,308 INFO MainThread:2988637 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2026-03-24 13:29:26,308 INFO MainThread:2988637 [wandb_run.py:_redirect():2438] Wrapping output streams. +2026-03-24 13:29:26,308 INFO MainThread:2988637 [wandb_run.py:_redirect():2461] Redirects installed. +2026-03-24 13:29:26,314 INFO MainThread:2988637 [wandb_init.py:init():1081] run started, returning control to user process +2026-03-24 13:29:36,370 INFO wandb-AsyncioManager-main:2988637 [service_client.py:_forward_responses():80] Reached EOF. +2026-03-24 13:29:36,370 INFO wandb-AsyncioManager-main:2988637 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles. 
diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-yjc1lhkl/run-yjc1lhkl.wandb b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-yjc1lhkl/run-yjc1lhkl.wandb new file mode 100644 index 0000000000000000000000000000000000000000..0ed3da853f38f55efb61c972411025157cb332eb Binary files /dev/null and b/Finetune-GenomicBenchmarks/wandb/run-20260324_132919-yjc1lhkl/run-yjc1lhkl.wandb differ diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132920-5uhcp1bu/files/config.yaml b/Finetune-GenomicBenchmarks/wandb/run-20260324_132920-5uhcp1bu/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f03db5562e9bc62ae035e53d1a6e5217b55b5a09 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132920-5uhcp1bu/files/config.yaml @@ -0,0 +1,108 @@ +_wandb: + value: + cli_version: 0.23.1 + e: + wqe6bvqpfbqttcc8rxbewdx20m6nqwki: + args: + - --model_name_or_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained + - --tokenizer_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json + - --trust_remote_code + - "True" + - --data_path + - /data/nanhuang/Nan/ft_data/drosophila_enhancers_stark/split + - --kmer + - "-1" + - --run_name + - base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42 + - --model_max_length + - "512" + - --per_device_train_batch_size + - "128" + - --per_device_eval_batch_size + - "128" + - --gradient_accumulation_steps + - "1" + - --learning_rate + - "3e-5" + - --weight_decay + - "0.05" + - --num_train_epochs + - "4" + - --lr_scheduler_type + - linear + - --warmup_steps + - "0" + - --warmup_ratio + - "0.15" + - --fp16 + - --output_dir + - genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42 + - --evaluation_strategy + - epoch + - --save_strategy + - epoch + - --load_best_model_at_end + - "True" + - --metric_for_best_model + - eval_f1 + - --greater_is_better + - "True" + - --save_total_limit + - "1" + - --logging_steps + 
- "100" + - --overwrite_output_dir + - "True" + - --log_level + - info + - --seed + - "42" + - --find_unused_parameters + - "False" + - --project_name + - genomic_bench_DNAbert2 + codePath: train.py + codePathLocal: train.py + cpu_count: 64 + cpu_count_logical: 128 + disk: + /: + total: "3768964489216" + used: "3559222091776" + email: n5huang@ucsd.edu + executable: /data/nanhuang/miniconda3/envs/bpe_v2/bin/python + host: u112222 + memory: + total: "1082030182400" + os: Linux-5.15.0-126-generic-x86_64-with-glibc2.35 + program: /data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py + python: CPython 3.9.18 + root: /data/nanhuang/Nan/Finetune-GenomicBenchmarks + startedAt: "2026-03-24T20:29:20.328107Z" + writerId: wqe6bvqpfbqttcc8rxbewdx20m6nqwki + m: [] + python_version: 3.9.18 + t: + "1": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "2": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "4": 3.9.18 + "5": 0.23.1 + "6": 4.35.2 + "12": 0.23.1 + "13": linux-x86_64 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132920-5uhcp1bu/files/output.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132920-5uhcp1bu/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..7a434d47e7530be71eb41de7d771e0bdfb0cd1c8 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132920-5uhcp1bu/files/output.log @@ -0,0 +1,45 @@ +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... +Some weights of BertForSequenceClassification were not initialized from the model checkpoint at /data/nanhuang/Nan/models/DNAbert2_Pretrained and are newly initialized: ['classifier.bias', 'bert.pooler.dense.bias', 'classifier.weight', 'bert.pooler.dense.weight'] +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. 
+Traceback (most recent call last): + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 473, in + train() + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 424, in train + model = transformers.AutoModelForSequenceClassification.from_pretrained( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/modeling_utils.py", line 2271, in to + return super().to(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1369, in to + return self._apply(convert) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 955, in _apply + param_applied = fn(param) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1355, in convert + return t.to( +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 12.00 MiB. GPU 0 has a total capacity of 47.53 GiB of which 23.38 MiB is free. Process 2988293 has 21.00 GiB memory in use. Process 2988247 has 13.12 GiB memory in use. Process 2988061 has 7.12 GiB memory in use. Process 2988353 has 574.00 MiB memory in use. Process 2988034 has 434.00 MiB memory in use. Process 2988245 has 260.00 MiB memory in use. Process 2988374 has 260.00 MiB memory in use. Process 2988531 has 260.00 MiB memory in use. Process 2988064 has 260.00 MiB memory in use. Process 2988566 has 260.00 MiB memory in use. Process 2988668 has 260.00 MiB memory in use. 
Process 2988116 has 260.00 MiB memory in use. Process 2988093 has 260.00 MiB memory in use. Process 2988637 has 260.00 MiB memory in use. Process 2988554 has 260.00 MiB memory in use. Including non-PyTorch memory, this process has 260.00 MiB memory in use. Process 2988874 has 260.00 MiB memory in use. Process 2988923 has 272.00 MiB memory in use. Process 2988286 has 260.00 MiB memory in use. Process 2989072 has 260.00 MiB memory in use. Process 2988448 has 260.00 MiB memory in use. Process 2988348 has 260.00 MiB memory in use. Process 2988736 has 260.00 MiB memory in use. Process 2988855 has 260.00 MiB memory in use. Process 2988814 has 8.00 MiB memory in use. Process 2988511 has 4.00 MiB memory in use. Process 2988942 has 8.00 MiB memory in use. Process 2989897 has 4.00 MiB memory in use. Process 2989914 has 10.00 MiB memory in use. Process 2989070 has 14.00 MiB memory in use. Process 2989165 has 10.00 MiB memory in use. Process 2989068 has 10.00 MiB memory in use. Process 2990500 has 10.00 MiB memory in use. Process 2989449 has 10.00 MiB memory in use. Process 2988447 has 14.00 MiB memory in use. Process 2988379 has 10.00 MiB memory in use. Process 2988518 has 10.00 MiB memory in use. Process 2988326 has 10.00 MiB memory in use. Process 2988462 has 10.00 MiB memory in use. Process 2989297 has 10.00 MiB memory in use. Process 2988927 has 10.00 MiB memory in use. Process 2988994 has 10.00 MiB memory in use. Process 2988205 has 10.00 MiB memory in use. Of the allocated memory 0 bytes is allocated by PyTorch, and 0 bytes is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. 
See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) +Traceback (most recent call last): + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 473, in + train() + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 424, in train + model = transformers.AutoModelForSequenceClassification.from_pretrained( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/modeling_utils.py", line 2271, in to + return super().to(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1369, in to + return self._apply(convert) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 955, in _apply + param_applied = fn(param) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1355, in convert + return t.to( +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 12.00 MiB. GPU 0 has a total capacity of 47.53 GiB of which 23.38 MiB is free. Process 2988293 has 21.00 GiB memory in use. Process 2988247 has 13.12 GiB memory in use. Process 2988061 has 7.12 GiB memory in use. Process 2988353 has 574.00 MiB memory in use. Process 2988034 has 434.00 MiB memory in use. Process 2988245 has 260.00 MiB memory in use. Process 2988374 has 260.00 MiB memory in use. Process 2988531 has 260.00 MiB memory in use. Process 2988064 has 260.00 MiB memory in use. 
Process 2988566 has 260.00 MiB memory in use. Process 2988668 has 260.00 MiB memory in use. Process 2988116 has 260.00 MiB memory in use. Process 2988093 has 260.00 MiB memory in use. Process 2988637 has 260.00 MiB memory in use. Process 2988554 has 260.00 MiB memory in use. Including non-PyTorch memory, this process has 260.00 MiB memory in use. Process 2988874 has 260.00 MiB memory in use. Process 2988923 has 272.00 MiB memory in use. Process 2988286 has 260.00 MiB memory in use. Process 2989072 has 260.00 MiB memory in use. Process 2988448 has 260.00 MiB memory in use. Process 2988348 has 260.00 MiB memory in use. Process 2988736 has 260.00 MiB memory in use. Process 2988855 has 260.00 MiB memory in use. Process 2988814 has 8.00 MiB memory in use. Process 2988511 has 4.00 MiB memory in use. Process 2988942 has 8.00 MiB memory in use. Process 2989897 has 4.00 MiB memory in use. Process 2989914 has 10.00 MiB memory in use. Process 2989070 has 14.00 MiB memory in use. Process 2989165 has 10.00 MiB memory in use. Process 2989068 has 10.00 MiB memory in use. Process 2990500 has 10.00 MiB memory in use. Process 2989449 has 10.00 MiB memory in use. Process 2988447 has 14.00 MiB memory in use. Process 2988379 has 10.00 MiB memory in use. Process 2988518 has 10.00 MiB memory in use. Process 2988326 has 10.00 MiB memory in use. Process 2988462 has 10.00 MiB memory in use. Process 2989297 has 10.00 MiB memory in use. Process 2988927 has 10.00 MiB memory in use. Process 2988994 has 10.00 MiB memory in use. Process 2988205 has 10.00 MiB memory in use. Of the allocated memory 0 bytes is allocated by PyTorch, and 0 bytes is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. 
See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132920-5uhcp1bu/files/requirements.txt b/Finetune-GenomicBenchmarks/wandb/run-20260324_132920-5uhcp1bu/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..44d18d32ec4648cd530877d7c8c4758d5e887b9c --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132920-5uhcp1bu/files/requirements.txt @@ -0,0 +1,144 @@ +scipy==1.13.1 +jupyter_core==5.8.1 +smmap==5.0.2 +yarl==1.22.0 +executing==2.2.0 +cffi==2.0.0 +mkl_random==1.2.8 +traitlets==5.14.3 +wandb==0.23.1 +annotated-types==0.7.0 +evaluate==0.4.6 +kiwisolver==1.4.4 +Jinja2==3.1.6 +pyparsing==3.2.0 +mpmath==1.3.0 +debugpy==1.8.16 +nvidia-cuda-nvrtc-cu12==12.8.93 +docker-pycreds==0.4.0 +pycparser==2.23 +anyio==4.12.0 +safetensors==0.7.0 +matplotlib-inline==0.1.7 +Pygments==2.19.2 +numpy==2.0.2 +nvidia-cuda-cupti-cu12==12.8.90 +Bottleneck==1.4.2 +matplotlib==3.9.2 +numexpr==2.10.1 +sip==6.7.12 +aiohappyeyeballs==2.6.1 +ptyprocess==0.7.0 +fsspec==2025.7.0 +accelerate==0.25.0 +zipp==3.23.0 +PyQt5_sip==12.13.0 +pure_eval==0.2.3 +regex==2025.11.3 +aiosignal==1.4.0 +certifi==2025.10.5 +transformers==4.35.2 +mkl-service==2.4.0 +httpx==0.28.1 +mkl_fft==1.3.11 +pickleshare==0.7.5 +ipykernel==6.30.1 +nvidia-nvtx-cu12==12.8.90 +nvidia-cufft-cu12==11.3.3.83 +triton==3.4.0 +numba==0.60.0 +psutil==7.0.0 +contourpy==1.2.1 +PyQt5==5.15.10 +packaging==25.0 +datasets==4.4.1 +ipython==8.18.1 +sympy==1.14.0 +nvidia-cusolver-cu12==11.7.3.90 +multidict==6.7.0 +jupyter_client==8.6.3 +setuptools==80.9.0 +prompt_toolkit==3.0.51 +six==1.17.0 +GitPython==3.1.45 +pydantic==2.11.7 +nvidia-cublas-cu12==12.8.4.1 +aiohttp==3.13.2 +tzdata==2025.2 +importlib_metadata==8.7.0 +biopython==1.85 +httpcore==1.0.9 +python-dateutil==2.9.0.post0 +llvmlite==0.43.0 +pandas==2.3.3 +scikit-learn==1.6.1 +asttokens==3.0.0 +joblib==1.5.3 +h11==0.16.0 
+charset-normalizer==3.4.4 +pyzmq==27.0.2 +multiprocess==0.70.18 +nvidia-nvjitlink-cu12==12.8.93 +sentry-sdk==2.35.0 +pytz==2025.2 +pydantic_core==2.33.2 +MarkupSafe==3.0.3 +brotlicffi==1.0.9.2 +stack_data==0.6.3 +tqdm==4.67.1 +pynndescent==0.5.13 +importlib_resources==6.5.2 +ply==3.11 +pyarrow==21.0.0 +tokenizers==0.15.2 +exceptiongroup==1.3.1 +nvidia-cusparse-cu12==12.5.8.93 +comm==0.2.3 +pillow==11.3.0 +nvidia-cusparselt-cu12==0.7.1 +protobuf==3.20.3 +urllib3==2.5.0 +wheel==0.45.1 +wcwidth==0.2.13 +appdirs==1.4.4 +PySocks==1.7.1 +PyQt6_sip==13.10.2 +umap-learn==0.5.9.post2 +attrs==25.4.0 +platformdirs==4.3.8 +nvidia-cuda-runtime-cu12==12.8.90 +typing-inspection==0.4.1 +huggingface_hub==0.34.4 +decorator==5.2.1 +filelock==3.17.0 +nvidia-nccl-cu12==2.27.3 +fonttools==4.60.1 +xxhash==3.6.0 +dill==0.4.0 +threadpoolctl==3.6.0 +parso==0.8.4 +pysam==0.9.1 +frozenlist==1.8.0 +typing_extensions==4.15.0 +propcache==0.4.1 +tomli==2.2.1 +click==8.1.8 +nvidia-cudnn-cu12==9.10.2.21 +gitdb==4.0.12 +pip==25.3 +tornado==6.5.2 +networkx==3.2.1 +jedi==0.19.2 +idna==3.11 +pexpect==4.9.0 +async-timeout==5.0.1 +hf-xet==1.1.8 +nvidia-curand-cu12==10.3.9.90 +PyYAML==6.0.2 +nvidia-cufile-cu12==1.13.1.3 +setproctitle==1.3.6 +eval_type_backport==0.2.2 +requests==2.32.5 +nest-asyncio==1.6.0 +torch==2.8.0 +cycler==0.11.0 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132920-5uhcp1bu/files/wandb-metadata.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_132920-5uhcp1bu/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..c42e8ba83e870a4e10bb2cf6a7a539abddf3e781 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132920-5uhcp1bu/files/wandb-metadata.json @@ -0,0 +1,85 @@ +{ + "os": "Linux-5.15.0-126-generic-x86_64-with-glibc2.35", + "python": "CPython 3.9.18", + "startedAt": "2026-03-24T20:29:20.328107Z", + "args": [ + "--model_name_or_path", + "/data/nanhuang/Nan/models/DNAbert2_Pretrained", + "--tokenizer_path", + 
"/data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json", + "--trust_remote_code", + "True", + "--data_path", + "/data/nanhuang/Nan/ft_data/drosophila_enhancers_stark/split", + "--kmer", + "-1", + "--run_name", + "base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42", + "--model_max_length", + "512", + "--per_device_train_batch_size", + "128", + "--per_device_eval_batch_size", + "128", + "--gradient_accumulation_steps", + "1", + "--learning_rate", + "3e-5", + "--weight_decay", + "0.05", + "--num_train_epochs", + "4", + "--lr_scheduler_type", + "linear", + "--warmup_steps", + "0", + "--warmup_ratio", + "0.15", + "--fp16", + "--output_dir", + "genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42", + "--evaluation_strategy", + "epoch", + "--save_strategy", + "epoch", + "--load_best_model_at_end", + "True", + "--metric_for_best_model", + "eval_f1", + "--greater_is_better", + "True", + "--save_total_limit", + "1", + "--logging_steps", + "100", + "--overwrite_output_dir", + "True", + "--log_level", + "info", + "--seed", + "42", + "--find_unused_parameters", + "False", + "--project_name", + "genomic_bench_DNAbert2" + ], + "program": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", + "codePath": "train.py", + "codePathLocal": "train.py", + "email": "n5huang@ucsd.edu", + "root": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks", + "host": "u112222", + "executable": "/data/nanhuang/miniconda3/envs/bpe_v2/bin/python", + "cpu_count": 64, + "cpu_count_logical": 128, + "disk": { + "/": { + "total": "3768964489216", + "used": "3559222091776" + } + }, + "memory": { + "total": "1082030182400" + }, + "writerId": "wqe6bvqpfbqttcc8rxbewdx20m6nqwki" +} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132920-5uhcp1bu/files/wandb-summary.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_132920-5uhcp1bu/files/wandb-summary.json new file mode 100644 index 
0000000000000000000000000000000000000000..f4ec3806c8e5abda8d70342c64624c8cf8f81552 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132920-5uhcp1bu/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb":{"runtime":9},"_runtime":9} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132920-5uhcp1bu/logs/debug-core.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132920-5uhcp1bu/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..c6a849eb6203bbca1b49abeeb05546472054fccd --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132920-5uhcp1bu/logs/debug-core.log @@ -0,0 +1,70 @@ +{"time":"2026-03-24T13:29:20.03742456-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpoc28hlmr/port-2988093.txt","pid":2988093,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:20.038738382-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmp9uquarlq/port-2988874.txt","pid":2988874,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:20.039003841-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988093} +{"time":"2026-03-24T13:29:20.039025901-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988093-2997274-3761623073/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:20.04069919-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988874} +{"time":"2026-03-24T13:29:20.040718421-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988874-2997275-4219790605/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:20.124433377-07:00","level":"INFO","msg":"main: starting 
server","port-filename":"/tmp/tmpze5mtr29/port-2988637.txt","pid":2988637,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:20.128053085-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988637} +{"time":"2026-03-24T13:29:20.128019566-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988637-2997300-777241408/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:20.205079181-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:20.213519842-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:20.266985147-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"ku0mi14u","id":"1(@)"} +{"time":"2026-03-24T13:29:20.295658037-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:20.3172018-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"eliuv43y","id":"1(@)"} +{"time":"2026-03-24T13:29:20.379313944-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"yjc1lhkl","id":"1(@)"} +{"time":"2026-03-24T13:29:20.444005412-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmplbn8udml/port-2988906.txt","pid":2988906,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:20.445893061-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988906} +{"time":"2026-03-24T13:29:20.445883732-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988906-2997875-3091809193/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:20.630082905-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} 
+{"time":"2026-03-24T13:29:20.648636296-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpqmfui_f4/port-2988923.txt","pid":2988923,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:20.650975252-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988923} +{"time":"2026-03-24T13:29:20.650965532-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988923-2998027-2125257262/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:20.677245977-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"5uhcp1bu","id":"1(@)"} +{"time":"2026-03-24T13:29:20.83285716-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:20.881265634-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"zrhz20dw","id":"1(@)"} +{"time":"2026-03-24T13:29:25.468460486-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"ku0mi14u","id":"1(@)"} +{"time":"2026-03-24T13:29:25.562825929-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"eliuv43y","id":"1(@)"} +{"time":"2026-03-24T13:29:25.589227984-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"yjc1lhkl","id":"1(@)"} +{"time":"2026-03-24T13:29:25.872707423-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"5uhcp1bu","id":"1(@)"} +{"time":"2026-03-24T13:29:26.077784103-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"zrhz20dw","id":"1(@)"} +{"time":"2026-03-24T13:29:35.347215345-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:35.347334264-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:35.347311435-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} 
+{"time":"2026-03-24T13:29:35.347442434-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:35.347525883-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988874-2997275-4219790605/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:35.485867918-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:35.486051637-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:35.486311015-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988093-2997274-3761623073/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:35.486655283-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:35.486845412-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:36.037426476-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:36.037580545-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:36.037557775-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:36.037864853-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988923-2998027-2125257262/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:36.037964663-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:36.180693602-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:36.180838011-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:36.180819861-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:36.18100239-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} 
+{"time":"2026-03-24T13:29:36.18108012-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988906-2997875-3091809193/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:36.251602234-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:36.251670873-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:36.251692473-07:00","level":"INFO","msg":"server is closed"} +{"time":"2026-03-24T13:29:36.372019764-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:36.372219503-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:36.372213063-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:36.372521501-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:36.372582451-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988637-2997300-777241408/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:37.032919698-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:37.033011367-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:37.033062727-07:00","level":"INFO","msg":"server is closed"} +{"time":"2026-03-24T13:29:37.107021871-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:37.107098891-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:37.107145341-07:00","level":"INFO","msg":"server is closed"} +{"time":"2026-03-24T13:29:38.306457371-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} 
+{"time":"2026-03-24T13:29:38.30656243-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:38.30658601-07:00","level":"INFO","msg":"server is closed"} +{"time":"2026-03-24T13:29:42.21708758-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:42.217248489-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:42.217275609-07:00","level":"INFO","msg":"server is closed"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132920-5uhcp1bu/logs/debug-internal.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132920-5uhcp1bu/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..c9a7f2ad9c7c39b26eb5b1e0527f95a50d6366a9 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132920-5uhcp1bu/logs/debug-internal.log @@ -0,0 +1,12 @@ +{"time":"2026-03-24T13:29:20.677486715-07:00","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2026-03-24T13:29:25.750553783-07:00","level":"ERROR","msg":"monitor: failed to initialize GPU resource: monitor: could not get GPU binary port: timeout reading portfile /tmp/wandb-system-monitor-portfile-4136117656"} +{"time":"2026-03-24T13:29:25.872253755-07:00","level":"INFO","msg":"stream: created new stream","id":"5uhcp1bu"} +{"time":"2026-03-24T13:29:25.872444544-07:00","level":"INFO","msg":"handler: started","stream_id":"5uhcp1bu"} +{"time":"2026-03-24T13:29:25.872692483-07:00","level":"INFO","msg":"stream: started","id":"5uhcp1bu"} +{"time":"2026-03-24T13:29:25.872711073-07:00","level":"INFO","msg":"writer: started","stream_id":"5uhcp1bu"} +{"time":"2026-03-24T13:29:25.872735422-07:00","level":"INFO","msg":"sender: started","stream_id":"5uhcp1bu"} +{"time":"2026-03-24T13:29:36.180872951-07:00","level":"INFO","msg":"stream: closing","id":"5uhcp1bu"} 
+{"time":"2026-03-24T13:29:36.61195407-07:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2026-03-24T13:29:37.096866031-07:00","level":"INFO","msg":"handler: closed","stream_id":"5uhcp1bu"} +{"time":"2026-03-24T13:29:37.09708685-07:00","level":"INFO","msg":"sender: closed","stream_id":"5uhcp1bu"} +{"time":"2026-03-24T13:29:37.097125629-07:00","level":"INFO","msg":"stream: closed","id":"5uhcp1bu"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132920-5uhcp1bu/logs/debug.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132920-5uhcp1bu/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..50ac3bdf000ff164a49cefbacf5b62885b42ede6 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132920-5uhcp1bu/logs/debug.log @@ -0,0 +1,23 @@ +2026-03-24 13:29:20,332 INFO MainThread:2988906 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1 +2026-03-24 13:29:20,332 INFO MainThread:2988906 [wandb_setup.py:_flush():80] Configure stats pid to 2988906 +2026-03-24 13:29:20,332 INFO MainThread:2988906 [wandb_setup.py:_flush():80] Loading settings from /home/nanhuang/.config/wandb/settings +2026-03-24 13:29:20,332 INFO MainThread:2988906 [wandb_setup.py:_flush():80] Loading settings from /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/settings +2026-03-24 13:29:20,332 INFO MainThread:2988906 [wandb_setup.py:_flush():80] Loading settings from environment variables +2026-03-24 13:29:20,332 INFO MainThread:2988906 [wandb_init.py:setup_run_log_directory():714] Logging user logs to /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_132920-5uhcp1bu/logs/debug.log +2026-03-24 13:29:20,332 INFO MainThread:2988906 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_132920-5uhcp1bu/logs/debug-internal.log +2026-03-24 13:29:20,332 INFO MainThread:2988906 [wandb_init.py:init():841] calling init 
triggers +2026-03-24 13:29:20,333 INFO MainThread:2988906 [wandb_init.py:init():846] wandb.init called with sweep_config: {} +config: {'_wandb': {}} +2026-03-24 13:29:20,333 INFO MainThread:2988906 [wandb_init.py:init():889] starting backend +2026-03-24 13:29:20,630 INFO MainThread:2988906 [wandb_init.py:init():892] sending inform_init request +2026-03-24 13:29:20,675 INFO MainThread:2988906 [wandb_init.py:init():900] backend started and connected +2026-03-24 13:29:20,681 INFO MainThread:2988906 [wandb_init.py:init():970] updated telemetry +2026-03-24 13:29:20,682 INFO MainThread:2988906 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout +2026-03-24 13:29:26,367 INFO MainThread:2988906 [wandb_init.py:init():1041] starting run threads in backend +2026-03-24 13:29:26,517 INFO MainThread:2988906 [wandb_run.py:_console_start():2521] atexit reg +2026-03-24 13:29:26,517 INFO MainThread:2988906 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2026-03-24 13:29:26,517 INFO MainThread:2988906 [wandb_run.py:_redirect():2438] Wrapping output streams. +2026-03-24 13:29:26,517 INFO MainThread:2988906 [wandb_run.py:_redirect():2461] Redirects installed. +2026-03-24 13:29:26,524 INFO MainThread:2988906 [wandb_init.py:init():1081] run started, returning control to user process +2026-03-24 13:29:36,180 INFO wandb-AsyncioManager-main:2988906 [service_client.py:_forward_responses():80] Reached EOF. +2026-03-24 13:29:36,180 INFO wandb-AsyncioManager-main:2988906 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles. 
diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132920-5uhcp1bu/run-5uhcp1bu.wandb b/Finetune-GenomicBenchmarks/wandb/run-20260324_132920-5uhcp1bu/run-5uhcp1bu.wandb new file mode 100644 index 0000000000000000000000000000000000000000..ff7605f9295c41095138407ef0da42709e1de46f Binary files /dev/null and b/Finetune-GenomicBenchmarks/wandb/run-20260324_132920-5uhcp1bu/run-5uhcp1bu.wandb differ diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132920-zrhz20dw/files/config.yaml b/Finetune-GenomicBenchmarks/wandb/run-20260324_132920-zrhz20dw/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3d3e93a47902519b90c2d08bb4449e44faed3137 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132920-zrhz20dw/files/config.yaml @@ -0,0 +1,108 @@ +_wandb: + value: + cli_version: 0.23.1 + e: + b53l7vm7wpfvya02l5ob2eck7ok14wf5: + args: + - --model_name_or_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained + - --tokenizer_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json + - --trust_remote_code + - "True" + - --data_path + - /data/nanhuang/Nan/ft_data/drosophila_enhancers_stark/split + - --kmer + - "-1" + - --run_name + - base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42 + - --model_max_length + - "512" + - --per_device_train_batch_size + - "128" + - --per_device_eval_batch_size + - "128" + - --gradient_accumulation_steps + - "1" + - --learning_rate + - "3e-5" + - --weight_decay + - "0.05" + - --num_train_epochs + - "4" + - --lr_scheduler_type + - linear + - --warmup_steps + - "0" + - --warmup_ratio + - "0.15" + - --fp16 + - --output_dir + - genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42 + - --evaluation_strategy + - epoch + - --save_strategy + - epoch + - --load_best_model_at_end + - "True" + - --metric_for_best_model + - eval_f1 + - --greater_is_better + - "True" + - --save_total_limit + - "1" + - --logging_steps + 
- "100" + - --overwrite_output_dir + - "True" + - --log_level + - info + - --seed + - "42" + - --find_unused_parameters + - "False" + - --project_name + - genomic_bench_DNAbert2 + codePath: train.py + codePathLocal: train.py + cpu_count: 64 + cpu_count_logical: 128 + disk: + /: + total: "3768964489216" + used: "3559222128640" + email: n5huang@ucsd.edu + executable: /data/nanhuang/miniconda3/envs/bpe_v2/bin/python + host: u112222 + memory: + total: "1082030182400" + os: Linux-5.15.0-126-generic-x86_64-with-glibc2.35 + program: /data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py + python: CPython 3.9.18 + root: /data/nanhuang/Nan/Finetune-GenomicBenchmarks + startedAt: "2026-03-24T20:29:20.524494Z" + writerId: b53l7vm7wpfvya02l5ob2eck7ok14wf5 + m: [] + python_version: 3.9.18 + t: + "1": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "2": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "4": 3.9.18 + "5": 0.23.1 + "6": 4.35.2 + "12": 0.23.1 + "13": linux-x86_64 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132920-zrhz20dw/files/output.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132920-zrhz20dw/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..83552112b1e9628018d9b178a2bdf821c715c21b --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132920-zrhz20dw/files/output.log @@ -0,0 +1,45 @@ +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... +Some weights of BertForSequenceClassification were not initialized from the model checkpoint at /data/nanhuang/Nan/models/DNAbert2_Pretrained and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias', 'classifier.weight', 'classifier.bias'] +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. 
+Traceback (most recent call last): + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 473, in + train() + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 424, in train + model = transformers.AutoModelForSequenceClassification.from_pretrained( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/modeling_utils.py", line 2271, in to + return super().to(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1369, in to + return self._apply(convert) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 955, in _apply + param_applied = fn(param) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1355, in convert + return t.to( +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 0 has a total capacity of 47.53 GiB of which 23.38 MiB is free. Process 2988293 has 21.00 GiB memory in use. Process 2988247 has 13.12 GiB memory in use. Process 2988061 has 7.12 GiB memory in use. Process 2988353 has 574.00 MiB memory in use. Process 2988034 has 434.00 MiB memory in use. Process 2988245 has 260.00 MiB memory in use. Process 2988374 has 260.00 MiB memory in use. Process 2988531 has 260.00 MiB memory in use. Process 2988064 has 260.00 MiB memory in use. Process 2988566 has 260.00 MiB memory in use. Process 2988668 has 260.00 MiB memory in use. 
Process 2988116 has 260.00 MiB memory in use. Process 2988093 has 260.00 MiB memory in use. Process 2988637 has 260.00 MiB memory in use. Process 2988554 has 260.00 MiB memory in use. Process 2988906 has 260.00 MiB memory in use. Process 2988874 has 260.00 MiB memory in use. Including non-PyTorch memory, this process has 272.00 MiB memory in use. Process 2988286 has 260.00 MiB memory in use. Process 2989072 has 260.00 MiB memory in use. Process 2988448 has 260.00 MiB memory in use. Process 2988348 has 260.00 MiB memory in use. Process 2988736 has 260.00 MiB memory in use. Process 2988855 has 260.00 MiB memory in use. Process 2988814 has 8.00 MiB memory in use. Process 2988511 has 4.00 MiB memory in use. Process 2988942 has 8.00 MiB memory in use. Process 2989897 has 4.00 MiB memory in use. Process 2989914 has 10.00 MiB memory in use. Process 2989070 has 14.00 MiB memory in use. Process 2989165 has 10.00 MiB memory in use. Process 2989068 has 10.00 MiB memory in use. Process 2990500 has 10.00 MiB memory in use. Process 2989449 has 10.00 MiB memory in use. Process 2988447 has 14.00 MiB memory in use. Process 2988379 has 10.00 MiB memory in use. Process 2988518 has 10.00 MiB memory in use. Process 2988326 has 10.00 MiB memory in use. Process 2988462 has 10.00 MiB memory in use. Process 2989297 has 10.00 MiB memory in use. Process 2988927 has 10.00 MiB memory in use. Process 2988994 has 10.00 MiB memory in use. Process 2988205 has 10.00 MiB memory in use. Of the allocated memory 12.00 MiB is allocated by PyTorch, and 0 bytes is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. 
See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) +Traceback (most recent call last): + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 473, in + train() + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 424, in train + model = transformers.AutoModelForSequenceClassification.from_pretrained( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/modeling_utils.py", line 2271, in to + return super().to(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1369, in to + return self._apply(convert) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 955, in _apply + param_applied = fn(param) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1355, in convert + return t.to( +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 0 has a total capacity of 47.53 GiB of which 23.38 MiB is free. Process 2988293 has 21.00 GiB memory in use. Process 2988247 has 13.12 GiB memory in use. Process 2988061 has 7.12 GiB memory in use. Process 2988353 has 574.00 MiB memory in use. Process 2988034 has 434.00 MiB memory in use. Process 2988245 has 260.00 MiB memory in use. Process 2988374 has 260.00 MiB memory in use. Process 2988531 has 260.00 MiB memory in use. Process 2988064 has 260.00 MiB memory in use. 
Process 2988566 has 260.00 MiB memory in use. Process 2988668 has 260.00 MiB memory in use. Process 2988116 has 260.00 MiB memory in use. Process 2988093 has 260.00 MiB memory in use. Process 2988637 has 260.00 MiB memory in use. Process 2988554 has 260.00 MiB memory in use. Process 2988906 has 260.00 MiB memory in use. Process 2988874 has 260.00 MiB memory in use. Including non-PyTorch memory, this process has 272.00 MiB memory in use. Process 2988286 has 260.00 MiB memory in use. Process 2989072 has 260.00 MiB memory in use. Process 2988448 has 260.00 MiB memory in use. Process 2988348 has 260.00 MiB memory in use. Process 2988736 has 260.00 MiB memory in use. Process 2988855 has 260.00 MiB memory in use. Process 2988814 has 8.00 MiB memory in use. Process 2988511 has 4.00 MiB memory in use. Process 2988942 has 8.00 MiB memory in use. Process 2989897 has 4.00 MiB memory in use. Process 2989914 has 10.00 MiB memory in use. Process 2989070 has 14.00 MiB memory in use. Process 2989165 has 10.00 MiB memory in use. Process 2989068 has 10.00 MiB memory in use. Process 2990500 has 10.00 MiB memory in use. Process 2989449 has 10.00 MiB memory in use. Process 2988447 has 14.00 MiB memory in use. Process 2988379 has 10.00 MiB memory in use. Process 2988518 has 10.00 MiB memory in use. Process 2988326 has 10.00 MiB memory in use. Process 2988462 has 10.00 MiB memory in use. Process 2989297 has 10.00 MiB memory in use. Process 2988927 has 10.00 MiB memory in use. Process 2988994 has 10.00 MiB memory in use. Process 2988205 has 10.00 MiB memory in use. Of the allocated memory 12.00 MiB is allocated by PyTorch, and 0 bytes is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. 
See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132920-zrhz20dw/files/requirements.txt b/Finetune-GenomicBenchmarks/wandb/run-20260324_132920-zrhz20dw/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..44d18d32ec4648cd530877d7c8c4758d5e887b9c --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132920-zrhz20dw/files/requirements.txt @@ -0,0 +1,144 @@ +scipy==1.13.1 +jupyter_core==5.8.1 +smmap==5.0.2 +yarl==1.22.0 +executing==2.2.0 +cffi==2.0.0 +mkl_random==1.2.8 +traitlets==5.14.3 +wandb==0.23.1 +annotated-types==0.7.0 +evaluate==0.4.6 +kiwisolver==1.4.4 +Jinja2==3.1.6 +pyparsing==3.2.0 +mpmath==1.3.0 +debugpy==1.8.16 +nvidia-cuda-nvrtc-cu12==12.8.93 +docker-pycreds==0.4.0 +pycparser==2.23 +anyio==4.12.0 +safetensors==0.7.0 +matplotlib-inline==0.1.7 +Pygments==2.19.2 +numpy==2.0.2 +nvidia-cuda-cupti-cu12==12.8.90 +Bottleneck==1.4.2 +matplotlib==3.9.2 +numexpr==2.10.1 +sip==6.7.12 +aiohappyeyeballs==2.6.1 +ptyprocess==0.7.0 +fsspec==2025.7.0 +accelerate==0.25.0 +zipp==3.23.0 +PyQt5_sip==12.13.0 +pure_eval==0.2.3 +regex==2025.11.3 +aiosignal==1.4.0 +certifi==2025.10.5 +transformers==4.35.2 +mkl-service==2.4.0 +httpx==0.28.1 +mkl_fft==1.3.11 +pickleshare==0.7.5 +ipykernel==6.30.1 +nvidia-nvtx-cu12==12.8.90 +nvidia-cufft-cu12==11.3.3.83 +triton==3.4.0 +numba==0.60.0 +psutil==7.0.0 +contourpy==1.2.1 +PyQt5==5.15.10 +packaging==25.0 +datasets==4.4.1 +ipython==8.18.1 +sympy==1.14.0 +nvidia-cusolver-cu12==11.7.3.90 +multidict==6.7.0 +jupyter_client==8.6.3 +setuptools==80.9.0 +prompt_toolkit==3.0.51 +six==1.17.0 +GitPython==3.1.45 +pydantic==2.11.7 +nvidia-cublas-cu12==12.8.4.1 +aiohttp==3.13.2 +tzdata==2025.2 +importlib_metadata==8.7.0 +biopython==1.85 +httpcore==1.0.9 +python-dateutil==2.9.0.post0 +llvmlite==0.43.0 +pandas==2.3.3 +scikit-learn==1.6.1 +asttokens==3.0.0 +joblib==1.5.3 +h11==0.16.0 
+charset-normalizer==3.4.4 +pyzmq==27.0.2 +multiprocess==0.70.18 +nvidia-nvjitlink-cu12==12.8.93 +sentry-sdk==2.35.0 +pytz==2025.2 +pydantic_core==2.33.2 +MarkupSafe==3.0.3 +brotlicffi==1.0.9.2 +stack_data==0.6.3 +tqdm==4.67.1 +pynndescent==0.5.13 +importlib_resources==6.5.2 +ply==3.11 +pyarrow==21.0.0 +tokenizers==0.15.2 +exceptiongroup==1.3.1 +nvidia-cusparse-cu12==12.5.8.93 +comm==0.2.3 +pillow==11.3.0 +nvidia-cusparselt-cu12==0.7.1 +protobuf==3.20.3 +urllib3==2.5.0 +wheel==0.45.1 +wcwidth==0.2.13 +appdirs==1.4.4 +PySocks==1.7.1 +PyQt6_sip==13.10.2 +umap-learn==0.5.9.post2 +attrs==25.4.0 +platformdirs==4.3.8 +nvidia-cuda-runtime-cu12==12.8.90 +typing-inspection==0.4.1 +huggingface_hub==0.34.4 +decorator==5.2.1 +filelock==3.17.0 +nvidia-nccl-cu12==2.27.3 +fonttools==4.60.1 +xxhash==3.6.0 +dill==0.4.0 +threadpoolctl==3.6.0 +parso==0.8.4 +pysam==0.9.1 +frozenlist==1.8.0 +typing_extensions==4.15.0 +propcache==0.4.1 +tomli==2.2.1 +click==8.1.8 +nvidia-cudnn-cu12==9.10.2.21 +gitdb==4.0.12 +pip==25.3 +tornado==6.5.2 +networkx==3.2.1 +jedi==0.19.2 +idna==3.11 +pexpect==4.9.0 +async-timeout==5.0.1 +hf-xet==1.1.8 +nvidia-curand-cu12==10.3.9.90 +PyYAML==6.0.2 +nvidia-cufile-cu12==1.13.1.3 +setproctitle==1.3.6 +eval_type_backport==0.2.2 +requests==2.32.5 +nest-asyncio==1.6.0 +torch==2.8.0 +cycler==0.11.0 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132920-zrhz20dw/files/wandb-metadata.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_132920-zrhz20dw/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..a43e7648ea14b38cd4fd1091685320e070ba6a8f --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132920-zrhz20dw/files/wandb-metadata.json @@ -0,0 +1,85 @@ +{ + "os": "Linux-5.15.0-126-generic-x86_64-with-glibc2.35", + "python": "CPython 3.9.18", + "startedAt": "2026-03-24T20:29:20.524494Z", + "args": [ + "--model_name_or_path", + "/data/nanhuang/Nan/models/DNAbert2_Pretrained", + "--tokenizer_path", + 
"/data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json", + "--trust_remote_code", + "True", + "--data_path", + "/data/nanhuang/Nan/ft_data/drosophila_enhancers_stark/split", + "--kmer", + "-1", + "--run_name", + "base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42", + "--model_max_length", + "512", + "--per_device_train_batch_size", + "128", + "--per_device_eval_batch_size", + "128", + "--gradient_accumulation_steps", + "1", + "--learning_rate", + "3e-5", + "--weight_decay", + "0.05", + "--num_train_epochs", + "4", + "--lr_scheduler_type", + "linear", + "--warmup_steps", + "0", + "--warmup_ratio", + "0.15", + "--fp16", + "--output_dir", + "genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42", + "--evaluation_strategy", + "epoch", + "--save_strategy", + "epoch", + "--load_best_model_at_end", + "True", + "--metric_for_best_model", + "eval_f1", + "--greater_is_better", + "True", + "--save_total_limit", + "1", + "--logging_steps", + "100", + "--overwrite_output_dir", + "True", + "--log_level", + "info", + "--seed", + "42", + "--find_unused_parameters", + "False", + "--project_name", + "genomic_bench_DNAbert2" + ], + "program": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", + "codePath": "train.py", + "codePathLocal": "train.py", + "email": "n5huang@ucsd.edu", + "root": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks", + "host": "u112222", + "executable": "/data/nanhuang/miniconda3/envs/bpe_v2/bin/python", + "cpu_count": 64, + "cpu_count_logical": 128, + "disk": { + "/": { + "total": "3768964489216", + "used": "3559222128640" + } + }, + "memory": { + "total": "1082030182400" + }, + "writerId": "b53l7vm7wpfvya02l5ob2eck7ok14wf5" +} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132920-zrhz20dw/files/wandb-summary.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_132920-zrhz20dw/files/wandb-summary.json new file mode 100644 index 
0000000000000000000000000000000000000000..f4ec3806c8e5abda8d70342c64624c8cf8f81552 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132920-zrhz20dw/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb":{"runtime":9},"_runtime":9} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132920-zrhz20dw/logs/debug-core.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132920-zrhz20dw/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..c6a849eb6203bbca1b49abeeb05546472054fccd --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132920-zrhz20dw/logs/debug-core.log @@ -0,0 +1,70 @@ +{"time":"2026-03-24T13:29:20.03742456-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpoc28hlmr/port-2988093.txt","pid":2988093,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:20.038738382-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmp9uquarlq/port-2988874.txt","pid":2988874,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:20.039003841-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988093} +{"time":"2026-03-24T13:29:20.039025901-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988093-2997274-3761623073/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:20.04069919-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988874} +{"time":"2026-03-24T13:29:20.040718421-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988874-2997275-4219790605/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:20.124433377-07:00","level":"INFO","msg":"main: starting 
server","port-filename":"/tmp/tmpze5mtr29/port-2988637.txt","pid":2988637,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:20.128053085-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988637} +{"time":"2026-03-24T13:29:20.128019566-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988637-2997300-777241408/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:20.205079181-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:20.213519842-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:20.266985147-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"ku0mi14u","id":"1(@)"} +{"time":"2026-03-24T13:29:20.295658037-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:20.3172018-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"eliuv43y","id":"1(@)"} +{"time":"2026-03-24T13:29:20.379313944-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"yjc1lhkl","id":"1(@)"} +{"time":"2026-03-24T13:29:20.444005412-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmplbn8udml/port-2988906.txt","pid":2988906,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:20.445893061-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988906} +{"time":"2026-03-24T13:29:20.445883732-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988906-2997875-3091809193/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:20.630082905-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} 
+{"time":"2026-03-24T13:29:20.648636296-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpqmfui_f4/port-2988923.txt","pid":2988923,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:20.650975252-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988923} +{"time":"2026-03-24T13:29:20.650965532-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988923-2998027-2125257262/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:20.677245977-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"5uhcp1bu","id":"1(@)"} +{"time":"2026-03-24T13:29:20.83285716-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:20.881265634-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"zrhz20dw","id":"1(@)"} +{"time":"2026-03-24T13:29:25.468460486-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"ku0mi14u","id":"1(@)"} +{"time":"2026-03-24T13:29:25.562825929-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"eliuv43y","id":"1(@)"} +{"time":"2026-03-24T13:29:25.589227984-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"yjc1lhkl","id":"1(@)"} +{"time":"2026-03-24T13:29:25.872707423-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"5uhcp1bu","id":"1(@)"} +{"time":"2026-03-24T13:29:26.077784103-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"zrhz20dw","id":"1(@)"} +{"time":"2026-03-24T13:29:35.347215345-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:35.347334264-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:35.347311435-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} 
+{"time":"2026-03-24T13:29:35.347442434-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:35.347525883-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988874-2997275-4219790605/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:35.485867918-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:35.486051637-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:35.486311015-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988093-2997274-3761623073/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:35.486655283-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:35.486845412-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:36.037426476-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:36.037580545-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:36.037557775-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:36.037864853-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988923-2998027-2125257262/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:36.037964663-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:36.180693602-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:36.180838011-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:36.180819861-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:36.18100239-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} 
+{"time":"2026-03-24T13:29:36.18108012-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988906-2997875-3091809193/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:36.251602234-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:36.251670873-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:36.251692473-07:00","level":"INFO","msg":"server is closed"} +{"time":"2026-03-24T13:29:36.372019764-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:36.372219503-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:36.372213063-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:36.372521501-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:36.372582451-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988637-2997300-777241408/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:37.032919698-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:37.033011367-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:37.033062727-07:00","level":"INFO","msg":"server is closed"} +{"time":"2026-03-24T13:29:37.107021871-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:37.107098891-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:37.107145341-07:00","level":"INFO","msg":"server is closed"} +{"time":"2026-03-24T13:29:38.306457371-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} 
+{"time":"2026-03-24T13:29:38.30656243-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:38.30658601-07:00","level":"INFO","msg":"server is closed"} +{"time":"2026-03-24T13:29:42.21708758-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:42.217248489-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:42.217275609-07:00","level":"INFO","msg":"server is closed"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132920-zrhz20dw/logs/debug-internal.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132920-zrhz20dw/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..cee08934c4b0f2035000d34689ed7a4356e21d97 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132920-zrhz20dw/logs/debug-internal.log @@ -0,0 +1,12 @@ +{"time":"2026-03-24T13:29:20.881555672-07:00","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2026-03-24T13:29:25.941047899-07:00","level":"ERROR","msg":"monitor: failed to initialize GPU resource: monitor: could not get GPU binary port: timeout reading portfile /tmp/wandb-system-monitor-portfile-2262663921"} +{"time":"2026-03-24T13:29:26.077292036-07:00","level":"INFO","msg":"stream: created new stream","id":"zrhz20dw"} +{"time":"2026-03-24T13:29:26.077575094-07:00","level":"INFO","msg":"handler: started","stream_id":"zrhz20dw"} +{"time":"2026-03-24T13:29:26.077768953-07:00","level":"INFO","msg":"stream: started","id":"zrhz20dw"} +{"time":"2026-03-24T13:29:26.078195191-07:00","level":"INFO","msg":"writer: started","stream_id":"zrhz20dw"} +{"time":"2026-03-24T13:29:26.078239241-07:00","level":"INFO","msg":"sender: started","stream_id":"zrhz20dw"} +{"time":"2026-03-24T13:29:36.037571815-07:00","level":"INFO","msg":"stream: closing","id":"zrhz20dw"} 
+{"time":"2026-03-24T13:29:36.4608591-07:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2026-03-24T13:29:37.024578187-07:00","level":"INFO","msg":"handler: closed","stream_id":"zrhz20dw"} +{"time":"2026-03-24T13:29:37.024825376-07:00","level":"INFO","msg":"sender: closed","stream_id":"zrhz20dw"} +{"time":"2026-03-24T13:29:37.024858876-07:00","level":"INFO","msg":"stream: closed","id":"zrhz20dw"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132920-zrhz20dw/logs/debug.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132920-zrhz20dw/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..5834aac1bf9342f33c21597278a4045fbf79fb23 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132920-zrhz20dw/logs/debug.log @@ -0,0 +1,23 @@ +2026-03-24 13:29:20,529 INFO MainThread:2988923 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1 +2026-03-24 13:29:20,529 INFO MainThread:2988923 [wandb_setup.py:_flush():80] Configure stats pid to 2988923 +2026-03-24 13:29:20,529 INFO MainThread:2988923 [wandb_setup.py:_flush():80] Loading settings from /home/nanhuang/.config/wandb/settings +2026-03-24 13:29:20,529 INFO MainThread:2988923 [wandb_setup.py:_flush():80] Loading settings from /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/settings +2026-03-24 13:29:20,529 INFO MainThread:2988923 [wandb_setup.py:_flush():80] Loading settings from environment variables +2026-03-24 13:29:20,529 INFO MainThread:2988923 [wandb_init.py:setup_run_log_directory():714] Logging user logs to /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_132920-zrhz20dw/logs/debug.log +2026-03-24 13:29:20,529 INFO MainThread:2988923 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_132920-zrhz20dw/logs/debug-internal.log +2026-03-24 13:29:20,530 INFO MainThread:2988923 [wandb_init.py:init():841] calling init 
triggers +2026-03-24 13:29:20,530 INFO MainThread:2988923 [wandb_init.py:init():846] wandb.init called with sweep_config: {} +config: {'_wandb': {}} +2026-03-24 13:29:20,530 INFO MainThread:2988923 [wandb_init.py:init():889] starting backend +2026-03-24 13:29:20,833 INFO MainThread:2988923 [wandb_init.py:init():892] sending inform_init request +2026-03-24 13:29:20,879 INFO MainThread:2988923 [wandb_init.py:init():900] backend started and connected +2026-03-24 13:29:20,886 INFO MainThread:2988923 [wandb_init.py:init():970] updated telemetry +2026-03-24 13:29:20,887 INFO MainThread:2988923 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout +2026-03-24 13:29:26,475 INFO MainThread:2988923 [wandb_init.py:init():1041] starting run threads in backend +2026-03-24 13:29:26,619 INFO MainThread:2988923 [wandb_run.py:_console_start():2521] atexit reg +2026-03-24 13:29:26,619 INFO MainThread:2988923 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2026-03-24 13:29:26,619 INFO MainThread:2988923 [wandb_run.py:_redirect():2438] Wrapping output streams. +2026-03-24 13:29:26,620 INFO MainThread:2988923 [wandb_run.py:_redirect():2461] Redirects installed. +2026-03-24 13:29:26,625 INFO MainThread:2988923 [wandb_init.py:init():1081] run started, returning control to user process +2026-03-24 13:29:36,037 INFO wandb-AsyncioManager-main:2988923 [service_client.py:_forward_responses():80] Reached EOF. +2026-03-24 13:29:36,038 INFO wandb-AsyncioManager-main:2988923 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles. 
diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132920-zrhz20dw/run-zrhz20dw.wandb b/Finetune-GenomicBenchmarks/wandb/run-20260324_132920-zrhz20dw/run-zrhz20dw.wandb new file mode 100644 index 0000000000000000000000000000000000000000..4e63606f16861d469661ac33d50464b050d51521 Binary files /dev/null and b/Finetune-GenomicBenchmarks/wandb/run-20260324_132920-zrhz20dw/run-zrhz20dw.wandb differ diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-30huih24/files/config.yaml b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-30huih24/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..024c9d369584bd8d056d5cc9e3dd348114239449 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-30huih24/files/config.yaml @@ -0,0 +1,108 @@ +_wandb: + value: + cli_version: 0.23.1 + e: + k6z13ciajq7oytbx0jjxz972n6zocyrk: + args: + - --model_name_or_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained + - --tokenizer_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json + - --trust_remote_code + - "True" + - --data_path + - /data/nanhuang/Nan/ft_data/drosophila_enhancers_stark/split + - --kmer + - "-1" + - --run_name + - base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42 + - --model_max_length + - "512" + - --per_device_train_batch_size + - "128" + - --per_device_eval_batch_size + - "128" + - --gradient_accumulation_steps + - "1" + - --learning_rate + - "3e-5" + - --weight_decay + - "0.05" + - --num_train_epochs + - "4" + - --lr_scheduler_type + - linear + - --warmup_steps + - "0" + - --warmup_ratio + - "0.15" + - --fp16 + - --output_dir + - genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42 + - --evaluation_strategy + - epoch + - --save_strategy + - epoch + - --load_best_model_at_end + - "True" + - --metric_for_best_model + - eval_f1 + - --greater_is_better + - "True" + - --save_total_limit + - "1" + - --logging_steps + 
- "100" + - --overwrite_output_dir + - "True" + - --log_level + - info + - --seed + - "42" + - --find_unused_parameters + - "False" + - --project_name + - genomic_bench_DNAbert2 + codePath: train.py + codePathLocal: train.py + cpu_count: 64 + cpu_count_logical: 128 + disk: + /: + total: "3768964489216" + used: "3559222145024" + email: n5huang@ucsd.edu + executable: /data/nanhuang/miniconda3/envs/bpe_v2/bin/python + host: u112222 + memory: + total: "1082030182400" + os: Linux-5.15.0-126-generic-x86_64-with-glibc2.35 + program: /data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py + python: CPython 3.9.18 + root: /data/nanhuang/Nan/Finetune-GenomicBenchmarks + startedAt: "2026-03-24T20:29:22.350179Z" + writerId: k6z13ciajq7oytbx0jjxz972n6zocyrk + m: [] + python_version: 3.9.18 + t: + "1": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "2": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "4": 3.9.18 + "5": 0.23.1 + "6": 4.35.2 + "12": 0.23.1 + "13": linux-x86_64 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-30huih24/files/output.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-30huih24/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..752fa48cba249cd9231a8c21731d1926a6d98aea --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-30huih24/files/output.log @@ -0,0 +1,45 @@ +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... +Some weights of BertForSequenceClassification were not initialized from the model checkpoint at /data/nanhuang/Nan/models/DNAbert2_Pretrained and are newly initialized: ['bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight', 'bert.pooler.dense.bias'] +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. 
+Traceback (most recent call last): + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 473, in + train() + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 424, in train + model = transformers.AutoModelForSequenceClassification.from_pretrained( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/modeling_utils.py", line 2271, in to + return super().to(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1369, in to + return self._apply(convert) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 955, in _apply + param_applied = fn(param) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1355, in convert + return t.to( +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 12.00 MiB. GPU 0 has a total capacity of 47.53 GiB of which 27.38 MiB is free. Process 2988293 has 21.00 GiB memory in use. Process 2988247 has 13.12 GiB memory in use. Process 2988061 has 7.12 GiB memory in use. Process 2988353 has 574.00 MiB memory in use. Process 2988034 has 434.00 MiB memory in use. Process 2988245 has 260.00 MiB memory in use. Process 2988374 has 260.00 MiB memory in use. Process 2988531 has 260.00 MiB memory in use. Process 2988064 has 260.00 MiB memory in use. Process 2988566 has 260.00 MiB memory in use. Process 2988668 has 260.00 MiB memory in use. 
Process 2988116 has 260.00 MiB memory in use. Process 2988093 has 260.00 MiB memory in use. Process 2988637 has 260.00 MiB memory in use. Process 2988554 has 260.00 MiB memory in use. Process 2988906 has 260.00 MiB memory in use. Process 2988874 has 260.00 MiB memory in use. Process 2988923 has 272.00 MiB memory in use. Process 2988286 has 260.00 MiB memory in use. Process 2989072 has 260.00 MiB memory in use. Including non-PyTorch memory, this process has 260.00 MiB memory in use. Process 2988348 has 260.00 MiB memory in use. Process 2988736 has 260.00 MiB memory in use. Process 2988855 has 260.00 MiB memory in use. Process 2988814 has 8.00 MiB memory in use. Process 2988511 has 4.00 MiB memory in use. Process 2988942 has 6.00 MiB memory in use. Process 2989897 has 4.00 MiB memory in use. Process 2989914 has 10.00 MiB memory in use. Process 2989070 has 14.00 MiB memory in use. Process 2989165 has 10.00 MiB memory in use. Process 2989068 has 10.00 MiB memory in use. Process 2990500 has 10.00 MiB memory in use. Process 2989449 has 10.00 MiB memory in use. Process 2988447 has 14.00 MiB memory in use. Process 2988379 has 10.00 MiB memory in use. Process 2988518 has 10.00 MiB memory in use. Process 2988326 has 10.00 MiB memory in use. Process 2988462 has 10.00 MiB memory in use. Process 2989297 has 10.00 MiB memory in use. Process 2988927 has 10.00 MiB memory in use. Process 2988994 has 10.00 MiB memory in use. Process 2988205 has 10.00 MiB memory in use. Of the allocated memory 0 bytes is allocated by PyTorch, and 0 bytes is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. 
See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) +Traceback (most recent call last): + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 473, in + train() + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 424, in train + model = transformers.AutoModelForSequenceClassification.from_pretrained( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/modeling_utils.py", line 2271, in to + return super().to(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1369, in to + return self._apply(convert) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 955, in _apply + param_applied = fn(param) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1355, in convert + return t.to( +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 12.00 MiB. GPU 0 has a total capacity of 47.53 GiB of which 27.38 MiB is free. Process 2988293 has 21.00 GiB memory in use. Process 2988247 has 13.12 GiB memory in use. Process 2988061 has 7.12 GiB memory in use. Process 2988353 has 574.00 MiB memory in use. Process 2988034 has 434.00 MiB memory in use. Process 2988245 has 260.00 MiB memory in use. Process 2988374 has 260.00 MiB memory in use. Process 2988531 has 260.00 MiB memory in use. Process 2988064 has 260.00 MiB memory in use. 
Process 2988566 has 260.00 MiB memory in use. Process 2988668 has 260.00 MiB memory in use. Process 2988116 has 260.00 MiB memory in use. Process 2988093 has 260.00 MiB memory in use. Process 2988637 has 260.00 MiB memory in use. Process 2988554 has 260.00 MiB memory in use. Process 2988906 has 260.00 MiB memory in use. Process 2988874 has 260.00 MiB memory in use. Process 2988923 has 272.00 MiB memory in use. Process 2988286 has 260.00 MiB memory in use. Process 2989072 has 260.00 MiB memory in use. Including non-PyTorch memory, this process has 260.00 MiB memory in use. Process 2988348 has 260.00 MiB memory in use. Process 2988736 has 260.00 MiB memory in use. Process 2988855 has 260.00 MiB memory in use. Process 2988814 has 8.00 MiB memory in use. Process 2988511 has 4.00 MiB memory in use. Process 2988942 has 6.00 MiB memory in use. Process 2989897 has 4.00 MiB memory in use. Process 2989914 has 10.00 MiB memory in use. Process 2989070 has 14.00 MiB memory in use. Process 2989165 has 10.00 MiB memory in use. Process 2989068 has 10.00 MiB memory in use. Process 2990500 has 10.00 MiB memory in use. Process 2989449 has 10.00 MiB memory in use. Process 2988447 has 14.00 MiB memory in use. Process 2988379 has 10.00 MiB memory in use. Process 2988518 has 10.00 MiB memory in use. Process 2988326 has 10.00 MiB memory in use. Process 2988462 has 10.00 MiB memory in use. Process 2989297 has 10.00 MiB memory in use. Process 2988927 has 10.00 MiB memory in use. Process 2988994 has 10.00 MiB memory in use. Process 2988205 has 10.00 MiB memory in use. Of the allocated memory 0 bytes is allocated by PyTorch, and 0 bytes is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. 
See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-30huih24/files/requirements.txt b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-30huih24/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..44d18d32ec4648cd530877d7c8c4758d5e887b9c --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-30huih24/files/requirements.txt @@ -0,0 +1,144 @@ +scipy==1.13.1 +jupyter_core==5.8.1 +smmap==5.0.2 +yarl==1.22.0 +executing==2.2.0 +cffi==2.0.0 +mkl_random==1.2.8 +traitlets==5.14.3 +wandb==0.23.1 +annotated-types==0.7.0 +evaluate==0.4.6 +kiwisolver==1.4.4 +Jinja2==3.1.6 +pyparsing==3.2.0 +mpmath==1.3.0 +debugpy==1.8.16 +nvidia-cuda-nvrtc-cu12==12.8.93 +docker-pycreds==0.4.0 +pycparser==2.23 +anyio==4.12.0 +safetensors==0.7.0 +matplotlib-inline==0.1.7 +Pygments==2.19.2 +numpy==2.0.2 +nvidia-cuda-cupti-cu12==12.8.90 +Bottleneck==1.4.2 +matplotlib==3.9.2 +numexpr==2.10.1 +sip==6.7.12 +aiohappyeyeballs==2.6.1 +ptyprocess==0.7.0 +fsspec==2025.7.0 +accelerate==0.25.0 +zipp==3.23.0 +PyQt5_sip==12.13.0 +pure_eval==0.2.3 +regex==2025.11.3 +aiosignal==1.4.0 +certifi==2025.10.5 +transformers==4.35.2 +mkl-service==2.4.0 +httpx==0.28.1 +mkl_fft==1.3.11 +pickleshare==0.7.5 +ipykernel==6.30.1 +nvidia-nvtx-cu12==12.8.90 +nvidia-cufft-cu12==11.3.3.83 +triton==3.4.0 +numba==0.60.0 +psutil==7.0.0 +contourpy==1.2.1 +PyQt5==5.15.10 +packaging==25.0 +datasets==4.4.1 +ipython==8.18.1 +sympy==1.14.0 +nvidia-cusolver-cu12==11.7.3.90 +multidict==6.7.0 +jupyter_client==8.6.3 +setuptools==80.9.0 +prompt_toolkit==3.0.51 +six==1.17.0 +GitPython==3.1.45 +pydantic==2.11.7 +nvidia-cublas-cu12==12.8.4.1 +aiohttp==3.13.2 +tzdata==2025.2 +importlib_metadata==8.7.0 +biopython==1.85 +httpcore==1.0.9 +python-dateutil==2.9.0.post0 +llvmlite==0.43.0 +pandas==2.3.3 +scikit-learn==1.6.1 +asttokens==3.0.0 +joblib==1.5.3 +h11==0.16.0 
+charset-normalizer==3.4.4 +pyzmq==27.0.2 +multiprocess==0.70.18 +nvidia-nvjitlink-cu12==12.8.93 +sentry-sdk==2.35.0 +pytz==2025.2 +pydantic_core==2.33.2 +MarkupSafe==3.0.3 +brotlicffi==1.0.9.2 +stack_data==0.6.3 +tqdm==4.67.1 +pynndescent==0.5.13 +importlib_resources==6.5.2 +ply==3.11 +pyarrow==21.0.0 +tokenizers==0.15.2 +exceptiongroup==1.3.1 +nvidia-cusparse-cu12==12.5.8.93 +comm==0.2.3 +pillow==11.3.0 +nvidia-cusparselt-cu12==0.7.1 +protobuf==3.20.3 +urllib3==2.5.0 +wheel==0.45.1 +wcwidth==0.2.13 +appdirs==1.4.4 +PySocks==1.7.1 +PyQt6_sip==13.10.2 +umap-learn==0.5.9.post2 +attrs==25.4.0 +platformdirs==4.3.8 +nvidia-cuda-runtime-cu12==12.8.90 +typing-inspection==0.4.1 +huggingface_hub==0.34.4 +decorator==5.2.1 +filelock==3.17.0 +nvidia-nccl-cu12==2.27.3 +fonttools==4.60.1 +xxhash==3.6.0 +dill==0.4.0 +threadpoolctl==3.6.0 +parso==0.8.4 +pysam==0.9.1 +frozenlist==1.8.0 +typing_extensions==4.15.0 +propcache==0.4.1 +tomli==2.2.1 +click==8.1.8 +nvidia-cudnn-cu12==9.10.2.21 +gitdb==4.0.12 +pip==25.3 +tornado==6.5.2 +networkx==3.2.1 +jedi==0.19.2 +idna==3.11 +pexpect==4.9.0 +async-timeout==5.0.1 +hf-xet==1.1.8 +nvidia-curand-cu12==10.3.9.90 +PyYAML==6.0.2 +nvidia-cufile-cu12==1.13.1.3 +setproctitle==1.3.6 +eval_type_backport==0.2.2 +requests==2.32.5 +nest-asyncio==1.6.0 +torch==2.8.0 +cycler==0.11.0 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-30huih24/files/wandb-metadata.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-30huih24/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..556837771b34926d11342c0268fb49b4fee1af52 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-30huih24/files/wandb-metadata.json @@ -0,0 +1,85 @@ +{ + "os": "Linux-5.15.0-126-generic-x86_64-with-glibc2.35", + "python": "CPython 3.9.18", + "startedAt": "2026-03-24T20:29:22.350179Z", + "args": [ + "--model_name_or_path", + "/data/nanhuang/Nan/models/DNAbert2_Pretrained", + "--tokenizer_path", + 
"/data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json", + "--trust_remote_code", + "True", + "--data_path", + "/data/nanhuang/Nan/ft_data/drosophila_enhancers_stark/split", + "--kmer", + "-1", + "--run_name", + "base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42", + "--model_max_length", + "512", + "--per_device_train_batch_size", + "128", + "--per_device_eval_batch_size", + "128", + "--gradient_accumulation_steps", + "1", + "--learning_rate", + "3e-5", + "--weight_decay", + "0.05", + "--num_train_epochs", + "4", + "--lr_scheduler_type", + "linear", + "--warmup_steps", + "0", + "--warmup_ratio", + "0.15", + "--fp16", + "--output_dir", + "genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42", + "--evaluation_strategy", + "epoch", + "--save_strategy", + "epoch", + "--load_best_model_at_end", + "True", + "--metric_for_best_model", + "eval_f1", + "--greater_is_better", + "True", + "--save_total_limit", + "1", + "--logging_steps", + "100", + "--overwrite_output_dir", + "True", + "--log_level", + "info", + "--seed", + "42", + "--find_unused_parameters", + "False", + "--project_name", + "genomic_bench_DNAbert2" + ], + "program": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", + "codePath": "train.py", + "codePathLocal": "train.py", + "email": "n5huang@ucsd.edu", + "root": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks", + "host": "u112222", + "executable": "/data/nanhuang/miniconda3/envs/bpe_v2/bin/python", + "cpu_count": 64, + "cpu_count_logical": 128, + "disk": { + "/": { + "total": "3768964489216", + "used": "3559222145024" + } + }, + "memory": { + "total": "1082030182400" + }, + "writerId": "k6z13ciajq7oytbx0jjxz972n6zocyrk" +} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-30huih24/files/wandb-summary.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-30huih24/files/wandb-summary.json new file mode 100644 index 
0000000000000000000000000000000000000000..2320ce95111d4a7914cdfba4a4e05238d5d8f179 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-30huih24/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb":{"runtime":8},"_runtime":8} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-30huih24/logs/debug-core.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-30huih24/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..f3c7306f2d91c07d67aa689ffc4a0f343c8f95ea --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-30huih24/logs/debug-core.log @@ -0,0 +1,56 @@ +{"time":"2026-03-24T13:29:22.284047562-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpav27xyq1/port-2988286.txt","pid":2988286,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:22.285625233-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988286} +{"time":"2026-03-24T13:29:22.285588633-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988286-2998625-3155261872/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:22.468117997-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:22.494433712-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmp1tkse6w3/port-2989072.txt","pid":2989072,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:22.495445966-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2989072} +{"time":"2026-03-24T13:29:22.495431786-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2989072-2998645-3328255346/socket","Net":"unix"}} 
+{"time":"2026-03-24T13:29:22.495853584-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpmc89vteo/port-2988736.txt","pid":2988736,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:22.496759738-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988736} +{"time":"2026-03-24T13:29:22.497413744-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988736-2998644-2395824398/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:22.511369532-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpdfnqxozc/port-2988448.txt","pid":2988448,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:22.512582405-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988448} +{"time":"2026-03-24T13:29:22.512540005-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988448-2998647-1262891246/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:22.543414403-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"768ms08v","id":"1(@)"} +{"time":"2026-03-24T13:29:22.677281384-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:22.678732355-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:22.689293983-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:22.728188143-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"pwb2s09u","id":"1(@)"} +{"time":"2026-03-24T13:29:22.730618369-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"kw7l95l5","id":"1(@)"} 
+{"time":"2026-03-24T13:29:22.741889473-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"30huih24","id":"1(@)"} +{"time":"2026-03-24T13:29:27.758310425-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"768ms08v","id":"1(@)"} +{"time":"2026-03-24T13:29:27.911334083-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"kw7l95l5","id":"1(@)"} +{"time":"2026-03-24T13:29:27.92876088-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"pwb2s09u","id":"1(@)"} +{"time":"2026-03-24T13:29:27.951134668-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"30huih24","id":"1(@)"} +{"time":"2026-03-24T13:29:36.827010682-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:36.827111841-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:36.827097321-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:36.82730241-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988286-2998625-3155261872/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:36.82736769-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:36.88339996-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:36.883488939-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:36.883478029-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:36.883638928-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988736-2998644-2395824398/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:36.883701208-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} 
+{"time":"2026-03-24T13:29:36.933077976-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:36.933179796-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:36.933166776-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:36.933402095-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:36.933425005-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988448-2998647-1262891246/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:37.00036064-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:37.000492309-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:37.000485809-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:37.000715598-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:37.000790127-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2989072-2998645-3328255346/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:37.723219699-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:37.723317478-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:37.723373088-07:00","level":"INFO","msg":"server is closed"} +{"time":"2026-03-24T13:29:38.804777103-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:38.804830543-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:38.804844043-07:00","level":"INFO","msg":"server is closed"} 
+{"time":"2026-03-24T13:29:40.954475202-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:40.954669681-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:40.954684581-07:00","level":"INFO","msg":"server is closed"} +{"time":"2026-03-24T13:29:53.48518493-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:53.48524762-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:53.48527483-07:00","level":"INFO","msg":"server is closed"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-30huih24/logs/debug-internal.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-30huih24/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..1f6099c338849b7589519b4d0304234d361ceb1b --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-30huih24/logs/debug-internal.log @@ -0,0 +1,12 @@ +{"time":"2026-03-24T13:29:22.742141081-07:00","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2026-03-24T13:29:27.812949263-07:00","level":"ERROR","msg":"monitor: failed to initialize GPU resource: monitor: could not get GPU binary port: timeout reading portfile /tmp/wandb-system-monitor-portfile-318165640"} +{"time":"2026-03-24T13:29:27.950318353-07:00","level":"INFO","msg":"stream: created new stream","id":"30huih24"} +{"time":"2026-03-24T13:29:27.950540662-07:00","level":"INFO","msg":"handler: started","stream_id":"30huih24"} +{"time":"2026-03-24T13:29:27.951033049-07:00","level":"INFO","msg":"sender: started","stream_id":"30huih24"} +{"time":"2026-03-24T13:29:27.951079798-07:00","level":"INFO","msg":"writer: started","stream_id":"30huih24"} +{"time":"2026-03-24T13:29:27.950990829-07:00","level":"INFO","msg":"stream: 
started","id":"30huih24"} +{"time":"2026-03-24T13:29:36.933198066-07:00","level":"INFO","msg":"stream: closing","id":"30huih24"} +{"time":"2026-03-24T13:29:37.332370433-07:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2026-03-24T13:29:38.799376296-07:00","level":"INFO","msg":"handler: closed","stream_id":"30huih24"} +{"time":"2026-03-24T13:29:38.799656624-07:00","level":"INFO","msg":"sender: closed","stream_id":"30huih24"} +{"time":"2026-03-24T13:29:38.799674924-07:00","level":"INFO","msg":"stream: closed","id":"30huih24"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-30huih24/logs/debug.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-30huih24/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..6cb2f193c6b6f26c178a01174c6020a19a888ef3 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-30huih24/logs/debug.log @@ -0,0 +1,23 @@ +2026-03-24 13:29:22,357 INFO MainThread:2988448 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1 +2026-03-24 13:29:22,357 INFO MainThread:2988448 [wandb_setup.py:_flush():80] Configure stats pid to 2988448 +2026-03-24 13:29:22,357 INFO MainThread:2988448 [wandb_setup.py:_flush():80] Loading settings from /home/nanhuang/.config/wandb/settings +2026-03-24 13:29:22,357 INFO MainThread:2988448 [wandb_setup.py:_flush():80] Loading settings from /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/settings +2026-03-24 13:29:22,357 INFO MainThread:2988448 [wandb_setup.py:_flush():80] Loading settings from environment variables +2026-03-24 13:29:22,357 INFO MainThread:2988448 [wandb_init.py:setup_run_log_directory():714] Logging user logs to /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-30huih24/logs/debug.log +2026-03-24 13:29:22,357 INFO MainThread:2988448 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to 
/data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-30huih24/logs/debug-internal.log +2026-03-24 13:29:22,357 INFO MainThread:2988448 [wandb_init.py:init():841] calling init triggers +2026-03-24 13:29:22,358 INFO MainThread:2988448 [wandb_init.py:init():846] wandb.init called with sweep_config: {} +config: {'_wandb': {}} +2026-03-24 13:29:22,358 INFO MainThread:2988448 [wandb_init.py:init():889] starting backend +2026-03-24 13:29:22,690 INFO MainThread:2988448 [wandb_init.py:init():892] sending inform_init request +2026-03-24 13:29:22,740 INFO MainThread:2988448 [wandb_init.py:init():900] backend started and connected +2026-03-24 13:29:22,748 INFO MainThread:2988448 [wandb_init.py:init():970] updated telemetry +2026-03-24 13:29:22,749 INFO MainThread:2988448 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout +2026-03-24 13:29:28,555 INFO MainThread:2988448 [wandb_init.py:init():1041] starting run threads in backend +2026-03-24 13:29:28,743 INFO MainThread:2988448 [wandb_run.py:_console_start():2521] atexit reg +2026-03-24 13:29:28,744 INFO MainThread:2988448 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2026-03-24 13:29:28,745 INFO MainThread:2988448 [wandb_run.py:_redirect():2438] Wrapping output streams. +2026-03-24 13:29:28,746 INFO MainThread:2988448 [wandb_run.py:_redirect():2461] Redirects installed. +2026-03-24 13:29:28,753 INFO MainThread:2988448 [wandb_init.py:init():1081] run started, returning control to user process +2026-03-24 13:29:36,933 INFO wandb-AsyncioManager-main:2988448 [service_client.py:_forward_responses():80] Reached EOF. +2026-03-24 13:29:36,933 INFO wandb-AsyncioManager-main:2988448 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles. 
diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-30huih24/run-30huih24.wandb b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-30huih24/run-30huih24.wandb new file mode 100644 index 0000000000000000000000000000000000000000..d1217dcc5a2343e3c4d92c9eeae2114d4b535a75 Binary files /dev/null and b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-30huih24/run-30huih24.wandb differ diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-768ms08v/files/config.yaml b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-768ms08v/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..afb61a04989a3cf36396a25cf2e61cade270e8d0 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-768ms08v/files/config.yaml @@ -0,0 +1,108 @@ +_wandb: + value: + cli_version: 0.23.1 + e: + rcgqxtwzxxcn4en1iqdj3f9wwb9uvux1: + args: + - --model_name_or_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained + - --tokenizer_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json + - --trust_remote_code + - "True" + - --data_path + - /data/nanhuang/Nan/ft_data/drosophila_enhancers_stark/split + - --kmer + - "-1" + - --run_name + - base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42 + - --model_max_length + - "512" + - --per_device_train_batch_size + - "128" + - --per_device_eval_batch_size + - "128" + - --gradient_accumulation_steps + - "1" + - --learning_rate + - "3e-5" + - --weight_decay + - "0.05" + - --num_train_epochs + - "4" + - --lr_scheduler_type + - linear + - --warmup_steps + - "0" + - --warmup_ratio + - "0.15" + - --fp16 + - --output_dir + - genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42 + - --evaluation_strategy + - epoch + - --save_strategy + - epoch + - --load_best_model_at_end + - "True" + - --metric_for_best_model + - eval_f1 + - --greater_is_better + - "True" + - --save_total_limit + - "1" + - --logging_steps + 
- "100" + - --overwrite_output_dir + - "True" + - --log_level + - info + - --seed + - "42" + - --find_unused_parameters + - "False" + - --project_name + - genomic_bench_DNAbert2 + codePath: train.py + codePathLocal: train.py + cpu_count: 64 + cpu_count_logical: 128 + disk: + /: + total: "3768964489216" + used: "3559222145024" + email: n5huang@ucsd.edu + executable: /data/nanhuang/miniconda3/envs/bpe_v2/bin/python + host: u112222 + memory: + total: "1082030182400" + os: Linux-5.15.0-126-generic-x86_64-with-glibc2.35 + program: /data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py + python: CPython 3.9.18 + root: /data/nanhuang/Nan/Finetune-GenomicBenchmarks + startedAt: "2026-03-24T20:29:22.152916Z" + writerId: rcgqxtwzxxcn4en1iqdj3f9wwb9uvux1 + m: [] + python_version: 3.9.18 + t: + "1": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "2": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "4": 3.9.18 + "5": 0.23.1 + "6": 4.35.2 + "12": 0.23.1 + "13": linux-x86_64 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-768ms08v/files/output.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-768ms08v/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..b413af2f3def7b984d6ca9792da7a837fd3a6770 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-768ms08v/files/output.log @@ -0,0 +1,45 @@ +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... +Some weights of BertForSequenceClassification were not initialized from the model checkpoint at /data/nanhuang/Nan/models/DNAbert2_Pretrained and are newly initialized: ['bert.pooler.dense.bias', 'classifier.weight', 'bert.pooler.dense.weight', 'classifier.bias'] +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. 
+Traceback (most recent call last): + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 473, in + train() + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 424, in train + model = transformers.AutoModelForSequenceClassification.from_pretrained( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/modeling_utils.py", line 2271, in to + return super().to(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1369, in to + return self._apply(convert) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 955, in _apply + param_applied = fn(param) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1355, in convert + return t.to( +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 12.00 MiB. GPU 0 has a total capacity of 47.53 GiB of which 23.38 MiB is free. Process 2988293 has 21.00 GiB memory in use. Process 2988247 has 13.12 GiB memory in use. Process 2988061 has 7.12 GiB memory in use. Process 2988353 has 574.00 MiB memory in use. Process 2988034 has 434.00 MiB memory in use. Process 2988245 has 260.00 MiB memory in use. Process 2988374 has 260.00 MiB memory in use. Process 2988531 has 260.00 MiB memory in use. Process 2988064 has 260.00 MiB memory in use. Process 2988566 has 260.00 MiB memory in use. Process 2988668 has 260.00 MiB memory in use. 
Process 2988116 has 260.00 MiB memory in use. Process 2988093 has 260.00 MiB memory in use. Process 2988637 has 260.00 MiB memory in use. Process 2988554 has 260.00 MiB memory in use. Process 2988906 has 260.00 MiB memory in use. Process 2988874 has 260.00 MiB memory in use. Process 2988923 has 272.00 MiB memory in use. Including non-PyTorch memory, this process has 260.00 MiB memory in use. Process 2989072 has 260.00 MiB memory in use. Process 2988448 has 260.00 MiB memory in use. Process 2988348 has 260.00 MiB memory in use. Process 2988736 has 260.00 MiB memory in use. Process 2988855 has 260.00 MiB memory in use. Process 2988814 has 8.00 MiB memory in use. Process 2988511 has 4.00 MiB memory in use. Process 2988942 has 6.00 MiB memory in use. Process 2989897 has 4.00 MiB memory in use. Process 2989914 has 10.00 MiB memory in use. Process 2989070 has 14.00 MiB memory in use. Process 2989165 has 10.00 MiB memory in use. Process 2989068 has 10.00 MiB memory in use. Process 2990500 has 10.00 MiB memory in use. Process 2989449 has 10.00 MiB memory in use. Process 2988447 has 14.00 MiB memory in use. Process 2988379 has 10.00 MiB memory in use. Process 2988518 has 10.00 MiB memory in use. Process 2988326 has 10.00 MiB memory in use. Process 2988462 has 10.00 MiB memory in use. Process 2989297 has 10.00 MiB memory in use. Process 2988927 has 10.00 MiB memory in use. Process 2988994 has 10.00 MiB memory in use. Process 2988205 has 10.00 MiB memory in use. Of the allocated memory 0 bytes is allocated by PyTorch, and 0 bytes is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. 
See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) +Traceback (most recent call last): + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 473, in + train() + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 424, in train + model = transformers.AutoModelForSequenceClassification.from_pretrained( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/modeling_utils.py", line 2271, in to + return super().to(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1369, in to + return self._apply(convert) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 955, in _apply + param_applied = fn(param) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1355, in convert + return t.to( +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 12.00 MiB. GPU 0 has a total capacity of 47.53 GiB of which 23.38 MiB is free. Process 2988293 has 21.00 GiB memory in use. Process 2988247 has 13.12 GiB memory in use. Process 2988061 has 7.12 GiB memory in use. Process 2988353 has 574.00 MiB memory in use. Process 2988034 has 434.00 MiB memory in use. Process 2988245 has 260.00 MiB memory in use. Process 2988374 has 260.00 MiB memory in use. Process 2988531 has 260.00 MiB memory in use. Process 2988064 has 260.00 MiB memory in use. 
Process 2988566 has 260.00 MiB memory in use. Process 2988668 has 260.00 MiB memory in use. Process 2988116 has 260.00 MiB memory in use. Process 2988093 has 260.00 MiB memory in use. Process 2988637 has 260.00 MiB memory in use. Process 2988554 has 260.00 MiB memory in use. Process 2988906 has 260.00 MiB memory in use. Process 2988874 has 260.00 MiB memory in use. Process 2988923 has 272.00 MiB memory in use. Including non-PyTorch memory, this process has 260.00 MiB memory in use. Process 2989072 has 260.00 MiB memory in use. Process 2988448 has 260.00 MiB memory in use. Process 2988348 has 260.00 MiB memory in use. Process 2988736 has 260.00 MiB memory in use. Process 2988855 has 260.00 MiB memory in use. Process 2988814 has 8.00 MiB memory in use. Process 2988511 has 4.00 MiB memory in use. Process 2988942 has 6.00 MiB memory in use. Process 2989897 has 4.00 MiB memory in use. Process 2989914 has 10.00 MiB memory in use. Process 2989070 has 14.00 MiB memory in use. Process 2989165 has 10.00 MiB memory in use. Process 2989068 has 10.00 MiB memory in use. Process 2990500 has 10.00 MiB memory in use. Process 2989449 has 10.00 MiB memory in use. Process 2988447 has 14.00 MiB memory in use. Process 2988379 has 10.00 MiB memory in use. Process 2988518 has 10.00 MiB memory in use. Process 2988326 has 10.00 MiB memory in use. Process 2988462 has 10.00 MiB memory in use. Process 2989297 has 10.00 MiB memory in use. Process 2988927 has 10.00 MiB memory in use. Process 2988994 has 10.00 MiB memory in use. Process 2988205 has 10.00 MiB memory in use. Of the allocated memory 0 bytes is allocated by PyTorch, and 0 bytes is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. 
See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-768ms08v/files/requirements.txt b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-768ms08v/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..44d18d32ec4648cd530877d7c8c4758d5e887b9c --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-768ms08v/files/requirements.txt @@ -0,0 +1,144 @@ +scipy==1.13.1 +jupyter_core==5.8.1 +smmap==5.0.2 +yarl==1.22.0 +executing==2.2.0 +cffi==2.0.0 +mkl_random==1.2.8 +traitlets==5.14.3 +wandb==0.23.1 +annotated-types==0.7.0 +evaluate==0.4.6 +kiwisolver==1.4.4 +Jinja2==3.1.6 +pyparsing==3.2.0 +mpmath==1.3.0 +debugpy==1.8.16 +nvidia-cuda-nvrtc-cu12==12.8.93 +docker-pycreds==0.4.0 +pycparser==2.23 +anyio==4.12.0 +safetensors==0.7.0 +matplotlib-inline==0.1.7 +Pygments==2.19.2 +numpy==2.0.2 +nvidia-cuda-cupti-cu12==12.8.90 +Bottleneck==1.4.2 +matplotlib==3.9.2 +numexpr==2.10.1 +sip==6.7.12 +aiohappyeyeballs==2.6.1 +ptyprocess==0.7.0 +fsspec==2025.7.0 +accelerate==0.25.0 +zipp==3.23.0 +PyQt5_sip==12.13.0 +pure_eval==0.2.3 +regex==2025.11.3 +aiosignal==1.4.0 +certifi==2025.10.5 +transformers==4.35.2 +mkl-service==2.4.0 +httpx==0.28.1 +mkl_fft==1.3.11 +pickleshare==0.7.5 +ipykernel==6.30.1 +nvidia-nvtx-cu12==12.8.90 +nvidia-cufft-cu12==11.3.3.83 +triton==3.4.0 +numba==0.60.0 +psutil==7.0.0 +contourpy==1.2.1 +PyQt5==5.15.10 +packaging==25.0 +datasets==4.4.1 +ipython==8.18.1 +sympy==1.14.0 +nvidia-cusolver-cu12==11.7.3.90 +multidict==6.7.0 +jupyter_client==8.6.3 +setuptools==80.9.0 +prompt_toolkit==3.0.51 +six==1.17.0 +GitPython==3.1.45 +pydantic==2.11.7 +nvidia-cublas-cu12==12.8.4.1 +aiohttp==3.13.2 +tzdata==2025.2 +importlib_metadata==8.7.0 +biopython==1.85 +httpcore==1.0.9 +python-dateutil==2.9.0.post0 +llvmlite==0.43.0 +pandas==2.3.3 +scikit-learn==1.6.1 +asttokens==3.0.0 +joblib==1.5.3 +h11==0.16.0 
+charset-normalizer==3.4.4 +pyzmq==27.0.2 +multiprocess==0.70.18 +nvidia-nvjitlink-cu12==12.8.93 +sentry-sdk==2.35.0 +pytz==2025.2 +pydantic_core==2.33.2 +MarkupSafe==3.0.3 +brotlicffi==1.0.9.2 +stack_data==0.6.3 +tqdm==4.67.1 +pynndescent==0.5.13 +importlib_resources==6.5.2 +ply==3.11 +pyarrow==21.0.0 +tokenizers==0.15.2 +exceptiongroup==1.3.1 +nvidia-cusparse-cu12==12.5.8.93 +comm==0.2.3 +pillow==11.3.0 +nvidia-cusparselt-cu12==0.7.1 +protobuf==3.20.3 +urllib3==2.5.0 +wheel==0.45.1 +wcwidth==0.2.13 +appdirs==1.4.4 +PySocks==1.7.1 +PyQt6_sip==13.10.2 +umap-learn==0.5.9.post2 +attrs==25.4.0 +platformdirs==4.3.8 +nvidia-cuda-runtime-cu12==12.8.90 +typing-inspection==0.4.1 +huggingface_hub==0.34.4 +decorator==5.2.1 +filelock==3.17.0 +nvidia-nccl-cu12==2.27.3 +fonttools==4.60.1 +xxhash==3.6.0 +dill==0.4.0 +threadpoolctl==3.6.0 +parso==0.8.4 +pysam==0.9.1 +frozenlist==1.8.0 +typing_extensions==4.15.0 +propcache==0.4.1 +tomli==2.2.1 +click==8.1.8 +nvidia-cudnn-cu12==9.10.2.21 +gitdb==4.0.12 +pip==25.3 +tornado==6.5.2 +networkx==3.2.1 +jedi==0.19.2 +idna==3.11 +pexpect==4.9.0 +async-timeout==5.0.1 +hf-xet==1.1.8 +nvidia-curand-cu12==10.3.9.90 +PyYAML==6.0.2 +nvidia-cufile-cu12==1.13.1.3 +setproctitle==1.3.6 +eval_type_backport==0.2.2 +requests==2.32.5 +nest-asyncio==1.6.0 +torch==2.8.0 +cycler==0.11.0 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-768ms08v/files/wandb-metadata.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-768ms08v/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..d2ce2718bd0422e4fe2f393224b28a623c5fa4c5 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-768ms08v/files/wandb-metadata.json @@ -0,0 +1,85 @@ +{ + "os": "Linux-5.15.0-126-generic-x86_64-with-glibc2.35", + "python": "CPython 3.9.18", + "startedAt": "2026-03-24T20:29:22.152916Z", + "args": [ + "--model_name_or_path", + "/data/nanhuang/Nan/models/DNAbert2_Pretrained", + "--tokenizer_path", + 
"/data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json", + "--trust_remote_code", + "True", + "--data_path", + "/data/nanhuang/Nan/ft_data/drosophila_enhancers_stark/split", + "--kmer", + "-1", + "--run_name", + "base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42", + "--model_max_length", + "512", + "--per_device_train_batch_size", + "128", + "--per_device_eval_batch_size", + "128", + "--gradient_accumulation_steps", + "1", + "--learning_rate", + "3e-5", + "--weight_decay", + "0.05", + "--num_train_epochs", + "4", + "--lr_scheduler_type", + "linear", + "--warmup_steps", + "0", + "--warmup_ratio", + "0.15", + "--fp16", + "--output_dir", + "genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42", + "--evaluation_strategy", + "epoch", + "--save_strategy", + "epoch", + "--load_best_model_at_end", + "True", + "--metric_for_best_model", + "eval_f1", + "--greater_is_better", + "True", + "--save_total_limit", + "1", + "--logging_steps", + "100", + "--overwrite_output_dir", + "True", + "--log_level", + "info", + "--seed", + "42", + "--find_unused_parameters", + "False", + "--project_name", + "genomic_bench_DNAbert2" + ], + "program": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", + "codePath": "train.py", + "codePathLocal": "train.py", + "email": "n5huang@ucsd.edu", + "root": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks", + "host": "u112222", + "executable": "/data/nanhuang/miniconda3/envs/bpe_v2/bin/python", + "cpu_count": 64, + "cpu_count_logical": 128, + "disk": { + "/": { + "total": "3768964489216", + "used": "3559222145024" + } + }, + "memory": { + "total": "1082030182400" + }, + "writerId": "rcgqxtwzxxcn4en1iqdj3f9wwb9uvux1" +} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-768ms08v/files/wandb-summary.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-768ms08v/files/wandb-summary.json new file mode 100644 index 
0000000000000000000000000000000000000000..2320ce95111d4a7914cdfba4a4e05238d5d8f179 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-768ms08v/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb":{"runtime":8},"_runtime":8} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-768ms08v/logs/debug-core.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-768ms08v/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..f3c7306f2d91c07d67aa689ffc4a0f343c8f95ea --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-768ms08v/logs/debug-core.log @@ -0,0 +1,56 @@ +{"time":"2026-03-24T13:29:22.284047562-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpav27xyq1/port-2988286.txt","pid":2988286,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:22.285625233-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988286} +{"time":"2026-03-24T13:29:22.285588633-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988286-2998625-3155261872/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:22.468117997-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:22.494433712-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmp1tkse6w3/port-2989072.txt","pid":2989072,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:22.495445966-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2989072} +{"time":"2026-03-24T13:29:22.495431786-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2989072-2998645-3328255346/socket","Net":"unix"}} 
+{"time":"2026-03-24T13:29:22.495853584-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpmc89vteo/port-2988736.txt","pid":2988736,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:22.496759738-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988736} +{"time":"2026-03-24T13:29:22.497413744-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988736-2998644-2395824398/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:22.511369532-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpdfnqxozc/port-2988448.txt","pid":2988448,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:22.512582405-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988448} +{"time":"2026-03-24T13:29:22.512540005-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988448-2998647-1262891246/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:22.543414403-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"768ms08v","id":"1(@)"} +{"time":"2026-03-24T13:29:22.677281384-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:22.678732355-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:22.689293983-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:22.728188143-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"pwb2s09u","id":"1(@)"} +{"time":"2026-03-24T13:29:22.730618369-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"kw7l95l5","id":"1(@)"} 
+{"time":"2026-03-24T13:29:22.741889473-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"30huih24","id":"1(@)"} +{"time":"2026-03-24T13:29:27.758310425-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"768ms08v","id":"1(@)"} +{"time":"2026-03-24T13:29:27.911334083-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"kw7l95l5","id":"1(@)"} +{"time":"2026-03-24T13:29:27.92876088-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"pwb2s09u","id":"1(@)"} +{"time":"2026-03-24T13:29:27.951134668-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"30huih24","id":"1(@)"} +{"time":"2026-03-24T13:29:36.827010682-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:36.827111841-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:36.827097321-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:36.82730241-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988286-2998625-3155261872/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:36.82736769-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:36.88339996-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:36.883488939-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:36.883478029-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:36.883638928-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988736-2998644-2395824398/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:36.883701208-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} 
+{"time":"2026-03-24T13:29:36.933077976-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:36.933179796-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:36.933166776-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:36.933402095-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:36.933425005-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988448-2998647-1262891246/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:37.00036064-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:37.000492309-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:37.000485809-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:37.000715598-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:37.000790127-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2989072-2998645-3328255346/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:37.723219699-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:37.723317478-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:37.723373088-07:00","level":"INFO","msg":"server is closed"} +{"time":"2026-03-24T13:29:38.804777103-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:38.804830543-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:38.804844043-07:00","level":"INFO","msg":"server is closed"} 
+{"time":"2026-03-24T13:29:40.954475202-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:40.954669681-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:40.954684581-07:00","level":"INFO","msg":"server is closed"} +{"time":"2026-03-24T13:29:53.48518493-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:53.48524762-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:53.48527483-07:00","level":"INFO","msg":"server is closed"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-768ms08v/logs/debug-internal.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-768ms08v/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..0f2ae9c6692057bbe3593e896848d62679d4e4b9 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-768ms08v/logs/debug-internal.log @@ -0,0 +1,12 @@ +{"time":"2026-03-24T13:29:22.543637792-07:00","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2026-03-24T13:29:27.596799937-07:00","level":"ERROR","msg":"monitor: failed to initialize GPU resource: monitor: could not get GPU binary port: timeout reading portfile /tmp/wandb-system-monitor-portfile-2575439518"} +{"time":"2026-03-24T13:29:27.757858458-07:00","level":"INFO","msg":"stream: created new stream","id":"768ms08v"} +{"time":"2026-03-24T13:29:27.758031287-07:00","level":"INFO","msg":"handler: started","stream_id":"768ms08v"} +{"time":"2026-03-24T13:29:27.758294005-07:00","level":"INFO","msg":"stream: started","id":"768ms08v"} +{"time":"2026-03-24T13:29:27.758316785-07:00","level":"INFO","msg":"writer: started","stream_id":"768ms08v"} +{"time":"2026-03-24T13:29:27.758326705-07:00","level":"INFO","msg":"sender: 
started","stream_id":"768ms08v"} +{"time":"2026-03-24T13:29:36.827105631-07:00","level":"INFO","msg":"stream: closing","id":"768ms08v"} +{"time":"2026-03-24T13:29:37.211656885-07:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2026-03-24T13:29:53.482659455-07:00","level":"INFO","msg":"handler: closed","stream_id":"768ms08v"} +{"time":"2026-03-24T13:29:53.482870934-07:00","level":"INFO","msg":"sender: closed","stream_id":"768ms08v"} +{"time":"2026-03-24T13:29:53.482892644-07:00","level":"INFO","msg":"stream: closed","id":"768ms08v"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-768ms08v/logs/debug.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-768ms08v/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..d54a66c8fa8b184bed428059556830cc979dd75b --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-768ms08v/logs/debug.log @@ -0,0 +1,23 @@ +2026-03-24 13:29:22,158 INFO MainThread:2988286 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1 +2026-03-24 13:29:22,158 INFO MainThread:2988286 [wandb_setup.py:_flush():80] Configure stats pid to 2988286 +2026-03-24 13:29:22,158 INFO MainThread:2988286 [wandb_setup.py:_flush():80] Loading settings from /home/nanhuang/.config/wandb/settings +2026-03-24 13:29:22,158 INFO MainThread:2988286 [wandb_setup.py:_flush():80] Loading settings from /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/settings +2026-03-24 13:29:22,158 INFO MainThread:2988286 [wandb_setup.py:_flush():80] Loading settings from environment variables +2026-03-24 13:29:22,158 INFO MainThread:2988286 [wandb_init.py:setup_run_log_directory():714] Logging user logs to /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-768ms08v/logs/debug.log +2026-03-24 13:29:22,158 INFO MainThread:2988286 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to 
/data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-768ms08v/logs/debug-internal.log +2026-03-24 13:29:22,158 INFO MainThread:2988286 [wandb_init.py:init():841] calling init triggers +2026-03-24 13:29:22,158 INFO MainThread:2988286 [wandb_init.py:init():846] wandb.init called with sweep_config: {} +config: {'_wandb': {}} +2026-03-24 13:29:22,158 INFO MainThread:2988286 [wandb_init.py:init():889] starting backend +2026-03-24 13:29:22,468 INFO MainThread:2988286 [wandb_init.py:init():892] sending inform_init request +2026-03-24 13:29:22,541 INFO MainThread:2988286 [wandb_init.py:init():900] backend started and connected +2026-03-24 13:29:22,548 INFO MainThread:2988286 [wandb_init.py:init():970] updated telemetry +2026-03-24 13:29:22,549 INFO MainThread:2988286 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout +2026-03-24 13:29:28,354 INFO MainThread:2988286 [wandb_init.py:init():1041] starting run threads in backend +2026-03-24 13:29:28,520 INFO MainThread:2988286 [wandb_run.py:_console_start():2521] atexit reg +2026-03-24 13:29:28,520 INFO MainThread:2988286 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2026-03-24 13:29:28,520 INFO MainThread:2988286 [wandb_run.py:_redirect():2438] Wrapping output streams. +2026-03-24 13:29:28,520 INFO MainThread:2988286 [wandb_run.py:_redirect():2461] Redirects installed. +2026-03-24 13:29:28,525 INFO MainThread:2988286 [wandb_init.py:init():1081] run started, returning control to user process +2026-03-24 13:29:36,827 INFO wandb-AsyncioManager-main:2988286 [service_client.py:_forward_responses():80] Reached EOF. +2026-03-24 13:29:36,827 INFO wandb-AsyncioManager-main:2988286 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles. 
diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-768ms08v/run-768ms08v.wandb b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-768ms08v/run-768ms08v.wandb new file mode 100644 index 0000000000000000000000000000000000000000..6f30e4f63cfab47d91e67527442313b703224828 Binary files /dev/null and b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-768ms08v/run-768ms08v.wandb differ diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-kw7l95l5/files/config.yaml b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-kw7l95l5/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0c2902fc016354a7e7c272fed08020ca7846e635 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-kw7l95l5/files/config.yaml @@ -0,0 +1,108 @@ +_wandb: + value: + cli_version: 0.23.1 + e: + ut8wv34cfoi15ory9yuam5gscoiv12yd: + args: + - --model_name_or_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained + - --tokenizer_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json + - --trust_remote_code + - "True" + - --data_path + - /data/nanhuang/Nan/ft_data/drosophila_enhancers_stark/split + - --kmer + - "-1" + - --run_name + - base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42 + - --model_max_length + - "512" + - --per_device_train_batch_size + - "128" + - --per_device_eval_batch_size + - "128" + - --gradient_accumulation_steps + - "1" + - --learning_rate + - "3e-5" + - --weight_decay + - "0.05" + - --num_train_epochs + - "4" + - --lr_scheduler_type + - linear + - --warmup_steps + - "0" + - --warmup_ratio + - "0.15" + - --fp16 + - --output_dir + - genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42 + - --evaluation_strategy + - epoch + - --save_strategy + - epoch + - --load_best_model_at_end + - "True" + - --metric_for_best_model + - eval_f1 + - --greater_is_better + - "True" + - --save_total_limit + - "1" + - --logging_steps + 
- "100" + - --overwrite_output_dir + - "True" + - --log_level + - info + - --seed + - "42" + - --find_unused_parameters + - "False" + - --project_name + - genomic_bench_DNAbert2 + codePath: train.py + codePathLocal: train.py + cpu_count: 64 + cpu_count_logical: 128 + disk: + /: + total: "3768964489216" + used: "3559222145024" + email: n5huang@ucsd.edu + executable: /data/nanhuang/miniconda3/envs/bpe_v2/bin/python + host: u112222 + memory: + total: "1082030182400" + os: Linux-5.15.0-126-generic-x86_64-with-glibc2.35 + program: /data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py + python: CPython 3.9.18 + root: /data/nanhuang/Nan/Finetune-GenomicBenchmarks + startedAt: "2026-03-24T20:29:22.347104Z" + writerId: ut8wv34cfoi15ory9yuam5gscoiv12yd + m: [] + python_version: 3.9.18 + t: + "1": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "2": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "4": 3.9.18 + "5": 0.23.1 + "6": 4.35.2 + "12": 0.23.1 + "13": linux-x86_64 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-kw7l95l5/files/output.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-kw7l95l5/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..6747ea3c592c1b3ea3e9c7bafc36189af210a4c1 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-kw7l95l5/files/output.log @@ -0,0 +1,45 @@ +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... +Some weights of BertForSequenceClassification were not initialized from the model checkpoint at /data/nanhuang/Nan/models/DNAbert2_Pretrained and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias', 'classifier.weight', 'classifier.bias'] +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. 
+Traceback (most recent call last): + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 473, in + train() + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 424, in train + model = transformers.AutoModelForSequenceClassification.from_pretrained( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/modeling_utils.py", line 2271, in to + return super().to(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1369, in to + return self._apply(convert) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 955, in _apply + param_applied = fn(param) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1355, in convert + return t.to( +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 12.00 MiB. GPU 0 has a total capacity of 47.53 GiB of which 27.38 MiB is free. Process 2988293 has 21.00 GiB memory in use. Process 2988247 has 13.12 GiB memory in use. Process 2988061 has 7.12 GiB memory in use. Process 2988353 has 574.00 MiB memory in use. Process 2988034 has 434.00 MiB memory in use. Process 2988245 has 260.00 MiB memory in use. Process 2988374 has 260.00 MiB memory in use. Process 2988531 has 260.00 MiB memory in use. Process 2988064 has 260.00 MiB memory in use. Process 2988566 has 260.00 MiB memory in use. Process 2988668 has 260.00 MiB memory in use. 
Process 2988116 has 260.00 MiB memory in use. Process 2988093 has 260.00 MiB memory in use. Process 2988637 has 260.00 MiB memory in use. Process 2988554 has 260.00 MiB memory in use. Process 2988906 has 260.00 MiB memory in use. Process 2988874 has 260.00 MiB memory in use. Process 2988923 has 272.00 MiB memory in use. Process 2988286 has 260.00 MiB memory in use. Process 2989072 has 260.00 MiB memory in use. Process 2988448 has 260.00 MiB memory in use. Process 2988348 has 260.00 MiB memory in use. Including non-PyTorch memory, this process has 260.00 MiB memory in use. Process 2988855 has 260.00 MiB memory in use. Process 2988814 has 8.00 MiB memory in use. Process 2988511 has 4.00 MiB memory in use. Process 2988942 has 6.00 MiB memory in use. Process 2989897 has 4.00 MiB memory in use. Process 2989914 has 10.00 MiB memory in use. Process 2989070 has 14.00 MiB memory in use. Process 2989165 has 10.00 MiB memory in use. Process 2989068 has 10.00 MiB memory in use. Process 2990500 has 10.00 MiB memory in use. Process 2989449 has 10.00 MiB memory in use. Process 2988447 has 14.00 MiB memory in use. Process 2988379 has 10.00 MiB memory in use. Process 2988518 has 10.00 MiB memory in use. Process 2988326 has 10.00 MiB memory in use. Process 2988462 has 10.00 MiB memory in use. Process 2989297 has 10.00 MiB memory in use. Process 2988927 has 10.00 MiB memory in use. Process 2988994 has 10.00 MiB memory in use. Process 2988205 has 10.00 MiB memory in use. Of the allocated memory 0 bytes is allocated by PyTorch, and 0 bytes is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. 
See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) +Traceback (most recent call last): + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 473, in + train() + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 424, in train + model = transformers.AutoModelForSequenceClassification.from_pretrained( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/modeling_utils.py", line 2271, in to + return super().to(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1369, in to + return self._apply(convert) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 955, in _apply + param_applied = fn(param) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1355, in convert + return t.to( +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 12.00 MiB. GPU 0 has a total capacity of 47.53 GiB of which 27.38 MiB is free. Process 2988293 has 21.00 GiB memory in use. Process 2988247 has 13.12 GiB memory in use. Process 2988061 has 7.12 GiB memory in use. Process 2988353 has 574.00 MiB memory in use. Process 2988034 has 434.00 MiB memory in use. Process 2988245 has 260.00 MiB memory in use. Process 2988374 has 260.00 MiB memory in use. Process 2988531 has 260.00 MiB memory in use. Process 2988064 has 260.00 MiB memory in use. 
Process 2988566 has 260.00 MiB memory in use. Process 2988668 has 260.00 MiB memory in use. Process 2988116 has 260.00 MiB memory in use. Process 2988093 has 260.00 MiB memory in use. Process 2988637 has 260.00 MiB memory in use. Process 2988554 has 260.00 MiB memory in use. Process 2988906 has 260.00 MiB memory in use. Process 2988874 has 260.00 MiB memory in use. Process 2988923 has 272.00 MiB memory in use. Process 2988286 has 260.00 MiB memory in use. Process 2989072 has 260.00 MiB memory in use. Process 2988448 has 260.00 MiB memory in use. Process 2988348 has 260.00 MiB memory in use. Including non-PyTorch memory, this process has 260.00 MiB memory in use. Process 2988855 has 260.00 MiB memory in use. Process 2988814 has 8.00 MiB memory in use. Process 2988511 has 4.00 MiB memory in use. Process 2988942 has 6.00 MiB memory in use. Process 2989897 has 4.00 MiB memory in use. Process 2989914 has 10.00 MiB memory in use. Process 2989070 has 14.00 MiB memory in use. Process 2989165 has 10.00 MiB memory in use. Process 2989068 has 10.00 MiB memory in use. Process 2990500 has 10.00 MiB memory in use. Process 2989449 has 10.00 MiB memory in use. Process 2988447 has 14.00 MiB memory in use. Process 2988379 has 10.00 MiB memory in use. Process 2988518 has 10.00 MiB memory in use. Process 2988326 has 10.00 MiB memory in use. Process 2988462 has 10.00 MiB memory in use. Process 2989297 has 10.00 MiB memory in use. Process 2988927 has 10.00 MiB memory in use. Process 2988994 has 10.00 MiB memory in use. Process 2988205 has 10.00 MiB memory in use. Of the allocated memory 0 bytes is allocated by PyTorch, and 0 bytes is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. 
See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-kw7l95l5/files/requirements.txt b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-kw7l95l5/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..44d18d32ec4648cd530877d7c8c4758d5e887b9c --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-kw7l95l5/files/requirements.txt @@ -0,0 +1,144 @@ +scipy==1.13.1 +jupyter_core==5.8.1 +smmap==5.0.2 +yarl==1.22.0 +executing==2.2.0 +cffi==2.0.0 +mkl_random==1.2.8 +traitlets==5.14.3 +wandb==0.23.1 +annotated-types==0.7.0 +evaluate==0.4.6 +kiwisolver==1.4.4 +Jinja2==3.1.6 +pyparsing==3.2.0 +mpmath==1.3.0 +debugpy==1.8.16 +nvidia-cuda-nvrtc-cu12==12.8.93 +docker-pycreds==0.4.0 +pycparser==2.23 +anyio==4.12.0 +safetensors==0.7.0 +matplotlib-inline==0.1.7 +Pygments==2.19.2 +numpy==2.0.2 +nvidia-cuda-cupti-cu12==12.8.90 +Bottleneck==1.4.2 +matplotlib==3.9.2 +numexpr==2.10.1 +sip==6.7.12 +aiohappyeyeballs==2.6.1 +ptyprocess==0.7.0 +fsspec==2025.7.0 +accelerate==0.25.0 +zipp==3.23.0 +PyQt5_sip==12.13.0 +pure_eval==0.2.3 +regex==2025.11.3 +aiosignal==1.4.0 +certifi==2025.10.5 +transformers==4.35.2 +mkl-service==2.4.0 +httpx==0.28.1 +mkl_fft==1.3.11 +pickleshare==0.7.5 +ipykernel==6.30.1 +nvidia-nvtx-cu12==12.8.90 +nvidia-cufft-cu12==11.3.3.83 +triton==3.4.0 +numba==0.60.0 +psutil==7.0.0 +contourpy==1.2.1 +PyQt5==5.15.10 +packaging==25.0 +datasets==4.4.1 +ipython==8.18.1 +sympy==1.14.0 +nvidia-cusolver-cu12==11.7.3.90 +multidict==6.7.0 +jupyter_client==8.6.3 +setuptools==80.9.0 +prompt_toolkit==3.0.51 +six==1.17.0 +GitPython==3.1.45 +pydantic==2.11.7 +nvidia-cublas-cu12==12.8.4.1 +aiohttp==3.13.2 +tzdata==2025.2 +importlib_metadata==8.7.0 +biopython==1.85 +httpcore==1.0.9 +python-dateutil==2.9.0.post0 +llvmlite==0.43.0 +pandas==2.3.3 +scikit-learn==1.6.1 +asttokens==3.0.0 +joblib==1.5.3 +h11==0.16.0 
+charset-normalizer==3.4.4 +pyzmq==27.0.2 +multiprocess==0.70.18 +nvidia-nvjitlink-cu12==12.8.93 +sentry-sdk==2.35.0 +pytz==2025.2 +pydantic_core==2.33.2 +MarkupSafe==3.0.3 +brotlicffi==1.0.9.2 +stack_data==0.6.3 +tqdm==4.67.1 +pynndescent==0.5.13 +importlib_resources==6.5.2 +ply==3.11 +pyarrow==21.0.0 +tokenizers==0.15.2 +exceptiongroup==1.3.1 +nvidia-cusparse-cu12==12.5.8.93 +comm==0.2.3 +pillow==11.3.0 +nvidia-cusparselt-cu12==0.7.1 +protobuf==3.20.3 +urllib3==2.5.0 +wheel==0.45.1 +wcwidth==0.2.13 +appdirs==1.4.4 +PySocks==1.7.1 +PyQt6_sip==13.10.2 +umap-learn==0.5.9.post2 +attrs==25.4.0 +platformdirs==4.3.8 +nvidia-cuda-runtime-cu12==12.8.90 +typing-inspection==0.4.1 +huggingface_hub==0.34.4 +decorator==5.2.1 +filelock==3.17.0 +nvidia-nccl-cu12==2.27.3 +fonttools==4.60.1 +xxhash==3.6.0 +dill==0.4.0 +threadpoolctl==3.6.0 +parso==0.8.4 +pysam==0.9.1 +frozenlist==1.8.0 +typing_extensions==4.15.0 +propcache==0.4.1 +tomli==2.2.1 +click==8.1.8 +nvidia-cudnn-cu12==9.10.2.21 +gitdb==4.0.12 +pip==25.3 +tornado==6.5.2 +networkx==3.2.1 +jedi==0.19.2 +idna==3.11 +pexpect==4.9.0 +async-timeout==5.0.1 +hf-xet==1.1.8 +nvidia-curand-cu12==10.3.9.90 +PyYAML==6.0.2 +nvidia-cufile-cu12==1.13.1.3 +setproctitle==1.3.6 +eval_type_backport==0.2.2 +requests==2.32.5 +nest-asyncio==1.6.0 +torch==2.8.0 +cycler==0.11.0 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-kw7l95l5/files/wandb-metadata.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-kw7l95l5/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..9b9e729bf54dbc9c58327b105a3f1691f16e00c5 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-kw7l95l5/files/wandb-metadata.json @@ -0,0 +1,85 @@ +{ + "os": "Linux-5.15.0-126-generic-x86_64-with-glibc2.35", + "python": "CPython 3.9.18", + "startedAt": "2026-03-24T20:29:22.347104Z", + "args": [ + "--model_name_or_path", + "/data/nanhuang/Nan/models/DNAbert2_Pretrained", + "--tokenizer_path", + 
"/data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json", + "--trust_remote_code", + "True", + "--data_path", + "/data/nanhuang/Nan/ft_data/drosophila_enhancers_stark/split", + "--kmer", + "-1", + "--run_name", + "base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42", + "--model_max_length", + "512", + "--per_device_train_batch_size", + "128", + "--per_device_eval_batch_size", + "128", + "--gradient_accumulation_steps", + "1", + "--learning_rate", + "3e-5", + "--weight_decay", + "0.05", + "--num_train_epochs", + "4", + "--lr_scheduler_type", + "linear", + "--warmup_steps", + "0", + "--warmup_ratio", + "0.15", + "--fp16", + "--output_dir", + "genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42", + "--evaluation_strategy", + "epoch", + "--save_strategy", + "epoch", + "--load_best_model_at_end", + "True", + "--metric_for_best_model", + "eval_f1", + "--greater_is_better", + "True", + "--save_total_limit", + "1", + "--logging_steps", + "100", + "--overwrite_output_dir", + "True", + "--log_level", + "info", + "--seed", + "42", + "--find_unused_parameters", + "False", + "--project_name", + "genomic_bench_DNAbert2" + ], + "program": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", + "codePath": "train.py", + "codePathLocal": "train.py", + "email": "n5huang@ucsd.edu", + "root": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks", + "host": "u112222", + "executable": "/data/nanhuang/miniconda3/envs/bpe_v2/bin/python", + "cpu_count": 64, + "cpu_count_logical": 128, + "disk": { + "/": { + "total": "3768964489216", + "used": "3559222145024" + } + }, + "memory": { + "total": "1082030182400" + }, + "writerId": "ut8wv34cfoi15ory9yuam5gscoiv12yd" +} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-kw7l95l5/files/wandb-summary.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-kw7l95l5/files/wandb-summary.json new file mode 100644 index 
0000000000000000000000000000000000000000..2320ce95111d4a7914cdfba4a4e05238d5d8f179 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-kw7l95l5/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb":{"runtime":8},"_runtime":8} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-kw7l95l5/logs/debug-core.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-kw7l95l5/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..f3c7306f2d91c07d67aa689ffc4a0f343c8f95ea --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-kw7l95l5/logs/debug-core.log @@ -0,0 +1,56 @@ +{"time":"2026-03-24T13:29:22.284047562-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpav27xyq1/port-2988286.txt","pid":2988286,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:22.285625233-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988286} +{"time":"2026-03-24T13:29:22.285588633-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988286-2998625-3155261872/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:22.468117997-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:22.494433712-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmp1tkse6w3/port-2989072.txt","pid":2989072,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:22.495445966-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2989072} +{"time":"2026-03-24T13:29:22.495431786-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2989072-2998645-3328255346/socket","Net":"unix"}} 
+{"time":"2026-03-24T13:29:22.495853584-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpmc89vteo/port-2988736.txt","pid":2988736,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:22.496759738-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988736} +{"time":"2026-03-24T13:29:22.497413744-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988736-2998644-2395824398/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:22.511369532-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpdfnqxozc/port-2988448.txt","pid":2988448,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:22.512582405-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988448} +{"time":"2026-03-24T13:29:22.512540005-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988448-2998647-1262891246/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:22.543414403-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"768ms08v","id":"1(@)"} +{"time":"2026-03-24T13:29:22.677281384-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:22.678732355-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:22.689293983-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:22.728188143-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"pwb2s09u","id":"1(@)"} +{"time":"2026-03-24T13:29:22.730618369-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"kw7l95l5","id":"1(@)"} 
+{"time":"2026-03-24T13:29:22.741889473-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"30huih24","id":"1(@)"} +{"time":"2026-03-24T13:29:27.758310425-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"768ms08v","id":"1(@)"} +{"time":"2026-03-24T13:29:27.911334083-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"kw7l95l5","id":"1(@)"} +{"time":"2026-03-24T13:29:27.92876088-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"pwb2s09u","id":"1(@)"} +{"time":"2026-03-24T13:29:27.951134668-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"30huih24","id":"1(@)"} +{"time":"2026-03-24T13:29:36.827010682-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:36.827111841-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:36.827097321-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:36.82730241-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988286-2998625-3155261872/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:36.82736769-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:36.88339996-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:36.883488939-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:36.883478029-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:36.883638928-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988736-2998644-2395824398/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:36.883701208-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} 
+{"time":"2026-03-24T13:29:36.933077976-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:36.933179796-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:36.933166776-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:36.933402095-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:36.933425005-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988448-2998647-1262891246/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:37.00036064-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:37.000492309-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:37.000485809-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:37.000715598-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:37.000790127-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2989072-2998645-3328255346/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:37.723219699-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:37.723317478-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:37.723373088-07:00","level":"INFO","msg":"server is closed"} +{"time":"2026-03-24T13:29:38.804777103-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:38.804830543-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:38.804844043-07:00","level":"INFO","msg":"server is closed"} 
+{"time":"2026-03-24T13:29:40.954475202-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:40.954669681-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:40.954684581-07:00","level":"INFO","msg":"server is closed"} +{"time":"2026-03-24T13:29:53.48518493-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:53.48524762-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:53.48527483-07:00","level":"INFO","msg":"server is closed"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-kw7l95l5/logs/debug-internal.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-kw7l95l5/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..9c34a7f78373aff568fa6fbe4eb626d4120526aa --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-kw7l95l5/logs/debug-internal.log @@ -0,0 +1,12 @@ +{"time":"2026-03-24T13:29:22.730812338-07:00","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2026-03-24T13:29:27.788803515-07:00","level":"ERROR","msg":"monitor: failed to initialize GPU resource: monitor: could not get GPU binary port: timeout reading portfile /tmp/wandb-system-monitor-portfile-1504594105"} +{"time":"2026-03-24T13:29:27.910682117-07:00","level":"INFO","msg":"stream: created new stream","id":"kw7l95l5"} +{"time":"2026-03-24T13:29:27.910858446-07:00","level":"INFO","msg":"handler: started","stream_id":"kw7l95l5"} +{"time":"2026-03-24T13:29:27.911312703-07:00","level":"INFO","msg":"stream: started","id":"kw7l95l5"} +{"time":"2026-03-24T13:29:27.911338503-07:00","level":"INFO","msg":"writer: started","stream_id":"kw7l95l5"} +{"time":"2026-03-24T13:29:27.911349903-07:00","level":"INFO","msg":"sender: 
started","stream_id":"kw7l95l5"} +{"time":"2026-03-24T13:29:36.883501199-07:00","level":"INFO","msg":"stream: closing","id":"kw7l95l5"} +{"time":"2026-03-24T13:29:37.290113972-07:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2026-03-24T13:29:40.926159019-07:00","level":"INFO","msg":"handler: closed","stream_id":"kw7l95l5"} +{"time":"2026-03-24T13:29:40.926374468-07:00","level":"INFO","msg":"sender: closed","stream_id":"kw7l95l5"} +{"time":"2026-03-24T13:29:40.926389008-07:00","level":"INFO","msg":"stream: closed","id":"kw7l95l5"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-kw7l95l5/logs/debug.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-kw7l95l5/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..c7378b18479e1ccf5cd8eedea5b3f6a7c50353e9 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-kw7l95l5/logs/debug.log @@ -0,0 +1,23 @@ +2026-03-24 13:29:22,353 INFO MainThread:2988736 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1 +2026-03-24 13:29:22,353 INFO MainThread:2988736 [wandb_setup.py:_flush():80] Configure stats pid to 2988736 +2026-03-24 13:29:22,353 INFO MainThread:2988736 [wandb_setup.py:_flush():80] Loading settings from /home/nanhuang/.config/wandb/settings +2026-03-24 13:29:22,353 INFO MainThread:2988736 [wandb_setup.py:_flush():80] Loading settings from /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/settings +2026-03-24 13:29:22,353 INFO MainThread:2988736 [wandb_setup.py:_flush():80] Loading settings from environment variables +2026-03-24 13:29:22,353 INFO MainThread:2988736 [wandb_init.py:setup_run_log_directory():714] Logging user logs to /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-kw7l95l5/logs/debug.log +2026-03-24 13:29:22,353 INFO MainThread:2988736 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to 
/data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-kw7l95l5/logs/debug-internal.log +2026-03-24 13:29:22,354 INFO MainThread:2988736 [wandb_init.py:init():841] calling init triggers +2026-03-24 13:29:22,354 INFO MainThread:2988736 [wandb_init.py:init():846] wandb.init called with sweep_config: {} +config: {'_wandb': {}} +2026-03-24 13:29:22,354 INFO MainThread:2988736 [wandb_init.py:init():889] starting backend +2026-03-24 13:29:22,679 INFO MainThread:2988736 [wandb_init.py:init():892] sending inform_init request +2026-03-24 13:29:22,728 INFO MainThread:2988736 [wandb_init.py:init():900] backend started and connected +2026-03-24 13:29:22,735 INFO MainThread:2988736 [wandb_init.py:init():970] updated telemetry +2026-03-24 13:29:22,737 INFO MainThread:2988736 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout +2026-03-24 13:29:28,404 INFO MainThread:2988736 [wandb_init.py:init():1041] starting run threads in backend +2026-03-24 13:29:28,562 INFO MainThread:2988736 [wandb_run.py:_console_start():2521] atexit reg +2026-03-24 13:29:28,563 INFO MainThread:2988736 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2026-03-24 13:29:28,563 INFO MainThread:2988736 [wandb_run.py:_redirect():2438] Wrapping output streams. +2026-03-24 13:29:28,563 INFO MainThread:2988736 [wandb_run.py:_redirect():2461] Redirects installed. +2026-03-24 13:29:28,569 INFO MainThread:2988736 [wandb_init.py:init():1081] run started, returning control to user process +2026-03-24 13:29:36,883 INFO wandb-AsyncioManager-main:2988736 [service_client.py:_forward_responses():80] Reached EOF. +2026-03-24 13:29:36,883 INFO wandb-AsyncioManager-main:2988736 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles. 
diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-kw7l95l5/run-kw7l95l5.wandb b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-kw7l95l5/run-kw7l95l5.wandb new file mode 100644 index 0000000000000000000000000000000000000000..faf08640e0511080e2099124885151d8d2bfbc2b Binary files /dev/null and b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-kw7l95l5/run-kw7l95l5.wandb differ diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-pwb2s09u/files/config.yaml b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-pwb2s09u/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4a6629eea29ab1a76820f19f3a94dbbdc289e20f --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-pwb2s09u/files/config.yaml @@ -0,0 +1,108 @@ +_wandb: + value: + cli_version: 0.23.1 + e: + 1o9hw2euudoi88zzuk2xtp2bbp7i8mee: + args: + - --model_name_or_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained + - --tokenizer_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json + - --trust_remote_code + - "True" + - --data_path + - /data/nanhuang/Nan/ft_data/drosophila_enhancers_stark/split + - --kmer + - "-1" + - --run_name + - base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42 + - --model_max_length + - "512" + - --per_device_train_batch_size + - "128" + - --per_device_eval_batch_size + - "128" + - --gradient_accumulation_steps + - "1" + - --learning_rate + - "3e-5" + - --weight_decay + - "0.05" + - --num_train_epochs + - "4" + - --lr_scheduler_type + - linear + - --warmup_steps + - "0" + - --warmup_ratio + - "0.15" + - --fp16 + - --output_dir + - genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42 + - --evaluation_strategy + - epoch + - --save_strategy + - epoch + - --load_best_model_at_end + - "True" + - --metric_for_best_model + - eval_f1 + - --greater_is_better + - "True" + - --save_total_limit + - "1" + - --logging_steps + 
- "100" + - --overwrite_output_dir + - "True" + - --log_level + - info + - --seed + - "42" + - --find_unused_parameters + - "False" + - --project_name + - genomic_bench_DNAbert2 + codePath: train.py + codePathLocal: train.py + cpu_count: 64 + cpu_count_logical: 128 + disk: + /: + total: "3768964489216" + used: "3559222145024" + email: n5huang@ucsd.edu + executable: /data/nanhuang/miniconda3/envs/bpe_v2/bin/python + host: u112222 + memory: + total: "1082030182400" + os: Linux-5.15.0-126-generic-x86_64-with-glibc2.35 + program: /data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py + python: CPython 3.9.18 + root: /data/nanhuang/Nan/Finetune-GenomicBenchmarks + startedAt: "2026-03-24T20:29:22.360032Z" + writerId: 1o9hw2euudoi88zzuk2xtp2bbp7i8mee + m: [] + python_version: 3.9.18 + t: + "1": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "2": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "4": 3.9.18 + "5": 0.23.1 + "6": 4.35.2 + "12": 0.23.1 + "13": linux-x86_64 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-pwb2s09u/files/output.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-pwb2s09u/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..52e3343b33f407e813c1b0c62f59281e16ba162f --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-pwb2s09u/files/output.log @@ -0,0 +1,45 @@ +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... +Some weights of BertForSequenceClassification were not initialized from the model checkpoint at /data/nanhuang/Nan/models/DNAbert2_Pretrained and are newly initialized: ['bert.pooler.dense.bias', 'classifier.bias', 'classifier.weight', 'bert.pooler.dense.weight'] +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. 
+Traceback (most recent call last): + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 473, in + train() + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 424, in train + model = transformers.AutoModelForSequenceClassification.from_pretrained( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/modeling_utils.py", line 2271, in to + return super().to(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1369, in to + return self._apply(convert) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 955, in _apply + param_applied = fn(param) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1355, in convert + return t.to( +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 12.00 MiB. GPU 0 has a total capacity of 47.53 GiB of which 27.38 MiB is free. Process 2988293 has 21.00 GiB memory in use. Process 2988247 has 13.12 GiB memory in use. Process 2988061 has 7.12 GiB memory in use. Process 2988353 has 574.00 MiB memory in use. Process 2988034 has 434.00 MiB memory in use. Process 2988245 has 260.00 MiB memory in use. Process 2988374 has 260.00 MiB memory in use. Process 2988531 has 260.00 MiB memory in use. Process 2988064 has 260.00 MiB memory in use. Process 2988566 has 260.00 MiB memory in use. Process 2988668 has 260.00 MiB memory in use. 
Process 2988116 has 260.00 MiB memory in use. Process 2988093 has 260.00 MiB memory in use. Process 2988637 has 260.00 MiB memory in use. Process 2988554 has 260.00 MiB memory in use. Process 2988906 has 260.00 MiB memory in use. Process 2988874 has 260.00 MiB memory in use. Process 2988923 has 272.00 MiB memory in use. Process 2988286 has 260.00 MiB memory in use. Including non-PyTorch memory, this process has 260.00 MiB memory in use. Process 2988448 has 260.00 MiB memory in use. Process 2988348 has 260.00 MiB memory in use. Process 2988736 has 260.00 MiB memory in use. Process 2988855 has 260.00 MiB memory in use. Process 2988814 has 8.00 MiB memory in use. Process 2988511 has 4.00 MiB memory in use. Process 2988942 has 6.00 MiB memory in use. Process 2989897 has 4.00 MiB memory in use. Process 2989914 has 10.00 MiB memory in use. Process 2989070 has 14.00 MiB memory in use. Process 2989165 has 10.00 MiB memory in use. Process 2989068 has 10.00 MiB memory in use. Process 2990500 has 10.00 MiB memory in use. Process 2989449 has 10.00 MiB memory in use. Process 2988447 has 14.00 MiB memory in use. Process 2988379 has 10.00 MiB memory in use. Process 2988518 has 10.00 MiB memory in use. Process 2988326 has 10.00 MiB memory in use. Process 2988462 has 10.00 MiB memory in use. Process 2989297 has 10.00 MiB memory in use. Process 2988927 has 10.00 MiB memory in use. Process 2988994 has 10.00 MiB memory in use. Process 2988205 has 10.00 MiB memory in use. Of the allocated memory 0 bytes is allocated by PyTorch, and 0 bytes is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. 
See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) +Traceback (most recent call last): + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 473, in + train() + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 424, in train + model = transformers.AutoModelForSequenceClassification.from_pretrained( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/modeling_utils.py", line 2271, in to + return super().to(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1369, in to + return self._apply(convert) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 955, in _apply + param_applied = fn(param) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1355, in convert + return t.to( +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 12.00 MiB. GPU 0 has a total capacity of 47.53 GiB of which 27.38 MiB is free. Process 2988293 has 21.00 GiB memory in use. Process 2988247 has 13.12 GiB memory in use. Process 2988061 has 7.12 GiB memory in use. Process 2988353 has 574.00 MiB memory in use. Process 2988034 has 434.00 MiB memory in use. Process 2988245 has 260.00 MiB memory in use. Process 2988374 has 260.00 MiB memory in use. Process 2988531 has 260.00 MiB memory in use. Process 2988064 has 260.00 MiB memory in use. 
Process 2988566 has 260.00 MiB memory in use. Process 2988668 has 260.00 MiB memory in use. Process 2988116 has 260.00 MiB memory in use. Process 2988093 has 260.00 MiB memory in use. Process 2988637 has 260.00 MiB memory in use. Process 2988554 has 260.00 MiB memory in use. Process 2988906 has 260.00 MiB memory in use. Process 2988874 has 260.00 MiB memory in use. Process 2988923 has 272.00 MiB memory in use. Process 2988286 has 260.00 MiB memory in use. Including non-PyTorch memory, this process has 260.00 MiB memory in use. Process 2988448 has 260.00 MiB memory in use. Process 2988348 has 260.00 MiB memory in use. Process 2988736 has 260.00 MiB memory in use. Process 2988855 has 260.00 MiB memory in use. Process 2988814 has 8.00 MiB memory in use. Process 2988511 has 4.00 MiB memory in use. Process 2988942 has 6.00 MiB memory in use. Process 2989897 has 4.00 MiB memory in use. Process 2989914 has 10.00 MiB memory in use. Process 2989070 has 14.00 MiB memory in use. Process 2989165 has 10.00 MiB memory in use. Process 2989068 has 10.00 MiB memory in use. Process 2990500 has 10.00 MiB memory in use. Process 2989449 has 10.00 MiB memory in use. Process 2988447 has 14.00 MiB memory in use. Process 2988379 has 10.00 MiB memory in use. Process 2988518 has 10.00 MiB memory in use. Process 2988326 has 10.00 MiB memory in use. Process 2988462 has 10.00 MiB memory in use. Process 2989297 has 10.00 MiB memory in use. Process 2988927 has 10.00 MiB memory in use. Process 2988994 has 10.00 MiB memory in use. Process 2988205 has 10.00 MiB memory in use. Of the allocated memory 0 bytes is allocated by PyTorch, and 0 bytes is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. 
See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-pwb2s09u/files/requirements.txt b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-pwb2s09u/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..44d18d32ec4648cd530877d7c8c4758d5e887b9c --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-pwb2s09u/files/requirements.txt @@ -0,0 +1,144 @@ +scipy==1.13.1 +jupyter_core==5.8.1 +smmap==5.0.2 +yarl==1.22.0 +executing==2.2.0 +cffi==2.0.0 +mkl_random==1.2.8 +traitlets==5.14.3 +wandb==0.23.1 +annotated-types==0.7.0 +evaluate==0.4.6 +kiwisolver==1.4.4 +Jinja2==3.1.6 +pyparsing==3.2.0 +mpmath==1.3.0 +debugpy==1.8.16 +nvidia-cuda-nvrtc-cu12==12.8.93 +docker-pycreds==0.4.0 +pycparser==2.23 +anyio==4.12.0 +safetensors==0.7.0 +matplotlib-inline==0.1.7 +Pygments==2.19.2 +numpy==2.0.2 +nvidia-cuda-cupti-cu12==12.8.90 +Bottleneck==1.4.2 +matplotlib==3.9.2 +numexpr==2.10.1 +sip==6.7.12 +aiohappyeyeballs==2.6.1 +ptyprocess==0.7.0 +fsspec==2025.7.0 +accelerate==0.25.0 +zipp==3.23.0 +PyQt5_sip==12.13.0 +pure_eval==0.2.3 +regex==2025.11.3 +aiosignal==1.4.0 +certifi==2025.10.5 +transformers==4.35.2 +mkl-service==2.4.0 +httpx==0.28.1 +mkl_fft==1.3.11 +pickleshare==0.7.5 +ipykernel==6.30.1 +nvidia-nvtx-cu12==12.8.90 +nvidia-cufft-cu12==11.3.3.83 +triton==3.4.0 +numba==0.60.0 +psutil==7.0.0 +contourpy==1.2.1 +PyQt5==5.15.10 +packaging==25.0 +datasets==4.4.1 +ipython==8.18.1 +sympy==1.14.0 +nvidia-cusolver-cu12==11.7.3.90 +multidict==6.7.0 +jupyter_client==8.6.3 +setuptools==80.9.0 +prompt_toolkit==3.0.51 +six==1.17.0 +GitPython==3.1.45 +pydantic==2.11.7 +nvidia-cublas-cu12==12.8.4.1 +aiohttp==3.13.2 +tzdata==2025.2 +importlib_metadata==8.7.0 +biopython==1.85 +httpcore==1.0.9 +python-dateutil==2.9.0.post0 +llvmlite==0.43.0 +pandas==2.3.3 +scikit-learn==1.6.1 +asttokens==3.0.0 +joblib==1.5.3 +h11==0.16.0 
+charset-normalizer==3.4.4 +pyzmq==27.0.2 +multiprocess==0.70.18 +nvidia-nvjitlink-cu12==12.8.93 +sentry-sdk==2.35.0 +pytz==2025.2 +pydantic_core==2.33.2 +MarkupSafe==3.0.3 +brotlicffi==1.0.9.2 +stack_data==0.6.3 +tqdm==4.67.1 +pynndescent==0.5.13 +importlib_resources==6.5.2 +ply==3.11 +pyarrow==21.0.0 +tokenizers==0.15.2 +exceptiongroup==1.3.1 +nvidia-cusparse-cu12==12.5.8.93 +comm==0.2.3 +pillow==11.3.0 +nvidia-cusparselt-cu12==0.7.1 +protobuf==3.20.3 +urllib3==2.5.0 +wheel==0.45.1 +wcwidth==0.2.13 +appdirs==1.4.4 +PySocks==1.7.1 +PyQt6_sip==13.10.2 +umap-learn==0.5.9.post2 +attrs==25.4.0 +platformdirs==4.3.8 +nvidia-cuda-runtime-cu12==12.8.90 +typing-inspection==0.4.1 +huggingface_hub==0.34.4 +decorator==5.2.1 +filelock==3.17.0 +nvidia-nccl-cu12==2.27.3 +fonttools==4.60.1 +xxhash==3.6.0 +dill==0.4.0 +threadpoolctl==3.6.0 +parso==0.8.4 +pysam==0.9.1 +frozenlist==1.8.0 +typing_extensions==4.15.0 +propcache==0.4.1 +tomli==2.2.1 +click==8.1.8 +nvidia-cudnn-cu12==9.10.2.21 +gitdb==4.0.12 +pip==25.3 +tornado==6.5.2 +networkx==3.2.1 +jedi==0.19.2 +idna==3.11 +pexpect==4.9.0 +async-timeout==5.0.1 +hf-xet==1.1.8 +nvidia-curand-cu12==10.3.9.90 +PyYAML==6.0.2 +nvidia-cufile-cu12==1.13.1.3 +setproctitle==1.3.6 +eval_type_backport==0.2.2 +requests==2.32.5 +nest-asyncio==1.6.0 +torch==2.8.0 +cycler==0.11.0 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-pwb2s09u/files/wandb-metadata.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-pwb2s09u/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..0635987555abfd41eb1d7c7ce93362b9d12b44d7 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-pwb2s09u/files/wandb-metadata.json @@ -0,0 +1,85 @@ +{ + "os": "Linux-5.15.0-126-generic-x86_64-with-glibc2.35", + "python": "CPython 3.9.18", + "startedAt": "2026-03-24T20:29:22.360032Z", + "args": [ + "--model_name_or_path", + "/data/nanhuang/Nan/models/DNAbert2_Pretrained", + "--tokenizer_path", + 
"/data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json", + "--trust_remote_code", + "True", + "--data_path", + "/data/nanhuang/Nan/ft_data/drosophila_enhancers_stark/split", + "--kmer", + "-1", + "--run_name", + "base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42", + "--model_max_length", + "512", + "--per_device_train_batch_size", + "128", + "--per_device_eval_batch_size", + "128", + "--gradient_accumulation_steps", + "1", + "--learning_rate", + "3e-5", + "--weight_decay", + "0.05", + "--num_train_epochs", + "4", + "--lr_scheduler_type", + "linear", + "--warmup_steps", + "0", + "--warmup_ratio", + "0.15", + "--fp16", + "--output_dir", + "genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42", + "--evaluation_strategy", + "epoch", + "--save_strategy", + "epoch", + "--load_best_model_at_end", + "True", + "--metric_for_best_model", + "eval_f1", + "--greater_is_better", + "True", + "--save_total_limit", + "1", + "--logging_steps", + "100", + "--overwrite_output_dir", + "True", + "--log_level", + "info", + "--seed", + "42", + "--find_unused_parameters", + "False", + "--project_name", + "genomic_bench_DNAbert2" + ], + "program": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", + "codePath": "train.py", + "codePathLocal": "train.py", + "email": "n5huang@ucsd.edu", + "root": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks", + "host": "u112222", + "executable": "/data/nanhuang/miniconda3/envs/bpe_v2/bin/python", + "cpu_count": 64, + "cpu_count_logical": 128, + "disk": { + "/": { + "total": "3768964489216", + "used": "3559222145024" + } + }, + "memory": { + "total": "1082030182400" + }, + "writerId": "1o9hw2euudoi88zzuk2xtp2bbp7i8mee" +} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-pwb2s09u/files/wandb-summary.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-pwb2s09u/files/wandb-summary.json new file mode 100644 index 
0000000000000000000000000000000000000000..2320ce95111d4a7914cdfba4a4e05238d5d8f179 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-pwb2s09u/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb":{"runtime":8},"_runtime":8} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-pwb2s09u/logs/debug-core.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-pwb2s09u/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..f3c7306f2d91c07d67aa689ffc4a0f343c8f95ea --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-pwb2s09u/logs/debug-core.log @@ -0,0 +1,56 @@ +{"time":"2026-03-24T13:29:22.284047562-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpav27xyq1/port-2988286.txt","pid":2988286,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:22.285625233-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988286} +{"time":"2026-03-24T13:29:22.285588633-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988286-2998625-3155261872/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:22.468117997-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:22.494433712-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmp1tkse6w3/port-2989072.txt","pid":2989072,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:22.495445966-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2989072} +{"time":"2026-03-24T13:29:22.495431786-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2989072-2998645-3328255346/socket","Net":"unix"}} 
+{"time":"2026-03-24T13:29:22.495853584-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpmc89vteo/port-2988736.txt","pid":2988736,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:22.496759738-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988736} +{"time":"2026-03-24T13:29:22.497413744-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988736-2998644-2395824398/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:22.511369532-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpdfnqxozc/port-2988448.txt","pid":2988448,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:22.512582405-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988448} +{"time":"2026-03-24T13:29:22.512540005-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988448-2998647-1262891246/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:22.543414403-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"768ms08v","id":"1(@)"} +{"time":"2026-03-24T13:29:22.677281384-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:22.678732355-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:22.689293983-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:22.728188143-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"pwb2s09u","id":"1(@)"} +{"time":"2026-03-24T13:29:22.730618369-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"kw7l95l5","id":"1(@)"} 
+{"time":"2026-03-24T13:29:22.741889473-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"30huih24","id":"1(@)"} +{"time":"2026-03-24T13:29:27.758310425-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"768ms08v","id":"1(@)"} +{"time":"2026-03-24T13:29:27.911334083-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"kw7l95l5","id":"1(@)"} +{"time":"2026-03-24T13:29:27.92876088-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"pwb2s09u","id":"1(@)"} +{"time":"2026-03-24T13:29:27.951134668-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"30huih24","id":"1(@)"} +{"time":"2026-03-24T13:29:36.827010682-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:36.827111841-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:36.827097321-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:36.82730241-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988286-2998625-3155261872/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:36.82736769-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:36.88339996-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:36.883488939-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:36.883478029-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:36.883638928-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988736-2998644-2395824398/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:36.883701208-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} 
+{"time":"2026-03-24T13:29:36.933077976-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:36.933179796-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:36.933166776-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:36.933402095-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:36.933425005-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988448-2998647-1262891246/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:37.00036064-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:37.000492309-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:37.000485809-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:37.000715598-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:37.000790127-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2989072-2998645-3328255346/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:37.723219699-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:37.723317478-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:37.723373088-07:00","level":"INFO","msg":"server is closed"} +{"time":"2026-03-24T13:29:38.804777103-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:38.804830543-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:38.804844043-07:00","level":"INFO","msg":"server is closed"} 
+{"time":"2026-03-24T13:29:40.954475202-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:40.954669681-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:40.954684581-07:00","level":"INFO","msg":"server is closed"} +{"time":"2026-03-24T13:29:53.48518493-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:53.48524762-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:53.48527483-07:00","level":"INFO","msg":"server is closed"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-pwb2s09u/logs/debug-internal.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-pwb2s09u/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..d896938266a26bc4da22cbc65f0289ffbe8440a6 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-pwb2s09u/logs/debug-internal.log @@ -0,0 +1,12 @@ +{"time":"2026-03-24T13:29:22.728456492-07:00","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2026-03-24T13:29:27.786743017-07:00","level":"ERROR","msg":"monitor: failed to initialize GPU resource: monitor: could not get GPU binary port: timeout reading portfile /tmp/wandb-system-monitor-portfile-229529924"} +{"time":"2026-03-24T13:29:27.928233693-07:00","level":"INFO","msg":"stream: created new stream","id":"pwb2s09u"} +{"time":"2026-03-24T13:29:27.928440362-07:00","level":"INFO","msg":"handler: started","stream_id":"pwb2s09u"} +{"time":"2026-03-24T13:29:27.92874225-07:00","level":"INFO","msg":"stream: started","id":"pwb2s09u"} +{"time":"2026-03-24T13:29:27.92876392-07:00","level":"INFO","msg":"writer: started","stream_id":"pwb2s09u"} +{"time":"2026-03-24T13:29:27.92877603-07:00","level":"INFO","msg":"sender: 
started","stream_id":"pwb2s09u"} +{"time":"2026-03-24T13:29:37.000508219-07:00","level":"INFO","msg":"stream: closing","id":"pwb2s09u"} +{"time":"2026-03-24T13:29:37.403279345-07:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2026-03-24T13:29:37.706424418-07:00","level":"INFO","msg":"handler: closed","stream_id":"pwb2s09u"} +{"time":"2026-03-24T13:29:37.706749826-07:00","level":"INFO","msg":"sender: closed","stream_id":"pwb2s09u"} +{"time":"2026-03-24T13:29:37.706802346-07:00","level":"INFO","msg":"stream: closed","id":"pwb2s09u"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-pwb2s09u/logs/debug.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-pwb2s09u/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..65aedea3945c3ee97c710c8ded2d163f05d41810 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-pwb2s09u/logs/debug.log @@ -0,0 +1,23 @@ +2026-03-24 13:29:22,364 INFO MainThread:2989072 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1 +2026-03-24 13:29:22,365 INFO MainThread:2989072 [wandb_setup.py:_flush():80] Configure stats pid to 2989072 +2026-03-24 13:29:22,365 INFO MainThread:2989072 [wandb_setup.py:_flush():80] Loading settings from /home/nanhuang/.config/wandb/settings +2026-03-24 13:29:22,365 INFO MainThread:2989072 [wandb_setup.py:_flush():80] Loading settings from /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/settings +2026-03-24 13:29:22,365 INFO MainThread:2989072 [wandb_setup.py:_flush():80] Loading settings from environment variables +2026-03-24 13:29:22,365 INFO MainThread:2989072 [wandb_init.py:setup_run_log_directory():714] Logging user logs to /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-pwb2s09u/logs/debug.log +2026-03-24 13:29:22,365 INFO MainThread:2989072 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to 
/data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-pwb2s09u/logs/debug-internal.log +2026-03-24 13:29:22,365 INFO MainThread:2989072 [wandb_init.py:init():841] calling init triggers +2026-03-24 13:29:22,365 INFO MainThread:2989072 [wandb_init.py:init():846] wandb.init called with sweep_config: {} +config: {'_wandb': {}} +2026-03-24 13:29:22,365 INFO MainThread:2989072 [wandb_init.py:init():889] starting backend +2026-03-24 13:29:22,677 INFO MainThread:2989072 [wandb_init.py:init():892] sending inform_init request +2026-03-24 13:29:22,726 INFO MainThread:2989072 [wandb_init.py:init():900] backend started and connected +2026-03-24 13:29:22,732 INFO MainThread:2989072 [wandb_init.py:init():970] updated telemetry +2026-03-24 13:29:22,733 INFO MainThread:2989072 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout +2026-03-24 13:29:28,479 INFO MainThread:2989072 [wandb_init.py:init():1041] starting run threads in backend +2026-03-24 13:29:28,650 INFO MainThread:2989072 [wandb_run.py:_console_start():2521] atexit reg +2026-03-24 13:29:28,651 INFO MainThread:2989072 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2026-03-24 13:29:28,651 INFO MainThread:2989072 [wandb_run.py:_redirect():2438] Wrapping output streams. +2026-03-24 13:29:28,651 INFO MainThread:2989072 [wandb_run.py:_redirect():2461] Redirects installed. +2026-03-24 13:29:28,658 INFO MainThread:2989072 [wandb_init.py:init():1081] run started, returning control to user process +2026-03-24 13:29:37,000 INFO wandb-AsyncioManager-main:2989072 [service_client.py:_forward_responses():80] Reached EOF. +2026-03-24 13:29:37,000 INFO wandb-AsyncioManager-main:2989072 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles. 
diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-pwb2s09u/run-pwb2s09u.wandb b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-pwb2s09u/run-pwb2s09u.wandb new file mode 100644 index 0000000000000000000000000000000000000000..cbc895ec8f3fda0b7c6bd11b5330624fd99a2867 Binary files /dev/null and b/Finetune-GenomicBenchmarks/wandb/run-20260324_132922-pwb2s09u/run-pwb2s09u.wandb differ diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132926-6v7f2ln2/files/config.yaml b/Finetune-GenomicBenchmarks/wandb/run-20260324_132926-6v7f2ln2/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..62ee49a4da0394473bdafdad7434d40ae2ecb639 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132926-6v7f2ln2/files/config.yaml @@ -0,0 +1,515 @@ +_name_or_path: + value: /data/nanhuang/Nan/models/DNAbert2_Pretrained +_wandb: + value: + cli_version: 0.23.1 + e: + taromok165on216fc0mifzsyd0na4hq2: + args: + - --model_name_or_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained + - --tokenizer_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json + - --trust_remote_code + - "True" + - --data_path + - /data/nanhuang/Nan/ft_data/drosophila_enhancers_stark/split + - --kmer + - "-1" + - --run_name + - base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42 + - --model_max_length + - "512" + - --per_device_train_batch_size + - "128" + - --per_device_eval_batch_size + - "128" + - --gradient_accumulation_steps + - "1" + - --learning_rate + - "3e-5" + - --weight_decay + - "0.05" + - --num_train_epochs + - "4" + - --lr_scheduler_type + - linear + - --warmup_steps + - "0" + - --warmup_ratio + - "0.15" + - --fp16 + - --output_dir + - genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42 + - --evaluation_strategy + - epoch + - --save_strategy + - epoch + - --load_best_model_at_end + - "True" + - --metric_for_best_model + - eval_f1 + - 
--greater_is_better + - "True" + - --save_total_limit + - "1" + - --logging_steps + - "100" + - --overwrite_output_dir + - "True" + - --log_level + - info + - --seed + - "42" + - --find_unused_parameters + - "False" + - --project_name + - genomic_bench_DNAbert2 + codePath: train.py + codePathLocal: train.py + cpu_count: 64 + cpu_count_logical: 128 + disk: + /: + total: "3768964489216" + used: "3559221968896" + email: n5huang@ucsd.edu + executable: /data/nanhuang/miniconda3/envs/bpe_v2/bin/python + host: u112222 + memory: + total: "1082030182400" + os: Linux-5.15.0-126-generic-x86_64-with-glibc2.35 + program: /data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py + python: CPython 3.9.18 + root: /data/nanhuang/Nan/Finetune-GenomicBenchmarks + startedAt: "2026-03-24T20:29:26.386473Z" + writerId: taromok165on216fc0mifzsyd0na4hq2 + m: + - "1": train/global_step + "6": + - 3 + "7": [] + - "2": '*' + "5": 1 + "6": + - 1 + "7": [] + python_version: 3.9.18 + t: + "1": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "2": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "3": + - 7 + - 66 + "4": 3.9.18 + "5": 0.23.1 + "6": 4.35.2 + "9": + "1": transformers_trainer + "12": 0.23.1 + "13": linux-x86_64 +adafactor: + value: false +adam_beta1: + value: 0.9 +adam_beta2: + value: 0.999 +adam_epsilon: + value: 1e-08 +add_cross_attention: + value: false +architectures: + value: + - BertForMaskedLM +attention_probs_dropout_prob: + value: 0.1 +auto_find_batch_size: + value: false +bad_words_ids: + value: null +begin_suppress_tokens: + value: null +bf16: + value: false +bf16_full_eval: + value: false +bos_token_id: + value: null +cache_dir: + value: null +checkpointing: + value: false +chunk_size_feed_forward: + value: 0 +classifier_dropout: + value: null +cross_attention_hidden_size: + value: null +data_seed: + value: null +dataloader_drop_last: + value: false +dataloader_num_workers: + value: 0 +dataloader_pin_memory: + value: false +ddp_backend: + value: null +ddp_broadcast_buffers: 
+ value: null +ddp_bucket_cap_mb: + value: null +ddp_find_unused_parameters: + value: null +ddp_timeout: + value: 1800 +debug: + value: [] +decoder_start_token_id: + value: null +deepspeed: + value: null +disable_tqdm: + value: false +dispatch_batches: + value: null +diversity_penalty: + value: 0 +do_eval: + value: true +do_predict: + value: false +do_sample: + value: false +do_train: + value: false +early_stopping: + value: false +encoder_no_repeat_ngram_size: + value: 0 +eos_token_id: + value: null +eval_accumulation_steps: + value: null +eval_and_save_results: + value: true +eval_delay: + value: 0 +eval_steps: + value: 100 +evaluation_strategy: + value: epoch +exponential_decay_length_penalty: + value: null +find_unused_parameters: + value: false +finetuning_task: + value: null +forced_bos_token_id: + value: null +forced_eos_token_id: + value: null +fp16: + value: true +fp16_backend: + value: auto +fp16_full_eval: + value: false +fp16_opt_level: + value: O1 +fsdp: + value: [] +fsdp_config: + value: + min_num_params: 0 + xla: false + xla_fsdp_grad_ckpt: false +fsdp_min_num_params: + value: 0 +fsdp_transformer_layer_cls_to_wrap: + value: null +full_determinism: + value: false +gradient_accumulation_steps: + value: 1 +gradient_checkpointing: + value: false +gradient_checkpointing_kwargs: + value: null +greater_is_better: + value: true +group_by_length: + value: false +half_precision_backend: + value: auto +hidden_act: + value: gelu +hidden_dropout_prob: + value: 0.1 +hidden_size: + value: 768 +hub_always_push: + value: false +hub_model_id: + value: null +hub_private_repo: + value: false +hub_strategy: + value: every_save +hub_token: + value: +id2label: + value: + "0": LABEL_0 + "1": LABEL_1 +ignore_data_skip: + value: false +include_inputs_for_metrics: + value: false +include_tokens_per_second: + value: false +initializer_range: + value: 0.02 +intermediate_size: + value: 3072 +is_decoder: + value: false +is_encoder_decoder: + value: false +jit_mode_eval: + value: 
false +label_names: + value: null +label_smoothing_factor: + value: 0 +label2id: + value: + LABEL_0: 0 + LABEL_1: 1 +layer_norm_eps: + value: 1e-12 +learning_rate: + value: 3e-05 +length_column_name: + value: length +length_penalty: + value: 1 +load_best_model_at_end: + value: true +local_rank: + value: 0 +log_level: + value: info +log_level_replica: + value: warning +log_on_each_node: + value: true +logging_dir: + value: genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/runs/Mar24_13-29-12_u112222 +logging_first_step: + value: false +logging_nan_inf_filter: + value: true +logging_steps: + value: 100 +logging_strategy: + value: steps +lr_scheduler_type: + value: linear +max_grad_norm: + value: 1 +max_length: + value: 512 +max_position_embeddings: + value: 512 +max_steps: + value: -1 +metric_for_best_model: + value: eval_f1 +min_length: + value: 0 +model_max_length: + value: 512 +model_type: + value: bert +mp_parameters: + value: "" +neftune_noise_alpha: + value: null +no_cuda: + value: false +no_repeat_ngram_size: + value: 0 +num_attention_heads: + value: 12 +num_beam_groups: + value: 1 +num_beams: + value: 1 +num_hidden_layers: + value: 12 +num_return_sequences: + value: 1 +num_train_epochs: + value: 4 +optim: + value: adamw_torch +optim_args: + value: null +output_attentions: + value: false +output_dir: + value: genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42 +output_hidden_states: + value: false +output_scores: + value: false +overwrite_output_dir: + value: true +pad_token_id: + value: 0 +past_index: + value: -1 +per_device_eval_batch_size: + value: 128 +per_device_train_batch_size: + value: 128 +per_gpu_eval_batch_size: + value: null +per_gpu_train_batch_size: + value: null +position_embedding_type: + value: absolute +prediction_loss_only: + value: false +prefix: + value: null +problem_type: + value: null +project_name: + value: 
genomic_bench_DNAbert2 +push_to_hub: + value: false +push_to_hub_model_id: + value: null +push_to_hub_organization: + value: null +push_to_hub_token: + value: +ray_scope: + value: last +remove_invalid_values: + value: false +remove_unused_columns: + value: true +repetition_penalty: + value: 1 +report_to: + value: + - wandb +resume_from_checkpoint: + value: null +return_dict: + value: true +return_dict_in_generate: + value: false +run_name: + value: base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42 +save_model: + value: false +save_on_each_node: + value: false +save_safetensors: + value: true +save_steps: + value: 100 +save_strategy: + value: epoch +save_total_limit: + value: 1 +seed: + value: 42 +sep_token_id: + value: null +skip_memory_metrics: + value: true +split_batches: + value: false +suppress_tokens: + value: null +task_specific_params: + value: null +temperature: + value: 1 +tf_legacy_loss: + value: false +tf32: + value: null +tie_encoder_decoder: + value: false +tie_word_embeddings: + value: true +tokenizer_class: + value: null +top_k: + value: 50 +top_p: + value: 1 +torch_compile: + value: false +torch_compile_backend: + value: null +torch_compile_mode: + value: null +torch_dtype: + value: float32 +torchdynamo: + value: null +torchscript: + value: false +tpu_metrics_debug: + value: false +tpu_num_cores: + value: null +transformers_version: + value: 4.35.2 +type_vocab_size: + value: 2 +typical_p: + value: 1 +use_bfloat16: + value: false +use_cache: + value: true +use_cpu: + value: false +use_ipex: + value: false +use_legacy_prediction_loop: + value: false +use_mps_device: + value: false +vocab_file: + value: null +vocab_size: + value: 4096 +warmup_ratio: + value: 0.15 +warmup_steps: + value: 0 +weight_decay: + value: 0.05 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132926-6v7f2ln2/files/output.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132926-6v7f2ln2/files/output.log new file mode 100644 index 
0000000000000000000000000000000000000000..9264e33a38671c7b2807273606a73e6f9d019ec0 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132926-6v7f2ln2/files/output.log @@ -0,0 +1,129 @@ +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... +Some weights of BertForSequenceClassification were not initialized from the model checkpoint at /data/nanhuang/Nan/models/DNAbert2_Pretrained and are newly initialized: ['classifier.weight', 'bert.pooler.dense.weight', 'classifier.bias', 'bert.pooler.dense.bias'] +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. +/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/accelerate/accelerator.py:439: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead. + self.scaler = torch.cuda.amp.GradScaler(**kwargs) +Using auto half precision backend +***** Running training ***** + Num examples = 5,531 + Num Epochs = 4 + Instantaneous batch size per device = 128 + Total train batch size (w. 
parallel, distributed & accumulation) = 128 + Gradient Accumulation steps = 1 + Total optimization steps = 176 + Number of trainable parameters = 89,188,610 +Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true" + 0%| | 0/176 [00:00 + train() + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 454, in train + trainer.train() + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 1555, in train + return inner_training_loop( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 1860, in _inner_training_loop + tr_loss_step = self.training_step(model, inputs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 2725, in training_step + loss = self.compute_loss(model, inputs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 2748, in compute_loss + outputs = model(**inputs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/accelerate/utils/operations.py", line 680, in forward + return model_forward(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/accelerate/utils/operations.py", line 668, in __call__ + return convert_to_fp32(self.model_forward(*args, **kwargs)) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/amp/autocast_mode.py", line 44, in decorate_autocast + return func(*args, **kwargs) + File 
"/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 1564, in forward + outputs = self.bert( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 1013, in forward + encoder_outputs = self.encoder( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 607, in forward + layer_outputs = layer_module( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 497, in forward + self_attention_outputs = self.attention( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", 
line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 427, in forward + self_outputs = self.self( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 367, in forward + context_layer = context_layer.permute(0, 2, 1, 3).contiguous() +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 96.00 MiB. GPU 0 has a total capacity of 47.53 GiB of which 86.62 MiB is free. Process 2988668 has 260.00 MiB memory in use. Process 2988554 has 272.00 MiB memory in use. Process 2988286 has 260.00 MiB memory in use. Process 2988348 has 294.00 MiB memory in use. Including non-PyTorch memory, this process has 44.72 GiB memory in use. Process 2988462 has 260.00 MiB memory in use. Process 2989297 has 260.00 MiB memory in use. Process 2988927 has 260.00 MiB memory in use. Process 2988994 has 260.00 MiB memory in use. Process 2988205 has 260.00 MiB memory in use. Process 2989261 has 14.00 MiB memory in use. Process 2988861 has 10.00 MiB memory in use. Process 2989522 has 10.00 MiB memory in use. Process 2989572 has 260.00 MiB memory in use. Process 2990365 has 16.00 MiB memory in use. Of the allocated memory 44.40 GiB is allocated by PyTorch, and 12.09 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. 
See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) +Traceback (most recent call last): + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 473, in + train() + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 454, in train + trainer.train() + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 1555, in train + return inner_training_loop( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 1860, in _inner_training_loop + tr_loss_step = self.training_step(model, inputs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 2725, in training_step + loss = self.compute_loss(model, inputs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 2748, in compute_loss + outputs = model(**inputs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/accelerate/utils/operations.py", line 680, in forward + return model_forward(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/accelerate/utils/operations.py", line 668, in __call__ + return convert_to_fp32(self.model_forward(*args, **kwargs)) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/amp/autocast_mode.py", line 44, in decorate_autocast + return func(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 1564, in 
forward + outputs = self.bert( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 1013, in forward + encoder_outputs = self.encoder( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 607, in forward + layer_outputs = layer_module( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 497, in forward + self_attention_outputs = self.attention( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File 
"/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 427, in forward + self_outputs = self.self( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 367, in forward + context_layer = context_layer.permute(0, 2, 1, 3).contiguous() +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 96.00 MiB. GPU 0 has a total capacity of 47.53 GiB of which 86.62 MiB is free. Process 2988668 has 260.00 MiB memory in use. Process 2988554 has 272.00 MiB memory in use. Process 2988286 has 260.00 MiB memory in use. Process 2988348 has 294.00 MiB memory in use. Including non-PyTorch memory, this process has 44.72 GiB memory in use. Process 2988462 has 260.00 MiB memory in use. Process 2989297 has 260.00 MiB memory in use. Process 2988927 has 260.00 MiB memory in use. Process 2988994 has 260.00 MiB memory in use. Process 2988205 has 260.00 MiB memory in use. Process 2989261 has 14.00 MiB memory in use. Process 2988861 has 10.00 MiB memory in use. Process 2989522 has 10.00 MiB memory in use. Process 2989572 has 260.00 MiB memory in use. Process 2990365 has 16.00 MiB memory in use. Of the allocated memory 44.40 GiB is allocated by PyTorch, and 12.09 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. 
See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132926-6v7f2ln2/files/requirements.txt b/Finetune-GenomicBenchmarks/wandb/run-20260324_132926-6v7f2ln2/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..44d18d32ec4648cd530877d7c8c4758d5e887b9c --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132926-6v7f2ln2/files/requirements.txt @@ -0,0 +1,144 @@ +scipy==1.13.1 +jupyter_core==5.8.1 +smmap==5.0.2 +yarl==1.22.0 +executing==2.2.0 +cffi==2.0.0 +mkl_random==1.2.8 +traitlets==5.14.3 +wandb==0.23.1 +annotated-types==0.7.0 +evaluate==0.4.6 +kiwisolver==1.4.4 +Jinja2==3.1.6 +pyparsing==3.2.0 +mpmath==1.3.0 +debugpy==1.8.16 +nvidia-cuda-nvrtc-cu12==12.8.93 +docker-pycreds==0.4.0 +pycparser==2.23 +anyio==4.12.0 +safetensors==0.7.0 +matplotlib-inline==0.1.7 +Pygments==2.19.2 +numpy==2.0.2 +nvidia-cuda-cupti-cu12==12.8.90 +Bottleneck==1.4.2 +matplotlib==3.9.2 +numexpr==2.10.1 +sip==6.7.12 +aiohappyeyeballs==2.6.1 +ptyprocess==0.7.0 +fsspec==2025.7.0 +accelerate==0.25.0 +zipp==3.23.0 +PyQt5_sip==12.13.0 +pure_eval==0.2.3 +regex==2025.11.3 +aiosignal==1.4.0 +certifi==2025.10.5 +transformers==4.35.2 +mkl-service==2.4.0 +httpx==0.28.1 +mkl_fft==1.3.11 +pickleshare==0.7.5 +ipykernel==6.30.1 +nvidia-nvtx-cu12==12.8.90 +nvidia-cufft-cu12==11.3.3.83 +triton==3.4.0 +numba==0.60.0 +psutil==7.0.0 +contourpy==1.2.1 +PyQt5==5.15.10 +packaging==25.0 +datasets==4.4.1 +ipython==8.18.1 +sympy==1.14.0 +nvidia-cusolver-cu12==11.7.3.90 +multidict==6.7.0 +jupyter_client==8.6.3 +setuptools==80.9.0 +prompt_toolkit==3.0.51 +six==1.17.0 +GitPython==3.1.45 +pydantic==2.11.7 +nvidia-cublas-cu12==12.8.4.1 +aiohttp==3.13.2 +tzdata==2025.2 +importlib_metadata==8.7.0 +biopython==1.85 +httpcore==1.0.9 +python-dateutil==2.9.0.post0 +llvmlite==0.43.0 +pandas==2.3.3 +scikit-learn==1.6.1 +asttokens==3.0.0 +joblib==1.5.3 +h11==0.16.0 
+charset-normalizer==3.4.4 +pyzmq==27.0.2 +multiprocess==0.70.18 +nvidia-nvjitlink-cu12==12.8.93 +sentry-sdk==2.35.0 +pytz==2025.2 +pydantic_core==2.33.2 +MarkupSafe==3.0.3 +brotlicffi==1.0.9.2 +stack_data==0.6.3 +tqdm==4.67.1 +pynndescent==0.5.13 +importlib_resources==6.5.2 +ply==3.11 +pyarrow==21.0.0 +tokenizers==0.15.2 +exceptiongroup==1.3.1 +nvidia-cusparse-cu12==12.5.8.93 +comm==0.2.3 +pillow==11.3.0 +nvidia-cusparselt-cu12==0.7.1 +protobuf==3.20.3 +urllib3==2.5.0 +wheel==0.45.1 +wcwidth==0.2.13 +appdirs==1.4.4 +PySocks==1.7.1 +PyQt6_sip==13.10.2 +umap-learn==0.5.9.post2 +attrs==25.4.0 +platformdirs==4.3.8 +nvidia-cuda-runtime-cu12==12.8.90 +typing-inspection==0.4.1 +huggingface_hub==0.34.4 +decorator==5.2.1 +filelock==3.17.0 +nvidia-nccl-cu12==2.27.3 +fonttools==4.60.1 +xxhash==3.6.0 +dill==0.4.0 +threadpoolctl==3.6.0 +parso==0.8.4 +pysam==0.9.1 +frozenlist==1.8.0 +typing_extensions==4.15.0 +propcache==0.4.1 +tomli==2.2.1 +click==8.1.8 +nvidia-cudnn-cu12==9.10.2.21 +gitdb==4.0.12 +pip==25.3 +tornado==6.5.2 +networkx==3.2.1 +jedi==0.19.2 +idna==3.11 +pexpect==4.9.0 +async-timeout==5.0.1 +hf-xet==1.1.8 +nvidia-curand-cu12==10.3.9.90 +PyYAML==6.0.2 +nvidia-cufile-cu12==1.13.1.3 +setproctitle==1.3.6 +eval_type_backport==0.2.2 +requests==2.32.5 +nest-asyncio==1.6.0 +torch==2.8.0 +cycler==0.11.0 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132926-6v7f2ln2/files/wandb-metadata.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_132926-6v7f2ln2/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..5f97064fb1ec084a4797ff03c6285ba67929b5fb --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132926-6v7f2ln2/files/wandb-metadata.json @@ -0,0 +1,85 @@ +{ + "os": "Linux-5.15.0-126-generic-x86_64-with-glibc2.35", + "python": "CPython 3.9.18", + "startedAt": "2026-03-24T20:29:26.386473Z", + "args": [ + "--model_name_or_path", + "/data/nanhuang/Nan/models/DNAbert2_Pretrained", + "--tokenizer_path", + 
"/data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json", + "--trust_remote_code", + "True", + "--data_path", + "/data/nanhuang/Nan/ft_data/drosophila_enhancers_stark/split", + "--kmer", + "-1", + "--run_name", + "base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42", + "--model_max_length", + "512", + "--per_device_train_batch_size", + "128", + "--per_device_eval_batch_size", + "128", + "--gradient_accumulation_steps", + "1", + "--learning_rate", + "3e-5", + "--weight_decay", + "0.05", + "--num_train_epochs", + "4", + "--lr_scheduler_type", + "linear", + "--warmup_steps", + "0", + "--warmup_ratio", + "0.15", + "--fp16", + "--output_dir", + "genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42", + "--evaluation_strategy", + "epoch", + "--save_strategy", + "epoch", + "--load_best_model_at_end", + "True", + "--metric_for_best_model", + "eval_f1", + "--greater_is_better", + "True", + "--save_total_limit", + "1", + "--logging_steps", + "100", + "--overwrite_output_dir", + "True", + "--log_level", + "info", + "--seed", + "42", + "--find_unused_parameters", + "False", + "--project_name", + "genomic_bench_DNAbert2" + ], + "program": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", + "codePath": "train.py", + "codePathLocal": "train.py", + "email": "n5huang@ucsd.edu", + "root": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks", + "host": "u112222", + "executable": "/data/nanhuang/miniconda3/envs/bpe_v2/bin/python", + "cpu_count": 64, + "cpu_count_logical": 128, + "disk": { + "/": { + "total": "3768964489216", + "used": "3559221968896" + } + }, + "memory": { + "total": "1082030182400" + }, + "writerId": "taromok165on216fc0mifzsyd0na4hq2" +} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132926-6v7f2ln2/files/wandb-summary.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_132926-6v7f2ln2/files/wandb-summary.json new file mode 100644 index 
0000000000000000000000000000000000000000..edf2a662db7b90c38c1e765701ffc82c09c6a532 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132926-6v7f2ln2/files/wandb-summary.json @@ -0,0 +1 @@ +{"_runtime":13,"_wandb":{"runtime":13}} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132926-6v7f2ln2/logs/debug-core.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132926-6v7f2ln2/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..dc505463d821ae2c6bc3ccd62401b2cf7d53a977 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132926-6v7f2ln2/logs/debug-core.log @@ -0,0 +1,28 @@ +{"time":"2026-03-24T13:29:26.531655737-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmppalqcz90/port-2988855.txt","pid":2988855,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:26.532723501-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988855-3000289-1822221261/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:26.53286251-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988855} +{"time":"2026-03-24T13:29:26.60590501-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmp9lsc5qzo/port-2988348.txt","pid":2988348,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:26.607061963-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988348} +{"time":"2026-03-24T13:29:26.607092172-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988348-3000559-1761458863/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:26.711295238-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} 
+{"time":"2026-03-24T13:29:26.767454347-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"6v7f2ln2","id":"1(@)"} +{"time":"2026-03-24T13:29:26.783018035-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:26.838283629-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"c680bui8","id":"1(@)"} +{"time":"2026-03-24T13:29:31.951501693-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"6v7f2ln2","id":"1(@)"} +{"time":"2026-03-24T13:29:32.004927188-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"c680bui8","id":"1(@)"} +{"time":"2026-03-24T13:29:41.548109153-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:41.548401822-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:41.548437191-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:41.548572561-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:41.548983158-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988348-3000559-1761458863/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:45.160892299-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:45.161003669-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:45.161037238-07:00","level":"INFO","msg":"server is closed"} +{"time":"2026-03-24T13:29:46.147525344-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:46.147623123-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:46.147660033-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} 
+{"time":"2026-03-24T13:29:46.147782693-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988855-3000289-1822221261/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:46.14825665-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:47.936248442-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:47.936337811-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:47.936387481-07:00","level":"INFO","msg":"server is closed"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132926-6v7f2ln2/logs/debug-internal.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132926-6v7f2ln2/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..63ccdb53f34e862db6735d382e6f531ede9254c4 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132926-6v7f2ln2/logs/debug-internal.log @@ -0,0 +1,12 @@ +{"time":"2026-03-24T13:29:26.767747405-07:00","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2026-03-24T13:29:31.830088439-07:00","level":"ERROR","msg":"monitor: failed to initialize GPU resource: monitor: could not get GPU binary port: timeout reading portfile /tmp/wandb-system-monitor-portfile-2598424911"} +{"time":"2026-03-24T13:29:31.951041676-07:00","level":"INFO","msg":"stream: created new stream","id":"6v7f2ln2"} +{"time":"2026-03-24T13:29:31.951259265-07:00","level":"INFO","msg":"handler: started","stream_id":"6v7f2ln2"} +{"time":"2026-03-24T13:29:31.951482443-07:00","level":"INFO","msg":"stream: started","id":"6v7f2ln2"} +{"time":"2026-03-24T13:29:31.951548253-07:00","level":"INFO","msg":"writer: started","stream_id":"6v7f2ln2"} +{"time":"2026-03-24T13:29:31.951617863-07:00","level":"INFO","msg":"sender: started","stream_id":"6v7f2ln2"} 
+{"time":"2026-03-24T13:29:46.147617143-07:00","level":"INFO","msg":"stream: closing","id":"6v7f2ln2"} +{"time":"2026-03-24T13:29:46.921559322-07:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2026-03-24T13:29:47.926098672-07:00","level":"INFO","msg":"handler: closed","stream_id":"6v7f2ln2"} +{"time":"2026-03-24T13:29:47.92636108-07:00","level":"INFO","msg":"sender: closed","stream_id":"6v7f2ln2"} +{"time":"2026-03-24T13:29:47.92639437-07:00","level":"INFO","msg":"stream: closed","id":"6v7f2ln2"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132926-6v7f2ln2/logs/debug.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132926-6v7f2ln2/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..692c820023da5618f9b6538a62daccb4d616dcf4 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132926-6v7f2ln2/logs/debug.log @@ -0,0 +1,24 @@ +2026-03-24 13:29:26,391 INFO MainThread:2988855 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1 +2026-03-24 13:29:26,391 INFO MainThread:2988855 [wandb_setup.py:_flush():80] Configure stats pid to 2988855 +2026-03-24 13:29:26,392 INFO MainThread:2988855 [wandb_setup.py:_flush():80] Loading settings from /home/nanhuang/.config/wandb/settings +2026-03-24 13:29:26,392 INFO MainThread:2988855 [wandb_setup.py:_flush():80] Loading settings from /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/settings +2026-03-24 13:29:26,392 INFO MainThread:2988855 [wandb_setup.py:_flush():80] Loading settings from environment variables +2026-03-24 13:29:26,392 INFO MainThread:2988855 [wandb_init.py:setup_run_log_directory():714] Logging user logs to /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_132926-6v7f2ln2/logs/debug.log +2026-03-24 13:29:26,392 INFO MainThread:2988855 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to 
/data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_132926-6v7f2ln2/logs/debug-internal.log +2026-03-24 13:29:26,392 INFO MainThread:2988855 [wandb_init.py:init():841] calling init triggers +2026-03-24 13:29:26,392 INFO MainThread:2988855 [wandb_init.py:init():846] wandb.init called with sweep_config: {} +config: {'_wandb': {}} +2026-03-24 13:29:26,392 INFO MainThread:2988855 [wandb_init.py:init():889] starting backend +2026-03-24 13:29:26,711 INFO MainThread:2988855 [wandb_init.py:init():892] sending inform_init request +2026-03-24 13:29:26,765 INFO MainThread:2988855 [wandb_init.py:init():900] backend started and connected +2026-03-24 13:29:26,773 INFO MainThread:2988855 [wandb_init.py:init():970] updated telemetry +2026-03-24 13:29:26,775 INFO MainThread:2988855 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout +2026-03-24 13:29:32,595 INFO MainThread:2988855 [wandb_init.py:init():1041] starting run threads in backend +2026-03-24 13:29:32,782 INFO MainThread:2988855 [wandb_run.py:_console_start():2521] atexit reg +2026-03-24 13:29:32,782 INFO MainThread:2988855 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2026-03-24 13:29:32,783 INFO MainThread:2988855 [wandb_run.py:_redirect():2438] Wrapping output streams. +2026-03-24 13:29:32,783 INFO MainThread:2988855 [wandb_run.py:_redirect():2461] Redirects installed. 
+2026-03-24 13:29:32,789 INFO MainThread:2988855 [wandb_init.py:init():1081] run started, returning control to user process +2026-03-24 13:29:43,612 INFO MainThread:2988855 [wandb_run.py:_config_callback():1396] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 512, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['BertForMaskedLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': '/data/nanhuang/Nan/models/DNAbert2_Pretrained', 'transformers_version': '4.35.2', 'model_type': 'bert', 'vocab_size': 4096, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 512, 'type_vocab_size': 2, 
'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'classifier_dropout': None, 'output_dir': 'genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 128, 'per_device_eval_batch_size': 128, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 3e-05, 'weight_decay': 0.05, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 4, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'warmup_ratio': 0.15, 'warmup_steps': 0, 'log_level': 'info', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 'genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/runs/Mar24_13-29-12_u112222', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 100, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 100, 'save_total_limit': 1, 'save_safetensors': True, 'save_on_each_node': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 100, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': 'base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 
'load_best_model_at_end': True, 'metric_for_best_model': 'eval_f1', 'greater_is_better': True, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': False, 'include_tokens_per_second': False, 'neftune_noise_alpha': None, 'vocab_file': None, 'cache_dir': None, 'model_max_length': 512, 'find_unused_parameters': False, 'checkpointing': False, 'eval_and_save_results': True, 'save_model': False, 'project_name': 'genomic_bench_DNAbert2'} +2026-03-24 13:29:46,147 INFO wandb-AsyncioManager-main:2988855 [service_client.py:_forward_responses():80] Reached EOF. +2026-03-24 13:29:46,147 INFO wandb-AsyncioManager-main:2988855 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles. 
diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132926-6v7f2ln2/run-6v7f2ln2.wandb b/Finetune-GenomicBenchmarks/wandb/run-20260324_132926-6v7f2ln2/run-6v7f2ln2.wandb new file mode 100644 index 0000000000000000000000000000000000000000..f865838f650dd717a714522a30a70eb7f1d124b5 Binary files /dev/null and b/Finetune-GenomicBenchmarks/wandb/run-20260324_132926-6v7f2ln2/run-6v7f2ln2.wandb differ diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132926-c680bui8/files/config.yaml b/Finetune-GenomicBenchmarks/wandb/run-20260324_132926-c680bui8/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3003751c7d5d6bf108f101ab4bde8c3c0e40443b --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132926-c680bui8/files/config.yaml @@ -0,0 +1,108 @@ +_wandb: + value: + cli_version: 0.23.1 + e: + lb722lcxe9c5zvhwbw6cwz08toeefngg: + args: + - --model_name_or_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained + - --tokenizer_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json + - --trust_remote_code + - "True" + - --data_path + - /data/nanhuang/Nan/ft_data/drosophila_enhancers_stark/split + - --kmer + - "-1" + - --run_name + - base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42 + - --model_max_length + - "512" + - --per_device_train_batch_size + - "128" + - --per_device_eval_batch_size + - "128" + - --gradient_accumulation_steps + - "1" + - --learning_rate + - "3e-5" + - --weight_decay + - "0.05" + - --num_train_epochs + - "4" + - --lr_scheduler_type + - linear + - --warmup_steps + - "0" + - --warmup_ratio + - "0.15" + - --fp16 + - --output_dir + - genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42 + - --evaluation_strategy + - epoch + - --save_strategy + - epoch + - --load_best_model_at_end + - "True" + - --metric_for_best_model + - eval_f1 + - --greater_is_better + - "True" + - --save_total_limit + - "1" + - --logging_steps + 
- "100" + - --overwrite_output_dir + - "True" + - --log_level + - info + - --seed + - "42" + - --find_unused_parameters + - "False" + - --project_name + - genomic_bench_DNAbert2 + codePath: train.py + codePathLocal: train.py + cpu_count: 64 + cpu_count_logical: 128 + disk: + /: + total: "3768964489216" + used: "3559221968896" + email: n5huang@ucsd.edu + executable: /data/nanhuang/miniconda3/envs/bpe_v2/bin/python + host: u112222 + memory: + total: "1082030182400" + os: Linux-5.15.0-126-generic-x86_64-with-glibc2.35 + program: /data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py + python: CPython 3.9.18 + root: /data/nanhuang/Nan/Finetune-GenomicBenchmarks + startedAt: "2026-03-24T20:29:26.451235Z" + writerId: lb722lcxe9c5zvhwbw6cwz08toeefngg + m: [] + python_version: 3.9.18 + t: + "1": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "2": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "4": 3.9.18 + "5": 0.23.1 + "6": 4.35.2 + "12": 0.23.1 + "13": linux-x86_64 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132926-c680bui8/files/output.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132926-c680bui8/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..9a1532eca2be7381e781af117a071b49e2e2af7a --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132926-c680bui8/files/output.log @@ -0,0 +1,47 @@ +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... +Some weights of BertForSequenceClassification were not initialized from the model checkpoint at /data/nanhuang/Nan/models/DNAbert2_Pretrained and are newly initialized: ['classifier.bias', 'bert.pooler.dense.weight', 'bert.pooler.dense.bias', 'classifier.weight'] +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. 
+Traceback (most recent call last): + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 473, in + train() + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 424, in train + model = transformers.AutoModelForSequenceClassification.from_pretrained( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/modeling_utils.py", line 2271, in to + return super().to(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1369, in to + return self._apply(convert) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + [Previous line repeated 3 more times] + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 955, in _apply + param_applied = fn(param) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1355, in convert + return t.to( +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 0 has a total capacity of 47.53 GiB of which 44.00 MiB is free. Process 2988293 has 21.00 GiB memory in use. Process 2988247 has 0 bytes memory in use. Process 2988061 has 7.12 GiB memory in use. Process 2988374 has 260.00 MiB memory in use. Process 2988531 has 260.00 MiB memory in use. Process 2988064 has 260.00 MiB memory in use. Process 2988566 has 260.00 MiB memory in use. Process 2988668 has 260.00 MiB memory in use. Process 2988093 has 260.00 MiB memory in use. Process 2988637 has 260.00 MiB memory in use. Process 2988554 has 272.00 MiB memory in use. 
Process 2988906 has 260.00 MiB memory in use. Process 2988874 has 260.00 MiB memory in use. Process 2988923 has 272.00 MiB memory in use. Process 2988286 has 260.00 MiB memory in use. Process 2989072 has 260.00 MiB memory in use. Process 2988448 has 260.00 MiB memory in use. Including non-PyTorch memory, this process has 294.00 MiB memory in use. Process 2988736 has 260.00 MiB memory in use. Process 2988855 has 374.00 MiB memory in use. Process 2988814 has 6.00 MiB memory in use. Process 2988511 has 4.00 MiB memory in use. Process 2988942 has 6.00 MiB memory in use. Process 2989897 has 4.00 MiB memory in use. Process 2989914 has 8.00 MiB memory in use. Process 2989070 has 6.00 MiB memory in use. Process 2989165 has 4.00 MiB memory in use. Process 2989068 has 4.00 MiB memory in use. Process 2990500 has 4.00 MiB memory in use. Process 2989449 has 4.00 MiB memory in use. Process 2988447 has 10.00 MiB memory in use. Process 2988379 has 4.00 MiB memory in use. Process 2988518 has 4.00 MiB memory in use. Process 2988326 has 4.00 MiB memory in use. Process 2988462 has 10.00 MiB memory in use. Process 2989297 has 10.00 MiB memory in use. Process 2988927 has 10.00 MiB memory in use. Process 2988994 has 10.00 MiB memory in use. Process 2988205 has 10.00 MiB memory in use. Process 2988861 has 10.00 MiB memory in use. Of the allocated memory 32.05 MiB is allocated by PyTorch, and 1.95 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. 
See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) +Traceback (most recent call last): + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 473, in + train() + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 424, in train + model = transformers.AutoModelForSequenceClassification.from_pretrained( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/modeling_utils.py", line 2271, in to + return super().to(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1369, in to + return self._apply(convert) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + [Previous line repeated 3 more times] + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 955, in _apply + param_applied = fn(param) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1355, in convert + return t.to( +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 0 has a total capacity of 47.53 GiB of which 44.00 MiB is free. Process 2988293 has 21.00 GiB memory in use. Process 2988247 has 0 bytes memory in use. Process 2988061 has 7.12 GiB memory in use. Process 2988374 has 260.00 MiB memory in use. Process 2988531 has 260.00 MiB memory in use. Process 2988064 has 260.00 MiB memory in use. Process 2988566 has 260.00 MiB memory in use. Process 2988668 has 260.00 MiB memory in use. 
Process 2988093 has 260.00 MiB memory in use. Process 2988637 has 260.00 MiB memory in use. Process 2988554 has 272.00 MiB memory in use. Process 2988906 has 260.00 MiB memory in use. Process 2988874 has 260.00 MiB memory in use. Process 2988923 has 272.00 MiB memory in use. Process 2988286 has 260.00 MiB memory in use. Process 2989072 has 260.00 MiB memory in use. Process 2988448 has 260.00 MiB memory in use. Including non-PyTorch memory, this process has 294.00 MiB memory in use. Process 2988736 has 260.00 MiB memory in use. Process 2988855 has 374.00 MiB memory in use. Process 2988814 has 6.00 MiB memory in use. Process 2988511 has 4.00 MiB memory in use. Process 2988942 has 6.00 MiB memory in use. Process 2989897 has 4.00 MiB memory in use. Process 2989914 has 8.00 MiB memory in use. Process 2989070 has 6.00 MiB memory in use. Process 2989165 has 4.00 MiB memory in use. Process 2989068 has 4.00 MiB memory in use. Process 2990500 has 4.00 MiB memory in use. Process 2989449 has 4.00 MiB memory in use. Process 2988447 has 10.00 MiB memory in use. Process 2988379 has 4.00 MiB memory in use. Process 2988518 has 4.00 MiB memory in use. Process 2988326 has 4.00 MiB memory in use. Process 2988462 has 10.00 MiB memory in use. Process 2989297 has 10.00 MiB memory in use. Process 2988927 has 10.00 MiB memory in use. Process 2988994 has 10.00 MiB memory in use. Process 2988205 has 10.00 MiB memory in use. Process 2988861 has 10.00 MiB memory in use. Of the allocated memory 32.05 MiB is allocated by PyTorch, and 1.95 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. 
See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132926-c680bui8/files/requirements.txt b/Finetune-GenomicBenchmarks/wandb/run-20260324_132926-c680bui8/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..44d18d32ec4648cd530877d7c8c4758d5e887b9c --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132926-c680bui8/files/requirements.txt @@ -0,0 +1,144 @@ +scipy==1.13.1 +jupyter_core==5.8.1 +smmap==5.0.2 +yarl==1.22.0 +executing==2.2.0 +cffi==2.0.0 +mkl_random==1.2.8 +traitlets==5.14.3 +wandb==0.23.1 +annotated-types==0.7.0 +evaluate==0.4.6 +kiwisolver==1.4.4 +Jinja2==3.1.6 +pyparsing==3.2.0 +mpmath==1.3.0 +debugpy==1.8.16 +nvidia-cuda-nvrtc-cu12==12.8.93 +docker-pycreds==0.4.0 +pycparser==2.23 +anyio==4.12.0 +safetensors==0.7.0 +matplotlib-inline==0.1.7 +Pygments==2.19.2 +numpy==2.0.2 +nvidia-cuda-cupti-cu12==12.8.90 +Bottleneck==1.4.2 +matplotlib==3.9.2 +numexpr==2.10.1 +sip==6.7.12 +aiohappyeyeballs==2.6.1 +ptyprocess==0.7.0 +fsspec==2025.7.0 +accelerate==0.25.0 +zipp==3.23.0 +PyQt5_sip==12.13.0 +pure_eval==0.2.3 +regex==2025.11.3 +aiosignal==1.4.0 +certifi==2025.10.5 +transformers==4.35.2 +mkl-service==2.4.0 +httpx==0.28.1 +mkl_fft==1.3.11 +pickleshare==0.7.5 +ipykernel==6.30.1 +nvidia-nvtx-cu12==12.8.90 +nvidia-cufft-cu12==11.3.3.83 +triton==3.4.0 +numba==0.60.0 +psutil==7.0.0 +contourpy==1.2.1 +PyQt5==5.15.10 +packaging==25.0 +datasets==4.4.1 +ipython==8.18.1 +sympy==1.14.0 +nvidia-cusolver-cu12==11.7.3.90 +multidict==6.7.0 +jupyter_client==8.6.3 +setuptools==80.9.0 +prompt_toolkit==3.0.51 +six==1.17.0 +GitPython==3.1.45 +pydantic==2.11.7 +nvidia-cublas-cu12==12.8.4.1 +aiohttp==3.13.2 +tzdata==2025.2 +importlib_metadata==8.7.0 +biopython==1.85 +httpcore==1.0.9 +python-dateutil==2.9.0.post0 +llvmlite==0.43.0 +pandas==2.3.3 +scikit-learn==1.6.1 +asttokens==3.0.0 +joblib==1.5.3 +h11==0.16.0 
+charset-normalizer==3.4.4 +pyzmq==27.0.2 +multiprocess==0.70.18 +nvidia-nvjitlink-cu12==12.8.93 +sentry-sdk==2.35.0 +pytz==2025.2 +pydantic_core==2.33.2 +MarkupSafe==3.0.3 +brotlicffi==1.0.9.2 +stack_data==0.6.3 +tqdm==4.67.1 +pynndescent==0.5.13 +importlib_resources==6.5.2 +ply==3.11 +pyarrow==21.0.0 +tokenizers==0.15.2 +exceptiongroup==1.3.1 +nvidia-cusparse-cu12==12.5.8.93 +comm==0.2.3 +pillow==11.3.0 +nvidia-cusparselt-cu12==0.7.1 +protobuf==3.20.3 +urllib3==2.5.0 +wheel==0.45.1 +wcwidth==0.2.13 +appdirs==1.4.4 +PySocks==1.7.1 +PyQt6_sip==13.10.2 +umap-learn==0.5.9.post2 +attrs==25.4.0 +platformdirs==4.3.8 +nvidia-cuda-runtime-cu12==12.8.90 +typing-inspection==0.4.1 +huggingface_hub==0.34.4 +decorator==5.2.1 +filelock==3.17.0 +nvidia-nccl-cu12==2.27.3 +fonttools==4.60.1 +xxhash==3.6.0 +dill==0.4.0 +threadpoolctl==3.6.0 +parso==0.8.4 +pysam==0.9.1 +frozenlist==1.8.0 +typing_extensions==4.15.0 +propcache==0.4.1 +tomli==2.2.1 +click==8.1.8 +nvidia-cudnn-cu12==9.10.2.21 +gitdb==4.0.12 +pip==25.3 +tornado==6.5.2 +networkx==3.2.1 +jedi==0.19.2 +idna==3.11 +pexpect==4.9.0 +async-timeout==5.0.1 +hf-xet==1.1.8 +nvidia-curand-cu12==10.3.9.90 +PyYAML==6.0.2 +nvidia-cufile-cu12==1.13.1.3 +setproctitle==1.3.6 +eval_type_backport==0.2.2 +requests==2.32.5 +nest-asyncio==1.6.0 +torch==2.8.0 +cycler==0.11.0 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132926-c680bui8/files/wandb-metadata.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_132926-c680bui8/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..fac873d8138a4fb3cfd7867622d08a4b638bb7de --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132926-c680bui8/files/wandb-metadata.json @@ -0,0 +1,85 @@ +{ + "os": "Linux-5.15.0-126-generic-x86_64-with-glibc2.35", + "python": "CPython 3.9.18", + "startedAt": "2026-03-24T20:29:26.451235Z", + "args": [ + "--model_name_or_path", + "/data/nanhuang/Nan/models/DNAbert2_Pretrained", + "--tokenizer_path", + 
"/data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json", + "--trust_remote_code", + "True", + "--data_path", + "/data/nanhuang/Nan/ft_data/drosophila_enhancers_stark/split", + "--kmer", + "-1", + "--run_name", + "base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42", + "--model_max_length", + "512", + "--per_device_train_batch_size", + "128", + "--per_device_eval_batch_size", + "128", + "--gradient_accumulation_steps", + "1", + "--learning_rate", + "3e-5", + "--weight_decay", + "0.05", + "--num_train_epochs", + "4", + "--lr_scheduler_type", + "linear", + "--warmup_steps", + "0", + "--warmup_ratio", + "0.15", + "--fp16", + "--output_dir", + "genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42", + "--evaluation_strategy", + "epoch", + "--save_strategy", + "epoch", + "--load_best_model_at_end", + "True", + "--metric_for_best_model", + "eval_f1", + "--greater_is_better", + "True", + "--save_total_limit", + "1", + "--logging_steps", + "100", + "--overwrite_output_dir", + "True", + "--log_level", + "info", + "--seed", + "42", + "--find_unused_parameters", + "False", + "--project_name", + "genomic_bench_DNAbert2" + ], + "program": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", + "codePath": "train.py", + "codePathLocal": "train.py", + "email": "n5huang@ucsd.edu", + "root": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks", + "host": "u112222", + "executable": "/data/nanhuang/miniconda3/envs/bpe_v2/bin/python", + "cpu_count": 64, + "cpu_count_logical": 128, + "disk": { + "/": { + "total": "3768964489216", + "used": "3559221968896" + } + }, + "memory": { + "total": "1082030182400" + }, + "writerId": "lb722lcxe9c5zvhwbw6cwz08toeefngg" +} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132926-c680bui8/files/wandb-summary.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_132926-c680bui8/files/wandb-summary.json new file mode 100644 index 
0000000000000000000000000000000000000000..2320ce95111d4a7914cdfba4a4e05238d5d8f179 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132926-c680bui8/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb":{"runtime":8},"_runtime":8} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132926-c680bui8/logs/debug-core.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132926-c680bui8/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..dc505463d821ae2c6bc3ccd62401b2cf7d53a977 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132926-c680bui8/logs/debug-core.log @@ -0,0 +1,28 @@ +{"time":"2026-03-24T13:29:26.531655737-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmppalqcz90/port-2988855.txt","pid":2988855,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:26.532723501-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988855-3000289-1822221261/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:26.53286251-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988855} +{"time":"2026-03-24T13:29:26.60590501-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmp9lsc5qzo/port-2988348.txt","pid":2988348,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:26.607061963-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988348} +{"time":"2026-03-24T13:29:26.607092172-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988348-3000559-1761458863/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:26.711295238-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} 
+{"time":"2026-03-24T13:29:26.767454347-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"6v7f2ln2","id":"1(@)"} +{"time":"2026-03-24T13:29:26.783018035-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:26.838283629-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"c680bui8","id":"1(@)"} +{"time":"2026-03-24T13:29:31.951501693-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"6v7f2ln2","id":"1(@)"} +{"time":"2026-03-24T13:29:32.004927188-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"c680bui8","id":"1(@)"} +{"time":"2026-03-24T13:29:41.548109153-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:41.548401822-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:41.548437191-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:41.548572561-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:41.548983158-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988348-3000559-1761458863/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:45.160892299-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:45.161003669-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:45.161037238-07:00","level":"INFO","msg":"server is closed"} +{"time":"2026-03-24T13:29:46.147525344-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:46.147623123-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:46.147660033-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} 
+{"time":"2026-03-24T13:29:46.147782693-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988855-3000289-1822221261/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:46.14825665-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:47.936248442-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:47.936337811-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:47.936387481-07:00","level":"INFO","msg":"server is closed"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132926-c680bui8/logs/debug-internal.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132926-c680bui8/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..4d555bc9b1d00f11fedfd31d683f3b6ef51f6846 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132926-c680bui8/logs/debug-internal.log @@ -0,0 +1,12 @@ +{"time":"2026-03-24T13:29:26.838514398-07:00","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2026-03-24T13:29:31.893794243-07:00","level":"ERROR","msg":"monitor: failed to initialize GPU resource: monitor: could not get GPU binary port: timeout reading portfile /tmp/wandb-system-monitor-portfile-2028819368"} +{"time":"2026-03-24T13:29:32.004485351-07:00","level":"INFO","msg":"stream: created new stream","id":"c680bui8"} +{"time":"2026-03-24T13:29:32.00465091-07:00","level":"INFO","msg":"handler: started","stream_id":"c680bui8"} +{"time":"2026-03-24T13:29:32.004911979-07:00","level":"INFO","msg":"stream: started","id":"c680bui8"} +{"time":"2026-03-24T13:29:32.004931548-07:00","level":"INFO","msg":"writer: started","stream_id":"c680bui8"} +{"time":"2026-03-24T13:29:32.004933368-07:00","level":"INFO","msg":"sender: started","stream_id":"c680bui8"} 
+{"time":"2026-03-24T13:29:41.548426661-07:00","level":"INFO","msg":"stream: closing","id":"c680bui8"} +{"time":"2026-03-24T13:29:41.99306911-07:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2026-03-24T13:29:45.156731024-07:00","level":"INFO","msg":"handler: closed","stream_id":"c680bui8"} +{"time":"2026-03-24T13:29:45.157089081-07:00","level":"INFO","msg":"sender: closed","stream_id":"c680bui8"} +{"time":"2026-03-24T13:29:45.157112532-07:00","level":"INFO","msg":"stream: closed","id":"c680bui8"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132926-c680bui8/logs/debug.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132926-c680bui8/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..d3467c087bdcdc12bca3673323a4575ca4a4c750 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132926-c680bui8/logs/debug.log @@ -0,0 +1,23 @@ +2026-03-24 13:29:26,457 INFO MainThread:2988348 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1 +2026-03-24 13:29:26,457 INFO MainThread:2988348 [wandb_setup.py:_flush():80] Configure stats pid to 2988348 +2026-03-24 13:29:26,457 INFO MainThread:2988348 [wandb_setup.py:_flush():80] Loading settings from /home/nanhuang/.config/wandb/settings +2026-03-24 13:29:26,457 INFO MainThread:2988348 [wandb_setup.py:_flush():80] Loading settings from /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/settings +2026-03-24 13:29:26,457 INFO MainThread:2988348 [wandb_setup.py:_flush():80] Loading settings from environment variables +2026-03-24 13:29:26,458 INFO MainThread:2988348 [wandb_init.py:setup_run_log_directory():714] Logging user logs to /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_132926-c680bui8/logs/debug.log +2026-03-24 13:29:26,458 INFO MainThread:2988348 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to 
/data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_132926-c680bui8/logs/debug-internal.log +2026-03-24 13:29:26,458 INFO MainThread:2988348 [wandb_init.py:init():841] calling init triggers +2026-03-24 13:29:26,458 INFO MainThread:2988348 [wandb_init.py:init():846] wandb.init called with sweep_config: {} +config: {'_wandb': {}} +2026-03-24 13:29:26,458 INFO MainThread:2988348 [wandb_init.py:init():889] starting backend +2026-03-24 13:29:26,783 INFO MainThread:2988348 [wandb_init.py:init():892] sending inform_init request +2026-03-24 13:29:26,836 INFO MainThread:2988348 [wandb_init.py:init():900] backend started and connected +2026-03-24 13:29:26,844 INFO MainThread:2988348 [wandb_init.py:init():970] updated telemetry +2026-03-24 13:29:26,848 INFO MainThread:2988348 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout +2026-03-24 13:29:32,673 INFO MainThread:2988348 [wandb_init.py:init():1041] starting run threads in backend +2026-03-24 13:29:32,799 INFO MainThread:2988348 [wandb_run.py:_console_start():2521] atexit reg +2026-03-24 13:29:32,800 INFO MainThread:2988348 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2026-03-24 13:29:32,800 INFO MainThread:2988348 [wandb_run.py:_redirect():2438] Wrapping output streams. +2026-03-24 13:29:32,800 INFO MainThread:2988348 [wandb_run.py:_redirect():2461] Redirects installed. +2026-03-24 13:29:32,804 INFO MainThread:2988348 [wandb_init.py:init():1081] run started, returning control to user process +2026-03-24 13:29:41,548 INFO wandb-AsyncioManager-main:2988348 [service_client.py:_forward_responses():80] Reached EOF. +2026-03-24 13:29:41,548 INFO wandb-AsyncioManager-main:2988348 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles. 
diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132926-c680bui8/run-c680bui8.wandb b/Finetune-GenomicBenchmarks/wandb/run-20260324_132926-c680bui8/run-c680bui8.wandb new file mode 100644 index 0000000000000000000000000000000000000000..1a42eb6a0b85fa58512d5b1ea903ededa428ce91 Binary files /dev/null and b/Finetune-GenomicBenchmarks/wandb/run-20260324_132926-c680bui8/run-c680bui8.wandb differ diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-98091ibt/files/config.yaml b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-98091ibt/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1f9e5b74503b0811a22b3e691485c6c867d7de0b --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-98091ibt/files/config.yaml @@ -0,0 +1,152 @@ +_wandb: + value: + cli_version: 0.23.1 + e: + frqd9dk39yfr13l3syvqyh1an4gqwbbg: + args: + - --model_name_or_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained + - --tokenizer_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json + - --trust_remote_code + - "True" + - --data_path + - /data/nanhuang/Nan/ft_data/drosophila_enhancers_stark/split + - --kmer + - "-1" + - --run_name + - base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42 + - --model_max_length + - "512" + - --per_device_train_batch_size + - "128" + - --per_device_eval_batch_size + - "128" + - --gradient_accumulation_steps + - "1" + - --learning_rate + - "3e-5" + - --weight_decay + - "0.05" + - --num_train_epochs + - "4" + - --lr_scheduler_type + - linear + - --warmup_steps + - "0" + - --warmup_ratio + - "0.15" + - --fp16 + - --output_dir + - genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42 + - --evaluation_strategy + - epoch + - --save_strategy + - epoch + - --load_best_model_at_end + - "True" + - --metric_for_best_model + - eval_f1 + - --greater_is_better + - "True" + - --save_total_limit + - "1" + - --logging_steps + 
- "100" + - --overwrite_output_dir + - "True" + - --log_level + - info + - --seed + - "42" + - --find_unused_parameters + - "False" + - --project_name + - genomic_bench_DNAbert2 + codePath: train.py + codePathLocal: train.py + cpu_count: 64 + cpu_count_logical: 128 + cudaVersion: "12.4" + disk: + /: + total: "3768964489216" + used: "3559217737728" + email: n5huang@ucsd.edu + executable: /data/nanhuang/miniconda3/envs/bpe_v2/bin/python + gpu: NVIDIA RTX A6000 + gpu_count: 8 + gpu_nvidia: + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-5a3d8a94-f380-da39-63d2-4cae98f5c2ae + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-cf8724bd-d619-7916-ee26-88d517a20c47 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-48b494ab-4a63-ff4c-5cc8-746af5d27310 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-968c7ea7-97bf-416a-7689-72c141cfc2bb + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-d53c626b-860f-1dec-1cfa-1dfcde78bc88 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-caa40ec7-afcb-5fe0-c53a-85eb54152941 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-18ee7a7f-1bbe-edef-c72c-3abed60917b2 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-a8757d5a-c26e-48c6-a704-dfe62167fc81 + host: u112222 + memory: + total: "1082030182400" + os: Linux-5.15.0-126-generic-x86_64-with-glibc2.35 + program: /data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py + python: CPython 3.9.18 + root: /data/nanhuang/Nan/Finetune-GenomicBenchmarks + startedAt: "2026-03-24T20:29:45.389786Z" + writerId: 
frqd9dk39yfr13l3syvqyh1an4gqwbbg + m: [] + python_version: 3.9.18 + t: + "1": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "2": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "4": 3.9.18 + "5": 0.23.1 + "6": 4.35.2 + "12": 0.23.1 + "13": linux-x86_64 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-98091ibt/files/output.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-98091ibt/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..7633c99996594a4ada3736f7da042ea3029e001f --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-98091ibt/files/output.log @@ -0,0 +1,47 @@ +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... +Some weights of BertForSequenceClassification were not initialized from the model checkpoint at /data/nanhuang/Nan/models/DNAbert2_Pretrained and are newly initialized: ['bert.pooler.dense.weight', 'classifier.bias', 'bert.pooler.dense.bias', 'classifier.weight'] +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. 
+Traceback (most recent call last): + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 473, in + train() + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 424, in train + model = transformers.AutoModelForSequenceClassification.from_pretrained( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/modeling_utils.py", line 2271, in to + return super().to(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1369, in to + return self._apply(convert) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + [Previous line repeated 3 more times] + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 955, in _apply + param_applied = fn(param) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1355, in convert + return t.to( +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 0 has a total capacity of 47.53 GiB of which 38.12 MiB is free. Process 2988286 has 260.00 MiB memory in use. Process 2988462 has 3.37 GiB memory in use. Process 2989297 has 698.00 MiB memory in use. Process 2988927 has 1.48 GiB memory in use. Process 2988994 has 40.88 GiB memory in use. Including non-PyTorch memory, this process has 534.00 MiB memory in use. Process 2989572 has 260.00 MiB memory in use. Of the allocated memory 248.35 MiB is allocated by PyTorch, and 25.65 MiB is reserved by PyTorch but unallocated. 
If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) +Traceback (most recent call last): + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 473, in + train() + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 424, in train + model = transformers.AutoModelForSequenceClassification.from_pretrained( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/modeling_utils.py", line 2271, in to + return super().to(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1369, in to + return self._apply(convert) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 928, in _apply + module._apply(fn) + [Previous line repeated 3 more times] + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 955, in _apply + param_applied = fn(param) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1355, in convert + return t.to( +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 0 has a total capacity of 47.53 GiB of which 38.12 MiB is free. Process 2988286 has 260.00 MiB memory in use. Process 2988462 has 3.37 GiB memory in use. Process 2989297 has 698.00 MiB memory in use. Process 2988927 has 1.48 GiB memory in use. Process 2988994 has 40.88 GiB memory in use. 
Including non-PyTorch memory, this process has 534.00 MiB memory in use. Process 2989572 has 260.00 MiB memory in use. Of the allocated memory 248.35 MiB is allocated by PyTorch, and 25.65 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-98091ibt/files/requirements.txt b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-98091ibt/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..44d18d32ec4648cd530877d7c8c4758d5e887b9c --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-98091ibt/files/requirements.txt @@ -0,0 +1,144 @@ +scipy==1.13.1 +jupyter_core==5.8.1 +smmap==5.0.2 +yarl==1.22.0 +executing==2.2.0 +cffi==2.0.0 +mkl_random==1.2.8 +traitlets==5.14.3 +wandb==0.23.1 +annotated-types==0.7.0 +evaluate==0.4.6 +kiwisolver==1.4.4 +Jinja2==3.1.6 +pyparsing==3.2.0 +mpmath==1.3.0 +debugpy==1.8.16 +nvidia-cuda-nvrtc-cu12==12.8.93 +docker-pycreds==0.4.0 +pycparser==2.23 +anyio==4.12.0 +safetensors==0.7.0 +matplotlib-inline==0.1.7 +Pygments==2.19.2 +numpy==2.0.2 +nvidia-cuda-cupti-cu12==12.8.90 +Bottleneck==1.4.2 +matplotlib==3.9.2 +numexpr==2.10.1 +sip==6.7.12 +aiohappyeyeballs==2.6.1 +ptyprocess==0.7.0 +fsspec==2025.7.0 +accelerate==0.25.0 +zipp==3.23.0 +PyQt5_sip==12.13.0 +pure_eval==0.2.3 +regex==2025.11.3 +aiosignal==1.4.0 +certifi==2025.10.5 +transformers==4.35.2 +mkl-service==2.4.0 +httpx==0.28.1 +mkl_fft==1.3.11 +pickleshare==0.7.5 +ipykernel==6.30.1 +nvidia-nvtx-cu12==12.8.90 +nvidia-cufft-cu12==11.3.3.83 +triton==3.4.0 +numba==0.60.0 +psutil==7.0.0 +contourpy==1.2.1 +PyQt5==5.15.10 +packaging==25.0 +datasets==4.4.1 +ipython==8.18.1 +sympy==1.14.0 +nvidia-cusolver-cu12==11.7.3.90 +multidict==6.7.0 
+jupyter_client==8.6.3 +setuptools==80.9.0 +prompt_toolkit==3.0.51 +six==1.17.0 +GitPython==3.1.45 +pydantic==2.11.7 +nvidia-cublas-cu12==12.8.4.1 +aiohttp==3.13.2 +tzdata==2025.2 +importlib_metadata==8.7.0 +biopython==1.85 +httpcore==1.0.9 +python-dateutil==2.9.0.post0 +llvmlite==0.43.0 +pandas==2.3.3 +scikit-learn==1.6.1 +asttokens==3.0.0 +joblib==1.5.3 +h11==0.16.0 +charset-normalizer==3.4.4 +pyzmq==27.0.2 +multiprocess==0.70.18 +nvidia-nvjitlink-cu12==12.8.93 +sentry-sdk==2.35.0 +pytz==2025.2 +pydantic_core==2.33.2 +MarkupSafe==3.0.3 +brotlicffi==1.0.9.2 +stack_data==0.6.3 +tqdm==4.67.1 +pynndescent==0.5.13 +importlib_resources==6.5.2 +ply==3.11 +pyarrow==21.0.0 +tokenizers==0.15.2 +exceptiongroup==1.3.1 +nvidia-cusparse-cu12==12.5.8.93 +comm==0.2.3 +pillow==11.3.0 +nvidia-cusparselt-cu12==0.7.1 +protobuf==3.20.3 +urllib3==2.5.0 +wheel==0.45.1 +wcwidth==0.2.13 +appdirs==1.4.4 +PySocks==1.7.1 +PyQt6_sip==13.10.2 +umap-learn==0.5.9.post2 +attrs==25.4.0 +platformdirs==4.3.8 +nvidia-cuda-runtime-cu12==12.8.90 +typing-inspection==0.4.1 +huggingface_hub==0.34.4 +decorator==5.2.1 +filelock==3.17.0 +nvidia-nccl-cu12==2.27.3 +fonttools==4.60.1 +xxhash==3.6.0 +dill==0.4.0 +threadpoolctl==3.6.0 +parso==0.8.4 +pysam==0.9.1 +frozenlist==1.8.0 +typing_extensions==4.15.0 +propcache==0.4.1 +tomli==2.2.1 +click==8.1.8 +nvidia-cudnn-cu12==9.10.2.21 +gitdb==4.0.12 +pip==25.3 +tornado==6.5.2 +networkx==3.2.1 +jedi==0.19.2 +idna==3.11 +pexpect==4.9.0 +async-timeout==5.0.1 +hf-xet==1.1.8 +nvidia-curand-cu12==10.3.9.90 +PyYAML==6.0.2 +nvidia-cufile-cu12==1.13.1.3 +setproctitle==1.3.6 +eval_type_backport==0.2.2 +requests==2.32.5 +nest-asyncio==1.6.0 +torch==2.8.0 +cycler==0.11.0 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-98091ibt/files/wandb-metadata.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-98091ibt/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..6a82c931ba5fb8b8fc9f1dd54b3935083c28fb54 --- 
/dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-98091ibt/files/wandb-metadata.json @@ -0,0 +1,146 @@ +{ + "os": "Linux-5.15.0-126-generic-x86_64-with-glibc2.35", + "python": "CPython 3.9.18", + "startedAt": "2026-03-24T20:29:45.389786Z", + "args": [ + "--model_name_or_path", + "/data/nanhuang/Nan/models/DNAbert2_Pretrained", + "--tokenizer_path", + "/data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json", + "--trust_remote_code", + "True", + "--data_path", + "/data/nanhuang/Nan/ft_data/drosophila_enhancers_stark/split", + "--kmer", + "-1", + "--run_name", + "base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42", + "--model_max_length", + "512", + "--per_device_train_batch_size", + "128", + "--per_device_eval_batch_size", + "128", + "--gradient_accumulation_steps", + "1", + "--learning_rate", + "3e-5", + "--weight_decay", + "0.05", + "--num_train_epochs", + "4", + "--lr_scheduler_type", + "linear", + "--warmup_steps", + "0", + "--warmup_ratio", + "0.15", + "--fp16", + "--output_dir", + "genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42", + "--evaluation_strategy", + "epoch", + "--save_strategy", + "epoch", + "--load_best_model_at_end", + "True", + "--metric_for_best_model", + "eval_f1", + "--greater_is_better", + "True", + "--save_total_limit", + "1", + "--logging_steps", + "100", + "--overwrite_output_dir", + "True", + "--log_level", + "info", + "--seed", + "42", + "--find_unused_parameters", + "False", + "--project_name", + "genomic_bench_DNAbert2" + ], + "program": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", + "codePath": "train.py", + "codePathLocal": "train.py", + "email": "n5huang@ucsd.edu", + "root": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks", + "host": "u112222", + "executable": "/data/nanhuang/miniconda3/envs/bpe_v2/bin/python", + "cpu_count": 64, + "cpu_count_logical": 128, + "gpu": "NVIDIA RTX A6000", + "gpu_count": 8, + "disk": { + "/": 
{ + "total": "3768964489216", + "used": "3559217737728" + } + }, + "memory": { + "total": "1082030182400" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-5a3d8a94-f380-da39-63d2-4cae98f5c2ae" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-cf8724bd-d619-7916-ee26-88d517a20c47" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-48b494ab-4a63-ff4c-5cc8-746af5d27310" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-968c7ea7-97bf-416a-7689-72c141cfc2bb" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-d53c626b-860f-1dec-1cfa-1dfcde78bc88" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-caa40ec7-afcb-5fe0-c53a-85eb54152941" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-18ee7a7f-1bbe-edef-c72c-3abed60917b2" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-a8757d5a-c26e-48c6-a704-dfe62167fc81" + } + ], + "cudaVersion": "12.4", + "writerId": "frqd9dk39yfr13l3syvqyh1an4gqwbbg" +} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-98091ibt/files/wandb-summary.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-98091ibt/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..30c7561b388c4ee0878bd3e4c4ce901784af945f --- /dev/null +++ 
b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-98091ibt/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb":{"runtime":5},"_runtime":5} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-98091ibt/logs/debug-core.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-98091ibt/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..b3e6b29f3aaf1060a27618517e26ad723d23bc19 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-98091ibt/logs/debug-core.log @@ -0,0 +1,70 @@ +{"time":"2026-03-24T13:29:45.305129259-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmp1z2yqrmo/port-2988462.txt","pid":2988462,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:45.309982791-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988462} +{"time":"2026-03-24T13:29:45.310212189-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988462-3002515-553344955/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:45.367267943-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpgcirvt_o/port-2988994.txt","pid":2988994,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:45.369118622-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988994} +{"time":"2026-03-24T13:29:45.369108472-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988994-3002526-3879120873/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:45.436629794-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpjb3cbe_y/port-2989297.txt","pid":2989297,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} 
+{"time":"2026-03-24T13:29:45.440197373-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2989297} +{"time":"2026-03-24T13:29:45.439940814-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2989297-3002537-3086304176/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:45.47467857-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:45.537302391-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:45.549728007-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"kvar0rat","id":"1(@)"} +{"time":"2026-03-24T13:29:45.570654644-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpuhtl4ond/port-2988927.txt","pid":2988927,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:45.573715736-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpfxkgeon1/port-2988205.txt","pid":2988205,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:45.576238171-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988205} +{"time":"2026-03-24T13:29:45.576140962-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988205-3002555-3947692120/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:45.576526889-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988927} +{"time":"2026-03-24T13:29:45.576585229-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988927-3002556-1288920824/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:45.594127276-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"q9bcc7je","id":"1(@)"} 
+{"time":"2026-03-24T13:29:45.598255792-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:45.653664415-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"shouylxq","id":"1(@)"} +{"time":"2026-03-24T13:29:45.748437466-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:45.749159452-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:45.799482525-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"98091ibt","id":"1(@)"} +{"time":"2026-03-24T13:29:45.808395913-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"uwt1zkjf","id":"1(@)"} +{"time":"2026-03-24T13:29:46.383469733-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"kvar0rat","id":"1(@)"} +{"time":"2026-03-24T13:29:46.454975222-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"shouylxq","id":"1(@)"} +{"time":"2026-03-24T13:29:46.469338307-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"q9bcc7je","id":"1(@)"} +{"time":"2026-03-24T13:29:46.566053737-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"98091ibt","id":"1(@)"} +{"time":"2026-03-24T13:29:46.798692526-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"uwt1zkjf","id":"1(@)"} +{"time":"2026-03-24T13:29:53.087060316-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:53.087190845-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:53.087176566-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:53.087420184-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} 
+{"time":"2026-03-24T13:29:53.087461954-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988462-3002515-553344955/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:53.117433757-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:53.117506157-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:53.117531057-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:53.117584126-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:53.117803845-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988927-3002556-1288920824/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:53.13391052-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:53.133993269-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:53.13401498-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:53.134094839-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:53.134386087-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2989297-3002537-3086304176/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:53.213133753-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:53.213290313-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:53.213280742-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:53.213412652-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:53.213513481-07:00","level":"INFO","msg":"server: listener 
closed","addr":{"Name":"/tmp/wandb-2988205-3002555-3947692120/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:53.324612586-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:53.324723045-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:53.324702366-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:53.324855835-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:53.324995584-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988994-3002526-3879120873/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:54.282950989-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:54.283005858-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:54.283040418-07:00","level":"INFO","msg":"server is closed"} +{"time":"2026-03-24T13:29:54.302797732-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:54.302885752-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:54.302964941-07:00","level":"INFO","msg":"server is closed"} +{"time":"2026-03-24T13:29:55.884322072-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:55.884382172-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:55.884440552-07:00","level":"INFO","msg":"server is closed"} +{"time":"2026-03-24T13:29:57.169516129-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:57.169601188-07:00","level":"INFO","msg":"connection: ManageConnectionData: 
connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:57.169652118-07:00","level":"INFO","msg":"server is closed"} +{"time":"2026-03-24T13:29:58.800466629-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:58.800546268-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:58.800592628-07:00","level":"INFO","msg":"server is closed"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-98091ibt/logs/debug-internal.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-98091ibt/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..0dfcb9b23e20564c0e7c8bbaba3a76c6ed4d695d --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-98091ibt/logs/debug-internal.log @@ -0,0 +1,11 @@ +{"time":"2026-03-24T13:29:45.799729864-07:00","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2026-03-24T13:29:46.565711319-07:00","level":"INFO","msg":"stream: created new stream","id":"98091ibt"} +{"time":"2026-03-24T13:29:46.565868228-07:00","level":"INFO","msg":"handler: started","stream_id":"98091ibt"} +{"time":"2026-03-24T13:29:46.566041267-07:00","level":"INFO","msg":"stream: started","id":"98091ibt"} +{"time":"2026-03-24T13:29:46.566073647-07:00","level":"INFO","msg":"writer: started","stream_id":"98091ibt"} +{"time":"2026-03-24T13:29:46.566073837-07:00","level":"INFO","msg":"sender: started","stream_id":"98091ibt"} +{"time":"2026-03-24T13:29:53.213316702-07:00","level":"INFO","msg":"stream: closing","id":"98091ibt"} +{"time":"2026-03-24T13:29:53.660270098-07:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2026-03-24T13:29:57.160789551-07:00","level":"INFO","msg":"handler: closed","stream_id":"98091ibt"} +{"time":"2026-03-24T13:29:57.161103549-07:00","level":"INFO","msg":"sender: closed","stream_id":"98091ibt"} 
+{"time":"2026-03-24T13:29:57.161153818-07:00","level":"INFO","msg":"stream: closed","id":"98091ibt"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-98091ibt/logs/debug.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-98091ibt/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..e8b17e039ce8b981e095004b76d1047a8a0df076 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-98091ibt/logs/debug.log @@ -0,0 +1,23 @@ +2026-03-24 13:29:45,399 INFO MainThread:2988205 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1 +2026-03-24 13:29:45,399 INFO MainThread:2988205 [wandb_setup.py:_flush():80] Configure stats pid to 2988205 +2026-03-24 13:29:45,400 INFO MainThread:2988205 [wandb_setup.py:_flush():80] Loading settings from /home/nanhuang/.config/wandb/settings +2026-03-24 13:29:45,400 INFO MainThread:2988205 [wandb_setup.py:_flush():80] Loading settings from /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/settings +2026-03-24 13:29:45,400 INFO MainThread:2988205 [wandb_setup.py:_flush():80] Loading settings from environment variables +2026-03-24 13:29:45,400 INFO MainThread:2988205 [wandb_init.py:setup_run_log_directory():714] Logging user logs to /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-98091ibt/logs/debug.log +2026-03-24 13:29:45,400 INFO MainThread:2988205 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-98091ibt/logs/debug-internal.log +2026-03-24 13:29:45,401 INFO MainThread:2988205 [wandb_init.py:init():841] calling init triggers +2026-03-24 13:29:45,401 INFO MainThread:2988205 [wandb_init.py:init():846] wandb.init called with sweep_config: {} +config: {'_wandb': {}} +2026-03-24 13:29:45,401 INFO MainThread:2988205 [wandb_init.py:init():889] starting backend +2026-03-24 13:29:45,748 INFO MainThread:2988205 [wandb_init.py:init():892] sending inform_init 
request +2026-03-24 13:29:45,797 INFO MainThread:2988205 [wandb_init.py:init():900] backend started and connected +2026-03-24 13:29:45,803 INFO MainThread:2988205 [wandb_init.py:init():970] updated telemetry +2026-03-24 13:29:45,804 INFO MainThread:2988205 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout +2026-03-24 13:29:47,360 INFO MainThread:2988205 [wandb_init.py:init():1041] starting run threads in backend +2026-03-24 13:29:47,479 INFO MainThread:2988205 [wandb_run.py:_console_start():2521] atexit reg +2026-03-24 13:29:47,479 INFO MainThread:2988205 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2026-03-24 13:29:47,479 INFO MainThread:2988205 [wandb_run.py:_redirect():2438] Wrapping output streams. +2026-03-24 13:29:47,480 INFO MainThread:2988205 [wandb_run.py:_redirect():2461] Redirects installed. +2026-03-24 13:29:47,485 INFO MainThread:2988205 [wandb_init.py:init():1081] run started, returning control to user process +2026-03-24 13:29:53,210 INFO wandb-AsyncioManager-main:2988205 [service_client.py:_forward_responses():80] Reached EOF. +2026-03-24 13:29:53,211 INFO wandb-AsyncioManager-main:2988205 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles. 
diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-98091ibt/run-98091ibt.wandb b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-98091ibt/run-98091ibt.wandb new file mode 100644 index 0000000000000000000000000000000000000000..f35993fbfa4254f382625a1c9ee898adab2208e0 Binary files /dev/null and b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-98091ibt/run-98091ibt.wandb differ diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-kvar0rat/files/config.yaml b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-kvar0rat/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a4b36020ec74713a24cd578ff6bb70e907b62a01 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-kvar0rat/files/config.yaml @@ -0,0 +1,559 @@ +_name_or_path: + value: /data/nanhuang/Nan/models/DNAbert2_Pretrained +_wandb: + value: + cli_version: 0.23.1 + e: + 40bcy80v63yv2kku8txfahv7wfx2ndnx: + args: + - --model_name_or_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained + - --tokenizer_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json + - --trust_remote_code + - "True" + - --data_path + - /data/nanhuang/Nan/ft_data/drosophila_enhancers_stark/split + - --kmer + - "-1" + - --run_name + - base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42 + - --model_max_length + - "512" + - --per_device_train_batch_size + - "128" + - --per_device_eval_batch_size + - "128" + - --gradient_accumulation_steps + - "1" + - --learning_rate + - "3e-5" + - --weight_decay + - "0.05" + - --num_train_epochs + - "4" + - --lr_scheduler_type + - linear + - --warmup_steps + - "0" + - --warmup_ratio + - "0.15" + - --fp16 + - --output_dir + - genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42 + - --evaluation_strategy + - epoch + - --save_strategy + - epoch + - --load_best_model_at_end + - "True" + - --metric_for_best_model + - eval_f1 + - 
--greater_is_better + - "True" + - --save_total_limit + - "1" + - --logging_steps + - "100" + - --overwrite_output_dir + - "True" + - --log_level + - info + - --seed + - "42" + - --find_unused_parameters + - "False" + - --project_name + - genomic_bench_DNAbert2 + codePath: train.py + codePathLocal: train.py + cpu_count: 64 + cpu_count_logical: 128 + cudaVersion: "12.4" + disk: + /: + total: "3768964489216" + used: "3559217737728" + email: n5huang@ucsd.edu + executable: /data/nanhuang/miniconda3/envs/bpe_v2/bin/python + gpu: NVIDIA RTX A6000 + gpu_count: 8 + gpu_nvidia: + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-5a3d8a94-f380-da39-63d2-4cae98f5c2ae + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-cf8724bd-d619-7916-ee26-88d517a20c47 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-48b494ab-4a63-ff4c-5cc8-746af5d27310 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-968c7ea7-97bf-416a-7689-72c141cfc2bb + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-d53c626b-860f-1dec-1cfa-1dfcde78bc88 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-caa40ec7-afcb-5fe0-c53a-85eb54152941 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-18ee7a7f-1bbe-edef-c72c-3abed60917b2 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-a8757d5a-c26e-48c6-a704-dfe62167fc81 + host: u112222 + memory: + total: "1082030182400" + os: Linux-5.15.0-126-generic-x86_64-with-glibc2.35 + program: /data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py + python: CPython 3.9.18 + root: 
/data/nanhuang/Nan/Finetune-GenomicBenchmarks + startedAt: "2026-03-24T20:29:45.081864Z" + writerId: 40bcy80v63yv2kku8txfahv7wfx2ndnx + m: + - "1": train/global_step + "6": + - 3 + "7": [] + - "2": '*' + "5": 1 + "6": + - 1 + "7": [] + python_version: 3.9.18 + t: + "1": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "2": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "3": + - 7 + - 66 + "4": 3.9.18 + "5": 0.23.1 + "6": 4.35.2 + "9": + "1": transformers_trainer + "12": 0.23.1 + "13": linux-x86_64 +adafactor: + value: false +adam_beta1: + value: 0.9 +adam_beta2: + value: 0.999 +adam_epsilon: + value: 1e-08 +add_cross_attention: + value: false +architectures: + value: + - BertForMaskedLM +attention_probs_dropout_prob: + value: 0.1 +auto_find_batch_size: + value: false +bad_words_ids: + value: null +begin_suppress_tokens: + value: null +bf16: + value: false +bf16_full_eval: + value: false +bos_token_id: + value: null +cache_dir: + value: null +checkpointing: + value: false +chunk_size_feed_forward: + value: 0 +classifier_dropout: + value: null +cross_attention_hidden_size: + value: null +data_seed: + value: null +dataloader_drop_last: + value: false +dataloader_num_workers: + value: 0 +dataloader_pin_memory: + value: false +ddp_backend: + value: null +ddp_broadcast_buffers: + value: null +ddp_bucket_cap_mb: + value: null +ddp_find_unused_parameters: + value: null +ddp_timeout: + value: 1800 +debug: + value: [] +decoder_start_token_id: + value: null +deepspeed: + value: null +disable_tqdm: + value: false +dispatch_batches: + value: null +diversity_penalty: + value: 0 +do_eval: + value: true +do_predict: + value: false +do_sample: + value: false +do_train: + value: false +early_stopping: + value: false +encoder_no_repeat_ngram_size: + value: 0 +eos_token_id: + value: null +eval_accumulation_steps: + value: null +eval_and_save_results: + value: true +eval_delay: + value: 0 +eval_steps: + value: 100 +evaluation_strategy: + value: epoch 
+exponential_decay_length_penalty: + value: null +find_unused_parameters: + value: false +finetuning_task: + value: null +forced_bos_token_id: + value: null +forced_eos_token_id: + value: null +fp16: + value: true +fp16_backend: + value: auto +fp16_full_eval: + value: false +fp16_opt_level: + value: O1 +fsdp: + value: [] +fsdp_config: + value: + min_num_params: 0 + xla: false + xla_fsdp_grad_ckpt: false +fsdp_min_num_params: + value: 0 +fsdp_transformer_layer_cls_to_wrap: + value: null +full_determinism: + value: false +gradient_accumulation_steps: + value: 1 +gradient_checkpointing: + value: false +gradient_checkpointing_kwargs: + value: null +greater_is_better: + value: true +group_by_length: + value: false +half_precision_backend: + value: auto +hidden_act: + value: gelu +hidden_dropout_prob: + value: 0.1 +hidden_size: + value: 768 +hub_always_push: + value: false +hub_model_id: + value: null +hub_private_repo: + value: false +hub_strategy: + value: every_save +hub_token: + value: +id2label: + value: + "0": LABEL_0 + "1": LABEL_1 +ignore_data_skip: + value: false +include_inputs_for_metrics: + value: false +include_tokens_per_second: + value: false +initializer_range: + value: 0.02 +intermediate_size: + value: 3072 +is_decoder: + value: false +is_encoder_decoder: + value: false +jit_mode_eval: + value: false +label_names: + value: null +label_smoothing_factor: + value: 0 +label2id: + value: + LABEL_0: 0 + LABEL_1: 1 +layer_norm_eps: + value: 1e-12 +learning_rate: + value: 3e-05 +length_column_name: + value: length +length_penalty: + value: 1 +load_best_model_at_end: + value: true +local_rank: + value: 0 +log_level: + value: info +log_level_replica: + value: warning +log_on_each_node: + value: true +logging_dir: + value: genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/runs/Mar24_13-29-14_u112222 +logging_first_step: + value: false +logging_nan_inf_filter: + value: true +logging_steps: + value: 100 
+logging_strategy: + value: steps +lr_scheduler_type: + value: linear +max_grad_norm: + value: 1 +max_length: + value: 512 +max_position_embeddings: + value: 512 +max_steps: + value: -1 +metric_for_best_model: + value: eval_f1 +min_length: + value: 0 +model_max_length: + value: 512 +model_type: + value: bert +mp_parameters: + value: "" +neftune_noise_alpha: + value: null +no_cuda: + value: false +no_repeat_ngram_size: + value: 0 +num_attention_heads: + value: 12 +num_beam_groups: + value: 1 +num_beams: + value: 1 +num_hidden_layers: + value: 12 +num_return_sequences: + value: 1 +num_train_epochs: + value: 4 +optim: + value: adamw_torch +optim_args: + value: null +output_attentions: + value: false +output_dir: + value: genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42 +output_hidden_states: + value: false +output_scores: + value: false +overwrite_output_dir: + value: true +pad_token_id: + value: 0 +past_index: + value: -1 +per_device_eval_batch_size: + value: 128 +per_device_train_batch_size: + value: 128 +per_gpu_eval_batch_size: + value: null +per_gpu_train_batch_size: + value: null +position_embedding_type: + value: absolute +prediction_loss_only: + value: false +prefix: + value: null +problem_type: + value: null +project_name: + value: genomic_bench_DNAbert2 +push_to_hub: + value: false +push_to_hub_model_id: + value: null +push_to_hub_organization: + value: null +push_to_hub_token: + value: +ray_scope: + value: last +remove_invalid_values: + value: false +remove_unused_columns: + value: true +repetition_penalty: + value: 1 +report_to: + value: + - wandb +resume_from_checkpoint: + value: null +return_dict: + value: true +return_dict_in_generate: + value: false +run_name: + value: base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42 +save_model: + value: false +save_on_each_node: + value: false +save_safetensors: + value: true +save_steps: + value: 100 +save_strategy: + value: epoch 
+save_total_limit: + value: 1 +seed: + value: 42 +sep_token_id: + value: null +skip_memory_metrics: + value: true +split_batches: + value: false +suppress_tokens: + value: null +task_specific_params: + value: null +temperature: + value: 1 +tf_legacy_loss: + value: false +tf32: + value: null +tie_encoder_decoder: + value: false +tie_word_embeddings: + value: true +tokenizer_class: + value: null +top_k: + value: 50 +top_p: + value: 1 +torch_compile: + value: false +torch_compile_backend: + value: null +torch_compile_mode: + value: null +torch_dtype: + value: float32 +torchdynamo: + value: null +torchscript: + value: false +tpu_metrics_debug: + value: false +tpu_num_cores: + value: null +transformers_version: + value: 4.35.2 +type_vocab_size: + value: 2 +typical_p: + value: 1 +use_bfloat16: + value: false +use_cache: + value: true +use_cpu: + value: false +use_ipex: + value: false +use_legacy_prediction_loop: + value: false +use_mps_device: + value: false +vocab_file: + value: null +vocab_size: + value: 4096 +warmup_ratio: + value: 0.15 +warmup_steps: + value: 0 +weight_decay: + value: 0.05 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-kvar0rat/files/output.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-kvar0rat/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..178f7e68ab3e7fb98375717664e1a4bc3270f00b --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-kvar0rat/files/output.log @@ -0,0 +1,133 @@ +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... 
+Some weights of BertForSequenceClassification were not initialized from the model checkpoint at /data/nanhuang/Nan/models/DNAbert2_Pretrained and are newly initialized: ['bert.pooler.dense.bias', 'classifier.bias', 'classifier.weight', 'bert.pooler.dense.weight'] +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. +/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/accelerate/accelerator.py:439: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead. + self.scaler = torch.cuda.amp.GradScaler(**kwargs) +Using auto half precision backend +***** Running training ***** + Num examples = 5,531 + Num Epochs = 4 + Instantaneous batch size per device = 128 + Total train batch size (w. parallel, distributed & accumulation) = 128 + Gradient Accumulation steps = 1 + Total optimization steps = 176 + Number of trainable parameters = 89,188,610 +Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true" + 0%| | 0/176 [00:00 + train() + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 454, in train + trainer.train() + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 1555, in train + return inner_training_loop( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 1860, in _inner_training_loop + tr_loss_step = self.training_step(model, inputs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 2725, in training_step + loss = self.compute_loss(model, inputs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 2748, in compute_loss + outputs = model(**inputs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in 
_wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/accelerate/utils/operations.py", line 680, in forward + return model_forward(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/accelerate/utils/operations.py", line 668, in __call__ + return convert_to_fp32(self.model_forward(*args, **kwargs)) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/amp/autocast_mode.py", line 44, in decorate_autocast + return func(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 1564, in forward + outputs = self.bert( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 1013, in forward + encoder_outputs = self.encoder( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 607, in forward + layer_outputs = layer_module( + File 
"/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 497, in forward + self_attention_outputs = self.attention( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 427, in forward + self_outputs = self.self( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 355, in forward + attention_probs = nn.functional.softmax(attention_scores, dim=-1) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/functional.py", line 2137, in softmax + ret = input.softmax(dim) +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.50 GiB. GPU 0 has a total capacity of 47.53 GiB of which 1.05 GiB is free. Process 2988286 has 260.00 MiB memory in use. Including non-PyTorch memory, this process has 3.37 GiB memory in use. 
Process 2989297 has 654.00 MiB memory in use. Process 2988927 has 656.00 MiB memory in use. Process 2988994 has 40.88 GiB memory in use. Process 2988205 has 394.00 MiB memory in use. Process 2989572 has 260.00 MiB memory in use. Of the allocated memory 3.02 GiB is allocated by PyTorch, and 37.82 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) +Traceback (most recent call last): + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 473, in + train() + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 454, in train + trainer.train() + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 1555, in train + return inner_training_loop( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 1860, in _inner_training_loop + tr_loss_step = self.training_step(model, inputs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 2725, in training_step + loss = self.compute_loss(model, inputs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 2748, in compute_loss + outputs = model(**inputs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/accelerate/utils/operations.py", line 680, in forward + return model_forward(*args, **kwargs) + File 
"/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/accelerate/utils/operations.py", line 668, in __call__ + return convert_to_fp32(self.model_forward(*args, **kwargs)) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/amp/autocast_mode.py", line 44, in decorate_autocast + return func(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 1564, in forward + outputs = self.bert( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 1013, in forward + encoder_outputs = self.encoder( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 607, in forward + layer_outputs = layer_module( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File 
"/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 497, in forward + self_attention_outputs = self.attention( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 427, in forward + self_outputs = self.self( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 355, in forward + attention_probs = nn.functional.softmax(attention_scores, dim=-1) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/functional.py", line 2137, in softmax + ret = input.softmax(dim) +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.50 GiB. GPU 0 has a total capacity of 47.53 GiB of which 1.05 GiB is free. Process 2988286 has 260.00 MiB memory in use. Including non-PyTorch memory, this process has 3.37 GiB memory in use. Process 2989297 has 654.00 MiB memory in use. Process 2988927 has 656.00 MiB memory in use. Process 2988994 has 40.88 GiB memory in use. Process 2988205 has 394.00 MiB memory in use. Process 2989572 has 260.00 MiB memory in use. Of the allocated memory 3.02 GiB is allocated by PyTorch, and 37.82 MiB is reserved by PyTorch but unallocated. 
If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-kvar0rat/files/requirements.txt b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-kvar0rat/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..44d18d32ec4648cd530877d7c8c4758d5e887b9c --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-kvar0rat/files/requirements.txt @@ -0,0 +1,144 @@ +scipy==1.13.1 +jupyter_core==5.8.1 +smmap==5.0.2 +yarl==1.22.0 +executing==2.2.0 +cffi==2.0.0 +mkl_random==1.2.8 +traitlets==5.14.3 +wandb==0.23.1 +annotated-types==0.7.0 +evaluate==0.4.6 +kiwisolver==1.4.4 +Jinja2==3.1.6 +pyparsing==3.2.0 +mpmath==1.3.0 +debugpy==1.8.16 +nvidia-cuda-nvrtc-cu12==12.8.93 +docker-pycreds==0.4.0 +pycparser==2.23 +anyio==4.12.0 +safetensors==0.7.0 +matplotlib-inline==0.1.7 +Pygments==2.19.2 +numpy==2.0.2 +nvidia-cuda-cupti-cu12==12.8.90 +Bottleneck==1.4.2 +matplotlib==3.9.2 +numexpr==2.10.1 +sip==6.7.12 +aiohappyeyeballs==2.6.1 +ptyprocess==0.7.0 +fsspec==2025.7.0 +accelerate==0.25.0 +zipp==3.23.0 +PyQt5_sip==12.13.0 +pure_eval==0.2.3 +regex==2025.11.3 +aiosignal==1.4.0 +certifi==2025.10.5 +transformers==4.35.2 +mkl-service==2.4.0 +httpx==0.28.1 +mkl_fft==1.3.11 +pickleshare==0.7.5 +ipykernel==6.30.1 +nvidia-nvtx-cu12==12.8.90 +nvidia-cufft-cu12==11.3.3.83 +triton==3.4.0 +numba==0.60.0 +psutil==7.0.0 +contourpy==1.2.1 +PyQt5==5.15.10 +packaging==25.0 +datasets==4.4.1 +ipython==8.18.1 +sympy==1.14.0 +nvidia-cusolver-cu12==11.7.3.90 +multidict==6.7.0 +jupyter_client==8.6.3 +setuptools==80.9.0 +prompt_toolkit==3.0.51 +six==1.17.0 +GitPython==3.1.45 +pydantic==2.11.7 +nvidia-cublas-cu12==12.8.4.1 +aiohttp==3.13.2 +tzdata==2025.2 +importlib_metadata==8.7.0 +biopython==1.85 +httpcore==1.0.9 
+python-dateutil==2.9.0.post0 +llvmlite==0.43.0 +pandas==2.3.3 +scikit-learn==1.6.1 +asttokens==3.0.0 +joblib==1.5.3 +h11==0.16.0 +charset-normalizer==3.4.4 +pyzmq==27.0.2 +multiprocess==0.70.18 +nvidia-nvjitlink-cu12==12.8.93 +sentry-sdk==2.35.0 +pytz==2025.2 +pydantic_core==2.33.2 +MarkupSafe==3.0.3 +brotlicffi==1.0.9.2 +stack_data==0.6.3 +tqdm==4.67.1 +pynndescent==0.5.13 +importlib_resources==6.5.2 +ply==3.11 +pyarrow==21.0.0 +tokenizers==0.15.2 +exceptiongroup==1.3.1 +nvidia-cusparse-cu12==12.5.8.93 +comm==0.2.3 +pillow==11.3.0 +nvidia-cusparselt-cu12==0.7.1 +protobuf==3.20.3 +urllib3==2.5.0 +wheel==0.45.1 +wcwidth==0.2.13 +appdirs==1.4.4 +PySocks==1.7.1 +PyQt6_sip==13.10.2 +umap-learn==0.5.9.post2 +attrs==25.4.0 +platformdirs==4.3.8 +nvidia-cuda-runtime-cu12==12.8.90 +typing-inspection==0.4.1 +huggingface_hub==0.34.4 +decorator==5.2.1 +filelock==3.17.0 +nvidia-nccl-cu12==2.27.3 +fonttools==4.60.1 +xxhash==3.6.0 +dill==0.4.0 +threadpoolctl==3.6.0 +parso==0.8.4 +pysam==0.9.1 +frozenlist==1.8.0 +typing_extensions==4.15.0 +propcache==0.4.1 +tomli==2.2.1 +click==8.1.8 +nvidia-cudnn-cu12==9.10.2.21 +gitdb==4.0.12 +pip==25.3 +tornado==6.5.2 +networkx==3.2.1 +jedi==0.19.2 +idna==3.11 +pexpect==4.9.0 +async-timeout==5.0.1 +hf-xet==1.1.8 +nvidia-curand-cu12==10.3.9.90 +PyYAML==6.0.2 +nvidia-cufile-cu12==1.13.1.3 +setproctitle==1.3.6 +eval_type_backport==0.2.2 +requests==2.32.5 +nest-asyncio==1.6.0 +torch==2.8.0 +cycler==0.11.0 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-kvar0rat/files/wandb-metadata.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-kvar0rat/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..51df502e1e57202d320a756cefc486bc14fbdf22 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-kvar0rat/files/wandb-metadata.json @@ -0,0 +1,146 @@ +{ + "os": "Linux-5.15.0-126-generic-x86_64-with-glibc2.35", + "python": "CPython 3.9.18", + "startedAt": 
"2026-03-24T20:29:45.081864Z", + "args": [ + "--model_name_or_path", + "/data/nanhuang/Nan/models/DNAbert2_Pretrained", + "--tokenizer_path", + "/data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json", + "--trust_remote_code", + "True", + "--data_path", + "/data/nanhuang/Nan/ft_data/drosophila_enhancers_stark/split", + "--kmer", + "-1", + "--run_name", + "base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42", + "--model_max_length", + "512", + "--per_device_train_batch_size", + "128", + "--per_device_eval_batch_size", + "128", + "--gradient_accumulation_steps", + "1", + "--learning_rate", + "3e-5", + "--weight_decay", + "0.05", + "--num_train_epochs", + "4", + "--lr_scheduler_type", + "linear", + "--warmup_steps", + "0", + "--warmup_ratio", + "0.15", + "--fp16", + "--output_dir", + "genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42", + "--evaluation_strategy", + "epoch", + "--save_strategy", + "epoch", + "--load_best_model_at_end", + "True", + "--metric_for_best_model", + "eval_f1", + "--greater_is_better", + "True", + "--save_total_limit", + "1", + "--logging_steps", + "100", + "--overwrite_output_dir", + "True", + "--log_level", + "info", + "--seed", + "42", + "--find_unused_parameters", + "False", + "--project_name", + "genomic_bench_DNAbert2" + ], + "program": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", + "codePath": "train.py", + "codePathLocal": "train.py", + "email": "n5huang@ucsd.edu", + "root": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks", + "host": "u112222", + "executable": "/data/nanhuang/miniconda3/envs/bpe_v2/bin/python", + "cpu_count": 64, + "cpu_count_logical": 128, + "gpu": "NVIDIA RTX A6000", + "gpu_count": 8, + "disk": { + "/": { + "total": "3768964489216", + "used": "3559217737728" + } + }, + "memory": { + "total": "1082030182400" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + 
"architecture": "Ampere", + "uuid": "GPU-5a3d8a94-f380-da39-63d2-4cae98f5c2ae" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-cf8724bd-d619-7916-ee26-88d517a20c47" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-48b494ab-4a63-ff4c-5cc8-746af5d27310" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-968c7ea7-97bf-416a-7689-72c141cfc2bb" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-d53c626b-860f-1dec-1cfa-1dfcde78bc88" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-caa40ec7-afcb-5fe0-c53a-85eb54152941" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-18ee7a7f-1bbe-edef-c72c-3abed60917b2" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-a8757d5a-c26e-48c6-a704-dfe62167fc81" + } + ], + "cudaVersion": "12.4", + "writerId": "40bcy80v63yv2kku8txfahv7wfx2ndnx" +} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-kvar0rat/files/wandb-summary.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-kvar0rat/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..f60bccc799a769e529cf191d8e66da19a5816ef1 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-kvar0rat/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb":{"runtime":6},"_runtime":6} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-kvar0rat/logs/debug-core.log 
b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-kvar0rat/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..b3e6b29f3aaf1060a27618517e26ad723d23bc19 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-kvar0rat/logs/debug-core.log @@ -0,0 +1,70 @@ +{"time":"2026-03-24T13:29:45.305129259-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmp1z2yqrmo/port-2988462.txt","pid":2988462,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:45.309982791-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988462} +{"time":"2026-03-24T13:29:45.310212189-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988462-3002515-553344955/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:45.367267943-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpgcirvt_o/port-2988994.txt","pid":2988994,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:45.369118622-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988994} +{"time":"2026-03-24T13:29:45.369108472-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988994-3002526-3879120873/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:45.436629794-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpjb3cbe_y/port-2989297.txt","pid":2989297,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:45.440197373-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2989297} +{"time":"2026-03-24T13:29:45.439940814-07:00","level":"INFO","msg":"server: accepting 
connections","addr":{"Name":"/tmp/wandb-2989297-3002537-3086304176/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:45.47467857-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:45.537302391-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:45.549728007-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"kvar0rat","id":"1(@)"} +{"time":"2026-03-24T13:29:45.570654644-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpuhtl4ond/port-2988927.txt","pid":2988927,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:45.573715736-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpfxkgeon1/port-2988205.txt","pid":2988205,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:45.576238171-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988205} +{"time":"2026-03-24T13:29:45.576140962-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988205-3002555-3947692120/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:45.576526889-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988927} +{"time":"2026-03-24T13:29:45.576585229-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988927-3002556-1288920824/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:45.594127276-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"q9bcc7je","id":"1(@)"} +{"time":"2026-03-24T13:29:45.598255792-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:45.653664415-07:00","level":"INFO","msg":"handleInformInit: 
received","streamId":"shouylxq","id":"1(@)"} +{"time":"2026-03-24T13:29:45.748437466-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:45.749159452-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:45.799482525-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"98091ibt","id":"1(@)"} +{"time":"2026-03-24T13:29:45.808395913-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"uwt1zkjf","id":"1(@)"} +{"time":"2026-03-24T13:29:46.383469733-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"kvar0rat","id":"1(@)"} +{"time":"2026-03-24T13:29:46.454975222-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"shouylxq","id":"1(@)"} +{"time":"2026-03-24T13:29:46.469338307-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"q9bcc7je","id":"1(@)"} +{"time":"2026-03-24T13:29:46.566053737-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"98091ibt","id":"1(@)"} +{"time":"2026-03-24T13:29:46.798692526-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"uwt1zkjf","id":"1(@)"} +{"time":"2026-03-24T13:29:53.087060316-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:53.087190845-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:53.087176566-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:53.087420184-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:53.087461954-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988462-3002515-553344955/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:53.117433757-07:00","level":"INFO","msg":"handleInformTeardown: server teardown 
initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:53.117506157-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:53.117531057-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:53.117584126-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:53.117803845-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988927-3002556-1288920824/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:53.13391052-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:53.133993269-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:53.13401498-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:53.134094839-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:53.134386087-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2989297-3002537-3086304176/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:53.213133753-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:53.213290313-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:53.213280742-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:53.213412652-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:53.213513481-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988205-3002555-3947692120/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:53.324612586-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:53.324723045-07:00","level":"INFO","msg":"server is shutting down"} 
+{"time":"2026-03-24T13:29:53.324702366-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:53.324855835-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:53.324995584-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988994-3002526-3879120873/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:54.282950989-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:54.283005858-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:54.283040418-07:00","level":"INFO","msg":"server is closed"} +{"time":"2026-03-24T13:29:54.302797732-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:54.302885752-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:54.302964941-07:00","level":"INFO","msg":"server is closed"} +{"time":"2026-03-24T13:29:55.884322072-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:55.884382172-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:55.884440552-07:00","level":"INFO","msg":"server is closed"} +{"time":"2026-03-24T13:29:57.169516129-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:57.169601188-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:57.169652118-07:00","level":"INFO","msg":"server is closed"} +{"time":"2026-03-24T13:29:58.800466629-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} 
+{"time":"2026-03-24T13:29:58.800546268-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:58.800592628-07:00","level":"INFO","msg":"server is closed"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-kvar0rat/logs/debug-internal.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-kvar0rat/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..49a6e85bd464bb87af937ad0e4d80a46eba2b405 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-kvar0rat/logs/debug-internal.log @@ -0,0 +1,11 @@ +{"time":"2026-03-24T13:29:45.551186229-07:00","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2026-03-24T13:29:46.382981746-07:00","level":"INFO","msg":"stream: created new stream","id":"kvar0rat"} +{"time":"2026-03-24T13:29:46.383214645-07:00","level":"INFO","msg":"handler: started","stream_id":"kvar0rat"} +{"time":"2026-03-24T13:29:46.383378374-07:00","level":"INFO","msg":"stream: started","id":"kvar0rat"} +{"time":"2026-03-24T13:29:46.383452193-07:00","level":"INFO","msg":"writer: started","stream_id":"kvar0rat"} +{"time":"2026-03-24T13:29:46.383419894-07:00","level":"INFO","msg":"sender: started","stream_id":"kvar0rat"} +{"time":"2026-03-24T13:29:53.087214496-07:00","level":"INFO","msg":"stream: closing","id":"kvar0rat"} +{"time":"2026-03-24T13:29:53.842866062-07:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2026-03-24T13:29:54.300130158-07:00","level":"INFO","msg":"handler: closed","stream_id":"kvar0rat"} +{"time":"2026-03-24T13:29:54.300455406-07:00","level":"INFO","msg":"sender: closed","stream_id":"kvar0rat"} +{"time":"2026-03-24T13:29:54.300508386-07:00","level":"INFO","msg":"stream: closed","id":"kvar0rat"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-kvar0rat/logs/debug.log 
b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-kvar0rat/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..b5462aec9e132db798bc8f03404377b6fc0474df --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-kvar0rat/logs/debug.log @@ -0,0 +1,24 @@ +2026-03-24 13:29:45,089 INFO MainThread:2988462 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1 +2026-03-24 13:29:45,089 INFO MainThread:2988462 [wandb_setup.py:_flush():80] Configure stats pid to 2988462 +2026-03-24 13:29:45,089 INFO MainThread:2988462 [wandb_setup.py:_flush():80] Loading settings from /home/nanhuang/.config/wandb/settings +2026-03-24 13:29:45,089 INFO MainThread:2988462 [wandb_setup.py:_flush():80] Loading settings from /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/settings +2026-03-24 13:29:45,089 INFO MainThread:2988462 [wandb_setup.py:_flush():80] Loading settings from environment variables +2026-03-24 13:29:45,089 INFO MainThread:2988462 [wandb_init.py:setup_run_log_directory():714] Logging user logs to /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-kvar0rat/logs/debug.log +2026-03-24 13:29:45,089 INFO MainThread:2988462 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-kvar0rat/logs/debug-internal.log +2026-03-24 13:29:45,089 INFO MainThread:2988462 [wandb_init.py:init():841] calling init triggers +2026-03-24 13:29:45,089 INFO MainThread:2988462 [wandb_init.py:init():846] wandb.init called with sweep_config: {} +config: {'_wandb': {}} +2026-03-24 13:29:45,089 INFO MainThread:2988462 [wandb_init.py:init():889] starting backend +2026-03-24 13:29:45,474 INFO MainThread:2988462 [wandb_init.py:init():892] sending inform_init request +2026-03-24 13:29:45,538 INFO MainThread:2988462 [wandb_init.py:init():900] backend started and connected +2026-03-24 13:29:45,544 INFO MainThread:2988462 [wandb_init.py:init():970] 
updated telemetry +2026-03-24 13:29:45,545 INFO MainThread:2988462 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout +2026-03-24 13:29:47,052 INFO MainThread:2988462 [wandb_init.py:init():1041] starting run threads in backend +2026-03-24 13:29:47,198 INFO MainThread:2988462 [wandb_run.py:_console_start():2521] atexit reg +2026-03-24 13:29:47,198 INFO MainThread:2988462 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2026-03-24 13:29:47,198 INFO MainThread:2988462 [wandb_run.py:_redirect():2438] Wrapping output streams. +2026-03-24 13:29:47,199 INFO MainThread:2988462 [wandb_run.py:_redirect():2461] Redirects installed. +2026-03-24 13:29:47,203 INFO MainThread:2988462 [wandb_init.py:init():1081] run started, returning control to user process +2026-03-24 13:29:52,873 INFO MainThread:2988462 [wandb_run.py:_config_callback():1396] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 512, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['BertForMaskedLM'], 'finetuning_task': None, 'id2label': {0: 
'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': '/data/nanhuang/Nan/models/DNAbert2_Pretrained', 'transformers_version': '4.35.2', 'model_type': 'bert', 'vocab_size': 4096, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 512, 'type_vocab_size': 2, 'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'classifier_dropout': None, 'output_dir': 'genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 128, 'per_device_eval_batch_size': 128, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 3e-05, 'weight_decay': 0.05, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 4, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'warmup_ratio': 0.15, 'warmup_steps': 0, 'log_level': 'info', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 'genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/runs/Mar24_13-29-14_u112222', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 100, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 100, 'save_total_limit': 1, 'save_safetensors': True, 'save_on_each_node': 
False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 100, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': 'base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'eval_f1', 'greater_is_better': True, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': False, 
'include_tokens_per_second': False, 'neftune_noise_alpha': None, 'vocab_file': None, 'cache_dir': None, 'model_max_length': 512, 'find_unused_parameters': False, 'checkpointing': False, 'eval_and_save_results': True, 'save_model': False, 'project_name': 'genomic_bench_DNAbert2'} +2026-03-24 13:29:53,087 INFO wandb-AsyncioManager-main:2988462 [service_client.py:_forward_responses():80] Reached EOF. +2026-03-24 13:29:53,087 INFO wandb-AsyncioManager-main:2988462 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles. diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-kvar0rat/run-kvar0rat.wandb b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-kvar0rat/run-kvar0rat.wandb new file mode 100644 index 0000000000000000000000000000000000000000..a901ade41768b65d954d7672f75f17bfa454f853 Binary files /dev/null and b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-kvar0rat/run-kvar0rat.wandb differ diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-q9bcc7je/files/config.yaml b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-q9bcc7je/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3608c9551d281634584cc8f8a95f6309015781ce --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-q9bcc7je/files/config.yaml @@ -0,0 +1,559 @@ +_name_or_path: + value: /data/nanhuang/Nan/models/DNAbert2_Pretrained +_wandb: + value: + cli_version: 0.23.1 + e: + t3dgbhn4qin96cddzjgsydlkw8qr89k9: + args: + - --model_name_or_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained + - --tokenizer_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json + - --trust_remote_code + - "True" + - --data_path + - /data/nanhuang/Nan/ft_data/drosophila_enhancers_stark/split + - --kmer + - "-1" + - --run_name + - base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42 + - --model_max_length + - "512" + - --per_device_train_batch_size + - "128" + - --per_device_eval_batch_size + - "128" 
+ - --gradient_accumulation_steps + - "1" + - --learning_rate + - "3e-5" + - --weight_decay + - "0.05" + - --num_train_epochs + - "4" + - --lr_scheduler_type + - linear + - --warmup_steps + - "0" + - --warmup_ratio + - "0.15" + - --fp16 + - --output_dir + - genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42 + - --evaluation_strategy + - epoch + - --save_strategy + - epoch + - --load_best_model_at_end + - "True" + - --metric_for_best_model + - eval_f1 + - --greater_is_better + - "True" + - --save_total_limit + - "1" + - --logging_steps + - "100" + - --overwrite_output_dir + - "True" + - --log_level + - info + - --seed + - "42" + - --find_unused_parameters + - "False" + - --project_name + - genomic_bench_DNAbert2 + codePath: train.py + codePathLocal: train.py + cpu_count: 64 + cpu_count_logical: 128 + cudaVersion: "12.4" + disk: + /: + total: "3768964489216" + used: "3559217737728" + email: n5huang@ucsd.edu + executable: /data/nanhuang/miniconda3/envs/bpe_v2/bin/python + gpu: NVIDIA RTX A6000 + gpu_count: 8 + gpu_nvidia: + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-5a3d8a94-f380-da39-63d2-4cae98f5c2ae + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-cf8724bd-d619-7916-ee26-88d517a20c47 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-48b494ab-4a63-ff4c-5cc8-746af5d27310 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-968c7ea7-97bf-416a-7689-72c141cfc2bb + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-d53c626b-860f-1dec-1cfa-1dfcde78bc88 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-caa40ec7-afcb-5fe0-c53a-85eb54152941 + - architecture: Ampere 
+ cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-18ee7a7f-1bbe-edef-c72c-3abed60917b2 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-a8757d5a-c26e-48c6-a704-dfe62167fc81 + host: u112222 + memory: + total: "1082030182400" + os: Linux-5.15.0-126-generic-x86_64-with-glibc2.35 + program: /data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py + python: CPython 3.9.18 + root: /data/nanhuang/Nan/Finetune-GenomicBenchmarks + startedAt: "2026-03-24T20:29:45.185661Z" + writerId: t3dgbhn4qin96cddzjgsydlkw8qr89k9 + m: + - "1": train/global_step + "6": + - 3 + "7": [] + - "2": '*' + "5": 1 + "6": + - 1 + "7": [] + python_version: 3.9.18 + t: + "1": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "2": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "3": + - 7 + - 66 + "4": 3.9.18 + "5": 0.23.1 + "6": 4.35.2 + "9": + "1": transformers_trainer + "12": 0.23.1 + "13": linux-x86_64 +adafactor: + value: false +adam_beta1: + value: 0.9 +adam_beta2: + value: 0.999 +adam_epsilon: + value: 1e-08 +add_cross_attention: + value: false +architectures: + value: + - BertForMaskedLM +attention_probs_dropout_prob: + value: 0.1 +auto_find_batch_size: + value: false +bad_words_ids: + value: null +begin_suppress_tokens: + value: null +bf16: + value: false +bf16_full_eval: + value: false +bos_token_id: + value: null +cache_dir: + value: null +checkpointing: + value: false +chunk_size_feed_forward: + value: 0 +classifier_dropout: + value: null +cross_attention_hidden_size: + value: null +data_seed: + value: null +dataloader_drop_last: + value: false +dataloader_num_workers: + value: 0 +dataloader_pin_memory: + value: false +ddp_backend: + value: null +ddp_broadcast_buffers: + value: null +ddp_bucket_cap_mb: + value: null +ddp_find_unused_parameters: + value: null +ddp_timeout: + value: 1800 +debug: + value: [] +decoder_start_token_id: + value: null +deepspeed: + value: null +disable_tqdm: + value: 
false +dispatch_batches: + value: null +diversity_penalty: + value: 0 +do_eval: + value: true +do_predict: + value: false +do_sample: + value: false +do_train: + value: false +early_stopping: + value: false +encoder_no_repeat_ngram_size: + value: 0 +eos_token_id: + value: null +eval_accumulation_steps: + value: null +eval_and_save_results: + value: true +eval_delay: + value: 0 +eval_steps: + value: 100 +evaluation_strategy: + value: epoch +exponential_decay_length_penalty: + value: null +find_unused_parameters: + value: false +finetuning_task: + value: null +forced_bos_token_id: + value: null +forced_eos_token_id: + value: null +fp16: + value: true +fp16_backend: + value: auto +fp16_full_eval: + value: false +fp16_opt_level: + value: O1 +fsdp: + value: [] +fsdp_config: + value: + min_num_params: 0 + xla: false + xla_fsdp_grad_ckpt: false +fsdp_min_num_params: + value: 0 +fsdp_transformer_layer_cls_to_wrap: + value: null +full_determinism: + value: false +gradient_accumulation_steps: + value: 1 +gradient_checkpointing: + value: false +gradient_checkpointing_kwargs: + value: null +greater_is_better: + value: true +group_by_length: + value: false +half_precision_backend: + value: auto +hidden_act: + value: gelu +hidden_dropout_prob: + value: 0.1 +hidden_size: + value: 768 +hub_always_push: + value: false +hub_model_id: + value: null +hub_private_repo: + value: false +hub_strategy: + value: every_save +hub_token: + value: +id2label: + value: + "0": LABEL_0 + "1": LABEL_1 +ignore_data_skip: + value: false +include_inputs_for_metrics: + value: false +include_tokens_per_second: + value: false +initializer_range: + value: 0.02 +intermediate_size: + value: 3072 +is_decoder: + value: false +is_encoder_decoder: + value: false +jit_mode_eval: + value: false +label_names: + value: null +label_smoothing_factor: + value: 0 +label2id: + value: + LABEL_0: 0 + LABEL_1: 1 +layer_norm_eps: + value: 1e-12 +learning_rate: + value: 3e-05 +length_column_name: + value: length 
+length_penalty: + value: 1 +load_best_model_at_end: + value: true +local_rank: + value: 0 +log_level: + value: info +log_level_replica: + value: warning +log_on_each_node: + value: true +logging_dir: + value: genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/runs/Mar24_13-29-15_u112222 +logging_first_step: + value: false +logging_nan_inf_filter: + value: true +logging_steps: + value: 100 +logging_strategy: + value: steps +lr_scheduler_type: + value: linear +max_grad_norm: + value: 1 +max_length: + value: 512 +max_position_embeddings: + value: 512 +max_steps: + value: -1 +metric_for_best_model: + value: eval_f1 +min_length: + value: 0 +model_max_length: + value: 512 +model_type: + value: bert +mp_parameters: + value: "" +neftune_noise_alpha: + value: null +no_cuda: + value: false +no_repeat_ngram_size: + value: 0 +num_attention_heads: + value: 12 +num_beam_groups: + value: 1 +num_beams: + value: 1 +num_hidden_layers: + value: 12 +num_return_sequences: + value: 1 +num_train_epochs: + value: 4 +optim: + value: adamw_torch +optim_args: + value: null +output_attentions: + value: false +output_dir: + value: genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42 +output_hidden_states: + value: false +output_scores: + value: false +overwrite_output_dir: + value: true +pad_token_id: + value: 0 +past_index: + value: -1 +per_device_eval_batch_size: + value: 128 +per_device_train_batch_size: + value: 128 +per_gpu_eval_batch_size: + value: null +per_gpu_train_batch_size: + value: null +position_embedding_type: + value: absolute +prediction_loss_only: + value: false +prefix: + value: null +problem_type: + value: null +project_name: + value: genomic_bench_DNAbert2 +push_to_hub: + value: false +push_to_hub_model_id: + value: null +push_to_hub_organization: + value: null +push_to_hub_token: + value: +ray_scope: + value: last +remove_invalid_values: + value: false 
+remove_unused_columns: + value: true +repetition_penalty: + value: 1 +report_to: + value: + - wandb +resume_from_checkpoint: + value: null +return_dict: + value: true +return_dict_in_generate: + value: false +run_name: + value: base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42 +save_model: + value: false +save_on_each_node: + value: false +save_safetensors: + value: true +save_steps: + value: 100 +save_strategy: + value: epoch +save_total_limit: + value: 1 +seed: + value: 42 +sep_token_id: + value: null +skip_memory_metrics: + value: true +split_batches: + value: false +suppress_tokens: + value: null +task_specific_params: + value: null +temperature: + value: 1 +tf_legacy_loss: + value: false +tf32: + value: null +tie_encoder_decoder: + value: false +tie_word_embeddings: + value: true +tokenizer_class: + value: null +top_k: + value: 50 +top_p: + value: 1 +torch_compile: + value: false +torch_compile_backend: + value: null +torch_compile_mode: + value: null +torch_dtype: + value: float32 +torchdynamo: + value: null +torchscript: + value: false +tpu_metrics_debug: + value: false +tpu_num_cores: + value: null +transformers_version: + value: 4.35.2 +type_vocab_size: + value: 2 +typical_p: + value: 1 +use_bfloat16: + value: false +use_cache: + value: true +use_cpu: + value: false +use_ipex: + value: false +use_legacy_prediction_loop: + value: false +use_mps_device: + value: false +vocab_file: + value: null +vocab_size: + value: 4096 +warmup_ratio: + value: 0.15 +warmup_steps: + value: 0 +weight_decay: + value: 0.05 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-q9bcc7je/files/output.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-q9bcc7je/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..36e61695ea67b1d555022020b91b25f202ed247c --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-q9bcc7je/files/output.log @@ -0,0 +1,129 @@ +WARNING:root:Perform single sequence 
classification... +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... +Some weights of BertForSequenceClassification were not initialized from the model checkpoint at /data/nanhuang/Nan/models/DNAbert2_Pretrained and are newly initialized: ['classifier.bias', 'classifier.weight', 'bert.pooler.dense.weight', 'bert.pooler.dense.bias'] +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. +/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/accelerate/accelerator.py:439: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead. + self.scaler = torch.cuda.amp.GradScaler(**kwargs) +Using auto half precision backend +***** Running training ***** + Num examples = 5,531 + Num Epochs = 4 + Instantaneous batch size per device = 128 + Total train batch size (w. parallel, distributed & accumulation) = 128 + Gradient Accumulation steps = 1 + Total optimization steps = 176 + Number of trainable parameters = 89,188,610 +Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true" + 0%| | 0/176 [00:00 + train() + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 454, in train + trainer.train() + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 1555, in train + return inner_training_loop( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 1860, in _inner_training_loop + tr_loss_step = self.training_step(model, inputs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 2725, in training_step + loss = self.compute_loss(model, inputs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 2748, in compute_loss + outputs = 
model(**inputs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/accelerate/utils/operations.py", line 680, in forward + return model_forward(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/accelerate/utils/operations.py", line 668, in __call__ + return convert_to_fp32(self.model_forward(*args, **kwargs)) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/amp/autocast_mode.py", line 44, in decorate_autocast + return func(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 1564, in forward + outputs = self.bert( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 1013, in forward + encoder_outputs = self.encoder( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File 
"/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 607, in forward + layer_outputs = layer_module( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 497, in forward + self_attention_outputs = self.attention( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 427, in forward + self_outputs = self.self( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 352, in forward + attention_scores = attention_scores + attention_mask +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.50 GiB. GPU 0 has a total capacity of 47.53 GiB of which 40.12 MiB is free. Process 2988286 has 260.00 MiB memory in use. Process 2988462 has 3.37 GiB memory in use. 
Process 2989297 has 698.00 MiB memory in use. Process 2988927 has 1.48 GiB memory in use. Including non-PyTorch memory, this process has 40.88 GiB memory in use. Process 2988205 has 534.00 MiB memory in use. Process 2989572 has 260.00 MiB memory in use. Of the allocated memory 39.33 GiB is allocated by PyTorch, and 1.23 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) +Traceback (most recent call last): + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 473, in + train() + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 454, in train + trainer.train() + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 1555, in train + return inner_training_loop( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 1860, in _inner_training_loop + tr_loss_step = self.training_step(model, inputs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 2725, in training_step + loss = self.compute_loss(model, inputs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 2748, in compute_loss + outputs = model(**inputs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/accelerate/utils/operations.py", line 680, in forward + return model_forward(*args, 
**kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/accelerate/utils/operations.py", line 668, in __call__ + return convert_to_fp32(self.model_forward(*args, **kwargs)) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/amp/autocast_mode.py", line 44, in decorate_autocast + return func(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 1564, in forward + outputs = self.bert( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 1013, in forward + encoder_outputs = self.encoder( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 607, in forward + layer_outputs = layer_module( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File 
"/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 497, in forward + self_attention_outputs = self.attention( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 427, in forward + self_outputs = self.self( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 352, in forward + attention_scores = attention_scores + attention_mask +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.50 GiB. GPU 0 has a total capacity of 47.53 GiB of which 40.12 MiB is free. Process 2988286 has 260.00 MiB memory in use. Process 2988462 has 3.37 GiB memory in use. Process 2989297 has 698.00 MiB memory in use. Process 2988927 has 1.48 GiB memory in use. Including non-PyTorch memory, this process has 40.88 GiB memory in use. Process 2988205 has 534.00 MiB memory in use. Process 2989572 has 260.00 MiB memory in use. Of the allocated memory 39.33 GiB is allocated by PyTorch, and 1.23 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. 
See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-q9bcc7je/files/requirements.txt b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-q9bcc7je/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..44d18d32ec4648cd530877d7c8c4758d5e887b9c --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-q9bcc7je/files/requirements.txt @@ -0,0 +1,144 @@ +scipy==1.13.1 +jupyter_core==5.8.1 +smmap==5.0.2 +yarl==1.22.0 +executing==2.2.0 +cffi==2.0.0 +mkl_random==1.2.8 +traitlets==5.14.3 +wandb==0.23.1 +annotated-types==0.7.0 +evaluate==0.4.6 +kiwisolver==1.4.4 +Jinja2==3.1.6 +pyparsing==3.2.0 +mpmath==1.3.0 +debugpy==1.8.16 +nvidia-cuda-nvrtc-cu12==12.8.93 +docker-pycreds==0.4.0 +pycparser==2.23 +anyio==4.12.0 +safetensors==0.7.0 +matplotlib-inline==0.1.7 +Pygments==2.19.2 +numpy==2.0.2 +nvidia-cuda-cupti-cu12==12.8.90 +Bottleneck==1.4.2 +matplotlib==3.9.2 +numexpr==2.10.1 +sip==6.7.12 +aiohappyeyeballs==2.6.1 +ptyprocess==0.7.0 +fsspec==2025.7.0 +accelerate==0.25.0 +zipp==3.23.0 +PyQt5_sip==12.13.0 +pure_eval==0.2.3 +regex==2025.11.3 +aiosignal==1.4.0 +certifi==2025.10.5 +transformers==4.35.2 +mkl-service==2.4.0 +httpx==0.28.1 +mkl_fft==1.3.11 +pickleshare==0.7.5 +ipykernel==6.30.1 +nvidia-nvtx-cu12==12.8.90 +nvidia-cufft-cu12==11.3.3.83 +triton==3.4.0 +numba==0.60.0 +psutil==7.0.0 +contourpy==1.2.1 +PyQt5==5.15.10 +packaging==25.0 +datasets==4.4.1 +ipython==8.18.1 +sympy==1.14.0 +nvidia-cusolver-cu12==11.7.3.90 +multidict==6.7.0 +jupyter_client==8.6.3 +setuptools==80.9.0 +prompt_toolkit==3.0.51 +six==1.17.0 +GitPython==3.1.45 +pydantic==2.11.7 +nvidia-cublas-cu12==12.8.4.1 +aiohttp==3.13.2 +tzdata==2025.2 +importlib_metadata==8.7.0 +biopython==1.85 +httpcore==1.0.9 +python-dateutil==2.9.0.post0 +llvmlite==0.43.0 +pandas==2.3.3 +scikit-learn==1.6.1 +asttokens==3.0.0 +joblib==1.5.3 +h11==0.16.0 
+charset-normalizer==3.4.4 +pyzmq==27.0.2 +multiprocess==0.70.18 +nvidia-nvjitlink-cu12==12.8.93 +sentry-sdk==2.35.0 +pytz==2025.2 +pydantic_core==2.33.2 +MarkupSafe==3.0.3 +brotlicffi==1.0.9.2 +stack_data==0.6.3 +tqdm==4.67.1 +pynndescent==0.5.13 +importlib_resources==6.5.2 +ply==3.11 +pyarrow==21.0.0 +tokenizers==0.15.2 +exceptiongroup==1.3.1 +nvidia-cusparse-cu12==12.5.8.93 +comm==0.2.3 +pillow==11.3.0 +nvidia-cusparselt-cu12==0.7.1 +protobuf==3.20.3 +urllib3==2.5.0 +wheel==0.45.1 +wcwidth==0.2.13 +appdirs==1.4.4 +PySocks==1.7.1 +PyQt6_sip==13.10.2 +umap-learn==0.5.9.post2 +attrs==25.4.0 +platformdirs==4.3.8 +nvidia-cuda-runtime-cu12==12.8.90 +typing-inspection==0.4.1 +huggingface_hub==0.34.4 +decorator==5.2.1 +filelock==3.17.0 +nvidia-nccl-cu12==2.27.3 +fonttools==4.60.1 +xxhash==3.6.0 +dill==0.4.0 +threadpoolctl==3.6.0 +parso==0.8.4 +pysam==0.9.1 +frozenlist==1.8.0 +typing_extensions==4.15.0 +propcache==0.4.1 +tomli==2.2.1 +click==8.1.8 +nvidia-cudnn-cu12==9.10.2.21 +gitdb==4.0.12 +pip==25.3 +tornado==6.5.2 +networkx==3.2.1 +jedi==0.19.2 +idna==3.11 +pexpect==4.9.0 +async-timeout==5.0.1 +hf-xet==1.1.8 +nvidia-curand-cu12==10.3.9.90 +PyYAML==6.0.2 +nvidia-cufile-cu12==1.13.1.3 +setproctitle==1.3.6 +eval_type_backport==0.2.2 +requests==2.32.5 +nest-asyncio==1.6.0 +torch==2.8.0 +cycler==0.11.0 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-q9bcc7je/files/wandb-metadata.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-q9bcc7je/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..99948b87a884af74709e95550a0f3f6281ff53c8 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-q9bcc7je/files/wandb-metadata.json @@ -0,0 +1,146 @@ +{ + "os": "Linux-5.15.0-126-generic-x86_64-with-glibc2.35", + "python": "CPython 3.9.18", + "startedAt": "2026-03-24T20:29:45.185661Z", + "args": [ + "--model_name_or_path", + "/data/nanhuang/Nan/models/DNAbert2_Pretrained", + "--tokenizer_path", + 
"/data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json", + "--trust_remote_code", + "True", + "--data_path", + "/data/nanhuang/Nan/ft_data/drosophila_enhancers_stark/split", + "--kmer", + "-1", + "--run_name", + "base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42", + "--model_max_length", + "512", + "--per_device_train_batch_size", + "128", + "--per_device_eval_batch_size", + "128", + "--gradient_accumulation_steps", + "1", + "--learning_rate", + "3e-5", + "--weight_decay", + "0.05", + "--num_train_epochs", + "4", + "--lr_scheduler_type", + "linear", + "--warmup_steps", + "0", + "--warmup_ratio", + "0.15", + "--fp16", + "--output_dir", + "genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42", + "--evaluation_strategy", + "epoch", + "--save_strategy", + "epoch", + "--load_best_model_at_end", + "True", + "--metric_for_best_model", + "eval_f1", + "--greater_is_better", + "True", + "--save_total_limit", + "1", + "--logging_steps", + "100", + "--overwrite_output_dir", + "True", + "--log_level", + "info", + "--seed", + "42", + "--find_unused_parameters", + "False", + "--project_name", + "genomic_bench_DNAbert2" + ], + "program": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", + "codePath": "train.py", + "codePathLocal": "train.py", + "email": "n5huang@ucsd.edu", + "root": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks", + "host": "u112222", + "executable": "/data/nanhuang/miniconda3/envs/bpe_v2/bin/python", + "cpu_count": 64, + "cpu_count_logical": 128, + "gpu": "NVIDIA RTX A6000", + "gpu_count": 8, + "disk": { + "/": { + "total": "3768964489216", + "used": "3559217737728" + } + }, + "memory": { + "total": "1082030182400" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-5a3d8a94-f380-da39-63d2-4cae98f5c2ae" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + 
"cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-cf8724bd-d619-7916-ee26-88d517a20c47" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-48b494ab-4a63-ff4c-5cc8-746af5d27310" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-968c7ea7-97bf-416a-7689-72c141cfc2bb" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-d53c626b-860f-1dec-1cfa-1dfcde78bc88" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-caa40ec7-afcb-5fe0-c53a-85eb54152941" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-18ee7a7f-1bbe-edef-c72c-3abed60917b2" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-a8757d5a-c26e-48c6-a704-dfe62167fc81" + } + ], + "cudaVersion": "12.4", + "writerId": "t3dgbhn4qin96cddzjgsydlkw8qr89k9" +} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-q9bcc7je/files/wandb-summary.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-q9bcc7je/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..533452d4a934da3482e9f08995d671c42966eba9 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-q9bcc7je/files/wandb-summary.json @@ -0,0 +1 @@ +{"_runtime":6,"_wandb":{"runtime":6}} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-q9bcc7je/logs/debug-core.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-q9bcc7je/logs/debug-core.log new file mode 100644 index 
0000000000000000000000000000000000000000..b3e6b29f3aaf1060a27618517e26ad723d23bc19 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-q9bcc7je/logs/debug-core.log @@ -0,0 +1,70 @@ +{"time":"2026-03-24T13:29:45.305129259-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmp1z2yqrmo/port-2988462.txt","pid":2988462,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:45.309982791-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988462} +{"time":"2026-03-24T13:29:45.310212189-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988462-3002515-553344955/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:45.367267943-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpgcirvt_o/port-2988994.txt","pid":2988994,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:45.369118622-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988994} +{"time":"2026-03-24T13:29:45.369108472-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988994-3002526-3879120873/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:45.436629794-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpjb3cbe_y/port-2989297.txt","pid":2989297,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:45.440197373-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2989297} +{"time":"2026-03-24T13:29:45.439940814-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2989297-3002537-3086304176/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:45.47467857-07:00","level":"INFO","msg":"connection: ManageConnectionData: 
new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:45.537302391-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:45.549728007-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"kvar0rat","id":"1(@)"} +{"time":"2026-03-24T13:29:45.570654644-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpuhtl4ond/port-2988927.txt","pid":2988927,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:45.573715736-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpfxkgeon1/port-2988205.txt","pid":2988205,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:45.576238171-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988205} +{"time":"2026-03-24T13:29:45.576140962-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988205-3002555-3947692120/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:45.576526889-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988927} +{"time":"2026-03-24T13:29:45.576585229-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988927-3002556-1288920824/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:45.594127276-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"q9bcc7je","id":"1(@)"} +{"time":"2026-03-24T13:29:45.598255792-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:45.653664415-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"shouylxq","id":"1(@)"} +{"time":"2026-03-24T13:29:45.748437466-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} 
+{"time":"2026-03-24T13:29:45.749159452-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:45.799482525-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"98091ibt","id":"1(@)"} +{"time":"2026-03-24T13:29:45.808395913-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"uwt1zkjf","id":"1(@)"} +{"time":"2026-03-24T13:29:46.383469733-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"kvar0rat","id":"1(@)"} +{"time":"2026-03-24T13:29:46.454975222-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"shouylxq","id":"1(@)"} +{"time":"2026-03-24T13:29:46.469338307-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"q9bcc7je","id":"1(@)"} +{"time":"2026-03-24T13:29:46.566053737-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"98091ibt","id":"1(@)"} +{"time":"2026-03-24T13:29:46.798692526-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"uwt1zkjf","id":"1(@)"} +{"time":"2026-03-24T13:29:53.087060316-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:53.087190845-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:53.087176566-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:53.087420184-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:53.087461954-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988462-3002515-553344955/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:53.117433757-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:53.117506157-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} 
+{"time":"2026-03-24T13:29:53.117531057-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:53.117584126-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:53.117803845-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988927-3002556-1288920824/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:53.13391052-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:53.133993269-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:53.13401498-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:53.134094839-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:53.134386087-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2989297-3002537-3086304176/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:53.213133753-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:53.213290313-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:53.213280742-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:53.213412652-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:53.213513481-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988205-3002555-3947692120/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:53.324612586-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:53.324723045-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:53.324702366-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} 
+{"time":"2026-03-24T13:29:53.324855835-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:53.324995584-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988994-3002526-3879120873/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:54.282950989-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:54.283005858-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:54.283040418-07:00","level":"INFO","msg":"server is closed"} +{"time":"2026-03-24T13:29:54.302797732-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:54.302885752-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:54.302964941-07:00","level":"INFO","msg":"server is closed"} +{"time":"2026-03-24T13:29:55.884322072-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:55.884382172-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:55.884440552-07:00","level":"INFO","msg":"server is closed"} +{"time":"2026-03-24T13:29:57.169516129-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:57.169601188-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:57.169652118-07:00","level":"INFO","msg":"server is closed"} +{"time":"2026-03-24T13:29:58.800466629-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:58.800546268-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} 
+{"time":"2026-03-24T13:29:58.800592628-07:00","level":"INFO","msg":"server is closed"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-q9bcc7je/logs/debug-internal.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-q9bcc7je/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..98c40f886307a67025b6c884cb0ba986d0a8bbbf --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-q9bcc7je/logs/debug-internal.log @@ -0,0 +1,11 @@ +{"time":"2026-03-24T13:29:45.595966665-07:00","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2026-03-24T13:29:46.46887199-07:00","level":"INFO","msg":"stream: created new stream","id":"q9bcc7je"} +{"time":"2026-03-24T13:29:46.469078309-07:00","level":"INFO","msg":"handler: started","stream_id":"q9bcc7je"} +{"time":"2026-03-24T13:29:46.469321707-07:00","level":"INFO","msg":"stream: started","id":"q9bcc7je"} +{"time":"2026-03-24T13:29:46.469340717-07:00","level":"INFO","msg":"sender: started","stream_id":"q9bcc7je"} +{"time":"2026-03-24T13:29:46.469331197-07:00","level":"INFO","msg":"writer: started","stream_id":"q9bcc7je"} +{"time":"2026-03-24T13:29:53.324740605-07:00","level":"INFO","msg":"stream: closing","id":"q9bcc7je"} +{"time":"2026-03-24T13:29:54.061061776-07:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2026-03-24T13:29:58.795449638-07:00","level":"INFO","msg":"handler: closed","stream_id":"q9bcc7je"} +{"time":"2026-03-24T13:29:58.795779546-07:00","level":"INFO","msg":"sender: closed","stream_id":"q9bcc7je"} +{"time":"2026-03-24T13:29:58.795811556-07:00","level":"INFO","msg":"stream: closed","id":"q9bcc7je"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-q9bcc7je/logs/debug.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-q9bcc7je/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..36698b13e54f0df16c419f6048cdda35873e38a2 --- 
/dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-q9bcc7je/logs/debug.log @@ -0,0 +1,24 @@ +2026-03-24 13:29:45,194 INFO MainThread:2988994 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1 +2026-03-24 13:29:45,197 INFO MainThread:2988994 [wandb_setup.py:_flush():80] Configure stats pid to 2988994 +2026-03-24 13:29:45,197 INFO MainThread:2988994 [wandb_setup.py:_flush():80] Loading settings from /home/nanhuang/.config/wandb/settings +2026-03-24 13:29:45,197 INFO MainThread:2988994 [wandb_setup.py:_flush():80] Loading settings from /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/settings +2026-03-24 13:29:45,197 INFO MainThread:2988994 [wandb_setup.py:_flush():80] Loading settings from environment variables +2026-03-24 13:29:45,197 INFO MainThread:2988994 [wandb_init.py:setup_run_log_directory():714] Logging user logs to /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-q9bcc7je/logs/debug.log +2026-03-24 13:29:45,198 INFO MainThread:2988994 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-q9bcc7je/logs/debug-internal.log +2026-03-24 13:29:45,198 INFO MainThread:2988994 [wandb_init.py:init():841] calling init triggers +2026-03-24 13:29:45,198 INFO MainThread:2988994 [wandb_init.py:init():846] wandb.init called with sweep_config: {} +config: {'_wandb': {}} +2026-03-24 13:29:45,198 INFO MainThread:2988994 [wandb_init.py:init():889] starting backend +2026-03-24 13:29:45,538 INFO MainThread:2988994 [wandb_init.py:init():892] sending inform_init request +2026-03-24 13:29:45,592 INFO MainThread:2988994 [wandb_init.py:init():900] backend started and connected +2026-03-24 13:29:45,599 INFO MainThread:2988994 [wandb_init.py:init():970] updated telemetry +2026-03-24 13:29:45,600 INFO MainThread:2988994 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout +2026-03-24 13:29:47,089 INFO MainThread:2988994 
[wandb_init.py:init():1041] starting run threads in backend +2026-03-24 13:29:47,200 INFO MainThread:2988994 [wandb_run.py:_console_start():2521] atexit reg +2026-03-24 13:29:47,200 INFO MainThread:2988994 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2026-03-24 13:29:47,200 INFO MainThread:2988994 [wandb_run.py:_redirect():2438] Wrapping output streams. +2026-03-24 13:29:47,200 INFO MainThread:2988994 [wandb_run.py:_redirect():2461] Redirects installed. +2026-03-24 13:29:47,207 INFO MainThread:2988994 [wandb_init.py:init():1081] run started, returning control to user process +2026-03-24 13:29:52,826 INFO MainThread:2988994 [wandb_run.py:_config_callback():1396] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 512, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['BertForMaskedLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 
'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': '/data/nanhuang/Nan/models/DNAbert2_Pretrained', 'transformers_version': '4.35.2', 'model_type': 'bert', 'vocab_size': 4096, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 512, 'type_vocab_size': 2, 'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'classifier_dropout': None, 'output_dir': 'genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 128, 'per_device_eval_batch_size': 128, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 3e-05, 'weight_decay': 0.05, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 4, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'warmup_ratio': 0.15, 'warmup_steps': 0, 'log_level': 'info', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 'genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/runs/Mar24_13-29-15_u112222', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 100, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 100, 'save_total_limit': 1, 'save_safetensors': True, 'save_on_each_node': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 
'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 100, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': 'base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'eval_f1', 'greater_is_better': True, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': False, 'include_tokens_per_second': False, 'neftune_noise_alpha': None, 'vocab_file': None, 'cache_dir': None, 'model_max_length': 512, 'find_unused_parameters': False, 'checkpointing': False, 
'eval_and_save_results': True, 'save_model': False, 'project_name': 'genomic_bench_DNAbert2'} +2026-03-24 13:29:53,324 INFO wandb-AsyncioManager-main:2988994 [service_client.py:_forward_responses():80] Reached EOF. +2026-03-24 13:29:53,325 INFO wandb-AsyncioManager-main:2988994 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles. diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-q9bcc7je/run-q9bcc7je.wandb b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-q9bcc7je/run-q9bcc7je.wandb new file mode 100644 index 0000000000000000000000000000000000000000..757873ea536bf764bb191f23fc98d155d58c67fe Binary files /dev/null and b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-q9bcc7je/run-q9bcc7je.wandb differ diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-shouylxq/files/config.yaml b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-shouylxq/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b18316b3b66241609cab33ddfa150f7a60883cf4 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-shouylxq/files/config.yaml @@ -0,0 +1,559 @@ +_name_or_path: + value: /data/nanhuang/Nan/models/DNAbert2_Pretrained +_wandb: + value: + cli_version: 0.23.1 + e: + aqh6vwxu28w2c3nrf4aidpc13bbpxmg3: + args: + - --model_name_or_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained + - --tokenizer_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json + - --trust_remote_code + - "True" + - --data_path + - /data/nanhuang/Nan/ft_data/drosophila_enhancers_stark/split + - --kmer + - "-1" + - --run_name + - base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42 + - --model_max_length + - "512" + - --per_device_train_batch_size + - "128" + - --per_device_eval_batch_size + - "128" + - --gradient_accumulation_steps + - "1" + - --learning_rate + - "3e-5" + - --weight_decay + - "0.05" + - --num_train_epochs + - "4" + - --lr_scheduler_type + - linear + - 
--warmup_steps + - "0" + - --warmup_ratio + - "0.15" + - --fp16 + - --output_dir + - genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42 + - --evaluation_strategy + - epoch + - --save_strategy + - epoch + - --load_best_model_at_end + - "True" + - --metric_for_best_model + - eval_f1 + - --greater_is_better + - "True" + - --save_total_limit + - "1" + - --logging_steps + - "100" + - --overwrite_output_dir + - "True" + - --log_level + - info + - --seed + - "42" + - --find_unused_parameters + - "False" + - --project_name + - genomic_bench_DNAbert2 + codePath: train.py + codePathLocal: train.py + cpu_count: 64 + cpu_count_logical: 128 + cudaVersion: "12.4" + disk: + /: + total: "3768964489216" + used: "3559217737728" + email: n5huang@ucsd.edu + executable: /data/nanhuang/miniconda3/envs/bpe_v2/bin/python + gpu: NVIDIA RTX A6000 + gpu_count: 8 + gpu_nvidia: + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-5a3d8a94-f380-da39-63d2-4cae98f5c2ae + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-cf8724bd-d619-7916-ee26-88d517a20c47 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-48b494ab-4a63-ff4c-5cc8-746af5d27310 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-968c7ea7-97bf-416a-7689-72c141cfc2bb + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-d53c626b-860f-1dec-1cfa-1dfcde78bc88 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-caa40ec7-afcb-5fe0-c53a-85eb54152941 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-18ee7a7f-1bbe-edef-c72c-3abed60917b2 + - architecture: Ampere + cudaCores: 10752 + 
memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-a8757d5a-c26e-48c6-a704-dfe62167fc81 + host: u112222 + memory: + total: "1082030182400" + os: Linux-5.15.0-126-generic-x86_64-with-glibc2.35 + program: /data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py + python: CPython 3.9.18 + root: /data/nanhuang/Nan/Finetune-GenomicBenchmarks + startedAt: "2026-03-24T20:29:45.245152Z" + writerId: aqh6vwxu28w2c3nrf4aidpc13bbpxmg3 + m: + - "1": train/global_step + "6": + - 3 + "7": [] + - "2": '*' + "5": 1 + "6": + - 1 + "7": [] + python_version: 3.9.18 + t: + "1": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "2": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "3": + - 7 + - 66 + "4": 3.9.18 + "5": 0.23.1 + "6": 4.35.2 + "9": + "1": transformers_trainer + "12": 0.23.1 + "13": linux-x86_64 +adafactor: + value: false +adam_beta1: + value: 0.9 +adam_beta2: + value: 0.999 +adam_epsilon: + value: 1e-08 +add_cross_attention: + value: false +architectures: + value: + - BertForMaskedLM +attention_probs_dropout_prob: + value: 0.1 +auto_find_batch_size: + value: false +bad_words_ids: + value: null +begin_suppress_tokens: + value: null +bf16: + value: false +bf16_full_eval: + value: false +bos_token_id: + value: null +cache_dir: + value: null +checkpointing: + value: false +chunk_size_feed_forward: + value: 0 +classifier_dropout: + value: null +cross_attention_hidden_size: + value: null +data_seed: + value: null +dataloader_drop_last: + value: false +dataloader_num_workers: + value: 0 +dataloader_pin_memory: + value: false +ddp_backend: + value: null +ddp_broadcast_buffers: + value: null +ddp_bucket_cap_mb: + value: null +ddp_find_unused_parameters: + value: null +ddp_timeout: + value: 1800 +debug: + value: [] +decoder_start_token_id: + value: null +deepspeed: + value: null +disable_tqdm: + value: false +dispatch_batches: + value: null +diversity_penalty: + value: 0 +do_eval: + value: true +do_predict: + value: false +do_sample: + value: false +do_train: + value: 
false +early_stopping: + value: false +encoder_no_repeat_ngram_size: + value: 0 +eos_token_id: + value: null +eval_accumulation_steps: + value: null +eval_and_save_results: + value: true +eval_delay: + value: 0 +eval_steps: + value: 100 +evaluation_strategy: + value: epoch +exponential_decay_length_penalty: + value: null +find_unused_parameters: + value: false +finetuning_task: + value: null +forced_bos_token_id: + value: null +forced_eos_token_id: + value: null +fp16: + value: true +fp16_backend: + value: auto +fp16_full_eval: + value: false +fp16_opt_level: + value: O1 +fsdp: + value: [] +fsdp_config: + value: + min_num_params: 0 + xla: false + xla_fsdp_grad_ckpt: false +fsdp_min_num_params: + value: 0 +fsdp_transformer_layer_cls_to_wrap: + value: null +full_determinism: + value: false +gradient_accumulation_steps: + value: 1 +gradient_checkpointing: + value: false +gradient_checkpointing_kwargs: + value: null +greater_is_better: + value: true +group_by_length: + value: false +half_precision_backend: + value: auto +hidden_act: + value: gelu +hidden_dropout_prob: + value: 0.1 +hidden_size: + value: 768 +hub_always_push: + value: false +hub_model_id: + value: null +hub_private_repo: + value: false +hub_strategy: + value: every_save +hub_token: + value: +id2label: + value: + "0": LABEL_0 + "1": LABEL_1 +ignore_data_skip: + value: false +include_inputs_for_metrics: + value: false +include_tokens_per_second: + value: false +initializer_range: + value: 0.02 +intermediate_size: + value: 3072 +is_decoder: + value: false +is_encoder_decoder: + value: false +jit_mode_eval: + value: false +label_names: + value: null +label_smoothing_factor: + value: 0 +label2id: + value: + LABEL_0: 0 + LABEL_1: 1 +layer_norm_eps: + value: 1e-12 +learning_rate: + value: 3e-05 +length_column_name: + value: length +length_penalty: + value: 1 +load_best_model_at_end: + value: true +local_rank: + value: 0 +log_level: + value: info +log_level_replica: + value: warning +log_on_each_node: + value: 
true +logging_dir: + value: genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/runs/Mar24_13-29-14_u112222 +logging_first_step: + value: false +logging_nan_inf_filter: + value: true +logging_steps: + value: 100 +logging_strategy: + value: steps +lr_scheduler_type: + value: linear +max_grad_norm: + value: 1 +max_length: + value: 512 +max_position_embeddings: + value: 512 +max_steps: + value: -1 +metric_for_best_model: + value: eval_f1 +min_length: + value: 0 +model_max_length: + value: 512 +model_type: + value: bert +mp_parameters: + value: "" +neftune_noise_alpha: + value: null +no_cuda: + value: false +no_repeat_ngram_size: + value: 0 +num_attention_heads: + value: 12 +num_beam_groups: + value: 1 +num_beams: + value: 1 +num_hidden_layers: + value: 12 +num_return_sequences: + value: 1 +num_train_epochs: + value: 4 +optim: + value: adamw_torch +optim_args: + value: null +output_attentions: + value: false +output_dir: + value: genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42 +output_hidden_states: + value: false +output_scores: + value: false +overwrite_output_dir: + value: true +pad_token_id: + value: 0 +past_index: + value: -1 +per_device_eval_batch_size: + value: 128 +per_device_train_batch_size: + value: 128 +per_gpu_eval_batch_size: + value: null +per_gpu_train_batch_size: + value: null +position_embedding_type: + value: absolute +prediction_loss_only: + value: false +prefix: + value: null +problem_type: + value: null +project_name: + value: genomic_bench_DNAbert2 +push_to_hub: + value: false +push_to_hub_model_id: + value: null +push_to_hub_organization: + value: null +push_to_hub_token: + value: +ray_scope: + value: last +remove_invalid_values: + value: false +remove_unused_columns: + value: true +repetition_penalty: + value: 1 +report_to: + value: + - wandb +resume_from_checkpoint: + value: null +return_dict: + value: true 
+return_dict_in_generate: + value: false +run_name: + value: base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42 +save_model: + value: false +save_on_each_node: + value: false +save_safetensors: + value: true +save_steps: + value: 100 +save_strategy: + value: epoch +save_total_limit: + value: 1 +seed: + value: 42 +sep_token_id: + value: null +skip_memory_metrics: + value: true +split_batches: + value: false +suppress_tokens: + value: null +task_specific_params: + value: null +temperature: + value: 1 +tf_legacy_loss: + value: false +tf32: + value: null +tie_encoder_decoder: + value: false +tie_word_embeddings: + value: true +tokenizer_class: + value: null +top_k: + value: 50 +top_p: + value: 1 +torch_compile: + value: false +torch_compile_backend: + value: null +torch_compile_mode: + value: null +torch_dtype: + value: float32 +torchdynamo: + value: null +torchscript: + value: false +tpu_metrics_debug: + value: false +tpu_num_cores: + value: null +transformers_version: + value: 4.35.2 +type_vocab_size: + value: 2 +typical_p: + value: 1 +use_bfloat16: + value: false +use_cache: + value: true +use_cpu: + value: false +use_ipex: + value: false +use_legacy_prediction_loop: + value: false +use_mps_device: + value: false +vocab_file: + value: null +vocab_size: + value: 4096 +warmup_ratio: + value: 0.15 +warmup_steps: + value: 0 +weight_decay: + value: 0.05 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-shouylxq/files/output.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-shouylxq/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..25078126599339a04d40e700e670cdb06f650045 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-shouylxq/files/output.log @@ -0,0 +1,109 @@ +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... 
+Some weights of BertForSequenceClassification were not initialized from the model checkpoint at /data/nanhuang/Nan/models/DNAbert2_Pretrained and are newly initialized: ['bert.pooler.dense.bias', 'classifier.bias', 'bert.pooler.dense.weight', 'classifier.weight'] +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. +/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/accelerate/accelerator.py:439: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead. + self.scaler = torch.cuda.amp.GradScaler(**kwargs) +Using auto half precision backend +***** Running training ***** + Num examples = 5,531 + Num Epochs = 4 + Instantaneous batch size per device = 128 + Total train batch size (w. parallel, distributed & accumulation) = 128 + Gradient Accumulation steps = 1 + Total optimization steps = 176 + Number of trainable parameters = 89,188,610 +Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true" + 0%| | 0/176 [00:00 + train() + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 454, in train + trainer.train() + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 1555, in train + return inner_training_loop( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 1860, in _inner_training_loop + tr_loss_step = self.training_step(model, inputs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 2725, in training_step + loss = self.compute_loss(model, inputs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 2748, in compute_loss + outputs = model(**inputs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in 
_wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/accelerate/utils/operations.py", line 680, in forward + return model_forward(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/accelerate/utils/operations.py", line 668, in __call__ + return convert_to_fp32(self.model_forward(*args, **kwargs)) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/amp/autocast_mode.py", line 44, in decorate_autocast + return func(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 1564, in forward + outputs = self.bert( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 1006, in forward + embedding_output = self.embeddings( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 232, in forward + inputs_embeds = self.word_embeddings(input_ids) + File 
"/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/sparse.py", line 192, in forward + return F.embedding( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/functional.py", line 2546, in embedding + return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse) +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 192.00 MiB. GPU 0 has a total capacity of 47.53 GiB of which 138.12 MiB is free. Process 2988286 has 260.00 MiB memory in use. Process 2988462 has 3.37 GiB memory in use. Including non-PyTorch memory, this process has 698.00 MiB memory in use. Process 2988927 has 1.48 GiB memory in use. Process 2988994 has 40.88 GiB memory in use. Process 2988205 has 434.00 MiB memory in use. Process 2989572 has 260.00 MiB memory in use. Of the allocated memory 341.55 MiB is allocated by PyTorch, and 52.45 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. 
See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) +Traceback (most recent call last): + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 473, in + train() + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 454, in train + trainer.train() + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 1555, in train + return inner_training_loop( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 1860, in _inner_training_loop + tr_loss_step = self.training_step(model, inputs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 2725, in training_step + loss = self.compute_loss(model, inputs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 2748, in compute_loss + outputs = model(**inputs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/accelerate/utils/operations.py", line 680, in forward + return model_forward(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/accelerate/utils/operations.py", line 668, in __call__ + return convert_to_fp32(self.model_forward(*args, **kwargs)) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/amp/autocast_mode.py", line 44, in decorate_autocast + return func(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 1564, in 
forward + outputs = self.bert( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 1006, in forward + embedding_output = self.embeddings( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 232, in forward + inputs_embeds = self.word_embeddings(input_ids) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/sparse.py", line 192, in forward + return F.embedding( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/functional.py", line 2546, in embedding + return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse) +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 192.00 MiB. GPU 0 has a total capacity of 47.53 GiB of which 138.12 MiB is free. Process 2988286 has 260.00 MiB memory in use. 
Process 2988462 has 3.37 GiB memory in use. Including non-PyTorch memory, this process has 698.00 MiB memory in use. Process 2988927 has 1.48 GiB memory in use. Process 2988994 has 40.88 GiB memory in use. Process 2988205 has 434.00 MiB memory in use. Process 2989572 has 260.00 MiB memory in use. Of the allocated memory 341.55 MiB is allocated by PyTorch, and 52.45 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-shouylxq/files/requirements.txt b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-shouylxq/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..44d18d32ec4648cd530877d7c8c4758d5e887b9c --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-shouylxq/files/requirements.txt @@ -0,0 +1,144 @@ +scipy==1.13.1 +jupyter_core==5.8.1 +smmap==5.0.2 +yarl==1.22.0 +executing==2.2.0 +cffi==2.0.0 +mkl_random==1.2.8 +traitlets==5.14.3 +wandb==0.23.1 +annotated-types==0.7.0 +evaluate==0.4.6 +kiwisolver==1.4.4 +Jinja2==3.1.6 +pyparsing==3.2.0 +mpmath==1.3.0 +debugpy==1.8.16 +nvidia-cuda-nvrtc-cu12==12.8.93 +docker-pycreds==0.4.0 +pycparser==2.23 +anyio==4.12.0 +safetensors==0.7.0 +matplotlib-inline==0.1.7 +Pygments==2.19.2 +numpy==2.0.2 +nvidia-cuda-cupti-cu12==12.8.90 +Bottleneck==1.4.2 +matplotlib==3.9.2 +numexpr==2.10.1 +sip==6.7.12 +aiohappyeyeballs==2.6.1 +ptyprocess==0.7.0 +fsspec==2025.7.0 +accelerate==0.25.0 +zipp==3.23.0 +PyQt5_sip==12.13.0 +pure_eval==0.2.3 +regex==2025.11.3 +aiosignal==1.4.0 +certifi==2025.10.5 +transformers==4.35.2 +mkl-service==2.4.0 +httpx==0.28.1 +mkl_fft==1.3.11 +pickleshare==0.7.5 +ipykernel==6.30.1 +nvidia-nvtx-cu12==12.8.90 +nvidia-cufft-cu12==11.3.3.83 +triton==3.4.0 
+numba==0.60.0 +psutil==7.0.0 +contourpy==1.2.1 +PyQt5==5.15.10 +packaging==25.0 +datasets==4.4.1 +ipython==8.18.1 +sympy==1.14.0 +nvidia-cusolver-cu12==11.7.3.90 +multidict==6.7.0 +jupyter_client==8.6.3 +setuptools==80.9.0 +prompt_toolkit==3.0.51 +six==1.17.0 +GitPython==3.1.45 +pydantic==2.11.7 +nvidia-cublas-cu12==12.8.4.1 +aiohttp==3.13.2 +tzdata==2025.2 +importlib_metadata==8.7.0 +biopython==1.85 +httpcore==1.0.9 +python-dateutil==2.9.0.post0 +llvmlite==0.43.0 +pandas==2.3.3 +scikit-learn==1.6.1 +asttokens==3.0.0 +joblib==1.5.3 +h11==0.16.0 +charset-normalizer==3.4.4 +pyzmq==27.0.2 +multiprocess==0.70.18 +nvidia-nvjitlink-cu12==12.8.93 +sentry-sdk==2.35.0 +pytz==2025.2 +pydantic_core==2.33.2 +MarkupSafe==3.0.3 +brotlicffi==1.0.9.2 +stack_data==0.6.3 +tqdm==4.67.1 +pynndescent==0.5.13 +importlib_resources==6.5.2 +ply==3.11 +pyarrow==21.0.0 +tokenizers==0.15.2 +exceptiongroup==1.3.1 +nvidia-cusparse-cu12==12.5.8.93 +comm==0.2.3 +pillow==11.3.0 +nvidia-cusparselt-cu12==0.7.1 +protobuf==3.20.3 +urllib3==2.5.0 +wheel==0.45.1 +wcwidth==0.2.13 +appdirs==1.4.4 +PySocks==1.7.1 +PyQt6_sip==13.10.2 +umap-learn==0.5.9.post2 +attrs==25.4.0 +platformdirs==4.3.8 +nvidia-cuda-runtime-cu12==12.8.90 +typing-inspection==0.4.1 +huggingface_hub==0.34.4 +decorator==5.2.1 +filelock==3.17.0 +nvidia-nccl-cu12==2.27.3 +fonttools==4.60.1 +xxhash==3.6.0 +dill==0.4.0 +threadpoolctl==3.6.0 +parso==0.8.4 +pysam==0.9.1 +frozenlist==1.8.0 +typing_extensions==4.15.0 +propcache==0.4.1 +tomli==2.2.1 +click==8.1.8 +nvidia-cudnn-cu12==9.10.2.21 +gitdb==4.0.12 +pip==25.3 +tornado==6.5.2 +networkx==3.2.1 +jedi==0.19.2 +idna==3.11 +pexpect==4.9.0 +async-timeout==5.0.1 +hf-xet==1.1.8 +nvidia-curand-cu12==10.3.9.90 +PyYAML==6.0.2 +nvidia-cufile-cu12==1.13.1.3 +setproctitle==1.3.6 +eval_type_backport==0.2.2 +requests==2.32.5 +nest-asyncio==1.6.0 +torch==2.8.0 +cycler==0.11.0 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-shouylxq/files/wandb-metadata.json 
b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-shouylxq/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..184fd5160c3c79c618fb20694b259f1fdd693366 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-shouylxq/files/wandb-metadata.json @@ -0,0 +1,146 @@ +{ + "os": "Linux-5.15.0-126-generic-x86_64-with-glibc2.35", + "python": "CPython 3.9.18", + "startedAt": "2026-03-24T20:29:45.245152Z", + "args": [ + "--model_name_or_path", + "/data/nanhuang/Nan/models/DNAbert2_Pretrained", + "--tokenizer_path", + "/data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json", + "--trust_remote_code", + "True", + "--data_path", + "/data/nanhuang/Nan/ft_data/drosophila_enhancers_stark/split", + "--kmer", + "-1", + "--run_name", + "base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42", + "--model_max_length", + "512", + "--per_device_train_batch_size", + "128", + "--per_device_eval_batch_size", + "128", + "--gradient_accumulation_steps", + "1", + "--learning_rate", + "3e-5", + "--weight_decay", + "0.05", + "--num_train_epochs", + "4", + "--lr_scheduler_type", + "linear", + "--warmup_steps", + "0", + "--warmup_ratio", + "0.15", + "--fp16", + "--output_dir", + "genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42", + "--evaluation_strategy", + "epoch", + "--save_strategy", + "epoch", + "--load_best_model_at_end", + "True", + "--metric_for_best_model", + "eval_f1", + "--greater_is_better", + "True", + "--save_total_limit", + "1", + "--logging_steps", + "100", + "--overwrite_output_dir", + "True", + "--log_level", + "info", + "--seed", + "42", + "--find_unused_parameters", + "False", + "--project_name", + "genomic_bench_DNAbert2" + ], + "program": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", + "codePath": "train.py", + "codePathLocal": "train.py", + "email": "n5huang@ucsd.edu", + "root": 
"/data/nanhuang/Nan/Finetune-GenomicBenchmarks", + "host": "u112222", + "executable": "/data/nanhuang/miniconda3/envs/bpe_v2/bin/python", + "cpu_count": 64, + "cpu_count_logical": 128, + "gpu": "NVIDIA RTX A6000", + "gpu_count": 8, + "disk": { + "/": { + "total": "3768964489216", + "used": "3559217737728" + } + }, + "memory": { + "total": "1082030182400" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-5a3d8a94-f380-da39-63d2-4cae98f5c2ae" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-cf8724bd-d619-7916-ee26-88d517a20c47" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-48b494ab-4a63-ff4c-5cc8-746af5d27310" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-968c7ea7-97bf-416a-7689-72c141cfc2bb" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-d53c626b-860f-1dec-1cfa-1dfcde78bc88" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-caa40ec7-afcb-5fe0-c53a-85eb54152941" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-18ee7a7f-1bbe-edef-c72c-3abed60917b2" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-a8757d5a-c26e-48c6-a704-dfe62167fc81" + } + ], + "cudaVersion": "12.4", + "writerId": "aqh6vwxu28w2c3nrf4aidpc13bbpxmg3" +} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-shouylxq/files/wandb-summary.json 
b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-shouylxq/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..1e541721647ba388e7207f3c72c53f101ac4527a --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-shouylxq/files/wandb-summary.json @@ -0,0 +1 @@ +{"_runtime":5,"_wandb":{"runtime":5}} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-shouylxq/logs/debug-core.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-shouylxq/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..b3e6b29f3aaf1060a27618517e26ad723d23bc19 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-shouylxq/logs/debug-core.log @@ -0,0 +1,70 @@ +{"time":"2026-03-24T13:29:45.305129259-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmp1z2yqrmo/port-2988462.txt","pid":2988462,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:45.309982791-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988462} +{"time":"2026-03-24T13:29:45.310212189-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988462-3002515-553344955/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:45.367267943-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpgcirvt_o/port-2988994.txt","pid":2988994,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:45.369118622-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988994} +{"time":"2026-03-24T13:29:45.369108472-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988994-3002526-3879120873/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:45.436629794-07:00","level":"INFO","msg":"main: 
starting server","port-filename":"/tmp/tmpjb3cbe_y/port-2989297.txt","pid":2989297,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:45.440197373-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2989297} +{"time":"2026-03-24T13:29:45.439940814-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2989297-3002537-3086304176/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:45.47467857-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:45.537302391-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:45.549728007-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"kvar0rat","id":"1(@)"} +{"time":"2026-03-24T13:29:45.570654644-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpuhtl4ond/port-2988927.txt","pid":2988927,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:45.573715736-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpfxkgeon1/port-2988205.txt","pid":2988205,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:45.576238171-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988205} +{"time":"2026-03-24T13:29:45.576140962-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988205-3002555-3947692120/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:45.576526889-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988927} +{"time":"2026-03-24T13:29:45.576585229-07:00","level":"INFO","msg":"server: accepting 
connections","addr":{"Name":"/tmp/wandb-2988927-3002556-1288920824/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:45.594127276-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"q9bcc7je","id":"1(@)"} +{"time":"2026-03-24T13:29:45.598255792-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:45.653664415-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"shouylxq","id":"1(@)"} +{"time":"2026-03-24T13:29:45.748437466-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:45.749159452-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:45.799482525-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"98091ibt","id":"1(@)"} +{"time":"2026-03-24T13:29:45.808395913-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"uwt1zkjf","id":"1(@)"} +{"time":"2026-03-24T13:29:46.383469733-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"kvar0rat","id":"1(@)"} +{"time":"2026-03-24T13:29:46.454975222-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"shouylxq","id":"1(@)"} +{"time":"2026-03-24T13:29:46.469338307-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"q9bcc7je","id":"1(@)"} +{"time":"2026-03-24T13:29:46.566053737-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"98091ibt","id":"1(@)"} +{"time":"2026-03-24T13:29:46.798692526-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"uwt1zkjf","id":"1(@)"} +{"time":"2026-03-24T13:29:53.087060316-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:53.087190845-07:00","level":"INFO","msg":"server is shutting down"} 
+{"time":"2026-03-24T13:29:53.087176566-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:53.087420184-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:53.087461954-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988462-3002515-553344955/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:53.117433757-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:53.117506157-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:53.117531057-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:53.117584126-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:53.117803845-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988927-3002556-1288920824/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:53.13391052-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:53.133993269-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:53.13401498-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:53.134094839-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:53.134386087-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2989297-3002537-3086304176/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:53.213133753-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:53.213290313-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:53.213280742-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} 
+{"time":"2026-03-24T13:29:53.213412652-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:53.213513481-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988205-3002555-3947692120/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:53.324612586-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:53.324723045-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:53.324702366-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:53.324855835-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:53.324995584-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988994-3002526-3879120873/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:54.282950989-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:54.283005858-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:54.283040418-07:00","level":"INFO","msg":"server is closed"} +{"time":"2026-03-24T13:29:54.302797732-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:54.302885752-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:54.302964941-07:00","level":"INFO","msg":"server is closed"} +{"time":"2026-03-24T13:29:55.884322072-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:55.884382172-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:55.884440552-07:00","level":"INFO","msg":"server is closed"} 
+{"time":"2026-03-24T13:29:57.169516129-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:57.169601188-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:57.169652118-07:00","level":"INFO","msg":"server is closed"} +{"time":"2026-03-24T13:29:58.800466629-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:58.800546268-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:58.800592628-07:00","level":"INFO","msg":"server is closed"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-shouylxq/logs/debug-internal.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-shouylxq/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..23c6e8167a29f0313ea566af5fb983f86e273753 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-shouylxq/logs/debug-internal.log @@ -0,0 +1,11 @@ +{"time":"2026-03-24T13:29:45.654061483-07:00","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2026-03-24T13:29:46.454530025-07:00","level":"INFO","msg":"stream: created new stream","id":"shouylxq"} +{"time":"2026-03-24T13:29:46.454679124-07:00","level":"INFO","msg":"handler: started","stream_id":"shouylxq"} +{"time":"2026-03-24T13:29:46.454958492-07:00","level":"INFO","msg":"stream: started","id":"shouylxq"} +{"time":"2026-03-24T13:29:46.454972932-07:00","level":"INFO","msg":"writer: started","stream_id":"shouylxq"} +{"time":"2026-03-24T13:29:46.454984022-07:00","level":"INFO","msg":"sender: started","stream_id":"shouylxq"} +{"time":"2026-03-24T13:29:53.134034669-07:00","level":"INFO","msg":"stream: closing","id":"shouylxq"} +{"time":"2026-03-24T13:29:53.899995046-07:00","level":"INFO","msg":"fileTransfer: Close: file transfer 
manager closed"} +{"time":"2026-03-24T13:29:54.270132165-07:00","level":"INFO","msg":"handler: closed","stream_id":"shouylxq"} +{"time":"2026-03-24T13:29:54.270403433-07:00","level":"INFO","msg":"sender: closed","stream_id":"shouylxq"} +{"time":"2026-03-24T13:29:54.270425443-07:00","level":"INFO","msg":"stream: closed","id":"shouylxq"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-shouylxq/logs/debug.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-shouylxq/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..11094bba04d6207cb48f6edc0ecc97a87f216c43 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-shouylxq/logs/debug.log @@ -0,0 +1,24 @@ +2026-03-24 13:29:45,252 INFO MainThread:2989297 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1 +2026-03-24 13:29:45,252 INFO MainThread:2989297 [wandb_setup.py:_flush():80] Configure stats pid to 2989297 +2026-03-24 13:29:45,252 INFO MainThread:2989297 [wandb_setup.py:_flush():80] Loading settings from /home/nanhuang/.config/wandb/settings +2026-03-24 13:29:45,252 INFO MainThread:2989297 [wandb_setup.py:_flush():80] Loading settings from /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/settings +2026-03-24 13:29:45,252 INFO MainThread:2989297 [wandb_setup.py:_flush():80] Loading settings from environment variables +2026-03-24 13:29:45,253 INFO MainThread:2989297 [wandb_init.py:setup_run_log_directory():714] Logging user logs to /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-shouylxq/logs/debug.log +2026-03-24 13:29:45,253 INFO MainThread:2989297 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-shouylxq/logs/debug-internal.log +2026-03-24 13:29:45,253 INFO MainThread:2989297 [wandb_init.py:init():841] calling init triggers +2026-03-24 13:29:45,253 INFO MainThread:2989297 [wandb_init.py:init():846] wandb.init called 
with sweep_config: {} +config: {'_wandb': {}} +2026-03-24 13:29:45,253 INFO MainThread:2989297 [wandb_init.py:init():889] starting backend +2026-03-24 13:29:45,599 INFO MainThread:2989297 [wandb_init.py:init():892] sending inform_init request +2026-03-24 13:29:45,651 INFO MainThread:2989297 [wandb_init.py:init():900] backend started and connected +2026-03-24 13:29:45,657 INFO MainThread:2989297 [wandb_init.py:init():970] updated telemetry +2026-03-24 13:29:45,659 INFO MainThread:2989297 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout +2026-03-24 13:29:47,224 INFO MainThread:2989297 [wandb_init.py:init():1041] starting run threads in backend +2026-03-24 13:29:47,344 INFO MainThread:2989297 [wandb_run.py:_console_start():2521] atexit reg +2026-03-24 13:29:47,345 INFO MainThread:2989297 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2026-03-24 13:29:47,345 INFO MainThread:2989297 [wandb_run.py:_redirect():2438] Wrapping output streams. +2026-03-24 13:29:47,345 INFO MainThread:2989297 [wandb_run.py:_redirect():2461] Redirects installed. 
+2026-03-24 13:29:47,349 INFO MainThread:2989297 [wandb_init.py:init():1081] run started, returning control to user process +2026-03-24 13:29:53,073 INFO MainThread:2989297 [wandb_run.py:_config_callback():1396] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 512, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['BertForMaskedLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': '/data/nanhuang/Nan/models/DNAbert2_Pretrained', 'transformers_version': '4.35.2', 'model_type': 'bert', 'vocab_size': 4096, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 512, 'type_vocab_size': 2, 
'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'classifier_dropout': None, 'output_dir': 'genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 128, 'per_device_eval_batch_size': 128, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 3e-05, 'weight_decay': 0.05, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 4, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'warmup_ratio': 0.15, 'warmup_steps': 0, 'log_level': 'info', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 'genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/runs/Mar24_13-29-14_u112222', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 100, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 100, 'save_total_limit': 1, 'save_safetensors': True, 'save_on_each_node': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 100, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': 'base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 
'load_best_model_at_end': True, 'metric_for_best_model': 'eval_f1', 'greater_is_better': True, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': False, 'include_tokens_per_second': False, 'neftune_noise_alpha': None, 'vocab_file': None, 'cache_dir': None, 'model_max_length': 512, 'find_unused_parameters': False, 'checkpointing': False, 'eval_and_save_results': True, 'save_model': False, 'project_name': 'genomic_bench_DNAbert2'} +2026-03-24 13:29:53,134 INFO wandb-AsyncioManager-main:2989297 [service_client.py:_forward_responses():80] Reached EOF. +2026-03-24 13:29:53,134 INFO wandb-AsyncioManager-main:2989297 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles. 
diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-shouylxq/run-shouylxq.wandb b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-shouylxq/run-shouylxq.wandb new file mode 100644 index 0000000000000000000000000000000000000000..74c7cba3b9ec41ff05afedb72d7e005ebe563871 Binary files /dev/null and b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-shouylxq/run-shouylxq.wandb differ diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-uwt1zkjf/files/config.yaml b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-uwt1zkjf/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4fd321b22301bc3550827b57c6cbe4dce7d18007 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-uwt1zkjf/files/config.yaml @@ -0,0 +1,559 @@ +_name_or_path: + value: /data/nanhuang/Nan/models/DNAbert2_Pretrained +_wandb: + value: + cli_version: 0.23.1 + e: + 2yatnycti2je9x9orox8tyrko299bnho: + args: + - --model_name_or_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained + - --tokenizer_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json + - --trust_remote_code + - "True" + - --data_path + - /data/nanhuang/Nan/ft_data/drosophila_enhancers_stark/split + - --kmer + - "-1" + - --run_name + - base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42 + - --model_max_length + - "512" + - --per_device_train_batch_size + - "128" + - --per_device_eval_batch_size + - "128" + - --gradient_accumulation_steps + - "1" + - --learning_rate + - "3e-5" + - --weight_decay + - "0.05" + - --num_train_epochs + - "4" + - --lr_scheduler_type + - linear + - --warmup_steps + - "0" + - --warmup_ratio + - "0.15" + - --fp16 + - --output_dir + - genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42 + - --evaluation_strategy + - epoch + - --save_strategy + - epoch + - --load_best_model_at_end + - "True" + - --metric_for_best_model + - eval_f1 + - 
--greater_is_better + - "True" + - --save_total_limit + - "1" + - --logging_steps + - "100" + - --overwrite_output_dir + - "True" + - --log_level + - info + - --seed + - "42" + - --find_unused_parameters + - "False" + - --project_name + - genomic_bench_DNAbert2 + codePath: train.py + codePathLocal: train.py + cpu_count: 64 + cpu_count_logical: 128 + cudaVersion: "12.4" + disk: + /: + total: "3768964489216" + used: "3559217737728" + email: n5huang@ucsd.edu + executable: /data/nanhuang/miniconda3/envs/bpe_v2/bin/python + gpu: NVIDIA RTX A6000 + gpu_count: 8 + gpu_nvidia: + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-5a3d8a94-f380-da39-63d2-4cae98f5c2ae + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-cf8724bd-d619-7916-ee26-88d517a20c47 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-48b494ab-4a63-ff4c-5cc8-746af5d27310 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-968c7ea7-97bf-416a-7689-72c141cfc2bb + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-d53c626b-860f-1dec-1cfa-1dfcde78bc88 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-caa40ec7-afcb-5fe0-c53a-85eb54152941 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-18ee7a7f-1bbe-edef-c72c-3abed60917b2 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-a8757d5a-c26e-48c6-a704-dfe62167fc81 + host: u112222 + memory: + total: "1082030182400" + os: Linux-5.15.0-126-generic-x86_64-with-glibc2.35 + program: /data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py + python: CPython 3.9.18 + root: 
/data/nanhuang/Nan/Finetune-GenomicBenchmarks + startedAt: "2026-03-24T20:29:45.411350Z" + writerId: 2yatnycti2je9x9orox8tyrko299bnho + m: + - "1": train/global_step + "6": + - 3 + "7": [] + - "2": '*' + "5": 1 + "6": + - 1 + "7": [] + python_version: 3.9.18 + t: + "1": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "2": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "3": + - 7 + - 66 + "4": 3.9.18 + "5": 0.23.1 + "6": 4.35.2 + "9": + "1": transformers_trainer + "12": 0.23.1 + "13": linux-x86_64 +adafactor: + value: false +adam_beta1: + value: 0.9 +adam_beta2: + value: 0.999 +adam_epsilon: + value: 1e-08 +add_cross_attention: + value: false +architectures: + value: + - BertForMaskedLM +attention_probs_dropout_prob: + value: 0.1 +auto_find_batch_size: + value: false +bad_words_ids: + value: null +begin_suppress_tokens: + value: null +bf16: + value: false +bf16_full_eval: + value: false +bos_token_id: + value: null +cache_dir: + value: null +checkpointing: + value: false +chunk_size_feed_forward: + value: 0 +classifier_dropout: + value: null +cross_attention_hidden_size: + value: null +data_seed: + value: null +dataloader_drop_last: + value: false +dataloader_num_workers: + value: 0 +dataloader_pin_memory: + value: false +ddp_backend: + value: null +ddp_broadcast_buffers: + value: null +ddp_bucket_cap_mb: + value: null +ddp_find_unused_parameters: + value: null +ddp_timeout: + value: 1800 +debug: + value: [] +decoder_start_token_id: + value: null +deepspeed: + value: null +disable_tqdm: + value: false +dispatch_batches: + value: null +diversity_penalty: + value: 0 +do_eval: + value: true +do_predict: + value: false +do_sample: + value: false +do_train: + value: false +early_stopping: + value: false +encoder_no_repeat_ngram_size: + value: 0 +eos_token_id: + value: null +eval_accumulation_steps: + value: null +eval_and_save_results: + value: true +eval_delay: + value: 0 +eval_steps: + value: 100 +evaluation_strategy: + value: epoch 
+exponential_decay_length_penalty: + value: null +find_unused_parameters: + value: false +finetuning_task: + value: null +forced_bos_token_id: + value: null +forced_eos_token_id: + value: null +fp16: + value: true +fp16_backend: + value: auto +fp16_full_eval: + value: false +fp16_opt_level: + value: O1 +fsdp: + value: [] +fsdp_config: + value: + min_num_params: 0 + xla: false + xla_fsdp_grad_ckpt: false +fsdp_min_num_params: + value: 0 +fsdp_transformer_layer_cls_to_wrap: + value: null +full_determinism: + value: false +gradient_accumulation_steps: + value: 1 +gradient_checkpointing: + value: false +gradient_checkpointing_kwargs: + value: null +greater_is_better: + value: true +group_by_length: + value: false +half_precision_backend: + value: auto +hidden_act: + value: gelu +hidden_dropout_prob: + value: 0.1 +hidden_size: + value: 768 +hub_always_push: + value: false +hub_model_id: + value: null +hub_private_repo: + value: false +hub_strategy: + value: every_save +hub_token: + value: +id2label: + value: + "0": LABEL_0 + "1": LABEL_1 +ignore_data_skip: + value: false +include_inputs_for_metrics: + value: false +include_tokens_per_second: + value: false +initializer_range: + value: 0.02 +intermediate_size: + value: 3072 +is_decoder: + value: false +is_encoder_decoder: + value: false +jit_mode_eval: + value: false +label_names: + value: null +label_smoothing_factor: + value: 0 +label2id: + value: + LABEL_0: 0 + LABEL_1: 1 +layer_norm_eps: + value: 1e-12 +learning_rate: + value: 3e-05 +length_column_name: + value: length +length_penalty: + value: 1 +load_best_model_at_end: + value: true +local_rank: + value: 0 +log_level: + value: info +log_level_replica: + value: warning +log_on_each_node: + value: true +logging_dir: + value: genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/runs/Mar24_13-29-14_u112222 +logging_first_step: + value: false +logging_nan_inf_filter: + value: true +logging_steps: + value: 100 
+logging_strategy: + value: steps +lr_scheduler_type: + value: linear +max_grad_norm: + value: 1 +max_length: + value: 512 +max_position_embeddings: + value: 512 +max_steps: + value: -1 +metric_for_best_model: + value: eval_f1 +min_length: + value: 0 +model_max_length: + value: 512 +model_type: + value: bert +mp_parameters: + value: "" +neftune_noise_alpha: + value: null +no_cuda: + value: false +no_repeat_ngram_size: + value: 0 +num_attention_heads: + value: 12 +num_beam_groups: + value: 1 +num_beams: + value: 1 +num_hidden_layers: + value: 12 +num_return_sequences: + value: 1 +num_train_epochs: + value: 4 +optim: + value: adamw_torch +optim_args: + value: null +output_attentions: + value: false +output_dir: + value: genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42 +output_hidden_states: + value: false +output_scores: + value: false +overwrite_output_dir: + value: true +pad_token_id: + value: 0 +past_index: + value: -1 +per_device_eval_batch_size: + value: 128 +per_device_train_batch_size: + value: 128 +per_gpu_eval_batch_size: + value: null +per_gpu_train_batch_size: + value: null +position_embedding_type: + value: absolute +prediction_loss_only: + value: false +prefix: + value: null +problem_type: + value: null +project_name: + value: genomic_bench_DNAbert2 +push_to_hub: + value: false +push_to_hub_model_id: + value: null +push_to_hub_organization: + value: null +push_to_hub_token: + value: +ray_scope: + value: last +remove_invalid_values: + value: false +remove_unused_columns: + value: true +repetition_penalty: + value: 1 +report_to: + value: + - wandb +resume_from_checkpoint: + value: null +return_dict: + value: true +return_dict_in_generate: + value: false +run_name: + value: base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42 +save_model: + value: false +save_on_each_node: + value: false +save_safetensors: + value: true +save_steps: + value: 100 +save_strategy: + value: epoch 
+save_total_limit: + value: 1 +seed: + value: 42 +sep_token_id: + value: null +skip_memory_metrics: + value: true +split_batches: + value: false +suppress_tokens: + value: null +task_specific_params: + value: null +temperature: + value: 1 +tf_legacy_loss: + value: false +tf32: + value: null +tie_encoder_decoder: + value: false +tie_word_embeddings: + value: true +tokenizer_class: + value: null +top_k: + value: 50 +top_p: + value: 1 +torch_compile: + value: false +torch_compile_backend: + value: null +torch_compile_mode: + value: null +torch_dtype: + value: float32 +torchdynamo: + value: null +torchscript: + value: false +tpu_metrics_debug: + value: false +tpu_num_cores: + value: null +transformers_version: + value: 4.35.2 +type_vocab_size: + value: 2 +typical_p: + value: 1 +use_bfloat16: + value: false +use_cache: + value: true +use_cpu: + value: false +use_ipex: + value: false +use_legacy_prediction_loop: + value: false +use_mps_device: + value: false +vocab_file: + value: null +vocab_size: + value: 4096 +warmup_ratio: + value: 0.15 +warmup_steps: + value: 0 +weight_decay: + value: 0.05 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-uwt1zkjf/files/output.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-uwt1zkjf/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..a2756919211f6b0822ca91836e6059b4a1fe6904 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-uwt1zkjf/files/output.log @@ -0,0 +1,109 @@ +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... 
+Some weights of BertForSequenceClassification were not initialized from the model checkpoint at /data/nanhuang/Nan/models/DNAbert2_Pretrained and are newly initialized: ['classifier.weight', 'bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias'] +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. +/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/accelerate/accelerator.py:439: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead. + self.scaler = torch.cuda.amp.GradScaler(**kwargs) +Using auto half precision backend +***** Running training ***** + Num examples = 5,531 + Num Epochs = 4 + Instantaneous batch size per device = 128 + Total train batch size (w. parallel, distributed & accumulation) = 128 + Gradient Accumulation steps = 1 + Total optimization steps = 176 + Number of trainable parameters = 89,188,610 +Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true" + 0%| | 0/176 [00:00 + train() + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 454, in train + trainer.train() + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 1555, in train + return inner_training_loop( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 1860, in _inner_training_loop + tr_loss_step = self.training_step(model, inputs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 2725, in training_step + loss = self.compute_loss(model, inputs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 2748, in compute_loss + outputs = model(**inputs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in 
_wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/accelerate/utils/operations.py", line 680, in forward + return model_forward(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/accelerate/utils/operations.py", line 668, in __call__ + return convert_to_fp32(self.model_forward(*args, **kwargs)) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/amp/autocast_mode.py", line 44, in decorate_autocast + return func(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 1564, in forward + outputs = self.bert( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 1006, in forward + embedding_output = self.embeddings( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 240, in forward + embeddings = self.dropout(embeddings) + File 
"/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/dropout.py", line 70, in forward + return F.dropout(input, self.p, self.training, self.inplace) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/functional.py", line 1422, in dropout + _VF.dropout_(input, p, training) if inplace else _VF.dropout(input, p, training) +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 192.00 MiB. GPU 0 has a total capacity of 47.53 GiB of which 138.12 MiB is free. Process 2988286 has 260.00 MiB memory in use. Process 2988462 has 3.37 GiB memory in use. Process 2989297 has 698.00 MiB memory in use. Including non-PyTorch memory, this process has 1.48 GiB memory in use. Process 2988994 has 40.88 GiB memory in use. Process 2988205 has 434.00 MiB memory in use. Process 2989572 has 260.00 MiB memory in use. Of the allocated memory 1.13 GiB is allocated by PyTorch, and 49.95 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. 
See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) +Traceback (most recent call last): + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 473, in + train() + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 454, in train + trainer.train() + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 1555, in train + return inner_training_loop( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 1860, in _inner_training_loop + tr_loss_step = self.training_step(model, inputs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 2725, in training_step + loss = self.compute_loss(model, inputs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 2748, in compute_loss + outputs = model(**inputs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/accelerate/utils/operations.py", line 680, in forward + return model_forward(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/accelerate/utils/operations.py", line 668, in __call__ + return convert_to_fp32(self.model_forward(*args, **kwargs)) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/amp/autocast_mode.py", line 44, in decorate_autocast + return func(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 1564, in 
forward + outputs = self.bert( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 1006, in forward + embedding_output = self.embeddings( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 240, in forward + embeddings = self.dropout(embeddings) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/dropout.py", line 70, in forward + return F.dropout(input, self.p, self.training, self.inplace) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/functional.py", line 1422, in dropout + _VF.dropout_(input, p, training) if inplace else _VF.dropout(input, p, training) +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 192.00 MiB. GPU 0 has a total capacity of 47.53 GiB of which 138.12 MiB is free. 
Process 2988286 has 260.00 MiB memory in use. Process 2988462 has 3.37 GiB memory in use. Process 2989297 has 698.00 MiB memory in use. Including non-PyTorch memory, this process has 1.48 GiB memory in use. Process 2988994 has 40.88 GiB memory in use. Process 2988205 has 434.00 MiB memory in use. Process 2989572 has 260.00 MiB memory in use. Of the allocated memory 1.13 GiB is allocated by PyTorch, and 49.95 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-uwt1zkjf/files/requirements.txt b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-uwt1zkjf/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..44d18d32ec4648cd530877d7c8c4758d5e887b9c --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-uwt1zkjf/files/requirements.txt @@ -0,0 +1,144 @@ +scipy==1.13.1 +jupyter_core==5.8.1 +smmap==5.0.2 +yarl==1.22.0 +executing==2.2.0 +cffi==2.0.0 +mkl_random==1.2.8 +traitlets==5.14.3 +wandb==0.23.1 +annotated-types==0.7.0 +evaluate==0.4.6 +kiwisolver==1.4.4 +Jinja2==3.1.6 +pyparsing==3.2.0 +mpmath==1.3.0 +debugpy==1.8.16 +nvidia-cuda-nvrtc-cu12==12.8.93 +docker-pycreds==0.4.0 +pycparser==2.23 +anyio==4.12.0 +safetensors==0.7.0 +matplotlib-inline==0.1.7 +Pygments==2.19.2 +numpy==2.0.2 +nvidia-cuda-cupti-cu12==12.8.90 +Bottleneck==1.4.2 +matplotlib==3.9.2 +numexpr==2.10.1 +sip==6.7.12 +aiohappyeyeballs==2.6.1 +ptyprocess==0.7.0 +fsspec==2025.7.0 +accelerate==0.25.0 +zipp==3.23.0 +PyQt5_sip==12.13.0 +pure_eval==0.2.3 +regex==2025.11.3 +aiosignal==1.4.0 +certifi==2025.10.5 +transformers==4.35.2 +mkl-service==2.4.0 +httpx==0.28.1 +mkl_fft==1.3.11 +pickleshare==0.7.5 +ipykernel==6.30.1 +nvidia-nvtx-cu12==12.8.90 
+nvidia-cufft-cu12==11.3.3.83 +triton==3.4.0 +numba==0.60.0 +psutil==7.0.0 +contourpy==1.2.1 +PyQt5==5.15.10 +packaging==25.0 +datasets==4.4.1 +ipython==8.18.1 +sympy==1.14.0 +nvidia-cusolver-cu12==11.7.3.90 +multidict==6.7.0 +jupyter_client==8.6.3 +setuptools==80.9.0 +prompt_toolkit==3.0.51 +six==1.17.0 +GitPython==3.1.45 +pydantic==2.11.7 +nvidia-cublas-cu12==12.8.4.1 +aiohttp==3.13.2 +tzdata==2025.2 +importlib_metadata==8.7.0 +biopython==1.85 +httpcore==1.0.9 +python-dateutil==2.9.0.post0 +llvmlite==0.43.0 +pandas==2.3.3 +scikit-learn==1.6.1 +asttokens==3.0.0 +joblib==1.5.3 +h11==0.16.0 +charset-normalizer==3.4.4 +pyzmq==27.0.2 +multiprocess==0.70.18 +nvidia-nvjitlink-cu12==12.8.93 +sentry-sdk==2.35.0 +pytz==2025.2 +pydantic_core==2.33.2 +MarkupSafe==3.0.3 +brotlicffi==1.0.9.2 +stack_data==0.6.3 +tqdm==4.67.1 +pynndescent==0.5.13 +importlib_resources==6.5.2 +ply==3.11 +pyarrow==21.0.0 +tokenizers==0.15.2 +exceptiongroup==1.3.1 +nvidia-cusparse-cu12==12.5.8.93 +comm==0.2.3 +pillow==11.3.0 +nvidia-cusparselt-cu12==0.7.1 +protobuf==3.20.3 +urllib3==2.5.0 +wheel==0.45.1 +wcwidth==0.2.13 +appdirs==1.4.4 +PySocks==1.7.1 +PyQt6_sip==13.10.2 +umap-learn==0.5.9.post2 +attrs==25.4.0 +platformdirs==4.3.8 +nvidia-cuda-runtime-cu12==12.8.90 +typing-inspection==0.4.1 +huggingface_hub==0.34.4 +decorator==5.2.1 +filelock==3.17.0 +nvidia-nccl-cu12==2.27.3 +fonttools==4.60.1 +xxhash==3.6.0 +dill==0.4.0 +threadpoolctl==3.6.0 +parso==0.8.4 +pysam==0.9.1 +frozenlist==1.8.0 +typing_extensions==4.15.0 +propcache==0.4.1 +tomli==2.2.1 +click==8.1.8 +nvidia-cudnn-cu12==9.10.2.21 +gitdb==4.0.12 +pip==25.3 +tornado==6.5.2 +networkx==3.2.1 +jedi==0.19.2 +idna==3.11 +pexpect==4.9.0 +async-timeout==5.0.1 +hf-xet==1.1.8 +nvidia-curand-cu12==10.3.9.90 +PyYAML==6.0.2 +nvidia-cufile-cu12==1.13.1.3 +setproctitle==1.3.6 +eval_type_backport==0.2.2 +requests==2.32.5 +nest-asyncio==1.6.0 +torch==2.8.0 +cycler==0.11.0 diff --git 
a/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-uwt1zkjf/files/wandb-metadata.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-uwt1zkjf/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..a95eaef07d63e9820251c19ad1d24c6c932fe460 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-uwt1zkjf/files/wandb-metadata.json @@ -0,0 +1,146 @@ +{ + "os": "Linux-5.15.0-126-generic-x86_64-with-glibc2.35", + "python": "CPython 3.9.18", + "startedAt": "2026-03-24T20:29:45.411350Z", + "args": [ + "--model_name_or_path", + "/data/nanhuang/Nan/models/DNAbert2_Pretrained", + "--tokenizer_path", + "/data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json", + "--trust_remote_code", + "True", + "--data_path", + "/data/nanhuang/Nan/ft_data/drosophila_enhancers_stark/split", + "--kmer", + "-1", + "--run_name", + "base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42", + "--model_max_length", + "512", + "--per_device_train_batch_size", + "128", + "--per_device_eval_batch_size", + "128", + "--gradient_accumulation_steps", + "1", + "--learning_rate", + "3e-5", + "--weight_decay", + "0.05", + "--num_train_epochs", + "4", + "--lr_scheduler_type", + "linear", + "--warmup_steps", + "0", + "--warmup_ratio", + "0.15", + "--fp16", + "--output_dir", + "genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42", + "--evaluation_strategy", + "epoch", + "--save_strategy", + "epoch", + "--load_best_model_at_end", + "True", + "--metric_for_best_model", + "eval_f1", + "--greater_is_better", + "True", + "--save_total_limit", + "1", + "--logging_steps", + "100", + "--overwrite_output_dir", + "True", + "--log_level", + "info", + "--seed", + "42", + "--find_unused_parameters", + "False", + "--project_name", + "genomic_bench_DNAbert2" + ], + "program": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", + "codePath": "train.py", + "codePathLocal": 
"train.py", + "email": "n5huang@ucsd.edu", + "root": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks", + "host": "u112222", + "executable": "/data/nanhuang/miniconda3/envs/bpe_v2/bin/python", + "cpu_count": 64, + "cpu_count_logical": 128, + "gpu": "NVIDIA RTX A6000", + "gpu_count": 8, + "disk": { + "/": { + "total": "3768964489216", + "used": "3559217737728" + } + }, + "memory": { + "total": "1082030182400" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-5a3d8a94-f380-da39-63d2-4cae98f5c2ae" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-cf8724bd-d619-7916-ee26-88d517a20c47" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-48b494ab-4a63-ff4c-5cc8-746af5d27310" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-968c7ea7-97bf-416a-7689-72c141cfc2bb" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-d53c626b-860f-1dec-1cfa-1dfcde78bc88" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-caa40ec7-afcb-5fe0-c53a-85eb54152941" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-18ee7a7f-1bbe-edef-c72c-3abed60917b2" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-a8757d5a-c26e-48c6-a704-dfe62167fc81" + } + ], + "cudaVersion": "12.4", + "writerId": "2yatnycti2je9x9orox8tyrko299bnho" +} \ No newline at end of file diff --git 
a/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-uwt1zkjf/files/wandb-summary.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-uwt1zkjf/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..30c7561b388c4ee0878bd3e4c4ce901784af945f --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-uwt1zkjf/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb":{"runtime":5},"_runtime":5} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-uwt1zkjf/logs/debug-core.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-uwt1zkjf/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..b3e6b29f3aaf1060a27618517e26ad723d23bc19 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-uwt1zkjf/logs/debug-core.log @@ -0,0 +1,70 @@ +{"time":"2026-03-24T13:29:45.305129259-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmp1z2yqrmo/port-2988462.txt","pid":2988462,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:45.309982791-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988462} +{"time":"2026-03-24T13:29:45.310212189-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988462-3002515-553344955/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:45.367267943-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpgcirvt_o/port-2988994.txt","pid":2988994,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:45.369118622-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988994} +{"time":"2026-03-24T13:29:45.369108472-07:00","level":"INFO","msg":"server: accepting 
connections","addr":{"Name":"/tmp/wandb-2988994-3002526-3879120873/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:45.436629794-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpjb3cbe_y/port-2989297.txt","pid":2989297,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:45.440197373-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2989297} +{"time":"2026-03-24T13:29:45.439940814-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2989297-3002537-3086304176/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:45.47467857-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:45.537302391-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:45.549728007-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"kvar0rat","id":"1(@)"} +{"time":"2026-03-24T13:29:45.570654644-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpuhtl4ond/port-2988927.txt","pid":2988927,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:45.573715736-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpfxkgeon1/port-2988205.txt","pid":2988205,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:45.576238171-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2988205} +{"time":"2026-03-24T13:29:45.576140962-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988205-3002555-3947692120/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:45.576526889-07:00","level":"INFO","msg":"server: will exit if parent process 
dies","ppid":2988927} +{"time":"2026-03-24T13:29:45.576585229-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2988927-3002556-1288920824/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:45.594127276-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"q9bcc7je","id":"1(@)"} +{"time":"2026-03-24T13:29:45.598255792-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:45.653664415-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"shouylxq","id":"1(@)"} +{"time":"2026-03-24T13:29:45.748437466-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:45.749159452-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:45.799482525-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"98091ibt","id":"1(@)"} +{"time":"2026-03-24T13:29:45.808395913-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"uwt1zkjf","id":"1(@)"} +{"time":"2026-03-24T13:29:46.383469733-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"kvar0rat","id":"1(@)"} +{"time":"2026-03-24T13:29:46.454975222-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"shouylxq","id":"1(@)"} +{"time":"2026-03-24T13:29:46.469338307-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"q9bcc7je","id":"1(@)"} +{"time":"2026-03-24T13:29:46.566053737-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"98091ibt","id":"1(@)"} +{"time":"2026-03-24T13:29:46.798692526-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"uwt1zkjf","id":"1(@)"} +{"time":"2026-03-24T13:29:53.087060316-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} 
+{"time":"2026-03-24T13:29:53.087190845-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:53.087176566-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:53.087420184-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:53.087461954-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988462-3002515-553344955/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:53.117433757-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:53.117506157-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:53.117531057-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:53.117584126-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:53.117803845-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988927-3002556-1288920824/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:53.13391052-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:53.133993269-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:53.13401498-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:53.134094839-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:53.134386087-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2989297-3002537-3086304176/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:53.213133753-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:53.213290313-07:00","level":"INFO","msg":"server is shutting down"} 
+{"time":"2026-03-24T13:29:53.213280742-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:53.213412652-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:53.213513481-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988205-3002555-3947692120/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:53.324612586-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:29:53.324723045-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:29:53.324702366-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:29:53.324855835-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:29:53.324995584-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2988994-3002526-3879120873/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:54.282950989-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:54.283005858-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:54.283040418-07:00","level":"INFO","msg":"server is closed"} +{"time":"2026-03-24T13:29:54.302797732-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:54.302885752-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:54.302964941-07:00","level":"INFO","msg":"server is closed"} +{"time":"2026-03-24T13:29:55.884322072-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:55.884382172-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} 
+{"time":"2026-03-24T13:29:55.884440552-07:00","level":"INFO","msg":"server is closed"} +{"time":"2026-03-24T13:29:57.169516129-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:57.169601188-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:57.169652118-07:00","level":"INFO","msg":"server is closed"} +{"time":"2026-03-24T13:29:58.800466629-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:29:58.800546268-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:29:58.800592628-07:00","level":"INFO","msg":"server is closed"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-uwt1zkjf/logs/debug-internal.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-uwt1zkjf/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..84aad533e3942e9070085128d110e57b41841683 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-uwt1zkjf/logs/debug-internal.log @@ -0,0 +1,11 @@ +{"time":"2026-03-24T13:29:45.809564186-07:00","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2026-03-24T13:29:46.798307668-07:00","level":"INFO","msg":"stream: created new stream","id":"uwt1zkjf"} +{"time":"2026-03-24T13:29:46.798475167-07:00","level":"INFO","msg":"handler: started","stream_id":"uwt1zkjf"} +{"time":"2026-03-24T13:29:46.798678226-07:00","level":"INFO","msg":"stream: started","id":"uwt1zkjf"} +{"time":"2026-03-24T13:29:46.798693926-07:00","level":"INFO","msg":"writer: started","stream_id":"uwt1zkjf"} +{"time":"2026-03-24T13:29:46.798695556-07:00","level":"INFO","msg":"sender: started","stream_id":"uwt1zkjf"} +{"time":"2026-03-24T13:29:53.117531986-07:00","level":"INFO","msg":"stream: closing","id":"uwt1zkjf"} 
+{"time":"2026-03-24T13:29:53.869382696-07:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2026-03-24T13:29:55.86945071-07:00","level":"INFO","msg":"handler: closed","stream_id":"uwt1zkjf"} +{"time":"2026-03-24T13:29:55.869766408-07:00","level":"INFO","msg":"sender: closed","stream_id":"uwt1zkjf"} +{"time":"2026-03-24T13:29:55.869816858-07:00","level":"INFO","msg":"stream: closed","id":"uwt1zkjf"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-uwt1zkjf/logs/debug.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-uwt1zkjf/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..4ed7fbe2a9d9e4034228c2d9a2bb28961e97a6c1 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-uwt1zkjf/logs/debug.log @@ -0,0 +1,24 @@ +2026-03-24 13:29:45,417 INFO MainThread:2988927 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1 +2026-03-24 13:29:45,417 INFO MainThread:2988927 [wandb_setup.py:_flush():80] Configure stats pid to 2988927 +2026-03-24 13:29:45,417 INFO MainThread:2988927 [wandb_setup.py:_flush():80] Loading settings from /home/nanhuang/.config/wandb/settings +2026-03-24 13:29:45,417 INFO MainThread:2988927 [wandb_setup.py:_flush():80] Loading settings from /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/settings +2026-03-24 13:29:45,417 INFO MainThread:2988927 [wandb_setup.py:_flush():80] Loading settings from environment variables +2026-03-24 13:29:45,417 INFO MainThread:2988927 [wandb_init.py:setup_run_log_directory():714] Logging user logs to /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-uwt1zkjf/logs/debug.log +2026-03-24 13:29:45,417 INFO MainThread:2988927 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-uwt1zkjf/logs/debug-internal.log +2026-03-24 13:29:45,417 INFO MainThread:2988927 [wandb_init.py:init():841] calling init 
triggers +2026-03-24 13:29:45,417 INFO MainThread:2988927 [wandb_init.py:init():846] wandb.init called with sweep_config: {} +config: {'_wandb': {}} +2026-03-24 13:29:45,417 INFO MainThread:2988927 [wandb_init.py:init():889] starting backend +2026-03-24 13:29:45,749 INFO MainThread:2988927 [wandb_init.py:init():892] sending inform_init request +2026-03-24 13:29:45,797 INFO MainThread:2988927 [wandb_init.py:init():900] backend started and connected +2026-03-24 13:29:45,803 INFO MainThread:2988927 [wandb_init.py:init():970] updated telemetry +2026-03-24 13:29:45,805 INFO MainThread:2988927 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout +2026-03-24 13:29:47,508 INFO MainThread:2988927 [wandb_init.py:init():1041] starting run threads in backend +2026-03-24 13:29:47,638 INFO MainThread:2988927 [wandb_run.py:_console_start():2521] atexit reg +2026-03-24 13:29:47,638 INFO MainThread:2988927 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2026-03-24 13:29:47,638 INFO MainThread:2988927 [wandb_run.py:_redirect():2438] Wrapping output streams. +2026-03-24 13:29:47,639 INFO MainThread:2988927 [wandb_run.py:_redirect():2461] Redirects installed. 
+2026-03-24 13:29:47,643 INFO MainThread:2988927 [wandb_init.py:init():1081] run started, returning control to user process +2026-03-24 13:29:53,045 INFO MainThread:2988927 [wandb_run.py:_config_callback():1396] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 512, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['BertForMaskedLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': '/data/nanhuang/Nan/models/DNAbert2_Pretrained', 'transformers_version': '4.35.2', 'model_type': 'bert', 'vocab_size': 4096, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 512, 'type_vocab_size': 2, 
'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'classifier_dropout': None, 'output_dir': 'genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 128, 'per_device_eval_batch_size': 128, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 3e-05, 'weight_decay': 0.05, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 4, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'warmup_ratio': 0.15, 'warmup_steps': 0, 'log_level': 'info', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 'genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/runs/Mar24_13-29-14_u112222', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 100, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 100, 'save_total_limit': 1, 'save_safetensors': True, 'save_on_each_node': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 100, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': 'base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 
'load_best_model_at_end': True, 'metric_for_best_model': 'eval_f1', 'greater_is_better': True, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': False, 'include_tokens_per_second': False, 'neftune_noise_alpha': None, 'vocab_file': None, 'cache_dir': None, 'model_max_length': 512, 'find_unused_parameters': False, 'checkpointing': False, 'eval_and_save_results': True, 'save_model': False, 'project_name': 'genomic_bench_DNAbert2'} +2026-03-24 13:29:53,117 INFO wandb-AsyncioManager-main:2988927 [service_client.py:_forward_responses():80] Reached EOF. +2026-03-24 13:29:53,117 INFO wandb-AsyncioManager-main:2988927 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles. 
diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-uwt1zkjf/run-uwt1zkjf.wandb b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-uwt1zkjf/run-uwt1zkjf.wandb new file mode 100644 index 0000000000000000000000000000000000000000..e5fbf3f7070b544107987681ed76c3361da11d85 Binary files /dev/null and b/Finetune-GenomicBenchmarks/wandb/run-20260324_132945-uwt1zkjf/run-uwt1zkjf.wandb differ diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132946-77x34hbb/files/config.yaml b/Finetune-GenomicBenchmarks/wandb/run-20260324_132946-77x34hbb/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8d1fcf83bb024d9bc8d08a1f384356a0beef058b --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132946-77x34hbb/files/config.yaml @@ -0,0 +1,559 @@ +_name_or_path: + value: /data/nanhuang/Nan/models/DNAbert2_Pretrained +_wandb: + value: + cli_version: 0.23.1 + e: + rr8bwivvdw4c9dxifbsirtydjd0xv5bc: + args: + - --model_name_or_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained + - --tokenizer_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json + - --trust_remote_code + - "True" + - --data_path + - /data/nanhuang/Nan/ft_data/drosophila_enhancers_stark/split + - --kmer + - "-1" + - --run_name + - base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42 + - --model_max_length + - "512" + - --per_device_train_batch_size + - "128" + - --per_device_eval_batch_size + - "128" + - --gradient_accumulation_steps + - "1" + - --learning_rate + - "3e-5" + - --weight_decay + - "0.05" + - --num_train_epochs + - "4" + - --lr_scheduler_type + - linear + - --warmup_steps + - "0" + - --warmup_ratio + - "0.15" + - --fp16 + - --output_dir + - genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42 + - --evaluation_strategy + - epoch + - --save_strategy + - epoch + - --load_best_model_at_end + - "True" + - --metric_for_best_model + - eval_f1 + - 
--greater_is_better + - "True" + - --save_total_limit + - "1" + - --logging_steps + - "100" + - --overwrite_output_dir + - "True" + - --log_level + - info + - --seed + - "42" + - --find_unused_parameters + - "False" + - --project_name + - genomic_bench_DNAbert2 + codePath: train.py + codePathLocal: train.py + cpu_count: 64 + cpu_count_logical: 128 + cudaVersion: "12.4" + disk: + /: + total: "3768964489216" + used: "3559217713152" + email: n5huang@ucsd.edu + executable: /data/nanhuang/miniconda3/envs/bpe_v2/bin/python + gpu: NVIDIA RTX A6000 + gpu_count: 8 + gpu_nvidia: + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-5a3d8a94-f380-da39-63d2-4cae98f5c2ae + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-cf8724bd-d619-7916-ee26-88d517a20c47 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-48b494ab-4a63-ff4c-5cc8-746af5d27310 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-968c7ea7-97bf-416a-7689-72c141cfc2bb + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-d53c626b-860f-1dec-1cfa-1dfcde78bc88 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-caa40ec7-afcb-5fe0-c53a-85eb54152941 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-18ee7a7f-1bbe-edef-c72c-3abed60917b2 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-a8757d5a-c26e-48c6-a704-dfe62167fc81 + host: u112222 + memory: + total: "1082030182400" + os: Linux-5.15.0-126-generic-x86_64-with-glibc2.35 + program: /data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py + python: CPython 3.9.18 + root: 
/data/nanhuang/Nan/Finetune-GenomicBenchmarks + startedAt: "2026-03-24T20:29:46.325627Z" + writerId: rr8bwivvdw4c9dxifbsirtydjd0xv5bc + m: + - "1": train/global_step + "6": + - 3 + "7": [] + - "2": '*' + "5": 1 + "6": + - 1 + "7": [] + python_version: 3.9.18 + t: + "1": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "2": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "3": + - 7 + - 66 + "4": 3.9.18 + "5": 0.23.1 + "6": 4.35.2 + "9": + "1": transformers_trainer + "12": 0.23.1 + "13": linux-x86_64 +adafactor: + value: false +adam_beta1: + value: 0.9 +adam_beta2: + value: 0.999 +adam_epsilon: + value: 1e-08 +add_cross_attention: + value: false +architectures: + value: + - BertForMaskedLM +attention_probs_dropout_prob: + value: 0.1 +auto_find_batch_size: + value: false +bad_words_ids: + value: null +begin_suppress_tokens: + value: null +bf16: + value: false +bf16_full_eval: + value: false +bos_token_id: + value: null +cache_dir: + value: null +checkpointing: + value: false +chunk_size_feed_forward: + value: 0 +classifier_dropout: + value: null +cross_attention_hidden_size: + value: null +data_seed: + value: null +dataloader_drop_last: + value: false +dataloader_num_workers: + value: 0 +dataloader_pin_memory: + value: false +ddp_backend: + value: null +ddp_broadcast_buffers: + value: null +ddp_bucket_cap_mb: + value: null +ddp_find_unused_parameters: + value: null +ddp_timeout: + value: 1800 +debug: + value: [] +decoder_start_token_id: + value: null +deepspeed: + value: null +disable_tqdm: + value: false +dispatch_batches: + value: null +diversity_penalty: + value: 0 +do_eval: + value: true +do_predict: + value: false +do_sample: + value: false +do_train: + value: false +early_stopping: + value: false +encoder_no_repeat_ngram_size: + value: 0 +eos_token_id: + value: null +eval_accumulation_steps: + value: null +eval_and_save_results: + value: true +eval_delay: + value: 0 +eval_steps: + value: 100 +evaluation_strategy: + value: epoch 
+exponential_decay_length_penalty: + value: null +find_unused_parameters: + value: false +finetuning_task: + value: null +forced_bos_token_id: + value: null +forced_eos_token_id: + value: null +fp16: + value: true +fp16_backend: + value: auto +fp16_full_eval: + value: false +fp16_opt_level: + value: O1 +fsdp: + value: [] +fsdp_config: + value: + min_num_params: 0 + xla: false + xla_fsdp_grad_ckpt: false +fsdp_min_num_params: + value: 0 +fsdp_transformer_layer_cls_to_wrap: + value: null +full_determinism: + value: false +gradient_accumulation_steps: + value: 1 +gradient_checkpointing: + value: false +gradient_checkpointing_kwargs: + value: null +greater_is_better: + value: true +group_by_length: + value: false +half_precision_backend: + value: auto +hidden_act: + value: gelu +hidden_dropout_prob: + value: 0.1 +hidden_size: + value: 768 +hub_always_push: + value: false +hub_model_id: + value: null +hub_private_repo: + value: false +hub_strategy: + value: every_save +hub_token: + value: +id2label: + value: + "0": LABEL_0 + "1": LABEL_1 +ignore_data_skip: + value: false +include_inputs_for_metrics: + value: false +include_tokens_per_second: + value: false +initializer_range: + value: 0.02 +intermediate_size: + value: 3072 +is_decoder: + value: false +is_encoder_decoder: + value: false +jit_mode_eval: + value: false +label_names: + value: null +label_smoothing_factor: + value: 0 +label2id: + value: + LABEL_0: 0 + LABEL_1: 1 +layer_norm_eps: + value: 1e-12 +learning_rate: + value: 3e-05 +length_column_name: + value: length +length_penalty: + value: 1 +load_best_model_at_end: + value: true +local_rank: + value: 0 +log_level: + value: info +log_level_replica: + value: warning +log_on_each_node: + value: true +logging_dir: + value: genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/runs/Mar24_13-29-33_u112222 +logging_first_step: + value: false +logging_nan_inf_filter: + value: true +logging_steps: + value: 100 
+logging_strategy: + value: steps +lr_scheduler_type: + value: linear +max_grad_norm: + value: 1 +max_length: + value: 512 +max_position_embeddings: + value: 512 +max_steps: + value: -1 +metric_for_best_model: + value: eval_f1 +min_length: + value: 0 +model_max_length: + value: 512 +model_type: + value: bert +mp_parameters: + value: "" +neftune_noise_alpha: + value: null +no_cuda: + value: false +no_repeat_ngram_size: + value: 0 +num_attention_heads: + value: 12 +num_beam_groups: + value: 1 +num_beams: + value: 1 +num_hidden_layers: + value: 12 +num_return_sequences: + value: 1 +num_train_epochs: + value: 4 +optim: + value: adamw_torch +optim_args: + value: null +output_attentions: + value: false +output_dir: + value: genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42 +output_hidden_states: + value: false +output_scores: + value: false +overwrite_output_dir: + value: true +pad_token_id: + value: 0 +past_index: + value: -1 +per_device_eval_batch_size: + value: 128 +per_device_train_batch_size: + value: 128 +per_gpu_eval_batch_size: + value: null +per_gpu_train_batch_size: + value: null +position_embedding_type: + value: absolute +prediction_loss_only: + value: false +prefix: + value: null +problem_type: + value: null +project_name: + value: genomic_bench_DNAbert2 +push_to_hub: + value: false +push_to_hub_model_id: + value: null +push_to_hub_organization: + value: null +push_to_hub_token: + value: +ray_scope: + value: last +remove_invalid_values: + value: false +remove_unused_columns: + value: true +repetition_penalty: + value: 1 +report_to: + value: + - wandb +resume_from_checkpoint: + value: null +return_dict: + value: true +return_dict_in_generate: + value: false +run_name: + value: base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42 +save_model: + value: false +save_on_each_node: + value: false +save_safetensors: + value: true +save_steps: + value: 100 +save_strategy: + value: epoch 
+save_total_limit: + value: 1 +seed: + value: 42 +sep_token_id: + value: null +skip_memory_metrics: + value: true +split_batches: + value: false +suppress_tokens: + value: null +task_specific_params: + value: null +temperature: + value: 1 +tf_legacy_loss: + value: false +tf32: + value: null +tie_encoder_decoder: + value: false +tie_word_embeddings: + value: true +tokenizer_class: + value: null +top_k: + value: 50 +top_p: + value: 1 +torch_compile: + value: false +torch_compile_backend: + value: null +torch_compile_mode: + value: null +torch_dtype: + value: float32 +torchdynamo: + value: null +torchscript: + value: false +tpu_metrics_debug: + value: false +tpu_num_cores: + value: null +transformers_version: + value: 4.35.2 +type_vocab_size: + value: 2 +typical_p: + value: 1 +use_bfloat16: + value: false +use_cache: + value: true +use_cpu: + value: false +use_ipex: + value: false +use_legacy_prediction_loop: + value: false +use_mps_device: + value: false +vocab_file: + value: null +vocab_size: + value: 4096 +warmup_ratio: + value: 0.15 +warmup_steps: + value: 0 +weight_decay: + value: 0.05 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132946-77x34hbb/files/output.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132946-77x34hbb/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..b23eed3804a1bcb1f77a5fb05fd9e73b23d90464 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132946-77x34hbb/files/output.log @@ -0,0 +1,133 @@ +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... 
+Some weights of BertForSequenceClassification were not initialized from the model checkpoint at /data/nanhuang/Nan/models/DNAbert2_Pretrained and are newly initialized: ['bert.pooler.dense.weight', 'classifier.bias', 'bert.pooler.dense.bias', 'classifier.weight'] +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. +/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/accelerate/accelerator.py:439: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead. + self.scaler = torch.cuda.amp.GradScaler(**kwargs) +Using auto half precision backend +***** Running training ***** + Num examples = 5,531 + Num Epochs = 4 + Instantaneous batch size per device = 128 + Total train batch size (w. parallel, distributed & accumulation) = 128 + Gradient Accumulation steps = 1 + Total optimization steps = 176 + Number of trainable parameters = 89,188,610 +Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true" + 0%| | 0/176 [00:00 + train() + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 454, in train + trainer.train() + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 1555, in train + return inner_training_loop( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 1860, in _inner_training_loop + tr_loss_step = self.training_step(model, inputs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 2725, in training_step + loss = self.compute_loss(model, inputs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 2748, in compute_loss + outputs = model(**inputs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in 
_wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/accelerate/utils/operations.py", line 680, in forward + return model_forward(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/accelerate/utils/operations.py", line 668, in __call__ + return convert_to_fp32(self.model_forward(*args, **kwargs)) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/amp/autocast_mode.py", line 44, in decorate_autocast + return func(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 1564, in forward + outputs = self.bert( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 1013, in forward + encoder_outputs = self.encoder( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 607, in forward + layer_outputs = layer_module( + File 
"/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 497, in forward + self_attention_outputs = self.attention( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 427, in forward + self_outputs = self.self( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 355, in forward + attention_probs = nn.functional.softmax(attention_scores, dim=-1) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/functional.py", line 2137, in softmax + ret = input.softmax(dim) +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.50 GiB. GPU 0 has a total capacity of 47.53 GiB of which 1.18 GiB is free. Including non-PyTorch memory, this process has 46.32 GiB memory in use. 
Of the allocated memory 44.69 GiB is allocated by PyTorch, and 1.31 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) +Traceback (most recent call last): + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 473, in + train() + File "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", line 454, in train + trainer.train() + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 1555, in train + return inner_training_loop( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 1860, in _inner_training_loop + tr_loss_step = self.training_step(model, inputs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 2725, in training_step + loss = self.compute_loss(model, inputs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/trainer.py", line 2748, in compute_loss + outputs = model(**inputs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/accelerate/utils/operations.py", line 680, in forward + return model_forward(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/accelerate/utils/operations.py", line 668, in __call__ + return convert_to_fp32(self.model_forward(*args, **kwargs)) + File 
"/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/amp/autocast_mode.py", line 44, in decorate_autocast + return func(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 1564, in forward + outputs = self.bert( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 1013, in forward + encoder_outputs = self.encoder( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 607, in forward + layer_outputs = layer_module( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 497, in forward + self_attention_outputs = self.attention( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in 
_wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 427, in forward + self_outputs = self.self( + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl + return forward_call(*args, **kwargs) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 355, in forward + attention_probs = nn.functional.softmax(attention_scores, dim=-1) + File "/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/torch/nn/functional.py", line 2137, in softmax + ret = input.softmax(dim) +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.50 GiB. GPU 0 has a total capacity of 47.53 GiB of which 1.18 GiB is free. Including non-PyTorch memory, this process has 46.32 GiB memory in use. Of the allocated memory 44.69 GiB is allocated by PyTorch, and 1.31 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. 
See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132946-77x34hbb/files/requirements.txt b/Finetune-GenomicBenchmarks/wandb/run-20260324_132946-77x34hbb/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..44d18d32ec4648cd530877d7c8c4758d5e887b9c --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132946-77x34hbb/files/requirements.txt @@ -0,0 +1,144 @@ +scipy==1.13.1 +jupyter_core==5.8.1 +smmap==5.0.2 +yarl==1.22.0 +executing==2.2.0 +cffi==2.0.0 +mkl_random==1.2.8 +traitlets==5.14.3 +wandb==0.23.1 +annotated-types==0.7.0 +evaluate==0.4.6 +kiwisolver==1.4.4 +Jinja2==3.1.6 +pyparsing==3.2.0 +mpmath==1.3.0 +debugpy==1.8.16 +nvidia-cuda-nvrtc-cu12==12.8.93 +docker-pycreds==0.4.0 +pycparser==2.23 +anyio==4.12.0 +safetensors==0.7.0 +matplotlib-inline==0.1.7 +Pygments==2.19.2 +numpy==2.0.2 +nvidia-cuda-cupti-cu12==12.8.90 +Bottleneck==1.4.2 +matplotlib==3.9.2 +numexpr==2.10.1 +sip==6.7.12 +aiohappyeyeballs==2.6.1 +ptyprocess==0.7.0 +fsspec==2025.7.0 +accelerate==0.25.0 +zipp==3.23.0 +PyQt5_sip==12.13.0 +pure_eval==0.2.3 +regex==2025.11.3 +aiosignal==1.4.0 +certifi==2025.10.5 +transformers==4.35.2 +mkl-service==2.4.0 +httpx==0.28.1 +mkl_fft==1.3.11 +pickleshare==0.7.5 +ipykernel==6.30.1 +nvidia-nvtx-cu12==12.8.90 +nvidia-cufft-cu12==11.3.3.83 +triton==3.4.0 +numba==0.60.0 +psutil==7.0.0 +contourpy==1.2.1 +PyQt5==5.15.10 +packaging==25.0 +datasets==4.4.1 +ipython==8.18.1 +sympy==1.14.0 +nvidia-cusolver-cu12==11.7.3.90 +multidict==6.7.0 +jupyter_client==8.6.3 +setuptools==80.9.0 +prompt_toolkit==3.0.51 +six==1.17.0 +GitPython==3.1.45 +pydantic==2.11.7 +nvidia-cublas-cu12==12.8.4.1 +aiohttp==3.13.2 +tzdata==2025.2 +importlib_metadata==8.7.0 +biopython==1.85 +httpcore==1.0.9 +python-dateutil==2.9.0.post0 +llvmlite==0.43.0 +pandas==2.3.3 +scikit-learn==1.6.1 +asttokens==3.0.0 +joblib==1.5.3 +h11==0.16.0 
+charset-normalizer==3.4.4 +pyzmq==27.0.2 +multiprocess==0.70.18 +nvidia-nvjitlink-cu12==12.8.93 +sentry-sdk==2.35.0 +pytz==2025.2 +pydantic_core==2.33.2 +MarkupSafe==3.0.3 +brotlicffi==1.0.9.2 +stack_data==0.6.3 +tqdm==4.67.1 +pynndescent==0.5.13 +importlib_resources==6.5.2 +ply==3.11 +pyarrow==21.0.0 +tokenizers==0.15.2 +exceptiongroup==1.3.1 +nvidia-cusparse-cu12==12.5.8.93 +comm==0.2.3 +pillow==11.3.0 +nvidia-cusparselt-cu12==0.7.1 +protobuf==3.20.3 +urllib3==2.5.0 +wheel==0.45.1 +wcwidth==0.2.13 +appdirs==1.4.4 +PySocks==1.7.1 +PyQt6_sip==13.10.2 +umap-learn==0.5.9.post2 +attrs==25.4.0 +platformdirs==4.3.8 +nvidia-cuda-runtime-cu12==12.8.90 +typing-inspection==0.4.1 +huggingface_hub==0.34.4 +decorator==5.2.1 +filelock==3.17.0 +nvidia-nccl-cu12==2.27.3 +fonttools==4.60.1 +xxhash==3.6.0 +dill==0.4.0 +threadpoolctl==3.6.0 +parso==0.8.4 +pysam==0.9.1 +frozenlist==1.8.0 +typing_extensions==4.15.0 +propcache==0.4.1 +tomli==2.2.1 +click==8.1.8 +nvidia-cudnn-cu12==9.10.2.21 +gitdb==4.0.12 +pip==25.3 +tornado==6.5.2 +networkx==3.2.1 +jedi==0.19.2 +idna==3.11 +pexpect==4.9.0 +async-timeout==5.0.1 +hf-xet==1.1.8 +nvidia-curand-cu12==10.3.9.90 +PyYAML==6.0.2 +nvidia-cufile-cu12==1.13.1.3 +setproctitle==1.3.6 +eval_type_backport==0.2.2 +requests==2.32.5 +nest-asyncio==1.6.0 +torch==2.8.0 +cycler==0.11.0 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132946-77x34hbb/files/wandb-metadata.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_132946-77x34hbb/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..e62818f0fb7e469685adbe3762d1032a1349fa87 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132946-77x34hbb/files/wandb-metadata.json @@ -0,0 +1,146 @@ +{ + "os": "Linux-5.15.0-126-generic-x86_64-with-glibc2.35", + "python": "CPython 3.9.18", + "startedAt": "2026-03-24T20:29:46.325627Z", + "args": [ + "--model_name_or_path", + "/data/nanhuang/Nan/models/DNAbert2_Pretrained", + "--tokenizer_path", + 
"/data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json", + "--trust_remote_code", + "True", + "--data_path", + "/data/nanhuang/Nan/ft_data/drosophila_enhancers_stark/split", + "--kmer", + "-1", + "--run_name", + "base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42", + "--model_max_length", + "512", + "--per_device_train_batch_size", + "128", + "--per_device_eval_batch_size", + "128", + "--gradient_accumulation_steps", + "1", + "--learning_rate", + "3e-5", + "--weight_decay", + "0.05", + "--num_train_epochs", + "4", + "--lr_scheduler_type", + "linear", + "--warmup_steps", + "0", + "--warmup_ratio", + "0.15", + "--fp16", + "--output_dir", + "genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42", + "--evaluation_strategy", + "epoch", + "--save_strategy", + "epoch", + "--load_best_model_at_end", + "True", + "--metric_for_best_model", + "eval_f1", + "--greater_is_better", + "True", + "--save_total_limit", + "1", + "--logging_steps", + "100", + "--overwrite_output_dir", + "True", + "--log_level", + "info", + "--seed", + "42", + "--find_unused_parameters", + "False", + "--project_name", + "genomic_bench_DNAbert2" + ], + "program": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", + "codePath": "train.py", + "codePathLocal": "train.py", + "email": "n5huang@ucsd.edu", + "root": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks", + "host": "u112222", + "executable": "/data/nanhuang/miniconda3/envs/bpe_v2/bin/python", + "cpu_count": 64, + "cpu_count_logical": 128, + "gpu": "NVIDIA RTX A6000", + "gpu_count": 8, + "disk": { + "/": { + "total": "3768964489216", + "used": "3559217713152" + } + }, + "memory": { + "total": "1082030182400" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-5a3d8a94-f380-da39-63d2-4cae98f5c2ae" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + 
"cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-cf8724bd-d619-7916-ee26-88d517a20c47" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-48b494ab-4a63-ff4c-5cc8-746af5d27310" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-968c7ea7-97bf-416a-7689-72c141cfc2bb" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-d53c626b-860f-1dec-1cfa-1dfcde78bc88" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-caa40ec7-afcb-5fe0-c53a-85eb54152941" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-18ee7a7f-1bbe-edef-c72c-3abed60917b2" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-a8757d5a-c26e-48c6-a704-dfe62167fc81" + } + ], + "cudaVersion": "12.4", + "writerId": "rr8bwivvdw4c9dxifbsirtydjd0xv5bc" +} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132946-77x34hbb/files/wandb-summary.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_132946-77x34hbb/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..edf2a662db7b90c38c1e765701ffc82c09c6a532 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132946-77x34hbb/files/wandb-summary.json @@ -0,0 +1 @@ +{"_runtime":13,"_wandb":{"runtime":13}} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132946-77x34hbb/logs/debug-core.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132946-77x34hbb/logs/debug-core.log new file mode 100644 index 
0000000000000000000000000000000000000000..34e694177ac3a59dadcca7031ae3974eecaa41cb --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132946-77x34hbb/logs/debug-core.log @@ -0,0 +1,14 @@ +{"time":"2026-03-24T13:29:46.625598296-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmptq3jq7cp/port-2989572.txt","pid":2989572,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:29:46.627496225-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2989572} +{"time":"2026-03-24T13:29:46.627468885-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2989572-3003288-3475254375/socket","Net":"unix"}} +{"time":"2026-03-24T13:29:46.808552658-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:29:46.895265137-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"77x34hbb","id":"1(@)"} +{"time":"2026-03-24T13:29:47.37661827-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"77x34hbb","id":"1(@)"} +{"time":"2026-03-24T13:30:01.673974857-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:30:01.674800432-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:30:01.674888701-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:30:01.674871382-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:30:01.675220339-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2989572-3003288-3475254375/socket","Net":"unix"}} +{"time":"2026-03-24T13:30:05.069276662-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} 
+{"time":"2026-03-24T13:30:05.069344162-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:30:05.069377522-07:00","level":"INFO","msg":"server is closed"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132946-77x34hbb/logs/debug-internal.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_132946-77x34hbb/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..969e1c22cc4a2f73aaaf9a0df78880a643820802 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132946-77x34hbb/logs/debug-internal.log @@ -0,0 +1,11 @@ +{"time":"2026-03-24T13:29:46.895614035-07:00","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2026-03-24T13:29:47.376153023-07:00","level":"INFO","msg":"stream: created new stream","id":"77x34hbb"} +{"time":"2026-03-24T13:29:47.376339122-07:00","level":"INFO","msg":"handler: started","stream_id":"77x34hbb"} +{"time":"2026-03-24T13:29:47.37659418-07:00","level":"INFO","msg":"stream: started","id":"77x34hbb"} +{"time":"2026-03-24T13:29:47.37662027-07:00","level":"INFO","msg":"writer: started","stream_id":"77x34hbb"} +{"time":"2026-03-24T13:29:47.3766265-07:00","level":"INFO","msg":"sender: started","stream_id":"77x34hbb"} +{"time":"2026-03-24T13:30:01.674836082-07:00","level":"INFO","msg":"stream: closing","id":"77x34hbb"} +{"time":"2026-03-24T13:30:02.45253946-07:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2026-03-24T13:30:05.053173877-07:00","level":"INFO","msg":"handler: closed","stream_id":"77x34hbb"} +{"time":"2026-03-24T13:30:05.056540977-07:00","level":"INFO","msg":"sender: closed","stream_id":"77x34hbb"} +{"time":"2026-03-24T13:30:05.056578907-07:00","level":"INFO","msg":"stream: closed","id":"77x34hbb"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132946-77x34hbb/logs/debug.log 
b/Finetune-GenomicBenchmarks/wandb/run-20260324_132946-77x34hbb/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..533d3581f16e842157580293ca522c35293b95c6 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_132946-77x34hbb/logs/debug.log @@ -0,0 +1,24 @@ +2026-03-24 13:29:46,339 INFO MainThread:2989572 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1 +2026-03-24 13:29:46,340 INFO MainThread:2989572 [wandb_setup.py:_flush():80] Configure stats pid to 2989572 +2026-03-24 13:29:46,340 INFO MainThread:2989572 [wandb_setup.py:_flush():80] Loading settings from /home/nanhuang/.config/wandb/settings +2026-03-24 13:29:46,340 INFO MainThread:2989572 [wandb_setup.py:_flush():80] Loading settings from /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/settings +2026-03-24 13:29:46,341 INFO MainThread:2989572 [wandb_setup.py:_flush():80] Loading settings from environment variables +2026-03-24 13:29:46,341 INFO MainThread:2989572 [wandb_init.py:setup_run_log_directory():714] Logging user logs to /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_132946-77x34hbb/logs/debug.log +2026-03-24 13:29:46,342 INFO MainThread:2989572 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_132946-77x34hbb/logs/debug-internal.log +2026-03-24 13:29:46,342 INFO MainThread:2989572 [wandb_init.py:init():841] calling init triggers +2026-03-24 13:29:46,343 INFO MainThread:2989572 [wandb_init.py:init():846] wandb.init called with sweep_config: {} +config: {'_wandb': {}} +2026-03-24 13:29:46,343 INFO MainThread:2989572 [wandb_init.py:init():889] starting backend +2026-03-24 13:29:46,809 INFO MainThread:2989572 [wandb_init.py:init():892] sending inform_init request +2026-03-24 13:29:46,892 INFO MainThread:2989572 [wandb_init.py:init():900] backend started and connected +2026-03-24 13:29:46,975 INFO MainThread:2989572 [wandb_init.py:init():970] 
updated telemetry +2026-03-24 13:29:46,980 INFO MainThread:2989572 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout +2026-03-24 13:29:47,977 INFO MainThread:2989572 [wandb_init.py:init():1041] starting run threads in backend +2026-03-24 13:29:48,416 INFO MainThread:2989572 [wandb_run.py:_console_start():2521] atexit reg +2026-03-24 13:29:48,417 INFO MainThread:2989572 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2026-03-24 13:29:48,418 INFO MainThread:2989572 [wandb_run.py:_redirect():2438] Wrapping output streams. +2026-03-24 13:29:48,418 INFO MainThread:2989572 [wandb_run.py:_redirect():2461] Redirects installed. +2026-03-24 13:29:48,428 INFO MainThread:2989572 [wandb_init.py:init():1081] run started, returning control to user process +2026-03-24 13:30:00,945 INFO MainThread:2989572 [wandb_run.py:_config_callback():1396] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 512, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['BertForMaskedLM'], 'finetuning_task': None, 'id2label': {0: 
'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': '/data/nanhuang/Nan/models/DNAbert2_Pretrained', 'transformers_version': '4.35.2', 'model_type': 'bert', 'vocab_size': 4096, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 512, 'type_vocab_size': 2, 'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'classifier_dropout': None, 'output_dir': 'genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 128, 'per_device_eval_batch_size': 128, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 3e-05, 'weight_decay': 0.05, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 4, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'warmup_ratio': 0.15, 'warmup_steps': 0, 'log_level': 'info', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 'genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/runs/Mar24_13-29-33_u112222', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 100, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 100, 'save_total_limit': 1, 'save_safetensors': True, 'save_on_each_node': 
False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 100, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': 'base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'eval_f1', 'greater_is_better': True, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': False, 
'include_tokens_per_second': False, 'neftune_noise_alpha': None, 'vocab_file': None, 'cache_dir': None, 'model_max_length': 512, 'find_unused_parameters': False, 'checkpointing': False, 'eval_and_save_results': True, 'save_model': False, 'project_name': 'genomic_bench_DNAbert2'} +2026-03-24 13:30:01,672 INFO wandb-AsyncioManager-main:2989572 [service_client.py:_forward_responses():80] Reached EOF. +2026-03-24 13:30:01,673 INFO wandb-AsyncioManager-main:2989572 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles. diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_132946-77x34hbb/run-77x34hbb.wandb b/Finetune-GenomicBenchmarks/wandb/run-20260324_132946-77x34hbb/run-77x34hbb.wandb new file mode 100644 index 0000000000000000000000000000000000000000..6fe720c49b510242faa8e167b46b630290e41605 Binary files /dev/null and b/Finetune-GenomicBenchmarks/wandb/run-20260324_132946-77x34hbb/run-77x34hbb.wandb differ diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_133444-pfwf0zeu/files/config.yaml b/Finetune-GenomicBenchmarks/wandb/run-20260324_133444-pfwf0zeu/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..93fdbfac7009c3dd2ef79b956a392e361b55830c --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_133444-pfwf0zeu/files/config.yaml @@ -0,0 +1,559 @@ +_name_or_path: + value: /data/nanhuang/Nan/models/DNAbert2_Pretrained +_wandb: + value: + cli_version: 0.23.1 + e: + yi4es0pwn2rzpjdjpcihoo44rcuvuy87: + args: + - --model_name_or_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained + - --tokenizer_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json + - --trust_remote_code + - "True" + - --data_path + - /data/nanhuang/Nan/ft_data/drosophila_enhancers_stark/split + - --kmer + - "-1" + - --run_name + - base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42 + - --model_max_length + - "512" + - --per_device_train_batch_size + - "32" + - --per_device_eval_batch_size + - "32" + 
- --gradient_accumulation_steps + - "4" + - --learning_rate + - "3e-5" + - --weight_decay + - "0.05" + - --num_train_epochs + - "4" + - --lr_scheduler_type + - linear + - --warmup_steps + - "0" + - --warmup_ratio + - "0.15" + - --fp16 + - --output_dir + - genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42 + - --evaluation_strategy + - epoch + - --save_strategy + - epoch + - --load_best_model_at_end + - "True" + - --metric_for_best_model + - eval_f1 + - --greater_is_better + - "True" + - --save_total_limit + - "1" + - --logging_steps + - "100" + - --overwrite_output_dir + - "True" + - --log_level + - info + - --seed + - "42" + - --find_unused_parameters + - "False" + - --project_name + - genomic_bench_DNAbert2 + codePath: train.py + codePathLocal: train.py + cpu_count: 64 + cpu_count_logical: 128 + cudaVersion: "12.4" + disk: + /: + total: "3768964489216" + used: "3559218933760" + email: n5huang@ucsd.edu + executable: /data/nanhuang/miniconda3/envs/bpe_v2/bin/python + gpu: NVIDIA RTX A6000 + gpu_count: 8 + gpu_nvidia: + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-5a3d8a94-f380-da39-63d2-4cae98f5c2ae + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-cf8724bd-d619-7916-ee26-88d517a20c47 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-48b494ab-4a63-ff4c-5cc8-746af5d27310 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-968c7ea7-97bf-416a-7689-72c141cfc2bb + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-d53c626b-860f-1dec-1cfa-1dfcde78bc88 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-caa40ec7-afcb-5fe0-c53a-85eb54152941 + - architecture: Ampere + 
cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-18ee7a7f-1bbe-edef-c72c-3abed60917b2 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-a8757d5a-c26e-48c6-a704-dfe62167fc81 + host: u112222 + memory: + total: "1082030182400" + os: Linux-5.15.0-126-generic-x86_64-with-glibc2.35 + program: /data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py + python: CPython 3.9.18 + root: /data/nanhuang/Nan/Finetune-GenomicBenchmarks + startedAt: "2026-03-24T20:34:44.832175Z" + writerId: yi4es0pwn2rzpjdjpcihoo44rcuvuy87 + m: + - "1": train/global_step + "6": + - 3 + "7": [] + - "2": '*' + "5": 1 + "6": + - 1 + "7": [] + python_version: 3.9.18 + t: + "1": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "2": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "3": + - 7 + - 66 + "4": 3.9.18 + "5": 0.23.1 + "6": 4.35.2 + "9": + "1": transformers_trainer + "12": 0.23.1 + "13": linux-x86_64 +adafactor: + value: false +adam_beta1: + value: 0.9 +adam_beta2: + value: 0.999 +adam_epsilon: + value: 1e-08 +add_cross_attention: + value: false +architectures: + value: + - BertForMaskedLM +attention_probs_dropout_prob: + value: 0.1 +auto_find_batch_size: + value: false +bad_words_ids: + value: null +begin_suppress_tokens: + value: null +bf16: + value: false +bf16_full_eval: + value: false +bos_token_id: + value: null +cache_dir: + value: null +checkpointing: + value: false +chunk_size_feed_forward: + value: 0 +classifier_dropout: + value: null +cross_attention_hidden_size: + value: null +data_seed: + value: null +dataloader_drop_last: + value: false +dataloader_num_workers: + value: 0 +dataloader_pin_memory: + value: false +ddp_backend: + value: null +ddp_broadcast_buffers: + value: null +ddp_bucket_cap_mb: + value: null +ddp_find_unused_parameters: + value: null +ddp_timeout: + value: 1800 +debug: + value: [] +decoder_start_token_id: + value: null +deepspeed: + value: null +disable_tqdm: + value: false 
+dispatch_batches: + value: null +diversity_penalty: + value: 0 +do_eval: + value: true +do_predict: + value: false +do_sample: + value: false +do_train: + value: false +early_stopping: + value: false +encoder_no_repeat_ngram_size: + value: 0 +eos_token_id: + value: null +eval_accumulation_steps: + value: null +eval_and_save_results: + value: true +eval_delay: + value: 0 +eval_steps: + value: 100 +evaluation_strategy: + value: epoch +exponential_decay_length_penalty: + value: null +find_unused_parameters: + value: false +finetuning_task: + value: null +forced_bos_token_id: + value: null +forced_eos_token_id: + value: null +fp16: + value: true +fp16_backend: + value: auto +fp16_full_eval: + value: false +fp16_opt_level: + value: O1 +fsdp: + value: [] +fsdp_config: + value: + min_num_params: 0 + xla: false + xla_fsdp_grad_ckpt: false +fsdp_min_num_params: + value: 0 +fsdp_transformer_layer_cls_to_wrap: + value: null +full_determinism: + value: false +gradient_accumulation_steps: + value: 4 +gradient_checkpointing: + value: false +gradient_checkpointing_kwargs: + value: null +greater_is_better: + value: true +group_by_length: + value: false +half_precision_backend: + value: auto +hidden_act: + value: gelu +hidden_dropout_prob: + value: 0.1 +hidden_size: + value: 768 +hub_always_push: + value: false +hub_model_id: + value: null +hub_private_repo: + value: false +hub_strategy: + value: every_save +hub_token: + value: +id2label: + value: + "0": LABEL_0 + "1": LABEL_1 +ignore_data_skip: + value: false +include_inputs_for_metrics: + value: false +include_tokens_per_second: + value: false +initializer_range: + value: 0.02 +intermediate_size: + value: 3072 +is_decoder: + value: false +is_encoder_decoder: + value: false +jit_mode_eval: + value: false +label_names: + value: null +label_smoothing_factor: + value: 0 +label2id: + value: + LABEL_0: 0 + LABEL_1: 1 +layer_norm_eps: + value: 1e-12 +learning_rate: + value: 3e-05 +length_column_name: + value: length +length_penalty: + 
value: 1 +load_best_model_at_end: + value: true +local_rank: + value: 0 +log_level: + value: info +log_level_replica: + value: warning +log_on_each_node: + value: true +logging_dir: + value: genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/runs/Mar24_13-34-44_u112222 +logging_first_step: + value: false +logging_nan_inf_filter: + value: true +logging_steps: + value: 100 +logging_strategy: + value: steps +lr_scheduler_type: + value: linear +max_grad_norm: + value: 1 +max_length: + value: 512 +max_position_embeddings: + value: 512 +max_steps: + value: -1 +metric_for_best_model: + value: eval_f1 +min_length: + value: 0 +model_max_length: + value: 512 +model_type: + value: bert +mp_parameters: + value: "" +neftune_noise_alpha: + value: null +no_cuda: + value: false +no_repeat_ngram_size: + value: 0 +num_attention_heads: + value: 12 +num_beam_groups: + value: 1 +num_beams: + value: 1 +num_hidden_layers: + value: 12 +num_return_sequences: + value: 1 +num_train_epochs: + value: 4 +optim: + value: adamw_torch +optim_args: + value: null +output_attentions: + value: false +output_dir: + value: genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42 +output_hidden_states: + value: false +output_scores: + value: false +overwrite_output_dir: + value: true +pad_token_id: + value: 0 +past_index: + value: -1 +per_device_eval_batch_size: + value: 32 +per_device_train_batch_size: + value: 32 +per_gpu_eval_batch_size: + value: null +per_gpu_train_batch_size: + value: null +position_embedding_type: + value: absolute +prediction_loss_only: + value: false +prefix: + value: null +problem_type: + value: null +project_name: + value: genomic_bench_DNAbert2 +push_to_hub: + value: false +push_to_hub_model_id: + value: null +push_to_hub_organization: + value: null +push_to_hub_token: + value: +ray_scope: + value: last +remove_invalid_values: + value: false +remove_unused_columns: + 
value: true +repetition_penalty: + value: 1 +report_to: + value: + - wandb +resume_from_checkpoint: + value: null +return_dict: + value: true +return_dict_in_generate: + value: false +run_name: + value: base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42 +save_model: + value: false +save_on_each_node: + value: false +save_safetensors: + value: true +save_steps: + value: 100 +save_strategy: + value: epoch +save_total_limit: + value: 1 +seed: + value: 42 +sep_token_id: + value: null +skip_memory_metrics: + value: true +split_batches: + value: false +suppress_tokens: + value: null +task_specific_params: + value: null +temperature: + value: 1 +tf_legacy_loss: + value: false +tf32: + value: null +tie_encoder_decoder: + value: false +tie_word_embeddings: + value: true +tokenizer_class: + value: null +top_k: + value: 50 +top_p: + value: 1 +torch_compile: + value: false +torch_compile_backend: + value: null +torch_compile_mode: + value: null +torch_dtype: + value: float32 +torchdynamo: + value: null +torchscript: + value: false +tpu_metrics_debug: + value: false +tpu_num_cores: + value: null +transformers_version: + value: 4.35.2 +type_vocab_size: + value: 2 +typical_p: + value: 1 +use_bfloat16: + value: false +use_cache: + value: true +use_cpu: + value: false +use_ipex: + value: false +use_legacy_prediction_loop: + value: false +use_mps_device: + value: false +vocab_file: + value: null +vocab_size: + value: 4096 +warmup_ratio: + value: 0.15 +warmup_steps: + value: 0 +weight_decay: + value: 0.05 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_133444-pfwf0zeu/files/output.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_133444-pfwf0zeu/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..1ae6975152cb5c9e4174688c06f9aab2c92adf13 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_133444-pfwf0zeu/files/output.log @@ -0,0 +1,69 @@ +WARNING:root:Perform single sequence classification... 
+WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... +Some weights of BertForSequenceClassification were not initialized from the model checkpoint at /data/nanhuang/Nan/models/DNAbert2_Pretrained and are newly initialized: ['classifier.weight', 'bert.pooler.dense.weight', 'bert.pooler.dense.bias', 'classifier.bias'] +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. +/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/accelerate/accelerator.py:439: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead. + self.scaler = torch.cuda.amp.GradScaler(**kwargs) +Using auto half precision backend +***** Running training ***** + Num examples = 5,531 + Num Epochs = 4 + Instantaneous batch size per device = 32 + Total train batch size (w. parallel, distributed & accumulation) = 128 + Gradient Accumulation steps = 4 + Total optimization steps = 172 + Number of trainable parameters = 89,188,610 +Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true" + 25%|██▌ | 43/172 [00:56<02:50, 1.32s/it]***** Running Evaluation ***** + Num examples = 691 + Batch size = 32 + 25%|██▌ | 43/172 [00:59<02:50, 1.32s/itSaving model checkpoint to genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/checkpoint-43 +Configuration saved in genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/checkpoint-43/config.json +{'eval_loss': 0.6628125309944153, 'eval_accuracy': 0.5817655571635311, 'eval_f1': 0.5610827704893171, 'eval_matthews_correlation': 0.21314123722950265, 'eval_precision': 0.6214865572625698, 'eval_recall': 0.5934860367092647, 'eval_runtime': 2.2484, 'eval_samples_per_second': 307.332, 'eval_steps_per_second': 9.785, 'epoch': 0.99} +Model 
weights saved in genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/checkpoint-43/pytorch_model.bin +tokenizer config file saved in genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/checkpoint-43/tokenizer_config.json +Special tokens file saved in genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/checkpoint-43/special_tokens_map.json + 50%|█████ | 86/172 [01:58<01:54, 1.33s/it]***** Running Evaluation ***** + Num examples = 691 + Batch size = 32 + 50%|█████ | 86/172 [02:00<01:54, 1.33s/itSaving model checkpoint to genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/checkpoint-86 +Configuration saved in genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/checkpoint-86/config.json +{'eval_loss': 0.5978314876556396, 'eval_accuracy': 0.6743849493487699, 'eval_f1': 0.6528777821686682, 'eval_matthews_correlation': 0.3669122104687527, 'eval_precision': 0.7052053787162517, 'eval_recall': 0.6640119901257788, 'eval_runtime': 2.2606, 'eval_samples_per_second': 305.673, 'eval_steps_per_second': 9.732, 'epoch': 1.99} +Model weights saved in genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/checkpoint-86/pytorch_model.bin +tokenizer config file saved in genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/checkpoint-86/tokenizer_config.json +Special tokens file saved in genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/checkpoint-86/special_tokens_map.json +Deleting older checkpoint [genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/checkpoint-43] due to 
args.save_total_limit + 75%|███████▌ | 129/172 [02:59<00:57, 1.34s/it]***** Running Evaluation ***** +{'loss': 0.6252, 'learning_rate': 1.4794520547945205e-05, 'epoch': 2.31} + Num examples = 691 + Batch size = 32 + 75%|███████▌ | 129/172 [03:02<00:57, 1.34s/iSaving model checkpoint to genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/checkpoint-129 +Configuration saved in genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/checkpoint-129/config.json +{'eval_loss': 0.58121657371521, 'eval_accuracy': 0.6903039073806078, 'eval_f1': 0.687708020071297, 'eval_matthews_correlation': 0.37804735134263795, 'eval_precision': 0.6905416012558869, 'eval_recall': 0.6875178424490755, 'eval_runtime': 2.2678, 'eval_samples_per_second': 304.702, 'eval_steps_per_second': 9.701, 'epoch': 2.98} +Model weights saved in genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/checkpoint-129/pytorch_model.bin +tokenizer config file saved in genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/checkpoint-129/tokenizer_config.json +Special tokens file saved in genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/checkpoint-129/special_tokens_map.json +Deleting older checkpoint [genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/checkpoint-86] due to args.save_total_limit +100%|██████████| 172/172 [04:01<00:00, 1.34s/it]***** Running Evaluation ***** + Num examples = 691 + Batch size = 32 +100%|██████████| 172/172 [04:03<00:00, 1.34s/iSaving model checkpoint to genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/checkpoint-172 +Configuration saved in 
genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/checkpoint-172/config.json +{'eval_loss': 0.5836874842643738, 'eval_accuracy': 0.6960926193921853, 'eval_f1': 0.6941549901360716, 'eval_matthews_correlation': 0.38973274325358714, 'eval_precision': 0.6958638707926167, 'eval_recall': 0.6938739525432837, 'eval_runtime': 2.274, 'eval_samples_per_second': 303.868, 'eval_steps_per_second': 9.675, 'epoch': 3.98} +Model weights saved in genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/checkpoint-172/pytorch_model.bin +tokenizer config file saved in genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/checkpoint-172/tokenizer_config.json +Special tokens file saved in genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/checkpoint-172/special_tokens_map.json +Deleting older checkpoint [genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/checkpoint-129] due to args.save_total_limit + + +Training completed. Do not forget to share your model on huggingface.co/models =) + + +Loading best model from genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/checkpoint-172 (score: 0.6941549901360716). 
+100%|██████████| 172/172 [04:05<00:00, 1.43s/it] +{'train_runtime': 245.7227, 'train_samples_per_second': 90.036, 'train_steps_per_second': 0.7, 'train_loss': 0.5826694355454556, 'epoch': 3.98} +***** Running Evaluation ***** + Num examples = 692 + Batch size = 32 +100%|██████████| 22/22 [00:02<00:00, 10.19it/s] diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_133444-pfwf0zeu/files/requirements.txt b/Finetune-GenomicBenchmarks/wandb/run-20260324_133444-pfwf0zeu/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..44d18d32ec4648cd530877d7c8c4758d5e887b9c --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_133444-pfwf0zeu/files/requirements.txt @@ -0,0 +1,144 @@ +scipy==1.13.1 +jupyter_core==5.8.1 +smmap==5.0.2 +yarl==1.22.0 +executing==2.2.0 +cffi==2.0.0 +mkl_random==1.2.8 +traitlets==5.14.3 +wandb==0.23.1 +annotated-types==0.7.0 +evaluate==0.4.6 +kiwisolver==1.4.4 +Jinja2==3.1.6 +pyparsing==3.2.0 +mpmath==1.3.0 +debugpy==1.8.16 +nvidia-cuda-nvrtc-cu12==12.8.93 +docker-pycreds==0.4.0 +pycparser==2.23 +anyio==4.12.0 +safetensors==0.7.0 +matplotlib-inline==0.1.7 +Pygments==2.19.2 +numpy==2.0.2 +nvidia-cuda-cupti-cu12==12.8.90 +Bottleneck==1.4.2 +matplotlib==3.9.2 +numexpr==2.10.1 +sip==6.7.12 +aiohappyeyeballs==2.6.1 +ptyprocess==0.7.0 +fsspec==2025.7.0 +accelerate==0.25.0 +zipp==3.23.0 +PyQt5_sip==12.13.0 +pure_eval==0.2.3 +regex==2025.11.3 +aiosignal==1.4.0 +certifi==2025.10.5 +transformers==4.35.2 +mkl-service==2.4.0 +httpx==0.28.1 +mkl_fft==1.3.11 +pickleshare==0.7.5 +ipykernel==6.30.1 +nvidia-nvtx-cu12==12.8.90 +nvidia-cufft-cu12==11.3.3.83 +triton==3.4.0 +numba==0.60.0 +psutil==7.0.0 +contourpy==1.2.1 +PyQt5==5.15.10 +packaging==25.0 +datasets==4.4.1 +ipython==8.18.1 +sympy==1.14.0 +nvidia-cusolver-cu12==11.7.3.90 +multidict==6.7.0 +jupyter_client==8.6.3 +setuptools==80.9.0 +prompt_toolkit==3.0.51 +six==1.17.0 +GitPython==3.1.45 +pydantic==2.11.7 +nvidia-cublas-cu12==12.8.4.1 +aiohttp==3.13.2 
+tzdata==2025.2 +importlib_metadata==8.7.0 +biopython==1.85 +httpcore==1.0.9 +python-dateutil==2.9.0.post0 +llvmlite==0.43.0 +pandas==2.3.3 +scikit-learn==1.6.1 +asttokens==3.0.0 +joblib==1.5.3 +h11==0.16.0 +charset-normalizer==3.4.4 +pyzmq==27.0.2 +multiprocess==0.70.18 +nvidia-nvjitlink-cu12==12.8.93 +sentry-sdk==2.35.0 +pytz==2025.2 +pydantic_core==2.33.2 +MarkupSafe==3.0.3 +brotlicffi==1.0.9.2 +stack_data==0.6.3 +tqdm==4.67.1 +pynndescent==0.5.13 +importlib_resources==6.5.2 +ply==3.11 +pyarrow==21.0.0 +tokenizers==0.15.2 +exceptiongroup==1.3.1 +nvidia-cusparse-cu12==12.5.8.93 +comm==0.2.3 +pillow==11.3.0 +nvidia-cusparselt-cu12==0.7.1 +protobuf==3.20.3 +urllib3==2.5.0 +wheel==0.45.1 +wcwidth==0.2.13 +appdirs==1.4.4 +PySocks==1.7.1 +PyQt6_sip==13.10.2 +umap-learn==0.5.9.post2 +attrs==25.4.0 +platformdirs==4.3.8 +nvidia-cuda-runtime-cu12==12.8.90 +typing-inspection==0.4.1 +huggingface_hub==0.34.4 +decorator==5.2.1 +filelock==3.17.0 +nvidia-nccl-cu12==2.27.3 +fonttools==4.60.1 +xxhash==3.6.0 +dill==0.4.0 +threadpoolctl==3.6.0 +parso==0.8.4 +pysam==0.9.1 +frozenlist==1.8.0 +typing_extensions==4.15.0 +propcache==0.4.1 +tomli==2.2.1 +click==8.1.8 +nvidia-cudnn-cu12==9.10.2.21 +gitdb==4.0.12 +pip==25.3 +tornado==6.5.2 +networkx==3.2.1 +jedi==0.19.2 +idna==3.11 +pexpect==4.9.0 +async-timeout==5.0.1 +hf-xet==1.1.8 +nvidia-curand-cu12==10.3.9.90 +PyYAML==6.0.2 +nvidia-cufile-cu12==1.13.1.3 +setproctitle==1.3.6 +eval_type_backport==0.2.2 +requests==2.32.5 +nest-asyncio==1.6.0 +torch==2.8.0 +cycler==0.11.0 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_133444-pfwf0zeu/files/wandb-metadata.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_133444-pfwf0zeu/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..9a3350ce4c54c7dd2c9e6e7d70a77d8fb10e909c --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_133444-pfwf0zeu/files/wandb-metadata.json @@ -0,0 +1,146 @@ +{ + "os": 
"Linux-5.15.0-126-generic-x86_64-with-glibc2.35", + "python": "CPython 3.9.18", + "startedAt": "2026-03-24T20:34:44.832175Z", + "args": [ + "--model_name_or_path", + "/data/nanhuang/Nan/models/DNAbert2_Pretrained", + "--tokenizer_path", + "/data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json", + "--trust_remote_code", + "True", + "--data_path", + "/data/nanhuang/Nan/ft_data/drosophila_enhancers_stark/split", + "--kmer", + "-1", + "--run_name", + "base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42", + "--model_max_length", + "512", + "--per_device_train_batch_size", + "32", + "--per_device_eval_batch_size", + "32", + "--gradient_accumulation_steps", + "4", + "--learning_rate", + "3e-5", + "--weight_decay", + "0.05", + "--num_train_epochs", + "4", + "--lr_scheduler_type", + "linear", + "--warmup_steps", + "0", + "--warmup_ratio", + "0.15", + "--fp16", + "--output_dir", + "genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42", + "--evaluation_strategy", + "epoch", + "--save_strategy", + "epoch", + "--load_best_model_at_end", + "True", + "--metric_for_best_model", + "eval_f1", + "--greater_is_better", + "True", + "--save_total_limit", + "1", + "--logging_steps", + "100", + "--overwrite_output_dir", + "True", + "--log_level", + "info", + "--seed", + "42", + "--find_unused_parameters", + "False", + "--project_name", + "genomic_bench_DNAbert2" + ], + "program": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", + "codePath": "train.py", + "codePathLocal": "train.py", + "email": "n5huang@ucsd.edu", + "root": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks", + "host": "u112222", + "executable": "/data/nanhuang/miniconda3/envs/bpe_v2/bin/python", + "cpu_count": 64, + "cpu_count_logical": 128, + "gpu": "NVIDIA RTX A6000", + "gpu_count": 8, + "disk": { + "/": { + "total": "3768964489216", + "used": "3559218933760" + } + }, + "memory": { + "total": "1082030182400" + }, + "gpu_nvidia": [ + { + 
"name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-5a3d8a94-f380-da39-63d2-4cae98f5c2ae" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-cf8724bd-d619-7916-ee26-88d517a20c47" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-48b494ab-4a63-ff4c-5cc8-746af5d27310" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-968c7ea7-97bf-416a-7689-72c141cfc2bb" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-d53c626b-860f-1dec-1cfa-1dfcde78bc88" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-caa40ec7-afcb-5fe0-c53a-85eb54152941" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-18ee7a7f-1bbe-edef-c72c-3abed60917b2" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-a8757d5a-c26e-48c6-a704-dfe62167fc81" + } + ], + "cudaVersion": "12.4", + "writerId": "yi4es0pwn2rzpjdjpcihoo44rcuvuy87" +} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_133444-pfwf0zeu/files/wandb-summary.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_133444-pfwf0zeu/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..c93a3a4c2e0425b0bcedf2fa2fb0d31729d551f9 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_133444-pfwf0zeu/files/wandb-summary.json @@ -0,0 +1 @@ 
+{"eval/recall":0.6846270161290322,"eval/samples_per_second":305.451,"train/total_flos":5.78870632897536e+15,"train/global_step":172,"_timestamp":1.7743847386845798e+09,"train/train_runtime":245.7227,"train/loss":0.6252,"eval/loss":0.6012589931488037,"eval/f1":0.685187074829932,"train/train_samples_per_second":90.036,"eval/runtime":2.2655,"train/learning_rate":1.4794520547945205e-05,"eval/steps_per_second":9.711,"_wandb":{"runtime":252},"_step":6,"eval/precision":0.6905166435506241,"eval/accuracy":0.6907514450867052,"train/epoch":3.98,"train/train_loss":0.5826694355454556,"eval/matthews_correlation":0.37509742426025894,"train/train_steps_per_second":0.7,"_runtime":252} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_133444-pfwf0zeu/logs/debug-core.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_133444-pfwf0zeu/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..ede18a02068b1df73d2338121f62f22c2b0ad87d --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_133444-pfwf0zeu/logs/debug-core.log @@ -0,0 +1,14 @@ +{"time":"2026-03-24T13:34:44.959466048-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpoth5ec34/port-3006432.txt","pid":3006432,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:34:44.960393713-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":3006432} +{"time":"2026-03-24T13:34:44.960353963-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-3006432-3006523-2960681678/socket","Net":"unix"}} +{"time":"2026-03-24T13:34:45.144206054-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:34:45.215143107-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"pfwf0zeu","id":"1(@)"} 
+{"time":"2026-03-24T13:34:45.603893866-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"pfwf0zeu","id":"1(@)"} +{"time":"2026-03-24T13:38:58.688498409-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:38:58.688640679-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:38:58.688614249-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:38:58.688828687-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:38:58.688960757-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-3006432-3006523-2960681678/socket","Net":"unix"}} +{"time":"2026-03-24T13:38:59.460891156-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:38:59.460942596-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:38:59.460974936-07:00","level":"INFO","msg":"server is closed"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_133444-pfwf0zeu/logs/debug-internal.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_133444-pfwf0zeu/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..1abf447d73b4a2521bd5be692a45bc3ccbaa5105 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_133444-pfwf0zeu/logs/debug-internal.log @@ -0,0 +1,11 @@ +{"time":"2026-03-24T13:34:45.215450226-07:00","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2026-03-24T13:34:45.603449479-07:00","level":"INFO","msg":"stream: created new stream","id":"pfwf0zeu"} +{"time":"2026-03-24T13:34:45.603633208-07:00","level":"INFO","msg":"handler: started","stream_id":"pfwf0zeu"} +{"time":"2026-03-24T13:34:45.603874256-07:00","level":"INFO","msg":"stream: started","id":"pfwf0zeu"} 
+{"time":"2026-03-24T13:34:45.603941506-07:00","level":"INFO","msg":"writer: started","stream_id":"pfwf0zeu"} +{"time":"2026-03-24T13:34:45.603969236-07:00","level":"INFO","msg":"sender: started","stream_id":"pfwf0zeu"} +{"time":"2026-03-24T13:38:58.688653798-07:00","level":"INFO","msg":"stream: closing","id":"pfwf0zeu"} +{"time":"2026-03-24T13:38:59.155335306-07:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2026-03-24T13:38:59.453220761-07:00","level":"INFO","msg":"handler: closed","stream_id":"pfwf0zeu"} +{"time":"2026-03-24T13:38:59.45343516-07:00","level":"INFO","msg":"sender: closed","stream_id":"pfwf0zeu"} +{"time":"2026-03-24T13:38:59.453495669-07:00","level":"INFO","msg":"stream: closed","id":"pfwf0zeu"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_133444-pfwf0zeu/logs/debug.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_133444-pfwf0zeu/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..b324469bc5275cbb460066637da3556bd0a5ac0b --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_133444-pfwf0zeu/logs/debug.log @@ -0,0 +1,24 @@ +2026-03-24 13:34:44,835 INFO MainThread:3006432 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1 +2026-03-24 13:34:44,836 INFO MainThread:3006432 [wandb_setup.py:_flush():80] Configure stats pid to 3006432 +2026-03-24 13:34:44,836 INFO MainThread:3006432 [wandb_setup.py:_flush():80] Loading settings from /home/nanhuang/.config/wandb/settings +2026-03-24 13:34:44,836 INFO MainThread:3006432 [wandb_setup.py:_flush():80] Loading settings from /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/settings +2026-03-24 13:34:44,836 INFO MainThread:3006432 [wandb_setup.py:_flush():80] Loading settings from environment variables +2026-03-24 13:34:44,836 INFO MainThread:3006432 [wandb_init.py:setup_run_log_directory():714] Logging user logs to 
/data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_133444-pfwf0zeu/logs/debug.log +2026-03-24 13:34:44,836 INFO MainThread:3006432 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_133444-pfwf0zeu/logs/debug-internal.log +2026-03-24 13:34:44,836 INFO MainThread:3006432 [wandb_init.py:init():841] calling init triggers +2026-03-24 13:34:44,836 INFO MainThread:3006432 [wandb_init.py:init():846] wandb.init called with sweep_config: {} +config: {'_wandb': {}} +2026-03-24 13:34:44,836 INFO MainThread:3006432 [wandb_init.py:init():889] starting backend +2026-03-24 13:34:45,144 INFO MainThread:3006432 [wandb_init.py:init():892] sending inform_init request +2026-03-24 13:34:45,212 INFO MainThread:3006432 [wandb_init.py:init():900] backend started and connected +2026-03-24 13:34:45,220 INFO MainThread:3006432 [wandb_init.py:init():970] updated telemetry +2026-03-24 13:34:45,222 INFO MainThread:3006432 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout +2026-03-24 13:34:46,003 INFO MainThread:3006432 [wandb_init.py:init():1041] starting run threads in backend +2026-03-24 13:34:46,149 INFO MainThread:3006432 [wandb_run.py:_console_start():2521] atexit reg +2026-03-24 13:34:46,150 INFO MainThread:3006432 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2026-03-24 13:34:46,150 INFO MainThread:3006432 [wandb_run.py:_redirect():2438] Wrapping output streams. +2026-03-24 13:34:46,150 INFO MainThread:3006432 [wandb_run.py:_redirect():2461] Redirects installed. 
+2026-03-24 13:34:46,155 INFO MainThread:3006432 [wandb_init.py:init():1081] run started, returning control to user process +2026-03-24 13:34:50,695 INFO MainThread:3006432 [wandb_run.py:_config_callback():1396] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 512, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['BertForMaskedLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': '/data/nanhuang/Nan/models/DNAbert2_Pretrained', 'transformers_version': '4.35.2', 'model_type': 'bert', 'vocab_size': 4096, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 512, 'type_vocab_size': 2, 
'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'classifier_dropout': None, 'output_dir': 'genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 32, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 3e-05, 'weight_decay': 0.05, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 4, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'warmup_ratio': 0.15, 'warmup_steps': 0, 'log_level': 'info', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 'genomic_bench_DNAbert2_output/drosophila_enhancers_stark/DNAbert2_Pretrained/lr3e-5_wd0.05_wr0.15_ep4_seed42/runs/Mar24_13-34-44_u112222', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 100, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 100, 'save_total_limit': 1, 'save_safetensors': True, 'save_on_each_node': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 100, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': 'base5120_drosophila_enhancers_stark_lr3e-5_wd0.05_wr0.15_ep4_seed42', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 
'load_best_model_at_end': True, 'metric_for_best_model': 'eval_f1', 'greater_is_better': True, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': False, 'include_tokens_per_second': False, 'neftune_noise_alpha': None, 'vocab_file': None, 'cache_dir': None, 'model_max_length': 512, 'find_unused_parameters': False, 'checkpointing': False, 'eval_and_save_results': True, 'save_model': False, 'project_name': 'genomic_bench_DNAbert2'} +2026-03-24 13:38:58,688 INFO wandb-AsyncioManager-main:3006432 [service_client.py:_forward_responses():80] Reached EOF. +2026-03-24 13:38:58,688 INFO wandb-AsyncioManager-main:3006432 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles. 
diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_133444-pfwf0zeu/run-pfwf0zeu.wandb b/Finetune-GenomicBenchmarks/wandb/run-20260324_133444-pfwf0zeu/run-pfwf0zeu.wandb new file mode 100644 index 0000000000000000000000000000000000000000..b4e622492e1a83057f8d386066da62ddbbbdbba5 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_133444-pfwf0zeu/run-pfwf0zeu.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bebba652441025a7370f1d8574fbc35ad818337405f57a37a93e6f8c983d050 +size 115737 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_133906-m7fu9ed0/files/config.yaml b/Finetune-GenomicBenchmarks/wandb/run-20260324_133906-m7fu9ed0/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dac6a750d12e55b1246ce864f4780abfa07c1d69 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_133906-m7fu9ed0/files/config.yaml @@ -0,0 +1,559 @@ +_name_or_path: + value: /data/nanhuang/Nan/models/DNAbert2_Pretrained +_wandb: + value: + cli_version: 0.23.1 + e: + zh3pc98d9pqlw9njow7sy91ko0mm0fc9: + args: + - --model_name_or_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained + - --tokenizer_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json + - --trust_remote_code + - "True" + - --data_path + - /data/nanhuang/Nan/ft_data/dummy_mouse_enhancers_ensembl/split + - --kmer + - "-1" + - --run_name + - base5120_dummy_mouse_enhancers_ensembl_lr3e-5_wd0.01_wr0.10_ep8_seed42 + - --model_max_length + - "512" + - --per_device_train_batch_size + - "32" + - --per_device_eval_batch_size + - "32" + - --gradient_accumulation_steps + - "4" + - --learning_rate + - "3e-5" + - --weight_decay + - "0.01" + - --num_train_epochs + - "8" + - --lr_scheduler_type + - linear + - --warmup_steps + - "0" + - --warmup_ratio + - "0.10" + - --fp16 + - --output_dir + - genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42 + - 
--evaluation_strategy + - epoch + - --save_strategy + - epoch + - --load_best_model_at_end + - "True" + - --metric_for_best_model + - eval_f1 + - --greater_is_better + - "True" + - --save_total_limit + - "1" + - --logging_steps + - "100" + - --overwrite_output_dir + - "True" + - --log_level + - info + - --seed + - "42" + - --find_unused_parameters + - "False" + - --project_name + - genomic_bench_DNAbert2 + codePath: train.py + codePathLocal: train.py + cpu_count: 64 + cpu_count_logical: 128 + cudaVersion: "12.4" + disk: + /: + total: "3768964489216" + used: "3559219064832" + email: n5huang@ucsd.edu + executable: /data/nanhuang/miniconda3/envs/bpe_v2/bin/python + gpu: NVIDIA RTX A6000 + gpu_count: 8 + gpu_nvidia: + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-5a3d8a94-f380-da39-63d2-4cae98f5c2ae + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-cf8724bd-d619-7916-ee26-88d517a20c47 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-48b494ab-4a63-ff4c-5cc8-746af5d27310 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-968c7ea7-97bf-416a-7689-72c141cfc2bb + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-d53c626b-860f-1dec-1cfa-1dfcde78bc88 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-caa40ec7-afcb-5fe0-c53a-85eb54152941 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-18ee7a7f-1bbe-edef-c72c-3abed60917b2 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-a8757d5a-c26e-48c6-a704-dfe62167fc81 + host: u112222 + memory: + total: "1082030182400" + os: 
Linux-5.15.0-126-generic-x86_64-with-glibc2.35 + program: /data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py + python: CPython 3.9.18 + root: /data/nanhuang/Nan/Finetune-GenomicBenchmarks + startedAt: "2026-03-24T20:39:06.506390Z" + writerId: zh3pc98d9pqlw9njow7sy91ko0mm0fc9 + m: + - "1": train/global_step + "6": + - 3 + "7": [] + - "2": '*' + "5": 1 + "6": + - 1 + "7": [] + python_version: 3.9.18 + t: + "1": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "2": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "3": + - 7 + - 66 + "4": 3.9.18 + "5": 0.23.1 + "6": 4.35.2 + "9": + "1": transformers_trainer + "12": 0.23.1 + "13": linux-x86_64 +adafactor: + value: false +adam_beta1: + value: 0.9 +adam_beta2: + value: 0.999 +adam_epsilon: + value: 1e-08 +add_cross_attention: + value: false +architectures: + value: + - BertForMaskedLM +attention_probs_dropout_prob: + value: 0.1 +auto_find_batch_size: + value: false +bad_words_ids: + value: null +begin_suppress_tokens: + value: null +bf16: + value: false +bf16_full_eval: + value: false +bos_token_id: + value: null +cache_dir: + value: null +checkpointing: + value: false +chunk_size_feed_forward: + value: 0 +classifier_dropout: + value: null +cross_attention_hidden_size: + value: null +data_seed: + value: null +dataloader_drop_last: + value: false +dataloader_num_workers: + value: 0 +dataloader_pin_memory: + value: false +ddp_backend: + value: null +ddp_broadcast_buffers: + value: null +ddp_bucket_cap_mb: + value: null +ddp_find_unused_parameters: + value: null +ddp_timeout: + value: 1800 +debug: + value: [] +decoder_start_token_id: + value: null +deepspeed: + value: null +disable_tqdm: + value: false +dispatch_batches: + value: null +diversity_penalty: + value: 0 +do_eval: + value: true +do_predict: + value: false +do_sample: + value: false +do_train: + value: false +early_stopping: + value: false +encoder_no_repeat_ngram_size: + value: 0 +eos_token_id: + value: null +eval_accumulation_steps: + value: null 
+eval_and_save_results: + value: true +eval_delay: + value: 0 +eval_steps: + value: 100 +evaluation_strategy: + value: epoch +exponential_decay_length_penalty: + value: null +find_unused_parameters: + value: false +finetuning_task: + value: null +forced_bos_token_id: + value: null +forced_eos_token_id: + value: null +fp16: + value: true +fp16_backend: + value: auto +fp16_full_eval: + value: false +fp16_opt_level: + value: O1 +fsdp: + value: [] +fsdp_config: + value: + min_num_params: 0 + xla: false + xla_fsdp_grad_ckpt: false +fsdp_min_num_params: + value: 0 +fsdp_transformer_layer_cls_to_wrap: + value: null +full_determinism: + value: false +gradient_accumulation_steps: + value: 4 +gradient_checkpointing: + value: false +gradient_checkpointing_kwargs: + value: null +greater_is_better: + value: true +group_by_length: + value: false +half_precision_backend: + value: auto +hidden_act: + value: gelu +hidden_dropout_prob: + value: 0.1 +hidden_size: + value: 768 +hub_always_push: + value: false +hub_model_id: + value: null +hub_private_repo: + value: false +hub_strategy: + value: every_save +hub_token: + value: +id2label: + value: + "0": LABEL_0 + "1": LABEL_1 +ignore_data_skip: + value: false +include_inputs_for_metrics: + value: false +include_tokens_per_second: + value: false +initializer_range: + value: 0.02 +intermediate_size: + value: 3072 +is_decoder: + value: false +is_encoder_decoder: + value: false +jit_mode_eval: + value: false +label_names: + value: null +label_smoothing_factor: + value: 0 +label2id: + value: + LABEL_0: 0 + LABEL_1: 1 +layer_norm_eps: + value: 1e-12 +learning_rate: + value: 3e-05 +length_column_name: + value: length +length_penalty: + value: 1 +load_best_model_at_end: + value: true +local_rank: + value: 0 +log_level: + value: info +log_level_replica: + value: warning +log_on_each_node: + value: true +logging_dir: + value: 
genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/runs/Mar24_13-39-06_u112222 +logging_first_step: + value: false +logging_nan_inf_filter: + value: true +logging_steps: + value: 100 +logging_strategy: + value: steps +lr_scheduler_type: + value: linear +max_grad_norm: + value: 1 +max_length: + value: 512 +max_position_embeddings: + value: 512 +max_steps: + value: -1 +metric_for_best_model: + value: eval_f1 +min_length: + value: 0 +model_max_length: + value: 512 +model_type: + value: bert +mp_parameters: + value: "" +neftune_noise_alpha: + value: null +no_cuda: + value: false +no_repeat_ngram_size: + value: 0 +num_attention_heads: + value: 12 +num_beam_groups: + value: 1 +num_beams: + value: 1 +num_hidden_layers: + value: 12 +num_return_sequences: + value: 1 +num_train_epochs: + value: 8 +optim: + value: adamw_torch +optim_args: + value: null +output_attentions: + value: false +output_dir: + value: genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42 +output_hidden_states: + value: false +output_scores: + value: false +overwrite_output_dir: + value: true +pad_token_id: + value: 0 +past_index: + value: -1 +per_device_eval_batch_size: + value: 32 +per_device_train_batch_size: + value: 32 +per_gpu_eval_batch_size: + value: null +per_gpu_train_batch_size: + value: null +position_embedding_type: + value: absolute +prediction_loss_only: + value: false +prefix: + value: null +problem_type: + value: null +project_name: + value: genomic_bench_DNAbert2 +push_to_hub: + value: false +push_to_hub_model_id: + value: null +push_to_hub_organization: + value: null +push_to_hub_token: + value: +ray_scope: + value: last +remove_invalid_values: + value: false +remove_unused_columns: + value: true +repetition_penalty: + value: 1 +report_to: + value: + - wandb +resume_from_checkpoint: + value: null +return_dict: + value: true +return_dict_in_generate: + value: false 
+run_name: + value: base5120_dummy_mouse_enhancers_ensembl_lr3e-5_wd0.01_wr0.10_ep8_seed42 +save_model: + value: false +save_on_each_node: + value: false +save_safetensors: + value: true +save_steps: + value: 100 +save_strategy: + value: epoch +save_total_limit: + value: 1 +seed: + value: 42 +sep_token_id: + value: null +skip_memory_metrics: + value: true +split_batches: + value: false +suppress_tokens: + value: null +task_specific_params: + value: null +temperature: + value: 1 +tf_legacy_loss: + value: false +tf32: + value: null +tie_encoder_decoder: + value: false +tie_word_embeddings: + value: true +tokenizer_class: + value: null +top_k: + value: 50 +top_p: + value: 1 +torch_compile: + value: false +torch_compile_backend: + value: null +torch_compile_mode: + value: null +torch_dtype: + value: float32 +torchdynamo: + value: null +torchscript: + value: false +tpu_metrics_debug: + value: false +tpu_num_cores: + value: null +transformers_version: + value: 4.35.2 +type_vocab_size: + value: 2 +typical_p: + value: 1 +use_bfloat16: + value: false +use_cache: + value: true +use_cpu: + value: false +use_ipex: + value: false +use_legacy_prediction_loop: + value: false +use_mps_device: + value: false +vocab_file: + value: null +vocab_size: + value: 4096 +warmup_ratio: + value: 0.1 +warmup_steps: + value: 0 +weight_decay: + value: 0.01 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_133906-m7fu9ed0/files/output.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_133906-m7fu9ed0/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..edb185eaf81dc7ba48996e7319a02c4e2c236dda --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_133906-m7fu9ed0/files/output.log @@ -0,0 +1,108 @@ +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... 
+Some weights of BertForSequenceClassification were not initialized from the model checkpoint at /data/nanhuang/Nan/models/DNAbert2_Pretrained and are newly initialized: ['classifier.weight', 'bert.pooler.dense.weight', 'bert.pooler.dense.bias', 'classifier.bias'] +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. +/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/accelerate/accelerator.py:439: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead. + self.scaler = torch.cuda.amp.GradScaler(**kwargs) +Using auto half precision backend +***** Running training ***** + Num examples = 968 + Num Epochs = 8 + Instantaneous batch size per device = 32 + Total train batch size (w. parallel, distributed & accumulation) = 128 + Gradient Accumulation steps = 4 + Total optimization steps = 56 + Number of trainable parameters = 89,188,610 +Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true" + 12%|█▎ | 7/56 [00:09<01:05, 1.34s/it]***** Running Evaluation ***** + Num examples = 121 + Batch size = 32 + 12%|█▎ | 7/56 [00:10<01:05, 1.34s/itSaving model checkpoint to genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-7 +Configuration saved in genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-7/config.json +{'eval_loss': 0.6725832223892212, 'eval_accuracy': 0.5785123966942148, 'eval_f1': 0.5727933541017654, 'eval_matthews_correlation': 0.15919997829120064, 'eval_precision': 0.5816561242093157, 'eval_recall': 0.5775956284153005, 'eval_runtime': 0.4097, 'eval_samples_per_second': 295.354, 'eval_steps_per_second': 9.764, 'epoch': 0.9} +Model weights saved in 
genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-7/pytorch_model.bin +tokenizer config file saved in genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-7/tokenizer_config.json +Special tokens file saved in genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-7/special_tokens_map.json + 27%|██▋ | 15/56 [00:22<00:57, 1.40s/it]***** Running Evaluation ***** + Num examples = 121 + Batch size = 32 + 27%|██▋ | 15/56 [00:23<00:57, 1.40s/iSaving model checkpoint to genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-15 +Configuration saved in genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-15/config.json +{'eval_loss': 0.5833003520965576, 'eval_accuracy': 0.6859504132231405, 'eval_f1': 0.6857572443958447, 'eval_matthews_correlation': 0.3718896520690588, 'eval_precision': 0.6860974274767379, 'eval_recall': 0.6857923497267759, 'eval_runtime': 0.4058, 'eval_samples_per_second': 298.171, 'eval_steps_per_second': 9.857, 'epoch': 1.94} +Model weights saved in genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-15/pytorch_model.bin +tokenizer config file saved in genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-15/tokenizer_config.json +Special tokens file saved in genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-15/special_tokens_map.json +Deleting older checkpoint [genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-7] due to 
args.save_total_limit + 41%|████ | 23/56 [00:35<00:45, 1.39s/it]***** Running Evaluation ***** + Num examples = 121 + Batch size = 32 + 41%|████ | 23/56 [00:35<00:45, 1.39s/iSaving model checkpoint to genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-23 +Configuration saved in genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-23/config.json +{'eval_loss': 0.7039547562599182, 'eval_accuracy': 0.6611570247933884, 'eval_f1': 0.6565593631014192, 'eval_matthews_correlation': 0.328770377703483, 'eval_precision': 0.6686313973548016, 'eval_recall': 0.6602459016393443, 'eval_runtime': 0.4059, 'eval_samples_per_second': 298.079, 'eval_steps_per_second': 9.854, 'epoch': 2.97} +Model weights saved in genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-23/pytorch_model.bin +tokenizer config file saved in genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-23/tokenizer_config.json +Special tokens file saved in genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-23/special_tokens_map.json + 55%|█████▌ | 31/56 [00:47<00:32, 1.31s/it]***** Running Evaluation ***** + Num examples = 121 + Batch size = 32 + 55%|█████▌ | 31/56 [00:48<00:32, 1.31s/iSaving model checkpoint to genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-31 +Configuration saved in genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-31/config.json +{'eval_loss': 0.603266716003418, 'eval_accuracy': 0.7355371900826446, 'eval_f1': 0.732448866777225, 'eval_matthews_correlation': 0.48516753947547236, 'eval_precision': 
0.7488499137435307, 'eval_recall': 0.7364754098360655, 'eval_runtime': 0.41, 'eval_samples_per_second': 295.158, 'eval_steps_per_second': 9.757, 'epoch': 4.0} +Model weights saved in genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-31/pytorch_model.bin +tokenizer config file saved in genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-31/tokenizer_config.json +Special tokens file saved in genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-31/special_tokens_map.json +Deleting older checkpoint [genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-15] due to args.save_total_limit +Deleting older checkpoint [genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-23] due to args.save_total_limit + 68%|██████▊ | 38/56 [00:59<00:25, 1.43s/it]***** Running Evaluation ***** + Num examples = 121 + Batch size = 32 + 68%|██████▊ | 38/56 [01:00<00:25, 1.43s/iSaving model checkpoint to genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-38 +Configuration saved in genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-38/config.json +{'eval_loss': 0.661307156085968, 'eval_accuracy': 0.6776859504132231, 'eval_f1': 0.6744843760778092, 'eval_matthews_correlation': 0.36038353212189217, 'eval_precision': 0.683531746031746, 'eval_recall': 0.6769125683060109, 'eval_runtime': 0.4041, 'eval_samples_per_second': 299.46, 'eval_steps_per_second': 9.899, 'epoch': 4.9} +Model weights saved in 
genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-38/pytorch_model.bin +tokenizer config file saved in genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-38/tokenizer_config.json +Special tokens file saved in genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-38/special_tokens_map.json + 82%|████████▏ | 46/56 [01:12<00:13, 1.39s/it]***** Running Evaluation ***** + Num examples = 121 + Batch size = 32 + 82%|████████▏ | 46/56 [01:13<00:13, 1.39s/iSaving model checkpoint to genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-46 +Configuration saved in genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-46/config.json +{'eval_loss': 0.5848523378372192, 'eval_accuracy': 0.7107438016528925, 'eval_f1': 0.7100308113659706, 'eval_matthews_correlation': 0.42484906384602256, 'eval_precision': 0.7136539524599226, 'eval_recall': 0.7112021857923497, 'eval_runtime': 0.4059, 'eval_samples_per_second': 298.131, 'eval_steps_per_second': 9.856, 'epoch': 5.94} +Model weights saved in genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-46/pytorch_model.bin +tokenizer config file saved in genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-46/tokenizer_config.json +Special tokens file saved in genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-46/special_tokens_map.json +Deleting older checkpoint [genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-38] 
due to args.save_total_limit + 96%|█████████▋| 54/56 [01:25<00:02, 1.39s/it]***** Running Evaluation ***** + Num examples = 121 + Batch size = 32 + 96%|█████████▋| 54/56 [01:25<00:02, 1.39s/iSaving model checkpoint to genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-54 +Configuration saved in genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-54/config.json +{'eval_loss': 0.590368926525116, 'eval_accuracy': 0.7272727272727273, 'eval_f1': 0.7269743589743589, 'eval_matthews_correlation': 0.45644006920970437, 'eval_precision': 0.7288461538461539, 'eval_recall': 0.7275956284153006, 'eval_runtime': 0.4036, 'eval_samples_per_second': 299.803, 'eval_steps_per_second': 9.911, 'epoch': 6.97} +Model weights saved in genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-54/pytorch_model.bin +tokenizer config file saved in genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-54/tokenizer_config.json +Special tokens file saved in genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-54/special_tokens_map.json +Deleting older checkpoint [genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-46] due to args.save_total_limit +100%|██████████| 56/56 [01:30<00:00, 1.84s/it]***** Running Evaluation ***** + Num examples = 121 + Batch size = 32 +100%|██████████| 56/56 [01:30<00:00, 1.84s/iSaving model checkpoint to genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-56 +Configuration saved in 
genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-56/config.json +{'eval_loss': 0.5920652747154236, 'eval_accuracy': 0.71900826446281, 'eval_f1': 0.7188354291962822, 'eval_matthews_correlation': 0.43924525554767113, 'eval_precision': 0.719983552631579, 'eval_recall': 0.7192622950819672, 'eval_runtime': 0.4089, 'eval_samples_per_second': 295.891, 'eval_steps_per_second': 9.782, 'epoch': 7.23} +Model weights saved in genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-56/pytorch_model.bin +tokenizer config file saved in genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-56/tokenizer_config.json +Special tokens file saved in genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-56/special_tokens_map.json +Deleting older checkpoint [genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-54] due to args.save_total_limit + + +Training completed. Do not forget to share your model on huggingface.co/models =) + + +Loading best model from genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-31 (score: 0.732448866777225). 
+100%|██████████| 56/56 [01:32<00:00, 1.84s/it]Deleting older checkpoint [genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/checkpoint-56] due to args.save_total_limit +{'train_runtime': 92.7242, 'train_samples_per_second': 83.517, 'train_steps_per_second': 0.604, 'train_loss': 0.47779972212655203, 'epoch': 7.23} +100%|██████████| 56/56 [01:33<00:00, 1.66s/it] +***** Running Evaluation ***** + Num examples = 121 + Batch size = 32 +100%|██████████| 4/4 [00:00<00:00, 13.46it/s] diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_133906-m7fu9ed0/files/requirements.txt b/Finetune-GenomicBenchmarks/wandb/run-20260324_133906-m7fu9ed0/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..44d18d32ec4648cd530877d7c8c4758d5e887b9c --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_133906-m7fu9ed0/files/requirements.txt @@ -0,0 +1,144 @@ +scipy==1.13.1 +jupyter_core==5.8.1 +smmap==5.0.2 +yarl==1.22.0 +executing==2.2.0 +cffi==2.0.0 +mkl_random==1.2.8 +traitlets==5.14.3 +wandb==0.23.1 +annotated-types==0.7.0 +evaluate==0.4.6 +kiwisolver==1.4.4 +Jinja2==3.1.6 +pyparsing==3.2.0 +mpmath==1.3.0 +debugpy==1.8.16 +nvidia-cuda-nvrtc-cu12==12.8.93 +docker-pycreds==0.4.0 +pycparser==2.23 +anyio==4.12.0 +safetensors==0.7.0 +matplotlib-inline==0.1.7 +Pygments==2.19.2 +numpy==2.0.2 +nvidia-cuda-cupti-cu12==12.8.90 +Bottleneck==1.4.2 +matplotlib==3.9.2 +numexpr==2.10.1 +sip==6.7.12 +aiohappyeyeballs==2.6.1 +ptyprocess==0.7.0 +fsspec==2025.7.0 +accelerate==0.25.0 +zipp==3.23.0 +PyQt5_sip==12.13.0 +pure_eval==0.2.3 +regex==2025.11.3 +aiosignal==1.4.0 +certifi==2025.10.5 +transformers==4.35.2 +mkl-service==2.4.0 +httpx==0.28.1 +mkl_fft==1.3.11 +pickleshare==0.7.5 +ipykernel==6.30.1 +nvidia-nvtx-cu12==12.8.90 +nvidia-cufft-cu12==11.3.3.83 +triton==3.4.0 +numba==0.60.0 +psutil==7.0.0 +contourpy==1.2.1 +PyQt5==5.15.10 +packaging==25.0 +datasets==4.4.1 +ipython==8.18.1 
+sympy==1.14.0 +nvidia-cusolver-cu12==11.7.3.90 +multidict==6.7.0 +jupyter_client==8.6.3 +setuptools==80.9.0 +prompt_toolkit==3.0.51 +six==1.17.0 +GitPython==3.1.45 +pydantic==2.11.7 +nvidia-cublas-cu12==12.8.4.1 +aiohttp==3.13.2 +tzdata==2025.2 +importlib_metadata==8.7.0 +biopython==1.85 +httpcore==1.0.9 +python-dateutil==2.9.0.post0 +llvmlite==0.43.0 +pandas==2.3.3 +scikit-learn==1.6.1 +asttokens==3.0.0 +joblib==1.5.3 +h11==0.16.0 +charset-normalizer==3.4.4 +pyzmq==27.0.2 +multiprocess==0.70.18 +nvidia-nvjitlink-cu12==12.8.93 +sentry-sdk==2.35.0 +pytz==2025.2 +pydantic_core==2.33.2 +MarkupSafe==3.0.3 +brotlicffi==1.0.9.2 +stack_data==0.6.3 +tqdm==4.67.1 +pynndescent==0.5.13 +importlib_resources==6.5.2 +ply==3.11 +pyarrow==21.0.0 +tokenizers==0.15.2 +exceptiongroup==1.3.1 +nvidia-cusparse-cu12==12.5.8.93 +comm==0.2.3 +pillow==11.3.0 +nvidia-cusparselt-cu12==0.7.1 +protobuf==3.20.3 +urllib3==2.5.0 +wheel==0.45.1 +wcwidth==0.2.13 +appdirs==1.4.4 +PySocks==1.7.1 +PyQt6_sip==13.10.2 +umap-learn==0.5.9.post2 +attrs==25.4.0 +platformdirs==4.3.8 +nvidia-cuda-runtime-cu12==12.8.90 +typing-inspection==0.4.1 +huggingface_hub==0.34.4 +decorator==5.2.1 +filelock==3.17.0 +nvidia-nccl-cu12==2.27.3 +fonttools==4.60.1 +xxhash==3.6.0 +dill==0.4.0 +threadpoolctl==3.6.0 +parso==0.8.4 +pysam==0.9.1 +frozenlist==1.8.0 +typing_extensions==4.15.0 +propcache==0.4.1 +tomli==2.2.1 +click==8.1.8 +nvidia-cudnn-cu12==9.10.2.21 +gitdb==4.0.12 +pip==25.3 +tornado==6.5.2 +networkx==3.2.1 +jedi==0.19.2 +idna==3.11 +pexpect==4.9.0 +async-timeout==5.0.1 +hf-xet==1.1.8 +nvidia-curand-cu12==10.3.9.90 +PyYAML==6.0.2 +nvidia-cufile-cu12==1.13.1.3 +setproctitle==1.3.6 +eval_type_backport==0.2.2 +requests==2.32.5 +nest-asyncio==1.6.0 +torch==2.8.0 +cycler==0.11.0 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_133906-m7fu9ed0/files/wandb-metadata.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_133906-m7fu9ed0/files/wandb-metadata.json new file mode 100644 index 
0000000000000000000000000000000000000000..688379a08bfe28b70f346475722c60099320e643 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_133906-m7fu9ed0/files/wandb-metadata.json @@ -0,0 +1,146 @@ +{ + "os": "Linux-5.15.0-126-generic-x86_64-with-glibc2.35", + "python": "CPython 3.9.18", + "startedAt": "2026-03-24T20:39:06.506390Z", + "args": [ + "--model_name_or_path", + "/data/nanhuang/Nan/models/DNAbert2_Pretrained", + "--tokenizer_path", + "/data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json", + "--trust_remote_code", + "True", + "--data_path", + "/data/nanhuang/Nan/ft_data/dummy_mouse_enhancers_ensembl/split", + "--kmer", + "-1", + "--run_name", + "base5120_dummy_mouse_enhancers_ensembl_lr3e-5_wd0.01_wr0.10_ep8_seed42", + "--model_max_length", + "512", + "--per_device_train_batch_size", + "32", + "--per_device_eval_batch_size", + "32", + "--gradient_accumulation_steps", + "4", + "--learning_rate", + "3e-5", + "--weight_decay", + "0.01", + "--num_train_epochs", + "8", + "--lr_scheduler_type", + "linear", + "--warmup_steps", + "0", + "--warmup_ratio", + "0.10", + "--fp16", + "--output_dir", + "genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42", + "--evaluation_strategy", + "epoch", + "--save_strategy", + "epoch", + "--load_best_model_at_end", + "True", + "--metric_for_best_model", + "eval_f1", + "--greater_is_better", + "True", + "--save_total_limit", + "1", + "--logging_steps", + "100", + "--overwrite_output_dir", + "True", + "--log_level", + "info", + "--seed", + "42", + "--find_unused_parameters", + "False", + "--project_name", + "genomic_bench_DNAbert2" + ], + "program": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", + "codePath": "train.py", + "codePathLocal": "train.py", + "email": "n5huang@ucsd.edu", + "root": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks", + "host": "u112222", + "executable": "/data/nanhuang/miniconda3/envs/bpe_v2/bin/python", + "cpu_count": 64, 
+ "cpu_count_logical": 128, + "gpu": "NVIDIA RTX A6000", + "gpu_count": 8, + "disk": { + "/": { + "total": "3768964489216", + "used": "3559219064832" + } + }, + "memory": { + "total": "1082030182400" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-5a3d8a94-f380-da39-63d2-4cae98f5c2ae" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-cf8724bd-d619-7916-ee26-88d517a20c47" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-48b494ab-4a63-ff4c-5cc8-746af5d27310" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-968c7ea7-97bf-416a-7689-72c141cfc2bb" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-d53c626b-860f-1dec-1cfa-1dfcde78bc88" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-caa40ec7-afcb-5fe0-c53a-85eb54152941" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-18ee7a7f-1bbe-edef-c72c-3abed60917b2" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-a8757d5a-c26e-48c6-a704-dfe62167fc81" + } + ], + "cudaVersion": "12.4", + "writerId": "zh3pc98d9pqlw9njow7sy91ko0mm0fc9" +} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_133906-m7fu9ed0/files/wandb-summary.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_133906-m7fu9ed0/files/wandb-summary.json new file mode 100644 index 
0000000000000000000000000000000000000000..ac50396b3b899c7afd0a51d8e0d1be905d3365d9 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_133906-m7fu9ed0/files/wandb-summary.json @@ -0,0 +1 @@ +{"train/train_samples_per_second":83.517,"train/global_step":56,"_step":9,"_timestamp":1.7743848433385277e+09,"train/train_loss":0.47779972212655203,"eval/accuracy":0.71900826446281,"_wandb":{"runtime":95},"eval/samples_per_second":296.277,"eval/f1":0.7180646929824561,"eval/loss":0.6152443289756775,"train/train_steps_per_second":0.604,"train/total_flos":1.84177738752e+15,"eval/recall":0.737513873473918,"eval/runtime":0.4084,"train/train_runtime":92.7242,"train/epoch":7.23,"eval/matthews_correlation":0.48551407172989086,"eval/steps_per_second":9.794,"_runtime":95,"eval/precision":0.7481159420289856} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_133906-m7fu9ed0/logs/debug-core.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_133906-m7fu9ed0/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..108810daa9161727505e441aeb4309560e752e47 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_133906-m7fu9ed0/logs/debug-core.log @@ -0,0 +1,14 @@ +{"time":"2026-03-24T13:39:06.643250085-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpwjjw0hui/port-3008442.txt","pid":3008442,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:39:06.644939816-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":3008442} +{"time":"2026-03-24T13:39:06.644951826-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-3008442-3008573-3995616287/socket","Net":"unix"}} +{"time":"2026-03-24T13:39:06.827266788-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} 
+{"time":"2026-03-24T13:39:06.908230284-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"m7fu9ed0","id":"1(@)"} +{"time":"2026-03-24T13:39:07.302225177-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"m7fu9ed0","id":"1(@)"} +{"time":"2026-03-24T13:40:43.342592504-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T13:40:43.342726543-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T13:40:43.342710994-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T13:40:43.342921932-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T13:40:43.342999232-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-3008442-3008573-3995616287/socket","Net":"unix"}} +{"time":"2026-03-24T13:40:44.242161869-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T13:40:44.242225959-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T13:40:44.242265469-07:00","level":"INFO","msg":"server is closed"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_133906-m7fu9ed0/logs/debug-internal.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_133906-m7fu9ed0/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..1ce6d1abd91364dd222fa5669d7caae754c618eb --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_133906-m7fu9ed0/logs/debug-internal.log @@ -0,0 +1,11 @@ +{"time":"2026-03-24T13:39:06.908486662-07:00","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2026-03-24T13:39:07.301865508-07:00","level":"INFO","msg":"stream: created new stream","id":"m7fu9ed0"} +{"time":"2026-03-24T13:39:07.302062178-07:00","level":"INFO","msg":"handler: started","stream_id":"m7fu9ed0"} 
+{"time":"2026-03-24T13:39:07.302206226-07:00","level":"INFO","msg":"stream: started","id":"m7fu9ed0"} +{"time":"2026-03-24T13:39:07.302242846-07:00","level":"INFO","msg":"writer: started","stream_id":"m7fu9ed0"} +{"time":"2026-03-24T13:39:07.302290006-07:00","level":"INFO","msg":"sender: started","stream_id":"m7fu9ed0"} +{"time":"2026-03-24T13:40:43.342711434-07:00","level":"INFO","msg":"stream: closing","id":"m7fu9ed0"} +{"time":"2026-03-24T13:40:43.785250774-07:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2026-03-24T13:40:44.226949018-07:00","level":"INFO","msg":"handler: closed","stream_id":"m7fu9ed0"} +{"time":"2026-03-24T13:40:44.227194727-07:00","level":"INFO","msg":"sender: closed","stream_id":"m7fu9ed0"} +{"time":"2026-03-24T13:40:44.227260466-07:00","level":"INFO","msg":"stream: closed","id":"m7fu9ed0"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_133906-m7fu9ed0/logs/debug.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_133906-m7fu9ed0/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..3e7335a67b1f848dded6b806e4065ad5d5c21d48 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_133906-m7fu9ed0/logs/debug.log @@ -0,0 +1,24 @@ +2026-03-24 13:39:06,510 INFO MainThread:3008442 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1 +2026-03-24 13:39:06,510 INFO MainThread:3008442 [wandb_setup.py:_flush():80] Configure stats pid to 3008442 +2026-03-24 13:39:06,510 INFO MainThread:3008442 [wandb_setup.py:_flush():80] Loading settings from /home/nanhuang/.config/wandb/settings +2026-03-24 13:39:06,510 INFO MainThread:3008442 [wandb_setup.py:_flush():80] Loading settings from /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/settings +2026-03-24 13:39:06,510 INFO MainThread:3008442 [wandb_setup.py:_flush():80] Loading settings from environment variables +2026-03-24 13:39:06,510 INFO MainThread:3008442 [wandb_init.py:setup_run_log_directory():714] 
Logging user logs to /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_133906-m7fu9ed0/logs/debug.log +2026-03-24 13:39:06,510 INFO MainThread:3008442 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_133906-m7fu9ed0/logs/debug-internal.log +2026-03-24 13:39:06,510 INFO MainThread:3008442 [wandb_init.py:init():841] calling init triggers +2026-03-24 13:39:06,510 INFO MainThread:3008442 [wandb_init.py:init():846] wandb.init called with sweep_config: {} +config: {'_wandb': {}} +2026-03-24 13:39:06,510 INFO MainThread:3008442 [wandb_init.py:init():889] starting backend +2026-03-24 13:39:06,827 INFO MainThread:3008442 [wandb_init.py:init():892] sending inform_init request +2026-03-24 13:39:06,905 INFO MainThread:3008442 [wandb_init.py:init():900] backend started and connected +2026-03-24 13:39:06,912 INFO MainThread:3008442 [wandb_init.py:init():970] updated telemetry +2026-03-24 13:39:06,914 INFO MainThread:3008442 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout +2026-03-24 13:39:07,977 INFO MainThread:3008442 [wandb_init.py:init():1041] starting run threads in backend +2026-03-24 13:39:08,122 INFO MainThread:3008442 [wandb_run.py:_console_start():2521] atexit reg +2026-03-24 13:39:08,122 INFO MainThread:3008442 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2026-03-24 13:39:08,122 INFO MainThread:3008442 [wandb_run.py:_redirect():2438] Wrapping output streams. +2026-03-24 13:39:08,122 INFO MainThread:3008442 [wandb_run.py:_redirect():2461] Redirects installed. 
+2026-03-24 13:39:08,126 INFO MainThread:3008442 [wandb_init.py:init():1081] run started, returning control to user process +2026-03-24 13:39:09,922 INFO MainThread:3008442 [wandb_run.py:_config_callback():1396] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 512, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['BertForMaskedLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': '/data/nanhuang/Nan/models/DNAbert2_Pretrained', 'transformers_version': '4.35.2', 'model_type': 'bert', 'vocab_size': 4096, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 512, 'type_vocab_size': 2, 
'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'classifier_dropout': None, 'output_dir': 'genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 32, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 3e-05, 'weight_decay': 0.01, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 8, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'warmup_ratio': 0.1, 'warmup_steps': 0, 'log_level': 'info', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 'genomic_bench_DNAbert2_output/dummy_mouse_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.01_wr0.10_ep8_seed42/runs/Mar24_13-39-06_u112222', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 100, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 100, 'save_total_limit': 1, 'save_safetensors': True, 'save_on_each_node': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 100, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': 'base5120_dummy_mouse_enhancers_ensembl_lr3e-5_wd0.01_wr0.10_ep8_seed42', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 
'load_best_model_at_end': True, 'metric_for_best_model': 'eval_f1', 'greater_is_better': True, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': False, 'include_tokens_per_second': False, 'neftune_noise_alpha': None, 'vocab_file': None, 'cache_dir': None, 'model_max_length': 512, 'find_unused_parameters': False, 'checkpointing': False, 'eval_and_save_results': True, 'save_model': False, 'project_name': 'genomic_bench_DNAbert2'} +2026-03-24 13:40:43,342 INFO wandb-AsyncioManager-main:3008442 [service_client.py:_forward_responses():80] Reached EOF. +2026-03-24 13:40:43,343 INFO wandb-AsyncioManager-main:3008442 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles. 
diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_133906-m7fu9ed0/run-m7fu9ed0.wandb b/Finetune-GenomicBenchmarks/wandb/run-20260324_133906-m7fu9ed0/run-m7fu9ed0.wandb new file mode 100644 index 0000000000000000000000000000000000000000..f559a727d9aa0b9d1458bef63b32c77a3c533aae Binary files /dev/null and b/Finetune-GenomicBenchmarks/wandb/run-20260324_133906-m7fu9ed0/run-m7fu9ed0.wandb differ diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_134050-qxvbi54v/files/config.yaml b/Finetune-GenomicBenchmarks/wandb/run-20260324_134050-qxvbi54v/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d401fb283774079dc7c799db5e5b5d1276ecfc1b --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_134050-qxvbi54v/files/config.yaml @@ -0,0 +1,559 @@ +_name_or_path: + value: /data/nanhuang/Nan/models/DNAbert2_Pretrained +_wandb: + value: + cli_version: 0.23.1 + e: + qf7352cnlrgreewnt1floz1l6z6ywanr: + args: + - --model_name_or_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained + - --tokenizer_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json + - --trust_remote_code + - "True" + - --data_path + - /data/nanhuang/Nan/ft_data/human_enhancers_ensembl/split + - --kmer + - "-1" + - --run_name + - base5120_human_enhancers_ensembl_lr3e-5_wd0.0_wr0.05_ep5_seed42 + - --model_max_length + - "512" + - --per_device_train_batch_size + - "32" + - --per_device_eval_batch_size + - "32" + - --gradient_accumulation_steps + - "4" + - --learning_rate + - "3e-5" + - --weight_decay + - "0.0" + - --num_train_epochs + - "5" + - --lr_scheduler_type + - linear + - --warmup_steps + - "0" + - --warmup_ratio + - "0.05" + - --fp16 + - --output_dir + - genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42 + - --evaluation_strategy + - epoch + - --save_strategy + - epoch + - --load_best_model_at_end + - "True" + - --metric_for_best_model + - eval_f1 + - --greater_is_better + - 
"True" + - --save_total_limit + - "1" + - --logging_steps + - "100" + - --overwrite_output_dir + - "True" + - --log_level + - info + - --seed + - "42" + - --find_unused_parameters + - "False" + - --project_name + - genomic_bench_DNAbert2 + codePath: train.py + codePathLocal: train.py + cpu_count: 64 + cpu_count_logical: 128 + cudaVersion: "12.4" + disk: + /: + total: "3768964489216" + used: "3559593918464" + email: n5huang@ucsd.edu + executable: /data/nanhuang/miniconda3/envs/bpe_v2/bin/python + gpu: NVIDIA RTX A6000 + gpu_count: 8 + gpu_nvidia: + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-5a3d8a94-f380-da39-63d2-4cae98f5c2ae + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-cf8724bd-d619-7916-ee26-88d517a20c47 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-48b494ab-4a63-ff4c-5cc8-746af5d27310 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-968c7ea7-97bf-416a-7689-72c141cfc2bb + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-d53c626b-860f-1dec-1cfa-1dfcde78bc88 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-caa40ec7-afcb-5fe0-c53a-85eb54152941 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-18ee7a7f-1bbe-edef-c72c-3abed60917b2 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-a8757d5a-c26e-48c6-a704-dfe62167fc81 + host: u112222 + memory: + total: "1082030182400" + os: Linux-5.15.0-126-generic-x86_64-with-glibc2.35 + program: /data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py + python: CPython 3.9.18 + root: /data/nanhuang/Nan/Finetune-GenomicBenchmarks + startedAt: 
"2026-03-24T20:40:50.775073Z" + writerId: qf7352cnlrgreewnt1floz1l6z6ywanr + m: + - "1": train/global_step + "6": + - 3 + "7": [] + - "2": '*' + "5": 1 + "6": + - 1 + "7": [] + python_version: 3.9.18 + t: + "1": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "2": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "3": + - 7 + - 66 + "4": 3.9.18 + "5": 0.23.1 + "6": 4.35.2 + "9": + "1": transformers_trainer + "12": 0.23.1 + "13": linux-x86_64 +adafactor: + value: false +adam_beta1: + value: 0.9 +adam_beta2: + value: 0.999 +adam_epsilon: + value: 1e-08 +add_cross_attention: + value: false +architectures: + value: + - BertForMaskedLM +attention_probs_dropout_prob: + value: 0.1 +auto_find_batch_size: + value: false +bad_words_ids: + value: null +begin_suppress_tokens: + value: null +bf16: + value: false +bf16_full_eval: + value: false +bos_token_id: + value: null +cache_dir: + value: null +checkpointing: + value: false +chunk_size_feed_forward: + value: 0 +classifier_dropout: + value: null +cross_attention_hidden_size: + value: null +data_seed: + value: null +dataloader_drop_last: + value: false +dataloader_num_workers: + value: 0 +dataloader_pin_memory: + value: false +ddp_backend: + value: null +ddp_broadcast_buffers: + value: null +ddp_bucket_cap_mb: + value: null +ddp_find_unused_parameters: + value: null +ddp_timeout: + value: 1800 +debug: + value: [] +decoder_start_token_id: + value: null +deepspeed: + value: null +disable_tqdm: + value: false +dispatch_batches: + value: null +diversity_penalty: + value: 0 +do_eval: + value: true +do_predict: + value: false +do_sample: + value: false +do_train: + value: false +early_stopping: + value: false +encoder_no_repeat_ngram_size: + value: 0 +eos_token_id: + value: null +eval_accumulation_steps: + value: null +eval_and_save_results: + value: true +eval_delay: + value: 0 +eval_steps: + value: 100 +evaluation_strategy: + value: epoch +exponential_decay_length_penalty: + value: null +find_unused_parameters: + value: false 
+finetuning_task: + value: null +forced_bos_token_id: + value: null +forced_eos_token_id: + value: null +fp16: + value: true +fp16_backend: + value: auto +fp16_full_eval: + value: false +fp16_opt_level: + value: O1 +fsdp: + value: [] +fsdp_config: + value: + min_num_params: 0 + xla: false + xla_fsdp_grad_ckpt: false +fsdp_min_num_params: + value: 0 +fsdp_transformer_layer_cls_to_wrap: + value: null +full_determinism: + value: false +gradient_accumulation_steps: + value: 4 +gradient_checkpointing: + value: false +gradient_checkpointing_kwargs: + value: null +greater_is_better: + value: true +group_by_length: + value: false +half_precision_backend: + value: auto +hidden_act: + value: gelu +hidden_dropout_prob: + value: 0.1 +hidden_size: + value: 768 +hub_always_push: + value: false +hub_model_id: + value: null +hub_private_repo: + value: false +hub_strategy: + value: every_save +hub_token: + value: +id2label: + value: + "0": LABEL_0 + "1": LABEL_1 +ignore_data_skip: + value: false +include_inputs_for_metrics: + value: false +include_tokens_per_second: + value: false +initializer_range: + value: 0.02 +intermediate_size: + value: 3072 +is_decoder: + value: false +is_encoder_decoder: + value: false +jit_mode_eval: + value: false +label_names: + value: null +label_smoothing_factor: + value: 0 +label2id: + value: + LABEL_0: 0 + LABEL_1: 1 +layer_norm_eps: + value: 1e-12 +learning_rate: + value: 3e-05 +length_column_name: + value: length +length_penalty: + value: 1 +load_best_model_at_end: + value: true +local_rank: + value: 0 +log_level: + value: info +log_level_replica: + value: warning +log_on_each_node: + value: true +logging_dir: + value: genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42/runs/Mar24_13-40-50_u112222 +logging_first_step: + value: false +logging_nan_inf_filter: + value: true +logging_steps: + value: 100 +logging_strategy: + value: steps +lr_scheduler_type: + value: linear +max_grad_norm: + value: 1 
+max_length: + value: 512 +max_position_embeddings: + value: 512 +max_steps: + value: -1 +metric_for_best_model: + value: eval_f1 +min_length: + value: 0 +model_max_length: + value: 512 +model_type: + value: bert +mp_parameters: + value: "" +neftune_noise_alpha: + value: null +no_cuda: + value: false +no_repeat_ngram_size: + value: 0 +num_attention_heads: + value: 12 +num_beam_groups: + value: 1 +num_beams: + value: 1 +num_hidden_layers: + value: 12 +num_return_sequences: + value: 1 +num_train_epochs: + value: 5 +optim: + value: adamw_torch +optim_args: + value: null +output_attentions: + value: false +output_dir: + value: genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42 +output_hidden_states: + value: false +output_scores: + value: false +overwrite_output_dir: + value: true +pad_token_id: + value: 0 +past_index: + value: -1 +per_device_eval_batch_size: + value: 32 +per_device_train_batch_size: + value: 32 +per_gpu_eval_batch_size: + value: null +per_gpu_train_batch_size: + value: null +position_embedding_type: + value: absolute +prediction_loss_only: + value: false +prefix: + value: null +problem_type: + value: null +project_name: + value: genomic_bench_DNAbert2 +push_to_hub: + value: false +push_to_hub_model_id: + value: null +push_to_hub_organization: + value: null +push_to_hub_token: + value: +ray_scope: + value: last +remove_invalid_values: + value: false +remove_unused_columns: + value: true +repetition_penalty: + value: 1 +report_to: + value: + - wandb +resume_from_checkpoint: + value: null +return_dict: + value: true +return_dict_in_generate: + value: false +run_name: + value: base5120_human_enhancers_ensembl_lr3e-5_wd0.0_wr0.05_ep5_seed42 +save_model: + value: false +save_on_each_node: + value: false +save_safetensors: + value: true +save_steps: + value: 100 +save_strategy: + value: epoch +save_total_limit: + value: 1 +seed: + value: 42 +sep_token_id: + value: null +skip_memory_metrics: + value: true 
+split_batches: + value: false +suppress_tokens: + value: null +task_specific_params: + value: null +temperature: + value: 1 +tf_legacy_loss: + value: false +tf32: + value: null +tie_encoder_decoder: + value: false +tie_word_embeddings: + value: true +tokenizer_class: + value: null +top_k: + value: 50 +top_p: + value: 1 +torch_compile: + value: false +torch_compile_backend: + value: null +torch_compile_mode: + value: null +torch_dtype: + value: float32 +torchdynamo: + value: null +torchscript: + value: false +tpu_metrics_debug: + value: false +tpu_num_cores: + value: null +transformers_version: + value: 4.35.2 +type_vocab_size: + value: 2 +typical_p: + value: 1 +use_bfloat16: + value: false +use_cache: + value: true +use_cpu: + value: false +use_ipex: + value: false +use_legacy_prediction_loop: + value: false +use_mps_device: + value: false +vocab_file: + value: null +vocab_size: + value: 4096 +warmup_ratio: + value: 0.05 +warmup_steps: + value: 0 +weight_decay: + value: 0 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_134050-qxvbi54v/files/output.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_134050-qxvbi54v/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..a07ad91b37e81de5ec7a9bbe2f3cbd21bcf2fe41 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_134050-qxvbi54v/files/output.log @@ -0,0 +1,126 @@ +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... +Some weights of BertForSequenceClassification were not initialized from the model checkpoint at /data/nanhuang/Nan/models/DNAbert2_Pretrained and are newly initialized: ['bert.pooler.dense.bias', 'classifier.weight', 'classifier.bias', 'bert.pooler.dense.weight'] +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. 
+/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/accelerate/accelerator.py:439: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead. + self.scaler = torch.cuda.amp.GradScaler(**kwargs) +Using auto half precision backend +***** Running training ***** + Num examples = 123,873 + Num Epochs = 5 + Instantaneous batch size per device = 32 + Total train batch size (w. parallel, distributed & accumulation) = 128 + Gradient Accumulation steps = 4 + Total optimization steps = 4,840 + Number of trainable parameters = 89,188,610 +Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true" + 20%|██ | 968/4840 [21:21<1:20:39, 1.25s/it]***** Running Evaluation ***** +{'loss': 0.6293, 'learning_rate': 1.2396694214876034e-05, 'epoch': 0.1} +{'loss': 0.5214, 'learning_rate': 2.479338842975207e-05, 'epoch': 0.21} +{'loss': 0.4632, 'learning_rate': 2.9621574597651154e-05, 'epoch': 0.31} +{'loss': 0.4305, 'learning_rate': 2.896911700739452e-05, 'epoch': 0.41} +{'loss': 0.4449, 'learning_rate': 2.8316659417137886e-05, 'epoch': 0.52} +{'loss': 0.4143, 'learning_rate': 2.7664201826881252e-05, 'epoch': 0.62} +{'loss': 0.4093, 'learning_rate': 2.701174423662462e-05, 'epoch': 0.72} +{'loss': 0.3943, 'learning_rate': 2.6359286646367988e-05, 'epoch': 0.83} +{'loss': 0.3822, 'learning_rate': 2.5706829056111354e-05, 'epoch': 0.93} + Num examples = 15484 + Batch size = 32 + 20%|██ | 968/4840 [22:00<1:20:39, 1.25s/Saving model checkpoint to genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42/checkpoint-968 +Configuration saved in genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42/checkpoint-968/config.json +{'eval_loss': 0.3810736835002899, 'eval_accuracy': 0.8288555928700594, 'eval_f1': 0.8281272063918453, 'eval_matthews_correlation': 0.6624810694750157, 
'eval_precision': 0.8339016038859941, 'eval_recall': 0.8286006733009063, 'eval_runtime': 38.3959, 'eval_samples_per_second': 403.272, 'eval_steps_per_second': 12.606, 'epoch': 1.0} +Model weights saved in genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42/checkpoint-968/pytorch_model.bin +tokenizer config file saved in genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42/checkpoint-968/tokenizer_config.json +Special tokens file saved in genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42/checkpoint-968/special_tokens_map.json + 40%|████ | 1936/4840 [43:25<59:58, 1.24s/it] ***** Running Evaluation ***** +{'loss': 0.3789, 'learning_rate': 2.505437146585472e-05, 'epoch': 1.03} +{'loss': 0.3441, 'learning_rate': 2.4401913875598086e-05, 'epoch': 1.14} +{'loss': 0.339, 'learning_rate': 2.3749456285341452e-05, 'epoch': 1.24} +{'loss': 0.3471, 'learning_rate': 2.309699869508482e-05, 'epoch': 1.34} +{'loss': 0.3339, 'learning_rate': 2.2444541104828188e-05, 'epoch': 1.45} +{'loss': 0.3329, 'learning_rate': 2.1792083514571554e-05, 'epoch': 1.55} +{'loss': 0.3296, 'learning_rate': 2.113962592431492e-05, 'epoch': 1.65} +{'loss': 0.3236, 'learning_rate': 2.0487168334058287e-05, 'epoch': 1.76} +{'loss': 0.3212, 'learning_rate': 1.9834710743801653e-05, 'epoch': 1.86} +{'loss': 0.3223, 'learning_rate': 1.918225315354502e-05, 'epoch': 1.96} + Num examples = 15484 + Batch size = 32 + 40%|████ | 1936/4840 [44:03<59:58, 1.24s/iSaving model checkpoint to genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42/checkpoint-1936 +Configuration saved in genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42/checkpoint-1936/config.json +{'eval_loss': 0.3406522572040558, 'eval_accuracy': 0.8521053991216739, 'eval_f1': 0.8521051005612541, 
'eval_matthews_correlation': 0.7042627748459929, 'eval_precision': 0.8521369452783758, 'eval_recall': 0.8521258296553378, 'eval_runtime': 38.3903, 'eval_samples_per_second': 403.331, 'eval_steps_per_second': 12.607, 'epoch': 2.0} +Model weights saved in genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42/checkpoint-1936/pytorch_model.bin +tokenizer config file saved in genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42/checkpoint-1936/tokenizer_config.json +Special tokens file saved in genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42/checkpoint-1936/special_tokens_map.json +Deleting older checkpoint [genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42/checkpoint-968] due to args.save_total_limit + 60%|██████ | 2904/4840 [1:05:40<40:57, 1.27s/it]***** Running Evaluation ***** +{'loss': 0.2789, 'learning_rate': 1.8529795563288385e-05, 'epoch': 2.07} +{'loss': 0.26, 'learning_rate': 1.7877337973031755e-05, 'epoch': 2.17} +{'loss': 0.2586, 'learning_rate': 1.722488038277512e-05, 'epoch': 2.27} +{'loss': 0.2591, 'learning_rate': 1.6572422792518487e-05, 'epoch': 2.38} +{'loss': 0.256, 'learning_rate': 1.5919965202261853e-05, 'epoch': 2.48} +{'loss': 0.2601, 'learning_rate': 1.526750761200522e-05, 'epoch': 2.58} +{'loss': 0.2436, 'learning_rate': 1.4615050021748586e-05, 'epoch': 2.69} +{'loss': 0.2538, 'learning_rate': 1.3962592431491953e-05, 'epoch': 2.79} +{'loss': 0.2544, 'learning_rate': 1.3310134841235321e-05, 'epoch': 2.89} +{'loss': 0.2476, 'learning_rate': 1.2657677250978686e-05, 'epoch': 3.0} + Num examples = 15484 + Batch size = 32 + 60%|██████ | 2904/4840 [1:06:19<40:57, 1.27sSaving model checkpoint to genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42/checkpoint-2904 +Configuration saved in 
genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42/checkpoint-2904/config.json +{'eval_loss': 0.309709757566452, 'eval_accuracy': 0.8736760526995608, 'eval_f1': 0.8736553931540352, 'eval_matthews_correlation': 0.7474416977926268, 'eval_precision': 0.8738035921545504, 'eval_recall': 0.8736381239537026, 'eval_runtime': 38.8875, 'eval_samples_per_second': 398.174, 'eval_steps_per_second': 12.446, 'epoch': 3.0} +Model weights saved in genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42/checkpoint-2904/pytorch_model.bin +tokenizer config file saved in genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42/checkpoint-2904/tokenizer_config.json +Special tokens file saved in genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42/checkpoint-2904/special_tokens_map.json +Deleting older checkpoint [genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42/checkpoint-1936] due to args.save_total_limit + 80%|████████ | 3872/4840 [1:28:05<20:25, 1.27s/it]***** Running Evaluation ***** +{'loss': 0.1833, 'learning_rate': 1.2005219660722054e-05, 'epoch': 3.1} +{'loss': 0.1724, 'learning_rate': 1.135276207046542e-05, 'epoch': 3.2} +{'loss': 0.1752, 'learning_rate': 1.0700304480208786e-05, 'epoch': 3.31} +{'loss': 0.1834, 'learning_rate': 1.0047846889952154e-05, 'epoch': 3.41} +{'loss': 0.1786, 'learning_rate': 9.39538929969552e-06, 'epoch': 3.51} +{'loss': 0.1774, 'learning_rate': 8.742931709438888e-06, 'epoch': 3.62} +{'loss': 0.1701, 'learning_rate': 8.090474119182252e-06, 'epoch': 3.72} +{'loss': 0.1724, 'learning_rate': 7.43801652892562e-06, 'epoch': 3.82} +{'loss': 0.1757, 'learning_rate': 6.785558938668986e-06, 'epoch': 3.93} + Num examples = 15484 + Batch size = 32 + 80%|████████ | 3872/4840 [1:28:44<20:25, 1.27sSaving model 
checkpoint to genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42/checkpoint-3872 +Configuration saved in genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42/checkpoint-3872/config.json +{'eval_loss': 0.328061044216156, 'eval_accuracy': 0.8842676311030742, 'eval_f1': 0.8841256535963352, 'eval_matthews_correlation': 0.7709701679609638, 'eval_precision': 0.8865456555886244, 'eval_recall': 0.8844274222799268, 'eval_runtime': 38.8256, 'eval_samples_per_second': 398.809, 'eval_steps_per_second': 12.466, 'epoch': 4.0} +Model weights saved in genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42/checkpoint-3872/pytorch_model.bin +tokenizer config file saved in genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42/checkpoint-3872/tokenizer_config.json +Special tokens file saved in genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42/checkpoint-3872/special_tokens_map.json +Deleting older checkpoint [genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42/checkpoint-2904] due to args.save_total_limit +100%|██████████| 4840/4840 [1:50:27<00:00, 1.26s/it]***** Running Evaluation ***** +{'loss': 0.1582, 'learning_rate': 6.1331013484123534e-06, 'epoch': 4.03} +{'loss': 0.1202, 'learning_rate': 5.48064375815572e-06, 'epoch': 4.13} +{'loss': 0.1205, 'learning_rate': 4.828186167899087e-06, 'epoch': 4.24} +{'loss': 0.1181, 'learning_rate': 4.175728577642454e-06, 'epoch': 4.34} +{'loss': 0.1176, 'learning_rate': 3.5232709873858202e-06, 'epoch': 4.44} +{'loss': 0.1107, 'learning_rate': 2.870813397129187e-06, 'epoch': 4.55} +{'loss': 0.1246, 'learning_rate': 2.218355806872553e-06, 'epoch': 4.65} +{'loss': 0.1138, 'learning_rate': 1.56589821661592e-06, 'epoch': 4.75} +{'loss': 
0.1114, 'learning_rate': 9.134406263592866e-07, 'epoch': 4.86} +{'loss': 0.1065, 'learning_rate': 2.6098303610265335e-07, 'epoch': 4.96} + Num examples = 15484 + Batch size = 32 +100%|██████████| 4840/4840 [1:51:06<00:00, 1.26sSaving model checkpoint to genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42/checkpoint-4840 +Configuration saved in genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42/checkpoint-4840/config.json +{'eval_loss': 0.3555919826030731, 'eval_accuracy': 0.8933092224231465, 'eval_f1': 0.8932781986457534, 'eval_matthews_correlation': 0.7873588707657256, 'eval_precision': 0.8939641189977475, 'eval_recall': 0.8933949574840743, 'eval_runtime': 38.6536, 'eval_samples_per_second': 400.583, 'eval_steps_per_second': 12.521, 'epoch': 5.0} +Model weights saved in genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42/checkpoint-4840/pytorch_model.bin +tokenizer config file saved in genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42/checkpoint-4840/tokenizer_config.json +Special tokens file saved in genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42/checkpoint-4840/special_tokens_map.json +Deleting older checkpoint [genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42/checkpoint-3872] due to args.save_total_limit + + +Training completed. Do not forget to share your model on huggingface.co/models =) + + +Loading best model from genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42/checkpoint-4840 (score: 0.8932781986457534). 
+100%|██████████| 4840/4840 [1:51:10<00:00, 1.38s/it] +{'train_runtime': 6670.5444, 'train_samples_per_second': 92.851, 'train_steps_per_second': 0.726, 'train_loss': 0.26582898344875366, 'epoch': 5.0} +***** Running Evaluation ***** + Num examples = 15485 + Batch size = 32 +100%|██████████| 484/484 [00:42<00:00, 11.43it/s] diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_134050-qxvbi54v/files/requirements.txt b/Finetune-GenomicBenchmarks/wandb/run-20260324_134050-qxvbi54v/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..44d18d32ec4648cd530877d7c8c4758d5e887b9c --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_134050-qxvbi54v/files/requirements.txt @@ -0,0 +1,144 @@ +scipy==1.13.1 +jupyter_core==5.8.1 +smmap==5.0.2 +yarl==1.22.0 +executing==2.2.0 +cffi==2.0.0 +mkl_random==1.2.8 +traitlets==5.14.3 +wandb==0.23.1 +annotated-types==0.7.0 +evaluate==0.4.6 +kiwisolver==1.4.4 +Jinja2==3.1.6 +pyparsing==3.2.0 +mpmath==1.3.0 +debugpy==1.8.16 +nvidia-cuda-nvrtc-cu12==12.8.93 +docker-pycreds==0.4.0 +pycparser==2.23 +anyio==4.12.0 +safetensors==0.7.0 +matplotlib-inline==0.1.7 +Pygments==2.19.2 +numpy==2.0.2 +nvidia-cuda-cupti-cu12==12.8.90 +Bottleneck==1.4.2 +matplotlib==3.9.2 +numexpr==2.10.1 +sip==6.7.12 +aiohappyeyeballs==2.6.1 +ptyprocess==0.7.0 +fsspec==2025.7.0 +accelerate==0.25.0 +zipp==3.23.0 +PyQt5_sip==12.13.0 +pure_eval==0.2.3 +regex==2025.11.3 +aiosignal==1.4.0 +certifi==2025.10.5 +transformers==4.35.2 +mkl-service==2.4.0 +httpx==0.28.1 +mkl_fft==1.3.11 +pickleshare==0.7.5 +ipykernel==6.30.1 +nvidia-nvtx-cu12==12.8.90 +nvidia-cufft-cu12==11.3.3.83 +triton==3.4.0 +numba==0.60.0 +psutil==7.0.0 +contourpy==1.2.1 +PyQt5==5.15.10 +packaging==25.0 +datasets==4.4.1 +ipython==8.18.1 +sympy==1.14.0 +nvidia-cusolver-cu12==11.7.3.90 +multidict==6.7.0 +jupyter_client==8.6.3 +setuptools==80.9.0 +prompt_toolkit==3.0.51 +six==1.17.0 +GitPython==3.1.45 +pydantic==2.11.7 +nvidia-cublas-cu12==12.8.4.1 
+aiohttp==3.13.2 +tzdata==2025.2 +importlib_metadata==8.7.0 +biopython==1.85 +httpcore==1.0.9 +python-dateutil==2.9.0.post0 +llvmlite==0.43.0 +pandas==2.3.3 +scikit-learn==1.6.1 +asttokens==3.0.0 +joblib==1.5.3 +h11==0.16.0 +charset-normalizer==3.4.4 +pyzmq==27.0.2 +multiprocess==0.70.18 +nvidia-nvjitlink-cu12==12.8.93 +sentry-sdk==2.35.0 +pytz==2025.2 +pydantic_core==2.33.2 +MarkupSafe==3.0.3 +brotlicffi==1.0.9.2 +stack_data==0.6.3 +tqdm==4.67.1 +pynndescent==0.5.13 +importlib_resources==6.5.2 +ply==3.11 +pyarrow==21.0.0 +tokenizers==0.15.2 +exceptiongroup==1.3.1 +nvidia-cusparse-cu12==12.5.8.93 +comm==0.2.3 +pillow==11.3.0 +nvidia-cusparselt-cu12==0.7.1 +protobuf==3.20.3 +urllib3==2.5.0 +wheel==0.45.1 +wcwidth==0.2.13 +appdirs==1.4.4 +PySocks==1.7.1 +PyQt6_sip==13.10.2 +umap-learn==0.5.9.post2 +attrs==25.4.0 +platformdirs==4.3.8 +nvidia-cuda-runtime-cu12==12.8.90 +typing-inspection==0.4.1 +huggingface_hub==0.34.4 +decorator==5.2.1 +filelock==3.17.0 +nvidia-nccl-cu12==2.27.3 +fonttools==4.60.1 +xxhash==3.6.0 +dill==0.4.0 +threadpoolctl==3.6.0 +parso==0.8.4 +pysam==0.9.1 +frozenlist==1.8.0 +typing_extensions==4.15.0 +propcache==0.4.1 +tomli==2.2.1 +click==8.1.8 +nvidia-cudnn-cu12==9.10.2.21 +gitdb==4.0.12 +pip==25.3 +tornado==6.5.2 +networkx==3.2.1 +jedi==0.19.2 +idna==3.11 +pexpect==4.9.0 +async-timeout==5.0.1 +hf-xet==1.1.8 +nvidia-curand-cu12==10.3.9.90 +PyYAML==6.0.2 +nvidia-cufile-cu12==1.13.1.3 +setproctitle==1.3.6 +eval_type_backport==0.2.2 +requests==2.32.5 +nest-asyncio==1.6.0 +torch==2.8.0 +cycler==0.11.0 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_134050-qxvbi54v/files/wandb-metadata.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_134050-qxvbi54v/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..c635309f844bcde6cb3fdce3b1ec77f728055cda --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_134050-qxvbi54v/files/wandb-metadata.json @@ -0,0 +1,146 @@ +{ + "os": 
"Linux-5.15.0-126-generic-x86_64-with-glibc2.35", + "python": "CPython 3.9.18", + "startedAt": "2026-03-24T20:40:50.775073Z", + "args": [ + "--model_name_or_path", + "/data/nanhuang/Nan/models/DNAbert2_Pretrained", + "--tokenizer_path", + "/data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json", + "--trust_remote_code", + "True", + "--data_path", + "/data/nanhuang/Nan/ft_data/human_enhancers_ensembl/split", + "--kmer", + "-1", + "--run_name", + "base5120_human_enhancers_ensembl_lr3e-5_wd0.0_wr0.05_ep5_seed42", + "--model_max_length", + "512", + "--per_device_train_batch_size", + "32", + "--per_device_eval_batch_size", + "32", + "--gradient_accumulation_steps", + "4", + "--learning_rate", + "3e-5", + "--weight_decay", + "0.0", + "--num_train_epochs", + "5", + "--lr_scheduler_type", + "linear", + "--warmup_steps", + "0", + "--warmup_ratio", + "0.05", + "--fp16", + "--output_dir", + "genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42", + "--evaluation_strategy", + "epoch", + "--save_strategy", + "epoch", + "--load_best_model_at_end", + "True", + "--metric_for_best_model", + "eval_f1", + "--greater_is_better", + "True", + "--save_total_limit", + "1", + "--logging_steps", + "100", + "--overwrite_output_dir", + "True", + "--log_level", + "info", + "--seed", + "42", + "--find_unused_parameters", + "False", + "--project_name", + "genomic_bench_DNAbert2" + ], + "program": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", + "codePath": "train.py", + "codePathLocal": "train.py", + "email": "n5huang@ucsd.edu", + "root": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks", + "host": "u112222", + "executable": "/data/nanhuang/miniconda3/envs/bpe_v2/bin/python", + "cpu_count": 64, + "cpu_count_logical": 128, + "gpu": "NVIDIA RTX A6000", + "gpu_count": 8, + "disk": { + "/": { + "total": "3768964489216", + "used": "3559593918464" + } + }, + "memory": { + "total": "1082030182400" + }, + "gpu_nvidia": [ + { + "name": 
"NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-5a3d8a94-f380-da39-63d2-4cae98f5c2ae" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-cf8724bd-d619-7916-ee26-88d517a20c47" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-48b494ab-4a63-ff4c-5cc8-746af5d27310" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-968c7ea7-97bf-416a-7689-72c141cfc2bb" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-d53c626b-860f-1dec-1cfa-1dfcde78bc88" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-caa40ec7-afcb-5fe0-c53a-85eb54152941" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-18ee7a7f-1bbe-edef-c72c-3abed60917b2" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-a8757d5a-c26e-48c6-a704-dfe62167fc81" + } + ], + "cudaVersion": "12.4", + "writerId": "qf7352cnlrgreewnt1floz1l6z6ywanr" +} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_134050-qxvbi54v/files/wandb-summary.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_134050-qxvbi54v/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..fdc7f3c310c9bc5cf46be30c2a68deac30dd83e8 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_134050-qxvbi54v/files/wandb-summary.json @@ -0,0 +1 @@ 
+{"_timestamp":1.774391628374646e+09,"eval/matthews_correlation":0.7873247541592554,"eval/loss":0.35072019696235657,"eval/precision":0.8943883205492502,"train/train_loss":0.26582898344875366,"_wandb":{"runtime":6776},"train/epoch":5,"train/train_runtime":6670.5444,"_step":54,"eval/samples_per_second":364.891,"_runtime":6776,"eval/recall":0.8929377698410588,"train/global_step":4840,"train/total_flos":1.629617788030464e+17,"eval/runtime":42.4373,"eval/steps_per_second":11.405,"eval/f1":0.8931562374251557,"eval/accuracy":0.8933161123668066,"train/train_steps_per_second":0.726,"train/loss":0.1065,"train/learning_rate":2.6098303610265335e-07,"train/train_samples_per_second":92.851} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_134050-qxvbi54v/logs/debug-core.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_134050-qxvbi54v/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..724d3fe14a17995caf5aea1a627ec175162963b9 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_134050-qxvbi54v/logs/debug-core.log @@ -0,0 +1,14 @@ +{"time":"2026-03-24T13:40:50.905529632-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpxpincxrd/port-3012169.txt","pid":3012169,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T13:40:50.907024093-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":3012169} +{"time":"2026-03-24T13:40:50.906950804-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-3012169-3012243-3271093011/socket","Net":"unix"}} +{"time":"2026-03-24T13:40:51.088452361-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T13:40:51.151276363-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"qxvbi54v","id":"1(@)"} 
+{"time":"2026-03-24T13:40:51.52105663-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"qxvbi54v","id":"1(@)"} +{"time":"2026-03-24T15:33:48.383846844-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T15:33:48.383971504-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T15:33:48.384064003-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T15:33:48.384048694-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T15:33:48.384261383-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-3012169-3012243-3271093011/socket","Net":"unix"}} +{"time":"2026-03-24T15:33:49.232977991-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T15:33:49.233073471-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T15:33:49.233123751-07:00","level":"INFO","msg":"server is closed"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_134050-qxvbi54v/logs/debug-internal.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_134050-qxvbi54v/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..5bb035e8444f61c367bdc5059c24489efcabb610 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_134050-qxvbi54v/logs/debug-internal.log @@ -0,0 +1,11 @@ +{"time":"2026-03-24T13:40:51.151486782-07:00","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2026-03-24T13:40:51.520629442-07:00","level":"INFO","msg":"stream: created new stream","id":"qxvbi54v"} +{"time":"2026-03-24T13:40:51.520876581-07:00","level":"INFO","msg":"handler: started","stream_id":"qxvbi54v"} +{"time":"2026-03-24T13:40:51.52095792-07:00","level":"INFO","msg":"stream: started","id":"qxvbi54v"} 
+{"time":"2026-03-24T13:40:51.52097271-07:00","level":"INFO","msg":"writer: started","stream_id":"qxvbi54v"} +{"time":"2026-03-24T13:40:51.52099583-07:00","level":"INFO","msg":"sender: started","stream_id":"qxvbi54v"} +{"time":"2026-03-24T15:33:48.384007744-07:00","level":"INFO","msg":"stream: closing","id":"qxvbi54v"} +{"time":"2026-03-24T15:33:48.883501397-07:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2026-03-24T15:33:49.213882663-07:00","level":"INFO","msg":"handler: closed","stream_id":"qxvbi54v"} +{"time":"2026-03-24T15:33:49.214267911-07:00","level":"INFO","msg":"sender: closed","stream_id":"qxvbi54v"} +{"time":"2026-03-24T15:33:49.214318871-07:00","level":"INFO","msg":"stream: closed","id":"qxvbi54v"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_134050-qxvbi54v/logs/debug.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_134050-qxvbi54v/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..487eeeb9827bf71460acc10a209e6ee899e68f68 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_134050-qxvbi54v/logs/debug.log @@ -0,0 +1,24 @@ +2026-03-24 13:40:50,779 INFO MainThread:3012169 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1 +2026-03-24 13:40:50,779 INFO MainThread:3012169 [wandb_setup.py:_flush():80] Configure stats pid to 3012169 +2026-03-24 13:40:50,779 INFO MainThread:3012169 [wandb_setup.py:_flush():80] Loading settings from /home/nanhuang/.config/wandb/settings +2026-03-24 13:40:50,779 INFO MainThread:3012169 [wandb_setup.py:_flush():80] Loading settings from /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/settings +2026-03-24 13:40:50,779 INFO MainThread:3012169 [wandb_setup.py:_flush():80] Loading settings from environment variables +2026-03-24 13:40:50,779 INFO MainThread:3012169 [wandb_init.py:setup_run_log_directory():714] Logging user logs to 
/data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_134050-qxvbi54v/logs/debug.log +2026-03-24 13:40:50,779 INFO MainThread:3012169 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_134050-qxvbi54v/logs/debug-internal.log +2026-03-24 13:40:50,779 INFO MainThread:3012169 [wandb_init.py:init():841] calling init triggers +2026-03-24 13:40:50,779 INFO MainThread:3012169 [wandb_init.py:init():846] wandb.init called with sweep_config: {} +config: {'_wandb': {}} +2026-03-24 13:40:50,779 INFO MainThread:3012169 [wandb_init.py:init():889] starting backend +2026-03-24 13:40:51,088 INFO MainThread:3012169 [wandb_init.py:init():892] sending inform_init request +2026-03-24 13:40:51,149 INFO MainThread:3012169 [wandb_init.py:init():900] backend started and connected +2026-03-24 13:40:51,153 INFO MainThread:3012169 [wandb_init.py:init():970] updated telemetry +2026-03-24 13:40:51,154 INFO MainThread:3012169 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout +2026-03-24 13:40:52,083 INFO MainThread:3012169 [wandb_init.py:init():1041] starting run threads in backend +2026-03-24 13:40:52,233 INFO MainThread:3012169 [wandb_run.py:_console_start():2521] atexit reg +2026-03-24 13:40:52,234 INFO MainThread:3012169 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2026-03-24 13:40:52,234 INFO MainThread:3012169 [wandb_run.py:_redirect():2438] Wrapping output streams. +2026-03-24 13:40:52,234 INFO MainThread:3012169 [wandb_run.py:_redirect():2461] Redirects installed. 
+2026-03-24 13:40:52,239 INFO MainThread:3012169 [wandb_init.py:init():1081] run started, returning control to user process +2026-03-24 13:41:55,388 INFO MainThread:3012169 [wandb_run.py:_config_callback():1396] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 512, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['BertForMaskedLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': '/data/nanhuang/Nan/models/DNAbert2_Pretrained', 'transformers_version': '4.35.2', 'model_type': 'bert', 'vocab_size': 4096, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 512, 'type_vocab_size': 2, 
'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'classifier_dropout': None, 'output_dir': 'genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 32, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 3e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'warmup_ratio': 0.05, 'warmup_steps': 0, 'log_level': 'info', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 'genomic_bench_DNAbert2_output/human_enhancers_ensembl/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.05_ep5_seed42/runs/Mar24_13-40-50_u112222', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 100, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 100, 'save_total_limit': 1, 'save_safetensors': True, 'save_on_each_node': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 100, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': 'base5120_human_enhancers_ensembl_lr3e-5_wd0.0_wr0.05_ep5_seed42', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 
'load_best_model_at_end': True, 'metric_for_best_model': 'eval_f1', 'greater_is_better': True, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': False, 'include_tokens_per_second': False, 'neftune_noise_alpha': None, 'vocab_file': None, 'cache_dir': None, 'model_max_length': 512, 'find_unused_parameters': False, 'checkpointing': False, 'eval_and_save_results': True, 'save_model': False, 'project_name': 'genomic_bench_DNAbert2'} +2026-03-24 15:33:48,384 INFO wandb-AsyncioManager-main:3012169 [service_client.py:_forward_responses():80] Reached EOF. +2026-03-24 15:33:48,385 INFO wandb-AsyncioManager-main:3012169 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles. 
diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_134050-qxvbi54v/run-qxvbi54v.wandb b/Finetune-GenomicBenchmarks/wandb/run-20260324_134050-qxvbi54v/run-qxvbi54v.wandb new file mode 100644 index 0000000000000000000000000000000000000000..846f9d0a0c892d9803660376db03fd5d2caaeb73 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_134050-qxvbi54v/run-qxvbi54v.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51f516bc301543f695d2e6334da4fd23e136a49ab8ba3ab2cd08a68f74c64efa +size 2358207 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_153401-oww9zr78/files/config.yaml b/Finetune-GenomicBenchmarks/wandb/run-20260324_153401-oww9zr78/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..887a65b911d831af90cb93e2176cc999a06138d7 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_153401-oww9zr78/files/config.yaml @@ -0,0 +1,561 @@ +_name_or_path: + value: /data/nanhuang/Nan/models/DNAbert2_Pretrained +_wandb: + value: + cli_version: 0.23.1 + e: + sm11qqu4zjzbzdqotiug93qezxowvkhj: + args: + - --model_name_or_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained + - --tokenizer_path + - /data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json + - --trust_remote_code + - "True" + - --data_path + - /data/nanhuang/Nan/ft_data/human_ensembl_regulatory/split + - --kmer + - "-1" + - --run_name + - base5120_human_ensembl_regulatory_lr3e-5_wd0.0_wr0.03_ep5_seed42 + - --model_max_length + - "250" + - --per_device_train_batch_size + - "32" + - --per_device_eval_batch_size + - "32" + - --gradient_accumulation_steps + - "4" + - --learning_rate + - "3e-5" + - --weight_decay + - "0.0" + - --num_train_epochs + - "5" + - --lr_scheduler_type + - linear + - --warmup_steps + - "0" + - --warmup_ratio + - "0.03" + - --fp16 + - --output_dir + - genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42 + - --evaluation_strategy + - epoch + 
- --save_strategy + - epoch + - --load_best_model_at_end + - "True" + - --metric_for_best_model + - eval_f1 + - --greater_is_better + - "True" + - --save_total_limit + - "1" + - --logging_steps + - "100" + - --overwrite_output_dir + - "True" + - --log_level + - info + - --seed + - "42" + - --find_unused_parameters + - "False" + - --project_name + - genomic_bench_DNAbert2 + codePath: train.py + codePathLocal: train.py + cpu_count: 64 + cpu_count_logical: 128 + cudaVersion: "12.4" + disk: + /: + total: "3768964489216" + used: "3559228731392" + email: n5huang@ucsd.edu + executable: /data/nanhuang/miniconda3/envs/bpe_v2/bin/python + gpu: NVIDIA RTX A6000 + gpu_count: 8 + gpu_nvidia: + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-5a3d8a94-f380-da39-63d2-4cae98f5c2ae + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-cf8724bd-d619-7916-ee26-88d517a20c47 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-48b494ab-4a63-ff4c-5cc8-746af5d27310 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-968c7ea7-97bf-416a-7689-72c141cfc2bb + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-d53c626b-860f-1dec-1cfa-1dfcde78bc88 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-caa40ec7-afcb-5fe0-c53a-85eb54152941 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-18ee7a7f-1bbe-edef-c72c-3abed60917b2 + - architecture: Ampere + cudaCores: 10752 + memoryTotal: "51527024640" + name: NVIDIA RTX A6000 + uuid: GPU-a8757d5a-c26e-48c6-a704-dfe62167fc81 + host: u112222 + memory: + total: "1082030182400" + os: Linux-5.15.0-126-generic-x86_64-with-glibc2.35 + program: 
/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py + python: CPython 3.9.18 + root: /data/nanhuang/Nan/Finetune-GenomicBenchmarks + startedAt: "2026-03-24T22:34:01.639155Z" + writerId: sm11qqu4zjzbzdqotiug93qezxowvkhj + m: + - "1": train/global_step + "6": + - 3 + "7": [] + - "2": '*' + "5": 1 + "6": + - 1 + "7": [] + python_version: 3.9.18 + t: + "1": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "2": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 71 + "3": + - 7 + - 66 + "4": 3.9.18 + "5": 0.23.1 + "6": 4.35.2 + "9": + "1": transformers_trainer + "12": 0.23.1 + "13": linux-x86_64 +adafactor: + value: false +adam_beta1: + value: 0.9 +adam_beta2: + value: 0.999 +adam_epsilon: + value: 1e-08 +add_cross_attention: + value: false +architectures: + value: + - BertForMaskedLM +attention_probs_dropout_prob: + value: 0.1 +auto_find_batch_size: + value: false +bad_words_ids: + value: null +begin_suppress_tokens: + value: null +bf16: + value: false +bf16_full_eval: + value: false +bos_token_id: + value: null +cache_dir: + value: null +checkpointing: + value: false +chunk_size_feed_forward: + value: 0 +classifier_dropout: + value: null +cross_attention_hidden_size: + value: null +data_seed: + value: null +dataloader_drop_last: + value: false +dataloader_num_workers: + value: 0 +dataloader_pin_memory: + value: false +ddp_backend: + value: null +ddp_broadcast_buffers: + value: null +ddp_bucket_cap_mb: + value: null +ddp_find_unused_parameters: + value: null +ddp_timeout: + value: 1800 +debug: + value: [] +decoder_start_token_id: + value: null +deepspeed: + value: null +disable_tqdm: + value: false +dispatch_batches: + value: null +diversity_penalty: + value: 0 +do_eval: + value: true +do_predict: + value: false +do_sample: + value: false +do_train: + value: false +early_stopping: + value: false +encoder_no_repeat_ngram_size: + value: 0 +eos_token_id: + value: null +eval_accumulation_steps: + value: null +eval_and_save_results: + value: true +eval_delay: + value: 0 
+eval_steps: + value: 100 +evaluation_strategy: + value: epoch +exponential_decay_length_penalty: + value: null +find_unused_parameters: + value: false +finetuning_task: + value: null +forced_bos_token_id: + value: null +forced_eos_token_id: + value: null +fp16: + value: true +fp16_backend: + value: auto +fp16_full_eval: + value: false +fp16_opt_level: + value: O1 +fsdp: + value: [] +fsdp_config: + value: + min_num_params: 0 + xla: false + xla_fsdp_grad_ckpt: false +fsdp_min_num_params: + value: 0 +fsdp_transformer_layer_cls_to_wrap: + value: null +full_determinism: + value: false +gradient_accumulation_steps: + value: 4 +gradient_checkpointing: + value: false +gradient_checkpointing_kwargs: + value: null +greater_is_better: + value: true +group_by_length: + value: false +half_precision_backend: + value: auto +hidden_act: + value: gelu +hidden_dropout_prob: + value: 0.1 +hidden_size: + value: 768 +hub_always_push: + value: false +hub_model_id: + value: null +hub_private_repo: + value: false +hub_strategy: + value: every_save +hub_token: + value: +id2label: + value: + "0": LABEL_0 + "1": LABEL_1 + "2": LABEL_2 +ignore_data_skip: + value: false +include_inputs_for_metrics: + value: false +include_tokens_per_second: + value: false +initializer_range: + value: 0.02 +intermediate_size: + value: 3072 +is_decoder: + value: false +is_encoder_decoder: + value: false +jit_mode_eval: + value: false +label_names: + value: null +label_smoothing_factor: + value: 0 +label2id: + value: + LABEL_0: 0 + LABEL_1: 1 + LABEL_2: 2 +layer_norm_eps: + value: 1e-12 +learning_rate: + value: 3e-05 +length_column_name: + value: length +length_penalty: + value: 1 +load_best_model_at_end: + value: true +local_rank: + value: 0 +log_level: + value: info +log_level_replica: + value: warning +log_on_each_node: + value: true +logging_dir: + value: genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42/runs/Mar24_15-34-01_u112222 +logging_first_step: 
+ value: false +logging_nan_inf_filter: + value: true +logging_steps: + value: 100 +logging_strategy: + value: steps +lr_scheduler_type: + value: linear +max_grad_norm: + value: 1 +max_length: + value: 512 +max_position_embeddings: + value: 512 +max_steps: + value: -1 +metric_for_best_model: + value: eval_f1 +min_length: + value: 0 +model_max_length: + value: 250 +model_type: + value: bert +mp_parameters: + value: "" +neftune_noise_alpha: + value: null +no_cuda: + value: false +no_repeat_ngram_size: + value: 0 +num_attention_heads: + value: 12 +num_beam_groups: + value: 1 +num_beams: + value: 1 +num_hidden_layers: + value: 12 +num_return_sequences: + value: 1 +num_train_epochs: + value: 5 +optim: + value: adamw_torch +optim_args: + value: null +output_attentions: + value: false +output_dir: + value: genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42 +output_hidden_states: + value: false +output_scores: + value: false +overwrite_output_dir: + value: true +pad_token_id: + value: 0 +past_index: + value: -1 +per_device_eval_batch_size: + value: 32 +per_device_train_batch_size: + value: 32 +per_gpu_eval_batch_size: + value: null +per_gpu_train_batch_size: + value: null +position_embedding_type: + value: absolute +prediction_loss_only: + value: false +prefix: + value: null +problem_type: + value: null +project_name: + value: genomic_bench_DNAbert2 +push_to_hub: + value: false +push_to_hub_model_id: + value: null +push_to_hub_organization: + value: null +push_to_hub_token: + value: +ray_scope: + value: last +remove_invalid_values: + value: false +remove_unused_columns: + value: true +repetition_penalty: + value: 1 +report_to: + value: + - wandb +resume_from_checkpoint: + value: null +return_dict: + value: true +return_dict_in_generate: + value: false +run_name: + value: base5120_human_ensembl_regulatory_lr3e-5_wd0.0_wr0.03_ep5_seed42 +save_model: + value: false +save_on_each_node: + value: false +save_safetensors: + 
value: true +save_steps: + value: 100 +save_strategy: + value: epoch +save_total_limit: + value: 1 +seed: + value: 42 +sep_token_id: + value: null +skip_memory_metrics: + value: true +split_batches: + value: false +suppress_tokens: + value: null +task_specific_params: + value: null +temperature: + value: 1 +tf_legacy_loss: + value: false +tf32: + value: null +tie_encoder_decoder: + value: false +tie_word_embeddings: + value: true +tokenizer_class: + value: null +top_k: + value: 50 +top_p: + value: 1 +torch_compile: + value: false +torch_compile_backend: + value: null +torch_compile_mode: + value: null +torch_dtype: + value: float32 +torchdynamo: + value: null +torchscript: + value: false +tpu_metrics_debug: + value: false +tpu_num_cores: + value: null +transformers_version: + value: 4.35.2 +type_vocab_size: + value: 2 +typical_p: + value: 1 +use_bfloat16: + value: false +use_cache: + value: true +use_cpu: + value: false +use_ipex: + value: false +use_legacy_prediction_loop: + value: false +use_mps_device: + value: false +vocab_file: + value: null +vocab_size: + value: 4096 +warmup_ratio: + value: 0.03 +warmup_steps: + value: 0 +weight_decay: + value: 0 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_153401-oww9zr78/files/output.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_153401-oww9zr78/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..87a70cd7f833267e8f30c1512d2fbda236f7e942 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_153401-oww9zr78/files/output.log @@ -0,0 +1,168 @@ +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... +WARNING:root:Perform single sequence classification... 
+Some weights of BertForSequenceClassification were not initialized from the model checkpoint at /data/nanhuang/Nan/models/DNAbert2_Pretrained and are newly initialized: ['classifier.bias', 'bert.pooler.dense.weight', 'bert.pooler.dense.bias', 'classifier.weight'] +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. +/data/nanhuang/miniconda3/envs/bpe_v2/lib/python3.9/site-packages/accelerate/accelerator.py:439: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead. + self.scaler = torch.cuda.amp.GradScaler(**kwargs) +Using auto half precision backend +***** Running training ***** + Num examples = 231,248 + Num Epochs = 5 + Instantaneous batch size per device = 32 + Total train batch size (w. parallel, distributed & accumulation) = 128 + Gradient Accumulation steps = 4 + Total optimization steps = 9,030 + Number of trainable parameters = 89,189,379 +Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true" + 20%|██ | 1806/9030 [11:27<45:46, 2.63it/s]***** Running Evaluation ***** +{'loss': 0.9622, 'learning_rate': 1.107011070110701e-05, 'epoch': 0.06} +{'loss': 0.7009, 'learning_rate': 2.214022140221402e-05, 'epoch': 0.11} +{'loss': 0.5302, 'learning_rate': 2.99006735928759e-05, 'epoch': 0.17} +{'loss': 0.4629, 'learning_rate': 2.9558168740723828e-05, 'epoch': 0.22} +{'loss': 0.4315, 'learning_rate': 2.9215663888571756e-05, 'epoch': 0.28} +{'loss': 0.3898, 'learning_rate': 2.887315903641968e-05, 'epoch': 0.33} +{'loss': 0.3906, 'learning_rate': 2.853065418426761e-05, 'epoch': 0.39} +{'loss': 0.3697, 'learning_rate': 2.818814933211554e-05, 'epoch': 0.44} +{'loss': 0.3494, 'learning_rate': 2.7845644479963468e-05, 'epoch': 0.5} +{'loss': 0.3635, 'learning_rate': 2.7503139627811396e-05, 'epoch': 0.55} +{'loss': 0.3492, 'learning_rate': 2.7160634775659324e-05, 'epoch': 0.61} +{'loss': 0.3424, 
'learning_rate': 2.6818129923507252e-05, 'epoch': 0.66} +{'loss': 0.3311, 'learning_rate': 2.647562507135518e-05, 'epoch': 0.72} +{'loss': 0.3341, 'learning_rate': 2.6133120219203104e-05, 'epoch': 0.77} +{'loss': 0.3222, 'learning_rate': 2.5790615367051032e-05, 'epoch': 0.83} +{'loss': 0.3196, 'learning_rate': 2.544811051489896e-05, 'epoch': 0.89} +{'loss': 0.3258, 'learning_rate': 2.510560566274689e-05, 'epoch': 0.94} +{'loss': 0.3128, 'learning_rate': 2.4763100810594816e-05, 'epoch': 1.0} + Num examples = 28906 + Batch size = 32 + 20%|██ | 1806/9030 [11:54<45:46, 2.63it/Saving model checkpoint to genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42/checkpoint-1806 +Configuration saved in genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42/checkpoint-1806/config.json +{'eval_loss': 0.30727851390838623, 'eval_accuracy': 0.8786757074655781, 'eval_f1': 0.8800256036767294, 'eval_matthews_correlation': 0.8180828481018108, 'eval_precision': 0.8840421410896541, 'eval_recall': 0.8781635253112077, 'eval_runtime': 26.4413, 'eval_samples_per_second': 1093.212, 'eval_steps_per_second': 34.189, 'epoch': 1.0} +Model weights saved in genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42/checkpoint-1806/pytorch_model.bin +tokenizer config file saved in genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42/checkpoint-1806/tokenizer_config.json +Special tokens file saved in genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42/checkpoint-1806/special_tokens_map.json + 40%|████ | 3613/9030 [23:33<34:18, 2.63it/s]***** Running Evaluation ***** +{'loss': 0.3041, 'learning_rate': 2.4420595958442745e-05, 'epoch': 1.05} +{'loss': 0.3024, 'learning_rate': 2.4078091106290673e-05, 'epoch': 1.11} +{'loss': 0.3024, 'learning_rate': 
2.37355862541386e-05, 'epoch': 1.16} +{'loss': 0.3037, 'learning_rate': 2.339308140198653e-05, 'epoch': 1.22} +{'loss': 0.3018, 'learning_rate': 2.3050576549834457e-05, 'epoch': 1.27} +{'loss': 0.3007, 'learning_rate': 2.2708071697682385e-05, 'epoch': 1.33} +{'loss': 0.2866, 'learning_rate': 2.2365566845530313e-05, 'epoch': 1.38} +{'loss': 0.2889, 'learning_rate': 2.202306199337824e-05, 'epoch': 1.44} +{'loss': 0.2937, 'learning_rate': 2.168055714122617e-05, 'epoch': 1.49} +{'loss': 0.2936, 'learning_rate': 2.1338052289074097e-05, 'epoch': 1.55} +{'loss': 0.2854, 'learning_rate': 2.0995547436922025e-05, 'epoch': 1.61} +{'loss': 0.2824, 'learning_rate': 2.0653042584769953e-05, 'epoch': 1.66} +{'loss': 0.2796, 'learning_rate': 2.0310537732617877e-05, 'epoch': 1.72} +{'loss': 0.2862, 'learning_rate': 1.9968032880465805e-05, 'epoch': 1.77} +{'loss': 0.2718, 'learning_rate': 1.9625528028313733e-05, 'epoch': 1.83} +{'loss': 0.2792, 'learning_rate': 1.928302317616166e-05, 'epoch': 1.88} +{'loss': 0.2864, 'learning_rate': 1.894051832400959e-05, 'epoch': 1.94} +{'loss': 0.2852, 'learning_rate': 1.859801347185752e-05, 'epoch': 1.99} + Num examples = 28906 + Batch size = 32 + 40%|████ | 3613/9030 [23:59<34:18, 2.63it/Saving model checkpoint to genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42/checkpoint-3613 +Configuration saved in genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42/checkpoint-3613/config.json +{'eval_loss': 0.27124258875846863, 'eval_accuracy': 0.8898844530547291, 'eval_f1': 0.8911132938895318, 'eval_matthews_correlation': 0.8365873619656282, 'eval_precision': 0.8999899065715472, 'eval_recall': 0.8877519423163003, 'eval_runtime': 26.2697, 'eval_samples_per_second': 1100.356, 'eval_steps_per_second': 34.412, 'epoch': 2.0} +Model weights saved in 
genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42/checkpoint-3613/pytorch_model.bin +tokenizer config file saved in genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42/checkpoint-3613/tokenizer_config.json +Special tokens file saved in genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42/checkpoint-3613/special_tokens_map.json +Deleting older checkpoint [genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42/checkpoint-1806] due to args.save_total_limit + 60%|██████ | 5420/9030 [35:32<23:30, 2.56it/s]***** Running Evaluation ***** +{'loss': 0.2613, 'learning_rate': 1.825550861970545e-05, 'epoch': 2.05} +{'loss': 0.2573, 'learning_rate': 1.7913003767553377e-05, 'epoch': 2.1} +{'loss': 0.2622, 'learning_rate': 1.75704989154013e-05, 'epoch': 2.16} +{'loss': 0.25, 'learning_rate': 1.722799406324923e-05, 'epoch': 2.21} +{'loss': 0.256, 'learning_rate': 1.6885489211097158e-05, 'epoch': 2.27} +{'loss': 0.2546, 'learning_rate': 1.6542984358945086e-05, 'epoch': 2.32} +{'loss': 0.2561, 'learning_rate': 1.6200479506793014e-05, 'epoch': 2.38} +{'loss': 0.267, 'learning_rate': 1.585797465464094e-05, 'epoch': 2.44} +{'loss': 0.2591, 'learning_rate': 1.551546980248887e-05, 'epoch': 2.49} +{'loss': 0.2566, 'learning_rate': 1.5172964950336796e-05, 'epoch': 2.55} +{'loss': 0.2609, 'learning_rate': 1.4830460098184726e-05, 'epoch': 2.6} +{'loss': 0.2562, 'learning_rate': 1.4487955246032654e-05, 'epoch': 2.66} +{'loss': 0.2495, 'learning_rate': 1.414545039388058e-05, 'epoch': 2.71} +{'loss': 0.2497, 'learning_rate': 1.3802945541728508e-05, 'epoch': 2.77} +{'loss': 0.2578, 'learning_rate': 1.3460440689576436e-05, 'epoch': 2.82} +{'loss': 0.256, 'learning_rate': 1.3117935837424364e-05, 'epoch': 2.88} +{'loss': 0.2459, 'learning_rate': 1.277543098527229e-05, 'epoch': 2.93} 
+{'loss': 0.2565, 'learning_rate': 1.2432926133120218e-05, 'epoch': 2.99} + Num examples = 28906 + Batch size = 32 + 60%|██████ | 5420/9030 [35:58<23:30, 2.56it/Saving model checkpoint to genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42/checkpoint-5420 +Configuration saved in genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42/checkpoint-5420/config.json +{'eval_loss': 0.27161338925361633, 'eval_accuracy': 0.8888120113471252, 'eval_f1': 0.889180642434028, 'eval_matthews_correlation': 0.8362364396798708, 'eval_precision': 0.8995711757292421, 'eval_recall': 0.886977935999356, 'eval_runtime': 26.4679, 'eval_samples_per_second': 1092.114, 'eval_steps_per_second': 34.155, 'epoch': 3.0} +Model weights saved in genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42/checkpoint-5420/pytorch_model.bin +tokenizer config file saved in genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42/checkpoint-5420/tokenizer_config.json +Special tokens file saved in genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42/checkpoint-5420/special_tokens_map.json + 80%|████████ | 7227/9030 [47:29<11:43, 2.56it/s]***** Running Evaluation ***** +{'loss': 0.2428, 'learning_rate': 1.2090421280968148e-05, 'epoch': 3.04} +{'loss': 0.2235, 'learning_rate': 1.1747916428816076e-05, 'epoch': 3.1} +{'loss': 0.2317, 'learning_rate': 1.1405411576664002e-05, 'epoch': 3.15} +{'loss': 0.2369, 'learning_rate': 1.106290672451193e-05, 'epoch': 3.21} +{'loss': 0.2346, 'learning_rate': 1.0720401872359858e-05, 'epoch': 3.27} +{'loss': 0.234, 'learning_rate': 1.0377897020207786e-05, 'epoch': 3.32} +{'loss': 0.2289, 'learning_rate': 1.0038817216577235e-05, 'epoch': 3.38} +{'loss': 0.2316, 'learning_rate': 9.696312364425162e-06, 'epoch': 3.43} +{'loss': 
0.2293, 'learning_rate': 9.353807512273091e-06, 'epoch': 3.49} +{'loss': 0.2288, 'learning_rate': 9.01130266012102e-06, 'epoch': 3.54} +{'loss': 0.2191, 'learning_rate': 8.668797807968947e-06, 'epoch': 3.6} +{'loss': 0.2274, 'learning_rate': 8.326292955816874e-06, 'epoch': 3.65} +{'loss': 0.2249, 'learning_rate': 7.983788103664802e-06, 'epoch': 3.71} +{'loss': 0.224, 'learning_rate': 7.64128325151273e-06, 'epoch': 3.76} +{'loss': 0.2292, 'learning_rate': 7.298778399360658e-06, 'epoch': 3.82} +{'loss': 0.2235, 'learning_rate': 6.956273547208586e-06, 'epoch': 3.87} +{'loss': 0.2267, 'learning_rate': 6.613768695056513e-06, 'epoch': 3.93} +{'loss': 0.2097, 'learning_rate': 6.271263842904442e-06, 'epoch': 3.99} + Num examples = 28906 + Batch size = 32 + 80%|████████ | 7227/9030 [47:55<11:43, 2.56it/Saving model checkpoint to genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42/checkpoint-7227 +Configuration saved in genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42/checkpoint-7227/config.json +{'eval_loss': 0.28062257170677185, 'eval_accuracy': 0.8907147305057773, 'eval_f1': 0.8926286555871359, 'eval_matthews_correlation': 0.8357783119636794, 'eval_precision': 0.8957037127875959, 'eval_recall': 0.8907174550142468, 'eval_runtime': 26.3462, 'eval_samples_per_second': 1097.162, 'eval_steps_per_second': 34.312, 'epoch': 4.0} +Model weights saved in genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42/checkpoint-7227/pytorch_model.bin +tokenizer config file saved in genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42/checkpoint-7227/tokenizer_config.json +Special tokens file saved in genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42/checkpoint-7227/special_tokens_map.json +Deleting older checkpoint 
[genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42/checkpoint-3613] due to args.save_total_limit +Deleting older checkpoint [genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42/checkpoint-5420] due to args.save_total_limit +100%|██████████| 9030/9030 [59:25<00:00, 2.64it/s]***** Running Evaluation ***** +{'loss': 0.1994, 'learning_rate': 5.928758990752369e-06, 'epoch': 4.04} +{'loss': 0.2056, 'learning_rate': 5.586254138600297e-06, 'epoch': 4.1} +{'loss': 0.2071, 'learning_rate': 5.243749286448224e-06, 'epoch': 4.15} +{'loss': 0.2004, 'learning_rate': 4.901244434296153e-06, 'epoch': 4.21} +{'loss': 0.199, 'learning_rate': 4.55873958214408e-06, 'epoch': 4.26} +{'loss': 0.2007, 'learning_rate': 4.216234729992008e-06, 'epoch': 4.32} +{'loss': 0.1982, 'learning_rate': 3.873729877839935e-06, 'epoch': 4.37} +{'loss': 0.2004, 'learning_rate': 3.5312250256878642e-06, 'epoch': 4.43} +{'loss': 0.1988, 'learning_rate': 3.188720173535792e-06, 'epoch': 4.48} +{'loss': 0.1952, 'learning_rate': 2.84621532138372e-06, 'epoch': 4.54} +{'loss': 0.1938, 'learning_rate': 2.5037104692316474e-06, 'epoch': 4.59} +{'loss': 0.1969, 'learning_rate': 2.1612056170795755e-06, 'epoch': 4.65} +{'loss': 0.2002, 'learning_rate': 1.818700764927503e-06, 'epoch': 4.7} +{'loss': 0.1922, 'learning_rate': 1.476195912775431e-06, 'epoch': 4.76} +{'loss': 0.1959, 'learning_rate': 1.1336910606233589e-06, 'epoch': 4.82} +{'loss': 0.1964, 'learning_rate': 7.911862084712867e-07, 'epoch': 4.87} +{'loss': 0.1941, 'learning_rate': 4.4868135631921454e-07, 'epoch': 4.93} +{'loss': 0.1984, 'learning_rate': 1.0617650416714236e-07, 'epoch': 4.98} + Num examples = 28906 + Batch size = 32 +100%|██████████| 9030/9030 [59:51<00:00, 2.64it/Saving model checkpoint to genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42/checkpoint-9030 +Configuration saved in 
genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42/checkpoint-9030/config.json +{'eval_loss': 0.2620416581630707, 'eval_accuracy': 0.8981526326714177, 'eval_f1': 0.8996340973968899, 'eval_matthews_correlation': 0.8478487817413687, 'eval_precision': 0.9051439118111951, 'eval_recall': 0.8970906218119197, 'eval_runtime': 26.209, 'eval_samples_per_second': 1102.904, 'eval_steps_per_second': 34.492, 'epoch': 5.0} +Model weights saved in genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42/checkpoint-9030/pytorch_model.bin +tokenizer config file saved in genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42/checkpoint-9030/tokenizer_config.json +Special tokens file saved in genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42/checkpoint-9030/special_tokens_map.json +Deleting older checkpoint [genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42/checkpoint-7227] due to args.save_total_limit + + +Training completed. Do not forget to share your model on huggingface.co/models =) + + +Loading best model from genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42/checkpoint-9030 (score: 0.8996340973968899). 
+100%|██████████| 9030/9030 [59:56<00:00, 2.51it/s] +{'train_runtime': 3596.1083, 'train_samples_per_second': 321.525, 'train_steps_per_second': 2.511, 'train_loss': 0.2788114057162803, 'epoch': 5.0} +***** Running Evaluation ***** + Num examples = 28907 + Batch size = 32 +100%|██████████| 904/904 [00:26<00:00, 34.74it/s] diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_153401-oww9zr78/files/requirements.txt b/Finetune-GenomicBenchmarks/wandb/run-20260324_153401-oww9zr78/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..44d18d32ec4648cd530877d7c8c4758d5e887b9c --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_153401-oww9zr78/files/requirements.txt @@ -0,0 +1,144 @@ +scipy==1.13.1 +jupyter_core==5.8.1 +smmap==5.0.2 +yarl==1.22.0 +executing==2.2.0 +cffi==2.0.0 +mkl_random==1.2.8 +traitlets==5.14.3 +wandb==0.23.1 +annotated-types==0.7.0 +evaluate==0.4.6 +kiwisolver==1.4.4 +Jinja2==3.1.6 +pyparsing==3.2.0 +mpmath==1.3.0 +debugpy==1.8.16 +nvidia-cuda-nvrtc-cu12==12.8.93 +docker-pycreds==0.4.0 +pycparser==2.23 +anyio==4.12.0 +safetensors==0.7.0 +matplotlib-inline==0.1.7 +Pygments==2.19.2 +numpy==2.0.2 +nvidia-cuda-cupti-cu12==12.8.90 +Bottleneck==1.4.2 +matplotlib==3.9.2 +numexpr==2.10.1 +sip==6.7.12 +aiohappyeyeballs==2.6.1 +ptyprocess==0.7.0 +fsspec==2025.7.0 +accelerate==0.25.0 +zipp==3.23.0 +PyQt5_sip==12.13.0 +pure_eval==0.2.3 +regex==2025.11.3 +aiosignal==1.4.0 +certifi==2025.10.5 +transformers==4.35.2 +mkl-service==2.4.0 +httpx==0.28.1 +mkl_fft==1.3.11 +pickleshare==0.7.5 +ipykernel==6.30.1 +nvidia-nvtx-cu12==12.8.90 +nvidia-cufft-cu12==11.3.3.83 +triton==3.4.0 +numba==0.60.0 +psutil==7.0.0 +contourpy==1.2.1 +PyQt5==5.15.10 +packaging==25.0 +datasets==4.4.1 +ipython==8.18.1 +sympy==1.14.0 +nvidia-cusolver-cu12==11.7.3.90 +multidict==6.7.0 +jupyter_client==8.6.3 +setuptools==80.9.0 +prompt_toolkit==3.0.51 +six==1.17.0 +GitPython==3.1.45 +pydantic==2.11.7 +nvidia-cublas-cu12==12.8.4.1 
+aiohttp==3.13.2 +tzdata==2025.2 +importlib_metadata==8.7.0 +biopython==1.85 +httpcore==1.0.9 +python-dateutil==2.9.0.post0 +llvmlite==0.43.0 +pandas==2.3.3 +scikit-learn==1.6.1 +asttokens==3.0.0 +joblib==1.5.3 +h11==0.16.0 +charset-normalizer==3.4.4 +pyzmq==27.0.2 +multiprocess==0.70.18 +nvidia-nvjitlink-cu12==12.8.93 +sentry-sdk==2.35.0 +pytz==2025.2 +pydantic_core==2.33.2 +MarkupSafe==3.0.3 +brotlicffi==1.0.9.2 +stack_data==0.6.3 +tqdm==4.67.1 +pynndescent==0.5.13 +importlib_resources==6.5.2 +ply==3.11 +pyarrow==21.0.0 +tokenizers==0.15.2 +exceptiongroup==1.3.1 +nvidia-cusparse-cu12==12.5.8.93 +comm==0.2.3 +pillow==11.3.0 +nvidia-cusparselt-cu12==0.7.1 +protobuf==3.20.3 +urllib3==2.5.0 +wheel==0.45.1 +wcwidth==0.2.13 +appdirs==1.4.4 +PySocks==1.7.1 +PyQt6_sip==13.10.2 +umap-learn==0.5.9.post2 +attrs==25.4.0 +platformdirs==4.3.8 +nvidia-cuda-runtime-cu12==12.8.90 +typing-inspection==0.4.1 +huggingface_hub==0.34.4 +decorator==5.2.1 +filelock==3.17.0 +nvidia-nccl-cu12==2.27.3 +fonttools==4.60.1 +xxhash==3.6.0 +dill==0.4.0 +threadpoolctl==3.6.0 +parso==0.8.4 +pysam==0.9.1 +frozenlist==1.8.0 +typing_extensions==4.15.0 +propcache==0.4.1 +tomli==2.2.1 +click==8.1.8 +nvidia-cudnn-cu12==9.10.2.21 +gitdb==4.0.12 +pip==25.3 +tornado==6.5.2 +networkx==3.2.1 +jedi==0.19.2 +idna==3.11 +pexpect==4.9.0 +async-timeout==5.0.1 +hf-xet==1.1.8 +nvidia-curand-cu12==10.3.9.90 +PyYAML==6.0.2 +nvidia-cufile-cu12==1.13.1.3 +setproctitle==1.3.6 +eval_type_backport==0.2.2 +requests==2.32.5 +nest-asyncio==1.6.0 +torch==2.8.0 +cycler==0.11.0 diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_153401-oww9zr78/files/wandb-metadata.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_153401-oww9zr78/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..e31ea9c40e2b37cada7caa884f166e29185794ae --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_153401-oww9zr78/files/wandb-metadata.json @@ -0,0 +1,146 @@ +{ + "os": 
"Linux-5.15.0-126-generic-x86_64-with-glibc2.35", + "python": "CPython 3.9.18", + "startedAt": "2026-03-24T22:34:01.639155Z", + "args": [ + "--model_name_or_path", + "/data/nanhuang/Nan/models/DNAbert2_Pretrained", + "--tokenizer_path", + "/data/nanhuang/Nan/models/DNAbert2_Pretrained/tokenizer.json", + "--trust_remote_code", + "True", + "--data_path", + "/data/nanhuang/Nan/ft_data/human_ensembl_regulatory/split", + "--kmer", + "-1", + "--run_name", + "base5120_human_ensembl_regulatory_lr3e-5_wd0.0_wr0.03_ep5_seed42", + "--model_max_length", + "250", + "--per_device_train_batch_size", + "32", + "--per_device_eval_batch_size", + "32", + "--gradient_accumulation_steps", + "4", + "--learning_rate", + "3e-5", + "--weight_decay", + "0.0", + "--num_train_epochs", + "5", + "--lr_scheduler_type", + "linear", + "--warmup_steps", + "0", + "--warmup_ratio", + "0.03", + "--fp16", + "--output_dir", + "genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42", + "--evaluation_strategy", + "epoch", + "--save_strategy", + "epoch", + "--load_best_model_at_end", + "True", + "--metric_for_best_model", + "eval_f1", + "--greater_is_better", + "True", + "--save_total_limit", + "1", + "--logging_steps", + "100", + "--overwrite_output_dir", + "True", + "--log_level", + "info", + "--seed", + "42", + "--find_unused_parameters", + "False", + "--project_name", + "genomic_bench_DNAbert2" + ], + "program": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks/train.py", + "codePath": "train.py", + "codePathLocal": "train.py", + "email": "n5huang@ucsd.edu", + "root": "/data/nanhuang/Nan/Finetune-GenomicBenchmarks", + "host": "u112222", + "executable": "/data/nanhuang/miniconda3/envs/bpe_v2/bin/python", + "cpu_count": 64, + "cpu_count_logical": 128, + "gpu": "NVIDIA RTX A6000", + "gpu_count": 8, + "disk": { + "/": { + "total": "3768964489216", + "used": "3559228731392" + } + }, + "memory": { + "total": "1082030182400" + }, + "gpu_nvidia": [ + { + "name": 
"NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-5a3d8a94-f380-da39-63d2-4cae98f5c2ae" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-cf8724bd-d619-7916-ee26-88d517a20c47" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-48b494ab-4a63-ff4c-5cc8-746af5d27310" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-968c7ea7-97bf-416a-7689-72c141cfc2bb" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-d53c626b-860f-1dec-1cfa-1dfcde78bc88" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-caa40ec7-afcb-5fe0-c53a-85eb54152941" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-18ee7a7f-1bbe-edef-c72c-3abed60917b2" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere", + "uuid": "GPU-a8757d5a-c26e-48c6-a704-dfe62167fc81" + } + ], + "cudaVersion": "12.4", + "writerId": "sm11qqu4zjzbzdqotiug93qezxowvkhj" +} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_153401-oww9zr78/files/wandb-summary.json b/Finetune-GenomicBenchmarks/wandb/run-20260324_153401-oww9zr78/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..41a4601ecf38b79faead086cbd5956b8d4488697 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_153401-oww9zr78/files/wandb-summary.json @@ -0,0 +1 @@ 
+{"eval/loss":0.25783056020736694,"train/total_flos":1.0512838028784269e+17,"eval/accuracy":0.8992285605562667,"eval/f1":0.9006401658314979,"train/train_runtime":3596.1083,"_timestamp":1.7743953955222092e+09,"train/loss":0.1984,"train/train_samples_per_second":321.525,"train/global_step":9030,"eval/recall":0.8980398114829752,"train/train_steps_per_second":2.511,"train/epoch":5,"train/learning_rate":1.0617650416714236e-07,"_runtime":3752,"eval/samples_per_second":1109.452,"_wandb":{"runtime":3752},"train/train_loss":0.2788114057162803,"_step":96,"eval/matthews_correlation":0.8490984535285055,"eval/steps_per_second":34.696,"eval/runtime":26.0552,"eval/precision":0.9055273184685374} \ No newline at end of file diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_153401-oww9zr78/logs/debug-core.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_153401-oww9zr78/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..0b0e19e082b93b9bda3b4bef742ed196e840f18d --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_153401-oww9zr78/logs/debug-core.log @@ -0,0 +1,14 @@ +{"time":"2026-03-24T15:34:01.859251882-07:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmphuo4_f0x/port-3456373.txt","pid":3456373,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-03-24T15:34:01.865943804-07:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":3456373} +{"time":"2026-03-24T15:34:01.865910244-07:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-3456373-3457043-2622635256/socket","Net":"unix"}} +{"time":"2026-03-24T15:34:02.029182108-07:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-03-24T15:34:02.137669365-07:00","level":"INFO","msg":"handleInformInit: received","streamId":"oww9zr78","id":"1(@)"} 
+{"time":"2026-03-24T15:34:02.619938967-07:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"oww9zr78","id":"1(@)"} +{"time":"2026-03-24T16:36:35.530411192-07:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-03-24T16:36:35.530639722-07:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-03-24T16:36:35.530635772-07:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-03-24T16:36:35.530864391-07:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-03-24T16:36:35.531025571-07:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-3456373-3457043-2622635256/socket","Net":"unix"}} +{"time":"2026-03-24T16:36:52.148126685-07:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-03-24T16:36:52.148213245-07:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-03-24T16:36:52.148270195-07:00","level":"INFO","msg":"server is closed"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_153401-oww9zr78/logs/debug-internal.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_153401-oww9zr78/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..c63637f7a5514a3bf5bc320e1d1ec861a78452b9 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_153401-oww9zr78/logs/debug-internal.log @@ -0,0 +1,11 @@ +{"time":"2026-03-24T15:34:02.137925903-07:00","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2026-03-24T15:34:02.619476859-07:00","level":"INFO","msg":"stream: created new stream","id":"oww9zr78"} +{"time":"2026-03-24T15:34:02.619722338-07:00","level":"INFO","msg":"handler: started","stream_id":"oww9zr78"} +{"time":"2026-03-24T15:34:02.619835137-07:00","level":"INFO","msg":"stream: started","id":"oww9zr78"} 
+{"time":"2026-03-24T15:34:02.619904927-07:00","level":"INFO","msg":"sender: started","stream_id":"oww9zr78"} +{"time":"2026-03-24T15:34:02.619950787-07:00","level":"INFO","msg":"writer: started","stream_id":"oww9zr78"} +{"time":"2026-03-24T16:36:35.530641442-07:00","level":"INFO","msg":"stream: closing","id":"oww9zr78"} +{"time":"2026-03-24T16:36:36.04031333-07:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2026-03-24T16:36:52.13435371-07:00","level":"INFO","msg":"handler: closed","stream_id":"oww9zr78"} +{"time":"2026-03-24T16:36:52.134643249-07:00","level":"INFO","msg":"sender: closed","stream_id":"oww9zr78"} +{"time":"2026-03-24T16:36:52.134701719-07:00","level":"INFO","msg":"stream: closed","id":"oww9zr78"} diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_153401-oww9zr78/logs/debug.log b/Finetune-GenomicBenchmarks/wandb/run-20260324_153401-oww9zr78/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..c00e28eb09a0465fc930d44c34c6ead5af06b7ac --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_153401-oww9zr78/logs/debug.log @@ -0,0 +1,24 @@ +2026-03-24 15:34:01,646 INFO MainThread:3456373 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1 +2026-03-24 15:34:01,646 INFO MainThread:3456373 [wandb_setup.py:_flush():80] Configure stats pid to 3456373 +2026-03-24 15:34:01,646 INFO MainThread:3456373 [wandb_setup.py:_flush():80] Loading settings from /home/nanhuang/.config/wandb/settings +2026-03-24 15:34:01,646 INFO MainThread:3456373 [wandb_setup.py:_flush():80] Loading settings from /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/settings +2026-03-24 15:34:01,646 INFO MainThread:3456373 [wandb_setup.py:_flush():80] Loading settings from environment variables +2026-03-24 15:34:01,646 INFO MainThread:3456373 [wandb_init.py:setup_run_log_directory():714] Logging user logs to 
/data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_153401-oww9zr78/logs/debug.log +2026-03-24 15:34:01,646 INFO MainThread:3456373 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to /data/nanhuang/Nan/Finetune-GenomicBenchmarks/wandb/run-20260324_153401-oww9zr78/logs/debug-internal.log +2026-03-24 15:34:01,646 INFO MainThread:3456373 [wandb_init.py:init():841] calling init triggers +2026-03-24 15:34:01,646 INFO MainThread:3456373 [wandb_init.py:init():846] wandb.init called with sweep_config: {} +config: {'_wandb': {}} +2026-03-24 15:34:01,646 INFO MainThread:3456373 [wandb_init.py:init():889] starting backend +2026-03-24 15:34:02,030 INFO MainThread:3456373 [wandb_init.py:init():892] sending inform_init request +2026-03-24 15:34:02,134 INFO MainThread:3456373 [wandb_init.py:init():900] backend started and connected +2026-03-24 15:34:02,149 INFO MainThread:3456373 [wandb_init.py:init():970] updated telemetry +2026-03-24 15:34:02,152 INFO MainThread:3456373 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout +2026-03-24 15:34:03,352 INFO MainThread:3456373 [wandb_init.py:init():1041] starting run threads in backend +2026-03-24 15:34:03,705 INFO MainThread:3456373 [wandb_run.py:_console_start():2521] atexit reg +2026-03-24 15:34:03,705 INFO MainThread:3456373 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2026-03-24 15:34:03,705 INFO MainThread:3456373 [wandb_run.py:_redirect():2438] Wrapping output streams. +2026-03-24 15:34:03,706 INFO MainThread:3456373 [wandb_run.py:_redirect():2461] Redirects installed. 
+2026-03-24 15:34:03,713 INFO MainThread:3456373 [wandb_init.py:init():1081] run started, returning control to user process +2026-03-24 15:36:13,359 INFO MainThread:3456373 [wandb_run.py:_config_callback():1396] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 512, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['BertForMaskedLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1', 2: 'LABEL_2'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1, 'LABEL_2': 2}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': '/data/nanhuang/Nan/models/DNAbert2_Pretrained', 'transformers_version': '4.35.2', 'model_type': 'bert', 'vocab_size': 4096, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 512, 
'type_vocab_size': 2, 'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'classifier_dropout': None, 'output_dir': 'genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 32, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 3e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'warmup_ratio': 0.03, 'warmup_steps': 0, 'log_level': 'info', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 'genomic_bench_DNAbert2_output/human_ensembl_regulatory/DNAbert2_Pretrained/lr3e-5_wd0.0_wr0.03_ep5_seed42/runs/Mar24_15-34-01_u112222', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 100, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 100, 'save_total_limit': 1, 'save_safetensors': True, 'save_on_each_node': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 100, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': 'base5120_human_ensembl_regulatory_lr3e-5_wd0.0_wr0.03_ep5_seed42', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 
'load_best_model_at_end': True, 'metric_for_best_model': 'eval_f1', 'greater_is_better': True, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': False, 'include_tokens_per_second': False, 'neftune_noise_alpha': None, 'vocab_file': None, 'cache_dir': None, 'model_max_length': 250, 'find_unused_parameters': False, 'checkpointing': False, 'eval_and_save_results': True, 'save_model': False, 'project_name': 'genomic_bench_DNAbert2'} +2026-03-24 16:36:35,531 INFO wandb-AsyncioManager-main:3456373 [service_client.py:_forward_responses():80] Reached EOF. +2026-03-24 16:36:35,531 INFO wandb-AsyncioManager-main:3456373 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles. 
diff --git a/Finetune-GenomicBenchmarks/wandb/run-20260324_153401-oww9zr78/run-oww9zr78.wandb b/Finetune-GenomicBenchmarks/wandb/run-20260324_153401-oww9zr78/run-oww9zr78.wandb new file mode 100644 index 0000000000000000000000000000000000000000..c281caa670ebf6f0256ad108125e296d7cf98e06 --- /dev/null +++ b/Finetune-GenomicBenchmarks/wandb/run-20260324_153401-oww9zr78/run-oww9zr78.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51a1b320f13d18a2cb9693cdedc4221977cdcb4f9d0143290156060122844bfa +size 2205115