natmin322 committed on
Commit
8b682f9
·
1 Parent(s): d1be546
improve_gainlora/setup_kaggle_colab.sh CHANGED
@@ -116,6 +116,19 @@ echo "[Cache] Clearing stale HuggingFace dataset module cache..."
116
  rm -rf ~/.cache/huggingface/modules/datasets_modules/ 2>/dev/null || true
117
  echo "[Cache] HF dataset module cache cleared"
118
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
  echo ""
120
  echo "[Check] Verifying installation..."
121
  ${PY_CMD} - <<'PY'
 
116
  rm -rf ~/.cache/huggingface/modules/datasets_modules/ 2>/dev/null || true
117
  echo "[Cache] HF dataset module cache cleared"
118
 
119
+ echo "[Symlink] Creating config directory aliases..."
120
+ # assets.py references configs/Long_Sequence/* and configs/SuperNI/*
121
+ # but the actual directories are configs/gen_script_long_order3_t5_configs/* etc.
122
+ if [ -d configs/gen_script_long_order3_t5_configs ] && [ ! -e configs/Long_Sequence ]; then
123
+ ln -s gen_script_long_order3_t5_configs configs/Long_Sequence
124
+ echo " Created symlink: configs/Long_Sequence -> gen_script_long_order3_t5_configs"
125
+ fi
126
+ if [ -d configs/gen_script_superni_order1_t5_configs ] && [ ! -e configs/SuperNI ]; then
127
+ ln -s gen_script_superni_order1_t5_configs configs/SuperNI
128
+ echo " Created symlink: configs/SuperNI -> gen_script_superni_order1_t5_configs"
129
+ fi
130
+
131
+
132
  echo ""
133
  echo "[Check] Verifying installation..."
134
  ${PY_CMD} - <<'PY'
improve_gainlora/src/assets.py CHANGED
@@ -18,21 +18,21 @@ task_config = {
18
  "task1290_xsum_summarization": "configs/SuperNI/task1290_xsum_summarization",
19
  "task073_commonsenseqa_answer_generation": "configs/SuperNI/task073_commonsenseqa_answer_generation",
20
  "task363_sst2_polarity_classification": "configs/SuperNI/task363_sst2_polarity_classification",
21
- "dbpedia": "configs/Long_Sequence/dbpedia",
22
- "amazon": "configs/Long_Sequence/amazon",
23
- "agnews": "configs/Long_Sequence/agnews",
24
- "yahoo": "configs/Long_Sequence/yahoo",
25
- "yelp": "configs/Long_Sequence/yelp",
26
- "copa": "configs/Long_Sequence/copa",
27
- "mnli": "configs/Long_Sequence/mnli",
28
- "cb": "configs/Long_Sequence/cb",
29
- "imdb": "configs/Long_Sequence/imdb",
30
- "multirc": "configs/Long_Sequence/multirc",
31
- "sst2": "configs/Long_Sequence/sst2",
32
- "boolq": "configs/Long_Sequence/boolq",
33
- "rte": "configs/Long_Sequence/rte",
34
- "wic": "configs/Long_Sequence/wic",
35
- "qqp": "configs/Long_Sequence/qqp",
36
  }
37
 
38
  def lora_state_dict_A(model: nn.Module, bias: str = 'none', task_name=None) -> Dict[str, torch.Tensor]:
 
18
  "task1290_xsum_summarization": "configs/SuperNI/task1290_xsum_summarization",
19
  "task073_commonsenseqa_answer_generation": "configs/SuperNI/task073_commonsenseqa_answer_generation",
20
  "task363_sst2_polarity_classification": "configs/SuperNI/task363_sst2_polarity_classification",
21
+ "dbpedia": "configs/gen_script_long_order3_t5_configs/dbpedia",
22
+ "amazon": "configs/gen_script_long_order3_t5_configs/amazon",
23
+ "agnews": "configs/gen_script_long_order3_t5_configs/agnews",
24
+ "yahoo": "configs/gen_script_long_order3_t5_configs/yahoo",
25
+ "yelp": "configs/gen_script_long_order3_t5_configs/yelp",
26
+ "copa": "configs/gen_script_long_order3_t5_configs/copa",
27
+ "mnli": "configs/gen_script_long_order3_t5_configs/mnli",
28
+ "cb": "configs/gen_script_long_order3_t5_configs/cb",
29
+ "imdb": "configs/gen_script_long_order3_t5_configs/imdb",
30
+ "multirc": "configs/gen_script_long_order3_t5_configs/multirc",
31
+ "sst2": "configs/gen_script_long_order3_t5_configs/sst2",
32
+ "boolq": "configs/gen_script_long_order3_t5_configs/boolq",
33
+ "rte": "configs/gen_script_long_order3_t5_configs/rte",
34
+ "wic": "configs/gen_script_long_order3_t5_configs/wic",
35
+ "qqp": "configs/gen_script_long_order3_t5_configs/qqp",
36
  }
37
 
38
  def lora_state_dict_A(model: nn.Module, bias: str = 'none', task_name=None) -> Dict[str, torch.Tensor]:
improve_gainlora/src/run_t5.py CHANGED
@@ -712,12 +712,20 @@ def main():
712
  if _need_replay_data:
713
  replay_dataset_dict = {}
714
  abs_data_dir_replay = os.path.abspath(data_dir) if data_dir else None
 
 
715
  for idx in range(cur_task_id):
 
 
 
 
 
 
716
  raw_datasets_gen = load_dataset(
717
  dataset_script_path,
718
  data_dir=abs_data_dir_replay,
719
  download_config=download_config,
720
- task_config_dir=os.path.abspath(task_config[task_order[idx]]) if task_config[task_order[idx]] else None,
721
  trust_remote_code=True,
722
  cache_dir=data_cache_dir, # for debug, change dataset size, otherwise open it
723
  max_num_instances_per_task=data_args.max_num_instances_per_task,
 
712
  if _need_replay_data:
713
  replay_dataset_dict = {}
714
  abs_data_dir_replay = os.path.abspath(data_dir) if data_dir else None
715
+ # Derive each replay task's config dir from the parent of the current task's config dir (robust; avoids relying on stale assets.py mappings)
716
+ _configs_parent = os.path.dirname(os.path.abspath(data_args.task_config_dir)) if data_args.task_config_dir else None
717
  for idx in range(cur_task_id):
718
+ if _configs_parent:
719
+ _replay_task_config_dir = os.path.join(_configs_parent, task_order[idx])
720
+ elif task_config.get(task_order[idx]):
721
+ _replay_task_config_dir = os.path.abspath(task_config[task_order[idx]])
722
+ else:
723
+ _replay_task_config_dir = None
724
  raw_datasets_gen = load_dataset(
725
  dataset_script_path,
726
  data_dir=abs_data_dir_replay,
727
  download_config=download_config,
728
+ task_config_dir=_replay_task_config_dir,
729
  trust_remote_code=True,
730
  cache_dir=data_cache_dir, # for debug, change dataset size, otherwise open it
731
  max_num_instances_per_task=data_args.max_num_instances_per_task,
root_gainlora/setup_kaggle_colab.sh CHANGED
@@ -116,6 +116,18 @@ echo "[Cache] Clearing stale HuggingFace dataset module cache..."
116
  rm -rf ~/.cache/huggingface/modules/datasets_modules/ 2>/dev/null || true
117
  echo "[Cache] HF dataset module cache cleared"
118
 
 
 
 
 
 
 
 
 
 
 
 
 
119
  echo ""
120
  echo "[Check] Verifying installation..."
121
  ${PY_CMD} - <<'PY'
 
116
  rm -rf ~/.cache/huggingface/modules/datasets_modules/ 2>/dev/null || true
117
  echo "[Cache] HF dataset module cache cleared"
118
 
119
+ echo "[Symlink] Creating config directory aliases..."
120
+ # assets.py references configs/Long_Sequence/* and configs/SuperNI/*
121
+ if [ -d configs/gen_script_long_order3_t5_configs ] && [ ! -e configs/Long_Sequence ]; then
122
+ ln -s gen_script_long_order3_t5_configs configs/Long_Sequence
123
+ echo " Created symlink: configs/Long_Sequence -> gen_script_long_order3_t5_configs"
124
+ fi
125
+ if [ -d configs/gen_script_superni_order1_t5_configs ] && [ ! -e configs/SuperNI ]; then
126
+ ln -s gen_script_superni_order1_t5_configs configs/SuperNI
127
+ echo " Created symlink: configs/SuperNI -> gen_script_superni_order1_t5_configs"
128
+ fi
129
+ echo "[Cache] HF dataset module cache cleared"
130
+
131
  echo ""
132
  echo "[Check] Verifying installation..."
133
  ${PY_CMD} - <<'PY'