Linksome committed on
Commit
44839c7
·
verified ·
1 Parent(s): 74b5b6d

Add files using upload-large-folder tool

Browse files
D/2k.yaml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ model_name_or_path: /workspace/meta-llama/Llama-3.1-8B-Instruct
2
+ adapter_name_or_path: /workspace/v121rc_exp1/D/checkpoint-2000
3
+ template: llama3
4
+ finetuning_type: lora
5
+ infer_backend: huggingface
6
+ trust_remote_code: true
D/PandaEval12_2_results/HNO2_eval_wo_reasoning_P3_results.json ADDED
The diff for this file is too large to render. See raw diff
 
D/PandaEval12_2_results/HNO2_eval_wo_reasoning_R1_results.json ADDED
The diff for this file is too large to render. See raw diff
 
D/runD.py ADDED
@@ -0,0 +1,232 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ import hashlib
4
+ from typing import Any, Dict, Tuple, List
5
+ from concurrent.futures import ThreadPoolExecutor, as_completed
6
+
7
+ from tqdm import tqdm
8
+ import requests
9
+ from loguru import logger
10
+
11
+
12
+ def getenv_str(key: str, default: str) -> str:
13
+ v = os.environ.get(key)
14
+ return default if v is None else v
15
+
16
+
17
+ def getenv_int(key: str, default: int) -> int:
18
+ v = os.environ.get(key)
19
+ if v is None or v.strip() == "":
20
+ return default
21
+ try:
22
+ return int(v)
23
+ except ValueError:
24
+ raise ValueError(f"Env var {key} must be int, got: {v!r}")
25
+
26
+
27
+ # ----------------------------
28
+ # Read config from environment
29
+ # ----------------------------
30
+ CONFIG_DIR = getenv_str("CONFIG_DIR", "/workspace/v121rc_exp1/D")
31
+ SAVE_DIR = getenv_str("SAVE_DIR", CONFIG_DIR)
32
+
33
+ WORKING_DIR = getenv_str("EVAL_WORKING_DIR", "/workspace/v121rc_exp1/EVAL/HNO2")
34
+ WORKING_EVAL_SUBWORD = getenv_str("EVAL_SUBWORD", "wo_reasoning")
35
+
36
+ FORBIDDEN_SUBWORDS: List[str] = json.loads(getenv_str("FORBIDDEN_SUBWORDS_JSON", "[]"))
37
+ PARTICULAR = getenv_str("PARTICULAR", "")
38
+
39
+ BASE_PORT = getenv_int("BASE_PORT", 8002)
40
+
41
+ # Prefer explicit URL->ckpt mapping from RUNME.sh
42
+ MODELS_JSON_ENV = getenv_str("MODELS_JSON", "").strip()
43
+ if MODELS_JSON_ENV:
44
+ MODELS: Dict[str, int] = json.loads(MODELS_JSON_ENV)
45
+ MODELS = {str(k): int(v) for k, v in MODELS.items()}
46
+ else:
47
+ # Fallback sequential mapping (rarely used now)
48
+ checkpoints = json.loads(getenv_str("CKPTS_JSON", "[1000]"))
49
+ MODELS = {f"http://localhost:{BASE_PORT + i}/v1/chat/completions": int(checkpoints[i])
50
+ for i in range(len(checkpoints))}
51
+
52
+ MAX_WORKERS = min(16, max(1, len(MODELS)))
53
+
54
+
55
+ def thought_generator_with_local_LLM_requests(
56
+ message,
57
+ LLM_model,
58
+ LLM_max_new_tokens=128,
59
+ n=1,
60
+ API_URL="http://localhost:8000/v1/chat/completions",
61
+ timeout_sec=600,
62
+ stream=False,
63
+ ) -> str | list[Any] | Any:
64
+ # Your eval uses stream=False; keep it simple.
65
+ payload = {
66
+ "model": LLM_model,
67
+ "messages": message,
68
+ "n": n,
69
+ "max_tokens": LLM_max_new_tokens,
70
+ }
71
+
72
+ r = requests.post(
73
+ API_URL,
74
+ json=payload,
75
+ headers={"Content-Type": "application/json", "Authorization": "Bearer 0"},
76
+ timeout=timeout_sec,
77
+ )
78
+
79
+ if r.status_code != 200:
80
+ logger.error(f"LLM API error {r.status_code}: {r.text}")
81
+ raise RuntimeError(f"LLM API returned {r.status_code}")
82
+
83
+ data = r.json()
84
+ if n == 1:
85
+ return data["choices"][0]["message"]["content"]
86
+ return [c["message"]["content"] for c in data["choices"]]
87
+
88
+
89
+ def extract_label(response: str) -> str:
90
+ has_yes = "Yes" in response
91
+ has_no = "No" in response
92
+ if has_yes and not has_no:
93
+ return "Yes"
94
+ if has_no and not has_yes:
95
+ return "No"
96
+ return ""
97
+
98
+
99
+ def call_one_model(
100
+ model_url: str,
101
+ ckpt: int,
102
+ msgs,
103
+ gold_label: str,
104
+ ) -> Tuple[int, Dict[str, Any]]:
105
+ try:
106
+ response = thought_generator_with_local_LLM_requests(
107
+ message=msgs,
108
+ LLM_model="custom-model",
109
+ LLM_max_new_tokens=128,
110
+ n=1,
111
+ API_URL=model_url,
112
+ timeout_sec=300,
113
+ stream=False,
114
+ )
115
+ except Exception as e:
116
+ logger.error(f"Error getting response from model at {model_url}: {e}")
117
+ response = ""
118
+
119
+ label = extract_label(response)
120
+ return ckpt, {
121
+ "label": label,
122
+ "output": response,
123
+ "full_output": response,
124
+ "accuracy": 1 if label == gold_label else 0,
125
+ }
126
+
127
+
128
+ def entry_uid(system: str, prompt: str, gold_label: str, gold_output: str) -> str:
129
+ payload = {"system": system, "prompt": prompt, "gold_label": gold_label, "gold_output": gold_output}
130
+ s = json.dumps(payload, ensure_ascii=False, sort_keys=True, separators=(",", ":"))
131
+ return hashlib.sha1(s.encode("utf-8")).hexdigest()
132
+
133
+
134
+ def load_cache(path: str) -> Dict[str, Dict[str, Any]]:
135
+ if not os.path.exists(path):
136
+ return {}
137
+ try:
138
+ with open(path, "r") as f:
139
+ data = json.load(f)
140
+ cache = {}
141
+ for e in data:
142
+ uid = entry_uid(e.get("system", ""), e.get("prompt", ""), e.get("gold_label", ""), e.get("gold_output", ""))
143
+ cache[uid] = e
144
+ logger.info(f"Loaded cache from {path}: {len(cache)} entries")
145
+ return cache
146
+ except Exception as ex:
147
+ logger.warning(f"Failed to load cache from {path} (starting fresh): {ex}")
148
+ return {}
149
+
150
+
151
+ def should_run_step(o_entry: Dict[str, Any], ckpt: int) -> bool:
152
+ key = f"step_{ckpt}"
153
+ if key not in o_entry:
154
+ return True
155
+ v = o_entry.get(key) or {}
156
+ out = v.get("output", "")
157
+ return not isinstance(out, str) or out.strip() == ""
158
+
159
+
160
+ def atomic_write_json(path: str, obj: Any) -> None:
161
+ tmp = path + ".tmp"
162
+ with open(tmp, "w") as f:
163
+ json.dump(obj, f, indent=2, ensure_ascii=False)
164
+ os.replace(tmp, path)
165
+
166
+
167
+ def should_process_file(filename: str) -> bool:
168
+ if WORKING_EVAL_SUBWORD and WORKING_EVAL_SUBWORD not in filename:
169
+ return False
170
+ if any(sub in filename for sub in FORBIDDEN_SUBWORDS):
171
+ return False
172
+ if PARTICULAR and PARTICULAR not in filename:
173
+ return False
174
+ return filename.endswith(".json")
175
+
176
+
177
+ if __name__ == "__main__":
178
+ logger.info(f"WORKING_DIR={WORKING_DIR}")
179
+ logger.info(f"SAVE_DIR={SAVE_DIR}")
180
+ logger.info(f"MODELS={MODELS}")
181
+ logger.info(f"MAX_WORKERS={MAX_WORKERS}")
182
+
183
+ if not MODELS:
184
+ print("No models to evaluate (MODELS is empty). Exiting.")
185
+ raise SystemExit(0)
186
+
187
+ os.makedirs(SAVE_DIR, exist_ok=True)
188
+
189
+ for original_eval_log_file in os.listdir(WORKING_DIR):
190
+ if not should_process_file(original_eval_log_file):
191
+ continue
192
+ print(f"Working in {original_eval_log_file}")
193
+
194
+ original_eval_file = os.path.join(WORKING_DIR, original_eval_log_file)
195
+ output_eval_file = os.path.join(SAVE_DIR, original_eval_log_file.replace(".json", "_results.json"))
196
+
197
+ with open(original_eval_file, "r") as f:
198
+ eval_data: list[dict] = json.load(f)
199
+
200
+ cache_map = load_cache(output_eval_file)
201
+ output_eval_data = []
202
+
203
+ with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
204
+ for idx, entry in enumerate(tqdm(eval_data)):
205
+ system = entry["system"]
206
+ prompt = entry["prompt"]
207
+ gold_label = entry["gold_label"]
208
+ gold_output = entry["gold_output"]
209
+
210
+ uid = entry_uid(system, prompt, gold_label, gold_output)
211
+ o_entry = cache_map.get(uid, {})
212
+ o_entry.update({"system": system, "prompt": prompt, "gold_label": gold_label, "gold_output": gold_output})
213
+
214
+ msgs = [{"role": "system", "content": system}, {"role": "user", "content": prompt}]
215
+
216
+ futures = []
217
+ for model_url, ckpt in MODELS.items():
218
+ if should_run_step(o_entry, ckpt):
219
+ futures.append(executor.submit(call_one_model, model_url, ckpt, msgs, gold_label))
220
+
221
+ for fut in as_completed(futures):
222
+ ckpt, result = fut.result()
223
+ o_entry[f"step_{ckpt}"] = result
224
+
225
+ output_eval_data.append(o_entry)
226
+
227
+ if (idx + 1) % 50 == 0:
228
+ atomic_write_json(output_eval_file, output_eval_data)
229
+
230
+ atomic_write_json(output_eval_file, output_eval_data)
231
+
232
+ print("Evaluation with checkpoints completed.")
E/logs/E/5k_port8006_gpu0_20251224_014758_batch1.log ADDED
The diff for this file is too large to render. See raw diff
 
E/logs/E/9k_port8005_gpu0_20251224_014758_batch2.log ADDED
The diff for this file is too large to render. See raw diff
 
F/logs/F/10k_port8006_gpu0_20251224_014934_batch2.log.pid ADDED
@@ -0,0 +1 @@
 
 
1
+ 2921
F/logs/F/1k_port8002_gpu0_20251229_035825_batch1.log ADDED
The diff for this file is too large to render. See raw diff
 
F/logs/F/5k_port8006_gpu0_20251229_035825_batch1.log ADDED
The diff for this file is too large to render. See raw diff
 
F/logs/F/5k_port8006_gpu0_20251229_035825_batch1.log.pid ADDED
@@ -0,0 +1 @@
 
 
1
+ 1755
G/logs/G/10k_port8003_gpu0_20251229_035833_batch3.log.pid ADDED
@@ -0,0 +1 @@
 
 
1
+ 7530
G/logs/G/1k_port8002_gpu0_20251224_014604_batch1.log ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [INFO|tokenization_utils_base.py:2093] 2025-12-24 01:46:09,396 >> loading file tokenizer.json
2
+ [INFO|tokenization_utils_base.py:2093] 2025-12-24 01:46:09,396 >> loading file tokenizer.model
3
+ [INFO|tokenization_utils_base.py:2093] 2025-12-24 01:46:09,396 >> loading file added_tokens.json
4
+ [INFO|tokenization_utils_base.py:2093] 2025-12-24 01:46:09,396 >> loading file special_tokens_map.json
5
+ [INFO|tokenization_utils_base.py:2093] 2025-12-24 01:46:09,396 >> loading file tokenizer_config.json
6
+ [INFO|tokenization_utils_base.py:2093] 2025-12-24 01:46:09,397 >> loading file chat_template.jinja
7
+ [INFO|tokenization_utils_base.py:2364] 2025-12-24 01:46:09,700 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
8
+ [INFO|configuration_utils.py:763] 2025-12-24 01:46:09,725 >> loading configuration file /workspace/meta-llama/Llama-3.1-8B-Instruct/config.json
9
+ [INFO|configuration_utils.py:839] 2025-12-24 01:46:09,727 >> Model config LlamaConfig {
10
+ "architectures": [
11
+ "LlamaForCausalLM"
12
+ ],
13
+ "attention_bias": false,
14
+ "attention_dropout": 0.0,
15
+ "bos_token_id": 128000,
16
+ "dtype": "bfloat16",
17
+ "eos_token_id": [
18
+ 128001,
19
+ 128008,
20
+ 128009
21
+ ],
22
+ "head_dim": 128,
23
+ "hidden_act": "silu",
24
+ "hidden_size": 4096,
25
+ "initializer_range": 0.02,
26
+ "intermediate_size": 14336,
27
+ "max_position_embeddings": 131072,
28
+ "mlp_bias": false,
29
+ "model_type": "llama",
30
+ "num_attention_heads": 32,
31
+ "num_hidden_layers": 32,
32
+ "num_key_value_heads": 8,
33
+ "pretraining_tp": 1,
34
+ "rms_norm_eps": 1e-05,
35
+ "rope_scaling": {
36
+ "factor": 8.0,
37
+ "high_freq_factor": 4.0,
38
+ "low_freq_factor": 1.0,
39
+ "original_max_position_embeddings": 8192,
40
+ "rope_type": "llama3"
41
+ },
42
+ "rope_theta": 500000.0,
43
+ "tie_word_embeddings": false,
44
+ "transformers_version": "4.57.1",
45
+ "use_cache": true,
46
+ "vocab_size": 128256
47
+ }
48
+
49
+ [INFO|tokenization_utils_base.py:2093] 2025-12-24 01:46:09,733 >> loading file tokenizer.json
50
+ [INFO|tokenization_utils_base.py:2093] 2025-12-24 01:46:09,733 >> loading file tokenizer.model
51
+ [INFO|tokenization_utils_base.py:2093] 2025-12-24 01:46:09,733 >> loading file added_tokens.json
52
+ [INFO|tokenization_utils_base.py:2093] 2025-12-24 01:46:09,733 >> loading file special_tokens_map.json
53
+ [INFO|tokenization_utils_base.py:2093] 2025-12-24 01:46:09,733 >> loading file tokenizer_config.json
54
+ [INFO|tokenization_utils_base.py:2093] 2025-12-24 01:46:09,733 >> loading file chat_template.jinja
55
+ [INFO|tokenization_utils_base.py:2364] 2025-12-24 01:46:10,044 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
56
+ [INFO|2025-12-24 01:46:10] llamafactory.data.template:143 >> Add pad token: <|eot_id|>
57
+ [INFO|2025-12-24 01:46:10] llamafactory.data.template:143 >> Add <|eom_id|> to stop words.
58
+ [INFO|configuration_utils.py:763] 2025-12-24 01:46:10,065 >> loading configuration file /workspace/meta-llama/Llama-3.1-8B-Instruct/config.json
59
+ [INFO|configuration_utils.py:839] 2025-12-24 01:46:10,066 >> Model config LlamaConfig {
60
+ "architectures": [
61
+ "LlamaForCausalLM"
62
+ ],
63
+ "attention_bias": false,
64
+ "attention_dropout": 0.0,
65
+ "bos_token_id": 128000,
66
+ "dtype": "bfloat16",
67
+ "eos_token_id": [
68
+ 128001,
69
+ 128008,
70
+ 128009
71
+ ],
72
+ "head_dim": 128,
73
+ "hidden_act": "silu",
74
+ "hidden_size": 4096,
75
+ "initializer_range": 0.02,
76
+ "intermediate_size": 14336,
77
+ "max_position_embeddings": 131072,
78
+ "mlp_bias": false,
79
+ "model_type": "llama",
80
+ "num_attention_heads": 32,
81
+ "num_hidden_layers": 32,
82
+ "num_key_value_heads": 8,
83
+ "pretraining_tp": 1,
84
+ "rms_norm_eps": 1e-05,
85
+ "rope_scaling": {
86
+ "factor": 8.0,
87
+ "high_freq_factor": 4.0,
88
+ "low_freq_factor": 1.0,
89
+ "original_max_position_embeddings": 8192,
90
+ "rope_type": "llama3"
91
+ },
92
+ "rope_theta": 500000.0,
93
+ "tie_word_embeddings": false,
94
+ "transformers_version": "4.57.1",
95
+ "use_cache": true,
96
+ "vocab_size": 128256
97
+ }
98
+
99
+ [WARNING|logging.py:328] 2025-12-24 01:46:10,066 >> `torch_dtype` is deprecated! Use `dtype` instead!
100
+ [INFO|2025-12-24 01:46:10] llamafactory.model.model_utils.kv_cache:143 >> KV cache is enabled for faster generation.
101
+ [WARNING|logging.py:328] 2025-12-24 01:46:10,154 >> `torch_dtype` is deprecated! Use `dtype` instead!
102
+ [INFO|modeling_utils.py:1169] 2025-12-24 01:46:10,156 >> loading weights file /workspace/meta-llama/Llama-3.1-8B-Instruct/model.safetensors.index.json
103
+ [INFO|modeling_utils.py:2341] 2025-12-24 01:46:10,162 >> Instantiating LlamaForCausalLM model under default dtype torch.bfloat16.
104
+ [INFO|configuration_utils.py:986] 2025-12-24 01:46:10,163 >> Generate config GenerationConfig {
105
+ "bos_token_id": 128000,
106
+ "eos_token_id": [
107
+ 128001,
108
+ 128008,
109
+ 128009
110
+ ]
111
+ }
112
+
113
+
114
+ [INFO|configuration_utils.py:939] 2025-12-24 01:47:00,827 >> loading configuration file /workspace/meta-llama/Llama-3.1-8B-Instruct/generation_config.json
115
+ [INFO|configuration_utils.py:986] 2025-12-24 01:47:00,829 >> Generate config GenerationConfig {
116
+ "bos_token_id": 128000,
117
+ "eos_token_id": [
118
+ 128001,
119
+ 128008,
120
+ 128009
121
+ ]
122
+ }
123
+
124
+ [INFO|dynamic_module_utils.py:423] 2025-12-24 01:47:00,835 >> Could not locate the custom_generate/generate.py inside /workspace/meta-llama/Llama-3.1-8B-Instruct.
125
+ [INFO|2025-12-24 01:47:00] llamafactory.model.model_utils.attention:143 >> Using torch SDPA for faster training and inference.
126
+ [INFO|2025-12-24 01:47:17] llamafactory.model.adapter:143 >> Merged 1 adapter(s).
127
+ [INFO|2025-12-24 01:47:17] llamafactory.model.adapter:143 >> Loaded adapter(s): /workspace/v121rc_exp1/G/checkpoint-1000
128
+ [INFO|2025-12-24 01:47:17] llamafactory.model.loader:143 >> all params: 8,030,261,248
129
+ Visit http://localhost:8002/docs for API document.
130
+ INFO: Started server process [290]
131
+ INFO: Waiting for application startup.
132
+ INFO: Application startup complete.
133
+ INFO: Uvicorn running on http://0.0.0.0:8002 (Press CTRL+C to quit)
134
+ INFO: 127.0.0.1:49800 - "GET /v1/models HTTP/1.1" 200 OK
135
+ INFO: Shutting down
136
+ INFO: Waiting for application shutdown.
137
+ INFO: Application shutdown complete.
138
+ INFO: Finished server process [290]
G/logs/G/1k_port8002_gpu0_20251224_014604_batch1.log.pid ADDED
@@ -0,0 +1 @@
 
 
1
+ 290
G/logs/G/1k_port8002_gpu0_20251224_015006_batch1.log ADDED
The diff for this file is too large to render. See raw diff
 
G/logs/G/2k_port8003_gpu0_20251224_015006_batch1.log ADDED
The diff for this file is too large to render. See raw diff
 
G/logs/G/3k_port8004_gpu0_20251224_015006_batch1.log ADDED
The diff for this file is too large to render. See raw diff
 
G/logs/G/5k_port8006_gpu0_20251224_015006_batch1.log ADDED
The diff for this file is too large to render. See raw diff
 
G/logs/G/6k_port8003_gpu0_20251224_015006_batch2.log ADDED
The diff for this file is too large to render. See raw diff
 
G/logs/G/7k_port8004_gpu0_20251229_060759_batch2.log.pid ADDED
@@ -0,0 +1 @@
 
 
1
+ 13691
G/logs/G/8k_port8005_gpu0_20251224_015006_batch2.log ADDED
The diff for this file is too large to render. See raw diff