Linksome committed on
Commit
44839c7
·
verified ·
1 Parent(s): 74b5b6d

Add files using upload-large-folder tool

Browse files
D/2k.yaml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ model_name_or_path: /workspace/meta-llama/Llama-3.1-8B-Instruct
2
+ adapter_name_or_path: /workspace/v121rc_exp1/D/checkpoint-2000
3
+ template: llama3
4
+ finetuning_type: lora
5
+ infer_backend: huggingface
6
+ trust_remote_code: true
D/PandaEval12_2_results/HNO2_eval_wo_reasoning_P3_results.json ADDED
The diff for this file is too large to render. See raw diff
 
D/PandaEval12_2_results/HNO2_eval_wo_reasoning_R1_results.json ADDED
The diff for this file is too large to render. See raw diff
 
D/runD.py ADDED
@@ -0,0 +1,232 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ import hashlib
4
+ from typing import Any, Dict, Tuple, List
5
+ from concurrent.futures import ThreadPoolExecutor, as_completed
6
+
7
+ from tqdm import tqdm
8
+ import requests
9
+ from loguru import logger
10
+
11
+
12
+ def getenv_str(key: str, default: str) -> str:
13
+ v = os.environ.get(key)
14
+ return default if v is None else v
15
+
16
+
17
+ def getenv_int(key: str, default: int) -> int:
18
+ v = os.environ.get(key)
19
+ if v is None or v.strip() == "":
20
+ return default
21
+ try:
22
+ return int(v)
23
+ except ValueError:
24
+ raise ValueError(f"Env var {key} must be int, got: {v!r}")
25
+
26
+
27
+ # ----------------------------
28
+ # Read config from environment
29
+ # ----------------------------
30
+ CONFIG_DIR = getenv_str("CONFIG_DIR", "/workspace/v121rc_exp1/D")
31
+ SAVE_DIR = getenv_str("SAVE_DIR", CONFIG_DIR)
32
+
33
+ WORKING_DIR = getenv_str("EVAL_WORKING_DIR", "/workspace/v121rc_exp1/EVAL/HNO2")
34
+ WORKING_EVAL_SUBWORD = getenv_str("EVAL_SUBWORD", "wo_reasoning")
35
+
36
+ FORBIDDEN_SUBWORDS: List[str] = json.loads(getenv_str("FORBIDDEN_SUBWORDS_JSON", "[]"))
37
+ PARTICULAR = getenv_str("PARTICULAR", "")
38
+
39
+ BASE_PORT = getenv_int("BASE_PORT", 8002)
40
+
41
+ # Prefer explicit URL->ckpt mapping from RUNME.sh
42
+ MODELS_JSON_ENV = getenv_str("MODELS_JSON", "").strip()
43
+ if MODELS_JSON_ENV:
44
+ MODELS: Dict[str, int] = json.loads(MODELS_JSON_ENV)
45
+ MODELS = {str(k): int(v) for k, v in MODELS.items()}
46
+ else:
47
+ # Fallback sequential mapping (rarely used now)
48
+ checkpoints = json.loads(getenv_str("CKPTS_JSON", "[1000]"))
49
+ MODELS = {f"http://localhost:{BASE_PORT + i}/v1/chat/completions": int(checkpoints[i])
50
+ for i in range(len(checkpoints))}
51
+
52
+ MAX_WORKERS = min(16, max(1, len(MODELS)))
53
+
54
+
55
+ def thought_generator_with_local_LLM_requests(
56
+ message,
57
+ LLM_model,
58
+ LLM_max_new_tokens=128,
59
+ n=1,
60
+ API_URL="http://localhost:8000/v1/chat/completions",
61
+ timeout_sec=600,
62
+ stream=False,
63
+ ) -> str | list[Any] | Any:
64
+ # Your eval uses stream=False; keep it simple.
65
+ payload = {
66
+ "model": LLM_model,
67
+ "messages": message,
68
+ "n": n,
69
+ "max_tokens": LLM_max_new_tokens,
70
+ }
71
+
72
+ r = requests.post(
73
+ API_URL,
74
+ json=payload,
75
+ headers={"Content-Type": "application/json", "Authorization": "Bearer 0"},
76
+ timeout=timeout_sec,
77
+ )
78
+
79
+ if r.status_code != 200:
80
+ logger.error(f"LLM API error {r.status_code}: {r.text}")
81
+ raise RuntimeError(f"LLM API returned {r.status_code}")
82
+
83
+ data = r.json()
84
+ if n == 1:
85
+ return data["choices"][0]["message"]["content"]
86
+ return [c["message"]["content"] for c in data["choices"]]
87
+
88
+
89
+ def extract_label(response: str) -> str:
90
+ has_yes = "Yes" in response
91
+ has_no = "No" in response
92
+ if has_yes and not has_no:
93
+ return "Yes"
94
+ if has_no and not has_yes:
95
+ return "No"
96
+ return ""
97
+
98
+
99
+ def call_one_model(
100
+ model_url: str,
101
+ ckpt: int,
102
+ msgs,
103
+ gold_label: str,
104
+ ) -> Tuple[int, Dict[str, Any]]:
105
+ try:
106
+ response = thought_generator_with_local_LLM_requests(
107
+ message=msgs,
108
+ LLM_model="custom-model",
109
+ LLM_max_new_tokens=128,
110
+ n=1,
111
+ API_URL=model_url,
112
+ timeout_sec=300,
113
+ stream=False,
114
+ )
115
+ except Exception as e:
116
+ logger.error(f"Error getting response from model at {model_url}: {e}")
117
+ response = ""
118
+
119
+ label = extract_label(response)
120
+ return ckpt, {
121
+ "label": label,
122
+ "output": response,
123
+ "full_output": response,
124
+ "accuracy": 1 if label == gold_label else 0,
125
+ }
126
+
127
+
128
+ def entry_uid(system: str, prompt: str, gold_label: str, gold_output: str) -> str:
129
+ payload = {"system": system, "prompt": prompt, "gold_label": gold_label, "gold_output": gold_output}
130
+ s = json.dumps(payload, ensure_ascii=False, sort_keys=True, separators=(",", ":"))
131
+ return hashlib.sha1(s.encode("utf-8")).hexdigest()
132
+
133
+
134
+ def load_cache(path: str) -> Dict[str, Dict[str, Any]]:
135
+ if not os.path.exists(path):
136
+ return {}
137
+ try:
138
+ with open(path, "r") as f:
139
+ data = json.load(f)
140
+ cache = {}
141
+ for e in data:
142
+ uid = entry_uid(e.get("system", ""), e.get("prompt", ""), e.get("gold_label", ""), e.get("gold_output", ""))
143
+ cache[uid] = e
144
+ logger.info(f"Loaded cache from {path}: {len(cache)} entries")
145
+ return cache
146
+ except Exception as ex:
147
+ logger.warning(f"Failed to load cache from {path} (starting fresh): {ex}")
148
+ return {}
149
+
150
+
151
+ def should_run_step(o_entry: Dict[str, Any], ckpt: int) -> bool:
152
+ key = f"step_{ckpt}"
153
+ if key not in o_entry:
154
+ return True
155
+ v = o_entry.get(key) or {}
156
+ out = v.get("output", "")
157
+ return not isinstance(out, str) or out.strip() == ""
158
+
159
+
160
+ def atomic_write_json(path: str, obj: Any) -> None:
161
+ tmp = path + ".tmp"
162
+ with open(tmp, "w") as f:
163
+ json.dump(obj, f, indent=2, ensure_ascii=False)
164
+ os.replace(tmp, path)
165
+
166
+
167
+ def should_process_file(filename: str) -> bool:
168
+ if WORKING_EVAL_SUBWORD and WORKING_EVAL_SUBWORD not in filename:
169
+ return False
170
+ if any(sub in filename for sub in FORBIDDEN_SUBWORDS):
171
+ return False
172
+ if PARTICULAR and PARTICULAR not in filename:
173
+ return False
174
+ return filename.endswith(".json")
175
+
176
+
177
+ if __name__ == "__main__":
178
+ logger.info(f"WORKING_DIR={WORKING_DIR}")
179
+ logger.info(f"SAVE_DIR={SAVE_DIR}")
180
+ logger.info(f"MODELS={MODELS}")
181
+ logger.info(f"MAX_WORKERS={MAX_WORKERS}")
182
+
183
+ if not MODELS:
184
+ print("No models to evaluate (MODELS is empty). Exiting.")
185
+ raise SystemExit(0)
186
+
187
+ os.makedirs(SAVE_DIR, exist_ok=True)
188
+
189
+ for original_eval_log_file in os.listdir(WORKING_DIR):
190
+ if not should_process_file(original_eval_log_file):
191
+ continue
192
+ print(f"Working in {original_eval_log_file}")
193
+
194
+ original_eval_file = os.path.join(WORKING_DIR, original_eval_log_file)
195
+ output_eval_file = os.path.join(SAVE_DIR, original_eval_log_file.replace(".json", "_results.json"))
196
+
197
+ with open(original_eval_file, "r") as f:
198
+ eval_data: list[dict] = json.load(f)
199
+
200
+ cache_map = load_cache(output_eval_file)
201
+ output_eval_data = []
202
+
203
+ with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
204
+ for idx, entry in enumerate(tqdm(eval_data)):
205
+ system = entry["system"]
206
+ prompt = entry["prompt"]
207
+ gold_label = entry["gold_label"]
208
+ gold_output = entry["gold_output"]
209
+
210
+ uid = entry_uid(system, prompt, gold_label, gold_output)
211
+ o_entry = cache_map.get(uid, {})
212
+ o_entry.update({"system": system, "prompt": prompt, "gold_label": gold_label, "gold_output": gold_output})
213
+
214
+ msgs = [{"role": "system", "content": system}, {"role": "user", "content": prompt}]
215
+
216
+ futures = []
217
+ for model_url, ckpt in MODELS.items():
218
+ if should_run_step(o_entry, ckpt):
219
+ futures.append(executor.submit(call_one_model, model_url, ckpt, msgs, gold_label))
220
+
221
+ for fut in as_completed(futures):
222
+ ckpt, result = fut.result()
223
+ o_entry[f"step_{ckpt}"] = result
224
+
225
+ output_eval_data.append(o_entry)
226
+
227
+ if (idx + 1) % 50 == 0:
228
+ atomic_write_json(output_eval_file, output_eval_data)
229
+
230
+ atomic_write_json(output_eval_file, output_eval_data)
231
+
232
+ print("Evaluation with checkpoints completed.")
E/logs/E/5k_port8006_gpu0_20251224_014758_batch1.log ADDED
The diff for this file is too large to render. See raw diff
 
E/logs/E/9k_port8005_gpu0_20251224_014758_batch2.log ADDED
The diff for this file is too large to render. See raw diff
 
F/logs/F/10k_port8006_gpu0_20251224_014934_batch2.log.pid ADDED
@@ -0,0 +1 @@
 
 
1
+ 2921
F/logs/F/1k_port8002_gpu0_20251229_035825_batch1.log ADDED
The diff for this file is too large to render. See raw diff
 
F/logs/F/5k_port8006_gpu0_20251229_035825_batch1.log ADDED
The diff for this file is too large to render. See raw diff
 
F/logs/F/5k_port8006_gpu0_20251229_035825_batch1.log.pid ADDED
@@ -0,0 +1 @@
 
 
1
+ 1755
G/logs/G/10k_port8003_gpu0_20251229_035833_batch3.log.pid ADDED
@@ -0,0 +1 @@
 
 
1
+ 7530
G/logs/G/1k_port8002_gpu0_20251224_014604_batch1.log ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [INFO|tokenization_utils_base.py:2093] 2025-12-24 01:46:09,396 >> loading file tokenizer.json
2
+ [INFO|tokenization_utils_base.py:2093] 2025-12-24 01:46:09,396 >> loading file tokenizer.model
3
+ [INFO|tokenization_utils_base.py:2093] 2025-12-24 01:46:09,396 >> loading file added_tokens.json
4
+ [INFO|tokenization_utils_base.py:2093] 2025-12-24 01:46:09,396 >> loading file special_tokens_map.json
5
+ [INFO|tokenization_utils_base.py:2093] 2025-12-24 01:46:09,396 >> loading file tokenizer_config.json
6
+ [INFO|tokenization_utils_base.py:2093] 2025-12-24 01:46:09,397 >> loading file chat_template.jinja
7
+ [INFO|tokenization_utils_base.py:2364] 2025-12-24 01:46:09,700 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
8
+ [INFO|configuration_utils.py:763] 2025-12-24 01:46:09,725 >> loading configuration file /workspace/meta-llama/Llama-3.1-8B-Instruct/config.json
9
+ [INFO|configuration_utils.py:839] 2025-12-24 01:46:09,727 >> Model config LlamaConfig {
10
+ "architectures": [
11
+ "LlamaForCausalLM"
12
+ ],
13
+ "attention_bias": false,
14
+ "attention_dropout": 0.0,
15
+ "bos_token_id": 128000,
16
+ "dtype": "bfloat16",
17
+ "eos_token_id": [
18
+ 128001,
19
+ 128008,
20
+ 128009
21
+ ],
22
+ "head_dim": 128,
23
+ "hidden_act": "silu",
24
+ "hidden_size": 4096,
25
+ "initializer_range": 0.02,
26
+ "intermediate_size": 14336,
27
+ "max_position_embeddings": 131072,
28
+ "mlp_bias": false,
29
+ "model_type": "llama",
30
+ "num_attention_heads": 32,
31
+ "num_hidden_layers": 32,
32
+ "num_key_value_heads": 8,
33
+ "pretraining_tp": 1,
34
+ "rms_norm_eps": 1e-05,
35
+ "rope_scaling": {
36
+ "factor": 8.0,
37
+ "high_freq_factor": 4.0,
38
+ "low_freq_factor": 1.0,
39
+ "original_max_position_embeddings": 8192,
40
+ "rope_type": "llama3"
41
+ },
42
+ "rope_theta": 500000.0,
43
+ "tie_word_embeddings": false,
44
+ "transformers_version": "4.57.1",
45
+ "use_cache": true,
46
+ "vocab_size": 128256
47
+ }
48
+
49
+ [INFO|tokenization_utils_base.py:2093] 2025-12-24 01:46:09,733 >> loading file tokenizer.json
50
+ [INFO|tokenization_utils_base.py:2093] 2025-12-24 01:46:09,733 >> loading file tokenizer.model
51
+ [INFO|tokenization_utils_base.py:2093] 2025-12-24 01:46:09,733 >> loading file added_tokens.json
52
+ [INFO|tokenization_utils_base.py:2093] 2025-12-24 01:46:09,733 >> loading file special_tokens_map.json
53
+ [INFO|tokenization_utils_base.py:2093] 2025-12-24 01:46:09,733 >> loading file tokenizer_config.json
54
+ [INFO|tokenization_utils_base.py:2093] 2025-12-24 01:46:09,733 >> loading file chat_template.jinja
55
+ [INFO|tokenization_utils_base.py:2364] 2025-12-24 01:46:10,044 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
56
+ [INFO|2025-12-24 01:46:10] llamafactory.data.template:143 >> Add pad token: <|eot_id|>
57
+ [INFO|2025-12-24 01:46:10] llamafactory.data.template:143 >> Add <|eom_id|> to stop words.
58
+ [INFO|configuration_utils.py:763] 2025-12-24 01:46:10,065 >> loading configuration file /workspace/meta-llama/Llama-3.1-8B-Instruct/config.json
59
+ [INFO|configuration_utils.py:839] 2025-12-24 01:46:10,066 >> Model config LlamaConfig {
60
+ "architectures": [
61
+ "LlamaForCausalLM"
62
+ ],
63
+ "attention_bias": false,
64
+ "attention_dropout": 0.0,
65
+ "bos_token_id": 128000,
66
+ "dtype": "bfloat16",
67
+ "eos_token_id": [
68
+ 128001,
69
+ 128008,
70
+ 128009
71
+ ],
72
+ "head_dim": 128,
73
+ "hidden_act": "silu",
74
+ "hidden_size": 4096,
75
+ "initializer_range": 0.02,
76
+ "intermediate_size": 14336,
77
+ "max_position_embeddings": 131072,
78
+ "mlp_bias": false,
79
+ "model_type": "llama",
80
+ "num_attention_heads": 32,
81
+ "num_hidden_layers": 32,
82
+ "num_key_value_heads": 8,
83
+ "pretraining_tp": 1,
84
+ "rms_norm_eps": 1e-05,
85
+ "rope_scaling": {
86
+ "factor": 8.0,
87
+ "high_freq_factor": 4.0,
88
+ "low_freq_factor": 1.0,
89
+ "original_max_position_embeddings": 8192,
90
+ "rope_type": "llama3"
91
+ },
92
+ "rope_theta": 500000.0,
93
+ "tie_word_embeddings": false,
94
+ "transformers_version": "4.57.1",
95
+ "use_cache": true,
96
+ "vocab_size": 128256
97
+ }
98
+
99
+ [WARNING|logging.py:328] 2025-12-24 01:46:10,066 >> `torch_dtype` is deprecated! Use `dtype` instead!
100
+ [INFO|2025-12-24 01:46:10] llamafactory.model.model_utils.kv_cache:143 >> KV cache is enabled for faster generation.
101
+ [WARNING|logging.py:328] 2025-12-24 01:46:10,154 >> `torch_dtype` is deprecated! Use `dtype` instead!
102
+ [INFO|modeling_utils.py:1169] 2025-12-24 01:46:10,156 >> loading weights file /workspace/meta-llama/Llama-3.1-8B-Instruct/model.safetensors.index.json
103
+ [INFO|modeling_utils.py:2341] 2025-12-24 01:46:10,162 >> Instantiating LlamaForCausalLM model under default dtype torch.bfloat16.
104
+ [INFO|configuration_utils.py:986] 2025-12-24 01:46:10,163 >> Generate config GenerationConfig {
105
+ "bos_token_id": 128000,
106
+ "eos_token_id": [
107
+ 128001,
108
+ 128008,
109
+ 128009
110
+ ]
111
+ }
112
+
113
+
114
+ [INFO|configuration_utils.py:939] 2025-12-24 01:47:00,827 >> loading configuration file /workspace/meta-llama/Llama-3.1-8B-Instruct/generation_config.json
115
+ [INFO|configuration_utils.py:986] 2025-12-24 01:47:00,829 >> Generate config GenerationConfig {
116
+ "bos_token_id": 128000,
117
+ "eos_token_id": [
118
+ 128001,
119
+ 128008,
120
+ 128009
121
+ ]
122
+ }
123
+
124
+ [INFO|dynamic_module_utils.py:423] 2025-12-24 01:47:00,835 >> Could not locate the custom_generate/generate.py inside /workspace/meta-llama/Llama-3.1-8B-Instruct.
125
+ [INFO|2025-12-24 01:47:00] llamafactory.model.model_utils.attention:143 >> Using torch SDPA for faster training and inference.
126
+ [INFO|2025-12-24 01:47:17] llamafactory.model.adapter:143 >> Merged 1 adapter(s).
127
+ [INFO|2025-12-24 01:47:17] llamafactory.model.adapter:143 >> Loaded adapter(s): /workspace/v121rc_exp1/G/checkpoint-1000
128
+ [INFO|2025-12-24 01:47:17] llamafactory.model.loader:143 >> all params: 8,030,261,248
129
+ Visit http://localhost:8002/docs for API document.
130
+ INFO: Started server process [290]
131
+ INFO: Waiting for application startup.
132
+ INFO: Application startup complete.
133
+ INFO: Uvicorn running on http://0.0.0.0:8002 (Press CTRL+C to quit)
134
+ INFO: 127.0.0.1:49800 - "GET /v1/models HTTP/1.1" 200 OK
135
+ INFO: Shutting down
136
+ INFO: Waiting for application shutdown.
137
+ INFO: Application shutdown complete.
138
+ INFO: Finished server process [290]
G/logs/G/1k_port8002_gpu0_20251224_014604_batch1.log.pid ADDED
@@ -0,0 +1 @@
 
 
1
+ 290
G/logs/G/1k_port8002_gpu0_20251224_015006_batch1.log ADDED
The diff for this file is too large to render. See raw diff
 
G/logs/G/2k_port8003_gpu0_20251224_015006_batch1.log ADDED
The diff for this file is too large to render. See raw diff
 
G/logs/G/3k_port8004_gpu0_20251224_015006_batch1.log ADDED
The diff for this file is too large to render. See raw diff
 
G/logs/G/5k_port8006_gpu0_20251224_015006_batch1.log ADDED
The diff for this file is too large to render. See raw diff
 
G/logs/G/6k_port8003_gpu0_20251224_015006_batch2.log ADDED
The diff for this file is too large to render. See raw diff
 
G/logs/G/7k_port8004_gpu0_20251229_060759_batch2.log.pid ADDED
@@ -0,0 +1 @@
 
 
1
+ 13691
G/logs/G/8k_port8005_gpu0_20251224_015006_batch2.log ADDED
The diff for this file is too large to render. See raw diff