LbbbbbY
/

FinAI_Contest_FinGPT

@@ -1,208 +0,0 @@
-# ===== FinLoRA evaluation on LLaMA-3.1-8B (LoRA 8-bit) | JSONL inputs =====
-import gc
-import json
-import os
-from typing import List, Optional, Tuple
-
-import psutil
-import torch
-import torch.nn as nn
-from peft import PeftModel
-from sklearn.metrics import accuracy_score, f1_score
-from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
-# --------- CONFIG ----------
-# Device for inputs and the classification heads: CUDA when torch can see a
-# GPU, otherwise CPU.
-DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
-# Location passed to from_pretrained() for both tokenizer and base model.
-# Use the SAME local LLaMA snapshot that was used for training.
-BASE_DIR    = "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b"
-# Training artifacts: the LoRA adapter directory and the torch-saved dict that
-# holds the "proj" and "cls" head state dicts.
-ADAPTER_DIR = "finlora_lora_ckpt_llama_8bit_r8"   # from training
-HEADS_PATH  = "finlora_heads_llama_8bit_r8.pt"    # from training
-# JSONL evaluation files (one JSON object per line), evaluated in this order.
-EVAL_FILES = ["fiqa_test.jsonl", "fpb_test.jsonl"]
-# Tokenization / eval params. MAXLEN is the truncation length in tokens;
-# INIT_BATCH is only the starting batch size.
-MAXLEN     = 256
-INIT_BATCH = 64  # will auto-shrink on OOM
-# ---------------- Memory helpers ----------------
-def print_mem(tag: str = ""):
-    v = psutil.virtual_memory()
-    cpu = f"CPU used: {(v.total - v.available)/1e9:.1f}/{v.total/1e9:.1f} GB"
-    if torch.cuda.is_available():
-        free, total = torch.cuda.mem_get_info()
-        gpu = f"GPU used: {(total - free)/1e9:.1f}/{total/1e9:.1f} GB"
-    else:
-        gpu = "GPU: n/a"
-    print(f"[MEM] {tag} | {cpu} | {gpu}")
-def memory_guard():
-    gc.collect()
-    if torch.cuda.is_available():
-        torch.cuda.empty_cache()
-        torch.cuda.ipc_collect()
-# ---------------- Label/text helpers ----------------
-# Raw label spelling -> class index. One row per class:
-# 0 = negative, 1 = neutral, 2 = positive. Both string and int spellings of
-# -1 / 0 / 1 are accepted.
-LBL_MAP_3 = {
-    "-1":0, "neg":0, "negative":0, -1:0,
-    "0":1, "neu":1, "neutral":1,   0:1,
-    "1":2, "pos":2, "positive":2,  1:2,
-}
-# Candidate field names for the input text, in priority order.
-TEXT_KEYS  = ["context", "text", "sentence", "content", "Title", "question_title", "Input", "review"]
-# Candidate field names for the gold label, in priority order.
-LABEL_KEYS = ["label", "sentiment", "Sentiment", "class", "target", "y"]
-def _find_key(d: dict, candidates: List[str]) -> str:
-    keys_lower = {k.lower(): k for k in d.keys()}
-    for c in candidates:
-        if c in d: return c
-        if c.lower() in keys_lower: return keys_lower[c.lower()]
-    return None
-def _norm_label(v) -> int:
-    if v is None: return 1
-    s = str(v).strip().lower()
-    if s in LBL_MAP_3: return LBL_MAP_3[s]
-    if s.lstrip("-").isdigit():
-        try: return LBL_MAP_3[int(s)]
-        except Exception: return 1
-    return 1
-def load_eval_jsonl(path: str) -> Tuple[List[str], List[int]]:
-    if not os.path.exists(path):
-        raise FileNotFoundError(f"Eval file not found: {path}")
-    texts, labels = [], []
-    with open(path, "r", encoding="utf-8") as f:
-        for line in f:
-            line = line.strip()
-            if not line: continue
-            try:
-                ex = json.loads(line)
-            except Exception:
-                continue
-            t_key = _find_key(ex, TEXT_KEYS)
-            y_key = _find_key(ex, LABEL_KEYS)
-            if t_key is None or y_key is None:
-                # try a couple more common fields
-                t_key = t_key or _find_key(ex, ["Sentence", "question", "title"])
-                y_key = y_key or _find_key(ex, ["Label", "SentimentLabel"])
-            if t_key is None or y_key is None:
-                continue
-            texts.append(str(ex.get(t_key, "")))
-            labels.append(_norm_label(ex.get(y_key, None)))
-    if not texts:
-        raise ValueError(f"No (text,label) rows found in {path}. Check field names.")
-    return texts, labels
-# ---------------- Load LLaMA base + tokenizer (8-bit) ----------------
-print_mem("before load")
-tok = AutoTokenizer.from_pretrained(BASE_DIR, use_fast=True, trust_remote_code=True)
-# Batches are padded later on, so a pad token is required; reuse EOS when the
-# tokenizer does not define one.
-if tok.pad_token_id is None:
-    tok.pad_token = tok.eos_token
-tok.padding_side = "left"
-# Quantization settings handed to from_pretrained(): 8-bit weights.
-bnb = BitsAndBytesConfig(
-    load_in_8bit=True,
-)
-# NOTE(review): trust_remote_code=True allows from_pretrained() to execute
-# Python code shipped inside the checkpoint directory; only point BASE_DIR at
-# a snapshot you trust.
-base = AutoModelForCausalLM.from_pretrained(
-    BASE_DIR,
-    quantization_config=bnb,
-    torch_dtype=torch.bfloat16,
-    low_cpu_mem_usage=True,
-    device_map="auto",
-    trust_remote_code=True,
-)
-# KV cache switched off; the evaluation below only runs single forward passes.
-base.config.use_cache = False
-print_mem("after base load")
-# ---------------- Attach LoRA adapters ----------------
-# Wrap the quantized base model with the LoRA adapter stored in ADAPTER_DIR.
-enc = PeftModel.from_pretrained(base, ADAPTER_DIR)
-# Evaluation only: put the wrapped model in eval mode.
-enc.eval()
-print_mem("after PEFT attach")
-# ---------------- Rebuild heads & load (256-d proj, 3-way cls) ----------------
-# Hidden width of the encoder; both heads take vectors of this size as input.
-hid = enc.config.hidden_size  # LLaMA-3.1-8B -> 4096
-# proj: hid -> hid -> 256 with a Tanh in between. It is restored from the
-# checkpoint but is not called by the evaluation code visible in this file.
-proj = nn.Sequential(nn.Linear(hid, hid), nn.Tanh(), nn.Linear(hid, 256)).to(DEVICE).eval()
-# cls: 3-way linear classifier applied to the mean-pooled hidden state.
-# NOTE(review): the heads are placed on DEVICE while the base model is placed
-# by device_map="auto"; confirm both end up on the same device on multi-GPU hosts.
-cls  = nn.Linear(hid, 3).to(DEVICE).eval()
-# HEADS_PATH must hold a dict with "proj" and "cls" state dicts.
-# NOTE(review): torch.load unpickles the file; only load checkpoints you
-# produced yourself or otherwise trust.
-state = torch.load(HEADS_PATH, map_location="cpu")
-# strict=True: loading raises if any parameter key is missing or unexpected,
-# which doubles as a sanity check that the heads match this architecture.
-_ = proj.load_state_dict(state["proj"], strict=True)
-_ = cls.load_state_dict(state["cls"], strict=True)
-# ---------------- Pooling over LLaMA hidden states ----------------
-@torch.no_grad()
-def _mean_pool(last_hidden_state: torch.Tensor, attn_mask: torch.Tensor) -> torch.Tensor:
-    mask = attn_mask.unsqueeze(-1).type_as(last_hidden_state)   # [B,T,1]
-    summed = (last_hidden_state * mask).sum(dim=1)              # [B,H]
-    denom  = mask.sum(dim=1).clamp(min=1e-6)                    # [B,1]
-    return summed / denom
-# make sure your tokenizer has a pad token & left padding for LLaMA
-if tok.pad_token_id is None:
-    tok.pad_token = tok.eos_token
-tok.padding_side = "left"
-def _mean_pool(last_hidden_state: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
-    mask = attention_mask.unsqueeze(-1).type_as(last_hidden_state)
-    summed = (last_hidden_state * mask).sum(dim=1)
-    denom = mask.sum(dim=1).clamp(min=1e-6)
-    return summed / denom
-@torch.inference_mode()
-def encode_cls(batch):
-    batch = {k: v.to(DEVICE, non_blocking=True) for k, v in batch.items()}
-    # ask the model to return hidden states
-    out = enc(**batch, output_hidden_states=True)
-    # for causal LM, take the top hidden layer
-    last = out.hidden_states[-1] if hasattr(out, "hidden_states") else out[0]
-    h = _mean_pool(last, batch["attention_mask"])
-    return h
-@torch.inference_mode()
-def logits_for_texts(texts, maxlen=MAXLEN):
-    encd = tok(texts, padding=True, truncation=True, max_length=maxlen, return_tensors="pt")
-    with torch.amp.autocast(device_type="cuda", dtype=torch.bfloat16, enabled=torch.cuda.is_available()):
-        h = encode_cls(encd)
-        return cls(h)
-# ---------------- OOM-safe evaluation ----------------
-def evaluate_set(texts: List[str], labels: List[int], batch: int = INIT_BATCH, maxlen: int = MAXLEN):
-    preds = []
-    i, n = 0, len(texts)
-    while i < n:
-        cur_bs = min(batch, n - i)
-        while True:
-            try:
-                l = logits_for_texts(texts[i:i+cur_bs], maxlen=maxlen)
-                preds.extend(l.argmax(dim=1).cpu().tolist())
-                break
-            except torch.cuda.OutOfMemoryError:
-                memory_guard()
-                if cur_bs <= 1: raise
-                cur_bs = max(1, cur_bs // 2)
-                print(f"[OOM] shrinking batch to {cur_bs}")
-            except RuntimeError as e:
-                if "out of memory" in str(e).lower():
-                    memory_guard()
-                    if cur_bs <= 1: raise
-                    cur_bs = max(1, cur_bs // 2)
-                    print(f"[OOM] shrinking batch to {cur_bs}")
-                else:
-                    raise
-        i += cur_bs
-        batch = cur_bs
-    return {
-        "accuracy": accuracy_score(labels, preds),
-        "macro_f1": f1_score(labels, preds, average="macro"),
-    }
-# ---------------- Run JSONL evaluations ----------------
-print_mem("before JSONL eval")
-# Per-file metrics, keyed by the JSONL path.
-results = {}
-for jpath in EVAL_FILES:
-    # load_eval_jsonl raises if the file is missing or holds no usable rows,
-    # which aborts the whole run.
-    texts, labels = load_eval_jsonl(jpath)
-    print(f"Loaded {jpath}: {len(texts)} rows")
-    # Every file starts again from INIT_BATCH; evaluate_set shrinks it on OOM.
-    metrics = evaluate_set(texts, labels, batch=INIT_BATCH, maxlen=MAXLEN)
-    results[jpath] = metrics
-    print(f"{jpath} -> Acc: {metrics['accuracy']:.4f} | Macro-F1: {metrics['macro_f1']:.4f}")
-print("Summary:", results)
-print_mem("done")