#!/usr/bin/env python3
import datetime, os, subprocess, tempfile
from pathlib import Path
import gc

import pandas as pd, yaml, torch
from huggingface_hub import HfApi, login, hf_hub_download, model_info
from lm_eval import evaluator
from lm_eval.models.huggingface import HFLM
from peft import PeftModel
from transformers import (
    AutoModelForCausalLM,
    AutoModelForSequenceClassification,
    AutoTokenizer,
)
CONFIGS = []

# ───── Load all configs ─────
if Path("adapters.yaml").exists():
    with open("adapters.yaml") as f:
        CONFIGS.extend(yaml.safe_load(f)["adapters"])
for yml in Path("manifests").glob("*.yaml"):
    with open(yml) as f:
        CONFIGS.append(yaml.safe_load(f))
if not CONFIGS:
    raise RuntimeError("No adapter configs found in adapters.yaml or manifests/")
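# Expected config shape (keys inferred from the reads below): adapters.yaml
# holds a top-level `adapters:` list of these dicts; each manifests/*.yaml
# holds a single one. Values here are illustrative, not prescriptive:
#
#   base_model: meta-llama/Llama-2-7b-hf
#   adapter_repo: your-user/your-lora-adapter
#   adapter_type: LoRA            # optional, defaults to "LoRA"
#   trainable_params: 4194304     # optional
#   tasks: [hellaswag, arc_easy]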
# ───── Hugging Face auth ─────
token = os.getenv("HF_TOKEN")
if not token or token == "***":  # "***" is how a masked or unset CI secret can surface
    raise RuntimeError("HF_TOKEN secret is missing.")
login(token)

DATASET_REPO = os.environ["HF_DATASET_REPO"]
api = HfApi()
all_rows = []
# ───── Safe tokenizer loading ─────
def load_tokenizer(model_id: str):
    try:
        return AutoTokenizer.from_pretrained(model_id, use_fast=True)
    except Exception as e1:
        print(f"Fast tokenizer failed for {model_id}: {e1}")
        try:
            return AutoTokenizer.from_pretrained(model_id, use_fast=False)
        except Exception as e2:
            raise RuntimeError(f"Failed to load tokenizer for {model_id}: {e2}") from e2
# ───── Evaluate each adapter ─────
for cfg in CONFIGS:
    base_model_id = cfg["base_model"]
    adapter_repo = cfg["adapter_repo"]
    adapter_type = cfg.get("adapter_type", "LoRA")
    tasks = cfg["tasks"]

    if torch.cuda.is_available():
        # Reset so the peak_gpu_mem_mb recorded below reflects this adapter
        # only, not the highest peak across earlier iterations.
        torch.cuda.reset_peak_memory_stats()

    print(f"\nLoading base model: {base_model_id}")
    tokenizer = load_tokenizer(base_model_id)
    if "llama" in base_model_id.lower():
        try:
            tokenizer.legacy = False
        except AttributeError:
            pass
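    # Batched evaluation needs a pad token, which Llama-family tokenizers
    # often lack; falling back to EOS is a common convention (an added
    # safeguard, adjust if your model defines its own pad token).
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token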
    # Prefer a causal LM head; fall back to a sequence-classification head if
    # the checkpoint does not support causal generation.
    try:
        base_model = AutoModelForCausalLM.from_pretrained(
            base_model_id,
            device_map="auto",
            torch_dtype=torch.float16,
            trust_remote_code=True,
            use_safetensors=True,
        )
        is_encoder = False
        print("Loaded as Causal LM")
    except Exception as e:
        print(f"⚠️ Failed to load causal LM: {e}")
        base_model = AutoModelForSequenceClassification.from_pretrained(
            base_model_id,
            device_map="auto",
            torch_dtype=torch.float16,
            trust_remote_code=True,
            use_safetensors=True,
        )
        is_encoder = True
        print("Loaded as Sequence Classification model")
    # Verify the repo actually ships a PEFT adapter before trying to load it.
    try:
        info = model_info(adapter_repo)
        files = [f.rfilename for f in info.siblings]
        if "adapter_config.json" not in files:
            print(f"{adapter_repo} is not a valid PEFT adapter (missing adapter_config.json)")
            continue
    except Exception as e:
        print(f"Failed to inspect adapter {adapter_repo}: {e}")
        continue
    try:
        peft_model = PeftModel.from_pretrained(
            base_model,
            adapter_repo,
            device_map="auto",
            torch_dtype=torch.float16,
        )
        merged_model = peft_model.merge_and_unload()
    except Exception as e:
        print(f"Failed to apply adapter {adapter_repo}: {e}")
        continue
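    # At this point merge_and_unload() has folded the adapter weights into the
    # base model, so what follows treats it as a plain Transformers checkpoint
    # with no PEFT dependency at evaluation time.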
    merged_model.eval()

    # Sanity-check the tokenizer before persisting it with the merged model.
    if not hasattr(tokenizer, "vocab_size"):
        print("Invalid tokenizer loaded. Skipping.")
        continue

    with tempfile.TemporaryDirectory() as td:
        merged_model.save_pretrained(td)
        tokenizer.save_pretrained(td)

        device = "cuda" if torch.cuda.is_available() else "cpu"
        hf_lm = HFLM(
            pretrained=td,
            batch_size=16 if is_encoder else 8,
            device=device,
        )
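        # Note: recent lm-eval versions also accept an in-memory model, e.g.
        # HFLM(pretrained=merged_model, tokenizer=tokenizer), which would skip
        # the save/reload round-trip; the round-trip is kept here because it
        # also verifies that the merged checkpoint serializes cleanly.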
        try:
            res = evaluator.simple_evaluate(model=hf_lm, tasks=tasks)
            print(f"Raw results for {adapter_repo}: {res}")
        except Exception as e:
            print(f"Evaluation failed for {adapter_repo}: {e}")
            continue
        finally:
            # Free GPU memory whether or not evaluation succeeded, so one
            # failed adapter cannot starve the next iteration of memory.
            del merged_model, peft_model, base_model, tokenizer, hf_lm
            gc.collect()
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
                torch.cuda.ipc_collect()

    if not res.get("results"):
        print(f"Empty results for {adapter_repo}: likely a task or model compatibility issue")
        continue
    meta = {
        "model_id": adapter_repo,
        "adapter_type": adapter_type,
        "trainable_params": cfg.get("trainable_params"),
        "peak_gpu_mem_mb": torch.cuda.max_memory_allocated() // 1024**2 if torch.cuda.is_available() else None,
        "run_date": datetime.datetime.now(datetime.timezone.utc).isoformat(timespec="seconds"),
        "commit_sha": subprocess.check_output(["git", "rev-parse", "HEAD"]).strip().decode(),
    }
    count_before = len(all_rows)
    for task, scores in res["results"].items():
        for metric, value in scores.items():
            if value is None:
                continue
            # lm-eval metric keys look like "acc,none": split the aggregation
            # suffix off into its own column.
            metric_name, _, aggregation = metric.partition(",")
            all_rows.append({
                **meta,
                "task": task,
                "metric": metric_name,
                "aggregation": aggregation or None,
                "value": value,
            })
    print(f"{len(all_rows) - count_before} rows added for {adapter_repo}")
# ───── Merge and upload results ─────
if not all_rows:
    raise RuntimeError("No evaluation results were produced; nothing to upload.")
df_new = pd.DataFrame(all_rows)

with tempfile.TemporaryDirectory() as tmp:
    try:
        current_path = hf_hub_download(
            repo_id=DATASET_REPO,
            filename="data/peft_bench.parquet",
            repo_type="dataset",
            local_dir=tmp,
        )
        df_existing = pd.read_parquet(current_path)
    except Exception as e:
        # First run: the dataset repo has no results file yet.
        print(f"No existing results found ({e}); starting fresh.")
        df_existing = pd.DataFrame()

df_combined = pd.concat([df_existing, df_new], ignore_index=True)
df_combined = df_combined.sort_values("run_date")
df_combined["value"] = pd.to_numeric(df_combined["value"], errors="coerce")
| print("\nFinal new results:") | |
| print(df_new[["model_id", "task", "metric", "aggregation", "value"]]) | |
| out = Path("peft_bench.parquet") | |
| df_combined.to_parquet(out, index=False) | |
| api.upload_file( | |
| path_or_fileobj=out, | |
| path_in_repo="data/peft_bench.parquet", | |
| repo_id=DATASET_REPO, | |
| repo_type="dataset", | |
| commit_message=f"Add {len(CONFIGS)} new adapter run(s)", | |
| ) | |
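# Sketch: to inspect the uploaded benchmark later, download the same file back
# (repo and filename match the upload above):
#
#   from huggingface_hub import hf_hub_download
#   import pandas as pd
#   path = hf_hub_download(repo_id=DATASET_REPO, filename="data/peft_bench.parquet",
#                          repo_type="dataset")
#   print(pd.read_parquet(path).tail())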