sandbox

Paused

App Files Files Community

ScottzillaSystems committed on May 2

Commit

3b60a91

verified ·

1 Parent(s): bbb9941

Add model diagnostic script for Agent Zero debugging

Browse files

Files changed (1) hide show

debug_models.py +233 -0

debug_models.py ADDED Viewed

	@@ -0,0 +1,233 @@

+#!/usr/bin/env python3
+"""
+Agent Zero Model Diagnostics — Tests loading each model from the catalog.
+Run this on CPU to identify config/tokenizer issues before deploying to ZeroGPU.
+"""
+import os
+import sys
+import json
+import traceback
+from typing import Dict, Any
+# Install deps (best effort): a failed install must not abort the diagnostics,
+# but it is reported so a later ImportError or version mismatch can be traced back to it.
+import subprocess
+_pip_result = subprocess.run([sys.executable, "-m", "pip", "install", "-q",
+                              "transformers>=4.52.0", "accelerate>=0.30.0", "torch", "huggingface-hub>=0.25.0"],
+                             capture_output=True, text=True)
+if _pip_result.returncode != 0:
+    print(f"⚠️ pip install exited with code {_pip_result.returncode}; continuing with already-installed packages")
+    # Only the tail of stderr is shown; pip puts the actionable error at the end.
+    print((_pip_result.stderr or "")[-2000:])
+import torch
+from transformers import (
+    AutoModelForCausalLM,
+    AutoTokenizer,
+    AutoProcessor,
+    AutoModelForImageTextToText,
+    AutoConfig,
+)
+from huggingface_hub import HfApi
+# Refuse to run without a Hugging Face token; the Hub calls further down pass it explicitly.
+HF_TOKEN = os.environ.get("HF_TOKEN")
+if not HF_TOKEN:
+    print("❌ ERROR: HF_TOKEN not set!")
+    raise SystemExit(1)
+# Environment report: only the token's length is printed, never its value.
+print(f"✅ HF_TOKEN present (length: {len(HF_TOKEN)})")
+print(f"✅ PyTorch version: {torch.__version__}")
+print(f"✅ CUDA available: {torch.cuda.is_available()}")
+import transformers
+print(f"✅ Transformers version: {transformers.__version__}")
+# Model catalog: short key -> Hub repo id, an "architecture" tag ("causal_lm" or
+# "conditional_gen") that selects whether AutoTokenizer or AutoProcessor is tried first,
+# and a human-readable size label.
+MODELS = {
+    "chatgpt5-494m": dict(
+        repo="ScottzillaSystems/ChatGPT-5",
+        architecture="causal_lm",
+        size="494M",
+    ),
+    "qwen3.5-9b-opus": dict(
+        repo="ScottzillaSystems/Huihui-Qwen3.5-9B-Claude-4.6-Opus-abliterated",
+        architecture="conditional_gen",
+        size="9.6B",
+    ),
+    "supergemma4-7.5b": dict(
+        repo="ScottzillaSystems/supergemma4-e4b-abliterated",
+        architecture="conditional_gen",
+        size="7.5B",
+    ),
+    "cydonia-24b": dict(
+        repo="ScottzillaSystems/Cydonia-24B-v4.1",
+        architecture="causal_lm",
+        size="24B",
+    ),
+    "qwen3.6-27b": dict(
+        repo="ScottzillaSystems/Huihui-Qwen3.6-27B-abliterated",
+        architecture="conditional_gen",
+        size="27.8B",
+    ),
+    "qwen3-vl-8b": dict(
+        repo="ScottzillaSystems/Huihui-Qwen3-VL-8B-Instruct-abliterated",
+        architecture="conditional_gen",
+        size="8.8B",
+    ),
+    "qwen3.5-9b-base": dict(
+        repo="ScottzillaSystems/Qwen3.5-9B",
+        architecture="conditional_gen",
+        size="9.6B",
+    ),
+}
+# Per-model diagnostic records keyed by catalog name; read by the summary and the JSON dump.
+results = {}
+print("\n" + "=" * 80)
+print("PHASE 1: Check model configs (no download, just metadata)")
+print("=" * 80)
+for key, model_info in MODELS.items():
+    repo = model_info["repo"]
+    # Entries tagged "conditional_gen" try AutoProcessor first; all others try AutoTokenizer first.
+    use_processor = model_info["architecture"] == "conditional_gen"
+    print(f"\n{'─' * 60}")
+    print(f"Testing: {key} ({repo})")
+    print(f"{'─' * 60}")
+    # Every status flag starts as False so each record in the JSON dump has the same keys,
+    # even when a later test is skipped because an earlier one failed.
+    result = {"repo": repo, "config_ok": False, "tokenizer_ok": False,
+              "chat_template_ok": False, "tokenization_ok": False, "errors": []}
+    # NOTE(review): trust_remote_code=True lets code shipped in the model repo run locally —
+    # confirm every catalog repo is trusted before running this script.
+    # Test 1: Load config. Nothing else can be checked without it, so a failure skips the model.
+    try:
+        config = AutoConfig.from_pretrained(repo, trust_remote_code=True, token=HF_TOKEN)
+        architectures = getattr(config, "architectures", None)
+        arch = architectures[0] if architectures else "unknown"
+        model_type = getattr(config, 'model_type', 'unknown')
+        print(f"  ✅ Config loaded: arch={arch}, model_type={model_type}")
+        result["config_ok"] = True
+        result["architecture_actual"] = arch
+        result["model_type"] = model_type
+    except Exception as e:
+        print(f"  ❌ Config FAILED: {type(e).__name__}: {e}")
+        result["errors"].append(f"Config: {type(e).__name__}: {e}")
+        results[key] = result
+        continue
+    # Test 2: Load tokenizer/processor, falling back to the other Auto class on failure.
+    try:
+        if use_processor:
+            tokenizer = AutoProcessor.from_pretrained(repo, trust_remote_code=True, token=HF_TOKEN)
+            print("  ✅ AutoProcessor loaded")
+        else:
+            tokenizer = AutoTokenizer.from_pretrained(repo, trust_remote_code=True, token=HF_TOKEN)
+            # Test 4 pads its input, which requires a pad token to be defined.
+            if tokenizer.pad_token is None:
+                tokenizer.pad_token = tokenizer.eos_token
+            print("  ✅ AutoTokenizer loaded")
+        result["tokenizer_ok"] = True
+        result["tokenizer_type"] = type(tokenizer).__name__
+    except Exception as e:
+        print(f"  ❌ Tokenizer/Processor FAILED: {type(e).__name__}: {e}")
+        traceback.print_exc()
+        result["errors"].append(f"Tokenizer: {type(e).__name__}: {e}")
+        # Try alternative loading
+        print("  🔄 Trying alternative loading...")
+        try:
+            if use_processor:
+                tokenizer = AutoTokenizer.from_pretrained(repo, trust_remote_code=True, token=HF_TOKEN)
+                # Same pad-token guard as the primary AutoTokenizer path, so Test 4's
+                # padding=True does not fail just because the fallback was used.
+                if tokenizer.pad_token is None:
+                    tokenizer.pad_token = tokenizer.eos_token
+                print("  ⚠️ AutoTokenizer works instead of AutoProcessor!")
+                result["tokenizer_ok"] = True
+                result["tokenizer_type"] = f"FALLBACK: {type(tokenizer).__name__}"
+                result["errors"].append("AutoProcessor failed but AutoTokenizer works")
+            else:
+                tokenizer = AutoProcessor.from_pretrained(repo, trust_remote_code=True, token=HF_TOKEN)
+                print("  ⚠️ AutoProcessor works instead of AutoTokenizer!")
+                result["tokenizer_ok"] = True
+                result["tokenizer_type"] = f"FALLBACK: {type(tokenizer).__name__}"
+                # Record the fallback the same way the conditional_gen branch does.
+                result["errors"].append("AutoTokenizer failed but AutoProcessor works")
+        except Exception as e2:
+            print(f"  ❌ Alternative also FAILED: {type(e2).__name__}: {e2}")
+            result["errors"].append(f"Alt tokenizer: {type(e2).__name__}: {e2}")
+    # Test 3: Chat template (render a single user turn to text, without tokenizing).
+    if result["tokenizer_ok"]:
+        try:
+            test_messages = [
+                {"role": "user", "content": "Hello, how are you?"}
+            ]
+            text = tokenizer.apply_chat_template(
+                test_messages, tokenize=False, add_generation_prompt=True
+            )
+            print(f"  ✅ Chat template works (output length: {len(text)} chars)")
+            print(f"     First 200 chars: {repr(text[:200])}")
+            result["chat_template_ok"] = True
+            result["chat_template_sample"] = text[:200]
+        except Exception as e:
+            print(f"  ❌ Chat template FAILED: {type(e).__name__}: {e}")
+            traceback.print_exc()
+            result["errors"].append(f"Chat template: {type(e).__name__}: {e}")
+    # Test 4: Tokenization of the rendered prompt (needs the text produced by Test 3).
+    if result["tokenizer_ok"] and result["chat_template_ok"]:
+        try:
+            if use_processor:
+                inputs = tokenizer(text=[text], return_tensors="pt", padding=True)
+            else:
+                inputs = tokenizer(text, return_tensors="pt", padding=True)
+            # Report only the entries that look like tensors (anything exposing .shape).
+            tensor_keys = [k for k in inputs.keys() if hasattr(inputs[k], 'shape')]
+            for k in tensor_keys:
+                print(f"  ✅ Input '{k}': shape={inputs[k].shape}, dtype={inputs[k].dtype}")
+            result["tokenization_ok"] = True
+        except Exception as e:
+            print(f"  ❌ Tokenization FAILED: {type(e).__name__}: {e}")
+            traceback.print_exc()
+            result["errors"].append(f"Tokenization: {type(e).__name__}: {e}")
+            result["tokenization_ok"] = False
+    # Test 5: Suggest an Auto class from the architecture name reported by the config.
+    try:
+        if arch in ("Qwen2ForCausalLM", "MistralForCausalLM", "LlamaForCausalLM"):
+            result["recommended_loader"] = "AutoModelForCausalLM"
+        elif "ForConditionalGeneration" in arch or "ForImageTextToText" in arch:
+            result["recommended_loader"] = "AutoModelForImageTextToText"
+        else:
+            result["recommended_loader"] = f"Unknown for {arch}"
+        print(f"  ℹ️ Recommended loader: {result['recommended_loader']}")
+    except Exception as e:
+        # Best-effort step: record the failure rather than hide it, and keep going.
+        print(f"  ❌ Loader detection FAILED: {type(e).__name__}: {e}")
+        result["errors"].append(f"Loader detection: {type(e).__name__}: {e}")
+    results[key] = result
+# Summary: one status line per model, followed by its recorded errors and loader hint.
+print("\n\n" + "=" * 80)
+print("SUMMARY")
+print("=" * 80)
+for key, r in results.items():
+    # (label, passed) pairs in display order; "config_ok" is always present in a record,
+    # the other flags are read leniently.
+    outcomes = [
+        ("config", r["config_ok"]),
+        ("tokenizer", r.get("tokenizer_ok")),
+        ("chat_tmpl", r.get("chat_template_ok")),
+        ("tokenize", r.get("tokenization_ok")),
+    ]
+    status = " | ".join(
+        f"{label}{'✅' if passed else '❌'}" for label, passed in outcomes
+    )
+    # A model is only marked green when every individual check passed.
+    emoji = "✅" if all(passed for _, passed in outcomes) else "❌"
+    print(f"  {emoji} {key}: {status}")
+    for err in r.get("errors") or []:
+        print(f"      └─ {err}")
+    loader = r.get("recommended_loader")
+    if loader:
+        print(f"      └─ Loader: {loader}")
+# Emit every per-model record as indented JSON; non-serializable values fall back to str().
+_rule = "=" * 80
+print("\n\n" + _rule)
+print("FULL RESULTS JSON:")
+print(_rule)
+_results_json = json.dumps(results, indent=2, default=str)
+print(_results_json)