#!/usr/bin/env python3
"""
Agent Zero Model Diagnostics — Tests loading each model from the catalog.
Run this on CPU to identify config/tokenizer issues before deploying to ZeroGPU.
"""
import os
import sys
import json
import traceback
# Install dependencies at runtime (quiet install into the current environment)
# so the sandbox is self-contained.
import subprocess

subprocess.run(
    [sys.executable, "-m", "pip", "install", "-q",
     "transformers>=4.52.0", "accelerate>=0.30.0", "torch", "huggingface-hub>=0.25.0"],
    capture_output=True,
)
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    AutoProcessor,
    AutoModelForImageTextToText,  # only in recent transformers; import fails fast on old versions
    AutoConfig,
)
HF_TOKEN = os.getenv("HF_TOKEN")
if not HF_TOKEN:
    print("❌ ERROR: HF_TOKEN not set!")
    sys.exit(1)
print(f"✅ HF_TOKEN present (length: {len(HF_TOKEN)})")
print(f"✅ PyTorch version: {torch.__version__}")
print(f"✅ CUDA available: {torch.cuda.is_available()}")
import transformers
print(f"✅ Transformers version: {transformers.__version__}")
# Model catalog
MODELS = {
    "chatgpt5-494m": {
        "repo": "ScottzillaSystems/ChatGPT-5",
        "architecture": "causal_lm",
        "size": "494M",
    },
    "qwen3.5-9b-opus": {
        "repo": "ScottzillaSystems/Huihui-Qwen3.5-9B-Claude-4.6-Opus-abliterated",
        "architecture": "conditional_gen",
        "size": "9.6B",
    },
    "supergemma4-7.5b": {
        "repo": "ScottzillaSystems/supergemma4-e4b-abliterated",
        "architecture": "conditional_gen",
        "size": "7.5B",
    },
    "cydonia-24b": {
        "repo": "ScottzillaSystems/Cydonia-24B-v4.1",
        "architecture": "causal_lm",
        "size": "24B",
    },
    "qwen3.6-27b": {
        "repo": "ScottzillaSystems/Huihui-Qwen3.6-27B-abliterated",
        "architecture": "conditional_gen",
        "size": "27.8B",
    },
    "qwen3-vl-8b": {
        "repo": "ScottzillaSystems/Huihui-Qwen3-VL-8B-Instruct-abliterated",
        "architecture": "conditional_gen",
        "size": "8.8B",
    },
    "qwen3.5-9b-base": {
        "repo": "ScottzillaSystems/Qwen3.5-9B",
        "architecture": "conditional_gen",
        "size": "9.6B",
    },
}
results = {}
print("\n" + "=" * 80)
print("PHASE 1: Check model configs (no download, just metadata)")
print("=" * 80)
for key, model_info in MODELS.items():
    repo = model_info["repo"]
    print(f"\n{'─' * 60}")
    print(f"Testing: {key} ({repo})")
    print(f"{'─' * 60}")
    result = {"repo": repo, "config_ok": False, "tokenizer_ok": False,
              "chat_template_ok": False, "errors": []}

    # Test 1: Load config
    try:
        config = AutoConfig.from_pretrained(repo, trust_remote_code=True, token=HF_TOKEN)
        archs = getattr(config, "architectures", None)
        arch = archs[0] if archs else "unknown"
        model_type = getattr(config, "model_type", "unknown")
        print(f" ✅ Config loaded: arch={arch}, model_type={model_type}")
        result["config_ok"] = True
        result["architecture_actual"] = arch
        result["model_type"] = model_type
    except Exception as e:
        print(f" ❌ Config FAILED: {type(e).__name__}: {e}")
        result["errors"].append(f"Config: {type(e).__name__}: {e}")
        results[key] = result
        continue
    # Test 2: Load tokenizer/processor
    try:
        if model_info["architecture"] == "conditional_gen":
            tokenizer = AutoProcessor.from_pretrained(repo, trust_remote_code=True, token=HF_TOKEN)
            print(" ✅ AutoProcessor loaded")
        else:
            tokenizer = AutoTokenizer.from_pretrained(repo, trust_remote_code=True, token=HF_TOKEN)
            if tokenizer.pad_token is None:
                tokenizer.pad_token = tokenizer.eos_token
            print(" ✅ AutoTokenizer loaded")
        result["tokenizer_ok"] = True
        result["tokenizer_type"] = type(tokenizer).__name__
    except Exception as e:
        print(f" ❌ Tokenizer/Processor FAILED: {type(e).__name__}: {e}")
        traceback.print_exc()
        result["errors"].append(f"Tokenizer: {type(e).__name__}: {e}")
        # Try alternative loading: swap AutoProcessor <-> AutoTokenizer
        print(" 🔄 Trying alternative loading...")
        try:
            if model_info["architecture"] == "conditional_gen":
                tokenizer = AutoTokenizer.from_pretrained(repo, trust_remote_code=True, token=HF_TOKEN)
                print(" ⚠️ AutoTokenizer works instead of AutoProcessor!")
                result["tokenizer_ok"] = True
                result["tokenizer_type"] = f"FALLBACK: {type(tokenizer).__name__}"
                result["errors"].append("AutoProcessor failed but AutoTokenizer works")
            else:
                tokenizer = AutoProcessor.from_pretrained(repo, trust_remote_code=True, token=HF_TOKEN)
                print(" ⚠️ AutoProcessor works instead of AutoTokenizer!")
                result["tokenizer_ok"] = True
                result["tokenizer_type"] = f"FALLBACK: {type(tokenizer).__name__}"
                result["errors"].append("AutoTokenizer failed but AutoProcessor works")
        except Exception as e2:
            print(f" ❌ Alternative also FAILED: {type(e2).__name__}: {e2}")
            result["errors"].append(f"Alt tokenizer: {type(e2).__name__}: {e2}")
    # Test 3: Chat template
    if result["tokenizer_ok"]:
        try:
            test_messages = [
                {"role": "user", "content": "Hello, how are you?"}
            ]
            text = tokenizer.apply_chat_template(
                test_messages, tokenize=False, add_generation_prompt=True
            )
            print(f" ✅ Chat template works (output length: {len(text)} chars)")
            print(f" First 200 chars: {repr(text[:200])}")
            result["chat_template_ok"] = True
            result["chat_template_sample"] = text[:200]
        except Exception as e:
            print(f" ❌ Chat template FAILED: {type(e).__name__}: {e}")
            traceback.print_exc()
            result["errors"].append(f"Chat template: {type(e).__name__}: {e}")
    # Test 4: Tokenization
    if result["tokenizer_ok"] and result["chat_template_ok"]:
        try:
            if model_info["architecture"] == "conditional_gen":
                inputs = tokenizer(text=[text], return_tensors="pt", padding=True)
            else:
                inputs = tokenizer(text, return_tensors="pt", padding=True)
            tensor_keys = [k for k in inputs.keys() if hasattr(inputs[k], "shape")]
            for k in tensor_keys:
                print(f" ✅ Input '{k}': shape={inputs[k].shape}, dtype={inputs[k].dtype}")
            result["tokenization_ok"] = True
        except Exception as e:
            print(f" ❌ Tokenization FAILED: {type(e).__name__}: {e}")
            traceback.print_exc()
            result["errors"].append(f"Tokenization: {type(e).__name__}: {e}")
            result["tokenization_ok"] = False
    # Test 5: Check which Auto class would load this model
    try:
        # Detect which class transformers would use
        if arch in ["Qwen2ForCausalLM", "MistralForCausalLM", "LlamaForCausalLM"]:
            result["recommended_loader"] = "AutoModelForCausalLM"
        elif "ForConditionalGeneration" in arch or "ForImageTextToText" in arch:
            result["recommended_loader"] = "AutoModelForImageTextToText"
        else:
            result["recommended_loader"] = f"Unknown for {arch}"
        print(f" ℹ️ Recommended loader: {result['recommended_loader']}")
    except Exception:
        pass

    results[key] = result
# Summary
print("\n\n" + "=" * 80)
print("SUMMARY")
print("=" * 80)
for key, r in results.items():
    status_parts = [
        "config✅" if r["config_ok"] else "config❌",
        "tokenizer✅" if r.get("tokenizer_ok") else "tokenizer❌",
        "chat_tmpl✅" if r.get("chat_template_ok") else "chat_tmpl❌",
        "tokenize✅" if r.get("tokenization_ok") else "tokenize❌",
    ]
    status = " | ".join(status_parts)
    all_ok = all([r["config_ok"], r.get("tokenizer_ok"),
                  r.get("chat_template_ok"), r.get("tokenization_ok")])
    emoji = "✅" if all_ok else "❌"
    print(f" {emoji} {key}: {status}")
    for err in r.get("errors", []):
        print(f" └─ {err}")
    if r.get("recommended_loader"):
        print(f" └─ Loader: {r['recommended_loader']}")
# Dump full results as JSON
print("\n\n" + "=" * 80)
print("FULL RESULTS JSON:")
print("=" * 80)
print(json.dumps(results, indent=2, default=str))
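
# The JSON block above is the machine-readable record of the run; one option is
# to capture the full log for later comparison between environments, e.g.:
#   python debug_models.py > model_diagnostics.log 2>&1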