#!/usr/bin/env python3
"""
Agent Zero Model Diagnostics — Tests loading each model from the catalog.
Run this on CPU to identify config/tokenizer issues before deploying to ZeroGPU.
"""
import os
import sys
import json
import traceback
# Install dependencies before importing them; pip output is suppressed so the
# diagnostic log stays readable.
import subprocess
subprocess.run([sys.executable, "-m", "pip", "install", "-q",
                "transformers>=4.52.0", "accelerate>=0.30.0", "torch", "huggingface-hub>=0.25.0"],
               capture_output=True)
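# Heavy third-party imports come only after the install step above, so a
# fresh environment does not fail before the dependencies exist.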
import torch
# The model classes are imported eagerly; an ImportError here would flag a
# transformers build that is too old, even though this phase only calls
# AutoConfig, AutoTokenizer, and AutoProcessor directly.
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    AutoProcessor,
    AutoModelForImageTextToText,
    AutoConfig,
)
from huggingface_hub import HfApi
HF_TOKEN = os.getenv("HF_TOKEN")
if not HF_TOKEN:
    print("❌ ERROR: HF_TOKEN not set!")
    sys.exit(1)

print(f"✅ HF_TOKEN present (length: {len(HF_TOKEN)})")
print(f"✅ PyTorch version: {torch.__version__}")
print(f"✅ CUDA available: {torch.cuda.is_available()}")

import transformers
print(f"✅ Transformers version: {transformers.__version__}")
# Model catalog. "architecture" selects the loading path below:
# "conditional_gen" → AutoProcessor, "causal_lm" → AutoTokenizer.
MODELS = {
    "chatgpt5-494m": {
        "repo": "ScottzillaSystems/ChatGPT-5",
        "architecture": "causal_lm",
        "size": "494M",
    },
    "qwen3.5-9b-opus": {
        "repo": "ScottzillaSystems/Huihui-Qwen3.5-9B-Claude-4.6-Opus-abliterated",
        "architecture": "conditional_gen",
        "size": "9.6B",
    },
    "supergemma4-7.5b": {
        "repo": "ScottzillaSystems/supergemma4-e4b-abliterated",
        "architecture": "conditional_gen",
        "size": "7.5B",
    },
    "cydonia-24b": {
        "repo": "ScottzillaSystems/Cydonia-24B-v4.1",
        "architecture": "causal_lm",
        "size": "24B",
    },
    "qwen3.6-27b": {
        "repo": "ScottzillaSystems/Huihui-Qwen3.6-27B-abliterated",
        "architecture": "conditional_gen",
        "size": "27.8B",
    },
    "qwen3-vl-8b": {
        "repo": "ScottzillaSystems/Huihui-Qwen3-VL-8B-Instruct-abliterated",
        "architecture": "conditional_gen",
        "size": "8.8B",
    },
    "qwen3.5-9b-base": {
        "repo": "ScottzillaSystems/Qwen3.5-9B",
        "architecture": "conditional_gen",
        "size": "9.6B",
    },
}
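# Every from_pretrained call below passes token= (the script refuses to start
# without HF_TOKEN, which suggests gated or private repos) and
# trust_remote_code=True in case a repo ships custom modeling code.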
results = {}

print("\n" + "=" * 80)
print("PHASE 1: Check model configs (no download, just metadata)")
print("=" * 80)
for key, model_info in MODELS.items():
    repo = model_info["repo"]
    print(f"\n{'─' * 60}")
    print(f"Testing: {key} ({repo})")
    print(f"{'─' * 60}")
    result = {"repo": repo, "config_ok": False, "tokenizer_ok": False,
              "chat_template_ok": False, "errors": []}
    # Test 1: Load config
    try:
        config = AutoConfig.from_pretrained(repo, trust_remote_code=True, token=HF_TOKEN)
        arch = config.architectures[0] if hasattr(config, 'architectures') and config.architectures else "unknown"
        model_type = getattr(config, 'model_type', 'unknown')
        print(f" ✅ Config loaded: arch={arch}, model_type={model_type}")
        result["config_ok"] = True
        result["architecture_actual"] = arch
        result["model_type"] = model_type
    except Exception as e:
        print(f" ❌ Config FAILED: {type(e).__name__}: {e}")
        result["errors"].append(f"Config: {type(e).__name__}: {e}")
        results[key] = result
        continue
    # Test 2: Load tokenizer/processor. "conditional_gen" entries are expected
    # to ship a processor (tokenizer plus any image preprocessing); text-only
    # repos normally expose just a tokenizer, so each path falls back to the
    # other class if the first choice fails.
    try:
        if model_info["architecture"] == "conditional_gen":
            tokenizer = AutoProcessor.from_pretrained(repo, trust_remote_code=True, token=HF_TOKEN)
            print(" ✅ AutoProcessor loaded")
        else:
            tokenizer = AutoTokenizer.from_pretrained(repo, trust_remote_code=True, token=HF_TOKEN)
            if tokenizer.pad_token is None:
                tokenizer.pad_token = tokenizer.eos_token
            print(" ✅ AutoTokenizer loaded")
        result["tokenizer_ok"] = True
        result["tokenizer_type"] = type(tokenizer).__name__
    except Exception as e:
        print(f" ❌ Tokenizer/Processor FAILED: {type(e).__name__}: {e}")
        traceback.print_exc()
        result["errors"].append(f"Tokenizer: {type(e).__name__}: {e}")
        # Try alternative loading
        print(" 🔄 Trying alternative loading...")
        try:
            if model_info["architecture"] == "conditional_gen":
                tokenizer = AutoTokenizer.from_pretrained(repo, trust_remote_code=True, token=HF_TOKEN)
                print(" ⚠️ AutoTokenizer works instead of AutoProcessor!")
                result["tokenizer_ok"] = True
                result["tokenizer_type"] = f"FALLBACK: {type(tokenizer).__name__}"
                result["errors"].append("AutoProcessor failed but AutoTokenizer works")
            else:
                tokenizer = AutoProcessor.from_pretrained(repo, trust_remote_code=True, token=HF_TOKEN)
                print(" ⚠️ AutoProcessor works instead of AutoTokenizer!")
                result["tokenizer_ok"] = True
                result["tokenizer_type"] = f"FALLBACK: {type(tokenizer).__name__}"
        except Exception as e2:
            print(f" ❌ Alternative also FAILED: {type(e2).__name__}: {e2}")
            result["errors"].append(f"Alt tokenizer: {type(e2).__name__}: {e2}")
    # Test 3: Chat template
    if result["tokenizer_ok"]:
        try:
            test_messages = [
                {"role": "user", "content": "Hello, how are you?"}
            ]
            text = tokenizer.apply_chat_template(
                test_messages, tokenize=False, add_generation_prompt=True
            )
            print(f" ✅ Chat template works (output length: {len(text)} chars)")
            print(f" First 200 chars: {repr(text[:200])}")
            result["chat_template_ok"] = True
            result["chat_template_sample"] = text[:200]
        except Exception as e:
            print(f" ❌ Chat template FAILED: {type(e).__name__}: {e}")
            traceback.print_exc()
            result["errors"].append(f"Chat template: {type(e).__name__}: {e}")
    # Test 4: Tokenization
    if result["tokenizer_ok"] and result["chat_template_ok"]:
        try:
            if model_info["architecture"] == "conditional_gen":
                inputs = tokenizer(text=[text], return_tensors="pt", padding=True)
            else:
                inputs = tokenizer(text, return_tensors="pt", padding=True)
            tensor_keys = [k for k in inputs.keys() if hasattr(inputs[k], 'shape')]
            for k in tensor_keys:
                print(f" ✅ Input '{k}': shape={inputs[k].shape}, dtype={inputs[k].dtype}")
            result["tokenization_ok"] = True
        except Exception as e:
            print(f" ❌ Tokenization FAILED: {type(e).__name__}: {e}")
            traceback.print_exc()
            result["errors"].append(f"Tokenization: {type(e).__name__}: {e}")
            result["tokenization_ok"] = False
    # Test 5: Check which Auto class would load this model
    try:
        # Detect which class transformers would use
        if arch in ["Qwen2ForCausalLM", "MistralForCausalLM", "LlamaForCausalLM"]:
            result["recommended_loader"] = "AutoModelForCausalLM"
        elif "ForConditionalGeneration" in arch or "ForImageTextToText" in arch:
            result["recommended_loader"] = "AutoModelForImageTextToText"
        else:
            result["recommended_loader"] = f"Unknown for {arch}"
        print(f" ℹ️ Recommended loader: {result['recommended_loader']}")
    except Exception:
        pass
    results[key] = result
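# NOTE: actually loading weights is out of scope for this CPU pass (the
# "PHASE 1" banner suggests a later GPU phase); config, tokenizer, and
# template failures are far cheaper to catch here first.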
# Summary
print("\n\n" + "=" * 80)
print("SUMMARY")
print("=" * 80)
for key, r in results.items():
    status_parts = []
    if r["config_ok"]:
        status_parts.append("config✅")
    else:
        status_parts.append("config❌")
    if r.get("tokenizer_ok"):
        status_parts.append("tokenizer✅")
    else:
        status_parts.append("tokenizer❌")
    if r.get("chat_template_ok"):
        status_parts.append("chat_tmpl✅")
    else:
        status_parts.append("chat_tmpl❌")
    if r.get("tokenization_ok"):
        status_parts.append("tokenize✅")
    else:
        status_parts.append("tokenize❌")
    status = " | ".join(status_parts)
    emoji = "✅" if all([r["config_ok"], r.get("tokenizer_ok"), r.get("chat_template_ok"), r.get("tokenization_ok")]) else "❌"
    print(f" {emoji} {key}: {status}")
    if r.get("errors"):
        for err in r["errors"]:
            print(f" └─ {err}")
    if r.get("recommended_loader"):
        print(f" └─ Loader: {r['recommended_loader']}")
# Dump full results as JSON (default=str stringifies anything non-serializable)
print("\n\n" + "=" * 80)
print("FULL RESULTS JSON:")
print("=" * 80)
print(json.dumps(results, indent=2, default=str))
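# A possible next step (a sketch, not part of this diagnostic): load one model
# that passed every probe, e.g.
#
#   model = AutoModelForCausalLM.from_pretrained(
#       MODELS["cydonia-24b"]["repo"], token=HF_TOKEN,
#       trust_remote_code=True, torch_dtype="auto", device_map="auto",
#   )
#
# Kept in comments so this script remains safe to run on CPU-only hardware.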