Javad Taghia committed on
Commit
9ed2e4d
·
1 Parent(s): b6094c3

Add microscope config-only inspector

Browse files
.gitignore ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ __pycache__/
2
+ .DS_Store
3
+ .venv/
4
+ microscope/.venv/
5
+ microscope/__pycache__/
microscope/README.md ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Microscope tooling
2
+
3
+ Small utilities to inspect this repository’s Diffusers checkpoint without leaving the repo.
4
+
5
+ ## Setup
6
+
7
+ From the repo root:
8
+
9
+ ```bash
10
+ cd microscope
11
+ python3 -m venv .venv
12
+ source .venv/bin/activate
13
+ pip install -U pip
14
+ pip install -r requirements.txt
15
+ ```
16
+
17
+ ## Usage
18
+
19
+ Inspect the local model (default: one directory up from this folder) without importing PyTorch/Diffusers:
20
+
21
+ ```bash
22
+ python inspect_model.py --model-dir .. --config-only
23
+ ```
24
+
25
+ Include parameter counts by scanning `*.safetensors` headers (still does not load weights into RAM):
26
+
27
+ ```bash
28
+ python inspect_model.py --model-dir .. --config-only --params
29
+ ```
30
+
31
+ Write a machine-readable summary:
32
+
33
+ ```bash
34
+ python inspect_model.py --model-dir .. --config-only --params --json-out model_summary.json
35
+ ```
36
+
37
+ Flags:
38
+
39
+ - `--model-dir`: path to the Diffusers pipeline (default: `..`).
40
+ - `--config-only`: read JSON configs and print a summary (recommended).
41
+ - `--params`: count parameters from `*.safetensors` metadata (no tensor loading).
42
+ - `--json-out`: write a JSON summary to this path.
microscope/inspect_model.py ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Repository model inspector.
4
+
5
+ This script is designed to work in `--config-only` mode without importing
6
+ PyTorch/Diffusers/Transformers. It reads JSON configs from a local Diffusers
7
+ repository layout and prints a summary.
8
+
9
+ With `--params`, it can also compute parameter counts by scanning
10
+ `*.safetensors` headers (without loading tensor data into RAM).
11
+ """
12
+
13
+ import argparse
14
+ import json
15
+ import math
16
+ from pathlib import Path
17
+ from typing import Any, Dict, Iterable, Optional
18
+
19
+
20
def load_json(path: Path) -> Dict[str, Any]:
    """Read *path* as UTF-8 text and return the decoded JSON mapping."""
    text = path.read_text(encoding="utf-8")
    return json.loads(text)
22
+
23
+
24
def human_params(value: Optional[int]) -> str:
    """Format a parameter count for display.

    Returns "n/a" for None, billions as e.g. "4.02B", millions as e.g.
    "83.82M", and anything below one million as a comma-grouped integer.
    (The previous version rendered small counts as a misleading "0.00M".)
    """
    if value is None:
        return "n/a"
    if value >= 1_000_000_000:
        return f"{value/1e9:.2f}B"
    if value >= 1_000_000:
        return f"{value/1e6:.2f}M"
    return f"{value:,}"
30
+
31
+
32
def read_model_index(model_dir: Path) -> Dict[str, Any]:
    """Return the parsed model_index.json from *model_dir*, or {} if absent."""
    index_file = model_dir / "model_index.json"
    if index_file.exists():
        return json.loads(index_file.read_text(encoding="utf-8"))
    return {}
37
+
38
+
39
def describe_model_index(model_index: Dict[str, Any]) -> None:
    """Print the public (non-underscore) entries of model_index.json.

    Prints nothing when the mapping is empty.
    """
    if not model_index:
        return
    print("Pipeline pieces (model_index.json):")
    public_items = ((k, v) for k, v in model_index.items() if not k.startswith("_"))
    for name, component in public_items:
        print(f"  {name:14s} -> {component}")
    print()
48
+
49
+
50
def detect_pipeline_kind(model_index: Dict[str, Any]) -> str:
    """Classify the pipeline layout as "zimage", "sdxl_like", or "unknown".

    A transformer without a unet (or a ZImage class name) means a
    ZImage-style pipeline; a unet (or a Stable-* class name) means an
    SDXL-like one.
    """
    class_name = str(model_index.get("_class_name", "")).lower()
    has_transformer = "transformer" in model_index
    has_unet = "unet" in model_index
    if "zimage" in class_name or (has_transformer and not has_unet):
        return "zimage"
    if "stable" in class_name or has_unet:
        return "sdxl_like"
    return "unknown"
57
+
58
+
59
def iter_safetensors_files(directory: Path) -> Iterable[Path]:
    """Return the *.safetensors files directly inside *directory*, sorted by name.

    A missing directory yields an empty list; subdirectories are not descended.
    """
    if not directory.exists():
        return []
    regular_files = [entry for entry in directory.iterdir() if entry.is_file()]
    return sorted(entry for entry in regular_files if entry.suffix == ".safetensors")
63
+
64
+
65
def count_params_from_safetensors(files: Iterable[Path]) -> int:
    """Sum tensor element counts across *files* via safetensors headers.

    Only shape metadata is read — tensor data is never loaded into RAM.
    """
    # Imported lazily so --config-only runs never need safetensors installed.
    from safetensors import safe_open

    total = 0
    for path in files:
        with safe_open(str(path), framework="np") as handle:
            for tensor_name in handle.keys():
                total += math.prod(handle.get_slice(tensor_name).get_shape())
    return int(total)
75
+
76
+
77
def zimage_config_only_summary(model_dir: Path, include_params: bool) -> Dict[str, Any]:
    """Print a human-readable summary of a ZImage-style Diffusers layout and
    return the same information as a JSON-serializable dict.

    Only JSON configs are read. When *include_params* is true, parameter
    counts are additionally derived from *.safetensors headers (no tensor
    data is loaded); otherwise counts print as "n/a".
    """
    model_index = read_model_index(model_dir)

    # Component config locations in a standard Diffusers pipeline folder.
    te_cfg_path = model_dir / "text_encoder" / "config.json"
    transformer_cfg_path = model_dir / "transformer" / "config.json"
    vae_cfg_path = model_dir / "vae" / "config.json"
    scheduler_cfg_path = model_dir / "scheduler" / "scheduler_config.json"

    # Missing configs degrade to {} so the summary still prints, with [warn] lines.
    te_cfg = load_json(te_cfg_path) if te_cfg_path.exists() else {}
    transformer_cfg = load_json(transformer_cfg_path) if transformer_cfg_path.exists() else {}
    vae_cfg = load_json(vae_cfg_path) if vae_cfg_path.exists() else {}
    scheduler_cfg = load_json(scheduler_cfg_path) if scheduler_cfg_path.exists() else {}

    # Counts stay None (rendered "n/a" by human_params) unless --params was given.
    text_encoder_params = None
    transformer_params = None
    vae_params = None
    if include_params:
        text_encoder_params = count_params_from_safetensors(iter_safetensors_files(model_dir / "text_encoder"))
        transformer_params = count_params_from_safetensors(iter_safetensors_files(model_dir / "transformer"))
        vae_params = count_params_from_safetensors(iter_safetensors_files(model_dir / "vae"))

    print("[Text encoder]")
    if te_cfg:
        # "architectures" is a list in HF transformers configs; show the first entry.
        arch = te_cfg.get("architectures", [])
        arch_name = arch[0] if isinstance(arch, list) and arch else "n/a"
        print(f"  architecture={arch_name}")
        print(
            "  "
            f"layers={te_cfg.get('num_hidden_layers', 'n/a')}, "
            f"hidden={te_cfg.get('hidden_size', 'n/a')}, "
            f"heads={te_cfg.get('num_attention_heads', 'n/a')}, "
            f"intermediate={te_cfg.get('intermediate_size', 'n/a')}"
        )
        print(f"  vocab={te_cfg.get('vocab_size', 'n/a')}, max_positions={te_cfg.get('max_position_embeddings', 'n/a')}")
    else:
        print("  [warn] missing text_encoder/config.json")
    print(f"  params={human_params(text_encoder_params)}")
    print()

    print("[Transformer]")
    if transformer_cfg:
        print(f"  class={transformer_cfg.get('_class_name', 'n/a')}")
        print(
            "  "
            f"dim={transformer_cfg.get('dim', 'n/a')}, "
            f"layers={transformer_cfg.get('n_layers', 'n/a')}, "
            f"heads={transformer_cfg.get('n_heads', 'n/a')}"
        )
        print(f"  in_channels={transformer_cfg.get('in_channels', 'n/a')}, cap_feat_dim={transformer_cfg.get('cap_feat_dim', 'n/a')}")
        # NOTE(review): patch sizes are list-valued in this repo's config
        # (all_patch_size / all_f_patch_size) and are printed as-is.
        print(f"  patch_size={transformer_cfg.get('all_patch_size', 'n/a')}, f_patch_size={transformer_cfg.get('all_f_patch_size', 'n/a')}")
    else:
        print("  [warn] missing transformer/config.json")
    print(f"  params={human_params(transformer_params)}")
    print()

    print("[VAE]")
    if vae_cfg:
        print(f"  class={vae_cfg.get('_class_name', 'n/a')}")
        print(
            "  "
            f"sample_size={vae_cfg.get('sample_size', 'n/a')}, "
            f"in_channels={vae_cfg.get('in_channels', 'n/a')}, "
            f"latent_channels={vae_cfg.get('latent_channels', 'n/a')}, "
            f"out_channels={vae_cfg.get('out_channels', 'n/a')}"
        )
        print(f"  block_out_channels={vae_cfg.get('block_out_channels', 'n/a')}, scaling_factor={vae_cfg.get('scaling_factor', 'n/a')}")
    else:
        print("  [warn] missing vae/config.json")
    print(f"  params={human_params(vae_params)}")
    print()

    print("[Scheduler]")
    if scheduler_cfg:
        print(
            "  "
            f"class={scheduler_cfg.get('_class_name', 'n/a')}, "
            f"timesteps={scheduler_cfg.get('num_train_timesteps', 'n/a')}, "
            f"shift={scheduler_cfg.get('shift', 'n/a')}"
        )
    else:
        print("  [warn] missing scheduler/scheduler_config.json")
    print()

    # Mirrors the printed summary; written to disk by --json-out in main().
    return {
        "kind": "zimage",
        "pipeline": model_index,
        "text_encoder": {"config": te_cfg, "params": text_encoder_params},
        "transformer": {"config": transformer_cfg, "params": transformer_params},
        "vae": {"config": vae_cfg, "params": vae_params},
        "scheduler": {"config": scheduler_cfg},
    }
168
+
169
+
170
def main() -> None:
    """CLI entry point: parse flags, print the summary, optionally write JSON."""
    arg_parser = argparse.ArgumentParser(description="Inspect a local Diffusers-style repository layout.")
    arg_parser.add_argument("--model-dir", type=Path, default=Path(".."), help="Path to the diffusers pipeline directory.")
    arg_parser.add_argument("--device", default="cpu", help="Unused (kept for CLI compatibility).")
    arg_parser.add_argument("--fp16", action="store_true", help="Unused (kept for CLI compatibility).")
    arg_parser.add_argument("--config-only", action="store_true", help="Read JSON configs and print a summary.")
    arg_parser.add_argument("--params", action="store_true", help="Count parameters from *.safetensors headers (no tensor loading).")
    arg_parser.add_argument("--json-out", type=Path, default=None, help="Write a JSON summary to this path.")
    opts = arg_parser.parse_args()

    # A readable model_index.json is the minimum requirement for inspection.
    index = read_model_index(opts.model_dir)
    if not index:
        raise SystemExit(f"model_index.json not found under {opts.model_dir}")

    describe_model_index(index)
    pipeline_kind = detect_pipeline_kind(index)

    # Only the config-only path is implemented; anything else exits cleanly.
    if not opts.config_only:
        raise SystemExit("Only --config-only mode is supported by this inspector.")

    if pipeline_kind != "zimage":
        raise SystemExit(f"Unsupported pipeline kind: {pipeline_kind} (expected ZImagePipeline-style layout)")

    report = zimage_config_only_summary(opts.model_dir, include_params=opts.params)

    if opts.json_out is not None:
        # Create intermediate directories so any output path is writable.
        opts.json_out.parent.mkdir(parents=True, exist_ok=True)
        opts.json_out.write_text(json.dumps(report, indent=2, ensure_ascii=False) + "\n", encoding="utf-8")
        print(f"[info] wrote JSON summary to {opts.json_out}")
199
+
200
+
201
if __name__ == "__main__":
    # Run the CLI only when executed as a script, not when imported.
    main()
microscope/model_summary.json ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "kind": "zimage",
3
+ "pipeline": {
4
+ "_class_name": "ZImagePipeline",
5
+ "_diffusers_version": "0.36.0.dev0",
6
+ "scheduler": [
7
+ "diffusers",
8
+ "FlowMatchEulerDiscreteScheduler"
9
+ ],
10
+ "text_encoder": [
11
+ "transformers",
12
+ "Qwen3Model"
13
+ ],
14
+ "tokenizer": [
15
+ "transformers",
16
+ "Qwen2Tokenizer"
17
+ ],
18
+ "transformer": [
19
+ "diffusers",
20
+ "ZImageTransformer2DModel"
21
+ ],
22
+ "vae": [
23
+ "diffusers",
24
+ "AutoencoderKL"
25
+ ]
26
+ },
27
+ "text_encoder": {
28
+ "config": {
29
+ "architectures": [
30
+ "Qwen3ForCausalLM"
31
+ ],
32
+ "attention_bias": false,
33
+ "attention_dropout": 0.0,
34
+ "bos_token_id": 151643,
35
+ "eos_token_id": 151645,
36
+ "head_dim": 128,
37
+ "hidden_act": "silu",
38
+ "hidden_size": 2560,
39
+ "initializer_range": 0.02,
40
+ "intermediate_size": 9728,
41
+ "max_position_embeddings": 40960,
42
+ "max_window_layers": 36,
43
+ "model_type": "qwen3",
44
+ "num_attention_heads": 32,
45
+ "num_hidden_layers": 36,
46
+ "num_key_value_heads": 8,
47
+ "rms_norm_eps": 1e-06,
48
+ "rope_scaling": null,
49
+ "rope_theta": 1000000,
50
+ "sliding_window": null,
51
+ "tie_word_embeddings": true,
52
+ "torch_dtype": "bfloat16",
53
+ "transformers_version": "4.51.0",
54
+ "use_cache": true,
55
+ "use_sliding_window": false,
56
+ "vocab_size": 151936
57
+ },
58
+ "params": 4022468096
59
+ },
60
+ "transformer": {
61
+ "config": {
62
+ "_class_name": "ZImageTransformer2DModel",
63
+ "_diffusers_version": "0.36.0.dev0",
64
+ "all_f_patch_size": [
65
+ 1
66
+ ],
67
+ "all_patch_size": [
68
+ 2
69
+ ],
70
+ "axes_dims": [
71
+ 32,
72
+ 48,
73
+ 48
74
+ ],
75
+ "axes_lens": [
76
+ 1536,
77
+ 512,
78
+ 512
79
+ ],
80
+ "cap_feat_dim": 2560,
81
+ "dim": 3840,
82
+ "in_channels": 16,
83
+ "n_heads": 30,
84
+ "n_kv_heads": 30,
85
+ "n_layers": 30,
86
+ "n_refiner_layers": 2,
87
+ "norm_eps": 1e-05,
88
+ "qk_norm": true,
89
+ "rope_theta": 256.0,
90
+ "t_scale": 1000.0
91
+ },
92
+ "params": 6154908736
93
+ },
94
+ "vae": {
95
+ "config": {
96
+ "_class_name": "AutoencoderKL",
97
+ "_diffusers_version": "0.36.0.dev0",
98
+ "_name_or_path": "flux-dev",
99
+ "act_fn": "silu",
100
+ "block_out_channels": [
101
+ 128,
102
+ 256,
103
+ 512,
104
+ 512
105
+ ],
106
+ "down_block_types": [
107
+ "DownEncoderBlock2D",
108
+ "DownEncoderBlock2D",
109
+ "DownEncoderBlock2D",
110
+ "DownEncoderBlock2D"
111
+ ],
112
+ "force_upcast": true,
113
+ "in_channels": 3,
114
+ "latent_channels": 16,
115
+ "latents_mean": null,
116
+ "latents_std": null,
117
+ "layers_per_block": 2,
118
+ "mid_block_add_attention": true,
119
+ "norm_num_groups": 32,
120
+ "out_channels": 3,
121
+ "sample_size": 1024,
122
+ "scaling_factor": 0.3611,
123
+ "shift_factor": 0.1159,
124
+ "up_block_types": [
125
+ "UpDecoderBlock2D",
126
+ "UpDecoderBlock2D",
127
+ "UpDecoderBlock2D",
128
+ "UpDecoderBlock2D"
129
+ ],
130
+ "use_post_quant_conv": false,
131
+ "use_quant_conv": false
132
+ },
133
+ "params": 83819683
134
+ },
135
+ "scheduler": {
136
+ "config": {
137
+ "_class_name": "FlowMatchEulerDiscreteScheduler",
138
+ "_diffusers_version": "0.36.0.dev0",
139
+ "num_train_timesteps": 1000,
140
+ "use_dynamic_shifting": false,
141
+ "shift": 3.0
142
+ }
143
+ }
144
+ }
microscope/requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ safetensors>=0.4.2