model-weight-inspector

Runtime error

App Files Community

yujiepan committed on Feb 14

Commit

5e22042

1 Parent(s): 6dc6bb2

Fix FP8 dtype support and real-time stderr logging

Browse files

Files changed (1) hide show

app.py +63 -52

app.py CHANGED Viewed

@@ -6,7 +6,7 @@ import shutil
 import io
 import time
 import threading
-from contextlib import redirect_stderr
 import gradio as gr
 import torch
@@ -18,61 +18,55 @@ def get_param(model_id: str, param_key: str, log_buffer: io.StringIO, progress:
     """
     Download and return a specific parameter tensor from a Hugging Face model.
     """
-    # Try to download the index file (for sharded models)
     try:
-        log_buffer.write(f"📥 Downloading index file for {model_id}...\n")
-        progress(0.1, desc="Downloading index...")
-        # Capture tqdm output from stderr
-        stderr_capture = io.StringIO()
-        with redirect_stderr(stderr_capture):
             index_path = hf_hub_download(
                 model_id, "model.safetensors.index.json")
-        stderr_output = stderr_capture.getvalue()
-        if stderr_output:
-            log_buffer.write(stderr_output + "\n")
-        log_buffer.write(f"✓ Index file found: {index_path}\n")
-        with open(index_path, "r", encoding="utf-8") as f:
-            index = json.load(f)
-        weight_map = index["weight_map"]
-        if param_key not in weight_map:
-            raise KeyError(
-                f"Parameter '{param_key}' not found in model. Available keys: {list(weight_map.keys())[:10]}..."
-            )
-        shard_file = weight_map[param_key]
-        log_buffer.write(f"✓ Parameter found in shard: {shard_file}\n")
-    except Exception as e:
-        if "404" in str(e) or "not found" in str(e).lower():
-            log_buffer.write("ℹ️ No index file, trying single model file...\n")
-            shard_file = "model.safetensors"
-        else:
-            raise
-    log_buffer.write(f"📥 Downloading shard: {shard_file}...\n")
-    progress(0.3, desc=f"Downloading {shard_file}...")
-    # Capture download progress
-    stderr_capture = io.StringIO()
-    with redirect_stderr(stderr_capture):
         shard_path = hf_hub_download(model_id, shard_file)
-    stderr_output = stderr_capture.getvalue()
-    if stderr_output:
-        log_buffer.write(stderr_output + "\n")
-    log_buffer.write(f"✓ Shard downloaded: {shard_path}\n")
-    progress(0.7, desc="Loading tensor...")
-    log_buffer.write(f"🔍 Loading tensor '{param_key}'...\n")
-    with safe_open(shard_path, framework="pt") as f:
-        tensor = f.get_tensor(param_key)
-    log_buffer.write(f"✓ Tensor loaded successfully\n")
-    progress(0.9, desc="Finalizing...")
-    return tensor
 def get_available_keys(model_id: str):
@@ -99,10 +93,22 @@ def format_tensor_info(tensor: torch.Tensor) -> str:
     info.append(f"**Dtype:** {tensor.dtype}")
     info.append(f"**Device:** {tensor.device}")
     info.append(f"**Numel:** {tensor.numel():,}")
-    info.append(f"**Min:** {tensor.min().item():.6f}")
-    info.append(f"**Max:** {tensor.max().item():.6f}")
-    info.append(f"**Mean:** {tensor.float().mean().item():.6f}")
-    info.append(f"**Std:** {tensor.float().std().item():.6f}")
     return "<br>".join(info)
@@ -165,14 +171,19 @@ def fetch_param(model_id: str, param_key: str, progress=gr.Progress()):
         flat = tensor.flatten()
         preview_size = min(100, flat.numel())
-        preview = flat[:preview_size].tolist()
         # Format preview in multiple lines (10 values per line)
         # Adapt to different data types
         preview_lines = []
         for i in range(0, len(preview), 10):
             line_values = preview[i:i+10]
-            if tensor.dtype in [torch.float32, torch.float64, torch.float16, torch.bfloat16]:
                 preview_lines.append(", ".join(f"{v:.6f}" for v in line_values))
             elif tensor.dtype in [torch.int8, torch.int16, torch.int32, torch.int64, torch.uint8]:
                 preview_lines.append(", ".join(f"{v}" for v in line_values))
@@ -358,7 +369,7 @@ with gr.Blocks(title="Hugging Face Model Weight Inspector") as demo:
                     preview_output = gr.Markdown(label="Tensor Preview")
             download_output = gr.File(label="Download Tensor (.pt file)")
             log_output = gr.Textbox(
-                label="📋 Download Log", lines=6, interactive=False)
         with gr.Tab("Cache Management"):
             with gr.Row():

 import io
 import time
 import threading
+import sys
 import gradio as gr
 import torch
     """
     Download and return a specific parameter tensor from a Hugging Face model.
     """
+    # Redirect stderr to log buffer for real-time tqdm updates
+    original_stderr = sys.stderr
+    sys.stderr = log_buffer
     try:
+        # Try to download the index file (for sharded models)
+        try:
+            log_buffer.write(f"📥 Downloading index file for {model_id}...\n")
+            progress(0.1, desc="Downloading index...")
             index_path = hf_hub_download(
                 model_id, "model.safetensors.index.json")
+            log_buffer.write(f"✓ Index file found: {index_path}\n")
+            with open(index_path, "r", encoding="utf-8") as f:
+                index = json.load(f)
+            weight_map = index["weight_map"]
+            if param_key not in weight_map:
+                raise KeyError(
+                    f"Parameter '{param_key}' not found in model. Available keys: {list(weight_map.keys())[:10]}..."
+                )
+            shard_file = weight_map[param_key]
+            log_buffer.write(f"✓ Parameter found in shard: {shard_file}\n")
+        except Exception as e:
+            if "404" in str(e) or "not found" in str(e).lower():
+                log_buffer.write("ℹ️ No index file, trying single model file...\n")
+                shard_file = "model.safetensors"
+            else:
+                raise
+        log_buffer.write(f"📥 Downloading shard: {shard_file}...\n")
+        progress(0.3, desc=f"Downloading {shard_file}...")
         shard_path = hf_hub_download(model_id, shard_file)
+        log_buffer.write(f"\n✓ Shard downloaded: {shard_path}\n")
+        progress(0.7, desc="Loading tensor...")
+        log_buffer.write(f"🔍 Loading tensor '{param_key}'...\n")
+        with safe_open(shard_path, framework="pt") as f:
+            tensor = f.get_tensor(param_key)
+        log_buffer.write(f"✓ Tensor loaded successfully\n")
+        progress(0.9, desc="Finalizing...")
+        return tensor
+    finally:
+        # Restore original stderr
+        sys.stderr = original_stderr
 def get_available_keys(model_id: str):
     info.append(f"**Dtype:** {tensor.dtype}")
     info.append(f"**Device:** {tensor.device}")
     info.append(f"**Numel:** {tensor.numel():,}")
+    # Handle special dtypes that don't support statistical operations
+    try:
+        # Convert FP8 and other special dtypes to float32 for stats
+        if str(tensor.dtype) in ['torch.float8_e4m3fn', 'torch.float8_e5m2']:
+            stats_tensor = tensor.to(torch.float32)
+        else:
+            stats_tensor = tensor
+        info.append(f"**Min:** {stats_tensor.min().item():.6f}")
+        info.append(f"**Max:** {stats_tensor.max().item():.6f}")
+        info.append(f"**Mean:** {stats_tensor.float().mean().item():.6f}")
+        info.append(f"**Std:** {stats_tensor.float().std().item():.6f}")
+    except Exception as e:
+        info.append(f"**Stats:** Unable to compute (dtype not supported)")
     return "<br>".join(info)
         flat = tensor.flatten()
         preview_size = min(100, flat.numel())
+        # Convert to float32 for FP8 types for display
+        if str(tensor.dtype) in ['torch.float8_e4m3fn', 'torch.float8_e5m2']:
+            preview = flat[:preview_size].to(torch.float32).tolist()
+        else:
+            preview = flat[:preview_size].tolist()
         # Format preview in multiple lines (10 values per line)
         # Adapt to different data types
         preview_lines = []
         for i in range(0, len(preview), 10):
             line_values = preview[i:i+10]
+            if tensor.dtype in [torch.float32, torch.float64, torch.float16, torch.bfloat16] or str(tensor.dtype) in ['torch.float8_e4m3fn', 'torch.float8_e5m2']:
                 preview_lines.append(", ".join(f"{v:.6f}" for v in line_values))
             elif tensor.dtype in [torch.int8, torch.int16, torch.int32, torch.int64, torch.uint8]:
                 preview_lines.append(", ".join(f"{v}" for v in line_values))
                     preview_output = gr.Markdown(label="Tensor Preview")
             download_output = gr.File(label="Download Tensor (.pt file)")
             log_output = gr.Textbox(
+                label="📋 Download Log", lines=1, interactive=False)
         with gr.Tab("Cache Management"):
             with gr.Row():