Spaces:

transformers-community
/

Transformers-tenets

Running

App Files Files Community

Molbap HF Staff committed on Aug 20, 2025

Commit

64c9a18

1 Parent(s): 0548742

some improvements

Browse files

Files changed (1) hide show

app.py +101 -11

app.py CHANGED Viewed

@@ -190,9 +190,11 @@ def build_attn_vis():
 from transformers import AutoModelForCausalLM, modeling_utils as MU  # noqa: E402
 def _measure_load_timeline(model_id: str, disable_warmup: bool):
     orig = getattr(MU, "caching_allocator_warmup", None)
     if disable_warmup and orig is not None:
         MU.caching_allocator_warmup = lambda *a, **k: None  # type: ignore[attr-defined]
     try:
         device = "cuda" if torch.cuda.is_available() else "cpu"
         tl = []
@@ -201,33 +203,46 @@ def _measure_load_timeline(model_id: str, disable_warmup: bool):
             while not stop_evt.is_set():
                 if device == "cuda":
                     torch.cuda.synchronize()
-                    alloc = torch.cuda.memory_allocated()
                 else:
                     alloc = 0
                 tl.append({"t": time.perf_counter() - start_t, "MiB": alloc / (1024**2)})
-                time.sleep(0.05)
         if device == "cuda":
             torch.cuda.empty_cache()
             torch.cuda.reset_peak_memory_stats()
         start = time.perf_counter()
         stop_evt = threading.Event()
         th = threading.Thread(target=sample, args=(start, stop_evt), daemon=True)
         th.start()
-        kwargs = {}
         if device == "cuda":
-            kwargs.update(dict(torch_dtype=torch.float16, device_map="cuda:0", low_cpu_mem_usage=True))
         model = AutoModelForCausalLM.from_pretrained(model_id, **kwargs)
         stop_evt.set()
         th.join()
         if device == "cuda":
             torch.cuda.synchronize()
-            tl.append({"t": time.perf_counter() - start, "MiB": torch.cuda.memory_allocated() / (1024**2)})
         del model
         if device == "cuda":
             torch.cuda.empty_cache()
@@ -240,11 +255,37 @@ def _measure_load_timeline(model_id: str, disable_warmup: bool):
 @spaces.GPU(duration=240)
 def profile_warmup(model_id: str):
-    on  = _measure_load_timeline(model_id, disable_warmup=False)
-    off = _measure_load_timeline(model_id, disable_warmup=True)
-    rows = [{"t": r["t"], "MiB": r["MiB"], "mode": "warmup ON"} for r in on] + \
-           [{"t": r["t"], "MiB": r["MiB"], "mode": "warmup OFF"} for r in off]
-    return pd.DataFrame(rows)
 def build_alloc_plot():
     with gr.Group():
@@ -258,7 +299,6 @@ def build_alloc_plot():
                     "openai-community/gpt2",
                     "google/gemma-2-2b",
                     "microsoft/DialoGPT-small",
-                    "distilbert-base-uncased",
                     "facebook/opt-125m"
                 ],
                 value="openai-community/gpt2",
@@ -457,6 +497,56 @@ hr { border: 0; border-top: 1px solid var(--border-color); margin: 2rem 0; }
   background: #1d4ed8 !important;
 }
 """
 with gr.Blocks(css=CSS, fill_height=True, title="Interactive Blog — Transformers Feature Showcase") as demo:

 from transformers import AutoModelForCausalLM, modeling_utils as MU  # noqa: E402
 def _measure_load_timeline(model_id: str, disable_warmup: bool):
+    """Measure memory usage during model loading with/without cache warmup."""
     orig = getattr(MU, "caching_allocator_warmup", None)
     if disable_warmup and orig is not None:
         MU.caching_allocator_warmup = lambda *a, **k: None  # type: ignore[attr-defined]
     try:
         device = "cuda" if torch.cuda.is_available() else "cpu"
         tl = []
             while not stop_evt.is_set():
                 if device == "cuda":
                     torch.cuda.synchronize()
+                    # Use max memory to capture peaks better
+                    alloc = torch.cuda.max_memory_allocated()
+                    torch.cuda.reset_peak_memory_stats()
                 else:
                     alloc = 0
                 tl.append({"t": time.perf_counter() - start_t, "MiB": alloc / (1024**2)})
+                time.sleep(0.02)  # Sample more frequently
         if device == "cuda":
             torch.cuda.empty_cache()
             torch.cuda.reset_peak_memory_stats()
+            initial_mem = torch.cuda.memory_allocated()
+        else:
+            initial_mem = 0
         start = time.perf_counter()
         stop_evt = threading.Event()
         th = threading.Thread(target=sample, args=(start, stop_evt), daemon=True)
         th.start()
+        # Load model with appropriate settings
+        kwargs = {"low_cpu_mem_usage": True}
         if device == "cuda":
+            kwargs.update({
+                "torch_dtype": torch.float16,
+                "device_map": "cuda:0"
+            })
         model = AutoModelForCausalLM.from_pretrained(model_id, **kwargs)
         stop_evt.set()
         th.join()
+        # Final memory measurement
         if device == "cuda":
             torch.cuda.synchronize()
+            final_mem = torch.cuda.memory_allocated()
+            tl.append({"t": time.perf_counter() - start, "MiB": final_mem / (1024**2)})
+        # Clean up
         del model
         if device == "cuda":
             torch.cuda.empty_cache()
 @spaces.GPU(duration=240)
 def profile_warmup(model_id: str):
+    if not torch.cuda.is_available():
+        # Create dummy data for CPU demo
+        import numpy as np
+        t_points = np.linspace(0, 5, 50)
+        base_mem = np.cumsum(np.random.exponential(50, 50))
+        warmup_on = [{"t": t, "MiB": mem, "mode": "warmup ON"} for t, mem in zip(t_points, base_mem * 0.8)]
+        warmup_off = [{"t": t, "MiB": mem, "mode": "warmup OFF"} for t, mem in zip(t_points, base_mem)]
+        return pd.DataFrame(warmup_on + warmup_off)
+    try:
+        on_data = _measure_load_timeline(model_id, disable_warmup=False)
+        off_data = _measure_load_timeline(model_id, disable_warmup=True)
+        # Create DataFrame with better labeling
+        rows = [{"t": r["t"], "MiB": r["MiB"], "mode": "🚀 Warmup ON (Optimized)"} for r in on_data] + \
+               [{"t": r["t"], "MiB": r["MiB"], "mode": "📈 Warmup OFF (Standard)"} for r in off_data]
+        df = pd.DataFrame(rows)
+        # Add summary stats if we have data
+        if len(on_data) > 0 and len(off_data) > 0:
+            on_peak = max(r["MiB"] for r in on_data)
+            off_peak = max(r["MiB"] for r in off_data)
+            savings = ((off_peak - on_peak) / off_peak * 100) if off_peak > 0 else 0
+            print(f"Memory savings: {savings:.1f}% (Peak: {on_peak:.0f} MiB vs {off_peak:.0f} MiB)")
+        return df
+    except Exception as e:
+        print(f"Error profiling {model_id}: {e}")
+        # Return empty DataFrame on error
+        return pd.DataFrame(columns=["t", "MiB", "mode"])
 def build_alloc_plot():
     with gr.Group():
                     "openai-community/gpt2",
                     "google/gemma-2-2b",
                     "microsoft/DialoGPT-small",
                     "facebook/opt-125m"
                 ],
                 value="openai-community/gpt2",
   background: #1d4ed8 !important;
 }
+/* Dropdown styling - fix contrast and visibility */
+.gr-dropdown {
+  background: #ffffff !important;
+  border: 1px solid var(--border-color) !important;
+  border-radius: 8px !important;
+}
+.gr-dropdown .gr-box {
+  background: #ffffff !important;
+  border: 1px solid var(--border-color) !important;
+}
+.gr-dropdown input {
+  background: #ffffff !important;
+  color: #1f2937 !important;
+  border: none !important;
+  font-weight: 500 !important;
+}
+.gr-dropdown .options {
+  background: #ffffff !important;
+  border: 1px solid var(--border-color) !important;
+  border-radius: 8px !important;
+  box-shadow: 0 4px 12px rgba(0, 0, 0, 0.1) !important;
+}
+.gr-dropdown .option {
+  background: #ffffff !important;
+  color: #1f2937 !important;
+  padding: 0.75rem !important;
+  font-weight: 500 !important;
+}
+.gr-dropdown .option:hover {
+  background: #f8fafc !important;
+  color: #1f2937 !important;
+}
+.gr-dropdown .option.selected {
+  background: var(--link-text-color) !important;
+  color: white !important;
+}
+/* Fix label styling */
+.gr-dropdown label {
+  color: #374151 !important;
+  font-weight: 600 !important;
+  margin-bottom: 0.5rem !important;
+}
 """
 with gr.Blocks(css=CSS, fill_height=True, title="Interactive Blog — Transformers Feature Showcase") as demo: