diamond-in committed on
Commit
a7aae6c
·
verified ·
1 Parent(s): bac3a3f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +133 -116
app.py CHANGED
@@ -3,180 +3,197 @@ import torch
3
  import spaces
4
  import json
5
  import numpy as np
6
- import matplotlib
7
- import matplotlib.pyplot as plt
8
- from mpl_toolkits.mplot3d import Axes3D
9
  from threading import Lock
10
  from huggingface_hub import snapshot_download
11
  from transformers import AutoModelForCausalLM, AutoTokenizer
12
 
13
- # Set Matplotlib backend to Agg (non-interactive) for server-side rendering
14
- matplotlib.use('Agg')
15
-
16
- # --- 1. DOWNLOAD MODEL FIRST ---
17
  MODEL_ID = "Qwen/Qwen2.5-1.5B-Instruct"
18
  print(f"⬇️ Downloading {MODEL_ID}...")
19
  try:
20
  snapshot_download(repo_id=MODEL_ID)
21
- print("✅ Model downloaded successfully.")
22
  except Exception as e:
23
- print(f"⚠️ Warning during download: {e}")
24
 
25
- # --- 2. GLOBAL STATE ---
26
  model_lock = Lock()
27
  model = None
28
  tokenizer = None
29
- current_activations = {}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
- # --- 3. BACKEND: LOAD MODEL ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  def load_model():
33
  global model, tokenizer
34
- if model is not None:
35
- return
36
 
37
  with model_lock:
38
- print("LOADING Model into Memory...")
39
  model = AutoModelForCausalLM.from_pretrained(
40
  MODEL_ID,
41
  torch_dtype=torch.float16,
42
- device_map="auto",
43
- trust_remote_code=True
44
  )
45
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
46
- print("Model Loaded!")
47
 
48
- # --- 4. BACKEND: HOOKS ---
49
- def get_activation_hook(layer_idx):
50
- def hook(module, input, output):
51
- if isinstance(output, tuple):
52
- hidden_states = output[0]
53
- else:
54
- hidden_states = output
55
-
56
  with torch.no_grad():
57
- val = torch.norm(hidden_states[:, -1, :]).item()
 
58
  current_activations[layer_idx] = val
59
  return hook
60
 
61
- # --- 5. VISUALIZATION FUNCTION (MATPLOTLIB) ---
62
- def create_3d_plot(token_text):
63
- plt.close('all') # Close previous figures to prevent memory leaks
64
- plt.style.use('dark_background')
65
-
66
- fig = plt.figure(figsize=(8, 6))
67
- ax = fig.add_subplot(111, projection='3d')
68
-
69
- layers = list(range(28))
70
- values = [current_activations.get(i, 0.1) for i in layers]
71
-
72
- # Normalize
73
- max_val = max(values) if values and max(values) > 0 else 1
74
- norm_values = [v / max_val for v in values]
75
-
76
- # Bar Data
77
- x_pos = np.arange(28)
78
- y_pos = np.zeros(28)
79
- z_pos = np.zeros(28)
80
- dx = np.ones(28) * 0.8
81
- dy = np.ones(28) * 0.5
82
- dz = values
83
-
84
- # Colors
85
- colormap = plt.cm.plasma
86
- colors = colormap(norm_values)
87
-
88
- # Draw Bars
89
- ax.bar3d(x_pos, y_pos, z_pos, dx, dy, dz, color=colors, shade=True)
90
-
91
- # Styling
92
- ax.set_title(f"Live Activations: '{token_text}'", color='cyan', fontsize=12)
93
- ax.set_xlabel('Layer')
94
- ax.set_zlabel('Intensity')
95
- ax.set_yticks([])
96
-
97
- # --- ERROR FIX HERE: Use xaxis directly, not w_xaxis ---
98
- dark_gray = (0.1, 0.1, 0.1, 1.0)
99
- ax.xaxis.set_pane_color(dark_gray)
100
- ax.yaxis.set_pane_color(dark_gray)
101
- ax.zaxis.set_pane_color(dark_gray)
102
-
103
- ax.grid(color='gray', linestyle=':', linewidth=0.3)
104
-
105
- plt.tight_layout()
106
- return fig
107
-
108
- # --- 6. INFERENCE GENERATOR ---
109
  @spaces.GPU(duration=120)
110
- def generate_response(user_prompt):
111
  load_model()
112
 
 
113
  hooks = []
114
  current_activations.clear()
115
  for i, layer in enumerate(model.model.layers):
116
- h = layer.register_forward_hook(get_activation_hook(i))
117
  hooks.append(h)
118
-
119
- messages = [{"role": "user", "content": user_prompt}]
120
- text_input = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
121
- inputs = tokenizer([text_input], return_tensors="pt").to(model.device)
122
 
123
- input_ids = inputs.input_ids
124
- past_key_values = None
125
- accumulated_text = ""
126
 
127
- yield "", create_3d_plot("Init")
 
 
128
 
129
- step_count = 0
130
- max_tokens = 200
131
 
132
- for _ in range(max_tokens):
 
133
  with torch.no_grad():
134
  if past_key_values is None:
135
- outputs = model(input_ids)
136
  else:
137
- outputs = model(input_ids=input_ids[:, -1:], past_key_values=past_key_values)
138
 
139
- logits = outputs.logits[:, -1, :]
140
- past_key_values = outputs.past_key_values
141
 
142
  next_token = torch.argmax(logits, dim=-1).unsqueeze(-1)
143
  token_str = tokenizer.decode(next_token[0], skip_special_tokens=True)
144
- accumulated_text += token_str
145
- input_ids = torch.cat([input_ids, next_token], dim=-1)
146
 
147
- step_count += 1
148
 
149
- # Update plot every 3 tokens
150
- if step_count % 3 == 0 or next_token.item() == tokenizer.eos_token_id:
151
- fig = create_3d_plot(token_str)
152
- yield accumulated_text, fig
 
 
153
  else:
154
- # Use gr.Skip() properly to avoid re-rendering
155
- yield accumulated_text, gr.update()
 
156
 
157
  if next_token.item() == tokenizer.eos_token_id:
158
  break
159
-
 
160
  for h in hooks: h.remove()
161
- plt.close('all')
162
 
163
- # --- 7. UI LAYOUT ---
164
  with gr.Blocks(theme=gr.themes.Soft(primary_hue="cyan")) as demo:
165
- gr.Markdown("# 🧠 Qwen 2.5 (1.5B) Live Visualization")
166
-
 
167
  with gr.Row():
168
  with gr.Column(scale=1):
169
- prompt_input = gr.Textbox(label="Prompt", lines=3, value="Explain quantum computing briefly.")
170
- generate_btn = gr.Button("Generate", variant="primary")
171
- output_text = gr.Textbox(label="Response", lines=8)
172
 
173
- with gr.Column(scale=1):
174
- viz_plot = gr.Plot(label="Real-Time Activation Topology")
175
-
176
- generate_btn.click(
177
- fn=generate_response,
178
- inputs=prompt_input,
179
- outputs=[output_text, viz_plot]
 
180
  )
181
 
182
  if __name__ == "__main__":
 
3
  import spaces
4
  import json
5
  import numpy as np
6
+ import plotly.graph_objects as go
 
 
7
  from threading import Lock
8
  from huggingface_hub import snapshot_download
9
  from transformers import AutoModelForCausalLM, AutoTokenizer
10
 
11
# --- 1. MODEL DOWNLOAD (Immediate) ---
# Pre-fetch the model weights at import time so the first GPU request
# does not pay the download cost.
MODEL_ID = "Qwen/Qwen2.5-1.5B-Instruct"
print(f"⬇️ Downloading {MODEL_ID}...")
try:
    snapshot_download(repo_id=MODEL_ID)
    print("✅ Model downloaded.")
except Exception as e:
    # Best-effort: a failed pre-download is only logged, not fatal —
    # presumably from_pretrained() in load_model() fetches anything
    # missing later; verify against the transformers cache behavior.
    print(f"⚠️ Download check ignored: {e}")
19
 
20
# --- 2. GLOBAL SETUP & COORDINATES ---
model_lock = Lock()        # guards the lazy load in load_model()
model = None               # set once by load_model()
tokenizer = None           # set once by load_model()
current_activations = {}   # layer_idx -> L2 norm of the last token's hidden state (written by hook_fn)

# Pre-calculate 3D Coordinates for the Neural Spiral (28 Layers)
# We calculate this once so we don't waste CPU during generation
num_layers = 28  # assumed decoder layer count for Qwen2.5-1.5B — TODO confirm against model config
t_vals = np.linspace(0, 4 * np.pi, num_layers)  # 4*pi radians = 2 full loops of the spiral
radius = 5
node_x = radius * np.cos(t_vals)
node_y = radius * np.sin(t_vals)
node_z = np.linspace(0, 15, num_layers)  # Height: layer depth maps to z
35
+ # --- 3. PLOTLY VISUALIZATION FUNCTION ---
36
def get_neural_plot(token_text, layer_data):
    """Build an interactive 3D Plotly figure of per-layer activations.

    Args:
        token_text: Token (or status label) shown in the figure title.
        layer_data: Mapping of layer index -> activation magnitude
            (e.g. the module-level ``current_activations`` dict).

    Returns:
        A ``plotly.graph_objects.Figure`` with a single Scatter3d trace
        placed on the precomputed spiral (``node_x``/``node_y``/``node_z``).
    """
    # 1. Prepare Data
    # Get activations for all num_layers layers (missing entries default to 0.0).
    acts = [layer_data.get(i, 0.0) for i in range(num_layers)]

    # Normalize to [0, 1] for marker size/colour; guard the divisor so an
    # all-zero frame (e.g. the initial "Waiting..." plot) never divides by zero.
    max_val = max(acts) if acts and max(acts) > 0 else 1.0
    norm_acts = [val / max_val for val in acts]

    # 2. Determine Sizes and Colors
    # Base size 10, growing up to 30 for the most active layer.
    sizes = [10 + (n * 20) for n in norm_acts]

    # 3. Create Scatter3D Trace
    trace = go.Scatter3d(
        x=node_x,
        y=node_y,
        z=node_z,
        mode='markers+lines',  # Nodes connected by lines
        marker=dict(
            size=sizes,
            color=norm_acts,        # Color by normalized intensity
            colorscale='Viridis',
            cmin=0, cmax=1,
            opacity=0.9,
            line=dict(width=1, color='white')
        ),
        line=dict(
            color='#444444',
            width=2
        ),
        hovertext=[f"Layer {i}: {a:.2f}" for i, a in enumerate(acts)],
        hoverinfo="text"
    )

    # 4. Layout
    layout = go.Layout(
        title=dict(
            text=f"Token Processing: '{token_text}'",
            font=dict(color="#00ffcc", size=20)
        ),
        paper_bgcolor='#0b0f19',  # Dark Background
        plot_bgcolor='#0b0f19',
        scene=dict(
            xaxis=dict(visible=False),
            yaxis=dict(visible=False),
            zaxis=dict(title="Layer Depth", color="white"),
            bgcolor='#0b0f19',
            camera=dict(
                eye=dict(x=1.5, y=1.5, z=0.5)  # Initial camera angle
            )
        ),
        margin=dict(l=0, r=0, b=0, t=40),
        template="plotly_dark"
    )

    return go.Figure(data=[trace], layout=layout)
96
+
97
+ # --- 4. BACKEND LOGIC ---
98
def load_model():
    """Lazily load the model and tokenizer into module globals.

    Safe to call from multiple threads: the first caller performs the
    load, every other caller returns once ``model`` is set.
    """
    global model, tokenizer
    if model is not None: return  # fast path, already loaded

    with model_lock:
        # Re-check under the lock: a thread that waited here while
        # another thread loaded must not reload (double-checked locking).
        if model is not None:
            return
        print("Loading Model...")
        # Load the tokenizer BEFORE assigning `model`, so that `model`
        # only becomes non-None once both globals are usable — callers
        # gate on `model` alone.
        tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_ID,
            torch_dtype=torch.float16,
            device_map="auto"
        )
        print("Loaded.")
111
 
112
def hook_fn(layer_idx):
    """Create a forward hook that records activation strength for one layer.

    The returned hook stores the L2 norm of the last token's hidden state
    into the module-level ``current_activations`` dict under ``layer_idx``.
    """
    def hook(module, inp, out):
        # Decoder layers may return a tuple; the hidden states come first.
        hidden = out[0] if isinstance(out, tuple) else out
        with torch.no_grad():
            # Magnitude of the most recent token's representation.
            norm_val = torch.norm(hidden[:, -1, :]).item()
        current_activations[layer_idx] = norm_val
    return hook
121
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
@spaces.GPU(duration=120)
def generate(prompt):
    """Stream (text, figure) pairs while greedily decoding a reply.

    Yields the accumulated response text together with either a fresh
    Plotly figure of the current layer activations, or ``gr.skip()`` to
    leave the plot unchanged on intermediate steps.
    """
    load_model()

    # Hook Setup: one forward hook per decoder layer writes its last-token
    # activation norm into the shared `current_activations` dict.
    hooks = []
    current_activations.clear()
    for i, layer in enumerate(model.model.layers):
        h = layer.register_forward_hook(hook_fn(i))
        hooks.append(h)

    # Tokenize the chat-formatted prompt.
    msgs = [{"role": "user", "content": prompt}]
    inputs = tokenizer.apply_chat_template(msgs, return_tensors="pt", add_generation_prompt=True).to(model.device)

    input_ids = inputs
    past_key_values = None  # KV cache, populated by the first forward pass
    accum_text = ""

    # Initial Plot (Empty)
    yield "", get_neural_plot("Waiting...", {})

    # Greedy decoding loop, capped at 256 new tokens.
    for step in range(256):
        with torch.no_grad():
            if past_key_values is None:
                # First step: forward the full prompt (fills the KV cache).
                out = model(input_ids)
            else:
                # Later steps: feed only the newest token plus the cache.
                out = model(input_ids=input_ids[:, -1:], past_key_values=past_key_values)

        logits = out.logits[:, -1, :]
        past_key_values = out.past_key_values

        next_token = torch.argmax(logits, dim=-1).unsqueeze(-1)  # greedy pick
        token_str = tokenizer.decode(next_token[0], skip_special_tokens=True)
        accum_text += token_str

        input_ids = torch.cat([input_ids, next_token], dim=-1)

        # --- YIELD LOGIC ---
        # Plotly is slightly heavy to generate every single token (might lag).
        # We yield the updated Plot every 4 tokens to keep the UI buttery smooth.
        if step % 4 == 0 or next_token.item() == tokenizer.eos_token_id:
            fig = get_neural_plot(token_str, current_activations)
            yield accum_text, fig
        else:
            # gr.skip() leaves the plot component untouched so only the
            # text output re-renders on this step.
            yield accum_text, gr.skip()

        if next_token.item() == tokenizer.eos_token_id:
            break

    # Cleanup: detach hooks so later calls don't double-register.
    for h in hooks: h.remove()
 
177
 
178
# --- 5. UI LAYOUT ---
with gr.Blocks(theme=gr.themes.Soft(primary_hue="cyan")) as demo:
    gr.Markdown("# 🧠 Qwen 1.5B - Interactive Neural Spiral")
    gr.Markdown("*Zoom, Pan, and Rotate with your mouse. Nodes pulse based on AI thought process.*")

    with gr.Row():
        # Left column: prompt input and streamed text output.
        with gr.Column(scale=1):
            prompt = gr.Textbox(label="User Prompt", value="Write a poem about neural networks.", lines=3)
            btn = gr.Button("Generate", variant="primary")
            output = gr.Textbox(label="AI Response", lines=10)

        # Right column (wider): interactive Plotly activation view.
        with gr.Column(scale=2):
            # GRADIO PLOT Component (Supports Plotly Interactivity)
            plot_component = gr.Plot(label="Live Neural Activations")

    # `generate` is a generator, so these outputs stream as it yields.
    btn.click(
        fn=generate,
        inputs=prompt,
        outputs=[output, plot_component]
    )
198
 
199
  if __name__ == "__main__":