Spaces:

diamond-in
/

3dgraphllm

Paused

App Files Files Community

diamond-in committed on Dec 24, 2025

Commit

30320d1

verified ·

1 Parent(s): cb826d0

Update app.py

Browse files

Files changed (1) hide show

app.py +211 -230

app.py CHANGED Viewed

@@ -2,49 +2,53 @@ import gradio as gr
 import torch
 import spaces
 import json
 import numpy as np
-from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
 from threading import Lock
-# --- Global Config ---
-# Qwen 2.5 32B is the target. We must use a global lock for thread safety on ZeroGPU.
-MODEL_ID = "Qwen/Qwen2.5-32B-Instruct"
 model = None
 tokenizer = None
-model_lock = Lock()
 device = "cuda" if torch.cuda.is_available() else "cpu"
-# Storage for capturing live layer data
-current_layer_stats = {}
-# --- Frontend Logic (Embedded HTML + Three.js) ---
-# This Javascript will handle the 3D visualization in the browser.
-frontend_html = """
 <!DOCTYPE html>
 <html lang="en">
 <head>
     <meta charset="UTF-8">
     <style>
-        body { margin: 0; background-color: #020617; overflow: hidden; font-family: 'Segoe UI', sans-serif; }
-        #viz-container { width: 100%; height: 600px; position: relative; border: 1px solid #1e293b; border-radius: 8px; }
-        #hud { position: absolute; top: 10px; left: 10px; pointer-events: none; z-index: 10; color: #94a3b8; }
-        .hud-panel { background: rgba(15, 23, 42, 0.85); padding: 12px; border-radius: 6px; border: 1px solid #334155; display: inline-block; backdrop-filter: blur(4px); }
-        h1 { margin: 0; font-size: 16px; color: #38bdf8; }
-        p { margin: 4px 0 0 0; font-size: 12px; }
-        #stream-hidden { display: none; }
     </style>
-    <!-- Load Three.js -->
     <script type="importmap">
         { "imports": { "three": "https://unpkg.com/three@0.160.0/build/three.module.js", "three/addons/": "https://unpkg.com/three@0.160.0/examples/jsm/" } }
     </script>
 </head>
 <body>
-    <div id="viz-container">
-        <div id="hud">
-            <div class="hud-panel">
-                <h1>Qwen 32B Activity Graph</h1>
-                <p>Live Layer Norm Visualization</p>
-                <p style="color: #fbbf24; margin-top:8px" id="token-display">Waiting...</p>
             </div>
         </div>
     </div>
@@ -53,276 +57,253 @@ frontend_html = """
         import * as THREE from 'three';
         import { OrbitControls } from 'three/addons/controls/OrbitControls.js';
-        // 1. Scene & Camera
-        const container = document.getElementById('viz-container');
         const scene = new THREE.Scene();
-        scene.fog = new THREE.FogExp2(0x020617, 0.035);
         const camera = new THREE.PerspectiveCamera(50, container.clientWidth / container.clientHeight, 0.1, 100);
-        camera.position.set(0, 0, 30);
         const renderer = new THREE.WebGLRenderer({ antialias: true, alpha: true });
         renderer.setSize(container.clientWidth, container.clientHeight);
-        renderer.setPixelRatio(window.devicePixelRatio);
         container.appendChild(renderer.domElement);
         const controls = new OrbitControls(camera, renderer.domElement);
-        controls.enableDamping = true;
         controls.autoRotate = true;
-        controls.autoRotateSpeed = 1.0;
-        // 2. Geometry: Spiral Helix representing layers
-        const numLayers = 64; // Qwen 32B layer count
         const nodes = [];
         const group = new THREE.Group();
-        const nodeGeo = new THREE.SphereGeometry(0.5, 16, 16);
-        const nodeMat = new THREE.MeshStandardMaterial({ color: 0x334155, roughness: 0.1, metalness: 0.5, emissive: 0x000000 });
-        for(let i=0; i<numLayers; i++) {
-            const mesh = new THREE.Mesh(nodeGeo, nodeMat.clone());
-            // Calculate Helix positions
-            const theta = i * 0.4;
             const y = (i - numLayers/2) * 0.6;
-            const r = 6;
-            mesh.position.set(Math.cos(theta)*r, y, Math.sin(theta)*r);
-            nodes.push(mesh);
-            group.add(mesh);
         }
         scene.add(group);
-        // Add lights
-        const ambient = new THREE.AmbientLight(0x404040);
-        scene.add(ambient);
-        const point = new THREE.PointLight(0xffffff, 2, 50);
-        point.position.set(5, 10, 5);
-        scene.add(point);
-        // 3. Render Loop
         function animate() {
             requestAnimationFrame(animate);
             controls.update();
             renderer.render(scene, camera);
         }
         animate();
-        window.addEventListener('resize', () => {
-             camera.aspect = container.clientWidth / container.clientHeight;
-             camera.updateProjectionMatrix();
-             renderer.setSize(container.clientWidth, container.clientHeight);
-        });
-        // 4. Data Bridge - Listener Logic
-        // We look for the DOM element with ID 'data-stream' (Gradio Textbox)
-        // and listen for text changes using MutationObserver.
-        let processedLength = 0;
-        function updateGraph(json) {
-            document.getElementById('token-display').innerText = `Gen: "${json.token}"`;
-            // Map activation values to the 3D nodes
-            const vals = json.activations;
-            const maxVal = Math.max(...vals, 0.1);
             nodes.forEach((node, idx) => {
-                const val = vals[idx] || 0;
-                // Normalize 0.0 - 1.0
-                const norm = val / maxVal;
-                // Scale Animation
-                node.scale.setScalar(1 + norm * 2.5);
-                // Color Animation: Blue -> White -> Red
-                const color = new THREE.Color().setHSL(0.6 - (norm * 0.6), 1.0, 0.5 + (norm * 0.4));
-                node.material.color.copy(color);
-                node.material.emissive.setHSL(0.6 - (norm * 0.6), 1.0, norm);
             });
         }
-        // Logic to setup observer once the Gradio app loads fully
-        const setupObserver = setInterval(() => {
-            const target = document.querySelector('#data-stream textarea') || document.getElementById('data-stream');
-            if (target) {
-                console.log("3D Visualizer: Connected to Stream");
-                clearInterval(setupObserver);
-                const observer = new MutationObserver((mutations) => {
-                    const text = target.value || target.innerText;
-                    // Only process new data chunks
-                    if(text.length > processedLength) {
-                        const newContent = text.substring(processedLength);
-                        processedLength = text.length;
-                        // Parse JSON lines. Sometimes chunks have multiple lines.
-                        const lines = newContent.trim().split('\\n');
-                        lines.forEach(line => {
-                           try {
-                               if(line.startsWith('{')) updateGraph(JSON.parse(line));
-                           } catch(e) {}
-                        });
-                    }
                 });
-                observer.observe(target, { attributes: true, childList: true, subtree: true });
             }
-        }, 1000);
     </script>
 </body>
 </html>
 """
-# --- Backend: Model Logic ---
-def load_qwen_32b():
-    """
-    Loads Qwen-32B with 4-bit Quantization (fits ~19GB VRAM).
-    """
     global model, tokenizer
-    if model is not None: return
-    print("LOADING: Qwen 2.5 32B (4-bit)...")
-    bnb_config = BitsAndBytesConfig(
-        load_in_4bit=True,
-        bnb_4bit_compute_dtype=torch.float16,
-        bnb_4bit_quant_type="nf4"
-    )
-    model = AutoModelForCausalLM.from_pretrained(
-        MODEL_ID,
-        quantization_config=bnb_config,
-        device_map="auto",
-        trust_remote_code=True
-    )
-    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
-    print("MODEL LOADED.")
-def make_hook(layer_idx):
-    """
-    Creates a PyTorch forward hook that measures the 'activity'
-    (L2 norm) of a specific layer during inference.
-    """
-    def hook(module, input, output):
-        # Qwen returns (hidden_states, past_key_values)
-        if isinstance(output, tuple):
-            hidden = output[0]
-        else:
-            hidden = output
-        # We calculate the norm of the last token generated
-        # hidden shape: [Batch, Seq, Dim]
-        # We access the last token: [:, -1, :]
         with torch.no_grad():
-            activation_val = torch.norm(hidden[:, -1, :], p=2).item()
-            current_layer_stats[layer_idx] = activation_val
-    return hook
-@spaces.GPU(duration=120)
-def generate_and_visualize(prompt):
-    global model, tokenizer
-    # Ensure loaded (Lazy loading for faster startup)
-    if model is None:
-        load_qwen_32b()
-    # 1. Register Hooks (Visualization Data Miners)
-    # We clear old hooks to be safe
     hooks = []
-    current_layer_stats.clear()
-    # Qwen uses 'model.model.layers'
     for i, layer in enumerate(model.model.layers):
-        h = layer.register_forward_hook(make_hook(i))
         hooks.append(h)
     # 2. Tokenize
-    messages = [
-        {"role": "system", "content": "You are a helpful coding assistant."},
-        {"role": "user", "content": prompt}
-    ]
-    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-    inputs = tokenizer([text], return_tensors="pt").to(device)
-    # 3. Manual Generation Loop (Streaming)
     input_ids = inputs.input_ids
-    # Yield initial clear state
-    yield json.dumps({"token": "", "activations": []}) + "\n"
-    # We generate up to 256 tokens for this demo
-    max_new_tokens = 256
-    generated_text = ""
-    # NOTE: Using a custom loop instead of .generate to get granular access
-    past_key_values = None
-    for _ in range(max_new_tokens):
-        with torch.no_grad():
-            if past_key_values is None:
-                outputs = model(input_ids)
-            else:
-                outputs = model(input_ids=input_ids[:, -1:], past_key_values=past_key_values)
-            logits = outputs.logits[:, -1, :]
-            past_key_values = outputs.past_key_values
-            # Simple Greedy Decoding
-            next_token = torch.argmax(logits, dim=-1).unsqueeze(-1)
-            # Update sequence
-            input_ids = torch.cat([input_ids, next_token], dim=-1)
-            # Decode Token
-            token_str = tokenizer.decode(next_token[0], skip_special_tokens=True)
-            generated_text += token_str
-            # PREPARE DATA PACKET for Visualization
-            # Collect data from hooks (sorted by layer index)
-            # 64 layers in 32B model
-            act_values = [current_layer_stats.get(i, 0.0) for i in range(len(model.model.layers))]
-            json_payload = json.dumps({
-                "token": token_str,
-                "activations": act_values
-            })
-            # Yield packet (Frontend sees this)
-            yield json_payload + "\n"
-            # Break on EOS
-            if next_token.item() == tokenizer.eos_token_id:
-                break
-    # Cleanup hooks
-    for h in hooks: h.remove()
-# --- Gradio UI Layout ---
-with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", neutral_hue="gray")) as demo:
-    gr.Markdown("## Qwen 2.5 32B • Real-Time Neural Internals")
     with gr.Row():
-        with gr.Column(scale=4):
-            # Input Area
-            user_input = gr.Textbox(label="Prompt (Coding/Reasoning)", value="Write a Python script for Dijkstra's algorithm.", lines=3)
-            btn = gr.Button("Generate", variant="primary")
-            # THE BRIDGE: This textbox receives the stream from Python
-            # It is given a specific ID so JS can find it.
-            # We set visible=True but users won't look at it (css hides it partially).
-            stream_box = gr.Textbox(label="Raw Data Stream", elem_id="data-stream", visible=False)
-        with gr.Column(scale=5):
-            # Visualization
-            gr.HTML(frontend_html)
-    # Event Wiring
-    btn.click(generate_and_visualize, inputs=user_input, outputs=stream_box)
 if __name__ == "__main__":
     demo.launch()

 import torch
 import spaces
 import json
+import os
 import numpy as np
 from threading import Lock
+from huggingface_hub import snapshot_download
+from transformers import AutoModelForCausalLM, AutoTokenizer
+# --- 1. Pre-download step ---
+# Runs at import time, i.e. as soon as the container starts, so the model files are already present before the first request.
+MODEL_ID = "Qwen/Qwen2.5-1.5B-Instruct"
+print(f"⬇️ Downloading {MODEL_ID}...")
+snapshot_download(repo_id=MODEL_ID)
+print("✅ Download complete.")
+# --- 2. Global state ---
+model_lock = Lock()  # guards the lazy one-time load in get_model()
 model = None  # set on first use by get_model()
 tokenizer = None  # set on first use by get_model()
 device = "cuda" if torch.cuda.is_available() else "cpu"
+# Latest activation norm per layer, keyed by layer index; written by the forward hooks, read by chat_stream()
+current_activations = {}
+# --- 3. Frontend: HTML & Three.js 3D Visualizer ---
+# The scene is hard-coded for 28 layers, the layer count of Qwen2.5-1.5B (see numLayers in the script).
+visualization_html = """
 <!DOCTYPE html>
 <html lang="en">
 <head>
     <meta charset="UTF-8">
     <style>
+        body { margin: 0; background: transparent; overflow: hidden; font-family: monospace; }
+        #canvas-wrapper { width: 100%; height: 500px; border-radius: 8px; border: 1px solid #333; background: #0b0f19; position: relative; }
+        #overlay { position: absolute; top: 10px; left: 10px; color: #00ffcc; z-index: 10; pointer-events: none; }
+        .data-panel { background: rgba(0,0,0,0.5); padding: 5px 10px; border-radius: 4px; }
+        #stream_hidden { display: none; }
     </style>
+    <!-- Import Three.js -->
     <script type="importmap">
         { "imports": { "three": "https://unpkg.com/three@0.160.0/build/three.module.js", "three/addons/": "https://unpkg.com/three@0.160.0/examples/jsm/" } }
     </script>
 </head>
 <body>
+    <div id="canvas-wrapper">
+        <div id="overlay">
+            <div class="data-panel">
+                <div id="status">INITIATING...</div>
+                <div id="token-show" style="color: white; font-weight: bold;"></div>
             </div>
         </div>
     </div>
         import * as THREE from 'three';
         import { OrbitControls } from 'three/addons/controls/OrbitControls.js';
+        // 1. Scene setup: scene, fog, camera, renderer and orbit controls, all sized from #canvas-wrapper
+        const container = document.getElementById('canvas-wrapper');
         const scene = new THREE.Scene();
+        scene.fog = new THREE.FogExp2(0x0b0f19, 0.05); // same colour as the #canvas-wrapper background
         const camera = new THREE.PerspectiveCamera(50, container.clientWidth / container.clientHeight, 0.1, 100);
+        camera.position.set(0, 0, 20);
         const renderer = new THREE.WebGLRenderer({ antialias: true, alpha: true });
         renderer.setSize(container.clientWidth, container.clientHeight);
         container.appendChild(renderer.domElement);
         const controls = new OrbitControls(camera, renderer.domElement);
         controls.autoRotate = true;
+        controls.autoRotateSpeed = 2.0;
+        controls.enableDamping = true;
+        // 2. Build the 3D neural tower: one disc per model layer, kept in `nodes` in layer order
+        const numLayers = 28; // Qwen 1.5B has 28 layers
         const nodes = [];
         const group = new THREE.Group();
+        // Geometry: flattened cylinders representing layers (one shared geometry for every disc)
+        const geometry = new THREE.CylinderGeometry(2, 2, 0.2, 32);
+        const material = new THREE.MeshStandardMaterial({
+            color: 0x223344,
+            emissive: 0x000000,
+            metalness: 0.8,
+            roughness: 0.2
+        });
+        for (let i = 0; i < numLayers; i++) {
+            const node = new THREE.Mesh(geometry, material.clone()); // own material copy, so each disc can be recoloured separately
+            // Vertical stack: 0.6 units per layer, shifted down by half the layer count
             const y = (i - numLayers/2) * 0.6;
+            node.position.set(0, y, 0);
+            // Subtle rotation spiral: the Y rotation grows with the layer index, the X tilt is constant
+            node.rotation.y = i * 0.1;
+            node.rotation.x = 0.1;
+            nodes.push(node);
+            group.add(node);
         }
         scene.add(group);
+        // Central 'axon': a thin, semi-transparent cylinder whose length matches the stack (numLayers * 0.6)
+        const coreGeo = new THREE.CylinderGeometry(0.2, 0.2, numLayers * 0.6, 8);
+        const coreMat = new THREE.MeshBasicMaterial({ color: 0x0044aa, transparent: true, opacity: 0.5 });
+        const core = new THREE.Mesh(coreGeo, coreMat);
+        scene.add(core);
+        // Lights: one cyan point light plus a dim ambient fill
+        const light = new THREE.PointLight(0x00ffff, 2, 50);
+        light.position.set(5, 5, 10);
+        scene.add(light);
+        scene.add(new THREE.AmbientLight(0x222222));
+        // Animation loop: re-schedules itself every frame, then updates the controls and redraws
         function animate() {
             requestAnimationFrame(animate);
             controls.update();
             renderer.render(scene, camera);
         }
         animate();
+        // 3. Data Streaming Logic
+        function updateVisuals(data) {
+            document.getElementById('status').innerText = "BRAIN ACTIVITY: ACTIVE";
+            document.getElementById('token-show').innerText = `"${data.token}"`;
+            const acts = data.activations;
+            const maxVal = Math.max(...acts, 1.0);
             nodes.forEach((node, idx) => {
+                const val = acts[idx] || 0;
+                const normalized = val / maxVal;
+                // Color Logic: Blue -> White -> Orange
+                const targetColor = new THREE.Color().setHSL(0.6 - (normalized*0.5), 1.0, 0.2 + (normalized*0.5));
+                node.material.color.copy(targetColor);
+                node.material.emissive.copy(targetColor).multiplyScalar(normalized * 2);
+                // Expansion Logic
+                node.scale.set(1 + normalized, 1, 1 + normalized);
             });
         }
+        // Bridge to Gradio Textbox
+        let lastLen = 0;
+        setInterval(() => {
+            // Find the invisible stream textbox provided by Python
+            const el = document.querySelector('#stream-bridge textarea') || document.getElementById('stream-bridge');
+            if(!el) return;
+            const content = el.value || "";
+            if(content.length > lastLen) {
+                // Parse only the new lines
+                const newLines = content.substring(lastLen).trim().split('\\n');
+                lastLen = content.length;
+                newLines.forEach(line => {
+                    try {
+                        if(line.startsWith('{')) {
+                            updateVisuals(JSON.parse(line));
+                        }
+                    } catch(e) {}
                 });
             }
+        }, 50); // check every 50ms
+        // Resize handler
+        window.addEventListener('resize', () => {
+             camera.aspect = container.clientWidth / container.clientHeight;
+             camera.updateProjectionMatrix();
+             renderer.setSize(container.clientWidth, container.clientHeight);
+        });
     </script>
 </body>
 </html>
 """
+# --- 4. Backend Logic ---
+def get_model():
+    """Load model with Torch standard precision (small enough for standard load)"""
     global model, tokenizer
+    if model is not None:
+        return model, tokenizer
+    with model_lock:
+        if model is not None: return model, tokenizer
+        print("LOADING Qwen 1.5B (FP16)...")
+        # Load in Float16 to fit nicely in 3GB VRAM
+        model = AutoModelForCausalLM.from_pretrained(
+            MODEL_ID,
+            torch_dtype=torch.float16,
+            device_map="auto"
+        )
+        tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+        print("Model Loaded.")
+        return model, tokenizer
+def hook_fn(layer_idx):
+    def _hook(module, inp, out):
+        # Qwen tuple output: (hidden_states, ...)
+        if isinstance(out, tuple): hidden = out[0]
+        else: hidden = out
+        # Capture L2 Norm of the *last token*
         with torch.no_grad():
+            # [batch, seq, dim] -> take last sequence element
+            norm = hidden[:, -1, :].norm(p=2).item()
+            current_activations[layer_idx] = norm
+    return _hook
+@spaces.GPU
+def chat_stream(prompt):
+    model, tokenizer = get_model()
+    # 1. Register hooks on all 28 layers
+    current_activations.clear()
     hooks = []
+    # model.model.layers is standard for Qwen
     for i, layer in enumerate(model.model.layers):
+        h = layer.register_forward_hook(hook_fn(i))
         hooks.append(h)
     # 2. Tokenize
+    messages = [{"role": "user", "content": prompt}]
+    text_input = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+    inputs = tokenizer([text_input], return_tensors="pt").to(model.device)
+    # 3. Generate Loop
     input_ids = inputs.input_ids
+    past_key_values = None
+    generated_full_text = ""
+    yield "", "" # Reset UI
+    max_tokens = 300
+    try:
+        for _ in range(max_tokens):
+            with torch.no_grad():
+                if past_key_values is None:
+                    out = model(input_ids)
+                else:
+                    out = model(input_ids=input_ids[:, -1:], past_key_values=past_key_values)
+                logits = out.logits[:, -1, :]
+                past_key_values = out.past_key_values
+                next_id = torch.argmax(logits, dim=-1).unsqueeze(-1)
+                # Check stop
+                if next_id.item() == tokenizer.eos_token_id:
+                    break
+                token_txt = tokenizer.decode(next_id[0], skip_special_tokens=True)
+                generated_full_text += token_txt
+                input_ids = torch.cat([input_ids, next_id], dim=-1)
+                # 4. Prepare Stream Data
+                # Get stats for all 28 layers
+                layer_stats = [current_activations.get(i, 0.0) for i in range(28)]
+                # Viz JSON (goes to hidden box)
+                viz_json = json.dumps({
+                    "token": token_txt,
+                    "activations": layer_stats
+                }) + "\n"
+                # Yield: (Viz Data, Answer Text)
+                yield viz_json, generated_full_text
+    finally:
+        # Cleanup
+        for h in hooks: h.remove()
+# --- 5. UI Layout ---
+with gr.Blocks(theme=gr.themes.Base()) as demo:
+    gr.Markdown("## Qwen2.5-1.5B 3D Network Explorer (Fast & Light)")
     with gr.Row():
+        with gr.Column(scale=1):
+            prompt = gr.Textbox(label="User Question", lines=2, placeholder="Type your query...")
+            run_btn = gr.Button("Thinking Process", variant="primary")
+            answer_box = gr.Textbox(label="AI Answer", lines=10, interactive=False)
+            # HIDDEN bridge for 3D data
+            stream_bridge = gr.Textbox(elem_id="stream-bridge", visible=False)
+        with gr.Column(scale=1):
+            gr.HTML(visualization_html)
+    # Wire it up
+    # Output Order must match: yield viz_json, generated_full_text
+    run_btn.click(
+        fn=chat_stream,
+        inputs=prompt,
+        outputs=[stream_bridge, answer_box]
+    )
 if __name__ == "__main__":
     demo.launch()