Executor-Tyrant-Framework Claude Opus 4.6 (1M context) committed on
Commit
fd0231e
·
1 Parent(s): bc9f1cc

Restore full PyTorch demo with ZeroGPU support

Browse files

- @spaces.GPU decorator on all GPU-using functions
- Model loads to cuda, inference on GPU
- Two tabs: Live Model (ZeroGPU) and Synthetic (no GPU)
- Lazy torch import so Gradio starts fast

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

Files changed (2)
  1. app.py +280 -220
  2. requirements.txt +3 -0
app.py CHANGED
@@ -1,13 +1,14 @@
1
  """
2
  Condensate — Live Demo
3
- HuggingFace Spaces Gradio App
4
 
5
- Demonstrates Condensate's four layers on simulated workloads.
6
- Shows real prediction accuracy, cluster discovery, and RAM savings.
7
 
8
  "Do the same, or more, with less."
9
  """
10
 
 
11
  import gradio as gr
12
  import numpy as np
13
  import time
@@ -19,26 +20,214 @@ sys.path.insert(0, os.path.dirname(__file__))
19
  from membrane import Membrane
20
  from graph_builder import GraphBuilder
21
  from predictor import Predictor
22
- from condenser import Condenser
23
 
 
 
 
24
 
25
- def run_full_demo(num_layers, num_hot, num_iterations, demotion_idle_ms):
26
- """Run the complete Condensate pipeline on a simulated workload."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
  num_layers = int(num_layers)
29
  num_hot = int(min(num_hot, num_layers))
30
  num_iterations = int(num_iterations)
31
- demotion_idle_ms = int(demotion_idle_ms)
32
 
33
  output = []
34
  output.append("=" * 55)
35
- output.append(" CONDENSATE — Full Pipeline Demo")
36
  output.append("=" * 55)
37
 
38
- # --- Build simulated model state ---
39
  state = {}
40
  for i in range(num_layers):
41
- # Structured sparse data — like real model weights
42
  arr = np.zeros((128, 128), dtype=np.float32)
43
  mask = np.random.random((128, 128)) < 0.2
44
  arr[mask] = np.random.randn(mask.sum()).astype(np.float32)
@@ -47,22 +236,13 @@ def run_full_demo(num_layers, num_hot, num_iterations, demotion_idle_ms):
47
  total_mb = sum(v.nbytes for v in state.values()) / 1024 / 1024
48
  hot_set = set(range(num_hot))
49
 
50
- output.append(f"\n Workload:")
51
- output.append(f" {num_layers} layers x 64KB = {total_mb:.1f} MB total state")
52
- output.append(f" {num_hot} layers hot (accessed every iteration)")
53
- output.append(f" {num_layers - num_hot} layers cold (rarely accessed)")
54
- output.append(f" {num_iterations} iterations")
55
-
56
- # --- Layer 0: Membrane ---
57
- output.append(f"\n{'─' * 55}")
58
- output.append(" LAYER 0: Membrane (Access Observation)")
59
- output.append(f"{'─' * 55}")
60
 
 
61
  Membrane.clear()
62
  wrapped = Membrane.wrap(state.copy(), "model")
63
-
64
- start = time.monotonic()
65
- for iteration in range(num_iterations):
66
  for i in range(num_layers):
67
  if i in hot_set:
68
  _ = wrapped[f"layer_{i}"]
@@ -70,65 +250,18 @@ def run_full_demo(num_layers, num_hot, num_iterations, demotion_idle_ms):
70
  _ = wrapped[f"layer_{i}"]
71
  time.sleep(0.001)
72
 
73
- elapsed = (time.monotonic() - start) * 1000
74
  log = Membrane.get_log()
75
-
76
- output.append(f" Access events captured: {len(log)}")
77
- output.append(f" Observation time: {elapsed:.0f}ms")
78
-
79
- # --- Layer 1: Graph Builder ---
80
- output.append(f"\n{'─' * 55}")
81
- output.append(" LAYER 1: Graph Builder (Pattern Discovery)")
82
- output.append(f"{'─' * 55}")
83
-
84
  graph = GraphBuilder(causal_window_ns=5_000_000)
85
  graph.build(log)
86
-
87
- hot_nodes = [n for n in graph.nodes.values()
88
- if getattr(n, '_temp_class', '') == 'HOT']
89
- warm_nodes = [n for n in graph.nodes.values()
90
- if getattr(n, '_temp_class', '') == 'WARM']
91
- cold_nodes = [n for n in graph.nodes.values()
92
- if getattr(n, '_temp_class', '') == 'COLD']
93
- chains = graph.get_causal_chains()
94
-
95
- output.append(f" Nodes: {len(graph.nodes)}")
96
- output.append(f" HOT: {len(hot_nodes)}")
97
- output.append(f" WARM: {len(warm_nodes)}")
98
- output.append(f" COLD: {len(cold_nodes)}")
99
- output.append(f" Clusters: {len(graph.clusters)} (proto-hyperedges)")
100
- output.append(f" Chains: {len(chains)} causal chains discovered")
101
-
102
- if hot_nodes:
103
- hot_accesses = sum(n.access_count for n in hot_nodes)
104
- total_accesses = sum(n.access_count for n in graph.nodes.values())
105
- output.append(f" Hot nodes handle {hot_accesses/total_accesses*100:.0f}% of all accesses")
106
-
107
- # --- Layer 2: Predictor ---
108
- output.append(f"\n{'─' * 55}")
109
- output.append(" LAYER 2: Predictor (Causal Prediction)")
110
- output.append(f"{'─' * 55}")
111
-
112
  predictor = Predictor()
113
  predictor.learn(graph)
 
114
 
115
- # Score on the training data
116
- result = predictor.score(log)
117
-
118
- output.append(f" Predictions made: {result['predictions_made']}")
119
- output.append(f" Hits: {result['hits']}")
120
- output.append(f" Misses: {result['misses']}")
121
- output.append(f" *** ACCURACY: {result['accuracy']}% ***")
122
- output.append(f" Hit breakdown:")
123
- output.append(f" Direct successor: {result['direct_hits']}")
124
- output.append(f" Chain propagation: {result['chain_hits']}")
125
- output.append(f" Cluster co-access: {result['cluster_hits']}")
126
-
127
- # --- Layer 3: Condenser ---
128
- output.append(f"\n{'─' * 55}")
129
- output.append(" LAYER 3: Condenser (RAM Reduction)")
130
- output.append(f"{'─' * 55}")
131
 
 
132
  def workload_fn(w):
133
  for i in range(num_layers):
134
  if i in hot_set:
@@ -137,123 +270,26 @@ def run_full_demo(num_layers, num_hot, num_iterations, demotion_idle_ms):
137
  _ = w[f"layer_{i}"]
138
  time.sleep(0.001)
139
 
140
- condenser = Condenser(demotion_idle_ms=demotion_idle_ms, warmup_iters=10)
141
  bench = condenser.run_benchmark(state, workload_fn,
142
  iterations=num_iterations, name="model")
143
 
144
- output.append(f" Baseline RAM: {bench['baseline_ram_mb']:.2f} MB")
145
- output.append(f" Condensed RAM: {bench['avg_condensed_ram_mb']:.2f} MB")
146
- output.append(f" Minimum RAM: {bench['min_condensed_ram_mb']:.2f} MB")
147
 
148
  if bench.get('promotion_log'):
149
  last = bench['promotion_log'][-1]
150
- output.append(f" Final tiers: HOT={last['hot']} WARM={last['warm']} COLD={last['cold']}")
151
-
152
- output.append(f"")
153
- output.append(f" ┌─────────────────────────────────────┐")
154
- output.append(f" │ RAM SAVED: {bench['saved_mb']:.2f} MB ({bench['saved_pct']:.1f}%)" + " " * max(0, 15 - len(f"{bench['saved_mb']:.2f} MB ({bench['saved_pct']:.1f}%)")) + "│")
155
- output.append(f" │ {bench['baseline_ram_mb']:.2f} MB → {bench['avg_condensed_ram_mb']:.2f} MB" + " " * max(0, 17 - len(f"{bench['baseline_ram_mb']:.2f} MB → {bench['avg_condensed_ram_mb']:.2f} MB")) + "│")
156
- output.append(f" │ Same data. Same output. Less RAM. │")
157
- output.append(f" └─────────────────────────────────────┘")
158
-
159
- # --- Speedup estimate ---
160
- output.append(f"\n{'─' * 55}")
161
- output.append(" THEORETICAL IMPACT")
162
- output.append(f"{'─' * 55}")
163
-
164
- hit_rate = result['accuracy'] / 100.0
165
- hit_lat = 100 # ns, pre-staged in RAM
166
- miss_lat = 100_000 # ns, page from disk
167
-
168
- with_pred = hit_rate * hit_lat + (1 - hit_rate) * miss_lat
169
- without_pred = miss_lat
170
- speedup = without_pred / with_pred if with_pred > 0 else 1.0
171
-
172
- output.append(f" Cold access without prediction: {miss_lat/1000:.0f}μs (page from disk)")
173
- output.append(f" Cold access with prediction: weighted avg {with_pred/1000:.1f}μs")
174
- output.append(f" *** COLD ACCESS SPEEDUP: {speedup:.1f}x ***")
175
-
176
- output.append(f"\n{'=' * 55}")
177
- output.append(f" Condensate — Do the same, or more, with less.")
178
- output.append(f"{'=' * 55}")
179
 
180
  condenser.cleanup()
181
- return "\n".join(output)
182
-
183
-
184
- def run_comparison():
185
- """Side-by-side: various workload profiles."""
186
-
187
- output = []
188
- output.append("=" * 55)
189
- output.append(" CONDENSATE — Workload Comparison")
190
- output.append("=" * 55)
191
-
192
- configs = [
193
- ("AI Inference (all hot)", 12, 12, 20),
194
- ("Selective (4 hot / 12 cold)", 16, 4, 30),
195
- ("Large model (8 hot / 56 cold)", 64, 8, 20),
196
- ("Edge device (2 hot / 14 cold)", 16, 2, 25),
197
- ]
198
-
199
- output.append(f"\n {'Workload':<32} {'Base MB':>8} {'Cond MB':>8} {'Saved':>8} {'Pred':>6}")
200
- output.append(f" {'─'*32} {'─'*8} {'─'*8} {'─'*8} {'─'*6}")
201
-
202
- for name, layers, hot, iters in configs:
203
- state = {}
204
- for i in range(layers):
205
- arr = np.zeros((128, 128), dtype=np.float32)
206
- mask = np.random.random((128, 128)) < 0.2
207
- arr[mask] = np.random.randn(mask.sum()).astype(np.float32)
208
- state[f"layer_{i}"] = arr
209
-
210
- hot_set = set(range(hot))
211
-
212
- def workload_fn(w, hs=hot_set, nl=layers):
213
- for i in range(nl):
214
- if i in hs:
215
- _ = w[f"layer_{i}"]
216
- elif np.random.random() < 0.03:
217
- _ = w[f"layer_{i}"]
218
- time.sleep(0.001)
219
-
220
- # Quick train for prediction accuracy
221
- Membrane.clear()
222
- wrapped = Membrane.wrap(state.copy(), "m")
223
- for _ in range(10):
224
- workload_fn(wrapped)
225
- log = Membrane.get_log()
226
- graph = GraphBuilder(causal_window_ns=5_000_000)
227
- graph.build(log)
228
- pred = Predictor()
229
- pred.learn(graph)
230
- score = pred.score(log)
231
-
232
- # Condenser benchmark
233
- condenser = Condenser(demotion_idle_ms=10, warmup_iters=8)
234
- bench = condenser.run_benchmark(state, workload_fn,
235
- iterations=iters, name="m")
236
-
237
- output.append(f" {name:<32} {bench['baseline_ram_mb']:>7.1f} "
238
- f"{bench['avg_condensed_ram_mb']:>7.1f} "
239
- f"{bench['saved_pct']:>6.1f}% "
240
- f"{score['accuracy']:>5.1f}%")
241
-
242
- condenser.cleanup()
243
-
244
- output.append(f"\n Key insight: when everything is hot, Condensate")
245
- output.append(f" correctly does NOTHING (0% savings = correct answer).")
246
- output.append(f" When cold state exists, savings scale with cold ratio.")
247
  output.append(f"\n{'=' * 55}")
248
-
249
  return "\n".join(output)
250
 
251
 
252
  # --- Gradio UI ---
253
 
254
- with gr.Blocks(
255
- title="Condensate — Do More With Less",
256
- ) as demo:
257
 
258
  gr.Markdown("""
259
  # Condensate
@@ -262,51 +298,75 @@ with gr.Blocks(
262
  Condensate uses a neural substrate with causal spike propagation
263
  to learn memory access patterns and dynamically condense RAM usage.
264
 
265
- **This demo runs all 4 layers of the Condensate pipeline on simulated
266
- workloads the same layers that achieved 98.8% prediction accuracy
267
- and 50-82% RAM reduction in testing.**
268
 
269
- Production version uses NeuroGraph SNN + Lenia/Flow-Lenia dynamics
270
- with a Rust core for cache-line-aligned, sub-microsecond operation.
271
  """)
272
 
273
- with gr.Row():
274
- with gr.Column():
275
- gr.Markdown("### Custom Workload")
276
- num_layers = gr.Slider(minimum=4, maximum=128, value=32, step=4,
277
- label="Total memory regions (layers)")
278
- num_hot = gr.Slider(minimum=1, maximum=64, value=6, step=1,
279
- label="Hot regions (always accessed)")
280
- num_iterations = gr.Slider(minimum=10, maximum=50, value=20, step=5,
281
- label="Iterations")
282
- demotion_idle = gr.Slider(minimum=2, maximum=50, value=10, step=2,
283
- label="Demotion idle threshold (ms)")
284
- run_btn = gr.Button("Run Full Pipeline", variant="primary")
285
-
286
- with gr.Column():
287
- gr.Markdown("### Quick Comparison")
288
- gr.Markdown("Run 4 different workload profiles side-by-side.")
289
- compare_btn = gr.Button("Run Comparison", variant="secondary")
290
-
291
- with gr.Row():
292
- output_box = gr.Textbox(
293
- label="Results",
294
- lines=45,
295
- interactive=False,
296
- show_copy_button=True,
297
- )
298
-
299
- run_btn.click(
300
- fn=run_full_demo,
301
- inputs=[num_layers, num_hot, num_iterations, demotion_idle],
302
- outputs=output_box,
303
- )
304
-
305
- compare_btn.click(
306
- fn=run_comparison,
307
- outputs=output_box,
308
- )
309
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
310
 
311
  if __name__ == "__main__":
312
  demo.launch(server_name="0.0.0.0", server_port=7860)
 
1
  """
2
  Condensate — Live Demo
3
+ HuggingFace Spaces Gradio App (ZeroGPU)
4
 
5
+ Shows real-time RAM condensation on a live model.
6
+ Compares baseline vs condensed inference.
7
 
8
  "Do the same, or more, with less."
9
  """
10
 
11
+ import spaces
12
  import gradio as gr
13
  import numpy as np
14
  import time
 
20
  from membrane import Membrane
21
  from graph_builder import GraphBuilder
22
  from predictor import Predictor
 
23
 
24
+ # Lazy imports for heavy deps
25
+ torch = None
26
+ TorchMembrane = None
27
 
28
+
29
+ def _ensure_torch():
30
+ global torch, TorchMembrane
31
+ if torch is None:
32
+ import torch as _torch
33
+ torch = _torch
34
+ from torch_membrane import TorchMembrane as _TM
35
+ TorchMembrane = _TM
36
+
37
+
38
+ # --- Global state ---
39
+ MODEL = None
40
+ TOKENIZER = None
41
+ MEMBRANE = None
42
+ PREDICTOR = None
43
+ GRAPH = None
44
+ MODEL_NAME = "distilgpt2"
45
+
46
+
47
+ @spaces.GPU
48
+ def load_model():
49
+ """Load model and install membrane."""
50
+ global MODEL, TOKENIZER, MEMBRANE
51
+
52
+ _ensure_torch()
53
+ from transformers import AutoModelForCausalLM, AutoTokenizer
54
+
55
+ TOKENIZER = AutoTokenizer.from_pretrained(MODEL_NAME)
56
+ if TOKENIZER.pad_token is None:
57
+ TOKENIZER.pad_token = TOKENIZER.eos_token
58
+
59
+ MODEL = AutoModelForCausalLM.from_pretrained(
60
+ MODEL_NAME,
61
+ torch_dtype=torch.float32,
62
+ output_attentions=True,
63
+ )
64
+ MODEL.eval()
65
+ MODEL.to("cuda")
66
+
67
+ MEMBRANE = TorchMembrane(MODEL)
68
+
69
+ param_count = sum(p.numel() for p in MODEL.parameters()) / 1e6
70
+ return f"Loaded {MODEL_NAME} ({param_count:.1f}M params) on ZeroGPU"
71
+
72
+
73
+ @spaces.GPU
74
+ def train_predictor(num_prompts=5):
75
+ """Run several prompts to train the predictor on access patterns."""
76
+ global PREDICTOR, GRAPH, MEMBRANE
77
+
78
+ _ensure_torch()
79
+
80
+ if MODEL is None:
81
+ load_model()
82
+
83
+ MEMBRANE.reset()
84
+
85
+ training_prompts = [
86
+ "The quick brown fox jumps over the lazy",
87
+ "In the beginning there was darkness and then",
88
+ "Machine learning models can be optimized by",
89
+ "The capital of France is Paris and the",
90
+ "Once upon a time in a land far far",
91
+ "Artificial intelligence will transform the way we",
92
+ "The most important thing about programming is",
93
+ "When the sun sets over the mountains the",
94
+ ][:num_prompts]
95
+
96
+ for prompt in training_prompts:
97
+ inputs = TOKENIZER(prompt, return_tensors="pt", padding=True).to("cuda")
98
+ with torch.no_grad():
99
+ MODEL.generate(
100
+ **inputs,
101
+ max_new_tokens=20,
102
+ do_sample=False,
103
+ pad_token_id=TOKENIZER.pad_token_id,
104
+ )
105
+
106
+ log = MEMBRANE.to_access_log()
107
+
108
+ GRAPH = GraphBuilder(causal_window_ns=5_000_000)
109
+ GRAPH.build(log)
110
+
111
+ PREDICTOR = Predictor()
112
+ PREDICTOR.learn(GRAPH)
113
+
114
+ result = PREDICTOR.score(log)
115
+
116
+ return (f"Trained on {len(training_prompts)} prompts, "
117
+ f"{len(log)} access events observed.\n"
118
+ f"Prediction accuracy: {result['accuracy']}%\n"
119
+ f"Causal chains discovered: {len(GRAPH.get_causal_chains())}\n"
120
+ f"Clusters (proto-hyperedges): {len(GRAPH.clusters)}")
121
+
122
+
123
+ @spaces.GPU
124
+ def run_analysis(prompt, max_tokens=30):
125
+ """Run inference, show activation map + condensation potential."""
126
+ global MEMBRANE, PREDICTOR
127
+
128
+ _ensure_torch()
129
+
130
+ if MODEL is None:
131
+ load_model()
132
+ if PREDICTOR is None:
133
+ train_predictor()
134
+
135
+ MEMBRANE.reset()
136
+
137
+ inputs = TOKENIZER(prompt, return_tensors="pt", padding=True).to("cuda")
138
+ start = time.monotonic()
139
+
140
+ with torch.no_grad():
141
+ outputs = MODEL.generate(
142
+ **inputs,
143
+ max_new_tokens=int(max_tokens),
144
+ do_sample=True,
145
+ temperature=0.7,
146
+ top_p=0.9,
147
+ pad_token_id=TOKENIZER.pad_token_id,
148
+ )
149
+
150
+ elapsed_ms = (time.monotonic() - start) * 1000
151
+ generated_text = TOKENIZER.decode(outputs[0], skip_special_tokens=True)
152
+
153
+ activation_map = MEMBRANE.get_activation_map()
154
+ potential = MEMBRANE.get_condensation_potential()
155
+
156
+ log = MEMBRANE.to_access_log()
157
+ pred_result = PREDICTOR.score(log)
158
+
159
+ # Build comparison output
160
+ comparison = []
161
+ comparison.append("=" * 55)
162
+ comparison.append(" BASELINE vs CONDENSATE")
163
+ comparison.append("=" * 55)
164
+ comparison.append(f"\n Generated: {generated_text}")
165
+ comparison.append(f" Time: {elapsed_ms:.0f}ms\n")
166
+
167
+ baseline_mb = potential['total_mb']
168
+ condensed_mb = potential['hot_mb']
169
+ saved_pct = potential['savings_pct']
170
+
171
+ comparison.append(f" WITHOUT Condensate:")
172
+ comparison.append(f" All {potential['total_layers']} layers in RAM: {baseline_mb:.2f} MB")
173
+ comparison.append(f" (Every weight loaded, whether needed or not)\n")
174
+
175
+ comparison.append(f" WITH Condensate:")
176
+ comparison.append(f" {potential['hot_layers']} HOT layers in RAM: {condensed_mb:.2f} MB")
177
+ comparison.append(f" {potential['cold_layers']} COLD layers paged: {potential['cold_mb']:.2f} MB saved")
178
+ comparison.append(f" (Cold layers compressed or on disk,")
179
+ comparison.append(f" pre-staged back to RAM before needed)\n")
180
+
181
+ comparison.append(f" ┌─────────────────────────────────────┐")
182
+ comparison.append(f" │ RAM REDUCTION: {saved_pct:.1f}% │")
183
+ comparison.append(f" │ {baseline_mb:.2f} MB → {condensed_mb:.2f} MB │")
184
+ comparison.append(f" │ Same output. Same quality. │")
185
+ comparison.append(f" └─────────────────────────────────────┘\n")
186
+
187
+ comparison.append(f" Prediction accuracy: {pred_result['accuracy']}%")
188
+ comparison.append(f" Access events: {len(log)}")
189
+
190
+ # Build analysis output
191
+ analysis = []
192
+ analysis.append("=" * 55)
193
+ analysis.append(" LAYER ACTIVATION MAP")
194
+ analysis.append("=" * 55)
195
+ analysis.append(f"\n {'Layer':<35} {'Fwd':>4} {'Activation':>10} {'MB':>6} {'Tier':>5}")
196
+ analysis.append(f" {'-'*35} {'-'*4} {'-'*10} {'-'*6} {'-'*5}")
197
+
198
+ for layer in activation_map[:40]:
199
+ name = layer['name']
200
+ if len(name) > 35:
201
+ name = "..." + name[-32:]
202
+ attn = " [A]" if layer['is_attention'] else ""
203
+ analysis.append(f" {name:<35} {layer['forward_count']:>4} "
204
+ f"{layer['avg_activation']:>10.3f} "
205
+ f"{layer['param_mb']:>6.3f} "
206
+ f"{layer['temperature']:>5}{attn}")
207
+
208
+ if len(activation_map) > 40:
209
+ analysis.append(f" ... and {len(activation_map) - 40} more layers")
210
+
211
+ return "\n".join(comparison), "\n".join(analysis)
212
+
213
+
214
+ # --- Also keep the synthetic demo for comparison ---
215
+
216
+ def run_synthetic_demo(num_layers, num_hot, num_iterations):
217
+ """Run the PoC pipeline on synthetic data (no GPU needed)."""
218
+ from condenser import Condenser
219
 
220
  num_layers = int(num_layers)
221
  num_hot = int(min(num_hot, num_layers))
222
  num_iterations = int(num_iterations)
 
223
 
224
  output = []
225
  output.append("=" * 55)
226
+ output.append(" CONDENSATE — Synthetic Pipeline Demo")
227
  output.append("=" * 55)
228
 
 
229
  state = {}
230
  for i in range(num_layers):
 
231
  arr = np.zeros((128, 128), dtype=np.float32)
232
  mask = np.random.random((128, 128)) < 0.2
233
  arr[mask] = np.random.randn(mask.sum()).astype(np.float32)
 
236
  total_mb = sum(v.nbytes for v in state.values()) / 1024 / 1024
237
  hot_set = set(range(num_hot))
238
 
239
+ output.append(f"\n {num_layers} regions x 64KB = {total_mb:.1f} MB total")
240
+ output.append(f" {num_hot} hot / {num_layers - num_hot} cold")
 
 
 
 
 
 
 
 
241
 
242
+ # Membrane + Graph + Predictor
243
  Membrane.clear()
244
  wrapped = Membrane.wrap(state.copy(), "model")
245
+ for _ in range(num_iterations):
 
 
246
  for i in range(num_layers):
247
  if i in hot_set:
248
  _ = wrapped[f"layer_{i}"]
 
250
  _ = wrapped[f"layer_{i}"]
251
  time.sleep(0.001)
252
 
 
253
  log = Membrane.get_log()
 
 
 
 
 
 
 
 
 
254
  graph = GraphBuilder(causal_window_ns=5_000_000)
255
  graph.build(log)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
256
  predictor = Predictor()
257
  predictor.learn(graph)
258
+ score = predictor.score(log)
259
 
260
+ output.append(f"\n Prediction accuracy: {score['accuracy']}%")
261
+ output.append(f" Clusters: {len(graph.clusters)}")
262
+ output.append(f" Causal chains: {len(graph.get_causal_chains())}")
 
 
 
 
 
 
 
 
 
 
 
 
 
263
 
264
+ # Condenser
265
  def workload_fn(w):
266
  for i in range(num_layers):
267
  if i in hot_set:
 
270
  _ = w[f"layer_{i}"]
271
  time.sleep(0.001)
272
 
273
+ condenser = Condenser(demotion_idle_ms=10, warmup_iters=8)
274
  bench = condenser.run_benchmark(state, workload_fn,
275
  iterations=num_iterations, name="model")
276
 
277
+ output.append(f"\n Baseline: {bench['baseline_ram_mb']:.2f} MB")
278
+ output.append(f" Condensed: {bench['avg_condensed_ram_mb']:.2f} MB")
279
+ output.append(f" *** SAVED: {bench['saved_mb']:.2f} MB ({bench['saved_pct']:.1f}%) ***")
280
 
281
  if bench.get('promotion_log'):
282
  last = bench['promotion_log'][-1]
283
+ output.append(f" Final: HOT={last['hot']} WARM={last['warm']} COLD={last['cold']}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
284
 
285
  condenser.cleanup()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
286
  output.append(f"\n{'=' * 55}")
 
287
  return "\n".join(output)
288
 
289
 
290
  # --- Gradio UI ---
291
 
292
+ with gr.Blocks(title="Condensate — Do More With Less") as demo:
 
 
293
 
294
  gr.Markdown("""
295
  # Condensate
 
298
  Condensate uses a neural substrate with causal spike propagation
299
  to learn memory access patterns and dynamically condense RAM usage.
300
 
301
+ **Live Model tab:** Runs a real transformer (distilgpt2) on ZeroGPU
302
+ and shows which layers are HOT vs COLD for your input.
 
303
 
304
+ **Synthetic tab:** Runs the full 4-layer pipeline on configurable
305
+ simulated workloads (no GPU needed).
306
  """)
307
 
308
+ with gr.Tabs():
309
+ with gr.TabItem("Live Model (ZeroGPU)"):
310
+ with gr.Row():
311
+ with gr.Column():
312
+ status = gr.Textbox(label="Status", interactive=False, lines=3)
313
+ load_btn = gr.Button("1. Load Model", variant="primary")
314
+ train_btn = gr.Button("2. Train Predictor", variant="primary")
315
+
316
+ with gr.Row():
317
+ with gr.Column():
318
+ prompt_input = gr.Textbox(
319
+ label="Prompt",
320
+ value="The future of artificial intelligence is",
321
+ lines=2,
322
+ )
323
+ max_tokens = gr.Slider(
324
+ minimum=10, maximum=100, value=30, step=5,
325
+ label="Max tokens"
326
+ )
327
+ run_btn = gr.Button("3. Run & Analyze", variant="primary")
328
+
329
+ with gr.Row():
330
+ with gr.Column():
331
+ comparison_output = gr.Textbox(
332
+ label="Baseline vs Condensate",
333
+ lines=25, interactive=False,
334
+ )
335
+ with gr.Column():
336
+ analysis_output = gr.Textbox(
337
+ label="Layer Activation Map",
338
+ lines=25, interactive=False,
339
+ )
340
+
341
+ load_btn.click(fn=load_model, outputs=status)
342
+ train_btn.click(fn=train_predictor, outputs=status)
343
+ run_btn.click(
344
+ fn=run_analysis,
345
+ inputs=[prompt_input, max_tokens],
346
+ outputs=[comparison_output, analysis_output],
347
+ )
348
+
349
+ with gr.TabItem("Synthetic Workload"):
350
+ with gr.Row():
351
+ with gr.Column():
352
+ syn_layers = gr.Slider(minimum=4, maximum=128, value=32, step=4,
353
+ label="Total memory regions")
354
+ syn_hot = gr.Slider(minimum=1, maximum=64, value=6, step=1,
355
+ label="Hot regions")
356
+ syn_iters = gr.Slider(minimum=10, maximum=50, value=20, step=5,
357
+ label="Iterations")
358
+ syn_btn = gr.Button("Run Pipeline", variant="primary")
359
+ with gr.Column():
360
+ syn_output = gr.Textbox(
361
+ label="Results", lines=25,
362
+ interactive=False, show_copy_button=True,
363
+ )
364
+
365
+ syn_btn.click(
366
+ fn=run_synthetic_demo,
367
+ inputs=[syn_layers, syn_hot, syn_iters],
368
+ outputs=syn_output,
369
+ )
370
 
371
  if __name__ == "__main__":
372
  demo.launch(server_name="0.0.0.0", server_port=7860)
requirements.txt CHANGED
@@ -1,2 +1,5 @@
 
 
1
  numpy
2
  lz4
 
 
1
+ torch
2
+ transformers
3
  numpy
4
  lz4
5
+ spaces