# Shannonstral-7B-1bit ghost-model demo Space by hunterbown
# (uploaded via huggingface_hub, commit 5d98323)
import gradio as gr
import numpy as np
import pickle
import gzip
from mlx_lm import load, generate as mlx_generate
print("Loading Shannonstral-7B-1bit ghost model...")
model, tokenizer = load("hunterbown/Shannonstral-7B-4bit")  # Load structure

# Load and apply 1-bit weights.
# NOTE(security): pickle.load executes arbitrary code from the archive; this is
# acceptable only because the weights file ships with the Space itself.
with gzip.open('weights_packed.pkl.gz', 'rb') as f:
    packed_weights = pickle.load(f)

import mlx.core as mx
from mlx.utils import tree_flatten, tree_unflatten


def _unpack_1bit(data):
    """Dequantize one packed-1-bit weight entry to a float16 numpy array.

    ``data`` is a dict with keys ``'packed'`` (bit-packed bytes or uint8
    sequence), ``'shape'`` (target tensor shape) and optionally ``'scale'``
    (per-tensor magnitude, default 1.0). Each bit maps to +scale (bit 1) or
    -scale (bit 0). Trailing pad bits beyond prod(shape) are discarded; a
    short bitstream is zero-padded so reshape cannot fail.
    """
    shape = tuple(data['shape'])
    n = int(np.prod(shape))
    raw = data['packed']
    if isinstance(raw, (bytes, bytearray, memoryview)):
        packed_array = np.frombuffer(raw, dtype=np.uint8)
    else:
        packed_array = np.asarray(raw, dtype=np.uint8)
    bits = np.unpackbits(packed_array)
    if bits.size < n:
        # Defensive: zero-pad a truncated stream instead of crashing on reshape.
        bits = np.pad(bits, (0, n - bits.size), mode='constant')
    bits = bits[:n].reshape(shape)
    scale = float(data.get('scale', 1.0))
    # Map {0, 1} -> {-scale, +scale} in float16.
    return (2.0 * bits.astype(np.float16) - 1.0) * scale


# Dequantize every packed entry; non-packed entries are ignored.
weights = {}
for key, data in packed_weights.items():
    if isinstance(data, dict) and 'packed' in data:
        weights[key] = mx.array(_unpack_1bit(data))

# Apply weights: overwrite matching parameters of the 4-bit skeleton model,
# casting to each parameter's existing dtype where possible.
current = {name: arr for name, arr in tree_flatten(model.parameters())}
updates = {}
for name, w in weights.items():
    if name in current:
        try:
            updates[name] = w.astype(current[name].dtype)
        except Exception:  # best-effort cast; was a bare `except:` which also swallowed KeyboardInterrupt
            updates[name] = w
if updates:
    model.update(tree_unflatten(list(updates.items())))
print("βœ… Ghost awakened - 93x compression active")
def generate_ghost(prompt, max_tokens=50, temperature=0.7):
response = mlx_generate(model, tokenizer, prompt, max_tokens, temp=temperature)
# Calculate some metrics
tokens = response.split()
unique_tokens = len(set(tokens))
repetition_rate = 1 - (unique_tokens / max(len(tokens), 1))
metrics = f"\n\nπŸ“Š Metrics:\n"
metrics += f"β€’ Unique tokens: {unique_tokens}/{len(tokens)}\n"
metrics += f"β€’ Repetition rate: {repetition_rate:.1%}\n"
metrics += f"β€’ Compression: β‰ˆ93.3Γ— (14GB β†’ 150MB)"
return response + metrics
# Clickable example inputs: (prompt, max_tokens, temperature) triples
# matching the Interface's three input components.
examples = [
    ["The capital of France is", 50, 0.7],
    ["Explain quantum mechanics:", 75, 0.8],
    ["def hello_world():", 50, 0.5],
    ["2 + 2 equals", 30, 0.3],
    ["Once upon a time", 100, 1.0],
]

# Gradio UI: one prompt box plus two sliders wired to generate_ghost's
# (prompt, max_tokens, temperature) parameters; output is a single textbox.
demo = gr.Interface(
    fn=generate_ghost,
    inputs=[
        gr.Textbox(label="Prompt", lines=3, placeholder="Enter prompt to hear the ghost speak..."),
        gr.Slider(10, 200, value=50, step=10, label="Max Tokens"),
        gr.Slider(0.1, 1.5, value=0.7, step=0.1, label="Temperature")
    ],
    outputs=gr.Textbox(label="Ghost Output (Word Salad)", lines=10),
    title="🌌 Shannonstral-7B-1bit Ghost Generator",
    description="""
**93x compression achieved. Intelligence destroyed. Structure preserved.**
This is what happens at the information-theoretic limit of neural compression.
The model generates tokens but has lost all semantic understanding.
⚠️ Research artifact - outputs are incoherent word salad.
""",
    examples=examples,
    theme="dark"
)

# Launch only when run as a script; HF Spaces also executes this entry point.
if __name__ == "__main__":
    demo.launch()