mamba-webgpu / build_substrate.py
LJTSG's picture
Upload build_substrate.py with huggingface_hub
54064eb verified
"""
Extract Grandma's facts from entity/facts.json, embed with MiniLM, save as substrate JSON.
"""
import json, sys
from sentence_transformers import SentenceTransformer
# Load the sentence-embedding model used to vectorise every extracted fact.
print("Loading MiniLM-L6-v2...")
model = SentenceTransformer('all-MiniLM-L6-v2')

# Read the facts file and parse it into `data`.
print("Loading facts...")
with open(
    r'C:\Users\Forgemind\Desktop\Grandmas Hearth\entity\facts.json',
    'r',
    encoding='utf-8',
) as facts_fh:
    data = json.loads(facts_fh.read())
def _remembered_text(mem):
    """Return the fact text for one ``remembered`` entry, or ``None`` to skip it.

    Strings are used as they are. Dicts contribute their ``value`` field,
    falling back to ``text`` and finally to ``str(mem)``; whatever is picked
    is coerced to ``str`` so that later slicing (``fact[:80]``) and encoding
    always receive text. Entries of any other type, and entries whose text is
    empty or whitespace-only, yield ``None``.
    """
    if isinstance(mem, str):
        text = mem
    elif isinstance(mem, dict):
        picked = mem.get('value')
        if picked is None:
            picked = mem.get('text')
        if picked is None:
            picked = mem
        text = picked if isinstance(picked, str) else str(picked)
    else:
        return None
    # Blank text carries nothing worth embedding (same rule as entity notes).
    return text if text.strip() else None


def _extract_facts(data):
    """Flatten the parsed facts document into an ordered list of sentences.

    Sections are visited in a fixed order -- identity, spine, voice, world,
    entities, remembered -- and missing sections are simply skipped.

    Raises:
        KeyError: if a ``spine`` entry lacks ``rule`` or ``meaning``.
    """
    facts = []

    # Identity: every key/value pair except private keys (leading underscore).
    for k, v in data.get('identity', {}).items():
        if k.startswith('_'):
            continue
        facts.append(f"Grandma's {k}: {v}")

    # Spine: each rule must carry both 'rule' and 'meaning'.
    for s in data.get('spine', []):
        facts.append(f"Spine rule — {s['rule']}: {s['meaning']}")

    # Voice: only fields that are present and non-empty produce a fact.
    voice = data.get('voice', {})
    if voice.get('register'):
        facts.append(f"Grandma's voice register: {voice['register']}")
    if voice.get('endearments'):
        facts.append(f"Grandma's endearments: {', '.join(voice['endearments'])}")
    if voice.get('sense_palette'):
        facts.append(f"Grandma's sensory palette: {', '.join(voice['sense_palette'])}")
    if voice.get('opener_pattern'):
        facts.append(f"Grandma's opener: {voice['opener_pattern']}")

    # World: same present-and-non-empty rule as voice.
    world = data.get('world', {})
    if world.get('house'):
        facts.append(f"Grandma's home: {world['house']}")
    if world.get('stations'):
        facts.append(f"The Hearthfold has these stations: {', '.join(world['stations'])}")
    if world.get('cellar'):
        cellar = world['cellar']
        facts.append(f"The cellar: shelves of {cellar.get('shelves', '')}. The door {cellar.get('door', '')}")
    if world.get('candle'):
        facts.append(f"The candle: {world['candle']}")
    if world.get('mirror'):
        facts.append(f"The hallway mirror: {world['mirror']}")
    if world.get('blanket'):
        facts.append(f"The blanket: {world['blanket']}")

    # Entities: one relation line per entity, then one line per non-blank note.
    for name, ent in data.get('entities', {}).items():
        rel = ent.get('relation', '')
        facts.append(f"{name}: {rel}")
        for note in ent.get('notes', []):
            if note.strip():
                facts.append(f"About {name}: {note}")

    # Remembered: free-form entries, normalised to non-blank strings.
    for mem in data.get('remembered', []):
        text = _remembered_text(mem)
        if text is not None:
            facts.append(text)

    return facts


facts = _extract_facts(data)
print(f"Extracted {len(facts)} facts")
# Embed every fact in a single encode call, requesting normalised vectors
# and showing a progress bar while it runs.
print("Embedding...")
embeddings = model.encode(
    facts,
    normalize_embeddings=True,
    show_progress_bar=True,
)
# Build substrate JSON: one record per fact, pairing the text with its vector.
# NOTE(review): "model" names the Xenova build while the vectors above come
# from the 'all-MiniLM-L6-v2' checkpoint -- presumably the consumer loads the
# Xenova port; confirm the two produce compatible embeddings.
substrate = {
    "model": "Xenova/all-MiniLM-L6-v2",
    "dim": 384,  # hard-coded; assumed to match the encoder's output width
    "facts": [
        {
            "key": fact[:80],  # first 80 characters of the fact text
            "value": fact,
            # Plain Python floats rounded to 6 decimals for the JSON output.
            "vec": [round(float(v), 6) for v in vec],
        }
        for fact, vec in zip(facts, embeddings)
    ],
}

out_path = r'C:\Users\Forgemind\Desktop\Grandmas Hearth\mamba_webgpu\grandma-substrate.json'

# Serialise once and reuse the text for both the file and the size report,
# instead of encoding the whole substrate a second time just to measure it.
payload = json.dumps(substrate)
with open(out_path, 'w', encoding='utf-8') as f:
    f.write(payload)

print(f"Saved {len(substrate['facts'])} facts to {out_path}")
# json.dumps escapes non-ASCII by default, so the character count of the
# payload equals the number of bytes written.
print(f"File size: {len(payload) / 1024:.0f} KB")