LJTSG committed on
Commit
54064eb
·
verified ·
1 Parent(s): 5a0cb81

Upload build_substrate.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. build_substrate.py +91 -0
build_substrate.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""
Extract Grandma's facts from entity/facts.json, embed with MiniLM, save as substrate JSON.

Usage:
    python build_substrate.py [FACTS_JSON [OUTPUT_JSON]]

Both paths are optional; when omitted, the default locations below are used.
"""
import json
import sys

# Default locations, used when no command-line paths are supplied.
DEFAULT_FACTS_PATH = r'C:\Users\Forgemind\Desktop\Grandmas Hearth\entity\facts.json'
DEFAULT_OUT_PATH = r'C:\Users\Forgemind\Desktop\Grandmas Hearth\mamba_webgpu\grandma-substrate.json'

# Model id passed to sentence-transformers for embedding.
EMBED_MODEL = 'all-MiniLM-L6-v2'
# Model id recorded in the output file. It intentionally differs from
# EMBED_MODEL; presumably it is the id the consumer of the substrate loads
# -- TODO confirm against the reader of grandma-substrate.json.
SUBSTRATE_MODEL = "Xenova/all-MiniLM-L6-v2"
# Embedding width recorded in the output file.
EMBED_DIM = 384
# Each fact's "key" is the first KEY_LENGTH characters of its text.
KEY_LENGTH = 80


def _remembered_text(mem):
    """Return the fact text for one ``remembered`` entry, or None to skip it.

    Strings are used verbatim. Dicts contribute their ``value`` (falling back
    to ``text``, then to the dict's own string form). Non-string values are
    converted with ``str`` so every fact can be sliced and embedded. Entries
    of any other type are skipped.
    """
    if isinstance(mem, str):
        return mem
    if not isinstance(mem, dict):
        return None
    value = mem.get('value')
    if value is None:
        value = mem.get('text')
    if value is None:
        value = mem
    return value if isinstance(value, str) else str(value)


def extract_facts(data):
    """Flatten the parsed facts document into a list of fact sentences.

    Args:
        data: Dict parsed from the facts JSON. The sections read are
            ``identity``, ``spine``, ``voice``, ``world``, ``entities`` and
            ``remembered``; any of them may be absent.

    Returns:
        A list of strings, in section order.

    Raises:
        KeyError: If a ``spine`` entry lacks ``rule`` or ``meaning``.
    """
    facts = []

    # Identity: keys starting with an underscore are skipped.
    for key, value in data.get('identity', {}).items():
        if key.startswith('_'):
            continue
        facts.append(f"Grandma's {key}: {value}")

    # Spine
    for rule in data.get('spine', []):
        facts.append(f"Spine rule — {rule['rule']}: {rule['meaning']}")

    # Voice
    voice = data.get('voice', {})
    if voice.get('register'):
        facts.append(f"Grandma's voice register: {voice['register']}")
    if voice.get('endearments'):
        facts.append(f"Grandma's endearments: {', '.join(voice['endearments'])}")
    if voice.get('sense_palette'):
        facts.append(f"Grandma's sensory palette: {', '.join(voice['sense_palette'])}")
    if voice.get('opener_pattern'):
        facts.append(f"Grandma's opener: {voice['opener_pattern']}")

    # World
    world = data.get('world', {})
    if world.get('house'):
        facts.append(f"Grandma's home: {world['house']}")
    if world.get('stations'):
        facts.append(f"The Hearthfold has these stations: {', '.join(world['stations'])}")
    if world.get('cellar'):
        cellar = world['cellar']
        facts.append(
            f"The cellar: shelves of {cellar.get('shelves', '')}. "
            f"The door {cellar.get('door', '')}"
        )
    if world.get('candle'):
        facts.append(f"The candle: {world['candle']}")
    if world.get('mirror'):
        facts.append(f"The hallway mirror: {world['mirror']}")
    if world.get('blanket'):
        facts.append(f"The blanket: {world['blanket']}")

    # Entities: one fact for the relation, then one per non-blank note.
    for name, entity in data.get('entities', {}).items():
        facts.append(f"{name}: {entity.get('relation', '')}")
        for note in entity.get('notes', []):
            if note.strip():
                facts.append(f"About {name}: {note}")

    # Remembered
    for mem in data.get('remembered', []):
        text = _remembered_text(mem)
        if text is not None:
            facts.append(text)

    return facts


def build_substrate(facts, embeddings):
    """Pair each fact with its embedding in the substrate document layout.

    Args:
        facts: List of fact strings.
        embeddings: One vector per fact, in the same order; each vector is an
            iterable of numbers.

    Returns:
        A JSON-serializable dict with ``model``, ``dim`` and ``facts``, where
        each fact entry has ``key`` (the truncated text), ``value`` (the full
        text) and ``vec`` (components rounded to 6 decimals).
    """
    return {
        "model": SUBSTRATE_MODEL,
        "dim": EMBED_DIM,
        "facts": [
            {
                "key": fact[:KEY_LENGTH],
                "value": fact,
                "vec": [round(float(v), 6) for v in vec],
            }
            for fact, vec in zip(facts, embeddings)
        ],
    }


def main(argv=None):
    """Load the facts file, embed every fact, and write the substrate JSON.

    Args:
        argv: Optional argument list ``[facts_path, out_path]``; defaults to
            ``sys.argv[1:]``. Missing entries fall back to the default paths.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    facts_path = args[0] if len(args) > 0 else DEFAULT_FACTS_PATH
    out_path = args[1] if len(args) > 1 else DEFAULT_OUT_PATH

    # Imported here so the pure helpers above can be imported without the
    # third-party dependency installed.
    from sentence_transformers import SentenceTransformer

    print("Loading MiniLM-L6-v2...")
    model = SentenceTransformer(EMBED_MODEL)

    print("Loading facts...")
    with open(facts_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    facts = extract_facts(data)
    print(f"Extracted {len(facts)} facts")

    # Embed (skipped when there is nothing to encode).
    print("Embedding...")
    if facts:
        embeddings = model.encode(facts, show_progress_bar=True, normalize_embeddings=True)
    else:
        embeddings = []

    # Serialize once; the same text is written and measured. json.dumps
    # escapes non-ASCII by default, so its length equals the byte size.
    substrate = build_substrate(facts, embeddings)
    payload = json.dumps(substrate)
    with open(out_path, 'w', encoding='utf-8') as f:
        f.write(payload)

    print(f"Saved {len(substrate['facts'])} facts to {out_path}")
    print(f"File size: {len(payload) / 1024:.0f} KB")


if __name__ == "__main__":
    main()