aedmark committed on
Commit
0495c11
·
verified ·
1 Parent(s): bac61bd

Delete bone_lexicon.py

Browse files
Files changed (1) hide show
  1. bone_lexicon.py +0 -565
bone_lexicon.py DELETED
@@ -1,565 +0,0 @@
1
- import json
2
- import random
3
- import re
4
- import string
5
- import time
6
- import unicodedata
7
- import os
8
- from typing import Tuple, Dict, Set, Optional, List
9
- from bone_core import Prisma, LoreManifest
10
- from functools import lru_cache
11
-
12
-
13
class LexiconStore:
    """Container for the static vocabulary, the learned ("hive") vocabulary,
    and a reverse word -> categories index used for fast lookups."""

    # On-disk location of the learned-word hive.
    HIVE_FILENAME = "cortex_hive.json"
    # All punctuation except underscore is treated as a word separator.
    _PUNCTUATION = string.punctuation.replace("_", "")
    _TRANSLATOR = str.maketrans(_PUNCTUATION, " " * len(_PUNCTUATION))

    def __init__(self):
        # Closed set of category names the static lexicon may populate.
        self.categories = {
            "heavy",
            "kinetic",
            "explosive",
            "constructive",
            "abstract",
            "photo",
            "aerobic",
            "thermal",
            "cryo",
            "suburban",
            "play",
            "sacred",
            "buffer",
            "antigen",
            "diversion",
            "meat",
            "gradient_stop",
            "liminal",
            "void",
            "bureau_buzzwords",
            "crisis_term",
            "harvest",
            "pareidolia",
            "passive_watch",
            "repair_trigger",
            "refusal_guru",
            "cursed",
            "sentiment_pos",
            "sentiment_neg",
            "sentiment_negators",
        }
        # Static words per category (filled by load_vocabulary()).
        self.VOCAB: Dict[str, Set[str]] = {name: set() for name in self.categories}
        # Learned words per category, mapped to the tick they were learned at.
        self.LEARNED_VOCAB: Dict[str, Dict[str, int]] = {}
        # Words the user flagged; excluded from "suburban" results in get_raw().
        self.USER_FLAGGED_BIAS = set()
        self.ANTIGEN_REPLACEMENTS = {}
        self.SOLVENTS = set()
        # word -> set of categories it belongs to (lower-cased keys).
        self.REVERSE_INDEX: Dict[str, Set[str]] = {}
        self.hive_loaded = False

    def load_vocabulary(self):
        """Pull the static lexicon out of the LoreManifest and index it,
        then merge in any persisted hive memories."""
        manifest = LoreManifest.get_instance().get("LEXICON") or {}
        self.SOLVENTS = set(manifest.get("solvents", []))
        self.ANTIGEN_REPLACEMENTS = manifest.get("antigen_replacements", {})
        for name, entries in manifest.items():
            if name not in self.categories:
                continue
            bucket = set(entries)
            self.VOCAB[name] = bucket
            # Sentiment word lists are used only via get_raw(), never via the
            # reverse index, so they are deliberately not indexed here.
            if name.startswith("sentiment"):
                continue
            for entry in bucket:
                self._index_word(entry, name)
        self._load_hive()

    def _index_word(self, word: str, category: str):
        """Register *word* (lower-cased) under *category* in the reverse index."""
        self.REVERSE_INDEX.setdefault(word.lower(), set()).add(category)

    def _load_hive(self):
        """Best-effort restore of learned vocabulary from HIVE_FILENAME."""
        if not os.path.exists(self.HIVE_FILENAME):
            return
        try:
            with open(self.HIVE_FILENAME, "r", encoding="utf-8") as handle:
                stored = json.load(handle)
            restored = 0
            for category, entries in stored.items():
                bucket = self.LEARNED_VOCAB.setdefault(category, {})
                for word, tick in entries.items():
                    bucket[word] = tick
                    self._index_word(word, category)
                    restored += 1
            self.hive_loaded = True
            print(
                f"{Prisma.CYN}[HIVE]: The Library is open. {restored} memories restored.{Prisma.RST}"
            )
        except (IOError, json.JSONDecodeError) as e:
            # A corrupt hive is survivable: report it and keep an empty slate.
            print(
                f"{Prisma.RED}[HIVE]: Memory corruption detected. Starting fresh. ({e}){Prisma.RST}"
            )

    def save_hive(self):
        """Persist the learned vocabulary; write failures are silently
        ignored (saving is strictly best-effort)."""
        try:
            with open(self.HIVE_FILENAME, "w", encoding="utf-8") as handle:
                json.dump(self.LEARNED_VOCAB, handle, indent=2)
        except IOError:
            pass

    def get_raw(self, category):
        """Return the union of static and learned words for *category*,
        with user-flagged bias words filtered out of "suburban"."""
        merged = self.VOCAB.get(category, set()) | set(
            self.LEARNED_VOCAB.get(category, {})
        )
        if category == "suburban":
            return merged - self.USER_FLAGGED_BIAS
        return merged

    def get_categories_for_word(self, word: str) -> Set[str]:
        """Return a fresh copy of the categories indexed for *word*."""
        return set(self.REVERSE_INDEX.get(word.lower(), set()))

    def teach(self, word, category, tick):
        """Learn *word* under *category* at time *tick*.

        Returns False when the word was already learned in that category,
        True when it was newly recorded (and indexed).
        """
        key = word.lower()
        bucket = self.LEARNED_VOCAB.setdefault(category, {})
        if key in bucket:
            return False
        bucket[key] = tick
        self._index_word(key, category)
        return True

    def harvest(self, text: str) -> Dict[str, List[str]]:
        """Tokenize *text* and group recognized tokens by category.

        Tokens keep their order of appearance within each category list;
        a token appearing twice is reported twice.
        """
        found: Dict[str, List[str]] = {}
        if not text:
            return found
        for token in text.translate(self._TRANSLATOR).lower().split():
            for category in self.get_categories_for_word(token):
                found.setdefault(category, []).append(token)
        return found
143
-
144
-
145
class LinguisticAnalyzer:
    """Heuristic phonetic/morphological analysis over a LexiconStore.

    Classifies single words by root fragments and letter-class densities,
    builds per-category "dimension" vectors for whole texts, and measures
    simple lexicon-based sentiment.
    """

    def __init__(self, store_ref):
        # Backing LexiconStore; supplies SOLVENTS, USER_FLAGGED_BIAS, the
        # reverse word->category index, and the punctuation translator.
        self.store = store_ref
        # May be None if the store lacks a _TRANSLATOR; sanitize() falls back
        # to an identity translation table in that case.
        self._TRANSLATOR = getattr(self.store, "_TRANSLATOR", None)
        # Letter classes used by the scoring heuristics.
        # NOTE(review): set("fthszsh") collapses to {'f','t','h','s','z'} —
        # the digraph "sh" cannot be represented as single characters, and
        # 't' is also in PLOSIVE. classify_word() resolves such overlaps by
        # dict insertion order (later classes win, so 't' counts as
        # FRICATIVE there), while measure_viscosity() checks PLOSIVE first
        # — confirm this asymmetry is intended.
        self.PHONETICS = {
            "PLOSIVE": set("bdgkpt"),
            "FRICATIVE": set("fthszsh"),
            "LIQUID": set("lr"),
            "NASAL": set("mn"),
            "VOWELS": set("aeiouy"),
        }
        # Root fragments: a substring match anywhere in the word classifies
        # it immediately with fixed confidence 0.8 (see classify_word()).
        self.ROOTS = {
            "HEAVY": (
                "lith",
                "ferr",
                "petr",
                "dens",
                "grav",
                "struct",
                "base",
                "fund",
                "mound",
            ),
            "KINETIC": ("mot", "mov", "ject", "tract", "pel", "crat", "dynam", "flux"),
            "ABSTRACT": (
                "tion",
                "ism",
                "ence",
                "ance",
                "ity",
                "ology",
                "ness",
                "ment",
                "idea",
            ),
            "SUBURBAN": ("norm", "comm", "stand", "pol", "reg", "mod"),
            "VITAL": (
                "viv",
                "vita",
                "spir",
                "anim",
                "bio",
                "luc",
                "lum",
                "phot",
                "phon",
                "surg",
                "bloom",
            ),
        }
        # Score cut-offs for the density/vitality/flow heuristics in
        # classify_word(); effective thresholds are scaled by self.biases.
        self.thresholds = {
            "heavy_density": 0.55,
            "play_vitality": 0.6,
            "kinetic_flow": 0.6,
        }
        # Threshold multipliers adjusted at runtime by tune_sensitivity().
        self.biases = {"heavy": 1.0, "play": 1.0, "kinetic": 1.0}

    def measure_viscosity(self, word: str) -> float:
        """Score a word's "thickness" in [0, 1].

        Solvent words are pinned near zero (0.1). Otherwise the score is the
        mean of a length component (saturating at 12 chars) and a substance
        component (the larger of plosive-stop density and liquid/vowel flow).
        """
        if not word:
            return 0.0
        w = word.lower()
        if w in self.store.SOLVENTS:
            return 0.1
        length_score = min(1.0, len(w) / 12.0)
        stops, flow = 0, 0
        for c in w:
            # PLOSIVE is checked first, so 't' counts as a stop here.
            if c in self.PHONETICS["PLOSIVE"]:
                stops += 1
            elif c in self.PHONETICS["LIQUID"] or c in self.PHONETICS["VOWELS"]:
                flow += 1
        stop_score = min(1.0, stops / 3.0)
        flow_score = min(1.0, flow / 4.0)
        substance_score = max(stop_score, flow_score)
        return (length_score * 0.5) + (substance_score * 0.5)

    @staticmethod
    def get_turbulence(words: List[str]) -> float:
        """Return the variance of word lengths, scaled into [0, 1]
        (capped at variance 10) and rounded to 2 decimals."""
        if len(words) < 2:
            return 0.0
        lengths = [len(w) for w in words]
        avg_len = sum(lengths) / len(lengths)
        variance = sum((l - avg_len) ** 2 for l in lengths) / len(lengths)
        turbulence = min(1.0, variance / 10.0)
        return round(turbulence, 2)

    def vectorize(self, text: str) -> Dict[str, float]:
        """Map *text* onto named dimensions via lexicon category counts.

        Each recognized word adds 1.0 to the dimension its category maps
        to; the vector is then normalized by the total count (min 1.0) and
        rounded to 3 decimals. ENT is always mirrored from CHI.
        Returns {} for text that sanitizes to nothing.
        """
        words = self.sanitize(text)
        if not words:
            return {}
        # Category -> dimension routing. NOTE(review): "toxin" is not one of
        # the store's declared static categories; presumably it can appear
        # via learned (hive) vocabulary — verify.
        DIMENSION_MAP = {
            "kinetic": "VEL",
            "explosive": "CHI",
            "heavy": "STR",
            "constructive": "STR",
            "antigen": "CHI",
            "toxin": "CHI",
            "thermal": "PHI",
            "photo": "PHI",
            "abstract": "PSI",
            "sacred": "PSI",
            "suburban": "BET",
            "buffer": "BET",
            "play": "DEL",
            "aerobic": "DEL",
            "harvest": "STR",
            "meat": "CHI",
            "void": "PSI",
            "liminal": "LAMBDA",
            "pareidolia": "PSI",
            "crisis_term": "CHI",
            "cursed": "CHI",
        }
        dims = {
            "VEL": 0.0,
            "STR": 0.0,
            "CHI": 0.0,
            "PHI": 0.0,
            "PSI": 0.0,
            "BET": 0.0,
            "DEL": 0.0,
            "LAMBDA": 0.0,
            "ENT": 0.0,
        }
        for w in words:
            cats = self.store.get_categories_for_word(w)
            # A word in several mapped categories contributes once per category.
            for cat in cats:
                if cat in DIMENSION_MAP:
                    target_dim = DIMENSION_MAP[cat]
                    dims[target_dim] += 1.0
        total = max(1.0, sum(dims.values()))
        result = {k: round(v / total, 3) for k, v in dims.items()}
        # ENT (entropy) is defined as a copy of CHI after normalization.
        result["ENT"] = result["CHI"]
        return result

    @staticmethod
    def calculate_flux(vec_a: Dict[str, float], vec_b: Dict[str, float]) -> float:
        """Euclidean distance between two vectors over the union of their
        keys (missing keys read as 0.0); 0.0 if either vector is empty."""
        if not vec_a or not vec_b:
            return 0.0
        keys = set(vec_a.keys()) | set(vec_b.keys())
        diff_sq = sum((vec_a.get(k, 0.0) - vec_b.get(k, 0.0)) ** 2 for k in keys)
        return round(diff_sq**0.5, 3)

    def contextualize(self, word: str, field_vector: Dict[str, float]) -> Optional[str]:
        """Classify *word*, possibly remapped by the ambient field.

        The only remap: a "heavy" word under a dominant (> 0.8) PSI field
        reads as "abstract". May return None when classification fails.
        """
        base_cat, _score = self.classify_word(word)
        if not field_vector or not base_cat:
            return base_cat
        dominant_field = max(field_vector, key=field_vector.get)
        if field_vector.get(dominant_field, 0.0) > 0.8:
            if dominant_field == "PSI" and base_cat == "heavy":
                return "abstract"
        return base_cat

    def sanitize(self, text: str) -> List[str]:
        """Normalize *text* to a list of lower-case ASCII tokens.

        Unicode is NFKD-folded and non-ASCII dropped, punctuation becomes
        whitespace via the store's translator, and user-flagged bias words
        are filtered out. Returns [] for empty input.
        """
        if not text:
            return []
        try:
            normalized = (
                unicodedata.normalize("NFKD", text)
                .encode("ASCII", "ignore")
                .decode("utf-8")
            )
        except (TypeError, AttributeError):
            # Non-string-ish input: fall through with the raw value.
            normalized = text
        # Identity table when the store supplied no translator.
        xlate = self._TRANSLATOR if self._TRANSLATOR else str.maketrans("", "")
        cleaned_text = normalized.translate(xlate).lower()
        words = cleaned_text.split()
        bias_set = getattr(self.store, "USER_FLAGGED_BIAS", set())
        return [w for w in words if w.strip() and w not in bias_set]

    def classify_word(self, word: str) -> Tuple[Optional[str], float]:
        """Classify a single word, returning (category, confidence).

        Order of attempts: (1) root-fragment substring match (confidence
        0.8); (2) phonetic density -> "heavy"; (3) vitality -> "play";
        (4) flow ratio -> "kinetic" (fixed 0.5). Words shorter than 3
        chars, or matching nothing, yield (None, 0.0).
        """
        w = word.lower()
        if len(w) < 3:
            return None, 0.0
        for category, roots in self.ROOTS.items():
            for root in roots:
                if root in w:
                    return category.lower(), 0.8
        counts = {k: 0 for k in self.PHONETICS}
        # Flatten letter classes; on overlap the class declared LAST in
        # PHONETICS wins (dict comprehension overwrites earlier entries).
        char_to_sound = {char: sound_type for sound_type, chars in self.PHONETICS.items() for char in chars}
        for char in w:
            if sound_type := char_to_sound.get(char):
                counts[sound_type] += 1
        density_score = (counts["PLOSIVE"] * 1.5) + (counts["NASAL"] * 0.8)
        flow_score = counts["LIQUID"] + counts["FRICATIVE"]
        vitality_score = (counts["VOWELS"] * 1.2) + (flow_score * 0.8)
        # Short words (<= 5 chars) get a 1.5x boost to compensate for length.
        length_mod = 1.0 if len(w) > 5 else 1.5
        final_density = (density_score / len(w)) * length_mod
        final_vitality = (vitality_score / len(w)) * length_mod
        # Effective thresholds scale with the runtime biases.
        heavy_thresh = self.thresholds["heavy_density"] * self.biases["heavy"]
        play_thresh = self.thresholds["play_vitality"] * self.biases["play"]
        kinetic_thresh = self.thresholds["kinetic_flow"] * self.biases["kinetic"]
        if final_density > heavy_thresh:
            return "heavy", round(final_density, 2)
        if final_vitality > play_thresh:
            return "play", round(final_vitality, 2)
        if (flow_score / len(w)) > kinetic_thresh:
            return "kinetic", 0.5
        return None, 0.0

    def measure_valence(self, words: List[str]) -> float:
        """Lexicon-based sentiment in [-1, 1].

        Each positive word scores +1, negative -1; a word preceded by a
        negator has its value flipped and halved (val * -0.5). The sum is
        normalized by half the word count and clamped to [-1, 1].
        """
        if not words:
            return 0.0
        pos_set = self.store.get_raw("sentiment_pos")
        neg_set = self.store.get_raw("sentiment_neg")
        negators = self.store.get_raw("sentiment_negators")
        score = 0.0
        for i, word in enumerate(words):
            is_negated = False
            if i > 0 and words[i - 1] in negators:
                is_negated = True
            val = 0.0
            if word in pos_set:
                val = 1.0
            elif word in neg_set:
                val = -1.0
            if is_negated:
                val *= -0.5
            score += val
        normalized = score / max(1.0, len(words) * 0.5)
        return max(-1.0, min(1.0, normalized))

    def tune_sensitivity(self, voltage: float, drag: float):
        """Adjust classification biases from system state.

        High voltage (> 15) lowers the kinetic threshold (more sensitive),
        low voltage (< 5) raises it; high drag (> 5) lowers the heavy
        threshold. All other conditions reset the bias to 1.0.
        """
        if voltage > 15.0:
            self.biases["kinetic"] = 0.8
        elif voltage < 5.0:
            self.biases["kinetic"] = 1.2
        else:
            self.biases["kinetic"] = 1.0
        if drag > 5.0:
            self.biases["heavy"] = 0.8
        else:
            self.biases["heavy"] = 1.0
379
-
380
-
381
class SemanticField:
    """A slowly decaying blend of recent text vectors plus a momentum
    reading that tracks how fast the field has been shifting."""

    def __init__(self, analyzer_ref):
        # Analyzer providing vectorize() and calculate_flux().
        self.analyzer = analyzer_ref
        # Exponentially blended vector of everything seen so far.
        self.current_vector = {}
        # Smoothed rate of change between consecutive updates.
        self.momentum = 0.0
        # Last few (timestamp, flux) samples, capped at 10 entries.
        self.history = []

    def update(self, text: str) -> Dict[str, float]:
        """Fold *text* into the field and return the blended vector.

        The old field decays to 60% and the incoming vector contributes
        40%; momentum is an EMA of the flux between old and new. Text that
        vectorizes to nothing leaves the field untouched.
        """
        incoming = self.analyzer.vectorize(text)
        if not incoming:
            return self.current_vector
        flux = self.analyzer.calculate_flux(self.current_vector, incoming)
        self.momentum = self.momentum * 0.7 + flux * 0.3
        merged = {key: round(value * 0.6, 3) for key, value in self.current_vector.items()}
        for key, value in incoming.items():
            merged[key] = round(merged.get(key, 0.0) + value * 0.4, 3)
        self.current_vector = merged
        self.history.append((time.time(), flux))
        # Keep only the most recent 10 samples.
        del self.history[:-10]
        return self.current_vector

    def get_atmosphere(self) -> str:
        """Describe the field: its dominant dimension, labeled a Storm when
        momentum exceeds 0.5, otherwise a stable Atmosphere."""
        if not self.current_vector:
            return "VOID"
        dom = max(self.current_vector, key=self.current_vector.get)
        return (
            f"Volatile {dom.upper()} Storm"
            if self.momentum > 0.5
            else f"Stable {dom.upper()} Atmosphere"
        )
410
-
411
-
412
class LexiconService:
    """Class-level facade over a lazily initialized LexiconStore and
    LinguisticAnalyzer pair.

    Fix: initialization guards were inconsistent — some entry points
    (get_store, vectorize, get_categories_for_word) self-initialized while
    most others (classify, get, teach, harvest, learn_antigen, sanitize,
    get_valence, create_field, ...) dereferenced the ``None`` singletons
    and raised AttributeError if called before initialize(). Every entry
    point that needs the store/analyzer now calls ``_ensure()`` first.
    ``save`` and ``tune_perception`` deliberately remain no-ops when
    uninitialized (there is nothing to save or tune, and triggering a full
    initialization for them would be surprising).
    """

    _INITIALIZED = False
    _STORE = None
    _ANALYZER = None
    ANTIGEN_REGEX = None
    SOLVENTS = set()

    # Category precedence used by classify(): earlier entries win when a
    # word belongs to several known categories. Hoisted to a class constant
    # so the list is not rebuilt on every call.
    _PRIORITY_ORDER = [
        "heavy",
        "kinetic",
        "explosive",
        "thermal",
        "cryo",
        "sacred",
        "antigen",
        "meat",
        "void",
        "liminal",
        "pareidolia",
        "play",
        "suburban",
        "abstract",
    ]

    @classmethod
    def _ensure(cls):
        """Initialize the backing store/analyzer on first use."""
        if not cls._INITIALIZED:
            cls.initialize()

    @classmethod
    def get_store(cls):
        """Return the backing LexiconStore, initializing if needed."""
        cls._ensure()
        return cls._STORE

    @classmethod
    def initialize(cls):
        """Build the store/analyzer singletons. Idempotent; re-raises and
        resets the initialized flag on failure so a later call can retry."""
        if cls._INITIALIZED:
            return
        # Set the flag before compile_antigens() runs so its _ensure() guard
        # does not recurse back into initialize().
        cls._INITIALIZED = True
        try:
            cls._STORE = LexiconStore()
            cls._STORE.load_vocabulary()
            cls._ANALYZER = LinguisticAnalyzer(cls._STORE)
            cls.compile_antigens()
            cls.SOLVENTS = cls._STORE.SOLVENTS
            total_words = sum(len(s) for s in cls._STORE.VOCAB.values())
            print(
                f"{Prisma.GRN}[LEXICON]: Systems Nominal. {total_words} words loaded.{Prisma.RST}"
            )
        except Exception as e:
            cls._INITIALIZED = False
            print(f"{Prisma.RED}[LEXICON]: Initialization Failed: {e}{Prisma.RST}")
            raise

    @classmethod
    def get_valence(cls, words: List[str]) -> float:
        """Sentiment of *words* in [-1, 1] (see LinguisticAnalyzer)."""
        cls._ensure()
        return cls._ANALYZER.measure_valence(words)

    @classmethod
    def get_categories_for_word(cls, word: str) -> Set[str]:
        """All known categories for *word* (copy; may be empty)."""
        cls._ensure()
        return cls._STORE.get_categories_for_word(word)

    @classmethod
    def get_current_category(cls, word: str) -> Optional[str]:
        """An arbitrary known category for *word*, or None if unknown."""
        cls._ensure()
        categories = cls._STORE.get_categories_for_word(word)
        if categories:
            return next(iter(categories))
        return None

    @classmethod
    def measure_viscosity(cls, word: str) -> float:
        """Phonetic 'thickness' score of *word* in [0, 1]."""
        cls._ensure()
        return cls._ANALYZER.measure_viscosity(word)

    @classmethod
    def get_turbulence(cls, words: List[str]) -> float:
        """Scaled variance of word lengths in *words*."""
        cls._ensure()
        return cls._ANALYZER.get_turbulence(words)

    @classmethod
    def vectorize(cls, text: str) -> Dict[str, float]:
        """Dimension vector for *text* (see LinguisticAnalyzer.vectorize)."""
        cls._ensure()
        return cls._ANALYZER.vectorize(text)

    @classmethod
    def compile_antigens(cls):
        """Rebuild ANTIGEN_REGEX from the store's replacement table.

        Patterns are sorted longest-first so longer antigens win over their
        own substrings; the regex is None when there are no replacements.
        """
        cls._ensure()
        replacements = cls._STORE.ANTIGEN_REPLACEMENTS
        if not replacements:
            cls.ANTIGEN_REGEX = None
            return
        patterns = sorted(replacements.keys(), key=len, reverse=True)
        escaped = [re.escape(str(p)) for p in patterns]
        cls.ANTIGEN_REGEX = re.compile("|".join(escaped), re.IGNORECASE)

    @classmethod
    def sanitize(cls, text):
        """Normalized lower-case ASCII tokens of *text*."""
        cls._ensure()
        return cls._ANALYZER.sanitize(text)

    @classmethod
    def classify(cls, word):
        """Classify *word*: known categories first (by priority order,
        confidence 1.0), else fall back to phonetic classification."""
        cls._ensure()
        known_cats = cls._STORE.get_categories_for_word(word)
        if known_cats:
            for p_cat in cls._PRIORITY_ORDER:
                if p_cat in known_cats:
                    return p_cat, 1.0
            # Known but outside the priority list: pick any known category.
            return next(iter(known_cats)), 1.0
        return cls._ANALYZER.classify_word(word)

    @classmethod
    def clean(cls, text):
        """Alias for sanitize()."""
        return cls.sanitize(text)

    @classmethod
    def taste(cls, word):
        """Alias for classify()."""
        return cls.classify(word)

    @classmethod
    def create_field(cls):
        """New SemanticField bound to the shared analyzer."""
        cls._ensure()
        return SemanticField(cls._ANALYZER)

    @classmethod
    def get(cls, category: str) -> Set[str]:
        """Union of static and learned words for *category*."""
        cls._ensure()
        return cls._STORE.get_raw(category)

    @classmethod
    def get_random(cls, category: str) -> str:
        """A random word from *category*, or "void" when empty."""
        words = list(cls.get(category))
        return random.choice(words) if words else "void"

    @classmethod
    def teach(cls, word: str, category: str, tick: int = 0) -> bool:
        """Teach a learned word; returns False if it was already known
        (the store's result is now propagated instead of discarded)."""
        cls._ensure()
        return cls._STORE.teach(word, category, tick)

    @classmethod
    def save(cls):
        """Persist the hive — a no-op when never initialized."""
        if cls._INITIALIZED and cls._STORE:
            cls._STORE.save_hive()
            print(f"{Prisma.GRN}[LEXICON]: Hive saved to disk.{Prisma.RST}")

    @classmethod
    def harvest(cls, text: str) -> Dict[str, List[str]]:
        """Recognized tokens of *text*, grouped by category."""
        cls._ensure()
        return cls._STORE.harvest(text)

    @classmethod
    def learn_antigen(cls, word: str, replacement: str = ""):
        """Register an antigen replacement and rebuild the regex."""
        cls._ensure()
        cls._STORE.ANTIGEN_REPLACEMENTS[word] = replacement
        cls.compile_antigens()

    @classmethod
    def tune_perception(cls, voltage: float, narrative_drag: float):
        """Forward system state to the analyzer — a no-op when never
        initialized."""
        if cls._ANALYZER:
            cls._ANALYZER.tune_sensitivity(voltage, narrative_drag)