NavyDevilDoc committed on
Commit
b417daa
·
verified ·
1 Parent(s): 6cd615b

Create AcronymManager.py

Browse files
Files changed (1) hide show
  1. src/core/AcronymManager.py +82 -0
src/core/AcronymManager.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import json
3
+ import os
4
+ import logging
5
+
6
class AcronymManager:
    """
    Maintains a global dictionary of Acronym -> Definition mappings.
    Persists to disk so knowledge is shared across all documents and sessions.

    The registry is a plain ``dict[str, str]`` stored as JSON at
    ``storage_path``. It is loaded once in ``__init__`` and rewritten whenever
    ``scan_text_for_acronyms`` learns at least one new acronym.
    """

    # Capture "Capitalized Words" followed immediately by "(ACRONYM)".
    # This handles "Naval Sea Systems Command (NAVSEA)".
    _DEFINITION_PATTERN = re.compile(
        r"([A-Z][a-zA-Z\s\-\&]{3,})\s+\(([A-Z0-9]{2,})\)"
    )
    # Potential acronyms in a user query: whole tokens of 2+ uppercase
    # letters/digits.
    _QUERY_TOKEN_PATTERN = re.compile(r"\b[A-Z0-9]{2,}\b")
    # Definitions this long or longer are rejected (avoids capturing whole
    # sentences).
    _MAX_DEFINITION_LENGTH = 80

    def __init__(self, storage_path="acronyms.json"):
        """
        Args:
            storage_path: Path of the JSON file backing the registry. The
                default is relative, so it resolves against the current
                working directory.
        """
        # The logger must exist before loading so load failures can be reported.
        self.logger = logging.getLogger(__name__)
        self.storage_path = storage_path
        self.acronyms = self._load_acronyms()

    def _load_acronyms(self):
        """
        Read the registry from ``self.storage_path``.

        Returns:
            The stored acronym -> definition dict, or an empty dict when the
            file is missing, unreadable, not valid JSON, or not a JSON object.
            Entries whose value is not a string are dropped.
        """
        if not os.path.exists(self.storage_path):
            return {}

        try:
            with open(self.storage_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except Exception as e:
            # Loading is best-effort: a bad file must not prevent start-up,
            # but the failure is logged rather than silently discarded.
            self.logger.warning(
                "Failed to load acronyms from %s: %s", self.storage_path, e
            )
            return {}

        if not isinstance(data, dict):
            self.logger.warning(
                "Ignoring acronym file %s: expected a JSON object, got %s",
                self.storage_path,
                type(data).__name__,
            )
            return {}

        # Later code calls str methods on every definition, so keep only strings.
        return {key: value for key, value in data.items() if isinstance(value, str)}

    def _save_acronyms(self):
        """
        Write the registry to ``self.storage_path`` as indented JSON.

        The data is written to a sibling ``.tmp`` file and then moved into
        place with ``os.replace`` so an interrupted write cannot leave a
        truncated registry behind. Failures are logged, never raised.
        """
        tmp_path = f"{self.storage_path}.tmp"
        try:
            with open(tmp_path, 'w', encoding='utf-8') as f:
                json.dump(self.acronyms, f, indent=4)
            os.replace(tmp_path, self.storage_path)
        except Exception as e:
            self.logger.error(f"Failed to save acronyms: {e}")
            # Best-effort cleanup of a partially written temp file.
            try:
                os.remove(tmp_path)
            except OSError:
                pass

    def scan_text_for_acronyms(self, text):
        """
        Scans text for the pattern: "Regional Maintenance... (RMMCO)"
        Updates the global registry if new definitions are found.

        Args:
            text: Text to scan. ``None`` or an empty string is a no-op.

        Returns:
            None. New entries are added to ``self.acronyms`` and, if any were
            added, the registry is saved to disk.
        """
        if not text:
            return

        new_found = 0
        for definition, acronym in self._DEFINITION_PATTERN.findall(text):
            # The pattern allows any whitespace inside the phrase, so a
            # definition wrapped across lines would otherwise be stored with
            # embedded newlines; collapse runs of whitespace to single spaces.
            clean_def = " ".join(definition.split())

            # Basic filter: definition shouldn't be too long
            # (avoid capturing whole sentences).
            if len(clean_def) >= self._MAX_DEFINITION_LENGTH:
                continue

            # First definition wins: an acronym that is already known is
            # never overwritten.
            if acronym not in self.acronyms:
                self.acronyms[acronym] = clean_def
                new_found += 1

        if new_found > 0:
            self._save_acronyms()
            self.logger.info(f"AcronymManager: Learned {new_found} new acronyms.")

    def expand_query(self, query):
        """
        Injects definitions into the query.
        Input: "Status of NAVSEA"
        Output: "Status of NAVSEA (Naval Sea Systems Command)"

        Args:
            query: The user's query string. Falsy values are returned as-is.

        Returns:
            The query with " (definition)" appended after every whole-token
            occurrence of a known acronym, except for acronyms whose
            definition already appears (case-insensitively) in the query.
        """
        if not query:
            return query

        lowered_query = query.lower()

        def _inject(match):
            word = match.group(0)
            definition = self.acronyms.get(word)
            if not isinstance(definition, str) or not definition:
                return word
            # Only expand if the definition isn't already typed by the user.
            if definition.lower() in lowered_query:
                return word
            # We inject the definition right next to the acronym.
            return f"{word} ({definition})"

        # A single pass over whole tokens of the original query: an acronym
        # that is a substring of a longer token (e.g. "US" inside "USN") is
        # left alone, and text inside an injected definition is never
        # re-expanded.
        return self._QUERY_TOKEN_PATTERN.sub(_inject, query)