Spaces:

NavyDevilDoc
/

AI_Toolkit

Sleeping

App Files Files Community

NavyDevilDoc committed on Dec 20, 2025

Commit

b417daa

verified ·

1 Parent(s): 6cd615b

Create AcronymManager.py

Browse files

Files changed (1) hide show

src/core/AcronymManager.py +82 -0

src/core/AcronymManager.py ADDED Viewed

	@@ -0,0 +1,82 @@

+import re
+import json
+import os
+import logging
class AcronymManager:
    """
    Maintain a global registry of acronym -> definition mappings.

    The registry is read from a JSON file when the manager is constructed and
    written back whenever a scan learns at least one new acronym, so the
    knowledge is shared across documents and sessions.

    Attributes:
        storage_path: Path of the JSON file backing the registry.
        acronyms: In-memory dict mapping each acronym to its definition.
        logger: Module logger used for load/save diagnostics.
    """

    # "Some Words (ACRONYM)": a run of letters, whitespace, hyphens and
    # ampersands that starts with a capital letter, followed by an
    # upper-case/digit token in parentheses, e.g.
    # "Naval Sea Systems Command (NAVSEA)".
    _DEFINITION_PATTERN = re.compile(
        r"([A-Z][a-zA-Z\s\-\&]{3,})\s+\(([A-Z0-9]{2,})\)"
    )
    # Stand-alone tokens of 2+ upper-case letters/digits in a user query.
    _QUERY_TOKEN_PATTERN = re.compile(r"\b[A-Z0-9]{2,}\b")
    # Definitions of this length or longer are rejected; they are most likely
    # a whole sentence rather than an expansion.
    _DEFINITION_LENGTH_LIMIT = 80

    def __init__(self, storage_path="acronyms.json"):
        """
        Create a manager backed by *storage_path* and load any saved registry.

        Args:
            storage_path: JSON file used for persistence. The default is a
                relative path, so it resolves against the current directory.
        """
        self.storage_path = storage_path
        # The logger has to exist before loading: _load_acronyms reports
        # unreadable or malformed files through it.
        self.logger = logging.getLogger(__name__)
        self.acronyms = self._load_acronyms()

    def _load_acronyms(self):
        """
        Read the registry from ``self.storage_path``.

        Loading is best-effort: a missing, unreadable or malformed file yields
        an empty registry instead of raising, but the failure is logged so a
        corrupt file does not silently look like "nothing learned yet".

        Returns:
            The stored acronym -> definition dict, or ``{}``.
        """
        if not os.path.exists(self.storage_path):
            return {}

        try:
            with open(self.storage_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except Exception as e:
            self.logger.warning(
                "Could not load acronyms from %s: %s", self.storage_path, e
            )
            return {}

        # Valid JSON is not necessarily a JSON object; the rest of the class
        # relies on dict semantics for lookups and inserts.
        if not isinstance(data, dict):
            self.logger.warning(
                "Ignoring acronym file %s: expected a JSON object, got %s",
                self.storage_path,
                type(data).__name__,
            )
            return {}
        return data

    def _save_acronyms(self):
        """
        Write the in-memory registry to ``self.storage_path`` as JSON.

        Failures are logged rather than raised, so a read-only or full disk
        does not interrupt the caller.
        """
        try:
            with open(self.storage_path, 'w', encoding='utf-8') as f:
                json.dump(self.acronyms, f, indent=4)
        except Exception as e:
            self.logger.error(f"Failed to save acronyms: {e}")

    def scan_text_for_acronyms(self, text):
        """
        Scan *text* for the pattern "Regional Maintenance... (RMMCO)".

        Each "Definition (ACRONYM)" pair whose acronym is not yet known is
        added to the registry; existing entries are never overwritten. The
        registry is saved once at the end if anything new was learned.

        Args:
            text: Document text to scan. Empty or ``None`` is a no-op.
        """
        if not text:
            return

        new_found = 0
        for definition, acronym in self._DEFINITION_PATTERN.findall(text):
            # ``\s`` in the pattern also matches newlines and tabs, so a
            # definition wrapped across lines would otherwise be stored with
            # raw line breaks. Collapse every whitespace run to one space.
            clean_def = " ".join(definition.split())

            # Overly long captures are usually a whole sentence, not a name.
            if len(clean_def) >= self._DEFINITION_LENGTH_LIMIT:
                continue
            # First definition wins; known acronyms are left untouched.
            if acronym in self.acronyms:
                continue

            self.acronyms[acronym] = clean_def
            new_found += 1

        if new_found > 0:
            self._save_acronyms()
            self.logger.info(f"AcronymManager: Learned {new_found} new acronyms.")

    def expand_query(self, query):
        """
        Inject known definitions into *query* next to their acronyms.

        Input:  "Status of NAVSEA"
        Output: "Status of NAVSEA (Naval Sea Systems Command)"

        Only whole upper-case tokens are expanded, so a known acronym that
        happens to be a substring of a longer word (``US`` inside ``BUS``) is
        left alone. All substitutions are made in a single pass over the
        original query, so the result does not depend on iteration order and
        inserted definitions are never rewritten themselves.

        Args:
            query: The user's query string. Empty or ``None`` is returned
                unchanged.

        Returns:
            The query with ``" (definition)"`` appended after each known
            acronym whose definition is not already present in the query.
        """
        if not query:
            return query

        lowered_query = query.lower()

        def _expand(match):
            token = match.group(0)
            definition = self.acronyms.get(token)
            # Unknown token, or a non-string value from a hand-edited file.
            if not isinstance(definition, str):
                return token
            # Only expand if the definition isn't already typed by the user.
            if definition.lower() in lowered_query:
                return token
            return f"{token} ({definition})"

        return self._QUERY_TOKEN_PATTERN.sub(_expand, query)