AI_Toolkit / src /core /AcronymManager.py
NavyDevilDoc's picture
Update src/core/AcronymManager.py
593ecb0 verified
import re
import json
import os
import logging
class AcronymManager:
    """
    Maintains a global dictionary of Acronym -> Definition mappings.
    Persists to disk so knowledge is shared across all documents and sessions.

    The registry is a plain ``dict`` mapping the acronym (e.g. ``"NAVSEA"``)
    to its definition (e.g. ``"Naval Sea Systems Command"``) and is stored as
    a JSON object at ``storage_path``.
    """

    # Capture "Capitalized Words" followed immediately by "(ACRONYM)".
    # This handles "Naval Sea Systems Command (NAVSEA)".
    # NOTE: the definition group is greedy from the first capital letter it
    # can start at, so leading capitalized words of the sentence may be
    # captured as part of the definition.
    _DEFINITION_PATTERN = re.compile(r"([A-Z][a-zA-Z\s\-\&]{3,})\s+\(([A-Z0-9]{2,})\)")

    # A candidate acronym inside a user query: a whole token made of
    # 2+ uppercase letters / digits.
    _ACRONYM_TOKEN = re.compile(r"\b[A-Z0-9]{2,}\b")

    # Definitions at or above this length are rejected to avoid capturing
    # whole sentences.
    _MAX_DEFINITION_LENGTH = 80

    def __init__(self, storage_path=None):
        """
        Create the manager and load any previously persisted acronyms.

        Args:
            storage_path: Path of the JSON file used for persistence. When
                omitted, ``acronyms.json`` in the parent directory of the
                directory containing this file is used.
        """
        if storage_path:
            self.storage_path = storage_path
        else:
            # 1. Get the absolute path to this file (AcronymManager.py)
            #    Example: /app/src/core/AcronymManager.py
            current_file_path = os.path.abspath(__file__)
            # 2. Go up one level to the 'core' folder
            core_dir = os.path.dirname(current_file_path)
            # 3. Go up one more level to the 'src' folder (or app root)
            src_dir = os.path.dirname(core_dir)
            # 4. Define the path explicitly
            self.storage_path = os.path.join(src_dir, "acronyms.json")

        # The logger must exist before loading: _load_acronyms reports
        # unreadable or malformed files through it.
        self.logger = logging.getLogger(__name__)
        self.acronyms = self._load_acronyms()

    def _load_acronyms(self) -> dict:
        """
        Read the acronym registry from ``self.storage_path``.

        Loading is best-effort: a missing file yields an empty registry
        silently; an unreadable, malformed, or non-object JSON file yields an
        empty registry and a logged warning.

        Returns:
            The stored acronym -> definition mapping, or ``{}``.
        """
        try:
            with open(self.storage_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except FileNotFoundError:
            # First run: nothing has been learned yet.
            return {}
        except Exception as e:
            self.logger.warning(
                "Failed to load acronyms from %s: %s", self.storage_path, e
            )
            return {}

        # The rest of the class indexes and assigns by key, so anything other
        # than a JSON object (e.g. a list) would fail later on.
        if not isinstance(data, dict):
            self.logger.warning(
                "Ignoring acronym file %s: expected a JSON object, got %s",
                self.storage_path,
                type(data).__name__,
            )
            return {}
        return data

    def _save_acronyms(self) -> None:
        """
        Write the current registry to ``self.storage_path`` as indented JSON.

        Saving is best-effort: failures are logged, never raised.
        """
        try:
            with open(self.storage_path, 'w', encoding='utf-8') as f:
                json.dump(self.acronyms, f, indent=4)
        except Exception as e:
            self.logger.error("Failed to save acronyms: %s", e)

    def scan_text_for_acronyms(self, text) -> None:
        """
        Scans text for the pattern: "Regional Maintenance... (RMMCO)"
        Updates the global registry if new definitions are found.

        Only acronyms not yet in the registry are added; an existing
        definition is never overwritten. The registry is saved to disk once
        per call, and only if something new was learned.

        Args:
            text: Text to scan. Empty or ``None`` input is a no-op.
        """
        if not text:
            return

        new_found = 0
        for definition, acronym in self._DEFINITION_PATTERN.findall(text):
            clean_def = definition.strip()

            # Definition shouldn't be too long (avoid capturing whole sentences).
            if len(clean_def) >= self._MAX_DEFINITION_LENGTH:
                continue
            # A digits-only parenthetical such as "(2023)" or "(12)" is a
            # year or reference number, not an acronym.
            if acronym.isdigit():
                continue
            # First definition seen wins.
            if acronym in self.acronyms:
                continue

            self.acronyms[acronym] = clean_def
            new_found += 1

        if new_found > 0:
            self._save_acronyms()
            self.logger.info("AcronymManager: Learned %d new acronyms.", new_found)

    def expand_query(self, query):
        """
        Injects definitions into the query.
        Input: "Status of NAVSEA"
        Output: "Status of NAVSEA (Naval Sea Systems Command)"

        Every whole-token occurrence of a known acronym is expanded. An
        acronym is left untouched when its definition already appears
        (case-insensitively) anywhere in the query.

        Args:
            query: The user's query. Empty or ``None`` is returned unchanged.

        Returns:
            The query with known acronyms followed by " (definition)".
        """
        if not query:
            return query

        lowered_query = query.lower()

        def _inject(match):
            word = match.group(0)
            definition = self.acronyms.get(word)
            if not isinstance(definition, str):
                # Unknown acronym (or an unusable stored value).
                return word
            # Only expand if the definition isn't already typed by the user.
            if definition.lower() in lowered_query:
                return word
            # We inject the definition right next to the acronym.
            return f"{word} ({definition})"

        # A single pass over whole tokens of the ORIGINAL query: an acronym
        # that is a substring of another token (NAV inside NAVSEA) is not
        # touched, and text inside an injected definition is never expanded
        # again.
        return self._ACRONYM_TOKEN.sub(_inject, query)