import json
import logging
import os
import re
from typing import Dict, Optional


class AcronymManager:
    """
    Maintains a global dictionary of Acronym -> Definition mappings.

    The registry is persisted to a JSON file so that definitions learned
    from one document are available to later documents and sessions.
    """

    # Definitions of this length or longer are rejected; they are most
    # likely a whole sentence fragment rather than an expansion.
    MAX_DEFINITION_LENGTH = 80

    # A capitalised phrase followed by "(ACRONYM)", e.g.
    # "Naval Sea Systems Command (NAVSEA)".
    _DEFINITION_RE = re.compile(
        r"([A-Z][a-zA-Z\s\-\&]{3,})\s+\(([A-Z0-9]{2,})\)"
    )

    # Stand-alone tokens of 2+ uppercase letters/digits in a user query.
    _QUERY_TOKEN_RE = re.compile(r"\b[A-Z0-9]{2,}\b")

    def __init__(self, storage_path: Optional[str] = None):
        """
        Create the manager and load any previously saved acronyms.

        Args:
            storage_path: Path of the JSON file used for persistence. When
                omitted (or empty), "acronyms.json" in the parent of the
                directory containing this module is used.
        """
        if storage_path:
            self.storage_path = storage_path
        else:
            # Directory containing this module, then one level up.
            module_dir = os.path.dirname(os.path.abspath(__file__))
            parent_dir = os.path.dirname(module_dir)
            self.storage_path = os.path.join(parent_dir, "acronyms.json")

        # The logger must exist before loading: _load_acronyms reports
        # unreadable or malformed storage through it.
        self.logger = logging.getLogger(__name__)
        self.acronyms = self._load_acronyms()

    def _load_acronyms(self) -> Dict[str, str]:
        """
        Read the acronym registry from ``self.storage_path``.

        Returns:
            The stored mapping, or an empty dict when the file is missing,
            unreadable, not valid JSON, or does not contain a JSON object.
        """
        if not os.path.exists(self.storage_path):
            return {}

        try:
            with open(self.storage_path, "r", encoding="utf-8") as f:
                data = json.load(f)
        except (OSError, ValueError) as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            self.logger.warning(
                "Failed to load acronyms from %s: %s", self.storage_path, e
            )
            return {}

        if not isinstance(data, dict):
            # Any other JSON root (list, string, ...) cannot serve as the
            # registry; membership tests and item assignment would break.
            self.logger.warning(
                "Ignoring acronym file %s: expected a JSON object, got %s",
                self.storage_path,
                type(data).__name__,
            )
            return {}

        return data

    def _save_acronyms(self) -> None:
        """
        Write the current registry to ``self.storage_path`` (best effort).

        Failures are logged, not raised.
        """
        try:
            # Serialise before opening the file so that a serialisation
            # error cannot leave a truncated file behind.
            payload = json.dumps(self.acronyms, indent=4)
            with open(self.storage_path, "w", encoding="utf-8") as f:
                f.write(payload)
        except (OSError, TypeError, ValueError) as e:
            self.logger.error("Failed to save acronyms: %s", e)

    def scan_text_for_acronyms(self, text):
        """
        Scans text for the pattern: "Regional Maintenance... (RMMCO)"
        Updates the global registry if new definitions are found.

        Only acronyms not already in the registry are added; existing
        definitions are never overwritten. The registry is saved once per
        call, and only if something new was learned.

        Args:
            text: Text to scan. Empty or ``None`` input is ignored.
        """
        if not text:
            return

        new_found = 0
        for definition, acronym in self._DEFINITION_RE.findall(text):
            # The pattern allows any whitespace inside the phrase, so a
            # definition can span line breaks; collapse runs to one space.
            clean_def = " ".join(definition.split())

            if len(clean_def) >= self.MAX_DEFINITION_LENGTH:
                continue
            if acronym in self.acronyms:
                continue

            self.acronyms[acronym] = clean_def
            new_found += 1

        if new_found > 0:
            self._save_acronyms()
            self.logger.info(
                "AcronymManager: Learned %d new acronyms.", new_found
            )

    def expand_query(self, query):
        """
        Injects definitions into the query.

        Input:  "Status of NAVSEA"
        Output: "Status of NAVSEA (Naval Sea Systems Command)"

        Only whole uppercase tokens are expanded, and text injected by an
        expansion is never itself expanded.

        Args:
            query: The user's query. Empty or ``None`` is returned as is.

        Returns:
            The query with "(definition)" appended after each known
            acronym whose definition does not already appear in the query.
        """
        if not query:
            return query

        lowered_query = query.lower()

        def _inject(match):
            token = match.group(0)
            definition = self.acronyms.get(token)
            # Leave unknown tokens and non-string definitions untouched.
            if not isinstance(definition, str):
                return token
            # Only expand if the user has not already typed the definition.
            if definition.lower() in lowered_query:
                return token
            return f"{token} ({definition})"

        # A single pass over the original query: a short acronym is not
        # rewritten inside a longer one, and the outcome does not depend
        # on the order in which acronyms are processed.
        return self._QUERY_TOKEN_RE.sub(_inject, query)