Spaces:
Sleeping
Sleeping
| import re | |
| import json | |
| import os | |
| import logging | |
class AcronymManager:
    """
    Maintains a global dictionary of Acronym -> Definition mappings.

    The registry is persisted as JSON at ``storage_path`` so that definitions
    learned from one document are available to later documents and sessions.
    """

    # "Capitalized Words" followed by "(ACRONYM)", e.g.
    # "Naval Sea Systems Command (NAVSEA)". The capture starts at the first
    # capital letter of the run, so leading capitalized sentence words can end
    # up inside the definition.
    _DEFINITION_PATTERN = re.compile(
        r"([A-Z][a-zA-Z\s\-\&]{3,})\s+\(([A-Z0-9]{2,})\)"
    )
    # A standalone token made of 2+ uppercase letters and/or digits.
    _ACRONYM_TOKEN_PATTERN = re.compile(r"\b[A-Z0-9]{2,}\b")
    # Captures at or above this length are treated as sentence fragments.
    _MAX_DEFINITION_LENGTH = 80

    def __init__(self, storage_path=None):
        """
        Create the manager and load any previously saved acronyms.

        Args:
            storage_path: Path of the JSON file backing the registry. When
                falsy, "acronyms.json" in the parent of this module's
                directory is used (e.g. /app/src/core/AcronymManager.py ->
                /app/src/acronyms.json).
        """
        if storage_path:
            self.storage_path = storage_path
        else:
            core_dir = os.path.dirname(os.path.abspath(__file__))
            src_dir = os.path.dirname(core_dir)
            self.storage_path = os.path.join(src_dir, "acronyms.json")
        # The logger must exist before loading: the loader reports problems
        # through it.
        self.logger = logging.getLogger(__name__)
        self.acronyms = self._load_acronyms()

    def _load_acronyms(self):
        """
        Read the registry from ``self.storage_path``.

        Returns:
            A dict mapping acronym -> definition. An empty dict is returned
            when the file is missing, unreadable, not valid JSON, or does not
            contain a JSON object. Entries whose value is not a string are
            skipped, because the rest of the class treats definitions as text.
        """
        try:
            with open(self.storage_path, "r", encoding="utf-8") as f:
                data = json.load(f)
        except FileNotFoundError:
            # Nothing has been learned yet; not worth a warning.
            return {}
        except (OSError, ValueError) as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            self.logger.warning(
                "Failed to load acronyms from %s: %s", self.storage_path, e
            )
            return {}

        if not isinstance(data, dict):
            self.logger.warning(
                "Ignoring acronym file %s: expected a JSON object, got %s",
                self.storage_path,
                type(data).__name__,
            )
            return {}

        valid = {key: value for key, value in data.items() if isinstance(value, str)}
        if len(valid) != len(data):
            self.logger.warning(
                "Skipped %d non-string acronym definitions in %s",
                len(data) - len(valid),
                self.storage_path,
            )
        return valid

    def _save_acronyms(self):
        """
        Write the registry to ``self.storage_path`` as indented JSON.

        Saving is best-effort: a failure is logged and not raised, so a
        read-only or missing directory does not break the caller.
        """
        try:
            with open(self.storage_path, "w", encoding="utf-8") as f:
                json.dump(self.acronyms, f, indent=4)
        except (OSError, TypeError, ValueError) as e:
            self.logger.error(f"Failed to save acronyms: {e}")

    def scan_text_for_acronyms(self, text):
        """
        Scans text for the pattern: "Regional Maintenance... (RMMCO)"
        Updates the global registry if new definitions are found.

        Only acronyms that are not already known are added; an existing
        definition is never overwritten. The registry is saved once at the
        end, and only if something new was learned.

        Args:
            text: Text to scan. Falsy input (None or "") is ignored.
        """
        if not text:
            return

        new_found = 0
        for definition, acronym in self._DEFINITION_PATTERN.findall(text):
            # The pattern allows any whitespace inside the definition, so a
            # phrase wrapped across lines arrives with embedded newlines.
            # Collapse every whitespace run to a single space before storing.
            clean_def = " ".join(definition.split())
            if len(clean_def) >= self._MAX_DEFINITION_LENGTH:
                continue
            if acronym not in self.acronyms:
                self.acronyms[acronym] = clean_def
                new_found += 1

        if new_found > 0:
            self._save_acronyms()
            self.logger.info("AcronymManager: Learned %d new acronyms.", new_found)

    def expand_query(self, query):
        """
        Injects definitions into the query.

        Input: "Status of NAVSEA"
        Output: "Status of NAVSEA (Naval Sea Systems Command)"

        Args:
            query: The user's query. Falsy input is returned unchanged.

        Returns:
            The query with " (definition)" appended after every known acronym
            token, except for acronyms whose definition already appears in
            the query (case-insensitive).
        """
        if not query:
            return query

        query_lower = query.lower()

        def _inject(match):
            token = match.group(0)
            definition = self.acronyms.get(token)
            # Leave unknown tokens alone, and do not repeat a definition the
            # user already typed.
            if definition is None or definition.lower() in query_lower:
                return token
            return f"{token} ({definition})"

        # A single regex pass over whole tokens: a short acronym is never
        # rewritten inside a longer one (NAV inside NAVSEA), and text injected
        # for one acronym is never re-scanned for another.
        return self._ACRONYM_TOKEN_PATTERN.sub(_inject, query)