File size: 3,908 Bytes
b417daa
 
 
 
 
 
 
 
 
 
593ecb0
b417daa
593ecb0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b417daa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
import re
import json
import os
import logging

class AcronymManager:
    """
    Maintains a global dictionary of Acronym -> Definition mappings.
    Persists to disk so knowledge is shared across all documents and sessions.

    The registry is a plain ``dict`` (acronym -> definition) stored as a JSON
    object at ``storage_path``. It is loaded once at construction time and
    written back whenever ``scan_text_for_acronyms`` learns something new.
    """

    # "Capitalized Words" followed by "(ACRONYM)", e.g.
    # "Naval Sea Systems Command (NAVSEA)". Group 1 is the definition text,
    # group 2 the acronym.
    _DEFINITION_RE = re.compile(r"([A-Z][a-zA-Z\s\-\&]{3,})\s+\(([A-Z0-9]{2,})\)")

    # Candidate acronyms inside a user query: whole tokens made of 2+
    # uppercase letters / digits.
    _QUERY_TOKEN_RE = re.compile(r"\b[A-Z0-9]{2,}\b")

    # Definitions at or above this length are assumed to be a captured
    # sentence fragment rather than a real expansion, and are ignored.
    _MAX_DEFINITION_LENGTH = 80

    def __init__(self, storage_path=None):
        """
        Create the manager and load any previously persisted acronyms.

        Args:
            storage_path: Path of the JSON file used for persistence. When
                omitted (or falsy), ``acronyms.json`` in the parent of the
                directory containing this source file is used.
        """
        if storage_path:
            self.storage_path = storage_path
        else:
            # Directory containing this file, then one level above it.
            containing_dir = os.path.dirname(os.path.abspath(__file__))
            parent_dir = os.path.dirname(containing_dir)
            self.storage_path = os.path.join(parent_dir, "acronyms.json")

        # The logger must exist before loading: a failed load is reported.
        self.logger = logging.getLogger(__name__)
        self.acronyms = self._load_acronyms()

    def _load_acronyms(self) -> dict:
        """
        Read the persisted registry from ``self.storage_path``.

        Loading is best-effort and never raises: a missing file yields an
        empty registry silently, while an unreadable, malformed, or
        non-object JSON file yields an empty registry and a logged warning.

        Returns:
            The acronym -> definition mapping, or ``{}`` if none is usable.
        """
        try:
            with open(self.storage_path, 'r', encoding='utf-8') as f:
                loaded = json.load(f)
        except FileNotFoundError:
            # First run: nothing has been learned yet.
            return {}
        except Exception as e:
            # Deliberately broad: a damaged store must not prevent start-up,
            # but the failure should be visible instead of silently ignored.
            self.logger.warning(
                "Failed to load acronyms from %s: %s", self.storage_path, e
            )
            return {}

        if not isinstance(loaded, dict):
            # The rest of the class relies on dict semantics.
            self.logger.warning(
                "Ignoring acronym store %s: expected a JSON object, got %s",
                self.storage_path,
                type(loaded).__name__,
            )
            return {}
        return loaded

    def _save_acronyms(self) -> None:
        """
        Write the current registry to ``self.storage_path`` as indented JSON.

        Saving is best-effort: failures are logged and not propagated.
        """
        try:
            with open(self.storage_path, 'w', encoding='utf-8') as f:
                json.dump(self.acronyms, f, indent=4)
        except Exception as e:
            self.logger.error("Failed to save acronyms: %s", e)

    def scan_text_for_acronyms(self, text) -> None:
        """
        Scans text for the pattern: "Regional Maintenance... (RMMCO)"
        Updates the global registry if new definitions are found.

        Only acronyms not already present in the registry are added; an
        existing definition is never overwritten. The registry is persisted
        once per call, and only if at least one new acronym was learned.

        Args:
            text: Document text to scan. Falsy input is ignored.
        """
        if not text:
            return

        new_found = 0
        for definition, acronym in self._DEFINITION_RE.findall(text):
            # The pattern allows any whitespace inside a definition, so a
            # match may span line breaks; collapse runs of whitespace so the
            # stored definition is a single clean line.
            clean_def = " ".join(definition.split())

            # Skip over-long captures (most likely whole sentences).
            if len(clean_def) >= self._MAX_DEFINITION_LENGTH:
                continue

            # First definition wins.
            if acronym not in self.acronyms:
                self.acronyms[acronym] = clean_def
                new_found += 1

        if new_found > 0:
            self._save_acronyms()
            self.logger.info(
                "AcronymManager: Learned %d new acronyms.", new_found
            )

    def expand_query(self, query):
        """
        Injects definitions into the query.
        Input: "Status of NAVSEA"
        Output: "Status of NAVSEA (Naval Sea Systems Command)"

        Every whole-token occurrence of a known acronym is expanded, unless
        its definition already appears (case-insensitively) somewhere in the
        original query. Expansion is done in a single pass over the original
        query, so an acronym that is a prefix/substring of another token
        (e.g. "NAV" vs "NAVSEA") never corrupts it, and injected definition
        text is never re-expanded.

        Args:
            query: The user's query string. Falsy input is returned as-is.

        Returns:
            The query with definitions injected after known acronyms.
        """
        if not query:
            return query

        lowered_query = query.lower()

        def _inject(match):
            token = match.group(0)
            definition = self.acronyms.get(token)
            # Unknown token, or a malformed (non-string) stored value.
            if not isinstance(definition, str):
                return token
            # Only expand if the definition isn't already typed by the user.
            if definition.lower() in lowered_query:
                return token
            return f"{token} ({definition})"

        return self._QUERY_TOKEN_RE.sub(_inject, query)