Spaces:

NavyDevilDoc
/

AI_Toolkit

Sleeping

App Files Files Community

AI_Toolkit / src /core /AcronymManager.py

NavyDevilDoc

Update src/core/AcronymManager.py

593ecb0 verified 4 months ago

raw

history blame contribute delete

3.91 kB

	import re
	import json
	import os
	import logging

	class AcronymManager:
	    """Maintain a global registry of acronym -> definition mappings.

	    The registry is persisted as a JSON object on disk so that definitions
	    learned from one document or session are available to later ones.

	    Attributes:
	        storage_path: Path of the JSON file backing the registry.
	        logger: Logger used for load/save diagnostics.
	        acronyms: Dict mapping an acronym (e.g. "NAVSEA") to its definition.
	    """

	    # Definitions this long or longer are rejected: they are almost always a
	    # run-on sentence fragment rather than the real expansion of an acronym.
	    MAX_DEFINITION_LENGTH = 80

	    # "Capitalized Words" followed by "(ACRONYM)", e.g.
	    # "Naval Sea Systems Command (NAVSEA)".
	    _DEFINITION_RE = re.compile(r"([A-Z][a-zA-Z\s\-\&]{3,})\s+\(([A-Z0-9]{2,})\)")

	    # Candidate acronyms inside a user query: whole tokens made of 2+
	    # uppercase letters / digits.
	    _QUERY_TOKEN_RE = re.compile(r"\b[A-Z0-9]{2,}\b")

	    def __init__(self, storage_path=None):
	        """Create the manager and load any previously saved acronyms.

	        Args:
	            storage_path: Optional path of the JSON store. When omitted, the
	                store is ``acronyms.json`` in the directory one level above
	                the directory containing this module.
	        """
	        if storage_path:
	            self.storage_path = storage_path
	        else:
	            # <parent>/<module dir>/AcronymManager.py -> <parent>/acronyms.json
	            module_dir = os.path.dirname(os.path.abspath(__file__))
	            self.storage_path = os.path.join(
	                os.path.dirname(module_dir), "acronyms.json"
	            )

	        # The logger must exist before loading: _load_acronyms reports problems.
	        self.logger = logging.getLogger(__name__)
	        self.acronyms = self._load_acronyms()

	    def _load_acronyms(self):
	        """Read the registry from disk.

	        Returns:
	            The stored dict, or an empty dict when the file is missing,
	            unreadable, not valid JSON, or does not contain a JSON object.
	        """
	        try:
	            with open(self.storage_path, 'r', encoding='utf-8') as f:
	                data = json.load(f)
	        except FileNotFoundError:
	            # First run: nothing has been learned yet.
	            return {}
	        except Exception as e:
	            # Loading is best-effort (a broken store must not prevent start-up),
	            # but the failure is reported instead of being silently discarded.
	            self.logger.warning(
	                "Failed to load acronyms from %s: %s", self.storage_path, e
	            )
	            return {}

	        if not isinstance(data, dict):
	            self.logger.warning(
	                "Ignoring acronym store %s: expected a JSON object, got %s",
	                self.storage_path,
	                type(data).__name__,
	            )
	            return {}
	        return data

	    def _save_acronyms(self):
	        """Write the registry to disk; failures are logged, never raised."""
	        try:
	            with open(self.storage_path, 'w', encoding='utf-8') as f:
	                json.dump(self.acronyms, f, indent=4)
	        except Exception as e:
	            self.logger.error("Failed to save acronyms: %s", e)

	    def scan_text_for_acronyms(self, text):
	        """Learn new acronyms from text of the form "Full Name (ACRONYM)".

	        Example: "Naval Sea Systems Command (NAVSEA)" registers
	        ``NAVSEA -> Naval Sea Systems Command``. Acronyms that are already
	        known keep their existing definition. The registry is saved to disk
	        only when at least one new acronym was found.

	        Args:
	            text: Document text to scan; empty or None is a no-op.
	        """
	        if not text:
	            return

	        new_found = 0
	        for definition, acronym in self._DEFINITION_RE.findall(text):
	            # The pattern's \s also matches newlines, so collapse any run of
	            # whitespace to keep the stored definition on a single line.
	            clean_def = " ".join(definition.split())

	            # Overly long captures are sentence fragments, not definitions.
	            if len(clean_def) >= self.MAX_DEFINITION_LENGTH:
	                continue
	            # A parenthesised number such as "(2020)" is a year or citation,
	            # not an acronym: require at least one letter.
	            if not any(ch.isalpha() for ch in acronym):
	                continue
	            # First definition seen wins.
	            if acronym in self.acronyms:
	                continue

	            self.acronyms[acronym] = clean_def
	            new_found += 1

	        if new_found > 0:
	            self._save_acronyms()
	            self.logger.info("AcronymManager: Learned %d new acronyms.", new_found)

	    def expand_query(self, query):
	        """Inject known definitions next to the acronyms used in a query.

	        Input:  "Status of NAVSEA"
	        Output: "Status of NAVSEA (Naval Sea Systems Command)"

	        Args:
	            query: The user's query; empty or None is returned unchanged.

	        Returns:
	            The query with "ACRONYM (definition)" substituted for every known
	            acronym whose definition is not already present in the query.
	        """
	        if not query:
	            return query

	        lowered_query = query.lower()

	        def _inject(match):
	            token = match.group(0)
	            definition = self.acronyms.get(token)
	            if not isinstance(definition, str):
	                return token
	            # Only expand if the definition isn't already typed by the user.
	            if definition.lower() in lowered_query:
	                return token
	            return f"{token} ({definition})"

	        # A single regex pass over the ORIGINAL query replaces whole tokens
	        # only, so a short acronym is never expanded inside a longer one
	        # (e.g. "RM" inside "RMMCO") and injected definitions are never
	        # re-scanned and expanded a second time.
	        return self._QUERY_TOKEN_RE.sub(_inject, query)