Spaces:

Dooratre
/

data-str

Paused

App Files Files Community

data-str / subjects /loader.py

CORVO-AI

Upload 135 files

b83571a verified about 2 months ago

raw

history blame contribute delete

4.09 kB

	"""
	Subject file loader.
	Loads knowledge files from subject folders.
	Handles caching.
	"""

	import os
	import threading


	class SubjectLoader:
	    """Load subject knowledge files from a folder and cache the result.

	    A subject folder may contain ``main.txt``, ``structure.txt``,
	    ``pages_base_url.txt`` and consecutively numbered chapter files
	    ``p1.txt``, ``p2.txt``, ...  Loaded data is kept in an in-memory dict
	    keyed by ``subject_id``; every access to that dict is guarded by a lock.
	    """

	    def __init__(self):
	        # Maps subject_id -> dict of loaded file contents.
	        self._cache = {}
	        # Guards every read and write of self._cache.
	        self._lock = threading.Lock()
	        print(" ✔ SubjectLoader initialized")

	    @staticmethod
	    def _read_optional(folder_path, filename, subject_id):
	        """Return the UTF-8 text of an optional file, or None if it is missing.

	        A warning is printed when the file does not exist. Any other error
	        (permissions, undecodable bytes, ...) propagates to the caller.
	        """
	        try:
	            with open(os.path.join(folder_path, filename), 'r', encoding='utf-8') as f:
	                return f.read()
	        except FileNotFoundError:
	            print(f" ⚠ {subject_id}/{filename} not found")
	            return None

	    @staticmethod
	    def _read_chapters(folder_path, subject_id):
	        """Read p1.txt, p2.txt, ... and stop at the first number that is missing.

	        Returns a dict mapping file name -> content in ascending chapter
	        order. A chapter that exists but cannot be read is reported and
	        left out; scanning then continues with the next number.
	        """
	        chapters = {}
	        index = 1
	        while True:
	            p_filename = f"p{index}.txt"
	            p_path = os.path.join(folder_path, p_filename)
	            if not os.path.exists(p_path):
	                break
	            try:
	                with open(p_path, 'r', encoding='utf-8') as f:
	                    content = f.read()
	            except (OSError, UnicodeError) as e:
	                # Best effort: one unreadable chapter must not abort the load.
	                print(f" ⚠ Error loading {p_filename}: {e}")
	            else:
	                chapters[p_filename] = content
	                print(f" ✔ {subject_id}/{p_filename} ({len(content)} chars)")
	            index += 1
	        return chapters

	    def load(self, subject_id):
	        """Load a subject's files from its folder.

	        ``subject_id`` is used both as the cache key and, verbatim, as the
	        path of the folder to read.

	        Returns a dict with these keys, or None if the folder does not exist
	        (or the path is not a directory):

	        * ``"main.txt"`` / ``"structure.txt"`` - file text, ``""`` if missing
	        * ``"pages_base_url"`` - stripped text of ``pages_base_url.txt``,
	          ``""`` if missing
	        * ``"p1.txt"``, ``"p2.txt"``, ... - text of each chapter file read
	        * ``"_p_files"`` - list of the chapter file names that were read

	        The result is cached after the first successful load and the same
	        dict object is returned on later calls. Failed loads (None) are not
	        cached. The file reads run outside the lock, so two threads asking
	        for the same uncached subject may both read it; the later store wins.
	        """
	        with self._lock:
	            if subject_id in self._cache:
	                return self._cache[subject_id]

	        # NOTE(review): subject_id is used as a filesystem path without any
	        # validation; if it can come from untrusted input, check it upstream.
	        folder_path = subject_id

	        # isdir (not exists): a regular file at this path is not a subject
	        # folder and would otherwise fail later with NotADirectoryError.
	        if not os.path.isdir(folder_path):
	            print(f" ❌ Subject folder not found: {folder_path}")
	            return None

	        subject_data = {}

	        # main.txt and structure.txt are stored under their file names.
	        for filename in ("main.txt", "structure.txt"):
	            text = self._read_optional(folder_path, filename, subject_id)
	            if text is None:
	                text = ""
	            else:
	                print(f" ✔ {subject_id}/{filename} ({len(text)} chars)")
	            subject_data[filename] = text

	        # pages_base_url.txt (for board page images) is stored stripped and
	        # under a key without the ".txt" suffix.
	        base_url = self._read_optional(folder_path, "pages_base_url.txt", subject_id)
	        if base_url is None:
	            subject_data["pages_base_url"] = ""
	        else:
	            subject_data["pages_base_url"] = base_url.strip()
	            print(f" ✔ {subject_id}/pages_base_url.txt loaded")

	        # Chapter files p1.txt, p2.txt, ...
	        chapters = self._read_chapters(folder_path, subject_id)
	        subject_data.update(chapters)
	        p_files = list(chapters)

	        subject_data["_p_files"] = p_files
	        print(f" 📊 Subject '{subject_id}' loaded: main + structure + {len(p_files)} chapters")

	        with self._lock:
	            self._cache[subject_id] = subject_data

	        return subject_data

	    def reload(self, subject_id):
	        """Drop any cached entry for the subject, then load it again.

	        Returns whatever ``load`` returns for ``subject_id``.
	        """
	        with self._lock:
	            self._cache.pop(subject_id, None)
	        return self.load(subject_id)

	    def get_p_files(self, subject_id):
	        """Return the list of chapter file names for a subject.

	        Returns an empty list when the subject cannot be loaded.
	        """
	        data = self.load(subject_id)
	        if not data:
	            return []
	        return data.get("_p_files", [])

	    def get_pages_base_url(self, subject_id):
	        """Return the base URL for book page images.

	        Returns an empty string when the subject cannot be loaded or has no
	        ``pages_base_url.txt``.
	        """
	        data = self.load(subject_id)
	        if not data:
	            return ""
	        return data.get("pages_base_url", "")


	# Shared module-level instance, created when this module is imported
	# (constructing it prints the "SubjectLoader initialized" message).
	subject_loader = SubjectLoader()