# data-str / subjects / loader.py
# Uploaded by CORVO-AI ("Upload 135 files", commit b83571a, verified)
"""
Subject file loader.
Loads knowledge files from subject folders.
Handles caching.
"""
import os
import threading
class SubjectLoader:
    """Load and cache subject data from folders on disk.

    ``subject_id`` is used directly as the folder path.  A subject folder may
    contain ``main.txt``, ``structure.txt``, ``pages_base_url.txt`` and
    consecutively numbered chapter files ``p1.txt``, ``p2.txt``, ...
    Loaded subjects are kept in an in-memory dict; every access to that dict
    is made while holding ``self._lock``.
    """

    def __init__(self):
        """Create an empty cache and the lock that guards it."""
        self._cache = {}
        self._lock = threading.Lock()
        print(" βœ” SubjectLoader initialized")

    @staticmethod
    def _read_text(path):
        """Return the UTF-8 text of *path*, or None if the file is missing."""
        try:
            with open(path, 'r', encoding='utf-8') as f:
                return f.read()
        except FileNotFoundError:
            return None

    def load(self, subject_id):
        """
        Load a subject's files from its folder.

        Returns a dict with these keys (in this order):
          - "main.txt", "structure.txt": file text, or "" if the file is missing
          - "pages_base_url": stripped text of pages_base_url.txt, or ""
          - "p1.txt", "p2.txt", ...: text of each chapter file that was read
          - "_p_files": list of the chapter filenames that were read

        Returns None if ``subject_id`` is not an existing directory.
        The dict is cached after the first successful load and the same
        object is returned on later calls; a None result is not cached.
        """
        with self._lock:
            if subject_id in self._cache:
                return self._cache[subject_id]

        folder_path = subject_id
        # isdir (not exists): a regular file with this name is not a subject
        # folder, and trying to open files "inside" it would raise
        # NotADirectoryError instead of yielding the documented None.
        if not os.path.isdir(folder_path):
            print(f" ❌ Subject folder not found: {folder_path}")
            return None

        subject_data = {}

        # main.txt and structure.txt share the same load/report behaviour.
        for filename in ("main.txt", "structure.txt"):
            text = self._read_text(os.path.join(folder_path, filename))
            if text is None:
                subject_data[filename] = ""
                print(f" ⚠ {subject_id}/{filename} not found")
            else:
                subject_data[filename] = text
                print(f" βœ” {subject_id}/{filename} ({len(text)} chars)")

        # pages_base_url.txt (for board page images); stored stripped and
        # under a key without the ".txt" suffix.
        pages_url = self._read_text(
            os.path.join(folder_path, "pages_base_url.txt")
        )
        if pages_url is None:
            subject_data["pages_base_url"] = ""
            print(f" ⚠ {subject_id}/pages_base_url.txt not found")
        else:
            subject_data["pages_base_url"] = pages_url.strip()
            print(f" βœ” {subject_id}/pages_base_url.txt loaded")

        # Chapter files p1.txt, p2.txt, ...: stop at the first missing number.
        p_files = []
        i = 1
        while True:
            p_filename = f"p{i}.txt"
            p_path = os.path.join(folder_path, p_filename)
            if not os.path.exists(p_path):
                break
            try:
                with open(p_path, 'r', encoding='utf-8') as f:
                    content = f.read()
                subject_data[p_filename] = content
                p_files.append(p_filename)
                print(f" βœ” {subject_id}/{p_filename} ({len(content)} chars)")
            except Exception as e:
                # Deliberate best-effort: an unreadable chapter is reported
                # and skipped, and scanning continues with the next number.
                print(f" ⚠ Error loading {p_filename}: {e}")
            i += 1

        subject_data["_p_files"] = p_files
        print(f" πŸ“Š Subject '{subject_id}' loaded: main + structure + {len(p_files)} chapters")

        with self._lock:
            self._cache[subject_id] = subject_data
        return subject_data

    def reload(self, subject_id):
        """Force reload a subject by clearing its cache entry first."""
        with self._lock:
            self._cache.pop(subject_id, None)
        return self.load(subject_id)

    def get_p_files(self, subject_id):
        """Get the list of chapter filenames for a subject ([] if not loadable)."""
        data = self.load(subject_id)
        if not data:
            return []
        return data.get("_p_files", [])

    def get_pages_base_url(self, subject_id):
        """Get the base URL for book page images ("" if not loadable)."""
        data = self.load(subject_id)
        if not data:
            return ""
        return data.get("pages_base_url", "")
# Singleton instance: a module-level SubjectLoader created when this module is
# imported (its constructor prints the initialization message at that point).
subject_loader = SubjectLoader()