# AI_Agent_Final_V4 / api/course_loader.py
# SarahXia0405 - "Update api/course_loader.py" (commit dfef6f6, verified)
# api/course_loader.py
import os
from typing import Dict, List
from api.rag_engine import build_rag_chunks_from_file
# File extensions (lower-case, including the leading dot) that
# load_course_chunks will pass on for chunking; files with any other
# extension are skipped. Extensions are lower-cased before the lookup,
# so matching is case-insensitive.
SUPPORTED_EXTS = {".pdf", ".docx", ".pptx", ".md", ".txt"}
def _pick_existing_dir(base: str, candidates: List[str]) -> str:
    """Return the first ``base/<candidate>`` path that is an existing directory.

    Candidates are probed in the order given. An empty string is returned
    when none of them resolves to a directory (including when *candidates*
    is empty).
    """
    joined_paths = (os.path.join(base, name) for name in candidates)
    return next((path for path in joined_paths if os.path.isdir(path)), "")
def load_course_chunks(course_dir: str, course_id: str) -> List[Dict]:
    """
    Load course materials into RAG chunks.

    We keep it simple:
    - resources/ -> doc_type="Resources"
    - syllabus/  -> doc_type="Syllabus"
    - textbooks/ -> doc_type="Textbook"

    Each of these folders (if present under ``course_dir``) is walked
    recursively. Files whose lower-cased extension is not in SUPPORTED_EXTS
    are skipped. Every chunk produced for a file gets
    ``chunk["course_id"] = course_id`` before it is collected.

    Sub-directories and files are visited in sorted name order so that the
    order of the returned chunks is reproducible; ``os.walk`` on its own
    yields entries in arbitrary, filesystem-dependent order.

    A file that fails to load is reported on stdout and skipped; it does not
    abort the remaining files.

    Args:
        course_dir: Directory of a single course.
        course_id: Identifier stamped onto every chunk of this course.

    Returns:
        All chunks gathered from the three material folders, in the order
        Resources, Syllabus, Textbook.
    """
    all_chunks: List[Dict] = []

    def walk_and_add(folder: str, doc_type: str) -> None:
        """Chunk every supported file below *folder* and append to all_chunks."""
        if not folder:
            # "" means the material folder does not exist for this course.
            return
        for root, dirs, files in os.walk(folder):
            # In top-down mode os.walk honours in-place edits of `dirs`,
            # so sorting here fixes the descent order.
            dirs.sort()
            for fn in sorted(files):
                ext = os.path.splitext(fn)[1].lower()
                if ext not in SUPPORTED_EXTS:
                    continue
                path = os.path.join(root, fn)
                try:
                    # `or []` tolerates a loader that returns None / empty.
                    chunks = build_rag_chunks_from_file(path, doc_type) or []
                    for c in chunks:
                        c["course_id"] = course_id
                    all_chunks.extend(chunks)
                    print(f"[course_loader] OK {course_id} {doc_type} {os.path.relpath(path, course_dir)} chunks={len(chunks)}")
                except Exception as e:
                    # Deliberate best-effort: one unreadable file must not
                    # stop the rest of the course from loading.
                    print(f"[course_loader] FAIL {course_id} {doc_type} {os.path.relpath(path, course_dir)} err={repr(e)}")

    # allow both lower/upper folder names (so your current Textbooks still works)
    resources_dir = _pick_existing_dir(course_dir, ["resources", "Resources"])
    syllabus_dir = _pick_existing_dir(course_dir, ["syllabus", "Syllabus"])
    textbooks_dir = _pick_existing_dir(course_dir, ["textbooks", "Textbooks"])

    walk_and_add(resources_dir, "Resources")
    walk_and_add(syllabus_dir, "Syllabus")
    walk_and_add(textbooks_dir, "Textbook")
    return all_chunks
def load_all_courses(courses_root_dir: str) -> Dict[str, List[Dict]]:
    """
    Load the chunks of every course found under ``courses_root_dir``.

    courses_root_dir = .../data/courses
    Every immediate sub-directory is treated as one course, and its folder
    name is used as the course_id (e.g. course_ist345). Non-directory
    entries are ignored. Courses are processed in sorted name order.

    Returns a mapping course_id -> list of chunks; the mapping is empty when
    ``courses_root_dir`` is not an existing directory.
    """
    loaded: Dict[str, List[Dict]] = {}
    if os.path.isdir(courses_root_dir):
        entry_names = sorted(os.listdir(courses_root_dir))
        # Lazy pairing keeps the "check, then load" order per entry.
        named_paths = (
            (name, os.path.join(courses_root_dir, name)) for name in entry_names
        )
        for course_id, candidate_path in named_paths:
            if os.path.isdir(candidate_path):
                chunks = load_course_chunks(candidate_path, course_id)
                loaded[course_id] = chunks
                print(f"[course_loader] {course_id}: {len(chunks)} chunks loaded")
    return loaded