File size: 2,685 Bytes
cfaf6e4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dfef6f6
cfaf6e4
 
 
 
 
 
 
 
 
dfef6f6
 
 
 
 
 
 
 
 
 
cfaf6e4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# api/course_loader.py
import os
from typing import Dict, List
from api.rag_engine import build_rag_chunks_from_file

# Lower-case file extensions (with the leading dot) that load_course_chunks
# accepts; files with any other extension are skipped during the folder walk.
SUPPORTED_EXTS = {".pdf", ".docx", ".pptx", ".md", ".txt"}

def _pick_existing_dir(base: str, candidates: List[str]) -> str:
    """
    Return the first ``base/<candidate>`` path that is an existing directory.

    Candidates are tried in the order given. An empty string is returned when
    none of them names a directory under ``base``.
    """
    joined_paths = (os.path.join(base, candidate) for candidate in candidates)
    return next((path for path in joined_paths if os.path.isdir(path)), "")

def load_course_chunks(course_dir: str, course_id: str) -> List[Dict]:
    """
    Load course materials into RAG chunks.

    Three optional subfolders of ``course_dir`` are searched (lower-case or
    capitalized folder name), each mapped to a doc_type:
      - resources/ -> doc_type="Resources"
      - syllabus/  -> doc_type="Syllabus"
      - textbooks/ -> doc_type="Textbook"

    Each folder is walked recursively. Only files whose lower-cased extension
    is in SUPPORTED_EXTS are handed to build_rag_chunks_from_file. Directories
    and files are visited in sorted name order, so the order of the returned
    chunks does not depend on the filesystem's listing order.

    Loading is best-effort: a file whose processing raises is reported with a
    "[course_loader] FAIL" line and skipped; the remaining files still load.

    Args:
        course_dir: Path of the course folder containing the subfolders above.
        course_id: Identifier stored under the "course_id" key of every chunk.

    Returns:
        All chunks from all three folders, in the order Resources, Syllabus,
        Textbook. Empty when no folder exists or no supported file is found.
    """
    all_chunks: List[Dict] = []

    def walk_and_add(folder: str, doc_type: str) -> None:
        # _pick_existing_dir returns "" when the folder is absent.
        if not folder:
            return
        for root, dirs, files in os.walk(folder):
            # Sorting in place makes os.walk descend in a stable order.
            dirs.sort()
            for fn in sorted(files):
                ext = os.path.splitext(fn)[1].lower()
                if ext not in SUPPORTED_EXTS:
                    continue
                path = os.path.join(root, fn)
                rel_path = os.path.relpath(path, course_dir)
                try:
                    # Materialize so any iterable (or None) from the builder
                    # can be tagged, counted and appended.
                    chunks = list(build_rag_chunks_from_file(path, doc_type) or [])
                    for c in chunks:
                        c["course_id"] = course_id
                except Exception as e:
                    # Deliberately broad: one unreadable file must not abort
                    # the whole course load.
                    print(f"[course_loader] FAIL {course_id} {doc_type} {rel_path} err={repr(e)}")
                    continue
                all_chunks.extend(chunks)
                print(f"[course_loader] OK {course_id} {doc_type} {rel_path} chunks={len(chunks)}")

    # allow both lower/upper folder names (so an existing "Textbooks" still works)
    folder_specs = (
        (["resources", "Resources"], "Resources"),
        (["syllabus", "Syllabus"], "Syllabus"),
        (["textbooks", "Textbooks"], "Textbook"),
    )
    for folder_names, doc_type in folder_specs:
        walk_and_add(_pick_existing_dir(course_dir, folder_names), doc_type)

    return all_chunks


def load_all_courses(courses_root_dir: str) -> Dict[str, List[Dict]]:
    """
    Load the chunks of every course found under ``courses_root_dir``.

    courses_root_dir = .../data/courses
    Every immediate subdirectory is treated as one course and its folder name
    is used as the course_id (e.g. course_ist345). Non-directory entries are
    ignored. Courses are processed in sorted name order.

    Returns a mapping of course_id -> list of chunks; the mapping is empty
    when ``courses_root_dir`` is not an existing directory.
    """
    loaded: Dict[str, List[Dict]] = {}

    if os.path.isdir(courses_root_dir):
        entries = os.listdir(courses_root_dir)
        entries.sort()
        for course_id in entries:
            course_path = os.path.join(courses_root_dir, course_id)
            if os.path.isdir(course_path):
                chunks = load_course_chunks(course_path, course_id)
                loaded[course_id] = chunks
                print(f"[course_loader] {course_id}: {len(chunks)} chunks loaded")

    return loaded