File size: 4,038 Bytes
b739f9d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
"""
MathPulse AI — Backfill Student Profiles

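One-time migration script that builds student_profiles for ALL existing
managedStudents in Firestore. Students that have no unified profile yet are
pre-seeded from their managedStudents record, and every student is then run
through the student intelligence pipeline.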

Usage:
  cd backend
  python -m scripts.backfill_student_profiles
"""

import asyncio
import logging
import os
import sys

# Put the directory two levels above this file (the backend root) at the front
# of sys.path so imports resolved from that root work when the script is run.
_BACKEND_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, _BACKEND_ROOT)

# Timestamped INFO-level logging for the whole run; all messages emitted by
# this script go through the dedicated "backfill" logger.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)s %(message)s",
)
logger = logging.getLogger("backfill")


def _ensure_firebase_app():
    """Initialize the default Firebase app unless one is already registered.

    Credential source, in priority order:
      1. ``FIREBASE_SERVICE_ACCOUNT_FILE`` - path to a service-account file
         (used only if the path exists on disk).
      2. ``FIREBASE_SERVICE_ACCOUNT_JSON`` - the service-account JSON as a string.
      3. Neither set: ``firebase_admin.initialize_app()`` is called with no
         explicit credential.
    """
    import firebase_admin

    # `_apps` is firebase_admin's private app registry; a non-empty registry
    # means an app was already initialized, so there is nothing to do.
    if firebase_admin._apps:
        return

    sa_path = os.environ.get("FIREBASE_SERVICE_ACCOUNT_FILE")
    sa_json = os.environ.get("FIREBASE_SERVICE_ACCOUNT_JSON")

    if sa_path and os.path.exists(sa_path):
        from firebase_admin import credentials

        firebase_admin.initialize_app(credentials.Certificate(sa_path))
    elif sa_json:
        import json

        from firebase_admin import credentials

        firebase_admin.initialize_app(credentials.Certificate(json.loads(sa_json)))
    else:
        firebase_admin.initialize_app()


def _seed_profile_if_missing(db, sid, data, class_id, teacher_id):
    """Create ``student_profiles/<sid>`` from managed-student data if absent.

    An existing profile document is left untouched. Missing source fields fall
    back to the defaults spelled out below.
    """
    profile_ref = db.collection("student_profiles").document(sid)
    if profile_ref.get().exists:
        return

    # Pre-seed with identity data copied from the managedStudents record.
    profile_ref.set(
        {
            "student_id": sid,
            "display_name": data.get("name", ""),
            # Prefer "gradeLevel"; fall back to "grade" when it is absent.
            "grade_level": data.get("gradeLevel", data.get("grade", "")),
            "section": data.get("section", ""),
            "class_id": class_id,
            "teacher_id": teacher_id,
            "diagnostic_score": data.get("diagnosticScore"),
            "external_grades_avg": data.get("externalGradesAvg"),
            "wri": data.get("wri"),
            "risk_status": data.get("riskStatus", "pending_assessment"),
            "wri_weights": data.get("weights", {"w1": 0.30, "w2": 0.40, "w3": 0.30}),
            "profile_version": 0,
        },
        merge=True,
    )


async def backfill_all_profiles():
    """Rebuild student_profiles for all existing managedStudents.

    For every document in the ``managedStudents`` collection this:
      1. builds a synthetic ``"session"`` activity event tagged as a backfill,
      2. pre-seeds ``student_profiles/<id>`` if that document does not exist,
      3. awaits ``pipeline.process_event(event)`` for the student.

    A failure on one student is logged with its traceback and counted, and the
    run continues with the next student. Progress is logged every 10 students
    and a summary is logged at the end. Returns ``None``.
    """
    from datetime import datetime, timezone

    from firebase_admin import firestore

    # Initialize Firebase before anything asks for a Firestore client.
    _ensure_firebase_app()
    db = firestore.client()

    # Imported only after Firebase is initialized, matching the original order.
    from services.student_intelligence_pipeline import get_pipeline, StudentActivityEvent

    pipeline = get_pipeline()

    # Fetch all managed students up front so the total is known for progress logs.
    logger.info("Fetching all managedStudents...")
    students = list(db.collection("managedStudents").stream())
    total = len(students)
    logger.info("Found %s students to backfill", total)

    success = 0
    errors = 0

    for processed, student_doc in enumerate(students, start=1):
        sid = student_doc.id
        data = student_doc.to_dict()

        try:
            class_id = data.get("classroomId", "")
            teacher_id = data.get("teacherId", "")

            # Create a synthetic event to trigger a full profile build.
            event = StudentActivityEvent(
                student_id=sid,
                event_type="session",
                event_data={"event": "backfill", "source": "migration_script"},
                occurred_at=datetime.now(timezone.utc).isoformat(),
                class_id=class_id,
                teacher_id=teacher_id,
            )

            _seed_profile_if_missing(db, sid, data, class_id, teacher_id)

            # Run the pipeline for this student's synthetic event.
            await pipeline.process_event(event)
            success += 1

        except Exception as e:
            # Best-effort migration: one bad student must not abort the run.
            # logger.exception keeps the traceback so failures can be diagnosed.
            logger.exception("Error backfilling %s: %s", sid, e)
            errors += 1

        if processed % 10 == 0:
            logger.info(
                "Progress: %s/%s (success=%s, errors=%s)",
                processed, total, success, errors,
            )

    logger.info(
        "Backfill complete: %s success, %s errors out of %s total",
        success, errors, total,
    )


if __name__ == "__main__":
    # Script entry point: drive the async backfill to completion on a fresh
    # event loop when the module is executed directly.
    backfill_job = backfill_all_profiles()
    asyncio.run(backfill_job)