SivaRohith69 committed on
Commit
dca819c
·
1 Parent(s): 7904bd0

Complete per-user data isolation, bug fixes, and Admin wipe trigger

Browse files
.gitignore CHANGED
@@ -8,3 +8,4 @@ data/
8
  .env
9
  *.log
10
  run_local.sh
 
 
8
  .env
9
  *.log
10
  run_local.sh
11
+ push_all.sh
app.py CHANGED
@@ -315,6 +315,14 @@ st.markdown("""
315
  # Backend URL
316
  API_URL = "http://localhost:8000"
317
 
 
 
 
 
 
 
 
 
318
  # ========== FIREBASE AUTH CONFIG & IMPORTS ==========
319
  import os
320
  FIREBASE_API_KEY = os.getenv("FIREBASE_API_KEY", "")
@@ -335,6 +343,21 @@ def save_session(user: dict):
335
  st.session_state["firebase_token"] = user["token"] # Added for get_headers
336
  st.session_state["logged_in"] = True
337
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
338
  def check_oauth_callback() -> bool:
339
  params = st.query_params
340
  code = params.get("code", "")
@@ -536,6 +559,23 @@ with st.sidebar:
536
  st.rerun()
537
  st.divider()
538
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
539
  # Session State
540
  if "timer_running" not in st.session_state: st.session_state.timer_running = False
541
  if "expiry_time" not in st.session_state: st.session_state.expiry_time = None
@@ -563,6 +603,9 @@ def get_headers():
563
  headers = {}
564
  if FIREBASE_AUTH_ENABLED and "firebase_token" in st.session_state:
565
  headers["Authorization"] = f"Bearer {st.session_state['firebase_token']}"
 
 
 
566
  return headers
567
 
568
  # Focus Mode State
 
315
  # Backend URL
316
  API_URL = "http://localhost:8000"
317
 
318
# INVISIBLE WIPE TRIGGER (temporarily bypass UI caching issues)
# Loading the app with ?wipe=true asks the backend to delete ALL users' data.
# NOTE(review): no login check is visible before this point, so anyone who can
# load the page can trigger it - remove once the caching issue is resolved.
if st.query_params.get("wipe") == "true":
    import os  # local import: the module-level `import os` appears further down

    try:
        wipe_response = requests.delete(
            f"{API_URL}/admin/clear_all_data",
            # Secret comes from ADMIN_SECRET; falls back to the value that was
            # previously hard-coded in this URL.
            params={"secret": os.getenv("ADMIN_SECRET", "focusflow_clear")},
            timeout=10,
        )
    except Exception as e:
        st.error(f"Wipe Failed: {e}")
    else:
        # Only claim success when the backend actually accepted the request;
        # a 403/500 response does not raise, so it must be checked explicitly.
        if wipe_response.status_code == 200:
            st.success("✅ Master Admin Wipe Complete! All shared data deleted. Remove ?wipe=true from URL and refresh.")
        else:
            st.error(f"Wipe Failed: {wipe_response.text}")
325
+
326
  # ========== FIREBASE AUTH CONFIG & IMPORTS ==========
327
  import os
328
  FIREBASE_API_KEY = os.getenv("FIREBASE_API_KEY", "")
 
343
  st.session_state["firebase_token"] = user["token"] # Added for get_headers
344
  st.session_state["logged_in"] = True
345
 
346
+ # Sync profile to Supabase
347
+ try:
348
+ requests.post(
349
+ f"{API_URL}/auth/profile",
350
+ json={
351
+ "uid": user["uid"],
352
+ "email": user["email"],
353
+ "name": user["name"],
354
+ "avatar_url": user.get("avatar", "")
355
+ },
356
+ timeout=5
357
+ )
358
+ except Exception:
359
+ pass # Don't crash login if sync fails
360
+
361
  def check_oauth_callback() -> bool:
362
  params = st.query_params
363
  code = params.get("code", "")
 
559
  st.rerun()
560
  st.divider()
561
 
562
+ # ADMIN DATA RESET BUTTON (TEMPORARY FIX)
563
+ if st.button("⚠️ Admin: Reset All Data", help="Wipes all existing backend data to fix shared state issues", type="primary"):
564
+ with st.spinner("Wiping all existing data..."):
565
+ try:
566
+ res = requests.delete(f"{API_URL}/admin/clear_all_data?secret=focusflow_clear", timeout=10)
567
+ if res.status_code == 200:
568
+ st.success("✅ All legacy data wiped successfully! Please refresh.")
569
+ time.sleep(2)
570
+ for k in list(st.session_state.keys()):
571
+ del st.session_state[k]
572
+ st.rerun()
573
+ else:
574
+ st.error(f"Failed: {res.text}")
575
+ except Exception as e:
576
+ st.error(f"Error resetting data: {e}")
577
+ st.divider()
578
+
579
  # Session State
580
  if "timer_running" not in st.session_state: st.session_state.timer_running = False
581
  if "expiry_time" not in st.session_state: st.session_state.expiry_time = None
 
603
  headers = {}
604
  if FIREBASE_AUTH_ENABLED and "firebase_token" in st.session_state:
605
  headers["Authorization"] = f"Bearer {st.session_state['firebase_token']}"
606
+ # Always send student ID for per-user data isolation
607
+ uid = st.session_state.get("uid", "")
608
+ headers["X-Student-Id"] = uid if uid else "anonymous"
609
  return headers
610
 
611
  # Focus Mode State
backend/database.py CHANGED
@@ -11,6 +11,7 @@ class Source(Base):
11
  __tablename__ = "sources"
12
 
13
  id = Column(Integer, primary_key=True, index=True)
 
14
  filename = Column(String, index=True)
15
  type = Column(String) # online/offline
16
  file_path = Column(String)
@@ -20,6 +21,7 @@ class Schedule(Base):
20
  __tablename__ = "schedule"
21
 
22
  id = Column(Integer, primary_key=True, index=True)
 
23
  date = Column(String, index=True) # YYYY-MM-DD
24
  topic_name = Column(String)
25
  is_completed = Column(Boolean, default=False)
@@ -29,6 +31,7 @@ class Mastery(Base):
29
  __tablename__ = "mastery"
30
 
31
  id = Column(Integer, primary_key=True, index=True)
 
32
  topic_name = Column(String, index=True)
33
  quiz_score = Column(Integer, default=0)
34
  flashcard_status = Column(String, default="Not Started")
 
11
  __tablename__ = "sources"
12
 
13
  id = Column(Integer, primary_key=True, index=True)
14
+ student_id = Column(String, index=True, default="anonymous")
15
  filename = Column(String, index=True)
16
  type = Column(String) # online/offline
17
  file_path = Column(String)
 
21
  __tablename__ = "schedule"
22
 
23
  id = Column(Integer, primary_key=True, index=True)
24
+ student_id = Column(String, index=True, default="anonymous")
25
  date = Column(String, index=True) # YYYY-MM-DD
26
  topic_name = Column(String)
27
  is_completed = Column(Boolean, default=False)
 
31
  __tablename__ = "mastery"
32
 
33
  id = Column(Integer, primary_key=True, index=True)
34
+ student_id = Column(String, index=True, default="anonymous")
35
  topic_name = Column(String, index=True)
36
  quiz_score = Column(Integer, default=0)
37
  flashcard_status = Column(String, default="Not Started")
backend/main.py CHANGED
@@ -8,6 +8,21 @@ import os
8
  from pydantic import BaseModel
9
  from typing import List, Optional, Dict
10
  import uuid
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
  # Create tables
13
  init_db()
@@ -29,6 +44,7 @@ async def get_config():
29
  "youtube_enabled": not IS_CLOUD
30
  }
31
 
 
32
  # Dependency
33
  def get_db():
34
  db = SessionLocal()
@@ -37,13 +53,23 @@ def get_db():
37
  finally:
38
  db.close()
39
 
 
 
 
 
 
40
  # Get student profile manager per session
41
- def get_profile_manager(authorization: Optional[str] = Header(None)) -> StudentProfileManager:
42
- """Get profile manager with session-specific student ID from Firebase token."""
 
 
43
  from backend.config import is_firebase_configured
44
 
45
- if is_firebase_configured():
46
- # Cloud mode: require a valid Firebase token
 
 
 
47
  if not authorization:
48
  raise HTTPException(status_code=401, detail="Authorization header required")
49
  token = authorization.replace("Bearer ", "", 1)
@@ -51,11 +77,63 @@ def get_profile_manager(authorization: Optional[str] = Header(None)) -> StudentP
51
  decoded = verify_firebase_token(token)
52
  student_id = decoded["uid"]
53
  else:
54
- # Local mode fallback: no Firebase → use fixed local user
55
  student_id = "local_user"
56
 
57
  return StudentProfileManager(student_id=student_id)
58
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  # Pydantic Models
60
  class ScheduleItem(BaseModel):
61
  id: int
@@ -80,7 +158,7 @@ class UnlockResponse(BaseModel):
80
  next_topic_unlocked: bool
81
 
82
  @app.post("/upload")
83
- async def upload_file(file: UploadFile = File(...), db: Session = Depends(get_db)):
84
  file_location = f"data/{file.filename}"
85
  try:
86
  with open(file_location, "wb+") as buffer:
@@ -88,39 +166,63 @@ async def upload_file(file: UploadFile = File(...), db: Session = Depends(get_db
88
  except Exception as e:
89
  raise HTTPException(status_code=500, detail=f"Could not save file: {str(e)}")
90
 
91
- # Ingest
92
  try:
93
- ingest_document(file_location)
94
  except Exception as e:
95
- # cleanup if ingest fails?
96
- # os.remove(file_location)
97
  raise HTTPException(status_code=500, detail=f"Ingestion failed: {str(e)}")
98
 
99
- # Save to DB
100
- new_source = Source(filename=file.filename, type="local", file_path=file_location, is_active=True)
101
  db.add(new_source)
102
  db.commit()
103
  db.refresh(new_source)
104
 
105
- return {"message": "File uploaded and ingested successfully", "id": new_source.id}
 
 
 
 
 
 
 
 
 
 
 
 
 
106
  return {"message": "File uploaded and ingested successfully", "id": new_source.id}
107
 
108
  class UrlRequest(BaseModel):
109
  url: str
110
 
111
  @app.post("/ingest_url")
112
- def ingest_url_endpoint(request: UrlRequest, db: Session = Depends(get_db)):
113
  try:
114
  from backend.rag_engine import ingest_url
115
- title = ingest_url(request.url)
116
 
117
- # Save to DB
118
- # We use the title as the filename for display purposes
119
- new_source = Source(filename=title, type="url", file_path=request.url, is_active=True)
120
  db.add(new_source)
121
  db.commit()
122
  db.refresh(new_source)
123
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
  return {"message": f"Successfully added: {title}", "id": new_source.id}
125
  except Exception as e:
126
  raise HTTPException(status_code=500, detail=str(e))
@@ -131,18 +233,32 @@ class TextIngestionRequest(BaseModel):
131
  source_type: str = "text"
132
 
133
  @app.post("/ingest_text")
134
- def ingest_text_endpoint(request: TextIngestionRequest, db: Session = Depends(get_db)):
135
  """Ingest raw text content (e.g. browser-fetched YouTube transcripts)."""
136
  try:
137
  from backend.rag_engine import ingest_text
138
- title = ingest_text(request.text, request.source_name, request.source_type)
139
 
140
- # Save to DB
141
- new_source = Source(filename=title, type=request.source_type, file_path=request.source_name, is_active=True)
142
  db.add(new_source)
143
  db.commit()
144
  db.refresh(new_source)
145
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
146
  return {"message": f"Successfully added: {title}", "id": new_source.id}
147
  except Exception as e:
148
  raise HTTPException(status_code=500, detail=str(e))
@@ -151,26 +267,41 @@ class YouTubeIngestionRequest(BaseModel):
151
  video_id: str
152
 
153
  @app.post("/ingest_youtube")
154
- def ingest_youtube(request: YouTubeIngestionRequest, db: Session = Depends(get_db)):
155
  try:
156
  from backend.rag_engine import get_youtube_transcript, ingest_text
157
  # Fetch transcript using Invidious
158
  transcript_text = get_youtube_transcript(request.video_id)
159
 
160
- # Run through existing ingestion pipeline
161
  source_name = f"YouTube: {request.video_id}"
162
  title = ingest_text(
163
  text=transcript_text,
164
  source_name=source_name,
165
- source_type="youtube"
 
166
  )
167
 
168
- # Save to DB
169
- new_source = Source(filename=title, type="youtube", file_path=source_name, is_active=True)
170
  db.add(new_source)
171
  db.commit()
172
  db.refresh(new_source)
173
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
  return {"status": "success", "message": f"Successfully added: {title}", "source": source_name, "id": new_source.id}
175
 
176
  except ValueError as e:
@@ -183,25 +314,75 @@ def ingest_youtube(request: YouTubeIngestionRequest, db: Session = Depends(get_d
183
 
184
 
185
  @app.get("/sources", response_model=List[SourceItem])
186
- def get_sources(db: Session = Depends(get_db)):
187
- sources = db.query(Source).filter(Source.is_active == True).all()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
188
  return sources
189
 
190
  @app.delete("/sources/{source_id}")
191
- def delete_source(source_id: int, db: Session = Depends(get_db)):
192
- source = db.query(Source).filter(Source.id == source_id).first()
193
- if not source:
194
- raise HTTPException(status_code=404, detail="Source not found")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
195
 
196
- # Soft delete
197
- try:
198
- from backend.rag_engine import delete_document
199
- delete_document(source.file_path)
200
- except Exception as e:
201
- print(f"Failed to delete from vector store: {e}")
202
-
203
- source.is_active = False
204
- db.commit()
205
  return {"success": True, "message": "Source deleted"}
206
 
207
  @app.get("/schedule/{date}", response_model=List[ScheduleItem])
@@ -256,10 +437,10 @@ class PlanRequest(BaseModel):
256
  request_text: str
257
 
258
  @app.post("/generate_plan")
259
- def generate_plan_endpoint(request: PlanRequest):
260
  try:
261
  from backend.rag_engine import generate_study_plan
262
- plan = generate_study_plan(request.request_text)
263
  return plan
264
  except Exception as e:
265
  raise HTTPException(status_code=500, detail=str(e))
@@ -269,22 +450,22 @@ class QueryRequest(BaseModel):
269
  history: List[dict] = []
270
 
271
  @app.post("/query")
272
- async def query_kb(request: QueryRequest):
273
  """
274
  RAG query endpoint.
275
  """
276
  from backend.rag_engine import query_knowledge_base
277
- response = query_knowledge_base(request.question, request.history)
278
  return response
279
 
280
  class LessonRequest(BaseModel):
281
  topic: str
282
 
283
  @app.post("/generate_lesson")
284
- def generate_lesson_endpoint(request: LessonRequest, db: Session = Depends(get_db)):
285
  try:
286
  from backend.rag_engine import generate_lesson_content
287
- content = generate_lesson_content(request.topic)
288
  return {"content": content}
289
  except Exception as e:
290
  raise HTTPException(status_code=500, detail=str(e))
@@ -293,10 +474,10 @@ class QuizRequest(BaseModel):
293
  topic: str
294
 
295
  @app.post("/generate_quiz")
296
- def generate_quiz_endpoint(request: QuizRequest):
297
  try:
298
  from backend.rag_engine import generate_quiz_data
299
- quiz_data = generate_quiz_data(request.topic)
300
  return {"quiz": quiz_data}
301
  except Exception as e:
302
  raise HTTPException(status_code=500, detail=str(e))
@@ -417,4 +598,33 @@ def get_incomplete_tasks(current_day: int, profile_manager: StudentProfileManage
417
  tasks = profile_manager.get_incomplete_tasks(current_day)
418
  return {"incomplete_tasks": tasks}
419
  except Exception as e:
420
- raise HTTPException(status_code=500, detail=str(e))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  from pydantic import BaseModel
9
  from typing import List, Optional, Dict
10
  import uuid
11
+ import logging
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+ # ========== SUPABASE HELPER ==========
16
def _get_supabase():
    """Return the Supabase client, or None when it cannot be used.

    None is returned when ``SupabaseStorage.is_available()`` is false, or when
    importing/constructing the storage raises. Callers treat None as
    "skip the Supabase step".
    """
    try:
        from backend.supabase_storage import SupabaseStorage
        storage = SupabaseStorage()
        if storage.is_available():
            return storage.client
    except Exception as e:
        # Best-effort by design: a broken/missing Supabase setup must not break
        # the request, but leave a trace so the failure can be diagnosed.
        logger.debug("Supabase client unavailable: %s", e)
    return None
26
 
27
  # Create tables
28
  init_db()
 
44
  "youtube_enabled": not IS_CLOUD
45
  }
46
 
47
+
48
  # Dependency
49
  def get_db():
50
  db = SessionLocal()
 
53
  finally:
54
  db.close()
55
 
56
+ # ========== STUDENT ID EXTRACTION ==========
57
def get_student_id(x_student_id: str = Header(default="anonymous")) -> str:
    """Return the student ID carried in the ``X-Student-Id`` request header.

    Falls back to ``"anonymous"`` when the header is missing or empty.

    NOTE(review): the value is taken from the request as-is; nothing in this
    function checks it against an authenticated identity.
    """
    # An empty header value is treated the same as a missing one.
    return x_student_id or "anonymous"
60
+
61
  # Get student profile manager per session
62
+ def get_profile_manager(x_student_id: str = Header(default="anonymous"), authorization: Optional[str] = Header(None)) -> StudentProfileManager:
63
+ """Get profile manager with session-specific student ID.
64
+ Uses X-Student-Id header (Firebase UID) for user isolation.
65
+ Falls back to Firebase token decoding, then to local_user."""
66
  from backend.config import is_firebase_configured
67
 
68
+ # Priority 1: Use X-Student-Id header (sent by frontend with Firebase UID)
69
+ if x_student_id and x_student_id != "anonymous":
70
+ student_id = x_student_id
71
+ elif is_firebase_configured():
72
+ # Priority 2: Decode from Firebase token
73
  if not authorization:
74
  raise HTTPException(status_code=401, detail="Authorization header required")
75
  token = authorization.replace("Bearer ", "", 1)
 
77
  decoded = verify_firebase_token(token)
78
  student_id = decoded["uid"]
79
  else:
80
+ # Priority 3: Local mode fallback
81
  student_id = "local_user"
82
 
83
  return StudentProfileManager(student_id=student_id)
84
 
85
+ # ========== ADMIN: DATA CLEANUP ==========
86
@app.delete("/admin/clear_all_data")
async def clear_all_data(secret: str = "", db: Session = Depends(get_db)):
    """One-time admin endpoint to wipe ALL existing data (all users).

    Protected by the ADMIN_SECRET environment variable. The endpoint fails
    closed: if ADMIN_SECRET is unset or empty, every request is rejected, so a
    deployment cannot be wiped with a well-known default value.

    Args:
        secret: Value supplied by the caller (query parameter); must equal
            ADMIN_SECRET.
        db: SQLAlchemy session for the local SQLite tables.

    Returns:
        ``{"status": ..., "results": {...}}`` with one human-readable entry per
        store (chroma, supabase, sqlite, local_profiles). A failure in one
        store is reported in its entry and does not stop the remaining steps.

    Raises:
        HTTPException: 403 when the secret is missing, wrong, or not configured.
    """
    import hmac
    import shutil
    from pathlib import Path

    expected_secret = os.environ.get("ADMIN_SECRET", "")
    # Constant-time comparison on bytes (compare_digest rejects non-ASCII str).
    secret_ok = bool(expected_secret) and hmac.compare_digest(
        secret.encode("utf-8"), expected_secret.encode("utf-8")
    )
    if not secret_ok:
        raise HTTPException(status_code=403, detail="Forbidden: invalid secret")

    results = {}

    # 1. Clear ChromaDB (all per-user directories)
    from backend.rag_engine import clear_all_chroma_data
    results["chroma"] = "✅ cleared" if clear_all_chroma_data() else "❌ failed"

    # 2. Clear Supabase
    try:
        from backend.supabase_storage import SupabaseStorage
        storage = SupabaseStorage()
        if storage.is_available():
            results["supabase"] = "✅ cleared" if storage.clear_all_data() else "❌ failed"
        else:
            results["supabase"] = "⏭️ skipped (not configured)"
    except Exception as e:
        results["supabase"] = f"❌ error: {e}"

    # 3. Clear SQLite tables
    try:
        db.query(Source).delete()
        db.query(Schedule).delete()
        db.query(Mastery).delete()
        db.commit()
        results["sqlite"] = "✅ cleared"
    except Exception as e:
        # Undo the partial deletes so the session is usable again.
        db.rollback()
        results["sqlite"] = f"❌ error: {e}"

    # 4. Clear local JSON profiles
    profile_dir = Path.home() / ".focusflow"
    try:
        if profile_dir.exists():
            shutil.rmtree(profile_dir)
            results["local_profiles"] = "✅ cleared"
        else:
            results["local_profiles"] = "⏭️ skipped (not found)"
    except Exception as e:
        results["local_profiles"] = f"❌ error: {e}"

    return {"status": "Data clear complete", "results": results}
136
+
137
  # Pydantic Models
138
  class ScheduleItem(BaseModel):
139
  id: int
 
158
  next_topic_unlocked: bool
159
 
160
  @app.post("/upload")
161
+ async def upload_file(file: UploadFile = File(...), db: Session = Depends(get_db), student_id: str = Depends(get_student_id)):
162
  file_location = f"data/{file.filename}"
163
  try:
164
  with open(file_location, "wb+") as buffer:
 
166
  except Exception as e:
167
  raise HTTPException(status_code=500, detail=f"Could not save file: {str(e)}")
168
 
169
+ # Ingest into per-user ChromaDB collection
170
  try:
171
+ ingest_document(file_location, student_id=student_id)
172
  except Exception as e:
 
 
173
  raise HTTPException(status_code=500, detail=f"Ingestion failed: {str(e)}")
174
 
175
+ # Save to SQLite (local fallback)
176
+ new_source = Source(student_id=student_id, filename=file.filename, type="local", file_path=file_location, is_active=True)
177
  db.add(new_source)
178
  db.commit()
179
  db.refresh(new_source)
180
 
181
+ # Save to Supabase (cloud persistence)
182
+ sb = _get_supabase()
183
+ if sb:
184
+ try:
185
+ sb.table("sources").insert({
186
+ "student_id": student_id,
187
+ "name": file.filename,
188
+ "source_type": "pdf",
189
+ "file_path": file_location,
190
+ "is_active": True
191
+ }).execute()
192
+ except Exception as e:
193
+ logger.warning(f"Supabase source save failed: {e}")
194
+
195
  return {"message": "File uploaded and ingested successfully", "id": new_source.id}
196
 
197
  class UrlRequest(BaseModel):
198
  url: str
199
 
200
  @app.post("/ingest_url")
201
+ def ingest_url_endpoint(request: UrlRequest, db: Session = Depends(get_db), student_id: str = Depends(get_student_id)):
202
  try:
203
  from backend.rag_engine import ingest_url
204
+ title = ingest_url(request.url, student_id=student_id)
205
 
206
+ # Save to SQLite
207
+ new_source = Source(student_id=student_id, filename=title, type="url", file_path=request.url, is_active=True)
 
208
  db.add(new_source)
209
  db.commit()
210
  db.refresh(new_source)
211
 
212
+ # Save to Supabase
213
+ sb = _get_supabase()
214
+ if sb:
215
+ try:
216
+ sb.table("sources").insert({
217
+ "student_id": student_id,
218
+ "name": title,
219
+ "source_type": "url",
220
+ "file_path": request.url,
221
+ "is_active": True
222
+ }).execute()
223
+ except Exception as e:
224
+ logger.warning(f"Supabase source save failed: {e}")
225
+
226
  return {"message": f"Successfully added: {title}", "id": new_source.id}
227
  except Exception as e:
228
  raise HTTPException(status_code=500, detail=str(e))
 
233
  source_type: str = "text"
234
 
235
  @app.post("/ingest_text")
236
+ def ingest_text_endpoint(request: TextIngestionRequest, db: Session = Depends(get_db), student_id: str = Depends(get_student_id)):
237
  """Ingest raw text content (e.g. browser-fetched YouTube transcripts)."""
238
  try:
239
  from backend.rag_engine import ingest_text
240
+ title = ingest_text(request.text, request.source_name, request.source_type, student_id=student_id)
241
 
242
+ # Save to SQLite
243
+ new_source = Source(student_id=student_id, filename=title, type=request.source_type, file_path=request.source_name, is_active=True)
244
  db.add(new_source)
245
  db.commit()
246
  db.refresh(new_source)
247
 
248
+ # Save to Supabase
249
+ sb = _get_supabase()
250
+ if sb:
251
+ try:
252
+ sb.table("sources").insert({
253
+ "student_id": student_id,
254
+ "name": title,
255
+ "source_type": request.source_type,
256
+ "file_path": request.source_name,
257
+ "is_active": True
258
+ }).execute()
259
+ except Exception as e:
260
+ logger.warning(f"Supabase source save failed: {e}")
261
+
262
  return {"message": f"Successfully added: {title}", "id": new_source.id}
263
  except Exception as e:
264
  raise HTTPException(status_code=500, detail=str(e))
 
267
  video_id: str
268
 
269
  @app.post("/ingest_youtube")
270
+ def ingest_youtube(request: YouTubeIngestionRequest, db: Session = Depends(get_db), student_id: str = Depends(get_student_id)):
271
  try:
272
  from backend.rag_engine import get_youtube_transcript, ingest_text
273
  # Fetch transcript using Invidious
274
  transcript_text = get_youtube_transcript(request.video_id)
275
 
276
+ # Run through existing ingestion pipeline with student scoping
277
  source_name = f"YouTube: {request.video_id}"
278
  title = ingest_text(
279
  text=transcript_text,
280
  source_name=source_name,
281
+ source_type="youtube",
282
+ student_id=student_id
283
  )
284
 
285
+ # Save to DB scoped by student_id
286
+ new_source = Source(student_id=student_id, filename=title, type="youtube", file_path=source_name, is_active=True)
287
  db.add(new_source)
288
  db.commit()
289
  db.refresh(new_source)
290
 
291
+ # Save to Supabase
292
+ sb = _get_supabase()
293
+ if sb:
294
+ try:
295
+ sb.table("sources").insert({
296
+ "student_id": student_id,
297
+ "name": title,
298
+ "source_type": "youtube",
299
+ "file_path": source_name,
300
+ "is_active": True
301
+ }).execute()
302
+ except Exception as e:
303
+ logger.warning(f"Supabase source save failed: {e}")
304
+
305
  return {"status": "success", "message": f"Successfully added: {title}", "source": source_name, "id": new_source.id}
306
 
307
  except ValueError as e:
 
314
 
315
 
316
  @app.get("/sources", response_model=List[SourceItem])
317
+ def get_sources(db: Session = Depends(get_db), student_id: str = Depends(get_student_id)):
318
+ # Try Supabase first (cloud-persistent)
319
+ sb = _get_supabase()
320
+ if sb:
321
+ try:
322
+ result = sb.table("sources")\
323
+ .select("*")\
324
+ .eq("student_id", student_id)\
325
+ .eq("is_active", True)\
326
+ .execute()
327
+ if result.data:
328
+ # Map Supabase columns to SourceItem format
329
+ sources = []
330
+ for row in result.data:
331
+ sources.append({
332
+ "id": row.get("id", 0),
333
+ "filename": row.get("name", ""),
334
+ "type": row.get("source_type", "local"),
335
+ "file_path": row.get("file_path", ""),
336
+ "is_active": row.get("is_active", True)
337
+ })
338
+ return sources
339
+ except Exception as e:
340
+ logger.warning(f"Supabase sources query failed, falling back to SQLite: {e}")
341
+
342
+ # Fallback to SQLite (local mode)
343
+ sources = db.query(Source).filter(Source.is_active == True, Source.student_id == student_id).all()
344
  return sources
345
 
346
  @app.delete("/sources/{source_id}")
347
+ def delete_source(source_id: int, db: Session = Depends(get_db), student_id: str = Depends(get_student_id)):
348
+ # Get source info for ChromaDB cleanup
349
+ source = db.query(Source).filter(Source.id == source_id, Source.student_id == student_id).first()
350
+ source_file_path = source.file_path if source else None
351
+ source_filename = source.filename if source else None
352
+
353
+ # Delete from per-user vector store
354
+ if source_file_path:
355
+ try:
356
+ from backend.rag_engine import delete_document
357
+ delete_document(source_file_path, student_id=student_id)
358
+ except Exception as e:
359
+ logger.warning(f"Failed to delete from vector store: {e}")
360
+
361
+ # Soft delete in SQLite
362
+ if source:
363
+ source.is_active = False
364
+ db.commit()
365
+
366
+ # Delete from Supabase
367
+ sb = _get_supabase()
368
+ if sb:
369
+ try:
370
+ # Try by ID first, then by name + student_id
371
+ sb.table("sources")\
372
+ .update({"is_active": False})\
373
+ .eq("student_id", student_id)\
374
+ .eq("id", source_id)\
375
+ .execute()
376
+ # Also try matching by name in case IDs differ
377
+ if source_filename:
378
+ sb.table("sources")\
379
+ .update({"is_active": False})\
380
+ .eq("student_id", student_id)\
381
+ .eq("name", source_filename)\
382
+ .execute()
383
+ except Exception as e:
384
+ logger.warning(f"Supabase source delete failed: {e}")
385
 
 
 
 
 
 
 
 
 
 
386
  return {"success": True, "message": "Source deleted"}
387
 
388
  @app.get("/schedule/{date}", response_model=List[ScheduleItem])
 
437
  request_text: str
438
 
439
  @app.post("/generate_plan")
440
+ def generate_plan_endpoint(request: PlanRequest, student_id: str = Depends(get_student_id)):
441
  try:
442
  from backend.rag_engine import generate_study_plan
443
+ plan = generate_study_plan(request.request_text, student_id=student_id)
444
  return plan
445
  except Exception as e:
446
  raise HTTPException(status_code=500, detail=str(e))
 
450
  history: List[dict] = []
451
 
452
  @app.post("/query")
453
+ async def query_kb(request: QueryRequest, student_id: str = Depends(get_student_id)):
454
  """
455
  RAG query endpoint.
456
  """
457
  from backend.rag_engine import query_knowledge_base
458
+ response = query_knowledge_base(request.question, request.history, student_id=student_id)
459
  return response
460
 
461
  class LessonRequest(BaseModel):
462
  topic: str
463
 
464
  @app.post("/generate_lesson")
465
+ def generate_lesson_endpoint(request: LessonRequest, db: Session = Depends(get_db), student_id: str = Depends(get_student_id)):
466
  try:
467
  from backend.rag_engine import generate_lesson_content
468
+ content = generate_lesson_content(request.topic, student_id=student_id)
469
  return {"content": content}
470
  except Exception as e:
471
  raise HTTPException(status_code=500, detail=str(e))
 
474
  topic: str
475
 
476
  @app.post("/generate_quiz")
477
+ def generate_quiz_endpoint(request: QuizRequest, student_id: str = Depends(get_student_id)):
478
  try:
479
  from backend.rag_engine import generate_quiz_data
480
+ quiz_data = generate_quiz_data(request.topic, student_id=student_id)
481
  return {"quiz": quiz_data}
482
  except Exception as e:
483
  raise HTTPException(status_code=500, detail=str(e))
 
598
  tasks = profile_manager.get_incomplete_tasks(current_day)
599
  return {"incomplete_tasks": tasks}
600
  except Exception as e:
601
+ raise HTTPException(status_code=500, detail=str(e))
602
+
603
+ # ========== AUTH: PROFILE SYNC ==========
604
class ProfileRequest(BaseModel):
    """Request body for POST /auth/profile (sent by the frontend after login)."""
    uid: str
    email: str
    name: str
    avatar_url: str = ""


@app.post("/auth/profile")
async def save_auth_profile(request: ProfileRequest):
    """Save/update student profile in Supabase on login.

    Upserts one row in the ``students`` table keyed by ``uid`` and stamps
    ``last_login`` with the current UTC time.

    Returns:
        ``{"status": "success"}`` after the upsert,
        ``{"status": "skipped", ...}`` when Supabase is not available, or
        ``{"status": "error", ...}`` when anything raises. The endpoint never
        raises, so a failed sync cannot break the login flow.

    NOTE(review): the ``uid`` is taken from the request body; nothing here
    checks that the caller owns that uid.
    """
    from datetime import datetime, timezone

    try:
        from backend.supabase_storage import SupabaseStorage
        storage = SupabaseStorage()
        if not storage.is_available():
            return {"status": "skipped", "detail": "Supabase not configured"}

        storage.client.table("students").upsert({
            "uid": request.uid,
            "email": request.email,
            "name": request.name,
            "avatar_url": request.avatar_url,
            # Timezone-aware so the stored value does not depend on the
            # server's local timezone.
            "last_login": datetime.now(timezone.utc).isoformat()
        }, on_conflict="uid").execute()
        return {"status": "success"}
    except Exception as e:
        # Don't fail login if profile sync fails, but leave a trace server-side.
        logger.warning(f"Profile sync failed: {e}")
        return {"status": "error", "detail": str(e)}
backend/rag_engine.py CHANGED
@@ -17,6 +17,29 @@ logger = logging.getLogger(__name__)
17
 
18
  CACHE_DIR = "./chroma_db"
19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  INVIDIOUS_INSTANCES = [
21
  "https://inv.nadeko.net",
22
  "https://invidious.slipfox.xyz",
@@ -139,7 +162,7 @@ def get_youtube_transcript(video_id: str) -> str:
139
  return transcript_text
140
 
141
 
142
- def ingest_document(file_path: str):
143
  """
144
  Ingests a PDF document into the vector database.
145
  Falls back to OCR (pytesseract) if standard text extraction yields little/no text.
@@ -147,6 +170,8 @@ def ingest_document(file_path: str):
147
  if not os.path.exists(file_path):
148
  raise FileNotFoundError(f"File not found: {file_path}")
149
 
 
 
150
  # --- Step 1: Try standard text extraction ---
151
  loader = PyPDFLoader(file_path)
152
  docs = loader.load()
@@ -215,21 +240,23 @@ def ingest_document(file_path: str):
215
  "It may be a scanned/image-only document."
216
  )
217
 
218
- # --- Step 4: Store in ChromaDB (unchanged) ---
219
  Chroma.from_documents(
220
  documents=splits,
221
  embedding=get_embeddings(),
222
- persist_directory=CACHE_DIR
223
  )
224
  # Ingestion successful
225
 
226
- def ingest_url(url: str):
227
  """
228
  Ingests content from a web page URL.
229
  YouTube transcripts are now handled browser-side via frontend/youtube_transcript.html.
230
  """
231
  from langchain_community.document_loaders import WebBaseLoader
232
 
 
 
233
  try:
234
  logger.info(f"Processing web page: {url}")
235
  loader = WebBaseLoader(url)
@@ -247,11 +274,11 @@ def ingest_url(url: str):
247
 
248
  logger.info(f"Split into {len(splits)} chunks, storing in ChromaDB")
249
 
250
- # Store in ChromaDB
251
  Chroma.from_documents(
252
  documents=splits,
253
  embedding=get_embeddings(),
254
- persist_directory=CACHE_DIR
255
  )
256
 
257
  logger.info(f"Successfully ingested: {title}")
@@ -264,7 +291,7 @@ def ingest_url(url: str):
264
  logger.error(f"Error ingesting URL: {e}")
265
  raise ValueError(f"Failed to process URL: {str(e)}")
266
 
267
- def ingest_text(text: str, source_name: str, source_type: str = "text"):
268
  """
269
  Ingests raw text content into the vector database.
270
  Used for browser-fetched YouTube transcripts and other text sources.
@@ -273,6 +300,8 @@ def ingest_text(text: str, source_name: str, source_type: str = "text"):
273
  if not text or len(text.strip()) < 50:
274
  raise ValueError("Text content is too short or empty.")
275
 
 
 
276
  # Create a document from the text
277
  docs = [Document(
278
  page_content=text,
@@ -291,22 +320,23 @@ def ingest_text(text: str, source_name: str, source_type: str = "text"):
291
 
292
  logger.info(f"Split into {len(splits)} chunks, storing in ChromaDB")
293
 
294
- # Store in ChromaDB
295
  Chroma.from_documents(
296
  documents=splits,
297
  embedding=get_embeddings(),
298
- persist_directory=CACHE_DIR
299
  )
300
 
301
  logger.info(f"Successfully ingested text: {source_name}")
302
  return source_name
303
 
304
- def delete_document(source_path: str):
305
  """
306
  Removes a document from the vector database by its source path.
307
  """
 
308
  vector_store = Chroma(
309
- persist_directory=CACHE_DIR,
310
  embedding_function=get_embeddings()
311
  )
312
 
@@ -321,12 +351,13 @@ def delete_document(source_path: str):
321
  # In backend/rag_engine.py
322
 
323
 
324
- def generate_study_plan(user_request: str):
325
 
 
326
 
327
  # Initialize resources
328
  vector_store = Chroma(
329
- persist_directory=CACHE_DIR,
330
  embedding_function=get_embeddings()
331
  )
332
  llm = get_llm()
@@ -431,12 +462,13 @@ def generate_study_plan(user_request: str):
431
 
432
  return {"days": plan_days}
433
 
434
- def generate_lesson_content(topic_title: str):
435
 
 
436
 
437
  # Initialize resources
438
  vector_store = Chroma(
439
- persist_directory=CACHE_DIR,
440
  embedding_function=get_embeddings()
441
  )
442
  llm = get_llm()
@@ -532,12 +564,13 @@ Markdown content:"""
532
  return f"### Error Generating Lesson\nCould not retrieve content: {e}"
533
 
534
 
535
- def query_knowledge_base(question: str, history: list = []):
536
 
 
537
 
538
  # Init
539
  vector_store = Chroma(
540
- persist_directory=CACHE_DIR,
541
  embedding_function=get_embeddings()
542
  )
543
  llm = get_llm()
@@ -579,12 +612,13 @@ def query_knowledge_base(question: str, history: list = []):
579
  "answer": answer_text,
580
  "sources": sources_list
581
  }
582
- def generate_quiz_data(topic_title: str):
583
 
 
584
 
585
  # Initialize resources
586
  vector_store = Chroma(
587
- persist_directory=CACHE_DIR,
588
  embedding_function=get_embeddings()
589
  )
590
  llm = get_llm()
 
17
 
18
  CACHE_DIR = "./chroma_db"
19
 
20
def _get_user_chroma_dir(student_id: str = "anonymous") -> str:
    """Return the ChromaDB persist directory for a given student.

    Args:
        student_id: Identifier of the student. A falsy value or the literal
            "anonymous" selects the shared root directory ``CACHE_DIR``.

    Returns:
        ``CACHE_DIR`` for anonymous callers, otherwise
        ``"{CACHE_DIR}/student_<safe_id>"``.

    Ids made only of alphanumerics, "_" and "-" map to the same directory
    name as before. When any character had to be replaced, a short digest of
    the raw id is appended so that two different ids (e.g. "a@b" and "a_b")
    can no longer collapse onto the same directory and share each other's
    documents.
    """
    if not student_id or student_id == "anonymous":
        return CACHE_DIR

    # Replace anything that is not filesystem-safe with "_".
    safe_id = "".join(c if c.isalnum() or c in "_-" else "_" for c in student_id)

    if safe_id != student_id:
        # Sanitization is lossy; disambiguate with a hash of the original id.
        import hashlib
        digest = hashlib.sha256(
            student_id.encode("utf-8", "surrogatepass")
        ).hexdigest()[:12]
        safe_id = f"{safe_id}_{digest}"

    return f"{CACHE_DIR}/student_{safe_id}"
27
+
28
def clear_all_chroma_data():
    """Wipe the ChromaDB store for every user (admin clear endpoint).

    Removes the whole ``CACHE_DIR`` tree and recreates it as an empty
    directory. A missing directory counts as success and is left uncreated.

    Returns:
        True when the store was cleared or did not exist, False when any
        exception was raised (the error is logged, not propagated).
    """
    import shutil

    try:
        # Guard clause: nothing on disk means nothing to delete.
        if not os.path.exists(CACHE_DIR):
            logger.info("ChromaDB directory does not exist, nothing to clear")
            return True

        shutil.rmtree(CACHE_DIR)
        os.makedirs(CACHE_DIR, exist_ok=True)
        logger.info("✅ All ChromaDB data deleted")
        return True
    except Exception as err:
        logger.error(f"❌ ChromaDB clear error: {err}")
        return False
42
+
43
  INVIDIOUS_INSTANCES = [
44
  "https://inv.nadeko.net",
45
  "https://invidious.slipfox.xyz",
 
162
  return transcript_text
163
 
164
 
165
+ def ingest_document(file_path: str, student_id: str = "anonymous"):
166
  """
167
  Ingests a PDF document into the vector database.
168
  Falls back to OCR (pytesseract) if standard text extraction yields little/no text.
 
170
  if not os.path.exists(file_path):
171
  raise FileNotFoundError(f"File not found: {file_path}")
172
 
173
+ user_chroma_dir = _get_user_chroma_dir(student_id)
174
+
175
  # --- Step 1: Try standard text extraction ---
176
  loader = PyPDFLoader(file_path)
177
  docs = loader.load()
 
240
  "It may be a scanned/image-only document."
241
  )
242
 
243
+ # --- Step 4: Store in per-user ChromaDB ---
244
  Chroma.from_documents(
245
  documents=splits,
246
  embedding=get_embeddings(),
247
+ persist_directory=user_chroma_dir
248
  )
249
  # Ingestion successful
250
 
251
+ def ingest_url(url: str, student_id: str = "anonymous"):
252
  """
253
  Ingests content from a web page URL.
254
  YouTube transcripts are now handled browser-side via frontend/youtube_transcript.html.
255
  """
256
  from langchain_community.document_loaders import WebBaseLoader
257
 
258
+ user_chroma_dir = _get_user_chroma_dir(student_id)
259
+
260
  try:
261
  logger.info(f"Processing web page: {url}")
262
  loader = WebBaseLoader(url)
 
274
 
275
  logger.info(f"Split into {len(splits)} chunks, storing in ChromaDB")
276
 
277
+ # Store in per-user ChromaDB
278
  Chroma.from_documents(
279
  documents=splits,
280
  embedding=get_embeddings(),
281
+ persist_directory=user_chroma_dir
282
  )
283
 
284
  logger.info(f"Successfully ingested: {title}")
 
291
  logger.error(f"Error ingesting URL: {e}")
292
  raise ValueError(f"Failed to process URL: {str(e)}")
293
 
294
+ def ingest_text(text: str, source_name: str, source_type: str = "text", student_id: str = "anonymous"):
295
  """
296
  Ingests raw text content into the vector database.
297
  Used for browser-fetched YouTube transcripts and other text sources.
 
300
  if not text or len(text.strip()) < 50:
301
  raise ValueError("Text content is too short or empty.")
302
 
303
+ user_chroma_dir = _get_user_chroma_dir(student_id)
304
+
305
  # Create a document from the text
306
  docs = [Document(
307
  page_content=text,
 
320
 
321
  logger.info(f"Split into {len(splits)} chunks, storing in ChromaDB")
322
 
323
+ # Store in per-user ChromaDB
324
  Chroma.from_documents(
325
  documents=splits,
326
  embedding=get_embeddings(),
327
+ persist_directory=user_chroma_dir
328
  )
329
 
330
  logger.info(f"Successfully ingested text: {source_name}")
331
  return source_name
332
 
333
+ def delete_document(source_path: str, student_id: str = "anonymous"):
334
  """
335
  Removes a document from the vector database by its source path.
336
  """
337
+ user_chroma_dir = _get_user_chroma_dir(student_id)
338
  vector_store = Chroma(
339
+ persist_directory=user_chroma_dir,
340
  embedding_function=get_embeddings()
341
  )
342
 
 
351
  # In backend/rag_engine.py
352
 
353
 
354
+ def generate_study_plan(user_request: str, student_id: str = "anonymous"):
355
 
356
+ user_chroma_dir = _get_user_chroma_dir(student_id)
357
 
358
  # Initialize resources
359
  vector_store = Chroma(
360
+ persist_directory=user_chroma_dir,
361
  embedding_function=get_embeddings()
362
  )
363
  llm = get_llm()
 
462
 
463
  return {"days": plan_days}
464
 
465
+ def generate_lesson_content(topic_title: str, student_id: str = "anonymous"):
466
 
467
+ user_chroma_dir = _get_user_chroma_dir(student_id)
468
 
469
  # Initialize resources
470
  vector_store = Chroma(
471
+ persist_directory=user_chroma_dir,
472
  embedding_function=get_embeddings()
473
  )
474
  llm = get_llm()
 
564
  return f"### Error Generating Lesson\nCould not retrieve content: {e}"
565
 
566
 
567
+ def query_knowledge_base(question: str, history: list = [], student_id: str = "anonymous"):
568
 
569
+ user_chroma_dir = _get_user_chroma_dir(student_id)
570
 
571
  # Init
572
  vector_store = Chroma(
573
+ persist_directory=user_chroma_dir,
574
  embedding_function=get_embeddings()
575
  )
576
  llm = get_llm()
 
612
  "answer": answer_text,
613
  "sources": sources_list
614
  }
615
+ def generate_quiz_data(topic_title: str, student_id: str = "anonymous"):
616
 
617
+ user_chroma_dir = _get_user_chroma_dir(student_id)
618
 
619
  # Initialize resources
620
  vector_store = Chroma(
621
+ persist_directory=user_chroma_dir,
622
  embedding_function=get_embeddings()
623
  )
624
  llm = get_llm()
backend/student_data.py CHANGED
@@ -17,42 +17,40 @@ class StudentProfileManager:
17
  """Manages student profile data with JSON file or Supabase persistence"""
18
 
19
  def __init__(self, student_id: Optional[str] = None):
20
- # Check if Supabase should be used
21
- self.use_supabase = os.getenv("USE_SUPABASE", "false").lower() == "true"
22
-
23
- if self.use_supabase:
24
- try:
25
- from backend.supabase_storage import SupabaseStorage
26
- self.supabase = SupabaseStorage()
27
- if self.supabase.is_available():
28
- logger.info("Using Supabase for persistent storage")
29
- self.storage_mode = "supabase"
30
- else:
31
- logger.warning("Supabase not available, falling back to local storage")
32
- self.use_supabase = False
33
- self.storage_mode = "local"
34
- except Exception as e:
35
- logger.error(f"Failed to initialize Supabase: {e}")
36
- self.use_supabase = False
37
- self.storage_mode = "local"
38
- else:
39
- self.storage_mode = "local"
40
- logger.info("Using local JSON file storage")
41
-
42
- # Generate unique student ID per user session
43
  if student_id:
44
  self.student_id = student_id
45
  else:
46
- # Generate unique ID if not provided (fallback for local mode)
47
  import uuid
48
  self.student_id = f"student_{uuid.uuid4().hex[:12]}"
49
-
50
  logger.info(f"StudentProfileManager initialized for {self.student_id}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
 
52
- # Local storage setup (always available as fallback)
53
  self.profile_dir = Path.home() / ".focusflow"
54
- self.profile_file = self.profile_dir / "student_profile.json"
55
- self.backup_file = self.profile_dir / "student_profile.backup.json"
 
 
 
 
56
  self.lock = threading.Lock()
57
 
58
  if not self.use_supabase:
 
17
  """Manages student profile data with JSON file or Supabase persistence"""
18
 
19
  def __init__(self, student_id: Optional[str] = None):
20
+ # 1. Generate unique student ID per user session
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  if student_id:
22
  self.student_id = student_id
23
  else:
 
24
  import uuid
25
  self.student_id = f"student_{uuid.uuid4().hex[:12]}"
26
+
27
  logger.info(f"StudentProfileManager initialized for {self.student_id}")
28
+
29
+ # 2. Auto-detect Supabase (don't strictly require USE_SUPABASE to be true)
30
+ try:
31
+ from backend.supabase_storage import SupabaseStorage
32
+ self.supabase = SupabaseStorage()
33
+ if self.supabase.is_available():
34
+ logger.info("Using Supabase for persistent storage")
35
+ self.use_supabase = True
36
+ self.storage_mode = "supabase"
37
+ else:
38
+ logger.warning("Supabase no available, falling back to local storage")
39
+ self.use_supabase = False
40
+ self.storage_mode = "local"
41
+ except Exception as e:
42
+ logger.error(f"Failed to initialize Supabase: {e}")
43
+ self.use_supabase = False
44
+ self.storage_mode = "local"
45
 
46
+ # 3. Local storage setup (always available as fallback)
47
  self.profile_dir = Path.home() / ".focusflow"
48
+
49
+ # FIX: Include student_id in profile filenames to prevent users overwriting each other in local mode
50
+ safe_id = "".join(c if c.isalnum() else "_" for c in self.student_id)[:40]
51
+ self.profile_file = self.profile_dir / f"profile_{safe_id}.json"
52
+ self.backup_file = self.profile_dir / f"profile_{safe_id}.backup.json"
53
+
54
  self.lock = threading.Lock()
55
 
56
  if not self.use_supabase:
backend/supabase_storage.py CHANGED
@@ -147,3 +147,26 @@ class SupabaseStorage:
147
  except Exception as e:
148
  logger.error(f"Failed to check profile existence: {e}")
149
  return False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
  except Exception as e:
148
  logger.error(f"Failed to check profile existence: {e}")
149
  return False
150
+
151
def clear_all_data(self) -> bool:
    """
    Remove every student profile row from the Supabase table.

    Used by the admin clear endpoint to wipe shared data.

    Returns:
        True if the delete request executed without raising; False if
        Supabase is unavailable or the request raised (the error is logged).
    """
    if not self.is_available():
        logger.warning("Supabase not available for clear operation")
        return False

    try:
        # The filter matches rows whose student_id differs from a sentinel.
        # NOTE(review): presumably present because the client needs a filter
        # on delete; a row whose id is literally "KEEP_NOTHING" would be
        # left behind - confirm this is acceptable.
        wipe_request = (
            self.client.table(self.table_name)
            .delete()
            .neq("student_id", "KEEP_NOTHING")
        )
        wipe_request.execute()

        logger.info("✅ All Supabase student profiles deleted")
        return True
    except Exception as err:
        logger.error(f"❌ Failed to clear Supabase data: {err}")
        return False
+ return False