mohhhhhit committed on
Commit
f67c7fa
·
verified ·
1 Parent(s): 3736c33

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +896 -896
main.py CHANGED
@@ -1,896 +1,896 @@
1
- """
2
- FastAPI Backend for NotebookPRO
3
- Handles RAG, LLM, file processing, and chat management
4
- """
5
- from fastapi import FastAPI, File, UploadFile, HTTPException
6
- from fastapi.middleware.cors import CORSMiddleware
7
- from pydantic import BaseModel
8
- from typing import List, Optional, Dict, Any
9
- from pathlib import Path
10
- import json
11
- from datetime import datetime
12
- import uuid
13
- import sys
14
- import warnings
15
- import logging
16
- import os
17
- import shutil
18
-
19
- # Suppress warnings
20
- warnings.filterwarnings('ignore')
21
- os.environ['PYTHONWARNINGS'] = 'ignore'
22
- os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
23
- os.environ['TOKENIZERS_PARALLELISM'] = 'false'
24
- os.environ.setdefault('OMP_NUM_THREADS', '2')
25
- os.environ.setdefault('MKL_NUM_THREADS', '2')
26
- os.environ.setdefault('OPENBLAS_NUM_THREADS', '2')
27
- os.environ.setdefault('NUMEXPR_NUM_THREADS', '2')
28
- #logging.getLogger().setLevel(logging.ERROR)
29
-
30
- # Add project root to path
31
- sys.path.append(str(Path(__file__).parent.parent))
32
-
33
- import config
34
- from utils.document_processor import DocumentProcessor
35
- from utils.vector_db import VectorDatabase
36
- from utils.hybrid_retriever import HybridRetriever
37
- from utils.llm_generator import LLMGenerator
38
- from utils.config_manager import ConfigManager
39
- from utils.spaces_manager import SpacesManager
40
- from utils.studio_manager import StudioManager
41
- from utils.studio_generator import StudioGenerator
42
-
43
- # Initialize FastAPI
44
- app = FastAPI(title="NotebookPRO API", version="2.0.0")
45
-
46
- # CORS - Allow Flutter web to connect
47
- app.add_middleware(
48
- CORSMiddleware,
49
- allow_origins=["*"], # In production, specify your Flutter web URL
50
- allow_credentials=True,
51
- allow_methods=["*"],
52
- allow_headers=["*"],
53
- )
54
-
55
- # Global instances
56
- config_manager = ConfigManager()
57
- spaces_manager = SpacesManager()
58
- studio_manager = StudioManager()
59
- studio_generator = None # Will be initialized after LLM
60
- vector_db = None
61
- llm_generator = None
62
- current_space = None
63
-
64
- # ==================== Pydantic Models ====================
65
-
66
class ChatMessage(BaseModel):
    """Schema describing one message of a chat conversation."""

    role: str
    content: str
    timestamp: str
    # Optional list of source records attached to the message; absent by default.
    sources: Optional[List[Dict[str, Any]]] = None
71
-
72
class ChatRequest(BaseModel):
    """Request body accepted by ``POST /api/chat``."""

    query: str
    space_id: str
    # When omitted, the chat endpoint generates a fresh UUID for a new chat.
    chat_id: Optional[str] = None
    # The chat endpoint forwards this as the LLM use case only when it is one of
    # "summary", "explanation", "qa" or "notes"; anything else is treated as "qa".
    workflow: str = "chat"
77
-
78
class ChatResponse(BaseModel):
    """Response body returned by ``POST /api/chat``."""

    response: str
    # One record per retrieved chunk, as built by the chat endpoint
    # ("content", "metadata" and "score" keys).
    sources: List[Dict[str, Any]]
    chat_id: str
    timestamp: str
83
-
84
class SpaceCreate(BaseModel):
    """Request body accepted by ``POST /api/spaces``: the name of the new space."""

    name: str
86
-
87
class SpaceResponse(BaseModel):
    """Summary of a space as returned by the space listing/creation endpoints."""

    id: str
    name: str
    created_at: str
    # Number of entries recorded in the space's processed_files.json
    # (0 for a freshly created space).
    file_count: int
92
-
93
class ChatInfo(BaseModel):
    """Listing entry for one chat, as built by ``GET /api/spaces/{space_id}/chats``."""

    id: str
    # Derived from the first user message of the chat.
    title: str
    # First 100 characters of the chat's first message ("" for an empty chat).
    preview: str
    created_at: str
    updated_at: str
    message_count: int
100
-
101
class ConfigResponse(BaseModel):
    """Response body of ``GET /api/config``; keys are returned in masked form."""

    # Each field is None when the corresponding provider has no key stored.
    groq_api_key: Optional[str]
    gemini_api_key: Optional[str]
104
-
105
class ConfigUpdate(BaseModel):
    """Request body of ``POST /api/config``; only non-empty fields are stored."""

    groq_api_key: Optional[str] = None
    gemini_api_key: Optional[str] = None
108
-
109
class ChatToNotebookRequest(BaseModel):
    """Request body of ``POST /api/studio/notebook/from-chat``.

    Carries one question/answer pair from a chat that should be stored as a
    notebook entry of the given space.
    """

    space_id: str
    question: str
    answer: str
    chat_id: Optional[str] = None
    assistant_timestamp: Optional[str] = None
    tags: List[str] = []
    # When omitted, the endpoint falls back to the stored space name and then
    # to the space id itself.
    space_name: Optional[str] = None
117
-
118
- # ==================== Helper Functions ====================
119
-
120
def get_data_dir():
    """Return the ``data`` directory that sits two levels above this file."""
    project_root = Path(__file__).parent.parent
    return project_root / "data"
123
-
124
def get_space_dir(space_id: str):
    """Return the directory of one space: ``<data dir>/spaces/<space_id>``.

    NOTE(review): ``space_id`` is joined into the path without validation and
    the endpoints pass it straight from the URL - confirm that is acceptable.
    """
    spaces_root = get_data_dir() / "spaces"
    return spaces_root / space_id
127
-
128
def load_chats_for_space(space_id: str) -> List[Dict]:
    """Read the chats stored in a space's ``chats.json``.

    Returns the parsed JSON content, or an empty list when the file does not
    exist.
    """
    chats_path = get_space_dir(space_id) / "chats.json"
    if not chats_path.exists():
        return []
    with open(chats_path, 'r', encoding='utf-8') as handle:
        return json.load(handle)
135
-
136
def save_chats_for_space(space_id: str, chats: List[Dict]):
    """Write ``chats`` to the space's ``chats.json`` as indented UTF-8 JSON.

    The space directory is created first when it does not exist yet.
    """
    space_dir = get_space_dir(space_id)
    space_dir.mkdir(parents=True, exist_ok=True)
    with open(space_dir / "chats.json", 'w', encoding='utf-8') as handle:
        json.dump(chats, handle, indent=2, ensure_ascii=False)
142
-
143
def get_chat_title(messages: List[Dict]) -> str:
    """Derive a chat title from the first user message.

    Args:
        messages: Chat messages as dicts; each is expected to carry ``role``
            and ``content`` keys, but records missing either are tolerated.

    Returns:
        The first 50 characters of the first user message, followed by
        ``"..."`` when the message is longer than that, or ``"New Chat"``
        when there is no user message.
    """
    for msg in messages:
        # Use .get so a malformed record loaded from disk cannot break listing.
        if msg.get('role') != 'user':
            continue
        content = msg.get('content') or ""
        if len(content) > 50:
            return content[:50] + "..."
        return content
    return "New Chat"
150
-
151
def ensure_notebooks_for_existing_spaces() -> int:
    """Create notebook metadata for every space that does not have any yet.

    Returns:
        The number of spaces for which a notebook was created by this call.
    """
    newly_created = 0

    for space in spaces_manager.get_all_spaces():
        space_id = space.get('id')
        # Skip records without an id and spaces that already own a notebook.
        if not space_id or studio_manager.get_space_notebook(space_id):
            continue

        # The notebook is named after the space, falling back to its id.
        studio_manager.ensure_space_notebook(space_id, space.get('name', space_id))
        newly_created += 1

    return newly_created
169
-
170
def rebuild_space_index_if_missing(space_id: str) -> int:
    """Re-index a space's uploaded files when the active vector index is empty.

    Works on the module-level ``vector_db``. Files with a ``.pdf``, ``.docx``
    or ``.txt`` suffix in the space's ``uploads`` directory are processed,
    chunked and added to the index in batches.

    Returns:
        The number of chunks added, or 0 when nothing was rebuilt (no vector
        DB, index already populated, no uploads, or no chunk produced).
    """
    if not vector_db:
        return 0

    try:
        already_indexed = vector_db.get_collection_count() > 0
    except Exception:
        # A failing count check is not fatal: continue with a best-effort rebuild.
        already_indexed = False
    if already_indexed:
        return 0

    uploads_dir = get_space_dir(space_id) / "uploads"
    if not uploads_dir.exists():
        return 0

    supported_suffixes = {".pdf", ".docx", ".txt"}
    candidates = [
        entry for entry in uploads_dir.iterdir()
        if entry.is_file() and entry.suffix.lower() in supported_suffixes
    ]
    if not candidates:
        return 0

    processor = DocumentProcessor()
    texts: List[str] = []
    metadatas: List[Dict[str, Any]] = []
    ids: List[str] = []

    for upload in candidates:
        try:
            parsed = processor.process_file(upload)
            pieces = processor.chunk_text(
                parsed['content'],
                chunk_size=512,
                overlap=50,
                semantic=True,
            )
            piece_count = len(pieces)
            for position, piece in enumerate(pieces):
                texts.append(piece)
                metadatas.append({
                    'filename': upload.name,
                    'chunk_index': position,
                    'total_chunks': piece_count,
                    'source_type': parsed['format'],
                })
                # The running length of ``ids`` plus a random suffix keeps ids distinct.
                ids.append(f"{space_id}_rebuild_{len(ids)}_{uuid.uuid4().hex[:8]}")
        except Exception as e:
            # One unreadable file must not abort the whole rebuild.
            print(f"Index rebuild skipped {upload.name}: {e}")

    if not texts:
        return 0

    batch_size = 5000
    for start in range(0, len(texts), batch_size):
        stop = start + batch_size
        vector_db.add_documents(texts[start:stop], metadatas[start:stop], ids[start:stop])

    print(f"Rebuilt index for space '{space_id}' with {len(texts)} chunks")
    return len(texts)
233
-
234
def initialize_space(space_id: str):
    """Make ``space_id`` the active space and (re)build the shared components.

    Sets the module-level ``vector_db``, ``llm_generator``, ``studio_generator``
    and ``current_space``. When the requested space is already active and its
    components exist, the call returns immediately.

    Args:
        space_id: Identifier of the space to activate.

    Raises:
        HTTPException: 400 when neither a Groq nor a Gemini API key is
            available from the config manager or the environment.
    """
    global vector_db, llm_generator, studio_generator, current_space

    # Fast path: reuse already initialized components for the active space.
    if current_space == space_id and vector_db is not None and llm_generator is not None:
        return

    # Prefer keys stored by the config manager, then fall back to environment variables.
    groq_key = config_manager.get_api_key('groq') or os.getenv('GROQ_API_KEY')
    gemini_key = (
        config_manager.get_api_key('gemini')
        or os.getenv('GOOGLE_API_KEY')
        or os.getenv('GEMINI_API_KEY')
    )

    if not groq_key and not gemini_key:
        raise HTTPException(status_code=400, detail="No API keys configured. Please add Groq or Gemini API key.")

    # Invalidate the active-space marker before replacing the shared components.
    # If any step below raises, the fast path can then never match a stale
    # ``current_space`` against a ``vector_db`` that belongs to another space.
    current_space = None

    # Vector database collection dedicated to this space.
    vector_db = VectorDatabase(
        collection_name=f"space_{space_id}"
    )

    # Backward-compatibility: rebuild embeddings from uploaded files if index is empty.
    rebuild_space_index_if_missing(space_id)

    # Choose the LLM provider based on the available keys; Groq wins when both exist.
    if groq_key:
        llm_generator = LLMGenerator(provider="groq", api_key=groq_key)
    else:
        llm_generator = LLMGenerator(provider="gemini", api_key=gemini_key)

    # The studio generator shares the LLM that was just created.
    studio_generator = StudioGenerator(llm_generator, studio_manager)
    current_space = space_id
269
-
270
@app.on_event("startup")
async def startup_sync_notebooks():
    """On server startup, create notebooks for spaces that lack one.

    Failures are printed and swallowed so that they cannot prevent the
    server from starting.
    """
    try:
        created_total = ensure_notebooks_for_existing_spaces()
        if created_total > 0:
            print(f"Created {created_total} missing notebook(s) for existing spaces")
    except Exception as e:
        print(f"Notebook startup sync failed: {e}")
280
-
281
- # ==================== API Endpoints ====================
282
-
283
@app.get("/")
async def root():
    """Health-check endpoint: report that the API is up, with its version."""
    payload = {"status": "NotebookPRO API is running", "version": "2.0.0"}
    return payload
287
-
288
def _mask_api_key(key: Optional[str]) -> Optional[str]:
    """Return a display-safe form of an API key, or None when no key is set."""
    if not key:
        return None
    # Reveal the last four characters only when most of the key stays hidden;
    # a very short key would otherwise be exposed entirely.
    visible_suffix = key[-4:] if len(key) > 8 else ""
    return "***" + visible_suffix


@app.get("/api/config", response_model=ConfigResponse)
async def get_config():
    """Return the stored Groq and Gemini API keys in masked form.

    Returns:
        ConfigResponse whose fields are ``"***"`` plus the last four
        characters of the key (just ``"***"`` for keys of eight characters
        or fewer), or None for a provider without a stored key.
    """
    return ConfigResponse(
        groq_api_key=_mask_api_key(config_manager.get_api_key('groq')),
        gemini_api_key=_mask_api_key(config_manager.get_api_key('gemini')),
    )
298
-
299
@app.post("/api/config")
async def update_config(config_update: ConfigUpdate):
    """Store the API keys supplied in the request; empty fields are ignored."""
    submitted = (
        ('groq', config_update.groq_api_key),
        ('gemini', config_update.gemini_api_key),
    )
    for provider, key in submitted:
        if key:
            config_manager.set_api_key(provider, key)

    return {"status": "success", "message": "Configuration updated"}
308
-
309
@app.get("/api/spaces", response_model=List[SpaceResponse])
async def get_spaces():
    """List all spaces together with the number of processed files in each.

    Before listing, notebooks are created for any space that lacks one, so
    spaces added while the server is running are covered too.
    """
    ensure_notebooks_for_existing_spaces()

    summaries = []
    for space in spaces_manager.get_all_spaces():
        space_id = space['id']
        processed_path = get_space_dir(space_id) / "processed_files.json"

        # The file count is the number of records in processed_files.json.
        if processed_path.exists():
            with open(processed_path, 'r') as handle:
                file_count = len(json.load(handle))
        else:
            file_count = 0

        summaries.append(SpaceResponse(
            id=space_id,
            name=space['name'],
            created_at=space['created_at'],
            file_count=file_count
        ))

    return summaries
335
-
336
@app.post("/api/spaces", response_model=SpaceResponse)
async def create_space(space_data: SpaceCreate):
    """Create a space and its notebook metadata.

    Raises:
        HTTPException: 400 when a ``ValueError`` is raised while creating
            the space or building the response.
    """
    try:
        created = spaces_manager.create_space(space_data.name)

        # The notebook carries the same name as the space it belongs to.
        studio_manager.ensure_space_notebook(created['id'], created['name'])

        summary = SpaceResponse(
            id=created['id'],
            name=created['name'],
            created_at=created['created_at'],
            file_count=0
        )
        return summary
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
353
-
354
@app.delete("/api/spaces/{space_id}")
async def delete_space(space_id: str):
    """Delete a space record and remove its directory from disk.

    Raises:
        HTTPException: 400 on ``ValueError``, 500 on any other error.
    """
    try:
        spaces_manager.delete_space(space_id)

        # Remove everything stored on disk for the space.
        target_dir = get_space_dir(space_id)
        if target_dir.exists():
            shutil.rmtree(target_dir)

        return {"status": "success", "message": f"Space {space_id} deleted"}
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error deleting space: {str(e)}")
370
-
371
@app.get("/api/spaces/{space_id}/chats", response_model=List[ChatInfo])
async def get_chats(space_id: str):
    """Return a summary (title, preview, timestamps, size) of every chat in a space."""

    def summarise(chat):
        # Build the listing entry for one stored chat.
        messages = chat.get('messages', [])
        return ChatInfo(
            id=chat['id'],
            title=get_chat_title(messages),
            preview=messages[0]['content'][:100] if messages else "",
            created_at=chat.get('created_at', ''),
            updated_at=chat.get('updated_at', ''),
            message_count=len(messages)
        )

    return [summarise(chat) for chat in load_chats_for_space(space_id)]
389
-
390
@app.get("/api/spaces/{space_id}/chats/{chat_id}")
async def get_chat(space_id: str, chat_id: str):
    """Return the stored chat with the given id.

    Raises:
        HTTPException: 404 when the space has no chat with that id.
    """
    stored = load_chats_for_space(space_id)
    match = next((candidate for candidate in stored if candidate['id'] == chat_id), None)
    if match is None:
        raise HTTPException(status_code=404, detail="Chat not found")
    return match
400
-
401
@app.delete("/api/spaces/{space_id}/chats/{chat_id}")
async def delete_chat(space_id: str, chat_id: str):
    """Remove a chat from a space.

    The call succeeds whether or not the chat existed. The chats file is
    rewritten only when a chat was actually removed, so a request for an
    unknown space or chat no longer creates files or directories.
    """
    chats = load_chats_for_space(space_id)
    remaining = [c for c in chats if c['id'] != chat_id]

    if len(remaining) != len(chats):
        save_chats_for_space(space_id, remaining)

    return {"status": "success", "message": f"Chat {chat_id} deleted"}
409
-
410
@app.post("/api/chat", response_model=ChatResponse)
async def chat(request: ChatRequest):
    """Answer a query with retrieval-augmented generation and store the exchange.

    Activates the requested space, retrieves the five best-matching chunks,
    builds a numbered source context, asks the LLM for a response, appends the
    user and assistant messages to the chat (creating the chat when
    ``chat_id`` is missing or unknown) and saves the space's chats.

    Raises:
        HTTPException: raised by a helper (for example the 400 from
            ``initialize_space`` when no API key is configured) is passed
            through unchanged; any other failure is reported as 500 with the
            error text as detail.
    """
    try:
        # Initialize space if needed
        initialize_space(request.space_id)

        # Hybrid retrieval; alpha=0.6 is described by the original author as
        # 60% vector / 40% BM25 weighting.
        hybrid_retriever = HybridRetriever(vector_db, alpha=0.6)
        documents, metadatas, scores = hybrid_retriever.retrieve(
            query=request.query,
            n_results=5
        )

        # Build the LLM context and the source list returned to the client.
        context_parts = []
        sources = []
        for idx, (doc, meta, score) in enumerate(zip(documents, metadatas, scores), 1):
            # Drop known extensions so citations show a clean document name.
            filename = meta.get('filename', 'Unknown')
            clean_name = filename.replace('.pdf', '').replace('.docx', '').replace('.txt', '')
            context_parts.append(f"Source [{idx}] ({clean_name}):\n{doc}\n")
            sources.append({
                "content": doc[:200] + "..." if len(doc) > 200 else doc,
                "metadata": meta,
                "score": float(score)
            })

        context = "\n".join(context_parts)

        # Unknown workflows (including the default "chat") are answered as "qa".
        supported_use_cases = ("summary", "explanation", "qa", "notes")
        use_case = request.workflow if request.workflow in supported_use_cases else "qa"
        response = llm_generator.generate_response(
            prompt=request.query,
            context=context,
            use_case=use_case,
            metadatas=metadatas,
            temperature=0.3
        )

        # Create or update chat
        chat_id = request.chat_id or str(uuid.uuid4())
        chats = load_chats_for_space(request.space_id)
        timestamp = datetime.now().isoformat()

        # Named chat_record so the endpoint function ``chat`` is not shadowed.
        chat_record = next((c for c in chats if c['id'] == chat_id), None)
        if chat_record is None:
            chat_record = {
                'id': chat_id,
                'messages': [],
                'created_at': timestamp,
                'updated_at': timestamp
            }
            chats.append(chat_record)

        chat_record['messages'].extend([
            {'role': 'user', 'content': request.query, 'timestamp': timestamp},
            {
                'role': 'assistant',
                'content': response,
                'timestamp': timestamp,
                'sources': sources
            }
        ])
        chat_record['updated_at'] = timestamp

        save_chats_for_space(request.space_id, chats)

        return ChatResponse(
            response=response,
            sources=sources,
            chat_id=chat_id,
            timestamp=timestamp
        )

    except HTTPException:
        # Keep deliberate HTTP errors (status code and detail) intact.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
497
-
498
@app.post("/api/spaces/{space_id}/upload")
async def upload_files(space_id: str, files: List[UploadFile] = File(...)):
    """Store uploaded files in a space, chunk them and add them to its index.

    Each file is written to the space's ``uploads`` directory, processed and
    chunked. All chunks are then added to the vector database in batches and
    the processed files are appended to ``processed_files.json``.

    Files that fail processing are logged and skipped. Uploads whose name has
    no usable base name are skipped as well.

    Returns:
        A dict with ``status``, the number of files processed and the total
        number of chunks added.
    """
    initialize_space(space_id)

    space_dir = get_space_dir(space_id)
    uploads_dir = space_dir / "uploads"
    uploads_dir.mkdir(parents=True, exist_ok=True)

    processor = DocumentProcessor()
    all_chunks = []
    processed_files = []

    for file in files:
        # The filename is client-controlled: keep only its base name so a value
        # such as "../../x" cannot escape the uploads directory.
        safe_name = Path(file.filename or "").name
        if safe_name in ("", ".", ".."):
            print(f"Skipping upload with invalid filename: {file.filename!r}")
            continue

        file_path = uploads_dir / safe_name
        with open(file_path, "wb") as f:
            f.write(await file.read())

        # Process file and extract content
        try:
            file_data = processor.process_file(file_path)
            chunks = processor.chunk_text(
                file_data['content'], chunk_size=512, overlap=50, semantic=True
            )

            # Format chunks for vector database
            all_chunks.extend(
                {
                    'content': chunk,
                    'metadata': {
                        'filename': safe_name,
                        'chunk_index': idx,
                        'total_chunks': len(chunks),
                        'source_type': file_data['format']
                    }
                }
                for idx, chunk in enumerate(chunks)
            )
            processed_files.append({
                'filename': safe_name,
                'chunks': len(chunks),
                'processed_at': datetime.now().isoformat()
            })
        except Exception as e:
            # Log error but continue with other files
            print(f"Error processing {safe_name}: {str(e)}")
            continue

    if all_chunks:
        texts = [chunk['content'] for chunk in all_chunks]
        metadatas = [chunk['metadata'] for chunk in all_chunks]
        ids = [f"{space_id}_{idx}_{uuid.uuid4().hex[:8]}" for idx in range(len(all_chunks))]

        # Add in batches of 5000 to stay below the vector database's batch size limit.
        batch_size = 5000
        total_batches = (len(texts) - 1) // batch_size + 1
        for i in range(0, len(texts), batch_size):
            vector_db.add_documents(
                texts[i:i + batch_size],
                metadatas[i:i + batch_size],
                ids[i:i + batch_size],
            )
            print(f"Processed batch {i//batch_size + 1}/{total_batches}")

    # Append this upload's records to the space's processed-files list.
    processed_file = space_dir / "processed_files.json"
    existing = []
    if processed_file.exists():
        with open(processed_file, 'r') as f:
            existing = json.load(f)

    existing.extend(processed_files)
    with open(processed_file, 'w') as f:
        json.dump(existing, f, indent=2)

    return {
        "status": "success",
        "files_processed": len(processed_files),
        "total_chunks": len(all_chunks)
    }
589
-
590
@app.get("/api/spaces/{space_id}/files")
async def get_files(space_id: str):
    """Return the content of a space's ``processed_files.json`` (``[]`` when absent)."""
    records_path = get_space_dir(space_id) / "processed_files.json"

    if not records_path.exists():
        return []

    with open(records_path, 'r') as handle:
        return json.load(handle)
600
-
601
@app.delete("/api/spaces/{space_id}/files/{filename}")
async def delete_file(space_id: str, filename: str):
    """Delete one file from a space: its record, its upload and its index chunks.

    The file is removed from ``processed_files.json`` and from the ``uploads``
    directory. Its chunks are then removed from the space's vector collection
    on a best-effort basis: a failure there is printed and does not fail the
    request.

    Raises:
        HTTPException: 500 when updating the record file or deleting the
            upload fails.
    """
    try:
        space_dir = get_space_dir(space_id)

        # Remove from processed_files.json
        processed_file = space_dir / "processed_files.json"
        if processed_file.exists():
            with open(processed_file, 'r') as f:
                files_data = json.load(f)

            files_data = [entry for entry in files_data if entry.get('filename') != filename]

            with open(processed_file, 'w') as f:
                json.dump(files_data, f, indent=2)

        # Delete the actual file
        file_path = space_dir / "uploads" / filename
        if file_path.exists():
            file_path.unlink()

        # Remove the file's chunks from the vector index of THIS space.
        try:
            # The shared ``vector_db`` belongs to whichever space was activated
            # last. Use it only when that is the requested space; otherwise open
            # the requested space's collection so another space's chunks are
            # never deleted by mistake.
            if vector_db is not None and current_space == space_id:
                target_db = vector_db
            else:
                target_db = VectorDatabase(collection_name=f"space_{space_id}")

            collection = target_db.collection
            results = collection.get()

            # Find IDs of documents with matching filename
            ids_to_delete = [
                results['ids'][idx]
                for idx, metadata in enumerate(results['metadatas'])
                if metadata and metadata.get('filename') == filename
            ]

            if ids_to_delete:
                collection.delete(ids=ids_to_delete)
                print(f"Deleted {len(ids_to_delete)} chunks for {filename}")
        except Exception as e:
            print(f"Error removing from vector DB: {e}")

        return {
            "status": "success",
            "message": f"File {filename} deleted"
        }

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error deleting file: {str(e)}")
652
-
653
-
654
- # ==================== STUDIO API ROUTES ====================
655
- # Routes for Notebook, Flashcards, and Quiz features
656
-
657
- # Import studio models
658
- from models.studio_models import (
659
- NotebookEntry, NotebookEntryCreate, NotebookEntryUpdate,
660
- Flashcard, FlashcardCreate, FlashcardUpdate, FlashcardReview,
661
- FlashcardGenerateRequest,
662
- Quiz, QuizCreate, QuizGenerateRequest, QuizSubmission, QuizResult, QuizHistory,
663
- MasteryLevel
664
- )
665
-
666
- # ===== NOTEBOOK ROUTES =====
667
-
668
@app.post("/api/studio/notebook", response_model=NotebookEntry)
async def create_notebook_entry(entry_data: NotebookEntryCreate):
    """Create a notebook entry via the studio manager; any failure becomes a 500."""
    try:
        return studio_manager.create_notebook_entry(entry_data)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
676
-
677
@app.get("/api/studio/notebook/space/{space_id}")
async def get_space_notebook(space_id: str):
    """Return the notebook metadata of a space, creating it when missing.

    The notebook is named after the space, or after the space id when the
    space record cannot be found. Any failure becomes a 500.
    """
    try:
        space = spaces_manager.get_space(space_id)
        if space:
            notebook_name = space['name']
        else:
            notebook_name = space_id
        return studio_manager.ensure_space_notebook(space_id, notebook_name)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
687
-
688
@app.post("/api/studio/notebook/from-chat", response_model=NotebookEntry)
async def add_chat_to_notebook(request: ChatToNotebookRequest):
    """Store a chat question/answer pair as an entry of a space's notebook.

    The space name is taken from the request when given, else from the stored
    space record, else the space id is used. Any failure becomes a 500.
    """
    try:
        space = spaces_manager.get_space(request.space_id)

        if request.space_name:
            notebook_space_name = request.space_name
        elif space:
            notebook_space_name = space['name']
        else:
            notebook_space_name = request.space_id

        return studio_manager.create_notebook_entry_from_chat(
            space_id=request.space_id,
            question=request.question,
            answer=request.answer,
            chat_id=request.chat_id,
            assistant_timestamp=request.assistant_timestamp,
            tags=request.tags,
            space_name=notebook_space_name
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
707
-
708
@app.get("/api/studio/notebook", response_model=List[NotebookEntry])
async def list_notebook_entries(space_id: Optional[str] = None):
    """List notebook entries, passing the optional ``space_id`` filter to the studio manager."""
    try:
        return studio_manager.list_notebook_entries(space_id)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
716
-
717
@app.get("/api/studio/notebook/{entry_id}", response_model=NotebookEntry)
async def get_notebook_entry(entry_id: str):
    """Return one notebook entry, or respond 404 when the manager finds none."""
    found = studio_manager.get_notebook_entry(entry_id)
    if found:
        return found
    raise HTTPException(status_code=404, detail="Notebook entry not found")
724
-
725
@app.put("/api/studio/notebook/{entry_id}", response_model=NotebookEntry)
async def update_notebook_entry(entry_id: str, update_data: NotebookEntryUpdate):
    """Apply an update to a notebook entry, or respond 404 when the manager returns nothing."""
    updated = studio_manager.update_notebook_entry(entry_id, update_data)
    if updated:
        return updated
    raise HTTPException(status_code=404, detail="Notebook entry not found")
732
-
733
@app.delete("/api/studio/notebook/{entry_id}")
async def delete_notebook_entry(entry_id: str):
    """Delete a notebook entry, or respond 404 when the manager reports no deletion."""
    if studio_manager.delete_notebook_entry(entry_id):
        return {"status": "success", "message": "Notebook entry deleted"}
    raise HTTPException(status_code=404, detail="Notebook entry not found")
740
-
741
-
742
- # ===== FLASHCARD ROUTES =====
743
-
744
@app.post("/api/studio/flashcards", response_model=Flashcard)
async def create_flashcard(card_data: FlashcardCreate):
    """Create a flashcard via the studio manager; any failure becomes a 500."""
    try:
        return studio_manager.create_flashcard(card_data)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
752
-
753
@app.get("/api/studio/flashcards", response_model=List[Flashcard])
async def list_flashcards(
    space_id: Optional[str] = None,
    mastery: Optional[MasteryLevel] = None
):
    """List flashcards, passing the optional space and mastery filters to the studio manager."""
    try:
        return studio_manager.list_flashcards(space_id, mastery)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
764
-
765
@app.get("/api/studio/flashcards/{card_id}", response_model=Flashcard)
async def get_flashcard(card_id: str):
    """Return one flashcard, or respond 404 when the manager finds none."""
    found = studio_manager.get_flashcard(card_id)
    if found:
        return found
    raise HTTPException(status_code=404, detail="Flashcard not found")
772
-
773
@app.put("/api/studio/flashcards/{card_id}", response_model=Flashcard)
async def update_flashcard(card_id: str, update_data: FlashcardUpdate):
    """Apply an update to a flashcard, or respond 404 when the manager returns nothing."""
    updated = studio_manager.update_flashcard(card_id, update_data)
    if updated:
        return updated
    raise HTTPException(status_code=404, detail="Flashcard not found")
780
-
781
@app.post("/api/studio/flashcards/{card_id}/review", response_model=Flashcard)
async def review_flashcard(card_id: str, review: FlashcardReview):
    """Record a review of a flashcard, or respond 404 when the manager returns nothing."""
    reviewed = studio_manager.review_flashcard(card_id, review)
    if reviewed:
        return reviewed
    raise HTTPException(status_code=404, detail="Flashcard not found")
788
-
789
@app.delete("/api/studio/flashcards/{card_id}")
async def delete_flashcard(card_id: str):
    """Delete a flashcard, or respond 404 when the manager reports no deletion."""
    if studio_manager.delete_flashcard(card_id):
        return {"status": "success", "message": "Flashcard deleted"}
    raise HTTPException(status_code=404, detail="Flashcard not found")
796
-
797
@app.post("/api/studio/flashcards/generate", response_model=List[Flashcard])
async def generate_flashcards(request: FlashcardGenerateRequest):
    """Generate flashcards with the studio generator.

    Raises:
        HTTPException: 503 while the module-level ``studio_generator`` has
            not been created yet (it is set by ``initialize_space``); 500 when
            generation fails.
    """
    # The generator is only read here, so no ``global`` declaration is needed.
    if not studio_generator:
        raise HTTPException(status_code=503, detail="LLM not initialized")

    try:
        return await studio_generator.generate_flashcards(request)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
810
-
811
-
812
- # ===== QUIZ ROUTES =====
813
-
814
@app.post("/api/studio/quizzes", response_model=Quiz)
async def create_quiz(quiz_data: QuizCreate):
    """Create a quiz via the studio manager; any failure becomes a 500."""
    try:
        return studio_manager.create_quiz(quiz_data)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
822
-
823
@app.get("/api/studio/quizzes", response_model=List[Quiz])
async def list_quizzes(space_id: Optional[str] = None):
    """List quizzes, passing the optional ``space_id`` filter to the studio manager."""
    try:
        return studio_manager.list_quizzes(space_id)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
831
-
832
@app.get("/api/studio/quizzes/{quiz_id}", response_model=Quiz)
async def get_quiz(quiz_id: str):
    """Return one quiz, or respond 404 when the manager finds none."""
    found = studio_manager.get_quiz(quiz_id)
    if found:
        return found
    raise HTTPException(status_code=404, detail="Quiz not found")
839
-
840
@app.delete("/api/studio/quizzes/{quiz_id}")
async def delete_quiz(quiz_id: str):
    """Delete a quiz, or respond 404 when the manager reports no deletion."""
    if studio_manager.delete_quiz(quiz_id):
        return {"status": "success", "message": "Quiz deleted"}
    raise HTTPException(status_code=404, detail="Quiz not found")
847
-
848
@app.post("/api/studio/quizzes/generate", response_model=Quiz)
async def generate_quiz(request: QuizGenerateRequest):
    """Generate a quiz with the studio generator.

    Raises:
        HTTPException: 503 while the module-level ``studio_generator`` has
            not been created yet; 500 with detail "Failed to generate quiz"
            when the generator returns nothing; 500 with the error text when
            generation raises.
    """
    if not studio_generator:
        raise HTTPException(status_code=503, detail="LLM not initialized")

    try:
        quiz = await studio_generator.generate_quiz(request)
    except HTTPException:
        # Keep deliberate HTTP errors (status code and detail) intact.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

    # Raised outside the try block so the detail is not re-wrapped.
    if not quiz:
        raise HTTPException(status_code=500, detail="Failed to generate quiz")
    return quiz
863
-
864
@app.post("/api/studio/quizzes/{quiz_id}/submit", response_model=QuizResult)
async def submit_quiz(quiz_id: str, submission: QuizSubmission):
    """Submit answers for a quiz and return the result.

    Raises:
        HTTPException: 404 when the studio manager returns no result for the
            quiz id; 500 with the error text when submission raises.
    """
    try:
        result = studio_manager.submit_quiz(quiz_id, submission.answers)
    except HTTPException:
        # Keep deliberate HTTP errors (status code and detail) intact.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

    # Raised outside the try block so the 404 is not converted into a 500.
    if not result:
        raise HTTPException(status_code=404, detail="Quiz not found")
    return result
874
-
875
@app.get("/api/studio/quizzes/{quiz_id}/history", response_model=QuizHistory)
async def get_quiz_history(quiz_id: str):
    """Return the attempt history of a quiz.

    Raises:
        HTTPException: 404 when the studio manager returns no history; 500
            with the error text for any other failure (the traceback is
            printed to the terminal first).
    """
    try:
        attempts = studio_manager.get_quiz_history(quiz_id)
        if attempts:
            return attempts
        raise HTTPException(status_code=404, detail="Quiz not found")
    except HTTPException:
        # Deliberate HTTP errors pass through untouched.
        raise
    except Exception as e:
        # Print the real traceback so the cause is visible in the terminal.
        import traceback
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e))
891
-
892
- # ==================== Run Server ====================
893
-
894
if __name__ == "__main__":
    # Serve the API with uvicorn when this file is executed as a script.
    import uvicorn

    server_options = {"host": "0.0.0.0", "port": 8000, "log_level": "error"}
    uvicorn.run(app, **server_options)
 
1
+ """
2
+ FastAPI Backend for NotebookPRO
3
+ Handles RAG, LLM, file processing, and chat management
4
+ """
5
+ from fastapi import FastAPI, File, UploadFile, HTTPException
6
+ from fastapi.middleware.cors import CORSMiddleware
7
+ from pydantic import BaseModel
8
+ from typing import List, Optional, Dict, Any
9
+ from pathlib import Path
10
+ import json
11
+ from datetime import datetime
12
+ import uuid
13
+ import sys
14
+ import warnings
15
+ import logging
16
+ import os
17
+ import shutil
18
+
19
# Suppress warnings and noisy library logging for this process.
warnings.filterwarnings('ignore')
# NOTE(review): PYTHONWARNINGS is read at interpreter start-up, so setting it
# here presumably only affects child processes — confirm that is the intent.
os.environ['PYTHONWARNINGS'] = 'ignore'
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ['TOKENIZERS_PARALLELISM'] = 'false'
# Thread-count caps for numeric backends; setdefault keeps any value that the
# environment already provides.
os.environ.setdefault('OMP_NUM_THREADS', '2')
os.environ.setdefault('MKL_NUM_THREADS', '2')
os.environ.setdefault('OPENBLAS_NUM_THREADS', '2')
os.environ.setdefault('NUMEXPR_NUM_THREADS', '2')
#logging.getLogger().setLevel(logging.ERROR)
29
+
30
+ # Add project root to path
31
+ sys.path.append(str(Path(__file__).parent.parent))
32
+
33
+ import config
34
+ from utils.document_processor import DocumentProcessor
35
+ from utils.vector_db import VectorDatabase
36
+ from utils.hybrid_retriever import HybridRetriever
37
+ from utils.llm_generator import LLMGenerator
38
+ from utils.config_manager import ConfigManager
39
+ from utils.spaces_manager import SpacesManager
40
+ from utils.studio_manager import StudioManager
41
+ from utils.studio_generator import StudioGenerator
42
+
43
# Initialize FastAPI
app = FastAPI(title="NotebookPRO API", version="2.0.0")

# CORS - Allow Flutter web to connect
# NOTE(review): wildcard origins combined with allow_credentials=True is
# discouraged by the FastAPI CORS documentation — list explicit origins
# before deploying.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # In production, specify your Flutter web URL
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Global instances
# Managers are created once at import time; the space-specific components
# below start as None and are filled in by initialize_space().
config_manager = ConfigManager()
spaces_manager = SpacesManager()
studio_manager = StudioManager()
studio_generator = None  # Will be initialized after LLM
vector_db = None
llm_generator = None
current_space = None  # id of the space the cached components belong to
63
+
64
+ # ==================== Pydantic Models ====================
65
+
66
# One message of a chat transcript. The chat endpoint stores messages with
# these same keys, using the roles 'user' and 'assistant'.
class ChatMessage(BaseModel):
    role: str
    content: str
    timestamp: str
    # Retrieved source snippets; only present on assistant messages.
    sources: Optional[List[Dict[str, Any]]] = None
71
+
72
# Request body for POST /api/chat.
class ChatRequest(BaseModel):
    query: str
    space_id: str
    # Omit to start a new chat; the endpoint then generates an id.
    chat_id: Optional[str] = None
    # Passed to the LLM as the use case when it is one of
    # "summary", "explanation", "qa", "notes"; anything else is treated as "qa".
    workflow: str = "chat"
77
+
78
# Response body for POST /api/chat.
class ChatResponse(BaseModel):
    response: str
    # One entry per retrieved chunk: truncated content, metadata and score.
    sources: List[Dict[str, Any]]
    chat_id: str
    timestamp: str
83
+
84
# Request body for POST /api/spaces.
class SpaceCreate(BaseModel):
    name: str
86
+
87
# Summary of a space as returned by the /api/spaces endpoints.
class SpaceResponse(BaseModel):
    id: str
    name: str
    created_at: str
    # Number of entries in the space's processed_files.json (0 when absent).
    file_count: int
92
+
93
# Listing entry for one stored chat (GET /api/spaces/{space_id}/chats).
class ChatInfo(BaseModel):
    id: str
    # Derived from the first user message by get_chat_title().
    title: str
    # First 100 characters of the first message, or "" for an empty chat.
    preview: str
    created_at: str
    updated_at: str
    message_count: int
100
+
101
# Response body for GET /api/config; keys are returned masked, or None when unset.
class ConfigResponse(BaseModel):
    groq_api_key: Optional[str]
    gemini_api_key: Optional[str]
104
+
105
# Request body for POST /api/config; only the keys that are provided get updated.
class ConfigUpdate(BaseModel):
    groq_api_key: Optional[str] = None
    gemini_api_key: Optional[str] = None
108
+
109
# Request body for POST /api/studio/notebook/from-chat: a question/answer pair
# to be saved into a space's notebook.
class ChatToNotebookRequest(BaseModel):
    space_id: str
    question: str
    answer: str
    chat_id: Optional[str] = None
    assistant_timestamp: Optional[str] = None
    tags: List[str] = []
    # When omitted, the endpoint falls back to the stored space name, then the space id.
    space_name: Optional[str] = None
117
+
118
+ # ==================== Helper Functions ====================
119
+
120
def get_data_dir():
    """Return the ``data`` directory that sits in the parent of this file's folder."""
    project_root = Path(__file__).parent.parent
    return project_root / "data"
123
+
124
def get_space_dir(space_id: str):
    """Return the directory that holds everything stored for ``space_id``."""
    spaces_root = get_data_dir() / "spaces"
    return spaces_root / space_id
127
+
128
def load_chats_for_space(space_id: str) -> List[Dict]:
    """Read the space's ``chats.json``; an absent file yields an empty list."""
    chats_path = get_space_dir(space_id) / "chats.json"
    if not chats_path.exists():
        return []
    with open(chats_path, 'r', encoding='utf-8') as handle:
        return json.load(handle)
135
+
136
def save_chats_for_space(space_id: str, chats: List[Dict]):
    """Write ``chats`` to the space's ``chats.json``, creating the folder if needed."""
    target = get_space_dir(space_id) / "chats.json"
    target.parent.mkdir(parents=True, exist_ok=True)
    with open(target, 'w', encoding='utf-8') as handle:
        json.dump(chats, handle, indent=2, ensure_ascii=False)
142
+
143
def get_chat_title(messages: List[Dict]) -> str:
    """Derive a chat title from the first usable user message.

    The title is the first 50 characters of that message, followed by "..."
    when the message is longer. Messages come from JSON files on disk, so
    entries that lack a ``role`` or a string ``content`` are skipped instead
    of raising ``KeyError`` and breaking the whole chat listing.

    Args:
        messages: Chat messages as dicts with ``role`` and ``content`` keys.

    Returns:
        The derived title, or "New Chat" when no usable user message exists.
    """
    for msg in messages:
        if msg.get('role') != 'user':
            continue
        content = msg.get('content')
        if not isinstance(content, str):
            continue
        return content[:50] + "..." if len(content) > 50 else content
    return "New Chat"
150
+
151
def ensure_notebooks_for_existing_spaces() -> int:
    """Create notebook metadata for every space that lacks it; return how many were created."""
    newly_created = 0
    for record in spaces_manager.get_all_spaces():
        sid = record.get('id')
        # Records without an id are ignored; spaces that already have a notebook are left alone.
        if sid and not studio_manager.get_space_notebook(sid):
            studio_manager.ensure_space_notebook(sid, record.get('name', sid))
            newly_created += 1
    return newly_created
169
+
170
def rebuild_space_index_if_missing(space_id: str) -> int:
    """Rebuild a space index from uploaded files if the current index is empty.

    Uses the module-level ``vector_db``. Every ``.pdf``/``.docx``/``.txt``
    file in the space's ``uploads`` folder is processed and chunked again,
    and the chunks are added in batches of 100.

    Args:
        space_id: Space whose uploads should be re-indexed.

    Returns:
        Number of chunks added; 0 when nothing had to be (or could be) rebuilt.
    """
    if not vector_db:
        return 0

    try:
        if vector_db.get_collection_count() > 0:
            return 0
    except Exception:
        # If count check fails, continue with a best-effort rebuild.
        pass

    uploads_dir = get_space_dir(space_id) / "uploads"
    if not uploads_dir.exists():
        return 0

    files = [
        p for p in uploads_dir.iterdir()
        if p.is_file() and p.suffix.lower() in {".pdf", ".docx", ".txt"}
    ]
    if not files:
        return 0

    processor = DocumentProcessor()
    texts: List[str] = []
    metadatas: List[Dict[str, Any]] = []
    ids: List[str] = []

    for file_path in files:
        try:
            file_data = processor.process_file(file_path)
            chunks = processor.chunk_text(
                file_data['content'],
                chunk_size=512,
                overlap=50,
                semantic=True,
            )
            total_chunks = len(chunks)
            # Build this file's metadata completely before touching the shared
            # lists, so a failure here cannot leave texts/metadatas/ids with
            # different lengths.
            file_metadatas = [
                {
                    'filename': file_path.name,
                    'chunk_index': idx,
                    'total_chunks': total_chunks,
                    'source_type': file_data['format'],
                }
                for idx in range(total_chunks)
            ]
        except Exception as e:
            print(f"Index rebuild skipped {file_path.name}: {e}")
            continue

        for chunk, metadata in zip(chunks, file_metadatas):
            texts.append(chunk)
            metadatas.append(metadata)
            ids.append(f"{space_id}_rebuild_{len(ids)}_{uuid.uuid4().hex[:8]}")

    if not texts:
        return 0

    batch_size = 100
    for i in range(0, len(texts), batch_size):
        vector_db.add_documents(
            texts[i:i + batch_size],
            metadatas[i:i + batch_size],
            ids[i:i + batch_size],
        )

    print(f"Rebuilt index for space '{space_id}' with {len(texts)} chunks")
    return len(texts)
233
+
234
def initialize_space(space_id: str):
    """Initialize vector DB and components for a space.

    Populates the module-level ``vector_db``, ``llm_generator`` and
    ``studio_generator`` for ``space_id`` and records it in ``current_space``.
    If that space is already active, the call returns immediately.

    Raises:
        HTTPException: 400 when neither a Groq nor a Gemini API key is available.
    """
    global vector_db, llm_generator, studio_generator, current_space

    # Fast path: reuse already initialized components for the active space.
    if current_space == space_id and vector_db is not None and llm_generator is not None:
        return

    # Try the config manager first, but fallback to the .env file variables
    groq_key = config_manager.get_api_key('groq') or os.getenv('GROQ_API_KEY')
    gemini_key = (
        config_manager.get_api_key('gemini')
        or os.getenv('GOOGLE_API_KEY')
        or os.getenv('GEMINI_API_KEY')
    )

    if not groq_key and not gemini_key:
        raise HTTPException(status_code=400, detail="No API keys configured. Please add Groq or Gemini API key.")

    # Clear the active-space marker before replacing any global. If a later
    # step fails, the fast path must not pair the previous space id with the
    # new space's vector_db.
    current_space = None

    # Initialize the vector database collection for this space.
    vector_db = VectorDatabase(
        collection_name=f"space_{space_id}"
    )

    # Backward-compatibility: rebuild embeddings from uploaded files if index is empty.
    rebuild_space_index_if_missing(space_id)

    # Initialize LLM generator - Groq is preferred when both keys are available.
    if groq_key:
        llm_generator = LLMGenerator(provider="groq", api_key=groq_key)
    else:
        llm_generator = LLMGenerator(provider="gemini", api_key=gemini_key)

    # Initialize studio generator with LLM
    studio_generator = StudioGenerator(llm_generator, studio_manager)
    current_space = space_id
269
+
270
@app.on_event("startup")
async def startup_sync_notebooks():
    """On startup, back-fill notebook metadata for spaces that do not have any yet."""
    try:
        added = ensure_notebooks_for_existing_spaces()
    except Exception as exc:
        # A failed sync must never prevent the server from starting.
        print(f"Notebook startup sync failed: {exc}")
    else:
        if added > 0:
            print(f"Created {added} missing notebook(s) for existing spaces")
280
+
281
+ # ==================== API Endpoints ====================
282
+
283
@app.get("/")
async def root():
    """Health-check endpoint reporting that the API is up and which version it is."""
    payload = {"status": "NotebookPRO API is running", "version": "2.0.0"}
    return payload
287
+
288
@app.get("/api/config", response_model=ConfigResponse)
async def get_config():
    """Return the stored API keys, masked so only the last four characters show."""

    def masked(key):
        # Unset keys stay None; set keys become "***" plus their last 4 characters.
        if key:
            return "***" + key[-4:]
        return None

    groq_masked = masked(config_manager.get_api_key('groq'))
    gemini_masked = masked(config_manager.get_api_key('gemini'))
    return ConfigResponse(groq_api_key=groq_masked, gemini_api_key=gemini_masked)
298
+
299
@app.post("/api/config")
async def update_config(config_update: ConfigUpdate):
    """Update API keys.

    Only keys with a non-empty value are stored. When a key changes, the
    cached space components are invalidated so that the next call to
    ``initialize_space`` builds a new LLM generator with the new key rather
    than reusing the old one through its fast path.
    """
    global current_space

    keys_changed = False
    if config_update.groq_api_key:
        config_manager.set_api_key('groq', config_update.groq_api_key)
        keys_changed = True
    if config_update.gemini_api_key:
        config_manager.set_api_key('gemini', config_update.gemini_api_key)
        keys_changed = True

    if keys_changed:
        # initialize_space() only takes its fast path when current_space
        # matches the requested space, so clearing it forces a rebuild.
        current_space = None

    return {"status": "success", "message": "Configuration updated"}
308
+
309
@app.get("/api/spaces", response_model=List[SpaceResponse])
async def get_spaces():
    """List every space together with the number of files processed for it."""
    # Self-healing: spaces created externally while the server runs still get a notebook.
    ensure_notebooks_for_existing_spaces()

    summaries = []
    for record in spaces_manager.get_all_spaces():
        sid = record['id']
        manifest = get_space_dir(sid) / "processed_files.json"

        if manifest.exists():
            with open(manifest, 'r') as fh:
                n_files = len(json.load(fh))
        else:
            n_files = 0

        summaries.append(
            SpaceResponse(
                id=sid,
                name=record['name'],
                created_at=record['created_at'],
                file_count=n_files,
            )
        )
    return summaries
335
+
336
@app.post("/api/spaces", response_model=SpaceResponse)
async def create_space(space_data: SpaceCreate):
    """Create a space and its same-named notebook; a ValueError becomes a 400."""
    try:
        created = spaces_manager.create_space(space_data.name)
        # The notebook metadata carries the same name as the space.
        studio_manager.ensure_space_notebook(created['id'], created['name'])
        return SpaceResponse(
            id=created['id'],
            name=created['name'],
            created_at=created['created_at'],
            file_count=0,
        )
    except ValueError as err:
        raise HTTPException(status_code=400, detail=str(err))
353
+
354
@app.delete("/api/spaces/{space_id}")
async def delete_space(space_id: str):
    """Remove a space from the registry and delete its data directory."""
    try:
        spaces_manager.delete_space(space_id)

        # Remove the space's data directory if there is one.
        target_dir = get_space_dir(space_id)
        if target_dir.exists():
            shutil.rmtree(target_dir)

        return {"status": "success", "message": f"Space {space_id} deleted"}
    except ValueError as err:
        raise HTTPException(status_code=400, detail=str(err))
    except Exception as err:
        raise HTTPException(status_code=500, detail=f"Error deleting space: {str(err)}")
370
+
371
@app.get("/api/spaces/{space_id}/chats", response_model=List[ChatInfo])
async def get_chats(space_id: str):
    """Return a summary (title, preview, timestamps, size) of each chat in a space."""

    def summarize(record):
        msgs = record.get('messages', [])
        # The preview is the first 100 characters of the first message, if any.
        first_preview = msgs[0]['content'][:100] if msgs else ""
        return ChatInfo(
            id=record['id'],
            title=get_chat_title(msgs),
            preview=first_preview,
            created_at=record.get('created_at', ''),
            updated_at=record.get('updated_at', ''),
            message_count=len(msgs),
        )

    return [summarize(record) for record in load_chats_for_space(space_id)]
389
+
390
@app.get("/api/spaces/{space_id}/chats/{chat_id}")
async def get_chat(space_id: str, chat_id: str):
    """Return the stored chat whose id is ``chat_id``, or a 404 if none matches."""
    stored = load_chats_for_space(space_id)
    found = next((record for record in stored if record['id'] == chat_id), None)
    if found is None:
        raise HTTPException(status_code=404, detail="Chat not found")
    return found
400
+
401
@app.delete("/api/spaces/{space_id}/chats/{chat_id}")
async def delete_chat(space_id: str, chat_id: str):
    """Drop the chat with ``chat_id`` from the space's chat file and save the rest."""
    remaining = [
        record
        for record in load_chats_for_space(space_id)
        if record['id'] != chat_id
    ]
    save_chats_for_space(space_id, remaining)
    return {"status": "success", "message": f"Chat {chat_id} deleted"}
409
+
410
@app.post("/api/chat", response_model=ChatResponse)
async def chat(request: ChatRequest):
    """Process a chat message with RAG and persist the exchange.

    Steps: initialize the space, retrieve the top 5 chunks with the hybrid
    retriever, build a numbered context, ask the LLM, then append the user
    and assistant messages to the chat (creating it when needed).

    Raises:
        HTTPException: errors raised by ``initialize_space`` (for example the
            400 for missing API keys) are passed through unchanged; any other
            failure is reported as a 500.
    """
    try:
        # Initialize space if needed
        initialize_space(request.space_id)

        # Create hybrid retriever with 60% vector, 40% BM25
        hybrid_retriever = HybridRetriever(vector_db, alpha=0.6)

        # Retrieve relevant documents
        documents, metadatas, scores = hybrid_retriever.retrieve(
            query=request.query,
            n_results=5
        )

        # Build context from retrieved documents
        context_parts = []
        sources = []

        for idx, (doc, meta, score) in enumerate(zip(documents, metadatas, scores), 1):
            # Extract clean filename for source citation
            filename = meta.get('filename', 'Unknown')
            clean_name = filename.replace('.pdf', '').replace('.docx', '').replace('.txt', '')
            context_parts.append(f"Source [{idx}] ({clean_name}):\n{doc}\n")
            sources.append({
                "content": doc[:200] + "..." if len(doc) > 200 else doc,
                "metadata": meta,
                "score": float(score)
            })

        context = "\n".join(context_parts)

        # Workflows outside the supported use cases fall back to "qa".
        response = llm_generator.generate_response(
            prompt=request.query,
            context=context,
            use_case=request.workflow if request.workflow in ["summary", "explanation", "qa", "notes"] else "qa",
            metadatas=metadatas,
            temperature=0.3
        )

        # Create or update chat
        chat_id = request.chat_id or str(uuid.uuid4())
        chats = load_chats_for_space(request.space_id)

        # Find existing chat or create new
        chat_record = next((c for c in chats if c['id'] == chat_id), None)
        if not chat_record:
            chat_record = {
                'id': chat_id,
                'messages': [],
                'created_at': datetime.now().isoformat(),
                'updated_at': datetime.now().isoformat()
            }
            chats.append(chat_record)

        # Add messages; both share one timestamp.
        timestamp = datetime.now().isoformat()
        chat_record['messages'].extend([
            {'role': 'user', 'content': request.query, 'timestamp': timestamp},
            {
                'role': 'assistant',
                'content': response,
                'timestamp': timestamp,
                'sources': sources
            }
        ])
        chat_record['updated_at'] = timestamp

        # Save chats
        save_chats_for_space(request.space_id, chats)

        return ChatResponse(
            response=response,
            sources=sources,
            chat_id=chat_id,
            timestamp=timestamp
        )

    except HTTPException:
        # HTTPException is an Exception subclass; without this clause the 400
        # from initialize_space would be reported as a 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
497
+
498
@app.post("/api/spaces/{space_id}/upload")
async def upload_files(space_id: str, files: List[UploadFile] = File(...)):
    """Upload and process files for a space.

    Each file is saved under the space's ``uploads`` folder, processed,
    chunked and added to the vector database. Files that fail processing are
    logged and skipped. Successful files are appended to
    ``processed_files.json``.

    The client-supplied filename is reduced to its final path component
    before it touches the filesystem, so names such as ``../../x`` cannot
    write outside ``uploads``. Uploads without a usable name are skipped.

    Returns:
        Dict with ``status``, ``files_processed`` and ``total_chunks``.
    """
    # Initialize space
    initialize_space(space_id)

    space_dir = get_space_dir(space_id)
    uploads_dir = space_dir / "uploads"
    uploads_dir.mkdir(parents=True, exist_ok=True)

    processor = DocumentProcessor()
    all_chunks = []
    processed_files = []

    for file in files:
        # Untrusted input: keep only the last path component. Backslashes are
        # normalised first so Windows-style paths are stripped as well.
        safe_name = Path((file.filename or "").replace("\\", "/")).name
        if safe_name in ("", ".", ".."):
            print(f"Skipping upload with unusable filename: {file.filename!r}")
            continue

        # Save file
        file_path = uploads_dir / safe_name
        with open(file_path, "wb") as f:
            f.write(await file.read())

        # Process file and extract content
        try:
            file_data = processor.process_file(file_path)

            # Chunk the content
            chunks = processor.chunk_text(file_data['content'], chunk_size=512, overlap=50, semantic=True)

            # Format chunks for vector database
            formatted_chunks = [
                {
                    'content': chunk,
                    'metadata': {
                        'filename': safe_name,
                        'chunk_index': idx,
                        'total_chunks': len(chunks),
                        'source_type': file_data['format']
                    }
                }
                for idx, chunk in enumerate(chunks)
            ]

            all_chunks.extend(formatted_chunks)
            processed_files.append({
                'filename': safe_name,
                'chunks': len(chunks),
                'processed_at': datetime.now().isoformat()
            })
        except Exception as e:
            # Log error but continue with other files
            print(f"Error processing {file.filename}: {str(e)}")
            continue

    # Add to vector database in batches to avoid size limits
    if all_chunks:
        # Extract texts, metadatas, and generate IDs
        texts = [chunk['content'] for chunk in all_chunks]
        metadatas = [chunk['metadata'] for chunk in all_chunks]
        ids = [f"{space_id}_{idx}_{uuid.uuid4().hex[:8]}" for idx in range(len(all_chunks))]

        # Batches of 5000 keep each add_documents call bounded.
        batch_size = 5000
        for i in range(0, len(texts), batch_size):
            batch_texts = texts[i:i + batch_size]
            batch_metadatas = metadatas[i:i + batch_size]
            batch_ids = ids[i:i + batch_size]

            vector_db.add_documents(batch_texts, batch_metadatas, batch_ids)
            print(f"Processed batch {i//batch_size + 1}/{(len(texts)-1)//batch_size + 1}")

    # Save processed files info
    processed_file = space_dir / "processed_files.json"
    existing = []
    if processed_file.exists():
        with open(processed_file, 'r') as f:
            existing = json.load(f)

    existing.extend(processed_files)
    with open(processed_file, 'w') as f:
        json.dump(existing, f, indent=2)

    # Exceptions are deliberately not wrapped here, so FastAPI logs the raw
    # stack trace for unexpected failures.
    return {
        "status": "success",
        "files_processed": len(processed_files),
        "total_chunks": len(all_chunks)
    }
589
+
590
@app.get("/api/spaces/{space_id}/files")
async def get_files(space_id: str):
    """Return the contents of the space's ``processed_files.json`` (empty list if absent)."""
    manifest = get_space_dir(space_id) / "processed_files.json"
    if not manifest.exists():
        return []
    with open(manifest, 'r') as fh:
        return json.load(fh)
600
+
601
@app.delete("/api/spaces/{space_id}/files/{filename}")
async def delete_file(space_id: str, filename: str):
    """Delete a specific file from a space.

    Removes the file's entry from ``processed_files.json``, deletes the
    uploaded file, and removes its chunks from the vector database.

    The cached ``vector_db`` belongs to ``current_space``. Chunks are only
    removed when that is the space being modified; otherwise a same-named
    file in another space would lose its chunks.

    Raises:
        HTTPException: 500 when updating the manifest or deleting the file fails.
    """
    try:
        space_dir = get_space_dir(space_id)

        # Remove from processed_files.json
        processed_file = space_dir / "processed_files.json"
        if processed_file.exists():
            with open(processed_file, 'r') as f:
                files_data = json.load(f)

            # Filter out the file to delete
            files_data = [entry for entry in files_data if entry.get('filename') != filename]

            with open(processed_file, 'w') as f:
                json.dump(files_data, f, indent=2)

        # Delete the actual file
        file_path = space_dir / "uploads" / filename
        if file_path.exists():
            file_path.unlink()

        # Remove from vector database (only if the cached DB is this space's)
        if vector_db and current_space == space_id:
            try:
                # NOTE(review): relies on vector_db exposing a `collection`
                # with get()/delete() — confirm the VectorDatabase wrapper
                # still provides this.
                collection = vector_db.collection
                results = collection.get()

                # Find IDs of documents with matching filename
                ids_to_delete = [
                    doc_id
                    for doc_id, metadata in zip(results['ids'], results['metadatas'])
                    if metadata and metadata.get('filename') == filename
                ]

                # Delete those documents
                if ids_to_delete:
                    collection.delete(ids=ids_to_delete)
                    print(f"Deleted {len(ids_to_delete)} chunks for {filename}")
            except Exception as e:
                # Best effort: a vector-store failure does not fail the request.
                print(f"Error removing from vector DB: {e}")
        elif vector_db:
            print(f"Skipped vector DB cleanup for {filename}: space {space_id} is not the active space")

        return {
            "status": "success",
            "message": f"File {filename} deleted"
        }

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error deleting file: {str(e)}")
652
+
653
+
654
+ # ==================== STUDIO API ROUTES ====================
655
+ # Routes for Notebook, Flashcards, and Quiz features
656
+
657
+ # Import studio models
658
+ from models.studio_models import (
659
+ NotebookEntry, NotebookEntryCreate, NotebookEntryUpdate,
660
+ Flashcard, FlashcardCreate, FlashcardUpdate, FlashcardReview,
661
+ FlashcardGenerateRequest,
662
+ Quiz, QuizCreate, QuizGenerateRequest, QuizSubmission, QuizResult, QuizHistory,
663
+ MasteryLevel
664
+ )
665
+
666
+ # ===== NOTEBOOK ROUTES =====
667
+
668
@app.post("/api/studio/notebook", response_model=NotebookEntry)
async def create_notebook_entry(entry_data: NotebookEntryCreate):
    """Store a new notebook entry and return it; failures surface as a 500."""
    try:
        return studio_manager.create_notebook_entry(entry_data)
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
676
+
677
@app.get("/api/studio/notebook/space/{space_id}")
async def get_space_notebook(space_id: str):
    """Return the notebook metadata of a space, creating it when it does not exist."""
    try:
        record = spaces_manager.get_space(space_id)
        # Unknown spaces fall back to using the id as the notebook name.
        if record:
            display_name = record['name']
        else:
            display_name = space_id
        return studio_manager.ensure_space_notebook(space_id, display_name)
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
687
+
688
@app.post("/api/studio/notebook/from-chat", response_model=NotebookEntry)
async def add_chat_to_notebook(request: ChatToNotebookRequest):
    """Save one chat question/answer pair as an entry in the space's notebook."""
    try:
        record = spaces_manager.get_space(request.space_id)
        # Name precedence: explicit request value, stored space name, then the space id.
        fallback_name = record['name'] if record else request.space_id
        notebook_name = request.space_name or fallback_name

        return studio_manager.create_notebook_entry_from_chat(
            space_id=request.space_id,
            question=request.question,
            answer=request.answer,
            chat_id=request.chat_id,
            assistant_timestamp=request.assistant_timestamp,
            tags=request.tags,
            space_name=notebook_name,
        )
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
707
+
708
@app.get("/api/studio/notebook", response_model=List[NotebookEntry])
async def list_notebook_entries(space_id: Optional[str] = None):
    """Return notebook entries; ``space_id`` is forwarded as an optional filter."""
    try:
        return studio_manager.list_notebook_entries(space_id)
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
716
+
717
@app.get("/api/studio/notebook/{entry_id}", response_model=NotebookEntry)
async def get_notebook_entry(entry_id: str):
    """Fetch one notebook entry by id, answering 404 when it does not exist."""
    found = studio_manager.get_notebook_entry(entry_id)
    if found:
        return found
    raise HTTPException(status_code=404, detail="Notebook entry not found")
724
+
725
@app.put("/api/studio/notebook/{entry_id}", response_model=NotebookEntry)
async def update_notebook_entry(entry_id: str, update_data: NotebookEntryUpdate):
    """Apply ``update_data`` to a notebook entry, answering 404 when it does not exist."""
    updated = studio_manager.update_notebook_entry(entry_id, update_data)
    if updated:
        return updated
    raise HTTPException(status_code=404, detail="Notebook entry not found")
732
+
733
@app.delete("/api/studio/notebook/{entry_id}")
async def delete_notebook_entry(entry_id: str):
    """Remove a notebook entry, answering 404 when the manager reports nothing deleted."""
    removed = studio_manager.delete_notebook_entry(entry_id)
    if removed:
        return {"status": "success", "message": "Notebook entry deleted"}
    raise HTTPException(status_code=404, detail="Notebook entry not found")
740
+
741
+
742
+ # ===== FLASHCARD ROUTES =====
743
+
744
@app.post("/api/studio/flashcards", response_model=Flashcard)
async def create_flashcard(card_data: FlashcardCreate):
    """Store a new flashcard and return it; failures surface as a 500."""
    try:
        return studio_manager.create_flashcard(card_data)
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
752
+
753
@app.get("/api/studio/flashcards", response_model=List[Flashcard])
async def list_flashcards(
    space_id: Optional[str] = None,
    mastery: Optional[MasteryLevel] = None
):
    """Return flashcards; ``space_id`` and ``mastery`` are forwarded as optional filters."""
    try:
        return studio_manager.list_flashcards(space_id, mastery)
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
764
+
765
@app.get("/api/studio/flashcards/{card_id}", response_model=Flashcard)
async def get_flashcard(card_id: str):
    """Fetch one flashcard by id, answering 404 when it does not exist."""
    found = studio_manager.get_flashcard(card_id)
    if found:
        return found
    raise HTTPException(status_code=404, detail="Flashcard not found")
772
+
773
@app.put("/api/studio/flashcards/{card_id}", response_model=Flashcard)
async def update_flashcard(card_id: str, update_data: FlashcardUpdate):
    """Apply ``update_data`` to a flashcard, answering 404 when it does not exist."""
    updated = studio_manager.update_flashcard(card_id, update_data)
    if updated:
        return updated
    raise HTTPException(status_code=404, detail="Flashcard not found")
780
+
781
@app.post("/api/studio/flashcards/{card_id}/review", response_model=Flashcard)
async def review_flashcard(card_id: str, review: FlashcardReview):
    """Record a review for a flashcard, answering 404 when the card does not exist."""
    reviewed = studio_manager.review_flashcard(card_id, review)
    if reviewed:
        return reviewed
    raise HTTPException(status_code=404, detail="Flashcard not found")
788
+
789
@app.delete("/api/studio/flashcards/{card_id}")
async def delete_flashcard(card_id: str):
    """Remove a flashcard, answering 404 when the manager reports nothing deleted."""
    removed = studio_manager.delete_flashcard(card_id)
    if removed:
        return {"status": "success", "message": "Flashcard deleted"}
    raise HTTPException(status_code=404, detail="Flashcard not found")
796
+
797
@app.post("/api/studio/flashcards/generate", response_model=List[Flashcard])
async def generate_flashcards(request: FlashcardGenerateRequest):
    """Ask the studio generator to produce flashcards for ``request``.

    Answers 503 while no studio generator has been initialized, and 500 when
    generation itself fails.
    """
    generator = studio_generator
    if not generator:
        raise HTTPException(status_code=503, detail="LLM not initialized")

    try:
        return await generator.generate_flashcards(request)
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
810
+
811
+
812
+ # ===== QUIZ ROUTES =====
813
+
814
@app.post("/api/studio/quizzes", response_model=Quiz)
async def create_quiz(quiz_data: QuizCreate):
    """Store a new quiz and return it; failures surface as a 500."""
    try:
        return studio_manager.create_quiz(quiz_data)
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
822
+
823
@app.get("/api/studio/quizzes", response_model=List[Quiz])
async def list_quizzes(space_id: Optional[str] = None):
    """Return quizzes; ``space_id`` is forwarded as an optional filter."""
    try:
        return studio_manager.list_quizzes(space_id)
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
831
+
832
@app.get("/api/studio/quizzes/{quiz_id}", response_model=Quiz)
async def get_quiz(quiz_id: str):
    """Fetch one quiz by id, answering 404 when it does not exist."""
    found = studio_manager.get_quiz(quiz_id)
    if found:
        return found
    raise HTTPException(status_code=404, detail="Quiz not found")
839
+
840
@app.delete("/api/studio/quizzes/{quiz_id}")
async def delete_quiz(quiz_id: str):
    """Remove a quiz, answering 404 when the manager reports nothing deleted."""
    removed = studio_manager.delete_quiz(quiz_id)
    if removed:
        return {"status": "success", "message": "Quiz deleted"}
    raise HTTPException(status_code=404, detail="Quiz not found")
847
+
848
@app.post("/api/studio/quizzes/generate", response_model=Quiz)
async def generate_quiz(request: QuizGenerateRequest):
    """Generate a quiz from content using LLM.

    Raises:
        HTTPException: 503 while no studio generator has been initialized,
            500 when generation fails or yields no quiz.
    """
    if not studio_generator:
        raise HTTPException(status_code=503, detail="LLM not initialized")

    try:
        quiz = await studio_generator.generate_quiz(request)
        if not quiz:
            raise HTTPException(status_code=500, detail="Failed to generate quiz")
        return quiz
    except HTTPException:
        # Pass our own HTTP error through instead of re-wrapping it, which
        # would replace its detail with str(exception).
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
863
+
864
@app.post("/api/studio/quizzes/{quiz_id}/submit", response_model=QuizResult)
async def submit_quiz(quiz_id: str, submission: QuizSubmission):
    """Submit quiz answers and get results.

    Raises:
        HTTPException: 404 when ``studio_manager.submit_quiz`` returns a falsy
            result, 500 for any other failure.
    """
    try:
        result = studio_manager.submit_quiz(quiz_id, submission.answers)
        if not result:
            raise HTTPException(status_code=404, detail="Quiz not found")
        return result
    except HTTPException:
        # HTTPException is an Exception subclass; without this clause the 404
        # above would be caught below and reported as a 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
874
+
875
@app.get("/api/studio/quizzes/{quiz_id}/history", response_model=QuizHistory)
async def get_quiz_history(quiz_id: str):
    """Return the attempt history recorded for a quiz (404 when none is found)."""
    try:
        attempts = studio_manager.get_quiz_history(quiz_id)
        if attempts:
            return attempts
        raise HTTPException(status_code=404, detail="Quiz not found")
    except HTTPException:
        # Already a well-formed HTTP error: let it through untouched.
        raise
    except Exception as exc:
        # Unexpected failure: dump the traceback to the terminal, then report a 500.
        import traceback
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(exc))
891
+
892
+ # ==================== Run Server ====================
893
+
894
# Script entry point: start the API with uvicorn when this file is executed directly.
if __name__ == "__main__":
    # Imported here so uvicorn is only needed when running as a script.
    import uvicorn
    # Bind to all interfaces on port 8000; uvicorn logs are limited to errors.
    uvicorn.run(app, host="0.0.0.0", port=8000, log_level="error")