mohhhhhit committed on
Commit
98d3daf
·
verified ·
1 Parent(s): cdec65b

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +124 -144
main.py CHANGED
@@ -2,6 +2,8 @@
2
  FastAPI Backend for NotebookPRO
3
  Handles RAG, LLM, file processing, and chat management
4
  """
 
 
5
  from fastapi import FastAPI, File, UploadFile, HTTPException
6
  from fastapi.middleware.cors import CORSMiddleware
7
  from pydantic import BaseModel
@@ -42,6 +44,17 @@ from utils.studio_generator import StudioGenerator
42
 
43
  # Initialize FastAPI
44
  app = FastAPI(title="NotebookPRO API", version="2.0.0")
 
 
 
 
 
 
 
 
 
 
 
45
  # CORS - Allow Flutter web to connect
46
  app.add_middleware(
47
  CORSMiddleware,
@@ -125,20 +138,20 @@ def get_space_dir(space_id: str):
125
  return get_data_dir() / "spaces" / space_id
126
 
127
  def load_chats_for_space(space_id: str) -> List[Dict]:
128
- """Load all chats for a space"""
129
- chats_file = get_space_dir(space_id) / "chats.json"
130
- if chats_file.exists():
131
- with open(chats_file, 'r', encoding='utf-8') as f:
132
- return json.load(f)
133
- return []
134
-
135
- def save_chats_for_space(space_id: str, chats: List[Dict]):
136
- """Save chats for a space"""
137
- chats_file = get_space_dir(space_id) / "chats.json"
138
- chats_file.parent.mkdir(parents=True, exist_ok=True)
139
- with open(chats_file, 'w', encoding='utf-8') as f:
140
- json.dump(chats, f, indent=2, ensure_ascii=False)
141
-
142
  def get_chat_title(messages: List[Dict]) -> str:
143
  """Generate chat title from first user message"""
144
  for msg in messages:
@@ -456,20 +469,17 @@ async def chat(request: ChatRequest):
456
  chats = load_chats_for_space(request.space_id)
457
 
458
  # Find existing chat or create new
459
- chat = None
460
- for c in chats:
461
- if c['id'] == chat_id:
462
- chat = c
463
- break
464
 
465
  if not chat:
466
  chat = {
467
  'id': chat_id,
 
468
  'messages': [],
469
  'created_at': datetime.now().isoformat(),
470
  'updated_at': datetime.now().isoformat()
471
  }
472
- chats.append(chat)
473
 
474
  # Add messages
475
  timestamp = datetime.now().isoformat()
@@ -484,175 +494,145 @@ async def chat(request: ChatRequest):
484
  ])
485
  chat['updated_at'] = timestamp
486
 
487
- # Save chats
488
- save_chats_for_space(request.space_id, chats)
489
-
490
- return ChatResponse(
491
- response=response,
492
- sources=sources,
493
- chat_id=chat_id,
494
- timestamp=timestamp
495
- )
496
 
497
  except Exception as e:
498
  raise HTTPException(status_code=500, detail=str(e))
499
 
500
  @app.post("/api/spaces/{space_id}/upload")
501
- async def upload_files(space_id: str, files: List[UploadFile] = File(...)):
502
- """Upload and process files for a space"""
503
  try:
504
- # Initialize space
505
  initialize_space(space_id)
506
-
507
- # Save uploaded files temporarily
508
- space_dir = get_space_dir(space_id)
509
- uploads_dir = space_dir / "uploads"
510
- uploads_dir.mkdir(parents=True, exist_ok=True)
511
-
512
  processor = DocumentProcessor()
513
  all_chunks = []
514
  processed_files = []
515
-
516
- for file in files:
517
- # Save file
518
- file_path = uploads_dir / file.filename
519
- with open(file_path, "wb") as f:
520
- content = await file.read()
521
- f.write(content)
522
 
523
- # Process file and extract content
524
- try:
525
- file_data = processor.process_file(file_path)
526
- content = file_data['content']
527
-
528
- # Chunk the content
529
- chunks = processor.chunk_text(content, chunk_size=512, overlap=50, semantic=True)
530
-
531
- # Format chunks for vector database
532
- formatted_chunks = []
533
- for idx, chunk in enumerate(chunks):
534
- formatted_chunks.append({
535
- 'content': chunk,
536
- 'metadata': {
537
- 'filename': file.filename,
538
- 'chunk_index': idx,
539
- 'total_chunks': len(chunks),
540
- 'source_type': file_data['format']
541
- }
542
- })
543
-
544
- all_chunks.extend(formatted_chunks)
545
- processed_files.append({
546
- 'filename': file.filename,
547
- 'chunks': len(chunks),
548
- 'processed_at': datetime.now().isoformat()
549
  })
550
- except Exception as e:
551
- # Log error but continue with other files
552
- print(f"Error processing {file.filename}: {str(e)}")
553
- continue
554
-
555
- # Add to vector database in batches to avoid size limits
 
 
 
556
  if all_chunks:
557
- # Extract texts, metadatas, and generate IDs
558
  texts = [chunk['content'] for chunk in all_chunks]
559
  metadatas = [chunk['metadata'] for chunk in all_chunks]
560
  ids = [f"{space_id}_{idx}_{uuid.uuid4().hex[:8]}" for idx in range(len(all_chunks))]
561
 
562
- # Process in batches of 5000 to avoid ChromaDB batch size limit
563
- batch_size = 5000
564
  for i in range(0, len(texts), batch_size):
565
- batch_texts = texts[i:i + batch_size]
566
- batch_metadatas = metadatas[i:i + batch_size]
567
- batch_ids = ids[i:i + batch_size]
568
-
569
- vector_db.add_documents(batch_texts, batch_metadatas, batch_ids)
570
- print(f"Processed batch {i//batch_size + 1}/{(len(texts)-1)//batch_size + 1}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
571
 
572
- # Save processed files info
573
- processed_file = space_dir / "processed_files.json"
574
- existing = []
575
- if processed_file.exists():
576
- with open(processed_file, 'r') as f:
577
- existing = json.load(f)
578
 
579
- existing.extend(processed_files)
580
- with open(processed_file, 'w') as f:
581
- json.dump(existing, f, indent=2)
 
 
 
 
 
 
 
 
 
 
 
582
 
583
  return {
584
- "status": "success",
585
- "files_processed": len(processed_files),
586
- "total_chunks": len(all_chunks)
587
  }
588
 
589
  except Exception as e:
590
- raise e # This strips the wrapper and forces FastAPI to log the raw stack trace
591
-
592
  @app.get("/api/spaces/{space_id}/files")
593
  async def get_files(space_id: str):
594
- """Get processed files for a space"""
595
- processed_file = get_space_dir(space_id) / "processed_files.json"
596
-
597
- if processed_file.exists():
598
- with open(processed_file, 'r') as f:
599
- return json.load(f)
600
-
601
- return []
602
 
603
  @app.delete("/api/spaces/{space_id}/files/{filename}")
604
  async def delete_file(space_id: str, filename: str):
605
  """Delete a specific file from a space"""
606
  try:
607
- # Remove from processed_files.json
608
- processed_file = get_space_dir(space_id) / "processed_files.json"
609
- files_data = []
610
-
611
- if processed_file.exists():
612
- with open(processed_file, 'r') as f:
613
- files_data = json.load(f)
614
-
615
- # Filter out the file to delete
616
- files_data = [f for f in files_data if f.get('filename') != filename]
617
-
618
- with open(processed_file, 'w') as f:
619
- json.dump(files_data, f, indent=2)
620
 
621
- # Delete the actual file
622
  file_path = get_space_dir(space_id) / "uploads" / filename
623
  if file_path.exists():
624
  file_path.unlink()
625
 
626
- # Remove from vector database (if initialized)
627
- # Note: This removes all chunks with this filename from metadata
628
  if vector_db:
629
  try:
630
- # Get all documents in the collection
631
- collection = vector_db.collection
632
- results = collection.get()
633
-
634
- # Find IDs of documents with matching filename
635
- ids_to_delete = []
636
- for idx, metadata in enumerate(results['metadatas']):
637
- if metadata and metadata.get('filename') == filename:
638
- ids_to_delete.append(results['ids'][idx])
639
-
640
- # Delete those documents
641
- if ids_to_delete:
642
- collection.delete(ids=ids_to_delete)
643
- print(f"Deleted {len(ids_to_delete)} chunks for {filename}")
644
  except Exception as e:
645
- print(f"Error removing from vector DB: {e}")
646
 
647
- return {
648
- "status": "success",
649
- "message": f"File {filename} deleted"
650
- }
651
 
652
  except Exception as e:
653
  raise HTTPException(status_code=500, detail=f"Error deleting file: {str(e)}")
654
 
655
-
656
  # ==================== STUDIO API ROUTES ====================
657
  # Routes for Notebook, Flashcards, and Quiz features
658
 
 
2
  FastAPI Backend for NotebookPRO
3
  Handles RAG, LLM, file processing, and chat management
4
  """
5
+ from pymongo import MongoClient
6
+ from fastapi import BackgroundTasks
7
  from fastapi import FastAPI, File, UploadFile, HTTPException
8
  from fastapi.middleware.cors import CORSMiddleware
9
  from pydantic import BaseModel
 
44
 
45
  # Initialize FastAPI
46
  app = FastAPI(title="NotebookPRO API", version="2.0.0")
47
+
48
# Initialize MongoDB.
# The client/collection names are always bound (to None when no URI is
# configured) so that code referencing them fails predictably instead of
# raising NameError. Callers gate on MONGO_URI rather than on the collection
# objects themselves.
MONGO_URI = os.getenv("MONGO_URI")
mongo_client = None
db = None
chats_collection = None
files_collection = None
if MONGO_URI:
    mongo_client = MongoClient(MONGO_URI)
    db = mongo_client["notebookpro_db"]
    chats_collection = db["chats"]
    files_collection = db["processed_files"]
else:
    print("WARNING: MONGO_URI not found in environment variables.")
58
  # CORS - Allow Flutter web to connect
59
  app.add_middleware(
60
  CORSMiddleware,
 
138
  return get_data_dir() / "spaces" / space_id
139
 
140
  def load_chats_for_space(space_id: str) -> List[Dict]:
141
+ """Load all chats for a space from MongoDB"""
142
+ if not MONGO_URI: return []
143
+ cursor = chats_collection.find({"space_id": space_id}, {"_id": 0})
144
+ return list(cursor)
145
+
146
def save_chat_to_db(space_id: str, chat: Dict):
    """Upsert one chat document into MongoDB, matched on chat id and space.

    Does nothing when no MongoDB URI is configured. The ``chat`` dict passed
    in is tagged in place with ``space_id`` before it is written.
    """
    if MONGO_URI:
        chat['space_id'] = space_id
        selector = {"id": chat['id'], "space_id": space_id}
        chats_collection.update_one(selector, {"$set": chat}, upsert=True)
155
  def get_chat_title(messages: List[Dict]) -> str:
156
  """Generate chat title from first user message"""
157
  for msg in messages:
 
469
  chats = load_chats_for_space(request.space_id)
470
 
471
  # Find existing chat or create new
472
+ # Fetch specific chat from Mongo or create new
473
+ chat = chats_collection.find_one({"id": chat_id, "space_id": request.space_id}, {"_id": 0})
 
 
 
474
 
475
  if not chat:
476
  chat = {
477
  'id': chat_id,
478
+ 'space_id': request.space_id,
479
  'messages': [],
480
  'created_at': datetime.now().isoformat(),
481
  'updated_at': datetime.now().isoformat()
482
  }
 
483
 
484
  # Add messages
485
  timestamp = datetime.now().isoformat()
 
494
  ])
495
  chat['updated_at'] = timestamp
496
 
497
+ # Save SINGLE chat directly to MongoDB
498
+ save_chat_to_db(request.space_id, chat)
 
 
 
 
 
 
 
499
 
500
  except Exception as e:
501
  raise HTTPException(status_code=500, detail=str(e))
502
 
503
  @app.post("/api/spaces/{space_id}/upload")
504
+ def process_heavy_files_background(space_id: str, saved_file_paths: List[Dict]):
505
+ """Runs in the background so the HTTP request doesn't timeout"""
506
  try:
 
507
  initialize_space(space_id)
 
 
 
 
 
 
508
  processor = DocumentProcessor()
509
  all_chunks = []
510
  processed_files = []
511
+
512
+ for file_info in saved_file_paths:
513
+ file_path = Path(file_info['path'])
514
+ filename = file_info['name']
 
 
 
515
 
516
+ # The heavy CPU work
517
+ file_data = processor.process_file(file_path)
518
+ chunks = processor.chunk_text(file_data['content'], chunk_size=512, overlap=50, semantic=True)
519
+
520
+ for idx, chunk in enumerate(chunks):
521
+ all_chunks.append({
522
+ 'content': chunk,
523
+ 'metadata': {
524
+ 'filename': filename,
525
+ 'chunk_index': idx,
526
+ 'total_chunks': len(chunks),
527
+ 'source_type': file_data['format']
528
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
529
  })
530
+
531
+ processed_files.append({
532
+ 'filename': filename,
533
+ 'space_id': space_id, # Link it to the space
534
+ 'chunks': len(chunks),
535
+ 'processed_at': datetime.now().isoformat()
536
+ })
537
+
538
+ # Upload to Qdrant
539
  if all_chunks:
 
540
  texts = [chunk['content'] for chunk in all_chunks]
541
  metadatas = [chunk['metadata'] for chunk in all_chunks]
542
  ids = [f"{space_id}_{idx}_{uuid.uuid4().hex[:8]}" for idx in range(len(all_chunks))]
543
 
544
+ # Batch size set to 100 for Qdrant Cloud limits
545
+ batch_size = 100
546
  for i in range(0, len(texts), batch_size):
547
+ vector_db.add_documents(
548
+ texts[i:i + batch_size],
549
+ metadatas[i:i + batch_size],
550
+ ids[i:i + batch_size]
551
+ )
552
+
553
+ # Save metadata directly to MongoDB
554
+ if processed_files and MONGO_URI:
555
+ files_collection.insert_many(processed_files)
556
+
557
+ except Exception as e:
558
+ print(f"Background processing failed: {e}")
559
+
560
@app.post("/api/spaces/{space_id}/upload")
async def upload_files(space_id: str, background_tasks: BackgroundTasks, files: List[UploadFile] = File(...)):
    """Accept uploads quickly and hand the processing to a background task.

    Each file is written under the space's ``uploads`` directory, then
    ``process_heavy_files_background`` is scheduled with the saved paths.

    Raises:
        HTTPException: 400 when an upload has no usable filename, 500 for
            any other failure while saving.
    """
    try:
        uploads_dir = get_space_dir(space_id) / "uploads"
        uploads_dir.mkdir(parents=True, exist_ok=True)

        saved_files = []

        # 1. Save files to hard drive
        for file in files:
            # The filename is client-controlled: keep only its final path
            # component so it cannot point outside uploads_dir.
            safe_name = Path(file.filename or "").name
            if safe_name in ("", ".", ".."):
                raise HTTPException(status_code=400, detail="Uploaded file has no valid filename")

            file_path = uploads_dir / safe_name
            content = await file.read()
            with open(file_path, "wb") as f:
                f.write(content)

            saved_files.append({
                "name": safe_name,
                "path": str(file_path)
            })

        # 2. Hand heavy math and Mongo saving to background task
        background_tasks.add_task(process_heavy_files_background, space_id, saved_files)

        return {
            "status": "processing",
            "message": f"Successfully received {len(files)} files. Processing in the background."
        }

    except HTTPException:
        # Let deliberate HTTP errors (e.g. the 400 above) through unchanged.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
 
592
  @app.get("/api/spaces/{space_id}/files")
593
  async def get_files(space_id: str):
594
+ """Get processed files for a space from MongoDB"""
595
+ if not MONGO_URI: return []
596
+ cursor = files_collection.find({"space_id": space_id}, {"_id": 0})
597
+ return list(cursor)
 
 
 
 
598
 
599
  @app.delete("/api/spaces/{space_id}/files/{filename}")
600
  async def delete_file(space_id: str, filename: str):
601
  """Delete a specific file from a space"""
602
  try:
603
+ # 1. Remove from MongoDB
604
+ if MONGO_URI:
605
+ files_collection.delete_one({"space_id": space_id, "filename": filename})
 
 
 
 
 
 
 
 
 
 
606
 
607
+ # 2. Delete the actual file
608
  file_path = get_space_dir(space_id) / "uploads" / filename
609
  if file_path.exists():
610
  file_path.unlink()
611
 
612
+ # 3. Remove from Qdrant vector database
 
613
  if vector_db:
614
  try:
615
+ # Qdrant supports deleting by payload filter natively
616
+ from qdrant_client.http import models
617
+ vector_db.client.delete(
618
+ collection_name=vector_db.collection_name,
619
+ points_selector=models.Filter(
620
+ must=[
621
+ models.FieldCondition(
622
+ key="filename",
623
+ match=models.MatchValue(value=filename)
624
+ )
625
+ ]
626
+ )
627
+ )
 
628
  except Exception as e:
629
+ print(f"Error removing from Qdrant DB: {e}")
630
 
631
+ return {"status": "success", "message": f"File {filename} deleted"}
 
 
 
632
 
633
  except Exception as e:
634
  raise HTTPException(status_code=500, detail=f"Error deleting file: {str(e)}")
635
 
 
636
  # ==================== STUDIO API ROUTES ====================
637
  # Routes for Notebook, Flashcards, and Quiz features
638