from fastapi import APIRouter, Depends, HTTPException, status, File, UploadFile, Response, Body
from sqlalchemy.ext.asyncio import AsyncSession
from app.models import User
from app.models.tables import PDFData
from app.api.deps import get_db, get_current_user, get_chroma_collection
from app.schema import AI_chat_input
from app.llm import stream_chat
import uuid
from fastapi.responses import StreamingResponse
from chromadb.api.models.Collection import Collection
from pathlib import Path
from llama_index.readers.file.pymu_pdf import PyMuPDFReader
from llama_index.core.node_parser import SentenceSplitter
from typing import Annotated
import shutil
import tempfile
import os
from sentence_transformers import SentenceTransformer
from .quiz import search_logic
from sqlalchemy import select, desc, asc
from app.models.tables import ChatSession, ChatMessage
from app.schema.models import SessionCreate, SessionResponse, MessageResponse, NoteInfo
from app.database import async_session_maker
from typing import List

router = APIRouter()

# Scratch directory for uploads; files are removed again once processed.
UPLOAD_DIRECTORY = "uploaded_pdfs"
os.makedirs(UPLOAD_DIRECTORY, exist_ok=True)

# Loaded once at import time and shared by every request.
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')


@router.post("/stream_chat", response_class=StreamingResponse)
async def ai_chat(
    Input_model: AI_chat_input,
    collection: Collection = Depends(get_chroma_collection),
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Stream an LLM answer for a stateless chat request.

    The retrieval query is the request context joined with the content of
    the last message; whatever ``search_logic`` returns is passed to
    ``stream_chat`` together with the full message list.

    Raises:
        HTTPException: 400 when the request carries no messages.
    """
    # Without this guard ``messages[-1]`` raises IndexError and the client
    # sees an opaque 500 for what is really a malformed request.
    if not Input_model.messages:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="At least one message is required",
        )

    messages_dict = [msg.model_dump() for msg in Input_model.messages]
    query = f"{Input_model.context};{Input_model.messages[-1].content}"
    retrieved_docs: str | None = await search_logic(query, collection)

    return StreamingResponse(
        stream_chat(messages_dict, Input_model.context, retrieved_docs),
        media_type="text/plain"
    )


# Backend/app/api/v1/endpoints/notes.py
@router.post("/upload_notes")
async def upload_notes(
    file: Annotated[UploadFile, File(description="A PDF file to upload")],
    collection: Collection = Depends(get_chroma_collection),
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Store an uploaded PDF and index its text chunks.

    Steps: spool the upload to a scratch file, split it into chunks with
    ``pdf_process``, embed a 2000-character preview, persist the blob and
    embedding as a ``PDFData`` row, then add the chunks to the Chroma
    collection tagged with the new row id.

    Returns:
        A dict with ``status``, ``filename``, ``doc_id`` and
        ``chunks_ingested``.

    Raises:
        HTTPException: 400 when no text could be extracted; 500 for any
            other processing failure.
    """
    # The client controls ``file.filename``. Keep only its last path
    # component so names like "../../x" or "a/b.pdf" cannot escape (or
    # fail to resolve inside) the scratch directory.
    client_name = (file.filename or "").replace("\\", "/")
    disk_name = Path(client_name).name or "upload.pdf"
    file_path = Path(UPLOAD_DIRECTORY) / f"{uuid.uuid4()}_{disk_name}"

    try:
        with open(file_path, "wb") as buffer:
            shutil.copyfileobj(file.file, buffer)

        chunks = await pdf_process(str(file_path))
        if not chunks:
            # An unreadable/empty PDF is a client-side problem, not a 500.
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="Error processing PDF: No text chunks could be extracted from this PDF.",
            )

        full_text_preview = " ".join(chunks)[:2000]
        doc_embedding = embedding_model.encode(full_text_preview).tolist()

        # Rewind the upload stream: it was consumed when spooling to disk.
        file.file.seek(0)
        new_doc = PDFData(
            pdf_blob=file.file.read(),
            pdf_embedding=doc_embedding,
            user_id=current_user.id,
            filename=file.filename
        )
        db.add(new_doc)
        await db.commit()
        await db.refresh(new_doc)

        ids = [str(uuid.uuid4()) for _ in chunks]
        metadatas = [
            {
                "source_file": file.filename,
                "pdf_id": new_doc.id,
                "chunk_index": index
            }
            for index in range(len(chunks))
        ]
        await collection.add(
            ids=ids,
            documents=chunks,
            metadatas=metadatas
        )

        return {
            "status": "success",
            "filename": file.filename,
            "doc_id": new_doc.id,
            "chunks_ingested": len(chunks)
        }
    except HTTPException:
        # Deliberate HTTP errors raised above must not be re-wrapped as 500.
        raise
    except Exception as e:
        print(f"Error: {e}")
        raise HTTPException(status_code=500, detail=f"Error processing PDF: {str(e)}") from e
    finally:
        # Cleanup temp file; missing_ok avoids an exists()/remove() race.
        file_path.unlink(missing_ok=True)


# #--------Helper Functions--------#
async def pdf_process(pdf_path: str) -> list[str]:
    """Split the PDF at ``pdf_path`` into text chunks.

    Every document returned by ``PyMuPDFReader`` is split with a
    ``SentenceSplitter`` (chunk_size=1000, chunk_overlap=20) and the chunks
    are concatenated in document order.

    Raises:
        Exception: any loader/splitter error is logged and re-raised.
    """
    try:
        loader = PyMuPDFReader()
        # Load data (this reads the file the caller saved to disk)
        documents = loader.load_data(file_path=pdf_path)
        text_splitter = SentenceSplitter(
            chunk_size=1000,
            chunk_overlap=20
        )

        text_chunks: list[str] = []
        # Process all pages/documents found in the PDF
        for doc in documents:
            text_chunks.extend(text_splitter.split_text(doc.text))
        return text_chunks
    except Exception as e:
        print(f"PDF Processing Error: {e}")
        # Bare raise keeps the original traceback intact.
        raise


# -------------------------
# 1.
# Session Management
# -------------------------
@router.post("/sessions", response_model=SessionResponse)
async def create_session(
    session_in: SessionCreate,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Create a chat session bound to one of the caller's PDFs.

    Raises:
        HTTPException: 404 when the PDF does not exist or is not owned by
            the current user.
    """
    result = await db.execute(
        select(PDFData).filter(
            PDFData.id == session_in.pdf_id,
            PDFData.user_id == current_user.id
        )
    )
    pdf = result.scalar_one_or_none()
    if not pdf:
        raise HTTPException(404, "PDF not found")

    new_session = ChatSession(
        id=str(uuid.uuid4()),
        name=session_in.name,
        pdf_id=session_in.pdf_id,
        user_id=current_user.id
    )
    db.add(new_session)
    await db.commit()
    await db.refresh(new_session)
    return new_session


@router.get("/sessions/{pdf_id}", response_model=List[SessionResponse])
async def get_sessions(
    pdf_id: int,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """List the caller's chat sessions for ``pdf_id``, newest first."""
    result = await db.execute(
        select(ChatSession)
        .where(ChatSession.pdf_id == pdf_id)
        .where(ChatSession.user_id == current_user.id)
        .order_by(desc(ChatSession.created_at))
    )
    return result.scalars().all()


@router.get("/history/{session_id}", response_model=List[MessageResponse])
async def get_history(
    session_id: str,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Return the messages of one of the caller's sessions, oldest first.

    An unknown session, or one owned by another user, yields an empty list.
    """
    # Join through ChatSession so only the owner can read a session's
    # messages; filtering on session_id alone exposed every user's history
    # to any authenticated caller who knew (or guessed) the id.
    result = await db.execute(
        select(ChatMessage)
        .join(ChatSession, ChatMessage.session_id == ChatSession.id)
        .where(ChatMessage.session_id == session_id)
        .where(ChatSession.user_id == current_user.id)
        .order_by(asc(ChatMessage.created_at))
    )
    return result.scalars().all()


# -------------------------
# 2. Chat with Memory
# -------------------------
@router.post("/chat/{session_id}")
async def chat_session(
    session_id: str,
    user_prompt: str,
    db: AsyncSession = Depends(get_db),
    collection: Collection = Depends(get_chroma_collection),
    current_user: User = Depends(get_current_user)
):
    """Answer ``user_prompt`` inside a session and stream the reply.

    The user message is stored first; the stored history (including that
    message) plus context retrieved for the session's PDF is sent to
    ``stream_chat``. Once the stream finishes, the assistant reply is saved
    through a separate database session.

    Raises:
        HTTPException: 404 when the session does not exist or belongs to
            another user.
    """
    # 1. Verify Session (scoped to the caller so one user cannot post into
    #    or read back another user's conversation)
    session_res = await db.execute(
        select(ChatSession).where(
            ChatSession.id == session_id,
            ChatSession.user_id == current_user.id
        )
    )
    session = session_res.scalar_one_or_none()
    if not session:
        raise HTTPException(404, "Session not found")

    # 2. Make sure the PDF's chunks are present in Chroma
    await ensure_pdf_in_chroma(session.pdf_id, db, collection)

    # 3. Save User Message
    user_msg = ChatMessage(session_id=session_id, role="user", content=user_prompt)
    db.add(user_msg)
    await db.commit()

    # 4. Filter & Search (restricted to this session's PDF)
    filter_dict = {"pdf_id": session.pdf_id}
    retrieved_context = await search_logic(user_prompt, collection, filter_dict)

    # 5. Fetch History & Stream
    history_res = await db.execute(
        select(ChatMessage)
        .where(ChatMessage.session_id == session_id)
        .order_by(asc(ChatMessage.created_at))
    )
    history_msgs = history_res.scalars().all()
    messages_payload = [{"role": m.role, "content": m.content} for m in history_msgs]

    async def response_generator():
        # Collect pieces in a list and join once instead of repeated +=.
        pieces = []
        async for chunk in stream_chat(messages_payload, "", retrieved_context):
            pieces.append(chunk)
            yield chunk

        # A fresh session is opened here rather than reusing ``db``
        # (presumably because the request-scoped session may already be
        # closed while the body is still streaming — confirm against get_db).
        async with async_session_maker() as new_db_session:
            ai_msg = ChatMessage(
                session_id=session_id,
                role="assistant",
                content="".join(pieces)
            )
            new_db_session.add(ai_msg)
            await new_db_session.commit()

    return StreamingResponse(response_generator(), media_type="text/plain")


async def ensure_pdf_in_chroma(pdf_id: int, db: AsyncSession, collection: Collection):
    """
    Checks if embeddings exist for the given PDF ID.
    If not, it fetches the blob from SQL, chunks it, and re-uploads to Chroma.

    Raises:
        HTTPException: 404 when the PDF row is missing; 500 when
            re-processing or re-uploading fails.
    """
    # 1. Check Chroma first (Fast check)
    # We query for just 1 ID to see if any exist with this metadata
    existing = await collection.get(
        where={"pdf_id": pdf_id},
        limit=1
    )
    if existing and len(existing['ids']) > 0:
        print(f"✅ Embeddings found for PDF {pdf_id}. No action needed.")
        return

    print(f"⚠️ Embeddings missing for PDF {pdf_id}. Restoring from SQL...")

    # 2. Fetch Blob from SQL
    result = await db.execute(select(PDFData).where(PDFData.id == pdf_id))
    pdf_record = result.scalar_one_or_none()
    if not pdf_record:
        raise HTTPException(404, "PDF Data not found in database")

    # 3. Write Blob to Temp File (Required because pdf_process expects a path)
    # We use valid suffixes so PyMuPDF knows it's a PDF
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
        tmp_file.write(pdf_record.pdf_blob)
        tmp_path = tmp_file.name

    try:
        # 4. Re-Process (Reuse the existing chunking logic)
        chunks = await pdf_process(tmp_path)
        if not chunks:
            print("Warning: Restored PDF has no text.")
            return

        # 5. Re-Embed and Upload to Chroma
        # Generate new UUIDs for the chunks
        ids = [str(uuid.uuid4()) for _ in chunks]
        # EXACT SAME metadata structure as upload_notes
        metadatas = [
            {
                "source_file": pdf_record.filename,
                "pdf_id": pdf_id,
                "chunk_index": index
            }
            for index in range(len(chunks))
        ]
        # Re-add to Chroma
        await collection.add(
            ids=ids,
            documents=chunks,
            metadatas=metadatas
        )
        print(f"♻️ Successfully restored {len(chunks)} chunks for PDF {pdf_id}")
    except Exception as e:
        print(f"❌ Error restoring PDF: {e}")
        raise HTTPException(500, f"Failed to restore PDF embeddings: {str(e)}") from e
    finally:
        # Cleanup temp file
        if os.path.exists(tmp_path):
            os.remove(tmp_path)


@router.get("/", response_model=List[NoteInfo])
async def get_all_notes(
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Fetch all uploaded PDFs for the sidebar list."""
    result = await db.execute(
        select(PDFData.id, PDFData.filename, PDFData.created_at)
        .where(PDFData.user_id == current_user.id)
        .order_by(desc(PDFData.created_at))
    )
    return result.all()


@router.get("/{pdf_id}/content")
async def get_pdf_content(
    pdf_id: int,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Return the stored PDF bytes for inline display in the browser.

    Raises:
        HTTPException: 404 when the PDF does not exist or is not owned by
            the current user.
    """
    # Imported locally so this block does not depend on the file header.
    from urllib.parse import quote

    result = await db.execute(
        select(PDFData).where(PDFData.id == pdf_id, PDFData.user_id == current_user.id)
    )
    pdf = result.scalar_one_or_none()
    if not pdf:
        raise HTTPException(status_code=404, detail="Note not found")

    # 'Content-Disposition: inline' tells the browser to render the PDF.
    # The filename is user-controlled, so it must not be dropped into the
    # header raw: spaces/quotes break parsing and non-Latin-1 characters
    # make header encoding fail. Send a quoted ASCII fallback plus the
    # percent-encoded UTF-8 form (filename*).
    filename = pdf.filename or "document.pdf"
    ascii_name = "".join(
        ch for ch in filename
        if ch.isascii() and ch.isprintable() and ch not in '"\\'
    ) or "document.pdf"
    encoded_name = quote(filename, safe="")
    headers = {
        "Content-Disposition": (
            f"inline; filename=\"{ascii_name}\"; filename*=UTF-8''{encoded_name}"
        )
    }
    return Response(
        content=pdf.pdf_blob,
        media_type="application/pdf",
        headers=headers
    )


# -------------------------
# NEW: Delete Note
# -------------------------
@router.delete("/{note_id}")
async def delete_note(
    note_id: int,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user),
    collection: Collection = Depends(get_chroma_collection)
):
    """Delete one of the caller's PDFs from Chroma and the database.

    Raises:
        HTTPException: 404 when the note does not exist or is not owned by
            the current user.
    """
    # 1. Check ownership
    result = await db.execute(
        select(PDFData).where(PDFData.id == note_id, PDFData.user_id == current_user.id)
    )
    note = result.scalar_one_or_none()
    if not note:
        raise HTTPException(status_code=404, detail="Note not found")

    # 2. Delete from ChromaDB (using metadata filter)
    try:
        # This deletes all chunks where metadata field 'pdf_id' matches
        await collection.delete(where={"pdf_id": note_id})
    except Exception as e:
        # Deliberate best-effort: proceed to delete from DB even if Chroma
        # fails, so the note does not linger in the user's list.
        print(f"Error deleting from Chroma: {e}")

    # 3. Delete from Database (expected to cascade to Sessions/Messages —
    #    depends on the model configuration, which is not visible here)
    await db.delete(note)
    await db.commit()
    return {"status": "success", "message": "Note deleted"}


# -------------------------
# NEW: Rename Note
# -------------------------
@router.put("/{note_id}")
async def rename_note(
    note_id: int,
    new_filename: str = Body(..., embed=True),  # Expects JSON: { "new_filename": "foo.pdf" }
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Rename one of the caller's PDFs.

    Returns:
        A dict with the note's ``id``, ``filename`` and ``created_at``.

    Raises:
        HTTPException: 400 when the new name is blank; 404 when the note
            does not exist or is not owned by the current user.
    """
    # A blank name would leave an unlabelled entry in the sidebar list.
    if not new_filename.strip():
        raise HTTPException(status_code=400, detail="Filename must not be empty")

    result = await db.execute(
        select(PDFData).where(PDFData.id == note_id, PDFData.user_id == current_user.id)
    )
    note = result.scalar_one_or_none()
    if not note:
        raise HTTPException(status_code=404, detail="Note not found")

    note.filename = new_filename
    await db.commit()
    await db.refresh(note)
    return {
        "id": note.id,
        "filename": note.filename,
        "created_at": note.created_at
    }