aki-008 committed on
Commit
657674a
Β·
1 Parent(s): 0d22fa6

feat:Chat session mgmt added

Browse files
Backend/app/api/v1/endpoints/auth.py CHANGED
@@ -4,7 +4,6 @@ from sqlalchemy import select
4
  from datetime import timedelta
5
  from app.schema import UserCreate, LoginRequest
6
  from app.schema.models import LoginResponse
7
- # from app.schema.models import LoginRequest
8
  from app.models import User
9
  from app.core import verify_password, get_password_hash, create_access_token
10
  from app.api.deps import get_db
 
4
  from datetime import timedelta
5
  from app.schema import UserCreate, LoginRequest
6
  from app.schema.models import LoginResponse
 
7
  from app.models import User
8
  from app.core import verify_password, get_password_hash, create_access_token
9
  from app.api.deps import get_db
Backend/app/api/v1/endpoints/notes.py CHANGED
@@ -13,9 +13,15 @@ from llama_index.readers.file import PyMuPDFReader
13
  from llama_index.core.node_parser import SentenceSplitter
14
  from typing import Annotated
15
  import shutil
 
16
  import os
17
  from sentence_transformers import SentenceTransformer
18
  from .quiz import search_logic
 
 
 
 
 
19
 
20
  router = APIRouter(prefix="/notes")
21
 
@@ -28,6 +34,7 @@ embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
28
  async def ai_chat(
29
  Input_model: AI_chat_input,
30
  collection: Collection = Depends(get_chroma_collection),
 
31
  current_user: User = Depends(get_current_user)
32
  ):
33
  messages_dict = [msg.model_dump() for msg in Input_model.messages]
@@ -39,6 +46,8 @@ async def ai_chat(
39
  media_type="text/plain"
40
  )
41
 
 
 
42
  @router.post("/upload_notes")
43
  async def upload_notes(
44
  file: Annotated[UploadFile, File(description="A PDF file to upload")],
@@ -46,20 +55,14 @@ async def upload_notes(
46
  db: AsyncSession = Depends(get_db),
47
  current_user: User = Depends(get_current_user)
48
  ):
49
- file_content = file.read()
50
-
51
- await file.seek(0)
52
-
53
 
54
  safe_filename = f"{uuid.uuid4()}_{file.filename}"
55
  file_path = Path(UPLOAD_DIRECTORY) / safe_filename
56
 
57
  try:
58
-
59
  with open(file_path, "wb") as buffer:
60
  shutil.copyfileobj(file.file, buffer)
61
 
62
- # 2. Process PDF into chunks
63
  chunks = await pdf_process(str(file_path))
64
 
65
  if not chunks:
@@ -68,11 +71,11 @@ async def upload_notes(
68
  full_text_preview = " ".join(chunks)[:2000]
69
  doc_embedding = embedding_model.encode(full_text_preview).tolist()
70
 
71
-
 
72
  new_doc = PDFData(
73
- pdf_blob=file_path.read_bytes(),
74
- messages_list=[],
75
- pdf_embedding=doc_embedding,
76
  user_id=current_user.id
77
  )
78
 
@@ -80,13 +83,14 @@ async def upload_notes(
80
  await db.commit()
81
  await db.refresh(new_doc)
82
 
83
- # Generate unique IDs for each chunk
84
  ids = [str(uuid.uuid4()) for _ in chunks]
85
-
86
- # Create metadata so you know which file the chunk came from
87
- metadatas = [{"source_file": file.filename, "chunk_index": new_doc.id,"chunk_index": i} for i in range(len(chunks))]
88
 
89
- # Add to ChromaDB
 
 
 
 
 
90
  await collection.add(
91
  ids=ids,
92
  documents=chunks,
@@ -96,15 +100,16 @@ async def upload_notes(
96
  return {
97
  "status": "success",
98
  "filename": file.filename,
 
99
  "chunks_ingested": len(chunks)
100
  }
101
 
102
  except Exception as e:
103
- print(f"Error: {e}") # Log for server console
104
  raise HTTPException(status_code=500, detail=f"Error processing PDF: {str(e)}")
105
 
106
  finally:
107
- # 3. Cleanup: Remove the temp file
108
  if file_path.exists():
109
  os.remove(file_path)
110
 
@@ -132,4 +137,179 @@ async def pdf_process(pdf_path: str):
132
  return text_chunks
133
  except Exception as e:
134
  print(f"PDF Processing Error: {e}")
135
- raise e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  from llama_index.core.node_parser import SentenceSplitter
14
  from typing import Annotated
15
  import shutil
16
+ import tempfile
17
  import os
18
  from sentence_transformers import SentenceTransformer
19
  from .quiz import search_logic
20
+ from sqlalchemy import select, desc, asc
21
+ from app.models.tables import ChatSession, ChatMessage
22
+ from app.schema.models import SessionCreate, SessionResponse, MessageResponse
23
+ from app.database import async_session_maker
24
+ from typing import List
25
 
26
  router = APIRouter(prefix="/notes")
27
 
 
34
  async def ai_chat(
35
  Input_model: AI_chat_input,
36
  collection: Collection = Depends(get_chroma_collection),
37
+ db: AsyncSession = Depends(get_db),
38
  current_user: User = Depends(get_current_user)
39
  ):
40
  messages_dict = [msg.model_dump() for msg in Input_model.messages]
 
46
  media_type="text/plain"
47
  )
48
 
49
+ # Backend/app/api/v1/endpoints/notes.py
50
+
51
  @router.post("/upload_notes")
52
  async def upload_notes(
53
  file: Annotated[UploadFile, File(description="A PDF file to upload")],
 
55
  db: AsyncSession = Depends(get_db),
56
  current_user: User = Depends(get_current_user)
57
  ):
 
 
 
 
58
 
59
  safe_filename = f"{uuid.uuid4()}_{file.filename}"
60
  file_path = Path(UPLOAD_DIRECTORY) / safe_filename
61
 
62
  try:
 
63
  with open(file_path, "wb") as buffer:
64
  shutil.copyfileobj(file.file, buffer)
65
 
 
66
  chunks = await pdf_process(str(file_path))
67
 
68
  if not chunks:
 
71
  full_text_preview = " ".join(chunks)[:2000]
72
  doc_embedding = embedding_model.encode(full_text_preview).tolist()
73
 
74
+ file.file.seek(0)
75
+
76
  new_doc = PDFData(
77
+ pdf_blob=file.file.read(),
78
+ pdf_embedding=doc_embedding,
 
79
  user_id=current_user.id
80
  )
81
 
 
83
  await db.commit()
84
  await db.refresh(new_doc)
85
 
 
86
  ids = [str(uuid.uuid4()) for _ in chunks]
 
 
 
87
 
88
+ metadatas = [{
89
+ "source_file": file.filename,
90
+ "pdf_id": new_doc.id,
91
+ "chunk_index": i
92
+ } for i in range(len(chunks))]
93
+
94
  await collection.add(
95
  ids=ids,
96
  documents=chunks,
 
100
  return {
101
  "status": "success",
102
  "filename": file.filename,
103
+ "doc_id": new_doc.id,
104
  "chunks_ingested": len(chunks)
105
  }
106
 
107
  except Exception as e:
108
+ print(f"Error: {e}")
109
  raise HTTPException(status_code=500, detail=f"Error processing PDF: {str(e)}")
110
 
111
  finally:
112
+ # Cleanup temp file
113
  if file_path.exists():
114
  os.remove(file_path)
115
 
 
137
  return text_chunks
138
  except Exception as e:
139
  print(f"PDF Processing Error: {e}")
140
+ raise e
141
+
142
+ # -------------------------
143
+ # 1. Session Management
144
+ # -------------------------
145
+
146
@router.post("/sessions", response_model=SessionResponse)
async def create_session(
    session_in: SessionCreate,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Create a new chat session bound to one of the caller's PDFs.

    Raises 404 when the referenced PDF does not exist or is not owned
    by the requesting user.
    """
    # The target PDF must exist and belong to the requesting user.
    pdf_query = (
        select(PDFData)
        .where(PDFData.id == session_in.pdf_id)
        .where(PDFData.user_id == current_user.id)
    )
    pdf_row = (await db.execute(pdf_query)).scalar_one_or_none()
    if pdf_row is None:
        raise HTTPException(404, "PDF not found")

    # Sessions use API-generated UUID strings as primary keys.
    session = ChatSession(
        id=str(uuid.uuid4()),
        name=session_in.name,
        pdf_id=session_in.pdf_id,
        user_id=current_user.id,
    )
    db.add(session)
    await db.commit()
    await db.refresh(session)
    return session
167
+
168
@router.get("/sessions/{pdf_id}", response_model=List[SessionResponse])
async def get_sessions(
    pdf_id: int,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """List the caller's chat sessions for the given PDF, newest first."""
    query = (
        select(ChatSession)
        .where(ChatSession.pdf_id == pdf_id)
        .where(ChatSession.user_id == current_user.id)
        .order_by(desc(ChatSession.created_at))
    )
    rows = await db.execute(query)
    return rows.scalars().all()
182
+
183
@router.get("/history/{session_id}", response_model=List[MessageResponse])
async def get_history(
    session_id: str,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Return every message of a chat session in chronological order.

    Raises 404 when the session does not exist or is not owned by the
    calling user. (Fix: the original never used `current_user`, so any
    authenticated user could read any session's history by guessing or
    leaking a session ID.)
    """
    # Ownership check: the session must belong to the caller.
    session_res = await db.execute(
        select(ChatSession)
        .where(ChatSession.id == session_id)
        .where(ChatSession.user_id == current_user.id)
    )
    if session_res.scalar_one_or_none() is None:
        raise HTTPException(404, "Session not found")

    result = await db.execute(
        select(ChatMessage)
        .where(ChatMessage.session_id == session_id)
        .order_by(asc(ChatMessage.created_at))
    )
    return result.scalars().all()
195
+
196
+ # -------------------------
197
+ # 2. Chat with Memory
198
+ # -------------------------
199
+
200
@router.post("/chat/{session_id}")
async def chat_session(
    session_id: str,
    user_prompt: str,
    db: AsyncSession = Depends(get_db),
    collection: Collection = Depends(get_chroma_collection),
    current_user: User = Depends(get_current_user)
):
    """Stream an AI reply to *user_prompt* inside an existing chat session.

    Persists the user message before generation and the assistant message
    after the stream completes. Returns a text/plain StreamingResponse.
    Raises 404 when the session does not exist or is not owned by the caller.
    """
    # 1. Verify the session exists AND belongs to the caller. (Fix: the
    #    original only checked existence, letting any authenticated user
    #    chat inside — and read the history of — another user's session.)
    session_res = await db.execute(
        select(ChatSession)
        .where(ChatSession.id == session_id)
        .where(ChatSession.user_id == current_user.id)
    )
    session = session_res.scalar_one_or_none()
    if not session:
        raise HTTPException(404, "Session not found")

    # 2. Make sure this PDF's chunks are present in Chroma; restores them
    #    from the SQL blob if the vector store was wiped.
    await ensure_pdf_in_chroma(session.pdf_id, db, collection)

    # 3. Save the user message before generating, so history is consistent
    #    even if generation fails mid-stream.
    user_msg = ChatMessage(session_id=session_id, role="user", content=user_prompt)
    db.add(user_msg)
    await db.commit()

    # 4. Retrieve context restricted to this session's PDF.
    filter_dict = {"pdf_id": session.pdf_id}
    retrieved_context = await search_logic(user_prompt, collection, filter_dict)

    # 5. Full conversation history becomes the LLM payload.
    history_res = await db.execute(
        select(ChatMessage)
        .where(ChatMessage.session_id == session_id)
        .order_by(asc(ChatMessage.created_at))
    )
    history_msgs = history_res.scalars().all()
    messages_payload = [{"role": m.role, "content": m.content} for m in history_msgs]

    async def response_generator():
        # Accumulate streamed chunks so the complete reply can be stored
        # once streaming finishes.
        full_response = ""
        async for chunk in stream_chat(messages_payload, "", retrieved_context):
            full_response += chunk
            yield chunk

        # The request-scoped `db` session may already be closed when the
        # stream ends, so persist the assistant message with a fresh session.
        async with async_session_maker() as new_db_session:
            ai_msg = ChatMessage(session_id=session_id, role="assistant", content=full_response)
            new_db_session.add(ai_msg)
            await new_db_session.commit()

    return StreamingResponse(response_generator(), media_type="text/plain")
247
+
248
+
249
+
250
async def ensure_pdf_in_chroma(pdf_id: int, db: AsyncSession, collection: Collection):
    """
    Checks if embeddings exist in Chroma for the given PDF ID.
    If not, fetches the blob from SQL, re-chunks it, and re-uploads to Chroma.

    Raises:
        HTTPException 404: the PDF row is missing from SQL.
        HTTPException 500: chunking or re-upload failed.
    """
    # 1. Fast check: one hit with this metadata means embeddings already exist.
    existing = await collection.get(
        where={"pdf_id": pdf_id},
        limit=1
    )
    if existing and len(existing['ids']) > 0:
        print(f"βœ… Embeddings found for PDF {pdf_id}. No action needed.")
        return

    print(f"⚠️ Embeddings missing for PDF {pdf_id}. Restoring from SQL...")

    # 2. Fetch the stored blob from SQL.
    result = await db.execute(select(PDFData).where(PDFData.id == pdf_id))
    pdf_record = result.scalar_one_or_none()
    if not pdf_record:
        raise HTTPException(404, "PDF Data not found in database")

    # 3. pdf_process needs a real file path; the ".pdf" suffix lets PyMuPDF
    #    detect the format. The write now happens INSIDE try so a failed
    #    write can no longer leak the temp file or leave tmp_path unbound.
    tmp_path = None
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
            tmp_path = tmp_file.name
            tmp_file.write(pdf_record.pdf_blob)

        # 4. Re-process with the same chunking logic as upload_notes.
        chunks = await pdf_process(tmp_path)
        if not chunks:
            print("Warning: Restored PDF has no text.")
            return

        # 5. Re-embed and upload; fresh UUIDs per chunk.
        ids = [str(uuid.uuid4()) for _ in chunks]

        # BUG FIX: PDFData defines no `filename` column (see models/tables.py),
        # so `pdf_record.filename` raised AttributeError. Fall back to a
        # synthetic name when the attribute is absent.
        source_file = getattr(pdf_record, "filename", None) or f"pdf_{pdf_id}"

        # Same metadata structure as upload_notes so filters keep working.
        metadatas = [{
            "source_file": source_file,
            "pdf_id": pdf_id,
            "chunk_index": i
        } for i in range(len(chunks))]

        await collection.add(
            ids=ids,
            documents=chunks,
            metadatas=metadatas
        )
        print(f"♻️ Successfully restored {len(chunks)} chunks for PDF {pdf_id}")

    except HTTPException:
        # Don't re-wrap our own HTTP errors.
        raise
    except Exception as e:
        print(f"❌ Error restoring PDF: {e}")
        raise HTTPException(500, f"Failed to restore PDF embeddings: {str(e)}")

    finally:
        # Cleanup temp file (tmp_path is None if creation itself failed).
        if tmp_path and os.path.exists(tmp_path):
            os.remove(tmp_path)
Backend/app/api/v1/endpoints/quiz.py CHANGED
@@ -14,27 +14,23 @@ import logging
14
 
15
  router = APIRouter(prefix="/quiz")
16
 
 
17
 
18
- # 1. Set up a logger (if you haven't already globally)
19
- logger = logging.getLogger("uvicorn.error") # reusing uvicorn's logger ensures it shows up in your terminal
20
-
21
- async def search_logic(query: str, collection: Collection):
22
- # Log the incoming query
23
  logger.info(f"πŸ” [Search Logic] Starting search for query: '{query}'")
24
 
25
  try:
26
  results = await collection.query(
27
- query_texts=[query],
28
- n_results=5
29
- )
 
30
 
31
- # Log the raw results to see exactly what ChromaDB returned (helps spot NoneTypes)
32
  logger.info(f"πŸ“„ [Search Logic] Raw results from DB: {results}")
33
 
34
  if results and results.get('documents') and len(results['documents']) > 0:
35
  raw_docs = results['documents'][0]
36
-
37
- # Filter None values and Log how many were found vs valid
38
  valid_docs = [str(doc) for doc in raw_docs if doc is not None]
39
 
40
  logger.info(f"βœ… [Search Logic] Processing: Found {len(raw_docs)} items. Valid text items: {len(valid_docs)}")
@@ -42,7 +38,6 @@ async def search_logic(query: str, collection: Collection):
42
  if len(raw_docs) != len(valid_docs):
43
  logger.warning("⚠️ [Search Logic] Warning: Some documents contained NoneType and were skipped.")
44
 
45
- # Join with a space (safer than empty string)
46
  final_context = " ".join(valid_docs)
47
  return final_context
48
 
@@ -51,9 +46,7 @@ async def search_logic(query: str, collection: Collection):
51
  return ""
52
 
53
  except Exception as e:
54
- # Log the full error if something crashes
55
  logger.error(f"❌ [Search Logic] CRITICAL ERROR: {str(e)}")
56
- # You might want to re-raise the error or return empty depending on your needs
57
  return ""
58
 
59
  @router.get("/search_docs")
 
14
 
15
  router = APIRouter(prefix="/quiz")
16
 
17
+ logger = logging.getLogger("uvicorn.error")
18
 
19
+ async def search_logic(query: str, collection: Collection, filter_dict: dict = None):
 
 
 
 
20
  logger.info(f"πŸ” [Search Logic] Starting search for query: '{query}'")
21
 
22
  try:
23
  results = await collection.query(
24
+ query_texts=[query],
25
+ n_results=5,
26
+ where=filter_dict
27
+ )
28
 
 
29
  logger.info(f"πŸ“„ [Search Logic] Raw results from DB: {results}")
30
 
31
  if results and results.get('documents') and len(results['documents']) > 0:
32
  raw_docs = results['documents'][0]
33
+
 
34
  valid_docs = [str(doc) for doc in raw_docs if doc is not None]
35
 
36
  logger.info(f"βœ… [Search Logic] Processing: Found {len(raw_docs)} items. Valid text items: {len(valid_docs)}")
 
38
  if len(raw_docs) != len(valid_docs):
39
  logger.warning("⚠️ [Search Logic] Warning: Some documents contained NoneType and were skipped.")
40
 
 
41
  final_context = " ".join(valid_docs)
42
  return final_context
43
 
 
46
  return ""
47
 
48
  except Exception as e:
 
49
  logger.error(f"❌ [Search Logic] CRITICAL ERROR: {str(e)}")
 
50
  return ""
51
 
52
  @router.get("/search_docs")
Backend/app/models/tables.py CHANGED
@@ -1,4 +1,4 @@
1
- from sqlalchemy import String, LargeBinary, JSON, ForeignKey
2
  from sqlalchemy.orm import Mapped, mapped_column, relationship
3
  from datetime import datetime
4
  from app.database import Base
@@ -19,7 +19,33 @@ class PDFData(Base):
19
 
20
  id: Mapped[int] = mapped_column(primary_key=True, index=True)
21
  pdf_blob: Mapped[bytes] = mapped_column(LargeBinary)
22
- messages_list: Mapped[List] = mapped_column(JSON)
23
  pdf_embedding: Mapped[list[float]] = mapped_column(JSON)
24
  user_id: Mapped[int] = mapped_column(ForeignKey('users.id'))
25
- user: Mapped["User"] = relationship(back_populates="pdf_data")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sqlalchemy import String, LargeBinary, JSON, ForeignKey, Text, DateTime
2
  from sqlalchemy.orm import Mapped, mapped_column, relationship
3
  from datetime import datetime
4
  from app.database import Base
 
19
 
20
  id: Mapped[int] = mapped_column(primary_key=True, index=True)
21
  pdf_blob: Mapped[bytes] = mapped_column(LargeBinary)
 
22
  pdf_embedding: Mapped[list[float]] = mapped_column(JSON)
23
  user_id: Mapped[int] = mapped_column(ForeignKey('users.id'))
24
+
25
+ user: Mapped["User"] = relationship(back_populates="pdf_data")
26
+ chat_sessions: Mapped[List["ChatSession"]] = relationship(back_populates="pdf_data", cascade="all, delete-orphan")
27
+
28
class ChatSession(Base):
    """One chat conversation a user holds about a single uploaded PDF."""
    __tablename__ = "chat_sessions"

    # UUID string assigned by the API layer (create_session), not autoincrement.
    id: Mapped[str] = mapped_column(String, primary_key=True)
    # Display name; defaults to "New Chat" at the schema layer.
    name: Mapped[str] = mapped_column(String(100))
    # NOTE(review): datetime.utcnow is deprecated (Python 3.12+) and yields a
    # naive timestamp — consider a timezone-aware default; confirm DB expectations.
    created_at: Mapped[datetime] = mapped_column(default=datetime.utcnow)

    # Owning PDF; PDFData.chat_sessions cascades delete-orphan to sessions.
    pdf_id: Mapped[int] = mapped_column(ForeignKey('pdf_data.id'))
    pdf_data: Mapped["PDFData"] = relationship(back_populates="chat_sessions")

    # Owning user.
    user_id: Mapped[int] = mapped_column(ForeignKey('users.id'))

    # Messages are deleted together with their session.
    messages: Mapped[List["ChatMessage"]] = relationship(back_populates="session", cascade="all, delete-orphan")
41
+
42
class ChatMessage(Base):
    """A single message ("user" or "assistant") within a ChatSession."""
    __tablename__ = "chat_messages"

    id: Mapped[int] = mapped_column(primary_key=True, index=True)
    # Parent session; string FK because ChatSession uses UUID string PKs.
    session_id: Mapped[str] = mapped_column(ForeignKey('chat_sessions.id'))
    session: Mapped["ChatSession"] = relationship(back_populates="messages")

    # Role as used in the LLM payload ("user" / "assistant").
    role: Mapped[str] = mapped_column(String(20))
    content: Mapped[str] = mapped_column(Text)
    # NOTE(review): datetime.utcnow is deprecated (Python 3.12+) and naive —
    # message ordering relies on this column; consider an aware default.
    created_at: Mapped[datetime] = mapped_column(default=datetime.utcnow)
Backend/app/schema/__init__.py CHANGED
@@ -1,3 +1,3 @@
1
- from app.schema.models import UserCreate, Token, LoginRequest, Quiz_input, QuizOutput, IngestRequest, ChatMessage, AI_chat_input, pdf_input
2
 
3
- __all__ = ["UserCreate", "Token", "LoginRequest", "Quiz_input", "QuizOutput", "IngestRequest", "ChatMessage", "AI_chat_input", "pdf_input"]
 
1
+ from app.schema.models import UserCreate, Token, LoginRequest, Quiz_input, QuizOutput, IngestRequest, ChatMessage, AI_chat_input, pdf_input, SessionCreate, SessionResponse, MessageResponse
2
 
3
+ __all__ = ["UserCreate", "Token", "LoginRequest", "Quiz_input", "QuizOutput", "IngestRequest", "ChatMessage", "AI_chat_input", "pdf_input", "SessionCreate", "SessionResponse", "MessageResponse"]
Backend/app/schema/models.py CHANGED
@@ -61,6 +61,22 @@ class AI_chat_input(BaseModel):
61
  None, description="The unique ID of the current chat session (optional)."
62
  )
63
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  #--------Notes page models--------#
65
 
66
  class pdf_input(BaseModel):
 
61
  None, description="The unique ID of the current chat session (optional)."
62
  )
63
 
64
class SessionCreate(BaseModel):
    """Request body for POST /notes/sessions."""
    # ID of an already-uploaded PDFData row owned by the caller.
    pdf_id: int
    # Optional display name for the session.
    name: str = "New Chat"
67
+
68
class SessionResponse(BaseModel):
    """Serialized chat session returned by the session endpoints.

    Fix: the endpoints return ChatSession ORM objects as `response_model`;
    Pydantic v2 (this project uses v2 — see `model_dump()` usage) requires
    `from_attributes=True` to validate from object attributes instead of
    a dict, otherwise serialization fails at runtime.
    """
    model_config = {"from_attributes": True}

    id: str
    name: str
    created_at: datetime
    pdf_id: int
73
+
74
class MessageResponse(BaseModel):
    """Serialized chat message returned by GET /notes/history/{session_id}.

    Fix: the endpoint returns ChatMessage ORM objects as `response_model`;
    Pydantic v2 needs `from_attributes=True` to read fields from object
    attributes, otherwise response validation fails at runtime.
    """
    model_config = {"from_attributes": True}

    id: int
    role: str
    content: str
    created_at: datetime
79
+
80
  #--------Notes page models--------#
81
 
82
  class pdf_input(BaseModel):