feat: Upload notes endpoint
Backend/app/api/v1/endpoints/notes.py
CHANGED
@@ -1,32 +1,33 @@
 from fastapi import APIRouter, Depends, HTTPException, status, File, UploadFile
 from sqlalchemy.ext.asyncio import AsyncSession
 from app.models import User
+from app.models.tables import PDFData
 from app.api.deps import get_db, get_current_user
-from app.schema import
+from app.schema import AI_chat_input
 from app.llm import stream_chat
 import uuid
 from fastapi.responses import StreamingResponse
 from chromadb.api.models.Collection import Collection
 from app.api.deps import get_chroma_collection
-from app.api.deps import get_db, get_current_user, get_chroma_client
 from pathlib import Path
 from llama_index.readers.file import PyMuPDFReader
 from llama_index.core.node_parser import SentenceSplitter
 from typing import Annotated
 import shutil
 import os
-from
+from sentence_transformers import SentenceTransformer
+
 
 router = APIRouter(prefix="/notes")
 
 UPLOAD_DIRECTORY = "uploaded_pdfs"
 os.makedirs(UPLOAD_DIRECTORY, exist_ok=True)
 
+embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
 
 @router.post("/stream_chat", response_class=StreamingResponse)
 async def ai_chat(
     Input_model: AI_chat_input,
-    # db: AsyncSession = Depends(get_db),
     current_user: User = Depends(get_current_user)
 ):
     messages_dict = [msg.model_dump() for msg in Input_model.messages]
@@ -43,41 +44,90 @@ async def upload_notes(
     db: AsyncSession = Depends(get_db),
     current_user: User = Depends(get_current_user)
 ):
-
+    file_content = await file.read()
+
+    await file.seek(0)
+
+
+    safe_filename = f"{uuid.uuid4()}_{file.filename}"
+    file_path = Path(UPLOAD_DIRECTORY) / safe_filename
 
     try:
 
+        with open(file_path, "wb") as buffer:
+            shutil.copyfileobj(file.file, buffer)
+
+        # 2. Process PDF into chunks
         chunks = await pdf_process(str(file_path))
+
         if not chunks:
-            raise ValueError("No chunks
+            raise ValueError("No text chunks could be extracted from this PDF.")
+
+        full_text_preview = " ".join(chunks)[:2000]
+        doc_embedding = embedding_model.encode(full_text_preview).tolist()
+
+
+        new_doc = PDFData(
+            pdf_blob=file_path.read_bytes(),
+            messages_list=[],
+            pdf_embedding=doc_embedding,
+            user_id=current_user.id
+        )
+
+        db.add(new_doc)
+        await db.commit()
+        await db.refresh(new_doc)
+
+        # Generate unique IDs for each chunk
+        ids = [str(uuid.uuid4()) for _ in chunks]
+
+        # Create metadata so you know which file and document each chunk came from
+        metadatas = [{"source_file": file.filename, "doc_id": new_doc.id, "chunk_index": i} for i in range(len(chunks))]
 
-
+        # Add to ChromaDB
+        await collection.add(
+            ids=ids,
+            documents=chunks,
+            metadatas=metadatas
+        )
+
+        return {
+            "status": "success",
+            "filename": file.filename,
+            "chunks_ingested": len(chunks)
+        }
 
-        return {"status": "success"}
     except Exception as e:
+        print(f"Error: {e}")  # Log for server console
         raise HTTPException(status_code=500, detail=f"Error processing PDF: {str(e)}")
 
     finally:
+        # 3. Cleanup: Remove the temp file
         if file_path.exists():
             os.remove(file_path)
 
 # #--------Helper Functions--------#
 
 async def pdf_process(pdf_path: str):
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    try:
+        loader = PyMuPDFReader()
+
+        # Load data (this reads the file we just saved)
+        documents = loader.load_data(file_path=pdf_path)
+
+        text_splitter = SentenceSplitter(
+            chunk_size=1000,
+            chunk_overlap=20
+        )
+
+        text_chunks = []
+
+        # Process all pages/documents found in the PDF
+        for doc in documents:
+            cur_text_chunks = text_splitter.split_text(doc.text)
+            text_chunks.extend(cur_text_chunks)
 
+        return text_chunks
-
+    except Exception as e:
+        print(f"PDF Processing Error: {e}")
+        raise e
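Note that the second hunk starts partway through the new upload_notes definition (at line 44 of the new file), so the decorator and most of the parameter list are not shown in this diff. Judging from the imports (File, UploadFile, Annotated, Collection, get_chroma_collection) and the names used in the body (file, collection, db, current_user), the signature presumably looks roughly like the sketch below; the route path and the exact parameter declarations are assumptions, not part of this PR.

# Hypothetical reconstruction of the unshown part of upload_notes (new lines 34-43).
# The route path and parameter style are guesses inferred from the imports and the body.
@router.post("/upload_notes")
async def upload_notes(
    file: UploadFile = File(...),                              # uploaded PDF, saved to disk then chunked
    collection: Collection = Depends(get_chroma_collection),   # Chroma collection used by collection.add(...)
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    ...

The Annotated import suggests the real parameters may instead use the Annotated[...] dependency style; either form behaves the same in FastAPI.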
Backend/app/models/__init__.py
CHANGED
@@ -1,4 +1,4 @@
-from app.models.tables import User
+from app.models.tables import User, PDFData
 
 
-__all__ = [ "User"]
+__all__ = [ "User", "PDFData"]
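The endpoint constructs a PDFData row with pdf_blob, messages_list, pdf_embedding, and user_id, and the models package now re-exports it, but app/models/tables.py itself is not part of this diff. A minimal sketch of what that model might look like, assuming plain JSON/LargeBinary columns and a users foreign key (the real column types and table name may differ):

# Hypothetical sketch of the PDFData model assumed by this PR; the real definition
# lives in app/models/tables.py and may use different types (e.g. pgvector for the embedding).
from sqlalchemy import Column, ForeignKey, Integer, JSON, LargeBinary
from sqlalchemy.orm import DeclarativeBase


class Base(DeclarativeBase):  # stand-in for the project's actual declarative base
    pass


class PDFData(Base):
    __tablename__ = "pdf_data"

    id = Column(Integer, primary_key=True)
    pdf_blob = Column(LargeBinary, nullable=False)     # raw PDF bytes (file_path.read_bytes())
    messages_list = Column(JSON, default=list)         # per-document chat history, starts empty
    pdf_embedding = Column(JSON, nullable=False)       # document-level embedding from all-MiniLM-L6-v2
    user_id = Column(Integer, ForeignKey("users.id"))  # owner (current_user.id)

Chunk-level retrieval happens in Chroma, so pdf_embedding here is only a coarse whole-document signature built from the first ~2000 characters of extracted text.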