Spaces:

aki-008
/

prepAI

Running

App Files Files Community

aki-008 committed on Nov 27, 2025

Commit

dcde7f3

1 Parent(s): 4735086

feat: upload notes endpoint

Browse files

Files changed (6) hide show

Backend/app/api/v1/endpoints/notes.py +64 -7
Backend/app/api/v1/endpoints/quiz.py +1 -2
Backend/app/llm.py +2 -2
Backend/app/models/tables.py +14 -3
Backend/app/schema/__init__.py +2 -2
Backend/app/schema/models.py +8 -5

Backend/app/api/v1/endpoints/notes.py CHANGED Viewed

@@ -1,18 +1,29 @@
-from fastapi import APIRouter, Depends, HTTPException, status
 from sqlalchemy.ext.asyncio import AsyncSession
 from app.models import User
 from app.api.deps import get_db, get_current_user
-from app.schema import ChatMessage, AI_chat_input
-from .prompts import SYSTEM_PROMPT
-from app.llm import call_llm, stream_chat
 import uuid
 from fastapi.responses import StreamingResponse
-router = APIRouter(prefix="/notes")
-@router.post("/stram_chat", response_class=StreamingResponse)
 async def ai_chat(
     Input_model: AI_chat_input,
     # db: AsyncSession = Depends(get_db),
@@ -23,4 +34,50 @@ async def ai_chat(
     return StreamingResponse(
         stream_chat(messages_dict, Input_model.context),
         media_type="text/plain"
-    )

+from fastapi import APIRouter, Depends, HTTPException, status, File, UploadFile
 from sqlalchemy.ext.asyncio import AsyncSession
 from app.models import User
 from app.api.deps import get_db, get_current_user
+from app.schema import ChatMessage, AI_chat_input, pdf_input
+from app.llm import stream_chat
 import uuid
 from fastapi.responses import StreamingResponse
+from chromadb.api.models.Collection import Collection
+from app.api.deps import get_chroma_collection
+from app.api.deps import get_db, get_current_user, get_chroma_client
+from pathlib import Path
+from llama_index.readers.file import PyMuPDFReader
+from llama_index.core.node_parser import SentenceSplitter
+from typing import Annotated
+import shutil
+import os
+from .quiz import ingest_logic
+router = APIRouter(prefix="/notes")
+UPLOAD_DIRECTORY = "uploaded_pdfs"  # relative directory name joined with the upload's filename in upload_notes
+os.makedirs(UPLOAD_DIRECTORY, exist_ok=True)  # runs at import time; exist_ok avoids an error when the directory already exists
+@router.post("/stream_chat", response_class=StreamingResponse)
 async def ai_chat(
     Input_model: AI_chat_input,
     # db: AsyncSession = Depends(get_db),
     return StreamingResponse(
         stream_chat(messages_dict, Input_model.context),
         media_type="text/plain"
+    )
+@router.post("/upload_notes")
+async def upload_notes(
+    file: Annotated[UploadFile, File(description="A PDF file to upload")],
+    collection: Collection = Depends(get_chroma_collection),
+    db: AsyncSession = Depends(get_db),
+    current_user: User = Depends(get_current_user)
+):
+    file_path = Path(UPLOAD_DIRECTORY) / file.filename
+    try:
+        chunks = await pdf_process(str(file_path))
+        if not chunks:
+            raise ValueError("No chunks availible")
+        await ingest_logic(chunks, collection)
+        return {"status": "success"}
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Error processing PDF: {str(e)}")
+    finally:
+        if file_path.exists():
+            os.remove(file_path)
+#--------Helper Functions--------#
+async def pdf_process(pdf_path: str):
+    loader = PyMuPDFReader()
+    # 5. Load using the file path string
+    documents = loader.load_data(file_path=pdf_path)
+    text_splitter = SentenceSplitter(
+        chunk_size=1000,
+        chunk_overlap=20
+    )
+    text_chunks = []
+    for doc_idx, doc in enumerate(documents):
+        cur_text_chunks = text_splitter.split_text(doc.text)
+        text_chunks.extend(cur_text_chunks)
+    return text_chunks

Backend/app/api/v1/endpoints/quiz.py CHANGED Viewed

@@ -5,12 +5,11 @@ from app.api.deps import get_db, get_current_user, get_chroma_client
 from app.schema import Quiz_input, QuizOutput, IngestRequest
 from .prompts import SYSTEM_PROMPT
 from fastapi import APIRouter, Depends, HTTPException
-from chromadb.api.models.Collection import Collection # Import Collection type
 from app.api.deps import get_chroma_collection
 from app.llm import call_llm
 import uuid
 router = APIRouter(prefix="/quiz")
 async def search_logic(query: str, collection: Collection):

 from app.schema import Quiz_input, QuizOutput, IngestRequest
 from .prompts import SYSTEM_PROMPT
 from fastapi import APIRouter, Depends, HTTPException
+from chromadb.api.models.Collection import Collection
 from app.api.deps import get_chroma_collection
 from app.llm import call_llm
 import uuid
 router = APIRouter(prefix="/quiz")
 async def search_logic(query: str, collection: Collection):

Backend/app/llm.py CHANGED Viewed

@@ -66,9 +66,9 @@ async def stream_chat(messages:List[dict], context:str):
     full_history = [system_instruction] + conversation_history
     try:
-        # Ensure you are using the async_client initialized earlier
         stream = await client.chat.completions.create(
-            model="openai/gpt-oss-20b", # Recommended for speed/quality on Groq
             messages=full_history,
             temperature=0.7,
             stream=True

     full_history = [system_instruction] + conversation_history
     try:
         stream = await client.chat.completions.create(
+            model="openai/gpt-oss-20b",
             messages=full_history,
             temperature=0.7,
             stream=True

Backend/app/models/tables.py CHANGED Viewed

@@ -1,8 +1,8 @@
-from sqlalchemy import String
-from sqlalchemy.orm import Mapped, mapped_column
 from datetime import datetime
 from app.database import Base
 class User(Base):
     __tablename__ = "users"
@@ -12,3 +12,14 @@ class User(Base):
     email: Mapped[str] = mapped_column(String(100), unique=True, index=True)
     hashed_password: Mapped[str] = mapped_column(String(255))

+from sqlalchemy import String, LargeBinary, JSON, ForeignKey
+from sqlalchemy.orm import Mapped, mapped_column, relationship
 from datetime import datetime
 from app.database import Base
+from typing import List
 class User(Base):
     __tablename__ = "users"
     email: Mapped[str] = mapped_column(String(100), unique=True, index=True)
     hashed_password: Mapped[str] = mapped_column(String(255))
+    pdf_data: Mapped[list["PDFData"]] = relationship(back_populates="user")
+class PDFData(Base):  # ORM model for the "pdf_data" table; each row points at one "users" row
+    __tablename__ = "pdf_data"
+    id: Mapped[int] = mapped_column(primary_key=True, index=True)  # integer primary key, also indexed
+    pdf_blob: Mapped[bytes] = mapped_column(LargeBinary)  # binary column; presumably the raw PDF bytes - TODO confirm with the writer
+    messages_list: Mapped[List] = mapped_column(JSON)  # JSON column typed as a list; element schema is not visible here
+    pdf_embedding: Mapped[list[float]] = mapped_column(JSON)  # list of floats stored as JSON
+    user_id: Mapped[int] = mapped_column(ForeignKey('users.id'))  # foreign key to users.id
+    user: Mapped["User"] = relationship(back_populates="pdf_data")  # paired with User.pdf_data via back_populates

Backend/app/schema/__init__.py CHANGED Viewed

@@ -1,3 +1,3 @@
-from app.schema.models import UserCreate, Token, LoginRequest, Quiz_input, QuizOutput, IngestRequest, ChatMessage, AI_chat_input
-__all__ = ["UserCreate", "Token", "LoginRequest", "Quiz_input", "QuizOutput", "IngestRequest", "ChatMessage", "AI_chat_input"]


1	+ from app.schema.models import UserCreate, Token, LoginRequest, Quiz_input, QuizOutput, IngestRequest, ChatMessage, AI_chat_input, pdf_input
2
3	+ __all__ = ["UserCreate", "Token", "LoginRequest", "Quiz_input", "QuizOutput", "IngestRequest", "ChatMessage", "AI_chat_input", "pdf_input"]

Backend/app/schema/models.py CHANGED Viewed

@@ -1,7 +1,7 @@
 from pydantic import BaseModel, EmailStr, Field, field_validator, ConfigDict
 from typing import  Optional, Literal, List
 from datetime import datetime
 #--------Auth models--------#
@@ -46,12 +46,10 @@ class QuizOutput(BaseModel):
 class IngestRequest(BaseModel):
     parsed_doc: str = Field(..., description="The main document content to embed")
-    user_prompt: str = Field(..., description="The user prompt associated with this document")
     id: Optional[str] = None
-# #--------Notes models--------#
 class ChatMessage(BaseModel):
     role: Literal["user", "assistant", "system"] = Field(..., description="Role of the message sender")
     content: str = Field(..., min_length=1, description="Message content")
@@ -61,4 +59,9 @@ class AI_chat_input(BaseModel):
     context: str = Field(..., description="The content of the note/document to chat about")
     session_id: str | None = Field(
         None, description="The unique ID of the current chat session (optional)."
-    )

 from pydantic import BaseModel, EmailStr, Field, field_validator, ConfigDict
 from typing import  Optional, Literal, List
 from datetime import datetime
+from fastapi import FastAPI, UploadFile, File
 #--------Auth models--------#
 class IngestRequest(BaseModel):  # request body describing one document to ingest
     parsed_doc: str = Field(..., description="The main document content to embed")
+    user_prompt: Optional[str] = None  # optional; defaults to None when omitted
     id: Optional[str] = None  # optional identifier; defaults to None when omitted
 class ChatMessage(BaseModel):
     role: Literal["user", "assistant", "system"] = Field(..., description="Role of the message sender")
     content: str = Field(..., min_length=1, description="Message content")
     context: str = Field(..., description="The content of the note/document to chat about")
     session_id: str | None = Field(
         None, description="The unique ID of the current chat session (optional)."
+    )
+#--------Notes page models--------#
+class pdf_input(BaseModel):  # model with a single uploaded-file field
+    file: UploadFile = File(..., description="The PDF file to be ingested.")  # declared with FastAPI's File(...); `...` marks the field as required