aki-008 committed on
Commit
dcde7f3
·
1 Parent(s): 4735086

feat: upload notes endpoint

Browse files
Backend/app/api/v1/endpoints/notes.py CHANGED
@@ -1,18 +1,29 @@
1
- from fastapi import APIRouter, Depends, HTTPException, status
2
  from sqlalchemy.ext.asyncio import AsyncSession
3
  from app.models import User
4
  from app.api.deps import get_db, get_current_user
5
- from app.schema import ChatMessage, AI_chat_input
6
- from .prompts import SYSTEM_PROMPT
7
- from app.llm import call_llm, stream_chat
8
  import uuid
9
  from fastapi.responses import StreamingResponse
 
 
 
 
 
 
 
 
 
 
10
 
 
11
 
 
 
12
 
13
- router = APIRouter(prefix="/notes")
14
 
15
- @router.post("/stram_chat", response_class=StreamingResponse)
16
  async def ai_chat(
17
  Input_model: AI_chat_input,
18
  # db: AsyncSession = Depends(get_db),
@@ -23,4 +34,50 @@ async def ai_chat(
23
  return StreamingResponse(
24
  stream_chat(messages_dict, Input_model.context),
25
  media_type="text/plain"
26
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, Depends, HTTPException, status, File, UploadFile
2
  from sqlalchemy.ext.asyncio import AsyncSession
3
  from app.models import User
4
  from app.api.deps import get_db, get_current_user
5
+ from app.schema import ChatMessage, AI_chat_input, pdf_input
6
+ from app.llm import stream_chat
 
7
  import uuid
8
  from fastapi.responses import StreamingResponse
9
+ from chromadb.api.models.Collection import Collection
10
+ from app.api.deps import get_chroma_collection
11
+ from app.api.deps import get_db, get_current_user, get_chroma_client
12
+ from pathlib import Path
13
+ from llama_index.readers.file import PyMuPDFReader
14
+ from llama_index.core.node_parser import SentenceSplitter
15
+ from typing import Annotated
16
+ import shutil
17
+ import os
18
+ from .quiz import ingest_logic
19
 
20
+ router = APIRouter(prefix="/notes")
21
 
22
+ UPLOAD_DIRECTORY = "uploaded_pdfs"
23
+ os.makedirs(UPLOAD_DIRECTORY, exist_ok=True)
24
 
 
25
 
26
+ @router.post("/stream_chat", response_class=StreamingResponse)
27
  async def ai_chat(
28
  Input_model: AI_chat_input,
29
  # db: AsyncSession = Depends(get_db),
 
34
  return StreamingResponse(
35
  stream_chat(messages_dict, Input_model.context),
36
  media_type="text/plain"
37
+ )
38
+
39
@router.post("/upload_notes")
async def upload_notes(
    file: Annotated[UploadFile, File(description="A PDF file to upload")],
    collection: Collection = Depends(get_chroma_collection),
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Accept an uploaded PDF, chunk its text and ingest the chunks.

    The upload is written to a temporary file under ``UPLOAD_DIRECTORY``,
    split into chunks by ``pdf_process``, handed to ``ingest_logic`` together
    with ``collection``, and the temporary file is removed afterwards whether
    or not processing succeeded.

    Args:
        file: The uploaded file from the multipart form.
        collection: Chroma collection passed through to ``ingest_logic``.
        db: Injected database session; not used in this function body.
        current_user: Injected user; not used in this function body
            (presumably present so the route requires authentication --
            confirm against ``get_current_user``).

    Returns:
        ``{"status": "success"}`` once ingestion has completed.

    Raises:
        HTTPException: 400 when the upload carries no usable filename;
            500 when saving, parsing or ingesting fails, or when the PDF
            yields no chunks.
    """
    # Keep only the last path component so a crafted filename such as
    # "../../app/main.py" cannot point outside UPLOAD_DIRECTORY.
    safe_name = Path(file.filename or "").name
    if not safe_name:
        raise HTTPException(status_code=400, detail="Uploaded file has no filename")

    # A random prefix keeps concurrent uploads that share a filename from
    # overwriting (or deleting, in the cleanup below) each other's file.
    file_path = Path(UPLOAD_DIRECTORY) / f"{uuid.uuid4().hex}_{safe_name}"

    try:
        # Persist the upload first: pdf_process reads from this path, so the
        # bytes must be on disk before it is called.
        with file_path.open("wb") as buffer:
            shutil.copyfileobj(file.file, buffer)

        chunks = await pdf_process(str(file_path))
        if not chunks:
            raise ValueError("No chunks availible")

        await ingest_logic(chunks, collection)

        return {"status": "success"}
    except HTTPException:
        # Preserve any deliberate HTTP error instead of rewrapping it as a 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error processing PDF: {str(e)}") from e
    finally:
        # Always drop the temporary copy, including when saving failed midway.
        file_path.unlink(missing_ok=True)
63
+
64
+ # #--------Helper Functions--------#
65
+
66
async def pdf_process(pdf_path: str):
    """Load the PDF at *pdf_path* and return its text as a flat list of chunks.

    Every document returned by ``PyMuPDFReader.load_data`` is split with a
    ``SentenceSplitter`` (``chunk_size=1000``, ``chunk_overlap=20``); the
    per-document chunks are concatenated in document order.
    """
    loaded_docs = PyMuPDFReader().load_data(file_path=pdf_path)
    splitter = SentenceSplitter(chunk_size=1000, chunk_overlap=20)

    return [
        piece
        for document in loaded_docs
        for piece in splitter.split_text(document.text)
    ]
Backend/app/api/v1/endpoints/quiz.py CHANGED
@@ -5,12 +5,11 @@ from app.api.deps import get_db, get_current_user, get_chroma_client
5
  from app.schema import Quiz_input, QuizOutput, IngestRequest
6
  from .prompts import SYSTEM_PROMPT
7
  from fastapi import APIRouter, Depends, HTTPException
8
- from chromadb.api.models.Collection import Collection # Import Collection type
9
  from app.api.deps import get_chroma_collection
10
  from app.llm import call_llm
11
  import uuid
12
 
13
-
14
  router = APIRouter(prefix="/quiz")
15
 
16
  async def search_logic(query: str, collection: Collection):
 
5
  from app.schema import Quiz_input, QuizOutput, IngestRequest
6
  from .prompts import SYSTEM_PROMPT
7
  from fastapi import APIRouter, Depends, HTTPException
8
+ from chromadb.api.models.Collection import Collection
9
  from app.api.deps import get_chroma_collection
10
  from app.llm import call_llm
11
  import uuid
12
 
 
13
  router = APIRouter(prefix="/quiz")
14
 
15
  async def search_logic(query: str, collection: Collection):
Backend/app/llm.py CHANGED
@@ -66,9 +66,9 @@ async def stream_chat(messages:List[dict], context:str):
66
  full_history = [system_instruction] + conversation_history
67
 
68
  try:
69
- # Ensure you are using the async_client initialized earlier
70
  stream = await client.chat.completions.create(
71
- model="openai/gpt-oss-20b", # Recommended for speed/quality on Groq
72
  messages=full_history,
73
  temperature=0.7,
74
  stream=True
 
66
  full_history = [system_instruction] + conversation_history
67
 
68
  try:
69
+
70
  stream = await client.chat.completions.create(
71
+ model="openai/gpt-oss-20b",
72
  messages=full_history,
73
  temperature=0.7,
74
  stream=True
Backend/app/models/tables.py CHANGED
@@ -1,8 +1,8 @@
1
- from sqlalchemy import String
2
- from sqlalchemy.orm import Mapped, mapped_column
3
  from datetime import datetime
4
  from app.database import Base
5
-
6
 
7
  class User(Base):
8
  __tablename__ = "users"
@@ -12,3 +12,14 @@ class User(Base):
12
  email: Mapped[str] = mapped_column(String(100), unique=True, index=True)
13
  hashed_password: Mapped[str] = mapped_column(String(255))
14
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sqlalchemy import String, LargeBinary, JSON, ForeignKey
2
+ from sqlalchemy.orm import Mapped, mapped_column, relationship
3
  from datetime import datetime
4
  from app.database import Base
5
+ from typing import List
6
 
7
  class User(Base):
8
  __tablename__ = "users"
 
12
  email: Mapped[str] = mapped_column(String(100), unique=True, index=True)
13
  hashed_password: Mapped[str] = mapped_column(String(255))
14
 
15
+ pdf_data: Mapped[list["PDFData"]] = relationship(back_populates="user")
16
+
17
class PDFData(Base):
    """ORM model for rows of the ``pdf_data`` table.

    Each row holds a binary PDF payload plus two JSON columns and points at
    one ``users`` row through ``user_id``.
    """

    __tablename__ = "pdf_data"

    # Integer primary key, indexed.
    id: Mapped[int] = mapped_column(primary_key=True, index=True)

    # PDF content stored as a LargeBinary column.
    pdf_blob: Mapped[bytes] = mapped_column(LargeBinary)

    # JSON column annotated as a list; element shape is not fixed here.
    messages_list: Mapped[List] = mapped_column(JSON)

    # JSON column annotated as a list of floats.
    pdf_embedding: Mapped[list[float]] = mapped_column(JSON)

    # Foreign key to users.id; pairs with the relationship below.
    user_id: Mapped[int] = mapped_column(ForeignKey("users.id"))

    # Other side of the relationship is the "pdf_data" attribute on User.
    user: Mapped["User"] = relationship(back_populates="pdf_data")
Backend/app/schema/__init__.py CHANGED
@@ -1,3 +1,3 @@
1
- from app.schema.models import UserCreate, Token, LoginRequest, Quiz_input, QuizOutput, IngestRequest, ChatMessage, AI_chat_input
2
 
3
- __all__ = ["UserCreate", "Token", "LoginRequest", "Quiz_input", "QuizOutput", "IngestRequest", "ChatMessage", "AI_chat_input"]
 
1
+ from app.schema.models import UserCreate, Token, LoginRequest, Quiz_input, QuizOutput, IngestRequest, ChatMessage, AI_chat_input, pdf_input
2
 
3
+ __all__ = ["UserCreate", "Token", "LoginRequest", "Quiz_input", "QuizOutput", "IngestRequest", "ChatMessage", "AI_chat_input", "pdf_input"]
Backend/app/schema/models.py CHANGED
@@ -1,7 +1,7 @@
1
  from pydantic import BaseModel, EmailStr, Field, field_validator, ConfigDict
2
  from typing import Optional, Literal, List
3
  from datetime import datetime
4
-
5
 
6
  #--------Auth models--------#
7
 
@@ -46,12 +46,10 @@ class QuizOutput(BaseModel):
46
 
47
  class IngestRequest(BaseModel):
48
  parsed_doc: str = Field(..., description="The main document content to embed")
49
- user_prompt: str = Field(..., description="The user prompt associated with this document")
50
  id: Optional[str] = None
51
 
52
 
53
- # #--------Notes models--------#
54
-
55
  class ChatMessage(BaseModel):
56
  role: Literal["user", "assistant", "system"] = Field(..., description="Role of the message sender")
57
  content: str = Field(..., min_length=1, description="Message content")
@@ -61,4 +59,9 @@ class AI_chat_input(BaseModel):
61
  context: str = Field(..., description="The content of the note/document to chat about")
62
  session_id: str | None = Field(
63
  None, description="The unique ID of the current chat session (optional)."
64
- )
 
 
 
 
 
 
1
  from pydantic import BaseModel, EmailStr, Field, field_validator, ConfigDict
2
  from typing import Optional, Literal, List
3
  from datetime import datetime
4
+ from fastapi import FastAPI, UploadFile, File
5
 
6
  #--------Auth models--------#
7
 
 
46
 
47
  class IngestRequest(BaseModel):
48
  parsed_doc: str = Field(..., description="The main document content to embed")
49
+ user_prompt: Optional[str] = None
50
  id: Optional[str] = None
51
 
52
 
 
 
53
  class ChatMessage(BaseModel):
54
  role: Literal["user", "assistant", "system"] = Field(..., description="Role of the message sender")
55
  content: str = Field(..., min_length=1, description="Message content")
 
59
  context: str = Field(..., description="The content of the note/document to chat about")
60
  session_id: str | None = Field(
61
  None, description="The unique ID of the current chat session (optional)."
62
+ )
63
+
64
+ #--------Notes page models--------#
65
+
66
class pdf_input(BaseModel):
    """Schema with a single required ``file`` field typed as ``UploadFile``."""

    file: UploadFile = File(
        ...,
        description="The PDF file to be ingested.",
    )