GLNPardhiv committed on
Commit
465ea05
·
1 Parent(s): c1445c9

added docstrings for routeHandlers

Browse files
backend/app/routes/auth.py CHANGED
@@ -17,7 +17,28 @@ router = APIRouter(prefix="/auth", tags=["Authentication"])
17
 
18
  @router.post("/register", response_model=TokenResponse, status_code=status.HTTP_201_CREATED)
19
  def register(payload: UserRegister, db: Session = Depends(get_db)):
20
- """Register a new user account."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  # Check existing username
22
  if db.query(User).filter(User.username == payload.username).first():
23
  raise HTTPException(
@@ -55,7 +76,27 @@ def register(payload: UserRegister, db: Session = Depends(get_db)):
55
 
56
  @router.post("/login", response_model=TokenResponse)
57
  def login(payload: UserLogin, db: Session = Depends(get_db)):
58
- """Login with email and password."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  user = db.query(User).filter(User.email == payload.email).first()
60
 
61
  if not user or not verify_password(payload.password, user.hashed_password):
@@ -76,7 +117,28 @@ def login(payload: UserLogin, db: Session = Depends(get_db)):
76
 
77
  @router.post("/refresh", response_model=TokenResponse)
78
  def refresh_token(payload: RefreshRequest, db: Session = Depends(get_db)):
79
- """Refresh access token."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  user_id = decode_token(payload.refresh_token, token_type="refresh")
81
  if not user_id:
82
  raise HTTPException(
@@ -103,15 +165,59 @@ def refresh_token(payload: RefreshRequest, db: Session = Depends(get_db)):
103
 
104
  @router.get("/me", response_model=UserResponse)
105
  def get_me(user: User = Depends(get_current_user)):
106
- """Get current authenticated user profile."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
  return UserResponse.model_validate(user)
108
 
109
  @router.put("/update")
110
  def update_user_info(payload:UserUpdate,
111
  user: User = Depends(get_current_user),
112
  db: Session = Depends(get_db))-> UserUpdateResponse:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
 
114
- """Update user info."""
 
 
 
 
 
 
 
 
 
 
 
115
  if payload.username is None and payload.email is None:
116
  raise HTTPException(status_code=400, detail="Username and email are required")
117
 
@@ -142,7 +248,32 @@ def update_user_info(payload:UserUpdate,
142
  def update_password(payload:UpdatePassword,
143
  user: User = Depends(get_current_user),
144
  db: Session = Depends(get_db))-> UpdatePasswordResponse:
145
- """Update user password."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
146
  if not payload.password and not payload.confirm_password:
147
  raise HTTPException(status_code=400, detail="Password and confirm_password are required")
148
  if len(payload.password) == 0 and len(payload.confirm_password) == 0:
 
17
 
18
  @router.post("/register", response_model=TokenResponse, status_code=status.HTTP_201_CREATED)
19
  def register(payload: UserRegister, db: Session = Depends(get_db)):
20
+ """
21
+ Register a new user account and return authentication tokens.
22
+
23
+ Creates a new user in the database after validating that the username and
24
+ email are not already taken. The password is hashed before storage. On
25
+ success, access and refresh tokens are generated and returned along with
26
+ the user's public information.
27
+
28
+ Args:
29
+ payload (UserRegister): The registration details including username, email, and password.
30
+ db (Session, optional): Database session dependency. Defaults to Depends(get_db).
31
+
32
+ Returns:
33
+ TokenResponse: An object containing:
34
+ - access_token (str): JWT access token for authenticating API requests.
35
+ - refresh_token (str): JWT refresh token for obtaining new access tokens.
36
+ - user: UserResponse object with the registered user's public information (id, username, email).
37
+
38
+ Raises:
39
+ HTTPException: If the username is already taken (409 Conflict).
40
+ HTTPException: If the email is already registered (409 Conflict).
41
+ """
42
  # Check existing username
43
  if db.query(User).filter(User.username == payload.username).first():
44
  raise HTTPException(
 
76
 
77
  @router.post("/login", response_model=TokenResponse)
78
  def login(payload: UserLogin, db: Session = Depends(get_db)):
79
+ """
80
+ Authenticate a user with email and password.
81
+
82
+ Verifies that the provided email exists in the database and that the
83
+ plain text password matches the stored hash. Upon successful authentication,
84
+ generates new access and refresh tokens for the user session.
85
+
86
+ Args:
87
+ payload (UserLogin): User login data containing email and password.
88
+ db (Session, optional): Database session dependency. Defaults to Depends(get_db).
89
+
90
+ Returns:
91
+ TokenResponse: An object containing:
92
+ - access_token: JWT access token for API authentication.
93
+ - refresh_token: JWT refresh token for obtaining new access tokens.
94
+ - user: UserResponse object with the authenticated user's details.
95
+
96
+ Raises:
97
+ HTTPException: 401 Unauthorized if the email is not found or the
98
+ password does not match the stored hash.
99
+ """
100
  user = db.query(User).filter(User.email == payload.email).first()
101
 
102
  if not user or not verify_password(payload.password, user.hashed_password):
 
117
 
118
  @router.post("/refresh", response_model=TokenResponse)
119
  def refresh_token(payload: RefreshRequest, db: Session = Depends(get_db)):
120
+ """
121
+ Refresh both access and refresh tokens using a valid refresh token.
122
+
123
+ Decodes the provided refresh token to extract the user ID. If the token
124
+ is valid and the user still exists in the database, a new pair of access
125
+ and refresh tokens is generated and returned.
126
+
127
+ Args:
128
+ payload (RefreshRequest): An object containing the refresh token to be used for generating new tokens.
129
+ db (Session, optional): Database session dependency. Defaults to Depends(get_db).
130
+
131
+ Returns:
132
+ TokenResponse: A fresh set of credentials containing:
133
+ - access_token: New JWT access token.
134
+ - refresh_token: New JWT refresh token.
135
+ - user: UserResponse object with the user's public details.
136
+
137
+ Raises:
138
+ HTTPException: 401 Unauthorized if:
139
+ - The refresh token is invalid or expired.
140
+ - The user associated with the token no longer exists.
141
+ """
142
  user_id = decode_token(payload.refresh_token, token_type="refresh")
143
  if not user_id:
144
  raise HTTPException(
 
165
 
166
  @router.get("/me", response_model=UserResponse)
167
  def get_me(user: User = Depends(get_current_user)):
168
+ """Retrieve the profile of the currently authenticated user.
169
+
170
+ Returns the user object associated with the valid access token provided
171
+ in the request. This endpoint is useful for fetching the current user's
172
+ information after login or token refresh.
173
+
174
+ Args:
175
+ user: The currently authenticated user, obtained from the `get_current_user` dependency.
176
+
177
+ Returns:
178
+ UserResponse: The authenticated user's public profile data, including
179
+ id, username, email, and any other exposed fields.
180
+
181
+ Note:
182
+ This endpoint relies on the `get_current_user` dependency, which
183
+ will automatically return a 401 Unauthorized response if the access
184
+ token is missing, invalid, or expired. Therefore, this function
185
+ itself does not need to raise any HTTP exceptions.
186
+ """
187
  return UserResponse.model_validate(user)
188
 
189
  @router.put("/update")
190
  def update_user_info(payload:UserUpdate,
191
  user: User = Depends(get_current_user),
192
  db: Session = Depends(get_db))-> UserUpdateResponse:
193
+ """Update the current user's profile information.
194
+
195
+ Allows an authenticated user to change their username and/or email address.
196
+ At least one of `username` or `email` must be provided. If a new value
197
+ is supplied, it is checked for uniqueness against existing users. On
198
+ success, the updated user record is returned.
199
+
200
+ Args:
201
+ payload: UserUpdate object containing optional `username` and `email` fields to update.
202
+ user: The currently authenticated user, obtained from the `get_current_user` dependency.
203
+ db: SQLAlchemy database session, obtained from the dependency.
204
+
205
+ Returns:
206
+ UserUpdateResponse: The updated user profile data (same structure as
207
+ the database model, exposed through the response model).
208
 
209
+ Raises:
210
+ HTTPException: 400 if:
211
+ - Neither `username` nor `email` is provided.
212
+ - The new username is already taken.
213
+ - The new email is already registered (checks both fields).
214
+ - A database error occurs (e.g., integrity or connection issue).
215
+
216
+ Note:
217
+ The function commits changes to the database and refreshes the user
218
+ instance before returning. Any `SQLAlchemyError` triggers a rollback
219
+ and a 400 response.
220
+ """
221
  if payload.username is None and payload.email is None:
222
  raise HTTPException(status_code=400, detail="Username and email are required")
223
 
 
248
  def update_password(payload:UpdatePassword,
249
  user: User = Depends(get_current_user),
250
  db: Session = Depends(get_db))-> UpdatePasswordResponse:
251
+ """Update the authenticated user's password.
252
+
253
+ Validates that both the new password and confirmation are provided and
254
+ match each other. If valid, the password is hashed and stored. The user
255
+ record is then committed to the database.
256
+
257
+ Args:
258
+ payload: UpdatePassword object containing `password` and `confirm_password` fields.
259
+ user: The currently authenticated user, obtained from the `get_current_user` dependency.
260
+ db: SQLAlchemy database session, obtained from the dependency.
261
+
262
+ Returns:
263
+ UpdatePasswordResponse: The updated user object (typically containing
264
+ user details excluding the password hash).
265
+
266
+ Raises:
267
+ HTTPException: 400 if:
268
+ - Either `password` or `confirm_password` is missing or empty.
269
+ - The two password fields do not match.
270
+ - A database error (SQLAlchemyError) occurs during commit.
271
+
272
+ Note:
273
+ The function hashes the password using `hash_password()` before
274
+ saving. Any `SQLAlchemyError` triggers a rollback and raises a 400
275
+ response.
276
+ """
277
  if not payload.password and not payload.confirm_password:
278
  raise HTTPException(status_code=400, detail="Password and confirm_password are required")
279
  if len(payload.password) == 0 and len(payload.confirm_password) == 0:
backend/app/routes/chat.py CHANGED
@@ -26,7 +26,33 @@ def ask_question(
26
  user: User = Depends(get_current_user),
27
  db: Session = Depends(get_db),
28
  ):
29
- """Ask a question with RAG retrieval (non-streaming)."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  # Validate document exists if specified
31
  if payload.document_id:
32
  doc = db.query(Document).filter(
@@ -67,7 +93,41 @@ def ask_question_stream(
67
  user: User = Depends(get_current_user),
68
  db: Session = Depends(get_db),
69
  ):
70
- """Ask a question with SSE streaming response."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  # Validate document
72
  if payload.document_id:
73
  doc = db.query(Document).filter(
@@ -135,7 +195,25 @@ def get_chat_history(
135
  user: User = Depends(get_current_user),
136
  db: Session = Depends(get_db),
137
  ):
138
- """Get chat history for a specific document."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
  messages = (
140
  db.query(ChatMessage)
141
  .filter(
@@ -173,11 +251,32 @@ def export_chat_history(
173
  token: Optional[str] = None,
174
  db: Session = Depends(get_db),
175
  ):
176
- """Export chat history for a document as a downloadable .md or .txt file.
177
-
178
- Accepts auth via either:
179
- - Authorization: Bearer <token> header (standard)
180
- - ?token=<jwt> query parameter (for browser downloads)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
181
  """
182
  from fastapi import Request
183
  from app.auth import decode_token as _decode
@@ -245,7 +344,20 @@ def clear_chat_history(
245
  user: User = Depends(get_current_user),
246
  db: Session = Depends(get_db),
247
  ):
248
- """Clear chat history for a specific document."""
 
 
 
 
 
 
 
 
 
 
 
 
 
249
  db.query(ChatMessage).filter(
250
  ChatMessage.user_id == user.id,
251
  ChatMessage.document_id == document_id,
@@ -263,7 +375,31 @@ def _save_message(
263
  content: str,
264
  sources: list = None,
265
  ):
266
- """Helper: save a chat message to the database."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
267
  msg = ChatMessage(
268
  user_id=user_id,
269
  document_id=document_id,
@@ -276,7 +412,23 @@ def _save_message(
276
 
277
 
278
  def _format_markdown(doc, messages) -> str:
279
- """Format chat history as a Markdown document."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
280
  lines = [
281
  f"# Chat History β€” {doc.original_name}",
282
  "",
@@ -324,7 +476,23 @@ def _format_markdown(doc, messages) -> str:
324
 
325
 
326
  def _format_plaintext(doc, messages) -> str:
327
- """Format chat history as plain text."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
328
  lines = [
329
  f"Chat History β€” {doc.original_name}",
330
  f"Exported at: {__import__('datetime').datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
 
26
  user: User = Depends(get_current_user),
27
  db: Session = Depends(get_db),
28
  ):
29
+ """Ask a question with RAG retrieval (non-streaming).
30
+
31
+ Processes a user's question by retrieving relevant document chunks,
32
+ generating an answer using an LLM, and saving the conversation to chat
33
+ history. If a `document_id` is provided, the retrieval is scoped to that
34
+ specific document; otherwise, it searches across all documents owned by
35
+ the user.
36
+
37
+ Args:
38
+ payload: ChatRequest containing the `question` text and optionally a
39
+ `document_id` to limit the retrieval scope.
40
+ user: The currently authenticated user, obtained from the dependency.
41
+ db: SQLAlchemy database session, obtained from the dependency.
42
+
43
+ Returns:
44
+ ChatResponse: An object containing:
45
+ - answer: The generated answer text.
46
+ - sources: A list of `SourceChunk` objects with metadata about
47
+ the retrieved chunks (e.g., filename, page number, text snippet).
48
+ - document_id: The document ID that was used (if any).
49
+
50
+ Raises:
51
+ HTTPException: 404 if the specified `document_id` does not exist or
52
+ does not belong to the authenticated user.
53
+ HTTPException: 400 if the document exists but its status is not
54
+ "ready" (e.g., still processing or failed).
55
+ """
56
  # Validate document exists if specified
57
  if payload.document_id:
58
  doc = db.query(Document).filter(
 
93
  user: User = Depends(get_current_user),
94
  db: Session = Depends(get_db),
95
  ):
96
+ """Ask a question with Server-Sent Events (SSE) streaming response.
97
+
98
+ Processes a user's question using RAG and streams the answer token by
99
+ token over SSE. The user's question is saved to chat history immediately.
100
+ The assistant's answer is accumulated on the server and saved to history
101
+ only after the stream completes. If a `document_id` is provided, retrieval
102
+ is scoped to that document.
103
+
104
+ Args:
105
+ payload: ChatRequest containing the `question` text and optionally a
106
+ `document_id` to limit the retrieval scope.
107
+ user: The currently authenticated user, obtained from the dependency.
108
+ db: SQLAlchemy database session, obtained from the dependency.
109
+
110
+ Returns:
111
+ StreamingResponse: A FastAPI `StreamingResponse` with:
112
+ - media_type: "text/event-stream"
113
+ - Headers: Cache-Control, Connection, and X-Accel-Buffering set
114
+ for proper SSE behavior.
115
+ - Body: A generator yielding SSE messages with `token` (partial
116
+ answer) and `sources` (final source metadata) events.
117
+
118
+ Raises:
119
+ HTTPException: 404 if the specified `document_id` does not exist or
120
+ does not belong to the authenticated user.
121
+ HTTPException: 400 if the document exists but its status is not
122
+ "ready" (e.g., still processing or failed).
123
+
124
+ Note:
125
+ The streaming response uses a generator `event_stream` that yields
126
+ raw SSE chunks. The assistant's full answer is reconstructed from
127
+ the stream to save the complete conversation history. A separate
128
+ database session is created inside the generator to avoid using the
129
+ closed request session.
130
+ """
131
  # Validate document
132
  if payload.document_id:
133
  doc = db.query(Document).filter(
 
195
  user: User = Depends(get_current_user),
196
  db: Session = Depends(get_db),
197
  ):
198
+ """Retrieve the complete chat history for a specific document.
199
+
200
+ Fetches all messages (both user and assistant) associated with the given
201
+ document and the authenticated user, ordered chronologically from oldest
202
+ to newest. Assistant messages that contain source metadata will have the
203
+ `sources` field populated.
204
+
205
+ Args:
206
+ document_id: The unique identifier of the document whose chat history is requested.
207
+ user: The currently authenticated user, obtained from the dependency.
208
+ db: SQLAlchemy database session, obtained from the dependency.
209
+
210
+ Returns:
211
+ ChatHistoryResponse: An object containing:
212
+ - messages: A list of `ChatMessageResponse` objects, each with
213
+ `id`, `role` ("user" or "assistant"), `content`, `sources`
214
+ (list of `SourceChunk` for assistant messages), and `created_at`.
215
+ - document_id: The document ID that was queried.
216
+ """
217
  messages = (
218
  db.query(ChatMessage)
219
  .filter(
 
251
  token: Optional[str] = None,
252
  db: Session = Depends(get_db),
253
  ):
254
+ """Export the chat history for a document as a downloadable file.
255
+
256
+ Supports Markdown (.md) or plain text (.txt) export. The function accepts
257
+ authentication via either the standard `Authorization: Bearer <token>`
258
+ header (handled by the dependency chain) or a `token` query parameter to
259
+ facilitate browser-initiated downloads that cannot set custom headers.
260
+
261
+ Args:
262
+ document_id: The unique identifier of the document whose chat history is to be exported.
263
+ format: Output format, either "md" (Markdown) or "txt" (plain text). Defaults to "md".
264
+ token: Optional JWT token passed as a query parameter. Used for browser
265
+ downloads when the `Authorization` header is not available.
266
+ db: SQLAlchemy database session, obtained from the dependency.
267
+
268
+ Returns:
269
+ Response: A FastAPI `Response` object with:
270
+ - `content`: Formatted chat history as a string.
271
+ - `media_type`: `text/markdown` or `text/plain`.
272
+ - `headers`: `Content-Disposition` attachment header with a generated filename.
273
+
274
+ Raises:
275
+ HTTPException: 401 if neither the token query parameter nor a valid
276
+ bearer token provides an authenticated user.
277
+ HTTPException: 400 if the `format` parameter is not "md" or "txt".
278
+ HTTPException: 404 if the document does not exist or does not belong
279
+ to the user, or if no chat messages are found for the document.
280
  """
281
  from fastapi import Request
282
  from app.auth import decode_token as _decode
 
344
  user: User = Depends(get_current_user),
345
  db: Session = Depends(get_db),
346
  ):
347
+ """Delete all chat messages associated with a specific document.
348
+
349
+ Removes every chat message (both user and assistant) linked to the given
350
+ `document_id` and the authenticated user. The deletion is permanent and
351
+ cannot be undone.
352
+
353
+ Args:
354
+ document_id: The unique identifier of the document whose chat history should be cleared.
355
+ user: The currently authenticated user, obtained from the dependency.
356
+ db: SQLAlchemy database session, obtained from the dependency.
357
+
358
+ Returns:
359
+ dict: A simple JSON object with a `message` field confirming the deletion.
360
+ """
361
  db.query(ChatMessage).filter(
362
  ChatMessage.user_id == user.id,
363
  ChatMessage.document_id == document_id,
 
375
  content: str,
376
  sources: list = None,
377
  ):
378
+ """Save a chat message to the database.
379
+
380
+ Creates a `ChatMessage` record with the provided user, document,
381
+ role, content, and optional source metadata. The message is added to
382
+ the session and committed immediately. The database session must be
383
+ managed by the caller (e.g., closed after use).
384
+
385
+ Args:
386
+ user_id: The ID of the authenticated user.
387
+ document_id: Optional document ID that the message pertains to.
388
+ Can be `None` for global chat contexts.
389
+ db: SQLAlchemy database session (active, typically from a dependency).
390
+ role: The message sender role, e.g., "user" or "assistant".
391
+ content: The full text content of the message.
392
+ sources: Optional list of source dictionaries (usually from RAG
393
+ retrieval) to be stored as JSON. Defaults to `None`.
394
+
395
+ Returns:
396
+ None
397
+
398
+ Note:
399
+ The function commits the transaction. It does not close the session,
400
+ leaving that responsibility to the caller. If `sources` is provided,
401
+ it is serialized using `json.dumps()`.
402
+ """
403
  msg = ChatMessage(
404
  user_id=user_id,
405
  document_id=document_id,
 
412
 
413
 
414
  def _format_markdown(doc, messages) -> str:
415
+ """Format chat history as a Markdown document.
416
+
417
+ Generates a Markdown string containing the document metadata and the
418
+ full conversation. User messages are labeled "You", assistant messages
419
+ are labeled "Assistant". For assistant responses, if source information
420
+ is available, it is rendered as a numbered list with filename, page,
421
+ confidence, and a text preview.
422
+
423
+ Args:
424
+ doc: The Document object (must have `original_name` attribute).
425
+ messages: List of ChatMessage objects, each with attributes:
426
+ `role` (str), `content` (str), `created_at` (datetime, optional),
427
+ and `sources_json` (str, JSON-encoded list of source dicts).
428
+
429
+ Returns:
430
+ str: A Markdown string ready for writing to a `.md` file.
431
+ """
432
  lines = [
433
  f"# Chat History β€” {doc.original_name}",
434
  "",
 
476
 
477
 
478
  def _format_plaintext(doc, messages) -> str:
479
+ """Format chat history as a plain text document.
480
+
481
+ Generates a plain text string containing the document metadata and the
482
+ full conversation. User messages are labeled "You", assistant messages
483
+ are labeled "Assistant". For assistant responses, if source information
484
+ is available, it is rendered as a numbered list with filename, page,
485
+ and confidence (text preview is omitted in plain text format).
486
+
487
+ Args:
488
+ doc: The Document object (must have `original_name` attribute).
489
+ messages: List of ChatMessage objects, each with attributes:
490
+ `role` (str), `content` (str), `created_at` (datetime, optional),
491
+ and `sources_json` (str, JSON-encoded list of source dicts).
492
+
493
+ Returns:
494
+ str: A plain text string ready for writing to a `.txt` file.
495
+ """
496
  lines = [
497
  f"Chat History β€” {doc.original_name}",
498
  f"Exported at: {__import__('datetime').datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
backend/app/routes/documents.py CHANGED
@@ -32,10 +32,28 @@ ALLOWED_MIME_TYPES = settings.ALLOWED_MIME_TYPES
32
 
33
 
34
  async def validate_upload(file: UploadFile):
35
- """Validate an incoming UploadFile. Returns path to a temporary saved file.
36
 
37
- Checks extension, size (against settings.MAX_UPLOAD_SIZE_MB), MIME signature
38
- using libmagic, and attempts to parse the file for deep validation.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  """
40
  if not file.filename:
41
  raise HTTPException(status_code=400, detail="No filename provided")
@@ -97,8 +115,26 @@ async def validate_upload(file: UploadFile):
97
 
98
  def _ingest_document(document_id: str, filepath: str, original_name: str, user_id: str):
99
  """
100
- Background task: chunk document, generate embeddings, store in ChromaDB.
101
- Updates document status in the database.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
  """
103
  from app.database import SessionLocal
104
 
@@ -162,7 +198,31 @@ async def upload_document(
162
  user: User = Depends(get_current_user),
163
  db: Session = Depends(get_db),
164
  ):
165
- """Upload a document for RAG processing."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166
  # ── Validate file type ───────────────────────────
167
  if not file.filename:
168
  raise HTTPException(status_code=400, detail="No filename provided")
@@ -219,6 +279,26 @@ def list_documents(
219
  user: User = Depends(get_current_user),
220
  db: Session = Depends(get_db),
221
  ):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
222
  """Number of rows to skip"""
223
  skip: int = (page - 1) * per_page
224
 
@@ -254,7 +334,24 @@ def get_document(
254
  user: User = Depends(get_current_user),
255
  db: Session = Depends(get_db),
256
  ):
257
- """Get a specific document's details."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
258
  doc = db.query(Document).filter(
259
  Document.id == document_id,
260
  Document.user_id == user.id,
@@ -272,7 +369,25 @@ def serve_pdf(
272
  user: User = Depends(get_current_user),
273
  db: Session = Depends(get_db),
274
  ):
275
- """Serve the PDF file for the document viewer."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
276
  doc = db.query(Document).filter(
277
  Document.id == document_id,
278
  Document.user_id == user.id,
@@ -299,7 +414,30 @@ def delete_document(
299
  user: User = Depends(get_current_user),
300
  db: Session = Depends(get_db),
301
  ):
302
- """Delete a document and its vector embeddings."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
303
  doc = db.query(Document).filter(
304
  Document.id == document_id,
305
  Document.user_id == user.id,
 
32
 
33
 
34
  async def validate_upload(file: UploadFile):
35
+ """Validate an uploaded file and save it to a temporary file.
36
 
37
+ Checks the file extension, size (against `settings.MAX_UPLOAD_SIZE_MB`),
38
+ MIME type via libmagic, and performs deep validation by attempting to
39
+ parse the file (PDF with pypdf, DOCX with python-docx). On success,
40
+ returns the path to a temporary saved file.
41
+
42
+ Args:
43
+ file: The FastAPI UploadFile object to validate.
44
+
45
+ Returns:
46
+ str: Path to the temporary saved file that passed all validations.
47
+
48
+ Raises:
49
+ HTTPException: With status code 400 if:
50
+ - No filename is provided.
51
+ - The file extension is not allowed (only .pdf or .docx).
52
+ - The file size exceeds the maximum limit.
53
+ - The MIME type does not match allowed types for the extension.
54
+ - The file is corrupted or cannot be parsed (invalid PDF/DOCX).
55
+ HTTPException: With status code 500 if:
56
+ - 'python-magic' dependency is missing on the server.
57
  """
58
  if not file.filename:
59
  raise HTTPException(status_code=400, detail="No filename provided")
 
115
 
116
  def _ingest_document(document_id: str, filepath: str, original_name: str, user_id: str):
117
  """
118
+ Process a document in the background: chunk document, generate embeddings, and store in ChromaDB.
119
+
120
+ This function is intended to be run as a background task.
121
+ It creates its own database session, updates the
122
+ document status, extracts text, splits into chunks, generates embeddings,
123
+ stores everything in ChromaDB, and marks the document as 'ready'. On
124
+ failure, it sets status to 'failed' and records the error message.
125
+
126
+ Args:
127
+ document_id: Unique identifier of the document in the database.
128
+ filepath: Absolute or relative path to the uploaded file on disk.
129
+ original_name: Original filename provided by the user (for logging and metadata).
130
+ user_id: Identifier of the user who owns the document.
131
+
132
+ Returns:
133
+ None
134
+
135
+ Note:
136
+ This function does not raise exceptions to the caller;
137
+ all errors are logged and the database record is updated accordingly.
138
  """
139
  from app.database import SessionLocal
140
 
 
198
  user: User = Depends(get_current_user),
199
  db: Session = Depends(get_db),
200
  ):
201
+ """
202
+ Upload a document for RAG processing.
203
+
204
+ Validates the uploaded file (extension, size, MIME type, integrity),
205
+ saves it to the user's directory, creates a database record with status
206
+ 'pending', and schedules a background task for chunking and embedding.
207
+
208
+ Args:
209
+ background_tasks: FastAPI BackgroundTasks instance to run the ingestion process asynchronously.
210
+ file: The uploaded file, provided as a multipart/form-data field in the request.
211
+ user: The currently authenticated user, injected by the `get_current_user` dependency.
212
+ db: Database session, injected by the `get_db` dependency.
213
+
214
+ Returns:
215
+ DocumentResponse: The created document record, validated against the
216
+ response model (includes id, filename, original_name, file_size, status, etc.).
217
+
218
+ Raises:
219
+ HTTPException: With status code 400 if:
220
+ - No filename is provided.
221
+ - The file extension is not allowed (only .pdf or .docx).
222
+ - The file fails validation checks (size, MIME type, integrity).
223
+ HTTPException: With status code 500 if:
224
+ - The server lacks the 'python-magic' dependency.
225
+ """
226
  # ── Validate file type ───────────────────────────
227
  if not file.filename:
228
  raise HTTPException(status_code=400, detail="No filename provided")
 
279
  user: User = Depends(get_current_user),
280
  db: Session = Depends(get_db),
281
  ):
282
+ """
283
+ List all documents for the authenticated user with pagination.
284
+
285
+ Returns a paginated list of documents belonging to the current user,
286
+ ordered by upload date (newest first).
287
+
288
+ Args:
289
+ page: The page number to retrieve (1-indexed). Defaults to 1.
290
+ per_page: The number of documents to return per page. Defaults to 20.
291
+ user: The currently authenticated user, injected by the `get_current_user` dependency.
292
+ db: Database session, injected by the `get_db` dependency.
293
+
294
+ Returns:
295
+ DocumentListResponse: A response model containing:
296
+ - items: A list of DocumentResponse objects for the current page.
297
+ - total: The total number of documents for the user.
298
+ - page: The current page number.
299
+ - pages: The total number of pages available.
300
+ """
301
+
302
  """Number of rows to skip"""
303
  skip: int = (page - 1) * per_page
304
 
 
334
  user: User = Depends(get_current_user),
335
  db: Session = Depends(get_db),
336
  ):
337
+ """
338
+ Retrieve a specific document by its ID for the authenticated user.
339
+
340
+ Fetches the document that matches both the provided `document_id` and
341
+ the current user's ID. If no such document exists, a 404 error is raised.
342
+
343
+ Args:
344
+ document_id: The unique identifier of the document to retrieve.
345
+ user: The currently authenticated user, injected by the `get_current_user` dependency.
346
+ db: Database session, injected by the `get_db` dependency.
347
+
348
+ Returns:
349
+ DocumentResponse: The document record that matches the criteria, validated against the response model
350
+ (includes id, filename, original_name, file_size, status, etc.).
351
+
352
+ Raises:
353
+ HTTPException: With status code 404 if the document is not found or does not belong to the authenticated user.
354
+ """
355
  doc = db.query(Document).filter(
356
  Document.id == document_id,
357
  Document.user_id == user.id,
 
369
  user: User = Depends(get_current_user),
370
  db: Session = Depends(get_db),
371
  ):
372
+ """
373
+ Serve the PDF file for the document viewer.
374
+
375
+ Retrieves the document from the database to verify ownership, then
376
+ returns the actual PDF file from disk as a downloadable response.
377
+
378
+ Args:
379
+ document_id: The unique identifier of the document whose PDF is to be served.
380
+ user: The currently authenticated user, injected by the `get_current_user` dependency.
381
+ db: Database session, injected by the `get_db` dependency.
382
+
383
+ Returns:
384
+ FileResponse: A FastAPI FileResponse object that streams the PDF
385
+ file to the client with the correct media type and original filename.
386
+
387
+ Raises:
388
+ HTTPException: 404 if the document does not exist or does not belong
389
+ to the authenticated user, or if the file is missing on disk.
390
+ """
391
  doc = db.query(Document).filter(
392
  Document.id == document_id,
393
  Document.user_id == user.id,
 
414
  user: User = Depends(get_current_user),
415
  db: Session = Depends(get_db),
416
  ):
417
+ """
418
+ Delete a document and its associated vector embeddings.
419
+
420
+ Removes the document from the database, deletes the physical file from
421
+ disk, and attempts to delete all corresponding vector chunks from ChromaDB.
422
+ If ChromaDB deletion fails, the error is logged but does not block the
423
+ overall operation.
424
+
425
+ Args:
426
+ document_id: The unique identifier of the document to delete.
427
+ user: The currently authenticated user, injected by the `get_current_user` dependency.
428
+ db: Database session, injected by the `get_db` dependency.
429
+
430
+ Returns:
431
+ dict: A JSON response containing a success message confirming the deletion of the document.
432
+
433
+ Raises:
434
+ HTTPException: With status code 404 if the document is not found or does not belong to the authenticated user.
435
+
436
+ Note:
437
+ ChromaDB deletion errors are caught and logged only; they do not
438
+ raise an HTTP exception because the main document record is already
439
+ removed from the database.
440
+ """
441
  doc = db.query(Document).filter(
442
  Document.id == document_id,
443
  Document.user_id == user.id,