eeshanyaj commited on
Commit
a236811
·
1 Parent(s): f19ee9b

added many new features

Browse files
app/api/v1/conversation_routes.py CHANGED
@@ -1,3 +1,9 @@
 
 
 
 
 
 
1
  """
2
  Conversation & Chat API Endpoints (UNIFIED)
3
 
@@ -23,7 +29,7 @@ from app.models.conversation import (
23
  UpdateConversationRequest,
24
  ConversationResponse,
25
  ConversationListResult,
26
- Message
27
  )
28
 
29
 
@@ -505,6 +511,132 @@ async def get_conversation_stats(
505
  )
506
 
507
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
508
  # ============================================================================
509
  # HEALTH CHECK
510
  # ============================================================================
 
1
+ # ============================================================================
2
+ # backend/app/api/v1/conversation_routes.py
3
+ # ============================================================================
4
+
5
+
6
+
7
  """
8
  Conversation & Chat API Endpoints (UNIFIED)
9
 
 
29
  UpdateConversationRequest,
30
  ConversationResponse,
31
  ConversationListResult,
32
+ ReactToMessageRequest # 🆕 NEW
33
  )
34
 
35
 
 
511
  )
512
 
513
 
514
+ # ========================================================================
515
+ # 🆕 NEW ENDPOINTS - Add at bottom before health check
516
+ # ========================================================================
517
+
518
+ @router.post("/conversation/{conversation_id}/message/{message_index}/react")
519
+ async def react_to_message(
520
+ conversation_id: str,
521
+ message_index: int,
522
+ request: ReactToMessageRequest,
523
+ current_user: TokenData = Depends(get_current_user)
524
+ ):
525
+ """
526
+ 👍👎 React to a message.
527
+
528
+ - message_index: Index of message in conversation (0-based)
529
+ - reaction: 'like' or 'dislike'
530
+
531
+ Replaces existing reaction if user reacts again.
532
+ User must own the conversation.
533
+ """
534
+ try:
535
+ # Get conversation
536
+ conversation = await conversation_service.get_conversation(
537
+ conversation_id=conversation_id,
538
+ user_id=current_user.user_id
539
+ )
540
+
541
+ if not conversation:
542
+ raise HTTPException(
543
+ status_code=status.HTTP_404_NOT_FOUND,
544
+ detail="Conversation not found or access denied"
545
+ )
546
+
547
+ # Validate message index
548
+ if message_index < 0 or message_index >= len(conversation.messages):
549
+ raise HTTPException(
550
+ status_code=status.HTTP_400_BAD_REQUEST,
551
+ detail=f"Invalid message index. Conversation has {len(conversation.messages)} messages."
552
+ )
553
+
554
+ # Can only react to assistant messages
555
+ message = conversation.messages[message_index]
556
+ if message.role != 'assistant':
557
+ raise HTTPException(
558
+ status_code=status.HTTP_400_BAD_REQUEST,
559
+ detail="Can only react to assistant messages"
560
+ )
561
+
562
+ # Update reaction in MongoDB
563
+ await conversation_repository.update_message_reaction(
564
+ conversation_id=conversation_id,
565
+ message_index=message_index,
566
+ reaction=request.reaction
567
+ )
568
+
569
+ return {
570
+ "message": "Reaction updated successfully",
571
+ "conversation_id": conversation_id,
572
+ "message_index": message_index,
573
+ "reaction": request.reaction
574
+ }
575
+
576
+ except HTTPException:
577
+ raise
578
+ except Exception as e:
579
+ print(f"❌ React to message error: {e}")
580
+ raise HTTPException(
581
+ status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
582
+ detail=f"Failed to update reaction: {str(e)}"
583
+ )
584
+
585
+
586
+ @router.delete("/conversation/{conversation_id}/message/{message_index}/react")
587
+ async def remove_reaction(
588
+ conversation_id: str,
589
+ message_index: int,
590
+ current_user: TokenData = Depends(get_current_user)
591
+ ):
592
+ """
593
+ ❌ Remove reaction from a message.
594
+
595
+ User must own the conversation.
596
+ """
597
+ try:
598
+ # Get conversation
599
+ conversation = await conversation_service.get_conversation(
600
+ conversation_id=conversation_id,
601
+ user_id=current_user.user_id
602
+ )
603
+
604
+ if not conversation:
605
+ raise HTTPException(
606
+ status_code=status.HTTP_404_NOT_FOUND,
607
+ detail="Conversation not found or access denied"
608
+ )
609
+
610
+ # Validate message index
611
+ if message_index < 0 or message_index >= len(conversation.messages):
612
+ raise HTTPException(
613
+ status_code=status.HTTP_400_BAD_REQUEST,
614
+ detail=f"Invalid message index"
615
+ )
616
+
617
+ # Remove reaction in MongoDB
618
+ await conversation_repository.update_message_reaction(
619
+ conversation_id=conversation_id,
620
+ message_index=message_index,
621
+ reaction=None # Remove reaction
622
+ )
623
+
624
+ return {
625
+ "message": "Reaction removed successfully",
626
+ "conversation_id": conversation_id,
627
+ "message_index": message_index
628
+ }
629
+
630
+ except HTTPException:
631
+ raise
632
+ except Exception as e:
633
+ print(f"❌ Remove reaction error: {e}")
634
+ raise HTTPException(
635
+ status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
636
+ detail=f"Failed to remove reaction: {str(e)}"
637
+ )
638
+
639
+
640
  # ============================================================================
641
  # HEALTH CHECK
642
  # ============================================================================
app/api/v1/file_routes.py ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, UploadFile, File, Depends, HTTPException
2
+ from typing import Dict, Any
3
+ from app.services.file_service import file_service
4
+ from app.utils.dependencies import get_current_user
5
+ from app.models.user import TokenData
6
+
7
+ router = APIRouter(prefix="/files", tags=["Files"])
8
+
9
+
10
+ @router.post("/upload/image", response_model=Dict[str, Any])
11
+ async def upload_image(
12
+ file: UploadFile = File(..., description="Image file (JPG, PNG, WEBP)"),
13
+ current_user: TokenData = Depends(get_current_user)
14
+ ):
15
+ """
16
+ 📷 Upload image with OCR text extraction.
17
+
18
+ - Extracts text from image using Tesseract OCR
19
+ - Saves file to user's folder
20
+ - Max size: 10MB
21
+ """
22
+ try:
23
+ result = await file_service.process_image(file, current_user.user_id)
24
+ return {"success": True, "data": result}
25
+ except HTTPException:
26
+ raise
27
+ except Exception as e:
28
+ raise HTTPException(500, f"Image upload failed: {str(e)}")
29
+
30
+
31
+ @router.post("/upload/pdf", response_model=Dict[str, Any])
32
+ async def upload_pdf(
33
+ file: UploadFile = File(..., description="PDF document"),
34
+ current_user: TokenData = Depends(get_current_user)
35
+ ):
36
+ """
37
+ 📄 Upload PDF with text extraction.
38
+
39
+ - Extracts all text from PDF pages
40
+ - Returns page count
41
+ - Max size: 10MB
42
+ """
43
+ try:
44
+ result = await file_service.process_pdf(file, current_user.user_id)
45
+ return {"success": True, "data": result}
46
+ except HTTPException:
47
+ raise
48
+ except Exception as e:
49
+ raise HTTPException(500, f"PDF upload failed: {str(e)}")
50
+
51
+
52
+ @router.post("/upload/document", response_model=Dict[str, Any])
53
+ async def upload_document(
54
+ file: UploadFile = File(..., description="DOCX or TXT file"),
55
+ current_user: TokenData = Depends(get_current_user)
56
+ ):
57
+ """
58
+ 📝 Upload DOCX or TXT document.
59
+
60
+ - Extracts text content
61
+ - Supports DOCX and TXT formats
62
+ - Max size: 10MB
63
+ """
64
+ try:
65
+ if file.content_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
66
+ result = await file_service.process_docx(file, current_user.user_id)
67
+ elif file.content_type == "text/plain":
68
+ result = await file_service.process_text_file(file, current_user.user_id)
69
+ else:
70
+ raise HTTPException(400, "Unsupported document type. Use DOCX or TXT.")
71
+
72
+ return {"success": True, "data": result}
73
+ except HTTPException:
74
+ raise
75
+ except Exception as e:
76
+ raise HTTPException(500, f"Document upload failed: {str(e)}")
77
+
78
+
79
+ @router.post("/upload/audio", response_model=Dict[str, Any])
80
+ async def upload_audio(
81
+ file: UploadFile = File(..., description="Audio file (MP3, WAV, WEBM, OGG, M4A)"),
82
+ current_user: TokenData = Depends(get_current_user)
83
+ ):
84
+ """
85
+ 🎤 Transcribe audio to text using OpenAI Whisper.
86
+
87
+ - Supports MP3, WAV, WEBM, OGG, M4A
88
+ - Returns full transcription
89
+ - Max size: 10MB
90
+ - Requires OPENAI_API_KEY in environment
91
+ """
92
+ try:
93
+ result = await file_service.transcribe_audio(file, current_user.user_id)
94
+ return {"success": True, "data": result}
95
+ except HTTPException:
96
+ raise
97
+ except Exception as e:
98
+ raise HTTPException(500, f"Audio transcription failed: {str(e)}")
99
+
100
+
101
+ @router.delete("/delete")
102
+ async def delete_file(
103
+ file_path: str,
104
+ current_user: TokenData = Depends(get_current_user)
105
+ ):
106
+ """
107
+ 🗑️ Delete uploaded file.
108
+
109
+ - Requires file_path (relative path from upload dir)
110
+ - User can only delete their own files
111
+ """
112
+ try:
113
+ success = file_service.delete_file(file_path, current_user.user_id)
114
+ if not success:
115
+ raise HTTPException(404, "File not found or access denied")
116
+ return {"success": True, "message": "File deleted successfully"}
117
+ except HTTPException:
118
+ raise
119
+ except Exception as e:
120
+ raise HTTPException(500, f"File deletion failed: {str(e)}")
121
+
122
+
123
+ @router.get("/health")
124
+ async def file_service_health():
125
+ """🏥 Health check for file service"""
126
+ return {
127
+ "status": "healthy",
128
+ "service": "file_upload",
129
+ "supported_formats": {
130
+ "images": ["JPG", "PNG", "WEBP"],
131
+ "documents": ["PDF", "DOCX", "TXT"],
132
+ "audio": ["MP3", "WAV", "WEBM", "OGG", "M4A"]
133
+ }
134
+ }
app/config.py CHANGED
@@ -1,3 +1,8 @@
 
 
 
 
 
1
  """
2
  Application Configuration
3
  Settings for Banking RAG Chatbot with JWT Authentication
@@ -49,6 +54,15 @@ class Settings:
49
  # Model names for Groq (using correct GroqCloud naming)
50
  GROQ_CHAT_MODEL: str = os.getenv("GROQ_CHAT_MODEL", "llama-3.1-8b-instant") # For chat interface
51
  GROQ_EVAL_MODEL: str = os.getenv("GROQ_EVAL_MODEL", "llama-3.3-70b-versatile") # For evaluation
 
 
 
 
 
 
 
 
 
52
 
53
  # ========================================================================
54
  # Commented as of now, can be re-enabled if rate limiting is needed
 
1
+ # ============================================================================
2
+ # backend/app/config.py
3
+ # ============================================================================
4
+
5
+
6
  """
7
  Application Configuration
8
  Settings for Banking RAG Chatbot with JWT Authentication
 
54
  # Model names for Groq (using correct GroqCloud naming)
55
  GROQ_CHAT_MODEL: str = os.getenv("GROQ_CHAT_MODEL", "llama-3.1-8b-instant") # For chat interface
56
  GROQ_EVAL_MODEL: str = os.getenv("GROQ_EVAL_MODEL", "llama-3.3-70b-versatile") # For evaluation
57
+
58
+ # ========================================================================
59
+ # FILE UPLOAD SETTINGS
60
+ # ========================================================================
61
+ UPLOAD_DIR: str = os.getenv("UPLOAD_DIR", "./uploads")
62
+ MAX_UPLOAD_SIZE: int = 10 * 1024 * 1024 # 10MB
63
+
64
+ # OpenAI Whisper API (for audio transcription)
65
+ # OPENAI_API_KEY: str = os.getenv("OPENAI_API_KEY", "")
66
 
67
  # ========================================================================
68
  # Commented as of now, can be re-enabled if rate limiting is needed
app/db/repositories/backup_conversation_repository.py ADDED
@@ -0,0 +1,995 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # """
2
+ # Conversation Repository - MongoDB CRUD operations
3
+ # Handles storing and retrieving conversations from MongoDB Atlas
4
+
5
+ # Repository Pattern: Separates database logic from business logic
6
+ # This makes code cleaner and easier to test
7
+
8
+ # Collections:
9
+ # - conversations: Stores complete conversations with messages
10
+ # - retrieval_logs: Logs each retrieval operation (for RL training data)
11
+ # """
12
+
13
+ # import uuid
14
+ # from datetime import datetime
15
+ # from typing import List, Dict, Optional
16
+ # from bson import ObjectId
17
+
18
+ # from app.db.mongodb import get_database
19
+
20
+
21
+ # # ============================================================================
22
+ # # CONVERSATION REPOSITORY
23
+ # # ============================================================================
24
+
25
+ # class ConversationRepository:
26
+ # """
27
+ # Repository for conversation data in MongoDB.
28
+
29
+ # Provides CRUD operations for:
30
+ # 1. Conversations (user chat sessions)
31
+ # 2. Retrieval logs (for RL training and analytics)
32
+ # """
33
+
34
+ # def __init__(self):
35
+ # """
36
+ # Initialize repository with database connection.
37
+
38
+ # Gracefully handles case where MongoDB is not connected.
39
+ # """
40
+ # self.db = get_database()
41
+
42
+ # # Graceful handling if MongoDB not connected
43
+ # if self.db is None:
44
+ # print("⚠️ ConversationRepository: MongoDB not connected")
45
+ # print(" Repository will not function until database is connected")
46
+ # self.conversations = None
47
+ # self.retrieval_logs = None
48
+ # else:
49
+ # self.conversations = self.db["conversations"]
50
+ # self.retrieval_logs = self.db["retrieval_logs"]
51
+ # print("✅ ConversationRepository initialized with MongoDB")
52
+
53
+ # def _check_connection(self):
54
+ # """
55
+ # Check if MongoDB is connected.
56
+
57
+ # Raises:
58
+ # RuntimeError: If MongoDB is not connected
59
+ # """
60
+ # if self.db is None or self.conversations is None:
61
+ # raise RuntimeError(
62
+ # "MongoDB not connected. Cannot perform database operations. "
63
+ # "Check MONGODB_URI in .env file."
64
+ # )
65
+
66
+ # # ========================================================================
67
+ # # CONVERSATION CRUD OPERATIONS
68
+ # # ========================================================================
69
+
70
+ # async def create_conversation(
71
+ # self,
72
+ # user_id: str,
73
+ # conversation_id: Optional[str] = None
74
+ # ) -> str:
75
+ # """
76
+ # Create a new conversation.
77
+
78
+ # Args:
79
+ # user_id: User ID who owns this conversation
80
+ # conversation_id: Optional custom conversation ID (auto-generated if None)
81
+
82
+ # Returns:
83
+ # str: Conversation ID
84
+
85
+ # Raises:
86
+ # RuntimeError: If MongoDB not connected
87
+ # """
88
+ # self._check_connection()
89
+
90
+ # if conversation_id is None:
91
+ # conversation_id = str(uuid.uuid4())
92
+
93
+ # conversation = {
94
+ # "conversation_id": conversation_id,
95
+ # "user_id": user_id,
96
+ # "messages": [], # Will store all messages
97
+ # "created_at": datetime.now(),
98
+ # "updated_at": datetime.now(),
99
+ # "status": "active" # active, archived, deleted
100
+ # }
101
+
102
+ # await self.conversations.insert_one(conversation)
103
+
104
+ # return conversation_id
105
+
106
+ # async def get_conversation(self, conversation_id: str) -> Optional[Dict]:
107
+ # """
108
+ # Get a conversation by ID.
109
+
110
+ # Args:
111
+ # conversation_id: Conversation ID
112
+
113
+ # Returns:
114
+ # dict or None: Conversation document
115
+
116
+ # Raises:
117
+ # RuntimeError: If MongoDB not connected
118
+ # """
119
+ # self._check_connection()
120
+
121
+ # conversation = await self.conversations.find_one(
122
+ # {"conversation_id": conversation_id}
123
+ # )
124
+
125
+ # # Convert MongoDB ObjectId to string for JSON serialization
126
+ # if conversation and "_id" in conversation:
127
+ # conversation["_id"] = str(conversation["_id"])
128
+
129
+ # return conversation
130
+
131
+ # # async def get_user_conversations(
132
+ # # self,
133
+ # # user_id: str,
134
+ # # limit: int = 10,
135
+ # # skip: int = 0
136
+ # # ) -> List[Dict]:
137
+ # # """
138
+ # # Get all conversations for a user.
139
+
140
+ # # Args:
141
+ # # user_id: User ID
142
+ # # limit: Maximum number of conversations to return
143
+ # # skip: Number of conversations to skip (for pagination)
144
+
145
+ # # Returns:
146
+ # # list: List of conversation documents
147
+
148
+ # # Raises:
149
+ # # RuntimeError: If MongoDB not connected
150
+ # # """
151
+ # # self._check_connection()
152
+
153
+ # # cursor = self.conversations.find(
154
+ # # {"user_id": user_id, "status": "active"}
155
+ # # ).sort("updated_at", -1).skip(skip).limit(limit)
156
+
157
+ # # conversations = await cursor.to_list(length=limit)
158
+
159
+ # # # Convert ObjectIds to strings
160
+ # # for conv in conversations:
161
+ # # if "_id" in conv:
162
+ # # conv["_id"] = str(conv["_id"])
163
+
164
+ # # return conversations
165
+ # async def get_user_conversations(
166
+ # self,
167
+ # user_id: str,
168
+ # limit: int = 10,
169
+ # skip: int = 0
170
+ # ) -> List[Dict]:
171
+ # """Get all conversations for a user."""
172
+ # # Gracefully return empty list if not connected
173
+ # if self.db is None or self.conversations is None:
174
+ # print("⚠️ MongoDB not connected - returning empty conversations list")
175
+ # return []
176
+
177
+ # cursor = self.conversations.find(
178
+ # {"user_id": user_id, "status": "active"}
179
+ # ).sort("updated_at", -1).skip(skip).limit(limit)
180
+
181
+ # conversations = await cursor.to_list(length=limit)
182
+
183
+ # # Convert ObjectIds to strings
184
+ # for conv in conversations:
185
+ # if "_id" in conv:
186
+ # conv["_id"] = str(conv["_id"])
187
+
188
+ # return conversations
189
+
190
+
191
+ # async def add_message(
192
+ # self,
193
+ # conversation_id: str,
194
+ # message: Dict
195
+ # ) -> bool:
196
+ # """
197
+ # Add a message to a conversation.
198
+
199
+ # Args:
200
+ # conversation_id: Conversation ID
201
+ # message: Message dict
202
+ # {
203
+ # 'role': 'user' or 'assistant',
204
+ # 'content': str,
205
+ # 'timestamp': datetime,
206
+ # 'metadata': dict (optional - policy_action, docs_retrieved, etc.)
207
+ # }
208
+
209
+ # Returns:
210
+ # bool: Success status
211
+
212
+ # Raises:
213
+ # RuntimeError: If MongoDB not connected
214
+ # """
215
+ # self._check_connection()
216
+
217
+ # # Ensure timestamp exists
218
+ # if "timestamp" not in message:
219
+ # message["timestamp"] = datetime.now()
220
+
221
+ # # Add message to conversation
222
+ # result = await self.conversations.update_one(
223
+ # {"conversation_id": conversation_id},
224
+ # {
225
+ # "$push": {"messages": message},
226
+ # "$set": {"updated_at": datetime.now()}
227
+ # }
228
+ # )
229
+
230
+ # return result.modified_count > 0
231
+
232
+ # async def get_conversation_history(
233
+ # self,
234
+ # conversation_id: str,
235
+ # max_messages: int = None
236
+ # ) -> List[Dict]:
237
+ # """
238
+ # Get conversation history (messages only).
239
+
240
+ # Args:
241
+ # conversation_id: Conversation ID
242
+ # max_messages: Optional limit on number of messages
243
+
244
+ # Returns:
245
+ # list: List of messages
246
+
247
+ # Raises:
248
+ # RuntimeError: If MongoDB not connected
249
+ # """
250
+ # self._check_connection()
251
+
252
+ # conversation = await self.get_conversation(conversation_id)
253
+
254
+ # if not conversation:
255
+ # return []
256
+
257
+ # messages = conversation.get("messages", [])
258
+
259
+ # if max_messages:
260
+ # messages = messages[-max_messages:]
261
+
262
+ # return messages
263
+
264
+ # async def delete_conversation(self, conversation_id: str) -> bool:
265
+ # """
266
+ # Soft delete a conversation (mark as deleted, don't actually delete).
267
+
268
+ # Args:
269
+ # conversation_id: Conversation ID
270
+
271
+ # Returns:
272
+ # bool: Success status
273
+
274
+ # Raises:
275
+ # RuntimeError: If MongoDB not connected
276
+ # """
277
+ # self._check_connection()
278
+
279
+ # result = await self.conversations.update_one(
280
+ # {"conversation_id": conversation_id},
281
+ # {
282
+ # "$set": {
283
+ # "status": "deleted",
284
+ # "deleted_at": datetime.now()
285
+ # }
286
+ # }
287
+ # )
288
+
289
+ # return result.modified_count > 0
290
+
291
+ # # ========================================================================
292
+ # # RETRIEVAL LOGS (for RL training)
293
+ # # ========================================================================
294
+
295
+ # async def log_retrieval(
296
+ # self,
297
+ # log_data: Dict
298
+ # ) -> str:
299
+ # """
300
+ # Log a retrieval operation (for RL training and analysis).
301
+
302
+ # Args:
303
+ # log_data: Log data dict
304
+ # {
305
+ # 'conversation_id': str,
306
+ # 'user_id': str,
307
+ # 'query': str,
308
+ # 'policy_action': 'FETCH' or 'NO_FETCH',
309
+ # 'policy_confidence': float,
310
+ # 'documents_retrieved': int,
311
+ # 'top_doc_score': float or None,
312
+ # 'retrieved_docs_metadata': list,
313
+ # 'response': str,
314
+ # 'retrieval_time_ms': float,
315
+ # 'generation_time_ms': float,
316
+ # 'total_time_ms': float,
317
+ # 'timestamp': datetime
318
+ # }
319
+
320
+ # Returns:
321
+ # str: Log ID
322
+
323
+ # Raises:
324
+ # RuntimeError: If MongoDB not connected
325
+ # """
326
+ # self._check_connection()
327
+
328
+ # # Add timestamp if not present
329
+ # if "timestamp" not in log_data:
330
+ # log_data["timestamp"] = datetime.now()
331
+
332
+ # # Generate log ID
333
+ # log_id = str(uuid.uuid4())
334
+ # log_data["log_id"] = log_id
335
+
336
+ # # Insert log
337
+ # await self.retrieval_logs.insert_one(log_data)
338
+
339
+ # return log_id
340
+
341
+ # async def get_retrieval_logs(
342
+ # self,
343
+ # conversation_id: Optional[str] = None,
344
+ # user_id: Optional[str] = None,
345
+ # limit: int = 100,
346
+ # skip: int = 0
347
+ # ) -> List[Dict]:
348
+ # """
349
+ # Get retrieval logs (for analysis and RL training).
350
+
351
+ # Args:
352
+ # conversation_id: Optional filter by conversation
353
+ # user_id: Optional filter by user
354
+ # limit: Maximum number of logs
355
+ # skip: Number of logs to skip
356
+
357
+ # Returns:
358
+ # list: List of log documents
359
+
360
+ # Raises:
361
+ # RuntimeError: If MongoDB not connected
362
+ # """
363
+ # self._check_connection()
364
+
365
+ # # Build query
366
+ # query = {}
367
+ # if conversation_id:
368
+ # query["conversation_id"] = conversation_id
369
+ # if user_id:
370
+ # query["user_id"] = user_id
371
+
372
+ # # Fetch logs
373
+ # cursor = self.retrieval_logs.find(query).sort("timestamp", -1).skip(skip).limit(limit)
374
+ # logs = await cursor.to_list(length=limit)
375
+
376
+ # # Convert ObjectIds to strings
377
+ # for log in logs:
378
+ # if "_id" in log:
379
+ # log["_id"] = str(log["_id"])
380
+
381
+ # return logs
382
+
383
+ # async def get_logs_for_rl_training(
384
+ # self,
385
+ # min_date: Optional[datetime] = None,
386
+ # limit: int = 1000
387
+ # ) -> List[Dict]:
388
+ # """
389
+ # Get logs specifically for RL training.
390
+ # Filters for logs with both policy decision and retrieval results.
391
+
392
+ # Args:
393
+ # min_date: Optional minimum date for logs
394
+ # limit: Maximum number of logs
395
+
396
+ # Returns:
397
+ # list: List of log documents suitable for RL training
398
+
399
+ # Raises:
400
+ # RuntimeError: If MongoDB not connected
401
+ # """
402
+ # self._check_connection()
403
+
404
+ # # Build query
405
+ # query = {
406
+ # "policy_action": {"$exists": True},
407
+ # "response": {"$exists": True}
408
+ # }
409
+
410
+ # if min_date:
411
+ # query["timestamp"] = {"$gte": min_date}
412
+
413
+ # # Fetch logs
414
+ # cursor = self.retrieval_logs.find(query).sort("timestamp", -1).limit(limit)
415
+ # logs = await cursor.to_list(length=limit)
416
+
417
+ # # Convert ObjectIds
418
+ # for log in logs:
419
+ # if "_id" in log:
420
+ # log["_id"] = str(log["_id"])
421
+
422
+ # return logs
423
+
424
+ # # ========================================================================
425
+ # # ANALYTICS QUERIES
426
+ # # ========================================================================
427
+
428
+ # async def get_conversation_stats(self, user_id: str) -> Dict:
429
+ # """
430
+ # Get conversation statistics for a user.
431
+
432
+ # Args:
433
+ # user_id: User ID
434
+
435
+ # Returns:
436
+ # dict: Statistics
437
+
438
+ # Raises:
439
+ # RuntimeError: If MongoDB not connected
440
+ # """
441
+ # self._check_connection()
442
+
443
+ # # Count total conversations
444
+ # total_conversations = await self.conversations.count_documents({
445
+ # "user_id": user_id,
446
+ # "status": "active"
447
+ # })
448
+
449
+ # # Count total messages
450
+ # pipeline = [
451
+ # {"$match": {"user_id": user_id, "status": "active"}},
452
+ # {"$project": {"message_count": {"$size": "$messages"}}}
453
+ # ]
454
+
455
+ # result = await self.conversations.aggregate(pipeline).to_list(length=None)
456
+ # total_messages = sum(doc.get("message_count", 0) for doc in result)
457
+
458
+ # return {
459
+ # "total_conversations": total_conversations,
460
+ # "total_messages": total_messages,
461
+ # "avg_messages_per_conversation": total_messages / total_conversations if total_conversations > 0 else 0
462
+ # }
463
+
464
+ # async def get_policy_stats(self, user_id: Optional[str] = None) -> Dict:
465
+ # """
466
+ # Get policy decision statistics.
467
+
468
+ # Args:
469
+ # user_id: Optional user ID filter
470
+
471
+ # Returns:
472
+ # dict: Policy statistics
473
+
474
+ # Raises:
475
+ # RuntimeError: If MongoDB not connected
476
+ # """
477
+ # self._check_connection()
478
+
479
+ # # Build query
480
+ # query = {}
481
+ # if user_id:
482
+ # query["user_id"] = user_id
483
+
484
+ # # Count FETCH vs NO_FETCH
485
+ # fetch_count = await self.retrieval_logs.count_documents({
486
+ # **query,
487
+ # "policy_action": "FETCH"
488
+ # })
489
+
490
+ # no_fetch_count = await self.retrieval_logs.count_documents({
491
+ # **query,
492
+ # "policy_action": "NO_FETCH"
493
+ # })
494
+
495
+ # total = fetch_count + no_fetch_count
496
+
497
+ # return {
498
+ # "fetch_count": fetch_count,
499
+ # "no_fetch_count": no_fetch_count,
500
+ # "total": total,
501
+ # "fetch_rate": fetch_count / total if total > 0 else 0,
502
+ # "no_fetch_rate": no_fetch_count / total if total > 0 else 0
503
+ # }
504
+
505
+
506
+ # # ============================================================================
507
+ # # USAGE EXAMPLE (for reference)
508
+ # # ============================================================================
509
+ # """
510
+ # # In your service or API endpoint:
511
+
512
+ # from app.db.repositories.conversation_repository import ConversationRepository
513
+
514
+ # repo = ConversationRepository()
515
+
516
+ # # Create conversation
517
+ # conv_id = await repo.create_conversation(user_id="user_123")
518
+
519
+ # # Add user message
520
+ # await repo.add_message(conv_id, {
521
+ # 'role': 'user',
522
+ # 'content': 'What is my balance?',
523
+ # 'timestamp': datetime.now()
524
+ # })
525
+
526
+ # # Add assistant message
527
+ # await repo.add_message(conv_id, {
528
+ # 'role': 'assistant',
529
+ # 'content': 'Your balance is $1000',
530
+ # 'timestamp': datetime.now(),
531
+ # 'metadata': {
532
+ # 'policy_action': 'FETCH',
533
+ # 'documents_retrieved': 3
534
+ # }
535
+ # })
536
+
537
+ # # Get conversation history
538
+ # history = await repo.get_conversation_history(conv_id)
539
+
540
+ # # Log retrieval for RL training
541
+ # await repo.log_retrieval({
542
+ # 'conversation_id': conv_id,
543
+ # 'user_id': 'user_123',
544
+ # 'query': 'What is my balance?',
545
+ # 'policy_action': 'FETCH',
546
+ # 'documents_retrieved': 3,
547
+ # 'response': 'Your balance is $1000'
548
+ # })
549
+ # """
550
+
551
+
552
+
553
+
554
+
555
+
556
+
557
+ # """
558
+ # Conversation Repository - MongoDB CRUD operations
559
+ # Handles storing and retrieving conversations from MongoDB Atlas
560
+
561
+ # Repository Pattern: Separates database logic from business logic
562
+ # This makes code cleaner and easier to test
563
+ # """
564
+
565
+ # import uuid
566
+ # from datetime import datetime
567
+ # from typing import List, Dict, Optional
568
+ # from bson import ObjectId
569
+
570
+ # from app.db.mongodb import get_database
571
+
572
+
573
+ # # ============================================================================
574
+ # # CONVERSATION REPOSITORY
575
+ # # ============================================================================
576
+
577
+ # class ConversationRepository:
578
+ # """
579
+ # Repository for conversation data in MongoDB.
580
+
581
+ # Collections used:
582
+ # - conversations: Stores complete conversations with messages
583
+ # - retrieval_logs: Logs each retrieval operation (for RL training)
584
+ # """
585
+
586
+ # def __init__(self):
587
+ # """Initialize repository with database connection"""
588
+ # self.db = get_database()
589
+ # self.conversations = self.db["conversations"]
590
+ # self.retrieval_logs = self.db["retrieval_logs"]
591
+
592
+ # # ========================================================================
593
+ # # CONVERSATION CRUD OPERATIONS
594
+ # # ========================================================================
595
+
596
+ # async def create_conversation(
597
+ # self,
598
+ # user_id: str,
599
+ # conversation_id: Optional[str] = None
600
+ # ) -> str:
601
+ # """
602
+ # Create a new conversation.
603
+
604
+ # Args:
605
+ # user_id: User ID who owns this conversation
606
+ # conversation_id: Optional custom conversation ID (auto-generated if None)
607
+
608
+ # Returns:
609
+ # str: Conversation ID
610
+ # """
611
+ # if conversation_id is None:
612
+ # conversation_id = str(uuid.uuid4())
613
+
614
+ # conversation = {
615
+ # "conversation_id": conversation_id,
616
+ # "user_id": user_id,
617
+ # "messages": [], # Will store all messages
618
+ # "created_at": datetime.now(),
619
+ # "updated_at": datetime.now(),
620
+ # "status": "active" # active, archived, deleted
621
+ # }
622
+
623
+ # await self.conversations.insert_one(conversation)
624
+
625
+ # return conversation_id
626
+
627
+ # async def get_conversation(self, conversation_id: str) -> Optional[Dict]:
628
+ # """
629
+ # Get a conversation by ID.
630
+
631
+ # Args:
632
+ # conversation_id: Conversation ID
633
+
634
+ # Returns:
635
+ # dict or None: Conversation document
636
+ # """
637
+ # conversation = await self.conversations.find_one(
638
+ # {"conversation_id": conversation_id}
639
+ # )
640
+
641
+ # # Convert MongoDB ObjectId to string for JSON serialization
642
+ # if conversation and "_id" in conversation:
643
+ # conversation["_id"] = str(conversation["_id"])
644
+
645
+ # return conversation
646
+
647
+ # async def get_user_conversations(
648
+ # self,
649
+ # user_id: str,
650
+ # limit: int = 10,
651
+ # skip: int = 0
652
+ # ) -> List[Dict]:
653
+ # """
654
+ # Get all conversations for a user.
655
+
656
+ # Args:
657
+ # user_id: User ID
658
+ # limit: Maximum number of conversations to return
659
+ # skip: Number of conversations to skip (for pagination)
660
+
661
+ # Returns:
662
+ # list: List of conversation documents
663
+ # """
664
+ # cursor = self.conversations.find(
665
+ # {"user_id": user_id, "status": "active"}
666
+ # ).sort("updated_at", -1).skip(skip).limit(limit)
667
+
668
+ # conversations = await cursor.to_list(length=limit)
669
+
670
+ # # Convert ObjectIds to strings
671
+ # for conv in conversations:
672
+ # if "_id" in conv:
673
+ # conv["_id"] = str(conv["_id"])
674
+
675
+ # return conversations
676
+
677
+ # async def add_message(
678
+ # self,
679
+ # conversation_id: str,
680
+ # message: Dict
681
+ # ) -> bool:
682
+ # """
683
+ # Add a message to a conversation.
684
+
685
+ # Args:
686
+ # conversation_id: Conversation ID
687
+ # message: Message dict
688
+ # {
689
+ # 'role': 'user' or 'assistant',
690
+ # 'content': str,
691
+ # 'timestamp': datetime,
692
+ # 'metadata': dict (optional - policy_action, docs_retrieved, etc.)
693
+ # }
694
+
695
+ # Returns:
696
+ # bool: Success status
697
+ # """
698
+ # # Ensure timestamp exists
699
+ # if "timestamp" not in message:
700
+ # message["timestamp"] = datetime.now()
701
+
702
+ # # Add message to conversation
703
+ # result = await self.conversations.update_one(
704
+ # {"conversation_id": conversation_id},
705
+ # {
706
+ # "$push": {"messages": message},
707
+ # "$set": {"updated_at": datetime.now()}
708
+ # }
709
+ # )
710
+
711
+ # return result.modified_count > 0
712
+
713
+ # async def get_conversation_history(
714
+ # self,
715
+ # conversation_id: str,
716
+ # max_messages: int = None
717
+ # ) -> List[Dict]:
718
+ # """
719
+ # Get conversation history (messages only).
720
+
721
+ # Args:
722
+ # conversation_id: Conversation ID
723
+ # max_messages: Optional limit on number of messages
724
+
725
+ # Returns:
726
+ # list: List of messages
727
+ # """
728
+ # conversation = await self.get_conversation(conversation_id)
729
+
730
+ # if not conversation:
731
+ # return []
732
+
733
+ # messages = conversation.get("messages", [])
734
+
735
+ # if max_messages:
736
+ # messages = messages[-max_messages:]
737
+
738
+ # return messages
739
+
740
+ # async def delete_conversation(self, conversation_id: str) -> bool:
741
+ # """
742
+ # Soft delete a conversation (mark as deleted, don't actually delete).
743
+
744
+ # Args:
745
+ # conversation_id: Conversation ID
746
+
747
+ # Returns:
748
+ # bool: Success status
749
+ # """
750
+ # result = await self.conversations.update_one(
751
+ # {"conversation_id": conversation_id},
752
+ # {
753
+ # "$set": {
754
+ # "status": "deleted",
755
+ # "deleted_at": datetime.now()
756
+ # }
757
+ # }
758
+ # )
759
+
760
+ # return result.modified_count > 0
761
+
762
+ # # ========================================================================
763
+ # # RETRIEVAL LOGS (for RL training)
764
+ # # ========================================================================
765
+
766
+ # async def log_retrieval(
767
+ # self,
768
+ # log_data: Dict
769
+ # ) -> str:
770
+ # """
771
+ # Log a retrieval operation (for RL training and analysis).
772
+
773
+ # Args:
774
+ # log_data: Log data dict
775
+ # {
776
+ # 'conversation_id': str,
777
+ # 'user_id': str,
778
+ # 'query': str,
779
+ # 'policy_action': 'FETCH' or 'NO_FETCH',
780
+ # 'policy_confidence': float,
781
+ # 'documents_retrieved': int,
782
+ # 'top_doc_score': float or None,
783
+ # 'retrieved_docs_metadata': list,
784
+ # 'response': str,
785
+ # 'retrieval_time_ms': float,
786
+ # 'generation_time_ms': float,
787
+ # 'total_time_ms': float,
788
+ # 'timestamp': datetime
789
+ # }
790
+
791
+ # Returns:
792
+ # str: Log ID
793
+ # """
794
+ # # Add timestamp if not present
795
+ # if "timestamp" not in log_data:
796
+ # log_data["timestamp"] = datetime.now()
797
+
798
+ # # Generate log ID
799
+ # log_id = str(uuid.uuid4())
800
+ # log_data["log_id"] = log_id
801
+
802
+ # # Insert log
803
+ # await self.retrieval_logs.insert_one(log_data)
804
+
805
+ # return log_id
806
+
807
+ # async def get_retrieval_logs(
808
+ # self,
809
+ # conversation_id: Optional[str] = None,
810
+ # user_id: Optional[str] = None,
811
+ # limit: int = 100,
812
+ # skip: int = 0
813
+ # ) -> List[Dict]:
814
+ # """
815
+ # Get retrieval logs (for analysis and RL training).
816
+
817
+ # Args:
818
+ # conversation_id: Optional filter by conversation
819
+ # user_id: Optional filter by user
820
+ # limit: Maximum number of logs
821
+ # skip: Number of logs to skip
822
+
823
+ # Returns:
824
+ # list: List of log documents
825
+ # """
826
+ # # Build query
827
+ # query = {}
828
+ # if conversation_id:
829
+ # query["conversation_id"] = conversation_id
830
+ # if user_id:
831
+ # query["user_id"] = user_id
832
+
833
+ # # Fetch logs
834
+ # cursor = self.retrieval_logs.find(query).sort("timestamp", -1).skip(skip).limit(limit)
835
+ # logs = await cursor.to_list(length=limit)
836
+
837
+ # # Convert ObjectIds to strings
838
+ # for log in logs:
839
+ # if "_id" in log:
840
+ # log["_id"] = str(log["_id"])
841
+
842
+ # return logs
843
+
844
+ # async def get_logs_for_rl_training(
845
+ # self,
846
+ # min_date: Optional[datetime] = None,
847
+ # limit: int = 1000
848
+ # ) -> List[Dict]:
849
+ # """
850
+ # Get logs specifically for RL training.
851
+ # Filters for logs with both policy decision and retrieval results.
852
+
853
+ # Args:
854
+ # min_date: Optional minimum date for logs
855
+ # limit: Maximum number of logs
856
+
857
+ # Returns:
858
+ # list: List of log documents suitable for RL training
859
+ # """
860
+ # # Build query
861
+ # query = {
862
+ # "policy_action": {"$exists": True},
863
+ # "response": {"$exists": True}
864
+ # }
865
+
866
+ # if min_date:
867
+ # query["timestamp"] = {"$gte": min_date}
868
+
869
+ # # Fetch logs
870
+ # cursor = self.retrieval_logs.find(query).sort("timestamp", -1).limit(limit)
871
+ # logs = await cursor.to_list(length=limit)
872
+
873
+ # # Convert ObjectIds
874
+ # for log in logs:
875
+ # if "_id" in log:
876
+ # log["_id"] = str(log["_id"])
877
+
878
+ # return logs
879
+
880
+ # # ========================================================================
881
+ # # ANALYTICS QUERIES
882
+ # # ========================================================================
883
+
884
+ # async def get_conversation_stats(self, user_id: str) -> Dict:
885
+ # """
886
+ # Get conversation statistics for a user.
887
+
888
+ # Args:
889
+ # user_id: User ID
890
+
891
+ # Returns:
892
+ # dict: Statistics
893
+ # """
894
+ # # Count total conversations
895
+ # total_conversations = await self.conversations.count_documents({
896
+ # "user_id": user_id,
897
+ # "status": "active"
898
+ # })
899
+
900
+ # # Count total messages
901
+ # pipeline = [
902
+ # {"$match": {"user_id": user_id, "status": "active"}},
903
+ # {"$project": {"message_count": {"$size": "$messages"}}}
904
+ # ]
905
+
906
+ # result = await self.conversations.aggregate(pipeline).to_list(length=None)
907
+ # total_messages = sum(doc.get("message_count", 0) for doc in result)
908
+
909
+ # return {
910
+ # "total_conversations": total_conversations,
911
+ # "total_messages": total_messages,
912
+ # "avg_messages_per_conversation": total_messages / total_conversations if total_conversations > 0 else 0
913
+ # }
914
+
915
+ # async def get_policy_stats(self, user_id: Optional[str] = None) -> Dict:
916
+ # """
917
+ # Get policy decision statistics.
918
+
919
+ # Args:
920
+ # user_id: Optional user ID filter
921
+
922
+ # Returns:
923
+ # dict: Policy statistics
924
+ # """
925
+ # # Build query
926
+ # query = {}
927
+ # if user_id:
928
+ # query["user_id"] = user_id
929
+
930
+ # # Count FETCH vs NO_FETCH
931
+ # fetch_count = await self.retrieval_logs.count_documents({
932
+ # **query,
933
+ # "policy_action": "FETCH"
934
+ # })
935
+
936
+ # no_fetch_count = await self.retrieval_logs.count_documents({
937
+ # **query,
938
+ # "policy_action": "NO_FETCH"
939
+ # })
940
+
941
+ # total = fetch_count + no_fetch_count
942
+
943
+ # return {
944
+ # "fetch_count": fetch_count,
945
+ # "no_fetch_count": no_fetch_count,
946
+ # "total": total,
947
+ # "fetch_rate": fetch_count / total if total > 0 else 0,
948
+ # "no_fetch_rate": no_fetch_count / total if total > 0 else 0
949
+ # }
950
+
951
+
952
+ # # ============================================================================
953
+ # # USAGE EXAMPLE (for reference)
954
+ # # ============================================================================
955
+ # """
956
+ # # In your service or API endpoint:
957
+
958
+ # from app.db.repositories.conversation_repository import ConversationRepository
959
+
960
+ # repo = ConversationRepository()
961
+
962
+ # # Create conversation
963
+ # conv_id = await repo.create_conversation(user_id="user_123")
964
+
965
+ # # Add user message
966
+ # await repo.add_message(conv_id, {
967
+ # 'role': 'user',
968
+ # 'content': 'What is my balance?',
969
+ # 'timestamp': datetime.now()
970
+ # })
971
+
972
+ # # Add assistant message
973
+ # await repo.add_message(conv_id, {
974
+ # 'role': 'assistant',
975
+ # 'content': 'Your balance is $1000',
976
+ # 'timestamp': datetime.now(),
977
+ # 'metadata': {
978
+ # 'policy_action': 'FETCH',
979
+ # 'documents_retrieved': 3
980
+ # }
981
+ # })
982
+
983
+ # # Get conversation history
984
+ # history = await repo.get_conversation_history(conv_id)
985
+
986
+ # # Log retrieval for RL training
987
+ # await repo.log_retrieval({
988
+ # 'conversation_id': conv_id,
989
+ # 'user_id': 'user_123',
990
+ # 'query': 'What is my balance?',
991
+ # 'policy_action': 'FETCH',
992
+ # 'documents_retrieved': 3,
993
+ # 'response': 'Your balance is $1000'
994
+ # })
995
+ # """
app/db/repositories/conversation_repository.py CHANGED
@@ -1,3 +1,8 @@
 
 
 
 
 
1
  """
2
  Conversation Repository - MongoDB Operations (UPDATED)
3
 
@@ -16,7 +21,6 @@ from pymongo import DESCENDING, ASCENDING
16
  from app.db.mongodb import get_database
17
  from app.models.conversation import (
18
  Conversation,
19
- Message,
20
  ConversationListResponse,
21
  ConversationListResult
22
  )
@@ -576,1007 +580,56 @@ class ConversationRepository:
576
 
577
  except Exception as e:
578
  print(f"⚠️ Failed to create indexes: {e}")
 
 
 
 
579
 
580
-
581
- # ============================================================================
582
- # GLOBAL REPOSITORY INSTANCE
583
- # ============================================================================
584
-
585
- conversation_repository = ConversationRepository()
586
-
587
-
588
- # """
589
- # Conversation Repository - MongoDB CRUD operations
590
- # Handles storing and retrieving conversations from MongoDB Atlas
591
-
592
- # Repository Pattern: Separates database logic from business logic
593
- # This makes code cleaner and easier to test
594
-
595
- # Collections:
596
- # - conversations: Stores complete conversations with messages
597
- # - retrieval_logs: Logs each retrieval operation (for RL training data)
598
- # """
599
-
600
- # import uuid
601
- # from datetime import datetime
602
- # from typing import List, Dict, Optional
603
- # from bson import ObjectId
604
-
605
- # from app.db.mongodb import get_database
606
-
607
-
608
- # # ============================================================================
609
- # # CONVERSATION REPOSITORY
610
- # # ============================================================================
611
-
612
- # class ConversationRepository:
613
- # """
614
- # Repository for conversation data in MongoDB.
615
-
616
- # Provides CRUD operations for:
617
- # 1. Conversations (user chat sessions)
618
- # 2. Retrieval logs (for RL training and analytics)
619
- # """
620
-
621
- # def __init__(self):
622
- # """
623
- # Initialize repository with database connection.
624
-
625
- # Gracefully handles case where MongoDB is not connected.
626
- # """
627
- # self.db = get_database()
628
-
629
- # # Graceful handling if MongoDB not connected
630
- # if self.db is None:
631
- # print("⚠️ ConversationRepository: MongoDB not connected")
632
- # print(" Repository will not function until database is connected")
633
- # self.conversations = None
634
- # self.retrieval_logs = None
635
- # else:
636
- # self.conversations = self.db["conversations"]
637
- # self.retrieval_logs = self.db["retrieval_logs"]
638
- # print("✅ ConversationRepository initialized with MongoDB")
639
-
640
- # def _check_connection(self):
641
- # """
642
- # Check if MongoDB is connected.
643
-
644
- # Raises:
645
- # RuntimeError: If MongoDB is not connected
646
- # """
647
- # if self.db is None or self.conversations is None:
648
- # raise RuntimeError(
649
- # "MongoDB not connected. Cannot perform database operations. "
650
- # "Check MONGODB_URI in .env file."
651
- # )
652
-
653
- # # ========================================================================
654
- # # CONVERSATION CRUD OPERATIONS
655
- # # ========================================================================
656
-
657
- # async def create_conversation(
658
- # self,
659
- # user_id: str,
660
- # conversation_id: Optional[str] = None
661
- # ) -> str:
662
- # """
663
- # Create a new conversation.
664
-
665
- # Args:
666
- # user_id: User ID who owns this conversation
667
- # conversation_id: Optional custom conversation ID (auto-generated if None)
668
-
669
- # Returns:
670
- # str: Conversation ID
671
-
672
- # Raises:
673
- # RuntimeError: If MongoDB not connected
674
- # """
675
- # self._check_connection()
676
-
677
- # if conversation_id is None:
678
- # conversation_id = str(uuid.uuid4())
679
-
680
- # conversation = {
681
- # "conversation_id": conversation_id,
682
- # "user_id": user_id,
683
- # "messages": [], # Will store all messages
684
- # "created_at": datetime.now(),
685
- # "updated_at": datetime.now(),
686
- # "status": "active" # active, archived, deleted
687
- # }
688
-
689
- # await self.conversations.insert_one(conversation)
690
-
691
- # return conversation_id
692
-
693
- # async def get_conversation(self, conversation_id: str) -> Optional[Dict]:
694
- # """
695
- # Get a conversation by ID.
696
-
697
- # Args:
698
- # conversation_id: Conversation ID
699
-
700
- # Returns:
701
- # dict or None: Conversation document
702
-
703
- # Raises:
704
- # RuntimeError: If MongoDB not connected
705
- # """
706
- # self._check_connection()
707
-
708
- # conversation = await self.conversations.find_one(
709
- # {"conversation_id": conversation_id}
710
- # )
711
-
712
- # # Convert MongoDB ObjectId to string for JSON serialization
713
- # if conversation and "_id" in conversation:
714
- # conversation["_id"] = str(conversation["_id"])
715
-
716
- # return conversation
717
-
718
- # # async def get_user_conversations(
719
- # # self,
720
- # # user_id: str,
721
- # # limit: int = 10,
722
- # # skip: int = 0
723
- # # ) -> List[Dict]:
724
- # # """
725
- # # Get all conversations for a user.
726
-
727
- # # Args:
728
- # # user_id: User ID
729
- # # limit: Maximum number of conversations to return
730
- # # skip: Number of conversations to skip (for pagination)
731
-
732
- # # Returns:
733
- # # list: List of conversation documents
734
-
735
- # # Raises:
736
- # # RuntimeError: If MongoDB not connected
737
- # # """
738
- # # self._check_connection()
739
-
740
- # # cursor = self.conversations.find(
741
- # # {"user_id": user_id, "status": "active"}
742
- # # ).sort("updated_at", -1).skip(skip).limit(limit)
743
-
744
- # # conversations = await cursor.to_list(length=limit)
745
-
746
- # # # Convert ObjectIds to strings
747
- # # for conv in conversations:
748
- # # if "_id" in conv:
749
- # # conv["_id"] = str(conv["_id"])
750
-
751
- # # return conversations
752
- # async def get_user_conversations(
753
- # self,
754
- # user_id: str,
755
- # limit: int = 10,
756
- # skip: int = 0
757
- # ) -> List[Dict]:
758
- # """Get all conversations for a user."""
759
- # # Gracefully return empty list if not connected
760
- # if self.db is None or self.conversations is None:
761
- # print("⚠️ MongoDB not connected - returning empty conversations list")
762
- # return []
763
-
764
- # cursor = self.conversations.find(
765
- # {"user_id": user_id, "status": "active"}
766
- # ).sort("updated_at", -1).skip(skip).limit(limit)
767
-
768
- # conversations = await cursor.to_list(length=limit)
769
-
770
- # # Convert ObjectIds to strings
771
- # for conv in conversations:
772
- # if "_id" in conv:
773
- # conv["_id"] = str(conv["_id"])
774
-
775
- # return conversations
776
-
777
-
778
- # async def add_message(
779
- # self,
780
- # conversation_id: str,
781
- # message: Dict
782
- # ) -> bool:
783
- # """
784
- # Add a message to a conversation.
785
-
786
- # Args:
787
- # conversation_id: Conversation ID
788
- # message: Message dict
789
- # {
790
- # 'role': 'user' or 'assistant',
791
- # 'content': str,
792
- # 'timestamp': datetime,
793
- # 'metadata': dict (optional - policy_action, docs_retrieved, etc.)
794
- # }
795
-
796
- # Returns:
797
- # bool: Success status
798
-
799
- # Raises:
800
- # RuntimeError: If MongoDB not connected
801
- # """
802
- # self._check_connection()
803
-
804
- # # Ensure timestamp exists
805
- # if "timestamp" not in message:
806
- # message["timestamp"] = datetime.now()
807
-
808
- # # Add message to conversation
809
- # result = await self.conversations.update_one(
810
- # {"conversation_id": conversation_id},
811
- # {
812
- # "$push": {"messages": message},
813
- # "$set": {"updated_at": datetime.now()}
814
- # }
815
- # )
816
-
817
- # return result.modified_count > 0
818
-
819
- # async def get_conversation_history(
820
- # self,
821
- # conversation_id: str,
822
- # max_messages: int = None
823
- # ) -> List[Dict]:
824
- # """
825
- # Get conversation history (messages only).
826
-
827
- # Args:
828
- # conversation_id: Conversation ID
829
- # max_messages: Optional limit on number of messages
830
-
831
- # Returns:
832
- # list: List of messages
833
-
834
- # Raises:
835
- # RuntimeError: If MongoDB not connected
836
- # """
837
- # self._check_connection()
838
-
839
- # conversation = await self.get_conversation(conversation_id)
840
-
841
- # if not conversation:
842
- # return []
843
-
844
- # messages = conversation.get("messages", [])
845
-
846
- # if max_messages:
847
- # messages = messages[-max_messages:]
848
-
849
- # return messages
850
-
851
- # async def delete_conversation(self, conversation_id: str) -> bool:
852
- # """
853
- # Soft delete a conversation (mark as deleted, don't actually delete).
854
-
855
- # Args:
856
- # conversation_id: Conversation ID
857
-
858
- # Returns:
859
- # bool: Success status
860
-
861
- # Raises:
862
- # RuntimeError: If MongoDB not connected
863
- # """
864
- # self._check_connection()
865
-
866
- # result = await self.conversations.update_one(
867
- # {"conversation_id": conversation_id},
868
- # {
869
- # "$set": {
870
- # "status": "deleted",
871
- # "deleted_at": datetime.now()
872
- # }
873
- # }
874
- # )
875
-
876
- # return result.modified_count > 0
877
-
878
- # # ========================================================================
879
- # # RETRIEVAL LOGS (for RL training)
880
- # # ========================================================================
881
-
882
- # async def log_retrieval(
883
- # self,
884
- # log_data: Dict
885
- # ) -> str:
886
- # """
887
- # Log a retrieval operation (for RL training and analysis).
888
-
889
- # Args:
890
- # log_data: Log data dict
891
- # {
892
- # 'conversation_id': str,
893
- # 'user_id': str,
894
- # 'query': str,
895
- # 'policy_action': 'FETCH' or 'NO_FETCH',
896
- # 'policy_confidence': float,
897
- # 'documents_retrieved': int,
898
- # 'top_doc_score': float or None,
899
- # 'retrieved_docs_metadata': list,
900
- # 'response': str,
901
- # 'retrieval_time_ms': float,
902
- # 'generation_time_ms': float,
903
- # 'total_time_ms': float,
904
- # 'timestamp': datetime
905
- # }
906
-
907
- # Returns:
908
- # str: Log ID
909
-
910
- # Raises:
911
- # RuntimeError: If MongoDB not connected
912
- # """
913
- # self._check_connection()
914
-
915
- # # Add timestamp if not present
916
- # if "timestamp" not in log_data:
917
- # log_data["timestamp"] = datetime.now()
918
-
919
- # # Generate log ID
920
- # log_id = str(uuid.uuid4())
921
- # log_data["log_id"] = log_id
922
-
923
- # # Insert log
924
- # await self.retrieval_logs.insert_one(log_data)
925
-
926
- # return log_id
927
-
928
- # async def get_retrieval_logs(
929
- # self,
930
- # conversation_id: Optional[str] = None,
931
- # user_id: Optional[str] = None,
932
- # limit: int = 100,
933
- # skip: int = 0
934
- # ) -> List[Dict]:
935
- # """
936
- # Get retrieval logs (for analysis and RL training).
937
-
938
- # Args:
939
- # conversation_id: Optional filter by conversation
940
- # user_id: Optional filter by user
941
- # limit: Maximum number of logs
942
- # skip: Number of logs to skip
943
-
944
- # Returns:
945
- # list: List of log documents
946
-
947
- # Raises:
948
- # RuntimeError: If MongoDB not connected
949
- # """
950
- # self._check_connection()
951
-
952
- # # Build query
953
- # query = {}
954
- # if conversation_id:
955
- # query["conversation_id"] = conversation_id
956
- # if user_id:
957
- # query["user_id"] = user_id
958
-
959
- # # Fetch logs
960
- # cursor = self.retrieval_logs.find(query).sort("timestamp", -1).skip(skip).limit(limit)
961
- # logs = await cursor.to_list(length=limit)
962
-
963
- # # Convert ObjectIds to strings
964
- # for log in logs:
965
- # if "_id" in log:
966
- # log["_id"] = str(log["_id"])
967
-
968
- # return logs
969
-
970
- # async def get_logs_for_rl_training(
971
- # self,
972
- # min_date: Optional[datetime] = None,
973
- # limit: int = 1000
974
- # ) -> List[Dict]:
975
- # """
976
- # Get logs specifically for RL training.
977
- # Filters for logs with both policy decision and retrieval results.
978
-
979
- # Args:
980
- # min_date: Optional minimum date for logs
981
- # limit: Maximum number of logs
982
-
983
- # Returns:
984
- # list: List of log documents suitable for RL training
985
-
986
- # Raises:
987
- # RuntimeError: If MongoDB not connected
988
- # """
989
- # self._check_connection()
990
-
991
- # # Build query
992
- # query = {
993
- # "policy_action": {"$exists": True},
994
- # "response": {"$exists": True}
995
- # }
996
-
997
- # if min_date:
998
- # query["timestamp"] = {"$gte": min_date}
999
-
1000
- # # Fetch logs
1001
- # cursor = self.retrieval_logs.find(query).sort("timestamp", -1).limit(limit)
1002
- # logs = await cursor.to_list(length=limit)
1003
-
1004
- # # Convert ObjectIds
1005
- # for log in logs:
1006
- # if "_id" in log:
1007
- # log["_id"] = str(log["_id"])
1008
-
1009
- # return logs
1010
-
1011
- # # ========================================================================
1012
- # # ANALYTICS QUERIES
1013
- # # ========================================================================
1014
-
1015
- # async def get_conversation_stats(self, user_id: str) -> Dict:
1016
- # """
1017
- # Get conversation statistics for a user.
1018
-
1019
- # Args:
1020
- # user_id: User ID
1021
-
1022
- # Returns:
1023
- # dict: Statistics
1024
-
1025
- # Raises:
1026
- # RuntimeError: If MongoDB not connected
1027
- # """
1028
- # self._check_connection()
1029
-
1030
- # # Count total conversations
1031
- # total_conversations = await self.conversations.count_documents({
1032
- # "user_id": user_id,
1033
- # "status": "active"
1034
- # })
1035
-
1036
- # # Count total messages
1037
- # pipeline = [
1038
- # {"$match": {"user_id": user_id, "status": "active"}},
1039
- # {"$project": {"message_count": {"$size": "$messages"}}}
1040
- # ]
1041
-
1042
- # result = await self.conversations.aggregate(pipeline).to_list(length=None)
1043
- # total_messages = sum(doc.get("message_count", 0) for doc in result)
1044
-
1045
- # return {
1046
- # "total_conversations": total_conversations,
1047
- # "total_messages": total_messages,
1048
- # "avg_messages_per_conversation": total_messages / total_conversations if total_conversations > 0 else 0
1049
- # }
1050
-
1051
- # async def get_policy_stats(self, user_id: Optional[str] = None) -> Dict:
1052
- # """
1053
- # Get policy decision statistics.
1054
-
1055
- # Args:
1056
- # user_id: Optional user ID filter
1057
-
1058
- # Returns:
1059
- # dict: Policy statistics
1060
-
1061
- # Raises:
1062
- # RuntimeError: If MongoDB not connected
1063
- # """
1064
- # self._check_connection()
1065
-
1066
- # # Build query
1067
- # query = {}
1068
- # if user_id:
1069
- # query["user_id"] = user_id
1070
-
1071
- # # Count FETCH vs NO_FETCH
1072
- # fetch_count = await self.retrieval_logs.count_documents({
1073
- # **query,
1074
- # "policy_action": "FETCH"
1075
- # })
1076
-
1077
- # no_fetch_count = await self.retrieval_logs.count_documents({
1078
- # **query,
1079
- # "policy_action": "NO_FETCH"
1080
- # })
1081
-
1082
- # total = fetch_count + no_fetch_count
1083
-
1084
- # return {
1085
- # "fetch_count": fetch_count,
1086
- # "no_fetch_count": no_fetch_count,
1087
- # "total": total,
1088
- # "fetch_rate": fetch_count / total if total > 0 else 0,
1089
- # "no_fetch_rate": no_fetch_count / total if total > 0 else 0
1090
- # }
1091
-
1092
-
1093
- # # ============================================================================
1094
- # # USAGE EXAMPLE (for reference)
1095
- # # ============================================================================
1096
- # """
1097
- # # In your service or API endpoint:
1098
-
1099
- # from app.db.repositories.conversation_repository import ConversationRepository
1100
-
1101
- # repo = ConversationRepository()
1102
-
1103
- # # Create conversation
1104
- # conv_id = await repo.create_conversation(user_id="user_123")
1105
-
1106
- # # Add user message
1107
- # await repo.add_message(conv_id, {
1108
- # 'role': 'user',
1109
- # 'content': 'What is my balance?',
1110
- # 'timestamp': datetime.now()
1111
- # })
1112
-
1113
- # # Add assistant message
1114
- # await repo.add_message(conv_id, {
1115
- # 'role': 'assistant',
1116
- # 'content': 'Your balance is $1000',
1117
- # 'timestamp': datetime.now(),
1118
- # 'metadata': {
1119
- # 'policy_action': 'FETCH',
1120
- # 'documents_retrieved': 3
1121
- # }
1122
- # })
1123
-
1124
- # # Get conversation history
1125
- # history = await repo.get_conversation_history(conv_id)
1126
-
1127
- # # Log retrieval for RL training
1128
- # await repo.log_retrieval({
1129
- # 'conversation_id': conv_id,
1130
- # 'user_id': 'user_123',
1131
- # 'query': 'What is my balance?',
1132
- # 'policy_action': 'FETCH',
1133
- # 'documents_retrieved': 3,
1134
- # 'response': 'Your balance is $1000'
1135
- # })
1136
- # """
1137
-
1138
-
1139
-
1140
-
1141
-
1142
-
1143
-
1144
- # """
1145
- # Conversation Repository - MongoDB CRUD operations
1146
- # Handles storing and retrieving conversations from MongoDB Atlas
1147
-
1148
- # Repository Pattern: Separates database logic from business logic
1149
- # This makes code cleaner and easier to test
1150
- # """
1151
-
1152
- # import uuid
1153
- # from datetime import datetime
1154
- # from typing import List, Dict, Optional
1155
- # from bson import ObjectId
1156
-
1157
- # from app.db.mongodb import get_database
1158
-
1159
-
1160
- # # ============================================================================
1161
- # # CONVERSATION REPOSITORY
1162
- # # ============================================================================
1163
-
1164
- # class ConversationRepository:
1165
- # """
1166
- # Repository for conversation data in MongoDB.
1167
-
1168
- # Collections used:
1169
- # - conversations: Stores complete conversations with messages
1170
- # - retrieval_logs: Logs each retrieval operation (for RL training)
1171
- # """
1172
-
1173
- # def __init__(self):
1174
- # """Initialize repository with database connection"""
1175
- # self.db = get_database()
1176
- # self.conversations = self.db["conversations"]
1177
- # self.retrieval_logs = self.db["retrieval_logs"]
1178
-
1179
- # # ========================================================================
1180
- # # CONVERSATION CRUD OPERATIONS
1181
- # # ========================================================================
1182
-
1183
- # async def create_conversation(
1184
- # self,
1185
- # user_id: str,
1186
- # conversation_id: Optional[str] = None
1187
- # ) -> str:
1188
- # """
1189
- # Create a new conversation.
1190
-
1191
- # Args:
1192
- # user_id: User ID who owns this conversation
1193
- # conversation_id: Optional custom conversation ID (auto-generated if None)
1194
-
1195
- # Returns:
1196
- # str: Conversation ID
1197
- # """
1198
- # if conversation_id is None:
1199
- # conversation_id = str(uuid.uuid4())
1200
-
1201
- # conversation = {
1202
- # "conversation_id": conversation_id,
1203
- # "user_id": user_id,
1204
- # "messages": [], # Will store all messages
1205
- # "created_at": datetime.now(),
1206
- # "updated_at": datetime.now(),
1207
- # "status": "active" # active, archived, deleted
1208
- # }
1209
-
1210
- # await self.conversations.insert_one(conversation)
1211
-
1212
- # return conversation_id
1213
-
1214
- # async def get_conversation(self, conversation_id: str) -> Optional[Dict]:
1215
- # """
1216
- # Get a conversation by ID.
1217
-
1218
- # Args:
1219
- # conversation_id: Conversation ID
1220
-
1221
- # Returns:
1222
- # dict or None: Conversation document
1223
- # """
1224
- # conversation = await self.conversations.find_one(
1225
- # {"conversation_id": conversation_id}
1226
- # )
1227
-
1228
- # # Convert MongoDB ObjectId to string for JSON serialization
1229
- # if conversation and "_id" in conversation:
1230
- # conversation["_id"] = str(conversation["_id"])
1231
-
1232
- # return conversation
1233
-
1234
- # async def get_user_conversations(
1235
- # self,
1236
- # user_id: str,
1237
- # limit: int = 10,
1238
- # skip: int = 0
1239
- # ) -> List[Dict]:
1240
- # """
1241
- # Get all conversations for a user.
1242
-
1243
- # Args:
1244
- # user_id: User ID
1245
- # limit: Maximum number of conversations to return
1246
- # skip: Number of conversations to skip (for pagination)
1247
-
1248
- # Returns:
1249
- # list: List of conversation documents
1250
- # """
1251
- # cursor = self.conversations.find(
1252
- # {"user_id": user_id, "status": "active"}
1253
- # ).sort("updated_at", -1).skip(skip).limit(limit)
1254
-
1255
- # conversations = await cursor.to_list(length=limit)
1256
-
1257
- # # Convert ObjectIds to strings
1258
- # for conv in conversations:
1259
- # if "_id" in conv:
1260
- # conv["_id"] = str(conv["_id"])
1261
-
1262
- # return conversations
1263
-
1264
- # async def add_message(
1265
- # self,
1266
- # conversation_id: str,
1267
- # message: Dict
1268
- # ) -> bool:
1269
- # """
1270
- # Add a message to a conversation.
1271
-
1272
- # Args:
1273
- # conversation_id: Conversation ID
1274
- # message: Message dict
1275
- # {
1276
- # 'role': 'user' or 'assistant',
1277
- # 'content': str,
1278
- # 'timestamp': datetime,
1279
- # 'metadata': dict (optional - policy_action, docs_retrieved, etc.)
1280
- # }
1281
-
1282
- # Returns:
1283
- # bool: Success status
1284
- # """
1285
- # # Ensure timestamp exists
1286
- # if "timestamp" not in message:
1287
- # message["timestamp"] = datetime.now()
1288
-
1289
- # # Add message to conversation
1290
- # result = await self.conversations.update_one(
1291
- # {"conversation_id": conversation_id},
1292
- # {
1293
- # "$push": {"messages": message},
1294
- # "$set": {"updated_at": datetime.now()}
1295
- # }
1296
- # )
1297
-
1298
- # return result.modified_count > 0
1299
-
1300
- # async def get_conversation_history(
1301
- # self,
1302
- # conversation_id: str,
1303
- # max_messages: int = None
1304
- # ) -> List[Dict]:
1305
- # """
1306
- # Get conversation history (messages only).
1307
-
1308
- # Args:
1309
- # conversation_id: Conversation ID
1310
- # max_messages: Optional limit on number of messages
1311
-
1312
- # Returns:
1313
- # list: List of messages
1314
- # """
1315
- # conversation = await self.get_conversation(conversation_id)
1316
-
1317
- # if not conversation:
1318
- # return []
1319
-
1320
- # messages = conversation.get("messages", [])
1321
-
1322
- # if max_messages:
1323
- # messages = messages[-max_messages:]
1324
-
1325
- # return messages
1326
-
1327
- # async def delete_conversation(self, conversation_id: str) -> bool:
1328
- # """
1329
- # Soft delete a conversation (mark as deleted, don't actually delete).
1330
-
1331
- # Args:
1332
- # conversation_id: Conversation ID
1333
-
1334
- # Returns:
1335
- # bool: Success status
1336
- # """
1337
- # result = await self.conversations.update_one(
1338
- # {"conversation_id": conversation_id},
1339
- # {
1340
- # "$set": {
1341
- # "status": "deleted",
1342
- # "deleted_at": datetime.now()
1343
- # }
1344
- # }
1345
- # )
1346
-
1347
- # return result.modified_count > 0
1348
-
1349
- # # ========================================================================
1350
- # # RETRIEVAL LOGS (for RL training)
1351
- # # ========================================================================
1352
-
1353
- # async def log_retrieval(
1354
- # self,
1355
- # log_data: Dict
1356
- # ) -> str:
1357
- # """
1358
- # Log a retrieval operation (for RL training and analysis).
1359
-
1360
- # Args:
1361
- # log_data: Log data dict
1362
- # {
1363
- # 'conversation_id': str,
1364
- # 'user_id': str,
1365
- # 'query': str,
1366
- # 'policy_action': 'FETCH' or 'NO_FETCH',
1367
- # 'policy_confidence': float,
1368
- # 'documents_retrieved': int,
1369
- # 'top_doc_score': float or None,
1370
- # 'retrieved_docs_metadata': list,
1371
- # 'response': str,
1372
- # 'retrieval_time_ms': float,
1373
- # 'generation_time_ms': float,
1374
- # 'total_time_ms': float,
1375
- # 'timestamp': datetime
1376
- # }
1377
-
1378
- # Returns:
1379
- # str: Log ID
1380
- # """
1381
- # # Add timestamp if not present
1382
- # if "timestamp" not in log_data:
1383
- # log_data["timestamp"] = datetime.now()
1384
-
1385
- # # Generate log ID
1386
- # log_id = str(uuid.uuid4())
1387
- # log_data["log_id"] = log_id
1388
-
1389
- # # Insert log
1390
- # await self.retrieval_logs.insert_one(log_data)
1391
-
1392
- # return log_id
1393
-
1394
- # async def get_retrieval_logs(
1395
- # self,
1396
- # conversation_id: Optional[str] = None,
1397
- # user_id: Optional[str] = None,
1398
- # limit: int = 100,
1399
- # skip: int = 0
1400
- # ) -> List[Dict]:
1401
- # """
1402
- # Get retrieval logs (for analysis and RL training).
1403
-
1404
- # Args:
1405
- # conversation_id: Optional filter by conversation
1406
- # user_id: Optional filter by user
1407
- # limit: Maximum number of logs
1408
- # skip: Number of logs to skip
1409
-
1410
- # Returns:
1411
- # list: List of log documents
1412
- # """
1413
- # # Build query
1414
- # query = {}
1415
- # if conversation_id:
1416
- # query["conversation_id"] = conversation_id
1417
- # if user_id:
1418
- # query["user_id"] = user_id
1419
-
1420
- # # Fetch logs
1421
- # cursor = self.retrieval_logs.find(query).sort("timestamp", -1).skip(skip).limit(limit)
1422
- # logs = await cursor.to_list(length=limit)
1423
-
1424
- # # Convert ObjectIds to strings
1425
- # for log in logs:
1426
- # if "_id" in log:
1427
- # log["_id"] = str(log["_id"])
1428
-
1429
- # return logs
1430
-
1431
- # async def get_logs_for_rl_training(
1432
- # self,
1433
- # min_date: Optional[datetime] = None,
1434
- # limit: int = 1000
1435
- # ) -> List[Dict]:
1436
- # """
1437
- # Get logs specifically for RL training.
1438
- # Filters for logs with both policy decision and retrieval results.
1439
-
1440
- # Args:
1441
- # min_date: Optional minimum date for logs
1442
- # limit: Maximum number of logs
1443
-
1444
- # Returns:
1445
- # list: List of log documents suitable for RL training
1446
- # """
1447
- # # Build query
1448
- # query = {
1449
- # "policy_action": {"$exists": True},
1450
- # "response": {"$exists": True}
1451
- # }
1452
-
1453
- # if min_date:
1454
- # query["timestamp"] = {"$gte": min_date}
1455
-
1456
- # # Fetch logs
1457
- # cursor = self.retrieval_logs.find(query).sort("timestamp", -1).limit(limit)
1458
- # logs = await cursor.to_list(length=limit)
1459
-
1460
- # # Convert ObjectIds
1461
- # for log in logs:
1462
- # if "_id" in log:
1463
- # log["_id"] = str(log["_id"])
1464
-
1465
- # return logs
1466
 
1467
- # # ========================================================================
1468
- # # ANALYTICS QUERIES
1469
- # # ========================================================================
 
1470
 
1471
- # async def get_conversation_stats(self, user_id: str) -> Dict:
1472
- # """
1473
- # Get conversation statistics for a user.
1474
-
1475
- # Args:
1476
- # user_id: User ID
1477
-
1478
- # Returns:
1479
- # dict: Statistics
1480
- # """
1481
- # # Count total conversations
1482
- # total_conversations = await self.conversations.count_documents({
1483
- # "user_id": user_id,
1484
- # "status": "active"
1485
- # })
1486
 
1487
- # # Count total messages
1488
- # pipeline = [
1489
- # {"$match": {"user_id": user_id, "status": "active"}},
1490
- # {"$project": {"message_count": {"$size": "$messages"}}}
1491
- # ]
 
 
 
 
 
1492
 
1493
- # result = await self.conversations.aggregate(pipeline).to_list(length=None)
1494
- # total_messages = sum(doc.get("message_count", 0) for doc in result)
 
1495
 
1496
- # return {
1497
- # "total_conversations": total_conversations,
1498
- # "total_messages": total_messages,
1499
- # "avg_messages_per_conversation": total_messages / total_conversations if total_conversations > 0 else 0
1500
- # }
1501
 
1502
- # async def get_policy_stats(self, user_id: Optional[str] = None) -> Dict:
1503
- # """
1504
- # Get policy decision statistics.
1505
-
1506
- # Args:
1507
- # user_id: Optional user ID filter
1508
-
1509
- # Returns:
1510
- # dict: Policy statistics
1511
- # """
1512
- # # Build query
1513
- # query = {}
1514
- # if user_id:
1515
- # query["user_id"] = user_id
1516
-
1517
- # # Count FETCH vs NO_FETCH
1518
- # fetch_count = await self.retrieval_logs.count_documents({
1519
- # **query,
1520
- # "policy_action": "FETCH"
1521
- # })
1522
-
1523
- # no_fetch_count = await self.retrieval_logs.count_documents({
1524
- # **query,
1525
- # "policy_action": "NO_FETCH"
1526
- # })
1527
-
1528
- # total = fetch_count + no_fetch_count
1529
-
1530
- # return {
1531
- # "fetch_count": fetch_count,
1532
- # "no_fetch_count": no_fetch_count,
1533
- # "total": total,
1534
- # "fetch_rate": fetch_count / total if total > 0 else 0,
1535
- # "no_fetch_rate": no_fetch_count / total if total > 0 else 0
1536
- # }
1537
-
1538
-
1539
- # # ============================================================================
1540
- # # USAGE EXAMPLE (for reference)
1541
- # # ============================================================================
1542
- # """
1543
- # # In your service or API endpoint:
1544
-
1545
- # from app.db.repositories.conversation_repository import ConversationRepository
1546
-
1547
- # repo = ConversationRepository()
1548
-
1549
- # # Create conversation
1550
- # conv_id = await repo.create_conversation(user_id="user_123")
1551
-
1552
- # # Add user message
1553
- # await repo.add_message(conv_id, {
1554
- # 'role': 'user',
1555
- # 'content': 'What is my balance?',
1556
- # 'timestamp': datetime.now()
1557
- # })
1558
 
1559
- # # Add assistant message
1560
- # await repo.add_message(conv_id, {
1561
- # 'role': 'assistant',
1562
- # 'content': 'Your balance is $1000',
1563
- # 'timestamp': datetime.now(),
1564
- # 'metadata': {
1565
- # 'policy_action': 'FETCH',
1566
- # 'documents_retrieved': 3
1567
- # }
1568
- # })
1569
 
1570
- # # Get conversation history
1571
- # history = await repo.get_conversation_history(conv_id)
 
1572
 
1573
- # # Log retrieval for RL training
1574
- # await repo.log_retrieval({
1575
- # 'conversation_id': conv_id,
1576
- # 'user_id': 'user_123',
1577
- # 'query': 'What is my balance?',
1578
- # 'policy_action': 'FETCH',
1579
- # 'documents_retrieved': 3,
1580
- # 'response': 'Your balance is $1000'
1581
- # })
1582
- # """
 
1
+ # ============================================================================
2
+ # backend/app/db/repositories/conversation_repository.py
3
+ # ============================================================================
4
+
5
+
6
  """
7
  Conversation Repository - MongoDB Operations (UPDATED)
8
 
 
21
  from app.db.mongodb import get_database
22
  from app.models.conversation import (
23
  Conversation,
 
24
  ConversationListResponse,
25
  ConversationListResult
26
  )
 
580
 
581
  except Exception as e:
582
  print(f"⚠️ Failed to create indexes: {e}")
583
+ # ============================================================================
584
+ # ADD TO: backend/app/db/repositories/conversation_repository.py
585
+ # Add this method to ConversationRepository class
586
+ # ============================================================================
587
 
588
+ async def update_message_reaction(
589
+ self,
590
+ conversation_id: str,
591
+ message_index: int,
592
+ reaction: Optional[str]
593
+ ) -> bool:
594
+ """
595
+ Update reaction for a specific message.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
596
 
597
+ Args:
598
+ conversation_id: Conversation ID
599
+ message_index: Index of message in messages array (0-based)
600
+ reaction: 'like', 'dislike', or None (to remove)
601
 
602
+ Returns:
603
+ bool: True if updated
604
+ """
605
+ try:
606
+ from bson import ObjectId
 
 
 
 
 
 
 
 
 
 
607
 
608
+ # Build update query
609
+ result = await self.collection.update_one(
610
+ {"_id": ObjectId(conversation_id)},
611
+ {
612
+ "$set": {
613
+ f"messages.{message_index}.reaction": reaction,
614
+ "updated_at": datetime.utcnow()
615
+ }
616
+ }
617
+ )
618
 
619
+ if result.modified_count > 0:
620
+ print(f" Reaction updated for message {message_index}: {reaction}")
621
+ return True
622
 
623
+ print(f"⚠️ No message updated (conversation or index not found)")
624
+ return False
 
 
 
625
 
626
+ except Exception as e:
627
+ print(f"❌ Update reaction error: {e}")
628
+ return False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
629
 
 
 
 
 
 
 
 
 
 
 
630
 
631
+ # ============================================================================
632
+ # GLOBAL REPOSITORY INSTANCE
633
+ # ============================================================================
634
 
635
+ conversation_repository = ConversationRepository()
 
 
 
 
 
 
 
 
 
app/main.py CHANGED
@@ -1,3 +1,8 @@
 
 
 
 
 
1
  """
2
  FastAPI Main Application Entry Point (UPDATED)
3
 
@@ -141,6 +146,7 @@ app.add_middleware(
141
 
142
  from app.api.v1 import auth
143
  from app.api.v1 import conversation_routes # ✅ NEW IMPORT
 
144
 
145
  # Auth router (public endpoints - register, login)
146
  app.include_router(
@@ -155,6 +161,12 @@ app.include_router(
155
  prefix="/api/v1/chat",
156
  tags=["💬 Chat & Conversations"]
157
  )
 
 
 
 
 
 
158
 
159
  # ============================================================================
160
  # ROOT ENDPOINTS
@@ -196,6 +208,14 @@ async def root():
196
  "search_conversations": "GET /api/v1/chat/conversations/search (requires token)",
197
  "conversation_stats": "GET /api/v1/chat/conversations/stats (requires token)"
198
  },
 
 
 
 
 
 
 
 
199
  "health": "GET /health"
200
  }
201
  }
 
1
+ # ============================================================================
2
+ # backend/app/main.py
3
+ # ============================================================================
4
+
5
+
6
  """
7
  FastAPI Main Application Entry Point (UPDATED)
8
 
 
146
 
147
  from app.api.v1 import auth
148
  from app.api.v1 import conversation_routes # ✅ NEW IMPORT
149
+ from app.api.v1 import file_routes # new file routes
150
 
151
  # Auth router (public endpoints - register, login)
152
  app.include_router(
 
161
  prefix="/api/v1/chat",
162
  tags=["💬 Chat & Conversations"]
163
  )
164
+ # File Upload router (protected endpoints - requires JWT token)
165
+ app.include_router(
166
+ file_routes.router,
167
+ prefix="/api/v1",
168
+ tags=["📁 File Upload"]
169
+ )
170
 
171
  # ============================================================================
172
  # ROOT ENDPOINTS
 
208
  "search_conversations": "GET /api/v1/chat/conversations/search (requires token)",
209
  "conversation_stats": "GET /api/v1/chat/conversations/stats (requires token)"
210
  },
211
+ "files": {
212
+ "upload_image": "POST /api/v1/files/upload/image (requires token)",
213
+ "upload_pdf": "POST /api/v1/files/upload/pdf (requires token)",
214
+ "upload_document": "POST /api/v1/files/upload/document (requires token)",
215
+ "upload_audio": "POST /api/v1/files/upload/audio (requires token)",
216
+ "delete_file": "DELETE /api/v1/files/delete (requires token)",
217
+ "health": "GET /api/v1/files/health"
218
+ },
219
  "health": "GET /health"
220
  }
221
  }
app/models/conversation.py CHANGED
@@ -1,3 +1,9 @@
 
 
 
 
 
 
1
  """
2
  Conversation Models for MongoDB
3
 
@@ -10,7 +16,7 @@ Handles conversation persistence with:
10
 
11
  from datetime import datetime
12
  from typing import List, Optional, Dict, Any, Annotated
13
- from pydantic import BaseModel, Field, ConfigDict, field_validator, BeforeValidator
14
  from bson import ObjectId
15
 
16
 
@@ -275,3 +281,78 @@ class ConversationListResult(BaseModel):
275
  }
276
  }
277
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ============================================================================
2
+ # backend/app/models/conversation.py
3
+ # ============================================================================
4
+
5
+
6
+
7
  """
8
  Conversation Models for MongoDB
9
 
 
16
 
17
  from datetime import datetime
18
  from typing import List, Optional, Dict, Any, Annotated
19
+ from pydantic import BaseModel, Field, ConfigDict, BeforeValidator
20
  from bson import ObjectId
21
 
22
 
 
281
  }
282
  }
283
  )
284
+
285
+ # ============================================================================
286
+ # UPDATE: backend/app/models/conversation.py
287
+ # ADD THIS TO Message CLASS (line ~40)
288
+ # ============================================================================
289
+
290
+ class Message(BaseModel):
291
+ """
292
+ Single message in a conversation.
293
+
294
+ Contains:
295
+ - User/assistant content
296
+ - Metadata from RAG pipeline (policy action, retrieval stats)
297
+ - User reaction (👍 👎)
298
+ - Timestamp
299
+ """
300
+
301
+ role: str = Field(..., description="Role: 'user' or 'assistant'")
302
+ content: str = Field(..., description="Message content")
303
+ timestamp: datetime = Field(default_factory=datetime.utcnow)
304
+
305
+ # Metadata from RAG pipeline (only for assistant messages)
306
+ metadata: Optional[Dict[str, Any]] = Field(
307
+ default=None,
308
+ description="RAG metadata: policy_action, confidence, docs_retrieved, etc."
309
+ )
310
+
311
+ # ========================================================================
312
+ # 🆕 NEW: User reaction to message
313
+ # ========================================================================
314
+ reaction: Optional[str] = Field(
315
+ default=None,
316
+ description="User reaction: 'like', 'dislike', or None",
317
+ pattern="^(like|dislike)$" # Only allow these values
318
+ )
319
+
320
+ model_config = ConfigDict(
321
+ json_encoders={
322
+ datetime: lambda v: v.isoformat()
323
+ },
324
+ json_schema_extra={
325
+ "example": {
326
+ "role": "assistant",
327
+ "content": "Your account balance is $1,234.56",
328
+ "timestamp": "2024-01-15T10:30:00",
329
+ "metadata": {
330
+ "policy_action": "FETCH",
331
+ "confidence": 0.95
332
+ },
333
+ "reaction": "like" # 👍
334
+ }
335
+ }
336
+ )
337
+
338
+
339
+ # ============================================================================
340
+ # 🆕 NEW REQUEST MODEL
341
+ # ============================================================================
342
+
343
+ class ReactToMessageRequest(BaseModel):
344
+ """Request body for reacting to a message"""
345
+
346
+ reaction: str = Field(
347
+ ...,
348
+ description="Reaction type: 'like' or 'dislike'",
349
+ pattern="^(like|dislike)$"
350
+ )
351
+
352
+ model_config = ConfigDict(
353
+ json_schema_extra={
354
+ "example": {
355
+ "reaction": "like"
356
+ }
357
+ }
358
+ )
app/services/chat_service.py CHANGED
@@ -1,3 +1,8 @@
 
 
 
 
 
1
  """
2
  Chat Service - Main RAG Pipeline
3
 
 
1
+ # ============================================================================
2
+ # backend/app/services/chat_service.py
3
+ # ============================================================================
4
+
5
+
6
  """
7
  Chat Service - Main RAG Pipeline
8
 
app/services/conversation_service.py CHANGED
@@ -1,3 +1,8 @@
 
 
 
 
 
1
  """
2
  Conversation Service - Business Logic Layer (UPDATED)
3
 
 
1
+ # ============================================================================
2
+ # backend/app/services/conversation_service.py
3
+ # ============================================================================
4
+
5
+
6
  """
7
  Conversation Service - Business Logic Layer (UPDATED)
8
 
app/services/file_service.py ADDED
@@ -0,0 +1,323 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from pathlib import Path
3
+ from typing import Optional, Dict, Any
4
+ from fastapi import UploadFile, HTTPException
5
+ import pytesseract
6
+ from PIL import Image
7
+ import PyPDF2
8
+ import docx
9
+ from io import BytesIO
10
+
11
+ from app.utils.file_utils import (
12
+ validate_file_type, validate_file_size, generate_unique_filename,
13
+ save_upload_file, ALLOWED_IMAGE_TYPES,
14
+ # ALLOWED_DOC_TYPES,
15
+ ALLOWED_AUDIO_TYPES
16
+ )
17
+ from app.config import settings
18
+
19
+
20
+ class FileService:
21
+ """File processing service for images, PDFs, documents, and audio"""
22
+
23
+ def __init__(self):
24
+ self.upload_dir = Path(settings.UPLOAD_DIR)
25
+ self.upload_dir.mkdir(parents=True, exist_ok=True)
26
+ print("✅ FileService initialized")
27
+
28
+ async def process_image(self, file: UploadFile, user_id: str) -> Dict[str, Any]:
29
+ """
30
+ Upload image + OCR extraction.
31
+
32
+ Args:
33
+ file: Uploaded image file
34
+ user_id: User ID (for file organization)
35
+
36
+ Returns:
37
+ Dict with file_id, path, extracted_text, size
38
+ """
39
+ if not validate_file_type(file, ALLOWED_IMAGE_TYPES):
40
+ raise HTTPException(400, "Invalid image type. Allowed: JPG, PNG, WEBP")
41
+ if not validate_file_size(file):
42
+ raise HTTPException(400, "File too large (max 10MB)")
43
+
44
+ # Save file
45
+ filename = generate_unique_filename(file.filename)
46
+ filepath = self.upload_dir / "images" / user_id / filename
47
+ await save_upload_file(file, filepath)
48
+
49
+ # OCR extraction
50
+ try:
51
+ image = Image.open(filepath)
52
+ text = pytesseract.image_to_string(image)
53
+ except Exception as e:
54
+ print(f"⚠️ OCR failed: {e}")
55
+ text = ""
56
+
57
+ return {
58
+ "file_id": filename,
59
+ "file_path": str(filepath.relative_to(self.upload_dir)),
60
+ "file_type": "image",
61
+ "extracted_text": text.strip(),
62
+ "size": filepath.stat().st_size,
63
+ "original_filename": file.filename
64
+ }
65
+
66
+ async def process_pdf(self, file: UploadFile, user_id: str) -> Dict[str, Any]:
67
+ """
68
+ Upload PDF + text extraction.
69
+
70
+ Args:
71
+ file: Uploaded PDF file
72
+ user_id: User ID
73
+
74
+ Returns:
75
+ Dict with file_id, path, extracted_text, pages, size
76
+ """
77
+ if file.content_type != "application/pdf":
78
+ raise HTTPException(400, "Invalid PDF file")
79
+ if not validate_file_size(file):
80
+ raise HTTPException(400, "File too large (max 10MB)")
81
+
82
+ # Save
83
+ filename = generate_unique_filename(file.filename)
84
+ filepath = self.upload_dir / "documents" / user_id / filename
85
+ await save_upload_file(file, filepath)
86
+
87
+ # Extract text
88
+ text = ""
89
+ pages = 0
90
+ try:
91
+ with open(filepath, 'rb') as f:
92
+ pdf_reader = PyPDF2.PdfReader(f)
93
+ pages = len(pdf_reader.pages)
94
+ for page in pdf_reader.pages:
95
+ text += page.extract_text() + "\n"
96
+ except Exception as e:
97
+ print(f"⚠️ PDF extraction failed: {e}")
98
+
99
+ return {
100
+ "file_id": filename,
101
+ "file_path": str(filepath.relative_to(self.upload_dir)),
102
+ "file_type": "pdf",
103
+ "extracted_text": text.strip(),
104
+ "pages": pages,
105
+ "size": filepath.stat().st_size,
106
+ "original_filename": file.filename
107
+ }
108
+
109
+ async def process_docx(self, file: UploadFile, user_id: str) -> Dict[str, Any]:
110
+ """
111
+ Upload DOCX + text extraction.
112
+
113
+ Args:
114
+ file: Uploaded DOCX file
115
+ user_id: User ID
116
+
117
+ Returns:
118
+ Dict with file_id, path, extracted_text, size
119
+ """
120
+ if file.content_type != "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
121
+ raise HTTPException(400, "Invalid DOCX file")
122
+ if not validate_file_size(file):
123
+ raise HTTPException(400, "File too large (max 10MB)")
124
+
125
+ # Save
126
+ filename = generate_unique_filename(file.filename)
127
+ filepath = self.upload_dir / "documents" / user_id / filename
128
+ await save_upload_file(file, filepath)
129
+
130
+ # Extract
131
+ text = ""
132
+ try:
133
+ doc = docx.Document(filepath)
134
+ text = "\n".join([para.text for para in doc.paragraphs])
135
+ except Exception as e:
136
+ print(f"⚠️ DOCX extraction failed: {e}")
137
+
138
+ return {
139
+ "file_id": filename,
140
+ "file_path": str(filepath.relative_to(self.upload_dir)),
141
+ "file_type": "docx",
142
+ "extracted_text": text.strip(),
143
+ "size": filepath.stat().st_size,
144
+ "original_filename": file.filename
145
+ }
146
+
147
+ async def process_text_file(self, file: UploadFile, user_id: str) -> Dict[str, Any]:
148
+ """
149
+ Upload TXT file.
150
+
151
+ Args:
152
+ file: Uploaded text file
153
+ user_id: User ID
154
+
155
+ Returns:
156
+ Dict with file_id, path, extracted_text, size
157
+ """
158
+ if file.content_type != "text/plain":
159
+ raise HTTPException(400, "Invalid text file")
160
+ if not validate_file_size(file):
161
+ raise HTTPException(400, "File too large (max 10MB)")
162
+
163
+ filename = generate_unique_filename(file.filename)
164
+ filepath = self.upload_dir / "documents" / user_id / filename
165
+ await save_upload_file(file, filepath)
166
+
167
+ text = ""
168
+ try:
169
+ with open(filepath, 'r', encoding='utf-8') as f:
170
+ text = f.read()
171
+ except Exception as e:
172
+ print(f"⚠️ Text file read failed: {e}")
173
+
174
+ return {
175
+ "file_id": filename,
176
+ "file_path": str(filepath.relative_to(self.upload_dir)),
177
+ "file_type": "text",
178
+ "extracted_text": text.strip(),
179
+ "size": filepath.stat().st_size,
180
+ "original_filename": file.filename
181
+ }
182
+
183
+ # ============================================================================
184
+ # NEW METHOD: Using HuggingFace Transformers Whisper (FREE!)
185
+ # ============================================================================
186
+
187
+ async def transcribe_audio(self, file: UploadFile, user_id: str) -> Dict[str, Any]:
188
+ """
189
+ Speech-to-text using HuggingFace Transformers Whisper (FREE!).
190
+
191
+ Args:
192
+ file: Uploaded audio file
193
+ user_id: User ID
194
+
195
+ Returns:
196
+ Dict with file_id, path, transcription, size
197
+ """
198
+ if not validate_file_type(file, ALLOWED_AUDIO_TYPES):
199
+ raise HTTPException(400, "Invalid audio type. Allowed: MP3, WAV, WEBM, OGG, M4A")
200
+ if not validate_file_size(file):
201
+ raise HTTPException(400, "File too large (max 10MB)")
202
+
203
+ # Save audio
204
+ filename = generate_unique_filename(file.filename)
205
+ filepath = self.upload_dir / "audio" / user_id / filename
206
+ await save_upload_file(file, filepath)
207
+
208
+ # Transcribe using HuggingFace Transformers Whisper (FREE!)
209
+ transcription = ""
210
+ try:
211
+ from transformers import pipeline
212
+ import torch
213
+
214
+ # Lazy load model (only first time)
215
+ if not hasattr(self, '_whisper_pipe'):
216
+ print("🎤 Loading Whisper model (one-time)...")
217
+ device = 0 if torch.cuda.is_available() else -1
218
+ self._whisper_pipe = pipeline(
219
+ "automatic-speech-recognition",
220
+ model="openai/whisper-small", # Small = fast, good accuracy
221
+ device=device
222
+ )
223
+ print("✅ Whisper model loaded")
224
+
225
+ # Transcribe
226
+ result = self._whisper_pipe(str(filepath))
227
+ transcription = result["text"]
228
+
229
+ except Exception as e:
230
+ print(f"⚠️ Whisper transcription failed: {e}")
231
+ raise HTTPException(500, f"Transcription failed: {str(e)}")
232
+
233
+ return {
234
+ "file_id": filename,
235
+ "file_path": str(filepath.relative_to(self.upload_dir)),
236
+ "file_type": "audio",
237
+ "transcription": transcription,
238
+ "size": filepath.stat().st_size,
239
+ "original_filename": file.filename
240
+ }
241
+
242
+ # ============================================================================
243
+ # Old method: OpenAI Whisper API (paid) kept for reference
244
+ # ============================================================================
245
+ # async def transcribe_audio(self, file: UploadFile, user_id: str) -> Dict[str, Any]:
246
+ # """
247
+ # Speech-to-text using OpenAI Whisper API.
248
+
249
+ # Args:
250
+ # file: Uploaded audio file
251
+ # user_id: User ID
252
+
253
+ # Returns:
254
+ # Dict with file_id, path, transcription, size
255
+ # """
256
+ # if not validate_file_type(file, ALLOWED_AUDIO_TYPES):
257
+ # raise HTTPException(400, "Invalid audio type. Allowed: MP3, WAV, WEBM, OGG, M4A")
258
+ # if not validate_file_size(file):
259
+ # raise HTTPException(400, "File too large (max 10MB)")
260
+
261
+ # # Save audio
262
+ # filename = generate_unique_filename(file.filename)
263
+ # filepath = self.upload_dir / "audio" / user_id / filename
264
+ # await save_upload_file(file, filepath)
265
+
266
+ # # Transcribe using OpenAI Whisper API
267
+ # transcription = ""
268
+ # try:
269
+ # from openai import OpenAI
270
+ # client = OpenAI(api_key=settings.OPENAI_API_KEY)
271
+
272
+ # with open(filepath, "rb") as audio_file:
273
+ # transcript = client.audio.transcriptions.create(
274
+ # model="whisper-1",
275
+ # file=audio_file,
276
+ # language="en" # Change if needed
277
+ # )
278
+
279
+ # transcription = transcript.text
280
+ # except Exception as e:
281
+ # print(f"⚠️ Whisper transcription failed: {e}")
282
+ # raise HTTPException(500, f"Transcription failed: {str(e)}")
283
+
284
+ # return {
285
+ # "file_id": filename,
286
+ # "file_path": str(filepath.relative_to(self.upload_dir)),
287
+ # "file_type": "audio",
288
+ # "transcription": transcription,
289
+ # "size": filepath.stat().st_size,
290
+ # "original_filename": file.filename
291
+ # }
292
+
293
+ def delete_file(self, file_path: str, user_id: str) -> bool:
294
+ """
295
+ Delete uploaded file.
296
+
297
+ Args:
298
+ file_path: Relative file path (from upload_dir)
299
+ user_id: User ID (for security check)
300
+
301
+ Returns:
302
+ bool: True if deleted
303
+ """
304
+ try:
305
+ # Security: Ensure file belongs to user
306
+ if user_id not in file_path:
307
+ return False
308
+
309
+ full_path = self.upload_dir / file_path
310
+ if full_path.exists() and full_path.is_file():
311
+ full_path.unlink()
312
+ return True
313
+ return False
314
+ except Exception as e:
315
+ print(f"⚠️ File deletion failed: {e}")
316
+ return False
317
+
318
+
319
+ # ============================================================================
320
+ # GLOBAL SERVICE INSTANCE
321
+ # ============================================================================
322
+
323
+ file_service = FileService()
app/utils/file_utils.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import shutil
3
+ from pathlib import Path
4
+ from typing import Optional
5
+ from fastapi import UploadFile
6
+ import uuid
7
+
8
+ ALLOWED_IMAGE_TYPES = {"image/jpeg", "image/png", "image/jpg", "image/webp"}
9
+ ALLOWED_DOC_TYPES = {"application/pdf", "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "text/plain"}
10
+ ALLOWED_AUDIO_TYPES = {"audio/mpeg", "audio/wav", "audio/webm", "audio/ogg", "audio/m4a"}
11
+
12
+ MAX_FILE_SIZE = 10 * 1024 * 1024 # 10MB
13
+
14
+ def validate_file_type(file: UploadFile, allowed_types: set) -> bool:
15
+ """Check if file type is allowed"""
16
+ return file.content_type in allowed_types
17
+
18
+ def validate_file_size(file: UploadFile, max_size: int = MAX_FILE_SIZE) -> bool:
19
+ """Check if file size is under limit"""
20
+ file.file.seek(0, 2)
21
+ size = file.file.tell()
22
+ file.file.seek(0)
23
+ return size <= max_size
24
+
25
+ def generate_unique_filename(original_filename: str) -> str:
26
+ """Generate unique filename with UUID"""
27
+ ext = Path(original_filename).suffix
28
+ return f"{uuid.uuid4()}{ext}"
29
+
30
+ async def save_upload_file(file: UploadFile, destination: Path) -> Path:
31
+ """Save uploaded file to destination"""
32
+ destination.parent.mkdir(parents=True, exist_ok=True)
33
+ with destination.open("wb") as buffer:
34
+ shutil.copyfileobj(file.file, buffer)
35
+ return destination
requirements.txt CHANGED
@@ -69,6 +69,7 @@ numpy
69
 
70
  # ML/Deep Learning
71
  torch
 
72
  transformers
73
 
74
 
@@ -76,4 +77,12 @@ transformers
76
  python-jose[cryptography]
77
  passlib[bcrypt]==1.7.4
78
  python-multipart
79
- bcrypt==4.0.1
 
 
 
 
 
 
 
 
 
69
 
70
  # ML/Deep Learning
71
  torch
72
+ torchaudio
73
  transformers
74
 
75
 
 
77
  python-jose[cryptography]
78
  passlib[bcrypt]==1.7.4
79
  python-multipart
80
+ bcrypt==4.0.1
81
+
82
+ # new features dependencies
83
+ python-multipart
84
+ pillow
85
+ pytesseract
86
+ PyPDF2
87
+ python-docx
88
+ pydub