pluto90 commited on
Commit
be24a36
Β·
verified Β·
1 Parent(s): b41deb1

Update app/api/routes.py

Browse files
Files changed (1) hide show
  1. app/api/routes.py +524 -266
app/api/routes.py CHANGED
@@ -1,266 +1,524 @@
1
- # app/api/routes.py
2
-
3
- from fastapi import APIRouter, UploadFile, Form, HTTPException
4
- from fastapi.responses import StreamingResponse
5
- import uuid, os, json, asyncio
6
- from collections import defaultdict
7
- from bson import ObjectId
8
-
9
- from app.core.pdf_processor import extract_text_from_pdf
10
- from app.core.embedding_engine import embed_and_store, embedder, qdrant, COLLECTION_NAME
11
- from qdrant_client.http.models import Filter, FieldCondition, MatchValue
12
- from app.core.llm_engine import ask_gemini
13
- from app.core.mongo import conversations
14
- from qdrant_client import QdrantClient
15
- from app.core.config import QDRANT_URL, QDRANT_API_KEY
16
-
17
- router = APIRouter()
18
- UPLOAD_DIR = "uploads"
19
- os.makedirs(UPLOAD_DIR, exist_ok=True)
20
-
21
- # Qdrant Client
22
- qdrant_client = QdrantClient(
23
- url=QDRANT_URL,
24
- api_key=QDRANT_API_KEY,
25
- check_compatibility=False)
26
-
27
- # In-memory (for fallback)
28
- chat_histories = defaultdict(list)
29
-
30
-
31
- # ---------------------------------------------------
32
- # βœ… Upload endpoint
33
- # ---------------------------------------------------
34
- @router.post("/upload")
35
- async def upload_pdf(file: UploadFile):
36
- doc_id = str(uuid.uuid4())
37
- file_path = os.path.join(UPLOAD_DIR, f"{doc_id}.pdf")
38
-
39
- with open(file_path, "wb") as f:
40
- f.write(await file.read())
41
-
42
- text = extract_text_from_pdf(file_path)
43
- chunks = [text[i:i + 1000] for i in range(0, len(text), 1000)]
44
- embed_and_store(chunks, doc_id)
45
-
46
- # βœ… Create MongoDB record
47
- conversations.insert_one({
48
- "doc_id": doc_id,
49
- "name": file.filename,
50
- "file_path": file_path,
51
- "qdrant_collection": COLLECTION_NAME,
52
- "history": [],
53
- "created_at": asyncio.get_event_loop().time(),
54
- })
55
-
56
- return {"doc_id": doc_id, "message": "PDF uploaded and processed successfully."}
57
-
58
-
59
- # ---------------------------------------------------
60
- # βœ… Streaming Upload endpoint (with Mongo insert)
61
- # ---------------------------------------------------
62
- @router.post("/upload-stream")
63
- async def upload_stream(file: UploadFile):
64
- doc_id = str(uuid.uuid4())
65
- file_path = os.path.join(UPLOAD_DIR, f"{doc_id}.pdf")
66
-
67
- with open(file_path, "wb") as f:
68
- content = await file.read()
69
- f.write(content)
70
-
71
- async def generate_upload_events():
72
- try:
73
- yield f"data: {json.dumps({'status': 'βœ… File uploaded successfully. Starting processing...'})}\n\n"
74
- await asyncio.sleep(0.5)
75
-
76
- yield f"data: {json.dumps({'status': 'πŸ“„ Extracting text from PDF...'})}\n\n"
77
- text = extract_text_from_pdf(file_path)
78
- await asyncio.sleep(0.5)
79
-
80
- yield f"data: {json.dumps({'status': '🧠 Generating embeddings...'})}\n\n"
81
- chunks = [text[i:i + 1000] for i in range(0, len(text), 1000)]
82
- await asyncio.sleep(0.5)
83
-
84
- yield f"data: {json.dumps({'status': 'πŸ’Ύ Storing vectors into database...'})}\n\n"
85
- embed_and_store(chunks, doc_id)
86
- await asyncio.sleep(0.5)
87
-
88
- # βœ… Save chat info to MongoDB
89
- conversations.insert_one({
90
- "doc_id": doc_id,
91
- "name": file.filename,
92
- "file_path": file_path,
93
- "qdrant_collection": COLLECTION_NAME,
94
- "history": [],
95
- "created_at": asyncio.get_event_loop().time(),
96
- })
97
-
98
- yield f"data: {json.dumps({'status': 'πŸŽ‰ Done! You’re good to go.', 'doc_id': doc_id})}\n\n"
99
- yield "event: end\ndata: {}\n\n"
100
-
101
- except Exception as e:
102
- yield f"data: {json.dumps({'status': f'⚠️ Error: {str(e)}'})}\n\n"
103
- yield "event: end\ndata: {}\n\n"
104
-
105
- return StreamingResponse(generate_upload_events(), media_type="text/event-stream")
106
-
107
-
108
- # ---------------------------------------------------
109
- # βœ… Fetch All Chats (for Sidebar)
110
- # ---------------------------------------------------
111
- @router.get("/chats")
112
- async def get_all_chats():
113
- try:
114
- chats = list(conversations.find({}, {"_id": 1, "name": 1, "doc_id": 1}))
115
- for c in chats:
116
- c["_id"] = str(c["_id"])
117
- return {"chats": chats}
118
- except Exception as e:
119
- raise HTTPException(status_code=500, detail=str(e))
120
-
121
-
122
- # ---------------------------------------------------
123
- # βœ… Delete Chat (Qdrant + Mongo + File)
124
- # ---------------------------------------------------
125
- # @router.delete("/chat/{chat_id}")
126
- # async def delete_chat(chat_id: str):
127
- # try:
128
- # chat = conversations.find_one({"_id": ObjectId(chat_id)})
129
- # if not chat:
130
- # raise HTTPException(status_code=404, detail="Chat not found")
131
-
132
- # doc_id = chat.get("doc_id")
133
- # qdrant_collection = chat.get("qdrant_collection")
134
-
135
- # # βœ… Delete embeddings from Qdrant
136
- # try:
137
- # qdrant_client.delete(
138
- # collection_name=qdrant_collection,
139
- # points_selector=Filter(
140
- # must=[FieldCondition(key="doc_id", match=MatchValue(value=doc_id))]
141
- # ),
142
- # )
143
- # except Exception as e:
144
- # print(f"⚠️ Qdrant delete failed: {e}")
145
-
146
- # # βœ… Delete uploaded file
147
- # file_path = chat.get("file_path")
148
- # if file_path and os.path.exists(file_path):
149
- # os.remove(file_path)
150
-
151
- # # βœ… Delete from MongoDB
152
- # conversations.delete_one({"_id": ObjectId(chat_id)})
153
-
154
- # return {"status": "success", "message": "Chat deleted successfully"}
155
-
156
- # except Exception as e:
157
- # raise HTTPException(status_code=500, detail=str(e))
158
-
159
-
160
-
161
- @router.delete("/chat/{chat_id}")
162
- async def delete_chat(chat_id: str):
163
- try:
164
- chat = conversations.find_one({"_id": ObjectId(chat_id)})
165
- if not chat:
166
- raise HTTPException(status_code=404, detail="Chat not found")
167
-
168
- doc_id = chat.get("doc_id")
169
-
170
- # βœ… Delete embeddings from Qdrant
171
- try:
172
- qdrant.delete(
173
- collection_name=COLLECTION_NAME,
174
- points_selector=Filter(
175
- must=[FieldCondition(key="doc_id", match=MatchValue(value=doc_id))]
176
- ),
177
- )
178
- except Exception as e:
179
- print(f"⚠️ Qdrant delete failed: {e}")
180
-
181
- # βœ… Delete uploaded PDF file
182
- file_path = chat.get("file_path") or os.path.join("uploads", f"{doc_id}.pdf")
183
- if os.path.exists(file_path):
184
- os.remove(file_path)
185
-
186
- # βœ… Delete from MongoDB
187
- conversations.delete_one({"_id": ObjectId(chat_id)})
188
-
189
- return {"status": "success", "message": "Chat and embeddings deleted successfully"}
190
-
191
- except Exception as e:
192
- raise HTTPException(status_code=500, detail=str(e))
193
-
194
-
195
-
196
-
197
- # ---------------------------------------------------
198
- # βœ… Chat Query Endpoint (Persistent Memory)
199
- # ---------------------------------------------------
200
- @router.post("/query")
201
- async def query_pdf(doc_id: str = Form(...), question: str = Form(...)):
202
- # βœ… Save user message
203
- conversations.update_one(
204
- {"doc_id": doc_id},
205
- {"$push": {"history": {"role": "user", "content": question}}},
206
- upsert=True
207
- )
208
-
209
- # βœ… Retrieve history
210
- doc = conversations.find_one({"doc_id": doc_id})
211
- history_list = doc["history"][-10:] if doc and "history" in doc else []
212
-
213
- structured_history = "\n".join(
214
- [f"{h['role'].title()}: {h['content']}" for h in history_list]
215
- )
216
-
217
- # βœ… Vector Search
218
- question_vector = embedder.encode([question])[0].tolist()
219
- # hits = qdrant.search(
220
- # collection_name=COLLECTION_NAME,
221
- # query_vector=question_vector,
222
- # query_filter=Filter(
223
- # must=[FieldCondition(key="doc_id", match=MatchValue(value=doc_id))]
224
- # ),
225
- # limit=5,
226
- # )
227
-
228
-
229
- hits = qdrant_client.query_points(
230
- collection_name=COLLECTION_NAME,
231
- query=question_vector,
232
- query_filter=Filter(
233
- must=[FieldCondition(key="doc_id", match=MatchValue(value=doc_id))]
234
- ),
235
- limit=5,
236
- ).points
237
-
238
-
239
- context = "\n".join([hit.payload["text"] for hit in hits])
240
- full_context = f"{structured_history}\n\n{context}"
241
-
242
- # βœ… LLM Response
243
- answer = ask_gemini(full_context, question)
244
-
245
- # βœ… Save assistant response
246
- conversations.update_one(
247
- {"doc_id": doc_id},
248
- {"$push": {"history": {"role": "assistant", "content": answer}}},
249
- upsert=True
250
- )
251
-
252
- return {
253
- "answer": answer,
254
- "context_used": context,
255
- "history_count": len(history_list)
256
- }
257
-
258
-
259
-
260
- @router.get("/conversations/{doc_id}")
261
- async def get_conversation(doc_id: str):
262
- doc = conversations.find_one({"doc_id": doc_id})
263
- if not doc:
264
- return {"history": []}
265
- return {"history": doc.get("history", [])}
266
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # # app/api/routes.py
2
+
3
+ # from fastapi import APIRouter, UploadFile, Form, HTTPException
4
+ # from fastapi.responses import StreamingResponse
5
+ # import uuid, os, json, asyncio
6
+ # from collections import defaultdict
7
+ # from bson import ObjectId
8
+
9
+ # from app.core.pdf_processor import extract_text_from_pdf
10
+ # from app.core.embedding_engine import embed_and_store, embedder, qdrant, COLLECTION_NAME
11
+ # from qdrant_client.http.models import Filter, FieldCondition, MatchValue
12
+ # from app.core.llm_engine import ask_gemini
13
+ # from app.core.mongo import conversations
14
+ # from qdrant_client import QdrantClient
15
+ # from app.core.config import QDRANT_URL, QDRANT_API_KEY
16
+
17
+ # router = APIRouter()
18
+ # UPLOAD_DIR = "uploads"
19
+ # os.makedirs(UPLOAD_DIR, exist_ok=True)
20
+
21
+ # # Qdrant Client
22
+ # qdrant_client = QdrantClient(
23
+ # url=QDRANT_URL,
24
+ # api_key=QDRANT_API_KEY,
25
+ # check_compatibility=False)
26
+
27
+ # # In-memory (for fallback)
28
+ # chat_histories = defaultdict(list)
29
+
30
+
31
+ # # ---------------------------------------------------
32
+ # # βœ… Upload endpoint
33
+ # # ---------------------------------------------------
34
+ # @router.post("/upload")
35
+ # async def upload_pdf(file: UploadFile):
36
+ # doc_id = str(uuid.uuid4())
37
+ # file_path = os.path.join(UPLOAD_DIR, f"{doc_id}.pdf")
38
+
39
+ # with open(file_path, "wb") as f:
40
+ # f.write(await file.read())
41
+
42
+ # text = extract_text_from_pdf(file_path)
43
+ # chunks = [text[i:i + 1000] for i in range(0, len(text), 1000)]
44
+ # embed_and_store(chunks, doc_id)
45
+
46
+ # # βœ… Create MongoDB record
47
+ # conversations.insert_one({
48
+ # "doc_id": doc_id,
49
+ # "name": file.filename,
50
+ # "file_path": file_path,
51
+ # "qdrant_collection": COLLECTION_NAME,
52
+ # "history": [],
53
+ # "created_at": asyncio.get_event_loop().time(),
54
+ # })
55
+
56
+ # return {"doc_id": doc_id, "message": "PDF uploaded and processed successfully."}
57
+
58
+
59
+ # # ---------------------------------------------------
60
+ # # βœ… Streaming Upload endpoint (with Mongo insert)
61
+ # # ---------------------------------------------------
62
+ # @router.post("/upload-stream")
63
+ # async def upload_stream(file: UploadFile):
64
+ # doc_id = str(uuid.uuid4())
65
+ # file_path = os.path.join(UPLOAD_DIR, f"{doc_id}.pdf")
66
+
67
+ # with open(file_path, "wb") as f:
68
+ # content = await file.read()
69
+ # f.write(content)
70
+
71
+ # async def generate_upload_events():
72
+ # try:
73
+ # yield f"data: {json.dumps({'status': 'βœ… File uploaded successfully. Starting processing...'})}\n\n"
74
+ # await asyncio.sleep(0.5)
75
+
76
+ # yield f"data: {json.dumps({'status': 'πŸ“„ Extracting text from PDF...'})}\n\n"
77
+ # text = extract_text_from_pdf(file_path)
78
+ # await asyncio.sleep(0.5)
79
+
80
+ # yield f"data: {json.dumps({'status': '🧠 Generating embeddings...'})}\n\n"
81
+ # chunks = [text[i:i + 1000] for i in range(0, len(text), 1000)]
82
+ # await asyncio.sleep(0.5)
83
+
84
+ # yield f"data: {json.dumps({'status': 'πŸ’Ύ Storing vectors into database...'})}\n\n"
85
+ # embed_and_store(chunks, doc_id)
86
+ # await asyncio.sleep(0.5)
87
+
88
+ # # βœ… Save chat info to MongoDB
89
+ # conversations.insert_one({
90
+ # "doc_id": doc_id,
91
+ # "name": file.filename,
92
+ # "file_path": file_path,
93
+ # "qdrant_collection": COLLECTION_NAME,
94
+ # "history": [],
95
+ # "created_at": asyncio.get_event_loop().time(),
96
+ # })
97
+
98
+ # yield f"data: {json.dumps({'status': 'πŸŽ‰ Done! You’re good to go.', 'doc_id': doc_id})}\n\n"
99
+ # yield "event: end\ndata: {}\n\n"
100
+
101
+ # except Exception as e:
102
+ # yield f"data: {json.dumps({'status': f'⚠️ Error: {str(e)}'})}\n\n"
103
+ # yield "event: end\ndata: {}\n\n"
104
+
105
+ # return StreamingResponse(generate_upload_events(), media_type="text/event-stream")
106
+
107
+
108
+ # # ---------------------------------------------------
109
+ # # βœ… Fetch All Chats (for Sidebar)
110
+ # # ---------------------------------------------------
111
+ # @router.get("/chats")
112
+ # async def get_all_chats():
113
+ # try:
114
+ # chats = list(conversations.find({}, {"_id": 1, "name": 1, "doc_id": 1}))
115
+ # for c in chats:
116
+ # c["_id"] = str(c["_id"])
117
+ # return {"chats": chats}
118
+ # except Exception as e:
119
+ # raise HTTPException(status_code=500, detail=str(e))
120
+
121
+
122
+ # # ---------------------------------------------------
123
+ # # βœ… Delete Chat (Qdrant + Mongo + File)
124
+ # # ---------------------------------------------------
125
+ # # @router.delete("/chat/{chat_id}")
126
+ # # async def delete_chat(chat_id: str):
127
+ # # try:
128
+ # # chat = conversations.find_one({"_id": ObjectId(chat_id)})
129
+ # # if not chat:
130
+ # # raise HTTPException(status_code=404, detail="Chat not found")
131
+
132
+ # # doc_id = chat.get("doc_id")
133
+ # # qdrant_collection = chat.get("qdrant_collection")
134
+
135
+ # # # βœ… Delete embeddings from Qdrant
136
+ # # try:
137
+ # # qdrant_client.delete(
138
+ # # collection_name=qdrant_collection,
139
+ # # points_selector=Filter(
140
+ # # must=[FieldCondition(key="doc_id", match=MatchValue(value=doc_id))]
141
+ # # ),
142
+ # # )
143
+ # # except Exception as e:
144
+ # # print(f"⚠️ Qdrant delete failed: {e}")
145
+
146
+ # # # βœ… Delete uploaded file
147
+ # # file_path = chat.get("file_path")
148
+ # # if file_path and os.path.exists(file_path):
149
+ # # os.remove(file_path)
150
+
151
+ # # # βœ… Delete from MongoDB
152
+ # # conversations.delete_one({"_id": ObjectId(chat_id)})
153
+
154
+ # # return {"status": "success", "message": "Chat deleted successfully"}
155
+
156
+ # # except Exception as e:
157
+ # # raise HTTPException(status_code=500, detail=str(e))
158
+
159
+
160
+
161
+ # @router.delete("/chat/{chat_id}")
162
+ # async def delete_chat(chat_id: str):
163
+ # try:
164
+ # chat = conversations.find_one({"_id": ObjectId(chat_id)})
165
+ # if not chat:
166
+ # raise HTTPException(status_code=404, detail="Chat not found")
167
+
168
+ # doc_id = chat.get("doc_id")
169
+
170
+ # # βœ… Delete embeddings from Qdrant
171
+ # try:
172
+ # qdrant.delete(
173
+ # collection_name=COLLECTION_NAME,
174
+ # points_selector=Filter(
175
+ # must=[FieldCondition(key="doc_id", match=MatchValue(value=doc_id))]
176
+ # ),
177
+ # )
178
+ # except Exception as e:
179
+ # print(f"⚠️ Qdrant delete failed: {e}")
180
+
181
+ # # βœ… Delete uploaded PDF file
182
+ # file_path = chat.get("file_path") or os.path.join("uploads", f"{doc_id}.pdf")
183
+ # if os.path.exists(file_path):
184
+ # os.remove(file_path)
185
+
186
+ # # βœ… Delete from MongoDB
187
+ # conversations.delete_one({"_id": ObjectId(chat_id)})
188
+
189
+ # return {"status": "success", "message": "Chat and embeddings deleted successfully"}
190
+
191
+ # except Exception as e:
192
+ # raise HTTPException(status_code=500, detail=str(e))
193
+
194
+
195
+
196
+
197
+ # # ---------------------------------------------------
198
+ # # βœ… Chat Query Endpoint (Persistent Memory)
199
+ # # ---------------------------------------------------
200
+ # @router.post("/query")
201
+ # async def query_pdf(doc_id: str = Form(...), question: str = Form(...)):
202
+ # # βœ… Save user message
203
+ # conversations.update_one(
204
+ # {"doc_id": doc_id},
205
+ # {"$push": {"history": {"role": "user", "content": question}}},
206
+ # upsert=True
207
+ # )
208
+
209
+ # # βœ… Retrieve history
210
+ # doc = conversations.find_one({"doc_id": doc_id})
211
+ # history_list = doc["history"][-10:] if doc and "history" in doc else []
212
+
213
+ # structured_history = "\n".join(
214
+ # [f"{h['role'].title()}: {h['content']}" for h in history_list]
215
+ # )
216
+
217
+ # # βœ… Vector Search
218
+ # question_vector = embedder.encode([question])[0].tolist()
219
+ # # hits = qdrant.search(
220
+ # # collection_name=COLLECTION_NAME,
221
+ # # query_vector=question_vector,
222
+ # # query_filter=Filter(
223
+ # # must=[FieldCondition(key="doc_id", match=MatchValue(value=doc_id))]
224
+ # # ),
225
+ # # limit=5,
226
+ # # )
227
+
228
+
229
+ # hits = qdrant_client.query_points(
230
+ # collection_name=COLLECTION_NAME,
231
+ # query=question_vector,
232
+ # query_filter=Filter(
233
+ # must=[FieldCondition(key="doc_id", match=MatchValue(value=doc_id))]
234
+ # ),
235
+ # limit=5,
236
+ # ).points
237
+
238
+
239
+ # context = "\n".join([hit.payload["text"] for hit in hits])
240
+ # full_context = f"{structured_history}\n\n{context}"
241
+
242
+ # # βœ… LLM Response
243
+ # answer = ask_gemini(full_context, question)
244
+
245
+ # # βœ… Save assistant response
246
+ # conversations.update_one(
247
+ # {"doc_id": doc_id},
248
+ # {"$push": {"history": {"role": "assistant", "content": answer}}},
249
+ # upsert=True
250
+ # )
251
+
252
+ # return {
253
+ # "answer": answer,
254
+ # "context_used": context,
255
+ # "history_count": len(history_list)
256
+ # }
257
+
258
+
259
+
260
+ # @router.get("/conversations/{doc_id}")
261
+ # async def get_conversation(doc_id: str):
262
+ # doc = conversations.find_one({"doc_id": doc_id})
263
+ # if not doc:
264
+ # return {"history": []}
265
+ # return {"history": doc.get("history", [])}
266
+
267
+
268
+
269
+
270
+
271
+
272
+
273
+
274
+
275
+
276
+
277
+
278
+
279
+
280
+
281
+
282
+
283
+
284
+
285
+
286
+
287
+
288
+
289
+
290
+
291
+
292
+
293
+
294
+
295
+
296
+
297
+
298
+
299
+
300
+
301
+
302
+
303
+
304
+
305
+
306
+
307
+
308
+
309
+ # app/api/routes.py
310
+
311
+ from fastapi import APIRouter, UploadFile, Form, HTTPException
312
+ from fastapi.responses import StreamingResponse
313
+ import uuid, os, json, asyncio
314
+ from collections import defaultdict
315
+ from bson import ObjectId
316
+
317
+ from app.core.pdf_processor import extract_text_from_pdf
318
+ from app.core.embedding_engine import embed_and_store, embedder, qdrant, COLLECTION_NAME
319
+ from qdrant_client.http.models import Filter, FieldCondition, MatchValue
320
+ from app.core.llm_engine import ask_gemini
321
+ from app.core.mongo import conversations
322
+ from qdrant_client import QdrantClient
323
+ from app.core.config import QDRANT_URL
324
+
325
+ router = APIRouter()
326
+ UPLOAD_DIR = "uploads"
327
+ os.makedirs(UPLOAD_DIR, exist_ok=True)
328
+
329
+ # Qdrant Client
330
+ qdrant_client = QdrantClient(url=QDRANT_URL)
331
+
332
+ # In-memory (for fallback)
333
+ chat_histories = defaultdict(list)
334
+
335
+
336
+ # ---------------------------------------------------
337
+ # βœ… Upload endpoint
338
+ # ---------------------------------------------------
339
+ @router.post("/upload")
340
+ async def upload_pdf(file: UploadFile):
341
+ doc_id = str(uuid.uuid4())
342
+ file_path = os.path.join(UPLOAD_DIR, f"{doc_id}.pdf")
343
+
344
+ with open(file_path, "wb") as f:
345
+ f.write(await file.read())
346
+
347
+ text = extract_text_from_pdf(file_path)
348
+ chunks = [text[i:i + 1000] for i in range(0, len(text), 1000)]
349
+ embed_and_store(chunks, doc_id)
350
+
351
+ # βœ… Create MongoDB record
352
+ conversations.insert_one({
353
+ "doc_id": doc_id,
354
+ "name": file.filename,
355
+ "file_path": file_path,
356
+ "qdrant_collection": COLLECTION_NAME,
357
+ "history": [],
358
+ "created_at": asyncio.get_event_loop().time(),
359
+ })
360
+
361
+ return {"doc_id": doc_id, "message": "PDF uploaded and processed successfully."}
362
+
363
+
364
+ # ---------------------------------------------------
365
+ # βœ… Streaming Upload endpoint (with Mongo insert)
366
+ # ---------------------------------------------------
367
+ @router.post("/upload-stream")
368
+ async def upload_stream(file: UploadFile):
369
+ doc_id = str(uuid.uuid4())
370
+ file_path = os.path.join(UPLOAD_DIR, f"{doc_id}.pdf")
371
+
372
+ with open(file_path, "wb") as f:
373
+ content = await file.read()
374
+ f.write(content)
375
+
376
+ async def generate_upload_events():
377
+ try:
378
+ yield f"data: {json.dumps({'status': 'βœ… File uploaded successfully. Starting processing...'})}\n\n"
379
+ await asyncio.sleep(0.5)
380
+
381
+ yield f"data: {json.dumps({'status': 'πŸ“„ Extracting text from PDF...'})}\n\n"
382
+ text = extract_text_from_pdf(file_path)
383
+ await asyncio.sleep(0.5)
384
+
385
+ yield f"data: {json.dumps({'status': '🧠 Generating embeddings...'})}\n\n"
386
+ chunks = [text[i:i + 1000] for i in range(0, len(text), 1000)]
387
+ await asyncio.sleep(0.5)
388
+
389
+ yield f"data: {json.dumps({'status': 'πŸ’Ύ Storing vectors into database...'})}\n\n"
390
+ embed_and_store(chunks, doc_id)
391
+ await asyncio.sleep(0.5)
392
+
393
+ # βœ… Save chat info to MongoDB
394
+ conversations.insert_one({
395
+ "doc_id": doc_id,
396
+ "name": file.filename,
397
+ "file_path": file_path,
398
+ "qdrant_collection": COLLECTION_NAME,
399
+ "history": [],
400
+ "created_at": asyncio.get_event_loop().time(),
401
+ })
402
+
403
+ yield f"data: {json.dumps({'status': 'πŸŽ‰ Done! You’re good to go.', 'doc_id': doc_id})}\n\n"
404
+ yield "event: end\ndata: {}\n\n"
405
+
406
+ except Exception as e:
407
+ yield f"data: {json.dumps({'status': f'⚠️ Error: {str(e)}'})}\n\n"
408
+ yield "event: end\ndata: {}\n\n"
409
+
410
+ return StreamingResponse(generate_upload_events(), media_type="text/event-stream")
411
+
412
+
413
+ # ---------------------------------------------------
414
+ # βœ… Fetch All Chats (for Sidebar)
415
+ # ---------------------------------------------------
416
+ @router.get("/chats")
417
+ async def get_all_chats():
418
+ try:
419
+ chats = list(conversations.find({}, {"_id": 1, "name": 1, "doc_id": 1}))
420
+ for c in chats:
421
+ c["_id"] = str(c["_id"])
422
+ return {"chats": chats}
423
+ except Exception as e:
424
+ raise HTTPException(status_code=500, detail=str(e))
425
+
426
+
427
+ # ---------------------------------------------------
428
+ # βœ… Delete Chat (Qdrant + Mongo + File)
429
+ # ---------------------------------------------------
430
+
431
+ @router.delete("/chat/{chat_id}")
432
+ async def delete_chat(chat_id: str):
433
+ try:
434
+ chat = conversations.find_one({"_id": ObjectId(chat_id)})
435
+ if not chat:
436
+ raise HTTPException(status_code=404, detail="Chat not found")
437
+
438
+ doc_id = chat.get("doc_id")
439
+
440
+ # βœ… Delete embeddings from Qdrant
441
+ try:
442
+ qdrant.delete(
443
+ collection_name=COLLECTION_NAME,
444
+ points_selector=Filter(
445
+ must=[FieldCondition(key="doc_id", match=MatchValue(value=doc_id))]
446
+ ),
447
+ )
448
+ except Exception as e:
449
+ print(f"⚠️ Qdrant delete failed: {e}")
450
+
451
+ # βœ… Delete uploaded PDF file
452
+ file_path = chat.get("file_path") or os.path.join("uploads", f"{doc_id}.pdf")
453
+ if os.path.exists(file_path):
454
+ os.remove(file_path)
455
+
456
+ # βœ… Delete from MongoDB
457
+ conversations.delete_one({"_id": ObjectId(chat_id)})
458
+
459
+ return {"status": "success", "message": "Chat and embeddings deleted successfully"}
460
+
461
+ except Exception as e:
462
+ raise HTTPException(status_code=500, detail=str(e))
463
+
464
+
465
+
466
+
467
+ # ---------------------------------------------------
468
+ # βœ… Chat Query Endpoint (Persistent Memory)
469
+ # ---------------------------------------------------
470
+ @router.post("/query")
471
+ async def query_pdf(doc_id: str = Form(...), question: str = Form(...)):
472
+ # βœ… Save user message
473
+ conversations.update_one(
474
+ {"doc_id": doc_id},
475
+ {"$push": {"history": {"role": "user", "content": question}}},
476
+ upsert=True
477
+ )
478
+
479
+ # βœ… Retrieve history
480
+ doc = conversations.find_one({"doc_id": doc_id})
481
+ history_list = doc["history"][-10:] if doc and "history" in doc else []
482
+
483
+ structured_history = "\n".join(
484
+ [f"{h['role'].title()}: {h['content']}" for h in history_list]
485
+ )
486
+
487
+ # βœ… Vector Search
488
+ question_vector = embedder.encode([question])[0].tolist()
489
+ hits = qdrant.search(
490
+ collection_name=COLLECTION_NAME,
491
+ query_vector=question_vector,
492
+ query_filter=Filter(
493
+ must=[FieldCondition(key="doc_id", match=MatchValue(value=doc_id))]
494
+ ),
495
+ limit=5,
496
+ )
497
+ context = "\n".join([hit.payload["text"] for hit in hits])
498
+ full_context = f"{structured_history}\n\n{context}"
499
+
500
+ # βœ… LLM Response
501
+ answer = ask_gemini(full_context, question)
502
+
503
+ # βœ… Save assistant response
504
+ conversations.update_one(
505
+ {"doc_id": doc_id},
506
+ {"$push": {"history": {"role": "assistant", "content": answer}}},
507
+ upsert=True
508
+ )
509
+
510
+ return {
511
+ "answer": answer,
512
+ "context_used": context,
513
+ "history_count": len(history_list)
514
+ }
515
+
516
+
517
+
518
+ @router.get("/conversations/{doc_id}")
519
+ async def get_conversation(doc_id: str):
520
+ doc = conversations.find_one({"doc_id": doc_id})
521
+ if not doc:
522
+ return {"history": []}
523
+ return {"history": doc.get("history", [])}
524
+