pluto90 commited on
Commit
77742f7
Β·
verified Β·
1 Parent(s): 7a39123

Upload routes.py

Browse files
Files changed (1) hide show
  1. app/api/routes.py +266 -0
app/api/routes.py CHANGED
@@ -0,0 +1,266 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app/api/routes.py
2
+
3
+ from fastapi import APIRouter, UploadFile, Form, HTTPException
4
+ from fastapi.responses import StreamingResponse
5
+ import uuid, os, json, asyncio
6
+ from collections import defaultdict
7
+ from bson import ObjectId
8
+
9
+ from app.core.pdf_processor import extract_text_from_pdf
10
+ from app.core.embedding_engine import embed_and_store, embedder, qdrant, COLLECTION_NAME
11
+ from qdrant_client.http.models import Filter, FieldCondition, MatchValue
12
+ from app.core.llm_engine import ask_gemini
13
+ from app.core.mongo import conversations
14
+ from qdrant_client import QdrantClient
15
+ from app.core.config import QDRANT_URL, QDRANT_API_KEY
16
+
17
+ router = APIRouter()
18
+ UPLOAD_DIR = "uploads"
19
+ os.makedirs(UPLOAD_DIR, exist_ok=True)
20
+
21
+ # Qdrant Client
22
+ qdrant_client = QdrantClient(
23
+ url=QDRANT_URL,
24
+ api_key=QDRANT_API_KEY,
25
+ check_compatibility=False)
26
+
27
+ # In-memory (for fallback)
28
+ chat_histories = defaultdict(list)
29
+
30
+
31
+ # ---------------------------------------------------
32
+ # βœ… Upload endpoint
33
+ # ---------------------------------------------------
34
+ @router.post("/upload")
35
+ async def upload_pdf(file: UploadFile):
36
+ doc_id = str(uuid.uuid4())
37
+ file_path = os.path.join(UPLOAD_DIR, f"{doc_id}.pdf")
38
+
39
+ with open(file_path, "wb") as f:
40
+ f.write(await file.read())
41
+
42
+ text = extract_text_from_pdf(file_path)
43
+ chunks = [text[i:i + 1000] for i in range(0, len(text), 1000)]
44
+ embed_and_store(chunks, doc_id)
45
+
46
+ # βœ… Create MongoDB record
47
+ conversations.insert_one({
48
+ "doc_id": doc_id,
49
+ "name": file.filename,
50
+ "file_path": file_path,
51
+ "qdrant_collection": COLLECTION_NAME,
52
+ "history": [],
53
+ "created_at": asyncio.get_event_loop().time(),
54
+ })
55
+
56
+ return {"doc_id": doc_id, "message": "PDF uploaded and processed successfully."}
57
+
58
+
59
+ # ---------------------------------------------------
60
+ # βœ… Streaming Upload endpoint (with Mongo insert)
61
+ # ---------------------------------------------------
62
+ @router.post("/upload-stream")
63
+ async def upload_stream(file: UploadFile):
64
+ doc_id = str(uuid.uuid4())
65
+ file_path = os.path.join(UPLOAD_DIR, f"{doc_id}.pdf")
66
+
67
+ with open(file_path, "wb") as f:
68
+ content = await file.read()
69
+ f.write(content)
70
+
71
+ async def generate_upload_events():
72
+ try:
73
+ yield f"data: {json.dumps({'status': 'βœ… File uploaded successfully. Starting processing...'})}\n\n"
74
+ await asyncio.sleep(0.5)
75
+
76
+ yield f"data: {json.dumps({'status': 'πŸ“„ Extracting text from PDF...'})}\n\n"
77
+ text = extract_text_from_pdf(file_path)
78
+ await asyncio.sleep(0.5)
79
+
80
+ yield f"data: {json.dumps({'status': '🧠 Generating embeddings...'})}\n\n"
81
+ chunks = [text[i:i + 1000] for i in range(0, len(text), 1000)]
82
+ await asyncio.sleep(0.5)
83
+
84
+ yield f"data: {json.dumps({'status': 'πŸ’Ύ Storing vectors into database...'})}\n\n"
85
+ embed_and_store(chunks, doc_id)
86
+ await asyncio.sleep(0.5)
87
+
88
+ # βœ… Save chat info to MongoDB
89
+ conversations.insert_one({
90
+ "doc_id": doc_id,
91
+ "name": file.filename,
92
+ "file_path": file_path,
93
+ "qdrant_collection": COLLECTION_NAME,
94
+ "history": [],
95
+ "created_at": asyncio.get_event_loop().time(),
96
+ })
97
+
98
+ yield f"data: {json.dumps({'status': 'πŸŽ‰ Done! You’re good to go.', 'doc_id': doc_id})}\n\n"
99
+ yield "event: end\ndata: {}\n\n"
100
+
101
+ except Exception as e:
102
+ yield f"data: {json.dumps({'status': f'⚠️ Error: {str(e)}'})}\n\n"
103
+ yield "event: end\ndata: {}\n\n"
104
+
105
+ return StreamingResponse(generate_upload_events(), media_type="text/event-stream")
106
+
107
+
108
+ # ---------------------------------------------------
109
+ # βœ… Fetch All Chats (for Sidebar)
110
+ # ---------------------------------------------------
111
+ @router.get("/chats")
112
+ async def get_all_chats():
113
+ try:
114
+ chats = list(conversations.find({}, {"_id": 1, "name": 1, "doc_id": 1}))
115
+ for c in chats:
116
+ c["_id"] = str(c["_id"])
117
+ return {"chats": chats}
118
+ except Exception as e:
119
+ raise HTTPException(status_code=500, detail=str(e))
120
+
121
+
122
+ # ---------------------------------------------------
123
+ # βœ… Delete Chat (Qdrant + Mongo + File)
124
+ # ---------------------------------------------------
125
+ # @router.delete("/chat/{chat_id}")
126
+ # async def delete_chat(chat_id: str):
127
+ # try:
128
+ # chat = conversations.find_one({"_id": ObjectId(chat_id)})
129
+ # if not chat:
130
+ # raise HTTPException(status_code=404, detail="Chat not found")
131
+
132
+ # doc_id = chat.get("doc_id")
133
+ # qdrant_collection = chat.get("qdrant_collection")
134
+
135
+ # # βœ… Delete embeddings from Qdrant
136
+ # try:
137
+ # qdrant_client.delete(
138
+ # collection_name=qdrant_collection,
139
+ # points_selector=Filter(
140
+ # must=[FieldCondition(key="doc_id", match=MatchValue(value=doc_id))]
141
+ # ),
142
+ # )
143
+ # except Exception as e:
144
+ # print(f"⚠️ Qdrant delete failed: {e}")
145
+
146
+ # # βœ… Delete uploaded file
147
+ # file_path = chat.get("file_path")
148
+ # if file_path and os.path.exists(file_path):
149
+ # os.remove(file_path)
150
+
151
+ # # βœ… Delete from MongoDB
152
+ # conversations.delete_one({"_id": ObjectId(chat_id)})
153
+
154
+ # return {"status": "success", "message": "Chat deleted successfully"}
155
+
156
+ # except Exception as e:
157
+ # raise HTTPException(status_code=500, detail=str(e))
158
+
159
+
160
+
161
+ @router.delete("/chat/{chat_id}")
162
+ async def delete_chat(chat_id: str):
163
+ try:
164
+ chat = conversations.find_one({"_id": ObjectId(chat_id)})
165
+ if not chat:
166
+ raise HTTPException(status_code=404, detail="Chat not found")
167
+
168
+ doc_id = chat.get("doc_id")
169
+
170
+ # βœ… Delete embeddings from Qdrant
171
+ try:
172
+ qdrant.delete(
173
+ collection_name=COLLECTION_NAME,
174
+ points_selector=Filter(
175
+ must=[FieldCondition(key="doc_id", match=MatchValue(value=doc_id))]
176
+ ),
177
+ )
178
+ except Exception as e:
179
+ print(f"⚠️ Qdrant delete failed: {e}")
180
+
181
+ # βœ… Delete uploaded PDF file
182
+ file_path = chat.get("file_path") or os.path.join("uploads", f"{doc_id}.pdf")
183
+ if os.path.exists(file_path):
184
+ os.remove(file_path)
185
+
186
+ # βœ… Delete from MongoDB
187
+ conversations.delete_one({"_id": ObjectId(chat_id)})
188
+
189
+ return {"status": "success", "message": "Chat and embeddings deleted successfully"}
190
+
191
+ except Exception as e:
192
+ raise HTTPException(status_code=500, detail=str(e))
193
+
194
+
195
+
196
+
197
+ # ---------------------------------------------------
198
+ # βœ… Chat Query Endpoint (Persistent Memory)
199
+ # ---------------------------------------------------
200
+ @router.post("/query")
201
+ async def query_pdf(doc_id: str = Form(...), question: str = Form(...)):
202
+ # βœ… Save user message
203
+ conversations.update_one(
204
+ {"doc_id": doc_id},
205
+ {"$push": {"history": {"role": "user", "content": question}}},
206
+ upsert=True
207
+ )
208
+
209
+ # βœ… Retrieve history
210
+ doc = conversations.find_one({"doc_id": doc_id})
211
+ history_list = doc["history"][-10:] if doc and "history" in doc else []
212
+
213
+ structured_history = "\n".join(
214
+ [f"{h['role'].title()}: {h['content']}" for h in history_list]
215
+ )
216
+
217
+ # βœ… Vector Search
218
+ question_vector = embedder.encode([question])[0].tolist()
219
+ # hits = qdrant.search(
220
+ # collection_name=COLLECTION_NAME,
221
+ # query_vector=question_vector,
222
+ # query_filter=Filter(
223
+ # must=[FieldCondition(key="doc_id", match=MatchValue(value=doc_id))]
224
+ # ),
225
+ # limit=5,
226
+ # )
227
+
228
+
229
+ hits = qdrant_client.query_points(
230
+ collection_name=COLLECTION_NAME,
231
+ query=question_vector,
232
+ query_filter=Filter(
233
+ must=[FieldCondition(key="doc_id", match=MatchValue(value=doc_id))]
234
+ ),
235
+ limit=5,
236
+ ).points
237
+
238
+
239
+ context = "\n".join([hit.payload["text"] for hit in hits])
240
+ full_context = f"{structured_history}\n\n{context}"
241
+
242
+ # βœ… LLM Response
243
+ answer = ask_gemini(full_context, question)
244
+
245
+ # βœ… Save assistant response
246
+ conversations.update_one(
247
+ {"doc_id": doc_id},
248
+ {"$push": {"history": {"role": "assistant", "content": answer}}},
249
+ upsert=True
250
+ )
251
+
252
+ return {
253
+ "answer": answer,
254
+ "context_used": context,
255
+ "history_count": len(history_list)
256
+ }
257
+
258
+
259
+
260
+ @router.get("/conversations/{doc_id}")
261
+ async def get_conversation(doc_id: str):
262
+ doc = conversations.find_one({"doc_id": doc_id})
263
+ if not doc:
264
+ return {"history": []}
265
+ return {"history": doc.get("history", [])}
266
+