pluto90 commited on
Commit
cc30e2f
Β·
verified Β·
1 Parent(s): 198eec5

Update app/api/routes.py

Browse files
Files changed (1) hide show
  1. app/api/routes.py +302 -18
app/api/routes.py CHANGED
@@ -1,5 +1,239 @@
1
- # app/api/routes.py
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
 
 
 
3
  from fastapi import APIRouter, UploadFile, Form, HTTPException
4
  from fastapi.responses import StreamingResponse
5
  import uuid, os, json, asyncio
@@ -9,11 +243,13 @@ from bson import ObjectId
9
  from app.core.pdf_processor import extract_text_from_pdf
10
  from app.core.embedding_engine import embed_and_store, embedder, qdrant, COLLECTION_NAME
11
  from qdrant_client.http.models import Filter, FieldCondition, MatchValue
12
- from app.core.llm_engine import ask_gemini
13
  from app.core.mongo import conversations
14
  from qdrant_client import QdrantClient
15
  from app.core.config import QDRANT_URL, QDRANT_API_KEY
16
 
 
 
17
  router = APIRouter()
18
  UPLOAD_DIR = "uploads"
19
  os.makedirs(UPLOAD_DIR, exist_ok=True)
@@ -162,6 +398,12 @@ async def delete_chat(chat_id: str):
162
  # ---------------------------------------------------
163
  @router.post("/query")
164
  async def query_pdf(doc_id: str = Form(...), question: str = Form(...)):
 
 
 
 
 
 
165
  # βœ… Save user message
166
  conversations.update_one(
167
  {"doc_id": doc_id},
@@ -178,23 +420,61 @@ async def query_pdf(doc_id: str = Form(...), question: str = Form(...)):
178
  )
179
 
180
  # βœ… Vector Search
181
- question_vector = embedder.encode([question])[0].tolist()
182
 
183
- hits = qdrant_client.query_points(
184
- collection_name=COLLECTION_NAME,
185
- query=question_vector,
186
- query_filter=Filter(
187
- must=[FieldCondition(key="doc_id", match=MatchValue(value=doc_id))]
188
- ),
189
- limit=5,
190
- ).points
 
191
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
192
 
193
- context = "\n".join([hit.payload["text"] for hit in hits])
194
- full_context = f"{structured_history}\n\n{context}"
195
 
196
- # βœ… LLM Response
197
- answer = ask_gemini(full_context, question)
198
 
199
  # βœ… Save assistant response
200
  conversations.update_one(
@@ -202,17 +482,21 @@ async def query_pdf(doc_id: str = Form(...), question: str = Form(...)):
202
  {"$push": {"history": {"role": "assistant", "content": answer}}},
203
  upsert=True
204
  )
205
-
206
  return {
207
  "answer": answer,
208
- "context_used": context,
 
 
209
  "history_count": len(history_list)
210
  }
211
 
212
 
 
213
  @router.get("/conversations/{doc_id}")
214
  async def get_conversation(doc_id: str):
215
  doc = conversations.find_one({"doc_id": doc_id})
216
  if not doc:
217
  return {"history": []}
218
- return {"history": doc.get("history", [])}
 
 
1
+ # # app/api/routes.py
2
+
3
+ # from fastapi import APIRouter, UploadFile, Form, HTTPException
4
+ # from fastapi.responses import StreamingResponse
5
+ # import uuid, os, json, asyncio
6
+ # from collections import defaultdict
7
+ # from bson import ObjectId
8
+
9
+ # from app.core.pdf_processor import extract_text_from_pdf
10
+ # from app.core.embedding_engine import embed_and_store, embedder, qdrant, COLLECTION_NAME
11
+ # from qdrant_client.http.models import Filter, FieldCondition, MatchValue
12
+ # from app.core.llm_engine import ask_gemini
13
+ # from app.core.mongo import conversations
14
+ # from qdrant_client import QdrantClient
15
+ # from app.core.config import QDRANT_URL, QDRANT_API_KEY
16
+
17
+ # router = APIRouter()
18
+ # UPLOAD_DIR = "uploads"
19
+ # os.makedirs(UPLOAD_DIR, exist_ok=True)
20
+
21
+ # # Qdrant Client
22
+ # qdrant_client = QdrantClient(
23
+ # url=QDRANT_URL,
24
+ # api_key=QDRANT_API_KEY,
25
+ # check_compatibility=False)
26
+
27
+ # # In-memory (for fallback)
28
+ # chat_histories = defaultdict(list)
29
+
30
+
31
+ # # ---------------------------------------------------
32
+ # # βœ… Upload endpoint
33
+ # # ---------------------------------------------------
34
+ # @router.post("/upload")
35
+ # async def upload_pdf(file: UploadFile):
36
+ # doc_id = str(uuid.uuid4())
37
+ # file_path = os.path.join(UPLOAD_DIR, f"{doc_id}.pdf")
38
+
39
+ # with open(file_path, "wb") as f:
40
+ # f.write(await file.read())
41
+
42
+ # text = extract_text_from_pdf(file_path)
43
+ # chunks = [text[i:i + 1000] for i in range(0, len(text), 1000)]
44
+ # embed_and_store(chunks, doc_id)
45
+
46
+ # # βœ… Create MongoDB record
47
+ # conversations.insert_one({
48
+ # "doc_id": doc_id,
49
+ # "name": file.filename,
50
+ # "file_path": file_path,
51
+ # "qdrant_collection": COLLECTION_NAME,
52
+ # "history": [],
53
+ # "created_at": asyncio.get_event_loop().time(),
54
+ # })
55
+
56
+ # return {"doc_id": doc_id, "message": "PDF uploaded and processed successfully."}
57
+
58
+
59
+ # # ---------------------------------------------------
60
+ # # βœ… Streaming Upload endpoint (with Mongo insert)
61
+ # # ---------------------------------------------------
62
+ # @router.post("/upload-stream")
63
+ # async def upload_stream(file: UploadFile):
64
+ # doc_id = str(uuid.uuid4())
65
+ # file_path = os.path.join(UPLOAD_DIR, f"{doc_id}.pdf")
66
+
67
+ # with open(file_path, "wb") as f:
68
+ # content = await file.read()
69
+ # f.write(content)
70
+
71
+ # async def generate_upload_events():
72
+ # try:
73
+ # yield f"data: {json.dumps({'status': 'βœ… File uploaded successfully. Starting processing...'})}\n\n"
74
+ # await asyncio.sleep(0.5)
75
+
76
+ # yield f"data: {json.dumps({'status': 'πŸ“„ Extracting text from PDF...'})}\n\n"
77
+ # text = extract_text_from_pdf(file_path)
78
+ # await asyncio.sleep(0.5)
79
+
80
+ # yield f"data: {json.dumps({'status': '🧠 Generating embeddings...'})}\n\n"
81
+ # chunks = [text[i:i + 1000] for i in range(0, len(text), 1000)]
82
+ # await asyncio.sleep(0.5)
83
+
84
+ # yield f"data: {json.dumps({'status': 'πŸ’Ύ Storing vectors into database...'})}\n\n"
85
+ # embed_and_store(chunks, doc_id)
86
+ # await asyncio.sleep(0.5)
87
+
88
+ # # βœ… Save chat info to MongoDB
89
+ # conversations.insert_one({
90
+ # "doc_id": doc_id,
91
+ # "name": file.filename,
92
+ # "file_path": file_path,
93
+ # "qdrant_collection": COLLECTION_NAME,
94
+ # "history": [],
95
+ # "created_at": asyncio.get_event_loop().time(),
96
+ # })
97
+
98
+ # yield f"data: {json.dumps({'status': 'πŸŽ‰ Done! You’re good to go.', 'doc_id': doc_id})}\n\n"
99
+ # yield "event: end\ndata: {}\n\n"
100
+
101
+ # except Exception as e:
102
+ # yield f"data: {json.dumps({'status': f'⚠️ Error: {str(e)}'})}\n\n"
103
+ # yield "event: end\ndata: {}\n\n"
104
+
105
+ # return StreamingResponse(generate_upload_events(), media_type="text/event-stream")
106
+
107
+
108
+ # # ---------------------------------------------------
109
+ # # βœ… Fetch All Chats (for Sidebar)
110
+ # # ---------------------------------------------------
111
+ # @router.get("/chats")
112
+ # async def get_all_chats():
113
+ # try:
114
+ # chats = list(conversations.find({}, {"_id": 1, "name": 1, "doc_id": 1}))
115
+ # for c in chats:
116
+ # c["_id"] = str(c["_id"])
117
+ # return {"chats": chats}
118
+ # except Exception as e:
119
+ # raise HTTPException(status_code=500, detail=str(e))
120
+
121
+
122
+ # # ---------------------------------------------------
123
+ # # βœ… Delete Chat (Qdrant + Mongo + File)
124
+ # # ---------------------------------------------------
125
+ # @router.delete("/chat/{chat_id}")
126
+ # async def delete_chat(chat_id: str):
127
+ # try:
128
+ # chat = conversations.find_one({"_id": ObjectId(chat_id)})
129
+ # if not chat:
130
+ # raise HTTPException(status_code=404, detail="Chat not found")
131
+
132
+ # doc_id = chat.get("doc_id")
133
+
134
+ # # βœ… Delete embeddings from Qdrant
135
+ # try:
136
+ # qdrant.delete(
137
+ # collection_name=COLLECTION_NAME,
138
+ # points_selector=Filter(
139
+ # must=[FieldCondition(key="doc_id", match=MatchValue(value=doc_id))]
140
+ # ),
141
+ # )
142
+ # except Exception as e:
143
+ # print(f"⚠️ Qdrant delete failed: {e}")
144
+
145
+ # # βœ… Delete uploaded PDF file
146
+ # file_path = chat.get("file_path") or os.path.join("uploads", f"{doc_id}.pdf")
147
+ # if os.path.exists(file_path):
148
+ # os.remove(file_path)
149
+
150
+ # # βœ… Delete from MongoDB
151
+ # conversations.delete_one({"_id": ObjectId(chat_id)})
152
+
153
+ # return {"status": "success", "message": "Chat and embeddings deleted successfully"}
154
+
155
+ # except Exception as e:
156
+ # raise HTTPException(status_code=500, detail=str(e))
157
+
158
+
159
+
160
+ # # ---------------------------------------------------
161
+ # # βœ… Chat Query Endpoint (Persistent Memory)
162
+ # # ---------------------------------------------------
163
+ # @router.post("/query")
164
+ # async def query_pdf(doc_id: str = Form(...), question: str = Form(...)):
165
+ # # βœ… Save user message
166
+ # conversations.update_one(
167
+ # {"doc_id": doc_id},
168
+ # {"$push": {"history": {"role": "user", "content": question}}},
169
+ # upsert=True
170
+ # )
171
+
172
+ # # βœ… Retrieve history
173
+ # doc = conversations.find_one({"doc_id": doc_id})
174
+ # history_list = doc["history"][-10:] if doc and "history" in doc else []
175
+
176
+ # structured_history = "\n".join(
177
+ # [f"{h['role'].title()}: {h['content']}" for h in history_list]
178
+ # )
179
+
180
+ # # βœ… Vector Search
181
+ # question_vector = embedder.encode([question])[0].tolist()
182
+
183
+ # hits = qdrant_client.query_points(
184
+ # collection_name=COLLECTION_NAME,
185
+ # query=question_vector,
186
+ # query_filter=Filter(
187
+ # must=[FieldCondition(key="doc_id", match=MatchValue(value=doc_id))]
188
+ # ),
189
+ # limit=5,
190
+ # ).points
191
+
192
+
193
+ # context = "\n".join([hit.payload["text"] for hit in hits])
194
+ # full_context = f"{structured_history}\n\n{context}"
195
+
196
+ # # βœ… LLM Response
197
+ # answer = ask_gemini(full_context, question)
198
+
199
+ # # βœ… Save assistant response
200
+ # conversations.update_one(
201
+ # {"doc_id": doc_id},
202
+ # {"$push": {"history": {"role": "assistant", "content": answer}}},
203
+ # upsert=True
204
+ # )
205
+
206
+ # return {
207
+ # "answer": answer,
208
+ # "context_used": context,
209
+ # "history_count": len(history_list)
210
+ # }
211
+
212
+
213
+ # @router.get("/conversations/{doc_id}")
214
+ # async def get_conversation(doc_id: str):
215
+ # doc = conversations.find_one({"doc_id": doc_id})
216
+ # if not doc:
217
+ # return {"history": []}
218
+ # return {"history": doc.get("history", [])}
219
+
220
+
221
+
222
+
223
+
224
+
225
+
226
+
227
+
228
+
229
+
230
+
231
+
232
+
233
 
234
+
235
+
236
+ # app/api/routes.py
237
  from fastapi import APIRouter, UploadFile, Form, HTTPException
238
  from fastapi.responses import StreamingResponse
239
  import uuid, os, json, asyncio
 
243
  from app.core.pdf_processor import extract_text_from_pdf
244
  from app.core.embedding_engine import embed_and_store, embedder, qdrant, COLLECTION_NAME
245
  from qdrant_client.http.models import Filter, FieldCondition, MatchValue
246
+ # from app.core.llm_engine import ask_gemini
247
  from app.core.mongo import conversations
248
  from qdrant_client import QdrantClient
249
  from app.core.config import QDRANT_URL, QDRANT_API_KEY
250
 
251
+ from app.graph.graph_builder import build_graph
252
+
253
  router = APIRouter()
254
  UPLOAD_DIR = "uploads"
255
  os.makedirs(UPLOAD_DIR, exist_ok=True)
 
398
  # ---------------------------------------------------
399
  @router.post("/query")
400
  async def query_pdf(doc_id: str = Form(...), question: str = Form(...)):
401
+
402
+ print("API DEBUG β†’ doc_id:", doc_id)
403
+ print("API DEBUG β†’ question:", question)
404
+
405
+
406
+
407
  # βœ… Save user message
408
  conversations.update_one(
409
  {"doc_id": doc_id},
 
420
  )
421
 
422
  # βœ… Vector Search
423
+ # question_vector = embedder.encode([question])[0].tolist()
424
 
425
+ # hits = qdrant_client.query_points(
426
+ # collection_name=COLLECTION_NAME,
427
+ # query=question_vector,
428
+ # query_filter=Filter(
429
+ # must=[FieldCondition(key="doc_id", match=MatchValue(value=doc_id))]
430
+ # ),
431
+ # limit=5,
432
+ # ).points
433
+
434
 
435
+ # context = "\n".join([hit.payload["text"] for hit in hits])
436
+ # full_context = f"{structured_history}\n\n{context}"
437
+
438
+ # # βœ… LLM Response
439
+ # answer = ask_gemini(full_context, question)
440
+
441
+ graph= build_graph()
442
+
443
+ print("FINAL β†’ sending to graph:", {
444
+ "query": question,
445
+ "doc_id": doc_id
446
+ })
447
+
448
+ # Run LangGraph
449
+ # result = graph.invoke({
450
+ # "query": question,
451
+ # "doc_id": doc_id
452
+ # })
453
+
454
+
455
+
456
+ initial_state = {
457
+ "query": str(question),
458
+ "doc_id": str(doc_id),
459
+ "history": structured_history,
460
+ "route": None,
461
+ "context": None,
462
+ "final_answer": None
463
+ }
464
+
465
+ print("FINAL STATE β†’", initial_state)
466
+
467
+ result = graph.invoke(initial_state)
468
+
469
+
470
+
471
+
472
+ answer = result["final_answer"]
473
+ context = result.get("context", "")
474
+
475
+ # ------------
476
 
 
 
477
 
 
 
478
 
479
  # βœ… Save assistant response
480
  conversations.update_one(
 
482
  {"$push": {"history": {"role": "assistant", "content": answer}}},
483
  upsert=True
484
  )
485
+ print("EVALUATION β†’", result.get("evaluation"))
486
  return {
487
  "answer": answer,
488
+ # "context_used": context,
489
+ "sources": result.get("sources", []),
490
+ "evaluation": result.get("evaluation", []),
491
  "history_count": len(history_list)
492
  }
493
 
494
 
495
+
496
  @router.get("/conversations/{doc_id}")
497
  async def get_conversation(doc_id: str):
498
  doc = conversations.find_one({"doc_id": doc_id})
499
  if not doc:
500
  return {"history": []}
501
+ return {"history": doc.get("history", [])}
502
+