itsmehardawood committed on
Commit
a249293
·
1 Parent(s): 3fe718a

Deployment

Browse files
.env ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ # SECURITY(review): real-looking PayPal credentials were committed here and
+ # are now public in git history — rotate them immediately and inject real
+ # values via deployment secrets, never version control.
+ PAYPAL_CLIENT_ID = "<rotated-client-id>"
+ PAYPAL_CLIENT_ID_sn = "<rotated-client-id-sn>"
+ PAYPAL_CLIENT_SECRET = "<rotated-client-secret>"
+ PAYPAL_CLIENT_SECRET_sn = "<rotated-client-secret-sn>"
+ PAYPAL_ENVIRONMENT = "sandbox"
App/__pycache__/main.cpython-312.pyc ADDED
Binary file (39.1 kB). View file
 
App/__pycache__/utils.cpython-312.pyc ADDED
Binary file (14.2 kB). View file
 
App/main.py ADDED
@@ -0,0 +1,1061 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import asyncio
import base64
import io
import logging
import os
import uuid
from datetime import datetime, timedelta, timezone
from typing import Literal, Optional

import bcrypt
import numpy as np
import openai
from bson import ObjectId
from bson.binary import Binary
from dotenv import load_dotenv
from fastapi import (
    Depends,
    FastAPI,
    File,
    HTTPException,
    Request,
    UploadFile,
    status,
)
from fastapi import Query as QueryParam
from fastapi.middleware.cors import CORSMiddleware
from jose import jwt
from langchain_openai import OpenAIEmbeddings
from motor.motor_asyncio import AsyncIOMotorClient
from paypalcheckoutsdk.core import LiveEnvironment, PayPalHttpClient, SandboxEnvironment
from paypalcheckoutsdk.orders import OrdersCaptureRequest, OrdersCreateRequest
from pydantic import BaseModel, EmailStr, Field, HttpUrl

# from youtube_transcript_api import YouTubeTranscriptApi, NoTranscriptFound
from App.utils import (
    load_and_split_bytes,
    add_documents_to_index,
    get_llm,
    get_existing_retriever,
    get_collection_stats,
    build_chroma_index,
    create_rag_chain_with_history,
    count_tokens,
    search_youtube_video,
    cosine_similarity,
)
37
+
38
+
39
load_dotenv()

# PayPal client configuration. Credentials come from the environment (.env
# via python-dotenv); note that only the "_sn" credential pair is used here.
env_name = os.getenv("PAYPAL_ENVIRONMENT", "sandbox")
creds = dict(
    client_id = os.getenv("PAYPAL_CLIENT_ID_sn"),
    client_secret = os.getenv("PAYPAL_CLIENT_SECRET_sn"),
)
# Select live vs sandbox PayPal endpoints based on PAYPAL_ENVIRONMENT;
# anything other than "live" falls back to the sandbox.
environment = (
    LiveEnvironment(**creds)
    if env_name == "live"
    else SandboxEnvironment(**creds)
)
# Shared synchronous PayPal HTTP client used by the order endpoints below.
p_client = PayPalHttpClient(environment)
52
+
53
+
54
+
55
# --- JWT configuration ---
# SECURITY: the signing secret was previously hardcoded. Read it from the
# environment; the literal fallback preserves the old behaviour when the
# variable is unset, but a real SECRET_KEY must be provisioned and the
# fallback removed.
SECRET_KEY = os.getenv("SECRET_KEY", "hssjhdahsd")
ALGORITHM = "HS256"
ACCESS_TOKEN_EXPIRE_MINUTES = 30
59
+
60
app = FastAPI()
# NOTE(review): allow_origins=["*"] combined with allow_credentials=True is
# rejected by browsers for credentialed requests (the Fetch/CORS spec forbids
# a wildcard origin with credentials); the extra 'http://localhost:3000'
# entry is redundant next to "*". Confirm and list explicit origins instead.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*" , 'http://localhost:3000'],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
68
+
69
# MongoDB connection.
# SECURITY(review): the fallback URI embeds real-looking credentials which
# are now in git history — rotate them and prefer failing fast when
# MONGODB_URI is unset instead of shipping a hardcoded default.
connection_string = os.getenv(
    "MONGODB_URI",
    "mongodb+srv://ahmed0499280:haseeb.2003@cluster0.hzgrxp2.mongodb.net/"
    "?retryWrites=true&w=majority&appName=Cluster0"
)
client = AsyncIOMotorClient(connection_string)
db = client["Cluster0"]
users_collection = db["users"]
chatbot_history_collection = db["chatbothistory"]
documents_collection = db["documents"]  # Collection to store document files
chroma_db_collection = db["chroma_db_store"]  # Collection to store Chroma DB
flash_cards = db['flash_cards']
videos_collection = db['videos']
orders_collection = db["orders"]
subscriptions_collection = db["subscriptions"]
# Default language setting — a module-level global mutated by /language and
# read by /query_rag, so it is shared across every user of this process.
language = None
87
+
88
# Pydantic request/response models
class CreateSubOrderSchema(BaseModel):
    """Body for creating a subscription order; plan is validated by Literal."""
    user_id: str
    plan: Literal["monthly", "yearly"]

class TrialResponse(BaseModel):
    """Response for /start-trial."""
    status: str
    expires: datetime

class OrderResponse(BaseModel):
    """PayPal order id/status pair."""
    order_id: str
    status: str

class SubscriptionRequest(BaseModel):
    """Body for /create-subscription-order (plan re-validated in the handler)."""
    user_id: str
    plan: str

class SearchRequest(BaseModel):
    """Body for /search_video."""
    query: str


class Video(BaseModel):
    """Body for /add_video: an embeddable link plus a searchable description."""
    link: str
    description: str

class LanguageRequest(BaseModel):
    """Body for /language; sets the process-wide response language."""
    language: str

class QueryModel(BaseModel):
    """Body for /query_rag."""
    question: str
    user_id: str
    diacritics: bool
    level: str

class UserSignup(BaseModel):
    """Body for /signup."""
    username: str = Field(..., min_length=3, max_length=50)
    email: EmailStr
    password: str = Field(..., min_length=8)
    language: str
    # NOTE(review): the signup handler ignores this and always stores False.
    is_admin: bool


class UserModel(BaseModel):
    """Response model for /users/{user_id}."""
    id: str
    username: str
    email: str
    language: str
    is_admin: bool
    password: Optional[str] = None


class UserResponse(BaseModel):
    """Response model for /signup."""
    id: str
    username: str
    email: str

class UserLogin(BaseModel):
    """Body for /login."""
    email: EmailStr
    password: str

class Token(BaseModel):
    """JWT bearer token response for /login."""
    access_token: str
    token_type: str

class DocumentInfo(BaseModel):
    """Metadata shape for stored RAG documents."""
    filename: str
    upload_date: datetime
    file_size: int
    chunks: int
    user_id: str

class FlashcardSaveModel(BaseModel):
    """Body for /save_flashcard."""
    user_id: str
    question: str
    answer: str
163
+
164
# Helper for login
async def authenticate_user(email: str, password: str):
    """Return the user document for *email* when *password* matches, else None."""
    user = await users_collection.find_one({"email": email})
    if not user:
        return None
    stored_hash = user["password"].encode()
    if bcrypt.checkpw(password.encode(), stored_hash):
        return user
    return None
170
+
171
+
172
@app.get("/users/{user_id}", response_model=UserModel)
async def get_user(user_id: str):
    """
    Fetch a single user by their user_id.
    Supports lookup by MongoDB ObjectId or string _id.
    """
    # Prefer an ObjectId lookup; fall back to matching the raw string _id.
    try:
        lookup = {"_id": ObjectId(user_id)}
    except Exception:
        lookup = {"_id": user_id}

    record = await users_collection.find_one(lookup)
    if record is None:
        raise HTTPException(status_code=404, detail="User not found")

    # Shape the response: _id becomes id, is_admin defaults to False.
    return {
        "id": str(record.get("_id")),
        "username": record.get("username"),
        "email": record.get("email"),
        "language": record.get("language"),
        "is_admin": record.get("is_admin", False),
    }
198
+
199
+
200
# Initialize retriever at startup
@app.on_event("startup")
async def startup_event():
    """Load a previously persisted vector store into app.state, if any."""
    try:
        retriever = get_existing_retriever()
        if retriever:
            app.state.retriever = retriever
            stats = get_collection_stats()
            print(f"Loaded existing vector database with {stats['document_count']} document chunks")
        else:
            app.state.retriever = None
            print("No existing vector database found. Will create when documents are uploaded.")
    except Exception as e:
        # Best-effort startup: a broken store leaves the retriever unset
        # rather than crashing the whole app.
        print(f"Error loading vector database: {e}")
        app.state.retriever = None
216
+
217
+ ### API Endpoints
218
+
219
# Set the response language.
@app.post("/language")
async def receive_language(req: LanguageRequest):
    """Set the language used by /query_rag responses."""
    # NOTE(review): `language` is a module-level global shared by ALL users
    # and requests of this process — confirm this is intended (it probably
    # belongs in per-user state).
    global language
    language = req.language
    return {'Message': f"Language is Selected to '{language}'"}
225
+
226
# To SignUp
@app.post("/signup", response_model=UserResponse, tags=["auth"])
async def signup(user: UserSignup):
    """
    Register a new user.

    Rejects duplicate emails with 400, stores a bcrypt password hash, and
    always persists is_admin=False (the client-supplied flag is ignored).
    The stray debug print of the language was removed.
    """
    if await users_collection.find_one({"email": user.email}):
        raise HTTPException(status_code=400, detail="Email already registered")
    salt = bcrypt.gensalt()
    hashed = bcrypt.hashpw(user.password.encode(), salt).decode()
    user_id = str(uuid.uuid4())
    await users_collection.insert_one({
        "_id": user_id,
        "username": user.username,
        "email": user.email,
        "password": hashed,
        "language": user.language,
        "is_admin": False,  # never trust a client-supplied admin flag
    })
    return {"id": user_id, "username": user.username, "email": user.email}
244
+
245
# To Login
@app.post("/login", response_model=Token, tags=["auth"])
async def login(credentials: UserLogin):
    """Authenticate email/password and return a short-lived JWT bearer token."""
    user = await authenticate_user(credentials.email, credentials.password)
    if not user:
        raise HTTPException(status_code=401, detail="Invalid email or password")
    # Fix: use an aware UTC timestamp for the token expiry —
    # datetime.utcnow() is naive (and deprecated in Python 3.12+).
    expire = datetime.now(timezone.utc) + timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)
    to_encode = {"sub": user["_id"], "exp": expire}
    token = jwt.encode(to_encode, SECRET_KEY, algorithm=ALGORITHM)
    return {"access_token": token, "token_type": "bearer"}
255
+
256
# To Add Document to RAG - MongoDB storage for both document and Chroma
@app.post("/build_rag", tags=["rag"])
async def build_rag_endpoint(file: UploadFile = File(...), user_id: str = QueryParam()):
    """Ingest a .docx upload: persist bytes in Mongo and index chunks in Chroma."""
    if not file.filename.endswith(".docx"):
        raise HTTPException(status_code=400, detail="Only .docx files are supported.")

    try:
        # Read the whole upload into memory (assumes uploads are small
        # enough to buffer — TODO confirm a size limit upstream).
        file_content = await file.read()
        file_size = len(file_content)

        # Generate a unique document ID.
        doc_id = str(uuid.uuid4())

        # Split the document into RAG chunks.
        temp_file = io.BytesIO(file_content)
        docs = load_and_split_bytes(temp_file)

        # Persist metadata plus the raw bytes in MongoDB.
        doc_info = {
            "_id": doc_id,
            "filename": file.filename,
            "upload_date": datetime.utcnow(),
            "file_size": file_size,
            "chunks": len(docs),
            "user_id": user_id,
            "file_content": Binary(file_content)  # Store as Binary BSON type
        }

        await documents_collection.insert_one(doc_info)

        # Add to or update the Chroma vector store and save to MongoDB.
        collection_name = "default"
        app.state.retriever = add_documents_to_index(docs, collection_name=collection_name)

        # Report the updated collection size.
        stats = get_collection_stats(collection_name)

        return {
            "message": f"File '{file.filename}' added to knowledge base.",
            "document_id": doc_id,
            "total_chunks_in_db": stats["document_count"]
        }

    except Exception as e:
        # NOTE(review): this broad handler surfaces every failure as a 500,
        # including ones that might deserve a 4xx — confirm intended.
        raise HTTPException(status_code=500, detail=f"Error processing document: {str(e)}")
302
+
303
# To query RAG
@app.post("/query_rag", tags=["rag"])
async def query_rag_endpoint(payload: QueryModel):
    """
    Answer a user question. Requires an active, unexpired subscription.

    Flow: (1) subscription gate, (2) LLM intent classification — if the user
    asked for a video, return the best-matching stored video by embedding
    similarity; otherwise (3) run the RAG chain over the document index with
    the user's recent chat history and persist the exchange.
    """
    now = datetime.now(timezone.utc)

    subscription = await subscriptions_collection.find_one({
        "user_id": payload.user_id,
        "status": "active"
    })

    if not subscription:
        raise HTTPException(status_code=403, detail="No active subscription found.")

    # Normalise a naive stored end_date to UTC before comparing.
    end_date = subscription.get("end_date")
    if end_date and end_date.tzinfo is None:
        end_date = end_date.replace(tzinfo=timezone.utc)
    if end_date and now > end_date:
        # Lazily mark the subscription expired on first use past end_date.
        await subscriptions_collection.update_one(
            {"_id": subscription["_id"]},
            {"$set": {"status": "expired"}}
        )
        raise HTTPException(status_code=403, detail="Subscription expired.")
    llm = get_llm()
    # Binary intent classifier prompt; the model must answer exactly
    # "play_video" or "other".
    cl_promp =f'''You are a binary intent classifier. When given a user message, decide if the user is asking to play or watch a video—explicitly or implicitly.

Instructions:
1. Analyze the provided message (`{payload.question}`) to determine intent:
- If the user intends to start, resume, or watch a video (e.g. “play the video now,” “let me see that demo,” “could you launch the tutorial clip?”), choose:
play_video
- Otherwise, choose:
other
2. Output exactly one label (play_video or other), with no additional text, punctuation, or formatting.

Input:
{{message}}

Output:'''
    cl = llm.invoke(cl_promp).content
    print(cl)
    # NOTE(review): exact string match on LLM output is brittle (any extra
    # whitespace falls through to the RAG branch) — confirm acceptable.
    if cl == 'play_video':
        try:
            # 1) Fetch up to 1000 docs (just the link+description).
            docs = await videos_collection.find(
                {},
                {"link": 1, "description": 1, "_id": 0}
            ).to_list(length=1000)

            if not docs:
                raise HTTPException(status_code=404, detail="No videos found")

            # 2) Collect the descriptions.
            descriptions = [doc["description"] for doc in docs]

            # 3) Embed the query.
            query_embedding = embeddings_model.embed_query(payload.question)

            # 4) Embed every description per request (inefficient; these
            #    could be precomputed and stored alongside the videos).
            description_embeddings = embeddings_model.embed_documents(descriptions)

            # 5) Cosine similarity of the query against each description.
            similarities = [cosine_similarity(query_embedding, desc_emb)
                            for desc_emb in description_embeddings]

            # 6) Pick the best match; 0.6 is the acceptance threshold.
            best_idx = int(np.argmax(similarities))
            best_doc = docs[best_idx]
            print(similarities[best_idx])
            if similarities[best_idx] > 0.6:
                return {
                    "answer" : "Here is video Based on your query",
                    "youtube": {
                        "embed_url": best_doc["link"],
                        "watch_url": ""
                    },
                    "transcript": ""
                }
            else:
                return {
                    "answer" : "No Video found" ,
                    "youtube": {
                        "embed_url": "",
                        "watch_url": ""
                    },
                    "transcript": ""
                }

        except Exception as e:
            logging.error(f"Error during video search: {str(e)}")
            raise HTTPException(status_code=500, detail="Error processing search request")

    # (A disabled YouTube-search implementation that lived here as
    # commented-out code was removed; see git history if it is needed.)
    else:
        # Use app.state.retriever (set at startup or when docs are added);
        # lazily load a persisted store if it exists.
        if not hasattr(app.state, "retriever") or app.state.retriever is None:
            retriever = get_existing_retriever()
            if retriever:
                app.state.retriever = retriever
            else:
                raise HTTPException(
                    status_code=400,
                    detail="No documents have been uploaded. Please upload documents using /build_rag first."
                )

        # Resolve the response language: the global override wins, otherwise
        # the user's stored preference.
        if language is None:
            user_id = payload.user_id
            user = await users_collection.find_one({"_id": user_id})
            if user:
                user_lan = user.get("language")
            # NOTE(review): if the user document is missing, user_lan is
            # never bound and create_rag_chain_with_history below raises
            # NameError — confirm and add a default (e.g. "Arabic").
        else:
            user_lan = language
        question_tokens = count_tokens(payload.question)
        print(f"Question tokens: {question_tokens}")
        retrieved_docs = app.state.retriever.get_relevant_documents(payload.question)
        context_text = "\n\n".join([doc.page_content for doc in retrieved_docs])
        context_tokens = count_tokens(context_text)
        chat_history = await chatbot_history_collection.find_one({"userId": payload.user_id})
        previous_messages = []

        # Replay at most the last 10 exchanges as alternating human/assistant
        # turns for the RAG chain.
        if chat_history and "messages" in chat_history:
            history_limit = 10
            recent_messages = chat_history["messages"][-history_limit:]

            for msg in recent_messages:
                previous_messages.append({"role": "human", "content": msg["user_message"]})
                previous_messages.append({"role": "assistant", "content": msg["ai_response"]})
        history_text = ""
        for msg in previous_messages:
            history_text += f"{msg['role']}: {msg['content']}\n"

        history_tokens = count_tokens(history_text)
        print(f"History tokens: {history_tokens}")

        # Create RAG chain and generate the response.
        llm = get_llm()
        print("Diacritics : " , payload.diacritics)
        print("Language : " ,user_lan)
        rag_chain = create_rag_chain_with_history(app.state.retriever, llm, user_lan,payload.level ,payload.diacritics, previous_messages)
        response = rag_chain.invoke({"input": payload.question})
        response_tokens = count_tokens(response['answer'])
        print(f"Response tokens: {response_tokens}")
        print(f"Total tokens: {question_tokens + history_tokens + context_tokens}")
        # Persist the exchange in the user's chat history.
        msg_record = {
            "user_message": payload.question,
            "ai_response": response['answer'],
            "timestamp": datetime.utcnow()
        }

        await chatbot_history_collection.update_one(
            {"userId": payload.user_id},
            {"$push": {"messages": msg_record}},
            upsert=True
        )

        return {
            "answer": response["answer"] ,
            "youtube": {
                "embed_url": "",
                "watch_url": ""
            },
            "transcript": "" }
503
+
504
+
505
@app.post("/save_flashcard", tags=["flashcards"])
async def save_flashcard(payload: FlashcardSaveModel):
    """
    Save a chat exchange as a flashcard with LLM-generated title.
    """
    try:
        # Ask the LLM for a short descriptive title for this Q/A pair.
        llm = get_llm()

        title_prompt = f"""
    Based on the following question and answer, generate a concise, descriptive title (15 words or less)
    that captures the main topic or key insight. Make it specific enough that someone can understand
    what information they'll find in this flashcard.

    Question: {payload.question}

    Answer: {payload.answer}

    Title:
    """

        title_response = llm.invoke(title_prompt)

        # Clean up the response (remove any quotation marks, extra spaces, etc.)
        title = title_response.content.strip().strip('"\'').strip()

        # If the LLM output is empty or implausibly long, fall back to a
        # truncated question snippet.
        if not title or len(title) > 100:
            title = payload.question[:50] + ("..." if len(payload.question) > 50 else "")

        # Create the flashcard document.
        flashcard_doc = {
            "_id": str(uuid.uuid4()),
            "user_id": payload.user_id,
            "title": title,
            "question": payload.question,
            "answer": payload.answer,
            "created_at": datetime.utcnow()
        }

        # Insert into flash_cards collection.
        await flash_cards.insert_one(flashcard_doc)

        return {
            "success": True,
            "flashcard_id": flashcard_doc["_id"],
            "title": title,  # Return the generated title to the frontend
            "message": "Flashcard saved successfully"
        }
    except Exception as e:
        print(f"Error saving flashcard: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error saving flashcard: {str(e)}")
560
+
561
# List a user's flash cards (the old comment said "Save" — this reads).
@app.get("/flashcards/{user_id}", tags=["flashcards"])
async def get_user_flashcards(user_id: str):
    """
    Retrieve all flashcards for a specific user, newest first.
    """
    try:
        cards = []
        cursor = flash_cards.find({"user_id": user_id}).sort("created_at", -1)
        async for entry in cursor:
            # Make the id JSON-serialisable regardless of its stored type.
            entry["_id"] = str(entry["_id"])
            cards.append(entry)
        return {"user_id": user_id, "flashcards": cards}
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error retrieving flashcards: {str(e)}")
578
+
579
+
580
# Delete FlashCards
@app.delete("/flashcards/{flashcard_id}", tags=["flashcards"])
async def delete_flashcard(flashcard_id: str, user_id: str = QueryParam()):
    """
    Delete a specific flashcard. Requires user_id to verify ownership.

    Raises 404 when no matching card exists for this user, 500 on
    unexpected database errors.
    """
    try:
        result = await flash_cards.delete_one({
            "_id": flashcard_id,
            "user_id": user_id
        })

        if result.deleted_count == 0:
            raise HTTPException(status_code=404, detail="Flashcard not found or not owned by this user")

        return {"success": True, "message": "Flashcard deleted successfully"}
    except HTTPException:
        # Bug fix: the 404 above used to be caught by the broad handler
        # below and re-raised as a 500. Let HTTP errors propagate unchanged.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error deleting flashcard: {str(e)}")
598
+
599
# For Video
@app.post("/add_video")
async def add_video(video: Video):
    """Insert a video link/description pair; returns the new document id."""
    document = {"link": video.link, "description": video.description}
    result = await videos_collection.insert_one(document)
    if not result.inserted_id:
        raise HTTPException(500, "Failed to add video")
    return {"message": "Video added successfully", "id": str(result.inserted_id)}
610
+
611
+
612
+
613
# To Delete a document
@app.delete("/documents/{document_id}", tags=["rag"], status_code=status.HTTP_204_NO_CONTENT)
async def delete_document(document_id: str):
    """
    Delete a stored document and rebuild the RAG index from the remainder.

    Raises 404 when the document id is unknown. Returns 204 on success.
    """
    # 1. Delete the document metadata + file bytes.
    result = await documents_collection.delete_one({"_id": document_id})
    if result.deleted_count == 0:
        raise HTTPException(status_code=404, detail="Document not found")

    # 2. Re-chunk every remaining document. (Fix: the per-iteration
    #    `from io import BytesIO` was hoisted — io is already imported at
    #    module level, so use io.BytesIO directly.)
    all_docs = []
    async for doc in documents_collection.find({}):
        chunks = load_and_split_bytes(io.BytesIO(doc["file_content"]))
        all_docs.extend(chunks)

    # 3. Rebuild the index if any chunks remain; otherwise clear both the
    #    in-memory retriever and the persisted vector store.
    if all_docs:
        app.state.retriever = build_chroma_index(
            all_docs,
            collection_name="default"
        )
    else:
        app.state.retriever = None
        await chroma_db_collection.delete_one({"_id": "default"})
642
+
643
+
644
+
645
+
646
+
647
+
648
# List all stored documents (metadata only — the old comment said
# "download", but the binary content is intentionally omitted).
@app.get("/documents", tags=["rag"])
async def list_all_documents():
    """Return metadata for every stored document, without file bytes."""
    try:
        cursor = documents_collection.find({})
        documents = []
        async for doc in cursor:
            # Copy only the metadata fields; skip the Binary file_content.
            documents.append({
                "id": str(doc["_id"]),
                "filename": doc.get("filename"),
                "upload_date": doc.get("upload_date"),
                "file_size": doc.get("file_size"),
                "chunks": doc.get("chunks"),
            })

        return {"documents": documents}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
667
+
668
# To get Chat History
@app.get("/chat-history/{user_id}")
async def get_chat_history(user_id: str):
    """
    Return every stored chat-history document for *user_id*.

    Raises 404 when the user has no history, 500 on database errors.
    """
    try:
        cursor = chatbot_history_collection.find({"userId": user_id})
        chat_history = []
        async for doc in cursor:
            # Convert ObjectId to string for JSON serialisation.
            doc["_id"] = str(doc["_id"])
            chat_history.append(doc)

        if not chat_history:
            raise HTTPException(status_code=404, detail="No chat history found for this user")

        return {"user_id": user_id, "chat_history": chat_history}
    except HTTPException:
        # Bug fix: the 404 above used to be swallowed by the generic handler
        # below and surfaced as a 500; re-raise HTTP errors untouched.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
685
+
686
+
687
# To list uploaded documents
@app.get("/documents/user/{user_id}", tags=["rag"])
async def list_documents(user_id: str):
    """Return metadata for the documents uploaded by *user_id* (no bytes)."""
    try:
        cursor = documents_collection.find({"user_id": user_id})
        documents = []
        async for doc in cursor:
            # Copy only the metadata; skip the binary file_content field.
            doc_info = {
                "id": str(doc["_id"]),
                "filename": doc["filename"],
                "upload_date": doc["upload_date"],
                "file_size": doc["file_size"],
                "chunks": doc["chunks"],
            }
            documents.append(doc_info)

        return {"user_id": user_id, "documents": documents}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
707
+
708
# To get knowledge base info
@app.get("/knowledge-base-info", tags=["rag"])
async def get_knowledge_base_info():
    """Summarise the vector store: whether it exists and its chunk count."""
    stats = get_collection_stats()
    return {
        "documents_loaded": stats["exists"],
        "total_chunks": stats["document_count"],
    }
716
+
717
# SECURITY fix: the OpenAI API key was previously hardcoded here (and is now
# public in git history — it must be rotated). Read it from the environment.
embeddings_model = OpenAIEmbeddings(model="text-embedding-ada-002", openai_api_key=os.getenv("OPENAI_API_KEY"))
718
+
719
@app.post("/search_video")
async def search_video(req: SearchRequest):
    """Return the stored video whose description best matches the query."""
    if not req.query.strip():
        raise HTTPException(status_code=400, detail="Search query cannot be empty")

    try:
        # 1) Fetch up to 1000 docs (just the link+description).
        docs = await videos_collection.find(
            {},
            {"link": 1, "description": 1, "_id": 0}
        ).to_list(length=1000)

        if not docs:
            raise HTTPException(status_code=404, detail="No videos found")

        # 2) Get descriptions as a list.
        descriptions = [doc["description"] for doc in docs]

        # 3) Embed the query using LangChain's OpenAIEmbeddings.
        query_embedding = embeddings_model.embed_query(req.query)

        # 4) Embed all descriptions per request (inefficient — these could
        #    be precomputed and cached with the videos).
        description_embeddings = embeddings_model.embed_documents(descriptions)

        # 5) Cosine similarity of the query against each description.
        similarities = [cosine_similarity(query_embedding, desc_emb)
                        for desc_emb in description_embeddings]

        # 6) Pick the best match (no minimum threshold here, unlike
        #    the /query_rag video branch).
        best_idx = int(np.argmax(similarities))
        best_doc = docs[best_idx]

        return {
            "link": best_doc["link"],
            "description": best_doc["description"],
            "score": similarities[best_idx]
        }

    except Exception as e:
        logging.error(f"Error during video search: {str(e)}")
        raise HTTPException(status_code=500, detail="Error processing search request")
760
+
761
+
762
+ # @app.post("/start-trial/{user_id}", response_model=TrialResponse)
763
+ # async def start_trial(user_id: str):
764
+ # trial = await subscriptions_collection.find_one({
765
+ # "user_id": user_id,
766
+ # "plan": "trial"
767
+ # })
768
+
769
+ # if trial:
770
+ # # Optional: Mark expired trial as "expired" for clarity
771
+ # if trial["end_date"] < datetime.utcnow() and trial["status"] == "active":
772
+ # await subscriptions_collection.update_one(
773
+ # {"_id": trial["_id"]},
774
+ # {"$set": {"status": "expired", "updated_at": datetime.utcnow()}}
775
+ # )
776
+ # raise HTTPException(400, "You have already used your free trial. Please subscribe to continue.")
777
+
778
+ # now = datetime.utcnow()
779
+ # trial = {
780
+ # "user_id": user_id,
781
+ # "plan": "trial",
782
+ # "status": "active",
783
+ # "start_date": now,
784
+ # "end_date": now + timedelta(days=3),
785
+ # "created_at": now,
786
+ # "updated_at": now
787
+ # }
788
+ # await subscriptions_collection.insert_one(trial)
789
+
790
+ # return {"status": "trial started", "expires": trial["end_date"]}
791
+
792
async def has_valid_active_subscription(user_id: str) -> bool:
    """True when *user_id* has an 'active' subscription that has not ended."""
    cutoff = datetime.utcnow()
    match = await subscriptions_collection.find_one({
        "user_id": user_id,
        "status": "active",
        "end_date": {"$gt": cutoff},
    })
    return match is not None
800
+
801
async def expire_old_subscriptions(user_id: str):
    """Mark this user's 'active' subscriptions whose end_date passed as expired."""
    now = datetime.utcnow()
    await subscriptions_collection.update_many(
        {
            "user_id": user_id,
            "status": "active",
            "end_date": {"$lte": now}  # already expired
        },
        {"$set": {"status": "expired", "updated_at": now}}
    )
811
+
812
@app.post("/start-trial/{user_id}", response_model=TrialResponse)
async def start_trial(user_id: str):
    """Start a 3-day free trial unless the user already has an active subscription."""
    # Lazily expire stale subscriptions before checking.
    await expire_old_subscriptions(user_id)

    if await has_valid_active_subscription(user_id):
        raise HTTPException(400, "You already have an active subscription.")

    now = datetime.utcnow()
    trial = {
        "user_id": user_id,
        "plan": "trial",
        "status": "active",
        "start_date": now,
        "end_date": now + timedelta(days=3),
        "created_at": now,
        "updated_at": now
    }
    await subscriptions_collection.insert_one(trial)
    # NOTE(review): TrialResponse declares `expires: datetime` but an ISO
    # string is returned; pydantic re-parses it — confirm this is intended.
    return {"status": "trial started", "expires": trial["end_date"].isoformat()}
831
+
832
+
833
@app.post("/create-subscription-order")
async def create_sub_order(sub_req: SubscriptionRequest):
    """
    Create a PayPal CAPTURE order for a monthly/yearly plan, persist it in
    the orders collection, and return the approval URL for the frontend.

    Raises 400 for an unknown plan.
    """
    user_id = sub_req.user_id
    plan = sub_req.plan
    price_map = {
        "monthly": "10.00",
        "yearly": "100.00"
    }
    if plan not in price_map:
        raise HTTPException(400, "Invalid plan selected")
    price = price_map[plan]
    request = OrdersCreateRequest()
    request.prefer("return=representation")
    request.request_body({
        "intent": "CAPTURE",
        "purchase_units": [{
            "custom_id": user_id,  # internal identifier echoed back by PayPal
            "amount": {
                "currency_code": "USD",
                "value": price
            }
        }],
        "application_context": {
            "return_url": f"http://localhost:3000/payments",  # optional
            "cancel_url": "http://localhost:3000/payments"
        }
    })

    # Fix: the PayPal SDK client is synchronous; run it in a worker thread so
    # the event loop is not blocked (same pattern as /capture-order).
    response = await asyncio.to_thread(p_client.execute, request)
    result = response.result

    # Extract the buyer-approval link from the HATEOAS links list.
    approve_url = next((link.href for link in result.links if link.rel == "approve"), None)
    now = datetime.utcnow()
    await orders_collection.insert_one({
        "paypal_order_id": result.id,
        "user_id": user_id,
        "plan": plan,
        "status": result.status,
        "created_at": now,
        "updated_at": now
    })

    return {
        "order_id": result.id,
        "status": result.status,
        "approve_url": approve_url
    }
880
+
881
+
882
@app.post("/capture-order/{order_id}", response_model=dict)
async def capture_order(order_id: str):
    """Capture an approved PayPal order and activate the matching subscription.

    Fix: the original code captured the payment FIRST and only then checked
    for an existing active subscription — a user who already had one was
    charged by PayPal and then refused with a 400.  All local validation now
    happens before money moves.

    Raises:
        HTTPException(404): unknown order id.
        HTTPException(400): duplicate active subscription, or capture failed.
    """
    import asyncio

    # Validate before charging: the order must exist locally...
    order_doc = await orders_collection.find_one({"paypal_order_id": order_id})
    if not order_doc:
        raise HTTPException(404, "Order not found")

    # ...and the user must be eligible for a new subscription.
    await expire_old_subscriptions(order_doc["user_id"])
    if await has_valid_active_subscription(order_doc["user_id"]):
        raise HTTPException(400, "You already have an active subscription.")

    # p_client.execute is synchronous; run it off the event loop.
    req = OrdersCaptureRequest(order_id)
    req.request_body({})
    resp = await asyncio.to_thread(p_client.execute, req)

    if resp.status_code != 201:
        raise HTTPException(400, "Failed to capture order")
    cap = resp.result

    now = datetime.utcnow()
    days = 30 if order_doc["plan"] == "monthly" else 365
    sub = {
        "user_id": order_doc["user_id"],
        "plan": order_doc["plan"],
        "status": "active",
        "start_date": now,
        "end_date": now + timedelta(days=days),
        "paypal_order_id": order_id,
        "created_at": now,
        "updated_at": now,
    }
    await subscriptions_collection.insert_one(sub)

    # Mirror PayPal's final status onto our order record.
    await orders_collection.update_one(
        {"paypal_order_id": order_id},
        {"$set": {"status": cap.status}},
    )

    return {"status": cap.status}
929
+
930
+
931
+ # @app.post("/capture-order/{order_id}", response_model=dict)
932
+ # async def capture_order(order_id: str):
933
+ # req = OrdersCaptureRequest(order_id)
934
+ # req.request_body({})
935
+
936
+ # # ❌ Don't await this — it's synchronous
937
+ # # resp = p_client.execute(req)
938
+ # import asyncio
939
+
940
+ # resp = await asyncio.to_thread(p_client.execute, req)
941
+
942
+ # if resp.status_code != 201:
943
+ # raise HTTPException(400, "Failed to capture order")
944
+
945
+ # cap = resp.result
946
+
947
+ # order_doc = await orders_collection.find_one({"paypal_order_id": order_id})
948
+ # if not order_doc:
949
+ # raise HTTPException(404, "Order not found")
950
+
951
+ # await orders_collection.update_one(
952
+ # {"paypal_order_id": order_id},
953
+ # {"$set": {"status": cap.status}}
954
+ # )
955
+
956
+ # # Insert into subscriptions if plan exists
957
+ # if "plan" in order_doc:
958
+ # now = datetime.utcnow()
959
+ # days = 30 if order_doc["plan"] == "monthly" else 365
960
+ # sub = {
961
+ # "user_id": order_doc["user_id"],
962
+ # "plan": order_doc["plan"],
963
+ # "status": "active",
964
+ # "start_date": now,
965
+ # "end_date": now + timedelta(days=days),
966
+ # "paypal_order_id": order_id,
967
+ # "created_at": now,
968
+ # "updated_at": now
969
+ # }
970
+ # existing = await subscriptions_collection.find_one({"user_id": order_doc["user_id"]})
971
+ # if existing:
972
+ # # Optional: You can update the existing subscription instead of inserting
973
+ # await subscriptions_collection.update_one(
974
+ # {"user_id": order_doc["user_id"]},
975
+ # {"$set": {
976
+ # "plan": order_doc["plan"],
977
+ # "status": "active",
978
+ # "start_date": now,
979
+ # "end_date": now + timedelta(days=days),
980
+ # "paypal_order_id": order_id,
981
+ # "updated_at": now
982
+ # }}
983
+ # )
984
+ # else:
985
+ # await subscriptions_collection.insert_one(sub)
986
+
987
+
988
+ # return {"status": cap.status}
989
+
990
+
991
@app.post("/cancel", response_model=dict)
async def cancel_subscription(user_id: str):
    """Cancel the user's currently active subscription.

    Raises:
        HTTPException(404): when no active subscription exists for the user.
    """
    active = await subscriptions_collection.find_one(
        {"user_id": user_id, "status": "active"}
    )
    if active is None:
        raise HTTPException(status_code=404, detail="Active subscription not found")

    await subscriptions_collection.update_one(
        {"_id": active["_id"]},
        {"$set": {"status": "cancelled", "updated_at": datetime.utcnow()}},
    )
    return {"status": "cancelled", "message": "Subscription cancelled successfully"}
1007
+
1008
+
1009
+
1010
+
1011
+ # @app.get("/payments")
1012
+ # async def success_page(user_id: str, token: str = "", PayerID: str = ""):
1013
+ # return {"message": "Payment approved", "user_id": user_id, "token": token, "PayerID": PayerID}
1014
+
1015
+
1016
@app.post("/webhook")
async def paypal_webhook(request: Request):
    """Handle PayPal webhook events; marks orders COMPLETED on capture.

    Fix: the original indexed event["resource"] directly, so a payload
    without a 'resource' key produced a KeyError (HTTP 500) and PayPal
    would keep retrying; .get() is now used for every level.

    SECURITY NOTE(review): the webhook signature is not verified, so anyone
    who discovers this URL can mark orders COMPLETED.  Verify the
    PAYPAL-TRANSMISSION-* headers via PayPal's verify-webhook-signature API.
    """
    event = await request.json()
    if event.get("event_type") == "PAYMENT.CAPTURE.COMPLETED":
        rid = (
            event.get("resource", {})
            .get("supplementary_data", {})
            .get("related_ids", {})
            .get("order_id")
        )
        if rid:
            await orders_collection.update_one(
                {"paypal_order_id": rid},
                {"$set": {"status": "COMPLETED"}},
            )
    return {"ok": True}
1030
+
1031
+
1032
+
1033
+
1034
+
1035
+ # To Health Check APP
1036
# To Health Check APP
@app.get("/health", tags=["health"])
async def health_check():
    """Report service health, probing MongoDB and the vector store.

    Fix: the original always returned "status": "healthy" even when the DB
    ping failed; the top-level status now reflects component errors so load
    balancers / orchestrators can react.  Response keys are unchanged.
    """
    try:
        await client.admin.command("ping")
        db_status = "connected"
    except Exception as e:
        db_status = f"error: {str(e)}"

    # The vector store may legitimately be empty; that still counts as up.
    try:
        stats = get_collection_stats()
        vector_db_status = "connected" if stats["exists"] else "empty but ready"
    except Exception as e:
        vector_db_status = f"error: {str(e)}"

    degraded = db_status.startswith("error") or vector_db_status.startswith("error")
    return {
        "status": "degraded" if degraded else "healthy",
        "database": db_status,
        "vector_database": vector_db_status,
        "storage_type": "mongodb",
    }
1057
+
1058
+
1059
+ # if __name__ == "__main__":
1060
+ # import uvicorn
1061
+ # uvicorn.run(app, host="0.0.0.0", port=int(os.getenv("PORT", 8001)))
App/utils.py ADDED
@@ -0,0 +1,376 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import pickle
4
+ from typing import List, Optional, BinaryIO, Dict, Any
5
+ from langchain_community.document_loaders import Docx2txtLoader
6
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
7
+ from langchain_huggingface import HuggingFaceEmbeddings
8
+ from langchain_community.vectorstores import Chroma
9
+ from langchain_openai import ChatOpenAI
10
+ from langchain.chains import create_retrieval_chain
11
+ from langchain.chains.combine_documents import create_stuff_documents_chain
12
+ from langchain.prompts import ChatPromptTemplate
13
+ from langchain.schema import Document
14
+ import tempfile
15
+ from pymongo import MongoClient
16
+ from bson.binary import Binary
17
+ import uuid
18
+ import tiktoken
19
+ from googleapiclient.discovery import build
20
+ import numpy as np
21
+
22
# MongoDB connection string.
# SECURITY NOTE(review): the fallback below embeds real credentials in source
# control — rotate them and supply MONGODB_URI via the environment only.
MONGODB_URI = os.getenv(
    "MONGODB_URI",
    "mongodb+srv://ahmed0499280:haseeb.2003@cluster0.hzgrxp2.mongodb.net/"
    "?retryWrites=true&w=majority&appName=Cluster0"
)

# MongoDB client and the collection used to persist Chroma vector stores.
client = MongoClient(MONGODB_URI)
db = client["Cluster0"]
chroma_db_collection = db["chroma_db_store"]  # Collection for storing Chroma DB

# YouTube Data API v3 client.
# SECURITY NOTE(review): the hard-coded key is kept only as a backward-compatible
# fallback — it is exposed in source control and should be rotated; deployments
# must set YOUTUBE_API_KEY in the environment.
YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY", "AIzaSyDIaXWJJX2W8swWl093DMyNZ_7TZUGe3DI")
youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
35
+ # Create a custom in-memory ChromaDB client
36
+ class MongoChromaStore:
37
+ """Custom storage for Chroma that uses MongoDB instead of disk"""
38
+
39
+ @staticmethod
40
+ def save_chroma(chroma_db, collection_name="default"):
41
+ """Save a Chroma DB to MongoDB"""
42
+ try:
43
+ # Extract necessary data from Chroma DB
44
+ # This is a simplification - in a real implementation we'd need to extract more data
45
+ embeddings = chroma_db._collection.get()
46
+
47
+ # Prepare data for MongoDB storage
48
+ chroma_data = {
49
+ "_id": collection_name,
50
+ "embeddings": Binary(pickle.dumps(embeddings)),
51
+ "last_updated": pickle.dumps(embeddings["metadatas"] if "metadatas" in embeddings else [])
52
+ }
53
+
54
+ # Store or update in MongoDB
55
+ chroma_db_collection.replace_one(
56
+ {"_id": collection_name},
57
+ chroma_data,
58
+ upsert=True
59
+ )
60
+
61
+ return True
62
+ except Exception as e:
63
+ print(f"Error saving Chroma DB to MongoDB: {e}")
64
+ return False
65
+
66
+
67
+
68
def count_tokens(text, model_name="gpt-3.5-turbo"):
    """Return how many tokens `text` encodes to for the given model's tokenizer."""
    try:
        enc = tiktoken.encoding_for_model(model_name)
    except KeyError:
        # Unknown model name — cl100k_base is a reasonable general tokenizer.
        enc = tiktoken.get_encoding("cl100k_base")
    tokens = enc.encode(text)
    return len(tokens)
77
+
78
+ # Video search
79
+ def search_youtube_video(query: str) -> str:
80
+ """
81
+ Search YouTube for the top video matching `query` and return its videoId.
82
+ """
83
+ res = (
84
+ youtube.search()
85
+ .list(
86
+ q=query,
87
+ part="id,snippet",
88
+ type="video",
89
+ maxResults=1
90
+ )
91
+ .execute()
92
+ )
93
+ items = res.get("items", [])
94
+ if not items:
95
+ raise ValueError("No video found for query.")
96
+ return items[0]["id"]["videoId"]
97
+
98
+
99
+ # Load and split DOCX into chunks (from file path)
100
+ def load_and_split(filepath: str, chunk_size: int = 1000, chunk_overlap: int = 200) -> List[Document]:
101
+ loader = Docx2txtLoader(filepath)
102
+ docs = loader.load()
103
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
104
+ return text_splitter.split_documents(docs)
105
+
106
+ # Load and split DOCX from bytes (for MongoDB storage)
107
+ def load_and_split_bytes(file_bytes: BinaryIO, chunk_size: int = 1000, chunk_overlap: int = 200) -> List[Document]:
108
+ # Create a temporary file
109
+ with tempfile.NamedTemporaryFile(suffix=".docx", delete=False) as temp_file:
110
+ temp_path = temp_file.name
111
+
112
+ # If file_bytes is a BytesIO object, get its content
113
+ if hasattr(file_bytes, 'read'):
114
+ file_bytes.seek(0)
115
+ content = file_bytes.read()
116
+ temp_file.write(content)
117
+ else:
118
+ # If it's already bytes
119
+ temp_file.write(file_bytes)
120
+
121
+ try:
122
+ # Process the temp file
123
+ return load_and_split(temp_path, chunk_size, chunk_overlap)
124
+ finally:
125
+ # Clean up
126
+ if os.path.exists(temp_path):
127
+ os.remove(temp_path)
128
+
129
+ # Build Chroma index and save to MongoDB
130
+ def build_chroma_index(docs, embedding_model: str = "Omartificial-Intelligence-Space/GATE-AraBert-v1", collection_name: str = "default"):
131
+ # Create temporary directory for Chroma
132
+ temp_dir = tempfile.mkdtemp()
133
+
134
+ try:
135
+ embeddings = HuggingFaceEmbeddings(model_name=embedding_model)
136
+
137
+ # Create or update Chroma DB
138
+ chroma_db = Chroma.from_documents(
139
+ docs,
140
+ embeddings,
141
+ persist_directory=temp_dir,
142
+ collection_name=collection_name
143
+ )
144
+
145
+ # Save Chroma DB to MongoDB
146
+ MongoChromaStore.save_chroma(chroma_db, collection_name)
147
+
148
+ # Return the retriever
149
+ return chroma_db.as_retriever(search_type="similarity", search_kwargs={"k": 3})
150
+
151
+ finally:
152
+ # Clean up temporary directory
153
+ import shutil
154
+ if os.path.exists(temp_dir):
155
+ shutil.rmtree(temp_dir)
156
+
157
+ # Get existing Chroma DB from MongoDB
158
+ def get_existing_retriever(embedding_model: str = "Omartificial-Intelligence-Space/GATE-AraBert-v1", collection_name: str = "default"):
159
+ # Check if collection exists in MongoDB
160
+ chroma_data = chroma_db_collection.find_one({"_id": collection_name})
161
+ if not chroma_data:
162
+ return None
163
+
164
+ try:
165
+ # Create temporary directory for Chroma
166
+ temp_dir = tempfile.mkdtemp()
167
+
168
+ # Deserialize embeddings from MongoDB
169
+ embeddings_data = pickle.loads(chroma_data["embeddings"])
170
+
171
+ # Use the embeddings to recreate the Chroma DB
172
+ embeddings = HuggingFaceEmbeddings(model_name=embedding_model)
173
+
174
+ # At this point we would need to reconstruct the Chroma DB
175
+ # This is a simplified implementation that doesn't fully work
176
+ # In a production system, you would need a more complete solution
177
+
178
+ # For now, let's create a new Chroma DB and add the documents
179
+ # This is not ideal but shows the concept
180
+ if "documents" in embeddings_data and embeddings_data["documents"]:
181
+ # Create documents from the stored data
182
+ docs = []
183
+ for i, text in enumerate(embeddings_data["documents"]):
184
+ metadata = embeddings_data["metadatas"][i] if "metadatas" in embeddings_data else {}
185
+ docs.append(Document(page_content=text, metadata=metadata))
186
+
187
+ # Create a new Chroma DB
188
+ chroma_db = Chroma.from_documents(
189
+ docs,
190
+ embeddings,
191
+ persist_directory=temp_dir,
192
+ collection_name=collection_name
193
+ )
194
+
195
+ return chroma_db.as_retriever(search_type="similarity", search_kwargs={"k": 3})
196
+
197
+ except Exception as e:
198
+ print(f"Error loading Chroma DB from MongoDB: {e}")
199
+ return None
200
+ finally:
201
+ # Clean up temporary directory
202
+ import shutil
203
+ if os.path.exists(temp_dir):
204
+ shutil.rmtree(temp_dir)
205
+
206
def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
    """Return the cosine similarity between vectors `a` and `b`.

    Fix: the original divided by the product of norms unconditionally, so a
    zero vector produced nan/inf; similarity with a zero vector is now 0.0.
    """
    denom = float(np.linalg.norm(a)) * float(np.linalg.norm(b))
    if denom == 0.0:
        return 0.0
    return float(np.dot(a, b) / denom)
208
+
209
+
210
+ # Get document count in the collection
211
+ def get_collection_stats(collection_name: str = "default"):
212
+ # Check if collection exists in MongoDB
213
+ chroma_data = chroma_db_collection.find_one({"_id": collection_name})
214
+ if not chroma_data:
215
+ return {"exists": False, "document_count": 0}
216
+
217
+ try:
218
+ # Deserialize embeddings from MongoDB
219
+ embeddings_data = pickle.loads(chroma_data["embeddings"])
220
+
221
+ # Count documents
222
+ doc_count = len(embeddings_data["documents"]) if "documents" in embeddings_data else 0
223
+
224
+ return {
225
+ "exists": True,
226
+ "document_count": doc_count
227
+ }
228
+ except Exception as e:
229
+ print(f"Error getting collection stats: {e}")
230
+ return {"exists": False, "document_count": 0}
231
+
232
+ # Instantiate LLM (Google Gemini or OpenAI)
233
+ def get_llm(temperature: float = 0.0):
234
+ return ChatOpenAI(model="o4-mini", api_key="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")
235
+
236
+
237
def create_rag_chain_with_history(retriever, llm, lan, level, diacritics=False, history=None):
    """Build a retrieval-augmented chat chain seeded with conversation history.

    Args:
        retriever: document retriever providing the '{context}' snippets.
        llm: chat model used to generate answers.
        lan: target response language (e.g. 'arabic', 'english').
        level: student level ('beginner' / 'intermediate' / 'advanced').
        diacritics: for Arabic only — whether answers must carry diacritics.
        history: prior turns as [{'role': ..., 'content': ...}, ...].

    Returns:
        A retrieval chain (invoke with {'input': question}).

    Fix: the original duplicated the ~16-line system prompt three times with
    only the closing language directive differing; the shared text is now
    written once.  All prompt wording is preserved verbatim.
    """
    if history is None:
        history = []

    # Instruction text shared by every language/diacritics combination.
    base_instructions = (
        "You are an Assistant for answering questions. "
        "Use the following retrieved context snippets to answer. "
        "Look for the relevance between the context and the question before answering. "
        "If you do not know the answer, say that you do not know. "
        "Be polite, act like a teacher, and provide as detailed an answer as possible based on the context. "
        "Consider the conversation history when responding. "
        "You are designed to help Muslims learn Arabic, so explanations should be culturally respectful and appropriate. "
        "Be responsive to the user's needs—if the user seems stuck or confused during the chat, proactively offer helpful suggestions, clarifications, or encouragement. "
        "Adjust your explanations according to the student's level: "
        "for 'beginner', use very simple language, break down grammar and context step-by-step, and give clear examples; "
        "for 'intermediate', provide more detailed grammar and usage insights with moderate complexity; "
        "for 'advanced', include deeper linguistic explanations, nuanced examples, and encourage self-reflection. "
        "Grammar and contextual explanations should start at the appropriate level and build gradually. "
        "Include examples from the connected knowledge base when possible; otherwise, generate clear and relevant examples yourself. "
    )

    # Only the closing language/level directive differs between variants;
    # the original comparisons (== True / == False) are kept as-is.
    if lan.lower() == 'arabic' and diacritics == True:
        tail = (
            f"Always respond in {lan} *with all proper diacritics*. "
            f"Student level: {level}."
        )
    elif lan.lower() == 'arabic' and diacritics == False:
        tail = (
            f"Always respond in {lan} *without diacritics*"
            f"Student level: {level}."
        )
    else:
        tail = (
            f"Always respond in {lan}. "
            f"Student level: {level}. "
        )
    system_prompt = base_instructions + tail + "{context}"

    # System prompt first, then prior turns, then the current user input.
    messages = [('system', system_prompt)]
    for message in history:
        messages.append((message["role"], message["content"]))
    messages.append(('human', '{input}'))

    prompt = ChatPromptTemplate.from_messages(messages)
    question_answer_chain = create_stuff_documents_chain(llm, prompt)
    return create_retrieval_chain(retriever, question_answer_chain)
318
+
319
+
320
+
321
+
322
+ # Additional function to add documents to existing index
323
+ def add_documents_to_index(docs, embedding_model: str = "Omartificial-Intelligence-Space/GATE-AraBert-v1", collection_name: str = "default"):
324
+ # Get existing retriever
325
+ existing_retriever = get_existing_retriever(embedding_model, collection_name)
326
+
327
+ # If no existing retriever, create a new one
328
+ if not existing_retriever:
329
+ return build_chroma_index(docs, embedding_model, collection_name)
330
+
331
+ # If we have an existing retriever, we need to add documents to it
332
+ # This is a simplified implementation
333
+ # In a production system, you would need a more complete solution
334
+
335
+ # Create temporary directory for Chroma
336
+ temp_dir = tempfile.mkdtemp()
337
+
338
+ try:
339
+ embeddings = HuggingFaceEmbeddings(model_name=embedding_model)
340
+
341
+ # Get existing documents
342
+ chroma_data = chroma_db_collection.find_one({"_id": collection_name})
343
+ if chroma_data:
344
+ embeddings_data = pickle.loads(chroma_data["embeddings"])
345
+
346
+ # Create documents from the stored data
347
+ existing_docs = []
348
+ if "documents" in embeddings_data and embeddings_data["documents"]:
349
+ for i, text in enumerate(embeddings_data["documents"]):
350
+ metadata = embeddings_data["metadatas"][i] if "metadatas" in embeddings_data else {}
351
+ existing_docs.append(Document(page_content=text, metadata=metadata))
352
+
353
+ # Combine with new documents
354
+ all_docs = existing_docs + docs
355
+
356
+ # Create a new Chroma DB with all documents
357
+ chroma_db = Chroma.from_documents(
358
+ all_docs,
359
+ embeddings,
360
+ persist_directory=temp_dir,
361
+ collection_name=collection_name
362
+ )
363
+
364
+ # Save Chroma DB to MongoDB
365
+ MongoChromaStore.save_chroma(chroma_db, collection_name)
366
+
367
+ return chroma_db.as_retriever(search_type="similarity", search_kwargs={"k": 3})
368
+ else:
369
+ # If no existing data, create a new index
370
+ return build_chroma_index(docs, embedding_model, collection_name)
371
+
372
+ finally:
373
+ # Clean up temporary directory
374
+ import shutil
375
+ if os.path.exists(temp_dir):
376
+ shutil.rmtree(temp_dir)
Dockerfile ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use an official Python image
FROM python:3.10

# Set the working directory
WORKDIR /code

# Install dependencies first so the pip layer is cached across code changes
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy the application package.
# Fix: the original copied ./app (lowercase) while CMD imports App.main —
# on Linux's case-sensitive filesystem the module would not be found.
COPY ./App ./App

# Expose port
EXPOSE 7860

# Run the FastAPI app using uvicorn
CMD ["uvicorn", "App.main:app", "--host", "0.0.0.0", "--port", "7860"]
requirements.txt ADDED
@@ -0,0 +1,179 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiohappyeyeballs==2.6.1
2
+ aiohttp==3.11.18
3
+ aiosignal==1.3.2
4
+ annotated-types==0.7.0
5
+ anyio==4.9.0
6
+ asgiref==3.8.1
7
+ attrs==25.3.0
8
+ backoff==2.2.1
9
+ bcrypt==4.3.0
10
+ build==1.2.2.post1
11
+ cachetools==5.5.2
12
+ certifi==2025.4.26
13
+ cffi==1.17.1
14
+ charset-normalizer==3.4.2
15
+ chromadb==1.0.8
16
+ click==8.1.8
17
+ coloredlogs==15.0.1
18
+ cryptography==44.0.3
19
+ dataclasses-json==0.6.7
20
+ defusedxml==0.7.1
21
+ Deprecated==1.2.18
22
+ distro==1.9.0
23
+ dnspython==2.7.0
24
+ docx2txt==0.9
25
+ durationpy==0.9
26
+ ecdsa==0.19.1
27
+ email_validator==2.2.0
28
+ fastapi==0.115.9
29
+ filelock==3.18.0
30
+ filetype==1.2.0
31
+ flatbuffers==25.2.10
32
+ frozenlist==1.6.0
33
+ fsspec==2025.3.2
34
+ google-ai-generativelanguage==0.6.18
35
+ google-api-core==2.24.2
36
+ google-api-python-client==2.169.0
37
+ google-auth==2.40.1
38
+ google-auth-httplib2==0.2.0
39
+ googleapis-common-protos==1.70.0
40
+ greenlet==3.2.1
41
+ grpcio==1.71.0
42
+ grpcio-status==1.71.0
43
+ h11==0.16.0
44
+ httpcore==1.0.9
45
+ httplib2==0.22.0
46
+ httptools==0.6.4
47
+ httpx==0.28.1
48
+ httpx-sse==0.4.0
49
+ huggingface-hub==0.30.2
50
+ humanfriendly==10.0
51
+ idna==3.10
52
+ importlib_metadata==8.6.1
53
+ importlib_resources==6.5.2
54
+ Jinja2==3.1.6
55
+ jiter==0.9.0
56
+ joblib==1.5.0
57
+ jsonpatch==1.33
58
+ jsonpointer==3.0.0
59
+ jsonschema==4.23.0
60
+ jsonschema-specifications==2025.4.1
61
+ jwt==1.3.1
62
+ kubernetes==32.0.1
63
+ langchain==0.3.25
64
+ langchain-community==0.3.23
65
+ langchain-core==0.3.58
66
+ langchain-google-genai==2.1.4
67
+ langchain-huggingface==0.1.2
68
+ langchain-openai==0.3.16
69
+ langchain-text-splitters==0.3.8
70
+ langsmith==0.3.42
71
+ markdown-it-py==3.0.0
72
+ MarkupSafe==3.0.2
73
+ marshmallow==3.26.1
74
+ mdurl==0.1.2
75
+ mmh3==5.1.0
76
+ motor==3.7.0
77
+ mpmath==1.3.0
78
+ multidict==6.4.3
79
+ mypy_extensions==1.1.0
80
+ networkx==3.4.2
81
+ numpy==2.2.5
82
+ nvidia-cublas-cu12==12.6.4.1
83
+ nvidia-cuda-cupti-cu12==12.6.80
84
+ nvidia-cuda-nvrtc-cu12==12.6.77
85
+ nvidia-cuda-runtime-cu12==12.6.77
86
+ nvidia-cudnn-cu12==9.5.1.17
87
+ nvidia-cufft-cu12==11.3.0.4
88
+ nvidia-cufile-cu12==1.11.1.6
89
+ nvidia-curand-cu12==10.3.7.77
90
+ nvidia-cusolver-cu12==11.7.1.2
91
+ nvidia-cusparse-cu12==12.5.4.2
92
+ nvidia-cusparselt-cu12==0.6.3
93
+ nvidia-nccl-cu12==2.26.2
94
+ nvidia-nvjitlink-cu12==12.6.85
95
+ nvidia-nvtx-cu12==12.6.77
96
+ oauthlib==3.2.2
97
+ onnxruntime==1.21.1
98
+ openai==1.77.0
99
+ opentelemetry-api==1.32.1
100
+ opentelemetry-exporter-otlp-proto-common==1.32.1
101
+ opentelemetry-exporter-otlp-proto-grpc==1.32.1
102
+ opentelemetry-instrumentation==0.53b1
103
+ opentelemetry-instrumentation-asgi==0.53b1
104
+ opentelemetry-instrumentation-fastapi==0.53b1
105
+ opentelemetry-proto==1.32.1
106
+ opentelemetry-sdk==1.32.1
107
+ opentelemetry-semantic-conventions==0.53b1
108
+ opentelemetry-util-http==0.53b1
109
+ orjson==3.10.18
110
+ overrides==7.7.0
111
+ packaging==24.2
112
+ passlib==1.7.4
113
+ paypal-checkout-serversdk==1.0.3
114
+ paypalhttp==1.0.1
115
+ pillow==11.2.1
116
+ posthog==4.0.1
117
+ propcache==0.3.1
118
+ proto-plus==1.26.1
119
+ protobuf==5.29.4
120
+ pyasn1==0.6.1
121
+ pyasn1_modules==0.4.2
122
+ pycparser==2.22
123
+ pydantic==2.11.4
124
+ pydantic-settings==2.9.1
125
+ pydantic_core==2.33.2
126
+ Pygments==2.19.1
127
+ pymongo==4.12.1
128
+ pyOpenSSL==25.1.0
129
+ pyparsing==3.2.3
130
+ PyPika==0.48.9
131
+ pyproject_hooks==1.2.0
132
+ python-dateutil==2.9.0.post0
133
+ python-dotenv==1.1.0
134
+ python-jose==3.4.0
135
+ python-multipart==0.0.20
136
+ PyYAML==6.0.2
137
+ referencing==0.36.2
138
+ regex==2024.11.6
139
+ requests==2.32.3
140
+ requests-oauthlib==2.0.0
141
+ requests-toolbelt==1.0.0
142
+ rich==14.0.0
143
+ rpds-py==0.24.0
144
+ rsa==4.9.1
145
+ safetensors==0.5.3
146
+ scikit-learn==1.6.1
147
+ scipy==1.15.2
148
+ sentence-transformers==4.1.0
149
+ setuptools==80.3.1
150
+ shellingham==1.5.4
151
+ six==1.17.0
152
+ sniffio==1.3.1
153
+ SQLAlchemy==2.0.40
154
+ starlette==0.45.3
155
+ sympy==1.14.0
156
+ tenacity==9.1.2
157
+ threadpoolctl==3.6.0
158
+ tiktoken==0.9.0
159
+ tokenizers==0.21.1
160
+ torch==2.7.0
161
+ tqdm==4.67.1
162
+ transformers==4.51.3
163
+ triton==3.3.0
164
+ typer==0.15.3
165
+ typing-inspect==0.9.0
166
+ typing-inspection==0.4.0
167
+ typing_extensions==4.13.2
168
+ uritemplate==4.1.1
169
+ urllib3==2.4.0
170
+ uvicorn==0.34.2
171
+ uvloop==0.21.0
172
+ watchfiles==1.0.5
173
+ websocket-client==1.8.0
174
+ websockets==15.0.1
175
+ wrapt==1.17.2
176
+ yarl==1.20.0
177
+ youtube-transcript-api==1.0.3
178
+ zipp==3.21.0
179
+ zstandard==0.23.0