Spaces:

ADKU
/

WorkspaceAI

Sleeping

App Files Files Community

ADKU commited on Mar 7, 2025

Commit

8c2204f

verified ·

1 Parent(s): a8eddfb

Update app.py

Browse files

Files changed (1) hide show

app.py +179 -48

app.py CHANGED Viewed

@@ -6,6 +6,8 @@ import faiss
 import numpy as np
 from sentence_transformers import SentenceTransformer
 from pymongo import MongoClient
 import uvicorn
 from fastapi.middleware.cors import CORSMiddleware
@@ -34,6 +36,10 @@ class QueryRequest(BaseModel):
     email: str
     query: str
 def fetch_latest_data():
     return {
         "users": list(db.users.find()),
@@ -44,18 +50,16 @@ def fetch_latest_data():
         "schedules": list(db.schedules.find())
     }
-def generate_sentences(db):
-    # Your existing logic here...
     users, teams, projects, modules, documents, schedules = (
-        db["users"], db["teams"], db["projects"], db["modules"], db["documents"], db["schedules"]
     )
     user_sentences = {}  # Store categorized sentences per user
     for user in users:
         username = user.get("username", "Unknown User")
         email = user.get("email", "Unknown Email")
         if email not in user_sentences:
             user_sentences[email] = {
                 "Teams": [],
@@ -64,105 +68,225 @@ def generate_sentences(db):
                 "Documents": [],
                 "Schedules": []
             }
         # User team ownership and membership
         owned_teams = [team for team in teams if team.get("owner", {}).get("email") == email]
         if owned_teams:
             team_names = ", ".join(f'"{team["teamName"]}"' for team in owned_teams)
             user_sentences[email]["Teams"].append(f"User {username} owns the teams: {team_names}.")
         member_teams = [team for team in teams if any(m["email"] == email for m in team.get("members", []))]
         if member_teams:
             team_names = ", ".join(f'"{team["teamName"]}"' for team in member_teams)
             user_sentences[email]["Teams"].append(f"User {username} is a member of the teams: {team_names}.")
         # Find projects in teams they own or are part of
         relevant_teams = owned_teams + member_teams
         team_ids = [str(team["_id"]) for team in relevant_teams]
         user_projects = [p for p in projects if str(p.get("owner", {}).get("teamId")) in team_ids]
         if user_projects:
             for project in user_projects:
                 proj_name = project["projName"]
-                user_sentences[email]["Projects"].append(f"User {username} is involved in project {proj_name}.")
                 # Find modules under this project
                 proj_modules = [m for m in modules if str(m.get("projId")) == str(project["_id"])]
                 if proj_modules:
                     for module in proj_modules:
                         module_name = module["moduleName"]
                         user_sentences[email]["Modules & Tasks"].append(f"In project {proj_name}, module {module_name} exists.")
                         # Find tasks in this module assigned to the user
                         assigned_tasks = [
                             task for task in module.get("tasks", [])
                             if any(a["email"] == email for a in task.get("assignedTo", []))
                         ]
                         if assigned_tasks:
-                            task_names = ", ".join(f'"{t["taskName"]}"' for t in assigned_tasks)
-                            user_sentences[email]["Modules & Tasks"].append(f"Tasks assigned to {username} in {module_name}: {task_names}.")
                 # Find documents in this project
                 proj_docs = [d for d in documents if str(d.get("owner", {}).get("projId")) == str(project["_id"])]
                 if proj_docs:
                     doc_names = ", ".join(f'"{d["title"]}"' for d in proj_docs)
                     user_sentences[email]["Documents"].append(f"Documents related to project {proj_name}: {doc_names}.")
         # Find meeting schedules related to their teams
         user_schedules = [s for s in schedules if str(s.get("teamId")) in team_ids]
         if user_schedules:
             for schedule in user_schedules:
-                schedule_detail = f'{schedule["moto"]} scheduled on {schedule["date"]} at {schedule["time"]}.'
                 user_sentences[email]["Schedules"].append(schedule_detail)
     return user_sentences
-def update_faiss_index(user_sentences):
-    # Your existing FAISS logic here...
-    faiss_indices = {}
     for email, categories in user_sentences.items():
-        sentences = sum(categories.values(), [])
-        if not sentences:
-            continue
-        embeddings = model.encode(sentences, convert_to_numpy=True)
-        embedding_dim = embeddings.shape[1]
-        index = faiss.IndexFlatL2(embedding_dim)
-        index.add(embeddings)
-        faiss_indices[email] = {"index": index, "sentences": sentences}
-    return faiss_indices
-def get_relevant_sentences(email, query, faiss_indices):
-    # Your FAISS query logic here...
-    if email not in faiss_indices:
         return ["User not found or no data available."]
-    index_data = faiss_indices[email]
-    index = index_data["index"]
-    sentences = index_data["sentences"]
     # Compute query embedding
     query_embedding = model.encode([query], convert_to_numpy=True)
-    k = 100
     distances, indices = index.search(query_embedding, k)
-    # Filter sentences based on FAISS similarity threshold
     threshold = 1.5
-    filtered_sentences = [sentences[idx] for dist, idx in zip(distances[0], indices[0]) if dist < threshold]
-    return filtered_sentences if filtered_sentences else ["No relevant information found."]
 def generate_response(email, query):
-    filtered_sentences = get_relevant_sentences(email, query, faiss_indices)
-    prompt = f"Query: {query}\n\n" + "\n".join(filtered_sentences)
     model = genai.GenerativeModel("gemini-1.5-flash")
     response = model.generate_content(prompt)
-    return response.text
 @app.post("/chat")
 async def chat(request: QueryRequest):
@@ -180,4 +304,11 @@ def home():
 faiss_indices = update_faiss_index(generate_sentences(fetch_latest_data()))
 if __name__ == "__main__":
     uvicorn.run(app, host="0.0.0.0", port=7860)

 import numpy as np
 from sentence_transformers import SentenceTransformer
 from pymongo import MongoClient
+import threading
+import time
 import uvicorn
 from fastapi.middleware.cors import CORSMiddleware
     email: str
     query: str
+# FAISS Index (Per User)
+user_indexes = {}  # Stores FAISS index per user {email: FAISS index}
+user_sentence_mapping = {}  # Maps user emails to (id, sentence) pairs
 def fetch_latest_data():
     return {
         "users": list(db.users.find()),
         "schedules": list(db.schedules.find())
     }
+def generate_sentences(data):
     users, teams, projects, modules, documents, schedules = (
+        data["users"], data["teams"], data["projects"], data["modules"], data["documents"], data["schedules"]
     )
     user_sentences = {}  # Store categorized sentences per user
     for user in users:
         username = user.get("username", "Unknown User")
         email = user.get("email", "Unknown Email")
         if email not in user_sentences:
             user_sentences[email] = {
                 "Teams": [],
                 "Documents": [],
                 "Schedules": []
             }
+            if not any(user_sentences[email].values()):  # No data found for this user
+                user_sentences[email]["General"] = [f"User {username} is registered but has no assigned data."]
         # User team ownership and membership
         owned_teams = [team for team in teams if team.get("owner", {}).get("email") == email]
         if owned_teams:
             team_names = ", ".join(f'"{team["teamName"]}"' for team in owned_teams)
             user_sentences[email]["Teams"].append(f"User {username} owns the teams: {team_names}.")
         member_teams = [team for team in teams if any(m["email"] == email for m in team.get("members", []))]
         if member_teams:
             team_names = ", ".join(f'"{team["teamName"]}"' for team in member_teams)
             user_sentences[email]["Teams"].append(f"User {username} is a member of the teams: {team_names}.")
         # Find projects in teams they own or are part of
         relevant_teams = owned_teams + member_teams
         team_ids = [str(team["_id"]) for team in relevant_teams]
         user_projects = [p for p in projects if str(p.get("owner", {}).get("teamId")) in team_ids]
         if user_projects:
             for project in user_projects:
                 proj_name = project["projName"]
+                team_creator = next((t["teamName"] for t in teams if str(t["_id"]) == str(project.get("owner", {}).get("teamId"))), "Unknown Team")
+                user_sentences[email]["Projects"].append(f"User {username} is involved in project {proj_name}, created by team {team_creator}.")
                 # Find modules under this project
                 proj_modules = [m for m in modules if str(m.get("projId")) == str(project["_id"])]
                 if proj_modules:
                     for module in proj_modules:
                         module_name = module["moduleName"]
                         user_sentences[email]["Modules & Tasks"].append(f"In project {proj_name}, module {module_name} exists.")
                         # Find tasks in this module assigned to the user
                         assigned_tasks = [
                             task for task in module.get("tasks", [])
                             if any(a["email"] == email for a in task.get("assignedTo", []))
                         ]
                         if assigned_tasks:
+                            task_details = ", ".join(f'"{t["taskName"]}" (Status: {t.get("status", "Unknown")})' for t in assigned_tasks)
+                            user_sentences[email]["Modules & Tasks"].append(f"Tasks assigned to {username} in {module_name}: {task_details}.")
                 # Find documents in this project
                 proj_docs = [d for d in documents if str(d.get("owner", {}).get("projId")) == str(project["_id"])]
                 if proj_docs:
                     doc_names = ", ".join(f'"{d["title"]}"' for d in proj_docs)
                     user_sentences[email]["Documents"].append(f"Documents related to project {proj_name}: {doc_names}.")
         # Find meeting schedules related to their teams
         user_schedules = [s for s in schedules if str(s.get("teamId")) in team_ids]
         if user_schedules:
             for schedule in user_schedules:
+                related_team = next((t["teamName"] for t in teams if str(t["_id"]) == str(schedule.get("teamId"))), "Unknown Team")
+                related_project = next((p["projName"] for p in projects if str(p["_id"]) == str(schedule.get("projId"))), "Unknown Project")
+                schedule_detail = f'{schedule["moto"]} scheduled on {schedule["date"]} at {schedule["time"]} for team {related_team} in project {related_project}.'
                 user_sentences[email]["Schedules"].append(schedule_detail)
     return user_sentences
+def fetch_initial_data():
+    data = {
+        "users": list(db.users.find()),
+        "teams": list(db.teams.find()),
+        "projects": list(db.projects.find()),
+        "modules": list(db.modules.find()),
+        "documents": list(db.documents.find()),
+        "schedules": list(db.schedules.find())
+    }
+    user_sentences = generate_sentences(data)
+    user_count = 0  # Track users added to FAISS
     for email, categories in user_sentences.items():
+        sentences = sum(categories.values(), [])  # Flatten categorized sentences
+        print(f"User: {email}, Sentences Count: {len(sentences)}")  # Debugging Output
+        if sentences:
+            user_count += 1
+            embedding_dim = model.get_sentence_embedding_dimension()
+            user_indexes[email] = faiss.IndexFlatL2(embedding_dim)
+            embeddings = model.encode(sentences, convert_to_numpy=True)
+            user_indexes[email].add(embeddings)
+            user_sentence_mapping[email] = [(idx, s) for idx, s in enumerate(sentences)]
+    print(f"Total Users Indexed in FAISS: {user_count} / {len(data['users'])}")
+def update_user_embeddings(email):
+    """
+    Regenerate structured sentences for the user, update FAISS index.
+    """
+    data = {
+        "users": list(db.users.find({"email": email})),
+        "teams": list(db.teams.find()),
+        "projects": list(db.projects.find()),
+        "modules": list(db.modules.find()),
+        "documents": list(db.documents.find()),
+        "schedules": list(db.schedules.find())
+    }
+    user_sentences = generate_sentences(data)
+    if email in user_sentences:
+        sentences = sum(user_sentences[email].values(), [])  # Flatten structured sentences
+        if sentences:
+            embeddings = model.encode(sentences, convert_to_numpy=True)
+            embedding_dim = model.get_sentence_embedding_dimension()
+            # Rebuild FAISS index for this user
+            user_indexes[email] = faiss.IndexFlatL2(embedding_dim)
+            user_indexes[email].add(embeddings)
+            user_sentence_mapping[email] = [(idx, s) for idx, s in enumerate(sentences)]
+            print(f"Updated embeddings for {email}. Total sentences: {len(sentences)}")
+def watch_changes():
+    """Monitor MongoDB for changes, identify affected users, and update embeddings dynamically."""
+    print("Watching MongoDB for changes...")
+    while True:
+        try:
+            with db.watch() as stream:  # Watch the entire database
+                for change in stream:
+                    print("Detected Change:", change)  # Debugging print
+                    operation = change["operationType"]
+                    collection_name = change["ns"]["coll"]  # Get the collection that changed
+                    doc_id = change["documentKey"]["_id"]
+                    emails = set()  # Store affected user emails
+                    # Fetch user email based on the collection that was updated
+                    if collection_name == "users":
+                        full_doc = change.get("fullDocument", {})
+                        if full_doc and "email" in full_doc:
+                            emails.add(full_doc["email"])
+                    elif collection_name == "teams":
+                        team_doc = db.teams.find_one({"_id": doc_id})
+                        if team_doc and "owner" in team_doc:
+                            emails.add(team_doc["owner"].get("email"))
+                    elif collection_name == "projects":
+                        project_doc = db.projects.find_one({"_id": doc_id})
+                        if project_doc and "owner" in project_doc:
+                            emails.add(project_doc["owner"].get("email"))
+                    elif collection_name == "modules":
+                        module_doc = db.modules.find_one({"_id": doc_id})
+                        if module_doc:
+                            # Fetch users assigned to the module
+                            for user in module_doc.get("assignedTo", []):
+                                if "email" in user:
+                                    emails.add(user["email"])
+                    elif collection_name == "documents":
+                        doc = db.documents.find_one({"_id": doc_id})
+                        if doc and "owner" in doc:
+                            emails.add(doc["owner"].get("email"))
+                    elif collection_name == "schedules":
+                        schedule_doc = db.schedules.find_one({"_id": doc_id})
+                        if schedule_doc:
+                            team_id = schedule_doc.get("teamId")
+                            team_doc = db.teams.find_one({"_id": team_id})
+                            if team_doc and "owner" in team_doc:
+                                emails.add(team_doc["owner"].get("email"))
+                    if emails:
+                        for email in emails:
+                            print(f"Detected {operation} for user: {email}")
+                            if operation in ["insert", "update", "delete"]:
+                                update_user_embeddings(email)
+                                print(f"Updated user {email} due to {operation} operation.")
+                    else:
+                        print(f"Change detected in {collection_name}, but no associated email found.")
+        except Exception as e:
+            print(f"Error in watch_changes(): {e}")
+            print("Reconnecting to MongoDB Change Stream in 5 seconds...")
+            time.sleep(5)  # Prevent infinite error loops
+def get_relevant_sentences(email, query):
+    """Retrieve relevant sentences using FAISS for the given user and query."""
+    if email not in user_indexes:
         return ["User not found or no data available."]
+    index = user_indexes[email]  # FAISS index for the user
+    sentence_data = user_sentence_mapping.get(email, [])  # Sentence mapping
+    if not sentence_data:
+        return ["No stored sentences for this user."]
     # Compute query embedding
     query_embedding = model.encode([query], convert_to_numpy=True)
+    # Perform FAISS search (top-k nearest neighbors)
+    k = min(100, len(sentence_data))  # Limit k to available sentences
     distances, indices = index.search(query_embedding, k)
+    # Set a similarity threshold to filter results
     threshold = 1.5
+    relevant_sentences = [sentence_data[idx][1] for dist, idx in zip(distances[0], indices[0]) if dist < threshold]
+    return relevant_sentences if relevant_sentences else ["No relevant information found."]
 def generate_response(email, query):
+    """Generate a natural language response using Gemini based on FAISS search results."""
+    relevant_sentences = get_relevant_sentences(email, query)
+    # Construct prompt using relevant sentences
+    prompt = f"Query: {query}\nContext:\n" + "\n".join(relevant_sentences) + "\nAnswer in a natural way."
+    # Use Gemini API to generate response
     model = genai.GenerativeModel("gemini-1.5-flash")
     response = model.generate_content(prompt)
+    return response.text if response.text else "I'm unable to find relevant information."
 @app.post("/chat")
 async def chat(request: QueryRequest):
 faiss_indices = update_faiss_index(generate_sentences(fetch_latest_data()))
 if __name__ == "__main__":
+    # Fetch initial data for FAISS indexing
+    fetch_initial_data()
+    # Start watching for real-time changes in a separate thread
+    threading.Thread(target=watch_changes, daemon=True).start()
+    print(f"Active Threads: {threading.active_count()}")  # Debugging thread count
     uvicorn.run(app, host="0.0.0.0", port=7860)