rairo committed on
Commit
115c3fc
·
verified ·
1 Parent(s): 125bda3

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +120 -106
main.py CHANGED
@@ -16,145 +16,154 @@ from dotenv import load_dotenv
16
  from firebase_admin import credentials, firestore, storage
17
  from google import genai
18
 
 
 
 
 
 
 
 
 
 
 
 
19
  load_dotenv()
20
 
21
- # --------- Flask & Firebase Setup ---------
22
  app = Flask(__name__)
23
  CORS(app)
24
 
25
- # Initialize Firebase with Firestore + Storage
26
  cred_json = os.environ.get("FIREBASE")
27
  if not cred_json:
28
  raise RuntimeError("Missing FIREBASE env var")
29
  cred = credentials.Certificate(json.loads(cred_json))
30
- firebase_admin.initialize_app(cred, {
31
- "storageBucket": os.environ.get("Firebase_Storage")
32
- })
33
- fs = firestore.client()
34
- bucket = storage.bucket()
35
 
36
- # --------- Google GenAI Client ---------
37
  client = genai.Client(api_key=os.getenv("Gemini"))
38
 
39
- # --------- FAISS Cache Paths (unchanged) ---------
40
  INDEX_PATH = "vector.index"
41
- DOCS_PATH = "documents.pkl"
42
-
43
-
44
 
45
- # --------- Fetch & Summarize Firestore Docs ---------
46
def fetch_documents() -> list[str]:
    """Summarize Firestore records as one-line sentences for retrieval.

    Streams six collections in a fixed order (participants, interventions,
    feedbacks, complianceDocuments, assignedInterventions, consultants) and
    renders each record as a short English sentence.

    Returns:
        The sentences, in the order the collections and their documents
        were streamed.
    """

    def records(collection_name):
        # Yield every document of the named collection as a plain dict.
        for snapshot in fs.collection(collection_name).stream():
            yield snapshot.to_dict()

    summaries: list[str] = []

    # 1) Participants
    for rec in records("participants"):
        summaries.append(
            "{} ({}), sector: {}, stage: {}, type: {}.".format(
                rec.get("name", "Unknown Participant"),
                rec.get("enterpriseName", "Unknown Enterprise"),
                rec.get("sector", "Unknown Sector"),
                rec.get("stage", "Unknown Stage"),
                rec.get("developmentType", "Unknown Type"),
            )
        )

    # 2) Interventions: one sentence per titled entry in the nested list.
    for rec in records("interventions"):
        support_area = rec.get("area", "General")
        titles = (entry.get("title") for entry in rec.get("interventions", []))
        summaries.extend(
            "Intervention: {} under {}.".format(title, support_area)
            for title in titles
            if title
        )

    # 3) Feedbacks: entries without a comment carry no information and are skipped.
    for rec in records("feedbacks"):
        remark = rec.get("comment")
        if not remark:
            continue
        summaries.append(
            "Feedback on {} by {}: {}".format(
                rec.get("interventionTitle", "Unknown Intervention"),
                rec.get("smeName", "Unknown SME"),
                remark,
            )
        )

    # 4) Compliance Documents
    for rec in records("complianceDocuments"):
        summaries.append(
            "Compliance document '{}' for {} is {} (expires {}).".format(
                rec.get("documentType", "Unknown Type"),
                rec.get("participantName", "Unknown Participant"),
                rec.get("status", "Unknown Status"),
                rec.get("expiryDate", "Unknown Expiry"),
            )
        )

    # 5) Assigned Interventions
    for rec in records("assignedInterventions"):
        summaries.append(
            "Assigned intervention '{}' for {} by consultant {} ({}).".format(
                rec.get("interventionTitle", "Unknown Intervention"),
                rec.get("smeName", "Unknown SME"),
                rec.get("consultantId", "Unknown Consultant"),
                rec.get("status", "Unknown Status"),
            )
        )

    # 6) Consultants
    for rec in records("consultants"):
        skills = rec.get("expertise", [])
        score = rec.get("rating")
        skills_text = ", ".join(skills) if skills else "no listed expertise"
        # A rating of 0 is still a rating; only a missing value reads "no rating".
        score_text = "no rating" if score is None else "rating {}".format(score)
        summaries.append(
            "Consultant {} with expertise in {} and {}.".format(
                rec.get("name", "Unknown Consultant"), skills_text, score_text
            )
        )

    return summaries
112
 
113
- # --------- Embedding Helper ---------
114
  def get_embeddings(texts: list[str]) -> list[list[float]]:
115
- resp = client.models.embed_content(
116
- model="text-embedding-004",
117
- contents=texts
118
- # , config=types.EmbedContentConfig(output_dimensionality=512)
119
- )
120
  return [emb.values for emb in resp.embeddings]
121
 
122
- # --------- Build or Load FAISS Index ---------
123
def build_or_load_index():
    """Return ``(documents, index)``, reusing the on-disk cache when present.

    When both ``INDEX_PATH`` and ``DOCS_PATH`` exist, the document list is
    unpickled and the FAISS index is read from disk. Otherwise the documents
    are fetched and embedded, a flat inner-product index is built over the
    embeddings, and both artifacts are written to disk before returning.
    """
    cache_present = os.path.exists(INDEX_PATH) and os.path.exists(DOCS_PATH)

    if cache_present:
        # NOTE(review): pickle.load executes whatever the file encodes; this
        # is only safe while DOCS_PATH is written exclusively by this service.
        with open(DOCS_PATH, "rb") as cache_file:
            cached_documents = pickle.load(cache_file)
        cached_index = faiss.read_index(INDEX_PATH)
        return cached_documents, cached_index

    fresh_documents = fetch_documents()
    vectors = np.array(get_embeddings(fresh_documents), dtype="float32")
    fresh_index = faiss.IndexFlatIP(vectors.shape[1])
    fresh_index.add(vectors)

    # Cache to disk so the next start-up can skip fetching and embedding.
    with open(DOCS_PATH, "wb") as cache_file:
        pickle.dump(fresh_documents, cache_file)
    faiss.write_index(fresh_index, INDEX_PATH)
    return fresh_documents, fresh_index


# Built once at import time and shared by every request.
documents, index = build_or_load_index()
141
-
142
- # --------- RAG Chat Helper ---------
143
def retrieve_and_respond(user_query: str, top_k: int = 3) -> str:
    """Answer ``user_query`` using the most similar cached documents as context.

    Args:
        user_query: The question to answer.
        top_k: Number of nearest documents requested from the index.

    Returns:
        The text of the model's reply.
    """
    # 1) Embed the query.
    q_emb = np.array(get_embeddings([user_query]), dtype="float32")

    # 2) Search the index. FAISS pads the result with -1 when the index holds
    #    fewer than top_k vectors; without the bounds check those slots would
    #    index documents[-1] and silently inject the last document.
    _, idxs = index.search(q_emb, top_k)
    hits = [documents[i] for i in idxs[0] if 0 <= i < len(documents)]
    ctx = "\n\n".join(hits)

    # 3) Build the prompt.
    prompt = f"Use the context below to answer:\n\n{ctx}\n\nQuestion: {user_query}\nAnswer:"

    # 4) Ask the chat model.
    chat = client.chats.create(model="gemini-2.0-flash-thinking-exp")
    resp = chat.send_message(prompt)
    return resp.text
155
 
156
 
157
-
158
  # --------- Helpers for Bank-Statement Processing ---------
159
 
160
  def read_pdf_pages(file_obj):
@@ -250,13 +259,18 @@ def process_pdf_pages(pdf_file):
250
  def chat_endpoint():
251
  data = request.get_json(force=True)
252
  q = data.get("user_query")
253
- if not q:
254
- return jsonify({"error": "Missing user_query"}), 400
 
 
 
 
255
  try:
256
- return jsonify({"reply": retrieve_and_respond(q)})
 
257
  except Exception as e:
258
  return jsonify({"error": str(e)}), 500
259
-
260
  # --------- Endpoint: Upload & Store Bank Statements ---------
261
 
262
  @app.route("/upload_statements", methods=["POST"])
 
16
  from firebase_admin import credentials, firestore, storage
17
  from google import genai
18
 
19
+ import os
20
+ import json
21
+ import pickle
22
+ import numpy as np
23
+ from flask import Flask, request, jsonify
24
+ from flask_cors import CORS
25
+ from dotenv import load_dotenv
26
+ from firebase_admin import credentials, firestore, storage, initialize_app
27
+ from google import genai
28
+ import faiss
29
+
30
  load_dotenv()
31
 
32
+ # --- Flask Setup ---
33
  app = Flask(__name__)
34
  CORS(app)
35
 
36
+ # --- Firebase Initialization ---
37
  cred_json = os.environ.get("FIREBASE")
38
  if not cred_json:
39
  raise RuntimeError("Missing FIREBASE env var")
40
  cred = credentials.Certificate(json.loads(cred_json))
41
+ initialize_app(cred, {"storageBucket": os.environ.get("Firebase_Storage")})
42
+
43
+ fs = firestore.client()
44
+ bucket = storage.bucket()
 
45
 
46
+ # --- Gemini Client ---
47
  client = genai.Client(api_key=os.getenv("Gemini"))
48
 
49
+ # --- FAISS Setup ---
50
  INDEX_PATH = "vector.index"
51
+ DOCS_PATH = "documents.pkl"
 
 
52
 
53
+ # --- Role-Aware Firestore Fetch ---
54
def fetch_documents(role: str, user_id: str) -> list[str]:
    """Collect one-line summaries of the Firestore records visible to a user.

    Filtering applied per collection:
      * participants: an incubatee sees only the document whose id equals
        ``user_id``; a consultant sees only those listing them in
        ``assignedConsultants``.
      * consultants: a consultant sees only their own document.
      * programs: admin, operations, funder and incubatee only.
      * interventions: admin, operations and incubatee only.
      * assignedInterventions: filtered by ``consultantId`` for consultants
        and by ``participantId`` for incubatees.
      * feedbacks: filtered by ``consultantId`` for consultants.
      * complianceDocuments: filtered by ``participantId`` for incubatees.
      * interventionDatabase: admin, operations, director and funder only.

    NOTE(review): a role outside these lists is not rejected. It is excluded
    from the role-gated collections but passes every per-document filter.

    Args:
        role: Lower-case role name of the requesting user.
        user_id: Identifier compared against document ids and id fields.

    Returns:
        The summary sentences, in collection order.
    """

    def id_matches(field) -> bool:
        # A stored id field may be a single id or a collection of ids.
        if field is None:
            return False
        if isinstance(field, str):
            # Exact comparison: ``user_id in "<string>"`` is a substring test
            # and would match any id that merely contains user_id.
            return field == user_id
        try:
            return user_id in field
        except TypeError:
            return False

    is_incubatee = role == "incubatee"
    is_consultant = role == "consultant"
    docs: list[str] = []

    # 1) participants
    for snap in fs.collection("participants").stream():
        d = snap.to_dict()
        if is_incubatee and snap.id != user_id:
            continue
        if is_consultant and not id_matches(d.get("assignedConsultants")):
            continue
        name = d.get("beneficiaryName", "Unknown")
        ent = d.get("enterpriseName", "Unknown")
        sector = d.get("sector", "Unknown")
        stage = d.get("stage", "Unknown")
        devtype = d.get("developmentType", "Unknown")
        docs.append(f"{name} ({ent}), sector: {sector}, stage: {stage}, type: {devtype}.")

    # 2) consultants
    for snap in fs.collection("consultants").stream():
        d = snap.to_dict()
        if is_consultant and snap.id != user_id:
            continue
        name = d.get("name", "Unknown")
        # Tolerate a missing/None field and a single string instead of a list.
        expertise = d.get("expertise") or []
        if isinstance(expertise, str):
            expertise = [expertise]
        expertise_txt = ", ".join(str(item) for item in expertise) or "no listed expertise"
        # Avoid rendering "rating no rating" when no rating is stored.
        rating = d.get("rating")
        rating_txt = f"rating {rating}" if rating is not None else "no rating"
        docs.append(f"Consultant {name} with expertise in {expertise_txt} and {rating_txt}.")

    # 3) programs
    if role in ("admin", "operations", "funder", "incubatee"):
        for snap in fs.collection("programs").stream():
            d = snap.to_dict()
            docs.append(
                f"Program {d.get('name')} ({d.get('status')}): "
                f"{d.get('type')} - Budget {d.get('budget')}"
            )

    # 4) interventions
    if role in ("admin", "operations", "incubatee"):
        for snap in fs.collection("interventions").stream():
            d = snap.to_dict()
            area = d.get("areaOfSupport", "General")  # same for every item in the doc
            for item in d.get("interventions", []):
                title = item.get("title")
                if title:
                    docs.append(f"Intervention: {title} under {area}.")

    # 5) assignedInterventions
    for snap in fs.collection("assignedInterventions").stream():
        d = snap.to_dict()
        if is_consultant and not id_matches(d.get("consultantId")):
            continue
        if is_incubatee and d.get("participantId") != user_id:
            continue
        title = d.get("interventionTitle", "Unknown")
        sme = d.get("smeName", "Unknown")
        status = d.get("status", "Unknown")
        docs.append(f"Assigned intervention '{title}' for {sme} ({status})")

    # 6) feedbacks
    for snap in fs.collection("feedbacks").stream():
        d = snap.to_dict()
        if is_consultant and d.get("consultantId") != user_id:
            continue
        intervention = d.get("interventionTitle", "Unknown")
        comment = d.get("comment")
        if comment:
            docs.append(f"Feedback on {intervention}: {comment}")

    # 7) complianceDocuments
    for snap in fs.collection("complianceDocuments").stream():
        d = snap.to_dict()
        if is_incubatee and d.get("participantId") != user_id:
            continue
        doc_type = d.get("documentType")
        participant = d.get("participantName")
        status = d.get("status")
        expiry = d.get("expiryDate")
        docs.append(
            f"Compliance document '{doc_type}' for {participant} is {status} (expires {expiry})"
        )

    # 8) interventionDatabase
    if role in ("admin", "operations", "director", "funder"):
        for snap in fs.collection("interventionDatabase").stream():
            d = snap.to_dict()
            title = d.get("interventionTitle", "Unknown")
            status = d.get("status", "Unknown")
            feedback = d.get("feedback", "")
            docs.append(f"Finalized intervention '{title}' ({status}): {feedback}")

    return docs
137
 
138
+ # --- Embedding ---
139
  def get_embeddings(texts: list[str]) -> list[list[float]]:
140
+ resp = client.models.embed_content(model="text-embedding-004", contents=texts)
 
 
 
 
141
  return [emb.values for emb in resp.embeddings]
142
 
143
+ # --- Dynamic Index ---
144
def build_faiss_index(docs: list[str]):
    """Embed ``docs`` and return a flat inner-product FAISS index over them.

    Vectors are added in the order of ``docs``, so a position returned by a
    search on this index is also a position in ``docs``.
    """
    vectors = np.asarray(get_embeddings(docs), dtype="float32")
    # The index width is taken from the embeddings themselves.
    flat_index = faiss.IndexFlatIP(vectors.shape[1])
    flat_index.add(vectors)
    return flat_index
150
+
151
+ # --- Retrieval Helper ---
152
def retrieve_and_respond(user_query: str, role: str, user_id: str) -> str:
    """Answer ``user_query`` using only the documents visible to the user.

    The documents are fetched and indexed on every call, so the answer
    reflects the current Firestore contents for this role and user.

    Args:
        user_query: The question to answer.
        role: Role name passed to ``fetch_documents``.
        user_id: User identifier passed to ``fetch_documents``.

    Returns:
        The model's reply text, or a fixed notice when no document is
        visible to this role and user.
    """
    docs = fetch_documents(role, user_id)
    if not docs:
        return "No relevant data found for your role or access level."

    index = build_faiss_index(docs)
    q_emb = np.array(get_embeddings([user_query]), dtype="float32")

    # Role filtering can leave fewer than three documents. FAISS pads missing
    # neighbours with -1, which would otherwise index docs[-1] and repeat the
    # last document in the context.
    top_k = min(3, len(docs))
    _, idxs = index.search(q_emb, top_k)
    ctx = "\n\n".join(docs[i] for i in idxs[0] if 0 <= i < len(docs))

    prompt = f"Use the context below to answer:\n\n{ctx}\n\nQuestion: {user_query}\nAnswer:"
    chat = client.chats.create(model="gemini-2.0-flash-thinking-exp")
    resp = chat.send_message(prompt)
    return resp.text
165
 
166
 
 
167
  # --------- Helpers for Bank-Statement Processing ---------
168
 
169
  def read_pdf_pages(file_obj):
 
259
  def chat_endpoint():
260
  data = request.get_json(force=True)
261
  q = data.get("user_query")
262
+ role = data.get("role")
263
+ user_id = data.get("user_id")
264
+
265
+ if not q or not role or not user_id:
266
+ return jsonify({"error": "Missing user_query, role, or user_id"}), 400
267
+
268
  try:
269
+ reply = retrieve_and_respond(q, role.lower(), user_id)
270
+ return jsonify({"reply": reply})
271
  except Exception as e:
272
  return jsonify({"error": str(e)}), 500
273
+
274
  # --------- Endpoint: Upload & Store Bank Statements ---------
275
 
276
  @app.route("/upload_statements", methods=["POST"])