alesamodio committed on
Commit
ee157dc
·
1 Parent(s): 3ced307

remove streamlit from retrieve

Browse files
Files changed (2) hide show
  1. Retrieve.py +10 -5
  2. socrates_core.py +34 -4
Retrieve.py CHANGED
@@ -36,6 +36,7 @@ def retrieve_chunks_from_vdb(
36
  use_metadata_boost: bool = False,
37
  filter_keywords: List[str] = None,
38
  topic: str = None,
 
39
  ) -> List[Dict]:
40
  """
41
  Download FAISS index from Supabase, search it, return top-k results.
@@ -94,7 +95,8 @@ def retrieve_all_chunks(
94
  k: int = 10,
95
  filter_keywords: List[str] = None,
96
  topic: str = None,
97
- topic_to_dbs: Dict[str, List[str]] = None
 
98
  ) -> List[Dict]:
99
  """
100
  Retrieve chunks across all Supabase sources (dbs + personal_info + chat_history).
@@ -136,11 +138,12 @@ def retrieve_all_chunks(
136
  query_vector=query_vector,
137
  model=model,
138
  query=query,
139
- username=username, # ✅ FIX
140
  k=k,
141
  use_metadata_boost=use_metadata_boost,
142
  filter_keywords=filter_keywords,
143
- topic = topic,
 
144
  )
145
 
146
  return all_chunks
@@ -151,7 +154,8 @@ def retrieve_from_db(
151
  query: str,
152
  model,
153
  username: str,
154
- k: int = 5
 
155
  ) -> List[Dict]:
156
  """
157
  Retrieve top-k chunks from a single Supabase FAISS db (e.g. db6).
@@ -163,7 +167,8 @@ def retrieve_from_db(
163
  model=model,
164
  query=query,
165
  username=username,
166
- k=k
 
167
  )
168
  # used in retrieve_chunks_from_vdb
169
  def compute_metadata_boost(metadata: Dict, query: str, filter_keywords: List[str] = None) -> float:
 
36
  use_metadata_boost: bool = False,
37
  filter_keywords: List[str] = None,
38
  topic: str = None,
39
+ db6_override_store: Optional[FAISS] = None, # NEW
40
  ) -> List[Dict]:
41
  """
42
  Download FAISS index from Supabase, search it, return top-k results.
 
95
  k: int = 10,
96
  filter_keywords: List[str] = None,
97
  topic: str = None,
98
+ topic_to_dbs: Dict[str, List[str]] = None,
99
+ db6_override_store: Optional[FAISS] = None # NEW
100
  ) -> List[Dict]:
101
  """
102
  Retrieve chunks across all Supabase sources (dbs + personal_info + chat_history).
 
138
  query_vector=query_vector,
139
  model=model,
140
  query=query,
141
+ username=username,
142
  k=k,
143
  use_metadata_boost=use_metadata_boost,
144
  filter_keywords=filter_keywords,
145
+ topic=topic,
146
+ db6_override_store=db6_override_store, # NEW: passed through
147
  )
148
 
149
  return all_chunks
 
154
  query: str,
155
  model,
156
  username: str,
157
+ k: int = 5,
158
+ db6_override_store: Optional[FAISS] = None, # NEW (optional)
159
  ) -> List[Dict]:
160
  """
161
  Retrieve top-k chunks from a single Supabase FAISS db (e.g. db6).
 
167
  model=model,
168
  query=query,
169
  username=username,
170
+ k=k,
171
+ db6_override_store=db6_override_store, # pass through
172
  )
173
  # used in retrieve_chunks_from_vdb
174
  def compute_metadata_boost(metadata: Dict, query: str, filter_keywords: List[str] = None) -> float:
socrates_core.py CHANGED
@@ -1,6 +1,6 @@
1
  # socrates_core.py
2
  from typing import Dict, Any, Optional, List
3
-
4
  from supabase import create_client
5
  from langchain_huggingface import HuggingFaceEmbeddings
6
 
@@ -80,21 +80,51 @@ def socrates_reply(
80
  except Exception:
81
  emotion_result = None
82
 
 
83
  # ---- Classify message ----
84
  analysis = analyze_message(user_id, user_msg_en)
85
  story_topic = analysis.get("topic_for_story")
 
 
 
 
 
86
 
87
- # ---- Retrieval ----
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
  chunks = retrieve_all_chunks(
89
  query=user_msg_en,
90
  model=embeddings,
91
  user_id=user_id,
92
  username=username,
93
  k=10,
94
- topic=analysis.get("topic"),
95
  topic_to_dbs=TOPIC_TO_DBS,
96
  )
97
-
98
  # ---- Story / anecdote selection ----
99
  socratic_story_dic = pick_story_with_fallback(user_id, story_topic)
100
  socratic_story_formatted = build_story_txt(
 
1
  # socrates_core.py
2
  from typing import Dict, Any, Optional, List
3
+ from db7_pipeline import run_db7_pipeline
4
  from supabase import create_client
5
  from langchain_huggingface import HuggingFaceEmbeddings
6
 
 
80
  except Exception:
81
  emotion_result = None
82
 
83
+ # ---- Classify message ----
84
  # ---- Classify message ----
85
  analysis = analyze_message(user_id, user_msg_en)
86
  story_topic = analysis.get("topic_for_story")
87
+ topic = analysis.get("topic")
88
+ needs_news_fetch = analysis.get("needs_news_fetch", False)
89
+
90
+ # ---- Optional db7 news pipeline (build fresh db6) ----
91
+ db7_result = None # we just store it for now, not used in retrieval yet
92
 
93
+ try:
94
+ # Very simple condition to start with:
95
+ # later you can refine with needs_news_fetch or news_type
96
+ if topic == "news" and needs_news_fetch:
97
+ db7_result = run_db7_pipeline(
98
+ news_topic=[user_msg_en], # or a keyword list, we can refine later
99
+ user_query=user_msg_en,
100
+ username=username,
101
+ user_id=user_id,
102
+ user_countries=profile.get("countries_of_interest", []),
103
+ user_lang=user_lang_code,
104
+ )
105
+ # db7_result is a dict like:
106
+ # {
107
+ # "status": "ok",
108
+ # "keywords": ...,
109
+ # "articles": ...,
110
+ # "top_full": ...,
111
+ # "faiss_object": <FAISS store for updated db6>
112
+ # }
113
+ except Exception as e:
114
+ print(f"⚠️ db7 pipeline failed or not configured: {e}")
115
+ db7_result = None
116
+
117
+ # ---- Retrieval (still old behaviour for now) ----
118
  chunks = retrieve_all_chunks(
119
  query=user_msg_en,
120
  model=embeddings,
121
  user_id=user_id,
122
  username=username,
123
  k=10,
124
+ topic=topic,
125
  topic_to_dbs=TOPIC_TO_DBS,
126
  )
127
+
128
  # ---- Story / anecdote selection ----
129
  socratic_story_dic = pick_story_with_fallback(user_id, story_topic)
130
  socratic_story_formatted = build_story_txt(