Spaces:
Sleeping
Sleeping
Commit ·
ee157dc
1
Parent(s): 3ced307
remove streamlit from retrieve
Browse files
- Retrieve.py +10 -5
- socrates_core.py +34 -4
Retrieve.py
CHANGED
|
@@ -36,6 +36,7 @@ def retrieve_chunks_from_vdb(
|
|
| 36 |
use_metadata_boost: bool = False,
|
| 37 |
filter_keywords: List[str] = None,
|
| 38 |
topic: str = None,
|
|
|
|
| 39 |
) -> List[Dict]:
|
| 40 |
"""
|
| 41 |
Download FAISS index from Supabase, search it, return top-k results.
|
|
@@ -94,7 +95,8 @@ def retrieve_all_chunks(
|
|
| 94 |
k: int = 10,
|
| 95 |
filter_keywords: List[str] = None,
|
| 96 |
topic: str = None,
|
| 97 |
-
topic_to_dbs: Dict[str, List[str]] = None
|
|
|
|
| 98 |
) -> List[Dict]:
|
| 99 |
"""
|
| 100 |
Retrieve chunks across all Supabase sources (dbs + personal_info + chat_history).
|
|
@@ -136,11 +138,12 @@ def retrieve_all_chunks(
|
|
| 136 |
query_vector=query_vector,
|
| 137 |
model=model,
|
| 138 |
query=query,
|
| 139 |
-
username=username,
|
| 140 |
k=k,
|
| 141 |
use_metadata_boost=use_metadata_boost,
|
| 142 |
filter_keywords=filter_keywords,
|
| 143 |
-
topic
|
|
|
|
| 144 |
)
|
| 145 |
|
| 146 |
return all_chunks
|
|
@@ -151,7 +154,8 @@ def retrieve_from_db(
|
|
| 151 |
query: str,
|
| 152 |
model,
|
| 153 |
username: str,
|
| 154 |
-
k: int = 5
|
|
|
|
| 155 |
) -> List[Dict]:
|
| 156 |
"""
|
| 157 |
Retrieve top-k chunks from a single Supabase FAISS db (e.g. db6).
|
|
@@ -163,7 +167,8 @@ def retrieve_from_db(
|
|
| 163 |
model=model,
|
| 164 |
query=query,
|
| 165 |
username=username,
|
| 166 |
-
k=k
|
|
|
|
| 167 |
)
|
| 168 |
# used in retrieve_chunks_from_vdb
|
| 169 |
def compute_metadata_boost(metadata: Dict, query: str, filter_keywords: List[str] = None) -> float:
|
|
|
|
| 36 |
use_metadata_boost: bool = False,
|
| 37 |
filter_keywords: List[str] = None,
|
| 38 |
topic: str = None,
|
| 39 |
+
db6_override_store: Optional[FAISS] = None, # NEW
|
| 40 |
) -> List[Dict]:
|
| 41 |
"""
|
| 42 |
Download FAISS index from Supabase, search it, return top-k results.
|
|
|
|
| 95 |
k: int = 10,
|
| 96 |
filter_keywords: List[str] = None,
|
| 97 |
topic: str = None,
|
| 98 |
+
topic_to_dbs: Dict[str, List[str]] = None,
|
| 99 |
+
db6_override_store: Optional[FAISS] = None # NEW
|
| 100 |
) -> List[Dict]:
|
| 101 |
"""
|
| 102 |
Retrieve chunks across all Supabase sources (dbs + personal_info + chat_history).
|
|
|
|
| 138 |
query_vector=query_vector,
|
| 139 |
model=model,
|
| 140 |
query=query,
|
| 141 |
+
username=username,
|
| 142 |
k=k,
|
| 143 |
use_metadata_boost=use_metadata_boost,
|
| 144 |
filter_keywords=filter_keywords,
|
| 145 |
+
topic=topic,
|
| 146 |
+
db6_override_store=db6_override_store, # NEW: passed through
|
| 147 |
)
|
| 148 |
|
| 149 |
return all_chunks
|
|
|
|
| 154 |
query: str,
|
| 155 |
model,
|
| 156 |
username: str,
|
| 157 |
+
k: int = 5,
|
| 158 |
+
db6_override_store: Optional[FAISS] = None, # NEW (optional)
|
| 159 |
) -> List[Dict]:
|
| 160 |
"""
|
| 161 |
Retrieve top-k chunks from a single Supabase FAISS db (e.g. db6).
|
|
|
|
| 167 |
model=model,
|
| 168 |
query=query,
|
| 169 |
username=username,
|
| 170 |
+
k=k,
|
| 171 |
+
db6_override_store=db6_override_store, # pass through
|
| 172 |
)
|
| 173 |
# used in retrieve_chunks_from_vdb
|
| 174 |
def compute_metadata_boost(metadata: Dict, query: str, filter_keywords: List[str] = None) -> float:
|
socrates_core.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
# socrates_core.py
|
| 2 |
from typing import Dict, Any, Optional, List
|
| 3 |
-
|
| 4 |
from supabase import create_client
|
| 5 |
from langchain_huggingface import HuggingFaceEmbeddings
|
| 6 |
|
|
@@ -80,21 +80,51 @@ def socrates_reply(
|
|
| 80 |
except Exception:
|
| 81 |
emotion_result = None
|
| 82 |
|
|
|
|
| 83 |
# ---- Classify message ----
|
| 84 |
analysis = analyze_message(user_id, user_msg_en)
|
| 85 |
story_topic = analysis.get("topic_for_story")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
|
| 87 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
chunks = retrieve_all_chunks(
|
| 89 |
query=user_msg_en,
|
| 90 |
model=embeddings,
|
| 91 |
user_id=user_id,
|
| 92 |
username=username,
|
| 93 |
k=10,
|
| 94 |
-
topic=
|
| 95 |
topic_to_dbs=TOPIC_TO_DBS,
|
| 96 |
)
|
| 97 |
-
|
| 98 |
# ---- Story / anecdote selection ----
|
| 99 |
socratic_story_dic = pick_story_with_fallback(user_id, story_topic)
|
| 100 |
socratic_story_formatted = build_story_txt(
|
|
|
|
| 1 |
# socrates_core.py
|
| 2 |
from typing import Dict, Any, Optional, List
|
| 3 |
+
from db7_pipeline import run_db7_pipeline
|
| 4 |
from supabase import create_client
|
| 5 |
from langchain_huggingface import HuggingFaceEmbeddings
|
| 6 |
|
|
|
|
| 80 |
except Exception:
|
| 81 |
emotion_result = None
|
| 82 |
|
| 83 |
+
# ---- Classify message ----
|
| 84 |
# ---- Classify message ----
|
| 85 |
analysis = analyze_message(user_id, user_msg_en)
|
| 86 |
story_topic = analysis.get("topic_for_story")
|
| 87 |
+
topic = analysis.get("topic")
|
| 88 |
+
needs_news_fetch = analysis.get("needs_news_fetch", False)
|
| 89 |
+
|
| 90 |
+
# ---- Optional db7 news pipeline (build fresh db6) ----
|
| 91 |
+
db7_result = None # we just store it for now, not used in retrieval yet
|
| 92 |
|
| 93 |
+
try:
|
| 94 |
+
# Very simple condition to start with:
|
| 95 |
+
# later you can refine with needs_news_fetch or news_type
|
| 96 |
+
if topic == "news" and needs_news_fetch:
|
| 97 |
+
db7_result = run_db7_pipeline(
|
| 98 |
+
news_topic=[user_msg_en], # or a keyword list, we can refine later
|
| 99 |
+
user_query=user_msg_en,
|
| 100 |
+
username=username,
|
| 101 |
+
user_id=user_id,
|
| 102 |
+
user_countries=profile.get("countries_of_interest", []),
|
| 103 |
+
user_lang=user_lang_code,
|
| 104 |
+
)
|
| 105 |
+
# db7_result is a dict like:
|
| 106 |
+
# {
|
| 107 |
+
# "status": "ok",
|
| 108 |
+
# "keywords": ...,
|
| 109 |
+
# "articles": ...,
|
| 110 |
+
# "top_full": ...,
|
| 111 |
+
# "faiss_object": <FAISS store for updated db6>
|
| 112 |
+
# }
|
| 113 |
+
except Exception as e:
|
| 114 |
+
print(f"⚠️ db7 pipeline failed or not configured: {e}")
|
| 115 |
+
db7_result = None
|
| 116 |
+
|
| 117 |
+
# ---- Retrieval (still old behaviour for now) ----
|
| 118 |
chunks = retrieve_all_chunks(
|
| 119 |
query=user_msg_en,
|
| 120 |
model=embeddings,
|
| 121 |
user_id=user_id,
|
| 122 |
username=username,
|
| 123 |
k=10,
|
| 124 |
+
topic=topic,
|
| 125 |
topic_to_dbs=TOPIC_TO_DBS,
|
| 126 |
)
|
| 127 |
+
|
| 128 |
# ---- Story / anecdote selection ----
|
| 129 |
socratic_story_dic = pick_story_with_fallback(user_id, story_topic)
|
| 130 |
socratic_story_formatted = build_story_txt(
|