Update model_utils.py
Browse files- model_utils.py +29 -5
model_utils.py
CHANGED
|
@@ -185,12 +185,35 @@ def retrieve_context(question: str, max_entries: int = MAX_CONTEXT_ENTRIES) -> s
|
|
| 185 |
def answer_from_glossary(message: str) -> Optional[str]:
|
| 186 |
"""
|
| 187 |
Try to answer using the glossary index.
|
| 188 |
-
|
|
|
|
| 189 |
"""
|
| 190 |
-
if not getattr(qa_store, "GLOSSARY", None)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 191 |
return None
|
| 192 |
|
| 193 |
-
# Encode question
|
| 194 |
q_emb = embed_model.encode(
|
| 195 |
[message],
|
| 196 |
convert_to_numpy=True,
|
|
@@ -201,8 +224,9 @@ def answer_from_glossary(message: str) -> Optional[str]:
|
|
| 201 |
best_idx = int(np.argmax(sims))
|
| 202 |
best_sim = float(sims[best_idx])
|
| 203 |
|
| 204 |
-
#
|
| 205 |
-
|
|
|
|
| 206 |
return None
|
| 207 |
|
| 208 |
item = qa_store.GLOSSARY[best_idx]
|
|
|
|
| 185 |
def answer_from_glossary(message: str) -> Optional[str]:
|
| 186 |
"""
|
| 187 |
Try to answer using the glossary index.
|
| 188 |
+
Priority 1: Case-insensitive substring match of a glossary term inside a short user message.
|
| 189 |
+
Priority 2: Vector embedding match (accepted only if the best similarity is at least 0.65).
|
| 190 |
"""
|
| 191 |
+
if not getattr(qa_store, "GLOSSARY", None):
|
| 192 |
+
return None
|
| 193 |
+
|
| 194 |
+
# --- FIX START: Check for EXACT term match first ---
|
| 195 |
+
# This fixes the issue where "What is Science" matches "Pollution"
|
| 196 |
+
# just because the "Pollution" definition contains the word "Science".
|
| 197 |
+
|
| 198 |
+
normalized_msg = message.lower().strip()
|
| 199 |
+
|
| 200 |
+
for item in qa_store.GLOSSARY:
|
| 201 |
+
term = item.get("term", "").lower().strip()
|
| 202 |
+
# If the specific term appears in the message (e.g. "Science" in "What is Science?")
|
| 203 |
+
if term and term in normalized_msg:
|
| 204 |
+
# Only accept the match when the message is SHORT (fewer than 20 characters longer than the term), so we don't trigger on long sentences accidentally
|
| 205 |
+
if len(normalized_msg) < len(term) + 20:
|
| 206 |
+
definition = item.get("definition", "").strip()
|
| 207 |
+
example = item.get("example", "").strip()
|
| 208 |
+
if example:
|
| 209 |
+
return f"{definition} ຕົວຢ່າງ: {example}"
|
| 210 |
+
return definition
|
| 211 |
+
# --- FIX END ---
|
| 212 |
+
|
| 213 |
+
# If no exact text match, proceed to Vector Similarity (the old code)
|
| 214 |
+
if qa_store.GLOSSARY_EMBEDDINGS is None:
|
| 215 |
return None
|
| 216 |
|
|
|
|
| 217 |
q_emb = embed_model.encode(
|
| 218 |
[message],
|
| 219 |
convert_to_numpy=True,
|
|
|
|
| 224 |
best_idx = int(np.argmax(sims))
|
| 225 |
best_sim = float(sims[best_idx])
|
| 226 |
|
| 227 |
+
# INCREASE THRESHOLD:
|
| 228 |
+
# Raised from 0.55 to 0.65 to prevent weak matches (like Science matching Pollution)
|
| 229 |
+
if best_sim < 0.65:
|
| 230 |
return None
|
| 231 |
|
| 232 |
item = qa_store.GLOSSARY[best_idx]
|