Heng2004 committed on
Commit
581710d
·
verified ·
1 Parent(s): 6dbe87c

Update model_utils.py

Browse files
Files changed (1) hide show
  1. model_utils.py +29 -5
model_utils.py CHANGED
@@ -185,12 +185,35 @@ def retrieve_context(question: str, max_entries: int = MAX_CONTEXT_ENTRIES) -> s
185
  def answer_from_glossary(message: str) -> Optional[str]:
186
  """
187
  Try to answer using the glossary index.
188
- Returns Lao answer string or None if not confident.
 
189
  """
190
- if not getattr(qa_store, "GLOSSARY", None) or qa_store.GLOSSARY_EMBEDDINGS is None:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
191
  return None
192
 
193
- # Encode question
194
  q_emb = embed_model.encode(
195
  [message],
196
  convert_to_numpy=True,
@@ -201,8 +224,9 @@ def answer_from_glossary(message: str) -> Optional[str]:
201
  best_idx = int(np.argmax(sims))
202
  best_sim = float(sims[best_idx])
203
 
204
- # tune this threshold later if needed
205
- if best_sim < 0.55:
 
206
  return None
207
 
208
  item = qa_store.GLOSSARY[best_idx]
 
185
  def answer_from_glossary(message: str) -> Optional[str]:
186
  """
187
  Try to answer using the glossary index.
188
+ Priority 1: Exact string match of the Term inside the user's message.
189
+ Priority 2: Vector embedding match (if confidence is high).
190
  """
191
+ if not getattr(qa_store, "GLOSSARY", None):
192
+ return None
193
+
194
+ # --- FIX START: Check for EXACT term match first ---
195
+ # This fixes the issue where "What is Science" matches "Pollution"
196
+ # just because "Pollution" definition contains the word "Science".
197
+
198
+ normalized_msg = message.lower().strip()
199
+
200
+ for item in qa_store.GLOSSARY:
201
+ term = item.get("term", "").lower().strip()
202
+ # If the specific term appears in the message (e.g. "Science" in "What is Science?")
203
+ if term and term in normalized_msg:
204
+ # Optional: Check if the message is SHORT (so we don't trigger on long sentences accidentally)
205
+ if len(normalized_msg) < len(term) + 20:
206
+ definition = item.get("definition", "").strip()
207
+ example = item.get("example", "").strip()
208
+ if example:
209
+ return f"{definition} ຕົວຢ່າງ: {example}"
210
+ return definition
211
+ # --- FIX END ---
212
+
213
+ # If no exact text match, proceed to Vector Similarity (the old code)
214
+ if qa_store.GLOSSARY_EMBEDDINGS is None:
215
  return None
216
 
 
217
  q_emb = embed_model.encode(
218
  [message],
219
  convert_to_numpy=True,
 
224
  best_idx = int(np.argmax(sims))
225
  best_sim = float(sims[best_idx])
226
 
227
+ # INCREASE THRESHOLD:
228
+ # Raised from 0.55 to 0.65 to prevent weak matches (like Science matching Pollution)
229
+ if best_sim < 0.65:
230
  return None
231
 
232
  item = qa_store.GLOSSARY[best_idx]