Spaces:

Heng2004
/

Laos-Natural-Science-Chatbot

Running

App Files Community

Heng2004 committed about 16 hours ago

Commit

581710d

verified ·

1 Parent(s): 6dbe87c

Update model_utils.py

Browse files

Files changed (1) hide show

model_utils.py +29 -5

model_utils.py CHANGED Viewed

@@ -185,12 +185,35 @@ def retrieve_context(question: str, max_entries: int = MAX_CONTEXT_ENTRIES) -> s
 def answer_from_glossary(message: str) -> Optional[str]:
     """
     Try to answer using the glossary index.
-    Returns Lao answer string or None if not confident.
     """
-    if not getattr(qa_store, "GLOSSARY", None) or qa_store.GLOSSARY_EMBEDDINGS is None:
         return None
-    # Encode question
     q_emb = embed_model.encode(
         [message],
         convert_to_numpy=True,
@@ -201,8 +224,9 @@ def answer_from_glossary(message: str) -> Optional[str]:
     best_idx = int(np.argmax(sims))
     best_sim = float(sims[best_idx])
-    # tune this threshold later if needed
-    if best_sim < 0.55:
         return None
     item = qa_store.GLOSSARY[best_idx]

 def answer_from_glossary(message: str) -> Optional[str]:
     """
     Try to answer using the glossary index.
+    Priority 1: Exact string match of the Term inside the user's message.
+    Priority 2: Vector embedding match (if confidence is high).
     """
+    if not getattr(qa_store, "GLOSSARY", None):
+        return None
+    # --- FIX START: Check for EXACT term match first ---
+    # This fixes the issue where "What is Science" matches "Pollution"
+    # just because "Pollution" definition contains the word "Science".
+    normalized_msg = message.lower().strip()
+    for item in qa_store.GLOSSARY:
+        term = item.get("term", "").lower().strip()
+        # If the specific term appears in the message (e.g. "Science" in "What is Science?")
+        if term and term in normalized_msg:
+            # Optional: Check if the message is SHORT (so we don't trigger on long sentences accidentally)
+            if len(normalized_msg) < len(term) + 20:
+                definition = item.get("definition", "").strip()
+                example = item.get("example", "").strip()
+                if example:
+                    return f"{definition} ຕົວຢ່າງ: {example}"
+                return definition
+    # --- FIX END ---
+    # If no exact text match, proceed to Vector Similarity (the old code)
+    if qa_store.GLOSSARY_EMBEDDINGS is None:
         return None
     q_emb = embed_model.encode(
         [message],
         convert_to_numpy=True,
     best_idx = int(np.argmax(sims))
     best_sim = float(sims[best_idx])
+    # INCREASE THRESHOLD:
+    # Raised from 0.55 to 0.65 to prevent weak matches (like Science matching Pollution)
+    if best_sim < 0.65:
         return None
     item = qa_store.GLOSSARY[best_idx]