Update model_utils.py
Browse files — model_utils.py (+39, −17)
model_utils.py
CHANGED
|
@@ -101,11 +101,40 @@ def retrieve_context(question: str, max_entries: int = 2) -> str:
|
|
| 101 |
return "\n\n".join(context_blocks)
|
| 102 |
|
| 103 |
|
| 104 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
context = retrieve_context(question)
|
|
|
|
|
|
|
| 106 |
return f"""{SYSTEM_PROMPT}
|
| 107 |
|
| 108 |
-
ຂໍ້ມູນອ້າງອີງ:
|
| 109 |
{context}
|
| 110 |
|
| 111 |
ຄຳຖາມ: {question}
|
|
@@ -113,8 +142,8 @@ def build_prompt(question: str) -> str:
|
|
| 113 |
ຄຳຕອບດ້ວຍພາສາລາວ:"""
|
| 114 |
|
| 115 |
|
| 116 |
-
def generate_answer(question: str) -> str:
|
| 117 |
-
prompt = build_prompt(question)
|
| 118 |
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
|
| 119 |
with torch.no_grad():
|
| 120 |
outputs = model.generate(
|
|
@@ -126,14 +155,11 @@ def generate_answer(question: str) -> str:
|
|
| 126 |
generated_ids = outputs[0][inputs["input_ids"].shape[1]:]
|
| 127 |
answer = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()
|
| 128 |
|
| 129 |
-
#
|
| 130 |
-
# `re` is already imported at the top of this file
|
| 131 |
sentences = re.split(r"(?<=[\.?!…])\s+", answer)
|
| 132 |
short_answer = " ".join(sentences[:3]).strip()
|
| 133 |
-
|
| 134 |
return short_answer if short_answer else answer
|
| 135 |
-
|
| 136 |
-
|
| 137 |
|
| 138 |
def answer_from_qa(question: str) -> Optional[str]:
|
| 139 |
"""
|
|
@@ -179,16 +205,12 @@ def laos_history_bot(message: str, history: List) -> str:
|
|
| 179 |
|
| 180 |
direct = answer_from_qa(message)
|
| 181 |
if direct:
|
| 182 |
-
|
| 183 |
-
meta = "[ຊັ້ນ M1, ບົດ 1]"
|
| 184 |
-
return f"{meta} {direct}"
|
| 185 |
-
|
| 186 |
|
| 187 |
try:
|
| 188 |
-
|
|
|
|
| 189 |
except Exception as e: # noqa: BLE001
|
| 190 |
return f"ລະບົບມີບັນຫາ: {e}"
|
| 191 |
|
| 192 |
-
|
| 193 |
-
return f"{meta} {answer}"
|
| 194 |
-
|
|
|
|
| 101 |
return "\n\n".join(context_blocks)
|
| 102 |
|
| 103 |
|
| 104 |
+
def _format_history(history: Optional[List]) -> str:
|
| 105 |
+
"""
|
| 106 |
+
Convert last few chat turns into a Lao conversation snippet
|
| 107 |
+
to give the model context for follow-up questions.
|
| 108 |
+
Gradio history format: [[user_msg, bot_msg], [user_msg, bot_msg], ...]
|
| 109 |
+
"""
|
| 110 |
+
if not history:
|
| 111 |
+
return ""
|
| 112 |
+
|
| 113 |
+
# keep only the last 3 turns to avoid very long prompts
|
| 114 |
+
recent = history[-3:]
|
| 115 |
+
|
| 116 |
+
lines = []
|
| 117 |
+
for turn in recent:
|
| 118 |
+
if not isinstance(turn, (list, tuple)) or len(turn) != 2:
|
| 119 |
+
continue
|
| 120 |
+
user_msg, bot_msg = turn
|
| 121 |
+
lines.append(f"ນັກຮຽນ: {user_msg}")
|
| 122 |
+
lines.append(f"ອາຈານ AI: {bot_msg}")
|
| 123 |
+
|
| 124 |
+
if not lines:
|
| 125 |
+
return ""
|
| 126 |
+
|
| 127 |
+
joined = "\n".join(lines)
|
| 128 |
+
return f"ປະຫວັດການສົນທະນາກ່ອນໜ້າ:\n{joined}\n\n"
|
| 129 |
+
|
| 130 |
+
|
| 131 |
+
def build_prompt(question: str, history: Optional[List] = None) -> str:
|
| 132 |
context = retrieve_context(question)
|
| 133 |
+
history_block = _format_history(history)
|
| 134 |
+
|
| 135 |
return f"""{SYSTEM_PROMPT}
|
| 136 |
|
| 137 |
+
{history_block}ຂໍ້ມູນອ້າງອີງ:
|
| 138 |
{context}
|
| 139 |
|
| 140 |
ຄຳຖາມ: {question}
|
|
|
|
| 142 |
ຄຳຕອບດ້ວຍພາສາລາວ:"""
|
| 143 |
|
| 144 |
|
| 145 |
+
def generate_answer(question: str, history: Optional[List] = None) -> str:
|
| 146 |
+
prompt = build_prompt(question, history)
|
| 147 |
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
|
| 148 |
with torch.no_grad():
|
| 149 |
outputs = model.generate(
|
|
|
|
| 155 |
generated_ids = outputs[0][inputs["input_ids"].shape[1]:]
|
| 156 |
answer = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()
|
| 157 |
|
| 158 |
+
# (your 2–3 sentence enforcement can stay here)
|
|
|
|
| 159 |
sentences = re.split(r"(?<=[\.?!…])\s+", answer)
|
| 160 |
short_answer = " ".join(sentences[:3]).strip()
|
|
|
|
| 161 |
return short_answer if short_answer else answer
|
| 162 |
+
|
|
|
|
| 163 |
|
| 164 |
def answer_from_qa(question: str) -> Optional[str]:
|
| 165 |
"""
|
|
|
|
| 205 |
|
| 206 |
direct = answer_from_qa(message)
|
| 207 |
if direct:
|
| 208 |
+
return direct
|
|
|
|
|
|
|
|
|
|
| 209 |
|
| 210 |
try:
|
| 211 |
+
# ✅ pass history to let LLM understand follow-up questions
|
| 212 |
+
answer = generate_answer(message, history)
|
| 213 |
except Exception as e: # noqa: BLE001
|
| 214 |
return f"ລະບົບມີບັນຫາ: {e}"
|
| 215 |
|
| 216 |
+
return answer
|
|
|
|
|
|