Spaces:

j-js
/

GameAI

Sleeping

App Files Community

j-js committed on about 1 month ago

Commit

c0df734

verified ·

1 Parent(s): 0e7b568

Update conversation_logic.py

Browse files

Files changed (1) hide show

conversation_logic.py +113 -68

conversation_logic.py CHANGED Viewed

@@ -12,10 +12,6 @@ from retrieval_engine import RetrievalEngine
 from utils import short_lines
-# -----------------------------
-# Retrieval intent configuration
-# -----------------------------
 RETRIEVAL_ALLOWED_INTENTS = {
     "walkthrough",
     "step_by_step",
@@ -39,26 +35,64 @@ DIRECT_SOLVE_PATTERNS = [
 STRUCTURE_KEYWORDS = {
     "algebra": [
-        "equation", "solve", "isolate", "variable", "linear", "expression",
-        "unknown", "algebra", "substitute", "rearrange"
     ],
     "percent": [
-        "percent", "%", "percentage", "increase", "decrease", "of"
     ],
     "ratio": [
-        "ratio", "proportion", "proportional", "part", "share"
     ],
     "statistics": [
-        "mean", "median", "mode", "range", "average", "standard deviation"
     ],
     "probability": [
-        "probability", "chance", "likely", "odds", "event"
     ],
     "geometry": [
-        "triangle", "circle", "angle", "area", "perimeter", "radius", "diameter"
     ],
     "number_properties": [
-        "integer", "odd", "even", "prime", "divisible", "factor", "multiple"
     ],
 }
@@ -70,46 +104,61 @@ INTENT_KEYWORDS = {
     "hint": ["hint", "nudge", "clue"],
     "definition": ["define", "definition", "what does", "what is meant by"],
     "concept": ["concept", "idea", "principle", "rule"],
-    "instruction": ["how do i", "how to", "what should i do first", "what step"],
 }
 MISMATCH_TERMS = {
     "algebra": [
-        "absolute value", "modulus", "square root", "quadratic", "inequality",
-        "roots", "parabola", "simultaneous equations"
     ],
     "percent": [
-        "triangle", "circle", "prime", "absolute value"
     ],
     "ratio": [
-        "absolute value", "quadratic", "circle"
     ],
     "statistics": [
-        "absolute value", "prime", "triangle"
     ],
     "probability": [
-        "absolute value", "circle area", "quadratic"
     ],
     "geometry": [
-        "absolute value", "prime", "median salary"
     ],
     "number_properties": [
-        "circle", "triangle", "absolute value"
     ],
 }
-# -----------------------------
-# Reply building
-# -----------------------------
 def _teaching_lines(chunks: List[RetrievedChunk]) -> List[str]:
-    lines = []
     for chunk in chunks:
-        text = chunk.text.strip().replace("\n", " ")
         if len(text) > 220:
             text = text[:217].rstrip() + "…"
-        topic = getattr(chunk, "topic", "general") or "general"
         lines.append(f"- {topic}: {text}")
     return lines
@@ -134,7 +183,7 @@ def _compose_quant_reply(
     if intent == "definition":
         if steps:
             return f"Here is the idea in context:\n- {steps[0]}"
-        return "This means identifying the mathematical idea being used and expressing it clearly."
     if intent in {"walkthrough", "step_by_step", "explain", "method", "concept"}:
         if not steps:
@@ -151,7 +200,6 @@ def _compose_quant_reply(
             return f"Walkthrough:\n{body}\n\nThat gives {internal}."
         return f"Walkthrough:\n{body}"
-    # answer/default
     if reveal_answer and internal:
         if result.answer_value and str(result.answer_value).startswith("x ="):
             return f"The result is {result.answer_value}."
@@ -165,30 +213,27 @@ def _compose_quant_reply(
     return "I can help with this, but I cannot confidently solve it from the current parse alone yet."
-# -----------------------------
-# Intent / retrieval helpers
-# -----------------------------
 def _normalize_text(text: str) -> str:
     return re.sub(r"\s+", " ", (text or "").strip().lower())
 def _extract_keywords(text: str) -> Set[str]:
-    raw = re.findall(r"[a-zA-Z][a-zA-Z0-9_+-]*", text.lower())
     stop = {
         "the", "a", "an", "is", "are", "to", "of", "for", "and", "or", "in", "on",
         "at", "by", "this", "that", "it", "be", "do", "i", "me", "my", "you",
-        "how", "what", "why", "give", "show", "please", "can"
     }
     return {w for w in raw if len(w) > 2 and w not in stop}
 def _infer_structure_terms(question_text: str, topic: Optional[str]) -> List[str]:
     terms: List[str] = []
     if topic and topic in STRUCTURE_KEYWORDS:
         terms.extend(STRUCTURE_KEYWORDS[topic])
-    q = question_text.lower()
     if "=" in q:
         terms.extend(["equation", "solve"])
@@ -207,11 +252,14 @@ def _infer_structure_terms(question_text: str, topic: Optional[str]) -> List[str
 def _infer_mismatch_terms(topic: Optional[str], question_text: str) -> List[str]:
     if not topic or topic not in MISMATCH_TERMS:
         return []
-    q = question_text.lower()
-    terms = []
     for term in MISMATCH_TERMS[topic]:
         if term not in q:
             terms.append(term)
     return terms
@@ -226,7 +274,10 @@ def _is_direct_solve_request(text: str, intent: str) -> bool:
     t = _normalize_text(text)
     if any(re.search(p, t) for p in DIRECT_SOLVE_PATTERNS):
-        if not any(word in t for word in ["how", "explain", "why", "method", "hint", "define", "definition", "step"]):
             return True
     return False
@@ -251,34 +302,29 @@ def _score_chunk(
     topic: Optional[str],
     question_text: str,
 ) -> float:
-    text = f"{getattr(chunk, 'topic', '')} {chunk.text}".lower()
     score = 0.0
-    # topic match
     if topic:
-        chunk_topic = (getattr(chunk, "topic", "") or "").lower()
         if chunk_topic == topic.lower():
             score += 4.0
         elif topic.lower() in text:
             score += 2.0
-    # structure match
     structure_terms = _infer_structure_terms(question_text, topic)
     for term in structure_terms:
         if term.lower() in text:
             score += 1.5
-    # intent match
     for term in _intent_keywords(intent):
         if term.lower() in text:
             score += 1.2
-    # question keyword overlap
     q_keywords = _extract_keywords(question_text)
     overlap = sum(1 for kw in q_keywords if kw in text)
     score += min(overlap * 0.4, 3.0)
-    # penalties for obvious mismatch
     mismatch_terms = _infer_mismatch_terms(topic, question_text)
     for bad in mismatch_terms:
         if bad.lower() in text:
@@ -295,7 +341,8 @@ def _filter_retrieved_chunks(
     min_score: float = 2.5,
     max_chunks: int = 3,
 ) -> List[RetrievedChunk]:
-    scored = []
     for chunk in chunks:
         s = _score_chunk(chunk, intent, topic, question_text)
         if s >= min_score:
@@ -314,7 +361,7 @@ def _build_retrieval_query(
 ) -> str:
     parts: List[str] = []
-    base = question_text.strip() if question_text.strip() else raw_user_text.strip()
     if base:
         parts.append(base)
@@ -335,10 +382,6 @@ def _build_retrieval_query(
     return " ".join(parts).strip()
-# -----------------------------
-# Public entry point
-# -----------------------------
 def generate_response(
     raw_user_text: str,
     tone: float = 0.5,
@@ -355,21 +398,23 @@ def generate_response(
     intent = detect_intent(user_text)
     help_mode = intent_to_help_mode(intent)
     reveal_answer = help_mode == "answer" or transparency >= 0.8
     result = SolverResult(
         domain="general",
         solved=False,
         answer_letter=None,
         answer_value=None,
         internal_answer=None,
         steps=[],
-        topic=None,
     )
-    used_retrieval = False
-    used_generator = False
     selected_chunks: List[RetrievedChunk] = []
     if is_quant_question(solver_input):
@@ -388,7 +433,6 @@ def generate_response(
         raw_user_text=user_text or solver_input,
     )
-    # Use passed-in retrieval context only if retrieval is allowed
     if allow_retrieval and retrieval_context:
         filtered = _filter_retrieved_chunks(
             chunks=retrieval_context,
@@ -398,9 +442,9 @@ def generate_response(
         )
         if filtered:
             selected_chunks = filtered
-            used_retrieval = True
-    # Otherwise retrieve fresh if allowed
     elif allow_retrieval and retrieval_engine is not None:
         query = _build_retrieval_query(
             raw_user_text=user_text,
@@ -418,13 +462,12 @@ def generate_response(
         )
         if filtered:
             selected_chunks = filtered
-            used_retrieval = True
-    # Add teaching notes only if they survived filtering
     if selected_chunks:
         reply = f"{reply}\n\nRelevant study notes:\n" + "\n".join(_teaching_lines(selected_chunks))
-    # Optional generator fallback for non-quant / weak cases
     if not result.solved and generator_engine is not None:
         try:
             generated = generator_engine.generate(
@@ -435,7 +478,7 @@ def generate_response(
             )
             if generated and generated.strip():
                 reply = generated.strip()
-                used_generator = True
         except Exception:
             pass
@@ -446,16 +489,18 @@ def generate_response(
         transparency=transparency,
     )
     return {
-        "reply": short_lines(reply),
         "meta": {
             "domain": result.domain,
             "solved": result.solved,
-            "help_mode": help_mode,
             "answer_letter": result.answer_letter,
             "answer_value": result.answer_value,
             "topic": result.topic,
-            "used_retrieval": used_retrieval,
-            "used_generator": used_generator,
         },
     }

 from utils import short_lines
 RETRIEVAL_ALLOWED_INTENTS = {
     "walkthrough",
     "step_by_step",
 STRUCTURE_KEYWORDS = {
     "algebra": [
+        "equation",
+        "solve",
+        "isolate",
+        "variable",
+        "linear",
+        "expression",
+        "unknown",
+        "algebra",
+        "substitute",
+        "rearrange",
     ],
     "percent": [
+        "percent",
+        "%",
+        "percentage",
+        "increase",
+        "decrease",
+        "of",
     ],
     "ratio": [
+        "ratio",
+        "proportion",
+        "proportional",
+        "part",
+        "share",
     ],
     "statistics": [
+        "mean",
+        "median",
+        "mode",
+        "range",
+        "average",
+        "standard deviation",
     ],
     "probability": [
+        "probability",
+        "chance",
+        "likely",
+        "odds",
+        "event",
     ],
     "geometry": [
+        "triangle",
+        "circle",
+        "angle",
+        "area",
+        "perimeter",
+        "radius",
+        "diameter",
     ],
     "number_properties": [
+        "integer",
+        "odd",
+        "even",
+        "prime",
+        "divisible",
+        "factor",
+        "multiple",
     ],
 }
     "hint": ["hint", "nudge", "clue"],
     "definition": ["define", "definition", "what does", "what is meant by"],
     "concept": ["concept", "idea", "principle", "rule"],
+    "instruction": ["how do i", "how to", "what should i do first", "what step", "first step"],
 }
 MISMATCH_TERMS = {
     "algebra": [
+        "absolute value",
+        "modulus",
+        "square root",
+        "quadratic",
+        "inequality",
+        "roots",
+        "parabola",
+        "simultaneous equations",
     ],
     "percent": [
+        "triangle",
+        "circle",
+        "prime",
+        "absolute value",
     ],
     "ratio": [
+        "absolute value",
+        "quadratic",
+        "circle",
     ],
     "statistics": [
+        "absolute value",
+        "prime",
+        "triangle",
     ],
     "probability": [
+        "absolute value",
+        "circle area",
+        "quadratic",
     ],
     "geometry": [
+        "absolute value",
+        "prime",
+        "median salary",
     ],
     "number_properties": [
+        "circle",
+        "triangle",
+        "absolute value",
     ],
 }
 def _teaching_lines(chunks: List[RetrievedChunk]) -> List[str]:
+    lines: List[str] = []
     for chunk in chunks:
+        text = (chunk.text or "").strip().replace("\n", " ")
         if len(text) > 220:
             text = text[:217].rstrip() + "…"
+        topic = chunk.topic or "general"
         lines.append(f"- {topic}: {text}")
     return lines
     if intent == "definition":
         if steps:
             return f"Here is the idea in context:\n- {steps[0]}"
+        return "This is asking for the meaning of the term or operation in the problem."
     if intent in {"walkthrough", "step_by_step", "explain", "method", "concept"}:
         if not steps:
             return f"Walkthrough:\n{body}\n\nThat gives {internal}."
         return f"Walkthrough:\n{body}"
     if reveal_answer and internal:
         if result.answer_value and str(result.answer_value).startswith("x ="):
             return f"The result is {result.answer_value}."
     return "I can help with this, but I cannot confidently solve it from the current parse alone yet."
 def _normalize_text(text: str) -> str:
     return re.sub(r"\s+", " ", (text or "").strip().lower())
 def _extract_keywords(text: str) -> Set[str]:
+    raw = re.findall(r"[a-zA-Z][a-zA-Z0-9_+-]*", (text or "").lower())
     stop = {
         "the", "a", "an", "is", "are", "to", "of", "for", "and", "or", "in", "on",
         "at", "by", "this", "that", "it", "be", "do", "i", "me", "my", "you",
+        "how", "what", "why", "give", "show", "please", "can",
     }
     return {w for w in raw if len(w) > 2 and w not in stop}
 def _infer_structure_terms(question_text: str, topic: Optional[str]) -> List[str]:
     terms: List[str] = []
     if topic and topic in STRUCTURE_KEYWORDS:
         terms.extend(STRUCTURE_KEYWORDS[topic])
+    q = (question_text or "").lower()
     if "=" in q:
         terms.extend(["equation", "solve"])
 def _infer_mismatch_terms(topic: Optional[str], question_text: str) -> List[str]:
     if not topic or topic not in MISMATCH_TERMS:
         return []
+    q = (question_text or "").lower()
+    terms: List[str] = []
     for term in MISMATCH_TERMS[topic]:
         if term not in q:
             terms.append(term)
     return terms
     t = _normalize_text(text)
     if any(re.search(p, t) for p in DIRECT_SOLVE_PATTERNS):
+        if not any(
+            word in t
+            for word in ["how", "explain", "why", "method", "hint", "define", "definition", "step"]
+        ):
             return True
     return False
     topic: Optional[str],
     question_text: str,
 ) -> float:
+    text = f"{chunk.topic} {chunk.text}".lower()
     score = 0.0
     if topic:
+        chunk_topic = (chunk.topic or "").lower()
         if chunk_topic == topic.lower():
             score += 4.0
         elif topic.lower() in text:
             score += 2.0
     structure_terms = _infer_structure_terms(question_text, topic)
     for term in structure_terms:
         if term.lower() in text:
             score += 1.5
     for term in _intent_keywords(intent):
         if term.lower() in text:
             score += 1.2
     q_keywords = _extract_keywords(question_text)
     overlap = sum(1 for kw in q_keywords if kw in text)
     score += min(overlap * 0.4, 3.0)
     mismatch_terms = _infer_mismatch_terms(topic, question_text)
     for bad in mismatch_terms:
         if bad.lower() in text:
     min_score: float = 2.5,
     max_chunks: int = 3,
 ) -> List[RetrievedChunk]:
+    scored: List[tuple[float, RetrievedChunk]] = []
     for chunk in chunks:
         s = _score_chunk(chunk, intent, topic, question_text)
         if s >= min_score:
 ) -> str:
     parts: List[str] = []
+    base = question_text.strip() if (question_text or "").strip() else (raw_user_text or "").strip()
     if base:
         parts.append(base)
     return " ".join(parts).strip()
 def generate_response(
     raw_user_text: str,
     tone: float = 0.5,
     intent = detect_intent(user_text)
     help_mode = intent_to_help_mode(intent)
     reveal_answer = help_mode == "answer" or transparency >= 0.8
     result = SolverResult(
         domain="general",
         solved=False,
+        help_mode=help_mode,
         answer_letter=None,
         answer_value=None,
+        topic=None,
+        used_retrieval=False,
+        used_generator=False,
         internal_answer=None,
         steps=[],
+        teaching_chunks=[],
+        meta={},
     )
     selected_chunks: List[RetrievedChunk] = []
     if is_quant_question(solver_input):
         raw_user_text=user_text or solver_input,
     )
     if allow_retrieval and retrieval_context:
         filtered = _filter_retrieved_chunks(
             chunks=retrieval_context,
         )
         if filtered:
             selected_chunks = filtered
+            result.used_retrieval = True
+            result.teaching_chunks = filtered
     elif allow_retrieval and retrieval_engine is not None:
         query = _build_retrieval_query(
             raw_user_text=user_text,
         )
         if filtered:
             selected_chunks = filtered
+            result.used_retrieval = True
+            result.teaching_chunks = filtered
     if selected_chunks:
         reply = f"{reply}\n\nRelevant study notes:\n" + "\n".join(_teaching_lines(selected_chunks))
     if not result.solved and generator_engine is not None:
         try:
             generated = generator_engine.generate(
             )
             if generated and generated.strip():
                 reply = generated.strip()
+                result.used_generator = True
         except Exception:
             pass
         transparency=transparency,
     )
+    result.reply = short_lines(reply)
     return {
+        "reply": result.reply,
         "meta": {
             "domain": result.domain,
             "solved": result.solved,
+            "help_mode": result.help_mode,
             "answer_letter": result.answer_letter,
             "answer_value": result.answer_value,
             "topic": result.topic,
+            "used_retrieval": result.used_retrieval,
+            "used_generator": result.used_generator,
         },
     }