Spaces:

j-js
/

GameAI

Sleeping

App Files Files Community

j-js commited on 21 days ago

Commit

9bed299

verified ·

1 Parent(s): d1c397a

Update solver_overlapping_sets.py

Browse files

Files changed (1) hide show

solver_overlapping_sets.py +549 -41

solver_overlapping_sets.py CHANGED Viewed

@@ -1,63 +1,571 @@
 from __future__ import annotations
 import re
-from typing import Optional, List
 from models import SolverResult
-def _nums(text: str) -> List[int]:
-    return [int(x) for x in re.findall(r"-?\d+", text)]
-def solve_overlapping_sets(text: str) -> Optional[SolverResult]:
-    lower = (text or "").lower()
-    if not any(w in lower for w in [
-        "both", "either", "neither", "union", "intersection",
-        "overlap", "venn"
-    ]):
-        return None
-    nums = _nums(lower)
-    # Basic inclusion-exclusion:
-    # "30 study math, 20 study science, 8 study both"
-    if ("both" in lower or "overlap" in lower) and len(nums) >= 3:
-        a = nums[0]
-        b = nums[1]
-        both = nums[2]
         union = a + b - both
-        return SolverResult(
-            domain="quant",
-            solved=True,
-            topic="overlapping_sets",
-            answer_value=str(union),
-            internal_answer=str(union),
             steps=[
-                "Use inclusion–exclusion.",
-                "Total in either set = set A + set B − overlap.",
             ],
         )
-    # "out of 50, 30 like tea, 20 like coffee, 8 like both, how many neither?"
-    if "neither" in lower and len(nums) >= 4:
-        total = nums[0]
-        a = nums[1]
-        b = nums[2]
-        both = nums[3]
-        union = a + b - both
-        neither = total - union
-        return SolverResult(
-            domain="quant",
-            solved=True,
-            topic="overlapping_sets",
-            answer_value=str(neither),
-            internal_answer=str(neither),
             steps=[
-                "Use inclusion–exclusion to find how many are in at least one set.",
-                "Subtract that from the total to get neither.",
             ],
         )
     return None

 from __future__ import annotations
 import re
+from typing import Optional, List, Dict, Tuple
 from models import SolverResult
+# ----------------------------
+# Helpers
+# ----------------------------
+def _normalize(text: str) -> str:
+    t = (text or "").lower()
+    t = t.replace("∩", " intersection ")
+    t = t.replace("∪", " union ")
+    t = t.replace("&", " and ")
+    t = t.replace("%", " percent ")
+    t = re.sub(r"\s+", " ", t).strip()
+    return t
+def _nums(text: str) -> List[float]:
+    vals: List[float] = []
+    for x in re.findall(r"-?\d+(?:\.\d+)?", text):
+        try:
+            vals.append(float(x))
+        except Exception:
+            pass
+    return vals
+def _fmt_num(x: float) -> str:
+    if abs(x - round(x)) < 1e-9:
+        return str(int(round(x)))
+    return f"{x:.2f}".rstrip("0").rstrip(".")
+def _safe_result(
+    internal_answer: float,
+    steps: List[str],
+    interpretation: Optional[str] = None,
+) -> SolverResult:
+    """
+    Keep the answer internally available for the system, while the steps remain
+    method-oriented and do not reveal the final numeric result.
+    """
+    answer_str = _fmt_num(internal_answer)
+    final_steps = []
+    if interpretation:
+        final_steps.append(interpretation)
+    final_steps.extend(steps)
+    return SolverResult(
+        domain="quant",
+        solved=True,
+        topic="overlapping_sets",
+        answer_value=answer_str,
+        internal_answer=answer_str,
+        steps=final_steps,
+    )
+def _contains_any(text: str, phrases: List[str]) -> bool:
+    return any(p in text for p in phrases)
+def _is_overlapping_sets_context(lower: str) -> bool:
+    keywords = [
+        "both", "either", "neither", "union", "intersection", "overlap",
+        "venn", "at least one", "at least 1", "none", "exactly two",
+        "exactly 2", "all three", "all 3", "combined roster", "in common",
+        "only one", "only 1", "only two", "only 2", "took", "club",
+        "clubs", "course", "courses", "class", "classes", "students",
+        "survey", "surveys", "like", "liked", "roster", "pet", "pets",
+        "team", "teams", "belong", "belongs", "belonged", "sign up",
+        "signed up", "play", "played", "members", "none of these",
+    ]
+    return any(k in lower for k in keywords)
+def _question_target(lower: str) -> str:
+    """
+    Infer what the question is asking for.
+    """
+    if _contains_any(lower, ["how many neither", "how many none", "belong to none", "taken none", "none of the", "do not play any", "liked none"]):
+        return "neither"
+    if _contains_any(lower, ["at least one", "at least 1", "either set", "either group", "combined roster", "no student's name listed more than once", "no duplicate names", "total in either", "total in at least one"]):
+        return "union"
+    if _contains_any(lower, ["exactly two", "exactly 2", "only two", "only 2"]):
+        return "exactly_two"
+    if _contains_any(lower, ["all three", "all 3", "all the three"]):
+        return "all_three"
+    if _contains_any(lower, ["only one", "only 1", "exactly one", "exactly 1"]):
+        return "exactly_one"
+    if _contains_any(lower, ["both", "overlap", "intersection", "in common"]):
+        return "intersection"
+    return "unknown"
+def _extract_total(lower: str) -> Optional[float]:
+    patterns = [
+        r"out of (\d+(?:\.\d+)?)",
+        r"of (\d+(?:\.\d+)?) (?:students|people|adults|employees|members|workers)",
+        r"there are (\d+(?:\.\d+)?)",
+        r"in a class of (\d+(?:\.\d+)?)",
+        r"each of the (\d+(?:\.\d+)?) members",
+        r"each of the (\d+(?:\.\d+)?) students",
+        r"this semester[, ]*each of the (\d+(?:\.\d+)?) students",
+        r"(\d+(?:\.\d+)?) people own",
+        r"(\d+(?:\.\d+)?)% of those surveyed",
+        r"total(?: is| =)? (\d+(?:\.\d+)?)",
+    ]
+    for pat in patterns:
+        m = re.search(pat, lower)
+        if m:
+            return float(m.group(1))
+    return None
+def _extract_neither(lower: str) -> Optional[float]:
+    patterns = [
+        r"(\d+(?:\.\d+)?) .*do not play any",
+        r"(\d+(?:\.\d+)?) .*do not play any of",
+        r"(\d+(?:\.\d+)?) .*none of",
+        r"(\d+(?:\.\d+)?) .*belong to none",
+        r"(\d+(?:\.\d+)?) .*taken none",
+        r"(\d+(?:\.\d+)?) .*liked none",
+        r"none(?: =| is)? (\d+(?:\.\d+)?)",
+        r"neither(?: =| is)? (\d+(?:\.\d+)?)",
+    ]
+    for pat in patterns:
+        m = re.search(pat, lower)
+        if m:
+            return float(m.group(1))
+    # "85% liked at least one" -> none = 15
+    m = re.search(r"(\d+(?:\.\d+)?)\s*percent .*at least one", lower)
+    if m:
+        return 100.0 - float(m.group(1))
+    return None
+def _extract_exactly_two(lower: str) -> Optional[float]:
+    patterns = [
+        r"(\d+(?:\.\d+)?) .*exactly two",
+        r"(\d+(?:\.\d+)?) .*exactly 2",
+        r"(\d+(?:\.\d+)?) .*only two",
+        r"(\d+(?:\.\d+)?) .*only 2",
+    ]
+    for pat in patterns:
+        m = re.search(pat, lower)
+        if m:
+            return float(m.group(1))
+    return None
+def _extract_all_three(lower: str) -> Optional[float]:
+    patterns = [
+        r"(\d+(?:\.\d+)?) .*all three",
+        r"(\d+(?:\.\d+)?) .*all 3",
+        r"(\d+(?:\.\d+)?) .*all the three",
+        r"(\d+(?:\.\d+)?) .*on all 3",
+    ]
+    for pat in patterns:
+        m = re.search(pat, lower)
+        if m:
+            return float(m.group(1))
+    return None
+def _extract_pairwise_including_triple(lower: str) -> List[float]:
+    """
+    Extract values like:
+    - 7 play both Hockey and Cricket
+    - E and M had 9 names in common
+    These are pairwise intersections that INCLUDE anyone in all three.
+    """
+    vals: List[float] = []
+    patterns = [
+        r"(\d+(?:\.\d+)?) .*both [a-z0-9 ]+ and [a-z0-9 ]+",
+        r"(\d+(?:\.\d+)?) .*in common",
+        r"([a-z]) and ([a-z]) had (\d+(?:\.\d+)?) names in common",
+    ]
+    for pat in patterns[:2]:
+        for m in re.finditer(pat, lower):
+            try:
+                vals.append(float(m.group(1)))
+            except Exception:
+                pass
+    for m in re.finditer(patterns[2], lower):
+        try:
+            vals.append(float(m.group(3)))
+        except Exception:
+            pass
+    # Deduplicate while preserving order
+    out = []
+    for v in vals:
+        if v not in out:
+            out.append(v)
+    return out[:3]
+def _extract_single_set_counts(lower: str) -> List[float]:
+    """
+    Tries to capture the main three single-set totals from common GMAT wording.
+    """
+    vals: List[float] = []
+    patterns = [
+        r"(\d+(?:\.\d+)?) .*sign up for the [a-z ]+ club",
+        r"(\d+(?:\.\d+)?) .*took [a-z]",
+        r"(\d+(?:\.\d+)?) .*owned [a-z]+",
+        r"(\d+(?:\.\d+)?) .*play [a-z]+",
+        r"(\d+(?:\.\d+)?) .*liked product [123a-z]",
+        r"(\d+(?:\.\d+)?) .*are on the [a-z ]+ team",
+        r"(\d+(?:\.\d+)?) .*roster",
+        r"(\d+(?:\.\d+)?) belong to [abc]",
+        r"(\d+(?:\.\d+)?) have taken [a-z ]+ course",
+        r"(\d+(?:\.\d+)?) students took [abc]",
+        r"(\d+(?:\.\d+)?) members .* poetry club",
+        r"(\d+(?:\.\d+)?) students .* history club",
+        r"(\d+(?:\.\d+)?) students .* writing club",
+    ]
+    for pat in patterns:
+        for m in re.finditer(pat, lower):
+            try:
+                vals.append(float(m.group(1)))
+            except Exception:
+                pass
+    # fallback: collect leading list from phrasing like "20 play hockey, 15 play cricket and 11 play football"
+    for m in re.finditer(r"(\d+(?:\.\d+)?) [a-z ]+, (\d+(?:\.\d+)?) [a-z ]+ and (\d+(?:\.\d+)?) [a-z ]+", lower):
+        try:
+            triple = [float(m.group(1)), float(m.group(2)), float(m.group(3))]
+            for v in triple:
+                vals.append(v)
+        except Exception:
+            pass
+    out = []
+    for v in vals:
+        if v not in out:
+            out.append(v)
+    return out[:3]
+def _extract_generic_numbers(lower: str) -> List[float]:
+    return _nums(lower)
+# ----------------------------
+# 2-set solvers
+# ----------------------------
+def _solve_two_set_basic(lower: str) -> Optional[SolverResult]:
+    nums = _extract_generic_numbers(lower)
+    target = _question_target(lower)
+    # Pattern: "30 study math, 20 study science, 8 study both"
+    if len(nums) >= 3 and _contains_any(lower, ["both", "overlap", "intersection", "in common"]):
+        a, b, both = nums[0], nums[1], nums[2]
+        if target in ["union", "unknown", "intersection"]:
+            if target == "intersection":
+                return _safe_result(
+                    internal_answer=both,
+                    interpretation="This is a 2-set overlap question asking for the intersection.",
+                    steps=[
+                        "Identify the overlap as the group counted in both sets.",
+                        "Use the given overlap directly if it is already stated.",
+                    ],
+                )
+            union = a + b - both
+            return _safe_result(
+                internal_answer=union,
+                interpretation="This is a 2-set inclusion–exclusion setup.",
+                steps=[
+                    "Add the two set totals.",
+                    "Subtract the overlap once because it was counted twice.",
+                    "That gives the number in at least one of the two sets.",
+                ],
+            )
+    # Pattern with total and neither
+    if target == "neither" and len(nums) >= 4:
+        total, a, b, both = nums[0], nums[1], nums[2], nums[3]
         union = a + b - both
+        neither = total - union
+        return _safe_result(
+            internal_answer=neither,
+            interpretation="This is a 2-set total-minus-union question.",
             steps=[
+                "First find how many are in at least one set using inclusion–exclusion.",
+                "Then subtract that union from the total.",
             ],
         )
+    # Exactly one / only one
+    if target == "exactly_one" and len(nums) >= 3:
+        a, b, both = nums[0], nums[1], nums[2]
+        exactly_one = (a - both) + (b - both)
+        return _safe_result(
+            internal_answer=exactly_one,
+            interpretation="This is a 2-set exactly-one question.",
+            steps=[
+                "Remove the overlap from each set to get the set-only parts.",
+                "Add the two non-overlapping parts.",
+            ],
+        )
+    return None
+# ----------------------------
+# 3-set solvers
+# ----------------------------
+def _solve_three_set_from_pairwise_and_triple(lower: str) -> Optional[SolverResult]:
+    """
+    Handles the classic formula:
+    Union = A + B + C - (AB + AC + BC) + ABC
+    Also:
+    Neither = Total - Union
+    Exactly-two = (AB + AC + BC) - 3*ABC
+    """
+    singles = _extract_single_set_counts(lower)
+    pairwise = _extract_pairwise_including_triple(lower)
+    triple = _extract_all_three(lower)
+    total = _extract_total(lower)
+    neither = _extract_neither(lower)
+    target = _question_target(lower)
+    if len(singles) == 3 and len(pairwise) == 3 and triple is not None:
+        a, b, c = singles
+        ab, ac, bc = pairwise
+        abc = triple
+        union = a + b + c - ab - ac - bc + abc
+        exactly_two = (ab + ac + bc) - 3 * abc
+        if target in ["union", "unknown"]:
+            return _safe_result(
+                internal_answer=union,
+                interpretation="This is a 3-set inclusion–exclusion question with pairwise overlaps and a triple overlap.",
+                steps=[
+                    "Add the three set totals.",
+                    "Subtract each pairwise overlap once because those people/items were double-counted.",
+                    "Add the all-three overlap back once because it was subtracted too many times.",
+                ],
+            )
+        if target == "neither" and total is not None:
+            ans = total - union
+            return _safe_result(
+                internal_answer=ans,
+                interpretation="This is a total-minus-3-set-union question.",
+                steps=[
+                    "Use 3-set inclusion–exclusion to find how many are in at least one set.",
+                    "Subtract that result from the total.",
+                ],
+            )
+        if target == "exactly_two":
+            return _safe_result(
+                internal_answer=exactly_two,
+                interpretation="This is a 3-set exactly-two question derived from pairwise overlaps that include the triple-overlap region.",
+                steps=[
+                    "Each pairwise count includes the all-three region.",
+                    "Subtract the all-three group once from each pairwise overlap to convert pairwise totals into exactly-two regions.",
+                    "Then add those exactly-two regions together.",
+                ],
+            )
+        if target == "all_three" and total is not None and neither is not None:
+            # Reverse solve:
+            # Total = Union + Neither
+            # Total - Neither = A+B+C - (AB+AC+BC) + ABC
+            # ABC = (Total-Neither) - (A+B+C) + (AB+AC+BC)
+            ans = (total - neither) - (a + b + c) + (ab + ac + bc)
+            return _safe_result(
+                internal_answer=ans,
+                interpretation="This is a reverse 3-set inclusion–exclusion question solving for the all-three overlap.",
+                steps=[
+                    "Convert the problem into a union count by removing the neither group from the total, if needed.",
+                    "Set up the 3-set inclusion–exclusion equation.",
+                    "Rearrange the equation so the all-three overlap is isolated.",
+                ],
+            )
+    return None
+def _solve_three_set_from_exactly_two_and_triple(lower: str) -> Optional[SolverResult]:
+    """
+    Uses:
+    Total = A + B + C - exactly_two - 2*all_three + neither
+    or equivalently
+    Union = A + B + C - exactly_two - 2*all_three
+    """
+    singles = _extract_single_set_counts(lower)
+    exact2 = _extract_exactly_two(lower)
+    triple = _extract_all_three(lower)
+    total = _extract_total(lower)
+    neither = _extract_neither(lower)
+    target = _question_target(lower)
+    if len(singles) == 3 and exact2 is not None:
+        a, b, c = singles
+        # Find all-three
+        if target == "all_three" and total is not None:
+            n = neither if neither is not None else 0.0
+            # total = a+b+c - exact2 - 2*triple + n
+            ans = (a + b + c - exact2 + n - total) / 2.0
+            return _safe_result(
+                internal_answer=ans,
+                interpretation="This is the exactly-two / all-three 3-set formula.",
+                steps=[
+                    "Use the version of inclusion–exclusion written in terms of exactly-two and all-three.",
+                    "Treat the union as total minus neither when necessary.",
+                    "Rearrange the equation so the all-three region is isolated.",
+                ],
+            )
+        # Find neither
+        if target == "neither" and total is not None and triple is not None:
+            ans = total - (a + b + c - exact2 - 2 * triple)
+            return _safe_result(
+                internal_answer=ans,
+                interpretation="This is a 3-set total-versus-union question using exactly-two and all-three.",
+                steps=[
+                    "Compute the union from the three set totals, the exactly-two count, and the all-three count.",
+                    "Subtract the union from the total to get neither.",
+                ],
+            )
+        # Find exactly-two
+        if target == "exactly_two" and total is not None and triple is not None:
+            n = neither if neither is not None else 0.0
+            ans = a + b + c + n - total - 2 * triple
+            return _safe_result(
+                internal_answer=ans,
+                interpretation="This is a reverse solve for the exactly-two total in a 3-set problem.",
+                steps=[
+                    "Use the total/union form of the 3-set formula written with exactly-two and all-three.",
+                    "Substitute the known values.",
+                    "Rearrange to isolate the exactly-two count.",
+                ],
+            )
+        # Find union / at least one
+        if target in ["union", "unknown"] and triple is not None:
+            ans = a + b + c - exact2 - 2 * triple
+            return _safe_result(
+                internal_answer=ans,
+                interpretation="This is a 3-set union problem using exactly-two and all-three.",
+                steps=[
+                    "Start with the sum of the three set totals.",
+                    "Subtract the exactly-two contribution.",
+                    "Subtract the extra double-counting caused by the all-three region.",
+                ],
+            )
+    return None
+def _solve_three_set_from_total_and_triple(lower: str) -> Optional[SolverResult]:
+    """
+    Example:
+    Total known, neither implied 0, singles known, all-three known -> find exactly-two
+    Formula:
+    Total = A + B + C - exactly_two - 2*all_three + neither
+    """
+    singles = _extract_single_set_counts(lower)
+    total = _extract_total(lower)
+    triple = _extract_all_three(lower)
+    neither = _extract_neither(lower)
+    target = _question_target(lower)
+    if len(singles) == 3 and total is not None and triple is not None and target == "exactly_two":
+        a, b, c = singles
+        n = neither if neither is not None else 0.0
+        ans = a + b + c + n - total - 2 * triple
+        return _safe_result(
+            internal_answer=ans,
+            interpretation="This is a 3-set exactly-two question using total, singles, and all-three.",
+            steps=[
+                "Use the 3-set formula written in terms of exactly-two and all-three.",
+                "If the problem says everyone is in at least one set, take neither as zero.",
+                "Rearrange to isolate the exactly-two count.",
+            ],
+        )
+    return None
+def _solve_percent_variant(lower: str) -> Optional[SolverResult]:
+    """
+    Supports survey-style percentage overlapping sets.
+    """
+    if "percent" not in lower:
+        return None
+    res = _solve_three_set_from_exactly_two_and_triple(lower)
+    if res is not None:
+        return res
+    res = _solve_three_set_from_pairwise_and_triple(lower)
+    if res is not None:
+        return res
+    return None
+def _solve_subset_bound_variant(lower: str) -> Optional[SolverResult]:
+    """
+    Handles disguised overlap-style minimum union questions such as
+    'ranges are 17, 28, 35; what is the minimum possible total range?'
+    where the minimum union is the largest set if all smaller sets fit inside it.
+    """
+    if not _contains_any(lower, ["minimum possible", "minimum", "largest", "smallest", "range"]):
+        return None
+    nums = _extract_generic_numbers(lower)
+    if len(nums) >= 3:
+        # Heuristic: for disguised minimum-union overlap questions, the minimum
+        # possible union is max(set sizes).
+        vals = nums[:3]
+        ans = max(vals)
+        return _safe_result(
+            internal_answer=ans,
+            interpretation="This is a disguised minimum-union overlapping-sets idea.",
             steps=[
+                "To minimize the total covered range/group, make the smaller sets lie entirely inside the largest set whenever possible.",
+                "So the minimum possible overall coverage cannot be smaller than the largest individual set.",
             ],
         )
+    return None
+# ----------------------------
+# Main router
+# ----------------------------
+def solve_overlapping_sets(text: str) -> Optional[SolverResult]:
+    lower = _normalize(text)
+    if not _is_overlapping_sets_context(lower):
+        return None
+    # Strongest / most specific first
+    for solver in [
+        _solve_percent_variant,
+        _solve_three_set_from_pairwise_and_triple,
+        _solve_three_set_from_exactly_two_and_triple,
+        _solve_three_set_from_total_and_triple,
+        _solve_subset_bound_variant,
+        _solve_two_set_basic,
+    ]:
+        result = solver(lower)
+        if result is not None:
+            return result
     return None