from __future__ import annotations import re from typing import Optional, List, Dict, Tuple from models import SolverResult # ---------------------------- # Helpers # ---------------------------- def _normalize(text: str) -> str: t = (text or "").lower() t = t.replace("∩", " intersection ") t = t.replace("∪", " union ") t = t.replace("&", " and ") t = t.replace("%", " percent ") t = re.sub(r"\s+", " ", t).strip() return t def _nums(text: str) -> List[float]: vals: List[float] = [] for x in re.findall(r"-?\d+(?:\.\d+)?", text): try: vals.append(float(x)) except Exception: pass return vals def _fmt_num(x: float) -> str: if abs(x - round(x)) < 1e-9: return str(int(round(x))) return f"{x:.2f}".rstrip("0").rstrip(".") def _safe_result( internal_answer: float, steps: List[str], interpretation: Optional[str] = None, ) -> SolverResult: """ Keep the answer internally available for the system, while the steps remain method-oriented and do not reveal the final numeric result. """ answer_str = _fmt_num(internal_answer) final_steps = [] if interpretation: final_steps.append(interpretation) final_steps.extend(steps) return SolverResult( domain="quant", solved=True, topic="overlapping_sets", answer_value=answer_str, internal_answer=answer_str, steps=final_steps, ) def _contains_any(text: str, phrases: List[str]) -> bool: return any(p in text for p in phrases) def _is_overlapping_sets_context(lower: str) -> bool: keywords = [ "both", "either", "neither", "union", "intersection", "overlap", "venn", "at least one", "at least 1", "none", "exactly two", "exactly 2", "all three", "all 3", "combined roster", "in common", "only one", "only 1", "only two", "only 2", "took", "club", "clubs", "course", "courses", "class", "classes", "students", "survey", "surveys", "like", "liked", "roster", "pet", "pets", "team", "teams", "belong", "belongs", "belonged", "sign up", "signed up", "play", "played", "members", "none of these", ] return any(k in lower for k in keywords) def _question_target(lower: str) -> str: """ Infer what the question is asking for. """ if _contains_any(lower, ["how many neither", "how many none", "belong to none", "taken none", "none of the", "do not play any", "liked none"]): return "neither" if _contains_any(lower, ["at least one", "at least 1", "either set", "either group", "combined roster", "no student's name listed more than once", "no duplicate names", "total in either", "total in at least one"]): return "union" if _contains_any(lower, ["exactly two", "exactly 2", "only two", "only 2"]): return "exactly_two" if _contains_any(lower, ["all three", "all 3", "all the three"]): return "all_three" if _contains_any(lower, ["only one", "only 1", "exactly one", "exactly 1"]): return "exactly_one" if _contains_any(lower, ["both", "overlap", "intersection", "in common"]): return "intersection" return "unknown" def _extract_total(lower: str) -> Optional[float]: patterns = [ r"out of (\d+(?:\.\d+)?)", r"of (\d+(?:\.\d+)?) (?:students|people|adults|employees|members|workers)", r"there are (\d+(?:\.\d+)?)", r"in a class of (\d+(?:\.\d+)?)", r"each of the (\d+(?:\.\d+)?) members", r"each of the (\d+(?:\.\d+)?) students", r"this semester[, ]*each of the (\d+(?:\.\d+)?) students", r"(\d+(?:\.\d+)?) people own", r"(\d+(?:\.\d+)?)% of those surveyed", r"total(?: is| =)? (\d+(?:\.\d+)?)", ] for pat in patterns: m = re.search(pat, lower) if m: return float(m.group(1)) return None def _extract_neither(lower: str) -> Optional[float]: patterns = [ r"(\d+(?:\.\d+)?) .*do not play any", r"(\d+(?:\.\d+)?) .*do not play any of", r"(\d+(?:\.\d+)?) .*none of", r"(\d+(?:\.\d+)?) .*belong to none", r"(\d+(?:\.\d+)?) .*taken none", r"(\d+(?:\.\d+)?) .*liked none", r"none(?: =| is)? (\d+(?:\.\d+)?)", r"neither(?: =| is)? (\d+(?:\.\d+)?)", ] for pat in patterns: m = re.search(pat, lower) if m: return float(m.group(1)) # "85% liked at least one" -> none = 15 m = re.search(r"(\d+(?:\.\d+)?)\s*percent .*at least one", lower) if m: return 100.0 - float(m.group(1)) return None def _extract_exactly_two(lower: str) -> Optional[float]: patterns = [ r"(\d+(?:\.\d+)?) .*exactly two", r"(\d+(?:\.\d+)?) .*exactly 2", r"(\d+(?:\.\d+)?) .*only two", r"(\d+(?:\.\d+)?) .*only 2", ] for pat in patterns: m = re.search(pat, lower) if m: return float(m.group(1)) return None def _extract_all_three(lower: str) -> Optional[float]: patterns = [ r"(\d+(?:\.\d+)?) .*all three", r"(\d+(?:\.\d+)?) .*all 3", r"(\d+(?:\.\d+)?) .*all the three", r"(\d+(?:\.\d+)?) .*on all 3", ] for pat in patterns: m = re.search(pat, lower) if m: return float(m.group(1)) return None def _extract_pairwise_including_triple(lower: str) -> List[float]: """ Extract values like: - 7 play both Hockey and Cricket - E and M had 9 names in common These are pairwise intersections that INCLUDE anyone in all three. """ vals: List[float] = [] patterns = [ r"(\d+(?:\.\d+)?) .*both [a-z0-9 ]+ and [a-z0-9 ]+", r"(\d+(?:\.\d+)?) .*in common", r"([a-z]) and ([a-z]) had (\d+(?:\.\d+)?) names in common", ] for pat in patterns[:2]: for m in re.finditer(pat, lower): try: vals.append(float(m.group(1))) except Exception: pass for m in re.finditer(patterns[2], lower): try: vals.append(float(m.group(3))) except Exception: pass # Deduplicate while preserving order out = [] for v in vals: if v not in out: out.append(v) return out[:3] def _extract_single_set_counts(lower: str) -> List[float]: """ Tries to capture the main three single-set totals from common GMAT wording. """ vals: List[float] = [] patterns = [ r"(\d+(?:\.\d+)?) .*sign up for the [a-z ]+ club", r"(\d+(?:\.\d+)?) .*took [a-z]", r"(\d+(?:\.\d+)?) .*owned [a-z]+", r"(\d+(?:\.\d+)?) .*play [a-z]+", r"(\d+(?:\.\d+)?) .*liked product [123a-z]", r"(\d+(?:\.\d+)?) .*are on the [a-z ]+ team", r"(\d+(?:\.\d+)?) .*roster", r"(\d+(?:\.\d+)?) belong to [abc]", r"(\d+(?:\.\d+)?) have taken [a-z ]+ course", r"(\d+(?:\.\d+)?) students took [abc]", r"(\d+(?:\.\d+)?) members .* poetry club", r"(\d+(?:\.\d+)?) students .* history club", r"(\d+(?:\.\d+)?) students .* writing club", ] for pat in patterns: for m in re.finditer(pat, lower): try: vals.append(float(m.group(1))) except Exception: pass # fallback: collect leading list from phrasing like "20 play hockey, 15 play cricket and 11 play football" for m in re.finditer(r"(\d+(?:\.\d+)?) [a-z ]+, (\d+(?:\.\d+)?) [a-z ]+ and (\d+(?:\.\d+)?) [a-z ]+", lower): try: triple = [float(m.group(1)), float(m.group(2)), float(m.group(3))] for v in triple: vals.append(v) except Exception: pass out = [] for v in vals: if v not in out: out.append(v) return out[:3] def _extract_generic_numbers(lower: str) -> List[float]: return _nums(lower) # ---------------------------- # 2-set solvers # ---------------------------- def _solve_two_set_basic(lower: str) -> Optional[SolverResult]: nums = _extract_generic_numbers(lower) target = _question_target(lower) # Pattern: "30 study math, 20 study science, 8 study both" if len(nums) >= 3 and _contains_any(lower, ["both", "overlap", "intersection", "in common"]): a, b, both = nums[0], nums[1], nums[2] if target in ["union", "unknown", "intersection"]: if target == "intersection": return _safe_result( internal_answer=both, interpretation="This is a 2-set overlap question asking for the intersection.", steps=[ "Identify the overlap as the group counted in both sets.", "Use the given overlap directly if it is already stated.", ], ) union = a + b - both return _safe_result( internal_answer=union, interpretation="This is a 2-set inclusion–exclusion setup.", steps=[ "Add the two set totals.", "Subtract the overlap once because it was counted twice.", "That gives the number in at least one of the two sets.", ], ) # Pattern with total and neither if target == "neither" and len(nums) >= 4: total, a, b, both = nums[0], nums[1], nums[2], nums[3] union = a + b - both neither = total - union return _safe_result( internal_answer=neither, interpretation="This is a 2-set total-minus-union question.", steps=[ "First find how many are in at least one set using inclusion–exclusion.", "Then subtract that union from the total.", ], ) # Exactly one / only one if target == "exactly_one" and len(nums) >= 3: a, b, both = nums[0], nums[1], nums[2] exactly_one = (a - both) + (b - both) return _safe_result( internal_answer=exactly_one, interpretation="This is a 2-set exactly-one question.", steps=[ "Remove the overlap from each set to get the set-only parts.", "Add the two non-overlapping parts.", ], ) return None # ---------------------------- # 3-set solvers # ---------------------------- def _solve_three_set_from_pairwise_and_triple(lower: str) -> Optional[SolverResult]: """ Handles the classic formula: Union = A + B + C - (AB + AC + BC) + ABC Also: Neither = Total - Union Exactly-two = (AB + AC + BC) - 3*ABC """ singles = _extract_single_set_counts(lower) pairwise = _extract_pairwise_including_triple(lower) triple = _extract_all_three(lower) total = _extract_total(lower) neither = _extract_neither(lower) target = _question_target(lower) if len(singles) == 3 and len(pairwise) == 3 and triple is not None: a, b, c = singles ab, ac, bc = pairwise abc = triple union = a + b + c - ab - ac - bc + abc exactly_two = (ab + ac + bc) - 3 * abc if target in ["union", "unknown"]: return _safe_result( internal_answer=union, interpretation="This is a 3-set inclusion–exclusion question with pairwise overlaps and a triple overlap.", steps=[ "Add the three set totals.", "Subtract each pairwise overlap once because those people/items were double-counted.", "Add the all-three overlap back once because it was subtracted too many times.", ], ) if target == "neither" and total is not None: ans = total - union return _safe_result( internal_answer=ans, interpretation="This is a total-minus-3-set-union question.", steps=[ "Use 3-set inclusion–exclusion to find how many are in at least one set.", "Subtract that result from the total.", ], ) if target == "exactly_two": return _safe_result( internal_answer=exactly_two, interpretation="This is a 3-set exactly-two question derived from pairwise overlaps that include the triple-overlap region.", steps=[ "Each pairwise count includes the all-three region.", "Subtract the all-three group once from each pairwise overlap to convert pairwise totals into exactly-two regions.", "Then add those exactly-two regions together.", ], ) if target == "all_three" and total is not None and neither is not None: # Reverse solve: # Total = Union + Neither # Total - Neither = A+B+C - (AB+AC+BC) + ABC # ABC = (Total-Neither) - (A+B+C) + (AB+AC+BC) ans = (total - neither) - (a + b + c) + (ab + ac + bc) return _safe_result( internal_answer=ans, interpretation="This is a reverse 3-set inclusion–exclusion question solving for the all-three overlap.", steps=[ "Convert the problem into a union count by removing the neither group from the total, if needed.", "Set up the 3-set inclusion–exclusion equation.", "Rearrange the equation so the all-three overlap is isolated.", ], ) return None def _solve_three_set_from_exactly_two_and_triple(lower: str) -> Optional[SolverResult]: """ Uses: Total = A + B + C - exactly_two - 2*all_three + neither or equivalently Union = A + B + C - exactly_two - 2*all_three """ singles = _extract_single_set_counts(lower) exact2 = _extract_exactly_two(lower) triple = _extract_all_three(lower) total = _extract_total(lower) neither = _extract_neither(lower) target = _question_target(lower) if len(singles) == 3 and exact2 is not None: a, b, c = singles # Find all-three if target == "all_three" and total is not None: n = neither if neither is not None else 0.0 # total = a+b+c - exact2 - 2*triple + n ans = (a + b + c - exact2 + n - total) / 2.0 return _safe_result( internal_answer=ans, interpretation="This is the exactly-two / all-three 3-set formula.", steps=[ "Use the version of inclusion–exclusion written in terms of exactly-two and all-three.", "Treat the union as total minus neither when necessary.", "Rearrange the equation so the all-three region is isolated.", ], ) # Find neither if target == "neither" and total is not None and triple is not None: ans = total - (a + b + c - exact2 - 2 * triple) return _safe_result( internal_answer=ans, interpretation="This is a 3-set total-versus-union question using exactly-two and all-three.", steps=[ "Compute the union from the three set totals, the exactly-two count, and the all-three count.", "Subtract the union from the total to get neither.", ], ) # Find exactly-two if target == "exactly_two" and total is not None and triple is not None: n = neither if neither is not None else 0.0 ans = a + b + c + n - total - 2 * triple return _safe_result( internal_answer=ans, interpretation="This is a reverse solve for the exactly-two total in a 3-set problem.", steps=[ "Use the total/union form of the 3-set formula written with exactly-two and all-three.", "Substitute the known values.", "Rearrange to isolate the exactly-two count.", ], ) # Find union / at least one if target in ["union", "unknown"] and triple is not None: ans = a + b + c - exact2 - 2 * triple return _safe_result( internal_answer=ans, interpretation="This is a 3-set union problem using exactly-two and all-three.", steps=[ "Start with the sum of the three set totals.", "Subtract the exactly-two contribution.", "Subtract the extra double-counting caused by the all-three region.", ], ) return None def _solve_three_set_from_total_and_triple(lower: str) -> Optional[SolverResult]: """ Example: Total known, neither implied 0, singles known, all-three known -> find exactly-two Formula: Total = A + B + C - exactly_two - 2*all_three + neither """ singles = _extract_single_set_counts(lower) total = _extract_total(lower) triple = _extract_all_three(lower) neither = _extract_neither(lower) target = _question_target(lower) if len(singles) == 3 and total is not None and triple is not None and target == "exactly_two": a, b, c = singles n = neither if neither is not None else 0.0 ans = a + b + c + n - total - 2 * triple return _safe_result( internal_answer=ans, interpretation="This is a 3-set exactly-two question using total, singles, and all-three.", steps=[ "Use the 3-set formula written in terms of exactly-two and all-three.", "If the problem says everyone is in at least one set, take neither as zero.", "Rearrange to isolate the exactly-two count.", ], ) return None def _solve_percent_variant(lower: str) -> Optional[SolverResult]: """ Supports survey-style percentage overlapping sets. """ if "percent" not in lower: return None res = _solve_three_set_from_exactly_two_and_triple(lower) if res is not None: return res res = _solve_three_set_from_pairwise_and_triple(lower) if res is not None: return res return None def _solve_subset_bound_variant(lower: str) -> Optional[SolverResult]: """ Handles disguised overlap-style minimum union questions such as 'ranges are 17, 28, 35; what is the minimum possible total range?' where the minimum union is the largest set if all smaller sets fit inside it. """ if not _contains_any(lower, ["minimum possible", "minimum", "largest", "smallest", "range"]): return None nums = _extract_generic_numbers(lower) if len(nums) >= 3: # Heuristic: for disguised minimum-union overlap questions, the minimum # possible union is max(set sizes). vals = nums[:3] ans = max(vals) return _safe_result( internal_answer=ans, interpretation="This is a disguised minimum-union overlapping-sets idea.", steps=[ "To minimize the total covered range/group, make the smaller sets lie entirely inside the largest set whenever possible.", "So the minimum possible overall coverage cannot be smaller than the largest individual set.", ], ) return None # ---------------------------- # Main router # ---------------------------- def solve_overlapping_sets(text: str) -> Optional[SolverResult]: lower = _normalize(text) if not _is_overlapping_sets_context(lower): return None # Strongest / most specific first for solver in [ _solve_percent_variant, _solve_three_set_from_pairwise_and_triple, _solve_three_set_from_exactly_two_and_triple, _solve_three_set_from_total_and_triple, _solve_subset_bound_variant, _solve_two_set_basic, ]: result = solver(lower) if result is not None: return result return None