| from __future__ import annotations |
|
|
| import re |
| from typing import Optional, List, Dict, Tuple |
|
|
| from models import SolverResult |
|
|
|
|
| |
| |
| |
|
|
| def _normalize(text: str) -> str: |
| t = (text or "").lower() |
| t = t.replace("∩", " intersection ") |
| t = t.replace("∪", " union ") |
| t = t.replace("&", " and ") |
| t = t.replace("%", " percent ") |
| t = re.sub(r"\s+", " ", t).strip() |
| return t |
|
|
|
|
| def _nums(text: str) -> List[float]: |
| vals: List[float] = [] |
| for x in re.findall(r"-?\d+(?:\.\d+)?", text): |
| try: |
| vals.append(float(x)) |
| except Exception: |
| pass |
| return vals |
|
|
|
|
| def _fmt_num(x: float) -> str: |
| if abs(x - round(x)) < 1e-9: |
| return str(int(round(x))) |
| return f"{x:.2f}".rstrip("0").rstrip(".") |
|
|
|
|
| def _safe_result( |
| internal_answer: float, |
| steps: List[str], |
| interpretation: Optional[str] = None, |
| ) -> SolverResult: |
| """ |
| Keep the answer internally available for the system, while the steps remain |
| method-oriented and do not reveal the final numeric result. |
| """ |
| answer_str = _fmt_num(internal_answer) |
| final_steps = [] |
| if interpretation: |
| final_steps.append(interpretation) |
| final_steps.extend(steps) |
| return SolverResult( |
| domain="quant", |
| solved=True, |
| topic="overlapping_sets", |
| answer_value=answer_str, |
| internal_answer=answer_str, |
| steps=final_steps, |
| ) |
|
|
|
|
| def _contains_any(text: str, phrases: List[str]) -> bool: |
| return any(p in text for p in phrases) |
|
|
|
|
| def _is_overlapping_sets_context(lower: str) -> bool: |
| keywords = [ |
| "both", "either", "neither", "union", "intersection", "overlap", |
| "venn", "at least one", "at least 1", "none", "exactly two", |
| "exactly 2", "all three", "all 3", "combined roster", "in common", |
| "only one", "only 1", "only two", "only 2", "took", "club", |
| "clubs", "course", "courses", "class", "classes", "students", |
| "survey", "surveys", "like", "liked", "roster", "pet", "pets", |
| "team", "teams", "belong", "belongs", "belonged", "sign up", |
| "signed up", "play", "played", "members", "none of these", |
| ] |
| return any(k in lower for k in keywords) |
|
|
|
|
| def _question_target(lower: str) -> str: |
| """ |
| Infer what the question is asking for. |
| """ |
| if _contains_any(lower, ["how many neither", "how many none", "belong to none", "taken none", "none of the", "do not play any", "liked none"]): |
| return "neither" |
| if _contains_any(lower, ["at least one", "at least 1", "either set", "either group", "combined roster", "no student's name listed more than once", "no duplicate names", "total in either", "total in at least one"]): |
| return "union" |
| if _contains_any(lower, ["exactly two", "exactly 2", "only two", "only 2"]): |
| return "exactly_two" |
| if _contains_any(lower, ["all three", "all 3", "all the three"]): |
| return "all_three" |
| if _contains_any(lower, ["only one", "only 1", "exactly one", "exactly 1"]): |
| return "exactly_one" |
| if _contains_any(lower, ["both", "overlap", "intersection", "in common"]): |
| return "intersection" |
| return "unknown" |
|
|
|
|
| def _extract_total(lower: str) -> Optional[float]: |
| patterns = [ |
| r"out of (\d+(?:\.\d+)?)", |
| r"of (\d+(?:\.\d+)?) (?:students|people|adults|employees|members|workers)", |
| r"there are (\d+(?:\.\d+)?)", |
| r"in a class of (\d+(?:\.\d+)?)", |
| r"each of the (\d+(?:\.\d+)?) members", |
| r"each of the (\d+(?:\.\d+)?) students", |
| r"this semester[, ]*each of the (\d+(?:\.\d+)?) students", |
| r"(\d+(?:\.\d+)?) people own", |
| r"(\d+(?:\.\d+)?)% of those surveyed", |
| r"total(?: is| =)? (\d+(?:\.\d+)?)", |
| ] |
| for pat in patterns: |
| m = re.search(pat, lower) |
| if m: |
| return float(m.group(1)) |
| return None |
|
|
|
|
| def _extract_neither(lower: str) -> Optional[float]: |
| patterns = [ |
| r"(\d+(?:\.\d+)?) .*do not play any", |
| r"(\d+(?:\.\d+)?) .*do not play any of", |
| r"(\d+(?:\.\d+)?) .*none of", |
| r"(\d+(?:\.\d+)?) .*belong to none", |
| r"(\d+(?:\.\d+)?) .*taken none", |
| r"(\d+(?:\.\d+)?) .*liked none", |
| r"none(?: =| is)? (\d+(?:\.\d+)?)", |
| r"neither(?: =| is)? (\d+(?:\.\d+)?)", |
| ] |
| for pat in patterns: |
| m = re.search(pat, lower) |
| if m: |
| return float(m.group(1)) |
|
|
| |
| m = re.search(r"(\d+(?:\.\d+)?)\s*percent .*at least one", lower) |
| if m: |
| return 100.0 - float(m.group(1)) |
|
|
| return None |
|
|
|
|
| def _extract_exactly_two(lower: str) -> Optional[float]: |
| patterns = [ |
| r"(\d+(?:\.\d+)?) .*exactly two", |
| r"(\d+(?:\.\d+)?) .*exactly 2", |
| r"(\d+(?:\.\d+)?) .*only two", |
| r"(\d+(?:\.\d+)?) .*only 2", |
| ] |
| for pat in patterns: |
| m = re.search(pat, lower) |
| if m: |
| return float(m.group(1)) |
| return None |
|
|
|
|
| def _extract_all_three(lower: str) -> Optional[float]: |
| patterns = [ |
| r"(\d+(?:\.\d+)?) .*all three", |
| r"(\d+(?:\.\d+)?) .*all 3", |
| r"(\d+(?:\.\d+)?) .*all the three", |
| r"(\d+(?:\.\d+)?) .*on all 3", |
| ] |
| for pat in patterns: |
| m = re.search(pat, lower) |
| if m: |
| return float(m.group(1)) |
| return None |
|
|
|
|
| def _extract_pairwise_including_triple(lower: str) -> List[float]: |
| """ |
| Extract values like: |
| - 7 play both Hockey and Cricket |
| - E and M had 9 names in common |
| These are pairwise intersections that INCLUDE anyone in all three. |
| """ |
| vals: List[float] = [] |
|
|
| patterns = [ |
| r"(\d+(?:\.\d+)?) .*both [a-z0-9 ]+ and [a-z0-9 ]+", |
| r"(\d+(?:\.\d+)?) .*in common", |
| r"([a-z]) and ([a-z]) had (\d+(?:\.\d+)?) names in common", |
| ] |
|
|
| for pat in patterns[:2]: |
| for m in re.finditer(pat, lower): |
| try: |
| vals.append(float(m.group(1))) |
| except Exception: |
| pass |
|
|
| for m in re.finditer(patterns[2], lower): |
| try: |
| vals.append(float(m.group(3))) |
| except Exception: |
| pass |
|
|
| |
| out = [] |
| for v in vals: |
| if v not in out: |
| out.append(v) |
| return out[:3] |
|
|
|
|
| def _extract_single_set_counts(lower: str) -> List[float]: |
| """ |
| Tries to capture the main three single-set totals from common GMAT wording. |
| """ |
| vals: List[float] = [] |
|
|
| patterns = [ |
| r"(\d+(?:\.\d+)?) .*sign up for the [a-z ]+ club", |
| r"(\d+(?:\.\d+)?) .*took [a-z]", |
| r"(\d+(?:\.\d+)?) .*owned [a-z]+", |
| r"(\d+(?:\.\d+)?) .*play [a-z]+", |
| r"(\d+(?:\.\d+)?) .*liked product [123a-z]", |
| r"(\d+(?:\.\d+)?) .*are on the [a-z ]+ team", |
| r"(\d+(?:\.\d+)?) .*roster", |
| r"(\d+(?:\.\d+)?) belong to [abc]", |
| r"(\d+(?:\.\d+)?) have taken [a-z ]+ course", |
| r"(\d+(?:\.\d+)?) students took [abc]", |
| r"(\d+(?:\.\d+)?) members .* poetry club", |
| r"(\d+(?:\.\d+)?) students .* history club", |
| r"(\d+(?:\.\d+)?) students .* writing club", |
| ] |
|
|
| for pat in patterns: |
| for m in re.finditer(pat, lower): |
| try: |
| vals.append(float(m.group(1))) |
| except Exception: |
| pass |
|
|
| |
| for m in re.finditer(r"(\d+(?:\.\d+)?) [a-z ]+, (\d+(?:\.\d+)?) [a-z ]+ and (\d+(?:\.\d+)?) [a-z ]+", lower): |
| try: |
| triple = [float(m.group(1)), float(m.group(2)), float(m.group(3))] |
| for v in triple: |
| vals.append(v) |
| except Exception: |
| pass |
|
|
| out = [] |
| for v in vals: |
| if v not in out: |
| out.append(v) |
| return out[:3] |
|
|
|
|
| def _extract_generic_numbers(lower: str) -> List[float]: |
| return _nums(lower) |
|
|
|
|
| |
| |
| |
|
|
| def _solve_two_set_basic(lower: str) -> Optional[SolverResult]: |
| nums = _extract_generic_numbers(lower) |
| target = _question_target(lower) |
|
|
| |
| if len(nums) >= 3 and _contains_any(lower, ["both", "overlap", "intersection", "in common"]): |
| a, b, both = nums[0], nums[1], nums[2] |
|
|
| if target in ["union", "unknown", "intersection"]: |
| if target == "intersection": |
| return _safe_result( |
| internal_answer=both, |
| interpretation="This is a 2-set overlap question asking for the intersection.", |
| steps=[ |
| "Identify the overlap as the group counted in both sets.", |
| "Use the given overlap directly if it is already stated.", |
| ], |
| ) |
|
|
| union = a + b - both |
| return _safe_result( |
| internal_answer=union, |
| interpretation="This is a 2-set inclusion–exclusion setup.", |
| steps=[ |
| "Add the two set totals.", |
| "Subtract the overlap once because it was counted twice.", |
| "That gives the number in at least one of the two sets.", |
| ], |
| ) |
|
|
| |
| if target == "neither" and len(nums) >= 4: |
| total, a, b, both = nums[0], nums[1], nums[2], nums[3] |
| union = a + b - both |
| neither = total - union |
| return _safe_result( |
| internal_answer=neither, |
| interpretation="This is a 2-set total-minus-union question.", |
| steps=[ |
| "First find how many are in at least one set using inclusion–exclusion.", |
| "Then subtract that union from the total.", |
| ], |
| ) |
|
|
| |
| if target == "exactly_one" and len(nums) >= 3: |
| a, b, both = nums[0], nums[1], nums[2] |
| exactly_one = (a - both) + (b - both) |
| return _safe_result( |
| internal_answer=exactly_one, |
| interpretation="This is a 2-set exactly-one question.", |
| steps=[ |
| "Remove the overlap from each set to get the set-only parts.", |
| "Add the two non-overlapping parts.", |
| ], |
| ) |
|
|
| return None |
|
|
|
|
| |
| |
| |
|
|
| def _solve_three_set_from_pairwise_and_triple(lower: str) -> Optional[SolverResult]: |
| """ |
| Handles the classic formula: |
| Union = A + B + C - (AB + AC + BC) + ABC |
| Also: |
| Neither = Total - Union |
| Exactly-two = (AB + AC + BC) - 3*ABC |
| """ |
| singles = _extract_single_set_counts(lower) |
| pairwise = _extract_pairwise_including_triple(lower) |
| triple = _extract_all_three(lower) |
| total = _extract_total(lower) |
| neither = _extract_neither(lower) |
| target = _question_target(lower) |
|
|
| if len(singles) == 3 and len(pairwise) == 3 and triple is not None: |
| a, b, c = singles |
| ab, ac, bc = pairwise |
| abc = triple |
|
|
| union = a + b + c - ab - ac - bc + abc |
| exactly_two = (ab + ac + bc) - 3 * abc |
|
|
| if target in ["union", "unknown"]: |
| return _safe_result( |
| internal_answer=union, |
| interpretation="This is a 3-set inclusion–exclusion question with pairwise overlaps and a triple overlap.", |
| steps=[ |
| "Add the three set totals.", |
| "Subtract each pairwise overlap once because those people/items were double-counted.", |
| "Add the all-three overlap back once because it was subtracted too many times.", |
| ], |
| ) |
|
|
| if target == "neither" and total is not None: |
| ans = total - union |
| return _safe_result( |
| internal_answer=ans, |
| interpretation="This is a total-minus-3-set-union question.", |
| steps=[ |
| "Use 3-set inclusion–exclusion to find how many are in at least one set.", |
| "Subtract that result from the total.", |
| ], |
| ) |
|
|
| if target == "exactly_two": |
| return _safe_result( |
| internal_answer=exactly_two, |
| interpretation="This is a 3-set exactly-two question derived from pairwise overlaps that include the triple-overlap region.", |
| steps=[ |
| "Each pairwise count includes the all-three region.", |
| "Subtract the all-three group once from each pairwise overlap to convert pairwise totals into exactly-two regions.", |
| "Then add those exactly-two regions together.", |
| ], |
| ) |
|
|
| if target == "all_three" and total is not None and neither is not None: |
| |
| |
| |
| |
| ans = (total - neither) - (a + b + c) + (ab + ac + bc) |
| return _safe_result( |
| internal_answer=ans, |
| interpretation="This is a reverse 3-set inclusion–exclusion question solving for the all-three overlap.", |
| steps=[ |
| "Convert the problem into a union count by removing the neither group from the total, if needed.", |
| "Set up the 3-set inclusion–exclusion equation.", |
| "Rearrange the equation so the all-three overlap is isolated.", |
| ], |
| ) |
|
|
| return None |
|
|
|
|
| def _solve_three_set_from_exactly_two_and_triple(lower: str) -> Optional[SolverResult]: |
| """ |
| Uses: |
| Total = A + B + C - exactly_two - 2*all_three + neither |
| or equivalently |
| Union = A + B + C - exactly_two - 2*all_three |
| """ |
| singles = _extract_single_set_counts(lower) |
| exact2 = _extract_exactly_two(lower) |
| triple = _extract_all_three(lower) |
| total = _extract_total(lower) |
| neither = _extract_neither(lower) |
| target = _question_target(lower) |
|
|
| if len(singles) == 3 and exact2 is not None: |
| a, b, c = singles |
|
|
| |
| if target == "all_three" and total is not None: |
| n = neither if neither is not None else 0.0 |
| |
| ans = (a + b + c - exact2 + n - total) / 2.0 |
| return _safe_result( |
| internal_answer=ans, |
| interpretation="This is the exactly-two / all-three 3-set formula.", |
| steps=[ |
| "Use the version of inclusion–exclusion written in terms of exactly-two and all-three.", |
| "Treat the union as total minus neither when necessary.", |
| "Rearrange the equation so the all-three region is isolated.", |
| ], |
| ) |
|
|
| |
| if target == "neither" and total is not None and triple is not None: |
| ans = total - (a + b + c - exact2 - 2 * triple) |
| return _safe_result( |
| internal_answer=ans, |
| interpretation="This is a 3-set total-versus-union question using exactly-two and all-three.", |
| steps=[ |
| "Compute the union from the three set totals, the exactly-two count, and the all-three count.", |
| "Subtract the union from the total to get neither.", |
| ], |
| ) |
|
|
| |
| if target == "exactly_two" and total is not None and triple is not None: |
| n = neither if neither is not None else 0.0 |
| ans = a + b + c + n - total - 2 * triple |
| return _safe_result( |
| internal_answer=ans, |
| interpretation="This is a reverse solve for the exactly-two total in a 3-set problem.", |
| steps=[ |
| "Use the total/union form of the 3-set formula written with exactly-two and all-three.", |
| "Substitute the known values.", |
| "Rearrange to isolate the exactly-two count.", |
| ], |
| ) |
|
|
| |
| if target in ["union", "unknown"] and triple is not None: |
| ans = a + b + c - exact2 - 2 * triple |
| return _safe_result( |
| internal_answer=ans, |
| interpretation="This is a 3-set union problem using exactly-two and all-three.", |
| steps=[ |
| "Start with the sum of the three set totals.", |
| "Subtract the exactly-two contribution.", |
| "Subtract the extra double-counting caused by the all-three region.", |
| ], |
| ) |
|
|
| return None |
|
|
|
|
| def _solve_three_set_from_total_and_triple(lower: str) -> Optional[SolverResult]: |
| """ |
| Example: |
| Total known, neither implied 0, singles known, all-three known -> find exactly-two |
| Formula: |
| Total = A + B + C - exactly_two - 2*all_three + neither |
| """ |
| singles = _extract_single_set_counts(lower) |
| total = _extract_total(lower) |
| triple = _extract_all_three(lower) |
| neither = _extract_neither(lower) |
| target = _question_target(lower) |
|
|
| if len(singles) == 3 and total is not None and triple is not None and target == "exactly_two": |
| a, b, c = singles |
| n = neither if neither is not None else 0.0 |
| ans = a + b + c + n - total - 2 * triple |
| return _safe_result( |
| internal_answer=ans, |
| interpretation="This is a 3-set exactly-two question using total, singles, and all-three.", |
| steps=[ |
| "Use the 3-set formula written in terms of exactly-two and all-three.", |
| "If the problem says everyone is in at least one set, take neither as zero.", |
| "Rearrange to isolate the exactly-two count.", |
| ], |
| ) |
|
|
| return None |
|
|
|
|
| def _solve_percent_variant(lower: str) -> Optional[SolverResult]: |
| """ |
| Supports survey-style percentage overlapping sets. |
| """ |
| if "percent" not in lower: |
| return None |
|
|
| res = _solve_three_set_from_exactly_two_and_triple(lower) |
| if res is not None: |
| return res |
|
|
| res = _solve_three_set_from_pairwise_and_triple(lower) |
| if res is not None: |
| return res |
|
|
| return None |
|
|
|
|
| def _solve_subset_bound_variant(lower: str) -> Optional[SolverResult]: |
| """ |
| Handles disguised overlap-style minimum union questions such as |
| 'ranges are 17, 28, 35; what is the minimum possible total range?' |
| where the minimum union is the largest set if all smaller sets fit inside it. |
| """ |
| if not _contains_any(lower, ["minimum possible", "minimum", "largest", "smallest", "range"]): |
| return None |
|
|
| nums = _extract_generic_numbers(lower) |
| if len(nums) >= 3: |
| |
| |
| vals = nums[:3] |
| ans = max(vals) |
| return _safe_result( |
| internal_answer=ans, |
| interpretation="This is a disguised minimum-union overlapping-sets idea.", |
| steps=[ |
| "To minimize the total covered range/group, make the smaller sets lie entirely inside the largest set whenever possible.", |
| "So the minimum possible overall coverage cannot be smaller than the largest individual set.", |
| ], |
| ) |
| return None |
|
|
|
|
| |
| |
| |
|
|
| def solve_overlapping_sets(text: str) -> Optional[SolverResult]: |
| lower = _normalize(text) |
|
|
| if not _is_overlapping_sets_context(lower): |
| return None |
|
|
| |
| for solver in [ |
| _solve_percent_variant, |
| _solve_three_set_from_pairwise_and_triple, |
| _solve_three_set_from_exactly_two_and_triple, |
| _solve_three_set_from_total_and_triple, |
| _solve_subset_bound_variant, |
| _solve_two_set_basic, |
| ]: |
| result = solver(lower) |
| if result is not None: |
| return result |
|
|
| return None |