Spaces:
Running on Zero
Running on Zero
| """Deterministic numeric-consistency checker (no LLM, instant, free). | |
| A ≤4B model generating freehand still slips on prose numbers (e.g. "gera R$375k/dia" | |
| when 5.000 rides × R$25 = R$125k). The teacher/code-audit fixes the TRAINING corpus, | |
| but the live student isn't audited at inference (that would 3× latency + burn ZeroGPU | |
| quota). So instead the Space runs this cheap pattern-checker over the generated case and | |
| **flags** likely inconsistencies for the author to verify — fitting the authoring/review | |
| model. It WARNS, never silently "fixes" prose. | |
| Conservative by design: only flags when it confidently extracts a relationship. | |
| """ | |
| from __future__ import annotations | |
| import re | |
# Regex fragment: countable business units (pt-BR and English, singular or
# plural) that a quantity can be expressed in.
_UNIT = (
    r"(?:corridas?|viagens?|unidades?|clientes?|pedidos?|atendimentos?|"
    r"rides?|trips?|units?|customers?|orders?)"
)

# Regex fragment: period markers (pt-BR and English). Despite the name it
# covers both per-day and per-month expressions.
_PERDAY = (
    r"(?:por dia|/\s*dia|ao dia|di[aá]ri[ao]s?|per day|/\s*day|daily|"
    r"por m[eê]s|/\s*m[eê]s|monthly|per month)"
)
| def _num(text: str) -> float | None: | |
| """Parse a pt-BR/en number, honoring mil/milhão (and thousands/decimal seps).""" | |
| t = text.lower() | |
| mult = 1.0 | |
| if "milh" in t or "million" in t or "mi " in t: | |
| mult = 1_000_000.0 | |
| elif "mil" in t or "thousand" in t: | |
| mult = 1_000.0 | |
| m = re.search(r"\d[\d.,]*", t) | |
| if not m: | |
| return None | |
| n = m.group(0) | |
| if "," in n and "." in n: # pt-BR: '.' thousands, ',' decimal | |
| n = n.replace(".", "").replace(",", ".") | |
| elif "," in n: # only comma → decimal sep | |
| n = n.replace(",", ".") | |
| else: # only dots → thousands sep (375.000) | |
| if re.search(r"\.\d{3}\b", n): | |
| n = n.replace(".", "") | |
| try: | |
| return float(n) * mult | |
| except ValueError: | |
| return None | |
| def _money(seg: str): | |
| """First R$/$ amount in a fragment (carrying mil/milhão words after it).""" | |
| m = re.search(r"(?:R\$|\$)\s*([\d.,]+(?:\s*(?:mil|milh[õo]es?|milh[ãa]o|million|thousand))?)", seg, re.I) | |
| return _num(m.group(1)) if m else None | |
def _close(a: float, b: float, tol: float = 0.05) -> bool:
    """Tell whether *a* and *b* agree within a relative tolerance.

    The difference is measured against the larger magnitude of the two
    values, floored at 1.0 so tiny numbers do not blow up the ratio. When
    either value is missing (``None``) there is nothing to contradict, so
    the pair counts as close.
    """
    if a is not None and b is not None:
        gap = abs(a - b)
        reference = max(abs(a), abs(b), 1.0)
        return gap / reference <= tol
    return True
def _fmt(v: float) -> str:
    """Render *v* rounded to a whole number with '.' as thousands separator."""
    whole = round(v)
    # Group with ',' first, then switch to the pt-BR '.' grouping mark.
    grouped = format(whole, ",.0f")
    return grouped.replace(",", ".")
def _sentences(text: str) -> list[str]:
    """Split *text* into rough sentence-like fragments.

    A break happens at any whitespace run that follows '.', ';' or ':'
    (the punctuation stays with the preceding fragment) and at every
    newline.
    """
    boundary = re.compile(r"(?<=[.;:])\s+|\n")
    return boundary.split(text)
def check(obj: dict, lang: str = "pt") -> list[str]:
    """Return author-facing warnings about likely numeric inconsistencies.

    The text scanned is the concatenation of ``obj["case"]["context"]``,
    every item of ``obj["case"]["data"]`` and the ``"content"`` of every
    entry in ``obj["case"]["exhibits"]``. Missing or ``None`` pieces are
    treated as empty; exhibits that are not dicts are included via ``str()``.

    Two relationships are checked, each only when all of its parts could be
    extracted from the text:

    1. period revenue ~= quantity x unit price;
    2. cost + profit ~= price.

    Args:
        obj: Generated case object; a falsy value yields no warnings.
        lang: ``"pt"`` for Portuguese messages, anything else for English.

    Returns:
        A list of warning strings (empty when nothing looks inconsistent).
    """
    if not obj:
        return []
    c = obj.get("case") or {}
    data = c.get("data") or []
    exhibits = c.get("exhibits") or []

    # str()/`or ""` keep a None or non-string context from breaking the join.
    parts = [str(c.get("context") or "")]
    parts.extend(str(d) for d in data)
    parts.extend(
        str(e.get("content", "")) if isinstance(e, dict) else str(e)
        for e in exhibits
    )
    text = " ".join(parts)
    warns: list[str] = []

    # --- 1) daily/monthly revenue ~= quantity x unit price -------------------
    # Only the captured number is parsed: the rest of the match ("... por
    # dia") may contain words that look like scale words to the parser.
    qty_pattern = rf"(\d[\d.,]*)\s*{_UNIT}[^.]*?{_PERDAY}"
    qtys = [_num(m.group(1)) for m in re.finditer(qty_pattern, text, re.I)]
    qtys = [q for q in qtys if q]

    unit_price = None
    mp = re.search(
        rf"(?:R\$|\$)\s*([\d.,]+)\s*(?:por|/|each|per)\s*{_UNIT}", text, re.I)
    if mp:
        unit_price = _num(mp.group(1))

    # The revenue sentence: a revenue-like verb/noun plus a period marker.
    # NOTE(review): the first money amount in that sentence is used, which
    # may be the unit price if both appear in the same sentence.
    rev = None
    for s in _sentences(text):
        if (re.search(r"receita|fatur|gera|revenue|generat|bring", s, re.I)
                and re.search(_PERDAY, s, re.I)):
            rev = _money(s)
            if rev:
                break

    if qtys and unit_price and rev:
        q = max(qtys)  # with several candidates, use the largest quantity
        if not _close(q * unit_price, rev):
            warns.append(
                (f"receita ({_fmt(rev)}) não bate com {_fmt(q)} × {_fmt(unit_price)} = "
                 f"{_fmt(q * unit_price)}" if lang == "pt" else
                 f"revenue ({_fmt(rev)}) doesn't match {_fmt(q)} × {_fmt(unit_price)} = "
                 f"{_fmt(q * unit_price)}"))

    # --- 2) cost + profit ~= price ------------------------------------------
    def _find(label_re):
        """First money amount after a whole-word label, within one sentence."""
        # Word boundaries keep "custo" from matching inside "customers".
        m = re.search(
            rf"\b(?:{label_re})s?\b[^.]*?(?:R\$|\$)\s*([\d.,]+)", text, re.I)
        return _num(m.group(1)) if m else None

    custo = _find(r"custo|cost")
    lucro = _find(r"lucro|margem de lucro|profit")
    preco = _find(r"pre[çc]o|price")
    if custo and lucro and preco and not _close(custo + lucro, preco):
        warns.append(
            (f"custo ({_fmt(custo)}) + lucro ({_fmt(lucro)}) = {_fmt(custo + lucro)} "
             f"≠ preço ({_fmt(preco)})" if lang == "pt" else
             f"cost ({_fmt(custo)}) + profit ({_fmt(lucro)}) = {_fmt(custo + lucro)} "
             f"≠ price ({_fmt(preco)})"))
    return warns
# Public API: only `check` is exported; the underscore-prefixed helpers are
# internal to this module.
__all__ = ["check"]