"""Простой regex-экстрактор суммы из текста.""" from __future__ import annotations import re from typing import Any, Optional AMOUNT_PATTERN = re.compile(r"\d+(?:,\d{1,2})?", re.IGNORECASE) class ExpenseAmountExtractor: """Извлекает сумму как целое число или число с запятой.""" def __init__(self, suppliers: list[str] | None = None) -> None: self.suppliers = suppliers or [] @staticmethod def to_float(value: str) -> Optional[float]: try: return float(value.replace(",", ".")) except ValueError: return None @staticmethod def phrase_span(text: str, phrase: Optional[str]) -> Optional[tuple[int, int]]: if not phrase: return None idx = text.lower().find(phrase.lower()) if idx == -1: return None return idx, idx + len(phrase) @staticmethod def overlaps(span1: tuple[int, int], span2: Optional[tuple[int, int]]) -> bool: if span2 is None: return False return span1[0] < span2[1] and span2[0] < span1[1] def extract( self, text: str, matched_date_phrase: Optional[str] = None, matched_supplier_phrase: Optional[str] = None, debug: bool = False, ) -> dict[str, Any]: date_span = self.phrase_span(text, matched_date_phrase) supplier_span = self.phrase_span(text, matched_supplier_phrase) candidates: list[dict[str, Any]] = [] for match in AMOUNT_PATTERN.finditer(text): span = match.span() overlaps_date = self.overlaps(span, date_span) overlaps_supplier = self.overlaps(span, supplier_span) amount_text = match.group(0) if debug: candidates.append({ "value": amount_text, "span": [span[0], span[1]], "overlaps_date": overlaps_date, "overlaps_supplier": overlaps_supplier, }) if overlaps_date or overlaps_supplier: continue amount = self.to_float(amount_text) if amount is not None: payload = {"amount": amount, "amount_text": amount_text} if debug: payload["amount_debug"] = { "matched_date_phrase": matched_date_phrase, "matched_supplier_phrase": matched_supplier_phrase, "date_span": list(date_span) if date_span else None, "supplier_span": list(supplier_span) if supplier_span else None, "candidates": candidates, "selected": amount_text, } return payload payload = {"amount": None, "amount_text": None} if debug: payload["amount_debug"] = { "matched_date_phrase": matched_date_phrase, "matched_supplier_phrase": matched_supplier_phrase, "date_span": list(date_span) if date_span else None, "supplier_span": list(supplier_span) if supplier_span else None, "candidates": candidates, "selected": None, } return payload