"""Central configuration — paths, model ids, and feature flags. Kept deliberately small. The one rule that matters: nothing here changes a tax number — those live only in src/engine/tax_tables.py. """ from __future__ import annotations from pathlib import Path ROOT = Path(__file__).resolve().parent.parent DATA_DIR = ROOT / "data" REGULATION_DIR = DATA_DIR / "regulation" CHART_OF_ACCOUNTS_DIR = DATA_DIR / "catalogo_cuentas" USER_DATA_DIR = DATA_DIR / "user" LEDGER_PATH = USER_DATA_DIR / "ledger.db" # Model — our fine-tuned MiniCPM (OpenBMB), trained on Modal, published to the Hub. # SAT transaction classifier: eval_loss 0.155, token accuracy 96.1% (3 epochs). MODEL_REPO = "eldinosaur/cuentas-claras-sat-classifier-minicpm" MODEL_BASE = "openbmb/MiniCPM4.1-8B" DATASET_REPO = "eldinosaur/cuentas-claras-sat-classifier" # GGUF quantizations for local llama.cpp inference (🦙 / 🔌). GGUF_REPO = "eldinosaur/cuentas-claras-sat-classifier-gguf" MODEL_GGUF_FILE = "cuentas-claras-sat-Q4_K_M.gguf" # laptop default (~4.97 GB) USE_LOCAL_LLAMACPP = True # local-first; Modal endpoint is fallback # Retrieval guardrail: below this cosine similarity, abstain and recommend a CPA. # Tuned for the pure-Python TF-IDF retriever (src/retrieval/index.py). RETRIEVAL_MIN_SCORE = 0.08 DEFAULT_COUNTRY = "MX" # "MX" | "US" DEFAULT_REGIME = "RESICO" # "RESICO" | "GENERAL"