PocketAccountant / src /config.py
eldinosaur's picture
PocketAccountant: custom ledger UI + deterministic agent (engine, ledger, retrieval, classifier)
c55ab5e verified
Raw
History Blame Contribute Delete
1.43 kB
"""Central configuration — paths, model ids, and feature flags.
Kept deliberately small. The one rule that matters: nothing here changes a tax
number — those live only in src/engine/tax_tables.py.
"""
from __future__ import annotations
from pathlib import Path
# Filesystem layout. ROOT is the directory two levels above this file's
# resolved location; every other path below is derived from ROOT / "data".
ROOT: Path = Path(__file__).resolve().parent.parent
DATA_DIR: Path = ROOT.joinpath("data")
# Sub-directories of DATA_DIR ("catalogo_cuentas" holds the chart of accounts).
REGULATION_DIR: Path = DATA_DIR.joinpath("regulation")
CHART_OF_ACCOUNTS_DIR: Path = DATA_DIR.joinpath("catalogo_cuentas")
USER_DATA_DIR: Path = DATA_DIR.joinpath("user")
# Ledger database file, kept under the per-user data directory.
LEDGER_PATH: Path = USER_DATA_DIR.joinpath("ledger.db")
# Model identifiers. The classifier is our fine-tuned MiniCPM (OpenBMB),
# trained on Modal and published to the Hub.
# Reported metrics for the SAT transaction classifier after 3 epochs:
# eval_loss 0.155, token accuracy 96.1%.
MODEL_REPO: str = "eldinosaur/cuentas-claras-sat-classifier-minicpm"
MODEL_BASE: str = "openbmb/MiniCPM4.1-8B"
DATASET_REPO: str = "eldinosaur/cuentas-claras-sat-classifier"

# GGUF quantizations, used for local llama.cpp inference (🦙 / 🔌).
GGUF_REPO: str = "eldinosaur/cuentas-claras-sat-classifier-gguf"
# Default quantization for laptops (~4.97 GB).
MODEL_GGUF_FILE: str = "cuentas-claras-sat-Q4_K_M.gguf"
# Local-first: prefer llama.cpp on this machine; the Modal endpoint is the fallback.
USE_LOCAL_LLAMACPP: bool = True
# Retrieval guardrail. When the cosine similarity falls below this threshold,
# abstain and recommend a CPA. The value is tuned for the pure-Python TF-IDF
# retriever (src/retrieval/index.py).
RETRIEVAL_MIN_SCORE: float = 0.08

# Defaults for jurisdiction and tax regime.
# Country is one of "MX" | "US".
DEFAULT_COUNTRY: str = "MX"
# Regime is one of "RESICO" | "GENERAL".
DEFAULT_REGIME: str = "RESICO"