PocketAccountant / scripts /test_llm_agent.py
eldinosaur's picture
US-aware Capture/Ledger + wire in the reasoning LLM agent (Qwen3-8B via vLLM/Modal) with router fallback
d103096 verified
Raw
History Blame Contribute Delete
3.17 kB
"""Test the vLLM reasoning agent end-to-end against the live Modal endpoint.
Warms up the endpoint (cold start downloads + loads the model), then runs a range of
questions through the real Agent loop and prints the tools the model chose + answers.
"""
import json
import os
import sys
import time
import urllib.request
# Best-effort: switch stdout to UTF-8 so the non-ASCII characters this script
# prints (e.g. "…" and "¿") can be written. Any failure is deliberately ignored.
try:
    sys.stdout.reconfigure(encoding="utf-8")
except Exception:
    pass

# Base URL of the deployed endpoint; also used by warmup() and main().
BASE = "https://carloscmoracd--pa-agent-llm-serve.modal.run/v1"

# Export the LLM settings before the `src` imports that follow.
# NOTE(review): presumably `src` reads these at import time — confirm.
os.environ.update(
    PA_LLM_MODE="openai",
    PA_LLM_ENDPOINT=BASE,
    PA_LLM_MODEL="pa-agent",
)
from src.agent import Agent, ToolContext, build_tools
from src.agent.serving import OpenAIToolClient
from src.finetune import RuleClassifier
from src.ledger import Ledger
from src.retrieval import Retriever
from src import config
def warmup(max_wait=600, *, poll_interval=15, request_timeout=30):
    """Poll the endpoint's ``/models`` route until it answers or time runs out.

    Args:
        max_wait: Maximum number of seconds to keep polling.
        poll_interval: Seconds to pause after a failed attempt. The pause is
            shortened so it never runs past the ``max_wait`` deadline.
        request_timeout: Per-request timeout in seconds passed to ``urlopen``.

    Returns:
        True as soon as one request succeeds and its body parses as JSON from
        which the model ids can be read; False if ``max_wait`` elapses first.
        When ``max_wait`` is zero or negative no request is made at all.
    """
    print("Warming up endpoint (cold start can take a few minutes)…", flush=True)
    # Monotonic clock: the elapsed time and the deadline must not be affected
    # by wall-clock adjustments during a multi-minute wait.
    start = time.monotonic()
    deadline = start + max_wait
    while time.monotonic() < deadline:
        try:
            req = urllib.request.Request(
                BASE + "/models",
                headers={"Authorization": "Bearer EMPTY"},
            )
            with urllib.request.urlopen(req, timeout=request_timeout) as resp:
                data = json.loads(resp.read())
            ids = [m["id"] for m in data.get("data", [])]
        except Exception as e:
            # Broad on purpose: any failure (network, HTTP, malformed body)
            # simply means "not ready yet" and triggers another attempt.
            print(f" …not ready ({int(time.monotonic()-start)}s): {type(e).__name__}", flush=True)
            # Never sleep past the deadline.
            remaining = deadline - time.monotonic()
            time.sleep(max(0.0, min(poll_interval, remaining)))
        else:
            print(f" ready after {int(time.monotonic()-start)}s. models: {ids}", flush=True)
            return True
    return False
def main():
    """Run sample questions through the Agent loop against the live endpoint.

    Waits for the endpoint with ``warmup()``, seeds an in-memory ledger with a
    few income and expense records, builds the agent tools, then sends each
    question through a fresh ``Agent`` and prints the tools used and the
    (truncated) answer.

    Returns:
        0 when the endpoint came up and every question ran without raising;
        1 when the endpoint never came up or at least one question raised.
    """
    if not warmup():
        print("Endpoint never came up.")
        return 1

    # In-memory ledger seeded with sample records dated May 2024.
    lg = Ledger(":memory:")
    lg.record_income("2024-05-03", "Branding", 18000, iva_rate="0.16")
    lg.record_income("2024-05-17", "Website", 27000, iva_rate="0.16",
                     isr_retenido="2700", iva_retenido="2880")
    lg.record_expense("2024-05-05", "Adobe", 1300, iva_rate="0.16")
    lg.record_expense("2024-05-12", "Office rent", 4000, iva_rate="0.16")

    retriever = Retriever.from_corpus_dir(config.REGULATION_DIR)
    ctx = ToolContext(lg, retriever=retriever, classifier=RuleClassifier(), country="MX")
    tools = build_tools(ctx)

    # NOTE(review): the bracketed prefixes look like country / language /
    # period tags consumed downstream — confirm against the Agent prompt format.
    questions = [
        "[mx][en][2024-05] How much VAT do I owe this month?",
        "[mx][en][2024-05] Which tax regime suits me better?",
        "[mx][es][2024-05] ¿Puedo deducir mi laptop nueva?",
        "[us][en][2024-05] How much federal tax will I owe this year?",
        "[us][en][2024-05] What is the QBI deduction and do I qualify?",
        "[mx][en][2024-05] Explain what RESICO is in one sentence.",
    ]

    failures = 0
    for q in questions:
        print("\n" + "=" * 70)
        print("Q:", q)
        # perf_counter: elapsed-time measurement independent of wall-clock changes.
        t0 = time.perf_counter()
        try:
            trace = Agent(OpenAIToolClient(BASE), tools, max_steps=5).run(q)
            print(f"tools used: {[s.tool for s in trace.steps]} ({time.perf_counter()-t0:.1f}s)")
            print("A:", (trace.final_answer or "").replace("\n", " ")[:400])
        except Exception as e:
            # One failing question is reported and counted; the rest still run.
            failures += 1
            print("ERROR:", repr(e)[:300])
    return 1 if failures else 0


if __name__ == "__main__":
    # Propagate the result as the process exit status so automation can
    # tell a failed run from a successful one.
    sys.exit(main())