Spaces:

build-small-hackathon
/

PocketAccountant

Running

App Files Files Community

PocketAccountant / scripts /test_llm_agent.py

eldinosaur

US-aware Capture/Ledger + wire in the reasoning LLM agent (Qwen3-8B via vLLM/Modal) with router fallback

d103096 verified 17 days ago

Raw

History Blame Contribute Delete

3.17 kB

	"""Test the vLLM reasoning agent end-to-end against the live Modal endpoint.

	Warms up the endpoint (cold start downloads + loads the model), then runs a range of
	questions through the real Agent loop and prints the tools the model chose + answers.
	"""

	import json
	import os
	import sys
	import time
	import urllib.request

	# Best effort: switch stdout to UTF-8 so the non-ASCII characters printed by
	# this script ("…", "¿") survive consoles with a legacy encoding. Any failure
	# (for example a replaced stdout without reconfigure()) is ignored on purpose.
	try:
	    sys.stdout.reconfigure(encoding="utf-8")
	except Exception:
	    pass

	# Root URL of the OpenAI-compatible endpoint exercised by this script.
	BASE = "https://carloscmoracd--pa-agent-llm-serve.modal.run/v1"

	# Set before the project imports that follow.
	# NOTE(review): presumably the src package reads these PA_LLM_* variables at
	# import time -- confirm before moving this below the imports.
	os.environ.update(
	    PA_LLM_MODE="openai",
	    PA_LLM_ENDPOINT=BASE,
	    PA_LLM_MODEL="pa-agent",
	)

	from src.agent import Agent, ToolContext, build_tools
	from src.agent.serving import OpenAIToolClient
	from src.finetune import RuleClassifier
	from src.ledger import Ledger
	from src.retrieval import Retriever
	from src import config


	def warmup(max_wait=600, poll_interval=15):
	    """Poll the endpoint's ``/models`` route until it answers or time runs out.

	    Args:
	        max_wait: Maximum number of seconds to keep polling. A value of zero
	            or less returns False without issuing any request.
	        poll_interval: Seconds to pause after a failed attempt. The pause is
	            shortened so it never runs past the ``max_wait`` deadline.

	    Returns:
	        True once a GET on ``BASE + "/models"`` succeeds and its JSON body has
	        been parsed (the model ids found are printed); False if the deadline
	        passes first.
	    """
	    print("Warming up endpoint (cold start can take a few minutes)…", flush=True)
	    # Monotonic clock: the timeout and the reported elapsed seconds must not be
	    # skewed by wall-clock adjustments while we wait.
	    start = time.monotonic()
	    deadline = start + max_wait
	    while time.monotonic() < deadline:
	        try:
	            req = urllib.request.Request(
	                BASE + "/models",
	                headers={"Authorization": "Bearer EMPTY"},
	            )
	            with urllib.request.urlopen(req, timeout=30) as r:
	                data = json.loads(r.read())
	            ids = [m["id"] for m in data.get("data", [])]
	        except Exception as e:
	            # Deliberately broad: any failure of the probe (connection error,
	            # HTTP error, unparsable or unexpected body) means "not ready yet".
	            elapsed = int(time.monotonic() - start)
	            print(f" …not ready ({elapsed}s): {type(e).__name__}", flush=True)
	            # Never sleep beyond the deadline.
	            pause = min(poll_interval, deadline - time.monotonic())
	            if pause > 0:
	                time.sleep(pause)
	        else:
	            elapsed = int(time.monotonic() - start)
	            print(f" ready after {elapsed}s. models: {ids}", flush=True)
	            return True
	    return False


	def main():
	    """Warm up the endpoint, build the tool set, and run the sample questions.

	    Returns early (after printing a message) when ``warmup()`` reports that the
	    endpoint never answered. Otherwise a ledger opened on ``":memory:"`` is
	    filled with two income and two expense entries, the tools are built from
	    it, and each question is run through a new ``Agent``. For every question
	    the tools recorded in the trace, the elapsed seconds, and the first 400
	    characters of the answer (newlines replaced by spaces) are printed. An
	    exception on one question is printed and the loop moves on to the next.
	    """
	    endpoint_ready = warmup()
	    if not endpoint_ready:
	        print("Endpoint never came up.")
	        return

	    # Sample bookkeeping data, all dated May 2024.
	    ledger = Ledger(":memory:")
	    ledger.record_income("2024-05-03", "Branding", 18000, iva_rate="0.16")
	    ledger.record_income(
	        "2024-05-17",
	        "Website",
	        27000,
	        iva_rate="0.16",
	        isr_retenido="2700",
	        iva_retenido="2880",
	    )
	    ledger.record_expense("2024-05-05", "Adobe", 1300, iva_rate="0.16")
	    ledger.record_expense("2024-05-12", "Office rent", 4000, iva_rate="0.16")

	    context = ToolContext(
	        ledger,
	        retriever=Retriever.from_corpus_dir(config.REGULATION_DIR),
	        classifier=RuleClassifier(),
	        country="MX",
	    )
	    tools = build_tools(context)

	    questions = (
	        "[mx][en][2024-05] How much VAT do I owe this month?",
	        "[mx][en][2024-05] Which tax regime suits me better?",
	        "[mx][es][2024-05] ¿Puedo deducir mi laptop nueva?",
	        "[us][en][2024-05] How much federal tax will I owe this year?",
	        "[us][en][2024-05] What is the QBI deduction and do I qualify?",
	        "[mx][en][2024-05] Explain what RESICO is in one sentence.",
	    )
	    separator = "\n" + "=" * 70

	    for question in questions:
	        print(separator)
	        print("Q:", question)
	        started = time.time()
	        try:
	            # A fresh client and agent are created for every question.
	            client = OpenAIToolClient(BASE)
	            agent = Agent(client, tools, max_steps=5)
	            trace = agent.run(question)
	            tool_names = [step.tool for step in trace.steps]
	            elapsed = time.time() - started
	            print(f"tools used: {tool_names} ({elapsed:.1f}s)")
	            answer = trace.final_answer or ""
	            print("A:", answer.replace("\n", " ")[:400])
	        except Exception as exc:
	            # Keep going: one failing question should not hide the others.
	            print("ERROR:", repr(exc)[:300])


	# Run the end-to-end check only when this file is executed as a script;
	# importing the module does not call main().
	if __name__ == "__main__":
	    main()