"""Test the vLLM reasoning agent end-to-end against the live Modal endpoint. Warms up the endpoint (cold start downloads + loads the model), then runs a range of questions through the real Agent loop and prints the tools the model chose + answers. """ import json import os import sys import time import urllib.request try: sys.stdout.reconfigure(encoding="utf-8") except Exception: pass BASE = "https://carloscmoracd--pa-agent-llm-serve.modal.run/v1" os.environ["PA_LLM_MODE"] = "openai" os.environ["PA_LLM_ENDPOINT"] = BASE os.environ["PA_LLM_MODEL"] = "pa-agent" from src.agent import Agent, ToolContext, build_tools from src.agent.serving import OpenAIToolClient from src.finetune import RuleClassifier from src.ledger import Ledger from src.retrieval import Retriever from src import config def warmup(max_wait=600): print("Warming up endpoint (cold start can take a few minutes)…", flush=True) start = time.time() while time.time() - start < max_wait: try: req = urllib.request.Request(BASE + "/models", headers={"Authorization": "Bearer EMPTY"}) with urllib.request.urlopen(req, timeout=30) as r: data = json.loads(r.read()) ids = [m["id"] for m in data.get("data", [])] print(f" ready after {int(time.time()-start)}s. models: {ids}", flush=True) return True except Exception as e: print(f" …not ready ({int(time.time()-start)}s): {type(e).__name__}", flush=True) time.sleep(15) return False def main(): if not warmup(): print("Endpoint never came up."); return lg = Ledger(":memory:") lg.record_income("2024-05-03", "Branding", 18000, iva_rate="0.16") lg.record_income("2024-05-17", "Website", 27000, iva_rate="0.16", isr_retenido="2700", iva_retenido="2880") lg.record_expense("2024-05-05", "Adobe", 1300, iva_rate="0.16") lg.record_expense("2024-05-12", "Office rent", 4000, iva_rate="0.16") retriever = Retriever.from_corpus_dir(config.REGULATION_DIR) ctx = ToolContext(lg, retriever=retriever, classifier=RuleClassifier(), country="MX") tools = build_tools(ctx) cases = [ ("[mx][en][2024-05] How much VAT do I owe this month?", ), ("[mx][en][2024-05] Which tax regime suits me better?", ), ("[mx][es][2024-05] ¿Puedo deducir mi laptop nueva?", ), ("[us][en][2024-05] How much federal tax will I owe this year?", ), ("[us][en][2024-05] What is the QBI deduction and do I qualify?", ), ("[mx][en][2024-05] Explain what RESICO is in one sentence.", ), ] for (q,) in cases: print("\n" + "=" * 70) print("Q:", q) t0 = time.time() try: trace = Agent(OpenAIToolClient(BASE), tools, max_steps=5).run(q) print(f"tools used: {[s.tool for s in trace.steps]} ({time.time()-t0:.1f}s)") print("A:", (trace.final_answer or "").replace("\n", " ")[:400]) except Exception as e: print("ERROR:", repr(e)[:300]) if __name__ == "__main__": main()