File size: 3,170 Bytes
d103096
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
"""Test the vLLM reasoning agent end-to-end against the live Modal endpoint.

Warms up the endpoint (cold start downloads + loads the model), then runs a range of
questions through the real Agent loop and prints the tools the model chose + answers.
"""

import json
import os
import sys
import time
import urllib.request

# Best-effort switch of stdout to UTF-8: the script prints non-ASCII text
# (e.g. "…" and "¿") further down. Any failure here is deliberately ignored
# and stdout keeps whatever encoding it already had.
try:
    sys.stdout.reconfigure(encoding="utf-8")
except Exception:
    pass

# Base URL of the live endpoint; warmup() and main() both use it.
BASE = "https://carloscmoracd--pa-agent-llm-serve.modal.run/v1"
# NOTE(review): these are set before the `src` imports below — presumably
# because the project reads them at import time; confirm before reordering.
os.environ["PA_LLM_MODE"] = "openai"
os.environ["PA_LLM_ENDPOINT"] = BASE
os.environ["PA_LLM_MODEL"] = "pa-agent"

from src.agent import Agent, ToolContext, build_tools
from src.agent.serving import OpenAIToolClient
from src.finetune import RuleClassifier
from src.ledger import Ledger
from src.retrieval import Retriever
from src import config


def warmup(max_wait=600, *, poll_interval=15, request_timeout=30):
    """Poll ``BASE + "/models"`` until it answers or ``max_wait`` elapses.

    Each attempt sends a GET with a placeholder bearer token, parses the JSON
    body and prints the model ids it lists. Any exception raised by an attempt
    (connection error, HTTP error, timeout, malformed body, ...) is reported
    by type name and treated as "not ready yet".

    Args:
        max_wait: Total time budget in seconds. A value <= 0 performs no
            request at all.
        poll_interval: Seconds to pause after a failed attempt. The pause is
            shortened so it never runs past the ``max_wait`` deadline.
        request_timeout: Per-request timeout in seconds passed to ``urlopen``.

    Returns:
        True as soon as one attempt succeeds, False if the deadline passes
        without a successful attempt.
    """
    print("Warming up endpoint (cold start can take a few minutes)…", flush=True)
    # Monotonic clock: elapsed-time measurement must not be affected by
    # wall-clock adjustments while we wait.
    start = time.monotonic()
    deadline = start + max_wait
    while time.monotonic() < deadline:
        try:
            req = urllib.request.Request(BASE + "/models",
                                         headers={"Authorization": "Bearer EMPTY"})
            with urllib.request.urlopen(req, timeout=request_timeout) as r:
                data = json.loads(r.read())
            ids = [m["id"] for m in data.get("data", [])]
        except Exception as e:
            # Broad on purpose: this is a polling boundary and every failure
            # mode simply means "try again later".
            print(f"  …not ready ({int(time.monotonic()-start)}s): {type(e).__name__}",
                  flush=True)
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                break
            # Never sleep beyond the deadline (and never pass a negative
            # value to time.sleep, which would raise).
            time.sleep(min(poll_interval, remaining))
        else:
            print(f"  ready after {int(time.monotonic()-start)}s. models: {ids}", flush=True)
            return True
    return False


def main():
    """Run the sample questions through the agent against the live endpoint.

    Steps: wait for the endpoint via ``warmup()`` (bail out with a message if
    it never responds), seed an in-memory ledger with two income and two
    expense records, build the tool set, then run each question through a
    fresh ``Agent`` and print the tools used, the elapsed time and the first
    400 characters of the answer. A failure on one question is printed and
    does not stop the remaining questions.
    """
    ready = warmup()
    if not ready:
        print("Endpoint never came up.")
        return

    # Sample bookkeeping data for the tools to work with.
    ledger = Ledger(":memory:")
    ledger.record_income("2024-05-03", "Branding", 18000, iva_rate="0.16")
    ledger.record_income(
        "2024-05-17", "Website", 27000,
        iva_rate="0.16", isr_retenido="2700", iva_retenido="2880",
    )
    ledger.record_expense("2024-05-05", "Adobe", 1300, iva_rate="0.16")
    ledger.record_expense("2024-05-12", "Office rent", 4000, iva_rate="0.16")

    context = ToolContext(
        ledger,
        retriever=Retriever.from_corpus_dir(config.REGULATION_DIR),
        classifier=RuleClassifier(),
        country="MX",
    )
    toolset = build_tools(context)

    questions = [
        "[mx][en][2024-05] How much VAT do I owe this month?",
        "[mx][en][2024-05] Which tax regime suits me better?",
        "[mx][es][2024-05] ¿Puedo deducir mi laptop nueva?",
        "[us][en][2024-05] How much federal tax will I owe this year?",
        "[us][en][2024-05] What is the QBI deduction and do I qualify?",
        "[mx][en][2024-05] Explain what RESICO is in one sentence.",
    ]
    separator = "=" * 70

    for question in questions:
        print("\n" + separator)
        print("Q:", question)
        started = time.time()
        try:
            # A new client and agent per question, as in the original flow.
            agent = Agent(OpenAIToolClient(BASE), toolset, max_steps=5)
            trace = agent.run(question)
            tool_names = [step.tool for step in trace.steps]
            elapsed = time.time() - started
            print(f"tools used: {tool_names}  ({elapsed:.1f}s)")
            answer = trace.final_answer or ""
            print("A:", answer.replace("\n", " ")[:400])
        except Exception as exc:
            # Report and move on so one bad question doesn't end the run.
            print("ERROR:", repr(exc)[:300])


# Run the end-to-end check only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    main()