File size: 2,488 Bytes
c61a185
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import torch
import time
import psutil
import os
from model.nano_gpt import AgentGPT, Config
import tiktoken

def measure_memory():
    """Return the current process's resident set size (RSS) in MB.

    One MB is taken as 1024 * 1024 bytes.
    """
    bytes_per_mb = 1024 * 1024
    current_process = psutil.Process(os.getpid())
    rss_bytes = current_process.memory_info().rss
    return rss_bytes / bytes_per_mb

def benchmark(num_tokens=10):
    """Benchmark model init, generation latency and the recursive agent loop.

    Results are printed to stdout in four stages:

    1. Build an ``AgentGPT`` from a ``Config`` with ``n_layer=10`` and
       ``n_embd=640``, reporting construction time and the growth in
       process RSS (used as a rough proxy for the weight footprint).
    2. Tokenize a fixed prompt with tiktoken's ``cl100k_base`` encoding.
    3. After a one-token warmup call, time a single ``model.generate`` call
       requesting ``num_tokens`` new tokens and report throughput.
    4. Time one ``RecursiveAgenticLoop.generate_with_reasoning`` call with
       ``demo_mode=True``.

    Args:
        num_tokens: Number of new tokens requested in the timed generation
            call. Must be positive. Defaults to 10.

    Raises:
        ValueError: If ``num_tokens`` is not positive.
    """
    if num_tokens <= 0:
        raise ValueError(f"num_tokens must be positive, got {num_tokens}")

    # 1. Setup
    config = Config()
    # Ensure 100M config
    # NOTE(review): the resulting parameter count also depends on Config
    # defaults not visible here -- confirm it really lands near 100M.
    config.n_layer = 10
    config.n_embd = 640

    print("--- Edge Agentic Model Benchmark (100M) ---")

    mem_before = measure_memory()
    print(f"Memory before model init: {mem_before:.2f} MB")

    # perf_counter() is monotonic and high-resolution; time.time() can jump
    # with system clock adjustments and is too coarse for short intervals.
    start_init = time.perf_counter()
    model = AgentGPT(config)
    model.eval()
    end_init = time.perf_counter()

    mem_after = measure_memory()
    print(f"Model Init Time: {end_init - start_init:.4f}s")
    print(f"Memory after model init: {mem_after:.2f} MB")
    # RSS delta also includes allocator overhead, so this is only approximate.
    print(f"Approx. Model Weight Size: {mem_after - mem_before:.2f} MB")

    # 2. Tokenizer
    enc = tiktoken.get_encoding("cl100k_base")
    prompt = "<|goal|> Calculate the trajectory of the satellite using the radar tool. <|thought|>"
    # Wrap the id list in an outer list to add a leading batch dimension.
    tokens = torch.tensor([enc.encode(prompt)])

    # 3. Inference Benchmark (Latency)
    print(f"\nStarting Inference Benchmark ({num_tokens} tokens)...")

    # Warmup: keep one-time setup costs out of the timed run.
    with torch.no_grad():
        _ = model.generate(tokens, max_new_tokens=1)

    start_inf = time.perf_counter()
    with torch.no_grad():
        _ = model.generate(tokens, max_new_tokens=num_tokens)
    end_inf = time.perf_counter()

    total_time = end_inf - start_inf
    # Assumes generate() emits exactly `max_new_tokens` tokens (no early
    # stop) -- TODO confirm against AgentGPT.generate.
    # Guard against a zero-length interval instead of raising ZeroDivisionError.
    tokens_per_sec = num_tokens / total_time if total_time > 0 else float("inf")

    print(f"Total Time for {num_tokens} tokens: {total_time:.4f}s")
    print(f"Throughput: {tokens_per_sec:.2f} tokens/sec (CPU)")

    # 4. Agentic Reasoning Benchmark
    print("\n--- Recursive Reasoning Benchmark ---")
    # Imported lazily so the stages above do not depend on the agent package.
    from agent.recursive_reasoning import RecursiveAgenticLoop

    class TiktokenWrapper:
        """Adapter giving a tiktoken encoding ``encode``/``decode`` methods.

        ``encode`` accepts an optional ``return_tensors='pt'`` keyword;
        presumably this is the interface RecursiveAgenticLoop expects --
        verify against its implementation.
        """

        def __init__(self, e):
            self.e = e

        def encode(self, t, **kwargs):
            """Encode text ``t``; return a batched tensor if asked for 'pt'."""
            ids = self.e.encode(t)
            if kwargs.get('return_tensors') == 'pt':
                return torch.tensor([ids])
            return ids

        def decode(self, i):
            """Decode the token ids ``i`` back to text."""
            return self.e.decode(i)

    tokenizer = TiktokenWrapper(enc)
    loop = RecursiveAgenticLoop(model, tokenizer, demo_mode=True)  # Benchmark the structure

    start_loop = time.perf_counter()
    _ = loop.generate_with_reasoning(prompt)
    end_loop = time.perf_counter()

    # NOTE(review): "4-Iteration" is taken from the original message; the
    # iteration count is set inside RecursiveAgenticLoop, not here.
    print(f"Full 4-Iteration Recursive Loop Time: {end_loop - start_loop:.4f}s")
    print("-" * 40)

# Run the benchmark only when this file is executed as a script, not on import.
if __name__ == "__main__":
    benchmark()