"""Benchmark script for the edge agentic GPT model.

Reports process memory around model construction, raw token-generation
latency/throughput, and the wall time of one recursive reasoning loop.
"""
import os
import time

import psutil
import tiktoken
import torch

from model.nano_gpt import AgentGPT, Config


def measure_memory() -> float:
    """Return the resident set size (RSS) of the current process in MiB."""
    process = psutil.Process(os.getpid())
    return process.memory_info().rss / (1024 * 1024)  # bytes -> MiB


def benchmark(num_new_tokens: int = 10) -> None:
    """Run the model benchmarks and print the results to stdout.

    Four phases run in order: model construction (time and RSS delta),
    tokenization of a fixed prompt, a timed ``generate`` call preceded by
    a one-token warmup, and a timed ``RecursiveAgenticLoop`` run.

    Args:
        num_new_tokens: Number of tokens requested from ``model.generate``
            in the timed inference run. Defaults to 10.

    Raises:
        ValueError: If ``num_new_tokens`` is less than 1.
    """
    if num_new_tokens < 1:
        raise ValueError("num_new_tokens must be >= 1")

    # 1. Setup
    config = Config()
    # Override depth/width; the original comment describes this as the
    # "100M" configuration (parameter count is not verified here).
    config.n_layer = 10
    config.n_embd = 640

    print("--- Edge Agentic Model Benchmark (100M) ---")

    mem_before = measure_memory()
    print(f"Memory before model init: {mem_before:.2f} MB")

    # perf_counter() is monotonic and high-resolution, unlike time.time(),
    # which can jump when the system clock is adjusted.
    start_init = time.perf_counter()
    model = AgentGPT(config)
    model.eval()
    init_time = time.perf_counter() - start_init

    mem_after = measure_memory()
    print(f"Model Init Time: {init_time:.4f}s")
    print(f"Memory after model init: {mem_after:.2f} MB")
    # RSS delta is only an approximation of the weight size: it also
    # includes any other allocations made during construction.
    print(f"Approx. Model Weight Size: {mem_after - mem_before:.2f} MB")

    # 2. Tokenizer
    enc = tiktoken.get_encoding("cl100k_base")
    prompt = (
        "<|goal|> Calculate the trajectory of the satellite using the "
        "radar tool. <|thought|>"
    )
    # Wrap the id list in an outer list so the tensor has a leading
    # batch dimension of size 1.
    tokens = torch.tensor([enc.encode(prompt)])

    # 3. Inference Benchmark (Latency)
    print(f"\nStarting Inference Benchmark ({num_new_tokens} tokens)...")

    # Warmup: one untimed token so one-off first-call costs do not skew
    # the measurement below.
    with torch.no_grad():
        _ = model.generate(tokens, max_new_tokens=1)

    start_inf = time.perf_counter()
    with torch.no_grad():
        _ = model.generate(tokens, max_new_tokens=num_new_tokens)
    total_time = time.perf_counter() - start_inf

    # Throughput is based on the number of tokens *requested*; guard the
    # division in case the clock reports zero elapsed time.
    if total_time > 0:
        tokens_per_sec = num_new_tokens / total_time
    else:
        tokens_per_sec = float("inf")

    print(f"Total Time for {num_new_tokens} tokens: {total_time:.4f}s")
    print(f"Throughput: {tokens_per_sec:.2f} tokens/sec (CPU)")

    # 4. Agentic Reasoning Benchmark
    print("\n--- Recursive Reasoning Benchmark ---")
    # Imported here rather than at module top, as in the original script.
    from agent.recursive_reasoning import RecursiveAgenticLoop

    class TiktokenWrapper:
        """Adapter giving a tiktoken encoding ``encode``/``decode`` methods
        that accept a ``return_tensors='pt'`` keyword."""

        def __init__(self, e):
            self.e = e

        def encode(self, t, **kwargs):
            """Encode ``t``; return a (1, n) tensor if ``return_tensors='pt'``,
            otherwise a plain list of token ids."""
            ids = self.e.encode(t)
            if kwargs.get('return_tensors') == 'pt':
                return torch.tensor([ids])
            return ids

        def decode(self, i):
            """Decode token ids ``i`` back to text."""
            return self.e.decode(i)

    tokenizer = TiktokenWrapper(enc)
    loop = RecursiveAgenticLoop(model, tokenizer, demo_mode=True)

    # Benchmark the structure
    start_loop = time.perf_counter()
    _ = loop.generate_with_reasoning(prompt)
    loop_time = time.perf_counter() - start_loop

    # NOTE(review): the "4-Iteration" label is taken from the original
    # message; the iteration count is decided inside RecursiveAgenticLoop
    # and is not visible here -- confirm it still matches.
    print(f"Full 4-Iteration Recursive Loop Time: {loop_time:.4f}s")
    print("-" * 40)


if __name__ == "__main__":
    benchmark()