Spaces:
Sleeping
Sleeping
| import torch | |
| import time | |
| import psutil | |
| import os | |
| from model.nano_gpt import AgentGPT, Config | |
| import tiktoken | |
def measure_memory():
    """Return the resident set size (RSS) of the current process in MB.

    The byte count reported by psutil is divided by 1024 * 1024.
    """
    bytes_per_mb = 1024 * 1024
    rss_bytes = psutil.Process(os.getpid()).memory_info().rss
    return rss_bytes / bytes_per_mb
def benchmark(max_new_tokens: int = 10) -> None:
    """Run the edge-model benchmark and print the results to stdout.

    Stages:
      1. Build an ``AgentGPT`` from a ``Config`` with ``n_layer=10`` and
         ``n_embd=640``, reporting construction time and the change in
         process RSS around it.
      2. Encode a fixed prompt with the tiktoken ``cl100k_base`` encoding.
      3. After a one-token warmup call, time a single ``model.generate``
         call and report latency and throughput.
      4. Time one ``RecursiveAgenticLoop.generate_with_reasoning`` call.

    Args:
        max_new_tokens: Number of tokens requested from ``model.generate``
            in the timed run; also the numerator of the throughput figure.
            Defaults to 10.

    Raises:
        ValueError: If ``max_new_tokens`` is less than 1.
    """
    if max_new_tokens < 1:
        raise ValueError("max_new_tokens must be >= 1")

    # 1. Setup
    config = Config()
    # Ensure 100M config
    config.n_layer = 10
    config.n_embd = 640

    print("--- Edge Agentic Model Benchmark (100M) ---")

    mem_before = measure_memory()
    print(f"Memory before model init: {mem_before:.2f} MB")

    # perf_counter() is monotonic and high resolution, unlike time.time(),
    # which follows the (adjustable) wall clock.
    start_init = time.perf_counter()
    model = AgentGPT(config)
    model.eval()
    end_init = time.perf_counter()

    mem_after = measure_memory()
    print(f"Model Init Time: {end_init - start_init:.4f}s")
    print(f"Memory after model init: {mem_after:.2f} MB")
    # RSS delta is only an approximation: it includes any other allocation
    # made while the model was being constructed.
    print(f"Approx. Model Weight Size: {mem_after - mem_before:.2f} MB")

    # 2. Tokenizer
    enc = tiktoken.get_encoding("cl100k_base")
    prompt = "<|goal|> Calculate the trajectory of the satellite using the radar tool. <|thought|>"
    tokens = torch.tensor([enc.encode(prompt)])

    # 3. Inference Benchmark (Latency)
    print(f"\nStarting Inference Benchmark ({max_new_tokens} tokens)...")

    # Warmup: one untimed call before the measured run.
    with torch.no_grad():
        _ = model.generate(tokens, max_new_tokens=1)

    start_inf = time.perf_counter()
    with torch.no_grad():
        _ = model.generate(tokens, max_new_tokens=max_new_tokens)
    end_inf = time.perf_counter()

    total_time = end_inf - start_inf
    # NOTE(review): assumes generate() always emits exactly max_new_tokens
    # tokens (no early stop) -- confirm against the model implementation.
    if total_time > 0:
        tokens_per_sec = max_new_tokens / total_time
    else:
        # Guard against a zero-length interval instead of dividing by zero.
        tokens_per_sec = float("inf")

    print(f"Total Time for {max_new_tokens} tokens: {total_time:.4f}s")
    print(f"Throughput: {tokens_per_sec:.2f} tokens/sec (CPU)")

    # 4. Agentic Reasoning Benchmark
    print("\n--- Recursive Reasoning Benchmark ---")
    # Imported here rather than at module level, as in the original script.
    from agent.recursive_reasoning import RecursiveAgenticLoop

    class TiktokenWrapper:
        """Adapter exposing ``encode``/``decode`` on top of a tiktoken encoding."""

        def __init__(self, e):
            self.e = e

        def encode(self, t, **kwargs):
            ids = self.e.encode(t)
            # With return_tensors='pt', return a tensor with a leading
            # batch dimension of 1; otherwise return the plain id list.
            if kwargs.get('return_tensors') == 'pt':
                return torch.tensor([ids])
            return ids

        def decode(self, i):
            return self.e.decode(i)

    tokenizer = TiktokenWrapper(enc)
    loop = RecursiveAgenticLoop(model, tokenizer, demo_mode=True)  # Benchmark the structure

    start_loop = time.perf_counter()
    _ = loop.generate_with_reasoning(prompt)
    end_loop = time.perf_counter()

    # NOTE(review): the "4-Iteration" label is taken on trust; the iteration
    # count is decided inside RecursiveAgenticLoop, which is not visible here.
    print(f"Full 4-Iteration Recursive Loop Time: {end_loop - start_loop:.4f}s")
    print("-" * 40)
# Run the benchmark only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    benchmark()