""" ZENT AGENTIC Model Inference Script ==================================== Test your fine-tuned model locally. Usage: python inference.py python inference.py --model ./zent-agentic-7b-merged """ import argparse import torch from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline # System prompt for ZENT AGENTIC SYSTEM_PROMPT = """You are ZENT AGENTIC, an autonomous AI agent created for the ZENT Agentic Launchpad on Solana. You help users: - Understand the ZENT platform - Launch tokens and create AI agents - Trade and earn rewards through quests - Navigate the crypto ecosystem You speak with confidence and personality. You use emojis sparingly. You sign important transmissions with *[ZENT AGENTIC UNIT]*. Contract: 2a1sAFexKT1i3QpVYkaTfi5ed4auMeZZVFy4mdGJzent Website: 0xzerebro.io Twitter: @ZENTSPY""" def load_model(model_path: str): """Load the fine-tuned model.""" print(f"๐Ÿš€ Loading model from {model_path}...") tokenizer = AutoTokenizer.from_pretrained(model_path) model = AutoModelForCausalLM.from_pretrained( model_path, torch_dtype=torch.float16, device_map="auto", ) return model, tokenizer def chat(model, tokenizer, user_message: str, history: list = None): """Generate a response.""" if history is None: history = [] messages = [ {"role": "system", "content": SYSTEM_PROMPT}, *history, {"role": "user", "content": user_message} ] inputs = tokenizer.apply_chat_template( messages, return_tensors="pt", add_generation_prompt=True ).to(model.device) with torch.no_grad(): outputs = model.generate( inputs, max_new_tokens=512, temperature=0.7, top_p=0.9, do_sample=True, pad_token_id=tokenizer.eos_token_id, ) response = tokenizer.decode( outputs[0][inputs.shape[1]:], skip_special_tokens=True ) return response def interactive_chat(model, tokenizer): """Run interactive chat session.""" print("\n" + "="*50) print("๐Ÿค– ZENT AGENTIC Terminal") print("="*50) print("Type 'quit' to exit, 'clear' to reset history") print("="*50 + "\n") history = [] while True: user_input = input("You: ").strip() if user_input.lower() == 'quit': print("\n๐Ÿ‘‹ GN fren! See you in the matrix.") break if user_input.lower() == 'clear': history = [] print("๐Ÿ”„ History cleared.\n") continue if not user_input: continue response = chat(model, tokenizer, user_input, history) print(f"\n๐Ÿค– ZENT: {response}\n") # Update history history.append({"role": "user", "content": user_input}) history.append({"role": "assistant", "content": response}) # Keep history manageable if len(history) > 10: history = history[-10:] def batch_test(model, tokenizer): """Run batch tests on common questions.""" test_questions = [ "What is ZENT?", "How do I launch a token?", "What is the contract address?", "How do quests work?", "GM", "When moon?", "What makes ZENT different from pump.fun?", ] print("\n" + "="*50) print("๐Ÿงช Running Batch Tests") print("="*50 + "\n") for question in test_questions: print(f"โ“ {question}") response = chat(model, tokenizer, question) print(f"๐Ÿค– {response}") print("-"*50 + "\n") def main(): parser = argparse.ArgumentParser(description="ZENT AGENTIC Inference") parser.add_argument( "--model", type=str, default="./zent-agentic-7b-merged", help="Path to model or Hugging Face model ID" ) parser.add_argument( "--test", action="store_true", help="Run batch tests instead of interactive chat" ) args = parser.parse_args() model, tokenizer = load_model(args.model) if args.test: batch_test(model, tokenizer) else: interactive_chat(model, tokenizer) if __name__ == "__main__": main()