import os

from llama_cpp import Llama
def run_local_llm(model_path="./AgGPT-9.gguf"):
    """Run an interactive chat loop against a local AgGPT-9 GGUF model.

    Loads the model once, then repeatedly reads a prompt from stdin and
    prints the model's reply. Type 'exit' (or send EOF / Ctrl-C) to quit.

    Args:
        model_path: Path to the ``.gguf`` model file. The default keeps
            the original hard-coded location, so existing callers are
            unaffected.

    Raises:
        FileNotFoundError: If ``model_path`` does not point to a file.
    """
    # Fail fast with a clear message instead of an opaque loader crash.
    if not os.path.isfile(model_path):
        raise FileNotFoundError(f"Model file not found: {model_path}")

    print("Loading AgGPT-9... (This may take a moment)")
    # n_ctx / n_gpu_layers values preserved from the original script;
    # n_gpu_layers=35 assumes a GPU build of llama-cpp-python is available.
    model = Llama(model_path=model_path, n_ctx=2048, n_gpu_layers=35)
    print("Model loaded. Type 'exit' to quit.")

    while True:
        try:
            prompt = input("\nEnter your prompt: ")
        except (EOFError, KeyboardInterrupt):
            # Treat EOF / Ctrl-C the same as typing 'exit': leave cleanly
            # instead of dumping a traceback.
            break
        if prompt.lower() == 'exit':
            break
        if not prompt.strip():
            # Skip blank input — no point spending a generation call on it.
            continue
        messages = [
            {"role": "system", "content": "You are AgGPT-9, an advanced AI assistant created by AG, the 9th series of the AgGPT models."},
            {"role": "user", "content": prompt}
        ]
        output = model.create_chat_completion(messages, max_tokens=550, temperature=0.7)
        print("\nGenerated text:")
        print(output["choices"][0]["message"]["content"])
| if __name__ == "__main__": | |
| run_local_llm() | |