from llama_cpp import Llama


class Sophos:
    def __init__(self, model_path="Qwen3-4B-Instruct-2507-Q3_K_S.gguf"):
        self.model_path = model_path
        # n_gpu_layers=35 offloads up to 35 layers to the GPU; set it to 0 for CPU-only inference.
        self.model = Llama(model_path=model_path, n_ctx=2048, n_gpu_layers=35)

    def ask(self, prompt):
        """Ask Sophos a question and return the answer."""
        messages = [
            {"role": "user", "content": prompt}
        ]
        output = self.model.create_chat_completion(
            messages=messages, max_tokens=1050, temperature=0.7
        )
        return output["choices"][0]["message"]["content"]
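
    # Sketch of an optional addition, not in the original class: llama-cpp-python's
    # create_chat_completion also accepts stream=True, yielding chunks whose
    # choices[0]["delta"] may carry a "content" fragment. The method name
    # ask_stream is an illustrative choice; generation parameters mirror ask().
    def ask_stream(self, prompt):
        """Print the answer token by token instead of waiting for the full reply."""
        messages = [{"role": "user", "content": prompt}]
        for chunk in self.model.create_chat_completion(
            messages=messages, max_tokens=1050, temperature=0.7, stream=True
        ):
            delta = chunk["choices"][0]["delta"]
            if "content" in delta:
                # Emit each fragment as it arrives, without a trailing newline.
                print(delta["content"], end="", flush=True)
        print()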

if __name__ == "__main__":
    sophos = Sophos()
    # Simple REPL: keep prompting until the user types exit, quit, or q.
    while True:
        prompt = input("\nEnter your prompt: ")
        if prompt.strip().lower() in ("exit", "quit", "q"):
            break
        response = sophos.ask(prompt)
        print(response)