from llama_cpp import Llama


class Sophos:
    """Thin wrapper around a local llama.cpp chat model."""

    def __init__(
        self,
        model_path: str = "Qwen3-4B-Instruct-2507-Q3_K_S.gguf",
        n_ctx: int = 2048,
        n_gpu_layers: int = 35,
    ) -> None:
        """Load the GGUF model.

        Args:
            model_path: Path to the GGUF model file on disk.
            n_ctx: Context window size in tokens (default matches original: 2048).
            n_gpu_layers: Number of transformer layers to offload to the GPU
                (default matches original: 35).
        """
        self.model_path = model_path
        self.model = Llama(
            model_path=model_path,
            n_ctx=n_ctx,
            n_gpu_layers=n_gpu_layers,
        )

    def ask(
        self,
        prompt: str,
        max_tokens: int = 1050,
        temperature: float = 0.7,
    ) -> str:
        """Ask Sophos a question and return the answer.

        Args:
            prompt: The user message sent as a single-turn chat.
            max_tokens: Maximum number of tokens to generate.
            temperature: Sampling temperature.

        Returns:
            The assistant's reply text.
        """
        messages = [
            {"role": "user", "content": prompt},
        ]
        output = self.model.create_chat_completion(
            messages,
            max_tokens=max_tokens,
            temperature=temperature,
        )
        return output["choices"][0]["message"]["content"]


def _main() -> None:
    """Interactive REPL: read prompts until 'exit'/'quit'/'q', EOF, or Ctrl-C."""
    sophos = Sophos()
    while True:
        try:
            prompt = input("\nEnter your prompt: ")
        except (EOFError, KeyboardInterrupt):
            # Previously an unhandled Ctrl-D/Ctrl-C crashed with a traceback;
            # exit the loop cleanly instead.
            break
        if prompt.lower() in ("exit", "quit", "q"):
            break
        response = sophos.ask(prompt)
        print(response)


if __name__ == "__main__":
    _main()