from llama_cpp import Llama


class Sophos:
    """Thin wrapper around a local llama.cpp chat model."""

    def __init__(
        self,
        model_path: str = "Qwen3-4B-Instruct-2507-Q3_K_S.gguf",
        n_ctx: int = 2048,
        n_gpu_layers: int = 35,
    ) -> None:
        """Load the GGUF model.

        Args:
            model_path: Path to the GGUF model file on disk.
            n_ctx: Context window size in tokens (default matches original: 2048).
            n_gpu_layers: Number of transformer layers to offload to the GPU
                (default matches original: 35).
        """
        self.model_path = model_path
        self.model = Llama(
            model_path=model_path,
            n_ctx=n_ctx,
            n_gpu_layers=n_gpu_layers,
        )

    def ask(
        self,
        prompt: str,
        max_tokens: int = 1050,
        temperature: float = 0.7,
    ) -> str:
        """Ask Sophos a question and return the answer.

        Args:
            prompt: The user message sent as a single-turn chat.
            max_tokens: Maximum number of tokens to generate.
            temperature: Sampling temperature.

        Returns:
            The assistant's reply text.
        """
        messages = [
            {"role": "user", "content": prompt},
        ]
        output = self.model.create_chat_completion(
            messages,
            max_tokens=max_tokens,
            temperature=temperature,
        )
        return output["choices"][0]["message"]["content"]


def _main() -> None:
    """Interactive REPL: read prompts until 'exit'/'quit'/'q', EOF, or Ctrl-C."""
    sophos = Sophos()
    while True:
        try:
            prompt = input("\nEnter your prompt: ")
        except (EOFError, KeyboardInterrupt):
            # Previously an unhandled Ctrl-D/Ctrl-C crashed with a traceback;
            # exit the loop cleanly instead.
            break
        if prompt.lower() in ("exit", "quit", "q"):
            break
        response = sophos.ask(prompt)
        print(response)


if __name__ == "__main__":
    _main()