"""Stream a chat completion from a GGUF model using llama-cpp-python.

This file depends on the third-party ``llama-cpp-python`` package. Install it
before running:

    pip install llama-cpp-python

In a Jupyter/Colab cell use ``%pip install llama-cpp-python`` instead. The
``!pip`` shell escape is IPython-only syntax and is a SyntaxError in a plain
Python file, so it is kept here as documentation rather than as code.
"""


def collect_stream_text(chunks, echo=True):
    """Return the concatenated text carried by streamed chat-completion chunks.

    Args:
        chunks: Iterable of chunk dicts, each indexed as
            ``chunk["choices"][0]["delta"]``.
        echo: When true, print every text fragment as soon as it is read,
            without a trailing newline, flushing stdout each time.

    Returns:
        Every non-empty ``delta["content"]`` fragment joined in arrival order;
        an empty string when no chunk carried text.
    """
    pieces = []
    for chunk in chunks:
        delta = chunk["choices"][0]["delta"]
        # The "content" key may be absent from a delta. A None or empty value
        # adds nothing to the output, and None would break the final join.
        text = delta.get("content")
        if not text:
            continue
        if echo:
            # Flush per fragment so the reply appears as it is generated.
            print(text, end="", flush=True)
        pieces.append(text)
    return "".join(pieces)


if __name__ == "__main__":
    # Imported here so that importing this module (e.g. to reuse
    # collect_stream_text) neither requires llama-cpp-python nor loads a model.
    from llama_cpp import Llama

    # Model source and runtime settings are passed through unchanged.
    # NOTE(review): n_gpu_layers=35 presumably needs a GPU-enabled build of
    # llama-cpp-python to have any effect - confirm for the target machine.
    llm = Llama.from_pretrained(
        repo_id="darkai-1/darkit-1.5-pro",
        filename="darkit-1.5-pro.gguf",
        n_ctx=4096,
        n_threads=4,
        n_gpu_layers=35,
    )

    messages = [
        {
            "role": "user",
            "content": "Who are you?",
        }
    ]

    # stream=True makes the call return an iterable of chunks instead of a
    # single completed response.
    stream = llm.create_chat_completion(
        messages=messages,
        temperature=0.7,
        top_p=0.8,
        top_k=20,
        stream=True,
    )

    # Echo fragments while they arrive and keep the full reply for later use.
    full_text = collect_stream_text(stream)