{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Run a GGUF model from the Hugging Face Hub\n",
    "\n",
    "Lists the `.gguf` files in a Hub repository, loads the selected one with `llama-cpp-python`, and streams one completion for a fixed prompt."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# %pip (rather than !pip) installs into the environment of the running kernel.\n",
    "# The extra index URL is the llama-cpp-python wheel index for cu124 builds.\n",
    "%pip install llama-cpp-python huggingface_hub --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu124\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "from huggingface_hub import HfApi\n",
    "from llama_cpp import Llama\n",
    "\n",
    "REPO_ID = \"darkai-1/darkit-v2.0\"\n",
    "# Fall back to 4 threads when the CPU count cannot be determined.\n",
    "N_THREADS = os.cpu_count() or 4\n",
    "\n",
    "api = HfApi()\n",
    "\n",
    "# Keep only the GGUF files of the repository; those are the selectable models.\n",
    "files = api.list_repo_files(REPO_ID)\n",
    "gguf_files = [f for f in files if f.endswith(\".gguf\")]\n",
    "if not gguf_files:\n",
    "    raise RuntimeError(f\"No .gguf files found in {REPO_ID}\")\n",
    "\n",
    "for i, f in enumerate(gguf_files):\n",
    "    print(f\"[{i}] {f}\")\n",
    "\n",
    "# Re-prompt until the answer is a valid index, so that a typo, a negative\n",
    "# number or an out-of-range number cannot silently pick the wrong file or\n",
    "# crash with a bare IndexError/ValueError.\n",
    "while True:\n",
    "    answer = input(\"Select model number: \")\n",
    "    try:\n",
    "        choice = int(answer)\n",
    "    except ValueError:\n",
    "        print(f\"Not a number: {answer!r}\")\n",
    "        continue\n",
    "    if 0 <= choice < len(gguf_files):\n",
    "        break\n",
    "    print(f\"Choose a number from 0 to {len(gguf_files) - 1}.\")\n",
    "\n",
    "filename = gguf_files[choice]\n",
    "\n",
    "# Download (if needed) and load the selected file from the Hub repository.\n",
    "llm = Llama.from_pretrained(\n",
    "    repo_id=REPO_ID,\n",
    "    filename=filename,\n",
    "    n_ctx=2048,\n",
    "    n_batch=128,\n",
    "    n_ubatch=128,\n",
    "    n_threads=N_THREADS,\n",
    "    n_threads_batch=N_THREADS,\n",
    "    n_gpu_layers=-1,\n",
    "    verbose=False,\n",
    "    no_perf=True,\n",
    ")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Detach any cache object from the model before generating.\n",
    "llm.set_cache(None)\n",
    "\n",
    "PROMPT = \"Hi how are you?\"\n",
    "\n",
    "# The prompt is wrapped in <|im_start|>/<|im_end|> chat markers by hand;\n",
    "# generation stops as soon as the model emits one of the stop strings.\n",
    "stream = llm(\n",
    "    f\"<|im_start|>user\\n{PROMPT}<|im_end|>\\n<|im_start|>assistant\\n\",\n",
    "    max_tokens=128,\n",
    "    temperature=0.7,\n",
    "    top_p=0.8,\n",
    "    top_k=20,\n",
    "    stream=True,\n",
    "    stop=[\n",
    "        \"<|im_end|>\",\n",
    "        \"<|im_start|>\",\n",
    "        \"\\n\\nUser:\",\n",
    "        \"\\n\\nAssistant:\",\n",
    "    ],\n",
    "    echo=False,\n",
    ")\n",
    "\n",
    "# Print each streamed piece of text as it arrives, skipping empty chunks.\n",
    "for chunk in stream:\n",
    "    text = chunk[\"choices\"][0][\"text\"]\n",
    "    if text:\n",
    "        print(text, end=\"\", flush=True)\n",
    "\n",
    "print()\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}