{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!pip install llama-cpp-python huggingface_hub --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu124\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from huggingface_hub import HfApi\n", "from llama_cpp import Llama\n", "import os\n", "\n", "os.environ[\"LLAMA_CPP_LOG_LEVEL\"] = \"ERROR\"\n", "\n", "REPO_ID = \"darkai-1/darkit\"\n", "api = HfApi()\n", "\n", "# list gguf files\n", "files = api.list_repo_files(REPO_ID)\n", "gguf_files = [f for f in files if f.endswith(\".gguf\")]\n", "\n", "print(\"Available models:\\n\")\n", "for i, f in enumerate(gguf_files):\n", " print(f\"[{i}] {f}\")\n", "\n", "choice = int(input(\"\\nSelect model number: \"))\n", "filename = gguf_files[choice]\n", "\n", "print(\"Loading model:\", filename)\n", "\n", "llm = Llama.from_pretrained(\n", " repo_id=REPO_ID,\n", " filename=filename,\n", " n_ctx=8192,\n", " n_threads=2,\n", " n_gpu_layers=1,\n", " verbose=False\n", ")\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "while True:\n", " user_input = input(\"You: \")\n", " if user_input.lower() in [\"exit\", \"quit\"]:\n", " break\n", "\n", " messages = [{\"role\": \"user\", \"content\": user_input}]\n", "\n", " stream = llm.create_chat_completion(\n", " messages=messages,\n", " temperature=0.7,\n", " top_p=0.8,\n", " top_k=20,\n", " stream=True\n", " )\n", "\n", " for chunk in stream:\n", " delta = chunk[\"choices\"][0][\"delta\"]\n", " if \"content\" in delta:\n", " print(delta[\"content\"], end=\"\", flush=True)\n", " print(\"\\n\")\n" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" } }, "nbformat": 4, "nbformat_minor": 0 }