{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!pip install llama-cpp-python huggingface_hub --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu124\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from huggingface_hub import HfApi\n", "from llama_cpp import Llama\n", "import os\n", "\n", "os.environ[\"LLAMA_CPP_LOG_LEVEL\"] = \"ERROR\"\n", "\n", "REPO_ID = \"darkai-1/darkit\"\n", "api = HfApi()\n", "\n", "# list gguf files\n", "files = api.list_repo_files(REPO_ID)\n", "gguf_files = [f for f in files if f.endswith(\".gguf\")]\n", "\n", "print(\"Available models:\\n\")\n", "for i, f in enumerate(gguf_files):\n", " print(f\"[{i}] {f}\")\n", "\n", "choice = int(input(\"\\nSelect model number: \"))\n", "filename = gguf_files[choice]\n", "\n", "print(\"Loading model:\", filename)\n", "\n", "llm = Llama.from_pretrained(\n", " repo_id=REPO_ID,\n", " filename=filename,\n", " n_ctx=8192,\n", " n_threads=2,\n", " n_gpu_layers=1,\n", " verbose=False\n", ")\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "while True:\n", " user_input = input(\"You: \")\n", " if user_input.lower() in [\"exit\", \"quit\"]:\n", " break\n", "\n", " messages = [{\"role\": \"user\", \"content\": user_input}]\n", "\n", " stream = llm.create_chat_completion(\n", " messages=messages,\n", " temperature=0.7,\n", " top_p=0.8,\n", " top_k=20,\n", " stream=True\n", " )\n", "\n", " for chunk in stream:\n", " delta = chunk[\"choices\"][0][\"delta\"]\n", " if \"content\" in delta:\n", " print(delta[\"content\"], end=\"\", flush=True)\n", " print(\"\\n\")\n" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" } }, "nbformat": 4, "nbformat_minor": 0 }