File size: 2,660 Bytes
c2a74d5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# Install llama-cpp-python (also searching the extra wheel index below) and huggingface_hub.\n",
        "# %pip installs into the environment of the running kernel, which a bare !pip does not guarantee.\n",
        "%pip install llama-cpp-python huggingface_hub --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu124\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "from huggingface_hub import HfApi\n",
        "from llama_cpp import Llama\n",
        "import os\n",
        "\n",
        "REPO_ID = \"darkai-1/darkit-v2.5\"\n",
        "api = HfApi()\n",
        "\n",
        "# Keep only the GGUF files from the repo's file listing.\n",
        "files = api.list_repo_files(REPO_ID)\n",
        "gguf_files = [f for f in files if f.endswith(\".gguf\")]\n",
        "if not gguf_files:\n",
        "    # Fail early with a clear message instead of prompting for a choice that cannot exist.\n",
        "    raise RuntimeError(f\"No .gguf files found in {REPO_ID}\")\n",
        "\n",
        "for i, f in enumerate(gguf_files):\n",
        "    print(f\"[{i}] {f}\")\n",
        "\n",
        "# Re-prompt until the answer is one of the listed indices: a bare int() crashes on\n",
        "# non-numeric input, and a negative number would silently index from the end.\n",
        "while True:\n",
        "    try:\n",
        "        choice = int(input(\"Select model number: \"))\n",
        "    except ValueError:\n",
        "        print(\"Please enter a whole number.\")\n",
        "        continue\n",
        "    if 0 <= choice < len(gguf_files):\n",
        "        break\n",
        "    print(f\"Choice must be between 0 and {len(gguf_files) - 1}.\")\n",
        "filename = gguf_files[choice]\n",
        "\n",
        "# os.cpu_count() may return None, so fall back to 4 threads.\n",
        "n_threads = os.cpu_count() or 4\n",
        "\n",
        "# Fetch the selected file from the repo and load it.\n",
        "llm = Llama.from_pretrained(\n",
        "    repo_id=REPO_ID,\n",
        "    filename=filename,\n",
        "    n_ctx=2048,\n",
        "    n_batch=128,\n",
        "    n_ubatch=128,\n",
        "    n_threads=n_threads,\n",
        "    n_threads_batch=n_threads,\n",
        "    n_gpu_layers=-1,  # per llama-cpp-python docs, -1 offloads all layers to the GPU\n",
        "    verbose=False,\n",
        "    no_perf=True,\n",
        ")\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# Make sure no cache object is attached to the model before generating.\n",
        "llm.set_cache(None)\n",
        "\n",
        "PROMPT = \"Hi how are you?\"\n",
        "\n",
        "# Wrap the user message in turn markers and leave the assistant turn open.\n",
        "chat_prompt = f\"<|im_start|>user\\n{PROMPT}<|im_end|>\\n<|im_start|>assistant\\n\"\n",
        "\n",
        "# Generation stops as soon as any of these strings would be produced.\n",
        "stop_sequences = [\n",
        "    \"<|im_end|>\",\n",
        "    \"<|im_start|>\",\n",
        "    \"\\n\\nUser:\",\n",
        "    \"\\n\\nAssistant:\",\n",
        "]\n",
        "\n",
        "stream = llm(\n",
        "    chat_prompt,\n",
        "    max_tokens=128,\n",
        "    temperature=0.7,\n",
        "    top_p=0.8,\n",
        "    top_k=20,\n",
        "    stop=stop_sequences,\n",
        "    stream=True,\n",
        "    echo=False,\n",
        ")\n",
        "\n",
        "# Print each non-empty text fragment as soon as it arrives.\n",
        "for chunk in stream:\n",
        "    piece = chunk[\"choices\"][0][\"text\"]\n",
        "    if not piece:\n",
        "        continue\n",
        "    print(piece, end=\"\", flush=True)\n",
        "\n",
        "# Terminate the streamed line.\n",
        "print()\n"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}