File size: 2,660 Bytes
446a244
 
540809c
446a244
 
 
 
 
bc95a68
446a244
 
bc95a68
 
 
 
 
 
 
5403104
 
bc95a68
7cd419b
bc95a68
 
 
 
 
 
 
 
91f1244
bc95a68
 
446a244
5403104
bc95a68
91f1244
 
 
 
 
 
 
 
446a244
 
 
 
 
 
 
 
 
91f1244
 
0def099
91f1244
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bc95a68
91f1244
 
446a244
91f1244
 
446a244
91f1244
446a244
 
 
 
 
 
 
 
540809c
446a244
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# %pip (rather than !pip) installs into the environment of the running kernel.\n",
        "# The extra index (whl/cu124) is where llama-cpp-python publishes its CUDA 12.4 wheels.\n",
        "%pip install llama-cpp-python huggingface_hub --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu124\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "import os\n",
        "\n",
        "from huggingface_hub import HfApi\n",
        "from llama_cpp import Llama\n",
        "\n",
        "REPO_ID = \"darkai-1/darkit-v2.0\"\n",
        "api = HfApi()\n",
        "\n",
        "# Keep only the .gguf entries from the repo's file listing.\n",
        "files = api.list_repo_files(REPO_ID)\n",
        "gguf_files = [f for f in files if f.endswith(\".gguf\")]\n",
        "if not gguf_files:\n",
        "    raise RuntimeError(f\"No .gguf files found in {REPO_ID}\")\n",
        "\n",
        "for i, f in enumerate(gguf_files):\n",
        "    print(f\"[{i}] {f}\")\n",
        "\n",
        "# Re-prompt until the answer is a valid index: int() raises on non-numeric\n",
        "# input, and a negative number would silently index from the end of the list.\n",
        "while True:\n",
        "    raw = input(\"Select model number: \")\n",
        "    try:\n",
        "        choice = int(raw)\n",
        "    except ValueError:\n",
        "        print(f\"Not a number: {raw!r}\")\n",
        "        continue\n",
        "    if 0 <= choice < len(gguf_files):\n",
        "        break\n",
        "    print(f\"Enter a number from 0 to {len(gguf_files) - 1}\")\n",
        "filename = gguf_files[choice]\n",
        "\n",
        "# os.cpu_count() can return None, so fall back to 4 threads.\n",
        "n_threads = os.cpu_count() or 4\n",
        "\n",
        "# Fetch the chosen file from the repo and build the model from it.\n",
        "llm = Llama.from_pretrained(\n",
        "    repo_id=REPO_ID,\n",
        "    filename=filename,\n",
        "    n_ctx=2048,\n",
        "    n_batch=128,\n",
        "    n_ubatch=128,\n",
        "    n_threads=n_threads,\n",
        "    n_threads_batch=n_threads,\n",
        "    n_gpu_layers=-1,\n",
        "    verbose=False,\n",
        "    no_perf=True,\n",
        ")\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# Set the model's cache to None before generating.\n",
        "llm.set_cache(None)\n",
        "\n",
        "PROMPT = \"Hi how are you?\"\n",
        "\n",
        "# Wrap the user text in <|im_start|>/<|im_end|> turn markers and leave the\n",
        "# assistant turn open for the model to complete.\n",
        "chat_prompt = f\"<|im_start|>user\\n{PROMPT}<|im_end|>\\n<|im_start|>assistant\\n\"\n",
        "\n",
        "# Generation halts as soon as any of these sequences is produced.\n",
        "stop_markers = [\n",
        "    \"<|im_end|>\",\n",
        "    \"<|im_start|>\",\n",
        "    \"\\n\\nUser:\",\n",
        "    \"\\n\\nAssistant:\",\n",
        "]\n",
        "\n",
        "stream = llm(\n",
        "    chat_prompt,\n",
        "    max_tokens=128,\n",
        "    temperature=0.7,\n",
        "    top_p=0.8,\n",
        "    top_k=20,\n",
        "    stream=True,\n",
        "    stop=stop_markers,\n",
        "    echo=False,\n",
        ")\n",
        "\n",
        "# Print each non-empty text fragment as it arrives, without line breaks.\n",
        "for chunk in stream:\n",
        "    piece = chunk[\"choices\"][0][\"text\"]\n",
        "    if not piece:\n",
        "        continue\n",
        "    print(piece, end=\"\", flush=True)\n",
        "\n",
        "# Finish the streamed output with a newline.\n",
        "print()\n"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}