File size: 2,633 Bytes
446a244
 
540809c
446a244
 
 
 
 
bc95a68
446a244
 
bc95a68
 
 
 
 
 
 
5403104
 
bc95a68
5403104
bc95a68
5403104
bc95a68
 
5403104
bc95a68
 
 
 
 
 
 
5403104
bc95a68
 
5403104
446a244
 
5403104
bc95a68
d4ec88a
 
ea04ec0
9fb0db7
446a244
 
 
 
 
 
 
 
 
bc95a68
 
 
 
 
 
446a244
bc95a68
 
 
 
 
 
 
446a244
bc95a68
 
 
 
 
446a244
 
 
 
 
 
 
 
540809c
446a244
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# Install llama-cpp-python (prebuilt CUDA 12.4 wheel index) plus huggingface_hub for repo access.\n",
        "!pip install llama-cpp-python huggingface_hub --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu124\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# List the GGUF quantizations available in the repo, let the user pick one,\n",
        "# and load it with llama-cpp-python.\n",
        "from huggingface_hub import HfApi\n",
        "from llama_cpp import Llama\n",
        "import os\n",
        "\n",
        "os.environ[\"LLAMA_CPP_LOG_LEVEL\"] = \"ERROR\"\n",
        "\n",
        "REPO_ID = \"darkai-1/darkit\"\n",
        "api = HfApi()\n",
        "\n",
        "# Keep only the .gguf weight files.\n",
        "files = api.list_repo_files(REPO_ID)\n",
        "gguf_files = [f for f in files if f.endswith(\".gguf\")]\n",
        "if not gguf_files:\n",
        "    raise RuntimeError(f\"No .gguf files found in {REPO_ID}\")\n",
        "\n",
        "print(\"Available models:\\n\")\n",
        "for i, f in enumerate(gguf_files):\n",
        "    print(f\"[{i}] {f}\")\n",
        "\n",
        "# Re-prompt until a valid index is entered: a bare int(input()) would crash\n",
        "# the cell with ValueError on non-numeric input or IndexError when out of range.\n",
        "while True:\n",
        "    try:\n",
        "        choice = int(input(\"\\nSelect model number: \"))\n",
        "        filename = gguf_files[choice]\n",
        "        break\n",
        "    except (ValueError, IndexError):\n",
        "        print(f\"Please enter a number between 0 and {len(gguf_files) - 1}.\")\n",
        "\n",
        "print(\"Loading model:\", filename)\n",
        "\n",
        "llm = Llama.from_pretrained(\n",
        "    repo_id=REPO_ID,\n",
        "    filename=filename,\n",
        "    n_ctx=8192,\n",
        "    n_threads=2,\n",
        "    n_gpu_layers=1,\n",
        "    verbose=False\n",
        ")\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# Streaming chat REPL. The history is accumulated across turns so the model\n",
        "# sees the whole conversation (the 8192-token context is wasted otherwise);\n",
        "# type 'exit' or 'quit' to leave.\n",
        "messages = []\n",
        "\n",
        "while True:\n",
        "    user_input = input(\"You: \")\n",
        "    if user_input.lower() in [\"exit\", \"quit\"]:\n",
        "        break\n",
        "\n",
        "    messages.append({\"role\": \"user\", \"content\": user_input})\n",
        "\n",
        "    stream = llm.create_chat_completion(\n",
        "        messages=messages,\n",
        "        temperature=0.7,\n",
        "        top_p=0.8,\n",
        "        top_k=20,\n",
        "        stream=True\n",
        "    )\n",
        "\n",
        "    # Print tokens as they arrive and collect them for the history.\n",
        "    reply_parts = []\n",
        "    for chunk in stream:\n",
        "        delta = chunk[\"choices\"][0][\"delta\"]\n",
        "        content = delta.get(\"content\")\n",
        "        if content:\n",
        "            print(content, end=\"\", flush=True)\n",
        "            reply_parts.append(content)\n",
        "    print(\"\\n\")\n",
        "\n",
        "    # Record the assistant turn so the next request includes it.\n",
        "    messages.append({\"role\": \"assistant\", \"content\": \"\".join(reply_parts)})\n"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}