Spaces:
Running
Running
Tobias Bergmann committed on
Commit ·
53cb438
1
Parent(s): 2e11c33
start server
Browse files
app.py
CHANGED
|
@@ -8,24 +8,22 @@ from typing import Iterator, List, Dict
|
|
| 8 |
|
| 9 |
import requests
|
| 10 |
import json
|
|
|
|
| 11 |
import gradio as gr
|
| 12 |
|
| 13 |
today_date = datetime.today().strftime("%B %-d, %Y") # noqa: DTZ002
|
| 14 |
|
| 15 |
-
SYS_PROMPT = f"""
|
| 16 |
-
Today's Date: {today_date}.
|
| 17 |
You are Granite, developed by IBM. You are a helpful AI assistant"""
|
| 18 |
-
TITLE = "IBM Granite 3.1
|
| 19 |
DESCRIPTION = """
|
| 20 |
-
<p>Granite 3.1
|
| 21 |
-
or enter your own. Keep in mind that AI can occasionally make mistakes.
|
| 22 |
<span class="gr_docs_link">
|
| 23 |
<a href="https://www.ibm.com/granite/docs/">View Documentation <i class="fa fa-external-link"></i></a>
|
| 24 |
</span>
|
| 25 |
</p>
|
| 26 |
"""
|
| 27 |
LLAMA_CPP_SERVER = "http://127.0.0.1:8081"
|
| 28 |
-
MAX_INPUT_TOKEN_LENGTH = 128_000
|
| 29 |
MAX_NEW_TOKENS = 1024
|
| 30 |
TEMPERATURE = 0.7
|
| 31 |
TOP_P = 0.85
|
|
@@ -39,9 +37,12 @@ gguf_path = hf_hub_download(
|
|
| 39 |
local_dir="."
|
| 40 |
)
|
| 41 |
|
| 42 |
-
#
|
| 43 |
-
|
| 44 |
-
|
|
|
|
|
|
|
|
|
|
| 45 |
|
| 46 |
def generate(
|
| 47 |
message: str,
|
|
|
|
| 8 |
|
| 9 |
import requests
|
| 10 |
import json
|
| 11 |
+
import subprocess
|
| 12 |
import gradio as gr
|
| 13 |
|
| 14 |
today_date = datetime.today().strftime("%B %-d, %Y") # noqa: DTZ002
|
| 15 |
|
| 16 |
+
SYS_PROMPT = f"""Today's Date: {today_date}.
|
|
|
|
| 17 |
You are Granite, developed by IBM. You are a helpful AI assistant"""
|
| 18 |
+
TITLE = "IBM Granite 3.1 3b a800 MoE Instruct from local GGUF server"
|
| 19 |
DESCRIPTION = """
|
| 20 |
+
<p>Granite 3.1 3b instruct is an open-source LLM supporting a 128k context window. This demo uses only 2K context.
|
|
|
|
| 21 |
<span class="gr_docs_link">
|
| 22 |
<a href="https://www.ibm.com/granite/docs/">View Documentation <i class="fa fa-external-link"></i></a>
|
| 23 |
</span>
|
| 24 |
</p>
|
| 25 |
"""
|
| 26 |
LLAMA_CPP_SERVER = "http://127.0.0.1:8081"
|
|
|
|
| 27 |
MAX_NEW_TOKENS = 1024
|
| 28 |
TEMPERATURE = 0.7
|
| 29 |
TOP_P = 0.85
|
|
|
|
| 37 |
local_dir="."
|
| 38 |
)
|
| 39 |
|
| 40 |
+
# start llama-server
|
| 41 |
+
subprocess.run(["chmod", "+x", "llama-server"])
|
| 42 |
+
command = ["./llama-server", "-m", "granite-3.1-3b-a800m-instruct-Q8_0.gguf", "-ngl", "0", "--temp", "0.0", "-c", "2048", "-t", "8", "--port", "8081"]
|
| 43 |
+
process = subprocess.Popen(command)
|
| 44 |
+
print(f"Llama-server process started with PID {process.pid}")
|
| 45 |
+
|
| 46 |
|
| 47 |
def generate(
|
| 48 |
message: str,
|