Spaces:

TobDeBer
/

Granite4MicroCPU

Sleeping

TobDeBer committed on Sep 3, 2025

Commit

1ae9a3e

verified ·

1 Parent(s): 93d5980

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -15,9 +15,9 @@ today_date = datetime.today().strftime("%B %-d, %Y")  # noqa: DTZ002
 SYS_PROMPT = f"""Today's Date: {today_date}.
 You are Granite, developed by IBM. You are a helpful AI assistant"""
-TITLE = "IBM Granite 3.1 3b a800 MoE Instruct from local GGUF server"
 DESCRIPTION = """
-<p>Granite 3.1 3b instruct is an open-source LLM supporting a 128k context window. This demo uses only 2K context.
 <span class="gr_docs_link">
 <a href="https://www.ibm.com/granite/docs/">View Documentation <i class="fa fa-external-link"></i></a>
 </span>
@@ -30,16 +30,30 @@ TOP_P = 0.85
 TOP_K = 50
 REPETITION_PENALTY = 1.05
-# download GGUF into local directory
 gguf_path = hf_hub_download(
-            repo_id="bartowski/granite-3.1-3b-a800m-instruct-GGUF",
-            filename="granite-3.1-3b-a800m-instruct-Q8_0.gguf",
             local_dir="."
-        )
 # start llama-server
-subprocess.run(["chmod", "+x", "llama-server"])
-command = ["./llama-server", "-m", "granite-3.1-3b-a800m-instruct-Q8_0.gguf", "-ngl", "0", "--temp", "0.0", "-c", "2048", "-t", "8", "--port", "8081"]
 process = subprocess.Popen(command)
 print(f"Llama-server process started with PID {process.pid}")

 SYS_PROMPT = f"""Today's Date: {today_date}.
 You are Granite, developed by IBM. You are a helpful AI assistant"""
+TITLE = "IBM Granite 4 Tiny Preview served from local GGUF server"
 DESCRIPTION = """
+<p>Granite 4 Tiny is an open-source LLM supporting a 128k context window. This demo uses only 2K context.
 <span class="gr_docs_link">
 <a href="https://www.ibm.com/granite/docs/">View Documentation <i class="fa fa-external-link"></i></a>
 </span>
 TOP_K = 50
 REPETITION_PENALTY = 1.05
+# TODO: determine platform: CUDA or CPU
+platform = "CPU"
+# login to HF with space secret and download gguf and executable
+# huggingface-cli login --token hf_xyzxyz
+gguf_name = "SmartQuant-granite-3.3-8b-instruct.gguf"
 gguf_path = hf_hub_download(
+            repo_id="TobDeBer/SmartQuant",
+            filename=gguf_name,
+            local_dir="."
+)
+# TODO: set exe_name depending on platform
+exe_name = "llama-server-6343-cuda"
+exe_path = hf_hub_download(
+            repo_id="TobDeBer/SmartQuant",
+            filename=exe_name,
             local_dir="."
+)
 # start llama-server
+subprocess.run(["chmod", "+x", exe_name])
+command = ["./"+exe_name, "-m", gguf_name, "--temp", "0.0", "-c", "2048", "-t", "8", "--port", "8081"]
 process = subprocess.Popen(command)
 print(f"Llama-server process started with PID {process.pid}")