Spaces:
Running
Running
Commit ·
0a9f3b7
1
Parent(s): fd363ed
Upgraded model to Qwen3-Coder-30B-A3B-Instruct (UD-Q3_K_XL quantization)
Browse files
app.py
CHANGED
|
@@ -3,6 +3,9 @@
|
|
| 3 |
|
| 4 |
To do
|
| 5 |
- create the custom gradio look
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
"""
|
| 8 |
from huggingface_hub import hf_hub_download
|
|
@@ -17,14 +20,14 @@ import re # remove thinking tag from response
|
|
| 17 |
# ----- Get Model ----- #
|
| 18 |
# Download Q4_K_M GGUF file from the repo
|
| 19 |
model_path = hf_hub_download(
|
| 20 |
-
repo_id="
|
| 21 |
-
filename="
|
| 22 |
)
|
| 23 |
|
| 24 |
# Initialize llama.cpp with the local cached path
|
| 25 |
llm = Llama(
|
| 26 |
model_path=model_path,
|
| 27 |
-
n_ctx=
|
| 28 |
n_threads=2
|
| 29 |
)
|
| 30 |
|
|
|
|
| 3 |
|
| 4 |
To do
|
| 5 |
- create the custom gradio look
|
| 6 |
+
- explore making it look better
|
| 7 |
+
- get a better model — Qwen 30b coder
|
| 8 |
+
- use zerogpu
|
| 9 |
|
| 10 |
"""
|
| 11 |
from huggingface_hub import hf_hub_download
|
|
|
|
| 20 |
# ----- Get Model ----- #
|
| 21 |
# Download the UD-Q3_K_XL GGUF file from the repo
|
| 22 |
model_path = hf_hub_download(
|
| 23 |
+
repo_id="unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF",
|
| 24 |
+
filename="Qwen3-Coder-30B-A3B-Instruct-UD-Q3_K_XL.gguf" # fallback: Q2_K_XL
|
| 25 |
)
|
| 26 |
|
| 27 |
# Initialize llama.cpp with the local cached path
|
| 28 |
llm = Llama(
|
| 29 |
model_path=model_path,
|
| 30 |
+
n_ctx=4096,
|
| 31 |
n_threads=2
|
| 32 |
)
|
| 33 |
|