Rishi-Jain-27 committed on
Commit
0a9f3b7
·
1 Parent(s): fd363ed

Upgraded model to Qwen-30B-Q3-K-XL

Browse files
Files changed (1) hide show
  1. app.py +6 -3
app.py CHANGED
@@ -3,6 +3,9 @@
3
 
4
  To do
5
  - create the custom gradio look
 
 
 
6
 
7
  """
8
  from huggingface_hub import hf_hub_download
@@ -17,14 +20,14 @@ import re # remove thinking tag from response
17
  # ----- Get Model ----- #
18
  # Download Q4_K_M GGUF file from the repo
19
  model_path = hf_hub_download(
20
- repo_id="Qwen/Qwen2.5-Coder-7B-Instruct-GGUF",
21
- filename="qwen2.5-coder-7b-instruct-q4_k_m.gguf"
22
  )
23
 
24
  # Initialize llama.cpp with the local cached path
25
  llm = Llama(
26
  model_path=model_path,
27
- n_ctx=2048,
28
  n_threads=2
29
  )
30
 
 
3
 
4
  To do
5
  - create the custom gradio look
6
+ - explore making it look better
7
+ - get a better model — Qwen 30b coder
8
+ - use zerogpu
9
 
10
  """
11
  from huggingface_hub import hf_hub_download
 
20
  # ----- Get Model ----- #
21
  # Download the UD-Q3_K_XL GGUF file from the repo
22
  model_path = hf_hub_download(
23
+ repo_id="unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF",
24
+ filename="Qwen3-Coder-30B-A3B-Instruct-UD-Q3_K_XL.gguf" # fallback: Q2_K_XL
25
  )
26
 
27
  # Initialize llama.cpp with the local cached path
28
  llm = Llama(
29
  model_path=model_path,
30
+ n_ctx=4096,
31
  n_threads=2
32
  )
33