nryadav18 committed on
Commit
da089aa
·
verified ·
1 Parent(s): 00db226

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -3
app.py CHANGED
@@ -6,9 +6,11 @@ app = FastAPI()
6
 
7
  # Download and initialize the model when the server starts
8
  llm = Llama.from_pretrained(
9
- repo_id="Qwen/Qwen2.5-Coder-1.5B-Instruct-GGUF",
10
- filename="*q4_k_m.gguf", # 4-bit quantization for speed and low memory
11
- n_ctx=2048 # Context window size
 
 
12
  )
13
 
14
  class EvalRequest(BaseModel):
 
6
 
7
  # Download and initialize the model when the server starts
8
  llm = Llama.from_pretrained(
9
+ repo_id="Qwen/Qwen2.5-Coder-0.5B-Instruct-GGUF", # Make sure this says 0.5B!
10
+ filename="*q4_k_m.gguf",
11
+ n_ctx=2048,
12
+ n_threads=2,
13
+ n_batch=512
14
  )
15
 
16
  class EvalRequest(BaseModel):