Spaces:

nryadav18
/

python-code-evaluator

Sleeping

nryadav18 committed on Feb 22

Commit

da089aa

verified ·

1 Parent(s): 00db226

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -6,9 +6,11 @@ app = FastAPI()
 # Download and initialize the model when the server starts
 llm = Llama.from_pretrained(
-    repo_id="Qwen/Qwen2.5-Coder-1.5B-Instruct-GGUF",
-    filename="*q4_k_m.gguf", # 4-bit quantization for speed and low memory
-    n_ctx=2048 # Context window size
 )
 class EvalRequest(BaseModel):

 # Download and initialize the model when the server starts
 llm = Llama.from_pretrained(
+    repo_id="Qwen/Qwen2.5-Coder-0.5B-Instruct-GGUF", # Make sure this says 0.5B!
+    filename="*q4_k_m.gguf",
+    n_ctx=2048,
+    n_threads=2,
+    n_batch=512
 )
 class EvalRequest(BaseModel):