Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -6,9 +6,11 @@ app = FastAPI()
|
|
| 6 |
|
| 7 |
# Download and initialize the model when the server starts
|
| 8 |
llm = Llama.from_pretrained(
|
| 9 |
-
repo_id="Qwen/Qwen2.5-Coder-
|
| 10 |
-
filename="*q4_k_m.gguf",
|
| 11 |
-
n_ctx=2048
|
|
|
|
|
|
|
| 12 |
)
|
| 13 |
|
| 14 |
class EvalRequest(BaseModel):
|
|
|
|
| 6 |
|
| 7 |
# Download and initialize the model when the server starts
|
| 8 |
llm = Llama.from_pretrained(
|
| 9 |
+
repo_id="Qwen/Qwen2.5-Coder-0.5B-Instruct-GGUF", # Make sure this says 0.5B!
|
| 10 |
+
filename="*q4_k_m.gguf",
|
| 11 |
+
n_ctx=2048,
|
| 12 |
+
n_threads=2,
|
| 13 |
+
n_batch=512
|
| 14 |
)
|
| 15 |
|
| 16 |
class EvalRequest(BaseModel):
|