Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -114,13 +114,13 @@ async def load_model():
|
|
| 114 |
print(f"Model downloaded to: {model_path}")
|
| 115 |
|
| 116 |
print("Loading model into memory...")
|
| 117 |
-
llm = Llama(
|
| 118 |
model_path=model_path,
|
| 119 |
-
n_ctx=MAX_CONTEXT,
|
| 120 |
-
n_threads=4,
|
| 121 |
-
n_batch=512,
|
| 122 |
verbose=False,
|
| 123 |
-
n_gpu_layers=0
|
| 124 |
)
|
| 125 |
print("Model loaded successfully!")
|
| 126 |
|
|
|
|
| 114 |
print(f"Model downloaded to: {model_path}")
|
| 115 |
|
| 116 |
print("Loading model into memory...")
|
| 117 |
+
llm = Llama(
|
| 118 |
model_path=model_path,
|
| 119 |
+
n_ctx=MAX_CONTEXT,
|
| 120 |
+
n_threads=8, # Changed from 4 to 8
|
| 121 |
+
n_batch=1024, # Changed from 512 to 1024
|
| 122 |
verbose=False,
|
| 123 |
+
n_gpu_layers=0
|
| 124 |
)
|
| 125 |
print("Model loaded successfully!")
|
| 126 |
|