Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -16,8 +16,8 @@ from concurrent.futures import ThreadPoolExecutor
|
|
| 16 |
app = Flask(__name__)
|
| 17 |
logging.basicConfig(level=logging.INFO)
|
| 18 |
|
| 19 |
-
MAX_CONTEXT_TOKENS = 1024 *
|
| 20 |
-
MAX_GENERATION_TOKENS = 1024 *
|
| 21 |
|
| 22 |
with open('engines.json', 'r') as f:
|
| 23 |
MODELS = json.load(f)
|
|
@@ -57,7 +57,7 @@ class LLMManager:
|
|
| 57 |
n_threads_batch=2,
|
| 58 |
use_mlock=True,
|
| 59 |
mmap=True,
|
| 60 |
-
low_vram=
|
| 61 |
vocab_only=False,
|
| 62 |
verbose=False,
|
| 63 |
logits_all=False,
|
|
|
|
| 16 |
app = Flask(__name__)
|
| 17 |
logging.basicConfig(level=logging.INFO)
|
| 18 |
|
| 19 |
+
MAX_CONTEXT_TOKENS = 1024 * 6
|
| 20 |
+
MAX_GENERATION_TOKENS = 1024 * 4
|
| 21 |
|
| 22 |
with open('engines.json', 'r') as f:
|
| 23 |
MODELS = json.load(f)
|
|
|
|
| 57 |
n_threads_batch=2,
|
| 58 |
use_mlock=True,
|
| 59 |
mmap=True,
|
| 60 |
+
low_vram=True,
|
| 61 |
vocab_only=False,
|
| 62 |
verbose=False,
|
| 63 |
logits_all=False,
|