Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -3,6 +3,9 @@ import os
|
|
| 3 |
import logging
|
| 4 |
import threading
|
| 5 |
import time
|
|
|
|
|
|
|
|
|
|
| 6 |
from llama_cpp import Llama
|
| 7 |
import requests
|
| 8 |
import tempfile
|
|
@@ -14,7 +17,7 @@ app = Flask(__name__)
|
|
| 14 |
logging.basicConfig(level=logging.INFO)
|
| 15 |
|
| 16 |
MAX_CONTEXT_TOKENS = 1024 * 8
|
| 17 |
-
MAX_GENERATION_TOKENS = 1024 *
|
| 18 |
|
| 19 |
with open('engines.json', 'r') as f:
|
| 20 |
MODELS = json.load(f)
|
|
|
|
| 3 |
import logging
|
| 4 |
import threading
|
| 5 |
import time
|
| 6 |
+
import subprocess
|
| 7 |
+
import sys
|
| 8 |
+
subprocess.check_call([sys.executable, "-m", "pip", "install", "llama-cpp-python==0.3.15"])
|
| 9 |
from llama_cpp import Llama
|
| 10 |
import requests
|
| 11 |
import tempfile
|
|
|
|
| 17 |
logging.basicConfig(level=logging.INFO)
|
| 18 |
|
| 19 |
MAX_CONTEXT_TOKENS = 1024 * 8
|
| 20 |
+
MAX_GENERATION_TOKENS = 1024 * 8
|
| 21 |
|
| 22 |
with open('engines.json', 'r') as f:
|
| 23 |
MODELS = json.load(f)
|