Update app.py
Browse files
app.py
CHANGED
|
@@ -8,7 +8,7 @@ MODEL_REPO = "Kezovic/iris-q4gguf-baseline-10k"#iris-f16gguf-test" #iris-q4gguf-
|
|
| 8 |
MODEL_FILE = "llama-3.2-1b-instruct.Q4_K_M.gguf"#llama-3.2-1b-instruct.F16.gguf"#Llama-3.2-1B-Instruct.Q4_K_M.gguf"#llama-3.2-1b-instruct.Q4_K_M.gguf"
|
| 9 |
CONTEXT_WINDOW = 2048
|
| 10 |
MAX_NEW_TOKENS = 400
|
| 11 |
-
TEMPERATURE =
|
| 12 |
|
| 13 |
# --- Model Loading ---
|
| 14 |
llm = None
|
|
@@ -21,7 +21,8 @@ def load_llm():
|
|
| 21 |
model_path=model_path,
|
| 22 |
n_ctx=CONTEXT_WINDOW,
|
| 23 |
n_threads=2,
|
| 24 |
-
verbose=False
|
|
|
|
| 25 |
)
|
| 26 |
print("Model loaded successfully!")
|
| 27 |
except Exception as e:
|
|
|
|
| 8 |
MODEL_FILE = "llama-3.2-1b-instruct.Q4_K_M.gguf"#llama-3.2-1b-instruct.F16.gguf"#Llama-3.2-1B-Instruct.Q4_K_M.gguf"#llama-3.2-1b-instruct.Q4_K_M.gguf"
|
| 9 |
CONTEXT_WINDOW = 2048
|
| 10 |
MAX_NEW_TOKENS = 400
|
| 11 |
+
TEMPERATURE = 1.5
|
| 12 |
|
| 13 |
# --- Model Loading ---
|
| 14 |
llm = None
|
|
|
|
| 21 |
model_path=model_path,
|
| 22 |
n_ctx=CONTEXT_WINDOW,
|
| 23 |
n_threads=2,
|
| 24 |
+
verbose=False,
|
| 25 |
+
min_p = 0.1
|
| 26 |
)
|
| 27 |
print("Model loaded successfully!")
|
| 28 |
except Exception as e:
|