56ad5c9 93179c7
1
2
3
4
5
6
7
8
9
10
11
12
13
14
from llama_cpp import Llama

# Lazily-initialized shared model instance; created on first load_model() call.
llm = None


def load_model(
    model_path="models/tinyllama.gguf",
    n_ctx=2048,
    n_threads=4,
):
    """Return the shared Llama instance, loading it on first call.

    The model is created only once (lazy singleton). The parameters are
    honored only on the first call; subsequent calls return the already
    loaded model and ignore the arguments.

    Args:
        model_path: Path to the GGUF model file.
        n_ctx: Context window size in tokens.
        n_threads: Number of CPU threads used for inference.

    Returns:
        The shared ``Llama`` instance.
    """
    global llm
    if llm is None:
        llm = Llama(
            model_path=model_path,
            n_ctx=n_ctx,
            n_threads=n_threads,
        )
    return llm