from llama_cpp import Llama

# Module-level singleton; initialized lazily on first load_model() call.
llm = None


def load_model(
    model_path="models/tinyllama.gguf",
    n_ctx=2048,
    n_threads=4,
):
    """Return the shared Llama instance, loading it on first use.

    Args:
        model_path: Path to the GGUF model file.
        n_ctx: Context window size in tokens.
        n_threads: Number of CPU threads for inference.

    Returns:
        The cached ``Llama`` instance.

    Note:
        The model is loaded only once; on subsequent calls the cached
        instance is returned and the arguments are ignored. Defaults
        match the previous hard-coded values, so existing callers are
        unaffected.
    """
    global llm
    if llm is None:
        llm = Llama(
            model_path=model_path,
            n_ctx=n_ctx,
            n_threads=n_threads,
        )
    return llm