"""Entry point: configure the model name, build the Gradio app and launch it."""

import gradio
# NOTE(review): `spaces` is not referenced below; presumably it is imported for
# its side effects on Hugging Face Spaces -- confirm before removing it.
import spaces

import psaiops.common.model
import psaiops.common.style
import psaiops.common.tokenizer
import psaiops.compose.maths.app as app

# META #########################################################################

# name of the checkpoint, shared with the app module
app.MODEL = 'qwen/qwen3.5-9b'

# extra keyword arguments forwarded to the model loader
_CONFIG = {}

# optional: preload the model on the CPU so it is not downloaded from the GPU slot
# psaiops.common.model.get_model(name=app.MODEL, device='cpu', **_CONFIG)

# the model is only created on demand, the tokenizer is created right away
_MODEL = None
_TOKENIZER = psaiops.common.tokenizer.get_tokenizer(name=app.MODEL)

# LAZY #########################################################################

def fetch_model() -> object:
    """Return the cached model, loading it on the 'cuda' device on first use.

    The result of `psaiops.common.model.get_model` is stored in the module
    level `_MODEL` and returned as is.
    NOTE(review): the exact type of the returned value depends on `get_model`,
    which is not visible here -- confirm.
    """
    global _MODEL
    # already loaded: reuse it, so the loader is only triggered when needed
    if _MODEL is not None:
        return _MODEL
    # first request: load the model directly on the GPU
    _MODEL = psaiops.common.model.get_model(
        name=app.MODEL,
        device='cuda',
        **_CONFIG)
    return _MODEL

def fetch_tokenizer() -> object:
    """Return the cached tokenizer, creating it if it is missing.

    Mirrors `fetch_model`: the tokenizer is already created at import time, so
    the loading branch only runs if `_TOKENIZER` is still `None`.
    NOTE(review): the exact type of the returned value depends on
    `get_tokenizer`, which is not visible here -- confirm.
    """
    global _TOKENIZER
    # normally set at import time, in which case nothing is reloaded
    if _TOKENIZER is not None:
        return _TOKENIZER
    # fallback, kept for symmetry with the model accessor
    _TOKENIZER = psaiops.common.tokenizer.get_tokenizer(name=app.MODEL)
    return _TOKENIZER

# MAIN #########################################################################

# the table callback is built from the shared tokenizer and handed to the app
demo = app.create_app(
    tabulate=app.update_table_data(
        tokenizer=fetch_tokenizer()))

# queuing is currently disabled
# demo.queue()

# start serving with the soft theme and the shared button styling
demo.launch(
    theme=gradio.themes.Soft(),
    css=psaiops.common.style.BUTTON)