import gradio
import spaces
import psaiops.common.model
import psaiops.common.style
import psaiops.common.tokenizer
import psaiops.compose.maths.app as app
# META #########################################################################
# Module-level configuration and lazily-populated singletons for this app.

# override the model identifier on the imported app module
app.MODEL = 'qwen/qwen3.5-9b'
# additional keyword args forwarded to `get_model` when loading the model
_CONFIG = {}
# Disabled: frontload the model on the CPU to avoid downloading it from the GPU slot
# psaiops.common.model.get_model(name=app.MODEL, device='cpu', **_CONFIG)
# the model is not instantiated at import time; `fetch_model` fills this in on first use
_MODEL = None
# the tokenizer, unlike the model, is loaded eagerly at import time
_TOKENIZER = psaiops.common.tokenizer.get_tokenizer(name=app.MODEL)
# LAZY #########################################################################
def fetch_model() -> object:
    """Return the shared model, loading it on the first call.

    The result is cached in the module-level `_MODEL`, so `get_model` is
    invoked at most once for as long as the cache stays populated. The load
    targets the 'cuda' device and forwards the extra arguments in `_CONFIG`.

    Returns:
        Whatever `psaiops.common.model.get_model` returned for `app.MODEL`.
    """
    global _MODEL
    # fast path: a previous call already populated the cache
    if _MODEL is not None:
        return _MODEL
    # deferred until first use so that the load happens directly on the GPU
    _MODEL = psaiops.common.model.get_model(name=app.MODEL, device='cuda', **_CONFIG)
    return _MODEL
def fetch_tokenizer() -> object:
    """Return the shared tokenizer, loading it if the cache is empty.

    Mirrors `fetch_model`: the result lives in the module-level `_TOKENIZER`
    and `get_tokenizer` is only called while that global is `None`.

    Returns:
        Whatever `psaiops.common.tokenizer.get_tokenizer` returned for
        `app.MODEL`.
    """
    global _TOKENIZER
    # fast path: the cache is already populated
    if _TOKENIZER is not None:
        return _TOKENIZER
    # fallback kept for symmetry with the model accessor
    _TOKENIZER = psaiops.common.tokenizer.get_tokenizer(name=app.MODEL)
    return _TOKENIZER
# MAIN #########################################################################
# Build the Gradio app at import time; the tokenizer is fetched here and bound
# into the `tabulate` argument through `app.update_table_data`.
# NOTE(review): `fetch_model` is not referenced in the visible code; confirm
# whether the app is expected to receive the model through another hook.
demo = app.create_app(tabulate=app.update_table_data(tokenizer=fetch_tokenizer()))
# request queueing is currently disabled
# demo.queue()
# start serving with the Soft theme and the shared button stylesheet
demo.launch(theme=gradio.themes.Soft(), css=psaiops.common.style.BUTTON)
|