File size: 1,512 Bytes
a987ba3
1293cc8
b033e9b
1293cc8
326fd8f
1293cc8
9e48985
1293cc8
 
 
a57f140
 
 
 
 
1293cc8
a57f140
1293cc8
 
a57f140
 
1293cc8
 
 
a57f140
 
1293cc8
a57f140
 
1293cc8
a57f140
1293cc8
a57f140
 
1293cc8
a57f140
 
1293cc8
a57f140
1293cc8
 
 
a57f140
b3dc473
1293cc8
326fd8f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import gradio
import spaces

import psaiops.common.model
import psaiops.common.style
import psaiops.common.tokenizer
import psaiops.compose.maths.app as app

# META #########################################################################

# checkpoint identifier, written onto the shared app module; it is read back
# below and in the fetch_* helpers as the `name` of the model and tokenizer
# NOTE(review): looks like a Hugging Face repo id -- confirm
app.MODEL = 'qwen/qwen3.5-9b'

# additional keyword args forwarded to get_model when loading the model
_CONFIG = {}

# frontload the model on the CPU to avoid downloading it from the GPU slot
# (currently disabled: the model is not loaded at import time)
# psaiops.common.model.get_model(name=app.MODEL, device='cpu', **_CONFIG)

# module-level caches: the model stays unset until fetch_model() is called,
# while the tokenizer is created right away, when this module is imported
_MODEL = None
_TOKENIZER = psaiops.common.tokenizer.get_tokenizer(name=app.MODEL)

# LAZY #########################################################################

def fetch_model() -> object:
    """Return the shared model, loading it on the first call only.

    The result is cached in the module-level `_MODEL`, so later calls hand
    back the same object without calling `get_model` again.

    Returns:
        Whatever `psaiops.common.model.get_model` produced for `app.MODEL`.
    """
    global _MODEL
    # already loaded: reuse the cached instance
    if _MODEL is not None:
        return _MODEL
    # first use: load with the 'cuda' device requested, plus the extra args in
    # _CONFIG; deferring the load is what controls when the download happens
    _MODEL = psaiops.common.model.get_model(name=app.MODEL, device='cuda', **_CONFIG)
    return _MODEL

def fetch_tokenizer() -> object:
    """Return the shared tokenizer, creating it if it is not set yet.

    The module-level `_TOKENIZER` is normally filled in at import time, so the
    creation branch is only a fallback that mirrors `fetch_model`.

    Returns:
        Whatever `psaiops.common.tokenizer.get_tokenizer` produced for
        `app.MODEL`.
    """
    global _TOKENIZER
    # usual case: the tokenizer already exists
    if _TOKENIZER is not None:
        return _TOKENIZER
    # fallback: build it on demand and remember it for the next call
    _TOKENIZER = psaiops.common.tokenizer.get_tokenizer(name=app.MODEL)
    return _TOKENIZER

# MAIN #########################################################################

# bind the shared tokenizer into the table updater before building the app
# NOTE(review): presumably update_table_data returns a callback -- confirm
_tabulate = app.update_table_data(tokenizer=fetch_tokenizer())

# assemble the interface around that updater
demo = app.create_app(tabulate=_tabulate)

# demo.queue()
# start serving, with the Soft theme and the CSS from the shared style module
demo.launch(theme=gradio.themes.Soft(), css=psaiops.common.style.BUTTON)