apehex committed on
Commit
0cbcd64
·
1 Parent(s): 952e79f

Remove useless args + enable debugging.

Browse files
Files changed (1) hide show
  1. app.py +23 -21
app.py CHANGED
@@ -8,34 +8,35 @@ import psaiops.score.residual.app as app
8
 
9
  # META #########################################################################
10
 
 
 
 
 
 
11
  # frontload the model on the CPU to avoid downloading it from the GPU slot
12
- psaiops.common.model.get_model(name=app.MODEL, device='cpu')
13
 
14
  # but do not instantiate unless necessary
15
- _MODELS = {
16
- 'cpu': None,
17
- 'cuda': None,}
18
- _TOKENIZERS = {
19
- 'cpu': psaiops.common.tokenizer.get_tokenizer(name=app.MODEL, device='cpu'),
20
- 'cuda': psaiops.common.tokenizer.get_tokenizer(name=app.MODEL, device='cuda'),}
21
 
22
  # LAZY #########################################################################
23
 
24
- def fetch_model(device: str='cuda') -> object:
25
- global _MODELS
26
  # control when the model is downloaded to avoid moving it to the CPU
27
- if _MODELS.get(device, None) is None:
28
- _MODELS[device] = psaiops.common.model.get_model(name=app.MODEL, device=device)
29
  # tuple of objects or (None, None)
30
- return _MODELS.get(device, None)
31
 
32
- def fetch_tokenizer(device: str='cpu') -> object:
33
- global _TOKENIZERS
34
  # not strictly necessary, but symmetry is everything
35
- if _TOKENIZERS.get(device, None) is None:
36
- _TOKENIZERS[device] = psaiops.common.tokenizer.get_tokenizer(name=app.MODEL, device=device)
37
  # tuple of objects or (None, None)
38
- return _TOKENIZERS.get(device, None)
39
 
40
  # EVENT HANDLERS ###############################################################
41
 
@@ -45,7 +46,7 @@ def highlight_tokens(
45
  output_data: object,
46
  ) -> list:
47
  # do not download the model without the GPU wrapper
48
- __tokenizer = fetch_tokenizer(device='cpu')
49
  # fill all the arguments that cannot be pickled
50
  return app.update_token_focus(
51
  left_idx=left_idx,
@@ -61,8 +62,8 @@ def compute_states(
61
  prompt_str: str,
62
  ) -> tuple:
63
  # load the model and tokenizer inside the GPU wrapper
64
- __model = fetch_model(device='cuda')
65
- __tokenizer = fetch_tokenizer(device='cuda')
66
  # fill all the arguments that cannot be pickled
67
  return app.update_computation_state(
68
  token_num=token_num,
@@ -76,5 +77,6 @@ def compute_states(
76
  # MAIN #########################################################################
77
 
78
  demo = app.create_app(highlight=highlight_tokens, compute=compute_states)
 
79
  demo.queue()
80
- demo.launch(theme=gradio.themes.Soft(), css=psaiops.common.style.BUTTON)
 
8
 
9
  # META #########################################################################
10
 
11
+ app.MODEL = 'qwen/qwen3.5-9b'
12
+
13
+ # additional args to use when loading the model
14
+ _CONFIG = {}
15
+
16
  # frontload the model on the CPU to avoid downloading it from the GPU slot
17
+ psaiops.common.model.get_model(name=app.MODEL, device='cpu', **_CONFIG)
18
 
19
  # but do not instantiate unless necessary
20
+ _MODEL = None
21
+ _TOKENIZER = psaiops.common.tokenizer.get_tokenizer(name=app.MODEL)
 
 
 
 
22
 
23
  # LAZY #########################################################################
24
 
25
+ def fetch_model() -> object:
26
+ global _MODEL
27
  # control when the model is downloaded to avoid moving it to the CPU
28
+ if _MODEL is None:
29
+ _MODEL = psaiops.common.model.get_model(name=app.MODEL, device='cuda', **_CONFIG)
30
  # tuple of objects or (None, None)
31
+ return _MODEL
32
 
33
+ def fetch_tokenizer() -> object:
34
+ global _TOKENIZER
35
  # not strictly necessary, but symmetry is everything
36
+ if _TOKENIZER is None:
37
+ _TOKENIZER = psaiops.common.tokenizer.get_tokenizer(name=app.MODEL)
38
  # tuple of objects or (None, None)
39
+ return _TOKENIZER
40
 
41
  # EVENT HANDLERS ###############################################################
42
 
 
46
  output_data: object,
47
  ) -> list:
48
  # do not download the model without the GPU wrapper
49
+ __tokenizer = fetch_tokenizer()
50
  # fill all the arguments that cannot be pickled
51
  return app.update_token_focus(
52
  left_idx=left_idx,
 
62
  prompt_str: str,
63
  ) -> tuple:
64
  # load the model and tokenizer inside the GPU wrapper
65
+ __model = fetch_model()
66
+ __tokenizer = fetch_tokenizer()
67
  # fill all the arguments that cannot be pickled
68
  return app.update_computation_state(
69
  token_num=token_num,
 
77
  # MAIN #########################################################################
78
 
79
  demo = app.create_app(highlight=highlight_tokens, compute=compute_states)
80
+
81
  demo.queue()
82
+ demo.launch(theme=gradio.themes.Soft(), css=psaiops.common.style.ALL, debug=True)