Spaces:

apehex
/

residual-visualization

Sleeping

App Files Community

apehex committed 28 days ago

Commit

0cbcd64

1 Parent(s): 952e79f

Remove useless args + enable debugging.

Browse files

Files changed (1) hide show

app.py +23 -21

app.py CHANGED Viewed

@@ -8,34 +8,35 @@ import psaiops.score.residual.app as app
 # META #########################################################################
 # frontload the model on the CPU to avoid downloading it from the GPU slot
-psaiops.common.model.get_model(name=app.MODEL, device='cpu')
 # but do not instantiate unless necessary
-_MODELS = {
-    'cpu': None,
-    'cuda': None,}
-_TOKENIZERS = {
-    'cpu': psaiops.common.tokenizer.get_tokenizer(name=app.MODEL, device='cpu'),
-    'cuda': psaiops.common.tokenizer.get_tokenizer(name=app.MODEL, device='cuda'),}
 # LAZY #########################################################################
-def fetch_model(device: str='cuda') -> object:
-    global _MODELS
     # control when the model is downloaded to avoid moving it to the CPU
-    if _MODELS.get(device, None) is None:
-        _MODELS[device] = psaiops.common.model.get_model(name=app.MODEL, device=device)
     # tuple of objects or (None, None)
-    return _MODELS.get(device, None)
-def fetch_tokenizer(device: str='cpu') -> object:
-    global _TOKENIZERS
     # not strictly necessary, but symmetry is everything
-    if _TOKENIZERS.get(device, None) is None:
-        _TOKENIZERS[device] = psaiops.common.tokenizer.get_tokenizer(name=app.MODEL, device=device)
     # tuple of objects or (None, None)
-    return _TOKENIZERS.get(device, None)
 # EVENT HANDLERS ###############################################################
@@ -45,7 +46,7 @@ def highlight_tokens(
     output_data: object,
 ) -> list:
     # do not download the model without the GPU wrapper
-    __tokenizer = fetch_tokenizer(device='cpu')
     # fill all the arguments that cannot be pickled
     return app.update_token_focus(
         left_idx=left_idx,
@@ -61,8 +62,8 @@ def compute_states(
     prompt_str: str,
 ) -> tuple:
     # load the model and tokenizer inside the GPU wrapper
-    __model = fetch_model(device='cuda')
-    __tokenizer = fetch_tokenizer(device='cuda')
     # fill all the arguments that cannot be pickled
     return app.update_computation_state(
         token_num=token_num,
@@ -76,5 +77,6 @@ def compute_states(
 # MAIN #########################################################################
 demo = app.create_app(highlight=highlight_tokens, compute=compute_states)
 demo.queue()
-demo.launch(theme=gradio.themes.Soft(), css=psaiops.common.style.BUTTON)

 # META #########################################################################
+app.MODEL = 'qwen/qwen3.5-9b'
+# additional args to use when loading the model
+_CONFIG = {}
 # frontload the model on the CPU to avoid downloading it from the GPU slot
+psaiops.common.model.get_model(name=app.MODEL, device='cpu', **_CONFIG)
 # but do not instantiate unless necessary
+_MODEL = None
+_TOKENIZER = psaiops.common.tokenizer.get_tokenizer(name=app.MODEL)
 # LAZY #########################################################################
+def fetch_model() -> object:
+    global _MODEL
     # control when the model is downloaded to avoid moving it to the CPU
+    if _MODEL is None:
+        _MODEL = psaiops.common.model.get_model(name=app.MODEL, device='cuda', **_CONFIG)
     # tuple of objects or (None, None)
+    return _MODEL
+def fetch_tokenizer() -> object:
+    global _TOKENIZER
     # not strictly necessary, but symmetry is everything
+    if _TOKENIZER is None:
+        _TOKENIZER = psaiops.common.tokenizer.get_tokenizer(name=app.MODEL)
     # tuple of objects or (None, None)
+    return _TOKENIZER
 # EVENT HANDLERS ###############################################################
     output_data: object,
 ) -> list:
     # do not download the model without the GPU wrapper
+    __tokenizer = fetch_tokenizer()
     # fill all the arguments that cannot be pickled
     return app.update_token_focus(
         left_idx=left_idx,
     prompt_str: str,
 ) -> tuple:
     # load the model and tokenizer inside the GPU wrapper
+    __model = fetch_model()
+    __tokenizer = fetch_tokenizer()
     # fill all the arguments that cannot be pickled
     return app.update_computation_state(
         token_num=token_num,
 # MAIN #########################################################################
 demo = app.create_app(highlight=highlight_tokens, compute=compute_states)
 demo.queue()
+demo.launch(theme=gradio.themes.Soft(), css=psaiops.common.style.ALL, debug=True)