end-rin committed on
Commit
8141c38
·
verified ·
1 Parent(s): 1340b8f

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +28 -17
app.py CHANGED
@@ -148,9 +148,8 @@ MODELS = {
148
  },
149
  }
150
 
151
# Global model cache (only keep one model loaded at a time to save memory)
_current_model = None        # the currently loaded model object, or None if none resident
_current_model_name = None   # key into MODELS identifying _current_model, or None
_llama_class = None          # cached class returned by _get_llama_class() — presumably llama_cpp.Llama; confirm in _get_llama_class
155
 
156
 
@@ -208,29 +207,39 @@ def transform_text(text: str, style: str) -> str:
208
 
209
 
210
def load_model(model_key: str):
    """Load a GGUF model. Unloads previous model to save memory."""
    global _current_model, _current_model_name

    # Cache hit: the requested model is already the one resident in memory.
    if _current_model_name == model_key and _current_model is not None:
        return _current_model

    # Unload previous model
    if _current_model is not None:
        # Drop the reference so the previous model can be garbage-collected
        # (and its native buffers released) before the next load.
        del _current_model
        _current_model = None
        _current_model_name = None

    # Load the requested model from its Hugging Face repo/file entry in MODELS.
    config = MODELS[model_key]
    Llama = _get_llama_class()
    _current_model = Llama.from_pretrained(
        repo_id=config['repo_id'],
        filename=config['filename'],
        n_ctx=256,      # small context window keeps memory usage low
        n_threads=8,
        verbose=False,
    )
    _current_model_name = model_key
    return _current_model
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
234
 
235
 
236
  def get_prediction(model, text: str, task: str, model_key: str) -> str:
@@ -587,6 +596,8 @@ def create_demo():
587
  # =============================================================================
588
 
589
if __name__ == "__main__":
    demo = create_demo()
    # NOTE(review): concurrency limit of 1 looks intended to serialize requests
    # against the single shared loaded model — confirm.
    demo.queue(default_concurrency_limit=1)
    demo.launch()
 
148
  },
149
  }
150
 
151
# Global model cache — all three models are pre-loaded at startup
# (keyed by their MODELS key; see load_model / preload_all_models).
_loaded_models = {}

_llama_class = None  # cached class returned by _get_llama_class()
154
 
155
 
 
207
 
208
 
209
def load_model(model_key: str):
    """Return the cached model for *model_key*, loading it on demand if absent.

    Normally every model is pre-loaded at startup, so this is a fast
    dictionary lookup; the load path below only runs as a fallback.
    """
    try:
        return _loaded_models[model_key]
    except KeyError:
        pass  # not pre-loaded yet — fall through and load it now

    # Fallback: load on demand if not yet ready
    cfg = MODELS[model_key]
    llama_cls = _get_llama_class()
    loaded = llama_cls.from_pretrained(
        repo_id=cfg['repo_id'],
        filename=cfg['filename'],
        n_ctx=256,
        n_threads=8,
        verbose=False,
    )
    _loaded_models[model_key] = loaded
    return loaded
226
+
227
+
228
def preload_all_models():
    """Pre-load all models at startup so switching is instant.

    Walks every entry in MODELS and loads any model that is not already in
    the _loaded_models cache. Delegates the actual loading to load_model()
    so the loading parameters (repo/file, n_ctx, n_threads, ...) are defined
    in exactly one place instead of being duplicated here.
    """
    for key, config in MODELS.items():
        if key not in _loaded_models:
            print(f"Pre-loading {config['name']}...")
            # load_model() performs the from_pretrained call and caches
            # the result in _loaded_models for us.
            load_model(key)
            print(f" {config['name']} ready.")
    print("All models pre-loaded.")
243
 
244
 
245
  def get_prediction(model, text: str, task: str, model_key: str) -> str:
 
596
  # =============================================================================
597
 
598
if __name__ == "__main__":
    print("Starting model pre-load...")
    # Load every model up front so the first user request is not blocked
    # by a model download/initialization.
    preload_all_models()
    demo = create_demo()
    # NOTE(review): concurrency limit of 1 looks intended to serialize
    # inference against the shared module-level models — confirm.
    demo.queue(default_concurrency_limit=1)
    demo.launch()