Spaces:

build-small-hackathon
/

Case-Lantern

Running

lastmass committed 5 days ago

Commit

fc6d202

1 Parent(s): adff921

Add ZeroGPU support: @spaces.GPU + n_gpu_layers=-1

Files changed (3) hide show

__pycache__/app.cpython-39.pyc ADDED Viewed

Binary file (17.1 kB). View file

app.py CHANGED Viewed

@@ -211,9 +211,15 @@ def demo_reply(prompt: str, state: GameState, mode: str) -> str:
 # ---------------------------------------------------------------------------
-# Model loading — llama-cpp-python (GGUF)
 # ---------------------------------------------------------------------------
 @lru_cache(maxsize=1)
 def get_llm():
     """Load the GGUF model.  Raises RuntimeError when DEMO_MODE is forced."""
@@ -226,12 +232,13 @@ def get_llm():
         repo_id=GGUF_REPO,
         filename=GGUF_FILE,
         n_ctx=2048,
-        n_threads=2,
         verbose=False,
     )
-def call_model(messages: List[Dict[str, str]], state: GameState, fallback_mode: str) -> str:
     if DEMO_MODE in {"1", "true", "yes", "on"}:
         return demo_reply(messages[-1]["content"], state, fallback_mode)
@@ -256,6 +263,15 @@ def call_model(messages: List[Dict[str, str]], state: GameState, fallback_mode:
         )
 # ---------------------------------------------------------------------------
 # Game logic
 # ---------------------------------------------------------------------------

 # ---------------------------------------------------------------------------
+# Model loading — llama-cpp-python (GGUF) with ZeroGPU support
 # ---------------------------------------------------------------------------
+try:
+    import spaces
+    HAS_ZEROGPU = True
+except ImportError:
+    HAS_ZEROGPU = False
 @lru_cache(maxsize=1)
 def get_llm():
     """Load the GGUF model.  Raises RuntimeError when DEMO_MODE is forced."""
         repo_id=GGUF_REPO,
         filename=GGUF_FILE,
         n_ctx=2048,
+        n_threads=4,
+        n_gpu_layers=-1,  # offload all layers to GPU when available
         verbose=False,
     )
+def _call_model_inner(messages: List[Dict[str, str]], state: GameState, fallback_mode: str) -> str:
     if DEMO_MODE in {"1", "true", "yes", "on"}:
         return demo_reply(messages[-1]["content"], state, fallback_mode)
         )
+# Expose call_model: wrapped with @spaces.GPU when the `spaces` package imported successfully (HAS_ZEROGPU), otherwise an alias of _call_model_inner
+if HAS_ZEROGPU:
+    @spaces.GPU
+    def call_model(messages, state, fallback_mode):
+        return _call_model_inner(messages, state, fallback_mode)
+else:
+    call_model = _call_model_inner
 # ---------------------------------------------------------------------------
 # Game logic
 # ---------------------------------------------------------------------------

requirements.txt CHANGED Viewed

@@ -1,3 +1,4 @@
 gradio==4.44.1
 llama-cpp-python==0.3.22
 huggingface_hub>=0.24.0

 gradio==4.44.1
 llama-cpp-python==0.3.22
 huggingface_hub>=0.24.0
+spaces