lastmass committed on
Commit
fc6d202
·
1 Parent(s): adff921

Add ZeroGPU support: @spaces.GPU + n_gpu_layers=-1

Browse files
Files changed (3) hide show
  1. __pycache__/app.cpython-39.pyc +0 -0
  2. app.py +19 -3
  3. requirements.txt +1 -0
__pycache__/app.cpython-39.pyc ADDED
Binary file (17.1 kB). View file
 
app.py CHANGED
@@ -211,9 +211,15 @@ def demo_reply(prompt: str, state: GameState, mode: str) -> str:
211
 
212
 
213
  # ---------------------------------------------------------------------------
214
- # Model loading — llama-cpp-python (GGUF)
215
  # ---------------------------------------------------------------------------
216
 
 
 
 
 
 
 
217
  @lru_cache(maxsize=1)
218
  def get_llm():
219
  """Load the GGUF model. Raises RuntimeError when DEMO_MODE is forced."""
@@ -226,12 +232,13 @@ def get_llm():
226
  repo_id=GGUF_REPO,
227
  filename=GGUF_FILE,
228
  n_ctx=2048,
229
- n_threads=2,
 
230
  verbose=False,
231
  )
232
 
233
 
234
- def call_model(messages: List[Dict[str, str]], state: GameState, fallback_mode: str) -> str:
235
  if DEMO_MODE in {"1", "true", "yes", "on"}:
236
  return demo_reply(messages[-1]["content"], state, fallback_mode)
237
 
@@ -256,6 +263,15 @@ def call_model(messages: List[Dict[str, str]], state: GameState, fallback_mode:
256
  )
257
 
258
 
 
 
 
 
 
 
 
 
 
259
  # ---------------------------------------------------------------------------
260
  # Game logic
261
  # ---------------------------------------------------------------------------
 
211
 
212
 
213
  # ---------------------------------------------------------------------------
214
+ # Model loading — llama-cpp-python (GGUF) with ZeroGPU support
215
  # ---------------------------------------------------------------------------
216
 
217
+ try:
218
+ import spaces
219
+ HAS_ZEROGPU = True
220
+ except ImportError:
221
+ HAS_ZEROGPU = False
222
+
223
  @lru_cache(maxsize=1)
224
  def get_llm():
225
  """Load the GGUF model. Raises RuntimeError when DEMO_MODE is forced."""
 
232
  repo_id=GGUF_REPO,
233
  filename=GGUF_FILE,
234
  n_ctx=2048,
235
+ n_threads=4,
236
+ n_gpu_layers=-1, # offload all layers to GPU when available
237
  verbose=False,
238
  )
239
 
240
 
241
+ def _call_model_inner(messages: List[Dict[str, str]], state: GameState, fallback_mode: str) -> str:
242
  if DEMO_MODE in {"1", "true", "yes", "on"}:
243
  return demo_reply(messages[-1]["content"], state, fallback_mode)
244
 
 
263
  )
264
 
265
 
266
+ # Wrap with @spaces.GPU when ZeroGPU is available
267
+ if HAS_ZEROGPU:
268
+ @spaces.GPU
269
+ def call_model(messages, state, fallback_mode):
270
+ return _call_model_inner(messages, state, fallback_mode)
271
+ else:
272
+ call_model = _call_model_inner
273
+
274
+
275
  # ---------------------------------------------------------------------------
276
  # Game logic
277
  # ---------------------------------------------------------------------------
requirements.txt CHANGED
@@ -1,3 +1,4 @@
1
  gradio==4.44.1
2
  llama-cpp-python==0.3.22
3
  huggingface_hub>=0.24.0
 
 
1
  gradio==4.44.1
2
  llama-cpp-python==0.3.22
3
  huggingface_hub>=0.24.0
4
+ spaces