Spaces:

TobDeBer
/

SmolTransform

Running on Zero

TobDeBer committed on Dec 25, 2025

Commit

a34e50c

1 Parent(s): 22e6bfc

zero

Files changed (1) hide show

app.py CHANGED Viewed

@@ -5,11 +5,21 @@ import time
 from threading import Thread
 import sys
 import os
-os.environ["BNB_CUDA_VERSION"] = "0" # Forces bitsandbytes to recognize no GPU
 os.environ["OMP_NUM_THREADS"] = "1" # Prevents race conditions in custom CPU kernels
 os.environ["VECLIB_MAXIMUM_ISA"] = "AVX2"
 os.environ["MKL_DEBUG_CPU_TYPE"] = "5" # Forces MKL to use AVX2
 # Model configuration
 if len(sys.argv) > 1 and os.path.exists(sys.argv[1]):
     MODEL_NAME = sys.argv[1]
@@ -67,6 +77,7 @@ def load_model():
     except Exception as e:
         return f"❌ Error loading model: {str(e)}"
 def chat_predict(message, history, max_length, temperature, top_p, repetition_penalty, system_prompt):
     """Generate text using the loaded model with streaming and history"""
     global model, tokenizer
@@ -76,6 +87,9 @@ def chat_predict(message, history, max_length, temperature, top_p, repetition_pe
         return
     try:
         # Prepare messages for chat template
         messages = []
         if system_prompt:

 from threading import Thread
 import sys
 import os
+# os.environ["BNB_CUDA_VERSION"] = "0" # Forces bitsandbytes to recognize no GPU
 os.environ["OMP_NUM_THREADS"] = "1" # Prevents race conditions in custom CPU kernels
 os.environ["VECLIB_MAXIMUM_ISA"] = "AVX2"
 os.environ["MKL_DEBUG_CPU_TYPE"] = "5" # Forces MKL to use AVX2
+try:
+    import spaces
+except ImportError:
+    spaces = None
+def gpu_decorator(func):  # Decorator: applies spaces.GPU to func only when the `spaces` module was imported.
+    if spaces:  # `spaces` is set to None by the ImportError fallback above
+        return spaces.GPU(func)
+    return func  # `spaces` unavailable: return func unchanged
 # Model configuration
 if len(sys.argv) > 1 and os.path.exists(sys.argv[1]):
     MODEL_NAME = sys.argv[1]
     except Exception as e:
         return f"❌ Error loading model: {str(e)}"
+@gpu_decorator
 def chat_predict(message, history, max_length, temperature, top_p, repetition_penalty, system_prompt):
     """Generate text using the loaded model with streaming and history"""
     global model, tokenizer
         return
     try:
+        if torch.cuda.is_available():
+            model.to("cuda")
         # Prepare messages for chat template
         messages = []
         if system_prompt: