TobDeBer committed on
Commit
a34e50c
·
1 Parent(s): 22e6bfc
Files changed (1) hide show
  1. app.py +15 -1
app.py CHANGED
@@ -5,11 +5,21 @@ import time
5
  from threading import Thread
6
  import sys
7
  import os
8
- os.environ["BNB_CUDA_VERSION"] = "0" # Forces bitsandbytes to recognize no GPU
9
  os.environ["OMP_NUM_THREADS"] = "1" # Prevents race conditions in custom CPU kernels
10
  os.environ["VECLIB_MAXIMUM_ISA"] = "AVX2"
11
  os.environ["MKL_DEBUG_CPU_TYPE"] = "5" # Forces MKL to use AVX2
12
 
 
 
 
 
 
 
 
 
 
 
13
  # Model configuration
14
  if len(sys.argv) > 1 and os.path.exists(sys.argv[1]):
15
  MODEL_NAME = sys.argv[1]
@@ -67,6 +77,7 @@ def load_model():
67
  except Exception as e:
68
  return f"❌ Error loading model: {str(e)}"
69
 
 
70
  def chat_predict(message, history, max_length, temperature, top_p, repetition_penalty, system_prompt):
71
  """Generate text using the loaded model with streaming and history"""
72
  global model, tokenizer
@@ -76,6 +87,9 @@ def chat_predict(message, history, max_length, temperature, top_p, repetition_pe
76
  return
77
 
78
  try:
 
 
 
79
  # Prepare messages for chat template
80
  messages = []
81
  if system_prompt:
 
5
  from threading import Thread
6
  import sys
7
  import os
8
+ # os.environ["BNB_CUDA_VERSION"] = "0" # Forces bitsandbytes to recognize no GPU
9
  os.environ["OMP_NUM_THREADS"] = "1" # Prevents race conditions in custom CPU kernels
10
  os.environ["VECLIB_MAXIMUM_ISA"] = "AVX2"
11
  os.environ["MKL_DEBUG_CPU_TYPE"] = "5" # Forces MKL to use AVX2
12
 
13
+ try:
14
+ import spaces
15
+ except ImportError:
16
+ spaces = None
17
+
18
+ def gpu_decorator(func):
19
+ if spaces:
20
+ return spaces.GPU(func)
21
+ return func
22
+
23
  # Model configuration
24
  if len(sys.argv) > 1 and os.path.exists(sys.argv[1]):
25
  MODEL_NAME = sys.argv[1]
 
77
  except Exception as e:
78
  return f"❌ Error loading model: {str(e)}"
79
 
80
+ @gpu_decorator
81
  def chat_predict(message, history, max_length, temperature, top_p, repetition_penalty, system_prompt):
82
  """Generate text using the loaded model with streaming and history"""
83
  global model, tokenizer
 
87
  return
88
 
89
  try:
90
+ if torch.cuda.is_available():
91
+ model.to("cuda")
92
+
93
  # Prepare messages for chat template
94
  messages = []
95
  if system_prompt: