R-Kentaren committed on
Commit
6861394
·
verified ·
1 Parent(s): ada8101

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -5
app.py CHANGED
@@ -11,7 +11,6 @@ import torch
11
  from transformers import pipeline, TextIteratorStreamer, StoppingCriteria
12
  from transformers import AutoTokenizer
13
  from ddgs import DDGS
14
- import spaces # Import spaces early to enable ZeroGPU support
15
  from torch.utils._pytree import tree_map
16
  from config import *
17
  # Global event to signal cancellation from the UI thread to the generation thread
@@ -19,9 +18,6 @@ cancel_event = threading.Event()
19
 
20
  access_token=os.environ['HF_TOKEN']
21
 
22
- # Optional: Disable GPU visibility if you wish to force CPU usage
23
- # os.environ["CUDA_VISIBLE_DEVICES"] = ""
24
-
25
 
26
 
27
  # Global cache for pipelines to avoid re-loading.
@@ -109,7 +105,7 @@ def get_duration(user_msg, chat_history, system_prompt, enable_search, max_resul
109
 
110
  return base_duration + token_duration + search_duration + aot_compilation_buffer
111
 
112
- @spaces.GPU(duration=get_duration)
113
  def chat_response(user_msg, chat_history, system_prompt,
114
  enable_search, max_results, max_chars,
115
  model_name, max_tokens, temperature,
 
11
  from transformers import pipeline, TextIteratorStreamer, StoppingCriteria
12
  from transformers import AutoTokenizer
13
  from ddgs import DDGS
 
14
  from torch.utils._pytree import tree_map
15
  from config import *
16
  # Global event to signal cancellation from the UI thread to the generation thread
 
18
 
19
  access_token=os.environ['HF_TOKEN']
20
 
 
 
 
21
 
22
 
23
  # Global cache for pipelines to avoid re-loading.
 
105
 
106
  return base_duration + token_duration + search_duration + aot_compilation_buffer
107
 
108
+
109
  def chat_response(user_msg, chat_history, system_prompt,
110
  enable_search, max_results, max_chars,
111
  model_name, max_tokens, temperature,