Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -11,7 +11,6 @@ import torch
|
|
| 11 |
from transformers import pipeline, TextIteratorStreamer, StoppingCriteria
|
| 12 |
from transformers import AutoTokenizer
|
| 13 |
from ddgs import DDGS
|
| 14 |
-
import spaces # Import spaces early to enable ZeroGPU support
|
| 15 |
from torch.utils._pytree import tree_map
|
| 16 |
from config import *
|
| 17 |
# Global event to signal cancellation from the UI thread to the generation thread
|
|
@@ -19,9 +18,6 @@ cancel_event = threading.Event()
|
|
| 19 |
|
| 20 |
access_token=os.environ['HF_TOKEN']
|
| 21 |
|
| 22 |
-
# Optional: Disable GPU visibility if you wish to force CPU usage
|
| 23 |
-
# os.environ["CUDA_VISIBLE_DEVICES"] = ""
|
| 24 |
-
|
| 25 |
|
| 26 |
|
| 27 |
# Global cache for pipelines to avoid re-loading.
|
|
@@ -109,7 +105,7 @@ def get_duration(user_msg, chat_history, system_prompt, enable_search, max_resul
|
|
| 109 |
|
| 110 |
return base_duration + token_duration + search_duration + aot_compilation_buffer
|
| 111 |
|
| 112 |
-
|
| 113 |
def chat_response(user_msg, chat_history, system_prompt,
|
| 114 |
enable_search, max_results, max_chars,
|
| 115 |
model_name, max_tokens, temperature,
|
|
|
|
| 11 |
from transformers import pipeline, TextIteratorStreamer, StoppingCriteria
|
| 12 |
from transformers import AutoTokenizer
|
| 13 |
from ddgs import DDGS
|
|
|
|
| 14 |
from torch.utils._pytree import tree_map
|
| 15 |
from config import *
|
| 16 |
# Global event to signal cancellation from the UI thread to the generation thread
|
|
|
|
| 18 |
|
| 19 |
access_token=os.environ['HF_TOKEN']
|
| 20 |
|
|
|
|
|
|
|
|
|
|
| 21 |
|
| 22 |
|
| 23 |
# Global cache for pipelines to avoid re-loading.
|
|
|
|
| 105 |
|
| 106 |
return base_duration + token_duration + search_duration + aot_compilation_buffer
|
| 107 |
|
| 108 |
+
|
| 109 |
def chat_response(user_msg, chat_history, system_prompt,
|
| 110 |
enable_search, max_results, max_chars,
|
| 111 |
model_name, max_tokens, temperature,
|