Spaces:
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
|
@@ -17,7 +17,6 @@ import requests
|
|
| 17 |
from transformers import (
|
| 18 |
Qwen2VLForConditionalGeneration,
|
| 19 |
Qwen2_5_VLForConditionalGeneration,
|
| 20 |
-
AutoModelForImageTextToText,
|
| 21 |
AutoProcessor,
|
| 22 |
TextIteratorStreamer,
|
| 23 |
AutoModel,
|
|
@@ -30,8 +29,20 @@ MAX_MAX_NEW_TOKENS = 4096
|
|
| 30 |
DEFAULT_MAX_NEW_TOKENS = 2048
|
| 31 |
MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
|
| 32 |
|
| 33 |
-
|
|
|
|
|
|
|
| 34 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
# --- Model Loading ---
|
| 36 |
|
| 37 |
# To address the warnings, we add `use_fast=False` to ensure we use the
|
|
@@ -81,7 +92,9 @@ model_v4 = AutoModel.from_pretrained(
|
|
| 81 |
MODEL_ID_V4,
|
| 82 |
trust_remote_code=True,
|
| 83 |
torch_dtype=torch.bfloat16,
|
| 84 |
-
|
|
|
|
|
|
|
| 85 |
).eval().to(device)
|
| 86 |
tokenizer_v4 = AutoTokenizer.from_pretrained(MODEL_ID_V4, trust_remote_code=True, use_fast=False)
|
| 87 |
|
|
@@ -312,4 +325,4 @@ with gr.Blocks(css=css, theme="bethecloud/storj_theme") as demo:
|
|
| 312 |
)
|
| 313 |
|
| 314 |
if __name__ == "__main__":
|
| 315 |
-
demo.queue(max_size=50).launch(share=True, show_error=True)
|
|
|
|
| 17 |
from transformers import (
|
| 18 |
Qwen2VLForConditionalGeneration,
|
| 19 |
Qwen2_5_VLForConditionalGeneration,
|
|
|
|
| 20 |
AutoProcessor,
|
| 21 |
TextIteratorStreamer,
|
| 22 |
AutoModel,
|
|
|
|
| 29 |
DEFAULT_MAX_NEW_TOKENS = 2048
|
| 30 |
MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
|
| 31 |
|
| 32 |
+
# Let the environment (e.g., Hugging Face Spaces) determine the device.
|
| 33 |
+
# This avoids conflicts with the CUDA environment setup by the platform.
|
| 34 |
+
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 35 |
|
| 36 |
+
print("CUDA_VISIBLE_DEVICES=", os.environ.get("CUDA_VISIBLE_DEVICES"))
|
| 37 |
+
print("torch.__version__ =", torch.__version__)
|
| 38 |
+
print("torch.version.cuda =", torch.version.cuda)
|
| 39 |
+
print("cuda available:", torch.cuda.is_available())
|
| 40 |
+
print("cuda device count:", torch.cuda.device_count())
|
| 41 |
+
if torch.cuda.is_available():
|
| 42 |
+
print("current device:", torch.cuda.current_device())
|
| 43 |
+
print("device name:", torch.cuda.get_device_name(torch.cuda.current_device()))
|
| 44 |
+
|
| 45 |
+
print("Using device:", device)
|
| 46 |
# --- Model Loading ---
|
| 47 |
|
| 48 |
# To address the warnings, we add `use_fast=False` to ensure we use the
|
|
|
|
| 92 |
MODEL_ID_V4,
|
| 93 |
trust_remote_code=True,
|
| 94 |
torch_dtype=torch.bfloat16,
|
| 95 |
+
# Using 'sdpa' can sometimes cause issues in certain environments,
|
| 96 |
+
# letting transformers choose the default is safer.
|
| 97 |
+
# attn_implementation='sdpa'
|
| 98 |
).eval().to(device)
|
| 99 |
tokenizer_v4 = AutoTokenizer.from_pretrained(MODEL_ID_V4, trust_remote_code=True, use_fast=False)
|
| 100 |
|
|
|
|
| 325 |
)
|
| 326 |
|
| 327 |
if __name__ == "__main__":
|
| 328 |
+
demo.queue(max_size=50).launch(share=True, ssr_mode=False, show_error=True)
|