OpthChat_a

Paused

farrell236 committed on Jun 26, 2025

Commit

1d3684c

verified ·

1 Parent(s): 7caf75b

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -3,18 +3,20 @@
 # This source code is based on web_demo_mm.py, by Alibaba Cloud.
 # Licensed under Apache License 2.0
-import os
 import copy
 import re
 from argparse import ArgumentParser
 from threading import Thread
 import gradio as gr
 import torch
 from qwen_vl_utils import process_vision_info
 from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration, TextIteratorStreamer
-DEFAULT_CKPT_PATH = 'farrell236/test_model'
 AUTH_TOKEN = os.environ.get("HF_spaces")
 def _get_args():
@@ -60,13 +62,14 @@ def _load_model_processor(args):
         model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
             args.checkpoint_path,
             use_auth_token=args.auth_token,
-            torch_dtype='auto',
             attn_implementation='flash_attention_2',
             device_map=device_map)
     else:
         model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
             args.checkpoint_path,
             use_auth_token=args.auth_token,
             device_map=device_map)
     processor = AutoProcessor.from_pretrained('Qwen/Qwen2.5-VL-3B-Instruct')
@@ -145,6 +148,7 @@ def _transform_messages(original_messages):
 def _launch_demo(args, model, processor):
     def call_local_model(model, processor, messages,
                          max_tokens=1024, temperature=0.6,
                          top_p=0.9, top_k=50,

 # This source code is based on web_demo_mm.py, by Alibaba Cloud.
 # Licensed under Apache License 2.0
 import copy
+import os
 import re
 from argparse import ArgumentParser
 from threading import Thread
 import gradio as gr
 import torch
+import spaces
 from qwen_vl_utils import process_vision_info
 from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration, TextIteratorStreamer
+# DEFAULT_CKPT_PATH = 'farrell236/test_model'
+DEFAULT_CKPT_PATH = 'Qwen/Qwen2.5-VL-32B-Instruct'
 AUTH_TOKEN = os.environ.get("HF_spaces")
 def _get_args():
         model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
             args.checkpoint_path,
             use_auth_token=args.auth_token,
+            torch_dtype=torch.bfloat16,
             attn_implementation='flash_attention_2',
             device_map=device_map)
     else:
         model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
             args.checkpoint_path,
             use_auth_token=args.auth_token,
+            torch_dtype=torch.bfloat16,
             device_map=device_map)
     processor = AutoProcessor.from_pretrained('Qwen/Qwen2.5-VL-3B-Instruct')
 def _launch_demo(args, model, processor):
+    @spaces.GPU
     def call_local_model(model, processor, messages,
                          max_tokens=1024, temperature=0.6,
                          top_p=0.9, top_k=50,