Spaces: Runtime error
Commit: fix image upload bug
Files changed:
- app.py (+5, -5)
- tools/conversation.py (+5, -0)
app.py
CHANGED
|
@@ -25,10 +25,10 @@ import torch
|
|
| 25 |
|
| 26 |
# huggingface-cli login
|
| 27 |
|
| 28 |
-
device = 'cuda'
|
| 29 |
model_path = os.getenv("MODEL_PATH", "omni-research/Tarsier2-7b")
|
| 30 |
-
max_n_frames = int(os.getenv("MAX_N_FRAMES",
|
| 31 |
debug = False
|
|
|
|
| 32 |
|
| 33 |
# ========================================
|
| 34 |
# Model Initialization
|
|
@@ -60,7 +60,7 @@ def gradio_reset(chat_state, img_file, img_list):
|
|
| 60 |
|
| 61 |
|
| 62 |
def upload_img(gr_img, gr_video, gr_gif, chat_state, num_frames):
|
| 63 |
-
print(gr_img,
|
| 64 |
conv_type = ''
|
| 65 |
if 'tarsier2-7b' in model_path.lower():
|
| 66 |
conv_type = 'tarsier2-7b'
|
|
@@ -78,7 +78,7 @@ def upload_img(gr_img, gr_video, gr_gif, chat_state, num_frames):
|
|
| 78 |
if gr_img is None and gr_video is None and gr_gif is None:
|
| 79 |
return None, None, None, gr.update(interactive=True), gr.update(interactive=True, placeholder='Please upload video/image first!'), chat_state, None, None
|
| 80 |
if gr_video or gr_img or gr_gif:
|
| 81 |
-
for img_file in [gr_video,
|
| 82 |
if img_file is not None:
|
| 83 |
break
|
| 84 |
return gr.update(interactive=True), gr.update(interactive=True), gr.update(interactive=True), gr.update(interactive=True, placeholder='Type and press Enter'), gr.update(value="Start Chatting", interactive=False), chat_state, img_file, img_list
|
|
@@ -87,7 +87,7 @@ def upload_img(gr_img, gr_video, gr_gif, chat_state, num_frames):
|
|
| 87 |
def gradio_ask(user_message, chatbot, chat_state):
|
| 88 |
if len(user_message) == 0:
|
| 89 |
return gr.update(interactive=True, placeholder='Input should not be empty!'), chatbot, chat_state
|
| 90 |
-
chat_state =
|
| 91 |
chatbot = chatbot + [[user_message, None]]
|
| 92 |
return '', chatbot, chat_state
|
| 93 |
|
|
|
|
| 25 |
|
| 26 |
# huggingface-cli login
# Runtime configuration, overridable through environment variables.
model_path = os.environ.get("MODEL_PATH", "omni-research/Tarsier2-7b")
max_n_frames = int(os.environ.get("MAX_N_FRAMES", 16))
debug = False
# Fall back to CPU while debugging so the app runs without a GPU.
device = 'cpu' if debug else 'cuda'
|
| 32 |
|
| 33 |
# ========================================
|
| 34 |
# Model Initialization
|
|
|
|
| 60 |
|
| 61 |
|
| 62 |
def upload_img(gr_img, gr_video, gr_gif, chat_state, num_frames):
|
| 63 |
+
print("video, image or gif:", gr_video, gr_img, gr_gif)
|
| 64 |
conv_type = ''
|
| 65 |
if 'tarsier2-7b' in model_path.lower():
|
| 66 |
conv_type = 'tarsier2-7b'
|
|
|
|
| 78 |
if gr_img is None and gr_video is None and gr_gif is None:
|
| 79 |
return None, None, None, gr.update(interactive=True), gr.update(interactive=True, placeholder='Please upload video/image first!'), chat_state, None, None
|
| 80 |
if gr_video or gr_img or gr_gif:
|
| 81 |
+
for img_file in [gr_video, gr_img, gr_gif]:
|
| 82 |
if img_file is not None:
|
| 83 |
break
|
| 84 |
return gr.update(interactive=True), gr.update(interactive=True), gr.update(interactive=True), gr.update(interactive=True, placeholder='Type and press Enter'), gr.update(value="Start Chatting", interactive=False), chat_state, img_file, img_list
|
|
|
|
| 87 |
def gradio_ask(user_message, chatbot, chat_state):
    """Push one user turn into the conversation.

    Rejects empty input with an inline placeholder hint; otherwise records
    the question in ``chat_state`` via ``chat.ask`` and appends a
    (question, pending-answer) pair to the visible chat history.

    Returns (textbox_value, chatbot, chat_state).
    """
    if len(user_message) == 0:
        # Nothing to send: keep the textbox enabled and hint at the problem.
        hint = gr.update(interactive=True, placeholder='Input should not be empty!')
        return hint, chatbot, chat_state
    # Feed the question to the model-side conversation state.
    chat_state = chat.ask(user_message, chat_state)
    # Mirror the turn in the UI; the answer slot (None) is filled by the reply handler.
    pending_turn = [user_message, None]
    chatbot = chatbot + [pending_turn]
    return '', chatbot, chat_state
|
| 93 |
|
tools/conversation.py
CHANGED
|
@@ -78,6 +78,7 @@ class Chat:
|
|
| 78 |
|
| 79 |
def prepare_model_inputs(self, conv, visual_data_file=None, images=None, n_frames=None):
|
| 80 |
conv.messages.append([conv.roles[1], None])
|
|
|
|
| 81 |
conv.messages[0][1] = re.sub(f"({IMAGE_TOKEN}|{VIDEO_TOKEN})\n*", "", conv.messages[0][1])
|
| 82 |
|
| 83 |
if images is None or isinstance(images, list) and len(images) == 0:
|
|
@@ -89,6 +90,10 @@ class Chat:
|
|
| 89 |
images = None
|
| 90 |
else:
|
| 91 |
raise NotImplementedError
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
|
| 93 |
if isinstance(images, list) and len(images) > 0:
|
| 94 |
conv.messages[0][1] = IMAGE_TOKEN*len(images) + '\n' + conv.messages[0][1]
|
|
|
|
| 78 |
|
| 79 |
def prepare_model_inputs(self, conv, visual_data_file=None, images=None, n_frames=None):
|
| 80 |
conv.messages.append([conv.roles[1], None])
|
| 81 |
+
print(conv.messages)
|
| 82 |
conv.messages[0][1] = re.sub(f"({IMAGE_TOKEN}|{VIDEO_TOKEN})\n*", "", conv.messages[0][1])
|
| 83 |
|
| 84 |
if images is None or isinstance(images, list) and len(images) == 0:
|
|
|
|
| 90 |
images = None
|
| 91 |
else:
|
| 92 |
raise NotImplementedError
|
| 93 |
+
|
| 94 |
+
# os.system("rm tmp_images/*")
|
| 95 |
+
# for i, img in enumerate(images):
|
| 96 |
+
# img.save(f"tmp_images/{i+1}.jpg")
|
| 97 |
|
| 98 |
if isinstance(images, list) and len(images) > 0:
|
| 99 |
conv.messages[0][1] = IMAGE_TOKEN*len(images) + '\n' + conv.messages[0][1]
|