Commit 07794b9 · 1 Parent(s): 3431d22
README.md CHANGED
@@ -1,12 +1,12 @@
 ---
-title: Misty2
-emoji: 😊
-colorFrom: blue
-colorTo: green
+title: Misty
+emoji: 🩻
+colorFrom: green
+colorTo: gray
 sdk: gradio
 sdk_version: 5.44.1
 app_file: app.py
 license: apache-2.0
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
@@ -1,7 +1,5 @@
 import gradio as gr
 from transformers import AutoProcessor, AutoModelForImageTextToText, TextIteratorStreamer
-from transformers.image_utils import load_image, valid_images
-from transformers.image_transforms import resize
 from threading import Thread
 import re
 import time
@@ -9,14 +7,14 @@ import torch
 import spaces
 import math
 import os
+# from qwen_vl_utils import process_vision_info, fetch_image
 
-# from transformers import Qwen2_5_VLForConditionalGeneration
-from qwen_vl_utils import process_vision_info, fetch_image
+# run locally: CUDA_VISIBLE_DEVICES=0 GRADIO_SERVER_PORT=7860 MODEL=./model_dir python app.py
+# and open http://localhost:7860
 
-
-# pretrained_model_name_or_path=os.environ.get("MODEL", "amrn/testmodel2")
-# pretrained_model_name_or_path=os.environ.get("MODEL", "amrn/dsv5mx3")
-pretrained_model_name_or_path=os.environ.get("MODEL", "amrn/gmdsv5mx3")
+# pretrained_model_name_or_path=os.environ.get("MODEL", "amrn/gmdsv5mx3")
+# pretrained_model_name_or_path=os.environ.get("MODEL", "amrn/gr1")
+pretrained_model_name_or_path=os.environ.get("MODEL", "amrn/mrcxr1")
 
 auth_token = os.environ.get("HF_TOKEN") or True
 DEFAULT_PROMPT = "Find abnormalities and support devices."
@@ -24,43 +22,33 @@ DEFAULT_PROMPT = "Find abnormalities and support devices."
 model = AutoModelForImageTextToText.from_pretrained(
     pretrained_model_name_or_path=pretrained_model_name_or_path,
     dtype=torch.bfloat16,
-    # attn_implementation="flash_attention_2",
-    # trust_remote_code=True,
     token=auth_token
 ).eval().to("cuda")
 
 
-processor_config={}
-# if isinstance(model, Qwen2_5_VLForConditionalGeneration):
-#     processor_config = {"min_pixels": 28*28*2, "max_pixels": 476*476}
-
-
 processor = AutoProcessor.from_pretrained(pretrained_model_name_or_path,
     use_fast=True,
-    **processor_config
 )
 
 
 @spaces.GPU
 def model_inference(
-    text, history, image=None
+    text, history, image
 ):
 
-
     print(f"text: {text}")
     print(f"history: {history}")
 
     if len(text) == 0:
-        # return 'bad request', 'Please input a query.'
         raise gr.Error("Please input a query.", duration=3, print_exception=False)
 
     if image is None:
        raise gr.Error("Please provide an image.", duration=3, print_exception=False)
 
-    # image = load_image(image)
     print(f"image0: {image} size: {image.size}")
-    image = fetch_image({"image": image, "min_pixels": 28*28*2, "max_pixels": 476*476})
-    print(f"image1: {image} size: {image.size}")
+    # image = fetch_image({"image": image, "min_pixels": 28*28*2, "max_pixels": 476*476})
+    # image.thumbnail((512, 512)) # resize image to 512x512, preserving aspect ratio
+    # print(f"image1: {image} size: {image.size}")
 
 
     messages=[]
@@ -70,21 +58,17 @@ def model_inference(
         h = history[i]
         if len(h.get("content").strip()) > 0:
             if valid_index is None and h['role'] == 'assistant':
-                valid_index = i-1 #supposed to be 0
+                valid_index = i-1
             messages.append({"role": h['role'], "content": [{"type": "text", "text": h['content']}] })
 
-    # print(f"valid_index: {valid_index}")
     if valid_index is None:
         messages = []
     if len(messages) > 0 and valid_index > 0:
-        # print(f"removing previous messages (without image) valid_index: {valid_index}")
         messages = messages[valid_index:] #remove previous messages (without image)
 
     # current prompt
     messages.append({"role": "user","content": [{"type": "text", "text": text}]})
     messages[0]['content'].insert(0, {"type": "image"})
-
-
     print(f"messages: {messages}")
 
 
@@ -95,8 +79,7 @@
 
     # Generate
     streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
-    generation_args = dict(inputs, streamer=streamer, max_new_tokens=2048)
-    # generated_text = ""
+    generation_args = dict(inputs, streamer=streamer, max_new_tokens=4096)
 
     with torch.inference_mode():
         thread = Thread(target=model.generate, kwargs=generation_args)
@@ -108,60 +91,49 @@
 
     for new_text in streamer:
         buffer += new_text
-        # generated_text_without_prompt = buffer#[len(ext_buffer):]
-        # time.sleep(0.01)
-        # print(f"buffer: {buffer}")
         yield buffer
 
 
-# CSS = """
-# .contain { display: flex; flex-direction: column; }
-# #component-0 { height: 100%; }
-# #chatbot { flex-grow: 1; height: 600px; } /* Set height here */
-# """
-
-
 with gr.Blocks() as demo:
 
+    # gr.Markdown('<h1 style="text-align:center; margin: 0.2em 0;">Demo.</h1>')
     send_btn = gr.Button("Send", variant="primary", render=False)
     textbox = gr.Textbox(show_label=False, placeholder="Enter your text here and press ENTER", render=False, submit_btn="Send")
-    # chatbot = gr.Chatbot(type="messages", label="AI", render_markdown=True, sanitize_html=False, allow_tags=True, height=800, container=False, show_share_button=False)
 
     with gr.Row():
-        with gr.Column(scale=0.5):
-            # input_type_radio = gr.Radio(choices=["Image", "Video"], value="Image", label="Select Input Type")
+        with gr.Column(scale=1):
             image_input = gr.Image(type="pil", visible=True, sources="upload", show_label=False)
 
             clear_btn = gr.Button("Clear", variant="secondary")
 
-            # with gr.Column():
             ex =gr.Examples(
                 examples=[
-                    ["example_images/35.jpg", "Find abnormalities and support devices."],
+                    ["example_images/35.jpg", "Examine the chest X-ray."],
                     ["example_images/363.jpg", "Provide a comprehensive image analysis, and list all abnormalities."],
-                    ["example_images/376.jpg", "Examine the chest X-ray."],
+                    ["example_images/4747.jpg", "Find abnormalities and support devices."],
+                    ["example_images/87.jpg", "Find abnormalities and support devices."],
+                    ["example_images/6218.jpg", "Find abnormalities and support devices."],
+                    ["example_images/6447.jpg", "Find abnormalities and support devices."],
+
+
                 ],
                 inputs=[image_input, textbox],
             )
 
-    # with gr.Column(scale=7):
-
-    chat_interface = gr.ChatInterface(fn=model_inference,
-        # title='title', description='description',
-        type="messages",
-        chatbot=gr.Chatbot(type="messages", label="AI", render_markdown=True, sanitize_html=False, allow_tags=True, height='40vw', container=False, show_share_button=False),
-        # chatbot=gr.Chatbot(type="messages", label="AI", render_markdown=True, sanitize_html=False, allow_tags=True, scale=1, show_share_button=False),
-        textbox=textbox,
-        additional_inputs=image_input,
-        multimodal=False,
-        fill_height=False,
-        # css=CSS,
-    )
-    # chat_interface.chatbot.render_markdown=True
-    # chat_interface.chatbot.sanitize_html=False
-    # chat_interface.chatbot.allow_tags=True
-    # chat_interface.chatbot.elem_id="chatbot"
-
+        with gr.Column(scale=2):
+            chat_interface = gr.ChatInterface(fn=model_inference,
+                type="messages",
+                chatbot=gr.Chatbot(type="messages", label="AI", render_markdown=True, sanitize_html=False, allow_tags=True, height='35vw', container=False, show_share_button=False),
+                textbox=textbox,
+                additional_inputs=image_input,
+                multimodal=False,
+                fill_height=False,
+                show_api=False,
+            )
+            gr.HTML('<span style="color:lightgray">Start with a full prompt: Find abnormalities and support devices.<br>\
+                Follow up with additional questions, such as Provide differentials or Write a structured report.<br>')
+
+
 
     # Clear chat history when an example is selected (keep example-populated inputs intact)
     ex.load_input_event.then(
@@ -192,6 +164,6 @@ with gr.Blocks() as demo:
 
 
 
-
+demo.queue(max_size=10)
 demo.launch(debug=False, server_name="0.0.0.0")
-
+
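Side note on the generation path this commit touches: raising max_new_tokens from 2048 to 4096 feeds the usual transformers streaming pattern, where model.generate() runs on a background thread and a TextIteratorStreamer hands partial text back to the Gradio callback as it is decoded. A minimal, self-contained sketch of that pattern (the checkpoint name is a stand-in, not this Space's model):

```python
# Minimal sketch of the Thread + TextIteratorStreamer pattern used in app.py.
from threading import Thread

import torch
from transformers import AutoModelForImageTextToText, AutoProcessor, TextIteratorStreamer

model_id = "HuggingFaceTB/SmolVLM-Instruct"  # hypothetical stand-in checkpoint
processor = AutoProcessor.from_pretrained(model_id, use_fast=True)
model = AutoModelForImageTextToText.from_pretrained(model_id, dtype=torch.bfloat16).eval()

def stream_reply(inputs):
    # generate() runs in a background thread; the streamer yields decoded text
    # incrementally, which is what lets the Gradio callback yield partial answers.
    streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
    Thread(target=model.generate, kwargs=dict(inputs, streamer=streamer, max_new_tokens=4096)).start()
    buffer = ""
    for new_text in streamer:
        buffer += new_text
        yield buffer
```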
example_images/376.jpg DELETED
Binary file (61 kB)
 
example_images/4747.jpg ADDED
example_images/6218.jpg ADDED
example_images/6447.jpg ADDED
example_images/87.jpg ADDED
requirements.txt CHANGED
@@ -4,5 +4,5 @@ transformers==4.56.0
 huggingface_hub
 gradio==5.44.1
 spaces==0.40.1
-qwen_vl_utils==0.0.11
+# qwen_vl_utils==0.0.11
 
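Dropping qwen_vl_utils matches the app.py change above: the fetch_image resize is now commented out in favor of a plain-PIL route (image.thumbnail). If that resize is ever re-enabled, a dependency-free equivalent is tiny; a sketch, with the 512-pixel cap taken from the commented line in app.py:

```python
# Dependency-free stand-in for the retired fetch_image resize (sketch only).
from PIL import Image

def downscale(image: Image.Image, max_side: int = 512) -> Image.Image:
    # thumbnail() resizes in place, preserves aspect ratio, and never upscales.
    image.thumbnail((max_side, max_side))
    return image
```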