stzhao committed on
Commit
ce2ea77
·
verified ·
1 Parent(s): 6150e51

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +96 -16
app.py CHANGED
@@ -11,13 +11,14 @@ import tempfile
11
  from datetime import datetime
12
  import re
13
 
14
- def export_to_zip(images, conversations):
15
  """
16
  将图像和对话数据导出为ZIP文件
17
 
18
  Args:
19
  images: 提取的图像列表
20
  conversations: 对话JSON数据
 
21
 
22
  Returns:
23
  生成的ZIP文件路径
@@ -82,6 +83,56 @@ def base64_to_image(
82
  print(f"Base64解码失败: {str(e)}")
83
  return None
84
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  def extract_images_from_messages(messages):
86
  """
87
  从消息中提取所有图像
@@ -116,6 +167,9 @@ def process_message(file_path):
116
  # 提取图像
117
  images, messages = extract_images_from_messages(messages)
118
 
 
 
 
119
  # 创建HTML输出
120
  html_output = '<div style="color: black;">' # 添加一个包裹所有内容的div,设置文本颜色为黑色
121
 
@@ -152,29 +206,43 @@ def process_message(file_path):
152
  html_output += '</div>'
153
 
154
  html_output += '</div>' # 关闭最外层div
155
- return html_output, images, messages
156
 
157
  except Exception as e:
158
- return f"<div style='color: red;'>Error processing file: {str(e)}</div>", [], None
159
 
160
  def upload_and_process(file):
161
  if file is None:
162
- return "请上传一个JSON文件", [], None
163
 
164
- html_output, images, messages = process_message(file.name)
165
- return html_output, images, messages
166
 
167
  def use_example():
168
  # 使用示例文件
169
  example_path = "test_message_gpt.json"
170
  return process_message(example_path)
171
 
172
- def handle_export(images, conversations):
173
- """处理导出请求"""
174
  if not images or conversations is None:
175
  return None
176
 
177
- zip_path = export_to_zip(images, conversations)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
178
  return zip_path
179
 
180
  # 确保示例文件存在
@@ -232,29 +300,41 @@ with gr.Blocks(title="ChatGPT Conversation Visualizer", css="div.prose * {color:
232
 
233
  # 添加导出按钮
234
  with gr.Row():
235
- export_btn = gr.Button("导出为ZIP")
236
- download_file = gr.File(label="下载ZIP文件")
 
 
 
 
 
237
 
238
  # 存储当前结果的状态变量
239
  current_images = gr.State([])
240
  current_json = gr.State(None)
 
241
 
242
  visualize_button.click(
243
  fn=upload_and_process,
244
  inputs=[file_input],
245
- outputs=[output, current_images, current_json]
246
  )
247
 
248
  example_button.click(
249
  fn=use_example,
250
  inputs=[],
251
- outputs=[output, current_images, current_json]
252
  )
253
 
254
- export_btn.click(
255
- fn=handle_export,
256
  inputs=[current_images, current_json],
257
- outputs=[download_file]
 
 
 
 
 
 
258
  )
259
 
260
  # 启动Gradio应用
 
11
  from datetime import datetime
12
  import re
13
 
14
+ def export_to_zip(images, conversations, format_type="original"):
15
  """
16
  将图像和对话数据导出为ZIP文件
17
 
18
  Args:
19
  images: 提取的图像列表
20
  conversations: 对话JSON数据
21
+ format_type: 格式类型,"original"或"sharegpt"
22
 
23
  Returns:
24
  生成的ZIP文件路径
 
83
  print(f"Base64解码失败: {str(e)}")
84
  return None
85
 
86
def process_message_to_sharegpt_format(message):
    """
    Convert a list of chat messages into a ShareGPT-style record.

    Each message's content parts are flattened into one string: text parts
    are concatenated as-is and every ``image_url`` part is replaced by an
    ``<image>`` placeholder. Images that decode successfully are collected
    in order of appearance.

    The first message always becomes the ``human`` turn. Every later message
    becomes a ``gpt`` turn; if it contains an ``<interpreter>`` tag, the text
    before the FIRST tag is the ``gpt`` turn and everything from that tag
    onwards (including any further ``<interpreter>`` tags) becomes a single
    ``observation`` turn.

    Args:
        message: Iterable of message dicts. Each must have a ``content`` key
            holding either a list of parts (``{"type": "text", "text": ...}``
            or ``{"type": "image_url", "image_url": {"url": ...}}``) or a
            plain string, which is treated as one text part.

    Returns:
        dict with ``"conversations"`` (list of ``{"from", "value"}`` dicts)
        and ``"images"`` (list of decoded images).
    """
    interpreter_tag = "<interpreter>"
    sharegpt_images = []
    sharegpt_conversation = []

    for i, message_item in enumerate(message):
        content_list = message_item['content']
        # Plain-string content would otherwise be iterated character by
        # character and fail on content_item['type'].
        if isinstance(content_list, str):
            content_list = [{"type": "text", "text": content_list}]

        parts = []
        for content_item in content_list:
            content_type = content_item['type']
            if content_type == "text":
                parts.append(content_item['text'])
            elif content_type == "image_url":
                # The placeholder is emitted even when decoding fails, so the
                # text keeps marking where an image was present.
                parts.append("<image>")
                image = base64_to_image(content_item['image_url']['url'])
                if image:
                    sharegpt_images.append(image)
        whole_content = "".join(parts)

        if i == 0:
            sharegpt_conversation.append({"from": "human", "value": whole_content})
            continue

        # partition() splits at the first tag only, so a turn with several
        # interpreter outputs no longer breaks a two-value unpack.
        gpt_content, tag, observation_content = whole_content.partition(interpreter_tag)
        sharegpt_conversation.append({"from": "gpt", "value": gpt_content})
        if tag:
            sharegpt_conversation.append(
                {"from": "observation", "value": interpreter_tag + observation_content}
            )

    return {
        "conversations": sharegpt_conversation,
        "images": sharegpt_images,
    }
135
+
136
  def extract_images_from_messages(messages):
137
  """
138
  从消息中提取所有图像
 
167
  # 提取图像
168
  images, messages = extract_images_from_messages(messages)
169
 
170
+ # 转换为ShareGPT格式
171
+ sharegpt_data = process_message_to_sharegpt_format(messages)
172
+
173
  # 创建HTML输出
174
  html_output = '<div style="color: black;">' # 添加一个包裹所有内容的div,设置文本颜色为黑色
175
 
 
206
  html_output += '</div>'
207
 
208
  html_output += '</div>' # 关闭最外层div
209
+ return html_output, images, messages, sharegpt_data
210
 
211
  except Exception as e:
212
+ return f"<div style='color: red;'>Error processing file: {str(e)}</div>", [], None, None
213
 
214
def upload_and_process(file):
    """
    Process an uploaded JSON file for display.

    Returns a 4-tuple ``(html, images, messages, sharegpt_data)``. When no
    file was supplied, the HTML slot carries a prompt asking for a JSON file
    and the remaining slots are empty placeholders.
    """
    if file is not None:
        # Unpack explicitly so a result of the wrong arity fails here.
        rendered, extracted, parsed, converted = process_message(file.name)
        return rendered, extracted, parsed, converted

    return "请上传一个JSON文件", [], None, None
220
 
221
def use_example():
    """Process the bundled example conversation file and return its result."""
    return process_message("test_message_gpt.json")
225
 
226
def handle_export_original(images, conversations):
    """
    Handle an export request in the original format.

    Returns the path produced by ``export_to_zip`` in ``"original"`` mode,
    or ``None`` when there are no images or no conversation data.
    """
    has_payload = bool(images) and conversations is not None
    if has_payload:
        return export_to_zip(images, conversations, "original")
    return None
233
+
234
def handle_export_sharegpt(sharegpt_data):
    """
    Handle an export request in ShareGPT format.

    Reads ``"images"`` and ``"conversations"`` from ``sharegpt_data`` (each
    defaulting to an empty list) and returns the path produced by
    ``export_to_zip`` in ``"sharegpt"`` mode. Returns ``None`` when
    ``sharegpt_data`` is ``None`` or when both collections are empty.
    """
    if sharegpt_data is None:
        return None

    picture_list = sharegpt_data.get("images", [])
    turn_list = sharegpt_data.get("conversations", [])

    # Export as soon as either collection has content.
    if picture_list or turn_list:
        return export_to_zip(picture_list, turn_list, "sharegpt")
    return None
247
 
248
  # 确保示例文件存在
 
300
 
301
  # 添加导出按钮
302
  with gr.Row():
303
+ with gr.Column():
304
+ export_original_btn = gr.Button("导出原始格式")
305
+ download_original_file = gr.File(label="下载原始格式ZIP")
306
+
307
+ with gr.Column():
308
+ export_sharegpt_btn = gr.Button("导出ShareGPT格式")
309
+ download_sharegpt_file = gr.File(label="下载ShareGPT格式ZIP")
310
 
311
  # 存储当前结果的状态变量
312
  current_images = gr.State([])
313
  current_json = gr.State(None)
314
+ current_sharegpt = gr.State(None)
315
 
316
  visualize_button.click(
317
  fn=upload_and_process,
318
  inputs=[file_input],
319
+ outputs=[output, current_images, current_json, current_sharegpt]
320
  )
321
 
322
  example_button.click(
323
  fn=use_example,
324
  inputs=[],
325
+ outputs=[output, current_images, current_json, current_sharegpt]
326
  )
327
 
328
+ export_original_btn.click(
329
+ fn=handle_export_original,
330
  inputs=[current_images, current_json],
331
+ outputs=[download_original_file]
332
+ )
333
+
334
+ export_sharegpt_btn.click(
335
+ fn=handle_export_sharegpt,
336
+ inputs=[current_sharegpt],
337
+ outputs=[download_sharegpt_file]
338
  )
339
 
340
  # 启动Gradio应用