Spaces:

Agents-X
/

data-view

Running

App Files Files Community

stzhao committed on May 30, 2025

Commit

ce2ea77

verified ·

1 Parent(s): 6150e51

Update app.py

Browse files

Files changed (1) hide show

app.py +96 -16

app.py CHANGED Viewed

@@ -11,13 +11,14 @@ import tempfile
 from datetime import datetime
 import re
-def export_to_zip(images, conversations):
     """
     将图像和对话数据导出为ZIP文件
     Args:
         images: 提取的图像列表
         conversations: 对话JSON数据
     Returns:
         生成的ZIP文件路径
@@ -82,6 +83,56 @@ def base64_to_image(
         print(f"Base64解码失败: {str(e)}")
         return None
 def extract_images_from_messages(messages):
     """
     从消息中提取所有图像
@@ -116,6 +167,9 @@ def process_message(file_path):
         # 提取图像
         images, messages = extract_images_from_messages(messages)
         # 创建HTML输出
         html_output = '<div style="color: black;">'  # 添加一个包裹所有内容的div，设置文本颜色为黑色
@@ -152,29 +206,43 @@ def process_message(file_path):
             html_output += '</div>'
         html_output += '</div>'  # 关闭最外层div
-        return html_output, images, messages
     except Exception as e:
-        return f"<div style='color: red;'>Error processing file: {str(e)}</div>", [], None
 def upload_and_process(file):
     """
     Process an uploaded file, or return a prompt when nothing was uploaded.

     Args:
         file: Uploaded file object whose ``name`` attribute is passed to
             ``process_message``, or ``None`` when nothing was uploaded.

     Returns:
         A 3-tuple ``(html_output, images, messages)``. Without a file, the
         tuple is the upload prompt string, an empty list and ``None``.
     """
     if file is not None:
         page, pictures, chat = process_message(file.name)
         return page, pictures, chat
     return "请上传一个JSON文件", [], None
 def use_example():
     """
     Run ``process_message`` on the bundled example file.

     Returns:
         Whatever ``process_message`` returns for ``test_message_gpt.json``.
     """
     return process_message("test_message_gpt.json")
def handle_export(images, conversations):
    """
    Handle an export request.

    Args:
        images: Extracted images to include in the archive.
        conversations: Conversation data to include in the archive.

    Returns:
        The path returned by ``export_to_zip``, or ``None`` when there are no
        images or ``conversations`` is ``None``.
    """
    if images and conversations is not None:
        return export_to_zip(images, conversations)
    return None
 # 确保示例文件存在
@@ -232,29 +300,41 @@ with gr.Blocks(title="ChatGPT Conversation Visualizer", css="div.prose * {color:
     # 添加导出按钮
     with gr.Row():
-        export_btn = gr.Button("导出为ZIP")
-        download_file = gr.File(label="下载ZIP文件")
     # 存储当前结果的状态变量
     current_images = gr.State([])
     current_json = gr.State(None)
     visualize_button.click(
         fn=upload_and_process,
         inputs=[file_input],
-        outputs=[output, current_images, current_json]
     )
     example_button.click(
         fn=use_example,
         inputs=[],
-        outputs=[output, current_images, current_json]
     )
-    export_btn.click(
-        fn=handle_export,
         inputs=[current_images, current_json],
-        outputs=[download_file]
     )
 # 启动Gradio应用

 from datetime import datetime
 import re
+def export_to_zip(images, conversations, format_type="original"):
     """
     将图像和对话数据导出为ZIP文件
     Args:
         images: 提取的图像列表
         conversations: 对话JSON数据
+        format_type: 格式类型，"original"或"sharegpt"
     Returns:
         生成的ZIP文件路径
         print(f"Base64解码失败: {str(e)}")
         return None
def process_message_to_sharegpt_format(message):
    """
    Convert a list of chat messages into a ShareGPT-style record.

    Every message's content parts are flattened into a single string: text
    parts are concatenated verbatim and each ``image_url`` part contributes an
    ``<image>`` placeholder. The first message is emitted as the ``human``
    turn. Every later message is emitted as a ``gpt`` turn; when its text
    contains ``<interpreter>``, it is split at the first occurrence into a
    ``gpt`` turn (the text before the marker) and an ``observation`` turn
    (the marker and everything after it).

    Args:
        message: Sequence of message dicts. Each needs a ``content`` entry
            that is either a list of parts (``{"type": "text", "text": ...}``
            or ``{"type": "image_url", "image_url": {"url": ...}}``) or a
            plain string, which is treated as one text part. Parts of any
            other type are ignored.

    Returns:
        A dict with ``conversations`` (list of ``{"from", "value"}`` dicts)
        and ``images`` (the images decoded by ``base64_to_image``, in order
        of appearance).
    """
    interpreter_marker = "<interpreter>"
    sharegpt_images = []
    sharegpt_conversation = []

    for index, message_item in enumerate(message):
        content_list = message_item['content']
        if isinstance(content_list, str):
            # Accept the shorthand where content is a bare string.
            content_list = [{"type": "text", "text": content_list}]

        pieces = []
        for content_item in content_list:
            content_type = content_item['type']
            if content_type == "text":
                pieces.append(content_item['text'])
            elif content_type == "image_url":
                # NOTE(review): the placeholder is emitted even when decoding
                # fails, so placeholders can outnumber images — confirm that
                # this is what downstream consumers expect.
                pieces.append("<image>")
                image = base64_to_image(content_item['image_url']['url'])
                if image:
                    sharegpt_images.append(image)
        whole_content = "".join(pieces)

        if index == 0:
            sharegpt_conversation.append({"from": "human", "value": whole_content})
            continue

        # partition() splits at the FIRST marker only, so a turn containing
        # several markers no longer fails on a two-name unpacking.
        gpt_content, found, observation_content = whole_content.partition(interpreter_marker)
        if found:
            sharegpt_conversation.append({"from": "gpt", "value": gpt_content})
            sharegpt_conversation.append(
                {"from": "observation", "value": interpreter_marker + observation_content}
            )
        else:
            sharegpt_conversation.append({"from": "gpt", "value": whole_content})

    return {
        "conversations": sharegpt_conversation,
        "images": sharegpt_images,
    }
 def extract_images_from_messages(messages):
     """
     从消息中提取所有图像
         # 提取图像
         images, messages = extract_images_from_messages(messages)
+        # 转换为ShareGPT格式
+        sharegpt_data = process_message_to_sharegpt_format(messages)
         # 创建HTML输出
         html_output = '<div style="color: black;">'  # 添加一个包裹所有内容的div，设置文本颜色为黑色
             html_output += '</div>'
         html_output += '</div>'  # 关闭最外层div
+        return html_output, images, messages, sharegpt_data
     except Exception as e:
+        return f"<div style='color: red;'>Error processing file: {str(e)}</div>", [], None, None
 def upload_and_process(file):
     """
     Process an uploaded file, or return a prompt when nothing was uploaded.

     Args:
         file: Uploaded file object whose ``name`` attribute is passed to
             ``process_message``, or ``None`` when nothing was uploaded.

     Returns:
         A 4-tuple ``(html_output, images, messages, sharegpt_data)``. Without
         a file, the tuple is the upload prompt string, an empty list and two
         ``None`` values.
     """
     if file is not None:
         page, pictures, chat, sharegpt = process_message(file.name)
         return page, pictures, chat, sharegpt
     return "请上传一个JSON文件", [], None, None
 def use_example():
     """
     Run ``process_message`` on the bundled example file.

     Returns:
         Whatever ``process_message`` returns for ``test_message_gpt.json``.
     """
     return process_message("test_message_gpt.json")
def handle_export_original(images, conversations):
    """
    Handle a request to export in the original format.

    Args:
        images: Extracted images to include in the archive.
        conversations: Conversation data to include in the archive.

    Returns:
        The path returned by ``export_to_zip`` (called with format
        ``"original"``), or ``None`` when there are no images or
        ``conversations`` is ``None``.
    """
    if images and conversations is not None:
        return export_to_zip(images, conversations, "original")
    return None
def handle_export_sharegpt(sharegpt_data):
    """
    Handle a request to export in ShareGPT format.

    Args:
        sharegpt_data: Mapping with optional ``"images"`` and
            ``"conversations"`` entries, or ``None`` when nothing has been
            processed yet.

    Returns:
        The path returned by ``export_to_zip`` (called with format
        ``"sharegpt"``), or ``None`` when ``sharegpt_data`` is ``None`` or
        both entries are empty.
    """
    if sharegpt_data is None:
        return None

    pictures = sharegpt_data.get("images", [])
    turns = sharegpt_data.get("conversations", [])
    if pictures or turns:
        return export_to_zip(pictures, turns, "sharegpt")
    return None
 # 确保示例文件存在
     # 添加导出按钮
     with gr.Row():
+        with gr.Column():
+            export_original_btn = gr.Button("导出原始格式")
+            download_original_file = gr.File(label="下载原始格式ZIP")
+        with gr.Column():
+            export_sharegpt_btn = gr.Button("导出ShareGPT格式")
+            download_sharegpt_file = gr.File(label="下载ShareGPT格式ZIP")
     # 存储当前结果的状态变量
     current_images = gr.State([])
     current_json = gr.State(None)
+    current_sharegpt = gr.State(None)
     visualize_button.click(
         fn=upload_and_process,
         inputs=[file_input],
+        outputs=[output, current_images, current_json, current_sharegpt]
     )
     example_button.click(
         fn=use_example,
         inputs=[],
+        outputs=[output, current_images, current_json, current_sharegpt]
     )
+    export_original_btn.click(
+        fn=handle_export_original,
         inputs=[current_images, current_json],
+        outputs=[download_original_file]
+    )
+    export_sharegpt_btn.click(
+        fn=handle_export_sharegpt,
+        inputs=[current_sharegpt],
+        outputs=[download_sharegpt_file]
     )
 # 启动Gradio应用