File size: 3,832 Bytes
91daf98
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
import os
import requests
import base64
import json
import time
from mimetypes import guess_type
from tqdm import tqdm
# from parse_sequence import parse_sequence
# from parse_visual import run_parallel
# from parse_image import render_file
from call_openai import setup_client, call_openai
import argparse

# Command-line options: the folder of input images and the output file path.
parser = argparse.ArgumentParser()
parser.add_argument(
    '--image-folder-path',
    type=str,
    default='exp/figures/test',
    help='Path to the input folder',
)
parser.add_argument(
    '--out-path',
    type=str,
    default='data/raw',
    help='Path to the output file',
)
args = parser.parse_args()
file_path, out_path = args.image_folder_path, args.out_path

# call_client is the function used to send a message list; client and
# deployment_name are whatever setup_client() hands back.
call_client = call_openai
client, deployment_name = setup_client()

def local_image_to_data_url(image_path):
    """Return the file at *image_path* encoded as a base64 ``data:`` URL.

    The media type is guessed from the file name; when no guess is
    available, ``application/octet-stream`` is used instead.
    """
    guessed_type, _encoding = guess_type(image_path)
    media_type = 'application/octet-stream' if guessed_type is None else guessed_type

    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()

    payload = base64.b64encode(raw_bytes).decode('utf-8')
    return "data:" + media_type + ";base64," + payload

def call_model_1(prompt, image_path):
    """Send a one-turn request made of a text prompt and one local image.

    The image at *image_path* is embedded as a data URL via
    ``local_image_to_data_url``. Returns whatever ``call_client`` returns
    for the assembled message list.
    """
    system_turn = {
        "role": "system",
        "content": "You are an AI assistant that helps people find information.",
    }
    text_part = {"type": "text", "text": prompt}
    image_part = {
        "type": "image_url",
        "image_url": {"url": local_image_to_data_url(image_path)},
    }
    user_turn = {"role": "user", "content": [text_part, image_part]}

    return call_client(client, deployment_name, [system_turn, user_turn])

def call_model_2(prompt1, image_path, output1, prompt2):
    """Send a follow-up request that replays the first exchange.

    The conversation consists of the system message, the original user turn
    (*prompt1* plus the image at *image_path* as a data URL), the assistant
    reply *output1*, and finally the new user message *prompt2*. Returns
    whatever ``call_client`` returns for that message list.
    """
    conversation = []
    conversation.append({
        "role": "system",
        "content": "You are an AI assistant that helps people find information.",
    })

    # First user turn: the same text + image pair that produced output1.
    first_turn_parts = [
        {"type": "text", "text": prompt1},
        {"type": "image_url", "image_url": {"url": local_image_to_data_url(image_path)}},
    ]
    conversation.append({"role": "user", "content": first_turn_parts})

    conversation.append({"role": "assistant", "content": output1})
    conversation.append({"role": "user", "content": prompt2})

    return call_client(client, deployment_name, conversation)

# The two prompts are identical for every image, so they are defined once
# instead of being rebuilt on each loop iteration.
prompt1 = """Propose a series of questions about the 3D shape and give the answers. The first question should ask for a detailed description and others should focus on the specific geometric properties, number, size proportions and positional relationship, and other details."""
prompt2 = """Based on the dialogue, please give a final description of the 3D shape. No more than 70 words."""

# Retry policy for a single model call. Without an upper bound, an image the
# model always refuses would stall the whole batch forever.
MAX_ATTEMPTS = 10
RETRY_DELAY_SECONDS = 1
REFUSAL_PREFIX = "I'm sorry"


def _ask_with_retries(ask, *ask_args):
    """Call ``ask(*ask_args)`` until it yields a usable answer.

    An answer is usable when it is not ``None`` and its string form does not
    start with ``REFUSAL_PREFIX``. A ``requests.RequestException`` counts as a
    failed attempt. Every failed attempt (error or refusal) is followed by a
    short pause so the service is not hit in a tight loop.

    Returns the usable answer, or ``None`` once ``MAX_ATTEMPTS`` tries have
    all failed.
    """
    for attempt in range(1, MAX_ATTEMPTS + 1):
        try:
            answer = ask(*ask_args)
        # NOTE(review): only requests errors are retried, as before. If
        # call_openai raises a different exception family, it will still
        # propagate and stop the script -- confirm against call_openai.
        except requests.RequestException as e:
            print(f"Request failed: {e}")
            answer = None
        if answer is not None and not str(answer).startswith(REFUSAL_PREFIX):
            return answer
        if attempt < MAX_ATTEMPTS:
            time.sleep(RETRY_DELAY_SECONDS)
    return None


# Process the regular files of the input folder in sorted name order.
files = sorted(
    f for f in os.listdir(file_path)
    if os.path.isfile(os.path.join(file_path, f))
)
results = []
skipped = []
for filename in tqdm(files):
    time.sleep(0.5)  # fixed pause between images
    image_path = os.path.join(file_path, filename)

    # Step 1: question/answer dialogue about the image.
    output1 = _ask_with_retries(call_model_1, prompt1, image_path)
    if output1 is None:
        print(f"Skipping {filename}: no usable answer after {MAX_ATTEMPTS} attempts")
        skipped.append(filename)
        continue

    # Step 2: final description, conditioned on the dialogue from step 1.
    output2 = _ask_with_retries(call_model_2, prompt1, image_path, output1, prompt2)
    if output2 is None:
        print(f"Skipping {filename}: no usable description after {MAX_ATTEMPTS} attempts")
        skipped.append(filename)
        continue

    results.append({
        "pic_name": filename,
        "questions": output1,
        "description": output2,
    })

if skipped:
    print(f"Skipped {len(skipped)} file(s): {skipped}")

# Make sure the parent directory of the output file exists before writing.
out_dir = os.path.dirname(out_path)
if out_dir:
    os.makedirs(out_dir, exist_ok=True)
with open(out_path, 'w', encoding='utf-8') as f:
    json.dump(results, f, ensure_ascii=False, indent=4)