Spaces:

kshdes37
/

cadspace

Runtime error

App Files Files Community

cadspace / CADFusion /src /data_preprocessing /captioning.py

kshdes37

Upload 50 files

91daf98 verified 9 months ago

raw

history blame contribute delete

3.83 kB

	import os
	import requests
	import base64
	import json
	import time
	from mimetypes import guess_type
	from tqdm import tqdm
	# from parse_sequence import parse_sequence
	# from parse_visual import run_parallel
	# from parse_image import render_file
	from call_openai import setup_client, call_openai
	import argparse

	parser = argparse.ArgumentParser()
	parser.add_argument('--image-folder-path', type=str, default='exp/figures/test', help='Path to the input folder')
	parser.add_argument('--out-path', type=str, default='data/raw', help='Path to the output file')
	args = parser.parse_args()
	file_path = args.image_folder_path
	out_path = args.out_path

	client, deployment_name = setup_client()
	call_client = call_openai

	def local_image_to_data_url(image_path):
	# Encode a local image into data URL
	mime_type, _ = guess_type(image_path)
	if mime_type is None:
	mime_type = 'application/octet-stream'
	with open(image_path, "rb") as image_file:
	base64_encoded_data = base64.b64encode(image_file.read()).decode('utf-8')
	return f"data:{mime_type};base64,{base64_encoded_data}"

	def call_model_1(prompt, image_path):
	message_text = [
	{"role":"system","content":"You are an AI assistant that helps people find information."},
	{"role":"user","content":[
	{
	"type": "text",
	"text": prompt
	},
	{
	"type": "image_url",
	"image_url": {"url": local_image_to_data_url(image_path)}
	}
	]}
	]
	return call_client(client, deployment_name, message_text)

	def call_model_2(prompt1, image_path, output1, prompt2):
	message_text = [
	{"role":"system","content":"You are an AI assistant that helps people find information."},
	{"role":"user","content":[
	{
	"type": "text",
	"text": prompt1
	},
	{
	"type": "image_url",
	"image_url": {"url": local_image_to_data_url(image_path)}
	}
	]},
	{"role":"assistant","content":output1},
	{"role":"user","content":prompt2}
	]
	return call_client(client, deployment_name, message_text)

	files = [f for f in os.listdir(args.image_folder_path) if os.path.isfile(os.path.join(args.image_folder_path, f))]
	files.sort()
	results = []
	for filename in tqdm(files):
	time.sleep(0.5)
	output1 = None
	output2 = None
	image_path = os.path.join(file_path, filename)
	# Send request
	prompt1 = """Propose a series of questions about the 3D shape and give the answers. The first question should ask for a detailed description and others should focus on the specific geometric properties, number, size proportions and positional relationship, and other details."""
	prompt2 = """Based on the dialogue, please give a final description of the 3D shape. No more than 70 words."""
	while output1 is None or str(output1).startswith("I'm sorry"):
	try:
	output1 = call_model_1(prompt1, image_path)
	except requests.RequestException as e:
	print(f"Request failed: {e}")
	time.sleep(1)
	output1 = None
	while output2 is None or str(output2).startswith("I'm sorry"):
	try:
	output2 = call_model_2(prompt1, image_path, output1, prompt2)
	except requests.RequestException as e:
	print(f"Request failed: {e}")
	time.sleep(1)
	output2 = None

	result = {
	"pic_name":filename,
	"questions": output1,
	"description":output2
	}
	results.append(result)

	with open(out_path, 'w+', encoding='utf-8') as f:
	json.dump(results, f, ensure_ascii=False, indent=4)