File size: 1,542 Bytes
625a17f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 |
import json
import re
import string
def extract_object_name(text):
    """Return the part of *text* that follows the first standalone word "is".

    The split is done on the whole word "is" (case-sensitive), not on the
    bare substring, so words that merely contain "is" ("This", "disk",
    "dish") neither trigger the split nor truncate the result.

    Args:
        text: The sentence to search.

    Returns:
        Everything after the first standalone "is", stripped of surrounding
        whitespace (possibly an empty string), or None when *text* contains
        no standalone "is".
    """
    # maxsplit=1 keeps any later "is" inside the returned phrase.
    parts = re.split(r"\bis\b", text, maxsplit=1)
    if len(parts) > 1:
        return parts[1].strip()
    return None
# Input: JSON list of records; each record carries a "first_frame_anns" list
# whose items have a "text" field.
text_pth = "/home/yuqian_fu/Projects/PSALM/check_text_select_scene_600_20250514.json"
# Output: the same records with "llava_text" added to every annotation and
# the record-level "instruction" field removed.
save_path = "/home/yuqian_fu/Projects/PSALM/check_text_select_scene_600_objname_llavatext.json"

# Translation table that deletes every ASCII punctuation character; built
# once instead of once per annotation.
punct_table = str.maketrans("", "", string.punctuation)

with open(text_pth, "r", encoding="utf-8") as fp:
    datas = json.load(fp)

new_data = []
# Each entry of datas is one image frame (per the original author's note).
for data in datas:
    for anno in data["first_frame_anns"]:
        text = anno["text"]
        # Take the phrase that follows "is" in the annotation text.
        raw = extract_object_name(text)
        if raw is None:
            # Fail with a message that identifies the offending annotation
            # rather than an AttributeError on None.
            raise ValueError(
                f"annotation text contains no 'is' to split on: {text!r}"
            )
        # Lower-case, drop the word "green", then strip all punctuation.
        cleaned = raw.lower().replace("green", "").translate(punct_table)
        # Collapse whitespace so removing "green"/punctuation leaves no
        # doubled, leading, or trailing spaces.
        anno["llava_text"] = " ".join(cleaned.split())
    # Drop the instruction field if present; records without it are fine.
    data.pop("instruction", None)
    new_data.append(data)

print("len of new_data: ", len(new_data))
with open(save_path, "w", encoding="utf-8") as fp:
    json.dump(new_data, fp)
|