File size: 6,901 Bytes
b27cd24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
import json
import os
import base64
from time import sleep
from tqdm import tqdm
import openai

# Read the API key from the environment so that no secret lives in the source.
# NOTE: any key that was ever committed to this file must be treated as
# compromised and revoked/rotated with the provider.
openai.api_key = os.environ.get("OPENAI_API_KEY")

def encode_image(image_path):
    """Return the contents of the file at `image_path` as base64 text.

    The file is opened in binary mode and read in full; the base64 bytes are
    then decoded as UTF-8 so the result is a `str`.
    """
    with open(image_path, mode="rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")

def _build_annotation_prompt():
    """
    Return the fixed annotation prompt sent alongside each image.

    The prompt lists 17 numbered fields (with the allowed options for the
    multiple-choice ones), defines "start" and "end" of a line, and ends with
    an example JSON object that uses the same keys. The model is instructed to
    reply with a single JSON object and no prose.

    Returns:
        str: the complete prompt text; identical on every call.
    """
    # Questions and options are mirrored from gui1.py (keep these in lockstep).
    return (
        "You are an expert at analyzing a single image of a line of people. "
        "Answer the following 17 questions STRICTLY as a single JSON object. "
        "Use the exact keys provided, and for multiple-choice fields choose ONE "
        "of the listed options verbatim. If something is not visible, pick the most appropriate option (e.g., 'N/A').\n\n"
        "Return ONLY JSON. No prose.\n\n"
        "Definitions:\n"
        "- **Start of the line (front):** the person closest to the counter or service point. "
        "This is usually the direction the line is facing towards.\n"
        "- **End of the line (back):** the person farthest from the counter or service point. "
        "This is usually the last person to join the line.\n\n"
        "Fields:\n"
        '1) "number_of_turns": integer\n'
        '2) "line_shape": one of ["Straight","Curved","S-shaped","Angled","other"]\n'
        '3) "line_facing_direction": one of ["Facing towards","Facing away","Facing sideways","other"]\n'
        '4) "number_of_people_in_line": integer\n'
        '5) "line_purpose": string (short phrase)\n'
        '6) "start_person_description": string (brief)\n'
        '7) "end_person_description": string (brief)\n'
        '8) "counter_person_description": string (brief)\n'
        '9) "boundary_present": one of ["yes","no"]\n'
        '10) "boundary_types": one of ["none","cones","rope dividers","stanchions","other"]\n'
        '11) "end_of_line_visible": one of ["yes","no"]\n'
        '12) "end_of_line_location_if_visible": one of ["far left","center left","center","center right","far right","N/A"]\n'
        '13) "direction_to_turn_to_see_end_if_not_visible": one of ["left","right","back","N/A"]\n'
        '14) "start_of_line_visible": one of ["yes","no"]\n'
        '15) "start_of_line_location_if_visible": one of ["far left","center left","center","center right","far right","N/A"]\n'
        '16) "direction_to_turn_to_see_start_if_not_visible": one of ["left","right","back","N/A"]\n'
        '17) "line_completeness": one of ["full","partial"]\n\n'
        "JSON schema example (values are placeholders):\n"
        "{\n"
        '  "number_of_turns": 0,\n'
        '  "line_shape": "Straight",\n'
        '  "line_facing_direction": "Facing towards",\n'
        '  "number_of_people_in_line": 16,\n'
        '  "line_purpose": "airport",\n'
        '  "start_person_description": "man wearing hat and blue shirt",\n'
        '  "end_person_description": "person wearing black t-shirt",\n'
        '  "counter_person_description": "unknown",\n'
        # The example must use one of the options allowed by field 9
        # ("yes"/"no"); "true" would contradict the field list above.
        '  "boundary_present": "yes",\n'
        '  "boundary_types": "rope dividers",\n'
        '  "end_of_line_visible": "yes",\n'
        '  "end_of_line_location_if_visible": "far left",\n'
        '  "direction_to_turn_to_see_end_if_not_visible": "N/A",\n'
        '  "start_of_line_visible": "no",\n'
        '  "start_of_line_location_if_visible": "N/A",\n'
        '  "direction_to_turn_to_see_start_if_not_visible": "right",\n'
        '  "line_completeness": "partial"\n'
        "}"
    )

def generate_reranking(image_paths, res_file_name, temperature=0.2):
    """
    Annotate each image with the fixed questionnaire and append the answers to a file.

    For every path in `image_paths` the file is base64-encoded and sent to the
    "gpt-4o" chat model together with the prompt from
    `_build_annotation_prompt()`. When the reply parses as JSON it is
    re-serialized on a single line; otherwise the raw reply text is written
    unchanged so it can be inspected later.

    Args:
        image_paths (List[str]): list of absolute or relative image paths.
        res_file_name (str): output text file, opened in append mode. One
            record is appended per image: a line with the image's basename,
            a line with the reply, then a blank line.
        temperature (float): sampling temperature passed to the API.

    Returns:
        None. Results are only written to `res_file_name`.
    """
    # Local import: only this function needs it, so the file's import block
    # does not have to change.
    import mimetypes

    prompt = _build_annotation_prompt()
    system_text = (
        "You analyze a SINGLE image and return ONLY valid JSON "
        "with the specified keys and enumerated options. Do not include any extra text."
    )

    with open(res_file_name, "a", encoding="utf-8") as f:
        for img_path in tqdm(image_paths):
            basename = os.path.basename(img_path)

            # Label the data URL with the file's real image type instead of
            # always claiming PNG; fall back to PNG when the type is unknown.
            mime_type = mimetypes.guess_type(img_path)[0]
            if mime_type is None or not mime_type.startswith("image/"):
                mime_type = "image/png"

            img_b64 = encode_image(img_path)
            print(img_path)
            response = openai.ChatCompletion.create(
                model="gpt-4o",
                messages=[
                    {
                        "role": "system",
                        "content": [{"type": "text", "text": system_text}],
                    },
                    {
                        "role": "user",
                        "content": [
                            {"type": "text", "text": prompt},
                            {
                                "type": "image_url",
                                "image_url": {"url": f"data:{mime_type};base64,{img_b64}"},
                            },
                        ],
                    },
                ],
                max_tokens=1000,
                temperature=temperature,
            )

            content = response.choices[0].message.content.strip()
            # Normalize valid JSON to one line; on a parse failure keep the
            # raw reply on purpose so it can be examined for troubleshooting.
            try:
                content = json.dumps(json.loads(content), ensure_ascii=False)
            except json.JSONDecodeError:
                pass

            f.write(basename + "\n" + content + "\n\n")
            # Flush per record so finished answers survive a crash or an API
            # error later in the run.
            f.flush()
            # Short pause between consecutive API requests.
            sleep(0.5)

# Base directory that contains the image subfolders.
root = "/vast/ds5725/linefinder/LineFinder/Images"

# Subfolders of `root` that are scanned.
subfolders = ["QueuesInAirports","QueuesInSupermarkets", "QueuesInBanks", "ImagesOnline"]

# Recursively gather the absolute path of every file under each subfolder,
# in sorted order.
all_files = sorted(
    os.path.abspath(os.path.join(dirpath, f))
    for sub in subfolders
    for dirpath, _, filenames in os.walk(os.path.join(root, sub))
    for f in filenames
)

# Only entries from index 63 onward are processed.
# NOTE(review): the offset presumably resumes an earlier, interrupted run - confirm.
generate_reranking(all_files[63:], "gpt_line_test.jsonl")