Spaces:
Sleeping
Sleeping
Commit
·
df87278
1
Parent(s):
0a274b3
updates
Browse files
app.py
CHANGED
|
@@ -25,22 +25,20 @@ try:
|
|
| 25 |
dataset = load_dataset(dataset_name, split="train")
|
| 26 |
print("Loaded existing dataset:", dataset)
|
| 27 |
print("Dataset features:", dataset.features) # Check if 'split' is part of features
|
| 28 |
-
|
| 29 |
-
|
| 30 |
# Check if the 'split' column exists; if not, add it
|
| 31 |
if 'split' not in dataset.column_names:
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
if image_id in results
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
dataset = dataset.add_column("split", split_values)
|
| 42 |
print("Added 'split' column to dataset.")
|
| 43 |
-
|
| 44 |
else:
|
| 45 |
print("'split' column already exists.")
|
| 46 |
|
|
@@ -98,35 +96,46 @@ def get_caption_for_image_id(image_path):
|
|
| 98 |
print("Caption not found for image_id:", image_id) # Debugging line
|
| 99 |
return None
|
| 100 |
|
|
|
|
| 101 |
|
| 102 |
# Function to get a random image that hasn’t been fully annotated
|
| 103 |
def get_next_image(session_data):
|
| 104 |
with lock:
|
| 105 |
-
#
|
| 106 |
available_images = [
|
| 107 |
img for img in image_files
|
| 108 |
if img not in annotation_counts or
|
| 109 |
-
(
|
| 110 |
-
(
|
| 111 |
]
|
| 112 |
|
| 113 |
-
print("Available images
|
| 114 |
-
|
| 115 |
-
# Shuffle available images to randomize the order
|
| 116 |
-
random.shuffle(available_images)
|
| 117 |
-
print("Available images after shuffle:", available_images) # Debugging line
|
| 118 |
|
| 119 |
-
#
|
| 120 |
if session_data["current_image"] is None and available_images:
|
| 121 |
-
#
|
| 122 |
-
|
| 123 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 124 |
|
| 125 |
return os.path.join(image_folder, session_data["current_image"]) if session_data["current_image"] else None
|
| 126 |
|
| 127 |
-
#
|
| 128 |
def save_annotation(caption, session_data):
|
| 129 |
-
global dataset, annotation_counts #
|
| 130 |
|
| 131 |
if session_data["current_image"] is None:
|
| 132 |
return gr.update(visible=False), gr.update(value="All images have been annotated!"), gr.update(value="")
|
|
@@ -168,8 +177,11 @@ def save_annotation(caption, session_data):
|
|
| 168 |
dataset.push_to_hub(dataset_name)
|
| 169 |
print("Pushed updated dataset")
|
| 170 |
|
| 171 |
-
#
|
| 172 |
-
if
|
|
|
|
|
|
|
|
|
|
| 173 |
session_data["current_image"] = None
|
| 174 |
|
| 175 |
# Fetch the next image
|
|
@@ -181,7 +193,7 @@ def save_annotation(caption, session_data):
|
|
| 181 |
else:
|
| 182 |
return gr.update(visible=False), gr.update(value="All images have been annotated!"), gr.update(value="")
|
| 183 |
|
| 184 |
-
|
| 185 |
def initialize_interface(session_data):
|
| 186 |
next_image = get_next_image(session_data)
|
| 187 |
if next_image:
|
|
|
|
| 25 |
dataset = load_dataset(dataset_name, split="train")
|
| 26 |
print("Loaded existing dataset:", dataset)
|
| 27 |
print("Dataset features:", dataset.features) # Check if 'split' is part of features
|
| 28 |
+
|
|
|
|
| 29 |
# Check if the 'split' column exists; if not, add it
|
| 30 |
if 'split' not in dataset.column_names:
|
| 31 |
+
# Define the 'split' values based on `image_id`
|
| 32 |
+
print(results)
|
| 33 |
+
print(example["image_id"] in results)
|
| 34 |
+
split_values = [
|
| 35 |
+
"dev" if example["image_id"] in results else "train"
|
| 36 |
+
for example in dataset
|
| 37 |
+
]
|
| 38 |
+
|
| 39 |
+
# Add 'split' column to the dataset
|
| 40 |
dataset = dataset.add_column("split", split_values)
|
| 41 |
print("Added 'split' column to dataset.")
|
|
|
|
| 42 |
else:
|
| 43 |
print("'split' column already exists.")
|
| 44 |
|
|
|
|
| 96 |
print("Caption not found for image_id:", image_id) # Debugging line
|
| 97 |
return None
|
| 98 |
|
| 99 |
+
# Module-level tally, keyed by image file name, of how many times
# get_next_image has handed out each image it classified as 'dev'.
shown_counts = {}
|
| 100 |
|
| 101 |
# Function to get a random image that hasn’t been fully annotated
|
| 102 |
def get_next_image(session_data):
    """Return the path of the image this session should annotate next.

    A session that already has a ``current_image`` keeps it. Otherwise the
    eligible images are shuffled and the first usable one is stored in
    ``session_data["current_image"]``.

    Args:
        session_data: Per-session dict. Its ``"current_image"`` entry is read
            and, when it is ``None``, overwritten with the chosen file name.

    Returns:
        ``os.path.join(image_folder, <file name>)`` for the session's current
        image, or ``None`` when the session has no image and none could be
        selected.
    """
    with lock:
        # Filter available images based on annotation counts and split:
        # file names containing "val" may collect two annotations, all
        # others only one. A missing count is treated as zero.
        available_images = []
        for img in image_files:
            annotated = annotation_counts.get(img, 0)
            still_needed = annotated < 2 if "val" in img else annotated == 0
            if still_needed:
                available_images.append(img)

        print("Available images:", available_images)  # Debugging line

        # Select an image to show based on split type
        if session_data["current_image"] is None and available_images:
            random.shuffle(available_images)  # Shuffle for randomness
            for img in available_images:
                # Extract image ID from a "..._<digits>.<ext>" file name.
                id_match = re.search(r'_(\d+)\.', img)
                if id_match is None:
                    # Without an id the file cannot be classified; skip it
                    # rather than crash the whole selection with an
                    # AttributeError on ``None.group``.
                    print("Skipping image without a numeric id:", img)
                    continue
                image_id = id_match.group(1).lstrip('0')

                if image_id in results:
                    split = "dev"
                    # Show 'dev' images twice at most.
                    times_shown = shown_counts.get(img, 0)
                    if times_shown >= 2:
                        continue
                    shown_counts[img] = times_shown + 1
                else:
                    # Show 'train' images once; the filter above already
                    # enforces that through annotation_counts.
                    split = "train"

                session_data["current_image"] = img
                print(f"Selected '{split}' image_id:", session_data["current_image"])
                break

        current = session_data["current_image"]
        return os.path.join(image_folder, current) if current else None
|
| 135 |
|
| 136 |
+
# Update save_annotation function to reset the shown count if 'dev' image is shown twice
|
| 137 |
def save_annotation(caption, session_data):
|
| 138 |
+
global dataset, annotation_counts, shown_counts # Include shown_counts
|
| 139 |
|
| 140 |
if session_data["current_image"] is None:
|
| 141 |
return gr.update(visible=False), gr.update(value="All images have been annotated!"), gr.update(value="")
|
|
|
|
| 177 |
dataset.push_to_hub(dataset_name)
|
| 178 |
print("Pushed updated dataset")
|
| 179 |
|
| 180 |
+
# Reset shown count if the 'dev' image has been shown twice
|
| 181 |
+
if split == "dev" and shown_counts.get(image_id, 0) >= 2:
|
| 182 |
+
shown_counts[image_id] = 0 # Reset count for 'dev' images shown twice
|
| 183 |
+
session_data["current_image"] = None
|
| 184 |
+
elif split == "train":
|
| 185 |
session_data["current_image"] = None
|
| 186 |
|
| 187 |
# Fetch the next image
|
|
|
|
| 193 |
else:
|
| 194 |
return gr.update(visible=False), gr.update(value="All images have been annotated!"), gr.update(value="")
|
| 195 |
|
| 196 |
+
|
| 197 |
def initialize_interface(session_data):
|
| 198 |
next_image = get_next_image(session_data)
|
| 199 |
if next_image:
|