Spaces:
Sleeping
Sleeping
Commit
·
7971cd8
1
Parent(s):
c5ec5d8
updates
Browse files
app.py
CHANGED
|
@@ -22,7 +22,22 @@ with open('nearest_neighbors_with_captions.json', 'r') as f:
|
|
| 22 |
try:
|
| 23 |
dataset = load_dataset(dataset_name, split="train")
|
| 24 |
print("Loaded existing dataset:", dataset)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
# Create a dictionary to keep track of the highest annotation count for each image
|
| 27 |
annotation_counts = {}
|
| 28 |
for example in dataset:
|
|
@@ -40,9 +55,9 @@ except Exception as e:
|
|
| 40 |
'image_id': Value(dtype='string'),
|
| 41 |
'caption': Value(dtype='string'),
|
| 42 |
'annotation_count': Value(dtype='int32'),
|
| 43 |
-
'
|
| 44 |
})
|
| 45 |
-
dataset = Dataset.from_dict({'image_id': [], 'caption': [], 'annotation_count': [], '
|
| 46 |
annotation_counts = {}
|
| 47 |
dataset.push_to_hub(dataset_name) # Push the empty dataset to Hugging Face
|
| 48 |
|
|
@@ -122,12 +137,12 @@ def save_annotation(caption, session_data):
|
|
| 122 |
"image_id": [image_id],
|
| 123 |
"caption": [caption],
|
| 124 |
"annotation_count": [annotation_count + 1],
|
| 125 |
-
"
|
| 126 |
}, features=Features({
|
| 127 |
'image_id': Value(dtype='string'),
|
| 128 |
'caption': Value(dtype='string'),
|
| 129 |
'annotation_count': Value(dtype='int32'),
|
| 130 |
-
'
|
| 131 |
}))
|
| 132 |
|
| 133 |
# Update the annotation count in the dictionary
|
|
|
|
| 22 |
try:
|
| 23 |
dataset = load_dataset(dataset_name, split="train")
|
| 24 |
print("Loaded existing dataset:", dataset)
|
| 25 |
+
print("Dataset features:", dataset.features) # Check if 'split' is part of features
|
| 26 |
+
|
| 27 |
+
# Check if the 'split' column exists; if not, add it
|
| 28 |
+
if 'split' not in dataset.column_names:
|
| 29 |
+
# Define the 'split' values based on `image_id`
|
| 30 |
+
split_values = [
|
| 31 |
+
"dev" if example["image_id"] in results else "train"
|
| 32 |
+
for example in dataset
|
| 33 |
+
]
|
| 34 |
|
| 35 |
+
# Add 'split' column to the dataset
|
| 36 |
+
dataset = dataset.add_column("split", split_values)
|
| 37 |
+
print("Added 'split' column to dataset.")
|
| 38 |
+
else:
|
| 39 |
+
print("'split' column already exists.")
|
| 40 |
+
|
| 41 |
# Create a dictionary to keep track of the highest annotation count for each image
|
| 42 |
annotation_counts = {}
|
| 43 |
for example in dataset:
|
|
|
|
| 55 |
'image_id': Value(dtype='string'),
|
| 56 |
'caption': Value(dtype='string'),
|
| 57 |
'annotation_count': Value(dtype='int32'),
|
| 58 |
+
'split': Value(dtype='string')
|
| 59 |
})
|
| 60 |
+
dataset = Dataset.from_dict({'image_id': [], 'caption': [], 'annotation_count': [], 'split': []}, features=features)
|
| 61 |
annotation_counts = {}
|
| 62 |
dataset.push_to_hub(dataset_name) # Push the empty dataset to Hugging Face
|
| 63 |
|
|
|
|
| 137 |
"image_id": [image_id],
|
| 138 |
"caption": [caption],
|
| 139 |
"annotation_count": [annotation_count + 1],
|
| 140 |
+
"split": [split]
|
| 141 |
}, features=Features({
|
| 142 |
'image_id': Value(dtype='string'),
|
| 143 |
'caption': Value(dtype='string'),
|
| 144 |
'annotation_count': Value(dtype='int32'),
|
| 145 |
+
'split': Value(dtype='string')
|
| 146 |
}))
|
| 147 |
|
| 148 |
# Update the annotation count in the dictionary
|