Spaces:
Sleeping
Sleeping
Commit
·
7de933f
1
Parent(s):
8be8093
updates
Browse files
app.py
CHANGED
|
@@ -15,6 +15,9 @@ else:
|
|
| 15 |
print("HUGGINGFACE_TOKEN environment variable not set.")
|
| 16 |
dataset_name = "GeorgeIbrahim/EGYCOCO" # Replace with your dataset name
|
| 17 |
|
|
|
|
|
|
|
|
|
|
| 18 |
# Load or create the dataset
|
| 19 |
try:
|
| 20 |
dataset = load_dataset(dataset_name, split="train")
|
|
@@ -25,6 +28,7 @@ try:
|
|
| 25 |
for example in dataset:
|
| 26 |
image_id = example["image_id"]
|
| 27 |
count = example["annotation_count"]
|
|
|
|
| 28 |
if image_id not in annotation_counts or count > annotation_counts[image_id]:
|
| 29 |
annotation_counts[image_id] = count
|
| 30 |
|
|
@@ -35,9 +39,10 @@ except Exception as e:
|
|
| 35 |
features = Features({
|
| 36 |
'image_id': Value(dtype='string'),
|
| 37 |
'caption': Value(dtype='string'),
|
| 38 |
-
'annotation_count': Value(dtype='int32')
|
|
|
|
| 39 |
})
|
| 40 |
-
dataset = Dataset.from_dict({'image_id': [], 'caption': [], 'annotation_count': []}, features=features)
|
| 41 |
annotation_counts = {}
|
| 42 |
dataset.push_to_hub(dataset_name) # Push the empty dataset to Hugging Face
|
| 43 |
|
|
@@ -45,9 +50,6 @@ image_folder = "images"
|
|
| 45 |
image_files = [f for f in os.listdir(image_folder) if f.endswith(('.png', '.jpg', '.jpeg'))]
|
| 46 |
lock = threading.Lock()
|
| 47 |
|
| 48 |
-
with open('nearest_neighbors_with_captions.json', 'r') as f:
|
| 49 |
-
results = json.load(f)
|
| 50 |
-
|
| 51 |
|
| 52 |
def get_caption_for_image_id(image_path):
|
| 53 |
"""
|
|
@@ -107,7 +109,7 @@ def save_annotation(caption, session_data):
|
|
| 107 |
|
| 108 |
with lock:
|
| 109 |
image_id = session_data["current_image"]
|
| 110 |
-
|
| 111 |
# Save caption or "skipped" based on user input
|
| 112 |
if caption.strip().lower() == "skip":
|
| 113 |
caption = "skipped"
|
|
@@ -119,11 +121,13 @@ def save_annotation(caption, session_data):
|
|
| 119 |
new_data = Dataset.from_dict({
|
| 120 |
"image_id": [image_id],
|
| 121 |
"caption": [caption],
|
| 122 |
-
"annotation_count": [annotation_count + 1]
|
|
|
|
| 123 |
}, features=Features({
|
| 124 |
'image_id': Value(dtype='string'),
|
| 125 |
'caption': Value(dtype='string'),
|
| 126 |
-
'annotation_count': Value(dtype='int32')
|
|
|
|
| 127 |
}))
|
| 128 |
|
| 129 |
# Update the annotation count in the dictionary
|
|
|
|
| 15 |
print("HUGGINGFACE_TOKEN environment variable not set.")
|
| 16 |
dataset_name = "GeorgeIbrahim/EGYCOCO" # Replace with your dataset name
|
| 17 |
|
| 18 |
+
with open('nearest_neighbors_with_captions.json', 'r') as f:
|
| 19 |
+
results = json.load(f)
|
| 20 |
+
|
| 21 |
# Load or create the dataset
|
| 22 |
try:
|
| 23 |
dataset = load_dataset(dataset_name, split="train")
|
|
|
|
| 28 |
for example in dataset:
|
| 29 |
image_id = example["image_id"]
|
| 30 |
count = example["annotation_count"]
|
| 31 |
+
|
| 32 |
if image_id not in annotation_counts or count > annotation_counts[image_id]:
|
| 33 |
annotation_counts[image_id] = count
|
| 34 |
|
|
|
|
| 39 |
features = Features({
|
| 40 |
'image_id': Value(dtype='string'),
|
| 41 |
'caption': Value(dtype='string'),
|
| 42 |
+
'annotation_count': Value(dtype='int32'),
|
| 43 |
+
'split': Value(dtype='string')
|
| 44 |
})
|
| 45 |
+
dataset = Dataset.from_dict({'image_id': [], 'caption': [], 'annotation_count': [], 'split': []}, features=features)
|
| 46 |
annotation_counts = {}
|
| 47 |
dataset.push_to_hub(dataset_name) # Push the empty dataset to Hugging Face
|
| 48 |
|
|
|
|
| 50 |
image_files = [f for f in os.listdir(image_folder) if f.endswith(('.png', '.jpg', '.jpeg'))]
|
| 51 |
lock = threading.Lock()
|
| 52 |
|
|
|
|
|
|
|
|
|
|
| 53 |
|
| 54 |
def get_caption_for_image_id(image_path):
|
| 55 |
"""
|
|
|
|
| 109 |
|
| 110 |
with lock:
|
| 111 |
image_id = session_data["current_image"]
|
| 112 |
+
split = "dev" if image_id in results else "train"
|
| 113 |
# Save caption or "skipped" based on user input
|
| 114 |
if caption.strip().lower() == "skip":
|
| 115 |
caption = "skipped"
|
|
|
|
| 121 |
new_data = Dataset.from_dict({
|
| 122 |
"image_id": [image_id],
|
| 123 |
"caption": [caption],
|
| 124 |
+
"annotation_count": [annotation_count + 1],
|
| 125 |
+
"split": [split]
|
| 126 |
}, features=Features({
|
| 127 |
'image_id': Value(dtype='string'),
|
| 128 |
'caption': Value(dtype='string'),
|
| 129 |
+
'annotation_count': Value(dtype='int32'),
|
| 130 |
+
'split': Value(dtype='string')
|
| 131 |
}))
|
| 132 |
|
| 133 |
# Update the annotation count in the dictionary
|