Spaces:

GeorgeIbrahim
/

Data_Collection

Sleeping

App Files Community

GeorgeSherif committed on Nov 6, 2024

Commit

b98ca58

1 Parent(s): 77215fd

update

Browse files

Files changed (1) hide show

app.py +6 -15

app.py CHANGED Viewed

@@ -4,7 +4,6 @@ import threading
 import random
 from datasets import load_dataset, Dataset, Features, Value, concatenate_datasets
 from huggingface_hub import login
 # Authenticate with Hugging Face
 token = os.getenv("HUGGINGFACE_TOKEN")
 if token:
@@ -13,18 +12,16 @@ else:
     print("HUGGINGFACE_TOKEN environment variable not set.")
 dataset_name = "GeorgeIbrahim/EGYCOCO"  # Replace with your dataset name
-# Define the features schema once at the beginning
-features = Features({
-    'image_id': Value(dtype='string'),
-    'caption': Value(dtype='string'),
-})
 # Load or create the dataset with train and val splits
 try:
     dataset = load_dataset(dataset_name)
     print("Loaded existing dataset:", dataset)
 except Exception as e:
     # Create empty datasets for train and val splits if they don't exist
     train_dataset = Dataset.from_dict({'image_id': [], 'caption': []}, features=features)
     val_dataset = Dataset.from_dict({'image_id': [], 'caption': []}, features=features)
     dataset = {"train": train_dataset, "val": val_dataset}
@@ -32,12 +29,6 @@ except Exception as e:
     dataset["train"].push_to_hub(f"{dataset_name}", split="train")
     dataset["val"].push_to_hub(f"{dataset_name}", split="val")
-# Verify that both splits are in the dataset
-if "train" not in dataset:
-    dataset["train"] = Dataset.from_dict({'image_id': [], 'caption': []}, features=features)
-if "val" not in dataset:
-    dataset["val"] = Dataset.from_dict({'image_id': [], 'caption': []}, features=features)
 image_folder = "test"
 image_files = [f for f in os.listdir(image_folder) if f.endswith(('.png', '.jpg', '.jpeg'))]
 lock = threading.Lock()
@@ -76,7 +67,7 @@ def save_annotation(caption, session_data):
             caption = "skipped"
         # Add the new annotation as a new row to the appropriate split
-        new_data = Dataset.from_dict({"image_id": [image_id], "caption": [caption]}, features=features)
         global dataset
         dataset[split] = concatenate_datasets([dataset[split], new_data])
@@ -126,4 +117,4 @@ with gr.Blocks() as demo:
     # Load initial image
     demo.load(fn=initialize_interface, inputs=session_data, outputs=[image, caption])
-demo.launch(share=True)

 import random
 from datasets import load_dataset, Dataset, Features, Value, concatenate_datasets
 from huggingface_hub import login
 # Authenticate with Hugging Face
 token = os.getenv("HUGGINGFACE_TOKEN")
 if token:
     print("HUGGINGFACE_TOKEN environment variable not set.")
 dataset_name = "GeorgeIbrahim/EGYCOCO"  # Replace with your dataset name
 # Load or create the dataset with train and val splits
 try:
     dataset = load_dataset(dataset_name)
     print("Loaded existing dataset:", dataset)
 except Exception as e:
     # Create empty datasets for train and val splits if they don't exist
+    features = Features({
+        'image_id': Value(dtype='string'),
+        'caption': Value(dtype='string'),
+    })
     train_dataset = Dataset.from_dict({'image_id': [], 'caption': []}, features=features)
     val_dataset = Dataset.from_dict({'image_id': [], 'caption': []}, features=features)
     dataset = {"train": train_dataset, "val": val_dataset}
     dataset["train"].push_to_hub(f"{dataset_name}", split="train")
     dataset["val"].push_to_hub(f"{dataset_name}", split="val")
 image_folder = "test"
 image_files = [f for f in os.listdir(image_folder) if f.endswith(('.png', '.jpg', '.jpeg'))]
 lock = threading.Lock()
             caption = "skipped"
         # Add the new annotation as a new row to the appropriate split
+        new_data = Dataset.from_dict({"image_id": [image_id], "caption": [caption]})
         global dataset
         dataset[split] = concatenate_datasets([dataset[split], new_data])
     # Load initial image
     demo.load(fn=initialize_interface, inputs=session_data, outputs=[image, caption])
+demo.launch(share=True)