GeorgeSherif committed on
Commit
b98ca58
·
1 Parent(s): 77215fd
Files changed (1) hide show
  1. app.py +6 -15
app.py CHANGED
@@ -4,7 +4,6 @@ import threading
4
  import random
5
  from datasets import load_dataset, Dataset, Features, Value, concatenate_datasets
6
  from huggingface_hub import login
7
-
8
  # Authenticate with Hugging Face
9
  token = os.getenv("HUGGINGFACE_TOKEN")
10
  if token:
@@ -13,18 +12,16 @@ else:
13
  print("HUGGINGFACE_TOKEN environment variable not set.")
14
  dataset_name = "GeorgeIbrahim/EGYCOCO" # Replace with your dataset name
15
 
16
- # Define the features schema once at the beginning
17
- features = Features({
18
- 'image_id': Value(dtype='string'),
19
- 'caption': Value(dtype='string'),
20
- })
21
-
22
  # Load or create the dataset with train and val splits
23
  try:
24
  dataset = load_dataset(dataset_name)
25
  print("Loaded existing dataset:", dataset)
26
  except Exception as e:
27
  # Create empty datasets for train and val splits if they don't exist
 
 
 
 
28
  train_dataset = Dataset.from_dict({'image_id': [], 'caption': []}, features=features)
29
  val_dataset = Dataset.from_dict({'image_id': [], 'caption': []}, features=features)
30
  dataset = {"train": train_dataset, "val": val_dataset}
@@ -32,12 +29,6 @@ except Exception as e:
32
  dataset["train"].push_to_hub(f"{dataset_name}", split="train")
33
  dataset["val"].push_to_hub(f"{dataset_name}", split="val")
34
 
35
- # Verify that both splits are in the dataset
36
- if "train" not in dataset:
37
- dataset["train"] = Dataset.from_dict({'image_id': [], 'caption': []}, features=features)
38
- if "val" not in dataset:
39
- dataset["val"] = Dataset.from_dict({'image_id': [], 'caption': []}, features=features)
40
-
41
  image_folder = "test"
42
  image_files = [f for f in os.listdir(image_folder) if f.endswith(('.png', '.jpg', '.jpeg'))]
43
  lock = threading.Lock()
@@ -76,7 +67,7 @@ def save_annotation(caption, session_data):
76
  caption = "skipped"
77
 
78
  # Add the new annotation as a new row to the appropriate split
79
- new_data = Dataset.from_dict({"image_id": [image_id], "caption": [caption]}, features=features)
80
  global dataset
81
  dataset[split] = concatenate_datasets([dataset[split], new_data])
82
 
@@ -126,4 +117,4 @@ with gr.Blocks() as demo:
126
  # Load initial image
127
  demo.load(fn=initialize_interface, inputs=session_data, outputs=[image, caption])
128
 
129
- demo.launch(share=True)
 
4
  import random
5
  from datasets import load_dataset, Dataset, Features, Value, concatenate_datasets
6
  from huggingface_hub import login
 
7
  # Authenticate with Hugging Face
8
  token = os.getenv("HUGGINGFACE_TOKEN")
9
  if token:
 
12
  print("HUGGINGFACE_TOKEN environment variable not set.")
13
  dataset_name = "GeorgeIbrahim/EGYCOCO" # Replace with your dataset name
14
 
 
 
 
 
 
 
15
  # Load or create the dataset with train and val splits
16
  try:
17
  dataset = load_dataset(dataset_name)
18
  print("Loaded existing dataset:", dataset)
19
  except Exception as e:
20
  # Create empty datasets for train and val splits if they don't exist
21
+ features = Features({
22
+ 'image_id': Value(dtype='string'),
23
+ 'caption': Value(dtype='string'),
24
+ })
25
  train_dataset = Dataset.from_dict({'image_id': [], 'caption': []}, features=features)
26
  val_dataset = Dataset.from_dict({'image_id': [], 'caption': []}, features=features)
27
  dataset = {"train": train_dataset, "val": val_dataset}
 
29
  dataset["train"].push_to_hub(f"{dataset_name}", split="train")
30
  dataset["val"].push_to_hub(f"{dataset_name}", split="val")
31
 
 
 
 
 
 
 
32
  image_folder = "test"
33
  image_files = [f for f in os.listdir(image_folder) if f.endswith(('.png', '.jpg', '.jpeg'))]
34
  lock = threading.Lock()
 
67
  caption = "skipped"
68
 
69
  # Add the new annotation as a new row to the appropriate split
70
+ new_data = Dataset.from_dict({"image_id": [image_id], "caption": [caption]})
71
  global dataset
72
  dataset[split] = concatenate_datasets([dataset[split], new_data])
73
 
 
117
  # Load initial image
118
  demo.load(fn=initialize_interface, inputs=session_data, outputs=[image, caption])
119
 
120
+ demo.launch(share=True)