Spaces:
Sleeping
Sleeping
add check to see if correct class order used for output
Browse files
- dataPrep/data_preparation.py +13 -10
- ui/app.py +5 -1
dataPrep/data_preparation.py
CHANGED
|
@@ -74,15 +74,15 @@ task.connect({
|
|
| 74 |
})
|
| 75 |
|
| 76 |
# ----- Load a subset from a given dataset & track with ClearML -----
|
| 77 |
-
data_plants,
|
| 78 |
-
DATASET_LINK, DATASET_SUBSET_RATIO,
|
| 79 |
)
|
| 80 |
|
| 81 |
|
| 82 |
# ---- Exploratory data analysis (EDA) ----
|
| 83 |
|
| 84 |
# Reformatting the label feature to understand bias
|
| 85 |
-
labels_list =
|
| 86 |
df_labels = pd.Series(labels_list)
|
| 87 |
label_count = df_labels.value_counts(sort=False)
|
| 88 |
|
|
@@ -111,7 +111,6 @@ clearml_logger.report_scalar(
|
|
| 111 |
value=(max_count / min_count),
|
| 112 |
iteration=1
|
| 113 |
)
|
| 114 |
-
|
| 115 |
print("--- Class imbalance analysis --- ")
|
| 116 |
print(f"Max labels in a class: {max_count}")
|
| 117 |
print(f"Min labels in a class: {min_count}")
|
|
@@ -123,17 +122,16 @@ class_names = features['label'].names
|
|
| 123 |
formatted_class_names = [" ".join(name.replace('_', ' ').split()) for name in class_names]
|
| 124 |
label_count.index = formatted_class_names
|
| 125 |
|
| 126 |
-
# Plotting class distribution
|
| 127 |
plt.figure(figsize=(10,6))
|
| 128 |
label_count.plot(kind='bar', color='skyblue')
|
| 129 |
-
plt.title("Class Distribution in
|
| 130 |
plt.xlabel("Class")
|
| 131 |
plt.ylabel("Count")
|
| 132 |
plt.tight_layout()
|
| 133 |
|
| 134 |
clearml_logger.report_matplotlib_figure(
|
| 135 |
title="EDA Class Distribution",
|
| 136 |
-
series="Subset
|
| 137 |
figure=plt.gcf(),
|
| 138 |
iteration=1
|
| 139 |
)
|
|
@@ -151,7 +149,7 @@ if __name__ == "__main__":
|
|
| 151 |
}
|
| 152 |
|
| 153 |
prototype_loaders = make_dataset_loaders(
|
| 154 |
-
|
| 155 |
)
|
| 156 |
|
| 157 |
print("\n--- Handoff Test Successful ---")
|
|
@@ -175,9 +173,14 @@ if __name__ == "__main__":
|
|
| 175 |
print(f"Validation loader batches: {len(final_loaders['val'])}")
|
| 176 |
print(f"Test loader batches: {len(final_loaders['test'])}")
|
| 177 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 178 |
|
|
|
|
| 179 |
# Close the ClearML task
|
| 180 |
-
task.mark_completed()
|
| 181 |
task.close()
|
| 182 |
-
|
| 183 |
print("\n--- Script Finished ---")
|
|
|
|
| 74 |
})
|
| 75 |
|
| 76 |
# ----- Load a subset from a given dataset & track with ClearML -----
|
| 77 |
+
data_plants, prototyping_dataset, features, clearml_dataset = make_subset(
|
| 78 |
+
DATASET_LINK, DATASET_SUBSET_RATIO, clearml_logger
|
| 79 |
)
|
| 80 |
|
| 81 |
|
| 82 |
# ---- Exploratory data analysis (EDA) ----
|
| 83 |
|
| 84 |
# Reformatting the label feature to understand bias
|
| 85 |
+
labels_list = prototyping_dataset['label']
|
| 86 |
df_labels = pd.Series(labels_list)
|
| 87 |
label_count = df_labels.value_counts(sort=False)
|
| 88 |
|
|
|
|
| 111 |
value=(max_count / min_count),
|
| 112 |
iteration=1
|
| 113 |
)
|
|
|
|
| 114 |
print("--- Class imbalance analysis --- ")
|
| 115 |
print(f"Max labels in a class: {max_count}")
|
| 116 |
print(f"Min labels in a class: {min_count}")
|
|
|
|
| 122 |
formatted_class_names = [" ".join(name.replace('_', ' ').split()) for name in class_names]
|
| 123 |
label_count.index = formatted_class_names
|
| 124 |
|
|
|
|
| 125 |
plt.figure(figsize=(10,6))
|
| 126 |
label_count.plot(kind='bar', color='skyblue')
|
| 127 |
+
plt.title("Class Distribution in Prototype Dataset")
|
| 128 |
plt.xlabel("Class")
|
| 129 |
plt.ylabel("Count")
|
| 130 |
plt.tight_layout()
|
| 131 |
|
| 132 |
clearml_logger.report_matplotlib_figure(
|
| 133 |
title="EDA Class Distribution",
|
| 134 |
+
series="Prototype Subset",
|
| 135 |
figure=plt.gcf(),
|
| 136 |
iteration=1
|
| 137 |
)
|
|
|
|
| 149 |
}
|
| 150 |
|
| 151 |
prototype_loaders = make_dataset_loaders(
|
| 152 |
+
prototyping_dataset, SEED, BATCH_SIZE, TEST_SIZE, aug_config
|
| 153 |
)
|
| 154 |
|
| 155 |
print("\n--- Handoff Test Successful ---")
|
|
|
|
| 173 |
print(f"Validation loader batches: {len(final_loaders['val'])}")
|
| 174 |
print(f"Test loader batches: {len(final_loaders['test'])}")
|
| 175 |
|
| 176 |
+
# Record dataset info in ClearML
|
| 177 |
+
task.connect_configuration(
|
| 178 |
+
{"dataset_id": clearml_dataset.id},
|
| 179 |
+
name="Dataset Metadata"
|
| 180 |
+
)
|
| 181 |
+
task.mark_completed()
|
| 182 |
|
| 183 |
+
|
| 184 |
# Close the ClearML task
|
|
|
|
| 185 |
task.close()
|
|
|
|
| 186 |
print("\n--- Script Finished ---")
|
ui/app.py
CHANGED
|
@@ -48,7 +48,7 @@ class PlantDiseaseApp:
|
|
| 48 |
try:
|
| 49 |
# Load model if needed
|
| 50 |
if modelName != self.current_modelName:
|
| 51 |
-
self.model = self.model_loader.loadModel(modelName)
|
| 52 |
self.current_modelName = modelName
|
| 53 |
|
| 54 |
# Preprocess image
|
|
@@ -61,6 +61,10 @@ class PlantDiseaseApp:
|
|
| 61 |
# Convert logits to probabilities
|
| 62 |
probs = torch.nn.functional.softmax(logits, dim=1).cpu().numpy()[0]
|
| 63 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
# Map to class names
|
| 65 |
predictions = {name: float(prob) for name, prob in zip(self.class_names, probs)}
|
| 66 |
|
|
|
|
| 48 |
try:
|
| 49 |
# Load model if needed
|
| 50 |
if modelName != self.current_modelName:
|
| 51 |
+
self.model, self = self.model_loader.loadModel(modelName)
|
| 52 |
self.current_modelName = modelName
|
| 53 |
|
| 54 |
# Preprocess image
|
|
|
|
| 61 |
# Convert logits to probabilities
|
| 62 |
probs = torch.nn.functional.softmax(logits, dim=1).cpu().numpy()[0]
|
| 63 |
|
| 64 |
+
|
| 65 |
+
predID = probs.argmanx().item()
|
| 66 |
+
print("predicted index: " + predID)
|
| 67 |
+
|
| 68 |
# Map to class names
|
| 69 |
predictions = {name: float(prob) for name, prob in zip(self.class_names, probs)}
|
| 70 |
|