Spaces:
Sleeping
Sleeping
wandb fix
Browse files- user-friendly-metrics.py +13 -20
user-friendly-metrics.py
CHANGED
|
@@ -82,16 +82,14 @@ class UserFriendlyMetrics(evaluate.Metric):
|
|
| 82 |
citation=_CITATION,
|
| 83 |
inputs_description=_KWARGS_DESCRIPTION,
|
| 84 |
# This defines the format of each prediction and reference
|
| 85 |
-
features=datasets.Features(
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
}
|
| 94 |
-
),
|
| 95 |
# Additional links to the codebase or references
|
| 96 |
codebase_urls=["http://github.com/path/to/codebase/of/new_module"],
|
| 97 |
reference_urls=["http://path.to.reference.url/new_module"],
|
|
@@ -155,7 +153,7 @@ class UserFriendlyMetrics(evaluate.Metric):
|
|
| 155 |
|
| 156 |
predictions, references = payload_to_uf_metrics(payload, model_name=model_name, filter_dict=self.filter_dict)
|
| 157 |
|
| 158 |
-
results[model_name]["per_sequence"][seq_name] = self._compute(predictions, references)
|
| 159 |
|
| 160 |
# overall
|
| 161 |
model_payload = Payload(
|
|
@@ -166,7 +164,7 @@ class UserFriendlyMetrics(evaluate.Metric):
|
|
| 166 |
)
|
| 167 |
predictions, references = payload_to_uf_metrics(payload, model_name=model_name, filter_dict=self.filter_dict)
|
| 168 |
|
| 169 |
-
results[model_name]["overall"] = self._compute(predictions, references)
|
| 170 |
|
| 171 |
return results
|
| 172 |
|
|
@@ -220,19 +218,14 @@ class UserFriendlyMetrics(evaluate.Metric):
|
|
| 220 |
|
| 221 |
categories = {
|
| 222 |
"user_friendly_metrics": {
|
| 223 |
-
"
|
| 224 |
-
"mostly_tracked_score_0.5",
|
| 225 |
-
"mostly_tracked_score_0.8",
|
| 226 |
},
|
| 227 |
"evaluation_metrics_dev": {
|
| 228 |
"recall",
|
| 229 |
},
|
| 230 |
"user_friendly_metrics_dev": {
|
| 231 |
-
"
|
| 232 |
-
|
| 233 |
-
"mostly_tracked_count_0.8",
|
| 234 |
-
"unique_obj_count",
|
| 235 |
-
},
|
| 236 |
"predictions_summary": {
|
| 237 |
"tp",
|
| 238 |
"fn",
|
|
|
|
| 82 |
citation=_CITATION,
|
| 83 |
inputs_description=_KWARGS_DESCRIPTION,
|
| 84 |
# This defines the format of each prediction and reference
|
| 85 |
+
features=datasets.Features({
|
| 86 |
+
"predictions": datasets.Sequence(
|
| 87 |
+
datasets.Sequence(datasets.Value("float"))
|
| 88 |
+
),
|
| 89 |
+
"references": datasets.Features({ "all":
|
| 90 |
+
datasets.Sequence(datasets.Sequence(datasets.Value("float")))}
|
| 91 |
+
)
|
| 92 |
+
}), #couldn't get this to work
|
|
|
|
|
|
|
| 93 |
# Additional links to the codebase or references
|
| 94 |
codebase_urls=["http://github.com/path/to/codebase/of/new_module"],
|
| 95 |
reference_urls=["http://path.to.reference.url/new_module"],
|
|
|
|
| 153 |
|
| 154 |
predictions, references = payload_to_uf_metrics(payload, model_name=model_name, filter_dict=self.filter_dict)
|
| 155 |
|
| 156 |
+
results[model_name]["per_sequence"][seq_name] = self._compute(predictions=predictions, references=references)
|
| 157 |
|
| 158 |
# overall
|
| 159 |
model_payload = Payload(
|
|
|
|
| 164 |
)
|
| 165 |
predictions, references = payload_to_uf_metrics(payload, model_name=model_name, filter_dict=self.filter_dict)
|
| 166 |
|
| 167 |
+
results[model_name]["overall"] = self._compute(predictions=predictions, references=references)
|
| 168 |
|
| 169 |
return results
|
| 170 |
|
|
|
|
| 218 |
|
| 219 |
categories = {
|
| 220 |
"user_friendly_metrics": {
|
| 221 |
+
f"mostly_tracked_score_{str(threshold).replace('.', '_')}" for threshold in self.recognition_thresholds
|
|
|
|
|
|
|
| 222 |
},
|
| 223 |
"evaluation_metrics_dev": {
|
| 224 |
"recall",
|
| 225 |
},
|
| 226 |
"user_friendly_metrics_dev": {
|
| 227 |
+
f"mostly_tracked_count_{str(threshold).replace('.', '_')}" for threshold in self.recognition_thresholds
|
| 228 |
+
}.union({"unique_object_count"}),
|
|
|
|
|
|
|
|
|
|
| 229 |
"predictions_summary": {
|
| 230 |
"tp",
|
| 231 |
"fn",
|