Martin Dočekal committed on
Commit · d0c77e3
1 Parent(s): 8d943be
initial commit
Browse files
- .gitattributes +0 -35
- README.md +44 -3
- app.py +6 -0
- precision_recall_fscore_accuracy.py +131 -0
- requirements.txt +3 -0
- tests.py +97 -0
.gitattributes
DELETED
@@ -1,35 +0,0 @@
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
README.md
CHANGED
@@ -1,12 +1,53 @@
 ---
 title: Precision Recall Fscore Accuracy
+tags:
+- evaluate
+- metric
+colorFrom: gray
 colorTo: green
+description: >-
+  This metric calculates precision, recall, accuracy, and fscore for classification tasks using scikit-learn.
 sdk: gradio
 sdk_version: 5.23.1
 app_file: app.py
 pinned: false
+datasets: []
 ---

+# Metric Card for Precision Recall Accuracy Fscore
+This metric calculates precision, recall, accuracy, and fscore for classification tasks using scikit-learn.
+
+## How to Use
+
+>>> predictions = [0, 1, 0, 1]
+>>> references = [1, 1, 0, 0]
+>>> metric = evaluate.load("precision_recall_fscore_accuracy", average="binary")
+>>> metric.compute(predictions=predictions, references=references)
+{'precision': 0.5, 'recall': 0.5, 'fscore': 0.5, 'accuracy': 0.5}
+
+## Inputs
+- **predictions** (List of int|str): List of predicted labels.
+- **references** (List of int|str): List of true labels.
+
+## Outputs
+Dictionary containing the following metrics:
+- **precision** (float): Precision score.
+- **recall** (float): Recall score.
+- **fscore** (float): F1 score.
+- **accuracy** (float): Accuracy score.
+
+## Citation
+```bibtex
+@article{scikit-learn,
+  title={Scikit-learn: Machine Learning in {P}ython},
+  author={Pedregosa, F. and Varoquaux, G. and Gramfort, A. and Michel, V.
+          and Thirion, B. and Grisel, O. and Blondel, M. and Prettenhofer, P.
+          and Weiss, R. and Dubourg, V. and Vanderplas, J. and Passos, A. and
+          Cournapeau, D. and Brucher, M. and Perrot, M. and Duchesnay, E.},
+  journal={Journal of Machine Learning Research},
+  volume={12},
+  pages={2825--2830},
+  year={2011}
+}
+
+```
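For reference, the README example can be reproduced with scikit-learn directly, which is what this metric wraps. A minimal sketch (plain sklearn, no evaluate involved):

```python
from sklearn.metrics import precision_recall_fscore_support, accuracy_score

predictions = [0, 1, 0, 1]
references = [1, 1, 0, 0]

# average="binary" scores only the positive class (label 1):
# one true positive, one false positive, one false negative here.
precision, recall, fscore, _ = precision_recall_fscore_support(
    references, predictions, average="binary"
)
accuracy = accuracy_score(references, predictions)
print(precision, recall, fscore, accuracy)  # 0.5 0.5 0.5 0.5
```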
app.py
ADDED
@@ -0,0 +1,6 @@
import evaluate
from evaluate.utils import launch_gradio_widget


module = evaluate.load("mdocekal/precision_recall_accuracy_fscore")
launch_gradio_widget(module)
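app.py only wraps the module in the standard evaluate Gradio widget. A minimal sketch of using the same module programmatically instead, assuming the Hub id from app.py resolves (a local path to precision_recall_fscore_accuracy.py would also work with evaluate.load):

```python
import evaluate

# Extra keyword arguments such as `average` are forwarded by evaluate.load
# to the metric's __init__, as in the README example.
module = evaluate.load("mdocekal/precision_recall_accuracy_fscore", average="binary")

result = module.compute(predictions=[0, 1, 0, 1], references=[1, 1, 0, 0])
print(result)  # all four scores should come out as 0.5 for this example
```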
precision_recall_fscore_accuracy.py
ADDED
@@ -0,0 +1,131 @@
from collections import Counter
from typing import Optional, Union

import evaluate
import datasets

from sklearn.metrics import precision_recall_fscore_support, accuracy_score


_CITATION = """
@article{scikit-learn,
  title={Scikit-learn: Machine Learning in {P}ython},
  author={Pedregosa, F. and Varoquaux, G. and Gramfort, A. and Michel, V.
          and Thirion, B. and Grisel, O. and Blondel, M. and Prettenhofer, P.
          and Weiss, R. and Dubourg, V. and Vanderplas, J. and Passos, A. and
          Cournapeau, D. and Brucher, M. and Perrot, M. and Duchesnay, E.},
  journal={Journal of Machine Learning Research},
  volume={12},
  pages={2825--2830},
  year={2011}
}
"""

_DESCRIPTION = """\
This metric calculates precision, recall, accuracy, and fscore for classification tasks using scikit-learn.
"""

_KWARGS_DESCRIPTION = """
Args:
    predictions: list or numpy array of predicted labels.
    references: list or numpy array of true labels.
    average (str, optional): Type of averaging performed on the data.
        This parameter is required for multiclass/multilabel targets.
        If ``None``, the metrics for each class are returned. Otherwise, this
        determines the type of averaging performed on the data:

        ``'binary'``:
            Only report results for the class specified by ``pos_label``.
            This is applicable only if targets (``y_{true,pred}``) are binary.
        ``'micro'``:
            Calculate metrics globally by counting the total true positives,
            false negatives and false positives.
        ``'macro'``:
            Calculate metrics for each label, and find their unweighted
            mean. This does not take label imbalance into account.
        ``'weighted'``:
            Calculate metrics for each label, and find their average weighted
            by support (the number of true instances for each label). This
            alters 'macro' to account for label imbalance; it can result in an
            F-score that is not between precision and recall.
        ``'samples'``:
            Calculate metrics for each instance, and find their average (only
            meaningful for multilabel classification where this differs from
            :func:`accuracy_score`).
    zero_division (int or str, optional): default="warn"
        Sets the value to return when there is a zero division:

        - recall: when there are no positive labels
        - precision: when there are no positive predictions
        - f-score: both

        Notes:

        - If set to "warn", this acts like 0, but a warning is also raised.
        - If set to `np.nan`, such values will be excluded from the average.

        .. versionadded:: 1.3
           `np.nan` option was added.

Returns:
    A dictionary with the following keys:
    - precision: Precision score.
    - recall: Recall score.
    - fscore: F1 score.
    - accuracy: Accuracy score.

Examples:
    >>> predictions = [0, 1, 0, 1]
    >>> references = [1, 1, 0, 0]
    >>> metric = evaluate.load("precision_recall_fscore_accuracy", average="binary")
    >>> metric.compute(predictions=predictions, references=references)
    {'precision': 0.5, 'recall': 0.5, 'fscore': 0.5, 'accuracy': 0.5}

"""


@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
class PrecisionRecallFscoreAccuracy(evaluate.Metric):
    """
    Wrapper around scikit-learn's precision_recall_fscore_support and accuracy_score for classification tasks.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.beta = kwargs.get("beta", 1.0)
        self.average = kwargs.get("average", None)
        self.zero_division = kwargs.get("zero_division", "warn")

    def _info(self):
        return evaluate.MetricInfo(
            # This is the description that will appear on the modules page.
            module_type="metric",
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            # This defines the format of each prediction and reference
            features=[
                datasets.Features({
                    'predictions': datasets.Value('int64'),
                    'references': datasets.Value('int64'),
                }),
                datasets.Features({
                    'predictions': datasets.Value('string'),
                    'references': datasets.Value('string'),
                }),
            ]
        )

    def _compute(self, predictions: list[Union[int, str]], references: list[Union[int, str]]):
        precision, recall, f1, _ = precision_recall_fscore_support(
            references, predictions, average=self.average, zero_division=self.zero_division
        )
        accuracy = accuracy_score(references, predictions)
        return {
            "precision": precision,
            "recall": recall,
            "fscore": f1,
            "accuracy": accuracy,
        }
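The `average` and `zero_division` arguments are forwarded unchanged to scikit-learn, so their effect can be previewed with precision_recall_fscore_support alone; with `average=None` the module returns per-class numpy arrays rather than single floats. A small sketch with made-up multiclass labels:

```python
from sklearn.metrics import precision_recall_fscore_support

references = [0, 1, 2, 2]
predictions = [0, 2, 2, 2]

# average=None: per-class arrays, one entry per label (0, 1, 2).
print(precision_recall_fscore_support(references, predictions, average=None, zero_division=0))

# average="macro": unweighted mean over the per-class scores.
print(precision_recall_fscore_support(references, predictions, average="macro", zero_division=0))

# average="micro": computed from global TP/FP/FN counts; for single-label
# classification the micro precision/recall/F1 all equal the accuracy.
print(precision_recall_fscore_support(references, predictions, average="micro", zero_division=0))
```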
requirements.txt
ADDED
@@ -0,0 +1,3 @@
evaluate
datasets
scikit-learn
tests.py
ADDED
@@ -0,0 +1,97 @@
from unittest import TestCase

from precision_recall_fscore_accuracy import PrecisionRecallFscoreAccuracy


class PrecisionRecallFscoreAccuracyTestBinary(TestCase):
    """
    All of these tests are also used for the multiset configuration. So please mind this and write each test in a way
    that is valid for both configurations (do not use the same label multiple times).
    """

    def setUp(self):
        self.metric = PrecisionRecallFscoreAccuracy(average="binary")

    def test_eok(self):
        self.assertDictEqual(
            {
                "precision": 1.0,
                "recall": 1.0,
                "fscore": 1.0,
                "accuracy": 1.0,
            },
            self.metric.compute(
                predictions=[0, 1, 0],
                references=[0, 1, 0]
            )
        )

    def test_eok_string(self):
        self.assertDictEqual(
            {
                "precision": 1.0,
                "recall": 1.0,
                "accuracy": 1.0,
                "fscore": 1.0
            },
            self.metric.compute(
                predictions=["0", "1", "0"],
                references=["0", "1", "0"]
            )
        )

    def test_completely_different(self):
        self.assertDictEqual(
            {
                "precision": 0.0,
                "recall": 0.0,
                "accuracy": 0.0,
                "fscore": 0.0
            },
            self.metric.compute(
                predictions=[0, 1, 0],
                references=[1, 0, 1]
            )
        )

    def test_max_precision(self):
        self.assertDictEqual(
            {
                "precision": 1.0,
                "recall": 0.5,
                "accuracy": 0.5,
                "fscore": 2 / 3
            },
            self.metric.compute(
                predictions=[0, 1],
                references=[1, 1]
            )
        )

    def test_max_recall(self):
        self.assertDictEqual(
            {
                "precision": 0.5,
                "recall": 1.0,
                "accuracy": 0.5,
                "fscore": 2 / 3
            },
            self.metric.compute(
                predictions=[1, 1],
                references=[1, 0]
            )
        )

    def test_partial_match(self):
        self.assertDictEqual(
            {
                "precision": 0.5,
                "recall": 0.5,
                "accuracy": 0.5,
                "fscore": 0.5
            },
            self.metric.compute(
                predictions=[0, 1, 0, 1],
                references=[1, 1, 0, 0]
            )
        )
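As a sanity check on the expected numbers, take test_max_precision: predictions=[0, 1] against references=[1, 1] gives one true positive, no false positives, and one false negative, so precision = 1/1 = 1.0, recall = 1/2 = 0.5, F1 = 2 * (1.0 * 0.5) / (1.0 + 0.5) = 2/3, and accuracy = 1/2 = 0.5, matching the asserted dictionary. The same values can be confirmed with scikit-learn directly:

```python
from sklearn.metrics import precision_recall_fscore_support, accuracy_score

# References first, predictions second, matching the test_max_precision case.
p, r, f, _ = precision_recall_fscore_support([1, 1], [0, 1], average="binary")
print(p, r, f, accuracy_score([1, 1], [0, 1]))  # 1.0 0.5 0.6666666666666666 0.5
```

With the metric file on the import path, the suite should run under the standard library runner, e.g. `python -m unittest tests`.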