Martin Dočekal committed
Commit d0c77e3 · 1 Parent(s): 8d943be

initial commit

Files changed (6):
  1. .gitattributes +0 -35
  2. README.md +44 -3
  3. app.py +6 -0
  4. precision_recall_fscore_accuracy.py +131 -0
  5. requirements.txt +3 -0
  6. tests.py +97 -0
.gitattributes DELETED
@@ -1,35 +0,0 @@
- *.7z filter=lfs diff=lfs merge=lfs -text
- *.arrow filter=lfs diff=lfs merge=lfs -text
- *.bin filter=lfs diff=lfs merge=lfs -text
- *.bz2 filter=lfs diff=lfs merge=lfs -text
- *.ckpt filter=lfs diff=lfs merge=lfs -text
- *.ftz filter=lfs diff=lfs merge=lfs -text
- *.gz filter=lfs diff=lfs merge=lfs -text
- *.h5 filter=lfs diff=lfs merge=lfs -text
- *.joblib filter=lfs diff=lfs merge=lfs -text
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
- *.model filter=lfs diff=lfs merge=lfs -text
- *.msgpack filter=lfs diff=lfs merge=lfs -text
- *.npy filter=lfs diff=lfs merge=lfs -text
- *.npz filter=lfs diff=lfs merge=lfs -text
- *.onnx filter=lfs diff=lfs merge=lfs -text
- *.ot filter=lfs diff=lfs merge=lfs -text
- *.parquet filter=lfs diff=lfs merge=lfs -text
- *.pb filter=lfs diff=lfs merge=lfs -text
- *.pickle filter=lfs diff=lfs merge=lfs -text
- *.pkl filter=lfs diff=lfs merge=lfs -text
- *.pt filter=lfs diff=lfs merge=lfs -text
- *.pth filter=lfs diff=lfs merge=lfs -text
- *.rar filter=lfs diff=lfs merge=lfs -text
- *.safetensors filter=lfs diff=lfs merge=lfs -text
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
- *.tar.* filter=lfs diff=lfs merge=lfs -text
- *.tar filter=lfs diff=lfs merge=lfs -text
- *.tflite filter=lfs diff=lfs merge=lfs -text
- *.tgz filter=lfs diff=lfs merge=lfs -text
- *.wasm filter=lfs diff=lfs merge=lfs -text
- *.xz filter=lfs diff=lfs merge=lfs -text
- *.zip filter=lfs diff=lfs merge=lfs -text
- *.zst filter=lfs diff=lfs merge=lfs -text
- *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,12 +1,53 @@
---
title: Precision Recall Fscore Accuracy
- emoji: 🚀
- colorFrom: red
+ tags:
+ - evaluate
+ - metric
+ colorFrom: gray
colorTo: green
+ description: >-
+   This metric calculates precision, recall, accuracy, and fscore for classification tasks using scikit-learn.
sdk: gradio
sdk_version: 5.23.1
app_file: app.py
pinned: false
+ datasets: []
---

- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # Metric Card for Precision Recall Accuracy Fscore
+ This metric calculates precision, recall, accuracy, and fscore for classification tasks using scikit-learn.
+
+ ## How to Use
+
+ >>> predictions = [0, 1, 0, 1]
+ >>> references = [1, 1, 0, 0]
+ >>> metric = evaluate.load("precision_recall_fscore_accuracy", average="binary")
+ >>> metric.compute(predictions=predictions, references=references)
+ {'precision': 0.5, 'recall': 0.5, 'fscore': 0.5, 'accuracy': 0.5}
+
+ ## Inputs
+ - **predictions** (List of int|str): List of predicted labels.
+ - **references** (List of int|str): List of true labels.
+
+ ## Outputs
+ Dictionary containing the following metrics:
+ - **precision** (float): Precision score.
+ - **recall** (float): Recall score.
+ - **fscore** (float): F1 score.
+ - **accuracy** (float): Accuracy score.
+
+ ## Citation
+ ```bibtex
+ @article{scikit-learn,
+   title={Scikit-learn: Machine Learning in {P}ython},
+   author={Pedregosa, F. and Varoquaux, G. and Gramfort, A. and Michel, V.
+     and Thirion, B. and Grisel, O. and Blondel, M. and Prettenhofer, P.
+     and Weiss, R. and Dubourg, V. and Vanderplas, J. and Passos, A. and
+     Cournapeau, D. and Brucher, M. and Perrot, M. and Duchesnay, E.},
+   journal={Journal of Machine Learning Research},
+   volume={12},
+   pages={2825--2830},
+   year={2011}
+ }
+
+ ```
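
Beyond the binary example in the metric card, a minimal multiclass sketch may help; it assumes the module is published under the same Hub id that app.py loads, and the labels below are made up for illustration:

```python
# Illustrative sketch: macro-averaged scores for a three-class toy example.
# Hub id taken from app.py; adjust it if the module lives under a different name.
import evaluate

metric = evaluate.load("mdocekal/precision_recall_accuracy_fscore", average="macro")
result = metric.compute(
    predictions=[0, 0, 1, 2],
    references=[0, 1, 1, 2],
)
# Expect roughly {'precision': 0.83, 'recall': 0.83, 'fscore': 0.78, 'accuracy': 0.75}.
print(result)
```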
app.py ADDED
@@ -0,0 +1,6 @@
+ import evaluate
+ from evaluate.utils import launch_gradio_widget
+
+
+ module = evaluate.load("mdocekal/precision_recall_accuracy_fscore")
+ launch_gradio_widget(module)
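
For local development it can be convenient to point the widget at the script in this repository instead of the Hub id; a minimal sketch, assuming `evaluate.load` accepts a local path to the metric script:

```python
# Hypothetical local variant of app.py: load the metric script from the working directory
# instead of the "mdocekal/precision_recall_accuracy_fscore" Hub id.
import evaluate
from evaluate.utils import launch_gradio_widget

module = evaluate.load("precision_recall_fscore_accuracy.py")  # assumes local-path loading is supported
launch_gradio_widget(module)
```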
precision_recall_fscore_accuracy.py ADDED
@@ -0,0 +1,131 @@
+ from collections import Counter
+ from typing import Optional, Union
+
+ import evaluate
+ import datasets
+
+ from sklearn.metrics import precision_recall_fscore_support, accuracy_score
+
+
+ _CITATION = """
+ @article{scikit-learn,
+   title={Scikit-learn: Machine Learning in {P}ython},
+   author={Pedregosa, F. and Varoquaux, G. and Gramfort, A. and Michel, V.
+     and Thirion, B. and Grisel, O. and Blondel, M. and Prettenhofer, P.
+     and Weiss, R. and Dubourg, V. and Vanderplas, J. and Passos, A. and
+     Cournapeau, D. and Brucher, M. and Perrot, M. and Duchesnay, E.},
+   journal={Journal of Machine Learning Research},
+   volume={12},
+   pages={2825--2830},
+   year={2011}
+ }
+
+ """
+
+ _DESCRIPTION = """\
+ This metric calculates precision, recall, accuracy, and fscore for classification tasks using scikit-learn.
+ """
+
+ _KWARGS_DESCRIPTION = """
+ Args:
+     predictions: list or numpy array of predicted labels.
+     references: list or numpy array of true labels.
+     average (str, optional): Type of averaging performed on the data.
+         This parameter is required for multiclass/multilabel targets.
+         If ``None``, the metrics for each class are returned. Otherwise, this
+         determines the type of averaging performed on the data:
+
+         ``'binary'``:
+             Only report results for the class specified by ``pos_label``.
+             This is applicable only if targets (``y_{true,pred}``) are binary.
+         ``'micro'``:
+             Calculate metrics globally by counting the total true positives,
+             false negatives and false positives.
+         ``'macro'``:
+             Calculate metrics for each label, and find their unweighted
+             mean. This does not take label imbalance into account.
+         ``'weighted'``:
+             Calculate metrics for each label, and find their average weighted
+             by support (the number of true instances for each label). This
+             alters 'macro' to account for label imbalance; it can result in an
+             F-score that is not between precision and recall.
+         ``'samples'``:
+             Calculate metrics for each instance, and find their average (only
+             meaningful for multilabel classification where this differs from
+             :func:`accuracy_score`).
+     zero_division (int or str, optional): default="warn"
+         Sets the value to return when there is a zero division:
+
+         - recall: when there are no positive labels
+         - precision: when there are no positive predictions
+         - f-score: both
+
+         Notes:
+
+         - If set to "warn", this acts like 0, but a warning is also raised.
+         - If set to `np.nan`, such values will be excluded from the average.
+
+         .. versionadded:: 1.3
+            `np.nan` option was added.
+
+ Returns:
+     A dictionary with the following keys:
+     - precision: Precision score.
+     - recall: Recall score.
+     - fscore: F-score (F1 with the default ``beta``).
+     - accuracy: Accuracy score.
+
+ Examples:
+     >>> predictions = [0, 1, 0, 1]
+     >>> references = [1, 1, 0, 0]
+     >>> metric = evaluate.load("precision_recall_fscore_accuracy", average="binary")
+     >>> metric.compute(predictions=predictions, references=references)
+     {'precision': 0.5, 'recall': 0.5, 'fscore': 0.5, 'accuracy': 0.5}
+
+ """
+
+
+ @evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
+ class PrecisionRecallFscoreAccuracy(evaluate.Metric):
+     """
+     Computes precision, recall, fscore, and accuracy for classification tasks using scikit-learn.
+     """
+
+     def __init__(self, *args, **kwargs):
+         super().__init__(*args, **kwargs)
+         self.beta = kwargs.get("beta", 1.0)
+         self.average = kwargs.get("average", None)
+         self.zero_division = kwargs.get("zero_division", "warn")
+
+     def _info(self):
+         return evaluate.MetricInfo(
+             # This is the description that will appear on the modules page.
+             module_type="metric",
+             description=_DESCRIPTION,
+             citation=_CITATION,
+             inputs_description=_KWARGS_DESCRIPTION,
+             # This defines the format of each prediction and reference
+             features=[
+                 datasets.Features({
+                     'predictions': datasets.Value('int64'),
+                     'references': datasets.Value('int64'),
+                 }),
+                 datasets.Features({
+                     'predictions': datasets.Value('string'),
+                     'references': datasets.Value('string'),
+                 }),
+             ]
+         )
+
+     def _compute(self, predictions: list[Union[int, str]], references: list[Union[int, str]]):
+         precision, recall, f1, _ = precision_recall_fscore_support(
+             references, predictions, beta=self.beta, average=self.average, zero_division=self.zero_division
+         )
+         accuracy = accuracy_score(references, predictions)
+         return {
+             "precision": precision,
+             "recall": recall,
+             "fscore": f1,
+             "accuracy": accuracy,
+         }
+
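
Because `average` only controls how per-class scores are aggregated, a short sketch of the unaggregated behaviour may be useful; it instantiates the class directly, as tests.py does below, and the multiclass labels and `zero_division=0` setting are illustrative assumptions:

```python
# Illustrative sketch: per-class scores with average=None (labels are made up).
from precision_recall_fscore_accuracy import PrecisionRecallFscoreAccuracy

metric = PrecisionRecallFscoreAccuracy(average=None, zero_division=0)
result = metric.compute(
    predictions=[0, 0, 1, 2],
    references=[0, 1, 1, 2],
)
# precision, recall, and fscore come back as one value per class (sorted label order),
# e.g. precision ~ [0.5, 1.0, 1.0] and recall ~ [1.0, 0.5, 1.0]; accuracy stays a single float (0.75).
print(result)
```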
requirements.txt ADDED
@@ -0,0 +1,3 @@
+ evaluate
+ datasets
+ scikit-learn
tests.py ADDED
@@ -0,0 +1,97 @@
+ from unittest import TestCase
+
+ from precision_recall_fscore_accuracy import PrecisionRecallFscoreAccuracy
+
+
+ class PrecisionRecallFscoreAccuracyTestBinary(TestCase):
+     """
+     All of these tests are also reused for the multiset configuration, so please write each test so that
+     it stays valid for both configurations (do not use the same label multiple times).
+     """
+
+     def setUp(self):
+         self.metric = PrecisionRecallFscoreAccuracy(average="binary")
+
+     def test_eok(self):
+         self.assertDictEqual(
+             {
+                 "precision": 1.0,
+                 "recall": 1.0,
+                 "fscore": 1.0,
+                 "accuracy": 1.0,
+             },
+             self.metric.compute(
+                 predictions=[0, 1, 0],
+                 references=[0, 1, 0]
+             )
+         )
+
+     def test_eok_string(self):
+         self.assertDictEqual(
+             {
+                 "precision": 1.0,
+                 "recall": 1.0,
+                 "accuracy": 1.0,
+                 "fscore": 1.0
+             },
+             self.metric.compute(
+                 predictions=["0", "1", "0"],
+                 references=["0", "1", "0"]
+             )
+         )
+
+     def test_completely_different(self):
+         self.assertDictEqual(
+             {
+                 "precision": 0.0,
+                 "recall": 0.0,
+                 "accuracy": 0.0,
+                 "fscore": 0.0
+             },
+             self.metric.compute(
+                 predictions=[0, 1, 0],
+                 references=[1, 0, 1]
+             )
+         )
+
+     def test_max_precision(self):
+         self.assertDictEqual(
+             {
+                 "precision": 1.0,
+                 "recall": 0.5,
+                 "accuracy": 0.5,
+                 "fscore": 2 / 3
+             },
+             self.metric.compute(
+                 predictions=[0, 1],
+                 references=[1, 1]
+             )
+         )
+
+     def test_max_recall(self):
+         self.assertDictEqual(
+             {
+                 "precision": 0.5,
+                 "recall": 1.0,
+                 "accuracy": 0.5,
+                 "fscore": 2 / 3
+             },
+             self.metric.compute(
+                 predictions=[1, 1],
+                 references=[1, 0]
+             )
+         )
+
+     def test_partial_match(self):
+         self.assertDictEqual(
+             {
+                 "precision": 0.5,
+                 "recall": 0.5,
+                 "accuracy": 0.5,
+                 "fscore": 0.5
+             },
+             self.metric.compute(
+                 predictions=[0, 1, 0, 1],
+                 references=[1, 1, 0, 0]
+             )
+         )
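
The suite has no `__main__` guard, so besides the usual `python -m unittest tests` invocation, a minimal sketch for running it programmatically:

```python
# Minimal sketch: load and run the test classes defined in tests.py.
import unittest

import tests

suite = unittest.defaultTestLoader.loadTestsFromModule(tests)
unittest.TextTestRunner(verbosity=2).run(suite)
```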