submit app
Browse files
- distinct.py +3 -2
distinct.py
CHANGED
|
@@ -156,16 +156,17 @@ class distinct(evaluate.Measurement):
|
|
| 156 |
tokens = list(tokenizer.tokenize(prediction))
|
| 157 |
tokens_2grams = ngrams(list(tokenizer.tokenize(prediction)), 2, left_pad_symbol='<s>')
|
| 158 |
tokens_3grams = ngrams(list(tokenizer.tokenize(prediction)), 3, left_pad_symbol='<s>')
|
|
|
|
| 159 |
except Exception as e:
|
| 160 |
raise e
|
| 161 |
-
|
| 162 |
distinct_tokens = distinct_tokens | set(tokens)
|
| 163 |
distinct_tokens_2grams = distinct_tokens_2grams | set(tokens_2grams)
|
| 164 |
distinct_tokens_3grams = distinct_tokens_3grams | set(tokens_3grams)
|
| 165 |
total_tokens.extend(tokens)
|
| 166 |
total_tokens_2grams.extend(list(tokens_2grams))
|
| 167 |
total_tokens_3grams.extend(list(tokens_3grams))
|
| 168 |
-
|
| 169 |
Distinct_1 = len(distinct_tokens)/len(total_tokens)
|
| 170 |
Distinct_2 = len(distinct_tokens_2grams)/len(total_tokens_2grams)
|
| 171 |
Distinct_3 = len(distinct_tokens_3grams)/len(total_tokens_3grams)
|
|
|
|
| 156 |
tokens = list(tokenizer.tokenize(prediction))
|
| 157 |
tokens_2grams = ngrams(list(tokenizer.tokenize(prediction)), 2, left_pad_symbol='<s>')
|
| 158 |
tokens_3grams = ngrams(list(tokenizer.tokenize(prediction)), 3, left_pad_symbol='<s>')
|
| 159 |
+
|
| 160 |
except Exception as e:
|
| 161 |
raise e
|
| 162 |
+
print(tokens_2grams)
|
| 163 |
distinct_tokens = distinct_tokens | set(tokens)
|
| 164 |
distinct_tokens_2grams = distinct_tokens_2grams | set(tokens_2grams)
|
| 165 |
distinct_tokens_3grams = distinct_tokens_3grams | set(tokens_3grams)
|
| 166 |
total_tokens.extend(tokens)
|
| 167 |
total_tokens_2grams.extend(list(tokens_2grams))
|
| 168 |
total_tokens_3grams.extend(list(tokens_3grams))
|
| 169 |
+
print(distinct_tokens_2grams, total_tokens_2grams)
|
| 170 |
Distinct_1 = len(distinct_tokens)/len(total_tokens)
|
| 171 |
Distinct_2 = len(distinct_tokens_2grams)/len(total_tokens_2grams)
|
| 172 |
Distinct_3 = len(distinct_tokens_3grams)/len(total_tokens_3grams)
|