Update README.md
Browse files
README.md
CHANGED
|
@@ -34,7 +34,10 @@ def get_keywords(
|
|
| 34 |
# Additional keywords (might also indicate the end of a keyword sequence).
|
| 35 |
# You can merge these with the beginning keyword `B-KWD`.
|
| 36 |
2: "I-KWD",
|
| 37 |
-
}
|
|
|
|
|
|
|
|
|
|
| 38 |
):
|
| 39 |
# Initialize the tokenizer and model.
|
| 40 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
|
@@ -77,11 +80,11 @@ def get_keywords(
|
|
| 77 |
"score": float(probabilities[i, idx])
|
| 78 |
}
|
| 79 |
for i, idx in enumerate(prediction)
|
| 80 |
-
if idx == 1 or idx == 2
|
| 81 |
]
|
| 82 |
```
|
| 83 |
|
| 84 |
-
Choose a text and use the model on it. For example, I've chosen to use [this](https://
|
| 85 |
Then, you can call `get_keywords` on it and extract its keywords:
|
| 86 |
```python
|
| 87 |
# Reading the text from a file, since it is an article, and the text is large.
|
|
@@ -94,7 +97,11 @@ pprint(keywords)
|
|
| 94 |
```
|
| 95 |
```sh
|
| 96 |
Keywords:
|
| 97 |
-
[{'entity': '
|
| 98 |
-
{'entity': '
|
| 99 |
-
{'entity': '
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
```
|
|
|
|
| 34 |
# Additional keywords (might also indicate the end of a keyword sequence).
|
| 35 |
# You can merge these with the beginning keyword `B-KWD`.
|
| 36 |
2: "I-KWD",
|
| 37 |
+
},
|
| 38 |
+
# Probability threshold based on which the keywords will be accepted.
|
| 39 |
+
# If their probability is not greater than `threshold`, they won't be added to the list of keywords.
|
| 40 |
+
threshold=0.50
|
| 41 |
):
|
| 42 |
# Initialize the tokenizer and model.
|
| 43 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
|
|
|
| 80 |
"score": float(probabilities[i, idx])
|
| 81 |
}
|
| 82 |
for i, idx in enumerate(prediction)
|
| 83 |
+
if (idx == 1 or idx == 2) and float(probabilities[i, idx]) > threshold
|
| 84 |
]
|
| 85 |
```
|
| 86 |
|
| 87 |
+
Choose a text and use the model on it. For example, I've chosen to use [this](https://novini.bg/biznes/biznes_tehnologii/781108) article.
|
| 88 |
Then, you can call `get_keywords` on it and extract its keywords:
|
| 89 |
```python
|
| 90 |
# Reading the text from a file, since it is an article, and the text is large.
|
|
|
|
| 97 |
```
|
| 98 |
```sh
|
| 99 |
Keywords:
|
| 100 |
+
[{'entity': 'Туитър', 'entity_group': 'B-KWD', 'score': 0.9278278946876526},
|
| 101 |
+
{'entity': 'Илон', 'entity_group': 'B-KWD', 'score': 0.5862686634063721},
|
| 102 |
+
{'entity': 'Мъск', 'entity_group': 'B-KWD', 'score': 0.5289096832275391},
|
| 103 |
+
{'entity': 'изпълнителен',
|
| 104 |
+
'entity_group': 'B-KWD',
|
| 105 |
+
'score': 0.679943323135376},
|
| 106 |
+
{'entity': 'директор', 'entity_group': 'I-KWD', 'score': 0.6161141991615295}]
|
| 107 |
```
|