Spaces:
Sleeping
Sleeping
Create keywords_processor.py
Browse files- keywords_processor.py +10 -0
keywords_processor.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re
|
| 2 |
+
from sklearn.feature_extraction.text import CountVectorizer
|
| 3 |
+
|
| 4 |
+
def process_keywords(text: str):
    """Generate the word n-grams (unigrams through trigrams) found in *text*.

    Commas, newlines and literal ``\\n`` escape sequences are treated as
    keyword separators and replaced by a single space before tokenizing.

    Args:
        text: Raw keyword text, e.g. a comma- or newline-separated list.

    Returns:
        The vocabulary learned by ``CountVectorizer`` as returned by
        ``get_feature_names_out()`` (an array of strings, not a plain
        ``list``). Tokenization follows ``CountVectorizer``'s defaults.

    Raises:
        ValueError: Propagated from ``CountVectorizer`` when *text* yields
            no tokens at all (for example, an empty string).
    """
    # The previous pattern was the character class [,\\n], which matches a
    # comma, a backslash, or the *letter* "n" -- so every "n" in the input
    # was turned into a space ("banana" -> "ba a a"). Match the intended
    # separators instead: commas, real newlines, and the two-character
    # sequence backslash + "n" that appears when newlines arrive escaped.
    normalized = re.sub(r"(?:,|\\n|\n)+", " ", text)

    vectorizer = CountVectorizer(ngram_range=(1, 3))
    # Only the learned vocabulary is needed; the document-term matrix that
    # fit_transform() would return was never used.
    vectorizer.fit([normalized])
    return vectorizer.get_feature_names_out()
|