Yasu777 committed on
Commit
540dde0
·
verified ·
1 Parent(s): b184017

Create keywords_processor.py

Browse files
Files changed (1) hide show
  1. keywords_processor.py +10 -0
keywords_processor.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ from sklearn.feature_extraction.text import CountVectorizer
3
+
4
def process_keywords(text: str):
    """Extract the 1- to 3-word n-grams that occur in ``text``.

    Commas, newlines and literal backslash-n sequences are first replaced
    by spaces. A ``CountVectorizer`` configured with ``ngram_range=(1, 3)``
    is then fitted on the single resulting document, and its vocabulary is
    returned.

    Args:
        text: Raw keyword text, e.g. comma- or newline-separated phrases.

    Returns:
        The fitted vocabulary exactly as produced by
        ``CountVectorizer.get_feature_names_out()`` (a NumPy array of
        n-gram strings, not a Python ``list``).

    Raises:
        ValueError: Propagated from scikit-learn when the text yields no
            tokens at all (empty vocabulary), e.g. for an empty string.
    """
    # The separator pattern was previously r"[,\\n]+". Inside a raw string
    # that character class means comma, backslash and the *letter* "n", so
    # every "n" in the input was erased ("python" -> "pytho") while real
    # newlines were never matched. Match the intended separators instead:
    # commas, real newlines, and the two-character escape backslash + "n".
    normalized = re.sub(r"(?:[,\n]|\\n)+", " ", text)

    vectorizer = CountVectorizer(ngram_range=(1, 3))
    # Only the vocabulary is needed, so fit() is enough; the document-term
    # matrix that fit_transform() would build was never used.
    vectorizer.fit([normalized])
    return vectorizer.get_feature_names_out()