Spaces:
Sleeping
Sleeping
Update keywords_processor.py
Browse files- keywords_processor.py +7 -12
keywords_processor.py
CHANGED
|
def process_keywords(text):
    """Extract unigram and bigram keywords from *text*.

    Punctuation is stripped and runs of whitespace are collapsed before
    the n-grams are generated with scikit-learn's CountVectorizer.

    Args:
        text: Input string to tokenize.

    Returns:
        list[str]: The vectorizer's feature names (1- and 2-grams);
        empty list for blank input.
    """
    # Remove everything except word characters and whitespace.
    text = re.sub(r"[^\w\s]", "", text)
    # Collapse consecutive whitespace into a single space.
    text = re.sub(r"\s+", " ", text).strip()
    # Guard: CountVectorizer raises "empty vocabulary" on blank input.
    if not text:
        return []
    # Bug fix: the original call was missing its closing parenthesis.
    vectorizer = CountVectorizer(ngram_range=(1, 2))
    X = vectorizer.fit_transform([text])
    keywords = vectorizer.get_feature_names_out().tolist()
    return keywords
|
| 15 |
|
| 16 |
-
def save_keywords(keywords, filename="output1.txt"):
    """Write *keywords* to a text file next to this module, one per line.

    Args:
        keywords: Iterable of keyword strings.
        filename: Output file name, created in this module's directory.

    Returns:
        str: A confirmation message containing the output path.
    """
    output1_path = os.path.join(os.path.dirname(__file__), filename)
    with open(output1_path, 'w', encoding='utf-8') as file:
        for keyword in keywords:
            file.write(keyword + "\n")
    return f"Keywords saved to {output1_path}"
|
| 22 |
-
|
| 23 |
def ngram_generator(main_text, other_texts):
    """Collect unique 1-/2-gram keywords from a main text plus extra texts.

    *other_texts* is split on commas and newlines; each piece (and
    *main_text*) is run through process_keywords. The deduplicated
    keywords are written to ``output1.txt`` next to this module.

    Args:
        main_text: Primary input string.
        other_texts: Additional texts separated by commas or newlines.

    Returns:
        tuple[str, str]: The sorted unique keywords joined by ", ", and
        a confirmation message with the output file path.
    """
    # Combine main_text and other_texts, splitting on commas and newlines.
    texts = [main_text] + re.split(r',|\n', other_texts)
    all_keywords = []
    for text in texts:
        all_keywords.extend(process_keywords(text))
    # Remove duplicates before persisting.
    unique_keywords = list(set(all_keywords))
    # Bug fix: output_text was returned but never assigned in this
    # revision; define it by saving the keywords here (as the later
    # revision of this function does).
    output1_path = os.path.join(os.path.dirname(__file__), "output1.txt")
    with open(output1_path, 'w', encoding='utf-8') as file:
        for keyword in unique_keywords:
            file.write(keyword + "\n")
    output_text = f"Keywords saved to {output1_path}"
    return ", ".join(sorted(unique_keywords)), output_text
|
| 34 |
|
| 35 |
if __name__ == "__main__":
|
|
|
|
def process_keywords(text):
    """Extract unigram and bigram keywords from *text*.

    Punctuation is stripped and runs of whitespace are collapsed before
    the n-grams are generated with scikit-learn's CountVectorizer.

    Args:
        text: Input string to tokenize.

    Returns:
        list[str]: The vectorizer's feature names (1- and 2-grams);
        empty list for blank input.
    """
    # Remove everything except word characters and whitespace.
    text = re.sub(r"[^\w\s]", "", text)
    # Collapse consecutive whitespace into a single space.
    text = re.sub(r"\s+", " ", text).strip()
    # Guard: CountVectorizer raises "empty vocabulary" on blank input.
    if not text:
        return []
    vectorizer = CountVectorizer(ngram_range=(1, 2))
    X = vectorizer.fit_transform([text])
    keywords = vectorizer.get_feature_names_out().tolist()
    return keywords
|
| 15 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
def ngram_generator(main_text, other_texts):
    """Collect unique 1-/2-gram keywords from a main text plus extra texts.

    *other_texts* is split on commas and newlines; each piece (and
    *main_text*) is run through process_keywords. The deduplicated
    keywords are written to ``output1.txt`` next to this module.

    Args:
        main_text: Primary input string.
        other_texts: Additional texts separated by commas or newlines.

    Returns:
        tuple[str, str]: The sorted unique keywords joined by ", ", and
        a confirmation message with the output file path.
    """
    # Combine main_text and other_texts, splitting on commas and newlines.
    texts = [main_text] + re.split(r',|\n', other_texts)
    all_keywords = []
    for text in texts:
        all_keywords.extend(process_keywords(text))
    # Remove duplicates before persisting.
    unique_keywords = list(set(all_keywords))
    output1_path = os.path.join(os.path.dirname(__file__), "output1.txt")
    with open(output1_path, 'w', encoding='utf-8') as file:
        for keyword in unique_keywords:
            file.write(keyword + "\n")
    output_text = f"Keywords saved to {output1_path}"
    return ", ".join(sorted(unique_keywords)), output_text
|
| 29 |
|
| 30 |
if __name__ == "__main__":
|