Yasu777 committed on
Commit
e2a5e80
·
verified ·
1 Parent(s): 394f0f6

Update keywords_processor.py

Browse files
Files changed (1) hide show
  1. keywords_processor.py +7 -12
keywords_processor.py CHANGED
@@ -8,28 +8,23 @@ def process_keywords(text):
8
  text = re.sub(r"[^\w\s]", "", text)
9
  # 連続する空白を一つにする
10
  text = re.sub(r"\s+", " ", text).strip()
11
- vectorizer = CountVectorizer(ngram_range=(1, 2), token_pattern=r"(?u)\b\w+\b")
12
  X = vectorizer.fit_transform([text])
13
  keywords = vectorizer.get_feature_names_out().tolist()
14
  return keywords
15
 
16
def save_keywords(keywords, filename="output1.txt"):
    """Persist *keywords* one per line to *filename* next to this module.

    Returns a human-readable confirmation message containing the full path.
    """
    output1_path = os.path.join(os.path.dirname(__file__), filename)
    # Emit every keyword as its own newline-terminated line in one call.
    with open(output1_path, 'w', encoding='utf-8') as file:
        file.writelines(keyword + "\n" for keyword in keywords)
    return f"Keywords saved to {output1_path}"
22
-
23
def ngram_generator(main_text, other_texts):
    """Gather n-gram keywords from main_text plus comma/newline-separated other_texts.

    Each segment is run through process_keywords; duplicates are dropped and
    the unique keywords are persisted via save_keywords.

    Returns a tuple of (comma-joined sorted unique keywords, save message).
    """
    # other_texts is split on commas and newlines; main_text is used as-is.
    segments = [main_text, *re.split(r',|\n', other_texts)]
    collected = [kw for segment in segments for kw in process_keywords(segment)]
    # Deduplicate before persisting.
    unique_keywords = list(set(collected))
    output_text = save_keywords(unique_keywords)
    return ", ".join(sorted(unique_keywords)), output_text
34
 
35
  if __name__ == "__main__":
 
8
  text = re.sub(r"[^\w\s]", "", text)
9
  # 連続する空白を一つにする
10
  text = re.sub(r"\s+", " ", text).strip()
11
+ vectorizer = CountVectorizer(ngram_range=(1, 2))
12
  X = vectorizer.fit_transform([text])
13
  keywords = vectorizer.get_feature_names_out().tolist()
14
  return keywords
15
 
 
 
 
 
 
 
 
16
def ngram_generator(main_text, other_texts):
    """Gather n-gram keywords from main_text plus comma/newline-separated other_texts.

    other_texts is split on commas and newlines; every non-blank segment is
    run through process_keywords, duplicates are removed, and the unique
    keywords are written one per line to output1.txt next to this module.

    Returns:
        tuple[str, str]: (", "-joined sorted unique keywords,
        confirmation message containing the output path).
    """
    texts = [main_text] + re.split(r',|\n', other_texts)
    all_keywords = []
    for text in texts:
        # Skip blank segments (trailing newline, doubled comma, etc.) —
        # process_keywords vectorizes the text, and an empty document would
        # presumably raise an empty-vocabulary error there; verify against
        # CountVectorizer behavior.
        if not text.strip():
            continue
        all_keywords.extend(process_keywords(text))
    unique_keywords = list(set(all_keywords))
    output1_path = os.path.join(os.path.dirname(__file__), "output1.txt")
    with open(output1_path, 'w', encoding='utf-8') as file:
        # Write in sorted order so the file content is deterministic —
        # set iteration order is not.
        for keyword in sorted(unique_keywords):
            file.write(keyword + "\n")
    output_text = f"Keywords saved to {output1_path}"
    return ", ".join(sorted(unique_keywords)), output_text
29
 
30
  if __name__ == "__main__":