Yasu777 committed on
Commit
af34ef8
·
verified ·
1 Parent(s): 4f3b515

Update keywords_processor.py

Browse files
Files changed (1) hide show
  1. keywords_processor.py +18 -9
keywords_processor.py CHANGED
@@ -1,18 +1,17 @@
1
  import gradio as gr
2
- from janome.tokenizer import Tokenizer
3
  from sklearn.feature_extraction.text import CountVectorizer
4
  import os
5
 
6
- def custom_tokenizer(text):
7
- t = Tokenizer()
8
- tokens = [token.surface for token in t.tokenize(text)]
9
- return tokens
10
-
11
  def process_keywords(text):
12
  # カンマと改行を空白に変換
13
- text = " ".join(text.split())
 
 
 
 
14
  try:
15
- vectorizer = CountVectorizer(ngram_range=(1, 3), tokenizer=custom_tokenizer)
16
  X = vectorizer.fit_transform([text])
17
  features = vectorizer.get_feature_names_out()
18
  if features.size > 0:
@@ -28,13 +27,23 @@ def save_keywords(keywords, filename="output1.txt"):
28
  with open(filename, 'w', encoding='utf-8') as file:
29
  if keywords:
30
  for keyword in keywords:
 
31
  file.write(keyword + "\n")
32
- return "Keywords saved to {}".format(filename)
 
 
33
 
34
  def process_and_save_keywords(text):
35
  keywords = process_keywords(text)
 
 
 
 
 
 
36
  save_result = save_keywords(keywords)
37
  print(save_result)
 
38
  return ", ".join(keywords) if keywords else "No keywords", save_result
39
 
40
  with gr.Blocks() as demo:
 
1
  import gradio as gr
2
+ import re
3
  from sklearn.feature_extraction.text import CountVectorizer
4
  import os
5
 
 
 
 
 
 
6
  def process_keywords(text):
7
  # カンマと改行を空白に変換
8
+ text = re.sub(r"[,\n]+", " ", text)
9
+ # 英数字と空白以外を削除
10
+ text = re.sub(r"[^\w\s]", "", text)
11
+ # 連続する空白を一つにする
12
+ text = re.sub(r"\s+", " ", text)
13
  try:
14
+ vectorizer = CountVectorizer(ngram_range=(1, 3), token_pattern=r"(?u)\b\w+\b")
15
  X = vectorizer.fit_transform([text])
16
  features = vectorizer.get_feature_names_out()
17
  if features.size > 0:
 
27
  with open(filename, 'w', encoding='utf-8') as file:
28
  if keywords:
29
  for keyword in keywords:
30
+ print(f"Saving keyword: {keyword}") # 保存しようとしているキーワードをログに出力
31
  file.write(keyword + "\n")
32
+ else:
33
+ print("No keywords to save.") # 保存するキーワードがない場合のログ
34
+ return f"Keywords saved to {filename}"
35
 
36
  def process_and_save_keywords(text):
37
  keywords = process_keywords(text)
38
+ # キーワードが生成されたかどうかをチェック
39
+ if keywords:
40
+ print("Generated keywords:", ", ".join(keywords))
41
+ else:
42
+ print("No keywords generated from the input.")
43
+
44
  save_result = save_keywords(keywords)
45
  print(save_result)
46
+
47
  return ", ".join(keywords) if keywords else "No keywords", save_result
48
 
49
  with gr.Blocks() as demo: