Yasu777 committed on
Commit
0b81deb
·
verified ·
1 Parent(s): af34ef8

Update keywords_processor.py

Browse files
Files changed (1) hide show
  1. keywords_processor.py +21 -41
keywords_processor.py CHANGED
@@ -1,60 +1,40 @@
1
  import gradio as gr
2
  import re
3
  from sklearn.feature_extraction.text import CountVectorizer
4
- import os
5
 
6
- def process_keywords(text):
7
- # カンマと改行を空白に変換
8
- text = re.sub(r"[,\n]+", " ", text)
9
- # 英数字と空白以外削除
10
- text = re.sub(r"[^\w\s]", "", text)
11
- # 連続する空白を一つにする
12
- text = re.sub(r"\s+", " ", text)
13
- try:
14
- vectorizer = CountVectorizer(ngram_range=(1, 3), token_pattern=r"(?u)\b\w+\b")
15
- X = vectorizer.fit_transform([text])
16
- features = vectorizer.get_feature_names_out()
17
- if features.size > 0:
18
- print("Generated N-grams:", features)
19
- else:
20
- print("No N-grams generated from the input.")
21
- return features
22
- except Exception as e:
23
- print(f"Error processing keywords: {str(e)}")
24
- return []
25
 
26
  def save_keywords(keywords, filename="output1.txt"):
27
  with open(filename, 'w', encoding='utf-8') as file:
28
- if keywords:
29
- for keyword in keywords:
30
- print(f"Saving keyword: {keyword}") # 保存しようとしているキーワードをログに出力
31
- file.write(keyword + "\n")
32
- else:
33
- print("No keywords to save.") # 保存するキーワードがない場合のログ
34
  return f"Keywords saved to {filename}"
35
 
36
- def process_and_save_keywords(text):
37
- keywords = process_keywords(text)
38
- # キーワードが生成されたかどうかをチェック
39
- if keywords:
40
- print("Generated keywords:", ", ".join(keywords))
41
- else:
42
- print("No keywords generated from the input.")
43
-
44
- save_result = save_keywords(keywords)
45
- print(save_result)
46
-
47
- return ", ".join(keywords) if keywords else "No keywords", save_result
48
 
49
  with gr.Blocks() as demo:
50
  gr.Markdown("### N-gram Generator and Saver")
51
- text_input = gr.Textbox(label="Enter text")
 
52
  output_keywords = gr.Textbox(label="Generated N-grams")
53
  output_message = gr.Textbox(label="Output Message")
54
  submit_button = gr.Button("Generate and Save N-grams")
55
 
56
  submit_button.click(
57
- fn=process_and_save_keywords,
58
- inputs=text_input,
59
  outputs=[output_keywords, output_message]
60
  )
 
 
 
1
  import gradio as gr
2
  import re
3
  from sklearn.feature_extraction.text import CountVectorizer
 
4
 
5
def process_keywords(texts):
    """Build the 1- to 3-word n-gram vocabulary of the given keyword texts.

    The texts are joined with single spaces, every character that is neither
    a word character nor whitespace is removed, whitespace runs are collapsed,
    and the cleaned string is given to ``CountVectorizer`` as one document.

    Args:
        texts: Iterable of strings. A single string is also accepted and is
            treated as one text.

    Returns:
        The array returned by ``CountVectorizer.get_feature_names_out()``,
        or an empty object array when the cleaned text has no word
        characters left.
    """
    # Local import so the block does not depend on a file-level numpy import.
    import numpy as np

    if isinstance(texts, str):
        # " ".join() on a bare string would insert a space between every
        # character instead of treating it as one text.
        texts = [texts]

    all_text = " ".join(texts)
    # Drop everything that is not a word character or whitespace.
    all_text = re.sub(r"[^\w\s]", "", all_text)
    # Collapse runs of whitespace into a single space.
    all_text = re.sub(r"\s+", " ", all_text).strip()

    if not all_text:
        # With no tokens CountVectorizer raises ValueError ("empty
        # vocabulary"); return an empty result so the UI callback survives
        # empty or punctuation-only input.
        return np.array([], dtype=object)

    vectorizer = CountVectorizer(ngram_range=(1, 3), token_pattern=r"(?u)\b\w+\b")
    # Only the learned vocabulary is needed, not the count matrix.
    vectorizer.fit([all_text])
    return vectorizer.get_feature_names_out()
 
 
 
 
 
 
 
 
 
 
 
13
 
14
def save_keywords(keywords, filename="output1.txt"):
    """Write every keyword on its own line to ``filename``.

    The file is opened in write mode with UTF-8 encoding, so any previous
    content is replaced.

    Args:
        keywords: Iterable of keyword strings.
        filename: Path of the file to write.

    Returns:
        The status message ``"Keywords saved to <filename>"``.
    """
    with open(filename, 'w', encoding='utf-8') as out:
        out.writelines(keyword + "\n" for keyword in keywords)
    return f"Keywords saved to {filename}"
19
 
20
def ngram_generator(main_text, other_texts):
    """Generate n-grams from the main and additional keywords and save them.

    Args:
        main_text: The main keyword text.
        other_texts: Additional keywords separated by commas. ASCII ``,`` as
            well as the full-width ``，`` and ideographic ``、`` commas are
            accepted as separators.

    Returns:
        A tuple ``(keywords_text, save_message)``: the n-grams joined with
        ``", "`` (or ``"No keywords"`` when there are none) and the message
        returned by ``save_keywords``.
    """
    # Split on Japanese commas too: process_keywords strips punctuation
    # without inserting a space, which would otherwise fuse neighbouring
    # keywords into one token.
    texts = [main_text or ""] + re.split(r"[,、，]", other_texts or "")
    keywords = process_keywords(texts)
    output_text = save_keywords(keywords)
    # process_keywords returns the NumPy array from get_feature_names_out();
    # the truth value of an array with more than one element raises
    # ValueError, so test the length explicitly.
    keywords_text = ", ".join(keywords) if len(keywords) > 0 else "No keywords"
    return keywords_text, output_text
 
 
 
 
 
 
 
25
 
26
# Gradio UI: two keyword inputs, two result boxes, and one button that runs
# ngram_generator on the inputs.
with gr.Blocks() as demo:
    gr.Markdown("### N-gram Generator and Saver")

    # Inputs (labels read "main keyword" and "other keywords (comma-separated)").
    main_text_input = gr.Textbox(label="メインキーワード")
    other_texts_input = gr.Textbox(label="その他のキーワード(カンマ区切り)")

    # Outputs: the joined n-grams and the save status message.
    output_keywords = gr.Textbox(label="Generated N-grams")
    output_message = gr.Textbox(label="Output Message")

    submit_button = gr.Button("Generate and Save N-grams")

    # ngram_generator returns a 2-tuple that fills the two output boxes in order.
    submit_button.click(
        fn=ngram_generator,
        inputs=[main_text_input, other_texts_input],
        outputs=[output_keywords, output_message],
    )
39
+
40
+ if __name__ == "__main__":