Yasu777 committed on
Commit
110ce7a
·
verified ·
1 Parent(s): d9ac378

Update keywords_processor.py

Browse files
Files changed (1) hide show
  1. keywords_processor.py +7 -21
keywords_processor.py CHANGED
@@ -1,31 +1,17 @@
1
  import os
2
- import gradio as gr
3
- import re
4
- from sklearn.feature_extraction.text import CountVectorizer
5
 
6
- def process_keywords(text):
7
- # と改行を空白に置換
8
- text = re.sub(r"[,\n]+", " ", text)
9
- vectorizer = CountVectorizer(ngram_range=(1, 3))
10
- X = vectorizer.fit_transform([text])
11
- keywords = vectorizer.get_feature_names_out().tolist()
12
- return keywords
13
-
14
- def ngram_generator(main_text, other_texts):
15
- # main_textとother_textsを単一のテキストとして連結
16
- full_text = main_text + " " + other_texts
17
- keywords = process_keywords(full_text)
18
  output1_path = os.path.join(os.path.dirname(__file__), "output1.txt")
19
  with open(output1_path, 'w', encoding='utf-8') as file:
20
- for keyword in sorted(keywords):
21
- file.write(keyword + "\n")
22
- output_text = f"Keywords saved to {output1_path}"
23
- return ", ".join(sorted(keywords)), output_text
24
 
25
  if __name__ == "__main__":
26
  import sys
27
  main_text = sys.argv[1] if len(sys.argv) > 1 else ""
28
  other_texts = sys.argv[2] if len(sys.argv) > 2 else ""
29
- keywords, output_text = ngram_generator(main_text, other_texts)
30
- print(keywords)
31
  print(output_text)
 
1
  import os
 
 
 
2
 
3
+ def save_raw_text(main_text, other_texts):
4
+ # メイテキストその他のテキストを改行で結合
5
+ combined_text = main_text + "\\n" + other_texts
6
+ # 結合したテキストを output1.txt に保存
 
 
 
 
 
 
 
 
7
  output1_path = os.path.join(os.path.dirname(__file__), "output1.txt")
8
  with open(output1_path, 'w', encoding='utf-8') as file:
9
+ file.write(combined_text)
10
+ return f"Text saved to {output1_path}"
 
 
11
 
12
  if __name__ == "__main__":
13
  import sys
14
  main_text = sys.argv[1] if len(sys.argv) > 1 else ""
15
  other_texts = sys.argv[2] if len(sys.argv) > 2 else ""
16
+ output_text = save_raw_text(main_text, other_texts)
 
17
  print(output_text)