Spaces:
Sleeping
Sleeping
Update keywords_processor.py
Browse files- keywords_processor.py +7 -21
keywords_processor.py
CHANGED
|
@@ -1,31 +1,17 @@
|
|
| 1 |
import os
|
| 2 |
-
import gradio as gr
|
| 3 |
-
import re
|
| 4 |
-
from sklearn.feature_extraction.text import CountVectorizer
|
| 5 |
|
| 6 |
-
def
|
| 7 |
-
#
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
X = vectorizer.fit_transform([text])
|
| 11 |
-
keywords = vectorizer.get_feature_names_out().tolist()
|
| 12 |
-
return keywords
|
| 13 |
-
|
| 14 |
-
def ngram_generator(main_text, other_texts):
|
| 15 |
-
# main_textとother_textsを単一のテキストとして連結
|
| 16 |
-
full_text = main_text + " " + other_texts
|
| 17 |
-
keywords = process_keywords(full_text)
|
| 18 |
output1_path = os.path.join(os.path.dirname(__file__), "output1.txt")
|
| 19 |
with open(output1_path, 'w', encoding='utf-8') as file:
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
output_text = f"Keywords saved to {output1_path}"
|
| 23 |
-
return ", ".join(sorted(keywords)), output_text
|
| 24 |
|
| 25 |
if __name__ == "__main__":
|
| 26 |
import sys
|
| 27 |
main_text = sys.argv[1] if len(sys.argv) > 1 else ""
|
| 28 |
other_texts = sys.argv[2] if len(sys.argv) > 2 else ""
|
| 29 |
-
|
| 30 |
-
print(keywords)
|
| 31 |
print(output_text)
|
|
|
|
| 1 |
import os
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
+
def save_raw_text(main_text: str, other_texts: str) -> str:
    """Join two texts with a line break and save them to ``output1.txt``.

    The file is created (or overwritten) in the directory that contains
    this module and is encoded as UTF-8.

    Args:
        main_text: Text written first.
        other_texts: Text written after the separating line break.

    Returns:
        A status message of the form ``"Text saved to <path>"``.
    """
    # Combine the main text and the other texts with a real line break.
    # The previous escaped form "\\n" wrote a literal backslash followed by
    # "n" into the file instead of starting a new line.
    combined_text = main_text + "\n" + other_texts

    # Save the combined text to output1.txt next to this module.
    output1_path = os.path.join(os.path.dirname(__file__), "output1.txt")
    with open(output1_path, 'w', encoding='utf-8') as file:
        file.write(combined_text)
    return f"Text saved to {output1_path}"
|
|
|
|
|
|
|
| 11 |
|
| 12 |
if __name__ == "__main__":
    import sys

    # Command-line entry point: the first positional argument is the main
    # text and the second is the other texts; a missing argument falls back
    # to an empty string.
    argument_count = len(sys.argv)
    main_text = "" if argument_count <= 1 else sys.argv[1]
    other_texts = "" if argument_count <= 2 else sys.argv[2]

    # Save the texts and report the status message on stdout.
    output_text = save_raw_text(main_text, other_texts)
    print(output_text)
|