Spaces:
No application file
No application file
Upload 3 files
Browse files
作ったのはワイやないで、Claude 3.5 Sonnetはんやで
- log_short.py +72 -0
- log_short_hm.py +90 -0
- log_short_m.py +77 -0
log_short.py
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import os
|
| 3 |
+
import sys
|
| 4 |
+
import re
|
| 5 |
+
from datetime import datetime, timezone, timedelta
|
| 6 |
+
|
| 7 |
+
def is_valid_filename(filename):
    """Report whether *filename*, minus its extension, consists only of digits.

    Args:
        filename: A bare file name such as ``"12345.json"``.

    Returns:
        ``True`` when the part before the last extension is a non-empty run
        of digits, ``False`` otherwise.
    """
    # Drop the final extension and test what remains.
    stem, _extension = os.path.splitext(filename)
    return re.match(r'^\d+$', stem) is not None
|
| 11 |
+
|
| 12 |
+
def convert_timestamp_to_jst(timestamp):
    """Format a millisecond Unix timestamp as a UTC+9 (JST) date-time string.

    Args:
        timestamp: Number of milliseconds since the Unix epoch.

    Returns:
        A string of the form ``"YYYY-MM-DD HH:MM:SS JST"``.
    """
    # fromtimestamp() expects seconds, so scale the millisecond value down.
    seconds = timestamp / 1000
    japan_time = timezone(timedelta(hours=9))
    return datetime.fromtimestamp(seconds, tz=japan_time).strftime('%Y-%m-%d %H:%M:%S JST')
|
| 16 |
+
|
| 17 |
+
def process_directory(directory):
    """Merge the numerically named thread JSON files in *directory* into one file.

    A file is processed when its name ends in ``.json``, does not end in
    ``_s.json``, does not start with ``log_short`` and has an all-digit stem;
    the stem becomes the thread number.  For every entry of a file's
    ``"thread_array"`` a record with ``thread_no``, ``num``, ``timestamp``,
    ``datetime`` (JST string, or ``None`` when the timestamp is missing or
    falsy) and ``body`` is collected.  The records are written to
    ``<directory>/log_short.json`` under the key ``"posts"``.

    Threads are handled in ascending thread-number order so the output does
    not depend on the arbitrary order in which ``os.listdir`` returns names.

    Args:
        directory: Path of the directory to scan; the result is written there too.

    Returns:
        ``True`` when the output file was written, ``False`` when any
        exception was raised (a message is printed in that case).
    """
    input_file = None  # path currently being read; used for error reporting
    try:
        # First pass: pick the candidate files and remember their thread numbers.
        thread_files = []
        for filename in os.listdir(directory):
            if not filename.endswith('.json') or filename.endswith('_s.json'):
                continue
            # Output of an earlier run is never an input; skip it quietly
            # instead of reporting it as an invalid file name.
            if filename.startswith('log_short'):
                continue
            # Only all-digit file names are treated as thread files.
            if not is_valid_filename(filename):
                print(f"スキップ: {filename} はファイル名が数字のみではありません")
                continue
            thread_files.append((int(os.path.splitext(filename)[0]), filename))

        # os.listdir() order is arbitrary; sort for reproducible output.
        thread_files.sort()

        all_posts = []
        for thread_no, filename in thread_files:
            input_file = os.path.join(directory, filename)

            # Load the thread's JSON document.
            with open(input_file, 'r', encoding='utf-8') as f:
                data = json.load(f)

            if "thread_array" in data:
                for post in data["thread_array"]:
                    timestamp = post.get("timestamp")
                    all_posts.append({
                        "thread_no": thread_no,
                        "num": post.get("num"),
                        "timestamp": timestamp,
                        "datetime": convert_timestamp_to_jst(timestamp) if timestamp else None,
                        "body": post.get("body"),
                    })

        # Store every collected post in a single JSON file.
        output_file = os.path.join(directory, 'log_short.json')
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump({"posts": all_posts}, f, ensure_ascii=False, indent=2)

        print(f"変換完了: {output_file}")
        return True

    except json.JSONDecodeError as e:
        # Name the offending file so the user knows what to repair.
        print(f"JSONパースエラー: {input_file}: {str(e)}")
        return False
    except Exception as e:
        print(f"エラー: {str(e)}")
        return False
|
| 62 |
+
|
| 63 |
+
def main():
    """Command-line entry point: convert the directory given as the only argument.

    Prints a usage message and exits with status 1 unless exactly one
    command-line argument is supplied.  Also exits with status 1 when
    ``process_directory`` returns a falsy value, so that a failed run is
    visible to the calling shell.
    """
    if len(sys.argv) != 2:
        print("使用方法: python script.py <ディレクトリパス>")
        sys.exit(1)

    directory = sys.argv[1]
    if not process_directory(directory):
        # Report failure through the exit status instead of always exiting 0.
        sys.exit(1)


if __name__ == "__main__":
    main()
|
log_short_hm.py
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import os
|
| 3 |
+
import sys
|
| 4 |
+
import re
|
| 5 |
+
from datetime import datetime, timezone, timedelta
|
| 6 |
+
from collections import defaultdict
|
| 7 |
+
|
| 8 |
+
def convert_timestamp_to_jst(timestamp):
    """Render a millisecond Unix timestamp as a date-time string in UTC+9 (JST).

    Args:
        timestamp: Milliseconds elapsed since the Unix epoch.

    Returns:
        Text formatted as ``"YYYY-MM-DD HH:MM:SS JST"``.
    """
    nine_hours = timedelta(hours=9)
    # The value is in milliseconds; fromtimestamp() works in seconds.
    moment = datetime.fromtimestamp(timestamp / 1000, tz=timezone(nine_hours))
    return moment.strftime('%Y-%m-%d %H:%M:%S JST')
|
| 12 |
+
|
| 13 |
+
def get_year_month_day(timestamp):
    """Split a millisecond Unix timestamp into its JST year-month and day.

    Args:
        timestamp: Milliseconds elapsed since the Unix epoch.

    Returns:
        A ``(year_month, day)`` tuple where ``year_month`` is a ``"YYYY-MM"``
        string and ``day`` is the integer day of the month, both evaluated
        in UTC+9.
    """
    tokyo = timezone(timedelta(hours=9))
    moment = datetime.fromtimestamp(timestamp / 1000, tz=tokyo)
    year_month = moment.strftime('%Y-%m')
    return year_month, moment.day
|
| 17 |
+
|
| 18 |
+
def process_directory(directory):
    """Split the posts of all thread JSON files in *directory* into half-month files.

    A file is processed when its name ends in ``.json``, does not end in
    ``_s.json``, does not start with ``log_short`` and has an all-digit stem;
    the stem becomes the thread number.  Every entry of a file's
    ``"thread_array"`` that has a truthy ``timestamp`` is turned into a record
    with ``thread_no``, ``num``, ``timestamp``, ``datetime`` (JST string) and
    ``body``; entries without a timestamp are skipped.  Records are grouped
    by JST year-month and by half of the month (day 1-15 / day 16 onward) and
    each non-empty group is written to
    ``log_short_hm_<YYYY-MM>_1.json`` or ``log_short_hm_<YYYY-MM>_16.json``
    in *directory* under the key ``"posts"``.

    Threads are handled in ascending thread-number order so the order of
    posts in the output does not depend on the arbitrary order in which
    ``os.listdir`` returns names.

    Args:
        directory: Path of the directory to scan; results are written there too.

    Returns:
        ``True`` when all output files were written, ``False`` when any
        exception was raised (a message is printed in that case).
    """
    input_file = None  # path currently being read; used for error reporting
    try:
        # Posts keyed by year-month, then by first/second half of the month.
        monthly_posts = defaultdict(lambda: {"first_half": [], "second_half": []})

        # First pass: pick the candidate files and remember their thread numbers.
        thread_files = []
        for filename in os.listdir(directory):
            if not filename.endswith('.json') or filename.endswith('_s.json') or filename.startswith('log_short'):
                continue

            # Only all-digit file names are treated as thread files.
            base_name = os.path.splitext(filename)[0]
            if not re.match(r'^\d+$', base_name):
                print(f"スキップ: {filename} はファイル名が数字のみではありません")
                continue
            thread_files.append((int(base_name), filename))

        # os.listdir() order is arbitrary; sort for reproducible output.
        thread_files.sort()

        for thread_no, filename in thread_files:
            input_file = os.path.join(directory, filename)

            # Load the thread's JSON document.
            with open(input_file, 'r', encoding='utf-8') as f:
                data = json.load(f)

            if "thread_array" in data:
                for post in data["thread_array"]:
                    timestamp = post.get("timestamp")
                    if not timestamp:
                        continue
                    year_month, day = get_year_month_day(timestamp)
                    new_post = {
                        "thread_no": thread_no,
                        "num": post.get("num"),
                        "timestamp": timestamp,
                        "datetime": convert_timestamp_to_jst(timestamp),
                        "body": post.get("body"),
                    }
                    # Days 1-15 go to the first half, the rest to the second.
                    half = "first_half" if day <= 15 else "second_half"
                    monthly_posts[year_month][half].append(new_post)

        # Write one JSON file per non-empty (year-month, half) group.
        for year_month, half_posts in monthly_posts.items():
            for half, start_day in (("first_half", "1"), ("second_half", "16")):
                posts = half_posts[half]
                if not posts:
                    continue
                output_file = os.path.join(directory, f'log_short_hm_{year_month}_{start_day}.json')
                with open(output_file, 'w', encoding='utf-8') as f:
                    json.dump({"posts": posts}, f, ensure_ascii=False, indent=2)
                print(f"変換完了: {output_file}")

        return True

    except json.JSONDecodeError as e:
        # Name the offending file so the user knows what to repair.
        print(f"JSONパースエラー: {input_file}: {str(e)}")
        return False
    except Exception as e:
        print(f"エラー: {str(e)}")
        return False
|
| 83 |
+
|
| 84 |
+
def main():
    """Command-line entry point.

    Processes the directory named by the first command-line argument, or the
    current directory when no argument is given.  Exits with status 1 when
    ``process_directory`` returns a falsy value, so that a failed run is
    visible to the calling shell.
    """
    # Fall back to the current directory when no argument is supplied.
    directory = sys.argv[1] if len(sys.argv) > 1 else "."
    if not process_directory(directory):
        # Report failure through the exit status instead of always exiting 0.
        sys.exit(1)


if __name__ == "__main__":
    main()
|
log_short_m.py
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import os
|
| 3 |
+
import sys
|
| 4 |
+
import re
|
| 5 |
+
from datetime import datetime, timezone, timedelta
|
| 6 |
+
from collections import defaultdict
|
| 7 |
+
|
| 8 |
+
def convert_timestamp_to_jst(timestamp):
    """Turn a millisecond Unix timestamp into a UTC+9 (JST) date-time string.

    Args:
        timestamp: Milliseconds elapsed since the Unix epoch.

    Returns:
        Text formatted as ``"YYYY-MM-DD HH:MM:SS JST"``.
    """
    utc_plus_nine = timezone(timedelta(hours=9))
    # Divide by 1000 because fromtimestamp() takes seconds, not milliseconds.
    local_time = datetime.fromtimestamp(timestamp / 1000, utc_plus_nine)
    formatted = local_time.strftime('%Y-%m-%d %H:%M:%S JST')
    return formatted
|
| 12 |
+
|
| 13 |
+
def get_year_month(timestamp):
    """Return the JST year and month of a millisecond Unix timestamp.

    Args:
        timestamp: Milliseconds elapsed since the Unix epoch.

    Returns:
        A ``"YYYY-MM"`` string evaluated in UTC+9.
    """
    tokyo = timezone(timedelta(hours=9))
    return datetime.fromtimestamp(timestamp / 1000, tz=tokyo).strftime('%Y-%m')
|
| 17 |
+
|
| 18 |
+
def process_directory(directory):
    """Split the posts of all thread JSON files in *directory* into monthly files.

    A file is processed when its name ends in ``.json``, does not end in
    ``_s.json``, does not start with ``log_short`` and has an all-digit stem;
    the stem becomes the thread number.  Every entry of a file's
    ``"thread_array"`` that has a truthy ``timestamp`` is turned into a record
    with ``thread_no``, ``num``, ``timestamp``, ``datetime`` (JST string) and
    ``body``; entries without a timestamp are skipped.  Records are grouped
    by JST year-month and each group is written to
    ``log_short_m_<YYYY-MM>.json`` in *directory* under the key ``"posts"``.

    Threads are handled in ascending thread-number order so the order of
    posts in the output does not depend on the arbitrary order in which
    ``os.listdir`` returns names.

    Args:
        directory: Path of the directory to scan; results are written there too.

    Returns:
        ``True`` when all output files were written, ``False`` when any
        exception was raised (a message is printed in that case).
    """
    input_file = None  # path currently being read; used for error reporting
    try:
        # Posts keyed by "YYYY-MM".
        monthly_posts = defaultdict(list)

        # First pass: pick the candidate files and remember their thread numbers.
        thread_files = []
        for filename in os.listdir(directory):
            if not filename.endswith('.json') or filename.endswith('_s.json') or filename.startswith('log_short'):
                continue

            # Only all-digit file names are treated as thread files.
            base_name = os.path.splitext(filename)[0]
            if not re.match(r'^\d+$', base_name):
                print(f"スキップ: {filename} はファイル名が数字のみではありません")
                continue
            thread_files.append((int(base_name), filename))

        # os.listdir() order is arbitrary; sort for reproducible output.
        thread_files.sort()

        for thread_no, filename in thread_files:
            input_file = os.path.join(directory, filename)

            # Load the thread's JSON document.
            with open(input_file, 'r', encoding='utf-8') as f:
                data = json.load(f)

            if "thread_array" in data:
                for post in data["thread_array"]:
                    timestamp = post.get("timestamp")
                    if not timestamp:
                        continue
                    monthly_posts[get_year_month(timestamp)].append({
                        "thread_no": thread_no,
                        "num": post.get("num"),
                        "timestamp": timestamp,
                        "datetime": convert_timestamp_to_jst(timestamp),
                        "body": post.get("body"),
                    })

        # Write one JSON file per year-month.
        for year_month, posts in monthly_posts.items():
            output_file = os.path.join(directory, f'log_short_m_{year_month}.json')
            with open(output_file, 'w', encoding='utf-8') as f:
                json.dump({"posts": posts}, f, ensure_ascii=False, indent=2)
            print(f"変換完了: {output_file}")

        return True

    except json.JSONDecodeError as e:
        # Name the offending file so the user knows what to repair.
        print(f"JSONパースエラー: {input_file}: {str(e)}")
        return False
    except Exception as e:
        print(f"エラー: {str(e)}")
        return False
|
| 70 |
+
|
| 71 |
+
def main():
    """Command-line entry point.

    Processes the directory named by the first command-line argument, or the
    current directory when no argument is given.  Exits with status 1 when
    ``process_directory`` returns a falsy value, so that a failed run is
    visible to the calling shell.
    """
    # Fall back to the current directory when no argument is supplied.
    directory = sys.argv[1] if len(sys.argv) > 1 else "."
    if not process_directory(directory):
        # Report failure through the exit status instead of always exiting 0.
        sys.exit(1)


if __name__ == "__main__":
    main()
|