Spaces:
No application file
No application file
| import json | |
| import os | |
| import sys | |
| import re | |
| from datetime import datetime, timezone, timedelta | |
| from collections import defaultdict | |
def convert_timestamp_to_jst(timestamp):
    """Format a millisecond Unix timestamp as a JST date-time string.

    Args:
        timestamp: Milliseconds since the Unix epoch (int or float).

    Returns:
        The moment rendered in UTC+9 as ``'YYYY-MM-DD HH:MM:SS JST'``.
    """
    jst = timezone(timedelta(hours=9))
    # fromtimestamp() expects seconds, the input is in milliseconds.
    dt = datetime.fromtimestamp(timestamp / 1000, jst)
    return dt.strftime('%Y-%m-%d %H:%M:%S JST')
def get_year_month_day(timestamp):
    """Return the JST year-month label and day of month for a timestamp.

    Args:
        timestamp: Milliseconds since the Unix epoch (int or float).

    Returns:
        A ``(year_month, day)`` tuple where ``year_month`` is a
        ``'YYYY-MM'`` string and ``day`` is the integer day of the month,
        both evaluated in UTC+9.
    """
    jst = timezone(timedelta(hours=9))
    # fromtimestamp() expects seconds, the input is in milliseconds.
    dt = datetime.fromtimestamp(timestamp / 1000, jst)
    return dt.strftime('%Y-%m'), dt.day
def process_directory(directory):
    """Split the thread logs in *directory* into half-month JSON files.

    Input files are the ``<digits>.json`` files directly inside
    *directory*; names ending in ``_s.json`` or starting with
    ``log_short`` are ignored, and other non-numeric ``.json`` names are
    reported and skipped. The numeric file name is used as the thread
    number. Each file is expected to hold an object with a
    ``"thread_array"`` list of post objects; posts without a truthy
    ``"timestamp"`` are ignored.

    Posts are grouped by their JST year-month and by whether the JST day
    of month is 1-15 or 16 and later, then written to
    ``log_short_hm_<YYYY-MM>_1.json`` and
    ``log_short_hm_<YYYY-MM>_16.json`` in the same directory as
    ``{"posts": [...]}``. Empty groups produce no file.

    Args:
        directory: Path of the directory to read from and write to.

    Returns:
        True when every file was processed and written, False when any
        error occurred (the error is printed and processing stops).
    """
    # Posts keyed by 'YYYY-MM', then by half of the month.
    monthly_posts = defaultdict(lambda: {"first_half": [], "second_half": []})
    # Path currently being worked on, so error messages can name it.
    current_path = directory
    try:
        thread_files = []
        for filename in sorted(os.listdir(directory)):
            if (not filename.endswith('.json')
                    or filename.endswith('_s.json')
                    or filename.startswith('log_short')):
                continue
            # Only purely numeric base names are thread logs.
            base_name = os.path.splitext(filename)[0]
            if not re.match(r'^\d+$', base_name):
                print(f"スキップ: {filename} はファイル名が数字のみではありません")
                continue
            thread_files.append((int(base_name), filename))
        # os.listdir() returns names in arbitrary order; process threads in
        # numeric order so the generated files are reproducible.
        thread_files.sort()

        for thread_no, filename in thread_files:
            current_path = os.path.join(directory, filename)
            with open(current_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
            # Tolerate files whose shape is not the expected object/list
            # instead of failing with an opaque TypeError/AttributeError.
            if not isinstance(data, dict):
                continue
            posts = data.get("thread_array")
            if not isinstance(posts, list):
                continue
            for post in posts:
                if not isinstance(post, dict):
                    continue
                timestamp = post.get("timestamp")
                if not timestamp:
                    continue
                year_month, day = get_year_month_day(timestamp)
                half = "first_half" if day <= 15 else "second_half"
                monthly_posts[year_month][half].append({
                    "thread_no": thread_no,
                    "num": post.get("num"),
                    "timestamp": timestamp,
                    "datetime": convert_timestamp_to_jst(timestamp),
                    "body": post.get("body"),
                })

        # One output file per month and half; the suffix is the first day
        # of the half (1 or 16).
        for year_month, half_posts in sorted(monthly_posts.items()):
            for half, suffix in (("first_half", "1"), ("second_half", "16")):
                selected = half_posts[half]
                if not selected:
                    continue
                current_path = os.path.join(
                    directory, f'log_short_hm_{year_month}_{suffix}.json')
                with open(current_path, 'w', encoding='utf-8') as f:
                    json.dump({"posts": selected}, f,
                              ensure_ascii=False, indent=2)
                print(f"変換完了: {current_path}")
        return True
    except json.JSONDecodeError as e:
        print(f"JSONパースエラー: {current_path}: {str(e)}")
        return False
    except Exception as e:
        # Top-level boundary of the conversion: report and signal failure.
        print(f"エラー: {current_path}: {str(e)}")
        return False
def main():
    """Run the conversion on the directory given on the command line.

    The first command-line argument is the directory to process; when it
    is omitted the current directory is used.

    Returns:
        0 when process_directory() reports success, 1 otherwise, so the
        value can be used as a process exit status.
    """
    directory = sys.argv[1] if len(sys.argv) > 1 else "."
    return 0 if process_directory(directory) else 1
if __name__ == "__main__":
    # Hand main()'s return value to sys.exit() so it becomes the process
    # exit status; a None return still exits with status 0.
    sys.exit(main())