Spaces:
Sleeping
Sleeping
"""
Script that converts Excel files to CSV format.
"""
| import pandas as pd | |
| import os | |
| from datetime import datetime | |
# Directory configuration: Excel workbooks are read from data_dir and the
# converted CSV files are written to output_dir.
data_dir = r'c:\Users\korea\Desktop\dacon_broadcast_paper\data'
output_dir = r'c:\Users\korea\Desktop\dacon_broadcast_paper\data_csv'

# Create the output directory if it is missing. Attempting the creation and
# handling FileExistsError avoids the gap between an existence check and the
# creation call; the message is printed only when the directory was created.
try:
    os.makedirs(output_dir)
except FileExistsError:
    pass
else:
    print(f"CSV 출력 디렉토리 생성: {output_dir}")

# Excel workbooks to convert, given as file names relative to data_dir.
files = [
    'article_metrics_monthly.xlsx',
    'contents.xlsx',
    'demographics_part001.xlsx',
    'demographics_part002.xlsx',
    'referrer.xlsx',
]
print("=" * 80)
print("Excel → CSV 변환 시작")
print("=" * 80)

# Convert each workbook independently: a failure on one file is reported
# and the loop continues with the next file.
for file in files:
    start_time = datetime.now()
    file_path = os.path.join(data_dir, file)
    # Swap only the extension. str.replace would rewrite every '.xlsx'
    # occurrence in the name and would leave any other extension in place.
    csv_filename = os.path.splitext(file)[0] + '.csv'
    csv_path = os.path.join(output_dir, csv_filename)
    print(f"\n[처리 중] {file}")
    try:
        # Read the workbook (pandas loads the first sheet by default).
        df = pd.read_excel(file_path)
        # Save as CSV (UTF-8 with BOM for Excel compatibility).
        df.to_csv(csv_path, index=False, encoding='utf-8-sig')
        # Elapsed time covers both the read and the write.
        elapsed = (datetime.now() - start_time).total_seconds()
        # Report the result: row count, column count, duration, destination.
        print(f" ✓ 완료: {csv_filename}")
        print(f" - 행 개수: {len(df):,}")
        print(f" - 열 개수: {len(df.columns)}")
        print(f" - 처리 시간: {elapsed:.2f}초")
        print(f" - 저장 경로: {csv_path}")
    except Exception as e:
        # Deliberately broad: this is a best-effort batch, so any failure is
        # printed and the remaining files are still processed.
        print(f" ✗ 오류 발생: {str(e)}")
# Merge the two demographics parts into a single CSV (optional step).
print("\n" + "=" * 80)
print("[추가 작업] demographics 파일 병합")
print("=" * 80)
try:
    # Read the parts back with the same encoding they were written with, so
    # the byte-order mark is consumed and never ends up in a column name.
    demo_part1 = pd.read_csv(
        os.path.join(output_dir, 'demographics_part001.csv'),
        encoding='utf-8-sig',
    )
    demo_part2 = pd.read_csv(
        os.path.join(output_dir, 'demographics_part002.csv'),
        encoding='utf-8-sig',
    )
    # Stack part 2 under part 1 and renumber the index from zero.
    demographics_merged = pd.concat([demo_part1, demo_part2], ignore_index=True)
    # Save the merged table next to the per-file CSVs.
    merged_path = os.path.join(output_dir, 'demographics_merged.csv')
    demographics_merged.to_csv(merged_path, index=False, encoding='utf-8-sig')
    print("✓ demographics 병합 완료")
    print(f" - 총 행 개수: {len(demographics_merged):,}")
    print(f" - 저장 경로: {merged_path}")
except Exception as e:
    # Best-effort step: a missing or unreadable part is reported, not raised.
    print(f"✗ 병합 중 오류 발생: {str(e)}")

print("\n" + "=" * 80)
print("모든 변환 작업 완료!")
print("=" * 80)