Spaces:
Sleeping
Sleeping
File size: 2,537 Bytes
d4a3b8b | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 | """
Script that converts Excel files to CSV format.
"""
import pandas as pd
import os
from datetime import datetime
# Directory settings: where the Excel workbooks are read from and where the
# converted CSV files are written.
data_dir = r'c:\Users\korea\Desktop\dacon_broadcast_paper\data'
output_dir = r'c:\Users\korea\Desktop\dacon_broadcast_paper\data_csv'

# Create the output directory on first use and report it only when it was
# actually missing.
if not os.path.exists(output_dir):
    # exist_ok=True closes the check-then-create race: if the directory shows
    # up between the exists() test and this call, makedirs no longer raises
    # FileExistsError.
    os.makedirs(output_dir, exist_ok=True)
    print(f"CSV μΆλ ₯ λλ ν 리 μμ±: {output_dir}")
# Excel workbooks to convert, listed in the order they are processed.
# Each name is resolved against data_dir by the conversion loop.
files = [
    'article_metrics_monthly.xlsx',
    'contents.xlsx',
    'demographics_part001.xlsx',
    'demographics_part002.xlsx',
    'referrer.xlsx',
]
# Banner announcing the start of the Excel -> CSV conversion.
# NOTE(review): the message texts in this script look mis-encoded (multi-byte
# text shown through a single-byte code page); they are left untouched here.
print("=" * 80)
print("Excel β CSV λ³ν μμ")
print("=" * 80)

# Convert each workbook independently: a failure on one file is reported and
# the loop moves on to the next file.
for file in files:
    start_time = datetime.now()
    file_path = os.path.join(data_dir, file)
    # Swap only the final extension. str.replace('.xlsx', '.csv') would also
    # rewrite a '.xlsx' in the middle of the name and would leave names such
    # as 'X.XLSX' unchanged, so the CSV would be saved under the Excel name.
    csv_filename = os.path.splitext(file)[0] + '.csv'
    csv_path = os.path.join(output_dir, csv_filename)

    print(f"\n[μ²λ¦¬ μ€] {file}")

    try:
        # Read the workbook with pandas' default read_excel settings.
        df = pd.read_excel(file_path)
        # Save as UTF-8 with BOM so Excel detects the encoding when opening
        # the CSV.
        df.to_csv(csv_path, index=False, encoding='utf-8-sig')
    except Exception as e:
        # Best-effort batch: report the failure and continue with the rest.
        print(f" β μ€λ₯ λ°μ: {e}")
    else:
        # Elapsed wall-clock time for this file (read + write).
        elapsed = (datetime.now() - start_time).total_seconds()

        # Per-file summary: output name, row count, column count, duration
        # and destination path.
        print(f" β μλ£: {csv_filename}")
        print(f" - ν κ°μ: {len(df):,}")
        print(f" - μ΄ κ°μ: {len(df.columns)}")
        print(f" - μ²λ¦¬ μκ°: {elapsed:.2f}μ΄")
        print(f" - μ μ₯ κ²½λ‘: {csv_path}")
# Optional extra step: merge the two demographics CSV parts into one file.
print("\n" + "=" * 80)
# The literal below was split across two physical lines (an unterminated
# string literal); it is rejoined on one line.
print("[μΆκ° μμ] demographics νμΌ λ³ν©")
print("=" * 80)

try:
    # Read every field as text and turn off NA inference so the merged file is
    # a faithful concatenation of the two parts. With default type inference a
    # re-read can rewrite values (e.g. '007' becomes 7, NA-like text becomes
    # empty). The parts were written as UTF-8 with BOM, hence 'utf-8-sig'.
    csv_read_options = {
        'dtype': str,
        'keep_default_na': False,
        'encoding': 'utf-8-sig',
    }
    demo_part1 = pd.read_csv(
        os.path.join(output_dir, 'demographics_part001.csv'),
        **csv_read_options,
    )
    demo_part2 = pd.read_csv(
        os.path.join(output_dir, 'demographics_part002.csv'),
        **csv_read_options,
    )

    # Stack part 2 under part 1 and renumber the index from 0.
    demographics_merged = pd.concat([demo_part1, demo_part2], ignore_index=True)

    # Save the merged table next to the per-file CSVs, again with a BOM.
    merged_path = os.path.join(output_dir, 'demographics_merged.csv')
    demographics_merged.to_csv(merged_path, index=False, encoding='utf-8-sig')

    print("β demographics λ³ν© μλ£")
    print(f" - μ΄ ν κ°μ: {len(demographics_merged):,}")
    print(f" - μ μ₯ κ²½λ‘: {merged_path}")
except Exception as e:
    # Merging is optional: report the problem (for example a missing part
    # file) without aborting the script.
    print(f"β λ³ν© μ€ μ€λ₯ λ°μ: {e}")
# Closing banner printed after all conversion steps have run.
print("\n" + "=" * 80)
# The literal below was split across two physical lines (an unterminated
# string literal); it is rejoined on one line with a single space at the break.
print("λͺ¨λ λ³ν μμ μλ£!")
print("=" * 80)
|