"""
analyze_dates.py — Publication-date trend analysis of collected news articles and derivation of the optimal refresh cycle
===================================================================================
"""
|
|
import glob
import os
import platform

import matplotlib.pyplot as plt
import pandas as pd
|
|
|
|
# NOTE(review): the Korean user-facing strings in this block were restored from
# a mis-decoded copy (UTF-8 bytes shown as TIS-620). Emoji prefixes and a few
# syllables are best-effort reconstructions — confirm against the original.


def _recommend_cycle(avg_daily):
    """Return the refresh-cycle recommendation for a daily article rate.

    Args:
        avg_daily: Average number of articles published per day over the
            observed period.

    Returns:
        The user-facing (Korean) recommendation text: daily refresh at 10 or
        more articles/day, every 2-3 days at 3 or more, otherwise every
        5 days to 1 week.
    """
    if avg_daily >= 10:
        return (
            "✨ 매일 1회 갱신 (하루 기사 생산량이 10건 이상으로 매우 많아, "
            "실시간 트렌드 포착을 위해 매일 새벽 1시 자동화가 필수적입니다.)"
        )
    if avg_daily >= 3:
        return (
            "✨ 2~3일에 1회 갱신 (기사가 2~3일 단위로 적당히 모였을 때 그래프를 "
            "빌드하는 것이 API 비용 대비 지식 밀도 상 가장 효율적입니다.)"
        )
    return (
        "✨ 5일~1주에 1회 갱신 (AI 핀테크 하위 도메인 특성상 일일 발행량이 "
        "3건 미만으로 협소하므로, 5일 간격으로 몰아서 갱신하는 것이 합리적입니다.)"
    )


def _save_trend_chart(date_counts, img_path):
    """Draw the per-day article counts as a bar chart and save it to img_path.

    Args:
        date_counts: DataFrame with a ``date_only`` column (one row per day)
            and a ``count`` column (articles published that day).
        img_path: Destination file path handed to ``plt.savefig``. The parent
            directory must already exist.
    """
    if platform.system() == "Darwin":
        # Use a Hangul-capable font on macOS so the Korean labels render.
        plt.rc("font", family="AppleGothic")
    # Draw minus signs with the ASCII hyphen instead of U+2212.
    plt.rcParams["axes.unicode_minus"] = False

    fig = plt.figure(figsize=(10, 5))
    try:
        bars = plt.bar(
            date_counts["date_only"].astype(str),
            date_counts["count"],
            color="royalblue",
            edgecolor="black",
            alpha=0.85,
        )

        # Annotate every bar with its article count, just above the bar top.
        for bar in bars:
            height = bar.get_height()
            plt.text(
                bar.get_x() + bar.get_width() / 2.0,
                height + 0.1,
                f"{int(height)}건",
                ha="center",
                va="bottom",
                fontsize=10,
                fontweight="bold",
            )

        plt.title(
            "일자별 AI 핀테크 뉴스 생산 트렌드 분석",
            fontsize=15,
            pad=15,
            fontweight="bold",
        )
        plt.xlabel("기사 발행 일자", fontsize=12)
        plt.ylabel("생산 건수", fontsize=12)
        plt.grid(axis="y", linestyle="--", alpha=0.5)
        plt.xticks(rotation=25)
        plt.tight_layout()
        plt.savefig(img_path, dpi=200)
    finally:
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)


def run_analysis():
    """Analyze article publication dates and suggest a refresh cycle.

    Reads every ``Articles_*.xlsx`` workbook in the current working directory,
    merges them, drops rows with a duplicate ``url``, and counts articles per
    ``published_date`` day. Prints the per-day table, the observed period, the
    average daily volume and a recommended refresh cycle, then saves a bar
    chart to ``artifacts/daily_trend_analysis.png``.

    Returns:
        None. A message is printed and the function returns early when no
        workbook is found, none can be loaded, a required column is missing,
        or no row has a parseable publication date.
    """
    # glob order is not guaranteed; sort so that the row kept by
    # drop_duplicates (the first occurrence) is reproducible across runs.
    files = sorted(glob.glob("Articles_*.xlsx"))
    if not files:
        print("❌ 분석할 Articles_*.xlsx 파일이 로컬 디렉토리에 없습니다.")
        return

    print(f"📂 발견된 뉴스 기사 파일 목록: {files}")

    # Best-effort load: an unreadable workbook is reported and skipped rather
    # than aborting the whole analysis.
    dfs = []
    for f in files:
        try:
            dfs.append(pd.read_excel(f))
        except Exception as e:
            print(f"⚠️ {f} 로드 실패: {e}")

    if not dfs:
        print("❌ 유효한 기사 데이터가 없습니다.")
        return

    df_all = pd.concat(dfs, ignore_index=True)

    # Both columns are indexed below; report their absence instead of letting
    # a KeyError escape.
    missing = [col for col in ("url", "published_date") if col not in df_all.columns]
    if missing:
        print(f"❌ 필수 컬럼이 없습니다: {missing}")
        return

    df_all = df_all.drop_duplicates(subset=["url"])
    print(f"📊 병합 완료된 고유 AI 핀테크 기사 총량: {len(df_all)}건")

    # Unparseable dates become NaT and are dropped.
    df_all["published_date"] = pd.to_datetime(df_all["published_date"], errors="coerce")
    df_all = df_all.dropna(subset=["published_date"])
    df_all = df_all.sort_values(by="published_date")

    # With no dated rows left, min()/max() below would be NaN and the ".days"
    # access would raise AttributeError.
    if df_all.empty:
        print("❌ 유효한 발행 일자가 있는 기사가 없습니다.")
        return

    df_all["date_only"] = df_all["published_date"].dt.date
    date_counts = df_all.groupby("date_only").size().reset_index(name="count")

    print("\n" + "=" * 50)
    print("📅 [일자별 AI 핀테크 기사 생산 트렌드 표]")
    print("=" * 50)
    print(date_counts.to_string(index=False))
    print("=" * 50)

    first_day = date_counts["date_only"].min()
    last_day = date_counts["date_only"].max()
    # Inclusive span: a single day of data counts as one day.
    total_days = (last_day - first_day).days + 1
    total_articles = date_counts["count"].sum()
    avg_daily = total_articles / max(total_days, 1)

    print(f"⏱️ 관측 기간: {total_days}일 ({first_day} ~ {last_day})")
    print(f"📈 일평균 AI 핀테크 뉴스 생산량: {avg_daily:.2f}건")

    recommendation = _recommend_cycle(avg_daily)

    print("-" * 50)
    print("💡 [최적의 GraphRAG 자동화 주기 제안]")
    print(f" {recommendation}")
    print("=" * 50 + "\n")

    os.makedirs("artifacts", exist_ok=True)
    img_path = "artifacts/daily_trend_analysis.png"
    _save_trend_chart(date_counts, img_path)
    print(f"💾 시각화 분석 차트 저장 완료 ➡️ [절대경로]: {os.path.abspath(img_path)}")
|
|
|
|
if __name__ == "__main__":
    # Run the analysis only when executed as a script, not when imported.
    run_analysis()
|
|