File size: 4,622 Bytes
64ad66f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1ecde19
64ad66f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
"""
analyze_dates.py — Analyze publication-date trends of collected news articles and derive an optimal refresh cadence.
===================================================================================
"""

import glob
import os
import platform

import matplotlib.pyplot as plt
import pandas as pd


def _load_articles(files):
    """Read each workbook in *files* and return the DataFrames that loaded."""
    dfs = []
    for f in files:
        try:
            dfs.append(pd.read_excel(f))
        except Exception as e:
            # Best effort: one unreadable workbook must not abort the whole
            # run, so the failure is reported and the file is skipped.
            print(f"โš ๏ธ {f} ๋กœ๋“œ ์‹คํŒจ: {e}")
    return dfs


def _recommend_cycle(avg_daily):
    """Return the refresh-cadence message for an average daily article count."""
    if avg_daily >= 10:
        return "โœจ ๋งค์ผ 1ํšŒ ๊ฐฑ์‹  (ํ•˜๋ฃจ ๊ธฐ์‚ฌ ์ƒ์‚ฐ๋Ÿ‰์ด 10๊ฑด ์ด์ƒ์œผ๋กœ ๋งค์šฐ ๋งŽ์•„, ์‹ค์‹œ๊ฐ„ ํŠธ๋ Œ๋“œ ํฌ์ฐฉ์„ ์œ„ํ•ด ๋งค์ผ ์ƒˆ๋ฒฝ 1์‹œ ์ž๋™ํ™”๊ฐ€ ํ•„์ˆ˜์ ์ž…๋‹ˆ๋‹ค.)"
    if avg_daily >= 3:
        return "โœจ 2~3์ผ์— 1ํšŒ ๊ฐฑ์‹  (๊ธฐ์‚ฌ๊ฐ€ 2~3์ผ ๋‹จ์œ„๋กœ ์ ๋‹นํžˆ ๋ชจ์˜€์„ ๋•Œ ๊ทธ๋ž˜ํ”„๋ฅผ ๋นŒ๋“œํ•˜๋Š” ๊ฒƒ์ด API ๋น„์šฉ ๋Œ€๋น„ ์ง€์‹ ๋ฐ€๋„ ์ƒ ๊ฐ€์žฅ ํšจ์œจ์ ์ž…๋‹ˆ๋‹ค.)"
    return "โœจ 5์ผ~1์ฃผ์— 1ํšŒ ๊ฐฑ์‹  (AI ํ•€ํ…Œํฌ ํ‹ˆ์ƒˆ ๋„๋ฉ”์ธ ํŠน์„ฑ์ƒ ์ผ์ผ ๋ฐœํ–‰๋Ÿ‰์ด 3๊ฑด ๋ฏธ๋งŒ์œผ๋กœ ํ˜‘์†Œํ•˜๋ฏ€๋กœ, 5์ผ ๊ฐ„๊ฒฉ์œผ๋กœ ๋ชฐ์•„์„œ ๊ฐฑ์‹ ํ•˜๋Š” ๊ฒƒ์ด ํ•ฉ๋ฆฌ์ ์ž…๋‹ˆ๋‹ค.)"


def _save_trend_chart(date_counts, img_path):
    """Draw the per-day bar chart from *date_counts* and save it to *img_path*."""
    # Pick a font that can render the Korean labels on the platforms that ship
    # one by default; other platforms keep matplotlib's configured font.
    font_by_os = {"Darwin": "AppleGothic", "Windows": "Malgun Gothic"}
    family = font_by_os.get(platform.system())
    if family is not None:
        plt.rc("font", family=family)
    plt.rcParams["axes.unicode_minus"] = False

    fig = plt.figure(figsize=(10, 5))
    try:
        bars = plt.bar(
            date_counts["date_only"].astype(str),
            date_counts["count"],
            color="royalblue",
            edgecolor="black",
            alpha=0.85,
        )

        # Label every bar with its count, just above the bar top.
        for bar in bars:
            height = bar.get_height()
            plt.text(
                bar.get_x() + bar.get_width() / 2.0,
                height + 0.1,
                f"{int(height)}๊ฑด",
                ha="center",
                va="bottom",
                fontsize=10,
                fontweight="bold",
            )

        plt.title("์ผ์ž๋ณ„ AI ํ•€ํ…Œํฌ ๋‰ด์Šค ์ƒ์‚ฐ ํŠธ๋ Œ๋“œ ๋ถ„์„", fontsize=15, pad=15, fontweight="bold")
        plt.xlabel("๊ธฐ์‚ฌ ๋ฐœํ–‰ ์ผ์ž", fontsize=12)
        plt.ylabel("์ƒ์‚ฐ ๊ฑด์ˆ˜", fontsize=12)
        plt.grid(axis="y", linestyle="--", alpha=0.5)
        plt.xticks(rotation=25)
        plt.tight_layout()
        plt.savefig(img_path, dpi=200)
    finally:
        # Release the figure even if drawing or saving fails, so repeated
        # calls do not accumulate open figures.
        plt.close(fig)


def run_analysis():
    """Summarise article publication dates and suggest a refresh cadence.

    Loads every ``Articles_*.xlsx`` workbook in the current working directory,
    merges them, drops rows with a duplicate ``url``, and counts articles per
    calendar day of ``published_date``. It then prints the per-day table, the
    observed period, the average number of articles per day and a
    recommendation chosen from that average (>= 10, >= 3, otherwise), and
    saves a bar chart to ``artifacts/daily_trend_analysis.png``.

    The function prints a message and returns early when there is nothing to
    analyse: no matching files, no readable workbook, a required column
    missing, or no row with a parseable ``published_date``.

    Returns:
        None
    """
    # 1. Collect the article workbooks; sorted so the listing and the
    #    "first duplicate wins" choice below do not depend on directory order.
    files = sorted(glob.glob("Articles_*.xlsx"))
    if not files:
        print("โŒ ๋ถ„์„ํ•  Articles_*.xlsx ํŒŒ์ผ์ด ๋กœ์ปฌ ๋””๋ ‰ํ† ๋ฆฌ์— ์—†์Šต๋‹ˆ๋‹ค.")
        return

    print(f"๐Ÿ“‚ ๋ฐœ๊ฒฌ๋œ ๋‰ด์Šค ๊ธฐ์‚ฌ ํŒŒ์ผ ๋ชฉ๋ก: {files}")

    # 2. Merge the workbooks and drop duplicate articles.
    dfs = _load_articles(files)
    if not dfs:
        print("โŒ ์œ ํšจํ•œ ๊ธฐ์‚ฌ ๋ฐ์ดํ„ฐ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")
        return

    df_all = pd.concat(dfs, ignore_index=True)

    # Both columns are indexed below; report their absence instead of letting
    # a KeyError escape.
    missing = [col for col in ("url", "published_date") if col not in df_all.columns]
    if missing:
        print(f"[error] Required column(s) missing from the article data: {missing}")
        return

    df_all = df_all.drop_duplicates(subset=["url"])  # same URL => same article
    print(f"๐Ÿ“Š ๋ณ‘ํ•ฉ ์™„๋ฃŒ๋œ ๊ณ ์œ  AI ํ•€ํ…Œํฌ ๊ธฐ์‚ฌ ์ด๋Ÿ‰: {len(df_all)}๊ฑด")

    # 3. Parse the dates; values that cannot be parsed become NaT
    #    (errors="coerce") and are dropped.
    df_all["published_date"] = pd.to_datetime(df_all["published_date"], errors="coerce")
    df_all = df_all.dropna(subset=["published_date"])
    if df_all.empty:
        # Without this guard the min/max arithmetic below has no dates to
        # subtract and fails.
        print("[error] No article has a parseable published_date; nothing to analyze.")
        return
    df_all = df_all.sort_values(by="published_date")

    # Count articles per calendar day.
    df_all["date_only"] = df_all["published_date"].dt.date
    date_counts = df_all.groupby("date_only").size().reset_index(name="count")

    # 4. Print the per-day table.
    print("\n" + "=" * 50)
    print("๐Ÿ“… [์ผ์ž๋ณ„ AI ํ•€ํ…Œํฌ ๊ธฐ์‚ฌ ์ƒ์‚ฐ ํŠธ๋ Œ๋“œ ํ‘œ]")
    print("=" * 50)
    print(date_counts.to_string(index=False))
    print("=" * 50)

    # 5. Average over the whole observed span (inclusive of both end days),
    #    so days without any article still count in the denominator.
    first_day = date_counts["date_only"].min()
    last_day = date_counts["date_only"].max()
    total_days = (last_day - first_day).days + 1
    total_articles = date_counts["count"].sum()
    avg_daily = total_articles / max(total_days, 1)

    print(f"โฑ๏ธ  ๊ด€์ธก ๊ธฐ๊ฐ„: {total_days}์ผ ({first_day} ~ {last_day})")
    print(f"๐Ÿ“ˆ ์ผํ‰๊ท  AI ํ•€ํ…Œํฌ ๋‰ด์Šค ์ƒ์‚ฐ๋Ÿ‰: {avg_daily:.2f}๊ฑด")

    recommendation = _recommend_cycle(avg_daily)

    print("-" * 50)
    print("๐Ÿ’ก [์ตœ์ ์˜ GraphRAG ์ž๋™ํ™” ์ฃผ๊ธฐ ์ œ์•ˆ]")
    print(f"   {recommendation}")
    print("=" * 50 + "\n")

    # 6. Save the chart under the artifacts folder.
    os.makedirs("artifacts", exist_ok=True)
    img_path = "artifacts/daily_trend_analysis.png"
    _save_trend_chart(date_counts, img_path)
    print(f"๐Ÿ’พ ์‹œ๊ฐํ™” ๋ถ„์„ ์ฐจํŠธ ์ €์žฅ ์™„๋ฃŒ โžก๏ธ [์ ˆ๋Œ€๊ฒฝ๋กœ]: {os.path.abspath(img_path)}")


# Run the analysis only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    run_analysis()