JHyeok5 commited on
Commit
d2836d9
ยท
verified ยท
1 Parent(s): a8ce434

Upload folder using huggingface_hub

Browse files
requirements-trend.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ httpx>=0.27.0
2
+ beautifulsoup4>=4.12.0
3
+ lxml>=5.0.0
4
+ requests>=2.31.0
5
+ google-api-python-client>=2.100.0
6
+ apify-client>=1.6.0
7
+ supabase>=2.0.0
8
+ python-dotenv>=1.0.0
scripts/run_trend_engine.py ADDED
@@ -0,0 +1,248 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ RE:Play Trend Engine v3 โ€” ์ฃผ๊ฐ„ ๋ฐฐ์น˜ ์˜ค์ผ€์ŠคํŠธ๋ ˆ์ดํ„ฐ
3
+
4
+ ์ˆœ์ฐจ ์‹คํ–‰ ํŒŒ์ดํ”„๋ผ์ธ:
5
+ 1. ์นด์นด์˜ค๋งต ๊ทธ๋ฆฌ๋“œ ์Šค์บ” + ๋ฆฌ๋ทฐ ํŒŒ์‹ฑ (trend_spots ๋งˆ์Šคํ„ฐ ์ƒ์„ฑ)
6
+ 2. SpotMatcher ์ดˆ๊ธฐํ™” (trend_spots + story_spots ์‚ฌ์ „ ๋กœ๋“œ)
7
+ 3. ๋„ค์ด๋ฒ„ ๋ธ”๋กœ๊ทธ ์ˆ˜์ง‘ (URL ํ™•๋ณด + ํฌ๋กค๋ง + DB ์ €์žฅ)
8
+ 4. ๋ธ”๋กœ๊ทธ ๋ณธ๋ฌธ โ†’ ์žฅ์†Œ๋ช… ์ถ”์ถœ + mention_count ์ง‘๊ณ„
9
+ 5. ์œ ํŠœ๋ธŒ API (SpotMatcher ์—ฐ๋™)
10
+ 6. ์ธ์Šคํƒ€๊ทธ๋žจ Apify (SpotMatcher ์—ฐ๋™)
11
+ 7. ์ข…ํ•ฉ ์Šค์ฝ”์–ด ๊ณ„์‚ฐ + ๋žญํ‚น ์ƒ์„ฑ
12
+
13
+ Usage:
14
+ python backend/scripts/run_trend_engine.py
15
+ """
16
+
17
+ import asyncio
18
+ import json
19
+ import logging
20
+ import os
21
+ import re
22
+ import sys
23
+ import time
24
+ from datetime import date, timedelta
25
+
26
+ # backend/ ๋””๋ ‰ํ† ๋ฆฌ๋ฅผ import path์— ์ถ”๊ฐ€
27
+ sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
28
+
29
+ # ๋กœ์ปฌ ์‹คํ–‰ ์‹œ .env ํŒŒ์ผ ๋กœ๋“œ
30
+ try:
31
+ from dotenv import load_dotenv
32
+ # ํ”„๋กœ์ ํŠธ ๋ฃจํŠธ์˜ .env ํŒŒ์ผ ๋กœ๋“œ
33
+ env_path = os.path.join(os.path.dirname(__file__), "..", "..", ".env")
34
+ load_dotenv(env_path)
35
+ except ImportError:
36
+ pass # GitHub Actions ๋“ฑ dotenv ์—†๋Š” ํ™˜๊ฒฝ
37
+
38
+ from supabase import create_client
39
+
40
+ from trend_engine.collectors.naver_blog import NaverBlogCollector
41
+ from trend_engine.collectors.kakaomap import KakaoMapCollector
42
+ from trend_engine.collectors.youtube import YouTubeCollector
43
+ from trend_engine.collectors.instagram import InstagramCollector
44
+ from trend_engine.spot_matcher import SpotMatcher
45
+ from trend_engine.trend_scorer import generate_weekly_ranking
46
+ from trend_engine.place_extractor import PlaceNameExtractor
47
+
48
+ logging.basicConfig(
49
+ level=logging.INFO,
50
+ format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
51
+ datefmt="%Y-%m-%d %H:%M:%S",
52
+ )
53
+ logger = logging.getLogger("trend_engine.orchestrator")
54
+
55
+
56
+ def _get_supabase_client():
57
+ url = os.environ.get("SUPABASE_URL") or os.environ.get("VITE_SUPABASE_URL")
58
+ key = os.environ.get("SUPABASE_SERVICE_ROLE_KEY") or os.environ.get("SUPABASE_SERVICE_KEY")
59
+ if not url or not key:
60
+ raise ValueError("SUPABASE_URL and SUPABASE_SERVICE_ROLE_KEY must be set")
61
+ return create_client(url, key)
62
+
63
+
64
def run_step(name: str, func, results: dict):
    """Execute a single pipeline step and record its outcome in *results*.

    Returns the step's return value on success; on failure the exception is
    logged, recorded under ``results[name]``, and ``None`` is returned so the
    remaining pipeline steps can still run.
    """
    logger.info("━━━ [START] %s ━━━", name)
    started_at = time.time()
    try:
        outcome = func()
        duration = time.time() - started_at
    except Exception as exc:
        duration = time.time() - started_at
        results[name] = {"status": "error", "error": str(exc), "elapsed_sec": round(duration, 1)}
        logger.error("✗ [FAIL] %s — %s (%.1f초)", name, exc, duration)
        return None
    results[name] = {"status": "ok", "result": _summarize(outcome), "elapsed_sec": round(duration, 1)}
    logger.info("✓ [DONE] %s — %.1f초", name, duration)
    return outcome
79
+
80
+
81
def run_async_step(name: str, coro, results: dict):
    """run_step variant that drives an asyncio coroutine to completion."""
    # Defer asyncio.run into the callable so run_step's timing covers it.
    return run_step(name, lambda: asyncio.run(coro), results)
86
+
87
+
88
+ def _summarize(result) -> str:
89
+ """๊ฒฐ๊ณผ๋ฅผ ๋กœ๊ทธ์šฉ ์š”์•ฝ ๋ฌธ์ž์—ด๋กœ ๋ณ€ํ™˜."""
90
+ if isinstance(result, dict):
91
+ return json.dumps(result, ensure_ascii=False, default=str)[:200]
92
+ return str(result)[:200]
93
+
94
+
95
def main() -> None:
    """Run the weekly trend pipeline end to end.

    Steps execute sequentially via run_step/run_async_step; each step's
    outcome is recorded in ``results``, a JSON summary is printed for the
    GitHub Actions log, and the process exits non-zero only when every
    step failed.
    """
    total_start = time.time()
    results: dict = {}

    sb = _get_supabase_client()

    # -- 1. KakaoMap grid scan (runs first -> builds the trend_spots master) --
    kakao = KakaoMapCollector(sb)
    run_async_step("1_kakaomap", kakao.run(), results)

    # -- 2. SpotMatcher init (preloads trend_spots + story_spots dictionaries) --
    matcher = SpotMatcher(sb)
    extractor = PlaceNameExtractor(sb)
    logger.info(
        "SpotMatcher 준비 완료 — trend_spots %d건, story_spots %d건",
        len(matcher.trend_spots),
        len(matcher.story_spots),
    )

    # -- 3. YouTube API (normalized through SpotMatcher) --
    youtube = YouTubeCollector(sb, spot_matcher=matcher)
    run_step("3_youtube", youtube.run, results)

    # -- 4. Instagram via Apify (normalized through SpotMatcher) --
    instagram = InstagramCollector(sb, spot_matcher=matcher)
    run_step("4_instagram", instagram.run, results)

    # -- 5. Naver Place: disabled (Place ID matching unavailable) --
    logger.info("네이버 플레이스: 비활성 (Place ID 매칭 불가, 2026-02)")
    results["5_naver_place"] = {
        "status": "skipped",
        "reason": "Place ID matching unavailable",
        "elapsed_sec": 0,
    }

    # -- 6. Naver blog collection (URL discovery + parallel crawl + store) --
    blog = NaverBlogCollector(sb)
    run_step("6_naver_blog", blog.run, results)

    # -- 7. Blog body -> place-name extraction + mention_count aggregation --
    def extract_blog_places():
        """Extract place names from blog posts, tally mention_count, save to spot_trends."""
        today = date.today()
        period_start = today - timedelta(days=7)

        # Fetch naver_blog records still marked __pending__ (paginated reads).
        records = []
        page_size = 1000
        offset = 0
        try:
            while True:
                batch = (
                    sb.table("spot_trends")
                    .select("id, raw_data")
                    .eq("source", "naver_blog")
                    .eq("spot_id", "__pending__")
                    .range(offset, offset + page_size - 1)
                    .execute()
                )
                rows = batch.data or []
                records.extend(rows)
                if len(rows) < page_size:
                    break
                offset += page_size
        except Exception as e:
            logger.warning("블로그 pending 레코드 조회 실패: %s", e)
            return {"error": str(e)}

        logger.info("블로그 pending 레코드: %d건 조회", len(records))
        if not records:
            return {"pending_records": 0, "places_found": 0}

        # Tally mentions per matched spot id.
        place_mentions: dict[str, int] = {}

        for record in records:
            raw = record.get("raw_data", {})
            content = raw.get("content_preview", "")
            title = raw.get("title", "")
            text = f"{title} {content}"
            text = re.sub(r"<[^>]+>", "", text)  # strip HTML tags

            places = extractor.extract(text)
            for place in places:
                matched_id = matcher.match(place["name"])
                if matched_id:
                    place_mentions[matched_id] = place_mentions.get(matched_id, 0) + 1

        # Persist the aggregated per-spot mention_count rows.
        saved = 0
        for spot_id, count in place_mentions.items():
            try:
                sb.table("spot_trends").insert({
                    "spot_id": spot_id,
                    "source": "naver_blog",
                    "metric_type": "mention_count",
                    "metric_value": count,
                    "period_start": period_start.isoformat(),
                    "period_end": today.isoformat(),
                    "raw_data": {"aggregated_from": "blog_post_extraction"},
                }).execute()
                saved += 1
            except Exception as e:
                logger.warning("mention_count 저장 실패 (%s): %s", spot_id, e)

        return {
            "pending_records": len(records),
            "places_found": len(place_mentions),
            "mention_records_saved": saved,
        }

    run_step("7_blog_place_extraction", extract_blog_places, results)

    # -- 8. Composite score calculation + ranking generation --
    def calc_scores():
        # Thin wrapper so run_step can time and record the scorer call.
        return generate_weekly_ranking(sb)

    run_step("8_score_calculation", calc_scores, results)

    # -- Result summary --
    total_elapsed = time.time() - total_start
    ok_count = sum(1 for r in results.values() if r.get("status") == "ok")
    err_count = sum(1 for r in results.values() if r.get("status") == "error")
    skip_count = sum(1 for r in results.values() if r.get("status") == "skipped")

    summary = {
        "total_steps": len(results),
        "succeeded": ok_count,
        "failed": err_count,
        "skipped": skip_count,
        "total_elapsed_sec": round(total_elapsed, 1),
        "steps": {
            k: {"status": v.get("status"), "elapsed_sec": v.get("elapsed_sec", 0)}
            for k, v in results.items()
        },
    }

    logger.info("━━━ TREND ENGINE COMPLETE ━━━")
    logger.info(
        "성공: %d / 실패: %d / 스킵: %d / 총 소요: %.1f초",
        ok_count, err_count, skip_count, total_elapsed,
    )

    # Machine-readable summary for the GitHub Actions log.
    print(json.dumps(summary, ensure_ascii=False, indent=2))

    # Exit non-zero only when every step failed.
    if ok_count == 0:
        logger.error("모든 단계가 실패했습니다.")
        sys.exit(1)
245
+
246
+
247
# Script entry point: run the full weekly trend pipeline.
if __name__ == "__main__":
    main()
trend_engine/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """RE:Play Trend Engine v3 โ€” ํŠธ๋ Œ๋“œ ๋ฐ์ดํ„ฐ ์ˆ˜์ง‘ ํŒŒ์ดํ”„๋ผ์ธ"""
trend_engine/collectors/__init__.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Trend Engine data collectors โ€” ์ฑ„๋„๋ณ„ ์ˆ˜์ง‘๊ธฐ"""
2
+
3
+ from .naver_blog import NaverBlogCollector
4
+ from .naver_place import NaverPlaceCollector
5
+ from .kakaomap import KakaoMapCollector
6
+ from .youtube import YouTubeCollector
7
+ from .instagram import InstagramCollector
8
+
9
+ __all__ = [
10
+ "NaverBlogCollector",
11
+ "NaverPlaceCollector",
12
+ "KakaoMapCollector",
13
+ "YouTubeCollector",
14
+ "InstagramCollector",
15
+ ]
trend_engine/collectors/instagram.py ADDED
@@ -0,0 +1,277 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Instagram Collector โ€” Apify SaaS (Instagram Hashtag Scraper)
3
+
4
+ ์ˆ˜์ง‘ ๋ฐฉ์‹:
5
+ 1. Apify์˜ instagram-hashtag-scraper Actor๋กœ ํ•ด์‹œํƒœ๊ทธ๋ณ„ ๊ฒŒ์‹œ๋ฌผ ์ˆ˜์ง‘
6
+ 2. ์œ„์น˜๋ณ„ ํ‰๊ท  ์ฐธ์—ฌ๋„(์ข‹์•„์š”+๋Œ“๊ธ€) ์ง‘๊ณ„
7
+ 3. spot_trends ํ…Œ์ด๋ธ”์— ์ €์žฅ
8
+ """
9
+
10
+ import os
11
+ import logging
12
+ from datetime import date
13
+
14
+ from apify_client import ApifyClient
15
+
16
+ from trend_engine.place_extractor import PlaceNameExtractor
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
21
+ # ์„ค์ •
22
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
23
+
24
+ INSTAGRAM_HASHTAGS = [
25
+ "์• ์›”์นดํŽ˜",
26
+ "์• ์›”๋ง›์ง‘",
27
+ "์• ์›”๊ฐ€๋ณผ๋งŒํ•œ๊ณณ",
28
+ "์• ์›”ํ•ด์•ˆ",
29
+ "์• ์›”์—ฌํ–‰",
30
+ "์ œ์ฃผ์• ์›”",
31
+ "์• ์›”๊ฐ์„ฑ",
32
+ "ํ•œ๋‹ดํ•ด์•ˆ",
33
+ "๊ณฝ์ง€ํ•ด๋ณ€",
34
+ "์• ์›”ํ•ซํ”Œ",
35
+ "์• ์›”๋””์ €ํŠธ",
36
+ ]
37
+
38
+ RESULTS_LIMIT_PER_HASHTAG = 50
39
+
40
+ ACTOR_ID = "apify/instagram-hashtag-scraper"
41
+
42
+
43
class InstagramCollector:
    """Collector backed by the Apify Instagram Hashtag Scraper.

    Pipeline: scrape posts for a fixed hashtag list, aggregate engagement
    per location, then persist metrics into the ``spot_trends`` table.
    """

    def __init__(self, supabase_client, spot_matcher=None):
        # Supabase client used for all DB writes.
        self.supabase = supabase_client
        # NOTE: raises KeyError at construction when APIFY_API_TOKEN is unset.
        self.apify = ApifyClient(os.environ["APIFY_API_TOKEN"])
        # Optional matcher normalizing location names to trend_spots ids.
        self.spot_matcher = spot_matcher
        self.extractor = PlaceNameExtractor(supabase_client)

    # ------------------------------------------------------------------
    # Hashtag post collection
    # ------------------------------------------------------------------

    def collect_hashtag_posts(self) -> list[dict]:
        """Run the Apify actor and collect posts for every configured hashtag.

        Returns:
            [{hashtag, location_name, likes_count, comments_count, caption,
              timestamp, url}, ...]; an empty list when the actor run fails.
        """
        run_input = {
            "hashtags": INSTAGRAM_HASHTAGS,
            "resultsLimit": RESULTS_LIMIT_PER_HASHTAG,
        }

        logger.info(
            "Apify Actor 실행 시작: %d개 해시태그, 해시태그당 %d건",
            len(INSTAGRAM_HASHTAGS), RESULTS_LIMIT_PER_HASHTAG,
        )

        try:
            run = self.apify.actor(ACTOR_ID).call(run_input=run_input)
        except Exception as e:
            logger.error("Apify Actor 실행 실패: %s", e)
            return []

        all_posts: list[dict] = []
        dataset_id = run["defaultDatasetId"]

        # Stream the actor's default dataset; missing fields default to ""/0.
        for item in self.apify.dataset(dataset_id).iterate_items():
            all_posts.append({
                "hashtag": item.get("hashtag", ""),
                "location_name": item.get("locationName", ""),
                "likes_count": item.get("likesCount", 0),
                "comments_count": item.get("commentsCount", 0),
                "caption": item.get("caption", ""),
                "timestamp": item.get("timestamp", ""),
                "url": item.get("url", ""),
            })

        logger.info("게시물 수집 완료: %d건", len(all_posts))
        return all_posts

    # ------------------------------------------------------------------
    # Per-location metric aggregation
    # ------------------------------------------------------------------

    def aggregate_location_metrics(self, posts: list[dict]) -> dict[str, dict]:
        """Aggregate popularity metrics per location from collected posts.

        Pass 1: group by the post's location tag (location_name).
        Pass 2: for posts without a location tag, extract a place name from
        the caption (at most one place counted per post).

        Returns:
            {location_name: {post_count, total_likes, total_comments,
                             hashtags, avg_engagement}}
        """
        location_metrics: dict[str, dict] = {}
        no_location_posts: list[dict] = []

        for post in posts:
            loc = post["location_name"]
            if not loc:
                no_location_posts.append(post)
                continue

            if loc not in location_metrics:
                location_metrics[loc] = {
                    "post_count": 0,
                    "total_likes": 0,
                    "total_comments": 0,
                    "hashtags": set(),
                }

            location_metrics[loc]["post_count"] += 1
            location_metrics[loc]["total_likes"] += post["likes_count"]
            location_metrics[loc]["total_comments"] += post["comments_count"]
            if post["hashtag"]:
                location_metrics[loc]["hashtags"].add(post["hashtag"])

        # Pass 2: caption-based place extraction for posts without a location tag.
        caption_extracted = 0
        for post in no_location_posts:
            caption = post.get("caption", "")
            if not caption or len(caption) < 5:
                continue

            places = self.extractor.extract(caption)
            for place in places:
                loc = place["name"]
                if loc not in location_metrics:
                    location_metrics[loc] = {
                        "post_count": 0,
                        "total_likes": 0,
                        "total_comments": 0,
                        "hashtags": set(),
                    }
                location_metrics[loc]["post_count"] += 1
                location_metrics[loc]["total_likes"] += post["likes_count"]
                location_metrics[loc]["total_comments"] += post["comments_count"]
                if post["hashtag"]:
                    location_metrics[loc]["hashtags"].add(post["hashtag"])
                caption_extracted += 1
                break  # count at most one place per post

        # Compute average engagement; convert each hashtag set to a sorted list.
        for loc, metrics in location_metrics.items():
            count = max(metrics["post_count"], 1)
            metrics["avg_engagement"] = (
                metrics["total_likes"] + metrics["total_comments"]
            ) / count
            metrics["hashtags"] = sorted(metrics["hashtags"])

        logger.info(
            "위치별 집계 완료: %d개 위치 (위치태그 %d건, 캡션추출 %d건, 미식별 %d건)",
            len(location_metrics),
            sum(1 for p in posts if p["location_name"]),
            caption_extracted,
            len(no_location_posts) - caption_extracted,
        )
        return location_metrics

    # ------------------------------------------------------------------
    # DB persistence
    # ------------------------------------------------------------------

    def _save_to_db(
        self,
        location_metrics: dict[str, dict],
        total_posts: int,
    ) -> int:
        """Persist the aggregated metrics into the ``spot_trends`` table.

        When a SpotMatcher is configured, location names are normalized to
        trend_spots ids; unmatched locations are skipped. Returns the number
        of locations whose post_count metric insert was attempted.
        """
        today = date.today()
        saved = 0
        skipped = 0

        for loc_name, metrics in location_metrics.items():
            # Normalize to a trend_spots id when a matcher is available.
            if self.spot_matcher:
                spot_id = self.spot_matcher.match(loc_name)
                if not spot_id:
                    skipped += 1
                    continue
            else:
                spot_id = f"ig_{loc_name}"

            # post_count metric
            try:
                self.supabase.table("spot_trends").insert({
                    "spot_id": spot_id,
                    "source": "instagram",
                    "metric_type": "post_count",
                    "metric_value": metrics["post_count"],
                    "period_start": today.isoformat(),
                    "period_end": today.isoformat(),
                    "raw_data": {
                        "location_name": loc_name,
                        "total_likes": metrics["total_likes"],
                        "total_comments": metrics["total_comments"],
                        "avg_engagement": metrics["avg_engagement"],
                        "hashtags": metrics["hashtags"],
                    },
                }).execute()
                saved += 1
            except Exception as e:
                logger.warning("spot_trends insert 실패 (instagram post_count, %s): %s", spot_id, e)

            # avg_engagement metric (needed by channel scoring)
            avg_eng = metrics["avg_engagement"]
            if avg_eng > 0:  # filters negative sentinel values (Apify not-collected case)
                try:
                    self.supabase.table("spot_trends").insert({
                        "spot_id": spot_id,
                        "source": "instagram",
                        "metric_type": "avg_engagement",
                        "metric_value": int(round(avg_eng)),
                        "period_start": today.isoformat(),
                        "period_end": today.isoformat(),
                        "raw_data": {"location_name": loc_name},
                    }).execute()
                except Exception as e:
                    logger.warning("spot_trends insert 실패 (instagram avg_engagement, %s): %s", spot_id, e)

        if skipped:
            logger.info("Instagram 장소 매칭 실패로 %d건 스킵", skipped)
        logger.info("Instagram DB 저장 완료: %d건", saved)
        return saved

    # ------------------------------------------------------------------
    # Main entry point
    # ------------------------------------------------------------------

    def run(self) -> dict:
        """Execute the full Instagram collection pipeline.

        Returns:
            {"total_posts": int, "locations_found": int, "saved": int}
        """
        logger.info("=== Instagram 수집 시작 ===")

        # Stage 1: collect hashtag posts
        posts = self.collect_hashtag_posts()

        if not posts:
            logger.warning("수집된 게시물 없음 — 종료")
            return {"total_posts": 0, "locations_found": 0, "saved": 0}

        # Stage 2: aggregate metrics per location
        location_metrics = self.aggregate_location_metrics(posts)

        # Stage 3: persist to DB
        saved = self._save_to_db(location_metrics, len(posts))

        result = {
            "total_posts": len(posts),
            "locations_found": len(location_metrics),
            "saved": saved,
        }
        logger.info("=== Instagram 수집 완료: %s ===", result)
        return result
trend_engine/collectors/kakaomap.py ADDED
@@ -0,0 +1,309 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ KakaoMap Collector โ€” ๊ทธ๋ฆฌ๋“œ ๋ถ„ํ•  ์นดํ…Œ๊ณ ๋ฆฌ ์Šค์บ” + ๋ฆฌ๋ทฐ ์ˆ˜ ํŒŒ์‹ฑ
3
+
4
+ ์ˆ˜์ง‘ ๋ฐฉ์‹:
5
+ 1. ์• ์›” ์ง€์—ญ์„ 2kmร—2km ๊ทธ๋ฆฌ๋“œ๋กœ ๋ถ„ํ• 
6
+ 2. ๊ฐ ์…€ ร— 4๊ฐœ ์นดํ…Œ๊ณ ๋ฆฌ(CE7/FD6/AT4/CT1) ร— 3ํŽ˜์ด์ง€ ์กฐํšŒ
7
+ 3. ์ƒ์œ„ 100๊ฐœ ์žฅ์†Œ์˜ ๋ฆฌ๋ทฐ ์ˆ˜๋ฅผ ๋น„๊ณต์‹ API๋กœ ํŒŒ์‹ฑ (graceful degradation)
8
+ """
9
+
10
+ import os
11
+ import logging
12
+ from datetime import date, datetime
13
+
14
+ import httpx
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
19
+ # ์• ์›” ๊ทธ๋ฆฌ๋“œ ์„ค์ •
20
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
21
+
22
+ # ์• ์›”๋ฆฌ ์ค‘์‹ฌ ๊ทธ๋ฆฌ๋“œ (์• ์›”ํ•ญ~ํ•œ๋‹ดํ•ด์•ˆ~๊ณฝ์ง€ํ•ด๋ณ€ ์ปค๋ฒ„)
23
+ AEWOL_GRID = {
24
+ "west": 126.30,
25
+ "east": 126.36,
26
+ "south": 33.44,
27
+ "north": 33.47,
28
+ "cell_lng": 0.023, # ~2km ๊ฒฝ๋„
29
+ "cell_lat": 0.018, # ~2km ์œ„๋„
30
+ }
31
+
32
+ CATEGORY_CODES = {
33
+ "CE7": "์นดํŽ˜",
34
+ "FD6": "์Œ์‹์ ",
35
+ "AT4": "๊ด€๊ด‘๋ช…์†Œ",
36
+ "CT1": "๋ฌธํ™”์‹œ์„ค",
37
+ }
38
+
39
+ KAKAO_CATEGORY_URL = "https://dapi.kakao.com/v2/local/search/category"
40
+ KAKAO_PLACE_DETAIL_URL = "https://place.map.kakao.com/main/v/{place_id}"
41
+
42
+ REVIEW_PARSE_LIMIT = 100 # ๋ฆฌ๋ทฐ ์ˆ˜ ํŒŒ์‹ฑ ๋Œ€์ƒ ์ƒ์œ„ N๊ฐœ
43
+
44
+
45
class KakaoMapCollector:
    """KakaoMap grid-scan collector with review-count parsing.

    Scans the Aewol area in ~2km x 2km cells across four category codes,
    then parses review counts for the top spots via the unofficial place
    detail endpoint (graceful degradation on failure).
    """

    def __init__(self, supabase_client):
        self.supabase = supabase_client
        # NOTE: raises KeyError at construction when KAKAO_REST_API_KEY is unset.
        self.api_key = os.environ["KAKAO_REST_API_KEY"]

    # ------------------------------------------------------------------
    # Grid generation
    # ------------------------------------------------------------------

    def generate_grid_cells(self) -> list[dict]:
        """Split the Aewol area into ~2km x 2km cells as `rect` parameter dicts."""
        cells = []
        lng = AEWOL_GRID["west"]
        while lng < AEWOL_GRID["east"]:
            lat = AEWOL_GRID["south"]
            while lat < AEWOL_GRID["north"]:
                cells.append({
                    "x1": lng,
                    "y1": lat,
                    # Clamp the cell's far edge to the grid boundary.
                    "x2": min(lng + AEWOL_GRID["cell_lng"], AEWOL_GRID["east"]),
                    "y2": min(lat + AEWOL_GRID["cell_lat"], AEWOL_GRID["north"]),
                })
                lat += AEWOL_GRID["cell_lat"]
            lng += AEWOL_GRID["cell_lng"]

        logger.info("그리드 셀 %d개 생성 완료", len(cells))
        return cells

    # ------------------------------------------------------------------
    # Category scan
    # ------------------------------------------------------------------

    def collect_spots(self) -> dict[str, dict]:
        """Collect spots across the Aewol area via the Kakao category search API.

        Iterates every grid cell x category code x up to 3 pages; deduplicates
        by Kakao place id (first occurrence wins, which also fixes search_rank).

        Returns:
            {place_id: {kakao_id, name, category, lat, lng, address,
                        place_url, phone, search_rank}}
        """
        all_spots: dict[str, dict] = {}
        grid_cells = self.generate_grid_cells()
        headers = {"Authorization": f"KakaoAK {self.api_key}"}
        api_calls = 0

        for cell in grid_cells:
            rect_str = f"{cell['x1']},{cell['y1']},{cell['x2']},{cell['y2']}"

            for code, category_name in CATEGORY_CODES.items():
                for page in range(1, 4):  # at most 3 pages per cell/category
                    params = {
                        "category_group_code": code,
                        "rect": rect_str,
                        "page": page,
                        "size": 15,
                        "sort": "accuracy",
                    }

                    try:
                        resp = httpx.get(
                            KAKAO_CATEGORY_URL,
                            headers=headers,
                            params=params,
                            timeout=10,
                        )
                        resp.raise_for_status()
                        data = resp.json()
                        api_calls += 1
                    except Exception as e:
                        logger.warning(
                            "카카오맵 API 호출 실패 (cell=%s, code=%s, page=%d): %s",
                            rect_str, code, page, e,
                        )
                        # Skip remaining pages of this category on failure.
                        break

                    for place in data.get("documents", []):
                        place_id = place["id"]
                        if place_id not in all_spots:
                            all_spots[place_id] = {
                                "kakao_id": place_id,
                                "name": place["place_name"],
                                "category": category_name,
                                "lat": float(place["y"]),
                                "lng": float(place["x"]),
                                "address": place["address_name"],
                                "place_url": place.get("place_url", ""),
                                "phone": place.get("phone", ""),
                                # Rank by global insertion order across the scan.
                                "search_rank": len(all_spots) + 1,
                            }

                    # Last page reached -> move on to the next category.
                    if data.get("meta", {}).get("is_end", True):
                        break

        logger.info(
            "카카오맵 스캔 완료: %d개 장소 수집 (API 호출 %d회)",
            len(all_spots), api_calls,
        )
        return all_spots

    # ------------------------------------------------------------------
    # Review-count parsing (unofficial API)
    # ------------------------------------------------------------------

    async def fetch_review_counts(
        self, spots: dict[str, dict], limit: int = REVIEW_PARSE_LIMIT
    ) -> dict[str, dict]:
        """Parse review counts for the top *limit* spots via the unofficial API.

        Failures degrade gracefully to review_count=0 with success=False;
        a warning is emitted when more than half of the requests fail.
        """
        # Only the top N spots by search_rank are parsed.
        sorted_spots = sorted(spots.values(), key=lambda s: s["search_rank"])[:limit]
        results: dict[str, dict] = {}
        success_count = 0
        fail_count = 0

        # Browser-like headers; the endpoint is unofficial and may reject bots.
        headers = {
            "User-Agent": (
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/120.0.0.0 Safari/537.36"
            ),
            "Referer": "https://map.kakao.com/",
        }

        async with httpx.AsyncClient(headers=headers, timeout=10) as client:
            for spot in sorted_spots:
                place_id = spot["kakao_id"]
                url = KAKAO_PLACE_DETAIL_URL.format(place_id=place_id)

                try:
                    resp = await client.get(url)
                    data = resp.json()

                    results[place_id] = {
                        "review_count": data.get("comment", {}).get("scorecnt", 0),
                        "avg_rating": data.get("comment", {}).get("scoretotalavgstar", 0),
                        "blog_review_count": data.get("blogReview", {}).get("blogrvwcnt", 0),
                        "success": True,
                    }
                    success_count += 1
                except Exception as e:
                    results[place_id] = {
                        "review_count": 0,
                        "avg_rating": 0,
                        "blog_review_count": 0,
                        "success": False,
                        "error": str(e),
                    }
                    fail_count += 1

        total = success_count + fail_count
        fail_rate = fail_count / max(total, 1)
        logger.info(
            "리뷰 파싱 완료: 성공 %d, 실패 %d (실패율 %.1f%%)",
            success_count, fail_count, fail_rate * 100,
        )

        if fail_rate > 0.5:
            logger.warning(
                "리뷰 파싱 실패율 50%% 초과 — 이전 데이터 유지 권장"
            )

        return results

    # ------------------------------------------------------------------
    # DB persistence
    # ------------------------------------------------------------------

    def _save_to_db(self, spots: dict[str, dict], reviews: dict[str, dict]) -> int:
        """Persist collected spots + reviews into trend_spots / spot_trends.

        For each spot: upsert the master row, insert a search_rank metric,
        and (when parsing succeeded) a review_count metric. Returns the
        number of spots whose master upsert succeeded.
        """
        today = date.today()
        saved = 0

        for place_id, spot in spots.items():
            # Deterministic id derived from name + kakao id prefix.
            spot_id = f"trend_{spot['name']}_{place_id[:8]}"

            # trend_spots upsert (skip metrics if the master row fails)
            try:
                self.supabase.table("trend_spots").upsert(
                    {
                        "id": spot_id,
                        "name": spot["name"],
                        "category": spot["category"],
                        "lat": spot["lat"],
                        "lng": spot["lng"],
                        "address": spot["address"],
                        "source_ids": {"kakaomap": place_id},
                    },
                    on_conflict="id",
                ).execute()
            except Exception as e:
                logger.warning("trend_spots upsert 실패 (%s): %s", spot_id, e)
                continue

            # spot_trends — search_rank metric
            try:
                self.supabase.table("spot_trends").insert({
                    "spot_id": spot_id,
                    "source": "kakaomap",
                    "metric_type": "search_rank",
                    "metric_value": spot["search_rank"],
                    "period_start": today.isoformat(),
                    "period_end": today.isoformat(),
                    "raw_data": {"kakao_id": place_id, "address": spot["address"]},
                }).execute()
            except Exception as e:
                logger.warning("spot_trends insert 실패 (search_rank, %s): %s", spot_id, e)

            # spot_trends — review_count metric (only when parsing succeeded)
            review = reviews.get(place_id, {})
            if review.get("success"):
                try:
                    self.supabase.table("spot_trends").insert({
                        "spot_id": spot_id,
                        "source": "kakaomap",
                        "metric_type": "review_count",
                        "metric_value": review["review_count"],
                        "period_start": today.isoformat(),
                        "period_end": today.isoformat(),
                        "raw_data": {
                            "avg_rating": review["avg_rating"],
                            "blog_review_count": review["blog_review_count"],
                        },
                    }).execute()
                except Exception as e:
                    logger.warning("spot_trends insert 실패 (review_count, %s): %s", spot_id, e)

            saved += 1

        logger.info("DB 저장 완료: %d건", saved)
        return saved

    # ------------------------------------------------------------------
    # Main entry point
    # ------------------------------------------------------------------

    async def run(self) -> dict:
        """Execute the full KakaoMap collection pipeline.

        Returns:
            {"spots_count": int, "reviews_parsed": int, "saved": int}
        """
        logger.info("=== 카카오맵 수집 시작 ===")

        # Stage 1: grid scan -> spot master data
        spots = self.collect_spots()

        # Stage 2: parse review counts for the top spots
        reviews = await self.fetch_review_counts(spots)

        # Stage 3: persist to DB
        saved = self._save_to_db(spots, reviews)

        result = {
            "spots_count": len(spots),
            "reviews_parsed": len(reviews),
            "saved": saved,
        }
        logger.info("=== 카카오맵 수집 완료: %s ===", result)
        return result
trend_engine/collectors/naver_blog.py ADDED
@@ -0,0 +1,330 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ๋„ค์ด๋ฒ„ ๋ธ”๋กœ๊ทธ 2๋‹จ๊ณ„ ์ˆ˜์ง‘๊ธฐ (Naver Blog 2-Stage Collector)
3
+
4
+ 1๋‹จ๊ณ„: ๋„ค์ด๋ฒ„ ๊ฒ€์ƒ‰ API๋กœ ๋ธ”๋กœ๊ทธ URL ๋ชฉ๋ก ํ™•๋ณด
5
+ 2๋‹จ๊ณ„: ๋ชจ๋ฐ”์ผ URL ํฌ๋กค๋ง์œผ๋กœ ๋ณธ๋ฌธ ์ „์ฒด ์ˆ˜์ง‘
6
+
7
+ ์ˆ˜์ง‘๋œ ๋ณธ๋ฌธ์€ spot_trends ํ…Œ์ด๋ธ”์— ์ €์žฅ๋˜๋ฉฐ,
8
+ ์žฅ์†Œ๋ช… ์ถ”์ถœ์€ ๋ณ„๋„ PlaceNameExtractor๊ฐ€ ๋‹ด๋‹นํ•œ๋‹ค.
9
+ """
10
+
11
+ import logging
12
+ import os
13
+ import time
14
+ import threading
15
+ from concurrent.futures import ThreadPoolExecutor, as_completed
16
+ from datetime import date, datetime, timedelta
17
+
18
+ import requests
19
+ from bs4 import BeautifulSoup
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
+ # ๊ฒ€์ƒ‰ ํ‚ค์›Œ๋“œ ๋ชฉ๋ก (์• ์›”๋ฆฌ ์ง‘์ค‘)
24
+ NAVER_BLOG_KEYWORDS = [
25
+ "์• ์›” ์นดํŽ˜", "์• ์›” ๋ง›์ง‘", "์• ์›” ๊ฐ€๋ณผ๋งŒํ•œ๊ณณ", "์• ์›” ์‚ฐ์ฑ…",
26
+ "์• ์›” ํ•ด์•ˆ", "์• ์›” ๋ทฐ", "์• ์›” ๋””์ €ํŠธ", "์• ์›” ๋ธŒ๋Ÿฐ์น˜",
27
+ "์• ์›” ๊ฐ์„ฑ", "์• ์›” ์ˆจ์€ ๋ช…์†Œ", "์• ์›” ์˜ค์…˜๋ทฐ",
28
+ "์• ์›”๋ฆฌ ์นดํŽ˜", "์• ์›”๋ฆฌ ๋ง›์ง‘",
29
+ "ํ•œ๋‹ด ํ•ด์•ˆ ์‚ฐ์ฑ…๋กœ", "๊ณฝ์ง€ ํ•ด๋ณ€",
30
+ ]
31
+
32
+ # ๋ชจ๋ฐ”์ผ User-Agent
33
+ MOBILE_USER_AGENT = (
34
+ "Mozilla/5.0 (iPhone; CPU iPhone OS 16_0 like Mac OS X) "
35
+ "AppleWebKit/605.1.15 (KHTML, like Gecko) "
36
+ "Version/16.0 Mobile/15E148 Safari/604.1"
37
+ )
38
+
39
+
40
+ class NaverBlogCollector:
41
+ """๋„ค์ด๋ฒ„ ๋ธ”๋กœ๊ทธ 2๋‹จ๊ณ„ ์ˆ˜์ง‘๊ธฐ."""
42
+
43
+ def __init__(self, supabase_client):
44
+ self.supabase = supabase_client
45
+ self.client_id = os.environ["NAVER_CLIENT_ID"]
46
+ self.client_secret = os.environ["NAVER_CLIENT_SECRET"]
47
+ self.keywords = NAVER_BLOG_KEYWORDS
48
+ self._stats = {
49
+ "api_calls": 0,
50
+ "urls_found": 0,
51
+ "urls_unique": 0,
52
+ "crawl_success": 0,
53
+ "crawl_fail": 0,
54
+ "saved": 0,
55
+ }
56
+
57
+ # โ”€โ”€ 1๋‹จ๊ณ„: ๋„ค์ด๋ฒ„ ๊ฒ€์ƒ‰ API๋กœ URL ๋ชฉ๋ก ํ™•๋ณด โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
58
+
59
+ def collect_blog_urls(self) -> list[dict]:
60
+ """๋„ค์ด๋ฒ„ ๊ฒ€์ƒ‰ API๋กœ ๋ธ”๋กœ๊ทธ ํฌ์ŠคํŠธ URL ๋ชฉ๋ก ํ™•๋ณด.
61
+
62
+ ํ‚ค์›Œ๋“œ๋‹น ์ตœ๋Œ€ 1,100๊ฑด(start 1~1000, display 100) ์กฐํšŒ ๊ฐ€๋Šฅํ•˜๋‚˜,
63
+ ์ตœ๊ทผ 1์ฃผ ํฌ์ŠคํŒ… ๊ธฐ์ค€์œผ๋กœ ์‹ค์ œ 100๊ฑด ๋ฏธ๋งŒ์ด ๋Œ€๋ถ€๋ถ„์ด๋‹ค.
64
+
65
+ Returns:
66
+ URL ๊ธฐ์ค€ ์ค‘๋ณต ์ œ๊ฑฐ๋œ ํฌ์ŠคํŠธ ๋ชฉ๋ก
67
+ """
68
+ all_posts: list[dict] = []
69
+ headers = {
70
+ "X-Naver-Client-Id": self.client_id,
71
+ "X-Naver-Client-Secret": self.client_secret,
72
+ }
73
+
74
+ for keyword in self.keywords:
75
+ keyword_count = 0
76
+ for start in range(1, 1001, 100):
77
+ params = {
78
+ "query": keyword,
79
+ "display": 100,
80
+ "start": start,
81
+ "sort": "date",
82
+ }
83
+ try:
84
+ resp = requests.get(
85
+ "https://openapi.naver.com/v1/search/blog",
86
+ headers=headers,
87
+ params=params,
88
+ timeout=10,
89
+ )
90
+ resp.raise_for_status()
91
+ data = resp.json()
92
+ self._stats["api_calls"] += 1
93
+ except requests.RequestException as e:
94
+ logger.warning("๊ฒ€์ƒ‰ API ์‹คํŒจ [%s start=%d]: %s", keyword, start, e)
95
+ break
96
+
97
+ items = data.get("items", [])
98
+ for item in items:
99
+ all_posts.append({
100
+ "title": item["title"],
101
+ "link": item["link"],
102
+ "bloggername": item["bloggername"],
103
+ "postdate": item["postdate"], # YYYYMMDD
104
+ "keyword": keyword,
105
+ })
106
+ keyword_count += 1
107
+
108
+ # ๊ฒฐ๊ณผ๊ฐ€ 100๊ฑด ๋ฏธ๋งŒ์ด๋ฉด ๋” ์ด์ƒ ๋ฐ์ดํ„ฐ ์—†์Œ
109
+ if len(items) < 100:
110
+ break
111
+
112
+ time.sleep(0.1) # API ๋ถ€ํ•˜ ๋ฐฉ์ง€
113
+
114
+ logger.info("ํ‚ค์›Œ๋“œ [%s]: %d๊ฑด ์ˆ˜์ง‘", keyword, keyword_count)
115
+
116
+ self._stats["urls_found"] = len(all_posts)
117
+
118
+ # URL ๊ธฐ์ค€ ์ค‘๋ณต ์ œ๊ฑฐ
119
+ seen_urls: set[str] = set()
120
+ unique_posts: list[dict] = []
121
+ for post in all_posts:
122
+ if post["link"] not in seen_urls:
123
+ seen_urls.add(post["link"])
124
+ unique_posts.append(post)
125
+
126
+ self._stats["urls_unique"] = len(unique_posts)
127
+ logger.info(
128
+ "1๋‹จ๊ณ„ ์™„๋ฃŒ: ์ด %d๊ฑด โ†’ ์ค‘๋ณต ์ œ๊ฑฐ ํ›„ %d๊ฑด (API ํ˜ธ์ถœ %dํšŒ)",
129
+ self._stats["urls_found"],
130
+ self._stats["urls_unique"],
131
+ self._stats["api_calls"],
132
+ )
133
+ return unique_posts
134
+
135
+ # โ”€โ”€ 2๋‹จ๊ณ„: ๋ชจ๋ฐ”์ผ ํฌ๋กค๋ง์œผ๋กœ ๋ณธ๋ฌธ ์ „์ฒด ์ˆ˜์ง‘ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
136
+
137
+ def _convert_to_mobile_url(self, url: str) -> str:
138
+ """PC ๋ธ”๋กœ๊ทธ URL์„ ๏ฟฝ๏ฟฝ๋ฐ”์ผ URL๋กœ ๋ณ€ํ™˜."""
139
+ url = url.replace("https://blog.naver.com", "https://m.blog.naver.com")
140
+ url = url.replace("http://blog.naver.com", "https://m.blog.naver.com")
141
+ return url
142
+
143
+ def _crawl_single_blog(self, url: str) -> str | None:
144
+ """๋‹จ์ผ ๋ธ”๋กœ๊ทธ ๋ชจ๋ฐ”์ผ ํŽ˜์ด์ง€์—์„œ ๋ณธ๋ฌธ ํ…์ŠคํŠธ ์ถ”์ถœ."""
145
+ m_url = self._convert_to_mobile_url(url)
146
+ headers = {"User-Agent": MOBILE_USER_AGENT}
147
+
148
+ try:
149
+ resp = requests.get(m_url, headers=headers, timeout=10)
150
+ resp.raise_for_status()
151
+ except requests.RequestException as e:
152
+ logger.debug("ํฌ๋กค๋ง HTTP ์‹คํŒจ: %s โ€” %s", m_url, e)
153
+ return None
154
+
155
+ soup = BeautifulSoup(resp.text, "lxml")
156
+
157
+ # ์…€๋ ‰ํ„ฐ ์šฐ์„ ์ˆœ์œ„: ์Šค๋งˆํŠธ์—๋””ํ„ฐ3 > ๊ตฌํ˜• ์—๋””ํ„ฐ > ๊ธฐํƒ€
158
+ content_div = (
159
+ soup.find("div", {"class": "se-main-container"})
160
+ or soup.find("div", {"id": "postViewArea"})
161
+ or soup.find("div", {"class": "post_ct"})
162
+ )
163
+
164
+ if content_div:
165
+ return content_div.get_text(separator=" ", strip=True)
166
+
167
+ return None
168
+
169
+ def crawl_blog_contents(
170
+ self, posts: list[dict], max_workers: int = 5
171
+ ) -> list[dict]:
172
+ """๋ธ”๋กœ๊ทธ URL ๋ชฉ๋ก์„ ๋ฐ›์•„ ๋ณธ๋ฌธ ๋ณ‘๋ ฌ ํฌ๋กค๋ง.
173
+
174
+ ThreadPoolExecutor๋กœ max_workers๊ฐœ ๋™์‹œ ์š”์ฒญ.
175
+ ๋„ค์ด๋ฒ„ ์„œ๋ฒ„ ๋ถ€ํ•˜ ๋ฐฉ์ง€๋ฅผ ์œ„ํ•ด ์›Œ์ปค ๊ฐ„ 0.2์ดˆ ๊ฐ„๊ฒฉ ์œ ์ง€.
176
+
177
+ Args:
178
+ posts: collect_blog_urls()์˜ ๋ฐ˜ํ™˜๊ฐ’
179
+ max_workers: ๋™์‹œ ํฌ๋กค๋ง ์›Œ์ปค ์ˆ˜ (๊ธฐ๋ณธ 5)
180
+
181
+ Returns:
182
+ full_content ํ•„๋“œ๊ฐ€ ์ถ”๊ฐ€๋œ ํฌ์ŠคํŠธ ๋ชฉ๋ก (ํฌ๋กค๋ง ์„ฑ๊ณต๋ถ„๋งŒ)
183
+ """
184
+ results: list[dict] = []
185
+ total = len(posts)
186
+ lock = threading.Lock()
187
+
188
+ def crawl_one(post: dict) -> tuple[dict, str | None]:
189
+ content = self._crawl_single_blog(post["link"])
190
+ return post, content
191
+
192
+ logger.info(
193
+ "๋ณ‘๋ ฌ ํฌ๋กค๋ง ์‹œ์ž‘: %d๊ฑด (์›Œ์ปค %d๊ฐœ)", total, max_workers
194
+ )
195
+
196
+ with ThreadPoolExecutor(max_workers=max_workers) as executor:
197
+ # ์›Œ์ปค ๊ฐ„ 0.2์ดˆ ๊ฐ„๊ฒฉ์œผ๋กœ submit (์ดˆ๊ธฐ burst ๋ฐฉ์ง€)
198
+ futures = []
199
+ for i, post in enumerate(posts):
200
+ futures.append(executor.submit(crawl_one, post))
201
+ if (i + 1) % max_workers == 0:
202
+ time.sleep(0.2)
203
+
204
+ done_count = 0
205
+ for future in as_completed(futures):
206
+ try:
207
+ post, content = future.result()
208
+ except Exception as e:
209
+ logger.debug("ํฌ๋กค๋ง ์˜ˆ์™ธ: %s", e)
210
+ with lock:
211
+ self._stats["crawl_fail"] += 1
212
+ done_count += 1
213
+ continue
214
+
215
+ with lock:
216
+ if content:
217
+ post["full_content"] = content
218
+ results.append(post)
219
+ self._stats["crawl_success"] += 1
220
+ else:
221
+ self._stats["crawl_fail"] += 1
222
+
223
+ done_count += 1
224
+ if done_count % 500 == 0 or done_count == total:
225
+ logger.info(
226
+ "ํฌ๋กค๋ง ์ง„ํ–‰: %d/%d (์„ฑ๊ณต: %d, ์‹คํŒจ: %d)",
227
+ done_count, total,
228
+ self._stats["crawl_success"],
229
+ self._stats["crawl_fail"],
230
+ )
231
+
232
+ logger.info(
233
+ "2๋‹จ๊ณ„ ์™„๋ฃŒ: %d๊ฑด ํฌ๋กค๋ง โ†’ ์„ฑ๊ณต %d๊ฑด, ์‹คํŒจ %d๊ฑด",
234
+ total,
235
+ self._stats["crawl_success"],
236
+ self._stats["crawl_fail"],
237
+ )
238
+ return results
239
+
240
+ # โ”€โ”€ DB ์ €์žฅ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
241
+
242
+ def save_to_db(self, posts: list[dict]) -> int:
243
+ """ํฌ๋กค๋ง ๊ฒฐ๊ณผ๋ฅผ spot_trends ํ…Œ์ด๋ธ”์— ์ €์žฅ.
244
+
245
+ source='naver_blog', metric_type='blog_post'๋กœ ์›๋ณธ ๋ฐ์ดํ„ฐ ๋ณด๊ด€.
246
+ ์žฅ์†Œ๋ช… ์ถ”์ถœ ๋ฐ mention_count ์ง‘๊ณ„๋Š” ๋ณ„๋„ ํŒŒ์ดํ”„๋ผ์ธ์—์„œ ์ฒ˜๋ฆฌํ•œ๋‹ค.
247
+
248
+ Args:
249
+ posts: crawl_blog_contents()์˜ ๋ฐ˜ํ™˜๊ฐ’ (full_content ํฌํ•จ)
250
+
251
+ Returns:
252
+ ์ €์žฅ๋œ ๋ ˆ์ฝ”๋“œ ์ˆ˜
253
+ """
254
+ today = date.today()
255
+ period_start = today - timedelta(days=7)
256
+ saved_count = 0
257
+
258
+ # ๋ฐฐ์น˜ insert๋ฅผ ์œ„ํ•œ rows ์ˆ˜์ง‘
259
+ rows: list[dict] = []
260
+ for post in posts:
261
+ rows.append({
262
+ "spot_id": "__pending__", # ์žฅ์†Œ๋ช… ์ถ”์ถœ ์ „์ด๋ฏ€๋กœ placeholder
263
+ "source": "naver_blog",
264
+ "metric_type": "blog_post",
265
+ "metric_value": 1,
266
+ "period_start": period_start.isoformat(),
267
+ "period_end": today.isoformat(),
268
+ "raw_data": {
269
+ "title": post["title"],
270
+ "link": post["link"],
271
+ "bloggername": post["bloggername"],
272
+ "postdate": post["postdate"],
273
+ "keyword": post["keyword"],
274
+ "content_length": len(post.get("full_content", "")),
275
+ "content_preview": post.get("full_content", "")[:1500],
276
+ },
277
+ })
278
+
279
+ # Supabase bulk insert (1000๊ฑด์”ฉ ๋ฐฐ์น˜)
280
+ batch_size = 1000
281
+ for i in range(0, len(rows), batch_size):
282
+ batch = rows[i:i + batch_size]
283
+ try:
284
+ result = self.supabase.table("spot_trends").insert(batch).execute()
285
+ saved_count += len(result.data) if result.data else 0
286
+ except Exception as e:
287
+ logger.error("spot_trends ์ €์žฅ ์‹คํŒจ (batch %d): %s", i // batch_size, e)
288
+
289
+ self._stats["saved"] = saved_count
290
+ logger.info("DB ์ €์žฅ ์™„๋ฃŒ: %d๊ฑด", saved_count)
291
+ return saved_count
292
+
293
+ # โ”€โ”€ ์ „์ฒด ํŒŒ์ดํ”„๋ผ์ธ ์‹คํ–‰ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
294
+
295
+ def run(self) -> dict:
296
+ """์ „์ฒด ์ˆ˜์ง‘ ํŒŒ์ดํ”„๋ผ์ธ ์‹คํ–‰.
297
+
298
+ 1๋‹จ๊ณ„: ๊ฒ€์ƒ‰ API๋กœ URL ํ™•๋ณด
299
+ 2๋‹จ๊ณ„: ๋ชจ๋ฐ”์ผ ํฌ๋กค๋ง์œผ๋กœ ๋ณธ๋ฌธ ์ˆ˜์ง‘
300
+ 3๋‹จ๊ณ„: DB ์ €์žฅ (์›๋ณธ ๋ณด๊ด€)
301
+
302
+ Returns:
303
+ ์ˆ˜์ง‘ ํ†ต๊ณ„ dict
304
+ """
305
+ logger.info("=== ๋„ค์ด๋ฒ„ ๋ธ”๋กœ๊ทธ ์ˆ˜์ง‘ ์‹œ์ž‘ ===")
306
+ start_time = datetime.now()
307
+
308
+ # 1๋‹จ๊ณ„
309
+ posts = self.collect_blog_urls()
310
+ if not posts:
311
+ logger.warning("๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ ์—†์Œ โ€” ์ˆ˜์ง‘ ์ข…๋ฃŒ")
312
+ return {**self._stats, "duration_seconds": 0}
313
+
314
+ # 2๋‹จ๊ณ„
315
+ crawled = self.crawl_blog_contents(posts)
316
+ if not crawled:
317
+ logger.warning("ํฌ๋กค๋ง ๊ฒฐ๊ณผ ์—†์Œ โ€” ์ˆ˜์ง‘ ์ข…๋ฃŒ")
318
+ return {**self._stats, "duration_seconds": 0}
319
+
320
+ # 3๋‹จ๊ณ„
321
+ self.save_to_db(crawled)
322
+
323
+ duration = (datetime.now() - start_time).total_seconds()
324
+ self._stats["duration_seconds"] = duration
325
+ logger.info(
326
+ "=== ๋„ค์ด๋ฒ„ ๋ธ”๋กœ๊ทธ ์ˆ˜์ง‘ ์™„๋ฃŒ (%.1f์ดˆ) === %s",
327
+ duration,
328
+ self._stats,
329
+ )
330
+ return self._stats
trend_engine/collectors/naver_place.py ADDED
@@ -0,0 +1,297 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ๋„ค์ด๋ฒ„ ํ”Œ๋ ˆ์ด์Šค ๋ณด์กฐ ์ˆ˜์ง‘๊ธฐ (Naver Place Auxiliary Collector)
3
+
4
+ ๋ณด์กฐ ์ฑ„๋„ (๊ฐ€์ค‘์น˜ 5%): ์นด์นด์˜ค๋งต ์Šค์บ” ๊ฒฐ๊ณผ ๊ธฐ๋ฐ˜์œผ๋กœ ๋„ค์ด๋ฒ„ ํ”Œ๋ ˆ์ด์Šค ๋ฆฌ๋ทฐ ์ˆ˜๋งŒ ์ถ”๊ฐ€ ์ˆ˜์ง‘.
5
+ ๋น„๊ณต์‹ API ์˜์กด โ€” Graceful Degradation ์„ค๊ณ„.
6
+
7
+ ์•Œ๋ ค์ง„ ์ œ์•ฝ (2026-02):
8
+ - ๋„ค์ด๋ฒ„ ์ง€์—ญ ๊ฒ€์ƒ‰ API link ํ•„๋“œ: ์™ธ๋ถ€ URL๋งŒ ๋ฐ˜ํ™˜ (Place ID ๋ฏธํฌํ•จ)
9
+ - ๋„ค์ด๋ฒ„ ๋งต ๋‚ด๋ถ€ ๊ฒ€์ƒ‰ API: ๋ด‡ ํŠธ๋ž˜ํ”ฝ CAPTCHA ์ฐจ๋‹จ
10
+ - ๊ฒฐ๊ณผ: Place ID ๋งค์นญ ๋ถˆ๊ฐ€ โ†’ ๊ฐ€์ค‘์น˜ ์žฌ๋ถ„๋ฐฐ (trend_scorer์—์„œ None ์ฒ˜๋ฆฌ)
11
+
12
+ - ๋„ค์ด๋ฒ„ ์ง€์—ญ ๊ฒ€์ƒ‰ API: ์žฅ์†Œ๋ช…โ†’ํ”Œ๋ ˆ์ด์Šค ID ๋งค์นญ (display ์ตœ๋Œ€ 5๊ฑด)
13
+ - ๋น„๊ณต์‹ API: https://map.naver.com/p/api/place/summary/{place_id} โ†’ ๋ฆฌ๋ทฐ ์ˆ˜
14
+ - ์‹คํŒจ์œจ 50% ์ด์ƒ ์‹œ ํ•ด๋‹น ์ฃผ๊ธฐ ๊ฑด๋„ˆ๋›ฐ๊ธฐ
15
+ """
16
+
17
+ import logging
18
+ import os
19
+ import re
20
+ from datetime import date, datetime, timedelta
21
+
22
+ import httpx
23
+
24
+ logger = logging.getLogger(__name__)
25
+
26
+
27
+ def _is_similar_address(naver_addr: str, kakao_addr: str) -> bool:
28
+ """๋„ค์ด๋ฒ„/์นด์นด์˜ค ์ฃผ์†Œ ์œ ์‚ฌ๋„ ๋น„๊ต.
29
+
30
+ ์ •ํ™•ํ•œ ์ฃผ์†Œ ์ผ์น˜๊ฐ€ ์•„๋‹ˆ๋ผ ํ•ต์‹ฌ ๊ตฌ์„ฑ ์š”์†Œ(์/๋ฉด/๋™, ๋ฒˆ์ง€/๋„๋กœ๋ช…)๊ฐ€
31
+ ๊ฒน์น˜๋Š”์ง€ ํ™•์ธํ•œ๋‹ค.
32
+ """
33
+ if not naver_addr or not kakao_addr:
34
+ return False
35
+
36
+ # HTML ํƒœ๊ทธ ์ œ๊ฑฐ (๋„ค์ด๋ฒ„ ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ์— <b> ํƒœ๊ทธ๊ฐ€ ํฌํ•จ๋  ์ˆ˜ ์žˆ์Œ)
37
+ naver_clean = re.sub(r"<[^>]+>", "", naver_addr).strip()
38
+ kakao_clean = kakao_addr.strip()
39
+
40
+ # ๊ณต๋ฐฑ/ํŠน์ˆ˜๋ฌธ์ž ์ •๊ทœํ™”
41
+ naver_tokens = set(re.findall(r"[\w๊ฐ€-ํžฃ]+", naver_clean))
42
+ kakao_tokens = set(re.findall(r"[\w๊ฐ€-ํžฃ]+", kakao_clean))
43
+
44
+ # ํ•ต์‹ฌ ํ† ํฐ(์๋ฉด๋™ ์ดํ•˜) ๊ฒน์นจ ๋น„์œจ ํ™•์ธ
45
+ overlap = naver_tokens & kakao_tokens
46
+ if not kakao_tokens:
47
+ return False
48
+
49
+ return len(overlap) / len(kakao_tokens) >= 0.4
50
+
51
+
52
class NaverPlaceCollector:
    """Auxiliary Naver Place collector (5% weight channel).

    Built on top of the KakaoMap scan results; only adds Naver Place
    review counts. Relies on an unofficial API, so the whole channel is
    designed for graceful degradation (see module docstring).
    """

    def __init__(self, supabase_client):
        self.supabase = supabase_client
        # KeyError on missing credentials is intentional — fail fast.
        self.client_id = os.environ["NAVER_CLIENT_ID"]
        self.client_secret = os.environ["NAVER_CLIENT_SECRET"]
        # Run statistics returned by run().
        self._stats = {
            "total_spots": 0,
            "matched": 0,
            "match_failed": 0,
            "review_success": 0,
            "review_failed": 0,
            "saved": 0,
            "skipped_high_failure": False,
        }

    # ── Naver Place ID matching ──────────────────────────────────────────

    async def match_naver_place_id(self, name: str, address: str) -> str | None:
        """Resolve a KakaoMap place name to a Naver Place ID.

        Searches the Naver local-search API for the place name, then compares
        addresses to decide whether a result is the same place. The API caps
        ``display`` at 5 results, so precise matching matters.

        Args:
            name: KakaoMap place name
            address: KakaoMap address

        Returns:
            Naver Place ID, or None when no match is found.
        """
        request_headers = {
            "X-Naver-Client-Id": self.client_id,
            "X-Naver-Client-Secret": self.client_secret,
        }
        request_params = {
            "query": f"애월 {name}",
            "display": 5,
            "start": 1,
            "sort": "comment",
        }

        try:
            async with httpx.AsyncClient() as http:
                response = await http.get(
                    "https://openapi.naver.com/v1/search/local",
                    headers=request_headers,
                    params=request_params,
                    timeout=10,
                )
                response.raise_for_status()
                payload = response.json()
        except httpx.HTTPError as err:
            logger.debug("지역 검색 API 실패 [%s]: %s", name, err)
            return None

        for candidate in payload.get("items", []):
            if not _is_similar_address(candidate.get("address", ""), address):
                continue
            # Pull the Place ID out of the link, if present at all.
            id_match = re.search(r"/(\d{5,})/?", candidate.get("link", ""))
            if id_match:
                return id_match.group(1)

        return None

    # ── Review counts via the unofficial API ─────────────────────────────

    async def fetch_review_count(self, place_id: str) -> dict:
        """Fetch review counts from Naver Place's internal API.

        Unofficial API — may change or be blocked at any time. A failure
        affects only this place (success=False); other channels continue.

        Args:
            place_id: Naver Place ID

        Returns:
            Review-count dict (all zeros when success=False).
        """
        summary_url = f"https://map.naver.com/p/api/place/summary/{place_id}"
        request_headers = {
            "User-Agent": (
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/120.0.0.0 Safari/537.36"
            ),
            "Referer": "https://map.naver.com/",
        }

        try:
            async with httpx.AsyncClient() as http:
                response = await http.get(summary_url, headers=request_headers, timeout=10)
                response.raise_for_status()
                payload = response.json()

                return {
                    "visitor_review_count": payload.get("visitorReviewCount", 0),
                    "blog_review_count": payload.get("blogReviewCount", 0),
                    "avg_rating": payload.get("visitorReviewScore", 0),
                    "success": True,
                }
        except Exception as err:
            logger.debug("플레이스 리뷰 조회 실패 [%s]: %s", place_id, err)
            return {
                "visitor_review_count": 0,
                "blog_review_count": 0,
                "avg_rating": 0,
                "success": False,
                "error": str(err),
            }

    # ── DB persistence ───────────────────────────────────────────────────

    def _save_results(self, results: list[dict]) -> int:
        """Persist collected results into the ``spot_trends`` table."""
        today = date.today()
        window_start = today - timedelta(days=7)
        stored = 0

        rows: list[dict] = []
        for entry in results:
            review = entry.get("review_data")
            if not review or not review.get("success"):
                continue  # only keep places whose review fetch succeeded
            rows.append({
                "spot_id": entry.get("spot_id", entry.get("kakao_id", "__unknown__")),
                "source": "naver_place",
                "metric_type": "review_count",
                "metric_value": review["visitor_review_count"],
                "period_start": window_start.isoformat(),
                "period_end": today.isoformat(),
                "raw_data": {
                    "name": entry.get("name"),
                    "naver_place_id": entry.get("naver_place_id"),
                    "visitor_review_count": review["visitor_review_count"],
                    "blog_review_count": review["blog_review_count"],
                    "avg_rating": review["avg_rating"],
                },
            })

        if not rows:
            return 0

        batch_size = 500
        for offset in range(0, len(rows), batch_size):
            chunk = rows[offset:offset + batch_size]
            try:
                outcome = self.supabase.table("spot_trends").insert(chunk).execute()
                stored += len(outcome.data) if outcome.data else 0
            except Exception as err:
                logger.error("spot_trends 저장 실패 (naver_place batch %d): %s", offset // batch_size, err)

        self._stats["saved"] = stored
        return stored

    # ── Full pipeline ────────────────────────────────────────────────────

    async def run(self, kakaomap_spots: list[dict]) -> dict:
        """Run the auxiliary Naver Place collection.

        Based on the KakaoMap scan results (top 100 at most):
        1. map place names to Naver Place IDs
        2. fetch review counts via the unofficial API
        3. skip the whole cycle when the failure rate reaches 50%

        Args:
            kakaomap_spots: spot list handed over by the KakaoMap collector
                [{"kakao_id": "...", "name": "...", "address": "...", ...}, ...]

        Returns:
            Collection statistics dict.
        """
        logger.info("=== 네이버 플레이스 보조 수집 시작 ===")
        started_at = datetime.now()

        candidates = kakaomap_spots[:100]  # top 100 only
        self._stats["total_spots"] = len(candidates)

        enriched: list[dict] = []

        # Step 1: resolve Naver Place IDs (tags the spot dicts in place).
        for candidate in candidates:
            resolved = await self.match_naver_place_id(
                candidate.get("name", ""), candidate.get("address", "")
            )
            candidate["naver_place_id"] = resolved
            if resolved:
                self._stats["matched"] += 1
            else:
                self._stats["match_failed"] += 1

        # Only matched places proceed to the review fetch.
        matched = [c for c in candidates if c.get("naver_place_id")]
        logger.info(
            "ID 매칭: %d/%d 성공",
            self._stats["matched"],
            self._stats["total_spots"],
        )

        if not matched:
            logger.warning("매칭된 장소 없음 — 수집 종료")
            self._stats["duration_seconds"] = (datetime.now() - started_at).total_seconds()
            return self._stats

        # Step 2: fetch review counts while monitoring the failure rate.
        for candidate in matched:
            fetched = await self.fetch_review_count(candidate["naver_place_id"])
            candidate["review_data"] = fetched

            if fetched["success"]:
                self._stats["review_success"] += 1
            else:
                self._stats["review_failed"] += 1

            enriched.append(candidate)

            # Abort early once the failure rate reaches 50% (min 10 samples).
            attempts = self._stats["review_success"] + self._stats["review_failed"]
            if attempts >= 10:
                fail_ratio = self._stats["review_failed"] / attempts
                if fail_ratio >= 0.5:
                    logger.warning(
                        "리뷰 API 실패율 %.0f%% — 해당 주기 건너뛰기",
                        fail_ratio * 100,
                    )
                    self._stats["skipped_high_failure"] = True
                    break

        # Step 3: persist (skipped entirely on a high-failure cycle).
        if not self._stats["skipped_high_failure"]:
            self._save_results(enriched)

        elapsed = (datetime.now() - started_at).total_seconds()
        self._stats["duration_seconds"] = elapsed
        logger.info(
            "=== 네이버 플레이스 보조 수집 완료 (%.1f초) === %s",
            elapsed,
            self._stats,
        )
        return self._stats
trend_engine/collectors/youtube.py ADDED
@@ -0,0 +1,317 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ YouTube Collector โ€” YouTube Data API v3 ํ‚ค์›Œ๋“œ ๊ฒ€์ƒ‰ + ์œ„์น˜ ๊ธฐ๋ฐ˜ ๊ฒ€์ƒ‰
3
+
4
+ ์ˆ˜์ง‘ ๋ฐฉ์‹:
5
+ 1. 6๊ฐœ ํ‚ค์›Œ๋“œ๋กœ ์ตœ๊ทผ 1์ฃผ ์˜์ƒ ๊ฒ€์ƒ‰ (search.list)
6
+ 2. ์˜์ƒ ์ƒ์„ธ ์ •๋ณด ์กฐํšŒ (videos.list โ€” ์กฐํšŒ์ˆ˜, ์ข‹์•„์š”)
7
+ 3. ์œ„์น˜ ๊ธฐ๋ฐ˜ ๋ณด์กฐ ๊ฒ€์ƒ‰ (์• ์›” ์ค‘์‹ฌ 10km)
8
+ 4. ์˜์ƒ ์ œ๋ชฉ+์„ค๋ช…์—์„œ ์žฅ์†Œ๋ช… ์ถ”์ถœ (PlaceNameExtractor ์—ฐ๋™)
9
+ """
10
+
11
+ import os
12
+ import logging
13
+ from datetime import date, datetime, timedelta, timezone
14
+
15
+ from googleapiclient.discovery import build
16
+
17
+ from trend_engine.place_extractor import PlaceNameExtractor
18
+
19
logger = logging.getLogger(__name__)

# ──────────────────────────────────────────────────
# Configuration
# ──────────────────────────────────────────────────

YOUTUBE_KEYWORDS = [
    "애월 여행",
    "제주 애월 카페",
    "애월 브이로그",
    "애월 해안 산책",
    "애월 맛집 추천",
    "한담 해안",
]

# Center coordinates of the Aewol area.
AEWOL_CENTER = {"lat": "33.46", "lng": "126.31"}
LOCATION_RADIUS = "10km"

MAX_RESULTS_PER_KEYWORD = 20
MAX_RESULTS_LOCATION = 30


class YouTubeCollector:
    """Trend-video collector built on the YouTube Data API v3.

    Pipeline: keyword search + location search (search.list) ->
    per-video statistics (videos.list) -> place-name extraction
    (PlaceNameExtractor) -> spot_trends persistence.
    """

    def __init__(self, supabase_client, spot_matcher=None):
        self.supabase = supabase_client
        # KeyError on a missing API key is intentional — fail fast.
        api_key = os.environ["AEWOL_AI_SYSTEM"]
        self.youtube = build("youtube", "v3", developerKey=api_key)
        self.extractor = PlaceNameExtractor(supabase_client)
        # Optional: normalizes extracted names to trend_spots.id.
        self.spot_matcher = spot_matcher

    # ------------------------------------------------------------------
    # Keyword search
    # ------------------------------------------------------------------

    def collect_keyword_videos(self) -> list[dict]:
        """Search the last week's videos for each configured keyword.

        Returns:
            Video detail dicts (unique by video id), each tagged with the
            keywords that surfaced it.
        """
        one_week_ago = (
            datetime.now(timezone.utc) - timedelta(days=7)
        ).strftime("%Y-%m-%dT%H:%M:%SZ")

        all_video_ids: list[str] = []
        keyword_map: dict[str, list[str]] = {}  # video_id -> keywords

        for keyword in YOUTUBE_KEYWORDS:
            try:
                search_resp = (
                    self.youtube.search()
                    .list(
                        q=keyword,
                        type="video",
                        part="id,snippet",
                        order="date",
                        publishedAfter=one_week_ago,
                        maxResults=MAX_RESULTS_PER_KEYWORD,
                    )
                    .execute()
                )
            except Exception as e:
                # One failed keyword must not kill the whole channel.
                logger.warning("YouTube 검색 실패 (keyword=%s): %s", keyword, e)
                continue

            for item in search_resp.get("items", []):
                vid = item["id"]["videoId"]
                if vid not in keyword_map:
                    keyword_map[vid] = []
                    all_video_ids.append(vid)
                keyword_map[vid].append(keyword)

        logger.info(
            "키워드 검색 완료: %d개 고유 영상 발견 (%d개 키워드)",
            len(all_video_ids), len(YOUTUBE_KEYWORDS),
        )

        # Fetch statistics in batches of 50 (videos.list limit).
        return self._fetch_video_details(all_video_ids, keyword_map)

    # ------------------------------------------------------------------
    # Location-based search
    # ------------------------------------------------------------------

    def collect_location_videos(self) -> list[dict]:
        """Search the last week's videos within 10km of the Aewol center."""
        one_week_ago = (
            datetime.now(timezone.utc) - timedelta(days=7)
        ).strftime("%Y-%m-%dT%H:%M:%SZ")

        try:
            search_resp = (
                self.youtube.search()
                .list(
                    part="id,snippet",
                    type="video",
                    location=f"{AEWOL_CENTER['lat']},{AEWOL_CENTER['lng']}",
                    locationRadius=LOCATION_RADIUS,
                    order="date",
                    publishedAfter=one_week_ago,
                    maxResults=MAX_RESULTS_LOCATION,
                )
                .execute()
            )
        except Exception as e:
            logger.warning("YouTube 위치 검색 실패: %s", e)
            return []

        video_ids = [item["id"]["videoId"] for item in search_resp.get("items", [])]
        logger.info("위치 검색 완료: %d개 영상 발견", len(video_ids))

        keyword_map = {vid: ["location_search"] for vid in video_ids}
        return self._fetch_video_details(video_ids, keyword_map)

    # ------------------------------------------------------------------
    # Video details
    # ------------------------------------------------------------------

    def _fetch_video_details(
        self,
        video_ids: list[str],
        keyword_map: dict[str, list[str]],
    ) -> list[dict]:
        """Fetch view/like statistics via the videos.list API.

        Args:
            video_ids: ids to look up (batched 50 at a time — API limit)
            keyword_map: video_id -> keywords that surfaced it

        Returns:
            Flat video dicts with statistics and originating keywords.
        """
        videos: list[dict] = []

        for i in range(0, len(video_ids), 50):
            batch = video_ids[i : i + 50]
            try:
                resp = (
                    self.youtube.videos()
                    .list(
                        part="statistics,snippet",
                        id=",".join(batch),
                    )
                    .execute()
                )
            except Exception as e:
                logger.warning("YouTube videos.list 실패: %s", e)
                continue

            for video in resp.get("items", []):
                stats = video.get("statistics", {})
                videos.append({
                    "video_id": video["id"],
                    "title": video["snippet"]["title"],
                    "description": video["snippet"].get("description", ""),
                    "channel_title": video["snippet"].get("channelTitle", ""),
                    "view_count": int(stats.get("viewCount", 0)),
                    "like_count": int(stats.get("likeCount", 0)),
                    "comment_count": int(stats.get("commentCount", 0)),
                    "published_at": video["snippet"]["publishedAt"],
                    "keywords": keyword_map.get(video["id"], []),
                })

        return videos

    # ------------------------------------------------------------------
    # Place-name extraction + metric aggregation
    # ------------------------------------------------------------------

    def _extract_place_mentions(self, videos: list[dict]) -> dict[str, dict]:
        """Extract place names from video title+description and aggregate.

        Returns:
            {spot_id_or_name: {name, spot_id, method, mention_video_count,
                               total_views, total_likes}}
        """
        place_metrics: dict[str, dict] = {}

        for video in videos:
            text = video["title"] + " " + video["description"]
            places = self.extractor.extract(text)

            for place in places:
                # Key on spot_id when known, otherwise the raw name.
                key = place["spot_id"] or place["name"]
                if key not in place_metrics:
                    place_metrics[key] = {
                        "name": place["name"],
                        "spot_id": place["spot_id"],
                        "method": place["method"],
                        "mention_video_count": 0,
                        "total_views": 0,
                        "total_likes": 0,
                    }
                place_metrics[key]["mention_video_count"] += 1
                place_metrics[key]["total_views"] += video["view_count"]
                place_metrics[key]["total_likes"] += video["like_count"]

        logger.info("장소명 추출 완료: %d개 장소 식별", len(place_metrics))
        return place_metrics

    # ------------------------------------------------------------------
    # DB persistence
    # ------------------------------------------------------------------

    def _save_to_db(self, place_metrics: dict[str, dict], videos: list[dict]) -> int:
        """Persist extracted metrics into the ``spot_trends`` table.

        When a SpotMatcher is configured, names are normalized to
        trend_spots.id; unmatched places are skipped.

        Args:
            place_metrics: output of _extract_place_mentions()
            videos: currently unused; kept for interface stability
                (reserved for per-video raw_data in a future revision)

        Returns:
            Number of view_count rows stored.
        """
        today = date.today()
        saved = 0
        skipped = 0

        for key, metrics in place_metrics.items():
            # Normalize via SpotMatcher when available.
            if self.spot_matcher:
                spot_id = self.spot_matcher.match(metrics["name"])
                if not spot_id:
                    skipped += 1
                    continue
            else:
                spot_id = metrics["spot_id"] or key

            # view_count metric
            try:
                self.supabase.table("spot_trends").insert({
                    "spot_id": spot_id,
                    "source": "youtube",
                    "metric_type": "view_count",
                    "metric_value": metrics["total_views"],
                    "period_start": today.isoformat(),
                    "period_end": today.isoformat(),
                    "raw_data": {
                        "mention_video_count": metrics["mention_video_count"],
                        "total_likes": metrics["total_likes"],
                        "name": metrics["name"],
                        "method": metrics["method"],
                    },
                }).execute()
                saved += 1
            except Exception as e:
                logger.warning("spot_trends insert 실패 (youtube view_count, %s): %s", spot_id, e)

            # video_count metric (needed for channel scoring)
            try:
                self.supabase.table("spot_trends").insert({
                    "spot_id": spot_id,
                    "source": "youtube",
                    "metric_type": "video_count",
                    "metric_value": metrics["mention_video_count"],
                    "period_start": today.isoformat(),
                    "period_end": today.isoformat(),
                    "raw_data": {"name": metrics["name"]},
                }).execute()
            except Exception as e:
                logger.warning("spot_trends insert 실패 (youtube video_count, %s): %s", spot_id, e)

        if skipped:
            logger.info("YouTube 장소 매칭 실패로 %d건 스킵", skipped)
        logger.info("YouTube DB 저장 완료: %d건", saved)
        return saved

    # ------------------------------------------------------------------
    # Main entry point
    # ------------------------------------------------------------------

    def run(self) -> dict:
        """Run the full YouTube collection pipeline.

        Returns:
            {"keyword_videos": int, "location_videos": int,
             "total_unique_videos": int, "places_found": int, "saved": int}
        """
        logger.info("=== YouTube 수집 시작 ===")

        # Step 1: keyword search
        keyword_videos = self.collect_keyword_videos()

        # Step 2: auxiliary location-based search
        location_videos = self.collect_location_videos()

        # BUGFIX: the original extended keyword_videos in place and then
        # reported len(keyword_videos), so the "keyword_videos" stat always
        # equalled "total_unique_videos". Capture the counts before merging.
        n_keyword = len(keyword_videos)
        n_location = len(location_videos)

        # Merge, de-duplicating by video_id (keyword results take priority).
        all_videos = list(keyword_videos)
        seen_ids = {v["video_id"] for v in all_videos}
        for v in location_videos:
            if v["video_id"] not in seen_ids:
                all_videos.append(v)
                seen_ids.add(v["video_id"])

        # Step 3: place-name extraction + metric aggregation
        place_metrics = self._extract_place_mentions(all_videos)

        # Step 4: DB persistence
        saved = self._save_to_db(place_metrics, all_videos)

        result = {
            "keyword_videos": n_keyword,
            "location_videos": n_location,
            "total_unique_videos": len(all_videos),
            "places_found": len(place_metrics),
            "saved": saved,
        }
        logger.info("=== YouTube 수집 완료: %s ===", result)
        return result
trend_engine/place_extractor.py ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ PlaceNameExtractor โ€” ๋ธ”๋กœ๊ทธ/์œ ํŠœ๋ธŒ ํ…์ŠคํŠธ์—์„œ ์žฅ์†Œ๋ช…์„ ์ถ”์ถœํ•˜๋Š” ๊ณตํ†ต ๋ชจ๋“ˆ
3
+
4
+ ์ถ”์ถœ ์šฐ์„ ์ˆœ์œ„:
5
+ 1. ์žฅ์†Œ๋ช… ์‚ฌ์ „ ๋งค์นญ (trend_spots + story_spots ์ด๋ฆ„)
6
+ 2. ์ ‘๋ฏธ์‚ฌ ํŒจํ„ด ๋งค์นญ (์นดํŽ˜, ์‹๋‹น, ํ•ด๋ณ€ ๋“ฑ)
7
+ """
8
+
9
+ import os
10
+ import re
11
+ import logging
12
+
13
+ from supabase import create_client, Client
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+ # ์žฅ์†Œ๋ช… ์ ‘๋ฏธ์‚ฌ ํŒจํ„ด (์‚ฌ์ „์— ์—†๋Š” ์ƒˆ ์žฅ์†Œ ๋ฐœ๊ฒฌ์šฉ)
18
+ SUFFIX_PATTERNS = [
19
+ re.compile(r"[\w๊ฐ€-ํžฃ]{2,}(?:์นดํŽ˜|์ปคํ”ผ)"),
20
+ re.compile(r"[\w๊ฐ€-ํžฃ]{2,}(?:์‹๋‹น|๋ฐฅ์ง‘|๊ตญ์ˆ˜)"),
21
+ re.compile(r"[\w๊ฐ€-ํžฃ]{2,}(?:ํ•ด๋ณ€|ํ•ด์•ˆ|ํฌ๊ตฌ)"),
22
+ re.compile(r"[\w๊ฐ€-ํžฃ]{2,}(?:์˜ค๋ฆ„|๊ณต์›|์ˆฒ)"),
23
+ re.compile(r"[\w๊ฐ€-ํžฃ]{2,}(?:์ „๋ง๋Œ€|๋ทฐ)"),
24
+ re.compile(r"[\w๊ฐ€-ํžฃ]{2,}(?:๋ฒ ์ด์ปค๋ฆฌ|๋ธŒ๋Ÿฐ์น˜|๋””์ €ํŠธ)"),
25
+ ]
26
+
27
+ # ๋„ˆ๋ฌด ์ผ๋ฐ˜์ ์ธ ๋‹จ์–ด โ€” ์žฅ์†Œ๋ช…์ด ์•„๋‹Œ ๊ฒฝ์šฐ ํ•„ํ„ฐ๋ง
28
+ STOPWORDS = frozenset({
29
+ # ์ง€์‹œ๋Œ€๋ช…์‚ฌ + ์นดํ…Œ๊ณ ๋ฆฌ
30
+ "์ด์นดํŽ˜", "๊ทธ์นดํŽ˜", "์ €์นดํŽ˜", "์ด์‹๋‹น", "๊ทธ์‹๋‹น",
31
+ "์ดํ•ด๋ณ€", "๊ทธํ•ด๋ณ€", "์ด์˜ค๋ฆ„", "๊ทธ์˜ค๋ฆ„",
32
+ # ํ˜•์šฉ์‚ฌ + ์นดํ…Œ๊ณ ๋ฆฌ
33
+ "์ข‹์€์นดํŽ˜", "์˜ˆ์œ์นดํŽ˜", "๋ง›์žˆ๋Š”์‹๋‹น", "์ข‹์€์‹๋‹น",
34
+ # ์œ„์น˜/์ˆ˜์‹์–ด + ์นดํ…Œ๊ณ ๋ฆฌ
35
+ "๊ทผ์ฒ˜์นดํŽ˜", "์ฃผ๋ณ€์นดํŽ˜", "๋™๋„ค์นดํŽ˜", "์œ ๋ช…์นดํŽ˜",
36
+ "๊ฐ์„ฑ์นดํŽ˜", "๋ทฐ์นดํŽ˜", "์˜ค์…˜๋ทฐ์นดํŽ˜",
37
+ "์• ์›”์นดํŽ˜", "ํ•œ๋ฆผ์นดํŽ˜", "ํ˜‘์žฌ์นดํŽ˜",
38
+ "์ œ์ฃผ์นดํŽ˜", "์ œ์ฃผ์‹๋‹น", "์ œ์ฃผํ•ด๋ณ€", "์ œ์ฃผ์˜ค๋ฆ„",
39
+ "์˜ˆ์œ์‹๋‹น", "์ œ์ฃผ๋ง›์ง‘", "์• ์›”๋ง›์ง‘",
40
+ # ์นดํ…Œ๊ณ ๋ฆฌ ์ผ๋ฐ˜๋ช…์‚ฌ (๋‹จ๋… ์‚ฌ์šฉ ์‹œ ์žฅ์†Œ๋ช… ์•„๋‹˜)
41
+ "์นดํŽ˜", "์ปคํ”ผ", "์‹๋‹น", "๋ง›์ง‘", "๋ฐฅ์ง‘", "๊ตญ์ˆ˜",
42
+ "ํ•ด๋ณ€", "ํ•ด์•ˆ", "ํฌ๊ตฌ", "์˜ค๋ฆ„", "๊ณต์›", "์ˆฒ",
43
+ "์ „๋ง๋Œ€", "๋ทฐ", "๋ฒ ์ด์ปค๋ฆฌ", "๋ธŒ๋Ÿฐ์น˜", "๋””์ €ํŠธ",
44
+ "์‚ฐ์ฑ…", "์‚ฐ์ฑ…๋กœ", "๋“œ๋ผ์ด๋ธŒ",
45
+ # ์ง€์—ญ + ์ผ๋ฐ˜ ํ‘œํ˜„
46
+ "์ œ์ฃผ์—ฌํ–‰", "์• ์›”์—ฌํ–‰", "์ œ์ฃผ๋„", "์• ์›”", "์• ์›”๋ฆฌ",
47
+ })
48
+
49
+
50
def _get_supabase_client() -> Client:
    """Build a Supabase service client from environment variables.

    Raises:
        ValueError: when the URL or service-role key is not configured.
    """
    env = os.environ
    url = env.get("SUPABASE_URL") or env.get("VITE_SUPABASE_URL")
    key = env.get("SUPABASE_SERVICE_ROLE_KEY") or env.get("SUPABASE_SERVICE_KEY")
    if url and key:
        return create_client(url, key)
    raise ValueError("SUPABASE_URL and SUPABASE_SERVICE_ROLE_KEY must be set")
56
+
57
+
58
class PlaceNameExtractor:
    """Extracts place names from free text (blog posts, video metadata).

    Two-stage strategy:
      1. dictionary matching against known spot names (longest first)
      2. suffix-pattern matching to surface places missing from the dictionary
    """

    def __init__(self, supabase: Client | None = None):
        self.supabase = supabase or _get_supabase_client()
        # {place name: spot_id} — exact-match lookup table
        self.known_places: dict[str, str] = {}
        self._load_place_dictionary()

    # ------------------------------------------------------------------
    # Dictionary construction
    # ------------------------------------------------------------------

    def _load_place_dictionary(self) -> None:
        """Populate the name dictionary from trend_spots and story_spots."""

        # 1) trend_spots (Kakao Map master)
        try:
            rows = self.supabase.table("trend_spots").select("id, name").execute().data or []
            for row in rows:
                self._register_name(row["name"], row["id"])
        except Exception as e:
            logger.warning("trend_spots 로드 실패 (테이블 미존재 가능): %s", e)

        # 2) story_spots (aliases included)
        try:
            resp = (
                self.supabase.table("story_spots")
                .select("id, name, aliases")
                .execute()
            )
            for row in resp.data or []:
                self._register_name(row["name"], row["id"])
                for alias in row.get("aliases") or []:
                    if alias:
                        self._register_name(alias, row["id"])
        except Exception as e:
            logger.warning("story_spots 로드 실패: %s", e)

        logger.info("장소명 사전 구축 완료: %d건", len(self.known_places))

    def _register_name(self, name: str, spot_id: str) -> None:
        """Register a name together with its space-stripped variant."""
        stripped = name.strip()
        if not stripped:
            return
        self.known_places[stripped] = spot_id
        # space-stripped variant ("봄날의 카페" → "봄날의카페")
        compact = stripped.replace(" ", "")
        if compact != stripped:
            self.known_places[compact] = spot_id

    # ------------------------------------------------------------------
    # Extraction
    # ------------------------------------------------------------------

    def extract(self, text: str) -> list[dict]:
        """
        Extract place names from *text*.

        Returns:
            [{"name": str, "spot_id": str|None, "method": "dictionary"|"pattern"}, ...]
        """
        if not text:
            return []

        hits: list[dict] = []
        seen: set[str] = set()

        # Stage 1: dictionary matching, longest names first (avoids partial shadowing).
        for known in sorted(self.known_places, key=len, reverse=True):
            # NOTE(review): this only skips stopwords of <= 2 chars; a longer
            # stopword that somehow entered the dictionary would still match —
            # confirm whether the AND was intended.
            if len(known) <= 2 and known in STOPWORDS:
                continue
            if known in text and known not in seen:
                hits.append({
                    "name": known,
                    "spot_id": self.known_places[known],
                    "method": "dictionary",
                })
                seen.add(known)

        # Stage 2: suffix patterns for places missing from the dictionary.
        for pattern in SUFFIX_PATTERNS:
            for candidate in pattern.findall(text):
                if candidate not in seen and candidate not in STOPWORDS:
                    hits.append({
                        "name": candidate,
                        "spot_id": None,
                        "method": "pattern",
                    })
                    seen.add(candidate)

        return hits
trend_engine/spot_matcher.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ SpotMatcher โ€” ์ฑ„๋„๋ณ„ ์ˆ˜์ง‘ ๊ฒฐ๊ณผ์˜ spot_id๋ฅผ trend_spots ๋งˆ์Šคํ„ฐ ID๋กœ ํ†ตํ•ฉ ๋งค์นญ
3
+
4
+ ๋ชจ๋“  ์ฑ„๋„์˜ ์ˆ˜์ง‘ ๊ฒฐ๊ณผ๋ฅผ trend_spots(์นด์นด์˜ค๋งต ๊ธฐ๋ฐ˜)๊ณผ
5
+ story_spots(ํ–ฅํ† ์ง€ ๊ธฐ๋ฐ˜)์—์„œ ์ •์˜๋œ ์žฅ์†Œ ID๋กœ ๋งค์นญํ•œ๋‹ค.
6
+
7
+ ๋งค์นญ ์šฐ์„ ์ˆœ์œ„:
8
+ 1. trend_spots ์ •ํ™• ๋งค์นญ (๊ณต๋ฐฑ ์ œ๊ฑฐ ๋ณ€ํ˜• ํฌํ•จ)
9
+ 2. story_spots ์ •ํ™• ๋งค์นญ
10
+ 3. trend_spots ๋ถ€๋ถ„ ๋งค์นญ (์ตœ์†Œ 3๊ธ€์ž ๊ฒน์นจ)
11
+ """
12
+
13
+ import logging
14
+ import os
15
+
16
+ from supabase import create_client, Client
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+
21
def _get_supabase_client() -> Client:
    """Create a Supabase service client; raise if the env vars are missing."""
    url = os.environ.get("SUPABASE_URL") or os.environ.get("VITE_SUPABASE_URL")
    key = os.environ.get("SUPABASE_SERVICE_ROLE_KEY") or os.environ.get("SUPABASE_SERVICE_KEY")
    if not (url and key):
        raise ValueError("SUPABASE_URL and SUPABASE_SERVICE_ROLE_KEY must be set")
    return create_client(url, key)
27
+
28
+
29
class SpotMatcher:
    """Resolves place names from every channel to trend_spots/story_spots IDs.

    The Kakao-Map-derived trend_spots table acts as the primary dictionary.

    Matching order:
      1. trend_spots exact match (space-stripped variant included)
      2. story_spots exact match
      3. trend_spots partial match (at least 3 overlapping characters)
    """

    def __init__(self, supabase: Client | None = None):
        self.supabase = supabase or _get_supabase_client()
        self.trend_spots: dict[str, str] = {}  # name -> trend_spots.id
        self.story_spots: dict[str, str] = {}  # name -> story_spots.id
        self._load_dictionaries()

    def _load_dictionaries(self) -> None:
        """Load the name dictionaries from trend_spots and story_spots."""

        # 1) trend_spots (Kakao Map master)
        try:
            rows = self.supabase.table("trend_spots").select("id, name").execute().data or []
            for row in rows:
                spot_name = row["name"].strip()
                if not spot_name:
                    continue
                self.trend_spots[spot_name] = row["id"]
                compact = spot_name.replace(" ", "")
                if compact != spot_name:
                    self.trend_spots[compact] = row["id"]
        except Exception as e:
            logger.warning("trend_spots 로드 실패: %s", e)

        # 2) story_spots (only rows that actually carry a name)
        try:
            resp = (
                self.supabase.table("story_spots")
                .select("id, name")
                .not_.is_("name", "null")
                .execute()
            )
            for row in resp.data or []:
                spot_name = (row.get("name") or "").strip()
                if spot_name:
                    self.story_spots[spot_name] = row["id"]
        except Exception as e:
            logger.warning("story_spots 로드 실패: %s", e)

        logger.info(
            "SpotMatcher 사전 로드: trend_spots %d건, story_spots %d건",
            len(self.trend_spots),
            len(self.story_spots),
        )

    def match(self, name: str) -> str | None:
        """Resolve *name* to a trend_spots.id or story_spots.id.

        Returns:
            The matched spot id, or None when nothing matches.
        """
        if not name:
            return None
        candidate = name.strip()

        # 1. trend_spots exact match (space-stripped variant included)
        if candidate in self.trend_spots:
            return self.trend_spots[candidate]
        compact = candidate.replace(" ", "")
        if compact in self.trend_spots:
            return self.trend_spots[compact]

        # 2. story_spots exact match
        if candidate in self.story_spots:
            return self.story_spots[candidate]

        # 3. trend_spots partial match, longest names tried first
        for known in sorted(self.trend_spots, key=len, reverse=True):
            if min(len(known), len(candidate)) >= 3 and (known in candidate or candidate in known):
                return self.trend_spots[known]

        return None
trend_engine/trend_scorer.py ADDED
@@ -0,0 +1,344 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Trend Scorer โ€” ์ฑ„๋„๋ณ„ ์ธ๊ธฐ๋„ ์Šค์ฝ”์–ด ๊ณ„์‚ฐ + ์ข…ํ•ฉ ๋žญํ‚น ์ƒ์„ฑ
3
+
4
+ v3 ์ฑ„๋„ ๊ฐ€์ค‘์น˜:
5
+ naver_blog 0.30
6
+ kakaomap 0.25
7
+ instagram 0.25
8
+ youtube 0.15
9
+ naver_place 0.05
10
+ """
11
+
12
+ import os
13
+ import logging
14
+ from datetime import datetime, timedelta, date
15
+
16
+ from supabase import create_client, Client
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+ # ์ฑ„๋„ ๊ฐ€์ค‘์น˜ (v3 โ€” naver_place ๋น„ํ™œ์„ฑ, 4์ฑ„๋„ ์ฒด์ œ)
21
+ CHANNEL_WEIGHTS: dict[str, float] = {
22
+ "naver_blog": 0.30,
23
+ "kakaomap": 0.25,
24
+ "instagram": 0.25,
25
+ "youtube": 0.15,
26
+ # "naver_place": 0.05, # ๋น„ํ™œ์„ฑ โ€” Place ID ๋งค์นญ ๋ถˆ๊ฐ€ (2026-02)
27
+ }
28
+
29
+
30
def _get_supabase_client() -> Client:
    """Return a Supabase client configured from the process environment.

    Raises:
        ValueError: when the URL or service key is not set.
    """
    url, key = (
        os.environ.get("SUPABASE_URL") or os.environ.get("VITE_SUPABASE_URL"),
        os.environ.get("SUPABASE_SERVICE_ROLE_KEY") or os.environ.get("SUPABASE_SERVICE_KEY"),
    )
    if not url or not key:
        raise ValueError("SUPABASE_URL and SUPABASE_SERVICE_ROLE_KEY must be set")
    return create_client(url, key)
36
+
37
+
38
+ # ------------------------------------------------------------------
39
+ # ์ •๊ทœํ™” ์œ ํ‹ธ
40
+ # ------------------------------------------------------------------
41
+
42
+
43
def normalize_score(value: float, max_value: float) -> int:
    """Scale *value* to 0–100 against the channel-wide maximum, clamped at 100.

    A non-positive maximum yields 0 (avoids division by zero).
    """
    if max_value > 0:
        return min(100, int((value / max_value) * 100))
    return 0
48
+
49
+
50
+ # ------------------------------------------------------------------
51
+ # ์ฑ„๋„๋ณ„ ์Šค์ฝ”์–ด ๊ณ„์‚ฐ
52
+ # ------------------------------------------------------------------
53
+
54
+
55
def calc_naver_blog_score(
    weekly_mentions: int,
    max_weekly_mentions: int,
    mention_growth: float = 0.0,
) -> int:
    """Naver blog channel: weekly mention count plus a surge bonus.

    Args:
        weekly_mentions: mentions observed this week.
        max_weekly_mentions: channel-wide weekly maximum (normalization cap).
        mention_growth: relative growth vs last week; positive growth earns
            +10 points per 1.0x, capped at +20.

    Returns:
        Score in 0..100.
    """
    # Base: mentions normalized against the channel maximum.
    if max_weekly_mentions <= 0:
        base = 0
    else:
        base = min(100, int((weekly_mentions / max_weekly_mentions) * 100))
    # Surge bonus, only for positive growth.
    bonus = min(20, int(mention_growth * 10)) if mention_growth > 0 else 0
    return min(100, base + bonus)
64
+
65
+
66
def calc_kakaomap_score(
    review_count: int,
    max_review_count: int,
    review_growth: int = 0,
    max_review_growth: int = 1,
    search_rank: int = 0,
    max_rank: int = 256,
) -> int:
    """Kakao Map channel: review count (60%) + week-over-week growth (40%).

    When no review data is available, falls back to search_rank mapped
    linearly to a score (rank 1 ≈ 100 points, rank max_rank → 0 points).
    """

    def _norm(value: float, cap: float) -> int:
        # Same normalization as module-level normalize_score, kept local.
        if cap <= 0:
            return 0
        return min(100, int((value / cap) * 100))

    if review_count > 0:
        weighted = _norm(review_count, max_review_count) * 0.6 + _norm(review_growth, max_review_growth) * 0.4
        return min(100, int(weighted))

    # Fallback when the unofficial review API is blocked.
    if search_rank > 0:
        return max(0, int((1 - search_rank / max(max_rank, 1)) * 100))

    return 0
88
+
89
+
90
def calc_instagram_score(
    hashtag_post_count: int,
    max_post_count: int,
    avg_engagement: float = 0.0,
    max_engagement: float = 1.0,
) -> int:
    """Instagram channel: hashtag post count (50%) + average engagement (50%)."""

    def _norm(value: float, cap: float) -> int:
        # Normalize to 0..100 against the channel-wide cap.
        if cap <= 0:
            return 0
        return min(100, int((value / cap) * 100))

    blended = _norm(hashtag_post_count, max_post_count) * 0.5 + _norm(avg_engagement, max_engagement) * 0.5
    return min(100, int(blended))
100
+
101
+
102
def calc_youtube_score(
    mention_video_count: int,
    max_video_count: int,
    total_views: int = 0,
    max_total_views: int = 1,
) -> int:
    """YouTube channel: mention video count (40%) + total view count (60%)."""

    def _norm(value: float, cap: float) -> int:
        # Normalize to 0..100 against the channel-wide cap.
        if cap <= 0:
            return 0
        return min(100, int((value / cap) * 100))

    blended = _norm(mention_video_count, max_video_count) * 0.4 + _norm(total_views, max_total_views) * 0.6
    return min(100, int(blended))
112
+
113
+
114
+ def calc_naver_place_score(
115
+ visitor_review_count: int,
116
+ max_visitor_reviews: int,
117
+ naver_review_growth: int = 0,
118
+ max_review_growth: int = 1,
119
+ data_available: bool = True,
120
+ ) -> int | None:
121
+ """๋„ค์ด๋ฒ„ ํ”Œ๋ ˆ์ด์Šค (๋ณด์กฐ): ๋ฐ์ดํ„ฐ ์—†์œผ๋ฉด None ๋ฐ˜ํ™˜."""
122
+ if not data_available:
123
+ return None
124
+ review_base = normalize_score(visitor_review_count, max_visitor_reviews) * 0.7
125
+ growth_base = normalize_score(naver_review_growth, max_review_growth) * 0.3
126
+ return min(100, int(review_base + growth_base))
127
+
128
+
129
+ # ------------------------------------------------------------------
130
+ # ์ข…ํ•ฉ ์Šค์ฝ”์–ด
131
+ # ------------------------------------------------------------------
132
+
133
+
134
def calc_composite_score(channel_scores: dict[str, int | None]) -> int:
    """
    Blend per-channel scores into a composite popularity score (0–100).

    Channels with no data (None) have their weight redistributed
    proportionally across the remaining channels.

    Args:
        channel_scores: {"naver_blog": 80, "kakaomap": 60, ..., "naver_place": None}
    """
    parts: list[tuple[float, float]] = []  # (score, weight) for channels with data
    covered = 0.0  # total weight of the channels that reported a score

    for channel, weight in CHANNEL_WEIGHTS.items():
        value = channel_scores.get(channel)
        if value is not None:
            parts.append((float(value), weight))
            covered += weight

    if covered == 0:
        return 0

    # Renormalize each weight by the covered total, then blend.
    blended = sum(score * (weight / covered) for score, weight in parts)
    return min(100, int(blended))
157
+
158
+
159
+ # ------------------------------------------------------------------
160
+ # ์ฃผ๊ฐ„ ๋žญํ‚น ์ƒ์„ฑ
161
+ # ------------------------------------------------------------------
162
+
163
+
164
def generate_weekly_ranking(supabase: Client | None = None) -> dict:
    """
    Build the weekly composite popularity ranking.

    1. Read this week's and last week's metrics from spot_trends.
    2. Compute per-channel scores, then the composite score per spot.
    3. Write popularity_score back to trend_spots.
    4. Return overall / per-channel rankings.

    Returns:
        {"overall": [...], "naver_blog": [...], ..., "trending_up": [...]}
    """
    sb = supabase or _get_supabase_client()
    # NOTE(review): utcnow() is naive and deprecated in 3.12; switching to
    # datetime.now(timezone.utc) would change the stored isoformat — confirm first.
    now = datetime.utcnow()
    this_week_start = (now - timedelta(days=now.weekday())).date()
    last_week_start = this_week_start - timedelta(days=7)

    # -- this week's metrics (valid spot_ids only) --
    this_week_rows = (
        sb.table("spot_trends")
        .select("spot_id, source, metric_type, metric_value")
        .gte("period_end", this_week_start.isoformat())
        .neq("spot_id", "__pending__")
        .execute()
    ).data or []

    # -- last week's metrics (valid spot_ids only) --
    last_week_rows = (
        sb.table("spot_trends")
        .select("spot_id, source, metric_type, metric_value")
        .gte("period_end", last_week_start.isoformat())
        .lt("period_end", this_week_start.isoformat())
        .neq("spot_id", "__pending__")
        .execute()
    ).data or []

    def _bucket(rows: list[dict]) -> dict[str, dict[str, dict[str, int]]]:
        # Group as {spot_id: {source: {metric_type: metric_value}}}.
        grouped: dict[str, dict[str, dict[str, int]]] = {}
        for row in rows:
            grouped.setdefault(row["spot_id"], {}).setdefault(row["source"], {})[
                row["metric_type"]
            ] = row["metric_value"]
        return grouped

    spots_this = _bucket(this_week_rows)
    spots_last = _bucket(last_week_rows)

    # -- channel-wide maxima for normalization --
    maxes = _calc_maxes(spots_this)

    # -- per-spot channel scores + composite --
    spot_scores: list[dict] = []
    for spot_id, src_data in spots_this.items():
        last_src = spots_last.get(spot_id, {})

        # Naver blog
        mentions = src_data.get("naver_blog", {}).get("mention_count", 0)
        prev_mentions = last_src.get("naver_blog", {}).get("mention_count", 0)
        growth = (mentions - prev_mentions) / max(prev_mentions, 1) if prev_mentions else 0.0
        nb_score = calc_naver_blog_score(mentions, maxes["naver_blog_mentions"], growth)

        # Kakao Map (search_rank fallback supported)
        km = src_data.get("kakaomap", {})
        reviews = km.get("review_count", 0)
        prev_reviews = last_src.get("kakaomap", {}).get("review_count", 0)
        km_score = calc_kakaomap_score(
            reviews, maxes["kakaomap_reviews"],
            reviews - prev_reviews, maxes["kakaomap_review_growth"],
            search_rank=km.get("search_rank", 0), max_rank=maxes["kakaomap_max_rank"],
        )

        # Instagram
        ig = src_data.get("instagram", {})
        ig_score = calc_instagram_score(
            ig.get("post_count", 0), maxes["instagram_posts"],
            ig.get("avg_engagement", 0), maxes["instagram_engagement"],
        )

        # YouTube
        yt = src_data.get("youtube", {})
        yt_score = calc_youtube_score(
            yt.get("video_count", 0), maxes["youtube_videos"],
            yt.get("view_count", 0), maxes["youtube_views"],
        )

        # Naver Place — disabled (Place ID matching unavailable, 2026-02);
        # it is also absent from CHANNEL_WEIGHTS, so no score is computed.

        channel_scores = {
            "naver_blog": nb_score,
            "kakaomap": km_score,
            "instagram": ig_score,
            "youtube": yt_score,
        }
        spot_scores.append({
            "spot_id": spot_id,
            "popularity_score": calc_composite_score(channel_scores),
            "naver_blog_score": nb_score,
            "kakaomap_score": km_score,
            "instagram_score": ig_score,
            "youtube_score": yt_score,
            "channel_scores": channel_scores,
        })

    # -- write composite scores back to trend_spots --
    for entry in spot_scores:
        try:
            sb.table("trend_spots").update({
                "popularity_score": entry["popularity_score"],
                "popularity_updated_at": now.isoformat(),
            }).eq("id", entry["spot_id"]).execute()
        except Exception as e:
            logger.warning("trend_spots 업데이트 실패 (%s): %s", entry["spot_id"], e)

    # -- build rankings --
    overall = sorted(spot_scores, key=lambda s: s["popularity_score"], reverse=True)
    rankings = {
        "overall": overall[:50],
        "naver_blog": sorted(spot_scores, key=lambda s: s["naver_blog_score"], reverse=True)[:30],
        "kakaomap": sorted(spot_scores, key=lambda s: s["kakaomap_score"], reverse=True)[:30],
        "instagram": sorted(spot_scores, key=lambda s: s["instagram_score"], reverse=True)[:30],
        "youtube": sorted(spot_scores, key=lambda s: s["youtube_score"], reverse=True)[:30],
        "trending_up": overall[:10],  # week-over-week comparison possible from the next cycle
    }

    logger.info("주간 랭킹 생성 완료: %d개 스팟", len(spot_scores))
    return rankings
305
+
306
+
307
+ # ------------------------------------------------------------------
308
+ # ๋‚ด๋ถ€ ํ—ฌํผ
309
+ # ------------------------------------------------------------------
310
+
311
+
312
+ def _calc_maxes(spots_data: dict[str, dict[str, dict[str, int]]]) -> dict[str, int]:
313
+ """์ „์ฒด ์ŠคํŒŸ์—์„œ ์ฑ„๋„๋ณ„ ์ตœ๋Œ€๊ฐ’ ๊ณ„์‚ฐ."""
314
+ maxes = {
315
+ "naver_blog_mentions": 1,
316
+ "kakaomap_reviews": 1,
317
+ "kakaomap_review_growth": 1,
318
+ "kakaomap_max_rank": 1,
319
+ "instagram_posts": 1,
320
+ "instagram_engagement": 1,
321
+ "youtube_videos": 1,
322
+ "youtube_views": 1,
323
+ }
324
+
325
+ for src_data in spots_data.values():
326
+ blog = src_data.get("naver_blog", {})
327
+ maxes["naver_blog_mentions"] = max(maxes["naver_blog_mentions"], blog.get("mention_count", 0))
328
+
329
+ km = src_data.get("kakaomap", {})
330
+ maxes["kakaomap_reviews"] = max(maxes["kakaomap_reviews"], km.get("review_count", 0))
331
+ maxes["kakaomap_max_rank"] = max(maxes["kakaomap_max_rank"], km.get("search_rank", 0))
332
+
333
+ ig = src_data.get("instagram", {})
334
+ maxes["instagram_posts"] = max(maxes["instagram_posts"], ig.get("post_count", 0))
335
+ # avg_engagement: -1 ๊ฐ’ ํ•„ํ„ฐ๋ง (Apify ๋ฏธ์ˆ˜์ง‘ ์ผ€์ด์Šค)
336
+ ig_eng = ig.get("avg_engagement", 0)
337
+ if ig_eng > 0:
338
+ maxes["instagram_engagement"] = max(maxes["instagram_engagement"], ig_eng)
339
+
340
+ yt = src_data.get("youtube", {})
341
+ maxes["youtube_videos"] = max(maxes["youtube_videos"], yt.get("video_count", 0))
342
+ maxes["youtube_views"] = max(maxes["youtube_views"], yt.get("view_count", 0))
343
+
344
+ return maxes