# NOTE(review): removed non-source paste artifacts ("Spaces:" / "Running" web-UI residue).
"""
RE:Play Trend Engine v3 -- weekly batch orchestrator.

Sequential pipeline:
1. Kakao Map grid scan + review parsing (builds the trend_spots master)
2. SpotMatcher initialization (preloads trend_spots + story_spots)
3. YouTube API (SpotMatcher integration)
4. Instagram influencer monitoring v5.0 (SpotMatcher integration)
5. Naver blog collection (URL candidates + crawling + DB persistence)
6. Blog body -> place-name extraction + mention_count aggregation
7. Composite score calculation + ranking generation

Usage:
    python backend/scripts/run_trend_engine.py
"""
import asyncio
import json
import logging
import os
import re
import sys
import time
from datetime import date, timedelta

# Make backend/ importable when this file is run directly as a script.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))

# Load the project-root .env for local runs; CI environments (e.g. GitHub
# Actions) inject environment variables directly and may lack python-dotenv.
try:
    from dotenv import load_dotenv

    env_path = os.path.join(os.path.dirname(__file__), "..", "..", ".env")
    load_dotenv(env_path)
except ImportError:
    pass  # environments without dotenv (e.g. GitHub Actions)

from supabase import create_client

from trend_engine.collectors.instagram import InstagramCollector
from trend_engine.collectors.kakaomap import KakaoMapCollector
from trend_engine.collectors.naver_blog import NaverBlogCollector
from trend_engine.collectors.naver_datalab import NaverDataLabCollector
from trend_engine.collectors.youtube import YouTubeCollector
from trend_engine.place_extractor import PlaceNameExtractor
from trend_engine.spot_matcher import SpotMatcher
from trend_engine.spot_registrar import TrendSpotRegistrar
from trend_engine.trend_scorer import generate_weekly_ranking
from trend_engine.utils import cleanup_old_trends, get_week_period, safe_upsert_spot_trend
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)
logger = logging.getLogger("trend_engine.orchestrator")


def _get_supabase_client():
    """Build a service-role Supabase client from environment variables.

    Accepts either ``SUPABASE_URL`` or the frontend-style
    ``VITE_SUPABASE_URL`` for the project URL.

    Raises:
        ValueError: when the URL or the service-role key is missing.
    """
    url = os.environ.get("SUPABASE_URL") or os.environ.get("VITE_SUPABASE_URL")
    key = os.environ.get("SUPABASE_SERVICE_ROLE_KEY")
    if not url or not key:
        raise ValueError("SUPABASE_URL and SUPABASE_SERVICE_ROLE_KEY must be set")
    return create_client(url, key)


def run_step(name: str, func, results: dict):
    """Execute one pipeline step, record its outcome, and return its result.

    Args:
        name: Step identifier used as the key in ``results``.
        func: Zero-argument callable implementing the step.
        results: Mutable dict an outcome entry is written into.

    Returns:
        The step's return value, or ``None`` when the step raised.
    """
    logger.info("--- [START] %s ---", name)
    start = time.time()
    try:
        result = func()
        elapsed = time.time() - start
        results[name] = {
            "status": "ok",
            "result": _summarize(result),
            # Keep the raw dict so later steps (e.g. pending cleanup) can
            # inspect counts without re-parsing the summary string.
            "result_raw": result if isinstance(result, dict) else None,
            "elapsed_sec": round(elapsed, 1),
        }
        logger.info("[DONE] %s - %.1fs", name, elapsed)
        return result
    except Exception as e:
        elapsed = time.time() - start
        results[name] = {"status": "error", "error": str(e), "elapsed_sec": round(elapsed, 1)}
        # exc_info=True preserves the traceback that plain %s formatting lost.
        logger.error("[FAIL] %s - %s (%.1fs)", name, e, elapsed, exc_info=True)
        return None


def run_async_step(name: str, coro, results: dict):
    """Variant of :func:`run_step` that drives an asyncio coroutine."""
    def wrapper():
        return asyncio.run(coro)

    return run_step(name, wrapper, results)


def _summarize(result) -> str:
    """Condense a step result into a short (<=200 char) log-friendly string."""
    if isinstance(result, dict):
        return json.dumps(result, ensure_ascii=False, default=str)[:200]
    return str(result)[:200]
def main() -> None:
    """Run the weekly trend-engine batch pipeline end to end.

    Steps execute sequentially and independently: every outcome is recorded
    in ``results`` and a failure in one step does not abort the rest.  The
    process exits non-zero only when *every* step failed.
    """
    total_start = time.time()
    results: dict = {}
    sb = _get_supabase_client()

    # -- 1. Kakao Map grid scan (runs first: builds the trend_spots master) --
    kakao = KakaoMapCollector(sb)
    run_async_step("1_kakaomap", kakao.run(), results)

    # -- 2. SpotMatcher + TrendSpotRegistrar initialization --
    matcher = SpotMatcher(sb)
    registrar = TrendSpotRegistrar(sb)
    extractor = PlaceNameExtractor(sb)
    logger.info(
        "SpotMatcher ready - trend_spots: %d rows, story_spots: %d rows",
        len(matcher.trend_spots),
        len(matcher.story_spots),
    )

    # -- 3. YouTube API (SpotMatcher + Registrar integration) --
    youtube = YouTubeCollector(sb, spot_matcher=matcher, spot_registrar=registrar)
    run_step("3_youtube", youtube.run, results)

    # -- 4. Instagram influencer monitoring v5.1 Multimodal --
    logger.info("Instagram: influencer monitoring v5.1 (Multimodal AI)")
    instagram = InstagramCollector(sb, spot_matcher=matcher, spot_registrar=registrar)
    run_step("4_instagram_influencer", instagram.run, results)

    # -- 5. Naver Place: disabled (Place ID matching unavailable) --
    logger.info("Naver Place: disabled (Place ID matching unavailable, 2026-02)")
    results["5_naver_place"] = {
        "status": "skipped",
        "reason": "Place ID matching unavailable",
        "elapsed_sec": 0,
    }

    # -- 5b. Naver DataLab search trends: disabled --
    # Disabled via CHANNEL_WEIGHTS in trend_scorer.py; since scoring ignores
    # the metric, skipping collection saves API quota.
    logger.info("Naver DataLab: disabled (unused in scoring; saves API quota)")
    results["5b_naver_datalab"] = {
        "status": "skipped",
        "reason": "Disabled in CHANNEL_WEIGHTS - re-enable in trend_scorer.py first",
        "elapsed_sec": 0,
    }

    # -- 6. Naver blog collection (URL candidates + parallel crawl + persist) --
    blog = NaverBlogCollector(sb)
    run_step("6_naver_blog", blog.run, results)

    # -- 7. Blog body -> place-name extraction + mention_count aggregation --
    def extract_blog_places():
        """Extract place names from pending blog posts and aggregate mentions.

        Flow: page through spot_trends rows marked ``__pending__`` for the
        naver_blog source, extract place names from title + preview text,
        resolve each via SpotMatcher (registering unknown places through the
        registrar), then upsert one mention_count record per matched spot for
        the current week (get_week_period standardizes the period; the safe
        upsert keeps re-runs duplicate-free).
        """
        period_start, period_end = get_week_period()

        # Fetch pending rows in fixed-size pages (a single select is capped).
        records = []
        page_size = 1000
        offset = 0
        try:
            while True:
                batch = (
                    sb.table("spot_trends")
                    .select("id, raw_data")
                    .eq("source", "naver_blog")
                    .eq("spot_id", "__pending__")
                    .range(offset, offset + page_size - 1)
                    .execute()
                )
                rows = batch.data or []
                records.extend(rows)
                if len(rows) < page_size:
                    break
                offset += page_size
        except Exception as e:
            logger.warning("Failed to fetch pending blog records: %s", e)
            return {"error": str(e)}

        logger.info("Pending blog records fetched: %d", len(records))
        if not records:
            return {"pending_records": 0, "places_found": 0}

        # Aggregate mention counts per matched spot id.
        place_mentions: dict[str, int] = {}
        for record in records:
            # `or {}` guards against raw_data stored as SQL NULL: .get() with
            # a default would still return None and crash on .get() below.
            raw = record.get("raw_data") or {}
            content = raw.get("content_preview", "")
            title = raw.get("title", "")
            text = f"{title} {content}"
            text = re.sub(r"<[^>]+>", "", text)  # strip HTML tags
            places = extractor.extract(text)
            for place in places:
                matched_id = matcher.match(place["name"])
                # Not in the existing DB -> register as a new trend spot.
                if not matched_id:
                    matched_id = registrar.register(place["name"], "naver_blog")
                if matched_id:
                    place_mentions[matched_id] = place_mentions.get(matched_id, 0) + 1

        # Persist aggregates (safe upsert is compatible with the partial
        # unique index on spot_trends and makes re-runs idempotent).
        saved = 0
        for spot_id, count in place_mentions.items():
            try:
                safe_upsert_spot_trend(sb, {
                    "spot_id": spot_id,
                    "source": "naver_blog",
                    "metric_type": "mention_count",
                    "metric_value": count,
                    "period_start": period_start.isoformat(),
                    "period_end": period_end.isoformat(),
                    "raw_data": {"aggregated_from": "blog_post_extraction"},
                })
                saved += 1
            except Exception as e:
                logger.warning("Failed to save mention_count (%s): %s", spot_id, e)

        return {
            "pending_records": len(records),
            "places_found": len(place_mentions),
            "mention_records_saved": saved,
        }

    run_step("7_blog_place_extraction", extract_blog_places, results)

    # -- 7a. SpotMatcher + Registrar statistics --
    match_stats = matcher.log_stats()
    registrar_stats = registrar.log_stats()
    results["7a_match_stats"] = {"status": "ok", "result": str(match_stats), "elapsed_sec": 0}
    results["7a_registrar_stats"] = {"status": "ok", "result": str(registrar_stats), "elapsed_sec": 0}

    # -- 7b. Clean up raw __pending__ blog source records --
    def cleanup_pending():
        """Delete raw __pending__ blog rows, but only when extraction worked.

        Safety valve: keep the source rows when the extraction step failed or
        produced zero places, so the data can be re-processed on a later run.
        """
        extraction_result = results.get("7_blog_place_extraction", {})
        extraction_ok = extraction_result.get("status") == "ok"
        raw = extraction_result.get("result_raw") or {}
        places_found = raw.get("places_found", 0) if isinstance(raw, dict) else 0
        pending_records = raw.get("pending_records", 0) if isinstance(raw, dict) else 0

        if not extraction_ok:
            logger.warning(
                "Skipping __pending__ cleanup: extraction step failed, keeping source rows"
            )
            return {"skipped": True, "reason": "extraction_failed"}
        if pending_records > 0 and places_found == 0:
            logger.warning(
                "Skipping __pending__ cleanup: 0 places extracted from %d pending rows, keeping source rows",
                pending_records,
            )
            return {"skipped": True, "reason": "zero_extraction", "pending_records": pending_records}

        try:
            result = (
                sb.table("spot_trends")
                .delete()
                .eq("source", "naver_blog")
                .eq("metric_type", "blog_post")
                .eq("spot_id", "__pending__")
                .execute()
            )
            deleted = len(result.data) if result.data else 0
            logger.info(
                "Deleted %d __pending__ blog records (%d places extracted)", deleted, places_found
            )
            return {"deleted_pending": deleted, "places_found": places_found}
        except Exception as e:
            logger.warning("__pending__ cleanup failed: %s", e)
            return {"error": str(e)}

    run_step("7b_cleanup_pending", cleanup_pending, results)

    # -- 8. Composite scoring + ranking (requires >= 2 successful channels) --
    # Only the collection steps count (1, 3, 4, 6).
    collection_steps = ["1_kakaomap", "3_youtube", "4_instagram_influencer", "6_naver_blog"]
    successful_channels = [s for s in collection_steps if results.get(s, {}).get("status") == "ok"]

    def calc_scores():
        """Generate the weekly ranking when enough channels collected data."""
        if len(successful_channels) < 2:
            logger.error(
                "Only %d channels succeeded (minimum 2 required). Skipping scoring: %s",
                len(successful_channels), successful_channels,
            )
            return {"skipped": True, "reason": f"Only {len(successful_channels)} channels succeeded"}
        logger.info(
            "Running scoring: %d channels succeeded - %s",
            len(successful_channels), successful_channels,
        )
        return generate_weekly_ranking(sb)

    run_step("8_score_calculation", calc_scores, results)

    # -- 9. Retention cleanup (drop trend data older than 12 weeks) --
    def retention_cleanup():
        """Purge spot_trends rows past the 12-week retention window."""
        deleted = cleanup_old_trends(sb, retention_weeks=12)
        return {"deleted_records": deleted}

    run_step("9_retention_cleanup", retention_cleanup, results)

    # -- 9b. Weekly trend_spots -> story_spots migration (older than 7 days) --
    def migrate_trend_to_story():
        """Promote trend_spots trending for over a week into story_spots.

        Uses SpotMatcher to detect an existing story_spots duplicate first;
        a new story_spots row is created only when no match exists.
        """
        from datetime import datetime as dt, timedelta as td, timezone as tz

        cutoff = (dt.now(tz.utc) - td(days=7)).isoformat()
        try:
            resp = (
                sb.table("trend_spots")
                .select("id, name, category, lat, lng, address")
                .lt("trending_since", cutoff)
                .eq("is_course_eligible", False)
                .limit(100)
                .execute()
            )
        except Exception as e:
            logger.warning("Migration query on trend_spots failed: %s", e)
            return {"error": str(e)}

        candidates = resp.data or []
        if not candidates:
            return {"candidates": 0, "promoted": 0}

        promoted = 0
        for ts in candidates:
            name = ts.get("name", "")
            # Check for an existing story_spots entry via SpotMatcher.
            existing = matcher.match(name)
            if existing:
                # Already in story_spots: just flag course eligibility.
                try:
                    sb.table("trend_spots").update(
                        {"is_course_eligible": True}
                    ).eq("id", ts["id"]).execute()
                except Exception:
                    pass  # best effort; the flag is retried on the next run
                continue
            # Create a fresh story_spots entry.
            story_id = f"promoted_{ts['id']}"
            try:
                sb.table("story_spots").upsert({
                    "id": story_id,
                    "name": name,
                    "category": ts.get("category", "etc"),
                    "lat": ts.get("lat"),
                    "lng": ts.get("lng"),
                    "address": ts.get("address", ""),
                    "status": "active",
                    "priority_score": 5,
                }, on_conflict="id").execute()
                sb.table("trend_spots").update(
                    {"is_course_eligible": True}
                ).eq("id", ts["id"]).execute()
                promoted += 1
            except Exception as e:
                logger.warning("story_spots promotion failed (%s): %s", name, e)

        logger.info(
            "trend->story migration: %d candidates, %d promoted", len(candidates), promoted
        )
        return {"candidates": len(candidates), "promoted": promoted}

    run_step("9b_trend_migration", migrate_trend_to_story, results)

    # -- Final summary --
    total_elapsed = time.time() - total_start
    ok_count = sum(1 for r in results.values() if r.get("status") == "ok")
    err_count = sum(1 for r in results.values() if r.get("status") == "error")
    skip_count = sum(1 for r in results.values() if r.get("status") == "skipped")
    summary = {
        "total_steps": len(results),
        "succeeded": ok_count,
        "failed": err_count,
        "skipped": skip_count,
        "total_elapsed_sec": round(total_elapsed, 1),
        "steps": {
            k: {"status": v.get("status"), "elapsed_sec": v.get("elapsed_sec", 0)}
            for k, v in results.items()
        },
    }
    logger.info("--- TREND ENGINE COMPLETE ---")
    logger.info(
        "ok: %d / failed: %d / skipped: %d / total: %.1fs",
        ok_count, err_count, skip_count, total_elapsed,
    )
    # Machine-readable JSON summary (consumed from GitHub Actions logs).
    print(json.dumps(summary, ensure_ascii=False, indent=2))

    # Exit non-zero only on total failure, so partial runs still succeed in CI.
    if ok_count == 0:
        logger.error("All pipeline steps failed.")
        sys.exit(1)
# Run the pipeline only when executed as a script; importing stays side-effect free.
if __name__ == "__main__":
    main()