Spaces:
Paused
Paused
| import os, json, asyncio, time | |
| from datetime import datetime, timezone | |
| from pathlib import Path | |
| from collections import Counter, defaultdict | |
| from fastapi import APIRouter, Request, BackgroundTasks | |
| from fastapi.responses import JSONResponse, HTMLResponse, FileResponse | |
| from shared import ADMIN_EMAIL, HF_TOKEN, SA_DB_PATH, _db_read, _db_read_one, _db_write, _db_write_sync | |
| import aiosqlite | |
| router = APIRouter() | |
| async def analytics_collect(request: Request, background_tasks: BackgroundTasks): | |
| import time as _t | |
| try: | |
| body = await request.json() | |
| event = (body.get("event") or "")[:50] | |
| if not event: | |
| return JSONResponse({"ok": False}, status_code=400) | |
| background_tasks.add_task(_analytics_bg_write, body, _t.time()) | |
| return {"ok": True} | |
| except Exception as e: | |
| return JSONResponse({"ok": False}, status_code=500) | |
| async def _chat_log_bg(email, url_hash, ref, user_msg, feat, ts): | |
| try: | |
| async with aiosqlite.connect(SA_DB_PATH, timeout=10.0) as db: | |
| await db.execute("PRAGMA journal_mode=WAL") | |
| if user_msg: | |
| await db.execute("INSERT INTO user_inputs (email,url_hash,url,input_type,input_text,feature,created_at) VALUES (?,?,?,?,?,?,?)", | |
| (email, url_hash, ref[:500], "chat", user_msg, feat, ts)) | |
| print(f"[log-bg] ✅ {email}/{feat}/{user_msg[:60]}") | |
| await db.execute("INSERT INTO feature_usage (email,feature,use_count,last_used_at) VALUES (?,?,1,?) ON CONFLICT(email,feature) DO UPDATE SET use_count=use_count+1, last_used_at=?", | |
| (email, feat, ts, ts)) | |
| await db.commit() | |
| except Exception as e: | |
| print(f"[log-bg] {e}") | |
| async def _analytics_bg_write(body, now): | |
| try: | |
| email = (body.get("email") or "").strip().lower() | |
| event = (body.get("event") or "")[:50] | |
| async with aiosqlite.connect(SA_DB_PATH, timeout=10.0) as db: | |
| await db.execute("PRAGMA journal_mode=WAL") | |
| await db.execute("""INSERT INTO analytics ( | |
| email,event,feature,domain,url,provider,client,model, | |
| mode,engine,response_ms,tokens_used,success,error_msg, | |
| screen,os,browser,language,timezone,referrer,country, | |
| session_id,meta,created_at | |
| ) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)""", ( | |
| email, event, | |
| (body.get("feature") or "")[:50], | |
| (body.get("domain") or "")[:200], | |
| (body.get("url") or "")[:500], | |
| (body.get("provider") or "demo")[:20], | |
| (body.get("client") or "")[:20], | |
| (body.get("model") or "")[:50], | |
| (body.get("mode") or "")[:20], | |
| (body.get("engine") or "")[:20], | |
| int(body.get("response_ms") or 0), | |
| int(body.get("tokens_used") or 0), | |
| 1 if body.get("success", True) else 0, | |
| (body.get("error_msg") or "")[:200], | |
| (body.get("screen") or "")[:20], | |
| (body.get("os") or "")[:30], | |
| (body.get("browser") or "")[:50], | |
| (body.get("language") or "")[:10], | |
| (body.get("timezone") or "")[:40], | |
| (body.get("referrer") or "")[:500], | |
| (body.get("country") or "")[:10], | |
| (body.get("session_id") or "")[:30], | |
| (body.get("meta") or "")[:500], | |
| now, | |
| )) | |
| if email: | |
| updates = [] | |
| params = [] | |
| for field, col, maxlen in [("browser","last_browser",50),("os","last_os",30),("screen","last_screen",20),("language","last_language",10),("timezone","last_timezone",40),("country","last_country",10),("provider","last_provider",20),("client","last_client",20)]: | |
| val = (body.get(field) or "")[:maxlen] | |
| if val: updates.append(f"{col}=?"); params.append(val) | |
| ref = (body.get("referrer") or "")[:500] | |
| if ref: updates.append("first_referrer=CASE WHEN first_referrer='' OR first_referrer IS NULL THEN ? ELSE first_referrer END"); params.append(ref) | |
| if event in ("ai_response","marl_response","imagine_response"): | |
| updates.append("total_ai_calls=total_ai_calls+1") | |
| if not body.get("success", True): updates.append("total_ai_errors=total_ai_errors+1") | |
| if updates: | |
| params.append(email) | |
| await db.execute(f"UPDATE users SET {', '.join(updates)} WHERE email=?", tuple(params)) | |
| await db.commit() | |
| except Exception as e: | |
| print(f"[analytics-bg] {e}") | |
| async def admin_analytics(admin: str = ""): | |
| if admin != ADMIN_EMAIL: | |
| return JSONResponse({"error": "unauthorized"}, status_code=403) | |
| import time as _t | |
| now = _t.time() | |
| day_ago = now - 86400 | |
| week_ago = now - 604800 | |
| today_cnt = await _db_read_one("SELECT COUNT(*) as cnt FROM analytics WHERE created_at>?", (day_ago,)) | |
| week_cnt = await _db_read_one("SELECT COUNT(*) as cnt FROM analytics WHERE created_at>?", (week_ago,)) | |
| events_by_type = await _db_read("SELECT event, COUNT(*) as cnt FROM analytics WHERE created_at>? GROUP BY event ORDER BY cnt DESC LIMIT 20", (week_ago,)) | |
| features_week = await _db_read("SELECT feature, COUNT(*) as cnt FROM analytics WHERE feature!='' AND created_at>? GROUP BY feature ORDER BY cnt DESC LIMIT 20", (week_ago,)) | |
| clients = await _db_read("SELECT client, COUNT(*) as cnt FROM analytics WHERE client!='' AND created_at>? GROUP BY client ORDER BY cnt DESC", (week_ago,)) | |
| providers = await _db_read("SELECT provider, COUNT(*) as cnt FROM analytics WHERE provider!='' AND created_at>? GROUP BY provider ORDER BY cnt DESC", (week_ago,)) | |
| ai_perf = await _db_read_one("SELECT AVG(response_ms) as avg_ms, COUNT(*) as cnt, SUM(CASE WHEN success=0 THEN 1 ELSE 0 END) as errors FROM analytics WHERE event IN ('ai_response','marl_response') AND created_at>?", (week_ago,)) | |
| os_stats = await _db_read("SELECT os, COUNT(*) as cnt FROM analytics WHERE os!='' AND created_at>? GROUP BY os ORDER BY cnt DESC LIMIT 10", (week_ago,)) | |
| browser_stats = await _db_read("SELECT browser, COUNT(*) as cnt FROM analytics WHERE browser!='' AND created_at>? GROUP BY browser ORDER BY cnt DESC LIMIT 10", (week_ago,)) | |
| engine_stats = await _db_read("SELECT engine, COUNT(*) as cnt FROM analytics WHERE engine!='' AND created_at>? GROUP BY engine ORDER BY cnt DESC LIMIT 10", (week_ago,)) | |
| return { | |
| "today_events": dict(today_cnt)["cnt"] if today_cnt else 0, | |
| "week_events": dict(week_cnt)["cnt"] if week_cnt else 0, | |
| "events_by_type": [dict(e) for e in events_by_type], | |
| "features_week": [dict(f) for f in features_week], | |
| "clients": [dict(c) for c in clients], | |
| "providers": [dict(p) for p in providers], | |
| "ai_performance": dict(ai_perf) if ai_perf else {}, | |
| "os_stats": [dict(o) for o in os_stats], | |
| "browser_stats": [dict(b) for b in browser_stats], | |
| "engine_stats": [dict(e) for e in engine_stats], | |
| } | |
| async def admin_delete_user(email: str, request: Request): | |
| admin_email = request.headers.get("X-Admin-Email", "") | |
| if admin_email != ADMIN_EMAIL: | |
| return JSONResponse({"error": "unauthorized"}, status_code=403) | |
| await _db_write_sync("DELETE FROM user_inputs WHERE email=?", (email,)) | |
| await _db_write_sync("DELETE FROM page_visits WHERE email=?", (email,)) | |
| await _db_write_sync("DELETE FROM feature_usage WHERE email=?", (email,)) | |
| await _db_write_sync("DELETE FROM users WHERE email=?", (email,)) | |
| return {"status": "deleted", "email": email} | |
| async def admin_delete_record(table: str, record_id: int, request: Request): | |
| admin_email = request.headers.get("X-Admin-Email", "") | |
| if admin_email != ADMIN_EMAIL: | |
| return JSONResponse({"error": "unauthorized"}, status_code=403) | |
| allowed = {"users","page_visits","user_inputs","feature_usage"} | |
| if table not in allowed: | |
| return JSONResponse({"error": f"table not allowed: {table}"}, status_code=400) | |
| await _db_write_sync(f"DELETE FROM {table} WHERE id=?", (record_id,)) | |
| return {"status": "deleted", "table": table, "id": record_id} | |
| async def admin_dashboard(request: Request): | |
| admin_email = request.query_params.get("admin", "") | |
| if admin_email != ADMIN_EMAIL: | |
| return JSONResponse({"error": "unauthorized"}, status_code=403) | |
| users = await _db_read("SELECT COUNT(*) as cnt FROM users") | |
| visits = await _db_read("SELECT COUNT(*) as cnt FROM page_visits") | |
| inputs = await _db_read("SELECT COUNT(*) as cnt FROM user_inputs") | |
| top_domains = await _db_read("SELECT domain, SUM(visit_count) as cnt FROM page_visits GROUP BY domain ORDER BY cnt DESC LIMIT 20") | |
| top_features = await _db_read("SELECT feature, SUM(use_count) as cnt FROM feature_usage GROUP BY feature ORDER BY cnt DESC") | |
| recent_users = await _db_read("SELECT email, nickname, role, visit_count, last_seen_at FROM users ORDER BY last_seen_at DESC LIMIT 20") | |
| return { | |
| "total_users": dict(users[0])["cnt"], | |
| "total_visits": dict(visits[0])["cnt"], | |
| "total_inputs": dict(inputs[0])["cnt"], | |
| "top_domains": [dict(d) for d in top_domains], | |
| "top_features": [dict(f) for f in top_features], | |
| "recent_users": [dict(u) for u in recent_users], | |
| } | |
| async def privacy_policy(): | |
| return HTMLResponse("""<!DOCTYPE html> | |
| <html lang="ko"><head><meta charset="UTF-8"><meta name="viewport" content="width=device-width,initial-scale=1"> | |
| <title>SiteAgent Privacy Policy</title> | |
| <style>*{margin:0;padding:0;box-sizing:border-box;}body{font-family:-apple-system,sans-serif;background:#fafafa;color:#333;line-height:1.8;} | |
| .w{max-width:720px;margin:0 auto;padding:40px 20px 60px;}h1{font-size:24px;font-weight:800;color:#1a1a2e;margin-bottom:8px;} | |
| .sub{font-size:13px;color:#888;margin-bottom:32px;}h2{font-size:16px;font-weight:700;color:#5856D6;margin:28px 0 10px;padding-bottom:6px;border-bottom:2px solid rgba(88,86,214,.1);} | |
| p{margin-bottom:12px;font-size:14px;}ul{margin:8px 0 12px 20px;font-size:14px;}li{margin-bottom:4px;} | |
| .ft{margin-top:40px;padding-top:20px;border-top:1px solid #eee;font-size:12px;color:#999;text-align:center;}a{color:#5856D6;}b{color:#1a1a2e;}</style> | |
| </head><body><div class="w"> | |
| <h1>SiteAgent 개인정보처리방침</h1> | |
| <p class="sub">Privacy Policy | Last Updated: March 16, 2026</p> | |
| <h2>1. 수집하는 정보</h2> | |
| <ul><li><b>Google 이메일 주소</b>: 사용자 식별 및 개인화 서비스 (Chrome Identity API)</li> | |
| <li><b>방문 URL 및 페이지 제목</b>: AI 분석 기능 제공</li> | |
| <li><b>사용자 입력 텍스트</b>: AI 채팅, 검색, 분석 요청</li> | |
| <li><b>기능 사용 통계</b>: 서비스 개선용</li></ul> | |
| <h2>2. 이용 목적</h2> | |
| <ul><li>AI 기반 웹 분석, 번역, 검색, 이미지 생성 서비스 제공</li> | |
| <li>사용자별 개인화 경험 제공</li><li>서비스 품질 개선</li></ul> | |
| <h2>3. 저장 및 보관</h2> | |
| <ul><li>사용자 설정: 브라우저 로컬 스토리지 (chrome.storage.local)</li> | |
| <li>활동 로그: Hugging Face 서버 저장</li> | |
| <li>API 키: 로컬에만 저장, 외부 전송 없음</li></ul> | |
| <h2>4. 제3자 제공</h2> | |
| <p>개인정보를 <b>제3자에게 판매/공유하지 않습니다</b>. AI 처리를 위해 Groq API, fal.ai API와 통신합니다.</p> | |
| <h2>5. 사용자 권리</h2> | |
| <ul><li>확장 제거 시 로컬 데이터 자동 삭제</li> | |
| <li>서버 데이터 삭제: arxivgpt@gmail.com으로 요청</li></ul> | |
| <h2>6. 보안</h2><p>HTTPS 암호화 통신, Hugging Face 보안 인프라 사용.</p> | |
| <h2>7. 아동 보호</h2><p>13세 미만 아동의 정보를 고의로 수집하지 않습니다.</p> | |
| <h2>8. 문의</h2><p>Email: <a href="mailto:arxivgpt@gmail.com">arxivgpt@gmail.com</a><br>Ginigen AI | Seoul, South Korea</p> | |
| <hr style="margin:24px 0;border:none;border-top:1px solid #eee;"> | |
| <h2>English Summary</h2> | |
| <p>SiteAgent collects Google email (via Chrome Identity API), visited URLs, user input text, and usage statistics for AI service delivery. We do NOT sell or share personal data. Data is transmitted to Groq API and fal.ai solely for AI processing. Uninstalling removes all local data. Server data deletion: arxivgpt@gmail.com</p> | |
| <div class="ft">© 2026 Ginigen AI. <a href="https://ginigen-ai-siteagent.hf.space">ginigen-ai-siteagent.hf.space</a></div> | |
| </div></body></html>""") | |
| DOMAIN_CATEGORIES = { | |
| "shopping": ["coupang.com","gmarket.co.kr","11st.co.kr","auction.co.kr","ssg.com","musinsa.com","kurly.com","oliveyoung.co.kr","amazon.com","ebay.com","aliexpress.com","temu.com","zigzag.kr","ably.co.kr","brandi.co.kr","29cm.co.kr","wconcept.com"], | |
| "food_delivery": ["baemin.com","yogiyo.co.kr","coupangeats.com","ddangyo.com"], | |
| "news": ["naver.com","daum.net","chosun.com","donga.com","joongang.co.kr","hankyung.com","mk.co.kr","sedaily.com","hani.co.kr","yna.co.kr","yonhapnews.co.kr","bbc.com","cnn.com","reuters.com","nytimes.com"], | |
| "tech": ["github.com","stackoverflow.com","medium.com","dev.to","velog.io","tistory.com","brunch.co.kr","huggingface.co","arxiv.org","kaggle.com","producthunt.com","techcrunch.com"], | |
| "finance": ["finance.naver.com","investing.com","tradingview.com","coinmarketcap.com","upbit.com","bithumb.com","kbstar.com","shinhan.com","wooribank.com","toss.im","kakaopay.com","samsung.com/sec"], | |
| "entertainment": ["youtube.com","netflix.com","watcha.com","tving.com","wavve.com","twitch.tv","afreecatv.com","spotify.com","melon.com","flo.com","webtoon.com","series.naver.com"], | |
| "social": ["instagram.com","twitter.com","x.com","facebook.com","threads.net","reddit.com","linkedin.com","kakaotalk.com","band.us"], | |
| "travel": ["booking.com","agoda.com","hotels.com","airbnb.com","skyscanner.co.kr","trip.com","yanolja.com","goodchoice.kr","klook.com","myrealtrip.com"], | |
| "education": ["coursera.org","udemy.com","edx.org","inflearn.com","class101.net","fastcampus.co.kr","coloso.co.kr","megastudy.net","etoos.com"], | |
| "realestate": ["zigbang.com","dabangapp.com","peterpanz.com","land.naver.com","hogangnono.com"], | |
| "health": ["hidoc.co.kr","gangnam.go.kr","health.kr","pilates.com","yoga.com"], | |
| "job": ["jobkorea.co.kr","saramin.co.kr","wanted.co.kr","rocketpunch.com","linkedin.com/jobs","programmers.co.kr"], | |
| } | |
| FEATURE_PERSONA = { | |
| "요약": "efficiency_seeker", | |
| "분석": "analytical_mind", | |
| "번역": "multilingual_user", | |
| "검색": "info_hunter", | |
| "딥분석": "deep_thinker", | |
| "아이디어": "creative_innovator", | |
| "금융": "investor", | |
| "XRay": "tech_specialist", | |
| "날씨": "planner", | |
| "타이머": "productivity_focused", | |
| "image_gen": "visual_creator", | |
| "image_edit": "visual_creator", | |
| "chat": "conversationalist", | |
| } | |
| def _infer_user_profile(domains, features, inputs, analytics_events): | |
| interest_scores = {} | |
| for d in domains: | |
| domain = d.get("domain", "") | |
| cnt = d.get("cnt", 0) or d.get("count", 0) | |
| for cat, cat_domains in DOMAIN_CATEGORIES.items(): | |
| if any(cd in domain for cd in cat_domains): | |
| interest_scores[cat] = interest_scores.get(cat, 0) + cnt | |
| persona_scores = {} | |
| for f in features: | |
| feat = f.get("feature", "") | |
| cnt = f.get("cnt", 0) or f.get("count", 0) | |
| for key, persona in FEATURE_PERSONA.items(): | |
| if key in feat: | |
| persona_scores[persona] = persona_scores.get(persona, 0) + cnt | |
| keywords = {} | |
| stopwords = {"현재","페이지","내용","요약","분석","번역","해줘","해주세요","알려줘","뭐야","어떻게", | |
| "무엇","있나","어디","검색","관련","최신","정보","사이트","보여줘","좀","것","수","이", | |
| "그","저","더","를","을","에","의","가","는","은","도","로","와","과","한","된","하는", | |
| "the","what","how","is","a","an","to","and","of","for","in","on","this","that", | |
| "about","can","you","me","please","tell","show","find","get"} | |
| for inp in inputs: | |
| text = inp.get("input_text", "") or "" | |
| words = [w.strip().lower() for w in text.replace(",","").replace(".","").replace("?","").replace("!","").split() if len(w.strip()) >= 2] | |
| for w in words: | |
| if w not in stopwords and len(w) <= 20: | |
| keywords[w] = keywords.get(w, 0) + 1 | |
| hour_dist = [0] * 24 | |
| for ev in analytics_events: | |
| ts = ev.get("created_at", 0) | |
| if ts: | |
| from datetime import datetime, timezone, timedelta | |
| try: | |
| dt = datetime.fromtimestamp(ts, tz=timezone(timedelta(hours=9))) | |
| hour_dist[dt.hour] += 1 | |
| except: pass | |
| peak_hours = sorted(range(24), key=lambda h: hour_dist[h], reverse=True)[:3] | |
| if hour_dist[peak_hours[0]] > 0: | |
| if peak_hours[0] < 6: time_persona = "night_owl" | |
| elif peak_hours[0] < 12: time_persona = "morning_person" | |
| elif peak_hours[0] < 18: time_persona = "daytime_worker" | |
| else: time_persona = "evening_user" | |
| else: | |
| time_persona = "unknown" | |
| INTENT_PATTERNS = { | |
| "purchase_intent": {"keywords":["가격","비교","할인","쿠폰","최저가","구매","사다","주문","배송","무료배송","세일","가성비","추천","리뷰","후기","평점","별점","cheapest","buy","order","price","deal","discount","coupon","review"],"label":"구매 의도","cta_type":"shopping","emoji":"🛒"}, | |
| "food_intent": {"keywords":["맛집","배달","치킨","피자","족발","떡볶이","초밥","카페","커피","점심","저녁","식당","메뉴","음식","recipe","food","restaurant","delivery","lunch","dinner"],"label":"음식/배달","cta_type":"food","emoji":"🍕"}, | |
| "investment_intent": {"keywords":["주식","코인","비트코인","이더리움","투자","수익률","배당","etf","매수","매도","차트","시세","종목","펀드","금리","환율","stock","crypto","invest","trading","portfolio","dividend"],"label":"투자/금융","cta_type":"finance","emoji":"📈"}, | |
| "job_intent": {"keywords":["이력서","자소서","면접","채용","연봉","취업","이직","경력","포트폴리오","지원","합격","resume","interview","salary","career","hiring","job"],"label":"취업/이직","cta_type":"job","emoji":"💼"}, | |
| "learning_intent": {"keywords":["배우다","공부","강의","튜토리얼","입문","기초","심화","자격증","시험","learn","study","course","tutorial","beginner","certification","exam","교육"],"label":"학습/교육","cta_type":"education","emoji":"📚"}, | |
| "travel_intent": {"keywords":["여행","항공","호텔","숙소","관광","비행기","예약","가볼만한","travel","flight","hotel","booking","vacation","trip","tour","airbnb"],"label":"여행 계획","cta_type":"travel","emoji":"✈️"}, | |
| "health_intent": {"keywords":["건강","운동","다이어트","헬스","필라테스","요가","병원","약","증상","치료","영양","health","exercise","diet","gym","yoga","symptom","medical"],"label":"건강/운동","cta_type":"health","emoji":"🏃"}, | |
| "tech_intent": {"keywords":["코딩","프로그래밍","개발","api","python","javascript","react","서버","데이터베이스","클라우드","ai","인공지능","모델","github","code","programming","developer","deploy"],"label":"기술/개발","cta_type":"saas","emoji":"💻"}, | |
| "realestate_intent": {"keywords":["아파트","전세","월세","매매","부동산","분양","청약","임대","평수","이사","집","rent","apartment","housing","mortgage","property"],"label":"부동산","cta_type":"realestate","emoji":"🏠"}, | |
| "entertainment_intent": {"keywords":["영화","드라마","넷플릭스","웹툰","게임","음악","유튜브","스트리밍","movie","drama","netflix","game","music","youtube","anime","webtoon"],"label":"엔터테인먼트","cta_type":"entertainment","emoji":"🎬"}, | |
| "comparison_intent": {"keywords":["비교","차이","뭐가 나은","뭐가 좋","어떤 게","vs","versus","compare","difference","better","which","recommend"],"label":"비교/의사결정","cta_type":"general","emoji":"⚖️"}, | |
| "creative_intent": {"keywords":["디자인","이미지","그림","로고","포스터","만들어","생성","그려","아이디어","영감","design","image","create","generate","logo","poster","creative","inspiration"],"label":"창작/디자인","cta_type":"creative","emoji":"🎨"}, | |
| } | |
| intent_counts = {} | |
| input_analysis = [] | |
| topic_clusters = {} | |
| complexity_dist = {"simple":0,"medium":0,"complex":0} | |
| action_signals = [] | |
| for inp in inputs: | |
| text = (inp.get("input_text","") or "").strip() | |
| if not text or len(text) < 3: continue | |
| feature = inp.get("feature","") or "" | |
| url = inp.get("url","") or "" | |
| created = inp.get("created_at",0) | |
| text_lower = text.lower() | |
| matched_intents = [] | |
| for intent_key, pattern in INTENT_PATTERNS.items(): | |
| score = sum(1 for kw in pattern["keywords"] if kw in text_lower) | |
| if score > 0: | |
| matched_intents.append({"intent":intent_key,"score":score,"label":pattern["label"],"emoji":pattern["emoji"],"cta_type":pattern["cta_type"]}) | |
| intent_counts[intent_key] = intent_counts.get(intent_key,0) + score | |
| matched_intents.sort(key=lambda x: x["score"], reverse=True) | |
| primary_intent = matched_intents[0] if matched_intents else {"intent":"general","label":"일반","emoji":"💬","score":0,"cta_type":"general"} | |
| word_count = len(text.split()) | |
| has_question = "?" in text or "뭐" in text or "어떻게" in text or "왜" in text | |
| if word_count <= 5: complexity = "simple" | |
| elif word_count <= 20 or not has_question: complexity = "medium" | |
| else: complexity = "complex" | |
| complexity_dist[complexity] += 1 | |
| topic_key = primary_intent["label"] | |
| if topic_key not in topic_clusters: topic_clusters[topic_key] = [] | |
| topic_clusters[topic_key].append({"text":text[:100],"feature":feature,"created_at":created}) | |
| purchase_signals = ["사고 싶","구매","주문","결제","신청","가입","등록","subscribe","buy","order","signup"] | |
| if any(sig in text_lower for sig in purchase_signals): | |
| action_signals.append({"text":text[:100],"intent":primary_intent["label"],"feature":feature,"created_at":created}) | |
| input_analysis.append({"text":text[:100],"intent":primary_intent["label"],"intent_emoji":primary_intent["emoji"],"complexity":complexity,"feature":feature,"url_domain":url.split("/")[2] if url.count("/")>=2 else "","created_at":created}) | |
| top_intents = sorted(intent_counts.items(), key=lambda x: x[1], reverse=True)[:8] | |
| intent_labels = {k:v["label"] for k,v in INTENT_PATTERNS.items()} | |
| intent_emojis = {k:v["emoji"] for k,v in INTENT_PATTERNS.items()} | |
| topic_summary = [] | |
| for topic, items in sorted(topic_clusters.items(), key=lambda x: len(x[1]), reverse=True)[:8]: | |
| topic_summary.append({"topic":topic,"count":len(items),"recent":items[0]["text"] if items else "","last_at":items[0].get("created_at",0) if items else 0}) | |
| behavior_summary = [] | |
| if top_intents: | |
| top_label = intent_labels.get(top_intents[0][0], top_intents[0][0]) | |
| behavior_summary.append(f"가장 많이 관심 있는 주제: {top_label} ({top_intents[0][1]}회 언급)") | |
| if complexity_dist["complex"] > complexity_dist["simple"]: | |
| behavior_summary.append("심층 분석형 사용자 — 복잡한 질문을 자주 합니다") | |
| elif complexity_dist["simple"] > complexity_dist["complex"] * 2: | |
| behavior_summary.append("간편 사용형 — 짧은 명령 위주로 사용합니다") | |
| if action_signals: | |
| behavior_summary.append(f"구매/전환 신호 {len(action_signals)}건 감지 — 광고 반응 가능성 높음") | |
| if len(topic_clusters) >= 4: | |
| behavior_summary.append(f"다양한 관심사 보유 ({len(topic_clusters)}개 주제) — 종합 타겟팅 적합") | |
| elif len(topic_clusters) == 1: | |
| behavior_summary.append("단일 주제 집중형 — 정밀 타겟팅 적합") | |
| cta_recommendations = [] | |
| for intent_key, count in top_intents: | |
| pattern = INTENT_PATTERNS.get(intent_key, {}) | |
| if count >= 2 and pattern: | |
| cta_recommendations.append({"type":pattern.get("cta_type","general"),"label":f"{pattern['emoji']} {pattern['label']} 관련 추천","priority":min(10,6+count),"reason":f"입력에서 '{pattern['label']}' 의도 {count}회 감지","source":"input_analysis"}) | |
| interest_names = [i[0] for i in sorted(interest_scores.items(), key=lambda x: x[1], reverse=True)[:5]] | |
| DOMAIN_CTA_MAP = {"shopping":{"label":"🛒 쿠팡 최저가 비교","priority":9},"food_delivery":{"label":"🍕 배달 할인 쿠폰","priority":9},"finance":{"label":"📈 투자 리포트 구독","priority":8},"tech":{"label":"💻 AI 도구 프리미엄","priority":8},"travel":{"label":"✈️ 항공권 특가 알림","priority":7},"education":{"label":"📚 온라인 강의 할인","priority":7},"entertainment":{"label":"🎬 OTT 무료 체험","priority":6},"realestate":{"label":"🏠 신규 매물 알림","priority":8},"job":{"label":"💼 맞춤 채용 추천","priority":8}} | |
| for cat_name in interest_names: | |
| if cat_name in DOMAIN_CTA_MAP: | |
| existing_types = [c["type"] for c in cta_recommendations] | |
| if cat_name not in existing_types: | |
| info = DOMAIN_CTA_MAP[cat_name] | |
| cta_recommendations.append({"type":cat_name,"label":info["label"],"priority":info["priority"],"reason":f"'{cat_name}' 사이트 빈번 방문","source":"domain_analysis"}) | |
| if "visual_creator" in [p[0] for p in sorted(persona_scores.items(), key=lambda x: x[1], reverse=True)[:3]]: | |
| cta_recommendations.append({"type":"creative","label":"🎨 디자인 툴 프리미엄","priority":7,"reason":"이미지 생성/편집 다수 사용","source":"feature_analysis"}) | |
| total_usage = sum(f.get("cnt",0) or f.get("count",0) for f in features) | |
| if total_usage > 20: | |
| cta_recommendations.append({"type":"upsell","label":"⭐ SiteAgent Pro 업그레이드","priority":10,"reason":f"누적 {total_usage}회 사용 — 파워유저","source":"usage"}) | |
| if action_signals: | |
| cta_recommendations.append({"type":"conversion","label":"🔥 즉시 전환 가능","priority":10,"reason":f"구매/신청 의도 {len(action_signals)}건 직접 감지","source":"action_signal"}) | |
| cta_recommendations.sort(key=lambda x: x["priority"], reverse=True) | |
| engagement = min(100, total_usage * 2 + len(domains) * 3 + len(inputs)) | |
| top_interests = sorted(interest_scores.items(), key=lambda x: x[1], reverse=True)[:5] | |
| top_personas = sorted(persona_scores.items(), key=lambda x: x[1], reverse=True)[:3] | |
| top_keywords = sorted(keywords.items(), key=lambda x: x[1], reverse=True)[:15] | |
| return { | |
| "interests":[{"category":i[0],"score":i[1]} for i in top_interests], | |
| "personas":[{"type":p[0],"score":p[1]} for p in top_personas], | |
| "keywords":[{"word":k[0],"count":k[1]} for k in top_keywords], | |
| "time_persona":time_persona,"peak_hours":peak_hours,"hour_distribution":hour_dist, | |
| "engagement_score":engagement,"cta_recommendations":cta_recommendations[:7], | |
| "input_behavior":{ | |
| "total_inputs":len(inputs),"analyzed":len(input_analysis), | |
| "intent_ranking":[{"intent":intent_labels.get(i[0],i[0]),"emoji":intent_emojis.get(i[0],"💬"),"score":i[1]} for i in top_intents], | |
| "topic_clusters":topic_summary,"complexity":complexity_dist, | |
| "action_signals":action_signals[:10],"behavior_summary":behavior_summary, | |
| "recent_analysis":input_analysis[:20], | |
| }, | |
| } | |
| async def admin_user_full_profile(email: str, admin: str = ""): | |
| if admin != ADMIN_EMAIL: | |
| return JSONResponse({"error": "unauthorized"}, status_code=403) | |
| email = email.strip().lower() | |
| user = await _db_read_one("SELECT * FROM users WHERE email=?", (email,)) | |
| if not user: | |
| return JSONResponse({"error": "user not found"}, status_code=404) | |
| domains = await _db_read("SELECT domain, SUM(visit_count) as cnt FROM page_visits WHERE email=? GROUP BY domain ORDER BY cnt DESC LIMIT 50", (email,)) | |
| features = await _db_read("SELECT feature, use_count as cnt FROM feature_usage WHERE email=? ORDER BY cnt DESC", (email,)) | |
| inputs = await _db_read("SELECT input_text, feature, url, created_at FROM user_inputs WHERE email=? ORDER BY created_at DESC LIMIT 100", (email,)) | |
| events = await _db_read("SELECT event, feature, domain, created_at FROM analytics WHERE email=? ORDER BY created_at DESC LIMIT 200", (email,)) | |
| profile = _infer_user_profile( | |
| [dict(d) for d in domains], | |
| [dict(f) for f in features], | |
| [dict(i) for i in inputs], | |
| [dict(e) for e in events], | |
| ) | |
| return { | |
| "user": dict(user), | |
| "profile": profile, | |
| "domains": [dict(d) for d in domains[:20]], | |
| "features": [dict(f) for f in features], | |
| "recent_inputs": [dict(i) for i in inputs[:20]], | |
| } | |
| async def admin_segments(admin: str = ""): | |
| if admin != ADMIN_EMAIL: | |
| return JSONResponse({"error": "unauthorized"}, status_code=403) | |
| users = await _db_read("SELECT email, visit_count, total_ai_calls, last_provider, last_client, last_browser, last_os FROM users ORDER BY visit_count DESC") | |
| segments = {"power_users": [], "casual_users": [], "new_users": [], "by_client": {}, "by_provider": {}, "by_os": {}, "by_browser": {}} | |
| for u in users: | |
| ud = dict(u) | |
| vc = ud.get("visit_count", 0) or 0 | |
| ai = ud.get("total_ai_calls", 0) or 0 | |
| if vc > 10 or ai > 20: | |
| segments["power_users"].append(ud["email"]) | |
| elif vc > 3: | |
| segments["casual_users"].append(ud["email"]) | |
| else: | |
| segments["new_users"].append(ud["email"]) | |
| cl = ud.get("last_client") or "unknown" | |
| segments["by_client"].setdefault(cl, []).append(ud["email"]) | |
| pv = ud.get("last_provider") or "demo" | |
| segments["by_provider"].setdefault(pv, []).append(ud["email"]) | |
| os_name = ud.get("last_os") or "unknown" | |
| segments["by_os"].setdefault(os_name, []).append(ud["email"]) | |
| br = ud.get("last_browser") or "unknown" | |
| segments["by_browser"].setdefault(br, []).append(ud["email"]) | |
| for key in ["by_client", "by_provider", "by_os", "by_browser"]: | |
| segments[key] = {k: len(v) for k, v in segments[key].items()} | |
| segments["power_users_count"] = len(segments["power_users"]) | |
| segments["casual_users_count"] = len(segments["casual_users"]) | |
| segments["new_users_count"] = len(segments["new_users"]) | |
| return segments | |
| async def admin_insights(admin: str = ""): | |
| if admin != ADMIN_EMAIL: | |
| return JSONResponse({"error": "unauthorized"}, status_code=403) | |
| users = await _db_read("SELECT email FROM users WHERE role!='admin'") | |
| all_ctas = {} | |
| all_intents = {} | |
| all_topics = {} | |
| all_keywords = {} | |
| all_personas = {} | |
| all_interests = {} | |
| user_profiles = [] | |
| complexity_total = {"simple":0,"medium":0,"complex":0} | |
| total_action_signals = 0 | |
| total_engagement = 0 | |
| behavior_flags = [] | |
| for u in users: | |
| email = dict(u)["email"] | |
| try: | |
| domains = await _db_read("SELECT domain, SUM(visit_count) as cnt FROM page_visits WHERE email=? GROUP BY domain ORDER BY cnt DESC LIMIT 30", (email,)) | |
| features = await _db_read("SELECT feature, use_count as cnt FROM feature_usage WHERE email=? ORDER BY cnt DESC", (email,)) | |
| inputs = await _db_read("SELECT input_text, feature, url, created_at FROM user_inputs WHERE email=? ORDER BY created_at DESC LIMIT 50", (email,)) | |
| events = await _db_read("SELECT event, feature, domain, created_at FROM analytics WHERE email=? ORDER BY created_at DESC LIMIT 100", (email,)) | |
| if not inputs and not domains: continue | |
| profile = _infer_user_profile([dict(d) for d in domains],[dict(f) for f in features],[dict(i) for i in inputs],[dict(e) for e in events]) | |
| for cta in profile.get("cta_recommendations",[]): | |
| key = cta["label"] | |
| if key not in all_ctas: all_ctas[key] = {"label":key,"type":cta.get("type",""),"reason":cta.get("reason",""),"priority":cta.get("priority",0),"users":[],"source":cta.get("source","")} | |
| all_ctas[key]["users"].append(email) | |
| all_ctas[key]["priority"] = max(all_ctas[key]["priority"], cta.get("priority",0)) | |
| ib = profile.get("input_behavior",{}) | |
| for ir in ib.get("intent_ranking",[]): | |
| k = ir["intent"] | |
| all_intents[k] = all_intents.get(k,0) + ir["score"] | |
| for tc in ib.get("topic_clusters",[]): | |
| k = tc["topic"] | |
| all_topics[k] = all_topics.get(k,0) + tc["count"] | |
| cx = ib.get("complexity",{}) | |
| for ck in ["simple","medium","complex"]: | |
| complexity_total[ck] += cx.get(ck,0) | |
| total_action_signals += len(ib.get("action_signals",[])) | |
| for kw in profile.get("keywords",[]): | |
| k = kw["word"] | |
| all_keywords[k] = all_keywords.get(k,0) + kw["count"] | |
| for p in profile.get("personas",[]): | |
| k = p["type"] | |
| all_personas[k] = all_personas.get(k,0) + p["score"] | |
| for i in profile.get("interests",[]): | |
| k = i["category"] | |
| all_interests[k] = all_interests.get(k,0) + i["score"] | |
| total_engagement += profile.get("engagement_score",0) | |
| summary_line = email.split("@")[0] | |
| top_cta = profile["cta_recommendations"][0]["label"] if profile.get("cta_recommendations") else "없음" | |
| top_intent_label = ib["intent_ranking"][0]["intent"] if ib.get("intent_ranking") else "미확인" | |
| user_profiles.append({"email":email,"engagement":profile.get("engagement_score",0),"top_cta":top_cta,"top_intent":top_intent_label,"action_signals":len(ib.get("action_signals",[])),"input_count":ib.get("total_inputs",0)}) | |
| except Exception as e: | |
| print(f"[insights] skip {email}: {e}") | |
| continue | |
| cta_list = sorted(all_ctas.values(), key=lambda x: len(x["users"]), reverse=True) | |
| for c in cta_list: c["user_count"] = len(c["users"]); c["users"] = c["users"][:5] | |
| user_profiles.sort(key=lambda x: x["engagement"], reverse=True) | |
| return { | |
| "total_analyzed": len(user_profiles), | |
| "avg_engagement": round(total_engagement/max(len(user_profiles),1),1), | |
| "total_action_signals": total_action_signals, | |
| "complexity": complexity_total, | |
| "cta_ranking": cta_list[:15], | |
| "intent_ranking": sorted([{"intent":k,"score":v} for k,v in all_intents.items()], key=lambda x:x["score"], reverse=True)[:12], | |
| "topic_ranking": sorted([{"topic":k,"count":v} for k,v in all_topics.items()], key=lambda x:x["count"], reverse=True)[:12], | |
| "keyword_ranking": sorted([{"word":k,"count":v} for k,v in all_keywords.items()], key=lambda x:x["count"], reverse=True)[:20], | |
| "persona_ranking": sorted([{"type":k,"score":v} for k,v in all_personas.items()], key=lambda x:x["score"], reverse=True)[:8], | |
| "interest_ranking": sorted([{"category":k,"score":v} for k,v in all_interests.items()], key=lambda x:x["score"], reverse=True)[:10], | |
| "user_profiles": user_profiles[:30], | |
| } | |
| async def admin_page(request: Request): | |
| admin = request.query_params.get("key", "") | |
| if admin != ADMIN_EMAIL: | |
| return HTMLResponse("<h1>🔒 접근 거부</h1><p>?key=관리자이메일 필요</p>", status_code=403) | |
| for p in [Path(__file__).parent/"admin_dashboard.html", Path(".")/"admin_dashboard.html", Path("/home/user/app")/"admin_dashboard.html"]: | |
| try: | |
| if p.exists(): | |
| return HTMLResponse(p.read_text(encoding="utf-8")) | |
| except: pass | |
| return HTMLResponse("""<!DOCTYPE html><html><head><meta charset="UTF-8"><title>SiteAgent Admin</title></head><body style="font-family:sans-serif;background:#f5f5f7;padding:40px;text-align:center;"> | |
| <h1>❌ admin_dashboard.html 파일 없음</h1> | |
| <p>HF Space 파일 탐색기에 admin_dashboard.html을 업로드하세요.</p> | |
| <hr><h3>API 직접 테스트</h3> | |
| <pre id="r" style="text-align:left;background:#fff;padding:20px;border-radius:8px;max-width:600px;margin:20px auto;font-size:12px;overflow:auto;">테스트 중...</pre> | |
| <script> | |
| async function t(){var o=''; | |
| var apis=['/api/admin/dashboard','/api/admin/analytics','/api/admin/all-visits','/api/admin/inputs','/api/admin/waitlist','/api/admin/segments','/api/admin/insights']; | |
| for(var a of apis){try{var r=await fetch(a+'?admin="""+ADMIN_EMAIL+"""');o+=a+' → '+r.status+'\\n';if(r.ok){var d=await r.json();o+=JSON.stringify(d).substring(0,100)+'\\n\\n';}else{var t=await r.text();o+=t.substring(0,100)+'\\n\\n';}}catch(e){o+=a+' → ERROR: '+e.message+'\\n\\n';}} | |
| document.getElementById('r').textContent=o;}t(); | |
| </script></body></html>""", status_code=200) | |
| async def admin_all_visits(request: Request): | |
| admin_email = request.query_params.get("admin", "") | |
| if admin_email != ADMIN_EMAIL: | |
| return JSONResponse({"error": "unauthorized"}, status_code=403) | |
| rows = await _db_read( | |
| "SELECT id,email,url_hash,url,title,domain,visit_count,last_visited_at FROM page_visits ORDER BY last_visited_at DESC LIMIT 100" | |
| ) | |
| return {"visits": [dict(r) for r in rows]} | |
| async def admin_inputs(request: Request): | |
| admin_email = request.query_params.get("admin", "") | |
| if admin_email != ADMIN_EMAIL: | |
| return JSONResponse({"error": "unauthorized"}, status_code=403) | |
| rows = await _db_read("SELECT id,email,feature,input_text,url,created_at FROM user_inputs ORDER BY created_at DESC LIMIT 100") | |
| return {"inputs": [dict(r) for r in rows]} |