File size: 38,633 Bytes
40ee4e3
 
 
 
8b4fe47
40ee4e3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8b4fe47
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
import os, json, asyncio, time
from datetime import datetime, timezone
from pathlib import Path
from collections import Counter, defaultdict
from fastapi import APIRouter, Request, BackgroundTasks
from fastapi.responses import JSONResponse, HTMLResponse, FileResponse
from shared import ADMIN_EMAIL, HF_TOKEN, SA_DB_PATH, _db_read, _db_read_one, _db_write, _db_write_sync
import aiosqlite
router = APIRouter()
@router.post("/api/analytics")
async def analytics_collect(request: Request, background_tasks: BackgroundTasks):
    import time as _t
    try:
        body = await request.json()
        event = (body.get("event") or "")[:50]
        if not event:
            return JSONResponse({"ok": False}, status_code=400)
        background_tasks.add_task(_analytics_bg_write, body, _t.time())
        return {"ok": True}
    except Exception as e:
        return JSONResponse({"ok": False}, status_code=500)
async def _chat_log_bg(email, url_hash, ref, user_msg, feat, ts):
    try:
        async with aiosqlite.connect(SA_DB_PATH, timeout=10.0) as db:
            await db.execute("PRAGMA journal_mode=WAL")
            if user_msg:
                await db.execute("INSERT INTO user_inputs (email,url_hash,url,input_type,input_text,feature,created_at) VALUES (?,?,?,?,?,?,?)",
                    (email, url_hash, ref[:500], "chat", user_msg, feat, ts))
                print(f"[log-bg] ✅ {email}/{feat}/{user_msg[:60]}")
            await db.execute("INSERT INTO feature_usage (email,feature,use_count,last_used_at) VALUES (?,?,1,?) ON CONFLICT(email,feature) DO UPDATE SET use_count=use_count+1, last_used_at=?",
                (email, feat, ts, ts))
            await db.commit()
    except Exception as e:
        print(f"[log-bg] {e}")
async def _analytics_bg_write(body, now):
    try:
        email = (body.get("email") or "").strip().lower()
        event = (body.get("event") or "")[:50]
        async with aiosqlite.connect(SA_DB_PATH, timeout=10.0) as db:
            await db.execute("PRAGMA journal_mode=WAL")
            await db.execute("""INSERT INTO analytics (
                email,event,feature,domain,url,provider,client,model,
                mode,engine,response_ms,tokens_used,success,error_msg,
                screen,os,browser,language,timezone,referrer,country,
                session_id,meta,created_at
            ) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)""", (
                email, event,
                (body.get("feature") or "")[:50],
                (body.get("domain") or "")[:200],
                (body.get("url") or "")[:500],
                (body.get("provider") or "demo")[:20],
                (body.get("client") or "")[:20],
                (body.get("model") or "")[:50],
                (body.get("mode") or "")[:20],
                (body.get("engine") or "")[:20],
                int(body.get("response_ms") or 0),
                int(body.get("tokens_used") or 0),
                1 if body.get("success", True) else 0,
                (body.get("error_msg") or "")[:200],
                (body.get("screen") or "")[:20],
                (body.get("os") or "")[:30],
                (body.get("browser") or "")[:50],
                (body.get("language") or "")[:10],
                (body.get("timezone") or "")[:40],
                (body.get("referrer") or "")[:500],
                (body.get("country") or "")[:10],
                (body.get("session_id") or "")[:30],
                (body.get("meta") or "")[:500],
                now,
            ))
            if email:
                updates = []
                params = []
                for field, col, maxlen in [("browser","last_browser",50),("os","last_os",30),("screen","last_screen",20),("language","last_language",10),("timezone","last_timezone",40),("country","last_country",10),("provider","last_provider",20),("client","last_client",20)]:
                    val = (body.get(field) or "")[:maxlen]
                    if val: updates.append(f"{col}=?"); params.append(val)
                ref = (body.get("referrer") or "")[:500]
                if ref: updates.append("first_referrer=CASE WHEN first_referrer='' OR first_referrer IS NULL THEN ? ELSE first_referrer END"); params.append(ref)
                if event in ("ai_response","marl_response","imagine_response"):
                    updates.append("total_ai_calls=total_ai_calls+1")
                    if not body.get("success", True): updates.append("total_ai_errors=total_ai_errors+1")
                if updates:
                    params.append(email)
                    await db.execute(f"UPDATE users SET {', '.join(updates)} WHERE email=?", tuple(params))
            await db.commit()
    except Exception as e:
        print(f"[analytics-bg] {e}")
@router.get("/api/admin/analytics")
async def admin_analytics(admin: str = ""):
    if admin != ADMIN_EMAIL:
        return JSONResponse({"error": "unauthorized"}, status_code=403)
    import time as _t
    now = _t.time()
    day_ago = now - 86400
    week_ago = now - 604800
    today_cnt = await _db_read_one("SELECT COUNT(*) as cnt FROM analytics WHERE created_at>?", (day_ago,))
    week_cnt = await _db_read_one("SELECT COUNT(*) as cnt FROM analytics WHERE created_at>?", (week_ago,))
    events_by_type = await _db_read("SELECT event, COUNT(*) as cnt FROM analytics WHERE created_at>? GROUP BY event ORDER BY cnt DESC LIMIT 20", (week_ago,))
    features_week = await _db_read("SELECT feature, COUNT(*) as cnt FROM analytics WHERE feature!='' AND created_at>? GROUP BY feature ORDER BY cnt DESC LIMIT 20", (week_ago,))
    clients = await _db_read("SELECT client, COUNT(*) as cnt FROM analytics WHERE client!='' AND created_at>? GROUP BY client ORDER BY cnt DESC", (week_ago,))
    providers = await _db_read("SELECT provider, COUNT(*) as cnt FROM analytics WHERE provider!='' AND created_at>? GROUP BY provider ORDER BY cnt DESC", (week_ago,))
    ai_perf = await _db_read_one("SELECT AVG(response_ms) as avg_ms, COUNT(*) as cnt, SUM(CASE WHEN success=0 THEN 1 ELSE 0 END) as errors FROM analytics WHERE event IN ('ai_response','marl_response') AND created_at>?", (week_ago,))
    os_stats = await _db_read("SELECT os, COUNT(*) as cnt FROM analytics WHERE os!='' AND created_at>? GROUP BY os ORDER BY cnt DESC LIMIT 10", (week_ago,))
    browser_stats = await _db_read("SELECT browser, COUNT(*) as cnt FROM analytics WHERE browser!='' AND created_at>? GROUP BY browser ORDER BY cnt DESC LIMIT 10", (week_ago,))
    engine_stats = await _db_read("SELECT engine, COUNT(*) as cnt FROM analytics WHERE engine!='' AND created_at>? GROUP BY engine ORDER BY cnt DESC LIMIT 10", (week_ago,))
    return {
        "today_events": dict(today_cnt)["cnt"] if today_cnt else 0,
        "week_events": dict(week_cnt)["cnt"] if week_cnt else 0,
        "events_by_type": [dict(e) for e in events_by_type],
        "features_week": [dict(f) for f in features_week],
        "clients": [dict(c) for c in clients],
        "providers": [dict(p) for p in providers],
        "ai_performance": dict(ai_perf) if ai_perf else {},
        "os_stats": [dict(o) for o in os_stats],
        "browser_stats": [dict(b) for b in browser_stats],
        "engine_stats": [dict(e) for e in engine_stats],
    }
@router.delete("/api/admin/user/{email}")
async def admin_delete_user(email: str, request: Request):
    admin_email = request.headers.get("X-Admin-Email", "")
    if admin_email != ADMIN_EMAIL:
        return JSONResponse({"error": "unauthorized"}, status_code=403)
    await _db_write_sync("DELETE FROM user_inputs WHERE email=?", (email,))
    await _db_write_sync("DELETE FROM page_visits WHERE email=?", (email,))
    await _db_write_sync("DELETE FROM feature_usage WHERE email=?", (email,))
    await _db_write_sync("DELETE FROM users WHERE email=?", (email,))
    return {"status": "deleted", "email": email}
@router.delete("/api/admin/record/{table}/{record_id}")
async def admin_delete_record(table: str, record_id: int, request: Request):
    admin_email = request.headers.get("X-Admin-Email", "")
    if admin_email != ADMIN_EMAIL:
        return JSONResponse({"error": "unauthorized"}, status_code=403)
    allowed = {"users","page_visits","user_inputs","feature_usage"}
    if table not in allowed:
        return JSONResponse({"error": f"table not allowed: {table}"}, status_code=400)
    await _db_write_sync(f"DELETE FROM {table} WHERE id=?", (record_id,))
    return {"status": "deleted", "table": table, "id": record_id}
@router.get("/api/admin/dashboard")
async def admin_dashboard(request: Request):
    admin_email = request.query_params.get("admin", "")
    if admin_email != ADMIN_EMAIL:
        return JSONResponse({"error": "unauthorized"}, status_code=403)
    users = await _db_read("SELECT COUNT(*) as cnt FROM users")
    visits = await _db_read("SELECT COUNT(*) as cnt FROM page_visits")
    inputs = await _db_read("SELECT COUNT(*) as cnt FROM user_inputs")
    top_domains = await _db_read("SELECT domain, SUM(visit_count) as cnt FROM page_visits GROUP BY domain ORDER BY cnt DESC LIMIT 20")
    top_features = await _db_read("SELECT feature, SUM(use_count) as cnt FROM feature_usage GROUP BY feature ORDER BY cnt DESC")
    recent_users = await _db_read("SELECT email, nickname, role, visit_count, last_seen_at FROM users ORDER BY last_seen_at DESC LIMIT 20")
    return {
        "total_users": dict(users[0])["cnt"],
        "total_visits": dict(visits[0])["cnt"],
        "total_inputs": dict(inputs[0])["cnt"],
        "top_domains": [dict(d) for d in top_domains],
        "top_features": [dict(f) for f in top_features],
        "recent_users": [dict(u) for u in recent_users],
    }
@router.get("/privacy")
async def privacy_policy():
    return HTMLResponse("""<!DOCTYPE html>
<html lang="ko"><head><meta charset="UTF-8"><meta name="viewport" content="width=device-width,initial-scale=1">
<title>SiteAgent Privacy Policy</title>
<style>*{margin:0;padding:0;box-sizing:border-box;}body{font-family:-apple-system,sans-serif;background:#fafafa;color:#333;line-height:1.8;}
.w{max-width:720px;margin:0 auto;padding:40px 20px 60px;}h1{font-size:24px;font-weight:800;color:#1a1a2e;margin-bottom:8px;}
.sub{font-size:13px;color:#888;margin-bottom:32px;}h2{font-size:16px;font-weight:700;color:#5856D6;margin:28px 0 10px;padding-bottom:6px;border-bottom:2px solid rgba(88,86,214,.1);}
p{margin-bottom:12px;font-size:14px;}ul{margin:8px 0 12px 20px;font-size:14px;}li{margin-bottom:4px;}
.ft{margin-top:40px;padding-top:20px;border-top:1px solid #eee;font-size:12px;color:#999;text-align:center;}a{color:#5856D6;}b{color:#1a1a2e;}</style>
</head><body><div class="w">
<h1>SiteAgent 개인정보처리방침</h1>
<p class="sub">Privacy Policy | Last Updated: March 16, 2026</p>
<h2>1. 수집하는 정보</h2>
<ul><li><b>Google 이메일 주소</b>: 사용자 식별 및 개인화 서비스 (Chrome Identity API)</li>
<li><b>방문 URL 및 페이지 제목</b>: AI 분석 기능 제공</li>
<li><b>사용자 입력 텍스트</b>: AI 채팅, 검색, 분석 요청</li>
<li><b>기능 사용 통계</b>: 서비스 개선용</li></ul>
<h2>2. 이용 목적</h2>
<ul><li>AI 기반 웹 분석, 번역, 검색, 이미지 생성 서비스 제공</li>
<li>사용자별 개인화 경험 제공</li><li>서비스 품질 개선</li></ul>
<h2>3. 저장 및 보관</h2>
<ul><li>사용자 설정: 브라우저 로컬 스토리지 (chrome.storage.local)</li>
<li>활동 로그: Hugging Face 서버 저장</li>
<li>API 키: 로컬에만 저장, 외부 전송 없음</li></ul>
<h2>4. 제3자 제공</h2>
<p>개인정보를 <b>제3자에게 판매/공유하지 않습니다</b>. AI 처리를 위해 Groq API, fal.ai API와 통신합니다.</p>
<h2>5. 사용자 권리</h2>
<ul><li>확장 제거 시 로컬 데이터 자동 삭제</li>
<li>서버 데이터 삭제: arxivgpt@gmail.com으로 요청</li></ul>
<h2>6. 보안</h2><p>HTTPS 암호화 통신, Hugging Face 보안 인프라 사용.</p>
<h2>7. 아동 보호</h2><p>13세 미만 아동의 정보를 고의로 수집하지 않습니다.</p>
<h2>8. 문의</h2><p>Email: <a href="mailto:arxivgpt@gmail.com">arxivgpt@gmail.com</a><br>Ginigen AI | Seoul, South Korea</p>
<hr style="margin:24px 0;border:none;border-top:1px solid #eee;">
<h2>English Summary</h2>
<p>SiteAgent collects Google email (via Chrome Identity API), visited URLs, user input text, and usage statistics for AI service delivery. We do NOT sell or share personal data. Data is transmitted to Groq API and fal.ai solely for AI processing. Uninstalling removes all local data. Server data deletion: arxivgpt@gmail.com</p>
<div class="ft">&copy; 2026 Ginigen AI. <a href="https://ginigen-ai-siteagent.hf.space">ginigen-ai-siteagent.hf.space</a></div>
</div></body></html>""")
DOMAIN_CATEGORIES = {
    "shopping": ["coupang.com","gmarket.co.kr","11st.co.kr","auction.co.kr","ssg.com","musinsa.com","kurly.com","oliveyoung.co.kr","amazon.com","ebay.com","aliexpress.com","temu.com","zigzag.kr","ably.co.kr","brandi.co.kr","29cm.co.kr","wconcept.com"],
    "food_delivery": ["baemin.com","yogiyo.co.kr","coupangeats.com","ddangyo.com"],
    "news": ["naver.com","daum.net","chosun.com","donga.com","joongang.co.kr","hankyung.com","mk.co.kr","sedaily.com","hani.co.kr","yna.co.kr","yonhapnews.co.kr","bbc.com","cnn.com","reuters.com","nytimes.com"],
    "tech": ["github.com","stackoverflow.com","medium.com","dev.to","velog.io","tistory.com","brunch.co.kr","huggingface.co","arxiv.org","kaggle.com","producthunt.com","techcrunch.com"],
    "finance": ["finance.naver.com","investing.com","tradingview.com","coinmarketcap.com","upbit.com","bithumb.com","kbstar.com","shinhan.com","wooribank.com","toss.im","kakaopay.com","samsung.com/sec"],
    "entertainment": ["youtube.com","netflix.com","watcha.com","tving.com","wavve.com","twitch.tv","afreecatv.com","spotify.com","melon.com","flo.com","webtoon.com","series.naver.com"],
    "social": ["instagram.com","twitter.com","x.com","facebook.com","threads.net","reddit.com","linkedin.com","kakaotalk.com","band.us"],
    "travel": ["booking.com","agoda.com","hotels.com","airbnb.com","skyscanner.co.kr","trip.com","yanolja.com","goodchoice.kr","klook.com","myrealtrip.com"],
    "education": ["coursera.org","udemy.com","edx.org","inflearn.com","class101.net","fastcampus.co.kr","coloso.co.kr","megastudy.net","etoos.com"],
    "realestate": ["zigbang.com","dabangapp.com","peterpanz.com","land.naver.com","hogangnono.com"],
    "health": ["hidoc.co.kr","gangnam.go.kr","health.kr","pilates.com","yoga.com"],
    "job": ["jobkorea.co.kr","saramin.co.kr","wanted.co.kr","rocketpunch.com","linkedin.com/jobs","programmers.co.kr"],
}
FEATURE_PERSONA = {
    "요약": "efficiency_seeker",
    "분석": "analytical_mind",
    "번역": "multilingual_user",
    "검색": "info_hunter",
    "딥분석": "deep_thinker",
    "아이디어": "creative_innovator",
    "금융": "investor",
    "XRay": "tech_specialist",
    "날씨": "planner",
    "타이머": "productivity_focused",
    "image_gen": "visual_creator",
    "image_edit": "visual_creator",
    "chat": "conversationalist",
}
def _infer_user_profile(domains, features, inputs, analytics_events):
    interest_scores = {}
    for d in domains:
        domain = d.get("domain", "")
        cnt = d.get("cnt", 0) or d.get("count", 0)
        for cat, cat_domains in DOMAIN_CATEGORIES.items():
            if any(cd in domain for cd in cat_domains):
                interest_scores[cat] = interest_scores.get(cat, 0) + cnt
    persona_scores = {}
    for f in features:
        feat = f.get("feature", "")
        cnt = f.get("cnt", 0) or f.get("count", 0)
        for key, persona in FEATURE_PERSONA.items():
            if key in feat:
                persona_scores[persona] = persona_scores.get(persona, 0) + cnt
    keywords = {}
    stopwords = {"현재","페이지","내용","요약","분석","번역","해줘","해주세요","알려줘","뭐야","어떻게",
                 "무엇","있나","어디","검색","관련","최신","정보","사이트","보여줘","좀","것","수","이",
                 "그","저","더","를","을","에","의","가","는","은","도","로","와","과","한","된","하는",
                 "the","what","how","is","a","an","to","and","of","for","in","on","this","that",
                 "about","can","you","me","please","tell","show","find","get"}
    for inp in inputs:
        text = inp.get("input_text", "") or ""
        words = [w.strip().lower() for w in text.replace(",","").replace(".","").replace("?","").replace("!","").split() if len(w.strip()) >= 2]
        for w in words:
            if w not in stopwords and len(w) <= 20:
                keywords[w] = keywords.get(w, 0) + 1
    hour_dist = [0] * 24
    for ev in analytics_events:
        ts = ev.get("created_at", 0)
        if ts:
            from datetime import datetime, timezone, timedelta
            try:
                dt = datetime.fromtimestamp(ts, tz=timezone(timedelta(hours=9)))
                hour_dist[dt.hour] += 1
            except: pass
    peak_hours = sorted(range(24), key=lambda h: hour_dist[h], reverse=True)[:3]
    if hour_dist[peak_hours[0]] > 0:
        if peak_hours[0] < 6: time_persona = "night_owl"
        elif peak_hours[0] < 12: time_persona = "morning_person"
        elif peak_hours[0] < 18: time_persona = "daytime_worker"
        else: time_persona = "evening_user"
    else:
        time_persona = "unknown"
    INTENT_PATTERNS = {
        "purchase_intent": {"keywords":["가격","비교","할인","쿠폰","최저가","구매","사다","주문","배송","무료배송","세일","가성비","추천","리뷰","후기","평점","별점","cheapest","buy","order","price","deal","discount","coupon","review"],"label":"구매 의도","cta_type":"shopping","emoji":"🛒"},
        "food_intent": {"keywords":["맛집","배달","치킨","피자","족발","떡볶이","초밥","카페","커피","점심","저녁","식당","메뉴","음식","recipe","food","restaurant","delivery","lunch","dinner"],"label":"음식/배달","cta_type":"food","emoji":"🍕"},
        "investment_intent": {"keywords":["주식","코인","비트코인","이더리움","투자","수익률","배당","etf","매수","매도","차트","시세","종목","펀드","금리","환율","stock","crypto","invest","trading","portfolio","dividend"],"label":"투자/금융","cta_type":"finance","emoji":"📈"},
        "job_intent": {"keywords":["이력서","자소서","면접","채용","연봉","취업","이직","경력","포트폴리오","지원","합격","resume","interview","salary","career","hiring","job"],"label":"취업/이직","cta_type":"job","emoji":"💼"},
        "learning_intent": {"keywords":["배우다","공부","강의","튜토리얼","입문","기초","심화","자격증","시험","learn","study","course","tutorial","beginner","certification","exam","교육"],"label":"학습/교육","cta_type":"education","emoji":"📚"},
        "travel_intent": {"keywords":["여행","항공","호텔","숙소","관광","비행기","예약","가볼만한","travel","flight","hotel","booking","vacation","trip","tour","airbnb"],"label":"여행 계획","cta_type":"travel","emoji":"✈️"},
        "health_intent": {"keywords":["건강","운동","다이어트","헬스","필라테스","요가","병원","약","증상","치료","영양","health","exercise","diet","gym","yoga","symptom","medical"],"label":"건강/운동","cta_type":"health","emoji":"🏃"},
        "tech_intent": {"keywords":["코딩","프로그래밍","개발","api","python","javascript","react","서버","데이터베이스","클라우드","ai","인공지능","모델","github","code","programming","developer","deploy"],"label":"기술/개발","cta_type":"saas","emoji":"💻"},
        "realestate_intent": {"keywords":["아파트","전세","월세","매매","부동산","분양","청약","임대","평수","이사","집","rent","apartment","housing","mortgage","property"],"label":"부동산","cta_type":"realestate","emoji":"🏠"},
        "entertainment_intent": {"keywords":["영화","드라마","넷플릭스","웹툰","게임","음악","유튜브","스트리밍","movie","drama","netflix","game","music","youtube","anime","webtoon"],"label":"엔터테인먼트","cta_type":"entertainment","emoji":"🎬"},
        "comparison_intent": {"keywords":["비교","차이","뭐가 나은","뭐가 좋","어떤 게","vs","versus","compare","difference","better","which","recommend"],"label":"비교/의사결정","cta_type":"general","emoji":"⚖️"},
        "creative_intent": {"keywords":["디자인","이미지","그림","로고","포스터","만들어","생성","그려","아이디어","영감","design","image","create","generate","logo","poster","creative","inspiration"],"label":"창작/디자인","cta_type":"creative","emoji":"🎨"},
    }
    intent_counts = {}
    input_analysis = []
    topic_clusters = {}
    complexity_dist = {"simple":0,"medium":0,"complex":0}
    action_signals = []
    for inp in inputs:
        text = (inp.get("input_text","") or "").strip()
        if not text or len(text) < 3: continue
        feature = inp.get("feature","") or ""
        url = inp.get("url","") or ""
        created = inp.get("created_at",0)
        text_lower = text.lower()
        matched_intents = []
        for intent_key, pattern in INTENT_PATTERNS.items():
            score = sum(1 for kw in pattern["keywords"] if kw in text_lower)
            if score > 0:
                matched_intents.append({"intent":intent_key,"score":score,"label":pattern["label"],"emoji":pattern["emoji"],"cta_type":pattern["cta_type"]})
                intent_counts[intent_key] = intent_counts.get(intent_key,0) + score
        matched_intents.sort(key=lambda x: x["score"], reverse=True)
        primary_intent = matched_intents[0] if matched_intents else {"intent":"general","label":"일반","emoji":"💬","score":0,"cta_type":"general"}
        word_count = len(text.split())
        has_question = "?" in text or "뭐" in text or "어떻게" in text or "왜" in text
        if word_count <= 5: complexity = "simple"
        elif word_count <= 20 or not has_question: complexity = "medium"
        else: complexity = "complex"
        complexity_dist[complexity] += 1
        topic_key = primary_intent["label"]
        if topic_key not in topic_clusters: topic_clusters[topic_key] = []
        topic_clusters[topic_key].append({"text":text[:100],"feature":feature,"created_at":created})
        purchase_signals = ["사고 싶","구매","주문","결제","신청","가입","등록","subscribe","buy","order","signup"]
        if any(sig in text_lower for sig in purchase_signals):
            action_signals.append({"text":text[:100],"intent":primary_intent["label"],"feature":feature,"created_at":created})
        input_analysis.append({"text":text[:100],"intent":primary_intent["label"],"intent_emoji":primary_intent["emoji"],"complexity":complexity,"feature":feature,"url_domain":url.split("/")[2] if url.count("/")>=2 else "","created_at":created})
    top_intents = sorted(intent_counts.items(), key=lambda x: x[1], reverse=True)[:8]
    intent_labels = {k:v["label"] for k,v in INTENT_PATTERNS.items()}
    intent_emojis = {k:v["emoji"] for k,v in INTENT_PATTERNS.items()}
    topic_summary = []
    for topic, items in sorted(topic_clusters.items(), key=lambda x: len(x[1]), reverse=True)[:8]:
        topic_summary.append({"topic":topic,"count":len(items),"recent":items[0]["text"] if items else "","last_at":items[0].get("created_at",0) if items else 0})
    behavior_summary = []
    if top_intents:
        top_label = intent_labels.get(top_intents[0][0], top_intents[0][0])
        behavior_summary.append(f"가장 많이 관심 있는 주제: {top_label} ({top_intents[0][1]}회 언급)")
    if complexity_dist["complex"] > complexity_dist["simple"]:
        behavior_summary.append("심층 분석형 사용자 — 복잡한 질문을 자주 합니다")
    elif complexity_dist["simple"] > complexity_dist["complex"] * 2:
        behavior_summary.append("간편 사용형 — 짧은 명령 위주로 사용합니다")
    if action_signals:
        behavior_summary.append(f"구매/전환 신호 {len(action_signals)}건 감지 — 광고 반응 가능성 높음")
    if len(topic_clusters) >= 4:
        behavior_summary.append(f"다양한 관심사 보유 ({len(topic_clusters)}개 주제) — 종합 타겟팅 적합")
    elif len(topic_clusters) == 1:
        behavior_summary.append("단일 주제 집중형 — 정밀 타겟팅 적합")
    cta_recommendations = []
    for intent_key, count in top_intents:
        pattern = INTENT_PATTERNS.get(intent_key, {})
        if count >= 2 and pattern:
            cta_recommendations.append({"type":pattern.get("cta_type","general"),"label":f"{pattern['emoji']} {pattern['label']} 관련 추천","priority":min(10,6+count),"reason":f"입력에서 '{pattern['label']}' 의도 {count}회 감지","source":"input_analysis"})
    interest_names = [i[0] for i in sorted(interest_scores.items(), key=lambda x: x[1], reverse=True)[:5]]
    DOMAIN_CTA_MAP = {"shopping":{"label":"🛒 쿠팡 최저가 비교","priority":9},"food_delivery":{"label":"🍕 배달 할인 쿠폰","priority":9},"finance":{"label":"📈 투자 리포트 구독","priority":8},"tech":{"label":"💻 AI 도구 프리미엄","priority":8},"travel":{"label":"✈️ 항공권 특가 알림","priority":7},"education":{"label":"📚 온라인 강의 할인","priority":7},"entertainment":{"label":"🎬 OTT 무료 체험","priority":6},"realestate":{"label":"🏠 신규 매물 알림","priority":8},"job":{"label":"💼 맞춤 채용 추천","priority":8}}
    for cat_name in interest_names:
        if cat_name in DOMAIN_CTA_MAP:
            existing_types = [c["type"] for c in cta_recommendations]
            if cat_name not in existing_types:
                info = DOMAIN_CTA_MAP[cat_name]
                cta_recommendations.append({"type":cat_name,"label":info["label"],"priority":info["priority"],"reason":f"'{cat_name}' 사이트 빈번 방문","source":"domain_analysis"})
    if "visual_creator" in [p[0] for p in sorted(persona_scores.items(), key=lambda x: x[1], reverse=True)[:3]]:
        cta_recommendations.append({"type":"creative","label":"🎨 디자인 툴 프리미엄","priority":7,"reason":"이미지 생성/편집 다수 사용","source":"feature_analysis"})
    total_usage = sum(f.get("cnt",0) or f.get("count",0) for f in features)
    if total_usage > 20:
        cta_recommendations.append({"type":"upsell","label":"⭐ SiteAgent Pro 업그레이드","priority":10,"reason":f"누적 {total_usage}회 사용 — 파워유저","source":"usage"})
    if action_signals:
        cta_recommendations.append({"type":"conversion","label":"🔥 즉시 전환 가능","priority":10,"reason":f"구매/신청 의도 {len(action_signals)}건 직접 감지","source":"action_signal"})
    cta_recommendations.sort(key=lambda x: x["priority"], reverse=True)
    engagement = min(100, total_usage * 2 + len(domains) * 3 + len(inputs))
    top_interests = sorted(interest_scores.items(), key=lambda x: x[1], reverse=True)[:5]
    top_personas = sorted(persona_scores.items(), key=lambda x: x[1], reverse=True)[:3]
    top_keywords = sorted(keywords.items(), key=lambda x: x[1], reverse=True)[:15]
    return {
        "interests":[{"category":i[0],"score":i[1]} for i in top_interests],
        "personas":[{"type":p[0],"score":p[1]} for p in top_personas],
        "keywords":[{"word":k[0],"count":k[1]} for k in top_keywords],
        "time_persona":time_persona,"peak_hours":peak_hours,"hour_distribution":hour_dist,
        "engagement_score":engagement,"cta_recommendations":cta_recommendations[:7],
        "input_behavior":{
            "total_inputs":len(inputs),"analyzed":len(input_analysis),
            "intent_ranking":[{"intent":intent_labels.get(i[0],i[0]),"emoji":intent_emojis.get(i[0],"💬"),"score":i[1]} for i in top_intents],
            "topic_clusters":topic_summary,"complexity":complexity_dist,
            "action_signals":action_signals[:10],"behavior_summary":behavior_summary,
            "recent_analysis":input_analysis[:20],
        },
    }
@router.get("/api/admin/user-profile/{email}")
async def admin_user_full_profile(email: str, admin: str = ""):
    if admin != ADMIN_EMAIL:
        return JSONResponse({"error": "unauthorized"}, status_code=403)
    email = email.strip().lower()
    user = await _db_read_one("SELECT * FROM users WHERE email=?", (email,))
    if not user:
        return JSONResponse({"error": "user not found"}, status_code=404)
    domains = await _db_read("SELECT domain, SUM(visit_count) as cnt FROM page_visits WHERE email=? GROUP BY domain ORDER BY cnt DESC LIMIT 50", (email,))
    features = await _db_read("SELECT feature, use_count as cnt FROM feature_usage WHERE email=? ORDER BY cnt DESC", (email,))
    inputs = await _db_read("SELECT input_text, feature, url, created_at FROM user_inputs WHERE email=? ORDER BY created_at DESC LIMIT 100", (email,))
    events = await _db_read("SELECT event, feature, domain, created_at FROM analytics WHERE email=? ORDER BY created_at DESC LIMIT 200", (email,))
    profile = _infer_user_profile(
        [dict(d) for d in domains],
        [dict(f) for f in features],
        [dict(i) for i in inputs],
        [dict(e) for e in events],
    )
    return {
        "user": dict(user),
        "profile": profile,
        "domains": [dict(d) for d in domains[:20]],
        "features": [dict(f) for f in features],
        "recent_inputs": [dict(i) for i in inputs[:20]],
    }
@router.get("/api/admin/segments")
async def admin_segments(admin: str = ""):
    if admin != ADMIN_EMAIL:
        return JSONResponse({"error": "unauthorized"}, status_code=403)
    users = await _db_read("SELECT email, visit_count, total_ai_calls, last_provider, last_client, last_browser, last_os FROM users ORDER BY visit_count DESC")
    segments = {"power_users": [], "casual_users": [], "new_users": [], "by_client": {}, "by_provider": {}, "by_os": {}, "by_browser": {}}
    for u in users:
        ud = dict(u)
        vc = ud.get("visit_count", 0) or 0
        ai = ud.get("total_ai_calls", 0) or 0
        if vc > 10 or ai > 20:
            segments["power_users"].append(ud["email"])
        elif vc > 3:
            segments["casual_users"].append(ud["email"])
        else:
            segments["new_users"].append(ud["email"])
        cl = ud.get("last_client") or "unknown"
        segments["by_client"].setdefault(cl, []).append(ud["email"])
        pv = ud.get("last_provider") or "demo"
        segments["by_provider"].setdefault(pv, []).append(ud["email"])
        os_name = ud.get("last_os") or "unknown"
        segments["by_os"].setdefault(os_name, []).append(ud["email"])
        br = ud.get("last_browser") or "unknown"
        segments["by_browser"].setdefault(br, []).append(ud["email"])
    for key in ["by_client", "by_provider", "by_os", "by_browser"]:
        segments[key] = {k: len(v) for k, v in segments[key].items()}
    segments["power_users_count"] = len(segments["power_users"])
    segments["casual_users_count"] = len(segments["casual_users"])
    segments["new_users_count"] = len(segments["new_users"])
    return segments
@router.get("/api/admin/insights")
async def admin_insights(admin: str = ""):
    if admin != ADMIN_EMAIL:
        return JSONResponse({"error": "unauthorized"}, status_code=403)
    users = await _db_read("SELECT email FROM users WHERE role!='admin'")
    all_ctas = {}
    all_intents = {}
    all_topics = {}
    all_keywords = {}
    all_personas = {}
    all_interests = {}
    user_profiles = []
    complexity_total = {"simple":0,"medium":0,"complex":0}
    total_action_signals = 0
    total_engagement = 0
    behavior_flags = []
    for u in users:
        email = dict(u)["email"]
        try:
            domains = await _db_read("SELECT domain, SUM(visit_count) as cnt FROM page_visits WHERE email=? GROUP BY domain ORDER BY cnt DESC LIMIT 30", (email,))
            features = await _db_read("SELECT feature, use_count as cnt FROM feature_usage WHERE email=? ORDER BY cnt DESC", (email,))
            inputs = await _db_read("SELECT input_text, feature, url, created_at FROM user_inputs WHERE email=? ORDER BY created_at DESC LIMIT 50", (email,))
            events = await _db_read("SELECT event, feature, domain, created_at FROM analytics WHERE email=? ORDER BY created_at DESC LIMIT 100", (email,))
            if not inputs and not domains: continue
            profile = _infer_user_profile([dict(d) for d in domains],[dict(f) for f in features],[dict(i) for i in inputs],[dict(e) for e in events])
            for cta in profile.get("cta_recommendations",[]):
                key = cta["label"]
                if key not in all_ctas: all_ctas[key] = {"label":key,"type":cta.get("type",""),"reason":cta.get("reason",""),"priority":cta.get("priority",0),"users":[],"source":cta.get("source","")}
                all_ctas[key]["users"].append(email)
                all_ctas[key]["priority"] = max(all_ctas[key]["priority"], cta.get("priority",0))
            ib = profile.get("input_behavior",{})
            for ir in ib.get("intent_ranking",[]):
                k = ir["intent"]
                all_intents[k] = all_intents.get(k,0) + ir["score"]
            for tc in ib.get("topic_clusters",[]):
                k = tc["topic"]
                all_topics[k] = all_topics.get(k,0) + tc["count"]
            cx = ib.get("complexity",{})
            for ck in ["simple","medium","complex"]:
                complexity_total[ck] += cx.get(ck,0)
            total_action_signals += len(ib.get("action_signals",[]))
            for kw in profile.get("keywords",[]):
                k = kw["word"]
                all_keywords[k] = all_keywords.get(k,0) + kw["count"]
            for p in profile.get("personas",[]):
                k = p["type"]
                all_personas[k] = all_personas.get(k,0) + p["score"]
            for i in profile.get("interests",[]):
                k = i["category"]
                all_interests[k] = all_interests.get(k,0) + i["score"]
            total_engagement += profile.get("engagement_score",0)
            summary_line = email.split("@")[0]
            top_cta = profile["cta_recommendations"][0]["label"] if profile.get("cta_recommendations") else "없음"
            top_intent_label = ib["intent_ranking"][0]["intent"] if ib.get("intent_ranking") else "미확인"
            user_profiles.append({"email":email,"engagement":profile.get("engagement_score",0),"top_cta":top_cta,"top_intent":top_intent_label,"action_signals":len(ib.get("action_signals",[])),"input_count":ib.get("total_inputs",0)})
        except Exception as e:
            print(f"[insights] skip {email}: {e}")
            continue
    cta_list = sorted(all_ctas.values(), key=lambda x: len(x["users"]), reverse=True)
    for c in cta_list: c["user_count"] = len(c["users"]); c["users"] = c["users"][:5]
    user_profiles.sort(key=lambda x: x["engagement"], reverse=True)
    return {
        "total_analyzed": len(user_profiles),
        "avg_engagement": round(total_engagement/max(len(user_profiles),1),1),
        "total_action_signals": total_action_signals,
        "complexity": complexity_total,
        "cta_ranking": cta_list[:15],
        "intent_ranking": sorted([{"intent":k,"score":v} for k,v in all_intents.items()], key=lambda x:x["score"], reverse=True)[:12],
        "topic_ranking": sorted([{"topic":k,"count":v} for k,v in all_topics.items()], key=lambda x:x["count"], reverse=True)[:12],
        "keyword_ranking": sorted([{"word":k,"count":v} for k,v in all_keywords.items()], key=lambda x:x["count"], reverse=True)[:20],
        "persona_ranking": sorted([{"type":k,"score":v} for k,v in all_personas.items()], key=lambda x:x["score"], reverse=True)[:8],
        "interest_ranking": sorted([{"category":k,"score":v} for k,v in all_interests.items()], key=lambda x:x["score"], reverse=True)[:10],
        "user_profiles": user_profiles[:30],
    }
@router.get("/admin")
async def admin_page(request: Request):
    admin = request.query_params.get("key", "")
    if admin != ADMIN_EMAIL:
        return HTMLResponse("<h1>🔒 접근 거부</h1><p>?key=관리자이메일 필요</p>", status_code=403)
    for p in [Path(__file__).parent/"admin_dashboard.html", Path(".")/"admin_dashboard.html", Path("/home/user/app")/"admin_dashboard.html"]:
        try:
            if p.exists():
                return HTMLResponse(p.read_text(encoding="utf-8"))
        except: pass
    return HTMLResponse("""<!DOCTYPE html><html><head><meta charset="UTF-8"><title>SiteAgent Admin</title></head><body style="font-family:sans-serif;background:#f5f5f7;padding:40px;text-align:center;">
<h1>❌ admin_dashboard.html 파일 없음</h1>
<p>HF Space 파일 탐색기에 admin_dashboard.html을 업로드하세요.</p>
<hr><h3>API 직접 테스트</h3>
<pre id="r" style="text-align:left;background:#fff;padding:20px;border-radius:8px;max-width:600px;margin:20px auto;font-size:12px;overflow:auto;">테스트 중...</pre>
<script>
async function t(){var o='';
var apis=['/api/admin/dashboard','/api/admin/analytics','/api/admin/all-visits','/api/admin/inputs','/api/admin/waitlist','/api/admin/segments','/api/admin/insights'];
for(var a of apis){try{var r=await fetch(a+'?admin="""+ADMIN_EMAIL+"""');o+=a+' → '+r.status+'\\n';if(r.ok){var d=await r.json();o+=JSON.stringify(d).substring(0,100)+'\\n\\n';}else{var t=await r.text();o+=t.substring(0,100)+'\\n\\n';}}catch(e){o+=a+' → ERROR: '+e.message+'\\n\\n';}}
document.getElementById('r').textContent=o;}t();
</script></body></html>""", status_code=200)
@router.get("/api/admin/all-visits")
async def admin_all_visits(request: Request):
    admin_email = request.query_params.get("admin", "")
    if admin_email != ADMIN_EMAIL:
        return JSONResponse({"error": "unauthorized"}, status_code=403)
    rows = await _db_read(
        "SELECT id,email,url_hash,url,title,domain,visit_count,last_visited_at FROM page_visits ORDER BY last_visited_at DESC LIMIT 100"
    )
    return {"visits": [dict(r) for r in rows]}
@router.get("/api/admin/inputs")
async def admin_inputs(request: Request):
    admin_email = request.query_params.get("admin", "")
    if admin_email != ADMIN_EMAIL:
        return JSONResponse({"error": "unauthorized"}, status_code=403)
    rows = await _db_read("SELECT id,email,feature,input_text,url,created_at FROM user_inputs ORDER BY created_at DESC LIMIT 100")
    return {"inputs": [dict(r) for r in rows]}