"""VNEWS - FastAPI backend with livescore + xemlaibongda highlights + YouTube FPT shorts"""
import hashlib, re, time, subprocess, json, os, threading
import html as html_lib
from datetime import datetime
from concurrent.futures import ThreadPoolExecutor, as_completed
from fastapi import FastAPI, Query, Request
from fastapi.responses import HTMLResponse, JSONResponse, StreamingResponse, Response
from fastapi.staticfiles import StaticFiles
from urllib.parse import unquote, quote, urlencode
import requests
from bs4 import BeautifulSoup

# FastAPI application; the route decorators below register handlers on it.
app = FastAPI()
# Default browser-like headers sent with most outbound scraping requests.
HEADERS = {"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36","Accept-Language":"vi-VN,vi;q=0.9,en;q=0.8"}
# Headers used by fetch_bongda_api() for bongda.com.vn (adds Referer and X-Requested-With).
BONGDA_HEADERS = {"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36","Accept-Language":"vi-VN,vi;q=0.9","Referer":"https://bongda.com.vn/lich-thi-dau","X-Requested-With":"XMLHttpRequest"}
# Base URL of the bongdaplus.vn site (video listing and embed pages).
BASE_BDP = "https://bongdaplus.vn"
# NOTE(review): presumably the public URL of this deployment; not referenced in the visible part of the file.
SPACE_URL = "https://bep40-vnews.hf.space"
# In-memory cache used by _cached(): key -> {"d": data, "t": fetch timestamp}.
_cache = {}
# Cache lifetimes in seconds: default, live-score endpoints, YouTube shorts feed.
_cache_ttl = 300
_cache_ttl_live = 60
_cache_ttl_yt = 1800
# Static list of YouTube videos (id, title, channel handle). scrape_shorts()
# appends these after the freshly scraped items, so the feed is never empty.
SHORTS_FALLBACK = [
{"id":"Lu_iCQ5YwNM","title":"Công an lập hồ sơ xử lý người phụ nữ chửi bới, tát nam tài xế ô tô ở Hà Nội | #shorts","channel":"baodantri7941"},
{"id":"CwWvijF8BOA","title":"Chú rể Ninh Bình bật khóc nhận món quà bí mật người cha quá cố gửi 26 năm trước | #shorts","channel":"baodantri7941"},
{"id":"tvPewsc2ph4","title":"Tính năng ẩn trên iPhone giúp giảm mỏi mắt | #shorts","channel":"baodantri7941"},
{"id":"b1Nxzv9ixlU","title":"Y án 3 năm tù với nữ tài xế uống 8 lon bia lái xe tông chủ tịch xã tử vong | #shorts","channel":"baodantri7941"},
{"id":"Xp5eTwAZAis","title":"Người đánh hàng xóm tại chung cư ở Hà Nội bị tuyên hơn 4 tháng tù | #shorts","channel":"baodantri7941"},
{"id":"Htzvwg6iOBM","title":"Xe điện Audi S6 Sportback e-tron có gì đặc biệt? | #shorts","channel":"baodantri7941"},
{"id":"iMdFmWvYdlo","title":"Cô gái người Nga yêu thời trang và đất nước Việt Nam | #shorts","channel":"baodantri7941"},
{"id":"IVaRc6moEv8","title":"Người nông dân Trung Quốc đột quỵ, bệnh viện giúp bán sạch 4 tấn táo | #shorts","channel":"baodantri7941"},
{"id":"uVxqPxToItU","title":"Công an vào cuộc vụ người phụ nữ chửi bới, hành hung tài xế ô tô ở Hà Nội | #shorts","channel":"baodantri7941"},
{"id":"VAfgNNgZDRs","title":"Khởi tố 4 đối tượng ném bom xăng vào nhà dân ở Đồng Nai | #shorts","channel":"baodantri7941"},
{"id":"sBH_-zGh0Xw","title":"Vì sao Times New Roman vẫn nổi tiếng sau hàng chục năm? | #shorts","channel":"baodantri7941"},
{"id":"woKn5f2bLHM","title":"Quảng Ninh ngập sâu diện rộng sau đợt mưa lớn | #shorts","channel":"baodantri7941"},
{"id":"bcpgRoxbLPw","title":"Giông lốc quật bay mái tôn ở TP.HCM | #shorts","channel":"baodantri7941"},
{"id":"ZIIC5osy544","title":"Bé trai Trung Quốc rơi từ tầng 11 vẫn sống sót kỳ diệu | #shorts","channel":"baodantri7941"},
{"id":"uTMJ49NQpyc","title":"Sau lớp mascot 40kg: Câu chuyện mưu sinh của người trẻ ở TPHCM | #shorts","channel":"baodantri7941"},
{"id":"7Pd6vZ2Lz1M","title":"Hành động ấm lòng của người đàn ông tham gia tìm kiếm 5 học sinh tử vong ở sông Lô | SKĐS","channel":"baosuckhoedoisongboyte"},
{"id":"SlHLt_ZyPiE","title":"Xử phạt người đàn ông xóa số điện thoại cứu hộ trên cao tốc Bắc - Nam | SKĐS","channel":"baosuckhoedoisongboyte"},
{"id":"IUOprcJyYr4","title":"Phụ nữ táo bón có phải do lười ăn rau? | SKĐS #shorts","channel":"baosuckhoedoisongboyte"},
{"id":"YY8ojFNE-AU","title":"Quái xế tự quay clip nẹt pô, đánh võng đăng TikTok bị xử lý | SKĐS","channel":"baosuckhoedoisongboyte"},
{"id":"OV7_oGdQGII","title":"Bố cô dâu khóc sụt sùi rồi quẩy cực sung gây bão mạng | SKĐS","channel":"baosuckhoedoisongboyte"},
{"id":"FoxhFyz2skY","title":"Người đàn ông nước ngoài đập phá ô tô, bẻ cần gạt nước ở Đà Nẵng | SKĐS","channel":"baosuckhoedoisongboyte"},
{"id":"R1oC_I8dFPU","title":"Thanh niên buông tay lái, đứng trên xe máy khi đổ đèo ở Đắk Lắk | SKĐS","channel":"baosuckhoedoisongboyte"},
{"id":"U0Ft6ChWAIo","title":"Cô giáo kể phút tháo chạy khỏi xe khách trước khi bị lũ vò nát ở Cao Bằng | SKĐS","channel":"baosuckhoedoisongboyte"},
{"id":"hH0ANeze_4E","title":"Liên tiếp hàng chục con bò bị sét đánh chết trong ngày mưa dông | SKĐS","channel":"baosuckhoedoisongboyte"},
{"id":"pXWt0QbAzRQ","title":"Va chạm giao thông, người phụ nữ lăng mạ tài xế ô tô | SKĐS","channel":"baosuckhoedoisongboyte"},
{"id":"UWWLPY1OYt4","title":"CSGT chặn xe khách khống chế đối tượng cướp dây chuyền tại Gia Lai | SKĐS","channel":"baosuckhoedoisongboyte"},
{"id":"AxhVTQutsuo","title":"Xuất tinh sớm và những hiểu lầm thường gặp | SKĐS #shorts","channel":"baosuckhoedoisongboyte"},
{"id":"cNy6FgaNxYM","title":"Cô dâu khóc sưng mắt vì 6 chỉ vàng không cánh mà bay trong ngày cưới | SKĐS","channel":"baosuckhoedoisongboyte"},
{"id":"IDt_S6q59Ro","title":"Chở bạn gái không đội mũ bảo hiểm, thanh niên đấm CSGT | SKĐS","channel":"baosuckhoedoisongboyte"},
{"id":"LFxJ9Ik6W0A","title":"Mệnh lệnh từ trái tim: CSGT Hà Nội mở đường đưa bé 5 tháng tuổi đi cấp cứu | SKĐS","channel":"baosuckhoedoisongboyte"}
]
# Give every fallback entry the same derived fields the live scraper emits:
# watch link, thumbnail URL and source tag.
for _v in SHORTS_FALLBACK:
    _v.update({
        "link": "https://www.youtube.com/watch?v=" + _v["id"],
        "img": "https://i.ytimg.com/vi/" + _v["id"] + "/hqdefault.jpg",
        "source": "yt",
    })
# JSON file holding per-video stats; stored under /data when that directory exists, otherwise under /app.
SHORT_STATS_FILE = "/data/short_stats.json" if os.path.isdir("/data") else "/app/short_stats.json"
# Held by the short-stats endpoints around each load / modify / save of the stats file.
_short_lock = threading.Lock()
def _load_short_db():
    """Load the per-video stats database from SHORT_STATS_FILE.

    Returns:
        The decoded JSON object as a dict. An empty dict is returned when the
        file is missing, unreadable, not valid JSON, or holds anything other
        than a JSON object.
    """
    try:
        with open(SHORT_STATS_FILE, "r", encoding="utf-8") as f:
            db = json.load(f)
    except (OSError, ValueError):
        # Missing/unreadable file or corrupt JSON: start from an empty database.
        return {}
    # Callers use dict operations (.get, item assignment); any other JSON
    # type would crash them, so treat it like a missing database.
    return db if isinstance(db, dict) else {}
def _save_short_db(db):
    """Persist *db* to SHORT_STATS_FILE as JSON; every failure is silently ignored.

    The data is written to a sibling ``.tmp`` file first and then moved over
    the target with os.replace, so the target is only swapped after a
    complete write.
    """
    target = SHORT_STATS_FILE
    scratch = target + ".tmp"
    try:
        os.makedirs(os.path.dirname(target), exist_ok=True)
        with open(scratch, "w", encoding="utf-8") as out:
            json.dump(db, out, ensure_ascii=False)
        os.replace(scratch, target)
    except:
        # Best effort: stats persistence must never break a request.
        pass

def _short_default():
    """Return a fresh, zeroed stats record for a video that has none yet."""
    return dict(views=0, likes=0, shares=0, comments=[])
# JSON file holding wall posts; stored under /data when that directory exists, otherwise under /app.
WALL_FILE = "/data/wall_posts.json" if os.path.isdir("/data") else "/app/wall_posts.json"
def _load_wall():
    """Load the wall posts from WALL_FILE.

    Returns:
        The decoded JSON array as a list. An empty list is returned when the
        file is missing, unreadable, not valid JSON, or holds anything other
        than a JSON array.
    """
    try:
        with open(WALL_FILE, "r", encoding="utf-8") as f:
            posts = json.load(f)
    except (OSError, ValueError):
        # Missing/unreadable file or corrupt JSON: start with an empty wall.
        return []
    # _save_wall slices the posts (posts[:100]), so only a list is usable.
    return posts if isinstance(posts, list) else []
def _save_wall(posts):
    """Persist the first 100 entries of *posts* to WALL_FILE; every failure is silently ignored.

    The data is written to a sibling ``.tmp`` file first and then moved over
    the target with os.replace.
    """
    target = WALL_FILE
    scratch = target + ".tmp"
    try:
        os.makedirs(os.path.dirname(target), exist_ok=True)
        with open(scratch, "w", encoding="utf-8") as out:
            json.dump(posts[:100], out, ensure_ascii=False)
        os.replace(scratch, target)
    except:
        # Best effort: wall persistence must never break a request.
        pass
# League-name fragments in priority order; api_livescore_featured() returns the first match whose league contains one of them.
PRIORITY_LEAGUES = ["Ngoại Hạng Anh","FA Cup","Champions League","LaLiga","Copa del Rey","Serie A","Bundesliga","Ligue 1","V-League"]
# League slug -> tournament_id sent to the bongda.com.vn league-table endpoint.
LEAGUE_IDS = {"nha":27110,"laliga":27233,"seriea":27044,"bundesliga":26891,"ligue1":27212}
# Highlight leagues: key -> page path on xemlaibongda.top, display name and emoji.
HL_LEAGUES = {"premier-league":{"path":"anh/premier-league","name":"Premier League","emoji":"🏴󠁧󠁢󠁥󠁮󠁧󠁿"},"fa-cup":{"path":"anh/fa-cup","name":"FA Cup","emoji":"🏆"},"bundesliga":{"path":"duc/bundesliga","name":"Bundesliga","emoji":"🇩🇪"},"serie-a":{"path":"italy/serie-a","name":"Serie A","emoji":"🇮🇹"},"la-liga":{"path":"tay-ban-nha/la-liga","name":"La Liga","emoji":"🇪🇸"},"champions-league":{"path":"cup-chau-au/uefa-champions-league","name":"Champions League","emoji":"⭐"},"europa-league":{"path":"cup-chau-au/uefa-europa-league","name":"Europa League","emoji":"🟠"},"world-cup":{"path":"the-gioi/world-cup-qualifiers","name":"World Cup 2026","emoji":"🌍"}}
def _cached(key, fn, ttl=None):
    """Return the cached value for *key*, refreshing it with ``fn()`` when stale.

    Args:
        key: Key into the module-level ``_cache`` dict.
        fn: Zero-argument callable that produces fresh data.
        ttl: Maximum age in seconds. ``None`` selects ``_cache_ttl``;
            ``0`` forces a refresh on every call.

    Returns:
        The cached data while it is younger than the TTL, otherwise the
        result of ``fn()``. If ``fn`` raises, the previously cached data for
        *key* is returned, or ``[]`` when there is none.
    """
    now = time.time()
    max_age = _cache_ttl if ttl is None else ttl
    entry = _cache.get(key)
    if entry is not None and now - entry["t"] < max_age:
        return entry["d"]
    try:
        data = fn()
    except Exception:
        # Best effort: serve the previous (possibly stale) data instead of failing.
        data = entry["d"] if entry is not None else []
    # The timestamp is refreshed even after a failure, so a failing source is
    # only retried once another TTL has elapsed.
    _cache[key] = {"d": data, "t": now}
    return data
def _get(url, headers=None):
    """Download *url* and return its body parsed as a BeautifulSoup (lxml) tree.

    Uses HEADERS when *headers* is omitted or empty. Network errors propagate
    to the caller.
    """
    response = requests.get(url, headers=headers or HEADERS, timeout=15)
    # Decode as UTF-8 regardless of the charset the server declares.
    response.encoding = "utf-8"
    return BeautifulSoup(response.text, "lxml")
def fetch_bongda_api(endpoint):
    """Call a bongda.com.vn JSON endpoint and return its ``html`` field.

    Args:
        endpoint: Path plus query string, appended to ``https://bongda.com.vn``.

    Returns:
        The ``html`` value of the JSON reply when the HTTP status is 200 and
        the reply's ``status`` is ``"success"``; ``""`` in every other case,
        including any exception.
    """
    try:
        resp = requests.get(f"https://bongda.com.vn{endpoint}", headers=BONGDA_HEADERS, timeout=10)
        if resp.status_code != 200:
            return ""
        payload = resp.json()
        if payload.get("status") != "success":
            return ""
        return payload.get("html", "")
    except:
        return ""
def _parse_match_from_li(li, status_type="live"):
    """Extract one match record from a fixture list item.

    Args:
        li: BeautifulSoup element expected to contain a ``div.match``.
        status_type: Value copied into the record's ``status`` field.

    Returns:
        A dict with home/away names and logos, score, minute, league, time,
        event_id and status, or None when the item has no ``div.match`` or is
        missing either team name.
    """
    box = li.select_one("div.match")
    if not box:
        return None
    home_name = box.select_one(".home-team .name")
    away_name = box.select_one(".away-team .name")
    if not home_name or not away_name:
        return None

    status_link = box.select_one(".status a")
    league_tag = li.find_previous("strong")  # nearest preceding <strong> carries the league name
    kickoff = box.select_one(".match-time")
    home_img = box.select_one(".home-team .logo img")
    away_img = box.select_one(".away-team .logo img")

    event_id = ""
    if status_link:
        found = re.search(r'/tran-dau/(\d+)/', status_link.get("href", ""))
        if found:
            event_id = found.group(1)

    cells = status_link.find_all("span") if status_link else []
    score = ""
    minute = ""
    if len(cells) >= 3:
        # Spans 0 and 2 hold the two sides' goals; span 1 is skipped.
        score = f"{cells[0].get_text(strip=True)} - {cells[2].get_text(strip=True)}"
    if len(cells) >= 4:
        minute = cells[3].get_text(strip=True)
    if not score and status_link and status_link.select_one(".vs"):
        score = "VS"

    return {
        "home": home_name.get_text(strip=True),
        "away": away_name.get_text(strip=True),
        "score": score or "VS",
        "minute": minute,
        "league": league_tag.get_text(strip=True) if league_tag else "",
        "time": kickoff.get_text(strip=True) if kickoff else "",
        "event_id": event_id,
        "home_logo": home_img.get("src", "") if home_img else "",
        "away_logo": away_img.get("src", "") if away_img else "",
        "status": status_type,
    }

# ===== VIDEO PROXY =====
@app.get("/api/proxy/m3u8")
def proxy_m3u8(url: str = Query(...)):
    """Fetch an HLS playlist and route its media URIs through the segment proxy.

    Every non-blank line that is not a ``#`` tag is treated as a URI, resolved
    against the playlist's own URL and rewritten to
    ``/api/proxy/seg?url=<encoded absolute URL>``. Tag and blank lines are
    passed through unchanged.

    Args:
        url: Absolute URL of the upstream ``.m3u8`` playlist.

    Returns:
        The rewritten playlist, or a 502 response when the upstream status is
        not 200 or the request fails.
    """
    from urllib.parse import urljoin  # local import: only this handler needs it

    try:
        r = requests.get(url, headers=HEADERS, timeout=15)
        if r.status_code != 200:
            return Response(status_code=502, content="upstream error")
        base = r.url or url  # final URL after redirects
        rewritten = []
        for line in r.text.strip().split('\n'):
            uri = line.strip()
            if line.startswith('#') or not uri:
                rewritten.append(line)
            else:
                # Playlist URIs may be relative to the playlist itself; the
                # segment proxy needs an absolute URL to fetch.
                rewritten.append("/api/proxy/seg?url=" + quote(urljoin(base, uri), safe=""))
        return Response(
            content='\n'.join(rewritten).encode('utf-8'),
            media_type="application/vnd.apple.mpegurl",
            headers={"Access-Control-Allow-Origin": "*", "Cache-Control": "public, max-age=300"},
        )
    except Exception:
        return Response(status_code=502, content="proxy error")

@app.get("/api/proxy/seg")
def proxy_segment(url: str = Query(...)):
    """Fetch one media segment and return it as ``video/mp2t``.

    When the payload starts with the PNG signature and byte 188 is 0x47, the
    first 188 bytes are dropped before the data is returned.
    NOTE(review): this looks like it strips a fake PNG prefix placed in front
    of the transport stream (0x47 is the MPEG-TS sync byte) — confirm.

    Returns:
        The segment bytes, or a 502 response when the upstream status is not
        200 or the request fails.
    """
    try:
        upstream = requests.get(url, headers=HEADERS, timeout=30)
        if upstream.status_code != 200:
            return Response(status_code=502, content="upstream error")
        payload = upstream.content
        has_png_signature = payload[:4] == b'\x89PNG'
        if len(payload) > 188 and has_png_signature and payload[188] == 0x47:
            payload = payload[188:]
        cors_and_cache = {"Access-Control-Allow-Origin": "*", "Cache-Control": "public, max-age=3600"}
        return Response(content=payload, media_type="video/mp2t", headers=cors_and_cache)
    except:
        return Response(status_code=502, content="proxy error")

@app.get("/api/proxy/video")
def proxy_video(url: str = Query(...), request: Request = None):
    """Stream a remote video through this server, forwarding Range requests.

    The client's ``Range`` header (if any) is passed upstream, and the
    upstream status code, ``Content-Type`` (default ``video/mp4``),
    ``Content-Range`` and ``Content-Length`` are passed back.

    Args:
        url: Absolute URL of the upstream video.
        request: Incoming request, used only to read the ``Range`` header.

    Returns:
        A StreamingResponse relaying the upstream body in 256 KiB chunks, or a
        502 response when the upstream request fails.
    """
    try:
        req_headers = dict(HEADERS)
        if request and request.headers.get("range"):
            req_headers["Range"] = request.headers["range"]
        r = requests.get(url, headers=req_headers, timeout=30, stream=True)
        resp_headers = {
            "Access-Control-Allow-Origin": "*",
            "Accept-Ranges": "bytes",
            "Content-Type": r.headers.get("Content-Type", "video/mp4"),
        }
        for name in ("Content-Range", "Content-Length"):
            if name in r.headers:
                resp_headers[name] = r.headers[name]

        def _relay():
            # With stream=True the connection stays checked out until the
            # body is consumed or the response is closed; close it once the
            # stream ends or the generator is discarded (client disconnect).
            try:
                yield from r.iter_content(chunk_size=256 * 1024)
            finally:
                r.close()

        return StreamingResponse(_relay(), status_code=r.status_code, headers=resp_headers)
    except Exception:
        return Response(status_code=502, content="proxy error")

@app.get("/api/proxy/img")
def proxy_img(url: str = Query(...)):
    """Relay an image for sources that block hotlinking (DanTri CDN).

    The upstream request carries a ``https://dantri.com.vn/`` Referer.

    Returns:
        The image bytes with the upstream Content-Type (default
        ``image/jpeg``), or an empty 502 response when the upstream status is
        not 200 or the request fails.
    """
    upstream_headers = dict(HEADERS)
    upstream_headers["Referer"] = "https://dantri.com.vn/"
    try:
        upstream = requests.get(url, headers=upstream_headers, timeout=10)
        if upstream.status_code != 200:
            return Response(status_code=502)
        return Response(
            content=upstream.content,
            media_type=upstream.headers.get("Content-Type", "image/jpeg"),
            headers={"Cache-Control": "public, max-age=86400", "Access-Control-Allow-Origin": "*"},
        )
    except:
        return Response(status_code=502)

# ===== XEMLAIBONGDA HIGHLIGHTS =====
def _scrape_xemlaibongda_page(page_path, limit=20):
    """Collect video links from one xemlaibongda.top listing page.

    Args:
        page_path: Path below the site root; an empty value selects the home page.
        limit: Maximum number of videos to return.

    Returns:
        A list of ``{"title", "link", "img", "source"}`` dicts in page order,
        without duplicate links; ``[]`` on a non-200 status or any error.
        Titles are derived from the link slug, not from the page text.
    """
    try:
        page_url = f"https://xemlaibongda.top/{page_path}" if page_path else "https://xemlaibongda.top/"
        resp = requests.get(page_url, headers=HEADERS, timeout=15)
        if resp.status_code != 200:
            return []
        resp.encoding = "utf-8"
        soup = BeautifulSoup(resp.text, "lxml")

        found = []
        visited = set()
        for anchor in soup.find_all("a", href=True):
            link = anchor.get("href", "")
            if "/video/" not in link:
                continue
            if not link.startswith("http"):
                link = "https://xemlaibongda.top" + link
            if link in visited:
                continue
            visited.add(link)

            # Build a display title from the slug: title-case it, drop a
            # trailing YYYY MM DD date, and normalise " V " / " Vs " to " vs ".
            slug = link.split("/video/")[-1].rstrip("/")
            title = slug.replace("-", " ").title()
            title = re.sub(r'\d{4}\s*\d{2}\s*\d{2}$', '', title).strip()
            title = re.sub(r'\s+V\s+', ' vs ', title)
            title = re.sub(r'\s+Vs\s+', ' vs ', title)

            # Prefer an <img> inside the link, then one under its parent.
            thumb = anchor.find("img") or (anchor.parent.find("img") if anchor.parent else None)
            if thumb:
                thumb_url = thumb.get("data-src", "") or thumb.get("src", "") or thumb.get("data-lazy", "")
            else:
                thumb_url = ""
            if not thumb_url:
                thumb_url = f"https://img.refooty.com/thumbnail/{slug}.webp"

            found.append({"title": title, "link": link, "img": thumb_url, "source": "xemlaibongda"})
            if len(found) >= limit:
                break
        return found
    except:
        return []

def scrape_xemlaibongda():
    """Return up to 20 videos from the xemlaibongda.top home page."""
    return _scrape_xemlaibongda_page(page_path="", limit=20)
def scrape_highlights_by_league(league_key):
    """Return up to 20 highlight videos for a key of HL_LEAGUES, or [] for an unknown key."""
    league = HL_LEAGUES.get(league_key)
    if league is None:
        return []
    return _scrape_xemlaibongda_page(league["path"], 20)

def scrape_all_league_highlights():
    """Scrape every league in HL_LEAGUES concurrently (8 worker threads).

    Returns:
        A dict mapping league key to its video list. Leagues that produced no
        videos, or whose scrape raised, are left out.
    """
    collected = {}
    with ThreadPoolExecutor(8) as pool:
        pending = {pool.submit(scrape_highlights_by_league, key): key for key in HL_LEAGUES}
        for done in as_completed(pending):
            try:
                clips = done.result()
            except:
                continue
            if clips:
                collected[pending[done]] = clips
    return collected

def extract_xemlaibongda_video(url):
    """Find the playable source on a video page.

    Looks first at the page's ``<video>`` element (its ``src`` or that of its
    first ``<source>``), then falls back to the first ``.m3u8`` URL found
    anywhere in the page text, using ``og:image`` as poster.

    Returns:
        ``{"src", "poster", "type"}`` where type is ``"hls"`` or ``"video"``,
        or None when nothing is found, the status is not 200, or an error occurs.
    """
    try:
        resp = requests.get(url, headers=HEADERS, timeout=15)
        if resp.status_code != 200:
            return None
        resp.encoding = "utf-8"
        page = resp.text
        soup = BeautifulSoup(page, "lxml")

        player = soup.find("video")
        if player:
            src = player.get("src", "")
            poster = player.get("poster", "")
            if not src:
                child = player.find("source")
                if child:
                    src = child.get("src", "")
            if src:
                kind = "hls" if ".m3u8" in src else "video"
                return {"src": src, "poster": poster, "type": kind}

        playlists = re.findall(r'(https?://[^\s"\'<>]+\.m3u8)', page)
        if not playlists:
            return None
        og = soup.find("meta", property="og:image")
        return {"src": playlists[0], "poster": og.get("content", "") if og else "", "type": "hls"}
    except:
        return None

# ===== YOUTUBE SHORTS =====
def _decode_yt_text(raw):
    """Turn a JSON-escaped string captured from the page into display text."""
    try:
        # The page embeds its data as JSON, so unicode escapes, escaped
        # quotes and escaped newlines must be decoded before display.
        raw = json.loads('"' + raw + '"', strict=False)
    except ValueError:
        pass  # not valid JSON escaping: keep the captured text as-is
    text = html_lib.unescape(raw).replace('\n', ' ').strip()
    # Drop anything that cannot be encoded as UTF-8 (e.g. a lone surrogate).
    return text.encode("utf-8", "ignore").decode("utf-8")


def _parse_shorts_html(page, channel, count):
    """Extract up to *count* unique videos from a channel shorts page, in page order."""
    text_in_quotes = r'"((?:[^"\\]|\\.)+)"'  # JSON string body, escaped quotes allowed
    title_re = re.compile(r'"title":\{"runs":\[\{"text":' + text_in_quotes)
    a11y_re = re.compile(r'"accessibilityText":' + text_in_quotes)
    seen = set()
    items = []
    for m in re.finditer(r'"videoId":"([A-Za-z0-9_-]{11})"', page):
        vid = m.group(1)
        if vid in seen:
            continue
        seen.add(vid)
        # The title is looked up in a window of text around the id.
        snip = page[max(0, m.start() - 900):m.start() + 1600]
        mt = title_re.search(snip) or a11y_re.search(snip)
        title = _decode_yt_text(mt.group(1)) if mt else ""
        if not title:
            title = "YouTube Short"
        items.append({"title": title, "link": f"https://www.youtube.com/watch?v={vid}", "img": f"https://i.ytimg.com/vi/{vid}/hqdefault.jpg", "source": "yt", "id": vid, "channel": channel})
        if len(items) >= count:
            break
    return items


def _yt_channel_shorts(channel, count=15):
    """Fast scrape of a YouTube channel's shorts tab without yt-dlp.

    Args:
        channel: Channel handle, without the leading ``@``.
        count: Maximum number of videos to return.

    Returns:
        A list of ``{"title", "link", "img", "source", "id", "channel"}``
        dicts in the order the ids appear on the page (described by the
        original author as newest-first); ``[]`` on a non-200 status or any
        error.
    """
    try:
        url = f"https://www.youtube.com/@{channel}/shorts"
        r = requests.get(url, headers={**HEADERS, "Accept-Language": "vi,en;q=0.8"}, timeout=15)
        if r.status_code != 200:
            return []
        return _parse_shorts_html(r.text, channel, count)
    except Exception:
        return []
def scrape_shorts():
    """Stable shorts feed: fast HTML scrape + static fallback so the slide never disappears.

    Both channels are scraped in parallel (24 items each), SHORTS_FALLBACK is
    appended, entries are de-duplicated by ``id`` keeping the first
    occurrence, and at most 40 are returned.
    """
    channels = ["baodantri7941", "baosuckhoedoisongboyte"]
    scraped = []
    with ThreadPoolExecutor(2) as pool:
        jobs = [pool.submit(_yt_channel_shorts, name, 24) for name in channels]
        for job in as_completed(jobs):
            try:
                batch = job.result()
                if batch:
                    scraped.extend(batch)
            except:
                pass

    unique = {}
    for video in scraped + SHORTS_FALLBACK:
        vid = video.get("id")
        if vid and vid not in unique:
            unique[vid] = video
    return list(unique.values())[:40]

# ===== LIVESCORE =====
@app.get("/api/livescore/live")
def api_livescore_live():
    """Return the live-fixtures HTML fragment as ``{"html": ...}``, cached for ``_cache_ttl_live`` seconds."""
    def _load():
        return fetch_bongda_api("/api/fixtures/live")

    return JSONResponse({"html": _cached("ls_live", _load, ttl=_cache_ttl_live)})
@app.get("/api/livescore/incoming")
def api_livescore_incoming():
    """Return the upcoming-fixtures HTML fragment as ``{"html": ...}``, cached for ``_cache_ttl_live`` seconds."""
    def _load():
        return fetch_bongda_api("/api/fixtures/incoming")

    return JSONResponse({"html": _cached("ls_incoming", _load, ttl=_cache_ttl_live)})
@app.get("/api/livescore/today")
def api_livescore_today():
    """Return today's fixtures HTML fragment as ``{"html": ...}``, cached for ``_cache_ttl`` seconds.

    "Today" is the server's local date at request time.
    """
    today = datetime.now().strftime("%Y-%m-%d")

    def _load():
        return fetch_bongda_api(f"/api/fixtures/get-by-date?date={today}")

    return JSONResponse({"html": _cached("ls_today", _load, ttl=_cache_ttl)})
@app.get("/api/livescore/results")
def api_livescore_results():
    """Return today's finished fixtures HTML fragment as ``{"html": ...}``, cached for ``_cache_ttl`` seconds.

    "Today" is the server's local date at request time.
    """
    today = datetime.now().strftime("%Y-%m-%d")

    def _load():
        return fetch_bongda_api(f"/api/fixtures/get-by-date?date={today}&status=finished")

    return JSONResponse({"html": _cached("ls_results", _load, ttl=_cache_ttl)})
@app.get("/api/livescore/standings/{league}")
def api_livescore_standings(league: str):
    """Return a league table HTML fragment as ``{"html": ...}``, cached for ``_cache_ttl`` seconds.

    Args:
        league: A key of LEAGUE_IDS; any other value falls back to
            tournament id 27110 (the ``"nha"`` entry).
    """
    tid = LEAGUE_IDS.get(league, 27110)
    # Key the cache on the resolved tournament id, not on the raw path text:
    # every unknown slug maps to the same table, so keying on the slug would
    # let arbitrary request paths grow the cache without bound.
    html = _cached(
        f"ls_bxh_{tid}",
        lambda: fetch_bongda_api(f"/api/league-table/home?tournament_id={tid}&is_detail=True"),
        ttl=_cache_ttl,
    )
    return JSONResponse({"html": html})
@app.get("/api/livescore/date/{date}")
def api_livescore_date(date: str):
    """Return the fixtures HTML fragment for *date* as ``{"html": ...}`` (not cached).

    Args:
        date: Date string forwarded to the upstream ``get-by-date`` endpoint;
            the other callers in this file send ``YYYY-MM-DD``.
    """
    # The value comes straight from the request path, so percent-encode it:
    # otherwise characters such as "&" or "#" would alter the upstream query.
    endpoint = "/api/fixtures/get-by-date?date=" + quote(date, safe="")
    return JSONResponse({"html": fetch_bongda_api(endpoint)})
@app.get("/api/match/{event_id}/commentaries")
def api_match_commentaries(event_id: int):
    """Return the commentary HTML fragment for a match as ``{"html": ...}`` (not cached)."""
    endpoint = f"/api/fixtures/commentaries?event_id={event_id}"
    return JSONResponse({"html": fetch_bongda_api(endpoint)})
@app.get("/api/match/{event_id}/stats")
def api_match_stats(event_id: int):
    """Return the player-performance HTML fragment for a match as ``{"html": ...}`` (not cached)."""
    endpoint = f"/api/event-standing/player-performance?event_id={event_id}"
    return JSONResponse({"html": fetch_bongda_api(endpoint)})
@app.get("/api/livescore/featured")
def api_livescore_featured():
    """Pick one match to feature and return it as JSON (``null`` when none), cached for 30 seconds.

    Sources are tried in order: live, today's fixtures, upcoming. Within the
    first source that yields any match with an event id, the first match of
    the highest-ranked PRIORITY_LEAGUES entry wins; otherwise the first match
    of that source is used.
    """
    def _pick():
        today = datetime.now().strftime("%Y-%m-%d")
        feeds = (
            ("/api/fixtures/live", "live"),
            ("/api/fixtures/get-by-date?date=" + today, "today"),
            ("/api/fixtures/incoming", "upcoming"),
        )
        for endpoint, kind in feeds:
            markup = fetch_bongda_api(endpoint)
            if not markup or len(markup) < 100:
                continue
            candidates = []
            for li in BeautifulSoup(markup, "lxml").select("li.match-detail"):
                parsed = _parse_match_from_li(li, kind)
                if not parsed or not parsed["event_id"]:
                    continue
                # NOTE(review): "KT" in the minute field presumably marks a finished match — confirm.
                if kind == "today" and "KT" in parsed.get("minute", ""):
                    continue
                candidates.append(parsed)
            if not candidates:
                continue
            for wanted in PRIORITY_LEAGUES:
                for candidate in candidates:
                    if wanted in candidate["league"]:
                        return candidate
            return candidates[0]
        return None

    return JSONResponse(_cached("ls_featured", _pick, ttl=30))

# ===== VIDEO APIs =====
@app.get("/api/shorts")
def api_shorts():
    """Return the shorts feed from scrape_shorts(), cached for ``_cache_ttl_yt`` seconds."""
    feed = _cached("yt_shorts_v3", scrape_shorts, ttl=_cache_ttl_yt)
    return JSONResponse(feed)
@app.get("/api/short-stats")
def api_short_stats(ids: str = Query(default="")):
    """Return stored stats for a comma-separated list of video ids.

    Args:
        ids: Comma-separated video ids; empty parts are ignored.

    Returns:
        ``{"stats": {id: {"views", "likes", "shares", "comments"}}}`` with
        zeroed defaults for ids that have no record and at most 80 comments
        per id.
    """
    wanted = [part for part in ids.split(",") if part]
    with _short_lock:
        db = _load_short_db()
        stats = {}
        for vid in wanted:
            record = db.get(vid) or _short_default()
            stats[vid] = {
                "views": int(record.get("views", 0)),
                "likes": int(record.get("likes", 0)),
                "shares": int(record.get("shares", 0)),
                "comments": record.get("comments", [])[:80],
            }
    return JSONResponse({"stats": stats})

@app.post("/api/short-action")
async def api_short_action(request: Request):
    """Record a view / like / share / comment on a video and return its stats.

    Expects a JSON object with ``id``, ``action`` (``"view"``, ``"like"``,
    ``"share"`` or ``"comment"``) and, for comments, ``text`` (truncated to
    180 characters; only the 80 newest comments are kept).

    Returns:
        ``{"stats": {...}}`` for the video, or a 400 ``{"error": "missing id"}``
        when no id is supplied. An unknown action, or a comment without text,
        returns the current stats without writing anything.
    """
    try:
        body = await request.json()
    except Exception:
        body = {}
    if not isinstance(body, dict):
        # Valid JSON that is not an object (list, string, number) has no fields to read.
        body = {}
    vid = str(body.get("id", "")).strip()
    action = str(body.get("action", "")).strip()
    txt = str(body.get("text", "")).strip()
    if not vid:
        return JSONResponse({"error": "missing id"}, status_code=400)
    with _short_lock:
        db = _load_short_db()
        st = db.get(vid) or _short_default()
        changed = True
        if action == "view":
            st["views"] = int(st.get("views", 0)) + 1
        elif action == "like":
            st["likes"] = int(st.get("likes", 0)) + 1
        elif action == "share":
            st["shares"] = int(st.get("shares", 0)) + 1
        elif action == "comment" and txt:
            comments = st.get("comments", [])
            comments.insert(0, {"text": txt[:180], "ts": int(time.time())})
            st["comments"] = comments[:80]
        else:
            changed = False
        if changed:
            # Only persist real changes, so no-op requests cannot create records.
            st["updated"] = int(time.time())
            db[vid] = st
            _save_short_db(db)
        out = {
            "views": int(st.get("views", 0)),
            "likes": int(st.get("likes", 0)),
            "shares": int(st.get("shares", 0)),
            "comments": st.get("comments", [])[:80],
        }
    return JSONResponse({"stats": out})

@app.get("/api/highlights")
def api_highlights():
    """Return the xemlaibongda.top home-page highlights, cached for ``_cache_ttl`` seconds."""
    videos = _cached("xemlaibongda_hl", scrape_xemlaibongda, ttl=_cache_ttl)
    return JSONResponse(videos)
@app.get("/api/highlights/leagues")
def api_highlights_leagues():
    """Return highlights for all leagues (league key -> videos), cached for ``_cache_ttl`` seconds."""
    by_league = _cached("hl_leagues", scrape_all_league_highlights, ttl=_cache_ttl)
    return JSONResponse(by_league)
@app.get("/api/highlights/{league}")
def api_highlights_league(league: str):
    """Return highlights for one key of HL_LEAGUES, cached for ``_cache_ttl`` seconds.

    An unknown league yields ``{"error": "league not found"}`` (with the default 200 status).
    """
    if league in HL_LEAGUES:
        def _load():
            return scrape_highlights_by_league(league)

        return JSONResponse(_cached(f"hl_{league}", _load, ttl=_cache_ttl))
    return JSONResponse({"error":"league not found"})
@app.get("/api/highlights_config")
def api_highlights_config():
    """Return the HL_LEAGUES table (path, name and emoji per league key)."""
    return JSONResponse(content=HL_LEAGUES)
@app.get("/api/video_url")
def api_video_url(url: str = Query(...)):
    """Resolve a page URL into something the front-end player can load.

    Handled sources, tried in this order:
      * YouTube: returns an embed URL and thumbnail (``type: "youtube"``).
      * xemlaibongda.top: scrapes the page; HLS sources are routed through
        ``/api/proxy/m3u8``.
      * bongdaplus.vn: reads the numeric id from the URL, loads the matching
        embed page and routes the video through ``/api/proxy/video``.

    Returns:
        A JSON object with ``src``, ``poster`` and ``type``, or
        ``{"error": "not found"}`` when nothing could be resolved.
    """
    from urllib.parse import urlparse  # local import: only this handler needs it

    if "youtube.com" in url or "youtu.be" in url:
        m = re.search(r'(?:v=|shorts/|youtu\.be/)([a-zA-Z0-9_-]{11})', url)
        if m:
            vid = m.group(1)
            return JSONResponse({"src":f"https://www.youtube.com/embed/{vid}?autoplay=1&rel=0&enablejsapi=1","poster":f"https://i.ytimg.com/vi/{vid}/hqdefault.jpg","type":"youtube"})

    # This branch fetches the caller-supplied URL from the server, so match on
    # the parsed host rather than a substring: otherwise any URL that merely
    # contains "xemlaibongda.top" (e.g. in its query string) would be fetched.
    try:
        parsed = urlparse(url)
        scheme = parsed.scheme
        host = (parsed.hostname or "").lower()
    except ValueError:
        scheme, host = "", ""
    if scheme in ("http", "https") and (host == "xemlaibongda.top" or host.endswith(".xemlaibongda.top")):
        v = extract_xemlaibongda_video(url)
        if v:
            if v["type"] == "hls":
                v["src"] = "/api/proxy/m3u8?url=" + quote(v["src"], safe="")
            return JSONResponse(v)

    # Only the numeric id is taken from the URL here; the request itself
    # always goes to BASE_BDP.
    if "bongdaplus.vn" in url:
        try:
            m = re.search(r'-(\d{6,})\.html', url)
            if m:
                r = requests.get(f"{BASE_BDP}/video-embed/{m.group(1)}.html", headers=HEADERS, timeout=10)
                r.encoding = "utf-8"
                soup = BeautifulSoup(r.text, "lxml")
                video = soup.select_one("video#videoPlayer")
                if video:
                    source = video.find("source")
                    src = source.get("src", "") if source else ""
                    poster = video.get("poster", "")
                    if src:
                        return JSONResponse({"src":"/api/proxy/video?url="+quote(src,safe=""),"poster":poster,"type":"video"})
        except Exception:
            pass
    return JSONResponse({"error":"not found"})
@app.get("/api/bdp_videos")
def api_bdp_videos():
    """Return up to 20 videos from the bongdaplus.vn video page, cached for the default TTL.

    Each item is ``{"title", "link", "img", "source": "bdp"}``. The scrape
    returns ``[]`` on any error.
    """
    # Section index links, which are not individual videos.
    section_links = ("/video/", "/video/ban-thang-dep", "/video/highlight")

    def _collect():
        try:
            soup = _get(f"{BASE_BDP}/video")
            items = []
            visited = set()
            for anchor in soup.find_all("a", href=True):
                link = anchor.get("href", "")
                if "/video/" not in link or link in section_links:
                    continue
                if not link.startswith("http"):
                    link = BASE_BDP + link
                if link in visited:
                    continue
                # Drop a leading mm:ss duration from the link text.
                title = re.sub(r'^\d{2}:\d{2}', '', anchor.get_text(strip=True)).strip()
                if not title or len(title) < 5:
                    continue
                picture = anchor.find("img") or (anchor.parent.find("img") if anchor.parent else None)
                if picture:
                    image = picture.get("data-src") or picture.get("src", "")
                else:
                    image = ""
                visited.add(link)
                items.append({"title": title, "link": link, "img": image, "source": "bdp"})
            return items[:20]
        except:
            return []

    return JSONResponse(_cached("bdp_videos", _collect))
# ===== NEWS =====
def scrape_vne(cat_url):
    """Scrape up to 15 article teasers from a category page.

    Args:
        cat_url: URL of the listing page; items are read from
            ``article.item-news`` elements.

    Returns:
        A list of ``{"title", "link", "img", "source": "vne"}`` dicts; ``[]`` on any error.
    """
    try:
        soup = _get(cat_url)
        results = []
        for card in soup.select("article.item-news")[:15]:
            anchor = card.select_one("h2.title-news a") or card.select_one("h3.title-news a")
            if not anchor:
                continue
            title = anchor.get("title", "") or anchor.get_text(strip=True)
            link = anchor.get("href", "")
            if not (title and link):
                continue
            picture = card.find("img")
            image = (picture.get("data-src") or picture.get("src", "")) if picture else ""
            if image and 'blank' in image:
                # Placeholder image: take the first srcset candidate of a <source> instead.
                alt = card.find("source")
                if alt:
                    image = alt.get("srcset", "").split(",")[0].strip().split(" ")[0]
            results.append({"title": title, "link": link, "img": image, "source": "vne"})
        return results
    except:
        return []
def scrape_vne_article(url):
    """Scrape one article page into a title, summary, lead image and body blocks.

    Only direct children of ``article.fck_detail`` are read: ``<p>`` becomes a
    ``"p"`` block (when non-empty), ``<figure>`` with an image becomes an
    ``"img"`` block, ``<h2>``/``<h3>`` become ``"heading"`` blocks.

    Returns:
        ``{"title", "summary", "og_image", "body", "source": "vne", "url"}``,
        or None on any error.
    """
    try:
        soup = _get(url)
        headline = soup.select_one("h1.title-detail")
        lead = soup.select_one("p.description")
        og = soup.find("meta", property="og:image")
        og_img = og.get("content", "") if og else ""

        body = []
        detail = soup.select_one("article.fck_detail")
        if detail:
            for child in detail.children:
                tag = getattr(child, "name", None)
                if not tag:
                    continue  # text nodes and the like
                if tag == "p":
                    text = child.get_text(strip=True)
                    if text:
                        body.append({"type": "p", "text": text})
                elif tag == "figure":
                    picture = child.find("img")
                    if picture:
                        body.append({"type": "img", "src": picture.get("data-src") or picture.get("src", "")})
                elif tag in ("h2", "h3"):
                    body.append({"type": "heading", "text": child.get_text(strip=True)})

        return {
            "title": headline.get_text(strip=True) if headline else "",
            "summary": lead.get_text(strip=True) if lead else "",
            "og_image": og_img,
            "body": body,
            "source": "vne",
            "url": url,
        }
    except:
        return None
def _scrape_dantri_homepage(cat_filter=None, require_img=True):
    """Collect up to 15 article links from the dantri.com.vn home page.

    Args:
        cat_filter: Optional category slug; when given, only links whose URL
            contains ``/{cat_filter}/`` are kept.
        require_img: When True (the default), links without a CDN-hosted
            thumbnail are skipped. When False they are kept with ``img`` set
            to ``""``.

    Returns:
        A list of ``{"title", "link", "img", "source": "dantri"}`` dicts with
        thumbnails routed through ``/api/proxy/img``; ``[]`` on any error.
    """
    try:
        soup = _get("https://dantri.com.vn/")
        arts = []
        seen = set()
        for a in soup.find_all("a", href=True):
            href = a.get("href", "")
            title = a.get("title", "") or a.get_text(strip=True)
            if not title or len(title) < 15 or "javascript:" in href:
                continue
            if not href.startswith("http"):
                href = "https://dantri.com.vn" + href
            if href in seen or not href.endswith(".htm"):
                continue
            if cat_filter and f"/{cat_filter}/" not in href:
                continue
            # Thumbnail: an <img> inside the link, else one under its parent.
            img_tag = a.find("img")
            if not img_tag and a.parent:
                img_tag = a.parent.find("img")
            img_src = ""
            if img_tag:
                img_src = img_tag.get("data-src", "") or img_tag.get("src", "")
            if img_src and "cdn" in img_src:
                img = "/api/proxy/img?url=" + quote(img_src, safe="")
            elif require_img:
                continue
            else:
                img = ""
            seen.add(href)
            arts.append({"title": title, "link": href, "img": img, "source": "dantri"})
            if len(arts) >= 15:
                break
        return arts
    except Exception:
        return []


def scrape_dantri_hot():
    """Return home-page articles of any category that have a CDN thumbnail."""
    return _scrape_dantri_homepage()


def scrape_dantri_congnghe():
    """Return home-page articles under ``/cong-nghe/``, with or without a thumbnail."""
    return _scrape_dantri_homepage("cong-nghe", require_img=False)
def scrape_genk_ai():
    """Scrape AI articles from genk.vn - readable in-app.

    Returns:
        Up to 30 ``{"title", "link", "img", "source": "genk"}`` dicts; ``[]``
        on a non-200 status or any error. When no thumbnail is found near a
        link, the article page itself is fetched for its ``og:image``.
    """
    def _nearby_image(node):
        # Walk up to six ancestor levels looking for a content image
        # (hosted on mediacdn, not an avatar or a logo).
        for _ in range(6):
            if node is None:
                return ""
            for img in node.find_all("img"):
                candidate = img.get("data-src", "") or img.get("src", "")
                if candidate and "mediacdn" in candidate and "avatar" not in candidate and "logo" not in candidate:
                    return candidate
            node = node.parent
        return ""

    try:
        resp = requests.get("https://genk.vn/ai.chn", headers=HEADERS, timeout=15)
        if resp.status_code != 200:
            return []
        resp.encoding = "utf-8"
        soup = BeautifulSoup(resp.text, "lxml")

        articles = []
        visited = set()
        for anchor in soup.find_all("a", href=True):
            link = anchor.get("href", "")
            if not link.endswith(".chn") or link == "/ai.chn":
                continue
            if link.startswith("/"):
                link = "https://genk.vn" + link
            if link in visited or "genk.vn" not in link:
                continue
            title = anchor.get("title", "") or anchor.get_text(strip=True)
            if not title or len(title) < 20:
                continue

            image = _nearby_image(anchor.parent)
            visited.add(link)
            if not image:
                try:
                    page = requests.get(link, headers=HEADERS, timeout=8)
                    page.encoding = "utf-8"
                    og_tag = BeautifulSoup(page.text, "lxml").find("meta", property="og:image")
                    if og_tag:
                        image = og_tag.get("content", "")
                except:
                    pass
            articles.append({"title": title, "link": link, "img": image, "source": "genk"})
            if len(articles) >= 30:
                break
        return articles
    except:
        return []

def scrape_dantri_article(url):
    """Scrape one article page into a title, summary, lead image and body blocks.

    Script, style, nav, footer and aside elements are removed first. Body
    blocks are read from ``<main>``, else ``div.singular-content``, else
    ``<article>``: paragraphs longer than 15 characters, ``h2``/``h3``
    headings, and images (each distinct image once). Images hosted on
    ``cdnphoto.dantri`` are routed through ``/api/proxy/img``.

    Returns:
        ``{"title", "summary", "og_image", "body", "source": "dantri", "url"}``,
        or None on any error.
    """
    try:
        r = requests.get(url, headers=HEADERS, timeout=15)
        r.encoding = "utf-8"
        soup = BeautifulSoup(r.text, "lxml")
        for tag in soup.find_all(["script", "style", "nav", "footer", "aside"]):
            tag.decompose()

        h1 = soup.find("h1")
        og = soup.find("meta", property="og:image")
        og_img = og.get("content", "") if og else ""
        if og_img and "cdnphoto.dantri" in og_img:
            og_img = "/api/proxy/img?url=" + quote(og_img, safe="")

        content = soup.select_one("main") or soup.select_one("div.singular-content") or soup.select_one("article")
        body = []
        seen_imgs = set()
        if content:
            for el in content.find_all(["p", "h2", "h3", "figure", "img"], recursive=True):
                if el.name == "p":
                    t = el.get_text(strip=True)
                    if t and len(t) > 15:
                        body.append({"type": "p", "text": t})
                elif el.name in ("h2", "h3"):
                    t = el.get_text(strip=True)
                    if t:
                        body.append({"type": "heading", "text": t})
                else:
                    im = el if el.name == "img" else el.find("img")
                    if not im:
                        continue
                    s = im.get("data-src") or im.get("src", "")
                    # find_all returns a <figure> and then the <img> inside
                    # it, so remember emitted sources to avoid adding the
                    # same picture twice.
                    if not s or "base64" in s or s in seen_imgs:
                        continue
                    seen_imgs.add(s)
                    if "cdnphoto.dantri" in s:
                        s = "/api/proxy/img?url=" + quote(s, safe="")
                    body.append({"type": "img", "src": s})

        # Summary: the article's sapo heading, else the og:description meta tag.
        desc = ""
        sapo = soup.select_one("h2.singular-sapo") or soup.select_one("h2[class*=sapo]")
        if sapo:
            desc = sapo.get_text(strip=True)
        else:
            og_desc = soup.find("meta", property="og:description")
            if og_desc:
                desc = og_desc.get("content", "")
        return {"title": h1.get_text(strip=True) if h1 else "", "summary": desc, "og_image": og_img, "body": body, "source": "dantri", "url": url}
    except Exception:
        return None
def scrape_bbc_vietnamese():
    """Scrape up to 15 article links from the BBC Vietnamese front page.

    Returns:
        A list of ``{"title", "link", "img", "source": "bbc"}`` dicts; ``[]`` on any error.
    """
    # Link texts containing these are navigation items, not articles.
    nav_words = ["đăng nhập","trang chủ","bbc news"]
    try:
        resp = requests.get("https://www.bbc.com/vietnamese", headers={"User-Agent":"Mozilla/5.0","Accept-Language":"en-GB"}, timeout=15)
        resp.encoding = "utf-8"
        soup = BeautifulSoup(resp.text, "lxml")

        results = []
        visited = set()
        for anchor in soup.select("a[href*='/vietnamese/']"):
            link = anchor.get("href", "")
            if not link or link == "/vietnamese" or link.count("/") < 3:
                continue
            if not link.startswith("http"):
                link = "https://www.bbc.com" + link
            if link in visited:
                continue
            title = anchor.get_text(strip=True)
            if not title or len(title) < 15:
                continue
            lowered = title.lower()
            if any(word in lowered for word in nav_words):
                continue

            # Look for a thumbnail in up to three enclosing elements.
            image = ""
            holder = anchor.parent
            for _ in range(3):
                if holder:
                    picture = holder.find("img")
                    if picture:
                        image = picture.get("src", "") or picture.get("data-src", "")
                        break
                    holder = holder.parent

            visited.add(link)
            results.append({"title": title, "link": link, "img": image, "source": "bbc"})
            if len(results) >= 15:
                break
        return results
    except:
        return []
def scrape_bbc_article(url):
    """Fetch a BBC article page and extract its title, lead image and paragraphs.

    Args:
        url: Address of the article page.

    Returns:
        A dict with ``title``, ``summary`` (always empty), ``og_image``,
        ``body`` (paragraph entries longer than 20 characters), ``source``
        ("bbc") and ``url``; ``None`` if anything raises.
    """
    try:
        resp = requests.get(
            url,
            headers={"User-Agent": "Mozilla/5.0", "Accept-Language": "en-GB"},
            timeout=15,
        )
        resp.encoding = "utf-8"
        page = BeautifulSoup(resp.text, "lxml")
        heading = page.find("h1")
        og_tag = page.find("meta", property="og:image")
        lead_image = og_tag.get("content", "") if og_tag else ""
        texts = (
            node.get_text(strip=True)
            for node in page.select("[data-component='text-block'] p, article p, main p")
        )
        paragraphs = [{"type": "p", "text": text} for text in texts if len(text) > 20]
        return {
            "title": heading.get_text(strip=True) if heading else "",
            "summary": "",
            "og_image": lead_image,
            "body": paragraphs,
            "source": "bbc",
            "url": url,
        }
    except:
        return None

def scrape_ttvh_worldcup():
    """Collect World Cup 2026 article links from The Thao Van Hoa.

    The section's RSS feed is tried first. If that raises or yields no
    usable items, the HTML section page is scanned for article-looking
    links instead (capped at 24).

    Returns:
        A list of dicts with ``title``, ``link``, ``img`` and ``source``
        ("ttvh"); an empty list if the HTML pass raises as well.
    """

    def _child_text(node, tag, sep=""):
        # Text of the first <tag> child, or "" when the child is absent.
        child = node.find(tag)
        return child.get_text(sep, strip=True) if child else ""

    try:
        feed = requests.get("https://thethaovanhoa.vn/rss/world-cup-2026.rss", headers=HEADERS, timeout=15)
        feed.encoding = "utf-8"
        entries = []
        known_links = set()
        for item in BeautifulSoup(feed.text, "xml").find_all("item"):
            headline = _child_text(item, "title")
            link = _child_text(item, "link")
            blurb = _child_text(item, "description", " ")
            # Parse the description as HTML to look for a thumbnail <img>.
            thumb_tag = BeautifulSoup(blurb, "lxml").find("img")
            thumb = ""
            if thumb_tag:
                thumb = thumb_tag.get("src", "") or thumb_tag.get("data-src", "")
            if not headline or not link or link in known_links:
                continue
            known_links.add(link)
            entries.append({"title": headline, "link": link, "img": thumb, "source": "ttvh"})
        if entries:
            return entries
    except:
        pass

    try:
        # Article URLs end in "-<8 or more digits>.htm".
        article_href = re.compile(r"/[^/]+-\d{8,}\.htm")
        page = _get("https://thethaovanhoa.vn/world-cup-2026.htm")
        entries = []
        known_links = set()
        for anchor in page.find_all("a", href=True):
            link = anchor.get("href", "")
            if not link.startswith("http"):
                link = "https://thethaovanhoa.vn" + link
            if link in known_links or "thethaovanhoa.vn" not in link:
                continue
            if article_href.search(link) is None:
                continue
            headline = anchor.get("title", "") or anchor.get_text(" ", strip=True)
            # Search the anchor and up to four ancestors for an <img>.
            picture = None
            node = anchor
            for _ in range(5):
                if node is None:
                    break
                picture = node.find("img")
                if picture:
                    break
                node = node.parent
            thumb = ""
            if picture:
                thumb = (
                    picture.get("data-src", "")
                    or picture.get("src", "")
                    or picture.get("data-original", "")
                    or picture.get("data-thumb", "")
                )
                if len(headline) < 15:
                    # Short anchor text: fall back to the image's alt/title.
                    headline = picture.get("alt", "") or picture.get("title", "") or headline
            if not headline or len(headline) < 15:
                continue
            known_links.add(link)
            entries.append({"title": headline, "link": link, "img": thumb, "source": "ttvh"})
            if len(entries) >= 24:
                break
        return entries
    except:
        return []

def scrape_ttvh_article(url):
    """Fetch a The Thao Van Hoa article and convert it to the article dict.

    Args:
        url: Address of the article page (loaded through ``_get``).

    Returns:
        A dict with ``title`` (the ``<h1>`` text, else ``og:title``),
        ``summary`` (``og:description``), ``og_image``, ``body`` (a list of
        ``p`` / ``heading`` / ``img`` entries), ``source`` ("ttvh") and
        ``url``; ``None`` if fetching or parsing raises.
    """
    try:
        soup = _get(url)
        h1 = soup.find("h1")
        og = soup.find("meta", property="og:image")
        og_img = og.get("content", "") if og else ""
        og_title = soup.find("meta", property="og:title")
        fallback_title = og_title.get("content", "") if og_title else ""
        desc_el = soup.find("meta", property="og:description")
        desc = desc_el.get("content", "") if desc_el else ""
        cd = (
            soup.select_one(".detail-content")
            or soup.select_one(".content-detail")
            or soup.select_one("article")
            or soup.select_one("main")
        )
        body = []
        # find_all() yields a <figure> AND the <img> nested inside it, so the
        # same picture would otherwise be appended twice; remember emitted srcs.
        seen_imgs = set()
        if cd is not None:
            for el in cd.find_all(["p", "h2", "h3", "figure", "img"], recursive=True):
                if el.name == "p":
                    t = el.get_text(strip=True)
                    # Paragraphs containing "Theo dõi" are left out of the body.
                    if t and len(t) > 20 and "Theo dõi" not in t:
                        body.append({"type": "p", "text": t})
                elif el.name in ("h2", "h3"):
                    t = el.get_text(strip=True)
                    if t:
                        body.append({"type": "heading", "text": t})
                elif el.name in ("figure", "img"):
                    im = el if el.name == "img" else el.find("img")
                    if im is None:
                        continue
                    src = im.get("data-src") or im.get("src", "") or im.get("data-original", "")
                    if src and "base64" not in src and src not in seen_imgs:
                        seen_imgs.add(src)
                        body.append({"type": "img", "src": src})
        if not body and desc:
            # Nothing extracted from the page body: fall back to the description.
            body = [{"type": "p", "text": desc}]
        return {
            "title": h1.get_text(strip=True) if h1 else fallback_title,
            "summary": desc,
            "og_image": og_img,
            "body": body,
            "source": "ttvh",
            "url": url,
        }
    except Exception:
        # Best-effort scraper: callers treat None as "could not read".
        return None

# VnExpress sections: slug -> (section URL, Vietnamese display name).
# Every section URL is "https://vnexpress.net/" followed by its slug.
VNE_CATS = {
    slug: ("https://vnexpress.net/" + slug, label)
    for slug, label in (
        ("thoi-su", "Thời Sự"),
        ("the-gioi", "Thế Giới"),
        ("kinh-doanh", "Kinh Doanh"),
        ("the-thao", "Thể Thao"),
        ("giai-tri", "Giải Trí"),
        ("suc-khoe", "Sức Khỏe"),
        ("phap-luat", "Pháp Luật"),
        ("giao-duc", "Giáo Dục"),
        ("du-lich", "Du Lịch"),
        ("doi-song", "Đời Sống"),
    )
}
@app.get("/api/homepage")
def api_homepage():
    """Serve the merged home-page feed through the cache (key "homepage").

    Nine VnExpress sections and the BBC front page are scraped in parallel;
    each article is tagged with a ``group`` label naming where it came from.
    Articles are appended in the order the scrapes finish.
    """
    homepage_sections = (
        "thoi-su", "the-gioi", "kinh-doanh", "the-thao", "giai-tri",
        "phap-luat", "giao-duc", "du-lich", "doi-song",
    )

    def _build_feed():
        merged = []
        with ThreadPoolExecutor(12) as pool:
            # Map each pending scrape to the group label its articles receive.
            labels = {}
            for key in homepage_sections:
                section_url, section_name = VNE_CATS[key]
                labels[pool.submit(scrape_vne, section_url)] = section_name
            labels[pool.submit(scrape_bbc_vietnamese)] = "BBC"
            for finished in as_completed(labels):
                group = labels[finished]
                try:
                    for article in finished.result():
                        article["group"] = group
                        merged.append(article)
                except:
                    # A failing source must not take down the whole feed.
                    pass
        return merged

    return JSONResponse(_cached("homepage", _build_feed))
@app.get("/api/category/{cat_id}")
def api_category(cat_id:str):
    """Serve the article list of one category through the cache.

    ``bbc`` and ``cong-nghe`` are routed to their dedicated scrapers; any key
    of ``VNE_CATS`` is scraped from VnExpress and tagged with its display
    name as ``group``. Unknown ids produce an empty list.
    """

    def _load():
        if cat_id == "bbc":
            return scrape_bbc_vietnamese()
        if cat_id == "cong-nghe":
            return scrape_genk_ai()
        section = VNE_CATS.get(cat_id)
        if section is None:
            return []
        section_url, section_name = section
        articles = scrape_vne(section_url)
        for article in articles:
            article["group"] = section_name
        return articles

    return JSONResponse(_cached(f"cat_{cat_id}", _load))
@app.get("/api/categories")
def api_categories():
    """List the available categories: BBC, GenK tech, then every VnExpress section."""
    categories = [
        {"id": "bbc", "name": "BBC Tiếng Việt", "source": "bbc"},
        {"id": "cong-nghe", "name": "Công Nghệ", "source": "genk"},
    ]
    categories.extend(
        {"id": slug, "name": label, "source": "vne"}
        for slug, (_section_url, label) in VNE_CATS.items()
    )
    return JSONResponse(categories)
@app.get("/api/dantri_hot")
def api_dantri_hot():
    """Return the result of ``scrape_dantri_hot``, cached under "dantri_hot"."""
    hot_items = _cached("dantri_hot", scrape_dantri_hot)
    return JSONResponse(hot_items)
@app.get("/api/genk_ai")
def api_genk_ai():
    """Return the result of ``scrape_genk_ai``, cached under "genk_ai" with ``_cache_ttl``."""
    items = _cached("genk_ai", scrape_genk_ai, ttl=_cache_ttl)
    return JSONResponse(items)
@app.get("/api/worldcup2026")
def api_worldcup2026():
    """Return the World Cup 2026 article list, cached under "ttvh_worldcup" with ``_cache_ttl``."""
    items = _cached("ttvh_worldcup", scrape_ttvh_worldcup, ttl=_cache_ttl)
    return JSONResponse(items)
def scrape_genk_article(url):
    """Fetch a GenK article and convert it to the article dict.

    Args:
        url: Address of the article page.

    Returns:
        A dict with ``title`` (the ``<h1>`` text, else ``og:title``),
        ``summary`` (``og:description``), ``og_image``, ``body`` (``p`` /
        ``heading`` / ``img`` entries taken from ``.knc-content``),
        ``source`` ("genk") and ``url``; ``None`` if anything raises.
    """
    try:
        r = requests.get(url, headers=HEADERS, timeout=15)
        r.encoding = "utf-8"
        soup = BeautifulSoup(r.text, "lxml")
        h1 = soup.find("h1")
        og = soup.find("meta", property="og:image")
        og_img = og.get("content", "") if og else ""
        og_title = soup.find("meta", property="og:title")
        fallback_title = og_title.get("content", "") if og_title else ""
        desc_el = soup.find("meta", property="og:description")
        desc = desc_el.get("content", "") if desc_el else ""
        cd = soup.select_one(".knc-content")
        body = []
        # find_all() yields a <figure> AND the <img> nested inside it, so the
        # same picture would otherwise be appended twice; remember emitted srcs.
        seen_imgs = set()
        if cd is not None:
            for el in cd.find_all(["p", "h2", "h3", "figure", "img"], recursive=True):
                if el.name == "p":
                    t = el.get_text(strip=True)
                    if t and len(t) > 15:
                        body.append({"type": "p", "text": t})
                elif el.name in ("h2", "h3"):
                    t = el.get_text(strip=True)
                    if t:
                        body.append({"type": "heading", "text": t})
                elif el.name in ("figure", "img"):
                    im = el if el.name == "img" else el.find("img")
                    if im is None:
                        continue
                    s = im.get("data-src") or im.get("src", "")
                    if s and "base64" not in s and s not in seen_imgs:
                        seen_imgs.add(s)
                        body.append({"type": "img", "src": s})
        return {
            # og:title was already being fetched but never used; use it when
            # the page has no <h1>, matching scrape_ttvh_article.
            "title": h1.get_text(strip=True) if h1 else fallback_title,
            "summary": desc,
            "og_image": og_img,
            "body": body,
            "source": "genk",
            "url": url,
        }
    except Exception:
        # Best-effort scraper: callers treat None as "could not read".
        return None

@app.get("/api/article")
def api_article(url:str=Query(...)):
    """Scrape one article with the scraper matching the URL's site.

    The first marker found as a substring of ``url`` selects the scraper.
    When no marker matches, or the scraper returns nothing, the response is
    ``{"error": "not supported"}``.
    """
    site_scrapers = (
        ("vnexpress.net", scrape_vne_article),
        ("bbc.com", scrape_bbc_article),
        ("dantri.com.vn", scrape_dantri_article),
        ("genk.vn", scrape_genk_article),
        ("thethaovanhoa.vn", scrape_ttvh_article),
    )
    data = None
    for marker, scraper in site_scrapers:
        if marker in url:
            data = scraper(url)
            break
    return JSONResponse(data or {"error": "not supported"})
def _web_context(topic):
    """Gather short headline snippets about ``topic`` from the web.

    Google News RSS is queried first (up to 8 items, "title — source").
    Only if that produced nothing is DuckDuckGo's HTML endpoint queried
    (up to 6 results, "title — snippet"). Every line is cut to 280
    characters and failures of either source are swallowed.

    Returns:
        The collected lines joined by newlines; "" when nothing was found.
    """

    def _child_text(parent, tag):
        # Text of the first <tag> child, or "" when it is missing.
        node = parent.find(tag)
        return node.get_text(" ", strip=True) if node else ""

    snippets = []
    try:
        feed_url = "https://news.google.com/rss/search?q=" + quote(topic) + "&hl=vi&gl=VN&ceid=VN:vi"
        resp = requests.get(feed_url, headers=HEADERS, timeout=12)
        resp.encoding = "utf-8"
        for item in BeautifulSoup(resp.text, "xml").find_all("item")[:8]:
            headline = _child_text(item, "title")
            publisher = _child_text(item, "source")
            if not headline:
                continue
            suffix = " — " + publisher if publisher else ""
            snippets.append((headline + suffix)[:280])
    except:
        pass
    if snippets:
        return "\n".join(snippets)

    try:
        resp = requests.get("https://html.duckduckgo.com/html/?q=" + quote(topic), headers=HEADERS, timeout=12)
        resp.encoding = "utf-8"
        for hit in BeautifulSoup(resp.text, "lxml").select(".result")[:6]:
            heading = hit.select_one(".result__title")
            snippet = hit.select_one(".result__snippet")
            left = heading.get_text(" ", strip=True) if heading else ""
            right = snippet.get_text(" ", strip=True) if snippet else ""
            # Trim the separator when either side is empty.
            line = (left + " — " + right).strip(" —")
            if line:
                snippets.append(line[:280])
    except:
        pass
    return "\n".join(snippets)

def _jina_read(url):
    """Read a page through the Jina reader proxy and convert it to the article dict.

    Args:
        url: Target page address, with or without a scheme.

    Returns:
        A dict with ``title`` (falls back to ``url``), ``summary``,
        ``og_image``, ``body`` (at most 80 paragraph entries), ``source``
        ("jina") and ``url``; ``None`` on a non-200 or empty response, or
        when anything raises.
    """
    meta_prefixes = ("Title:", "URL Source:", "Published Time:", "Markdown Content:", "Image:", "Description:")
    try:
        # The reader expects the target URL right after the host. Prefixing
        # "http://" to a URL that already has a scheme produced
        # "http://https://...", so only add it when the scheme is missing.
        target = url if url.startswith(("http://", "https://")) else "http://" + url
        r = requests.get("https://r.jina.ai/" + target, headers=HEADERS, timeout=25)
        r.encoding = "utf-8"
        if r.status_code != 200 or not r.text:
            return None
        lines = [x.rstrip() for x in r.text.splitlines()]
        title = ""
        img = ""
        summary = ""
        # Header fields are only looked for in the first 40 lines.
        for ln in lines[:40]:
            if ln.startswith("Title:"):
                title = ln[len("Title:"):].strip()
            elif ln.startswith("Image:"):
                img = ln[len("Image:"):].strip()
            elif ln.startswith("Description:"):
                summary = ln[len("Description:"):].strip()
        # Any non-header line longer than 40 characters counts as a paragraph.
        body = [
            {"type": "p", "text": t}
            for t in (ln.strip() for ln in lines)
            if len(t) > 40 and not t.startswith(meta_prefixes)
        ]
        if not body and summary:
            body = [{"type": "p", "text": summary}]
        return {
            "title": title or url,
            "summary": summary,
            "og_image": img,
            "body": body[:80],
            "source": "jina",
            "url": url,
        }
    except Exception:
        # Best-effort fallback reader: None means "could not read".
        return None

def _scrape_generic_article(url):
    """Extract an article from an arbitrary page, with ``_jina_read`` as fallback.

    The page is fetched directly. ``_jina_read`` is tried when the response
    is an error or not HTML, when no body content could be extracted, or
    when anything raises.

    Args:
        url: Address of the page to read.

    Returns:
        An article dict (``title``, ``summary``, ``og_image``, ``body``,
        ``source``, ``url``) with ``source`` "generic" or "jina", or
        whatever ``_jina_read`` returns (possibly ``None``) on failure.
    """
    try:
        hdr = {**HEADERS, "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"}
        r = requests.get(url, headers=hdr, timeout=15)
        r.encoding = "utf-8"
        ct = r.headers.get("content-type", "").lower()
        if r.status_code >= 400 or "text/html" not in ct:
            jr = _jina_read(url)
            if jr:
                return jr
        soup = BeautifulSoup(r.text, "lxml")
        # Remove non-content tags before looking for the main text.
        for tag in soup.find_all(["script", "style", "nav", "footer", "aside", "form"]):
            tag.decompose()
        h1 = soup.find("h1")
        ogt = soup.find("meta", property="og:title")
        title = h1.get_text(strip=True) if h1 else (ogt.get("content", "") if ogt else "")
        ogd = soup.find("meta", property="og:description")
        desc = ogd.get("content", "") if ogd else ""
        ogi = soup.find("meta", property="og:image")
        img = ogi.get("content", "") if ogi else ""
        main = soup.find("article") or soup.find("main") or soup.body
        body = []
        # find_all() yields a <figure> AND the <img> nested inside it, so the
        # same picture would otherwise be appended twice; remember emitted srcs.
        seen_imgs = set()
        if main is not None:
            for el in main.find_all(["p", "h2", "h3", "figure", "img"], recursive=True):
                if el.name == "p":
                    t = el.get_text(" ", strip=True)
                    if t and len(t) > 35:
                        body.append({"type": "p", "text": t})
                elif el.name in ("h2", "h3"):
                    t = el.get_text(" ", strip=True)
                    if t:
                        body.append({"type": "heading", "text": t})
                elif el.name in ("figure", "img"):
                    im = el if el.name == "img" else el.find("img")
                    if im is None:
                        continue
                    src = im.get("data-src") or im.get("src", "") or im.get("data-original", "")
                    if src and "base64" not in src and src not in seen_imgs:
                        seen_imgs.add(src)
                        body.append({"type": "img", "src": src})
        if not body:
            # Nothing extracted from the HTML: let the reader proxy try.
            jr = _jina_read(url)
            if jr and jr.get("body"):
                return jr
        if not body and desc:
            body = [{"type": "p", "text": desc}]
        return {
            "title": title or url,
            "summary": desc,
            "og_image": img,
            "body": body,
            "source": "generic",
            "url": url,
        }
    except Exception:
        return _jina_read(url)

def _article_by_url(url):
    """Scrape ``url`` with its site-specific scraper, else the generic one.

    The first marker found as a substring of ``url`` picks the scraper and
    its result is returned as-is; URLs matching no marker go to
    ``_scrape_generic_article``.
    """
    site_scrapers = (
        ("vnexpress.net", scrape_vne_article),
        ("bbc.com", scrape_bbc_article),
        ("dantri.com.vn", scrape_dantri_article),
        ("genk.vn", scrape_genk_article),
        ("thethaovanhoa.vn", scrape_ttvh_article),
    )
    for marker, scraper in site_scrapers:
        if marker in url:
            return scraper(url)
    return _scrape_generic_article(url)

def _call_qwen(prompt, max_tokens=1800):
    """Send ``prompt`` to Qwen2.5-VL-7B-Instruct via the Hugging Face router.

    The bearer token is the first non-empty value among the ``HF_TOKEN``,
    ``HUGGINGFACEHUB_API_TOKEN`` and ``VAISTUDIO`` environment variables.

    Returns:
        The ``content`` of the first choice's message, or ``None`` when no
        token is set, the status code is 300 or above, or anything raises.
    """
    try:
        token = None
        for env_name in ("HF_TOKEN", "HUGGINGFACEHUB_API_TOKEN", "VAISTUDIO"):
            token = os.environ.get(env_name)
            if token:
                break
        if not token:
            return None
        resp = requests.post(
            "https://router.huggingface.co/v1/chat/completions",
            headers={"Authorization": "Bearer " + token, "Content-Type": "application/json"},
            json={
                "model": "Qwen/Qwen2.5-VL-7B-Instruct",
                "messages": [{"role": "user", "content": prompt}],
                "max_tokens": max_tokens,
                "temperature": 0.7,
            },
            timeout=75,
        )
        if resp.status_code >= 300:
            return None
        first_choice = resp.json().get("choices", [{}])[0]
        return first_choice.get("message", {}).get("content")
    except:
        return None

def _collect_article_text(data, limit=28000):
    """Flatten an article dict into ``(title, text)``.

    The text is the summary followed by every ``heading`` block (prefixed
    with "## ") and ``p`` block of ``body``, each stripped, blank pieces
    dropped, joined by newlines and cut to ``limit`` characters. Other
    block types are ignored. ``data`` may be ``None`` or empty.
    """
    article = data or {}
    title = article.get("title", "")
    summary = article.get("summary", "")
    pieces = [summary] if summary else []
    for block in article.get("body", []):
        kind = block.get("type")
        if kind == "heading":
            pieces.append("## " + block.get("text", ""))
        elif kind == "p":
            pieces.append(block.get("text", ""))
    cleaned = (piece.strip() for piece in pieces if piece and piece.strip())
    return title, "\n".join(cleaned)[:limit]

def _ai_rewrite_article(data,tone="tu-nhien"):
    """Rewrite an article in Vietnamese with the model, or assemble a fallback.

    Args:
        data: Article dict accepted by ``_collect_article_text``.
        tone: Writing-style label inserted into the prompt.

    Returns:
        The stripped model output when it is longer than 300 characters;
        otherwise a text built from the collected paragraphs (title line,
        first paragraph, up to 18 paragraphs, up to 5 bullet points).
    """
    title, text = _collect_article_text(data)
    prompt = "".join((
        "Bạn là biên tập viên báo điện tử tiếng Việt. Hãy viết lại bài dưới đây bằng ngôn ngữ tự nhiên, mạch lạc, không cắt khúc, không bỏ ý quan trọng. ",
        "Giữ đúng sự thật, không bịa, không thêm thông tin ngoài bài. Văn phong: ", tone, ". ",
        "Đầu ra gồm: tiêu đề hấp dẫn, đoạn sapo 2-3 câu, các đoạn nội dung ngắn dễ đọc, và 3 gạch đầu dòng điểm chính.\n\n",
        "TIÊU ĐỀ GỐC: ", title, "\n\nNỘI DUNG GỐC:\n", text,
    ))
    rewritten = _call_qwen(prompt, 2200)
    if rewritten and len(rewritten) > 300:
        return rewritten.strip()

    # Fallback without the model: reuse the collected text itself.
    paragraphs = []
    for raw in text.split("\n"):
        stripped = raw.strip()
        if len(stripped) > 30:
            paragraphs.append(stripped)
    # NOTE(review): the lead paragraph appears twice, once on its own and
    # again as the first entry of the joined body. Confirm this is intended.
    lead = paragraphs[0] if paragraphs else ""
    joined_body = "\n\n".join(paragraphs[:18])
    bullet_lines = []
    for para in paragraphs[:5]:
        ellipsis = "..." if len(para) > 220 else ""
        bullet_lines.append("• " + para[:220] + ellipsis)
    bullets = "\n".join(bullet_lines)
    assembled = "Bản tin AI viết lại: " + title + "\n\n" + lead + "\n\n" + joined_body + "\n\nĐiểm chính:\n" + bullets
    return assembled.strip()

def _image_for_topic(topic):
    """Build a pollinations.ai image URL (1024x576, no logo) for ``topic``.

    The topic is appended to a fixed prompt prefix and the whole prompt is
    percent-encoded with no safe characters.
    """
    prompt = "editorial illustration, Vietnamese news, " + topic
    encoded_prompt = quote(prompt, safe="")
    return f"https://image.pollinations.ai/prompt/{encoded_prompt}?width=1024&height=576&nologo=true"

def _topic_articles(topic,limit=5):
    """Look up Google News RSS items about ``topic``.

    At most ``limit * 3`` feed items are examined and at most ``limit``
    distinct links are kept. Items missing a title or link are skipped.

    Returns:
        A list of dicts with ``title``, ``link`` and ``source``; whatever
        was gathered before a failure, possibly empty.
    """

    def _child_text(parent, tag, sep=""):
        # Text of the first <tag> child, or "" when it is missing.
        node = parent.find(tag)
        return node.get_text(sep, strip=True) if node else ""

    found = []
    known_links = set()
    try:
        feed_url = "https://news.google.com/rss/search?q=" + quote(topic) + "&hl=vi&gl=VN&ceid=VN:vi"
        resp = requests.get(feed_url, headers=HEADERS, timeout=12)
        resp.encoding = "utf-8"
        feed = BeautifulSoup(resp.text, "xml")
        for item in feed.find_all("item")[:limit * 3]:
            headline = _child_text(item, "title", " ")
            link = _child_text(item, "link")
            publisher = _child_text(item, "source", " ")
            if not (headline and link) or link in known_links:
                continue
            known_links.add(link)
            found.append({"title": headline, "link": link, "source": publisher})
            if len(found) >= limit:
                break
    except:
        pass
    return found

def _topic_article_context(topic):
    """Build source context for ``topic`` from article bodies that match it.

    Candidate links come from the GenK, Dan Tri tech and World Cup scrapers.
    The first 40 are fetched and scored by how many topic keywords occur in
    their title and excerpt, and the five best are formatted into one text.

    Returns:
        ``(context_text, image_url)``. When no article qualifies the result
        is ``(_web_context(topic), "")``.
    """
    # Ultra-generic tokens are dropped; domain words are kept.
    stopwords = {"trong","năm","the","and","của","cho","với","một","các","những","hiện","nay"}
    keys = []
    for word in re.findall(r"[\wÀ-ỹ]+", topic):
        if len(word) <= 2:
            continue
        lowered = word.lower()
        if lowered not in stopwords:
            keys.append(lowered)

    pool = []
    known_links = set()
    # Each source is wrapped so a failing scraper only loses its own items.
    fetchers = (
        lambda: scrape_genk_ai(),
        lambda: scrape_dantri_congnghe(),
        lambda: scrape_ttvh_worldcup(),
    )
    for fetch in fetchers:
        try:
            for item in fetch() or []:
                link = item.get("link", "")
                if link and link not in known_links:
                    known_links.add(link)
                    pool.append(item)
        except:
            pass

    ranked = []
    for item in pool[:40]:
        link = item.get("link", "")
        data = _article_by_url(link)
        if not data or not data.get("body"):
            continue
        title = data.get("title") or item.get("title", "")
        long_paragraphs = [
            block.get("text", "")
            for block in data.get("body", [])
            if block.get("type") == "p" and len(block.get("text", "")) > 40
        ]
        excerpt = " ".join(long_paragraphs)[:1800] or data.get("summary", "")
        haystack = (title + " " + excerpt).lower()
        hits = sum(1 for key in keys if key in haystack)
        # With meaningful keywords, an article must mention at least one.
        if keys and hits == 0:
            continue
        # With two or more keywords, require two hits or an adjacent pair.
        if len(keys) >= 2 and hits < 2:
            bigrams = (" ".join(keys[i:i + 2]) for i in range(len(keys) - 1))
            if not any(bigram in haystack for bigram in bigrams):
                continue
        picture = data.get("og_image") or item.get("img", "") or ""
        ranked.append((hits, title, link, excerpt, picture))

    ranked.sort(key=lambda row: row[0], reverse=True)
    best = ranked[:5]
    if not best:
        return _web_context(topic), ""
    # The first non-empty image among the best articles illustrates the post.
    lead_image = next((row[4] for row in best if row[4]), "")
    chunks = [
        "BÀI: " + row_title + "\nURL: " + row_link + "\nNỘI DUNG LỌC: " + row_excerpt
        for _hits, row_title, row_link, row_excerpt, _picture in best
    ]
    return "\n\n".join(chunks), lead_image

def _topic_post_text(topic):
    """Write a Vietnamese summary post about ``topic`` from collected sources.

    Returns:
        The stripped model output when it is longer than 300 characters;
        otherwise a templated text embedding up to 3500 characters of the
        context, or a "no data" message when the context is empty.
    """
    ctx, _image = _topic_article_context(topic)
    prompt = "".join((
        "Bạn là cây bút báo điện tử tiếng Việt. Hãy lọc các thông tin thực tế trong những nguồn dưới đây để viết một bài tóm tắt theo chủ đề: ",
        topic,
        ". Không viết chung chung. Chỉ dùng dữ kiện có trong nguồn; nếu nguồn khác nhau thì tổng hợp khách quan. ",
        "Đầu ra gồm: tiêu đề, sapo, các ý chính theo bullet, phần phân tích ngắn và kết luận.\n\nNGUỒN THỰC TẾ:\n",
        ctx,
    ))
    answer = _call_qwen(prompt, 1800)
    if answer and len(answer) > 300:
        return answer.strip()
    if not ctx:
        return "Chưa thu thập được dữ liệu đủ rõ cho chủ đề: " + topic
    return "".join((
        "Bài tóm tắt theo chủ đề: ",
        topic,
        "\n\nDữ liệu thực tế đã lọc:\n",
        ctx[:3500],
        "\n\nTóm tắt: Các nguồn trên cho thấy chủ đề này đang có nhiều diễn biến đáng chú ý. Khi viết bài, nên nêu rõ bối cảnh, các điểm mới, tác động thực tế và những điều còn cần kiểm chứng.",
    ))

@app.get("/api/wall")
def api_wall():
    """Return the first 50 stored wall posts as ``{"posts": [...]}``."""
    first_page = _load_wall()[:50]
    return JSONResponse({"posts": first_page})

@app.post("/api/rewrite_share")
async def api_rewrite_share(request:Request):
    """Rewrite the article at a URL with AI and put it at the top of the wall.

    Expects a JSON object with ``url`` and an optional ``tone`` (default
    "tu-nhien"). Responds 400 when ``url`` is missing and 422 when the
    article has no title or neither body nor summary; otherwise returns
    ``{"post": ...}`` for the newly stored post.
    """
    try:
        body = await request.json()
    except Exception:
        body = {}
    # Valid JSON that is not an object (e.g. a list) has no .get(); treat it
    # as an empty body so the client gets the 400 below rather than a 500.
    if not isinstance(body, dict):
        body = {}
    url = str(body.get("url", "")).strip()
    tone = str(body.get("tone", "tu-nhien")).strip()
    if not url:
        return JSONResponse({"error": "missing url"}, status_code=400)
    data = _article_by_url(url)
    if not data or not data.get("title") or (not data.get("body") and not data.get("summary")):
        return JSONResponse({"error": "Không đọc được bài viết"}, status_code=422)
    post = {
        # Short id: MD5 of URL plus the current time, first 12 hex digits.
        "id": hashlib.md5((url + str(time.time())).encode()).hexdigest()[:12],
        "url": url,
        "title": data.get("title", ""),
        "img": data.get("og_image", "") or "",
        "text": _ai_rewrite_article(data, tone),
        "ts": int(time.time()),
        "source": data.get("source", ""),
    }
    posts = _load_wall()
    posts.insert(0, post)
    _save_wall(posts)
    return JSONResponse({"post": post})

@app.post("/api/topic_post")
async def api_topic_post(request:Request):
    """Generate an AI post about a topic and put it at the top of the wall.

    Expects a JSON object with ``topic``. Responds 400 when it is missing;
    otherwise returns ``{"post": ...}``. The post image is the one found
    with the topic context, else a generated illustration URL.
    """
    try:
        body = await request.json()
    except Exception:
        body = {}
    # Valid JSON that is not an object (e.g. a list) has no .get(); treat it
    # as an empty body so the client gets the 400 below rather than a 500.
    if not isinstance(body, dict):
        body = {}
    topic = str(body.get("topic", "")).strip()
    if not topic:
        return JSONResponse({"error": "missing topic"}, status_code=400)
    # NOTE(review): _topic_post_text() collects the same topic context again
    # internally, so the source articles are scraped twice per request.
    ctx_img = _topic_article_context(topic)[1]
    post = {
        # Short id: MD5 of topic plus the current time, first 12 hex digits.
        "id": hashlib.md5((topic + str(time.time())).encode()).hexdigest()[:12],
        "url": "",
        "title": topic,
        "img": ctx_img or _image_for_topic(topic),
        "text": _topic_post_text(topic),
        "ts": int(time.time()),
        "source": "ai-topic",
    }
    posts = _load_wall()
    posts.insert(0, post)
    _save_wall(posts)
    return JSONResponse({"post": post})

@app.post("/api/url_wall")
async def api_url_wall(request:Request):
    """Rewrite the article at a URL in the short tone and put it on the wall.

    Expects a JSON object with ``url``. Responds 400 when it is missing and
    422 when no titled article could be read; otherwise returns
    ``{"post": ...}`` for the newly stored post.
    """
    try:
        body = await request.json()
    except Exception:
        body = {}
    # Valid JSON that is not an object (e.g. a list) has no .get(); treat it
    # as an empty body so the client gets the 400 below rather than a 500.
    if not isinstance(body, dict):
        body = {}
    url = str(body.get("url", "")).strip()
    if not url:
        return JSONResponse({"error": "missing url"}, status_code=400)
    data = _article_by_url(url)
    if not data or not data.get("title"):
        return JSONResponse({"error": "Không đọc được URL"}, status_code=422)
    post = {
        # Short id: MD5 of URL plus the current time, first 12 hex digits.
        "id": hashlib.md5((url + str(time.time())).encode()).hexdigest()[:12],
        "url": url,
        "title": data.get("title", ""),
        "img": data.get("og_image", "") or "",
        "text": _ai_rewrite_article(data, "ngan-gon-tu-nhien"),
        "ts": int(time.time()),
        "source": data.get("source", ""),
    }
    posts = _load_wall()
    posts.insert(0, post)
    _save_wall(posts)
    return JSONResponse({"post": post})

@app.get("/v")
async def video_share(url:str=Query(default=""),title:str=Query(default="VNEWS Video"),img:str=Query(default=""),type:str=Query(default="highlights")):
    """Serve a share page that stores the pending video and redirects to the app.

    Args:
        url: Video address; when empty the page only redirects.
        title: Text for the page ``<title>``.
        img: Accepted but not used by this handler.
        type: Video kind stored next to the URL.

    Returns:
        An HTML page whose inline script writes ``pending_video`` to
        ``localStorage`` (when a URL is given) and navigates to ``SPACE_URL``.
    """
    decoded_url = unquote(url)
    decoded_title = unquote(title)
    if decoded_url:
        # Query values are untrusted: serialize them as JSON rather than
        # pasting them between quotes, and escape <, > and & so the data can
        # neither end the string literal nor close the <script> element.
        payload = json.dumps({"url": decoded_url, "type": type})
        for raw, escaped in (("<", "\\u003c"), (">", "\\u003e"), ("&", "\\u0026")):
            payload = payload.replace(raw, escaped)
        redirect_script = (
            '<script>localStorage.setItem("pending_video",JSON.stringify('
            + payload
            + '));location.href="' + SPACE_URL + '";</script>'
        )
    else:
        redirect_script = '<script>location.href="' + SPACE_URL + '";</script>'
    # The title goes into HTML text, so it must be HTML-escaped.
    safe_title = html_lib.escape(decoded_title)
    return HTMLResponse(
        '<!DOCTYPE html><html><head><meta charset="utf-8"><title>' + safe_title
        + '</title></head><body style="background:#111;color:#fff;text-align:center;padding:40px"><p>⏳</p>'
        + redirect_script + '</body></html>'
    )
@app.get("/s")
async def share_redirect(url:str=Query(default=""),title:str=Query(default="VNEWS"),img:str=Query(default="")):
    """Serve a share page that stores the pending article URL and redirects to the app.

    Args:
        url: Article address; when empty the page only redirects.
        title: Text for the page ``<title>``.
        img: Accepted but not used by this handler.

    Returns:
        An HTML page whose inline script writes ``pending_article`` to
        ``localStorage`` (when a URL is given) and navigates to ``SPACE_URL``.
    """
    decoded_url = unquote(url)
    if decoded_url:
        # The URL is untrusted: emit it as a JSON string literal and escape
        # <, > and & so it can neither end the literal nor close <script>.
        url_literal = json.dumps(decoded_url)
        for raw, escaped in (("<", "\\u003c"), (">", "\\u003e"), ("&", "\\u0026")):
            url_literal = url_literal.replace(raw, escaped)
        redirect_script = (
            '<script>localStorage.setItem("pending_article",'
            + url_literal
            + ');location.href="' + SPACE_URL + '";</script>'
        )
    else:
        redirect_script = '<script>location.href="' + SPACE_URL + '";</script>'
    # The title goes into HTML text, so it must be HTML-escaped.
    safe_title = html_lib.escape(unquote(title))
    return HTMLResponse(
        '<!DOCTYPE html><html><head><meta charset="utf-8"><title>' + safe_title
        + '</title></head><body>' + redirect_script + '</body></html>'
    )
@app.get("/")
async def index():
    """Serve the frontend page read from /app/static/index.html."""
    with open("/app/static/index.html", "r", encoding="utf-8") as page_file:
        markup = page_file.read()
    return HTMLResponse(content=markup)
# Serve the files under /app/static at the /static URL prefix.
app.mount(
    "/static",
    StaticFiles(directory="/app/static"),
    name="static",
)