"""VNEWS - FastAPI backend with livescore + xemlaibongda highlights + YouTube FPT shorts""" import hashlib, re, time, subprocess, json, os, threading import html as html_lib from datetime import datetime from concurrent.futures import ThreadPoolExecutor, as_completed from fastapi import FastAPI, Query, Request from fastapi.responses import HTMLResponse, JSONResponse, StreamingResponse, Response from fastapi.staticfiles import StaticFiles from urllib.parse import unquote, quote, urlencode import requests from bs4 import BeautifulSoup app = FastAPI() HEADERS = {"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36","Accept-Language":"vi-VN,vi;q=0.9,en;q=0.8"} BONGDA_HEADERS = {"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36","Accept-Language":"vi-VN,vi;q=0.9","Referer":"https://bongda.com.vn/lich-thi-dau","X-Requested-With":"XMLHttpRequest"} BASE_BDP = "https://bongdaplus.vn" SPACE_URL = "https://bep40-vnews.hf.space" _cache = {} _cache_ttl = 300 _cache_ttl_live = 60 _cache_ttl_yt = 1800 SHORTS_FALLBACK = [ {"id":"Lu_iCQ5YwNM","title":"Công an lập hồ sơ xử lý người phụ nữ chửi bới, tát nam tài xế ô tô ở Hà Nội | #shorts","channel":"baodantri7941"}, {"id":"CwWvijF8BOA","title":"Chú rể Ninh Bình bật khóc nhận món quà bí mật người cha quá cố gửi 26 năm trước | #shorts","channel":"baodantri7941"}, {"id":"tvPewsc2ph4","title":"Tính năng ẩn trên iPhone giúp giảm mỏi mắt | #shorts","channel":"baodantri7941"}, {"id":"b1Nxzv9ixlU","title":"Y án 3 năm tù với nữ tài xế uống 8 lon bia lái xe tông chủ tịch xã tử vong | #shorts","channel":"baodantri7941"}, {"id":"Xp5eTwAZAis","title":"Người đánh hàng xóm tại chung cư ở Hà Nội bị tuyên hơn 4 tháng tù | #shorts","channel":"baodantri7941"}, {"id":"Htzvwg6iOBM","title":"Xe điện Audi S6 Sportback e-tron có gì đặc biệt? 
| #shorts","channel":"baodantri7941"}, {"id":"iMdFmWvYdlo","title":"Cô gái người Nga yêu thời trang và đất nước Việt Nam | #shorts","channel":"baodantri7941"}, {"id":"IVaRc6moEv8","title":"Người nông dân Trung Quốc đột quỵ, bệnh viện giúp bán sạch 4 tấn táo | #shorts","channel":"baodantri7941"}, {"id":"uVxqPxToItU","title":"Công an vào cuộc vụ người phụ nữ chửi bới, hành hung tài xế ô tô ở Hà Nội | #shorts","channel":"baodantri7941"}, {"id":"VAfgNNgZDRs","title":"Khởi tố 4 đối tượng ném bom xăng vào nhà dân ở Đồng Nai | #shorts","channel":"baodantri7941"}, {"id":"sBH_-zGh0Xw","title":"Vì sao Times New Roman vẫn nổi tiếng sau hàng chục năm? | #shorts","channel":"baodantri7941"}, {"id":"woKn5f2bLHM","title":"Quảng Ninh ngập sâu diện rộng sau đợt mưa lớn | #shorts","channel":"baodantri7941"}, {"id":"bcpgRoxbLPw","title":"Giông lốc quật bay mái tôn ở TP.HCM | #shorts","channel":"baodantri7941"}, {"id":"ZIIC5osy544","title":"Bé trai Trung Quốc rơi từ tầng 11 vẫn sống sót kỳ diệu | #shorts","channel":"baodantri7941"}, {"id":"uTMJ49NQpyc","title":"Sau lớp mascot 40kg: Câu chuyện mưu sinh của người trẻ ở TPHCM | #shorts","channel":"baodantri7941"}, {"id":"7Pd6vZ2Lz1M","title":"Hành động ấm lòng của người đàn ông tham gia tìm kiếm 5 học sinh tử vong ở sông Lô | SKĐS","channel":"baosuckhoedoisongboyte"}, {"id":"SlHLt_ZyPiE","title":"Xử phạt người đàn ông xóa số điện thoại cứu hộ trên cao tốc Bắc - Nam | SKĐS","channel":"baosuckhoedoisongboyte"}, {"id":"IUOprcJyYr4","title":"Phụ nữ táo bón có phải do lười ăn rau? 
| SKĐS #shorts","channel":"baosuckhoedoisongboyte"}, {"id":"YY8ojFNE-AU","title":"Quái xế tự quay clip nẹt pô, đánh võng đăng TikTok bị xử lý | SKĐS","channel":"baosuckhoedoisongboyte"}, {"id":"OV7_oGdQGII","title":"Bố cô dâu khóc sụt sùi rồi quẩy cực sung gây bão mạng | SKĐS","channel":"baosuckhoedoisongboyte"}, {"id":"FoxhFyz2skY","title":"Người đàn ông nước ngoài đập phá ô tô, bẻ cần gạt nước ở Đà Nẵng | SKĐS","channel":"baosuckhoedoisongboyte"}, {"id":"R1oC_I8dFPU","title":"Thanh niên buông tay lái, đứng trên xe máy khi đổ đèo ở Đắk Lắk | SKĐS","channel":"baosuckhoedoisongboyte"}, {"id":"U0Ft6ChWAIo","title":"Cô giáo kể phút tháo chạy khỏi xe khách trước khi bị lũ vò nát ở Cao Bằng | SKĐS","channel":"baosuckhoedoisongboyte"}, {"id":"hH0ANeze_4E","title":"Liên tiếp hàng chục con bò bị sét đánh chết trong ngày mưa dông | SKĐS","channel":"baosuckhoedoisongboyte"}, {"id":"pXWt0QbAzRQ","title":"Va chạm giao thông, người phụ nữ lăng mạ tài xế ô tô | SKĐS","channel":"baosuckhoedoisongboyte"}, {"id":"UWWLPY1OYt4","title":"CSGT chặn xe khách khống chế đối tượng cướp dây chuyền tại Gia Lai | SKĐS","channel":"baosuckhoedoisongboyte"}, {"id":"AxhVTQutsuo","title":"Xuất tinh sớm và những hiểu lầm thường gặp | SKĐS #shorts","channel":"baosuckhoedoisongboyte"}, {"id":"cNy6FgaNxYM","title":"Cô dâu khóc sưng mắt vì 6 chỉ vàng không cánh mà bay trong ngày cưới | SKĐS","channel":"baosuckhoedoisongboyte"}, {"id":"IDt_S6q59Ro","title":"Chở bạn gái không đội mũ bảo hiểm, thanh niên đấm CSGT | SKĐS","channel":"baosuckhoedoisongboyte"}, {"id":"LFxJ9Ik6W0A","title":"Mệnh lệnh từ trái tim: CSGT Hà Nội mở đường đưa bé 5 tháng tuổi đi cấp cứu | SKĐS","channel":"baosuckhoedoisongboyte"} ] for _v in SHORTS_FALLBACK: _v["link"]="https://www.youtube.com/watch?v="+_v["id"] _v["img"]="https://i.ytimg.com/vi/"+_v["id"]+"/hqdefault.jpg" _v["source"]="yt" SHORT_STATS_FILE = "/data/short_stats.json" if os.path.isdir("/data") else "/app/short_stats.json" _short_lock = threading.Lock() def 
_load_short_db(): try: if os.path.exists(SHORT_STATS_FILE): with open(SHORT_STATS_FILE,"r",encoding="utf-8") as f:return json.load(f) except:pass return {} def _save_short_db(db): try: os.makedirs(os.path.dirname(SHORT_STATS_FILE),exist_ok=True) tmp=SHORT_STATS_FILE+".tmp" with open(tmp,"w",encoding="utf-8") as f:json.dump(db,f,ensure_ascii=False) os.replace(tmp,SHORT_STATS_FILE) except:pass def _short_default():return {"views":0,"likes":0,"shares":0,"comments":[]} WALL_FILE = "/data/wall_posts.json" if os.path.isdir("/data") else "/app/wall_posts.json" def _load_wall(): try: if os.path.exists(WALL_FILE): with open(WALL_FILE,"r",encoding="utf-8") as f:return json.load(f) except:pass return [] def _save_wall(posts): try: os.makedirs(os.path.dirname(WALL_FILE),exist_ok=True) tmp=WALL_FILE+".tmp" with open(tmp,"w",encoding="utf-8") as f:json.dump(posts[:100],f,ensure_ascii=False) os.replace(tmp,WALL_FILE) except:pass PRIORITY_LEAGUES = ["Ngoại Hạng Anh","FA Cup","Champions League","LaLiga","Copa del Rey","Serie A","Bundesliga","Ligue 1","V-League"] LEAGUE_IDS = {"nha":27110,"laliga":27233,"seriea":27044,"bundesliga":26891,"ligue1":27212} HL_LEAGUES = {"premier-league":{"path":"anh/premier-league","name":"Premier League","emoji":"🏴󠁧󠁢󠁥󠁮󠁧󠁿"},"fa-cup":{"path":"anh/fa-cup","name":"FA Cup","emoji":"🏆"},"bundesliga":{"path":"duc/bundesliga","name":"Bundesliga","emoji":"🇩🇪"},"serie-a":{"path":"italy/serie-a","name":"Serie A","emoji":"🇮🇹"},"la-liga":{"path":"tay-ban-nha/la-liga","name":"La Liga","emoji":"🇪🇸"},"champions-league":{"path":"cup-chau-au/uefa-champions-league","name":"Champions League","emoji":"⭐"},"europa-league":{"path":"cup-chau-au/uefa-europa-league","name":"Europa League","emoji":"🟠"},"world-cup":{"path":"the-gioi/world-cup-qualifiers","name":"World Cup 2026","emoji":"🌍"}} def _cached(key, fn, ttl=None): now=time.time();t=ttl or _cache_ttl if key in _cache and now-_cache[key]["t"]=3:score=f"{spans[0].get_text(strip=True)} - {spans[2].get_text(strip=True)}" if 
len(spans)>=4:minute=spans[3].get_text(strip=True) if not score and status_el and status_el.select_one(".vs"):score="VS" league=league_el.get_text(strip=True) if league_el else "" return{"home":home_el.get_text(strip=True),"away":away_el.get_text(strip=True),"score":score or"VS","minute":minute,"league":league,"time":time_el.get_text(strip=True) if time_el else "","event_id":event_id,"home_logo":home_logo.get("src","") if home_logo else "","away_logo":away_logo.get("src","") if away_logo else "","status":status_type} # ===== VIDEO PROXY ===== @app.get("/api/proxy/m3u8") def proxy_m3u8(url: str = Query(...)): try: r = requests.get(url, headers=HEADERS, timeout=15) if r.status_code != 200:return Response(status_code=502, content="upstream error") lines = r.text.strip().split('\n');rewritten = [] for line in lines: if line.startswith('#') or not line.strip():rewritten.append(line) else:rewritten.append("/api/proxy/seg?url=" + quote(line.strip(), safe="")) return Response(content='\n'.join(rewritten).encode('utf-8'),media_type="application/vnd.apple.mpegurl",headers={"Access-Control-Allow-Origin":"*","Cache-Control":"public, max-age=300"}) except:return Response(status_code=502, content="proxy error") @app.get("/api/proxy/seg") def proxy_segment(url: str = Query(...)): try: r = requests.get(url, headers=HEADERS, timeout=30) if r.status_code != 200:return Response(status_code=502, content="upstream error") data = r.content if len(data) > 188 and data[0:4] == b'\x89PNG' and data[188] == 0x47:data = data[188:] return Response(content=data,media_type="video/mp2t",headers={"Access-Control-Allow-Origin":"*","Cache-Control":"public, max-age=3600"}) except:return Response(status_code=502, content="proxy error") @app.get("/api/proxy/video") def proxy_video(url: str = Query(...), request: Request = None): try: req_headers = dict(HEADERS) if request and request.headers.get("range"):req_headers["Range"] = request.headers["range"] r = requests.get(url, headers=req_headers, 
timeout=30, stream=True) resp_headers = {"Access-Control-Allow-Origin":"*","Accept-Ranges":"bytes","Content-Type":r.headers.get("Content-Type","video/mp4")} if "Content-Range" in r.headers:resp_headers["Content-Range"] = r.headers["Content-Range"] if "Content-Length" in r.headers:resp_headers["Content-Length"] = r.headers["Content-Length"] return StreamingResponse(r.iter_content(chunk_size=256*1024),status_code=r.status_code,headers=resp_headers) except:return Response(status_code=502, content="proxy error") @app.get("/api/proxy/img") def proxy_img(url: str = Query(...)): """Proxy images from sources that block hotlinking (DanTri CDN).""" try: r = requests.get(url, headers={**HEADERS, "Referer": "https://dantri.com.vn/"}, timeout=10) if r.status_code != 200:return Response(status_code=502) ct = r.headers.get("Content-Type", "image/jpeg") return Response(content=r.content, media_type=ct, headers={"Cache-Control": "public, max-age=86400", "Access-Control-Allow-Origin": "*"}) except:return Response(status_code=502) # ===== XEMLAIBONGDA HIGHLIGHTS ===== def _scrape_xemlaibongda_page(page_path, limit=20): try: url = f"https://xemlaibongda.top/{page_path}" if page_path else "https://xemlaibongda.top/" r=requests.get(url,headers=HEADERS,timeout=15) if r.status_code!=200:return[] r.encoding="utf-8";soup=BeautifulSoup(r.text,"lxml");videos=[];seen=set() for a in soup.find_all("a",href=True): href=a.get("href","") if"/video/" not in href:continue if not href.startswith("http"):href="https://xemlaibongda.top"+href if href in seen:continue seen.add(href);slug=href.split("/video/")[-1].rstrip("/") title=slug.replace("-"," ").title() title=re.sub(r'\d{4}\s*\d{2}\s*\d{2}$','',title).strip() title=re.sub(r'\s+V\s+',' vs ',title);title=re.sub(r'\s+Vs\s+',' vs ',title) img=a.find("img") or (a.parent.find("img") if a.parent else None) img_src="" if img:img_src=img.get("data-src","") or img.get("src","") or img.get("data-lazy","") if not 
img_src:img_src=f"https://img.refooty.com/thumbnail/{slug}.webp" videos.append({"title":title,"link":href,"img":img_src,"source":"xemlaibongda"}) if len(videos)>=limit:break return videos except:return[] def scrape_xemlaibongda():return _scrape_xemlaibongda_page("",20) def scrape_highlights_by_league(league_key): if league_key not in HL_LEAGUES:return[] return _scrape_xemlaibongda_page(HL_LEAGUES[league_key]["path"],20) def scrape_all_league_highlights(): results = {} def _fetch(key):return key, scrape_highlights_by_league(key) with ThreadPoolExecutor(8) as ex: futs = [ex.submit(_fetch, k) for k in HL_LEAGUES] for f in as_completed(futs): try: key, vids = f.result() if vids:results[key] = vids except:pass return results def extract_xemlaibongda_video(url): try: r=requests.get(url,headers=HEADERS,timeout=15) if r.status_code!=200:return None r.encoding="utf-8";soup=BeautifulSoup(r.text,"lxml");video=soup.find("video") if video: src=video.get("src","");poster=video.get("poster","") if not src: source=video.find("source") if source:src=source.get("src","") if src:return{"src":src,"poster":poster,"type":"hls" if".m3u8" in src else"video"} m3u8s=re.findall(r'(https?://[^\s"\'<>]+\.m3u8)',r.text) if m3u8s: og=soup.find("meta",property="og:image");poster=og.get("content","") if og else "" return{"src":m3u8s[0],"poster":poster,"type":"hls"} return None except:return None # ===== YOUTUBE SHORTS ===== def _yt_channel_shorts(channel, count=15): """Fast scrape YouTube shorts tab without yt-dlp. 
Returns newest-first IDs/titles.""" try: url=f"https://www.youtube.com/@{channel}/shorts" r=requests.get(url,headers={**HEADERS,"Accept-Language":"vi,en;q=0.8"},timeout=15) if r.status_code!=200:return[] html=r.text ids=[];items=[] for m in re.finditer(r'"videoId":"([A-Za-z0-9_-]{11})"',html): vid=m.group(1) if vid in ids:continue ids.append(vid) snip=html[max(0,m.start()-900):m.start()+1600] title="" mt=re.search(r'"title":\{"runs":\[\{"text":"([^"]+)"',snip) if not mt:mt=re.search(r'"accessibilityText":"([^"]+)"',snip) if mt:title=html_lib.unescape(mt.group(1)).replace('\n',' ').strip() if not title:title="YouTube Short" items.append({"title":title,"link":f"https://www.youtube.com/watch?v={vid}","img":f"https://i.ytimg.com/vi/{vid}/hqdefault.jpg","source":"yt","id":vid,"channel":channel}) if len(items)>=count:break return items except:return[] def scrape_shorts(): """Stable shorts feed: fast HTML scrape + static fallback so slide never disappears.""" vids=[] with ThreadPoolExecutor(2) as ex: futs=[ex.submit(_yt_channel_shorts,ch,24) for ch in ["baodantri7941","baosuckhoedoisongboyte"]] for f in as_completed(futs): try: r=f.result() if r:vids.extend(r) except:pass merged=[];seen=set() for v in vids+SHORTS_FALLBACK: vid=v.get("id") if not vid or vid in seen:continue seen.add(vid);merged.append(v) return merged[:40] # ===== LIVESCORE ===== @app.get("/api/livescore/live") def api_livescore_live():return JSONResponse({"html":_cached("ls_live",lambda:fetch_bongda_api("/api/fixtures/live"),ttl=_cache_ttl_live)}) @app.get("/api/livescore/incoming") def api_livescore_incoming():return JSONResponse({"html":_cached("ls_incoming",lambda:fetch_bongda_api("/api/fixtures/incoming"),ttl=_cache_ttl_live)}) @app.get("/api/livescore/today") def api_livescore_today(): today=datetime.now().strftime("%Y-%m-%d");return JSONResponse({"html":_cached("ls_today",lambda:fetch_bongda_api(f"/api/fixtures/get-by-date?date={today}"),ttl=_cache_ttl)}) @app.get("/api/livescore/results") def 
api_livescore_results(): today=datetime.now().strftime("%Y-%m-%d");return JSONResponse({"html":_cached("ls_results",lambda:fetch_bongda_api(f"/api/fixtures/get-by-date?date={today}&status=finished"),ttl=_cache_ttl)}) @app.get("/api/livescore/standings/{league}") def api_livescore_standings(league:str): tid=LEAGUE_IDS.get(league,27110);return JSONResponse({"html":_cached(f"ls_bxh_{league}",lambda:fetch_bongda_api(f"/api/league-table/home?tournament_id={tid}&is_detail=True"),ttl=_cache_ttl)}) @app.get("/api/livescore/date/{date}") def api_livescore_date(date:str):return JSONResponse({"html":fetch_bongda_api(f"/api/fixtures/get-by-date?date={date}")}) @app.get("/api/match/{event_id}/commentaries") def api_match_commentaries(event_id:int):return JSONResponse({"html":fetch_bongda_api(f"/api/fixtures/commentaries?event_id={event_id}")}) @app.get("/api/match/{event_id}/stats") def api_match_stats(event_id:int):return JSONResponse({"html":fetch_bongda_api(f"/api/event-standing/player-performance?event_id={event_id}")}) @app.get("/api/livescore/featured") def api_livescore_featured(): def _f(): sources=[("/api/fixtures/live","live"),("/api/fixtures/get-by-date?date="+datetime.now().strftime("%Y-%m-%d"),"today"),("/api/fixtures/incoming","upcoming")] for endpoint, stype in sources: html=fetch_bongda_api(endpoint) if not html or len(html)<100:continue soup=BeautifulSoup(html,"lxml");all_matches=[] for li in soup.select("li.match-detail"): match=_parse_match_from_li(li, stype) if not match or not match["event_id"]:continue if stype=="today" and "KT" in match.get("minute",""):continue all_matches.append(match) if not all_matches:continue for pl in PRIORITY_LEAGUES: for match in all_matches: if pl in match["league"]:return match return all_matches[0] return None return JSONResponse(_cached("ls_featured",_f,ttl=30)) # ===== VIDEO APIs ===== @app.get("/api/shorts") def api_shorts():return JSONResponse(_cached("yt_shorts_v3",scrape_shorts,ttl=_cache_ttl_yt)) 
def _short_public_stats(st):
    """Return the client-facing view of one stored short-stats record.

    Counters are coerced to ``int`` and the comment list is capped at 80
    entries, the same cap that is applied when a comment is stored.
    """
    return {
        "views": int(st.get("views", 0)),
        "likes": int(st.get("likes", 0)),
        "shares": int(st.get("shares", 0)),
        "comments": st.get("comments", [])[:80],
    }


def _url_host_is(url, domain):
    """Return True when *url*'s hostname is *domain* or one of its subdomains.

    Used instead of a substring test before fetching a user-supplied URL, so
    that a URL which merely mentions the domain in its path or query string
    is not fetched server-side.
    """
    from urllib.parse import urlsplit

    try:
        host = (urlsplit(url).hostname or "").lower()
    except ValueError:
        return False
    return host == domain or host.endswith("." + domain)


@app.get("/api/short-stats")
def api_short_stats(ids: str = Query(default="")):
    """Return stored engagement stats for a comma-separated list of short IDs.

    IDs without a stored record get the zeroed default record.
    Response shape: ``{"stats": {id: {"views", "likes", "shares", "comments"}}}``.
    """
    wanted = [x for x in ids.split(",") if x]
    with _short_lock:
        db = _load_short_db()
    # ``db`` is a freshly loaded local object, so the response can be built
    # after the lock has been released.
    out = {vid: _short_public_stats(db.get(vid) or _short_default()) for vid in wanted}
    return JSONResponse({"stats": out})


@app.post("/api/short-action")
async def api_short_action(request: Request):
    """Record a view / like / share / comment for one short.

    Expects a JSON object with ``id``, ``action`` and, for comments, ``text``.
    Comment text is truncated to 180 characters and at most 80 comments are
    kept per short (newest first). Unknown actions only refresh the
    ``updated`` timestamp. Returns the updated stats, or HTTP 400 when ``id``
    is missing.
    """
    try:
        body = await request.json()
    except Exception:
        body = {}
    # A syntactically valid payload that is not an object (list, string,
    # number) has no ``.get``; treat it like an empty body instead of crashing.
    if not isinstance(body, dict):
        body = {}

    vid = str(body.get("id", "")).strip()
    action = str(body.get("action", "")).strip()
    txt = str(body.get("text", "")).strip()
    if not vid:
        return JSONResponse({"error": "missing id"}, status_code=400)

    counters = {"view": "views", "like": "likes", "share": "shares"}
    with _short_lock:
        db = _load_short_db()
        st = db.get(vid) or _short_default()
        if action in counters:
            field = counters[action]
            st[field] = int(st.get(field, 0)) + 1
        elif action == "comment" and txt:
            comments = st.get("comments", [])
            comments.insert(0, {"text": txt[:180], "ts": int(time.time())})
            st["comments"] = comments[:80]
        st["updated"] = int(time.time())
        db[vid] = st
        _save_short_db(db)
        out = _short_public_stats(st)
    return JSONResponse({"stats": out})


@app.get("/api/highlights")
def api_highlights():
    """Return the cached list of latest highlight videos."""
    return JSONResponse(_cached("xemlaibongda_hl", scrape_xemlaibongda, ttl=_cache_ttl))


# NOTE: this fixed route must stay registered before "/api/highlights/{league}",
# otherwise "leagues" would be captured as a league key.
@app.get("/api/highlights/leagues")
def api_highlights_leagues():
    """Return cached highlights for every configured league, keyed by league."""
    return JSONResponse(_cached("hl_leagues", scrape_all_league_highlights, ttl=_cache_ttl))


@app.get("/api/highlights/{league}")
def api_highlights_league(league: str):
    """Return cached highlights for one league key from ``HL_LEAGUES``."""
    if league not in HL_LEAGUES:
        return JSONResponse({"error": "league not found"})
    return JSONResponse(
        _cached(f"hl_{league}", lambda: scrape_highlights_by_league(league), ttl=_cache_ttl)
    )


@app.get("/api/highlights_config")
def api_highlights_config():
    """Return the league configuration table (path, name, emoji per key)."""
    return JSONResponse(HL_LEAGUES)


@app.get("/api/video_url")
def api_video_url(url: str = Query(...)):
    """Resolve a page URL to something the player can load.

    Returns ``{"src", "poster", "type"}`` where ``type`` is ``"youtube"``,
    ``"hls"`` or ``"video"``; HLS and bongdaplus sources are rewritten to go
    through this app's ``/api/proxy/...`` routes. Returns
    ``{"error": "not found"}`` when nothing could be resolved.
    """
    # YouTube: nothing is fetched, the embed URL is derived from the video id.
    if "youtube.com" in url or "youtu.be" in url:
        m = re.search(r'(?:v=|shorts/|youtu\.be/)([a-zA-Z0-9_-]{11})', url)
        if m:
            vid = m.group(1)
            return JSONResponse({
                "src": f"https://www.youtube.com/embed/{vid}?autoplay=1&rel=0&enablejsapi=1",
                "poster": f"https://i.ytimg.com/vi/{vid}/hqdefault.jpg",
                "type": "youtube",
            })

    # xemlaibongda: the URL itself is fetched server-side, so require the real
    # hostname rather than a substring match anywhere in the URL.
    if _url_host_is(url, "xemlaibongda.top"):
        v = extract_xemlaibongda_video(url)
        if v:
            if v["type"] == "hls":
                v["src"] = "/api/proxy/m3u8?url=" + quote(v["src"], safe="")
            return JSONResponse(v)

    # bongdaplus: only the numeric id is taken from the URL; the embed page
    # address is always built from BASE_BDP.
    if "bongdaplus.vn" in url:
        try:
            m = re.search(r'-(\d{6,})\.html', url)
            if m:
                r = requests.get(
                    f"{BASE_BDP}/video-embed/{m.group(1)}.html", headers=HEADERS, timeout=10
                )
                r.encoding = "utf-8"
                soup = BeautifulSoup(r.text, "lxml")
                video = soup.select_one("video#videoPlayer")
                if video:
                    source = video.find("source")
                    src = source.get("src", "") if source else ""
                    poster = video.get("poster", "")
                    if src:
                        return JSONResponse({
                            "src": "/api/proxy/video?url=" + quote(src, safe=""),
                            "poster": poster,
                            "type": "video",
                        })
        except Exception:
            pass  # best effort: fall through to the "not found" response
    return JSONResponse({"error": "not found"})


@app.get("/api/bdp_videos")
def api_bdp_videos():
    """Return up to 20 cached video links scraped from the bongdaplus video page."""
    def _f():
        try:
            soup = _get(f"{BASE_BDP}/video")
            arts = []
            seen = set()
            for a in soup.find_all("a", href=True):
                href = a.get("href", "")
                # Skip non-video links and the section index pages themselves.
                if "/video/" not in href or href in ("/video/", "/video/ban-thang-dep", "/video/highlight"):
                    continue
                if not href.startswith("http"):
                    href = BASE_BDP + href
                if href in seen:
                    continue
                # Drop a leading "mm:ss" duration that is part of the link text.
                title = re.sub(r'^\d{2}:\d{2}', '', a.get_text(strip=True)).strip()
                if not title or len(title) < 5:
                    continue
                img_tag = a.find("img") or (a.parent.find("img") if a.parent else None)
                img = (img_tag.get("data-src") or img_tag.get("src", "")) if img_tag else ""
                seen.add(href)
                arts.append({"title": title, "link": href, "img": img, "source": "bdp"})
            return arts[:20]
        except Exception:
            return []
    return JSONResponse(_cached("bdp_videos", _f))


# ===== NEWS =====
def scrape_vne(cat_url):
    """Scrape up to 15 article teasers from a VnExpress category page.

    Returns a list of ``{"title", "link", "img", "source": "vne"}`` dicts, or
    ``[]`` on any failure.
    """
    try:
        soup = _get(cat_url)
        arts = []
        for it in soup.select("article.item-news")[:15]:
            a = it.select_one("h2.title-news a") or it.select_one("h3.title-news a")
            if not a:
                continue
            t = a.get("title", "") or a.get_text(strip=True)
            lk = a.get("href", "")
            if not t or not lk:
                continue
            im = it.find("img")
            img = (im.get("data-src") or im.get("src", "")) if im else ""
            # When the <img> src contains "blank", take the first candidate of
            # the sibling <source srcset> instead.
            if img and "blank" in img:
                src = it.find("source")
                if src:
                    img = src.get("srcset", "").split(",")[0].strip().split(" ")[0]
            arts.append({"title": t, "link": lk, "img": img, "source": "vne"})
        return arts
    except Exception:
        return []


def scrape_vne_article(url):
    """Scrape one VnExpress article into the in-app article structure.

    Returns ``{"title", "summary", "og_image", "body", "source", "url"}`` where
    ``body`` is a list of ``p`` / ``img`` / ``heading`` items, or ``None`` on
    failure.
    """
    try:
        soup = _get(url)
        h1 = soup.select_one("h1.title-detail")
        desc = soup.select_one("p.description")
        og = soup.find("meta", property="og:image")
        og_img = og.get("content", "") if og else ""
        cd = soup.select_one("article.fck_detail")
        body = []
        if cd:
            # Only direct children are inspected, so nested markup is ignored.
            for ch in cd.children:
                if not hasattr(ch, "name") or not ch.name:
                    continue  # plain text nodes
                if ch.name == "p":
                    t = ch.get_text(strip=True)
                    if t:
                        body.append({"type": "p", "text": t})
                elif ch.name == "figure":
                    im = ch.find("img")
                    if im:
                        s = im.get("data-src") or im.get("src", "")
                        body.append({"type": "img", "src": s})
                elif ch.name in ("h2", "h3"):
                    body.append({"type": "heading", "text": ch.get_text(strip=True)})
        return {
            "title": h1.get_text(strip=True) if h1 else "",
            "summary": desc.get_text(strip=True) if desc else "",
            "og_image": og_img,
            "body": body,
            "source": "vne",
            "url": url,
        }
    except Exception:
        return None


def _scrape_dantri_homepage(cat_filter=None, require_img=True):
    """Scrape up to 15 article links from the Dan Tri homepage.

    Args:
        cat_filter: when set, keep only links whose URL contains
            ``/<cat_filter>/``.
        require_img: when True (default) links without a CDN thumbnail are
            skipped; when False they are kept with an empty ``img``.

    Thumbnails are rewritten to go through ``/api/proxy/img``. Returns a list
    of ``{"title", "link", "img", "source": "dantri"}`` dicts, ``[]`` on failure.
    """
    try:
        soup = _get("https://dantri.com.vn/")
        arts = []
        seen = set()
        for a in soup.find_all("a", href=True):
            href = a.get("href", "")
            title = a.get("title", "") or a.get_text(strip=True)
            if not title or len(title) < 15 or "javascript:" in href:
                continue
            if not href.startswith("http"):
                href = "https://dantri.com.vn" + href
            if href in seen or not href.endswith(".htm"):
                continue
            if cat_filter and f"/{cat_filter}/" not in href:
                continue
            img_tag = a.find("img")
            if not img_tag and a.parent:
                img_tag = a.parent.find("img")
            img_src = ""
            if img_tag:
                img_src = img_tag.get("data-src", "") or img_tag.get("src", "")
            if img_src and "cdn" in img_src:
                img_src = "/api/proxy/img?url=" + quote(img_src, safe="")
            elif require_img:
                continue
            else:
                img_src = ""
            seen.add(href)
            arts.append({"title": title, "link": href, "img": img_src, "source": "dantri"})
            if len(arts) >= 15:
                break
        return arts
    except Exception:
        return []


def scrape_dantri_hot():
    """Return Dan Tri homepage articles that have a thumbnail."""
    return _scrape_dantri_homepage()


def scrape_dantri_congnghe():
    """Return Dan Tri homepage articles from the ``/cong-nghe/`` section.

    Unlike ``scrape_dantri_hot``, articles without a CDN thumbnail are kept
    (with an empty ``img``).
    """
    return _scrape_dantri_homepage("cong-nghe", require_img=False)


def scrape_genk_ai():
    """Scrape up to 30 AI articles from genk.vn for in-app reading.

    The thumbnail is looked up in up to six ancestor containers of the link;
    if none is found the article page is fetched and its ``og:image`` is used.
    Returns ``[]`` on failure.
    """
    try:
        r = requests.get("https://genk.vn/ai.chn", headers=HEADERS, timeout=15)
        if r.status_code != 200:
            return []
        r.encoding = "utf-8"
        soup = BeautifulSoup(r.text, "lxml")
        articles = []
        seen = set()
        for a in soup.find_all("a", href=True):
            href = a.get("href", "")
            if not href.endswith(".chn") or href == "/ai.chn":
                continue
            if href.startswith("/"):
                href = "https://genk.vn" + href
            if href in seen or "genk.vn" not in href:
                continue
            title = a.get("title", "") or a.get_text(strip=True)
            if not title or len(title) < 20:
                continue

            # Walk up the tree looking for a content image (not avatar/logo).
            container = a.parent
            img_src = ""
            for _ in range(6):
                if container is None:
                    break
                for img in container.find_all("img"):
                    s = img.get("data-src", "") or img.get("src", "")
                    if s and "mediacdn" in s and "avatar" not in s and "logo" not in s:
                        img_src = s
                        break
                if img_src:
                    break
                container = container.parent

            seen.add(href)
            if not img_src:
                # Fallback: one extra request per article to read og:image.
                try:
                    og_r = requests.get(href, headers=HEADERS, timeout=8)
                    og_r.encoding = "utf-8"
                    og_soup = BeautifulSoup(og_r.text, "lxml")
                    og_tag = og_soup.find("meta", property="og:image")
                    if og_tag:
                        img_src = og_tag.get("content", "")
                except Exception:
                    pass  # thumbnail is optional
            articles.append({"title": title, "link": href, "img": img_src, "source": "genk"})
            if len(articles) >= 30:
                break
        return articles
    except Exception:
        return []


def scrape_dantri_article(url):
    """Scrape one Dan Tri article into the in-app article structure.

    Images served from ``cdnphoto.dantri`` are rewritten to go through
    ``/api/proxy/img``. The summary comes from the sapo heading, falling back
    to ``og:description``. Returns ``None`` on failure.
    """
    try:
        r = requests.get(url, headers=HEADERS, timeout=15)
        r.encoding = "utf-8"
        soup = BeautifulSoup(r.text, "lxml")
        for tag in soup.find_all(["script", "style", "nav", "footer", "aside"]):
            tag.decompose()
        h1 = soup.find("h1")
        og = soup.find("meta", property="og:image")
        og_img = og.get("content", "") if og else ""
        if og_img and "cdnphoto.dantri" in og_img:
            og_img = "/api/proxy/img?url=" + quote(og_img, safe="")

        content = (
            soup.select_one("main")
            or soup.select_one("div.singular-content")
            or soup.select_one("article")
        )
        body = []
        # find_all matches both a <figure> and the <img> inside it, so the same
        # picture would otherwise be emitted twice.
        seen_imgs = set()
        if content:
            for el in content.find_all(["p", "h2", "h3", "figure", "img"], recursive=True):
                if el.name == "p":
                    t = el.get_text(strip=True)
                    if t and len(t) > 15:
                        body.append({"type": "p", "text": t})
                elif el.name in ("h2", "h3"):
                    t = el.get_text(strip=True)
                    if t:
                        body.append({"type": "heading", "text": t})
                elif el.name in ("figure", "img"):
                    im = el if el.name == "img" else el.find("img")
                    if im is None:
                        continue
                    s = im.get("data-src") or im.get("src", "")
                    if not s or "base64" in s or s in seen_imgs:
                        continue
                    seen_imgs.add(s)
                    if "cdnphoto.dantri" in s:
                        s = "/api/proxy/img?url=" + quote(s, safe="")
                    body.append({"type": "img", "src": s})

        desc = ""
        sapo = soup.select_one("h2.singular-sapo") or soup.select_one("h2[class*=sapo]")
        if sapo:
            desc = sapo.get_text(strip=True)
        else:
            og_desc = soup.find("meta", property="og:description")
            if og_desc:
                desc = og_desc.get("content", "")
        return {
            "title": h1.get_text(strip=True) if h1 else "",
            "summary": desc,
            "og_image": og_img,
            "body": body,
            "source": "dantri",
            "url": url,
        }
    except Exception:
        return None


def scrape_bbc_vietnamese():
    """Scrape up to 15 article links from the BBC Vietnamese front page.

    Returns a list of ``{"title", "link", "img", "source": "bbc"}`` dicts, or
    ``[]`` on failure.
    """
    try:
        r = requests.get(
            "https://www.bbc.com/vietnamese",
            headers={"User-Agent": "Mozilla/5.0", "Accept-Language": "en-GB"},
            timeout=15,
        )
        r.encoding = "utf-8"
        soup = BeautifulSoup(r.text, "lxml")
        arts = []
        seen = set()
        for a in soup.select("a[href*='/vietnamese/']"):
            href = a.get("href", "")
            if not href or href == "/vietnamese" or href.count("/") < 3:
                continue
            if not href.startswith("http"):
                href = "https://www.bbc.com" + href
            if href in seen:
                continue
            title = a.get_text(strip=True)
            # Skip short labels and navigation links.
            if not title or len(title) < 15 or any(
                x in title.lower() for x in ["đăng nhập", "trang chủ", "bbc news"]
            ):
                continue
            # Look for a thumbnail in up to three ancestor containers.
            img = ""
            container = a.parent
            for _ in range(3):
                if container is None:
                    break
                im = container.find("img")
                if im:
                    img = im.get("src", "") or im.get("data-src", "")
                    break
                container = container.parent
            seen.add(href)
            arts.append({"title": title, "link": href, "img": img, "source": "bbc"})
            if len(arts) >= 15:
                break
        return arts
    except Exception:
        return []


def scrape_bbc_article(url):
    """Scrape one BBC article into the in-app article structure (text only).

    Paragraphs of 20 characters or fewer are dropped; ``summary`` is always
    empty. Returns ``None`` on failure.
    """
    try:
        r = requests.get(
            url, headers={"User-Agent": "Mozilla/5.0", "Accept-Language": "en-GB"}, timeout=15
        )
        r.encoding = "utf-8"
        soup = BeautifulSoup(r.text, "lxml")
        h1 = soup.find("h1")
        og = soup.find("meta", property="og:image")
        og_img = og.get("content", "") if og else ""
        body = []
        for p in soup.select("[data-component='text-block'] p, article p, main p"):
            t = p.get_text(strip=True)
            if t and len(t) > 20:
                body.append({"type": "p", "text": t})
        return {
            "title": h1.get_text(strip=True) if h1 else "",
            "summary": "",
            "og_image": og_img,
            "body": body,
            "source": "bbc",
            "url": url,
        }
    except Exception:
        return None


def scrape_ttvh_worldcup():
    """Scrape World Cup 2026 articles from The Thao Van Hoa.

    The RSS feed is tried first; if it fails or yields nothing, the HTML
    section page is scraped instead (capped at 24 articles). Returns a list of
    ``{"title", "link", "img", "source": "ttvh"}`` dicts, ``[]`` on failure.
    """
    try:
        r = requests.get(
            "https://thethaovanhoa.vn/rss/world-cup-2026.rss", headers=HEADERS, timeout=15
        )
        r.encoding = "utf-8"
        soup = BeautifulSoup(r.text, "xml")
        arts = []
        seen = set()
        for it in soup.find_all("item"):
            title_el = it.find("title")
            link_el = it.find("link")
            desc_el = it.find("description")
            title = title_el.get_text(strip=True) if title_el else ""
            link = link_el.get_text(strip=True) if link_el else ""
            desc = desc_el.get_text(" ", strip=True) if desc_el else ""
            # The description is parsed as HTML to pull out a thumbnail.
            img = ""
            im = BeautifulSoup(desc, "lxml").find("img")
            if im:
                img = im.get("src", "") or im.get("data-src", "")
            if title and link and link not in seen:
                seen.add(link)
                arts.append({"title": title, "link": link, "img": img, "source": "ttvh"})
        if arts:
            return arts
    except Exception:
        pass  # fall back to scraping the HTML page below

    try:
        soup = _get("https://thethaovanhoa.vn/world-cup-2026.htm")
        arts = []
        seen = set()
        for a in soup.find_all("a", href=True):
            href = a.get("href", "")
            if not href.startswith("http"):
                href = "https://thethaovanhoa.vn" + href
            if href in seen or "thethaovanhoa.vn" not in href:
                continue
            # Article URLs end in "-<8+ digits>.htm".
            if not re.search(r"/[^/]+-\d{8,}\.htm", href):
                continue
            title = a.get("title", "") or a.get_text(" ", strip=True)
            # Search the link and up to four ancestors for a thumbnail.
            img = None
            node = a
            for _ in range(5):
                if node is None:
                    break
                img = node.find("img")
                if img:
                    break
                node = node.parent
            img_src = ""
            if img:
                img_src = (
                    img.get("data-src", "")
                    or img.get("src", "")
                    or img.get("data-original", "")
                    or img.get("data-thumb", "")
                )
                # Image-only links: borrow the title from the image attributes.
                if len(title) < 15:
                    title = img.get("alt", "") or img.get("title", "") or title
            if not title or len(title) < 15:
                continue
            seen.add(href)
            arts.append({"title": title, "link": href, "img": img_src, "source": "ttvh"})
            if len(arts) >= 24:
                break
        return arts
    except Exception:
        return []


def scrape_ttvh_article(url):
    """Scrape one The Thao Van Hoa article into the in-app article structure.

    The title falls back to ``og:title`` when no ``<h1>`` exists, and the body
    falls back to the ``og:description`` text when nothing could be extracted.
    Returns ``None`` on failure.
    """
    try:
        soup = _get(url)
        h1 = soup.find("h1")
        og = soup.find("meta", property="og:image")
        og_img = og.get("content", "") if og else ""
        og_title = soup.find("meta", property="og:title")
        fallback_title = og_title.get("content", "") if og_title else ""
        desc_el = soup.find("meta", property="og:description")
        desc = desc_el.get("content", "") if desc_el else ""
        cd = (
            soup.select_one(".detail-content")
            or soup.select_one(".content-detail")
            or soup.select_one("article")
            or soup.select_one("main")
        )
        body = []
        # find_all matches both a <figure> and the <img> inside it, so the same
        # picture would otherwise be emitted twice.
        seen_imgs = set()
        if cd:
            for el in cd.find_all(["p", "h2", "h3", "figure", "img"], recursive=True):
                if el.name == "p":
                    t = el.get_text(strip=True)
                    if t and len(t) > 20 and "Theo dõi" not in t:
                        body.append({"type": "p", "text": t})
                elif el.name in ("h2", "h3"):
                    t = el.get_text(strip=True)
                    if t:
                        body.append({"type": "heading", "text": t})
                elif el.name in ("figure", "img"):
                    im = el if el.name == "img" else el.find("img")
                    if im is None:
                        continue
                    src = im.get("data-src") or im.get("src", "") or im.get("data-original", "")
                    if not src or "base64" in src or src in seen_imgs:
                        continue
                    seen_imgs.add(src)
                    body.append({"type": "img", "src": src})
        if not body and desc:
            body = [{"type": "p", "text": desc}]
        return {
            "title": h1.get_text(strip=True) if h1 else fallback_title,
            "summary": desc,
            "og_image": og_img,
            "body": body,
            "source": "ttvh",
            "url": url,
        }
    except Exception:
        return None


# VnExpress categories: id -> (category URL, display name).
VNE_CATS = {
    "thoi-su": ("https://vnexpress.net/thoi-su", "Thời Sự"),
    "the-gioi": ("https://vnexpress.net/the-gioi", "Thế Giới"),
    "kinh-doanh": ("https://vnexpress.net/kinh-doanh", "Kinh Doanh"),
    "the-thao": ("https://vnexpress.net/the-thao", "Thể Thao"),
    "giai-tri": ("https://vnexpress.net/giai-tri", "Giải Trí"),
    "suc-khoe": ("https://vnexpress.net/suc-khoe", "Sức Khỏe"),
    "phap-luat": ("https://vnexpress.net/phap-luat", "Pháp Luật"),
    "giao-duc": ("https://vnexpress.net/giao-duc", "Giáo Dục"),
    "du-lich": ("https://vnexpress.net/du-lich", "Du Lịch"),
    "doi-song": ("https://vnexpress.net/doi-song", "Đời Sống"),
}


@app.get("/api/homepage")
def api_homepage():
    """Return the cached homepage feed: VnExpress categories plus BBC.

    Sources are scraped concurrently; each article gets a ``group`` field with
    its category display name. Articles are appended in completion order, so
    the order of groups is not fixed.
    """
    def _f():
        articles = []
        homepage_cats = [
            "thoi-su", "the-gioi", "kinh-doanh", "the-thao", "giai-tri",
            "phap-luat", "giao-duc", "du-lich", "doi-song",
        ]
        with ThreadPoolExecutor(12) as ex:
            futs = {
                ex.submit(scrape_vne, VNE_CATS[k][0]): VNE_CATS[k][1] for k in homepage_cats
            }
            futs[ex.submit(scrape_bbc_vietnamese)] = "BBC"
            for f in as_completed(futs):
                try:
                    for a in f.result():
                        a["group"] = futs[f]
                        articles.append(a)
                except Exception:
                    pass  # one failing source must not empty the whole feed
        return articles
    return JSONResponse(_cached("homepage", _f))


@app.get("/api/category/{cat_id}")
def api_category(cat_id: str):
    """Return cached articles for one category id (``[]`` for unknown ids)."""
    # Reject unknown ids up front so arbitrary path values never reach the
    # cache as new keys.
    if cat_id not in ("bbc", "cong-nghe") and cat_id not in VNE_CATS:
        return JSONResponse([])

    def _f():
        if cat_id == "bbc":
            return scrape_bbc_vietnamese()
        if cat_id == "cong-nghe":
            return scrape_genk_ai()
        arts = scrape_vne(VNE_CATS[cat_id][0])
        for a in arts:
            a["group"] = VNE_CATS[cat_id][1]
        return arts
    return JSONResponse(_cached(f"cat_{cat_id}", _f))


@app.get("/api/categories")
def api_categories():
    """Return the list of available categories as ``{"id", "name", "source"}``."""
    cats = [
        {"id": "bbc", "name": "BBC Tiếng Việt", "source": "bbc"},
        {"id": "cong-nghe", "name": "Công Nghệ", "source": "genk"},
    ]
    cats.extend({"id": k, "name": n, "source": "vne"} for k, (_u, n) in VNE_CATS.items())
    return JSONResponse(cats)


@app.get("/api/dantri_hot")
def api_dantri_hot():
    """Return the cached Dan Tri homepage articles."""
    return JSONResponse(_cached("dantri_hot", scrape_dantri_hot))
@app.get("/api/genk_ai")
def api_genk_ai():
    """Return the cached result of ``scrape_genk_ai`` as JSON."""
    return JSONResponse(_cached("genk_ai", scrape_genk_ai, ttl=_cache_ttl))


@app.get("/api/worldcup2026")
def api_worldcup2026():
    """Return the cached result of ``scrape_ttvh_worldcup`` as JSON."""
    return JSONResponse(_cached("ttvh_worldcup", scrape_ttvh_worldcup, ttl=_cache_ttl))


def scrape_genk_article(url):
    """Fetch a genk.vn article page and convert it to an article dict.

    The body is read from the ``.knc-content`` container: paragraphs longer
    than 15 characters, h2/h3 headings and non-inline images.

    Returns:
        dict with keys ``title``, ``summary``, ``og_image``, ``body``,
        ``source`` and ``url``; ``None`` if fetching or parsing fails.
    """
    try:
        r = requests.get(url, headers=HEADERS, timeout=15)
        r.encoding = "utf-8"
        soup = BeautifulSoup(r.text, "lxml")
        h1 = soup.find("h1")
        og = soup.find("meta", property="og:image")
        og_img = og.get("content", "") if og else ""
        og_title = soup.find("meta", property="og:title")
        fallback_title = og_title.get("content", "") if og_title else ""
        desc_el = soup.find("meta", property="og:description")
        desc = desc_el.get("content", "") if desc_el else ""
        cd = soup.select_one(".knc-content")
        body = []
        # find_all() returns a <figure> and the <img> nested inside it, so the
        # same picture would otherwise be appended twice.
        seen_imgs = set()
        if cd:
            for el in cd.find_all(["p", "h2", "h3", "figure", "img"], recursive=True):
                if el.name == "p":
                    t = el.get_text(strip=True)
                    if t and len(t) > 15:
                        body.append({"type": "p", "text": t})
                elif el.name in ("h2", "h3"):
                    t = el.get_text(strip=True)
                    if t:
                        body.append({"type": "heading", "text": t})
                elif el.name in ("figure", "img"):
                    im = el if el.name == "img" else el.find("img")
                    if im:
                        s = im.get("data-src") or im.get("src", "")
                        if s and "base64" not in s and s not in seen_imgs:
                            seen_imgs.add(s)
                            body.append({"type": "img", "src": s})
        return {
            # Fall back to og:title when the page has no <h1>.
            "title": h1.get_text(strip=True) if h1 else fallback_title,
            "summary": desc,
            "og_image": og_img,
            "body": body,
            "source": "genk",
            "url": url,
        }
    except Exception:
        return None


def _site_scraper(url):
    """Return the site-specific article scraper matching *url*, or None.

    Matching is a plain substring test, checked in a fixed order.
    """
    if "vnexpress.net" in url:
        return scrape_vne_article
    if "bbc.com" in url:
        return scrape_bbc_article
    if "dantri.com.vn" in url:
        return scrape_dantri_article
    if "genk.vn" in url:
        return scrape_genk_article
    if "thethaovanhoa.vn" in url:
        return scrape_ttvh_article
    return None


@app.get("/api/article")
def api_article(url: str = Query(...)):
    """Scrape one article from a supported site and return it as JSON.

    Responds with ``{"error": "not supported"}`` when no site-specific
    scraper matches the URL or the scraper returns nothing.
    """
    scraper = _site_scraper(url)
    data = scraper(url) if scraper else None
    return JSONResponse(data if data else {"error": "not supported"})


def _web_context(topic):
    """Collect real web/news context for a topic.

    Tries the Google News RSS search first (up to 8 items); only if that
    yields nothing does it fall back to the DuckDuckGo HTML results page
    (up to 6 results). Each line is capped at 280 characters.

    Returns:
        Newline-joined headline/snippet lines; empty string if both fail.
    """
    bits = []
    try:
        rss = "https://news.google.com/rss/search?q=" + quote(topic) + "&hl=vi&gl=VN&ceid=VN:vi"
        r = requests.get(rss, headers=HEADERS, timeout=12)
        r.encoding = "utf-8"
        soup = BeautifulSoup(r.text, "xml")
        for it in soup.find_all("item")[:8]:
            title_el = it.find("title")
            src_el = it.find("source")
            title = title_el.get_text(" ", strip=True) if title_el else ""
            src = src_el.get_text(" ", strip=True) if src_el else ""
            if title:
                bits.append((title + (" — " + src if src else ""))[:280])
    except Exception:
        # Best effort: fall through to the second source.
        pass
    if bits:
        return "\n".join(bits)
    try:
        r = requests.get(
            "https://html.duckduckgo.com/html/?q=" + quote(topic),
            headers=HEADERS,
            timeout=12,
        )
        r.encoding = "utf-8"
        soup = BeautifulSoup(r.text, "lxml")
        for res in soup.select(".result")[:6]:
            t = res.select_one(".result__title")
            sn = res.select_one(".result__snippet")
            line = (
                (t.get_text(" ", strip=True) if t else "")
                + " — "
                + (sn.get_text(" ", strip=True) if sn else "")
            ).strip(" —")
            if line:
                bits.append(line[:280])
    except Exception:
        pass
    return "\n".join(bits)


def _jina_read(url):
    """Read *url* through the r.jina.ai proxy and build an article dict.

    ``Title:``, ``Image:`` and ``Description:`` header lines are looked up in
    the first 40 lines of the response; every other non-header line longer
    than 40 characters becomes a paragraph (at most 80 are returned).

    Returns:
        Article dict with ``source`` set to ``"jina"``, or ``None`` on a
        non-200 response, an empty response or any error.
    """
    try:
        # Only add a scheme when the URL lacks one; blindly prefixing
        # "http://" turns "https://site/x" into "http://https://site/x".
        target = url if re.match(r"https?://", url, re.I) else "http://" + url
        ju = "https://r.jina.ai/" + target
        r = requests.get(ju, headers=HEADERS, timeout=25)
        r.encoding = "utf-8"
        if r.status_code != 200 or not r.text:
            return None
        lines = [x.rstrip() for x in r.text.splitlines()]
        title = ""
        img = ""
        summary = ""
        body = []
        for ln in lines[:40]:
            if ln.startswith("Title:"):
                title = ln[len("Title:"):].strip()
            elif ln.startswith("Image:"):
                img = ln[len("Image:"):].strip()
            elif ln.startswith("Description:"):
                summary = ln[len("Description:"):].strip()
        header_prefixes = (
            "Title:", "URL Source:", "Published Time:",
            "Markdown Content:", "Image:", "Description:",
        )
        for ln in lines:
            t = ln.strip()
            if not t or t.startswith(header_prefixes):
                continue
            if len(t) > 40:
                body.append({"type": "p", "text": t})
        if not body and summary:
            body = [{"type": "p", "text": summary}]
        return {
            "title": title or url,
            "summary": summary,
            "og_image": img,
            "body": body[:80],
            "source": "jina",
            "url": url,
        }
    except Exception:
        return None


def _scrape_generic_article(url):
    """Scrape an arbitrary page into an article dict, with Jina as fallback.

    The page is fetched directly; on an HTTP error status or a non-HTML
    content type ``_jina_read`` is tried first. Otherwise the HTML is parsed
    from ``<article>``, ``<main>`` or ``<body>``. If no body could be
    extracted, ``_jina_read`` is tried again, then the og:description is used
    as a single paragraph.

    Returns:
        Article dict (``source`` is ``"generic"`` or ``"jina"``), or whatever
        ``_jina_read`` returns (possibly ``None``) when direct scraping raises.
    """
    try:
        hdr = {**HEADERS, "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"}
        r = requests.get(url, headers=hdr, timeout=15)
        r.encoding = "utf-8"
        ct = r.headers.get("content-type", "").lower()
        if r.status_code >= 400 or "text/html" not in ct:
            jr = _jina_read(url)
            if jr:
                return jr
        soup = BeautifulSoup(r.text, "lxml")
        # Strip page chrome so its text does not leak into the body.
        for tag in soup.find_all(["script", "style", "nav", "footer", "aside", "form"]):
            tag.decompose()
        h1 = soup.find("h1")
        ogt = soup.find("meta", property="og:title")
        title = h1.get_text(strip=True) if h1 else (ogt.get("content", "") if ogt else "")
        ogd = soup.find("meta", property="og:description")
        desc = ogd.get("content", "") if ogd else ""
        ogi = soup.find("meta", property="og:image")
        img = ogi.get("content", "") if ogi else ""
        main = soup.find("article") or soup.find("main") or soup.body
        body = []
        # A <figure> and its nested <img> are both returned by find_all();
        # remember emitted sources so each image appears once.
        seen_imgs = set()
        if main:
            for el in main.find_all(["p", "h2", "h3", "figure", "img"], recursive=True):
                if el.name == "p":
                    t = el.get_text(" ", strip=True)
                    if t and len(t) > 35:
                        body.append({"type": "p", "text": t})
                elif el.name in ("h2", "h3"):
                    t = el.get_text(" ", strip=True)
                    if t:
                        body.append({"type": "heading", "text": t})
                elif el.name in ("figure", "img"):
                    im = el if el.name == "img" else el.find("img")
                    if im:
                        src = im.get("data-src") or im.get("src", "") or im.get("data-original", "")
                        if src and "base64" not in src and src not in seen_imgs:
                            seen_imgs.add(src)
                            body.append({"type": "img", "src": src})
        if not body:
            jr = _jina_read(url)
            if jr and jr.get("body"):
                return jr
        if not body and desc:
            body = [{"type": "p", "text": desc}]
        return {
            "title": title or url,
            "summary": desc,
            "og_image": img,
            "body": body,
            "source": "generic",
            "url": url,
        }
    except Exception:
        return _jina_read(url)


def _article_by_url(url):
    """Scrape *url* with its site-specific scraper, else the generic one."""
    scraper = _site_scraper(url)
    return scraper(url) if scraper else _scrape_generic_article(url)


def _call_qwen(prompt, max_tokens=1800):
    """Try Qwen2.5-VL via HF router; return None if unavailable.

    The bearer token is read from ``HF_TOKEN``, ``HUGGINGFACEHUB_API_TOKEN``
    or ``VAISTUDIO`` (first one set). Returns the message content of the first
    choice, or ``None`` when no token is configured, the HTTP status is >= 300
    or anything raises.
    """
    try:
        token = (
            os.environ.get("HF_TOKEN")
            or os.environ.get("HUGGINGFACEHUB_API_TOKEN")
            or os.environ.get("VAISTUDIO")
        )
        if not token:
            return None
        headers = {"Authorization": "Bearer " + token, "Content-Type": "application/json"}
        payload = {
            "model": "Qwen/Qwen2.5-VL-7B-Instruct",
            "messages": [{"role": "user", "content": prompt}],
            "max_tokens": max_tokens,
            "temperature": 0.7,
        }
        r = requests.post(
            "https://router.huggingface.co/v1/chat/completions",
            headers=headers,
            json=payload,
            timeout=75,
        )
        if r.status_code >= 300:
            return None
        j = r.json()
        return j.get("choices", [{}])[0].get("message", {}).get("content")
    except Exception:
        return None


def _collect_article_text(data, limit=28000):
    """Flatten an article dict into ``(title, text)``.

    The text is the summary followed by the body's paragraphs and headings
    (headings prefixed with ``"## "``), one per line, truncated to *limit*
    characters. Image blocks are ignored. ``None`` is treated as an empty dict.
    """
    data = data or {}
    title = data.get("title", "")
    summary = data.get("summary", "")
    parts = []
    if summary:
        parts.append(summary)
    for b in data.get("body", []):
        if b.get("type") == "heading":
            parts.append("## " + b.get("text", ""))
        elif b.get("type") == "p":
            parts.append(b.get("text", ""))
    text = "\n".join([p.strip() for p in parts if p and p.strip()])
    return title, text[:limit]


def _ai_rewrite_article(data, tone="tu-nhien"):
    """Rewrite an article with the LLM, or build a plain-text fallback.

    The model output is used only when it is longer than 300 characters.
    Otherwise the fallback is assembled from the collected text: a heading,
    the first paragraph, up to 18 paragraphs and up to 5 bullets (each cut at
    220 characters).
    """
    title, text = _collect_article_text(data)
    prompt = (
        "Bạn là biên tập viên báo điện tử tiếng Việt. Hãy viết lại bài dưới đây bằng ngôn ngữ tự nhiên, mạch lạc, không cắt khúc, không bỏ ý quan trọng. "
        "Giữ đúng sự thật, không bịa, không thêm thông tin ngoài bài. Văn phong: " + tone + ". "
        "Đầu ra gồm: tiêu đề hấp dẫn, đoạn sapo 2-3 câu, các đoạn nội dung ngắn dễ đọc, và 3 gạch đầu dòng điểm chính.\n\n"
        "TIÊU ĐỀ GỐC: " + title + "\n\nNỘI DUNG GỐC:\n" + text
    )
    out = _call_qwen(prompt, 2200)
    if out and len(out) > 300:
        return out.strip()
    # Fallback: complete non-truncated rewrite using full collected text chunks
    paras = [p.strip() for p in text.split("\n") if len(p.strip()) > 30]
    # NOTE(review): paras[0] is printed as the lead and again as the first
    # body paragraph — confirm whether the repetition is intended.
    body = "\n\n".join(paras[:18])
    bullets = "\n".join(["• " + p[:220] + ("..." if len(p) > 220 else "") for p in paras[:5]])
    return (
        "Bản tin AI viết lại: " + title + "\n\n"
        + (paras[0] if paras else "") + "\n\n" + body
        + "\n\nĐiểm chính:\n" + bullets
    ).strip()


def _image_for_topic(topic):
    """Build a pollinations.ai image URL whose prompt embeds *topic*."""
    return (
        "https://image.pollinations.ai/prompt/"
        + quote("editorial illustration, Vietnamese news, " + topic, safe="")
        + "?width=1024&height=576&nologo=true"
    )


def _topic_articles(topic, limit=5):
    """Return up to *limit* Google News RSS hits for *topic*.

    Each hit is ``{"title", "link", "source"}``; duplicate links are skipped.
    Returns whatever was collected (possibly an empty list) if the request or
    parsing fails.
    """
    items = []
    seen = set()
    try:
        rss = "https://news.google.com/rss/search?q=" + quote(topic) + "&hl=vi&gl=VN&ceid=VN:vi"
        r = requests.get(rss, headers=HEADERS, timeout=12)
        r.encoding = "utf-8"
        soup = BeautifulSoup(r.text, "xml")
        # Look at more items than needed because some are filtered out.
        for it in soup.find_all("item")[:limit * 3]:
            title_el = it.find("title")
            link_el = it.find("link")
            src_el = it.find("source")
            title = title_el.get_text(" ", strip=True) if title_el else ""
            link = link_el.get_text(strip=True) if link_el else ""
            src = src_el.get_text(" ", strip=True) if src_el else ""
            if not title or not link or link in seen:
                continue
            seen.add(link)
            items.append({"title": title, "link": link, "source": src})
            if len(items) >= limit:
                break
    except Exception:
        pass
    return items


def _topic_article_context(topic):
    """Filter readable article sources by topic, then summarize actual article bodies.

    Candidates come from ``scrape_genk_ai``, ``scrape_dantri_congnghe`` and
    ``scrape_ttvh_worldcup``; the first 40 are fetched and scored by how many
    topic keywords occur in their title + excerpt. The 5 best are rendered as
    text chunks.

    Returns:
        ``(context_text, image_url)``. When no article qualifies the context
        comes from ``_web_context`` and the image URL is ``""``.
    """
    raw_keys = [k.lower() for k in re.findall(r"[\wÀ-ỹ]+", topic) if len(k) > 2]
    # Drop ultra-generic tokens; keep domain words such as giáo/dục, bóng/đá, world/cup.
    stop = {"trong", "năm", "the", "and", "của", "cho", "với", "một", "các", "những", "hiện", "nay"}
    keys = [k for k in raw_keys if k not in stop]
    candidates = []
    seen = set()

    def add_items(items):
        # Append items with a link that has not been collected yet.
        for a in items or []:
            link = a.get("link", "")
            if not link or link in seen:
                continue
            seen.add(link)
            candidates.append(a)

    # Each source is independent and best effort.
    try:
        add_items(scrape_genk_ai())
    except Exception:
        pass
    try:
        add_items(scrape_dantri_congnghe())
    except Exception:
        pass
    try:
        add_items(scrape_ttvh_worldcup())
    except Exception:
        pass

    scored = []
    img = ""
    for a in candidates[:40]:
        data = _article_by_url(a.get("link", ""))
        if not data or not data.get("body"):
            continue
        title = data.get("title") or a.get("title", "")
        ps = [
            b.get("text", "")
            for b in data.get("body", [])
            if b.get("type") == "p" and len(b.get("text", "")) > 40
        ]
        excerpt = " ".join(ps)[:1800] or data.get("summary", "")
        hay = (title + " " + excerpt).lower()
        score = sum(1 for k in keys if k in hay)
        # Require topic relevance when we have meaningful keys.
        if keys and score == 0:
            continue
        # With two or more keys, a single hit is accepted only if some
        # adjacent key pair appears verbatim.
        if (
            len(keys) >= 2
            and score < 2
            and not any(" ".join(keys[i:i + 2]) in hay for i in range(len(keys) - 1))
        ):
            continue
        scored.append((score, title, a.get("link", ""), excerpt, data.get("og_image") or a.get("img", "") or ""))
    scored = sorted(scored, key=lambda x: x[0], reverse=True)[:5]
    chunks = []
    for score, title, link, excerpt, im in scored:
        if not img and im:
            img = im  # first available image among the best matches
        chunks.append("BÀI: " + title + "\nURL: " + link + "\nNỘI DUNG LỌC: " + excerpt)
    if chunks:
        return "\n\n".join(chunks), img
    return _web_context(topic), ""


def _compose_topic_post(topic, ctx):
    """Turn already-collected context *ctx* into the post text for *topic*.

    Uses the LLM answer when it is longer than 300 characters; otherwise a
    static template around the first 3500 characters of *ctx*, or a
    "no data" message when *ctx* is empty.
    """
    prompt = (
        "Bạn là cây bút báo điện tử tiếng Việt. Hãy lọc các thông tin thực tế trong những nguồn dưới đây để viết một bài tóm tắt theo chủ đề: " + topic
        + ". Không viết chung chung. Chỉ dùng dữ kiện có trong nguồn; nếu nguồn khác nhau thì tổng hợp khách quan. "
        "Đầu ra gồm: tiêu đề, sapo, các ý chính theo bullet, phần phân tích ngắn và kết luận.\n\nNGUỒN THỰC TẾ:\n" + ctx
    )
    out = _call_qwen(prompt, 1800)
    if out and len(out) > 300:
        return out.strip()
    if ctx:
        return (
            "Bài tóm tắt theo chủ đề: " + topic + "\n\nDữ liệu thực tế đã lọc:\n" + ctx[:3500]
            + "\n\nTóm tắt: Các nguồn trên cho thấy chủ đề này đang có nhiều diễn biến đáng chú ý. Khi viết bài, nên nêu rõ bối cảnh, các điểm mới, tác động thực tế và những điều còn cần kiểm chứng."
        )
    return "Chưa thu thập được dữ liệu đủ rõ cho chủ đề: " + topic


def _topic_post_text(topic):
    """Collect context for *topic* and return the generated post text."""
    ctx, _img = _topic_article_context(topic)
    return _compose_topic_post(topic, ctx)


@app.get("/api/wall")
def api_wall():
    """Return the first 50 wall posts as ``{"posts": [...]}``."""
    return JSONResponse({"posts": _load_wall()[:50]})


@app.post("/api/rewrite_share")
async def api_rewrite_share(request: Request):
    """Rewrite the article at the posted ``url`` and prepend it to the wall.

    Expects a JSON object with ``url`` and optional ``tone``. Responds 400
    when ``url`` is missing and 422 when the article has no title or neither
    body nor summary.

    NOTE(review): scraping and the model call use blocking ``requests`` inside
    an ``async def`` handler — consider offloading to a thread.
    """
    try:
        body = await request.json()
    except Exception:
        body = {}
    if not isinstance(body, dict):
        # Valid JSON that is not an object (list, string, ...) has no .get().
        body = {}
    url = str(body.get("url", "")).strip()
    tone = str(body.get("tone", "tu-nhien")).strip()
    if not url:
        return JSONResponse({"error": "missing url"}, status_code=400)
    data = _article_by_url(url)
    if not data or not data.get("title") or (not data.get("body") and not data.get("summary")):
        return JSONResponse({"error": "Không đọc được bài viết"}, status_code=422)
    post = {
        "id": hashlib.md5((url + str(time.time())).encode()).hexdigest()[:12],
        "url": url,
        "title": data.get("title", ""),
        "img": data.get("og_image", "") or "",
        "text": _ai_rewrite_article(data, tone),
        "ts": int(time.time()),
        "source": data.get("source", ""),
    }
    posts = _load_wall()
    posts.insert(0, post)
    _save_wall(posts)
    return JSONResponse({"post": post})


@app.post("/api/topic_post")
async def api_topic_post(request: Request):
    """Generate a post about the posted ``topic`` and prepend it to the wall.

    Expects a JSON object with ``topic``; responds 400 when it is missing.
    The post image is the first image found in the collected sources, else a
    generated illustration URL.
    """
    try:
        body = await request.json()
    except Exception:
        body = {}
    if not isinstance(body, dict):
        body = {}
    topic = str(body.get("topic", "")).strip()
    if not topic:
        return JSONResponse({"error": "missing topic"}, status_code=400)
    # Collect the context once and reuse it for both the image and the text;
    # collecting it separately for each fetched every candidate article twice.
    ctx, ctx_img = _topic_article_context(topic)
    post = {
        "id": hashlib.md5((topic + str(time.time())).encode()).hexdigest()[:12],
        "url": "",
        "title": topic,
        "img": ctx_img or _image_for_topic(topic),
        "text": _compose_topic_post(topic, ctx),
        "ts": int(time.time()),
        "source": "ai-topic",
    }
    posts = _load_wall()
    posts.insert(0, post)
    _save_wall(posts)
    return JSONResponse({"post": post})
@app.post("/api/url_wall")
async def api_url_wall(request: Request):
    """Rewrite the article at the posted ``url`` in the short tone and prepend it to the wall.

    Expects a JSON object with ``url``. Responds 400 when ``url`` is missing
    and 422 when the article could not be read or has no title.
    """
    try:
        body = await request.json()
    except Exception:
        body = {}
    if not isinstance(body, dict):
        # Valid JSON that is not an object (list, string, ...) has no .get().
        body = {}
    url = str(body.get("url", "")).strip()
    if not url:
        return JSONResponse({"error": "missing url"}, status_code=400)
    data = _article_by_url(url)
    if not data or not data.get("title"):
        return JSONResponse({"error": "Không đọc được URL"}, status_code=422)
    post = {
        "id": hashlib.md5((url + str(time.time())).encode()).hexdigest()[:12],
        "url": url,
        "title": data.get("title", ""),
        "img": data.get("og_image", "") or "",
        "text": _ai_rewrite_article(data, "ngan-gon-tu-nhien"),
        "ts": int(time.time()),
        "source": data.get("source", ""),
    }
    posts = _load_wall()
    posts.insert(0, post)
    _save_wall(posts)
    return JSONResponse({"post": post})


@app.get("/v")
async def video_share(url: str = Query(default=""), title: str = Query(default="VNEWS Video"), img: str = Query(default=""), type: str = Query(default="highlights")):
    """Serve the HTML share page for a video link.

    ``title`` comes straight from the query string, so it is HTML-escaped
    before being placed in the response.
    """
    decoded_url = unquote(url)
    decoded_title = unquote(title)
    # NOTE(review): both branches are empty strings in this copy of the file —
    # the markup appears to have been stripped; confirm against the original.
    redirect_script = f'' if decoded_url else f''
    return HTMLResponse(f'{html_lib.escape(decoded_title)}\n\n{redirect_script}')


@app.get("/s")
async def share_redirect(url: str = Query(default=""), title: str = Query(default="VNEWS"), img: str = Query(default="")):
    """Serve the HTML share page for an article link.

    The user-supplied ``title`` is HTML-escaped before being emitted.
    """
    decoded_url = unquote(url)
    # NOTE(review): both branches are empty strings in this copy of the file —
    # the markup appears to have been stripped; confirm against the original.
    redirect_script = f'' if decoded_url else f''
    return HTMLResponse(f'{html_lib.escape(unquote(title))}{redirect_script}')


@app.get("/")
async def index():
    """Serve the single-page front end from ``/app/static/index.html``."""
    with open("/app/static/index.html", "r", encoding="utf-8") as f:
        return HTMLResponse(content=f.read())


# Expose the static asset directory under /static.
app.mount("/static", StaticFiles(directory="/app/static"), name="static")