Spaces:
Running
Running
Add match detail API (commentaries + stats) + featured match endpoint
Browse files
main.py
CHANGED
|
@@ -17,7 +17,7 @@ BASE_24H = "https://www.24h.com.vn"
|
|
| 17 |
SPACE_URL = "https://bep40-vnews.hf.space"
|
| 18 |
_cache = {}
|
| 19 |
_cache_ttl = 300
|
| 20 |
-
_cache_ttl_live = 60
|
| 21 |
def _cached(key, fn, ttl=None):
|
| 22 |
now=time.time();t=ttl or _cache_ttl
|
| 23 |
if key in _cache and now-_cache[key]["t"]<t:return _cache[key]["d"]
|
|
@@ -32,7 +32,6 @@ def _get(url,headers=None):
|
|
| 32 |
LEAGUE_IDS = {"nha":27110,"laliga":27233,"seriea":27044,"bundesliga":26891,"ligue1":27212}
|
| 33 |
|
| 34 |
def fetch_bongda_api(endpoint):
|
| 35 |
-
"""Fetch bongda.com.vn API and return HTML content."""
|
| 36 |
try:
|
| 37 |
r=requests.get(f"https://bongda.com.vn{endpoint}",headers=BONGDA_HEADERS,timeout=10)
|
| 38 |
if r.status_code==200:
|
|
@@ -43,43 +42,107 @@ def fetch_bongda_api(endpoint):
|
|
| 43 |
|
| 44 |
@app.get("/api/livescore/live")
def api_livescore_live():
    """Serve the matches currently in progress.

    The upstream ``/api/fixtures/live`` result is obtained through ``_cached``
    under the key ``"ls_live"`` using the live TTL (``_cache_ttl_live``).

    Returns:
        JSONResponse: ``{"html": <value returned by fetch_bongda_api>}``.
    """
    def load_live():
        return fetch_bongda_api("/api/fixtures/live")

    payload = {"html": _cached("ls_live", load_live, ttl=_cache_ttl_live)}
    return JSONResponse(payload)
|
| 49 |
|
| 50 |
@app.get("/api/livescore/incoming")
def api_livescore_incoming():
    """Serve the matches that are about to start.

    The upstream ``/api/fixtures/incoming`` result is obtained through
    ``_cached`` under the key ``"ls_incoming"`` using the live TTL.

    Returns:
        JSONResponse: ``{"html": <value returned by fetch_bongda_api>}``.
    """
    def load_incoming():
        return fetch_bongda_api("/api/fixtures/incoming")

    payload = {"html": _cached("ls_incoming", load_incoming, ttl=_cache_ttl_live)}
    return JSONResponse(payload)
|
| 55 |
|
| 56 |
@app.get("/api/livescore/today")
def api_livescore_today():
    """Serve every fixture scheduled for the current date.

    Returns:
        JSONResponse: ``{"html": <value returned by fetch_bongda_api>}``,
        cached under ``"ls_today"`` for ``_cache_ttl`` seconds.
    """
    # NOTE(review): datetime.now() is naive server-local time and the cache key
    # carries no date, so just after midnight the previous day's entry can be
    # served until the TTL lapses - confirm this is acceptable.
    date_str = datetime.now().strftime("%Y-%m-%d")

    def load_today():
        return fetch_bongda_api(f"/api/fixtures/get-by-date?date={date_str}")

    payload = {"html": _cached("ls_today", load_today, ttl=_cache_ttl)}
    return JSONResponse(payload)
|
| 62 |
|
| 63 |
@app.get("/api/livescore/results")
def api_livescore_results():
    """Serve today's finished matches.

    Returns:
        JSONResponse: ``{"html": <value returned by fetch_bongda_api>}``,
        cached under ``"ls_results"`` for ``_cache_ttl`` seconds.
    """
    # NOTE(review): datetime.now() is naive server-local time and the cache key
    # carries no date, so just after midnight the previous day's entry can be
    # served until the TTL lapses - confirm this is acceptable.
    date_str = datetime.now().strftime("%Y-%m-%d")

    def load_results():
        return fetch_bongda_api(f"/api/fixtures/get-by-date?date={date_str}&status=finished")

    payload = {"html": _cached("ls_results", load_results, ttl=_cache_ttl)}
    return JSONResponse(payload)
|
| 69 |
|
| 70 |
@app.get("/api/livescore/standings/{league}")
def api_livescore_standings(league: str):
    """Serve the league table for one competition.

    Args:
        league: Key into ``LEAGUE_IDS``. An unknown key falls back to
            tournament id 27110.

    Returns:
        JSONResponse: ``{"html": <value returned by fetch_bongda_api>}``,
        cached for ``_cache_ttl`` seconds.
    """
    tid = LEAGUE_IDS.get(league, 27110)

    def load_table():
        return fetch_bongda_api(f"/api/league-table/home?tournament_id={tid}&is_detail=True")

    # Key on the resolved tournament id, not the raw path text: otherwise every
    # unknown league name a client sends creates its own cache entry for the
    # same default table, letting the cache grow without bound.
    return JSONResponse({"html": _cached(f"ls_bxh_{tid}", load_table, ttl=_cache_ttl)})
|
| 76 |
|
| 77 |
@app.get("/api/livescore/date/{date}")
def api_livescore_date(date: str):
    """Serve the fixtures for a given date.

    Args:
        date: Date to look up, expected as ``YYYY-MM-DD``.

    Returns:
        JSONResponse: ``{"html": <value returned by fetch_bongda_api>}``.
        This endpoint does not go through the cache.
    """
    from urllib.parse import quote

    # Percent-encode the client-supplied segment so characters such as "&" or
    # "#" cannot add or truncate upstream query parameters. A well-formed
    # YYYY-MM-DD value is left unchanged.
    safe_date = quote(date, safe="")
    return JSONResponse({"html": fetch_bongda_api(f"/api/fixtures/get-by-date?date={safe_date}")})
|
| 82 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
# ===== NEWS SCRAPERS =====
|
| 84 |
def scrape_vne(cat_url):
|
| 85 |
try:
|
|
@@ -112,6 +175,41 @@ def scrape_vne_article(url):
|
|
| 112 |
elif ch.name in("h2","h3"):body.append({"type":"heading","text":ch.get_text(strip=True)})
|
| 113 |
return{"title":h1.get_text(strip=True) if h1 else "","summary":desc.get_text(strip=True) if desc else "","og_image":og_img,"body":body,"source":"vne","url":url}
|
| 114 |
except:return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
def scrape_bdp_videos():
|
| 116 |
try:
|
| 117 |
soup=_get(f"{BASE_BDP}/video");arts=[];seen=set()
|
|
@@ -165,19 +263,6 @@ def scrape_24h_shorts():
|
|
| 165 |
if img_tag:img_src=img_tag.get("data-original") or img_tag.get("data-src") or img_tag.get("src","")
|
| 166 |
if img_src and"base64" in img_src:img_src=""
|
| 167 |
seen.add(href);arts.append({"title":title,"link":href,"img":img_src,"source":"24h-shorts"})
|
| 168 |
-
if len(arts)<3:
|
| 169 |
-
for a in soup.find_all("a",href=True):
|
| 170 |
-
href=a.get("href","")
|
| 171 |
-
if not href.endswith(".html") or"javascript:" in href or"-cvd" in href:continue
|
| 172 |
-
if not href.startswith("http"):href=BASE_24H+href
|
| 173 |
-
if href in seen:continue
|
| 174 |
-
img=a.find("img") or(a.parent.find("img") if a.parent else None)
|
| 175 |
-
if not img:continue
|
| 176 |
-
img_src=img.get("data-original") or img.get("data-src") or img.get("src","")
|
| 177 |
-
if not img_src or"base64" in img_src:continue
|
| 178 |
-
title=img.get("alt","") or a.get("title","") or a.get_text(strip=True)
|
| 179 |
-
if not title or len(title)<8:continue
|
| 180 |
-
seen.add(href);arts.append({"title":title,"link":href,"img":img_src,"source":"24h-shorts"})
|
| 181 |
return arts[:20]
|
| 182 |
except:return[]
|
| 183 |
def scrape_bbc_vietnamese():
|
|
@@ -212,9 +297,6 @@ def scrape_bbc_article(url):
|
|
| 212 |
for p in soup.select("[data-component='text-block'] p, article p, main p"):
|
| 213 |
t=p.get_text(strip=True)
|
| 214 |
if t and len(t)>20:body.append({"type":"p","text":t})
|
| 215 |
-
for img in soup.select("main img, article img"):
|
| 216 |
-
src=img.get("src","")
|
| 217 |
-
if src and("ichef" in src or"bbci" in src):body.append({"type":"img","src":src,"alt":img.get("alt","")})
|
| 218 |
return{"title":h1.get_text(strip=True) if h1 else "","summary":"","og_image":og_img,"body":body,"source":"bbc","url":url}
|
| 219 |
except:return None
|
| 220 |
def extract_video_url(article_url):
|
|
@@ -222,8 +304,7 @@ def extract_video_url(article_url):
|
|
| 222 |
r=requests.get(article_url,headers={**HEADERS,"Referer":"https://www.24h.com.vn/"},timeout=10);r.encoding="utf-8"
|
| 223 |
m3u8s=re.findall(r'(https?://cdn\.24h\.com\.vn/[^\s"\'\\<>]+\.m3u8)',r.text)
|
| 224 |
esc=[u.replace('\\//','/').replace('\\/','/') for u in re.findall(r'(https?:\\/\\/cdn\.24h\.com\.vn\\/[^\s"\'<>]+\.m3u8)',r.text)]
|
| 225 |
-
all_urls=list(dict.fromkeys(m3u8s+esc));
|
| 226 |
-
primary=full or p720
|
| 227 |
if not primary:return None
|
| 228 |
soup=BeautifulSoup(r.text,"lxml");og=soup.find("meta",property="og:image");poster=og.get("content","") if og else ""
|
| 229 |
return{"src":primary[0],"poster":poster}
|
|
@@ -270,6 +351,8 @@ def api_categories():
|
|
| 270 |
def api_highlights():
    """Return the cached result of ``scrape_24h_highlights`` as JSON."""
    items = _cached("highlights", scrape_24h_highlights)
    return JSONResponse(items)
|
| 271 |
@app.get("/api/shorts")
def api_shorts():
    """Return the cached result of ``scrape_24h_shorts`` as JSON."""
    items = _cached("shorts", scrape_24h_shorts)
    return JSONResponse(items)
|
|
|
|
|
|
|
| 273 |
@app.get("/api/bdp_videos")
def api_bdp_videos():
    """Return the cached result of ``scrape_bdp_videos`` as JSON."""
    items = _cached("bdp_videos", scrape_bdp_videos)
    return JSONResponse(items)
|
| 275 |
@app.get("/api/video_url")
|
|
@@ -282,6 +365,7 @@ def api_video_url(url:str=Query(...)):
|
|
| 282 |
def api_article(url: str = Query(...)):
    """Scrape a supported news article and return its parsed content.

    Args:
        url: Absolute article URL supplied by the client.

    Returns:
        JSONResponse: The scraper's result, or ``{"error": "not supported"}``
        when the URL is not an http(s) URL on a supported site or the scraper
        returned nothing.
    """
    from urllib.parse import urlparse

    # Match on the parsed hostname rather than a substring of the whole URL:
    # a substring test also accepts e.g. "http://internal-host/?x=bbc.com",
    # which would make the server fetch an arbitrary address.
    try:
        parsed = urlparse(url)
        host = (parsed.hostname or "").lower()
        scheme = parsed.scheme
    except ValueError:
        host, scheme = "", ""

    scrapers = (
        ("vnexpress.net", scrape_vne_article),
        ("bbc.com", scrape_bbc_article),
    )
    data = None
    if scheme in ("http", "https"):
        for domain, scraper in scrapers:
            if host == domain or host.endswith("." + domain):
                data = scraper(url)
                break
    return JSONResponse(data if data else {"error": "not supported"})
|
| 287 |
@app.get("/v")
|
|
@@ -289,13 +373,13 @@ async def video_share(url:str=Query(default=""),title:str=Query(default="VNEWS V
|
|
| 289 |
og_image=unquote(img) if img else "https://s1.vnecdn.net/vnexpress/restruct/i/v9505/logo_default.jpg"
|
| 290 |
decoded_url=unquote(url);decoded_title=unquote(title)
|
| 291 |
redirect_script=f'<script>localStorage.setItem("pending_video",JSON.stringify({{"url":"{decoded_url}","type":"{type}"}}));location.href="{SPACE_URL}";</script>' if decoded_url else f'<script>location.href="{SPACE_URL}";</script>'
|
| 292 |
-
return HTMLResponse(f'''<!DOCTYPE html><html><head><meta charset="utf-8"><title>{decoded_title}</title><meta property="og:title" content="{decoded_title}"><meta property="og:image" content="{og_image}"><
|
| 293 |
@app.get("/s")
|
| 294 |
async def share_redirect(url:str=Query(default=""),title:str=Query(default="VNEWS"),img:str=Query(default="")):
|
| 295 |
og_image=unquote(img) if img else "https://s1.vnecdn.net/vnexpress/restruct/i/v9505/logo_default.jpg"
|
| 296 |
decoded_url=unquote(url)
|
| 297 |
redirect_script=f'<script>localStorage.setItem("pending_article","{decoded_url}");location.href="{SPACE_URL}";</script>' if decoded_url else f'<script>location.href="{SPACE_URL}";</script>'
|
| 298 |
-
return HTMLResponse(f'''<!DOCTYPE html><html><head><meta charset="utf-8"><title>{unquote(title)}</title><meta property="og:title" content="{unquote(title)}"><meta property="og:image" content="{og_image}"><
|
| 299 |
@app.get("/")
async def index():
    """Serve the front-end page stored at ``/app/static/index.html``."""
    with open("/app/static/index.html", "r", encoding="utf-8") as page:
        markup = page.read()
    return HTMLResponse(content=markup)
|
|
|
|
| 17 |
SPACE_URL = "https://bep40-vnews.hf.space"
|
| 18 |
_cache = {}
|
| 19 |
_cache_ttl = 300
|
| 20 |
+
_cache_ttl_live = 60
|
| 21 |
def _cached(key, fn, ttl=None):
|
| 22 |
now=time.time();t=ttl or _cache_ttl
|
| 23 |
if key in _cache and now-_cache[key]["t"]<t:return _cache[key]["d"]
|
|
|
|
| 32 |
LEAGUE_IDS = {"nha":27110,"laliga":27233,"seriea":27044,"bundesliga":26891,"ligue1":27212}
|
| 33 |
|
| 34 |
def fetch_bongda_api(endpoint):
|
|
|
|
| 35 |
try:
|
| 36 |
r=requests.get(f"https://bongda.com.vn{endpoint}",headers=BONGDA_HEADERS,timeout=10)
|
| 37 |
if r.status_code==200:
|
|
|
|
| 42 |
|
| 43 |
@app.get("/api/livescore/live")
def api_livescore_live():
    """Serve the matches currently in progress.

    The upstream ``/api/fixtures/live`` result is obtained through ``_cached``
    under the key ``"ls_live"`` using the live TTL (``_cache_ttl_live``).

    Returns:
        JSONResponse: ``{"html": <value returned by fetch_bongda_api>}``.
    """
    def load_live():
        return fetch_bongda_api("/api/fixtures/live")

    payload = {"html": _cached("ls_live", load_live, ttl=_cache_ttl_live)}
    return JSONResponse(payload)
|
| 47 |
|
| 48 |
@app.get("/api/livescore/incoming")
def api_livescore_incoming():
    """Serve the matches that are about to start.

    The upstream ``/api/fixtures/incoming`` result is obtained through
    ``_cached`` under the key ``"ls_incoming"`` using the live TTL.

    Returns:
        JSONResponse: ``{"html": <value returned by fetch_bongda_api>}``.
    """
    def load_incoming():
        return fetch_bongda_api("/api/fixtures/incoming")

    payload = {"html": _cached("ls_incoming", load_incoming, ttl=_cache_ttl_live)}
    return JSONResponse(payload)
|
| 52 |
|
| 53 |
@app.get("/api/livescore/today")
def api_livescore_today():
    """Serve every fixture scheduled for the current date.

    Returns:
        JSONResponse: ``{"html": <value returned by fetch_bongda_api>}``,
        cached under ``"ls_today"`` for ``_cache_ttl`` seconds.
    """
    # NOTE(review): datetime.now() is naive server-local time and the cache key
    # carries no date, so just after midnight the previous day's entry can be
    # served until the TTL lapses - confirm this is acceptable.
    date_str = datetime.now().strftime("%Y-%m-%d")

    def load_today():
        return fetch_bongda_api(f"/api/fixtures/get-by-date?date={date_str}")

    payload = {"html": _cached("ls_today", load_today, ttl=_cache_ttl)}
    return JSONResponse(payload)
|
| 58 |
|
| 59 |
@app.get("/api/livescore/results")
def api_livescore_results():
    """Serve today's finished matches.

    Returns:
        JSONResponse: ``{"html": <value returned by fetch_bongda_api>}``,
        cached under ``"ls_results"`` for ``_cache_ttl`` seconds.
    """
    # NOTE(review): datetime.now() is naive server-local time and the cache key
    # carries no date, so just after midnight the previous day's entry can be
    # served until the TTL lapses - confirm this is acceptable.
    date_str = datetime.now().strftime("%Y-%m-%d")

    def load_results():
        return fetch_bongda_api(f"/api/fixtures/get-by-date?date={date_str}&status=finished")

    payload = {"html": _cached("ls_results", load_results, ttl=_cache_ttl)}
    return JSONResponse(payload)
|
| 64 |
|
| 65 |
@app.get("/api/livescore/standings/{league}")
def api_livescore_standings(league: str):
    """Serve the league table for one competition.

    Args:
        league: Key into ``LEAGUE_IDS``. An unknown key falls back to
            tournament id 27110.

    Returns:
        JSONResponse: ``{"html": <value returned by fetch_bongda_api>}``,
        cached for ``_cache_ttl`` seconds.
    """
    tid = LEAGUE_IDS.get(league, 27110)

    def load_table():
        return fetch_bongda_api(f"/api/league-table/home?tournament_id={tid}&is_detail=True")

    # Key on the resolved tournament id, not the raw path text: otherwise every
    # unknown league name a client sends creates its own cache entry for the
    # same default table, letting the cache grow without bound.
    return JSONResponse({"html": _cached(f"ls_bxh_{tid}", load_table, ttl=_cache_ttl)})
|
| 70 |
|
| 71 |
@app.get("/api/livescore/date/{date}")
def api_livescore_date(date: str):
    """Serve the fixtures for a given date.

    Args:
        date: Date to look up, expected as ``YYYY-MM-DD``.

    Returns:
        JSONResponse: ``{"html": <value returned by fetch_bongda_api>}``.
        This endpoint does not go through the cache.
    """
    from urllib.parse import quote

    # Percent-encode the client-supplied segment so characters such as "&" or
    # "#" cannot add or truncate upstream query parameters. A well-formed
    # YYYY-MM-DD value is left unchanged.
    safe_date = quote(date, safe="")
    return JSONResponse({"html": fetch_bongda_api(f"/api/fixtures/get-by-date?date={safe_date}")})
|
| 75 |
|
| 76 |
+
@app.get("/api/match/{event_id}/commentaries")
def api_match_commentaries(event_id: int):
    """Serve the running commentary (match events) for one match.

    Args:
        event_id: Numeric match identifier forwarded to the upstream API.

    Returns:
        JSONResponse: ``{"html": <value returned by fetch_bongda_api>}``.
        This endpoint does not go through the cache.
    """
    endpoint = f"/api/fixtures/commentaries?event_id={event_id}"
    payload = {"html": fetch_bongda_api(endpoint)}
    return JSONResponse(payload)
|
| 81 |
+
|
| 82 |
+
@app.get("/api/match/{event_id}/stats")
def api_match_stats(event_id: int):
    """Serve the player statistics for one match.

    Args:
        event_id: Numeric match identifier forwarded to the upstream API.

    Returns:
        JSONResponse: ``{"html": <value returned by fetch_bongda_api>}``.
        This endpoint does not go through the cache.
    """
    endpoint = f"/api/event-standing/player-performance?event_id={event_id}"
    payload = {"html": fetch_bongda_api(endpoint)}
    return JSONResponse(payload)
|
| 87 |
+
|
| 88 |
+
def _parse_featured_match(html, status):
    """Extract the first usable match from a fixtures HTML fragment.

    Args:
        html: Value returned by ``fetch_bongda_api``; falsy or short
            (100 characters or fewer) input is treated as "no fixtures".
        status: ``"live"`` to read score and minute from the status link;
            any other value is reported with the placeholder score ``"VS"``.

    Returns:
        A dict with keys ``home``, ``away``, ``score``, ``minute``, ``league``,
        ``time``, ``event_id``, ``home_logo``, ``away_logo`` and ``status`` for
        the first ``li.match-detail`` that has both team names, or ``None``
        when there is no such item.
    """
    if not (html and len(html) > 100):
        return None

    soup = BeautifulSoup(html, "lxml")
    for li in soup.select("li.match-detail"):
        match_div = li.select_one("div.match")
        if not match_div:
            continue
        home_el = match_div.select_one(".home-team .name")
        away_el = match_div.select_one(".away-team .name")
        if not (home_el and away_el):
            continue

        status_el = match_div.select_one(".status a")
        # The league name is taken from the nearest <strong> preceding the item.
        league_el = li.find_previous("strong")
        time_el = match_div.select_one(".match-time")
        home_logo = match_div.select_one(".home-team .logo img")
        away_logo = match_div.select_one(".away-team .logo img")

        # The match id is embedded in the status link: /tran-dau/<id>/...
        event_id = ""
        if status_el:
            m = re.search(r'/tran-dau/(\d+)/', status_el.get("href", ""))
            if m:
                event_id = m.group(1)

        if status == "live":
            # Spans 0 and 2 hold the two scores; span 3, if present, the minute.
            score = ""
            minute = ""
            spans = status_el.find_all("span") if status_el else []
            if len(spans) >= 3:
                score = f"{spans[0].get_text(strip=True)} - {spans[2].get_text(strip=True)}"
            if len(spans) >= 4:
                minute = spans[3].get_text(strip=True)
        else:
            score = "VS"
            minute = ""

        return {
            "home": home_el.get_text(strip=True),
            "away": away_el.get_text(strip=True),
            "score": score,
            "minute": minute,
            "league": league_el.get_text(strip=True) if league_el else "",
            "time": time_el.get_text(strip=True) if time_el else "",
            "event_id": event_id,
            "home_logo": home_logo.get("src", "") if home_logo else "",
            "away_logo": away_logo.get("src", "") if away_logo else "",
            "status": status,
        }
    return None


@app.get("/api/livescore/featured")
def api_livescore_featured():
    """Serve the featured match: the first live match, else the first upcoming one.

    No league filtering is applied; the first item in upstream order that has
    both team names is used.

    Returns:
        JSONResponse: The match dict built by ``_parse_featured_match``, or
        JSON ``null`` when neither feed yields a match. The result is cached
        under ``"ls_featured"`` with the live TTL.
    """
    def _f():
        live = _parse_featured_match(fetch_bongda_api("/api/fixtures/live"), "live")
        if live is not None:
            return live
        # Fall back to upcoming fixtures only when nothing is live.
        return _parse_featured_match(fetch_bongda_api("/api/fixtures/incoming"), "upcoming")

    return JSONResponse(_cached("ls_featured", _f, ttl=_cache_ttl_live))
|
| 145 |
+
|
| 146 |
# ===== NEWS SCRAPERS =====
|
| 147 |
def scrape_vne(cat_url):
|
| 148 |
try:
|
|
|
|
| 175 |
elif ch.name in("h2","h3"):body.append({"type":"heading","text":ch.get_text(strip=True)})
|
| 176 |
return{"title":h1.get_text(strip=True) if h1 else "","summary":desc.get_text(strip=True) if desc else "","og_image":og_img,"body":body,"source":"vne","url":url}
|
| 177 |
except:return None
|
| 178 |
+
def scrape_dantri_hot():
    """Scrape Dan Tri's hot-news listing into a list of article cards.

    Collects up to 12 unique links ending in ``.htm`` whose title is at least
    15 characters long, then fetches the first 10 article pages concurrently
    to read each one's ``og:image``.

    Returns:
        list[dict]: At most 10 dicts with keys ``title``, ``link``, ``img``
        and ``source`` (always ``"dantri"``). Entries that ended up without
        an image are dropped. An empty list is returned if scraping fails.
    """
    listing_url = "https://dantri.com.vn/tin-nong.htm"
    try:
        soup = _get(listing_url)
        arts = []
        seen = set()
        for a in soup.find_all("a", href=True):
            href = a.get("href", "")
            title = a.get("title", "") or a.get_text(strip=True)
            if not title or len(title) < 15 or "javascript:" in href:
                continue
            if not href.startswith("http"):
                href = "https://dantri.com.vn" + href
            if href in seen or not href.endswith(".htm"):
                continue
            if href == listing_url:
                continue
            seen.add(href)
            arts.append({"title": title, "link": href, "img": "", "source": "dantri"})
            if len(arts) >= 12:
                break

        def _og(art):
            """Fill ``art["img"]`` from the article page's og:image meta tag."""
            try:
                r2 = requests.get(art["link"], headers=HEADERS, timeout=8)
                r2.encoding = "utf-8"
                og = BeautifulSoup(r2.text, "lxml").find("meta", property="og:image")
                art["img"] = og.get("content", "") if og else ""
            except Exception:
                # Best effort: the card keeps an empty image and is filtered
                # out below. Exception (not a bare except) lets SystemExit and
                # KeyboardInterrupt propagate.
                pass

        with ThreadPoolExecutor(4) as ex:
            list(ex.map(_og, arts[:10]))
        return [a for a in arts if a["img"]][:10]
    except Exception:
        # Any scraping failure degrades to "no articles" instead of an error.
        return []
|
| 195 |
+
def scrape_dantri_article(url):
    """Fetch a Dan Tri article and convert it into structured content.

    Args:
        url: Article URL to download.

    Returns:
        dict | None: A dict with keys ``title``, ``summary``, ``og_image``,
        ``body`` (list of ``{"type": "p"|"heading", "text": ...}`` and
        ``{"type": "img", "src": ..., "alt": ...}`` items in document order),
        ``source`` (``"dantri"``) and ``url``; ``None`` if fetching or parsing
        fails.
    """
    try:
        r = requests.get(url, headers=HEADERS, timeout=15)
        r.encoding = "utf-8"
        soup = BeautifulSoup(r.text, "lxml")
        for tag in soup.find_all(["script", "style", "nav", "footer", "aside"]):
            tag.decompose()

        h1 = soup.find("h1")
        og = soup.find("meta", property="og:image")
        og_img = og.get("content", "") if og else ""
        content = soup.select_one("div.singular-content") or soup.select_one("article")

        body = []
        if content:
            # Pictures are matched via <img> only. find_all() is recursive, so
            # also matching <figure> would emit the image nested inside each
            # figure twice (once for the figure, once for the img itself).
            for el in content.find_all(["p", "h2", "h3", "img"]):
                if el.name == "p":
                    text = el.get_text(strip=True)
                    if text and len(text) > 15:
                        body.append({"type": "p", "text": text})
                elif el.name in ("h2", "h3"):
                    text = el.get_text(strip=True)
                    if text:
                        body.append({"type": "heading", "text": text})
                else:
                    src = el.get("data-src") or el.get("src", "")
                    # Inline base64 placeholders are skipped.
                    if src and "base64" not in src:
                        body.append({"type": "img", "src": src, "alt": el.get("alt", "")})

        sapo = soup.select_one("h2.singular-sapo")
        desc = sapo.get_text(strip=True) if sapo else ""
        return {
            "title": h1.get_text(strip=True) if h1 else "",
            "summary": desc,
            "og_image": og_img,
            "body": body,
            "source": "dantri",
            "url": url,
        }
    except Exception:
        # Best effort: callers treat None as "article unavailable". Exception
        # (not a bare except) lets SystemExit and KeyboardInterrupt propagate.
        return None
|
| 213 |
def scrape_bdp_videos():
|
| 214 |
try:
|
| 215 |
soup=_get(f"{BASE_BDP}/video");arts=[];seen=set()
|
|
|
|
| 263 |
if img_tag:img_src=img_tag.get("data-original") or img_tag.get("data-src") or img_tag.get("src","")
|
| 264 |
if img_src and"base64" in img_src:img_src=""
|
| 265 |
seen.add(href);arts.append({"title":title,"link":href,"img":img_src,"source":"24h-shorts"})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 266 |
return arts[:20]
|
| 267 |
except:return[]
|
| 268 |
def scrape_bbc_vietnamese():
|
|
|
|
| 297 |
for p in soup.select("[data-component='text-block'] p, article p, main p"):
|
| 298 |
t=p.get_text(strip=True)
|
| 299 |
if t and len(t)>20:body.append({"type":"p","text":t})
|
|
|
|
|
|
|
|
|
|
| 300 |
return{"title":h1.get_text(strip=True) if h1 else "","summary":"","og_image":og_img,"body":body,"source":"bbc","url":url}
|
| 301 |
except:return None
|
| 302 |
def extract_video_url(article_url):
|
|
|
|
| 304 |
r=requests.get(article_url,headers={**HEADERS,"Referer":"https://www.24h.com.vn/"},timeout=10);r.encoding="utf-8"
|
| 305 |
m3u8s=re.findall(r'(https?://cdn\.24h\.com\.vn/[^\s"\'\\<>]+\.m3u8)',r.text)
|
| 306 |
esc=[u.replace('\\//','/').replace('\\/','/') for u in re.findall(r'(https?:\\/\\/cdn\.24h\.com\.vn\\/[^\s"\'<>]+\.m3u8)',r.text)]
|
| 307 |
+
all_urls=list(dict.fromkeys(m3u8s+esc));primary=[u for u in all_urls if'_720p' not in u] or [u for u in all_urls if'_720p' in u]
|
|
|
|
| 308 |
if not primary:return None
|
| 309 |
soup=BeautifulSoup(r.text,"lxml");og=soup.find("meta",property="og:image");poster=og.get("content","") if og else ""
|
| 310 |
return{"src":primary[0],"poster":poster}
|
|
|
|
| 351 |
def api_highlights():
    """Return the cached result of ``scrape_24h_highlights`` as JSON."""
    items = _cached("highlights", scrape_24h_highlights)
    return JSONResponse(items)
|
| 352 |
@app.get("/api/shorts")
def api_shorts():
    """Return the cached result of ``scrape_24h_shorts`` as JSON."""
    items = _cached("shorts", scrape_24h_shorts)
    return JSONResponse(items)
|
| 354 |
+
@app.get("/api/dantri_hot")
def api_dantri_hot():
    """Return the cached result of ``scrape_dantri_hot`` as JSON."""
    items = _cached("dantri_hot", scrape_dantri_hot)
    return JSONResponse(items)
|
| 356 |
@app.get("/api/bdp_videos")
def api_bdp_videos():
    """Return the cached result of ``scrape_bdp_videos`` as JSON."""
    items = _cached("bdp_videos", scrape_bdp_videos)
    return JSONResponse(items)
|
| 358 |
@app.get("/api/video_url")
|
|
|
|
| 365 |
def api_article(url: str = Query(...)):
    """Scrape a supported news article and return its parsed content.

    Args:
        url: Absolute article URL supplied by the client.

    Returns:
        JSONResponse: The scraper's result, or ``{"error": "not supported"}``
        when the URL is not an http(s) URL on a supported site or the scraper
        returned nothing.
    """
    from urllib.parse import urlparse

    # Match on the parsed hostname rather than a substring of the whole URL:
    # a substring test also accepts e.g. "http://internal-host/?x=bbc.com",
    # which would make the server fetch an arbitrary address.
    try:
        parsed = urlparse(url)
        host = (parsed.hostname or "").lower()
        scheme = parsed.scheme
    except ValueError:
        host, scheme = "", ""

    scrapers = (
        ("vnexpress.net", scrape_vne_article),
        ("bbc.com", scrape_bbc_article),
        ("dantri.com.vn", scrape_dantri_article),
    )
    data = None
    if scheme in ("http", "https"):
        for domain, scraper in scrapers:
            if host == domain or host.endswith("." + domain):
                data = scraper(url)
                break
    return JSONResponse(data if data else {"error": "not supported"})
|
| 371 |
@app.get("/v")
|
|
|
|
| 373 |
og_image=unquote(img) if img else "https://s1.vnecdn.net/vnexpress/restruct/i/v9505/logo_default.jpg"
|
| 374 |
decoded_url=unquote(url);decoded_title=unquote(title)
|
| 375 |
redirect_script=f'<script>localStorage.setItem("pending_video",JSON.stringify({{"url":"{decoded_url}","type":"{type}"}}));location.href="{SPACE_URL}";</script>' if decoded_url else f'<script>location.href="{SPACE_URL}";</script>'
|
| 376 |
+
return HTMLResponse(f'''<!DOCTYPE html><html><head><meta charset="utf-8"><title>{decoded_title}</title><meta property="og:title" content="{decoded_title}"><meta property="og:image" content="{og_image}"></head><body style="background:#111;color:#fff;text-align:center;padding:40px"><p>⏳</p>{redirect_script}</body></html>''')
|
| 377 |
@app.get("/s")
async def share_redirect(url: str = Query(default=""), title: str = Query(default="VNEWS"), img: str = Query(default="")):
    """Serve a share-preview page that hands an article over to the main app.

    The page carries Open Graph title/image tags and an inline script that
    stores the article URL in ``localStorage["pending_article"]`` (when a URL
    was given) before redirecting the browser to ``SPACE_URL``.

    Args:
        url: Article URL to open after the redirect; empty means redirect only.
        title: Text for ``<title>`` and ``og:title``.
        img: Image URL for ``og:image``; a default logo is used when empty.

    Returns:
        HTMLResponse: The generated HTML page.
    """
    import html
    import json

    og_image = unquote(img) if img else "https://s1.vnecdn.net/vnexpress/restruct/i/v9505/logo_default.jpg"
    decoded_url = unquote(url)

    # All three values come straight from the query string, so they are escaped
    # for the context they are written into; otherwise a crafted link could
    # inject markup or script into the page.
    safe_title = html.escape(unquote(title), quote=True)
    safe_image = html.escape(og_image, quote=True)

    if decoded_url:
        # json.dumps yields a valid JavaScript string literal. The extra
        # replacements stop a "</script>" or "<!--" sequence in the value from
        # terminating the inline script block.
        js_url = (
            json.dumps(decoded_url)
            .replace("<", "\\u003c")
            .replace(">", "\\u003e")
            .replace("&", "\\u0026")
        )
        redirect_script = f'<script>localStorage.setItem("pending_article",{js_url});location.href="{SPACE_URL}";</script>'
    else:
        redirect_script = f'<script>location.href="{SPACE_URL}";</script>'

    return HTMLResponse(f'''<!DOCTYPE html><html><head><meta charset="utf-8"><title>{safe_title}</title><meta property="og:title" content="{safe_title}"><meta property="og:image" content="{safe_image}"></head><body>{redirect_script}</body></html>''')
|
| 383 |
@app.get("/")
async def index():
    """Serve the front-end page stored at ``/app/static/index.html``."""
    with open("/app/static/index.html", "r", encoding="utf-8") as page:
        markup = page.read()
    return HTMLResponse(content=markup)
|