bep40 committed on
Commit
f2df10e
·
verified ·
1 Parent(s): bce5b40

Revert to working version (24h carousel on homepage, before broken fixes)

Browse files
Files changed (1) hide show
  1. app.py +42 -251
app.py CHANGED
@@ -1,4 +1,3 @@
1
- # rebuild 2026-05-01T05:10:04.377718
2
  import gradio as gr
3
  import requests
4
  import re
@@ -15,7 +14,6 @@ HEADERS = {
15
  }
16
  BASE_BDP = "https://bongdaplus.vn"
17
  BASE_24H = "https://www.24h.com.vn"
18
- BASE_DANTRI = "https://dantri.com.vn"
19
  REFRESH_SECONDS = 300
20
  SPACE_URL = "https://bep40-bongdaplus-news.hf.space"
21
 
@@ -231,7 +229,7 @@ def scrape_24h_video_list():
231
  return []
232
 
233
  def scrape_24h_article(url):
234
- """Scrape a 24h.com.vn article - extract ALL m3u8 video parts (playlist đầy đủ)."""
235
  try:
236
  r = requests.get(url, headers=HEADERS, timeout=15); r.encoding="utf-8"
237
  soup = BeautifulSoup(r.text, "lxml")
@@ -241,13 +239,13 @@ def scrape_24h_article(url):
241
  og_img = og.get("content","") if og else ""
242
  desc_meta = soup.find("meta", property="og:description")
243
  summary = desc_meta.get("content","") if desc_meta else ""
244
- # Extract ALL video parts using the multi-part probe function
245
- all_parts = _extract_24h_video_urls(url)
 
 
246
  body = []
247
- for pi, part in enumerate(all_parts):
248
- label = f" (Phần {pi+1})" if len(all_parts) > 1 else ""
249
- body.append({"type": "video", "src": part["src"], "poster": part.get("poster","") or og_img,
250
- "vtype": "hls", "caption": f"{title}{label}" if len(all_parts) > 1 else ""})
251
  # Extract text content
252
  content_selectors = ["div.the-article-body", "div.nws-mainContent", "div.nwsCt",
253
  "div#ctl00_mainContent_ctl00_divNewsContent", "div.detail-content"]
@@ -299,74 +297,6 @@ def _extract_24h_video_urls(article_url):
299
  return results
300
  except: return []
301
 
302
- # ── Dantri Scrapers ────────────────────────────────────────────────────────────
303
- def scrape_dantri_video_list():
304
- """Scrape dantri.com.vn/video/video-ngan.htm - video ngắn."""
305
- try:
306
- r = requests.get(f"{BASE_DANTRI}/video/video-ngan.htm", headers=HEADERS, timeout=15); r.encoding="utf-8"
307
- soup = BeautifulSoup(r.text, "lxml")
308
- articles, seen = [], set()
309
- for item in soup.select("div.video-item"):
310
- a = item.find("a", href=True)
311
- if not a: continue
312
- href = a.get("href","")
313
- if not href.startswith("http"): href = BASE_DANTRI + href
314
- if href in seen: continue
315
- img = item.find("img")
316
- title = img.get("alt","") if img else ""
317
- if not title: title = a.get("title","") or a.get_text(strip=True)
318
- if not title or len(title) < 10: continue
319
- img_src = None
320
- if img:
321
- for attr in ["data-src","data-original","src"]:
322
- v = img.get(attr,"")
323
- if v and "base64" not in v and len(v) > 20:
324
- img_src = v; break
325
- seen.add(href)
326
- articles.append({"title": title, "link": href, "img": img_src, "summary": "", "time": "",
327
- "featured": len(articles) < 3, "source": "dantri", "group": "Dân Trí Video", "is_video": True})
328
- return articles[:20]
329
- except:
330
- return []
331
-
332
- def scrape_dantri_article(url):
333
- """Scrape a dantri.com.vn article - extract m3u8 from direct URL or DT_GLOBAL JS."""
334
- try:
335
- r = requests.get(url, headers=HEADERS, timeout=15); r.encoding="utf-8"
336
- soup = BeautifulSoup(r.text, "lxml")
337
- h1 = soup.select_one("h1")
338
- title = h1.get_text(strip=True) if h1 else ""
339
- og = soup.find("meta", property="og:image")
340
- og_img = og.get("content","") if og else ""
341
- desc = soup.find("meta", property="og:description")
342
- summary = desc.get("content","") if desc else ""
343
- body = []
344
- # Method 1: Direct m3u8 URL
345
- m3u8s = re.findall(r'(https?://vcdn\.dantri\.com\.vn/vod/[^\s"\'<>;]+\.m3u8)', r.text)
346
- if m3u8s:
347
- for vsrc in list(dict.fromkeys(m3u8s))[:3]:
348
- body.append({"type": "video", "src": vsrc, "poster": og_img, "vtype": "hls"})
349
- else:
350
- # Method 2: Relative path in DT_GLOBAL JS (for /thoi-su/, /doi-song/ articles)
351
- m = re.search(r'"streamLocalPath"\s*:\s*"(/vod/[^"]+\.m3u8)"', r.text)
352
- if m:
353
- vsrc = "https://vcdn.dantri.com.vn" + m.group(1)
354
- poster = og_img
355
- m_thumb = re.search(r'"thumbnailUrl"\s*:\s*"(/thumb-video/[^"]+)"', r.text)
356
- if m_thumb: poster = "https://cdnphoto.dantri.com.vn" + m_thumb.group(1)
357
- body.append({"type": "video", "src": vsrc, "poster": poster, "vtype": "hls"})
358
- cd = soup.select_one("div.singular-content") or soup.select_one("div.detail-content")
359
- if cd:
360
- for ch in cd.children:
361
- if not hasattr(ch,'name') or not ch.name: continue
362
- if ch.name == "p":
363
- t = ch.get_text(strip=True)
364
- if t: body.append({"type":"p","html":strip_links(''.join(str(c) for c in ch.children))})
365
- return {"title": title, "time": "", "summary": summary[:200], "body": body,
366
- "related": [], "source_url": url, "source": "dantri", "og_image": og_img}
367
- except Exception as e:
368
- return {"title":"⚠️ Lỗi","time":"","summary":str(e),"body":[],"related":[],"source_url":url,"source":"dantri","og_image":""}
369
-
370
  # ── VNE Scrapers ───────────────────────────────────────────────────────────────
371
  def scrape_vne_list(url):
372
  try:
@@ -502,7 +432,6 @@ def fetch_tiktok_feed_videos():
502
  def fetch_homepage():
503
  all_articles=[]
504
  h24_videos=[]
505
- dantri_videos=[]
506
  def _fetch(src,url,group):
507
  arts=scrape_bdp_list(url) if src=="bdp" else scrape_vne_list(url)
508
  for a in arts: a["group"]=group
@@ -511,25 +440,20 @@ def fetch_homepage():
511
  nonlocal h24_videos
512
  try: h24_videos=scrape_24h_video_list()[:15]
513
  except: pass
514
- def _fetch_dantri():
515
- nonlocal dantri_videos
516
- try: dantri_videos=scrape_dantri_video_list()[:15]
517
- except: pass
518
- with ThreadPoolExecutor(max_workers=8) as ex:
519
  ex.submit(_fetch_24h)
520
- ex.submit(_fetch_dantri)
521
  futures={ex.submit(_fetch,s,u,g):g for s,u,g in HOMEPAGE_SOURCES}
522
  for f in as_completed(futures):
523
  try: all_articles.extend(f.result())
524
  except: pass
525
- return all_articles, h24_videos, dantri_videos
526
 
527
  def fetch_news_list(category):
528
  val=CATEGORIES.get(category,list(CATEGORIES.values())[0])
529
  parts=val.split("::"); src,url_or_key,group=parts[0],parts[1],parts[2]
530
  if src=="mix" and url_or_key=="home":
531
- articles, h24_videos, dantri_videos = fetch_homepage()
532
- return render_homepage_html(articles, h24_videos, dantri_videos)
533
  if src=="mix" and url_or_key=="video":
534
  return render_video_page_html()
535
  articles=scrape_bdp_list(url_or_key) if src=="bdp" else scrape_vne_list(url_or_key)
@@ -540,7 +464,6 @@ def read_article(url):
540
  if not url or url=="#" or len(url)<10: return "<p>Không tìm thấy bài viết.</p>"
541
  if "vnexpress.net" in url: return render_article_html(scrape_vne_article(url))
542
  if "24h.com.vn" in url: return render_article_html(scrape_24h_article(url))
543
- if "dantri.com.vn" in url: return render_article_html(scrape_dantri_article(url))
544
  return render_article_html(scrape_bdp_article(url))
545
 
546
  # ══════════════════════════════════════════════════════════════════════════════
@@ -548,7 +471,7 @@ def read_article(url):
548
  # ══════════════════════════════════════════════════════════════════════════════
549
 
550
  def render_video_carousel_html(videos):
551
- """Carousel video 24h highlights + nút TikTok."""
552
  vids_with_img = [v for v in videos if v.get("img")]
553
  if not vids_with_img: return ""
554
  items = []
@@ -564,33 +487,10 @@ def render_video_carousel_html(videos):
564
  <p class="vslide-title">{title}</p></div>''')
565
  return f'''<div class="vslide-wrap">
566
  <div class="vslide-header"><span class="vslide-label">🎬 Video Highlight</span>
567
- <div class="vslide-header-right">
568
- <button class="vslide-tiktok-btn" onclick="window.bdpGoVideo()">📱 Xem TikTok</button>
569
  <div class="vslide-nav"><button class="vslide-btn" onclick="window.bdpSlideScroll(-1,'vslide-video')">◀</button>
570
- <button class="vslide-btn" onclick="window.bdpSlideScroll(1,'vslide-video')">▶</button></div></div></div>
571
  <div class="vslide-track" id="vslide-video">{''.join(items)}</div></div>'''
572
 
573
- def render_dantri_carousel_html(videos):
574
- """Carousel video ngắn Dân Trí."""
575
- vids_with_img = [v for v in videos if v.get("img")]
576
- if not vids_with_img: return ""
577
- items = []
578
- for i, v in enumerate(vids_with_img[:15]):
579
- img = safe_url(v.get("img",""))
580
- link = v.get("link","#")
581
- title = v.get("title","")
582
- aid = make_id(link); sl = slug(title)
583
- click_js = f"window.bdpOpen('{esc(link)}','{aid}','{sl}')"
584
- items.append(f'''<div class="vslide-item" onclick="{click_js}">
585
- <div class="vslide-thumb"><img src="{img}" alt="" class="bdp-lazy-img">
586
- <div class="vslide-play">▶</div><span class="vslide-badge vslide-badge-dantri">Dân Trí</span></div>
587
- <p class="vslide-title">{title}</p></div>''')
588
- return f'''<div class="vslide-wrap">
589
- <div class="vslide-header"><span class="vslide-label">📹 Video Ngắn · Dân Trí</span>
590
- <div class="vslide-nav"><button class="vslide-btn" onclick="window.bdpSlideScroll(-1,'vslide-dantri')">◀</button>
591
- <button class="vslide-btn" onclick="window.bdpSlideScroll(1,'vslide-dantri')">▶</button></div></div>
592
- <div class="vslide-track" id="vslide-dantri">{''.join(items)}</div></div>'''
593
-
594
  def render_featured_carousel_html(articles):
595
  """Carousel tin nổi bật + mới nhất, lấy từ articles đã fetch sẵn (không fetch thêm)."""
596
  # Lấy bài featured có ảnh, ưu tiên bài đầu mỗi nhóm
@@ -623,15 +523,14 @@ def render_featured_carousel_html(articles):
623
  <button class="vslide-btn" onclick="window.bdpSlideScroll(1,'vslide-news')">▶</button></div></div>
624
  <div class="vslide-track" id="vslide-news">{''.join(items)}</div></div>'''
625
 
626
- def render_homepage_html(articles, h24_videos=None, dantri_videos=None):
627
  if not articles: return "<p class='bdp-empty'>Không tìm thấy tin tức.</p>"
628
  now=datetime.now(timezone(timedelta(hours=7))).strftime("%H:%M:%S %d/%m/%Y")
629
  video_carousel = render_video_carousel_html(h24_videos or [])
630
- dantri_carousel = render_dantri_carousel_html(dantri_videos or [])
631
  news_carousel = render_featured_carousel_html(articles)
632
  groups={}
633
  for a in articles: groups.setdefault(a.get("group","Khác"),[]).append(a)
634
- parts=[f'<div class="bdp-wrap">{video_carousel}{dantri_carousel}{news_carousel}<div class="bdp-topbar"><span>⏱ {now}</span><span>📰 Tin nổi bật</span></div>']
635
  for gn in ["Thời Sự","Thế Giới","Kinh Doanh","Công Nghệ","Thể Thao","Giải Trí","Bóng Đá"]:
636
  arts=groups.get(gn,[])
637
  if not arts: continue
@@ -747,7 +646,6 @@ def _list_card(art,big,tiktok=False):
747
  grp=art.get("group",""); badge=""
748
  if art.get("source")=="vne": badge=f'<span class="bdp-badge bdp-badge-vne">{grp or "VnExpress"}</span>'
749
  elif art.get("source")=="24h": badge=f'<span class="bdp-badge bdp-badge-24h">{grp or "24h"}</span>'
750
- elif art.get("source")=="dantri": badge=f'<span class="bdp-badge bdp-badge-dantri">{grp or "Dân Trí"}</span>'
751
  elif art.get("source")=="bdp": badge=f'<span class="bdp-badge bdp-badge-bdp">{grp or "BongDaPlus"}</span>'
752
  sl=slug(art["title"])
753
  share_js=f"event.stopPropagation();window.bdpShareHash('{esc(art['title'])}','{sl}','{aid}')"
@@ -765,60 +663,29 @@ def render_article_html(article):
765
  src_url=article.get("source_url","")
766
  og_img=safe_url(article.get("og_image",""))
767
  share_js=f"window.bdpShareHash('{esc(article['title'])}','{sl}','{aid}')"
768
- src_map={"vne":"VnExpress","bdp":"BongDaPlus","24h":"24h.com.vn","dantri":"Dân Trí"}
769
  src_label=src_map.get(article.get("source",""),"")
770
- source=article.get("source","")
771
 
772
- parts=[f'<div class="bdp-article"><h1 class="bdp-article-title">{article["title"]}</h1>'
773
- f'<div class="bdp-article-meta"><span>🕐 {article["time"]} · {src_label}</span>'
774
- f'<button class="bdp-share-article-btn" onclick="{share_js}">📤 Chia sẻ</button></div>']
 
775
  if article.get("summary"):
776
  parts.append(f'<div class="bdp-article-summary">{article["summary"]}</div>')
777
 
778
- # Collect all videos
779
- all_vids = [item for item in article.get("body",[]) if item["type"]=="video"]
780
-
781
- # === PLAYLIST UI for 24h / dantri videos ===
782
- if all_vids and source in ("24h","dantri"):
783
- # Playlist tabs (nếu nhiều video)
784
- if len(all_vids) > 1:
785
- tabs = []
786
- for vi in range(len(all_vids)):
787
- cap = all_vids[vi].get("caption","") or f"Phần {vi+1}"
788
- active = "playlist-tab-active" if vi == 0 else ""
789
- tabs.append(f'<button class="playlist-tab {active}" onclick="window.bdpPlaylistTab(this,{vi},\'{aid}\')">{cap}</button>')
790
- parts.append(f'<div class="playlist-bar">{"".join(tabs)}</div>')
791
-
792
- # Video players (chỉ hiện cái đầu, ẩn phần còn lại)
793
- for vi, v in enumerate(all_vids):
794
- poster = safe_url(v.get("poster",""))
795
- poster_attr = f' poster="{poster}"' if poster else ""
796
- cap = v.get("caption","")
797
- cap_html = f'<p class="bdp-figcap">{cap}</p>' if cap else ""
798
- vsrc = v["src"]; vtype = v.get("vtype","mp4")
799
- display = "" if vi == 0 else ' style="display:none"'
800
- if vtype == "hls":
801
- parts.append(f'<div class="bdp-video-wrap playlist-video" data-playlist="{aid}" data-vi="{vi}"{display}><video controls playsinline preload="metadata"{poster_attr} class="bdp-video" data-hls-src="{vsrc}"></video>{cap_html}</div>')
802
- else:
803
- parts.append(f'<div class="bdp-video-wrap playlist-video" data-playlist="{aid}" data-vi="{vi}"{display}><video controls playsinline preload="metadata"{poster_attr} class="bdp-video"><source src="{safe_url(vsrc)}" type="video/mp4"></video>{cap_html}</div>')
804
-
805
- # TikTok fullscreen button for dantri
806
- if source == "dantri" and all_vids:
807
- parts.append(f'<button class="tiktok-open-btn" onclick="window.bdpOpenTikTokInline(\'{aid}\')">📱 Xem toàn màn hình</button>')
808
-
809
- # Non-video body items
810
  for item in article.get("body",[]):
811
  if item["type"]=="video":
812
- # Already rendered above for 24h/dantri, render normally for other sources
813
- if source not in ("24h","dantri"):
814
- poster=safe_url(item.get("poster",""))
815
- poster_attr=f' poster="{poster}"' if poster else ""
816
- cap_html=f'<p class="bdp-figcap">{item.get("caption","")}</p>' if item.get("caption") else ""
817
- vsrc=item["src"]; vtype=item.get("vtype","mp4")
818
- if vtype=="hls":
819
- parts.append(f'<div class="bdp-video-wrap"><video controls playsinline preload="metadata"{poster_attr} class="bdp-video" data-hls-src="{vsrc}"></video>{cap_html}</div>')
820
- else:
821
- parts.append(f'<div class="bdp-video-wrap"><video controls playsinline preload="metadata"{poster_attr} class="bdp-video"><source src="{safe_url(vsrc)}" type="video/mp4"></video>{cap_html}</div>')
822
  elif item["type"]=="img":
823
  alt=item.get("alt",""); cap=f'<figcaption class="bdp-figcap">{alt}</figcaption>' if alt else ""
824
  parts.append(f'<figure class="bdp-figure"><img src="{safe_url(item["src"])}" alt="{alt}" class="bdp-lazy-img">{cap}</figure>')
@@ -834,11 +701,11 @@ def render_article_html(article):
834
  rid=make_id(rel["link"]); rs=slug(rel["title"])
835
  parts.append(f'<div class="bdp-related-item" onclick="window.bdpOpen(\'{esc(rel["link"])}\',\'{rid}\',\'{rs}\')"><span>▸ {rel["title"]}</span></div>')
836
  parts.append('</div>')
837
- parts.append(f'<div class="bdp-comments" id="comments-{aid}"><h3>💬 Bình luận</h3>'
838
- f'<div id="cmt-list-{aid}"></div><div class="bdp-cmt-form">'
839
- f'<input id="cmt-name-{aid}" class="bdp-cmt-input" placeholder="Tên của bạn..." maxlength="50">'
840
- f'<textarea id="cmt-text-{aid}" class="bdp-cmt-textarea" placeholder="Viết bình luận..." rows="3" maxlength="500"></textarea>'
841
- f'<button class="bdp-cmt-submit" onclick="window.bdpAddCmt(\'{aid}\')">Gửi bình luận</button></div></div>')
842
  parts.append('</div>')
843
  return '\n'.join(parts)
844
 
@@ -850,7 +717,7 @@ body,html{margin:0!important;padding:0!important;overflow-x:hidden;background:#1
850
  .gradio-container>.main>.contain{padding-top:0!important}
851
  .gap{gap:0!important}
852
  footer,.built-with{display:none!important}
853
- #article-url-input,#btn-read-article{position:absolute!important;width:1px!important;height:1px!important;padding:0!important;margin:-1px!important;overflow:hidden!important;clip:rect(0,0,0,0)!important;border:0!important}
854
  .bdp-header{background:linear-gradient(135deg,#0d1117,#1a3a2a 50%,#8b7500);padding:14px 16px;text-align:center}
855
  .bdp-header h1{color:#fff;font-size:20px;margin:0;font-weight:800;text-shadow:0 2px 6px rgba(0,0,0,.4)}
856
  .bdp-header p{color:rgba(255,255,255,.6);font-size:11px;margin:2px 0 0}
@@ -908,7 +775,6 @@ footer,.built-with{display:none!important}
908
  .bdp-badge-vne{background:#c0392b;color:#fff}
909
  .bdp-badge-bdp{background:#1a5c35;color:#fff}
910
  .bdp-badge-24h{background:#e67e22;color:#fff}
911
- .bdp-badge-dantri{background:#1e88e5;color:#fff}
912
  .bdp-article{padding:14px 12px 30px;max-width:720px;margin:0 auto;font-family:-apple-system,BlinkMacSystemFont,'Segoe UI',Roboto,sans-serif}
913
  @media(min-width:768px){.bdp-article{padding:20px 16px 50px}}
914
  .bdp-article-title{font-size:21px;font-weight:800;color:#f0f0f0;line-height:1.3;margin:0 0 8px}
@@ -925,11 +791,6 @@ footer,.built-with{display:none!important}
925
  .bdp-figure img{max-width:100%;height:auto;border-radius:8px}
926
  .bdp-figcap{color:#666;font-size:11.5px;margin-top:4px;font-style:italic}
927
  .bdp-video-wrap{margin:14px 0;border-radius:10px;overflow:hidden;background:#000}
928
- .playlist-bar{display:flex;gap:6px;margin:10px 0;flex-wrap:wrap}
929
- .playlist-tab{background:#2a2a2a;color:#aaa;border:1px solid #333;padding:8px 16px;border-radius:20px;cursor:pointer;font-size:13px;font-weight:600;transition:all .15s}
930
- .playlist-tab:hover{background:#333;color:#fff}
931
- .playlist-tab-active{background:#2d8659;color:#fff;border-color:#2d8659}
932
- .tiktok-open-btn{display:block;width:100%;background:linear-gradient(135deg,#fe2c55,#25f4ee);color:#fff;border:none;padding:12px;border-radius:10px;font-size:14px;font-weight:700;cursor:pointer;margin:12px 0;text-align:center}
933
  .bdp-video{width:100%;max-height:70vh;display:block;border-radius:10px}
934
  .bdp-related{margin-top:24px;padding-top:14px;border-top:1px solid #2a2a2a}
935
  .bdp-related h3{font-size:16px;color:#eee;margin:0 0 8px}
@@ -975,9 +836,6 @@ footer,.built-with{display:none!important}
975
  .tiktok-seek-btn:active{transform:scale(.9)}
976
  .vslide-badge{position:absolute;top:6px;left:6px;font-size:9px;padding:1px 6px;border-radius:3px;font-weight:700;z-index:2}
977
  .vslide-badge-24h{background:#e67e22;color:#fff}
978
- .vslide-badge-dantri{background:#1e88e5;color:#fff}
979
- .vslide-header-right{display:flex;align-items:center;gap:8px}
980
- .vslide-tiktok-btn{background:linear-gradient(135deg,#fe2c55,#25f4ee);color:#fff;border:none;padding:5px 12px;border-radius:14px;font-size:11px;font-weight:700;cursor:pointer}
981
  """
982
 
983
  # ══════════════════════════════════════════════════════════════════════════════
@@ -1000,18 +858,9 @@ window.bdpOpen=function(url,aid,sl){
1000
  window.location.hash='#/'+sl+'/'+aid;
1001
  try{localStorage.setItem('bdp_url_'+aid,url);}catch(e){}
1002
  var el=document.getElementById('article-url-input');
1003
- if(el){
1004
- var ta=el.querySelector('textarea')||el.querySelector('input');
1005
- if(ta){
1006
- var d=Object.getOwnPropertyDescriptor(ta.tagName==='TEXTAREA'?HTMLTextAreaElement.prototype:HTMLInputElement.prototype,'value');
1007
- if(d&&d.set)d.set.call(ta,url);else ta.value=url;
1008
- ta.dispatchEvent(new Event('input',{bubbles:true}));
1009
- }
1010
- }
1011
- setTimeout(function(){
1012
- var btn=document.getElementById('btn-read-article');
1013
- if(btn){var b=btn.querySelector('button');if(b)b.click();else btn.click();}
1014
- },100);
1015
  window.scrollTo({top:0,behavior:'smooth'});
1016
  };
1017
 
@@ -1033,64 +882,6 @@ window.bdpSlideScroll=function(dir,trackId){
1033
  if(track){track.scrollBy({left:dir*260,behavior:'smooth'});}
1034
  };
1035
 
1036
- /* ── Playlist tabs: switch video parts ── */
1037
- window.bdpPlaylistTab=function(btn,vi,playlistId){
1038
- /* Deactivate all tabs, activate clicked */
1039
- btn.parentElement.querySelectorAll('.playlist-tab').forEach(function(t){t.classList.remove('playlist-tab-active');});
1040
- btn.classList.add('playlist-tab-active');
1041
- /* Hide all videos, show selected */
1042
- document.querySelectorAll('.playlist-video[data-playlist="'+playlistId+'"]').forEach(function(wrap,i){
1043
- if(i===vi){wrap.style.display='';var v=wrap.querySelector('video');if(v){if(v.getAttribute('data-hls-src')&&!v._hlsInit)initHlsVideo(v);if(!v._initDone){v._initDone=true;v.load();}}}
1044
- else{wrap.style.display='none';var v2=wrap.querySelector('video');if(v2)try{v2.pause();}catch(e){}}
1045
- });
1046
- };
1047
-
1048
- /* ── Open TikTok fullscreen for article video ── */
1049
- window.bdpOpenTikTokInline=function(playlistId){
1050
- var wraps=document.querySelectorAll('.playlist-video[data-playlist="'+playlistId+'"]');
1051
- if(!wraps.length) wraps=document.querySelectorAll('.bdp-video-wrap');
1052
- if(!wraps.length) return;
1053
- var old=document.getElementById('tk-inline');if(old)old.remove();
1054
- var container=document.createElement('div');container.id='tk-inline';container.className='tiktok-fullscreen-container';container.style.cssText='position:fixed;top:0;left:0;width:100%;height:100%;z-index:10000';
1055
- var html='<div style="display:flex;justify-content:space-between;align-items:center;padding:10px 16px;color:#fff;background:rgba(0,0,0,.8);z-index:2"><span>📱 Video</span><button onclick="document.getElementById(\'tk-inline\').remove()" style="background:none;border:none;color:#fff;font-size:22px;cursor:pointer">✕</button></div>';
1056
- html+='<div class="tiktok-fullscreen-feed" style="flex:1">';
1057
- wraps.forEach(function(w,i){
1058
- var v=w.querySelector('video');if(!v)return;
1059
- var src=v.getAttribute('data-hls-src')||'';
1060
- var mp4src=v.querySelector('source');if(mp4src)src=mp4src.src;
1061
- var poster=v.getAttribute('poster')||'';
1062
- var isHls=v.getAttribute('data-hls-src')?true:false;
1063
- var vtag=isHls?'<video class="tiktok-video" playsinline preload="metadata" poster="'+poster+'" data-hls-src="'+src+'" muted loop></video>':'<video class="tiktok-video" playsinline preload="metadata" poster="'+poster+'" muted loop><source src="'+src+'" type="video/mp4"></video>';
1064
- html+='<div class="tiktok-slide" data-index="'+i+'">'+vtag+'<div class="tiktok-bottom"><p class="tiktok-title">Phần '+(i+1)+'</p></div><div class="tiktok-unmute-hint" onclick="window.bdpTikTokUnmute(this)">🔇 Nhấn để bật tiếng</div></div>';
1065
- });
1066
- html+='</div>';
1067
- container.innerHTML=html;
1068
- document.body.appendChild(container);
1069
- /* Init TikTok scroll */
1070
- setTimeout(function(){if(typeof initTikTokFullscreen==='function')initTikTokFullscreen(container);},300);
1071
- };
1072
-
1073
- window.bdpGoVideo=function(){
1074
- /* Switch to Video Tổng Hợp tab by setting dropdown value with native setter */
1075
- var container=document.querySelector('.controls-row');
1076
- if(!container) return;
1077
- var ddInput=container.querySelector('input');
1078
- if(!ddInput) return;
1079
- /* Use native setter to bypass Svelte reactivity */
1080
- var d=Object.getOwnPropertyDescriptor(HTMLInputElement.prototype,'value');
1081
- if(d&&d.set) d.set.call(ddInput,'🎬 Video Tổng Hợp');
1082
- else ddInput.value='🎬 Video Tổng Hợp';
1083
- ddInput.dispatchEvent(new Event('input',{bubbles:true}));
1084
- /* Also try opening dropdown and clicking option */
1085
- ddInput.focus();ddInput.click();
1086
- setTimeout(function(){
1087
- document.querySelectorAll('[role="option"],ul.options li').forEach(function(o){
1088
- if(o.textContent.indexOf('Video')>-1) o.click();
1089
- });
1090
- },300);
1091
- window.scrollTo({top:0,behavior:'smooth'});
1092
- };
1093
-
1094
  function gc(a){try{return JSON.parse(localStorage.getItem('bdp_cmt_'+a))||[];}catch(e){return[];}}
1095
  function sc(a,c){try{localStorage.setItem('bdp_cmt_'+a,JSON.stringify(c));}catch(e){}}
1096
  window.bdpRenderCmt=function(a){var l=document.getElementById('cmt-list-'+a);if(!l)return;var c=gc(a);if(!c.length){l.innerHTML='<div class="bdp-cmt-empty">Chưa có bình luận. Hãy là người đầu tiên!</div>';return;}var h='';for(var i=c.length-1;i>=0;i--){var x=c[i];h+='<div class="bdp-cmt-item"><span class="bdp-cmt-author">'+x.name+'</span><span class="bdp-cmt-date">'+x.date+'</span><div class="bdp-cmt-body">'+x.text.replace(/</g,'&lt;').replace(/>/g,'&gt;')+'</div></div>';}l.innerHTML=h;};
@@ -1265,14 +1056,14 @@ if(hh&&hh.startsWith('#/')){var ps=hh.slice(2).split('/');if(ps.length>=2){var a
1265
  # ══════════════════════════════════════════════════════════════════════════════
1266
  with gr.Blocks(title="Tin Tức Việt Nam",css=CSS,head=HEAD_META,js=JS_FUNC,theme=gr.themes.Base(),fill_width=True) as demo:
1267
  gr.HTML('<div class="bdp-header"><h1>📰 Tin Tức Việt Nam</h1><p>VnExpress · BongDaPlus · 24h · Thời sự · Thế giới · Kinh doanh · Công nghệ · Thể thao · Giải trí · Video</p></div>')
1268
- article_url=gr.Textbox(value="",elem_id="article-url-input")
1269
  with gr.Row(elem_classes=["controls-row"]):
1270
  cat=gr.Dropdown(choices=list(CATEGORIES.keys()),value="🏠 Trang Chủ (Nổi Bật)",label="Chuyên mục",scale=3,interactive=True)
1271
  ref_btn=gr.Button("🔄 Làm mới",variant="primary",scale=1)
1272
  back_btn=gr.Button("← Quay lại",variant="secondary",scale=1,visible=False)
1273
  news_list=gr.HTML()
1274
  article_view=gr.HTML(visible=False)
1275
- read_btn=gr.Button("Đọc",elem_id="btn-read-article")
1276
  def show_article(url):
1277
  if not url or url=="#" or len(url)<10:
1278
  return gr.update(visible=True),gr.update(visible=False),gr.update(visible=False),gr.update(visible=False),""
 
 
1
  import gradio as gr
2
  import requests
3
  import re
 
14
  }
15
  BASE_BDP = "https://bongdaplus.vn"
16
  BASE_24H = "https://www.24h.com.vn"
 
17
  REFRESH_SECONDS = 300
18
  SPACE_URL = "https://bep40-bongdaplus-news.hf.space"
19
 
 
229
  return []
230
 
231
  def scrape_24h_article(url):
232
+ """Scrape a 24h.com.vn article - extract m3u8 video URL."""
233
  try:
234
  r = requests.get(url, headers=HEADERS, timeout=15); r.encoding="utf-8"
235
  soup = BeautifulSoup(r.text, "lxml")
 
239
  og_img = og.get("content","") if og else ""
240
  desc_meta = soup.find("meta", property="og:description")
241
  summary = desc_meta.get("content","") if desc_meta else ""
242
+ # Extract m3u8 video URLs
243
+ m3u8s = re.findall(r'(https?://cdn\.24h\.com\.vn/[^\s"\'\\]+\.m3u8)', r.text)
244
+ videos = [u for u in m3u8s if '_720p' not in u]
245
+ if not videos: videos = m3u8s
246
  body = []
247
+ for vsrc in videos[:3]:
248
+ body.append({"type": "video", "src": vsrc, "poster": og_img, "vtype": "hls"})
 
 
249
  # Extract text content
250
  content_selectors = ["div.the-article-body", "div.nws-mainContent", "div.nwsCt",
251
  "div#ctl00_mainContent_ctl00_divNewsContent", "div.detail-content"]
 
297
  return results
298
  except: return []
299
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
300
  # ── VNE Scrapers ───────────────────────────────────────────────────────────────
301
  def scrape_vne_list(url):
302
  try:
 
432
  def fetch_homepage():
433
  all_articles=[]
434
  h24_videos=[]
 
435
  def _fetch(src,url,group):
436
  arts=scrape_bdp_list(url) if src=="bdp" else scrape_vne_list(url)
437
  for a in arts: a["group"]=group
 
440
  nonlocal h24_videos
441
  try: h24_videos=scrape_24h_video_list()[:15]
442
  except: pass
443
+ with ThreadPoolExecutor(max_workers=6) as ex:
 
 
 
 
444
  ex.submit(_fetch_24h)
 
445
  futures={ex.submit(_fetch,s,u,g):g for s,u,g in HOMEPAGE_SOURCES}
446
  for f in as_completed(futures):
447
  try: all_articles.extend(f.result())
448
  except: pass
449
+ return all_articles, h24_videos
450
 
451
  def fetch_news_list(category):
452
  val=CATEGORIES.get(category,list(CATEGORIES.values())[0])
453
  parts=val.split("::"); src,url_or_key,group=parts[0],parts[1],parts[2]
454
  if src=="mix" and url_or_key=="home":
455
+ articles, h24_videos = fetch_homepage()
456
+ return render_homepage_html(articles, h24_videos)
457
  if src=="mix" and url_or_key=="video":
458
  return render_video_page_html()
459
  articles=scrape_bdp_list(url_or_key) if src=="bdp" else scrape_vne_list(url_or_key)
 
464
  if not url or url=="#" or len(url)<10: return "<p>Không tìm thấy bài viết.</p>"
465
  if "vnexpress.net" in url: return render_article_html(scrape_vne_article(url))
466
  if "24h.com.vn" in url: return render_article_html(scrape_24h_article(url))
 
467
  return render_article_html(scrape_bdp_article(url))
468
 
469
  # ══════════════════════════════════════════════════════════════════════════════
 
471
  # ══════════════════════════════════════════════════════════════════════════════
472
 
473
  def render_video_carousel_html(videos):
474
+ """Carousel video 24h highlights, dùng dữ liệu list page (không fetch từng bài)."""
475
  vids_with_img = [v for v in videos if v.get("img")]
476
  if not vids_with_img: return ""
477
  items = []
 
487
  <p class="vslide-title">{title}</p></div>''')
488
  return f'''<div class="vslide-wrap">
489
  <div class="vslide-header"><span class="vslide-label">🎬 Video Highlight</span>
 
 
490
  <div class="vslide-nav"><button class="vslide-btn" onclick="window.bdpSlideScroll(-1,'vslide-video')">◀</button>
491
+ <button class="vslide-btn" onclick="window.bdpSlideScroll(1,'vslide-video')">▶</button></div></div>
492
  <div class="vslide-track" id="vslide-video">{''.join(items)}</div></div>'''
493
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
494
  def render_featured_carousel_html(articles):
495
  """Carousel tin nổi bật + mới nhất, lấy từ articles đã fetch sẵn (không fetch thêm)."""
496
  # Lấy bài featured có ảnh, ưu tiên bài đầu mỗi nhóm
 
523
  <button class="vslide-btn" onclick="window.bdpSlideScroll(1,'vslide-news')">▶</button></div></div>
524
  <div class="vslide-track" id="vslide-news">{''.join(items)}</div></div>'''
525
 
526
+ def render_homepage_html(articles, h24_videos=None):
527
  if not articles: return "<p class='bdp-empty'>Không tìm thấy tin tức.</p>"
528
  now=datetime.now(timezone(timedelta(hours=7))).strftime("%H:%M:%S %d/%m/%Y")
529
  video_carousel = render_video_carousel_html(h24_videos or [])
 
530
  news_carousel = render_featured_carousel_html(articles)
531
  groups={}
532
  for a in articles: groups.setdefault(a.get("group","Khác"),[]).append(a)
533
+ parts=[f'<div class="bdp-wrap">{video_carousel}{news_carousel}<div class="bdp-topbar"><span>⏱ {now}</span><span>📰 Tin nổi bật</span></div>']
534
  for gn in ["Thời Sự","Thế Giới","Kinh Doanh","Công Nghệ","Thể Thao","Giải Trí","Bóng Đá"]:
535
  arts=groups.get(gn,[])
536
  if not arts: continue
 
646
  grp=art.get("group",""); badge=""
647
  if art.get("source")=="vne": badge=f'<span class="bdp-badge bdp-badge-vne">{grp or "VnExpress"}</span>'
648
  elif art.get("source")=="24h": badge=f'<span class="bdp-badge bdp-badge-24h">{grp or "24h"}</span>'
 
649
  elif art.get("source")=="bdp": badge=f'<span class="bdp-badge bdp-badge-bdp">{grp or "BongDaPlus"}</span>'
650
  sl=slug(art["title"])
651
  share_js=f"event.stopPropagation();window.bdpShareHash('{esc(art['title'])}','{sl}','{aid}')"
 
663
  src_url=article.get("source_url","")
664
  og_img=safe_url(article.get("og_image",""))
665
  share_js=f"window.bdpShareHash('{esc(article['title'])}','{sl}','{aid}')"
666
+ src_map={"vne":"VnExpress","bdp":"BongDaPlus","24h":"24h.com.vn"}
667
  src_label=src_map.get(article.get("source",""),"")
668
+ seo=f'<div style="display:none" itemscope itemtype="https://schema.org/NewsArticle"><meta itemprop="headline" content="{esc(article["title"])}"><meta itemprop="image" content="{og_img}"><meta itemprop="description" content="{esc(article.get("summary","")[:160])}"></div>'
669
 
670
+ parts=[f"""{seo}<div class="bdp-article">
671
+ <h1 class="bdp-article-title">{article['title']}</h1>
672
+ <div class="bdp-article-meta"><span>🕐 {article['time']} · {src_label}</span>
673
+ <button class="bdp-share-article-btn" onclick="{share_js}">📤 Chia sẻ</button></div>"""]
674
  if article.get("summary"):
675
  parts.append(f'<div class="bdp-article-summary">{article["summary"]}</div>')
676
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
677
  for item in article.get("body",[]):
678
  if item["type"]=="video":
679
+ poster=safe_url(item.get("poster",""))
680
+ poster_attr=f' poster="{poster}"' if poster else ""
681
+ caption=item.get("caption","")
682
+ cap_html=f'<p class="bdp-figcap">{caption}</p>' if caption else ""
683
+ vtype=item.get("vtype","mp4")
684
+ vsrc=item["src"]
685
+ if vtype=="hls":
686
+ parts.append(f'<div class="bdp-video-wrap"><video controls playsinline preload="metadata"{poster_attr} class="bdp-video" data-hls-src="{vsrc}"></video>{cap_html}</div>')
687
+ else:
688
+ parts.append(f'<div class="bdp-video-wrap"><video controls playsinline preload="metadata"{poster_attr} class="bdp-video"><source src="{safe_url(vsrc)}" type="video/mp4"></video>{cap_html}</div>')
689
  elif item["type"]=="img":
690
  alt=item.get("alt",""); cap=f'<figcaption class="bdp-figcap">{alt}</figcaption>' if alt else ""
691
  parts.append(f'<figure class="bdp-figure"><img src="{safe_url(item["src"])}" alt="{alt}" class="bdp-lazy-img">{cap}</figure>')
 
701
  rid=make_id(rel["link"]); rs=slug(rel["title"])
702
  parts.append(f'<div class="bdp-related-item" onclick="window.bdpOpen(\'{esc(rel["link"])}\',\'{rid}\',\'{rs}\')"><span>▸ {rel["title"]}</span></div>')
703
  parts.append('</div>')
704
+ parts.append(f"""<div class="bdp-comments" id="comments-{aid}"><h3>💬 Bình luận</h3>
705
+ <div id="cmt-list-{aid}"></div><div class="bdp-cmt-form">
706
+ <input id="cmt-name-{aid}" class="bdp-cmt-input" placeholder="Tên của bạn..." maxlength="50">
707
+ <textarea id="cmt-text-{aid}" class="bdp-cmt-textarea" placeholder="Viết bình luận..." rows="3" maxlength="500"></textarea>
708
+ <button class="bdp-cmt-submit" onclick="window.bdpAddCmt('{aid}')">Gửi bình luận</button></div></div>""")
709
  parts.append('</div>')
710
  return '\n'.join(parts)
711
 
 
717
  .gradio-container>.main>.contain{padding-top:0!important}
718
  .gap{gap:0!important}
719
  footer,.built-with{display:none!important}
720
+ #article-url-input,#btn-read-article{display:none!important;height:0!important;overflow:hidden!important}
721
  .bdp-header{background:linear-gradient(135deg,#0d1117,#1a3a2a 50%,#8b7500);padding:14px 16px;text-align:center}
722
  .bdp-header h1{color:#fff;font-size:20px;margin:0;font-weight:800;text-shadow:0 2px 6px rgba(0,0,0,.4)}
723
  .bdp-header p{color:rgba(255,255,255,.6);font-size:11px;margin:2px 0 0}
 
775
  .bdp-badge-vne{background:#c0392b;color:#fff}
776
  .bdp-badge-bdp{background:#1a5c35;color:#fff}
777
  .bdp-badge-24h{background:#e67e22;color:#fff}
 
778
  .bdp-article{padding:14px 12px 30px;max-width:720px;margin:0 auto;font-family:-apple-system,BlinkMacSystemFont,'Segoe UI',Roboto,sans-serif}
779
  @media(min-width:768px){.bdp-article{padding:20px 16px 50px}}
780
  .bdp-article-title{font-size:21px;font-weight:800;color:#f0f0f0;line-height:1.3;margin:0 0 8px}
 
791
  .bdp-figure img{max-width:100%;height:auto;border-radius:8px}
792
  .bdp-figcap{color:#666;font-size:11.5px;margin-top:4px;font-style:italic}
793
  .bdp-video-wrap{margin:14px 0;border-radius:10px;overflow:hidden;background:#000}
 
 
 
 
 
794
  .bdp-video{width:100%;max-height:70vh;display:block;border-radius:10px}
795
  .bdp-related{margin-top:24px;padding-top:14px;border-top:1px solid #2a2a2a}
796
  .bdp-related h3{font-size:16px;color:#eee;margin:0 0 8px}
 
836
  .tiktok-seek-btn:active{transform:scale(.9)}
837
  .vslide-badge{position:absolute;top:6px;left:6px;font-size:9px;padding:1px 6px;border-radius:3px;font-weight:700;z-index:2}
838
  .vslide-badge-24h{background:#e67e22;color:#fff}
 
 
 
839
  """
840
 
841
  # ══════════════════════════════════════════════════════════════════════════════
 
858
  window.location.hash='#/'+sl+'/'+aid;
859
  try{localStorage.setItem('bdp_url_'+aid,url);}catch(e){}
860
  var el=document.getElementById('article-url-input');
861
+ if(el){var ta=el.querySelector('textarea');if(ta){ta.value=url;ta.dispatchEvent(new Event('input',{bubbles:true}));}}
862
+ var btn=document.getElementById('btn-read-article');
863
+ if(btn){var b=btn.querySelector('button');if(b)b.click();else btn.click();}
 
 
 
 
 
 
 
 
 
864
  window.scrollTo({top:0,behavior:'smooth'});
865
  };
866
 
 
882
  if(track){track.scrollBy({left:dir*260,behavior:'smooth'});}
883
  };
884
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
885
  function gc(a){try{return JSON.parse(localStorage.getItem('bdp_cmt_'+a))||[];}catch(e){return[];}}
886
  function sc(a,c){try{localStorage.setItem('bdp_cmt_'+a,JSON.stringify(c));}catch(e){}}
887
  window.bdpRenderCmt=function(a){var l=document.getElementById('cmt-list-'+a);if(!l)return;var c=gc(a);if(!c.length){l.innerHTML='<div class="bdp-cmt-empty">Chưa có bình luận. Hãy là người đầu tiên!</div>';return;}var h='';for(var i=c.length-1;i>=0;i--){var x=c[i];h+='<div class="bdp-cmt-item"><span class="bdp-cmt-author">'+x.name+'</span><span class="bdp-cmt-date">'+x.date+'</span><div class="bdp-cmt-body">'+x.text.replace(/</g,'&lt;').replace(/>/g,'&gt;')+'</div></div>';}l.innerHTML=h;};
 
1056
  # ══════════════════════════════════════════════════════════════════════════════
1057
  with gr.Blocks(title="Tin Tức Việt Nam",css=CSS,head=HEAD_META,js=JS_FUNC,theme=gr.themes.Base(),fill_width=True) as demo:
1058
  gr.HTML('<div class="bdp-header"><h1>📰 Tin Tức Việt Nam</h1><p>VnExpress · BongDaPlus · 24h · Thời sự · Thế giới · Kinh doanh · Công nghệ · Thể thao · Giải trí · Video</p></div>')
1059
+ article_url=gr.Textbox(value="",visible=False,elem_id="article-url-input")
1060
  with gr.Row(elem_classes=["controls-row"]):
1061
  cat=gr.Dropdown(choices=list(CATEGORIES.keys()),value="🏠 Trang Chủ (Nổi Bật)",label="Chuyên mục",scale=3,interactive=True)
1062
  ref_btn=gr.Button("🔄 Làm mới",variant="primary",scale=1)
1063
  back_btn=gr.Button("← Quay lại",variant="secondary",scale=1,visible=False)
1064
  news_list=gr.HTML()
1065
  article_view=gr.HTML(visible=False)
1066
+ read_btn=gr.Button("Đọc",visible=False,elem_id="btn-read-article")
1067
  def show_article(url):
1068
  if not url or url=="#" or len(url)<10:
1069
  return gr.update(visible=True),gr.update(visible=False),gr.update(visible=False),gr.update(visible=False),""