bep40 committed on
Commit
f02fb9d
·
verified ·
1 Parent(s): 7897c57

More AI articles (30), OG fallback for all, replace Công Nghệ with Ứng dụng AI on homepage

Browse files
Files changed (1) hide show
  1. main.py +8 -8
main.py CHANGED
@@ -323,7 +323,7 @@ def scrape_dantri_congnghe():
323
  return arts
324
  except:return[]
325
  def scrape_genk_ai():
326
- """Scrape AI articles from genk.vn with OG image fallback"""
327
  try:
328
  r=requests.get("https://genk.vn/ai.chn",headers=HEADERS,timeout=15)
329
  if r.status_code!=200:return[]
@@ -346,8 +346,8 @@ def scrape_genk_ai():
346
  if img_src:break
347
  container=container.parent
348
  seen.add(href);articles.append({"title":title,"link":href,"img":img_src,"source":"genk"})
349
- if len(articles)>=15:break
350
- # OG image fallback for articles missing thumbnails
351
  def _fetch_og(art):
352
  if art["img"]:return
353
  try:
@@ -355,9 +355,9 @@ def scrape_genk_ai():
355
  s2=BeautifulSoup(r2.text,"lxml").find("meta",property="og:image")
356
  if s2:art["img"]=s2.get("content","")
357
  except:pass
358
- need=[a for a in articles if not a["img"]][:8]
359
  if need:
360
- with ThreadPoolExecutor(4) as ex:list(ex.map(_fetch_og,need))
361
  return articles
362
  except:return[]
363
  def scrape_dantri_article(url):
@@ -425,7 +425,7 @@ def api_homepage():
425
  with ThreadPoolExecutor(12) as ex:
426
  futs={ex.submit(scrape_vne,VNE_CATS[k][0]):VNE_CATS[k][1] for k in["thoi-su","the-gioi","kinh-doanh","the-thao","giai-tri","phap-luat","giao-duc","du-lich","doi-song"]}
427
  futs[ex.submit(scrape_bbc_vietnamese)]="BBC"
428
- futs[ex.submit(scrape_dantri_congnghe)]="Công Nghệ"
429
  for f in as_completed(futs):
430
  try:
431
  for a in f.result():a["group"]=futs[f];articles.append(a)
@@ -436,13 +436,13 @@ def api_homepage():
436
  def api_category(cat_id:str):
437
  def _f():
438
  if cat_id=="bbc":return scrape_bbc_vietnamese()
439
- if cat_id=="cong-nghe":return scrape_dantri_congnghe()
440
  if cat_id in VNE_CATS:arts=scrape_vne(VNE_CATS[cat_id][0]);[a.update({"group":VNE_CATS[cat_id][1]}) for a in arts];return arts
441
  return[]
442
  return JSONResponse(_cached(f"cat_{cat_id}",_f))
443
  @app.get("/api/categories")
444
  def api_categories():
445
- cats=[{"id":"bbc","name":"BBC Tiếng Việt","source":"bbc"},{"id":"cong-nghe","name":"Công Nghệ","source":"dantri"}]
446
  for k,(u,n) in VNE_CATS.items():cats.append({"id":k,"name":n,"source":"vne"})
447
  return JSONResponse(cats)
448
  @app.get("/api/dantri_hot")
 
323
  return arts
324
  except:return[]
325
  def scrape_genk_ai():
326
+ """Scrape AI articles from genk.vn with OG image fallback - up to 30 articles"""
327
  try:
328
  r=requests.get("https://genk.vn/ai.chn",headers=HEADERS,timeout=15)
329
  if r.status_code!=200:return[]
 
346
  if img_src:break
347
  container=container.parent
348
  seen.add(href);articles.append({"title":title,"link":href,"img":img_src,"source":"genk"})
349
+ if len(articles)>=30:break
350
+ # OG image fallback for ALL articles missing thumbnails
351
  def _fetch_og(art):
352
  if art["img"]:return
353
  try:
 
355
  s2=BeautifulSoup(r2.text,"lxml").find("meta",property="og:image")
356
  if s2:art["img"]=s2.get("content","")
357
  except:pass
358
+ need=[a for a in articles if not a["img"]]
359
  if need:
360
+ with ThreadPoolExecutor(6) as ex:list(ex.map(_fetch_og,need))
361
  return articles
362
  except:return[]
363
  def scrape_dantri_article(url):
 
425
  with ThreadPoolExecutor(12) as ex:
426
  futs={ex.submit(scrape_vne,VNE_CATS[k][0]):VNE_CATS[k][1] for k in["thoi-su","the-gioi","kinh-doanh","the-thao","giai-tri","phap-luat","giao-duc","du-lich","doi-song"]}
427
  futs[ex.submit(scrape_bbc_vietnamese)]="BBC"
428
+ futs[ex.submit(scrape_genk_ai)]="Ứng dụng AI"
429
  for f in as_completed(futs):
430
  try:
431
  for a in f.result():a["group"]=futs[f];articles.append(a)
 
436
  def api_category(cat_id:str):
437
  def _f():
438
  if cat_id=="bbc":return scrape_bbc_vietnamese()
439
+ if cat_id=="ai":return scrape_genk_ai()
440
  if cat_id in VNE_CATS:arts=scrape_vne(VNE_CATS[cat_id][0]);[a.update({"group":VNE_CATS[cat_id][1]}) for a in arts];return arts
441
  return[]
442
  return JSONResponse(_cached(f"cat_{cat_id}",_f))
443
  @app.get("/api/categories")
444
  def api_categories():
445
+ cats=[{"id":"bbc","name":"BBC Tiếng Việt","source":"bbc"},{"id":"ai","name":"Ứng dụng AI","source":"genk"}]
446
  for k,(u,n) in VNE_CATS.items():cats.append({"id":k,"name":n,"source":"vne"})
447
  return JSONResponse(cats)
448
  @app.get("/api/dantri_hot")