More AI articles (30), OG fallback for all, replace Công Nghệ with Ứng dụng AI on homepage
Browse files
main.py
CHANGED
|
@@ -323,7 +323,7 @@ def scrape_dantri_congnghe():
|
|
| 323 |
return arts
|
| 324 |
except:return[]
|
| 325 |
def scrape_genk_ai():
|
| 326 |
-
"""Scrape AI articles from genk.vn with OG image fallback"""
|
| 327 |
try:
|
| 328 |
r=requests.get("https://genk.vn/ai.chn",headers=HEADERS,timeout=15)
|
| 329 |
if r.status_code!=200:return[]
|
|
@@ -346,8 +346,8 @@ def scrape_genk_ai():
|
|
| 346 |
if img_src:break
|
| 347 |
container=container.parent
|
| 348 |
seen.add(href);articles.append({"title":title,"link":href,"img":img_src,"source":"genk"})
|
| 349 |
-
if len(articles)>=
|
| 350 |
-
# OG image fallback for articles missing thumbnails
|
| 351 |
def _fetch_og(art):
|
| 352 |
if art["img"]:return
|
| 353 |
try:
|
|
@@ -355,9 +355,9 @@ def scrape_genk_ai():
|
|
| 355 |
s2=BeautifulSoup(r2.text,"lxml").find("meta",property="og:image")
|
| 356 |
if s2:art["img"]=s2.get("content","")
|
| 357 |
except:pass
|
| 358 |
-
need=[a for a in articles if not a["img"]]
|
| 359 |
if need:
|
| 360 |
-
with ThreadPoolExecutor(
|
| 361 |
return articles
|
| 362 |
except:return[]
|
| 363 |
def scrape_dantri_article(url):
|
|
@@ -425,7 +425,7 @@ def api_homepage():
|
|
| 425 |
with ThreadPoolExecutor(12) as ex:
|
| 426 |
futs={ex.submit(scrape_vne,VNE_CATS[k][0]):VNE_CATS[k][1] for k in["thoi-su","the-gioi","kinh-doanh","the-thao","giai-tri","phap-luat","giao-duc","du-lich","doi-song"]}
|
| 427 |
futs[ex.submit(scrape_bbc_vietnamese)]="BBC"
|
| 428 |
-
futs[ex.submit(
|
| 429 |
for f in as_completed(futs):
|
| 430 |
try:
|
| 431 |
for a in f.result():a["group"]=futs[f];articles.append(a)
|
|
@@ -436,13 +436,13 @@ def api_homepage():
|
|
| 436 |
def api_category(cat_id:str):
|
| 437 |
def _f():
|
| 438 |
if cat_id=="bbc":return scrape_bbc_vietnamese()
|
| 439 |
-
if cat_id=="
|
| 440 |
if cat_id in VNE_CATS:arts=scrape_vne(VNE_CATS[cat_id][0]);[a.update({"group":VNE_CATS[cat_id][1]}) for a in arts];return arts
|
| 441 |
return[]
|
| 442 |
return JSONResponse(_cached(f"cat_{cat_id}",_f))
|
| 443 |
@app.get("/api/categories")
|
| 444 |
def api_categories():
|
| 445 |
-
cats=[{"id":"bbc","name":"BBC Tiếng Việt","source":"bbc"},{"id":"
|
| 446 |
for k,(u,n) in VNE_CATS.items():cats.append({"id":k,"name":n,"source":"vne"})
|
| 447 |
return JSONResponse(cats)
|
| 448 |
@app.get("/api/dantri_hot")
|
|
|
|
| 323 |
return arts
|
| 324 |
except:return[]
|
| 325 |
def scrape_genk_ai():
|
| 326 |
+
"""Scrape AI articles from genk.vn with OG image fallback - up to 30 articles"""
|
| 327 |
try:
|
| 328 |
r=requests.get("https://genk.vn/ai.chn",headers=HEADERS,timeout=15)
|
| 329 |
if r.status_code!=200:return[]
|
|
|
|
| 346 |
if img_src:break
|
| 347 |
container=container.parent
|
| 348 |
seen.add(href);articles.append({"title":title,"link":href,"img":img_src,"source":"genk"})
|
| 349 |
+
if len(articles)>=30:break
|
| 350 |
+
# OG image fallback for ALL articles missing thumbnails
|
| 351 |
def _fetch_og(art):
|
| 352 |
if art["img"]:return
|
| 353 |
try:
|
|
|
|
| 355 |
s2=BeautifulSoup(r2.text,"lxml").find("meta",property="og:image")
|
| 356 |
if s2:art["img"]=s2.get("content","")
|
| 357 |
except:pass
|
| 358 |
+
need=[a for a in articles if not a["img"]]
|
| 359 |
if need:
|
| 360 |
+
with ThreadPoolExecutor(6) as ex:list(ex.map(_fetch_og,need))
|
| 361 |
return articles
|
| 362 |
except:return[]
|
| 363 |
def scrape_dantri_article(url):
|
|
|
|
| 425 |
with ThreadPoolExecutor(12) as ex:
|
| 426 |
futs={ex.submit(scrape_vne,VNE_CATS[k][0]):VNE_CATS[k][1] for k in["thoi-su","the-gioi","kinh-doanh","the-thao","giai-tri","phap-luat","giao-duc","du-lich","doi-song"]}
|
| 427 |
futs[ex.submit(scrape_bbc_vietnamese)]="BBC"
|
| 428 |
+
futs[ex.submit(scrape_genk_ai)]="Ứng dụng AI"
|
| 429 |
for f in as_completed(futs):
|
| 430 |
try:
|
| 431 |
for a in f.result():a["group"]=futs[f];articles.append(a)
|
|
|
|
| 436 |
def api_category(cat_id:str):
    """Return the article list for a single category as a JSON response.

    ``cat_id`` selects the scraper: ``"bbc"`` and ``"ai"`` have dedicated
    scrapers, any key of ``VNE_CATS`` goes through ``scrape_vne``, and every
    other value produces an empty list. The loader is handed to ``_cached``
    together with the key ``cat_<cat_id>``.
    """
    def _load():
        # The dedicated scrapers are checked before the VNE_CATS lookup.
        if cat_id == "bbc":
            return scrape_bbc_vietnamese()
        if cat_id == "ai":
            return scrape_genk_ai()
        if cat_id not in VNE_CATS:
            return []
        # VNE_CATS[cat_id][0] is the scrape_vne argument, [1] the group label.
        items = scrape_vne(VNE_CATS[cat_id][0])
        for item in items:
            item["group"] = VNE_CATS[cat_id][1]
        return items

    return JSONResponse(_cached(f"cat_{cat_id}", _load))
|
| 443 |
@app.get("/api/categories")
def api_categories():
    """Return the list of available categories as a JSON response.

    The BBC and AI entries come first, followed by one entry per
    ``VNE_CATS`` item in that mapping's iteration order. Each entry is a
    dict with ``id``, ``name`` and ``source`` keys.
    """
    fixed = [
        {"id": "bbc", "name": "BBC Tiếng Việt", "source": "bbc"},
        {"id": "ai", "name": "Ứng dụng AI", "source": "genk"},
    ]
    # Each VNE_CATS value unpacks to a pair; only its second element (the
    # display name) is used here.
    from_vne = [
        {"id": key, "name": label, "source": "vne"}
        for key, (_unused, label) in VNE_CATS.items()
    ]
    return JSONResponse(fixed + from_vne)
|
| 448 |
@app.get("/api/dantri_hot")
|