ZSdbvASBDV / tvb.py
chaowenguoback's picture
Create tvb.py
d309fbf verified
import asyncio, aiohttp.web, huggingface_hub, zhconv, bs4, os, itertools, uvloop, pathlib, math, re, urllib.parse, posixpath
async def _archive_episode(client, video_id, token):
    """Copy one episode into the ``chaowenguoback/video`` dataset if it is missing.

    Looks the video up through the Brightcove playback API, derives the target
    path ``cantonese/<program name>/<episode number>.mp4`` from the returned
    ``custom_fields``, and, when that path does not exist in the dataset yet,
    remuxes the first listed source with ffmpeg and schedules the upload.

    Args:
        client: Open ``aiohttp.ClientSession`` used for the playback lookup.
        video_id: Last path segment of the episode link; used as the video id
            in the playback URL.
        token: Hugging Face access token, or ``None`` if the environment
            variable is unset.
    """
    async with client.get(
        f'https://edge.api.brightcove.com/playback/v1/accounts/5324042807001/videos/{video_id}',
        headers={'accept':'application/json;pk=BCpkADawqM105amwEKXAkX7W_l4jcpUMMPNr331wjQzRwTMHyoZ_qxPNx8KG3SCWEylM62XxHZXjuFl2EzrVsCKAAOlBuMFX4KAu3BW3NCqhEobE5Vcxknb6TV_anuQZUp8wfI3zcyatmzYor7rx9opPSQ_71RkQmktElORv1l98AqgNbeYQlwWt6GoAMidUC3cR65WrWYBctr5lz6U_u-TGGWdO_JUIuHiMfxs2oygZNHWVUhl0R5qWlZaM32dkny102bhHDr8wzR24z1XH9yDlL93O58cBxi23o97WDluICmIr5Tn4fZ-qLrg8bRkpkhh5qCyjYcaiM5WQ332wyortFVEn7vN27r7imEMPVVbjlFSugd2XuRpPbvtezQfWmVd80BRpcvUDPLSdfDM4VhcpgGu-BXbXOSAk1vmlgMNfGGi19TJbZQiHyJY', 'origin':'https://www.tvbanywherena.com'},
    ) as response:
        video = await response.json()
    # The metadata connection is released here; it is not needed while ffmpeg runs.

    custom_fields = video.get('custom_fields')
    # Prefer the explicit program name; otherwise fall back to the first
    # space-separated token of the series name. Converted to Simplified Chinese.
    program_name = zhconv.convert(
        custom_fields.get('program_name') or custom_fields.get('beacon_episode_seriename').split(' ')[0],
        'zh-cn',
    ).replace(' ', '')
    name = posixpath.join('cantonese', program_name, custom_fields.get('beacon_episode_number').zfill(2) + '.mp4')

    # file_exists() performs a blocking HTTP request; run it in the default
    # executor so the event loop (and the static file server) stays responsive.
    loop = asyncio.get_running_loop()
    already_uploaded = await loop.run_in_executor(
        None,
        lambda: huggingface_hub.file_exists(filename=name, repo_id='chaowenguoback/video', repo_type='dataset', token=token),
    )
    if already_uploaded:
        return

    # Stream-copy the source into a fragmented MP4 written to stdout; the
    # whole result is buffered in memory by communicate().
    ffmpeg = await asyncio.create_subprocess_exec(
        'ffmpeg', '-y', '-protocol_whitelist', 'http,tcp', '-i', video.get('sources')[0].get('src'),
        '-c', 'copy', '-bsf:a', 'aac_adtstoasc', '-movflags', 'frag_keyframe+empty_moov', '-f', 'mp4', 'pipe:1',
        stdout=asyncio.subprocess.PIPE,
    )
    data, _stderr = await ffmpeg.communicate()
    if ffmpeg.returncode != 0 or not data:
        # A failed remux must not be uploaded: the broken file would make
        # file_exists() return True and the episode would never be retried.
        # ffmpeg's own stderr is inherited, so its error is already visible.
        return

    # run_as_future=True makes upload_file return immediately with a Future;
    # the upload itself proceeds in the background.
    huggingface_hub.upload_file(path_or_fileobj=data, path_in_repo=name, repo_id='chaowenguoback/video', repo_type='dataset', run_as_future=True, token=token)


async def main():
    """Serve this script's directory on port 7860 and mirror episodes to the Hub.

    Starts an aiohttp static file server (with directory index) rooted at the
    directory containing this file, then walks the drama listing on
    tvbanywherena.com, visiting every series page and handing each episode
    link to ``_archive_episode``. After the crawl it sleeps forever so the
    process, and with it the file server, stays up.
    """
    app = aiohttp.web.Application()
    app.add_routes([aiohttp.web.static('/', pathlib.Path(__file__).resolve().parent, show_index=True)])
    runner = aiohttp.web.AppRunner(app)
    await runner.setup()
    site = aiohttp.web.TCPSite(runner, port=7860)
    await site.start()

    token = os.getenv('huggingface')
    series_href = re.compile('^/cantonese/series')
    async with aiohttp.ClientSession() as client:
        async with client.get('https://tvbanywherena.com/cantonese/category/USA_AllDramas') as dramas:
            catalogue = bs4.BeautifulSoup(await dramas.text(), 'lxml')

        # islice(..., 1, None) skips the first matching link, exactly as the
        # original did. NOTE(review): the reason is not evident from this file.
        for series in itertools.islice(catalogue.find_all('a', attrs={'href':series_href}), 1, None):
            async with client.get(urllib.parse.urljoin('https://tvbanywherena.com', series.get('href'))) as program:
                series_page = bs4.BeautifulSoup(await program.text(), 'lxml')

            episode_list = series_page.find('div', attrs={'class':'episodeDiv'})
            if episode_list is None:
                # No episode container on this page: skip the series instead
                # of aborting the whole crawl with an AttributeError on None.
                continue

            for episode in episode_list.find_all('a'):
                await _archive_episode(client, episode.get('href').split('/')[-1], token)

    # Keep the process alive so the static file server keeps serving.
    await asyncio.sleep(math.inf)
# Script entry point: run the main() coroutine to completion on a uvloop event loop.
uvloop.run(main())