Spaces:
Paused
Paused
| import asyncio, aiohttp.web, huggingface_hub, zhconv, bs4, os, itertools, uvloop, pathlib, math, re, urllib.parse, posixpath | |
# Base URL of the scraped site and the catalogue page that lists every series.
_SITE = 'https://tvbanywherena.com'
_CATALOGUE_URL = 'https://tvbanywherena.com/cantonese/category/USA_AllDramas'

# Brightcove Playback API endpoint; `{}` is filled with the video id.
_PLAYBACK_URL = 'https://edge.api.brightcove.com/playback/v1/accounts/5324042807001/videos/{}'

# Request headers for the playback call; the policy key travels in `accept`.
_PLAYBACK_HEADERS = {
    'accept': 'application/json;pk=BCpkADawqM105amwEKXAkX7W_l4jcpUMMPNr331wjQzRwTMHyoZ_qxPNx8KG3SCWEylM62XxHZXjuFl2EzrVsCKAAOlBuMFX4KAu3BW3NCqhEobE5Vcxknb6TV_anuQZUp8wfI3zcyatmzYor7rx9opPSQ_71RkQmktElORv1l98AqgNbeYQlwWt6GoAMidUC3cR65WrWYBctr5lz6U_u-TGGWdO_JUIuHiMfxs2oygZNHWVUhl0R5qWlZaM32dkny102bhHDr8wzR24z1XH9yDlL93O58cBxi23o97WDluICmIr5Tn4fZ-qLrg8bRkpkhh5qCyjYcaiM5WQ332wyortFVEn7vN27r7imEMPVVbjlFSugd2XuRpPbvtezQfWmVd80BRpcvUDPLSdfDM4VhcpgGu-BXbXOSAk1vmlgMNfGGi19TJbZQiHyJY',
    'origin': 'https://www.tvbanywherena.com',
}

# Hugging Face dataset repository that receives the videos.
_REPO_ID = 'chaowenguoback/video'
_REPO_TYPE = 'dataset'


def _report_upload_failure(name, done):
    """Print the error carried by a finished upload future, if any.

    Args:
        name: Repository path of the file that was being uploaded.
        done: The completed ``concurrent.futures.Future`` of the upload.
    """
    if done.cancelled():
        print(f'upload of {name} was cancelled')
        return
    error = done.exception()
    if error is not None:
        print(f'upload of {name} failed: {error!r}')


async def _mirror_episode(client, video_id):
    """Remux one episode with ffmpeg and upload it unless it already exists.

    Args:
        client: Open ``aiohttp.ClientSession`` used for the playback request.
        video_id: Video id, taken from the last path segment of the
            episode link.

    Raises:
        aiohttp.ClientError: The playback request or its JSON decoding failed.
        AttributeError, IndexError, TypeError: The playback JSON lacks the
            fields read below.
    """
    async with client.get(_PLAYBACK_URL.format(video_id), headers=_PLAYBACK_HEADERS) as response:
        playback = await response.json()

    custom_fields = playback.get('custom_fields')
    # Prefer the explicit program name; otherwise take the first
    # space-separated word of the series name.
    raw_program = custom_fields.get('program_name') or custom_fields.get('beacon_episode_seriename').split(' ')[0]
    program_name = zhconv.convert(raw_program, 'zh-cn').replace(' ', '')
    name = posixpath.join('cantonese', program_name, custom_fields.get('beacon_episode_number').zfill(2) + '.mp4')

    token = os.getenv('huggingface')
    # file_exists performs blocking HTTP; run it in a worker thread so the
    # event loop (and the static file server on it) stays responsive.
    already_uploaded = await asyncio.to_thread(
        huggingface_hub.file_exists,
        filename=name, repo_id=_REPO_ID, repo_type=_REPO_TYPE, token=token,
    )
    if already_uploaded:
        return

    ffmpeg = await asyncio.create_subprocess_exec(
        'ffmpeg', '-y', '-protocol_whitelist', 'http,tcp',
        '-i', playback.get('sources')[0].get('src'),
        '-c', 'copy', '-bsf:a', 'aac_adtstoasc',
        '-movflags', 'frag_keyframe+empty_moov', '-f', 'mp4', 'pipe:1',
        stdout=asyncio.subprocess.PIPE,
    )
    # stderr is not piped, so the second element of the result is None.
    video, _ = await ffmpeg.communicate()
    if ffmpeg.returncode != 0 or not video:
        # Uploading a failed remux would make file_exists() true on later
        # runs, so the episode would never be retried.
        print(f'ffmpeg failed for {name} (exit code {ffmpeg.returncode}); skipping upload')
        return

    # The upload runs in the background; the callback only reports failures.
    upload = huggingface_hub.upload_file(
        path_or_fileobj=video, path_in_repo=name,
        repo_id=_REPO_ID, repo_type=_REPO_TYPE,
        run_as_future=True, token=token,
    )
    upload.add_done_callback(lambda done: _report_upload_failure(name, done))


async def main():
    """Serve this file's directory on port 7860 and mirror listed episodes.

    Starts an aiohttp static file server rooted at the directory that
    contains this file, then walks the catalogue page, each series page and
    each episode link, handing every episode to ``_mirror_episode``. A
    failure on one episode is printed and the crawl continues. After the
    crawl the coroutine sleeps forever, so it never returns.
    """
    app = aiohttp.web.Application()
    app.add_routes([aiohttp.web.static('/', pathlib.Path(__file__).resolve().parent, show_index=True)])
    runner = aiohttp.web.AppRunner(app)
    await runner.setup()
    site = aiohttp.web.TCPSite(runner, port=7860)
    await site.start()

    async with aiohttp.ClientSession() as client:
        async with client.get(_CATALOGUE_URL) as dramas:
            catalogue = bs4.BeautifulSoup(await dramas.text(), 'lxml')
        series_links = catalogue.find_all('a', attrs={'href': re.compile('^/cantonese/series')})
        # The first matching link is skipped, exactly as before.
        for series_link in itertools.islice(series_links, 1, None):
            async with client.get(urllib.parse.urljoin(_SITE, series_link.get('href'))) as program:
                series_page = bs4.BeautifulSoup(await program.text(), 'lxml')
            episode_list = series_page.find('div', attrs={'class': 'episodeDiv'})
            if episode_list is None:
                # No episode list on this page: nothing to mirror here.
                continue
            for episode in episode_list.find_all('a'):
                try:
                    await _mirror_episode(client, episode.get('href').split('/')[-1])
                except (aiohttp.ClientError, AttributeError, IndexError, TypeError) as error:
                    # One malformed or unavailable episode must not stop
                    # the crawl or take the file server down with it.
                    print(f'skipping episode {episode.get("href")}: {error!r}')

    # NOTE(review): the original indentation was lost; this is assumed to sit
    # at function level so the process keeps serving after the crawl ends.
    await asyncio.sleep(math.inf)
# Script entry point: run the main() coroutine on uvloop's event loop.
uvloop.run(main())