chaowenguoback committed on
Commit
d309fbf
·
verified ·
1 Parent(s): 528a1ba

Create tvb.py

Browse files
Files changed (1) hide show
  1. tvb.py +26 -0
tvb.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio, aiohttp.web, huggingface_hub, zhconv, bs4, os, itertools, uvloop, pathlib, math, re, urllib.parse, posixpath
2
+
3
async def main():
    """Mirror TVB Anywhere NA Cantonese dramas into a Hugging Face dataset.

    Steps, in order:

    1. Serve the directory containing this script over HTTP on port 7860
       (with a directory index) for as long as the coroutine runs.
    2. Scrape the ``USA_AllDramas`` catalogue page for links whose ``href``
       starts with ``/cantonese/series``. The first match is skipped, as in
       the original implementation (presumably a non-series entry; confirm
       against the live page).
    3. For each episode link on a series page, query the Brightcove playback
       API, derive ``cantonese/<series>/<NN>.mp4`` from the returned custom
       fields (series name converted with ``zhconv`` to ``zh-cn`` and
       stripped of spaces), and skip episodes already present in the
       ``chaowenguoback/video`` dataset.
    4. Remux the first listed source to fragmented MP4 with ``ffmpeg`` and
       upload the resulting bytes.
    5. Sleep forever so the HTTP server stays up.

    The Hugging Face token is read from the ``huggingface`` environment
    variable.

    Episodes with missing metadata, a failed playback lookup, a failed
    ffmpeg run or a failed upload are logged and skipped rather than
    aborting the whole crawl. Each upload is awaited before the next episode
    is fetched, so at most one remuxed video is held in memory at a time and
    upload errors are not silently dropped.
    """
    import logging  # function-scope import: nothing else in the file needs it

    log = logging.getLogger(__name__)
    site_root = 'https://tvbanywherena.com'
    repo_id = 'chaowenguoback/video'
    token = os.getenv('huggingface')
    series_href = re.compile('^/cantonese/series')
    playback_headers = {'accept':'application/json;pk=BCpkADawqM105amwEKXAkX7W_l4jcpUMMPNr331wjQzRwTMHyoZ_qxPNx8KG3SCWEylM62XxHZXjuFl2EzrVsCKAAOlBuMFX4KAu3BW3NCqhEobE5Vcxknb6TV_anuQZUp8wfI3zcyatmzYor7rx9opPSQ_71RkQmktElORv1l98AqgNbeYQlwWt6GoAMidUC3cR65WrWYBctr5lz6U_u-TGGWdO_JUIuHiMfxs2oygZNHWVUhl0R5qWlZaM32dkny102bhHDr8wzR24z1XH9yDlL93O58cBxi23o97WDluICmIr5Tn4fZ-qLrg8bRkpkhh5qCyjYcaiM5WQ332wyortFVEn7vN27r7imEMPVVbjlFSugd2XuRpPbvtezQfWmVd80BRpcvUDPLSdfDM4VhcpgGu-BXbXOSAk1vmlgMNfGGi19TJbZQiHyJY', 'origin':'https://www.tvbanywherena.com'}

    app = aiohttp.web.Application()
    app.add_routes([aiohttp.web.static('/', pathlib.Path(__file__).resolve().parent, show_index=True)])
    runner = aiohttp.web.AppRunner(app)
    await runner.setup()
    try:
        site = aiohttp.web.TCPSite(runner, port=7860)
        await site.start()

        async with aiohttp.ClientSession() as client:
            async with client.get('https://tvbanywherena.com/cantonese/category/USA_AllDramas') as dramas:
                catalogue = bs4.BeautifulSoup(await dramas.text(), 'lxml')

            series_links = catalogue.find_all('a', attrs={'href': series_href})
            for series_link in itertools.islice(series_links, 1, None):
                async with client.get(urllib.parse.urljoin(site_root, series_link.get('href'))) as program:
                    series_page = bs4.BeautifulSoup(await program.text(), 'lxml')

                episode_list = series_page.find('div', attrs={'class':'episodeDiv'})
                if episode_list is None:
                    # A series page without an episode list must not stop the crawl.
                    log.warning('no episode list on %s', series_link.get('href'))
                    continue

                for episode in episode_list.find_all('a'):
                    video_id = (episode.get('href') or '').split('/')[-1]
                    if not video_id:
                        continue

                    async with client.get(f'https://edge.api.brightcove.com/playback/v1/accounts/5324042807001/videos/{video_id}', headers=playback_headers) as response:
                        if not response.ok:
                            log.warning('playback lookup for %s failed with HTTP %s', video_id, response.status)
                            continue
                        playback = await response.json()

                    custom_fields = playback.get('custom_fields') or {}
                    series_name = custom_fields.get('program_name') or (custom_fields.get('beacon_episode_seriename') or '').split(' ')[0]
                    episode_number = custom_fields.get('beacon_episode_number')
                    sources = playback.get('sources') or []
                    stream_url = sources[0].get('src') if sources else None
                    if not (series_name and episode_number and stream_url):
                        log.warning('incomplete playback metadata for %s; skipping', video_id)
                        continue

                    program_name = zhconv.convert(series_name, 'zh-cn').replace(' ', '')
                    name = posixpath.join('cantonese', program_name, episode_number.zfill(2) + '.mp4')

                    # file_exists is a blocking HTTP call; run it off the event
                    # loop so the static server keeps answering meanwhile.
                    already_uploaded = await asyncio.to_thread(
                        huggingface_hub.file_exists,
                        filename=name, repo_id=repo_id, repo_type='dataset', token=token,
                    )
                    if already_uploaded:
                        continue

                    ffmpeg = await asyncio.create_subprocess_exec('ffmpeg', '-y', '-protocol_whitelist', 'http,tcp', '-i', stream_url, '-c', 'copy', '-bsf:a', 'aac_adtstoasc', '-movflags', 'frag_keyframe+empty_moov', '-f', 'mp4', 'pipe:1', stdout=asyncio.subprocess.PIPE)
                    video, _ = await ffmpeg.communicate()
                    if ffmpeg.returncode != 0 or not video:
                        # Never upload a truncated or empty file: once it exists in
                        # the repo the file_exists check would skip it on every later run.
                        log.warning('ffmpeg exited with %s for %s; skipping upload', ffmpeg.returncode, name)
                        continue

                    try:
                        # Await the background upload so failures surface and the
                        # video buffer is released before the next download starts.
                        await asyncio.wrap_future(huggingface_hub.upload_file(path_or_fileobj=video, path_in_repo=name, repo_id=repo_id, repo_type='dataset', run_as_future=True, token=token))
                    except Exception:
                        log.exception('upload of %s failed', name)

        # Keep the HTTP server alive after the crawl has finished.
        await asyncio.sleep(math.inf)
    finally:
        await runner.cleanup()
25
+
26
# Only start the server and crawler when executed as a script, so that
# importing this module has no side effects.
if __name__ == '__main__':
    uvloop.run(main())