Spaces:
Paused
Paused
Create tvb.py
Browse files
tvb.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import asyncio, aiohttp.web, huggingface_hub, zhconv, bs4, os, itertools, uvloop, pathlib, math, re, urllib.parse, posixpath
|
| 2 |
+
|
| 3 |
+
async def main():
    """Serve this script's directory over HTTP and mirror episodes to the Hub.

    Steps, in order:

    1. Start an aiohttp static file server (with directory index) for the
       directory containing this file, listening on port 7860.
    2. Fetch the ``USA_AllDramas`` category page and walk every
       ``/cantonese/series`` link except the first one found.
    3. For each series page, walk the links inside the ``episodeDiv``
       element and query the Brightcove playback API for each video id
       (the last path segment of the episode link).
    4. Build the target path ``cantonese/<program>/<episode>.mp4`` from the
       video's ``custom_fields`` and, if that file is not already present
       in the ``chaowenguoback/video`` dataset repo, remux the first source
       with ffmpeg into a fragmented MP4 and upload the bytes.
    5. Sleep forever so the static file server keeps running.

    The Hub token is read from the ``huggingface`` environment variable.
    """
    # Static file server rooted at the directory that holds this script.
    app = aiohttp.web.Application()
    app.add_routes([aiohttp.web.static('/', pathlib.Path(__file__).resolve().parent, show_index=True)])
    runner = aiohttp.web.AppRunner(app)
    await runner.setup()
    site = aiohttp.web.TCPSite(runner, port=7860)
    await site.start()

    # Loop-invariant values, built once instead of on every episode.
    repo_id = 'chaowenguoback/video'
    token = os.getenv('huggingface')
    series_href = re.compile('^/cantonese/series')
    playback_headers = {'accept':'application/json;pk=BCpkADawqM105amwEKXAkX7W_l4jcpUMMPNr331wjQzRwTMHyoZ_qxPNx8KG3SCWEylM62XxHZXjuFl2EzrVsCKAAOlBuMFX4KAu3BW3NCqhEobE5Vcxknb6TV_anuQZUp8wfI3zcyatmzYor7rx9opPSQ_71RkQmktElORv1l98AqgNbeYQlwWt6GoAMidUC3cR65WrWYBctr5lz6U_u-TGGWdO_JUIuHiMfxs2oygZNHWVUhl0R5qWlZaM32dkny102bhHDr8wzR24z1XH9yDlL93O58cBxi23o97WDluICmIr5Tn4fZ-qLrg8bRkpkhh5qCyjYcaiM5WQ332wyortFVEn7vN27r7imEMPVVbjlFSugd2XuRpPbvtezQfWmVd80BRpcvUDPLSdfDM4VhcpgGu-BXbXOSAk1vmlgMNfGGi19TJbZQiHyJY', 'origin':'https://www.tvbanywherena.com'}

    async with aiohttp.ClientSession() as client:
        async with client.get('https://tvbanywherena.com/cantonese/category/USA_AllDramas') as dramas:
            listing = bs4.BeautifulSoup(await dramas.text(), 'lxml')
            # NOTE(review): the first matching link is skipped on purpose by
            # the original code; presumably it is not a regular series entry
            # -- confirm against the live page.
            for series in itertools.islice(listing.find_all('a', attrs={'href': series_href}), 1, None):
                async with client.get(urllib.parse.urljoin('https://tvbanywherena.com', series.get('href'))) as program:
                    episode_div = bs4.BeautifulSoup(await program.text(), 'lxml').find('div', attrs={'class':'episodeDiv'})
                    if episode_div is None:
                        # A series page without an episode list must not
                        # abort the whole crawl.
                        continue
                    for episode in episode_div.find_all('a'):
                        video_id = episode.get("href").split("/")[-1]
                        async with client.get(f'https://edge.api.brightcove.com/playback/v1/accounts/5324042807001/videos/{video_id}', headers=playback_headers) as playback:
                            if not playback.ok:
                                # Error responses do not carry the metadata
                                # read below; skip this episode.
                                continue
                            # Only the JSON body is needed, so the HTTP
                            # response is released before ffmpeg runs.
                            metadata = await playback.json()
                        custom_fields = metadata.get('custom_fields')
                        # Prefer the explicit program name; otherwise take the
                        # first space-separated token of the series name.
                        program_name = zhconv.convert(custom_fields.get('program_name') or custom_fields.get('beacon_episode_seriename').split(' ')[0], 'zh-cn').replace(' ', '')
                        name = posixpath.join('cantonese', program_name, custom_fields.get('beacon_episode_number').zfill(2) + '.mp4')
                        # NOTE(review): file_exists is a synchronous call and
                        # stalls the event loop (and the static server) while
                        # it runs; left unchanged here.
                        if huggingface_hub.file_exists(filename=name, repo_id=repo_id, repo_type='dataset', token=token):
                            continue
                        # Stream-copy into a fragmented MP4 so ffmpeg can
                        # write to a non-seekable pipe.
                        ffmpeg = await asyncio.create_subprocess_exec('ffmpeg', '-y', '-protocol_whitelist', 'http,tcp', '-i', metadata.get('sources')[0].get('src'), '-c', 'copy', '-bsf:a', 'aac_adtstoasc', '-movflags', 'frag_keyframe+empty_moov', '-f', 'mp4', 'pipe:1', stdout=asyncio.subprocess.PIPE)
                        video_bytes, _ = await ffmpeg.communicate()
                        if ffmpeg.returncode != 0 or not video_bytes:
                            # Never upload a failed or empty remux: it would
                            # make file_exists() return True on later runs
                            # and the episode would never be retried.
                            continue
                        # run_as_future=True hands the upload to a background
                        # worker; the crawl moves on without waiting for it.
                        huggingface_hub.upload_file(path_or_fileobj=video_bytes, path_in_repo=name, repo_id=repo_id, repo_type='dataset', run_as_future=True, token=token)
    # Keep the process (and with it the static file server) alive.
    await asyncio.sleep(math.inf)
|
| 25 |
+
|
| 26 |
+
# Script entry point: run the main() coroutine via uvloop.
uvloop.run(main())
|