Spaces:

privateone
/

ONesVC_bot

Paused

ONesVC_bot / YukkiMusic /platforms /YoutubeAPI /BS4scrapAPI.py

BinaryONe

Refresh Update

2f67506 over 1 year ago

10.5 kB


	from bs4 import BeautifulSoup
	from urllib.request import urlopen
	from urllib.parse import urlencode
	import requests
	import re
	import json



	async def extract_json_tranding(data):
	    """Extract and parse the ``ytInitialData`` JSON object from script text.

	    Args:
	        data: A string, or an iterable of string fragments that are joined
	            before searching. The callers in this file pass
	            ``str(script.contents)``, i.e. the repr of a list, which is why
	            backslash escape sequences are undone before parsing.

	    Returns:
	        The decoded JSON value, or ``None`` when no
	        ``var ytInitialData = {...};`` assignment is found or when the
	        captured text cannot be unescaped or parsed (a message is printed
	        in each of those cases).
	    """
	    # ``str.join`` accepts a list of fragments as well as a plain string.
	    combined_str = ''.join(data)

	    # Greedy on purpose: capture up to the last ``};`` in the text.
	    match = re.search(r'var ytInitialData\s=\s(\{.*\});', combined_str)
	    if not match:
	        print("No JSON data found.")
	        return None

	    try:
	        # Undo backslash escapes without corrupting non-ASCII text.
	        # ``unicode_escape`` reads its input bytes as Latin-1, so encoding
	        # as UTF-8 first would turn every non-ASCII character into mojibake.
	        # Latin-1 with ``backslashreplace`` keeps code points <= U+00FF as
	        # single bytes and rewrites the rest as escapes that the decoder
	        # restores exactly.
	        json_str = (
	            match.group(1)
	            .encode('latin-1', 'backslashreplace')
	            .decode('unicode_escape')
	        )
	    except UnicodeDecodeError as e:
	        # Raised for malformed escapes such as a truncated ``\x`` sequence.
	        print("Error decoding escape sequences:", e)
	        return None

	    try:
	        return json.loads(json_str)
	    except json.JSONDecodeError as e:
	        print("Error decoding JSON:", e)
	        return None



	# Keys under which a search result item may carry a video, in the order
	# they are checked; the first non-empty one wins.
	_RENDERER_KEYS = ('videoRenderer', 'childVideoRenderer', 'gridVideoRenderer')


	def _dig(node, *path, default=''):
	    """Follow ``path`` (dict keys / list indexes) through nested JSON data.

	    Returns ``default`` when any step is missing or of the wrong type, or
	    when the value found at the end of the path is falsy.
	    """
	    for step in path:
	        try:
	            node = node[step]
	        except (KeyError, IndexError, TypeError):
	            return default
	    return node if node else default


	def _video_details(renderer):
	    """Flatten one video renderer dict into the result schema.

	    Returns ``None`` when the renderer has no ``videoId``; every other
	    missing field falls back to ``''`` (or ``[]`` for ``thumbnails``).
	    """
	    video_id = _dig(renderer, 'videoId')
	    if not video_id:
	        return None

	    # Fall back to the plain watch URL when the navigation endpoint is absent.
	    url_path = _dig(
	        renderer,
	        'navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url',
	        default=f'/watch?v={video_id}',
	    )
	    return {
	        "id": video_id,
	        "title": _dig(renderer, 'title', 'runs', 0, 'text'),
	        "thumbnails": _dig(renderer, 'thumbnail', 'thumbnails', default=[]),
	        "description": _dig(renderer, 'descriptionSnippet', 'runs', 0, 'text'),
	        "publishedTime": _dig(renderer, 'publishedTimeText', 'simpleText'),
	        "length": _dig(renderer, 'lengthText', 'simpleText'),
	        "views": _dig(renderer, 'viewCountText', 'simpleText'),
	        "url": f"https://www.youtube.com{url_path}",
	        "short_views": _dig(renderer, 'shortViewCountText', 'simpleText'),
	        "channelThumbnail": _dig(
	            renderer,
	            'channelThumbnailSupportedRenderers',
	            'channelThumbnailWithLinkRenderer',
	            'thumbnail', 'thumbnails', 0, 'url',
	        ),
	        "channel": _dig(renderer, 'ownerText', 'runs', 0, 'text'),
	    }


	async def _fetch_initial_data(url):
	    """Download ``url`` and return its parsed ``ytInitialData`` (or ``None``)."""
	    import asyncio

	    def _download():
	        # Context manager so the HTTP response is always closed.
	        with urlopen(url) as response:
	            return response.read()

	    # urlopen blocks; run it in the default executor so the event loop
	    # keeps serving other tasks while the page downloads.
	    html = await asyncio.get_running_loop().run_in_executor(None, _download)

	    soup = BeautifulSoup(html, 'lxml')
	    text = ''
	    for script in soup.find_all('script'):
	        contents = str(script.contents)
	        # No early exit: as before, the last matching <script> is the one used.
	        if 'var ytInitialData' in contents:
	            text = contents

	    return await extract_json_tranding(text)


	async def search_videos(query):
	    """Search YouTube for ``query`` by scraping the results page.

	    Args:
	        query: The search text, sent as the ``search_query`` URL parameter.

	    Returns:
	        A list of dicts with the keys ``id``, ``title``, ``thumbnails``,
	        ``description``, ``publishedTime``, ``length``, ``views``, ``url``,
	        ``short_views``, ``channelThumbnail`` and ``channel``. The list is
	        empty when the page data cannot be extracted or holds no videos.

	    Raises:
	        Whatever ``urlopen`` raises for network or HTTP failures.
	    """
	    # NOTE(review): the 'sp' value is already percent-encoded and urlencode()
	    # escapes its '%' signs again - confirm YouTube still honours the filter.
	    params = urlencode({
	        'search_query': query,
	        'sp': 'EgIQAQ%253D%253D',
	        'persist_gl': 'IN',
	    })
	    json_data = await _fetch_initial_data(
	        f'https://www.youtube.com/results?{params}'
	    )
	    if not json_data:
	        return []

	    # Only the first section of the result list is inspected.
	    items = _dig(
	        json_data,
	        'contents', 'twoColumnSearchResultsRenderer', 'primaryContents',
	        'sectionListRenderer', 'contents', 0,
	        'itemSectionRenderer', 'contents',
	        default=[],
	    )

	    searchresult_list = []
	    for item in items:
	        for key in _RENDERER_KEYS:
	            renderer = _dig(item, key, default=None)
	            if renderer:
	                details = _video_details(renderer)
	                if details:
	                    searchresult_list.append(details)
	                break
	    return searchresult_list


	async def tranding_videos():
	    """Return the videos from one shelf of YouTube's trending page (gl=IN).

	    Returns:
	        A list of dicts with the same keys as ``search_videos`` results.
	        The list is empty when the page data cannot be extracted or the
	        expected shelf is not present.

	    Raises:
	        Whatever ``urlopen`` raises for network or HTTP failures.
	    """
	    json_data = await _fetch_initial_data(
	        'https://www.youtube.com/feed/trending/?persist_gl=1&gl=IN'
	    )
	    if not json_data:
	        return []

	    # NOTE(review): the section index (3) is tied to the page layout at the
	    # time of writing; a missing section now yields [] instead of IndexError.
	    items = _dig(
	        json_data,
	        'contents', 'twoColumnBrowseResultsRenderer', 'tabs', 0,
	        'tabRenderer', 'content', 'sectionListRenderer', 'contents', 3,
	        'itemSectionRenderer', 'contents', 0,
	        'shelfRenderer', 'content', 'expandedShelfContentsRenderer', 'items',
	        default=[],
	    )

	    tranding_list = []
	    for item in items:
	        # Skip shelf entries that are not videos.
	        renderer = _dig(item, 'videoRenderer', default=None)
	        details = _video_details(renderer) if renderer else None
	        if details:
	            tranding_list.append(details)
	    return tranding_list