# BinaryONe
# Refresh Update
# 2f67506
from bs4 import BeautifulSoup
from urllib.request import urlopen
from urllib.parse import urlencode
import requests
import re
import json
async def extract_json_tranding(data):
    """Extract and parse the ``ytInitialData`` JSON object from script text.

    Args:
        data: A string, or an iterable of strings that is concatenated
            first, containing a ``var ytInitialData = {...};`` assignment.
            The callers in this module pass ``str(script.contents)`` (the
            ``repr`` of a list), so backslash escapes are undone before the
            payload is handed to the JSON parser.

    Returns:
        The parsed JSON value, or ``None`` when no assignment is found or
        the payload cannot be decoded.
    """
    # Join the list into a single string
    combined_str = ''.join(data)
    # Use regular expression to find the JSON data after 'ytInitialData = '
    match = re.search(r'var ytInitialData\s*=\s*(\{.*\});', combined_str)
    if not match:
        print("No JSON data found.")
        return None

    json_str = match.group(1)  # Extract the JSON string
    try:
        # Undo the backslash escaping without mangling non-ASCII text.
        # 'unicode_escape' decodes its input as Latin-1, so feeding it UTF-8
        # bytes turns every non-ASCII character into mojibake. Encoding to
        # Latin-1 with 'backslashreplace' keeps characters up to U+00FF as
        # single bytes and rewrites all others as \uXXXX / \UXXXXXXXX
        # escapes, which 'unicode_escape' then restores intact.
        json_str = json_str.encode('latin-1', 'backslashreplace').decode('unicode_escape')
        # Parse the JSON data
        return json.loads(json_str)
    except (json.JSONDecodeError, UnicodeDecodeError) as e:
        print("Error decoding JSON:", e)
        return None
def _search_result_details(renderer):
    """Build the flat result dict for one video renderer of the search page.

    ``videoId``, ``title``, ``thumbnail``, ``navigationEndpoint`` and
    ``ownerText`` are indexed directly, so a renderer missing any of them
    raises ``KeyError``; every other field falls back to ``''``.
    """
    snippet = renderer.get('descriptionSnippet', {})
    description = snippet['runs'][0]['text'] if snippet else ''
    published = renderer.get('publishedTimeText', {})
    length = renderer.get('lengthText', {})
    channel_thumbnail = renderer.get('channelThumbnailSupportedRenderers', {})
    return {
        "id": renderer['videoId'],
        "title": renderer['title']['runs'][0]['text'],
        "thumbnails": renderer['thumbnail']['thumbnails'],
        "description": description,
        "publishedTime": published['simpleText'] if published else '',
        "length": length['simpleText'] if length else '',
        "views": renderer.get('viewCountText', {}).get('simpleText', '') or '',
        "url": f"https://www.youtube.com{renderer['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url']}",
        "short_views": renderer.get('shortViewCountText', {}).get('simpleText', '') or '',
        "channelThumbnail": (
            channel_thumbnail['channelThumbnailWithLinkRenderer']['thumbnail']['thumbnails'][0]['url']
            if channel_thumbnail else ''
        ),
        "channel": renderer['ownerText']['runs'][0]['text'],
    }


async def search_videos(query):
    """Scrape the YouTube results page for *query* and return its videos.

    Args:
        query: Search text, sent as the ``search_query`` URL parameter.

    Returns:
        A list of dicts with the keys ``id``, ``title``, ``thumbnails``,
        ``description``, ``publishedTime``, ``length``, ``views``, ``url``,
        ``short_views``, ``channelThumbnail`` and ``channel``. The list is
        empty when the page carries no parsable ``ytInitialData`` or no
        result section.

    Raises:
        KeyError: If a video renderer lacks one of the directly indexed
            fields (see ``_search_result_details``).

    Errors raised by ``urlopen`` are not caught here.
    """
    # NOTE(review): 'sp' is already percent-encoded and urlencode() escapes
    # its '%' characters once more -- confirm the server accepts this form.
    final_query = {'search_query': query, 'sp': 'EgIQAQ%253D%253D', 'persist_gl': 'IN'}
    param = urlencode(final_query)
    # urlopen() is a blocking call; the context manager makes sure the
    # response is closed once the body has been read.
    with urlopen(f'https://www.youtube.com/results?{param}') as response:
        html = response.read()
    soup = BeautifulSoup(html, 'lxml')

    # Keep the last <script> that mentions ytInitialData, as before.
    text = ''
    for script in soup.find_all('script'):
        script_text = str(script.contents)
        if 'var ytInitialData' in script_text:
            text = script_text

    json_data = await extract_json_tranding(text)
    if not json_data:
        # The extractor returns None when nothing could be parsed.
        return []

    sections = (
        json_data.get('contents', {})
        .get('twoColumnSearchResultsRenderer', {})
        .get('primaryContents', {})
        .get('sectionListRenderer', {})
        .get('contents', [])
    )
    if not sections:
        return []
    all_search_video = sections[0].get('itemSectionRenderer', {}).get('contents', [])

    searchresult_list = []
    for video in all_search_video:
        # The first renderer kind present on the item wins; items with none
        # of these kinds are skipped.
        for renderer_key in ('videoRenderer', 'childVideoRenderer', 'gridVideoRenderer'):
            renderer = video.get(renderer_key)
            if renderer:
                searchresult_list.append(_search_result_details(renderer))
                break
    return searchresult_list
async def tranding_videos():
    """Scrape the YouTube trending feed page and return its videos.

    Returns:
        A list of dicts with the keys ``id``, ``title``, ``thumbnails``,
        ``description``, ``publishedTime``, ``length``, ``views``, ``url``,
        ``short_views``, ``channelThumbnail`` and ``channel``. The list is
        empty when the page carries no parsable ``ytInitialData`` or when
        the expected shelf is not present in the page JSON.

    Raises:
        KeyError: If a video renderer lacks ``videoId``, ``title``,
            ``thumbnail``, ``navigationEndpoint`` or ``ownerText``, which
            are indexed directly.

    Errors raised by ``urlopen`` are not caught here.
    """
    # urlopen() is a blocking call; the context manager makes sure the
    # response is closed once the body has been read.
    with urlopen('https://www.youtube.com/feed/trending/?persist_gl=1&gl=IN') as response:
        html = response.read()
    soup = BeautifulSoup(html, 'lxml')

    # Keep the last <script> that mentions ytInitialData, as before.
    text = ''
    for script in soup.find_all('script'):
        script_text = str(script.contents)
        if 'var ytInitialData' in script_text:
            text = script_text

    json_data = await extract_json_tranding(text)
    if not json_data:
        # The extractor returns None when nothing could be parsed.
        return []

    tabs = (
        json_data.get('contents', {})
        .get('twoColumnBrowseResultsRenderer', {})
        .get('tabs', [])
    )
    if not tabs:
        return []
    sections = (
        tabs[0].get('tabRenderer', {})
        .get('content', {})
        .get('sectionListRenderer', {})
        .get('contents', [])
    )
    # The video shelf is read from the fourth section (index 3); a page
    # with fewer sections yields no results instead of an IndexError.
    if len(sections) < 4:
        return []
    shelves = sections[3].get('itemSectionRenderer', {}).get('contents', [])
    if not shelves:
        return []
    all_video = (
        shelves[0].get('shelfRenderer', {})
        .get('content', {})
        .get('expandedShelfContentsRenderer', {})
        .get('items', [])
    )

    tranding_list = []
    for video in all_video:
        renderer = video.get('videoRenderer')
        if not renderer:
            # Skip shelf items that are not plain video renderers.
            continue
        snippet = renderer.get('descriptionSnippet', {})
        description = snippet['runs'][0]['text'] if snippet else ''
        published = renderer.get('publishedTimeText', {})
        length = renderer.get('lengthText', {})
        channel_thumbnail = renderer.get('channelThumbnailSupportedRenderers', {})
        details = {
            "id": renderer['videoId'],
            "title": renderer['title']['runs'][0]['text'],
            "thumbnails": renderer['thumbnail']['thumbnails'],
            "description": description,
            "publishedTime": published['simpleText'] if published else '',
            "length": length['simpleText'] if length else '',
            "views": renderer.get('viewCountText', {}).get('simpleText', '') or '',
            "url": f"https://www.youtube.com{renderer['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url']}",
            "short_views": renderer.get('shortViewCountText', {}).get('simpleText', '') or '',
            "channelThumbnail": (
                channel_thumbnail['channelThumbnailWithLinkRenderer']['thumbnail']['thumbnails'][0]['url']
                if channel_thumbnail else ''
            ),
            "channel": renderer['ownerText']['runs'][0]['text'],
        }
        tranding_list.append(details)
    return tranding_list