from bs4 import BeautifulSoup
from urllib.request import urlopen
from urllib.parse import urlencode
import requests
import re
import json

# Marker and pattern used to locate the `var ytInitialData = {...};` assignment
# inside the page's <script> tags.
_INITIAL_DATA_MARKER = 'var ytInitialData'
_INITIAL_DATA_RE = re.compile(r'var ytInitialData\s*=\s*(\{.*\});')

# Renderer keys accepted for a search result item, in priority order.
_SEARCH_RENDERER_KEYS = ('videoRenderer', 'childVideoRenderer', 'gridVideoRenderer')
_TRENDING_RENDERER_KEYS = ('videoRenderer',)


async def extract_json_tranding(data):
    """Extract and parse the ``ytInitialData`` JSON object from script text.

    Args:
        data: A string, or an iterable of strings that is joined into one.
            The callers in this module pass ``str(script.contents)``, i.e. the
            repr of a list, so backslashes and quotes arrive repr-escaped and
            are un-escaped here before parsing.

    Returns:
        The decoded JSON value, or ``None`` when no ``ytInitialData``
        assignment is found or the payload cannot be decoded. A message is
        printed in both failure cases.
    """
    combined_str = ''.join(data)
    match = _INITIAL_DATA_RE.search(combined_str)
    if not match:
        print("No JSON data found.")
        return None

    try:
        # Undo the repr-style escaping. Encoding with latin-1 and
        # 'backslashreplace' keeps every non-ASCII character intact through
        # the unicode_escape round trip; encoding as UTF-8 (as before) turned
        # such characters into mojibake.
        json_str = (
            match.group(1)
            .encode('latin-1', 'backslashreplace')
            .decode('unicode_escape')
        )
        return json.loads(json_str)
    except (UnicodeDecodeError, json.JSONDecodeError) as e:
        print("Error decoding JSON:", e)
        return None


def _dig(node, *path, default=''):
    """Walk *path* (dict keys and list indexes) through nested data.

    Returns *default* as soon as a step is missing or the current node does
    not support the lookup.
    """
    for step in path:
        try:
            node = node[step]
        except (KeyError, IndexError, TypeError):
            return default
    return node


def _fetch(url):
    """Return the raw response body for *url*, closing the response afterwards."""
    with urlopen(url) as response:
        return response.read()


def _initial_data_script(html):
    """Return ``str(script.contents)`` of the last <script> holding ytInitialData.

    Returns an empty string when no script tag contains the marker.
    """
    soup = BeautifulSoup(html, 'lxml')
    text = ''
    for script in soup.find_all('script'):
        contents = str(script.contents)
        if _INITIAL_DATA_MARKER in contents:
            text = contents
    return text


def _video_details(renderer):
    """Build the flat result dict for one renderer object.

    Optional fields fall back to ``''`` (``[]`` for thumbnails) when absent.
    Returns ``None`` when the renderer carries no ``videoId``.
    """
    video_id = renderer.get('videoId')
    if not video_id:
        return None

    # Prefer the first title run; fall back to a plain `simpleText` title.
    title = _dig(renderer, 'title', 'runs', 0, 'text',
                 default=_dig(renderer, 'title', 'simpleText'))
    # Fall back to the standard watch URL when no navigation URL is present.
    path = _dig(renderer, 'navigationEndpoint', 'commandMetadata',
                'webCommandMetadata', 'url', default=f'/watch?v={video_id}')

    return {
        "id": video_id,
        "title": title,
        "thumbnails": _dig(renderer, 'thumbnail', 'thumbnails', default=[]),
        "description": _dig(renderer, 'descriptionSnippet', 'runs', 0, 'text'),
        "publishedTime": _dig(renderer, 'publishedTimeText', 'simpleText'),
        "length": _dig(renderer, 'lengthText', 'simpleText'),
        "views": _dig(renderer, 'viewCountText', 'simpleText'),
        "url": f"https://www.youtube.com{path}",
        "short_views": _dig(renderer, 'shortViewCountText', 'simpleText'),
        "channelThumbnail": _dig(
            renderer, 'channelThumbnailSupportedRenderers',
            'channelThumbnailWithLinkRenderer', 'thumbnail', 'thumbnails',
            0, 'url'),
        "channel": _dig(renderer, 'ownerText', 'runs', 0, 'text'),
    }


def _collect_videos(items, renderer_keys):
    """Convert raw result items into detail dicts.

    For each item the first non-empty renderer among *renderer_keys* is used;
    items without any of those renderers are skipped.
    """
    videos = []
    for item in items:
        if not isinstance(item, dict):
            continue
        renderer = next(
            (item[key] for key in renderer_keys if item.get(key)), None)
        if not isinstance(renderer, dict):
            continue
        details = _video_details(renderer)
        if details is not None:
            videos.append(details)
    return videos


async def search_videos(query):
    """Search YouTube for *query* and return a list of video detail dicts.

    Each dict has the keys ``id``, ``title``, ``thumbnails``, ``description``,
    ``publishedTime``, ``length``, ``views``, ``url``, ``short_views``,
    ``channelThumbnail`` and ``channel``.

    Returns an empty list when the page's initial data cannot be extracted or
    does not contain the expected result section.

    NOTE: the page is fetched with the blocking ``urlopen`` call, so this
    coroutine does not yield to the event loop while the request is running.
    """
    # NOTE(review): the `sp` value is already percent-encoded and urlencode()
    # encodes its '%' characters again - confirm this is what YouTube expects.
    final_query = {'search_query': query, 'sp': 'EgIQAQ%253D%253D', 'persist_gl': 'IN'}
    param = urlencode(final_query)
    html = _fetch(f'https://www.youtube.com/results?{param}')

    json_data = await extract_json_tranding(_initial_data_script(html))
    if not json_data:
        return []

    all_search_video = _dig(
        json_data, 'contents', 'twoColumnSearchResultsRenderer',
        'primaryContents', 'sectionListRenderer', 'contents', 0,
        'itemSectionRenderer', 'contents', default=[])
    return _collect_videos(all_search_video, _SEARCH_RENDERER_KEYS)


async def tranding_videos():
    """Return detail dicts for the videos listed on the YouTube trending page.

    The dicts have the same keys as those returned by ``search_videos``.
    Items without a ``videoRenderer`` are skipped. Returns an empty list when
    the page's initial data cannot be extracted or the expected shelf is
    missing.

    NOTE: the page is fetched with the blocking ``urlopen`` call, so this
    coroutine does not yield to the event loop while the request is running.
    """
    html = _fetch('https://www.youtube.com/feed/trending/?persist_gl=1&gl=IN')

    json_data = await extract_json_tranding(_initial_data_script(html))
    if not json_data:
        return []

    # The shelf is read from fixed positions (first tab, fourth section, first
    # item); a different page layout yields an empty result instead of raising.
    all_video = _dig(
        json_data, 'contents', 'twoColumnBrowseResultsRenderer', 'tabs', 0,
        'tabRenderer', 'content', 'sectionListRenderer', 'contents', 3,
        'itemSectionRenderer', 'contents', 0, 'shelfRenderer', 'content',
        'expandedShelfContentsRenderer', 'items', default=[])
    return _collect_videos(all_video, _TRENDING_RENDERER_KEYS)