Spaces:
Paused
Paused
| from bs4 import BeautifulSoup | |
| from urllib.request import urlopen | |
| from urllib.parse import urlencode | |
| import requests | |
| import re | |
| import json | |
async def extract_json_tranding(data):
    """Extract and parse the ``ytInitialData`` JSON object from script text.

    Args:
        data: A string, or an iterable of strings that is concatenated,
            holding the script source. The callers in this file pass
            ``str(script.contents)``, i.e. a Python ``repr`` of the script
            text, so backslash escapes must be undone before JSON parsing.

    Returns:
        The decoded JSON value, or ``None`` when no
        ``var ytInitialData = {...};`` assignment is found or the payload
        cannot be decoded (a message is printed in both cases).
    """
    combined_str = ''.join(data)

    # Greedy match: capture everything up to the last "};" on the line.
    match = re.search(r'var ytInitialData\s*=\s*(\{.*\});', combined_str)
    if not match:
        print("No JSON data found.")
        return None

    try:
        # Undo backslash escapes without corrupting non-ASCII text.
        # Encoding as UTF-8 first would make ``unicode_escape`` read each
        # UTF-8 byte as a separate Latin-1 character (mojibake). With
        # latin-1 + backslashreplace, code points <= 0xFF round-trip as
        # single bytes and everything else becomes a \uXXXX / \UXXXXXXXX
        # escape that ``unicode_escape`` restores.
        json_str = (
            match.group(1)
            .encode('latin-1', 'backslashreplace')
            .decode('unicode_escape')
        )
        return json.loads(json_str)
    except (UnicodeDecodeError, json.JSONDecodeError) as e:
        # Malformed escapes and malformed JSON are reported the same way.
        print("Error decoding JSON:", e)
        return None
# Renderer keys accepted for a search result item, in priority order.
_SEARCH_RENDERER_KEYS = ('videoRenderer', 'childVideoRenderer', 'gridVideoRenderer')

# Position of the section read by tranding_videos() in the trending page's
# section list. Hard-coded in the original code.
# NOTE(review): depends on YouTube's current page layout - confirm.
_TRENDING_SECTION_INDEX = 3


def _read_url(url):
    """Download *url* and return the raw response body as bytes."""
    # The context manager closes the HTTP response even if read() raises.
    with urlopen(url) as response:
        return response.read()


async def _load_initial_data(url):
    """Fetch a page and return its parsed ``ytInitialData`` payload.

    Returns whatever ``extract_json_tranding`` returns for the script text,
    which is ``None`` when no payload is found or it cannot be decoded.
    """
    import asyncio  # local import so the block does not rely on file-level imports

    # urlopen() blocks; run it in the default executor so the event loop
    # stays responsive while the page downloads.
    loop = asyncio.get_event_loop()
    html = await loop.run_in_executor(None, _read_url, url)

    soup = BeautifulSoup(html, 'lxml')
    text = ''
    for script in soup.find_all('script'):
        contents = str(script.contents)
        if 'var ytInitialData' in contents:
            # No break: as before, the last matching script wins.
            text = contents
    return await extract_json_tranding(text)


def _simple_text(renderer, key):
    """Return ``renderer[key]['simpleText']``, or '' when missing or empty."""
    return (renderer.get(key) or {}).get('simpleText') or ''


def _first_run_text(renderer, key):
    """Return ``renderer[key]['runs'][0]['text']``, or '' when missing."""
    runs = (renderer.get(key) or {}).get('runs') or []
    return runs[0].get('text', '') if runs else ''


def _channel_thumbnail(renderer):
    """Return the URL of the first channel thumbnail, or '' when missing."""
    thumbnails = (
        renderer.get('channelThumbnailSupportedRenderers', {})
        .get('channelThumbnailWithLinkRenderer', {})
        .get('thumbnail', {})
        .get('thumbnails', [])
    )
    return thumbnails[0].get('url', '') if thumbnails else ''


def _video_details(renderer):
    """Build the result dict for one video renderer.

    ``videoId``, ``title``, ``thumbnail``, ``navigationEndpoint`` and
    ``ownerText`` are indexed directly, so a renderer lacking any of them
    raises ``KeyError``/``IndexError``. All other fields fall back to ''.
    """
    web_path = renderer['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url']
    return {
        "id": renderer['videoId'],
        "title": renderer['title']['runs'][0]['text'],
        "thumbnails": renderer['thumbnail']['thumbnails'],
        "description": _first_run_text(renderer, 'descriptionSnippet'),
        "publishedTime": _simple_text(renderer, 'publishedTimeText'),
        "length": _simple_text(renderer, 'lengthText'),
        "views": _simple_text(renderer, 'viewCountText'),
        "url": f"https://www.youtube.com{web_path}",
        "short_views": _simple_text(renderer, 'shortViewCountText'),
        "channelThumbnail": _channel_thumbnail(renderer),
        "channel": renderer['ownerText']['runs'][0]['text'],
    }


async def search_videos(query):
    """Search YouTube for *query* and return a list of video detail dicts.

    Args:
        query: The search terms, sent as the ``search_query`` URL parameter.

    Returns:
        A list of dicts with the keys ``id``, ``title``, ``thumbnails``,
        ``description``, ``publishedTime``, ``length``, ``views``, ``url``,
        ``short_views``, ``channelThumbnail`` and ``channel``. The list is
        empty when the page data cannot be extracted or holds no sections.
    """
    # NOTE(review): 'sp' already contains percent-escapes and urlencode()
    # escapes its '%' characters again - confirm this is the intended value.
    final_query = {'search_query': query, 'sp': 'EgIQAQ%253D%253D', 'persist_gl': 'IN'}
    param = urlencode(final_query)

    json_data = await _load_initial_data(f'https://www.youtube.com/results?{param}')
    if not json_data:
        # extract_json_tranding() returned None (it already printed why).
        return []

    sections = (
        json_data.get('contents', {})
        .get('twoColumnSearchResultsRenderer', {})
        .get('primaryContents', {})
        .get('sectionListRenderer', {})
        .get('contents', [])
    )
    if not sections:
        return []
    items = sections[0].get('itemSectionRenderer', {}).get('contents', [])

    searchresult_list = []
    for item in items:
        # Use the first non-empty renderer; items with none of them are skipped.
        renderer = next(
            (item[key] for key in _SEARCH_RENDERER_KEYS if item.get(key)),
            None,
        )
        if renderer:
            searchresult_list.append(_video_details(renderer))
    return searchresult_list


async def tranding_videos():
    """Return video detail dicts read from the YouTube trending page (gl=IN).

    Returns:
        A list of dicts shaped like the ones from ``search_videos``. The list
        is empty when the page data cannot be extracted or the expected
        section is absent. Items without a ``videoRenderer`` are skipped.
    """
    json_data = await _load_initial_data(
        'https://www.youtube.com/feed/trending/?persist_gl=1&gl=IN'
    )
    if not json_data:
        return []

    tabs = (
        json_data.get('contents', {})
        .get('twoColumnBrowseResultsRenderer', {})
        .get('tabs', [])
    )
    if not tabs:
        return []

    sections = (
        tabs[0]
        .get('tabRenderer', {})
        .get('content', {})
        .get('sectionListRenderer', {})
        .get('contents', [])
    )
    if len(sections) <= _TRENDING_SECTION_INDEX:
        return []

    shelves = (
        sections[_TRENDING_SECTION_INDEX]
        .get('itemSectionRenderer', {})
        .get('contents', [])
    )
    if not shelves:
        return []

    all_video = (
        shelves[0]
        .get('shelfRenderer', {})
        .get('content', {})
        .get('expandedShelfContentsRenderer', {})
        .get('items', [])
    )
    return [
        _video_details(video['videoRenderer'])
        for video in all_video
        if video.get('videoRenderer')
    ]