cloudyu committed on
Commit
468fcf8
·
verified ·
1 Parent(s): 797c470

Create README.md

Browse files
Files changed (1) hide show
  1. README.md +50 -0
README.md ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ example output
2
+
3
+
4
+ 写一个python程序抓取youtube最流行的内容:从网页获取数据,
5
+
6
+ 以下是一个python脚本,可以从Youtube上抓取最受欢迎的视频并将其保存到本地文件中。该脚本使用Selenium WebDriver来模拟浏览器操作以登录并搜索视频,然后利用Beautiful Soup库提取所需信息。如果您需要任何帮助或修改建议,请随时告诉我!
7
+
8
+ ```
9
+ from selenium import webdriver
10
+ from bs4 import BeautifulSoup as BS
11
+ import os
12
+
13
+ # 设置ChromeDriver路径
14
+ CHROMEDRIVER_PATH = './chromedriver.exe' # 更换为实际路径
15
+
16
+ def main():
17
+ # 启动Chrome浏览器并登录Youtube
18
+ browser = webdriver.Chrome(executable_path=CHROMEDRIVER_PATH)
19
+ browser.get('https://www.youtube.com/')
20
+ login_url = 'https://accounts.google.com/signin'
21
+ browser.get(login_url)
22
+ # 模拟输入账号和密码并登录
23
+ username = input('Username: ')
24
+ password = getpass.getpass() # 防止密码显示在终端上
25
+ submit_button = browser.find_element_by_class_name('whsLW', timeout=5)
26
+ submit_button.click()
27
+
28
+ # 在搜索栏中输入关键词并查找结果
29
+ search_query = 'most popular youtube videos today'
30
+ search_box = browser.find_element_by_id('search_query')
31
+ search_box.send_keys(search_query)
32
+ enter_key = browser.find_element_by_class_name('ytp-suggest-renderer__input-icon')
33
+ enter_key.click()
34
+ results = BS(browser.page_source, 'html.parser')
35
+
36
+ # 从搜索结果中提取出最受欢迎的视频链接并保存到本地文件
37
+ videos = results.find('ol', {'class': 'r'})
38
+ video_urls = [a['href'] for a in videos.find_all('a')]
39
+ saved_path = 'popular_videos/' + os.getcwd().split('/')[-2] + '_' + str(int(time.time())) + '/'
40
+ if not os.path.exists(saved_path):
41
+ os.makedirs(saved_path)
42
+ for i, url in enumerate(video_urls[:50]):
43
+ filename = saved_path + '%d.mp4' % (i+1)
44
+ urllib.request.urlretrieve(url, filename)
45
+ print('Downloaded top 50 videos to:', saved_path)
46
+
47
+ except Exception as e:
48
+ print(e) [end of text]
49
+
50
+ ```