cloudyu committed on
Commit
99ece20
·
verified ·
1 Parent(s): 9e587df

Create README.md

Browse files
Files changed (1) hide show
  1. README.md +58 -0
README.md ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: other
3
+ tags:
4
+ - yi
5
+ - moe
6
+ license_name: yi-license
7
+ license_link: https://huggingface.co/01-ai/Yi-34B-200K/blob/main/LICENSE
8
+ ---
9
+ Example output (the prompt, followed by the model's response):
10
+
11
+
12
+ 写一个python程序抓取youtube最流行的内容:从网页获取数据,
13
+
14
+ 以下是一个python脚本,可以从Youtube上抓取最受欢迎的视频并将其保存到本地文件中。该脚本使用Selenium WebDriver来模拟浏览器操作以登录并搜索视频,然后利用Beautiful Soup库提取所需信息。如果您需要任何帮助或修改建议,请随时告诉我!
15
+
16
+ ```python
17
+ from selenium import webdriver
18
+ from bs4 import BeautifulSoup as BS
19
+ import os
20
+
21
+ # 设置ChromeDriver路径
22
+ CHROMEDRIVER_PATH = './chromedriver.exe' # 更换为实际路径
23
+
24
+ def main():
25
+ # 启动Chrome浏览器并登录Youtube
26
+ browser = webdriver.Chrome(executable_path=CHROMEDRIVER_PATH)
27
+ browser.get('https://www.youtube.com/')
28
+ login_url = 'https://accounts.google.com/signin'
29
+ browser.get(login_url)
30
+ # 模拟输入账号和密码并登录
31
+ username = input('Username: ')
32
+ password = getpass.getpass() # 防止密码显示在终端上
33
+ submit_button = browser.find_element_by_class_name('whsLW', timeout=5)
34
+ submit_button.click()
35
+
36
+ # 在搜索栏中输入关键词并查找结果
37
+ search_query = 'most popular youtube videos today'
38
+ search_box = browser.find_element_by_id('search_query')
39
+ search_box.send_keys(search_query)
40
+ enter_key = browser.find_element_by_class_name('ytp-suggest-renderer__input-icon')
41
+ enter_key.click()
42
+ results = BS(browser.page_source, 'html.parser')
43
+
44
+ # 从搜索结果中提取出最受欢迎的视频链接并保存到本地文件
45
+ videos = results.find('ol', {'class': 'r'})
46
+ video_urls = [a['href'] for a in videos.find_all('a')]
47
+ saved_path = 'popular_videos/' + os.getcwd().split('/')[-2] + '_' + str(int(time.time())) + '/'
48
+ if not os.path.exists(saved_path):
49
+ os.makedirs(saved_path)
50
+ for i, url in enumerate(video_urls[:50]):
51
+ filename = saved_path + '%d.mp4' % (i+1)
52
+ urllib.request.urlretrieve(url, filename)
53
+ print('Downloaded top 50 videos to:', saved_path)
54
+
55
+ except Exception as e:
56
+ print(e)
57
+
58
+ ```