pn23 committed on
Commit
7f45966
·
verified ·
1 Parent(s): 4ae12f3

Update scraping.py

Browse files
Files changed (1) hide show
  1. scraping.py +105 -104
scraping.py CHANGED
@@ -9,110 +9,111 @@ from pyspark.sql import SparkSession
9
  from delta import configure_spark_with_delta_pip
10
  import tempfile
11
 
12
- # Set the download directory to the "music" subfolder within the current directory
13
- download_directory = os.path.join(os.getcwd(), "music")
14
- os.makedirs(download_directory, exist_ok=True)
15
-
16
- # Set up Chrome options
17
- chrome_options = Options()
18
- chrome_options.add_experimental_option("prefs", {
19
- "download.default_directory": download_directory,
20
- "download.prompt_for_download": False,
21
- "download.directory_upgrade": True,
22
- "safebrowsing.enabled": True
23
- })
24
-
25
- # Set up the Selenium WebDriver (e.g., Chrome)
26
- driver = webdriver.Chrome(options=chrome_options)
27
-
28
- # Navigate to the website
29
- driver.get("https://suno.com/me")
30
-
31
-
32
- # Wait for the sign-in button to be clickable and click it
33
- sign_in_button = WebDriverWait(driver, 10).until(
34
- EC.element_to_be_clickable((By.CSS_SELECTOR, "#__next > div > div > div > div > div.cl-main.🔒️.cl-internal-xk295g > div > button.cl-socialButtonsIconButton.cl-socialButtonsIconButton__discord.🔒️.cl-internal-855i1h"))
35
- )
36
- sign_in_button.click()
37
-
38
- # Wait for the username field to be visible and enter the username
39
- #username: applebottom_12
40
- username_field = WebDriverWait(driver, 10).until(
41
- EC.visibility_of_element_located((By.CSS_SELECTOR, "#uid_8"))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  )
43
- username_field.send_keys("asfasfasfgasdfasgfsag@gmail.com")
44
-
45
- # Find the password field and enter the password
46
- password_field = WebDriverWait(driver, 10).until(
47
- EC.visibility_of_element_located((By.CSS_SELECTOR, "#uid_10"))
48
- )
49
- password_field.send_keys("AppleBottom12")
50
-
51
- # Find the password button and click it
52
- password_button = WebDriverWait(driver, 10).until(
53
- EC.element_to_be_clickable((By.CSS_SELECTOR, "#app-mount > div.appAsidePanelWrapper__5e6e2 > div.notAppAsidePanel__95814 > div.app_b1f720 > div > div > div > div > form > div.centeringWrapper__5e247 > div > div.mainLoginContainer_f58870 > div.block__681fa.marginTop20__7e0ad > button.marginBottom8_ce1fb9.button__5573c.button__581d0.lookFilled__950dd.colorBrand__27d57.sizeLarge_b395a7.fullWidth_fdb23d.grow__4c8a4"))
54
  )
55
- password_button.click()
56
-
57
- # Wait for the page to load after signing in
58
- WebDriverWait(driver, 10).until(
59
- EC.url_contains("https://suno.com/me")
60
- )
61
-
62
- # Click on the specific song
63
- three_dots = WebDriverWait(driver, 10).until(
64
- EC.element_to_be_clickable((By.LINK_TEXT, "Samba Kickoff"))
65
- )
66
- three_dots.click()
67
-
68
- # Play the song
69
- play_button = WebDriverWait(driver, 10).until(
70
- EC.element_to_be_clickable((By.CSS_SELECTOR, "body > div.css-fhtuey > div.css-bhm5u7 > div > div.css-l9hfgy > div.css-144pizt > button.chakra-button.css-15rci1t"))
71
  )
72
- play_button.click()
73
-
74
- time.sleep(3)
75
-
76
- three_dots = WebDriverWait(driver, 10).until(
77
- EC.element_to_be_clickable((By.XPATH, '//button[@aria-label="More Actions"]'))
78
- )
79
- three_dots.click()
80
-
81
- # Wait for the Download button to be clickable and click on it
82
- download_button = WebDriverWait(driver, 10).until(
83
- EC.element_to_be_clickable((By.XPATH, '//div[@role="menuitem" and contains(text(), "Download")]'))
84
- )
85
- download_button.click()
86
-
87
- # Wait for the Audio element to be clickable and click on it
88
- audio_element = WebDriverWait(driver, 10).until(
89
- EC.element_to_be_clickable((By.XPATH, '//div[@role="menuitem" and contains(text(), "Audio")]'))
90
- )
91
- audio_element.click()
92
-
93
- time.sleep(3)
94
-
95
- print("Successfully signed in!")
96
-
97
- # Create a SparkSession with Delta Lake configuration
98
- builder = SparkSession.builder.appName("SaveMP3ToDatabricks") \
99
- .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") \
100
- .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog")
101
-
102
- spark = configure_spark_with_delta_pip(builder).getOrCreate()
103
-
104
- # Setting the Retrieving Directory
105
- retrieve_directory = os.path.join(download_directory, "Samba Kickoff.mp3")
106
-
107
- # Read the downloaded MP3 file as binary
108
- mp3_data = spark.sparkContext.binaryFiles(retrieve_directory).collect()[0][1]
109
-
110
- # Create a DataFrame with the MP3 data
111
- df = spark.createDataFrame([("Samba Kickoff.mp3", mp3_data,)], ["song_name", "mp3_data"])
112
-
113
- # Save the DataFrame to a Databricks table
114
- df.write.format("delta").mode("append").saveAsTable("mp3_table")
115
-
116
- print("MP3 file saved to Databricks table.")
117
-
118
- driver.quit()
 
9
  from delta import configure_spark_with_delta_pip
10
  import tempfile
11
 
12
+ def get_mp3(song_title):
13
+ # Set the download directory to the "music" subfolder within the current directory
14
+ download_directory = os.path.join(os.getcwd(), "music")
15
+ os.makedirs(download_directory, exist_ok=True)
16
+
17
+ # Set up Chrome options
18
+ chrome_options = Options()
19
+ chrome_options.add_experimental_option("prefs", {
20
+ "download.default_directory": download_directory,
21
+ "download.prompt_for_download": False,
22
+ "download.directory_upgrade": True,
23
+ "safebrowsing.enabled": True
24
+ })
25
+
26
+ # Set up the Selenium WebDriver (e.g., Chrome)
27
+ driver = webdriver.Chrome(options=chrome_options)
28
+
29
+ # Navigate to the website
30
+ driver.get("https://suno.com/me")
31
+
32
+
33
+ # Wait for the sign-in button to be clickable and click it
34
+ sign_in_button = WebDriverWait(driver, 10).until(
35
+ EC.element_to_be_clickable((By.CSS_SELECTOR, "#__next > div > div > div > div > div.cl-main.🔒️.cl-internal-xk295g > div > button.cl-socialButtonsIconButton.cl-socialButtonsIconButton__discord.🔒️.cl-internal-855i1h"))
36
+ )
37
+ sign_in_button.click()
38
+
39
+ # Wait for the username field to be visible and enter the username
40
+ #username: applebottom_12
41
+ username_field = WebDriverWait(driver, 10).until(
42
+ EC.visibility_of_element_located((By.CSS_SELECTOR, "#uid_8"))
43
+ )
44
+ username_field.send_keys("asfasfasfgasdfasgfsag@gmail.com")
45
+
46
+ # Find the password field and enter the password
47
+ password_field = WebDriverWait(driver, 10).until(
48
+ EC.visibility_of_element_located((By.CSS_SELECTOR, "#uid_10"))
49
+ )
50
+ password_field.send_keys("AppleBottom12")
51
+
52
+ # Find the password button and click it
53
+ password_button = WebDriverWait(driver, 10).until(
54
+ EC.element_to_be_clickable((By.CSS_SELECTOR, "#app-mount > div.appAsidePanelWrapper__5e6e2 > div.notAppAsidePanel__95814 > div.app_b1f720 > div > div > div > div > form > div.centeringWrapper__5e247 > div > div.mainLoginContainer_f58870 > div.block__681fa.marginTop20__7e0ad > button.marginBottom8_ce1fb9.button__5573c.button__581d0.lookFilled__950dd.colorBrand__27d57.sizeLarge_b395a7.fullWidth_fdb23d.grow__4c8a4"))
55
+ )
56
+ password_button.click()
57
+
58
+ # Wait for the page to load after signing in
59
+ WebDriverWait(driver, 10).until(
60
+ EC.url_contains("https://suno.com/me")
61
+ )
62
+
63
+ # Click on the specific song
64
+ three_dots = WebDriverWait(driver, 10).until(
65
+ EC.element_to_be_clickable((By.LINK_TEXT, song_title))
66
+ )
67
+ three_dots.click()
68
+
69
+ # Play the song
70
+ play_button = WebDriverWait(driver, 10).until(
71
+ EC.element_to_be_clickable((By.CSS_SELECTOR, "body > div.css-fhtuey > div.css-bhm5u7 > div > div.css-l9hfgy > div.css-144pizt > button.chakra-button.css-15rci1t"))
72
+ )
73
+ play_button.click()
74
+
75
+ time.sleep(3)
76
+
77
+ three_dots = WebDriverWait(driver, 10).until(
78
+ EC.element_to_be_clickable((By.XPATH, '//button[@aria-label="More Actions"]'))
79
  )
80
+ three_dots.click()
81
+
82
+ # Wait for the Download button to be clickable and click on it
83
+ download_button = WebDriverWait(driver, 10).until(
84
+ EC.element_to_be_clickable((By.XPATH, '//div[@role="menuitem" and contains(text(), "Download")]'))
 
 
 
 
 
 
85
  )
86
+ download_button.click()
87
+
88
+ # Wait for the Audio element to be clickable and click on it
89
+ audio_element = WebDriverWait(driver, 10).until(
90
+ EC.element_to_be_clickable((By.XPATH, '//div[@role="menuitem" and contains(text(), "Audio")]'))
 
 
 
 
 
 
 
 
 
 
 
91
  )
92
+ audio_element.click()
93
+
94
+ time.sleep(3)
95
+
96
+ # print("Successfully signed in!")
97
+
98
+ # # Create a SparkSession with Delta Lake configuration
99
+ # builder = SparkSession.builder.appName("SaveMP3ToDatabricks") \
100
+ # .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") \
101
+ # .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog")
102
+
103
+ # spark = configure_spark_with_delta_pip(builder).getOrCreate()
104
+
105
+ # # Setting the Retrieving Directory
106
+ # retrieve_directory = os.path.join(download_directory, f"{song_title}.mp3")
107
+
108
+ # # Read the downloaded MP3 file as binary
109
+ # mp3_data = spark.sparkContext.binaryFiles(retrieve_directory).collect()[0][1]
110
+
111
+ # # Create a DataFrame with the MP3 data
112
+ # df = spark.createDataFrame([("Samba Kickoff.mp3", mp3_data,)], ["song_name", "mp3_data"])
113
+
114
+ # # Save the DataFrame to a Databricks table
115
+ # df.write.format("delta").mode("append").saveAsTable("mp3_table")
116
+
117
+ # print("MP3 file saved to Databricks table.")
118
+
119
+ driver.quit()