Spaces:
Sleeping
Sleeping
Update scrape_3gpp.py
Browse files- scrape_3gpp.py +8 -4
scrape_3gpp.py
CHANGED
|
@@ -76,7 +76,8 @@ def scrape(url, excel_file, folder_name, status_list, progress=gr.Progress()):
|
|
| 76 |
download_directory = folder_name
|
| 77 |
if not os.path.exists(download_directory):
|
| 78 |
os.makedirs(download_directory)
|
| 79 |
-
|
|
|
|
| 80 |
print(f'filenames: {status_filenames}')
|
| 81 |
if not filenames and not status_filenames:
|
| 82 |
print("No Excel file provided, or no valid URLs found in the file.")
|
|
@@ -91,8 +92,7 @@ def scrape(url, excel_file, folder_name, status_list, progress=gr.Progress()):
|
|
| 91 |
|
| 92 |
# Filtrer les liens se terminant par ".zip"
|
| 93 |
zip_links = [link['href'] for link in links if link['href'].endswith('.zip')]
|
| 94 |
-
|
| 95 |
-
pourcentss = 0.1
|
| 96 |
# Télécharger chaque fichier zip
|
| 97 |
for zip_link in zip_links:
|
| 98 |
if download_num%10 == 0:
|
|
@@ -121,7 +121,11 @@ def scrape(url, excel_file, folder_name, status_list, progress=gr.Progress()):
|
|
| 121 |
for file_url in status_filenames:
|
| 122 |
filename = os.path.basename(file_url)
|
| 123 |
save_path = os.path.join(download_directory, filename)
|
| 124 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 125 |
try:
|
| 126 |
with requests.get(file_url, stream=True) as r:
|
| 127 |
r.raise_for_status()
|
|
|
|
| 76 |
download_directory = folder_name
|
| 77 |
if not os.path.exists(download_directory):
|
| 78 |
os.makedirs(download_directory)
|
| 79 |
+
download_num = 0
|
| 80 |
+
pourcentss = 0.1
|
| 81 |
print(f'filenames: {status_filenames}')
|
| 82 |
if not filenames and not status_filenames:
|
| 83 |
print("No Excel file provided, or no valid URLs found in the file.")
|
|
|
|
| 92 |
|
| 93 |
# Filtrer les liens se terminant par ".zip"
|
| 94 |
zip_links = [link['href'] for link in links if link['href'].endswith('.zip')]
|
| 95 |
+
|
|
|
|
| 96 |
# Télécharger chaque fichier zip
|
| 97 |
for zip_link in zip_links:
|
| 98 |
if download_num%10 == 0:
|
|
|
|
| 121 |
for file_url in status_filenames:
|
| 122 |
filename = os.path.basename(file_url)
|
| 123 |
save_path = os.path.join(download_directory, filename)
|
| 124 |
+
if download_num%10 == 0:
|
| 125 |
+
pourcentss = pourcentss + download_num/500
|
| 126 |
+
progress(pourcentss,desc='Telechargement')
|
| 127 |
+
download_num = 0
|
| 128 |
+
download_num+=1
|
| 129 |
try:
|
| 130 |
with requests.get(file_url, stream=True) as r:
|
| 131 |
r.raise_for_status()
|