Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -30,12 +30,17 @@ def download_html_and_files(url, subdir):
|
|
| 30 |
html_content = requests.get(url).text
|
| 31 |
soup = BeautifulSoup(html_content, 'html.parser')
|
| 32 |
base_url = urllib.parse.urlunparse(urllib.parse.urlparse(url)._replace(path='', params='', query='', fragment=''))
|
|
|
|
| 33 |
for link in soup.find_all('a'):
|
| 34 |
file_url = urllib.parse.urljoin(base_url, link.get('href'))
|
| 35 |
local_filename = os.path.join(subdir, urllib.parse.urlparse(file_url).path.split('/')[-1])
|
| 36 |
-
|
|
|
|
|
|
|
| 37 |
link['href'] = local_filename
|
| 38 |
download_file(file_url, local_filename)
|
|
|
|
|
|
|
| 39 |
with open(os.path.join(subdir, "index.html"), "w") as file:
|
| 40 |
file.write(str(soup))
|
| 41 |
|
|
|
|
| 30 |
html_content = requests.get(url).text
|
| 31 |
soup = BeautifulSoup(html_content, 'html.parser')
|
| 32 |
base_url = urllib.parse.urlunparse(urllib.parse.urlparse(url)._replace(path='', params='', query='', fragment=''))
|
| 33 |
+
|
| 34 |
for link in soup.find_all('a'):
|
| 35 |
file_url = urllib.parse.urljoin(base_url, link.get('href'))
|
| 36 |
local_filename = os.path.join(subdir, urllib.parse.urlparse(file_url).path.split('/')[-1])
|
| 37 |
+
|
| 38 |
+
# Skip if the local filename is a directory
|
| 39 |
+
if not local_filename.endswith('/') and local_filename != subdir:
|
| 40 |
link['href'] = local_filename
|
| 41 |
download_file(file_url, local_filename)
|
| 42 |
+
|
| 43 |
+
# Save the modified HTML content
|
| 44 |
with open(os.path.join(subdir, "index.html"), "w") as file:
|
| 45 |
file.write(str(soup))
|
| 46 |
|