Spaces:
Runtime error
Runtime error
File size: 1,513 Bytes
09dd334 882b44e 09dd334 882b44e 09dd334 6718a3f 7c2ff90 6718a3f 09dd334 7c2ff90 09dd334 b08a63d 09dd334 6718a3f 7c2ff90 09dd334 6718a3f 09dd334 10a266f 09dd334 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 | from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import zipfile
import os
def scrape_pages(base_url, start_page, end_page):
    """Save the rendered HTML of a range of pages and bundle them into a ZIP.

    For every page number ``n`` from ``start_page`` to ``end_page``
    (inclusive), loads ``{base_url}?page={n}`` in headless Chrome and writes
    the page source to ``/home/seluser/output/pages/t{n}.html``. Afterwards
    every file found under that directory is added to
    ``/home/seluser/output/html.zip``, stored under its bare file name.

    Args:
        base_url: URL to which ``?page=<n>`` is appended.
        start_page: First page number to fetch (inclusive).
        end_page: Last page number to fetch (inclusive).

    Raises:
        OSError: If the output directory or files cannot be written.
        Errors raised by Selenium while starting the browser or loading a
        page propagate unchanged; the browser is shut down either way.
    """
    # Save the pages under /home/seluser/output/pages.
    output_dir = '/home/seluser/output/pages'
    # Create the directory before launching the browser so a filesystem
    # failure cannot leave a Chrome process behind. exist_ok=True already
    # covers the "directory exists" case, so no separate check is needed.
    os.makedirs(output_dir, exist_ok=True)

    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")
    driver = webdriver.Chrome(options=chrome_options)
    try:
        for page in range(start_page, end_page + 1):
            url = f"{base_url}?page={page}"
            driver.get(url)
            html = driver.page_source
            with open(f'{output_dir}/t{page}.html', 'w', encoding='utf-8') as f:
                f.write(html)
            print(f"Saved: {output_dir}/t{page}.html")
    finally:
        # Always release the browser, even when a page load or write fails.
        driver.quit()

    # Create the ZIP file under /home/seluser/output.
    with zipfile.ZipFile('/home/seluser/output/html.zip', 'w', zipfile.ZIP_DEFLATED) as zipf:
        for root, dirs, files in os.walk(output_dir):
            for file in files:
                # Store each file under its base name (no directory prefix).
                zipf.write(os.path.join(root, file), file)
    print("All pages saved and zipped as /home/seluser/output/html.zip")
if __name__ == "__main__":
    # Script entry point: scrape pages 1 through 10 of the listing.
    # For testing, choose a smaller end_page.
    base_url, start_page, end_page = "https://shahvani.com/dastans", 1, 10
    scrape_pages(
        base_url=base_url,
        start_page=start_page,
        end_page=end_page,
    )
|