Spaces:

attendantelectro
/

tst

Runtime error

File size: 1,513 Bytes

09dd334
882b44e
09dd334
 
 
 
882b44e
 
 
 
 
 
09dd334
6718a3f
 
7c2ff90
6718a3f
09dd334
 
 
 
 
 
7c2ff90
09dd334
b08a63d
09dd334
 
 
6718a3f
 
7c2ff90
09dd334
 
 
6718a3f
09dd334
 
 
 
10a266f
09dd334

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import zipfile
import os

def scrape_pages(base_url, start_page, end_page,
                 output_dir='/home/seluser/output/pages',
                 zip_path='/home/seluser/output/html.zip'):
    """Save a range of paginated pages as HTML files and bundle them in a ZIP.

    For every page number N from ``start_page`` to ``end_page`` (inclusive),
    ``<base_url>?page=N`` is loaded in a headless Chrome session and the
    resulting page source is written to ``<output_dir>/t<N>.html`` as UTF-8.
    Once the browser has been shut down, every file found under
    ``output_dir`` (including files left there by earlier runs) is stored,
    under its bare file name, in a deflate-compressed archive at ``zip_path``.

    Args:
        base_url: URL of the listing; ``?page=N`` is appended verbatim.
        start_page: First page number to fetch.
        end_page: Last page number to fetch (inclusive). If it is smaller
            than ``start_page`` no page is fetched and the archive contains
            only whatever already exists in ``output_dir``.
        output_dir: Directory that receives the per-page HTML files. It is
            created when missing.
        zip_path: Location of the ZIP archive to write. Its parent directory
            is created when missing.

    Raises:
        Any exception raised by Selenium or by the file system is propagated
        to the caller; the browser is closed first in every case, and no
        archive is written when scraping fails.
    """
    chrome_options = Options()
    # Run Chrome without a window. The sandbox and /dev/shm flags are
    # presumably required by the container this script runs in.
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")

    # Prepare the destinations before launching the browser so that a file
    # system error cannot leave a browser process behind.
    os.makedirs(output_dir, exist_ok=True)
    zip_parent = os.path.dirname(zip_path)
    if zip_parent:
        os.makedirs(zip_parent, exist_ok=True)

    driver = webdriver.Chrome(options=chrome_options)
    try:
        for page in range(start_page, end_page + 1):
            driver.get(f"{base_url}?page={page}")
            html = driver.page_source

            page_path = f'{output_dir}/t{page}.html'
            with open(page_path, 'w', encoding='utf-8') as f:
                f.write(html)
            print(f"Saved: {page_path}")
    finally:
        # Always release the browser, even when a page load or write fails.
        driver.quit()

    # Build the ZIP archive from everything stored under output_dir.
    archive_abspath = os.path.abspath(zip_path)
    with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for root, _dirs, files in os.walk(output_dir):
            for file in files:
                file_path = os.path.join(root, file)
                # Never add the archive to itself when it lives in output_dir.
                if os.path.abspath(file_path) == archive_abspath:
                    continue
                zipf.write(file_path, file)

    print(f"All pages saved and zipped as {zip_path}")

if __name__ == "__main__":
    # Script entry point: fetch listing pages 1 through 10 (inclusive).
    # For a quick test run, choose a smaller end_page.
    scrape_pages(
        base_url="https://shahvani.com/dastans",
        start_page=1,
        end_page=10,
    )