File size: 1,513 Bytes
09dd334
882b44e
09dd334
 
 
 
882b44e
 
 
 
 
 
09dd334
6718a3f
 
7c2ff90
6718a3f
09dd334
 
 
 
 
 
7c2ff90
09dd334
b08a63d
09dd334
 
 
6718a3f
 
7c2ff90
09dd334
 
 
6718a3f
09dd334
 
 
 
10a266f
09dd334
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import zipfile
import os

def scrape_pages(base_url, start_page, end_page):
    """Save a range of paginated pages as HTML files and zip them.

    For every page number from ``start_page`` to ``end_page`` (inclusive),
    ``{base_url}?page={n}`` is loaded in headless Chrome and the resulting
    page source is written to ``/home/seluser/output/pages/t{n}.html``.
    Afterwards every file found under that directory is added to
    ``/home/seluser/output/html.zip``.

    Args:
        base_url: URL to which ``?page=<n>`` is appended verbatim.
        start_page: First page number to fetch.
        end_page: Last page number to fetch (inclusive).

    Exceptions raised while starting the browser, loading a page or writing
    a file propagate to the caller; once the browser has been started it is
    always shut down before the exception leaves this function.
    """
    # Save pages under /home/seluser/output/pages. The directory is created
    # before the browser is launched so a filesystem error cannot leave a
    # Chrome process behind.
    output_dir = '/home/seluser/output/pages'
    os.makedirs(output_dir, exist_ok=True)

    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")

    driver = webdriver.Chrome(options=chrome_options)
    try:
        for page in range(start_page, end_page + 1):
            driver.get(f"{base_url}?page={page}")
            html = driver.page_source

            page_path = f'{output_dir}/t{page}.html'
            with open(page_path, 'w', encoding='utf-8') as f:
                f.write(html)
            print(f"Saved: {page_path}")
    finally:
        # Release the browser even when a page load or file write fails.
        driver.quit()

    # Create the ZIP file in /home/seluser/output. The walk picks up every
    # file currently in the pages directory (including ones left by earlier
    # runs) and stores each under its bare file name.
    with zipfile.ZipFile('/home/seluser/output/html.zip', 'w', zipfile.ZIP_DEFLATED) as zipf:
        for root, _dirs, files in os.walk(output_dir):
            for file in files:
                zipf.write(os.path.join(root, file), file)

    print("All pages saved and zipped as /home/seluser/output/html.zip")

if __name__ == "__main__":
    # Target listing URL and the inclusive page range to download.
    # For a test run, choose a smaller last page number.
    base_url, start_page, end_page = "https://shahvani.com/dastans", 1, 10

    scrape_pages(base_url=base_url, start_page=start_page, end_page=end_page)