attendantelectro committed on
Commit
b08a63d
·
verified ·
1 Parent(s): 7c2ff90

Update scraper.py

Browse files
Files changed (1) hide show
  1. scraper.py +6 -6
scraper.py CHANGED
@@ -11,8 +11,8 @@ def scrape_pages(base_url, start_page, end_page):
11
 
12
  driver = webdriver.Chrome(options=chrome_options)
13
 
14
- # مسیر ذخیره‌سازی در داخل کانتینر
15
- output_dir = '/app/pages'
16
  if not os.path.exists(output_dir):
17
  os.makedirs(output_dir)
18
 
@@ -23,17 +23,17 @@ def scrape_pages(base_url, start_page, end_page):
23
 
24
  with open(f'{output_dir}/t{page}.html', 'w', encoding='utf-8') as f:
25
  f.write(html)
26
- print(f"Saved: t{page}.html")
27
 
28
  driver.quit()
29
 
30
- # ایجاد فایل ZIP در مسیر متصل
31
- with zipfile.ZipFile('/app/html.zip', 'w', zipfile.ZIP_DEFLATED) as zipf:
32
  for root, dirs, files in os.walk(output_dir):
33
  for file in files:
34
  zipf.write(os.path.join(root, file), file)
35
 
36
- print("All pages saved and zipped as /app/html.zip")
37
 
38
  if __name__ == "__main__":
39
  base_url = "https://shahvani.com/dastans"
 
11
 
12
  driver = webdriver.Chrome(options=chrome_options)
13
 
14
+ # ذخیره در مسیر فعلی (همان مسیر اسکریپت)
15
+ output_dir = './pages'
16
  if not os.path.exists(output_dir):
17
  os.makedirs(output_dir)
18
 
 
23
 
24
  with open(f'{output_dir}/t{page}.html', 'w', encoding='utf-8') as f:
25
  f.write(html)
26
+ print(f"Saved: {output_dir}/t{page}.html")
27
 
28
  driver.quit()
29
 
30
+ # ایجاد فایل ZIP در مسیر فعلی
31
+ with zipfile.ZipFile('./html.zip', 'w', zipfile.ZIP_DEFLATED) as zipf:
32
  for root, dirs, files in os.walk(output_dir):
33
  for file in files:
34
  zipf.write(os.path.join(root, file), file)
35
 
36
+ print("All pages saved and zipped as ./html.zip")
37
 
38
  if __name__ == "__main__":
39
  base_url = "https://shahvani.com/dastans"