petertulip86 committed on
Commit
f007e8d
·
verified ·
1 Parent(s): 44593ac

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +68 -68
app.py CHANGED
@@ -17,7 +17,7 @@ import subprocess
17
  import sys
18
 
19
  def setup_chrome_driver():
20
- """置Chrome WebDriver,Hugging Face Spaces"""
21
  chrome_options = Options()
22
  chrome_options.add_argument("--headless")
23
  chrome_options.add_argument("--no-sandbox")
@@ -27,7 +27,7 @@ def setup_chrome_driver():
27
  chrome_options.add_argument("--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
28
 
29
  try:
30
- # 尝试使用系中的Chrome
31
  driver = webdriver.Chrome(options=chrome_options)
32
  return driver
33
  except Exception as e:
@@ -35,10 +35,10 @@ def setup_chrome_driver():
35
  return None
36
 
37
  def extract_images_from_pinterest(url, max_images, progress_callback=None):
38
- """Pinterest面提取片URL"""
39
  driver = setup_chrome_driver()
40
  if not driver:
41
- return [], "启动Chrome WebDriver"
42
 
43
  try:
44
  driver.get(url)
@@ -49,12 +49,12 @@ def extract_images_from_pinterest(url, max_images, progress_callback=None):
49
  no_new_images_count = 0
50
 
51
  while len(imgList) < max_images:
52
- # 滚动页面
53
  scroll += 800
54
  driver.execute_script(f'window.scrollTo(0, {scroll})')
55
  time.sleep(1)
56
 
57
- # 片元素
58
  imgs = driver.find_elements(By.CSS_SELECTOR, 'div[data-test-id="pin"] img')
59
  new_images = 0
60
 
@@ -68,9 +68,9 @@ def extract_images_from_pinterest(url, max_images, progress_callback=None):
68
  continue
69
 
70
  if progress_callback:
71
- progress_callback(f"已找到 {len(imgList)} 张图片")
72
 
73
- # 检查是否有新图片
74
  if new_images == 0:
75
  no_new_images_count += 1
76
  else:
@@ -82,15 +82,15 @@ def extract_images_from_pinterest(url, max_images, progress_callback=None):
82
  return imgList, None
83
 
84
  except Exception as e:
85
- return [], f"提取: {str(e)}"
86
  finally:
87
  driver.quit()
88
 
89
  def download_image(args):
90
- """下载单张图片"""
91
  index, url, temp_dir = args
92
  try:
93
- # 转换为高清片URL
94
  if '236x' in url:
95
  url = url.replace('236x', 'originals')
96
  elif '474x' in url:
@@ -99,7 +99,7 @@ def download_image(args):
99
  # 生成文件名
100
  filename = f"pinterest_img_{index+1:04d}"
101
 
102
- # URL提取原始文件名
103
  url_parts = url.split('/')
104
  if len(url_parts) > 0:
105
  original_name = url_parts[-1].split('?')[0]
@@ -107,13 +107,13 @@ def download_image(args):
107
  clean_name = re.sub(r'[^\w\-_\.]', '_', original_name)
108
  filename = f"pinterest_img_{index+1:04d}_{clean_name}"
109
 
110
- # 保文件展名
111
  if not filename.lower().endswith(('.jpg', '.jpeg', '.png', '.gif', '.webp')):
112
  filename += '.jpg'
113
 
114
  filepath = os.path.join(temp_dir, filename)
115
 
116
- # 下载图片
117
  headers = {
118
  'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
119
  }
@@ -127,30 +127,30 @@ def download_image(args):
127
  return True, filename
128
 
129
  except Exception as e:
130
- return False, f"下: {str(e)}"
131
 
132
  def scrape_pinterest_images(pinterest_url, num_images, progress=gr.Progress()):
133
- """主要的爬"""
134
  if not pinterest_url:
135
- return None, "请输入Pinterest URL"
136
 
137
  if num_images <= 0:
138
- return None, "量必0"
139
 
140
- if num_images > 500: # 限制最大
141
- return None, "量不能超500"
142
 
143
- # 验证URL
144
  try:
145
  parsed_url = urlparse(pinterest_url)
146
  if 'pinterest.com' not in parsed_url.netloc:
147
- return None, "请输入有效的Pinterest URL"
148
  except:
149
- return None, "URL格式效"
150
 
151
- progress(0, desc="始提取片URL...")
152
 
153
- # 提取片URL
154
  def update_progress(msg):
155
  progress(0.3, desc=msg)
156
 
@@ -160,40 +160,40 @@ def scrape_pinterest_images(pinterest_url, num_images, progress=gr.Progress()):
160
  return None, error
161
 
162
  if not img_urls:
163
- return None, "未找到任何片"
164
 
165
- progress(0.5, desc=f"始下 {len(img_urls)} 张图片...")
166
 
167
- # 创建临时目录
168
  temp_dir = tempfile.mkdtemp()
169
 
170
  try:
171
- # 准备载参数
172
  download_args = [(i, url, temp_dir) for i, url in enumerate(img_urls)]
173
 
174
- # 多线程下载
175
  successful_downloads = []
176
  failed_downloads = []
177
 
178
- with ThreadPoolExecutor(max_workers=3) as executor: # 降低并发数
179
  results = list(executor.map(download_image, download_args))
180
 
181
  for i, (success, info) in enumerate(results):
182
  if success:
183
  successful_downloads.append(info)
184
  else:
185
- failed_downloads.append(f"片 {i+1}: {info}")
186
 
187
- # 更新
188
  progress((0.5 + 0.4 * (i + 1) / len(results)),
189
- desc=f"已下 {len(successful_downloads)} / {len(img_urls)} 张图片")
190
 
191
  if not successful_downloads:
192
- return None, "所有片下"
193
 
194
- progress(0.9, desc="建ZIP文件...")
195
 
196
- # 建ZIP文件
197
  zip_path = os.path.join(tempfile.gettempdir(), "pinterest_images.zip")
198
  with zipfile.ZipFile(zip_path, 'w') as zipf:
199
  for filename in os.listdir(temp_dir):
@@ -203,49 +203,49 @@ def scrape_pinterest_images(pinterest_url, num_images, progress=gr.Progress()):
203
 
204
  progress(1.0, desc="完成!")
205
 
206
- # 准备结果信息
207
  result_info = f"""
208
- 完成!
209
 
210
- 成功下: {len(successful_downloads)} 张图
211
- : {len(failed_downloads)} 张图
212
- 总计: {len(img_urls)} 张图
213
 
214
- 请点击下方接下ZIP文件。
215
  """
216
 
217
  if failed_downloads:
218
- result_info += f"\n\n失败详情:\n" + "\n".join(failed_downloads[:10]) # 只示前10个错误
219
 
220
  return zip_path, result_info
221
 
222
  except Exception as e:
223
- return None, f"程中出: {str(e)}"
224
  finally:
225
- # 清理临时目录
226
  try:
227
  shutil.rmtree(temp_dir)
228
  except:
229
  pass
230
 
231
- # 建Gradio界面
232
  def create_interface():
233
- with gr.Blocks(title="Pinterest片下器", theme=gr.themes.Soft()) as interface:
234
  gr.Markdown("""
235
- # 🖼️ Pinterest 片下
236
 
237
- 入Pinterest搜索面的URL,批量下载图片。
238
 
239
- **使用明:**
240
- 1. 入Pinterest搜索面或板的完整URL
241
- 2. 置要下量(建不超100
242
- 3. 点击"始下"按
243
- 4. 等待理完成ZIP文件
244
 
245
- **注意事:**
246
- - 请确入的是有效的Pinterest URL
247
- - 下速度取决于网络状况片大小
248
- - 建议单次下不超100张图
249
  """)
250
 
251
  with gr.Row():
@@ -254,7 +254,7 @@ def create_interface():
254
  label="Pinterest URL",
255
  placeholder="https://www.pinterest.com/search/pins/?q=your-search-term",
256
  lines=2,
257
- info="入Pinterest搜索面或板的完整URL"
258
  )
259
 
260
  num_images = gr.Slider(
@@ -262,22 +262,22 @@ def create_interface():
262
  maximum=500,
263
  value=20,
264
  step=1,
265
- label="量",
266
- info="要下量(建不超100)"
267
  )
268
 
269
- download_btn = gr.Button("🚀 始下", variant="primary", size="lg")
270
 
271
  with gr.Column():
272
  result_info = gr.Textbox(
273
- label="下载结果",
274
  lines=10,
275
  interactive=False,
276
- info="示下载进度和果信息"
277
  )
278
 
279
  download_file = gr.File(
280
- label="下文件",
281
  interactive=False,
282
  visible=False
283
  )
@@ -292,7 +292,7 @@ def create_interface():
292
  ```
293
  """)
294
 
295
- # 定事件
296
  def handle_download(url, num):
297
  zip_path, info = scrape_pinterest_images(url, int(num))
298
  if zip_path:
@@ -309,7 +309,7 @@ def create_interface():
309
 
310
  return interface
311
 
312
- # 启动应
313
  if __name__ == "__main__":
314
  interface = create_interface()
315
  interface.launch(
 
17
  import sys
18
 
19
  def setup_chrome_driver():
20
+ """置Chrome WebDriver,Hugging Face Spaces"""
21
  chrome_options = Options()
22
  chrome_options.add_argument("--headless")
23
  chrome_options.add_argument("--no-sandbox")
 
27
  chrome_options.add_argument("--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
28
 
29
  try:
30
+ # 嘗試使用系中的Chrome
31
  driver = webdriver.Chrome(options=chrome_options)
32
  return driver
33
  except Exception as e:
 
35
  return None
36
 
37
  def extract_images_from_pinterest(url, max_images, progress_callback=None):
38
+ """Pinterest面提取片URL"""
39
  driver = setup_chrome_driver()
40
  if not driver:
41
+ return [], "啟動Chrome WebDriver"
42
 
43
  try:
44
  driver.get(url)
 
49
  no_new_images_count = 0
50
 
51
  while len(imgList) < max_images:
52
+ # 滾動頁面
53
  scroll += 800
54
  driver.execute_script(f'window.scrollTo(0, {scroll})')
55
  time.sleep(1)
56
 
57
+ # 片元素
58
  imgs = driver.find_elements(By.CSS_SELECTOR, 'div[data-test-id="pin"] img')
59
  new_images = 0
60
 
 
68
  continue
69
 
70
  if progress_callback:
71
+ progress_callback(f"已找到 {len(imgList)} 張圖片")
72
 
73
+ # 檢查是否有新圖片
74
  if new_images == 0:
75
  no_new_images_count += 1
76
  else:
 
82
  return imgList, None
83
 
84
  except Exception as e:
85
+ return [], f"提取: {str(e)}"
86
  finally:
87
  driver.quit()
88
 
89
  def download_image(args):
90
+ """下載單張圖片"""
91
  index, url, temp_dir = args
92
  try:
93
+ # 轉換為高清片URL
94
  if '236x' in url:
95
  url = url.replace('236x', 'originals')
96
  elif '474x' in url:
 
99
  # 生成文件名
100
  filename = f"pinterest_img_{index+1:04d}"
101
 
102
+ # URL提取原始文件名
103
  url_parts = url.split('/')
104
  if len(url_parts) > 0:
105
  original_name = url_parts[-1].split('?')[0]
 
107
  clean_name = re.sub(r'[^\w\-_\.]', '_', original_name)
108
  filename = f"pinterest_img_{index+1:04d}_{clean_name}"
109
 
110
+ # 保文件展名
111
  if not filename.lower().endswith(('.jpg', '.jpeg', '.png', '.gif', '.webp')):
112
  filename += '.jpg'
113
 
114
  filepath = os.path.join(temp_dir, filename)
115
 
116
+ # 下載圖片
117
  headers = {
118
  'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
119
  }
 
127
  return True, filename
128
 
129
  except Exception as e:
130
+ return False, f"下: {str(e)}"
131
 
132
  def scrape_pinterest_images(pinterest_url, num_images, progress=gr.Progress()):
133
+ """主要的爬"""
134
  if not pinterest_url:
135
+ return None, "請輸入Pinterest URL"
136
 
137
  if num_images <= 0:
138
+ return None, "量必0"
139
 
140
+ if num_images > 500: # 限制最大
141
+ return None, "量不能超500"
142
 
143
+ # 驗證URL
144
  try:
145
  parsed_url = urlparse(pinterest_url)
146
  if 'pinterest.com' not in parsed_url.netloc:
147
+ return None, "請輸入有效的Pinterest URL"
148
  except:
149
+ return None, "URL格式效"
150
 
151
+ progress(0, desc="始提取片URL...")
152
 
153
+ # 提取片URL
154
  def update_progress(msg):
155
  progress(0.3, desc=msg)
156
 
 
160
  return None, error
161
 
162
  if not img_urls:
163
+ return None, "未找到任何片"
164
 
165
+ progress(0.5, desc=f"始下 {len(img_urls)} 張圖片...")
166
 
167
+ # 創建臨時目錄
168
  temp_dir = tempfile.mkdtemp()
169
 
170
  try:
171
+ # 準備載參數
172
  download_args = [(i, url, temp_dir) for i, url in enumerate(img_urls)]
173
 
174
+ # 多線程下載
175
  successful_downloads = []
176
  failed_downloads = []
177
 
178
+ with ThreadPoolExecutor(max_workers=3) as executor: # 降低並發數
179
  results = list(executor.map(download_image, download_args))
180
 
181
  for i, (success, info) in enumerate(results):
182
  if success:
183
  successful_downloads.append(info)
184
  else:
185
+ failed_downloads.append(f"片 {i+1}: {info}")
186
 
187
+ # 更新
188
  progress((0.5 + 0.4 * (i + 1) / len(results)),
189
+ desc=f"已下 {len(successful_downloads)} / {len(img_urls)} 張圖片")
190
 
191
  if not successful_downloads:
192
+ return None, "所有片下"
193
 
194
+ progress(0.9, desc="建ZIP文件...")
195
 
196
+ # 建ZIP文件
197
  zip_path = os.path.join(tempfile.gettempdir(), "pinterest_images.zip")
198
  with zipfile.ZipFile(zip_path, 'w') as zipf:
199
  for filename in os.listdir(temp_dir):
 
203
 
204
  progress(1.0, desc="完成!")
205
 
206
+ # 準備結果信息
207
  result_info = f"""
208
+ 完成!
209
 
210
+ 成功下: {len(successful_downloads)} 張圖
211
+ : {len(failed_downloads)} 張圖
212
+ 總計: {len(img_urls)} 張圖
213
 
214
+ 請點擊下方接下ZIP文件。
215
  """
216
 
217
  if failed_downloads:
218
+ result_info += f"\n\n失敗詳情:\n" + "\n".join(failed_downloads[:10]) # 只示前10個錯誤
219
 
220
  return zip_path, result_info
221
 
222
  except Exception as e:
223
+ return None, f"程中出: {str(e)}"
224
  finally:
225
+ # 清理臨時目錄
226
  try:
227
  shutil.rmtree(temp_dir)
228
  except:
229
  pass
230
 
231
+ # 建Gradio界面
232
  def create_interface():
233
+ with gr.Blocks(title="Pinterest片下器", theme=gr.themes.Soft()) as interface:
234
  gr.Markdown("""
235
+ # 🖼️ Pinterest 片下
236
 
237
+ 入Pinterest搜索面的URL,批量下載圖片。
238
 
239
+ **使用明:**
240
+ 1. 入Pinterest搜索面或板的完整URL
241
+ 2. 置要下量(建不超100
242
+ 3. 點擊"始下"按
243
+ 4. 等待理完成ZIP文件
244
 
245
+ **注意事:**
246
+ - 請確入的是有效的Pinterest URL
247
+ - 下速度取決於網絡狀況片大小
248
+ - 建議單次下不超100張圖
249
  """)
250
 
251
  with gr.Row():
 
254
  label="Pinterest URL",
255
  placeholder="https://www.pinterest.com/search/pins/?q=your-search-term",
256
  lines=2,
257
+ info="入Pinterest搜索面或板的完整URL"
258
  )
259
 
260
  num_images = gr.Slider(
 
262
  maximum=500,
263
  value=20,
264
  step=1,
265
+ label="量",
266
+ info="要下量(建不超100)"
267
  )
268
 
269
+ download_btn = gr.Button("🚀 始下", variant="primary", size="lg")
270
 
271
  with gr.Column():
272
  result_info = gr.Textbox(
273
+ label="下載結果",
274
  lines=10,
275
  interactive=False,
276
+ info="示下載進度和果信息"
277
  )
278
 
279
  download_file = gr.File(
280
+ label="下文件",
281
  interactive=False,
282
  visible=False
283
  )
 
292
  ```
293
  """)
294
 
295
+ # 定事件
296
  def handle_download(url, num):
297
  zip_path, info = scrape_pinterest_images(url, int(num))
298
  if zip_path:
 
309
 
310
  return interface
311
 
312
+ # 啟動應
313
  if __name__ == "__main__":
314
  interface = create_interface()
315
  interface.launch(