vikarshana committed on
Commit
7955c3b
·
verified ·
1 Parent(s): f007e8d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +167 -295
app.py CHANGED
@@ -1,318 +1,190 @@
1
- import gradio as gr
2
- import requests
3
- from concurrent.futures import ThreadPoolExecutor
4
- import os
5
- import re
6
- import zipfile
7
- import tempfile
8
- import shutil
9
- from urllib.parse import urlparse
10
  import time
 
11
  from selenium import webdriver
12
  from selenium.webdriver.chrome.options import Options
13
- from selenium.webdriver.chrome.service import Service
14
  from selenium.webdriver.common.by import By
15
- from selenium.common.exceptions import TimeoutException, WebDriverException
16
- import subprocess
17
- import sys
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
- def setup_chrome_driver():
20
- """設置Chrome WebDriver,適用於Hugging Face Spaces"""
21
- chrome_options = Options()
22
- chrome_options.add_argument("--headless")
23
- chrome_options.add_argument("--no-sandbox")
24
- chrome_options.add_argument("--disable-dev-shm-usage")
25
- chrome_options.add_argument("--disable-gpu")
26
- chrome_options.add_argument("--window-size=1920,1080")
27
- chrome_options.add_argument("--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
28
-
29
  try:
30
- # 嘗試使用系統中的Chrome
 
 
 
 
 
 
 
 
 
 
 
31
  driver = webdriver.Chrome(options=chrome_options)
32
- return driver
33
- except Exception as e:
34
- print(f"Chrome WebDriver setup failed: {e}")
35
- return None
36
 
37
- def extract_images_from_pinterest(url, max_images, progress_callback=None):
38
- """從Pinterest頁面提取圖片URL"""
39
- driver = setup_chrome_driver()
40
- if not driver:
41
- return [], "無法啟動Chrome WebDriver"
42
-
43
- try:
44
- driver.get(url)
45
- time.sleep(3)
46
-
47
- imgList = []
48
- scroll = 0
49
- no_new_images_count = 0
50
-
51
- while len(imgList) < max_images:
52
- # 滾動頁面
53
- scroll += 800
54
- driver.execute_script(f'window.scrollTo(0, {scroll})')
55
- time.sleep(1)
56
-
57
- # 獲取圖片元素
58
- imgs = driver.find_elements(By.CSS_SELECTOR, 'div[data-test-id="pin"] img')
59
- new_images = 0
60
-
61
- for img in imgs:
62
- try:
63
- img_url = img.get_attribute('src')
64
- if img_url and img_url not in imgList and len(imgList) < max_images:
65
- imgList.append(img_url)
66
- new_images += 1
67
- except:
68
- continue
69
-
70
- if progress_callback:
71
- progress_callback(f"已找到 {len(imgList)} 張圖片")
72
-
73
- # 檢查是否沒有新圖片
74
- if new_images == 0:
75
- no_new_images_count += 1
76
- else:
77
- no_new_images_count = 0
78
-
79
- if no_new_images_count >= 5:
80
  break
81
-
82
- return imgList, None
83
-
84
- except Exception as e:
85
- return [], f"提取圖片時出錯: {str(e)}"
86
- finally:
87
- driver.quit()
88
 
89
- def download_image(args):
90
- """下載單張圖片"""
91
- index, url, temp_dir = args
92
- try:
93
- # 轉換為高清圖片URL
94
- if '236x' in url:
95
- url = url.replace('236x', 'originals')
96
- elif '474x' in url:
97
- url = url.replace('474x', 'originals')
98
-
99
- # 生成文件名
100
- filename = f"pinterest_img_{index+1:04d}"
101
-
102
- # 從URL提取原始文件名
103
- url_parts = url.split('/')
104
- if len(url_parts) > 0:
105
- original_name = url_parts[-1].split('?')[0]
106
- if '.' in original_name and len(original_name) < 100:
107
- clean_name = re.sub(r'[^\w\-_\.]', '_', original_name)
108
- filename = f"pinterest_img_{index+1:04d}_{clean_name}"
109
-
110
- # 確保文件擴展名
111
- if not filename.lower().endswith(('.jpg', '.jpeg', '.png', '.gif', '.webp')):
112
- filename += '.jpg'
113
-
114
- filepath = os.path.join(temp_dir, filename)
115
-
116
- # 下載圖片
117
- headers = {
118
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
119
- }
120
-
121
- response = requests.get(url, headers=headers, timeout=30)
122
- response.raise_for_status()
123
-
124
- with open(filepath, 'wb') as f:
125
- f.write(response.content)
126
-
127
- return True, filename
128
-
129
- except Exception as e:
130
- return False, f"下載失敗: {str(e)}"
131
 
132
- def scrape_pinterest_images(pinterest_url, num_images, progress=gr.Progress()):
133
- """主要的爬蟲函數"""
134
- if not pinterest_url:
135
- return None, "請輸入Pinterest URL"
136
-
137
- if num_images <= 0:
138
- return None, "圖片數量必須大於0"
139
-
140
- if num_images > 500: # 限制最大數量
141
- return None, "圖片數量不能超過500張"
142
-
143
- # 驗證URL
144
- try:
145
- parsed_url = urlparse(pinterest_url)
146
- if 'pinterest.com' not in parsed_url.netloc:
147
- return None, "請輸入有效的Pinterest URL"
148
- except:
149
- return None, "URL格式無效"
150
-
151
- progress(0, desc="開始提取圖片URL...")
152
-
153
- # 提取圖片URL
154
- def update_progress(msg):
155
- progress(0.3, desc=msg)
156
-
157
- img_urls, error = extract_images_from_pinterest(pinterest_url, num_images, update_progress)
158
-
159
- if error:
160
- return None, error
161
-
162
- if not img_urls:
163
- return None, "未找到任何圖片"
164
-
165
- progress(0.5, desc=f"開始下載 {len(img_urls)} 張圖片...")
166
-
167
- # 創建臨時目錄
168
- temp_dir = tempfile.mkdtemp()
169
-
170
- try:
171
- # 準備下載參數
172
- download_args = [(i, url, temp_dir) for i, url in enumerate(img_urls)]
173
-
174
- # 多線程下載
175
- successful_downloads = []
176
- failed_downloads = []
177
-
178
- with ThreadPoolExecutor(max_workers=3) as executor: # 降低並發數
179
- results = list(executor.map(download_image, download_args))
180
-
181
- for i, (success, info) in enumerate(results):
182
- if success:
183
- successful_downloads.append(info)
184
- else:
185
- failed_downloads.append(f"圖片 {i+1}: {info}")
186
-
187
- # 更新進度
188
- progress((0.5 + 0.4 * (i + 1) / len(results)),
189
- desc=f"已下載 {len(successful_downloads)} / {len(img_urls)} 張圖片")
190
-
191
- if not successful_downloads:
192
- return None, "所有圖片下載失敗"
193
-
194
- progress(0.9, desc="創建ZIP文件...")
195
-
196
- # 創建ZIP文件
197
- zip_path = os.path.join(tempfile.gettempdir(), "pinterest_images.zip")
198
- with zipfile.ZipFile(zip_path, 'w') as zipf:
199
- for filename in os.listdir(temp_dir):
200
- file_path = os.path.join(temp_dir, filename)
201
- if os.path.isfile(file_path):
202
- zipf.write(file_path, filename)
203
-
204
- progress(1.0, desc="完成!")
205
-
206
- # 準備結果信息
207
- result_info = f"""
208
- 下載完成!
209
 
210
- 成功下載: {len(successful_downloads)} 張圖片
211
- 失敗: {len(failed_downloads)} 張圖片
212
- 總計: {len(img_urls)} 張圖片
 
 
 
 
 
213
 
214
- 請點擊下方鏈接下載ZIP文件。
215
- """
216
-
217
- if failed_downloads:
218
- result_info += f"\n\n失敗詳情:\n" + "\n".join(failed_downloads[:10]) # 只顯示前10個錯誤
219
-
220
- return zip_path, result_info
221
-
222
- except Exception as e:
223
- return None, f"處理過程中出錯: {str(e)}"
224
- finally:
225
- # 清理臨時目錄
226
  try:
227
- shutil.rmtree(temp_dir)
228
- except:
229
  pass
230
 
231
- # 創建Gradio界面
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
232
  def create_interface():
233
- with gr.Blocks(title="Pinterest圖片下載器", theme=gr.themes.Soft()) as interface:
234
- gr.Markdown("""
235
- # 🖼️ Pinterest 圖片下載器
236
-
237
- 輸入Pinterest搜索頁面的URL,批量下載圖片。
238
-
239
- **使用說明:**
240
- 1. 輸入Pinterest搜索頁面或板塊的完整URL
241
- 2. 設置要下載的圖片數量(建議不超過100張)
242
- 3. 點擊"開始下載"按鈕
243
- 4. 等待處理完成後下載ZIP文件
244
-
245
- **注意事項:**
246
- - 請確保輸入的是有效的Pinterest URL
247
- - 下載速度取決於網絡狀況和圖片大小
248
- - 建議單次下載不超過100張圖片
249
- """)
250
-
251
  with gr.Row():
252
- with gr.Column():
253
- pinterest_url = gr.Textbox(
254
- label="Pinterest URL",
255
- placeholder="https://www.pinterest.com/search/pins/?q=your-search-term",
256
- lines=2,
257
- info="輸入Pinterest搜索頁面或板塊的完整URL"
258
- )
259
-
260
- num_images = gr.Slider(
261
- minimum=1,
262
- maximum=500,
263
- value=20,
264
- step=1,
265
- label="圖片數量",
266
- info="要下載的圖片數量(建議不超過100張)"
267
- )
268
-
269
- download_btn = gr.Button("🚀 開始下載", variant="primary", size="lg")
270
-
271
- with gr.Column():
272
- result_info = gr.Textbox(
273
- label="下載結果",
274
- lines=10,
275
- interactive=False,
276
- info="顯示下載進度和結果信息"
277
- )
278
-
279
- download_file = gr.File(
280
- label="下載文件",
281
- interactive=False,
282
- visible=False
283
- )
284
-
285
- # 示例URL
286
- gr.Markdown("""
287
- ### 示例URL:
288
- ```
289
- https://www.pinterest.com/search/pins/?q=landscape%20photography
290
- https://www.pinterest.com/search/pins/?q=interior%20design
291
- https://www.pinterest.com/search/pins/?q=food%20photography
292
- ```
293
- """)
294
-
295
- # 綁定事件
296
- def handle_download(url, num):
297
- zip_path, info = scrape_pinterest_images(url, int(num))
298
- if zip_path:
299
- return info, gr.File(value=zip_path, visible=True)
300
- else:
301
- return info, gr.File(visible=False)
302
-
303
- download_btn.click(
304
- fn=handle_download,
305
- inputs=[pinterest_url, num_images],
306
- outputs=[result_info, download_file],
307
- show_progress=True
308
- )
309
-
310
- return interface
311
 
312
- # 啟動應用
 
313
  if __name__ == "__main__":
314
- interface = create_interface()
315
- interface.launch(
 
 
 
 
316
  server_name="0.0.0.0",
317
  server_port=7860,
318
  share=True
 
1
+ import asyncio
 
 
 
 
 
 
 
 
2
  import time
3
+ import logging
4
  from selenium import webdriver
5
  from selenium.webdriver.chrome.options import Options
 
6
  from selenium.webdriver.common.by import By
7
+ from selenium.webdriver.support.ui import WebDriverWait
8
+ from selenium.webdriver.support import expected_conditions as EC
9
+ from selenium.common.exceptions import TimeoutException
10
+ import gradio as gr
11
+
12
+ # Configure logging
13
+ logging.basicConfig(level=logging.INFO)
14
+ logger = logging.getLogger(__name__)
15
+
16
+ async def capture_direct_download_link(url, opts=None):
17
+ if opts is None:
18
+ opts = {}
19
+
20
+ chrome_path = opts.get('chrome_path', '/opt/google/chrome/chrome')
21
+ headless = opts.get('headless', True)
22
+ wait_for_button_ms = opts.get('wait_for_button_ms', 60000)
23
+
24
+ driver = None
25
 
 
 
 
 
 
 
 
 
 
 
26
  try:
27
+ chrome_options = Options()
28
+ if headless:
29
+ chrome_options.add_argument("--headless=new")
30
+ chrome_options.binary_location = chrome_path
31
+ chrome_options.add_argument("--no-sandbox")
32
+ chrome_options.add_argument("--disable-setuid-sandbox")
33
+ chrome_options.add_argument("--disable-web-security")
34
+ chrome_options.add_argument("--disable-gpu")
35
+ chrome_options.add_argument("--disable-blink-features=AutomationControlled")
36
+ chrome_options.add_argument("--disable-features=IsolateOrigins,site-per-process")
37
+ chrome_options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36")
38
+
39
  driver = webdriver.Chrome(options=chrome_options)
40
+ driver.set_window_size(1280, 900)
 
 
 
41
 
42
+ # Stealth script
43
+ stealth_script = """
44
+ Object.defineProperty(navigator, 'webdriver', { get: () => undefined });
45
+ Object.defineProperty(navigator, 'plugins', { get: () => [1, 2, 3, 4, 5] });
46
+ Object.defineProperty(navigator, 'languages', { get: () => ['en-US', 'en'] });
47
+ window.chrome = { runtime: {} };
48
+ window.DisableDevtool = function() {};
49
+ window.qajblusk = false;
50
+ """
51
+ driver.execute_cdp_cmd('Page.addScriptToEvaluateOnNewDocument', {'source': stealth_script})
52
+
53
+ # Block images/fonts
54
+ driver.execute_cdp_cmd('Network.enable', {})
55
+ driver.execute_cdp_cmd('Network.setBlockedURLs', {
56
+ 'urls': ['*.jpg', '*.jpeg', '*.png', '*.gif', '*.webp', '*.woff', '*.woff2', '*.ttf']
57
+ })
58
+
59
+ # Navigate
60
+ await asyncio.get_event_loop().run_in_executor(None, lambda: driver.get(url))
61
+
62
+ WebDriverWait(driver, 30).until(
63
+ lambda d: d.execute_script("return document.readyState") == "complete"
64
+ )
65
+ await asyncio.sleep(2)
66
+
67
+ # Find button
68
+ buttons = driver.find_elements(By.TAG_NAME, "button")
69
+ target_button = None
70
+ for btn in buttons:
71
+ if btn.text.strip() == "Direct Download 2":
72
+ target_button = btn
 
 
 
 
 
 
 
 
 
 
 
 
73
  break
 
 
 
 
 
 
 
74
 
75
+ if not target_button:
76
+ raise Exception('No "Direct Download 2" button found on page!')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
 
78
+ button_id = target_button.get_attribute("id")
79
+ logger.info(f'✅ Found "Direct Download 2" button with ID: {button_id}')
80
+ target_button.click()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
 
82
+ # Wait for processing to end
83
+ def wait_condition(d):
84
+ try:
85
+ el = d.find_element(By.ID, button_id)
86
+ span = el.find_element(By.CLASS_NAME, "download-text")
87
+ return span.text.strip() != "Processing..."
88
+ except:
89
+ return True
90
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  try:
92
+ WebDriverWait(driver, 45).until(wait_condition)
93
+ except TimeoutException:
94
  pass
95
 
96
+ logger.info('✅ Processing finished. Listening for download...')
97
+
98
+ original_window = driver.current_window_handle
99
+ final_url = None
100
+
101
+ # Check for popup
102
+ async def wait_for_popup_or_nav():
103
+ nonlocal final_url
104
+ start_time = time.time()
105
+ while time.time() - start_time < 25:
106
+ handles = driver.window_handles
107
+ if len(handles) > 1:
108
+ for handle in handles:
109
+ if handle != original_window:
110
+ driver.switch_to.window(handle)
111
+ try:
112
+ WebDriverWait(driver, 30).until(
113
+ EC.presence_of_element_located((By.TAG_NAME, "body"))
114
+ )
115
+ final_url = driver.current_url
116
+ logger.info(f'🎉 Captured Download URL from NEW TAB: {final_url}')
117
+ driver.close()
118
+ driver.switch_to.window(original_window)
119
+ return
120
+ except Exception as e:
121
+ logger.error(f"Error handling popup: {e}")
122
+ driver.switch_to.window(original_window)
123
+ break
124
+ await asyncio.sleep(0.5)
125
+
126
+ # Fallback: check main tab
127
+ await asyncio.sleep(3)
128
+ current_url = driver.current_url
129
+ if current_url != url and "fordev.jpg" not in current_url and not current_url.startswith("about:"):
130
+ final_url = current_url
131
+ logger.info(f'✅ Fallback: Download URL from MAIN TAB: {final_url}')
132
+
133
+ await wait_for_popup_or_nav()
134
+
135
+ if not final_url or final_url == "about:blank" or "fordev.jpg" in final_url:
136
+ raise Exception("❌ Download URL is invalid or blocked.")
137
+
138
+ return {"success": True, "url": final_url}
139
+
140
+ except Exception as e:
141
+ logger.error(f"❌ capture_direct_download_link error: {str(e)}")
142
+ return {"success": False, "error": str(e)}
143
+
144
+ finally:
145
+ if driver:
146
+ try:
147
+ driver.quit()
148
+ except Exception as e:
149
+ logger.error(f"Error closing driver: {e}")
150
+
151
+
152
+ # --- GRADIO UI WRAPPER ---
153
+ async def gradio_wrapper(url_input):
154
+ """Async Gradio click handler: await capture_direct_download_link and format its result as display text."""
155
+ try:
156
+ result = await capture_direct_download_link(url_input.strip(), {
157
+ "chrome_path": "/opt/google/chrome/chrome",
158
+ "headless": True,
159
+ "wait_for_button_ms": 60000,
160
+ })
161
+ if result["success"]:
162
+ return f"✅ Success!\nDownload URL: {result['url']}"
163
+ else:
164
+ return f"❌ Failed: {result['error']}"
165
+ except Exception as e:
166
+ return f"💥 Unexpected Error: {str(e)}"
167
+
168
  def create_interface():
169
+ with gr.Blocks(title="Download Link Bypasser") as demo:
170
+ gr.Markdown("# 🚀 Direct Download Link Bypasser")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
171
  with gr.Row():
172
+ url_input = gr.Textbox(label="Enter Target URL", placeholder="https://example.com/download")
173
+ submit_btn = gr.Button("Bypass & Get Link", variant="primary")
174
+ output = gr.Textbox(label="Result", lines=5)
175
+ submit_btn.click(fn=gradio_wrapper, inputs=url_input, outputs=output)
176
+ gr.Markdown("ℹ️ This tool finds and clicks 'Direct Download 2' button, then captures the final download URL.")
177
+ return demo
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
178
 
179
+
180
+ # --- MAIN ---
181
  if __name__ == "__main__":
182
+ # Option A: Run async test (uncomment to use)
183
+ # asyncio.run(main())
184
+
185
+ # Option B: Launch Gradio UI (default)
186
+ demo = create_interface()
187
+ demo.launch(
188
  server_name="0.0.0.0",
189
  server_port=7860,
190
  share=True