cloudflare-ai

Runtime error

App Files Files Community

vikarshana committed on Sep 23, 2025

Commit

7955c3b

verified ·

1 Parent(s): f007e8d

Update app.py

Browse files

Files changed (1) hide show

app.py +167 -295

app.py CHANGED Viewed

@@ -1,318 +1,190 @@
-import gradio as gr
-import requests
-from concurrent.futures import ThreadPoolExecutor
-import os
-import re
-import zipfile
-import tempfile
-import shutil
-from urllib.parse import urlparse
 import time
 from selenium import webdriver
 from selenium.webdriver.chrome.options import Options
-from selenium.webdriver.chrome.service import Service
 from selenium.webdriver.common.by import By
-from selenium.common.exceptions import TimeoutException, WebDriverException
-import subprocess
-import sys
-def setup_chrome_driver():
-    """設置Chrome WebDriver，適用於Hugging Face Spaces"""
-    chrome_options = Options()
-    chrome_options.add_argument("--headless")
-    chrome_options.add_argument("--no-sandbox")
-    chrome_options.add_argument("--disable-dev-shm-usage")
-    chrome_options.add_argument("--disable-gpu")
-    chrome_options.add_argument("--window-size=1920,1080")
-    chrome_options.add_argument("--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
     try:
-        # 嘗試使用系統中的Chrome
         driver = webdriver.Chrome(options=chrome_options)
-        return driver
-    except Exception as e:
-        print(f"Chrome WebDriver setup failed: {e}")
-        return None
-def extract_images_from_pinterest(url, max_images, progress_callback=None):
-    """從Pinterest頁面提取圖片URL"""
-    driver = setup_chrome_driver()
-    if not driver:
-        return [], "無法啟動Chrome WebDriver"
-    try:
-        driver.get(url)
-        time.sleep(3)
-        imgList = []
-        scroll = 0
-        no_new_images_count = 0
-        while len(imgList) < max_images:
-            # 滾動頁面
-            scroll += 800
-            driver.execute_script(f'window.scrollTo(0, {scroll})')
-            time.sleep(1)
-            # 獲取圖片元素
-            imgs = driver.find_elements(By.CSS_SELECTOR, 'div[data-test-id="pin"] img')
-            new_images = 0
-            for img in imgs:
-                try:
-                    img_url = img.get_attribute('src')
-                    if img_url and img_url not in imgList and len(imgList) < max_images:
-                        imgList.append(img_url)
-                        new_images += 1
-                except:
-                    continue
-            if progress_callback:
-                progress_callback(f"已找到 {len(imgList)} 張圖片")
-            # 檢查是否沒有新圖片
-            if new_images == 0:
-                no_new_images_count += 1
-            else:
-                no_new_images_count = 0
-            if no_new_images_count >= 5:
                 break
-        return imgList, None
-    except Exception as e:
-        return [], f"提取圖片時出錯: {str(e)}"
-    finally:
-        driver.quit()
-def download_image(args):
-    """下載單張圖片"""
-    index, url, temp_dir = args
-    try:
-        # 轉換為高清圖片URL
-        if '236x' in url:
-            url = url.replace('236x', 'originals')
-        elif '474x' in url:
-            url = url.replace('474x', 'originals')
-        # 生成文件名
-        filename = f"pinterest_img_{index+1:04d}"
-        # 從URL提取原始文件名
-        url_parts = url.split('/')
-        if len(url_parts) > 0:
-            original_name = url_parts[-1].split('?')[0]
-            if '.' in original_name and len(original_name) < 100:
-                clean_name = re.sub(r'[^\w\-_\.]', '_', original_name)
-                filename = f"pinterest_img_{index+1:04d}_{clean_name}"
-        # 確保文件擴展名
-        if not filename.lower().endswith(('.jpg', '.jpeg', '.png', '.gif', '.webp')):
-            filename += '.jpg'
-        filepath = os.path.join(temp_dir, filename)
-        # 下載圖片
-        headers = {
-            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
-        }
-        response = requests.get(url, headers=headers, timeout=30)
-        response.raise_for_status()
-        with open(filepath, 'wb') as f:
-            f.write(response.content)
-        return True, filename
-    except Exception as e:
-        return False, f"下載失敗: {str(e)}"
-def scrape_pinterest_images(pinterest_url, num_images, progress=gr.Progress()):
-    """主要的爬蟲函數"""
-    if not pinterest_url:
-        return None, "請輸入Pinterest URL"
-    if num_images <= 0:
-        return None, "圖片數量必須大於0"
-    if num_images > 500:  # 限制最大數量
-        return None, "圖片數量不能超過500張"
-    # 驗證URL
-    try:
-        parsed_url = urlparse(pinterest_url)
-        if 'pinterest.com' not in parsed_url.netloc:
-            return None, "請輸入有效的Pinterest URL"
-    except:
-        return None, "URL格式無效"
-    progress(0, desc="開始提取圖片URL...")
-    # 提取圖片URL
-    def update_progress(msg):
-        progress(0.3, desc=msg)
-    img_urls, error = extract_images_from_pinterest(pinterest_url, num_images, update_progress)
-    if error:
-        return None, error
-    if not img_urls:
-        return None, "未找到任何圖片"
-    progress(0.5, desc=f"開始下載 {len(img_urls)} 張圖片...")
-    # 創建臨時目錄
-    temp_dir = tempfile.mkdtemp()
-    try:
-        # 準備下載參數
-        download_args = [(i, url, temp_dir) for i, url in enumerate(img_urls)]
-        # 多線程下載
-        successful_downloads = []
-        failed_downloads = []
-        with ThreadPoolExecutor(max_workers=3) as executor:  # 降低並發數
-            results = list(executor.map(download_image, download_args))
-        for i, (success, info) in enumerate(results):
-            if success:
-                successful_downloads.append(info)
-            else:
-                failed_downloads.append(f"圖片 {i+1}: {info}")
-            # 更新進度
-            progress((0.5 + 0.4 * (i + 1) / len(results)),
-                    desc=f"已下載 {len(successful_downloads)} / {len(img_urls)} 張圖片")
-        if not successful_downloads:
-            return None, "所有圖片下載失敗"
-        progress(0.9, desc="創建ZIP文件...")
-        # 創建ZIP文件
-        zip_path = os.path.join(tempfile.gettempdir(), "pinterest_images.zip")
-        with zipfile.ZipFile(zip_path, 'w') as zipf:
-            for filename in os.listdir(temp_dir):
-                file_path = os.path.join(temp_dir, filename)
-                if os.path.isfile(file_path):
-                    zipf.write(file_path, filename)
-        progress(1.0, desc="完成!")
-        # 準備結果信息
-        result_info = f"""
-下載完成！
-成功下載: {len(successful_downloads)} 張圖片
-失敗: {len(failed_downloads)} 張圖片
-總計: {len(img_urls)} 張圖片
-請點擊下方鏈接下載ZIP文件。
-        """
-        if failed_downloads:
-            result_info += f"\n\n失敗詳情:\n" + "\n".join(failed_downloads[:10])  # 只顯示前10個錯誤
-        return zip_path, result_info
-    except Exception as e:
-        return None, f"處理過程中出錯: {str(e)}"
-    finally:
-        # 清理臨時目錄
         try:
-            shutil.rmtree(temp_dir)
-        except:
             pass
-# 創建Gradio界面
 def create_interface():
-    with gr.Blocks(title="Pinterest圖片下載器", theme=gr.themes.Soft()) as interface:
-        gr.Markdown("""
-        # 🖼️ Pinterest 圖片下載器
-        輸入Pinterest搜索頁面的URL，批量下載圖片。
-        **使用說明:**
-        1. 輸入Pinterest搜索頁面或板塊的完整URL
-        2. 設置要下載的圖片數量（建議不超過100張）
-        3. 點擊"開始下載"按鈕
-        4. 等待處理完成後下載ZIP文件
-        **注意事項:**
-        - 請確保輸入的是有效的Pinterest URL
-        - 下載速度取決於網絡狀況和圖片大小
-        - 建議單次下載不超過100張圖片
-        """)
         with gr.Row():
-            with gr.Column():
-                pinterest_url = gr.Textbox(
-                    label="Pinterest URL",
-                    placeholder="https://www.pinterest.com/search/pins/?q=your-search-term",
-                    lines=2,
-                    info="輸入Pinterest搜索頁面或板塊的完整URL"
-                )
-                num_images = gr.Slider(
-                    minimum=1,
-                    maximum=500,
-                    value=20,
-                    step=1,
-                    label="圖片數量",
-                    info="要下載的圖片數量（建議不超過100張）"
-                )
-                download_btn = gr.Button("🚀 開始下載", variant="primary", size="lg")
-            with gr.Column():
-                result_info = gr.Textbox(
-                    label="下載結果",
-                    lines=10,
-                    interactive=False,
-                    info="顯示下載進度和結果信息"
-                )
-                download_file = gr.File(
-                    label="下載文件",
-                    interactive=False,
-                    visible=False
-                )
-        # 示例URL
-        gr.Markdown("""
-        ### 示例URL:
-        ```
-        https://www.pinterest.com/search/pins/?q=landscape%20photography
-        https://www.pinterest.com/search/pins/?q=interior%20design
-        https://www.pinterest.com/search/pins/?q=food%20photography
-        ```
-        """)
-        # 綁定事件
-        def handle_download(url, num):
-            zip_path, info = scrape_pinterest_images(url, int(num))
-            if zip_path:
-                return info, gr.File(value=zip_path, visible=True)
-            else:
-                return info, gr.File(visible=False)
-        download_btn.click(
-            fn=handle_download,
-            inputs=[pinterest_url, num_images],
-            outputs=[result_info, download_file],
-            show_progress=True
-        )
-    return interface
-# 啟動應用
 if __name__ == "__main__":
-    interface = create_interface()
-    interface.launch(
         server_name="0.0.0.0",
         server_port=7860,
         share=True

+import asyncio
 import time
+import logging
 from selenium import webdriver
 from selenium.webdriver.chrome.options import Options
 from selenium.webdriver.common.by import By
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.common.exceptions import TimeoutException
+import gradio as gr
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+async def capture_direct_download_link(url, opts=None):
+    if opts is None:
+        opts = {}
+    chrome_path = opts.get('chrome_path', '/opt/google/chrome/chrome')
+    headless = opts.get('headless', True)
+    wait_for_button_ms = opts.get('wait_for_button_ms', 60000)
+    driver = None
     try:
+        chrome_options = Options()
+        if headless:
+            chrome_options.add_argument("--headless=new")
+        chrome_options.binary_location = chrome_path
+        chrome_options.add_argument("--no-sandbox")
+        chrome_options.add_argument("--disable-setuid-sandbox")
+        chrome_options.add_argument("--disable-web-security")
+        chrome_options.add_argument("--disable-gpu")
+        chrome_options.add_argument("--disable-blink-features=AutomationControlled")
+        chrome_options.add_argument("--disable-features=IsolateOrigins,site-per-process")
+        chrome_options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36")
         driver = webdriver.Chrome(options=chrome_options)
+        driver.set_window_size(1280, 900)
+        # Stealth script
+        stealth_script = """
+        Object.defineProperty(navigator, 'webdriver', { get: () => undefined });
+        Object.defineProperty(navigator, 'plugins', { get: () => [1, 2, 3, 4, 5] });
+        Object.defineProperty(navigator, 'languages', { get: () => ['en-US', 'en'] });
+        window.chrome = { runtime: {} };
+        window.DisableDevtool = function() {};
+        window.qajblusk = false;
+        """
+        driver.execute_cdp_cmd('Page.addScriptToEvaluateOnNewDocument', {'source': stealth_script})
+        # Block images/fonts
+        driver.execute_cdp_cmd('Network.enable', {})
+        driver.execute_cdp_cmd('Network.setBlockedURLs', {
+            'urls': ['*.jpg', '*.jpeg', '*.png', '*.gif', '*.webp', '*.woff', '*.woff2', '*.ttf']
+        })
+        # Navigate
+        await asyncio.get_event_loop().run_in_executor(None, lambda: driver.get(url))
+        WebDriverWait(driver, 30).until(
+            lambda d: d.execute_script("return document.readyState") == "complete"
+        )
+        await asyncio.sleep(2)
+        # Find button
+        buttons = driver.find_elements(By.TAG_NAME, "button")
+        target_button = None
+        for btn in buttons:
+            if btn.text.strip() == "Direct Download 2":
+                target_button = btn
                 break
+        if not target_button:
+            raise Exception('No "Direct Download 2" button found on page!')
+        button_id = target_button.get_attribute("id")
+        logger.info(f'✅ Found "Direct Download 2" button with ID: {button_id}')
+        target_button.click()
+        # Wait for processing to end
+        def wait_condition(d):
+            try:
+                el = d.find_element(By.ID, button_id)
+                span = el.find_element(By.CLASS_NAME, "download-text")
+                return span.text.strip() != "Processing..."
+            except:
+                return True
         try:
+            WebDriverWait(driver, 45).until(wait_condition)
+        except TimeoutException:
             pass
+        logger.info('✅ Processing finished. Listening for download...')
+        original_window = driver.current_window_handle
+        final_url = None
+        # Check for popup
+        async def wait_for_popup_or_nav():
+            nonlocal final_url
+            start_time = time.time()
+            while time.time() - start_time < 25:
+                handles = driver.window_handles
+                if len(handles) > 1:
+                    for handle in handles:
+                        if handle != original_window:
+                            driver.switch_to.window(handle)
+                            try:
+                                WebDriverWait(driver, 30).until(
+                                    EC.presence_of_element_located((By.TAG_NAME, "body"))
+                                )
+                                final_url = driver.current_url
+                                logger.info(f'🎉 Captured Download URL from NEW TAB: {final_url}')
+                                driver.close()
+                                driver.switch_to.window(original_window)
+                                return
+                            except Exception as e:
+                                logger.error(f"Error handling popup: {e}")
+                                driver.switch_to.window(original_window)
+                                break
+                await asyncio.sleep(0.5)
+            # Fallback: check main tab
+            await asyncio.sleep(3)
+            current_url = driver.current_url
+            if current_url != url and "fordev.jpg" not in current_url and not current_url.startswith("about:"):
+                final_url = current_url
+                logger.info(f'✅ Fallback: Download URL from MAIN TAB: {final_url}')
+        await wait_for_popup_or_nav()
+        if not final_url or final_url == "about:blank" or "fordev.jpg" in final_url:
+            raise Exception("❌ Download URL is invalid or blocked.")
+        return {"success": True, "url": final_url}
+    except Exception as e:
+        logger.error(f"❌ capture_direct_download_link error: {str(e)}")
+        return {"success": False, "error": str(e)}
+    finally:
+        if driver:
+            try:
+                driver.quit()
+            except Exception as e:
+                logger.error(f"Error closing driver: {e}")
+# --- GRADIO UI WRAPPER ---
+async def gradio_wrapper(url_input):
+    """Async Gradio click handler: await capture_direct_download_link() on the stripped URL and return a status string."""
+    try:
+        result = await capture_direct_download_link(url_input.strip(), {
+            "chrome_path": "/opt/google/chrome/chrome",
+            "headless": True,
+            "wait_for_button_ms": 60000,
+        })
+        if result["success"]:
+            return f"✅ Success!\nDownload URL: {result['url']}"
+        else:
+            return f"❌ Failed: {result['error']}"
+    except Exception as e:
+        return f"💥 Unexpected Error: {str(e)}"
 def create_interface():
+    with gr.Blocks(title="Download Link Bypasser") as demo:
+        gr.Markdown("# 🚀 Direct Download Link Bypasser")
         with gr.Row():
+            url_input = gr.Textbox(label="Enter Target URL", placeholder="https://example.com/download")
+            submit_btn = gr.Button("Bypass & Get Link", variant="primary")
+        output = gr.Textbox(label="Result", lines=5)
+        submit_btn.click(fn=gradio_wrapper, inputs=url_input, outputs=output)
+        gr.Markdown("ℹ️ This tool finds and clicks 'Direct Download 2' button, then captures the final download URL.")
+    return demo
+# --- MAIN ---
 if __name__ == "__main__":
+    # Option A: Run async test (uncomment to use)
+    # asyncio.run(main())
+    # Option B: Launch Gradio UI (default)
+    demo = create_interface()
+    demo.launch(
         server_name="0.0.0.0",
         server_port=7860,
         share=True