from flask import Flask, request, jsonify, render_template
from flask_cors import CORS
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import chromedriver_autoinstaller
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium import webdriver
import json
import os

# Module-level Flask application; the route decorators below register on it.
app = Flask(__name__)
CORS(app)  # Allow cross-origin requests


def setup_driver():
    """
    Create a headless Chrome WebDriver.

    Returns:
        A ``webdriver.Chrome`` instance built from the configured options.
        The caller is responsible for calling ``quit()`` on it.

    Raises:
        RuntimeError: If installing ChromeDriver or starting Chrome fails.
            The original exception is attached as ``__cause__``.
    """
    try:
        # Ask chromedriver_autoinstaller to install under /tmp
        # (presumably because it is writable in the deployment container).
        chromedriver_path = chromedriver_autoinstaller.install(path="/tmp")

        # Configure Chrome command-line options.
        options = Options()
        options.add_argument("--headless")
        options.add_argument("--no-sandbox")
        options.add_argument("--disable-dev-shm-usage")
        options.add_argument("--disable-gpu")

        # Start the WebDriver through an explicit Service for that driver path.
        service = Service(chromedriver_path)
        return webdriver.Chrome(service=service, options=options)

    except Exception as e:
        # Chain explicitly so the root cause stays visible in tracebacks.
        raise RuntimeError(f"WebDriver 啟動失敗: {e}") from e

        
def is_good_name(name):
    """
    Decide whether a product name carries enough information.

    Rules:
    1. It must be a string whose length is between 5 and 100 characters
       (inclusive).
    2. It must not contain any of the generic keywords below
       (case-insensitive substring match).

    Args:
        name: Candidate product name. May be ``None`` or a non-string value,
            in which case the name is rejected.

    Returns:
        ``True`` if the name passes both rules, ``False`` otherwise.
    """
    # The value comes from scraped JSON-LD, so it is not guaranteed to be a
    # string; reject anything else instead of failing on len()/lower().
    if not isinstance(name, str):
        return False

    if not 5 <= len(name) <= 100:
        return False

    # Generic or meaningless terms that disqualify a name.
    bad_keywords = (
        'product', 'item', 'sale', 'wts', 'wtb', 'for sale',
        'bunjang', 'global', 'sign', 'album', 'photocard',
    )

    name_lower = name.lower()
    return not any(keyword in name_lower for keyword in bad_keywords)

@app.route('/')
def index():
    """Serve the scraper front-end page."""
    template_name = 'product_scraper.html'
    return render_template(template_name)

def _find_product(payload):
    """Return the first dict with "@type" == "Product" in a decoded JSON-LD payload, or None."""
    candidates = payload if isinstance(payload, list) else [payload]
    for candidate in candidates:
        if isinstance(candidate, dict) and candidate.get("@type") == "Product":
            return candidate
    return None


def _first_offer(offers):
    """Reduce a JSON-LD "offers" value to a single dict, or None if it is unusable."""
    if isinstance(offers, list):
        offers = offers[0] if offers else None
    return offers if isinstance(offers, dict) else None


@app.route('/scrape', methods=['POST'])
def scrape_product():
    """
    Scrape product information from the page at the posted URL.

    Expects a JSON object body with a ``url`` key. The page is loaded in a
    headless browser, and the first JSON-LD object typed ``Product`` is
    returned as JSON with the keys ``image``, ``name``, ``korean_name``,
    ``description``, ``price``, ``currency`` and ``name_quality``.

    Returns:
        200 with the product fields on success;
        400 if the URL is missing or is not an http(s) URL;
        404 if no Product JSON-LD block is found;
        500 with the exception text for any other failure.
    """
    # silent=True yields None instead of raising on a missing/invalid JSON
    # body, so malformed requests get the same 400 as a missing URL.
    payload = request.get_json(silent=True)
    url = payload.get('url') if isinstance(payload, dict) else None

    if not url:
        return jsonify({"error": "未提供網址"}), 400

    # The URL is untrusted input handed to a real browser: only allow web
    # URLs so schemes such as file:// or chrome:// cannot be loaded.
    if isinstance(url, str):
        url = url.strip()
    if not isinstance(url, str) or not url.lower().startswith(("http://", "https://")):
        return jsonify({"error": "網址必須以 http:// 或 https:// 開頭"}), 400

    driver = None
    try:
        driver = setup_driver()
        driver.get(url)

        # Wait (up to 10 s) for the page body to be present.
        WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.TAG_NAME, "body")))

        # Parse the rendered HTML.
        soup = BeautifulSoup(driver.page_source, "html.parser")

        # First try to take the original (presumably Korean) title from the
        # alt text of the image matching these site-specific attributes.
        korean_title = None
        img_tags = soup.find_all('img', {
            'fetchpriority': 'high',
            'data-nimg': 'fill',
            'class': 'osrq1v4',
            'alt': True
        })
        if img_tags:
            korean_title = img_tags[0].get('alt')

        # Scan the JSON-LD blocks for the first Product entry.
        for script_tag in soup.find_all("script", type="application/ld+json"):
            raw = script_tag.string
            if not raw:
                # .string is None for empty or multi-child tags; skip them
                # rather than aborting the whole scan.
                continue
            try:
                product_data = _find_product(json.loads(raw))
            except json.JSONDecodeError:
                continue
            if product_data is None:
                continue

            name = product_data.get("name")
            # "offers" may be a single object or a list; use the first one.
            offers = _first_offer(product_data.get("offers"))
            price = offers.get("price") if offers else None
            price_currency = offers.get("priceCurrency") if offers else None

            return jsonify({
                "image": product_data.get("image"),
                "name": name,
                # Fall back to the JSON-LD name when no matching image alt was found.
                "korean_name": korean_title or name,
                "description": product_data.get("description"),
                "price": price,
                "currency": price_currency,
                "name_quality": is_good_name(name)
            })

        return jsonify({"error": "未找到商品資訊"}), 404

    except Exception as e:
        # Top-level boundary: report any failure to the client as a 500.
        return jsonify({"error": str(e)}), 500

    finally:
        # Always release the browser process.
        if driver:
            driver.quit()

if __name__ == "__main__":
    # PORT may be set in the environment; 7860 is the fallback
    # (the Hugging Face default, per the original note).
    port = int(os.getenv("PORT", 7860))
    app.run(port=port, host="0.0.0.0")