try
Browse files
- Dockerfile +50 -10
- init_playwright.py +70 -19
Dockerfile
CHANGED
|
@@ -1,12 +1,13 @@
|
|
| 1 |
FROM python:3.10-slim
|
| 2 |
|
| 3 |
-
# 安装系统依赖
|
| 4 |
RUN apt-get update && apt-get install -y \
|
| 5 |
wget \
|
| 6 |
gnupg \
|
| 7 |
ca-certificates \
|
| 8 |
fonts-liberation \
|
| 9 |
libasound2 \
|
|
|
|
| 10 |
libatk-bridge2.0-0 \
|
| 11 |
libatk1.0-0 \
|
| 12 |
libc6 \
|
|
@@ -20,8 +21,10 @@ RUN apt-get update && apt-get install -y \
|
|
| 20 |
libgdk-pixbuf2.0-0 \
|
| 21 |
libglib2.0-0 \
|
| 22 |
libgtk-3-0 \
|
|
|
|
| 23 |
libnspr4 \
|
| 24 |
libnss3 \
|
|
|
|
| 25 |
libpango-1.0-0 \
|
| 26 |
libpangocairo-1.0-0 \
|
| 27 |
libstdc++6 \
|
|
@@ -39,14 +42,34 @@ RUN apt-get update && apt-get install -y \
|
|
| 39 |
libxss1 \
|
| 40 |
libxtst6 \
|
| 41 |
lsb-release \
|
| 42 |
-
wget \
|
| 43 |
xdg-utils \
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
&& rm -rf /var/lib/apt/lists/*
|
| 51 |
|
| 52 |
WORKDIR /app
|
|
@@ -57,12 +80,29 @@ COPY requirements.txt .
|
|
| 57 |
# 安装Python依赖
|
| 58 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 59 |
|
| 60 |
-
#
|
| 61 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
|
| 63 |
# 复制应用代码
|
| 64 |
COPY . .
|
| 65 |
|
|
|
|
|
|
|
|
|
|
| 66 |
# 暴露端口
|
| 67 |
EXPOSE 7860
|
| 68 |
|
|
|
|
| 1 |
FROM python:3.10-slim
|
| 2 |
|
| 3 |
+
# 安装系统依赖 (包括Playwright需要的所有依赖)
|
| 4 |
RUN apt-get update && apt-get install -y \
|
| 5 |
wget \
|
| 6 |
gnupg \
|
| 7 |
ca-certificates \
|
| 8 |
fonts-liberation \
|
| 9 |
libasound2 \
|
| 10 |
+
libasound2-dev \
|
| 11 |
libatk-bridge2.0-0 \
|
| 12 |
libatk1.0-0 \
|
| 13 |
libc6 \
|
|
|
|
| 21 |
libgdk-pixbuf2.0-0 \
|
| 22 |
libglib2.0-0 \
|
| 23 |
libgtk-3-0 \
|
| 24 |
+
libgtk-4-1 \
|
| 25 |
libnspr4 \
|
| 26 |
libnss3 \
|
| 27 |
+
libnss3-dev \
|
| 28 |
libpango-1.0-0 \
|
| 29 |
libpangocairo-1.0-0 \
|
| 30 |
libstdc++6 \
|
|
|
|
| 42 |
libxss1 \
|
| 43 |
libxtst6 \
|
| 44 |
lsb-release \
|
|
|
|
| 45 |
xdg-utils \
|
| 46 |
+
# Playwright额外依赖
|
| 47 |
+
gstreamer1.0-base \
|
| 48 |
+
gstreamer1.0-plugins-base \
|
| 49 |
+
gstreamer1.0-plugins-good \
|
| 50 |
+
gstreamer1.0-plugins-bad \
|
| 51 |
+
libgstreamer1.0-0 \
|
| 52 |
+
libgstreamer-plugins-base1.0-0 \
|
| 53 |
+
libgstreamer-plugins-good1.0-0 \
|
| 54 |
+
libgstreamer-plugins-bad1.0-0 \
|
| 55 |
+
libgstapp1.0-0 \
|
| 56 |
+
libgstbase1.0-0 \
|
| 57 |
+
libgstpbutils1.0-0 \
|
| 58 |
+
libgstaudio1.0-0 \
|
| 59 |
+
libgsttag1.0-0 \
|
| 60 |
+
libgstvideo1.0-0 \
|
| 61 |
+
libgstgl1.0-0 \
|
| 62 |
+
libgstcodecparsers1.0-0 \
|
| 63 |
+
libgstfft1.0-0 \
|
| 64 |
+
libgstallocators1.0-0 \
|
| 65 |
+
# 图形和字体库
|
| 66 |
+
libgraphene-1.0-0 \
|
| 67 |
+
libatomic1 \
|
| 68 |
+
libxslt1.1 \
|
| 69 |
+
libwoff1 \
|
| 70 |
+
libvpx7 \
|
| 71 |
+
libevent-2.1-7 \
|
| 72 |
+
libopus0 \
|
| 73 |
&& rm -rf /var/lib/apt/lists/*
|
| 74 |
|
| 75 |
WORKDIR /app
|
|
|
|
| 80 |
# 安装Python依赖
|
| 81 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 82 |
|
| 83 |
+
# 设置环境变量(与我们的init_playwright.py脚本配合)
|
| 84 |
+
ENV PLAYWRIGHT_BROWSERS_PATH=/app/.cache/ms-playwright
|
| 85 |
+
ENV PLAYWRIGHT_SKIP_BROWSER_GC=1
|
| 86 |
+
ENV XDG_CACHE_HOME=/app/.cache
|
| 87 |
+
ENV HOME=/app
|
| 88 |
+
ENV TMPDIR=/app/.cache/tmp
|
| 89 |
+
ENV TMP=/app/.cache/tmp
|
| 90 |
+
ENV TEMP=/app/.cache/tmp
|
| 91 |
+
|
| 92 |
+
# 创建缓存目录并设置权限
|
| 93 |
+
RUN mkdir -p /app/.cache/ms-playwright /app/.cache/tmp && \
|
| 94 |
+
chmod -R 755 /app/.cache
|
| 95 |
+
|
| 96 |
+
# 安装playwright浏览器到指定目录
|
| 97 |
+
RUN playwright install chromium --with-deps || \
|
| 98 |
+
(echo "第一次安装失败,尝试不使用--with-deps" && playwright install chromium)
|
| 99 |
|
| 100 |
# 复制应用代码
|
| 101 |
COPY . .
|
| 102 |
|
| 103 |
+
# 确保所有文件有正确权限
|
| 104 |
+
RUN chmod +x init_playwright.py
|
| 105 |
+
|
| 106 |
# 暴露端口
|
| 107 |
EXPOSE 7860
|
| 108 |
|
init_playwright.py
CHANGED
|
@@ -12,39 +12,72 @@ from pathlib import Path
|
|
| 12 |
|
| 13 |
def setup_environment():
|
| 14 |
"""设置环境变量,解决权限问题"""
|
| 15 |
-
#
|
| 16 |
-
|
| 17 |
-
|
| 18 |
|
| 19 |
-
#
|
| 20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
|
| 22 |
-
#
|
| 23 |
-
|
| 24 |
-
|
|
|
|
| 25 |
|
| 26 |
-
#
|
| 27 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
|
| 29 |
-
print(f"📁 设置缓存目录: {cache_dir}")
|
| 30 |
return str(cache_dir)
|
| 31 |
|
| 32 |
def check_playwright_installation():
|
| 33 |
"""检查playwright是否正确安装"""
|
| 34 |
try:
|
|
|
|
|
|
|
| 35 |
# 尝试导入playwright
|
| 36 |
from playwright.sync_api import sync_playwright
|
|
|
|
| 37 |
|
| 38 |
# 尝试启动浏览器
|
|
|
|
| 39 |
with sync_playwright() as p:
|
| 40 |
browser = p.chromium.launch(headless=True)
|
|
|
|
| 41 |
browser.close()
|
|
|
|
| 42 |
|
| 43 |
print("✅ Playwright 浏览器检查通过!")
|
| 44 |
return True
|
| 45 |
|
| 46 |
except Exception as e:
|
| 47 |
print(f"❌ Playwright 浏览器检查失败: {e}")
|
|
|
|
| 48 |
return False
|
| 49 |
|
| 50 |
def install_browsers():
|
|
@@ -52,19 +85,30 @@ def install_browsers():
|
|
| 52 |
try:
|
| 53 |
print("🔄 正在安装 Playwright 浏览器...")
|
| 54 |
|
| 55 |
-
#
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
"
|
| 59 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
|
| 61 |
if result.returncode != 0:
|
| 62 |
-
print(
|
| 63 |
-
print(f"错误输出: {result.stderr}")
|
| 64 |
return False
|
| 65 |
|
| 66 |
print("✅ Playwright 浏览器安装完成!")
|
| 67 |
return True
|
|
|
|
| 68 |
except subprocess.CalledProcessError as e:
|
| 69 |
print(f"❌ 浏览器安装失败: {e}")
|
| 70 |
return False
|
|
@@ -74,12 +118,13 @@ def install_browsers():
|
|
| 74 |
|
| 75 |
def main():
|
| 76 |
"""主函数"""
|
| 77 |
-
print("🚀 初始化 Playwright (Hugging Face Space
|
| 78 |
|
| 79 |
# 首先设置环境变量
|
| 80 |
cache_dir = setup_environment()
|
| 81 |
|
| 82 |
# 检查是否已正确安装
|
|
|
|
| 83 |
if check_playwright_installation():
|
| 84 |
return True
|
| 85 |
|
|
@@ -87,8 +132,10 @@ def main():
|
|
| 87 |
print("🔧 检测到问题,正在重新安装浏览器...")
|
| 88 |
if install_browsers():
|
| 89 |
# 重新检查
|
|
|
|
| 90 |
return check_playwright_installation()
|
| 91 |
|
|
|
|
| 92 |
return False
|
| 93 |
|
| 94 |
if __name__ == "__main__":
|
|
@@ -96,5 +143,9 @@ if __name__ == "__main__":
|
|
| 96 |
if not success:
|
| 97 |
print("❌ Playwright 初始化失败!")
|
| 98 |
print("💡 提示:如果仍有问题,可能需要在系统级别安装浏览器依赖")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 99 |
sys.exit(1)
|
| 100 |
print("🎉 Playwright 初始化成功!")
|
|
|
|
| 12 |
|
| 13 |
def setup_environment():
|
| 14 |
"""设置环境变量,解决权限问题"""
|
| 15 |
+
# 获取当前工作目录
|
| 16 |
+
current_dir = Path.cwd()
|
| 17 |
+
app_cache_dir = current_dir / ".cache"
|
| 18 |
|
| 19 |
+
# 如果在 /app 目录下,使用 /app/.cache
|
| 20 |
+
if str(current_dir).startswith('/app'):
|
| 21 |
+
cache_dir = app_cache_dir
|
| 22 |
+
else:
|
| 23 |
+
# 否则使用用户主目录
|
| 24 |
+
home_dir = Path.home()
|
| 25 |
+
cache_dir = home_dir / ".cache"
|
| 26 |
|
| 27 |
+
# 确保缓存目录存在并有正确权限
|
| 28 |
+
cache_dir.mkdir(parents=True, exist_ok=True, mode=0o755)
|
| 29 |
+
playwright_cache = cache_dir / "ms-playwright"
|
| 30 |
+
playwright_cache.mkdir(parents=True, exist_ok=True, mode=0o755)
|
| 31 |
|
| 32 |
+
# 强制设置所有相关环境变量
|
| 33 |
+
env_vars = {
|
| 34 |
+
"PLAYWRIGHT_BROWSERS_PATH": str(playwright_cache),
|
| 35 |
+
"PLAYWRIGHT_SKIP_BROWSER_GC": "1",
|
| 36 |
+
"XDG_CACHE_HOME": str(cache_dir),
|
| 37 |
+
"HOME": str(cache_dir.parent), # 确保 HOME 指向正确位置
|
| 38 |
+
"TMPDIR": str(cache_dir / "tmp"),
|
| 39 |
+
"TMP": str(cache_dir / "tmp"),
|
| 40 |
+
"TEMP": str(cache_dir / "tmp"),
|
| 41 |
+
}
|
| 42 |
+
|
| 43 |
+
# 创建临时目录
|
| 44 |
+
tmp_dir = cache_dir / "tmp"
|
| 45 |
+
tmp_dir.mkdir(parents=True, exist_ok=True, mode=0o755)
|
| 46 |
+
|
| 47 |
+
# 设置环境变量
|
| 48 |
+
for key, value in env_vars.items():
|
| 49 |
+
os.environ[key] = value
|
| 50 |
+
print(f"🔧 设置环境变量: {key}={value}")
|
| 51 |
+
|
| 52 |
+
print(f"📁 缓存目录: {cache_dir}")
|
| 53 |
+
print(f"🎭 Playwright缓存: {playwright_cache}")
|
| 54 |
+
print(f"📂 当前工作目录: {current_dir}")
|
| 55 |
|
|
|
|
| 56 |
return str(cache_dir)
|
| 57 |
|
| 58 |
def check_playwright_installation():
|
| 59 |
"""检查playwright是否正确安装"""
|
| 60 |
try:
|
| 61 |
+
print("🔍 开始检查 Playwright 安装...")
|
| 62 |
+
|
| 63 |
# 尝试导入playwright
|
| 64 |
from playwright.sync_api import sync_playwright
|
| 65 |
+
print("✅ Playwright 模块导入成功")
|
| 66 |
|
| 67 |
# 尝试启动浏览器
|
| 68 |
+
print("🌐 尝试启动 Chromium 浏览器...")
|
| 69 |
with sync_playwright() as p:
|
| 70 |
browser = p.chromium.launch(headless=True)
|
| 71 |
+
print("✅ 浏览器启动成功")
|
| 72 |
browser.close()
|
| 73 |
+
print("✅ 浏览器关闭成功")
|
| 74 |
|
| 75 |
print("✅ Playwright 浏览器检查通过!")
|
| 76 |
return True
|
| 77 |
|
| 78 |
except Exception as e:
|
| 79 |
print(f"❌ Playwright 浏览器检查失败: {e}")
|
| 80 |
+
print(f"🔍 错误类型: {type(e).__name__}")
|
| 81 |
return False
|
| 82 |
|
| 83 |
def install_browsers():
|
|
|
|
| 85 |
try:
|
| 86 |
print("🔄 正在安装 Playwright 浏览器...")
|
| 87 |
|
| 88 |
+
# 打印当前环境变量
|
| 89 |
+
print("🔍 当前环境变量:")
|
| 90 |
+
for key in ["PLAYWRIGHT_BROWSERS_PATH", "XDG_CACHE_HOME", "HOME", "TMPDIR"]:
|
| 91 |
+
print(f" {key}: {os.environ.get(key, 'NOT SET')}")
|
| 92 |
+
|
| 93 |
+
# 使用更详细的安装命令
|
| 94 |
+
cmd = [sys.executable, "-m", "playwright", "install", "chromium"]
|
| 95 |
+
print(f"🚀 执行命令: {' '.join(cmd)}")
|
| 96 |
+
|
| 97 |
+
result = subprocess.run(cmd, capture_output=True, text=True, env=os.environ.copy())
|
| 98 |
+
|
| 99 |
+
print(f"📤 命令返回码: {result.returncode}")
|
| 100 |
+
if result.stdout:
|
| 101 |
+
print(f"📜 标准输出:\n{result.stdout}")
|
| 102 |
+
if result.stderr:
|
| 103 |
+
print(f"⚠️ 错误输出:\n{result.stderr}")
|
| 104 |
|
| 105 |
if result.returncode != 0:
|
| 106 |
+
print("❌ 安装命令执行失败")
|
|
|
|
| 107 |
return False
|
| 108 |
|
| 109 |
print("✅ Playwright 浏览器安装完成!")
|
| 110 |
return True
|
| 111 |
+
|
| 112 |
except subprocess.CalledProcessError as e:
|
| 113 |
print(f"❌ 浏览器安装失败: {e}")
|
| 114 |
return False
|
|
|
|
| 118 |
|
| 119 |
def main():
|
| 120 |
"""主函数"""
|
| 121 |
+
print("🚀 初始化 Playwright (Hugging Face Space 增强版)...")
|
| 122 |
|
| 123 |
# 首先设置环境变量
|
| 124 |
cache_dir = setup_environment()
|
| 125 |
|
| 126 |
# 检查是否已正确安装
|
| 127 |
+
print("🔍 第一次检查...")
|
| 128 |
if check_playwright_installation():
|
| 129 |
return True
|
| 130 |
|
|
|
|
| 132 |
print("🔧 检测到问题,正在重新安装浏览器...")
|
| 133 |
if install_browsers():
|
| 134 |
# 重新检查
|
| 135 |
+
print("🔍 安装后重新检查...")
|
| 136 |
return check_playwright_installation()
|
| 137 |
|
| 138 |
+
print("😞 所有尝试都失败了")
|
| 139 |
return False
|
| 140 |
|
| 141 |
if __name__ == "__main__":
|
|
|
|
| 143 |
if not success:
|
| 144 |
print("❌ Playwright 初始化失败!")
|
| 145 |
print("💡 提示:如果仍有问题,可能需要在系统级别安装浏览器依赖")
|
| 146 |
+
print("🔍 调试信息:")
|
| 147 |
+
print(f" - 当前用户: {os.getuid() if hasattr(os, 'getuid') else 'N/A'}")
|
| 148 |
+
print(f" - 工作目录: {Path.cwd()}")
|
| 149 |
+
print(f" - 环境变量 HOME: {os.environ.get('HOME', 'NOT SET')}")
|
| 150 |
sys.exit(1)
|
| 151 |
print("🎉 Playwright 初始化成功!")
|