tomo2chin2 committed on
Commit
0f078bd
·
verified ·
1 Parent(s): 1cb214e

Fix Playwright async API compatibility

Browse files
Files changed (1) hide show
  1. app.py +12 -12
app.py CHANGED
@@ -9,7 +9,7 @@ from fastapi import FastAPI, HTTPException, File, UploadFile
9
  from fastapi.responses import JSONResponse, RedirectResponse
10
  from fastapi.middleware.cors import CORSMiddleware
11
  from pydantic import BaseModel
12
- from playwright.sync_api import sync_playwright
13
  from huggingface_hub import HfApi, upload_file
14
 
15
 
@@ -24,10 +24,10 @@ class PDFResponse(BaseModel):
24
  repository_url: str
25
 
26
 
27
- def html_to_pdf_api(html_content: str) -> tuple[str, str]:
28
  """
29
  HTMLコンテンツをPDFに変換してHugging Faceデータセットリポジトリにアップロード
30
- knowledge.txtのPlaywright手法を使用
31
  """
32
  try:
33
  # 環境変数からリポジトリ情報を取得
@@ -48,26 +48,26 @@ def html_to_pdf_api(html_content: str) -> tuple[str, str]:
48
  with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as temp_file:
49
  temp_path = temp_file.name
50
 
51
- # Playwrightでヘッドレスブラウザを起動
52
- with sync_playwright() as pw:
53
- browser = pw.chromium.launch(headless=True)
54
- page = browser.new_page()
55
 
56
  # HTMLコンテンツを設定(外部リソース読み込み待機)
57
- page.set_content(html_content, wait_until="networkidle")
58
 
59
  # 印刷メディアを有効にする
60
- page.emulate_media(media="print")
61
 
62
  # PDFを生成(test.htmlの設定に準拠)
63
- page.pdf(
64
  path=temp_path,
65
  format="A4",
66
  print_background=True,
67
  margin={"top":"15mm","bottom":"15mm","left":"15mm","right":"15mm"}
68
  )
69
 
70
- browser.close()
71
 
72
  # Hugging Face リポジトリにアップロード
73
  api = HfApi(token=hf_token)
@@ -151,7 +151,7 @@ async def convert_html_to_pdf(request: HTMLRequest):
151
  raise HTTPException(status_code=400, detail="HTMLコンテンツが空です")
152
 
153
  try:
154
- filename, pdf_url = html_to_pdf_api(request.html_content)
155
  hf_repo_id = os.getenv("HF_DATASET_REPO_ID")
156
  repository_url = f"https://huggingface.co/datasets/{hf_repo_id}"
157
 
 
9
  from fastapi.responses import JSONResponse, RedirectResponse
10
  from fastapi.middleware.cors import CORSMiddleware
11
  from pydantic import BaseModel
12
+ from playwright.async_api import async_playwright
13
  from huggingface_hub import HfApi, upload_file
14
 
15
 
 
24
  repository_url: str
25
 
26
 
27
+ async def html_to_pdf_api(html_content: str) -> tuple[str, str]:
28
  """
29
  HTMLコンテンツをPDFに変換してHugging Faceデータセットリポジトリにアップロード
30
+ knowledge.txtのPlaywright手法を使用(Async版)
31
  """
32
  try:
33
  # 環境変数からリポジトリ情報を取得
 
48
  with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as temp_file:
49
  temp_path = temp_file.name
50
 
51
+ # Playwrightでヘッドレスブラウザを起動(Async版)
52
+ async with async_playwright() as pw:
53
+ browser = await pw.chromium.launch(headless=True)
54
+ page = await browser.new_page()
55
 
56
  # HTMLコンテンツを設定(外部リソース読み込み待機)
57
+ await page.set_content(html_content, wait_until="networkidle")
58
 
59
  # 印刷メディアを有効にする
60
+ await page.emulate_media(media="print")
61
 
62
  # PDFを生成(test.htmlの設定に準拠)
63
+ await page.pdf(
64
  path=temp_path,
65
  format="A4",
66
  print_background=True,
67
  margin={"top":"15mm","bottom":"15mm","left":"15mm","right":"15mm"}
68
  )
69
 
70
+ await browser.close()
71
 
72
  # Hugging Face リポジトリにアップロード
73
  api = HfApi(token=hf_token)
 
151
  raise HTTPException(status_code=400, detail="HTMLコンテンツが空です")
152
 
153
  try:
154
+ filename, pdf_url = await html_to_pdf_api(request.html_content)
155
  hf_repo_id = os.getenv("HF_DATASET_REPO_ID")
156
  repository_url = f"https://huggingface.co/datasets/{hf_repo_id}"
157