tomo2chin2 committed on
Commit
80b6431
·
verified ·
1 Parent(s): b866995

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +102 -229
app.py CHANGED
@@ -363,7 +363,8 @@ def trim_image_whitespace(image, threshold=250, padding=10):
363
  logger.info(f"画像をトリミングしました: 元サイズ {width}x{height} → トリミング後 {trimmed.width}x{trimmed.height}")
364
  return trimmed
365
 
366
- # --- Core Screenshot Logic ---
 
367
  def render_fullpage_screenshot(html_code: str, extension_percentage: float = 6.0,
368
  trim_whitespace: bool = True) -> Image.Image:
369
  """
@@ -371,102 +372,14 @@ def render_fullpage_screenshot(html_code: str, extension_percentage: float = 6.0
371
 
372
  Args:
373
  html_code: The HTML source code string.
374
- extension_percentage: Percentage of extra space to add vertically (e.g., 4 means 4% total).
375
  trim_whitespace: Whether to trim excess whitespace from the image.
376
 
377
  Returns:
378
- A PIL Image object of the screenshot. Returns a 1x1 black image on error.
379
- """
380
- tmp_path = None # 初期化
381
- driver = None # 初期化
382
-
383
- # JavaScript関数を定義 - 古い構文を使用して互換性を確保
384
- resource_check_script = """
385
- return (function() {
386
- var callback = arguments[arguments.length - 1];
387
-
388
- // Font Awesomeの読み込み確認
389
- function checkFontAwesome() {
390
- var icons = document.querySelectorAll('.fa, .fas, .far, .fab, [class*="fa-"]');
391
- if (icons.length > 0) {
392
- console.log('Font Awesome icons found:', icons.length);
393
- // タイムアウトベースの簡易アプローチ
394
- setTimeout(function() { callback(true); }, 1500);
395
- } else {
396
- // アイコンがない場合
397
- setTimeout(function() { callback(true); }, 800);
398
- }
399
- }
400
-
401
- // DOMContentLoadedまたはloadイベント後にチェック
402
- if (document.readyState === 'complete') {
403
- checkFontAwesome();
404
- } else {
405
- window.addEventListener('load', checkFontAwesome);
406
- }
407
- })();
408
- """
409
-
410
- scroll_script = """
411
- return (function() {
412
- var callback = arguments[arguments.length - 1];
413
-
414
- var height = Math.max(
415
- document.documentElement.scrollHeight,
416
- document.body.scrollHeight
417
- );
418
- var viewportHeight = window.innerHeight;
419
-
420
- // ページを少しずつスクロールして全体を描画させる
421
- var scrollStep = Math.floor(viewportHeight * 0.8);
422
- var currentPos = 0;
423
-
424
- function scrollDown() {
425
- if (currentPos < height) {
426
- window.scrollTo(0, currentPos);
427
- currentPos += scrollStep;
428
- setTimeout(scrollDown, 100);
429
- } else {
430
- // 最後にトップに戻す
431
- window.scrollTo(0, 0);
432
- setTimeout(function() { callback(true); }, 300);
433
- }
434
- }
435
-
436
- scrollDown();
437
- })();
438
- """
439
-
440
- stability_script = """
441
- return (function() {
442
- var callback = arguments[arguments.length - 1];
443
-
444
- var lastHeight = document.body.offsetHeight;
445
- var lastWidth = document.body.offsetWidth;
446
- var stableCount = 0;
447
-
448
- function checkStability() {
449
- var currentHeight = document.body.offsetHeight;
450
- var currentWidth = document.body.offsetWidth;
451
-
452
- if (currentHeight === lastHeight && currentWidth === lastWidth) {
453
- stableCount++;
454
- if (stableCount >= 3) { // 3回連続で同じなら安定と判断
455
- callback(true);
456
- return;
457
- }
458
- } else {
459
- stableCount = 0;
460
- lastHeight = currentHeight;
461
- lastWidth = currentWidth;
462
- }
463
-
464
- setTimeout(checkStability, 200); // 200ms間隔でチェック
465
- }
466
-
467
- checkStability();
468
- })();
469
  """
 
 
470
 
471
  # 1) Save HTML code to a temporary file
472
  try:
@@ -476,7 +389,7 @@ def render_fullpage_screenshot(html_code: str, extension_percentage: float = 6.0
476
  logger.info(f"HTML saved to temporary file: {tmp_path}")
477
  except Exception as e:
478
  logger.error(f"Error writing temporary HTML file: {e}")
479
- return Image.new('RGB', (1, 1), color=(0, 0, 0)) # エラー時は黒画像
480
 
481
  # 2) Headless Chrome(Chromium) options
482
  options = Options()
@@ -484,7 +397,6 @@ def render_fullpage_screenshot(html_code: str, extension_percentage: float = 6.0
484
  options.add_argument("--no-sandbox")
485
  options.add_argument("--disable-dev-shm-usage")
486
  options.add_argument("--force-device-scale-factor=1")
487
- # Font Awesomeが読み込まれない場合があるため、読み込み待機時間を長く設定
488
  options.add_argument("--disable-features=NetworkService")
489
  options.add_argument("--dns-prefetch-disable")
490
 
@@ -493,7 +405,7 @@ def render_fullpage_screenshot(html_code: str, extension_percentage: float = 6.0
493
  driver = webdriver.Chrome(options=options)
494
  logger.info("WebDriver initialized.")
495
 
496
- # 3) 初期ウィンドウサイズを設定(コンテンツの種類に関わらず同じサイズ)
497
  initial_width = 1200
498
  initial_height = 1000
499
  driver.set_window_size(initial_width, initial_height)
@@ -501,63 +413,58 @@ def render_fullpage_screenshot(html_code: str, extension_percentage: float = 6.0
501
  logger.info(f"Navigating to {file_url}")
502
  driver.get(file_url)
503
 
504
- # 4) Wait for page load with extended timeout
505
  logger.info("Waiting for body element...")
506
  WebDriverWait(driver, 15).until(
507
  EC.presence_of_element_located((By.TAG_NAME, "body"))
508
  )
509
- logger.info("Body element found. Waiting for potential resource loading...")
510
 
511
- # リソース読み込みの基本的な待機時間
512
- time.sleep(2)
513
 
514
- # 5) Font Awesomeと外部リソースの読み込み完了を確認 - エラーハンドリング改善
515
- logger.info("Waiting for Font Awesome and other resources to load...")
516
- try:
517
- driver.set_script_timeout(10) # タイムアウト時間を短縮
518
- driver.execute_async_script(resource_check_script)
519
- logger.info("Resources loading check completed")
520
- except Exception as e:
521
- if "not defined" in str(e):
522
- # 定義エラーは発生しないはず - ログ出力のみ
523
- logger.warning(f"スクリプト変数エラー: {e}")
524
- time.sleep(3)
525
- else:
526
- # 他のエラーでも処理を続行(短いタイムアウト)
527
- logger.info(f"リソースチェックをスキップ: {str(e).split('Stacktrace')[0]}")
528
- time.sleep(4)
529
-
530
- # 6) コンテンツ全体を描画するためのスクロール処理
531
- logger.info("Rendering full page content...")
532
- try:
533
- driver.execute_async_script(scroll_script)
534
- except Exception as e:
535
- # エラーログを簡素化
536
- logger.info(f"スクロール処理をスキップ: {str(e).split('Stacktrace')[0]}")
537
- # フォールバック: 単純なスクロール処理
538
- try:
539
- driver.execute_script("""
540
- window.scrollTo(0, document.body.scrollHeight);
541
- setTimeout(function() { window.scrollTo(0, 0); }, 500);
542
- """)
543
- time.sleep(1.5)
544
- except:
545
- pass
546
-
547
- # 7) Hide scrollbars via CSS
548
- try:
549
- driver.execute_script(
550
- "document.documentElement.style.overflow = 'hidden';"
551
- "document.body.style.overflow = 'hidden';"
552
- )
553
- logger.info("Scrollbars hidden via JS.")
554
- except Exception as e:
555
- logger.info(f"Could not hide scrollbars: {str(e).split('Stacktrace')[0]}")
556
-
557
- # 8) Get full page dimensions accurately with improved script
558
- try:
559
- # より正確なページ寸法を取得するためのJavaScriptコード
560
- dimensions_script = """
561
  return {
562
  width: Math.max(
563
  document.documentElement.scrollWidth,
@@ -576,114 +483,80 @@ def render_fullpage_screenshot(html_code: str, extension_percentage: float = 6.0
576
  document.body ? document.body.clientHeight : 0
577
  )
578
  };
579
- """
580
- dimensions = driver.execute_script(dimensions_script)
581
- scroll_width = dimensions['width']
582
- scroll_height = dimensions['height']
583
-
584
- logger.info(f"Detected dimensions: width={scroll_width}, height={scroll_height}")
585
-
586
- # スクロールして確認する追加の検証
587
- driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
588
- time.sleep(1) # スクロール完了を待つ
589
- driver.execute_script("window.scrollTo(0, 0);")
590
- time.sleep(1) # 元の位置に戻す
591
-
592
- # 再検証
593
- dimensions_after_scroll = driver.execute_script(dimensions_script)
594
- scroll_height = max(scroll_height, dimensions_after_scroll['height'])
595
-
596
- logger.info(f"After scroll check, height={scroll_height}")
597
-
598
- # 最小値と最大値の設定
599
- scroll_width = max(scroll_width, 100) # 最小幅
600
- scroll_height = max(scroll_height, 100) # 最小高さ
601
-
602
- scroll_width = min(scroll_width, 2000) # 最大幅
603
- scroll_height = min(scroll_height, 4000) # 最大高さ
604
-
605
- except Exception as e:
606
- logger.error(f"Error getting page dimensions: {e}")
607
- # フォールバックとしてデフォルト値を設定
608
- scroll_width = 1200
609
- scroll_height = 1000
610
- logger.warning(f"Falling back to dimensions: width={scroll_width}, height={scroll_height}")
611
-
612
- # 9) レイアウト安定化の確認 - 簡素化
613
- logger.info("Checking layout stability...")
614
- try:
615
- driver.set_script_timeout(5) # 短いタイムアウト
616
- driver.execute_async_script(stability_script)
617
- logger.info("Layout stability verified")
618
- except Exception as e:
619
- # エラーログを簡素化
620
- logger.info(f"安定性チェックをスキップ: {str(e).split('Stacktrace')[0]}")
621
- # 代わりに固定待機
622
- time.sleep(2)
623
-
624
- # 10) Calculate adjusted height with user-specified margin
625
  adjusted_height = int(scroll_height * (1 + extension_percentage / 100.0))
626
- # Ensure adjusted height is not excessively large or small
627
- adjusted_height = max(adjusted_height, scroll_height, 100) # 最小高さを確保
628
  logger.info(f"Adjusted height calculated: {adjusted_height} (extension: {extension_percentage}%)")
629
 
630
- # 11) Set window size to full page dimensions
631
  adjusted_width = scroll_width
632
  logger.info(f"Resizing window to: width={adjusted_width}, height={adjusted_height}")
633
  driver.set_window_size(adjusted_width, adjusted_height)
634
- logger.info("Waiting for layout stabilization after resize...")
635
 
636
- # レイアウト安定化のための待機
637
- time.sleep(3) # 統一した待機時間
638
-
639
- # 外部リソースの読み込み状態を確認
640
- try:
641
- resource_state = driver.execute_script("""
642
  return {
643
  readyState: document.readyState,
644
  resourcesComplete: !document.querySelector('img:not([complete])') &&
645
- !document.querySelector('link[rel="stylesheet"]:not([loaded])')
646
  };
647
- """)
648
- logger.info(f"Resource state: {resource_state}")
649
-
650
- # ドキュメントの読み込みが完了していない場合、追加で待機
651
- if resource_state['readyState'] != 'complete':
652
- logger.info("Document still loading, waiting additional time...")
653
- time.sleep(2)
654
- except Exception as e:
655
- logger.info(f"Resource state check skipped: {str(e).split('Stacktrace')[0]}")
656
-
657
- # Scroll to top just in case
658
- try:
659
- driver.execute_script("window.scrollTo(0, 0)")
660
  time.sleep(1)
661
- logger.info("Scrolled to top.")
662
- except Exception as e:
663
- logger.info(f"Could not scroll to top: {str(e).split('Stacktrace')[0]}")
 
 
664
 
665
- # 12) Take screenshot
666
  logger.info("Taking screenshot...")
667
  png = driver.get_screenshot_as_png()
668
  logger.info("Screenshot taken successfully.")
669
 
670
- # Convert to PIL Image
671
  img = Image.open(BytesIO(png))
672
-
673
- # 画像サイズの確認とログ
674
  logger.info(f"Screenshot dimensions: {img.width}x{img.height}")
675
 
676
- # 余白トリミングが有効な場合
677
  if trim_whitespace:
678
- # 余分な空白をトリミング
679
  img = trim_image_whitespace(img, threshold=248, padding=20)
680
  logger.info(f"Trimmed dimensions: {img.width}x{img.height}")
681
 
682
  return img
683
 
684
  except Exception as e:
685
- logger.error(f"An error occurred during screenshot generation: {e}", exc_info=True)
686
- return Image.new('RGB', (1, 1), color=(0, 0, 0)) # Return black 1x1 image on error
687
  finally:
688
  logger.info("Cleaning up...")
689
  if driver:
@@ -698,7 +571,7 @@ def render_fullpage_screenshot(html_code: str, extension_percentage: float = 6.0
698
  logger.info(f"Temporary file {tmp_path} removed.")
699
  except Exception as e:
700
  logger.error(f"Error removing temporary file {tmp_path}: {e}")
701
-
702
  # --- Geminiを使った新しい関数 ---
703
  def text_to_screenshot(text: str, extension_percentage: float, temperature: float = 0.3, trim_whitespace: bool = True) -> Image.Image:
704
  """テキストをGemini APIでHTMLに変換し、スクリーンショットを生成する統合関数"""
 
363
  logger.info(f"画像をトリミングしました: 元サイズ {width}x{height} → トリミング後 {trimmed.width}x{trimmed.height}")
364
  return trimmed
365
 
366
+ # 非同期スクリプトを使わず、同期的なスクリプトのみ使用する改善版
367
+
368
  def render_fullpage_screenshot(html_code: str, extension_percentage: float = 6.0,
369
  trim_whitespace: bool = True) -> Image.Image:
370
  """
 
372
 
373
  Args:
374
  html_code: The HTML source code string.
375
+ extension_percentage: Percentage of extra space to add vertically.
376
  trim_whitespace: Whether to trim excess whitespace from the image.
377
 
378
  Returns:
379
+ A PIL Image object of the screenshot.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
380
  """
381
+ tmp_path = None
382
+ driver = None
383
 
384
  # 1) Save HTML code to a temporary file
385
  try:
 
389
  logger.info(f"HTML saved to temporary file: {tmp_path}")
390
  except Exception as e:
391
  logger.error(f"Error writing temporary HTML file: {e}")
392
+ return Image.new('RGB', (1, 1), color=(0, 0, 0))
393
 
394
  # 2) Headless Chrome(Chromium) options
395
  options = Options()
 
397
  options.add_argument("--no-sandbox")
398
  options.add_argument("--disable-dev-shm-usage")
399
  options.add_argument("--force-device-scale-factor=1")
 
400
  options.add_argument("--disable-features=NetworkService")
401
  options.add_argument("--dns-prefetch-disable")
402
 
 
405
  driver = webdriver.Chrome(options=options)
406
  logger.info("WebDriver initialized.")
407
 
408
+ # 3) 初期ウィンドウサイズを設定
409
  initial_width = 1200
410
  initial_height = 1000
411
  driver.set_window_size(initial_width, initial_height)
 
413
  logger.info(f"Navigating to {file_url}")
414
  driver.get(file_url)
415
 
416
+ # 4) ページ読み込み待機
417
  logger.info("Waiting for body element...")
418
  WebDriverWait(driver, 15).until(
419
  EC.presence_of_element_located((By.TAG_NAME, "body"))
420
  )
421
+ logger.info("Body element found. Waiting for resource loading...")
422
 
423
+ # 5) 基本的なリソース読み込み待機 - タイムアウト回避
424
+ time.sleep(3)
425
 
426
+ # Font Awesome読み込み確認 - 非同期を使わない
427
+ logger.info("Checking for Font Awesome resources...")
428
+ fa_count = driver.execute_script("""
429
+ var icons = document.querySelectorAll('.fa, .fas, .far, .fab, [class*="fa-"]');
430
+ return icons.length;
431
+ """)
432
+ logger.info(f"Found {fa_count} Font Awesome elements")
433
+
434
+ # リソース読み込み状態を確認
435
+ doc_ready = driver.execute_script("return document.readyState;")
436
+ logger.info(f"Document ready state: {doc_ready}")
437
+
438
+ # Font Awesomeが多い場合は追加待機
439
+ if fa_count > 50:
440
+ logger.info("Many Font Awesome icons detected, waiting additional time")
441
+ time.sleep(2)
442
+
443
+ # 6) コンテンツレンダリングのためのスクロール処理 - 同期的に実行
444
+ logger.info("Performing content rendering scroll...")
445
+ total_height = driver.execute_script("return Math.max(document.body.scrollHeight, document.documentElement.scrollHeight);")
446
+ viewport_height = driver.execute_script("return window.innerHeight;")
447
+ scrolls_needed = max(1, total_height // viewport_height)
448
+
449
+ for i in range(scrolls_needed + 1):
450
+ scroll_pos = i * (viewport_height - 200) # オーバーラップさせる
451
+ driver.execute_script(f"window.scrollTo(0, {scroll_pos});")
452
+ time.sleep(0.2) # 短い待機
453
+
454
+ # トップに戻る
455
+ driver.execute_script("window.scrollTo(0, 0);")
456
+ time.sleep(0.5)
457
+ logger.info("Scroll rendering completed")
458
+
459
+ # 7) スクロールバーを非表示に
460
+ driver.execute_script("""
461
+ document.documentElement.style.overflow = 'hidden';
462
+ document.body.style.overflow = 'hidden';
463
+ """)
464
+ logger.info("Scrollbars hidden")
465
+
466
+ # 8) ページの寸法を取得
467
+ dimensions = driver.execute_script("""
 
 
 
 
 
468
  return {
469
  width: Math.max(
470
  document.documentElement.scrollWidth,
 
483
  document.body ? document.body.clientHeight : 0
484
  )
485
  };
486
+ """)
487
+ scroll_width = dimensions['width']
488
+ scroll_height = dimensions['height']
489
+ logger.info(f"Detected dimensions: width={scroll_width}, height={scroll_height}")
490
+
491
+ # 再検証 - 短いスクロールで再確認
492
+ driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
493
+ time.sleep(0.5)
494
+ driver.execute_script("window.scrollTo(0, 0);")
495
+ time.sleep(0.5)
496
+
497
+ dimensions_after = driver.execute_script("return {height: Math.max(document.documentElement.scrollHeight, document.body.scrollHeight)};")
498
+ scroll_height = max(scroll_height, dimensions_after['height'])
499
+ logger.info(f"After scroll check, height={scroll_height}")
500
+
501
+ # 最小/最大値の設定
502
+ scroll_width = max(scroll_width, 100)
503
+ scroll_height = max(scroll_height, 100)
504
+ scroll_width = min(scroll_width, 2000)
505
+ scroll_height = min(scroll_height, 4000)
506
+
507
+ # 9) レイアウト安定化のための単純な待機 - タイムアウト回避
508
+ logger.info("Waiting for layout stabilization...")
509
+ time.sleep(2)
510
+
511
+ # 10) 高さに余白を追加
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
512
  adjusted_height = int(scroll_height * (1 + extension_percentage / 100.0))
513
+ adjusted_height = max(adjusted_height, scroll_height, 100)
 
514
  logger.info(f"Adjusted height calculated: {adjusted_height} (extension: {extension_percentage}%)")
515
 
516
+ # 11) ウィンドウサイズを調整
517
  adjusted_width = scroll_width
518
  logger.info(f"Resizing window to: width={adjusted_width}, height={adjusted_height}")
519
  driver.set_window_size(adjusted_width, adjusted_height)
520
+ time.sleep(1)
521
 
522
+ # リソース状態を確認 - 同期的スクリプト
523
+ resource_state = driver.execute_script("""
 
 
 
 
524
  return {
525
  readyState: document.readyState,
526
  resourcesComplete: !document.querySelector('img:not([complete])') &&
527
+ !document.querySelector('link[rel="stylesheet"]:not([loaded])')
528
  };
529
+ """)
530
+ logger.info(f"Resource state: {resource_state}")
531
+
532
+ if resource_state['readyState'] != 'complete':
533
+ logger.info("Document still loading, waiting additional time...")
 
 
 
 
 
 
 
 
534
  time.sleep(1)
535
+
536
+ # トップにスクロール
537
+ driver.execute_script("window.scrollTo(0, 0);")
538
+ time.sleep(0.5)
539
+ logger.info("Scrolled to top.")
540
 
541
+ # 12) スクリーンショット取得
542
  logger.info("Taking screenshot...")
543
  png = driver.get_screenshot_as_png()
544
  logger.info("Screenshot taken successfully.")
545
 
546
+ # PIL画像に変換
547
  img = Image.open(BytesIO(png))
 
 
548
  logger.info(f"Screenshot dimensions: {img.width}x{img.height}")
549
 
550
+ # 余白トリミング
551
  if trim_whitespace:
 
552
  img = trim_image_whitespace(img, threshold=248, padding=20)
553
  logger.info(f"Trimmed dimensions: {img.width}x{img.height}")
554
 
555
  return img
556
 
557
  except Exception as e:
558
+ logger.error(f"Error during screenshot generation: {e}")
559
+ return Image.new('RGB', (1, 1), color=(0, 0, 0))
560
  finally:
561
  logger.info("Cleaning up...")
562
  if driver:
 
571
  logger.info(f"Temporary file {tmp_path} removed.")
572
  except Exception as e:
573
  logger.error(f"Error removing temporary file {tmp_path}: {e}")
574
+
575
  # --- Geminiを使った新しい関数 ---
576
  def text_to_screenshot(text: str, extension_percentage: float, temperature: float = 0.3, trim_whitespace: bool = True) -> Image.Image:
577
  """テキストをGemini APIでHTMLに変換し、スクリーンショットを生成する統合関数"""