# Web-Archive / archiver.py
# WebAI Deployer
# Update Camouflage App (2026-01-10)
# e67896b
import time
import os
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
class WebArchiver:
    """Capture a screenshot and basic metadata for a web page via headless Chrome.

    The Chrome options are built once at construction time. Every call to
    ``archive_url`` starts its own browser and quits it before returning.
    """

    # Directory (relative to the working directory) that receives snapshots.
    ARCHIVE_DIR = "archive_store"
    # Seconds to pause after navigation so dynamic content can load.
    RENDER_WAIT_SECONDS = 2

    def __init__(self):
        self._init_driver()

    def _init_driver(self):
        """Build the Chrome options used by every job and store them on ``self``."""
        chrome_options = Options()
        for flag in ("--headless", "--no-sandbox", "--disable-dev-shm-usage"):
            chrome_options.add_argument(flag)

        # Bind to the installed Chrome binary when CHROME_BIN is set.
        chrome_bin = os.getenv("CHROME_BIN")
        if chrome_bin:
            chrome_options.binary_location = chrome_bin

        self.chrome_options = chrome_options

    @staticmethod
    def _is_http_url(url):
        """Return True when *url* is a string beginning with http:// or https://."""
        # URL schemes are case-insensitive, so compare on a lowered copy.
        return isinstance(url, str) and url.lower().startswith(("http://", "https://"))

    def archive_url(self, url):
        """Load *url* in headless Chrome, save a screenshot and return a report.

        Args:
            url: Address to archive. Must be a string that starts with
                ``http://`` or ``https://`` (case-insensitive).

        Returns:
            A human-readable status string. On success it lists the page
            title, the snapshot path, the UTF-8 size of the page source in KB
            and the engine name. On invalid input or any failure it is an
            error message. This method does not raise for ``Exception``
            subclasses; they are converted into the returned message.
        """
        # NOTE(review): the leading characters of the status strings in this
        # method look like mis-encoded emoji (cross mark / check mark). They
        # are left unchanged because callers may match on the exact text.
        if not self._is_http_url(url):
            return "โŒ Error: Invalid URL scheme."

        try:
            # A fresh browser per job keeps state from leaking between jobs.
            from selenium.webdriver.chrome.service import Service

            service = Service(ChromeDriverManager().install())
            driver = webdriver.Chrome(service=service, options=self.chrome_options)
            try:
                driver.get(url)
                time.sleep(self.RENDER_WAIT_SECONDS)  # Wait for dynamic content
                title = driver.title

                # The target directory must exist: save_screenshot() reports an
                # I/O problem by returning False rather than raising.
                os.makedirs(self.ARCHIVE_DIR, exist_ok=True)
                # NOTE(review): whole-second timestamps mean two jobs finishing
                # in the same second write to the same file name.
                screenshot_path = f"{self.ARCHIVE_DIR}/snap_{int(time.time())}.png"
                if not driver.save_screenshot(screenshot_path):
                    raise OSError(f"could not write snapshot to {screenshot_path}")

                # Simulate WARC metadata. Measure encoded bytes, not characters,
                # so the figure labelled "KB" is a real byte size.
                page_source = driver.page_source
                size_kb = len(page_source.encode("utf-8", errors="replace")) / 1024

                return f"โœ… Archival Complete.\n\nTitle: {title}\nSnapshot: {screenshot_path}\nSize: {size_kb:.1f} KB\nEngine: Chrome Headless"
            finally:
                driver.quit()
        except Exception as e:
            # Boundary handler: this API reports failures as text, not exceptions.
            return f"โŒ Archival Failed: {e}"