import os
import re
import json
import zipfile
import requests
import hashlib
import time
import logging
from io import BytesIO
from urllib.parse import urljoin, urlparse, unquote
from bs4 import BeautifulSoup
from selenium.webdriver.common.by import By

logger = logging.getLogger(__name__)


class SiteBackup:
    """Full site backup with optimized recursive crawling."""

    def __init__(self, driver, url, max_depth=2, max_pages=30):
        self.driver = driver
        self.url = url
        self.base_url = self._get_base_url(url)
        self.domain = urlparse(url).netloc
        self.downloaded_assets = {}
        self.asset_counter = 0
        self.errors = []
        self.url_to_local = {}
        self.max_depth = max_depth
        self.max_pages = max_pages
        self.visited_pages = {}
        self.page_queue = []
        self.zip_files = {}
        self.discovered_urls = set()
        self.start_time = time.time()
        self.max_time = 180

    def _timeout_reached(self):
        return (time.time() - self.start_time) > self.max_time

    def _get_base_url(self, url):
        parsed = urlparse(url)
        return f"{parsed.scheme}://{parsed.netloc}"

    def _safe_filename(self, url, extension=None):
        self.asset_counter += 1
        parsed = urlparse(url)
        path = unquote(parsed.path).strip("/")
        query = parsed.query
        if path:
            name = path.replace("/", "_").replace("\\", "_")
            name = re.sub(r'[<>:"|?*]', '_', name)
            if query:
                q_hash = hashlib.md5(query.encode()).hexdigest()[:6]
                name = f"{name}_{q_hash}"
        else:
            name = f"asset_{self.asset_counter}"
        if extension and not name.lower().endswith(extension.lower()):
            name = f"{name}{extension}"
        if len(name) > 150:
            hash_str = hashlib.md5(url.encode()).hexdigest()[:8]
            ext = os.path.splitext(name)[1] or (extension or "")
            name = f"{hash_str}{ext}"
        return name

    def _page_filename(self, url):
        if url == self.url:
            return "index.html"
        parsed = urlparse(url)
        path = unquote(parsed.path).strip("/")
        query = parsed.query
        if path:
            name = path.replace("/", "_").replace("\\", "_")
            name = re.sub(r'[<>:"|?*]', '_', name)
        else:
            name = "page"
        if query:
            q_hash = hashlib.md5(query.encode()).hexdigest()[:6]
            name = f"{name}_{q_hash}"
        if not name.endswith(".html"):
            name = f"{name}.html"
        return f"pages/{name}"

    def _download_asset(self, url):
        if self._timeout_reached():
            return None
        if url in self.downloaded_assets:
            return self.downloaded_assets[url]
        try:
            selenium_cookies = {}
            try:
                for c in self.driver.get_cookies():
                    selenium_cookies[c['name']] = c['value']
            except:
                pass
            headers = {
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                              "AppleWebKit/537.36 Chrome/120.0.0.0 Safari/537.36",
                "Referer": self.url
            }
            response = requests.get(url, headers=headers, timeout=10,
                                    verify=False, cookies=selenium_cookies)
            if response.status_code == 200:
                self.downloaded_assets[url] = response.content
                return response.content
        except:
            pass
        return None

    def _classify_asset(self, url):
        u = url.lower().split('?')[0].split('#')[0]
        if '.css' in u: return "css"
        if any(e in u for e in ['.js', '.mjs']): return "js"
        if any(e in u for e in ['.png', '.jpg', '.jpeg', '.gif', '.svg', '.webp', '.ico', '.bmp', '.avif']): return "images"
        if any(e in u for e in ['.woff', '.woff2', '.ttf', '.eot', '.otf']): return "fonts"
        if any(e in u for e in ['.mp4', '.webm', '.ogg', '.mp3', '.wav']): return "media"
        return "assets"

    def _is_same_site(self, url):
        try:
            parsed = urlparse(url)
            return parsed.netloc == self.domain or parsed.netloc == ''
        except:
            return False

    def _wait_page_load(self, timeout=10):
        for _ in range(timeout * 2):
            time.sleep(0.5)
            try:
                if self.driver.execute_script("return document.readyState;") == "complete":
                    return True
            except:
                return False
        return False

    def _collect_page_links(self):
        try:
            return self.driver.execute_script("""
                var r = [];
                document.querySelectorAll('a[href]').forEach(function(a) {
                    var h = a.href;
                    if (h && !h.startsWith('javascript:') && !h.startsWith('mailto:')
                        && !h.startsWith('#') && !h.startsWith('tel:'))
                        r.push({url: h, text: (a.textContent||'').trim().substring(0,80)});
                });
                document.querySelectorAll('[data-href],[data-url],[data-link]').forEach(function(el) {
                    var h = el.dataset.href || el.dataset.url || el.dataset.link;
                    if (h) r.push({url: h, text: (el.textContent||'').trim().substring(0,80)});
                });
                return r;
            """) or []
        except:
            return []

    def _discover_urls_by_clicking(self):
        """Quickly click clickable cards: at most 10 cards, 1.5 s each."""
        try:
            cards = self.driver.execute_script("""
                var items = [];
                var seen = new Set();
                var all = document.querySelectorAll('div, article, li, section');
                for (var i = 0; i < all.length && items.length < 10; i++) {
                    var el = all[i];
                    var style = window.getComputedStyle(el);
                    if (style.cursor !== 'pointer') continue;
                    if (el.offsetWidth < 80 || el.offsetHeight < 80) continue;
                    if (el.closest('a[href]')) continue;
                    var txt = (el.textContent || '').trim();
                    if (txt.length < 3 && !el.querySelector('img')) continue;
                    var key = txt.substring(0, 40);
                    if (seen.has(key)) continue;
                    seen.add(key);
                    var r = el.getBoundingClientRect();
                    if (r.x < 0 || r.y < 0) continue;
                    items.push({
                        text: txt.substring(0, 60),
                        x: Math.round(r.x + r.width/2),
                        y: Math.round(r.y + r.height/2)
                    });
                }
                return items;
            """)
            if not cards:
                return
            original_url = self.driver.current_url
            logger.info(f"[BACKUP] Exploring {len(cards)} clickable cards")
            for card in cards:
                if self._timeout_reached():
                    break
                if len(self.discovered_urls) >= 15:
                    break
                try:
                    cx, cy = card['x'], card['y']
                    if cx <= 0 or cy <= 0:
                        continue
                    self.driver.execute_script(
                        "var e=document.elementFromPoint(arguments[0],arguments[1]);"
                        "if(e)e.click();", cx, cy)
                    time.sleep(1.5)
                    handles = self.driver.window_handles
                    if len(handles) > 1:
                        # The click opened a new tab/window: record its URL and close it.
                        self.driver.switch_to.window(handles[-1])
                        self._wait_page_load(5)
                        new_url = self.driver.current_url
                        if new_url != original_url and self._is_same_site(new_url):
                            self.discovered_urls.add(new_url.split('#')[0])
                            logger.info(f"[BACKUP] Card -> {new_url[:80]}")
                        self.driver.close()
                        self.driver.switch_to.window(handles[0])
                        time.sleep(0.3)
                    else:
                        # Same-tab navigation (or SPA route change).
                        self._wait_page_load(3)
                        new_url = self.driver.current_url
                        if new_url != original_url and self._is_same_site(new_url):
                            self.discovered_urls.add(new_url.split('#')[0])
                            logger.info(f"[BACKUP] Card -> {new_url[:80]}")
                    if self.driver.current_url != original_url:
                        self.driver.get(original_url)
                        self._wait_page_load(8)
                        time.sleep(0.5)
                except:
                    try:
                        handles = self.driver.window_handles
                        if len(handles) > 1:
                            self.driver.close()
                            self.driver.switch_to.window(handles[0])
                        elif self.driver.current_url != original_url:
                            self.driver.get(original_url)
                            self._wait_page_load(8)
                    except:
                        pass
        except Exception as e:
            self.errors.append(f"Card discovery error: {str(e)[:80]}")

    def _navigate_and_capture(self, url, depth=0):
        if self._timeout_reached():
            return
        if url in self.visited_pages:
            return
        if len(self.visited_pages) >= self.max_pages:
            return
        if depth > self.max_depth:
            return
        url = url.split('#')[0]
        if not url:
            return
        logger.info(f"[BACKUP] [{len(self.visited_pages)+1}/{self.max_pages}] depth={depth}: {url[:80]}")
        try:
            if url != self.driver.current_url:
                self.driver.get(url)
                self._wait_page_load(15)
                time.sleep(1)
            self._quick_scroll()
            # The DOCTYPE is added later by _rewrite_html, so the raw outerHTML is kept as-is here.
            html = self.driver.execute_script("return document.documentElement.outerHTML;")
            local_path = "index.html" if url == self.url else self._page_filename(url)
            self.visited_pages[url] = local_path
            self._capture_page_assets()
            if depth < self.max_depth and not self._timeout_reached():
                links = self._collect_page_links()
                for link in links:
                    link_url = link.get('url', '')
                    if not link_url:
                        continue
                    abs_url = urljoin(url, link_url).split('#')[0]
                    if self._is_same_site(abs_url) and abs_url not in self.visited_pages:
                        if abs_url not in [q[0] for q in self.page_queue]:
                            self.page_queue.append((abs_url, depth + 1))
                if depth == 0:
                    self._discover_urls_by_clicking()
                    for d_url in self.discovered_urls:
                        if d_url not in self.visited_pages and d_url not in [q[0] for q in self.page_queue]:
                            self.page_queue.append((d_url, depth + 1))
            rewritten = self._rewrite_html(html, local_path)
            self.zip_files[local_path] = rewritten.encode('utf-8')
        except Exception as e:
            self.errors.append(f"Error visiting {url[:80]}: {str(e)[:80]}")
            self.visited_pages[url] = None

    def _quick_scroll(self):
        """Quick scroll to trigger lazy-loading (at most 5 scroll steps)."""
        try:
            height = self.driver.execute_script("return document.body.scrollHeight;")
            vp = self.driver.execute_script("return window.innerHeight;")
            pos = 0
            scrolls = 0
            while pos < height and scrolls < 5:
                pos += vp
                scrolls += 1
                self.driver.execute_script(f"window.scrollTo(0,{pos});")
                time.sleep(0.2)
            self.driver.execute_script("window.scrollTo(0,0);")
            time.sleep(0.3)
        except:
            pass

    def _capture_page_assets(self):
        if self._timeout_reached():
            return
        try:
            assets = self.driver.execute_script("""
                var css=[], js=[], imgs=[];
                document.querySelectorAll('link[rel="stylesheet"]').forEach(function(l){
                    if(l.href) css.push(l.href);
                });
                document.querySelectorAll('script[src]').forEach(function(s){
                    if(s.src) js.push(s.src);
                });
                document.querySelectorAll('img').forEach(function(i){
                    if(i.src && !i.src.startsWith('data:')) imgs.push(i.src);
                    if(i.dataset.src) imgs.push(i.dataset.src);
                });
                document.querySelectorAll('video[poster]').forEach(function(v){
                    imgs.push(v.poster);
                });
                return {css:css, js:js, imgs:[...new Set(imgs)]};
            """)
            for css_url in (assets.get('css') or []):
                if css_url not in self.url_to_local:
                    content = self._download_asset(css_url)
                    if content:
                        fn = self._safe_filename(css_url, ".css")
                        lp = f"css/{fn}"
                        self.url_to_local[css_url] = lp
                        try:
                            txt = content.decode('utf-8', errors='replace')
                            txt = self._rewrite_css_urls(txt, css_url)
                            self.zip_files[lp] = txt.encode('utf-8')
                        except:
                            self.zip_files[lp] = content
            for js_url in (assets.get('js') or []):
                if js_url not in self.url_to_local:
                    content = self._download_asset(js_url)
                    if content:
                        fn = self._safe_filename(js_url, ".js")
                        lp = f"js/{fn}"
                        self.zip_files[lp] = content
                        self.url_to_local[js_url] = lp
            for img_url in (assets.get('imgs') or []):
                abs_url = urljoin(self.driver.current_url, img_url)
                if abs_url not in self.url_to_local:
                    content = self._download_asset(abs_url)
                    if content:
                        fn = self._safe_filename(abs_url)
                        lp = f"images/{fn}"
                        self.zip_files[lp] = content
                        self.url_to_local[abs_url] = lp
                        if img_url != abs_url:
                            self.url_to_local[img_url] = lp
        except Exception as e:
            self.errors.append(f"Asset capture error: {str(e)[:80]}")

    def _rewrite_css_urls(self, css_text, css_url):
        def replace_url(match):
            original = match.group(1).strip('\'"')
            if original.startswith('data:') or original.startswith('#'):
                return match.group(0)
            absolute = urljoin(css_url, original)
            content = self._download_asset(absolute)
            if content:
                folder = self._classify_asset(absolute)
                fn = self._safe_filename(absolute)
                lp = f"{folder}/{fn}"
                self.url_to_local[absolute] = lp
                self.zip_files[lp] = content
                return f"url('../{lp}')"
            return match.group(0)
        return re.sub(r'url\(([^)]+)\)', replace_url, css_text)

    def _rewrite_html(self, html, page_local_path):
        soup = BeautifulSoup(html, 'html.parser')
        depth = page_local_path.count('/')
        prefix = '../' * depth if depth > 0 else ''
        # Rewrite stylesheet links
        for link in soup.find_all('link', rel='stylesheet'):
            href = link.get('href')
            if href:
                abs_url = urljoin(self.driver.current_url, href)
                if abs_url in self.url_to_local:
                    link['href'] = prefix + self.url_to_local[abs_url]
        # Rewrite script sources
        for script in soup.find_all('script', src=True):
            src = script.get('src')
            if src:
                abs_url = urljoin(self.driver.current_url, src)
                if abs_url in self.url_to_local:
                    script['src'] = prefix + self.url_to_local[abs_url]
        # Rewrite images
        for img in soup.find_all('img'):
            for attr in ['src', 'data-src']:
                val = img.get(attr)
                if val and not val.startswith('data:'):
                    abs_url = urljoin(self.driver.current_url, val)
                    if abs_url in self.url_to_local:
                        img[attr] = prefix + self.url_to_local[abs_url]
        # Rewrite <a href> links to point at local files
        for a in soup.find_all('a', href=True):
            href = a['href']
            if href.startswith(('javascript:', 'mailto:', 'tel:')):
                continue
            abs_url = urljoin(self.driver.current_url, href).split('#')[0]
            if abs_url in self.visited_pages and self.visited_pages[abs_url]:
                a['href'] = prefix + self.visited_pages[abs_url]
        # Remove <base> tags
        for base in soup.find_all('base'):
            base.decompose()
        # Remove tracking scripts
        tracking = ['google-analytics', 'gtag', 'facebook', 'hotjar', 'pixel', 'adsbygoogle']
        for script in soup.find_all('script'):
            src = script.get('src', '')
            text = script.string or ''
            if any(p in src.lower() or p in text.lower() for p in tracking):
                script.decompose()
        # Ensure a UTF-8 charset meta tag
        head = soup.find('head')
        if head and not head.find('meta', attrs={'charset': True}):
            head.insert(0, soup.new_tag('meta', charset='UTF-8'))
        return f"<!DOCTYPE html>\n{str(soup)}"

    def _inject_navigation_bar(self, soup, current_local_path):
        """Inject a navigation bar at the top of each page with links to every captured page."""
        nav_html = '<nav id="backup-nav" style="'
        nav_html += 'position:fixed;top:0;left:0;right:0;z-index:999999;'
        nav_html += 'background:#1a1a2e;padding:8px 16px;font-family:Arial,sans-serif;'
        nav_html += 'display:flex;flex-wrap:wrap;gap:8px;align-items:center;'
        nav_html += 'border-bottom:2px solid #e94560;font-size:13px;'
        nav_html += '">'
        nav_html += '<span style="color:#e94560;font-weight:bold;margin-right:12px;">📁 Backup Nav:</span>'
        depth = current_local_path.count('/')
        prefix = '../' * depth if depth > 0 else ''
        for page_url, local_path in sorted(self.visited_pages.items(), key=lambda x: x[1] or ''):
            if not local_path:
                continue
            # Friendly page name
            parsed = urlparse(page_url)
            path_part = parsed.path.strip('/')
            if not path_part or path_part == '':
                name = 'Home'
            elif path_part == 'dashboard':
                name = 'Dashboard'
            else:
                name = path_part.split('/')[-1].replace('-', ' ').replace('_', ' ').title()
            if len(name) > 25:
                name = name[:22] + '...'
            href = prefix + local_path
            is_current = (local_path == current_local_path)
            if is_current:
                nav_html += '<span style="color:#fff;background:#e94560;padding:3px 10px;'
                nav_html += f'border-radius:4px;font-weight:bold;">{name}</span>'
            else:
                nav_html += f'<a href="{href}" style="color:#eee;background:#16213e;'
                nav_html += 'padding:3px 10px;border-radius:4px;text-decoration:none;'
                nav_html += f'border:1px solid #333;">{name}</a>'
        nav_html += '</nav>'
        # Spacer so the fixed bar does not cover the page content
        nav_html += '<div id="backup-nav-spacer" style="height:45px;"></div>'
        nav_tag = BeautifulSoup(nav_html, 'html.parser')
        body = soup.find('body')
        if body:
            body.insert(0, nav_tag)

    def capture_screenshot(self):
        try:
            return self.driver.get_screenshot_as_png()
        except:
            return None

    def generate_backup_zip(self, folder_name="backup"):
        logger.info(f"[BACKUP] Starting backup of {self.url}")
        self.start_time = time.time()
        self._navigate_and_capture(self.url, depth=0)
        while self.page_queue and len(self.visited_pages) < self.max_pages:
            if self._timeout_reached():
                logger.warning("[BACKUP] Timeout reached, finishing...")
                break
            next_url, next_depth = self.page_queue.pop(0)
            if next_url not in self.visited_pages:
                self._navigate_and_capture(next_url, next_depth)
        # Final rewrite with the navigation bar injected
        self._final_rewrite_all_pages()
        try:
            self.driver.get(self.url)
            time.sleep(1)
        except:
            pass
        screenshot = self.capture_screenshot()
        elapsed = round(time.time() - self.start_time, 1)
        zip_buffer = BytesIO()
        with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zf:
            for fp, content in self.zip_files.items():
                full = f"{folder_name}/{fp}"
                zf.writestr(full, content if isinstance(content, bytes) else content.encode('utf-8'))
            if screenshot:
                zf.writestr(f"{folder_name}/screenshot.png", screenshot)
            zf.writestr(f"{folder_name}/backup_report.txt",
                        self._generate_report(folder_name, elapsed).encode('utf-8'))
            nav_map = {
                "pages": {u: p for u, p in self.visited_pages.items() if p},
                "total_pages": len([p for p in self.visited_pages.values() if p]),
                "total_assets": len(self.url_to_local),
                "errors": len(self.errors),
                "time_seconds": elapsed
            }
            zf.writestr(f"{folder_name}/navigation_map.json",
                        json.dumps(nav_map, indent=2, ensure_ascii=False).encode('utf-8'))
        zip_buffer.seek(0)
        logger.info(f"[BACKUP] Finished in {elapsed}s: {len(self.visited_pages)} pages, "
                    f"{len(self.url_to_local)} assets, {len(self.errors)} errors")
        return zip_buffer, len(self.errors)

    def _final_rewrite_all_pages(self):
        """Final rewrite: fix all internal links and inject the navigation bar."""
        for url, local_path in self.visited_pages.items():
            if not local_path or local_path not in self.zip_files:
                continue
            try:
                content = self.zip_files[local_path]
                html = content.decode('utf-8', errors='replace') if isinstance(content, bytes) else content
                soup = BeautifulSoup(html, 'html.parser')
                depth = local_path.count('/')
                prefix = '../' * depth if depth > 0 else ''
                changed = False
                # Fix <a href> links
                for a in soup.find_all('a', href=True):
                    href = a['href']
                    if href.startswith(('javascript:', 'mailto:', 'tel:', '#')):
                        continue
                    # Skip links that already point at local files
                    if href.startswith(('pages/', '../pages/', 'index.html', '../index.html')):
                        continue
                    abs_url = urljoin(url, href).split('#')[0]
                    if abs_url in self.visited_pages and self.visited_pages[abs_url]:
                        a['href'] = prefix + self.visited_pages[abs_url]
                        changed = True
                # Turn clickable divs/cards into links
                for div in soup.find_all(['div', 'article', 'li', 'section']):
                    # Skip elements already inside an <a>
                    if div.find_parent('a'):
                        continue
                    # Check data-href, data-url, data-link and onclick handlers
                    target_url = None
                    data_href = div.get('data-href') or div.get('data-url') or div.get('data-link', '')
                    onclick = div.get('onclick', '')
                    if data_href:
                        target_url = urljoin(url, data_href).split('#')[0]
                    elif 'location' in onclick or 'href' in onclick or 'navigate' in onclick:
                        m = re.search(r"['\"]((https?://|/)[^'\"]+)['\"]", onclick)
                        if m:
                            target_url = urljoin(url, m.group(1)).split('#')[0]
                    if target_url and target_url in self.visited_pages and self.visited_pages[target_url]:
                        local_link = prefix + self.visited_pages[target_url]
                        wrapper = soup.new_tag('a', href=local_link,
                                               style="text-decoration:none;color:inherit;display:block;cursor:pointer;")
                        # Move the div's content into the <a>
                        children = list(div.children)
                        for child in children:
                            child.extract()
                            wrapper.append(child)
                        div.clear()
                        div.append(wrapper)
                        # Drop the onclick handler so it does not conflict with the link
                        if div.get('onclick'):
                            del div['onclick']
                        changed = True
                # Inject the navigation bar
                self._inject_navigation_bar(soup, local_path)
                changed = True
                if changed:
                    # The stored HTML already carries its DOCTYPE, so serialize the soup as-is.
                    self.zip_files[local_path] = str(soup).encode('utf-8')
            except Exception as e:
                self.errors.append(f"Rewrite error {local_path}: {str(e)[:80]}")

    def _generate_report(self, folder_name, elapsed):
        pages_list = "\n".join([f"  {u} -> {p}" for u, p in self.visited_pages.items() if p])
        return f"""========================================
BACKUP REPORT
========================================
URL: {self.url}
Domain: {self.domain}
Date: {time.strftime("%Y-%m-%d %H:%M:%S")}
Time: {elapsed}s
PAGES ({len([p for p in self.visited_pages.values() if p])}):
{pages_list}
DISCOVERED CARDS ({len(self.discovered_urls)}):
{chr(10).join('  ' + u for u in self.discovered_urls) if self.discovered_urls else '  None.'}
ASSETS ({len(self.url_to_local)}):
  CSS: {len([p for p in self.url_to_local.values() if p.startswith('css/')])}
  JS: {len([p for p in self.url_to_local.values() if p.startswith('js/')])}
  Images: {len([p for p in self.url_to_local.values() if p.startswith('images/')])}
ERRORS ({len(self.errors)}):
{chr(10).join(self.errors[:20]) if self.errors else '  None.'}
========================================
"""
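

# --- Usage sketch (not part of the original module) ---
# A minimal example of how this class might be driven, assuming a headless
# Chrome session managed by Selenium and a ZIP written to disk. The target
# URL, output filename, and Chrome options below are illustrative
# assumptions, not values taken from the original project.
if __name__ == "__main__":
    from selenium import webdriver
    from selenium.webdriver.chrome.options import Options

    logging.basicConfig(level=logging.INFO)

    options = Options()
    options.add_argument("--headless=new")          # run without a visible window
    options.add_argument("--window-size=1366,900")  # viewport used for scrolling/screenshots
    driver = webdriver.Chrome(options=options)
    try:
        backup = SiteBackup(driver, "https://example.com", max_depth=2, max_pages=30)
        zip_buffer, error_count = backup.generate_backup_zip(folder_name="backup")
        with open("site_backup.zip", "wb") as fh:   # hypothetical output path
            fh.write(zip_buffer.getvalue())
        print(f"Backup written with {error_count} errors")
    finally:
        driver.quit()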