Spaces:
Build error
Build error
File size: 8,439 Bytes
179f419 3c44349 179f419 37a6ead 179f419 46fdc93 37a6ead c0b5308 8dc061a 8e59b36 8dc061a c0b5308 9d69eea 37a6ead 8dc061a 37a6ead 8dc061a 37a6ead defdc0f 84c7ded dbb23b5 0160296 37a6ead 0160296 3c44349 0160296 3c44349 0160296 84c7ded 46fdc93 3c44349 c25e9d3 8e59b36 c25e9d3 3c44349 dbb23b5 3c44349 85207da 3c44349 46fdc93 3c44349 8e59b36 84c7ded 37a6ead 3c44349 c25e9d3 0f08c05 0d09c79 0f08c05 99eff3e 0f08c05 337c2ef 0f08c05 99eff3e 0f08c05 0d09c79 99eff3e 84c7ded 1dc721d 85207da 1dc721d 38ccd46 147a4a4 65dc60b 5f09e6a 0f08c05 1dc721d 513d065 147a4a4 65dc60b f1f53e2 86c6d07 5b53dd5 058a9c7 86c6d07 bd7fe45 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 |
import html
import json
import re
from datetime import datetime, timedelta

import gradio as gr
import requests
# Request headers that make outgoing HTTP calls look like they come from Firefox.
_FIREFOX_USER_AGENT = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:71.0) "
    "Gecko/20100101 Firefox/71.0"
)
headers = {"User-Agent": _FIREFOX_USER_AGENT}
def get_google_cache_date(url):
    """Return an HTML snippet describing when Google last cached ``url``.

    Requests Google's web-cache page for ``url`` and searches the response
    body for the first date written like ``Jan 5, 2024``.

    Args:
        url: Address of the page whose cached copy is looked up.

    Returns:
        A ``<p>`` element with the cache age in days, the cache date and a
        mood emoji when a date is found. Otherwise a short plain-text
        message: page not cached, cache not reachable, or the HTML-escaped
        text of the error that occurred.
    """
    try:
        # ``params`` lets requests percent-encode the target URL so that
        # characters such as ``&`` or ``#`` do not truncate the query.
        resp = requests.get(
            "https://webcache.googleusercontent.com/search",
            params={"q": f"cache:{url}"},
            headers=headers,
            timeout=30,  # never block the UI on an unresponsive server
        )
    except requests.RequestException as e:
        return html.escape(str(e))

    if resp.status_code != 200:
        return "Error al acceder a la caché de Google ⛔"

    # Three letters, day, comma, four-digit year (e.g. "Jan 5, 2024").
    found = re.search(r"[A-Za-z]{3}\s[0-9]{1,2},\s[0-9]{4}", resp.text)
    if not found:
        return "Url no cacheada por Google ⛔"

    try:
        cache_date = datetime.strptime(found.group(0), '%b %d, %Y')
    except ValueError as e:
        # The three letters matched were not a month abbreviation.
        return html.escape(str(e))

    days_ago = (datetime.now() - cache_date).days
    # Singular/plural handling for "día/días".
    day_word = "día" if days_ago == 1 else "días"

    # The fresher the cached copy, the happier the emoji.
    if days_ago <= 3:
        emoji = "😎"
    elif days_ago <= 30:
        emoji = "🙂"
    elif days_ago <= 90:
        emoji = "😐"
    else:
        emoji = "😭"

    formatted_date = cache_date.strftime('%d/%m/%Y')
    # The URL comes from the user, so escape it before placing it in an
    # attribute value.
    # NOTE(review): the link target is the literal "cache:<url>" string, not
    # the Google cache address requested above — confirm this is intended.
    safe_url = html.escape(url)
    return f"<p style='margin-bottom: 10px; margin-top:10px;text-align: center; background: #ffffff; padding: 8px; border-radius: 8px; border-width: 1px; border: solid 1px #e5e7eb;'><a style='color:#1f2937;text-decoration:none' title='Ver la url cacheada por Google' href='cache:{safe_url}'>👁 </a> Cacheada por Google hace {days_ago} {day_word} ({formatted_date}) {emoji}</p>"
def _snapshot_table(google_cache_info, snapshots):
    """Render the Google-cache banner followed by a date/URL table.

    Args:
        google_cache_info: HTML fragment placed in a ``<div>`` above the table.
        snapshots: Iterable of ``(timestamp, snapshot_url)`` pairs, where
            ``timestamp`` is a ``YYYYMMDDhhmmss`` string.

    Returns:
        The complete HTML string.

    Raises:
        ValueError: If a timestamp does not match ``YYYYMMDDhhmmss``.
    """
    parts = [
        f"<div>{google_cache_info}</div>"
        "<table style='width: -webkit-fill-available;'>"
        "<tr><th>Fecha</th><th>URL</th></tr>"
    ]
    for timestamp, snapshot_url in snapshots:
        formatted_date = datetime.strptime(timestamp, '%Y%m%d%H%M%S').strftime('%d/%m/%Y')
        # Snapshot URLs embed the archived address; escape before emitting.
        safe_url = html.escape(snapshot_url)
        parts.append(
            f"<tr><td>{formatted_date}</td>"
            f"<td><a href='{safe_url}' target='_blank'>{safe_url}</a></td></tr>"
        )
    parts.append("</table>")
    return "".join(parts)


def wayback(website):
    """Return an HTML report of Wayback Machine snapshots for ``website``.

    The Wayback CDX Server API is queried first for captures from the last
    365 days (newest first). If that yields nothing or fails, the Wayback
    Availability API is asked for the closest snapshot instead.

    Args:
        website: URL typed by the user; an empty value yields an error
            message without any network access.

    Returns:
        An HTML string: the Google cache banner plus a table of snapshots,
        or the banner plus an error paragraph when nothing could be found.
    """
    if not website:
        return '<p>😭 Error: introduce una url correcta</p>'

    google_cache_info = get_google_cache_date(website)

    # First attempt: the Wayback CDX Server API, limited to the last year.
    end_date = datetime.now()
    start_date = end_date - timedelta(days=365)
    try:
        response = requests.get(
            "https://web.archive.org/cdx/search/cdx",
            # ``params`` percent-encodes the user's URL for us.
            params={
                "url": website,
                "output": "json",
                "from": start_date.strftime('%Y%m%d'),
                "to": end_date.strftime('%Y%m%d'),
                "limit": 3000,
            },
            headers=headers,
            timeout=30,
        )
        if response.status_code == 200:
            content = json.loads(response.text)
            # The first row is the column header; real captures follow it.
            if len(content) > 1:
                # Row layout used here: index 1 = timestamp, index 2 = page URL.
                captures = sorted(content[1:], key=lambda row: row[1], reverse=True)
                return _snapshot_table(
                    google_cache_info,
                    (
                        (row[1], f"https://web.archive.org/web/{row[1]}/{row[2]}")
                        for row in captures
                    ),
                )
    except (requests.RequestException, ValueError, LookupError, TypeError):
        # Deliberate best-effort: a network error or an unexpected payload
        # from the CDX API simply means we fall back to the next method.
        pass

    # Fallback: the Wayback Availability JSON API (closest snapshot only).
    try:
        response = requests.get(
            "https://archive.org/wayback/available",
            params={"url": website},
            headers=headers,
            timeout=30,
        )
        if response.status_code == 200:
            data = json.loads(response.text)
            closest_snapshot = (data.get("archived_snapshots") or {}).get("closest")
            if closest_snapshot and closest_snapshot.get("available"):
                return _snapshot_table(
                    google_cache_info,
                    [(closest_snapshot["timestamp"], closest_snapshot["url"])],
                )
    except Exception as e:
        # UI boundary: report the failure to the user instead of letting the
        # handler crash. The message may echo the URL, so escape it.
        return f"<div>{google_cache_info}</div><p>😭 Error: {html.escape(str(e))}</p>"

    return f"<div>{google_cache_info}</div><p>😭 Error: No se encontraron datos archivados para esta URL.</p>"
def archive_now(website):
    """Ask the Wayback Machine to save ``website`` and report the outcome.

    Args:
        website: URL typed by the user; an empty value yields an error
            message without any network access.

    Returns:
        An HTML ``<div>``: a success message when the save request answers
        with HTTP 200, the result of ``check_last_snapshot`` when it answers
        with another status or times out, or an error message for any other
        request failure.
    """
    if not website:
        return "<div>😭 Error: Por favor, introduce una URL válida.</div>"

    try:
        # Give the save request up to 60 seconds to answer.
        response = requests.get(
            f"https://web.archive.org/save/{website}",
            headers=headers,
            timeout=60,
        )
    except requests.exceptions.Timeout:
        # Timed out: fall back to the most recent existing snapshot.
        return check_last_snapshot(website)
    except requests.exceptions.RequestException as e:
        # The message can echo the user-supplied URL, so escape it.
        return f"<div>Error al archivar la URL: {html.escape(str(e))}</div>"

    if response.status_code == 200:
        return "<div>👌 URL archivada con éxito.</div>"
    # Unsuccessful response: fall back to the most recent existing snapshot.
    return check_last_snapshot(website)
def check_last_snapshot(website):
    """Return an HTML ``<div>`` linking to the closest archived snapshot.

    Queries the Wayback Availability JSON API for ``website``.

    Args:
        website: URL whose archived snapshot is looked up.

    Returns:
        A ``<div>`` with a link to the snapshot (labelled with its date as
        ``dd/mm/YYYY``) when one is available, a "not found" ``<div>`` when
        none is reported, or an error ``<div>`` when the lookup fails.
    """
    try:
        response = requests.get(
            "https://archive.org/wayback/available",
            # ``params`` percent-encodes the user's URL for us.
            params={"url": website},
            headers=headers,
            timeout=30,  # this runs as a fallback; it must not hang as well
        )
        if response.status_code == 200:
            data = json.loads(response.text)
            closest_snapshot = (data.get("archived_snapshots") or {}).get("closest")
            if closest_snapshot and closest_snapshot.get("available"):
                snapshot_url = html.escape(closest_snapshot["url"])
                formatted_date = datetime.strptime(
                    closest_snapshot["timestamp"], '%Y%m%d%H%M%S'
                ).strftime('%d/%m/%Y')
                return f"<div>Última instantánea disponible: <a href='{snapshot_url}' target='_blank'>{formatted_date}</a></div>"
    except Exception as e:
        # UI boundary: report the failure to the user instead of letting the
        # handler crash. The message may echo the URL, so escape it.
        return f"<div>Error al buscar la última instantánea: {html.escape(str(e))}</div>"

    return "<div>😭 No se encontraron datos archivados para esta URL.</div>"
# HTML fragments shared by both tabs.
_DESCRIPTION_STYLE = (
    "margin-bottom: 10px; text-align: center; background: #ffffff; "
    "padding: 8px; border-radius: 8px; border-width: 1px; "
    "border: solid 1px #e5e7eb;"
)
_ARTICLE_HTML = (
    "<p style='text-align:center !important;'>Desarrollada por "
    "<a style='text-decoration:none !important;color:#e12a31 !important;' "
    "href='https://artxeweb.com'>© Artxe Web</a></p>"
)

# Gradio interface for the snapshot search (``wayback``).
wayback_interface = gr.Interface(
    fn=wayback,
    inputs="text",
    outputs="html",
    title="<p style='margin-top:10px;margin-bottom:-10px;font-size: 22px;'>Wayback Machine</p>",
    description=(
        f"<p style='{_DESCRIPTION_STYLE}'>Busca instantáneas de una página web "
        "en Wayback Machine y guarda la página actual simplemente "
        "introduciendo la url.</p>"
    ),
    article=_ARTICLE_HTML,
)

# Gradio interface for archiving a page on demand (``archive_now``).
archive_interface = gr.Interface(
    fn=archive_now,
    inputs="text",
    outputs="html",
    # The paragraph is now closed with </p> (it previously ended in <p>).
    title="<p style='margin-top:10px;margin-bottom:-10px;font-size: 22px;'>Guardar en Wayback Machine</p>",
    description=(
        f"<p style='{_DESCRIPTION_STYLE}'>Guarda la página web actual en "
        "Wayback Machine.</p>"
    ),
    article=_ARTICLE_HTML,
)

# Combine both interfaces into a single tabbed application.
iface = gr.TabbedInterface(
    [wayback_interface, archive_interface],
    ["Buscar Instantáneas", "Archivar URL"],
    # Both opening <div> tags are now closed.
    title=(
        "<div style='margin:0 auto;text-align:center;margin-bottom: -20px;'>"
        "<div style='margin:0 auto;text-align:center'>"
        "<img style='width:100px;display: inline-table;margin-bottom:-10px' "
        "src='https://artxeweb.com/media/files/waybackmachine.jpg'>"
        "<p>Wayback Fast</p></div></div>"
    ),
)

# Launch the application.
iface.launch()