tx3bas committed on
Commit
83c73f5
·
verified ·
1 Parent(s): 1bfeefb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +65 -24
app.py CHANGED
@@ -2,51 +2,92 @@ import gradio as gr
2
  from datetime import datetime, timedelta
3
  import requests
4
  import json
5
- import pandas as pd
6
 
7
- def wayback(website):
 
 
8
  if not website:
9
- return pd.DataFrame(columns=["Fecha", "URL"])
10
 
11
- # Rango de tiempo fijo de 365 días hacia atrás
12
  end_date = datetime.now()
13
  start_date = end_date - timedelta(days=365)
14
  datefrom = start_date.strftime('%Y%m%d')
15
  dateto = end_date.strftime('%Y%m%d')
16
 
17
- # Construir la consulta a la Wayback Machine con un límite fijo de 3000 resultados
18
- query = f"?url={website}&output=json&from={datefrom}&to={dateto}&limit=3000"
19
 
20
  try:
21
- response = requests.get(f"http://web.archive.org/cdx/search/cdx{query}")
22
  if response.status_code != 200:
23
- return pd.DataFrame([{'Fecha': 'Error', 'URL': f'Respuesta no exitosa con código de estado {response.status_code}'}])
24
  content = json.loads(response.text)
25
  if len(content) <= 1:
26
- return pd.DataFrame([{'Fecha': 'Error', 'URL': 'No hay datos para esta página web'}])
27
  except json.JSONDecodeError as e:
28
- return pd.DataFrame([{'Fecha': 'Error', 'URL': f'Error al analizar JSON: {e}'}])
29
  except Exception as e:
30
- return pd.DataFrame([{'Fecha': 'Error', 'URL': f'Error: {e}'}])
31
 
32
- # Procesar los resultados y devolverlos en forma de DataFrame
33
  results = []
34
  for row in content[1:]:
35
  date, page, status = [row[i] for i in [1, 2, 4]]
36
  formatted_date = datetime.strptime(date, '%Y%m%d%H%M%S').strftime('%d/%m/%Y')
37
  formatted_wayback_url = f"https://web.archive.org/web/{date}/{page}"
38
- results.append({'Fecha': formatted_date, 'URL': formatted_wayback_url})
39
-
40
- return pd.DataFrame(results)
41
 
42
- # Crear la interfaz de Gradio
43
- iface = gr.Interface(
44
- fn=wayback,
45
- inputs="text",
46
- outputs=gr.components.Dataframe(label="Resultados", headers=["Fecha", "URL"]),
47
- title="Wayback Machine Lookup",
48
- description="Busca instantáneas de una página web en la Wayback Machine. Introduce solo la URL."
49
- )
50
 
51
- # Lanzar la aplicación
52
  iface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  from datetime import datetime, timedelta
3
  import requests
4
  import json
 
5
 
6
+ def wayback(website, limit=None):
7
+ limit = 3000 if limit is None else limit
8
+
9
  if not website:
10
+ return '😭 Error: introduce una url correcta'
11
 
 
12
  end_date = datetime.now()
13
  start_date = end_date - timedelta(days=365)
14
  datefrom = start_date.strftime('%Y%m%d')
15
  dateto = end_date.strftime('%Y%m%d')
16
 
17
+ query = f"?url={website}&output=json&from={datefrom}&to={dateto}"
 
18
 
19
  try:
20
+ response = requests.get(f"http://web.archive.org/cdx/search/cdx{query}&limit={limit}")
21
  if response.status_code != 200:
22
+ return f'😭 Error: Respuesta no exitosa con código de estado {response.status_code}'
23
  content = json.loads(response.text)
24
  if len(content) <= 1:
25
+ return '😭 Error: no hay datos para esta página web'
26
  except json.JSONDecodeError as e:
27
+ return f'😭 Error al analizar JSON: {e}'
28
  except Exception as e:
29
+ return f"😭 Error: {e}"
30
 
 
31
  results = []
32
  for row in content[1:]:
33
  date, page, status = [row[i] for i in [1, 2, 4]]
34
  formatted_date = datetime.strptime(date, '%Y%m%d%H%M%S').strftime('%d/%m/%Y')
35
  formatted_wayback_url = f"https://web.archive.org/web/{date}/{page}"
36
+ results.append(f"👓 {formatted_date} {formatted_wayback_url}")
37
+ return "\n".join(results)
 
38
 
39
+ iface = gr.Interface(fn=wayback,
40
+ inputs=["text", gr.Number(label="Límite", value=3000)],
41
+ outputs="text",
42
+ title="Wayback Machine Lookup",
43
+ description="Busca instantáneas de una página web en la Wayback Machine.")
 
 
 
44
 
 
45
  iface.launch()
46
+
47
+ mejor deja simplemente el de meter la url
48
+
49
+ import gradio as gr
50
+ from datetime import datetime, timedelta
51
+ import requests
52
+ import json
53
+
54
+ def wayback(website, limit=None):
55
+ limit = 3000 if limit is None else limit
56
+
57
+ if not website:
58
+ return '😭 Error: introduce una url correcta'
59
+
60
+ end_date = datetime.now()
61
+ start_date = end_date - timedelta(days=365)
62
+ datefrom = start_date.strftime('%Y%m%d')
63
+ dateto = end_date.strftime('%Y%m%d')
64
+
65
+ query = f"?url={website}&output=json&from={datefrom}&to={dateto}"
66
+
67
+ try:
68
+ response = requests.get(f"http://web.archive.org/cdx/search/cdx{query}&limit={limit}")
69
+ if response.status_code != 200:
70
+ return f'😭 Error: Respuesta no exitosa con código de estado {response.status_code}'
71
+ content = json.loads(response.text)
72
+ if len(content) <= 1:
73
+ return '😭 Error: no hay datos para esta página web'
74
+ except json.JSONDecodeError as e:
75
+ return f'😭 Error al analizar JSON: {e}'
76
+ except Exception as e:
77
+ return f"😭 Error: {e}"
78
+
79
+ results = []
80
+ for row in content[1:]:
81
+ date, page, status = [row[i] for i in [1, 2, 4]]
82
+ formatted_date = datetime.strptime(date, '%Y%m%d%H%M%S').strftime('%d/%m/%Y')
83
+ formatted_wayback_url = f"https://web.archive.org/web/{date}/{page}"
84
+ results.append(f"👓 {formatted_date} {formatted_wayback_url}")
85
+ return "\n".join(results)
86
+
87
+ iface = gr.Interface(fn=wayback,
88
+ inputs=["text", gr.Number(label="Límite", value=3000)],
89
+ outputs="text",
90
+ title="Wayback Machine Lookup",
91
+ description="Busca instantáneas de una página web en la Wayback Machine.")
92
+
93
+ iface.launch()