Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import requests | |
| from bs4 import BeautifulSoup | |
| import random | |
| import time | |
| import re | |
def obtener_user_agent():
    """Return a browser User-Agent string chosen at random from a fixed pool.

    The pool holds three desktop Chrome identifiers that differ only in the
    trailing ``Chrome/... Safari/...`` part.
    """
    # Part shared by every entry in the pool.
    prefijo = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
    )
    # Version-specific tails; more can be appended as needed.
    sufijos = (
        "Chrome/91.0.4472.124 Safari/537.3",
        "Chrome/90.0.4430.212 Safari/537.36",
        "Chrome/89.0.4389.128 Safari/537.36",
    )
    candidatos = [prefijo + sufijo for sufijo in sufijos]
    return random.choice(candidatos)
def dividir_en_grupos(urls, tamano_grupo=10):
    """Yield consecutive slices of *urls*, each at most *tamano_grupo* long.

    Args:
        urls: A sliceable sequence (it is measured with ``len`` and sliced).
        tamano_grupo: Maximum number of items per yielded slice.

    Yields:
        Slices of *urls* in original order; the last one may be shorter.
    """
    total = len(urls)
    inicios = range(0, total, tamano_grupo)
    yield from (urls[inicio:inicio + tamano_grupo] for inicio in inicios)
# Matches a leading http:// or https:// scheme only (anchored, case-insensitive),
# so an "http://" embedded later in a URL is left alone.
_ESQUEMA_RE = re.compile(r"^https?://", re.IGNORECASE)


def _quitar_esquema(url):
    """Return *url* without surrounding whitespace and without its http(s) scheme."""
    return _ESQUEMA_RE.sub("", url.strip())


def _normalizar_url(url):
    """Return a comparable form of *url*: no scheme, query, fragment or trailing slash."""
    sin_esquema = _quitar_esquema(url)
    return sin_esquema.split("?")[0].split("#")[0].rstrip("/")


def _urls_en_resultados(html):
    """Return the set of normalized link targets found in the result containers of *html*."""
    soup = BeautifulSoup(html, "html.parser")
    encontradas = set()
    # NOTE(review): 'tF2Cxc' is the CSS class this code expects on each result
    # container; if the served markup differs, no result will be detected.
    for resultado in soup.find_all("div", class_="tF2Cxc"):
        enlace = resultado.find("a", href=True)
        if enlace is not None:  # a container without a link is simply skipped
            encontradas.add(_normalizar_url(enlace["href"]))
    return encontradas


def _resumir_resultados(indexadas, no_indexadas):
    """Build the user-facing summary text from the two result lists."""
    total_indexadas = len(indexadas)
    total_no_indexadas = len(no_indexadas)
    if total_indexadas + total_no_indexadas == 1:
        if total_indexadas == 1:
            return "Url indexada en Google 🙂"
        return "La url no está indexada en Google 😔"
    if total_indexadas == 0:
        return "Ninguna url indexada en Google😔"
    if total_no_indexadas == 0:
        return "Todas las urls indexadas en Google🙂"
    return (
        f"URLs Indexadas: {total_indexadas}\n" + "\n".join(indexadas)
        + f"\n\nURLs No Indexadas: {total_no_indexadas}\n" + "\n".join(no_indexadas)
    )


def comprobar_indexacion(input_text):
    """Check which of the given URLs appear in Google ``site:`` search results.

    URLs are read one per line; blank and whitespace-only lines are ignored.
    They are queried in groups (see ``dividir_en_grupos``) using one
    ``site:a OR site:b ...`` search per group, with a short random pause
    between consecutive groups. A URL counts as indexed when its normalized
    form (scheme, query string, fragment and trailing slash removed) equals
    the normalized target of a link inside a result container.

    Args:
        input_text: Text containing the URLs to check, one per line.

    Returns:
        A human-readable message: a fixed notice when no URL was supplied, an
        error message when a search request or its parsing fails, or a
        summary of indexed / non-indexed URLs.
    """
    urls = [linea.strip() for linea in (input_text or "").splitlines() if linea.strip()]
    if not urls:
        return "No se proporcionaron URLs válidas."

    indexadas = []
    no_indexadas = []
    try:
        for indice, grupo_urls in enumerate(dividir_en_grupos(urls)):
            if indice:
                # Pause only between requests, never after the last one.
                time.sleep(random.uniform(1, 2))
            query = " OR ".join(f"site:{_quitar_esquema(url)}" for url in grupo_urls)
            response = requests.get(
                "https://www.google.com/search",
                params={"q": query},  # let requests URL-encode the query
                headers={"User-Agent": obtener_user_agent()},
                timeout=10,
            )
            # Surface HTTP errors (e.g. rate limiting) instead of reading an
            # error page as "nothing is indexed".
            response.raise_for_status()
            encontradas = _urls_en_resultados(response.text)
            for url in grupo_urls:
                if _normalizar_url(url) in encontradas:
                    indexadas.append(url)
                else:
                    no_indexadas.append(url)
    except Exception as e:
        # UI callback boundary: report the failure as text rather than raising.
        return f"Error al realizar la búsqueda en Google: {str(e)}"

    return _resumir_resultados(indexadas, no_indexadas)
# Gradio UI: one multi-line text input wired to comprobar_indexacion, with the
# result shown as text. Title, description and article are raw HTML snippets.
iface = gr.Interface(
    fn=comprobar_indexacion,
    # Explicit multi-line box: the app expects one URL per line, so the input
    # should not depend on how the one-line "text" shortcut is rendered.
    inputs=gr.Textbox(lines=10),
    outputs="text",
    # The wrapper <div> is opened once and closed once (balanced markup).
    title=(
        "<div style='margin:0 auto;text-align:center'>"
        "<img style='width:100px;display: inline-table;margin-bottom:-10px' "
        "src='https://artxeweb.com/media/files/google-index-bulk.jpg'>"
        "<p>Bulk Index Google</p></div>"
    ),
    description=(
        "<p style='margin-bottom:10px;text-align:center;background: #ffffff; "
        "padding: 8px; border-radius: 8px; border-width: 1px; "
        "border: solid 1px #e5e7eb;'>"
        "Introduce las URLs que quieres comprobar si están indexadas por Google, "
        "separadas por saltos de línea. El sistema realizará una búsqueda para "
        "verificar si están indexadas.</p>"
    ),
    article=(
        "<div style='margin-top:10px'>"
        "<p style='text-align: center !important; background: #ffffff; "
        "padding: 5px 30px; border-radius: 8px; border-width: 1px; "
        "border: solid 1px #e5e7eb; width: fit-content; margin: auto;'>"
        "Desarrollada por "
        "<a style='text-decoration: none !important; color: #e12a31 !important;' "
        "href='https://artxeweb.com/'>© Artxe Web</a></p></div>"
    ),
)

# Start the server only when run as a script, so importing this module
# (e.g. to reuse comprobar_indexacion) does not launch the app.
if __name__ == "__main__":
    iface.launch()