suggest / app.py
tx3bas's picture
Update app.py
69aa93f verified
import html
import json
import re
import unicodedata
import urllib.parse
from collections import Counter

import gradio as gr
import requests
import xmltodict
# Función para normalizar palabras clave (elimina tildes y convierte a minúsculas)
def normalize_keyword(keyword):
    """Return `keyword` lowercased with combining accent marks removed.

    Values that are not strings are handed back unchanged.
    """
    # Guard clause: only text can be normalized.
    if not isinstance(keyword, str):
        return keyword
    # NFD splits an accented letter into its base letter plus combining
    # marks (Unicode category "Mn"); dropping the marks strips the accent.
    decomposed = unicodedata.normalize('NFD', keyword.lower())
    base_chars = [ch for ch in decomposed if unicodedata.category(ch) != 'Mn']
    return ''.join(base_chars)
# Función para obtener sugerencias de Google con un client específico (firefox, chrome, android, desktop, opera)
def fetch_google_suggestions(query, lang_code="es", client="firefox", timeout=10):
    """Fetch autocomplete suggestions from Google's suggest endpoint.

    Args:
        query: Search text to complete.
        lang_code: Value sent as both the ``hl`` and ``gl`` URL parameters.
        client: Value sent as the ``client`` URL parameter.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A list of normalized suggestion strings. The list is empty on a
        network failure, a non-200 status, or a body that is not JSON shaped
        like ``[query, [suggestion, ...]]``.
    """
    encoded_query = urllib.parse.quote(query)
    url = f"http://suggestqueries.google.com/complete/search?client={client}&hl={lang_code}&gl={lang_code}&q={encoded_query}"
    try:
        # Without a timeout a single stalled connection blocks the caller
        # indefinitely.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as e:
        print(f"Error de red consultando Google: {e}")
        return []
    if response.status_code != 200:
        return []
    try:
        data = response.json()
    except ValueError:
        print("Error decodificando JSON de Google")
        return []
    # Validate the shape before indexing so an unexpected payload yields an
    # empty result instead of an IndexError/TypeError.
    if not (isinstance(data, list) and len(data) > 1 and isinstance(data[1], list)):
        print("No se encontraron sugerencias en el formato esperado (Google).")
        return []
    return [normalize_keyword(suggestion) for suggestion in data[1] if isinstance(suggestion, str)]
# Función para obtener sugerencias de Brave
def fetch_brave_suggestions(query, lang_code="es", timeout=10):
    """Fetch autocomplete suggestions from Brave Search.

    Args:
        query: Search text to complete.
        lang_code: Accepted for signature parity with the other fetchers;
            it is not sent to Brave.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A list of normalized suggestion strings, one entry per suggestion.
        The list is empty on a network failure, a non-200 status, invalid
        JSON, or a payload that is not ``[query, [suggestion, ...]]``.
    """
    encoded_query = urllib.parse.quote(query)
    url = f"https://search.brave.com/api/suggest?q={encoded_query}"
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as e:
        print(f"Error de red consultando Brave: {e}")
        return []
    if response.status_code != 200:
        return []
    try:
        data = response.json()
    except ValueError:
        print("Error decodificando JSON de Brave")
        return []
    if not (isinstance(data, list) and len(data) > 1 and isinstance(data[1], list)):
        print("No se encontraron sugerencias en el formato esperado (Brave).")
        return []
    # The earlier code repeated each item range(int(0.35 * 5)) times, which
    # is range(1): every suggestion was emitted exactly once. That is kept,
    # without the dead inner loop. Non-string items are skipped because the
    # caller uses suggestions as dict keys.
    return [normalize_keyword(item) for item in data[1] if isinstance(item, str)]
# Función para obtener sugerencias de Qwant
def fetch_qwant_suggestions(query, lang_code="es", timeout=10):
    """Fetch autocomplete suggestions from Qwant's OpenSearch endpoint.

    Args:
        query: Search text to complete.
        lang_code: Accepted for signature parity with the other fetchers;
            it is not sent to Qwant.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A list of normalized suggestion strings. The list is empty on a
        network failure, a non-200 status, invalid JSON, or a payload that
        is not ``[query, [suggestion, ...]]``.
    """
    encoded_query = urllib.parse.quote(query)
    url = f"https://api.qwant.com/api/suggest/?client=opensearch&q={encoded_query}"
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as e:
        print(f"Error de red consultando Qwant: {e}")
        return []
    if response.status_code != 200:
        return []
    try:
        data = response.json()
    except ValueError:
        print("Error decodificando JSON de Qwant")
        return []
    # isinstance(data, list) also rejects dict payloads, where data[1]
    # would raise KeyError.
    if not (isinstance(data, list) and len(data) > 1 and isinstance(data[1], list)):
        print("No se encontraron sugerencias en el formato esperado (Qwant).")
        return []
    # Non-string items are skipped because the caller uses suggestions as
    # dict keys.
    return [normalize_keyword(item) for item in data[1] if isinstance(item, str)]
# Función para obtener sugerencias de DuckDuckGo
def fetch_duckduckgo_suggestions(query, lang_code="es", timeout=10):
    """Fetch autocomplete suggestions from DuckDuckGo.

    Args:
        query: Search text to complete.
        lang_code: Value sent as the ``kl`` URL parameter.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A list of normalized suggestion strings taken from the ``phrase``
        key of each returned item. The list is empty on a network failure,
        a non-200 status, invalid JSON, or a payload that is not a list.
    """
    encoded_query = urllib.parse.quote(query)
    url = f"https://duckduckgo.com/ac/?q={encoded_query}&kl={lang_code}"
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as e:
        print(f"Error de red consultando DuckDuckGo: {e}")
        return []
    if response.status_code != 200:
        return []
    try:
        data = response.json()
    except ValueError:
        print("Error decodificando JSON de DuckDuckGo")
        return []
    if not isinstance(data, list):
        print("No se encontraron sugerencias en el formato esperado (DuckDuckGo).")
        return []
    # Skip items that lack a string "phrase" rather than raising
    # KeyError/TypeError and losing the whole batch.
    return [
        normalize_keyword(item['phrase'])
        for item in data
        if isinstance(item, dict) and isinstance(item.get('phrase'), str)
    ]
# Función para obtener sugerencias de YouTube usando la API anterior
def fetch_youtube_suggestions(query, lang_code="es", timeout=10):
    """Fetch YouTube suggestions through ``suggestqueries.google.com``.

    The endpoint is expected to answer with a ``window.google.ac.h(...)``
    callback whose argument is an array: the query first, then a list of
    entries whose first element is the suggestion text.

    Args:
        query: Search text to complete.
        lang_code: Value sent as the ``hl`` URL parameter.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A list of normalized suggestion strings. The list is empty on a
        network failure, a non-200 status, or an unexpected body.
    """
    encoded_query = urllib.parse.quote(query)
    url = f"http://suggestqueries.google.com/complete/search?client=youtube&hl={lang_code}&q={encoded_query}"
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as e:
        print(f"Error procesando la respuesta de YouTube (antiguo): {e}")
        return []
    if response.status_code != 200:
        return []

    # Capture everything between the callback's parentheses (greedy, up to
    # the last ')') and parse it as JSON. A lazy "\[(.*?)\]," pattern stops
    # at the first "]," and so keeps only the first entry when entries are
    # bracketed arrays; parsing also decodes \uXXXX escapes.
    match = re.search(r'window\.google\.ac\.h\((.*)\)', response.text, re.DOTALL)
    if not match:
        print("No se encontraron sugerencias en el formato esperado (YouTube antiguo).")
        return []
    try:
        payload = json.loads(match.group(1))
    except ValueError as e:
        print(f"Error procesando la respuesta de YouTube (antiguo): {e}")
        return []
    if not (isinstance(payload, list) and len(payload) > 1 and isinstance(payload[1], list)):
        print("No se encontraron sugerencias en el formato esperado (YouTube antiguo).")
        return []

    suggestions = []
    for entry in payload[1]:
        # Each entry is expected to look like ["suggestion text", ...].
        if isinstance(entry, list) and entry and isinstance(entry[0], str) and entry[0]:
            suggestions.append(normalize_keyword(entry[0]))
    return suggestions
# Función para obtener sugerencias de YouTube usando la nueva API
def fetch_youtube_suggestions_new(query, lang_code="es", timeout=10):
    """Fetch YouTube suggestions through ``suggestqueries-clients6.youtube.com``.

    The endpoint is expected to answer with a ``window.google.ac.h(...)``
    callback whose argument is an array: the query first, then a list of
    entries whose first element is the suggestion text.

    Args:
        query: Search text to complete.
        lang_code: Value sent as the ``hl`` URL parameter.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A list of normalized suggestion strings. The list is empty on a
        network failure, a non-200 status, or an unexpected body.
    """
    encoded_query = urllib.parse.quote(query)
    url = f"https://suggestqueries-clients6.youtube.com/complete/search?client=youtube&ds=yt&q={encoded_query}&hl={lang_code}"
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as e:
        print(f"Error procesando la respuesta de la nueva API de YouTube: {e}")
        return []
    if response.status_code != 200:
        return []

    # Capture everything between the callback's parentheses (greedy, up to
    # the last ')') and parse it as JSON. A lazy "\[(.*?)\]," pattern stops
    # at the first "]," and so keeps only the first entry when entries are
    # bracketed arrays; parsing also decodes \uXXXX escapes.
    match = re.search(r'window\.google\.ac\.h\((.*)\)', response.text, re.DOTALL)
    if not match:
        print("No se encontraron sugerencias en el formato esperado (nueva API de YouTube).")
        return []
    try:
        payload = json.loads(match.group(1))
    except ValueError as e:
        print(f"Error procesando la respuesta de la nueva API de YouTube: {e}")
        return []
    if not (isinstance(payload, list) and len(payload) > 1 and isinstance(payload[1], list)):
        print("No se encontraron sugerencias en el formato esperado (nueva API de YouTube).")
        return []

    suggestions = []
    for entry in payload[1]:
        # Each entry is expected to look like ["suggestion text", ...].
        if isinstance(entry, list) and entry and isinstance(entry[0], str) and entry[0]:
            suggestions.append(normalize_keyword(entry[0]))
    return suggestions
# Función para obtener sugerencias de Bing
def fetch_bing_suggestions(query, market="en-US", timeout=10):
    """Fetch autocomplete suggestions from Bing's XML suggest endpoint.

    Args:
        query: Search text to complete.
        market: Value sent as the ``Market`` request parameter.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A list of normalized suggestion strings read from
        ``SearchSuggestion/Section/Item/Text``. Always a list: it is empty
        on a network failure, a non-200 status, unparsable XML, or a
        document without that structure.
    """
    url = "https://api.bing.com/qsml.aspx"
    params = {"Market": market, "query": query}
    headers = {"User-agent": "Mozilla/5.0"}
    try:
        response = requests.get(url, params=params, headers=headers, timeout=timeout)
    except requests.RequestException as e:
        print(f"Error procesando la respuesta de Bing: {e}")
        return []
    if response.status_code != 200:
        return []
    try:
        obj = xmltodict.parse(response.content)
        suggestions = obj['SearchSuggestion']['Section']['Item']
    except Exception as e:
        # Deliberately broad: XML parse errors and missing/None nodes
        # (KeyError/TypeError) are all treated as "no suggestions".
        print(f"Error procesando la respuesta de Bing: {e}")
        return []
    # A single <Item> arrives as a dict here, several as a list.
    if isinstance(suggestions, dict):
        suggestions = [suggestions]
    if not isinstance(suggestions, list):
        # Without this the function fell through and returned None, which
        # breaks callers that extend a list with the result.
        return []
    return [
        normalize_keyword(s['Text'])
        for s in suggestions
        if isinstance(s, dict) and isinstance(s.get('Text'), str)
    ]
# Función para obtener sugerencias de Amazon
def fetch_amazon_suggestions(query, market_id="ATVPDKIKX0DER", alias="aps", timeout=10):
    """Fetch autocomplete suggestions from Amazon's completion API.

    Args:
        query: Search text, sent as the ``prefix`` request parameter.
        market_id: Value sent as the ``mid`` request parameter.
        alias: Value sent as the ``alias`` request parameter.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A list of normalized suggestion strings taken from the ``value``
        key of each entry under ``suggestions``. The list is empty on a
        network failure, a non-200 status, invalid JSON, or a payload that
        is not a JSON object.
    """
    url = "https://completion.amazon.com/api/2017/suggestions"
    params = {"mid": market_id, "alias": alias, "prefix": query}
    try:
        response = requests.get(url, params=params, timeout=timeout)
    except requests.RequestException as e:
        print(f"Error de red consultando Amazon: {e}")
        return []
    if response.status_code != 200:
        return []
    try:
        data = response.json()
    except ValueError:
        print("Error decodificando JSON de Amazon")
        return []
    if not isinstance(data, dict):
        print("No se encontraron sugerencias en el formato esperado (Amazon).")
        return []
    # "or []" also covers an explicit null under "suggestions".
    entries = data.get('suggestions') or []
    return [
        normalize_keyword(item['value'])
        for item in entries
        if isinstance(item, dict) and isinstance(item.get('value'), str)
    ]
# Función para expandir la palabra clave
def expand_keyword(keyword):
    """Return `keyword` followed by its single-character expansions.

    For every character in ``a``-``z``, ``*`` and ``_`` the result holds
    ``"<keyword> <char>"`` and then ``"<char> <keyword>"``, in that order.
    """
    affixes = 'abcdefghijklmnopqrstuvwxyz*_'
    variants = [keyword]
    for ch in affixes:
        # Suffix form first, then prefix form.
        variants.extend((" ".join((keyword, ch)), " ".join((ch, keyword))))
    return variants
# Función para contar y obtener las top N sugerencias más repetidas
def get_top_suggestions(suggestions, top_n=10):
    """Return the `top_n` most frequent suggestions as ``(suggestion, count)`` pairs.

    Pairs are ordered from most to least frequent, as produced by
    ``collections.Counter.most_common``.
    """
    return Counter(suggestions).most_common(top_n)
# Función principal que muestra el top 10 combinado, top 3 de cada plataforma y una tabla completa
def _record_suggestions(registry, suggestions, source_label):
    """Count one occurrence of each suggestion in `registry` under `source_label`."""
    for suggestion in suggestions:
        entry = registry.setdefault(suggestion, {"count": 0, "sources": set()})
        entry["count"] += 1
        entry["sources"].add(source_label)


def _render_ranked_list(heading, ranked):
    """Return an ``<h4>`` heading plus a ``<ul>`` of ``(suggestion, frequency)`` pairs."""
    items = ''.join(
        f'<li>{html.escape(str(sug))} ({freq})</li>' for sug, freq in ranked
    )
    return f"<h4>{heading}</h4>\n<ul>\n{items}\n</ul>"


def main(keyword):
    """Collect suggestions for `keyword` from every engine and render an HTML report.

    The keyword is expanded with `expand_keyword`, and each expansion is sent
    to Google (once per client), DuckDuckGo, YouTube (both endpoints), Bing,
    Amazon, Qwant and Brave. The report contains:

    * the 10 best suggestions, ranked by number of distinct sources and then
      by total occurrences;
    * the 3 most frequent suggestions per platform;
    * a table of every suggestion with its sources and occurrence count.

    Args:
        keyword: Seed search text typed by the user.

    Returns:
        An HTML string. For blank input a short prompt is returned and no
        requests are made.
    """
    # Blank input would otherwise fire hundreds of requests for an empty query.
    if not isinstance(keyword, str) or not keyword.strip():
        return "<div><p>Ingrese una palabra clave.</p></div>"

    expanded_keywords = expand_keyword(keyword)
    all_suggestions = {}   # suggestion -> {"count": int, "sources": set[str]}
    per_platform = {}      # platform label -> every suggestion it returned

    # Google is queried once per client; each client counts as its own source.
    google_clients = ["firefox", "chrome", "android", "desktop", "opera", "safari"]
    google_hits = per_platform.setdefault("Google", [])
    for exp_keyword in expanded_keywords:
        for client in google_clients:
            found = fetch_google_suggestions(exp_keyword, client=client) or []
            google_hits.extend(found)
            _record_suggestions(all_suggestions, found, f"Google ({client})")

    # Remaining platforms, in report order. YouTube merges both endpoints
    # under a single source label.
    other_platforms = (
        ("DuckDuckGo", (fetch_duckduckgo_suggestions,)),
        ("YouTube", (fetch_youtube_suggestions, fetch_youtube_suggestions_new)),
        ("Bing", (fetch_bing_suggestions,)),
        ("Amazon", (fetch_amazon_suggestions,)),
        ("Qwant", (fetch_qwant_suggestions,)),
        ("Brave", (fetch_brave_suggestions,)),
    )
    for label, fetchers in other_platforms:
        hits = per_platform.setdefault(label, [])
        for exp_keyword in expanded_keywords:
            for fetch in fetchers:
                # "or []" tolerates a fetcher that returns None.
                found = fetch(exp_keyword) or []
                hits.extend(found)
                _record_suggestions(all_suggestions, found, label)

    # Combined relevance: number of distinct sources first, then occurrences.
    combined_suggestions = sorted(
        all_suggestions.items(),
        key=lambda item: (len(item[1]["sources"]), item[1]["count"]),
        reverse=True,
    )

    brave_weight = 0.4  # Brave frequencies are displayed scaled down by 60%.

    # Top 3 per platform, in the order the platforms were queried.
    platform_sections = []
    for label, hits in per_platform.items():
        top_3 = get_top_suggestions(hits, top_n=3)
        if label == "Brave":
            top_3 = [(sug, int(freq * brave_weight)) for sug, freq in top_3]
        platform_sections.append(
            _render_ranked_list(f"Top 3 Sugerencias de {label}:", top_3)
        )

    # Full table. Suggestion text comes from remote services, so it is
    # escaped before being placed into HTML.
    # NOTE(review): the Brave scaling applies to the suggestion's total count
    # (all engines), and the top-10 list shows the unscaled count — confirm
    # this is intended.
    rows = []
    for suggestion, data in combined_suggestions:
        if 'Brave' in data['sources']:
            count = int(data['count'] * brave_weight)
        else:
            count = data['count']
        # sorted() makes the source order stable between runs.
        sources = ', '.join(sorted(data['sources']))
        rows.append(
            f"<tr><td>{html.escape(str(suggestion))}</td><td>{sources}</td><td>{count}</td></tr>"
        )
    all_suggestions_str = (
        "<table><tr><th>Keyword</th><th>Buscadores</th><th>Relevancia (Total)</th></tr>"
        + ''.join(rows)
        + "</table>"
    )

    top_10_items = ''.join(
        f'<li>{html.escape(str(sug))} (en {len(data["sources"])} buscadores, {data["count"]} repeticiones)</li>'
        for sug, data in combined_suggestions[:10]
    )

    return "\n".join([
        "<div>",
        "<h3>Top 10 combinadas (basado en la cantidad de buscadores y repeticiones):</h3>",
        "<ul>",
        top_10_items,
        "</ul>",
        *platform_sections,
        "<h4>Tabla completa de palabras clave y su relevancia:</h4>",
        all_suggestions_str,
        "</div>",
    ])
# Interfaz de Gradio
# One text input wired to `main`; the returned string is rendered as HTML.
# The title and description are user-facing, so their accented characters
# are written out correctly (Múltiples, Búsqueda, búsqueda).
iface = gr.Interface(
    fn=main,
    inputs="text",
    outputs=gr.HTML(),
    title="Sugerencias Combinadas de Múltiples Motores de Búsqueda",
    description="Ingrese una palabra clave para obtener sugerencias de búsqueda relacionadas de Google (Firefox, Chrome, Android, Desktop, Opera, Safari), DuckDuckGo, YouTube, Bing, Amazon, Qwant y Brave.",
)
iface.launch()