tx3bas committed on
Commit
916ba86
·
verified ·
1 Parent(s): 2cb8fc4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +104 -68
app.py CHANGED
@@ -3,18 +3,45 @@ import requests
3
  import urllib.parse
4
  import re
5
  import xmltodict
6
- from itertools import product
7
  import nltk
8
  from nltk.corpus import stopwords
9
  from nltk.stem import SnowballStemmer
 
 
 
 
10
 
11
- # Descargar recursos necesarios de NLTK
12
- nltk.download('stopwords', quiet=True)
13
- nltk.download('punkt', quiet=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
  # Inicializar el stemmer y la lista de stopwords
16
  stemmer = SnowballStemmer("spanish")
17
  stop_words = set(stopwords.words('spanish'))
 
18
 
19
  # Función para obtener sugerencias de DuckDuckGo
20
  def fetch_duckduckgo_suggestions(query, lang_code="es"):
@@ -128,14 +155,14 @@ def expand_keyword(keyword):
128
  # Generar variaciones
129
  variations = []
130
  for i in range(1, len(tokens) + 1):
131
- variations.extend(list(product(tokens, repeat=i)))
132
 
133
  # Convertir tuplas a strings
134
  expanded_keywords = [" ".join(variation) for variation in variations]
135
 
136
  # Añadir variaciones con prefijos y sufijos
137
- prefixes = ['como', 'que', 'donde', 'cuando', 'por que', 'cual']
138
- suffixes = ['gratis', 'online', 'pdf', 'precios', 'opiniones']
139
 
140
  for prefix in prefixes:
141
  expanded_keywords.append(f"{prefix} {keyword}")
@@ -143,80 +170,89 @@ def expand_keyword(keyword):
143
  for suffix in suffixes:
144
  expanded_keywords.append(f"{keyword} {suffix}")
145
 
146
- # Añadir plurales (simplificado)
147
  expanded_keywords.extend([f"{kw}s" for kw in expanded_keywords])
 
 
 
 
148
 
149
  # Eliminar duplicados y la palabra clave original vacía
150
  expanded_keywords = list(set(expanded_keywords) - {''})
151
 
152
  return expanded_keywords
153
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
154
  # Función principal
155
  def main(keyword):
156
  expanded_keywords = expand_keyword(keyword)
157
- all_suggestions = {}
158
- platform_suggestions = {
159
- 'Google': set(),
160
- 'DuckDuckGo': set(),
161
- 'YouTube': set(),
162
- 'Bing': set(),
163
- 'Amazon': set()
164
- }
165
-
166
- # Obtener sugerencias de todas las plataformas
167
- for exp_keyword in expanded_keywords:
168
- for platform, fetch_func in [
169
- ('Google', fetch_google_suggestions),
170
- ('DuckDuckGo', fetch_duckduckgo_suggestions),
171
- ('YouTube', fetch_youtube_suggestions),
172
- ('Bing', fetch_bing_suggestions),
173
- ('Amazon', fetch_amazon_suggestions)
174
- ]:
175
- suggestions = fetch_func(exp_keyword)
176
- platform_suggestions[platform].update(suggestions)
177
- for suggestion in suggestions:
178
- if suggestion in all_suggestions:
179
- all_suggestions[suggestion] += 1
180
- else:
181
- all_suggestions[suggestion] = 1
182
-
183
- # Ordenar y filtrar las sugerencias más frecuentes combinadas
184
- sorted_suggestions = sorted(all_suggestions.items(), key=lambda item: item[1], reverse=True)
185
- combined_top_suggestions = [sug for sug, freq in sorted_suggestions if freq >= 2][:50] # Aumentado a top 50
186
- suggestions_str = ", ".join(combined_top_suggestions)
187
-
188
- # Crear la lista de todas las palabras clave con su número de repeticiones
189
- all_suggestions_str = "<ul>"
190
- for suggestion, freq in sorted_suggestions:
191
- all_suggestions_str += f"<li>{suggestion} - {freq} repeticiones</li>"
192
- all_suggestions_str += "</ul>"
193
-
194
  # Crear el HTML de salida
195
- html_output = f"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
196
  <div>
197
- <b>Top 50 Sugerencias combinadas:</b> <span id='suggestions_text'>{suggestions_str}</span>
198
  <button class="lg secondary svelte-cmf5ev" style="font-size: small; padding: 2px; color: #808080ba; border: none; margin-left: 5px;"
199
- onclick='navigator.clipboard.writeText(document.getElementById("suggestions_text").innerText).then(() => alert("Texto copiado al portapapeles"))'>&nbsp;✂&nbsp;</button>
200
  </div>
201
  """
202
-
203
- # Agregar las top sugerencias de cada plataforma
204
- for platform, suggestions in platform_suggestions.items():
205
- html_output += f"""
206
- <h4>Top 20 Sugerencias de {platform}:</h4>
207
- <ul>
208
- """
209
- for suggestion in list(suggestions)[:20]:
210
- freq = all_suggestions[suggestion]
211
- html_output += f"<li>{suggestion} ({freq})</li>"
212
- html_output += "</ul>"
213
-
214
- # Agregar la lista completa de todas las palabras clave
215
- html_output += """
216
- <h4>Lista completa de palabras clave con su número de repeticiones:</h4>
217
- """
218
- html_output += all_suggestions_str
219
-
220
  return html_output
221
 
222
  # Interfaz de Gradio
@@ -225,8 +261,8 @@ iface = gr.Interface(
225
  inputs="text",
226
  outputs="html",
227
  title="<div style='margin:0 auto;text-align:center'><div style='margin:0 auto;text-align:center'><img style='width:100px;display: inline-table;margin-bottom:-10px' src='https://artxeweb.com/media/files/search.jpg'><p>Sugerencias Combinadas de Google, DuckDuckGo, YouTube, Bing y Amazon</p></div>",
228
- description="<p style='margin-bottom:10px;text-align:center;background: #ffffff; padding: 8px; border-radius: 8px; border-width: 1px; border: solid 1px #e5e7eb;'>Ingrese una palabra clave para obtener sugerencias de búsqueda relacionadas de Google, DuckDuckGo, YouTube, Bing y Amazon. Se mostrarán las 50 primeras sugerencias combinadas y también las 20 principales de cada plataforma por separado.</p>",
229
  article="<div style='margin-top:10px'><p style='text-align: center !important; background: #ffffff; padding: 5px 30px; border-radius: 8px; border-width: 1px; border: solid 1px #e5e7eb; width: fit-content; margin: auto;'>Desarrollada por <a style='text-decoration: none !important; color: #e12a31 !important;' href='https://artxeweb.com'>© Artxe Web</a></p></div>"
230
  )
231
 
232
- iface.launch()
 
3
  import urllib.parse
4
  import re
5
  import xmltodict
6
+ from itertools import product, combinations
7
  import nltk
8
  from nltk.corpus import stopwords
9
  from nltk.stem import SnowballStemmer
10
+ from collections import Counter
11
+ import concurrent.futures
12
+ import ssl
13
+ import os
14
 
15
# Work around SSL certificate failures while NLTK downloads its data.
# NOTE(review): this swaps the ssl module's default HTTPS context factory for
# the unverified one, i.e. certificate verification is turned off process-wide
# for everything that relies on that default. Prefer fixing the certificate
# store of the runtime and removing this block.
try:
    _create_unverified_https_context = ssl._create_unverified_context
except AttributeError:
    # Python build without the private helper: keep the verified default.
    pass
else:
    ssl._create_default_https_context = _create_unverified_https_context

# Register the NLTK data directory BEFORE any lookup or download, so that the
# resources fetched below are searched for (and stored) in this location.
nltk_data_dir = '/tmp/nltk_data'
os.makedirs(nltk_data_dir, exist_ok=True)
if nltk_data_dir not in nltk.data.path:
    nltk.data.path.append(nltk_data_dir)


def download_nltk_resources():
    """Download the NLTK resources this module needs if they are missing.

    Each resource is looked up under its own category inside the NLTK data
    path: ``punkt`` is a tokenizer model, while ``stopwords`` and ``words``
    are corpora. Looking every resource up under ``tokenizers/`` never finds
    the corpora and forces a new download on every start.

    Missing resources are downloaded into ``nltk_data_dir``.
    """
    resources = {
        'punkt': 'tokenizers/punkt',
        'stopwords': 'corpora/stopwords',
        'words': 'corpora/words',
    }
    for resource, resource_path in resources.items():
        try:
            nltk.data.find(resource_path)
        except LookupError:
            print(f"Descargando {resource}...")
            nltk.download(resource, download_dir=nltk_data_dir, quiet=True)


# Fetch the NLTK resources at import time; the corpora are read just below.
download_nltk_resources()

# Initialise the Spanish stemmer, the Spanish stop-word set and the English
# vocabulary set used by the rest of the module.
stemmer = SnowballStemmer("spanish")
stop_words = set(stopwords.words('spanish'))
english_words = set(nltk.corpus.words.words())
45
 
46
  # Función para obtener sugerencias de DuckDuckGo
47
  def fetch_duckduckgo_suggestions(query, lang_code="es"):
 
155
  # Generar variaciones
156
  variations = []
157
  for i in range(1, len(tokens) + 1):
158
+ variations.extend(list(combinations(tokens, i)))
159
 
160
  # Convertir tuplas a strings
161
  expanded_keywords = [" ".join(variation) for variation in variations]
162
 
163
  # Añadir variaciones con prefijos y sufijos
164
+ prefixes = ['como', 'que', 'donde', 'cuando', 'por que', 'cual', 'mejor', 'peor', 'top']
165
+ suffixes = ['gratis', 'online', 'pdf', 'precios', 'opiniones', 'review', 'tutorial', 'curso']
166
 
167
  for prefix in prefixes:
168
  expanded_keywords.append(f"{prefix} {keyword}")
 
170
  for suffix in suffixes:
171
  expanded_keywords.append(f"{keyword} {suffix}")
172
 
173
+ # Añadir plurales y singulares (simplificado)
174
  expanded_keywords.extend([f"{kw}s" for kw in expanded_keywords])
175
+ expanded_keywords.extend([kw[:-1] if kw.endswith('s') else kw for kw in expanded_keywords])
176
+
177
+ # Añadir variaciones con números
178
+ expanded_keywords.extend([f"{kw} {i}" for kw in expanded_keywords for i in range(1, 11)])
179
 
180
  # Eliminar duplicados y la palabra clave original vacía
181
  expanded_keywords = list(set(expanded_keywords) - {''})
182
 
183
  return expanded_keywords
184
 
185
+ # Función para obtener sugerencias de todas las plataformas
186
def fetch_all_suggestions(keyword):
    """Ask every suggestion provider for ``keyword`` concurrently.

    One task per provider is submitted to a five-worker thread pool and the
    results are gathered as the tasks finish, so the order of the returned
    entries follows provider completion order.

    Args:
        keyword: Query string passed unchanged to each provider function.

    Returns:
        A list of ``(suggestion, platform)`` tuples. A provider whose call
        fails is reported with ``print`` and contributes no entries.
    """
    providers = (
        ('Google', fetch_google_suggestions),
        ('DuckDuckGo', fetch_duckduckgo_suggestions),
        ('YouTube', fetch_youtube_suggestions),
        ('Bing', fetch_bing_suggestions),
        ('Amazon', fetch_amazon_suggestions),
    )

    collected = []

    with concurrent.futures.ThreadPoolExecutor(max_workers=5) as pool:
        # Map each submitted task back to the provider it belongs to.
        pending = {}
        for name, fetcher in providers:
            pending[pool.submit(fetcher, keyword)] = name

        for finished in concurrent.futures.as_completed(pending):
            name = pending[finished]
            try:
                # Tag every suggestion with its provider; built in full first
                # so a failing provider adds nothing at all.
                tagged = [(item, name) for item in finished.result()]
            except Exception as exc:
                print(f'{name} generated an exception: {exc}')
            else:
                collected.extend(tagged)

    return collected
208
+
209
  # Función principal
210
  def main(keyword):
211
  expanded_keywords = expand_keyword(keyword)
212
+ all_suggestions = []
213
+
214
+ with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
215
+ future_to_keyword = {executor.submit(fetch_all_suggestions, kw): kw for kw in expanded_keywords}
216
+ for future in concurrent.futures.as_completed(future_to_keyword):
217
+ kw = future_to_keyword[future]
218
+ try:
219
+ suggestions = future.result()
220
+ all_suggestions.extend(suggestions)
221
+ except Exception as exc:
222
+ print(f'{kw} generated an exception: {exc}')
223
+
224
+ # Contar las sugerencias y sus plataformas
225
+ suggestion_counter = Counter(sugg for sugg, _ in all_suggestions)
226
+ platform_counter = Counter(platform for _, platform in all_suggestions)
227
+
228
+ # Ordenar las sugerencias por frecuencia
229
+ sorted_suggestions = sorted(suggestion_counter.items(), key=lambda x: x[1], reverse=True)
230
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
231
  # Crear el HTML de salida
232
+ html_output = "<h3>Todas las sugerencias ordenadas por relevancia:</h3><ul>"
233
+ for suggestion, count in sorted_suggestions:
234
+ platforms = [platform for sugg, platform in all_suggestions if sugg == suggestion]
235
+ platforms_str = ", ".join(set(platforms))
236
+ relevance_score = count * len(set(platforms))
237
+ html_output += f"<li><strong>{suggestion}</strong> (Frecuencia: {count}, Plataformas: {platforms_str}, Puntuación de relevancia: {relevance_score})</li>"
238
+ html_output += "</ul>"
239
+
240
+ # Añadir estadísticas por plataforma
241
+ html_output += "<h3>Estadísticas por plataforma:</h3><ul>"
242
+ for platform, count in platform_counter.most_common():
243
+ html_output += f"<li>{platform}: {count} sugerencias</li>"
244
+ html_output += "</ul>"
245
+
246
+ # Añadir botón para copiar todas las sugerencias
247
+ all_suggestions_text = ", ".join(sugg for sugg, _ in sorted_suggestions)
248
+ html_output += f"""
249
  <div>
250
+ <h3>Copiar todas las sugerencias:</h3>
251
  <button class="lg secondary svelte-cmf5ev" style="font-size: small; padding: 2px; color: #808080ba; border: none; margin-left: 5px;"
252
+ onclick='navigator.clipboard.writeText(`{all_suggestions_text}`).then(() => alert("Todas las sugerencias copiadas al portapapeles"))'>&nbsp;Copiar todas las sugerencias&nbsp;</button>
253
  </div>
254
  """
255
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
256
  return html_output
257
 
258
  # Interfaz de Gradio
 
261
  inputs="text",
262
  outputs="html",
263
  title="<div style='margin:0 auto;text-align:center'><div style='margin:0 auto;text-align:center'><img style='width:100px;display: inline-table;margin-bottom:-10px' src='https://artxeweb.com/media/files/search.jpg'><p>Sugerencias Combinadas de Google, DuckDuckGo, YouTube, Bing y Amazon</p></div>",
264
+ description="<p style='margin-bottom:10px;text-align:center;background: #ffffff; padding: 8px; border-radius: 8px; border-width: 1px; border: solid 1px #e5e7eb;'>Ingrese una palabra clave para obtener sugerencias de búsqueda relacionadas de Google, DuckDuckGo, YouTube, Bing y Amazon. Se mostrarán todas las sugerencias ordenadas por relevancia.</p>",
265
  article="<div style='margin-top:10px'><p style='text-align: center !important; background: #ffffff; padding: 5px 30px; border-radius: 8px; border-width: 1px; border: solid 1px #e5e7eb; width: fit-content; margin: auto;'>Desarrollada por <a style='text-decoration: none !important; color: #e12a31 !important;' href='https://artxeweb.com'>© Artxe Web</a></p></div>"
266
  )
267
 
268
+ iface.launch()