tx3bas committed on
Commit
9863bd9
·
verified ·
1 Parent(s): 916ba86

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +149 -142
app.py CHANGED
@@ -3,45 +3,7 @@ import requests
3
  import urllib.parse
4
  import re
5
  import xmltodict
6
- from itertools import product, combinations
7
- import nltk
8
- from nltk.corpus import stopwords
9
- from nltk.stem import SnowballStemmer
10
- from collections import Counter
11
- import concurrent.futures
12
- import ssl
13
- import os
14
-
15
- # Configurar SSL para la descarga de NLTK
16
# Fall back to an unverified HTTPS context for the NLTK download, but only
# when this interpreter actually exposes the private factory.
if hasattr(ssl, "_create_unverified_context"):
    _create_unverified_https_context = ssl._create_unverified_context
    ssl._create_default_https_context = _create_unverified_https_context
22
-
23
- # Función para descargar recursos de NLTK
24
def download_nltk_resources():
    """Download the NLTK resources the app needs, skipping installed ones.

    Each resource is looked up under its own NLTK data category. Looking up
    every resource under ``tokenizers/`` (as was done before) never finds the
    ``stopwords`` and ``words`` corpora, so they were downloaded again on
    every start.
    """
    # Resource name -> NLTK data category used for the lookup path.
    resources = {
        'punkt': 'tokenizers',
        'stopwords': 'corpora',
        'words': 'corpora',
    }
    for resource, category in resources.items():
        try:
            nltk.data.find(f'{category}/{resource}')
        except LookupError:
            print(f"Descargando {resource}...")
            nltk.download(resource, quiet=True)


# Fetch the NLTK resources at import time.
download_nltk_resources()

# Register an additional NLTK data directory.
nltk_data_dir = '/tmp/nltk_data'
os.makedirs(nltk_data_dir, exist_ok=True)
nltk.data.path.append(nltk_data_dir)

# Initialise the Spanish stemmer, the Spanish stop-word set and the English
# word list.
stemmer = SnowballStemmer("spanish")
stop_words = set(stopwords.words('spanish'))
english_words = set(nltk.corpus.words.words())
45
 
46
  # Función para obtener sugerencias de DuckDuckGo
47
  def fetch_duckduckgo_suggestions(query, lang_code="es"):
@@ -95,7 +57,7 @@ def fetch_youtube_suggestions(query, lang_code="es"):
95
  return []
96
 
97
  # Función para obtener sugerencias de Bing
98
- def fetch_bing_suggestions(query, market="es-ES"):
99
  url = "https://api.bing.com/qsml.aspx"
100
  params = {
101
  "Market": market,
@@ -125,7 +87,7 @@ def fetch_bing_suggestions(query, market="es-ES"):
125
  return []
126
 
127
  # Función para obtener sugerencias de Amazon
128
- def fetch_amazon_suggestions(query, market_id="A1F83G8C2ARO7P", alias="aps"):
129
  url = "https://completion.amazon.com/api/2017/suggestions"
130
  params = {
131
  "mid": market_id,
@@ -144,115 +106,160 @@ def fetch_amazon_suggestions(query, market_id="A1F83G8C2ARO7P", alias="aps"):
144
  else:
145
  return []
146
 
147
- # Función para expandir la palabra clave
148
def expand_keyword(keyword):
    """Expand *keyword* into a de-duplicated list of query variations.

    The variations are, in order: every combination of the stemmed,
    stop-word-free tokens; the keyword with each prefix and suffix; a naive
    plural (trailing ``s``) and singular (trailing ``s`` removed) of all of
    those; and finally each variation followed by the numbers 1 to 10.

    Returns:
        list[str]: the unique, non-empty variations in first-seen order. The
        order is now reproducible between runs; de-duplicating through a
        ``set`` made it depend on string hashing.
    """
    # Tokenise, drop stop words and stem what remains.
    tokens = [
        stemmer.stem(token)
        for token in nltk.word_tokenize(keyword.lower())
        if token not in stop_words
    ]

    # Every non-empty combination of the stems, joined back into a phrase.
    expanded_keywords = [
        " ".join(combo)
        for size in range(1, len(tokens) + 1)
        for combo in combinations(tokens, size)
    ]

    prefixes = ['como', 'que', 'donde', 'cuando', 'por que', 'cual', 'mejor', 'peor', 'top']
    suffixes = ['gratis', 'online', 'pdf', 'precios', 'opiniones', 'review', 'tutorial', 'curso']
    expanded_keywords.extend([f"{prefix} {keyword}" for prefix in prefixes])
    expanded_keywords.extend([f"{keyword} {suffix}" for suffix in suffixes])

    # Each step below reads the list it extends, so the new items must be
    # materialised as a list first (a generator would never terminate).
    expanded_keywords.extend([f"{kw}s" for kw in expanded_keywords])
    expanded_keywords.extend([kw[:-1] if kw.endswith('s') else kw for kw in expanded_keywords])
    expanded_keywords.extend([f"{kw} {i}" for kw in expanded_keywords for i in range(1, 11)])

    # Drop duplicates and empty strings while keeping first-seen order.
    return [kw for kw in dict.fromkeys(expanded_keywords) if kw]
184
 
185
- # Función para obtener sugerencias de todas las plataformas
186
def fetch_all_suggestions(keyword):
    """Query every suggestion platform concurrently for *keyword*.

    Returns:
        list[tuple]: ``(suggestion, platform)`` pairs, appended in the order
        the platform requests complete. A platform whose request raises is
        reported with ``print`` and contributes nothing.
    """
    fetchers = {
        'Google': fetch_google_suggestions,
        'DuckDuckGo': fetch_duckduckgo_suggestions,
        'YouTube': fetch_youtube_suggestions,
        'Bing': fetch_bing_suggestions,
        'Amazon': fetch_amazon_suggestions,
    }

    collected = []
    with concurrent.futures.ThreadPoolExecutor(max_workers=5) as pool:
        # Map each pending request back to the platform it belongs to.
        pending = {}
        for name, fetch in fetchers.items():
            pending[pool.submit(fetch, keyword)] = name

        for finished in concurrent.futures.as_completed(pending):
            name = pending[finished]
            try:
                for item in finished.result():
                    collected.append((item, name))
            except Exception as exc:
                print(f'{name} generated an exception: {exc}')

    return collected
208
 
# Main entry point
def main(keyword):
    """Build the HTML report of suggestions gathered for *keyword*.

    The keyword is expanded with ``expand_keyword``; every variation is sent
    to ``fetch_all_suggestions`` on a thread pool. The report lists each
    suggestion with its frequency, the platforms that returned it and a
    relevance score (frequency times number of platforms), followed by
    per-platform totals and a button that copies all suggestions.

    Suggestion text comes from remote services, so it is HTML-escaped before
    being placed in the markup, and the copy button reads it from a data
    attribute instead of a JavaScript template literal.

    Returns:
        str: the HTML fragment.
    """
    from html import escape

    expanded_keywords = expand_keyword(keyword)
    all_suggestions = []

    with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
        future_to_keyword = {
            executor.submit(fetch_all_suggestions, kw): kw
            for kw in expanded_keywords
        }
        for future in concurrent.futures.as_completed(future_to_keyword):
            kw = future_to_keyword[future]
            try:
                all_suggestions.extend(future.result())
            except Exception as exc:
                print(f'{kw} generated an exception: {exc}')

    # Frequency of every suggestion and number of suggestions per platform.
    suggestion_counter = Counter(sugg for sugg, _ in all_suggestions)
    platform_counter = Counter(platform for _, platform in all_suggestions)

    # Group platforms per suggestion in one pass rather than rescanning the
    # whole result list for every suggestion.
    platforms_by_suggestion = {}
    for sugg, platform in all_suggestions:
        platforms_by_suggestion.setdefault(sugg, set()).add(platform)

    # Most frequent first; ties keep first-seen order.
    sorted_suggestions = suggestion_counter.most_common()

    parts = ["<h3>Todas las sugerencias ordenadas por relevancia:</h3><ul>"]
    for suggestion, count in sorted_suggestions:
        platforms = platforms_by_suggestion[suggestion]
        platforms_str = ", ".join(sorted(platforms))
        relevance_score = count * len(platforms)
        parts.append(
            f"<li><strong>{escape(str(suggestion))}</strong> (Frecuencia: {count}, "
            f"Plataformas: {platforms_str}, Puntuación de relevancia: {relevance_score})</li>"
        )
    parts.append("</ul>")

    # Per-platform statistics.
    parts.append("<h3>Estadísticas por plataforma:</h3><ul>")
    for platform, count in platform_counter.most_common():
        parts.append(f"<li>{platform}: {count} sugerencias</li>")
    parts.append("</ul>")

    # Copy-all button: the text lives in an escaped data attribute so quotes
    # or backticks in a suggestion cannot break the inline handler.
    all_suggestions_text = ", ".join(str(sugg) for sugg, _ in sorted_suggestions)
    parts.append(f"""
    <div>
        <h3>Copiar todas las sugerencias:</h3>
        <button class="lg secondary svelte-cmf5ev" style="font-size: small; padding: 2px; color: #808080ba; border: none; margin-left: 5px;"
        data-suggestions="{escape(all_suggestions_text)}"
        onclick='navigator.clipboard.writeText(this.dataset.suggestions).then(() => alert("Todas las sugerencias copiadas al portapapeles"))'>&nbsp;Copiar todas las sugerencias&nbsp;</button>
    </div>
    """)

    html_output = "".join(parts)
    return html_output
257
 
258
  # Interfaz de Gradio
@@ -261,8 +268,8 @@ iface = gr.Interface(
261
  inputs="text",
262
  outputs="html",
263
  title="<div style='margin:0 auto;text-align:center'><div style='margin:0 auto;text-align:center'><img style='width:100px;display: inline-table;margin-bottom:-10px' src='https://artxeweb.com/media/files/search.jpg'><p>Sugerencias Combinadas de Google, DuckDuckGo, YouTube, Bing y Amazon</p></div>",
264
- description="<p style='margin-bottom:10px;text-align:center;background: #ffffff; padding: 8px; border-radius: 8px; border-width: 1px; border: solid 1px #e5e7eb;'>Ingrese una palabra clave para obtener sugerencias de búsqueda relacionadas de Google, DuckDuckGo, YouTube, Bing y Amazon. Se mostrarán todas las sugerencias ordenadas por relevancia.</p>",
265
  article="<div style='margin-top:10px'><p style='text-align: center !important; background: #ffffff; padding: 5px 30px; border-radius: 8px; border-width: 1px; border: solid 1px #e5e7eb; width: fit-content; margin: auto;'>Desarrollada por <a style='text-decoration: none !important; color: #e12a31 !important;' href='https://artxeweb.com'>© Artxe Web</a></p></div>"
266
  )
267
 
268
- iface.launch()
 
3
  import urllib.parse
4
  import re
5
  import xmltodict
6
+ import inflect
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
  # Función para obtener sugerencias de DuckDuckGo
9
  def fetch_duckduckgo_suggestions(query, lang_code="es"):
 
57
  return []
58
 
59
  # Función para obtener sugerencias de Bing
60
+ def fetch_bing_suggestions(query, market="en-US"):
61
  url = "https://api.bing.com/qsml.aspx"
62
  params = {
63
  "Market": market,
 
87
  return []
88
 
89
  # Función para obtener sugerencias de Amazon
90
+ def fetch_amazon_suggestions(query, market_id="ATVPDKIKX0DER", alias="aps"):
91
  url = "https://completion.amazon.com/api/2017/suggestions"
92
  params = {
93
  "mid": market_id,
 
106
  else:
107
  return []
108
 
109
+ # Función para generar plurales y variaciones
110
def generate_plural_variations(keyword):
    """Return ``[keyword, plural]`` where the plural comes from ``inflect``.

    NOTE(review): the rest of the app targets Spanish queries; confirm that
    the pluralisation ``inflect`` applies is what is wanted here.
    """
    engine = inflect.engine()
    return [keyword, engine.plural(keyword)]
115
+
116
+ # Función para expandir la palabra clave con stopwords y conjunciones
117
# Connecting words placed after and before the keyword to form extra queries.
STOP_WORDS = ["de", "en", "por", "para", "con", "sin", "y", "o"]


def expand_keyword_with_stopwords(keyword):
    """Return the keyword plus its combinations with each connecting word.

    The result starts with *keyword* itself, then for every entry of
    ``STOP_WORDS`` holds ``"<keyword> <word>"`` followed by
    ``"<word> <keyword>"``.
    """
    variants = [keyword]
    for connector in STOP_WORDS:
        variants.extend((f"{keyword} {connector}", f"{connector} {keyword}"))
    return variants
125
 
126
+ # Función para expandir las palabras clave
127
def expand_keyword(keyword):
    """Return every query variation to request for *keyword*, without repeats.

    Combines the connecting-word variations, the singular/plural pair and the
    keyword followed and preceded by each character of
    ``'abcdefghijklmnopqrstuvwxyz*_'``.

    The bare keyword is produced by both helper functions. Duplicates are
    removed (keeping first-seen order) so the same query is not sent twice;
    a repeated query would count its suggestions twice in the frequency
    ranking built by the caller.
    """
    expanded_keywords = expand_keyword_with_stopwords(keyword)
    expanded_keywords += generate_plural_variations(keyword)
    for letter in 'abcdefghijklmnopqrstuvwxyz*_':
        expanded_keywords.append(keyword + " " + letter)
        expanded_keywords.append(letter + " " + keyword)
    # dict.fromkeys drops repeats while preserving insertion order.
    return list(dict.fromkeys(expanded_keywords))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
 
# Main entry point
def main(keyword):
    """Build the HTML report of combined search suggestions for *keyword*.

    Every variation returned by ``expand_keyword`` is sent to the five
    suggestion sources. The report contains:

    * the ten most repeated suggestions overall (frequency of at least 2),
      with a button that copies them;
    * for each platform, its ten suggestions with the highest overall
      frequency (previously an arbitrary slice of a ``set``);
    * the full list of suggestions with their repetition counts.

    Suggestion text comes from remote services and is HTML-escaped before
    being inserted into the markup.

    Returns:
        str: the HTML fragment.
    """
    # Imported locally so the block does not depend on file-level imports.
    from collections import Counter
    from html import escape

    # Platform name -> fetch function, in the order the sections are shown.
    fetchers = {
        "Google": fetch_google_suggestions,
        "DuckDuckGo": fetch_duckduckgo_suggestions,
        "YouTube": fetch_youtube_suggestions,
        "Bing": fetch_bing_suggestions,
        "Amazon": fetch_amazon_suggestions,
    }
    # Order in which the sources are queried for each variation; it fixes the
    # first-seen order that breaks ties between equal frequencies.
    query_order = ("DuckDuckGo", "Google", "YouTube", "Bing", "Amazon")

    expanded_keywords = expand_keyword(keyword)
    all_suggestions = Counter()
    # Per platform: the suggestions it returned, as an insertion-ordered set.
    seen_by_platform = {platform: {} for platform in fetchers}

    for exp_keyword in expanded_keywords:
        for platform in query_order:
            suggestions = fetchers[platform](exp_keyword)
            all_suggestions.update(suggestions)
            seen_by_platform[platform].update(dict.fromkeys(suggestions))

    # Most frequent first; ties keep first-seen order.
    sorted_suggestions = all_suggestions.most_common()
    combined_top_10_suggestions = [sug for sug, freq in sorted_suggestions if freq >= 2][:10]
    suggestions_str = ", ".join(escape(str(sug)) for sug in combined_top_10_suggestions)

    # Header with the combined top 10 and the copy button.
    parts = [f"""
    <div>
        <b>Sugerencias combinadas de Google, DuckDuckGo, YouTube, Bing y Amazon (Top 10 combinadas):</b> <span id='suggestions_text'>{suggestions_str}</span>
        <button class="lg secondary svelte-cmf5ev" style="font-size: small; padding: 2px; color: #808080ba; border: none; margin-left: 5px;"
        onclick='navigator.clipboard.writeText(document.getElementById("suggestions_text").innerText).then(() => alert("Texto copiado al portapapeles"))'>&nbsp;&nbsp;</button>
    </div>
    """]

    # One section per platform, ranked by overall frequency.
    for platform in fetchers:
        top_10 = sorted(
            seen_by_platform[platform],
            key=all_suggestions.__getitem__,
            reverse=True,
        )[:10]
        parts.append(f"""
    <h4>Top 10 Sugerencias de {platform}:</h4>
    <ul>
    """)
        for suggestion in top_10:
            freq = all_suggestions[suggestion]
            parts.append(f"<li>{escape(str(suggestion))} ({freq})</li>")
        parts.append("</ul>")

    # Full list of suggestions with their repetition counts.
    parts.append("""
    <h4>Lista completa de palabras clave con su número de repeticiones:</h4>
    """)
    parts.append("<ul>")
    for suggestion, freq in sorted_suggestions:
        parts.append(f"<li>{escape(str(suggestion))} - {freq} repeticiones</li>")
    parts.append("</ul>")

    html_output = "".join(parts)
    return html_output
264
 
265
  # Interfaz de Gradio
 
268
  inputs="text",
269
  outputs="html",
270
  title="<div style='margin:0 auto;text-align:center'><div style='margin:0 auto;text-align:center'><img style='width:100px;display: inline-table;margin-bottom:-10px' src='https://artxeweb.com/media/files/search.jpg'><p>Sugerencias Combinadas de Google, DuckDuckGo, YouTube, Bing y Amazon</p></div>",
271
+ description="<p style='margin-bottom:10px;text-align:center;background: #ffffff; padding: 8px; border-radius: 8px; border-width: 1px; border: solid 1px #e5e7eb;'>Ingrese una palabra clave para obtener sugerencias de búsqueda relacionadas de Google, DuckDuckGo, YouTube, Bing y Amazon. Se mostrarán las 10 primeras sugerencias combinadas y también las 10 principales de cada plataforma por separado.</p>",
272
  article="<div style='margin-top:10px'><p style='text-align: center !important; background: #ffffff; padding: 5px 30px; border-radius: 8px; border-width: 1px; border: solid 1px #e5e7eb; width: fit-content; margin: auto;'>Desarrollada por <a style='text-decoration: none !important; color: #e12a31 !important;' href='https://artxeweb.com'>© Artxe Web</a></p></div>"
273
  )
274
 
275
+ iface.launch()