tx3bas committed on
Commit
ce5ecf0
·
verified ·
1 Parent(s): aaffc09

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +82 -117
app.py CHANGED
@@ -3,6 +3,18 @@ import requests
3
  import urllib.parse
4
  import re
5
  import xmltodict
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
  # Función para obtener sugerencias de DuckDuckGo
8
  def fetch_duckduckgo_suggestions(query, lang_code="es"):
@@ -41,11 +53,9 @@ def fetch_youtube_suggestions(query, lang_code="es"):
41
 
42
  if response.status_code == 200:
43
  try:
44
- # Extraer las sugerencias del formato window.google.ac.h(["..."])
45
  match = re.search(r'window\.google\.ac\.h\(\["[^"]*",\[(.*?)\],', response.text)
46
  if match:
47
  suggestions_data = match.group(1)
48
- # Extraemos las sugerencias
49
  suggestions = re.findall(r'\["([^"]+)"', suggestions_data)
50
  return suggestions
51
  else:
@@ -58,7 +68,7 @@ def fetch_youtube_suggestions(query, lang_code="es"):
58
  return []
59
 
60
  # Función para obtener sugerencias de Bing
61
- def fetch_bing_suggestions(query, market="en-US"):
62
  url = "https://api.bing.com/qsml.aspx"
63
  params = {
64
  "Market": market,
@@ -88,7 +98,7 @@ def fetch_bing_suggestions(query, market="en-US"):
88
  return []
89
 
90
  # Función para obtener sugerencias de Amazon
91
- def fetch_amazon_suggestions(query, market_id="ATVPDKIKX0DER", alias="aps"):
92
  url = "https://completion.amazon.com/api/2017/suggestions"
93
  params = {
94
  "mid": market_id,
@@ -100,7 +110,6 @@ def fetch_amazon_suggestions(query, market_id="ATVPDKIKX0DER", alias="aps"):
100
  if response.status_code == 200:
101
  try:
102
  data = response.json()
103
- # Extraemos las sugerencias desde el JSON bajo la clave 'suggestions'
104
  return [item['value'] for item in data.get('suggestions', [])]
105
  except ValueError:
106
  print("Error decodificando JSON de Amazon")
@@ -110,83 +119,71 @@ def fetch_amazon_suggestions(query, market_id="ATVPDKIKX0DER", alias="aps"):
110
 
111
  # Función para expandir la palabra clave
112
  def expand_keyword(keyword):
113
- expanded_keywords = [keyword]
114
- for letter in 'abcdefghijklmnopqrstuvwxyz*_':
115
- expanded_keywords.append(keyword + " " + letter)
116
- expanded_keywords.append(letter + " " + keyword)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  return expanded_keywords
118
 
119
  # Función principal
120
  def main(keyword):
121
  expanded_keywords = expand_keyword(keyword)
122
  all_suggestions = {}
123
- google_suggestions_all = []
124
- duckduckgo_suggestions_all = []
125
- youtube_suggestions_all = []
126
- bing_suggestions_all = []
127
- amazon_suggestions_all = []
128
-
129
- # Obtener sugerencias de DuckDuckGo
130
- for exp_keyword in expanded_keywords:
131
- suggestions = fetch_duckduckgo_suggestions(exp_keyword)
132
- duckduckgo_suggestions_all.extend(suggestions) # Agregar todas las sugerencias
133
- for suggestion in suggestions:
134
- if suggestion in all_suggestions:
135
- all_suggestions[suggestion] += 1
136
- else:
137
- all_suggestions[suggestion] = 1
138
-
139
- # Obtener sugerencias de Google
140
- for exp_keyword in expanded_keywords:
141
- suggestions = fetch_google_suggestions(exp_keyword)
142
- google_suggestions_all.extend(suggestions) # Agregar todas las sugerencias
143
- for suggestion in suggestions:
144
- if suggestion in all_suggestions:
145
- all_suggestions[suggestion] += 1
146
- else:
147
- all_suggestions[suggestion] = 1
148
-
149
- # Obtener sugerencias de YouTube
150
- for exp_keyword in expanded_keywords:
151
- suggestions = fetch_youtube_suggestions(exp_keyword)
152
- youtube_suggestions_all.extend(suggestions) # Agregar todas las sugerencias
153
- for suggestion in suggestions:
154
- if suggestion in all_suggestions:
155
- all_suggestions[suggestion] += 1
156
- else:
157
- all_suggestions[suggestion] = 1
158
-
159
- # Obtener sugerencias de Bing
160
- for exp_keyword in expanded_keywords:
161
- suggestions = fetch_bing_suggestions(exp_keyword)
162
- bing_suggestions_all.extend(suggestions)
163
- for suggestion in suggestions:
164
- if suggestion in all_suggestions:
165
- all_suggestions[suggestion] += 1
166
- else:
167
- all_suggestions[suggestion] = 1
168
 
169
- # Obtener sugerencias de Amazon
170
  for exp_keyword in expanded_keywords:
171
- suggestions = fetch_amazon_suggestions(exp_keyword)
172
- amazon_suggestions_all.extend(suggestions)
173
- for suggestion in suggestions:
174
- if suggestion in all_suggestions:
175
- all_suggestions[suggestion] += 1
176
- else:
177
- all_suggestions[suggestion] = 1
178
-
179
- # Filtrar las top 10 de cada plataforma con su número de repeticiones
180
- google_top_10 = list(set(google_suggestions_all))[:10]
181
- duckduckgo_top_10 = list(set(duckduckgo_suggestions_all))[:10]
182
- youtube_top_10 = list(set(youtube_suggestions_all))[:10]
183
- bing_top_10 = list(set(bing_suggestions_all))[:10]
184
- amazon_top_10 = list(set(amazon_suggestions_all))[:10]
185
 
186
  # Ordenar y filtrar las sugerencias más frecuentes combinadas
187
  sorted_suggestions = sorted(all_suggestions.items(), key=lambda item: item[1], reverse=True)
188
- combined_top_10_suggestions = [sug for sug, freq in sorted_suggestions if freq >= 2][:10]
189
- suggestions_str = ", ".join(combined_top_10_suggestions)
190
 
191
  # Crear la lista de todas las palabras clave con su número de repeticiones
192
  all_suggestions_str = "<ul>"
@@ -194,57 +191,25 @@ def main(keyword):
194
  all_suggestions_str += f"<li>{suggestion} - {freq} repeticiones</li>"
195
  all_suggestions_str += "</ul>"
196
 
197
- # Crear el HTML de salida con un botón de copia
198
  html_output = f"""
199
  <div>
200
- <b>Sugerencias combinadas de Google, DuckDuckGo, YouTube, Bing y Amazon (Top 10 combinadas):</b> <span id='suggestions_text'>{suggestions_str}</span>
201
  <button class="lg secondary svelte-cmf5ev" style="font-size: small; padding: 2px; color: #808080ba; border: none; margin-left: 5px;"
202
  onclick='navigator.clipboard.writeText(document.getElementById("suggestions_text").innerText).then(() => alert("Texto copiado al portapapeles"))'>&nbsp;✂&nbsp;</button>
203
  </div>
204
-
205
- <h4>Top 10 Sugerencias de Google:</h4>
206
- <ul>
207
  """
208
- for suggestion in google_top_10:
209
- freq = all_suggestions[suggestion]
210
- html_output += f"<li>{suggestion} ({freq})</li>"
211
- html_output += "</ul>"
212
 
213
- html_output += """
214
- <h4>Top 10 Sugerencias de DuckDuckGo:</h4>
215
- <ul>
216
- """
217
- for suggestion in duckduckgo_top_10:
218
- freq = all_suggestions[suggestion]
219
- html_output += f"<li>{suggestion} ({freq})</li>"
220
- html_output += "</ul>"
221
-
222
- html_output += """
223
- <h4>Top 10 Sugerencias de YouTube:</h4>
224
- <ul>
225
- """
226
- for suggestion in youtube_top_10:
227
- freq = all_suggestions[suggestion]
228
- html_output += f"<li>{suggestion} ({freq})</li>"
229
- html_output += "</ul>"
230
-
231
- html_output += """
232
- <h4>Top 10 Sugerencias de Bing:</h4>
233
- <ul>
234
- """
235
- for suggestion in bing_top_10:
236
- freq = all_suggestions[suggestion]
237
- html_output += f"<li>{suggestion} ({freq})</li>"
238
- html_output += "</ul>"
239
-
240
- html_output += """
241
- <h4>Top 10 Sugerencias de Amazon:</h4>
242
- <ul>
243
- """
244
- for suggestion in amazon_top_10:
245
- freq = all_suggestions[suggestion]
246
- html_output += f"<li>{suggestion} ({freq})</li>"
247
- html_output += "</ul>"
248
 
249
  # Agregar la lista completa de todas las palabras clave
250
  html_output += """
@@ -260,8 +225,8 @@ iface = gr.Interface(
260
  inputs="text",
261
  outputs="html",
262
  title="<div style='margin:0 auto;text-align:center'><div style='margin:0 auto;text-align:center'><img style='width:100px;display: inline-table;margin-bottom:-10px' src='https://artxeweb.com/media/files/search.jpg'><p>Sugerencias Combinadas de Google, DuckDuckGo, YouTube, Bing y Amazon</p></div>",
263
- description="<p style='margin-bottom:10px;text-align:center;background: #ffffff; padding: 8px; border-radius: 8px; border-width: 1px; border: solid 1px #e5e7eb;'>Ingrese una palabra clave para obtener sugerencias de búsqueda relacionadas de Google, DuckDuckGo, YouTube, Bing y Amazon. Se mostrarán las 10 primeras sugerencias combinadas y también las 10 principales de cada plataforma por separado.</p>",
264
  article="<div style='margin-top:10px'><p style='text-align: center !important; background: #ffffff; padding: 5px 30px; border-radius: 8px; border-width: 1px; border: solid 1px #e5e7eb; width: fit-content; margin: auto;'>Desarrollada por <a style='text-decoration: none !important; color: #e12a31 !important;' href='https://artxeweb.com'>© Artxe Web</a></p></div>"
265
  )
266
 
267
- iface.launch()
 
3
  import urllib.parse
4
  import re
5
  import xmltodict
6
+ from itertools import product
7
+ import nltk
8
+ from nltk.corpus import stopwords
9
+ from nltk.stem import SnowballStemmer
10
+
11
+ # Descargar recursos de NLTK
12
+ nltk.download('stopwords', quiet=True)
13
+ nltk.download('punkt', quiet=True)
14
+
15
+ # Inicializar el stemmer y la lista de stopwords
16
+ stemmer = SnowballStemmer("spanish")
17
+ stop_words = set(stopwords.words('spanish'))
18
 
19
  # Función para obtener sugerencias de DuckDuckGo
20
  def fetch_duckduckgo_suggestions(query, lang_code="es"):
 
53
 
54
  if response.status_code == 200:
55
  try:
 
56
  match = re.search(r'window\.google\.ac\.h\(\["[^"]*",\[(.*?)\],', response.text)
57
  if match:
58
  suggestions_data = match.group(1)
 
59
  suggestions = re.findall(r'\["([^"]+)"', suggestions_data)
60
  return suggestions
61
  else:
 
68
  return []
69
 
70
  # Función para obtener sugerencias de Bing
71
+ def fetch_bing_suggestions(query, market="es-ES"):
72
  url = "https://api.bing.com/qsml.aspx"
73
  params = {
74
  "Market": market,
 
98
  return []
99
 
100
  # Función para obtener sugerencias de Amazon
101
+ def fetch_amazon_suggestions(query, market_id="A1F83G8C2ARO7P", alias="aps"):
102
  url = "https://completion.amazon.com/api/2017/suggestions"
103
  params = {
104
  "mid": market_id,
 
110
  if response.status_code == 200:
111
  try:
112
  data = response.json()
 
113
  return [item['value'] for item in data.get('suggestions', [])]
114
  except ValueError:
115
  print("Error decodificando JSON de Amazon")
 
119
 
120
  # Función para expandir la palabra clave
121
  def expand_keyword(keyword):
122
+ # Tokenizar la palabra clave
123
+ tokens = nltk.word_tokenize(keyword.lower())
124
+
125
+ # Eliminar stopwords y aplicar stemming
126
+ tokens = [stemmer.stem(token) for token in tokens if token not in stop_words]
127
+
128
+ # Generar variaciones
129
+ variations = []
130
+ for i in range(1, len(tokens) + 1):
131
+ variations.extend(list(product(tokens, repeat=i)))
132
+
133
+ # Convertir tuplas a strings
134
+ expanded_keywords = [" ".join(variation) for variation in variations]
135
+
136
+ # Añadir variaciones con prefijos y sufijos
137
+ prefixes = ['como', 'que', 'donde', 'cuando', 'por que', 'cual']
138
+ suffixes = ['gratis', 'online', 'pdf', 'precios', 'opiniones']
139
+
140
+ for prefix in prefixes:
141
+ expanded_keywords.append(f"{prefix} {keyword}")
142
+
143
+ for suffix in suffixes:
144
+ expanded_keywords.append(f"{keyword} {suffix}")
145
+
146
+ # Añadir plurales (simplificado)
147
+ expanded_keywords.extend([f"{kw}s" for kw in expanded_keywords])
148
+
149
+ # Eliminar duplicados y la palabra clave original vacía
150
+ expanded_keywords = list(set(expanded_keywords) - {''})
151
+
152
  return expanded_keywords
153
 
154
  # Función principal
155
  def main(keyword):
156
  expanded_keywords = expand_keyword(keyword)
157
  all_suggestions = {}
158
+ platform_suggestions = {
159
+ 'Google': set(),
160
+ 'DuckDuckGo': set(),
161
+ 'YouTube': set(),
162
+ 'Bing': set(),
163
+ 'Amazon': set()
164
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
165
 
166
+ # Obtener sugerencias de todas las plataformas
167
  for exp_keyword in expanded_keywords:
168
+ for platform, fetch_func in [
169
+ ('Google', fetch_google_suggestions),
170
+ ('DuckDuckGo', fetch_duckduckgo_suggestions),
171
+ ('YouTube', fetch_youtube_suggestions),
172
+ ('Bing', fetch_bing_suggestions),
173
+ ('Amazon', fetch_amazon_suggestions)
174
+ ]:
175
+ suggestions = fetch_func(exp_keyword)
176
+ platform_suggestions[platform].update(suggestions)
177
+ for suggestion in suggestions:
178
+ if suggestion in all_suggestions:
179
+ all_suggestions[suggestion] += 1
180
+ else:
181
+ all_suggestions[suggestion] = 1
182
 
183
  # Ordenar y filtrar las sugerencias más frecuentes combinadas
184
  sorted_suggestions = sorted(all_suggestions.items(), key=lambda item: item[1], reverse=True)
185
+ combined_top_suggestions = [sug for sug, freq in sorted_suggestions if freq >= 2][:50] # Aumentado a top 50
186
+ suggestions_str = ", ".join(combined_top_suggestions)
187
 
188
  # Crear la lista de todas las palabras clave con su número de repeticiones
189
  all_suggestions_str = "<ul>"
 
191
  all_suggestions_str += f"<li>{suggestion} - {freq} repeticiones</li>"
192
  all_suggestions_str += "</ul>"
193
 
194
+ # Crear el HTML de salida
195
  html_output = f"""
196
  <div>
197
+ <b>Top 50 Sugerencias combinadas:</b> <span id='suggestions_text'>{suggestions_str}</span>
198
  <button class="lg secondary svelte-cmf5ev" style="font-size: small; padding: 2px; color: #808080ba; border: none; margin-left: 5px;"
199
  onclick='navigator.clipboard.writeText(document.getElementById("suggestions_text").innerText).then(() => alert("Texto copiado al portapapeles"))'>&nbsp;✂&nbsp;</button>
200
  </div>
 
 
 
201
  """
 
 
 
 
202
 
203
+ # Agregar las top sugerencias de cada plataforma
204
+ for platform, suggestions in platform_suggestions.items():
205
+ html_output += f"""
206
+ <h4>Top 20 Sugerencias de {platform}:</h4>
207
+ <ul>
208
+ """
209
+ for suggestion in list(suggestions)[:20]:
210
+ freq = all_suggestions[suggestion]
211
+ html_output += f"<li>{suggestion} ({freq})</li>"
212
+ html_output += "</ul>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
213
 
214
  # Agregar la lista completa de todas las palabras clave
215
  html_output += """
 
225
  inputs="text",
226
  outputs="html",
227
  title="<div style='margin:0 auto;text-align:center'><div style='margin:0 auto;text-align:center'><img style='width:100px;display: inline-table;margin-bottom:-10px' src='https://artxeweb.com/media/files/search.jpg'><p>Sugerencias Combinadas de Google, DuckDuckGo, YouTube, Bing y Amazon</p></div>",
228
+ description="<p style='margin-bottom:10px;text-align:center;background: #ffffff; padding: 8px; border-radius: 8px; border-width: 1px; border: solid 1px #e5e7eb;'>Ingrese una palabra clave para obtener sugerencias de búsqueda relacionadas de Google, DuckDuckGo, YouTube, Bing y Amazon. Se mostrarán las 50 primeras sugerencias combinadas y también las 20 principales de cada plataforma por separado.</p>",
229
  article="<div style='margin-top:10px'><p style='text-align: center !important; background: #ffffff; padding: 5px 30px; border-radius: 8px; border-width: 1px; border: solid 1px #e5e7eb; width: fit-content; margin: auto;'>Desarrollada por <a style='text-decoration: none !important; color: #e12a31 !important;' href='https://artxeweb.com'>© Artxe Web</a></p></div>"
230
  )
231
 
232
+ iface.launch()