tx3bas committed on
Commit
97e7c2c
·
verified ·
1 Parent(s): 13e4bf1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +112 -208
app.py CHANGED
@@ -1,257 +1,161 @@
1
  import gradio as gr
2
- import requests
 
3
  import urllib.parse
4
  import re
5
  import xmltodict
6
 
7
# Fetch autocomplete suggestions from DuckDuckGo
def fetch_duckduckgo_suggestions(query, lang_code="es", timeout=10):
    """Return DuckDuckGo autocomplete phrases for *query*.

    Args:
        query: Search text to complete; it is URL-quoted before sending.
        lang_code: Value sent as the ``kl`` query parameter.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A list of suggestion strings. The list is empty when the request
        fails, the status is not 200, or the body is not a JSON list of
        objects carrying a ``phrase`` key.
    """
    encoded_query = urllib.parse.quote(query)
    url = f"https://duckduckgo.com/ac/?q={encoded_query}&kl={lang_code}"
    try:
        # Without a timeout a stalled server would block the caller forever.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as e:
        print(f"Error de red consultando DuckDuckGo: {e}")
        return []
    if response.status_code != 200:
        return []
    try:
        data = response.json()
        return [item['phrase'] for item in data]
    except (ValueError, KeyError, TypeError):
        # ValueError: invalid JSON; KeyError/TypeError: unexpected shape.
        print("Error decodificando JSON de DuckDuckGo")
        return []
21
 
22
# Fetch autocomplete suggestions from Google
def fetch_google_suggestions(query, lang_code="es", timeout=10):
    """Return Google autocomplete suggestions for *query*.

    Args:
        query: Search text to complete; it is URL-quoted before sending.
        lang_code: Value sent as the ``hl`` query parameter.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The second element of the JSON array returned by the endpoint
        (the list of suggestions), or an empty list when the request fails,
        the status is not 200, or the body does not have that shape.
    """
    encoded_query = urllib.parse.quote(query)
    url = f"http://suggestqueries.google.com/complete/search?client=firefox&hl={lang_code}&q={encoded_query}"
    try:
        # Without a timeout a stalled server would block the caller forever.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as e:
        print(f"Error de red consultando Google: {e}")
        return []
    if response.status_code != 200:
        return []
    try:
        return response.json()[1]
    except (ValueError, IndexError, KeyError, TypeError):
        # ValueError: invalid JSON; the others: payload is not [query, [...]].
        print("Error decodificando JSON de Google")
        return []
35
 
36
# Fetch autocomplete suggestions from YouTube
def fetch_youtube_suggestions(query, lang_code="es", timeout=10):
    """Return YouTube autocomplete suggestions for *query*.

    The body is parsed as JSON instead of with regular expressions: the
    previous lazy pattern stopped at the first ``],`` and therefore captured
    only the first suggestion, and it left ``\\uXXXX`` escapes undecoded.

    Args:
        query: Search text to complete; it is URL-quoted before sending.
        lang_code: Value sent as the ``hl`` query parameter.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A list of suggestion strings, or an empty list on any failure.
    """
    import json

    encoded_query = urllib.parse.quote(query)
    url = f"http://suggestqueries.google.com/complete/search?client=youtube&hl={lang_code}&q={encoded_query}"
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as e:
        print(f"Error procesando la respuesta de YouTube: {e}")
        return []
    if response.status_code != 200:
        return []

    # NOTE(review): assumes a JSONP body shaped like
    # window.google.ac.h(["query",[["suggestion",...],...],{...}]) -- the
    # shape the original regex targeted; confirm against a live response.
    text = response.text
    start, end = text.find("("), text.rfind(")")
    if start == -1 or end <= start:
        print("No se encontraron sugerencias en el formato esperado.")
        return []
    try:
        payload = json.loads(text[start + 1:end])
        return [
            entry[0]
            for entry in payload[1]
            if isinstance(entry, list) and entry
            and isinstance(entry[0], str) and entry[0]
        ]
    except (ValueError, IndexError, KeyError, TypeError) as e:
        print(f"Error procesando la respuesta de YouTube: {e}")
        return []
59
 
60
# Fetch autocomplete suggestions from Bing
def fetch_bing_suggestions(query, market="en-US", timeout=10):
    """Return Bing autocomplete suggestions for *query*.

    Args:
        query: Search text to complete.
        market: Value sent as the ``Market`` query parameter.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A list with the ``Text`` of every suggestion item in the XML
        response, or an empty list when the request fails, the status is
        not 200, or the document cannot be processed.
    """
    url = "https://api.bing.com/qsml.aspx"
    params = {
        "Market": market,
        "query": query
    }
    headers = {
        "User-agent": "Mozilla/5.0"
    }
    try:
        # Without a timeout a stalled server would block the caller forever.
        response = requests.get(url, params=params, headers=headers, timeout=timeout)
    except requests.RequestException as e:
        print(f"Error procesando la respuesta de Bing: {e}")
        return []
    if response.status_code != 200:
        return []

    try:
        obj = xmltodict.parse(response.content)
        suggest_list = []
        if 'SearchSuggestion' in obj and obj['SearchSuggestion']['Section']:
            suggestions = obj['SearchSuggestion']['Section']['Item']
            # A single <Item> is parsed as a dict, several as a list.
            if isinstance(suggestions, list):
                suggest_list.extend(s['Text'] for s in suggestions)
            elif isinstance(suggestions, dict):
                suggest_list.append(suggestions['Text'])
        return suggest_list
    except Exception as e:
        # Deliberately broad: any parse/shape problem means "no suggestions".
        print(f"Error procesando la respuesta de Bing: {e}")
        return []
89
 
90
# Fetch autocomplete suggestions from Amazon
def fetch_amazon_suggestions(query, market_id="ATVPDKIKX0DER", alias="aps", timeout=10):
    """Return Amazon autocomplete suggestions for *query*.

    Args:
        query: Text sent as the ``prefix`` query parameter.
        market_id: Value sent as the ``mid`` query parameter.
        alias: Value sent as the ``alias`` query parameter.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The ``value`` of every entry under the ``suggestions`` key of the
        JSON response, or an empty list when the request fails, the status
        is not 200, or the body does not have that shape.
    """
    url = "https://completion.amazon.com/api/2017/suggestions"
    params = {
        "mid": market_id,
        "alias": alias,
        "prefix": query
    }
    try:
        # Without a timeout a stalled server would block the caller forever.
        response = requests.get(url, params=params, timeout=timeout)
    except requests.RequestException as e:
        print(f"Error de red consultando Amazon: {e}")
        return []
    if response.status_code != 200:
        return []
    try:
        data = response.json()
        return [item['value'] for item in data.get('suggestions', [])]
    except (ValueError, KeyError, TypeError, AttributeError):
        # ValueError: invalid JSON; the others: unexpected payload shape.
        print("Error decodificando JSON de Amazon")
        return []
110
 
111
# Expand the keyword with one-character prefixes and suffixes
def expand_keyword(keyword):
    """Return *keyword* followed by its single-character expansions.

    For every character in ``a``-``z`` plus ``*`` and ``_`` two variants are
    produced, in this order: ``"<keyword> <char>"`` and
    ``"<char> <keyword>"``. The untouched keyword is always the first item.
    """
    variants = [keyword]
    for char in 'abcdefghijklmnopqrstuvwxyz*_':
        variants.extend((" ".join((keyword, char)), " ".join((char, keyword))))
    return variants
118
-
119
# Main entry point: gather, count and render suggestions from every provider
def main(keyword):
    """Build an HTML report of search suggestions for *keyword*.

    Every expanded keyword is sent to each provider. Each returned
    suggestion is counted once per appearance across all providers, and the
    report shows:

    * the ten most repeated suggestions that appear at least twice,
    * per provider, its ten suggestions with the highest combined count,
    * the full list of suggestions with their counts.

    Suggestion text comes from remote services, so it is HTML-escaped
    before being placed in the markup.

    Args:
        keyword: Seed keyword typed by the user.

    Returns:
        The report as an HTML string.
    """
    import html
    from collections import Counter

    # (label, fetcher) pairs in the order requests are issued; that order
    # decides which suggestion comes first among equal counts.
    fetch_order = [
        ("DuckDuckGo", fetch_duckduckgo_suggestions),
        ("Google", fetch_google_suggestions),
        ("YouTube", fetch_youtube_suggestions),
        ("Bing", fetch_bing_suggestions),
        ("Amazon", fetch_amazon_suggestions),
    ]
    # Order in which the per-provider sections appear in the report.
    display_order = ["Google", "DuckDuckGo", "YouTube", "Bing", "Amazon"]

    expanded_keywords = expand_keyword(keyword)
    all_suggestions = Counter()
    unique_by_provider = {}
    for label, fetch in fetch_order:
        seen = {}  # dict used as an insertion-ordered set
        for exp_keyword in expanded_keywords:
            suggestions = fetch(exp_keyword)
            all_suggestions.update(suggestions)
            seen.update(dict.fromkeys(suggestions))
        unique_by_provider[label] = list(seen)

    # Stable sort: ties keep the order in which suggestions were first seen.
    sorted_suggestions = sorted(all_suggestions.items(), key=lambda item: item[1], reverse=True)
    combined_top_10_suggestions = [sug for sug, freq in sorted_suggestions if freq >= 2][:10]
    suggestions_str = html.escape(", ".join(str(sug) for sug in combined_top_10_suggestions))

    parts = [f"""
    <div>
        <b>Sugerencias combinadas de Google, DuckDuckGo, YouTube, Bing y Amazon (Top 10 combinadas):</b> <span id='suggestions_text'>{suggestions_str}</span>
        <button class="lg secondary svelte-cmf5ev" style="font-size: small; padding: 2px; color: #808080ba; border: none; margin-left: 5px;"
        onclick='navigator.clipboard.writeText(document.getElementById("suggestions_text").innerText).then(() => alert("Texto copiado al portapapeles"))'>&nbsp;✂&nbsp;</button>
    </div>
    """]

    for label in display_order:
        # Rank by the displayed count instead of taking an arbitrary slice
        # of a set, so the section really is a "top 10".
        top_10 = sorted(
            unique_by_provider[label],
            key=all_suggestions.__getitem__,
            reverse=True,
        )[:10]
        parts.append(f"<h4>Top 10 Sugerencias de {label}:</h4><ul>")
        parts.extend(
            f"<li>{html.escape(str(suggestion))} ({all_suggestions[suggestion]})</li>"
            for suggestion in top_10
        )
        parts.append("</ul>")

    parts.append("<h4>Lista completa de palabras clave con su número de repeticiones:</h4><ul>")
    parts.extend(
        f"<li>{html.escape(str(suggestion))} - {freq} repeticiones</li>"
        for suggestion, freq in sorted_suggestions
    )
    parts.append("</ul>")

    return "".join(parts)
256
 
257
  # Interfaz de Gradio
# Keyword arguments for the Gradio UI, collected first and then unpacked.
_interface_options = {
    "fn": main,
    "inputs": "text",
    "outputs": "html",
    "title": "<div style='margin:0 auto;text-align:center'><div style='margin:0 auto;text-align:center'><img style='width:100px;display: inline-table;margin-bottom:-10px' src='https://artxeweb.com/media/files/search.jpg'><p>Sugerencias Combinadas de Google, DuckDuckGo, YouTube, Bing y Amazon</p></div>",
    "description": "<p style='margin-bottom:10px;text-align:center;background: #ffffff; padding: 8px; border-radius: 8px; border-width: 1px; border: solid 1px #e5e7eb;'>Ingrese una palabra clave para obtener sugerencias de búsqueda relacionadas de Google, DuckDuckGo, YouTube, Bing y Amazon. Se mostrarán las 10 primeras sugerencias combinadas y también las 10 principales de cada plataforma por separado.</p>",
    "article": "<div style='margin-top:10px'><p style='text-align: center !important; background: #ffffff; padding: 5px 30px; border-radius: 8px; border-width: 1px; border: solid 1px #e5e7eb; width: fit-content; margin: auto;'>Desarrollada por <a style='text-decoration: none !important; color: #e12a31 !important;' href='https://artxeweb.com'>© Artxe Web</a></p></div>",
}
iface = gr.Interface(**_interface_options)

# Start the web app.
iface.launch()
 
1
  import gradio as gr
2
+ import aiohttp
3
+ import asyncio
4
  import urllib.parse
5
  import re
6
  import xmltodict
7
 
8
# Asynchronously fetch autocomplete suggestions from DuckDuckGo
async def fetch_duckduckgo_suggestions(session, query, lang_code="es"):
    """Return DuckDuckGo autocomplete phrases for *query*.

    Args:
        session: Object whose ``get(url)`` returns an async context manager
            yielding a response with ``status`` and an awaitable ``json()``
            (an ``aiohttp.ClientSession`` in this application).
        query: Search text to complete; it is URL-quoted before sending.
        lang_code: Value sent as the ``kl`` query parameter.

    Returns:
        A list of suggestion strings. The list is empty when the request
        fails, the status is not 200, or the body is not a JSON list of
        objects carrying a ``phrase`` key.
    """
    encoded_query = urllib.parse.quote(query)
    url = f"https://duckduckgo.com/ac/?q={encoded_query}&kl={lang_code}"
    try:
        async with session.get(url) as response:
            if response.status != 200:
                return []
            # content_type=None disables aiohttp's Content-Type check; with
            # the default, a body not labelled application/json raises
            # ContentTypeError, which is not a ValueError.
            data = await response.json(content_type=None)
    except (aiohttp.ClientError, asyncio.TimeoutError, ValueError):
        # Best effort: a failing provider must not abort the other requests.
        return []
    if not isinstance(data, list):
        return []
    return [
        item['phrase']
        for item in data
        if isinstance(item, dict) and 'phrase' in item
    ]
20
 
21
# Asynchronously fetch autocomplete suggestions from Google
async def fetch_google_suggestions(session, query, lang_code="es"):
    """Return Google autocomplete suggestions for *query*.

    Args:
        session: Object whose ``get(url)`` returns an async context manager
            yielding a response with ``status`` and an awaitable ``json()``
            (an ``aiohttp.ClientSession`` in this application).
        query: Search text to complete; it is URL-quoted before sending.
        lang_code: Value sent as the ``hl`` query parameter.

    Returns:
        The second element of the JSON array returned by the endpoint
        (the list of suggestions), or an empty list when the request fails,
        the status is not 200, or the body does not have that shape.
    """
    encoded_query = urllib.parse.quote(query)
    url = f"http://suggestqueries.google.com/complete/search?client=firefox&hl={lang_code}&q={encoded_query}"
    try:
        async with session.get(url) as response:
            if response.status != 200:
                return []
            # content_type=None disables aiohttp's Content-Type check; with
            # the default, a body not labelled application/json raises
            # ContentTypeError, which is not a ValueError.
            data = await response.json(content_type=None)
    except (aiohttp.ClientError, asyncio.TimeoutError, ValueError):
        # Best effort: a failing provider must not abort the other requests.
        return []
    # Expected payload: [query, [suggestion, ...], ...]
    if not isinstance(data, list) or len(data) < 2 or not isinstance(data[1], list):
        return []
    return data[1]
32
 
33
# Asynchronously fetch autocomplete suggestions from YouTube
async def fetch_youtube_suggestions(session, query, lang_code="es"):
    """Return YouTube autocomplete suggestions for *query*.

    The body is parsed as JSON instead of with regular expressions: the
    previous lazy pattern stopped at the first ``],`` and therefore captured
    only the first suggestion, and it left ``\\uXXXX`` escapes undecoded.

    Args:
        session: Object whose ``get(url)`` returns an async context manager
            yielding a response with ``status`` and an awaitable ``text()``
            (an ``aiohttp.ClientSession`` in this application).
        query: Search text to complete; it is URL-quoted before sending.
        lang_code: Value sent as the ``hl`` query parameter.

    Returns:
        A list of suggestion strings, or an empty list on any failure.
    """
    import json

    encoded_query = urllib.parse.quote(query)
    url = f"http://suggestqueries.google.com/complete/search?client=youtube&hl={lang_code}&q={encoded_query}"
    try:
        async with session.get(url) as response:
            if response.status != 200:
                return []
            text = await response.text()
    except (aiohttp.ClientError, asyncio.TimeoutError, ValueError):
        # Best effort: a failing provider must not abort the other requests.
        return []

    # NOTE(review): assumes a JSONP body shaped like
    # window.google.ac.h(["query",[["suggestion",...],...],{...}]) -- the
    # shape the original regex targeted; confirm against a live response.
    start, end = text.find("("), text.rfind(")")
    if start == -1 or end <= start:
        return []
    try:
        payload = json.loads(text[start + 1:end])
        return [
            entry[0]
            for entry in payload[1]
            if isinstance(entry, list) and entry
            and isinstance(entry[0], str) and entry[0]
        ]
    except (ValueError, IndexError, KeyError, TypeError):
        return []
50
 
51
# Asynchronously fetch autocomplete suggestions from Bing
async def fetch_bing_suggestions(session, query, market="en-US"):
    """Return Bing autocomplete suggestions for *query*.

    Args:
        session: Object whose ``get(url, params=...)`` returns an async
            context manager yielding a response with ``status`` and an
            awaitable ``text()`` (an ``aiohttp.ClientSession`` here).
        query: Search text to complete.
        market: Value sent as the ``Market`` query parameter.

    Returns:
        A list with the ``Text`` of every suggestion item in the XML
        response, or an empty list when the request fails, the status is
        not 200, or the document does not have the expected structure.
    """
    from xml.parsers.expat import ExpatError

    url = "https://api.bing.com/qsml.aspx"
    params = {
        "Market": market,
        "query": query
    }
    try:
        async with session.get(url, params=params) as response:
            if response.status != 200:
                return []
            body = await response.text()
    except (aiohttp.ClientError, asyncio.TimeoutError, ValueError):
        # Best effort: a failing provider must not abort the other requests.
        return []

    try:
        obj = xmltodict.parse(body)
        suggestions = obj['SearchSuggestion']['Section']['Item']
        # A single <Item> is parsed as a dict, several as a list.
        if isinstance(suggestions, list):
            return [s['Text'] for s in suggestions]
        if isinstance(suggestions, dict):
            return [suggestions['Text']]
    except (ExpatError, KeyError, TypeError, ValueError):
        # Malformed XML or a document without suggestion items.
        return []
    return []
71
 
72
# Asynchronously fetch autocomplete suggestions from Amazon
async def fetch_amazon_suggestions(session, query, market_id="ATVPDKIKX0DER", alias="aps"):
    """Return Amazon autocomplete suggestions for *query*.

    Args:
        session: Object whose ``get(url, params=...)`` returns an async
            context manager yielding a response with ``status`` and an
            awaitable ``json()`` (an ``aiohttp.ClientSession`` here).
        query: Text sent as the ``prefix`` query parameter.
        market_id: Value sent as the ``mid`` query parameter.
        alias: Value sent as the ``alias`` query parameter.

    Returns:
        The ``value`` of every entry under the ``suggestions`` key of the
        JSON response, or an empty list when the request fails, the status
        is not 200, or the body does not have that shape.
    """
    url = "https://completion.amazon.com/api/2017/suggestions"
    params = {
        "mid": market_id,
        "alias": alias,
        "prefix": query
    }
    try:
        async with session.get(url, params=params) as response:
            if response.status != 200:
                return []
            # content_type=None disables aiohttp's Content-Type check so a
            # differently labelled JSON body does not raise ContentTypeError.
            data = await response.json(content_type=None)
    except (aiohttp.ClientError, asyncio.TimeoutError, ValueError):
        # Best effort: a failing provider must not abort the other requests.
        return []
    if not isinstance(data, dict):
        return []
    entries = data.get('suggestions', [])
    if not isinstance(entries, list):
        return []
    return [
        item['value']
        for item in entries
        if isinstance(item, dict) and 'value' in item
    ]
88
 
89
# Keyword expansion: stopwords, prefixes/suffixes, plural and typo variants
def expand_keyword(keyword, stopwords=None, prefixes=None, suffixes=None):
    """Return *keyword* plus derived search variants, without duplicates.

    Variants are produced in a fixed order: the keyword itself, the keyword
    combined with every stopword (after and before it), every prefix, every
    suffix, a naive plural (``keyword + "s"`` when it does not already end
    in ``s``) and, for keywords longer than two characters, two typo
    variants (last character dropped, last character doubled).

    The result preserves that order, so repeated calls with the same input
    return the same list; the previous set-based version returned the
    variants in arbitrary order.

    Args:
        keyword: Seed keyword. Surrounding whitespace is ignored.
        stopwords: Words combined on both sides of the keyword. Defaults to
            a built-in list of Spanish stopwords.
        prefixes: Words placed before the keyword. Defaults to a built-in
            list.
        suffixes: Words placed after the keyword. Defaults to a built-in
            list.

    Returns:
        A list of unique strings, or an empty list when *keyword* is empty
        or only whitespace.
    """
    if stopwords is None:
        stopwords = ('de', 'para', 'con', 'en', 'y', 'o', 'sin', 'por', 'al')
    if prefixes is None:
        prefixes = ('comprar', 'mejor', 'oferta', 'precio', 'barato')
    if suffixes is None:
        suffixes = ('online', 'gratis', '2023', 'calidad', 'fácil', 'rápido')

    keyword = keyword.strip() if keyword else ""
    if not keyword:
        # Nothing to expand; avoids querying for bare stopwords.
        return []

    candidates = [keyword]

    for word in stopwords:
        candidates.append(f"{keyword} {word}")
        candidates.append(f"{word} {keyword}")

    candidates.extend(f"{prefix} {keyword}" for prefix in prefixes)
    candidates.extend(f"{keyword} {suffix}" for suffix in suffixes)

    # Naive plural variant
    if not keyword.endswith('s'):
        candidates.append(f"{keyword}s")

    # Common typos
    if len(keyword) > 2:
        candidates.append(keyword[:-1])  # last character dropped
        candidates.append(keyword + keyword[-1])  # last character doubled

    # dict.fromkeys removes duplicates while keeping first-seen order.
    return list(dict.fromkeys(candidates))
 
 
 
 
 
 
 
118
 
119
# Asynchronous driver: query every provider for every expanded keyword
async def main_async(keyword):
    """Collect suggestions for *keyword* from all providers concurrently.

    Every expanded keyword is sent to Google, DuckDuckGo, YouTube, Bing and
    Amazon through one shared ``aiohttp.ClientSession``. Each returned
    suggestion is counted once per appearance.

    Args:
        keyword: Seed keyword to expand and query.

    Returns:
        A list of ``(suggestion, count)`` tuples sorted by count, highest
        first; ties keep the order in which suggestions were first seen.
    """
    from collections import Counter

    fetchers = (
        fetch_google_suggestions,
        fetch_duckduckgo_suggestions,
        fetch_youtube_suggestions,
        fetch_bing_suggestions,
        fetch_amazon_suggestions,
    )
    expanded_keywords = expand_keyword(keyword)

    async with aiohttp.ClientSession() as session:
        tasks = [
            fetch(session, exp_keyword)
            for exp_keyword in expanded_keywords
            for fetch in fetchers
        ]
        # return_exceptions=True: one failed request is reported as a result
        # instead of raising and discarding every other response.
        results = await asyncio.gather(*tasks, return_exceptions=True)

    all_suggestions = Counter()
    for result in results:
        if isinstance(result, BaseException) or not result:
            continue  # failed or empty request
        all_suggestions.update(result)

    # Sort by number of repetitions, most repeated first (stable for ties).
    return sorted(all_suggestions.items(), key=lambda item: item[1], reverse=True)
148
 
149
# Gradio callback: run the async collection and render the result as HTML
def main(keyword):
    """Return an HTML list of suggestions for *keyword* with their counts.

    Runs ``main_async`` to completion on a fresh event loop and renders
    each ``(suggestion, count)`` pair as a list item. Suggestion text comes
    from remote services, so it is HTML-escaped before being placed in the
    markup.

    Args:
        keyword: Seed keyword typed by the user.

    Returns:
        An HTML string with a heading and one ``<li>`` per suggestion.
    """
    import html

    suggestions = asyncio.run(main_async(keyword))

    items = "".join(
        f"<li>{html.escape(str(suggestion))} - {freq} repeticiones</li>"
        for suggestion, freq in suggestions
    )
    return (
        "<h4>Lista completa de palabras clave con su número de repeticiones:</h4><ul>"
        + items
        + "</ul>"
    )
160
 
161
  # Interfaz de Gradio
 
163
  fn=main,
164
  inputs="text",
165
  outputs="html",
166
+ title="Sugerencias Combinadas de Google, DuckDuckGo, YouTube, Bing y Amazon",
167
+ description="Ingrese una palabra clave para obtener sugerencias de búsqueda relacionadas de varias plataformas. Las sugerencias se ordenan por su número de repeticiones.",
168
+ article="Desarrollada por © Artxe Web"
169
  )
170
 
171
  iface.launch()