tx3bas committed on
Commit
2cb8fc4
·
verified ·
1 Parent(s): 8b37454

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +196 -71
app.py CHANGED
@@ -3,95 +3,220 @@ import requests
3
  import urllib.parse
4
  import re
5
  import xmltodict
6
- from itertools import product, combinations
7
  import nltk
8
  from nltk.corpus import stopwords
9
  from nltk.stem import SnowballStemmer
10
- from collections import Counter
11
- import concurrent.futures
12
- import ssl
13
- import os
14
-
15
- # Configurar SSL para la descarga de NLTK
16
- try:
17
- _create_unverified_https_context = ssl._create_unverified_context
18
- except AttributeError:
19
- pass
20
- else:
21
- ssl._create_default_https_context = _create_unverified_https_context
22
-
23
- # Función para descargar recursos de NLTK
24
def download_nltk_resources():
    """Download the NLTK resources this module needs if they are missing.

    Each resource is looked up under its own NLTK data category before
    downloading. Looking everything up under ``tokenizers/`` (as was done
    previously) never finds the ``stopwords`` and ``words`` corpora, so
    they were downloaded again on every start.
    """
    # Maps the download identifier to the path checked by nltk.data.find().
    resource_paths = {
        'punkt': 'tokenizers/punkt',
        'stopwords': 'corpora/stopwords',
        'words': 'corpora/words',
    }
    for resource, path in resource_paths.items():
        try:
            nltk.data.find(path)
        except LookupError:
            print(f"Descargando {resource}...")
            nltk.download(resource, quiet=True)
32
-
33
- # Descargar recursos de NLTK
34
- download_nltk_resources()
35
 
36
- # Configurar el directorio de datos de NLTK
37
- nltk_data_dir = '/tmp/nltk_data'
38
- os.makedirs(nltk_data_dir, exist_ok=True)
39
- nltk.data.path.append(nltk_data_dir)
40
 
41
  # Inicializar el stemmer y la lista de stopwords
42
  stemmer = SnowballStemmer("spanish")
43
  stop_words = set(stopwords.words('spanish'))
44
- english_words = set(nltk.corpus.words.words())
45
 
46
- # El resto del código permanece igual...
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
48
- # Función principal
49
def main(keyword):
    """Build the HTML report of suggestions for *keyword*.

    The keyword is expanded with ``expand_keyword`` and every expanded
    keyword is submitted to ``fetch_all_suggestions`` on a thread pool.
    NOTE(review): ``fetch_all_suggestions`` is not visible here; from the
    way its results are unpacked it is assumed to return an iterable of
    ``(suggestion, platform)`` pairs -- confirm.

    Args:
        keyword: Search term entered by the user.

    Returns:
        An HTML string with the suggestions ordered by frequency, the
        per-platform statistics and a "copy all" button.
    """
    # Local imports: neither module is imported at file level.
    import html
    import json

    expanded_keywords = expand_keyword(keyword)
    all_suggestions = []

    with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
        future_to_keyword = {
            executor.submit(fetch_all_suggestions, kw): kw
            for kw in expanded_keywords
        }
        for future in concurrent.futures.as_completed(future_to_keyword):
            kw = future_to_keyword[future]
            try:
                all_suggestions.extend(future.result())
            except Exception as exc:
                # Best effort: one failing keyword must not abort the report.
                print(f'{kw} generated an exception: {exc}')

    # Single pass over the results: frequency per suggestion, frequency per
    # platform, and the set of platforms that returned each suggestion
    # (instead of rescanning the whole list once per suggestion).
    suggestion_counter = Counter()
    platform_counter = Counter()
    platforms_by_suggestion = {}
    for sugg, platform in all_suggestions:
        suggestion_counter[sugg] += 1
        platform_counter[platform] += 1
        platforms_by_suggestion.setdefault(sugg, set()).add(platform)

    # Most frequent first; ties keep first-seen order.
    sorted_suggestions = suggestion_counter.most_common()

    parts = ["<h3>Todas las sugerencias ordenadas por relevancia:</h3><ul>"]
    for suggestion, count in sorted_suggestions:
        platforms = sorted(platforms_by_suggestion[suggestion], key=str)
        platforms_str = ", ".join(str(p) for p in platforms)
        relevance_score = count * len(platforms)
        # Suggestions come from remote services: escape before embedding.
        parts.append(
            f"<li><strong>{html.escape(str(suggestion))}</strong> "
            f"(Frecuencia: {count}, Plataformas: {html.escape(platforms_str)}, "
            f"Puntuación de relevancia: {relevance_score})</li>"
        )
    parts.append("</ul>")

    parts.append("<h3>Estadísticas por plataforma:</h3><ul>")
    for platform, count in platform_counter.most_common():
        parts.append(f"<li>{html.escape(str(platform))}: {count} sugerencias</li>")
    parts.append("</ul>")

    all_suggestions_text = ", ".join(str(sugg) for sugg, _ in sorted_suggestions)
    # Encode the text as a JS string literal and then escape it for the
    # single-quoted HTML attribute, so quotes or backticks inside a
    # suggestion cannot break out of the handler.
    js_text = html.escape(json.dumps(all_suggestions_text), quote=True)
    parts.append(f"""
<div>
<h3>Copiar todas las sugerencias:</h3>
<button class="lg secondary svelte-cmf5ev" style="font-size: small; padding: 2px; color: #808080ba; border: none; margin-left: 5px;"
onclick='navigator.clipboard.writeText({js_text}).then(() => alert("Todas las sugerencias copiadas al portapapeles"))'>&nbsp;Copiar todas las sugerencias&nbsp;</button>
</div>
""")

    html_output = "".join(parts)
    return html_output
96
 
97
  # Interfaz de Gradio
@@ -100,8 +225,8 @@ iface = gr.Interface(
100
  inputs="text",
101
  outputs="html",
102
  title="<div style='margin:0 auto;text-align:center'><div style='margin:0 auto;text-align:center'><img style='width:100px;display: inline-table;margin-bottom:-10px' src='https://artxeweb.com/media/files/search.jpg'><p>Sugerencias Combinadas de Google, DuckDuckGo, YouTube, Bing y Amazon</p></div>",
103
- description="<p style='margin-bottom:10px;text-align:center;background: #ffffff; padding: 8px; border-radius: 8px; border-width: 1px; border: solid 1px #e5e7eb;'>Ingrese una palabra clave para obtener sugerencias de búsqueda relacionadas de Google, DuckDuckGo, YouTube, Bing y Amazon. Se mostrarán todas las sugerencias ordenadas por relevancia.</p>",
104
  article="<div style='margin-top:10px'><p style='text-align: center !important; background: #ffffff; padding: 5px 30px; border-radius: 8px; border-width: 1px; border: solid 1px #e5e7eb; width: fit-content; margin: auto;'>Desarrollada por <a style='text-decoration: none !important; color: #e12a31 !important;' href='https://artxeweb.com'>© Artxe Web</a></p></div>"
105
  )
106
 
107
- iface.launch()
 
3
  import urllib.parse
4
  import re
5
  import xmltodict
6
+ from itertools import product
7
  import nltk
8
  from nltk.corpus import stopwords
9
  from nltk.stem import SnowballStemmer
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
+ # Descargar recursos necesarios de NLTK
12
+ nltk.download('stopwords', quiet=True)
13
+ nltk.download('punkt', quiet=True)
 
14
 
15
  # Inicializar el stemmer y la lista de stopwords
16
  stemmer = SnowballStemmer("spanish")
17
  stop_words = set(stopwords.words('spanish'))
 
18
 
19
+ # Función para obtener sugerencias de DuckDuckGo
20
def fetch_duckduckgo_suggestions(query, lang_code="es"):
    """Return the DuckDuckGo autocomplete phrases for *query*.

    Args:
        query: Search text; it is URL-quoted before being sent.
        lang_code: Value sent as the ``kl`` query parameter.

    Returns:
        A list with the ``phrase`` value of every item in the JSON reply,
        or an empty list on a network error, a non-200 status or a reply
        that is not valid JSON.
    """
    encoded_query = urllib.parse.quote(query)
    url = f"https://duckduckgo.com/ac/?q={encoded_query}&kl={lang_code}"
    try:
        # Without a timeout a stalled connection would block forever.
        response = requests.get(url, timeout=10)
    except requests.RequestException as exc:
        print(f"Error de red consultando DuckDuckGo: {exc}")
        return []

    if response.status_code != 200:
        return []

    try:
        data = response.json()
    except ValueError:
        print("Error decodificando JSON de DuckDuckGo")
        return []

    # Skip entries that do not have the expected shape instead of raising.
    return [
        item['phrase']
        for item in data
        if isinstance(item, dict) and 'phrase' in item
    ]
33
 
34
+ # Función para obtener sugerencias de Google
35
def fetch_google_suggestions(query, lang_code="es"):
    """Return the Google autocomplete suggestions for *query*.

    The endpoint is called with ``client=firefox`` and the second element
    of the decoded JSON reply is taken as the list of suggestions.

    Args:
        query: Search text; it is URL-quoted before being sent.
        lang_code: Value sent as the ``hl`` query parameter.

    Returns:
        The list of suggestions, or an empty list on a network error, a
        non-200 status or a reply without the expected structure.
    """
    encoded_query = urllib.parse.quote(query)
    url = f"http://suggestqueries.google.com/complete/search?client=firefox&hl={lang_code}&q={encoded_query}"
    try:
        # Without a timeout a stalled connection would block forever.
        response = requests.get(url, timeout=10)
    except requests.RequestException as exc:
        print(f"Error de red consultando Google: {exc}")
        return []

    if response.status_code != 200:
        return []

    try:
        suggestions = response.json()[1]
    except (ValueError, IndexError, KeyError, TypeError):
        # Invalid JSON, or JSON that is not an array with a second element.
        print("Error decodificando JSON de Google")
        return []

    if not isinstance(suggestions, list):
        return []
    return suggestions
47
+
48
+ # Función para obtener sugerencias de YouTube
49
def fetch_youtube_suggestions(query, lang_code="es"):
    """Return the YouTube autocomplete suggestions for *query*.

    The endpoint is called with ``client=youtube`` and answers with a
    ``window.google.ac.h([...])`` wrapper. The wrapped array is decoded as
    JSON and the first element of each entry in its second item is taken
    as a suggestion. Decoding the whole array (rather than cutting it with
    a lazy regex that stops at the first ``],``) returns every suggestion
    and also resolves JSON string escapes.

    Args:
        query: Search text; it is URL-quoted before being sent.
        lang_code: Value sent as the ``hl`` query parameter.

    Returns:
        The list of suggestion strings, or an empty list on a network
        error, a non-200 status or an unexpected reply format.
    """
    import json  # local import: json is not imported at file level

    encoded_query = urllib.parse.quote(query)
    url = f"http://suggestqueries.google.com/complete/search?client=youtube&hl={lang_code}&q={encoded_query}"
    try:
        # Without a timeout a stalled connection would block forever.
        response = requests.get(url, timeout=10)
    except requests.RequestException as exc:
        print(f"Error de red consultando YouTube: {exc}")
        return []

    if response.status_code != 200:
        return []

    # Greedy match up to the last ")" so the whole argument is captured.
    match = re.search(r'window\.google\.ac\.h\((.*)\)', response.text, re.DOTALL)
    if not match:
        print("No se encontraron sugerencias en el formato esperado.")
        return []

    try:
        payload = json.loads(match.group(1))
        return [
            entry[0]
            for entry in payload[1]
            if isinstance(entry, list) and entry and isinstance(entry[0], str)
        ]
    except (ValueError, IndexError, KeyError, TypeError) as e:
        print(f"Error procesando la respuesta de YouTube: {e}")
        return []
69
+
70
+ # Función para obtener sugerencias de Bing
71
def fetch_bing_suggestions(query, market="es-ES"):
    """Return the Bing autocomplete suggestions for *query*.

    The XML reply is parsed with ``xmltodict`` and the ``Text`` of every
    ``SearchSuggestion/Section/Item`` element is collected.

    Args:
        query: Search text, sent as the ``query`` parameter.
        market: Value sent as the ``Market`` parameter.

    Returns:
        The list of suggestion texts, or an empty list on a network error,
        a non-200 status, or a reply that cannot be parsed.
    """
    url = "https://api.bing.com/qsml.aspx"
    params = {
        "Market": market,
        "query": query,
    }
    headers = {
        "User-agent": "Mozilla/5.0",
    }
    try:
        # Without a timeout a stalled connection would block forever.
        response = requests.get(url, params=params, headers=headers, timeout=10)
    except requests.RequestException as exc:
        print(f"Error de red consultando Bing: {exc}")
        return []

    if response.status_code != 200:
        return []

    try:
        obj = xmltodict.parse(response.content)
        if 'SearchSuggestion' not in obj or not obj['SearchSuggestion']['Section']:
            return []
        items = obj['SearchSuggestion']['Section']['Item']
        # xmltodict yields a dict for a single <Item> and a list for several.
        if isinstance(items, dict):
            items = [items]
        elif not isinstance(items, list):
            return []
        return [item['Text'] for item in items]
    except Exception as e:
        # Deliberate best-effort boundary: any parsing problem means
        # "no suggestions" rather than a failure of the whole request.
        print(f"Error procesando la respuesta de Bing: {e}")
        return []
99
+
100
+ # Función para obtener sugerencias de Amazon
101
def fetch_amazon_suggestions(query, market_id="A1F83G8C2ARO7P", alias="aps"):
    """Return the Amazon autocomplete suggestions for *query*.

    Args:
        query: Search text, sent as the ``prefix`` parameter.
        market_id: Value sent as the ``mid`` parameter.
            NOTE(review): the default does not look like the Spanish
            marketplace id although the rest of the app targets Spanish
            -- confirm before relying on it.
        alias: Value sent as the ``alias`` parameter.

    Returns:
        A list with the ``value`` of every entry under ``suggestions`` in
        the JSON reply, or an empty list on a network error, a non-200
        status or a reply without the expected structure.
    """
    url = "https://completion.amazon.com/api/2017/suggestions"
    params = {
        "mid": market_id,
        "alias": alias,
        "prefix": query,
    }
    try:
        # Without a timeout a stalled connection would block forever.
        response = requests.get(url, params=params, timeout=10)
    except requests.RequestException as exc:
        print(f"Error de red consultando Amazon: {exc}")
        return []

    if response.status_code != 200:
        return []

    try:
        data = response.json()
    except ValueError:
        print("Error decodificando JSON de Amazon")
        return []

    if not isinstance(data, dict):
        return []
    # Skip entries that do not have the expected shape instead of raising.
    return [
        item['value']
        for item in data.get('suggestions') or []
        if isinstance(item, dict) and 'value' in item
    ]
119
+
120
+ # Función para expandir la palabra clave
121
def expand_keyword(keyword):
    """Expand *keyword* into a list of related search queries.

    The result contains, in this order and without duplicates:

    1. the keyword itself (stripped of surrounding whitespace);
    2. every ordering of 1..n distinct stems of its non-stopword tokens;
    3. the keyword preceded by each question prefix;
    4. the keyword followed by each commercial suffix;
    5. a naive plural (trailing ``s``) of every entry above.

    Args:
        keyword: Search term entered by the user.

    Returns:
        A list of query strings; empty when *keyword* is blank or ``None``.
        The number of stem orderings grows factorially with the number of
        distinct stems, so long keywords produce many queries.
    """
    # Local import: the file only imports ``product`` from itertools.
    from itertools import permutations

    keyword = (keyword or "").strip()
    if not keyword:
        # Nothing to expand; avoids queries made only of a prefix/suffix.
        return []

    # Tokenize, drop stopwords and pure-punctuation tokens, then stem.
    tokens = nltk.word_tokenize(keyword.lower())
    stems = [
        stemmer.stem(token)
        for token in tokens
        if token not in stop_words and any(ch.isalnum() for ch in token)
    ]
    # De-duplicate while keeping order so no variation repeats a stem.
    stems = list(dict.fromkeys(stems))

    # The original keyword is always queried as typed.
    expanded_keywords = [keyword]

    # Orderings without repetition: repeated-stem queries such as
    # "stem stem" add requests without adding information.
    for size in range(1, len(stems) + 1):
        expanded_keywords.extend(
            " ".join(variation) for variation in permutations(stems, size)
        )

    prefixes = ['como', 'que', 'donde', 'cuando', 'por que', 'cual']
    suffixes = ['gratis', 'online', 'pdf', 'precios', 'opiniones']
    expanded_keywords.extend(f"{prefix} {keyword}" for prefix in prefixes)
    expanded_keywords.extend(f"{keyword} {suffix}" for suffix in suffixes)

    # Simplified plurals. The list is materialized first because the
    # source list is the one being extended.
    expanded_keywords.extend([f"{kw}s" for kw in expanded_keywords])

    # dict.fromkeys removes duplicates and, unlike set(), keeps a
    # deterministic order between runs.
    return [kw for kw in dict.fromkeys(expanded_keywords) if kw]
153
+
154
+ # Función principal
155
def main(keyword):
    """Build the HTML report of combined suggestions for *keyword*.

    Every keyword produced by ``expand_keyword`` is sent to each platform
    fetcher (Google, DuckDuckGo, YouTube, Bing, Amazon). Each returned
    suggestion is counted once per appearance across all replies.

    Args:
        keyword: Search term entered by the user.

    Returns:
        An HTML string with three sections: the 50 most frequent
        suggestions seen at least twice (with a copy button), the 20 most
        frequent suggestions of each platform, and the full list of
        suggestions with their counts.
    """
    # Local imports: neither name is imported at file level.
    from collections import Counter
    from html import escape

    platform_fetchers = (
        ('Google', fetch_google_suggestions),
        ('DuckDuckGo', fetch_duckduckgo_suggestions),
        ('YouTube', fetch_youtube_suggestions),
        ('Bing', fetch_bing_suggestions),
        ('Amazon', fetch_amazon_suggestions),
    )

    expanded_keywords = expand_keyword(keyword)
    all_suggestions = Counter()
    platform_suggestions = {platform: set() for platform, _ in platform_fetchers}

    # Collect the suggestions of every platform for every expanded keyword.
    for exp_keyword in expanded_keywords:
        for platform, fetch_func in platform_fetchers:
            try:
                suggestions = fetch_func(exp_keyword)
            except requests.RequestException as exc:
                # One failed request must not discard the results so far.
                print(f"Error obteniendo sugerencias de {platform}: {exc}")
                continue
            platform_suggestions[platform].update(suggestions)
            all_suggestions.update(suggestions)

    # Most frequent first; ties keep first-seen order.
    sorted_suggestions = all_suggestions.most_common()
    combined_top_suggestions = [
        str(sug) for sug, freq in sorted_suggestions if freq >= 2
    ][:50]
    # Suggestions come from remote services: escape before embedding them.
    # The copy button reads innerText, so it still copies the plain text.
    suggestions_str = escape(", ".join(combined_top_suggestions))

    parts = [f"""
<div>
<b>Top 50 Sugerencias combinadas:</b> <span id='suggestions_text'>{suggestions_str}</span>
<button class="lg secondary svelte-cmf5ev" style="font-size: small; padding: 2px; color: #808080ba; border: none; margin-left: 5px;"
onclick='navigator.clipboard.writeText(document.getElementById("suggestions_text").innerText).then(() => alert("Texto copiado al portapapeles"))'>&nbsp;✂&nbsp;</button>
</div>
"""]

    # Per-platform sections: rank by overall frequency (then alphabetically)
    # so the "top 20" is not an arbitrary slice of an unordered set.
    for platform, suggestions in platform_suggestions.items():
        top_suggestions = sorted(
            suggestions, key=lambda sug: (-all_suggestions[sug], str(sug))
        )[:20]
        parts.append(f"""
<h4>Top 20 Sugerencias de {platform}:</h4>
<ul>
""")
        parts.extend(
            f"<li>{escape(str(sug))} ({all_suggestions[sug]})</li>"
            for sug in top_suggestions
        )
        parts.append("</ul>")

    # Full list of suggestions with their number of repetitions.
    parts.append("""
<h4>Lista completa de palabras clave con su número de repeticiones:</h4>
""")
    parts.append("<ul>")
    parts.extend(
        f"<li>{escape(str(sug))} - {freq} repeticiones</li>"
        for sug, freq in sorted_suggestions
    )
    parts.append("</ul>")

    html_output = "".join(parts)
    return html_output
221
 
222
  # Interfaz de Gradio
 
225
  inputs="text",
226
  outputs="html",
227
  title="<div style='margin:0 auto;text-align:center'><div style='margin:0 auto;text-align:center'><img style='width:100px;display: inline-table;margin-bottom:-10px' src='https://artxeweb.com/media/files/search.jpg'><p>Sugerencias Combinadas de Google, DuckDuckGo, YouTube, Bing y Amazon</p></div>",
228
+ description="<p style='margin-bottom:10px;text-align:center;background: #ffffff; padding: 8px; border-radius: 8px; border-width: 1px; border: solid 1px #e5e7eb;'>Ingrese una palabra clave para obtener sugerencias de búsqueda relacionadas de Google, DuckDuckGo, YouTube, Bing y Amazon. Se mostrarán las 50 primeras sugerencias combinadas y también las 20 principales de cada plataforma por separado.</p>",
229
  article="<div style='margin-top:10px'><p style='text-align: center !important; background: #ffffff; padding: 5px 30px; border-radius: 8px; border-width: 1px; border: solid 1px #e5e7eb; width: fit-content; margin: auto;'>Desarrollada por <a style='text-decoration: none !important; color: #e12a31 !important;' href='https://artxeweb.com'>© Artxe Web</a></p></div>"
230
  )
231
 
232
+ iface.launch()