tx3bas committed on
Commit
c8849a2
·
verified ·
1 Parent(s): 316281c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -4
app.py CHANGED
@@ -1,7 +1,6 @@
1
  import streamlit as st
2
  from extract import take_webdata
3
- from PIL import Image
4
- from io import BytesIO
5
 
6
  def main():
7
  st.title("Website Content Extractor")
@@ -25,9 +24,15 @@ def visualize(url):
25
  st.info(page_title)
26
  else:
27
  st.error("Error: empty page title")
28
- st.subheader("Website preview:")
 
29
  if html_content:
30
- st.code(html_content, language='html')
 
 
 
 
 
31
  else:
32
  st.error("Error: empty HTML content")
33
 
@@ -35,5 +40,41 @@ def visualize(url):
35
  except Exception as e:
36
  st.error(f"Error: {e}")
37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  if __name__ == "__main__":
39
  main()
 
1
  import streamlit as st
2
  from extract import take_webdata
3
+ import pandas as pd
 
4
 
5
  def main():
6
  st.title("Website Content Extractor")
 
24
  st.info(page_title)
25
  else:
26
  st.error("Error: empty page title")
27
+
28
+ st.subheader("Keyword Data:")
29
  if html_content:
30
+ data = parse_html(html_content)
31
+ if data:
32
+ df = pd.DataFrame(data)
33
+ st.table(df)
34
+ else:
35
+ st.error("No keyword data found")
36
  else:
37
  st.error("Error: empty HTML content")
38
 
 
40
  except Exception as e:
41
  st.error(f"Error: {e}")
42
 
43
def _parse_keyword_text(texto):
    """Turn the flattened text of one keyword container into a row dict.

    The text is split on the literal ' Generar contenido con IA ' marker.
    The part before the marker is taken as the keyword; the part after it is
    split on whitespace and its first four tokens are used, in order, as the
    "Volumen", "CPC", "PD" and "SD" values (kept as strings, unconverted).

    Returns:
        A dict with the keys "Palabra clave", "Volumen", "CPC", "PD" and
        "SD", or None when the marker does not occur exactly once or fewer
        than four tokens follow it.
    """
    partes = texto.split(' Generar contenido con IA ')
    if len(partes) != 2:
        return None

    palabra_clave, resto = partes
    datos = resto.split()
    if len(datos) < 4:
        return None

    # Local names deliberately avoid "pd", which is the pandas alias
    # imported at the top of the file.
    volumen, cpc, valor_pd, valor_sd = datos[:4]
    return {
        "Palabra clave": palabra_clave,
        "Volumen": volumen,
        "CPC": cpc,
        "PD": valor_pd,
        "SD": valor_sd,
    }


def parse_html(html_content):
    """Extract keyword rows from the given HTML.

    Every ``div.sc-btEEuG`` element is flattened to a single string (its
    stripped text fragments joined with single spaces) and handed to
    ``_parse_keyword_text``; containers whose text does not match the
    expected layout are skipped.

    Args:
        html_content: HTML markup to parse. Empty or None input yields an
            empty list.

    Returns:
        A list of dicts, one per successfully parsed container, in document
        order. The list is empty when nothing could be parsed.
    """
    if not html_content:
        return []

    # Imported lazily, as in the original: bs4 is only needed once there is
    # actual markup to parse.
    from bs4 import BeautifulSoup

    soup = BeautifulSoup(html_content, 'html.parser')

    datos_parseados = []
    for contenedor in soup.select('div.sc-btEEuG'):
        texto_plano = ' '.join(contenedor.stripped_strings)
        # Parse each text exactly once (the previous comprehension parsed
        # every text twice: once for the filter and once for the value).
        fila = _parse_keyword_text(texto_plano)
        if fila:
            datos_parseados.append(fila)
    return datos_parseados
78
+
79
  if __name__ == "__main__":
80
  main()