Spaces:

tx3bas
/

kwrl-url

Sleeping

App Files Files Community

tx3bas committed on May 25, 2024

Commit

c8849a2

verified ·

1 Parent(s): 316281c

Update app.py

Browse files

Files changed (1) hide show

app.py +45 -4

app.py CHANGED Viewed

@@ -1,7 +1,6 @@
 import streamlit as st
 from extract import take_webdata
-from PIL import Image
-from io import BytesIO
 def main():
     st.title("Website Content Extractor")
@@ -25,9 +24,15 @@ def visualize(url):
                 st.info(page_title)
             else:
                 st.error("Error: empty page title")
-            st.subheader("Website preview:")
             if html_content:
-                st.code(html_content, language='html')
             else:
                 st.error("Error: empty HTML content")
@@ -35,5 +40,41 @@ def visualize(url):
     except Exception as e:
         st.error(f"Error: {e}")
 if __name__ == "__main__":
     main()

 import streamlit as st
 from extract import take_webdata
+import pandas as pd
 def main():
     st.title("Website Content Extractor")
                 st.info(page_title)
             else:
                 st.error("Error: empty page title")
+            st.subheader("Keyword Data:")
             if html_content:
+                data = parse_html(html_content)
+                if data:
+                    df = pd.DataFrame(data)
+                    st.table(df)
+                else:
+                    st.error("No keyword data found")
             else:
                 st.error("Error: empty HTML content")
     except Exception as e:
         st.error(f"Error: {e}")
def parse_html(html_content):
    """Extract keyword rows from the HTML of a keyword listing.

    Each ``div.sc-btEEuG`` element is flattened to its visible text and
    split on the ``' Generar contenido con IA '`` marker. The text before
    the marker is taken as the keyword; the first four whitespace-separated
    tokens after it are taken as volume, CPC, PD and SD, in that order.

    Args:
        html_content: HTML markup to parse. ``None`` or an empty value
            yields an empty list.

    Returns:
        A list of dicts with the keys ``"Palabra clave"``, ``"Volumen"``,
        ``"CPC"``, ``"PD"`` and ``"SD"`` (all values are strings), one per
        container whose text matches the expected layout. Containers that
        do not match are skipped.
    """
    # Nothing to parse: return early instead of handing None/"" to the parser.
    if not html_content:
        return []

    from bs4 import BeautifulSoup

    separator = ' Generar contenido con IA '

    def parse_row(text):
        """Turn one container's flattened text into a row dict, or None."""
        # Exactly one marker is required; anything else is not a keyword row.
        parts = text.split(separator)
        if len(parts) != 2:
            return None
        keyword = parts[0]
        metrics = parts[1].split()
        if len(metrics) < 4:
            return None
        # Names deliberately avoid ``pd`` so the module-level pandas alias
        # is not shadowed inside this helper.
        volume, cpc, pd_value, sd_value = metrics[:4]
        return {
            "Palabra clave": keyword,
            "Volumen": volume,
            "CPC": cpc,
            "PD": pd_value,
            "SD": sd_value,
        }

    soup = BeautifulSoup(html_content, 'html.parser')
    rows = []
    for container in soup.select('div.sc-btEEuG'):
        # Parse each container once and keep only the rows that matched.
        row = parse_row(' '.join(container.stripped_strings))
        if row is not None:
            rows.append(row)
    return rows
 # Call main() only when this file is executed as the main module, not on import.
 if __name__ == "__main__":
     main()