Spaces:

usmanyousaf
/

AI-WebScraper-App

Paused

App Files Community

usmanyousaf committed on Sep 29, 2024

Commit

5926a5d

verified ·

1 Parent(s): 6a10786

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -66

app.py CHANGED Viewed

@@ -1,85 +1,41 @@
 import streamlit as st
 from scrape import scrape_website, extract_body_content, clean_body_content, split_dom_content
-from parse import parse_with_groq
-# Streamlit UI with sidebar
-st.set_page_config(page_title="Web Scraping App 🧠", page_icon="🌐")
-st.sidebar.title("🚀 Model Selection")
-selected_model = st.sidebar.selectbox(
-    "Choose a Model for Parsing:",
-    [
-        "llama3-8b-8192",
-        "distil-whisper-large-v3-en",
-        "llama3-groq-70b-8192-tool-use-preview",
-        "llama-3.1-8b-instant",
-        "llava-v1.5-7b-4096-preview",
-        "mixtral-8x7b-32768",
-    ]
-)
-# Application title
-st.title("AI Web Scraper App 🌐")
-st.write("Easily scrape and analyze web content using advanced AI models. 🌟")
-# Input for website URL
-url = st.text_input("Enter Website URL 🔗")
 # Step 1: Scrape the Website
 if st.button("Scrape Website"):
     if url:
-        st.write("🕵️‍♂️ Scraping the website...")
-        # Scrape the website
         dom_content = scrape_website(url)
-        body_content = extract_body_content(dom_content)
-        cleaned_content = clean_body_content(body_content)
-        # Store the DOM content in Streamlit session state
-        st.session_state.dom_content = cleaned_content
-        # Display the DOM content in an expandable text box
-        with st.expander("View DOM Content"):
-            st.text_area("DOM Content", cleaned_content, height=300)
-# Step 2: Parse the Content
 if "dom_content" in st.session_state:
-    parse_description = st.text_area("Describe what you want to parse 📝")
     if st.button("Parse Content"):
         if parse_description:
-            st.write(f"🤖 Parsing the content with {selected_model}...")
-            # Parse content using Groq
             dom_chunks = split_dom_content(st.session_state.dom_content)
-            parsed_result = parse_with_groq(dom_chunks, parse_description, model=selected_model)
             st.write(parsed_result)
-# CSS for footer at the bottom of the sidebar
-st.markdown(
-    """
-    <style>
-    .footer {
-        position: fixed;
-        bottom: 0;
-        left: 0;
-        width: 100%;
-        background-color: #272432;  /* Dark background for visibility */
-        color: white;
-        text-align: center;
-        padding: 10px;
-        font-size: 14px;
-    }
-    .sidebar .footer {
-        position: fixed;
-        bottom: 0;
-    }
-    </style>
-    <div class="footer">
-    Made with ❤️ by Usman Yousaf 🚀<br>
-    Feel free to improve and expand this app for more powerful insights! 🔥
-    </div>
-    """,
-    unsafe_allow_html=True
-)

 import streamlit as st
 from scrape import scrape_website, extract_body_content, clean_body_content, split_dom_content
+from parse import parse_with_ollama
+# Streamlit UI
+st.title("AI Web Scraper")
+url = st.text_input("Enter Website URL")
 # Step 1: Scrape the Website
 if st.button("Scrape Website"):
     if url:
+        st.write("Scraping the website...")
+        # Scrape the website using requests and BeautifulSoup
         dom_content = scrape_website(url)
+        if dom_content:
+            body_content = extract_body_content(dom_content)
+            cleaned_content = clean_body_content(body_content)
+            # Store the cleaned DOM content in Streamlit session state
+            st.session_state.dom_content = cleaned_content
+            # Display the cleaned DOM content in an expandable text box
+            with st.expander("View Cleaned DOM Content"):
+                st.text_area("DOM Content", cleaned_content, height=300)
+        else:
+            st.error("Failed to scrape the website. Please check the URL.")
+# Step 2: Ask Questions About the DOM Content
 if "dom_content" in st.session_state:
+    parse_description = st.text_area("Describe what you want to parse")
     if st.button("Parse Content"):
         if parse_description:
+            st.write("Parsing the content...")
+            # Parse the content with Ollama
             dom_chunks = split_dom_content(st.session_state.dom_content)
+            parsed_result = parse_with_ollama(dom_chunks, parse_description)
             st.write(parsed_result)