usmanyousaf committed on
Commit
849c08d
·
verified ·
1 Parent(s): 2a0bf8d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +66 -23
app.py CHANGED
@@ -1,41 +1,84 @@
1
- import streamlit as st
2
  from scrape import scrape_website, extract_body_content, clean_body_content, split_dom_content
3
- from parse import parse_with_ollama
4
 
5
- # Streamlit UI
6
- st.title("AI Web Scraper")
7
- url = st.text_input("Enter Website URL")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
  # Step 1: Scrape the Website
10
  if st.button("Scrape Website"):
11
  if url:
12
- st.write("Scraping the website...")
13
 
14
- # Scrape the website using requests and BeautifulSoup
15
  dom_content = scrape_website(url)
16
- if dom_content:
17
- body_content = extract_body_content(dom_content)
18
- cleaned_content = clean_body_content(body_content)
19
 
20
- # Store the cleaned DOM content in Streamlit session state
21
- st.session_state.dom_content = cleaned_content
22
 
23
- # Display the cleaned DOM content in an expandable text box
24
- with st.expander("View Cleaned DOM Content"):
25
- st.text_area("DOM Content", cleaned_content, height=300)
26
- else:
27
- st.error("Failed to scrape the website. Please check the URL.")
28
 
29
-
30
- # Step 2: Ask Questions About the DOM Content
31
  if "dom_content" in st.session_state:
32
- parse_description = st.text_area("Describe what you want to parse")
33
 
34
  if st.button("Parse Content"):
35
  if parse_description:
36
- st.write("Parsing the content...")
37
 
38
- # Parse the content with Ollama
39
  dom_chunks = split_dom_content(st.session_state.dom_content)
40
- parsed_result = parse_with_ollama(dom_chunks, parse_description)
41
  st.write(parsed_result)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st # type: ignore
2
  from scrape import scrape_website, extract_body_content, clean_body_content, split_dom_content
3
+ from parse_utils import parse_with_groq # Updated to 'parse_utils'
4
 
5
+ # Streamlit UI with sidebar configuration
6
+ st.set_page_config(page_title="AI Web Scraping App", page_icon="🌐")
7
+
8
+ st.sidebar.title("🚀 Model Selection")
9
+ selected_model = st.sidebar.selectbox(
10
+ "Choose a Model for Parsing:",
11
+ [
12
+ "llama3-8b-8192",
13
+ "llama3-groq-70b-8192-tool-use-preview",
14
+ "llama-3.1-8b-instant",
15
+ "llava-v1.5-7b-4096-preview",
16
+ "mixtral-8x7b-32768",
17
+ ]
18
+ )
19
+
20
+ # Application title and description
21
+ st.title("AI Web Scraper App 🌐")
22
+ st.write("Easily scrape and analyze web content using advanced AI models. 🌟")
23
+
24
+ # Input for website URL
25
+ url = st.text_input("Enter Website URL 🔗")
26
 
27
  # Step 1: Scrape the Website
28
  if st.button("Scrape Website"):
29
  if url:
30
+ st.write("🕵️‍♂️ Scraping the website...")
31
 
32
+ # Scrape the website
33
  dom_content = scrape_website(url)
34
+ body_content = extract_body_content(dom_content)
35
+ cleaned_content = clean_body_content(body_content)
 
36
 
37
+ # Store the DOM content in Streamlit session state
38
+ st.session_state.dom_content = cleaned_content
39
 
40
+ # Display the DOM content in an expandable text box
41
+ with st.expander("View DOM Content"):
42
+ st.text_area("DOM Content", cleaned_content, height=300)
 
 
43
 
44
+ # Step 2: Parse the Content
 
45
  if "dom_content" in st.session_state:
46
+ parse_description = st.text_area("Describe what you want to parse 📝")
47
 
48
  if st.button("Parse Content"):
49
  if parse_description:
50
+ st.write(f"🤖 Parsing the content with {selected_model}...")
51
 
52
+ # Parse content using Groq
53
  dom_chunks = split_dom_content(st.session_state.dom_content)
54
+ parsed_result = parse_with_groq(dom_chunks, parse_description, model=selected_model)
55
  st.write(parsed_result)
56
+
57
+ # Custom CSS for footer at the bottom of the sidebar
58
+ st.markdown(
59
+ """
60
+ <style>
61
+ .footer {
62
+ position: fixed;
63
+ bottom: 0;
64
+ left: 0;
65
+ width: 100%;
66
+ background-color: #272432; /* Dark background for visibility */
67
+ color: white;
68
+ text-align: center;
69
+ padding: 10px;
70
+ font-size: 14px;
71
+ }
72
+ .sidebar .footer {
73
+ position: fixed;
74
+ bottom: 0;
75
+ }
76
+ </style>
77
+
78
+ <div class="footer">
79
+ Made with ❤️ by Usman Yousaf 🚀<br>
80
+ Feel free to improve and expand this app for more powerful insights! 🔥
81
+ </div>
82
+ """,
83
+ unsafe_allow_html=True
84
+ )