Makima57 committed
Commit 14ac9ce · verified · 1 Parent(s): 673d2d3

Upload app.py with huggingface_hub
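The commit message indicates the file was pushed with the huggingface_hub client rather than through the web UI. A minimal sketch of that kind of upload, assuming the standard HfApi.upload_file call (the Space repo id below is a hypothetical placeholder, not named in the commit):

# Hypothetical sketch: push a local app.py into a Space repo with huggingface_hub.
from huggingface_hub import HfApi

api = HfApi()  # uses the locally stored HF token by default
api.upload_file(
    path_or_fileobj="app.py",               # local file to upload
    path_in_repo="app.py",                  # destination path inside the repo
    repo_id="Makima57/some-space",          # placeholder Space id (assumption)
    repo_type="space",
    commit_message="Upload app.py with huggingface_hub",
)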

Files changed (1)
  app.py  +25 -60
app.py CHANGED
@@ -3,70 +3,35 @@
  import streamlit as st
  from bs4 import BeautifulSoup
  import requests
- from urllib.parse import urlsplit
- from urllib.parse import unquote
- import os

- def download_page(url):
-     try:
-         response = requests.get(url)
-         soup = BeautifulSoup(response.text, 'html.parser')
-         return soup
-     except requests.exceptions.RequestException as e:
-         return f"Error: {e}"
-
- def download_and_save(url):
-     try:
-         response = requests.get(url, allow_redirects=True)
-         soup = BeautifulSoup(response.text, 'html.parser')
-         filename = os.path.basename(unquote(urlsplit(response.url).path))
-         if not filename:
-             filename = 'index.html'
-         with open(filename, 'w', encoding='utf-8') as file:
-             file.write(soup.prettify())
-         return filename, soup
-     except requests.exceptions.RequestException as e:
-         return f"Error: {e}", None
-
- def get_first_link(query):
-     url = f"https://www.google.com/search?q={query}"
      response = requests.get(url)
      soup = BeautifulSoup(response.text, 'html.parser')
-     links = soup.find_all('a')
-     for link in links:
-         href = link.get('href')
-         if href and href.startswith('/url?q='):
-             return href.split('&sa=U&ved=')[0].replace('/url?q=', '')

- st.title("Webpage Downloader")
- st.write("Enter the query or URL of the webpage you want to download:")

- query = st.text_input("Query or URL")
- if st.button("Download"):
-     if query:
-         if "http" not in query:
-             url = get_first_link(query)
-             st.write(f"Opening first link: {url}")
-         else:
-             url = query
-         if " from " in url:
-             url = url.split(" from ")[1]
-         try:
-             filename, soup = download_and_save(url)
-             if "Error:" in filename:
-                 st.error(f"Failed to download webpage: {filename}")
-             else:
-                 st.success(f"Webpage downloaded and saved as {filename}")
-                 images = soup.find_all('img')
-                 for i, img in enumerate(images):
-                     img_url = img.get('src')
-                     if img_url:
-                         st.image(img_url, caption=f"Image {i+1}")
-                 titles = soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6'])
-                 for title in titles:
-                     st.write(title.text)
-         except Exception as e:
-             st.error(f"Failed to download webpage: {e}")
      else:
-         st.error("Please enter a query or URL")

  import streamlit as st
  from bs4 import BeautifulSoup
  import requests
+ from urllib.parse import urljoin

+ def search_daraz(query):
+     url = f"https://www.daraz.com.np/catalog/?q={query}"
      response = requests.get(url)
      soup = BeautifulSoup(response.text, 'html.parser')
+     product_links = soup.find_all('a', class_='c16H9d')
+     if product_links:
+         return urljoin("https://www.daraz.com.np", product_links[0]['href'])
+     else:
+         return None
+
+ def download_content(url):
+     response = requests.get(url)
+     return response.text
+
+ st.title("Daraz Search and Download")

+ query = st.text_input("Enter your query (e.g. beds)")
+ search_button = st.button("Search")

+ if search_button:
+     link = search_daraz(query)
+     if link:
+         st.write(f"First result: {link}")
+         download_button = st.button("Download Content")
+         if download_button:
+             content = download_content(link)
+             st.download_button("Download", content, file_name="content.html")
      else:
+         st.write("No results found")
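Since app.py is a Streamlit script, running it locally (assuming streamlit, requests, and beautifulsoup4 are installed) is typically just:

streamlit run app.py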