Makima57 committed on
Commit
47813a1
·
verified ·
1 Parent(s): afdafe3

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +16 -34
app.py CHANGED
@@ -5,46 +5,28 @@ from bs4 import BeautifulSoup
5
  import requests
6
  from urllib.parse import urljoin, urlparse
7
 
8
- def is_valid(url):
9
- parsed = urlparse(url)
10
- return bool(parsed.netloc) and bool(parsed.scheme)
11
-
12
- def get_all_website_links(url, query):
13
- urls = set()
14
- domain_name = urlparse(url).netloc
15
- soup = BeautifulSoup(requests.get(url + "?q=" + query).content, "html.parser")
16
- for a_tag in soup.findAll("a"):
17
- href = a_tag.attrs.get("href")
18
- if href == "" or href is None:
19
- continue
20
- href = urljoin(url, href)
21
- parsed_href = urlparse(href)
22
- href = parsed_href.scheme + "://" + parsed_href.netloc + parsed_href.path
23
- if not is_valid(href):
24
- continue
25
- if href in urls:
26
- continue
27
- if domain_name not in href:
28
- continue
29
- if href != url:
30
- urls.add(href)
31
- return urls
32
 
33
  def download_content(url):
34
  response = requests.get(url)
35
  return response.text
36
 
37
  st.title("Search and Download")
 
38
  query = st.text_input("Enter your query (e.g. beds)")
39
  url = st.text_input("Enter the URL to search from (e.g. https://www.daraz.com.np/)")
40
- if st.button("Search"):
41
- links = get_all_website_links(url, query)
42
- if links:
43
- first_link = list(links)[0]
44
- st.write("First result:", first_link)
45
- if st.button("Download content"):
46
- content = download_content(first_link)
47
- st.download_button("Download", content, file_name="content.html")
48
- else:
49
- st.write("No results found")
50
 
 
5
  import requests
6
  from urllib.parse import urljoin, urlparse
7
 
8
+ def search(query, url):
9
+ if not url.startswith("http"):
10
+ url = "https://" + url
11
+ parsed_url = urlparse(url)
12
+ search_url = f"{parsed_url.scheme}://{parsed_url.netloc}/{query}/"
13
+ return search_url
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
  def download_content(url):
16
  response = requests.get(url)
17
  return response.text
18
 
19
  st.title("Search and Download")
20
+
21
  query = st.text_input("Enter your query (e.g. beds)")
22
  url = st.text_input("Enter the URL to search from (e.g. https://www.daraz.com.np/)")
23
+ search_button = st.button("Search")
24
+
25
+ if search_button:
26
+ link = search(query, url)
27
+ st.write(f"First result: {link}")
28
+ download_button = st.button("Download Content")
29
+ if download_button:
30
+ content = download_content(link)
31
+ st.download_button("Download", content, file_name="content.html")
 
32