Upload app.py with huggingface_hub
Browse files
app.py
CHANGED
|
@@ -5,46 +5,28 @@ from bs4 import BeautifulSoup
|
|
| 5 |
import requests
|
| 6 |
from urllib.parse import urljoin, urlparse
|
| 7 |
|
| 8 |
-
def
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
domain_name = urlparse(url).netloc
|
| 15 |
-
soup = BeautifulSoup(requests.get(url + "?q=" + query).content, "html.parser")
|
| 16 |
-
for a_tag in soup.findAll("a"):
|
| 17 |
-
href = a_tag.attrs.get("href")
|
| 18 |
-
if href == "" or href is None:
|
| 19 |
-
continue
|
| 20 |
-
href = urljoin(url, href)
|
| 21 |
-
parsed_href = urlparse(href)
|
| 22 |
-
href = parsed_href.scheme + "://" + parsed_href.netloc + parsed_href.path
|
| 23 |
-
if not is_valid(href):
|
| 24 |
-
continue
|
| 25 |
-
if href in urls:
|
| 26 |
-
continue
|
| 27 |
-
if domain_name not in href:
|
| 28 |
-
continue
|
| 29 |
-
if href != url:
|
| 30 |
-
urls.add(href)
|
| 31 |
-
return urls
|
| 32 |
|
| 33 |
def download_content(url):
    """Fetch ``url`` with an HTTP GET and return the response body as text.

    Args:
        url: Address of the page to download.

    Returns:
        The decoded response body (``response.text``).

    Raises:
        requests.exceptions.RequestException: If the request fails or the
            server does not answer within the timeout.
    """
    # requests never times out by default; without an explicit limit an
    # unresponsive server would block this call (and the app) forever.
    response = requests.get(url, timeout=10)
    return response.text
|
| 36 |
|
| 37 |
st.title("Search and Download")
|
|
|
|
| 38 |
query = st.text_input("Enter your query (e.g. beds)")
|
| 39 |
url = st.text_input("Enter the URL to search from (e.g. https://www.daraz.com.np/)")
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
st.write("No results found")
|
| 50 |
|
|
|
|
| 5 |
import requests
|
| 6 |
from urllib.parse import urljoin, urlparse
|
| 7 |
|
| 8 |
+
def search(query, url):
    """Build the search URL for ``query`` on the site that ``url`` points to.

    The result has the form ``<scheme>://<host>/<query>/``; any path, query
    string or fragment present in ``url`` is discarded.

    Args:
        query: Search term. It is percent-encoded so that spaces and URL
            delimiters (``/``, ``?``, ``#`` ...) cannot corrupt the path.
        url: Site address, with or without a leading ``http://``/``https://``.
            ``https://`` is assumed when no HTTP(S) scheme is given.

    Returns:
        The search URL as a string.
    """
    # Local import so the block does not depend on the file-level import list.
    from urllib.parse import quote

    url = url.strip()
    # Compare against the full "scheme://" prefix, case-insensitively: a bare
    # startswith("http") misclassifies hosts such as "httpbin.org" as already
    # having a scheme, which leaves urlparse with an empty netloc.
    if not url.lower().startswith(("http://", "https://")):
        url = "https://" + url
    parsed_url = urlparse(url)
    encoded_query = quote(query.strip(), safe="")
    search_url = f"{parsed_url.scheme}://{parsed_url.netloc}/{encoded_query}/"
    return search_url
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
|
| 15 |
def download_content(url):
    """Fetch ``url`` with an HTTP GET and return the response body as text.

    Args:
        url: Address of the page to download.

    Returns:
        The decoded response body (``response.text``).

    Raises:
        requests.exceptions.RequestException: If the request fails or the
            server does not answer within the timeout.
    """
    # requests never times out by default; without an explicit limit an
    # unresponsive server would block this call (and the app) forever.
    response = requests.get(url, timeout=10)
    return response.text
|
| 18 |
|
| 19 |
# --- Streamlit page -------------------------------------------------------
# Streamlit re-executes this script from the top on every widget interaction,
# and st.button() returns True only during the rerun caused by its own click.
# A button nested inside `if search_button:` can therefore never fire: the
# click that would trigger it makes `search_button` False again. The search
# result and the downloaded page are kept in st.session_state so that they
# survive those reruns.
st.title("Search and Download")

query = st.text_input("Enter your query (e.g. beds)")
url = st.text_input("Enter the URL to search from (e.g. https://www.daraz.com.np/)")
search_button = st.button("Search")

if search_button:
    if query.strip() and url.strip():
        st.session_state["link"] = search(query, url)
        # A new search invalidates any page fetched for the previous link.
        st.session_state.pop("content", None)
    else:
        st.warning("Please enter both a query and a URL.")

link = st.session_state.get("link")
if link:
    st.write(f"First result: {link}")
    download_button = st.button("Download Content")
    if download_button:
        st.session_state["content"] = download_content(link)
    content = st.session_state.get("content")
    if content is not None:
        # Rendered on every rerun once content exists, so the file stays
        # available after the "Download Content" click has been consumed.
        st.download_button("Download", content, file_name="content.html")
|
|
|
|
| 32 |
|