import streamlit as st
import requests
from bs4 import BeautifulSoup
import re

# Define the cookies
cookies = {
    'PHPSESSID': 'vnpu4ju7jrbqj3r96j63n058g4',
    'SMFCookie72': 'a%3A4%3A%7Bi%3A0%3Bi%3A33971%3Bi%3A1%3Bs%3A40%3A%22ca7e87dedaf9a86805cbbbfd698d35c5f00654c8%22%3Bi%3A2%3Bi%3A1716901379%3Bi%3A3%3Bi%3A0%3B%7D'
}
# Regular expression to find the specific pattern in URLs (dots escaped so they match literally)
pattern = re.compile(r'https://new3\.gdtot\.dad/file/\d{10}')

# Function to scrape a single URL
def scrape_url(url):
    try:
        # Fetch the page content with cookies
        response = requests.get(url, cookies=cookies)
        response.raise_for_status()  # Ensure we notice bad responses

        # Parse the content with BeautifulSoup
        soup = BeautifulSoup(response.text, 'html.parser')

        # Find all links in the page
        links = soup.find_all('a', href=True)

        # Filter links that match the pattern and get titles
        matched_links = [(link['href'], link.get_text(strip=True)) for link in links if pattern.match(link['href'])]

        # Display the matched links and their titles
        for link, title in matched_links:
            st.write(f"Title: {title}, URL: {link}")
    except Exception as e:
        st.error(f"Failed to scrape {url}: {e}")

# Function to get the top 5 results
def get_top_results(search_query):
    url = "https://ww1.sharespark.cfd/index.php"
    params = {
        'action': 'search2',
        'search': search_query,
        'submit': 'Search',
        'advanced': '0'
    }
    response = requests.get(url, params=params, cookies=cookies)
    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'html.parser')
        results = soup.find_all('div', class_='topic_details floatleft', limit=5)
        top_results = []
        for result in results:
            counter = result.find('div', class_='counter').text
            title_link = result.find('h5').find('a').text
            topic_link = result.find('h5').find_all('a')[1].get('href')
            topic_text = result.find('h5').find_all('a')[1].text
            author = result.find('a', title=True).text
            date = result.find('em').text
            top_results.append({
                'counter': counter,
                'title': title_link,
                'topic': topic_text,
                'topic_link': topic_link,
                'author': author,
                'date': date
            })
        return top_results
    else:
        # st.error takes a single message string, so format the status code into it
        st.error(f"Error: {response.status_code}")
        return []

# Main Streamlit app
def main():
    st.title("movie downloader")
    search_query = st.text_input("Enter search query:")
    if st.button("Search"):
        top_results = get_top_results(search_query)
        if top_results:
            selected_result = st.selectbox("Select a result:", [result['title'] for result in top_results])
            for result in top_results:
                if result['title'] == selected_result:
                    topic_url = result['topic_link']
                    st.write(f"Selected topic link: {topic_url}")
                    scrape_url(topic_url)
                    st.write("Scraping gdtot links:")
                    scrape_gdtot_links(topic_url)

# Function to scrape gdtot links
def scrape_gdtot_links(topic_url):
    # Send a GET request to the URL with cookies
    response = requests.get(topic_url, cookies=cookies)

    # Parse the HTML content
    soup = BeautifulSoup(response.text, 'html.parser')

    # Find all <a> elements within <strong> tags
    strong_tags = soup.find_all('strong')
    for strong_tag in strong_tags:
        for a_tag in strong_tag.find_all('a', href=True):
            title = a_tag.text.strip()  # Extract the title text
            link = a_tag['href']  # Extract the link
            st.write(f"Title: {title}, URL: {link}")

if __name__ == "__main__":
    main()