# sl-mv-dl / app.py
# slimshadow — Update app.py (commit 6fd430d, verified)
import streamlit as st
import requests
from bs4 import BeautifulSoup
import re
# Define the cookies
# Forum session cookies sent with every request so the scraper is
# treated as a logged-in SMF user.
# NOTE(review): these are hard-coded session credentials — they will
# expire and should be moved to a secret/config, not committed to source.
cookies = {
    'PHPSESSID': 'vnpu4ju7jrbqj3r96j63n058g4',
    'SMFCookie72': 'a%3A4%3A%7Bi%3A0%3Bi%3A33971%3Bi%3A1%3Bs%3A40%3A%22ca7e87dedaf9a86805cbbbfd698d35c5f00654c8%22%3Bi%3A2%3Bi%3A1716901379%3Bi%3A3%3Bi%3A0%3B%7D'
}
# Regular expression to find the specific pattern in URLs
# Matches gdtot file links of the form https://new3.gdtot.dad/file/<10 digits>
pattern = re.compile(r'https://new3.gdtot.dad/file/\d{10}')
# Function to scrape a single URL
def scrape_url(url, timeout=15):
    """Fetch *url* with the forum session cookies and display every anchor
    whose href matches the gdtot file-link ``pattern``.

    Args:
        url: Page URL to fetch and scan for gdtot links.
        timeout: Seconds before the HTTP request is abandoned. The original
            call had no timeout and could hang the Streamlit UI forever.

    Output is written straight to the Streamlit page; errors are shown
    via ``st.error`` instead of propagating.
    """
    try:
        # Fetch the page content with cookies; timeout keeps the UI responsive.
        response = requests.get(url, cookies=cookies, timeout=timeout)
        response.raise_for_status()  # Ensure we notice bad responses
        # Parse the content with BeautifulSoup
        soup = BeautifulSoup(response.text, 'html.parser')
        # Find all links in the page, keep those matching the gdtot pattern.
        links = soup.find_all('a', href=True)
        matched_links = [
            (link['href'], link.get_text(strip=True))
            for link in links
            if pattern.match(link['href'])
        ]
        # Display the matched links and their titles
        for link, title in matched_links:
            st.write(f"Title: {title}, URL: {link}")
    except Exception as e:
        # Broad catch is deliberate: one bad page must not crash the app;
        # the failure is surfaced in the UI instead.
        st.error(f"Failed to scrape {url}: {e}")
# Function to get the top N (default 5) forum search results
def get_top_results(search_query, limit=5):
    """Search the sharespark forum and return up to *limit* result dicts.

    Args:
        search_query: Text typed by the user.
        limit: Maximum number of results to return (default 5, matching
            the original hard-coded behavior).

    Returns:
        A list of dicts with keys ``counter``, ``title``, ``topic``,
        ``topic_link``, ``author`` and ``date``; empty list on any
        HTTP failure (the error is shown via ``st.error``).
    """
    url = "https://ww1.sharespark.cfd/index.php"
    params = {
        'action': 'search2',
        'search': search_query,
        'submit': 'Search',
        'advanced': '0'
    }
    response = requests.get(url, params=params, cookies=cookies, timeout=15)
    if response.status_code != 200:
        # BUG FIX: st.error takes a single message string, not
        # print-style varargs — the original
        # st.error("Error:", response.status_code) raised at runtime.
        st.error(f"Error: {response.status_code}")
        return []
    soup = BeautifulSoup(response.text, 'html.parser')
    results = soup.find_all('div', class_='topic_details floatleft', limit=limit)
    top_results = []
    for result in results:
        try:
            counter = result.find('div', class_='counter').text
            title_link = result.find('h5').find('a').text
            topic_anchor = result.find('h5').find_all('a')[1]
            author = result.find('a', title=True).text
            date = result.find('em').text
        except (AttributeError, IndexError):
            # A result div missing an expected element would previously
            # crash the whole search; skip malformed entries instead.
            continue
        top_results.append({
            'counter': counter,
            'title': title_link,
            'topic': topic_anchor.text,
            'topic_link': topic_anchor.get('href'),
            'author': author,
            'date': date
        })
    return top_results
# Main Streamlit app
def main():
    """Render the search UI: query box, result picker, and link scraping."""
    st.title("movie downloader")
    search_query = st.text_input("Enter search query:")
    # BUG FIX: the selectbox used to live inside `if st.button(...)`.
    # st.button is True only on the click's own rerun, so the moment the
    # user changed the selectbox (triggering a rerun) the results — and
    # the selectbox itself — disappeared. Persist results in
    # st.session_state so they survive reruns.
    if st.button("Search"):
        st.session_state['top_results'] = get_top_results(search_query)
    top_results = st.session_state.get('top_results', [])
    if top_results:
        selected_result = st.selectbox(
            "Select a result:",
            [result['title'] for result in top_results]
        )
        for result in top_results:
            if result['title'] == selected_result:
                topic_url = result['topic_link']
                st.write(f"Selected topic link: {topic_url}")
                scrape_url(topic_url)
                st.write("Scraping gdtot links:")
                scrape_gdtot_links(topic_url)
# Function to scrape gdtot links
def scrape_gdtot_links(topic_url, timeout=15):
    """Fetch *topic_url* and display every link found inside <strong> tags.

    Args:
        topic_url: Forum topic page to scrape.
        timeout: Seconds before the HTTP request is abandoned (the
            original request had no timeout and no error handling).

    Errors are reported via ``st.error``, consistent with scrape_url.
    """
    try:
        # Send a GET request to the URL with cookies
        response = requests.get(topic_url, cookies=cookies, timeout=timeout)
        response.raise_for_status()  # previously a 4xx/5xx page was parsed silently
    except requests.RequestException as e:
        st.error(f"Failed to fetch {topic_url}: {e}")
        return
    # Parse the HTML content
    soup = BeautifulSoup(response.text, 'html.parser')
    # Find all <a> elements nested within <strong> tags
    for strong_tag in soup.find_all('strong'):
        for a_tag in strong_tag.find_all('a', href=True):
            title = a_tag.text.strip()  # Extract the title text
            link = a_tag['href']        # Extract the link
            st.write(f"Title: {title}, URL: {link}")
# Standard script entry guard: run the Streamlit app only when executed
# directly, not when imported.
if __name__ == "__main__":
    main()