import streamlit as st
import requests
from bs4 import BeautifulSoup
import re

# Cookies used for authenticated requests
cookies = {
    'PHPSESSID': 'vnpu4ju7jrbqj3r96j63n058g4',
    'SMFCookie72': 'a%3A4%3A%7Bi%3A0%3Bi%3A33971%3Bi%3A1%3Bs%3A40%3A%22ca7e87dedaf9a86805cbbbfd698d35c5f00654c8%22%3Bi%3A2%3Bi%3A1716901379%3Bi%3A3%3Bi%3A0%3B%7D'
}

# Regular expression matching gdtot file links
pattern = re.compile(r'https://new3.gdtot.dad/file/\d{10}')


# Scrape a single URL and display any links matching the gdtot pattern
def scrape_url(url):
    try:
        # Fetch the page content with cookies
        response = requests.get(url, cookies=cookies)
        response.raise_for_status()  # Ensure we notice bad responses

        # Parse the content with BeautifulSoup
        soup = BeautifulSoup(response.text, 'html.parser')

        # Find all links in the page
        links = soup.find_all('a', href=True)

        # Keep only the links that match the pattern, together with their titles
        matched_links = [
            (link['href'], link.get_text(strip=True))
            for link in links
            if pattern.match(link['href'])
        ]

        # Display the matched links and their titles
        for link, title in matched_links:
            st.write(f"Title: {title}, URL: {link}")
    except Exception as e:
        st.error(f"Failed to scrape {url}: {e}")


# Search the forum and return the top 5 results
def get_top_results(search_query):
    url = "https://ww1.sharespark.cfd/index.php"
    params = {
        'action': 'search2',
        'search': search_query,
        'submit': 'Search',
        'advanced': '0'
    }
    response = requests.get(url, params=params, cookies=cookies)
    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'html.parser')
        results = soup.find_all('div', class_='topic_details floatleft', limit=5)
        top_results = []
        for result in results:
            counter = result.find('div', class_='counter').text
            title_link = result.find('h5').find('a').text
            topic_link = result.find('h5').find_all('a')[1].get('href')
            topic_text = result.find('h5').find_all('a')[1].text
            author = result.find('a', title=True).text
            date = result.find('em').text
            top_results.append({
                'counter': counter,
                'title': title_link,
                'topic': topic_text,
                'topic_link': topic_link,
                'author': author,
                'date': date
            })
        return top_results
    else:
        st.error(f"Error: {response.status_code}")
        return []


# Main Streamlit app
def main():
    st.title("movie downloader")
    search_query = st.text_input("Enter search query:")

    # Keep the search results in session_state so the selectbox below survives
    # Streamlit reruns (st.button is only True on the run in which it was clicked).
    if st.button("Search"):
        st.session_state['top_results'] = get_top_results(search_query)

    top_results = st.session_state.get('top_results', [])
    if top_results:
        selected_result = st.selectbox(
            "Select a result:",
            [result['title'] for result in top_results]
        )
        for result in top_results:
            if result['title'] == selected_result:
                topic_url = result['topic_link']
                st.write(f"Selected topic link: {topic_url}")
                scrape_url(topic_url)
                st.write("Scraping gdtot links:")
                scrape_gdtot_links(topic_url)


# Scrape gdtot links from a topic page
def scrape_gdtot_links(topic_url):
    # Send a GET request to the URL with cookies
    response = requests.get(topic_url, cookies=cookies)

    # Parse the HTML content
    soup = BeautifulSoup(response.text, 'html.parser')

    # Find all <a> tags nested inside <strong> tags
    strong_tags = soup.find_all('strong')
    for strong_tag in strong_tags:
        for a_tag in strong_tag.find_all('a', href=True):
            title = a_tag.text.strip()  # Extract the title text
            link = a_tag['href']  # Extract the link
            st.write(f"Title: {title}, URL: {link}")


if __name__ == "__main__":
    main()
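
# Usage note: assuming this file is saved as app.py (hypothetical filename),
# the app can be started with `streamlit run app.py`. The PHPSESSID/SMFCookie72
# values above are session cookies and will likely need to be refreshed before
# searches against the forum succeed.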