import streamlit as st
import requests
from bs4 import BeautifulSoup
import re

# Define the cookies
cookies = {
    'PHPSESSID': 'vnpu4ju7jrbqj3r96j63n058g4',
    'SMFCookie72': 'a%3A4%3A%7Bi%3A0%3Bi%3A33971%3Bi%3A1%3Bs%3A40%3A%22ca7e87dedaf9a86805cbbbfd698d35c5f00654c8%22%3Bi%3A2%3Bi%3A1716901379%3Bi%3A3%3Bi%3A0%3B%7D'
}
# Regular expression to find the specific pattern in URLs (dots escaped so they match literally)
pattern = re.compile(r'https://new3\.gdtot\.dad/file/\d{10}')

# Function to scrape a single URL
def scrape_url(url):
    try:
        # Fetch the page content with cookies
        response = requests.get(url, cookies=cookies)
        response.raise_for_status()  # Ensure we notice bad responses

        # Parse the content with BeautifulSoup
        soup = BeautifulSoup(response.text, 'html.parser')

        # Find all links in the page
        links = soup.find_all('a', href=True)

        # Filter links that match the pattern and get titles
        matched_links = [(link['href'], link.get_text(strip=True)) for link in links if pattern.match(link['href'])]

        # Display the matched links and their titles
        for link, title in matched_links:
            st.write(f"Title: {title}, URL: {link}")
    except Exception as e:
        st.error(f"Failed to scrape {url}: {e}")

# Function to get the top 5 results
def get_top_results(search_query):
    url = "https://ww1.sharespark.cfd/index.php"
    params = {
        'action': 'search2',
        'search': search_query,
        'submit': 'Search',
        'advanced': '0'
    }
    response = requests.get(url, params=params, cookies=cookies)
    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'html.parser')
        results = soup.find_all('div', class_='topic_details floatleft', limit=5)
        top_results = []
        for result in results:
            counter = result.find('div', class_='counter').text
            title_link = result.find('h5').find('a').text
            topic_link = result.find('h5').find_all('a')[1].get('href')
            topic_text = result.find('h5').find_all('a')[1].text
            author = result.find('a', title=True).text
            date = result.find('em').text
            top_results.append({
                'counter': counter,
                'title': title_link,
                'topic': topic_text,
                'topic_link': topic_link,
                'author': author,
                'date': date
            })
        return top_results
    else:
        # st.error takes a single message string, so format the status code into it
        st.error(f"Error: {response.status_code}")
        return []

# Main Streamlit app
def main():
    st.title("movie downloader")
    search_query = st.text_input("Enter search query:")
    if st.button("Search"):
        top_results = get_top_results(search_query)
        if top_results:
            selected_result = st.selectbox("Select a result:", [result['title'] for result in top_results])
            for result in top_results:
                if result['title'] == selected_result:
                    topic_url = result['topic_link']
                    st.write(f"Selected topic link: {topic_url}")
                    scrape_url(topic_url)
                    st.write("Scraping gdtot links:")
                    scrape_gdtot_links(topic_url)

# Function to scrape gdtot links
def scrape_gdtot_links(topic_url):
    # Send a GET request to the URL with cookies
    response = requests.get(topic_url, cookies=cookies)

    # Parse the HTML content
    soup = BeautifulSoup(response.text, 'html.parser')

    # Find all <a> elements within <strong> tags
    strong_tags = soup.find_all('strong')
    for strong_tag in strong_tags:
        for a_tag in strong_tag.find_all('a', href=True):
            title = a_tag.text.strip()  # Extract the title text
            link = a_tag['href']  # Extract the link
            st.write(f"Title: {title}, URL: {link}")

if __name__ == "__main__":
    main()