from flask import Flask, jsonify, request
from flask_cors import CORS
import requests
import re
import base64
import json
from bs4 import BeautifulSoup
from urllib.parse import quote, unquote, urljoin, urlparse

app = Flask(__name__)
CORS(app)

# A list of domains that require the first level of bypass logic
BYPASS_DOMAINS = ['4khdhub.fans', 'viralkhabarbull.com', 'taazabull24.com']

# Single shared desktop User-Agent; the previous code repeated this literal in
# every scraper, which made it easy for the copies to drift apart.
USER_AGENT = ('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
              '(KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36')


# ==============================================================================
# --- PROVIDER 1: HDHub4u (hdhub4u.cologne) ---
# ==============================================================================
def scrape_hdhub4u_list(url):
    """Scrape a listing page of hdhub4u.

    Args:
        url: Full URL of the listing/search page to fetch.

    Returns:
        A list of dicts with keys: title, poster, year, seasons_info, url,
        quality. Returns an empty list on any network failure.
    """
    headers = {'User-Agent': USER_AGENT}
    movies_data = []
    try:
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')
        for item in soup.select('li.thumb'):
            figcaption = item.find('figcaption')
            link_tag = figcaption.find('a') if figcaption else None
            poster_tag = item.find('img')
            if not (link_tag and poster_tag):
                continue
            title = link_tag.get_text(strip=True)
            # BUGFIX: use a distinct local name. The original assigned to
            # `url`, shadowing the function argument, so the error log below
            # reported a per-item link instead of the page that failed.
            item_url = link_tag.get('href')
            poster = poster_tag.get('src')
            year_match = re.search(r'\((\d{4})\)', title)
            year = year_match.group(1) if year_match else "N/A"
            quality_matches = re.findall(
                r'\b(WEB-DL|WEBDL|HDRip|HD-Rip|HDTC|HDTS|DVDScr|BluRay|BRRip|CAM|CAMRip|HDTV)\b',
                title, re.IGNORECASE)
            quality = (' | '.join(sorted(set(quality_matches), reverse=True))
                       if quality_matches else "N/A")
            seasons_match = re.search(r'(Season\s*\d+)', title, re.IGNORECASE)
            seasons_info = seasons_match.group(1) if seasons_match else "N/A"
            if seasons_info == "N/A" and "Full Series" in title:
                seasons_info = "Full Series"
            movies_data.append({
                'title': title,
                'poster': poster,
                'year': year,
                'seasons_info': seasons_info,
                'url': item_url,
                'quality': quality,
            })
    except requests.exceptions.RequestException as e:
        print(f"[HDHub4u] Scraping failed for {url}: {e}")
    return movies_data


def scrape_hdhub4u_details(page_url):
    """Scrape detailed info and all download links from a hdhub4u page,
    excluding samples, watch-online links and 480p entries.

    Returns a dict with title/poster/storyline/screenshots plus
    'batch_download_links' and 'single_episode_links', or None on failure.
    """
    headers = {'User-Agent': USER_AGENT}
    try:
        response = requests.get(page_url, headers=headers, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')
        main_content = soup.find('main', class_='page-body')
        if not main_content:
            return None
        title = soup.find('h1', class_='page-title').get_text(strip=True)
        poster_tag = main_content.select_one('p img.aligncenter')
        poster = poster_tag['src'] if poster_tag else "N/A"
        storyline_tag = main_content.find('div', class_='kno-rdesc')
        storyline = (storyline_tag.get_text(strip=True).split('Review :')[0].strip()
                     if storyline_tag else "N/A")

        screenshots_heading = main_content.find('h2', string=re.compile(r': Screen-Shots :'))
        screenshots = []
        if screenshots_heading:
            # Screenshots live in the <h3> that follows the heading
            # (site-specific markup -- verify if the theme changes).
            screenshot_container = screenshots_heading.find_next_sibling('h3')
            if screenshot_container:
                screenshots = [img['src'] for img in screenshot_container.find_all('img')]

        # Walk the heading tags in document order; <h2> headings switch the
        # current section ('batch' vs 'single'), and link-bearing tags below
        # them are collected into the active section.
        batch_links, single_episode_links = [], []
        current_section, current_episode = None, None
        for tag in main_content.find_all(['h2', 'h3', 'h4']):
            tag_text = tag.get_text(strip=True)
            if tag.name == 'h2':
                if ': DOWNLOAD LINKS :' in tag_text:
                    current_section = 'batch'
                elif ': Single Episode' in tag_text:
                    current_section = 'single'
                else:
                    # Any other h2 ends the current section; flush a pending
                    # episode so its links are not lost.
                    if current_episode:
                        single_episode_links.append(current_episode)
                        current_episode = None
                    current_section = None
                continue
            if current_section == 'batch' and tag.find('a'):
                link_tag = tag.find('a')
                link_text = link_tag.get_text(strip=True).lower()
                if ('watch' not in link_text and 'sample' not in link_text
                        and '480p' not in link_text):
                    batch_links.append({'text': link_tag.get_text(strip=True),
                                        'url': link_tag['href']})
            elif current_section == 'single' and tag.name == 'h4':
                if 'EPiSODE' in tag_text:
                    # New episode header: flush the previous one first.
                    if current_episode:
                        single_episode_links.append(current_episode)
                    current_episode = {"episode": tag_text, "download_links": []}
                elif tag.find('a') and current_episode:
                    quality_tag = tag.find('span', style=re.compile(r'color: #ff0000'))
                    quality = (quality_tag.get_text(strip=True).replace('–', '').strip()
                               if quality_tag else 'N/A')
                    if '480p' in quality.lower():
                        continue
                    links = []
                    for a in tag.find_all('a'):
                        link_name_lower = a.get_text(strip=True).lower()
                        if 'watch' not in link_name_lower and 'sample' not in link_name_lower:
                            links.append({'name': a.get_text(strip=True), 'url': a['href']})
                    if links:
                        current_episode['download_links'].append(
                            {'quality': quality, 'urls': links})
        if current_episode:
            single_episode_links.append(current_episode)
        return {
            'title': title,
            'poster': poster,
            'storyline': storyline,
            'screenshots': screenshots,
            'batch_download_links': batch_links,
            'single_episode_links': single_episode_links,
        }
    except Exception as e:
        print(f"[HDHub4u] An error occurred during detail parsing: {e}")
        return None


# ==============================================================================
# --- PROVIDER 2: 4KHDHub (4khdhub.fans) ---
# ==============================================================================
def scrape_4khdhub_movie_cards(soup):
    """Extract the movie-card grid from a 4khdhub listing page `soup`."""
    results = []
    for card in soup.select('a.movie-card'):
        title = card.select_one('h3.movie-card-title').text.strip()
        poster = card.select_one('img')['src']
        # Meta line looks like "2024 • Season 1"; second part is optional.
        year_and_meta = card.select_one('p.movie-card-meta').text.strip().split('•')
        year = year_and_meta[0].strip() if year_and_meta else 'N/A'
        seasons_info = year_and_meta[1].strip() if len(year_and_meta) > 1 else 'N/A'
        quality_element = card.select_one('.movie-card-format:last-child')
        quality = quality_element.text.strip() if quality_element else 'N/A'
        results.append({
            'title': title,
            'poster': poster,
            'year': year,
            'seasons_info': seasons_info,
            'url': card['href'],
            'quality': quality,
        })
    return results


def _parse_4khdhub_file_item(item, include_season_title=False):
    """Parse one '.download-item' element into a dict.

    Shared by the season-pack and movie branches of
    scrape_4khdhub_details_page (the original duplicated this code).
    Unlike the original, missing header text no longer raises
    AttributeError -- it falls back to 'N/A'.
    """
    entry = {}
    header = item.select_one('.download-header .flex-1')
    head_text = header.find(string=True, recursive=False) if header else None
    entry['quality_description'] = head_text.strip() if head_text else 'N/A'
    content = item.select_one('[id^="content-file"]')
    title_tag = content.select_one('.file-title') if content else None
    entry['full_filename'] = title_tag.text.strip() if title_tag else 'N/A'
    entry['downloads'] = [{'provider': a.text.strip(), 'url': a['href']}
                          for a in (content.select('a.btn') if content else [])]
    if include_season_title:
        season_tag = header.select_one('.season-title') if header else None
        entry['season_title'] = (season_tag.text.strip() if season_tag
                                 else entry['quality_description'])
    return entry


def scrape_4khdhub_details_page(soup):
    """Build the full details dict for a 4khdhub title page `soup`.

    Detects Movie vs Series pages via the '.series-tabs' element and fills
    'download_options' with season packs, individual episodes, or movie files.
    """
    details = {}
    details['title'] = (soup.select_one('h1.page-title').text.strip()
                        if soup.select_one('h1.page-title') else 'N/A')
    details['tagline'] = (soup.select_one('p.movie-tagline').text.strip()
                          if soup.select_one('p.movie-tagline') else '')
    details['poster'] = (soup.select_one('.poster-image img')['src']
                         if soup.select_one('.poster-image img') else '')
    details['summary'] = (soup.select_one('.content-section > p').text.strip()
                          if soup.select_one('.content-section > p') else '')
    tags_container = soup.select_one('.mt-2.flex.flex-wrap')
    details['tags'] = ([badge.text.strip() for badge in tags_container.select('.badge')]
                       if tags_container else [])

    metadata = {}
    for item in soup.select('.metadata-item'):
        label = item.select_one('.metadata-label').text.strip().replace(':', '').lower()
        value = item.select_one('.metadata-value').text.strip()
        if label and value:
            metadata[label] = value
    details['metadata'] = metadata

    # Normalize the raw 'print' metadata into a deduplicated 'quality' field.
    if 'print' in details['metadata']:
        print_text = details['metadata']['print']
        quality_matches = re.findall(
            r'\b(4K|2160p|1080p|720p|480p|WEB-DL|WEBDL|HDRip|HD-Rip|BluRay|BRRip|HDTV)\b',
            print_text, re.IGNORECASE)
        if quality_matches:
            seen = set()
            clean_qualities = []
            for q in quality_matches:
                if q.lower() not in seen:   # case-insensitive dedupe, keep order
                    clean_qualities.append(q)
                    seen.add(q.lower())
            details['metadata']['quality'] = ', '.join(clean_qualities)
        else:
            details['metadata']['quality'] = print_text
        del details['metadata']['print']

    details['screenshots'] = [img['src'] for img in soup.select('.ss-img img')]

    is_series = bool(soup.select_one('.series-tabs'))
    details['page_type'] = "Series" if is_series else "Movie"
    download_options = {"season_packs": [], "individual_episodes": [], "movie_files": []}

    if is_series:
        pack_tab = soup.select_one('#complete-pack')
        if pack_tab:
            for item in pack_tab.select('.download-item'):
                download_options['season_packs'].append(
                    _parse_4khdhub_file_item(item, include_season_title=True))
        episodes_tab = soup.select_one('#episodes')
        if episodes_tab:
            for season_section in episodes_tab.select('.season-item'):
                season_title = season_section.select_one('.episode-title').text.strip()
                episodes = []
                for episode_item in season_section.select('.episode-download-item'):
                    episode = {}
                    file_title_tag = episode_item.select_one('.episode-file-title')
                    episode['full_filename'] = file_title_tag.text.strip()
                    episode['downloads'] = [
                        {'provider': link.text.strip(), 'url': link['href']}
                        for link in episode_item.select('a.btn')]
                    # Quality label is the nearest preceding <h5>, if any.
                    quality_h5 = (file_title_tag.find_previous('h5')
                                  if file_title_tag else None)
                    episode['quality_description'] = (quality_h5.text.strip()
                                                      if quality_h5 else 'Episode File')
                    episodes.append(episode)
                download_options['individual_episodes'].append(
                    {'season_title': season_title, 'episodes': episodes})
    else:
        for item in soup.select('.download-item'):
            download_options['movie_files'].append(_parse_4khdhub_file_item(item))

    details['download_options'] = download_options
    return details


# ==============================================================================
# --- PROVIDER 3: MoviesDrive (moviesdrive.mom) ---
# ==============================================================================
def scrape_moviesdrive_list(url):
    """Scrape a listing page of MoviesDrive.

    Same shape of result as scrape_hdhub4u_list; the two sites share a
    theme, but the quality regex and series heuristics differ.
    """
    headers = {'User-Agent': USER_AGENT}
    movies_data = []
    try:
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')
        for item in soup.select('li.thumb'):
            figcaption = item.find('figcaption')
            link_tag = figcaption.find('a') if figcaption else None
            poster_tag = item.find('img')
            if not (link_tag and poster_tag):
                continue
            title = link_tag.get_text(strip=True)
            # BUGFIX: do not shadow the `url` argument (see scrape_hdhub4u_list).
            item_url = link_tag.get('href')
            poster = poster_tag.get('src')
            year_match = re.search(r'\((\d{4})\)', title)
            year = year_match.group(1) if year_match else "N/A"
            quality_matches = re.findall(
                r'\b(WEB-DL|WEBDL|HDRip|HD-Rip|HDTC|HDTS|DVDScr|BluRay|BRRip|CAM|CAMRip|HDTV|4K|2160p|1080p|720p|480p)\b',
                title, re.IGNORECASE)
            quality = (' | '.join(sorted(set(quality_matches), reverse=True))
                       if quality_matches else "N/A")
            seasons_match = re.search(r'(Season\s*\d+)', title, re.IGNORECASE)
            seasons_info = seasons_match.group(1) if seasons_match else "N/A"
            if seasons_info == "N/A" and ("Series" in title or "S0" in title):
                seasons_info = "Series"
            movies_data.append({
                'title': title,
                'poster': poster,
                'year': year,
                'seasons_info': seasons_info,
                'url': item_url,
                'quality': quality,
            })
    except requests.exceptions.RequestException as e:
        print(f"[MoviesDrive] Scraping failed for {url}: {e}")
    return movies_data


def scrape_moviesdrive_details(page_url):
    """Scrape detailed information from a MoviesDrive page.

    Returns a dict (title/poster/storyline/screenshots/download_options)
    or None on any failure.
    """
    headers = {'User-Agent': USER_AGENT}
    try:
        response = requests.get(page_url, headers=headers, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')
        main_content = soup.find('main', class_='page-body')
        if not main_content:
            return None
        title = soup.find('h1', class_='page-title').get_text(strip=True)
        # Improved poster selector: centered image first, then any <p><img>.
        poster_tag = main_content.select_one('.entry-content img.aligncenter, .entry-content p > img')
        poster = poster_tag['src'] if poster_tag else "N/A"

        storyline_h3 = None
        screenshots_h3 = None
        for h3 in main_content.find_all('h3'):
            if 'Storyline' in h3.get_text():
                storyline_h3 = h3
            elif 'Screen-Shots' in h3.get_text():
                screenshots_h3 = h3
        storyline = (storyline_h3.find_next_sibling('div').get_text(strip=True)
                     if storyline_h3 and storyline_h3.find_next_sibling('div') else "N/A")
        screenshots = []
        if screenshots_h3:
            screenshot_container = screenshots_h3.find_next_sibling('p')
            if screenshot_container:
                screenshots = [img['src'] for img in screenshot_container.find_all('img')]

        download_options = []
        # Link tags are typically in h5 or p tags for this provider.
        for link_container in main_content.select('.entry-content h5, .entry-content p'):
            link_tag = link_container.find('a')
            if link_tag and link_tag.get('href'):
                # Exclude 480p links and non-download (Telegram) links.
                text_lower = link_container.get_text(strip=True).lower()
                if '480p' not in text_lower and 'telegram' not in text_lower:
                    download_options.append({
                        'text': link_container.get_text(strip=True),
                        'url': link_tag['href'],
                    })
        return {
            'title': title,
            'poster': poster,
            'storyline': storyline,
            'screenshots': screenshots,
            'download_options': download_options,
        }
    except Exception as e:
        print(f"[MoviesDrive] An error occurred during detail parsing: {e}")
        return None


# ==============================================================================
# --- SHARED BYPASS & SCRAPING FUNCTIONALITY ---
# ==============================================================================
def scrape_hblinks_page(page_url):
    """Scrape the final cloud links from an hblinks.dad page, grouped by
    quality title.

    Returns a list of {'quality_title': str, 'links': [{'provider', 'url'}]}
    dicts; groups that never received links are dropped.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
    groups = []
    try:
        response = requests.get(page_url, headers=headers, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')
        content = soup.select_one('.entry-content')
        if not content:
            return []
        current_group = None
        # Iterate over all relevant tags (h3, h5) that might contain titles or links
        for tag in content.find_all(['h3', 'h5']):
            tag_text = tag.get_text(strip=True)
            # Collect usable links in this tag (skip Telegram / self links).
            links_in_tag = []
            for a_tag in tag.find_all('a', href=True):
                href = a_tag.get('href')
                if href and 't.me' not in href and 'hblinks' not in href:
                    provider = a_tag.get_text(strip=True) or "Download"
                    links_in_tag.append({'provider': provider, 'url': href})
            if links_in_tag:
                # Decide which group these links belong to: start a new group
                # when there is none yet, or when the tag looks like a title
                # rather than a bare provider link.
                if (current_group is None or len(links_in_tag) > 1
                        or not re.search(r'drive|cloud|instant', tag_text, re.I)):
                    group_title = tag_text
                    # Strip the provider name out of a single-link title.
                    if len(links_in_tag) == 1:
                        group_title = re.sub(
                            r'\[?' + re.escape(links_in_tag[0]['provider']) + r'\]?',
                            '', group_title).strip()
                    current_group = {'quality_title': group_title, 'links': []}
                    groups.append(current_group)
                current_group['links'].extend(links_in_tag)
            elif not tag.find('a'):
                # A link-less tag is a header for the links that follow.
                current_group = {'quality_title': tag_text, 'links': []}
                groups.append(current_group)
    except requests.exceptions.RequestException as e:
        print(f"[HBLinks] Scraping failed for {page_url}: {e}")
    # Drop groups that were created but never got links.
    return [g for g in groups if g['links']]


def scrape_mdrive_page(page_url):
    """Scrape the final cloud links from an mdrive.today page, grouped by
    episode/quality header (h5 tags matching 'EpNN', 'Season N' or 'NNNp')."""
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
    groups = []
    current_group = None
    try:
        response = requests.get(page_url, headers=headers, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')
        content = soup.select_one('.entry-content')
        if not content:
            return []
        for tag in content.find_all('h5'):
            tag_text = tag.get_text(strip=True)
            # An episode/quality header starts a new group.
            if (re.search(r'Ep\d+|Season\s\d+', tag_text, re.IGNORECASE)
                    or re.search(r'\d{3,4}p', tag_text)):
                current_group = {'quality_title': tag_text, 'links': []}
                groups.append(current_group)
            # Any links inside this tag go to the current group.
            links_in_tag = tag.find_all('a', href=True)
            if links_in_tag and current_group:
                for a_tag in links_in_tag:
                    href = a_tag.get('href')
                    provider = a_tag.get_text(strip=True)
                    if (href and provider and 't.me' not in href
                            and 'moviesdrive' not in href):
                        current_group['links'].append({'provider': provider, 'url': href})
    except requests.exceptions.RequestException as e:
        print(f"[MDrive Page] Scraping failed for {page_url}: {e}")
    # Drop empty groups that might have been created.
    return [g for g in groups if g.get('links')]


# Translation table mapping every ASCII letter to its ROT13 counterpart;
# non-letters are left untouched by str.translate.
_ROT13_TABLE = str.maketrans(
    'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz',
    'NOPQRSTUVWXYZABCDEFGHIJKLMnopqrstuvwxyzabcdefghijklm')


def rot13(s):
    """Return `s` with ASCII letters rotated by 13 places (ROT13)."""
    return s.translate(_ROT13_TABLE)


def decode_string(encrypted_string):
    """Decode the site's obfuscated payload: base64 -> base64 -> ROT13 ->
    base64 -> JSON. Returns the parsed object, or None if any step fails."""
    try:
        decoded = base64.b64decode(encrypted_string)
        decoded = base64.b64decode(decoded)
        decoded = rot13(decoded.decode('utf-8'))
        decoded = base64.b64decode(decoded)
        return json.loads(decoded)
    except Exception:
        return None


def get_final_link(url):
    """Follow a first-level bypass page and return the real target URL.

    Looks for the inline script containing s('o','<payload>'), decodes the
    payload via decode_string, and base64-decodes its 'o' field.
    Returns None on any failure.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
               'Referer': 'https://4khdhub.fans/'}
    try:
        with requests.Session() as s:
            res = s.get(url, headers=headers, timeout=15)
            res.raise_for_status()
            soup = BeautifulSoup(res.text, 'html.parser')
            encrypted_string = None
            for script in soup.find_all('script'):
                if script.string and "s('o','" in script.string:
                    match = re.search(r"s\('o','([^']+)'", script.string)
                    if match:
                        encrypted_string = match.group(1)
                        break
            if not encrypted_string:
                return None
            decoded_data = decode_string(encrypted_string)
            if not decoded_data or 'o' not in decoded_data:
                return None
            return base64.b64decode(decoded_data.get('o')).decode('utf-8')
    except Exception as e:
        print(f"An error occurred during bypass: {e}")
        return None


# ==============================================================================
# --- FLASK ROUTES (API Endpoints) ---
# ==============================================================================
@app.route('/')
def index():
    """Health-check / welcome endpoint."""
    return jsonify({"status": "ok", "message": "Welcome to the SanchitFlix Combined API"})


# --- HDHub4u Routes ---
@app.route('/hdhub4u/home', methods=['GET'])
def hdhub4u_home():
    return jsonify(scrape_hdhub4u_list("https://hdhub4u.cologne/"))


# BUGFIX: the rule previously had no URL converter, so Flask never supplied
# `page_num` and every request raised a TypeError.
@app.route('/hdhub4u/page/<int:page_num>', methods=['GET'])
def hdhub4u_page(page_num):
    return jsonify(scrape_hdhub4u_list(f"https://hdhub4u.cologne/page/{page_num}/"))


@app.route('/hdhub4u/search', methods=['GET'])
def hdhub4u_search():
    query = request.args.get('q')
    if not query:
        return jsonify({"error": "A search query 'q' is required."}), 400
    return jsonify(scrape_hdhub4u_list(f"https://hdhub4u.cologne/?s={quote(query)}"))


@app.route('/hdhub4u/details', methods=['GET'])
def hdhub4u_details():
    page_url = request.args.get('url')
    if not page_url:
        return jsonify({"error": "A 'url' parameter is required."}), 400
    details = scrape_hdhub4u_details(unquote(page_url))
    return jsonify(details) if details else (jsonify({"error": "Failed to retrieve details."}), 500)


# --- 4KHDHub Routes ---
@app.route('/4khdhub/home')
def home_4khd():
    try:
        res = requests.get("https://4khdhub.fans", timeout=10)
        res.raise_for_status()
        return jsonify({"results": scrape_4khdhub_movie_cards(BeautifulSoup(res.text, 'html.parser'))})
    except requests.exceptions.RequestException as e:
        return jsonify({"error": str(e)}), 500


# BUGFIX: restored the missing <int:page_num> converter (see hdhub4u_page).
@app.route('/4khdhub/page/<int:page_num>')
def page_4khd(page_num):
    try:
        res = requests.get(f"https://4khdhub.fans/page/{page_num}.html", timeout=10)
        res.raise_for_status()
        return jsonify({"results": scrape_4khdhub_movie_cards(BeautifulSoup(res.text, 'html.parser'))})
    except requests.exceptions.RequestException as e:
        return jsonify({"error": str(e)}), 500


@app.route('/4khdhub/search')
def search_4khd():
    query = request.args.get('query')
    page = request.args.get('page', 1, type=int)
    if not query:
        return jsonify({"results": []})
    url = (f"https://4khdhub.fans/page/{page}.html?s={quote(query)}" if page > 1
           else f"https://4khdhub.fans/?s={quote(query)}")
    try:
        res = requests.get(url, timeout=10)
        res.raise_for_status()
        return jsonify({"results": scrape_4khdhub_movie_cards(BeautifulSoup(res.text, 'html.parser')),
                        "page": page, "query": query})
    except requests.exceptions.RequestException as e:
        return jsonify({"error": str(e)}), 500


@app.route('/4khdhub/details')
def details_4khd():
    url = request.args.get('url')
    if not url:
        return jsonify({"error": "URL parameter is required"}), 400
    try:
        res = requests.get(unquote(url), timeout=15)
        res.raise_for_status()
        return jsonify(scrape_4khdhub_details_page(BeautifulSoup(res.text, 'html.parser')))
    except requests.exceptions.RequestException as e:
        return jsonify({"error": str(e)}), 500


# --- MoviesDrive Routes ---
@app.route('/moviesdrive/home', methods=['GET'])
def moviesdrive_home():
    return jsonify(scrape_moviesdrive_list("https://moviesdrive.mom/"))
# BUGFIX: the rule previously had no URL converter, so Flask never supplied
# `page_num` and every request raised a TypeError.
@app.route('/moviesdrive/page/<int:page_num>', methods=['GET'])
def moviesdrive_page(page_num):
    return jsonify(scrape_moviesdrive_list(f"https://moviesdrive.mom/page/{page_num}/"))


@app.route('/moviesdrive/search', methods=['GET'])
def moviesdrive_search():
    # NOTE(review): this provider uses 's' while the others use 'q'/'query';
    # kept as-is for backward compatibility with existing clients.
    query = request.args.get('s')
    if not query:
        return jsonify({"error": "A search query 's' is required."}), 400
    return jsonify(scrape_moviesdrive_list(f"https://moviesdrive.mom/?s={quote(query)}"))


@app.route('/moviesdrive/details', methods=['GET'])
def moviesdrive_details():
    page_url = request.args.get('url')
    if not page_url:
        return jsonify({"error": "A 'url' parameter is required."}), 400
    details = scrape_moviesdrive_details(unquote(page_url))
    return jsonify(details) if details else (jsonify({"error": "Failed to retrieve details."}), 500)


# --- Universal Bypass Route ---
@app.route('/bypass')
def bypass():
    """Resolve an intermediate link to its final download target.

    Flow: optional first-level bypass (domains in BYPASS_DOMAINS) via
    get_final_link, then an optional second-level scrape for hblinks.dad /
    mdrive.today hosts; otherwise the (possibly bypassed) URL is returned
    directly as 'final_url'.
    """
    url_to_bypass = request.args.get('url')
    if not url_to_bypass:
        return jsonify({"error": "A 'url' parameter is required."}), 400
    current_url = unquote(url_to_bypass)

    # First level bypass (e.g., viralkhabarbull)
    if any(domain in current_url for domain in BYPASS_DOMAINS):
        print(f"Bypass required for: '{current_url}'")
        bypassed_url = get_final_link(current_url)
        if not bypassed_url:
            return jsonify({"error": "Failed to perform initial bypass"}), 500
        current_url = bypassed_url

    # Second level bypass for specific link hosts
    if 'hblinks.dad' in current_url:
        print(f"Secondary hblinks bypass required for: '{current_url}'")
        groups = scrape_hblinks_page(current_url)
        if groups:
            return jsonify({"download_groups": groups})
        return jsonify({"error": "Failed to scrape final links from hblinks."}), 500
    elif 'mdrive.today' in current_url:
        print(f"Secondary mdrive bypass required for: '{current_url}'")
        groups = scrape_mdrive_page(current_url)
        if groups:
            return jsonify({"download_groups": groups})
        return jsonify({"error": "Failed to scrape final links from mdrive."}), 500

    # If no secondary bypass was needed, it's the final URL
    print(f"Direct link after potential first bypass: '{current_url}'")
    return jsonify({"final_url": current_url})