# flix/app.py
# Author: sanch1tx
# Revision: 840863b (verified) — "Update app.py"
from flask import Flask, jsonify, request
from flask_cors import CORS
import requests
import re
import base64
import json
from bs4 import BeautifulSoup
from urllib.parse import quote, unquote, urljoin, urlparse
app = Flask(__name__)
CORS(app)
# A list of domains that require the first level of bypass logic
BYPASS_DOMAINS = ['4khdhub.fans', 'viralkhabarbull.com', 'taazabull24.com']
# ==============================================================================
# --- PROVIDER 1: HDHub4u (hdhub4u.cologne) ---
# ==============================================================================
def scrape_hdhub4u_list(url):
    """Scrape a hdhub4u listing/search page into movie summaries.

    Args:
        url: Full listing-page URL to fetch.

    Returns:
        A list of dicts with keys: title, poster, year, seasons_info,
        url, quality. Returns [] on any request failure.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
    movies_data = []
    try:
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')
        for item in soup.select('li.thumb'):
            caption = item.find('figcaption')
            link_tag = caption.find('a') if caption else None
            poster_tag = item.find('img')
            if not (link_tag and poster_tag):
                continue
            title = link_tag.get_text(strip=True)
            # BUGFIX: use a distinct name here. The original rebound the `url`
            # parameter inside the loop, so the error log below could report a
            # movie link instead of the page that was actually requested.
            item_url = link_tag.get('href')
            poster = poster_tag.get('src')
            # Year is conventionally embedded in the title as "(YYYY)".
            year_match = re.search(r'\((\d{4})\)', title)
            year = year_match.group(1) if year_match else "N/A"
            # Collect any known rip/quality tokens from the title.
            quality_matches = re.findall(r'\b(WEB-DL|WEBDL|HDRip|HD-Rip|HDTC|HDTS|DVDScr|BluRay|BRRip|CAM|CAMRip|HDTV)\b', title, re.IGNORECASE)
            quality = ' | '.join(sorted(set(quality_matches), reverse=True)) if quality_matches else "N/A"
            seasons_match = re.search(r'(Season\s*\d+)', title, re.IGNORECASE)
            seasons_info = seasons_match.group(1) if seasons_match else "N/A"
            if seasons_info == "N/A" and "Full Series" in title:
                seasons_info = "Full Series"
            movies_data.append({
                'title': title, 'poster': poster, 'year': year,
                'seasons_info': seasons_info, 'url': item_url, 'quality': quality
            })
    except requests.exceptions.RequestException as e:
        print(f"[HDHub4u] Scraping failed for {url}: {e}")
    return movies_data
def scrape_hdhub4u_details(page_url):
    """Scrapes detailed information and all download links from a hdhub4u page, excluding samples and 480p.

    Walks the page's h2/h3/h4 headings as a small state machine: an h2
    switches between the 'batch' and 'single' (per-episode) sections, and
    subsequent tags contribute links to whichever section is active.

    Returns a dict with title, poster, storyline, screenshots,
    batch_download_links and single_episode_links, or None on failure.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
    try:
        response = requests.get(page_url, headers=headers, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')
        main_content = soup.find('main', class_='page-body')
        if not main_content: return None
        title = soup.find('h1', class_='page-title').get_text(strip=True)
        poster_tag = main_content.select_one('p img.aligncenter')
        poster = poster_tag['src'] if poster_tag else "N/A"
        storyline_tag = main_content.find('div', class_='kno-rdesc')
        # Drop any trailing "Review :" section from the storyline text.
        storyline = storyline_tag.get_text(strip=True).split('Review :')[0].strip() if storyline_tag else "N/A"
        screenshots_heading = main_content.find('h2', string=re.compile(r': Screen-Shots :'))
        screenshots = []
        if screenshots_heading:
            # Screenshots are images inside the h3 immediately after the heading.
            screenshot_container = screenshots_heading.find_next_sibling('h3')
            if screenshot_container:
                screenshots = [img['src'] for img in screenshot_container.find_all('img')]
        # State for the heading walk: which section we're in and the episode
        # currently being accumulated (for the 'single' section).
        batch_links, single_episode_links, current_section, current_episode = [], [], None, None
        tags = main_content.find_all(['h2', 'h3', 'h4'])
        for tag in tags:
            tag_text = tag.get_text(strip=True)
            if tag.name == 'h2':
                # h2 headings switch sections; any other h2 ends the current one.
                if ': DOWNLOAD LINKS :' in tag_text: current_section = 'batch'
                elif ': Single Episode' in tag_text: current_section = 'single'
                else:
                    # Flush a partially-built episode before leaving the section.
                    if current_episode:
                        single_episode_links.append(current_episode)
                    current_episode = None
                    current_section = None
                continue
            if current_section == 'batch' and tag.find('a'):
                link_tag = tag.find('a')
                link_text = link_tag.get_text(strip=True).lower()
                # Skip streaming ("watch"), sample, and 480p batch entries.
                if 'watch' not in link_text and 'sample' not in link_text and '480p' not in link_text:
                    batch_links.append({'text': link_tag.get_text(strip=True), 'url': link_tag['href']})
            elif current_section == 'single' and tag.name == 'h4':
                if 'EPiSODE' in tag_text:
                    # A new episode heading: flush the previous one, start fresh.
                    if current_episode: single_episode_links.append(current_episode)
                    current_episode = {"episode": tag_text, "download_links": []}
                elif tag.find('a') and current_episode:
                    # Quality label is a red-styled span preceding the links.
                    quality_tag = tag.find('span', style=re.compile(r'color: #ff0000'))
                    quality = quality_tag.get_text(strip=True).replace('–', '').strip() if quality_tag else 'N/A'
                    if '480p' in quality.lower():
                        continue
                    links = []
                    for a in tag.find_all('a'):
                        link_name_lower = a.get_text(strip=True).lower()
                        if 'watch' not in link_name_lower and 'sample' not in link_name_lower:
                            links.append({'name': a.get_text(strip=True), 'url': a['href']})
                    if links:
                        current_episode['download_links'].append({'quality': quality, 'urls': links})
        # Flush the final episode if the page ended mid-section.
        if current_episode:
            single_episode_links.append(current_episode)
        return {'title': title, 'poster': poster, 'storyline': storyline, 'screenshots': screenshots, 'batch_download_links': batch_links, 'single_episode_links': single_episode_links}
    except Exception as e:
        print(f"[HDHub4u] An error occurred during detail parsing: {e}")
        return None
# ==============================================================================
# --- PROVIDER 2: 4KHDHub (4khdhub.fans) ---
# ==============================================================================
def scrape_4khdhub_movie_cards(soup):
    """Extract movie-card summaries from a 4khdhub listing page.

    Args:
        soup: BeautifulSoup of a listing/search page.

    Returns:
        A list of dicts with keys: title, poster, year, seasons_info,
        url, quality.
    """
    results = []
    for card in soup.select('a.movie-card'):
        title_el = card.select_one('h3.movie-card-title')
        img_el = card.select_one('img')
        # ROBUSTNESS: the original dereferenced these selectors directly, so a
        # single malformed card raised AttributeError and lost the whole page.
        if not (title_el and img_el):
            continue
        title = title_el.text.strip()
        poster = img_el.get('src', '')
        # Meta line is "year • seasons-info" (second part optional).
        meta_el = card.select_one('p.movie-card-meta')
        meta_parts = meta_el.text.strip().split('•') if meta_el else []
        year = meta_parts[0].strip() if meta_parts else 'N/A'
        seasons_info = meta_parts[1].strip() if len(meta_parts) > 1 else 'N/A'
        quality_element = card.select_one('.movie-card-format:last-child')
        quality = quality_element.text.strip() if quality_element else 'N/A'
        results.append({
            'title': title, 'poster': poster, 'year': year,
            'seasons_info': seasons_info, 'url': card['href'], 'quality': quality
        })
    return results
def scrape_4khdhub_details_page(soup):
    """Parse a 4khdhub movie/series detail page into a structured dict.

    Args:
        soup: BeautifulSoup of the full detail page.

    Returns:
        A dict with title/tagline/poster/summary/tags/metadata/screenshots,
        'page_type' ("Series" or "Movie") and grouped 'download_options'
        (season_packs, individual_episodes, movie_files).
    """
    details = {}
    # Header fields; each selector is optional with a fallback default.
    details['title'] = soup.select_one('h1.page-title').text.strip() if soup.select_one('h1.page-title') else 'N/A'
    details['tagline'] = soup.select_one('p.movie-tagline').text.strip() if soup.select_one('p.movie-tagline') else ''
    details['poster'] = soup.select_one('.poster-image img')['src'] if soup.select_one('.poster-image img') else ''
    details['summary'] = soup.select_one('.content-section > p').text.strip() if soup.select_one('.content-section > p') else ''
    tags_container = soup.select_one('.mt-2.flex.flex-wrap')
    details['tags'] = [badge.text.strip() for badge in tags_container.select('.badge')] if tags_container else []
    # Key/value metadata rows; labels are lowercased with colons removed.
    metadata = {}
    for item in soup.select('.metadata-item'):
        label = item.select_one('.metadata-label').text.strip().replace(':', '').lower()
        value = item.select_one('.metadata-value').text.strip()
        if label and value: metadata[label] = value
    details['metadata'] = metadata
    # Normalise the free-form 'print' field into a deduplicated 'quality' string.
    if 'print' in details['metadata']:
        print_text = details['metadata']['print']
        quality_matches = re.findall(r'\b(4K|2160p|1080p|720p|480p|WEB-DL|WEBDL|HDRip|HD-Rip|BluRay|BRRip|HDTV)\b', print_text, re.IGNORECASE)
        if quality_matches:
            # Deduplicate case-insensitively while preserving first-seen order.
            seen = set()
            clean_qualities = []
            for q in quality_matches:
                if q.lower() not in seen:
                    clean_qualities.append(q)
                    seen.add(q.lower())
            details['metadata']['quality'] = ', '.join(clean_qualities)
        else:
            details['metadata']['quality'] = print_text
        del details['metadata']['print']
    details['screenshots'] = [img['src'] for img in soup.select('.ss-img img')]
    # Presence of the series tab strip distinguishes series from movie pages.
    is_series = bool(soup.select_one('.series-tabs'))
    details['page_type'] = "Series" if is_series else "Movie"
    download_options = {"season_packs": [], "individual_episodes": [], "movie_files": []}
    if is_series:
        # Season packs live under the "#complete-pack" tab.
        pack_tab = soup.select_one('#complete-pack')
        if pack_tab:
            for item in pack_tab.select('.download-item'):
                pack = {}
                header = item.select_one('.download-header .flex-1')
                # Direct text of the header (excluding child elements) is the quality label.
                pack['quality_description'] = header.find(text=True, recursive=False).strip()
                content = item.select_one('[id^="content-file"]')
                pack['full_filename'] = content.select_one('.file-title').text.strip() if content.select_one('.file-title') else 'N/A'
                downloads = [{'provider': link.text.strip(), 'url': link['href']} for link in content.select('a.btn')]
                pack['downloads'] = downloads
                pack['season_title'] = header.select_one('.season-title').text.strip() if header.select_one('.season-title') else pack['quality_description']
                download_options['season_packs'].append(pack)
        # Per-episode links live under the "#episodes" tab, grouped by season.
        episodes_tab = soup.select_one('#episodes')
        if episodes_tab:
            for season_section in episodes_tab.select('.season-item'):
                season_title = season_section.select_one('.episode-title').text.strip()
                episodes = []
                for episode_item in season_section.select('.episode-download-item'):
                    episode = {}
                    episode['full_filename'] = episode_item.select_one('.episode-file-title').text.strip()
                    downloads = [{'provider': link.text.strip(), 'url': link['href']} for link in episode_item.select('a.btn')]
                    episode['downloads'] = downloads
                    # The nearest preceding <h5> labels the episode's quality, when present.
                    episode['quality_description'] = episode_item.select_one('.episode-file-title').find_previous('h5').text.strip() if episode_item.select_one('.episode-file-title') and episode_item.select_one('.episode-file-title').find_previous('h5') else 'Episode File'
                    episodes.append(episode)
                download_options['individual_episodes'].append({'season_title': season_title, 'episodes': episodes})
    else:
        # Movie pages list their files directly as .download-item entries.
        for item in soup.select('.download-item'):
            movie_file = {}
            header = item.select_one('.download-header .flex-1')
            movie_file['quality_description'] = header.find(text=True, recursive=False).strip()
            content = item.select_one('[id^="content-file"]')
            movie_file['full_filename'] = content.select_one('.file-title').text.strip() if content.select_one('.file-title') else 'N/A'
            downloads = [{'provider': link.text.strip(), 'url': link['href']} for link in content.select('a.btn')]
            movie_file['downloads'] = downloads
            download_options['movie_files'].append(movie_file)
    details['download_options'] = download_options
    return details
# ==============================================================================
# --- PROVIDER 3: MoviesDrive (moviesdrive.mom) ---
# ==============================================================================
def scrape_moviesdrive_list(url):
    """Scrape a MoviesDrive listing/search page into movie summaries.

    Args:
        url: Full listing-page URL to fetch.

    Returns:
        A list of dicts with keys: title, poster, year, seasons_info,
        url, quality. Returns [] on any request failure.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
    movies_data = []
    try:
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')
        for item in soup.select('li.thumb'):
            caption = item.find('figcaption')
            link_tag = caption.find('a') if caption else None
            poster_tag = item.find('img')
            if not (link_tag and poster_tag):
                continue
            title = link_tag.get_text(strip=True)
            # BUGFIX: use a distinct name here. The original rebound the `url`
            # parameter inside the loop, so the error log below could report a
            # movie link instead of the page that was actually requested.
            item_url = link_tag.get('href')
            poster = poster_tag.get('src')
            # Year is conventionally embedded in the title as "(YYYY)".
            year_match = re.search(r'\((\d{4})\)', title)
            year = year_match.group(1) if year_match else "N/A"
            # Collect any known rip/resolution tokens from the title.
            quality_matches = re.findall(r'\b(WEB-DL|WEBDL|HDRip|HD-Rip|HDTC|HDTS|DVDScr|BluRay|BRRip|CAM|CAMRip|HDTV|4K|2160p|1080p|720p|480p)\b', title, re.IGNORECASE)
            quality = ' | '.join(sorted(set(quality_matches), reverse=True)) if quality_matches else "N/A"
            seasons_match = re.search(r'(Season\s*\d+)', title, re.IGNORECASE)
            seasons_info = seasons_match.group(1) if seasons_match else "N/A"
            if seasons_info == "N/A" and ("Series" in title or "S0" in title):
                seasons_info = "Series"
            movies_data.append({
                'title': title, 'poster': poster, 'year': year,
                'seasons_info': seasons_info, 'url': item_url, 'quality': quality
            })
    except requests.exceptions.RequestException as e:
        print(f"[MoviesDrive] Scraping failed for {url}: {e}")
    return movies_data
def scrape_moviesdrive_details(page_url):
    """Scrapes detailed information from a MoviesDrive page.

    Returns a dict with title, poster, storyline, screenshots and a
    filtered download_options list (480p and Telegram links excluded),
    or None on any failure.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
    try:
        response = requests.get(page_url, headers=headers, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')
        main_content = soup.find('main', class_='page-body')
        if not main_content: return None
        title = soup.find('h1', class_='page-title').get_text(strip=True)
        # Improved poster selector
        poster_tag = main_content.select_one('.entry-content img.aligncenter, .entry-content p > img')
        poster = poster_tag['src'] if poster_tag else "N/A"
        # Locate the "Storyline" and "Screen-Shots" section headings among the h3s.
        all_h3s = main_content.find_all('h3')
        storyline_h3 = None
        screenshots_h3 = None
        for h3 in all_h3s:
            if 'Storyline' in h3.get_text():
                storyline_h3 = h3
            elif 'Screen-Shots' in h3.get_text():
                screenshots_h3 = h3
        # Storyline text sits in the div immediately following its heading.
        storyline = storyline_h3.find_next_sibling('div').get_text(strip=True) if storyline_h3 and storyline_h3.find_next_sibling('div') else "N/A"
        screenshots = []
        if screenshots_h3:
            # Screenshot images sit in the p immediately following their heading.
            screenshot_container = screenshots_h3.find_next_sibling('p')
            if screenshot_container:
                screenshots = [img['src'] for img in screenshot_container.find_all('img')]
        download_options = []
        # Find all link tags, which are typically in h5 or p tags for this provider
        for link_container in main_content.select('.entry-content h5, .entry-content p'):
            link_tag = link_container.find('a')
            if link_tag and link_tag.get('href'):
                # Exclude 480p links and non-download links
                text_lower = link_container.get_text(strip=True).lower()
                if '480p' not in text_lower and 'telegram' not in text_lower:
                    download_options.append({
                        'text': link_container.get_text(strip=True),
                        'url': link_tag['href']
                    })
        return {
            'title': title, 'poster': poster, 'storyline': storyline,
            'screenshots': screenshots, 'download_options': download_options
        }
    except Exception as e:
        print(f"[MoviesDrive] An error occurred during detail parsing: {e}")
        return None
# ==============================================================================
# --- SHARED BYPASS & SCRAPING FUNCTIONALITY ---
# ==============================================================================
def scrape_hblinks_page(page_url):
    """Scrapes the final cloud links from an hblinks.dad page and groups them.

    Walks the h3/h5 tags of the entry content: tags with usable anchors
    contribute links to the current group (or open a new one, per the
    heuristic below); link-less tags are treated as quality-title headers
    for the links that follow.

    Returns a list of {'quality_title', 'links'} dicts; groups that never
    received links are dropped. Returns [] on request failure.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
    groups = []
    try:
        response = requests.get(page_url, headers=headers, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')
        content = soup.select_one('.entry-content')
        if not content:
            return []
        current_group = None
        # Iterate over all relevant tags (h3, h5) that might contain titles or links
        for tag in content.find_all(['h3', 'h5']):
            tag_text = tag.get_text(strip=True)
            # Check for links first; Telegram and self-referential hblinks URLs are ignored.
            links_in_tag = []
            for a_tag in tag.find_all('a', href=True):
                href = a_tag.get('href')
                if href and 't.me' not in href and 'hblinks' not in href:
                    provider = a_tag.get_text(strip=True) or "Download"
                    links_in_tag.append({'provider': provider, 'url': href})
            if links_in_tag:
                # If we find links, we need to decide which group they belong to.
                # If there's no current group, or the tag text is a title, create a new one.
                if current_group is None or len(links_in_tag) > 1 or not re.search(r'drive|cloud|instant', tag_text, re.I):
                    group_title = tag_text
                    # Clean up title if it contains the provider name
                    if len(links_in_tag) == 1:
                        group_title = re.sub(r'\[?'+re.escape(links_in_tag[0]['provider'])+r'\]?', '', group_title).strip()
                    current_group = {'quality_title': group_title, 'links': []}
                    groups.append(current_group)
                current_group['links'].extend(links_in_tag)
            # If the tag has no links but looks like a title, it's a header for the next links.
            elif not tag.find('a'):
                current_group = {'quality_title': tag_text, 'links': []}
                groups.append(current_group)
    except requests.exceptions.RequestException as e:
        print(f"[HBLinks] Scraping failed for {page_url}: {e}")
    # Clean up any groups that were created but never got links.
    return [g for g in groups if g['links']]
def scrape_mdrive_page(page_url):
    """Scrapes the final cloud links from an mdrive.today page and groups them by episode.

    Each <h5> that names an episode/season (e.g. "Ep01", "Season 2") or a
    resolution ("720p", "1080p") opens a new group; anchors found in the
    h5 tags are attached to the most recently opened group.

    Returns a list of {'quality_title', 'links'} dicts with empty groups
    removed; returns [] on request failure.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
    grouped = []
    active = None
    try:
        page = requests.get(page_url, headers=headers, timeout=10)
        page.raise_for_status()
        document = BeautifulSoup(page.content, 'html.parser')
        body = document.select_one('.entry-content')
        if not body:
            return []
        for heading in body.find_all('h5'):
            heading_text = heading.get_text(strip=True)
            # An episode/season name or a NNNp resolution starts a new group.
            starts_group = (re.search(r'Ep\d+|Season\s\d+', heading_text, re.IGNORECASE)
                            or re.search(r'\d{3,4}p', heading_text))
            if starts_group:
                active = {'quality_title': heading_text, 'links': []}
                grouped.append(active)
            anchors = heading.find_all('a', href=True)
            if anchors and active:
                for anchor in anchors:
                    target = anchor.get('href')
                    label = anchor.get_text(strip=True)
                    # Skip Telegram links and links back to the moviesdrive site itself.
                    if target and label and 't.me' not in target and 'moviesdrive' not in target:
                        active['links'].append({'provider': label, 'url': target})
    except requests.exceptions.RequestException as e:
        print(f"[MDrive Page] Scraping failed for {page_url}: {e}")
    # Discard header-only groups that never received any links.
    return [g for g in grouped if g.get('links')]
def rot13(s):
    """Return *s* with every ASCII letter rotated 13 places (ROT13).

    Non-alphabetic characters pass through unchanged. Uses a single
    str.translate() pass instead of the original per-character Python loop.
    """
    upper = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    lower = 'abcdefghijklmnopqrstuvwxyz'
    # Map each alphabet onto itself shifted by 13 (a self-inverse cipher).
    table = str.maketrans(upper + lower,
                          upper[13:] + upper[:13] + lower[13:] + lower[:13])
    return s.translate(table)
def decode_string(encrypted_string):
    """Reverse the site's link obfuscation.

    The payload is wrapped as: base64(base64(rot13(base64(json)))) reading
    outside-in, so we undo it as b64 -> b64 -> rot13 -> b64 -> JSON.
    Returns the decoded JSON object, or None if any stage fails.
    """
    try:
        stage = base64.b64decode(base64.b64decode(encrypted_string))
        stage = rot13(stage.decode('utf-8'))
        return json.loads(base64.b64decode(stage))
    except Exception:
        return None
def get_final_link(url):
    """Resolve a bypass-domain page to its hidden destination URL.

    Fetches the page, pulls the obfuscated payload passed to s('o', ...)
    in an inline <script>, decodes it via decode_string(), and returns the
    base64-decoded 'o' field. Returns None on any failure.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36', 'Referer': 'https://4khdhub.fans/'}
    try:
        with requests.Session() as session:
            resp = session.get(url, headers=headers, timeout=15)
            resp.raise_for_status()
            page = BeautifulSoup(resp.text, 'html.parser')
            payload = None
            # Scan inline scripts for the s('o','<payload>') call.
            for script in page.find_all('script'):
                body = script.string
                if body and "s('o','" in body:
                    found = re.search(r"s\('o','([^']+)'", body)
                    if found:
                        payload = found.group(1)
                        break
            if payload is None:
                return None
            data = decode_string(payload)
            if not data or 'o' not in data:
                return None
            return base64.b64decode(data.get('o')).decode('utf-8')
    except Exception as e:
        print(f"An error occurred during bypass: {e}")
        return None
# ==============================================================================
# --- FLASK ROUTES (API Endpoints) ---
# ==============================================================================
@app.route('/')
def index():
    """Health-check / welcome endpoint."""
    payload = {"status": "ok", "message": "Welcome to the SanchitFlix Combined API"}
    return jsonify(payload)
# --- HDHub4u Routes ---
@app.route('/hdhub4u/home', methods=['GET'])
def hdhub4u_home():
    """HDHub4u homepage listing."""
    listing = scrape_hdhub4u_list("https://hdhub4u.cologne/")
    return jsonify(listing)
@app.route('/hdhub4u/page/<int:page_num>', methods=['GET'])
def hdhub4u_page(page_num):
    """Paginated HDHub4u listing."""
    page_url = f"https://hdhub4u.cologne/page/{page_num}/"
    return jsonify(scrape_hdhub4u_list(page_url))
@app.route('/hdhub4u/search', methods=['GET'])
def hdhub4u_search():
    """Search HDHub4u; requires query parameter 'q'."""
    query = request.args.get('q')
    if not query:
        return jsonify({"error": "A search query 'q' is required."}), 400
    search_url = f"https://hdhub4u.cologne/?s={quote(query)}"
    return jsonify(scrape_hdhub4u_list(search_url))
@app.route('/hdhub4u/details', methods=['GET'])
def hdhub4u_details():
    """Scrape an HDHub4u detail page; requires the 'url' query parameter."""
    page_url = request.args.get('url')
    if not page_url:
        return jsonify({"error": "A 'url' parameter is required."}), 400
    details = scrape_hdhub4u_details(unquote(page_url))
    if details:
        return jsonify(details)
    return jsonify({"error": "Failed to retrieve details."}), 500
# --- 4KHDHub Routes ---
@app.route('/4khdhub/home')
def home_4khd():
    """4KHDHub homepage listing."""
    try:
        res = requests.get("https://4khdhub.fans", timeout=10)
        res.raise_for_status()
        soup = BeautifulSoup(res.text, 'html.parser')
        return jsonify({"results": scrape_4khdhub_movie_cards(soup)})
    except requests.exceptions.RequestException as e:
        return jsonify({"error": str(e)}), 500
@app.route('/4khdhub/page/<int:page_num>')
def page_4khd(page_num):
    """Paginated 4KHDHub listing."""
    try:
        res = requests.get(f"https://4khdhub.fans/page/{page_num}.html", timeout=10)
        res.raise_for_status()
        soup = BeautifulSoup(res.text, 'html.parser')
        return jsonify({"results": scrape_4khdhub_movie_cards(soup)})
    except requests.exceptions.RequestException as e:
        return jsonify({"error": str(e)}), 500
@app.route('/4khdhub/search')
def search_4khd():
    """Search 4KHDHub; takes 'query' plus an optional integer 'page'."""
    query = request.args.get('query')
    page = request.args.get('page', 1, type=int)
    if not query:
        return jsonify({"results": []})
    # Pages after the first use a different URL shape on this site.
    if page > 1:
        url = f"https://4khdhub.fans/page/{page}.html?s={quote(query)}"
    else:
        url = f"https://4khdhub.fans/?s={quote(query)}"
    try:
        res = requests.get(url, timeout=10)
        res.raise_for_status()
        soup = BeautifulSoup(res.text, 'html.parser')
        return jsonify({"results": scrape_4khdhub_movie_cards(soup), "page": page, "query": query})
    except requests.exceptions.RequestException as e:
        return jsonify({"error": str(e)}), 500
@app.route('/4khdhub/details')
def details_4khd():
    """Scrape a 4KHDHub detail page; requires the 'url' query parameter."""
    url = request.args.get('url')
    if not url:
        return jsonify({"error": "URL parameter is required"}), 400
    try:
        res = requests.get(unquote(url), timeout=15)
        res.raise_for_status()
        soup = BeautifulSoup(res.text, 'html.parser')
        return jsonify(scrape_4khdhub_details_page(soup))
    except requests.exceptions.RequestException as e:
        return jsonify({"error": str(e)}), 500
# --- MoviesDrive Routes ---
@app.route('/moviesdrive/home', methods=['GET'])
def moviesdrive_home():
    """MoviesDrive homepage listing."""
    listing = scrape_moviesdrive_list("https://moviesdrive.mom/")
    return jsonify(listing)
@app.route('/moviesdrive/page/<int:page_num>', methods=['GET'])
def moviesdrive_page(page_num):
    """Paginated MoviesDrive listing."""
    page_url = f"https://moviesdrive.mom/page/{page_num}/"
    return jsonify(scrape_moviesdrive_list(page_url))
@app.route('/moviesdrive/search', methods=['GET'])
def moviesdrive_search():
    """Search MoviesDrive; requires query parameter 's'."""
    query = request.args.get('s')
    if not query:
        return jsonify({"error": "A search query 's' is required."}), 400
    search_url = f"https://moviesdrive.mom/?s={quote(query)}"
    return jsonify(scrape_moviesdrive_list(search_url))
@app.route('/moviesdrive/details', methods=['GET'])
def moviesdrive_details():
    """Scrape a MoviesDrive detail page; requires the 'url' query parameter."""
    page_url = request.args.get('url')
    if not page_url:
        return jsonify({"error": "A 'url' parameter is required."}), 400
    details = scrape_moviesdrive_details(unquote(page_url))
    if details:
        return jsonify(details)
    return jsonify({"error": "Failed to retrieve details."}), 500
# --- Universal Bypass Route ---
@app.route('/bypass')
def bypass():
    """Universal link resolver.

    Accepts ?url=<encoded link> and applies up to two bypass stages:
    1. If the URL is on a known shortener domain (BYPASS_DOMAINS), the
       obfuscated page is decoded via get_final_link().
    2. If the (possibly bypassed) URL points at hblinks.dad or
       mdrive.today, the page is scraped into grouped download links.
    Otherwise the resolved URL is returned directly as 'final_url'.
    """
    url_to_bypass = request.args.get('url')
    if not url_to_bypass:
        return jsonify({"error": "A 'url' parameter is required."}), 400
    decoded_url = unquote(url_to_bypass)
    current_url = decoded_url
    # First level bypass (e.g., viralkhabarbull)
    if any(domain in current_url for domain in BYPASS_DOMAINS):
        print(f"Bypass required for: '{current_url}'")
        bypassed_url = get_final_link(current_url)
        if not bypassed_url:
            return jsonify({"error": "Failed to perform initial bypass"}), 500
        current_url = bypassed_url
    # Second level bypass for specific link hosts
    if 'hblinks.dad' in current_url:
        print(f"Secondary hblinks bypass required for: '{current_url}'")
        groups = scrape_hblinks_page(current_url)
        if groups:
            return jsonify({"download_groups": groups})
        else:
            return jsonify({"error": "Failed to scrape final links from hblinks."}), 500
    elif 'mdrive.today' in current_url:
        print(f"Secondary mdrive bypass required for: '{current_url}'")
        groups = scrape_mdrive_page(current_url)
        if groups:
            return jsonify({"download_groups": groups})
        else:
            return jsonify({"error": "Failed to scrape final links from mdrive."}), 500
    # If no secondary bypass was needed, it's the final URL
    print(f"Direct link after potential first bypass: '{current_url}'")
    return jsonify({"final_url": current_url})