sanch1tx committed on
Commit
840863b
·
verified ·
1 Parent(s): 1fd4ce4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +74 -33
app.py CHANGED
@@ -278,14 +278,20 @@ def scrape_moviesdrive_details(page_url):
278
  if not main_content: return None
279
 
280
  title = soup.find('h1', class_='page-title').get_text(strip=True)
 
 
281
  poster_tag = main_content.select_one('.entry-content img.aligncenter, .entry-content p > img')
282
  poster = poster_tag['src'] if poster_tag else "N/A"
283
 
284
  all_h3s = main_content.find_all('h3')
285
- storyline_h3, screenshots_h3 = None, None
 
 
286
  for h3 in all_h3s:
287
- if 'Storyline' in h3.get_text(): storyline_h3 = h3
288
- elif 'Screen-Shots' in h3.get_text(): screenshots_h3 = h3
 
 
289
 
290
  storyline = storyline_h3.find_next_sibling('div').get_text(strip=True) if storyline_h3 and storyline_h3.find_next_sibling('div') else "N/A"
291
 
@@ -296,9 +302,11 @@ def scrape_moviesdrive_details(page_url):
296
  screenshots = [img['src'] for img in screenshot_container.find_all('img')]
297
 
298
  download_options = []
 
299
  for link_container in main_content.select('.entry-content h5, .entry-content p'):
300
  link_tag = link_container.find('a')
301
  if link_tag and link_tag.get('href'):
 
302
  text_lower = link_container.get_text(strip=True).lower()
303
  if '480p' not in text_lower and 'telegram' not in text_lower:
304
  download_options.append({
@@ -306,7 +314,11 @@ def scrape_moviesdrive_details(page_url):
306
  'url': link_tag['href']
307
  })
308
 
309
- return {'title': title, 'poster': poster, 'storyline': storyline, 'screenshots': screenshots, 'download_options': download_options}
 
 
 
 
310
  except Exception as e:
311
  print(f"[MoviesDrive] An error occurred during detail parsing: {e}")
312
  return None
@@ -323,66 +335,90 @@ def scrape_hblinks_page(page_url):
323
  response = requests.get(page_url, headers=headers, timeout=10)
324
  response.raise_for_status()
325
  soup = BeautifulSoup(response.content, 'html.parser')
 
326
  content = soup.select_one('.entry-content')
327
- if not content: return []
 
328
 
329
  current_group = None
 
330
  for tag in content.find_all(['h3', 'h5']):
331
  tag_text = tag.get_text(strip=True)
332
- links_in_tag = [{'provider': a.get_text(strip=True) or "Download", 'url': a['href']} for a in tag.find_all('a', href=True) if a.get('href') and 't.me' not in a['href']]
333
 
 
 
 
 
 
 
 
 
334
  if links_in_tag:
 
 
335
  if current_group is None or len(links_in_tag) > 1 or not re.search(r'drive|cloud|instant', tag_text, re.I):
336
  group_title = tag_text
 
337
  if len(links_in_tag) == 1:
338
  group_title = re.sub(r'\[?'+re.escape(links_in_tag[0]['provider'])+r'\]?', '', group_title).strip()
339
- current_group = {'quality_title': group_title or "Links", 'links': []}
 
340
  groups.append(current_group)
 
341
  current_group['links'].extend(links_in_tag)
 
 
342
  elif not tag.find('a'):
343
  current_group = {'quality_title': tag_text, 'links': []}
344
  groups.append(current_group)
345
 
346
  except requests.exceptions.RequestException as e:
347
  print(f"[HBLinks] Scraping failed for {page_url}: {e}")
 
 
348
  return [g for g in groups if g['links']]
349
 
 
350
  def scrape_mdrive_page(page_url):
351
- """REWRITTEN: Scrapes final cloud links from an mdrive.today page, correctly grouping by episode title."""
352
  headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
353
  groups = []
 
354
  try:
355
  response = requests.get(page_url, headers=headers, timeout=10)
356
  response.raise_for_status()
357
  soup = BeautifulSoup(response.content, 'html.parser')
358
  content = soup.select_one('.entry-content')
359
- if not content: return []
360
-
361
- current_title = "Download Links"
362
- current_links = []
363
-
364
- for element in content.find_all(['h5', 'hr']):
365
- if element.name == 'h5':
366
- if not element.find('a'): # This is a title tag
367
- if current_links: # Save the previous group
368
- groups.append({'quality_title': current_title, 'links': current_links})
369
- current_links = []
370
- current_title = element.get_text(strip=True)
371
- else: # This is a link tag
372
- for a_tag in element.find_all('a', href=True):
373
- href = a_tag.get('href')
374
- provider = a_tag.get_text(strip=True)
375
- if href and provider and 't.me' not in href and 'moviesdrive' not in href:
376
- current_links.append({'provider': provider, 'url': href})
377
-
378
- if current_links: # Append the last collected group
379
- groups.append({'quality_title': current_title, 'links': current_links})
 
380
 
381
  except requests.exceptions.RequestException as e:
382
  print(f"[MDrive Page] Scraping failed for {page_url}: {e}")
383
 
 
384
  return [g for g in groups if g.get('links')]
385
 
 
386
  def rot13(s):
387
  result = []
388
  for char in s:
@@ -529,14 +565,19 @@ def bypass():
529
  if 'hblinks.dad' in current_url:
530
  print(f"Secondary hblinks bypass required for: '{current_url}'")
531
  groups = scrape_hblinks_page(current_url)
532
- return jsonify({"download_groups": groups}) if groups else (jsonify({"error": "Failed to scrape final links from hblinks."}), 500)
 
 
 
533
 
534
  elif 'mdrive.today' in current_url:
535
  print(f"Secondary mdrive bypass required for: '{current_url}'")
536
  groups = scrape_mdrive_page(current_url)
537
- return jsonify({"download_groups": groups}) if groups else (jsonify({"error": "Failed to scrape final links from mdrive."}), 500)
 
 
 
538
 
539
  # If no secondary bypass was needed, it's the final URL
540
  print(f"Direct link after potential first bypass: '{current_url}'")
541
- return jsonify({"final_url": current_url})
542
-
 
278
  if not main_content: return None
279
 
280
  title = soup.find('h1', class_='page-title').get_text(strip=True)
281
+
282
+ # Improved poster selector
283
  poster_tag = main_content.select_one('.entry-content img.aligncenter, .entry-content p > img')
284
  poster = poster_tag['src'] if poster_tag else "N/A"
285
 
286
  all_h3s = main_content.find_all('h3')
287
+ storyline_h3 = None
288
+ screenshots_h3 = None
289
+
290
  for h3 in all_h3s:
291
+ if 'Storyline' in h3.get_text():
292
+ storyline_h3 = h3
293
+ elif 'Screen-Shots' in h3.get_text():
294
+ screenshots_h3 = h3
295
 
296
  storyline = storyline_h3.find_next_sibling('div').get_text(strip=True) if storyline_h3 and storyline_h3.find_next_sibling('div') else "N/A"
297
 
 
302
  screenshots = [img['src'] for img in screenshot_container.find_all('img')]
303
 
304
  download_options = []
305
+ # Find all link tags, which are typically in h5 or p tags for this provider
306
  for link_container in main_content.select('.entry-content h5, .entry-content p'):
307
  link_tag = link_container.find('a')
308
  if link_tag and link_tag.get('href'):
309
+ # Exclude 480p links and non-download links
310
  text_lower = link_container.get_text(strip=True).lower()
311
  if '480p' not in text_lower and 'telegram' not in text_lower:
312
  download_options.append({
 
314
  'url': link_tag['href']
315
  })
316
 
317
+ return {
318
+ 'title': title, 'poster': poster, 'storyline': storyline,
319
+ 'screenshots': screenshots, 'download_options': download_options
320
+ }
321
+
322
  except Exception as e:
323
  print(f"[MoviesDrive] An error occurred during detail parsing: {e}")
324
  return None
 
335
  response = requests.get(page_url, headers=headers, timeout=10)
336
  response.raise_for_status()
337
  soup = BeautifulSoup(response.content, 'html.parser')
338
+
339
  content = soup.select_one('.entry-content')
340
+ if not content:
341
+ return []
342
 
343
  current_group = None
344
+ # Iterate over all relevant tags (h3, h5) that might contain titles or links
345
  for tag in content.find_all(['h3', 'h5']):
346
  tag_text = tag.get_text(strip=True)
 
347
 
348
+ # Check for links first
349
+ links_in_tag = []
350
+ for a_tag in tag.find_all('a', href=True):
351
+ href = a_tag.get('href')
352
+ if href and 't.me' not in href and 'hblinks' not in href:
353
+ provider = a_tag.get_text(strip=True) or "Download"
354
+ links_in_tag.append({'provider': provider, 'url': href})
355
+
356
  if links_in_tag:
357
+ # If we find links, we need to decide which group they belong to.
358
+ # If there's no current group, or the tag text is a title, create a new one.
359
  if current_group is None or len(links_in_tag) > 1 or not re.search(r'drive|cloud|instant', tag_text, re.I):
360
  group_title = tag_text
361
+ # Clean up title if it contains the provider name
362
  if len(links_in_tag) == 1:
363
  group_title = re.sub(r'\[?'+re.escape(links_in_tag[0]['provider'])+r'\]?', '', group_title).strip()
364
+
365
+ current_group = {'quality_title': group_title, 'links': []}
366
  groups.append(current_group)
367
+
368
  current_group['links'].extend(links_in_tag)
369
+
370
+ # If the tag has no links but looks like a title, it's a header for the next links.
371
  elif not tag.find('a'):
372
  current_group = {'quality_title': tag_text, 'links': []}
373
  groups.append(current_group)
374
 
375
  except requests.exceptions.RequestException as e:
376
  print(f"[HBLinks] Scraping failed for {page_url}: {e}")
377
+
378
+ # Clean up any groups that were created but never got links.
379
  return [g for g in groups if g['links']]
380
 
381
+
382
  def scrape_mdrive_page(page_url):
383
+ """Scrapes the final cloud links from an mdrive.today page and groups them by episode."""
384
  headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
385
  groups = []
386
+ current_group = None
387
  try:
388
  response = requests.get(page_url, headers=headers, timeout=10)
389
  response.raise_for_status()
390
  soup = BeautifulSoup(response.content, 'html.parser')
391
  content = soup.select_one('.entry-content')
392
+ if not content:
393
+ return []
394
+
395
+ for tag in content.find_all('h5'):
396
+ tag_text = tag.get_text(strip=True)
397
+
398
+ # Check if it's an episode/quality header. This marks the start of a new group.
399
+ if re.search(r'Ep\d+|Season\s\d+', tag_text, re.IGNORECASE) or re.search(r'\d{3,4}p', tag_text):
400
+ current_group = {
401
+ 'quality_title': tag_text,
402
+ 'links': []
403
+ }
404
+ groups.append(current_group)
405
+
406
+ # Check for links within this tag and add them to the current group
407
+ links_in_tag = tag.find_all('a', href=True)
408
+ if links_in_tag and current_group:
409
+ for a_tag in links_in_tag:
410
+ href = a_tag.get('href')
411
+ provider = a_tag.get_text(strip=True)
412
+ if href and provider and 't.me' not in href and 'moviesdrive' not in href:
413
+ current_group['links'].append({'provider': provider, 'url': href})
414
 
415
  except requests.exceptions.RequestException as e:
416
  print(f"[MDrive Page] Scraping failed for {page_url}: {e}")
417
 
418
+ # Clean up empty groups that might have been created
419
  return [g for g in groups if g.get('links')]
420
 
421
+
422
  def rot13(s):
423
  result = []
424
  for char in s:
 
565
  if 'hblinks.dad' in current_url:
566
  print(f"Secondary hblinks bypass required for: '{current_url}'")
567
  groups = scrape_hblinks_page(current_url)
568
+ if groups:
569
+ return jsonify({"download_groups": groups})
570
+ else:
571
+ return jsonify({"error": "Failed to scrape final links from hblinks."}), 500
572
 
573
  elif 'mdrive.today' in current_url:
574
  print(f"Secondary mdrive bypass required for: '{current_url}'")
575
  groups = scrape_mdrive_page(current_url)
576
+ if groups:
577
+ return jsonify({"download_groups": groups})
578
+ else:
579
+ return jsonify({"error": "Failed to scrape final links from mdrive."}), 500
580
 
581
  # If no secondary bypass was needed, it's the final URL
582
  print(f"Direct link after potential first bypass: '{current_url}'")
583
+ return jsonify({"final_url": current_url})