Spaces:
Build error
Build error
Commit
·
23978b6
1
Parent(s):
613081c
fix episode downloading
Browse files- app.py +4 -4
- hf_scrapper.py +70 -2
app.py
CHANGED
|
@@ -3,7 +3,7 @@ import os
|
|
| 3 |
import json
|
| 4 |
import threading
|
| 5 |
import urllib.parse
|
| 6 |
-
from hf_scrapper import
|
| 7 |
from indexer import indexer
|
| 8 |
from tvdb import fetch_and_cache_json
|
| 9 |
import re
|
|
@@ -154,7 +154,7 @@ thread.start()
|
|
| 154 |
|
| 155 |
# API Endpoints
|
| 156 |
|
| 157 |
-
@app.route('/api/
|
| 158 |
def get_movie_api():
|
| 159 |
"""Endpoint to get the movie by title."""
|
| 160 |
title = request.args.get('title')
|
|
@@ -183,7 +183,7 @@ def get_movie_api():
|
|
| 183 |
|
| 184 |
# Start the download in a separate thread if not already downloading
|
| 185 |
if film_id not in download_threads or not download_threads[film_id].is_alive():
|
| 186 |
-
thread = threading.Thread(target=
|
| 187 |
download_threads[film_id] = thread
|
| 188 |
thread.start()
|
| 189 |
|
|
@@ -236,7 +236,7 @@ def get_tv_show_api():
|
|
| 236 |
|
| 237 |
# Start the download in a separate thread if not already downloading
|
| 238 |
if episode_id not in download_threads or not download_threads[episode_id].is_alive():
|
| 239 |
-
thread = threading.Thread(target=
|
| 240 |
download_threads[episode_id] = thread
|
| 241 |
thread.start()
|
| 242 |
|
|
|
|
| 3 |
import json
|
| 4 |
import threading
|
| 5 |
import urllib.parse
|
| 6 |
+
from hf_scrapper import download_film, download_episode, get_system_proxies, get_download_progress
|
| 7 |
from indexer import indexer
|
| 8 |
from tvdb import fetch_and_cache_json
|
| 9 |
import re
|
|
|
|
| 154 |
|
| 155 |
# API Endpoints
|
| 156 |
|
| 157 |
+
@app.route('/api/film', methods=['GET'])
|
| 158 |
def get_movie_api():
|
| 159 |
"""Endpoint to get the movie by title."""
|
| 160 |
title = request.args.get('title')
|
|
|
|
| 183 |
|
| 184 |
# Start the download in a separate thread if not already downloading
|
| 185 |
if film_id not in download_threads or not download_threads[film_id].is_alive():
|
| 186 |
+
thread = threading.Thread(target=download_film, args=(file_url, TOKEN, cache_path, proxies, film_id, title))
|
| 187 |
download_threads[film_id] = thread
|
| 188 |
thread.start()
|
| 189 |
|
|
|
|
| 236 |
|
| 237 |
# Start the download in a separate thread if not already downloading
|
| 238 |
if episode_id not in download_threads or not download_threads[episode_id].is_alive():
|
| 239 |
+
thread = threading.Thread(target=download_episode, args=(file_url, TOKEN, cache_path, proxies, episode_id, title))
|
| 240 |
download_threads[episode_id] = thread
|
| 241 |
thread.start()
|
| 242 |
|
hf_scrapper.py
CHANGED
|
@@ -29,7 +29,7 @@ def get_system_proxies():
|
|
| 29 |
print(f"Error getting system proxies: {e}")
|
| 30 |
return {}
|
| 31 |
|
| 32 |
-
def
|
| 33 |
"""
|
| 34 |
Downloads a file from the specified URL and saves it to the cache path.
|
| 35 |
Tracks the download progress.
|
|
@@ -120,6 +120,74 @@ def update_film_store_json(title, cache_path):
|
|
| 120 |
json.dump(film_store_data, json_file, indent=2)
|
| 121 |
print(f'Film store updated with {title}.')
|
| 122 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 123 |
def get_file_structure(repo, token, path="", proxies=None):
|
| 124 |
"""
|
| 125 |
Fetches the file structure of a specified Hugging Face repository.
|
|
@@ -165,4 +233,4 @@ if __name__ == "__main__":
|
|
| 165 |
cache_path = os.path.join(CACHE_DIR, "films/Funky Monkey 2004/Funky Monkey (2004) Web-dl 1080p.mp4")
|
| 166 |
proxies = get_system_proxies()
|
| 167 |
film_id = "funky_monkey_2004" # Unique identifier for the film download
|
| 168 |
-
|
|
|
|
| 29 |
print(f"Error getting system proxies: {e}")
|
| 30 |
return {}
|
| 31 |
|
| 32 |
+
def download_film(file_url, token, cache_path, proxies, film_id, title, chunk_size=100 * 1024 * 1024):
|
| 33 |
"""
|
| 34 |
Downloads a file from the specified URL and saves it to the cache path.
|
| 35 |
Tracks the download progress.
|
|
|
|
| 120 |
json.dump(film_store_data, json_file, indent=2)
|
| 121 |
print(f'Film store updated with {title}.')
|
| 122 |
|
| 123 |
+
|
| 124 |
+
###############################################################################
|
| 125 |
+
def download_episode(file_url, token, cache_path, proxies, episode_id, title, chunk_size=100 * 1024 * 1024):
    """
    Downloads an episode file from the specified URL and saves it to the cache path.
    Tracks the download progress in the module-level `download_progress` dict.

    Args:
        file_url (str): The URL of the file to download.
        token (str): The authorization token for the request.
        cache_path (str): The path to save the downloaded file.
        proxies (dict): Proxies for the request.
        episode_id (str): Unique identifier for the episode download.
        title (str): The title of the episode.
        chunk_size (int): Size of each chunk to download, in bytes.
    """
    print(f"Downloading file from URL: {file_url} to {cache_path} with proxies: {proxies}")
    headers = {'Authorization': f'Bearer {token}'}
    # Create the progress entry BEFORE the try block:
    # 1) BUG FIX: it was keyed on `film_id`, which is not a parameter of this
    #    function, so every call raised NameError here (all later accesses
    #    correctly use `episode_id`).
    # 2) Robustness: if requests.get() fails early, the except/finally handlers
    #    below index download_progress[episode_id]; the entry must already exist
    #    or they would raise KeyError and mask the real error.
    download_progress[episode_id] = {"total": 0, "downloaded": 0, "status": "Downloading", "start_time": time.time()}
    try:
        response = requests.get(file_url, headers=headers, proxies=proxies, stream=True)
        response.raise_for_status()

        # Content-Length may be absent; 0 then means "unknown size".
        total_size = int(response.headers.get('content-length', 0))
        download_progress[episode_id]["total"] = total_size

        os.makedirs(os.path.dirname(cache_path), exist_ok=True)
        with open(cache_path, 'wb') as file, tqdm(total=total_size, unit='B', unit_scale=True, desc=cache_path) as pbar:
            for data in response.iter_content(chunk_size=chunk_size):
                file.write(data)
                pbar.update(len(data))
                download_progress[episode_id]["downloaded"] += len(data)

        print(f'File cached to {cache_path} successfully.')
        # Record the finished episode in the TV store index before marking done.
        update_tv_store_json(title, cache_path)
        download_progress[episode_id]["status"] = "Completed"
    except RequestException as e:
        print(f"Error downloading file: {e}")
        download_progress[episode_id]["status"] = "Failed"
    except IOError as e:
        print(f"Error writing file {cache_path}: {e}")
        download_progress[episode_id]["status"] = "Failed"
    finally:
        # Stamp end_time only once the download has left the "Downloading"
        # state (i.e. it either completed or failed).
        if download_progress[episode_id]["status"] != "Downloading":
            download_progress[episode_id]["end_time"] = time.time()
|
| 167 |
+
|
| 168 |
+
|
| 169 |
+
def update_tv_store_json(title, cache_path):
    """
    Records a downloaded episode in the TV store JSON index.

    Loads the existing index (if any), maps *title* to *cache_path*,
    and writes the merged index back to disk.

    Args:
        title (str): Title key under which the cached file is recorded.
        cache_path (str): The local path where the file is saved.
    """
    store_path = os.path.join(CACHE_DIR, "tv_store.json")

    # Start from the existing index when present; otherwise from empty.
    tv_store = {}
    if os.path.exists(store_path):
        with open(store_path, 'r') as fh:
            tv_store = json.load(fh)

    tv_store[title] = cache_path

    with open(store_path, 'w') as fh:
        json.dump(tv_store, fh, indent=2)
    print(f'TV store updated with {title}.')
|
| 189 |
+
|
| 190 |
+
###############################################################################
|
| 191 |
def get_file_structure(repo, token, path="", proxies=None):
|
| 192 |
"""
|
| 193 |
Fetches the file structure of a specified Hugging Face repository.
|
|
|
|
| 233 |
cache_path = os.path.join(CACHE_DIR, "films/Funky Monkey 2004/Funky Monkey (2004) Web-dl 1080p.mp4")
|
| 234 |
proxies = get_system_proxies()
|
| 235 |
film_id = "funky_monkey_2004" # Unique identifier for the film download
|
| 236 |
+
download_film(file_url, token, cache_path, proxies=proxies, film_id=film_id)
|