Spaces:
Sleeping
Sleeping
Commit
·
0fe31f5
1
Parent(s):
858a993
add recent
Browse files- app.py +10 -0
- indexer.py +13 -2
- services.py +67 -0
- tvdb.py +41 -3
app.py
CHANGED
|
@@ -5,6 +5,7 @@ import logging
|
|
| 5 |
import os
|
| 6 |
import urllib.parse
|
| 7 |
from utils import read_json_file, is_valid_url
|
|
|
|
| 8 |
|
| 9 |
CACHE_DIR = os.getenv("CACHE_DIR")
|
| 10 |
TOKEN = os.getenv("TOKEN")
|
|
@@ -15,6 +16,7 @@ app = FastAPI()
|
|
| 15 |
@app.on_event("startup")
|
| 16 |
async def startup_event():
|
| 17 |
global load_balancer
|
|
|
|
| 18 |
load_balancer = LoadBalancer(cache_dir=CACHE_DIR, token=TOKEN, repo=REPO)
|
| 19 |
|
| 20 |
@app.get("/")
|
|
@@ -62,6 +64,14 @@ async def get_all_films_api():
|
|
| 62 |
async def get_all_tvshows_api():
|
| 63 |
return load_balancer.get_all_tv_shows()
|
| 64 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
@app.get("/api/get/film/metadata/{title}")
|
| 66 |
async def get_film_metadata_api(title: str):
|
| 67 |
"""Endpoint to get the film metadata by title."""
|
|
|
|
| 5 |
import os
|
| 6 |
import urllib.parse
|
| 7 |
from utils import read_json_file, is_valid_url
|
| 8 |
+
from tvdb import recent_list
|
| 9 |
|
| 10 |
CACHE_DIR = os.getenv("CACHE_DIR")
|
| 11 |
TOKEN = os.getenv("TOKEN")
|
|
|
|
| 16 |
@app.on_event("startup")
|
| 17 |
async def startup_event():
|
| 18 |
global load_balancer
|
| 19 |
+
|
| 20 |
load_balancer = LoadBalancer(cache_dir=CACHE_DIR, token=TOKEN, repo=REPO)
|
| 21 |
|
| 22 |
@app.get("/")
|
|
|
|
| 64 |
async def get_all_tvshows_api():
    """Endpoint to get every TV show tracked by the load balancer."""
    return load_balancer.get_all_tv_shows()
|
| 66 |
|
| 67 |
+
@app.get("/api/get/film/recent")
|
| 68 |
+
async def get_recent_films():
|
| 69 |
+
return recent_list.get_sorted_entries('film')
|
| 70 |
+
|
| 71 |
+
@app.get("/api/get/tv/recent")
|
| 72 |
+
async def get_recent_shows():
|
| 73 |
+
return recent_list.get_sorted_entries('series')
|
| 74 |
+
|
| 75 |
@app.get("/api/get/film/metadata/{title}")
|
| 76 |
async def get_film_metadata_api(title: str):
|
| 77 |
"""Endpoint to get the film metadata by title."""
|
indexer.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
from hf_scrapper import get_file_structure
|
| 2 |
from dotenv import load_dotenv
|
| 3 |
import os
|
|
|
|
| 4 |
|
| 5 |
load_dotenv()
|
| 6 |
|
|
@@ -19,7 +20,7 @@ def index_repository(token, repo, current_path=""):
|
|
| 19 |
file_item = {
|
| 20 |
"type": item['type'],
|
| 21 |
"size": item['size'],
|
| 22 |
-
"path": item['path']
|
| 23 |
}
|
| 24 |
full_structure.append(file_item)
|
| 25 |
return full_structure
|
|
@@ -31,4 +32,14 @@ def indexer():
|
|
| 31 |
print(f"Full file structure for repository '{repo}' has been indexed.")
|
| 32 |
return full_structure
|
| 33 |
|
| 34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from hf_scrapper import get_file_structure
|
| 2 |
from dotenv import load_dotenv
|
| 3 |
import os
|
| 4 |
+
import json
|
| 5 |
|
| 6 |
load_dotenv()
|
| 7 |
|
|
|
|
| 20 |
file_item = {
|
| 21 |
"type": item['type'],
|
| 22 |
"size": item['size'],
|
| 23 |
+
"path": item['path'],
|
| 24 |
}
|
| 25 |
full_structure.append(file_item)
|
| 26 |
return full_structure
|
|
|
|
| 32 |
print(f"Full file structure for repository '{repo}' has been indexed.")
|
| 33 |
return full_structure
|
| 34 |
|
| 35 |
+
if __name__ == '__main__':
    files = indexer()

    # Build the output path with os.path.join: the old f-string concatenation
    # produced "Nonefile_structure.json" when CACHE_DIR was unset, and glued
    # the parts together with no separator when it lacked a trailing slash.
    output_file = os.path.join(os.getenv('CACHE_DIR', ''), "file_structure.json")

    # Save the structure to a JSON file
    with open(output_file, 'w') as f:
        json.dump(files, f, indent=4)

    print(f"File structure saved to {output_file}")
|
services.py
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import bisect
|
| 2 |
+
|
| 3 |
+
class RecentList:
    """Track the most recently released films and series.

    Each title is kept both in a dict (title -> year) for O(1) updates and in
    a list of (-year, title) tuples kept sorted with bisect, so retrieval is
    always newest-first without re-sorting.
    """

    def __init__(self):
        # title -> year lookup tables, one per media type
        self.films = {}
        self.series = {}
        # Parallel sorted lists of (-year, title); negating the year makes
        # ascending bisect order equal to newest-first.
        self.sorted_films = []
        self.sorted_series = []

    def add_entry(self, title, year, media_type):
        """Add or update *title* with *year* under 'film' or 'series'.

        Unknown media types are silently ignored. Raises ValueError if
        *year* cannot be converted to an integer.
        """
        if media_type == 'film':
            self._update_entry(self.films, self.sorted_films, title, year)
        elif media_type == 'series':
            self._update_entry(self.series, self.sorted_series, title, year)

    def _update_entry(self, dictionary, sorted_list, title, year):
        """Insert or refresh (title, year) in *dictionary* and *sorted_list*."""
        try:
            # Also catch TypeError so year=None raises the documented
            # ValueError instead of an unhandled TypeError.
            year = int(year)
        except (TypeError, ValueError):
            raise ValueError(f"Invalid year: {year}. Year must be an integer.")

        if title in dictionary:
            # Remove the stale sorted entry before inserting the new year.
            old_year = dictionary[title]
            try:
                sorted_list.remove((-old_year, title))
            except ValueError:
                pass  # Ignore if the old entry does not exist in the sorted list

        # Update or add the new entry in the dictionary
        dictionary[title] = year

        # Insert the negated year so the list stays sorted newest-first.
        bisect.insort(sorted_list, (-year, title))

    def get_sorted_entries(self, media_type):
        """Return [(title, year), ...] newest-first for the given media type.

        Returns an empty list for unknown media types (previously returned
        None, which broke callers that iterate or serialize the result).
        """
        if media_type == 'film':
            source = self.sorted_films
        elif media_type == 'series':
            source = self.sorted_series
        else:
            return []
        # Stored years are negated; flip the sign back on the way out.
        return [(title, -neg_year) for neg_year, title in source]
|
| 48 |
+
|
| 49 |
+
# Demo: run this module directly to see the newest-first ordering.
if __name__ == "__main__":
    media_list = RecentList()

    # (title, year, media_type) seed data; the second "Film A" entry
    # exercises the update path.
    seed_entries = [
        ("Film A", 2022, 'film'),
        ("Series A", 2023, 'series'),
        ("Film B", 2021, 'film'),
        ("Film A", 2024, 'film'),
        ("Series B", 2021, 'series'),
    ]
    for name, released, kind in seed_entries:
        media_list.add_entry(name, released, kind)

    # Retrieving and printing sorted entries
    print("Sorted Films:")
    for title, year in media_list.get_sorted_entries('film'):
        print(f"Title: {title}, Year: {year}")

    print("\nSorted Series:")
    for title, year in media_list.get_sorted_entries('series'):
        print(f"Title: {title}, Year: {year}")
|
tvdb.py
CHANGED
|
@@ -8,6 +8,7 @@ import json
|
|
| 8 |
import asyncio
|
| 9 |
import aiofiles
|
| 10 |
from tvdbApiClient import fetch_and_cache_seasons, save_to_json
|
|
|
|
| 11 |
|
| 12 |
load_dotenv()
|
| 13 |
THETVDB_API_KEY = os.getenv("THETVDB_API_KEY")
|
|
@@ -15,6 +16,7 @@ THETVDB_API_URL = os.getenv("THETVDB_API_URL")
|
|
| 15 |
CACHE_DIR = os.getenv("CACHE_DIR")
|
| 16 |
TOKEN_EXPIRY = None
|
| 17 |
THETVDB_TOKEN = None
|
|
|
|
| 18 |
|
| 19 |
def authenticate_thetvdb():
|
| 20 |
global THETVDB_TOKEN, TOKEN_EXPIRY
|
|
@@ -39,6 +41,38 @@ def get_thetvdb_token():
|
|
| 39 |
authenticate_thetvdb()
|
| 40 |
return THETVDB_TOKEN
|
| 41 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
async def fetch_and_cache_json(original_title, title, media_type, year=None):
|
| 43 |
if year:
|
| 44 |
search_url = f"{THETVDB_API_URL}/search?query={urllib.parse.quote(title)}&type={media_type}&year={year}"
|
|
@@ -76,7 +110,7 @@ async def fetch_and_cache_json(original_title, title, media_type, year=None):
|
|
| 76 |
extended_url = f"{THETVDB_API_URL}/movies/{tvdb_id}/extended?meta=translations"
|
| 77 |
elif media_type == 'series':
|
| 78 |
extended_url = f"{THETVDB_API_URL}/series/{tvdb_id}/extended?meta=translations"
|
| 79 |
-
await fetch_and_cache_seasons(tvdb_id)
|
| 80 |
else:
|
| 81 |
print(f"Unsupported media type: {media_type}")
|
| 82 |
return
|
|
@@ -85,10 +119,14 @@ async def fetch_and_cache_json(original_title, title, media_type, year=None):
|
|
| 85 |
response = requests.get(extended_url, headers=headers)
|
| 86 |
response.raise_for_status()
|
| 87 |
extended_data = response.json()
|
| 88 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
# Cache the extended JSON response
|
| 90 |
json_cache_path = os.path.join(CACHE_DIR, f"{urllib.parse.quote(original_title)}.json")
|
| 91 |
-
await save_to_json(
|
| 92 |
|
| 93 |
except requests.RequestException as e:
|
| 94 |
print(f"Error fetching data: {e}")
|
|
|
|
| 8 |
import asyncio
|
| 9 |
import aiofiles
|
| 10 |
from tvdbApiClient import fetch_and_cache_seasons, save_to_json
|
| 11 |
+
from services import RecentList
|
| 12 |
|
| 13 |
load_dotenv()
|
| 14 |
THETVDB_API_KEY = os.getenv("THETVDB_API_KEY")
|
|
|
|
| 16 |
CACHE_DIR = os.getenv("CACHE_DIR")
|
| 17 |
TOKEN_EXPIRY = None
|
| 18 |
THETVDB_TOKEN = None
|
| 19 |
+
recent_list = RecentList()
|
| 20 |
|
| 21 |
def authenticate_thetvdb():
|
| 22 |
global THETVDB_TOKEN, TOKEN_EXPIRY
|
|
|
|
| 41 |
authenticate_thetvdb()
|
| 42 |
return THETVDB_TOKEN
|
| 43 |
|
| 44 |
+
def clean_data(data):
    """Clean up the data to retain only necessary fields.

    Keeps a whitelist of top-level keys (currently just "data") and, within
    each, only the fields listed below. Missing keys or fields are skipped
    rather than raising.
    """
    # Whitelist of fields to retain; the values are unused placeholders.
    # NOTE: the original literal listed 'translations' twice; the duplicate
    # was a no-op and has been removed (first position kept to preserve
    # output key order).
    fields_to_keep = {
        "data": {
            'id': None,
            'name': None,
            'image': None,
            'score': None,
            'runtime': None,
            'releases': None,
            'year': None,
            'contentRatings': None,
            'originalCountry': None,
            'originalLanguage': None,
            'translations': {},
            'artworks': [],
            'characters': [],
            'spoken_languages': [],
        }
    }

    cleaned_data = {}
    for key, fields in fields_to_keep.items():
        if key in data:
            # Copy only the whitelisted fields actually present in the input.
            cleaned_data[key] = {
                field: data[key][field] for field in fields if field in data[key]
            }
    return cleaned_data
|
| 75 |
+
|
| 76 |
async def fetch_and_cache_json(original_title, title, media_type, year=None):
|
| 77 |
if year:
|
| 78 |
search_url = f"{THETVDB_API_URL}/search?query={urllib.parse.quote(title)}&type={media_type}&year={year}"
|
|
|
|
| 110 |
extended_url = f"{THETVDB_API_URL}/movies/{tvdb_id}/extended?meta=translations"
|
| 111 |
elif media_type == 'series':
|
| 112 |
extended_url = f"{THETVDB_API_URL}/series/{tvdb_id}/extended?meta=translations"
|
| 113 |
+
await fetch_and_cache_seasons(tvdb_id)
|
| 114 |
else:
|
| 115 |
print(f"Unsupported media type: {media_type}")
|
| 116 |
return
|
|
|
|
| 119 |
response = requests.get(extended_url, headers=headers)
|
| 120 |
response.raise_for_status()
|
| 121 |
extended_data = response.json()
|
| 122 |
+
cleaned_data=clean_data(extended_data)
|
| 123 |
+
if media_type == 'movie':
|
| 124 |
+
recent_list.add_entry(original_title, cleaned_data['data']['year'], 'film')
|
| 125 |
+
elif media_type == 'series':
|
| 126 |
+
recent_list.add_entry(original_title, cleaned_data['data']['year'], 'series')
|
| 127 |
# Cache the extended JSON response
|
| 128 |
json_cache_path = os.path.join(CACHE_DIR, f"{urllib.parse.quote(original_title)}.json")
|
| 129 |
+
await save_to_json(cleaned_data, json_cache_path)
|
| 130 |
|
| 131 |
except requests.RequestException as e:
|
| 132 |
print(f"Error fetching data: {e}")
|