Spaces:
Runtime error
Runtime error
| import json | |
| import re | |
| import numpy as np | |
| from langchain.vectorstores import DeepLake | |
| # Cleans up inconsistencies in the format of the ChatGPT-generated emotions text: | |
| # strips numbered-list markers, joins lines with commas and converts all characters to lower case. | |
| # Usage: clean_emotions_json("../data/spotify_song_url_emotions.json") | |
def clean_emotions_json(filename: str) -> None:
    """Normalise the ``emotions`` text of every song in a JSON file, in place.

    The file must contain a JSON list of objects, each with the keys
    ``song_name``, ``iframe`` and ``emotions``. For every song the emotions
    string is lower-cased, newlines are replaced by ``", "`` and numbered-list
    markers such as ``"1. "`` are removed. The cleaned list is written back to
    the same file; only the three keys above are kept, any other keys are
    dropped.

    Args:
        filename: Path of the JSON file to read and then overwrite.

    Raises:
        OSError: If the file cannot be opened for reading or writing.
        json.JSONDecodeError: If the file does not contain valid JSON.
        KeyError: If a song is missing one of the three expected keys.
    """
    # JSON text is UTF-8; the platform default encoding (e.g. cp1252 on
    # Windows) would fail or mis-decode non-ASCII song names.
    with open(filename, "r", encoding="utf-8") as f:
        input_data = json.load(f)

    # Matches ordered-list markers such as "1. " or "12.  "; compiled once
    # instead of being looked up on every iteration.
    list_marker = re.compile(r'\d+\.\s+')

    output_data = []
    for song in input_data:
        emotions = song['emotions']
        # Flatten multi-line answers into one comma-separated line first, then
        # strip the list numbering that preceded each entry.
        cleaned_emotions = list_marker.sub('', emotions.lower().replace('\n', ', '))
        output_data.append(
            {
                "song_name": song["song_name"],
                "iframe": song["iframe"],
                "emotions": cleaned_emotions,
            }
        )
        print(emotions, "\n", cleaned_emotions)

    # Overwrite the input file; it is later used to store the song emotions as
    # embeddings.
    with open(filename, "w", encoding="utf-8") as f:
        json.dump(output_data, f, indent=4)
    print(f"Spotify song, url and song emotions saved to {filename}")
| # Weighted random sampling without replacement: calls np.random.choice repeatedly and removes each chosen item so the final result has no duplicates | |
def weighted_random_sample(items: np.ndarray, weights: np.ndarray, n: int) -> np.ndarray:
    """Draw ``n`` distinct elements of ``items`` at random, biased by ``weights``.

    Elements are drawn one at a time with ``np.random.choice``; after each
    draw the chosen element is removed from the pool and the remaining
    weights are renormalised, so no element can appear twice in the result.

    Args:
        items: Candidates to sample from (array or any sequence NumPy can
            convert). Sampling happens along the first axis.
        weights: One non-negative weight per item. They do not need to sum
            to 1; they are normalised internally. The caller's array is not
            modified.
        n: Number of elements to draw. Values <= 0 yield an empty result.

    Returns:
        An array holding the chosen elements in the order they were drawn.

    Raises:
        ValueError: If ``items`` and ``weights`` differ in length, a weight is
            negative, or ``n`` exceeds the number of items with non-zero
            weight.
    """
    items = np.asarray(items)
    weights = np.asarray(weights, dtype=float)

    if len(items) != len(weights):
        raise ValueError("items and weights must have the same length")
    if np.any(weights < 0):
        raise ValueError("weights must be non-negative")
    # Only items with non-zero weight can ever be chosen, so they bound how
    # many distinct draws are possible.
    available = int(np.count_nonzero(weights))
    if n > available:
        raise ValueError(
            f"cannot draw {n} distinct items: only {available} have non-zero weight"
        )

    indices = np.arange(len(items))
    out_indices = []
    for _ in range(n):
        # np.random.choice requires probabilities summing to 1. Normalising
        # before every draw (including the first) lets callers pass raw scores.
        weights = weights / weights.sum()
        chosen_index = np.random.choice(indices, p=weights)
        out_indices.append(chosen_index)
        # Remove the chosen element from the pool so it cannot be drawn again.
        mask = indices != chosen_index
        indices = indices[mask]
        weights = weights[mask]
    # Explicit integer dtype keeps the fancy index valid when nothing was drawn.
    return items[np.asarray(out_indices, dtype=int)]
| # Load DeepLake db | |
def load_db(dataset_path: str, *args, **kwargs) -> DeepLake:
    """Construct a DeepLake vector store for ``dataset_path``.

    Any extra positional and keyword arguments are passed to the ``DeepLake``
    constructor unchanged.
    """
    return DeepLake(dataset_path, *args, **kwargs)