niharika17032001 committed on
Commit
54f2f60
·
1 Parent(s): 93af9a4

Create Dockerfile

Browse files
Files changed (1) hide show
  1. convert_song_format_for_pagalworld.py +67 -15
convert_song_format_for_pagalworld.py CHANGED
@@ -1,5 +1,5 @@
1
  import json
2
-
3
 
4
  def convert_song_format(original_song: dict, song_id: int) -> dict:
5
  """
@@ -12,11 +12,49 @@ def convert_song_format(original_song: dict, song_id: int) -> dict:
12
  Returns:
13
  dict: Converted song dictionary.
14
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  return {
16
  "id": str(song_id),
17
- "title": original_song.get("Song Name ", "").strip(),
18
- "artist": original_song.get("Singer(s) ", "").strip(),
19
- "albumArtUrl": original_song.get("Thumbnail", "").strip(),
20
  "audioUrl": original_song.get("Play Online", "").strip()
21
  }
22
 
@@ -31,8 +69,15 @@ def read_json_file(file_path: str) -> list:
31
  Returns:
32
  dict or list: Parsed JSON data.
33
  """
34
- with open(file_path, 'r', encoding='utf-8') as file:
35
- return json.load(file)
 
 
 
 
 
 
 
36
 
37
 
38
  def write_json_file(data, file_path: str, indent: int = 2):
@@ -49,29 +94,36 @@ def write_json_file(data, file_path: str, indent: int = 2):
49
 
50
 
51
  def main():
52
- bollywood_song_metadata_json = "bollywood_song_metadata.json"
53
  list_of_original_song = read_json_file(bollywood_song_metadata_json)
54
- # print(list_of_original_song)
55
  songs_json = "songs.json"
56
 
57
- list_of_song= []
58
 
 
 
 
 
 
 
 
 
59
 
60
- # list_of_song = read_json_file(songs_json)
61
 
62
  i = len(list_of_song)
63
 
 
64
 
65
- print(f"initial len of song list {len(list_of_song)}")
66
  for original_song in list_of_original_song:
67
- i = i + 1
68
  converted = convert_song_format(original_song, song_id=i)
69
  list_of_song.append(converted)
 
70
 
71
- print(f"final len of song list {len(list_of_song)}")
72
- print(list_of_song)
73
  write_json_file(list_of_song, songs_json)
 
74
 
75
 
76
  if __name__ == "__main__":
77
- main()
 
1
  import json
2
+ import os # Import os for file existence checks
3
 
4
  def convert_song_format(original_song: dict, song_id: int) -> dict:
5
  """
 
12
  Returns:
13
  dict: Converted song dictionary.
14
  """
15
+ # Extract title, handling cases where it might be a list or dict
16
+ title = ""
17
+ original_title = original_song.get("song") # Changed from "song " to "song"
18
+ if isinstance(original_title, str):
19
+ title = original_title.strip()
20
+ elif isinstance(original_title, dict) and "name" in original_title:
21
+ title = original_title["name"].strip()
22
+ elif isinstance(original_title, list) and original_title:
23
+ # If it's a list, take the first element's name if available
24
+ if isinstance(original_title[0], dict) and "name" in original_title[0]:
25
+ title = original_title[0]["name"].strip()
26
+ elif isinstance(original_title[0], str):
27
+ title = original_title[0].strip()
28
+
29
+ # Extract all artist names and join with a comma
30
+ artist_names = []
31
+ original_singer_data = original_song.get("singer")
32
+ if isinstance(original_singer_data, list):
33
+ for singer_entry in original_singer_data:
34
+ if isinstance(singer_entry, dict) and "name" in singer_entry:
35
+ artist_names.append(singer_entry["name"].strip())
36
+ elif isinstance(singer_entry, str):
37
+ artist_names.append(singer_entry.strip())
38
+ elif isinstance(original_singer_data, str): # Fallback if it's directly a string
39
+ artist_names.append(original_singer_data.strip())
40
+
41
+ artist_string = ", ".join(artist_names)
42
+
43
+
44
+ # Extract thumbnail URL
45
+ album_art_url = ""
46
+ original_thumbnail = original_song.get("thumbnail", "")
47
+ if isinstance(original_thumbnail, str):
48
+ album_art_url = original_thumbnail.strip()
49
+ elif isinstance(original_thumbnail, dict) and "url" in original_thumbnail:
50
+ album_art_url = original_thumbnail["url"].strip()
51
+
52
+
53
  return {
54
  "id": str(song_id),
55
+ "title": title,
56
+ "artist": artist_string, # Use the comma-separated string here
57
+ "albumArtUrl": album_art_url,
58
  "audioUrl": original_song.get("Play Online", "").strip()
59
  }
60
 
 
69
  Returns:
70
  dict or list: Parsed JSON data.
71
  """
72
+ try:
73
+ with open(file_path, 'r', encoding='utf-8') as file:
74
+ return json.load(file)
75
+ except FileNotFoundError:
76
+ print(f"Error: The file '{file_path}' was not found.")
77
+ return []
78
+ except json.JSONDecodeError:
79
+ print(f"Error: Could not decode JSON from '{file_path}'. Check file format.")
80
+ return []
81
 
82
 
83
  def write_json_file(data, file_path: str, indent: int = 2):
 
94
 
95
 
96
  def main():
97
+ bollywood_song_metadata_json = "combined_song_metadata.json"
98
  list_of_original_song = read_json_file(bollywood_song_metadata_json)
99
+
100
  songs_json = "songs.json"
101
 
102
+ list_of_song = []
103
 
104
+ try:
105
+ if os.path.exists(songs_json) and os.path.getsize(songs_json) > 0:
106
+ existing_songs = read_json_file(songs_json)
107
+ if isinstance(existing_songs, list):
108
+ list_of_song = existing_songs
109
+ print(f"Loaded {len(list_of_song)} existing songs from {songs_json}")
110
+ except Exception as e:
111
+ print(f"Could not load existing {songs_json} due to error: {e}. Starting with an empty list.")
112
 
 
113
 
114
  i = len(list_of_song)
115
 
116
+ print(f"Initial len of song list: {len(list_of_song)}")
117
 
 
118
  for original_song in list_of_original_song:
 
119
  converted = convert_song_format(original_song, song_id=i)
120
  list_of_song.append(converted)
121
+ i = i + 1
122
 
123
+ print(f"Final len of song list: {len(list_of_song)}")
 
124
  write_json_file(list_of_song, songs_json)
125
+ print(f"Converted song data saved to {songs_json}")
126
 
127
 
128
  if __name__ == "__main__":
129
+ main()