| import csv | |
| import re | |
def load_lines(filename):
    """Read a text file and return its lines with surrounding whitespace removed.

    Args:
        filename: Path of the text file to read.

    Returns:
        A list with one string per line of the file, in file order. Each
        entry has had ``str.strip()`` applied, so blank lines are kept as
        empty strings.
    """
    with open(filename) as handle:
        return [raw_line.strip() for raw_line in handle]
def remove_titles_and_bad_tracks(lines):
    """Keep only the lines that look like identified tracks.

    A line is kept when it starts with a digit and does not contain the
    placeholder ``"???"``. Everything else is dropped.

    Args:
        lines: Iterable of text lines.

    Returns:
        A new list with the kept lines in their original order.
    """
    starts_with_digit = re.compile(r"^\d.*")
    return [
        candidate
        for candidate in lines
        if "???" not in candidate and starts_with_digit.match(candidate)
    ]
def group_by_set(lines):
    """Split a flat list of lines into one list of track lines per set.

    A line ending in ``":"`` is treated as a set title and closes the set
    collected so far. A line that starts with a digit, contains a ``":"`` and
    does not contain the placeholder ``"???"`` is collected as a track of the
    current set. Blank lines and all other lines are ignored.

    Args:
        lines: Iterable of text lines.

    Returns:
        A list of non-empty lists, one per set, each holding that set's track
        lines in input order. Sets without any kept track are omitted.
    """
    is_set_title = re.compile(r".*:$")
    is_track = re.compile(r"^\d.*:")
    grouped_lines = []
    current_set = []
    for line in lines:
        if not line.strip():
            continue
        if is_set_title.match(line) and current_set:
            # A new title closes the set that has been accumulating.
            grouped_lines.append(current_set)
            current_set = []
        elif is_track.match(line) and "???" not in line:
            current_set.append(line)
    # No title follows the final set, so it must be flushed here; otherwise
    # the tracks of the last set in the input would be silently dropped.
    if current_set:
        grouped_lines.append(current_set)
    return grouped_lines
def get_grouped_artists(grouped_lines):
    """Extract the artist name from every track line, keeping the grouping.

    A track line is expected to look like ``"<number>: <artist> - <title>"``.
    The artist is the text between the ``": "`` after the number and the
    first ``" - "``; it is stripped and lower-cased. Lines that do not match
    this shape are skipped.

    Args:
        grouped_lines: Iterable of sets, each an iterable of track lines.

    Returns:
        A list with one list of artist names per input set, in input order.
        A set with no matching lines yields an empty list.
    """
    track_pattern = re.compile(r"\d+\: (.+?) - .+?")

    def artists_of(dj_set_lines):
        # Match every line once, then keep only the lines that matched.
        matches = (track_pattern.match(entry) for entry in dj_set_lines)
        return [found.group(1).strip().lower() for found in matches if found]

    return [artists_of(dj_set_lines) for dj_set_lines in grouped_lines]
def write_to_csv(filename, rows=None):
    """Write one CSV row per set of artist names.

    Args:
        filename: Path of the CSV file to create or overwrite.
        rows: Iterable of rows, each an iterable of artist names. When
            omitted, the module-level ``artist_names`` is used so that
            existing one-argument calls keep working.

    Raises:
        NameError: If ``rows`` is omitted and no module-level
            ``artist_names`` exists.
    """
    if rows is None:
        # Backward-compatible fallback for callers that pass only the path.
        rows = artist_names
    # Write to the path that was passed in rather than to a global;
    # newline="" lets the csv module control line endings itself.
    with open(filename, "w", newline="") as csvfile:
        csv.writer(csvfile).writerows(rows)
if __name__ == "__main__":
    # Script entry point: read the raw track listing, group it by set,
    # pull out the artist names and write them out one set per CSV row.
    filename = "data/radio-original.txt"
    output_filename = "data/artist-names-per-row.csv"

    # artist_names stays a module-level name: write_to_csv is called with
    # only the output path, so it picks the rows up from module scope.
    artist_names = get_grouped_artists(group_by_set(load_lines(filename)))
    write_to_csv(output_filename)