Rob Learsch committed on
Commit
3d4a5a4
·
1 Parent(s): d7ed08f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -8
app.py CHANGED
@@ -44,9 +44,53 @@ def stitch_lyrics(lyrics, line_number=1):
44
  def load_lyrics(filename):
45
  with open(filename, "r", encoding="utf-8") as file:
46
  lyrics = file.readlines()
47
- return [line.strip() for line in lyrics if line.strip()] # Remove empty lines
48
  #return [line for line in lyrics]
49
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  def artist_response(gemma_response, artist):
51
  if artist == "Radiohead":
52
  artist_embeddings = radiohead_embeddings
@@ -71,9 +115,13 @@ radiohead_lyrics = load_lyrics("radiohead_lyrics.txt")
71
  kendrick_lyrics = load_lyrics("kendrick_lamar_lyrics.txt")
72
  grateful_dead_lyrics = load_lyrics('grateful_dead_lyrics.txt')
73
 
74
- stitched_radiohead_lyrics = stitch_lyrics(radiohead_lyrics)
75
- stitched_kendrick_lyrics = stitch_lyrics(kendrick_lyrics)
76
- stitched_grateful_dead_lyrics = stitch_lyrics(grateful_dead_lyrics)
 
 
 
 
77
 
78
  encoder_model = SentenceTransformer('all-MiniLM-L6-v2',
79
  #'sentence-transformers/all-MiniLM-L6-v2',
@@ -82,9 +130,13 @@ encoder_model = SentenceTransformer('all-MiniLM-L6-v2',
82
  #to increase speed:
83
  #similarity_function=SimilarityFunction.DOT_PRODUCT,
84
  )
85
- radiohead_embeddings = encoder_model.encode(stitched_radiohead_lyrics)
86
- kendrick_embeddings = encoder_model.encode(stitched_kendrick_lyrics)
87
- grateful_dead_embeddings = encoder_model.encode(stitched_grateful_dead_lyrics)
 
 
 
 
88
 
89
 
90
 
 
44
  def load_lyrics(filename):
45
  with open(filename, "r", encoding="utf-8") as file:
46
  lyrics = file.readlines()
47
+ return [line for line in lyrics]
48
  #return [line for line in lyrics]
49
def songs_from_text(lines):
    """Group lyric lines into songs, each song being a list of stanzas.

    Every line is stripped of surrounding whitespace before it is examined.
    A line equal to ``===`` ends the current song, and an empty line ends
    the current stanza. Empty stanzas and empty songs are never emitted, so
    repeated delimiters are harmless.

    Args:
        lines: Iterable of strings, one per lyric line.

    Returns:
        A list of songs; each song is a list of stanzas; each stanza is a
        list of stripped, non-empty lines.
    """
    song_delimiter = "==="
    songs = []
    current_song = []
    current_stanza = []

    for raw_line in lines:
        line = raw_line.strip()

        if line and line != song_delimiter:
            current_stanza.append(line)
            continue

        # Both delimiters (blank line and "===") close the open stanza.
        if current_stanza:
            current_song.append(current_stanza)
            current_stanza = []

        # Only the song delimiter additionally closes the open song.
        if line == song_delimiter and current_song:
            songs.append(current_song)
            current_song = []

    # Flush whatever is still open when the input ends without a delimiter.
    if current_stanza:
        current_song.append(current_stanza)
    if current_song:
        songs.append(current_song)

    return songs
80
+
81
+
82
def generate_cumulative_phrases(songs, separator=" // "):
    """Build every run of consecutive lines within each stanza as one phrase.

    For each stanza and each starting line, the phrases are the starting
    line alone, then the starting line joined with the next line, and so on
    until the end of the stanza. Phrases never span two stanzas.

    Args:
        songs: List of songs, each a list of stanzas, each a list of line
            strings.
        separator: Text placed between joined lines. Defaults to ``" // "``.

    Returns:
        A flat list of phrase strings, ordered by song, stanza, starting
        line, and then increasing phrase length.
    """
    all_phrases = []

    for song in songs:
        for stanza in song:
            for start in range(len(stanza)):
                cumulative = ""
                for line in stanza[start:]:
                    # While the running phrase is empty the line is taken
                    # as-is, so no phrase begins with the separator.
                    cumulative = cumulative + separator + line if cumulative else line
                    all_phrases.append(cumulative)

    return all_phrases
94
  def artist_response(gemma_response, artist):
95
  if artist == "Radiohead":
96
  artist_embeddings = radiohead_embeddings
 
115
  kendrick_lyrics = load_lyrics("kendrick_lamar_lyrics.txt")
116
  grateful_dead_lyrics = load_lyrics('grateful_dead_lyrics.txt')
117
 
118
+ all_phrases_radiohead = generate_cumulative_phrases(songs_from_text(radiohead_lyrics))
119
+ all_phrases_kendrick = generate_cumulative_phrases(songs_from_text(kendrick_lyrics))
120
+ all_phrases_grateful_dead = generate_cumulative_phrases(songs_from_text(grateful_dead_lyrics))
121
+
122
+ #stitched_radiohead_lyrics = stitch_lyrics(radiohead_lyrics)
123
+ #stitched_kendrick_lyrics = stitch_lyrics(kendrick_lyrics)
124
+ #stitched_grateful_dead_lyrics = stitch_lyrics(grateful_dead_lyrics)
125
 
126
  encoder_model = SentenceTransformer('all-MiniLM-L6-v2',
127
  #'sentence-transformers/all-MiniLM-L6-v2',
 
130
  #to increase speed:
131
  #similarity_function=SimilarityFunction.DOT_PRODUCT,
132
  )
133
+ #radiohead_embeddings = encoder_model.encode(stitched_radiohead_lyrics)
134
+ #kendrick_embeddings = encoder_model.encode(stitched_kendrick_lyrics)
135
+ #grateful_dead_embeddings = encoder_model.encode(stitched_grateful_dead_lyrics)
136
+ radiohead_embeddings = encoder_model.encode(all_phrases_radiohead)
137
+ kendrick_embeddings = encoder_model.encode(all_phrases_kendrick)
138
+ grateful_dead_embeddings = encoder_model.encode(all_phrases_grateful_dead)
139
+
140
 
141
 
142