Spaces:

rlearsch
/

LyricsChatBot

Sleeping

App Files Files Community

Rob Learsch committed on Apr 2, 2025

Commit

3d4a5a4

1 Parent(s): d7ed08f

Update app.py

Browse files

Files changed (1) hide show

app.py +60 -8

app.py CHANGED Viewed

@@ -44,9 +44,53 @@ def stitch_lyrics(lyrics, line_number=1):
 def load_lyrics(filename):
     with open(filename, "r", encoding="utf-8") as file:
         lyrics = file.readlines()
-    return [line.strip() for line in lyrics if line.strip()]  # Remove empty lines
     #return [line for line in lyrics]
 def artist_response(gemma_response, artist):
     if artist == "Radiohead":
         artist_embeddings = radiohead_embeddings
@@ -71,9 +115,13 @@ radiohead_lyrics = load_lyrics("radiohead_lyrics.txt")
 kendrick_lyrics = load_lyrics("kendrick_lamar_lyrics.txt")
 grateful_dead_lyrics = load_lyrics('grateful_dead_lyrics.txt')
-stitched_radiohead_lyrics = stitch_lyrics(radiohead_lyrics)
-stitched_kendrick_lyrics = stitch_lyrics(kendrick_lyrics)
-stitched_grateful_dead_lyrics = stitch_lyrics(grateful_dead_lyrics)
 encoder_model = SentenceTransformer('all-MiniLM-L6-v2',
                                     #'sentence-transformers/all-MiniLM-L6-v2',
@@ -82,9 +130,13 @@ encoder_model = SentenceTransformer('all-MiniLM-L6-v2',
                                     #to increase speed:
                                     #similarity_function=SimilarityFunction.DOT_PRODUCT,
                                     )
-radiohead_embeddings = encoder_model.encode(stitched_radiohead_lyrics)
-kendrick_embeddings = encoder_model.encode(stitched_kendrick_lyrics)
-grateful_dead_embeddings = encoder_model.encode(stitched_grateful_dead_lyrics)

 def load_lyrics(filename):
     with open(filename, "r", encoding="utf-8") as file:
         lyrics = file.readlines()
+        return [line for line in lyrics]
     #return [line for line in lyrics]
+def songs_from_text(lines):
+    """Group raw lyric lines into songs, each made of stanzas.
+
+    Every input line is stripped of surrounding whitespace first. A line
+    equal to "===" closes the current song, an empty line closes the
+    current stanza, and any other line is appended to the current stanza.
+    Empty stanzas and empty songs are never emitted.
+
+    Returns a list of songs; each song is a list of stanzas; each stanza
+    is a list of stripped line strings.
+    """
+    parsed = []
+    song = []
+    stanza = []
+    for raw in lines:
+        text = raw.strip()
+        if text not in ("", "==="):
+            stanza.append(text)
+            continue
+        # Both delimiters close the stanza in progress, if there is one.
+        if stanza:
+            song.append(stanza)
+            stanza = []
+        # Only the song delimiter additionally closes the song.
+        if text == "===" and song:
+            parsed.append(song)
+            song = []
+    # Flush whatever was still open when the input ran out.
+    if stanza:
+        song.append(stanza)
+    if song:
+        parsed.append(song)
+    return parsed
+def generate_cumulative_phrases(songs):
+    """Build every run of consecutive lines within each stanza as one phrase.
+
+    For each stanza and each starting line, the lines from that start
+    onward are accumulated one at a time, joined with " // ", and every
+    intermediate result is recorded. Phrases never span two stanzas.
+
+    Returns a flat list of phrase strings, ordered by song, stanza,
+    starting line, then phrase length.
+    """
+    phrases = []
+    for song in songs:
+        for stanza in song:
+            for start, _ in enumerate(stanza):
+                running = ""
+                for piece in stanza[start:]:
+                    # The separator is added only once something has
+                    # already been accumulated.
+                    running = (running + " // " if running else "") + piece
+                    phrases.append(running)
+    return phrases
 def artist_response(gemma_response, artist):
     if artist == "Radiohead":
         artist_embeddings = radiohead_embeddings
 kendrick_lyrics = load_lyrics("kendrick_lamar_lyrics.txt")
 grateful_dead_lyrics = load_lyrics('grateful_dead_lyrics.txt')
+all_phrases_radiohead = generate_cumulative_phrases(songs_from_text(radiohead_lyrics))
+all_phrases_kendrick = generate_cumulative_phrases(songs_from_text(kendrick_lyrics))
+all_phrases_grateful_dead = generate_cumulative_phrases(songs_from_text(grateful_dead_lyrics))
+#stitched_radiohead_lyrics = stitch_lyrics(radiohead_lyrics)
+#stitched_kendrick_lyrics = stitch_lyrics(kendrick_lyrics)
+#stitched_grateful_dead_lyrics = stitch_lyrics(grateful_dead_lyrics)
 encoder_model = SentenceTransformer('all-MiniLM-L6-v2',
                                     #'sentence-transformers/all-MiniLM-L6-v2',
                                     #to increase speed:
                                     #similarity_function=SimilarityFunction.DOT_PRODUCT,
                                     )
+#radiohead_embeddings = encoder_model.encode(stitched_radiohead_lyrics)
+#kendrick_embeddings = encoder_model.encode(stitched_kendrick_lyrics)
+#grateful_dead_embeddings = encoder_model.encode(stitched_grateful_dead_lyrics)
+radiohead_embeddings = encoder_model.encode(all_phrases_radiohead)
+kendrick_embeddings = encoder_model.encode(all_phrases_kendrick)
+grateful_dead_embeddings = encoder_model.encode(all_phrases_grateful_dead)