omerariel123 committed on
Commit
faf7c48
verified
1 Parent(s): 654aa51

Upload 4 files

Browse files
Files changed (4) hide show
  1. app.py +67 -0
  2. cluster_to_emotion.json +1 -0
  3. gmm_model.pkl +3 -0
  4. requirements.txt +6 -0
app.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import gradio as gr
3
+ import joblib
4
+ import json
5
+ import numpy as np
6
+ import pandas as pd
7
+
8
+ from sklearn.metrics.pairwise import cosine_similarity
9
+ from sentence_transformers import SentenceTransformer
10
+
11
# Load the fitted GMM and the cluster-id -> emotion lookup table.
# NOTE(review): joblib.load unpickles arbitrary objects; only ship a
# gmm_model.pkl that comes from a trusted source.
gmm = joblib.load("gmm_model.pkl")

# Explicit encoding so the JSON is decoded the same way on every platform.
with open("cluster_to_emotion.json", "r", encoding="utf-8") as f:
    cluster_to_emotion = json.load(f)

# Load the song database and keep only clean, unique (lyrics, title) rows.
song_db = pd.read_parquet("hf://datasets/johanf/taylor-swift/data/train-00000-of-00001.parquet")
song_db = song_db[["lyrics", "title"]].dropna().drop_duplicates()
song_db["lyrics"] = song_db["lyrics"].str.strip()
song_db["title"] = song_db["title"].str.strip()
# Fresh 0..n-1 index, so row labels line up with rows of lyrics_embeddings.
song_db = song_db.reset_index(drop=True)

# Compute an embedding for every song's lyrics once, at startup.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
lyrics_list = song_db["lyrics"].tolist()
lyrics_embeddings = embedding_model.encode(lyrics_list, show_progress_bar=True)

# Model used to turn free text into the embedding the GMM clusters on.
emotion_model = SentenceTransformer("j-hartmann/emotion-english-distilroberta-base")
31
+
32
def predict_emotion(text):
    """Return the emotion label associated with the GMM cluster of *text*.

    The text is embedded with ``emotion_model``, the embedding is assigned
    to a cluster by ``gmm``, and the cluster id (converted to a string key)
    is looked up in ``cluster_to_emotion``.
    """
    vector = emotion_model.encode([text])
    cluster_id = gmm.predict(vector)[0]
    # The mapping is loaded from JSON, so its keys are strings.
    label_key = str(cluster_id)
    return cluster_to_emotion[label_key]
36
+
37
def find_matching_song_by_emotion(user_input):
    """Return a Markdown summary of the song that best matches *user_input*.

    The predicted emotion of the input narrows the search to songs whose
    lyrics contain that emotion word; if none do, every song is a
    candidate. Candidates are ranked by cosine similarity between the
    input's embedding and the lyrics embeddings computed at startup.

    Args:
        user_input: Free text typed by the user.

    Returns:
        A Markdown string with the song title, the similarity score, a
        lyrics snippet of at most 200 characters, and the detected emotion.
        Blank input yields a short prompt asking for text instead.
    """
    # Guard clause: nothing meaningful to embed for empty/whitespace input.
    if not user_input or not user_input.strip():
        return "Please tell me something about your day first."

    emotion = predict_emotion(user_input)

    # Find songs whose lyrics literally mention the emotion word.
    # regex=False: the label is plain text, not a pattern.
    mentions_emotion = (
        song_db["lyrics"].str.lower().str.contains(emotion.lower(), regex=False)
    )
    candidates = song_db[mentions_emotion]
    if candidates.empty:
        candidates = song_db

    # Reuse the embeddings computed at startup instead of re-encoding every
    # candidate on each request. song_db carries a fresh 0..n-1 index, so
    # the candidates' index labels are also row positions in the matrix.
    candidate_embeddings = np.asarray(lyrics_embeddings)[candidates.index.to_numpy()]
    user_embedding = embedding_model.encode([user_input])

    similarities = cosine_similarity(user_embedding, candidate_embeddings)[0]
    top_idx = int(np.argmax(similarities))

    best = candidates.iloc[top_idx]
    title = best["title"]
    lyrics = best["lyrics"]
    lyrics_snippet = lyrics[:200].replace("\n", " ")
    # Only signal truncation when the lyrics were actually cut.
    ellipsis = "..." if len(lyrics) > 200 else ""
    score = similarities[top_idx]

    return f"**{title}** (match: {score:.2f})\n\n`{lyrics_snippet}{ellipsis}`\n\n_Emotion: {emotion}_"
58
+
59
# Gradio UI: one free-text box in, one Markdown panel out.
demo = gr.Interface(
    fn=find_matching_song_by_emotion,
    inputs=gr.Textbox(placeholder="Tell me something that happened today"),
    outputs="markdown",
    title="Taylor Swift Mood Matcher",
    # The apostrophe in "I’ll" was mis-decoded in the original text; restored.
    description="Tell me what you're feeling and I’ll match you with a Taylor Swift song that fits your mood.",
)

demo.launch()
cluster_to_emotion.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"0": "sadness", "1": "joy", "2": "fear", "3": "anger", "4": "sadness", "5": "surprise"}
gmm_model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b53965ce9db017583febb2c1461da646f37d1f81066ca147b090d9511707b72
3
+ size 84972567
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ gradio
2
+ pandas
3
+ numpy
4
+ joblib
5
+ scikit-learn
6
+ sentence-transformers