8
File size: 2,387 Bytes
faf7c48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68

import gradio as gr
import joblib
import json
import numpy as np
import pandas as pd

from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer

# Load the clustering model and the cluster-id -> emotion-label lookup.
# NOTE(review): joblib.load unpickles the file, and unpickling can execute
# arbitrary code -- only ship a gmm_model.pkl that comes from a trusted source.
gmm = joblib.load("gmm_model.pkl")

# Pin the encoding: without it open() falls back to the platform locale, which
# can mis-decode non-ASCII labels on some hosts.
with open("cluster_to_emotion.json", "r", encoding="utf-8") as f:
    cluster_to_emotion = json.load(f)

# Load the song database and keep only clean, unique (lyrics, title) rows.
song_db = pd.read_parquet("hf://datasets/johanf/taylor-swift/data/train-00000-of-00001.parquet")
song_db = song_db[["lyrics", "title"]].dropna().drop_duplicates()
song_db["lyrics"] = song_db["lyrics"].str.strip()
song_db["title"] = song_db["title"].str.strip()
# Re-index to 0..n-1 so that row labels line up with rows of lyrics_embeddings.
song_db = song_db.reset_index(drop=True)

# Compute one embedding per song, once, at start-up.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
lyrics_list = song_db["lyrics"].tolist()
lyrics_embeddings = embedding_model.encode(lyrics_list, show_progress_bar=True)

# Model used to turn free text into the vectors the clustering model consumes.
emotion_model = SentenceTransformer("j-hartmann/emotion-english-distilroberta-base")

def predict_emotion(text):
    """Map a piece of text to an emotion label.

    The text is encoded with ``emotion_model``, the resulting vector is
    assigned to a cluster by ``gmm``, and the cluster id (as a string key)
    is looked up in ``cluster_to_emotion``.

    Raises:
        KeyError: if the predicted cluster id has no entry in
            ``cluster_to_emotion``.
    """
    vectors = emotion_model.encode([text])
    cluster_ids = gmm.predict(vectors)
    # JSON object keys are strings, so the numeric id is stringified first.
    label_key = str(cluster_ids[0])
    return cluster_to_emotion[label_key]

def find_matching_song_by_emotion(user_input):
    """Return a Markdown blurb for the song that best matches ``user_input``.

    The predicted emotion label is used as a literal, case-insensitive filter
    on the lyrics; if no lyrics mention the label, every song is considered.
    The remaining candidates are ranked by cosine similarity between the
    embedding of ``user_input`` and the song embeddings computed at import
    time.

    Args:
        user_input: Free text typed by the user.

    Returns:
        A Markdown string with the song title, the similarity score, the
        first 200 characters of the lyrics and the predicted emotion, or a
        short prompt when ``user_input`` is empty or blank.
    """
    # Nothing meaningful can be matched against empty or blank input.
    if not user_input or not user_input.strip():
        return "Please tell me something about your day first."

    emotion = predict_emotion(user_input)

    # Find songs whose lyrics mention this emotion. regex=False keeps the
    # label a plain substring even if it contains regex metacharacters.
    mentions_emotion = (
        song_db["lyrics"].str.lower().str.contains(emotion.lower(), regex=False)
    )
    candidates = song_db[mentions_emotion]

    if candidates.empty:
        candidates = song_db

    user_embedding = embedding_model.encode([user_input])
    # Reuse the embeddings computed at import time instead of re-encoding the
    # lyrics on every request. song_db is re-indexed to 0..n-1 before
    # lyrics_embeddings is built, so index labels double as row positions.
    candidate_embeddings = lyrics_embeddings[candidates.index.to_numpy()]

    similarities = cosine_similarity(user_embedding, candidate_embeddings)[0]
    top_idx = int(np.argmax(similarities))

    best_match = candidates.iloc[top_idx]
    title = best_match["title"]
    lyrics_snippet = best_match["lyrics"][:200].replace("\n", " ")
    score = similarities[top_idx]

    return f"**{title}**  (match: {score:.2f})\n\n`{lyrics_snippet}...`\n\n_Emotion: {emotion}_"

# Gradio UI: one free-text input, Markdown output rendered from
# find_matching_song_by_emotion.
demo = gr.Interface(
    fn=find_matching_song_by_emotion,
    inputs=gr.Textbox(placeholder="Tell me something that happened today"),
    outputs="markdown",
    title="Taylor Swift Mood Matcher",
    # The apostrophe in "I’ll" had been corrupted by an encoding round-trip
    # and was shown to users as garbage characters.
    description="Tell me what you're feeling and I’ll match you with a Taylor Swift song that fits your mood.",
)

demo.launch()