TutuAwad committed on
Commit
7cb69af
·
verified ·
1 Parent(s): a116656

Upload app (1).py

Browse files
Files changed (1) hide show
  1. app (1).py +204 -0
app (1).py ADDED
@@ -0,0 +1,204 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """app.ipynb
3
+
4
+ Automatically generated by Colab.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/1vb2j78WT7l9XQiXUBvl1VAtELSAzOUZJ
8
+ """
9
+
10
+ import os
11
+ import random
12
+ import numpy as np
13
+ import pandas as pd
14
+ import faiss
15
+ import gradio as gr
16
+ from sentence_transformers import SentenceTransformer
17
+ from huggingface_hub import InferenceClient
18
+ import spotipy
19
+ from spotipy.oauth2 import SpotifyClientCredentials
20
+ from difflib import SequenceMatcher
21
+
22
# ---------- Load data ----------
# All three artifacts are given as bare file names, so they are resolved
# relative to the process working directory.
CLEAN_CSV_PATH = "df_combined_clean.csv"
EMB_PATH = "df_embed.npz"
INDEX_PATH = "hnsw.index"

# Catalog table. semantic_search() selects rows by position using the ids
# returned by the FAISS index, so row order presumably matches the order in
# which vectors were added to the index -- TODO confirm.
df_combined = pd.read_csv(CLEAN_CSV_PATH)
emb_data = np.load(EMB_PATH)
# Stored embeddings, cast to float32. Not referenced again in the visible code.
df_embeddings = emb_data["df_embeddings"].astype("float32")
index = faiss.read_index(INDEX_PATH)

# ---------- Secrets ----------
# Read from the environment; each value is None when the variable is unset.
HF_TOKEN = os.getenv("HF_TOKEN")
SPOTIFY_CLIENT_ID = os.getenv("SPOTIPY_CLIENT_ID")
SPOTIFY_CLIENT_SECRET = os.getenv("SPOTIPY_CLIENT_SECRET")

# ---------- Models ----------
# NOTE(review): the query encoder presumably has to be the same model that
# produced the indexed embeddings -- confirm against the indexing notebook.
query_embedder = SentenceTransformer("all-mpnet-base-v2")
hf_client = InferenceClient(model="meta-llama/Llama-2-7b-chat-hf", token=HF_TOKEN)

# Spotify enrichment stays disabled (sp is None) unless both credentials exist.
sp = None
if SPOTIFY_CLIENT_ID and SPOTIFY_CLIENT_SECRET:
    auth = SpotifyClientCredentials(client_id=SPOTIFY_CLIENT_ID, client_secret=SPOTIFY_CLIENT_SECRET)
    sp = spotipy.Spotify(auth_manager=auth)
45
+
46
+ # ---------- Helper functions ----------
47
def encode_query(text):
    """Embed a single query string with the module-level ``query_embedder``.

    The text is wrapped in a one-item list before being handed to ``encode``,
    and the resulting array is cast to float32 before it is returned.
    """
    batch = [text]
    vectors = query_embedder.encode(batch, convert_to_numpy=True)
    return vectors.astype("float32")
49
+
50
def expand_with_llama(query):
    """Append LLM-generated search hints to *query*.

    The module-level ``hf_client`` is prompted to return either matching
    artist/song titles or lyric-style keywords; its reply is flattened to a
    single line and appended to the original query.

    Args:
        query: The user's free-text search string.

    Returns:
        ``query`` followed by a space and the model's reply, or ``query``
        unchanged when no client is configured or the remote call fails.
    """
    import logging  # local import keeps this block self-contained

    if not hf_client:
        return query
    prompt = f"""You are helping someone search a lyrics catalog.
If the input looks like lyrics or a singer name, return artist and song titles that match.
Otherwise, return a short list of lyric-style keywords related to the input sentence.

Input: {query}
Output:"""
    try:
        response = hf_client.text_generation(prompt, max_new_tokens=96, temperature=0.2, repetition_penalty=1.05)
    except Exception:
        # Expansion is an optional enhancement that depends on a remote
        # service (token, model access, rate limits, network). A failure here
        # must not abort the search, so record it and use the raw query.
        logging.getLogger(__name__).warning(
            "Query expansion failed; searching with the raw query.", exc_info=True
        )
        return query
    # Newlines are collapsed so the encoder receives one flat string.
    return query + " " + str(response).strip().replace("\n", " ")
61
+
62
def distances_to_similarity_pct(dists):
    """Rescale distances to a 0-100 similarity score relative to this batch.

    The smallest distance maps to 100 and the largest to 0 (min-max scaling),
    so scores are only comparable within one result set.

    Args:
        dists: Array-like of distances (NumPy array, list or tuple).

    Returns:
        A NumPy array of scores: empty for empty input, all 100 when every
        distance is equal, otherwise the linearly rescaled values.
    """
    # Coerce once so plain lists/tuples work as well as NumPy arrays.
    values = np.asarray(dists)
    if values.size == 0:
        return np.array([])
    dmin, dmax = values.min(), values.max()
    span = dmax - dmin
    if span == 0:
        # All distances equal: avoid dividing by zero and call them all a full match.
        return np.ones_like(values) * 100
    return 100 * (1 - (values - dmin) / span)
68
+
69
def label_vibes(sim):
    """Translate a similarity percentage into a human-friendly label.

    Tiers are checked from highest to lowest; the first lower bound that
    ``sim`` reaches wins, and anything below 60 is "pretty random".
    """
    tiers = (
        (90, "dead-on"),
        (80, "strong vibes"),
        (70, "adjacent"),
        (60, "stretch but related"),
    )
    for lower_bound, label in tiers:
        if sim >= lower_bound:
            return label
    return "pretty random"
75
+
76
def semantic_search(query, k=10, random_extra=0, use_llama=True):
    """Return the catalog rows closest in meaning to *query*.

    Args:
        query: Free-text description. ``None``, empty or whitespace-only
            input yields an empty result.
        k: Number of nearest neighbours requested from the FAISS index.
        random_extra: Number of additional rows sampled uniformly from the
            catalog (without replacement) and appended after the matches.
        use_llama: When true, the query is first passed through
            ``expand_with_llama``.

    Returns:
        A DataFrame with a fresh 0..n-1 index holding the catalog columns
        plus ``similarity_pct``, ``vibes`` and ``is_random``. Random rows
        carry NaN similarity and the label "pure random".
    """
    if not query or not query.strip():
        return pd.DataFrame(columns=["artist", "song", "similarity_pct", "vibes", "is_random"])

    q_text = expand_with_llama(query) if use_llama else query
    q_vec = encode_query(q_text)
    dists, idxs = index.search(q_vec, k)

    # FAISS pads the id array with -1 when it finds fewer than k neighbours.
    # iloc[-1] would silently resolve to the last catalog row, so drop the
    # padding (and its distances) before touching the DataFrame.
    found = idxs[0] >= 0
    sem_df = df_combined.iloc[idxs[0][found]].copy()
    sem_df["similarity_pct"] = distances_to_similarity_pct(dists[0][found])
    sem_df["vibes"] = sem_df["similarity_pct"].apply(label_vibes)
    sem_df["is_random"] = False

    if random_extra <= 0:
        # Nothing to append: skip the concat with an empty placeholder frame
        # and just renumber the rows.
        return sem_df.reset_index(drop=True)

    chosen = np.random.choice(len(df_combined), size=min(random_extra, len(df_combined)), replace=False)
    rand_df = df_combined.iloc[chosen].copy()
    rand_df["similarity_pct"] = np.nan
    rand_df["vibes"] = "pure random"
    rand_df["is_random"] = True
    return pd.concat([sem_df, rand_df], ignore_index=True)
95
+
96
def lookup_spotify_track_smart(artist, song):
    """Find the best-matching Spotify track for an artist/song pair.

    Up to three candidates are requested from the module-level Spotify client
    ``sp``; the one whose title is most similar to *song* (case-insensitive
    ``SequenceMatcher`` ratio) is chosen.

    Args:
        artist: Artist name used in the ``artist:`` search filter.
        song: Song title used in the ``track:`` filter and for ranking.

    Returns:
        ``(spotify_url, album_image_url)``. Both are ``None`` when Spotify is
        not configured, nothing matches, or the lookup fails; the image alone
        is ``None`` when the chosen track's album has no artwork.
    """
    if not sp:
        return None, None
    q = f"track:{song} artist:{artist}"
    try:
        results = sp.search(q, type="track", limit=3)
        items = results["tracks"]["items"]
        if not items:
            return None, None
        wanted = song.lower()
        best = max(items, key=lambda t: SequenceMatcher(None, t["name"].lower(), wanted).ratio())
        url = best["external_urls"]["spotify"]
        # An album can come back with an empty image list; keep the playable
        # link instead of discarding the whole match over missing artwork.
        images = best["album"]["images"]
        return url, (images[0]["url"] if images else None)
    except Exception:
        # Deliberately best-effort: enrichment must never break the search
        # (network/API errors, unexpected payloads, non-string titles).
        return None, None
108
+
109
def search_pipeline(query, k=10, random_extra=0, use_llama=True):
    """Run the semantic search and attach Spotify links to every hit.

    Adds ``spotify_url`` and ``album_image`` columns to the search results.
    Both are filled with ``None`` when there are no results or no Spotify
    client; otherwise each row is looked up individually.
    """
    res = semantic_search(query, k, random_extra, use_llama)

    if res.empty or not sp:
        res["spotify_url"] = None
        res["album_image"] = None
        return res

    # One (url, image) pair per row, in row order.
    links = [
        lookup_spotify_track_smart(row["artist"], row["song"])
        for _, row in res.iterrows()
    ]
    res["spotify_url"] = [url for url, _ in links]
    res["album_image"] = [image for _, image in links]
    return res
120
+
121
+ # ---------- HTML builders ----------
122
def make_bg_style_html(query=None):
    """Return a ``<style>`` tag defining the two background colour variables.

    ``query`` is accepted but not used; the colours are fixed.
    """
    top_colour = "#1e293b"
    bottom_colour = "#020617"
    css_vars = f"--hf-bg-top:{top_colour};--hf-bg-bottom:{bottom_colour};"
    return "<style>:root {" + css_vars + "}</style>"
125
+
126
def results_to_lux_html(results, query):
    """Render search results as a block of HTML cards.

    Args:
        results: DataFrame with ``song``, ``artist``, ``vibes``,
            ``similarity_pct`` and ``is_random`` columns, and optionally
            ``spotify_url`` / ``album_image``. ``None`` or an empty frame
            produces the placeholder prompt.
        query: Accepted for interface compatibility; not used.

    Returns:
        An HTML string: either the placeholder ``div`` or a ``#lux-wrapper``
        ``div`` containing one ``.lux-card`` per row.
    """
    import html  # local import keeps this block self-contained

    if results is None or results.empty:
        return "<div class='lux-empty'>🎧 Describe a vibe to start.</div>"

    def as_text(value):
        # Catalog text may contain <, > or &; escape it so it is displayed
        # literally instead of being parsed as markup.
        return html.escape(str(value), quote=False)

    cards = []
    for _, r in results.iterrows():
        image_url = r.get("album_image")
        track_url = r.get("spotify_url")
        similarity = r["similarity_pct"]

        # URLs sit inside single-quoted attributes, so quotes are escaped too.
        if isinstance(image_url, str):
            cover = f"<img src='{html.escape(image_url, quote=True)}' class='lux-cover'/>"
        else:
            cover = "♪"
        if isinstance(track_url, str):
            btn = f"<a href='{html.escape(track_url, quote=True)}' target='_blank' class='lux-btn'>▶ Play on Spotify</a>"
        else:
            btn = ""

        # Random picks carry NaN similarity and show no percentage.
        sim = f"{int(similarity)}%" if pd.notnull(similarity) else ""
        chip = "🎲 random pick" if r["is_random"] else ""
        song = as_text(r["song"])
        artist = as_text(r["artist"])
        vibe = as_text(r["vibes"])
        cards.append(
            f"<div class='lux-card'>{cover}<div class='lux-meta'><h3>{song}</h3><p>{artist}</p><span>{vibe}</span> {sim} {chip}{btn}</div></div>"
        )
    return "<div id='lux-wrapper'>" + "".join(cards) + "</div>"
140
+
141
def get_random_vibe():
    """Compose a random example prompt from a topic, a perspective and a mood.

    Each part is drawn independently with ``random.choice``, in the order
    topic, perspective, mood.
    """
    topic = random.choice(
        ["late-night drives", "dog bloopers", "breakups", "sunset beaches", "college nostalgia"]
    )
    perspective = random.choice(
        ["first-person", "third-person", "group", "inner monologue"]
    )
    mood = random.choice(["dreamy", "chaotic", "romantic", "melancholic"])
    return "Lyrics about " + topic + ", told in " + perspective + ", " + mood + "."
146
+
147
def clear_all():
    """Return reset values for the query box, the results pane and the background.

    The tuple order matches the outputs wired to the Clear button:
    empty query text, the placeholder prompt, and the default style tag.
    """
    empty_query = ""
    placeholder = "<div class='lux-empty'>🎧 Describe a vibe to start.</div>"
    return empty_query, placeholder, make_bg_style_html()
149
+
150
def search_with_bg(query, k, random_extra):
    """Gradio callback: run a search and return the cards plus the style tag.

    The two count arguments are converted with ``int`` before use, and LLM
    query expansion is always enabled on this path.
    """
    n_matches = int(k)
    n_random = int(random_extra)
    found = search_pipeline(query, n_matches, n_random, use_llama=True)
    cards_html = results_to_lux_html(found, query)
    return cards_html, make_bg_style_html(query)
153
+
154
# ---------- CSS ----------
# Stylesheet handed to gr.Blocks(css=...).
# - The page background reads the --hf-bg-top / --hf-bg-bottom custom
#   properties emitted by make_bg_style_html(); the hex values inside var()
#   are fallbacks used when those properties are not set.
# - .lux-card / .lux-cover / .lux-btn style the markup produced by
#   results_to_lux_html(); .primary-btn / .secondary-btn are attached to the
#   buttons through elem_classes.
app_css = """
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;800;900&display=swap');
body,.gradio-container{
background:radial-gradient(circle at 50% 0%,var(--hf-bg-top,#1e293b),var(--hf-bg-bottom,#020617) 80%)!important;
color:#e5e7eb;font-family:'Inter',system-ui,-apple-system,BlinkMacSystemFont,"Segoe UI",sans-serif!important;
}
.lux-card{background:rgba(255,255,255,0.05);border-radius:1rem;padding:1rem;margin:0.5rem;}
.lux-cover{width:100%;border-radius:1rem;}
.lux-btn{display:inline-block;margin-top:0.5rem;padding:0.4rem 0.8rem;background:#1db954;border-radius:9999px;color:white;text-decoration:none;}
.primary-btn{background:#1db954;color:white;border-radius:1rem;}
.secondary-btn{background:#334155;color:white;border-radius:1rem;}
"""
167
+
168
# ---------- Gradio UI ----------
# NOTE(review): the nesting of the `with` blocks below was reconstructed from
# a flattened diff view in which indentation was lost -- confirm it against
# the intended layout before relying on it.
with gr.Blocks(css=app_css, title="HarmoniFind") as demo:
    # HTML slot that holds the <style> tag with the background colour
    # variables; it is also an output of the search and clear callbacks.
    bg_style = gr.HTML(make_bg_style_html())
    # Static page header.
    gr.HTML("""
<div id="hf-shell">
<div id="lux-header">
<div class="lux-subline">HARMONIFIND • LYRICS-DRIVEN SEMANTIC SEARCH</div>
<h1>Describe Your Song.</h1>
<p>We search by what the lyrics <strong>mean</strong>, not just titles or genres.</p>
</div>
</div>
""")
    with gr.Column():
        # Query box with the three action buttons beside it.
        with gr.Row(variant="compact"):
            input_box = gr.Textbox(
                placeholder="Lyrics about a carefree road trip with too many snack stops",
                show_label=False,
                lines=3,
                scale=5,
            )
            with gr.Column(scale=2,min_width=160):
                search_btn = gr.Button("Search",elem_classes=["primary-btn"])
                surprise_btn = gr.Button("🎲 Surprise me",elem_classes=["secondary-btn"])
                clear_btn = gr.Button("Clear",elem_classes=["secondary-btn"])
        # Collapsed by default: result-count controls.
        with gr.Accordion("Search settings",open=False):
            with gr.Row():
                k_slider = gr.Slider(5,50,value=10,step=1,label="# semantic matches")
                rand_slider = gr.Slider(0,10,value=2,step=1,label="# extra random tracks")
        # Receives the rendered result cards.
        output_html = gr.HTML()

    # Pressing Enter in the box and clicking Search run the same callback.
    input_box.submit(search_with_bg,[input_box,k_slider,rand_slider],[output_html,bg_style])
    search_btn.click(search_with_bg,[input_box,k_slider,rand_slider],[output_html,bg_style])
    # "Surprise me" first fills the box with a generated prompt, then searches with it.
    surprise_btn.click(get_random_vibe,outputs=input_box).then(search_with_bg,[input_box,k_slider,rand_slider],[output_html,bg_style])
    # Clear resets the query box, the results pane and the background style.
    clear_btn.click(clear_all,None,[input_box,output_html,bg_style])
202
+
203
# Start the web server only when the file is executed as a script.
if __name__ == "__main__":
    # Listens on every network interface (0.0.0.0) on port 7860.
    demo.launch(server_name="0.0.0.0", server_port=7860)