TutuAwad committed on
Commit
3f33df2
·
verified ·
1 Parent(s): 8de919a

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +268 -0
app.py ADDED
@@ -0,0 +1,268 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import numpy as np
4
+ import spotipy
5
+ from spotipy.oauth2 import SpotifyClientCredentials
6
+ from sentence_transformers import SentenceTransformer
7
+ import faiss
8
+ import os
9
+ import random
10
+ import difflib
11
+ import re
12
+ import urllib.parse
13
+ from langchain_community.llms import HuggingFaceEndpoint
14
+
15
+ # ---------------------------------------------------------
16
+ # 1. SETUP & AUTHENTICATION
17
+ # ---------------------------------------------------------
18
+
19
# Load environment variables (set these in the Space settings).
# os.getenv returns None for any variable that is not defined.
SPOTIPY_CLIENT_ID = os.getenv("SPOTIPY_CLIENT_ID")
SPOTIPY_CLIENT_SECRET = os.getenv("SPOTIPY_CLIENT_SECRET")
HF_TOKEN = os.getenv("HF_TOKEN")

# Set up the Spotify client with app-level client credentials.
auth_manager = SpotifyClientCredentials(
    client_id=SPOTIPY_CLIENT_ID,
    client_secret=SPOTIPY_CLIENT_SECRET,
)
sp = spotipy.Spotify(auth_manager=auth_manager)

# Set up the LLM (serverless inference - no massive GPU needed locally).
# We use Mistral or Zephyr (faster/better than Llama 2 for this) or Llama 2 via API.
repo_id = "mistralai/Mistral-7B-Instruct-v0.2"

llm = HuggingFaceEndpoint(
    repo_id=repo_id,
    # The endpoint's output-length cap is `max_new_tokens`; `max_length` is
    # not one of its declared fields, so the 128-token limit is passed under
    # the supported name.
    max_new_tokens=128,
    temperature=0.5,
    huggingfacehub_api_token=HF_TOKEN,
)
38
+
39
+ # ---------------------------------------------------------
40
+ # 2. DATA LOADING & VECTOR INDEXING
41
+ # ---------------------------------------------------------
42
print("⏳ Loading Data...")
df = pd.read_csv("data.csv")

# Data cleaning (same as the notebook): whitespace-only cells become NaN,
# then each text column is cast to str and its line breaks are flattened
# into single spaces.
df = df.replace(r"^\s*$", np.nan, regex=True)
for column in ('text', 'song', 'artist'):
    df[column] = df[column].astype(str).str.replace(r"\r|\n", " ", regex=True)

# One lower-cased, punctuation-free string per song; this is what gets embedded.
combined_raw = (
    "Title: " + df['song'].str.strip()
    + "; Artist: " + df['artist'].str.strip()
    + "; Lyrics: " + df['text'].str.strip()
)
df['combined'] = combined_raw.str.lower().str.replace(r"[^a-z0-9\s]", "", regex=True)

print("⏳ Loading Embedding Model...")
embedder = SentenceTransformer('all-mpnet-base-v2')

print("⏳ Creating FAISS Index (This runs once on startup)...")
# The index is rebuilt on every startup to ensure compatibility with the
# CPU environment.
df_embeddings = embedder.encode(df['combined'].tolist(), show_progress_bar=True)
d = df_embeddings.shape[1]  # embedding dimensionality
index = faiss.IndexFlatL2(d)
index.add(df_embeddings)
print(f"✅ Index built with {index.ntotal} songs.")

GENERIC_ARTISTS = [
    "religious music",
    "christmas songs",
    "various artists",
    "soundtrack",
    "unknown",
    "traditional",
]
69
+
70
+ # ---------------------------------------------------------
71
+ # 3. HELPER FUNCTIONS
72
+ # ---------------------------------------------------------
73
def clean_metadata(text):
    """Return *text* as a stripped string without parenthesised segments.

    The value is converted with ``str()``, the spellings "X-mas"/"x-mas" are
    rewritten to "Xmas"/"xmas", every ``(...)`` group is removed, and
    leading/trailing whitespace is trimmed.
    """
    cleaned = str(text)
    for old, new in (("X-mas", "Xmas"), ("x-mas", "xmas")):
        cleaned = cleaned.replace(old, new)
    without_parens = re.sub(r'\([^)]*\)', '', cleaned)
    return without_parens.strip()
78
+
79
def normalize_text(text):
    """Lower-case ``str(text)`` and drop everything except ASCII letters, digits and whitespace."""
    lowered = str(text).lower()
    return re.sub(r'[^a-zA-Z0-9\s]', '', lowered)
81
+
82
def get_best_spotify_match(artist, title):
    """Find the best Spotify link/image for a song.

    Searches Spotify for ``"<artist> <title>"`` (both passed through
    ``clean_metadata``) and, among up to five returned tracks, picks the one
    whose artist names are most similar to *artist*.

    Args:
        artist: Artist name as stored in the dataset.
        title: Song title as stored in the dataset.

    Returns:
        A ``(spotify_url, image_url)`` tuple. Both are ``None`` when the
        search fails, returns nothing, or no candidate has any artist
        similarity. ``image_url`` alone is ``None`` when the matched album
        lists no images.
    """
    artist_clean = clean_metadata(artist)
    title_clean = clean_metadata(title)
    query = f"{artist_clean} {title_clean}"

    try:
        results = sp.search(q=query, type='track', limit=5, market='US')
        items = results['tracks']['items']
    except Exception as exc:
        # Best-effort lookup: a failed search must not break the whole result
        # list, but the reason is logged and KeyboardInterrupt/SystemExit are
        # no longer swallowed.
        print(f"⚠️ Spotify lookup failed for '{query}': {exc}")
        return None, None

    if not items:
        return None, None

    # NOTE(review): candidates are ranked by artist similarity only; the title
    # and GENERIC_ARTISTS are not consulted here - confirm that is intended.
    target_artist = normalize_text(artist)

    def artist_similarity(item):
        track_artists = " ".join(normalize_text(a['name']) for a in item['artists'])
        return difflib.SequenceMatcher(None, target_artist, track_artists).ratio()

    # max() keeps the first of equally scored candidates, i.e. Spotify's own
    # ranking breaks ties.
    best_score, best_match = max(
        ((artist_similarity(item), item) for item in items),
        key=lambda scored: scored[0],
    )
    if best_score <= 0.0:
        return None, None

    url = best_match['external_urls']['spotify']
    images = best_match['album']['images']
    img = images[0]['url'] if images else None
    return url, img
112
+
113
def get_theme_colors(query):
    """Pick a colour palette deterministically from the MD5 hash of *query*.

    Returns a dict with the keys ``name``, ``accent``, ``bg_grad``, ``text``
    and ``btn_text``. The same query string always maps to the same palette.
    """
    fields = ("name", "accent", "bg_grad", "text", "btn_text")
    rows = (
        ("Spotify Classic", "#1DB954", "linear-gradient(135deg, #103018 0%, #000000 100%)", "#1DB954", "#000000"),
        ("Midnight Purple", "#D0BCFF", "linear-gradient(135deg, #240046 0%, #000000 100%)", "#D0BCFF", "#000000"),
        ("Sunset Orange", "#FF9900", "linear-gradient(135deg, #4a1c05 0%, #000000 100%)", "#FFB347", "#000000"),
        ("Ocean Blue", "#00E5FF", "linear-gradient(135deg, #001f3f 0%, #000000 100%)", "#7FDBFF", "#000000"),
        ("Neon Pink", "#FF4081", "linear-gradient(135deg, #3a0000 0%, #000000 100%)", "#FF80AB", "#000000"),
    )
    palettes = [dict(zip(fields, row)) for row in rows]

    # The hex digest read as one big integer, reduced modulo the palette count.
    digest = hashlib.md5(query.encode()).hexdigest()
    return palettes[int(digest, 16) % len(palettes)]
124
+
125
def get_random_vibe():
    """Return one of the built-in example prompts, chosen with ``random.choice``."""
    vibes = (
        "A cyberpunk chase scene through Tokyo neon rain",
        "Drinking coffee on a porch while it storms outside",
        "The feeling of realizing you are falling out of love",
        "A villain explaining their plan while drinking wine",
        "Driving a convertible down the coast at sunset",
        "Waking up in a spaceship alone",
    )
    return random.choice(vibes)
135
+
136
+ import hashlib
137
+
138
+ # ---------------------------------------------------------
139
+ # 4. SEARCH LOGIC
140
+ # ---------------------------------------------------------
141
def harmonifind_search(user_query, k=7, use_llama=True):
    """Run a semantic song search and attach Spotify metadata to the hits.

    Args:
        user_query: Free-text description of the desired vibe.
        k: Number of nearest neighbours requested from the FAISS index.
        use_llama: When true, the LLM is asked for mood/genre keywords, which
            are appended to the query before embedding. Any LLM failure is
            logged and the raw query is used instead.

    Returns:
        A copy of the matching rows of ``df`` with three extra columns:
        ``match_score`` (int, 0-100), ``spotify_url`` and ``image`` (each a
        string or ``None``). Fewer than *k* rows are returned when the index
        holds fewer than *k* songs.
    """
    search_query = user_query

    if use_llama:
        try:
            # Uses the hosted inference API.
            prompt = (
                f"User Query: '{user_query}'\n"
                "Output exactly 5 descriptive keywords regarding the mood, "
                "instruments, or genre. Do not output full sentences. Keywords:"
            )
            raw_response = llm.invoke(prompt)
            keywords = raw_response.replace("\n", " ").strip()
            print(f"🧠 AI Keywords: {keywords}")
            search_query = f"{user_query} {keywords}"
        except Exception as e:
            # Keyword expansion is optional; fall back to the plain query.
            print(f"⚠️ AI skipped: {e}")

    q_vec = embedder.encode([search_query])
    distances, indices = index.search(q_vec, k)

    # FAISS pads the result with label -1 when fewer than k vectors are
    # available; with .iloc a -1 would silently select the LAST song, so
    # padded hits are dropped first.
    hits = [(idx, dist) for idx, dist in zip(indices[0], distances[0]) if idx >= 0]
    results_df = df.iloc[[idx for idx, _ in hits]].copy()

    # Simple heuristic to convert the L2 distance to a percentage:
    # distance 0 -> 100 %, distance >= 1.5 -> 0 %, clamped to [0, 100].
    results_df['match_score'] = [
        int(max(0, min(100, (1 - (dist / 1.5)) * 100))) for _, dist in hits
    ]

    print("🎵 Fetching Spotify metadata...")
    s_urls, s_imgs = [], []
    for artist, song in zip(results_df['artist'], results_df['song']):
        url, image = get_best_spotify_match(artist, song)
        s_urls.append(url)
        s_imgs.append(image)

    results_df['spotify_url'] = s_urls
    results_df['image'] = s_imgs
    return results_df
177
+
178
+ # ---------------------------------------------------------
179
+ # 5. UI GENERATOR
180
+ # ---------------------------------------------------------
181
def gradio_interface_fn(query):
    """Render the search results for *query* as a self-contained HTML card list.

    Args:
        query: The text typed by the user. An empty value yields an empty
            string without running a search.

    Returns:
        An HTML string (inline ``<style>`` plus one row per result) for the
        ``gr.HTML`` output component.
    """
    # Imported locally under its own name so it cannot clash with variables
    # named ``html``; needed because user and dataset text goes into markup.
    from html import escape

    if not query:
        return ""
    df_results = harmonifind_search(query, k=7, use_llama=True)
    theme = get_theme_colors(query)

    # Prepare share links (percent-encoded, so safe inside the href attribute).
    share_text = urllib.parse.quote(f"Listening to '{query}' via HarmoniFind 🎵")
    share_url_x = f"https://twitter.com/intent/tweet?text={share_text}"

    # The query is untrusted user input: escape it before embedding in HTML.
    safe_query = escape(query)

    parts = [f"""
    <style>
        @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;700;800&display=swap');
        .hf-container {{
            background: {theme['bg_grad']}; color: white; font-family: 'Inter', sans-serif;
            border-radius: 24px; padding: 40px; box-shadow: 0 20px 60px rgba(0,0,0,0.8);
        }}
        .hf-header {{ border-bottom: 1px solid rgba(255,255,255,0.1); padding-bottom: 20px; margin-bottom: 20px; display: flex; justify-content: space-between; align-items: flex-end; }}
        .hf-title {{ font-size: 2rem; font-weight: 800; margin: 0; }}
        .hf-meta {{ font-size: 0.9rem; opacity: 0.7; text-transform: uppercase; }}
        .track-row {{ display: flex; align-items: center; background: rgba(0,0,0,0.2); margin-bottom: 15px; padding: 15px; border-radius: 12px; gap: 20px; }}
        .track-row:hover {{ background: rgba(255,255,255,0.1); }}
        .cover-img {{ width: 70px; height: 70px; border-radius: 8px; object-fit: cover; }}
        .info-col {{ flex-grow: 1; }}
        .song-name {{ font-weight: 700; font-size: 1.1rem; display: block; }}
        .play-btn {{ background: {theme['accent']}; color: {theme['btn_text']}; padding: 10px 25px; border-radius: 50px; text-decoration: none; font-weight: 800; }}
    </style>

    <div class="hf-container">
        <div class="hf-header">
            <div>
                <h1 class="hf-title">"{safe_query}"</h1>
                <div class="hf-meta">Vibe: {theme['name']}</div>
            </div>
            <a href="{share_url_x}" target="_blank" style="color:white; text-decoration:none; opacity:0.7;">Share on X ↗</a>
        </div>
    """]

    for _, row in df_results.iterrows():
        # Dataset and Spotify values are escaped too, so stray quotes or angle
        # brackets cannot break out of the markup.
        img_url = escape(row['image']) if row['image'] else "https://via.placeholder.com/150"
        if row['spotify_url']:
            btn = f'<a href="{escape(row["spotify_url"])}" target="_blank" class="play-btn">PLAY</a>'
        else:
            btn = '<span style="opacity:0.5">No Link</span>'
        song = escape(str(row['song']))
        artist = escape(str(row['artist']))

        parts.append(f"""
        <div class="track-row">
            <div style="font-weight:800; font-size:1.2rem; min-width:50px; text-align:center; color:{theme['text']}">{row['match_score']}%</div>
            <img src="{img_url}" class="cover-img">
            <div class="info-col">
                <span class="song-name">{song}</span>
                <span style="opacity:0.8">{artist}</span>
            </div>
            {btn}
        </div>
        """)

    parts.append("</div>")
    return "".join(parts)
235
+
236
+ # ---------------------------------------------------------
237
+ # 6. LAUNCH
238
+ # ---------------------------------------------------------
239
# Extra CSS for the search bar row (referenced below via elem_classes).
css = """
.search-row { align-items: center !important; gap: 15px !important; }
.search-input textarea { font-size: 1.1rem !important; }
.action-btn { height: 50px !important; border-radius: 12px !important; font-weight: bold !important; }
"""

# Dark variant of the Soft theme.
theme = gr.themes.Soft(primary_hue="zinc", neutral_hue="zinc").set(
    body_background_fill="#000000",
    block_background_fill="#121212",
    block_border_width="0px",
)

with gr.Blocks(theme=theme, css=css, title="HarmoniFind") as demo:
    # Page header.
    gr.HTML("""
    <div style="text-align:center; padding: 40px 0; color:white;">
        <h1 style="font-size: 3rem; font-weight:900; background: -webkit-linear-gradient(45deg, #eee, #999); -webkit-background-clip: text; -webkit-text-fill-color: transparent;">HarmoniFind</h1>
        <p style="opacity: 0.6;">AI-Powered Semantic Music Discovery</p>
    </div>
    """)

    # Search bar: text input, "surprise me" button and search button.
    with gr.Row(elem_classes="search-row"):
        input_box = gr.Textbox(
            show_label=False,
            placeholder="Describe a vibe (e.g. 'Driving fast at night')...",
            scale=10,
            elem_classes="search-input",
        )
        surprise_btn = gr.Button("🎲", scale=1, variant="secondary", elem_classes="action-btn")
        search_btn = gr.Button("Search", scale=2, variant="primary", elem_classes="action-btn")

    out = gr.HTML()

    # Pressing Enter and clicking "Search" both run the search; the dice
    # button only fills the input box with a random example prompt.
    input_box.submit(fn=gradio_interface_fn, inputs=input_box, outputs=out)
    search_btn.click(fn=gradio_interface_fn, inputs=input_box, outputs=out)
    surprise_btn.click(fn=get_random_vibe, inputs=None, outputs=input_box)

demo.queue().launch()