TutuAwad committed on
Commit
bfc6270
·
verified ·
1 Parent(s): ab4c624

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -283
app.py DELETED
@@ -1,283 +0,0 @@
1
- import gradio as gr
2
- import pandas as pd
3
- import numpy as np
4
- import spotipy
5
- from spotipy.oauth2 import SpotifyClientCredentials
6
- from sentence_transformers import SentenceTransformer
7
- import faiss
8
- import os
9
- import random
10
- import difflib
11
- import re
12
- import urllib.parse
13
- from langchain_community.llms import HuggingFaceEndpoint
14
-
15
- # ---------------------------------------------------------
16
- # 1. SETUP & AUTHENTICATION
17
- # ---------------------------------------------------------
18
-
19
- # Load Environment Variables from Space Settings
20
# Credentials are read from the process environment (configured as Space settings).
SPOTIPY_CLIENT_ID = os.environ.get("SPOTIPY_CLIENT_ID")
SPOTIPY_CLIENT_SECRET = os.environ.get("SPOTIPY_CLIENT_SECRET")
HF_TOKEN = os.environ.get("HF_TOKEN")

# Spotify client authenticated with the app-level client-credentials manager.
auth_manager = SpotifyClientCredentials(
    client_id=SPOTIPY_CLIENT_ID,
    client_secret=SPOTIPY_CLIENT_SECRET,
)
sp = spotipy.Spotify(auth_manager=auth_manager)

# Remote LLM endpoint (Mistral-7B Instruct served through the Inference API).
repo_id = "mistralai/Mistral-7B-Instruct-v0.2"

llm = HuggingFaceEndpoint(
    repo_id=repo_id,
    max_length=128,
    temperature=0.5,
    huggingfacehub_api_token=HF_TOKEN,
)
37
-
38
- # ---------------------------------------------------------
39
- # 2. DATA LOADING (The Safe Way)
40
- # ---------------------------------------------------------
41
print("⏳ Loading Data...")

# 1. Load CSV
try:
    df_combined = pd.read_csv("data.csv")
    # Ensure text columns are strings so later string handling never sees NaN/float values.
    df_combined['text'] = df_combined['text'].astype(str)
    df_combined['song'] = df_combined['song'].astype(str)
    df_combined['artist'] = df_combined['artist'].astype(str)
    print("✅ CSV Loaded")
except Exception as e:
    print(f"❌ Error loading data.csv: {e}")
    # Always bind `df_combined`: without this fallback the name would be undefined
    # after a failed load and the first search would die with a NameError.
    df_combined = pd.DataFrame(columns=['text', 'song', 'artist'])

# 2. Load Embeddings (Crucial Step)
print("⏳ Loading Embeddings from .npz...")
try:
    # np.load on an .npz returns a lazily-read archive that holds the file open;
    # the context manager closes it as soon as the array has been materialised.
    with np.load("df_embed.npz") as data:
        # FAISS expects a C-contiguous float32 matrix; convert once up front
        # (a no-op copy-wise when the stored array already has that layout).
        df_embeddings = np.ascontiguousarray(data['df_embeddings'], dtype=np.float32)
    print(f"✅ Embeddings Loaded. Shape: {df_embeddings.shape}")

    # Create FAISS Index on CPU.
    # IndexFlatL2 is exact (brute-force) and needs no training step.
    d = df_embeddings.shape[1]
    index = faiss.IndexFlatL2(d)
    index.add(df_embeddings)
    print(f"✅ FAISS Index ready with {index.ntotal} vectors.")

except Exception as e:
    print(f"❌ Error loading df_embed.npz: {e}")
    print("CRITICAL: Make sure you uploaded 'df_embed.npz' to the Files tab.")
    # Create an empty placeholder index so the app still starts; searches return nothing useful.
    # NOTE(review): 768 presumably matches the embedding width of the model loaded below - confirm.
    index = faiss.IndexFlatL2(768)

# 3. Load Model (only needed to encode the USER query, not the database)
print("⏳ Loading Sentence Transformer...")
embedder = SentenceTransformer('all-mpnet-base-v2')

GENERIC_ARTISTS = ["religious music", "christmas songs", "various artists", "soundtrack", "unknown", "traditional"]
80
-
81
- # ---------------------------------------------------------
82
- # 3. HELPER FUNCTIONS
83
- # ---------------------------------------------------------
84
def clean_metadata(text):
    """Tidy an artist or song title before it is used in a Spotify search query.

    The value is coerced to ``str``, the spellings "X-mas"/"x-mas" are rewritten
    to "Xmas"/"xmas", and every parenthesised segment such as "(Live)" or
    "(feat. Someone)" is removed.

    Args:
        text: Raw metadata value; any object is accepted and stringified.

    Returns:
        The cleaned string with leading/trailing whitespace removed and runs of
        internal whitespace collapsed to a single space.
    """
    text = str(text)
    text = text.replace("X-mas", "Xmas").replace("x-mas", "xmas")
    text = re.sub(r'\([^)]*\)', '', text)
    # Dropping a parenthetical from the middle of a title leaves two adjacent
    # spaces behind; split/join collapses those and also strips both ends.
    return " ".join(text.split())
89
-
90
def normalize_text(text):
    """Reduce a string to a lowercase, punctuation-free form for fuzzy comparison.

    Accented letters are folded to their base letter (for example "é" becomes
    "e") and letters from any script are kept, so non-English names are not
    erased. For plain ASCII input the result is lowercase letters, digits and
    whitespace only.

    Args:
        text: Value to normalise; any object is accepted and stringified.

    Returns:
        The normalised string.
    """
    # Function-scope import keeps this helper self-contained.
    import unicodedata

    # NFKD splits an accented character into base letter + combining mark;
    # discarding the marks folds "Beyoncé" to "Beyonce" instead of losing the letter.
    decomposed = unicodedata.normalize("NFKD", str(text))
    folded = "".join(ch for ch in decomposed if not unicodedata.combining(ch))
    # \w is Unicode-aware for str patterns; the underscore is removed explicitly
    # because \w would otherwise let it through.
    return re.sub(r'[^\w\s]|_', '', folded.lower())
92
-
93
def get_best_spotify_match(artist, title):
    """Find the Spotify track that best matches a catalogue artist/title pair.

    The cleaned artist and title are sent as one free-text track search
    (up to 5 results, US market). Each candidate is scored with
    ``difflib.SequenceMatcher`` on normalised text: artist similarity weighted
    0.6 plus title similarity weighted 0.4. The highest-scoring candidate wins.

    Args:
        artist: Artist name from the catalogue.
        title: Song title from the catalogue.

    Returns:
        A ``(spotify_url, image_url)`` tuple. Either element may be ``None``;
        ``(None, None)`` is returned when the search fails, yields no items, or
        no candidate scores above zero.
    """
    artist_clean = clean_metadata(artist)
    title_clean = clean_metadata(title)
    query = f"{artist_clean} {title_clean}"

    try:
        results = sp.search(q=query, type='track', limit=5, market='US')
        items = results['tracks']['items']
    except Exception as exc:
        # The lookup is best-effort (a missing link must not break the search),
        # but report the failure instead of hiding it, and do not trap
        # KeyboardInterrupt/SystemExit the way a bare `except:` would.
        print(f"⚠️ Spotify lookup failed for '{query}': {exc}")
        return None, None

    if not items:
        return None, None

    # Both targets are loop-invariant, so normalise them once.
    target_artist = normalize_text(artist)
    target_title = normalize_text(title)

    best_match = None
    best_score = 0.0

    for item in items:
        track_artists = " ".join(normalize_text(a['name']) for a in item['artists'])
        artist_score = difflib.SequenceMatcher(None, target_artist, track_artists).ratio()

        found_title = normalize_text(item['name'])
        title_score = difflib.SequenceMatcher(None, target_title, found_title).ratio()

        final_score = (artist_score * 0.6) + (title_score * 0.4)

        if final_score > best_score:
            best_score = final_score
            best_match = item

    if best_match is None:
        return None, None

    # Tolerate tracks without an external link or album art rather than raising.
    url = (best_match.get('external_urls') or {}).get('spotify')
    images = (best_match.get('album') or {}).get('images') or []
    img = images[0].get('url') if images else None
    return url, img
128
-
129
def get_theme_colors(query):
    """Pick a colour palette deterministically from the query text.

    The MD5 digest of the query is interpreted as an integer and reduced modulo
    the number of palettes, so the same query always gets the same palette.
    The hash is used only for bucketing, not for any security purpose.

    Args:
        query: The search text; coerced to ``str`` before hashing.

    Returns:
        A dict with the keys ``name``, ``accent``, ``bg_grad``, ``text`` and
        ``btn_text``.
    """
    # Imported here so the function does not rely on a module-level import
    # that appears further down the file than this definition.
    import hashlib

    palettes = [
        {"name": "Spotify Classic", "accent": "#1DB954", "bg_grad": "linear-gradient(135deg, #103018 0%, #000000 100%)", "text": "#1DB954", "btn_text": "#000000"},
        {"name": "Midnight Purple", "accent": "#D0BCFF", "bg_grad": "linear-gradient(135deg, #240046 0%, #000000 100%)", "text": "#D0BCFF", "btn_text": "#000000"},
        {"name": "Sunset Orange", "accent": "#FF9900", "bg_grad": "linear-gradient(135deg, #4a1c05 0%, #000000 100%)", "text": "#FFB347", "btn_text": "#000000"},
        {"name": "Ocean Blue", "accent": "#00E5FF", "bg_grad": "linear-gradient(135deg, #001f3f 0%, #000000 100%)", "text": "#7FDBFF", "btn_text": "#000000"},
        {"name": "Neon Pink", "accent": "#FF4081", "bg_grad": "linear-gradient(135deg, #3a0000 0%, #000000 100%)", "text": "#FF80AB", "btn_text": "#000000"},
    ]
    # str() mirrors the other helpers in this file, which stringify their input
    # instead of assuming it already is a str.
    digest = hashlib.md5(str(query).encode()).hexdigest()
    return palettes[int(digest, 16) % len(palettes)]
139
-
140
def get_random_vibe():
    """Return one of the built-in example prompts, picked at random."""
    sample_prompts = (
        "A cyberpunk chase scene through Tokyo neon rain",
        "Drinking coffee on a porch while it storms outside",
        "The feeling of realizing you are falling out of love",
        "A villain explaining their plan while drinking wine",
        "Driving a convertible down the coast at sunset",
        "Waking up in a spaceship alone",
    )
    chosen = random.choice(sample_prompts)
    return chosen
150
-
151
- import hashlib
152
-
153
- # ---------------------------------------------------------
154
- # 4. SEARCH LOGIC
155
- # ---------------------------------------------------------
156
def harmonifind_search(user_query, k=7, use_llama=True):
    """Run a semantic search for songs matching a free-text description.

    Steps:
      1. Optionally ask the LLM for descriptive keywords and append them to the
         query (best-effort: on any failure the raw query is used).
      2. Encode the query with the sentence-transformer and look up the ``k``
         nearest vectors in the FAISS index.
      3. Map the hits to catalogue rows, derive a 0-100 ``match_score`` from
         the distance, and attach Spotify link / cover art for each row.

    Args:
        user_query: Free-text description typed by the user.
        k: Number of neighbours to request from the index.
        use_llama: When true, expand the query with LLM-generated keywords.

    Returns:
        A DataFrame copy of the matched catalogue rows with the extra columns
        ``match_score``, ``spotify_url`` and ``image``. It holds fewer than
        ``k`` rows (possibly none) when the index cannot supply ``k`` valid hits.
    """
    search_query = user_query

    if use_llama:
        try:
            # Query expansion goes through the remote inference endpoint.
            prompt = f"User Query: '{user_query}'\nOutput exactly 5 descriptive keywords regarding the mood, instruments, or genre. Do not output full sentences. Keywords:"
            raw_response = llm.invoke(prompt)
            keywords = raw_response.replace("\n", " ").strip()
            print(f"🧠 AI Keywords: {keywords}")
            search_query = f"{user_query} {keywords}"
        except Exception as e:
            # Expansion is optional; fall back to the user's own words.
            print(f"⚠️ AI skipped: {e}")

    # Encode the (possibly expanded) query with the local embedding model.
    q_vec = embedder.encode([search_query])

    # Search the pre-loaded index.
    distances, indices = index.search(q_vec, k)

    # FAISS fills missing neighbours with label -1 (e.g. an empty or small
    # index). iloc[-1] would silently return the LAST catalogue row as a
    # bogus match, and labels beyond the table would raise, so keep only hits
    # that map to a real row.
    n_rows = len(df_combined)
    hits = [
        (int(label), dist)
        for label, dist in zip(indices[0], distances[0])
        if 0 <= label < n_rows
    ]

    results_df = df_combined.iloc[[label for label, _ in hits]].copy()

    # Map distance to a percentage: 0 -> 100 %, 1.5 or more -> 0 %, clamped.
    results_df['match_score'] = [
        int(max(0, min(100, (1 - (dist / 1.5)) * 100))) for _, dist in hits
    ]

    print("🎵 Fetching Spotify metadata...")
    s_urls, s_imgs = [], []
    for _, row in results_df.iterrows():
        u, i = get_best_spotify_match(row['artist'], row['song'])
        s_urls.append(u)
        s_imgs.append(i)

    results_df['spotify_url'] = s_urls
    results_df['image'] = s_imgs
    return results_df
193
-
194
- # ---------------------------------------------------------
195
- # 5. UI GENERATOR
196
- # ---------------------------------------------------------
197
def gradio_interface_fn(query):
    """Build the HTML results card shown for a search query.

    Runs the search, picks a colour theme from the query text, and renders one
    row per result (match percentage, cover image, song, artist, play link).
    Every dynamic value is HTML-escaped before interpolation because the query
    is typed by the user and the song/artist/URL strings come from external
    data.

    Args:
        query: The text typed into the search box.

    Returns:
        An HTML string, or ``""`` when the query is empty.
    """
    # Function-scope import; the output is collected in `parts` rather than a
    # local named `html`, which would shadow the stdlib module of that name.
    from html import escape

    if not query:
        return ""
    df_results = harmonifind_search(query, k=7, use_llama=True)
    theme = get_theme_colors(query)

    share_text = urllib.parse.quote(f"Listening to '{query}' via HarmoniFind 🎵")
    share_url_x = f"https://twitter.com/intent/tweet?text={share_text}"
    safe_query = escape(str(query))

    parts = [f"""
    <style>
        @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;700;800&display=swap');
        .hf-container {{
            background: {theme['bg_grad']}; color: white; font-family: 'Inter', sans-serif;
            border-radius: 24px; padding: 40px; box-shadow: 0 20px 60px rgba(0,0,0,0.8);
        }}
        .hf-header {{ border-bottom: 1px solid rgba(255,255,255,0.1); padding-bottom: 20px; margin-bottom: 20px; display: flex; justify-content: space-between; align-items: flex-end; }}
        .hf-title {{ font-size: 2rem; font-weight: 800; margin: 0; }}
        .hf-meta {{ font-size: 0.9rem; opacity: 0.7; text-transform: uppercase; }}
        .track-row {{ display: flex; align-items: center; background: rgba(0,0,0,0.2); margin-bottom: 15px; padding: 15px; border-radius: 12px; gap: 20px; }}
        .track-row:hover {{ background: rgba(255,255,255,0.1); }}
        .cover-img {{ width: 70px; height: 70px; border-radius: 8px; object-fit: cover; }}
        .info-col {{ flex-grow: 1; }}
        .song-name {{ font-weight: 700; font-size: 1.1rem; display: block; }}
        .play-btn {{ background: {theme['accent']}; color: {theme['btn_text']}; padding: 10px 25px; border-radius: 50px; text-decoration: none; font-weight: 800; }}
    </style>

    <div class="hf-container">
        <div class="hf-header">
            <div>
                <h1 class="hf-title">"{safe_query}"</h1>
                <div class="hf-meta">Vibe: {theme['name']}</div>
            </div>
            <a href="{share_url_x}" target="_blank" style="color:white; text-decoration:none; opacity:0.7;">Share on X ↗</a>
        </div>
    """]

    for _, row in df_results.iterrows():
        img_url = row['image'] if row['image'] else "https://via.placeholder.com/150"
        safe_img = escape(str(img_url))
        safe_song = escape(str(row['song']))
        safe_artist = escape(str(row['artist']))

        if row['spotify_url']:
            safe_link = escape(str(row['spotify_url']))
            btn = f'<a href="{safe_link}" target="_blank" class="play-btn">PLAY</a>'
        else:
            btn = '<span style="opacity:0.5">No Link</span>'

        parts.append(f"""
        <div class="track-row">
            <div style="font-weight:800; font-size:1.2rem; min-width:50px; text-align:center; color:{theme['text']}">{row['match_score']}%</div>
            <img src="{safe_img}" class="cover-img">
            <div class="info-col">
                <span class="song-name">{safe_song}</span>
                <span style="opacity:0.8">{safe_artist}</span>
            </div>
            {btn}
        </div>
        """)
    parts.append("</div>")
    return "".join(parts)
250
-
251
- # ---------------------------------------------------------
252
- # 6. LAUNCH
253
- # ---------------------------------------------------------
254
# Extra CSS injected into the Blocks page (search row, input box, buttons).
css = """
.search-row { align-items: center !important; gap: 15px !important; }
.search-input textarea { font-size: 1.1rem !important; }
.action-btn { height: 50px !important; border-radius: 12px !important; font-weight: bold !important; }
"""

# Disabled custom theme, kept for reference:
# theme = gr.themes.Soft(primary_hue="zinc", neutral_hue="zinc").set(
#     body_background_fill="#000000", block_background_fill="#121212", block_border_width="0px"
# )

with gr.Blocks(css=css, title="HarmoniFind") as demo:
    # Page banner.
    gr.HTML("""
<div style="text-align:center; padding: 40px 0; color:white;">
<h1 style="font-size: 3rem; font-weight:900; background: -webkit-linear-gradient(45deg, #eee, #999); -webkit-background-clip: text; -webkit-text-fill-color: transparent;">HarmoniFind</h1>
<p style="opacity: 0.6;">AI-Powered Semantic Music Discovery</p>
</div>
""")

    # Search controls: text input, "surprise me" button, search button.
    with gr.Row(elem_classes="search-row"):
        input_box = gr.Textbox(
            show_label=False,
            placeholder="Describe a vibe (e.g. 'Driving fast at night')...",
            scale=10,
            elem_classes="search-input",
        )
        surprise_btn = gr.Button(
            "🎲",
            scale=1,
            variant="secondary",
            elem_classes="action-btn",
        )
        search_btn = gr.Button(
            "Search",
            scale=2,
            variant="primary",
            elem_classes="action-btn",
        )

    # Container that receives the rendered results HTML.
    out = gr.HTML()

    # Pressing Enter and clicking Search both render results; the dice button
    # only fills the input box with a sample prompt.
    input_box.submit(fn=gradio_interface_fn, inputs=input_box, outputs=out)
    search_btn.click(fn=gradio_interface_fn, inputs=input_box, outputs=out)
    surprise_btn.click(fn=get_random_vibe, inputs=None, outputs=input_box)

demo.queue().launch()