Spaces:

TutuAwad
/

HarmoniFind

Sleeping

App Files Files Community

TutuAwad committed on Nov 28, 2025

Commit

3f33df2

verified ·

1 Parent(s): 8de919a

Upload app.py

Browse files

Files changed (1) hide show

app.py +268 -0

app.py ADDED Viewed

	@@ -0,0 +1,268 @@

+import gradio as gr
+import pandas as pd
+import numpy as np
+import spotipy
+from spotipy.oauth2 import SpotifyClientCredentials
+from sentence_transformers import SentenceTransformer
+import faiss
+import os
+import random
+import difflib
+import re
+import urllib.parse
+from langchain_community.llms import HuggingFaceEndpoint
+# ---------------------------------------------------------
+# 1. SETUP & AUTHENTICATION
+# ---------------------------------------------------------
+# Load Environment Variables (Set these in Space Settings)
+SPOTIPY_CLIENT_ID = os.getenv("SPOTIPY_CLIENT_ID")
+SPOTIPY_CLIENT_SECRET = os.getenv("SPOTIPY_CLIENT_SECRET")
+HF_TOKEN = os.getenv("HF_TOKEN")
+# Setup Spotify
+auth_manager = SpotifyClientCredentials(client_id=SPOTIPY_CLIENT_ID, client_secret=SPOTIPY_CLIENT_SECRET)
+sp = spotipy.Spotify(auth_manager=auth_manager)
+# Setup LLM (Serverless Inference - No massive GPU needed locally)
+# We use Mistral or Zephyr (faster/better than Llama 2 for this) or Llama 2 via API
+repo_id = "mistralai/Mistral-7B-Instruct-v0.2"
+llm = HuggingFaceEndpoint(
+    repo_id=repo_id,
+    max_length=128,
+    temperature=0.5,
+    huggingfacehub_api_token=HF_TOKEN
+)
+# ---------------------------------------------------------
+# 2. DATA LOADING & VECTOR INDEXING
+# ---------------------------------------------------------
+print("⏳ Loading Data...")
+df = pd.read_csv("data.csv")
+# Data Cleaning (Same as your notebook)
+df = df.replace(r"^\s*$", np.nan, regex=True)
+df['text'] = df['text'].astype(str).str.replace(r"\r|\n", " ", regex=True)
+df['song'] = df['song'].astype(str).str.replace(r"\r|\n", " ", regex=True)
+df['artist'] = df['artist'].astype(str).str.replace(r"\r|\n", " ", regex=True)
+df['combined'] = (
+    "Title: " + df['song'].str.strip() +
+    "; Artist: " + df['artist'].str.strip() +
+    "; Lyrics: " + df['text'].str.strip()
+).str.lower().str.replace(r"[^a-z0-9\s]", "", regex=True)
+print("⏳ Loading Embedding Model...")
+embedder = SentenceTransformer('all-mpnet-base-v2')
+print("⏳ Creating FAISS Index (This runs once on startup)...")
+# We rebuild the index on startup to ensure compatibility with CPU environment
+df_embeddings = embedder.encode(df['combined'].tolist(), show_progress_bar=True)
+d = df_embeddings.shape[1]
+index = faiss.IndexFlatL2(d)
+index.add(df_embeddings)
+print(f"✅ Index built with {index.ntotal} songs.")
+GENERIC_ARTISTS = ["religious music", "christmas songs", "various artists", "soundtrack", "unknown", "traditional"]
+# ---------------------------------------------------------
+# 3. HELPER FUNCTIONS
+# ---------------------------------------------------------
+def clean_metadata(text):
+    text = str(text)
+    text = text.replace("X-mas", "Xmas").replace("x-mas", "xmas")
+    text = re.sub(r'\([^)]*\)', '', text)
+    return text.strip()
+def normalize_text(text):
+    return re.sub(r'[^a-zA-Z0-9\s]', '', str(text).lower())
+def get_best_spotify_match(artist, title):
+    """Finds the best Spotify link/image for a song"""
+    artist_clean = clean_metadata(artist)
+    title_clean = clean_metadata(title)
+    query = f"{artist_clean} {title_clean}"
+    try:
+        results = sp.search(q=query, type='track', limit=5, market='US')
+        items = results['tracks']['items']
+    except:
+        return None, None
+    if not items: return None, None
+    best_match = None
+    best_score = 0.0
+    target_artist = normalize_text(artist)
+    for item in items:
+        track_artists = " ".join([normalize_text(a['name']) for a in item['artists']])
+        score = difflib.SequenceMatcher(None, target_artist, track_artists).ratio()
+        if score > best_score:
+            best_score = score
+            best_match = item
+    if best_match:
+        url = best_match['external_urls']['spotify']
+        img = best_match['album']['images'][0]['url'] if best_match['album']['images'] else None
+        return url, img
+    return None, None
+def get_theme_colors(query):
+    """Generates a color theme based on the query hash"""
+    palettes = [
+        {"name": "Spotify Classic", "accent": "#1DB954", "bg_grad": "linear-gradient(135deg, #103018 0%, #000000 100%)", "text": "#1DB954", "btn_text": "#000000"},
+        {"name": "Midnight Purple", "accent": "#D0BCFF", "bg_grad": "linear-gradient(135deg, #240046 0%, #000000 100%)", "text": "#D0BCFF", "btn_text": "#000000"},
+        {"name": "Sunset Orange",   "accent": "#FF9900", "bg_grad": "linear-gradient(135deg, #4a1c05 0%, #000000 100%)", "text": "#FFB347", "btn_text": "#000000"},
+        {"name": "Ocean Blue",      "accent": "#00E5FF", "bg_grad": "linear-gradient(135deg, #001f3f 0%, #000000 100%)", "text": "#7FDBFF", "btn_text": "#000000"},
+        {"name": "Neon Pink",       "accent": "#FF4081", "bg_grad": "linear-gradient(135deg, #3a0000 0%, #000000 100%)", "text": "#FF80AB", "btn_text": "#000000"},
+    ]
+    hash_int = int(hashlib.md5(query.encode()).hexdigest(), 16)
+    return palettes[hash_int % len(palettes)]
+def get_random_vibe():
+    vibes = [
+        "A cyberpunk chase scene through Tokyo neon rain",
+        "Drinking coffee on a porch while it storms outside",
+        "The feeling of realizing you are falling out of love",
+        "A villain explaining their plan while drinking wine",
+        "Driving a convertible down the coast at sunset",
+        "Waking up in a spaceship alone"
+    ]
+    return random.choice(vibes)
+import hashlib
+# ---------------------------------------------------------
+# 4. SEARCH LOGIC
+# ---------------------------------------------------------
+def harmonifind_search(user_query, k=7, use_llama=True):
+    search_query = user_query
+    if use_llama:
+        try:
+            # We use the inference API here
+            prompt = f"User Query: '{user_query}'\nOutput exactly 5 descriptive keywords regarding the mood, instruments, or genre. Do not output full sentences. Keywords:"
+            raw_response = llm.invoke(prompt)
+            keywords = raw_response.replace("\n", " ").strip()
+            print(f"🧠 AI Keywords: {keywords}")
+            search_query = f"{user_query} {keywords}"
+        except Exception as e:
+            print(f"⚠️ AI skipped: {e}")
+    q_vec = embedder.encode([search_query])
+    distances, indices = index.search(q_vec, k)
+    results_df = df.iloc[indices[0]].copy()
+    # Calculate match %
+    scores = []
+    for dist in distances[0]:
+        # Simple heuristic to convert L2 distance to percentage
+        scores.append(int(max(0, min(100, (1 - (dist / 1.5)) * 100))))
+    results_df['match_score'] = scores
+    print("🎵 Fetching Spotify metadata...")
+    s_urls, s_imgs = [], []
+    for _, row in results_df.iterrows():
+        u, i = get_best_spotify_match(row['artist'], row['song'])
+        s_urls.append(u)
+        s_imgs.append(i)
+    results_df['spotify_url'] = s_urls
+    results_df['image'] = s_imgs
+    return results_df
+# ---------------------------------------------------------
+# 5. UI GENERATOR
+# ---------------------------------------------------------
+def gradio_interface_fn(query):
+    if not query: return ""
+    df_results = harmonifind_search(query, k=7, use_llama=True)
+    theme = get_theme_colors(query)
+    # Prepare Share Links
+    share_text = urllib.parse.quote(f"Listening to '{query}' via HarmoniFind 🎵")
+    share_url_x = f"https://twitter.com/intent/tweet?text={share_text}"
+    html = f"""
+    <style>
+        @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;700;800&display=swap');
+        .hf-container {{
+            background: {theme['bg_grad']}; color: white; font-family: 'Inter', sans-serif;
+            border-radius: 24px; padding: 40px; box-shadow: 0 20px 60px rgba(0,0,0,0.8);
+        }}
+        .hf-header {{ border-bottom: 1px solid rgba(255,255,255,0.1); padding-bottom: 20px; margin-bottom: 20px; display: flex; justify-content: space-between; align-items: flex-end; }}
+        .hf-title {{ font-size: 2rem; font-weight: 800; margin: 0; }}
+        .hf-meta {{ font-size: 0.9rem; opacity: 0.7; text-transform: uppercase; }}
+        .track-row {{ display: flex; align-items: center; background: rgba(0,0,0,0.2); margin-bottom: 15px; padding: 15px; border-radius: 12px; gap: 20px; }}
+        .track-row:hover {{ background: rgba(255,255,255,0.1); }}
+        .cover-img {{ width: 70px; height: 70px; border-radius: 8px; object-fit: cover; }}
+        .info-col {{ flex-grow: 1; }}
+        .song-name {{ font-weight: 700; font-size: 1.1rem; display: block; }}
+        .play-btn {{ background: {theme['accent']}; color: {theme['btn_text']}; padding: 10px 25px; border-radius: 50px; text-decoration: none; font-weight: 800; }}
+    </style>
+    <div class="hf-container">
+        <div class="hf-header">
+            <div>
+                <h1 class="hf-title">"{query}"</h1>
+                <div class="hf-meta">Vibe: {theme['name']}</div>
+            </div>
+            <a href="{share_url_x}" target="_blank" style="color:white; text-decoration:none; opacity:0.7;">Share on X ↗</a>
+        </div>
+    """
+    for _, row in df_results.iterrows():
+        img_url = row['image'] if row['image'] else "https://via.placeholder.com/150"
+        btn = f'<a href="{row["spotify_url"]}" target="_blank" class="play-btn">PLAY</a>' if row['spotify_url'] else '<span style="opacity:0.5">No Link</span>'
+        html += f"""
+        <div class="track-row">
+            <div style="font-weight:800; font-size:1.2rem; min-width:50px; text-align:center; color:{theme['text']}">{row['match_score']}%</div>
+            <img src="{img_url}" class="cover-img">
+            <div class="info-col">
+                <span class="song-name">{row['song']}</span>
+                <span style="opacity:0.8">{row['artist']}</span>
+            </div>
+            {btn}
+        </div>
+        """
+    html += "</div>"
+    return html
+# ---------------------------------------------------------
+# 6. LAUNCH
+# ---------------------------------------------------------
+# Extra CSS for the search row; the selectors match the elem_classes values
+# given to the components below.
+css = """
+.search-row { align-items: center !important; gap: 15px !important; }
+.search-input textarea { font-size: 1.1rem !important; }
+.action-btn { height: 50px !important; border-radius: 12px !important; font-weight: bold !important; }
+"""
+# Dark variant of the Soft theme: black page, near-black borderless blocks.
+theme = gr.themes.Soft(primary_hue="zinc", neutral_hue="zinc").set(
+    body_background_fill="#000000", block_background_fill="#121212", block_border_width="0px"
+)
+with gr.Blocks(theme=theme, css=css, title="HarmoniFind") as demo:
+    # Static page header.
+    gr.HTML("""
+    <div style="text-align:center; padding: 40px 0; color:white;">
+        <h1 style="font-size: 3rem; font-weight:900; background: -webkit-linear-gradient(45deg, #eee, #999); -webkit-background-clip: text; -webkit-text-fill-color: transparent;">HarmoniFind</h1>
+        <p style="opacity: 0.6;">AI-Powered Semantic Music Discovery</p>
+    </div>
+    """)
+    # Search bar: text input, "surprise me" button and search button in one row.
+    with gr.Row(elem_classes="search-row"):
+        input_box = gr.Textbox(show_label=False, placeholder="Describe a vibe (e.g. 'Driving fast at night')...", scale=10, elem_classes="search-input")
+        surprise_btn = gr.Button("🎲", scale=1, variant="secondary", elem_classes="action-btn")
+        search_btn = gr.Button("Search", scale=2, variant="primary", elem_classes="action-btn")
+    # Container that receives the HTML returned by gradio_interface_fn.
+    out = gr.HTML()
+    # Pressing Enter in the textbox and clicking "Search" both run the search.
+    input_box.submit(gradio_interface_fn, input_box, out)
+    search_btn.click(gradio_interface_fn, input_box, out)
+    # The dice button only fills the textbox with a sample prompt; it does not
+    # run a search itself.
+    surprise_btn.click(lambda: get_random_vibe(), None, input_box)
+# Enable request queuing, then start the app.
+demo.queue().launch()