garvitcpp commited on
Commit
65aae53
·
verified ·
1 Parent(s): cc93b48

Upload 16 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ data/processed_songs.csv filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use Python 3.10 slim image as base
FROM python:3.10-slim

# Set working directory
WORKDIR /app

# Install system dependencies required for numpy and scipy
# (build-essential supplies the C/C++ toolchain for any source builds);
# the apt cache is removed in the same layer to keep the image small.
RUN apt-get update && apt-get install -y \
    build-essential \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first to leverage Docker cache
# (dependency layer is rebuilt only when requirements.txt changes)
COPY requirements.txt .

# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the application
COPY . .

# Expose port 7860 (HuggingFace Spaces default port)
EXPOSE 7860

# Command to run the application
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
app/__init__.py ADDED
File without changes
app/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (143 Bytes). View file
 
app/__pycache__/main.cpython-310.pyc ADDED
Binary file (5.14 kB). View file
 
app/api/__init__.py ADDED
File without changes
app/api/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (147 Bytes). View file
 
app/api/__pycache__/itunes.cpython-310.pyc ADDED
Binary file (1.25 kB). View file
 
app/api/itunes.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import requests
from fastapi import HTTPException
import logging

logger = logging.getLogger(__name__)

# Upper bound (seconds) for outbound iTunes requests. Without a timeout,
# requests waits forever on a stalled upstream and this blocks the server.
ITUNES_REQUEST_TIMEOUT = 10

async def search_itunes_tracks(query: str, limit: int = 1):
    """
    Search the iTunes Search API for tracks with 30-second previews.

    Args:
        query: Free-text search term (typically "<song name> <artist>").
        limit: Maximum number of results to request from iTunes.

    Returns:
        A dict with name/artist/preview_url/full_track_url/album_image/
        genre/album for the first result that has a preview URL, or None
        when no previewable track was found or the request failed.
    """
    base_url = "https://itunes.apple.com/search"

    try:
        params = {
            "term": query,
            "entity": "song",
            "limit": limit
        }

        # NOTE(review): requests is synchronous, so this call blocks the
        # event loop inside an async endpoint; the timeout bounds that stall.
        response = requests.get(base_url, params=params, timeout=ITUNES_REQUEST_TIMEOUT)
        response.raise_for_status()

        results = response.json().get('results', [])
        logger.info(f"Total tracks found: {len(results)}")

        tracks = []
        for track in results:
            track_info = {
                "name": track.get('trackName'),
                "artist": track.get('artistName'),
                "preview_url": track.get('previewUrl'),
                "full_track_url": track.get('trackViewUrl'),
                "album_image": track.get('artworkUrl100'),
                "genre": track.get('primaryGenreName'),
                "album": track.get('collectionName')
            }

            # Only keep tracks that actually ship a playable preview clip.
            if track_info['preview_url']:
                tracks.append(track_info)

        return tracks[0] if tracks else None

    except requests.RequestException as e:
        # Timeouts raise requests.Timeout, a RequestException subclass,
        # so a slow upstream degrades to "no preview" instead of an error.
        logger.error(f"Error searching iTunes: {e}")
        return None
app/main.py ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from fastapi import FastAPI, Query
from fastapi.middleware.cors import CORSMiddleware
from typing import List, Optional
import ast
import pandas as pd
import joblib
from scipy.spatial.distance import cdist
from .models.schemas import Song, RecommendationWithPreview
from .api.itunes import search_itunes_tracks

app = FastAPI(title="Music Recommendation API")

# The API is served publicly (HuggingFace Space) and consumed from
# arbitrary front-ends, hence the fully open CORS policy.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Dataset columns that must be parsed as floats on load.
numeric_features = ['acousticness', 'danceability', 'energy', 'instrumentalness',
                    'liveness', 'loudness', 'speechiness', 'tempo', 'valence',
                    'popularity', 'year', 'cluster_label']

# Load clustering pipeline and the preprocessed songs dataset at startup.
model = joblib.load('data/song_cluster_pipeline.joblib')
df = pd.read_csv('data/processed_songs.csv', dtype={col: float for col in numeric_features})
# The 'artists' column is stored as the string repr of a Python list.
# ast.literal_eval parses only literal syntax, so a malicious or corrupt
# CSV cell cannot execute arbitrary code the way plain eval() would.
df['artists'] = df['artists'].apply(ast.literal_eval)
@app.get("/search/", response_model=List[Song])
async def search_songs(q: str = Query(..., min_length=1), limit: int = 5):
    """
    Case-insensitive search over song names and artist names.

    Args:
        q: Query substring matched against song name and artist names.
        limit: Maximum number of songs returned.

    Returns:
        Up to `limit` matching songs, deduplicated and ranked by popularity.
    """
    q = q.lower()

    # Literal substring match (regex=False): the user query must not be
    # interpreted as a regular expression — a query like "(live)" would
    # otherwise raise re.error inside str.contains.
    name_matches = df[df['name'].str.lower().str.contains(q, regex=False, na=False)]
    artist_matches = df[df['artists'].apply(lambda x: any(q in artist.lower() for artist in x))]

    # Work on copies so the helper column below does not mutate the
    # module-level dataframe.
    name_matches = name_matches.copy()
    artist_matches = artist_matches.copy()

    # Lists are unhashable; serialize the (sorted) artist list so duplicate
    # (name, artists) rows can be dropped.
    name_matches['artists_str'] = name_matches['artists'].apply(lambda x: ','.join(sorted(x)))
    artist_matches['artists_str'] = artist_matches['artists'].apply(lambda x: ','.join(sorted(x)))

    # Concatenate and drop duplicates based on name and artists_str
    results = pd.concat([name_matches, artist_matches])
    results = results.drop_duplicates(subset=['name', 'artists_str'])

    # Get top matches by popularity
    top_matches = results.nlargest(limit, 'popularity')

    return [
        Song(
            name=row['name'],
            artists=row['artists'],
            year=int(row['year']),
            popularity=int(row['popularity'])
        )
        for _, row in top_matches.iterrows()
    ]
61
@app.get("/recommendations/", response_model=List[RecommendationWithPreview])
async def get_recommendations(song_name: str, artist_name: Optional[str] = None, number_songs: int = 6):
    """
    Recommend songs from the same cluster as the requested song, ranked by
    euclidean distance in audio-feature space, each enriched with iTunes
    preview info when available.

    Args:
        song_name: Exact song name (case-insensitive).
        artist_name: Optional artist to disambiguate duplicate names.
        number_songs: Maximum number of recommendations returned.

    Raises:
        HTTPException 400: name is ambiguous and no artist was given.
        HTTPException 404: song not present in the dataset.
    """
    # Local import keeps this fix self-contained within the endpoint.
    from fastapi import HTTPException

    try:
        if artist_name:
            mask = (df['name'].str.lower() == song_name.lower()) & \
                   (df['artists'].apply(lambda x: artist_name.lower() in str(x).lower()))
            song = df[mask].iloc[0]
        else:
            matches = df[df['name'].str.lower() == song_name.lower()]
            if len(matches) > 1:
                # A bare error dict would fail response_model validation
                # (it is not a list of recommendations) and surface as an
                # opaque 500 — signal ambiguity with a proper HTTP error.
                raise HTTPException(
                    status_code=400,
                    detail=f"Multiple songs found with name '{song_name}'. Please specify an artist."
                )
            song = matches.iloc[0]

        cluster_label = song['cluster_label']
        cluster_songs = df[df['cluster_label'] == cluster_label]
        cluster_songs = cluster_songs[cluster_songs['name'] != song_name]

        # If the song is the only member of its cluster, cdist would fail
        # on an empty matrix — there is simply nothing to recommend.
        if cluster_songs.empty:
            return []

        audio_features = ['acousticness', 'danceability', 'energy', 'instrumentalness',
                          'liveness', 'loudness', 'speechiness', 'tempo', 'valence']

        song_features = song[audio_features].astype(float).values.reshape(1, -1)
        cluster_features = cluster_songs[audio_features].astype(float).values

        # Rank cluster members by distance to the seed song.
        distances = cdist(song_features, cluster_features, metric='euclidean')
        closest_indices = distances.argsort()[0][:number_songs]

        recommendations = cluster_songs.iloc[closest_indices]

        result = []
        for _, row in recommendations.iterrows():
            # Look up a 30s preview on iTunes using "<name> <first artist>".
            search_query = f"{row['name']} {row['artists'][0]}"
            preview_info = await search_itunes_tracks(search_query)

            rec = RecommendationWithPreview(
                name=row['name'],
                artists=row['artists'],
                year=int(row['year']),
                popularity=int(row['popularity']),
                danceability=float(row['danceability']),
                energy=float(row['energy']),
                valence=float(row['valence']),
                preview_info=preview_info
            )
            result.append(rec)

        return result

    except IndexError:
        # .iloc[0] on an empty selection means the song was not found.
        raise HTTPException(
            status_code=404,
            detail=f"Song '{song_name}' {'by ' + artist_name if artist_name else ''} not found."
        )
@app.get("/song_details/")
async def get_song_details(song_name: str, artist_name: Optional[str] = None):
    """
    Get both song data and iTunes preview info for a specific song.

    Args:
        song_name: Exact song name (case-insensitive).
        artist_name: Optional artist to disambiguate duplicate names.

    Returns:
        Flattened dict of the song's dataset fields plus `preview_info`.

    Raises:
        HTTPException 400: name is ambiguous and no artist was given.
        HTTPException 404: song not present in the dataset.
    """
    # Local import keeps this fix self-contained within the endpoint.
    from fastapi import HTTPException

    try:
        # Find the song in our dataset
        if artist_name:
            mask = (df['name'].str.lower() == song_name.lower()) & \
                   (df['artists'].apply(lambda x: artist_name.lower() in str(x).lower()))
            song = df[mask].iloc[0]
        else:
            matches = df[df['name'].str.lower() == song_name.lower()]
            if len(matches) > 1:
                # Previously this returned HTTP 200 with an "error" key,
                # which clients cannot distinguish from success — use a
                # proper 400 instead.
                raise HTTPException(
                    status_code=400,
                    detail=f"Multiple songs found with name '{song_name}'. Please specify an artist."
                )
            song = matches.iloc[0]

        # Get iTunes preview info; prefer the caller-supplied artist name.
        search_query = f"{song_name} {artist_name if artist_name else song['artists'][0]}"
        preview_info = await search_itunes_tracks(search_query)

        # Return flattened response
        return {
            "name": song['name'],
            "artists": song['artists'],
            "year": int(song['year']),
            "popularity": int(song['popularity']),
            "danceability": float(song['danceability']),
            "energy": float(song['energy']),
            "valence": float(song['valence']),
            "acousticness": float(song['acousticness']),
            "instrumentalness": float(song['instrumentalness']),
            "liveness": float(song['liveness']),
            "speechiness": float(song['speechiness']),
            "tempo": float(song['tempo']),
            "preview_info": preview_info
        }

    except IndexError:
        # .iloc[0] on an empty selection means the song was not found.
        raise HTTPException(
            status_code=404,
            detail=f"Song '{song_name}' {'by ' + artist_name if artist_name else ''} not found."
        )
153
@app.get("/health")
@app.head("/health")
async def health_check():
    """Liveness probe: always reports that the service is up."""
    payload = {"status": "ok"}
    return payload
158
# Local development entry point. In the Docker image the server is started
# by the uvicorn CLI (see Dockerfile CMD), so this branch only runs when the
# module is executed directly as a script.
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
app/models/__init__.py ADDED
File without changes
app/models/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (150 Bytes). View file
 
app/models/__pycache__/schemas.cpython-310.pyc ADDED
Binary file (1.33 kB). View file
 
app/models/schemas.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from pydantic import BaseModel
from typing import List, Optional

class Song(BaseModel):
    """A song as returned by the /search/ endpoint."""
    name: str
    artists: List[str]  # artist names, parsed from the dataset's list-string column
    year: int
    popularity: int

class Recommendation(BaseModel):
    """A recommended song plus the audio features most useful for display."""
    name: str
    artists: List[str]
    year: int
    popularity: int
    danceability: float
    energy: float
    valence: float

class TrackInfo(BaseModel):
    """iTunes Search API metadata for one track; fields mirror the API keys."""
    name: str
    artist: str
    preview_url: Optional[str]     # 30-second preview clip URL
    full_track_url: Optional[str]  # iTunes store page for the track
    album_image: Optional[str]     # artwork URL (100x100)
    genre: Optional[str]
    album: Optional[str]

class RecommendationWithPreview(Recommendation):
    """Recommendation enriched with iTunes preview info when a match was found."""
    preview_info: Optional[TrackInfo] = None  # None when no previewable track matched
data/processed_songs.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:358df3704e9d2457b6f1b8383137d877903907a3cf73b5773c95797c93355427
3
+ size 24572270
data/song_cluster_pipeline.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ceee8795f25c27031dbc76f2a9365dc2d6bbde4dfc51788f4aa089a09f059946
3
+ size 686654
requirements.txt ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ annotated-types==0.7.0
2
+ anyio==4.8.0
3
+ certifi==2024.12.14
4
+ charset-normalizer==3.4.1
5
+ click==8.1.8
6
+ colorama==0.4.6
7
+ exceptiongroup==1.2.2
8
+ fastapi==0.115.6
9
+ h11==0.14.0
10
+ idna==3.10
11
+ joblib==1.4.2
12
+ numpy==2.2.2
13
+ pandas==2.2.3
14
+ pydantic==2.10.5
15
+ pydantic_core==2.27.2
16
+ python-dateutil==2.9.0.post0
17
+ python-multipart==0.0.20
18
+ pytz==2024.2
19
+ requests==2.32.3
20
+ scikit-learn==1.6.1
21
+ scipy==1.15.1
22
+ six==1.17.0
23
+ sniffio==1.3.1
24
+ starlette==0.41.3
25
+ threadpoolctl==3.5.0
26
+ typing_extensions==4.12.2
27
+ tzdata==2024.2
28
+ urllib3==2.3.0
29
+ uvicorn==0.34.0