File size: 19,136 Bytes
3aba6a9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dec38e9
 
 
 
 
 
 
 
 
 
 
 
3aba6a9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dec38e9
 
 
 
 
 
 
3aba6a9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d5eabda
3aba6a9
 
dec38e9
 
 
 
 
 
 
3aba6a9
d5eabda
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3aba6a9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d5eabda
3aba6a9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d5eabda
3aba6a9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
"""
Unified Metadata Models

Provides consistent data models for track and artist metadata across all services.
Consolidates LastFM and Spotify metadata into unified structures.
"""

from dataclasses import dataclass, field
from typing import Dict, List, Optional, Any, Union
from enum import Enum
from datetime import datetime

import structlog

logger = structlog.get_logger(__name__)


class MetadataSource(Enum):
    """
    Enumeration of metadata sources.

    Each member's value is a lowercase string, which is what the ``to_dict``
    methods in this module emit for their ``source`` key and what
    ``UnifiedTrackMetadata.from_dict`` converts back into a member.
    """
    # Record built from LastFM data only.
    LASTFM = "lastfm"
    # Record built from Spotify data only.
    SPOTIFY = "spotify"
    # Default for new records; also the source assigned to merged records.
    UNIFIED = "unified"  # For merged data from multiple sources


@dataclass
class UnifiedTrackMetadata:
    """
    Unified track metadata across all services.

    Combines data from LastFM and Spotify into a consistent structure
    while preserving service-specific data in ``source_data``.

    Only ``name`` and ``artist`` are required. Every other field defaults to
    ``None`` or to an empty collection. After construction, ``name`` and
    ``artist`` are whitespace-stripped strings and the collection fields are
    never ``None`` (see ``__post_init__``).
    """
    # Core identification fields (always present)
    name: str
    artist: str

    # Common optional fields
    album: Optional[str] = None
    duration_ms: Optional[int] = None

    # URLs and identifiers
    spotify_id: Optional[str] = None
    lastfm_mbid: Optional[str] = None  # MusicBrainz ID from LastFM
    preview_url: Optional[str] = None
    external_urls: Optional[Dict[str, str]] = None  # Replaced by {} in __post_init__ when None

    # Popularity and statistics
    popularity: Optional[int] = None  # Spotify popularity (0-100)
    listeners: Optional[int] = None   # LastFM listeners
    playcount: Optional[int] = None   # LastFM playcount

    # Discovery and categorization
    tags: List[str] = field(default_factory=list)  # LastFM tags
    genres: List[str] = field(default_factory=list)  # Unified genres
    similar_tracks: List[str] = field(default_factory=list)  # Similar track names

    # Metadata about the metadata
    source: MetadataSource = MetadataSource.UNIFIED
    source_data: Dict[str, Any] = field(default_factory=dict)  # Raw source data
    last_updated: datetime = field(default_factory=datetime.utcnow)  # Set at construction time

    # Quality and underground indicators
    underground_score: Optional[float] = None  # 0-1, higher = more underground
    quality_score: Optional[float] = None      # 0-1, higher = better quality

    # Recommendation-specific fields (added by recommendation service)
    recommendation_score: Optional[float] = None  # Score from recommendation agent
    recommendation_reason: Optional[str] = None   # Reason for recommendation
    agent_source: Optional[str] = None            # Which agent recommended this track

    # Audio features (from Spotify)
    audio_features: Optional[Dict[str, Any]] = None  # Spotify audio features

    # Source object references (for backward compatibility)
    spotify_data: Optional[Any] = None  # Original Spotify track object
    lastfm_data: Optional[Any] = None   # Original LastFM track object

    def __post_init__(self):
        """
        Normalize the instance right after dataclass initialization.

        Strips surrounding whitespace from ``name`` and ``artist`` (a falsy
        value becomes ``""``) and replaces any collection field that was
        explicitly passed as ``None`` with an empty list or dict, so the other
        methods can use them without ``None`` checks.
        """
        # Normalize track and artist names
        self.name = self.name.strip() if self.name else ""
        self.artist = self.artist.strip() if self.artist else ""

        # Initialize collections if None
        if self.tags is None:
            self.tags = []
        if self.genres is None:
            self.genres = []
        if self.similar_tracks is None:
            self.similar_tracks = []
        if self.external_urls is None:
            self.external_urls = {}
        if self.source_data is None:
            self.source_data = {}

    @classmethod
    def from_lastfm(cls, lastfm_track: "TrackMetadata") -> "UnifiedTrackMetadata":
        """
        Create unified metadata from LastFM TrackMetadata.

        Args:
            lastfm_track: LastFM TrackMetadata object. This method reads its
                ``name``, ``artist``, ``mbid``, ``url``, ``listeners``,
                ``playcount``, ``tags`` and ``similar_tracks`` attributes.

        Returns:
            UnifiedTrackMetadata instance with ``source`` set to
            ``MetadataSource.LASTFM``
        """
        return cls(
            name=lastfm_track.name,
            artist=lastfm_track.artist,
            lastfm_mbid=lastfm_track.mbid,
            # Only record a LastFM link when the track actually has a URL.
            external_urls={"lastfm": lastfm_track.url} if lastfm_track.url else {},
            listeners=lastfm_track.listeners,
            playcount=lastfm_track.playcount,
            tags=lastfm_track.tags or [],
            similar_tracks=lastfm_track.similar_tracks or [],
            source=MetadataSource.LASTFM,
            # NOTE: this stores the object's live ``__dict__``, not a copy.
            source_data={"lastfm": lastfm_track.__dict__}
        )

    @classmethod
    def from_spotify(cls, spotify_track: "SpotifyTrack") -> "UnifiedTrackMetadata":
        """
        Create unified metadata from Spotify SpotifyTrack.

        Args:
            spotify_track: Spotify SpotifyTrack object. This method reads its
                ``name``, ``artist``, ``album``, ``id``, ``duration_ms``,
                ``preview_url``, ``external_urls`` and ``popularity``
                attributes.

        Returns:
            UnifiedTrackMetadata instance with ``source`` set to
            ``MetadataSource.SPOTIFY``
        """
        return cls(
            name=spotify_track.name,
            artist=spotify_track.artist,
            album=spotify_track.album,
            spotify_id=spotify_track.id,
            duration_ms=spotify_track.duration_ms,
            preview_url=spotify_track.preview_url,
            external_urls=spotify_track.external_urls or {},
            popularity=spotify_track.popularity,
            source=MetadataSource.SPOTIFY,
            # NOTE: this stores the object's live ``__dict__``, not a copy.
            source_data={"spotify": spotify_track.__dict__}
        )

    @staticmethod
    def _coalesce(primary: Any, fallback: Any) -> Any:
        """Return ``primary`` unless it is ``None``, in which case return ``fallback``."""
        return primary if primary is not None else fallback

    @staticmethod
    def _dedupe_in_order(values: List[str]) -> List[str]:
        """Drop duplicate entries while keeping first-seen order."""
        return list(dict.fromkeys(values))

    def merge_with(self, other: "UnifiedTrackMetadata") -> "UnifiedTrackMetadata":
        """
        Merge this metadata with another instance.

        Precedence rules:
          * ``name`` and ``artist`` are always taken from ``self``.
          * Text/object fields use ``self``'s value when it is truthy,
            otherwise ``other``'s.
          * Numeric statistics and scores use ``self``'s value whenever it is
            not ``None``, so a genuine ``0`` / ``0.0`` is preserved instead of
            being overwritten by ``other``.
          * ``external_urls`` and ``source_data`` are combined; on a key
            collision the value from ``other`` wins.
          * ``tags``, ``genres`` and ``similar_tracks`` are concatenated
            (``self`` first) and de-duplicated in first-seen order.

        Neither input instance is modified. The result gets a fresh
        ``last_updated`` timestamp.

        Args:
            other: Another UnifiedTrackMetadata instance

        Returns:
            New merged UnifiedTrackMetadata instance with ``source`` set to
            ``MetadataSource.UNIFIED``

        Raises:
            ValueError: If ``other`` does not describe the same track
                (case-insensitive name and artist comparison).
        """
        # Verify tracks match
        if not self._matches_track(other):
            raise ValueError(f"Cannot merge different tracks: {self.name} vs {other.name}")

        pick = self._coalesce
        unique = self._dedupe_in_order

        # Create merged instance
        merged = UnifiedTrackMetadata(
            name=self.name,  # Use current name
            artist=self.artist,  # Use current artist
            album=self.album or other.album,
            duration_ms=self.duration_ms or other.duration_ms,
            spotify_id=self.spotify_id or other.spotify_id,
            lastfm_mbid=self.lastfm_mbid or other.lastfm_mbid,
            preview_url=self.preview_url or other.preview_url,
            external_urls={**self.external_urls, **other.external_urls},
            # Zero is a valid statistic/score, so only fall back on None.
            popularity=pick(self.popularity, other.popularity),
            listeners=pick(self.listeners, other.listeners),
            playcount=pick(self.playcount, other.playcount),
            tags=unique(self.tags + other.tags),  # Merge and deduplicate
            genres=unique(self.genres + other.genres),
            similar_tracks=unique(self.similar_tracks + other.similar_tracks),
            source=MetadataSource.UNIFIED,
            source_data={**self.source_data, **other.source_data},
            underground_score=pick(self.underground_score, other.underground_score),
            quality_score=pick(self.quality_score, other.quality_score),
            recommendation_score=pick(self.recommendation_score, other.recommendation_score),
            recommendation_reason=self.recommendation_reason or other.recommendation_reason,
            agent_source=self.agent_source or other.agent_source,
            audio_features=self.audio_features or other.audio_features,
            spotify_data=self.spotify_data or other.spotify_data,
            lastfm_data=self.lastfm_data or other.lastfm_data
        )

        return merged

    def _matches_track(self, other: "UnifiedTrackMetadata") -> bool:
        """
        Check if another track metadata represents the same track.

        The comparison is an exact match on lower-cased, whitespace-stripped
        ``name`` and ``artist``; no fuzzy matching is performed.

        Args:
            other: Another UnifiedTrackMetadata instance

        Returns:
            True if tracks match, False otherwise
        """
        # Normalize names for comparison
        name1 = self.name.lower().strip()
        name2 = other.name.lower().strip()
        artist1 = self.artist.lower().strip()
        artist2 = other.artist.lower().strip()

        # Basic name and artist match
        return name1 == name2 and artist1 == artist2

    def calculate_underground_score(self) -> float:
        """
        Calculate underground score based on available metrics.

        Up to three factors are averaged, each contributing a value in 0-1:
        a tiered value from ``listeners``, the inverse of ``popularity``, and
        the share of ``tags`` that appear in a fixed list of "underground"
        tag names. Factors whose input is missing are skipped. With no
        factors at all the score is 0.5. The result is clamped to 0-1 and
        also stored on ``self.underground_score``.

        Returns:
            Underground score (0-1, higher = more underground)
        """
        score = 0.0
        factors = 0

        # LastFM popularity indicators
        if self.listeners is not None:
            # Lower listeners = more underground
            if self.listeners < 1000:
                score += 0.8
            elif self.listeners < 10000:
                score += 0.6
            elif self.listeners < 100000:
                score += 0.4
            else:
                score += 0.2
            factors += 1

        # Spotify popularity
        if self.popularity is not None:
            # Lower popularity = more underground
            underground_factor = (100 - self.popularity) / 100
            score += underground_factor
            factors += 1

        # Tag-based indicators
        underground_tags = {
            'experimental', 'underground', 'indie', 'lo-fi', 'avant-garde',
            'noise', 'drone', 'ambient', 'post-rock', 'math rock'
        }
        if self.tags:
            tag_score = sum(1 for tag in self.tags if tag.lower() in underground_tags)
            score += min(tag_score / len(self.tags), 1.0)
            factors += 1

        # Average the factors
        final_score = score / factors if factors > 0 else 0.5
        self.underground_score = min(max(final_score, 0.0), 1.0)

        return self.underground_score

    def to_dict(self) -> Dict[str, Any]:
        """
        Convert to dictionary representation.

        ``source`` is emitted as the enum's string value and ``last_updated``
        as an ISO 8601 string. ``source_data`` is not included in the output.
        The collection values are the instance's own objects, not copies.
        """
        return {
            "name": self.name,
            "artist": self.artist,
            "album": self.album,
            "duration_ms": self.duration_ms,
            "spotify_id": self.spotify_id,
            "lastfm_mbid": self.lastfm_mbid,
            "preview_url": self.preview_url,
            "external_urls": self.external_urls,
            "popularity": self.popularity,
            "listeners": self.listeners,
            "playcount": self.playcount,
            "tags": self.tags,
            "genres": self.genres,
            "similar_tracks": self.similar_tracks,
            # Tolerate a plain string stored in ``source`` instead of an enum member.
            "source": self.source.value if hasattr(self.source, 'value') else str(self.source),
            "underground_score": self.underground_score,
            "quality_score": self.quality_score,
            "recommendation_score": self.recommendation_score,
            "recommendation_reason": self.recommendation_reason,
            "agent_source": self.agent_source,
            "audio_features": self.audio_features,
            "last_updated": self.last_updated.isoformat(),
            "spotify_data": self.spotify_data,
            "lastfm_data": self.lastfm_data
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "UnifiedTrackMetadata":
        """
        Create UnifiedTrackMetadata from dictionary representation.

        Keys that are not fields of this dataclass are ignored. A string
        ``last_updated`` is parsed with ``datetime.fromisoformat`` (falling
        back to the current UTC time if parsing fails), and a string
        ``source`` is converted to ``MetadataSource`` (falling back to
        ``UNIFIED`` for unknown values). The ``data`` mapping passed in is
        left unmodified.

        Args:
            data: Dictionary containing track metadata

        Returns:
            UnifiedTrackMetadata instance
        """
        # Work on a shallow copy so the caller's mapping is never rewritten.
        payload = dict(data)

        # Handle datetime conversion
        raw_updated = payload.get('last_updated')
        if isinstance(raw_updated, str):
            try:
                payload['last_updated'] = datetime.fromisoformat(raw_updated)
            except (ValueError, TypeError):
                payload['last_updated'] = datetime.utcnow()

        # Handle MetadataSource enum
        raw_source = payload.get('source')
        if isinstance(raw_source, str):
            try:
                payload['source'] = MetadataSource(raw_source)
            except ValueError:
                payload['source'] = MetadataSource.UNIFIED

        # Remove any extra fields not in the dataclass
        valid_fields = {
            'name', 'artist', 'album', 'duration_ms', 'spotify_id', 'lastfm_mbid',
            'preview_url', 'external_urls', 'popularity', 'listeners', 'playcount',
            'tags', 'genres', 'similar_tracks', 'source', 'source_data', 'last_updated',
            'underground_score', 'quality_score', 'recommendation_score', 'recommendation_reason',
            'agent_source', 'audio_features', 'spotify_data', 'lastfm_data'
        }

        filtered_data = {k: v for k, v in payload.items() if k in valid_fields}

        # Ensure required fields have defaults
        filtered_data.setdefault('name', '')
        filtered_data.setdefault('artist', '')
        filtered_data.setdefault('tags', [])
        filtered_data.setdefault('genres', [])
        filtered_data.setdefault('similar_tracks', [])
        filtered_data.setdefault('external_urls', {})
        filtered_data.setdefault('source_data', {})
        filtered_data.setdefault('source', MetadataSource.UNIFIED)
        filtered_data.setdefault('last_updated', datetime.utcnow())

        return cls(**filtered_data)


@dataclass
class UnifiedArtistMetadata:
    """
    Artist-level counterpart of the unified track record.

    Holds artist information gathered from LastFM and Spotify in a single,
    service-independent shape. Only ``name`` is mandatory.
    """
    # --- identity ---
    name: str

    # --- service identifiers ---
    spotify_id: Optional[str] = None
    lastfm_mbid: Optional[str] = None

    # --- links (None is replaced by {} after init) ---
    external_urls: Optional[Dict[str, str]] = None

    # --- reach / statistics ---
    popularity: Optional[int] = None  # Spotify
    followers: Optional[int] = None   # Spotify
    listeners: Optional[int] = None   # LastFM
    playcount: Optional[int] = None   # LastFM

    # --- classification ---
    tags: List[str] = field(default_factory=list)
    genres: List[str] = field(default_factory=list)
    similar_artists: List[str] = field(default_factory=list)

    # --- free-form description ---
    bio: Optional[str] = None

    # --- provenance ---
    source: MetadataSource = MetadataSource.UNIFIED
    source_data: Dict[str, Any] = field(default_factory=dict)
    last_updated: datetime = field(default_factory=datetime.utcnow)

    def __post_init__(self):
        """Trim the artist name and swap ``None`` collections for empty ones."""
        trimmed = ""
        if self.name:
            trimmed = self.name.strip()
        self.name = trimmed

        # List-valued fields fall back to a fresh empty list.
        for list_attr in ("tags", "genres", "similar_artists"):
            if getattr(self, list_attr) is None:
                setattr(self, list_attr, [])

        # Dict-valued fields fall back to a fresh empty dict.
        for dict_attr in ("external_urls", "source_data"):
            if getattr(self, dict_attr) is None:
                setattr(self, dict_attr, {})

    @classmethod
    def from_lastfm(cls, lastfm_artist: "ArtistMetadata") -> "UnifiedArtistMetadata":
        """
        Build a unified artist record from a LastFM ArtistMetadata object.

        Args:
            lastfm_artist: LastFM ArtistMetadata object

        Returns:
            UnifiedArtistMetadata instance tagged with the LASTFM source
        """
        init_kwargs = {
            "name": lastfm_artist.name,
            "lastfm_mbid": lastfm_artist.mbid,
            # A LastFM link is recorded only when the artist has a URL.
            "external_urls": {"lastfm": lastfm_artist.url} if lastfm_artist.url else {},
            "listeners": lastfm_artist.listeners,
            "playcount": lastfm_artist.playcount,
            "tags": lastfm_artist.tags or [],
            "similar_artists": lastfm_artist.similar_artists or [],
            "bio": lastfm_artist.bio,
            "source": MetadataSource.LASTFM,
            "source_data": {"lastfm": lastfm_artist.__dict__},
        }
        return cls(**init_kwargs)

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the artist to a plain dictionary (``source_data`` is left out)."""
        origin = self.source
        if hasattr(origin, 'value'):
            origin_label = origin.value
        else:
            origin_label = str(origin)

        result: Dict[str, Any] = {}
        # Plain attributes are copied across under their own names, in order.
        for attr in (
            "name", "spotify_id", "lastfm_mbid", "external_urls",
            "popularity", "followers", "listeners", "playcount",
            "tags", "genres", "similar_artists", "bio",
        ):
            result[attr] = getattr(self, attr)
        result["source"] = origin_label
        result["last_updated"] = self.last_updated.isoformat()
        return result


@dataclass
class SearchResult:
    """Container for the tracks and artists returned by one search."""
    tracks: List[UnifiedTrackMetadata] = field(default_factory=list)
    artists: List[UnifiedArtistMetadata] = field(default_factory=list)
    query: str = ""
    source: MetadataSource = MetadataSource.UNIFIED
    total_results: int = 0
    search_time_ms: Optional[int] = None

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the result, delegating to each track's and artist's ``to_dict``."""
        track_dicts = []
        for entry in self.tracks:
            track_dicts.append(entry.to_dict())

        artist_dicts = []
        for entry in self.artists:
            artist_dicts.append(entry.to_dict())

        origin = self.source
        if hasattr(origin, 'value'):
            origin_label = origin.value
        else:
            origin_label = str(origin)

        return {
            "tracks": track_dicts,
            "artists": artist_dicts,
            "query": self.query,
            "source": origin_label,
            "total_results": self.total_results,
            "search_time_ms": self.search_time_ms,
        }


# Utility functions for metadata operations
def merge_track_metadata(
    tracks: List[UnifiedTrackMetadata]
) -> List[UnifiedTrackMetadata]:
    """
    Collapse entries that describe the same track into a single record.

    Two entries are treated as the same track when their lower-cased,
    whitespace-stripped artist and name are equal. The first occurrence is
    kept as-is; each later occurrence is folded in via ``merge_with``.
    Output order follows the first appearance of each track.

    Args:
        tracks: List of track metadata to merge

    Returns:
        List of merged track metadata with duplicates combined
    """
    by_identity = {}

    for candidate in tracks:
        artist_part = candidate.artist.lower().strip()
        title_part = candidate.name.lower().strip()
        identity = f"{artist_part}||{title_part}"

        if identity not in by_identity:
            # First time this track is seen: keep the object unchanged.
            by_identity[identity] = candidate
            continue

        # Already seen: fold the newcomer into the stored record.
        by_identity[identity] = by_identity[identity].merge_with(candidate)

    return list(by_identity.values())


def calculate_quality_scores(
    tracks: List[UnifiedTrackMetadata]
) -> List[UnifiedTrackMetadata]:
    """
    Assign a ``quality_score`` to every track in place.

    The score is the mean of the components that are available:
    ``popularity / 100`` (when popularity is set), a log10-scaled listener
    count capped at 1.0 (when listeners is set), and a metadata-completeness
    ratio that is always included.

    Args:
        tracks: List of track metadata

    Returns:
        The same list object, with each track's ``quality_score`` updated
    """
    import math

    for item in tracks:
        components = []

        # Popularity indicators
        if item.popularity is not None:
            components.append(item.popularity / 100)

        if item.listeners is not None:
            # Logarithmic scale: 10**6 listeners (or more) maps to 1.0.
            magnitude = math.log10(max(item.listeners, 1))
            components.append(min(magnitude / 6, 1.0))

        # Metadata completeness: fraction of the four optional fields present.
        filled = sum(
            1
            for value in (item.album, item.duration_ms, item.tags, item.preview_url)
            if value
        )
        components.append(filled / 4)

        # The completeness component is always present, so the mean is defined.
        item.quality_score = sum(components, 0.0) / len(components)

    return tracks