Spaces:
Sleeping
Sleeping
| import os | |
| import shutil | |
| from pydub import AudioSegment | |
# RAVDESS emotion codes -> labels. Codes are the two-digit, 1-based
# positions ('01' .. '08') of the labels in the tuple below.
RAVDESS_MAP = {
    f"{code:02d}": label
    for code, label in enumerate(
        (
            'neutral', 'calm', 'happiness', 'sadness',
            'anger', 'fear', 'disgust', 'surprise',
        ),
        start=1,
    )
}

# Canonical emotion labels. Kept as a list: the order here is the order in
# which filenames are probed, so the first listed emotion wins on a tie.
KNOWN_EMOTIONS = [
    'anger',
    'neutral',
    'sadness',
    'happiness',
    'fear',
    'disgust',
    'surprise',
    'calm',
]

# Common synonyms/variations, mapped onto their canonical label.
EMOTION_ALIASES = dict(
    happy='happiness',
    sad='sadness',
    angry='anger',
    surprised='surprise',
)
def convert_to_wav(source_path, target_path):
    """Convert an audio file to WAV format.

    Args:
        source_path: Audio file to read; passed to ``AudioSegment.from_file``.
        target_path: Destination for the WAV data; passed to
            ``AudioSegment.export``.

    Returns:
        True if the conversion succeeded, False otherwise. Failures are
        printed to stdout instead of being raised.
    """
    try:
        audio = AudioSegment.from_file(source_path)
        exported = audio.export(target_path, format="wav")
        # export() hands back the output file still open. Close it when the
        # target was given as a path (so the handle is not ours to keep);
        # leave a caller-supplied file object untouched.
        if isinstance(target_path, (str, os.PathLike)):
            exported.close()
        return True
    except Exception as e:  # best-effort: any failure means "not converted"
        print(f"Error converting {source_path}: {e}")
        return False
def get_label_from_filename(filename):
    """Extract an emotion label from a filename.

    Two rules are tried in order:

    1. RAVDESS names (e.g. ``03-01-05-01-01-01-01.wav``): the third
       dash-separated field is looked up in ``RAVDESS_MAP``.
    2. Emotion words: the name is split on underscores and dots, and each
       known emotion or alias is looked for as a whole token. So
       ``upload_sad_1.wav``, ``angry_02.wav`` and ``happy.wav`` match, while
       ``unhappy_1.wav`` and ``fearless.wav`` do not.

    Args:
        filename: File name or path; only the basename is inspected,
            case-insensitively.

    Returns:
        The canonical emotion label (aliases are normalised through
        ``EMOTION_ALIASES``), or None when no rule matches.
    """
    name = os.path.basename(filename).lower()

    # Rule 1: RAVDESS dataset (e.g., 03-01-01-01-01-01-01.wav)
    if name.count('-') == 6 and name.startswith('03'):
        # Exactly six dashes means exactly seven fields, so index 2 exists.
        fields = name.replace('.wav', '').split('-')
        return RAVDESS_MAP.get(fields[2])

    # Rule 2: general emotion words, matched as whole tokens delimited by
    # underscores/dots. Plain substring checks let "unhappy_" count as
    # "happy" and missed bare names such as "happy.wav".
    tokens = set(name.replace('.', '_').split('_'))
    for emotion in KNOWN_EMOTIONS + list(EMOTION_ALIASES):
        if emotion in tokens:
            return EMOTION_ALIASES.get(emotion, emotion)
    return None