# Hugging Face Space app — runs on CPU Upgrade hardware.
| import gradio as gr | |
| from typing import Dict, Tuple | |
| from tools.audio_info import get_audio_info | |
| from tools.combine_tracks import combine_tracks, create_medley | |
| from tools.stems_separation import ( | |
| separate_audio, | |
| extract_selected_stems, | |
| extract_vocal_non_vocal, | |
| create_karaoke_track, | |
| ) | |
| from tools.time_strech import align_songs_by_bpm, stretch_to_bpm | |
| from tools.youtube_extract import extract_audio_from_youtube | |
| from tools.audio_cutting import ( | |
| cut_audio, | |
| mute_time_windows, | |
| extract_segments, | |
| trim_audio, | |
| ) | |
| from tools.music_understanding import ( | |
| understand_music, | |
| analyze_music_structure, | |
| suggest_cutting_points, | |
| analyze_genre_and_style, | |
| ) | |
def pitch_shift_with_semitones(audio_path: str, semitones: int) -> str:
    """
    Shift the pitch of an audio file by a specified number of semitones.

    This function uses librosa's pitch shifting algorithm to change the musical pitch
    of an audio file while maintaining its tempo and duration.

    Args:
        audio_path: Path to the input audio file (supports common formats: WAV, MP3, FLAC)
        semitones: Number of semitones to shift (positive = higher pitch, negative = lower pitch)
            Range: -12 to +12 semitones (1 octave up/down)

    Returns:
        Path to the pitch-shifted audio file in WAV format

    Examples:
        - semitones=2: Shift up by 2 semitones (1 whole tone)
        - semitones=-5: Shift down by 5 semitones (1 perfect fourth)
        - semitones=0: No change (returns original file)

    Note:
        The function creates a temporary WAV file that should be cleaned up by the caller
    """
    # Fast path: no shift requested, hand back the original file untouched.
    if semitones == 0:
        return audio_path

    # Heavy imports are deferred so the zero-shift path stays cheap.
    import librosa

    # sr=None preserves the file's native sample rate; mono=False keeps channels.
    y, sr = librosa.load(audio_path, sr=None, mono=False)

    # Apply the pitch shift (tempo and duration are preserved).
    y_shifted = librosa.effects.pitch_shift(y, sr=sr, n_steps=semitones)

    import os
    import tempfile

    import soundfile as sf

    # mkstemp + close: writing through the path while a NamedTemporaryFile
    # handle is still open fails on Windows; closing the fd first is portable.
    fd, out_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    if y_shifted.ndim == 2:
        # librosa is (channels, frames); soundfile expects (frames, channels).
        y_shifted = y_shifted.T
    sf.write(out_path, y_shifted, sr, format="wav", subtype="PCM_16")
    return out_path
def stretch_audio_to_bpm_wrapper(audio_path: str, target_bpm: float) -> str:
    """
    Change a track's tempo to match a target BPM while preserving its pitch.

    UI-facing wrapper around the time-stretching tool. The underlying tool
    detects the original tempo automatically and applies the stretch factor
    needed to hit the requested BPM, which makes this useful for
    beat-matching and tempo alignment.

    Args:
        audio_path: Path to the input audio file (supports common formats: WAV, MP3, FLAC)
        target_bpm: Desired tempo in beats per minute.
            Typical range: 60-200 BPM
            Common values: 90 (slow), 120 (medium), 140 (fast), 128 (electronic)

    Returns:
        Path to the time-stretched audio file in WAV format

    Examples:
        - target_bpm=128: typical electronic dance music tempo
        - target_bpm=120: standard pop/rock tempo
        - target_bpm=140: fast electronic or rock tempo

    Note:
        A new WAV file with the modified tempo is created on disk.
    """
    stretched_path = stretch_to_bpm(audio_path, target_bpm)
    return stretched_path
def extract_selected_stems_wrapper(
    audio_path: str, vocals: bool, drums: bool, bass: bool, other: bool
) -> Dict[str, str]:
    """
    Extract selected stems from an audio file based on user choices.

    This function allows selective extraction of specific stems rather than all four stems,
    which can save processing time and storage space when only certain elements are needed.

    Args:
        audio_path: Path to the input audio file (supports common formats: WAV, MP3, FLAC, M4A)
        vocals: Whether to extract the vocals stem
        drums: Whether to extract the drums stem
        bass: Whether to extract the bass stem
        other: Whether to extract the other stem

    Returns:
        dict[str, str]: Dictionary mapping stem names to their file paths

    Raises:
        ValueError: If no stem is selected.

    Examples:
        - vocals=True, drums=True, bass=False, other=False: Extract only vocals and drums
        - vocals=True, drums=False, bass=False, other=False: Extract only vocals for karaoke
        - vocals=False, drums=True, bass=True, other=False: Extract rhythm section (drums + bass)

    Note:
        At least one stem must be selected for extraction
        Uses the same high-quality Demucs model as separate_audio
        Processing time is the same as full separation since Demucs extracts all stems internally
    """
    # Map each checkbox to its stem name and keep only the selected ones.
    selections = {"vocals": vocals, "drums": drums, "bass": bass, "other": other}
    stems_to_extract = [stem for stem, selected in selections.items() if selected]
    if not stems_to_extract:
        raise ValueError("At least one stem must be selected for extraction")
    return extract_selected_stems(audio_path, stems_to_extract)
def extract_vocal_non_vocal_wrapper(audio_path: str) -> Tuple[str, str]:
    """
    Split a song into a vocal track and a non-vocal (instrumental) track.

    Thin UI-facing wrapper that separates audio into vocal and non-vocal
    components — handy for karaoke creation, vocal isolation, or
    instrumental extraction.

    Args:
        audio_path: Path to the input audio file (supports common formats: WAV, MP3, FLAC, M4A)

    Returns:
        tuple[str, str]: Paths to (vocals_file, instrumental_file)
            - vocals_file: the isolated vocal track
            - instrumental_file: the combined instrumental track (drums + bass + other)

    Examples:
        - extract_vocal_non_vocal_wrapper('song.mp3'): separate into vocals and instrumental
        - extract_vocal_non_vocal_wrapper('song.wav'): create vocal and backing track versions

    Note:
        The instrumental track combines the drums, bass, and other stems,
        mixed and normalized by the same Demucs-based tool used by
        separate_audio.
    """
    separated_tracks = extract_vocal_non_vocal(audio_path)
    return separated_tracks
def create_karaoke_track_wrapper(audio_path: str) -> str:
    """
    Produce a karaoke-ready instrumental track by stripping the vocals.

    Convenience wrapper that extracts the non-vocal portion of a song,
    yielding a backing track suitable for karaoke.

    Args:
        audio_path: Path to the input audio file (supports common formats: WAV, MP3, FLAC, M4A)

    Returns:
        Path to the karaoke (instrumental) audio file

    Examples:
        - create_karaoke_track_wrapper('song.mp3'): create karaoke version
        - create_karaoke_track_wrapper('song.wav'): create instrumental backing track

    Note:
        Vocal removal is done by the same Demucs-based pipeline as
        separate_audio; the drums, bass, and other stems are combined into
        one normalized instrumental track.
    """
    karaoke_path = create_karaoke_track(audio_path)
    return karaoke_path
def create_interface():
    """Build the Gradio TabbedInterface exposing every audio tool as a tab.

    Returns:
        gr.TabbedInterface: One tab per tool, in the order declared at the
        bottom of this function.
    """
    # Local import, matching the file's function-scope import style; used to
    # parse user-supplied JSON window/segment lists safely (never eval).
    import json

    # Tab 1: Stem Separation
    stem_interface = gr.Interface(
        fn=separate_audio,
        inputs=gr.Audio(type="filepath", label="Upload Audio File", sources=["upload"]),
        outputs=[
            gr.Audio(label="Vocals", type="filepath"),
            gr.Audio(label="Drums", type="filepath"),
            gr.Audio(label="Bass", type="filepath"),
            gr.Audio(label="Other", type="filepath"),
        ],
        title="Audio Stem Separation",
        description="Upload an audio file to separate it into vocals, drums, bass, and other stems.",
        examples=None,
        cache_examples=False,
        flagging_mode="never",
    )
    # Tab 2: Track Combination
    combine_interface = gr.Interface(
        fn=combine_tracks,
        inputs=[
            gr.Audio(type="filepath", label="First Audio Track", sources=["upload"]),
            gr.Audio(type="filepath", label="Second Audio Track", sources=["upload"]),
            gr.Slider(
                minimum=0.0, maximum=1.0, value=0.5, label="Weight for First Track"
            ),
            gr.Slider(
                minimum=0.0, maximum=1.0, value=0.5, label="Weight for Second Track"
            ),
            gr.Checkbox(value=True, label="Normalize Output"),
            gr.Number(value=0.0, label="Fade In Duration (seconds)"),
            gr.Number(value=0.0, label="Fade Out Duration (seconds)"),
        ],
        outputs=gr.Audio(label="Combined Track", type="filepath"),
        title="Combine Audio Tracks",
        description="Combine two audio tracks with adjustable weights and optional fade effects.",
        examples=None,
        cache_examples=False,
        flagging_mode="never",
    )
    # Tab 3: Pitch Alignment
    pitch_interface = gr.Interface(
        fn=pitch_shift_with_semitones,
        inputs=[
            gr.Audio(type="filepath", label="Upload Audio File", sources=["upload"]),
            gr.Number(value=0, label="Semitones to Shift"),
        ],
        outputs=gr.Audio(label="Pitch Shifted Audio", type="filepath"),
        title="Pitch Shift Audio",
        description="Shift the pitch of an audio file by specified semitones.",
        examples=None,
        cache_examples=False,
        flagging_mode="never",
    )
    # Tab 4: Time Stretching
    stretch_interface = gr.Interface(
        fn=stretch_audio_to_bpm_wrapper,
        inputs=[
            gr.Audio(type="filepath", label="Upload Audio File", sources=["upload"]),
            gr.Number(value=120, label="Target BPM"),
        ],
        outputs=gr.Audio(label="Stretched Audio", type="filepath"),
        title="Stretch Audio to BPM",
        description="Stretch audio to match a specific BPM.",
        examples=None,
        cache_examples=False,
        flagging_mode="never",
    )
    # Tab 5: BPM Alignment
    bpm_interface = gr.Interface(
        fn=align_songs_by_bpm,
        inputs=[
            gr.Audio(type="filepath", label="First Audio Track", sources=["upload"]),
            gr.Audio(type="filepath", label="Second Audio Track", sources=["upload"]),
        ],
        outputs=[
            gr.Audio(label="Aligned First Track", type="filepath"),
            gr.Audio(label="Aligned Second Track", type="filepath"),
        ],
        title="Align Songs by BPM",
        description="Align two songs to the same BPM by stretching the faster one to match the slower one.",
        examples=None,
        cache_examples=False,
        flagging_mode="never",
    )
    # Tab 6: Selective Stem Extraction
    selective_interface = gr.Interface(
        fn=extract_selected_stems_wrapper,
        inputs=[
            gr.Audio(type="filepath", label="Upload Audio File", sources=["upload"]),
            gr.Checkbox(value=True, label="Extract Vocals"),
            gr.Checkbox(value=True, label="Extract Drums"),
            gr.Checkbox(value=True, label="Extract Bass"),
            gr.Checkbox(value=True, label="Extract Other"),
        ],
        outputs=gr.JSON(label="Extracted Stems"),
        title="Selective Stem Extraction",
        description="Extract only specific stems from an audio file to save processing time and storage.",
        examples=None,
        cache_examples=False,
        flagging_mode="never",
    )
    # Tab 7: Vocal/Non-Vocal Separation
    vocal_nonvocal_interface = gr.Interface(
        fn=extract_vocal_non_vocal_wrapper,
        inputs=gr.Audio(type="filepath", label="Upload Audio File", sources=["upload"]),
        outputs=[
            gr.Audio(label="Vocals Track", type="filepath"),
            gr.Audio(label="Instrumental Track", type="filepath"),
        ],
        title="Vocal/Instrumental Separation",
        description="Separate audio into vocal and instrumental components for karaoke or vocal isolation.",
        examples=None,
        cache_examples=False,
        flagging_mode="never",
    )
    # Tab 8: Karaoke Track Creation
    karaoke_interface = gr.Interface(
        fn=create_karaoke_track_wrapper,
        inputs=gr.Audio(type="filepath", label="Upload Audio File", sources=["upload"]),
        outputs=gr.Audio(label="Karaoke Track", type="filepath"),
        title="Create Karaoke Track",
        description="Create a karaoke-ready instrumental track by removing vocals from any song.",
        examples=None,
        cache_examples=False,
        flagging_mode="never",
    )
    # Tab 9: Medley Creation
    medley_interface = gr.Interface(
        fn=create_medley,
        inputs=[
            gr.Audio(type="filepath", label="Vocals Stem", sources=["upload"]),
            gr.Audio(type="filepath", label="Instrumental Stem", sources=["upload"]),
            gr.Number(
                value=1.2, label="Vocals Gain", minimum=0.1, maximum=3.0, step=0.1
            ),
            gr.Number(
                value=0.9, label="Instrumental Gain", minimum=0.1, maximum=3.0, step=0.1
            ),
            gr.Textbox(
                value="threshold=-18dB:ratio=3:attack=50:release=200",
                label="Compressor Settings",
                placeholder="threshold=-18dB:ratio=3:attack=50:release=200",
            ),
            gr.Dropdown(
                choices=["libmp3lame", "aac", "flac", "pcm_s16le"],
                value="libmp3lame",
                label="Audio Codec",
            ),
            gr.Textbox(value="192k", label="Audio Bitrate", placeholder="192k"),
        ],
        outputs=gr.Audio(label="Medley Audio", type="filepath"),
        title="Create Vocal/Instrumental Medley",
        description="Mix vocals and instrumental stems into a polished medley with compression and gain control.",
        examples=None,
        cache_examples=False,
        flagging_mode="never",
    )
    # Tab 10: Audio Information
    audio_info_interface = gr.Interface(
        fn=get_audio_info,
        inputs=gr.Audio(type="filepath", label="Upload Audio File", sources=["upload"]),
        outputs=gr.JSON(label="Audio Information"),
        title="Get Audio Information",
        description="Get detailed information about an audio file including duration, sample rate, channels, and file size.",
        examples=None,
        cache_examples=False,
        flagging_mode="never",
    )
    # Tab 11: YouTube Extraction
    # NOTE(review): this interface was previously defined twice back to back;
    # the identical duplicate has been removed.
    youtube_interface = gr.Interface(
        fn=extract_audio_from_youtube,
        inputs=[
            gr.Textbox(
                label="YouTube URL", placeholder="https://www.youtube.com/watch?v=..."
            ),
            gr.Dropdown(
                choices=["wav", "mp3", "flac"], value="wav", label="Output Format"
            ),
            gr.Dropdown(choices=["best", "worst"], value="best", label="Audio Quality"),
        ],
        outputs=gr.Audio(label="Extracted Audio", type="filepath"),
        title="Extract Audio from YouTube",
        description="Extract audio from a YouTube video URL.",
        examples=None,
        cache_examples=False,
        flagging_mode="never",
    )
    # Tab 12: Audio Cutting
    cut_interface = gr.Interface(
        fn=cut_audio,
        inputs=[
            gr.Audio(type="filepath", label="Upload Audio File", sources=["upload"]),
            gr.Number(value=0.0, label="Start Time (seconds)"),
            gr.Number(value=10.0, label="End Time (seconds)"),
            gr.Dropdown(choices=["wav", "mp3"], value="wav", label="Output Format"),
        ],
        outputs=gr.Audio(label="Cut Audio", type="filepath"),
        title="Cut Audio Segment",
        description="Extract a segment from an audio file between specified start and end times.",
        examples=None,
        cache_examples=False,
        flagging_mode="never",
    )

    # Tab 13: Mute Time Windows
    def mute_time_windows_wrapper(audio_path, windows_str, format_val):
        try:
            # json.loads instead of eval: the string comes straight from the
            # UI and must never be executed as code.
            windows = json.loads(windows_str) if windows_str else []
            return mute_time_windows(
                audio_path=audio_path, mute_windows=windows, output_format=format_val
            )
        except Exception:
            # Best-effort UI behavior: an invalid spec yields no output audio.
            return None

    mute_interface = gr.Interface(
        fn=mute_time_windows_wrapper,
        inputs=[
            gr.Audio(type="filepath", label="Upload Audio File", sources=["upload"]),
            gr.Textbox(
                value="[[1.0, 2.0], [3.0, 4.0]]",
                label="Mute Windows (JSON format)",
                placeholder="[[start1, end1], [start2, end2]]",
            ),
            gr.Dropdown(choices=["wav", "mp3"], value="wav", label="Output Format"),
        ],
        outputs=gr.Audio(label="Muted Audio", type="filepath"),
        title="Mute Time Windows",
        description="Mute specific time windows in an audio file with smooth fade transitions.",
        examples=None,
        cache_examples=False,
        flagging_mode="never",
    )

    # Tab 14: Extract Segments
    def extract_segments_wrapper(audio_path, segments_str, format_val, join):
        try:
            # json.loads instead of eval: never execute user-supplied text.
            segments = json.loads(segments_str) if segments_str else []
            result = extract_segments(
                audio_path=audio_path,
                segments=segments,
                output_format=format_val,
                join_segments=join,
            )
            # If result is a list, return the first item for Gradio
            if isinstance(result, list):
                return result[0] if result else None
            return result
        except Exception:
            # Best-effort UI behavior: an invalid spec yields no output audio.
            return None

    extract_interface = gr.Interface(
        fn=extract_segments_wrapper,
        inputs=[
            gr.Audio(type="filepath", label="Upload Audio File", sources=["upload"]),
            gr.Textbox(
                value="[[0.0, 1.0], [2.0, 3.0]]",
                label="Segments (JSON format)",
                placeholder="[[start1, end1], [start2, end2]]",
            ),
            gr.Dropdown(choices=["wav", "mp3"], value="wav", label="Output Format"),
            gr.Checkbox(value=False, label="Join Segments"),
        ],
        outputs=gr.Audio(label="Extracted Segments", type="filepath"),
        title="Extract Segments",
        description="Extract multiple segments from an audio file.",
        examples=None,
        cache_examples=False,
        flagging_mode="never",
    )
    # Tab 15: Trim Audio
    trim_interface = gr.Interface(
        fn=trim_audio,
        inputs=[
            gr.Audio(type="filepath", label="Upload Audio File", sources=["upload"]),
            gr.Number(value=None, label="Trim Start (seconds, leave empty to skip)"),
            gr.Number(value=None, label="Trim End (seconds, leave empty to skip)"),
            gr.Dropdown(choices=["wav", "mp3"], value="wav", label="Output Format"),
        ],
        outputs=gr.Audio(label="Trimmed Audio", type="filepath"),
        title="Trim Audio",
        description="Trim audio from the beginning and/or end.",
        examples=None,
        cache_examples=False,
        flagging_mode="never",
    )

    # Tab 16: Music Understanding
    def understand_music_wrapper(audio_path, prompt):
        try:
            result = understand_music(audio_path=audio_path, prompt_text=prompt)
            if result["status"] == "success":
                return result["analysis"]
            else:
                return f"Error: {result.get('error', 'Unknown error')}"
        except Exception as e:
            return f"Error: {str(e)}"

    understand_interface = gr.Interface(
        fn=understand_music_wrapper,
        inputs=[
            gr.Audio(type="filepath", label="Upload Audio File", sources=["upload"]),
            gr.Textbox(
                value="Describe this track in full detail - tell me the genre, tempo, and key, then dive into the instruments, production style, and overall mood it creates.",
                label="Analysis Prompt",
                lines=3,
            ),
        ],
        outputs=gr.Textbox(label="Music Analysis", lines=10),
        title="Music Understanding (AI)",
        description="Analyze music using NVIDIA's Music-Flamingo Audio Language Model.",
        examples=None,
        cache_examples=False,
        flagging_mode="never",
    )

    # Tab 17: Song Structure Analysis
    def analyze_music_structure_wrapper(audio_path):
        try:
            result = analyze_music_structure(audio_path=audio_path)
            if result["status"] == "success":
                return result["analysis"]
            else:
                return f"Error: {result.get('error', 'Unknown error')}"
        except Exception as e:
            return f"Error: {str(e)}"

    structure_interface = gr.Interface(
        fn=analyze_music_structure_wrapper,
        inputs=[
            gr.Audio(type="filepath", label="Upload Audio File", sources=["upload"]),
        ],
        outputs=gr.Textbox(label="Structure Analysis", lines=10),
        title="Song Structure Analysis",
        description="Analyze song structure and identify sections (verse, chorus, bridge, etc.).",
        examples=None,
        cache_examples=False,
        flagging_mode="never",
    )

    # Tab 18: Cutting Points Suggestions
    # NOTE(review): a later duplicate definition used a raw lambda that fed
    # the tool's result dict straight into a Textbox, silently replacing this
    # error-handling wrapper; the duplicate has been removed.
    def suggest_cutting_points_wrapper(audio_path, purpose):
        try:
            result = suggest_cutting_points(audio_path=audio_path, purpose=purpose)
            if result["status"] == "success":
                return result["analysis"]
            else:
                return f"Error: {result.get('error', 'Unknown error')}"
        except Exception as e:
            return f"Error: {str(e)}"

    cutting_points_interface = gr.Interface(
        fn=suggest_cutting_points_wrapper,
        inputs=[
            gr.Audio(type="filepath", label="Upload Audio File", sources=["upload"]),
            gr.Dropdown(
                choices=["general", "dj_mix", "social_media", "ringtone"],
                value="general",
                label="Purpose",
            ),
        ],
        outputs=gr.Textbox(label="Cutting Point Suggestions", lines=10),
        title="AI Cutting Point Suggestions",
        description="Get AI-suggested optimal cutting points for different purposes.",
        examples=None,
        cache_examples=False,
        flagging_mode="never",
    )

    # Tab 19: Genre and Style Analysis
    # NOTE(review): as with Tab 18, a later duplicate bypassing this wrapper
    # has been removed.
    def analyze_genre_and_style_wrapper(audio_path):
        try:
            result = analyze_genre_and_style(audio_path=audio_path)
            if result["status"] == "success":
                return result["analysis"]
            else:
                return f"Error: {result.get('error', 'Unknown error')}"
        except Exception as e:
            return f"Error: {str(e)}"

    genre_interface = gr.Interface(
        fn=analyze_genre_and_style_wrapper,
        inputs=[
            gr.Audio(type="filepath", label="Upload Audio File", sources=["upload"]),
        ],
        outputs=gr.Textbox(label="Genre & Style Analysis", lines=10),
        title="Genre & Style Analysis",
        description="Detailed analysis of genre, production style, and instrumentation.",
        examples=None,
        cache_examples=False,
        flagging_mode="never",
    )

    return gr.TabbedInterface(
        [
            stem_interface,
            combine_interface,
            pitch_interface,
            stretch_interface,
            bpm_interface,
            selective_interface,
            vocal_nonvocal_interface,
            karaoke_interface,
            medley_interface,
            audio_info_interface,
            youtube_interface,
            cut_interface,
            mute_interface,
            extract_interface,
            trim_interface,
            understand_interface,
            structure_interface,
            cutting_points_interface,
            genre_interface,
        ],
        [
            "Stem Separation",
            "Track Combination",
            "Pitch Alignment",
            "Time Stretching",
            "BPM Alignment",
            "Selective Stems",
            "Vocal/Instrumental",
            "Karaoke Creation",
            "Medley Creation",
            "Audio Information",
            "YouTube Extraction",
            "Audio Cutting",
            "Mute Windows",
            "Extract Segments",
            "Trim Audio",
            "Music Understanding",
            "Song Structure",
            "Cutting Points",
            "Genre Analysis",
        ],
    )
if __name__ == "__main__":
    # Build the tabbed UI and serve it on all interfaces, with the MCP
    # server enabled so the tools are also exposed over MCP.
    app = create_interface()
    app.launch(server_name="0.0.0.0", server_port=7860, mcp_server=True)