import io
import os
import tempfile
from typing import List, Optional, Tuple

import gradio as gr
from gtts import gTTS
from pydub import AudioSegment

# Output formats offered in the UI and accepted by edit_audio().
SUPPORTED_FORMATS = ("mp3", "wav", "ogg", "flac")
DEFAULT_FORMAT = "mp3"

# Number of lyric textboxes build_ui() pre-creates. Tracks uploaded beyond
# this count get no textbox and are therefore exported without an overlay.
DEFAULT_MAX_TRACKS = 10

# File extensions whose ffmpeg format name differs from the extension itself.
_FORMAT_ALIASES = {"mpg": "mpeg"}


# ---------------------------
# Helper functions
# ---------------------------
def _guess_format(name) -> Optional[str]:
    """Return the format implied by *name*'s extension, or None if unknown.

    None is returned for non-string names and for names without an
    extension, so the caller can leave format detection to pydub/ffmpeg.
    """
    if not isinstance(name, str):
        return None
    ext = os.path.splitext(name)[1].lstrip(".").lower()
    if not ext:
        return None
    return _FORMAT_ALIASES.get(ext, ext)


def load_audio(file_obj) -> AudioSegment:
    """Load audio from a path, a file-like object, or raw bytes.

    Args:
        file_obj: One of
            * a ``str`` / ``os.PathLike`` path to an audio file,
            * an object whose ``name`` attribute is the path of an existing
              file (e.g. an upload wrapper or an open file),
            * any object with a ``read()`` method returning bytes,
            * a bytes-like object holding the encoded audio.

    Returns:
        The decoded ``AudioSegment``.

    Raises:
        Whatever ``AudioSegment.from_file`` (or ``read()``) raises when the
        input cannot be read or decoded.
    """
    if isinstance(file_obj, (str, os.PathLike)):
        path = os.fspath(file_obj)
        return AudioSegment.from_file(path, format=_guess_format(path))

    name = getattr(file_obj, "name", None)
    # Loading by path avoids depending on the handle's state (read position,
    # already closed, ...) when the object merely wraps a file on disk.
    if isinstance(name, str) and os.path.isfile(name):
        return AudioSegment.from_file(name, format=_guess_format(name))

    if hasattr(file_obj, "read"):
        data = file_obj.read()
    else:
        # Fallback: assume the caller handed us the encoded bytes directly.
        data = file_obj
    return AudioSegment.from_file(io.BytesIO(data), format=_guess_format(name))


def replace_vocals(original: AudioSegment, new_text: str, lang='en') -> AudioSegment:
    """Synthesize *new_text* with gTTS and overlay it on *original*.

    Note that the speech is mixed on top of the original audio starting at
    position 0; nothing is removed from the original track.

    Args:
        original: The audio to overlay the speech onto.
        new_text: Text to synthesize.
        lang: gTTS language code.

    Returns:
        A new ``AudioSegment`` with the synthesized speech overlaid.
    """
    tts_io = io.BytesIO()
    # gTTS.save() expects a file *path*; write_to_fp() is the API for
    # writing the MP3 bytes into a file-like object.
    gTTS(new_text, lang=lang).write_to_fp(tts_io)
    tts_io.seek(0)
    voice = AudioSegment.from_file(tts_io, format="mp3")
    return original.overlay(voice)


def edit_audio(files: List, new_texts: List[str], output_format: str):
    """Edit multiple audio files with optional speech overlay.

    Args:
        files: Inputs accepted by ``load_audio`` (paths, file-likes, bytes).
        new_texts: Text per track, matched to *files* by index. Missing,
            ``None`` or blank entries leave that track unchanged.
        output_format: One of ``SUPPORTED_FORMATS`` (case-insensitive);
            anything else falls back to ``DEFAULT_FORMAT``.

    Returns:
        ``(results, None)`` on success, where ``results`` is a list of
        ``(io.BytesIO, filename)`` tuples with each buffer rewound to 0, or
        ``(None, error_message)`` as soon as any track fails.
    """
    fmt = (output_format or "").lower()
    if fmt not in SUPPORTED_FORMATS:
        fmt = DEFAULT_FORMAT
    texts = list(new_texts or [])

    results: List[Tuple[io.BytesIO, str]] = []
    for i, f in enumerate(files or []):
        try:
            audio = load_audio(f)
        except Exception as e:
            return None, f"Failed to load file #{i+1}: {e}"

        text = texts[i] if i < len(texts) else None
        if text and text.strip():
            # Speech synthesis needs network access, so report failures the
            # same way as load failures instead of raising.
            try:
                audio = replace_vocals(audio, text)
            except Exception as e:
                return None, f"Failed to generate speech for file #{i+1}: {e}"

        out_io = io.BytesIO()
        try:
            audio.export(out_io, format=fmt)
        except Exception as e:
            return None, f"Failed to export file #{i+1}: {e}"
        out_io.seek(0)
        results.append((out_io, f"edited_track_{i+1}.{fmt}"))
    return results, None


# ---------------------------
# Gradio UI
# ---------------------------
def build_ui(max_tracks: int = DEFAULT_MAX_TRACKS):
    """Build the Gradio Blocks app.

    Components cannot be appended to a layout after it has been built, so a
    fixed pool of *max_tracks* hidden textboxes is created up front and
    their visibility is toggled to match the number of uploaded files.

    Args:
        max_tracks: How many per-track lyric textboxes to create.

    Returns:
        The ``gr.Blocks`` app, not yet launched.
    """
    with gr.Blocks() as demo:
        gr.Markdown("# 🎵 Audio Editor with Vocal Replacement")

        files = gr.File(
            label="Upload audio files (any ffmpeg-supported format)",
            file_count="multiple",
            file_types=None,
        )
        output_format = gr.Dropdown(
            label="Output format",
            choices=list(SUPPORTED_FORMATS),
            value=DEFAULT_FORMAT,
        )

        with gr.Column():
            textboxes = [
                gr.Textbox(
                    label=f"New lyrics / words for Track {i+1}",
                    placeholder="Leave empty to keep original",
                    visible=False,
                )
                for i in range(max_tracks)
            ]

        make_btn = gr.Button("Edit Audio")
        status = gr.Textbox(label="Status", interactive=False)
        out_files = gr.File(
            label="Download Edited Files", file_types=None, interactive=False
        )

        def show_text_boxes(files_list):
            """Show one textbox per uploaded file and hide the rest."""
            count = len(files_list) if files_list else 0
            updates = [gr.update(visible=i < count) for i in range(max_tracks)]
            # With a single output component the return value is taken as
            # that component's update itself, not as a list of updates.
            return updates[0] if len(updates) == 1 else updates

        def on_edit(files_list, output_format_in, *textbox_values):
            """Run the edit and hand the results to the download component."""
            if not files_list:
                return None, "Please upload audio files."
            results, err = edit_audio(
                files_list, list(textbox_values), output_format_in
            )
            if err:
                return None, err

            # The File output component expects file paths, so persist the
            # in-memory buffers. The directory is intentionally left on disk
            # so the files stay available for download.
            out_dir = tempfile.mkdtemp(prefix="edited_audio_")
            paths = []
            for buffer, filename in results:
                path = os.path.join(out_dir, filename)
                with open(path, "wb") as fh:
                    fh.write(buffer.getvalue())
                paths.append(path)
            return paths, "Edited audio ready!"

        if textboxes:
            files.change(show_text_boxes, inputs=files, outputs=textboxes)

        make_btn.click(
            on_edit,
            inputs=[files, output_format] + textboxes,
            outputs=[out_files, status],
        )

    return demo


if __name__ == "__main__":
    app = build_ui()
    app.launch(debug=True, share=True)