File size: 3,889 Bytes
341f731
7a769a2
341f731
 
d61190d
341f731
 
b40f735
341f731
 
07796cb
caa8834
 
 
07796cb
caa8834
07796cb
caa8834
07796cb
caa8834
07796cb
 
 
ccb5ae8
 
07796cb
 
 
341f731
e1c62af
07796cb
e1c62af
 
 
 
 
d61190d
341b164
07796cb
 
e1c62af
ccb5ae8
341f731
e1c62af
341f731
 
07796cb
 
e1c62af
07796cb
e1c62af
 
 
 
 
 
341f731
 
 
 
 
 
 
e1c62af
d61190d
07796cb
 
 
 
 
 
 
 
 
 
d61190d
07796cb
e1c62af
fdcc449
e1c62af
d61190d
07796cb
e1c62af
07796cb
fdcc449
d61190d
fdcc449
07796cb
 
 
 
 
d61190d
 
 
07796cb
 
fdcc449
e1c62af
07796cb
341f731
e1c62af
 
d61190d
83afdb6
 
07796cb
83afdb6
 
d61190d
341f731
 
ccb5ae8
341f731
83afdb6
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import io
import os
import tempfile
from typing import List

import gradio as gr
from gtts import gTTS
from pydub import AudioSegment

# ---------------------------
# Helper functions
# ---------------------------

def load_audio(file_obj) -> AudioSegment:
    """Load audio from a path, a file-like object, or raw bytes.

    The container format handed to ffmpeg is guessed from the file
    extension; when no extension is available it defaults to ``"mp3"``.

    Args:
        file_obj: One of
            * a ``str`` / ``os.PathLike`` pointing at an audio file,
            * an object with a ``read()`` method (optionally with ``.name``),
            * a bytes-like object holding the encoded audio.

    Returns:
        The decoded ``AudioSegment``.

    Raises:
        Whatever ``AudioSegment.from_file`` raises when the data cannot be
        opened or decoded.
    """
    if isinstance(file_obj, (str, os.PathLike)):
        # Uploads may arrive as plain file paths; wrapping a str in BytesIO
        # would raise TypeError, so let pydub open the path itself.
        source = os.fspath(file_obj)
        name = source
    elif hasattr(file_obj, "read"):
        source = io.BytesIO(file_obj.read())
        name = getattr(file_obj, "name", "audio.mp3")
    else:
        # Anything else is assumed to be a bytes-like payload.
        source = io.BytesIO(file_obj)
        name = getattr(file_obj, "name", "audio.mp3")

    # ``name`` can be a non-string (e.g. an integer file descriptor).
    if not isinstance(name, str):
        name = "audio.mp3"

    # Only the final path component is inspected, so dots in directory
    # names cannot be mistaken for an extension.
    ext = os.path.splitext(os.path.basename(name))[1].lstrip(".").lower() or "mp3"
    if ext == "mpg":
        ext = "mpeg"

    return AudioSegment.from_file(source, format=ext)

def replace_vocals(original: AudioSegment, new_text: str, lang='en') -> AudioSegment:
    """Synthesize ``new_text`` with gTTS and mix it over ``original``.

    Despite the name, nothing is removed from ``original``: the generated
    speech is overlaid on top of the existing audio.

    Args:
        original: The track the synthesized speech is mixed into.
        new_text: Text to convert to speech.
        lang: Language code passed to gTTS (default ``'en'``).

    Returns:
        The result of ``original.overlay(voice)``.
    """
    speech_buffer = io.BytesIO()
    # gTTS.save() expects a file *path*; write_to_fp() is the method that
    # writes the MP3 bytes into a file-like object such as BytesIO.
    gTTS(new_text, lang=lang).write_to_fp(speech_buffer)
    speech_buffer.seek(0)
    voice = AudioSegment.from_file(speech_buffer, format="mp3")
    return original.overlay(voice)

def edit_audio(files: List, new_texts: List[str], output_format: str):
    """Edit multiple audio files with optional vocal replacement.

    Each file is loaded, optionally mixed with speech generated from the
    text at the same index, and exported to an in-memory buffer.

    Args:
        files: Uploaded files, in any form ``load_audio`` accepts.
            ``None`` is treated as an empty list.
        new_texts: Replacement text per file, matched by index. Missing,
            ``None`` or blank entries leave the corresponding track unchanged.
        output_format: One of ``mp3``, ``wav``, ``ogg`` or ``flac``
            (case-insensitive). Anything else falls back to ``mp3``.

    Returns:
        ``(results, None)`` on success, where ``results`` is a list of
        ``(BytesIO, filename)`` tuples, or ``(None, message)`` as soon as one
        file fails to load, synthesize or export.
    """
    supported_formats = ("mp3", "wav", "ogg", "flac")
    # The output format does not depend on the file, so resolve it once.
    requested = (output_format or "").lower()
    fmt = requested if requested in supported_formats else "mp3"
    texts = list(new_texts or [])

    results = []
    for i, f in enumerate(files or []):
        try:
            audio = load_audio(f)
        except Exception as e:
            return None, f"Failed to load file #{i+1}: {e}"

        text = texts[i] if i < len(texts) else None
        if text and text.strip():
            # Speech synthesis needs the network; report a failure the same
            # way as a load failure instead of letting it propagate.
            try:
                audio = replace_vocals(audio, text)
            except Exception as e:
                return None, f"Failed to generate speech for file #{i+1}: {e}"

        out_io = io.BytesIO()
        try:
            audio.export(out_io, format=fmt)
        except Exception as e:
            return None, f"Failed to export file #{i+1}: {e}"
        out_io.seek(0)
        results.append((out_io, f"edited_track_{i+1}.{fmt}"))
    return results, None

# ---------------------------
# Gradio UI
# ---------------------------

def build_ui(max_tracks: int = 10):
    """Build the Gradio Blocks app for uploading, editing and downloading audio.

    Layout containers such as ``gr.Column`` cannot have components added or
    removed from inside an event handler, so a fixed pool of ``max_tracks``
    text boxes is created up front and their visibility is toggled to match
    the number of uploaded files.

    Args:
        max_tracks: Number of per-track text boxes to create. Files beyond
            this count are still processed, but without replacement text.

    Returns:
        The assembled ``gr.Blocks`` instance (not yet launched).
    """
    with gr.Blocks() as demo:
        gr.Markdown("# 🎵 Audio Editor with Vocal Replacement")

        files = gr.File(
            label="Upload audio files (any ffmpeg-supported format)",
            file_count="multiple",
            file_types=None,
        )
        output_format = gr.Dropdown(
            label="Output format",
            choices=["mp3", "wav", "ogg", "flac"],
            value="mp3",
        )

        # Pre-built, initially hidden text boxes: one per potential track.
        with gr.Column():
            textboxes = [
                gr.Textbox(
                    label=f"New lyrics / words for Track {i+1}",
                    placeholder="Leave empty to keep original",
                    visible=False,
                )
                for i in range(max_tracks)
            ]

        make_btn = gr.Button("Edit Audio")
        status = gr.Textbox(label="Status", interactive=False)
        out_files = gr.File(label="Download Edited Files", file_types=None, interactive=False)

        def sync_text_boxes(files_list):
            """Show one empty text box per uploaded file and hide the rest."""
            count = len(files_list) if files_list else 0
            return [gr.update(visible=i < count, value="") for i in range(len(textboxes))]

        if textboxes:
            files.change(sync_text_boxes, inputs=files, outputs=textboxes)

        def on_edit(files_list, output_format_in, *textbox_values):
            """Run the edit and return downloadable file paths plus a status."""
            if not files_list:
                return None, "Please upload audio files."
            results, err = edit_audio(files_list, list(textbox_values), output_format_in)
            if err:
                return None, err

            # A File output is populated from file paths, so the in-memory
            # buffers are written to a fresh temporary directory first.
            out_dir = tempfile.mkdtemp(prefix="edited_audio_")
            paths = []
            for buffer, filename in results:
                path = os.path.join(out_dir, filename)
                with open(path, "wb") as fh:
                    fh.write(buffer.getvalue())
                paths.append(path)
            return paths, "Edited audio ready!"

        make_btn.click(
            on_edit,
            inputs=[files, output_format] + textboxes,
            outputs=[out_files, status],
        )

    return demo

if __name__ == "__main__":
    # Script entry point: assemble the interface, then serve it with debug
    # output enabled and a share link requested.
    app = build_ui()
    launch_options = {"debug": True, "share": True}
    app.launch(**launch_options)