NeuralFalcon committed on
Commit
329d2b4
·
verified ·
1 Parent(s): fa03508

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. app.py +200 -0
  2. requirements.txt +7 -0
app.py ADDED
@@ -0,0 +1,200 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import time
3
+ import os
4
+ from utils import generate_dummy_audio, MOCK_LOGS
5
+
6
+ # -----------------------------------------------------------------------------
7
+ # Model Inference Wrapper
8
+ # -----------------------------------------------------------------------------
9
def run_vibevoice(
    text_prompt: str,
    reference_audio: str,
    speed: float,
    temperature: float,
    progress=gr.Progress(),
):
    """
    Wrapper function for VibeVoice inference (currently a simulated run).

    Args:
        text_prompt: The text to be spoken.
        reference_audio: Path to the reference audio file for style cloning.
            May be empty/None, in which case a warning is shown and the run
            continues.
        speed: Speaking rate.
        temperature: Sampling temperature (creativity/variance).
        progress: Gradio progress tracker. It is declared as a default-valued
            parameter because that is how Gradio attaches a tracker to an
            event handler; callers do not pass it.

    Returns:
        A tuple ``(output_audio_path, log_message)``: the value returned by
        ``generate_dummy_audio`` and a multi-line status string.

    Raises:
        gr.Error: If ``text_prompt`` is empty or contains only whitespace.
    """

    # 1. Input Validation
    # Whitespace-only text has nothing to synthesize, so treat it as empty.
    if not text_prompt or not text_prompt.strip():
        raise gr.Error("Please enter text to synthesize.")

    if not reference_audio:
        # VibeVoice usually requires a reference, but we can warn if missing
        gr.Warning("No reference audio provided. Using default voice style.")

    # 2. Progress Simulation (Replace this block with actual model inference)
    # ------------------------------------------------------------------
    # Actual implementation would look like:
    # model = load_vibevoice_model()
    # audio_array = model.inference(text_prompt, reference_audio, ...)
    # return (sample_rate, audio_array), "Generation Successful"
    # ------------------------------------------------------------------

    # (completed fraction, status text, simulated work time in seconds)
    simulated_stages = (
        (0, "Initializing VibeVoice...", 0.5),
        (0.3, "Analyzing Reference Audio Style...", 0.8),
        (0.6, "Synthesizing Speech...", 0.8),
        (0.9, "Finalizing Audio...", 0.3),
    )
    for fraction, description, pause_seconds in simulated_stages:
        progress(fraction, desc=description)
        time.sleep(pause_seconds)

    # Generate dummy audio for demonstration purposes
    output_audio_path = generate_dummy_audio(duration=3)

    # Show only the file name of the reference clip, not its full path.
    reference_label = (
        os.path.basename(reference_audio) if reference_audio else 'None'
    )
    log_message = (
        f"✅ Generation Complete\n"
        f"📝 Text length: {len(text_prompt)} chars\n"
        f"🎚️ Speed: {speed}x | 🌡️ Temp: {temperature}\n"
        f"🎤 Reference: {reference_label}"
    )

    return output_audio_path, log_message
65
+
66
+ # -----------------------------------------------------------------------------
67
+ # Custom Theme Definition
68
+ # -----------------------------------------------------------------------------
69
+ # Creating a professional Microsoft-inspired blue theme
70
# "Segoe UI" is a system font that Google Fonts does not host, so it is
# referenced by name (resolved from the viewer's machine) with generic
# fallbacks instead of being requested through gr.themes.GoogleFont.
custom_theme = gr.themes.Soft(
    primary_hue="blue",
    secondary_hue="slate",
    neutral_hue="slate",
    font=["Segoe UI", "system-ui", "sans-serif"],
    text_size="lg",
    radius_size="md",
).set(
    button_primary_background_fill="*primary_600",
    button_primary_background_fill_hover="*primary_700",
    block_title_text_weight="600",
    block_shadow="*shadow_drop_lg",
)
83
+
84
+ # -----------------------------------------------------------------------------
85
+ # Gradio 6 UI Layout
86
+ # -----------------------------------------------------------------------------
87
+ # Note: No parameters in gr.Blocks() for Gradio 6
88
with gr.Blocks() as demo:
    # NOTE(review): container nesting below was reconstructed from a view of
    # this file that had lost its indentation -- confirm the placement of each
    # Row/Column/Group against the intended layout.

    # Header Section
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("# 🗣️ Microsoft VibeVoice")
            gr.Markdown("### Zero-shot Text-to-Speech with Emotion & Style Transfer")

    with gr.Row():
        gr.Markdown(
            "Built with [anycoder](https://huggingface.co/spaces/akhaliq/anycoder)",
            elem_classes=["header-link"],
        )

    # Main Content
    with gr.Row():

        # Left Column: Inputs
        with gr.Column(scale=1):
            with gr.Group():
                gr.Markdown("### 1. Input Text")
                input_text = gr.Textbox(
                    label="Text to Speech",
                    placeholder="Enter the text you want VibeVoice to speak...",
                    lines=4,
                    max_lines=8,
                    value="The quick brown fox jumps over the lazy dog, demonstrating the amazing capabilities of modern voice synthesis.",
                )

            with gr.Group():
                gr.Markdown("### 2. Voice Reference (The 'Vibe')")
                # type="filepath" hands the handler a path string, which is
                # what run_vibevoice passes to os.path.basename.
                ref_audio = gr.Audio(
                    label="Reference Audio",
                    sources=["upload", "microphone"],
                    type="filepath",
                    editable=True,
                )

            with gr.Accordion("⚙️ Advanced Settings", open=False):
                speed_slider = gr.Slider(
                    minimum=0.5, maximum=2.0, value=1.0, step=0.1,
                    label="Speaking Speed",
                )
                temp_slider = gr.Slider(
                    minimum=0.1, maximum=1.0, value=0.7, step=0.1,
                    label="Temperature (Variance)",
                )

            generate_btn = gr.Button("Generate Speech 🎵", variant="primary", size="lg")

        # Right Column: Outputs
        with gr.Column(scale=1):
            gr.Markdown("### 3. Generated Result")
            output_audio = gr.Audio(
                label="Synthesized Audio",
                interactive=False,
                autoplay=False,
            )

            with gr.Group():
                gr.Markdown("#### Process Logs")
                # Gradio 6 replaced the show_copy_button flag with the
                # `buttons` list; the old keyword is no longer accepted.
                logs = gr.Textbox(
                    label="Status",
                    value="Ready to generate.",
                    lines=5,
                    interactive=False,
                    buttons=["copy"],
                )

    # -------------------------------------------------------------------------
    # Event Listeners
    # -------------------------------------------------------------------------
    # Note: using api_visibility="public" (Gradio 6 standard)
    generate_btn.click(
        fn=run_vibevoice,
        inputs=[input_text, ref_audio, speed_slider, temp_slider],
        outputs=[output_audio, logs],
        api_visibility="public",
    )

    # Example inputs to help users get started; each row is ordered to match
    # `inputs` (text, reference audio, speed, temperature).
    gr.Examples(
        examples=[
            ["Hello! This is a test of the VibeVoice system.", None, 1.0, 0.7],
            ["Dramatic reading requires a specific cadence and tone.", None, 0.8, 0.9],
        ],
        inputs=[input_text, ref_audio, speed_slider, temp_slider],
    )
176
+
177
+ # -----------------------------------------------------------------------------
178
+ # App Launch
179
+ # -----------------------------------------------------------------------------
180
+ # Note: All app-level configs go here in Gradio 6
181
if __name__ == "__main__":
    # Links rendered in the app footer.
    footer_entries = [
        {"label": "Built with anycoder", "url": "https://huggingface.co/spaces/akhaliq/anycoder"},
        {"label": "VibeVoice Repo", "url": "https://github.com/microsoft/VibeVoice"},
    ]

    # Styling for the element tagged with the "header-link" class.
    header_link_css = """
    .header-link a {
        text-decoration: none;
        color: #666;
        font-size: 0.9em;
        font-weight: bold;
    }
    .header-link a:hover {
        color: #2563eb;
        text-decoration: underline;
    }
    """

    # Theme, footer links and CSS are all supplied at launch time.
    demo.launch(
        theme=custom_theme,
        footer_links=footer_entries,
        css=header_link_css,
    )
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ gradio>=6.0
2
+ requests
3
+ Pillow
4
+ numpy
5
+ scipy
6
+ soundfile
7
+ librosa