Spaces:
Running
Running
MCP ready
Browse files
app.py
CHANGED
|
@@ -184,6 +184,24 @@ def get_ezaudio(prompt):
|
|
| 184 |
raise gr.Error("EzAudio space API is not ready, please try again in few minutes ")
|
| 185 |
|
| 186 |
def infer(image_in, chosen_model):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 187 |
caption = get_caption_from_kosmos(image_in)
|
| 188 |
if chosen_model == "MAGNet" :
|
| 189 |
magnet_result = get_magnet(caption)
|
|
@@ -252,4 +270,4 @@ with gr.Blocks(css=css) as demo:
|
|
| 252 |
outputs=[audio_o],
|
| 253 |
)
|
| 254 |
|
| 255 |
-
demo.queue(max_size=10).launch(debug=True, show_error=True)
|
|
|
|
| 184 |
raise gr.Error("EzAudio space API is not ready, please try again in few minutes ")
|
| 185 |
|
| 186 |
def infer(image_in, chosen_model):
|
| 187 |
+
"""
|
| 188 |
+
Generate an audio clip (sound effect) from an input image using the selected generative model.
|
| 189 |
+
|
| 190 |
+
This function first generates a caption from the provided image using a vision-language model.
|
| 191 |
+
The caption is then used as a text prompt for various audio generation models.
|
| 192 |
+
|
| 193 |
+
Args:
|
| 194 |
+
image_in (str): File path to the input image. The image will be processed to generate a descriptive caption.
|
| 195 |
+
chosen_model (str): The name of the audio generation model to use. Supported options include: "MAGNet", "AudioLDM-2", "Tango", "Stable Audio Open".
|
| 196 |
+
|
| 197 |
+
Returns:
|
| 198 |
+
str | dict: The path or result object of the generated audio clip, depending on the model used.
|
| 199 |
+
If the model returns a list or a URL, the function provides that as output.
|
| 200 |
+
|
| 201 |
+
Example usage:
|
| 202 |
+
>>> infer("cat.png", "AudioLDM-2")
|
| 203 |
+
"outputs/audio/cat_sfx.wav"
|
| 204 |
+
"""
|
| 205 |
caption = get_caption_from_kosmos(image_in)
|
| 206 |
if chosen_model == "MAGNet" :
|
| 207 |
magnet_result = get_magnet(caption)
|
|
|
|
| 270 |
outputs=[audio_o],
|
| 271 |
)
|
| 272 |
|
| 273 |
+
demo.queue(max_size=10).launch(debug=True, show_error=True, ssr_mode=False, mcp_server=True)
|