Spaces:
Running
Running
add /understand API endpoint for GGUF audio captioning
Browse files
app.py
CHANGED
|
@@ -289,6 +289,38 @@ def _start_ace_server():
|
|
| 289 |
return False
|
| 290 |
|
| 291 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 292 |
# ---------------------------------------------------------------------------
|
| 293 |
# CLI mode
|
| 294 |
# ---------------------------------------------------------------------------
|
|
@@ -769,6 +801,17 @@ def gradio_main():
|
|
| 769 |
api_name="check_log",
|
| 770 |
)
|
| 771 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 772 |
demo.launch(
|
| 773 |
server_name="0.0.0.0",
|
| 774 |
server_port=7860,
|
|
|
|
| 289 |
return False
|
| 290 |
|
| 291 |
|
| 292 |
+
# ---------------------------------------------------------------------------
|
| 293 |
+
# /understand proxy -- caption audio via ace-server GGUF
|
| 294 |
+
# ---------------------------------------------------------------------------
|
| 295 |
+
|
| 296 |
+
def understand_audio(audio_file):
    """Send audio to ace-server /understand endpoint, return metadata.

    The file contents are base64-encoded and posted as JSON
    (``{"audio": ...}``) to ``{ACE_SERVER}/understand``. The ``id`` from the
    response is then polled via ``_poll_job`` and, once the job reports
    ``"done"``, the result is fetched with ``_fetch_result``.

    Args:
        audio_file: Either a path, or an object exposing the path through a
            ``.name`` attribute. ``None`` means nothing was provided.

    Returns:
        str: The job result as indented JSON text on success; otherwise a
        human-readable error message. Problems reading the file, talking to
        the server, or decoding its responses are returned as text rather
        than raised.
    """
    # Kept function-local: the module's import header is not guaranteed to
    # provide base64.
    import base64

    if not _server_ok():
        return "ace-server not running"
    if audio_file is None:
        return "No audio file"

    src = audio_file.name if hasattr(audio_file, "name") else str(audio_file)
    try:
        # Read inside the try block so a missing or unreadable file is
        # reported as an error string, like every other failure path here.
        with open(src, "rb") as f:
            audio_b64 = base64.b64encode(f.read()).decode()

        r = requests.post(
            f"{ACE_SERVER}/understand",
            json={"audio": audio_b64},
            timeout=300,
        )
        if r.status_code != 200:
            return f"Error {r.status_code}: {r.text}"

        job_id = r.json().get("id")
        if job_id is None:
            # Without a job id there is nothing to poll or fetch; surface
            # the raw response instead of polling for a job called None.
            return f"Error: no job id in response: {r.text}"

        status, elapsed = _poll_job(job_id, timeout=600)
        if status != "done":
            return f"Understand {status} after {elapsed:.0f}s"

        result = _fetch_result(job_id)
        return json.dumps(result.json(), indent=2)
    except Exception as e:
        # Deliberate catch-all boundary: this function's contract is to
        # report failures as text to the caller.
        return f"Error: {e}"
|
| 322 |
+
|
| 323 |
+
|
| 324 |
# ---------------------------------------------------------------------------
|
| 325 |
# CLI mode
|
| 326 |
# ---------------------------------------------------------------------------
|
|
|
|
| 801 |
api_name="check_log",
|
| 802 |
)
|
| 803 |
|
| 804 |
+
# Hidden API endpoint for audio understanding (no UI, API-only)
|
| 805 |
+
understand_btn = gr.Button(visible=False)
|
| 806 |
+
understand_input = gr.File(visible=False)
|
| 807 |
+
understand_output = gr.Textbox(visible=False)
|
| 808 |
+
understand_btn.click(
|
| 809 |
+
understand_audio,
|
| 810 |
+
inputs=[understand_input],
|
| 811 |
+
outputs=[understand_output],
|
| 812 |
+
api_name="understand",
|
| 813 |
+
)
|
| 814 |
+
|
| 815 |
demo.launch(
|
| 816 |
server_name="0.0.0.0",
|
| 817 |
server_port=7860,
|