Nekochu committed on
Commit
a4457c3
·
1 Parent(s): 04ccf32

add /understand API endpoint for GGUF audio captioning

Browse files
Files changed (1) hide show
  1. app.py +43 -0
app.py CHANGED
@@ -289,6 +289,38 @@ def _start_ace_server():
289
  return False
290
 
291
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
292
  # ---------------------------------------------------------------------------
293
  # CLI mode
294
  # ---------------------------------------------------------------------------
@@ -769,6 +801,17 @@ def gradio_main():
769
  api_name="check_log",
770
  )
771
 
 
 
 
 
 
 
 
 
 
 
 
772
  demo.launch(
773
  server_name="0.0.0.0",
774
  server_port=7860,
 
289
  return False
290
 
291
 
292
+ # ---------------------------------------------------------------------------
293
+ # /understand proxy -- caption audio via ace-server GGUF
294
+ # ---------------------------------------------------------------------------
295
+
296
def understand_audio(audio_file):
    """Send audio to the ace-server ``/understand`` endpoint and return metadata.

    The file is read, base64-encoded, and posted as JSON. The server answers
    with a job id, which is polled until completion before the result is
    fetched.

    Args:
        audio_file: Either an object exposing a ``name`` attribute that holds
            the file path, or any value whose ``str()`` is the path. ``None``
            means no file was supplied.

    Returns:
        A string in every case: the job result pretty-printed as JSON on
        success, otherwise a human-readable error/status message. Failures
        are reported as strings rather than raised.
    """
    import base64

    if not _server_ok():
        return "ace-server not running"
    if audio_file is None:
        return "No audio file"

    # Accept both file-like upload objects (path in ``.name``) and plain paths.
    src = audio_file.name if hasattr(audio_file, "name") else str(audio_file)

    try:
        # Reading the file lives inside the ``try`` so a missing or unreadable
        # upload is reported as an error string like every other failure here,
        # instead of escaping as an unhandled exception.
        with open(src, "rb") as f:
            audio_b64 = base64.b64encode(f.read()).decode()

        r = requests.post(
            f"{ACE_SERVER}/understand",
            json={"audio": audio_b64},
            timeout=300,
        )
        if r.status_code != 200:
            return f"Error {r.status_code}: {r.text}"

        job_id = r.json().get("id")
        if job_id is None:
            # Without an id there is nothing to poll; surface the raw reply.
            return f"Error: no job id in response: {r.text}"

        # NOTE(review): the poll timeout is presumably in seconds -- confirm
        # against _poll_job.
        status, elapsed = _poll_job(job_id, timeout=600)
        if status != "done":
            return f"Understand {status} after {elapsed:.0f}s"

        result = _fetch_result(job_id)
        return json.dumps(result.json(), indent=2)
    except Exception as e:
        # API boundary: report any failure to the caller as text.
        return f"Error: {e}"
322
+
323
+
324
  # ---------------------------------------------------------------------------
325
  # CLI mode
326
  # ---------------------------------------------------------------------------
 
801
  api_name="check_log",
802
  )
803
 
804
+ # Hidden API endpoint for audio understanding (no UI, API-only)
805
+ understand_btn = gr.Button(visible=False)
806
+ understand_input = gr.File(visible=False)
807
+ understand_output = gr.Textbox(visible=False)
808
+ understand_btn.click(
809
+ understand_audio,
810
+ inputs=[understand_input],
811
+ outputs=[understand_output],
812
+ api_name="understand",
813
+ )
814
+
815
  demo.launch(
816
  server_name="0.0.0.0",
817
  server_port=7860,