Spaces:

stepfun-ai
/

Step-Audio-EditX

Running

App Files Community

xieli committed 5 days ago

Commit

501ca3e

1 Parent(s): a132dcd

feat: fix

Browse files

feat: remove streaming

feat: fix

Files changed (2) hide show

app.py +28 -12
stepfun_api.py +52 -17

app.py CHANGED Viewed

@@ -170,37 +170,53 @@ class EditxTab:
         state["history_audio"] = []
         return [], state
-    def auto_transcribe_audio(self, audio_path):
         """
-        自动转录音频文件，支持增量更新和最终覆盖
         Args:
             audio_path: 音频文件路径
         Returns:
-            生成器，产生转录过程中的增量文本和最终文本
         """
         if not audio_path:
-            yield ""
-            return
         try:
             self.logger.info(f"🎙️ Starting auto transcription for: {audio_path}")
-            # 使用stepfun_api中的transcribe_audio函数，启用streaming模式
-            for update in transcribe_audio(audio_path, streaming=True):
-                yield update
         except Exception as e:
             error_msg = f"[转录失败: {str(e)}]"
             self.logger.error(f"❌ Auto transcription failed: {str(e)}")
-            yield error_msg
     def init_state(self):
         """Initialize conversation state"""
         return {
             "history_messages": [],
-            "history_audio": []
         }
     def update_edit_info(self, category):
@@ -282,8 +298,8 @@ class EditxTab:
         # 音频上传时自动转录
         self.prompt_audio_input.change(
             fn=self.auto_transcribe_audio,
-            inputs=self.prompt_audio_input,
-            outputs=self.prompt_text_input
         )

         state["history_audio"] = []
         return [], state
+    def auto_transcribe_audio(self, audio_path, state):
         """
+        自动转录音频文件，一次性返回最终结果
         Args:
             audio_path: 音频文件路径
+            state: 状态字典
         Returns:
+            转录的文本内容和更新后的状态
         """
         if not audio_path:
+            return "", state
+        # 防止重复调用 - 简化逻辑
+        if state.get("last_audio_path") == audio_path:
+            self.logger.debug(f"⚠️ Skipping duplicate transcription request for: {audio_path}")
+            return state.get("last_transcribed_text", ""), state
         try:
+            # 更新音频路径
+            state["last_audio_path"] = audio_path
             self.logger.info(f"🎙️ Starting auto transcription for: {audio_path}")
+            # 使用stepfun_api中的transcribe_audio函数，不使用streaming模式
+            transcribed_text = transcribe_audio(audio_path, streaming=False)
+            # 转录完成，缓存结果
+            state["last_transcribed_text"] = transcribed_text
+            self.logger.info(f"✅ Auto transcription completed: {transcribed_text}")
+            return transcribed_text, state
         except Exception as e:
             error_msg = f"[转录失败: {str(e)}]"
             self.logger.error(f"❌ Auto transcription failed: {str(e)}")
+            state["last_transcribed_text"] = error_msg
+            return error_msg, state
     def init_state(self):
         """Initialize conversation state"""
         return {
             "history_messages": [],
+            "history_audio": [],
+            "last_audio_path": None,  # 用于防重复调用
+            "last_transcribed_text": ""  # 缓存最后的转录结果
         }
     def update_edit_info(self, category):
         # 音频上传时自动转录
         self.prompt_audio_input.change(
             fn=self.auto_transcribe_audio,
+            inputs=[self.prompt_audio_input, state],
+            outputs=[self.prompt_text_input, state]
         )

stepfun_api.py CHANGED Viewed

@@ -369,6 +369,27 @@ def transcribe_audio(audio_path: str, progress_callback=None, streaming=False):
         如果streaming=False: 完整的转录文本
         如果streaming=True: 生成器，产生增量更新和最终文本
     Raises:
         ValueError: If API token not configured
         RuntimeError: If transcription fails
@@ -376,23 +397,37 @@ def transcribe_audio(audio_path: str, progress_callback=None, streaming=False):
     client = get_client()
     if not client.token:
-        error_msg = "API token not configured. Please set STEPFUN_API_TOKEN environment variable."
-        if streaming:
-            yield f"[错误: {error_msg}]"
-            return
-        else:
-            raise ValueError(error_msg)
     try:
-        if streaming:
-            # 返回生成器
-            for update in client.transcribe_audio_sse(audio_path, progress_callback, streaming=True):
-                yield update
-        else:
-            # 返回最终结果
-            return client.transcribe_audio_sse(audio_path, progress_callback, streaming=False)
     except Exception as e:
-        if streaming:
-            yield f"[转录失败: {str(e)}]"
-        else:
-            raise RuntimeError(f"Transcription failed: {e}")

         如果streaming=False: 完整的转录文本
         如果streaming=True: 生成器，产生增量更新和最终文本
+    Raises:
+        ValueError: If API token not configured
+        RuntimeError: If transcription fails
+    """
+    if streaming:
+        return transcribe_audio_streaming(audio_path, progress_callback)
+    else:
+        return transcribe_audio_sync(audio_path, progress_callback)
+def transcribe_audio_sync(audio_path: str, progress_callback=None) -> str:
+    """
+    同步转录音频文件，返回最终文本
+    Args:
+        audio_path: 音频文件路径
+        progress_callback: 可选的回调函数，用于处理增量文本更新
+    Returns:
+        完整的转录文本
     Raises:
         ValueError: If API token not configured
         RuntimeError: If transcription fails
     client = get_client()
     if not client.token:
+        raise ValueError("API token not configured. Please set STEPFUN_API_TOKEN environment variable.")
     try:
+        return client.transcribe_audio_sse(audio_path, progress_callback, streaming=False)
     except Exception as e:
+        raise RuntimeError(f"Transcription failed: {e}")
+def transcribe_audio_streaming(audio_path: str, progress_callback=None):
+    """
+    流式转录音频文件，返回生成器
+    Args:
+        audio_path: 音频文件路径
+        progress_callback: 可选的回调函数，用于处理增量文本更新
+    Yields:
+        增量更新和最终文本
+    Raises:
+        ValueError: If API token not configured
+        RuntimeError: If transcription fails
+    """
+    client = get_client()
+    if not client.token:
+        yield f"[错误: API token not configured]"
+        return
+    try:
+        for update in client.transcribe_audio_sse(audio_path, progress_callback, streaming=True):
+            yield update
+    except Exception as e:
+        yield f"[转录失败: {str(e)}]"