Spaces:
Running
Running
xieli
committed on
Commit
·
501ca3e
1
Parent(s):
a132dcd
feat: fix
Browse files
feat: remove streaming
feat: fix
- app.py +28 -12
- stepfun_api.py +52 -17
app.py
CHANGED
|
@@ -170,37 +170,53 @@ class EditxTab:
|
|
| 170 |
state["history_audio"] = []
|
| 171 |
return [], state
|
| 172 |
|
| 173 |
-
def auto_transcribe_audio(self, audio_path):
|
| 174 |
"""
|
| 175 |
-
|
| 176 |
|
| 177 |
Args:
|
| 178 |
audio_path: 音频文件路径
|
|
|
|
| 179 |
|
| 180 |
Returns:
|
| 181 |
-
|
| 182 |
"""
|
| 183 |
if not audio_path:
|
| 184 |
-
|
| 185 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 186 |
|
| 187 |
try:
|
|
|
|
|
|
|
|
|
|
| 188 |
self.logger.info(f"🎙️ Starting auto transcription for: {audio_path}")
|
| 189 |
|
| 190 |
-
# 使用stepfun_api中的transcribe_audio
|
| 191 |
-
|
| 192 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 193 |
|
| 194 |
except Exception as e:
|
| 195 |
error_msg = f"[转录失败: {str(e)}]"
|
| 196 |
self.logger.error(f"❌ Auto transcription failed: {str(e)}")
|
| 197 |
-
|
|
|
|
| 198 |
|
| 199 |
def init_state(self):
    """Build the initial conversation state for a new session."""
    fresh_state = {}
    # Both histories start out as independent empty lists.
    fresh_state["history_messages"] = []
    fresh_state["history_audio"] = []
    return fresh_state
|
| 205 |
|
| 206 |
def update_edit_info(self, category):
|
|
@@ -282,8 +298,8 @@ class EditxTab:
|
|
| 282 |
# 音频上传时自动转录
|
| 283 |
self.prompt_audio_input.change(
|
| 284 |
fn=self.auto_transcribe_audio,
|
| 285 |
-
inputs=self.prompt_audio_input,
|
| 286 |
-
outputs=self.prompt_text_input
|
| 287 |
)
|
| 288 |
|
| 289 |
|
|
|
|
| 170 |
state["history_audio"] = []
|
| 171 |
return [], state
|
| 172 |
|
| 173 |
+
def auto_transcribe_audio(self, audio_path, state):
    """
    Transcribe an audio file and return the final result in one shot.

    Args:
        audio_path: Path of the audio file. A falsy value (no audio)
            short-circuits to an empty string.
        state: State dict. ``last_audio_path`` and
            ``last_transcribed_text`` are read and written here so that a
            repeated request for an already-transcribed file is skipped.

    Returns:
        A ``(text, state)`` tuple: the transcribed text (or a bracketed
        failure message when transcription raised) and the updated state.
    """
    if not audio_path:
        return "", state

    # Guard against duplicate calls for a path that was already handled.
    if state.get("last_audio_path") == audio_path:
        self.logger.debug(f"⚠️ Skipping duplicate transcription request for: {audio_path}")
        return state.get("last_transcribed_text", ""), state

    try:
        # Record the path before the call so duplicate requests are skipped.
        state["last_audio_path"] = audio_path

        self.logger.info(f"🎙️ Starting auto transcription for: {audio_path}")

        # Use transcribe_audio from stepfun_api in non-streaming mode.
        transcribed_text = transcribe_audio(audio_path, streaming=False)

        # Transcription finished: cache the result.
        state["last_transcribed_text"] = transcribed_text

        self.logger.info(f"✅ Auto transcription completed: {transcribed_text}")
        return transcribed_text, state

    except Exception as e:
        error_msg = f"[转录失败: {str(e)}]"
        self.logger.error(f"❌ Auto transcription failed: {str(e)}")
        # Forget the path on failure; otherwise the duplicate guard above
        # would keep returning this cached error for the same file and the
        # transcription could never be retried.
        state["last_audio_path"] = None
        state["last_transcribed_text"] = error_msg
        return error_msg, state
|
| 212 |
|
| 213 |
def init_state(self):
    """Build the initial conversation state for a new session."""
    fresh_state = {}
    fresh_state["history_messages"] = []
    fresh_state["history_audio"] = []
    # Used to prevent duplicate transcription calls.
    fresh_state["last_audio_path"] = None
    # Cache of the most recent transcription result.
    fresh_state["last_transcribed_text"] = ""
    return fresh_state
|
| 221 |
|
| 222 |
def update_edit_info(self, category):
|
|
|
|
| 298 |
# 音频上传时自动转录
|
| 299 |
self.prompt_audio_input.change(
|
| 300 |
fn=self.auto_transcribe_audio,
|
| 301 |
+
inputs=[self.prompt_audio_input, state],
|
| 302 |
+
outputs=[self.prompt_text_input, state]
|
| 303 |
)
|
| 304 |
|
| 305 |
|
stepfun_api.py
CHANGED
|
@@ -369,6 +369,27 @@ def transcribe_audio(audio_path: str, progress_callback=None, streaming=False):
|
|
| 369 |
如果streaming=False: 完整的转录文本
|
| 370 |
如果streaming=True: 生成器,产生增量更新和最终文本
|
| 371 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 372 |
Raises:
|
| 373 |
ValueError: If API token not configured
|
| 374 |
RuntimeError: If transcription fails
|
|
@@ -376,23 +397,37 @@ def transcribe_audio(audio_path: str, progress_callback=None, streaming=False):
|
|
| 376 |
client = get_client()
|
| 377 |
|
| 378 |
if not client.token:
|
| 379 |
-
|
| 380 |
-
if streaming:
|
| 381 |
-
yield f"[错误: {error_msg}]"
|
| 382 |
-
return
|
| 383 |
-
else:
|
| 384 |
-
raise ValueError(error_msg)
|
| 385 |
|
| 386 |
try:
|
| 387 |
-
|
| 388 |
-
# 返回生成器
|
| 389 |
-
for update in client.transcribe_audio_sse(audio_path, progress_callback, streaming=True):
|
| 390 |
-
yield update
|
| 391 |
-
else:
|
| 392 |
-
# 返回最终结果
|
| 393 |
-
return client.transcribe_audio_sse(audio_path, progress_callback, streaming=False)
|
| 394 |
except Exception as e:
|
| 395 |
-
|
| 396 |
-
|
| 397 |
-
|
| 398 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 369 |
如果streaming=False: 完整的转录文本
|
| 370 |
如果streaming=True: 生成器,产生增量更新和最终文本
|
| 371 |
|
| 372 |
+
Raises:
|
| 373 |
+
ValueError: If API token not configured
|
| 374 |
+
RuntimeError: If transcription fails
|
| 375 |
+
"""
|
| 376 |
+
if streaming:
|
| 377 |
+
return transcribe_audio_streaming(audio_path, progress_callback)
|
| 378 |
+
else:
|
| 379 |
+
return transcribe_audio_sync(audio_path, progress_callback)
|
| 380 |
+
|
| 381 |
+
|
| 382 |
+
def transcribe_audio_sync(audio_path: str, progress_callback=None) -> str:
    """
    Transcribe an audio file synchronously and return the final text.

    Args:
        audio_path: Path of the audio file.
        progress_callback: Optional callback for handling incremental text
            updates; forwarded unchanged to ``client.transcribe_audio_sse``.

    Returns:
        The complete transcribed text.

    Raises:
        ValueError: If API token not configured
        RuntimeError: If transcription fails
    """
    client = get_client()

    if not client.token:
        raise ValueError("API token not configured. Please set STEPFUN_API_TOKEN environment variable.")

    try:
        return client.transcribe_audio_sse(audio_path, progress_callback, streaming=False)
    except Exception as e:
        # Chain explicitly so the underlying error is recorded as the cause.
        raise RuntimeError(f"Transcription failed: {e}") from e
|
| 406 |
+
|
| 407 |
+
|
| 408 |
+
def transcribe_audio_streaming(audio_path: str, progress_callback=None):
    """
    Transcribe an audio file in streaming mode, as a generator.

    Failures are reported in-band rather than raised: a missing API token
    and any exception raised while streaming are each turned into a single
    bracketed error string that is yielded to the consumer. Only errors
    from ``get_client()`` itself, which runs outside the ``try`` block,
    propagate to the caller.

    Args:
        audio_path: Path of the audio file.
        progress_callback: Optional callback for handling incremental text
            updates; forwarded unchanged to ``client.transcribe_audio_sse``.

    Yields:
        Incremental updates and the final text, or a bracketed error
        string if the token is missing or streaming fails.
    """
    client = get_client()

    if not client.token:
        yield "[错误: API token not configured]"
        return

    try:
        # Delegate to the client's stream; an exception raised mid-stream
        # is still caught below.
        yield from client.transcribe_audio_sse(audio_path, progress_callback, streaming=True)
    except Exception as e:
        yield f"[转录失败: {str(e)}]"
|