ksrgszk committed on
Commit
f77247f
·
1 Parent(s): 46859fa

Add application file

Browse files
Files changed (2) hide show
  1. app.py +70 -6
  2. requirements.txt +1 -0
app.py CHANGED
@@ -239,7 +239,8 @@ def transcribe_with_bcut(media_path):
239
  "ResourceFileType": "mp3",
240
  "model_id": "8",
241
  })
242
- r = requests.post(API_REQ_UPLOAD, data=payload, headers=headers)
 
243
  r.raise_for_status()
244
  resp_data = r.json()["data"]
245
  in_boss_key = resp_data["in_boss_key"]
@@ -251,7 +252,7 @@ def transcribe_with_bcut(media_path):
251
  for i, url in enumerate(upload_urls):
252
  start = i * per_size
253
  end = (i + 1) * per_size
254
- rr = requests.put(url, data=file_binary[start:end], headers=headers)
255
  rr.raise_for_status()
256
  etags.append(rr.headers.get("Etag"))
257
  commit_payload = json.dumps({
@@ -261,15 +262,15 @@ def transcribe_with_bcut(media_path):
261
  "UploadId": upload_id,
262
  "model_id": "8",
263
  })
264
- r = requests.post(API_COMMIT_UPLOAD, data=commit_payload, headers=headers)
265
  r.raise_for_status()
266
  download_url = r.json()["data"]["download_url"]
267
- r = requests.post(API_CREATE_TASK, json={"resource": download_url, "model_id": "8"}, headers=headers)
268
  r.raise_for_status()
269
  task_id = r.json()["data"]["task_id"]
270
  result = None
271
  for _ in range(600):
272
- rr = requests.get(API_QUERY_RESULT, params={"model_id": 7, "task_id": task_id}, headers=headers)
273
  rr.raise_for_status()
274
  jd = rr.json()["data"]
275
  if jd.get("state") == 4:
@@ -326,6 +327,53 @@ def transcribe_only_with_vc(media_path, fmt="srt"):
326
  srt_text = "\n".join(lines)
327
  return srt_text, rec
328
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
329
  @app.route("/", methods=["GET"])
330
  def index():
331
  html = """
@@ -372,6 +420,7 @@ def index():
372
  <label>引擎</label>
373
  <select id="engineSel">
374
  <option value="videocaptioner">VideoCaptioner(Bcut API)</option>
 
375
  <option value="ffsubsync">ffsubsync(CPU)</option>
376
  <option value="whisper">whisper(较慢)</option>
377
  </select>
@@ -436,6 +485,7 @@ def index():
436
  if(!f){ro.textContent='请选择音频或视频文件';return;}
437
  const fd = new FormData();
438
  fd.append('file', f);
 
439
  ro.textContent='转写中...';
440
  try{
441
  const r = await fetch('/api/transcribe',{method:'POST',body:fd});
@@ -491,6 +541,8 @@ def api_align():
491
  srt_text, items = align_with_whisper(media_path, segs, fmt=fmt)
492
  elif engine in ("videocaptioner","vc","bcut"):
493
  srt_text, items = align_with_vc(media_path, segs, fmt=fmt)
 
 
494
  else:
495
  srt_text, items = align_with_ffsubsync(media_path, segs, fmt=fmt)
496
  try:
@@ -519,7 +571,19 @@ def api_transcribe():
519
  ext = os.path.splitext(getattr(f, "filename", "") or "")[1] or ".dat"
520
  media_path = os.path.join(temp_dir, f"{uuid.uuid4()}_media{ext}")
521
  f.save(media_path)
522
- srt_text, items = transcribe_only_with_vc(media_path)
 
 
 
 
 
 
 
 
 
 
 
 
523
  try:
524
  os.remove(media_path)
525
  except Exception:
 
239
  "ResourceFileType": "mp3",
240
  "model_id": "8",
241
  })
242
+ proxies = {"http": None, "https": None}
243
+ r = requests.post(API_REQ_UPLOAD, data=payload, headers=headers, proxies=proxies)
244
  r.raise_for_status()
245
  resp_data = r.json()["data"]
246
  in_boss_key = resp_data["in_boss_key"]
 
252
  for i, url in enumerate(upload_urls):
253
  start = i * per_size
254
  end = (i + 1) * per_size
255
+ rr = requests.put(url, data=file_binary[start:end], headers=headers, proxies=proxies)
256
  rr.raise_for_status()
257
  etags.append(rr.headers.get("Etag"))
258
  commit_payload = json.dumps({
 
262
  "UploadId": upload_id,
263
  "model_id": "8",
264
  })
265
+ r = requests.post(API_COMMIT_UPLOAD, data=commit_payload, headers=headers, proxies=proxies)
266
  r.raise_for_status()
267
  download_url = r.json()["data"]["download_url"]
268
+ r = requests.post(API_CREATE_TASK, json={"resource": download_url, "model_id": "8"}, headers=headers, proxies=proxies)
269
  r.raise_for_status()
270
  task_id = r.json()["data"]["task_id"]
271
  result = None
272
  for _ in range(600):
273
+ rr = requests.get(API_QUERY_RESULT, params={"model_id": 7, "task_id": task_id}, headers=headers, proxies=proxies)
274
  rr.raise_for_status()
275
  jd = rr.json()["data"]
276
  if jd.get("state") == 4:
 
327
  srt_text = "\n".join(lines)
328
  return srt_text, rec
329
 
330
+ # Faster-Whisper
331
# Loaded Faster-Whisper models keyed by (model_name, device). Constructing a
# WhisperModel loads the weights, so each configuration is built only once per
# process instead of once per request.
_FAST_WHISPER_MODELS = {}


def transcribe_with_fast(media_path, model_name=None, device=None):
    """Transcribe a media file with Faster-Whisper.

    Args:
        media_path: Path of the audio/video file passed to the model.
        model_name: Model to load. Falls back to the ``FAST_WHISPER_MODEL``
            environment variable, then to ``"tiny"``.
        device: Inference device. Falls back to the ``FAST_DEVICE``
            environment variable, then to ``"cpu"``.

    Returns:
        A list with one dict per recognised segment, each holding the
        stripped ``"text"`` plus ``"start"`` and ``"end"`` as floats
        (callers multiply these by 1000 to get SRT milliseconds).

    Raises:
        RuntimeError: If the ``faster_whisper`` package cannot be imported.
    """
    # Imported lazily so the rest of the app still works without the package.
    try:
        from faster_whisper import WhisperModel
    except Exception as e:
        raise RuntimeError("缺少 faster-whisper 依赖") from e

    model_name = model_name or os.environ.get("FAST_WHISPER_MODEL", "tiny")
    device = device or os.environ.get("FAST_DEVICE", "cpu")

    # Reuse an already-loaded model for this configuration when available.
    cache_key = (model_name, device)
    model = _FAST_WHISPER_MODELS.get(cache_key)
    if model is None:
        model = WhisperModel(model_name, device=device)
        _FAST_WHISPER_MODELS[cache_key] = model

    segments, _ = model.transcribe(media_path, vad_filter=True)

    rec = []
    for s in segments:
        start = float(s.start or 0.0)
        rec.append({
            "text": (s.text or "").strip(),
            "start": start,
            # A missing/zero end time collapses the segment onto its start.
            "end": float(s.end or start),
        })
    return rec
348
+
349
def align_with_fast(media_path, segments, fmt="srt", model_name=None):
    """Attach Faster-Whisper timestamps to caller-supplied text segments.

    The media file is transcribed once. Each non-blank entry of ``segments``
    is then matched to the recognised segment whose normalised text is most
    similar, and inherits that segment's start/end times. A recognised
    segment that contains the normalised text as a substring counts as a
    perfect match (score 1.0).

    Args:
        media_path: Path of the audio/video file to transcribe.
        segments: Iterable of text strings to align; blank entries are
            skipped.
        fmt: Accepted for signature parity with the other ``align_with_*``
            helpers; this function always emits SRT text and ignores it.
        model_name: Optional model name forwarded to
            ``transcribe_with_fast``.

    Returns:
        A ``(srt_text, items)`` tuple where ``items`` is a list of
        ``{"text", "start", "end"}`` dicts and ``srt_text`` is the same data
        rendered as numbered SRT cues.
    """
    rec = transcribe_with_fast(media_path, model_name=model_name)

    # Normalise every recognised segment once, up front, instead of once per
    # (input segment, recognised segment) pair. Segments that normalise to
    # an empty string can never be matched and are dropped here.
    candidates = []
    for r in rec:
        rn = normalize_zh(r["text"])
        if rn:
            candidates.append((rn, r))

    items = []
    for raw in segments:
        s = raw.strip()
        if not s:
            continue
        sn = normalize_zh(s)
        best = None
        best_score = -1.0
        for rn, r in candidates:
            # Substring containment is treated as an exact hit.
            score = 1.0 if sn in rn else similarity(sn, rn)
            # Strict ">" keeps the earliest candidate on ties.
            if score > best_score:
                best_score = score
                best = r
        if best is not None:
            items.append({"text": s, "start": best["start"], "end": best["end"]})

    lines = []
    for i, it in enumerate(items, start=1):
        lines.append(str(i))
        lines.append(f"{srt_time(int(it['start']*1000))} --> {srt_time(int(it['end']*1000))}")
        lines.append(it["text"])
        lines.append("")
    srt_text = "\n".join(lines)
    return srt_text, items
376
+
377
  @app.route("/", methods=["GET"])
378
  def index():
379
  html = """
 
420
  <label>引擎</label>
421
  <select id="engineSel">
422
  <option value="videocaptioner">VideoCaptioner(Bcut API)</option>
423
+ <option value="fast">Faster-Whisper(本地, tiny)</option>
424
  <option value="ffsubsync">ffsubsync(CPU)</option>
425
  <option value="whisper">whisper(较慢)</option>
426
  </select>
 
485
  if(!f){ro.textContent='请选择音频或视频文件';return;}
486
  const fd = new FormData();
487
  fd.append('file', f);
488
+ fd.append('engine', document.getElementById('engineSel').value);
489
  ro.textContent='转写中...';
490
  try{
491
  const r = await fetch('/api/transcribe',{method:'POST',body:fd});
 
541
  srt_text, items = align_with_whisper(media_path, segs, fmt=fmt)
542
  elif engine in ("videocaptioner","vc","bcut"):
543
  srt_text, items = align_with_vc(media_path, segs, fmt=fmt)
544
+ elif engine in ("fast",):
545
+ srt_text, items = align_with_fast(media_path, segs, fmt=fmt)
546
  else:
547
  srt_text, items = align_with_ffsubsync(media_path, segs, fmt=fmt)
548
  try:
 
571
  ext = os.path.splitext(getattr(f, "filename", "") or "")[1] or ".dat"
572
  media_path = os.path.join(temp_dir, f"{uuid.uuid4()}_media{ext}")
573
  f.save(media_path)
574
+ engine = request.form.get("engine","fast")
575
+ if engine in ("fast",):
576
+ rec = transcribe_with_fast(media_path)
577
+ lines = []
578
+ for i, it in enumerate(rec, start=1):
579
+ lines.append(str(i))
580
+ lines.append(f"{srt_time(int(it['start']*1000))} --> {srt_time(int(it['end']*1000))}")
581
+ lines.append(it["text"])
582
+ lines.append("")
583
+ srt_text = "\n".join(lines)
584
+ items = rec
585
+ else:
586
+ srt_text, items = transcribe_only_with_vc(media_path)
587
  try:
588
  os.remove(media_path)
589
  except Exception:
requirements.txt CHANGED
@@ -5,3 +5,4 @@ ffsubsync
5
  pysubs2
6
  openai-whisper
7
  textdistance
 
 
5
  pysubs2
6
  openai-whisper
7
  textdistance
8
+ faster-whisper