Spaces:
Sleeping
Sleeping
Add application file
Browse files- app.py +70 -6
- requirements.txt +1 -0
app.py
CHANGED
|
@@ -239,7 +239,8 @@ def transcribe_with_bcut(media_path):
|
|
| 239 |
"ResourceFileType": "mp3",
|
| 240 |
"model_id": "8",
|
| 241 |
})
|
| 242 |
-
|
|
|
|
| 243 |
r.raise_for_status()
|
| 244 |
resp_data = r.json()["data"]
|
| 245 |
in_boss_key = resp_data["in_boss_key"]
|
|
@@ -251,7 +252,7 @@ def transcribe_with_bcut(media_path):
|
|
| 251 |
for i, url in enumerate(upload_urls):
|
| 252 |
start = i * per_size
|
| 253 |
end = (i + 1) * per_size
|
| 254 |
-
rr = requests.put(url, data=file_binary[start:end], headers=headers)
|
| 255 |
rr.raise_for_status()
|
| 256 |
etags.append(rr.headers.get("Etag"))
|
| 257 |
commit_payload = json.dumps({
|
|
@@ -261,15 +262,15 @@ def transcribe_with_bcut(media_path):
|
|
| 261 |
"UploadId": upload_id,
|
| 262 |
"model_id": "8",
|
| 263 |
})
|
| 264 |
-
r = requests.post(API_COMMIT_UPLOAD, data=commit_payload, headers=headers)
|
| 265 |
r.raise_for_status()
|
| 266 |
download_url = r.json()["data"]["download_url"]
|
| 267 |
-
r = requests.post(API_CREATE_TASK, json={"resource": download_url, "model_id": "8"}, headers=headers)
|
| 268 |
r.raise_for_status()
|
| 269 |
task_id = r.json()["data"]["task_id"]
|
| 270 |
result = None
|
| 271 |
for _ in range(600):
|
| 272 |
-
rr = requests.get(API_QUERY_RESULT, params={"model_id": 7, "task_id": task_id}, headers=headers)
|
| 273 |
rr.raise_for_status()
|
| 274 |
jd = rr.json()["data"]
|
| 275 |
if jd.get("state") == 4:
|
|
@@ -326,6 +327,53 @@ def transcribe_only_with_vc(media_path, fmt="srt"):
|
|
| 326 |
srt_text = "\n".join(lines)
|
| 327 |
return srt_text, rec
|
| 328 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 329 |
@app.route("/", methods=["GET"])
|
| 330 |
def index():
|
| 331 |
html = """
|
|
@@ -372,6 +420,7 @@ def index():
|
|
| 372 |
<label>引擎</label>
|
| 373 |
<select id="engineSel">
|
| 374 |
<option value="videocaptioner">VideoCaptioner(Bcut API)</option>
|
|
|
|
| 375 |
<option value="ffsubsync">ffsubsync(CPU)</option>
|
| 376 |
<option value="whisper">whisper(较慢)</option>
|
| 377 |
</select>
|
|
@@ -436,6 +485,7 @@ def index():
|
|
| 436 |
if(!f){ro.textContent='请选择音频或视频文件';return;}
|
| 437 |
const fd = new FormData();
|
| 438 |
fd.append('file', f);
|
|
|
|
| 439 |
ro.textContent='转写中...';
|
| 440 |
try{
|
| 441 |
const r = await fetch('/api/transcribe',{method:'POST',body:fd});
|
|
@@ -491,6 +541,8 @@ def api_align():
|
|
| 491 |
srt_text, items = align_with_whisper(media_path, segs, fmt=fmt)
|
| 492 |
elif engine in ("videocaptioner","vc","bcut"):
|
| 493 |
srt_text, items = align_with_vc(media_path, segs, fmt=fmt)
|
|
|
|
|
|
|
| 494 |
else:
|
| 495 |
srt_text, items = align_with_ffsubsync(media_path, segs, fmt=fmt)
|
| 496 |
try:
|
|
@@ -519,7 +571,19 @@ def api_transcribe():
|
|
| 519 |
ext = os.path.splitext(getattr(f, "filename", "") or "")[1] or ".dat"
|
| 520 |
media_path = os.path.join(temp_dir, f"{uuid.uuid4()}_media{ext}")
|
| 521 |
f.save(media_path)
|
| 522 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 523 |
try:
|
| 524 |
os.remove(media_path)
|
| 525 |
except Exception:
|
|
|
|
| 239 |
"ResourceFileType": "mp3",
|
| 240 |
"model_id": "8",
|
| 241 |
})
|
| 242 |
+
proxies = {"http": None, "https": None}
|
| 243 |
+
r = requests.post(API_REQ_UPLOAD, data=payload, headers=headers, proxies=proxies)
|
| 244 |
r.raise_for_status()
|
| 245 |
resp_data = r.json()["data"]
|
| 246 |
in_boss_key = resp_data["in_boss_key"]
|
|
|
|
| 252 |
for i, url in enumerate(upload_urls):
|
| 253 |
start = i * per_size
|
| 254 |
end = (i + 1) * per_size
|
| 255 |
+
rr = requests.put(url, data=file_binary[start:end], headers=headers, proxies=proxies)
|
| 256 |
rr.raise_for_status()
|
| 257 |
etags.append(rr.headers.get("Etag"))
|
| 258 |
commit_payload = json.dumps({
|
|
|
|
| 262 |
"UploadId": upload_id,
|
| 263 |
"model_id": "8",
|
| 264 |
})
|
| 265 |
+
r = requests.post(API_COMMIT_UPLOAD, data=commit_payload, headers=headers, proxies=proxies)
|
| 266 |
r.raise_for_status()
|
| 267 |
download_url = r.json()["data"]["download_url"]
|
| 268 |
+
r = requests.post(API_CREATE_TASK, json={"resource": download_url, "model_id": "8"}, headers=headers, proxies=proxies)
|
| 269 |
r.raise_for_status()
|
| 270 |
task_id = r.json()["data"]["task_id"]
|
| 271 |
result = None
|
| 272 |
for _ in range(600):
|
| 273 |
+
rr = requests.get(API_QUERY_RESULT, params={"model_id": 7, "task_id": task_id}, headers=headers, proxies=proxies)
|
| 274 |
rr.raise_for_status()
|
| 275 |
jd = rr.json()["data"]
|
| 276 |
if jd.get("state") == 4:
|
|
|
|
| 327 |
srt_text = "\n".join(lines)
|
| 328 |
return srt_text, rec
|
| 329 |
|
| 330 |
+
# Faster-Whisper
|
| 331 |
+
# Loaded Faster-Whisper models keyed by (model_name, device). Building a
# WhisperModel loads the weights, so reuse one instance per configuration
# instead of reloading it on every request.
_FAST_MODEL_CACHE = {}


def transcribe_with_fast(media_path, model_name=None, device=None):
    """Transcribe a media file with Faster-Whisper and return timed segments.

    Args:
        media_path: Path of the audio/video file passed to
            ``WhisperModel.transcribe``.
        model_name: Model to load. Falls back to the ``FAST_WHISPER_MODEL``
            environment variable, then to ``"tiny"``.
        device: Device to run on. Falls back to the ``FAST_DEVICE``
            environment variable, then to ``"cpu"``.

    Returns:
        A list of dicts, one per recognised segment, with keys ``"text"``
        (whitespace-stripped), ``"start"`` and ``"end"`` (floats; presumably
        seconds, since callers multiply by 1000 before formatting -- confirm).

    Raises:
        RuntimeError: If ``faster_whisper`` cannot be imported.
    """
    # Imported lazily so the rest of the app works without the optional
    # dependency; any import-time failure is reported as a missing dependency.
    try:
        from faster_whisper import WhisperModel
    except Exception as e:
        raise RuntimeError("缺少 faster-whisper 依赖") from e

    model_name = model_name or os.environ.get("FAST_WHISPER_MODEL", "tiny")
    device = device or os.environ.get("FAST_DEVICE", "cpu")

    cache_key = (model_name, device)
    model = _FAST_MODEL_CACHE.get(cache_key)
    if model is None:
        model = WhisperModel(model_name, device=device)
        _FAST_MODEL_CACHE[cache_key] = model

    segments, _ = model.transcribe(media_path, vad_filter=True)
    # A missing end time falls back to the start time; a missing start to 0.0.
    return [
        {
            "text": (s.text or "").strip(),
            "start": float(s.start or 0.0),
            "end": float(s.end or (s.start or 0.0)),
        }
        for s in segments
    ]
|
| 348 |
+
|
| 349 |
+
def align_with_fast(media_path, segments, fmt="srt", model_name=None):
    """Give each supplied text line the timing of its best Faster-Whisper match.

    The media file is transcribed once. Each non-blank input line is then
    compared with every recognised segment and takes the start/end of the
    highest-scoring one. A recognised segment that contains the line as a
    substring (after normalisation) scores 1.0; ties keep the earliest segment.

    Args:
        media_path: Path of the media file to transcribe.
        segments: Iterable of text lines to time; blank lines are skipped.
        fmt: Accepted for signature parity with the other engines; this
            function does not read it and always emits SRT-style text.
        model_name: Optional model name forwarded to ``transcribe_with_fast``.

    Returns:
        A ``(srt_text, items)`` tuple. ``items`` is a list of dicts with keys
        ``"text"``, ``"start"`` and ``"end"``; ``srt_text`` is those items
        rendered as numbered cues joined by newlines.
    """
    rec = transcribe_with_fast(media_path, model_name=model_name)

    # Normalise each recognised segment once, not once per input line, and
    # drop segments whose normalised text is empty.
    candidates = []
    for r in rec:
        rn = normalize_zh(r["text"])
        if rn:
            candidates.append((rn, r))

    items = []
    for raw in segments:
        s = raw.strip()
        if not s:
            continue
        sn = normalize_zh(s)
        best = None
        best_score = -1.0
        for rn, r in candidates:
            # Substring containment counts as a perfect match.
            score = 1.0 if sn in rn else similarity(sn, rn)
            # Strict '>' keeps the earliest segment on ties.
            if score > best_score:
                best_score = score
                best = r
        if best is not None:
            items.append({"text": s, "start": best["start"], "end": best["end"]})

    lines = []
    for i, it in enumerate(items, start=1):
        # Round, not truncate: a float product can land just below the
        # intended integer, and truncation would then lose a millisecond.
        start_ms = round(it["start"] * 1000)
        end_ms = round(it["end"] * 1000)
        lines.append(str(i))
        lines.append(f"{srt_time(start_ms)} --> {srt_time(end_ms)}")
        lines.append(it["text"])
        lines.append("")
    srt_text = "\n".join(lines)
    return srt_text, items
|
| 376 |
+
|
| 377 |
@app.route("/", methods=["GET"])
|
| 378 |
def index():
|
| 379 |
html = """
|
|
|
|
| 420 |
<label>引擎</label>
|
| 421 |
<select id="engineSel">
|
| 422 |
<option value="videocaptioner">VideoCaptioner(Bcut API)</option>
|
| 423 |
+
<option value="fast">Faster-Whisper(本地, tiny)</option>
|
| 424 |
<option value="ffsubsync">ffsubsync(CPU)</option>
|
| 425 |
<option value="whisper">whisper(较慢)</option>
|
| 426 |
</select>
|
|
|
|
| 485 |
if(!f){ro.textContent='请选择音频或视频文件';return;}
|
| 486 |
const fd = new FormData();
|
| 487 |
fd.append('file', f);
|
| 488 |
+
fd.append('engine', document.getElementById('engineSel').value);
|
| 489 |
ro.textContent='转写中...';
|
| 490 |
try{
|
| 491 |
const r = await fetch('/api/transcribe',{method:'POST',body:fd});
|
|
|
|
| 541 |
srt_text, items = align_with_whisper(media_path, segs, fmt=fmt)
|
| 542 |
elif engine in ("videocaptioner","vc","bcut"):
|
| 543 |
srt_text, items = align_with_vc(media_path, segs, fmt=fmt)
|
| 544 |
+
elif engine in ("fast",):
|
| 545 |
+
srt_text, items = align_with_fast(media_path, segs, fmt=fmt)
|
| 546 |
else:
|
| 547 |
srt_text, items = align_with_ffsubsync(media_path, segs, fmt=fmt)
|
| 548 |
try:
|
|
|
|
| 571 |
ext = os.path.splitext(getattr(f, "filename", "") or "")[1] or ".dat"
|
| 572 |
media_path = os.path.join(temp_dir, f"{uuid.uuid4()}_media{ext}")
|
| 573 |
f.save(media_path)
|
| 574 |
+
engine = request.form.get("engine","fast")
|
| 575 |
+
if engine in ("fast",):
|
| 576 |
+
rec = transcribe_with_fast(media_path)
|
| 577 |
+
lines = []
|
| 578 |
+
for i, it in enumerate(rec, start=1):
|
| 579 |
+
lines.append(str(i))
|
| 580 |
+
lines.append(f"{srt_time(int(it['start']*1000))} --> {srt_time(int(it['end']*1000))}")
|
| 581 |
+
lines.append(it["text"])
|
| 582 |
+
lines.append("")
|
| 583 |
+
srt_text = "\n".join(lines)
|
| 584 |
+
items = rec
|
| 585 |
+
else:
|
| 586 |
+
srt_text, items = transcribe_only_with_vc(media_path)
|
| 587 |
try:
|
| 588 |
os.remove(media_path)
|
| 589 |
except Exception:
|
requirements.txt
CHANGED
|
@@ -5,3 +5,4 @@ ffsubsync
|
|
| 5 |
pysubs2
|
| 6 |
openai-whisper
|
| 7 |
textdistance
|
|
|
|
|
|
| 5 |
pysubs2
|
| 6 |
openai-whisper
|
| 7 |
textdistance
|
| 8 |
+
faster-whisper
|