Spaces:

soiz1
/

dall-e-x

Runtime error

App Files Files Community

soiz1 committed on Jan 28, 2025

Commit

be961e5

verified ·

1 Parent(s): a6b1b80

Update app.py

Browse files

Files changed (1) hide show

app.py +40 -5

app.py CHANGED Viewed

@@ -12,13 +12,48 @@ langid = None  # ここでは仮定、適切なモジュールを初期化して
 # Mock functions (replace with proper implementations in production)
 def clear_prompts():
-    pass  # placeholder: performs no cleanup
-def transcribe_one(audio, sr):
-    return "en", "transcribed text"  # placeholder return value: fixed (language, text) pair
-def tokenize_audio(audio_tokenizer, audio):
-    return [torch.zeros((1, 100, 10))]  # placeholder return value: a single all-zero tensor
 def make_npz_prompt(name, uploaded_audio, recorded_audio, transcript_content):
     clear_prompts()

 # Mock functions (replace with proper implementations in production)
 def clear_prompts():
+    """Delete stale ``.npz`` files from the system temporary directory.
+
+    A file is removed when it is a regular file whose name ends in ``.npz``
+    and whose modification time is more than 60 seconds in the past.
+    Cleanup is best-effort: filesystem errors are ignored so that a failed
+    sweep never interrupts the caller. Returns ``None``.
+    """
+    # Anything last modified before this timestamp counts as stale.
+    cutoff = time.time() - 60
+    try:
+        tmp_dir = tempfile.gettempdir()
+        entries = os.listdir(tmp_dir)
+    except OSError:
+        # The temp directory is missing or unreadable; nothing to clean.
+        return
+    for entry in entries:
+        if not entry.endswith(".npz"):
+            continue
+        candidate = os.path.join(tmp_dir, entry)
+        try:
+            if os.path.isfile(candidate) and os.stat(candidate).st_mtime < cutoff:
+                os.remove(candidate)
+        except OSError:
+            # The file vanished or is not removable; keep sweeping the rest.
+            continue
+    gc.collect()
+def transcribe_one(wav, sr):
+    """Transcribe one audio clip and report the language token Whisper emitted.
+
+    Args:
+        wav: Audio tensor. Its leading dimension is squeezed before feature
+            extraction, so a (1, num_samples) layout is presumably expected
+            -- TODO confirm against the caller.
+        sr: Sample rate of ``wav``; any value other than 16000 triggers a
+            resample to 16000 before feature extraction.
+
+    Returns:
+        A ``(lang, text)`` tuple. ``lang`` is the decoded token at position 1
+        of the generated sequence with its ``<``, ``|`` and ``>`` delimiter
+        characters stripped. ``text`` is the decoded transcription; a "." is
+        appended when it is non-blank and does not already end in one of the
+        recognised punctuation marks.
+    """
+    if sr != 16000:
+        wav4trans = torchaudio.transforms.Resample(sr, 16000)(wav)
+    else:
+        wav4trans = wav
+    input_features = whisper_processor(
+        wav4trans.squeeze(0), sampling_rate=16000, return_tensors="pt"
+    ).input_features
+    # Generate token ids.
+    predicted_ids = whisper.generate(input_features.to(device))
+    # NOTE(review): assumes position 1 of the output is the language token
+    # (e.g. "<|en|>") -- confirm against the generation config in use.
+    lang = whisper_processor.batch_decode(predicted_ids[:, 1])[0].strip("<|>")
+    # Decode token ids to text.
+    text_pr = whisper_processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
+    # Log the recognized text.
+    print(text_pr)
+    # A blank transcription (e.g. silent input) has no last character to
+    # inspect; indexing [-1] on it used to raise IndexError.
+    trimmed = text_pr.rstrip(" ")
+    if trimmed and trimmed[-1] not in "?!.,。，？！。、":
+        text_pr += "."
+    # Drop the intermediate tensors before forcing a collection.
+    del wav4trans, input_features, predicted_ids
+    gc.collect()
+    return lang, text_pr
+from data.tokenizer import (
+    AudioTokenizer,
+    tokenize_audio,
+)
 def make_npz_prompt(name, uploaded_audio, recorded_audio, transcript_content):
     clear_prompts()