soiz1 committed on
Commit
be961e5
·
verified ·
1 Parent(s): a6b1b80

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -5
app.py CHANGED
@@ -12,13 +12,48 @@ langid = None # ここでは仮定、適切なモジュールを初期化して
12
 
13
  # モック用の関数(本番環境では適切に実装してください)
14
  def clear_prompts():
15
- pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
- def transcribe_one(audio, sr):
18
- return "en", "transcribed text" # 仮の戻り値
19
 
20
- def tokenize_audio(audio_tokenizer, audio):
21
- return [torch.zeros((1, 100, 10))] # 仮の戻り値
 
 
 
 
 
 
 
 
 
 
22
 
23
  def make_npz_prompt(name, uploaded_audio, recorded_audio, transcript_content):
24
  clear_prompts()
 
12
 
13
  # モック用の関数(本番環境では適切に実装してください)
14
  def clear_prompts():
15
+ try:
16
+ path = tempfile.gettempdir()
17
+ for eachfile in os.listdir(path):
18
+ filename = os.path.join(path, eachfile)
19
+ if os.path.isfile(filename) and filename.endswith(".npz"):
20
+ lastmodifytime = os.stat(filename).st_mtime
21
+ endfiletime = time.time() - 60
22
+ if endfiletime > lastmodifytime:
23
+ os.remove(filename)
24
+ del path, filename, lastmodifytime, endfiletime
25
+ gc.collect()
26
+ except:
27
+ return
28
def transcribe_one(wav, sr):
    """Transcribe a waveform with Whisper and report the decoded language tag.

    Args:
        wav: Audio tensor. Its leading dimension is squeezed before feature
            extraction (presumably shaped ``(1, num_samples)`` -- TODO confirm
            against callers).
        sr: Sampling rate of ``wav`` in Hz. Anything other than 16000 is
            resampled to 16000 first.

    Returns:
        A ``(lang, text)`` tuple. ``lang`` is the decoded token at position 1
        of the generated sequence with the surrounding ``<``, ``|`` and ``>``
        characters stripped. ``text`` is the transcription, with a trailing
        ``"."`` appended when it does not already end in punctuation. An empty
        or all-space transcription is returned unchanged.
    """
    # The feature extractor below is called with sampling_rate=16000, so any
    # other input rate has to be converted first.
    if sr != 16000:
        wav4trans = torchaudio.transforms.Resample(sr, 16000)(wav)
    else:
        wav4trans = wav

    input_features = whisper_processor(
        wav4trans.squeeze(0), sampling_rate=16000, return_tensors="pt"
    ).input_features

    # Generate token ids.
    predicted_ids = whisper.generate(input_features.to(device))
    # Position 1 is decoded on its own and treated as the language marker
    # (presumably of the form "<|en|>" -- verify against the Whisper tokenizer).
    lang = whisper_processor.batch_decode(predicted_ids[:, 1])[0].strip("<|>")
    # Decode token ids to text.
    text_pr = whisper_processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]

    # Print the recognized text.
    print(text_pr)

    # Guard against an empty transcription (e.g. silent audio): indexing the
    # last character of an empty string would raise IndexError.
    stripped = text_pr.strip(" ")
    if stripped and stripped[-1] not in "?!.,。,?!。、":
        text_pr += "."

    # Drop references to the intermediate tensors before collecting garbage.
    del wav4trans, input_features, predicted_ids
    gc.collect()
    return lang, text_pr
52
+
53
+ from data.tokenizer import (
54
+ AudioTokenizer,
55
+ tokenize_audio,
56
+ )
57
 
58
  def make_npz_prompt(name, uploaded_audio, recorded_audio, transcript_content):
59
  clear_prompts()