HK0712 committed on
Commit
7111ecb
·
1 Parent(s): 5d4c1d3

ADD: japanese version

Browse files
.devcontainer/devcontainer.json CHANGED
@@ -1,11 +1,16 @@
1
  {
2
  "name": "FYP Backend (GPU)",
3
 
4
- "build": {
5
- // 假設您的 Dockerfile 位於專案根目錄
6
- "dockerfile": "../Dockerfile",
7
- "context": ".."
8
- },
 
 
 
 
 
9
 
10
  // 這是最最最關鍵的部分!
11
  "runArgs": [
 
1
  {
2
  "name": "FYP Backend (GPU)",
3
 
4
+ // 【【【【【 方案 A:快速模式 (當依賴沒變時) 】】】】】
5
+ "image": "my-project-image:latest", // 使用上次成功建置的、帶有標籤的映像
6
+ // "build": { ... },
7
+
8
+ // 【【【【【 方案 B:重建模式 (當依賴改變時) 】】】】】
9
+ // "image": "my-project-image:latest",
10
+ // "build": {
11
+ // "dockerfile": "../Dockerfile",
12
+ // "context": ".."
13
+ // },
14
 
15
  // 這是最最最關鍵的部分!
16
  "runArgs": [
Dockerfile CHANGED
@@ -4,6 +4,9 @@ FROM python:3.10-slim
4
  ENV HF_HOME=/tmp/huggingface
5
  ENV HF_DATASETS_CACHE=/tmp/huggingface/datasets
6
 
 
 
 
7
  # 2. 設定容器內的工作目錄
8
  WORKDIR /app
9
 
 
4
  ENV HF_HOME=/tmp/huggingface
5
  ENV HF_DATASETS_CACHE=/tmp/huggingface/datasets
6
 
7
+ # 為 MeCab 設定正確的設定檔路徑,解決 "no such file or directory" 錯誤
8
+ ENV MECABRC=/etc/mecabrc
9
+
10
  # 2. 設定容器內的工作目錄
11
  WORKDIR /app
12
 
analyzer/ASR_jp_jp.py CHANGED
@@ -1,11 +1,11 @@
1
  # =======================================================================
2
  # 1. 匯入區 (Imports)
3
- # 【關鍵修改】新增了 pyopenjtalk 和 MeCab 的匯入
4
  # =======================================================================
5
  import torch
6
  import soundfile as sf
7
  import librosa
8
- from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
9
  import os
10
  import pyopenjtalk
11
  import MeCab
@@ -16,30 +16,35 @@ import re
16
  # =======================================================================
17
  # 2. 全域變數與配置區 (Global Variables & Config)
18
  # =======================================================================
19
- # 【關鍵修改】自動檢測可用設備
20
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
21
  print(f"INFO: ASR_jp_jp.py is configured to use device: {DEVICE}")
22
 
23
- # 【關鍵修改】設定為日語 ASR 模型
24
  MODEL_NAME = "prj-beatrice/japanese-hubert-base-phoneme-ctc-v3"
25
 
26
  processor = None
27
  model = None
28
 
29
- # 【關鍵修改】初始化 MeCab 分詞器
30
- # 我們使用 -Owakati 選項來獲得以空格分隔的單詞列表
31
- mecab_tagger = MeCab.Tagger("-Owakati")
 
 
 
 
32
 
33
  # =======================================================================
34
  # 3. 核心業務邏輯區 (Core Business Logic)
35
  # =======================================================================
36
 
37
  # -----------------------------------------------------------------------
38
- # 3.1. 模型載入函數 (與其他版本邏輯相同)
 
39
  # -----------------------------------------------------------------------
40
  def load_model():
41
  """
42
- 載入日語 ASR 模型和對應的處理器。
43
  """
44
  global processor, model
45
  if processor and model:
@@ -49,7 +54,7 @@ def load_model():
49
  print(f"正在準備 ASR 模型 '{MODEL_NAME}'...")
50
  try:
51
  processor = Wav2Vec2Processor.from_pretrained(MODEL_NAME)
52
- model = Wav2Vec2ForCTC.from_pretrained(MODEL_NAME)
53
  model.to(DEVICE)
54
  print(f"模型 '{MODEL_NAME}' 和處理器載入成功!")
55
  return True
@@ -58,56 +63,47 @@ def load_model():
58
  raise RuntimeError(f"Failed to load model '{MODEL_NAME}': {e}")
59
 
60
  # -----------------------------------------------------------------------
61
- # 3.2. 日語 G2P 輔助函數 (這是此檔案最核心的新增部分)
62
  # -----------------------------------------------------------------------
63
- def japanese_g2p(text: str) -> list[tuple[str, str]]:
64
- """
65
- 將日語句子轉換為 (單詞, 對應音素) 的元組列表。
66
- 這是我們為日語定製的 G2P 核心。
67
- """
68
- # 1. 使用 MeCab 進行分詞
69
- words = mecab_tagger.parse(text).strip().split(' ')
70
-
71
- # 2. 對整個句子使用 PyOpenJTalk 獲取完整的音素序列
72
- # 我們直接使用 pyopenjtalk.g2p,它輸出的就是以空格分隔的音素
73
- full_phonemes_str = pyopenjtalk.g2p(text)
74
-
75
- # 3. 進行音素清理,以匹配 ASR 模型的輸出
76
- # ASR 模型輸出的是清音,所以我們移除濁音、半濁音、長音等符號
77
- cleaned_phonemes = full_phonemes_str.replace('pau', ' ').replace(' ', '').replace('N', 'n').replace('cl', '')
78
 
79
- # 4. 將單詞和音素進行配對
80
- # 這是一個簡化的配對邏輯:我們假設音素的數量和假名的數量大致對應
81
- # 這在大多數情況下是有效的,因為日語是音節語言
82
- result = []
83
- phoneme_idx = 0
84
  for word in words:
85
- # 計算當前單詞大致對應多少個音素 (假名數量)
86
- num_mora = len(word)
87
 
88
- # 提取對應的音素片段
89
- word_phonemes = cleaned_phonemes[phoneme_idx : phoneme_idx + num_mora]
90
 
91
- # 檢查提取的音素是否為空,避免無效單詞的影響
92
- if word_phonemes:
93
- result.append((word, word_phonemes))
94
 
95
- phoneme_idx += num_mora
96
 
97
- return result
 
 
 
 
98
 
99
  # -----------------------------------------------------------------------
100
- # 3.3. 音素切分函數 (與其他版本邏輯相同,但更通用)
101
  # -----------------------------------------------------------------------
102
- def _tokenize_ipa(ipa_string: str) -> list:
103
  """
104
- 將音素字串切分為列表。對於日語,直接按字元切分即可。
 
105
  """
106
- # 日語 ASR 模型的輸出是單字元音素,所以直接轉換為列表
107
- return list(ipa_string)
108
 
109
  # -----------------------------------------------------------------------
110
- # 3.4. 核心分析函數 (主入口,已修改為日語邏輯)
111
  # -----------------------------------------------------------------------
112
  def analyze(audio_file_path: str, target_sentence: str) -> dict:
113
  """
@@ -116,48 +112,59 @@ def analyze(audio_file_path: str, target_sentence: str) -> dict:
116
  if not processor or not model:
117
  raise RuntimeError("模型尚未載入。請確保在呼叫 analyze 之前已成功執行 load_model()。")
118
 
119
- # 【關鍵修改】使用我們新的日語 G2P 函數
120
- g2p_result = japanese_g2p(target_sentence)
121
-
122
- # 從 G2P 結果中提取原始單詞列表和按單詞劃分的音素列表
123
- target_words_original = [item[0] for item in g2p_result]
124
- target_ipa_by_word = [_tokenize_ipa(item[1]) for item in g2p_result]
 
 
 
125
 
126
- # 載入並處理音訊 (與其他版本邏輯相同)
 
127
  try:
128
  speech, sample_rate = sf.read(audio_file_path)
129
- if sample_rate != 16000:
130
- speech = librosa.resample(y=speech, orig_sr=sample_rate, target_sr=16000)
 
 
 
 
 
 
 
 
 
 
 
 
 
131
  except Exception as e:
132
  raise IOError(f"讀取或處理音訊時發生錯誤: {e}")
133
 
134
- # 進行 ASR 推論 (與其他版本邏輯相同)
135
- input_values = processor(speech, sampling_rate=16000, return_tensors="pt").input_values
136
- input_values = input_values.to(DEVICE)
137
- with torch.no_grad():
138
- logits = model(input_values).logits
139
- predicted_ids = torch.argmax(logits, dim=-1)
140
- user_ipa_full = processor.decode(predicted_ids[0])
141
-
142
- # 進行對齊 (與其他版本邏輯相同)
143
  word_alignments = _get_phoneme_alignments_by_word(user_ipa_full, target_ipa_by_word)
144
 
145
- # 格式化輸出 (與其他版本邏輯相同)
 
146
  return _format_to_json_structure(word_alignments, target_sentence, target_words_original)
147
 
148
  # =======================================================================
149
  # 4. 對齊與格式化函數區 (Alignment & Formatting)
150
- # 【注意】這些函數是語言無關的,直接從英文版複製,無需修改
151
  # =======================================================================
152
 
153
  # -----------------------------------------------------------------------
154
- # 4.1. 對齊函數
155
  # -----------------------------------------------------------------------
156
  def _get_phoneme_alignments_by_word(user_phoneme_str, target_words_ipa_tokenized):
157
  """
158
- 執行音素對齊。此函數是語言無關的。
159
  """
160
- user_phonemes = _tokenize_ipa(user_phoneme_str)
161
 
162
  target_phonemes_flat = []
163
  word_boundaries_indices = []
@@ -167,6 +174,10 @@ def _get_phoneme_alignments_by_word(user_phoneme_str, target_words_ipa_tokenized
167
  current_idx += len(word_ipa_tokens)
168
  word_boundaries_indices.append(current_idx - 1)
169
 
 
 
 
 
170
  dp = np.zeros((len(user_phonemes) + 1, len(target_phonemes_flat) + 1))
171
  for i in range(1, len(user_phonemes) + 1): dp[i][0] = i
172
  for j in range(1, len(target_phonemes_flat) + 1): dp[0][j] = j
@@ -178,21 +189,29 @@ def _get_phoneme_alignments_by_word(user_phoneme_str, target_words_ipa_tokenized
178
  i, j = len(user_phonemes), len(target_phonemes_flat)
179
  user_path, target_path = [], []
180
  while i > 0 or j > 0:
181
- cost = float('inf') if i == 0 or j == 0 else (0 if user_phonemes[i-1] == target_phonemes_flat[j-1] else 1)
 
 
 
 
182
  if i > 0 and j > 0 and dp[i][j] == dp[i-1][j-1] + cost:
183
  user_path.insert(0, user_phonemes[i-1]); target_path.insert(0, target_phonemes_flat[j-1]); i -= 1; j -= 1
184
- elif i > 0 and dp[i][j] == dp[i-1][j] + 1:
185
  user_path.insert(0, user_phonemes[i-1]); target_path.insert(0, '-'); i -= 1
186
- else:
187
  user_path.insert(0, '-'); target_path.insert(0, target_phonemes_flat[j-1]); j -= 1
 
 
188
 
189
  alignments_by_word = []
190
  word_start_idx_in_path = 0
191
  target_phoneme_counter_in_path = 0
 
 
192
 
193
  for path_idx, p in enumerate(target_path):
194
  if p != '-':
195
- if target_phoneme_counter_in_path in word_boundaries_indices:
196
  target_alignment = target_path[word_start_idx_in_path : path_idx + 1]
197
  user_alignment = user_path[word_start_idx_in_path : path_idx + 1]
198
 
@@ -202,13 +221,14 @@ def _get_phoneme_alignments_by_word(user_phoneme_str, target_words_ipa_tokenized
202
  })
203
 
204
  word_start_idx_in_path = path_idx + 1
 
205
 
206
  target_phoneme_counter_in_path += 1
207
 
208
  return alignments_by_word
209
 
210
  # -----------------------------------------------------------------------
211
- # 4.2. 格式化函數
212
  # -----------------------------------------------------------------------
213
  def _format_to_json_structure(alignments, sentence, original_words) -> dict:
214
  """
@@ -226,7 +246,9 @@ def _format_to_json_structure(alignments, sentence, original_words) -> dict:
226
  word_is_correct = True
227
  phonemes_data = []
228
 
229
- for j in range(len(alignment['target'])):
 
 
230
  target_phoneme = alignment['target'][j]
231
  user_phoneme = alignment['user'][j]
232
  is_match = (user_phoneme == target_phoneme)
@@ -239,6 +261,7 @@ def _format_to_json_structure(alignments, sentence, original_words) -> dict:
239
 
240
  if not is_match:
241
  word_is_correct = False
 
242
  if not (user_phoneme == '-' and target_phoneme == '-'):
243
  total_errors += 1
244
 
@@ -253,16 +276,18 @@ def _format_to_json_structure(alignments, sentence, original_words) -> dict:
253
 
254
  total_phonemes += sum(1 for p in alignment['target'] if p != '-')
255
 
256
- total_words = len(original_words)
257
- if len(alignments) < total_words:
258
- for i in range(len(alignments), total_words):
259
- # 處理使用者未說出的單詞
260
- missed_word_ipa = _tokenize_ipa(japanese_g2p(original_words[i])[0][1]) # 重新獲取音素
 
261
  phonemes_data = []
262
- for p_ipa in missed_word_ipa:
263
- phonemes_data.append({"target": p_ipa, "user": "-", "isMatch": False})
264
- total_errors += 1
265
- total_phonemes += 1
 
266
 
267
  words_data.append({
268
  "word": original_words[i],
@@ -270,6 +295,7 @@ def _format_to_json_structure(alignments, sentence, original_words) -> dict:
270
  "phonemes": phonemes_data
271
  })
272
 
 
273
  overall_score = (correct_words_count / total_words) * 100 if total_words > 0 else 0
274
  phoneme_error_rate = (total_errors / total_phonemes) * 100 if total_phonemes > 0 else 0
275
 
 
1
  # =======================================================================
2
  # 1. 匯入區 (Imports)
3
+ # - 新增了 pyopenjtalk 和 MeCab
4
  # =======================================================================
5
  import torch
6
  import soundfile as sf
7
  import librosa
8
+ from transformers import Wav2Vec2Processor, HubertForCTC
9
  import os
10
  import pyopenjtalk
11
  import MeCab
 
16
  # =======================================================================
17
  # 2. 全域變數與配置區 (Global Variables & Config)
18
  # =======================================================================
19
+ # 自動檢測可用設備
20
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
21
  print(f"INFO: ASR_jp_jp.py is configured to use device: {DEVICE}")
22
 
23
+ # 設定為日語 ASR 模型
24
  MODEL_NAME = "prj-beatrice/japanese-hubert-base-phoneme-ctc-v3"
25
 
26
  processor = None
27
  model = None
28
 
29
+ # 初始化 MeCab 分詞器
30
+ # -Owakati 選項能直接輸出以空格分隔的單詞,非常方便
31
+ try:
32
+ mecab_tagger = MeCab.Tagger("-Owakati")
33
+ except RuntimeError:
34
+ print("ERROR: MeCab Tagger 初始化失敗。請確保 mecab 和 mecab-ipadic-utf8 已正確安裝。")
35
+ mecab_tagger = None
36
 
37
  # =======================================================================
38
  # 3. 核心業務邏輯區 (Core Business Logic)
39
  # =======================================================================
40
 
41
  # -----------------------------------------------------------------------
42
+ # 3.1. 模型載入函數
43
+ # - 將 Wav2Vec2ForCTC 更換為 HubertForCTC
44
  # -----------------------------------------------------------------------
45
  def load_model():
46
  """
47
+ 載入日語 ASR 模型 (HubertForCTC) 和對應的處理器。
48
  """
49
  global processor, model
50
  if processor and model:
 
54
  print(f"正在準備 ASR 模型 '{MODEL_NAME}'...")
55
  try:
56
  processor = Wav2Vec2Processor.from_pretrained(MODEL_NAME)
57
+ model = HubertForCTC.from_pretrained(MODEL_NAME) # <-- 使用 HubertForCTC
58
  model.to(DEVICE)
59
  print(f"模型 '{MODEL_NAME}' 和處理器載入成功!")
60
  return True
 
63
  raise RuntimeError(f"Failed to load model '{MODEL_NAME}': {e}")
64
 
65
  # -----------------------------------------------------------------------
66
+ # 3.2. 日語 G2P 輔助函數 (此檔案最核心的修改)
67
  # -----------------------------------------------------------------------
68
+ def _get_target_phonemes_by_word(text: str) -> tuple[list[str], list[list[str]]]:
69
+ if not mecab_tagger:
70
+ raise RuntimeError("MeCab Tagger 未初始化,無法處理日語文本。")
71
+
72
+ words = mecab_tagger.parse(text).strip().split()
 
 
 
 
 
 
 
 
 
 
73
 
74
+ target_words_original = []
75
+ target_ipa_by_word = []
76
+
 
 
77
  for word in words:
78
+ if not word:
79
+ continue
80
 
81
+ phonemes_str = pyopenjtalk.g2p(word, kana=False)
 
82
 
83
+ # 【最終修正】完全不清理任何音素,直接使用原始輸出
84
+ # 只做基本的空格標準化
85
+ cleaned_phonemes = re.sub(r'\s+', ' ', phonemes_str).strip()
86
 
87
+ phoneme_list = cleaned_phonemes.split()
88
 
89
+ if word and phoneme_list:
90
+ target_words_original.append(word)
91
+ target_ipa_by_word.append(phoneme_list)
92
+
93
+ return target_words_original, target_ipa_by_word
94
 
95
  # -----------------------------------------------------------------------
96
+ # 3.3. 音素切分函數 (用於處理 ASR 的輸出)
97
  # -----------------------------------------------------------------------
98
+ def _tokenize_asr_output(phoneme_string: str) -> list:
99
  """
100
+ 將 ASR 模型輸出的音素字串切分為列表。
101
+ 此模型的輸出是單字元音素,以空格分隔。
102
  """
103
+ return phoneme_string.split()
 
104
 
105
  # -----------------------------------------------------------------------
106
+ # 3.4. 核心分析函數 (主入口)
107
  # -----------------------------------------------------------------------
108
  def analyze(audio_file_path: str, target_sentence: str) -> dict:
109
  """
 
112
  if not processor or not model:
113
  raise RuntimeError("模型尚未載入。請確保在呼叫 analyze 之前已成功執行 load_model()。")
114
 
115
+ # 【關鍵步驟 1: G2P】
116
+ # 使用新的 G2P 函數獲取目標單詞和音素
117
+ target_words_original, target_ipa_by_word = _get_target_phonemes_by_word(target_sentence)
118
+
119
+ # 處理音訊檔案為空或句子為空的邊界情況
120
+ if not target_words_original:
121
+ print("警告: G2P 處理後目標句子為空。")
122
+ # 建立一個空的骨架結構返回
123
+ return _format_to_json_structure([], target_sentence, [])
124
 
125
+ # 【關鍵步驟 2: ASR】
126
+ # 載入並處理音訊
127
  try:
128
  speech, sample_rate = sf.read(audio_file_path)
129
+ if len(speech) == 0:
130
+ print("警告: 音訊檔案為空。")
131
+ user_ipa_full = ""
132
+ else:
133
+ if sample_rate != 16000:
134
+ speech = librosa.resample(y=speech, orig_sr=sample_rate, target_sr=16000)
135
+
136
+ # 進行 ASR 推論
137
+ input_values = processor(speech, sampling_rate=16000, return_tensors="pt").input_values
138
+ input_values = input_values.to(DEVICE)
139
+ with torch.no_grad():
140
+ logits = model(input_values).logits
141
+ predicted_ids = torch.argmax(logits, dim=-1)
142
+ user_ipa_full = processor.decode(predicted_ids[0])
143
+
144
  except Exception as e:
145
  raise IOError(f"讀取或處理音訊時發生錯誤: {e}")
146
 
147
+ # 【關鍵步驟 3: 對齊】
148
+ # 執行音素對齊
 
 
 
 
 
 
 
149
  word_alignments = _get_phoneme_alignments_by_word(user_ipa_full, target_ipa_by_word)
150
 
151
+ # 【關鍵步驟 4: 格式化】
152
+ # 格式化為最終的 JSON 輸出
153
  return _format_to_json_structure(word_alignments, target_sentence, target_words_original)
154
 
155
  # =======================================================================
156
  # 4. 對齊與格式化函數區 (Alignment & Formatting)
157
+ # 【注意】這些函數是語言無關的,直接從 en_us/fr_fr 版本複製而來。
158
  # =======================================================================
159
 
160
  # -----------------------------------------------------------------------
161
+ # 4.1. 對齊函數 (語言無關)
162
  # -----------------------------------------------------------------------
163
  def _get_phoneme_alignments_by_word(user_phoneme_str, target_words_ipa_tokenized):
164
  """
165
+ 使用動態規劃執行音素對齊。此函數是語言無關的。
166
  """
167
+ user_phonemes = _tokenize_asr_output(user_phoneme_str)
168
 
169
  target_phonemes_flat = []
170
  word_boundaries_indices = []
 
174
  current_idx += len(word_ipa_tokens)
175
  word_boundaries_indices.append(current_idx - 1)
176
 
177
+ # 如果目標音素為空 (例如,輸入句子只有標點符號),返回空對齊
178
+ if not target_phonemes_flat:
179
+ return []
180
+
181
  dp = np.zeros((len(user_phonemes) + 1, len(target_phonemes_flat) + 1))
182
  for i in range(1, len(user_phonemes) + 1): dp[i][0] = i
183
  for j in range(1, len(target_phonemes_flat) + 1): dp[0][j] = j
 
189
  i, j = len(user_phonemes), len(target_phonemes_flat)
190
  user_path, target_path = [], []
191
  while i > 0 or j > 0:
192
+ # 確保索引不會越界
193
+ cost = float('inf')
194
+ if i > 0 and j > 0:
195
+ cost = 0 if user_phonemes[i-1] == target_phonemes_flat[j-1] else 1
196
+
197
  if i > 0 and j > 0 and dp[i][j] == dp[i-1][j-1] + cost:
198
  user_path.insert(0, user_phonemes[i-1]); target_path.insert(0, target_phonemes_flat[j-1]); i -= 1; j -= 1
199
+ elif i > 0 and (j == 0 or dp[i][j] == dp[i-1][j] + 1):
200
  user_path.insert(0, user_phonemes[i-1]); target_path.insert(0, '-'); i -= 1
201
+ elif j > 0 and (i == 0 or dp[i][j] == dp[i][j-1] + 1):
202
  user_path.insert(0, '-'); target_path.insert(0, target_phonemes_flat[j-1]); j -= 1
203
+ else: # i == 0 and j == 0
204
+ break
205
 
206
  alignments_by_word = []
207
  word_start_idx_in_path = 0
208
  target_phoneme_counter_in_path = 0
209
+ word_boundary_iter = iter(word_boundaries_indices)
210
+ current_word_boundary = next(word_boundary_iter, -1)
211
 
212
  for path_idx, p in enumerate(target_path):
213
  if p != '-':
214
+ if target_phoneme_counter_in_path == current_word_boundary:
215
  target_alignment = target_path[word_start_idx_in_path : path_idx + 1]
216
  user_alignment = user_path[word_start_idx_in_path : path_idx + 1]
217
 
 
221
  })
222
 
223
  word_start_idx_in_path = path_idx + 1
224
+ current_word_boundary = next(word_boundary_iter, -1)
225
 
226
  target_phoneme_counter_in_path += 1
227
 
228
  return alignments_by_word
229
 
230
  # -----------------------------------------------------------------------
231
+ # 4.2. 格式化函數 (語言無關)
232
  # -----------------------------------------------------------------------
233
  def _format_to_json_structure(alignments, sentence, original_words) -> dict:
234
  """
 
246
  word_is_correct = True
247
  phonemes_data = []
248
 
249
+ # 確保 alignment['target'] 和 alignment['user'] 長度相同
250
+ min_len = min(len(alignment['target']), len(alignment['user']))
251
+ for j in range(min_len):
252
  target_phoneme = alignment['target'][j]
253
  user_phoneme = alignment['user'][j]
254
  is_match = (user_phoneme == target_phoneme)
 
261
 
262
  if not is_match:
263
  word_is_correct = False
264
+ # 只有在 target 和 user 不都為 '-' 時才算作錯誤
265
  if not (user_phoneme == '-' and target_phoneme == '-'):
266
  total_errors += 1
267
 
 
276
 
277
  total_phonemes += sum(1 for p in alignment['target'] if p != '-')
278
 
279
+ # 【Fuse Logic】處理 ASR 結果比目標單詞少的情況 (使用者漏講了單詞)
280
+ if len(alignments) < len(original_words):
281
+ for i in range(len(alignments), len(original_words)):
282
+ # 重新獲取漏掉單詞的音素
283
+ _, missed_word_ipa_list = _get_target_phonemes_by_word(original_words[i])
284
+
285
  phonemes_data = []
286
+ if missed_word_ipa_list: # 確保列表不是空的
287
+ for p_ipa in missed_word_ipa_list[0]:
288
+ phonemes_data.append({"target": p_ipa, "user": "-", "isMatch": False})
289
+ total_errors += 1
290
+ total_phonemes += 1
291
 
292
  words_data.append({
293
  "word": original_words[i],
 
295
  "phonemes": phonemes_data
296
  })
297
 
298
+ total_words = len(original_words)
299
  overall_score = (correct_words_count / total_words) * 100 if total_words > 0 else 0
300
  phoneme_error_rate = (total_errors / total_phonemes) * 100 if total_phonemes > 0 else 0
301