hlyangster committed on
Commit
db6a296
·
verified ·
1 Parent(s): 6dcee9d

Update core.py

Browse files
Files changed (1) hide show
  1. core.py +35 -9
core.py CHANGED
@@ -3,6 +3,7 @@ import google.generativeai as genai
3
  import pysrt
4
  import re
5
  import os
 
6
 
7
  # 解析 SRT 檔案
8
  def parse_srt(srt_path):
@@ -64,7 +65,7 @@ def validate_srt(original_srt, modified_srt):
64
 
65
  return True, "驗證通過"
66
 
67
- def process_files(api_key, test_transcript_file, test_srt_file, batch_size):
68
  # 1. 配置 Gemini API
69
  try:
70
  genai.configure(api_key=api_key)
@@ -95,10 +96,10 @@ def process_files(api_key, test_transcript_file, test_srt_file, batch_size):
95
  all_reports = []
96
  keys = list(srt_data.keys()) # 取得編號
97
 
98
- # 使用重疊批次而非固定批次
99
- overlap = max(2, batch_size // 4) # 25% 的重疊
100
 
101
- # 處理每個批次
102
  for i in range(0, len(keys), batch_size - overlap):
103
  end_idx = min(i + batch_size, len(keys))
104
  batch_keys = keys[i:end_idx]
@@ -146,11 +147,36 @@ def process_files(api_key, test_transcript_file, test_srt_file, batch_size):
146
  )
147
 
148
  try:
149
- response = model.generate_content(prompt)
150
- corrected_subtitle = response.text
151
- print(f"第 {i // (batch_size - overlap) + 1} 批次 Gemini 模型的回應:")
152
- print(corrected_subtitle)
153
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
154
  # 使用 re.split 分割字幕和報告
155
  parts = re.split(r'<<<分隔符號>>>', corrected_subtitle, maxsplit=1) # 只分割一次
156
 
 
3
  import pysrt
4
  import re
5
  import os
6
+ import time
7
 
8
  # 解析 SRT 檔案
9
  def parse_srt(srt_path):
 
65
 
66
  return True, "驗證通過"
67
 
68
+ def process_files(api_key, test_transcript_file, test_srt_file, batch_size, delay_seconds=2):
69
  # 1. 配置 Gemini API
70
  try:
71
  genai.configure(api_key=api_key)
 
96
  all_reports = []
97
  keys = list(srt_data.keys()) # 取得編號
98
 
99
+ # 使用固定數量的重疊而非百分比
100
+ overlap = 2 # 固定重疊2條字幕
101
 
102
+ # 處理每個批次,使用固定重疊數量
103
  for i in range(0, len(keys), batch_size - overlap):
104
  end_idx = min(i + batch_size, len(keys))
105
  batch_keys = keys[i:end_idx]
 
147
  )
148
 
149
  try:
150
+ # 添加重試機制與間隔時間
151
+ max_retries = 3
152
+ retry_count = 0
153
+ retry_delay = 5 # 初始等待秒數
154
+
155
+ while retry_count < max_retries:
156
+ try:
157
+ # 添加間隔時間以避免觸發限流
158
+ if i > 0:
159
+ print(f"等待 {retry_delay} 秒以避免達到API限制...")
160
+ time.sleep(retry_delay)
161
+
162
+ response = model.generate_content(prompt)
163
+ corrected_subtitle = response.text
164
+ print(f"第 {i // (batch_size - overlap) + 1} 批次 Gemini 模型的回應:")
165
+ print(corrected_subtitle)
166
+ break # 成功獲取回應,跳出重試循環
167
+
168
+ except Exception as retry_error:
169
+ retry_count += 1
170
+ if "429" in str(retry_error):
171
+ print(f"遇到配額限制 (429),重試 {retry_count}/{max_retries}...")
172
+ retry_delay *= 2 # 指數退避策略
173
+ else:
174
+ # 其他錯誤,直接拋出
175
+ raise retry_error
176
+
177
+ if retry_count >= max_retries:
178
+ raise retry_error
179
+
180
  # 使用 re.split 分割字幕和報告
181
  parts = re.split(r'<<<分隔符號>>>', corrected_subtitle, maxsplit=1) # 只分割一次
182