Spaces:
Sleeping
Sleeping
Update core.py
Browse files
core.py
CHANGED
|
@@ -3,6 +3,7 @@ import google.generativeai as genai
|
|
| 3 |
import pysrt
|
| 4 |
import re
|
| 5 |
import os
|
|
|
|
| 6 |
|
| 7 |
# 解析 SRT 檔案
|
| 8 |
def parse_srt(srt_path):
|
|
@@ -64,7 +65,7 @@ def validate_srt(original_srt, modified_srt):
|
|
| 64 |
|
| 65 |
return True, "驗證通過"
|
| 66 |
|
| 67 |
-
def process_files(api_key, test_transcript_file, test_srt_file, batch_size):
|
| 68 |
# 1. 配置 Gemini API
|
| 69 |
try:
|
| 70 |
genai.configure(api_key=api_key)
|
|
@@ -95,10 +96,10 @@ def process_files(api_key, test_transcript_file, test_srt_file, batch_size):
|
|
| 95 |
all_reports = []
|
| 96 |
keys = list(srt_data.keys()) # 取得編號
|
| 97 |
|
| 98 |
-
# 使用重疊
|
| 99 |
-
overlap =
|
| 100 |
|
| 101 |
-
# 處理每個批次
|
| 102 |
for i in range(0, len(keys), batch_size - overlap):
|
| 103 |
end_idx = min(i + batch_size, len(keys))
|
| 104 |
batch_keys = keys[i:end_idx]
|
|
@@ -146,11 +147,36 @@ def process_files(api_key, test_transcript_file, test_srt_file, batch_size):
|
|
| 146 |
)
|
| 147 |
|
| 148 |
try:
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 154 |
# 使用 re.split 分割字幕和報告
|
| 155 |
parts = re.split(r'<<<分隔符號>>>', corrected_subtitle, maxsplit=1) # 只分割一次
|
| 156 |
|
|
|
|
| 3 |
import pysrt
|
| 4 |
import re
|
| 5 |
import os
|
| 6 |
+
import time
|
| 7 |
|
| 8 |
# 解析 SRT 檔案
|
| 9 |
def parse_srt(srt_path):
|
|
|
|
| 65 |
|
| 66 |
return True, "驗證通過"
|
| 67 |
|
| 68 |
+
def process_files(api_key, test_transcript_file, test_srt_file, batch_size, delay_seconds=2):
|
| 69 |
# 1. 配置 Gemini API
|
| 70 |
try:
|
| 71 |
genai.configure(api_key=api_key)
|
|
|
|
| 96 |
all_reports = []
|
| 97 |
keys = list(srt_data.keys()) # 取得編號
|
| 98 |
|
| 99 |
+
# 使用固定數量的重疊而非百分比
|
| 100 |
+
overlap = 2 # 固定重疊2條字幕
|
| 101 |
|
| 102 |
+
# 處理每個批次,使用固定重疊數量
|
| 103 |
for i in range(0, len(keys), batch_size - overlap):
|
| 104 |
end_idx = min(i + batch_size, len(keys))
|
| 105 |
batch_keys = keys[i:end_idx]
|
|
|
|
| 147 |
)
|
| 148 |
|
| 149 |
try:
|
| 150 |
+
# 添加重試機制與間隔時間
|
| 151 |
+
max_retries = 3
|
| 152 |
+
retry_count = 0
|
| 153 |
+
retry_delay = 5 # 初始等待秒數
|
| 154 |
+
|
| 155 |
+
while retry_count < max_retries:
|
| 156 |
+
try:
|
| 157 |
+
# 添加間隔時間以避免觸發限流
|
| 158 |
+
if i > 0:
|
| 159 |
+
print(f"等待 {retry_delay} 秒以避免達到API限制...")
|
| 160 |
+
time.sleep(retry_delay)
|
| 161 |
+
|
| 162 |
+
response = model.generate_content(prompt)
|
| 163 |
+
corrected_subtitle = response.text
|
| 164 |
+
print(f"第 {i // (batch_size - overlap) + 1} 批次 Gemini 模型的回應:")
|
| 165 |
+
print(corrected_subtitle)
|
| 166 |
+
break # 成功獲取回應,跳出重試循環
|
| 167 |
+
|
| 168 |
+
except Exception as retry_error:
|
| 169 |
+
retry_count += 1
|
| 170 |
+
if "429" in str(retry_error):
|
| 171 |
+
print(f"遇到配額限制 (429),重試 {retry_count}/{max_retries}...")
|
| 172 |
+
retry_delay *= 2 # 指數退避策略
|
| 173 |
+
else:
|
| 174 |
+
# 其他錯誤,直接拋出
|
| 175 |
+
raise retry_error
|
| 176 |
+
|
| 177 |
+
if retry_count >= max_retries:
|
| 178 |
+
raise retry_error
|
| 179 |
+
|
| 180 |
# 使用 re.split 分割字幕和報告
|
| 181 |
parts = re.split(r'<<<分隔符號>>>', corrected_subtitle, maxsplit=1) # 只分割一次
|
| 182 |
|