|
|
import {WebSocketServer} from "https://deno.land/x/websocket@v0.1.4/mod.ts"; |
|
|
import {LRU} from "https://deno.land/x/lru@1.0.2/mod.ts"; |
|
|
import {franc} from 'https://esm.sh/franc-min@6.1.0'; |
|
|
|
|
|
|
|
|
/**
 * Fails start-up when a required environment variable is missing or empty.
 *
 * @param name  Variable name, used only for the error message.
 * @param value Value read from the environment.
 * @throws Error when `value` is `undefined` or an empty string.
 */
function assertEnvPresent(name: string, value: string | undefined): asserts value is string {
  if (!value) {
    throw new Error(`缺少${name}环境变量`);
  }
}

// All four variables are read first and validated afterwards, in this order.
const DEEPLX_TOKEN = Deno.env.get("DEEPLX_TOKEN");
const OPENAI_API_KEY = Deno.env.get("OPENAI_API_KEY");
const OPENAI_API_URL = Deno.env.get("OPENAI_API_URL");
const OPENAI_MODEL = Deno.env.get("OPENAI_MODEL");

assertEnvPresent("DEEPLX_TOKEN", DEEPLX_TOKEN);
assertEnvPresent("OPENAI_API_KEY", OPENAI_API_KEY);
assertEnvPresent("OPENAI_API_URL", OPENAI_API_URL);
assertEnvPresent("OPENAI_MODEL", OPENAI_MODEL);
|
|
|
|
|
|
|
|
/**
 * Runtime configuration shared by the translation engines in this file.
 *
 * The object is intentionally mutable: the ChatGPT worker pool and the batch
 * translator overwrite `MAXWORKERS` and `CHATGPT_MAX_LINES_PER_REQUEST` when
 * the model name contains `glm-`.
 */
const config = {
  // DeepLX translate endpoint; the access token is part of the URL path.
  DEEPLX_API_URL: `https://api.deeplx.org/${DEEPLX_TOKEN}/translate`,

  // Chat-completions endpoint. Trailing slashes on the configured base URL are
  // dropped so "https://host/" does not become "https://host//v1/...".
  OPENAI_API_URL: `${OPENAI_API_URL.replace(/\/+$/, "")}/v1/chat/completions`,
  OPENAI_MODEL,

  // General batching settings (not referenced in the visible part of the file).
  BATCH_SIZE: 15,
  SUBTITLE_SEPARATOR: "\n",
  // Delimiter of marker spans that the DeepLX path looks for in the source text.
  SUBTITLE_MARKER: "‖",
  OPTIMAL_TEXT_LENGTH: 1000,
  // Pause in milliseconds before each DeepLX request.
  DELAY_BETWEEN_REQUESTS: 1000,
  INITIAL_BATCH_SIZE: 20,

  // Performance reporting: when SEND_REPORTS is true the daily report is sent
  // as an alert; an alert is always sent when the average response time (ms)
  // exceeds ALERT_THRESHOLD.
  SEND_REPORTS: false,
  ALERT_THRESHOLD: 1000,
  NTFY_TOPIC: "aston",
  NTFY_URL: "https://ntfy.sh/aston",

  // Language detection settings (not referenced in the visible part of the file).
  LANGUAGE_DETECTION_SAMPLE_SIZE: 10,
  LANGUAGE_DETECTION_THRESHOLD: 7,
  USE_ADAPTIVE_RATE_LIMITER: true,

  // ChatGPT worker pool: maximum concurrent requests and minimum spacing in
  // milliseconds between request starts.
  MAXWORKERS: 3,
  CHATGPT_REQUEST_INTERVAL: 300,
  // Upper bounds for one merged ChatGPT batch (subtitle lines / characters).
  CHATGPT_MAX_LINES_PER_REQUEST: 25,
  CHATGPT_MAX_CHARS_PER_REQUEST: 1500,
  // Separator placed between numbered items of a merged ChatGPT request.
  CHATGPT_SAFE_SEPARATOR: "‖",
};
|
|
|
|
|
|
|
|
|
|
|
/**
 * Maps three-letter language codes to the upper-case codes sent to the
 * translation APIs.
 *
 * NOTE(review): the keys look like the ISO 639-3 codes returned by `franc`
 * and the values like DeepL-style codes — confirm against the caller.
 */
const languageCodeMapping: Record<string, string> = {
  // Chinese variants
  cmn: "ZH",
  zho: "ZH",
  yue: "ZH-TW",
  // Other languages
  eng: "EN",
  jpn: "JA",
  kor: "KO",
  fra: "FR",
  deu: "DE",
  spa: "ES",
  rus: "RU",
  por: "PT",
  ita: "IT",
  nld: "NL",
  pol: "PL",
  bul: "BG",
  ces: "CS",
  dan: "DA",
  ell: "EL",
  est: "ET",
  fin: "FI",
  hun: "HU",
  ind: "ID",
  lit: "LT",
  lav: "LV",
  // Both Norwegian entries map to the same target code.
  nob: "NB",
  nno: "NB",
  ron: "RO",
  slk: "SK",
  slv: "SL",
  swe: "SV",
  tur: "TR",
  ukr: "UK",
};
|
|
|
|
|
|
|
|
/**
 * Process-wide cache of finished translations, keyed by strings built by the
 * individual engines. Constructed with 1000 (presumably the maximum number of
 * entries kept by the LRU — confirm against the `lru` module).
 */
const translationCache = new LRU<string, string>(1000);
|
|
|
|
|
|
|
|
/** One subtitle cue together with its (optional) translation. */
interface SubtitleEntry {
  /** Identifier of the cue; used in log messages and fallback task ids. */
  id: string;
  /** Cue start time. NOTE(review): unit is not visible here — confirm. */
  startTime: number;
  /** Cue end time, in the same unit as `startTime`. */
  endTime: number;
  /** Source-language text of the cue. */
  text: string;
  /** Translated text, filled in by the translation functions. */
  translatedText?: string;
  /**
   * Cues this entry was built from.
   * NOTE(review): presumably set when several cues are merged — confirm.
   */
  originalSubtitles?: SubtitleEntry[];
}
|
|
|
|
|
/**
 * Promise-based scheduler that limits both the number of concurrently running
 * tasks and the minimum spacing between task starts.
 *
 * Whenever a task fails with an error object whose `status` is 429 the limits
 * are tightened: one fewer concurrent task (never below 1) and 500 ms more
 * spacing between starts.
 */
class AdaptiveRateLimiter {
  /** Pending jobs in FIFO order; each job settles the promise returned by `schedule`. */
  private queue: Array<() => Promise<void>> = [];
  private running = 0;
  private maxConcurrent = 8;
  private minInterval = 1000;
  private lastRunTime = 0;
  /** The single pending wake-up timer, if any. */
  private wakeTimer: ReturnType<typeof setTimeout> | undefined;

  /**
   * Queues `fn` and returns a promise that settles with its outcome.
   *
   * @param fn Task to execute once the limiter allows it.
   * @returns The value `fn` resolves with; rejects with whatever `fn` throws.
   */
  schedule<T>(fn: () => Promise<T>): Promise<T> {
    return new Promise<T>((resolve, reject) => {
      this.queue.push(async () => {
        try {
          console.log(`[AdaptiveRateLimiter] 开始执行请求`);
          const result = await fn();
          console.log(`[AdaptiveRateLimiter] 完成请求`);
          resolve(result);
        } catch (err: unknown) {
          if (AdaptiveRateLimiter.isRateLimitError(err)) {
            this.adjustLimits();
          }
          reject(err);
        }
      });
      this.runNext();
    });
  }

  /** True when `err` is an object carrying `status === 429`. */
  private static isRateLimitError(err: unknown): boolean {
    return typeof err === "object" && err !== null &&
      (err as { status?: unknown }).status === 429;
  }

  /**
   * Starts the next queued job if the concurrency and spacing limits allow it;
   * otherwise arms (at most) one timer that retries when the spacing elapses.
   */
  private runNext(): void {
    if (this.running >= this.maxConcurrent || this.queue.length === 0) return;

    const now = Date.now();
    const wait = this.minInterval - (now - this.lastRunTime);
    if (wait > 0) {
      // A single shared timer is enough: when it fires, runNext re-evaluates
      // the whole state, so extra timers would only cause redundant wake-ups.
      if (this.wakeTimer === undefined) {
        this.wakeTimer = setTimeout(() => {
          this.wakeTimer = undefined;
          this.runNext();
        }, wait);
      }
      return;
    }

    const job = this.queue.shift();
    if (!job) return;

    this.running++;
    this.lastRunTime = now;
    // The job handles its own errors, so this chain never rejects.
    void job().finally(() => {
      this.running--;
      this.runNext();
    });

    // Make sure the remaining queue has a wake-up scheduled.
    this.runNext();
  }

  /** Tightens the limits after a rate-limit response. */
  private adjustLimits() {
    this.maxConcurrent = Math.max(1, this.maxConcurrent - 1);
    this.minInterval += 500;
    console.log(`[AdaptiveRateLimiter] Adjusted limits: maxConcurrent=${this.maxConcurrent}, minInterval=${this.minInterval}ms`);
  }
}

const rateLimiter = new AdaptiveRateLimiter();
|
|
|
|
|
|
|
|
/**
 * Collects API response times: a rolling window of the latest samples per
 * metric (for averages) plus an unbounded list of all samples since the last
 * `clearDailyData()` call.
 */
class PerformanceMonitor {
  /** Rolling sample windows, keyed by metric name. */
  private samples: Map<string, number[]> = new Map();
  /** Maximum number of samples kept per metric. */
  private readonly sampleSize = 10;
  /** Every response time recorded since the last clear. */
  private dailyData: number[] = [];

  /** Appends `value` to the metric's window, evicting the oldest sample when full. */
  private updateMetric(metric: string, value: number) {
    let window = this.samples.get(metric);
    if (window === undefined) {
      window = [];
      this.samples.set(metric, window);
    }
    window.push(value);
    if (window.length > this.sampleSize) {
      window.shift();
    }
  }

  /** Mean of the metric's current window, or 0 when nothing was recorded. */
  private getAverageMetric(metric: string): number {
    const window = this.samples.get(metric);
    if (!window || window.length === 0) return 0;
    let total = 0;
    for (const sample of window) {
      total += sample;
    }
    return total / window.length;
  }

  /** Records one API response time (the DeepLX path passes milliseconds). */
  updateApiResponseTime(responseTime: number) {
    this.updateMetric('apiResponseTime', responseTime);
    this.dailyData.push(responseTime);
  }

  /** Average over the most recent response times (at most `sampleSize`). */
  getAverageApiResponseTime(): number {
    return this.getAverageMetric('apiResponseTime');
  }

  /** Returns a copy of all response times recorded since the last clear. */
  getDailyData(): number[] {
    return this.dailyData.slice();
  }

  /** Drops the accumulated daily samples; the rolling window is untouched. */
  clearDailyData() {
    this.dailyData = [];
  }

  /** Logs the rolling average response time. */
  logPerformanceMetrics() {
    const average = this.getAverageApiResponseTime().toFixed(2);
    console.log(`[Performance] Average API Response Time: ${average}ms`);
  }
}

const performanceMonitor = new PerformanceMonitor();
|
|
|
|
|
|
|
|
/** Summarises collected response times and pushes alerts through ntfy. */
class PerformanceAnalyzer {
  /**
   * Logs the average and maximum of `performanceData`, sends the report when
   * `config.SEND_REPORTS` is enabled, and sends a warning when the average
   * exceeds `config.ALERT_THRESHOLD`.
   *
   * @param performanceData Response times; the log lines label them as ms.
   */
  static async analyzeDailyPerformance(performanceData: number[]) {
    // Without samples the average would be NaN and the maximum -Infinity, so
    // report the absence of data instead of publishing meaningless numbers.
    if (performanceData.length === 0) {
      console.log(`每日性能报告:`);
      console.log(`无可用的性能数据`);
      return;
    }

    // Single pass; avoids spreading a potentially large array into Math.max,
    // which can exceed the engine's argument limit.
    let total = 0;
    let maxResponseTime = -Infinity;
    for (const sample of performanceData) {
      total += sample;
      maxResponseTime = Math.max(maxResponseTime, sample);
    }
    const avgResponseTime = total / performanceData.length;

    console.log(`每日性能报告:`);
    console.log(`平均响应时间:${avgResponseTime.toFixed(2)}ms`);
    console.log(`最大响应时间:${maxResponseTime}ms`);

    if (config.SEND_REPORTS) {
      await this.sendAlert(`每日性能报告:平均响应时间:${avgResponseTime.toFixed(2)}ms,最大响应时间:${maxResponseTime}ms`);
    }

    if (avgResponseTime > config.ALERT_THRESHOLD) {
      await this.sendAlert(`警告:平均响应时间 (${avgResponseTime.toFixed(2)}ms) 超过阈值`);
    }
  }

  /**
   * Publishes `message` to the configured ntfy topic. Failures are logged and
   * never thrown, so reporting cannot break the caller.
   */
  private static async sendAlert(message: string) {
    const title = "字幕翻译服务性能报告";

    try {
      // ntfy only interprets a JSON body when it is posted to the server root;
      // posted to the topic URL the raw JSON text becomes the message itself.
      const endpoint = new URL("/", config.NTFY_URL);
      const response = await fetch(endpoint, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
        },
        body: JSON.stringify({
          topic: config.NTFY_TOPIC,
          title: title,
          message: message,
          priority: 3,
        }),
      });

      if (response.ok) {
        console.log("警报发送成功");
      } else {
        console.error("发送警报失败:", response.statusText);
      }
    } catch (error) {
      console.error("发送警报时出错:", error);
    }
  }
}
|
|
|
|
|
|
|
|
/** Names of the supported translation back ends. */
type TranslationEngine = 'deeplx' | 'chatgpt';

/**
 * Routes a translation request to the selected engine.
 *
 * @param text           Text to translate.
 * @param sourceLanguage Source language code.
 * @param targetLanguage Target language code.
 * @param subtitleId     Identifier used in log messages and by the engines.
 * @param signal         Abort signal forwarded to the engine.
 * @param engine         Engine to use; anything other than `'chatgpt'` goes to DeepLX.
 * @returns The translated text produced by the engine.
 */
async function translateText(
  text: string,
  sourceLanguage: string,
  targetLanguage: string,
  subtitleId: string,
  signal: AbortSignal,
  engine: TranslationEngine = 'deeplx'
): Promise<string> {
  if (engine === 'chatgpt') {
    console.log(`[Dispatcher] 使用 ChatGPT 引擎翻译 (ID: ${subtitleId})`);
    // ChatGPT requests issued through the dispatcher use priority 0.
    return await translateWithChatGPT(text, sourceLanguage, targetLanguage, subtitleId, signal, 0);
  }

  console.log(`[Dispatcher] 使用 DeepLX 引擎翻译 (ID: ${subtitleId})`);
  return await translateWithDeepLX(text, sourceLanguage, targetLanguage, subtitleId, signal);
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/** A single translation request handled by the ChatGPT worker pool. */
type ChatGptWorkerTask = {
  text: string;
  sourceLanguage: string;
  targetLanguage: string;
  subtitleId: string;
  signal: AbortSignal;
};

/** Queue entry: the task plus the settle callbacks of the promise handed to the caller. */
type ChatGptQueueEntry = {
  task: ChatGptWorkerTask;
  resolve: (value: string) => void;
  reject: (reason?: unknown) => void;
  priority: number;
};

/**
 * Priority queue of chat-completion translation requests executed by a bounded
 * number of concurrent workers, with a minimum spacing between request starts,
 * result caching in `translationCache`, a 30 s request timeout and exponential
 * back-off retries for transient failures.
 */
class ChatGPT_WorkerPool {
  /** Waiting entries, ordered by descending priority (FIFO within equal priority). */
  private queue: ChatGptQueueEntry[] = [];
  private maxWorkers: number;
  private minInterval: number;
  private runningWorkers = 0;
  private lastRunTime = 0;
  /** The single pending wake-up timer, if any. */
  private wakeTimer: ReturnType<typeof setTimeout> | undefined;

  /**
   * Reads the limits from `config`. For models whose name contains `glm-` the
   * shared `config.MAXWORKERS` is overwritten with 2 before it is read.
   */
  constructor() {
    if (config.OPENAI_MODEL.includes('glm-')) {
      config.MAXWORKERS = 2;
    }
    // `||` is deliberate: a configured 0 falls back to the default.
    this.maxWorkers = config.MAXWORKERS || 3;
    this.minInterval = config.CHATGPT_REQUEST_INTERVAL || 300;
    console.log(`[WorkerPool] 初始化完成,最大并发数: ${this.maxWorkers}, 最小请求间隔: ${this.minInterval}ms`);
  }

  /**
   * Queues a translation request.
   *
   * @param task     Text, languages, log id and abort signal of the request.
   * @param priority Higher values are dequeued first.
   * @returns The model's reply (trimmed); an empty string when the reply has no content.
   */
  public translate(task: ChatGptWorkerTask, priority: number = 0): Promise<string> {
    return new Promise((resolve, reject) => {
      this.insertByPriority({ task, resolve, reject, priority });
      this.tryStartWorker();
    });
  }

  /** Inserts `item` before the first entry with a strictly lower priority. */
  private insertByPriority(item: ChatGptQueueEntry) {
    const index = this.queue.findIndex((queued) => queued.priority < item.priority);
    this.queue.splice(index === -1 ? this.queue.length : index, 0, item);
  }

  /**
   * Starts the head of the queue if a worker slot is free and the minimum
   * interval since the previous start has elapsed; otherwise arms (at most)
   * one timer that retries once the interval is over.
   */
  private tryStartWorker(): void {
    if (this.runningWorkers >= this.maxWorkers || this.queue.length === 0) {
      return;
    }

    const now = Date.now();
    const wait = this.minInterval - (now - this.lastRunTime);
    if (wait > 0) {
      // One shared timer suffices; every wake-up re-evaluates the full state.
      if (this.wakeTimer === undefined) {
        this.wakeTimer = setTimeout(() => {
          this.wakeTimer = undefined;
          this.tryStartWorker();
        }, wait);
      }
      return;
    }

    const entry = this.queue.shift();
    if (!entry) return;
    const { task, resolve, reject } = entry;

    this.runningWorkers++;
    this.lastRunTime = now;
    console.log(`[WorkerPool] 🚀 翻译启动, ID: ${task.subtitleId}. (正在运行: ${this.runningWorkers}, 队列剩余: ${this.queue.length})`);

    this.singleTranslationTask(task)
      .then(resolve, reject)
      .finally(() => {
        this.runningWorkers--;
        console.log(`[WorkerPool] ✅ 翻译完成, ID: ${task.subtitleId}. (正在运行: ${this.runningWorkers})`);
        this.tryStartWorker();
      });

    // Schedule the wake-up for whatever is still waiting in the queue.
    this.tryStartWorker();
  }

  /** Extracts a printable message from an arbitrary thrown value. */
  private static describeError(error: unknown): string {
    return error instanceof Error ? error.message : String(error);
  }

  /** Builds the system prompt; `glm-` models get a shorter rule set. */
  private buildSystemPrompt(task: ChatGptWorkerTask): string {
    if (config.OPENAI_MODEL.includes('glm-')) {
      return `You are a text translation API. Your task is to translate the user's text from ${task.sourceLanguage} to ${task.targetLanguage}.
RULES:
1. Translate the text inside the square brackets for each numbered item.
2. Your response MUST be a numbered list with the exact same structure and the exact same number of items.
3. Preserve the separator "${config.CHATGPT_SAFE_SEPARATOR}" between each translated item.
4. Your output must ONLY be the translated numbered list.
`;
    }

    return `You are a text translation API. Your task is to translate the user's text from ${task.sourceLanguage} to ${task.targetLanguage}.

CRITICAL RULES:
1. Each item is strictly defined as "number. [content]". Treat every item as atomic and indivisible.
2. Translate ONLY the text inside [ ], keep numbers, brackets, punctuation, and the separator "${config.CHATGPT_SAFE_SEPARATOR}" EXACTLY unchanged.
3. Even if an item is a sentence fragment or looks incomplete, you MUST translate it literally and output it as a separate item. NEVER merge or combine with other items.
4. Input line N → Output line N. The number of items in the output MUST match the input EXACTLY.
5. Before finalizing, COUNT your output. If input has 25 items, output must also have 25 items. If not, FIX it.

WARNING: Merging, skipping, or changing item count will cause system failure.

EXAMPLE:
Input:
6. [dooming us to]‖7. [spend our lives in search of the other half.]

Output:
6. [注定我们要]‖7. [用一生去寻找另一半。]
`;
  }

  /**
   * Performs one HTTP attempt and returns the trimmed reply ("" when the
   * payload carries no text). The timeout timer and the abort listener are
   * released on every exit path, and the timeout also covers reading the body.
   *
   * @throws Error("RETRYABLE_PROXY_ERROR") for HTTP 429/500/502/503/504.
   * @throws Error("API Error: ...") for any other non-2xx status.
   */
  private async requestTranslation(
    task: ChatGptWorkerTask,
    cacheKey: string | undefined,
    timeoutMs: number
  ): Promise<string> {
    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(new Error("Request timed out after 30s")), timeoutMs);
    const onAbort = () => controller.abort();
    task.signal.addEventListener('abort', onAbort, { once: true });

    try {
      if (task.signal.aborted) throw new Error("Aborted before fetch");

      const response = await fetch(config.OPENAI_API_URL, {
        method: "POST",
        headers: { "Content-Type": "application/json", "Authorization": `Bearer ${OPENAI_API_KEY}` },
        body: JSON.stringify({
          model: config.OPENAI_MODEL,
          messages: [
            { role: "system", content: this.buildSystemPrompt(task) },
            { role: "user", content: task.text },
          ],
          temperature: 0.2,
          stream: false,
        }),
        signal: controller.signal,
      });

      if (!response.ok) {
        if ([429, 500, 502, 503, 504].includes(response.status)) {
          throw new Error("RETRYABLE_PROXY_ERROR");
        }
        const errorBody = await response.text();
        throw new Error(`API Error: ${response.status}, Body: ${errorBody.substring(0, 100)}`);
      }

      const result = await response.json();
      // Tolerate payloads without `choices` and keep the declared string
      // return type: a missing reply is reported as an empty string.
      const content: unknown = result?.choices?.[0]?.message?.content;
      const translation = typeof content === "string" ? content.trim() : "";

      if (translation && cacheKey !== undefined && task.text.trim().length > 0) {
        try {
          translationCache.set(cacheKey, translation);
          console.log(`[WorkerPool-Cache] STORED result for task ${task.subtitleId}`);
        } catch (cacheError: unknown) {
          console.warn(`[WorkerPool-Cache] Failed to cache result: ${ChatGPT_WorkerPool.describeError(cacheError)}`);
        }
      }

      return translation;
    } finally {
      clearTimeout(timeoutId);
      task.signal.removeEventListener('abort', onAbort);
    }
  }

  /**
   * Translates one task: cache lookup, HTTP request, and up to five retries
   * with exponential back-off for transient failures. The worker slot stays
   * occupied while a retry is waiting.
   *
   * @throws The underlying error once retries are exhausted, when the error is
   *         not retryable, or when the task's signal is aborted.
   */
  private async singleTranslationTask(
    task: ChatGptWorkerTask,
    retryCount = 0
  ): Promise<string> {
    const MAX_RETRIES = 5;
    const REQUEST_TIMEOUT = 30000;
    if (task.signal.aborted) throw new Error("Aborted by user signal");

    // The key is computed once per attempt and reused for the store step.
    let cacheKey: string | undefined;
    try {
      const textHash = await sha256(task.text.trim());
      cacheKey = `chatgpt-worker-${task.sourceLanguage}-${task.targetLanguage}-${textHash}`;

      const cachedResult = translationCache.get(cacheKey);
      if (cachedResult) {
        console.log(`[WorkerPool-Cache] HIT for task ${task.subtitleId}`);
        return cachedResult;
      }
      console.log(`[WorkerPool-Cache] MISS for task ${task.subtitleId}`);
    } catch (cacheError: unknown) {
      // Caching is best effort; a failing lookup must not fail the request.
      console.warn(`[WorkerPool-Cache] Cache check failed: ${ChatGPT_WorkerPool.describeError(cacheError)}`);
    }

    try {
      return await this.requestTranslation(task, cacheKey, REQUEST_TIMEOUT);
    } catch (error: unknown) {
      if (task.signal.aborted) throw error;

      const message = ChatGPT_WorkerPool.describeError(error);
      // "EMPTY_RESPONSE" is not thrown by this class; it is kept so errors of
      // that name raised elsewhere remain retryable.
      const retryable =
        message === "RETRYABLE_PROXY_ERROR" ||
        message === "EMPTY_RESPONSE" ||
        message.includes("timed out") ||
        (error instanceof TypeError && message.includes("fetch"));

      if (retryable && retryCount < MAX_RETRIES) {
        // Exponential back-off (1.5 s, 3 s, 6 s, ...) plus up to 1 s of jitter.
        const delay = Math.pow(2, retryCount) * 1500 + Math.random() * 1000;
        console.warn(`[WorkerPool] ⚠️ 重试 ID: ${task.subtitleId} (第 ${retryCount + 1} 次), 原因: ${message}. 等待 ${delay.toFixed(0)}ms...`);
        await new Promise(r => setTimeout(r, delay));
        return this.singleTranslationTask(task, retryCount + 1);
      }

      console.error(`[WorkerPool] ❌ 最终失败, ID: ${task.subtitleId}. 原因: ${message}`);
      throw error;
    }
  }
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Worker pool that collapses identical in-flight requests: while a request
 * for the same (source language, target language, trimmed text) is pending,
 * later callers receive the same promise instead of queueing a new request.
 *
 * NOTE(review): callers sharing a promise also share the first caller's abort
 * signal and priority — confirm this is acceptable.
 */
class DeduplicatedWorkerPool extends ChatGPT_WorkerPool {
  /** In-flight requests keyed by language pair and SHA-256 of the trimmed text. */
  private pendingRequests = new Map<string, Promise<string>>();

  /**
   * Queues `task` unless an identical request is already in flight.
   *
   * @param task     Text, languages, log id and abort signal of the request.
   * @param priority Queue priority, used only when a new request is queued.
   * @returns The translated text.
   */
  public async translate(task: {
    text: string;
    sourceLanguage: string;
    targetLanguage: string;
    subtitleId: string;
    signal: AbortSignal
  }, priority = 0): Promise<string> {
    const taskKey = `${task.sourceLanguage}-${task.targetLanguage}-${await sha256(task.text.trim())}`;

    const inFlight = this.pendingRequests.get(taskKey);
    if (inFlight !== undefined) {
      console.log(`[Dedup] HIT: Reusing in-flight request for ID: ${task.subtitleId}`);
      return inFlight;
    }

    // No await between the lookup above and the registration below, so two
    // concurrent callers cannot both miss.
    const promise = super.translate(task, priority);
    this.pendingRequests.set(taskKey, promise);

    // Clean up on both outcomes. `then(f, f)` is used instead of `finally`
    // because `finally` returns a derived promise that rejects along with the
    // original and, having no handler, would surface as an unhandled rejection.
    const forget = () => {
      this.pendingRequests.delete(taskKey);
    };
    promise.then(forget, forget);

    return promise;
  }
}

const chatGptWorkerPool = new DeduplicatedWorkerPool();
|
|
|
|
|
|
|
|
/**
 * Computes the SHA-256 digest of a string's UTF-8 encoding.
 *
 * @param str Text to hash.
 * @returns The digest as 64 lower-case hexadecimal characters.
 */
async function sha256(str: string): Promise<string> {
  const digest = await crypto.subtle.digest('SHA-256', new TextEncoder().encode(str));
  let hex = '';
  for (const byte of new Uint8Array(digest)) {
    // Two hex digits per byte, zero-padded.
    hex += byte.toString(16).padStart(2, '0');
  }
  return hex;
}
|
|
|
|
|
|
|
|
/**
 * Translates one text through the shared ChatGPT worker pool.
 *
 * @param text           Text to translate.
 * @param sourceLanguage Source language code.
 * @param targetLanguage Target language code.
 * @param subtitleId     Identifier used in the pool's log messages.
 * @param signal         Abort signal for the request.
 * @param priority       Queue priority; higher values run first.
 * @returns The translated text.
 */
async function translateWithChatGPT(
  text: string,
  sourceLanguage: string,
  targetLanguage: string,
  subtitleId: string,
  signal: AbortSignal,
  priority = 0
): Promise<string> {
  const task = { text, sourceLanguage, targetLanguage, subtitleId, signal };
  return chatGptWorkerPool.translate(task, priority);
}
|
|
|
|
|
|
|
|
|
|
|
/**
 * Splits subtitles into batches bounded by line and character limits and
 * translates all batches concurrently (actual concurrency is governed by the
 * worker pool).
 *
 * For models whose name contains `glm-` the shared
 * `config.CHATGPT_MAX_LINES_PER_REQUEST` is overwritten with 50.
 *
 * @param subtitlesToTranslate Subtitles in display order.
 * @param sourceLanguage       Source language code.
 * @param targetLanguage       Target language code.
 * @param signal               Batches not yet started when it aborts are skipped.
 * @param onBatchComplete      Called with each translated batch as it finishes.
 */
async function batchTranslateWithChatGPT(
  subtitlesToTranslate: SubtitleEntry[],
  sourceLanguage: string,
  targetLanguage: string,
  signal: AbortSignal,
  onBatchComplete: (translatedBatch: SubtitleEntry[]) => void
) {
  if (config.OPENAI_MODEL.includes('glm-')) {
    config.CHATGPT_MAX_LINES_PER_REQUEST = 50;
  }

  const batches: SubtitleEntry[][] = [];
  let pending: SubtitleEntry[] = [];
  let pendingChars = 0;

  for (const subtitle of subtitlesToTranslate) {
    const lineLimitReached = pending.length >= config.CHATGPT_MAX_LINES_PER_REQUEST;
    // A character limit of 0 disables the character check.
    const charLimitExceeded = Boolean(config.CHATGPT_MAX_CHARS_PER_REQUEST) &&
      pendingChars + subtitle.text.length > config.CHATGPT_MAX_CHARS_PER_REQUEST;

    // Close the current batch before adding a line that would break a limit;
    // an empty batch always accepts the line, however long it is.
    if (pending.length > 0 && (lineLimitReached || charLimitExceeded)) {
      batches.push(pending);
      pending = [];
      pendingChars = 0;
    }

    pending.push(subtitle);
    pendingChars += subtitle.text.length;
  }
  if (pending.length > 0) {
    batches.push(pending);
  }

  console.log(`[Director] 字幕被分成了 ${batches.length} 个初始批次。`);

  const jobs = batches.map(async (batch) => {
    if (signal.aborted) return;

    const translatedBatch = await processSingleBatchWithSmartFallback(batch, sourceLanguage, targetLanguage, signal);

    if (onBatchComplete) {
      onBatchComplete(translatedBatch);
    }
  });

  await Promise.all(jobs);
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Earlier variant of the batch translator (note the trailing underscore; the
 * recursive calls below go to `processSingleBatchWithSmartFallback`).
 *
 * The batch is sent as one merged request of numbered items. An empty reply
 * splits the batch in half; an item-count mismatch, an empty item or an API
 * error falls back to line-by-line translation.
 *
 * @param batch          Subtitles to translate together.
 * @param sourceLanguage Source language code.
 * @param targetLanguage Target language code.
 * @param signal         Abort signal; aborted batches get "[翻译中止]" placeholders.
 * @param depth          Recursion depth, also used as request priority.
 * @returns The batch entries with `translatedText` filled in.
 */
async function processSingleBatchWithSmartFallback_(
  batch: SubtitleEntry[],
  sourceLanguage: string,
  targetLanguage: string,
  signal: AbortSignal,
  depth = 0
): Promise<SubtitleEntry[]> {
  const batchId = batch.length > 0 ? `${batch[0].id}...` : 'empty';
  console.log(`[SmartFallback] 正在处理批次大小为 ${batch.length} 行,深度为 ${depth} (ID: ${batchId})`);

  // Checked first: placed after the small-batch test this guard could never run.
  if (batch.length === 0) {
    console.log(`[SmartFallback] Empty batch, returning empty array`);
    return [];
  }

  if (depth > 5) {
    console.error(`[SmartFallback] Max recursion depth reached for ${batchId}. Switching to final line-by-line.`);
    return fallbackLineByLine(batch, sourceLanguage, targetLanguage, signal);
  }

  if (batch.length <= 3) {
    console.log(`[SmartFallback] Small batch (${batch.length} items), using line-by-line translation`);
    return fallbackLineByLine(batch, sourceLanguage, targetLanguage, signal);
  }

  // Removes the "N. [" prefix and "]" suffix of one item. The part is trimmed
  // first so whitespace around the separator does not defeat the anchors.
  const stripItemDecoration = (part: string): string =>
    part.trim().replace(/^\d+\.\s*\[?/, '').replace(/]?\s*$/, '').trim();

  try {
    const mergedText = batch.map((s, index) =>
      `${index + 1}. [${s.text.replace(/\n/g, " ")}]`
    ).join(config.CHATGPT_SAFE_SEPARATOR);

    const reply: unknown = await simpleTextTranslateWithChatGPT(
      mergedText,
      sourceLanguage,
      targetLanguage,
      batchId,
      signal,
      depth
    );
    // A missing reply is treated like an empty one.
    const translatedMergedText = typeof reply === "string" ? reply : "";

    if (translatedMergedText.trim() === "") {
      console.warn(`[SmartFallback-Triage] Batch ${batchId} returned empty content. Splitting batch in half...`);

      if (depth > 1) {
        const delayMs = 500 * depth;
        console.log(`[SmartFallback] Adding ${delayMs}ms delay before splitting (depth: ${depth})`);
        await new Promise(resolve => setTimeout(resolve, delayMs));
      }

      const mid = Math.ceil(batch.length / 2);
      console.log(`[SmartFallback] Splitting batch ${batchId} into ${mid} + ${batch.length - mid} parts`);

      const firstHalf = await processSingleBatchWithSmartFallback(
        batch.slice(0, mid),
        sourceLanguage,
        targetLanguage,
        signal,
        depth + 1
      );

      // Short pause between the two halves.
      await new Promise(resolve => setTimeout(resolve, 200));

      const secondHalf = await processSingleBatchWithSmartFallback(
        batch.slice(mid),
        sourceLanguage,
        targetLanguage,
        signal,
        depth + 1
      );

      return [...firstHalf, ...secondHalf];
    }

    let translatedParts = translatedMergedText.split(config.CHATGPT_SAFE_SEPARATOR);
    const receivedCount = translatedParts.length;

    if (receivedCount !== batch.length) {
      // The separator may have been lost; try to recover items by numbering.
      const lineMatches = translatedMergedText.match(/\d+\.\s*\[?([^\]]*)\]?/g);
      if (lineMatches && lineMatches.length === batch.length) {
        translatedParts = lineMatches.map(match =>
          match.replace(/^\d+\.\s*\[?/, '').replace(/\]?\s*$/, '').trim()
        );
      } else {
        console.error(`[SmartFallback-Triage] Mismatch for ${batchId} (received: ${receivedCount}, expected: ${batch.length}). Activating final fallback.`);
        console.error(`\n\n\n!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!`);
        console.error(`[CRIME SCENE] BATCH #${batchId} FAILED: Mismatch Detected!`);
        console.error(`!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!`);
        console.error(` - Batch ID: ${batchId}`);
        console.error(` - EXPECTED LINES: ${batch.length}`);
        console.error(` - RECEIVED PARTS: ${receivedCount}`);
        console.error("\n--- MERGED ORIGINAL TEXT (SENT TO API) ---");
        console.error(mergedText);
        console.error("\n--- RECEIVED TRANSLATED TEXT (FROM API) ---");
        console.error(translatedMergedText);
        console.error("\n--- SPLIT PARTS (FOR DEBUGGING) ---");
        console.error(translatedParts);
        console.error(`!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!`);
        console.error(`[ACTION] Activating line-by-line fallback for this batch...`);
        console.error(`!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n\n\n`);

        return fallbackLineByLine(batch, sourceLanguage, targetLanguage, signal, 100);
      }
    }

    const cleanedParts = translatedParts.map(stripItemDecoration);
    const hasValidTranslations = cleanedParts.every(cleaned => cleaned.length > 0);

    if (!hasValidTranslations) {
      console.warn(`[SmartFallback-Triage] Poor quality translations detected for ${batchId}. Using line-by-line fallback.`);
      return fallbackLineByLine(batch, sourceLanguage, targetLanguage, signal, 100);
    }

    console.log(`[SmartFallback] ✅ 翻译成功 batch ${batchId} with merged translation.`);
    return batch.map((original, index) => ({
      ...original,
      translatedText: cleanedParts[index],
    }));
  } catch (error: unknown) {
    const errorName = error instanceof Error ? error.name : "";
    const errorMessage = error instanceof Error ? error.message : String(error);

    if (errorName === 'AbortError' || errorMessage.includes('Aborted')) {
      console.log(`[SmartFallback] 任务在 ${batchId} 处被中止。`);
      return batch.map(sub => ({ ...sub, translatedText: `[翻译中止]` }));
    }

    console.error(`[SmartFallback-Triage] Unrecoverable API error for ${batchId}: ${errorMessage}. Activating final fallback.`);

    // Brief pause before falling back.
    await new Promise(resolve => setTimeout(resolve, 1000));

    return fallbackLineByLine(batch, sourceLanguage, targetLanguage, signal, 100);
  }
}
|
|
|
|
|
/**
 * Translates a batch of subtitles with one merged request and, when that
 * fails, recursively retries on the two halves of the batch.
 *
 * The batch is sent as numbered items ("N. [text]") joined by
 * `config.CHATGPT_SAFE_SEPARATOR`. The reply is accepted only if it splits
 * into the same number of parts and every part has the "N. [..]" shape; any
 * failure (empty reply, count mismatch, format error, API error) triggers the
 * split. Batches of at most one line, or calls past the depth / retry limits,
 * are translated line by line.
 *
 * @param batch          Subtitles to translate together.
 * @param sourceLanguage Source language code (required).
 * @param targetLanguage Target language code (required).
 * @param signal         Abort signal; aborted entries get "[翻译中止]".
 * @param depth          Recursion depth, also used as request priority.
 * @param retryCount     Number of splits performed so far on this path.
 * @returns The batch entries, in order, with `translatedText` filled in.
 * @throws Error when `batch` is not an array or a language code is missing.
 */
async function processSingleBatchWithSmartFallback(
  batch: SubtitleEntry[],
  sourceLanguage: string,
  targetLanguage: string,
  signal: AbortSignal,
  depth = 0,
  retryCount = 0
): Promise<SubtitleEntry[]> {
  if (!batch || !Array.isArray(batch)) {
    throw new Error("Invalid batch parameter");
  }
  if (!sourceLanguage || !targetLanguage) {
    throw new Error("Language parameters are required");
  }

  const batchId = batch.length > 0 ? `${batch[0].id} (size ${batch.length}, depth ${depth})` : 'empty';
  console.log(`[SmartFallback] Processing batch: ${batchId}`);

  // Depth budget shrinks with the batch size and never exceeds 4.
  const maxDepth = Math.min(4, Math.floor(Math.log2(batch.length)) + 2);

  if (batch.length <= 1 || depth > maxDepth || retryCount > 3) {
    if (depth > maxDepth) console.error(`[SmartFallback] Max recursion depth reached for ${batchId}.`);
    if (retryCount > 3) console.error(`[SmartFallback] Max retries exceeded for ${batchId}.`);
    console.log(`[SmartFallback] Using line-by-line for ${batchId}.`);
    return fallbackLineByLine(batch, sourceLanguage, targetLanguage, signal);
  }

  try {
    const mergedText = batch.map((s, index) =>
      `${index + 1}. [${s.text.replace(/\n/g, " ")}]`
    ).join(config.CHATGPT_SAFE_SEPARATOR);

    const translatedMergedText = await simpleTextTranslateWithChatGPT(
      mergedText, sourceLanguage, targetLanguage, batchId, signal, depth
    );

    if (!translatedMergedText || translatedMergedText.trim() === "") {
      throw new Error("API returned empty content.");
    }

    // Parts are trimmed once here so the format check and the stripping below
    // see the same text; whitespace or a newline around the separator would
    // otherwise leave the "N. [" prefix in the final translation.
    const translatedParts = translatedMergedText
      .split(config.CHATGPT_SAFE_SEPARATOR)
      .map(part => part.trim());

    if (translatedParts.length !== batch.length) {
      console.warn(`[SmartFallback] Mismatch for ${batchId} (${translatedParts.length} vs ${batch.length}).`);
      throw new Error("Mismatch detected.");
    }

    const formatRegex = /^\s*\d+\.\s*\[.*]\s*$/;
    if (!translatedParts.every(p => formatRegex.test(p))) {
      console.warn(`[SmartFallback] Format error detected in ${batchId}.`);
      throw new Error("Format error detected.");
    }

    console.log(`[SmartFallback] ✅ SUCCESS for batch: ${batchId}`);
    return batch.map((original, index) => ({
      ...original,
      translatedText: translatedParts[index].replace(/^\d+\.\s*\[?/, '').replace(/]?\s*$/, '').trim(),
    }));
  } catch (error: unknown) {
    if (signal.aborted) return batch.map(s => ({...s, translatedText: "[翻译中止]"}));

    const reason = error instanceof Error ? error.message : String(error);
    console.warn(`[SmartFallback-Fallback] ⚠️⚠️ Batch ${batchId} failed: ${reason}. Splitting and retrying with controlled concurrency.`);

    // Back off before retrying; grows with depth and retries, capped at 5 s.
    const delayMs = Math.min(500 * (depth + 1) * (retryCount + 1), 5000);
    await new Promise(resolve => setTimeout(resolve, delayMs));

    const mid = Math.ceil(batch.length / 2);

    const firstHalf = await processSingleBatchWithSmartFallback(
      batch.slice(0, mid),
      sourceLanguage,
      targetLanguage,
      signal,
      depth + 1,
      retryCount + 1
    );

    if (signal.aborted) return [...firstHalf, ...batch.slice(mid).map(s => ({...s, translatedText: "[翻译中止]"}))];

    // Pause between the halves, capped at 3 s.
    const interBatchDelay = Math.min(1000 * (depth + 1), 3000);
    await new Promise(resolve => setTimeout(resolve, interBatchDelay));

    const secondHalf = await processSingleBatchWithSmartFallback(
      batch.slice(mid),
      sourceLanguage,
      targetLanguage,
      signal,
      depth + 1,
      retryCount + 1
    );

    return [...firstHalf, ...secondHalf];
  }
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Submits a raw text to the shared ChatGPT worker pool and returns the reply
 * without any post-processing.
 *
 * @param text           Text (or merged batch text) to translate.
 * @param sourceLanguage Source language code.
 * @param targetLanguage Target language code.
 * @param subtitleId     Identifier used in the pool's log messages.
 * @param signal         Abort signal for the request.
 * @param priority       Queue priority; higher values run first.
 * @returns The reply returned by the pool.
 */
async function simpleTextTranslateWithChatGPT(
  text: string,
  sourceLanguage: string,
  targetLanguage: string,
  subtitleId: string,
  signal: AbortSignal,
  priority = 0
): Promise<string> {
  const task = { text, sourceLanguage, targetLanguage, subtitleId, signal };
  return chatGptWorkerPool.translate(task, priority);
}
|
|
|
|
|
|
|
|
/**
 * Serial line-by-line fallback: translates the subtitles one after another.
 *
 * Entries reached after the signal aborted get "[翻译中止]"; entries whose
 * translation throws get "[翻译失败] " followed by the source text.
 *
 * @param subtitles      Subtitles to translate individually.
 * @param sourceLanguage Source language code.
 * @param targetLanguage Target language code.
 * @param signal         Abort signal checked before and after every request.
 * @param priority       Queue priority passed to the worker pool.
 * @returns One entry per input subtitle, in the same order.
 */
async function fallbackLineByLine_old(
  subtitles: SubtitleEntry[],
  sourceLanguage: string,
  targetLanguage: string,
  signal: AbortSignal,
  priority = 100
): Promise<SubtitleEntry[]> {
  console.log(`[Fallback-Serial] 启动串行按行翻译,处理 ${subtitles.length} 行...`);
  const results: SubtitleEntry[] = [];

  for (const subtitle of subtitles) {
    let translatedText: string;

    if (signal.aborted) {
      console.log(`[Fallback-Serial] 翻译在 ${subtitle.id} 处被中止。`);
      translatedText = `[翻译中止]`;
    } else {
      try {
        translatedText = await simpleTextTranslateWithChatGPT(
          subtitle.text,
          sourceLanguage,
          targetLanguage,
          `${subtitle.id}-fallback`,
          signal,
          priority
        );
      } catch {
        // Distinguish a user abort from a genuine translation failure.
        translatedText = signal.aborted ? `[翻译中止]` : `[翻译失败] ${subtitle.text}`;
      }
    }

    results.push({ ...subtitle, translatedText });
  }

  return results;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Line-by-line fallback: submits every subtitle as its own request to the
 * worker pool at once (the pool bounds the real concurrency).
 *
 * Never rejects: aborted entries get "[翻译中止]", entries whose translation
 * throws get "[翻译失败] " followed by the source text.
 *
 * @param subtitles      Subtitles to translate individually.
 * @param sourceLanguage Source language code.
 * @param targetLanguage Target language code.
 * @param signal         Abort signal for all requests.
 * @param priority       Queue priority passed to the worker pool.
 * @returns One entry per input subtitle, in the same order.
 */
async function fallbackLineByLine(
  subtitles: SubtitleEntry[],
  sourceLanguage: string,
  targetLanguage: string,
  signal: AbortSignal,
  priority = 100
): Promise<SubtitleEntry[]> {
  console.log(`[Fallback-Parallel] 启动【并行】按行翻译,处理 ${subtitles.length} 行...`);

  const asAborted = (subtitle: SubtitleEntry): SubtitleEntry =>
    ({ ...subtitle, translatedText: `[翻译中止]` });

  // Nothing is queued when the caller has already aborted.
  if (signal.aborted) {
    return subtitles.map(subtitle => asAborted(subtitle));
  }

  const translationPromises = subtitles.map(async (subtitle): Promise<SubtitleEntry> => {
    if (signal.aborted) {
      return asAborted(subtitle);
    }
    try {
      const translatedText = await simpleTextTranslateWithChatGPT(
        subtitle.text,
        sourceLanguage,
        targetLanguage,
        `${subtitle.id}-fallback`,
        signal,
        priority
      );
      return { ...subtitle, translatedText };
    } catch (error: unknown) {
      // The thrown value may be anything; inspect `name` only on objects so
      // this handler itself cannot throw and reject the whole Promise.all.
      const isAbortError = typeof error === "object" && error !== null &&
        (error as { name?: unknown }).name === 'AbortError';
      if (signal.aborted || isAbortError) {
        return asAborted(subtitle);
      }
      return { ...subtitle, translatedText: `[翻译失败] ${subtitle.text}` };
    }
  });

  return Promise.all(translationPromises);
}
|
|
|
|
|
/**
 * Translates `text` through the DeepLX HTTP endpoint (`config.DEEPLX_API_URL`).
 *
 * Results are memoised in `translationCache` under a key built from the
 * language pair, the subtitle id and the text. Every request is preceded by a
 * `config.DELAY_BETWEEN_REQUESTS` pause. HTTP 429 responses are retried from
 * inside the same task after a fixed back-off, a bounded number of times.
 * When `config.USE_ADAPTIVE_RATE_LIMITER` is set the work is handed to
 * `rateLimiter.schedule`, otherwise it runs directly.
 *
 * @param text           Text to translate.
 * @param sourceLanguage Sent as `source_lang`.
 * @param targetLanguage Sent as `target_lang`.
 * @param subtitleId     Used for the cache key and log lines.
 * @param signal         Abort signal; also passed to `fetch`.
 * @returns The translated text (after marker re-insertion).
 * @throws Error("Translation aborted") when the signal is already aborted
 *         before an attempt starts; an `Error` whose message starts with
 *         "DeepLX API error:" for non-OK responses, exhausted 429 retries or a
 *         payload without a string `data` field; any error raised by `fetch`.
 */
async function translateWithDeepLX(
  text: string,
  sourceLanguage: string,
  targetLanguage: string,
  subtitleId: string,
  signal: AbortSignal
): Promise<string> {
  const cacheKey = `${sourceLanguage}-${targetLanguage}-${subtitleId}-${text}`;
  console.log(`[DeepLX] 尝试翻译文本 (subtitleId: ${subtitleId})`);

  const cachedTranslation = translationCache.get(cacheKey);
  if (cachedTranslation) {
    console.log(`[DeepLX] 使用缓存的翻译结果 (subtitleId: ${subtitleId})`);
    return cachedTranslation;
  }

  // Upper bound on retries after HTTP 429, and the pause before each retry.
  const MAX_RATE_LIMIT_RETRIES = 5;
  const RATE_LIMIT_BACKOFF_MS = 5000;

  const sleep = (ms: number) => new Promise<void>((resolve) => setTimeout(resolve, ms));

  const throwIfAborted = () => {
    if (signal.aborted) {
      console.log(`[DeepLX] 翻译被中止 (subtitleId: ${subtitleId})`);
      throw new Error("Translation aborted");
    }
  };

  const translate = async (): Promise<string> => {
    throwIfAborted();
    try {
      for (let attempt = 0; ; attempt++) {
        // Re-check before every retry so a cancelled run stops waiting.
        if (attempt > 0) {
          throwIfAborted();
        }

        console.log(`[DeepLX] 等待 ${config.DELAY_BETWEEN_REQUESTS}ms 后发送请求 (subtitleId: ${subtitleId})`);
        await sleep(config.DELAY_BETWEEN_REQUESTS);

        console.log(`[DeepLX] 发送翻译请求到 API (subtitleId: ${subtitleId})`);
        const startTime = Date.now();
        const response = await fetch(config.DEEPLX_API_URL, {
          method: "POST",
          headers: {
            "Content-Type": "application/json"
          },
          body: JSON.stringify({
            text,
            source_lang: sourceLanguage,
            target_lang: targetLanguage,
          }),
          signal,
        });
        const endTime = Date.now();
        performanceMonitor.updateApiResponseTime(endTime - startTime);

        if (response.status === 429) {
          // Release the unread body before waiting.
          await response.body?.cancel();
          if (attempt >= MAX_RATE_LIMIT_RETRIES) {
            throw new Error(`DeepLX API error: still rate limited after ${attempt + 1} attempts`);
          }
          console.warn(`[DeepLX] 遇到限流,等待后重试 (subtitleId: ${subtitleId})`);
          // Retry in place instead of calling translateWithDeepLX again:
          // that would go back through rateLimiter.schedule while this
          // scheduled task is still running.
          await sleep(RATE_LIMIT_BACKOFF_MS);
          continue;
        }

        if (!response.ok) {
          await response.body?.cancel();
          throw new Error(`DeepLX API error: ${response.status} ${response.statusText}`);
        }

        const result = await response.json();
        const translation: unknown = result?.data;
        if (typeof translation !== "string") {
          // Without this guard a missing `data` field surfaces as a TypeError
          // on `.substring` below.
          throw new Error("DeepLX API error: response did not contain translated text");
        }
        console.log(`[DeepLX] 翻译成功: ${translation.substring(0, 50)}... (subtitleId: ${subtitleId})`);

        // Re-insert ‖…‖ marker spans found in the source text.
        // NOTE(review): marker number `index` is inserted at character offset
        // `index * marker.length` of the translation whether or not the
        // translation already contains it — confirm this is the intended
        // placement; the logic is kept exactly as it was.
        const markerRegex = new RegExp(`${config.SUBTITLE_MARKER}.*?${config.SUBTITLE_MARKER}`, 'g');
        const markers = text.match(markerRegex) || [];
        let translatedTextWithMarkers = translation;
        markers.forEach((marker, index) => {
          translatedTextWithMarkers = translatedTextWithMarkers.replace(
            new RegExp(`^(.{${index * marker.length}})(.*)`, 's'),
            `$1${marker}$2`
          );
        });

        translationCache.set(cacheKey, translatedTextWithMarkers);
        return translatedTextWithMarkers;
      }
    } catch (error) {
      console.error(`[DeepLX] 翻译失败 (subtitleId: ${subtitleId}):`, error);
      throw error;
    }
  };

  if (config.USE_ADAPTIVE_RATE_LIMITER) {
    return rateLimiter.schedule(translate);
  } else {
    return translate();
  }
}
|
|
|
|
|
|
|
|
|
|
|
/**
 * Translates `text` with the selected engine, falling back to part-by-part
 * translation when the whole-text request fails.
 *
 * The fallback splits `text` on `config.SUBTITLE_SEPARATOR`, translates each
 * part with the id `<subtitleId>-part<i>`, and joins the results with the same
 * separator, so the number and order of parts is preserved. A part whose
 * translation fails becomes `[翻译失败] <part>`; blank parts are passed
 * through untouched without issuing a request.
 *
 * @param text           Text to translate (may contain several separator-delimited parts).
 * @param sourceLanguage Source language, forwarded to `translateText`.
 * @param targetLanguage Target language, forwarded to `translateText`.
 * @param subtitleId     Id used for logging and for deriving per-part ids.
 * @param signal         Abort signal for the current run.
 * @param engine         Engine selector, forwarded to `translateText`.
 * @returns The translated text.
 * @throws Error("Translation aborted") whenever the signal is found aborted
 *         after a failure or before starting a part.
 */
async function translateWithFallback(
  text: string,
  sourceLanguage: string,
  targetLanguage: string,
  subtitleId: string,
  signal: AbortSignal,
  engine: TranslationEngine
): Promise<string> {
  try {
    console.log(`[Fallback] 尝试整体翻译 (subtitleId: ${subtitleId})`);
    return await translateText(text, sourceLanguage, targetLanguage, subtitleId, signal, engine);
  } catch (error) {
    console.error(`[Fallback] 整体翻译失败 (subtitleId: ${subtitleId}):`, error);
    if (signal.aborted) {
      throw new Error("Translation aborted");
    }

    const parts = text.split(config.SUBTITLE_SEPARATOR);
    console.log(`[Fallback] 尝试单独翻译 ${parts.length} 个部分 (subtitleId: ${subtitleId})`);

    const translatedParts: string[] = [];
    for (let i = 0; i < parts.length; i++) {
      if (signal.aborted) {
        throw new Error("Translation aborted");
      }

      // Nothing to translate: keep the slot so part indices stay aligned.
      if (parts[i].trim() === "") {
        translatedParts.push(parts[i]);
        continue;
      }

      try {
        const partId = `${subtitleId}-part${i}`;
        const translatedPart = await translateText(parts[i], sourceLanguage, targetLanguage, partId, signal, engine);
        translatedParts.push(translatedPart);
      } catch (partError) {
        console.error(`[Fallback] 部分翻译失败 (subtitleId: ${subtitleId}, part: ${i}):`, partError);
        // A cancellation must not be reported as a failed translation;
        // without this check an abort during the last part was returned as
        // a normal "[翻译失败]" result.
        if (signal.aborted) {
          throw new Error("Translation aborted");
        }
        translatedParts.push(`[翻译失败] ${parts[i]}`);
      }
    }

    return translatedParts.join(config.SUBTITLE_SEPARATOR);
  }
}
|
|
|
|
|
/**
 * DeepLX-only variant of the whole-text-then-per-part fallback.
 *
 * NOTE(review): judging by its name this appears to be superseded by
 * `translateWithFallback` (which takes an engine argument) — confirm whether
 * any caller remains before removing it.
 *
 * Tries `translateWithDeepLX` on the whole text; on failure splits on
 * `config.SUBTITLE_SEPARATOR`, translates every part with the id
 * `<subtitleId>-part<i>` and joins the results with the same separator. A
 * failed part becomes `[翻译失败] <part>`; blank parts are passed through
 * without issuing a request.
 *
 * @param text           Text to translate.
 * @param sourceLanguage Source language, forwarded to `translateWithDeepLX`.
 * @param targetLanguage Target language, forwarded to `translateWithDeepLX`.
 * @param subtitleId     Id used for logging and for deriving per-part ids.
 * @param signal         Abort signal for the current run.
 * @returns The translated text.
 * @throws Error("Translation aborted") whenever the signal is found aborted
 *         after a failure or before starting a part.
 */
async function translateWithFallback_old(
  text: string,
  sourceLanguage: string,
  targetLanguage: string,
  subtitleId: string,
  signal: AbortSignal
): Promise<string> {
  try {
    console.log(`[Fallback] 尝试整体翻译 (subtitleId: ${subtitleId})`);
    return await translateWithDeepLX(text, sourceLanguage, targetLanguage, subtitleId, signal);
  } catch (error) {
    console.error(`[Fallback] 整体翻译失败 (subtitleId: ${subtitleId}):`, error);
    if (signal.aborted) {
      console.log(`[Fallback] 翻译被中止 (subtitleId: ${subtitleId})`);
      throw new Error("Translation aborted");
    }

    const parts = text.split(config.SUBTITLE_SEPARATOR);
    console.log(`[Fallback] 尝试单独翻译 ${parts.length} 个部分 (subtitleId: ${subtitleId})`);

    const translatedParts: string[] = [];
    for (let i = 0; i < parts.length; i++) {
      if (signal.aborted) {
        console.log(`[Fallback] 翻译过程中被中止 (subtitleId: ${subtitleId})`);
        throw new Error("Translation aborted");
      }

      // Nothing to translate: keep the slot so part indices stay aligned.
      if (parts[i].trim() === "") {
        translatedParts.push(parts[i]);
        continue;
      }

      try {
        const partId = `${subtitleId}-part${i}`;
        const translatedPart = await translateWithDeepLX(parts[i], sourceLanguage, targetLanguage, partId, signal);
        translatedParts.push(translatedPart);
      } catch (partError) {
        console.error(`[Fallback] 部分翻译失败 (subtitleId: ${subtitleId}, part: ${i}):`, partError);
        // A cancellation must not be reported as a failed translation;
        // without this check an abort during the last part was returned as
        // a normal "[翻译失败]" result.
        if (signal.aborted) {
          console.log(`[Fallback] 翻译过程中被中止 (subtitleId: ${subtitleId})`);
          throw new Error("Translation aborted");
        }
        translatedParts.push(`[翻译失败] ${parts[i]}`);
      }
    }

    console.log(`[Fallback] 单独翻译完成 (subtitleId: ${subtitleId})`);
    return translatedParts.join(config.SUBTITLE_SEPARATOR);
  }
}
|
|
|
|
|
/**
 * Hook applied to the subtitle list received with an "initialize" message.
 *
 * Currently a pass-through: the very same array instance is handed back,
 * without copying or validation.
 *
 * @param subtitles Subtitle entries supplied by the client.
 * @returns The same array that was passed in.
 */
function initializeSubtitles(subtitles: SubtitleEntry[]): SubtitleEntry[] {
  const prepared = subtitles;
  return prepared;
}
|
|
|
|
|
/**
 * Greedily packs consecutive subtitles into merged entries so that fewer,
 * larger translation requests are needed.
 *
 * Entries are appended to the pending group in order; when adding the next
 * entry would push the accumulated text length past
 * `config.OPTIMAL_TEXT_LENGTH`, the pending group is closed via `mergeGroup`
 * first. A single entry longer than the limit therefore ends up alone in its
 * own group.
 *
 * @param subtitles Entries to pack, in display order.
 * @returns The merged entries, in the same order.
 */
function mergeSubtitles(subtitles: SubtitleEntry[]): SubtitleEntry[] {
  console.log(`[Merger] 开始合并 ${subtitles.length} 条字幕`);

  const groups: SubtitleEntry[] = [];
  let pending: SubtitleEntry[] = [];
  let pendingChars = 0;

  // Closes the pending group, if there is one, and starts a fresh one.
  const flush = () => {
    if (pending.length === 0) {
      return;
    }
    groups.push(mergeGroup(pending));
    pending = [];
    pendingChars = 0;
  };

  subtitles.forEach((entry) => {
    const size = entry.text.length;
    if (pendingChars + size > config.OPTIMAL_TEXT_LENGTH) {
      flush();
    }
    pending.push(entry);
    pendingChars += size;
  });
  flush();

  console.log(`[Merger] 合并完成,得到 ${groups.length} 个合并组`);
  return groups;
}
|
|
|
|
|
|
|
|
/**
 * Collapses a non-empty run of subtitles into one merged entry.
 *
 * Line breaks inside each subtitle are replaced by `<br>` so that
 * `config.SUBTITLE_SEPARATOR` only ever separates whole subtitles in the
 * merged text. The merged entry spans from the first entry's start time to the
 * last entry's end time and keeps the source entries in `originalSubtitles`.
 *
 * @param group Entries to merge; must contain at least one element.
 * @returns The merged entry, with id `merged_<firstId>_to_<lastId>`.
 */
function mergeGroup(group: SubtitleEntry[]): SubtitleEntry {
  const first = group[0];
  const last = group[group.length - 1];

  const lines: string[] = [];
  for (const entry of group) {
    lines.push(entry.text.replace(/\n/g, '<br>'));
  }

  return {
    id: `merged_${first.id}_to_${last.id}`,
    startTime: first.startTime,
    endTime: last.endTime,
    text: lines.join(config.SUBTITLE_SEPARATOR),
    originalSubtitles: group
  };
}
|
|
|
|
|
/**
 * Distributes the translation of a merged entry back onto its source entries.
 *
 * The translated text is split on `config.SUBTITLE_SEPARATOR` and assigned by
 * position. A source entry whose slot is missing or empty gets
 * `[翻译失败] <original text>`; surplus translated parts are ignored. An entry
 * without `originalSubtitles` yields an empty array.
 *
 * @param mergedSubtitle Merged entry carrying `translatedText` and `originalSubtitles`.
 * @returns One copy of each source entry with `translatedText` filled in.
 */
function splitMergedSubtitle(mergedSubtitle: SubtitleEntry): SubtitleEntry[] {
  console.log(`[Splitter] 拆分合并的字幕: ${mergedSubtitle.id}`);

  const pieces = (mergedSubtitle.translatedText || '').split(config.SUBTITLE_SEPARATOR);
  const sources = mergedSubtitle.originalSubtitles || [];

  const result: SubtitleEntry[] = [];
  for (let i = 0; i < sources.length; i++) {
    const source = sources[i];
    const piece = pieces[i];
    result.push({
      ...source,
      translatedText: piece ? piece : `[翻译失败] ${source.text}`
    });
  }
  return result;
}
|
|
|
|
|
|
|
|
/**
 * Drives a single client connection.
 *
 * Messages are JSON objects dispatched on their `action` field:
 * - `initialize`             stores the subtitle list and clears the set of translated ids
 * - `translate`              translates every stored subtitle starting at or after `timestamp`
 *                            that has not been translated yet, streaming `translationResult`
 *                            messages followed by `translationComplete`
 * - `stopTranslation`        aborts the running translation and replies `translationStopped`
 * - `closeConnection`        closes the socket
 * - `heartbeat`              answered with `heartbeatResponse`
 * - `heartbeatResponse`      logged only
 * - `setAdaptiveRateLimiter` toggles `config.USE_ADAPTIVE_RATE_LIMITER` (process-wide)
 *
 * Cancellation: each translation run owns its own `AbortController`. The
 * run's `signal.aborted` is what the run checks, because `stopTranslation()`
 * clears the `shouldStopTranslation` flag synchronously, before the suspended
 * run can ever observe it.
 *
 * @param ws The accepted client socket handed over by the server's "connection" event.
 */
async function handleWebSocket(ws: WebSocket) {
  console.log("[WebSocket] 新的 WebSocket 连接已建立");

  let subtitles: SubtitleEntry[] = [];
  let heartbeatInterval: number;
  let isTranslating = false;
  let isConnected = true;
  let abortController: AbortController | null = null;
  const translatedSubtitleIds = new Set<string>();
  let shouldStopTranslation = false;

  // A caught value is `unknown`; only Error instances carry a `message`.
  const messageOf = (error: unknown): string =>
    error instanceof Error ? error.message : String(error);

  // NOTE(review): `readyState` / `WebSocket.OPEN` belong to the standard
  // WebSocket interface — confirm the object delivered by this server library
  // exposes them; if it does not, the first tick cancels the heartbeat.
  const heartbeat = () => {
    if (ws.readyState === WebSocket.OPEN) {
      ws.send(JSON.stringify({
        action: "heartbeat"
      }));
    } else {
      clearInterval(heartbeatInterval);
    }
  };

  // NOTE(review): confirm the library emits "open" after this listener is
  // registered; if it fires before the "connection" callback runs, the
  // heartbeat is never started.
  ws.on("open", () => {
    console.log("[WebSocket] 连接已打开");
    heartbeatInterval = setInterval(heartbeat, 30000);
    console.log("[WebSocket] 心跳机制已启动");
  });

  // Aborts the running translation (if any), resets the per-connection state
  // and notifies the client while the connection is still up.
  async function stopTranslation() {
    if (abortController) {
      abortController.abort();
      abortController = null;
    }
    isTranslating = false;
    shouldStopTranslation = false;
    console.log("[WebSocket] 翻译已停止");
    if (isConnected) {
      ws.send(JSON.stringify({
        action: "translationStopped"
      }));
    }
  }

  ws.on("message", async (message: string) => {
    if (!isConnected) return;

    try {
      const data = JSON.parse(message);

      switch (data.action) {
        case "initialize": {
          // Validate before touching the connection state, so a rejected
          // payload cannot replace a good subtitle list with a non-array.
          const initialized = initializeSubtitles(data.subtitles);
          if (!Array.isArray(initialized) || initialized.length === 0) {
            throw new Error("无效的字幕数组");
          }
          subtitles = initialized;
          translatedSubtitleIds.clear();
          // NOTE(review): this clears the busy flag without aborting a run
          // that may still be in flight — confirm that is intended.
          isTranslating = false;
          console.log(`[WebSocket] 初始化字幕数组,长度: ${subtitles.length}`);
          ws.send(JSON.stringify({
            action: "initialized"
          }));
          break;
        }

        case "translate": {
          console.log("[WebSocket] 收到翻译请求");
          if (isTranslating) {
            console.log("[WebSocket] 翻译已在进行中,忽略新的请求");
            return;
          }

          const {
            timestamp, sourceLanguage, targetLanguage, engine = 'deeplx'
          } = data;
          const currentTime = typeof timestamp === 'number' ? timestamp : parseFloat(timestamp);

          console.log(`[WebSocket] 开始翻译,引擎: ${engine}, 时间戳: ${currentTime}, 源语言: ${sourceLanguage}, 目标语言: ${targetLanguage}`);

          isTranslating = true;
          shouldStopTranslation = false;
          // This run's own controller. Comparing it with `abortController`
          // later tells whether the shared state still belongs to this run.
          const runController = new AbortController();
          abortController = runController;
          const signal = runController.signal;

          const isCancelled = (): boolean =>
            !isConnected || shouldStopTranslation || signal.aborted;
          const isAbortFailure = (error: unknown): boolean =>
            signal.aborted ||
            (error instanceof Error &&
              (error.name === 'AbortError' ||
                error.message.includes('aborted') ||
                error.message === "Translation stopped"));

          try {
            const subtitlesToTranslate = subtitles.filter(sub => sub.startTime >= currentTime && !translatedSubtitleIds.has(sub.id));
            console.log(`[Translator] 筛选出 ${subtitlesToTranslate.length} 条字幕需要翻译`);

            // Guess the language of the first few subtitles to skip work when
            // they are already in the target language.
            const sampleSize = Math.min(config.LANGUAGE_DETECTION_SAMPLE_SIZE, subtitlesToTranslate.length);
            const samples = subtitlesToTranslate.slice(0, sampleSize);
            const detectedLanguages = samples.map(sample => {
              const detectedCode = franc(sample.text) as string;
              return languageCodeMapping[detectedCode] || detectedCode;
            });

            console.log(`[Translator] 检测到的语言: ${detectedLanguages.join(', ')}`);

            // 'und' is excluded before counting.
            const validDetections = detectedLanguages.filter(lang => lang !== 'und');
            const targetLangCount = validDetections.filter(lang => lang === targetLanguage).length;
            const detectionThreshold = Math.max(1, Math.floor(validDetections.length * 0.6));

            // Most frequent detection; the earlier one wins a tie.
            const detectedLanguage = validDetections.length > 0 ?
              validDetections.reduce((a, b, _index, arr) =>
                arr.filter(v => v === a).length >= arr.filter(v => v === b).length ? a : b
              ) :
              'unknown';

            if (targetLangCount >= detectionThreshold) {
              console.log(`[Translator] 检测到字幕主要是目标语言 (${targetLanguage}),跳过翻译`);
              ws.send(JSON.stringify({
                action: "languageDetected",
                language: detectedLanguage,
                message: "Source language matches target language, translation skipped"
              }));
              ws.send(JSON.stringify({
                action: "translationComplete"
              }));
            } else {
              console.log(`[Translator] 检测到字幕不是目标语言,继续翻译`);

              ws.send(JSON.stringify({
                action: "languageDetected",
                language: "different",
                message: "Source language differs from target language, proceeding with translation"
              }));

              try {
                if (engine === 'chatgpt') {
                  // NOTE(review): this path never records ids in
                  // translatedSubtitleIds — confirm repeated requests are
                  // meant to re-translate.
                  await batchTranslateWithChatGPT(
                    subtitlesToTranslate,
                    sourceLanguage,
                    targetLanguage,
                    signal,
                    (translatedBatch) => {
                      if (!isCancelled()) {
                        ws.send(JSON.stringify({
                          action: "translationResult",
                          subtitles: translatedBatch,
                        }));
                      }
                    }
                  );
                } else {
                  console.log(`[DeepLX-Path] 开始合并与批处理翻译...`);
                  // The first subtitles are translated one by one so the
                  // viewer gets results quickly; the rest are merged.
                  const initialBatch = subtitlesToTranslate.slice(0, config.INITIAL_BATCH_SIZE);
                  console.log(`[Translator] 开始初始快速翻译,包含 ${initialBatch.length} 条字幕`);

                  const translatedInitialBatch = await Promise.all(initialBatch.map(async (item) => {
                    if (shouldStopTranslation || signal.aborted) throw new Error("Translation stopped");
                    const translatedText = await translateWithFallback(
                      item.text,
                      sourceLanguage,
                      targetLanguage,
                      item.id,
                      signal,
                      engine
                    );
                    translatedSubtitleIds.add(item.id);
                    return {
                      ...item,
                      translatedText
                    };
                  }));

                  if (!isCancelled()) {
                    console.log(`[WebSocket] 发送初始快速翻译结果,包含 ${translatedInitialBatch.length} 条字幕`);
                    ws.send(JSON.stringify({
                      action: "translationResult",
                      subtitles: translatedInitialBatch
                    }));
                  }

                  console.log(`[Translator] 合并前的字幕数量: ${subtitlesToTranslate.length}`);
                  const remainingSubtitles = subtitlesToTranslate.slice(config.INITIAL_BATCH_SIZE);
                  const mergedSubtitles = mergeSubtitles(remainingSubtitles);
                  console.log(`[Translator] 合并后的剩余字幕数量: ${mergedSubtitles.length}`);

                  for (let i = 0; i < mergedSubtitles.length; i += config.BATCH_SIZE) {
                    if (isCancelled()) {
                      console.log("[Translator] 连接已断开或收到停止命令,停止翻译");
                      break;
                    }

                    const batch = mergedSubtitles.slice(i, i + config.BATCH_SIZE);
                    console.log(`[Translator] 处理批次 ${i / config.BATCH_SIZE + 1}, 包含 ${batch.length} 条合并字幕`);

                    const translatedBatch = await Promise.all(batch.map(async (item) => {
                      if (shouldStopTranslation || signal.aborted) throw new Error("Translation stopped");
                      try {
                        console.log(`[Translator] 翻译文本: ${item.text.substring(0, 50)}...`);
                        const translatedText = await translateWithFallback(
                          item.text,
                          sourceLanguage,
                          targetLanguage,
                          item.id,
                          signal,
                          engine
                        );
                        console.log(`[Translator] 翻译完成: ${translatedText.substring(0, 50)}...`);
                        return {
                          ...item,
                          translatedText
                        };
                      } catch (error) {
                        console.error(`[Translator] 翻译失败: ${messageOf(error)}`);
                        // A cancelled group must not be turned into a
                        // "[翻译失败]" result: it would be sent to the client
                        // and recorded as translated.
                        if (isAbortFailure(error)) {
                          throw error;
                        }
                        return {
                          ...item,
                          translatedText: `[翻译失败] ${item.text}`
                        };
                      }
                    }));

                    if (!isCancelled()) {
                      const distributedResults = translatedBatch.flatMap(splitMergedSubtitle);
                      distributedResults.forEach(sub => translatedSubtitleIds.add(sub.id));
                      console.log(`[WebSocket] 发送翻译结果,包含 ${distributedResults.length} 条字幕`);
                      ws.send(JSON.stringify({
                        action: "translationResult",
                        subtitles: distributedResults
                      }));
                    }
                  }
                }

                // A stopped run has already been answered with
                // "translationStopped"; it must not also claim completion.
                if (!isCancelled()) {
                  console.log(`[WebSocket] 翻译完成,共翻译 ${subtitlesToTranslate.length} 条字幕`);
                  ws.send(JSON.stringify({
                    action: "translationComplete"
                  }));
                }
              } catch (error) {
                if (isAbortFailure(error)) {
                  console.log(`[Translator] 翻译任务已被用户成功中止。`);
                } else {
                  console.error("[Translator] 翻译过程中发生意外错误:", error);
                  if (isConnected) {
                    ws.send(JSON.stringify({
                      action: "error",
                      message: `An unexpected error occurred: ${messageOf(error)}`
                    }));
                  }
                }
              }
            }
          } finally {
            // Reset only when the shared state still belongs to this run. After
            // a stop it has already been reset, and a newer run may own it by
            // the time this one unwinds.
            if (abortController === runController) {
              isTranslating = false;
              abortController = null;
              shouldStopTranslation = false;
            }
          }

          performanceMonitor.logPerformanceMetrics();
          break;
        }

        case "stopTranslation":
          console.log("[WebSocket] 收到停止翻译请求");
          shouldStopTranslation = true;
          await stopTranslation();
          break;

        case "closeConnection":
          console.log("[WebSocket] 收到关闭连接请求");
          ws.close();
          break;

        case "heartbeatResponse":
          console.log("[WebSocket] 收到心跳响应");
          break;

        case "heartbeat":
          ws.send(JSON.stringify({
            action: "heartbeatResponse"
          }));
          break;

        case "setAdaptiveRateLimiter":
          // Writes to the shared `config` object, not to per-connection state.
          config.USE_ADAPTIVE_RATE_LIMITER = data.useAdaptiveRateLimiter;
          console.log(`[WebSocket] 设置 AdaptiveRateLimiter: ${config.USE_ADAPTIVE_RATE_LIMITER ? '启用' : '禁用'}`);
          ws.send(JSON.stringify({
            action: "adaptiveRateLimiterSet",
            useAdaptiveRateLimiter: config.USE_ADAPTIVE_RATE_LIMITER
          }));
          break;

        default:
          console.warn(`[WebSocket] 收到未知操作: ${data.action}`);
      }
    } catch (error) {
      console.error("[WebSocket] 处理消息时出错:", error);
      if (isConnected) {
        ws.send(JSON.stringify({
          action: "error",
          message: messageOf(error)
        }));
      }
    }
  });

  ws.on("close", () => {
    console.log("[WebSocket] 连接已关闭");
    isConnected = false;
    clearInterval(heartbeatInterval);
    if (isTranslating) {
      stopTranslation();
    }
  });

  ws.on("error", async (error) => {
    console.error("[WebSocket] 发生错误:", error);
    if (isTranslating) {
      await stopTranslation();
    }
  });
}
|
|
|
|
|
|
|
|
// WebSocket server listening on port 8000; every accepted socket gets its own
// handleWebSocket session.
const wss = new WebSocketServer(8000);

wss.on("connection", (ws: WebSocket) => {
  handleWebSocket(ws);
});

// Server-level errors. Failed handshakes and plain HTTP requests hitting the
// port are expected noise and only produce a warning; anything else is logged
// as an error.
wss.on("error", (error: Error) => {
  const benignFragments = [
    "request is not acceptable",
    "missing or invalid headers",
  ];
  const isBenign = benignFragments.some((fragment) => error.message.includes(fragment));

  if (!isBenign) {
    console.error("[WebSocketServer LIB] 服务器实例错误:", error);
    return;
  }
  console.warn(`[WebSocketServer LIB] 忽略了无效的握手或非WebSocket请求: ${error.message}`);
});
|
|
|
|
|
|
|
|
// Once every 24 hours: analyse the collected performance data, then clear it.
// The callback is async and nothing awaits it, so a failure has to be handled
// here; otherwise it would surface as an unhandled promise rejection. The
// collected data is cleared only after a successful analysis, so a failed run
// leaves it in place for the next attempt.
const dailyAnalysisInterval = setInterval(async () => {
  try {
    const dailyData = performanceMonitor.getDailyData();
    await PerformanceAnalyzer.analyzeDailyPerformance(dailyData);
    performanceMonitor.clearDailyData();
  } catch (error) {
    console.error("[PerformanceAnalyzer] 每日性能分析失败:", error);
  }
}, 24 * 60 * 60 * 1000);
|
|
|
|
|
|
|
|
// Shutdown on Ctrl+C: stop the daily analysis timer, close the WebSocket
// server, then exit the process.
// NOTE(review): this relies on `wss.close` invoking the callback it is given —
// confirm against the server library's `close()` signature; if it ignores its
// argument, `Deno.exit(0)` is never reached from here.
Deno.addSignalListener("SIGINT", () => {
  const exitAfterClose = () => {
    console.log("WebSocket 服务器已关闭");
    Deno.exit(0);
  };

  clearInterval(dailyAnalysisInterval);
  console.log("正在关闭服务器...");
  wss.close(exitAfterClose);
});

console.log("WebSocket 字幕翻译服务器正在运行,地址为 ws://localhost:8000");
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|