Spaces:
Sleeping
Sleeping
Update static/verify.js
Browse files- static/verify.js +128 -78
static/verify.js
CHANGED
|
@@ -6,23 +6,26 @@ document.addEventListener('DOMContentLoaded', () => {
|
|
| 6 |
const progressPercent = document.getElementById('progressPercent');
|
| 7 |
const loaderText = document.getElementById('loaderText');
|
| 8 |
const resultsWrapper = document.getElementById('resultsWrapper');
|
| 9 |
-
const resultsArea = document.getElementById('results');
|
| 10 |
-
const correctedSrtOutput = document.getElementById('correctedSrtOutput');
|
|
|
|
| 11 |
const errorMessage = document.getElementById('errorMessage');
|
| 12 |
|
| 13 |
-
// Stats
|
| 14 |
const statChunks = document.getElementById('statChunks');
|
| 15 |
const statCorrections = document.getElementById('statCorrections');
|
| 16 |
const statErrors = document.getElementById('statErrors');
|
| 17 |
-
const statOcrErrors = document.getElementById('statOcrErrors');
|
| 18 |
-
|
| 19 |
-
// Output Areas
|
| 20 |
-
const ocrErrorOutput = document.getElementById('ocrErrorOutput');
|
| 21 |
|
| 22 |
-
//
|
| 23 |
const pdfUpload = document.getElementById('pdfUpload');
|
| 24 |
const srtUpload = document.getElementById('srtUpload');
|
| 25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
if (pdfUpload) {
|
| 27 |
pdfUpload.addEventListener('change', function() {
|
| 28 |
if(this.files[0]) document.getElementById('pdfFileName').textContent = this.files[0].name;
|
|
@@ -31,28 +34,31 @@ document.addEventListener('DOMContentLoaded', () => {
|
|
| 31 |
|
| 32 |
if (srtUpload) {
|
| 33 |
srtUpload.addEventListener('change', function() {
|
| 34 |
-
if(this.files[0])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
});
|
| 36 |
}
|
| 37 |
|
| 38 |
-
// --- Main Verify Button Click ---
|
| 39 |
if (verifyBtn) {
|
| 40 |
verifyBtn.addEventListener('click', async () => {
|
| 41 |
-
// 1. Gather Data
|
| 42 |
const apiKeys = document.getElementById('apiKeyInput').value.trim();
|
| 43 |
const pdfFile = pdfUpload ? pdfUpload.files[0] : null;
|
| 44 |
const srtFile = srtUpload ? srtUpload.files[0] : null;
|
| 45 |
const pagesPerRequest = document.getElementById('pagesPerRequestInput').value;
|
| 46 |
const modelName = document.getElementById('modelSelect').value;
|
| 47 |
|
| 48 |
-
// 2. Validation
|
| 49 |
if (errorMessage) errorMessage.classList.add('hidden');
|
| 50 |
-
|
| 51 |
-
if (!
|
| 52 |
-
if (!
|
| 53 |
-
if (!srtFile) return showError("Please upload an SRT file.");
|
| 54 |
|
| 55 |
-
//
|
| 56 |
verifyBtn.disabled = true;
|
| 57 |
verifyBtn.classList.add('opacity-50', 'cursor-not-allowed');
|
| 58 |
if (loader) loader.classList.remove('hidden');
|
|
@@ -65,7 +71,7 @@ document.addEventListener('DOMContentLoaded', () => {
|
|
| 65 |
formData.append('pages_per_request', pagesPerRequest);
|
| 66 |
formData.append('model_name', modelName);
|
| 67 |
|
| 68 |
-
//
|
| 69 |
let progress = 0;
|
| 70 |
const interval = setInterval(() => {
|
| 71 |
if (progress < 90) {
|
|
@@ -73,34 +79,22 @@ document.addEventListener('DOMContentLoaded', () => {
|
|
| 73 |
if (progress > 90) progress = 90;
|
| 74 |
if (progressBar) progressBar.style.width = `${progress}%`;
|
| 75 |
if (progressPercent) progressPercent.textContent = `${Math.round(progress)}%`;
|
| 76 |
-
|
| 77 |
-
if (loaderText) {
|
| 78 |
-
if (progress > 20 && progress < 50) loaderText.textContent = "Splitting PDF into batches...";
|
| 79 |
-
if (progress > 50 && progress < 80) loaderText.textContent = "Processing with Gemini (Parallel)...";
|
| 80 |
-
if (progress > 80) loaderText.textContent = "Aggregating results...";
|
| 81 |
-
}
|
| 82 |
}
|
| 83 |
}, 800);
|
| 84 |
|
| 85 |
try {
|
| 86 |
-
// 5. Send Request
|
| 87 |
const response = await fetch('/verify_batch', {
|
| 88 |
method: 'POST',
|
| 89 |
body: formData
|
| 90 |
});
|
| 91 |
|
| 92 |
clearInterval(interval);
|
| 93 |
-
|
| 94 |
const data = await response.json();
|
| 95 |
|
| 96 |
-
if (!response.ok)
|
| 97 |
-
throw new Error(data.detail || data.error || "Server Error");
|
| 98 |
-
}
|
| 99 |
|
| 100 |
-
// 6. Success State
|
| 101 |
if (progressBar) progressBar.style.width = '100%';
|
| 102 |
if (progressPercent) progressPercent.textContent = '100%';
|
| 103 |
-
if (loaderText) loaderText.textContent = "Complete!";
|
| 104 |
|
| 105 |
setTimeout(() => {
|
| 106 |
if (loader) loader.classList.add('hidden');
|
|
@@ -126,72 +120,128 @@ document.addEventListener('DOMContentLoaded', () => {
|
|
| 126 |
errorMessage.textContent = `Error: ${msg}`;
|
| 127 |
errorMessage.classList.remove('hidden');
|
| 128 |
} else {
|
| 129 |
-
alert(
|
| 130 |
}
|
| 131 |
}
|
| 132 |
|
| 133 |
-
function displayResults(data) {
|
| 134 |
if (resultsWrapper) resultsWrapper.classList.remove('hidden');
|
| 135 |
|
| 136 |
-
// 1.
|
| 137 |
if (statChunks) statChunks.textContent = data.total_chunks || 0;
|
| 138 |
if (statErrors) statErrors.textContent = data.system_errors ? data.system_errors.length : 0;
|
| 139 |
|
| 140 |
const corrections = data.corrections || [];
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
let ocrReportText = "";
|
| 146 |
-
|
| 147 |
-
// Loop to process the prompt output
|
| 148 |
corrections.forEach((item, index) => {
|
| 149 |
-
//
|
| 150 |
if (item.correctedSrt) {
|
| 151 |
-
|
| 152 |
}
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
if (item.errorReport &&
|
| 157 |
-
!item.errorReport.includes("No significant errors") &&
|
| 158 |
-
!item.errorReport.includes("No errors")) {
|
| 159 |
-
|
| 160 |
-
ocrAlertsCount++;
|
| 161 |
-
ocrReportText += `[Batch ${index + 1}]\n${item.errorReport}\n\n----------------\n\n`;
|
| 162 |
}
|
| 163 |
});
|
| 164 |
|
| 165 |
-
|
| 166 |
-
if (
|
| 167 |
-
if (statOcrErrors) statOcrErrors.textContent = ocrAlertsCount;
|
| 168 |
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
|
|
|
|
|
|
| 172 |
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 176 |
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 180 |
}
|
| 181 |
-
|
| 182 |
-
/
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 195 |
});
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 196 |
}
|
| 197 |
});
|
|
|
|
| 6 |
const progressPercent = document.getElementById('progressPercent');
|
| 7 |
const loaderText = document.getElementById('loaderText');
|
| 8 |
const resultsWrapper = document.getElementById('resultsWrapper');
|
| 9 |
+
const resultsArea = document.getElementById('results');
|
| 10 |
+
const correctedSrtOutput = document.getElementById('correctedSrtOutput');
|
| 11 |
+
const ocrErrorOutput = document.getElementById('ocrErrorOutput');
|
| 12 |
const errorMessage = document.getElementById('errorMessage');
|
| 13 |
|
| 14 |
+
// Stats
|
| 15 |
const statChunks = document.getElementById('statChunks');
|
| 16 |
const statCorrections = document.getElementById('statCorrections');
|
| 17 |
const statErrors = document.getElementById('statErrors');
|
| 18 |
+
const statOcrErrors = document.getElementById('statOcrErrors');
|
|
|
|
|
|
|
|
|
|
| 19 |
|
| 20 |
+
// File Inputs
|
| 21 |
const pdfUpload = document.getElementById('pdfUpload');
|
| 22 |
const srtUpload = document.getElementById('srtUpload');
|
| 23 |
|
| 24 |
+
// Global Store for Original SRT Content
|
| 25 |
+
let originalSrtContent = "";
|
| 26 |
+
|
| 27 |
+
// --- Event Listeners ---
|
| 28 |
+
|
| 29 |
if (pdfUpload) {
|
| 30 |
pdfUpload.addEventListener('change', function() {
|
| 31 |
if(this.files[0]) document.getElementById('pdfFileName').textContent = this.files[0].name;
|
|
|
|
| 34 |
|
| 35 |
// Keep the file-name label and the cached SRT text (originalSrtContent) in
// sync with whatever is currently selected in the SRT file input.
if (srtUpload) {
    // Reader belonging to the most recent selection. Results delivered by any
    // older reader are ignored so a slow read cannot overwrite newer content.
    let activeReader = null;

    srtUpload.addEventListener('change', function() {
        const file = this.files[0];

        // Drop the previously cached text first: if the selection was cleared
        // or replaced, patching must never run against the old file's content.
        originalSrtContent = "";
        activeReader = null;
        if (!file) return;

        document.getElementById('srtFileName').textContent = file.name;

        // Read content immediately for later patching
        const reader = new FileReader();
        activeReader = reader;
        reader.onload = (e) => {
            if (activeReader === reader) originalSrtContent = e.target.result;
        };
        reader.onerror = () => {
            // originalSrtContent stays empty, so the download handler will ask
            // the user to re-upload instead of patching stale text.
            console.error('Failed to read SRT file:', reader.error);
        };
        reader.readAsText(file);
    });
}
|
| 47 |
|
|
|
|
| 48 |
if (verifyBtn) {
|
| 49 |
verifyBtn.addEventListener('click', async () => {
|
|
|
|
| 50 |
const apiKeys = document.getElementById('apiKeyInput').value.trim();
|
| 51 |
const pdfFile = pdfUpload ? pdfUpload.files[0] : null;
|
| 52 |
const srtFile = srtUpload ? srtUpload.files[0] : null;
|
| 53 |
const pagesPerRequest = document.getElementById('pagesPerRequestInput').value;
|
| 54 |
const modelName = document.getElementById('modelSelect').value;
|
| 55 |
|
|
|
|
| 56 |
if (errorMessage) errorMessage.classList.add('hidden');
|
| 57 |
+
if (!apiKeys) return showError("Please enter API Keys.");
|
| 58 |
+
if (!pdfFile) return showError("Please upload a PDF.");
|
| 59 |
+
if (!srtFile) return showError("Please upload an SRT.");
|
|
|
|
| 60 |
|
| 61 |
+
// Reset UI
|
| 62 |
verifyBtn.disabled = true;
|
| 63 |
verifyBtn.classList.add('opacity-50', 'cursor-not-allowed');
|
| 64 |
if (loader) loader.classList.remove('hidden');
|
|
|
|
| 71 |
formData.append('pages_per_request', pagesPerRequest);
|
| 72 |
formData.append('model_name', modelName);
|
| 73 |
|
| 74 |
+
// Fake Progress
|
| 75 |
let progress = 0;
|
| 76 |
const interval = setInterval(() => {
|
| 77 |
if (progress < 90) {
|
|
|
|
| 79 |
if (progress > 90) progress = 90;
|
| 80 |
if (progressBar) progressBar.style.width = `${progress}%`;
|
| 81 |
if (progressPercent) progressPercent.textContent = `${Math.round(progress)}%`;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
}
|
| 83 |
}, 800);
|
| 84 |
|
| 85 |
try {
|
|
|
|
| 86 |
const response = await fetch('/verify_batch', {
|
| 87 |
method: 'POST',
|
| 88 |
body: formData
|
| 89 |
});
|
| 90 |
|
| 91 |
clearInterval(interval);
|
|
|
|
| 92 |
const data = await response.json();
|
| 93 |
|
| 94 |
+
if (!response.ok) throw new Error(data.detail || data.error || "Server Error");
|
|
|
|
|
|
|
| 95 |
|
|
|
|
| 96 |
if (progressBar) progressBar.style.width = '100%';
|
| 97 |
if (progressPercent) progressPercent.textContent = '100%';
|
|
|
|
| 98 |
|
| 99 |
setTimeout(() => {
|
| 100 |
if (loader) loader.classList.add('hidden');
|
|
|
|
| 120 |
errorMessage.textContent = `Error: ${msg}`;
|
| 121 |
errorMessage.classList.remove('hidden');
|
| 122 |
} else {
|
| 123 |
+
alert(msg);
|
| 124 |
}
|
| 125 |
}
|
| 126 |
|
| 127 |
+
/**
 * Render a verification response into the stats counters and output textareas.
 *
 * Reads `data.total_chunks`, `data.system_errors` and `data.corrections`.
 * Entries of `corrections` that carry a truthy `correctedSrt` are counted and
 * written (as JSON) to the corrected-SRT textarea; string `errorReport` values
 * that do not contain "No significant errors" are concatenated into the OCR
 * report textarea. The raw response is always shown as pretty-printed JSON.
 *
 * Malformed entries (null, non-objects, non-string reports) are skipped rather
 * than aborting the whole render; they remain visible in the raw JSON output.
 *
 * @param {object} data - Parsed JSON body of the verification response.
 */
function displayResults(data) {
    if (resultsWrapper) resultsWrapper.classList.remove('hidden');

    // 1. Stats
    if (statChunks) statChunks.textContent = data.total_chunks || 0;
    if (statErrors) statErrors.textContent = data.system_errors ? data.system_errors.length : 0;

    // Anything other than an array cannot be iterated; treat it as "no corrections".
    const corrections = Array.isArray(data.corrections) ? data.corrections : [];
    const validCorrections = [];
    let ocrReports = "";

    // 2. Filter Data
    corrections.forEach((item, index) => {
        // A null/primitive entry has no fields to inspect.
        if (!item || typeof item !== 'object') return;

        // Only count if it has a correctedSrt field
        if (item.correctedSrt) {
            validCorrections.push(item);
        }

        // Collect significant error reports. The report number is the entry's
        // position in the full corrections list, not among the kept ones.
        const report = item.errorReport;
        if (typeof report === 'string' && report && !report.includes("No significant errors")) {
            ocrReports += `[Report ${index + 1}]\n${report}\n\n`;
        }
    });

    if (statCorrections) statCorrections.textContent = validCorrections.length;
    if (statOcrErrors) statOcrErrors.textContent = ocrReports ? "!" : "0";

    // 3. Fill Textareas
    if (resultsArea) resultsArea.value = JSON.stringify(data, null, 2);
    if (ocrErrorOutput) ocrErrorOutput.value = ocrReports || "No significant OCR errors reported.";
    if (correctedSrtOutput) correctedSrtOutput.value = JSON.stringify(validCorrections, null, 2);
}
|
| 158 |
|
| 159 |
+
// --- ROBUST SRT DOWNLOAD LOGIC ---
// On click: take the corrections currently shown in the corrected-SRT textarea,
// patch them into the SRT text cached at upload time, and download the result.
const downloadSrtBtn = document.getElementById('downloadSrtBtn');
if (downloadSrtBtn) {
    downloadSrtBtn.addEventListener('click', () => {
        if (!originalSrtContent) {
            alert("Original SRT content is missing. Please re-upload the SRT file to enable patching.");
            return;
        }

        // Parse results from the text area
        let corrections;
        try {
            // We use the filtered list we displayed in correctedSrtOutput
            corrections = JSON.parse(correctedSrtOutput.value);
        } catch (e) {
            alert("No valid corrections data found to apply.");
            return;
        }

        if (!corrections || corrections.length === 0) {
            alert("There are no corrections to apply.");
            return;
        }

        // JSON.parse also succeeds on objects, numbers and strings. Only an
        // array can be applied, so reject anything else with a clear message
        // instead of letting the patch step fail on a missing forEach.
        if (!Array.isArray(corrections)) {
            alert("No valid corrections data found to apply.");
            return;
        }

        try {
            const patchedSrt = patchSrtFile(originalSrtContent, corrections);
            downloadFile(patchedSrt, 'corrected_subtitles.srt');
        } catch (e) {
            alert("Error applying patches: " + e.message);
        }
    });
}
|
| 192 |
+
|
| 193 |
+
/**
 * Replaces blocks in the original SRT with the corrected blocks.
 *
 * The original text is normalised to "\n" line endings and split into blocks
 * on blank lines (lines that are empty or hold only spaces/tabs). Each block
 * is keyed by its trimmed first line (the cue ID); when an ID occurs more than
 * once, the last occurrence is the one that gets patched.
 *
 * A correction's `correctedSrt` may contain one cue or several cues separated
 * by blank lines. Every cue whose ID exists in the original replaces that
 * original block; cues with unknown IDs are ignored. Entries that are null or
 * whose `correctedSrt` is not a string are skipped.
 *
 * @param {string} originalText - Full text of the uploaded SRT file.
 * @param {Array<{correctedSrt?: string}>} corrections - Correction entries.
 * @returns {string} The patched SRT, blocks joined by a single blank line.
 */
function patchSrtFile(originalText, corrections) {
    // One or more blank lines; stray spaces/tabs on them still count as blank.
    const blankLines = /\n(?:[ \t]*\n)+/;
    const normalizeNewlines = (text) => text.replace(/\r\n/g, '\n').replace(/\r/g, '\n');

    // 1. Split original SRT into blocks
    const blocks = normalizeNewlines(originalText).split(blankLines);

    // 2. Map "ID" -> index in the blocks array so updates are direct
    const blockMap = new Map();
    blocks.forEach((block, index) => {
        const id = block.trim().split('\n')[0].trim();
        if (id) blockMap.set(id, index);
    });

    // Break one correction payload into individual cues. A chunk only starts a
    // new cue when it looks like one (known ID line followed by a "-->" timing
    // line); any other chunk stays attached to the cue before it, so free text
    // containing a blank line is not silently dropped.
    const splitCues = (text) => {
        const cues = [];
        normalizeNewlines(text).split(blankLines).forEach((part) => {
            const chunk = part.trim();
            if (!chunk) return;
            const lines = chunk.split('\n');
            const startsCue = lines.length > 1 &&
                lines[1].includes('-->') &&
                blockMap.has(lines[0].trim());
            if (startsCue || cues.length === 0) {
                cues.push(chunk);
            } else {
                cues[cues.length - 1] += `\n\n${chunk}`;
            }
        });
        return cues;
    };

    // 3. Apply Corrections
    let appliedCount = 0;
    corrections.forEach((fix) => {
        // Example fix.correctedSrt: "10\n00:00:10 --> ... \nHello"
        if (!fix || typeof fix.correctedSrt !== 'string') return;

        splitCues(fix.correctedSrt).forEach((cue) => {
            const cueId = cue.split('\n')[0].trim();
            if (!blockMap.has(cueId)) return;
            // REPLACE the entire original block with the new corrected block
            blocks[blockMap.get(cueId)] = cue;
            appliedCount++;
        });
    });

    console.log(`Applied ${appliedCount} corrections.`);
    return blocks.join('\n\n');
}
|
| 236 |
+
|
| 237 |
+
/**
 * Offer `content` to the user as a file download named `filename`.
 *
 * Wraps the content in a text/plain Blob, points a temporary <a download>
 * element at an object URL for it, and clicks that element.
 *
 * @param {string} content - Text to place in the downloaded file.
 * @param {string} filename - Suggested name for the saved file.
 */
function downloadFile(content, filename) {
    const blob = new Blob([content], { type: 'text/plain' });
    const url = URL.createObjectURL(blob);
    const a = document.createElement('a');
    a.href = url;
    a.download = filename;
    document.body.appendChild(a);
    try {
        a.click();
    } finally {
        // Always detach the helper element, even if click() throws.
        document.body.removeChild(a);
        // Object URLs keep their Blob alive until revoked. Release it on the
        // next tick so the browser has already picked up the download.
        setTimeout(() => URL.revokeObjectURL(url), 0);
    }
}
|
| 247 |
});
|