// capcut-flow-wizard / script.js
// eubottura · revision 85774d3 (verified)
// Change note: adds the audio input that is transcribed with Whisper and used to
// generate the SRT file for CapCut, with exact timing for each block.
document.addEventListener('DOMContentLoaded', () => {
// Shorthand for id-based lookups; each element below is resolved once, when this setup code runs.
const byId = (id) => document.getElementById(id);

// Text workflow controls
const inputText = byId('input-text');
const outputText = byId('output-text');
const processBtn = byId('process-btn');
const processBtnMobile = byId('process-btn-mobile');
const copyBtn = byId('copy-btn');
const clearBtn = byId('clear-btn');
const blockCount = byId('block-count');
const downloadSrtBtn = byId('download-srt-btn');

// Mode toggle controls
const modeTextBtn = byId('mode-text');
const modeAudioBtn = byId('mode-audio');
const audioSection = byId('audio-section');
// The first <section> in the document is treated as the text input area.
const inputSection = document.querySelector('section');

// Audio workflow controls
const audioFileInput = byId('audio-file');
const apiKeyInput = byId('api-key');
const transcribeBtn = byId('transcribe-btn');
const fileLabel = byId('file-label');
const transcribingStatus = byId('transcribing-status');

// Mutable state shared by the handlers below
let currentSrtData = null; // Last generated SRT text, or null when there is none
let currentTranscriptWords = []; // Entries of the form {word, start, end}
// --- CONSTANTS ---
// Character budget for a block (whitespace is not counted by the code that uses it).
const MAX_CHARS = 11;

// Function words (articles, prepositions, pronouns, conjunctions), grouped by language.
// Words shared between languages are listed in each group; the Set collapses duplicates.
const PT_FUNCTION_WORDS = [
  'de', 'do', 'da', 'em', 'no', 'na', 'por', 'para', 'com', 'ao', 'a', 'os', 'as',
  'e', 'ou', 'mas', 'que', 'se', 'como', 'porque', 'quando', 'então', 'o', 'um', 'uma',
  'nos', 'nas', 'me', 'te', 'não', 'só', 'já', 'também', 'tipo', 'nosso', 'nossa',
];
const EN_FUNCTION_WORDS = [
  'a', 'an', 'the', 'of', 'in', 'on', 'at', 'to', 'for', 'with', 'from', 'by', 'and', 'or', 'but',
];
const ES_FUNCTION_WORDS = [
  'el', 'la', 'los', 'las', 'un', 'una', 'de', 'en', 'a', 'por', 'para', 'con', 'y', 'o', 'pero', 'que',
];

// Tabu words: a block should not end on one of these.
const TABU_WORDS = new Set([
  ...PT_FUNCTION_WORDS,
  ...EN_FUNCTION_WORDS,
  ...ES_FUNCTION_WORDS,
]);

// Connectives: a word from this set must start a new line.
const CONNECTIVES = new Set(['e', 'é', 'que', 'and', 'that', 'y']);
// --- MODE SWITCHING ---
// Marks one mode button as active and the other as inactive.
const highlightModeButton = (activeBtn, inactiveBtn) => {
  activeBtn.classList.add('mode-active');
  inactiveBtn.classList.remove('mode-active');
};

// Text mode: hide the audio panel, re-enable the text input and drop any previous result.
modeTextBtn.addEventListener('click', () => {
  highlightModeButton(modeTextBtn, modeAudioBtn);
  audioSection.classList.add('hidden');
  inputSection.classList.remove('opacity-50', 'pointer-events-none');
  downloadSrtBtn.classList.add('hidden');
  outputText.value = '';
  blockCount.textContent = '0';
  currentSrtData = null;
});

// Audio mode: show the audio panel and dim/disable the manual text input to avoid confusion.
modeAudioBtn.addEventListener('click', () => {
  highlightModeButton(modeAudioBtn, modeTextBtn);
  audioSection.classList.remove('hidden');
  inputSection.classList.add('opacity-50', 'pointer-events-none');
});
// Mirror the chosen file name in the label; fall back to the prompt text when nothing is selected.
audioFileInput.addEventListener('change', (event) => {
  const { files } = event.target;
  const hasSelection = files.length > 0;
  fileLabel.textContent = hasSelection ? files[0].name : 'Click or drag audio file here';
});
// --- AUDIO TRANSCRIPTION LOGIC ---
/**
 * Transcribes the selected file and renders an aligned SRT.
 *
 * Steps: validate that a file and an API key were provided, upload the file to the
 * OpenAI transcription endpoint (model `whisper-1`, `verbose_json` with word
 * timestamps), store the returned words in `currentTranscriptWords`, split the
 * transcript text with `getProcessedBlocks`, build the SRT with `generateSRT`,
 * then show it in the output area and reveal the download button.
 *
 * Failures are logged and reported through `showToast`; the transcribe button and
 * the status indicator are always restored.
 *
 * @returns {Promise<void>}
 */
async function handleTranscription() {
  const file = audioFileInput.files[0];
  const apiKey = apiKeyInput.value.trim();
  if (!file) {
    showToast("Please select an audio or video file.", "error");
    return;
  }
  if (!apiKey) {
    showToast("Please enter your OpenAI API Key.", "error");
    return;
  }

  // UI loading state
  transcribeBtn.disabled = true;
  transcribingStatus.classList.remove('hidden');

  try {
    const formData = new FormData();
    formData.append('file', file);
    formData.append('model', 'whisper-1');
    formData.append('response_format', 'verbose_json');
    // Array-valued multipart parameter: the API expects the bracketed field name.
    // Without the brackets the word-level timestamps are not requested.
    formData.append('timestamp_granularities[]', 'word');

    const response = await fetch('https://api.openai.com/v1/audio/transcriptions', {
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${apiKey}`
      },
      body: formData
    });

    if (!response.ok) {
      // Error bodies are not guaranteed to be JSON; do not let a parse failure
      // hide the fact that the request itself failed.
      const errData = await response.json().catch(() => null);
      throw new Error(errData?.error?.message || 'Transcription failed');
    }

    const data = await response.json();

    // Word-level timestamps are required to time the blocks; an empty list would
    // otherwise produce an empty SRT reported as a success.
    if (!Array.isArray(data.words) || data.words.length === 0) {
      throw new Error("No word-level timestamps returned. Check API plan.");
    }

    currentTranscriptWords = data.words.map((w) => ({
      word: w.word,
      start: w.start,
      end: w.end
    }));

    // The punctuated transcript drives block splitting; the word list drives timing.
    const processedBlocks = getProcessedBlocks(data.text ?? '');
    const srtContent = generateSRT(processedBlocks, currentTranscriptWords);

    // Show the SRT itself so the timing is visible in the output area.
    outputText.value = srtContent;
    blockCount.textContent = processedBlocks.length;
    currentSrtData = srtContent;
    downloadSrtBtn.classList.remove('hidden');
    showToast("Transcription & Alignment complete!");
  } catch (error) {
    console.error(error);
    showToast(error.message, "error");
  } finally {
    transcribeBtn.disabled = false;
    transcribingStatus.classList.add('hidden');
  }
}

transcribeBtn.addEventListener('click', handleTranscription);
// --- SRT GENERATION ---
/**
 * Formats a time offset in seconds as an SRT timestamp (`HH:MM:SS,mmm`).
 *
 * The value is rounded to the nearest millisecond (truncation would turn 1.001 s
 * into `00:00:01,000` because 1.001 * 1000 is slightly below 1001 in floating
 * point). Negative offsets are clamped to zero and the hour field keeps counting
 * past 23 instead of wrapping.
 *
 * @param {number} seconds - Offset from the start of the media, in seconds.
 * @returns {string} Timestamp such as `01:01:01,500`.
 * @throws {RangeError} When `seconds` is not a finite number.
 */
function formatSRTTime(seconds) {
  const value = Number(seconds);
  if (!Number.isFinite(value)) {
    throw new RangeError(`Invalid time value: ${seconds}`);
  }

  const totalMs = Math.max(0, Math.round(value * 1000));
  const ms = totalMs % 1000;
  const totalSeconds = Math.floor(totalMs / 1000);
  const secs = totalSeconds % 60;
  const mins = Math.floor(totalSeconds / 60) % 60;
  const hours = Math.floor(totalSeconds / 3600);

  const pad = (part, width) => String(part).padStart(width, '0');
  return `${pad(hours, 2)}:${pad(mins, 2)}:${pad(secs, 2)},${pad(ms, 3)}`;
}
/**
 * Builds SRT text by assigning consecutive transcript words to each block.
 *
 * Alignment is purely positional: a block is assumed to consume as many entries of
 * `words` as it has whitespace-separated tokens once `! ? . ,` are stripped. The
 * cue starts at the first consumed word's `start` and ends at the last consumed
 * word's `end` (clamped to the final word). Blocks with no tokens are skipped, and
 * processing stops once the word list is exhausted.
 *
 * @param {string[]} blocks - Display text of each subtitle block, in order.
 * @param {{word: string, start: number, end: number}[]} words - Timed transcript words.
 * @returns {string} Concatenated SRT entries, each followed by a blank line.
 */
function generateSRT(blocks, words) {
  const entries = [];
  let cursor = 0; // Index of the next unconsumed transcript word

  for (const block of blocks) {
    const tokenCount = block
      .replace(/[!?.,]/g, '')
      .trim()
      .split(/\s+/)
      .filter((token) => token.length > 0).length;

    if (tokenCount === 0) continue;
    if (cursor >= words.length) break;

    // Never read past the final transcript word.
    const lastIdx = Math.min(cursor + tokenCount - 1, words.length - 1);
    const cueStart = words[cursor].start;
    const cueEnd = words[lastIdx].end;

    const sequence = entries.length + 1;
    entries.push(`${sequence}\n${formatSRTTime(cueStart)} --> ${formatSRTTime(cueEnd)}\n${block}\n\n`);
    cursor += tokenCount;
  }

  return entries.join('');
}
// Offer the current SRT as a file download via a temporary object URL and anchor element.
downloadSrtBtn.addEventListener('click', () => {
  if (!currentSrtData) {
    return;
  }

  const srtBlob = new Blob([currentSrtData], { type: 'text/plain' });
  const objectUrl = URL.createObjectURL(srtBlob);

  const link = document.createElement('a');
  link.href = objectUrl;
  link.download = 'capcut_aligned.srt';

  // The anchor is attached only for the duration of the synthetic click.
  document.body.appendChild(link);
  link.click();
  document.body.removeChild(link);
  URL.revokeObjectURL(objectUrl);

  showToast("SRT file downloaded!");
});
// --- UTILITIES ---
/**
 * Shows a transient toast in the bottom-right corner.
 *
 * The message is inserted as a text node, never parsed as HTML, because callers
 * pass it error messages that can originate from a remote API response.
 * After 3 seconds the `hiding` class is added and the toast is removed when its
 * animation ends.
 *
 * @param {string} message - Text to display.
 * @param {string} [type='success'] - `'success'` for the green style; any other value uses the red error style.
 */
function showToast(message, type = 'success') {
  const isSuccess = type === 'success';
  const bgColor = isSuccess ? 'bg-green-500' : 'bg-red-500';

  const toast = document.createElement('div');
  toast.className = `fixed bottom-5 right-5 ${bgColor} text-white px-6 py-3 rounded-lg shadow-lg flex items-center gap-2 z-50 toast`;

  const icon = document.createElement('i');
  icon.setAttribute('data-feather', isSuccess ? 'check-circle' : 'alert-circle');
  toast.appendChild(icon);
  toast.appendChild(document.createTextNode(` ${message}`));

  document.body.appendChild(toast);

  // Icon rendering is cosmetic; a missing icon library must not prevent the
  // toast from being scheduled for removal.
  if (typeof feather !== 'undefined') {
    feather.replace();
  }

  setTimeout(() => {
    toast.classList.add('hiding');
    toast.addEventListener('animationend', () => toast.remove(), { once: true });
  }, 3000);
}
/**
 * Normalizes raw script text before it is split into blocks.
 *
 * 1. Collapses every whitespace run to a single space and trims the ends.
 * 2. Turns each comma into "!" attached to the previous word (a space before the
 *    comma is dropped). A comma sitting directly between two digits ("1,5",
 *    "10,000") is part of a number and is left untouched.
 * 3. Collapses runs of "!" into a single "!".
 *
 * @param {string} text - Raw input; null/undefined are treated as empty.
 * @returns {string} The cleaned text.
 */
function cleanText(text) {
  const collapsed = String(text ?? '').replace(/\s+/g, ' ').trim();

  const withBangs = collapsed.replace(/( ?),/g, (match, gap, offset, whole) => {
    const prev = whole[offset - 1] ?? '';
    const next = whole[offset + match.length] ?? '';
    const isNumberSeparator = gap === '' && /\d/.test(prev) && /\d/.test(next);
    return isNumberSeparator ? ',' : '!';
  });

  return withBangs.replace(/!+/g, '!');
}
/**
 * Tells whether a word is a "weak" way to end a block.
 *
 * The characters `! ? . ,` are stripped and the rest is lower-cased; the word is
 * weak when what remains has at most 3 characters or is listed in TABU_WORDS.
 *
 * @param {string} word - Candidate last word of a block (may carry punctuation).
 * @returns {boolean} False for an empty/missing word, otherwise the weakness verdict.
 */
function isWeakEnding(word) {
  if (!word) {
    return false;
  }
  const bare = word.replace(/[!?.,]/g, '').toLowerCase();
  return bare.length <= 3 || TABU_WORDS.has(bare);
}
/**
 * Counts the characters of a block, ignoring all whitespace.
 *
 * @param {string} block - Text to measure.
 * @returns {number} Length of `block` (in UTF-16 code units) minus its whitespace characters.
 */
function countCharsNoSpaces(block) {
  const whitespace = block.match(/\s/g);
  const whitespaceCount = whitespace === null ? 0 : whitespace.length;
  return block.length - whitespaceCount;
}
/**
 * Splits raw text into short display blocks. Shared by the text and audio modes.
 *
 * Phase 1 - line building, word by word over the cleaned text:
 *   - a connective word starts a new line when the current line is not empty;
 *   - a word that would push the line past MAX_CHARS non-space characters starts a new line;
 *   - a word ending in "!", "?" or ".." closes the line it is on.
 *
 * Phase 2 - anti-weakening: while any line except the last ends in a weak word
 * (see isWeakEnding), that word is moved to the front of the following line. A
 * line emptied this way is removed. The pass is repeated until stable, capped at
 * 100 passes.
 *
 * @param {string} raw - Unprocessed script or transcript text.
 * @returns {string[]} Non-empty blocks in reading order.
 */
function getProcessedBlocks(raw) {
  const words = cleanText(raw).split(' ');
  const lines = [];
  let currentLine = [];

  for (const word of words) {
    // Test the end of the whole word: a one-character slice can never contain "..".
    const endsWithPunctuation = /[!?]$|\.\.$/.test(word);
    const isConnective = CONNECTIVES.has(word.toLowerCase().replace(/[!?.,]/g, ''));
    const proposedLen = countCharsNoSpaces([...currentLine, word].join(' '));
    const startNewLine = currentLine.length > 0 && (isConnective || proposedLen > MAX_CHARS);

    if (startNewLine) {
      lines.push(currentLine.join(' '));
      currentLine = [word];
    } else {
      currentLine.push(word);
    }

    if (endsWithPunctuation) {
      lines.push(currentLine.join(' '));
      currentLine = [];
    }
  }
  if (currentLine.length > 0) {
    lines.push(currentLine.join(' '));
  }

  // Anti-weakening
  let changed = true;
  let iterations = 0;
  while (changed && iterations < 100) {
    changed = false;
    iterations++;
    for (let i = 0; i < lines.length - 1; i++) {
      const lineWords = lines[i].split(' ');
      const lastWord = lineWords[lineWords.length - 1];
      if (!isWeakEnding(lastWord)) {
        continue;
      }

      // Move the weak word to the front of the next line.
      lines[i + 1] = `${lastWord} ${lines[i + 1]}`;

      const remainingWords = lineWords.slice(0, -1);
      if (remainingWords.length === 0) {
        // The line consisted of the weak word only: drop it. (Copying the next
        // line into this slot would emit the next line's words twice.)
        lines.splice(i, 1);
        i--; // Re-examine the merged line that now occupies this index.
      } else {
        lines[i] = remainingWords.join(' ');
      }
      changed = true;
    }
  }

  return lines.filter((line) => line.trim().length > 0);
}
/**
 * Text-mode entry point: splits the typed script into blocks and displays them.
 *
 * Shows an error toast and does nothing else when the input is blank. Otherwise
 * the blocks are written to the output area, the block counter is updated and a
 * success toast is shown.
 */
function processScript() {
  const raw = inputText.value;
  if (!raw.trim()) {
    showToast("Please enter text to process.", "error");
    return;
  }

  const lines = getProcessedBlocks(raw);

  // Without this the result was computed and counted but never shown.
  // NOTE(review): one block per line is assumed as the display format - confirm.
  outputText.value = lines.join('\n');
  blockCount.textContent = lines.length;
  showToast("Script processed successfully!");
}
// --- Event Listeners ---
// Shared click handler for the process buttons: suppress the default action, then process.
const handleProcess = (e) => {
  if (e) {
    e.preventDefault();
  }
  processScript();
};

// The desktop and mobile buttons trigger the same handler.
for (const button of [processBtn, processBtnMobile]) {
  button.addEventListener('click', handleProcess);
}
// Copy the output to the clipboard. Clipboard access can be unavailable (insecure
// context) or rejected (permission denied), so failures are reported instead of
// being left as an unhandled rejection.
copyBtn.addEventListener('click', async () => {
  if (!outputText.value) return;
  try {
    await navigator.clipboard.writeText(outputText.value);
    showToast("Copied to clipboard!");
  } catch (error) {
    console.error(error);
    showToast("Could not copy to clipboard.", "error");
  }
});
// Reset input, output and counter. The stored SRT is discarded and its download
// button hidden as well, so a cleared screen cannot still download a stale file.
clearBtn.addEventListener('click', () => {
  inputText.value = '';
  outputText.value = '';
  blockCount.textContent = '0';
  currentSrtData = null;
  downloadSrtBtn.classList.add('hidden');
  inputText.focus();
});
// Backup icon initialization: run Feather's replace pass once, 100 ms after setup.
setTimeout(() => {
  feather.replace();
}, 100);
});