Spaces:

eubottura
/

capcut-flow-wizard

Running

App Files Files Community

capcut-flow-wizard / script.js

eubottura's picture

Cara, e o input do áudio que vai ler com Whisper, transcrever e ainda vai gerar o arquivo SRT pro CapCut, com o tempo exato perfeito de cada bloco

85774d3 verified 16 days ago

history blame contribute delete

14.1 kB


	document.addEventListener('DOMContentLoaded', () => {
	// UI Elements: text-mode controls, each looked up once by element id.
	const inputText = document.getElementById('input-text');
	const outputText = document.getElementById('output-text');
	const processBtn = document.getElementById('process-btn');
	const processBtnMobile = document.getElementById('process-btn-mobile');
	const copyBtn = document.getElementById('copy-btn');
	const clearBtn = document.getElementById('clear-btn');
	const blockCount = document.getElementById('block-count');
	const downloadSrtBtn = document.getElementById('download-srt-btn');

	// Mode Toggle Elements: buttons that switch between text and audio input.
	const modeTextBtn = document.getElementById('mode-text');
	const modeAudioBtn = document.getElementById('mode-audio');
	const audioSection = document.getElementById('audio-section');
	// querySelector returns the first <section> in document order; this code
	// assumes that element is the text-input section (TODO: confirm in the HTML).
	const inputSection = document.querySelector('section'); // First section is input

	// Audio Elements: file picker, API key field and transcription controls.
	const audioFileInput = document.getElementById('audio-file');
	const apiKeyInput = document.getElementById('api-key');
	const transcribeBtn = document.getElementById('transcribe-btn');
	const fileLabel = document.getElementById('file-label');
	const transcribingStatus = document.getElementById('transcribing-status');

	// State
	// SRT text produced by the last successful transcription; null when there is
	// none. The download handler does nothing while this is null.
	let currentSrtData = null;
	// Word entries kept from the last transcription response.
	let currentTranscriptWords = []; // Stores {word, start, end}
	// --- CONSTANTS ---
	// Character budget per block (whitespace is not counted) used when the
	// text is first split into blocks.
	const MAX_CHARS = 11;

	// Tabu words (Articles, Prepositions, Pronouns, Conjunctions).
	// isWeakEnding treats any of these as a weak way to end a block.
	// Some entries repeat across the language groups; duplicates are harmless
	// because a Set stores each value once.
	const TABU_WORDS = new Set([
	// PT
	'de', 'do', 'da', 'em', 'no', 'na', 'por', 'para', 'com', 'ao', 'a', 'os', 'as',
	'e', 'ou', 'mas', 'que', 'se', 'como', 'porque', 'quando', 'então', 'o', 'um', 'uma',
	'nos', 'nas', 'me', 'te', 'não', 'só', 'já', 'também', 'tipo', 'nosso', 'nossa',
	// EN
	'a', 'an', 'the', 'of', 'in', 'on', 'at', 'to', 'for', 'with', 'from', 'by', 'and', 'or', 'but',
	// ES
	'el', 'la', 'los', 'las', 'un', 'una', 'de', 'en', 'a', 'por', 'para', 'con', 'y', 'o', 'pero', 'que'
	]);

	// Words that force a new block to start whenever the current block already
	// holds at least one word.
	const CONNECTIVES = new Set(['e', 'é', 'que', 'and', 'that', 'y']); // Must start new line
	// --- MODE SWITCHING ---
	// Text mode: highlight the text button, hide the audio panel, re-enable the
	// text input area and discard any previous output / SRT result.
	modeTextBtn.addEventListener('click', function onTextModeSelected() {
		modeTextBtn.classList.add('mode-active');
		modeAudioBtn.classList.remove('mode-active');

		audioSection.classList.add('hidden');
		inputSection.classList.remove('opacity-50', 'pointer-events-none');
		downloadSrtBtn.classList.add('hidden');

		outputText.value = '';
		blockCount.textContent = '0';
		currentSrtData = null;
	});

	// Audio mode: highlight the audio button, reveal the audio panel and dim /
	// disable the manual text input so the two inputs are not mixed up.
	modeAudioBtn.addEventListener('click', function onAudioModeSelected() {
		modeAudioBtn.classList.add('mode-active');
		modeTextBtn.classList.remove('mode-active');

		audioSection.classList.remove('hidden');
		inputSection.classList.add('opacity-50', 'pointer-events-none');
	});

	// Mirror the chosen file's name in the drop-zone label, or restore the
	// placeholder text when the selection is emptied.
	audioFileInput.addEventListener('change', function onAudioFilePicked(event) {
		const chosenFile = event.target.files[0];
		fileLabel.textContent = chosenFile
			? chosenFile.name
			: 'Click or drag audio file here';
	});

	// --- AUDIO TRANSCRIPTION LOGIC ---

	/**
	 * Uploads the selected file to OpenAI's transcription endpoint, splits the
	 * returned text into blocks, aligns those blocks with the returned word
	 * timings and shows the resulting SRT in the output area.
	 *
	 * Requires a selected file and a non-empty API key; otherwise an error toast
	 * is shown and nothing is sent. While the request runs the transcribe button
	 * is disabled and the status indicator is visible; both are restored in
	 * `finally`. Any failure is logged and surfaced as an error toast.
	 *
	 * @returns {Promise<void>}
	 */
	async function handleTranscription() {
		const file = audioFileInput.files[0];
		const apiKey = apiKeyInput.value.trim();

		if (!file) {
			showToast("Please select an audio or video file.", "error");
			return;
		}
		if (!apiKey) {
			showToast("Please enter your OpenAI API Key.", "error");
			return;
		}

		// UI Loading State
		transcribeBtn.disabled = true;
		transcribingStatus.classList.remove('hidden');

		try {
			const formData = new FormData();
			formData.append('file', file);
			formData.append('model', 'whisper-1');
			formData.append('response_format', 'verbose_json');
			// The endpoint documents this multipart field as an array, so the
			// name carries the "[]" suffix; without it word timings are not
			// requested and `data.words` comes back missing.
			formData.append('timestamp_granularities[]', 'word');

			const response = await fetch('https://api.openai.com/v1/audio/transcriptions', {
				method: 'POST',
				headers: {
					'Authorization': `Bearer ${apiKey}`
				},
				body: formData
			});

			if (!response.ok) {
				// Error bodies are not guaranteed to be JSON; fall back to the
				// generic message instead of failing with a parse error.
				let errData = null;
				try {
					errData = await response.json();
				} catch {
					errData = null;
				}
				throw new Error(errData?.error?.message || 'Transcription failed');
			}

			const data = await response.json();

			// Word-level timestamps are required to time each block.
			if (!Array.isArray(data.words)) {
				throw new Error("No word-level timestamps returned. Check API plan.");
			}

			// Keep only the fields the SRT alignment needs.
			currentTranscriptWords = data.words.map(w => ({
				word: w.word,
				start: w.start,
				end: w.end
			}));

			// Split the full transcript with the same logic used in text mode.
			const processedBlocks = getProcessedBlocks(data.text);

			// Map timestamps to processed blocks
			const srtContent = generateSRT(processedBlocks, currentTranscriptWords);

			// Show the SRT itself so the timings are visible, and enable download.
			outputText.value = srtContent;
			blockCount.textContent = processedBlocks.length;
			currentSrtData = srtContent;
			downloadSrtBtn.classList.remove('hidden');

			showToast("Transcription & Alignment complete!");

		} catch (error) {
			console.error(error);
			showToast(error.message, "error");
		} finally {
			transcribeBtn.disabled = false;
			transcribingStatus.classList.add('hidden');
		}
	}

	transcribeBtn.addEventListener('click', handleTranscription);

	// --- SRT GENERATION ---

	/**
	 * Formats a time offset as an SRT timestamp (`HH:MM:SS,mmm`).
	 *
	 * The value is rounded once to whole milliseconds, so binary floating-point
	 * noise (e.g. 1.005 * 1000 === 1004.9999999999999) cannot shave a
	 * millisecond off, and the fields are derived arithmetically so hours do
	 * not wrap at 24. Negative or non-finite input is formatted as zero.
	 *
	 * @param {number} seconds - Offset in seconds.
	 * @returns {string} Timestamp such as `00:01:02,345`.
	 */
	function formatSRTTime(seconds) {
		const numeric = Number(seconds);
		const totalMs = Number.isFinite(numeric) ? Math.max(0, Math.round(numeric * 1000)) : 0;

		const ms = totalMs % 1000;
		const totalSeconds = (totalMs - ms) / 1000;
		const ss = totalSeconds % 60;
		const totalMinutes = (totalSeconds - ss) / 60;
		const mm = totalMinutes % 60;
		const hh = (totalMinutes - mm) / 60;

		const pad = (value, width) => String(value).padStart(width, '0');
		return `${pad(hh, 2)}:${pad(mm, 2)}:${pad(ss, 2)},${pad(ms, 3)}`;
	}

	/**
	 * Builds SRT text by assigning consecutive timed words to each block.
	 *
	 * Alignment is purely positional: a block containing N words (counted after
	 * removing `!`, `?`, `.` and `,`) consumes the next N entries of `words`.
	 * The cue starts at the first consumed word's `start` and ends at the last
	 * consumed word's `end`, clamped to the final entry of `words`. Blocks with
	 * no countable words are skipped, and output stops once `words` runs out.
	 *
	 * @param {string[]} blocks - Caption blocks in display order.
	 * @param {{start: number, end: number}[]} words - Timed words in spoken order.
	 * @returns {string} Numbered SRT cues, each followed by a blank line.
	 */
	function generateSRT(blocks, words) {
		const cues = [];
		let cursor = 0;

		for (const block of blocks) {
			const stripped = block.replace(/[!?.,]/g, '');
			const tokens = stripped.match(/\S+/g);
			const tokenCount = tokens ? tokens.length : 0;

			if (tokenCount === 0) {
				continue;
			}
			if (cursor >= words.length) {
				break;
			}

			// The final block may claim more words than remain; clamp to the end.
			const lastIdx = Math.min(cursor + tokenCount - 1, words.length - 1);
			const from = formatSRTTime(words[cursor].start);
			const until = formatSRTTime(words[lastIdx].end);

			cues.push(`${cues.length + 1}\n${from} --> ${until}\n${block}\n\n`);
			cursor += tokenCount;
		}

		return cues.join('');
	}

	// Download the current SRT as a file by clicking a temporary link that
	// points at an object URL; does nothing when there is no SRT yet.
	downloadSrtBtn.addEventListener('click', function onDownloadSrtClicked() {
		if (!currentSrtData) {
			return;
		}

		const srtBlob = new Blob([currentSrtData], { type: 'text/plain' });
		const objectUrl = URL.createObjectURL(srtBlob);

		const link = document.createElement('a');
		link.href = objectUrl;
		link.download = 'capcut_aligned.srt';

		// The link is attached to the document only for the duration of the click.
		document.body.appendChild(link);
		link.click();
		document.body.removeChild(link);

		URL.revokeObjectURL(objectUrl);
		showToast("SRT file downloaded!");
	});

	// --- UTILITIES ---

	/**
	 * Shows a transient toast in the bottom-right corner.
	 *
	 * The message is inserted as a text node, never parsed as HTML, because
	 * callers pass through text they do not control (for example error messages
	 * returned by the transcription API).
	 *
	 * After 3 seconds the `hiding` class is added and the toast removes itself
	 * when its animation ends.
	 *
	 * @param {string} message - Text to display.
	 * @param {string} [type='success'] - `'success'` for the green style; any
	 *   other value gets the red error style.
	 */
	function showToast(message, type = 'success') {
		const isSuccess = type === 'success';
		const bgColor = isSuccess ? 'bg-green-500' : 'bg-red-500';

		const toast = document.createElement('div');
		toast.className = `fixed bottom-5 right-5 ${bgColor} text-white px-6 py-3 rounded-lg shadow-lg flex items-center gap-2 z-50 toast`;

		// Placeholder element that feather.replace() picks up via data-feather.
		const icon = document.createElement('i');
		icon.setAttribute('data-feather', isSuccess ? 'check-circle' : 'alert-circle');

		toast.append(icon, ` ${message}`);
		document.body.appendChild(toast);
		feather.replace();

		setTimeout(() => {
			toast.classList.add('hiding');
			toast.addEventListener('animationend', () => toast.remove(), { once: true });
		}, 3000);
	}

	/**
	 * Normalizes raw script text before it is split into blocks.
	 *
	 * Whitespace runs become single spaces (leading/trailing whitespace is
	 * dropped), every comma becomes `!`, and any resulting run of `!` is
	 * collapsed to a single `!`.
	 *
	 * @param {string} text - Raw input text.
	 * @returns {string} Normalized text.
	 */
	function cleanText(text) {
		const singleSpaced = text.trim().split(/\s+/).join(' ');

		// A run mixing commas and exclamation marks ends up as exactly one "!".
		return singleSpaced.replace(/[,!]+/g, '!');
	}

	/**
	 * Tells whether a word is a weak way to end a block.
	 *
	 * The word is compared without `!`, `?`, `.` and `,` and in lower case. It
	 * is weak when that bare form has three characters or fewer, or when it is
	 * listed in TABU_WORDS. An empty or missing word is never weak.
	 *
	 * @param {string} word - Candidate last word of a block.
	 * @returns {boolean} True when the word should not end a block.
	 */
	function isWeakEnding(word) {
		if (!word) {
			return false;
		}

		const bare = word.replace(/[!?.,]/g, '').toLowerCase();
		return bare.length <= 3 || TABU_WORDS.has(bare);
	}

	/**
	 * Counts the characters of a block, ignoring every whitespace character.
	 *
	 * @param {string} block - Text to measure.
	 * @returns {number} Length of the text with whitespace excluded.
	 */
	function countCharsNoSpaces(block) {
		const whitespaceHits = block.match(/\s/g);
		const whitespaceCount = whitespaceHits ? whitespaceHits.length : 0;
		return block.length - whitespaceCount;
	}

	// Separate the text processing logic to be reusable by both text and audio modes
	/**
	 * Splits raw script text into short caption blocks.
	 *
	 * Pass 1 walks the cleaned words and starts a new block when the current
	 * block is non-empty and either the word is a connective or adding it would
	 * exceed MAX_CHARS (whitespace not counted). A word ending in `!`, `?` or
	 * an ellipsis (`..`) closes its block immediately; a single `.` does not.
	 *
	 * Pass 2 ("anti-weakening") moves a weak last word (see isWeakEnding) from
	 * the end of a block to the front of the following block, repeating until
	 * nothing changes (capped at 100 sweeps). A block that consisted only of
	 * that weak word is merged into the next block, so every input word appears
	 * exactly once in the output. The final block is never shortened.
	 *
	 * @param {string} raw - Raw script or transcript text.
	 * @returns {string[]} Non-empty blocks in reading order.
	 */
	function getProcessedBlocks(raw) {
		const words = cleanText(raw).split(' ');
		const lines = [];
		let currentLine = [];

		for (const word of words) {
			// Test the tail of the whole word: a single trailing character can
			// never match the two-dot ellipsis alternative.
			const endsWithPunctuation = /(?:[!?]|\.\.)$/.test(word);
			const isConnective = CONNECTIVES.has(word.toLowerCase().replace(/[!?.,]/g, ''));
			const wouldOverflow = countCharsNoSpaces([...currentLine, word].join(' ')) > MAX_CHARS;

			if (currentLine.length > 0 && (isConnective || wouldOverflow)) {
				lines.push(currentLine.join(' '));
				currentLine = [word];
			} else {
				currentLine.push(word);
			}

			if (endsWithPunctuation) {
				lines.push(currentLine.join(' '));
				currentLine = [];
			}
		}

		if (currentLine.length > 0) {
			lines.push(currentLine.join(' '));
		}

		// Anti-Weakening
		let changed = true;
		let iterations = 0;

		while (changed && iterations < 100) {
			changed = false;
			iterations++;

			for (let i = 0; i < lines.length - 1; i++) {
				const lineWords = lines[i].split(' ');
				const lastWord = lineWords[lineWords.length - 1];

				if (!isWeakEnding(lastWord)) {
					continue;
				}

				// The weak word always moves to the front of the next block.
				lines[i + 1] = `${lastWord} ${lines[i + 1]}`;

				if (lineWords.length === 1) {
					// The block held only the weak word: drop the now-empty
					// slot and re-examine the merged block on the next step.
					lines.splice(i, 1);
					i--;
				} else {
					lines[i] = lineWords.slice(0, -1).join(' ');
				}

				changed = true;
			}
		}

		return lines.filter(l => l.trim().length > 0);
	}

	/**
	 * Text-mode entry point: splits the text typed into the input area into
	 * blocks, writes them to the output area one block per line and updates
	 * the block counter.
	 *
	 * Shows an error toast and leaves the output untouched when the input is
	 * empty or only whitespace.
	 */
	function processScript() {
		const raw = inputText.value;
		if (!raw.trim()) {
			showToast("Please enter text to process.", "error");
			return;
		}

		const lines = getProcessedBlocks(raw);

		// Publish the result; without this the copy button has nothing to copy.
		outputText.value = lines.join('\n');
		blockCount.textContent = lines.length;

		showToast("Script processed successfully!");
	}

	// --- Event Listeners ---

	// Shared click handler for both process buttons; suppresses the default
	// action of the click (when an event is supplied) before processing.
	const handleProcess = (e) => {
		if (e) e.preventDefault();
		processScript();
	};

	processBtn.addEventListener('click', handleProcess);
	processBtnMobile.addEventListener('click', handleProcess);

	// Copy the output to the clipboard. Failures (permission denied, Clipboard
	// API unavailable) are reported instead of being left as an unhandled
	// rejection.
	copyBtn.addEventListener('click', async () => {
		if (!outputText.value) return;
		try {
			await navigator.clipboard.writeText(outputText.value);
			showToast("Copied to clipboard!");
		} catch (error) {
			console.error(error);
			showToast("Could not copy to clipboard.", "error");
		}
	});

	// Reset input and output. The SRT result is discarded too, so a cleared
	// screen cannot still download the previous transcription.
	clearBtn.addEventListener('click', () => {
		inputText.value = '';
		outputText.value = '';
		blockCount.textContent = '0';
		currentSrtData = null;
		downloadSrtBtn.classList.add('hidden');
		inputText.focus();
	});

	// Feather icons init backup
	setTimeout(() => feather.replace(), 100);
	});