Spaces:

eubottura
/

capcut-flow-wizard

Running

App Files Files Community

eubottura committed on Jan 25

Commit

85774d3

verified ·

1 Parent(s): 5921f34

Cara, e o input do áudio que vai ler com Whisper, transcript e ainda vai gerar o arquivo SRT pro CapCut, com o tempo exato perfeito, de cada bloco

Browse files

Files changed (3) hide show

index.html +51 -4
script.js +214 -55
style.css +16 -2

index.html CHANGED Viewed

@@ -88,17 +88,61 @@
     <!-- Main Content -->
     <main class="flex-grow container mx-auto px-4 py-8">
-        <header class="mb-10 text-center">
             <h1 class="text-4xl md:text-5xl font-bold bg-clip-text text-transparent bg-gradient-to-r from-primary-400 to-secondary-400 mb-4">
                 Script Alignment Specialist
             </h1>
             <p class="text-slate-400 max-w-2xl mx-auto text-lg">
-                Transform your raw scripts into "breathable" SRT blocks optimized for CapCut.
                 Zero-error line breaks, strict character limits, and natural rhythm.
             </p>
         </header>
-        <div class="grid grid-cols-1 lg:grid-cols-2 gap-8 h-full">
             <!-- Input Section -->
             <section class="flex flex-col gap-4">
@@ -148,9 +192,12 @@
                          <button id="copy-btn" class="text-xs bg-slate-800 hover:bg-slate-700 text-white px-3 py-1 rounded flex items-center gap-1 transition-colors">
                             <i data-feather="copy" class="w-3 h-3"></i> Copy
                         </button>
                     </div>
                 </div>
-                <div class="glass-panel rounded-b-xl p-1 flex-grow relative">
                     <textarea id="output-text" readonly
                         class="w-full h-96 lg:h-[500px] bg-slate-900/50 text-primary-200 p-4 rounded-lg resize-none focus:outline-none mono-font text-sm leading-relaxed"
                         placeholder="Processed blocks will appear here..."></textarea>

     <!-- Main Content -->
     <main class="flex-grow container mx-auto px-4 py-8">
+        <header class="mb-8 text-center">
             <h1 class="text-4xl md:text-5xl font-bold bg-clip-text text-transparent bg-gradient-to-r from-primary-400 to-secondary-400 mb-4">
                 Script Alignment Specialist
             </h1>
             <p class="text-slate-400 max-w-2xl mx-auto text-lg">
+                Transform your raw scripts or audio files into "breathable" SRT blocks optimized for CapCut.
                 Zero-error line breaks, strict character limits, and natural rhythm.
             </p>
         </header>
<!-- Mode Toggle: script.js attaches click handlers to #mode-text and #mode-audio.
     The wrapper is exposed as a labelled group so assistive tech announces the two
     buttons as one control; type="button" keeps them inert if ever placed in a form. -->
<div class="flex justify-center mb-8">
    <div class="glass-panel p-1 rounded-xl inline-flex" role="group" aria-label="Input mode">
        <button type="button" id="mode-text" class="px-6 py-2 rounded-lg text-sm font-semibold transition-all bg-slate-700 text-white shadow-md">
            Text Input
        </button>
        <button type="button" id="mode-audio" class="px-6 py-2 rounded-lg text-sm font-semibold transition-all text-slate-400 hover:text-white">
            Audio Transcribe
        </button>
    </div>
</div>
<!-- Audio Input Section (hidden by default; script.js removes `hidden` when #mode-audio is clicked) -->
<section id="audio-section" class="hidden mb-8 glass-panel rounded-2xl p-6">
    <div class="grid grid-cols-1 md:grid-cols-2 gap-6">
        <div>
            <!-- `for` ties the label to the input so clicking it focuses the field -->
            <label for="api-key" class="block text-sm font-medium text-slate-300 mb-2">OpenAI API Key</label>
            <input type="password" id="api-key" placeholder="sk-..." autocomplete="off"
                class="w-full bg-slate-900 border border-slate-700 rounded-lg px-4 py-3 text-white focus:outline-none focus:border-primary-500 transition-colors">
            <!-- script.js sends the key in the Authorization header of the request to api.openai.com,
                 so the note says that instead of claiming the key is only "used locally" -->
            <p class="text-xs text-slate-500 mt-2">Required for Whisper transcription. Your key is sent from this browser directly to api.openai.com and is not stored.</p>
        </div>
        <div>
            <label for="audio-file" class="block text-sm font-medium text-slate-300 mb-2">Upload Audio/Video</label>
            <div class="relative border-2 border-dashed border-slate-700 rounded-lg p-4 hover:border-primary-500 transition-colors cursor-pointer bg-slate-900/50">
                <!-- The transparent file input covers the whole drop zone, so a click anywhere opens the picker -->
                <input type="file" id="audio-file" accept="audio/*,video/*" class="absolute inset-0 w-full h-full opacity-0 cursor-pointer">
                <div class="text-center">
                    <i data-feather="mic" class="w-8 h-8 mx-auto text-slate-500 mb-2"></i>
                    <!-- script.js replaces this text with the chosen file name -->
                    <p class="text-sm text-slate-400" id="file-label">Click or drag audio file here</p>
                </div>
            </div>
        </div>
    </div>
    <div class="mt-6 flex justify-end">
        <button type="button" id="transcribe-btn" class="bg-primary-600 hover:bg-primary-500 text-white px-8 py-3 rounded-xl font-semibold shadow-lg flex items-center gap-2 transition-all disabled:opacity-50 disabled:cursor-not-allowed">
            <i data-feather="zap" class="w-5 h-5"></i> Transcribe &amp; Process
        </button>
    </div>
    <!-- Shown by script.js while the transcription request is in flight; role/aria-live make
         the progress message audible to screen readers -->
    <div id="transcribing-status" class="hidden mt-4" role="status" aria-live="polite">
        <div class="flex items-center gap-3 text-primary-400">
            <i data-feather="loader" class="animate-spin w-5 h-5"></i>
            <span class="text-sm">Transcribing audio... this may take a moment.</span>
        </div>
    </div>
</section>
+<div class="grid grid-cols-1 lg:grid-cols-2 gap-8 h-full">
             <!-- Input Section -->
             <section class="flex flex-col gap-4">
                          <button id="copy-btn" class="text-xs bg-slate-800 hover:bg-slate-700 text-white px-3 py-1 rounded flex items-center gap-1 transition-colors">
                             <i data-feather="copy" class="w-3 h-3"></i> Copy
                         </button>
+                        <button id="download-srt-btn" class="hidden text-xs bg-primary-600 hover:bg-primary-500 text-white px-3 py-1 rounded flex items-center gap-1 transition-colors">
+                            <i data-feather="download" class="w-3 h-3"></i> .SRT
+                        </button>
                     </div>
                 </div>
+<div class="glass-panel rounded-b-xl p-1 flex-grow relative">
                     <textarea id="output-text" readonly
                         class="w-full h-96 lg:h-[500px] bg-slate-900/50 text-primary-200 p-4 rounded-lg resize-none focus:outline-none mono-font text-sm leading-relaxed"
                         placeholder="Processed blocks will appear here..."></textarea>

script.js CHANGED Viewed

@@ -1,4 +1,6 @@
 document.addEventListener('DOMContentLoaded', () => {
     const inputText = document.getElementById('input-text');
     const outputText = document.getElementById('output-text');
     const processBtn = document.getElementById('process-btn');
@@ -6,8 +8,25 @@ document.addEventListener('DOMContentLoaded', () => {
     const copyBtn = document.getElementById('copy-btn');
     const clearBtn = document.getElementById('clear-btn');
     const blockCount = document.getElementById('block-count');
-    // --- CONSTANTS ---
     const MAX_CHARS = 11;
     // Tabu words (Articles, Prepositions, Pronouns, Conjunctions)
@@ -23,11 +42,187 @@ document.addEventListener('DOMContentLoaded', () => {
     ]);
     const CONNECTIVES = new Set(['e', 'é', 'que', 'and', 'that', 'y']); // Must start new line
     // --- UTILITIES ---
     function showToast(message, type = 'success') {
-        const toast = document.createElement('div');
         const bgColor = type === 'success' ? 'bg-green-500' : 'bg-red-500';
         toast.className = `fixed bottom-5 right-5 ${bgColor} text-white px-6 py-3 rounded-lg shadow-lg flex items-center gap-2 z-50 toast`;
         toast.innerHTML = `<i data-feather="${type === 'success' ? 'check-circle' : 'alert-circle'}"></i> ${message}`;
@@ -73,18 +268,9 @@ document.addEventListener('DOMContentLoaded', () => {
         return block.replace(/\s/g, '').length;
     }
-    function processScript() {
-        const raw = inputText.value;
-        if (!raw.trim()) {
-            showToast("Please enter text to process.", "error");
-            return;
-        }
         const cleanedText = cleanText(raw);
-        // Step: Split into initial blocks based on rules
-        // We will build lines dynamically.
         const words = cleanedText.split(' ');
         let lines = [];
         let currentLine = [];
@@ -94,29 +280,18 @@ document.addEventListener('DOMContentLoaded', () => {
             let word = words[i];
             let nextWord = words[i + 1] || '';
-            // Rule: Split at Punctuation (!, ?, .)
-            // If word ends with !, ?, or .. -> End line immediately
-            const endsWithPunctuation = /[!?]|(\.\.)/.test(word.slice(-1)); // simplified check
-            // Rule: Connectives (E/É/QUE/AND/THAT) -> Start new line
             const isConnective = CONNECTIVES.has(word.toLowerCase().replace(/[!?.,]/g, ''));
-            // Logic: Should we start a new line?
             let startNewLine = false;
             if (isConnective && currentLine.length > 0) {
                 startNewLine = true;
             }
-            // Check length if we add this word to current line
-            // Calculate length of current line + space + word (without spaces in final count logic)
-            // But we need to build the string to check "No spaces" limit.
             const proposedLineStr = [...currentLine, word].join(' ');
             const proposedLen = countCharsNoSpaces(proposedLineStr);
-            // Length Enforcement (only if not single word line)
-            // Note: Single word exception allows > 11 chars.
-            // If currentLine is empty, we can take the word regardless of length (mostly).
             if (currentLine.length > 0 && proposedLen > MAX_CHARS) {
                 startNewLine = true;
             }
@@ -128,7 +303,6 @@ document.addEventListener('DOMContentLoaded', () => {
                 currentLine.push(word);
             }
-            // Punctuation Split (If word ends with punctuation, it forces a break *after* it)
             if (endsWithPunctuation) {
                  lines.push(currentLine.join(' '));
                  currentLine = [];
@@ -139,19 +313,8 @@ document.addEventListener('DOMContentLoaded', () => {
             lines.push(currentLine.join(' '));
         }
-        // --- Step: Anti-Weakening Correction ---
-        // We need to iterate and fix weak endings.
-        // A block is weak if the last word is <= 3 chars or Tabu.
-        // Correction: Move the last word to the beginning of the NEXT block.
-        // This is a delicate loop. We might need multiple passes or a specific algorithm.
-        // Let's try a pass from end to start or start to end.
-        // Since moving a word forward increases the length of the NEXT line, we might break that next line's length rule.
-        // The prompt says: "move the offending word to the beginning of the next block... even if it pushes the next block's length."
-        // So length is secondary to the anti-weakening rule.
         let changed = true;
-        // Safety break to prevent infinite loops
         let iterations = 0;
         while (changed && iterations < 100) {
@@ -163,30 +326,20 @@ document.addEventListener('DOMContentLoaded', () => {
                 const lastWord = lineWords[lineWords.length - 1];
                 if (isWeakEnding(lastWord)) {
-                    // Move lastWord to next line
                     const remainingWords = lineWords.slice(0, lineWords.length - 1);
                     if (remainingWords.length === 0) {
-                        // The line consists ONLY of this weak word.
-                        // It moves to the next line entirely.
-                        // This implies the previous line might now be weak?
-                        // But strictly, we just shift it down.
-                        lines[i] = lines[i+1]; // Pull next line up? No, that's messy.
-                        // Better: Merge this word into the start of next line.
-                        // If the current line is empty after removal, delete it.
                     } else {
                         lines[i] = remainingWords.join(' ');
                     }
-                    // Add to next line
-                    // We construct the next line: [movedWord] + [oldNextLineWords]
                     const nextLineWords = lines[i+1].split(' ');
                     lines[i+1] = [lastWord, ...nextLineWords].join(' ');
-                    // If current line became empty, remove it
                     if (lines[i].trim() === '') {
                         lines.splice(i, 1);
-                        i--; // adjust index
                     }
                     changed = true;
@@ -194,12 +347,18 @@ document.addEventListener('DOMContentLoaded', () => {
             }
         }
-        // Clean up any empty lines that might have been generated by edge cases
-        lines = lines.filter(l => l.trim().length > 0);
-        // Output
-        outputText.value = lines.join('\n');
-        blockCount.textContent = lines.length;
         showToast("Script processed successfully!");
     }

 document.addEventListener('DOMContentLoaded', () => {
+    // UI Elements
     const inputText = document.getElementById('input-text');
     const outputText = document.getElementById('output-text');
     const processBtn = document.getElementById('process-btn');
     const copyBtn = document.getElementById('copy-btn');
     const clearBtn = document.getElementById('clear-btn');
     const blockCount = document.getElementById('block-count');
+    const downloadSrtBtn = document.getElementById('download-srt-btn');
// Mode Toggle Elements
const modeTextBtn = document.getElementById('mode-text');
const modeAudioBtn = document.getElementById('mode-audio');
const audioSection = document.getElementById('audio-section');
// `document.querySelector('section')` returns the first <section> in document order,
// and index.html places #audio-section ahead of the text-input section, so that lookup
// cannot be the input panel any more (audio mode would dim and disable the wrong panel).
// Resolve the panel from the textarea instead.
// NOTE(review): assumes #input-text sits inside the input <section> — confirm against
// index.html; the previous lookup is kept as a fallback if it does not.
const inputSection = inputText?.closest('section') ?? document.querySelector('section');
+    // Audio Elements
+    const audioFileInput = document.getElementById('audio-file');
+    const apiKeyInput = document.getElementById('api-key');
+    const transcribeBtn = document.getElementById('transcribe-btn');
+    const fileLabel = document.getElementById('file-label');
+    const transcribingStatus = document.getElementById('transcribing-status');
+    // State
+    let currentSrtData = null;
+    let currentTranscriptWords = []; // Stores {word, start, end}
+// --- CONSTANTS ---
     const MAX_CHARS = 11;
     // Tabu words (Articles, Prepositions, Pronouns, Conjunctions)
     ]);
     const CONNECTIVES = new Set(['e', 'é', 'que', 'and', 'that', 'y']); // Must start new line
// --- MODE SWITCHING ---
// index.html ships #mode-text with the "selected" Tailwind utilities baked in and
// #mode-audio with the "unselected" ones. Toggling only `mode-active` would leave the
// Text button looking selected after switching to audio, so both class sets are swapped.
const MODE_SELECTED_CLASSES = ['mode-active', 'bg-slate-700', 'text-white', 'shadow-md'];
const MODE_UNSELECTED_CLASSES = ['text-slate-400', 'hover:text-white'];

/**
 * Paint one toggle button as selected and the other as unselected.
 * @param {HTMLElement} selectedBtn - Button that was just clicked.
 * @param {HTMLElement} unselectedBtn - The other mode button.
 */
function highlightModeButton(selectedBtn, unselectedBtn) {
    selectedBtn.classList.remove(...MODE_UNSELECTED_CLASSES);
    selectedBtn.classList.add(...MODE_SELECTED_CLASSES);
    unselectedBtn.classList.remove(...MODE_SELECTED_CLASSES);
    unselectedBtn.classList.add(...MODE_UNSELECTED_CLASSES);
}

// Text mode: hide the audio panel, re-enable manual input and discard any SRT result.
modeTextBtn.addEventListener('click', () => {
    highlightModeButton(modeTextBtn, modeAudioBtn);
    audioSection.classList.add('hidden');
    inputSection.classList.remove('opacity-50', 'pointer-events-none');
    downloadSrtBtn.classList.add('hidden');
    outputText.value = '';
    blockCount.textContent = '0';
    currentSrtData = null;
});

// Audio mode: reveal the audio panel and dim/disable manual text input to avoid confusion.
modeAudioBtn.addEventListener('click', () => {
    highlightModeButton(modeAudioBtn, modeTextBtn);
    audioSection.classList.remove('hidden');
    inputSection.classList.add('opacity-50', 'pointer-events-none');
});
// Mirror the picked file's name in the drop-zone label; restore the prompt when the
// selection is cleared.
audioFileInput.addEventListener('change', (event) => {
    const { files } = event.target;
    fileLabel.textContent = files.length > 0
        ? files[0].name
        : 'Click or drag audio file here';
});
+    // --- AUDIO TRANSCRIPTION LOGIC ---
/**
 * Send the selected media file to OpenAI's transcription endpoint, split the returned
 * text into blocks with getProcessedBlocks(), time the blocks with generateSRT(), and
 * show the resulting SRT in the output textarea.
 *
 * Failures are logged to the console and reported through showToast(); the button and
 * status indicator are always restored in `finally`.
 *
 * @returns {Promise<void>}
 */
async function handleTranscription() {
    const file = audioFileInput.files[0];
    const apiKey = apiKeyInput.value.trim();
    if (!file) {
        showToast("Please select an audio or video file.", "error");
        return;
    }
    if (!apiKey) {
        showToast("Please enter your OpenAI API Key.", "error");
        return;
    }
    // UI loading state
    transcribeBtn.disabled = true;
    transcribingStatus.classList.remove('hidden');
    try {
        const formData = new FormData();
        formData.append('file', file);
        formData.append('model', 'whisper-1');
        formData.append('response_format', 'verbose_json');
        // The endpoint takes this as an array-valued multipart field, so the name needs
        // the `[]` suffix; without it no word-level timestamps are requested.
        formData.append('timestamp_granularities[]', 'word');
        const response = await fetch('https://api.openai.com/v1/audio/transcriptions', {
            method: 'POST',
            headers: {
                'Authorization': `Bearer ${apiKey}`
            },
            body: formData
        });
        if (!response.ok) {
            let reason = `Transcription failed (HTTP ${response.status})`;
            try {
                const errData = await response.json();
                reason = errData?.error?.message || reason;
            } catch (parseError) {
                // Error bodies are not guaranteed to be JSON (e.g. a proxy error page);
                // keep the status-based message rather than masking it with a parse error.
                console.warn('Could not parse error response body', parseError);
            }
            throw new Error(reason);
        }
        const data = await response.json();
        // An empty list would otherwise produce an empty SRT behind a success toast.
        if (!Array.isArray(data.words) || data.words.length === 0) {
            throw new Error("No word-level timestamps returned. Check API plan.");
        }
        // Keep only the fields generateSRT() reads.
        currentTranscriptWords = data.words.map(({ word, start, end }) => ({ word, start, end }));
        // Run the transcript through the same block-splitting rules as text mode.
        const processedBlocks = getProcessedBlocks(data.text);
        // Map word timestamps onto the processed blocks.
        const srtContent = generateSRT(processedBlocks, currentTranscriptWords);
        // Show the SRT itself so the timings are visible before downloading.
        outputText.value = srtContent;
        blockCount.textContent = processedBlocks.length;
        currentSrtData = srtContent;
        downloadSrtBtn.classList.remove('hidden');
        showToast("Transcription & Alignment complete!");
    } catch (error) {
        console.error(error);
        showToast(error.message, "error");
    } finally {
        transcribeBtn.disabled = false;
        transcribingStatus.classList.add('hidden');
    }
}
+    transcribeBtn.addEventListener('click', handleTranscription);
+    // --- SRT GENERATION ---
/**
 * Format a time offset as an SRT timestamp (`HH:MM:SS,mmm`).
 *
 * The value is rounded to the nearest millisecond (so 1.005 s becomes `,005` rather
 * than being truncated to `,004` by floating-point error), negative offsets are
 * clamped to zero, and the hour field keeps counting past 23 instead of wrapping
 * the way a Date-based clock time would.
 *
 * @param {number} seconds - Offset from the start of the media, in seconds.
 * @returns {string} Timestamp such as `01:01:01,500`.
 * @throws {RangeError} If `seconds` is not a finite number.
 */
function formatSRTTime(seconds) {
    const value = Number(seconds);
    if (!Number.isFinite(value)) {
        throw new RangeError(`Invalid time value: ${seconds}`);
    }
    // Work in whole milliseconds so rounding carries cleanly into seconds/minutes/hours.
    const totalMs = Math.max(0, Math.round(value * 1000));
    const totalSeconds = Math.floor(totalMs / 1000);
    const pad = (part, width) => String(part).padStart(width, '0');
    const hh = pad(Math.floor(totalSeconds / 3600), 2);
    const mm = pad(Math.floor(totalSeconds / 60) % 60, 2);
    const ss = pad(totalSeconds % 60, 2);
    const ms = pad(totalMs % 1000, 3);
    return `${hh}:${mm}:${ss},${ms}`;
}
/**
 * Build SRT text by walking `blocks` and `words` in step.
 *
 * Each block consumes as many entries of `words` as it has whitespace-separated tokens
 * once `!`, `?`, `.` and `,` are stripped. The cue starts at the first consumed word's
 * `start` and ends at the last consumed word's `end` (clamped to the final word).
 * Blocks with no tokens are skipped; once `words` is exhausted the remaining blocks
 * are not emitted.
 *
 * @param {string[]} blocks - Subtitle lines in display order.
 * @param {{start: number, end: number}[]} words - Timed words in spoken order.
 * @returns {string} Concatenated SRT cues, each followed by a blank line.
 */
function generateSRT(blocks, words) {
    const cues = [];
    let cursor = 0; // index of the next unconsumed entry in `words`
    for (const text of blocks) {
        // Matching is by word count only; punctuation is stripped so it never counts as a word.
        const tokenCount = text
            .replace(/[!?.,]/g, '')
            .trim()
            .split(/\s+/)
            .filter((token) => token.length > 0)
            .length;
        if (tokenCount === 0) {
            continue;
        }
        if (cursor >= words.length) {
            break;
        }
        const lastIdx = Math.min(cursor + tokenCount - 1, words.length - 1);
        const cueStart = words[cursor].start;
        const cueEnd = words[lastIdx].end;
        const timing = `${formatSRTTime(cueStart)} --> ${formatSRTTime(cueEnd)}`;
        // Cue numbers are 1-based and count only the cues actually emitted.
        cues.push(`${cues.length + 1}\n${timing}\n${text}\n\n`);
        cursor += tokenCount;
    }
    return cues.join('');
}
// Save the most recent SRT result as a file by clicking a temporary <a download> link.
downloadSrtBtn.addEventListener('click', () => {
    if (!currentSrtData) {
        return;
    }
    const objectUrl = URL.createObjectURL(new Blob([currentSrtData], { type: 'text/plain' }));
    const link = Object.assign(document.createElement('a'), {
        href: objectUrl,
        download: 'capcut_aligned.srt',
    });
    // The link is attached to the document only for the duration of the click.
    document.body.appendChild(link);
    link.click();
    document.body.removeChild(link);
    URL.revokeObjectURL(objectUrl);
    showToast("SRT file downloaded!");
});
     // --- UTILITIES ---
     function showToast(message, type = 'success') {
+const toast = document.createElement('div');
         const bgColor = type === 'success' ? 'bg-green-500' : 'bg-red-500';
         toast.className = `fixed bottom-5 right-5 ${bgColor} text-white px-6 py-3 rounded-lg shadow-lg flex items-center gap-2 z-50 toast`;
         toast.innerHTML = `<i data-feather="${type === 'success' ? 'check-circle' : 'alert-circle'}"></i> ${message}`;
         return block.replace(/\s/g, '').length;
     }
+    // Separate the text processing logic to be reusable by both text and audio modes
+    function getProcessedBlocks(raw) {
         const cleanedText = cleanText(raw);
         const words = cleanedText.split(' ');
         let lines = [];
         let currentLine = [];
             let word = words[i];
             let nextWord = words[i + 1] || '';
+            const endsWithPunctuation = /[!?]|(\.\.)/.test(word.slice(-1));
             const isConnective = CONNECTIVES.has(word.toLowerCase().replace(/[!?.,]/g, ''));
             let startNewLine = false;
             if (isConnective && currentLine.length > 0) {
                 startNewLine = true;
             }
             const proposedLineStr = [...currentLine, word].join(' ');
             const proposedLen = countCharsNoSpaces(proposedLineStr);
             if (currentLine.length > 0 && proposedLen > MAX_CHARS) {
                 startNewLine = true;
             }
                 currentLine.push(word);
             }
             if (endsWithPunctuation) {
                  lines.push(currentLine.join(' '));
                  currentLine = [];
             lines.push(currentLine.join(' '));
         }
+        // Anti-Weakening
         let changed = true;
         let iterations = 0;
         while (changed && iterations < 100) {
                 const lastWord = lineWords[lineWords.length - 1];
                 if (isWeakEnding(lastWord)) {
                     const remainingWords = lineWords.slice(0, lineWords.length - 1);
                     if (remainingWords.length === 0) {
+                        lines[i] = lines[i+1];
                     } else {
                         lines[i] = remainingWords.join(' ');
                     }
                     const nextLineWords = lines[i+1].split(' ');
                     lines[i+1] = [lastWord, ...nextLineWords].join(' ');
                     if (lines[i].trim() === '') {
                         lines.splice(i, 1);
+                        i--;
                     }
                     changed = true;
             }
         }
+        return lines.filter(l => l.trim().length > 0);
+    }
/**
 * Text-mode entry point: split the text typed into the input textarea into blocks
 * with getProcessedBlocks() and show them, one block per line, in the output textarea.
 * Shows an error toast and does nothing else when the input is blank.
 */
function processScript() {
    const raw = inputText.value;
    if (!raw.trim()) {
        showToast("Please enter text to process.", "error");
        return;
    }
    const lines = getProcessedBlocks(raw);
    // Write the blocks to the output pane; without this only the counter changes and
    // the user never sees the processed text.
    outputText.value = lines.join('\n');
    blockCount.textContent = lines.length;
    showToast("Script processed successfully!");
}

style.css CHANGED Viewed

@@ -1,6 +1,5 @@
 /* Base styles are handled by Tailwind CSS in the head */
 /* Custom overrides are handled in index.html <style> */
 /* Animation for Toast Notification */
 @keyframes slideIn {
     from { transform: translateY(100%); opacity: 0; }
@@ -12,10 +11,25 @@
     to { opacity: 0; }
 }
 .toast {
     animation: slideIn 0.3s ease-out forwards;
 }
 .toast.hiding {
     animation: fadeOut 0.3s ease-in forwards;
-}

 /* Base styles are handled by Tailwind CSS in the head */
 /* Custom overrides are handled in index.html <style> */
 /* Animation for Toast Notification */
 @keyframes slideIn {
     from { transform: translateY(100%); opacity: 0; }
     to { opacity: 0; }
 }
+@keyframes spin {
+    to { transform: rotate(360deg); }
+}
 .toast {
     animation: slideIn 0.3s ease-out forwards;
 }
 .toast.hiding {
     animation: fadeOut 0.3s ease-in forwards;
+}
+.animate-spin {
+    animation: spin 1s linear infinite;
+}
+/* Mode Toggle Active State: script.js adds this class to the mode button (#mode-text or #mode-audio) that was clicked and removes it from the other one */
+.mode-active {
+    background-color: #334155; /* Slate 700 */
+    color: white;
+    box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1);
+}