eubottura committed on
Commit
b73afeb
·
verified ·
1 Parent(s): 4b0c0e4

🐳 07/02 - 04:42 - mas assim cara, eu devo mandar o JSON, precisamente, sem precisar passar pela opção de transcrever o áudio que enviar, mas que deve separar do jeito que estão nas regras

Browse files
Files changed (2) hide show
  1. index.html +30 -1
  2. script.js +145 -4
index.html CHANGED
@@ -82,6 +82,27 @@
82
  <div class="lg:col-span-1 space-y-6">
83
  <upload-zone></upload-zone>
84
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  <div class="bg-slate-900/80 border border-slate-800 rounded-2xl p-5 shadow-xl backdrop-blur-md">
86
  <h3 class="text-lg font-semibold text-slate-200 mb-4 flex items-center gap-2">
87
  <i data-feather="folder" class="w-5 h-5 text-primary-400"></i>
@@ -204,10 +225,18 @@
204
 
205
  <!-- Tab Content: SRT -->
206
  <div id="tab-srt" class="tab-content hidden p-6 space-y-6">
 
 
 
 
 
 
 
 
207
  <div class="grid grid-cols-1 md:grid-cols-2 gap-6">
208
  <div class="space-y-2">
209
  <label class="text-sm font-medium text-slate-300">Modelo Whisper</label>
210
- <select id="whisper-model" class="w-full bg-slate-800 border border-slate-700 rounded-lg px-4 py-2 text-slate-200 outline-none">
211
  <option value="small">Small (balanceado)</option>
212
  <option value="medium">Medium (preciso)</option>
213
  <option value="tiny">Tiny (rápido)</option>
 
82
  <div class="lg:col-span-1 space-y-6">
83
  <upload-zone></upload-zone>
84
 
85
+ <!-- JSON Upload Zone -->
86
+ <div class="bg-slate-900/80 border border-slate-800 rounded-2xl p-5 shadow-xl backdrop-blur-md">
87
+ <h3 class="text-lg font-semibold text-slate-200 mb-4 flex items-center gap-2">
88
+ <i data-feather="code" class="w-5 h-5 text-secondary-400"></i>
89
+ Upload JSON de Timestamps
90
+ </h3>
91
+ <div id="json-upload-zone" class="border-2 border-dashed border-slate-700 rounded-xl p-4 text-center transition-all duration-300 bg-slate-800/50 hover:border-secondary-500/50 hover:bg-secondary-500/5 cursor-pointer">
92
+ <input type="file" id="json-input" accept=".json" class="hidden">
93
+ <i data-feather="upload" class="w-6 h-6 text-slate-500 mx-auto mb-2"></i>
94
+ <p class="text-sm text-slate-400">Arraste ou clique para enviar JSON</p>
95
+ <p class="text-xs text-slate-600 mt-1">Formato: array com text, start_time, end_time</p>
96
+ </div>
97
+ <div id="json-status" class="mt-3 hidden">
98
+ <div class="flex items-center gap-2 text-sm text-emerald-400">
99
+ <i data-feather="check-circle" class="w-4 h-4"></i>
100
+ <span id="json-filename">carregado.json</span>
101
+ </div>
102
+ <p class="text-xs text-slate-500 mt-1" id="json-words-count">0 palavras</p>
103
+ </div>
104
+ </div>
105
+
106
  <div class="bg-slate-900/80 border border-slate-800 rounded-2xl p-5 shadow-xl backdrop-blur-md">
107
  <h3 class="text-lg font-semibold text-slate-200 mb-4 flex items-center gap-2">
108
  <i data-feather="folder" class="w-5 h-5 text-primary-400"></i>
 
225
 
226
  <!-- Tab Content: SRT -->
227
  <div id="tab-srt" class="tab-content hidden p-6 space-y-6">
228
+ <div class="flex items-center gap-3 p-3 bg-secondary-900/20 border border-secondary-700/30 rounded-lg mb-4">
229
+ <input type="checkbox" id="use-json-timestamps" class="w-4 h-4 rounded border-slate-600 text-secondary-500 focus:ring-secondary-500 bg-slate-700">
230
+ <div>
231
+ <label for="use-json-timestamps" class="text-sm font-medium text-slate-200 cursor-pointer">Usar JSON de Timestamps carregado</label>
232
+ <p class="text-xs text-slate-500">Pula a transcrição e usa os timestamps do JSON enviado</p>
233
+ </div>
234
+ </div>
235
+
236
  <div class="grid grid-cols-1 md:grid-cols-2 gap-6">
237
  <div class="space-y-2">
238
  <label class="text-sm font-medium text-slate-300">Modelo Whisper</label>
239
+ <select id="whisper-model" class="w-full bg-slate-800 border border-slate-700 rounded-lg px-4 py-2 text-slate-200 outline-none">
240
  <option value="small">Small (balanceado)</option>
241
  <option value="medium">Medium (preciso)</option>
242
  <option value="tiny">Tiny (rápido)</option>
script.js CHANGED
@@ -32,6 +32,7 @@ class AudioPipeline {
32
  this.currentAudioBuffer = null;
33
  this.audioPlayer = null;
34
  this.isPlaying = false;
 
35
 
36
  this.init();
37
  }
@@ -80,6 +81,10 @@ class AudioPipeline {
80
  document.addEventListener('files-uploaded', (e) => this.handleFiles(e.detail.files));
81
  document.addEventListener('file-removed', (e) => this.removeFile(e.detail.index));
82
 
 
 
 
 
83
  // Keyboard shortcuts
84
  document.addEventListener('keydown', (e) => {
85
  if (e.ctrlKey && e.key === 'Enter') {
@@ -323,6 +328,81 @@ formatFileSize(bytes) {
323
  clean: cleanText.toLowerCase()
324
  };
325
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
326
  async startProcessing() {
327
  if (this.files.length === 0) {
328
  this.log('Nenhum arquivo para processar', 'error');
@@ -334,6 +414,14 @@ formatFileSize(bytes) {
334
  this.log('Insira o roteiro/texto para alinhamento', 'warning');
335
  return;
336
  }
 
 
 
 
 
 
 
 
337
 
338
  this.isProcessing = true;
339
  this.updateStatus('Processando...', 'processing');
@@ -356,9 +444,15 @@ formatFileSize(bytes) {
356
  const processedAudio = await this.removeSilence(fileData.buffer);
357
  this.processedBuffers.set(fileData.name, processedAudio);
358
 
359
- // 2. Transcrição real com Whisper (Hugging Face)
360
- this.log('Etapa 2/5: Transcrevendo com Whisper AI...', 'info');
361
- const transcript = await this.transcribeWithWhisper(processedAudio.blob);
 
 
 
 
 
 
362
  this.transcriptions.set(fileData.name, transcript);
363
 
364
  // 3. Divisão inteligente do roteiro
@@ -604,6 +698,53 @@ formatFileSize(bytes) {
604
  }
605
  }
606
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
607
  generateRealisticTranscript(audioBlob) {
608
  // Gera segmentos realistas baseados na duração do áudio
609
  const duration = audioBlob.size / 16000; // estimativa aproximada
@@ -784,7 +925,7 @@ formatFileSize(bytes) {
784
  // Gera JSON com timestamps palavra-a-palavra
785
  const wordTimestamps = [];
786
 
787
- // Se tiver transcript com palavras individuais, usa ele
788
  if (transcript && transcript.words) {
789
  transcript.words.forEach(word => {
790
  wordTimestamps.push({
 
32
  this.currentAudioBuffer = null;
33
  this.audioPlayer = null;
34
  this.isPlaying = false;
35
+ this.jsonTimestamps = null; // Armazena timestamps do JSON carregado
36
 
37
  this.init();
38
  }
 
81
  document.addEventListener('files-uploaded', (e) => this.handleFiles(e.detail.files));
82
  document.addEventListener('file-removed', (e) => this.removeFile(e.detail.index));
83
 
84
+ // JSON upload handling
85
+ this.setupJSONUpload();
86
+ }
87
+
88
  // Keyboard shortcuts
89
  document.addEventListener('keydown', (e) => {
90
  if (e.ctrlKey && e.key === 'Enter') {
 
328
  clean: cleanText.toLowerCase()
329
  };
330
  }
331
+
332
+ setupJSONUpload() {
333
+ const jsonZone = document.getElementById('json-upload-zone');
334
+ const jsonInput = document.getElementById('json-input');
335
+ const jsonStatus = document.getElementById('json-status');
336
+ const jsonFilename = document.getElementById('json-filename');
337
+ const jsonWordsCount = document.getElementById('json-words-count');
338
+
339
+ jsonZone.addEventListener('click', () => jsonInput.click());
340
+
341
+ jsonInput.addEventListener('change', async (e) => {
342
+ const file = e.target.files[0];
343
+ if (!file) return;
344
+
345
+ try {
346
+ const text = await file.text();
347
+ const data = JSON.parse(text);
348
+
349
+ // Valida formato do JSON
350
+ if (!Array.isArray(data)) {
351
+ throw new Error('JSON deve ser um array');
352
+ }
353
+
354
+ // Valida estrutura dos itens
355
+ const valid = data.every(item =>
356
+ item.hasOwnProperty('text') &&
357
+ item.hasOwnProperty('start_time') &&
358
+ item.hasOwnProperty('end_time')
359
+ );
360
+
361
+ if (!valid) {
362
+ throw new Error('Cada item deve ter: text, start_time, end_time');
363
+ }
364
+
365
+ this.jsonTimestamps = data;
366
+
367
+ // Atualiza UI
368
+ jsonStatus.classList.remove('hidden');
369
+ jsonFilename.textContent = file.name;
370
+ jsonWordsCount.textContent = `${data.length} palavras carregadas`;
371
+ jsonZone.classList.add('border-secondary-500/50', 'bg-secondary-500/10');
372
+
373
+ this.log(`JSON carregado: ${data.length} palavras`, 'success');
374
+
375
+ } catch (error) {
376
+ this.log(`Erro ao carregar JSON: ${error.message}`, 'error');
377
+ this.jsonTimestamps = null;
378
+ jsonStatus.classList.add('hidden');
379
+ jsonZone.classList.remove('border-secondary-500/50', 'bg-secondary-500/10');
380
+ }
381
+ });
382
+
383
+ // Drag and drop para JSON
384
+ jsonZone.addEventListener('dragover', (e) => {
385
+ e.preventDefault();
386
+ jsonZone.classList.add('border-secondary-500', 'bg-secondary-500/20');
387
+ });
388
+
389
+ jsonZone.addEventListener('dragleave', () => {
390
+ jsonZone.classList.remove('border-secondary-500', 'bg-secondary-500/20');
391
+ });
392
+
393
+ jsonZone.addEventListener('drop', (e) => {
394
+ e.preventDefault();
395
+ jsonZone.classList.remove('border-secondary-500', 'bg-secondary-500/20');
396
+
397
+ const file = e.dataTransfer.files[0];
398
+ if (file && file.name.endsWith('.json')) {
399
+ const dataTransfer = new DataTransfer();
400
+ dataTransfer.items.add(file);
401
+ jsonInput.files = dataTransfer.files;
402
+ jsonInput.dispatchEvent(new Event('change'));
403
+ }
404
+ });
405
+ }
406
  async startProcessing() {
407
  if (this.files.length === 0) {
408
  this.log('Nenhum arquivo para processar', 'error');
 
414
  this.log('Insira o roteiro/texto para alinhamento', 'warning');
415
  return;
416
  }
417
+
418
+ // Verifica se deve usar JSON de timestamps
419
+ const useJSON = document.getElementById('use-json-timestamps').checked;
420
+
421
+ if (useJSON && !this.jsonTimestamps) {
422
+ this.log('Marque "Usar JSON" e carregue um arquivo JSON primeiro', 'warning');
423
+ return;
424
+ }
425
 
426
  this.isProcessing = true;
427
  this.updateStatus('Processando...', 'processing');
 
444
  const processedAudio = await this.removeSilence(fileData.buffer);
445
  this.processedBuffers.set(fileData.name, processedAudio);
446
 
447
+ // 2. Usa JSON ou transcreve com Whisper
448
+ let transcript;
449
+ if (useJSON && this.jsonTimestamps) {
450
+ this.log('Etapa 2/5: Usando JSON de timestamps carregado...', 'info');
451
+ transcript = this.createTranscriptFromJSON(this.jsonTimestamps);
452
+ } else {
453
+ this.log('Etapa 2/5: Transcrevendo com Whisper AI...', 'info');
454
+ transcript = await this.transcribeWithWhisper(processedAudio.blob);
455
+ }
456
  this.transcriptions.set(fileData.name, transcript);
457
 
458
  // 3. Divisão inteligente do roteiro
 
698
  }
699
  }
700
 
701
+ createTranscriptFromJSON(jsonTimestamps) {
702
+ // Cria objeto de transcript a partir do JSON carregado
703
+ const segments = [];
704
+ const words = jsonTimestamps.map(item => ({
705
+ word: item.text,
706
+ start: item.start_time,
707
+ end: item.end_time
708
+ }));
709
+
710
+ // Agrupa palavras em segmentos (3-6 palavras por segmento)
711
+ let segmentStart = null;
712
+ let segmentWords = [];
713
+
714
+ words.forEach((w, idx) => {
715
+ if (segmentStart === null) segmentStart = w.start;
716
+ segmentWords.push(w.word);
717
+
718
+ if (segmentWords.length >= 3 + Math.floor(Math.random() * 4)) {
719
+ segments.push({
720
+ start: segmentStart,
721
+ end: w.end,
722
+ text: segmentWords.join(' '),
723
+ words: words.filter((_, i) => i >= idx - segmentWords.length + 1 && i <= idx)
724
+ });
725
+ segmentStart = null;
726
+ segmentWords = [];
727
+ }
728
+ });
729
+
730
+ // Adiciona último segmento
731
+ if (segmentWords.length > 0) {
732
+ const lastWords = words.slice(-segmentWords.length);
733
+ segments.push({
734
+ start: lastWords[0].start,
735
+ end: lastWords[lastWords.length - 1].end,
736
+ text: segmentWords.join(' '),
737
+ words: lastWords
738
+ });
739
+ }
740
+
741
+ return {
742
+ segments,
743
+ text: segments.map(s => s.text).join(' '),
744
+ words: words
745
+ };
746
+ }
747
+
748
  generateRealisticTranscript(audioBlob) {
749
  // Gera segmentos realistas baseados na duração do áudio
750
  const duration = audioBlob.size / 16000; // estimativa aproximada
 
925
  // Gera JSON com timestamps palavra-a-palavra
926
  const wordTimestamps = [];
927
 
928
+ // Se tiver transcript com palavras individuais (do JSON carregado ou transcrição), usa ele
929
  if (transcript && transcript.words) {
930
  transcript.words.forEach(word => {
931
  wordTimestamps.push({