Spaces:
Running
Running
🐳 07/02 - 04:42 - mas assim, cara, eu devo mandar o JSON, precisamente, sem precisar passar pela opção de transcrever o áudio que enviar, mas que deve separar do jeito que está nas regras
Browse files- index.html +30 -1
- script.js +145 -4
index.html
CHANGED
|
@@ -82,6 +82,27 @@
|
|
| 82 |
<div class="lg:col-span-1 space-y-6">
|
| 83 |
<upload-zone></upload-zone>
|
| 84 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
<div class="bg-slate-900/80 border border-slate-800 rounded-2xl p-5 shadow-xl backdrop-blur-md">
|
| 86 |
<h3 class="text-lg font-semibold text-slate-200 mb-4 flex items-center gap-2">
|
| 87 |
<i data-feather="folder" class="w-5 h-5 text-primary-400"></i>
|
|
@@ -204,10 +225,18 @@
|
|
| 204 |
|
| 205 |
<!-- Tab Content: SRT -->
|
| 206 |
<div id="tab-srt" class="tab-content hidden p-6 space-y-6">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 207 |
<div class="grid grid-cols-1 md:grid-cols-2 gap-6">
|
| 208 |
<div class="space-y-2">
|
| 209 |
<label class="text-sm font-medium text-slate-300">Modelo Whisper</label>
|
| 210 |
-
<select id="whisper-model" class="w-full bg-slate-800 border border-slate-700 rounded-lg px-4 py-2 text-slate-200 outline-none">
|
| 211 |
<option value="small">Small (balanceado)</option>
|
| 212 |
<option value="medium">Medium (preciso)</option>
|
| 213 |
<option value="tiny">Tiny (rápido)</option>
|
|
|
|
| 82 |
<div class="lg:col-span-1 space-y-6">
|
| 83 |
<upload-zone></upload-zone>
|
| 84 |
|
| 85 |
+
<!-- JSON Upload Zone -->
|
| 86 |
+
<div class="bg-slate-900/80 border border-slate-800 rounded-2xl p-5 shadow-xl backdrop-blur-md">
|
| 87 |
+
<h3 class="text-lg font-semibold text-slate-200 mb-4 flex items-center gap-2">
|
| 88 |
+
<i data-feather="code" class="w-5 h-5 text-secondary-400"></i>
|
| 89 |
+
Upload JSON de Timestamps
|
| 90 |
+
</h3>
|
| 91 |
+
<div id="json-upload-zone" class="border-2 border-dashed border-slate-700 rounded-xl p-4 text-center transition-all duration-300 bg-slate-800/50 hover:border-secondary-500/50 hover:bg-secondary-500/5 cursor-pointer">
|
| 92 |
+
<input type="file" id="json-input" accept=".json" class="hidden">
|
| 93 |
+
<i data-feather="upload" class="w-6 h-6 text-slate-500 mx-auto mb-2"></i>
|
| 94 |
+
<p class="text-sm text-slate-400">Arraste ou clique para enviar JSON</p>
|
| 95 |
+
<p class="text-xs text-slate-600 mt-1">Formato: array com text, start_time, end_time</p>
|
| 96 |
+
</div>
|
| 97 |
+
<div id="json-status" class="mt-3 hidden">
|
| 98 |
+
<div class="flex items-center gap-2 text-sm text-emerald-400">
|
| 99 |
+
<i data-feather="check-circle" class="w-4 h-4"></i>
|
| 100 |
+
<span id="json-filename">carregado.json</span>
|
| 101 |
+
</div>
|
| 102 |
+
<p class="text-xs text-slate-500 mt-1" id="json-words-count">0 palavras</p>
|
| 103 |
+
</div>
|
| 104 |
+
</div>
|
| 105 |
+
|
| 106 |
<div class="bg-slate-900/80 border border-slate-800 rounded-2xl p-5 shadow-xl backdrop-blur-md">
|
| 107 |
<h3 class="text-lg font-semibold text-slate-200 mb-4 flex items-center gap-2">
|
| 108 |
<i data-feather="folder" class="w-5 h-5 text-primary-400"></i>
|
|
|
|
| 225 |
|
| 226 |
<!-- Tab Content: SRT -->
|
| 227 |
<div id="tab-srt" class="tab-content hidden p-6 space-y-6">
|
| 228 |
+
<div class="flex items-center gap-3 p-3 bg-secondary-900/20 border border-secondary-700/30 rounded-lg mb-4">
|
| 229 |
+
<input type="checkbox" id="use-json-timestamps" class="w-4 h-4 rounded border-slate-600 text-secondary-500 focus:ring-secondary-500 bg-slate-700">
|
| 230 |
+
<div>
|
| 231 |
+
<label for="use-json-timestamps" class="text-sm font-medium text-slate-200 cursor-pointer">Usar JSON de Timestamps carregado</label>
|
| 232 |
+
<p class="text-xs text-slate-500">Pula a transcrição e usa os timestamps do JSON enviado</p>
|
| 233 |
+
</div>
|
| 234 |
+
</div>
|
| 235 |
+
|
| 236 |
<div class="grid grid-cols-1 md:grid-cols-2 gap-6">
|
| 237 |
<div class="space-y-2">
|
| 238 |
<label class="text-sm font-medium text-slate-300">Modelo Whisper</label>
|
| 239 |
+
<select id="whisper-model" class="w-full bg-slate-800 border border-slate-700 rounded-lg px-4 py-2 text-slate-200 outline-none" ${''}>
|
| 240 |
<option value="small">Small (balanceado)</option>
|
| 241 |
<option value="medium">Medium (preciso)</option>
|
| 242 |
<option value="tiny">Tiny (rápido)</option>
|
script.js
CHANGED
|
@@ -32,6 +32,7 @@ class AudioPipeline {
|
|
| 32 |
this.currentAudioBuffer = null;
|
| 33 |
this.audioPlayer = null;
|
| 34 |
this.isPlaying = false;
|
|
|
|
| 35 |
|
| 36 |
this.init();
|
| 37 |
}
|
|
@@ -80,6 +81,10 @@ class AudioPipeline {
|
|
| 80 |
document.addEventListener('files-uploaded', (e) => this.handleFiles(e.detail.files));
|
| 81 |
document.addEventListener('file-removed', (e) => this.removeFile(e.detail.index));
|
| 82 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
// Keyboard shortcuts
|
| 84 |
document.addEventListener('keydown', (e) => {
|
| 85 |
if (e.ctrlKey && e.key === 'Enter') {
|
|
@@ -323,6 +328,81 @@ formatFileSize(bytes) {
|
|
| 323 |
clean: cleanText.toLowerCase()
|
| 324 |
};
|
| 325 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 326 |
async startProcessing() {
|
| 327 |
if (this.files.length === 0) {
|
| 328 |
this.log('Nenhum arquivo para processar', 'error');
|
|
@@ -334,6 +414,14 @@ formatFileSize(bytes) {
|
|
| 334 |
this.log('Insira o roteiro/texto para alinhamento', 'warning');
|
| 335 |
return;
|
| 336 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 337 |
|
| 338 |
this.isProcessing = true;
|
| 339 |
this.updateStatus('Processando...', 'processing');
|
|
@@ -356,9 +444,15 @@ formatFileSize(bytes) {
|
|
| 356 |
const processedAudio = await this.removeSilence(fileData.buffer);
|
| 357 |
this.processedBuffers.set(fileData.name, processedAudio);
|
| 358 |
|
| 359 |
-
// 2.
|
| 360 |
-
|
| 361 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 362 |
this.transcriptions.set(fileData.name, transcript);
|
| 363 |
|
| 364 |
// 3. Divisão inteligente do roteiro
|
|
@@ -604,6 +698,53 @@ formatFileSize(bytes) {
|
|
| 604 |
}
|
| 605 |
}
|
| 606 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 607 |
generateRealisticTranscript(audioBlob) {
|
| 608 |
// Gera segmentos realistas baseados na duração do áudio
|
| 609 |
const duration = audioBlob.size / 16000; // estimativa aproximada
|
|
@@ -784,7 +925,7 @@ formatFileSize(bytes) {
|
|
| 784 |
// Gera JSON com timestamps palavra-a-palavra
|
| 785 |
const wordTimestamps = [];
|
| 786 |
|
| 787 |
-
// Se tiver transcript com palavras individuais, usa ele
|
| 788 |
if (transcript && transcript.words) {
|
| 789 |
transcript.words.forEach(word => {
|
| 790 |
wordTimestamps.push({
|
|
|
|
| 32 |
this.currentAudioBuffer = null;
|
| 33 |
this.audioPlayer = null;
|
| 34 |
this.isPlaying = false;
|
| 35 |
+
this.jsonTimestamps = null; // Armazena timestamps do JSON carregado
|
| 36 |
|
| 37 |
this.init();
|
| 38 |
}
|
|
|
|
| 81 |
document.addEventListener('files-uploaded', (e) => this.handleFiles(e.detail.files));
|
| 82 |
document.addEventListener('file-removed', (e) => this.removeFile(e.detail.index));
|
| 83 |
|
| 84 |
+
// JSON upload handling
|
| 85 |
+
this.setupJSONUpload();
|
| 86 |
+
}
|
| 87 |
+
|
| 88 |
// Keyboard shortcuts
|
| 89 |
document.addEventListener('keydown', (e) => {
|
| 90 |
if (e.ctrlKey && e.key === 'Enter') {
|
|
|
|
| 328 |
clean: cleanText.toLowerCase()
|
| 329 |
};
|
| 330 |
}
|
| 331 |
+
|
| 332 |
+
setupJSONUpload() {
|
| 333 |
+
const jsonZone = document.getElementById('json-upload-zone');
|
| 334 |
+
const jsonInput = document.getElementById('json-input');
|
| 335 |
+
const jsonStatus = document.getElementById('json-status');
|
| 336 |
+
const jsonFilename = document.getElementById('json-filename');
|
| 337 |
+
const jsonWordsCount = document.getElementById('json-words-count');
|
| 338 |
+
|
| 339 |
+
jsonZone.addEventListener('click', () => jsonInput.click());
|
| 340 |
+
|
| 341 |
+
jsonInput.addEventListener('change', async (e) => {
|
| 342 |
+
const file = e.target.files[0];
|
| 343 |
+
if (!file) return;
|
| 344 |
+
|
| 345 |
+
try {
|
| 346 |
+
const text = await file.text();
|
| 347 |
+
const data = JSON.parse(text);
|
| 348 |
+
|
| 349 |
+
// Valida formato do JSON
|
| 350 |
+
if (!Array.isArray(data)) {
|
| 351 |
+
throw new Error('JSON deve ser um array');
|
| 352 |
+
}
|
| 353 |
+
|
| 354 |
+
// Valida estrutura dos itens
|
| 355 |
+
const valid = data.every(item =>
|
| 356 |
+
item.hasOwnProperty('text') &&
|
| 357 |
+
item.hasOwnProperty('start_time') &&
|
| 358 |
+
item.hasOwnProperty('end_time')
|
| 359 |
+
);
|
| 360 |
+
|
| 361 |
+
if (!valid) {
|
| 362 |
+
throw new Error('Cada item deve ter: text, start_time, end_time');
|
| 363 |
+
}
|
| 364 |
+
|
| 365 |
+
this.jsonTimestamps = data;
|
| 366 |
+
|
| 367 |
+
// Atualiza UI
|
| 368 |
+
jsonStatus.classList.remove('hidden');
|
| 369 |
+
jsonFilename.textContent = file.name;
|
| 370 |
+
jsonWordsCount.textContent = `${data.length} palavras carregadas`;
|
| 371 |
+
jsonZone.classList.add('border-secondary-500/50', 'bg-secondary-500/10');
|
| 372 |
+
|
| 373 |
+
this.log(`JSON carregado: ${data.length} palavras`, 'success');
|
| 374 |
+
|
| 375 |
+
} catch (error) {
|
| 376 |
+
this.log(`Erro ao carregar JSON: ${error.message}`, 'error');
|
| 377 |
+
this.jsonTimestamps = null;
|
| 378 |
+
jsonStatus.classList.add('hidden');
|
| 379 |
+
jsonZone.classList.remove('border-secondary-500/50', 'bg-secondary-500/10');
|
| 380 |
+
}
|
| 381 |
+
});
|
| 382 |
+
|
| 383 |
+
// Drag and drop para JSON
|
| 384 |
+
jsonZone.addEventListener('dragover', (e) => {
|
| 385 |
+
e.preventDefault();
|
| 386 |
+
jsonZone.classList.add('border-secondary-500', 'bg-secondary-500/20');
|
| 387 |
+
});
|
| 388 |
+
|
| 389 |
+
jsonZone.addEventListener('dragleave', () => {
|
| 390 |
+
jsonZone.classList.remove('border-secondary-500', 'bg-secondary-500/20');
|
| 391 |
+
});
|
| 392 |
+
|
| 393 |
+
jsonZone.addEventListener('drop', (e) => {
|
| 394 |
+
e.preventDefault();
|
| 395 |
+
jsonZone.classList.remove('border-secondary-500', 'bg-secondary-500/20');
|
| 396 |
+
|
| 397 |
+
const file = e.dataTransfer.files[0];
|
| 398 |
+
if (file && file.name.endsWith('.json')) {
|
| 399 |
+
const dataTransfer = new DataTransfer();
|
| 400 |
+
dataTransfer.items.add(file);
|
| 401 |
+
jsonInput.files = dataTransfer.files;
|
| 402 |
+
jsonInput.dispatchEvent(new Event('change'));
|
| 403 |
+
}
|
| 404 |
+
});
|
| 405 |
+
}
|
| 406 |
async startProcessing() {
|
| 407 |
if (this.files.length === 0) {
|
| 408 |
this.log('Nenhum arquivo para processar', 'error');
|
|
|
|
| 414 |
this.log('Insira o roteiro/texto para alinhamento', 'warning');
|
| 415 |
return;
|
| 416 |
}
|
| 417 |
+
|
| 418 |
+
// Verifica se deve usar JSON de timestamps
|
| 419 |
+
const useJSON = document.getElementById('use-json-timestamps').checked;
|
| 420 |
+
|
| 421 |
+
if (useJSON && !this.jsonTimestamps) {
|
| 422 |
+
this.log('Marque "Usar JSON" e carregue um arquivo JSON primeiro', 'warning');
|
| 423 |
+
return;
|
| 424 |
+
}
|
| 425 |
|
| 426 |
this.isProcessing = true;
|
| 427 |
this.updateStatus('Processando...', 'processing');
|
|
|
|
| 444 |
const processedAudio = await this.removeSilence(fileData.buffer);
|
| 445 |
this.processedBuffers.set(fileData.name, processedAudio);
|
| 446 |
|
| 447 |
+
// 2. Usa JSON ou transcreve com Whisper
|
| 448 |
+
let transcript;
|
| 449 |
+
if (useJSON && this.jsonTimestamps) {
|
| 450 |
+
this.log('Etapa 2/5: Usando JSON de timestamps carregado...', 'info');
|
| 451 |
+
transcript = this.createTranscriptFromJSON(this.jsonTimestamps);
|
| 452 |
+
} else {
|
| 453 |
+
this.log('Etapa 2/5: Transcrevendo com Whisper AI...', 'info');
|
| 454 |
+
transcript = await this.transcribeWithWhisper(processedAudio.blob);
|
| 455 |
+
}
|
| 456 |
this.transcriptions.set(fileData.name, transcript);
|
| 457 |
|
| 458 |
// 3. Divisão inteligente do roteiro
|
|
|
|
| 698 |
}
|
| 699 |
}
|
| 700 |
|
| 701 |
+
createTranscriptFromJSON(jsonTimestamps) {
|
| 702 |
+
// Cria objeto de transcript a partir do JSON carregado
|
| 703 |
+
const segments = [];
|
| 704 |
+
const words = jsonTimestamps.map(item => ({
|
| 705 |
+
word: item.text,
|
| 706 |
+
start: item.start_time,
|
| 707 |
+
end: item.end_time
|
| 708 |
+
}));
|
| 709 |
+
|
| 710 |
+
// Agrupa palavras em segmentos (3-6 palavras por segmento)
|
| 711 |
+
let segmentStart = null;
|
| 712 |
+
let segmentWords = [];
|
| 713 |
+
|
| 714 |
+
words.forEach((w, idx) => {
|
| 715 |
+
if (segmentStart === null) segmentStart = w.start;
|
| 716 |
+
segmentWords.push(w.word);
|
| 717 |
+
|
| 718 |
+
if (segmentWords.length >= 3 + Math.floor(Math.random() * 4)) {
|
| 719 |
+
segments.push({
|
| 720 |
+
start: segmentStart,
|
| 721 |
+
end: w.end,
|
| 722 |
+
text: segmentWords.join(' '),
|
| 723 |
+
words: words.filter((_, i) => i >= idx - segmentWords.length + 1 && i <= idx)
|
| 724 |
+
});
|
| 725 |
+
segmentStart = null;
|
| 726 |
+
segmentWords = [];
|
| 727 |
+
}
|
| 728 |
+
});
|
| 729 |
+
|
| 730 |
+
// Adiciona último segmento
|
| 731 |
+
if (segmentWords.length > 0) {
|
| 732 |
+
const lastWords = words.slice(-segmentWords.length);
|
| 733 |
+
segments.push({
|
| 734 |
+
start: lastWords[0].start,
|
| 735 |
+
end: lastWords[lastWords.length - 1].end,
|
| 736 |
+
text: segmentWords.join(' '),
|
| 737 |
+
words: lastWords
|
| 738 |
+
});
|
| 739 |
+
}
|
| 740 |
+
|
| 741 |
+
return {
|
| 742 |
+
segments,
|
| 743 |
+
text: segments.map(s => s.text).join(' '),
|
| 744 |
+
words: words
|
| 745 |
+
};
|
| 746 |
+
}
|
| 747 |
+
|
| 748 |
generateRealisticTranscript(audioBlob) {
|
| 749 |
// Gera segmentos realistas baseados na duração do áudio
|
| 750 |
const duration = audioBlob.size / 16000; // estimativa aproximada
|
|
|
|
| 925 |
// Gera JSON com timestamps palavra-a-palavra
|
| 926 |
const wordTimestamps = [];
|
| 927 |
|
| 928 |
+
// Se tiver transcript com palavras individuais (do JSON carregado ou transcrição), usa ele
|
| 929 |
if (transcript && transcript.words) {
|
| 930 |
transcript.words.forEach(word => {
|
| 931 |
wordTimestamps.push({
|