GitHub Action
Sync from GitHub: 6e5a10f61469fc6c08b276cfd8f9fe1819d4dfc4
6a1d8ac
<script>
import {
Play,
Loader2,
AlertCircle,
Copy,
Share,
MoreHorizontal,
Settings,
Pause,
Layout,
Code,
X,
RotateCcw,
Mic,
Square,
Shuffle,
} from 'lucide-svelte';
import { onMount } from 'svelte';
// --- Voice cloning state ---
let isRecording = false; // true while the MediaRecorder is actively capturing
let sessionRecordings = []; // recordings made this session: { id, url, blob, timestamp, duration }
let selectedRecording = null; // recording currently chosen as the clone candidate
let recordingWaveform = []; // NOTE(review): reset around recordings but never populated here — confirm before removing
let mediaRecorder = null; // active MediaRecorder instance, or null when idle
let audioChunks = []; // raw Blob chunks delivered by the recorder
let showErrorModal = false; // controls visibility of the error modal
let errorMessage = ''; // headline shown in the error modal
let errorDetails = ''; // optional detail text for the error modal
let historyCount = 0; // number of 'generation' entries in the user's history
let recordingProgress = 0; // 0-100, fills over the 15-second target duration
let recordingTimer = null; // setInterval handle driving recordingProgress
let audioAmplitude = 0; // 0-1 normalized mic level for the pulse animation
let audioContext = null; // AudioContext used only for amplitude analysis
let analyser = null; // AnalyserNode fed by the mic stream
let playingRecording = null; // recording currently being played back, if any
let currentAudio = null; // HTMLAudioElement backing playback
let transcriptionStatus = {}; // per-recording-id transcription results: { transcript, first_words, consent_detected }
let isTranscribing = false; // true while /api/voice/transcribe is in flight
let isUploading = false; // true while /api/voice/upload is in flight
let cloneName = ''; // NOTE(review): appears unused in this component — confirm before removing
let uploadResults = {}; // per-recording-id upload responses from the backend
let successMessage = ''; // banner text shown after a successful upload
let userVoices = []; // the signed-in user's existing voice clones
// --- Sample text rotation state ---
// Passages the user can read aloud; shuffleSampleText() cycles through them.
const sampleTexts = [
"There's a quiet kind of magic in the early hours of the morning, when the world is still half-asleep and the air feels crisp with possibility. The hum of the refrigerator becomes a rhythm, the ticking of the clock a heartbeat, and for a brief moment, everything feels perfectly in sync.",
'The aroma of fresh coffee dances through the kitchen as sunlight streams through translucent curtains, casting golden patterns on weathered wooden floors. Steam rises from the ceramic mug like incense, creating a small sanctuary of warmth and comfort in the midst of a busy day.',
'Ocean waves crash against weathered cliffs with relentless determination, their white foam reaching toward the endless sky. Seabirds call out across the salt-scented breeze, their cries echoing off ancient stone formations that have stood witness to countless storms and seasons.',
];
let currentSampleIndex = 0; // index into sampleTexts currently displayed
// --- Authentication state ---
let isLoggedIn = false; // set by checkAuthStatus() from /api/auth/user
let showLoginPrompt = false; // shows the PRO sign-in nudge when recording while logged out
// Validation helper functions
/**
 * Whether a recording is long enough (>= 15 seconds) to be used for cloning.
 * @param {{ duration: number } | null | undefined} recording
 * @returns {boolean} true only for a present recording of at least 15 seconds.
 */
function isRecordingLongEnough(recording) {
  // Coerce to a real boolean: the previous `recording && ...` form leaked
  // null/undefined to callers when no recording was selected.
  return Boolean(recording) && recording.duration >= 15;
}
/**
 * Whether the transcription for a recording detected the spoken consent phrase.
 * @param {{ id: number|string } | null | undefined} recording
 * @returns {boolean} true only when a transcription exists and flagged consent.
 */
function hasConsent(recording) {
  if (!recording) return false;
  // `?? false` returns a real boolean instead of leaking undefined when no
  // transcription entry exists yet for this recording.
  return transcriptionStatus[recording.id]?.consent_detected ?? false;
}
/**
 * Open the error modal with a headline message and optional detail text.
 * @param {string} message - Short title shown in the modal header.
 * @param {string} [details] - Longer explanation rendered in the modal body.
 */
function showError(message, details = '') {
  errorDetails = details;
  errorMessage = message;
  showErrorModal = true;
}
/** Hide the error modal and discard its message/detail text. */
function closeErrorModal() {
  showErrorModal = false;
  errorDetails = '';
  errorMessage = '';
}
/** Advance to the next sample passage, wrapping back to the first at the end. */
function shuffleSampleText() {
  const next = currentSampleIndex + 1;
  currentSampleIndex = next % sampleTexts.length;
}
/**
 * Kick off the HuggingFace OAuth flow by redirecting the browser to the
 * authorize endpoint. The current path is carried in `state` so the callback
 * can return the user to where they started.
 */
function handleAuthAction() {
  // OAuth application id and requested scopes.
  const clientId = '4831a493-1dbc-4dd4-9bb3-c3b41d2e96ba';
  const scopes = 'inference-api manage-repos';
  // Remember where the user was so the callback can send them back.
  const returnPath = window.location.pathname;
  // Development runs the backend on :7860; production serves everything
  // from a single origin, so the callback lives on the current origin.
  const redirectUri =
    window.location.hostname === 'localhost'
      ? 'http://localhost:7860/auth/callback'
      : `${window.location.origin}/auth/callback`;
  const query = [
    `client_id=${clientId}`,
    `redirect_uri=${encodeURIComponent(redirectUri)}`,
    `scope=${encodeURIComponent(scopes)}`,
    'response_type=code',
    `state=${encodeURIComponent(returnPath)}`,
  ].join('&');
  window.location.href = `https://huggingface.co/oauth/authorize?${query}`;
}
/**
 * Query /api/auth/user and sync login state. Loads the user's voices on a
 * logged-out -> logged-in transition and clears them on the reverse
 * transition or on any failure.
 */
async function checkAuthStatus() {
  try {
    const res = await fetch('/api/auth/user', { credentials: 'include' });
    if (!res.ok) {
      isLoggedIn = false;
      userVoices = [];
      return;
    }
    const data = await res.json();
    const previouslyLoggedIn = isLoggedIn;
    isLoggedIn = data.authenticated;
    if (isLoggedIn && !previouslyLoggedIn) {
      // Just signed in: fetch this user's saved voice clones.
      await loadUserVoices();
    } else if (!isLoggedIn && previouslyLoggedIn) {
      // Just signed out: drop any voices we were showing.
      userVoices = [];
    }
  } catch (error) {
    // Network failure — treat as signed out.
    isLoggedIn = false;
    userVoices = [];
  }
}
// Voice cloning functions
/**
 * Start capturing microphone audio for voice cloning.
 * Prompts login if the user is not authenticated, then:
 *  1. requests a mic stream and picks the best supported MIME type,
 *  2. wires a MediaRecorder that collects chunks and, on stop, packages them
 *     into a Blob/object-URL recording appended to sessionRecordings,
 *  3. attaches an AnalyserNode to drive the amplitude animation, and
 *  4. starts a 100ms interval that fills recordingProgress over 15 seconds.
 * On mic access failure, shows the error modal instead of throwing.
 */
async function startRecording() {
// Check authentication before recording
if (!isLoggedIn) {
showLoginPrompt = true;
return;
}
try {
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
// Try to use MP3 format if supported, otherwise use WebM or default.
// (Most browsers only support the WebM/Opus fallbacks here.)
let options = {};
if (MediaRecorder.isTypeSupported('audio/mp3')) {
options.mimeType = 'audio/mp3';
} else if (MediaRecorder.isTypeSupported('audio/mpeg')) {
options.mimeType = 'audio/mpeg';
} else if (MediaRecorder.isTypeSupported('audio/webm;codecs=opus')) {
options.mimeType = 'audio/webm;codecs=opus';
} else if (MediaRecorder.isTypeSupported('audio/webm')) {
options.mimeType = 'audio/webm';
}
mediaRecorder = new MediaRecorder(stream, options);
// Reset all per-recording state before starting.
audioChunks = [];
recordingWaveform = [];
recordingProgress = 0;
audioAmplitude = 0;
// Set up audio context for amplitude detection
// (webkitAudioContext covers older Safari).
audioContext = new (window.AudioContext || window.webkitAudioContext)();
analyser = audioContext.createAnalyser();
const source = audioContext.createMediaStreamSource(stream);
source.connect(analyser);
analyser.fftSize = 256;
const bufferLength = analyser.frequencyBinCount;
const dataArray = new Uint8Array(bufferLength);
mediaRecorder.ondataavailable = (event) => {
audioChunks.push(event.data);
};
// Fires after stopRecording(): package the chunks into a playable
// recording and release the mic and audio-analysis resources.
mediaRecorder.onstop = () => {
const audioBlob = new Blob(audioChunks, { type: options.mimeType || 'audio/webm' });
const audioUrl = URL.createObjectURL(audioBlob);
const recording = {
id: Date.now(),
url: audioUrl,
blob: audioBlob,
timestamp: new Date(),
// Duration is derived from the progress timer (15s = 100%), not from
// the actual audio data, so it is approximate.
duration: (recordingProgress / 100) * 15,
};
sessionRecordings = [...sessionRecordings, recording];
// Release the microphone.
stream.getTracks().forEach((track) => track.stop());
if (audioContext) {
audioContext.close();
audioContext = null;
}
recordingProgress = 0;
audioAmplitude = 0;
};
mediaRecorder.start();
isRecording = true;
// rAF loop: average the frequency bins into a 0-1 amplitude that drives
// the concentric-circle animation; stops when recording ends.
function updateAmplitude() {
if (!isRecording || !analyser) return;
analyser.getByteFrequencyData(dataArray);
let sum = 0;
for (let i = 0; i < bufferLength; i++) {
sum += dataArray[i];
}
audioAmplitude = sum / bufferLength / 255; // Normalize to 0-1
requestAnimationFrame(updateAmplitude);
}
updateAmplitude();
// Start timer for 15-second progress
recordingTimer = setInterval(() => {
if (!isRecording) {
clearInterval(recordingTimer);
return;
}
recordingProgress += 100 / 15 / 10; // 100% over 15 seconds, updated every 100ms
if (recordingProgress >= 100) {
// Cap at 100%; recording continues until the user stops it.
recordingProgress = 100;
// Auto-stop after 15 seconds if desired
// stopRecording();
}
}, 100);
} catch (error) {
console.error('Error accessing microphone:', error);
showError('Microphone Error', 'Could not access microphone. Please check permissions.');
}
}
/**
 * Stop an in-progress recording and tear down the progress timer.
 * No-op when the recorder is absent or not actively recording.
 */
function stopRecording() {
  const active = mediaRecorder && mediaRecorder.state === 'recording';
  if (!active) return;
  mediaRecorder.stop();
  isRecording = false;
  recordingWaveform = [];
  if (recordingTimer) {
    clearInterval(recordingTimer);
    recordingTimer = null;
  }
}
/** Record button handler: stop if recording, otherwise start a recording. */
function toggleRecording() {
  const action = isRecording ? stopRecording : startRecording;
  action();
}
/** Mark a session recording as the candidate for cloning. */
function selectRecording(recording) {
  selectedRecording = recording;
}
/**
 * Play a recording, or pause it if it is the one already playing.
 * Only one Audio element is active at a time; starting a new clip stops
 * whatever was playing before.
 * @param {{ id: number|string, url: string }} recording
 */
function togglePlayRecording(recording) {
  const isActivelyPlaying =
    playingRecording?.id === recording.id && currentAudio && !currentAudio.paused;
  if (isActivelyPlaying) {
    // Second click on the playing item: pause it.
    currentAudio.pause();
    playingRecording = null;
    return;
  }
  // Stop any other clip before starting this one.
  if (currentAudio) {
    currentAudio.pause();
    currentAudio = null;
  }
  currentAudio = new Audio(recording.url);
  playingRecording = recording;
  currentAudio.addEventListener('ended', () => {
    playingRecording = null;
    currentAudio = null;
  });
  currentAudio.addEventListener('pause', () => {
    // 'pause' also fires as playback finishes; only clear state in that case
    // (manual pauses are handled by the branch at the top).
    if (currentAudio && currentAudio.ended) {
      playingRecording = null;
      currentAudio = null;
    }
  });
  // play() returns a promise that rejects when autoplay is blocked or the
  // source is invalid; the original left it floating, producing an unhandled
  // rejection and a stale "playing" UI state.
  currentAudio.play().catch((err) => {
    console.error('Audio playback failed:', err);
    playingRecording = null;
    currentAudio = null;
  });
}
/**
 * Two-phase clone flow for the selected recording:
 *  Phase 1 — POST the audio to /api/voice/transcribe and cache the result
 *  (transcript, first words, consent flag) in transcriptionStatus.
 *  Phase 2 — only if the recording is long enough AND consent was detected,
 *  POST the audio to /api/voice/upload, cache the response in uploadResults,
 *  set a success banner, and refresh the user's voice list.
 * All failures surface through the error modal; spinner flags
 * (isTranscribing/isUploading) are cleared in the finally blocks.
 */
async function cloneVoice() {
if (!selectedRecording) {
showError('Clone Error', 'Please select a recording.');
return;
}
isTranscribing = true;
successMessage = '';
try {
// Create FormData to send the audio file
const formData = new FormData();
formData.append('audio_file', selectedRecording.blob, 'recording.mp3');
const response = await fetch('/api/voice/transcribe', {
method: 'POST',
credentials: 'include',
body: formData,
});
if (!response.ok) {
const errorText = await response.text();
throw new Error(`HTTP error! status: ${response.status}, response: ${errorText}`);
}
const result = await response.json();
if (result.success) {
// Cache transcription results keyed by recording id so the template
// can render validation state and a transcript preview.
transcriptionStatus[selectedRecording.id] = {
transcript: result.transcript,
first_words: result.first_words,
consent_detected: result.consent_detected,
};
// Reassign to trigger Svelte reactivity (mutation alone is not tracked).
transcriptionStatus = { ...transcriptionStatus };
// Check if the recording meets criteria for upload
const meetsLengthCriteria = isRecordingLongEnough(selectedRecording);
const meetsConsentCriteria = result.consent_detected;
if (meetsLengthCriteria && meetsConsentCriteria) {
// Phase 2: upload to the HuggingFace dataset. Swap the spinner from
// "transcribing" to "uploading" before the request starts.
isTranscribing = false;
isUploading = true;
try {
const uploadFormData = new FormData();
uploadFormData.append('audio_file', selectedRecording.blob, 'recording.mp3');
const voiceName = `Voice_${Date.now()}`;
const transcript = encodeURIComponent(result.transcript);
const duration = Math.floor(selectedRecording.duration);
const uploadResponse = await fetch(
`/api/voice/upload?voice_name=${voiceName}&transcript=${transcript}&duration=${duration}`,
{
method: 'POST',
credentials: 'include',
body: uploadFormData,
}
);
if (!uploadResponse.ok) {
const errorText = await uploadResponse.text();
throw new Error(`Upload failed: ${errorText}`);
}
const uploadResult = await uploadResponse.json();
if (uploadResult.success) {
// Cache the upload response (includes voice_url) keyed by recording id,
// reassigning for reactivity.
uploadResults[selectedRecording.id] = uploadResult;
uploadResults = { ...uploadResults };
successMessage = `Your voice has been saved to a temporary URL for 24 hours and will be automatically deleted. You can now use it for text-to-speech generation.`;
// Reload user voices to show the new voice
await loadUserVoices();
} else {
showError('Upload Error', uploadResult.error || 'Failed to upload voice');
}
} catch (uploadError) {
showError('Upload Error', `Failed to upload voice: ${uploadError.message}`);
} finally {
isUploading = false;
}
}
} else {
showError('Transcription Error', result.error || 'Failed to transcribe audio');
}
} catch (error) {
showError('Network Error', `Failed to process recording: ${error.message}`);
} finally {
// Guard: when the upload branch ran, it already flipped isTranscribing off;
// skip the reset mid-upload so the spinner state stays consistent.
if (!isUploading) {
isTranscribing = false;
}
}
}
/**
 * Fetch the user's history and count only 'generation' entries.
 * Leaves historyCount untouched on a non-OK response; resets it to 0 when the
 * request itself fails.
 */
async function loadHistoryCount() {
  try {
    const response = await fetch('/api/history/load', {
      method: 'GET',
      credentials: 'include',
    });
    if (response.ok) {
      const data = await response.json();
      // Tolerate a 200 response with a missing entries array: the original
      // `data.entries.filter(...)` threw a TypeError into the catch block,
      // logging a spurious error and zeroing the count.
      const entries = data.entries ?? [];
      historyCount = entries.filter((e) => e.entry_type === 'generation').length;
    }
  } catch (error) {
    console.error('Error loading history count:', error);
    historyCount = 0;
  }
}
/**
 * Refresh the signed-in user's saved voice clones from the backend.
 * Does nothing when logged out; clears the list on a request failure.
 */
async function loadUserVoices() {
  if (!isLoggedIn) return;
  try {
    const res = await fetch('/api/voice/user-voices', {
      method: 'GET',
      credentials: 'include',
    });
    if (!res.ok) return;
    const payload = await res.json();
    userVoices = payload.voices;
  } catch (error) {
    console.error('Error loading user voices:', error);
    userVoices = [];
  }
}
onMount(async () => {
  // checkAuthStatus() already calls loadUserVoices() when it detects a login,
  // so the original trailing loadUserVoices() duplicated that request on
  // every mount. Auth and history are independent, so fetch them in parallel.
  await Promise.all([checkAuthStatus(), loadHistoryCount()]);
});
</script>
<svelte:head>
<title>Voice Cloning - HFStudio</title>
</svelte:head>
<div class="flex flex-col h-full">
<div class="flex-1 flex">
<!-- Main content area -->
<div class="flex-1 flex flex-col p-6">
<!-- Script reading area -->
<div class="flex-1 pb-24 relative flex flex-col">
<!-- Script text -->
<div class="relative mb-4">
<div class="absolute top-3 left-3 flex items-center gap-2 z-10">
<span class="text-sm text-gray-400">Sample script to read</span>
<button
on:click={shuffleSampleText}
class="p-1 text-gray-400 hover:text-gray-600 hover:bg-gray-100 rounded-lg transition-colors"
title="Try a different sample text"
>
<Shuffle size={16} />
</button>
</div>
<div
class="w-full h-80 pt-12 px-6 pb-6 bg-white border-2 border-amber-400 rounded-lg text-gray-900 text-lg leading-relaxed overflow-y-auto"
>
<p>
<span class="bg-yellow-50 px-2 py-1 rounded border border-amber-200"
>I consent to cloning my voice.</span
>
{sampleTexts[currentSampleIndex]}
</p>
</div>
</div>
<!-- Record button -->
<div class="flex justify-center items-center flex-1 relative">
<!-- Concentric circles for amplitude visualization -->
{#if isRecording}
<div class="absolute inset-0 flex items-center justify-center">
<div
class="absolute rounded-full border-2 border-orange-300 transition-all duration-75"
style="width: {120 + audioAmplitude * 120}px; height: {120 +
audioAmplitude * 120}px; opacity: {0.4 + audioAmplitude * 0.6}"
></div>
<div
class="absolute rounded-full border-2 border-orange-200 transition-all duration-100"
style="width: {150 + audioAmplitude * 150}px; height: {150 +
audioAmplitude * 150}px; opacity: {0.3 + audioAmplitude * 0.5}"
></div>
<div
class="absolute rounded-full border-1 border-orange-100 transition-all duration-125"
style="width: {180 + audioAmplitude * 180}px; height: {180 +
audioAmplitude * 180}px; opacity: {0.2 + audioAmplitude * 0.4}"
></div>
</div>
{/if}
<button
on:click={toggleRecording}
class="w-24 h-24 rounded-full flex items-center justify-center transition-all duration-200 shadow-lg relative overflow-hidden z-20 cursor-pointer {isRecording
? 'border-4 border-orange-500 bg-transparent'
: 'bg-orange-500 hover:bg-orange-600'}"
>
{#if isRecording}
<!-- Filling effect -->
<div
class="absolute bottom-0 left-0 right-0 bg-orange-500 transition-all duration-100 ease-linear rounded-full"
style="height: {recordingProgress}%"
></div>
<Square
size={36}
class="{recordingProgress >= 100 ? 'text-white' : 'text-orange-700'} relative z-10"
/>
{:else}
<Mic size={36} class="text-white" />
{/if}
</button>
</div>
<div class="mb-6">
<p class="text-center">
<em
>Record your voice for at least 15 seconds to create a voice clone. To prevent
unauthorized voice cloning, you must start by clearly saying "I consent to cloning my
voice" — the rest of the text is arbitrary. Try reading the sample above.</em
>
</p>
</div>
</div>
</div>
<!-- Right panel for voice cloning -->
<div class="w-80 border-l border-gray-200 bg-white h-full overflow-hidden">
<div class="p-3 h-full overflow-y-auto">
{#if !isLoggedIn && showLoginPrompt}
<!-- Login prompt message -->
<div
class="mb-3 px-3 py-2 bg-gradient-to-r from-amber-50 to-orange-50 rounded-lg border border-amber-200 relative"
>
<!-- Close button -->
<button
on:click={() => (showLoginPrompt = false)}
class="absolute top-2 right-2 text-gray-400 hover:text-gray-600 transition-colors"
aria-label="Dismiss"
>
<svg class="w-4 h-4" fill="none" viewBox="0 0 24 24" stroke="currentColor">
<path
stroke-linecap="round"
stroke-linejoin="round"
stroke-width="2"
d="M6 18L18 6M6 6l12 12"
/>
</svg>
</button>
<p class="text-sm font-medium text-gray-700 mb-1 pr-4">
Hugging Face <span
class="bg-gradient-to-r from-purple-500 via-pink-500 via-green-500 to-blue-500 bg-clip-text text-transparent font-bold"
>PRO</span
>
</p>
<p class="text-sm text-gray-600 pr-4">
Sign in with your Hugging Face <a
href="https://huggingface.co/pro"
target="_blank"
class="text-amber-600 hover:text-amber-700 underline font-medium">PRO account</a
> to get started with $2 of free API credits per month. You can add a billing method for
additional pay-as-you-go usage ⤴
</p>
</div>
{/if}
<div class="mb-4">
{#if sessionRecordings.length === 0}
<div class="text-center py-8 text-gray-500">
<Mic size={32} class="mx-auto mb-2 opacity-30" />
<p class="text-sm">Pick a recording to clone</p>
<p class="text-xs text-gray-400">No recordings yet</p>
</div>
{:else}
<div class="space-y-2">
{#each sessionRecordings as recording, i}
<div
class="border rounded-lg p-3 {selectedRecording?.id === recording.id
? 'border-amber-300 bg-amber-50'
: 'border-gray-200 hover:border-gray-300'} transition-colors"
>
<div class="flex items-center justify-between mb-2">
<span class="text-sm font-medium text-gray-900">Recording {i + 1}</span>
<button
on:click={() => togglePlayRecording(recording)}
class="p-1 hover:bg-gray-100 rounded transition-colors"
>
{#if playingRecording?.id === recording.id && currentAudio && !currentAudio.paused}
<Pause size={14} class="text-gray-600" />
{:else}
<Play size={14} class="text-gray-600" />
{/if}
</button>
</div>
<div class="text-xs text-gray-500 mb-2">
{recording.timestamp.toLocaleTimeString()}
</div>
<button
on:click={() => selectRecording(recording)}
class="w-full text-xs px-2 py-1 rounded {selectedRecording?.id === recording.id
? 'bg-amber-200 text-amber-800'
: 'bg-gray-100 text-gray-700 hover:bg-gray-200'} transition-colors"
>
{selectedRecording?.id === recording.id ? 'Selected' : 'Select for cloning'}
</button>
</div>
{/each}
</div>
{/if}
</div>
<!-- Clone section -->
<div class="mt-6 pt-4 border-t border-gray-200">
<button
on:click={cloneVoice}
disabled={!selectedRecording || isTranscribing || isUploading}
class="w-full px-4 py-2 bg-gradient-to-r from-amber-400 to-orange-500 text-white rounded-lg font-medium hover:from-amber-500 hover:to-orange-600 disabled:opacity-50 disabled:cursor-not-allowed transition-colors flex items-center justify-center gap-2"
>
{#if isTranscribing}
<Loader2 size={16} class="animate-spin" />
Transcribing...
{:else if isUploading}
<Loader2 size={16} class="animate-spin" />
Uploading...
{:else}
Clone
{/if}
</button>
<!-- Validation status -->
<div class="mt-3 text-sm flex items-center gap-4">
<span
class="flex items-center gap-1 {selectedRecording
? isRecordingLongEnough(selectedRecording)
? 'text-green-600'
: 'text-red-600'
: 'text-gray-400'}"
>
{#if selectedRecording}
{#if isRecordingLongEnough(selectedRecording)}
{:else}
{/if}
{:else}
{/if}
at least 15 seconds
</span>
<span
class="flex items-center gap-1 {selectedRecording
? hasConsent(selectedRecording)
? 'text-green-600'
: transcriptionStatus[selectedRecording.id]
? 'text-red-600'
: 'text-gray-400'
: 'text-gray-400'}"
>
{#if selectedRecording && transcriptionStatus[selectedRecording.id]}
{#if hasConsent(selectedRecording)}
{:else}
{/if}
{:else}
{/if}
includes consent
</span>
</div>
<!-- Transcript preview -->
{#if selectedRecording && transcriptionStatus[selectedRecording.id]}
<div class="mt-3 p-3 bg-gray-50 rounded-lg border">
<p class="text-sm text-gray-600 italic">
"{transcriptionStatus[selectedRecording.id].first_words}..."
</p>
</div>
{/if}
<!-- Success message -->
{#if successMessage && selectedRecording && uploadResults[selectedRecording.id]}
<div class="mt-3 p-3 bg-green-50 rounded-lg border border-green-200">
<p class="text-sm text-green-700">
Your voice has been saved to a <a
href={uploadResults[selectedRecording.id].voice_url}
target="_blank"
class="text-green-800 underline hover:text-green-900">temporary URL</a
> for 24 hours and will be automatically deleted. You can now use it for text-to-speech
generation.
</p>
</div>
{:else if successMessage}
<div class="mt-3 p-3 bg-green-50 rounded-lg border border-green-200">
<p class="text-sm text-green-700">
{successMessage}
</p>
</div>
{/if}
</div>
<!-- Existing voices section -->
{#if isLoggedIn && userVoices.length > 0}
<div class="mt-6 pt-4 border-t border-gray-200">
<h3 class="text-sm font-medium text-gray-700 mb-3">Your existing voice clone</h3>
<div class="space-y-2">
{#each userVoices as voice}
<div class="border rounded-lg p-3 bg-blue-50 border-blue-200">
<div class="flex items-center justify-between mb-2">
<span class="text-sm font-medium text-blue-900">{voice.voice_name}</span>
<div class="flex items-center gap-2">
<button
on:click={() => togglePlayRecording({ url: voice.voice_url, id: voice.id })}
class="p-1 hover:bg-blue-100 rounded transition-colors"
title="Play voice sample"
>
{#if playingRecording?.id === voice.id && currentAudio && !currentAudio.paused}
<Pause size={14} class="text-blue-600" />
{:else}
<Play size={14} class="text-blue-600" />
{/if}
</button>
</div>
</div>
<div class="text-xs text-blue-600">
Expires: {new Date(voice.expires_at).toLocaleDateString()} at {new Date(
voice.expires_at
).toLocaleTimeString()}
</div>
</div>
{/each}
</div>
</div>
{/if}
</div>
</div>
</div>
</div>
<!-- Error Modal -->
{#if showErrorModal}
<div class="fixed inset-0 bg-black bg-opacity-50 flex items-center justify-center z-50 p-4">
<div class="bg-white rounded-xl shadow-2xl max-w-2xl w-full max-h-[80vh] flex flex-col">
<div
class="flex items-center justify-between p-6 border-b border-gray-200 bg-red-50 flex-shrink-0"
>
<div class="flex items-center gap-3 min-w-0">
<div
class="w-10 h-10 bg-red-100 rounded-full flex items-center justify-center flex-shrink-0"
>
<AlertCircle size={20} class="text-red-600" />
</div>
<div class="min-w-0">
<h3 class="text-lg font-semibold text-gray-900 truncate">{errorMessage}</h3>
<p class="text-sm text-gray-600">An error occurred while processing your request</p>
</div>
</div>
<button
on:click={closeErrorModal}
class="p-2 hover:bg-red-100 rounded-full transition-colors flex-shrink-0"
title="Close"
>
<X size={20} class="text-gray-500" />
</button>
</div>
<div class="p-6 overflow-y-auto flex-1 min-h-0">
{#if errorDetails}
<div class="bg-gray-50 rounded-lg p-4 border">
<h4 class="text-sm font-medium text-gray-900 mb-2">Error Details:</h4>
<pre
class="text-xs text-gray-700 whitespace-pre-wrap font-mono leading-relaxed break-words">{errorDetails}</pre>
</div>
{/if}
</div>
<div
class="flex items-center justify-end gap-3 p-6 border-t border-gray-200 bg-gray-50 flex-shrink-0"
>
<button
on:click={closeErrorModal}
class="px-6 py-2 bg-red-600 text-white rounded-lg hover:bg-red-700 transition-colors"
>
Close
</button>
</div>
</div>
</div>
{/if}