<!--
anycoder-e5c58ab0 / index.html
gagndeep's picture
Upload folder using huggingface_hub
d1b390e verified
-->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>VibeVoice Realtime 0.5B - Voice Conversion</title>
<script src="https://cdn.tailwindcss.com"></script>
<!-- Register the page palette with the Tailwind Play CDN. Without this, the custom
     colour utilities used in the markup (text-accent, border-accent, text-primary,
     bg-secondary, bg-secondary-light, text-text-secondary) are never generated.
     The values point at the CSS custom properties declared in :root below. -->
<script>
  tailwind.config = {
    theme: {
      extend: {
        colors: {
          primary: 'var(--primary)',
          secondary: {
            DEFAULT: 'var(--secondary)',
            light: 'var(--secondary-light)'
          },
          accent: 'var(--accent)',
          'text-secondary': 'var(--text-secondary)'
        }
      }
    }
  };
</script>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
<style>
/* Design tokens referenced by the custom rules in this stylesheet. */
:root {
  --primary: #ff4785;
  --primary-dark: #ff2d75;
  --accent: #00d4aa;
  --secondary: #1e1e1e;
  --secondary-light: #2d2d2d;
  --text: #f8f9fa;
  --text-secondary: #adb5bd;
}

/* Page backdrop: diagonal dark-blue gradient that always fills the viewport height. */
body {
  min-height: 100vh;
  font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif;
  background: linear-gradient(135deg, #1a1a2e 0%, #16213e 100%);
}
/* Gradient frame: the outer box paints the gradient and its padding shows as the border. */
.gradient-border {
  padding: 3px;
  border-radius: 12px;
  background: linear-gradient(45deg, var(--primary), var(--accent));
}

/* Inner surface that covers everything except the 3px frame. */
.gradient-border-inner {
  height: 100%;
  border-radius: 9px;
  background: var(--secondary);
}
/* Primary call-to-action button: pink gradient with animated state changes. */
.btn-primary {
  transition: all 0.3s ease;
  background: linear-gradient(45deg, var(--primary), var(--primary-dark));
}

/* Hover: lift the button slightly and add a pink glow underneath. */
.btn-primary:hover {
  box-shadow: 0 10px 20px rgba(255, 71, 133, 0.3);
  transform: translateY(-2px);
}
/* Container that centres the equalizer bars created by the script. */
.audio-visualizer {
  display: flex;
  justify-content: center;
  align-items: center;
  height: 100px;
  margin: 15px 0;
  border-radius: 8px;
  background: rgba(255, 255, 255, 0.05);
}

/* One equalizer bar; the script gives each bar its own animation-delay. */
.bar {
  width: 4px;
  height: 20px;
  margin: 0 2px;
  border-radius: 2px;
  background: linear-gradient(to top, var(--accent), var(--primary));
  animation: equalize 1.5s infinite alternate;
}

/* Bar height cycle, played back and forth because of "alternate". */
@keyframes equalize {
  from { height: 10px; }
  25% { height: 30px; }
  50% { height: 15px; }
  75% { height: 25px; }
  to { height: 20px; }
}
/* Selectable voice preset tile. */
.voice-preset {
  cursor: pointer;
  transition: all 0.3s ease;
}

/* Hover feedback: slight zoom and a lighter surface. */
.voice-preset:hover {
  background: rgba(255, 255, 255, 0.1);
  transform: scale(1.05);
}

/* Currently selected preset (the script toggles the "active" class). */
.voice-preset.active {
  background: rgba(0, 212, 170, 0.2);
  border: 2px solid var(--accent);
}
/* Circular spinner: faint ring whose top edge is accent-coloured and rotates. */
.loading-spinner {
  width: 40px;
  height: 40px;
  border-radius: 50%;
  /* The shorthand must come first so the top colour below overrides it. */
  border: 4px solid rgba(255, 255, 255, 0.1);
  border-top-color: var(--accent);
  animation: spin 1s ease-in-out infinite;
}

/* One full clockwise turn. */
@keyframes spin {
  100% { transform: rotate(360deg); }
}
/* Toggle switch: a label wrapping an invisible checkbox plus a styled track (.slider). */
.toggle-switch {
  position: relative;
  display: inline-block;
  width: 60px;
  height: 30px;
}

/* The real checkbox stays in the DOM (and focusable) but takes no space and is invisible. */
.toggle-switch input {
  opacity: 0;
  width: 0;
  height: 0;
}

/* Track: fills the whole label. */
.slider {
  position: absolute;
  cursor: pointer;
  top: 0;
  left: 0;
  right: 0;
  bottom: 0;
  background-color: #ccc;
  transition: .4s;
  border-radius: 30px;
}

/* Knob: drawn as a pseudo-element resting on the left of the track. */
.slider:before {
  position: absolute;
  content: "";
  height: 22px;
  width: 22px;
  left: 4px;
  bottom: 4px;
  background-color: white;
  transition: .4s;
  border-radius: 50%;
}

/* Checked state: accent-coloured track. */
input:checked + .slider {
  background-color: var(--accent);
}

/* Checked state: knob slides to the right end of the track. */
input:checked + .slider:before {
  transform: translateX(30px);
}

/* Keyboard focus: the checkbox itself is invisible, so the focus ring is drawn
   on the sibling track instead. */
.toggle-switch input:focus-visible + .slider {
  outline: 2px solid var(--accent);
  outline-offset: 2px;
}
/* Range input track: native chrome removed and replaced by a thin translucent bar. */
.param-slider {
  -webkit-appearance: none;
  appearance: none; /* standard property alongside the prefixed one */
  width: 100%;
  height: 6px;
  border-radius: 3px;
  background: rgba(255, 255, 255, 0.2);
  outline: none;
}

/* The default outline is removed above, so give keyboard users an explicit focus ring. */
.param-slider:focus-visible {
  outline: 2px solid var(--accent);
  outline-offset: 4px;
}

/* Thumb for WebKit/Blink browsers. */
.param-slider::-webkit-slider-thumb {
  -webkit-appearance: none;
  appearance: none;
  width: 18px;
  height: 18px;
  border-radius: 50%;
  background: var(--accent);
  cursor: pointer;
}

/* Thumb for Firefox; the border reset keeps it visually in line with the WebKit thumb. */
.param-slider::-moz-range-thumb {
  width: 18px;
  height: 18px;
  border: none;
  border-radius: 50%;
  background: var(--accent);
  cursor: pointer;
}
/* Output preview box; clips the animated fill and anchors its absolute positioning. */
.output-waveform {
  position: relative;
  overflow: hidden;
  height: 120px;
  border-radius: 8px;
  background: rgba(255, 255, 255, 0.05);
}

/* Fill that grows from the bottom; the script sets its height as a percentage. */
.waveform-line {
  position: absolute;
  bottom: 0;
  width: 100%;
  height: 0;
  transition: height 0.1s ease;
  background: linear-gradient(to right, var(--primary), var(--accent));
}
/* Small screens: thinner gradient frame and a two-column preset grid. */
@media (max-width: 768px) {
  .gradient-border {
    padding: 2px;
    border-radius: 8px;
  }

  .gradient-border-inner {
    border-radius: 6px;
  }

  /* The grid element also carries Tailwind's .grid-cols-3 utility, which has the
     same single-class specificity as .voice-presets-grid and would win if its
     stylesheet is injected later. Chaining the element's own .grid class raises
     the specificity so the two-column layout applies regardless of order. */
  .grid.voice-presets-grid {
    grid-template-columns: repeat(2, 1fr);
  }
}
</style>
</head>
<body class="text-white">
<!-- Header: product title on the left, attribution link on the right -->
<header class="p-4 border-b border-gray-700">
  <div class="max-w-6xl mx-auto flex justify-between items-center">
    <div class="flex items-center space-x-2">
      <!-- Decorative icon: hidden from assistive technology -->
      <i class="fas fa-microphone-alt text-2xl text-accent" aria-hidden="true"></i>
      <h1 class="text-xl font-bold">VibeVoice Realtime 0.5B</h1>
    </div>
    <div class="text-sm text-text-secondary">
      Built with <a href="https://huggingface.co/spaces/akhaliq/anycoder" class="text-accent hover:underline">anycoder</a>
    </div>
  </div>
</header>
<!-- Main Content -->
<main class="max-w-6xl mx-auto p-4">
<div class="gradient-border mb-6">
<div class="gradient-border-inner p-6">
<div class="grid grid-cols-1 lg:grid-cols-3 gap-6">
<!-- Input Section -->
<div class="lg:col-span-2">
<div class="bg-secondary-light rounded-lg p-6 mb-6">
  <h2 class="text-lg font-semibold mb-4 flex items-center">
    <i class="fas fa-upload mr-2" aria-hidden="true"></i>
    Input Audio
  </h2>
  <!-- Click / drop target; the script forwards clicks to the hidden file input inside it -->
  <div class="audio-upload-area border-2 border-dashed border-gray-600 rounded-lg p-8 text-center cursor-pointer hover:border-accent transition-colors">
    <i class="fas fa-cloud-upload-alt text-4xl text-gray-400 mb-4" aria-hidden="true"></i>
    <p class="text-text-secondary mb-2">Click to upload or drag and drop</p>
    <p class="text-sm text-gray-500">MP3, WAV, OGG (Max 10MB)</p>
    <input type="file" id="audioUpload" accept="audio/*" class="hidden">
  </div>
  <div class="audio-visualizer mt-6" id="inputVisualizer">
    <!-- Bars will be generated by JavaScript -->
  </div>
  <div class="flex justify-center mt-4">
    <!-- Explicit type so the button can never act as a form submit -->
    <button type="button" class="btn-primary px-6 py-2 rounded-lg font-medium flex items-center" id="recordBtn">
      <i class="fas fa-microphone mr-2" aria-hidden="true"></i>
      Record Voice
    </button>
  </div>
</div>
<!-- Voice Presets: each preset is a real <button> so it is focusable and can be
     activated from the keyboard; the script toggles the "active" class on click. -->
<div class="bg-secondary-light rounded-lg p-6">
  <h2 class="text-lg font-semibold mb-4 flex items-center">
    <i class="fas fa-user-circle mr-2" aria-hidden="true"></i>
    Voice Presets
  </h2>
  <div class="voice-presets-grid grid grid-cols-3 gap-4 mb-6">
    <button type="button" class="voice-preset active w-full p-4 rounded-lg border border-transparent text-center" data-voice="male-1">
      <i class="fas fa-male text-3xl text-accent mb-2" aria-hidden="true"></i>
      <span class="block font-medium">Male 1</span>
    </button>
    <button type="button" class="voice-preset w-full p-4 rounded-lg border border-transparent text-center" data-voice="female-1">
      <i class="fas fa-female text-3xl text-primary mb-2" aria-hidden="true"></i>
      <span class="block font-medium">Female 1</span>
    </button>
    <button type="button" class="voice-preset w-full p-4 rounded-lg border border-transparent text-center" data-voice="child">
      <i class="fas fa-child text-3xl text-yellow-400 mb-2" aria-hidden="true"></i>
      <span class="block font-medium">Child</span>
    </button>
    <button type="button" class="voice-preset w-full p-4 rounded-lg border border-transparent text-center" data-voice="male-2">
      <i class="fas fa-male text-3xl text-blue-400 mb-2" aria-hidden="true"></i>
      <span class="block font-medium">Male 2</span>
    </button>
    <button type="button" class="voice-preset w-full p-4 rounded-lg border border-transparent text-center" data-voice="female-2">
      <i class="fas fa-female text-3xl text-purple-400 mb-2" aria-hidden="true"></i>
      <span class="block font-medium">Female 2</span>
    </button>
    <button type="button" class="voice-preset w-full p-4 rounded-lg border border-transparent text-center" data-voice="robot">
      <i class="fas fa-robot text-3xl text-green-400 mb-2" aria-hidden="true"></i>
      <span class="block font-medium">Robot</span>
    </button>
  </div>
  <div class="mb-4">
    <!-- for/id pair ties the visible label to the file input -->
    <label class="block text-sm font-medium mb-2" for="customVoiceUpload">Custom Voice Reference</label>
    <input type="file" id="customVoiceUpload" accept="audio/*" class="w-full p-2 bg-secondary rounded border border-gray-600">
  </div>
</div>
</div>
<!-- Parameters & Output Section -->
<div class="lg:col-span-1">
<div class="bg-secondary-light rounded-lg p-6 mb-6">
  <h2 class="text-lg font-semibold mb-4 flex items-center">
    <i class="fas fa-sliders-h mr-2" aria-hidden="true"></i>
    Conversion Parameters
  </h2>
  <!-- Every visible label is tied to its control with for/id so the control has an
       accessible name and clicking the label focuses or toggles it. -->
  <div class="space-y-6">
    <div>
      <label class="block text-sm font-medium mb-2" for="pitchSlider">Pitch Adjustment</label>
      <input type="range" class="param-slider" min="-12" max="12" value="0" id="pitchSlider">
      <div class="flex justify-between text-sm text-text-secondary mt-1">
        <span>-12</span>
        <span>0</span>
        <span>+12</span>
      </div>
    </div>
    <div>
      <label class="block text-sm font-medium mb-2" for="speedSlider">Speed</label>
      <input type="range" class="param-slider" min="0.5" max="2" step="0.1" value="1" id="speedSlider">
      <div class="flex justify-between text-sm text-text-secondary mt-1">
        <span>0.5x</span>
        <span>1x</span>
        <span>2x</span>
      </div>
    </div>
    <div>
      <label class="block text-sm font-medium mb-2" for="emotionSlider">Emotion Intensity</label>
      <input type="range" class="param-slider" min="0" max="100" value="50" id="emotionSlider">
      <div class="flex justify-between text-sm text-text-secondary mt-1">
        <span>Neutral</span>
        <span>Intense</span>
      </div>
    </div>
    <div class="flex items-center justify-between">
      <label class="text-sm font-medium" for="realtimeToggle">Real-time Processing</label>
      <label class="toggle-switch">
        <input type="checkbox" id="realtimeToggle">
        <span class="slider"></span>
      </label>
    </div>
    <div class="flex items-center justify-between">
      <label class="text-sm font-medium" for="noiseReductionToggle">Noise Reduction</label>
      <label class="toggle-switch">
        <input type="checkbox" id="noiseReductionToggle" checked>
        <span class="slider"></span>
      </label>
    </div>
  </div>
</div>
<!-- Output Section: waveform preview, convert action, playback/download and status line -->
<div class="bg-secondary-light rounded-lg p-6">
  <h2 class="text-lg font-semibold mb-4 flex items-center">
    <i class="fas fa-volume-up mr-2" aria-hidden="true"></i>
    Output
  </h2>
  <div class="output-waveform mb-4" id="outputWaveform">
    <div class="waveform-line" id="waveformLine"></div>
  </div>
  <div class="flex justify-center mb-4">
    <button type="button" class="btn-primary px-8 py-3 rounded-lg font-medium flex items-center" id="convertBtn">
      <i class="fas fa-magic mr-2" aria-hidden="true"></i>
      Convert Voice
    </button>
  </div>
  <!-- Play/Download start disabled; the disabled: variants make that state visible -->
  <div class="flex justify-center space-x-4">
    <button type="button" class="bg-gray-600 hover:bg-gray-500 disabled:opacity-50 disabled:cursor-not-allowed disabled:hover:bg-gray-600 px-4 py-2 rounded-lg flex items-center transition-colors" id="playBtn" disabled>
      <i class="fas fa-play mr-2" aria-hidden="true"></i>
      Play
    </button>
    <button type="button" class="bg-gray-600 hover:bg-gray-500 disabled:opacity-50 disabled:cursor-not-allowed disabled:hover:bg-gray-600 px-4 py-2 rounded-lg flex items-center transition-colors" id="downloadBtn" disabled>
      <i class="fas fa-download mr-2" aria-hidden="true"></i>
      Download
    </button>
  </div>
  <!-- Live region: the script rewrites this element, so changes are announced politely -->
  <div class="mt-4 text-center" id="statusMessage" role="status">
    <p class="text-text-secondary text-sm">Ready to convert</p>
  </div>
</div>
</div>
</div>
</div>
</div>
</main>
<!-- Footer: product tagline and simulation disclaimer -->
<footer class="p-4 border-t border-gray-700 text-center text-text-secondary text-sm">
  <p>VibeVoice Realtime 0.5B - Advanced Voice Conversion Model</p>
  <p class="mt-1">This is a simulation of the Hugging Face Space interface</p>
</footer>
<script>
// Cached element references, looked up once when the script runs.
const audioUpload = document.getElementById('audioUpload'),
  audioUploadArea = document.querySelector('.audio-upload-area'),
  recordBtn = document.getElementById('recordBtn'),
  convertBtn = document.getElementById('convertBtn'),
  playBtn = document.getElementById('playBtn'),
  downloadBtn = document.getElementById('downloadBtn'),
  statusMessage = document.getElementById('statusMessage'),
  inputVisualizer = document.getElementById('inputVisualizer'),
  outputWaveform = document.getElementById('outputWaveform'),
  waveformLine = document.getElementById('waveformLine'),
  voicePresets = document.querySelectorAll('.voice-preset'),
  pitchSlider = document.getElementById('pitchSlider'),
  speedSlider = document.getElementById('speedSlider'),
  emotionSlider = document.getElementById('emotionSlider'),
  realtimeToggle = document.getElementById('realtimeToggle'),
  noiseReductionToggle = document.getElementById('noiseReductionToggle');

// Mutable page state shared by the handlers below.
let isRecording = false;   // true while a MediaRecorder session is running
let audioContext;          // created in the DOMContentLoaded handler
let mediaRecorder;         // recorder of the current/last recording session
let audioChunks = [];      // data chunks collected during a recording
let audioBlob;             // current input audio (uploaded file or recording)
let audioUrl;              // object URL created for audioBlob
let isProcessing = false;  // true while the simulated conversion is running
// Initialize: wire up the UI once the document has been parsed.
document.addEventListener('DOMContentLoaded', () => {
  // A click anywhere in the upload area opens the hidden file picker.
  audioUploadArea.addEventListener('click', () => audioUpload.click());

  // The upload area is a plain <div>: expose it as a button and make it
  // focusable so keyboard users can open the picker with Enter or Space too.
  audioUploadArea.setAttribute('role', 'button');
  audioUploadArea.setAttribute('tabindex', '0');
  audioUploadArea.addEventListener('keydown', (event) => {
    if (event.target !== audioUploadArea) return;
    if (event.key === 'Enter' || event.key === ' ') {
      event.preventDefault(); // keep Space from scrolling the page
      audioUpload.click();
    }
  });

  audioUpload.addEventListener('change', handleAudioUpload);
  recordBtn.addEventListener('click', toggleRecording);
  convertBtn.addEventListener('click', convertVoice);
  playBtn.addEventListener('click', playAudio);
  downloadBtn.addEventListener('click', downloadAudio);

  // Voice preset selection: exactly one preset carries the "active" class.
  voicePresets.forEach(preset => {
    preset.addEventListener('click', () => {
      voicePresets.forEach(p => p.classList.remove('active'));
      preset.classList.add('active');
    });
  });

  // Generate visualizer bars
  generateVisualizerBars();

  // Initialize audio context. Guarded so a browser exposing neither constructor
  // does not throw "undefined is not a constructor" here.
  const AudioContextCtor = window.AudioContext || window.webkitAudioContext;
  if (AudioContextCtor) {
    audioContext = new AudioContextCtor();
  }
});
/**
 * Fills the input visualizer with 20 animated bars, each starting its
 * animation 0.05s after the previous one.
 */
function generateVisualizerBars() {
  Array.from({ length: 20 }).forEach((_, index) => {
    const barEl = document.createElement('div');
    barEl.className = 'bar';
    barEl.style.animationDelay = `${index * 0.05}s`;
    inputVisualizer.appendChild(barEl);
  });
}
/**
 * Accepts the first file of a file input as the new input audio.
 *
 * Files larger than 10MB are rejected with an alert. On success the file
 * becomes `audioBlob`, a fresh object URL is stored in `audioUrl`, and the
 * convert button is enabled.
 *
 * @param {{target: {files: FileList}}} e - change event, or an object of the same shape.
 */
function handleAudioUpload(e) {
  const file = e.target.files && e.target.files[0];
  if (!file) return;
  if (file.size > 10 * 1024 * 1024) {
    alert('File size exceeds 10MB limit');
    return;
  }
  // Release the URL of the audio being replaced; otherwise every upload
  // leaves another blob URL alive until the page is closed.
  if (audioUrl) {
    URL.revokeObjectURL(audioUrl);
  }
  audioBlob = file;
  audioUrl = URL.createObjectURL(file);
  updateStatus('Audio uploaded successfully');
  enableConvertButton();
}
/** Record-button handler: stops the running recording, or starts a new one. */
function toggleRecording() {
  const action = isRecording ? stopRecording : startRecording;
  action();
}
/**
 * Asks for microphone access and starts a MediaRecorder session.
 *
 * When the recorder stops, the collected chunks become `audioBlob` (typed
 * with the recorder's real MIME type), a fresh object URL is stored in
 * `audioUrl`, and the convert button is enabled. Failures are logged and
 * reported through the status line; nothing is thrown to the caller.
 */
function startRecording() {
  // navigator.mediaDevices is undefined on insecure origins and MediaRecorder
  // is missing in older browsers; report that instead of throwing a TypeError.
  if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia ||
      typeof MediaRecorder === 'undefined') {
    updateStatus('Recording is not supported in this browser');
    return;
  }
  navigator.mediaDevices.getUserMedia({ audio: true })
    .then(stream => {
      let recorder;
      try {
        recorder = new MediaRecorder(stream);
      } catch (err) {
        // Do not leave the microphone open if the recorder cannot be created.
        stream.getTracks().forEach(track => track.stop());
        throw err;
      }
      mediaRecorder = recorder;
      audioChunks = [];
      recorder.ondataavailable = e => {
        if (e.data && e.data.size > 0) {
          audioChunks.push(e.data);
        }
      };
      recorder.onstop = () => {
        // Label the blob with the container the recorder actually produced
        // rather than a hard-coded 'audio/wav'.
        const type = recorder.mimeType ||
          (audioChunks[0] && audioChunks[0].type) ||
          'audio/webm';
        if (audioUrl) {
          URL.revokeObjectURL(audioUrl); // release the audio being replaced
        }
        audioBlob = new Blob(audioChunks, { type });
        audioUrl = URL.createObjectURL(audioBlob);
        updateStatus('Recording saved');
        enableConvertButton();
      };
      recorder.start();
      isRecording = true;
      recordBtn.innerHTML = '<i class="fas fa-stop mr-2"></i> Stop Recording';
      recordBtn.classList.add('bg-red-500');
      recordBtn.classList.remove('btn-primary');
      updateStatus('Recording...');
    })
    .catch(err => {
      console.error('Error accessing microphone:', err);
      const denied = err && (err.name === 'NotAllowedError' || err.name === 'SecurityError');
      updateStatus(denied ? 'Microphone access denied' : 'Could not access the microphone');
    });
}
/**
 * Stops the active recording, releases the microphone tracks and restores
 * the record button. Does nothing when no recording is in progress.
 */
function stopRecording() {
  if (!mediaRecorder || !isRecording) return;

  mediaRecorder.stop();
  for (const track of mediaRecorder.stream.getTracks()) {
    track.stop();
  }
  isRecording = false;

  // Put the button back into its idle "Record Voice" look.
  recordBtn.innerHTML = '<i class="fas fa-microphone mr-2"></i> Record Voice';
  recordBtn.classList.remove('bg-red-500');
  recordBtn.classList.add('btn-primary');
}
/** Makes the convert button clickable and removes its disabled styling classes. */
function enableConvertButton() {
  convertBtn.disabled = false;
  ['opacity-50', 'cursor-not-allowed'].forEach(cls => convertBtn.classList.remove(cls));
}
/**
 * Simulated voice conversion: locks the action buttons for three seconds,
 * then animates the output waveform and unlocks play, download and convert.
 * Ignored when there is no input audio or a conversion is already running.
 */
function convertVoice() {
  const canStart = audioBlob && !isProcessing;
  if (!canStart) return;

  isProcessing = true;
  updateStatus('Processing voice conversion...');

  // Lock every action button while the simulated job runs.
  [convertBtn, playBtn, downloadBtn].forEach(btn => {
    btn.disabled = true;
  });

  const finishConversion = () => {
    // Random waveform standing in for the converted output.
    animateWaveform();
    playBtn.disabled = false;
    downloadBtn.disabled = false;
    updateStatus('Conversion complete!');
    isProcessing = false;
    convertBtn.disabled = false;
  };

  setTimeout(finishConversion, 3000);
}
/**
 * Every 100ms sets the waveform fill to a random height between 20% and
 * 100%; the animation stops after the first draw of 90% or more.
 */
function animateWaveform() {
  const timerId = setInterval(() => {
    const percent = 20 + Math.random() * 80;
    waveformLine.style.height = `${percent}%`;
    if (percent >= 90) {
      clearInterval(timerId);
    }
  }, 100);
}
/**
 * Plays the current audio (`audioUrl`). Does nothing when no audio is loaded.
 * A playback failure is logged and shown in the status line instead of
 * surfacing as an unhandled promise rejection.
 */
function playAudio() {
  if (!audioUrl) return;
  const audio = new Audio(audioUrl);
  const playback = audio.play();
  updateStatus('Playing converted audio');
  // play() returns a promise in current browsers; very old ones return undefined.
  if (playback && typeof playback.catch === 'function') {
    playback.catch(err => {
      console.error('Error playing audio:', err);
      updateStatus('Unable to play audio');
    });
  }
}
/**
 * Downloads the current audio blob as "converted-voice.<ext>".
 *
 * The extension is derived from the blob's MIME type so that, for example,
 * an uploaded MP3 is not saved under a .wav name; unknown or missing types
 * keep the previous "wav" default. Does nothing when no audio is loaded.
 */
function downloadAudio() {
  if (!audioBlob) return;

  const extensionByType = new Map([
    ['audio/mpeg', 'mp3'],
    ['audio/mp3', 'mp3'],
    ['audio/wav', 'wav'],
    ['audio/x-wav', 'wav'],
    ['audio/wave', 'wav'],
    ['audio/ogg', 'ogg'],
    ['video/ogg', 'ogg'],
    ['application/ogg', 'ogg'],
    ['audio/webm', 'webm'],
    ['audio/mp4', 'm4a'],
    ['audio/x-m4a', 'm4a'],
    ['audio/aac', 'aac'],
    ['audio/flac', 'flac']
  ]);
  // Drop parameters such as ";codecs=opus" before the lookup.
  const baseType = (audioBlob.type || '').split(';')[0].trim().toLowerCase();
  const extension = extensionByType.get(baseType) || 'wav';

  const url = URL.createObjectURL(audioBlob);
  const a = document.createElement('a');
  a.href = url;
  a.download = `converted-voice.${extension}`;
  document.body.appendChild(a);
  a.click();
  document.body.removeChild(a);
  // Revoke a little later rather than synchronously, so the browser has had
  // a chance to start reading the blob for the download.
  setTimeout(() => URL.revokeObjectURL(url), 1000);
  updateStatus('Download started');
}
/**
 * Replaces the content of the status line with an info icon followed by `message`.
 *
 * The message is inserted as a text node, not parsed as HTML, so markup
 * characters in it are displayed literally.
 *
 * @param {string} message - text to show to the user.
 */
function updateStatus(message) {
  const icon = document.createElement('i');
  icon.className = 'fas fa-info-circle mr-1';
  icon.setAttribute('aria-hidden', 'true'); // decorative only
  statusMessage.textContent = ''; // remove whatever was shown before
  statusMessage.appendChild(icon);
  statusMessage.appendChild(document.createTextNode(` ${message}`));
}
// Drag-and-drop support for the upload area.

// Allow dropping (preventDefault) and highlight the area while a drag hovers over it.
audioUploadArea.addEventListener('dragover', (event) => {
  event.preventDefault();
  audioUploadArea.classList.add('border-accent');
});

// Drop the highlight when the drag leaves.
audioUploadArea.addEventListener('dragleave', () => {
  audioUploadArea.classList.remove('border-accent');
});

// On drop, hand the files to the file input and reuse the regular upload path.
audioUploadArea.addEventListener('drop', (event) => {
  event.preventDefault();
  audioUploadArea.classList.remove('border-accent');

  const droppedFiles = event.dataTransfer.files;
  if (droppedFiles.length === 0) return;

  audioUpload.files = droppedFiles;
  handleAudioUpload({ target: audioUpload });
});
</script>
</body>
</html>