Spaces:

programindz
/

kashmiri-streaming-speech-recognition

Sleeping

App Files Files Community

kashmiri-streaming-speech-recognition / app /index.html

programindz

Update app/index.html

bfd1f21 verified 3 months ago

raw

history blame contribute delete

8.19 kB

	<!DOCTYPE html>
	<html lang="en">
	<head>
	<meta charset="UTF-8" />
	<title>🎤 Kashmiri Real-Time ASR </title>
	<style>
	/* Page layout */
	body {
	font-family: "Segoe UI", sans-serif;
	background-color: #f5f6fa;
	margin: 0;
	padding: 2rem;
	color: #2f3640;
	display: flex;
	flex-direction: column;
	align-items: center;
	}
	h1 {
	font-size: 2rem;
	margin-bottom: 1rem;
	}
	section {
	width: 100%;
	max-width: 900px;
	margin-bottom: 1.5rem;
	background: white;
	border-radius: 8px;
	padding: 1rem;
	box-shadow: 0 0 8px rgba(0,0,0,0.1);
	}
	section h2 {
	margin-top: 0;
	font-size: 1.2rem;
	border-bottom: 1px solid #dcdde1;
	padding-bottom: 0.5rem;
	color: #2f3640;
	}


	/* Mic info and volume */
	.mic-info {
	font-size: 0.9rem;
	color: #353b48;
	margin-top: 1rem;
	}
	.mic-info .label {
	font-weight: bold;
	}
	#vol {
	width: 100%;
	max-width: 500px;
	height: 20px;
	margin-top: 0.5rem;
	appearance: none;
	}
	#vol::-webkit-progress-bar {
	background-color: #dcdde1;
	border-radius: 8px;
	}
	#vol::-webkit-progress-value {
	background-color: #44bd32;
	border-radius: 8px;
	transition: width 0.2s;
	}
	#vol::-moz-progress-bar {
	background-color: #44bd32;
	border-radius: 8px;
	transition: width 0.2s;
	}

	/* Transcript */
	.transcript-container {
	margin-top: 0.5rem;
	padding: 0.5rem;
	background: #fff;
	border: 1px solid #dcdde1;
	border-radius: 8px;
	max-height: 300px;
	overflow-y: auto;

	/* Arabic/Nastaliq essentials */
	font-family: "Noto Nastaliq Urdu", serif;
	direction: rtl;
	unicode-bidi: isolate;
	text-align: right;

	white-space: pre-wrap;
	font-size: 1.3rem; /* Nastaliq needs air */
	line-height: 2.4; /* Critical */
	color: #353b48;
	}

	.transcript-container .final {
	color: #2ecc71;
	display: inline;
	margin-left: 0.5em; /* ← RTL-aware spacing */
	}

	.transcript-container .interim {
	color: #e74c3c;
	display: inline;
	}


	#translateBtn {
	background: linear-gradient(135deg, #1d241b, #2d422a);
	color: white;
	font-size: 1rem;
	padding: 0.75rem 1.5rem;
	border: none;
	border-radius: 8px;
	cursor: pointer;
	}

	#translateBtn:hover {
	background: linear-gradient(135deg, #6e8167, #7e9178);
	transform: translateY(-2px);
	}

	#translateBtn:active {
	transform: scale(0.98);
	}

	</style>
	</head>
	<body>
	<h1>Kashmiri کٲشُر Streaming Speech Recognition</h1>
	<h2>🎤 Speak into Your Microphone (Allow your System to use Microphone)</h2>

	<section class="section--mic">
	<h2>Microphone</h2>
	<div class="mic-info">
	<span class="label">Device:</span> <span id="micName">Detecting…</span><br>
	<span class="label">Sample Rate:</span> <span id="sampleRate">-</span> Hz
	</div>
	<progress id="vol" max="1" value="0"></progress>
	</section>

	<section class="section--transcript">
	<h2>Transcript</h2>
	<div id="transcript" class="transcript-container">…</div>
	</section>

	<!-- <section class="section--translate"> -->
	<!-- <h2>Translation</h2> -->
	<!-- <button id="translateBtn" onclick="sendForTranslation()">🌐 Translate to English</button> -->
	<!-- <div id="translatedText" class="transcript-container" style="margin-top: 1rem;">…</div> -->
	<!-- </section> -->

	</section>

	<script>
	let orig_sample_rate;
	let ws;
	// let lastTranslatedText = "";
	// let lastTranslation = "";

	const vol = document.getElementById("vol");
	const transcript = document.getElementById("transcript");
	const micNameElem = document.getElementById("micName");
	const sampleRateElem = document.getElementById("sampleRate");


	navigator.mediaDevices.getUserMedia({ audio: true }).then(stream => {
	const context = new AudioContext();
	orig_sample_rate = context.sampleRate;

	// Update mic info in UI
	const track = stream.getAudioTracks()[0];
	micNameElem.textContent = track.label \|\| 'Unknown';
	sampleRateElem.textContent = orig_sample_rate;

	// Now that we know the sample rate, open the WS
	ws = new WebSocket(`wss://${location.host}/ws`);
	ws.onopen = () => sendConfig();
	ws.onerror = err => console.error("WebSocket error:", err);
	ws.onclose = () => console.log("WebSocket closed");

	// Unified handler for partial + final messages
	ws.onmessage = e => {
	const msg = JSON.parse(e.data);
	console.log(msg)
	// 1) update volume bar
	if (msg.volume !== undefined) {
	vol.value = Math.min(msg.volume, 1.0);
	}

	// 2) distinguish “final” vs “partial”
	if (msg.final !== undefined) {
	finalUtterances.push(msg.final.trim());
	currentInterim = "";
	} else if (msg.partial !== undefined) {
	currentInterim = msg.partial;
	}

	// 3) rebuild the full, colored transcript
	transcript.innerHTML =
	finalUtterances
	.map(u => `<span class="final">${u}</span>`)
	.join("") /* margin in CSS handles spacing */
	+ (currentInterim
	? ` <span class="interim">${currentInterim}</span>`
	: "");

	// 4) auto-scroll to newest text
	transcript.scrollTop = transcript.scrollHeight;
	};


	const source = context.createMediaStreamSource(stream);
	const processor = context.createScriptProcessor(4096, 1, 1);
	source.connect(processor);
	processor.connect(context.destination);
	processor.onaudioprocess = e => {
	const input = e.inputBuffer.getChannelData(0);
	ws.send(new Float32Array(input).buffer);
	};
	});

	// 2) Declare state for final/interim rendering
	const finalUtterances = [];
	let currentInterim = "";

	// 4) Extend sendConfig() to include epRule1/2/3
	function sendConfig() {
	if (ws && ws.readyState === WebSocket.OPEN) {
	ws.send(JSON.stringify({
	type: "config",
	sampleRate: orig_sample_rate,
	}));
	}
	}


	// 6) Replace your existing ws.onmessage handler with this:
	ws.onmessage = e => {
	const msg = JSON.parse(e.data);

	if (msg.volume !== undefined) {
	vol.value = Math.min(msg.volume, 1.0);
	}

	if (msg.final !== undefined) {
	// endpoint fired → lock in the final utterance
	finalUtterances.push(msg.final.trim());
	currentInterim = "";
	} else if (msg.partial !== undefined) {
	// update the rolling interim
	currentInterim = msg.partial;
	}

	// rebuild the full transcript: green finals + red interim
	transcript.innerHTML =
	finalUtterances
	.map(u => `<span class="final">${u}</span>`)
	.join("") // no explicit space here, margin handles it
	+ (currentInterim
	? `<span class="interim">${currentInterim}</span>`
	: "");

	// always scroll to bottom
	// transcript.scrollTop = transcript.scrollHeight;
	};


	// This part is for translation

	// async function sendForTranslation() {
	// const fullText = finalUtterances.join(" ").trim();

	// if (!fullText) {
	// // alert("No transcript to translate.");
	// console.log('[TRANSLATION] Nothing to translate')
	// return;
	// }

	// if (fullText === lastTranslatedText) {
	// console.log("Using cached translation");
	// document.getElementById("translatedText").textContent = lastTranslation;
	// return;
	// }

	// const response = await fetch("/translate", {
	// method: "POST",
	// headers: {
	// "Content-Type": "application/json"
	// },
	// body: JSON.stringify({ text: fullText })
	// });

	// const data = await response.json();

	// lastTranslatedText = fullText;
	// lastTranslation = data.translation;

	// // Display result
	// document.getElementById("translatedText").textContent = data.translation;
	// }


	</script>
	</body>
	</html>