Spaces:

junaid008
/

Katib-ASR

Running on Zero

App Files Files Community

Katib-ASR / app.py

junaid008

Update app.py

83e303c verified 10 days ago

raw

history blame contribute delete

3.14 kB

	import gradio as gr
	import torch
	from transformers import pipeline
	import spaces

	# =========================================
	# LOAD MODEL
	# =========================================
	# The ASR pipeline is built once at import time. It starts on CPU in
	# bfloat16 (to save memory during startup); transcribe_audio moves the
	# model to the GPU only for the duration of a request.
	pipe = pipeline(
	    task="automatic-speech-recognition",
	    model="uzair0/Katib-ASR",
	    device="cpu",
	    torch_dtype=torch.bfloat16,
	)

	@spaces.GPU(duration=60)
	def transcribe_audio(audio_filepath):
	    """Transcribe a recorded audio file with the module-level ASR pipeline.

	    The model is moved to CUDA for the duration of the call and moved back
	    to CPU afterwards, including when transcription fails.

	    Args:
	        audio_filepath: Path to the recorded audio file, or ``None`` / an
	            empty string when nothing has been recorded.

	    Returns:
	        The transcribed text, or a warning message when no audio was given.

	    Raises:
	        Any exception raised by the pipeline is propagated after the model
	        has been returned to CPU.
	    """
	    # Covers both None (no recording) and an empty path string.
	    if not audio_filepath:
	        return "⚠️ Please record some audio first!"

	    try:
	        # 1. Move model to GPU
	        pipe.model.to("cuda")

	        # 2. Re-assign the device to the pipeline for this call
	        pipe.device = torch.device("cuda")

	        # 3. Run transcription
	        result = pipe(
	            audio_filepath,
	            chunk_length_s=30,  # Helps with longer recordings
	            generate_kwargs={
	                "language": "pashto",
	                "task": "transcribe",
	            },
	        )
	    finally:
	        # 4. Cleanup: always move back to CPU, even when transcription
	        # raises, so a failed request does not leave the shared pipeline
	        # pointing at CUDA for the next caller.
	        pipe.model.to("cpu")
	        pipe.device = torch.device("cpu")

	    return result["text"]

	# =========================================
	# UI DESIGN (Dark Reference Layout)
	# =========================================

	# Stylesheet passed to gr.Blocks(css=...) below. It sets a dark page
	# background, renders the transcription textarea right-to-left, and styles
	# the buttons through the "submit-btn" / "clear-btn" classes and the
	# textbox through "transcription-box", which the components attach via
	# elem_classes.
	custom_css = """
	.gradio-container { background-color: #0b0f19 !important; border: none !important; }
	h2, p { color: white !important; }

	/* Transcription box styling */
	.transcription-box textarea {
	direction: rtl !important;
	text-align: right !important;
	font-size: 1.2em !important;
	background-color: #161b22 !important;
	color: white !important;
	border: 1px solid #30363d !important;
	}

	/* Orange Submit Button */
	.submit-btn {
	background: #ff5722 !important;
	color: white !important;
	font-weight: bold !important;
	border: none !important;
	}

	.clear-btn {
	background-color: #21262d !important;
	color: white !important;
	border: 1px solid #30363d !important;
	}

	/* Make audio player look better in dark mode */
	audio { filter: invert(1) hue-rotate(180deg); }
	"""

	def _reset_fields():
	    """Return cleared values for the audio input and the transcription box."""
	    return [None, ""]


	# Two-column page: recorder and buttons on the left, transcription on the right.
	base_theme = gr.themes.Default()
	with gr.Blocks(theme=base_theme, css=custom_css) as demo:
	    with gr.Column():
	        gr.Markdown("## 🎙️ Katib ASR: Pashto Speech Recognition")
	        gr.Markdown("Click the Record button below, speak Pashto into your microphone, and see the result!")

	        with gr.Row():
	            # Left column: microphone recorder with the action buttons.
	            with gr.Column(scale=1):
	                audio_input = gr.Audio(sources=["microphone"], type="filepath", label="Record Pashto")
	                with gr.Row():
	                    clear_btn = gr.Button("Clear", elem_classes="clear-btn")
	                    submit_btn = gr.Button("Submit", elem_classes="submit-btn")

	            # Right column: transcription output.
	            with gr.Column(scale=1):
	                output_text = gr.Textbox(
	                    label="Katib ASR Transcription", lines=8, elem_classes="transcription-box"
	                )

	    # Event wiring: Submit transcribes the recording, Clear resets both fields.
	    submit_btn.click(
	        fn=transcribe_audio,
	        inputs=audio_input,
	        outputs=output_text,
	    )
	    clear_btn.click(
	        fn=_reset_fields,
	        inputs=None,
	        outputs=[audio_input, output_text],
	    )

	demo.launch(ssr_mode=False)