Spaces:

siddqamar
/

FreeTranscriptMaker

Runtime error

App Files Files Community

FreeTranscriptMaker / app.py

siddqamar

Update app.py

d09d492 verified 8 months ago

raw

history blame

2.94 kB

	import gradio as gr
	import os
	from transformers import WhisperProcessor, WhisperForConditionalGeneration
	import numpy as np
	import librosa

	# Load the Whisper "base" checkpoint once, at import time.
	# The processor and the model must come from the same checkpoint, so the
	# identifier is written once and reused for both.
	_WHISPER_CHECKPOINT = "openai/whisper-base"
	processor = WhisperProcessor.from_pretrained(_WHISPER_CHECKPOINT)
	model = WhisperForConditionalGeneration.from_pretrained(_WHISPER_CHECKPOINT)

	# Light green look: emerald for both accent hues on a gray neutral palette.
	_ACCENT_HUE = "emerald"
	theme = gr.themes.Base(primary_hue=_ACCENT_HUE, secondary_hue=_ACCENT_HUE, neutral_hue="gray")

	def validate_file(file_path):
	    """Check that an uploaded audio file exists, is small enough and has a supported extension.

	    Args:
	        file_path: Path of the uploaded file as ``str``, ``bytes`` or
	            ``os.PathLike``. Any other value (``None``, an empty string, a
	            non-path object such as a tuple) is reported as a missing upload
	            instead of raising.

	    Returns:
	        A ``(is_valid, message)`` tuple. ``message`` is ``"File is valid."``
	        on success and the user-facing reason for rejection otherwise.
	    """
	    not_found = (False, "No file uploaded or file not found.")
	    max_size_mb = 25
	    allowed_extensions = (".mp3", ".wav")

	    # os.path functions raise TypeError on non-path input; treat that as
	    # "nothing uploaded" so the caller always gets a (bool, message) pair.
	    if not isinstance(file_path, (str, bytes, os.PathLike)) or not file_path:
	        return not_found

	    # Normalise to str so the extension comparison also works for bytes paths.
	    path = os.fsdecode(file_path)

	    # isfile rather than exists: a directory must not pass as an upload.
	    if not os.path.isfile(path):
	        return not_found

	    # Check file size (25 MB limit)
	    try:
	        file_size_mb = os.path.getsize(path) / (1024 * 1024)
	    except OSError:
	        # The file vanished or became unreadable after the isfile check.
	        return not_found
	    if file_size_mb > max_size_mb:
	        return False, f"File size is {file_size_mb:.2f} MB. Please upload a file smaller than 25 MB."

	    # Check file extension (case-insensitive)
	    file_extension = os.path.splitext(path)[1].lower()
	    if file_extension not in allowed_extensions:
	        return False, "Only .mp3 and .wav formats are supported."

	    return True, "File is valid."

	def transcribe_audio(audio_file):
	    """Transcribe an uploaded audio file with the module-level Whisper model.

	    Args:
	        audio_file: Path of the uploaded audio file, or ``None`` when nothing
	            was uploaded.

	    Returns:
	        The transcription text, or a user-facing message when the upload is
	        missing, is rejected by ``validate_file``, or transcription raises.
	    """
	    # Check if audio_file is None
	    if audio_file is None:
	        return "Please upload an audio file."

	    # Validate the file first
	    is_valid, message = validate_file(audio_file)
	    if not is_valid:
	        return message

	    sample_rate = 16000
	    # The Whisper feature extractor pads/truncates every input to a single
	    # 30-second window, so a longer recording passed in one go would lose
	    # everything after the first 30 seconds. Feed it window by window instead.
	    window = 30 * sample_rate

	    try:
	        # Load audio file, resampled to the rate the model expects
	        speech_array, _ = librosa.load(audio_file, sr=sample_rate)

	        pieces = []
	        # max(..., 1) keeps one pass even for an empty array, matching the
	        # previous single-call behaviour.
	        for start in range(0, max(len(speech_array), 1), window):
	            segment = speech_array[start:start + window]

	            # Process the audio segment
	            input_features = processor(segment, sampling_rate=sample_rate, return_tensors="pt").input_features

	            # Generate token ids
	            predicted_ids = model.generate(input_features)

	            # Decode token ids to text
	            pieces.append(processor.batch_decode(predicted_ids, skip_special_tokens=True)[0])

	        # A recording that fits in one window is returned exactly as decoded.
	        if len(pieces) == 1:
	            return pieces[0]
	        # Windows are cut at fixed offsets, so normalise the whitespace at
	        # the seams and drop windows that decoded to nothing.
	        cleaned = (piece.strip() for piece in pieces)
	        return " ".join(piece for piece in cleaned if piece)

	    except Exception as e:
	        # UI boundary: report the failure in the output box rather than crash.
	        return f"An error occurred during transcription: {str(e)}"

	# Create Gradio interface
	with gr.Blocks(theme=theme) as demo:
	    gr.Markdown("# Audio Transcription with Whisper")
	    gr.Markdown("Upload an audio file (.mp3 or .wav) of maximum 25MB to get the transcription.")

	    with gr.Row():
	        with gr.Column():
	            # type="filepath" makes Gradio hand transcribe_audio the path of
	            # the uploaded file. Without it gr.Audio defaults to
	            # type="numpy" and passes a (sample_rate, data) tuple, which
	            # the path-based validation and librosa.load cannot use.
	            audio_input = gr.Audio(sources=["upload"], type="filepath", label="Upload Audio File")
	            submit_btn = gr.Button("Transcribe", variant="primary")

	        with gr.Column():
	            output = gr.Textbox(label="Transcription Result", lines=10)

	    # Event listeners have to be registered inside the Blocks context.
	    submit_btn.click(fn=transcribe_audio, inputs=audio_input, outputs=output)

	    gr.Markdown("### Limitations")
	    gr.Markdown("- Maximum file size: 25 MB")
	    gr.Markdown("- Supported formats: .mp3 and .wav")
	    gr.Markdown("- Uses the Whisper base model which works best with clear audio")

	# Launch the app
	if __name__ == "__main__":
	    demo.launch()