import gradio as gr
from transformers import pipeline
import numpy as np
from ner import perform_ner
from intent import perform_intent_classification

# Whisper ASR pipeline used for streaming transcription.
transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")

def transcribe(stream, new_chunk):
    sentence_buffer = ""
    sr, y = new_chunk
    # Convert the chunk to float32 and normalise to [-1, 1], guarding against silent chunks.
    y = y.astype(np.float32)
    peak = np.max(np.abs(y))
    if peak > 0:
        y /= peak

    # Accumulate audio across chunks in the Gradio state.
    if stream is not None:
        stream = np.concatenate([stream, y])
    else:
        stream = y

    # Transcribe the accumulated audio once and reuse the result.
    transcription = transcriber({"sampling_rate": sr, "raw": stream})["text"]
    print(transcription)

    # Treat "." or "?" as a sentence boundary worth analysing.
    sentence_boundary = "." in transcription or "?" in transcription

    ner_result = None
    intent_result = None
    if sentence_boundary:
        # Text up to the last boundary character is the sentence; the remainder is buffered.
        boundary_index = max(transcription.rfind("."), transcription.rfind("?"))
        sentence = sentence_buffer + transcription[:boundary_index + 1]
        print("Sentence Buffer:", sentence_buffer)
        print("Sentence:", sentence)
        ner_result = perform_ner(sentence)
        intent_result = perform_intent_classification(sentence)
        print("NER Result (sentence):", ner_result)
        print("Intent Result (sentence):", intent_result)
        sentence_buffer = transcription[boundary_index + 1:]

    return stream, transcription, ner_result, intent_result

demo = gr.Interface(
    transcribe,
    ["state", gr.Audio(sources=["microphone"], streaming=True)],
    ["state", gr.Text(label="Transcribe"), gr.Text(label="NER"), gr.Text(label="Intent")],
    live=True,
)
demo.launch(share=True)