File size: 2,277 Bytes
d809d42
f96a97b
d809d42
 
 
 
f96a97b
 
 
d809d42
 
 
 
 
 
 
 
 
 
 
f96a97b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d809d42
f96a97b
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import azure.cognitiveservices.speech as speechsdk
from azure.cognitiveservices.speech.audio import PushAudioInputStream
import os
import time
# Shared Speech SDK configuration. The credentials are read from the
# SPEECH_KEY and SPEECH_REGION environment variables (set them to your own
# subscription key and service region); each falls back to an empty string
# when the variable is not set.
speech_config = speechsdk.SpeechConfig(
    subscription=os.environ.get('SPEECH_KEY', ''),
    region=os.environ.get('SPEECH_REGION', ''),
)
# Recognition language used by every recognizer built from this config.
speech_config.speech_recognition_language = "en-US"


def recognize_from_file(file=None):
    """Recognize a single utterance from an audio file and return its text.

    Uses the module-level ``speech_config``, which is built from the
    "SPEECH_KEY" and "SPEECH_REGION" environment variables.

    Args:
        file: Path of the audio file to recognize. Required, even though the
            signature keeps a ``None`` default for backward compatibility.

    Returns:
        The ``text`` attribute of the one-shot recognition result.

    Raises:
        ValueError: If ``file`` is ``None``.
    """
    # Fail early with an actionable message instead of letting the SDK reject
    # an AudioConfig that has no input source.
    if file is None:
        raise ValueError("recognize_from_file() requires a path to an audio file")

    audio_config = speechsdk.audio.AudioConfig(filename=file)
    speech_recognizer = speechsdk.SpeechRecognizer(
        speech_config=speech_config, audio_config=audio_config
    )

    # recognize_once_async() returns a future; get() blocks until the single
    # recognition attempt has finished.
    result = speech_recognizer.recognize_once_async().get()

    # Surface cancellations the same way recognize_from_stream does, so a
    # failed request is distinguishable from audio with no speech in it.
    if result.reason == speechsdk.ResultReason.Canceled:
        print('CANCELED {}'.format(result.cancellation_details))

    return result.text
        

def recognize_from_stream(stream, rec_cb):
    """Start continuous recognition on an audio stream and return the recognizer.

    Uses the module-level ``speech_config``. Progress events (recognizing,
    session started/stopped, canceled) are printed to stdout.

    Args:
        stream: Audio input stream handed to ``AudioConfig(stream=...)``.
        rec_cb: Callback connected to the recognizer's ``recognized`` signal;
            it is called with the event object for each recognized result.

    Returns:
        The ``SpeechRecognizer`` on which continuous recognition was started.
        The start is requested asynchronously and is not awaited here, so the
        caller should hold on to the returned object while recognition runs.
    """
    audio_config = speechsdk.audio.AudioConfig(stream=stream)
    speech_recognizer = speechsdk.SpeechRecognizer(
        speech_config=speech_config, audio_config=audio_config
    )

    speech_recognizer.recognizing.connect(lambda evt: print('RECOGNIZING: {}'.format(evt)))
    speech_recognizer.recognized.connect(rec_cb)
    speech_recognizer.session_started.connect(lambda evt: print('SESSION STARTED: {}'.format(evt)))
    speech_recognizer.session_stopped.connect(lambda evt: print('SESSION STOPPED {}'.format(evt)))
    speech_recognizer.canceled.connect(lambda evt: print('CANCELED {}'.format(evt)))

    def stop_cb(evt):
        """Request that continuous recognition stop once the session ends."""
        print('CLOSING on {}'.format(evt))
        # This runs inside an SDK event callback. Use the non-blocking stop:
        # waiting here for the stop to complete can stall event delivery.
        speech_recognizer.stop_continuous_recognition_async()

    speech_recognizer.session_stopped.connect(stop_cb)
    speech_recognizer.canceled.connect(stop_cb)

    speech_recognizer.start_continuous_recognition_async()
    return speech_recognizer
    


if __name__ == '__main__':
    # Only this demo needs threading, so import it locally.
    import threading

    ## Recognize from file
    # for audio_file in os.listdir("audio_samples"):
    #     start = time.time()
    #     print(recognize_from_file(f"audio_samples/{audio_file}"), " in ", time.time()-start)

    ## real-time recognition
    def rec_cb(evt):
        """Print the text of each recognized result under a separator line."""
        print("##########################")
        print(evt.result.text)

    stream = PushAudioInputStream()
    # Read the sample through a context manager so the file handle is closed.
    # NOTE(review): the file is pushed byte-for-byte, WAV header included;
    # confirm that this matches the format the push stream expects.
    with open("audio_samples/audo_0.wav", "rb") as audio_file:
        stream.write(audio_file.read())

    # Keep a reference to the recognizer so it stays alive while recognition
    # is running in the background.
    recognizer = recognize_from_stream(stream, rec_cb)

    # Closing the push stream marks the end of the audio, which lets the
    # recognizer finish the session instead of waiting for more data.
    stream.close()

    # Wait until the session stops or is canceled, but never longer than the
    # original fixed 10-second pause. If an event fires before these handlers
    # are connected, the timeout still bounds the wait.
    finished = threading.Event()
    recognizer.session_stopped.connect(lambda evt: finished.set())
    recognizer.canceled.connect(lambda evt: finished.set())
    finished.wait(timeout=10)