srd4
/

faster-whisper-large-v2

Automatic Speech Recognition

Model card Files Files and versions

srd4 committed on Jan 13, 2024

Commit

cf9a3bd

·

verified ·

1 Parent(s): 12879c9

Update handler.py

Files changed (1) hide show

handler.py +16 -21

handler.py CHANGED Viewed

@@ -3,31 +3,26 @@ from faster_whisper import WhisperModel
 class EndpointHandler:
     def __init__(self):
-        # Initialize WhisperModel
-        self.model = WhisperModel("large-v2")
     def __call__(self, data: Dict) -> Dict:
-        # Get the audio file bytes from the request data
         audio_bytes = data["inputs"]
-        # Perform transcription
-        results = []
         segments, info = self.model.transcribe(audio_bytes)
-        for segment in segments:
-            result = {
-                "start": segment.start,
-                "end": segment.end,
-                "text": segment.text
-            }
-            results.append(result)
-        # Return the transcribed text along with language data
-        language_code, language_prob = info.language, info.language_probability
-        response = {
-            "transcription": results,
-            "language": {
-                "code": language_code,
-                "probability": language_prob
-            }
         }
-        return response

 class EndpointHandler:
     def __init__(self):
+        # Initialize WhisperModel; assume that "large-v2" model files are in /repository
+        self.model = WhisperModel("large-v2", device="cpu")
     def __call__(self, data: Dict) -> Dict:
+        # Process the input data expected to be in 'inputs' key containing audio file bytes
         audio_bytes = data["inputs"]
+        # Perform transcription using the model
         segments, info = self.model.transcribe(audio_bytes)
+        # Compile the results into a text string and extract language information
+        text = " ".join(segment.text for segment in segments)
+        language_code = info.language
+        language_prob = info.language_probability
+        # Compile the response dictionary
+        result = {
+            "text": text,
+            "language": language_code,
+            "language_probability": language_prob
         }
+        return result