Daniel Tse
committed on
Commit
·
555a0ea
1
Parent(s):
752daec
Use simpler method for transcription.
Browse files
app.py
CHANGED
|
@@ -20,21 +20,10 @@ def transcribe_audio(audiofile):
|
|
| 20 |
podcast_duration = podcast.duration_seconds
|
| 21 |
print(f"Audio Duration: {podcast_duration}")
|
| 22 |
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
pipe = pipeline(
|
| 27 |
-
"automatic-speech-recognition",
|
| 28 |
-
model="openai/whisper-small.en",
|
| 29 |
-
chunk_length_s=30,
|
| 30 |
-
device=device,
|
| 31 |
-
max_new_tokens=60,
|
| 32 |
-
)
|
| 33 |
-
|
| 34 |
-
transcription = pipe(audiofile, batch_size=8)["text"]
|
| 35 |
-
|
| 36 |
st.session_state['transcription'] = transcription
|
| 37 |
-
print(f"
|
| 38 |
st.info('Done Transcription')
|
| 39 |
|
| 40 |
return transcription
|
|
|
|
| 20 |
podcast_duration = podcast.duration_seconds
|
| 21 |
print(f"Audio Duration: {podcast_duration}")
|
| 22 |
|
| 23 |
+
whisper_model = whisper.load_model("small.en")
|
| 24 |
+
transcription = whisper_model.transcribe(audiofile)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
st.session_state['transcription'] = transcription
|
| 26 |
+
print(f"ranscription: {transcription['text']}")
|
| 27 |
st.info('Done Transcription')
|
| 28 |
|
| 29 |
return transcription
|