Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -7,6 +7,7 @@ from huggingface_hub import model_info
|
|
| 7 |
import openai
|
| 8 |
|
| 9 |
|
|
|
|
| 10 |
def transcribe(microphone, file_upload):
|
| 11 |
warn_output = ""
|
| 12 |
if (microphone is not None) and (file_upload is not None):
|
|
@@ -20,6 +21,13 @@ def transcribe(microphone, file_upload):
|
|
| 20 |
|
| 21 |
file = microphone if microphone is not None else file_upload
|
| 22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
text = pipe(file)["text"]
|
| 24 |
|
| 25 |
return warn_output + text
|
|
@@ -45,7 +53,7 @@ def yt_transcribe(yt_url):
|
|
| 45 |
res_format = 'srt'
|
| 46 |
pipe = openai.Audio.transcribe(model="whisper-1", file=open('audio.mp3', 'rb'), response_format=res_format, prompt='请使用书面语')
|
| 47 |
pipe.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(language=lang, task="transcribe")
|
| 48 |
-
|
| 49 |
text = pipe("audio.mp3")["text"]
|
| 50 |
|
| 51 |
return html_embed_str, text
|
|
|
|
| 7 |
import openai
|
| 8 |
|
| 9 |
|
| 10 |
+
|
| 11 |
def transcribe(microphone, file_upload):
|
| 12 |
warn_output = ""
|
| 13 |
if (microphone is not None) and (file_upload is not None):
|
|
|
|
| 21 |
|
| 22 |
file = microphone if microphone is not None else file_upload
|
| 23 |
|
| 24 |
+
openai.api_key = "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"
|
| 25 |
+
device = 0 if torch.cuda.is_available() else "cpu"
|
| 26 |
+
res_format = 'srt'
|
| 27 |
+
pipe = openai.Audio.transcribe(model="whisper-1", file=open('file', 'rb'), response_format=res_format, prompt='请使用书面语')
|
| 28 |
+
pipe.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(language=lang, task="transcribe")
|
| 29 |
+
|
| 30 |
+
|
| 31 |
text = pipe(file)["text"]
|
| 32 |
|
| 33 |
return warn_output + text
|
|
|
|
| 53 |
res_format = 'srt'
|
| 54 |
pipe = openai.Audio.transcribe(model="whisper-1", file=open('audio.mp3', 'rb'), response_format=res_format, prompt='请使用书面语')
|
| 55 |
pipe.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(language=lang, task="transcribe")
|
| 56 |
+
|
| 57 |
text = pipe("audio.mp3")["text"]
|
| 58 |
|
| 59 |
return html_embed_str, text
|