Spaces:

Boadiwaa
/

App-Transcriber

Build error

App Files Files Community

Boadiwaa committed on Jun 13, 2024

Commit

844fd6a

verified ·

1 Parent(s): 1b51008

Create app.py

Browse files

Files changed (1) hide show

app.py +64 -0

app.py ADDED Viewed

	@@ -0,0 +1,64 @@

+import gradio as gr
+import base64
+import os
+import torch
+from transformers import (
+    AutomaticSpeechRecognitionPipeline,
+    WhisperForConditionalGeneration,
+    WhisperTokenizer,
+    WhisperProcessor,
+)
+from peft import PeftModel, PeftConfig
+# Hub id of the LoRA adapter; its PEFT config names the base checkpoint to load.
+peft_model_id = "Boadiwaa/LORA-colab-Distil-Whisper-medium2"
+task = "transcribe"
+peft_config = PeftConfig.from_pretrained(peft_model_id)
+base_model_id = peft_config.base_model_name_or_path
+# Load the base Whisper model first, then wrap it with the adapter.
+model = PeftModel.from_pretrained(
+    WhisperForConditionalGeneration.from_pretrained(base_model_id, device_map="auto"),
+    peft_model_id,
+)
+# Tokenizer and processor both come from the base checkpoint; only the
+# processor's feature extractor is handed to the pipeline.
+tokenizer = WhisperTokenizer.from_pretrained(base_model_id, task=task)
+processor = WhisperProcessor.from_pretrained(base_model_id, task=task)
+feature_extractor = processor.feature_extractor
+# Disabled: forcing decoder prompt ids (would need a `language` value, which is not defined here).
+#forced_decoder_ids = processor.get_decoder_prompt_ids(language=language, task=task)
+# Module-level speech-recognition pipeline used by the request handler.
+pipe = AutomaticSpeechRecognitionPipeline(
+    model=model,
+    tokenizer=tokenizer,
+    feature_extractor=feature_extractor,
+)
+# Disabled: Hub token lookup for the (also disabled) dataset flagging writer.
+#api_key = os.getenv("HF_API_TOKEN")
+def transcribe(data):
+    """Transcribe one recording with the module-level ASR pipeline ``pipe``.
+
+    Args:
+        data: Either a path to an audio file (what the microphone input of
+            the interface below delivers), or a sequence whose first item
+            is the base64-encoded content of an audio file (the original
+            calling convention, kept for backward compatibility).
+
+    Returns:
+        The recognised text, or an empty string when no audio was submitted.
+    """
+    if not data:
+        # Submitting without a recording delivers an empty value; nothing to do.
+        return ""
+    if isinstance(data, str):
+        audio_path = data
+    else:
+        audio_path = "temp.wav"
+        # Close the file before inference so the decoded bytes are flushed to disk.
+        with open(audio_path, "wb") as wav_file:
+            wav_file.write(base64.b64decode(data[0]))
+    # Pass the file *path*: the pipeline does not accept an open file object
+    # (the previous code handed it a write-mode handle that was never closed).
+    with torch.cuda.amp.autocast():
+        return pipe(audio_path, max_new_tokens=255)["text"]
+
+
+# The Hub dataset writer is disabled, so no custom flagging callback is passed
+# below; Gradio falls back to its default flagging callback.
+#hf_writer = gr.HuggingFaceDatasetSaver(hf_token = api_key,dataset_name="interaction-log2")
+demo = gr.Interface(
+    fn=transcribe,
+    # Was `inputs=data`, an undefined name. A microphone component that hands
+    # the handler a file path matches the usage instructions in `article`.
+    inputs=gr.Microphone(type="filepath"),
+    outputs="text",
+    title="Transcriber for Ghanaian-accented speech (English)",
+    description="Realtime demo for Ghanaian-accented speech recognition (in English).",
+    article = """
+    By using this app you consent to your voice being used to train the underlying open-source model further.
+    INSTRUCTIONS FOR USE:
+    1. Click on record and speak into your microphone
+    2. Click on stop and submit after you are done speaking.
+    3. Speech input should not exceed 40s for optimal results.
+    4. Please wait a few secs after input to see your results.
+    NB: You might see "no microphone detected" when you first open the app, CONSIDER THAT A MICROPHONE TEST, record anyway and submit. You might see an Error in the output. Now delete the input by clicking the 'x' at the top and record your main input.
+    The app should run seamlessly in the subsequent inputs.
+    """,
+    allow_flagging="auto",
+)
+# Launch exactly once, and only when run as a script. The earlier unguarded
+# `demo.launch(share=True)` started the server a second time on import/run.
+if __name__ == "__main__":
+    demo.launch()