Update app.py
app.py CHANGED
@@ -1,42 +1,27 @@
 import gradio as gr
-from
+from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
+import torch
+import librosa
 
-
-
-
-        result = client.predict(
-            text,  # str in 'Enter multilingual text💬📝' Textbox component
-            voice,  # filepath in 'Upload or Record Speaker Audio (optional)🌬️💬' Audio component
-            "",  # str in 'alternatively, you can paste in an audio file URL:' Textbox component
-            14,  # float (numeric value between 10 and 15) in 'Tempo (in characters per second)' Slider component
-            api_name="/whisper_speech_demo"
-        )
-        print(result)
-        return result
-    except Exception as e:
-        raise gr.Error(f"Error in get_speech: {str(e)}")
+# Load the model and processor
+processor = Wav2Vec2Processor.from_pretrained("SpeechResearch/whisper-ft-normal")
+model = Wav2Vec2ForCTC.from_pretrained("SpeechResearch/whisper-ft-normal")
 
-def
-
-
-
-
-
-
-
-        )
-        print(result)
-        return result['video']
-    except Exception as e:
-        raise gr.Error(f"Error in get_dreamtalk: {str(e)}")
+def transcribe_speech(audio_path):
+    speech, _ = librosa.load(audio_path, sr=16000)
+    input_values = processor(speech, return_tensors="pt", padding="longest").input_values
+    with torch.no_grad():
+        logits = model(input_values).logits
+    predicted_ids = torch.argmax(logits, dim=-1)
+    transcription = processor.batch_decode(predicted_ids)
+    return transcription[0]
 
 def pipe(text, voice, image_in):
-
-
-
-
-
-        raise gr.Error(f"An error occurred while processing: {str(e)}")
+    # Assuming voice is a file path to the audio file
+    transcription = transcribe_speech(voice)
+    # Now use this transcription with your get_dreamtalk function
+    video = get_dreamtalk(image_in, transcription)
+    return video
 
 with gr.Blocks() as demo:
     with gr.Column():
@@ -44,11 +29,9 @@ with gr.Blocks() as demo:
             <h1 style="text-align: center;">
                 Talking Image
             </h1>
-            <p style="text-align: center;"></p>
             <h3 style="text-align: center;">
                 Clone your voice and make your photos speak.
             </h3>
-            <p style="text-align: center;"></p>
         """)
         with gr.Row():
             with gr.Column():
@@ -65,4 +48,4 @@ with gr.Blocks() as demo:
             outputs=[video_o],
             concurrency_limit=3
         )
-        demo.queue(max_size=10).launch(show_error=True, show_api=False)
+demo.queue(max_size=10).launch(show_error=True, show_api=False)
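For context: the code removed in the first hunk called a hosted WhisperSpeech Space through the gradio_client library (several deleted lines did not survive extraction and are left blank above). Below is a minimal sketch of that pattern; the Space id and the try/except wrapper are assumptions implied by the surviving lines, while the predict() arguments and api_name come directly from the deleted code:

from gradio_client import Client
import gradio as gr

def get_speech(text, voice):
    try:
        # Space id is an assumption; the deleted line naming it was lost
        client = Client("collabora/WhisperSpeech")
        result = client.predict(
            text,   # str: multilingual text to speak
            voice,  # filepath: speaker audio to clone (optional)
            "",     # str: alternative audio file URL
            14,     # float: tempo in characters per second (10-15)
            api_name="/whisper_speech_demo"
        )
        print(result)
        return result  # typically a filepath to the generated audio
    except Exception as e:
        raise gr.Error(f"Error in get_speech: {str(e)}")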
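The replacement path transcribes the uploaded voice locally with a Wav2Vec2 CTC model instead of synthesizing speech remotely. A standalone sketch of the same flow, using the widely available facebook/wav2vec2-base-960h checkpoint as a stand-in (the commit loads SpeechResearch/whisper-ft-normal, which, despite the "whisper" name, must host Wav2Vec2-compatible weights for these classes to accept it):

import torch
import librosa
from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC

# Stand-in checkpoint; the commit uses "SpeechResearch/whisper-ft-normal"
processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h")
model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")

def transcribe_speech(audio_path):
    # Wav2Vec2 expects 16 kHz mono input
    speech, _ = librosa.load(audio_path, sr=16000)
    inputs = processor(speech, sampling_rate=16000, return_tensors="pt", padding="longest")
    with torch.no_grad():
        logits = model(inputs.input_values).logits
    # Greedy CTC decoding: argmax per frame, then collapse repeats and blanks
    predicted_ids = torch.argmax(logits, dim=-1)
    return processor.batch_decode(predicted_ids)[0]

print(transcribe_speech("sample.wav"))  # hypothetical input file

Note that the rewritten pipe() passes this transcription to get_dreamtalk(), whose definition is deleted in the same hunk; unless get_dreamtalk survives elsewhere in the file, the call will raise a NameError at runtime.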