Shanuka01 committed on
Commit
ba7d1fa
·
1 Parent(s): 9992141

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -12
app.py CHANGED
@@ -22,15 +22,7 @@ tts = TextToSpeech(kv_cache=True, use_deepspeed=True, half=True)
22
 
23
  def convert_audio(filepath, voice="indian_F_1"):
24
  # Transcribe audio to text using STT
25
- transcription_output = pipe(
26
- filepath,
27
- max_new_tokens=256,
28
- generate_kwargs={
29
- "task": "transcribe",
30
- "language": "english",
31
- },
32
- chunk_length_s=30,
33
- batch_size=8
34
  )
35
  transcribed_text = transcription_output["text"]
36
 
@@ -42,15 +34,14 @@ def convert_audio(filepath, voice="indian_F_1"):
42
  for audio_frame in tts.tts_with_preset(
43
  text,
44
  voice_samples=voice_samples,
45
- conditioning_latents=conditioning_latents,
46
- preset="ultra_fast",
47
  k=1
48
  ):
49
  audio_frames.append(audio_frame.cpu().detach().numpy())
50
 
51
  # Joining the audio frames for output using numpy's concatenate
52
  final_audio = np.concatenate(audio_frames, axis=0)
53
- return (24000, final_audio)
54
 
55
  interface = gr.Interface(
56
  fn=convert_audio,
 
22
 
23
  def convert_audio(filepath, voice="indian_F_1"):
24
  # Transcribe audio to text using STT
25
+
 
 
 
 
 
 
 
 
26
  )
27
  transcribed_text = transcription_output["text"]
28
 
 
34
  for audio_frame in tts.tts_with_preset(
35
  text,
36
  voice_samples=voice_samples,
37
+
 
38
  k=1
39
  ):
40
  audio_frames.append(audio_frame.cpu().detach().numpy())
41
 
42
  # Joining the audio frames for output using numpy's concatenate
43
  final_audio = np.concatenate(audio_frames, axis=0)
44
+
45
 
46
  interface = gr.Interface(
47
  fn=convert_audio,