naxemCDA committed on
Commit
a775afa
·
1 Parent(s): 303147d

modified process_audio function to return 4 values (added audio-output placeholder on early returns)

Browse files
Files changed (1) hide show
  1. app.py +54 -3
app.py CHANGED
@@ -46,6 +46,52 @@ speaker_embeddings = {
46
 
47
  print("All models loaded successfully!")
48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  def process_audio(audio_path, voice_choice, conversation_history):
50
  """Process audio input and generate response"""
51
  # Transcribe audio
@@ -54,14 +100,17 @@ def process_audio(audio_path, voice_choice, conversation_history):
54
  user_input = result["text"]
55
  except Exception as e:
56
  print(f"ASR error: {e}")
57
- return None, "Could not process audio. Please try again.", conversation_history
 
58
 
59
  # Check if input is English
60
  try:
61
  if detect(user_input) != "en":
62
- return user_input, "You must try to speak in English for me to respond", conversation_history
 
63
  except LangDetectException:
64
- return user_input, "Could not detect language. Please speak clearly.", conversation_history
 
65
 
66
  # Grammar correction
67
  corrected_input = grammar_pipe(user_input, max_length=256)[0]["generated_text"]
@@ -89,8 +138,10 @@ def process_audio(audio_path, voice_choice, conversation_history):
89
  output_audio = "response.wav"
90
  sf.write(output_audio, speech.numpy(), samplerate=16000)
91
 
 
92
  return user_input, response_text, output_audio, conversation_history
93
 
 
94
  # Gradio interface
95
  with gr.Blocks(title="Audio English Teacher") as demo:
96
  gr.Markdown("# 🎓 Audio English Teacher")
 
46
 
47
  print("All models loaded successfully!")
48
 
49
+ #####################################################################
50
+ ###def process_audio(audio_path, voice_choice, conversation_history):
51
+ ### """Process audio input and generate response"""
52
+ ### # Transcribe audio
53
+ ### try:
54
+ ### result = asr_pipe(audio_path)
55
+ ### user_input = result["text"]
56
+ ### except Exception as e:
57
+ ### print(f"ASR error: {e}")
58
+ ### return None, "Could not process audio. Please try again.", conversation_history
59
+ ###
60
+ ### # Check if input is English
61
+ ### try:
62
+ ### if detect(user_input) != "en":
63
+ ### return user_input, "You must try to speak in English for me to respond", conversation_history
64
+ ### except LangDetectException:
65
+ ### return user_input, "Could not detect language. Please speak clearly.", conversation_history
66
+ ###
67
+ ### # Grammar correction
68
+ ### corrected_input = grammar_pipe(user_input, max_length=256)[0]["generated_text"]
69
+ ###
70
+ ### # Update conversation history
71
+ ### conversation_history.append(f"{corrected_input}")
72
+ ###
73
+ ### # Generate conversational response
74
+ ### chat_input = "\n".join(conversation_history[-4:]) # Keep last 4 exchanges
75
+ ### response = chat_pipe(chat_input, max_length=256, pad_token_id=chat_pipe.tokenizer.eos_token_id)
76
+ ### response_text = response[0]["generated_text"].split("Teacher:")[-1].strip()
77
+ ###
78
+ ### # Update conversation history
79
+ ### conversation_history.append(f"Teacher: {response_text}")
80
+ ###
81
+ ### # Generate speech
82
+ ### inputs = tts_processor(text=response_text, return_tensors="pt")
83
+ ### speech = tts_model.generate_speech(
84
+ ### inputs["input_ids"],
85
+ ### speaker_embeddings[voice_choice],
86
+ ### vocoder=tts_vocoder
87
+ ### )
88
+ ###
89
+ ### # Save audio output
90
+ ### output_audio = "response.wav"
91
+ ### sf.write(output_audio, speech.numpy(), samplerate=16000)
92
+ ###
93
+ ### return user_input, response_text, output_audio, conversation_history
94
+ ###########################################################################
95
  def process_audio(audio_path, voice_choice, conversation_history):
96
  """Process audio input and generate response"""
97
  # Transcribe audio
 
100
  user_input = result["text"]
101
  except Exception as e:
102
  print(f"ASR error: {e}")
103
+ # Return 4 values, including placeholders for the missing outputs
104
+ return None, "Could not process audio. Please try again.", None, conversation_history
105
 
106
  # Check if input is English
107
  try:
108
  if detect(user_input) != "en":
109
+ # Return 4 values
110
+ return user_input, "You must try to speak in English for me to respond", None, conversation_history
111
  except LangDetectException:
112
+ # Return 4 values
113
+ return user_input, "Could not detect language. Please speak clearly.", None, conversation_history
114
 
115
  # Grammar correction
116
  corrected_input = grammar_pipe(user_input, max_length=256)[0]["generated_text"]
 
138
  output_audio = "response.wav"
139
  sf.write(output_audio, speech.numpy(), samplerate=16000)
140
 
141
+ # Return 4 values
142
  return user_input, response_text, output_audio, conversation_history
143
 
144
+ ########################################################################
145
  # Gradio interface
146
  with gr.Blocks(title="Audio English Teacher") as demo:
147
  gr.Markdown("# 🎓 Audio English Teacher")