Spaces:

naxemCDA
/

Audio_English_Teacher

Sleeping

App Files Community

naxemCDA committed on Jun 10, 2025

Commit

a775afa

1 Parent(s): 303147d

modified process_audio function to return 4 values on every path

Browse files

Files changed (1) hide show

app.py +54 -3

app.py CHANGED Viewed

@@ -46,6 +46,52 @@ speaker_embeddings = {
 print("All models loaded successfully!")
 def process_audio(audio_path, voice_choice, conversation_history):
     """Process audio input and generate response"""
     # Transcribe audio
@@ -54,14 +100,17 @@ def process_audio(audio_path, voice_choice, conversation_history):
         user_input = result["text"]
     except Exception as e:
         print(f"ASR error: {e}")
-        return None, "Could not process audio. Please try again.", conversation_history
     # Check if input is English
     try:
         if detect(user_input) != "en":
-            return user_input, "You must try to speak in English for me to respond", conversation_history
     except LangDetectException:
-        return user_input, "Could not detect language. Please speak clearly.", conversation_history
     # Grammar correction
     corrected_input = grammar_pipe(user_input, max_length=256)[0]["generated_text"]
@@ -89,8 +138,10 @@ def process_audio(audio_path, voice_choice, conversation_history):
     output_audio = "response.wav"
     sf.write(output_audio, speech.numpy(), samplerate=16000)
     return user_input, response_text, output_audio, conversation_history
 # Gradio interface
 with gr.Blocks(title="Audio English Teacher") as demo:
     gr.Markdown("# 🎓 Audio English Teacher")

 print("All models loaded successfully!")
+#####################################################################
+###def process_audio(audio_path, voice_choice, conversation_history):
+###    """Process audio input and generate response"""
+###    # Transcribe audio
+###    try:
+###        result = asr_pipe(audio_path)
+###        user_input = result["text"]
+###    except Exception as e:
+###        print(f"ASR error: {e}")
+###        return None, "Could not process audio. Please try again.", conversation_history
+###
+###    # Check if input is English
+###    try:
+###        if detect(user_input) != "en":
+###            return user_input, "You must try to speak in English for me to respond", conversation_history
+###    except LangDetectException:
+###        return user_input, "Could not detect language. Please speak clearly.", conversation_history
+###
+###    # Grammar correction
+###    corrected_input = grammar_pipe(user_input, max_length=256)[0]["generated_text"]
+###
+###    # Update conversation history
+###    conversation_history.append(f"{corrected_input}")
+###
+###    # Generate conversational response
+###    chat_input = "\n".join(conversation_history[-4:])  # Keep last 4 exchanges
+###    response = chat_pipe(chat_input, max_length=256, pad_token_id=chat_pipe.tokenizer.eos_token_id)
+###    response_text = response[0]["generated_text"].split("Teacher:")[-1].strip()
+###
+###    # Update conversation history
+###    conversation_history.append(f"Teacher: {response_text}")
+###
+###    # Generate speech
+###    inputs = tts_processor(text=response_text, return_tensors="pt")
+###    speech = tts_model.generate_speech(
+###        inputs["input_ids"],
+###        speaker_embeddings[voice_choice],
+###        vocoder=tts_vocoder
+###    )
+###
+###    # Save audio output
+###    output_audio = "response.wav"
+###    sf.write(output_audio, speech.numpy(), samplerate=16000)
+###
+###    return user_input, response_text, output_audio, conversation_history
+###########################################################################
 def process_audio(audio_path, voice_choice, conversation_history):
     """Process audio input and generate response"""
     # Transcribe audio
         user_input = result["text"]
     except Exception as e:
         print(f"ASR error: {e}")
+        # Return 4 values, including placeholders for the missing outputs
+        return None, "Could not process audio. Please try again.", None, conversation_history
     # Check if input is English
     try:
         if detect(user_input) != "en":
+            # Return 4 values
+            return user_input, "You must try to speak in English for me to respond", None, conversation_history
     except LangDetectException:
+        # Return 4 values
+        return user_input, "Could not detect language. Please speak clearly.", None, conversation_history
     # Grammar correction
     corrected_input = grammar_pipe(user_input, max_length=256)[0]["generated_text"]
     output_audio = "response.wav"
     sf.write(output_audio, speech.numpy(), samplerate=16000)
+    # Return 4 values
     return user_input, response_text, output_audio, conversation_history
+########################################################################
 # Gradio interface
 with gr.Blocks(title="Audio English Teacher") as demo:
     gr.Markdown("# 🎓 Audio English Teacher")