Spaces:
Sleeping
Sleeping
Michael Hu
committed on
Commit
·
2477bc4
1
Parent(s):
ee54430
update logging level
Browse files- app.py +7 -7
- utils/stt.py +5 -5
- utils/translation.py +3 -3
- utils/tts.py +3 -3
app.py
CHANGED
|
@@ -26,12 +26,12 @@ from utils.tts_dummy import generate_speech
|
|
| 26 |
# Hugging Face Spaces Setup Automation
|
| 27 |
def setup_huggingface_space():
|
| 28 |
"""Automatically configure Hugging Face Space requirements"""
|
| 29 |
-
logger.
|
| 30 |
st.sidebar.header("Space Configuration")
|
| 31 |
|
| 32 |
try:
|
| 33 |
subprocess.run(["espeak-ng", "--version"], check=True, capture_output=True)
|
| 34 |
-
logger.
|
| 35 |
except (FileNotFoundError, subprocess.CalledProcessError):
|
| 36 |
logger.error("Missing espeak-ng dependency")
|
| 37 |
st.sidebar.error("""
|
|
@@ -64,7 +64,7 @@ os.makedirs("temp/outputs", exist_ok=True)
|
|
| 64 |
|
| 65 |
def configure_page():
|
| 66 |
"""Set up Streamlit page configuration"""
|
| 67 |
-
logger.
|
| 68 |
st.set_page_config(
|
| 69 |
page_title="Audio Translator",
|
| 70 |
page_icon="π§",
|
|
@@ -93,7 +93,7 @@ def handle_file_processing(upload_path):
|
|
| 93 |
|
| 94 |
try:
|
| 95 |
# STT Phase
|
| 96 |
-
logger.
|
| 97 |
status_text.markdown("π **Performing Speech Recognition...**")
|
| 98 |
with st.spinner("Initializing Whisper model..."):
|
| 99 |
english_text = transcribe_audio(upload_path)
|
|
@@ -101,7 +101,7 @@ def handle_file_processing(upload_path):
|
|
| 101 |
logger.info(f"STT completed. Text length: {len(english_text)} characters")
|
| 102 |
|
| 103 |
# Translation Phase
|
| 104 |
-
logger.
|
| 105 |
status_text.markdown("π **Translating Content...**")
|
| 106 |
with st.spinner("Loading translation model..."):
|
| 107 |
chinese_text = translate_text(english_text)
|
|
@@ -109,7 +109,7 @@ def handle_file_processing(upload_path):
|
|
| 109 |
logger.info(f"Translation completed. Translated length: {len(chinese_text)} characters")
|
| 110 |
|
| 111 |
# TTS Phase
|
| 112 |
-
logger.
|
| 113 |
status_text.markdown("π΅ **Generating Chinese Speech...**")
|
| 114 |
with st.spinner("Initializing TTS engine..."):
|
| 115 |
output_path = generate_speech(chinese_text, language="zh")
|
|
@@ -131,7 +131,7 @@ def handle_file_processing(upload_path):
|
|
| 131 |
|
| 132 |
def render_results(english_text, chinese_text, output_path):
|
| 133 |
"""Display processing results in organized columns"""
|
| 134 |
-
logger.
|
| 135 |
st.divider()
|
| 136 |
|
| 137 |
col1, col2 = st.columns([2, 1])
|
|
|
|
| 26 |
# Hugging Face Spaces Setup Automation
|
| 27 |
def setup_huggingface_space():
|
| 28 |
"""Automatically configure Hugging Face Space requirements"""
|
| 29 |
+
logger.info("Running Hugging Face space setup")
|
| 30 |
st.sidebar.header("Space Configuration")
|
| 31 |
|
| 32 |
try:
|
| 33 |
subprocess.run(["espeak-ng", "--version"], check=True, capture_output=True)
|
| 34 |
+
logger.info("espeak-ng verification successful")
|
| 35 |
except (FileNotFoundError, subprocess.CalledProcessError):
|
| 36 |
logger.error("Missing espeak-ng dependency")
|
| 37 |
st.sidebar.error("""
|
|
|
|
| 64 |
|
| 65 |
def configure_page():
|
| 66 |
"""Set up Streamlit page configuration"""
|
| 67 |
+
logger.info("Configuring Streamlit page")
|
| 68 |
st.set_page_config(
|
| 69 |
page_title="Audio Translator",
|
| 70 |
page_icon="π§",
|
|
|
|
| 93 |
|
| 94 |
try:
|
| 95 |
# STT Phase
|
| 96 |
+
logger.info("Beginning STT processing")
|
| 97 |
status_text.markdown("π **Performing Speech Recognition...**")
|
| 98 |
with st.spinner("Initializing Whisper model..."):
|
| 99 |
english_text = transcribe_audio(upload_path)
|
|
|
|
| 101 |
logger.info(f"STT completed. Text length: {len(english_text)} characters")
|
| 102 |
|
| 103 |
# Translation Phase
|
| 104 |
+
logger.info("Beginning translation")
|
| 105 |
status_text.markdown("π **Translating Content...**")
|
| 106 |
with st.spinner("Loading translation model..."):
|
| 107 |
chinese_text = translate_text(english_text)
|
|
|
|
| 109 |
logger.info(f"Translation completed. Translated length: {len(chinese_text)} characters")
|
| 110 |
|
| 111 |
# TTS Phase
|
| 112 |
+
logger.info("Beginning TTS generation")
|
| 113 |
status_text.markdown("π΅ **Generating Chinese Speech...**")
|
| 114 |
with st.spinner("Initializing TTS engine..."):
|
| 115 |
output_path = generate_speech(chinese_text, language="zh")
|
|
|
|
| 131 |
|
| 132 |
def render_results(english_text, chinese_text, output_path):
|
| 133 |
"""Display processing results in organized columns"""
|
| 134 |
+
logger.info("Rendering results")
|
| 135 |
st.divider()
|
| 136 |
|
| 137 |
col1, col2 = st.columns([2, 1])
|
utils/stt.py
CHANGED
|
@@ -22,17 +22,17 @@ def transcribe_audio(audio_path):
|
|
| 22 |
|
| 23 |
try:
|
| 24 |
# Audio conversion
|
| 25 |
-
logger.
|
| 26 |
audio = AudioSegment.from_file(audio_path)
|
| 27 |
processed_audio = audio.set_frame_rate(16000).set_channels(1)
|
| 28 |
wav_path = audio_path.replace(".mp3", ".wav")
|
| 29 |
processed_audio.export(wav_path, format="wav")
|
| 30 |
-
logger.
|
| 31 |
|
| 32 |
# Model initialization
|
| 33 |
logger.info("Loading Whisper model")
|
| 34 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 35 |
-
logger.
|
| 36 |
|
| 37 |
model = AutoModelForSpeechSeq2Seq.from_pretrained(
|
| 38 |
"openai/whisper-large-v3",
|
|
@@ -42,10 +42,10 @@ def transcribe_audio(audio_path):
|
|
| 42 |
).to(device)
|
| 43 |
|
| 44 |
processor = AutoProcessor.from_pretrained("openai/whisper-large-v3")
|
| 45 |
-
logger.
|
| 46 |
|
| 47 |
# Processing
|
| 48 |
-
logger.
|
| 49 |
inputs = processor(
|
| 50 |
wav_path,
|
| 51 |
sampling_rate=16000,
|
|
|
|
| 22 |
|
| 23 |
try:
|
| 24 |
# Audio conversion
|
| 25 |
+
logger.info("Converting audio format")
|
| 26 |
audio = AudioSegment.from_file(audio_path)
|
| 27 |
processed_audio = audio.set_frame_rate(16000).set_channels(1)
|
| 28 |
wav_path = audio_path.replace(".mp3", ".wav")
|
| 29 |
processed_audio.export(wav_path, format="wav")
|
| 30 |
+
logger.info(f"Audio converted to: {wav_path}")
|
| 31 |
|
| 32 |
# Model initialization
|
| 33 |
logger.info("Loading Whisper model")
|
| 34 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 35 |
+
logger.info(f"Using device: {device}")
|
| 36 |
|
| 37 |
model = AutoModelForSpeechSeq2Seq.from_pretrained(
|
| 38 |
"openai/whisper-large-v3",
|
|
|
|
| 42 |
).to(device)
|
| 43 |
|
| 44 |
processor = AutoProcessor.from_pretrained("openai/whisper-large-v3")
|
| 45 |
+
logger.info("Model loaded successfully")
|
| 46 |
|
| 47 |
# Processing
|
| 48 |
+
logger.info("Processing audio input")
|
| 49 |
inputs = processor(
|
| 50 |
wav_path,
|
| 51 |
sampling_rate=16000,
|
utils/translation.py
CHANGED
|
@@ -23,7 +23,7 @@ def translate_text(text):
|
|
| 23 |
logger.info("Loading NLLB model")
|
| 24 |
tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-3.3B")
|
| 25 |
model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-3.3B")
|
| 26 |
-
logger.
|
| 27 |
|
| 28 |
# Text processing
|
| 29 |
max_chunk_length = 1000
|
|
@@ -32,7 +32,7 @@ def translate_text(text):
|
|
| 32 |
|
| 33 |
translated_chunks = []
|
| 34 |
for i, chunk in enumerate(text_chunks):
|
| 35 |
-
logger.
|
| 36 |
inputs = tokenizer(
|
| 37 |
chunk,
|
| 38 |
return_tensors="pt",
|
|
@@ -47,7 +47,7 @@ def translate_text(text):
|
|
| 47 |
)
|
| 48 |
translated = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
| 49 |
translated_chunks.append(translated)
|
| 50 |
-
logger.
|
| 51 |
|
| 52 |
result = "".join(translated_chunks)
|
| 53 |
logger.info(f"Translation completed. Total length: {len(result)}")
|
|
|
|
| 23 |
logger.info("Loading NLLB model")
|
| 24 |
tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-3.3B")
|
| 25 |
model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-3.3B")
|
| 26 |
+
logger.info("Translation model loaded")
|
| 27 |
|
| 28 |
# Text processing
|
| 29 |
max_chunk_length = 1000
|
|
|
|
| 32 |
|
| 33 |
translated_chunks = []
|
| 34 |
for i, chunk in enumerate(text_chunks):
|
| 35 |
+
logger.info(f"Processing chunk {i+1}/{len(text_chunks)}")
|
| 36 |
inputs = tokenizer(
|
| 37 |
chunk,
|
| 38 |
return_tensors="pt",
|
|
|
|
| 47 |
)
|
| 48 |
translated = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
| 49 |
translated_chunks.append(translated)
|
| 50 |
+
logger.info(f"Chunk {i+1} translated successfully")
|
| 51 |
|
| 52 |
result = "".join(translated_chunks)
|
| 53 |
logger.info(f"Translation completed. Total length: {len(result)}")
|
utils/tts.py
CHANGED
|
@@ -19,7 +19,7 @@ class TTSEngine:
|
|
| 19 |
def __init__(self):
|
| 20 |
logger.info("Initializing TTS Engine")
|
| 21 |
self.device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 22 |
-
logger.
|
| 23 |
self._verify_model_files()
|
| 24 |
logger.info("Loading Kokoro model")
|
| 25 |
self.model = build_model(f"{MODEL_DIR}/kokoro-v0_19.pth", self.device)
|
|
@@ -56,7 +56,7 @@ class TTSEngine:
|
|
| 56 |
logger.warning(f"Truncating long text ({len(text)} characters)")
|
| 57 |
text = text[:495] + "[TRUNCATED]"
|
| 58 |
|
| 59 |
-
logger.
|
| 60 |
audio, _ = generate_full(
|
| 61 |
self.model,
|
| 62 |
text,
|
|
@@ -66,7 +66,7 @@ class TTSEngine:
|
|
| 66 |
)
|
| 67 |
|
| 68 |
output_path = f"temp/outputs/output_{int(time.time())}.wav"
|
| 69 |
-
logger.
|
| 70 |
AudioSegment(
|
| 71 |
audio.numpy().tobytes(),
|
| 72 |
frame_rate=24000,
|
|
|
|
| 19 |
def __init__(self):
|
| 20 |
logger.info("Initializing TTS Engine")
|
| 21 |
self.device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 22 |
+
logger.info(f"Using device: {self.device}")
|
| 23 |
self._verify_model_files()
|
| 24 |
logger.info("Loading Kokoro model")
|
| 25 |
self.model = build_model(f"{MODEL_DIR}/kokoro-v0_19.pth", self.device)
|
|
|
|
| 56 |
logger.warning(f"Truncating long text ({len(text)} characters)")
|
| 57 |
text = text[:495] + "[TRUNCATED]"
|
| 58 |
|
| 59 |
+
logger.info("Starting audio generation")
|
| 60 |
audio, _ = generate_full(
|
| 61 |
self.model,
|
| 62 |
text,
|
|
|
|
| 66 |
)
|
| 67 |
|
| 68 |
output_path = f"temp/outputs/output_{int(time.time())}.wav"
|
| 69 |
+
logger.info(f"Saving audio to {output_path}")
|
| 70 |
AudioSegment(
|
| 71 |
audio.numpy().tobytes(),
|
| 72 |
frame_rate=24000,
|