Spaces:

omar1232
/

Advanced_Audio_Visualizer

Sleeping

App Files Files Community

omar1232 committed on Apr 24, 2025

Commit

b554eea

verified ·

1 Parent(s): 4a31f88

Update app.py

Browse files

Files changed (1) hide show

app.py +32 -42

app.py CHANGED Viewed

@@ -1,51 +1,44 @@
 import gradio as gr
 from pydub import AudioSegment
 import tempfile
 from langdetect import detect
 import os
 import asyncio
-import torch
-from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
 from telegram import Update
 from telegram.ext import Application, CommandHandler, MessageHandler, filters, ContextTypes
 # Telegram bot token (to be set via Hugging Face Space secrets)
 TELEGRAM_BOT_TOKEN = os.getenv("TELEGRAM_BOT_TOKEN")
-# Load the Hugging Face model and processor for transcription
-processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h")
-model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")
 # Process audio and transcribe
 def process_audio(audio_input):
-    # Convert all audio inputs to WAV format with 16kHz sample rate (required by wav2vec2)
     with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
         if isinstance(audio_input, tuple):  # Recorded audio (sample_rate, numpy_array)
             sample_rate, audio_data = audio_input
-            audio_segment = AudioSegment(audio_data, sample_rate=sample_rate, frame_rate=sample_rate, channels=1)
         else:  # Uploaded audio file (file path or Telegram file)
-            audio_segment = AudioSegment.from_file(audio_input).set_channels(1)
-        # Resample to 16kHz (required by wav2vec2)
-        audio_segment = audio_segment.set_frame_rate(16000)
-        audio_segment.export(temp_file.name, format="wav")
         audio_file_path = temp_file.name
-    # Load the WAV file for transcription
-    import soundfile as sf
-    audio_data, sample_rate = sf.read(audio_file_path)
-    assert sample_rate == 16000, "Sample rate must be 16kHz"
-    # Preprocess audio for the model
-    inputs = processor(audio_data, sampling_rate=sample_rate, return_tensors="pt", padding=True)
-    # Perform transcription
-    with torch.no_grad():
-        logits = model(inputs.input_values).logits
-    # Decode the logits to text
-    predicted_ids = torch.argmax(logits, dim=-1)
-    transcription = processor.batch_decode(predicted_ids)[0]
     # Detect language
     try:
@@ -174,34 +167,31 @@ with gr.Blocks() as demo:
         outputs=[audio_file, audio_record]
     )
-# Start the Telegram bot in a separate thread with its own event loop
 def run_telegram_bot():
     if not TELEGRAM_BOT_TOKEN:
         print("Telegram bot token not found. Please set TELEGRAM_BOT_TOKEN in the Space secrets.")
         return
-    # Create a new event loop for this thread
-    loop = asyncio.new_event_loop()
-    asyncio.set_event_loop(loop)
     application = Application.builder().token(TELEGRAM_BOT_TOKEN).build()
     # Add handlers
     application.add_handler(CommandHandler("start", start))
     application.add_handler(MessageHandler(filters.AUDIO, handle_audio))
-    # Start the bot
     print("Starting Telegram bot...")
-    try:
-        loop.run_until_complete(application.run_polling(allowed_updates=Update.ALL_TYPES))
-    finally:
-        loop.close()
 # Launch Gradio app and Telegram bot
 if __name__ == "__main__":
     import threading
-    # Start the Telegram bot in a separate thread
-    bot_thread = threading.Thread(target=run_telegram_bot)
-    bot_thread.start()
-    # Launch Gradio app
-    demo.launch()

 import gradio as gr
+import speech_recognition as sr
 from pydub import AudioSegment
 import tempfile
 from langdetect import detect
 import os
 import asyncio
 from telegram import Update
 from telegram.ext import Application, CommandHandler, MessageHandler, filters, ContextTypes
 # Telegram bot token (to be set via Hugging Face Space secrets)
 TELEGRAM_BOT_TOKEN = os.getenv("TELEGRAM_BOT_TOKEN")
 # Process audio and transcribe
 def process_audio(audio_input):
+    recognizer = sr.Recognizer()
+    # Convert all audio inputs to WAV format
     with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
         if isinstance(audio_input, tuple):  # Recorded audio (sample_rate, numpy_array)
             sample_rate, audio_data = audio_input
+            AudioSegment(audio_data, sample_rate=sample_rate, frame_rate=sample_rate, channels=1).export(temp_file.name, format="wav")
         else:  # Uploaded audio file (file path or Telegram file)
+            audio = AudioSegment.from_file(audio_input)
+            audio = audio.set_channels(1)  # Convert to mono for consistency
+            audio.export(temp_file.name, format="wav")
         audio_file_path = temp_file.name
+    # Debug: Check if the WAV file is valid
+    if os.path.getsize(audio_file_path) == 0:
+        raise ValueError("The converted WAV file is empty. The input audio may be corrupted.")
+    # Transcribe the WAV file using pocketsphinx (offline)
+    with sr.AudioFile(audio_file_path) as source:
+        audio = recognizer.record(source)
+        try:
+            transcription = recognizer.recognize_sphinx(audio)  # Use pocketsphinx for offline transcription
+        except sr.UnknownValueError:
+            transcription = "Could not understand the audio."
+        except sr.RequestError as e:
+            transcription = f"Transcription failed: {str(e)}"
     # Detect language
     try:
         outputs=[audio_file, audio_record]
     )
+# Start the Telegram bot in the main thread
 def run_telegram_bot():
     """Build the Telegram bot and poll for updates until it is stopped.

     Returns immediately, after printing a hint, when TELEGRAM_BOT_TOKEN is
     unset. Otherwise registers the ``/start`` command handler and the
     audio-message handler, then blocks the calling thread inside
     ``Application.run_polling``.
     """
     if not TELEGRAM_BOT_TOKEN:
         print("Telegram bot token not found. Please set TELEGRAM_BOT_TOKEN in the Space secrets.")
         return
     application = Application.builder().token(TELEGRAM_BOT_TOKEN).build()
     # Add handlers
     application.add_handler(CommandHandler("start", start))
     application.add_handler(MessageHandler(filters.AUDIO, handle_audio))
     print("Starting Telegram bot...")
     # run_polling() is a blocking, synchronous call that drives its own
     # event loop; it is not a coroutine. Wrapping it in asyncio.run() would
     # evaluate it first and then pass its None result to asyncio.run(),
     # which raises ValueError as soon as polling stops.
     application.run_polling(allowed_updates=Update.ALL_TYPES)
+# Launch Gradio app in a separate thread
+def run_gradio():
+    demo.launch()
 # Launch Gradio app and Telegram bot
 if __name__ == "__main__":
     import threading
     # The Gradio UI is launched from a worker thread so that the main
     # thread stays free for the Telegram bot.
     gradio_thread = threading.Thread(target=run_gradio)
     gradio_thread.start()
     # The bot call occupies the main thread from here on.
     run_telegram_bot()