Spaces:

omar1232
/

Advanced_Audio_Visualizer

Sleeping

App Files Files Community

omar1232 committed on Apr 24, 2025

Commit

a9fdb06

verified ·

1 Parent(s): 7d6aec7

Update app.py

Browse files

Files changed (1) hide show

app.py +181 -21

app.py CHANGED Viewed

@@ -1,32 +1,192 @@
-import threading
 import gradio as gr
 from telegram import Update
-from telegram.ext import ApplicationBuilder, CommandHandler, ContextTypes
-# ====== Telegram Bot Setup ======
-TELEGRAM_TOKEN = "8030235633:AAHKvxM9Nzp0DkxfdotMux3572tC_5CGEUA"  # Replace this with your real bot token
-# Telegram command handler
 async def start(update: Update, context: ContextTypes.DEFAULT_TYPE):
-    await update.message.reply_text("Hello! I'm your bot!")
-# ====== Gradio App Setup ======
-def greet(name):
-    return f"Hello, {name}!"
-demo = gr.Interface(fn=greet, inputs="text", outputs="text")
-# Function to run Gradio (no SSR or share needed on Spaces)
-def run_gradio():
-    demo.launch()  # No SSR or share argument needed
-# ====== Start Both Gradio & Telegram Bot ======
-if __name__ == "__main__":
-    # Start Gradio in a separate thread
-    gradio_thread = threading.Thread(target=run_gradio)
-    gradio_thread.start()
-    # Start the Telegram bot in the main thread
-    application = ApplicationBuilder().token(TELEGRAM_TOKEN).build()
     application.add_handler(CommandHandler("start", start))
-    application.run_polling()

 import gradio as gr
+import speech_recognition as sr
+from pydub import AudioSegment
+import tempfile
+from langdetect import detect
+import os
 from telegram import Update
+from telegram.ext import Application, CommandHandler, MessageHandler, filters, ContextTypes
+# Telegram bot token (to be set via Hugging Face Space secrets)
+TELEGRAM_BOT_TOKEN = os.getenv("8030235633:AAHKvxM9Nzp0DkxfdotMux3572tC_5CGEUA")
+# Process audio and transcribe
+def process_audio(audio_input):
+    recognizer = sr.Recognizer()
+    # Convert all audio inputs to WAV format
+    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
+        if isinstance(audio_input, tuple):  # Recorded audio (sample_rate, numpy_array)
+            sample_rate, audio_data = audio_input
+            AudioSegment(audio_data, sample_rate=sample_rate, frame_rate=sample_rate, channels=1).export(temp_file.name, format="wav")
+        else:  # Uploaded audio file (file path or Telegram file)
+            audio = AudioSegment.from_file(audio_input)
+            audio = audio.set_channels(1)  # Convert to mono for consistency
+            audio.export(temp_file.name, format="wav")
+        audio_file_path = temp_file.name
+    # Debug: Check if the WAV file is valid
+    if os.path.getsize(audio_file_path) == 0:
+        raise ValueError("The converted WAV file is empty. The input audio may be corrupted.")
+    # Transcribe the WAV file using pocketsphinx (offline)
+    with sr.AudioFile(audio_file_path) as source:
+        audio = recognizer.record(source)
+        try:
+            transcription = recognizer.recognize_sphinx(audio)  # Use pocketsphinx for offline transcription
+        except sr.UnknownValueError:
+            transcription = "Could not understand the audio."
+        except sr.RequestError as e:
+            transcription = f"Transcription failed: {str(e)}"
+    # Detect language
+    try:
+        language = detect(transcription)
+    except:
+        language = "Unknown"
+    # Save transcription to a text file
+    with tempfile.NamedTemporaryFile(suffix=".txt", delete=False, mode='w') as text_file:
+        text_file.write(transcription)
+        text_file_path = text_file.name
+    # Clean up temporary WAV file
+    if os.path.exists(audio_file_path):
+        os.remove(audio_file_path)
+    return language, transcription, text_file_path
+# Gradio interface function
+def audio_transcriptor(audio_file, audio_record):
+    if audio_file:
+        language, transcription, text_file = process_audio(audio_file)
+    elif audio_record:
+        language, transcription, text_file = process_audio(audio_record)
+    else:
+        return "Please upload an audio file or record audio.", "", None
+    return language, transcription, text_file
+# Telegram bot handlers
 async def start(update: Update, context: ContextTypes.DEFAULT_TYPE):
+    await update.message.reply_text("Hello! Send me an audio file, and I'll transcribe it for you.")
+async def handle_audio(update: Update, context: ContextTypes.DEFAULT_TYPE):
+    # Download the audio file from Telegram
+    audio_file = await update.message.audio.get_file()
+    audio_path = f"/tmp/{audio_file.file_id}.ogg"  # Telegram audio files are typically in OGG format
+    await audio_file.download_to_drive(audio_path)
+    # Process the audio using the existing transcriptor function
+    language, transcription, text_file_path = process_audio(audio_path)
+    # Send the transcription back to the user
+    await update.message.reply_text(f"Detected Language: {language}\nTranscription: {transcription}")
+    # Send the transcription file
+    with open(text_file_path, 'rb') as f:
+        await update.message.reply_document(document=f, filename="transcription.txt")
+    # Clean up temporary files
+    if os.path.exists(audio_path):
+        os.remove(audio_path)
+    if os.path.exists(text_file_path):
+        os.remove(text_file_path)
+# Custom HTML for styled transcription display (for Gradio interface).
+# The markup defines a results panel with two placeholders ("languageOutput"
+# and "transcriptionOutput"), its CSS, and a script that polls once per second
+# and copies the values of the "Detected Language" / "Transcription" textareas
+# into those placeholders.
+# NOTE(review): this relies on the browser executing the inline <script> and on
+# wrapper divs exposing a `label` attribute matching the Textbox labels -
+# confirm both hold for the Gradio version in use, otherwise the panel will
+# keep showing "Waiting...".
+transcription_html = """
+<div class="transcription-container" id="transcriptionContainer">
+    <h2>Transcription Results</h2>
+    <div class="language" id="languageOutput">Detected Language: Waiting...</div>
+    <div class="transcription" id="transcriptionOutput">Transcription: Waiting...</div>
+</div>
+<style>
+    .transcription-container {
+        max-width: 600px;
+        margin: 20px auto;
+        padding: 20px;
+        background: #16213e;
+        border-radius: 10px;
+        box-shadow: 0 10px 20px rgba(0, 0, 0, 0.3);
+        color: #fff;
+        text-align: center;
+    }
+    .language, .transcription {
+        margin: 10px 0;
+        padding: 10px;
+        background: #0f172a;
+        border-radius: 5px;
+    }
+</style>
+<script>
+    setInterval(() => {
+        const languageOutput = document.querySelector('div[label="Detected Language"] textarea');
+        const transcriptionOutput = document.querySelector('div[label="Transcription"] textarea');
+        if (languageOutput && languageOutput.value) {
+            document.getElementById('languageOutput').textContent = `Detected Language: ${languageOutput.value}`;
+        }
+        if (transcriptionOutput && transcriptionOutput.value) {
+            document.getElementById('transcriptionOutput').textContent = `Transcription: ${transcriptionOutput.value}`;
+        }
+    }, 1000);
+</script>
+"""
+# Gradio interface
+with gr.Blocks() as demo:
+    gr.Markdown("# Audio Transcriptor")
+    gr.Markdown("Upload an audio file or record audio to transcribe the speech and detect the language. You can also interact with the bot via Telegram!")
+    with gr.Row():
+        audio_file = gr.Audio(sources=["upload"], type="filepath", label="Upload Audio")
+        audio_record = gr.Audio(sources=["microphone"], type="numpy", label="Record Audio")
+    with gr.Row():
+        language_output = gr.Textbox(label="Detected Language")
+        transcription_output = gr.Textbox(label="Transcription")
+        text_file_output = gr.File(label="Download Transcription as Text File")
+    # Add styled HTML section
+    gr.HTML(transcription_html)
+    with gr.Row():
+        submit = gr.Button("Transcribe")
+        clear = gr.Button("Clear")
+    submit.click(
+        fn=audio_transcriptor,
+        inputs=[audio_file, audio_record],
+        outputs=[language_output, transcription_output, text_file_output]
+    )
+    clear.click(
+        fn=lambda: (None, None),
+        inputs=[],
+        outputs=[audio_file, audio_record]
+    )
+# Start the Telegram bot in a separate thread
+def run_telegram_bot():
+    if not TELEGRAM_BOT_TOKEN:
+        print("Telegram bot token not found. Please set TELEGRAM_BOT_TOKEN in the Space secrets.")
+        return
+    application = Application.builder().token(TELEGRAM_BOT_TOKEN).build()
+    # Add handlers
     application.add_handler(CommandHandler("start", start))
+    application.add_handler(MessageHandler(filters.AUDIO, handle_audio))
+    # Start the bot
+    print("Starting Telegram bot...")
+    application.run_polling(allowed_updates=Update.ALL_TYPES)
+# Launch Gradio app and Telegram bot
+if __name__ == "__main__":
+    import threading
+    # Start the Telegram bot in a separate thread
+    bot_thread = threading.Thread(target=run_telegram_bot)
+    bot_thread.start()
+    # Launch Gradio app
+    demo.launch()