omar1232 committed on
Commit
a9fdb06
·
verified ·
1 Parent(s): 7d6aec7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +181 -21
app.py CHANGED
@@ -1,32 +1,192 @@
1
- import threading
2
  import gradio as gr
 
 
 
 
 
3
  from telegram import Update
4
- from telegram.ext import ApplicationBuilder, CommandHandler, ContextTypes
5
 
6
- # ====== Telegram Bot Setup ======
7
- TELEGRAM_TOKEN = "8030235633:AAHKvxM9Nzp0DkxfdotMux3572tC_5CGEUA" # Replace this with your real bot token
8
 
9
- # Telegram command handler
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  async def start(update: Update, context: ContextTypes.DEFAULT_TYPE):
11
- await update.message.reply_text("Hello! I'm your bot!")
12
 
13
- # ====== Gradio App Setup ======
14
- def greet(name):
15
- return f"Hello, {name}!"
 
 
16
 
17
- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
 
18
 
19
- # Function to run Gradio (no SSR or share needed on Spaces)
20
- def run_gradio():
21
- demo.launch() # No SSR or share argument needed
22
 
23
- # ====== Start Both Gradio & Telegram Bot ======
24
- if __name__ == "__main__":
25
- # Start Gradio in a separate thread
26
- gradio_thread = threading.Thread(target=run_gradio)
27
- gradio_thread.start()
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
- # Start the Telegram bot in the main thread
30
- application = ApplicationBuilder().token(TELEGRAM_TOKEN).build()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  application.add_handler(CommandHandler("start", start))
32
- application.run_polling()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ import speech_recognition as sr
3
+ from pydub import AudioSegment
4
+ import tempfile
5
+ from langdetect import detect
6
+ import os
7
  from telegram import Update
8
+ from telegram.ext import Application, CommandHandler, MessageHandler, filters, ContextTypes
9
 
10
# Telegram bot token, read from the TELEGRAM_BOT_TOKEN environment variable
# (configure it as a Hugging Face Space secret). os.getenv() takes the NAME of
# the variable, never the secret value itself; the token must not be committed
# to source. Evaluates to None when the variable is not set.
TELEGRAM_BOT_TOKEN = os.getenv("TELEGRAM_BOT_TOKEN")
12
 
13
+ # Process audio and transcribe
14
def _export_as_mono_wav(audio_input, wav_path):
    """Write *audio_input* to *wav_path* as a mono WAV file."""
    if isinstance(audio_input, tuple):  # Recorded audio: (sample_rate, numpy_array)
        sample_rate, samples = audio_input
        if samples.dtype.kind == "f":
            # pydub needs integer PCM bytes.
            # NOTE(review): assumes float samples are in [-1.0, 1.0] - confirm.
            samples = (samples.clip(-1.0, 1.0) * 32767).astype("int16")
        # A 2-D array is treated as (frames, channels); its C-order bytes are
        # then already interleaved the way raw PCM expects.
        channels = 1 if samples.ndim == 1 else samples.shape[1]
        segment = AudioSegment(
            samples.tobytes(),
            sample_width=samples.dtype.itemsize,
            frame_rate=sample_rate,
            channels=channels,
        )
    else:  # Uploaded audio file (file path or Telegram download)
        segment = AudioSegment.from_file(audio_input)
    # Convert to mono for consistency before writing the WAV.
    segment.set_channels(1).export(wav_path, format="wav")


def process_audio(audio_input):
    """Transcribe an audio clip and detect the language of the transcript.

    Args:
        audio_input: Either a ``(sample_rate, numpy_array)`` tuple (recorded
            audio) or a value accepted by ``AudioSegment.from_file`` such as
            a file path (uploaded audio).

    Returns:
        A ``(language, transcription, text_file_path)`` tuple. ``language`` is
        ``"Unknown"`` when nothing was recognised or detection failed.
        ``text_file_path`` names a temporary ``.txt`` file holding the
        transcription; the caller is responsible for deleting it.

    Raises:
        ValueError: If the converted WAV file is empty.
    """
    recognizer = sr.Recognizer()

    # Reserve a temp path and close the handle right away so pydub can write
    # to it on every platform.
    fd, audio_file_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    try:
        _export_as_mono_wav(audio_input, audio_file_path)

        if os.path.getsize(audio_file_path) == 0:
            raise ValueError("The converted WAV file is empty. The input audio may be corrupted.")

        with sr.AudioFile(audio_file_path) as source:
            recorded = recognizer.record(source)
    finally:
        # Remove the temporary WAV even when conversion or loading failed.
        if os.path.exists(audio_file_path):
            os.remove(audio_file_path)

    # Transcribe using pocketsphinx (offline).
    recognized = True
    try:
        transcription = recognizer.recognize_sphinx(recorded)
    except sr.UnknownValueError:
        recognized = False
        transcription = "Could not understand the audio."
    except sr.RequestError as e:
        recognized = False
        transcription = f"Transcription failed: {str(e)}"

    # Detect the language of real transcripts only; running detection on the
    # failure messages above would report their language instead.
    language = "Unknown"
    if recognized and transcription.strip():
        try:
            language = detect(transcription)
        except Exception:
            language = "Unknown"

    # Save the transcription to a text file that outlives this call.
    with tempfile.NamedTemporaryFile(
        suffix=".txt", delete=False, mode="w", encoding="utf-8"
    ) as text_file:
        text_file.write(transcription)
        text_file_path = text_file.name

    return language, transcription, text_file_path
58
+
59
+ # Gradio interface function
60
def audio_transcriptor(audio_file, audio_record):
    """Gradio callback: transcribe the uploaded file, else the recording.

    The upload takes priority when both inputs are provided. Returns
    ``(language, transcription, text_file)``; when neither input is given the
    first element carries a prompt for the user and the file slot is ``None``.
    """
    chosen_input = audio_file or audio_record
    if not chosen_input:
        return "Please upload an audio file or record audio.", "", None

    language, transcription, text_file = process_audio(chosen_input)
    return language, transcription, text_file
69
+
70
+ # Telegram bot handlers
71
async def start(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Reply to the /start command with a short usage hint."""
    # effective_message is used instead of update.message because the latter
    # is None when the command arrives as an edited message.
    message = update.effective_message
    if message is None:
        return
    await message.reply_text("Hello! Send me an audio file, and I'll transcribe it for you.")
73
 
74
async def handle_audio(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Transcribe an audio attachment sent to the bot and reply with the result.

    Downloads the attachment to a temporary file, runs ``process_audio`` on
    it, replies with the detected language and transcription, and then sends
    the transcription as ``transcription.txt``. Both temporary files are
    removed even when one of the steps raises.
    """
    # effective_message is used because update.message is None for edited
    # messages.
    message = update.effective_message
    attachment = (message.audio or message.voice) if message else None
    if attachment is None:
        return

    # Download the audio file from Telegram to a unique temporary path.
    audio_file = await attachment.get_file()
    fd, audio_path = tempfile.mkstemp(suffix=".ogg")
    os.close(fd)
    text_file_path = None
    try:
        await audio_file.download_to_drive(audio_path)

        # Process the audio using the shared transcription function.
        language, transcription, text_file_path = process_audio(audio_path)

        # Telegram rejects text messages longer than 4096 characters; the
        # complete transcription is still delivered in the document below.
        reply = f"Detected Language: {language}\nTranscription: {transcription}"
        await message.reply_text(reply[:4096])

        # Send the transcription file.
        with open(text_file_path, 'rb') as f:
            await message.reply_document(document=f, filename="transcription.txt")
    finally:
        # Clean up temporary files whether or not the steps above succeeded.
        for path in (audio_path, text_file_path):
            if path and os.path.exists(path):
                os.remove(path)
95
+
96
+ # Custom HTML for styled transcription display (for Gradio interface)
97
# Markup, styling and a polling script for the styled results panel rendered
# through gr.HTML in the Gradio interface. Once a second the script copies the
# values of the "Detected Language" and "Transcription" text areas into the
# panel.
# NOTE(review): confirm that <script> tags injected via gr.HTML are executed
# and that the div[label="..."] selectors match the DOM Gradio renders.
transcription_html = """
<div class="transcription-container" id="transcriptionContainer">
<h2>Transcription Results</h2>
<div class="language" id="languageOutput">Detected Language: Waiting...</div>
<div class="transcription" id="transcriptionOutput">Transcription: Waiting...</div>
</div>

<style>
.transcription-container {
max-width: 600px;
margin: 20px auto;
padding: 20px;
background: #16213e;
border-radius: 10px;
box-shadow: 0 10px 20px rgba(0, 0, 0, 0.3);
color: #fff;
text-align: center;
}
.language, .transcription {
margin: 10px 0;
padding: 10px;
background: #0f172a;
border-radius: 5px;
}
</style>

<script>
setInterval(() => {
const languageOutput = document.querySelector('div[label="Detected Language"] textarea');
const transcriptionOutput = document.querySelector('div[label="Transcription"] textarea');
if (languageOutput && languageOutput.value) {
document.getElementById('languageOutput').textContent = `Detected Language: ${languageOutput.value}`;
}
if (transcriptionOutput && transcriptionOutput.value) {
document.getElementById('transcriptionOutput').textContent = `Transcription: ${transcriptionOutput.value}`;
}
}, 1000);
</script>
"""
136
+
137
+ # Gradio interface
138
with gr.Blocks() as demo:
    # Page heading and a short usage blurb.
    gr.Markdown("# Audio Transcriptor")
    gr.Markdown("Upload an audio file or record audio to transcribe the speech and detect the language. You can also interact with the bot via Telegram!")

    # Inputs: an uploader that yields a file path and a microphone recorder
    # that yields numpy data.
    with gr.Row():
        audio_file = gr.Audio(label="Upload Audio", sources=["upload"], type="filepath")
        audio_record = gr.Audio(label="Record Audio", sources=["microphone"], type="numpy")

    # Outputs: detected language, transcription text and a downloadable file.
    with gr.Row():
        language_output = gr.Textbox(label="Detected Language")
        transcription_output = gr.Textbox(label="Transcription")
        text_file_output = gr.File(label="Download Transcription as Text File")

    # Styled HTML results panel.
    gr.HTML(transcription_html)

    with gr.Row():
        submit = gr.Button("Transcribe")
        clear = gr.Button("Clear")

    # "Transcribe" feeds both audio inputs to the callback and fills the three
    # outputs.
    submit.click(
        audio_transcriptor,
        inputs=[audio_file, audio_record],
        outputs=[language_output, transcription_output, text_file_output],
    )
    # "Clear" resets the two audio inputs only.
    clear.click(
        lambda: (None, None),
        inputs=[],
        outputs=[audio_file, audio_record],
    )
168
+
169
+ # Start the Telegram bot in a separate thread
170
def run_telegram_bot():
    """Build the Telegram application, register its handlers and poll for updates.

    Prints a hint and returns immediately when ``TELEGRAM_BOT_TOKEN`` is not
    set. Otherwise this call blocks until polling stops. It may be called from
    the main thread or from a worker thread.
    """
    if not TELEGRAM_BOT_TOKEN:
        print("Telegram bot token not found. Please set TELEGRAM_BOT_TOKEN in the Space secrets.")
        return

    import asyncio
    import threading

    polling_options = {"allowed_updates": Update.ALL_TYPES}
    if threading.current_thread() is not threading.main_thread():
        # Worker threads have no default event loop, and signal handlers can
        # only be installed from the main thread, so give this thread its own
        # loop and tell run_polling not to register stop signals.
        asyncio.set_event_loop(asyncio.new_event_loop())
        polling_options["stop_signals"] = None

    application = Application.builder().token(TELEGRAM_BOT_TOKEN).build()

    # Add handlers
    application.add_handler(CommandHandler("start", start))
    application.add_handler(MessageHandler(filters.AUDIO, handle_audio))

    # Start the bot
    print("Starting Telegram bot...")
    application.run_polling(**polling_options)
184
+
185
+ # Launch Gradio app and Telegram bot
186
if __name__ == "__main__":
    import threading

    # Run the Telegram bot in a background thread. It is a daemon thread so
    # that the process can exit once the Gradio app below stops, instead of
    # waiting forever on the polling loop.
    bot_thread = threading.Thread(target=run_telegram_bot, daemon=True)
    bot_thread.start()

    # Launch the Gradio app; this call blocks the main thread.
    demo.launch()