chandras002 committed on
Commit
ca39ddc
·
verified ·
1 Parent(s): fb906d2

upload app.py

Browse files

upload app.py for the first time

Files changed (1) hide show
  1. app.py +423 -0
app.py ADDED
@@ -0,0 +1,423 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This is version 2 updated on 17th Sept 2024.
2
+ # Uses the Whisper Medium model (on an RTX 4070 with 8GB VRAM)
3
+ # Beep handling changed: the beepify_segments function is no longer used; audio_to_beep.overlay is used instead
4
+ # Please change beep sound wave filepath according to your local dir in "Beeped_Audio_Path": line 254
5
+ # Output audio is stored in the "pii_beep_audio_uploads" folder (relative to the working directory the app is run from)
6
+
7
+
8
+ import gradio as gr
9
+ import os
10
+ import random
11
+ import whisper_timestamped as whisper
12
+ from pydub import AudioSegment
13
+ import numpy as np
14
+ import spacy
15
+ import torch
16
+ import threading
17
+ import zipfile
18
+ import shutil
19
+ from pathlib import Path
20
+ from werkzeug.utils import secure_filename
21
+ import time
22
+ from gradio_rich_textbox import RichTextbox
23
+ import re
24
+
25
+ # Worker class to process the audio file and load models
26
class Worker(threading.Thread):
    """Thread that transcribes one audio file, tags PII entities and beeps them out.

    The thread loads a spaCy pipeline from ``model_directory`` and the Whisper
    "medium" model, transcribes ``audio_file_path``, runs NER on the transcript,
    overlays a beep on every transcribed word that belongs to an entity, and
    reports the results through ``callback``.

    Args:
        audio_file_path: Path of the audio file to process.
        model_directory: Path (or name) passed to ``spacy.load``.
        callback: Callable invoked with a result ``dict`` once processing
            succeeds, and afterwards with a plain completion string.
    """

    def __init__(self, audio_file_path, model_directory, callback):
        super().__init__()
        self._AudiofileName = audio_file_path
        self._ModelDirectory = model_directory
        # Beep sample is looked up relative to the current working directory.
        self._BeepAudiofileName = "beep2.wav"
        self.callback = callback

        # Result buffers filled in by processData().
        self._PII_text_and_Timestamp = ""
        self._Transcribe_Text_With_Entities = ""
        self._Metrics = ""
        self._BeepedAudiofileName = ""

        print(f"Audio File: {self._AudiofileName}")
        print(f"Model Directory: {self._ModelDirectory}")
        print(f"Beep Audio File: {self._BeepAudiofileName}")

    def run(self):
        """Load both models, process the audio file and signal completion.

        Any exception is caught and printed, so a failure never propagates out
        of the thread; in that case the callback is not invoked from here.
        """
        try:
            print("loading SpaCy model with custom model ", str(self._ModelDirectory))
            # Load spaCy model from directory or a known model name
            self.nlp = spacy.load(str(self._ModelDirectory))
            print("SpaCy model loaded.")

            # Load Whisper model on the GPU when one is available.
            devices = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
            print(devices)
            time.sleep(0.2)
            self.model = whisper.load_model("medium", device=devices)

            print("Whisper model loaded.")

            self.processData()
            self.callback("callback Processing complete!")

        except Exception as e:
            print(f"Error during processing: {str(e)}")

    def count_entities(self, entities):
        """Return a dict mapping each entity label to its number of occurrences.

        Args:
            entities: Iterable of ``(text, label)`` pairs.
        """
        entity_counts = {}
        for _, entity_type in entities:
            entity_counts[entity_type] = entity_counts.get(entity_type, 0) + 1
        return entity_counts

    def colorize_entities(self, data, entities):
        """Wrap every entity occurrence in ``data`` in a coloured ``<span>``.

        The replacement is done in a single regex pass with longer entity texts
        tried first, so text that was already wrapped (or the inserted markup
        itself) is never matched again by a shorter or overlapping entity.

        Args:
            data: The transcript text.
            entities: Iterable of ``(text, label)`` pairs. When the same text
                appears with several labels, the first one is used.

        Returns:
            ``data`` with each entity replaced by
            ``<span style="color: ...;">text LABEL</span>``.
        """
        # Define color mappings (you can customize these)
        color_map = {
            'PERSON': 'blue',
            'GPE': 'green',
            'LOC': 'purple',
            'PHONE': 'orange',
            'EMAIL': 'blue',
            'CAR_PLATE': 'red',
            'ORG': 'purple',
            'NRIC': 'red',
            'PASSPORT_NUM': 'green',
        }

        print("entities", entities)

        label_by_text = {}
        for entity, entity_type in entities:
            if entity:  # an empty pattern would match at every position
                label_by_text.setdefault(entity, entity_type)
        if not label_by_text:
            return data

        # Longest alternatives first so "John Smith" wins over "John".
        ordered_texts = sorted(label_by_text, key=len, reverse=True)
        pattern = re.compile("|".join(re.escape(text) for text in ordered_texts))

        def _wrap(match):
            entity = match.group(0)
            entity_type = label_by_text[entity]
            color = color_map.get(entity_type, 'blue')  # Default to blue if type not found
            return f'<span style="color: {color};">{entity} {entity_type}</span>'

        return pattern.sub(_wrap, data)

    def _beep_words(self, audio_to_beep, beep_sound, segments, words_to_beep):
        """Return ``audio_to_beep`` with every matching transcribed word beeped.

        Each matching word is first replaced by silence and then overlaid with
        a slice of ``beep_sound``. The slice is taken from the untouched beep
        sample for every word, so one short word does not shorten later beeps.
        """
        for segment in segments:
            for word in segment["words"]:
                if word["text"] not in words_to_beep:
                    continue
                print("*******")
                print(word)

                # Word boundaries in milliseconds, with a 100 ms tail buffer.
                start_index = float(word["start"] * 1000)
                end_index = float(word["end"] * 1000 + 100)
                word_duration = end_index - start_index
                print(word_duration)

                # Replace the word with silence of the same duration.
                silent_segment = AudioSegment.silent(duration=word_duration)
                audio_to_beep = (
                    audio_to_beep[:int(start_index)]
                    + silent_segment
                    + audio_to_beep[int(end_index):]
                )

                # Beep for the word duration plus 200 ms, starting at the word.
                word_beep = beep_sound[0:word_duration + 200]
                audio_to_beep = audio_to_beep.overlay(word_beep, position=int(start_index))
        return audio_to_beep

    def processData(self):
        """Transcribe the audio, extract entities, beep them and report results.

        On success the callback receives a dict with the keys
        ``PII_text_and_Timestamp``, ``Transcribe_Text_With_Entities``,
        ``Metrics`` and ``Beeped_Audio_Path``. Any exception is caught and
        printed, in which case the callback is not invoked.
        """
        try:
            # Load audio and transcribe with word-level timestamps.
            audio = whisper.load_audio(self._AudiofileName)
            output = whisper.transcribe(
                self.model,
                audio,
                beam_size=5,
                best_of=5,
                temperature=(0.0, 0.2, 0.4, 0.6, 0.8, 1.0),
                vad=True,
                language="en",
                remove_punctuation_from_words=True,
                refine_whisper_precision=0.6,
                min_word_duration=0.01,
            )
            transcription_text = output['text']
            # Replace a full stop that is followed by whitespace or end of text
            # with a space before running NER.
            transcription_text = re.sub(r"\.(?!\S)", " ", transcription_text)
            print("~~~~~~~~~~~~~~~~")
            print(transcription_text)

            self._PII_text_and_Timestamp += transcription_text + "\n"

            # Run NER with spaCy
            doc = self.nlp(transcription_text)
            entities = [(ent.text, ent.label_) for ent in doc.ents]
            unique_entities = list(dict.fromkeys(entities))  # de-duplicate, keep order
            entity_counts = self.count_entities(entities)

            for entity_type, count in entity_counts.items():
                self._Metrics += entity_type + " : " + str(count) + "\n"

            self._Transcribe_Text_With_Entities = self.colorize_entities(
                transcription_text, unique_entities
            )

            print(f"Transcription: {transcription_text}")
            print(f"Entities: {entities}")

            audio_to_beep = AudioSegment.from_file(self._AudiofileName)

            # Character spans of the entities. The reported "timestamps" are
            # estimated as character offset * 200 (treated as milliseconds);
            # they are not taken from Whisper's word timings.
            self.segments_to_beep = []
            pii_text_timestamp = []
            for ent in doc.ents:
                self.segments_to_beep.append((ent.start_char, ent.end_char))
                pii_text_timestamp.append((ent.text, ent.start_char * 200, ent.end_char * 200))
                print("=======")
                print("ent.text", ent.text)
                print("ent.start", ent.start_char)
                print("ent.end", ent.end_char)

            print(pii_text_timestamp)
            for text, start_ms, end_ms in pii_text_timestamp:
                self._PII_text_and_Timestamp += (
                    "Timestamp: " + str(start_ms / 1000) + " --- " + str(end_ms / 1000) + " sec"
                ) + "\n"
                self._PII_text_and_Timestamp += "Text: " + text + "\n"

            segments_in_ms = [(start * 200, end * 200) for start, end in self.segments_to_beep]
            print("Segments:", segments_in_ms)

            # Entity phrases with full stops removed.
            words_to_beepify = [ent.text.replace('.', '') for ent in doc.ents]
            print(words_to_beepify)

            # Split phrases into single words; drop duplicates but keep order.
            individual_words_to_beepify = []
            for phrase in words_to_beepify:
                individual_words_to_beepify.extend(phrase.split())
            individual_words_to_beepify = list(dict.fromkeys(individual_words_to_beepify))
            print(individual_words_to_beepify)

            # Load the beep sound and beep every transcribed word that matches.
            beep_sound = AudioSegment.from_file(self._BeepAudiofileName)
            audio_to_beep = self._beep_words(
                audio_to_beep,
                beep_sound,
                output["segments"],
                set(individual_words_to_beepify),
            )

            # Save the beeped audio file under a randomised, sanitised name.
            random_filename = str(random.getrandbits(32)) + secure_filename(
                Path(self._AudiofileName).name
            )
            output_path = os.path.join("pii_beep_audio_uploads", f"new_{random_filename}")
            os.makedirs("pii_beep_audio_uploads", exist_ok=True)

            # NOTE(review): export() is called without a format argument, so
            # pydub's default format applies whatever the file extension is - confirm.
            audio_to_beep.export(output_path)
            self._BeepedAudiofileName = output_path

            print(f"Beeped audio file saved at: {output_path}")
            # NOTE(review): the values under the first two keys look swapped
            # relative to the key names; start_worker and the UI wiring appear
            # to rely on this, so it is kept as is.
            self.callback({
                "PII_text_and_Timestamp": self._Transcribe_Text_With_Entities,
                "Transcribe_Text_With_Entities": self._PII_text_and_Timestamp,
                "Metrics": self._Metrics,
                "Beeped_Audio_Path": self._BeepedAudiofileName,
            })
        except Exception as e:
            print(f"An error occurred during transcription: {str(e)}")
249
+
250
+
251
+
252
+
253
+ # Callback function for Gradio
254
def start_worker(audio_file_path, model_directory):
    """Run a Worker on the given inputs and return the four UI output values.

    Args:
        audio_file_path: Path of the uploaded audio file.
        model_directory: Path of the extracted spaCy model directory.

    Returns:
        A 4-tuple ``(rich_text, plain_text, metrics, beeped_audio_path)`` in
        the order of the outputs wired to the submit button. When validation
        or processing fails, the first element carries an error message, the
        two text fields are empty and the audio path is ``None``.
    """
    def _error(message):
        # Always return one value per wired output component.
        return message, "", "", None

    result = {
        "PII_text_and_Timestamp": "Processing...",
        "Transcribe_Text_With_Entities": "Processing...",
        "Metrics": "Processing...",
        # No machine-specific fallback file: None leaves the file output empty.
        "Beeped_Audio_Path": None,
    }
    completed = []

    def update_result(message):
        # The worker also sends a plain completion string; only dicts carry results.
        if isinstance(message, dict):
            result.update({
                "PII_text_and_Timestamp": str(message.get("PII_text_and_Timestamp")),
                "Transcribe_Text_With_Entities": message.get("Transcribe_Text_With_Entities"),
                "Metrics": str(message.get('Metrics')),
                "Beeped_Audio_Path": str(message.get('Beeped_Audio_Path')),
            })
            completed.append(True)
            print("Processing complete.")

    if (
        not audio_file_path
        or not os.path.isfile(audio_file_path)
        or os.path.getsize(audio_file_path) == 0
    ):
        return _error("Error: No input provided. Please upload a audio file")

    # model_directory may hold a status message instead of a path when model
    # loading failed, so check that it really is a directory.
    if not model_directory or not os.path.isdir(model_directory):
        return _error("Error: No input provided. Please upload model(.zip)file")

    # Start worker in a separate thread and wait for it to finish.
    worker = Worker(audio_file_path, model_directory, update_result)
    worker.start()
    worker.join()

    if not completed:
        # The worker swallows its own exceptions and only prints them.
        return _error("Error: processing failed. Check the server log for details.")

    return (
        result["PII_text_and_Timestamp"],
        result["Transcribe_Text_With_Entities"],
        result["Metrics"],
        result["Beeped_Audio_Path"],
    )
290
+
291
def reset():
    """Return five ``None`` values, one for each component cleared by Reset."""
    cleared_outputs = (None,) * 5
    return cleared_outputs
293
+
294
def get_audio_file_path(audio):
    """Pass the value of the audio input through unchanged."""
    selected_path = audio
    return selected_path
296
+
297
+
298
+
299
def load_model(files):
    """Extract an uploaded model zip and return the directory holding meta.json.

    Args:
        files: The uploaded zip, either as a path string or as an object
            exposing the path through a ``name`` attribute. Falsy when
            nothing is selected.

    Returns:
        The path of the first directory (in ``os.walk`` order) under
        ``extracted_model`` that contains ``meta.json``, or a human-readable
        status message when nothing was selected, the upload is not a valid
        zip archive, or no ``meta.json`` was found.
    """
    if not files:
        return "No directory selected"

    # Accept both a plain path and a file wrapper with a .name attribute.
    zip_file_path = getattr(files, "name", files)

    # Always extract into a fresh directory.
    extract_dir = "extracted_model"
    if os.path.exists(extract_dir):
        shutil.rmtree(extract_dir)
    os.makedirs(extract_dir, exist_ok=True)

    try:
        with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
            zip_ref.extractall(extract_dir)
    except zipfile.BadZipFile:
        return "Invalid model file: not a valid zip archive"

    # Debug output: list everything that was extracted.
    print("Extracted files:")
    for root, _dirs, names in os.walk(extract_dir):
        for file_name in names:
            print(os.path.join(root, file_name))

    # The model base directory is the first one containing meta.json.
    for root, _dirs, names in os.walk(extract_dir):
        if 'meta.json' in names:
            return root

    return "Invalid model directory: meta.json not found"
348
+
349
+ # Function to load and return the audio file path
350
def load_audio(beep_audio_file_output):
    """Return the file path behind the beeped-audio file component value.

    Args:
        beep_audio_file_output: ``None``, a path string, or an object that
            exposes the path through a ``name`` attribute.

    Returns:
        The file path, or ``None`` when no file is present.
    """
    if beep_audio_file_output is None:
        return None
    # A plain path string has no .name attribute; pass it through unchanged.
    return getattr(beep_audio_file_output, "name", beep_audio_file_output)
354
+
355
+ # Gradio UI
356
# Gradio UI: builds the page layout and wires the event handlers.
# NOTE(review): the nesting of rows/columns below was reconstructed from a
# flattened diff view in which indentation was lost - confirm against the
# intended layout.
with gr.Blocks(css="""
.centered {
display: flex;
justify-content: center;
align-items: center; }

.custom-label {
font-size: 14px;
font-weight: bold;
text-align: left;
height: 100px;
border: 0px solid black;
}
""") as demo:

    gr.Markdown("# Speech De-Identification Framework ver-2.0", elem_classes="centered")

    with gr.Column():

        with gr.Row():

            # Audio upload; the hidden textbox mirrors the uploaded file path
            # and is what the submit handler reads.
            audio_input = gr.Audio(label="Upload Audio File", type="filepath")
            audio_output = gr.Textbox(label="Audio File Path", interactive=False, visible = False)
            audio_input.change(fn=get_audio_file_path, inputs=audio_input, outputs=audio_output)

            # Model directory input (as a zip file); the hidden textbox receives
            # whatever load_model returns (a directory path or a status message).
            model_dir_input = gr.File(label="Select ML Model as zip file", file_count="single")
            model_output_path = gr.Textbox(label="Model Load Status", interactive=False, visible = False)
            model_dir_input.change(fn=load_model, inputs=model_dir_input, outputs=model_output_path)

        with gr.Row():
            # Five empty Markdown components precede the two buttons in this row.
            gr.Markdown("")
            gr.Markdown("")
            gr.Markdown("")
            gr.Markdown("")
            gr.Markdown("")

            reset_button = gr.Button("Reset")
            submit_button = gr.Button("Submit")

        # Result areas. Note the variable names and the headings are crossed:
        # pii_text_output sits under "Transcribe Text and Entities" and
        # transcribe_text_output under "PII Text and Time Stamps".
        gr.Markdown("### Transcribe Text and Entities:")
        pii_text_output = RichTextbox(show_label=False , interactive=False)
        gr.Markdown("### PII Text and Time Stamps:")
        transcribe_text_output = gr.Textbox(show_label=False , interactive=False)
        gr.Markdown("### Metrics:")
        metrics_output = gr.Textbox(show_label=False , interactive=False)

        with gr.Row():
            # File component offering the beeped audio for download
            beep_audio_file_output = gr.File(label="Download Beeped Audio", interactive=False)

            # Audio player component to play the beeped audio file
            audio_player = gr.Audio(label="Play Beeped Audio", type="filepath")

        # Automatically update the audio player when the file component changes
        beep_audio_file_output.change(load_audio, inputs=beep_audio_file_output, outputs=audio_player)

        # Event Handlers
        # Reset clears the two inputs and the three text outputs (five components,
        # matching the five values reset() returns); the beeped-audio file and
        # player are not in its output list.
        reset_button.click(reset, [], [audio_input, model_dir_input, pii_text_output, transcribe_text_output, metrics_output])
        # Submit reads the two hidden textboxes and fills the four result components.
        submit_button.click(start_worker, [audio_output, model_output_path], [pii_text_output, transcribe_text_output, metrics_output,beep_audio_file_output])

# Launches at import time with a public share link and opens a browser tab.
demo.launch(inbrowser=True, show_error=True,share = True)