MahmoudElsamadony committed
Commit d4362f5 · 1 Parent(s): aa5f8c6
Files changed (2):
  1. app.py +10 -4
  2. utils.py +9 -46
app.py CHANGED
@@ -1,4 +1,3 @@
-from transformers import AutoModelForSeq2SeqLM,AutoTokenizer
 import gradio as gr
 from faster_whisper import WhisperModel
 import whisper_timestamped as whisper
@@ -8,18 +7,25 @@ from utils import *
 model = WhisperModel("large-v2")
 
 
-tokenizer = AutoTokenizer.from_pretrained("FinetuneModel/VoidFilter")
-FinetuneModel = AutoModelForSeq2SeqLM.from_pretrained("FinetuneModel/VoidFilter")
+
 
 def VoidFilter(audio_file):
     audio = whisper.load_audio(audio_file)
+
     segments, _ = model.transcribe(audio_file, word_timestamps=True)
+
     word_timestamps = get_word_timestamps(segments)
+
     transcription = get_transcription(word_timestamps)
-    filtered_text = filterText(transcription,FinetuneModel,tokenizer)
+
+    filtered_text = filterText(transcription)
+
     modified_timestamps = get_modified_timestamps(word_timestamps,filtered_text)
+
     final_audio, sample_rate = cut_audio(audio, modified_timestamps)
+
     sf.write("filtered_audio.wav", final_audio, sample_rate)
+
     return "filtered_audio.wav"
 
 
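With this commit app.py no longer touches transformers directly: the seq2seq model and tokenizer are now constructed inside utils.py and picked up by filterText as default arguments (see the utils.py diff below). A minimal caller-side sketch of the new call path; the explicit import and the sample string are illustrative only, since app.py itself relies on "from utils import *":

from utils import filterText  # illustrative; app.py actually uses "from utils import *"

# filterText now resolves the model and tokenizer from its own defaults,
# so the call site shrinks to a single argument.
filtered_text = filterText("uh so this is like the raw transcription")  # placeholder text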
 
utils.py CHANGED
@@ -1,4 +1,8 @@
 import numpy as np
+from transformers import AutoModelForSeq2SeqLM,AutoTokenizer
+
+tokenizer = AutoTokenizer.from_pretrained("FinetuneModel/VoidFilter")
+FinetuneModel = AutoModelForSeq2SeqLM.from_pretrained("FinetuneModel/VoidFilter")
 
 
 def cut_audio(audio, timestamps):
@@ -69,48 +73,10 @@ def get_modified_timestamps(word_timestamps,filtered_text):
     return mod_timestemp
 
 
-# def preprocess_function(examples):
-#     inputs = [doc for doc in examples["original"]]
-#     model_inputs = tokenizer(inputs, max_length=30, truncation=True)
-
-#     # Setup the tokenizer for targets
-#     labels = tokenizer(text_target=examples["filtered"], max_length=29, truncation=True)
-
-#     model_inputs["labels"] = labels["input_ids"]
-#     return model_inputs
-
-
-# def compute_metrics(eval_pred):
-#     predictions, labels = eval_pred
-#     decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)
-#     # Replace -100 in the labels as we can't decode them.
-#     labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
-#     decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)
-
-#     decoded_preds = ["\n".join(nltk.sent_tokenize(pred.strip())) for pred in decoded_preds]
-#     decoded_labels = ["\n".join(nltk.sent_tokenize(label.strip())) for label in decoded_labels]
-
-#     result = {}
-#     rouge = Rouge()
-#     scores = rouge.get_scores(decoded_preds, decoded_labels, avg=True)
-
-#     for k, v in scores.items():
-#         if isinstance(v, dict):
-#             result.update({f'rouge-{k}': {m: round(val * 100, 4) for m, val in v.items()}})
-#         else:
-#             result[f'rouge-{k}'] = round(v * 100, 4)
-
-#     prediction_lens = [np.count_nonzero(pred != tokenizer.pad_token_id) for pred in predictions]
-#     result["gen_len"] = np.mean(prediction_lens)
-#     result = {
-#         key: value['f']
-#         for key, value in result.items()
-#         if isinstance(value, dict) and 'f' in value
-#     }
-#     return result
-
-
-def filterText(text, model,tokenizer):
+
+
+
+def filterText(text, model=FinetuneModel,tokenizer=tokenizer):
     device = 'cpu'
     model = model.to(device)
 
@@ -133,10 +99,7 @@ def filterText(text, model,tokenizer):
         repetition_penalty=2.0,
         length_penalty=0.5,
         num_beams = 10,
-        num_return_sequences=1, # Generate 10 sequences
-        # top_k = 10,
-        # do_sample = True,
-        # top_p = 0.97
+        num_return_sequences=1,
     )
     preds = [
         tokenizer.decode(gen_id, skip_special_tokens=True, clean_up_tokenization_spaces=True)
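Taken together, the hunks above give utils.py the following shape after the commit: the model and tokenizer are built once at import time and become the defaults of filterText, so app.py can call it with just the transcription text. The body below is a sketch reconstructed from the visible context lines; the encoding step and the return value are not shown in the diff and are assumptions, while the generate() settings are exactly those in the last hunk.

from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("FinetuneModel/VoidFilter")
FinetuneModel = AutoModelForSeq2SeqLM.from_pretrained("FinetuneModel/VoidFilter")

def filterText(text, model=FinetuneModel, tokenizer=tokenizer):
    # The diff pins inference to CPU.
    device = 'cpu'
    model = model.to(device)
    # Assumed encoding step (not visible in the hunks): tokenize the raw transcription.
    inputs = tokenizer(text, return_tensors="pt", truncation=True).to(device)
    # Beam-search settings taken from the context lines of the last hunk.
    generated = model.generate(
        **inputs,
        repetition_penalty=2.0,
        length_penalty=0.5,
        num_beams=10,
        num_return_sequences=1,
    )
    preds = [
        tokenizer.decode(gen_id, skip_special_tokens=True, clean_up_tokenization_spaces=True)
        for gen_id in generated
    ]
    # Assumed return value (the diff ends before the end of the function).
    return preds[0]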