clementBE committed on
Commit
42e6d95
·
verified ·
1 Parent(s): e32fb4e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -7
app.py CHANGED
@@ -20,7 +20,9 @@ MODEL_SIZES = {
20
 
21
  # Cache for loaded models
22
  model_cache = {}
 
23
 
 
24
  def get_model_pipeline(model_name, progress):
25
  if model_name not in model_cache:
26
  progress(0, desc="πŸš€ Initializing ZeroGPU instance...")
@@ -35,6 +37,16 @@ def get_model_pipeline(model_name, progress):
35
  progress(0.5, desc="βœ… Model loaded successfully!")
36
  return model_cache[model_name]
37
 
 
 
 
 
 
 
 
 
 
 
38
  # --- Export Functions ---
39
  def create_vtt(segments, file_path):
40
  with open(file_path, "w", encoding="utf-8") as f:
@@ -64,9 +76,9 @@ def create_docx(segments, file_path, with_timestamps):
64
 
65
  # --- Main Transcription Function ---
66
  @spaces.GPU
67
- def transcribe_and_export(file, model_size, vtt_output, docx_timestamp_output, docx_no_timestamp_output, progress=gr.Progress()):
68
  if file is None:
69
- return (None, None, None, "Please upload an audio or video file.")
70
 
71
  start_time = time.time()
72
  ext = os.path.splitext(file)[1].lower()
@@ -85,6 +97,7 @@ def transcribe_and_export(file, model_size, vtt_output, docx_timestamp_output, d
85
  pipe = get_model_pipeline(model_size, progress)
86
  progress(0.75, desc="🎀 Transcribing audio...")
87
 
 
88
  if model_size == "Distil-Large-v3-FR (French-Specific)":
89
  raw_output = pipe(audio_file_path, return_timestamps=True, generate_kwargs={"language": "fr"})
90
  else:
@@ -109,23 +122,33 @@ def transcribe_and_export(file, model_size, vtt_output, docx_timestamp_output, d
109
  create_docx(segments, docx_no_ts_path, with_timestamps=False)
110
  outputs["DOCX (without timestamps)"] = docx_no_ts_path
111
 
 
 
 
 
 
 
 
 
 
 
112
  end_time = time.time()
113
  total_time = end_time - start_time
114
- transcribed_text = raw_output['text']
115
  downloadable_files = [path for path in outputs.values()]
116
  status_message = f"βœ… Transcription complete! Total time: {total_time:.2f} seconds."
117
 
118
  return (
119
  transcribed_text,
120
  gr.Files(value=downloadable_files, label="Download Transcripts"),
121
- gr.Audio(value=None),
 
122
  status_message
123
  )
124
 
125
  # --- Gradio UI ---
126
  with gr.Blocks(title="Whisper ZeroGPU Transcription") as demo:
127
  gr.Markdown("# πŸŽ™οΈ Whisper ZeroGPU Transcription")
128
- gr.Markdown("Transcribe audio or video files with timestamps and choose your output format.")
129
 
130
  with gr.Row():
131
  audio_input = gr.Audio(sources=["microphone", "upload"], type="filepath", label="Audio/Video File")
@@ -140,17 +163,19 @@ with gr.Blocks(title="Whisper ZeroGPU Transcription") as demo:
140
  vtt_checkbox = gr.Checkbox(label="VTT", value=True)
141
  docx_ts_checkbox = gr.Checkbox(label="DOCX (with timestamps)", value=False)
142
  docx_no_ts_checkbox = gr.Checkbox(label="DOCX (without timestamps)", value=True)
 
143
 
144
  transcribe_btn = gr.Button("Transcribe", variant="primary")
145
  status_text = gr.Textbox(label="Status", interactive=False)
146
 
147
  transcription_output = gr.Textbox(label="Full Transcription", lines=10)
148
  downloadable_files_output = gr.Files(label="Download Transcripts")
 
149
 
150
  transcribe_btn.click(
151
  fn=transcribe_and_export,
152
- inputs=[audio_input, model_selector, vtt_checkbox, docx_ts_checkbox, docx_no_ts_checkbox],
153
- outputs=[transcription_output, downloadable_files_output, audio_input, status_text]
154
  )
155
 
156
  if __name__ == "__main__":
 
20
 
21
  # Cache for loaded models
22
  model_cache = {}
23
+ summary_cache = {}
24
 
25
+ # --- Whisper pipeline loader ---
26
  def get_model_pipeline(model_name, progress):
27
  if model_name not in model_cache:
28
  progress(0, desc="πŸš€ Initializing ZeroGPU instance...")
 
37
  progress(0.5, desc="βœ… Model loaded successfully!")
38
  return model_cache[model_name]
39
 
40
+ # --- French summarization pipeline ---
41
+ def get_summary_pipeline():
42
+ if "summarizer" not in summary_cache:
43
+ # French-compatible summarization
44
+ summary_cache["summarizer"] = pipeline(
45
+ "summarization",
46
+ model="csebuetnlp/mT5_multilingual_XLSum"
47
+ )
48
+ return summary_cache["summarizer"]
49
+
50
  # --- Export Functions ---
51
  def create_vtt(segments, file_path):
52
  with open(file_path, "w", encoding="utf-8") as f:
 
76
 
77
  # --- Main Transcription Function ---
78
  @spaces.GPU
79
+ def transcribe_and_export(file, model_size, vtt_output, docx_timestamp_output, docx_no_timestamp_output, generate_summary, progress=gr.Progress()):
80
  if file is None:
81
+ return (None, None, None, None, "Please upload an audio or video file.")
82
 
83
  start_time = time.time()
84
  ext = os.path.splitext(file)[1].lower()
 
97
  pipe = get_model_pipeline(model_size, progress)
98
  progress(0.75, desc="🎀 Transcribing audio...")
99
 
100
+ # Set French language if using French-specific model
101
  if model_size == "Distil-Large-v3-FR (French-Specific)":
102
  raw_output = pipe(audio_file_path, return_timestamps=True, generate_kwargs={"language": "fr"})
103
  else:
 
122
  create_docx(segments, docx_no_ts_path, with_timestamps=False)
123
  outputs["DOCX (without timestamps)"] = docx_no_ts_path
124
 
125
+ transcribed_text = raw_output['text']
126
+
127
+ # Generate summary if requested
128
+ summary_text = None
129
+ if generate_summary:
130
+ progress(0.95, desc="πŸ“ Generating summary...")
131
+ summarizer = get_summary_pipeline()
132
+ summary_output = summarizer(transcribed_text, max_length=150, min_length=30, do_sample=False)
133
+ summary_text = summary_output[0]['summary_text']
134
+
135
  end_time = time.time()
136
  total_time = end_time - start_time
 
137
  downloadable_files = [path for path in outputs.values()]
138
  status_message = f"βœ… Transcription complete! Total time: {total_time:.2f} seconds."
139
 
140
  return (
141
  transcribed_text,
142
  gr.Files(value=downloadable_files, label="Download Transcripts"),
143
+ audio_file_path,
144
+ summary_text,
145
  status_message
146
  )
147
 
148
  # --- Gradio UI ---
149
  with gr.Blocks(title="Whisper ZeroGPU Transcription") as demo:
150
  gr.Markdown("# πŸŽ™οΈ Whisper ZeroGPU Transcription")
151
+ gr.Markdown("Transcribe audio or video files with timestamps, and optionally generate a French summary.")
152
 
153
  with gr.Row():
154
  audio_input = gr.Audio(sources=["microphone", "upload"], type="filepath", label="Audio/Video File")
 
163
  vtt_checkbox = gr.Checkbox(label="VTT", value=True)
164
  docx_ts_checkbox = gr.Checkbox(label="DOCX (with timestamps)", value=False)
165
  docx_no_ts_checkbox = gr.Checkbox(label="DOCX (without timestamps)", value=True)
166
+ summary_checkbox = gr.Checkbox(label="Generate Summary", value=False)
167
 
168
  transcribe_btn = gr.Button("Transcribe", variant="primary")
169
  status_text = gr.Textbox(label="Status", interactive=False)
170
 
171
  transcription_output = gr.Textbox(label="Full Transcription", lines=10)
172
  downloadable_files_output = gr.Files(label="Download Transcripts")
173
+ summary_output = gr.Textbox(label="Summary", lines=5)
174
 
175
  transcribe_btn.click(
176
  fn=transcribe_and_export,
177
+ inputs=[audio_input, model_selector, vtt_checkbox, docx_ts_checkbox, docx_no_ts_checkbox, summary_checkbox],
178
+ outputs=[transcription_output, downloadable_files_output, audio_input, summary_output, status_text]
179
  )
180
 
181
  if __name__ == "__main__":