paulalbrecht committed on
Commit
a6b9939
·
verified ·
1 Parent(s): f2f6c73

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -0
app.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ import json
4
+ from datetime import datetime
5
+ from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq
6
+ import torch
7
+ import soundfile as sf
8
+
# Hugging Face model id for Mistral's Voxtral speech-to-text model.
MODEL_NAME = "mistralai/Voxtral-Small-24B-2507"

# Processor and model are loaded once at import time so every
# transcribe() call reuses them.
# NOTE(review): a 24B-parameter model is loaded with default dtype and
# no explicit device placement — confirm the host hardware can hold it.
processor = AutoProcessor.from_pretrained(MODEL_NAME)
model = AutoModelForSpeechSeq2Seq.from_pretrained(MODEL_NAME)
def transcribe(audio_file, metadata_file):
    """Transcribe an audio file with Voxtral and attach optional metadata.

    Parameters
    ----------
    audio_file : str
        Filesystem path to the audio file (``gr.Audio`` with
        ``type="filepath"`` supplies a path string).
    metadata_file : str | object | None
        Optional JSON metadata upload from ``gr.File``; depending on the
        Gradio version this arrives as a path string or a tempfile-like
        object exposing a ``.name`` path.

    Returns
    -------
    str
        Pretty-printed JSON containing the transcription, the model
        name, an ISO timestamp, and the parsed metadata (or ``None``).
    """
    # Load audio.
    # NOTE(review): sf.read returns a (frames, channels) array for
    # multi-channel input — presumably the processor expects mono;
    # confirm with a stereo file.
    audio_input, sample_rate = sf.read(audio_file)
    inputs = processor(audio_input, sampling_rate=sample_rate, return_tensors="pt")
    # Inference only — no gradients needed.
    with torch.no_grad():
        generated_ids = model.generate(**inputs)
    transcription = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

    # Load metadata. Bug fix: gr.File hands over a filepath string (or a
    # tempfile wrapper with a .name attribute), not an open file object,
    # so the original json.load(metadata_file) raised on every upload.
    # Resolve the path and open it explicitly.
    metadata = None
    if metadata_file is not None:
        metadata_path = getattr(metadata_file, "name", metadata_file)
        with open(metadata_path, "r", encoding="utf-8") as fh:
            metadata = json.load(fh)

    # Assemble the result payload.
    result = {
        "transcription": transcription,
        "model": MODEL_NAME,
        "timestamp": datetime.now().isoformat(),
        "metadata": metadata,
    }
    return json.dumps(result, ensure_ascii=False, indent=2)
35
+
# Gradio UI: an audio/video file plus an optional metadata JSON file go
# in, a pretty-printed transcription JSON string comes out.
audio_in = gr.Audio(type="filepath", label="Audio/Video-Datei (mp3, wav, mp4, etc.)")
metadata_in = gr.File(label="Passende Metadata JSON-Datei")
result_out = gr.Textbox(label="Transkriptions-JSON")

demo = gr.Interface(
    fn=transcribe,
    inputs=[audio_in, metadata_in],
    outputs=result_out,
    title="Voxtral Transkription mit Metadaten",
)

if __name__ == "__main__":
    demo.launch()