Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import os | |
| import json | |
| from datetime import datetime | |
| from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq | |
| import torch | |
| import soundfile as sf | |
# Hugging Face Hub identifier of the checkpoint; it is also echoed in the
# "model" field of every result produced by transcribe().
MODEL_NAME = "mistralai/Voxtral-Small-24B-2507"

# Both objects are created once at import time and shared by all requests.
# NOTE(review): confirm that the installed transformers version can resolve
# this checkpoint through AutoModelForSpeechSeq2Seq; if it cannot, loading
# fails here, before the UI is ever built.
processor = AutoProcessor.from_pretrained(MODEL_NAME)
model = AutoModelForSpeechSeq2Seq.from_pretrained(MODEL_NAME)
def _load_metadata(metadata_file):
    """Parse the optional metadata upload into a Python object.

    Accepts ``None`` (no upload), a filesystem path, an open file-like
    object, or an object that only exposes its path as ``.name``.
    """
    if metadata_file is None:
        return None
    if isinstance(metadata_file, (str, os.PathLike)):
        path = metadata_file
    elif hasattr(metadata_file, "read"):
        # Already an open file object: read it directly.
        return json.load(metadata_file)
    else:
        path = getattr(metadata_file, "name", None)
        if path is None:
            raise TypeError(
                f"Unsupported metadata input: {type(metadata_file).__name__}"
            )
    # Binary mode lets the json module detect the encoding (UTF-8 with or
    # without BOM, UTF-16, UTF-32) instead of relying on the locale default.
    with open(path, "rb") as handle:
        return json.load(handle)


def transcribe(audio_file, metadata_file):
    """Transcribe an audio file and bundle the text with optional metadata.

    Args:
        audio_file: Path of the audio file to transcribe.
        metadata_file: Optional JSON metadata belonging to the audio. May be
            ``None``, a path, an open file-like object, or an upload wrapper
            exposing its path as ``.name``.

    Returns:
        A pretty-printed JSON string with the keys ``transcription``,
        ``model``, ``timestamp`` and ``metadata``.

    Raises:
        ValueError: If no audio file was supplied, or if the metadata is not
            valid JSON (``json.JSONDecodeError`` is a ``ValueError``).
        TypeError: If the metadata input is of an unsupported type.
    """
    if audio_file is None:
        raise ValueError("No audio file was provided.")

    # Load audio
    audio_input, sample_rate = sf.read(audio_file)
    if audio_input.ndim > 1:
        # soundfile returns (frames, channels) for multi-channel files;
        # average the channels so the processor receives a mono signal.
        audio_input = audio_input.mean(axis=1)
    # NOTE(review): no resampling is done here; the file's native rate is
    # forwarded as-is — confirm the processor accepts it.
    inputs = processor(
        audio_input, sampling_rate=sample_rate, return_tensors="pt"
    )
    with torch.no_grad():
        generated_ids = model.generate(**inputs)
    transcription = processor.batch_decode(
        generated_ids, skip_special_tokens=True
    )[0]

    # Load metadata
    metadata = _load_metadata(metadata_file)

    # Result
    result = {
        "transcription": transcription,
        "model": MODEL_NAME,
        "timestamp": datetime.now().isoformat(),
        "metadata": metadata,
    }
    return json.dumps(result, ensure_ascii=False, indent=2)
# Input widgets, in the same order as the parameters of transcribe():
# the audio upload (handed over as a file path) and the metadata upload.
input_components = [
    gr.Audio(type="filepath", label="Audio/Video-Datei (mp3, wav, mp4, etc.)"),
    gr.File(label="Passende Metadata JSON-Datei"),
]

# Single text box that displays the JSON string returned by transcribe().
output_component = gr.Textbox(label="Transkriptions-JSON")

# Assemble the web UI around transcribe().
demo = gr.Interface(
    fn=transcribe,
    inputs=input_components,
    outputs=output_component,
    title="Voxtral Transkription mit Metadaten",
)

# Start the server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()