| import whisper | |
def transcribe_video(video_path, model_size="large"):
    """Transcribe a media file with Whisper and return its timed segments.

    Args:
        video_path: Path handed unchanged to the model's ``transcribe`` call.
        model_size: Model name handed to ``whisper.load_model``
            (defaults to ``"large"``).

    Returns:
        A list with one dict per entry of the transcription result's
        ``"segments"``, each holding ``start_sec``, ``end_sec`` and ``text``
        copied from that entry's ``start``, ``end`` and ``text`` fields.
    """
    # NOTE: the model is loaded anew on every call.
    asr_model = whisper.load_model(model_size)
    transcription = asr_model.transcribe(video_path)

    timed_segments = []
    for segment in transcription["segments"]:
        timed_segments.append(
            {
                "start_sec": segment["start"],
                "end_sec": segment["end"],
                "text": segment["text"],
            }
        )
    return timed_segments