stivenDR14
feat: Introduce audio captioning and categorization model with ONNX/ExecuTorch hybrid inference and category embedding generation.
5c8d855
raw
history blame contribute delete
572 Bytes
# Project metadata (PEP 621).
[project]
name = "whisper-audio-captioning-pte"
version = "0.1.0"
description = "Export Whisper audio captioning model to ExecuTorch PTE format"
requires-python = ">=3.10"
# Dependencies are PEP 508 requirement strings. Entries with a `>=` specifier
# set only a lower bound; entries with no specifier are fully unconstrained,
# so the installed version is whatever the resolver selects.
dependencies = [
    "torch>=2.1.0",
    "transformers>=4.36.0",
    "datasets>=2.14.0",
    "torchaudio>=2.1.0",
    "soundfile>=0.12.1",
    "executorch>=0.3.0",
    "onnxruntime>=1.16.0",
    "librosa>=0.10.0",
    "optimum[exporters]",
    "onnx",
    "efficientnet_pytorch",
    "einops",
    "onnxscript",
    "python-dotenv",
    "onnxruntime-extensions>=0.14.0",
]

# uv-specific settings. `package = false` declares this a non-package
# ("virtual") project: uv installs the dependencies listed under [project]
# but does not build or install the project itself, so no [build-system]
# table is needed.
[tool.uv]
package = false