stivenDR14
feat: Introduce audio captioning and categorization model with ONNX/ExecuTorch hybrid inference and category embedding generation.
5c8d855
# Project metadata (PEP 621) for the Whisper audio-captioning export tooling.
[project]
name = "whisper-audio-captioning-pte"
version = "0.1.0"
description = "Export Whisper audio captioning model to ExecuTorch PTE format"
requires-python = ">=3.10"
# Runtime requirements as PEP 508 strings, one per line, in alphabetical order.
# NOTE(review): several entries carry no version bound (efficientnet_pytorch,
# einops, onnx, onnxscript, optimum[exporters], python-dotenv); consider adding
# lower bounds once the versions actually tested against are known.
dependencies = [
    "datasets>=2.14.0",
    "efficientnet_pytorch",
    "einops",
    "executorch>=0.3.0",
    "librosa>=0.10.0",
    "onnx",
    "onnxruntime>=1.16.0",
    "onnxruntime-extensions>=0.14.0",
    "onnxscript",
    "optimum[exporters]",
    "python-dotenv",
    "soundfile>=0.12.1",
    "torch>=2.1.0",
    "torchaudio>=2.1.0",
    "transformers>=4.36.0",
]

# uv settings. `package = false` tells uv to treat this project as a
# non-package ("virtual") project: only its dependencies are installed into the
# environment; the project itself is not built or installed.
[tool.uv]
package = false