Peeble committed on
Commit
e0f2e87
·
verified ·
1 Parent(s): 3d32de8

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -89
app.py DELETED
@@ -1,89 +0,0 @@
1
- import os
2
- import torch
3
- import gradio as gr
4
- from pathlib import Path
5
- from transformers import AutoFeatureExtractor
6
- from optimum.onnxruntime import ORTModelForFeatureExtraction
7
-
8
- # 1. Environment & Backend Management
9
- # We force a check for libnppicc.so.13 to prevent crashes
10
- try:
11
- import torchcodec
12
- from torchcodec.decoders import VideoDecoder # Works for audio streams too
13
- HAS_CODEC = True
14
- except Exception as e:
15
- print(f"Warning: torchcodec load failed ({e}). Reverting to standard torchaudio.")
16
- os.environ["TORCHAUDIO_USE_TORCHCODEC"] = "0"
17
- HAS_CODEC = False
18
-
19
- import torchaudio
20
-
21
- # Configuration
22
- SAVE_DIR = "fastrvc_onnx_export"
23
- HF_HUBERT = "facebook/hubert-base-ls960" # Default for FastRVC 3.0
24
- DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
25
-
26
- def process_and_compile(audio_path):
27
- if not audio_path:
28
- return None, "Error: Audio file required."
29
-
30
- try:
31
- # A. Faster Loading with TorchCodec (if available)
32
- if HAS_CODEC:
33
- # torchcodec handles the GPU decoding and resampling efficiently
34
- decoder = torchcodec.decoders.create_from_file(audio_path, device=DEVICE)
35
- # Fetch all audio frames from the stream
36
- waveform = decoder.get_frames_by_index(range(decoder.metadata.num_frames))
37
- sample_rate = decoder.metadata.sample_rate
38
- else:
39
- waveform, sample_rate = torchaudio.load(audio_path)
40
- waveform = waveform.to(DEVICE)
41
-
42
- # B. Resampling (HuBERT requires 16000Hz)
43
- if sample_rate != 16000:
44
- resampler = torchaudio.transforms.Resample(sample_rate, 16000).to(DEVICE)
45
- waveform = resampler(waveform)
46
-
47
- # C. Hugging Face to ONNX Export (using Optimum)
48
- # This compiles the voice feature extraction layer
49
- print(f"Starting ONNX compilation for {HF_HUBERT}...")
50
-
51
- onnx_model = ORTModelForFeatureExtraction.from_pretrained(
52
- HF_HUBERT,
53
- export=True,
54
- torch_dtype=torch.float32
55
- )
56
-
57
- # Create output directory and save the compiled binary
58
- Path(SAVE_DIR).mkdir(parents=True, exist_ok=True)
59
- onnx_model.save_pretrained(SAVE_DIR)
60
-
61
- final_model_path = os.path.join(SAVE_DIR, "model.onnx")
62
-
63
- return final_model_path, "✅ Compilation Successful: RVC-compatible ONNX generated."
64
-
65
- except Exception as e:
66
- return None, f"❌ Status: {str(e)}"
67
-
68
- # 3. Gradio Interface Definition
69
- with gr.Blocks(title="FastRVC 3.0 ONNX Compiler") as demo:
70
- gr.Markdown("# 🎙️ FastRVC 3.0 Voice-to-ONNX Compiler")
71
- gr.Markdown("Uses **torchcodec** for high-speed decoding and **HF Optimum** for ONNX compilation.")
72
-
73
- with gr.Row():
74
- with gr.Column():
75
- input_audio = gr.Audio(label="Voice Sample (Calibration)", type="filepath")
76
- compile_btn = gr.Button("Compile Model", variant="primary")
77
-
78
- with gr.Column():
79
- status_log = gr.Textbox(label="Build Status")
80
- output_file = gr.File(label="Download .onnx Binary")
81
-
82
- compile_btn.click(
83
- fn=process_and_compile,
84
- inputs=[input_audio],
85
- outputs=[output_file, status_log]
86
- )
87
-
88
- if __name__ == "__main__":
89
- demo.launch(server_name="0.0.0.0", port=7860)