Upload folder using huggingface_hub
Browse files- app.py +10 -4
- model.yml +66 -0
- models/__pycache__/audio.cpython-311.pyc +0 -0
- models/audio.py +4 -0
- routes/InferenceRoute.py +15 -0
- routes/__pycache__/AudioTokenizerRoute.cpython-311.pyc +0 -0
- routes/__pycache__/InferenceRoute.cpython-311.pyc +0 -0
- services/__pycache__/AudioTokenizerService.cpython-311.pyc +0 -0
- utils/__pycache__/custom_component.cpython-311.pyc +0 -0
- utils/__pycache__/utils.cpython-311.pyc +0 -0
- utils/utils.py +34 -0
app.py
CHANGED
|
@@ -1,14 +1,14 @@
|
|
| 1 |
import argparse, os,sys
|
| 2 |
parser = argparse.ArgumentParser(description="WhisperVQ Application")
|
| 3 |
-
parser.add_argument('--
|
| 4 |
default='whisper.log', help='The log file path')
|
| 5 |
-
parser.add_argument('--
|
| 6 |
choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'TRACE'], help='The log level')
|
| 7 |
parser.add_argument('--port', type=int, default=3348,
|
| 8 |
help='The port to run the WhisperVQ app on')
|
| 9 |
-
parser.add_argument('--
|
| 10 |
help='The port to run the WhisperVQ app on')
|
| 11 |
-
parser.add_argument('--
|
| 12 |
help='The package-dir to be extended to sys.path')
|
| 13 |
args = parser.parse_args()
|
| 14 |
sys.path.insert(0, args.package_dir)
|
|
@@ -34,6 +34,7 @@ logger = logging.getLogger(__name__)
|
|
| 34 |
|
| 35 |
from services.AudioTokenizerService import get_audio_tokenizer_service
|
| 36 |
from routes.AudioTokenizerRoute import audio_tokenizer_router
|
|
|
|
| 37 |
|
| 38 |
@asynccontextmanager
|
| 39 |
async def lifespan(app: FastAPI):
|
|
@@ -47,6 +48,7 @@ app = FastAPI(lifespan=lifespan)
|
|
| 47 |
|
| 48 |
# include the routes
|
| 49 |
app.include_router(audio_tokenizer_router)
|
|
|
|
| 50 |
|
| 51 |
def self_terminate():
|
| 52 |
time.sleep(1)
|
|
@@ -59,6 +61,10 @@ async def destroy():
|
|
| 59 |
threading.Thread(target=self_terminate, daemon=True).start()
|
| 60 |
return {"success": True}
|
| 61 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
if __name__ == "__main__":
|
| 63 |
import uvicorn
|
| 64 |
from uvicorn.config import LOGGING_CONFIG
|
|
|
|
| 1 |
import argparse, os,sys
|
| 2 |
parser = argparse.ArgumentParser(description="WhisperVQ Application")
|
| 3 |
+
parser.add_argument('--log_path', type=str,
|
| 4 |
default='whisper.log', help='The log file path')
|
| 5 |
+
parser.add_argument('--log_level', type=str, default='INFO',
|
| 6 |
choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'TRACE'], help='The log level')
|
| 7 |
parser.add_argument('--port', type=int, default=3348,
|
| 8 |
help='The port to run the WhisperVQ app on')
|
| 9 |
+
parser.add_argument('--device_id', type=str, default="0",
|
| 10 |
help='The CUDA/compute device id to run the WhisperVQ model on')
|
| 11 |
+
parser.add_argument('--package_dir', type=str, default="",
|
| 12 |
help='The package-dir to be extended to sys.path')
|
| 13 |
args = parser.parse_args()
|
| 14 |
sys.path.insert(0, args.package_dir)
|
|
|
|
| 34 |
|
| 35 |
from services.AudioTokenizerService import get_audio_tokenizer_service
|
| 36 |
from routes.AudioTokenizerRoute import audio_tokenizer_router
|
| 37 |
+
from routes.InferenceRoute import audio_inference_router
|
| 38 |
|
| 39 |
@asynccontextmanager
|
| 40 |
async def lifespan(app: FastAPI):
|
|
|
|
| 48 |
|
| 49 |
# include the routes
|
| 50 |
app.include_router(audio_tokenizer_router)
|
| 51 |
+
app.include_router(audio_inference_router)
|
| 52 |
|
| 53 |
def self_terminate():
|
| 54 |
time.sleep(1)
|
|
|
|
| 61 |
threading.Thread(target=self_terminate, daemon=True).start()
|
| 62 |
return {"success": True}
|
| 63 |
|
| 64 |
+
@app.get("/health")
|
| 65 |
+
async def health():
|
| 66 |
+
return {"status": "OK"}
|
| 67 |
+
|
| 68 |
if __name__ == "__main__":
|
| 69 |
import uvicorn
|
| 70 |
from uvicorn.config import LOGGING_CONFIG
|
model.yml
CHANGED
|
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# BEGIN GENERAL GGUF METADATA
|
| 2 |
+
id: ichigo-whispervq # Model ID unique between models
|
| 3 |
+
model: ichigo-whispervq # Model ID which is used for request construct - should be unique between models (author / quantization)
|
| 4 |
+
name: Ichigo WhisperVQ
|
| 5 |
+
version: 1 # metadata.version
|
| 6 |
+
|
| 7 |
+
# END GENERAL METADATA
|
| 8 |
+
|
| 9 |
+
# BEGIN INFERENCE PARAMETERS
|
| 10 |
+
# BEGIN REQUIRED
|
| 11 |
+
|
| 12 |
+
load_model: # method to load python model through API
|
| 13 |
+
method: post
|
| 14 |
+
path: /loadmodel
|
| 15 |
+
transform_request: "" # jinja2 template to transform request
|
| 16 |
+
transform_response: "" # jinja2 template to transform response
|
| 17 |
+
|
| 18 |
+
destroy: # method to destroy python process through API
|
| 19 |
+
method: delete
|
| 20 |
+
path: /detroy
|
| 21 |
+
|
| 22 |
+
health_check: # method to check the health of the python process through API
|
| 23 |
+
method: get
|
| 24 |
+
path: /health
|
| 25 |
+
|
| 26 |
+
inference: # method to do inference python model through API
|
| 27 |
+
method: post
|
| 28 |
+
path: /inference
|
| 29 |
+
transform_request: ""
|
| 30 |
+
transform_response: ""
|
| 31 |
+
|
| 32 |
+
extra_endpoints: # utility methods
|
| 33 |
+
- method: post
|
| 34 |
+
path: /tokenize/wav
|
| 35 |
+
transform_request: ""
|
| 36 |
+
transform_response: ""
|
| 37 |
+
- method: get
|
| 38 |
+
path: /supported_formats
|
| 39 |
+
|
| 40 |
+
# END REQUIRED
|
| 41 |
+
|
| 42 |
+
# BEGIN OPTIONAL
|
| 43 |
+
|
| 44 |
+
# END OPTIONAL
|
| 45 |
+
# END INFERENCE PARAMETERS
|
| 46 |
+
|
| 47 |
+
# BEGIN SERVER START PARAMETERS
|
| 48 |
+
# BEGIN REQUIRED
|
| 49 |
+
files: /home/thuan/cortexcpp/models/cortex.so/whispervq/fp16
|
| 50 |
+
port: 3348
|
| 51 |
+
log_path: whisper.log
|
| 52 |
+
log_level: INFO
|
| 53 |
+
environment: whispervq # python environment to run model
|
| 54 |
+
script: app.py
|
| 55 |
+
command: ["python"] # this is the base command, cortex will automatic find the correct location of python in env and add params when execute command
|
| 56 |
+
|
| 57 |
+
engine: python-engine
|
| 58 |
+
# END REQUIRED
|
| 59 |
+
|
| 60 |
+
# BEGIN OPTIONAL
|
| 61 |
+
extra_params:
|
| 62 |
+
device_id: "0"
|
| 63 |
+
package_dir: "" # the package directory to be searched
|
| 64 |
+
|
| 65 |
+
# END OPTIONAL
|
| 66 |
+
# END SERVER START PARAMETERS
|
models/__pycache__/audio.cpython-311.pyc
ADDED
|
Binary file (1.24 kB). View file
|
|
|
models/audio.py
CHANGED
|
@@ -20,3 +20,7 @@ FORMAT_BACKENDS = {
|
|
| 20 |
AudioFormat.OPUS: ["ffmpeg"],
|
| 21 |
AudioFormat.PCM: ["soundfile"]
|
| 22 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
AudioFormat.OPUS: ["ffmpeg"],
|
| 21 |
AudioFormat.PCM: ["soundfile"]
|
| 22 |
}
|
| 23 |
+
|
| 24 |
+
class AudioRequest(BaseModel):
|
| 25 |
+
data: str
|
| 26 |
+
format: AudioFormat = "wav"
|
routes/InferenceRoute.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from services.AudioTokenizerService import get_audio_tokenizer_service
|
| 2 |
+
from fastapi import APIRouter, Depends, HTTPException, status
|
| 3 |
+
from fastapi import File, UploadFile
|
| 4 |
+
from models.audio import AudioFormat, FORMAT_BACKENDS, AudioRequest
|
| 5 |
+
from utils.utils import decode_base64_to_audio
|
| 6 |
+
import base64
|
| 7 |
+
|
| 8 |
+
audio_inference_router = APIRouter(
|
| 9 |
+
prefix="/audio", tags=["audio"])
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
@audio_inference_router.post("/inference")
|
| 13 |
+
async def tokenize_audio(request: AudioRequest):
|
| 14 |
+
file_obj = decode_base64_to_audio(request.data)
|
| 15 |
+
return get_audio_tokenizer_service().tokenize(file_obj, request.format)
|
routes/__pycache__/AudioTokenizerRoute.cpython-311.pyc
ADDED
|
Binary file (1.68 kB). View file
|
|
|
routes/__pycache__/InferenceRoute.cpython-311.pyc
ADDED
|
Binary file (1.29 kB). View file
|
|
|
services/__pycache__/AudioTokenizerService.cpython-311.pyc
ADDED
|
Binary file (8.88 kB). View file
|
|
|
utils/__pycache__/custom_component.cpython-311.pyc
ADDED
|
Binary file (12.8 kB). View file
|
|
|
utils/__pycache__/utils.cpython-311.pyc
ADDED
|
Binary file (1.8 kB). View file
|
|
|
utils/utils.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import base64
|
| 2 |
+
|
| 3 |
+
def decode_base64_to_audio(
|
| 4 |
+
base64_string: str
|
| 5 |
+
) -> bytes:
|
| 6 |
+
"""
|
| 7 |
+
Decode a base64 string to audio bytes and optionally save to file.
|
| 8 |
+
|
| 9 |
+
Args:
|
| 10 |
+
base64_string (str): Base64 encoded string
|
| 11 |
+
output_path (Optional[Union[str, Path]]): Path to save the decoded audio file
|
| 12 |
+
|
| 13 |
+
Returns:
|
| 14 |
+
bytes: Decoded audio bytes
|
| 15 |
+
|
| 16 |
+
Raises:
|
| 17 |
+
ValueError: If the base64 string is invalid
|
| 18 |
+
IOError: If there's an error writing the file
|
| 19 |
+
"""
|
| 20 |
+
try:
|
| 21 |
+
audio_bytes = base64.b64decode(base64_string)
|
| 22 |
+
return audio_bytes
|
| 23 |
+
except base64.binascii.Error as e:
|
| 24 |
+
raise ValueError(f"Invalid base64 string: {e}")
|
| 25 |
+
except IOError as e:
|
| 26 |
+
raise IOError(f"Error writing audio file: {e}")
|
| 27 |
+
|
| 28 |
+
def encode_audio_to_base64(byte_data: bytes) -> str:
|
| 29 |
+
|
| 30 |
+
try:
|
| 31 |
+
base64_encoded = base64.b64encode(byte_data).decode('utf-8')
|
| 32 |
+
return base64_encoded
|
| 33 |
+
except IOError as e:
|
| 34 |
+
raise IOError(f"Error reading audio file: {e}")
|