Spaces:

FunAudioLLM
/

PrismAudio

Running on Zero

PrismAudio / scripts /ThinkSound /demo.bat

prismaudio-project

init

ddb382a 3 days ago

2.7 kB

	@echo off
	setlocal enabledelayedexpansion

	:: Check number of arguments
	if "%~3"=="" (
	echo Usage: %~nx0 ^<video_path^> ^<title^> ^<description^> [use-half]
	exit /b 1
	)

	set "VIDEO_PATH=%~1"
	set "TITLE=%~2"
	set "DESCRIPTION=%~3"
	set "USE_HALF_FLAG=%~4"

	set "MODEL_CONFIG=ThinkSound\configs\model_configs\thinksound.json"

	:: Generate unique ID
	for /f %%i in ('powershell -Command "[guid]::NewGuid().ToString().Substring(0,8)"') do set "UNIQUE_ID=%%i"

	:: Create necessary directories
	if not exist videos mkdir videos
	if not exist cot_coarse mkdir cot_coarse
	if not exist results mkdir results

	:: Extract file info
	for %%f in ("%VIDEO_PATH%") do (
	set "VIDEO_FILE=%%~nxf"
	set "VIDEO_ID=%%~nf"
	set "VIDEO_EXT=%%~xf"
	)

	:: Normalize extension
	set "VIDEO_EXT=!VIDEO_EXT:.=!"
	set "TEMP_VIDEO_PATH=videos\demo.mp4"

	:: Convert to mp4 if needed
	echo VIDEO_EXT is: !VIDEO_EXT!

	if /i not "!VIDEO_EXT!"=="mp4" (
	echo Converting to mp4...
	ffmpeg -y -i "%VIDEO_PATH%" -c:v libx264 -preset fast -c:a aac "%TEMP_VIDEO_PATH%" >nul 2>&1
	if errorlevel 1 (
	echo Video conversion failed.
	exit /b 2
	)
	) else (
	echo Copying "%VIDEO_PATH%" to "%TEMP_VIDEO_PATH%"
	copy "%VIDEO_PATH%" "%TEMP_VIDEO_PATH%"
	)

	:: Get duration (in seconds)
	for /f %%i in ('ffprobe -v error -show_entries format^=duration -of default^=noprint_wrappers^=1:nokey^=1 "%TEMP_VIDEO_PATH%"') do set "DURATION=%%i"
	for /f "tokens=1 delims=." %%a in ("%DURATION%") do set "DURATION_SEC=%%a"
	echo Duration is: %DURATION_SEC%

	:: Create cot.csv
	set "CSV_PATH=cot_coarse\cot.csv"
	echo id,caption,caption_cot> "%CSV_PATH%"
	echo demo,"%TITLE%","%DESCRIPTION:"='%" >> "%CSV_PATH%"

	:: Run feature extraction
	echo Extracting features...
	set "CMD=python extract_latents.py --duration_sec %DURATION_SEC%"
	if "%USE_HALF_FLAG%"=="use-half" (
	set "CMD=%CMD% --use_half"
	)
	call %CMD%
	if errorlevel 1 (
	echo Feature extraction failed.
	del /f "%TEMP_VIDEO_PATH%"
	exit /b 3
	)

	:: Run inference
	echo Running inference...
	python predict.py --model-config "%MODEL_CONFIG%" --duration-sec %DURATION_SEC% --results-dir "results"
	if errorlevel 1 (
	echo Inference failed.
	del /f "%TEMP_VIDEO_PATH%"
	exit /b 4
	)

	:: Locate audio output
	for /f %%i in ('powershell -Command "(Get-Date).ToString('MMdd')"') do set "CURRENT_DATE=%%i"
	set "AUDIO_PATH=results\%CURRENT_DATE%_batch_size1\demo.wav"

	if not exist "%AUDIO_PATH%" (
	echo Audio file not found.
	del /f "%TEMP_VIDEO_PATH%"
	exit /b 5
	)

	del /f "%TEMP_VIDEO_PATH%"
	echo Audio successfully generated: %AUDIO_PATH%
	exit /b 0