LogicGoInfotechSpaces
/

Audio2Image

Model card Files Files and versions

Audio2Image / handler.py

HariLogicgo's picture

added model and weights

40cfce6 3 months ago

history blame contribute delete

2.04 kB

	# handler.py
	import subprocess, os, tempfile, shutil, uuid, base64
	from typing import Dict

	class Handler:
	    """Generate a video from an image and an audio clip via ``generate.py``.

	    The handler shells out to the ``generate.py`` script found in
	    ``<model_dir>/Wan2.2`` and points it at the checkpoints stored in
	    ``<model_dir>/Wan2.2-S2V-14B``.
	    """

	    def __init__(self, model_dir: str):
	        """Record where the generator code and its checkpoints live.

	        Args:
	            model_dir: Directory that contains both the ``Wan2.2`` code
	                directory and the ``Wan2.2-S2V-14B`` checkpoint directory.
	        """
	        self.model_dir = model_dir
	        self.code_dir = os.path.join(model_dir, "Wan2.2")
	        self.ckpt_dir = os.path.join(model_dir, "Wan2.2-S2V-14B")

	    def __call__(self, inputs: Dict) -> Dict[str, str]:
	        """Run one generation request and return the video as base64 text.

	        Args:
	            inputs: Request payload. Recognised keys:
	                ``prompt`` (optional, defaults to ``"a person is talking"``),
	                ``image_b64`` (required, base64-encoded image bytes) and
	                ``audio_b64`` (required, base64-encoded audio bytes).

	        Returns:
	            ``{"video_b64": <base64 string of the generated video file>}``.

	        Raises:
	            ValueError: If ``image_b64`` or ``audio_b64`` is missing or empty.
	            subprocess.CalledProcessError: If ``generate.py`` exits non-zero.
	            FileNotFoundError: If no output video can be located afterwards.
	        """
	        # Local import keeps this block self-contained; the file's top-level
	        # import line does not bring ``sys`` into scope.
	        import sys

	        prompt = inputs.get("prompt", "a person is talking")
	        image_b64 = inputs.get("image_b64")
	        audio_b64 = inputs.get("audio_b64")

	        # Fail fast: the generator is always handed both paths, so a missing
	        # payload would otherwise surface as an opaque subprocess failure.
	        missing = [
	            key
	            for key, value in (("image_b64", image_b64), ("audio_b64", audio_b64))
	            if not value
	        ]
	        if missing:
	            raise ValueError(f"missing required input(s): {', '.join(missing)}")

	        tmpd = tempfile.mkdtemp()
	        try:
	            image_path = os.path.join(tmpd, "input.jpg")
	            audio_path = os.path.join(tmpd, "input.wav")
	            with open(image_path, "wb") as f:
	                f.write(base64.b64decode(image_b64))
	            with open(audio_path, "wb") as f:
	                f.write(base64.b64decode(audio_b64))

	            out_path = os.path.join(tmpd, f"out_{uuid.uuid4().hex}.mp4")

	            cmd = [
	                # Same interpreter (and therefore same environment) as the
	                # handler, instead of whatever "python" resolves to on PATH.
	                sys.executable, "generate.py",
	                "--task", "s2v-14B",
	                "--size", "1024*704",
	                "--ckpt_dir", self.ckpt_dir,
	                "--offload_model", "True",
	                "--convert_model_dtype",
	                "--prompt", prompt,
	                "--image", image_path,
	                "--audio", audio_path,
	                "--num_clip", "1",
	                # Ask the generator to write straight to our per-request path.
	                # NOTE(review): relies on the bundled generate.py accepting
	                # --save_file - confirm against the vendored Wan2.2 version.
	                "--save_file", out_path,
	            ]

	            subprocess.check_call(cmd, cwd=self.code_dir)

	            if os.path.isfile(out_path):
	                video_path = out_path
	            else:
	                # Fallback for generator versions that write to outputs/.
	                # The subprocess ran with cwd=self.code_dir, so that is where
	                # a relative "outputs" directory ends up.
	                video_path = self._newest_file(
	                    os.path.join(self.code_dir, "outputs")
	                )

	            with open(video_path, "rb") as f:
	                return {"video_b64": base64.b64encode(f.read()).decode("utf-8")}
	        finally:
	            # Best-effort cleanup of the per-request scratch directory.
	            shutil.rmtree(tmpd, ignore_errors=True)

	    @staticmethod
	    def _newest_file(directory: str) -> str:
	        """Return the most recently modified regular file in *directory*.

	        Raises:
	            FileNotFoundError: If the directory is absent or holds no files.
	        """
	        candidates = []
	        if os.path.isdir(directory):
	            candidates = [
	                path
	                for path in (
	                    os.path.join(directory, name) for name in os.listdir(directory)
	                )
	                if os.path.isfile(path)
	            ]
	        if not candidates:
	            raise FileNotFoundError(
	                f"generate.py produced no video file in {directory!r}"
	            )
	        # Modification time, not name order, identifies the latest result.
	        return max(candidates, key=os.path.getmtime)