|
|
from pathlib import Path
from typing import Optional

import modal
|
|
app = modal.App("example-app-with-volumes")

# Seconds per minute, so timeouts below read naturally (e.g. 30 * MINUTES).
MINUTES = 60

# Container-side path where the model Volume is mounted; downloads land here.
cache_dir = "/root/models"

# Shared, persisted Volume so model weights are downloaded once and reused
# across containers and runs.
model_cache = modal.Volume.from_name("checkpoints-cache", create_if_missing=True)

# Lightweight image for the download step only: huggingface_hub plus the
# hf_transfer backend (enabled via env var) for fast parallel downloads.
download_image = (
    modal.Image.debian_slim(python_version="3.11")
    .pip_install("huggingface_hub[hf_transfer]==0.26.2")
    .env({"HF_HUB_ENABLE_HF_TRANSFER": "1"})
)
|
|
|
|
|
@app.function(image=download_image, volumes={cache_dir: model_cache}, timeout=30 * MINUTES)
def download_model(repo_id: str, allow_patterns: list[str], revision: Optional[str] = None):
    """Download a model snapshot from the Hugging Face Hub into the shared Volume.

    Args:
        repo_id: Hub repository to fetch, e.g. "unsloth/DeepSeek-R1-GGUF".
        allow_patterns: glob patterns selecting which repo files to download.
        revision: optional commit hash to pin; None fetches the repo head.
    """
    from huggingface_hub import snapshot_download

    print(f"🦙 downloading model from {repo_id} if not present")

    snapshot_download(
        repo_id=repo_id,
        revision=revision,
        local_dir=cache_dir,  # writes land inside the mounted Volume
        allow_patterns=allow_patterns,
    )

    # Commit so the newly written files are visible to other containers
    # (e.g. the inference function) that mount the same Volume.
    model_cache.commit()

    print("🦙 model loaded")
|
|
|
|
|
|
|
|
@app.local_entrypoint()
def main(
    model: str = "DeepSeek-R1",
    prompt: Optional[str] = None,
    n_predict: int = -1,
    args: Optional[str] = None,
):
    """Run llama.cpp inference on Modal for phi-4 or deepseek r1.

    Args:
        model: which model to run; "phi-4" or "DeepSeek-R1" (case-insensitive).
        prompt: prompt text forwarded to llama.cpp (None lets the inference
            function pick its default).
        n_predict: number of tokens to predict; -1 means no limit.
        args: extra llama.cpp CLI arguments as one shell-quoted string;
            None falls back to the per-model defaults.

    Raises:
        ValueError: if ``model`` names neither supported model.
    """
    import shlex

    org_name = "unsloth"

    # Resolve per-model settings: repo name, quantization, entrypoint file,
    # download pattern, pinned revision, and default CLI args.
    if model.lower() == "phi-4":
        model_name = "phi-4-GGUF"
        quant = "Q2_K"
        model_entrypoint_file = f"phi-4-{quant}.gguf"
        model_pattern = f"*{quant}*"
        revision = None
        parsed_args = DEFAULT_PHI_ARGS if args is None else shlex.split(args)
    elif model.lower() == "deepseek-r1":
        model_name = "DeepSeek-R1-GGUF"
        quant = "UD-IQ1_S"
        # Sharded GGUF: llama.cpp is pointed at the first shard.
        model_entrypoint_file = (
            f"{model}-{quant}/DeepSeek-R1-{quant}-00001-of-00003.gguf"
        )
        model_pattern = f"*{quant}*"
        # Pin a known-good repo revision for reproducibility.
        revision = "02656f62d2aa9da4d3f0cdb34c341d30dd87c3b6"
        parsed_args = DEFAULT_DEEPSEEK_R1_ARGS if args is None else shlex.split(args)
    else:
        raise ValueError(f"Unknown model {model}")

    repo_id = f"{org_name}/{model_name}"
    # Populate the Volume cache (no-op if the files are already present).
    download_model.remote(repo_id, [model_pattern], revision)

    result = llama_cpp_inference.remote(
        model_entrypoint_file,
        prompt,
        n_predict,
        parsed_args,
        store_output=model.lower() == "deepseek-r1",
    )

    # Save the generated text locally for inspection.
    output_path = Path("/tmp") / f"llama-cpp-{model}.txt"
    output_path.parent.mkdir(parents=True, exist_ok=True)
    print(f"🦙 writing response to {output_path}")
    output_path.write_text(result)
|
|
|
|
|
|
|
|
|