|
|
from pathlib import Path
from typing import Optional

import modal
|
|
app = modal.App("example-app-with-volumes")

# Seconds per minute, so timeouts below read naturally (e.g. 30 * MINUTES).
MINUTES = 60

# Container-side path where the model Volume is mounted; downloads land here.
cache_dir = "/root/models"

# Shared, persisted Volume so model weights are downloaded once and reused
# across containers and runs.
model_cache = modal.Volume.from_name("checkpoints-cache", create_if_missing=True)

# Lightweight image for the download step only: huggingface_hub plus the
# hf_transfer backend (enabled via env var) for fast parallel downloads.
download_image = (
    modal.Image.debian_slim(python_version="3.11")
    .pip_install("huggingface_hub[hf_transfer]==0.26.2")
    .env({"HF_HUB_ENABLE_HF_TRANSFER": "1"})
)
|
|
|
|
|
@app.function(image=download_image, volumes={cache_dir: model_cache}, timeout=30 * MINUTES)
def download_model(repo_id: str, allow_patterns: list[str], revision: Optional[str] = None):
    """Download a model snapshot from the Hugging Face Hub into the shared Volume.

    Args:
        repo_id: Hub repository to fetch, e.g. "unsloth/DeepSeek-R1-GGUF".
        allow_patterns: glob patterns selecting which repo files to download.
        revision: optional commit hash to pin; None fetches the repo head.
    """
    from huggingface_hub import snapshot_download

    print(f"🦙 downloading model from {repo_id} if not present")

    snapshot_download(
        repo_id=repo_id,
        revision=revision,
        local_dir=cache_dir,  # writes land inside the mounted Volume
        allow_patterns=allow_patterns,
    )

    # Commit so the newly written files are visible to other containers
    # (e.g. the inference function) that mount the same Volume.
    model_cache.commit()

    print("🦙 model loaded")
|
|
|
|
|
|
|
|
@app.local_entrypoint()
def main(
    model: str = "DeepSeek-R1",
    prompt: Optional[str] = None,
    n_predict: int = -1,
    args: Optional[str] = None,
):
    """Run llama.cpp inference on Modal for phi-4 or deepseek r1.

    Args:
        model: which model to run; "phi-4" or "DeepSeek-R1" (case-insensitive).
        prompt: prompt text forwarded to llama.cpp (None lets the inference
            function pick its default).
        n_predict: number of tokens to predict; -1 means no limit.
        args: extra llama.cpp CLI arguments as one shell-quoted string;
            None falls back to the per-model defaults.

    Raises:
        ValueError: if ``model`` names neither supported model.
    """
    import shlex

    org_name = "unsloth"

    # Resolve per-model settings: repo name, quantization, entrypoint file,
    # download pattern, pinned revision, and default CLI args.
    if model.lower() == "phi-4":
        model_name = "phi-4-GGUF"
        quant = "Q2_K"
        model_entrypoint_file = f"phi-4-{quant}.gguf"
        model_pattern = f"*{quant}*"
        revision = None
        parsed_args = DEFAULT_PHI_ARGS if args is None else shlex.split(args)
    elif model.lower() == "deepseek-r1":
        model_name = "DeepSeek-R1-GGUF"
        quant = "UD-IQ1_S"
        # Sharded GGUF: llama.cpp is pointed at the first shard.
        model_entrypoint_file = (
            f"{model}-{quant}/DeepSeek-R1-{quant}-00001-of-00003.gguf"
        )
        model_pattern = f"*{quant}*"
        # Pin a known-good repo revision for reproducibility.
        revision = "02656f62d2aa9da4d3f0cdb34c341d30dd87c3b6"
        parsed_args = DEFAULT_DEEPSEEK_R1_ARGS if args is None else shlex.split(args)
    else:
        raise ValueError(f"Unknown model {model}")

    repo_id = f"{org_name}/{model_name}"
    # Populate the Volume cache (no-op if the files are already present).
    download_model.remote(repo_id, [model_pattern], revision)

    result = llama_cpp_inference.remote(
        model_entrypoint_file,
        prompt,
        n_predict,
        parsed_args,
        store_output=model.lower() == "deepseek-r1",
    )

    # Save the generated text locally for inspection.
    output_path = Path("/tmp") / f"llama-cpp-{model}.txt"
    output_path.parent.mkdir(parents=True, exist_ok=True)
    print(f"🦙 writing response to {output_path}")
    output_path.write_text(result)
|
|
|
|
|
|
|
|
|