"""Flask service that renders a short text-to-video clip on CPU.

On import the module makes sure the model snapshot is present under
``MODEL_DIR``, loads the diffusion pipeline on CPU, and registers a single
``POST /generate`` endpoint that returns a 720x1280 MP4.
"""
import os

# Set before the ML libraries are imported, as in the original ordering:
# huggingface_hub resolves its cache location from HF_HOME, and an empty
# CUDA_VISIBLE_DEVICES hides every GPU so inference stays on CPU.
os.environ["HF_HOME"] = "/data/.cache"
os.environ["CUDA_VISIBLE_DEVICES"] = ""  # force CPU

import io
import itertools
import subprocess
import tempfile

import torch
from diffusers import DiffusionPipeline
from diffusers.utils import export_to_video
from flask import Flask, abort, request, send_file
from huggingface_hub import login, snapshot_download

app = Flask(__name__)

MODEL_ID = "damo-vilab/text-to-video-ms-1.7b"
MODEL_DIR = "/data/models/t2v-1.7b"

DEFAULT_PROMPT = "cartoon chaiwala at station"
TARGET_FRAMES = 48  # 48 frames at 8 fps -> 6 seconds of video
FPS = 8

hf_token = os.getenv("HF_TOKEN")
if hf_token:
    login(token=hf_token)

# Look for the pipeline index file rather than just the directory, so an
# empty or barely started MODEL_DIR is not mistaken for a finished download.
# NOTE(review): a download interrupted later (index present, weights missing)
# is still not detected here.
if not os.path.isfile(os.path.join(MODEL_DIR, "model_index.json")):
    print("Downloading to /data...")
    snapshot_download(MODEL_ID, local_dir=MODEL_DIR, local_dir_use_symlinks=False)

# PURE CPU - no offload calls
pipe = DiffusionPipeline.from_pretrained(
    MODEL_DIR,
    torch_dtype=torch.float32,
    local_files_only=True,
)
pipe = pipe.to("cpu")
pipe.vae.enable_slicing()
pipe.vae.enable_tiling()


def _loop_frames(frames, total):
    """Return ``total`` frames by cycling through ``frames`` in order.

    ``frames`` is converted to a list first. Depending on the diffusers
    version the pipeline may hand back a NumPy array, and ``array * 3``
    would scale pixel values instead of repeating the frames.
    """
    return list(itertools.islice(itertools.cycle(list(frames)), total))


@app.route("/generate", methods=["POST"])
def generate():
    """Generate a clip for the JSON ``prompt`` and return it as ``video/mp4``.

    The request body must be a JSON object; ``prompt`` is optional and falls
    back to ``DEFAULT_PROMPT``. A body that is not an object, or a prompt that
    is not a non-empty string, is rejected with HTTP 400. A failing ffmpeg run
    is logged and re-raised as ``subprocess.CalledProcessError``.
    """
    payload = request.json
    if not isinstance(payload, dict):
        abort(400, description="Request body must be a JSON object.")

    prompt = payload.get("prompt", DEFAULT_PROMPT)
    if not isinstance(prompt, str) or not prompt.strip():
        abort(400, description="'prompt' must be a non-empty string.")

    # 16 frames -> repeat to 48 for 6 sec
    result = pipe(
        prompt,
        num_inference_steps=20,
        guidance_scale=7.5,
        num_frames=16,
        height=256,
        width=256,
    )
    frames = _loop_frames(result.frames[0], TARGET_FRAMES)

    # A private working directory per request keeps overlapping requests from
    # overwriting each other's files and removes the intermediates afterwards.
    with tempfile.TemporaryDirectory(prefix="t2v_") as workdir:
        low_res_path = os.path.join(workdir, "low.mp4")
        upscaled_path = os.path.join(workdir, "out_720p.mp4")

        export_to_video(frames, low_res_path, fps=FPS)

        # upscale to 720x1280 9:16
        ffmpeg_cmd = [
            "ffmpeg", "-y", "-i", low_res_path,
            "-vf", "scale=720:1280:force_original_aspect_ratio=increase,crop=720:1280",
            "-c:v", "libx264", "-preset", "ultrafast", "-crf", "28",
            upscaled_path,
        ]
        try:
            subprocess.run(
                ffmpeg_cmd,
                check=True,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.PIPE,
            )
        except subprocess.CalledProcessError as err:
            # Keep ffmpeg's diagnostics instead of discarding them.
            app.logger.error(
                "ffmpeg exited with %s: %s",
                err.returncode,
                (err.stderr or b"").decode(errors="replace"),
            )
            raise

        # Read the result before the directory is deleted on context exit.
        with open(upscaled_path, "rb") as video_file:
            video_bytes = video_file.read()

    return send_file(io.BytesIO(video_bytes), mimetype="video/mp4")


if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860)