Spaces:

SherryX
/

STAR

Running on Zero

App Files Files Community

Fabrice-TIERCELIN commited on May 22, 2025

Commit

602cbe0

verified ·

1 Parent(s): 0c79af5

Update video_super_resolution/scripts/inference_sr.py

Browse files

Files changed (1) hide show

video_super_resolution/scripts/inference_sr.py +56 -142

video_super_resolution/scripts/inference_sr.py CHANGED Viewed

@@ -1,142 +1,56 @@
-import os
-import torch
-from argparse import ArgumentParser, Namespace
-import json
-from typing import Any, Dict, List, Mapping, Tuple
-from easydict import EasyDict
-from video_to_video.video_to_video_model import VideoToVideo_sr
-from video_to_video.utils.seed import setup_seed
-from video_to_video.utils.logger import get_logger
-from video_super_resolution.color_fix import adain_color_fix
-from inference_utils import *
-logger = get_logger()
-class STAR_sr():
-    def __init__(self,
-                 result_dir='./results/',
-                 file_name='000_video.mp4',
-                 model_path='./pretrained_weight',
-                 solver_mode='fast',
-                 steps=15,
-                 guide_scale=7.5,
-                 upscale=4,
-                 max_chunk_len=32,
-                 variant_info=None,
-                 chunk_size=3,
-                 ):
-        self.model_path=model_path
-        logger.info('checkpoint_path: {}'.format(self.model_path))
-        self.result_dir = result_dir
-        self.file_name = file_name
-        os.makedirs(self.result_dir, exist_ok=True)
-        model_cfg = EasyDict(__name__='model_cfg')
-        model_cfg.model_path = self.model_path
-        model_cfg.chunk_size = chunk_size
-        self.model = VideoToVideo_sr(model_cfg)
-        steps = 15 if solver_mode == 'fast' else steps
-        self.solver_mode=solver_mode
-        self.steps=steps
-        self.guide_scale=guide_scale
-        self.upscale = upscale
-        self.max_chunk_len=max_chunk_len
-        self.variant_info=variant_info
-    def enhance_a_video(self, video_path, prompt):
-        logger.info('input video path: {}'.format(video_path))
-        text = prompt
-        logger.info('text: {}'.format(text))
-        caption = text + self.model.positive_prompt
-        input_frames, input_fps = load_video(video_path)
-        in_f_num = len(input_frames)
-        logger.info('input frames length: {}'.format(in_f_num))
-        logger.info('input fps: {}'.format(input_fps))
-        video_data = preprocess(input_frames)
-        _, _, h, w = video_data.shape
-        logger.info('input resolution: {}'.format((h, w)))
-        target_h, target_w = h * self.upscale, w * self.upscale   # adjust_resolution(h, w, up_scale=4)
-        logger.info('target resolution: {}'.format((target_h, target_w)))
-        pre_data = {'video_data': video_data, 'y': caption}
-        pre_data['target_res'] = (target_h, target_w)
-        total_noise_levels = 900
-        setup_seed(666)
-        with torch.no_grad():
-            data_tensor = collate_fn(pre_data, 'cuda:0')
-            output = self.model.test(data_tensor, total_noise_levels, steps=self.steps, \
-                                solver_mode=self.solver_mode, guide_scale=self.guide_scale, \
-                                max_chunk_len=self.max_chunk_len
-                                )
-        output = tensor2vid(output)
-        # Using color fix
-        output = adain_color_fix(output, video_data)
-        save_video(output, self.result_dir, self.file_name, fps=input_fps)
-        return os.path.join(self.result_dir, self.file_name)
-def parse_args():
-    parser = ArgumentParser()
-    parser.add_argument("--input_path", required=True, type=str, help="input video path")
-    parser.add_argument("--save_dir", type=str, default='results', help="save directory")
-    parser.add_argument("--file_name", type=str, help="file name")
-    parser.add_argument("--model_path", type=str, default='./pretrained_weight/model.pt', help="model path")
-    parser.add_argument("--prompt", type=str, default='a good video', help="prompt")
-    parser.add_argument("--upscale", type=int, default=4, help='up-scale')
-    parser.add_argument("--max_chunk_len", type=int, default=32, help='max_chunk_len')
-    parser.add_argument("--variant_info", type=str, default=None, help='information of inference strategy')
-    parser.add_argument("--cfg", type=float, default=7.5)
-    parser.add_argument("--solver_mode", type=str, default='fast', help='fast | normal')
-    parser.add_argument("--steps", type=int, default=15)
-    return parser.parse_args()
-def main():
-    args = parse_args()
-    input_path = args.input_path
-    prompt = args.prompt
-    model_path = args.model_path
-    save_dir = args.save_dir
-    file_name = args.file_name
-    upscale = args.upscale
-    max_chunk_len = args.max_chunk_len
-    steps = args.steps
-    solver_mode = args.solver_mode
-    guide_scale = args.cfg
-    assert solver_mode in ('fast', 'normal')
-    star_sr = STAR_sr(
-                            result_dir=save_dir,
-                            file_name=file_name,  # new added
-                            model_path=model_path,
-                            solver_mode=solver_mode,
-                            steps=steps,
-                            guide_scale=guide_scale,
-                            upscale=upscale,
-                            max_chunk_len=max_chunk_len,
-                            variant_info=None,
-                            )
-    star_sr.enhance_a_video(input_path, prompt)
-if __name__ == '__main__':
-    main()

+#!/bin/bash
+# Folder paths
+video_folder_path='./input/video'
+txt_file_path='./input/text/prompt.txt'
+# Get all .mp4 files in the folder using find to handle special characters
+mapfile -t mp4_files < <(find "$video_folder_path" -type f -name "*.mp4")
+# Print the list of MP4 files
+echo "MP4 files to be processed:"
+for mp4_file in "${mp4_files[@]}"; do
+    echo "$mp4_file"
+done
+# Read lines from the text file, skipping empty lines
+mapfile -t lines < <(grep -v '^\s*$' "$txt_file_path")
+# List of frame counts
+frame_length=32
+# Debugging output
+echo "Number of MP4 files: ${#mp4_files[@]}"
+echo "Number of lines in the text file: ${#lines[@]}"
+# Ensure the number of video files matches the number of lines
+if [ ${#mp4_files[@]} -ne ${#lines[@]} ]; then
+    echo "Number of MP4 files and lines in the text file do not match."
+    exit 1
+fi
+# Loop through video files and corresponding lines
+for i in "${!mp4_files[@]}"; do
+    mp4_file="${mp4_files[$i]}"
+    line="${lines[$i]}"
+    # Extract the filename without the extension
+    file_name=$(basename "$mp4_file" .mp4)
+    echo "Processing video file: $mp4_file with prompt: $line"
+    # Run Python script with parameters
+    python \
+        ./video_super_resolution/scripts/inference_sr.py \
+        --solver_mode 'fast' \
+        --steps 15 \
+        --input_path "${mp4_file}" \
+        --model_path /mnt/bn/videodataset/VSR/pretrained_models/STAR/heavy_deg.pt \
+        --prompt "${line}" \
+        --upscale 4 \
+        --max_chunk_len ${frame_length} \
+        --file_name "${file_name}.mp4" \
+        --save_dir ./results
+done
+echo "All videos processed successfully."