Video-R1 / src /r1-v /Evaluation /check_path_mp4.py
DingZhenDojoCat's picture
Add files using upload-large-folder tool
22e5669 verified
import json
import os
import subprocess
from tqdm import tqdm
def is_strict_mp4(file_path):
"""
Check the video file's format information using ffprobe.
If the 'format_name' contains "mp4", then the file meets the strict mp4 encoding requirements;
otherwise, return False along with ffprobe's output information.
"""
command = [
"ffprobe",
"-v", "error",
"-print_format", "json",
"-show_format",
file_path
]
result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
if result.returncode != 0:
return False, result.stderr
try:
info = json.loads(result.stdout)
format_name = info.get("format", {}).get("format_name", "")
tokens = [token.strip() for token in format_name.split(',')]
if "mp4" in tokens:
return True, result.stdout
else:
return False, result.stdout
except Exception as e:
return False, str(e)
def convert_to_mp4(input_file, output_file):
"""
Use ffmpeg to convert the video to MP4 encoding.
The output is saved as a temporary file, and if the conversion is successful,
the temporary file replaces the output_file.
A scale filter is added to ensure the output resolution dimensions are even,
preventing errors from libx264.
"""
temp_file = output_file + ".temp.mp4"
command = [
"ffmpeg",
"-y", # Overwrite output file if it exists
"-i", input_file, # Input file
"-vf", "scale=trunc(iw/2)*2:trunc(ih/2)*2", # Ensure width and height are even numbers
"-c:v", "libx264", # Use libx264 for video encoding
"-c:a", "aac", # Use AAC for audio encoding
temp_file
]
print(f"Converting: {input_file} -> {output_file}")
result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
if result.returncode != 0:
print(f"Conversion failed: {input_file}\n{result.stderr}")
if os.path.exists(temp_file):
os.remove(temp_file)
return False
else:
os.replace(temp_file, output_file)
print(f"Conversion succeeded: {output_file}")
return True
def find_alternative(file_path):
"""
If the file specified by file_path does not exist, try to find a file with the same base name
but with a different extension in the same directory.
"""
dir_name = os.path.dirname(file_path)
base_name = os.path.splitext(os.path.basename(file_path))[0]
if not os.path.exists(dir_name):
return None
for candidate in os.listdir(dir_name):
candidate_base, candidate_ext = os.path.splitext(candidate)
if candidate_base == base_name and candidate_ext.lower() != ".mp4":
candidate_full = os.path.join(dir_name, candidate)
if os.path.isfile(candidate_full):
return candidate_full
return None
def process_videos_from_json(json_file):
with open(json_file, 'r', encoding='utf-8') as f:
data = json.load(f)
checked_paths = set() # Record the file paths that have been checked
for item in tqdm(data, desc="Processing videos", unit="item"):
file_path = item.get("path", "").strip()
# Skip if the file has already been checked
if file_path in checked_paths:
continue
checked_paths.add(file_path)
if os.path.exists(file_path):
strict, info = is_strict_mp4(file_path)
if not strict:
print(f"\nVideo does not meet strict mp4 encoding requirements: {file_path}")
print("ffprobe output:")
print(info)
# Convert the existing file to mp4 encoding (overwrite)
convert_to_mp4(file_path, file_path)
else:
# Try to find an alternative file with the same base name but different extension
alternative_file = find_alternative(file_path)
if alternative_file:
print(f"\nFound alternative: {alternative_file}")
# Convert the alternative file to mp4 and save with the desired file_path
convert_to_mp4(alternative_file, file_path)
else:
print(f"File does not exist and no alternative found: {file_path}")
if __name__ == "__main__":
# Change this to the path of your JSON file
process_videos_from_json("eval_mvbench.json")