John Ho committed on
Commit
4fa18d9
·
1 Parent(s): 8fe5da3

added function to read fps

Browse files
Files changed (2) hide show
  1. app.py +29 -5
  2. pyproject.toml +1 -0
app.py CHANGED
@@ -1,7 +1,14 @@
1
- import spaces
2
  import gradio as gr
3
  from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
4
  from qwen_vl_utils import process_vision_info
 
 
 
 
 
 
 
5
 
6
  # --- Installing Flash Attention for ZeroGPU is special --- #
7
  import subprocess
@@ -16,6 +23,20 @@ subprocess.run(
16
  # The model is trained on 8.0 FPS which we recommend for optimal inference
17
 
18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  @spaces.GPU(duration=30)
20
  def load_model(
21
  model_name: str = "chancharikm/qwen2.5-vl-7b-cam-motion-preview",
@@ -40,10 +61,13 @@ def load_model(
40
 
41
 
42
  @spaces.GPU(duration=120)
43
- def inference(video_path: str):
 
 
44
  # default processor
45
  processor = AutoProcessor.from_pretrained("Qwen/Qwen2.5-VL-7B-Instruct")
46
-
 
47
  messages = [
48
  {
49
  "role": "user",
@@ -51,9 +75,9 @@ def inference(video_path: str):
51
  {
52
  "type": "video",
53
  "video": video_path,
54
- "fps": 8.0,
55
  },
56
- {"type": "text", "text": "Describe the camera motion in this video."},
57
  ],
58
  }
59
  ]
 
1
# Standard library. `sys` is required by the logger sink below; it was
# previously missing, which raised NameError as soon as the module was imported.
import os
import sys

# Third-party. `spaces` is kept ahead of the other third-party imports, as in
# the original file.
import spaces
import ffmpeg
import gradio as gr
from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
from qwen_vl_utils import process_vision_info
from loguru import logger

# Replace loguru's default handler with a single stderr sink that uses a
# compact "timestamp | level | message" layout.
logger.remove()
logger.add(
    sys.stderr,
    format="<d>{time:YYYY-MM-DD ddd HH:mm:ss}</d> | <lvl>{level}</lvl> | <lvl>{message}</lvl>",
)
12
 
13
  # --- Installing Flash Attention for ZeroGPU is special --- #
14
  import subprocess
 
23
  # The model is trained on 8.0 FPS which we recommend for optimal inference
24
 
25
 
26
def get_fps_ffmpeg(video_path: str) -> float:
    """Return the frame rate of the first video stream in ``video_path``.

    The file is inspected with ``ffmpeg.probe``. The stream's
    ``r_frame_rate`` is used when it is a usable value; otherwise
    ``avg_frame_rate`` is tried as a fallback.

    Args:
        video_path: Path of the video file to probe.

    Returns:
        Frames per second as a float (e.g. ``'30000/1001'`` -> ~29.97).

    Raises:
        ValueError: If the probe result contains no video stream, or if
            neither frame-rate field holds a positive, parseable value.
    """
    # Local import keeps this function self-contained; stdlib only.
    from fractions import Fraction

    probe = ffmpeg.probe(video_path)
    # Find the first video stream; `.get` tolerates entries lacking a key.
    video_stream = next(
        (
            stream
            for stream in probe.get("streams", [])
            if stream.get("codec_type") == "video"
        ),
        None,
    )
    if video_stream is None:
        raise ValueError("No video stream found")

    # Frame rates are given as string fractions, e.g. '30000/1001'. A value
    # such as '0/0' (presumably ffprobe's "unknown" marker) would otherwise
    # surface as a ZeroDivisionError, so unusable candidates are skipped.
    for key in ("r_frame_rate", "avg_frame_rate"):
        raw_rate = video_stream.get(key)
        if not raw_rate:
            continue
        try:
            fps = float(Fraction(raw_rate))
        except (ValueError, ZeroDivisionError):
            continue
        if fps > 0:
            return fps

    raise ValueError(f"No valid frame rate found for video stream in {video_path!r}")
38
+
39
+
40
  @spaces.GPU(duration=30)
41
  def load_model(
42
  model_name: str = "chancharikm/qwen2.5-vl-7b-cam-motion-preview",
 
61
 
62
 
63
  @spaces.GPU(duration=120)
64
+ def inference(
65
+ video_path: str, prompt: str = "Describe the camera motion in this video."
66
+ ):
67
  # default processor
68
  processor = AutoProcessor.from_pretrained("Qwen/Qwen2.5-VL-7B-Instruct")
69
+ fps = get_fps_ffmpeg(video_path)
70
+ logger.info(f"{os.path.basename(video_path)} FPS: {fps}")
71
  messages = [
72
  {
73
  "role": "user",
 
75
  {
76
  "type": "video",
77
  "video": video_path,
78
+ "fps": fps,
79
  },
80
+ {"type": "text", "text": prompt},
81
  ],
82
  }
83
  ]
pyproject.toml CHANGED
@@ -11,4 +11,5 @@ dependencies = [
11
  "loguru>=0.7.3",
12
  "qwen-vl-utils>=0.0.11",
13
  "torchvision==0.19.0",
 
14
  ]
 
11
  "loguru>=0.7.3",
12
  "qwen-vl-utils>=0.0.11",
13
  "torchvision==0.19.0",
14
+ "ffmpeg-python>=0.2.0"
15
  ]