Update README.md
Browse files
README.md
CHANGED
|
@@ -29,6 +29,7 @@ You can use the raw model for temporal video grounding.
|
|
| 29 |
Here is how to use this model to get the logits of a given video and text in PyTorch:
|
| 30 |
```python
|
| 31 |
import av
|
|
|
|
| 32 |
import numpy as np
|
| 33 |
import torch
|
| 34 |
from huggingface_hub import hf_hub_download
|
|
@@ -118,7 +119,21 @@ data = processor(
|
|
| 118 |
|
| 119 |
output = model(**data)
|
| 120 |
|
| 121 |
-
print(output)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 122 |
```
|
| 123 |
|
| 124 |
### Limitations and bias
|
|
|
|
| 29 |
Here is how to use this model to get the logits of a given video and text in PyTorch:
|
| 30 |
```python
|
| 31 |
import av
|
| 32 |
+
import cv2
|
| 33 |
import numpy as np
|
| 34 |
import torch
|
| 35 |
from huggingface_hub import hf_hub_download
|
|
|
|
| 119 |
|
| 120 |
output = model(**data)
|
| 121 |
|
| 122 |
+
print(f"The model output is {output}")
|
| 123 |
+
|
| 124 |
+
def get_video_duration(filename):
    """Return the duration of the video at *filename* in seconds.

    Duration is computed as frame_count / fps as reported by OpenCV.
    Returns -1 if the file cannot be opened or the reported FPS is 0
    (guards against ZeroDivisionError on broken metadata).
    """
    cap = cv2.VideoCapture(filename)
    try:
        if cap.isOpened():
            # Named constants instead of the magic indices 5 and 7.
            rate = cap.get(cv2.CAP_PROP_FPS)
            frame_num = cap.get(cv2.CAP_PROP_FRAME_COUNT)
            if rate > 0:
                return frame_num / rate
        return -1
    finally:
        # Always release the capture handle to avoid leaking it.
        cap.release()
|
| 132 |
+
|
| 133 |
+
# Map the model's normalized [start, end] prediction onto the clip's length.
duration = get_video_duration(file)
span = output['logits'].tolist()[0]
start = round(span[0] * duration, 1)
end = round(span[1] * duration, 1)
print(f"The time slot of the video corresponding to the text is from {start}s to {end}s")
|
| 137 |
```
|
| 138 |
|
| 139 |
### Limitations and bias
|