Spaces:
Sleeping
Sleeping
update
Browse files
app.py
CHANGED
|
@@ -25,7 +25,9 @@ from huggingface_hub import (
|
|
| 25 |
from PIL import Image
|
| 26 |
|
| 27 |
cached_latest_posts_df = None
|
|
|
|
| 28 |
last_fetched = None
|
|
|
|
| 29 |
|
| 30 |
import os
|
| 31 |
import tempfile
|
|
@@ -37,7 +39,24 @@ from decord import VideoReader
|
|
| 37 |
from decord import cpu
|
| 38 |
|
| 39 |
|
| 40 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
frames = extract_frames_decord(video_url, num_frames)
|
| 42 |
|
| 43 |
# Create a temporary directory to store the images
|
|
@@ -50,7 +69,9 @@ def download_samples(video_url, num_frames):
|
|
| 50 |
) # Adjust quality as needed
|
| 51 |
|
| 52 |
# Create a zip file in a persistent location
|
| 53 |
-
|
|
|
|
|
|
|
| 54 |
with ZipFile(zip_path, "w") as zipf:
|
| 55 |
for i in range(num_frames):
|
| 56 |
frame_path = os.path.join(temp_dir, f"frame_{i}.jpg")
|
|
@@ -91,7 +112,43 @@ def extract_frames_decord(video_path, num_frames=10):
|
|
| 91 |
raise Exception(f"Error extracting frames from video: {e}")
|
| 92 |
|
| 93 |
|
| 94 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
global cached_latest_posts_df
|
| 96 |
global last_fetched
|
| 97 |
|
|
@@ -129,25 +186,26 @@ def get_latest_pots():
|
|
| 129 |
|
| 130 |
def row_selected(evt: gr.SelectData):
|
| 131 |
global cached_latest_posts_df
|
| 132 |
-
|
| 133 |
-
post_id = cached_latest_posts_df.iloc[row]["post_id"]
|
| 134 |
-
return post_id
|
| 135 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 136 |
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
|
|
|
|
|
|
|
|
|
| 140 |
|
| 141 |
-
|
| 142 |
-
|
| 143 |
|
| 144 |
-
if match:
|
| 145 |
-
# Extract the post ID from the URL
|
| 146 |
-
post_id = match.group(1) or match.group(2)
|
| 147 |
-
print(f"Valid GamePhysics post ID: {post_id}")
|
| 148 |
-
else:
|
| 149 |
-
post_id = url
|
| 150 |
|
|
|
|
|
|
|
| 151 |
video_url = f"https://huggingface.co/datasets/asgaardlab/GamePhysicsDailyDump/resolve/main/data/videos/{post_id}.mp4?download=true"
|
| 152 |
|
| 153 |
# make sure file exists before returning, make a request without downloading the file
|
|
@@ -175,12 +233,13 @@ with gr.Blocks() as demo:
|
|
| 175 |
with gr.Column():
|
| 176 |
gr.Markdown("## Latest Posts")
|
| 177 |
latest_post_dataframe = gr.Dataframe()
|
| 178 |
-
|
|
|
|
| 179 |
|
| 180 |
with gr.Column():
|
| 181 |
gr.Markdown("## Sampled Frames from Video")
|
| 182 |
with gr.Row():
|
| 183 |
-
num_frames = gr.Slider(minimum=1, maximum=
|
| 184 |
sample_decord_btn = gr.Button("Sample decord")
|
| 185 |
|
| 186 |
sampled_frames = gr.Gallery()
|
|
@@ -189,7 +248,9 @@ with gr.Blocks() as demo:
|
|
| 189 |
output_files = gr.File()
|
| 190 |
|
| 191 |
download_samples_btn.click(
|
| 192 |
-
download_samples,
|
|
|
|
|
|
|
| 193 |
)
|
| 194 |
|
| 195 |
sample_decord_btn.click(
|
|
@@ -199,8 +260,11 @@ with gr.Blocks() as demo:
|
|
| 199 |
)
|
| 200 |
|
| 201 |
load_btn.click(load_video, inputs=[reddit_id], outputs=[video_player])
|
| 202 |
-
|
| 203 |
-
|
|
|
|
|
|
|
|
|
|
| 204 |
|
| 205 |
latest_post_dataframe.select(fn=row_selected, outputs=[reddit_id]).then(
|
| 206 |
load_video, inputs=[reddit_id], outputs=[video_player]
|
|
|
|
| 25 |
from PIL import Image
|
| 26 |
|
| 27 |
cached_latest_posts_df = None
|
| 28 |
+
cached_top_posts = None
|
| 29 |
last_fetched = None
|
| 30 |
+
last_fetched_top = None
|
| 31 |
|
| 32 |
import os
|
| 33 |
import tempfile
|
|
|
|
| 39 |
from decord import cpu
|
| 40 |
|
| 41 |
|
| 42 |
+
def get_reddit_id(url):
    """Extract a GamePhysics post ID from a Reddit URL or a bare ID string.

    Accepts either a full r/GamePhysics comments URL or a bare alphanumeric
    post ID. If the input matches neither form, it is returned unchanged so
    downstream code can surface the bad value.

    :param url: Reddit URL or bare post ID.
    :return: The extracted post ID, or the original string when no ID matches.
    """
    # First alternative: full r/GamePhysics comments URL (ID is the path
    # segment after /comments/). Second alternative: a bare alphanumeric ID,
    # anchored with `$` — without the anchor, re.match would truncate any
    # non-matching URL to its leading alphanumeric run (e.g. "https").
    pattern = r"https://www\.reddit\.com/r/GamePhysics/comments/([0-9a-zA-Z]+).*|([0-9a-zA-Z]+)$"

    # Match the URL or ID against the pattern
    match = re.match(pattern, url)

    if match:
        # Exactly one group is populated depending on which alternative hit.
        post_id = match.group(1) or match.group(2)
        print(f"Valid GamePhysics post ID: {post_id}")
    else:
        # Not recognizable — pass the raw input through.
        post_id = url

    return post_id
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
def download_samples(url, video_url, num_frames):
|
| 60 |
frames = extract_frames_decord(video_url, num_frames)
|
| 61 |
|
| 62 |
# Create a temporary directory to store the images
|
|
|
|
| 69 |
) # Adjust quality as needed
|
| 70 |
|
| 71 |
# Create a zip file in a persistent location
|
| 72 |
+
post_id = get_reddit_id(url)
|
| 73 |
+
print(f"Creating zip file for post {post_id}")
|
| 74 |
+
zip_path = f"frames-{post_id}.zip"
|
| 75 |
with ZipFile(zip_path, "w") as zipf:
|
| 76 |
for i in range(num_frames):
|
| 77 |
frame_path = os.path.join(temp_dir, f"frame_{i}.jpg")
|
|
|
|
| 112 |
raise Exception(f"Error extracting frames from video: {e}")
|
| 113 |
|
| 114 |
|
| 115 |
+
def get_top_posts():
    """Fetch this month's top r/GamePhysics posts as a [post_id, title] DataFrame.

    Results are cached at module level and refreshed at most once every 10
    minutes to stay polite to the Reddit API.

    :return: DataFrame with columns ``post_id`` and ``title``; an empty list
        on HTTP failure (kept for backward compatibility with callers that
        feed the result straight into a Gradio Dataframe).
    """
    global cached_top_posts
    global last_fetched_top

    # Make sure we don't fetch data too often: limit to 1 request per 10 minutes.
    # NOTE: use total_seconds(), not .seconds — .seconds ignores the day
    # component, so a gap of e.g. 1 day + 2 minutes would wrongly reuse the cache.
    now_time = datetime.now()
    if last_fetched_top is not None and (now_time - last_fetched_top).total_seconds() < 600:
        print("Using cached data")
        return cached_top_posts

    # Record the attempt time up front so repeated failures are also rate-limited.
    last_fetched_top = now_time
    url = "https://www.reddit.com/r/GamePhysics/top/.json?t=month"
    headers = {"User-Agent": "Mozilla/5.0"}

    response = requests.get(url, headers=headers)
    if response.status_code != 200:
        return []

    data = response.json()

    # Extract posts from the data
    posts = data["data"]["children"]

    # create [post_id, title] list
    examples = [[post["data"]["id"], post["data"]["title"]] for post in posts]
    # make a dataframe
    examples = pd.DataFrame(examples, columns=["post_id", "title"])
    cached_top_posts = examples
    return examples
|
| 149 |
+
|
| 150 |
+
|
| 151 |
+
def get_latest_posts():
|
| 152 |
global cached_latest_posts_df
|
| 153 |
global last_fetched
|
| 154 |
|
|
|
|
| 186 |
|
| 187 |
def row_selected(evt: gr.SelectData):
    """Resolve a Gradio table selection to the post_id of the selected row.

    The same Dataframe component is filled from either the "latest" or the
    "top" posts cache, so we probe both caches for the clicked cell's value
    to decide which one the selection came from.

    :param evt: Gradio select event; ``evt.value`` is the clicked cell's
        content and ``evt.index[0]`` is the row index.
    :return: The ``post_id`` of the selected row.
    :raises gr.Error: If the clicked value is found in neither cache.
    """
    global cached_latest_posts_df
    global cached_top_posts

    # find which dataframe was selected
    string_value = evt.value
    row = evt.index[0]
    target_df = None

    # Guard against caches that have never been populated (still None) —
    # calling .isin on None would raise AttributeError.
    if cached_latest_posts_df is not None and cached_latest_posts_df.isin([string_value]).any().any():
        target_df = cached_latest_posts_df
    elif cached_top_posts is not None and cached_top_posts.isin([string_value]).any().any():
        target_df = cached_top_posts
    else:
        raise gr.Error("Could not find selected post in any dataframe")

    post_id = target_df.iloc[row]["post_id"]
    return post_id
|
| 205 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 206 |
|
| 207 |
+
def load_video(url):
|
| 208 |
+
post_id = get_reddit_id(url)
|
| 209 |
video_url = f"https://huggingface.co/datasets/asgaardlab/GamePhysicsDailyDump/resolve/main/data/videos/{post_id}.mp4?download=true"
|
| 210 |
|
| 211 |
# make sure file exists before returning, make a request without downloading the file
|
|
|
|
| 233 |
with gr.Column():
|
| 234 |
gr.Markdown("## Latest Posts")
|
| 235 |
latest_post_dataframe = gr.Dataframe()
|
| 236 |
+
latest_posts_btn = gr.Button("Refresh Latest Posts")
|
| 237 |
+
top_posts_btn = gr.Button("Refresh Top Posts")
|
| 238 |
|
| 239 |
with gr.Column():
|
| 240 |
gr.Markdown("## Sampled Frames from Video")
|
| 241 |
with gr.Row():
|
| 242 |
+
num_frames = gr.Slider(minimum=1, maximum=60, step=1, value=10)
|
| 243 |
sample_decord_btn = gr.Button("Sample decord")
|
| 244 |
|
| 245 |
sampled_frames = gr.Gallery()
|
|
|
|
| 248 |
output_files = gr.File()
|
| 249 |
|
| 250 |
download_samples_btn.click(
|
| 251 |
+
download_samples,
|
| 252 |
+
inputs=[reddit_id, video_player, num_frames],
|
| 253 |
+
outputs=[output_files],
|
| 254 |
)
|
| 255 |
|
| 256 |
sample_decord_btn.click(
|
|
|
|
| 260 |
)
|
| 261 |
|
| 262 |
load_btn.click(load_video, inputs=[reddit_id], outputs=[video_player])
|
| 263 |
+
|
| 264 |
+
latest_posts_btn.click(get_latest_posts, outputs=[latest_post_dataframe])
|
| 265 |
+
top_posts_btn.click(get_top_posts, outputs=[latest_post_dataframe])
|
| 266 |
+
|
| 267 |
+
demo.load(get_latest_posts, outputs=[latest_post_dataframe])
|
| 268 |
|
| 269 |
latest_post_dataframe.select(fn=row_selected, outputs=[reddit_id]).then(
|
| 270 |
load_video, inputs=[reddit_id], outputs=[video_player]
|