Spaces:
Sleeping
Sleeping
update
Browse files
app.py
CHANGED
|
@@ -25,7 +25,9 @@ from huggingface_hub import (
|
|
| 25 |
from PIL import Image
|
| 26 |
|
| 27 |
cached_latest_posts_df = None
|
|
|
|
| 28 |
last_fetched = None
|
|
|
|
| 29 |
|
| 30 |
import os
|
| 31 |
import tempfile
|
|
@@ -37,7 +39,24 @@ from decord import VideoReader
|
|
| 37 |
from decord import cpu
|
| 38 |
|
| 39 |
|
| 40 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
frames = extract_frames_decord(video_url, num_frames)
|
| 42 |
|
| 43 |
# Create a temporary directory to store the images
|
|
@@ -50,7 +69,9 @@ def download_samples(video_url, num_frames):
|
|
| 50 |
) # Adjust quality as needed
|
| 51 |
|
| 52 |
# Create a zip file in a persistent location
|
| 53 |
-
|
|
|
|
|
|
|
| 54 |
with ZipFile(zip_path, "w") as zipf:
|
| 55 |
for i in range(num_frames):
|
| 56 |
frame_path = os.path.join(temp_dir, f"frame_{i}.jpg")
|
|
@@ -91,7 +112,43 @@ def extract_frames_decord(video_path, num_frames=10):
|
|
| 91 |
raise Exception(f"Error extracting frames from video: {e}")
|
| 92 |
|
| 93 |
|
| 94 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
global cached_latest_posts_df
|
| 96 |
global last_fetched
|
| 97 |
|
|
@@ -129,25 +186,26 @@ def get_latest_pots():
|
|
| 129 |
|
| 130 |
def row_selected(evt: gr.SelectData):
|
| 131 |
global cached_latest_posts_df
|
| 132 |
-
|
| 133 |
-
post_id = cached_latest_posts_df.iloc[row]["post_id"]
|
| 134 |
-
return post_id
|
| 135 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 136 |
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
|
|
|
|
|
|
|
|
|
| 140 |
|
| 141 |
-
|
| 142 |
-
|
| 143 |
|
| 144 |
-
if match:
|
| 145 |
-
# Extract the post ID from the URL
|
| 146 |
-
post_id = match.group(1) or match.group(2)
|
| 147 |
-
print(f"Valid GamePhysics post ID: {post_id}")
|
| 148 |
-
else:
|
| 149 |
-
post_id = url
|
| 150 |
|
|
|
|
|
|
|
| 151 |
video_url = f"https://huggingface.co/datasets/asgaardlab/GamePhysicsDailyDump/resolve/main/data/videos/{post_id}.mp4?download=true"
|
| 152 |
|
| 153 |
# make sure file exists before returning, make a request without downloading the file
|
|
@@ -175,12 +233,13 @@ with gr.Blocks() as demo:
|
|
| 175 |
with gr.Column():
|
| 176 |
gr.Markdown("## Latest Posts")
|
| 177 |
latest_post_dataframe = gr.Dataframe()
|
| 178 |
-
|
|
|
|
| 179 |
|
| 180 |
with gr.Column():
|
| 181 |
gr.Markdown("## Sampled Frames from Video")
|
| 182 |
with gr.Row():
|
| 183 |
-
num_frames = gr.Slider(minimum=1, maximum=
|
| 184 |
sample_decord_btn = gr.Button("Sample decord")
|
| 185 |
|
| 186 |
sampled_frames = gr.Gallery()
|
|
@@ -189,7 +248,9 @@ with gr.Blocks() as demo:
|
|
| 189 |
output_files = gr.File()
|
| 190 |
|
| 191 |
download_samples_btn.click(
|
| 192 |
-
download_samples,
|
|
|
|
|
|
|
| 193 |
)
|
| 194 |
|
| 195 |
sample_decord_btn.click(
|
|
@@ -199,8 +260,11 @@ with gr.Blocks() as demo:
|
|
| 199 |
)
|
| 200 |
|
| 201 |
load_btn.click(load_video, inputs=[reddit_id], outputs=[video_player])
|
| 202 |
-
|
| 203 |
-
|
|
|
|
|
|
|
|
|
|
| 204 |
|
| 205 |
latest_post_dataframe.select(fn=row_selected, outputs=[reddit_id]).then(
|
| 206 |
load_video, inputs=[reddit_id], outputs=[video_player]
|
|
|
|
| 25 |
from PIL import Image
|
| 26 |
|
| 27 |
cached_latest_posts_df = None
|
| 28 |
+
cached_top_posts = None
|
| 29 |
last_fetched = None
|
| 30 |
+
last_fetched_top = None
|
| 31 |
|
| 32 |
import os
|
| 33 |
import tempfile
|
|
|
|
| 39 |
from decord import cpu
|
| 40 |
|
| 41 |
|
| 42 |
+
def get_reddit_id(url):
    """Extract a GamePhysics post ID from a Reddit URL or a bare ID string.

    Accepts either a full r/GamePhysics comments URL or a bare alphanumeric
    post ID. If the input matches neither form, it is returned unchanged so
    downstream code can surface the bad value.

    :param url: Reddit URL or bare post ID.
    :return: The extracted post ID, or the original string when no ID matches.
    """
    # First alternative: full r/GamePhysics comments URL (ID is the path
    # segment after /comments/). Second alternative: a bare alphanumeric ID,
    # anchored with `$` — without the anchor, re.match would truncate any
    # non-matching URL to its leading alphanumeric run (e.g. "https").
    pattern = r"https://www\.reddit\.com/r/GamePhysics/comments/([0-9a-zA-Z]+).*|([0-9a-zA-Z]+)$"

    # Match the URL or ID against the pattern
    match = re.match(pattern, url)

    if match:
        # Exactly one group is populated depending on which alternative hit.
        post_id = match.group(1) or match.group(2)
        print(f"Valid GamePhysics post ID: {post_id}")
    else:
        # Not recognizable — pass the raw input through.
        post_id = url

    return post_id
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
def download_samples(url, video_url, num_frames):
|
| 60 |
frames = extract_frames_decord(video_url, num_frames)
|
| 61 |
|
| 62 |
# Create a temporary directory to store the images
|
|
|
|
| 69 |
) # Adjust quality as needed
|
| 70 |
|
| 71 |
# Create a zip file in a persistent location
|
| 72 |
+
post_id = get_reddit_id(url)
|
| 73 |
+
print(f"Creating zip file for post {post_id}")
|
| 74 |
+
zip_path = f"frames-{post_id}.zip"
|
| 75 |
with ZipFile(zip_path, "w") as zipf:
|
| 76 |
for i in range(num_frames):
|
| 77 |
frame_path = os.path.join(temp_dir, f"frame_{i}.jpg")
|
|
|
|
| 112 |
raise Exception(f"Error extracting frames from video: {e}")
|
| 113 |
|
| 114 |
|
| 115 |
+
def get_top_posts():
    """Fetch this month's top r/GamePhysics posts as a [post_id, title] DataFrame.

    Results are cached at module level and refreshed at most once every 10
    minutes to stay polite to the Reddit API.

    :return: DataFrame with columns ``post_id`` and ``title``; an empty list
        on HTTP failure (kept for backward compatibility with callers that
        feed the result straight into a Gradio Dataframe).
    """
    global cached_top_posts
    global last_fetched_top

    # Make sure we don't fetch data too often: limit to 1 request per 10 minutes.
    # NOTE: use total_seconds(), not .seconds — .seconds ignores the day
    # component, so a gap of e.g. 1 day + 2 minutes would wrongly reuse the cache.
    now_time = datetime.now()
    if last_fetched_top is not None and (now_time - last_fetched_top).total_seconds() < 600:
        print("Using cached data")
        return cached_top_posts

    # Record the attempt time up front so repeated failures are also rate-limited.
    last_fetched_top = now_time
    url = "https://www.reddit.com/r/GamePhysics/top/.json?t=month"
    headers = {"User-Agent": "Mozilla/5.0"}

    response = requests.get(url, headers=headers)
    if response.status_code != 200:
        return []

    data = response.json()

    # Extract posts from the data
    posts = data["data"]["children"]

    # create [post_id, title] list
    examples = [[post["data"]["id"], post["data"]["title"]] for post in posts]
    # make a dataframe
    examples = pd.DataFrame(examples, columns=["post_id", "title"])
    cached_top_posts = examples
    return examples
|
| 149 |
+
|
| 150 |
+
|
| 151 |
+
def get_latest_posts():
|
| 152 |
global cached_latest_posts_df
|
| 153 |
global last_fetched
|
| 154 |
|
|
|
|
| 186 |
|
| 187 |
def row_selected(evt: gr.SelectData):
    """Resolve a Gradio table selection to the post_id of the selected row.

    The same Dataframe component is filled from either the "latest" or the
    "top" posts cache, so we probe both caches for the clicked cell's value
    to decide which one the selection came from.

    :param evt: Gradio select event; ``evt.value`` is the clicked cell's
        content and ``evt.index[0]`` is the row index.
    :return: The ``post_id`` of the selected row.
    :raises gr.Error: If the clicked value is found in neither cache.
    """
    global cached_latest_posts_df
    global cached_top_posts

    # find which dataframe was selected
    string_value = evt.value
    row = evt.index[0]
    target_df = None

    # Guard against caches that have never been populated (still None) —
    # calling .isin on None would raise AttributeError.
    if cached_latest_posts_df is not None and cached_latest_posts_df.isin([string_value]).any().any():
        target_df = cached_latest_posts_df
    elif cached_top_posts is not None and cached_top_posts.isin([string_value]).any().any():
        target_df = cached_top_posts
    else:
        raise gr.Error("Could not find selected post in any dataframe")

    post_id = target_df.iloc[row]["post_id"]
    return post_id
|
| 205 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 206 |
|
| 207 |
+
def load_video(url):
|
| 208 |
+
post_id = get_reddit_id(url)
|
| 209 |
video_url = f"https://huggingface.co/datasets/asgaardlab/GamePhysicsDailyDump/resolve/main/data/videos/{post_id}.mp4?download=true"
|
| 210 |
|
| 211 |
# make sure file exists before returning, make a request without downloading the file
|
|
|
|
| 233 |
with gr.Column():
|
| 234 |
gr.Markdown("## Latest Posts")
|
| 235 |
latest_post_dataframe = gr.Dataframe()
|
| 236 |
+
latest_posts_btn = gr.Button("Refresh Latest Posts")
|
| 237 |
+
top_posts_btn = gr.Button("Refresh Top Posts")
|
| 238 |
|
| 239 |
with gr.Column():
|
| 240 |
gr.Markdown("## Sampled Frames from Video")
|
| 241 |
with gr.Row():
|
| 242 |
+
num_frames = gr.Slider(minimum=1, maximum=60, step=1, value=10)
|
| 243 |
sample_decord_btn = gr.Button("Sample decord")
|
| 244 |
|
| 245 |
sampled_frames = gr.Gallery()
|
|
|
|
| 248 |
output_files = gr.File()
|
| 249 |
|
| 250 |
download_samples_btn.click(
|
| 251 |
+
download_samples,
|
| 252 |
+
inputs=[reddit_id, video_player, num_frames],
|
| 253 |
+
outputs=[output_files],
|
| 254 |
)
|
| 255 |
|
| 256 |
sample_decord_btn.click(
|
|
|
|
| 260 |
)
|
| 261 |
|
| 262 |
load_btn.click(load_video, inputs=[reddit_id], outputs=[video_player])
|
| 263 |
+
|
| 264 |
+
latest_posts_btn.click(get_latest_posts, outputs=[latest_post_dataframe])
|
| 265 |
+
top_posts_btn.click(get_top_posts, outputs=[latest_post_dataframe])
|
| 266 |
+
|
| 267 |
+
demo.load(get_latest_posts, outputs=[latest_post_dataframe])
|
| 268 |
|
| 269 |
latest_post_dataframe.select(fn=row_selected, outputs=[reddit_id]).then(
|
| 270 |
load_video, inputs=[reddit_id], outputs=[video_player]
|