Ernie4.5-VL-Video2Coder

Sleeping

App Files Community

jzhang533 committed on Nov 26, 2025

Commit

83dc473

1 Parent(s): f162685

add a new example

Browse files

Signed-off-by: Zhang Jun <jzhang533@gmail.com>

Files changed (3) hide show

.gitattributes +1 -0
app.py +8 -6
skiing.mp4 +3 -0

.gitattributes CHANGED Viewed

@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 sample_demo.mp4 filter=lfs diff=lfs merge=lfs -text

 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 sample_demo.mp4 filter=lfs diff=lfs merge=lfs -text
+*.mp4 filter=lfs diff=lfs merge=lfs -text

app.py CHANGED Viewed

@@ -49,10 +49,10 @@ def extract_frames(video_path, interval_sec=1):
 def process_chunk_with_retry(client, chunk_index, frames_b64, max_retries=3):
     """Send chunk to LLM."""
     prompt = (
-        "This is a segment from a frontend web development video tutorial (screenshots taken every second). "
-        "Please focus intently on the code shown on the screen and the resulting web page style. "
-        "Describe in detail the HTML structure, CSS styling rules, or JavaScript logic presented in this segment. "
-        "Ignore unrelated video elements."
     )
     content = [{"type": "text", "text": prompt}]
     for f in frames_b64:
@@ -74,6 +74,8 @@ def aggregate_and_generate_webpage(client, summaries):
     full_summary = "\n".join([f"Segment {i+1} Summary: {s}" for i, s in sorted(summaries.items()) if s])
     final_prompt = f"""
     You are an expert Frontend Engineer. Based on the video segment summaries, write a complete HTML file.
     **Summaries:**
     {full_summary}
@@ -182,7 +184,7 @@ with gr.Blocks(title="Ernie 4.5 Video2Code", theme=gr.themes.Soft()) as demo:
         *   **🚀 Long Context Window**: Supports up to 128k context length for analyzing long tutorials.
         """)
-    gr.Markdown("Upload a frontend coding tutorial video. The AI will watch it, understand the code, and render the result instantly.")
     with gr.Row():
         with gr.Column(scale=1):
@@ -190,7 +192,7 @@ with gr.Blocks(title="Ernie 4.5 Video2Code", theme=gr.themes.Soft()) as demo:
             video_input = gr.Video(label="Upload Video", format="mp4")
             gr.Examples(
-                examples=[["sample_demo.mp4"]],
                 inputs=[video_input],
                 label="▶️ Or try this example video:",
                 cache_examples=False

 def process_chunk_with_retry(client, chunk_index, frames_b64, max_retries=3):
     """Send chunk to LLM."""
     prompt = (
+        "This is a segment from a video. "
+        "If code is visible, please extract the HTML, CSS, and JavaScript. "
+        "If it is a general video (e.g., sports, nature), describe the scene, colors, and mood to inspire a website design. "
+        "Provide a detailed summary suitable for a frontend engineer to build a website from."
     )
     content = [{"type": "text", "text": prompt}]
     for f in frames_b64:
     full_summary = "\n".join([f"Segment {i+1} Summary: {s}" for i, s in sorted(summaries.items()) if s])
     final_prompt = f"""
     You are an expert Frontend Engineer. Based on the video segment summaries, write a complete HTML file.
+    If the video was a coding tutorial, reconstruct the code.
+    If the video was a general scene, create a modern, responsive website inspired by the video's content (e.g., a skiing resort page for a skiing video).
     **Summaries:**
     {full_summary}
         *   **🚀 Long Context Window**: Supports up to 128k context length for analyzing long tutorials.
         """)
+    gr.Markdown("Upload a video (e.g., a coding tutorial or a scene like skiing). The AI will watch it and generate a website based on the content.")
     with gr.Row():
         with gr.Column(scale=1):
             video_input = gr.Video(label="Upload Video", format="mp4")
             gr.Examples(
+                examples=[["sample_demo.mp4"], ["skiing.mp4"]],
                 inputs=[video_input],
                 label="▶️ Or try this example video:",
                 cache_examples=False

skiing.mp4 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3cca642ed04145e8a2df8cea332431be3a4c432f887686bbe52f66a25dd78a3d
+size 8513765