zhijun.li committed on
Commit
35da597
·
1 Parent(s): 5f6df4d

Cancel mandatory video agreement

Browse files
Files changed (1) hide show
  1. app.py +33 -27
app.py CHANGED
@@ -96,8 +96,8 @@ def aggregate_and_generate_webpage(client, summaries):
96
  return content
97
 
98
  def main_process(video_file, progress=gr.Progress()):
99
- # Clean progress bar logic: explicitly call progress()
100
- progress(0, desc="Starting...")
101
 
102
  api_key = os.environ.get("ERNIE_API_KEY")
103
  if not api_key: raise gr.Error("Server Config Error: API KEY missing.")
@@ -114,30 +114,34 @@ def main_process(video_file, progress=gr.Progress()):
114
 
115
  client = OpenAI(api_key=api_key, base_url=BASE_URL)
116
 
117
- progress(0.1, desc="Step 1/3: Extracting frames...")
 
 
118
  chunks = extract_frames(video_file)
119
  if not chunks: raise gr.Error("Frame extraction failed.")
120
 
121
- progress(0.3, desc="Step 2/3: ERNIE Analyzing content...")
 
 
122
  chunk_summaries = {}
123
 
124
- # Using ThreadPool without tqdm to avoid UI glitches
125
  with ThreadPoolExecutor(max_workers=MAX_CONCURRENT_REQUESTS) as executor:
126
  future_to_chunk = {executor.submit(process_chunk_with_retry, client, i, chunk): i for i, chunk in enumerate(chunks)}
127
 
128
- total_chunks = len(chunks)
129
  completed = 0
 
130
 
131
  for future in as_completed(future_to_chunk):
132
  idx, summary = future.result()
133
  if summary: chunk_summaries[idx] = summary
134
 
135
  completed += 1
136
- # Smooth progress update from 0.3 to 0.8
137
- current_progress = 0.3 + (0.5 * (completed / total_chunks))
138
- progress(current_progress, desc=f"Step 2/3: Analyzing segment {completed}/{total_chunks}")
139
-
140
- progress(0.85, desc="Step 3/3: Synthesizing final code...")
 
141
  html_code = aggregate_and_generate_webpage(client, chunk_summaries)
142
 
143
  output_path = "generated_website.html"
@@ -149,46 +153,47 @@ def main_process(video_file, progress=gr.Progress()):
149
  data_uri = f"data:text/html;charset=utf-8;base64,{b64_html}"
150
  iframe_html = f"""<iframe src="{data_uri}" width="100%" height="600px" style="border: 1px solid #ccc; border-radius: 8px; background-color: white;"></iframe>"""
151
 
152
- progress(1.0, desc="Completed!")
153
- return iframe_html, output_path, html_code
 
154
 
155
  # --- UI ---
156
 
157
  with gr.Blocks(title="Ernie 4.5 Video2Code", theme=gr.themes.Soft()) as demo:
158
 
159
- # --- Header & Description (Goal 3) ---
160
  gr.Markdown("# ⚡ ERNIE 4.5-VL: Video to Code Agent")
161
 
162
- with gr.Accordion("📚 Technical Capabilities of ERNIE 4.5-VL", open=False):
 
163
  gr.Markdown("""
164
  This application is powered by **Baidu ERNIE 4.5 **, a state-of-the-art foundation model with specific enhancements for video understanding:
165
 
166
  * **👁️ Multimodal Heterogeneous MoE**: Uses dedicated vision experts to process images and video frames without interfering with text generation capabilities.
167
- * **⏳ 3D-RoPE Temporal Modeling**: Incorporates 3D Rotary Position Embeddings to independently encode temporal, width, and height information, allowing precise understanding of event sequences in videos.
168
- * **📐 Adaptive Resolution**: Dynamically adjusts to different video aspect ratios, ensuring fine-grained details (like small code font on screen) are captured accurately.
169
- * **🚀 Long Context Window**: Supports up to 128k context length, enabling the analysis of longer tutorials and complex logic flows.
170
  """)
171
 
172
- gr.Markdown("Upload a frontend coding tutorial video (or try the example below). The AI will watch it, understand the code, and render the result instantly.")
173
 
174
  with gr.Row():
175
  with gr.Column(scale=1):
176
- # --- Input Section ---
177
- video_input = gr.Video(label="Upload Video", format="mp4", height=320)
178
 
179
- # --- Goal 1: Examples Component ---
180
- # 用户点击这里的视频,会自动填充到上面的 video_input 中
181
  gr.Examples(
182
- examples=[["sample_demo.mp4"]], # ⚠️ 确保你上传了名为 sample_demo.mp4 的文件
183
  inputs=[video_input],
184
  label="▶️ Or try this example video:",
185
- cache_examples=False # 关闭缓存以节省空间
186
  )
187
 
188
  submit_btn = gr.Button("🚀 Generate & Render", variant="primary", size="lg")
 
 
 
189
 
190
  with gr.Column(scale=2):
191
- # --- Output Section ---
192
  with gr.Tabs():
193
  with gr.TabItem("🌐 Live Preview (Result)"):
194
  html_preview = gr.HTML(label="Rendered Page")
@@ -199,10 +204,11 @@ with gr.Blocks(title="Ernie 4.5 Video2Code", theme=gr.themes.Soft()) as demo:
199
  with gr.TabItem("⬇️ Download"):
200
  file_download = gr.File(label="Download .html File")
201
 
 
202
  submit_btn.click(
203
  fn=main_process,
204
  inputs=[video_input],
205
- outputs=[html_preview, file_download, code_output]
206
  )
207
 
208
  if __name__ == "__main__":
 
96
  return content
97
 
98
  def main_process(video_file, progress=gr.Progress()):
99
+ # 1. 初始化状态
100
+ yield "⏳ Initializing...", None, None, None
101
 
102
  api_key = os.environ.get("ERNIE_API_KEY")
103
  if not api_key: raise gr.Error("Server Config Error: API KEY missing.")
 
114
 
115
  client = OpenAI(api_key=api_key, base_url=BASE_URL)
116
 
117
+ # 2. 抽帧阶段
118
+ yield "🎞️ Step 1/3: Extracting video frames...", None, None, None
119
+ progress(0.1, desc="Extracting frames...")
120
  chunks = extract_frames(video_file)
121
  if not chunks: raise gr.Error("Frame extraction failed.")
122
 
123
+ # 3. 分析阶段
124
+ yield f"🧠 Step 2/3: ERNIE is analyzing {len(chunks)} segments...", None, None, None
125
+ progress(0.3, desc="Analyzing content...")
126
  chunk_summaries = {}
127
 
 
128
  with ThreadPoolExecutor(max_workers=MAX_CONCURRENT_REQUESTS) as executor:
129
  future_to_chunk = {executor.submit(process_chunk_with_retry, client, i, chunk): i for i, chunk in enumerate(chunks)}
130
 
 
131
  completed = 0
132
+ total = len(chunks)
133
 
134
  for future in as_completed(future_to_chunk):
135
  idx, summary = future.result()
136
  if summary: chunk_summaries[idx] = summary
137
 
138
  completed += 1
139
+ # 实时更新状态文字
140
+ yield f"🧠 Step 2/3: Analyzed segment {completed}/{total}...", None, None, None
141
+
142
+ # 4. 生成代码阶段
143
+ yield "✍️ Step 3/3: Synthesizing final HTML code...", None, None, None
144
+ progress(0.85, desc="Synthesizing code...")
145
  html_code = aggregate_and_generate_webpage(client, chunk_summaries)
146
 
147
  output_path = "generated_website.html"
 
153
  data_uri = f"data:text/html;charset=utf-8;base64,{b64_html}"
154
  iframe_html = f"""<iframe src="{data_uri}" width="100%" height="600px" style="border: 1px solid #ccc; border-radius: 8px; background-color: white;"></iframe>"""
155
 
156
+ progress(1.0, desc="Done")
157
+ # 5. 完成,返回所有结果
158
+ yield "✅ Generation Complete!", iframe_html, output_path, html_code
159
 
160
  # --- UI ---
161
 
162
  with gr.Blocks(title="Ernie 4.5 Video2Code", theme=gr.themes.Soft()) as demo:
163
 
 
164
  gr.Markdown("# ⚡ ERNIE 4.5-VL: Video to Code Agent")
165
 
166
+ # 修复2:将 open 设置为 True,默认展开
167
+ with gr.Accordion("📚 Technical Capabilities of ERNIE 4.5-VL", open=True):
168
  gr.Markdown("""
169
  This application is powered by **Baidu ERNIE 4.5 **, a state-of-the-art foundation model with specific enhancements for video understanding:
170
 
171
  * **👁️ Multimodal Heterogeneous MoE**: Uses dedicated vision experts to process images and video frames without interfering with text generation capabilities.
172
+ * **⏳ 3D-RoPE Temporal Modeling**: Incorporates 3D Rotary Position Embeddings to independently encode temporal, width, and height information.
173
+ * **📐 Adaptive Resolution**: Dynamically adjusts to different video aspect ratios to capture fine-grained code details.
174
+ * **🚀 Long Context Window**: Supports up to 128k context length for analyzing long tutorials.
175
  """)
176
 
177
+ gr.Markdown("Upload a frontend coding tutorial video. The AI will watch it, understand the code, and render the result instantly.")
178
 
179
  with gr.Row():
180
  with gr.Column(scale=1):
181
+ # 修复1:去掉了 height 参数,让它自适应高度,不会再“扁扁的”
182
+ video_input = gr.Video(label="Upload Video", format="mp4")
183
 
 
 
184
  gr.Examples(
185
+ examples=[["sample_demo.mp4"]],
186
  inputs=[video_input],
187
  label="▶️ Or try this example video:",
188
+ cache_examples=False
189
  )
190
 
191
  submit_btn = gr.Button("🚀 Generate & Render", variant="primary", size="lg")
192
+
193
+ # 修复3:新增一个状态文本框,直接显示在这里,不用滚轮找进度条了
194
+ status_output = gr.Textbox(label="Agent Status", value="Ready to start...", interactive=False)
195
 
196
  with gr.Column(scale=2):
 
197
  with gr.Tabs():
198
  with gr.TabItem("🌐 Live Preview (Result)"):
199
  html_preview = gr.HTML(label="Rendered Page")
 
204
  with gr.TabItem("⬇️ Download"):
205
  file_download = gr.File(label="Download .html File")
206
 
207
+ # 绑定事件:outputs 增加了 status_output
208
  submit_btn.click(
209
  fn=main_process,
210
  inputs=[video_input],
211
+ outputs=[status_output, html_preview, file_download, code_output]
212
  )
213
 
214
  if __name__ == "__main__":