EuuIia committed on
Commit
1ec204c
·
verified ·
1 Parent(s): c1c1220

Update app_seedvr.py

Browse files
Files changed (1) hide show
  1. app_seedvr.py +230 -195
app_seedvr.py CHANGED
@@ -1,213 +1,248 @@
1
- # app.py (Versão Corrigida)
2
 
3
- import gradio as gr
4
- from PIL import Image
5
  import os
6
- import imageio
7
- from api.ltx_server import video_generation_service
8
-
9
-
10
from huggingface_hub import logging
from huggingface_hub.utils import enable_progress_bars

# The original called set_verbosity_error/warning/info/debug back to back.
# Each call overrides the previous one, so only the final (debug) level ever
# took effect; set it once.
logging.set_verbosity_debug()

# enable_progress_bars was called without being imported, which raised a
# NameError as soon as the module was loaded.
enable_progress_bars()
19
-
20
- # --- FUNÇÕES DE AJUDA PARA A UI ---
21
- # ... (calculate_new_dimensions e handle_media_upload_for_dims permanecem as mesmas) ...
22
TARGET_FIXED_SIDE = 768
MIN_DIM_SLIDER = 256
MAX_IMAGE_SIZE = 1280


def calculate_new_dimensions(orig_w, orig_h):
    """Derive generation dimensions from the dimensions of an uploaded medium.

    The shorter side is pinned to TARGET_FIXED_SIDE. The longer side is scaled
    by the aspect ratio and rounded to a multiple of 32. Both sides are then
    clamped to the [MIN_DIM_SLIDER, MAX_IMAGE_SIZE] range.

    Args:
        orig_w: Source width in pixels.
        orig_h: Source height in pixels.

    Returns:
        A ``(height, width)`` tuple of ints. When either input is 0 the result
        is a TARGET_FIXED_SIDE square.
    """
    def _clamp(side):
        return max(MIN_DIM_SLIDER, min(side, MAX_IMAGE_SIZE))

    if orig_w == 0 or orig_h == 0:
        return int(TARGET_FIXED_SIDE), int(TARGET_FIXED_SIDE)

    is_landscape = orig_w >= orig_h
    ratio = orig_w / orig_h if is_landscape else orig_h / orig_w

    # Long side follows the aspect ratio, snapped to the 32-pixel grid.
    long_side = _clamp(round((TARGET_FIXED_SIDE * ratio) / 32) * 32)
    short_side = _clamp(TARGET_FIXED_SIDE)

    if is_landscape:
        height, width = short_side, long_side
    else:
        height, width = long_side, short_side
    return int(height), int(width)
39
-
40
def handle_media_upload_for_dims(filepath, current_h, current_w):
    """Suggest new height/width slider values after a medium is uploaded.

    Image files (png/jpg/jpeg/webp, matched by extension) are measured with
    PIL; anything else is treated as a video and measured through imageio's
    metadata. The measured size is passed to calculate_new_dimensions.

    Args:
        filepath: Path of the uploaded file; may be empty or missing.
        current_h: Current height slider value, kept when nothing can be read.
        current_w: Current width slider value, kept when nothing can be read.

    Returns:
        A ``(height_update, width_update)`` pair of ``gr.update`` objects.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg', '.webp')

    file_missing = not filepath or not os.path.exists(str(filepath))
    if file_missing:
        return gr.update(value=current_h), gr.update(value=current_w)

    try:
        looks_like_image = str(filepath).lower().endswith(image_suffixes)
        if looks_like_image:
            with Image.open(filepath) as picture:
                source_w, source_h = picture.size
        else:
            # Anything that is not a known image extension is assumed to be a video.
            with imageio.get_reader(filepath) as reader:
                metadata = reader.get_meta_data()
                source_w, source_h = metadata.get('size', (current_w, current_h))
        target_h, target_w = calculate_new_dimensions(source_w, source_h)
        return gr.update(value=target_h), gr.update(value=target_w)
    except Exception as e:
        # Best effort: on any read failure keep the sliders where they are.
        print(f"Erro ao processar mídia para dimensões: {e}")
        return gr.update(value=current_h), gr.update(value=current_w)
55
-
56
def update_frame_slider(duration):
    """Recompute the middle-keyframe slider's maximum and value for a duration.

    The frame count assumes a fixed 24 fps. The value is 48 when the clip has
    at least 48 frames, otherwise half of the available frames.

    Args:
        duration: Clip duration in seconds.

    Returns:
        A ``gr.update`` carrying the new ``maximum`` and ``value``.
    """
    frames_per_second = 24.0
    frame_limit = int(duration * frames_per_second)

    # Keep the default inside the new range.
    if frame_limit >= 48:
        default_frame = 48
    else:
        default_frame = frame_limit // 2

    return gr.update(maximum=frame_limit, value=default_frame)
63
-
64
-
65
- # --- FUNÇÃO WRAPPER PARA CHAMAR O SERVIÇO ---
66
def gradio_generate_wrapper(
    prompt, negative_prompt, mode,
    # Keyframe inputs
    start_image,
    middle_image, middle_frame, middle_weight,
    end_image, end_weight,
    # Other inputs
    input_video, height, width, duration,
    frames_to_use, seed, randomize_seed,
    guidance_scale, improve_texture,
    progress=gr.Progress(track_tqdm=True)
):
    """Forward the UI values to ``video_generation_service.generate``.

    Numeric and boolean UI values are coerced to the types passed to the
    service (int/float/bool) before the call.

    Returns:
        ``(output_path, used_seed)`` as returned by the service.

    Raises:
        gr.Error: With the original message for a ``ValueError`` from the
            service, or with a generic message for any other exception.
    """
    def progress_handler(step, total_steps):
        # Guard against a zero/None total so a progress report can never
        # abort the generation with a ZeroDivisionError.
        fraction = step / total_steps if total_steps else 0.0
        progress(fraction, desc="Salvando vídeo...")

    try:
        output_path, used_seed = video_generation_service.generate(
            prompt=prompt, negative_prompt=negative_prompt, mode=mode,
            start_image_filepath=start_image,
            middle_image_filepath=middle_image,
            middle_frame_number=middle_frame,
            middle_image_weight=middle_weight,
            end_image_filepath=end_image,
            end_image_weight=end_weight,
            input_video_filepath=input_video,
            height=int(height), width=int(width), duration=float(duration),
            frames_to_use=int(frames_to_use), seed=int(seed),
            randomize_seed=bool(randomize_seed), guidance_scale=float(guidance_scale),
            improve_texture=bool(improve_texture), progress_callback=progress_handler
        )
        return output_path, used_seed
    except ValueError as e:
        # Validation problems are shown to the user verbatim.
        raise gr.Error(str(e)) from e
    except Exception as e:
        # Anything else is logged; the user only sees a generic message.
        print(f"Erro inesperado na geração: {e}")
        raise gr.Error("Ocorreu um erro inesperado. Verifique os logs.") from e
 
 
 
 
 
 
 
 
 
 
102
 
103
- # --- DEFINIÇÃO DA INTERFACE GRADIO ---
104
# Gradio UI for the LTX keyframe video generator: three task tabs
# (image-to-video, text-to-video, video-to-video), shared duration/texture
# controls, an output video, and an "Advanced settings" accordion.
# NOTE(review): the source text carried no indentation, so the nesting of the
# layout containers below is reconstructed — confirm against the real file.
css = "#col-container { margin: 0 auto; max-width: 900px; }"
with gr.Blocks(css=css) as demo:
    gr.Markdown("# LTX Video com Keyframes")
    gr.Markdown("Guie a geração de vídeo usando imagens de início, meio e fim.")

    with gr.Row():
        with gr.Column():
            with gr.Tab("image-to-video (Keyframes)") as image_tab:
                i2v_prompt = gr.Textbox(label="Prompt", value="Uma bela transição entre as imagens", lines=2)

                with gr.Row():
                    with gr.Column(scale=1):
                        gr.Markdown("#### Início (Obrigatório)")
                        start_image_i2v = gr.Image(label="Imagem de Início", type="filepath", sources=["upload", "clipboard"])
                    with gr.Column(scale=1):
                        gr.Markdown("#### Meio (Opcional)")
                        middle_image_i2v = gr.Image(label="Imagem do Meio", type="filepath", sources=["upload", "clipboard"])
                        middle_frame_i2v = gr.Slider(label="Frame Alvo", minimum=0, maximum=200, step=1, value=48)
                        middle_weight_i2v = gr.Slider(label="Peso/Força", minimum=0.0, maximum=1.0, step=0.05, value=1.0)
                    with gr.Column(scale=1):
                        gr.Markdown("#### Fim (Opcional)")
                        end_image_i2v = gr.Image(label="Imagem de Fim", type="filepath", sources=["upload", "clipboard"])
                        end_weight_i2v = gr.Slider(label="Peso/Força", minimum=0.0, maximum=1.0, step=0.05, value=1.0)

                i2v_button = gr.Button("Generate Image-to-Video", variant="primary")

            with gr.Tab("text-to-video") as text_tab:
                t2v_prompt = gr.Textbox(label="Prompt", value="A majestic dragon flying over a medieval castle", lines=3)
                t2v_button = gr.Button("Generate Text-to-Video", variant="primary")

            with gr.Tab("video-to-video") as video_tab:
                video_v2v = gr.Video(label="Input Video", sources=["upload", "webcam"])
                frames_to_use = gr.Slider(label="Frames to use from input video", minimum=9, maximum=257, value=9, step=8, info="Must be N*8+1.")
                v2v_prompt = gr.Textbox(label="Prompt", value="Change the style to cinematic anime", lines=3)
                v2v_button = gr.Button("Generate Video-to-Video", variant="primary")

            duration_input = gr.Slider(label="Video Duration (seconds)", minimum=0.3, maximum=8.5, value=4, step=0.1)
            improve_texture = gr.Checkbox(label="Improve Texture (multi-scale)", value=True, visible=True)

        with gr.Column():
            output_video = gr.Video(label="Generated Video", interactive=False)

    with gr.Accordion("Advanced settings", open=False):
        # Hidden dropdown that tracks the selected tab; updated by the tab select events below.
        mode = gr.Dropdown(["text-to-video", "image-to-video", "video-to-video"], label="task", value="image-to-video", visible=False)
        negative_prompt_input = gr.Textbox(label="Negative Prompt", value="worst quality, blurry, jittery", lines=2)
        with gr.Row():
            seed_input = gr.Number(label="Seed", value=42, precision=0)
            randomize_seed_input = gr.Checkbox(label="Randomize Seed", value=True)
            guidance_scale_input = gr.Slider(label="Guidance Scale (CFG)", minimum=1.0, maximum=10.0, value=3.0, step=0.1)
        with gr.Row():
            height_input = gr.Slider(label="Height", value=512, step=32, minimum=MIN_DIM_SLIDER, maximum=MAX_IMAGE_SIZE)
            width_input = gr.Slider(label="Width", value=704, step=32, minimum=MIN_DIM_SLIDER, maximum=MAX_IMAGE_SIZE)

    # --- UI EVENT LOGIC ---

    # Uploading a start image or an input video re-derives the height/width sliders.
    start_image_i2v.upload(fn=handle_media_upload_for_dims, inputs=[start_image_i2v, height_input, width_input], outputs=[height_input, width_input])
    video_v2v.upload(fn=handle_media_upload_for_dims, inputs=[video_v2v, height_input, width_input], outputs=[height_input, width_input])
    # Changing the duration rescales the middle-keyframe slider.
    duration_input.change(fn=update_frame_slider, inputs=duration_input, outputs=middle_frame_i2v)

    # Selecting a tab writes the matching task name into the hidden `mode` dropdown.
    image_tab.select(fn=lambda: "image-to-video", outputs=[mode])
    text_tab.select(fn=lambda: "text-to-video", outputs=[mode])
    video_tab.select(fn=lambda: "video-to-video", outputs=[mode])

    # --- <START OF FIX> ---
    # Spell out the input lists explicitly to avoid errors.
    # Each list is passed positionally to gradio_generate_wrapper, so its order
    # must follow that function's parameter order.

    # Placeholders for the buttons that do not use certain inputs
    none_image = gr.Textbox(visible=False, value=None)
    none_video = gr.Textbox(visible=False, value=None)

    # Parameters shared by all three modes
    shared_params = [
        height_input, width_input, duration_input, frames_to_use,
        seed_input, randomize_seed_input, guidance_scale_input, improve_texture
    ]

    i2v_inputs = [
        i2v_prompt, negative_prompt_input, mode,
        start_image_i2v, middle_image_i2v, middle_frame_i2v, middle_weight_i2v,
        end_image_i2v, end_weight_i2v,
        none_video,  # Placeholder for input_video
        *shared_params
    ]

    t2v_inputs = [
        t2v_prompt, negative_prompt_input, mode,
        none_image, none_image, gr.Number(value=-1, visible=False), gr.Slider(value=0, visible=False),  # Placeholders for the keyframes
        none_image, gr.Slider(value=0, visible=False),
        none_video,  # Placeholder for input_video
        *shared_params
    ]

    v2v_inputs = [
        v2v_prompt, negative_prompt_input, mode,
        none_image, none_image, gr.Number(value=-1, visible=False), gr.Slider(value=0, visible=False),  # Placeholders for the keyframes
        none_image, gr.Slider(value=0, visible=False),
        video_v2v,  # Real video input
        *shared_params
    ]

    # The wrapper returns (output_path, used_seed); the seed is written back to the seed field.
    common_outputs = [output_video, seed_input]

    i2v_button.click(fn=gradio_generate_wrapper, inputs=i2v_inputs, outputs=common_outputs, api_name="image_to_video_keyframes")
    t2v_button.click(fn=gradio_generate_wrapper, inputs=t2v_inputs, outputs=common_outputs, api_name="text_to_video")
    v2v_button.click(fn=gradio_generate_wrapper, inputs=v2v_inputs, outputs=common_outputs, api_name="video_to_video")
    # --- <END OF FIX> ---


if __name__ == "__main__":
    demo.queue().launch(debug=True, share=False)
 
 
 
 
 
1
+ # app_seedvr.py
2
 
 
 
3
  import os
4
+ import sys
5
+ from pathlib import Path
6
+ from typing import Optional
7
+ import gradio as gr
8
+ import cv2
9
+
10
+ # --- INTEGRAÇÃO COM A LÓGICA DO SERVIDOR ---
11
try:
    # Import the SeedVRServer class, which this app uses as its inference library.
    from api.seedvr_server import SeedVRServer
except ImportError as e:
    print(f"ERRO FATAL: Não foi possível importar o SeedVRServer. Detalhes: {e}")
    # The application cannot run without the server logic, so re-raise.
    raise

# --- INITIALIZATION ---
# Create a single module-level server instance that every UI callback reuses.
# NOTE(review): the original comment states that setup (cloning the repo,
# downloading models) runs once here — confirm against SeedVRServer.__init__.
server = SeedVRServer()
23
+
24
+ # --- FUNÇÕES AUXILIARES ---
25
+
26
+ def _is_video(path: str) -> bool:
27
+ """Verifica se um caminho de arquivo corresponde a um tipo de vídeo."""
28
+ if not path: return False
29
+ import mimetypes
30
+ mime, _ = mimetypes.guess_type(path)
31
+ return (mime or "").startswith("video")
32
+
33
+ def _extract_first_frame(video_path: str) -> Optional[str]:
34
+ """Extrai o primeiro frame de um vídeo e o salva como uma imagem JPG."""
35
+ if not video_path or not os.path.exists(video_path): return None
 
 
 
 
36
  try:
37
+ vid_cap = cv2.VideoCapture(video_path)
38
+ if not vid_cap.isOpened():
39
+ print(f"Erro: Não foi possível abrir o vídeo em {video_path}")
40
+ return None
41
+ success, image = vid_cap.read()
42
+ vid_cap.release()
43
+ if not success:
44
+ print(f"Erro: Não foi possível ler o primeiro frame de {video_path}")
45
+ return None
46
+
47
+ # Salva o frame no mesmo diretório do vídeo, com extensão .jpg
48
+ image_path = Path(video_path).with_suffix(".jpg")
49
+ cv2.imwrite(str(image_path), image)
50
+ return str(image_path)
51
  except Exception as e:
52
+ print(f"Erro ao extrair o primeiro frame: {e}")
53
+ return None
54
+
55
def on_file_upload(file_obj):
    """Adjust the `sp_size` slider and clear old results after an upload.

    Args:
        file_obj: The uploaded file. The input component is declared with
            ``type="filepath"``, which delivers a plain ``str`` path; objects
            exposing a ``.name`` path attribute are accepted as well.

    Returns:
        A 5-tuple of updates for (sp_size slider, image output, video output,
        download output, log window). Videos get ``sp_size=8`` on an
        interactive slider, images get ``sp_size=1`` on a locked slider, and
        ``None`` resets the slider value to 1.
    """
    # Every branch clears the three result components and hides the log.
    cleared_outputs = (None, None, None, gr.update(value=None, visible=False))

    if file_obj is None:
        return (gr.update(value=1), *cleared_outputs)

    # The original read `file_obj.name` unconditionally, which raises
    # AttributeError when the component hands over a str path.
    if isinstance(file_obj, (str, os.PathLike)):
        file_path = os.fspath(file_obj)
    else:
        file_path = getattr(file_obj, "name", "")

    if _is_video(file_path):
        # Videos: suggest the multi-GPU default and let the user change it.
        return (gr.update(value=8, interactive=True), *cleared_outputs)

    # Images: lock the value at 1.
    return (gr.update(value=1, interactive=False), *cleared_outputs)
70
+
71
+ # --- FUNÇÃO PRINCIPAL DE INFERÊNCIA DA UI ---
72
+
73
def run_inference_ui(
    input_file_path: Optional[str],
    resolution: str,
    sp_size: int,
    fps: float,
    progress=gr.Progress(track_tqdm=True)
):
    """Gradio click handler that runs a restoration job and reports the result.

    This is a generator. Each yielded 5-tuple updates, in order, the run
    button, the image output, the video output, the download output and the
    log window. It yields once when the job starts (button disabled, outputs
    hidden, log shown) and once when it ends (results or error log, button
    re-enabled). Log lines collected from the backend callback are shown with
    the final yield.

    Args:
        input_file_path: Path of the uploaded medium, or None/empty.
        resolution: Target size as a numeric string; used for both height
            and width.
        sp_size: Value of the `sp_size` slider, forwarded as an int.
        fps: Output FPS; a falsy or non-positive value is forwarded as None.
        progress: Gradio progress tracker.
    """
    # 1. Initial state and validation:
    # disable the button, clear previous results and show the log window.
    yield (
        gr.update(interactive=False, value="Processing... 🚀"),
        gr.update(value=None, visible=False),
        gr.update(value=None, visible=False),
        gr.update(value=None, visible=False),
        gr.update(value="▶ Starting inference process...\n", visible=True)
    )

    if not input_file_path:
        gr.Warning("Please upload a media file first.")
        # Re-enable the button and hide the output components.
        yield (gr.update(interactive=True, value="Restore Media"), None, None, None, gr.update(visible=False))
        return

    log_buffer = ["▶ Starting inference process...\n"]
    last_log_message = ""
    was_input_video = _is_video(input_file_path)

    try:
        def progress_callback_wrapper(step: float, desc: str):
            """Record new log lines and forward progress to Gradio."""
            nonlocal last_log_message
            # Only log a message when it differs from the previous one.
            if desc != last_log_message:
                log_buffer.append(f"{desc}\n")
                last_log_message = desc
            progress(step, desc=desc)

        # 2. Run the inference, handing the backend our callback.
        video_result_path = server.run_inference_direct(
            file_path=input_file_path,
            seed=42,  # Fixed seed
            res_h=int(resolution),
            res_w=int(resolution),  # Width equals height
            sp_size=int(sp_size),
            fps=float(fps) if fps and fps > 0 else None,
            progress=progress_callback_wrapper,
        )

        progress(1.0, desc="Complete!")
        log_buffer.append("✅ Inference complete! Processing final output...\n")

        # 3. Prepare the results.
        final_image, final_video = None, None
        if was_input_video:
            final_video = video_result_path
            log_buffer.append("✅ Video result is ready.\n")
        else:
            # Image input: pull the still out of the result video, and also
            # offer the result video itself.
            final_image = _extract_first_frame(video_result_path)
            final_video = video_result_path
            if final_image is not None:
                log_buffer.append("✅ Image result extracted from video.\n")
            else:
                # Extraction returns None on failure; do not claim success.
                log_buffer.append("⚠ Could not extract an image from the result video.\n")

        # Final yield: show the results and re-enable the button.
        yield (
            gr.update(interactive=True, value="Restore Media"),
            gr.update(value=final_image, visible=final_image is not None),
            gr.update(value=final_video, visible=final_video is not None),
            gr.update(value=video_result_path, visible=video_result_path is not None),
            ''.join(log_buffer)
        )

    except Exception as e:
        error_message = f" Inference failed: {e}"
        # gr.Error is an exception class: constructing it without `raise`
        # shows nothing. gr.Warning displays an alert without interrupting
        # the generator, so the error-state yield below still runs.
        gr.Warning(error_message)
        log_buffer.append(f"\n{error_message}")
        import traceback
        traceback.print_exc()

        # Error state: re-enable the button and show the log with the error.
        yield (
            gr.update(interactive=True, value="Restore Media"),
            None, None, None,
            gr.update(value=''.join(log_buffer), visible=True)
        )
163
 
164
+ # --- LAYOUT DA INTERFACE GRÁFICA (GRADIO) ---
165
+
166
# Gradio UI for SeedVR restoration: an upload/settings column on the left, a
# results column (log, image, video, download) on the right, and a footer.
# NOTE(review): the source text carried no indentation, so the nesting of the
# layout containers and the whitespace inside the Markdown strings below are
# reconstructed — confirm against the real file.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue"), title="SeedVR Media Restoration") as demo:
    # Header
    gr.Markdown(
        """
        <div style='text-align: center; margin-bottom: 20px;'>
            <h1>📸 SeedVR - Image & Video Restoration 🚀</h1>
            <p>High-quality media upscaling powered by SeedVR-3B. Upload your file and see the magic.</p>
        </div>
        """
    )

    with gr.Row():
        # --- Left column: inputs and controls ---
        with gr.Column(scale=1):
            gr.Markdown("### 1. Upload Media")
            input_media = gr.File(label="Input File (Video or Image)", type="filepath", interactive=True)

            gr.Markdown("### 2. Configure Settings")
            with gr.Accordion("Generation Parameters", open=True):
                resolution_select = gr.Dropdown(
                    label="Resolution",
                    choices=["480", "560", "720", "960", "1024", "2048"],
                    value="480",
                    info="Sets the output height and width to this value."
                )

                # NOTE(review): with minimum=1 and step=4 the reachable values
                # are 1, 5, 9, 13, yet the default (and the value set by
                # on_file_upload for videos) is 8 — confirm the intended step.
                sp_size_slider = gr.Slider(
                    label="Frames per Batch (sp_size)",
                    minimum=1, maximum=16, step=4, value=8,
                    info="For multi-GPU videos. Automatically set to 1 for images."
                )

                fps_out = gr.Number(label="Output FPS (for Videos)", value=24, precision=0, info="Set to 0 to use the original FPS.")

            run_button = gr.Button("Restore Media", variant="primary", icon="")

        # --- Right column: results ---
        with gr.Column(scale=2):
            gr.Markdown("### 3. Results")

            # Log window
            log_window = gr.Textbox(
                label="Inference Log 📝",
                lines=8, max_lines=15,
                interactive=False, visible=False, autoscroll=True
            )

            # Output components (start hidden)
            output_image = gr.Image(label="Image Result", show_download_button=True, type="filepath", visible=False)
            output_video = gr.Video(label="Video Result", visible=False)
            output_download = gr.File(label="Download Full Result (Video)", visible=False)

    # --- Footer ---
    gr.Markdown(
        """
        ---
        *Space and Docker were developed by Carlex.*
        *Contact: Email: Carlex22@gmail.com | GitHub: [carlex22](https://github.com/carlex22)*
        """
    )

    # --- UI event logic ---

    # On upload, adjust the `sp_size` slider and clear old outputs.
    input_media.upload(
        fn=on_file_upload,
        inputs=[input_media],
        outputs=[sp_size_slider, output_image, output_video, output_download, log_window]
    )

    # On click, run the main inference function. The outputs order must match
    # the 5-tuples yielded by run_inference_ui.
    run_button.click(
        fn=run_inference_ui,
        inputs=[input_media, resolution_select, sp_size_slider, fps_out],
        outputs=[run_button, output_image, output_video, output_download, log_window],
    )


if __name__ == "__main__":
    # Host and port come from GRADIO_SERVER_NAME / GRADIO_SERVER_PORT,
    # defaulting to 0.0.0.0:7860.
    demo.launch(
        server_name=os.getenv("GRADIO_SERVER_NAME", "0.0.0.0"),
        server_port=int(os.getenv("GRADIO_SERVER_PORT", "7860")),
        show_error=True
    )