# Scraped from a Hugging Face Spaces file view (Space status: Running).
# File size: 11,621 Bytes; revision 21d1989.
# The viewer's line-number gutter (1-326) that followed here was extraction residue and has been dropped.
import os
import tempfile
import gradio as gr
import requests
import json
from loguru import logger
from typing import Optional, Tuple
import base64
import time
def call_gradio_client_api(video_file, text_prompt, guidance_scale, inference_steps, sample_nums):
    """Run inference through the official HunyuanVideo-Foley Hugging Face Space.

    Args:
        video_file: Video input, forwarded as the first positional argument
            of the Space's ``/infer_single_video`` endpoint.
        text_prompt: Text prompt describing the desired audio.
        guidance_scale: CFG scale forwarded to the endpoint.
        inference_steps: Number of inference steps forwarded to the endpoint.
        sample_nums: Number of samples requested from the endpoint.

    Returns:
        A ``(result, status_message)`` tuple. ``result`` is whatever
        ``Client.predict`` returned on success, or ``None`` when the call
        failed; ``status_message`` is a user-facing status string.

    Raises:
        ImportError: If the optional ``gradio_client`` package is missing.
    """
    # gradio_client is an optional dependency, so it is imported lazily.
    # The import deliberately lives OUTSIDE the try block: a missing package
    # must surface as ImportError (smart_api_inference has a dedicated
    # handler for it) instead of being swallowed by the broad handler below
    # and reported as a generic API error.
    from gradio_client import Client

    try:
        logger.info("连接到官方 HunyuanVideo-Foley Space...")
        client = Client("tencent/HunyuanVideo-Foley")

        logger.info("发送推理请求...")
        result = client.predict(
            video_file,       # video file
            text_prompt,      # text prompt
            guidance_scale,   # CFG scale
            inference_steps,  # inference steps
            sample_nums,      # number of samples
            api_name="/infer_single_video",
        )
        return result, "✅ 成功通过官方API生成音频!"
    except Exception as e:
        error_msg = str(e)
        logger.error(f"Gradio Client API 调用失败: {error_msg}")

        # Map well-known failure keywords to friendlier messages; the first
        # matching keyword wins, in this order.
        lowered = error_msg.lower()
        known_failures = (
            ("not found", "❌ 官方Space的API端点未找到,可能接口已更改"),
            ("connection", "❌ 无法连接到官方Space,请检查网络"),
            ("queue", "⏳ 官方Space繁忙,请稍后重试"),
        )
        for keyword, message in known_failures:
            if keyword in lowered:
                return None, message
        return None, f"❌ API调用错误: {error_msg}"
def call_huggingface_inference_api(video_file, text_prompt):
    """Run inference through the Hugging Face Inference API.

    The video is read from disk, base64-encoded and posted as JSON together
    with the text prompt. On HTTP 200 the raw response body is written to
    ``generated_audio.wav`` inside a fresh temporary directory.

    Args:
        video_file: Path of the video file to upload.
        text_prompt: Text prompt describing the desired audio.

    Returns:
        A ``(result, status_message)`` tuple. ``result`` is a one-element
        list holding the path of the written audio file on success, or
        ``None`` on any failure (non-200 status or raised exception).
    """
    try:
        logger.info("尝试Hugging Face Inference API...")
        API_URL = "https://api-inference.huggingface.co/models/tencent/HunyuanVideo-Foley"

        with open(video_file, "rb") as f:
            video_data = f.read()

        # Only send an Authorization header when a token is actually
        # configured: "Bearer " followed by nothing is a malformed credential
        # and can turn an otherwise anonymous request into an auth failure.
        # The token is stripped because stray whitespace/newlines (common
        # when the value comes from a secrets file) are not valid in a header.
        headers = {}
        hf_token = os.environ.get("HF_TOKEN", "").strip()
        if hf_token:
            headers["Authorization"] = f"Bearer {hf_token}"

        payload = {
            "inputs": {
                "video": base64.b64encode(video_data).decode(),
                "text": text_prompt,
            }
        }
        response = requests.post(
            API_URL,
            headers=headers,
            json=payload,
            timeout=300,
        )

        if response.status_code != 200:
            logger.error(f"HF API错误: {response.status_code}")
            return None, f"❌ Hugging Face API返回错误: {response.status_code}"

        # Persist the response body; the caller receives the file path.
        temp_dir = tempfile.mkdtemp()
        audio_path = os.path.join(temp_dir, "generated_audio.wav")
        with open(audio_path, "wb") as f:
            f.write(response.content)
        return [audio_path], "✅ 通过Hugging Face API生成成功!"
    except Exception as e:
        logger.error(f"HF API调用失败: {str(e)}")
        return None, f"❌ Hugging Face API调用失败: {str(e)}"
def try_alternative_apis(video_file, text_prompt):
    """Placeholder for additional API backends.

    No alternative service is wired in yet, so on the normal path this only
    logs the attempt and reports that nothing is available.

    Returns:
        A ``(None, status_message)`` tuple.
    """
    # Future backends (e.g. Replicate, RunPod) would be attempted here.
    try:
        logger.info("尝试demo接口...")
    except Exception as exc:
        return None, f"❌ 替代API调用失败: {str(exc)}"
    return None, "❌ 暂无可用的替代API服务"
def smart_api_inference(video_file, text_prompt, guidance_scale=4.5, inference_steps=50, sample_nums=1):
    """Try each available API backend in turn and return the first success.

    Order of attempts: the official Space via ``gradio_client``, then the
    Hugging Face Inference API, then any alternative services.

    Args:
        video_file: Path of the uploaded video, or ``None`` if nothing was
            uploaded.
        text_prompt: Audio description; a generic prompt is substituted when
            it is empty.
        guidance_scale: CFG scale (only used by the official Space call).
        inference_steps: Inference steps (only used by the official Space call).
        sample_nums: Number of samples (only used by the official Space call).

    Returns:
        A ``(results, status_message)`` tuple. ``results`` is the successful
        backend's result, or ``[]`` when no video was given or every backend
        failed. ``status_message`` is the newline-joined log of all attempts.
    """
    if video_file is None:
        return [], "❌ 请上传视频文件!"
    if not text_prompt:
        text_prompt = "audio for this video"

    logger.info(f"开始API推理: {video_file}")
    logger.info(f"文本提示: {text_prompt}")
    status_updates = []

    # Method 1: official Space through gradio_client (most likely to work).
    status_updates.append("🔄 尝试连接官方Space API...")
    try:
        result, status = call_gradio_client_api(
            video_file, text_prompt, guidance_scale, inference_steps, sample_nums
        )
        if result:
            return result, "\n".join(status_updates + [status])
        status_updates.append(status)
    except ImportError:
        status_updates.append("⚠️ gradio_client未安装,跳过官方API调用")

    # Method 2: Hugging Face Inference API.
    status_updates.append("🔄 尝试Hugging Face Inference API...")
    result, status = call_huggingface_inference_api(video_file, text_prompt)
    if result:
        return result, "\n".join(status_updates + [status])
    status_updates.append(status)

    # Method 3: alternative services. The result is checked like the other
    # methods so that a working alternative backend is actually returned
    # instead of being discarded and reported as an overall failure.
    status_updates.append("🔄 尝试替代API服务...")
    result, status = try_alternative_apis(video_file, text_prompt)
    if result:
        return result, "\n".join(status_updates + [status])
    status_updates.append(status)

    # Every method failed: append troubleshooting advice to the attempt log.
    final_message = "\n".join(status_updates + [
        "",
        "💡 **解决方案建议:**",
        "• 安装 gradio_client: pip install gradio_client",
        "• 配置 HF_TOKEN 环境变量",
        "• 等待官方Space负载降低",
        "• 本地运行完整模型(需24GB+ RAM)",
        "",
        "🔗 **官方Space**: https://huggingface.co/spaces/tencent/HunyuanVideo-Foley",
    ])
    return [], final_message
def create_real_api_interface():
    """Build the Gradio Blocks UI for the API-client version of the app.

    The layout has an input column (video, prompt, three sliders, generate
    button) and an output column (up to six audio players plus a status
    box). Clicking the button runs ``smart_api_inference`` and fills the
    audio players; moving the sample-count slider toggles how many players
    are visible.

    Returns:
        The constructed ``gr.Blocks`` application (not yet launched).
    """
    # Number of audio slots rendered in the output column; also the upper
    # bound of the sample-count slider.
    max_samples = 6

    css = """
    .api-status {
        background: #f0f8ff;
        border: 2px solid #4169e1;
        border-radius: 10px;
        padding: 1rem;
        margin: 1rem 0;
        color: #191970;
    }
    """

    with gr.Blocks(css=css, title="HunyuanVideo-Foley API Client") as app:
        # Header
        gr.HTML("""
        <div style="text-align: center; padding: 2rem; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); border-radius: 20px; margin-bottom: 2rem; color: white;">
        <h1>🎵 HunyuanVideo-Foley</h1>
        <p>API客户端 - 调用真实模型推理</p>
        </div>
        """)

        # API status notice
        gr.HTML("""
        <div class="api-status">
        <strong>🌐 真实API调用模式:</strong> 这个版本会通过API调用真实的HunyuanVideo-Foley模型进行推理。
        <br><strong>优点:</strong> 真实AI音频生成,无需本地大内存
        <br><strong>缺点:</strong> 依赖外部服务可用性,可能需要等待队列
        </div>
        """)

        with gr.Row():
            # Input column
            with gr.Column(scale=1):
                gr.Markdown("### 📹 视频输入")
                # NOTE(review): the format hint is rendered as Markdown
                # because `info` is not a documented gr.Video argument, and
                # Gradio releases that reject unknown keyword arguments
                # would raise TypeError on it — confirm against the pinned
                # Gradio version.
                gr.Markdown("支持MP4、AVI、MOV等格式")
                video_input = gr.Video(label="上传视频")

                text_input = gr.Textbox(
                    label="🎯 音频描述",
                    placeholder="描述你想要的音频效果,例如:脚步声、雨声、车辆行驶等",
                    lines=3,
                    value="audio sound effects for this video",
                )

                with gr.Row():
                    guidance_scale = gr.Slider(
                        minimum=1.0,
                        maximum=10.0,
                        value=4.5,
                        step=0.1,
                        label="🎚️ CFG Scale",
                    )
                    inference_steps = gr.Slider(
                        minimum=10,
                        maximum=100,
                        value=50,
                        step=5,
                        label="⚡ 推理步数",
                    )
                    sample_nums = gr.Slider(
                        minimum=1,
                        maximum=max_samples,
                        value=1,
                        step=1,
                        label="🎲 样本数量",
                    )

                generate_btn = gr.Button(
                    "🎵 调用API生成音频",
                    variant="primary",
                    size="lg",
                )

            # Output column
            with gr.Column(scale=1):
                gr.Markdown("### 🎵 生成结果")
                # Only the first player is visible initially.
                audio_outputs = [
                    gr.Audio(label=f"样本 {i+1}", visible=(i == 0))
                    for i in range(max_samples)
                ]
                status_output = gr.Textbox(
                    label="API状态",
                    interactive=False,
                    lines=10,
                    placeholder="等待API调用...",
                )

        def process_with_api(video_file, text_prompt, guidance_scale, inference_steps, sample_nums):
            """Run inference and return one update per audio slot plus the status text."""
            results, status_msg = smart_api_inference(
                video_file, text_prompt, guidance_scale, inference_steps, int(sample_nums)
            )
            # Only list results are mapped onto the audio slots; anything
            # else leaves every slot empty and hidden.
            samples = results[:max_samples] if isinstance(results, list) else []

            # Value and visibility travel together in a single gr.update per
            # audio component, so the return value lines up one-to-one with
            # the `outputs` list registered on the click event below.
            audio_updates = [
                gr.update(
                    value=samples[i] if i < len(samples) else None,
                    visible=i < len(samples),
                )
                for i in range(max_samples)
            ]
            return audio_updates + [status_msg]

        def update_visibility(sample_nums):
            """Show as many audio slots as the slider currently requests."""
            sample_nums = int(sample_nums)
            return [gr.update(visible=(i < sample_nums)) for i in range(max_samples)]

        # Event wiring
        sample_nums.change(
            fn=update_visibility,
            inputs=[sample_nums],
            outputs=audio_outputs,
        )

        # `outputs` must contain components only; visibility changes are
        # delivered through the gr.update objects returned by the callback.
        generate_btn.click(
            fn=process_with_api,
            inputs=[video_input, text_input, guidance_scale, inference_steps, sample_nums],
            outputs=audio_outputs + [status_output],
        )

        # Footer
        gr.HTML("""
        <div style="text-align: center; padding: 2rem; color: #666; border-top: 1px solid #eee; margin-top: 2rem;">
        <p><strong>📡 API调用版本</strong> - 通过网络调用真实模型进行推理</p>
        <p>🔗 官方Space: <a href="https://huggingface.co/spaces/tencent/HunyuanVideo-Foley" target="_blank">tencent/HunyuanVideo-Foley</a></p>
        <p>⚠️ 需要安装: <code>pip install gradio_client</code></p>
        </div>
        """)

    return app
if __name__ == "__main__":
    # Replace loguru's default sink with a plain stdout printer at INFO level.
    logger.remove()
    logger.add(lambda msg: print(msg, end=""), level="INFO")
    logger.info("启动 HunyuanVideo-Foley API 客户端...")

    # Probe the optional dependency up front so its absence is logged early.
    try:
        import gradio_client
    except ImportError:
        logger.warning("⚠️ gradio_client 未安装,API调用功能可能受限")
    else:
        logger.info("✅ gradio_client 已安装")

    # Build the UI, then start the server.
    app = create_real_api_interface()
    logger.info("API客户端就绪,准备调用真实模型...")

    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "debug": False,
        "show_error": True,
    }
    app.launch(**launch_options)