"""Gradio demo that shows a random HapticCap signal with its caption.

On every button click the app picks a random ``.wav`` file from the
``haptic_signals/`` folder of the dataset repository, looks up the JSON file
that belongs to the same core ID, downloads both, and displays the caption
text next to a plot of the waveform.
"""

import functools
import json
import os
import random
import re
import tempfile

import gradio as gr
import matplotlib

# Select the non-interactive backend before pyplot is imported: the figures
# are only ever written to PNG files, and a GUI backend is not safe to drive
# from the worker threads a web server may run the handler in.
matplotlib.use("Agg")
import matplotlib.pyplot as plt  # noqa: E402
from huggingface_hub import hf_hub_download, list_repo_files  # noqa: E402

REPO_ID = "GuiminHu/HapticCap"


@functools.lru_cache(maxsize=1)
def _list_dataset_files(repo_id):
    """Return ``(signal_files, json_files)`` for the dataset repository.

    The listing is cached for the lifetime of the process so that the
    repository is scanned once instead of on every button click. A failed
    scan raises and is therefore not cached.
    """
    print("正在扫描仓库文件夹...")
    all_files = list_repo_files(repo_id, repo_type="dataset")
    signal_files = tuple(
        path
        for path in all_files
        if path.startswith('haptic_signals/') and path.endswith('.wav')
    )
    json_files = tuple(
        path
        for path in all_files
        if path.startswith('json/') and path.endswith('.json')
    )
    return signal_files, json_files


def _extract_core_id(file_name):
    """Return the part of *file_name* before the first underscore.

    The extension is removed first, so ``F100_loop_aug0.wav`` and
    ``F100.wav`` both yield ``F100``.
    """
    stem = os.path.splitext(file_name)[0]
    return stem.split('_')[0]


def _find_matching_json(core_id, json_files):
    """Return the JSON path that best matches *core_id*, or ``None``.

    Only the JSON file name (without directory and extension) is compared.
    Candidates are ranked so that a precise match always wins over a loose
    one:

    1. the stem equals the core ID (returned immediately);
    2. the core ID is one of the stem's delimiter-separated tokens;
    3. the core ID merely occurs somewhere in the stem (last resort, kept so
       that unusual naming schemes still resolve).
    """
    if not core_id:
        # An empty ID is a substring of every name and would match anything.
        return None

    token_match = None
    loose_match = None
    for candidate in json_files:
        stem = os.path.splitext(os.path.basename(candidate))[0]
        if stem == core_id:
            return candidate
        if token_match is None and core_id in re.split(r"[^0-9A-Za-z]+", stem):
            token_match = candidate
        elif loose_match is None and core_id in stem:
            loose_match = candidate
    return token_match or loose_match


def _render_waveform(signal, file_name):
    """Plot *signal* to a PNG in a fresh temporary file and return its path.

    A unique file per call keeps simultaneous requests from overwriting each
    other's image.
    """
    fd, plot_path = tempfile.mkstemp(prefix="haptic_waveform_", suffix=".png")
    os.close(fd)  # matplotlib reopens the path itself

    fig, ax = plt.subplots(figsize=(12, 4))
    try:
        ax.plot(signal, color='#FF5722', linewidth=0.8)
        ax.set_title(f"Haptic Waveform: {file_name}")
        ax.set_xlabel("Time Samples")
        ax.set_ylabel("Intensity")
        ax.grid(True, linestyle='--', alpha=0.6)
        fig.savefig(plot_path)
    finally:
        # Always release the figure, even if plotting or saving failed.
        plt.close(fig)
    return plot_path


def get_haptic_sample():
    """Pick a random haptic signal and return its description and plot.

    Returns:
        A ``(text, image_path)`` tuple. ``text`` describes the sample (file
        name, matched JSON, caption) and ``image_path`` points to a PNG of
        the waveform. When nothing can be shown, ``text`` carries the reason
        and ``image_path`` is ``None``.
    """
    try:
        # 1. Scan the repository (cached after the first call).
        signal_files, json_files = _list_dataset_files(REPO_ID)

        if not signal_files:
            return "错误:在 haptic_signals/ 文件夹下没找到 .wav 文件", None

        # 2. Draw one signal file at random, e.g. F100_loop_aug0.wav.
        test_signal = random.choice(signal_files)
        file_name = os.path.basename(test_signal)
        core_id = _extract_core_id(file_name)

        # 3. Locate the JSON description that belongs to the core ID.
        target_json = _find_matching_json(core_id, json_files)
        if not target_json:
            return f"找到了信号 {file_name},但没找到对应的 JSON。核心ID是: {core_id}", None

        # 4. Download both files and parse the metadata.
        sig_path = hf_hub_download(repo_id=REPO_ID, filename=test_signal, repo_type="dataset")
        json_path = hf_hub_download(repo_id=REPO_ID, filename=target_json, repo_type="dataset")

        with open(json_path, 'r', encoding='utf-8') as f:
            meta = json.load(f)

        # The caption is looked up under 'caption', then 'description'.
        # A JSON document whose top level is not an object has neither.
        if isinstance(meta, dict):
            caption = meta.get('caption', meta.get('description', 'JSON中未找到描述字段'))
        else:
            caption = 'JSON中未找到描述字段'

        # 5. Plot the vibration signal. librosa is imported lazily so that a
        # missing installation is reported in the UI instead of preventing
        # the app from starting.
        import librosa

        signal, _sample_rate = librosa.load(sig_path, sr=None)
        plot_path = _render_waveform(signal, file_name)

        return f"【文件名】: {file_name}\n【匹配JSON】: {target_json}\n【自然语言描述】: {caption}", plot_path

    except Exception as e:
        # UI boundary: surface the failure in the text box rather than
        # letting the request crash.
        return f"发生错误: {str(e)}", None


# Build the Gradio interface.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("## 🎧 HapticCap 信号浏览器")
    gr.Markdown("从 75GB 的数据集中随机抽取样本,查看自然语言描述与震动波形的对应关系。")

    with gr.Row():
        btn = gr.Button("随机抽取样本", variant="primary")

    with gr.Column():
        info_box = gr.Textbox(label="数据详情", lines=5)
        plot_img = gr.Image(label="波形预览")

    btn.click(get_haptic_sample, outputs=[info_box, plot_img])

demo.launch()