#!/usr/bin/env python3 # Purpose: ArtifactNet HF Spaces (ZeroGPU) — Gradio demo """ArtifactNet — AI Music Forensic Detector. HF Spaces + ZeroGPU 전용 빌드. - Upload-only (YouTube/URL 제거) - Remote inference / residual snapshot / sqlite 로그 제거 - Error report 는 api.intrect.io 로 POST (옵션) - AcoustID 제거 (API key 비공개 유지) """ import json import os import sys import tempfile import time import warnings from pathlib import Path import gradio as gr import numpy as np import requests as _requests import torch sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) from config import SR, CHUNK_SAMPLES, MIN_CONFIDENT_DURATION from inference.audio_utils import load_audio_mono_tensor, get_audio_info from inference.e2e_model import run_e2e_inference, load_models from visualization.feature_bars import plot_feature_bars from visualization.radar import plot_forensic_radar, forensic_features_explanation from visualization.spectrogram import plot_spectrograms from visualization.timeline import plot_timeline warnings.filterwarnings("ignore") API_BASE = os.environ.get("INTRECT_API_BASE", "https://api.intrect.io") # ============================================================ # Upload validation # ============================================================ _AUDIO_MAGIC = { b"RIFF": "wav", b"fLaC": "flac", b"\xff\xfb": "mp3", b"\xff\xf3": "mp3", b"\xff\xf2": "mp3", b"ID3": "mp3", b"OggS": "ogg", } _FTYP_BRANDS = {b"M4A ", b"isom", b"mp42", b"dash", b"MSNV"} _MAX_UPLOAD_BYTES = 100 * 1024 * 1024 _ALLOWED_EXTENSIONS = {".wav", ".flac", ".mp3", ".ogg", ".opus", ".m4a", ".aac", ".webm"} def _validate_audio_file(path: str) -> str | None: if not os.path.isfile(path): return "
파일을 찾을 수 없습니다.
" file_size = os.path.getsize(path) if file_size > _MAX_UPLOAD_BYTES: mb = file_size / 1024 / 1024 return f"파일이 너무 큽니다 ({mb:.0f}MB). 최대 100MB까지 허용됩니다.
" if file_size < 100: return "파일이 너무 작습니다.
" ext = os.path.splitext(path)[1].lower() if ext not in _ALLOWED_EXTENSIONS: return (f"지원하지 않는 형식입니다 ({ext}). " f"WAV, FLAC, MP3, OGG, Opus, M4A만 지원합니다.
") try: with open(path, "rb") as f: header = f.read(12) except Exception: return "파일을 읽을 수 없습니다.
" detected = None for magic, fmt in _AUDIO_MAGIC.items(): if header[:len(magic)] == magic: detected = fmt break if detected is None and header[4:8] == b"ftyp": if header[8:12] in _FTYP_BRANDS: detected = "m4a" if detected is None and header[:4] == b"\x1a\x45\xdf\xa3": detected = "webm" if detected is None: return ("유효한 오디오 파일이 아닙니다.
") return None # ============================================================ # Verdict stats # ============================================================ _MEDIAN_THRESHOLD = 0.5 def _compute_segment_stats(chunk_probs, chunk_metadata=None): arr = np.array(chunk_probs) n = len(arr) q25, q50, q75 = np.percentile(arr, [25, 50, 75]) if chunk_metadata and len(chunk_metadata) == len(chunk_probs): rms_arr = np.array([m.get('rms', 1.0) for m in chunk_metadata]) median_rms = np.median(rms_arr) weights = rms_arr / (median_rms + 1e-10) weights = weights / weights.sum() sorted_indices = np.argsort(arr) sorted_probs = arr[sorted_indices] sorted_weights = weights[sorted_indices] cumsum_weights = np.cumsum(sorted_weights) idx = np.searchsorted(cumsum_weights, 0.5) weighted_median = float(sorted_probs[min(idx, len(sorted_probs) - 1)]) else: weighted_median = float(q50) return { "n": n, "mean": float(np.mean(arr)), "median": float(q50), "weighted_median": weighted_median, "q25": float(q25), "q75": float(q75), "iqr": float(q75 - q25), "std": float(np.std(arr)), "pct_high": float((arr >= 0.8).sum() / n) if n else 0.0, "pct_above_50": float((arr >= 0.5).sum() / n) if n else 0.0, "pct_low": float((arr < 0.2).sum() / n) if n else 0.0, "n_high": int((arr >= 0.8).sum()), "n_mid": int(((arr >= 0.5) & (arr < 0.8)).sum()), "n_low": int((arr < 0.5).sum()), } # ============================================================ # Verdict HTML card # ============================================================ def _verdict_html(verdict, stats, is_stereo, duration=0, elapsed=0, is_short=False, audio_format=""): if verdict == "No file": return """Upload an audio file to begin analysis
Error loading audio: {e}
" return err, None, None, None, None, None, None, {} info = get_audio_info(audio_np, is_stereo) mono_np = mono_tensor.numpy() duration = info["duration"] progress(0.2, desc="🔬 Running AI forensic analysis on CPU (ONNX)...") chunk_probs, _, chunk_metadata, forensic_stats, router_feat, verdict_feat = \ run_e2e_inference(mono_tensor) progress(0.6, desc="📊 Computing distribution statistics...") seg_stats = _compute_segment_stats(chunk_probs, chunk_metadata) elapsed = time.time() - t0 progress(0.8, desc="🎨 Generating visualizations...") is_short = duration < MIN_CONFIDENT_DURATION audio_ext = os.path.splitext(audio_path)[1].lower() fmt_map = {".wav": "WAV", ".flac": "FLAC", ".mp3": "MP3", ".opus": "Opus", ".ogg": "OGG", ".m4a": "M4A", ".aac": "AAC", ".webm": "WebM"} audio_format = fmt_map.get(audio_ext, audio_ext.lstrip(".").upper() or "Unknown") median_prob = seg_stats.get("weighted_median", seg_stats["median"]) verdict = "AI Generated" if median_prob >= _MEDIAN_THRESHOLD else "Human-Made" iqr = seg_stats.get("iqr", 0) n_high = seg_stats.get("n_high", 0) n_low = seg_stats.get("n_low", 0) n_total = seg_stats.get("n", 1) if (iqr >= 0.4 and n_high >= max(3, n_total * 0.1) and n_low >= max(3, n_total * 0.1)): verdict = "Partial AI" verdict_html = _verdict_html( verdict, seg_stats, is_stereo, duration=duration, elapsed=elapsed, is_short=is_short, audio_format=audio_format, ) spec_fig = plot_spectrograms(mono_np) timeline_fig = plot_timeline( chunk_probs, mono_np, chunk_metadata, weighted_median=seg_stats.get("weighted_median") ) radar_fig = plot_forensic_radar(forensic_stats) bars_fig = plot_feature_bars(forensic_stats) forensic_explanation = forensic_features_explanation() filename = os.path.basename(audio_path) if audio_path else "unknown" result_json = { "filename": filename, "verdict": verdict, "is_short_file": is_short, "duration_sec": round(duration, 2), "is_stereo": is_stereo, "elapsed_sec": round(elapsed, 2), "segment_stats": {k: round(v, 4) if isinstance(v, float) else v for k, v in seg_stats.items()}, "segment_probs": [round(p, 4) for p in chunk_probs], "format": audio_format, } json_path = os.path.join(tempfile.gettempdir(), "artifactnet_result.json") with open(json_path, "w") as f: json.dump(result_json, f, indent=2) progress(1.0, desc="✅ Analysis complete!") analysis_state = { "filename": filename, "duration": duration, "is_stereo": is_stereo, "elapsed": elapsed, "verdict": verdict, "forensic_stats": forensic_stats, "seg_stats": seg_stats, "chunk_probs": chunk_probs, "is_short": is_short, "predicted_verdict": "ai" if verdict == "AI Generated" else ( "real" if verdict == "Human-Made" else "unknown" ), "predicted_probability": round(median_prob, 6), } return verdict_html, spec_fig, timeline_fig, radar_fig, bars_fig, forensic_explanation, json_path, analysis_state # ============================================================ # Error report → api.intrect.io # ============================================================ def submit_error_report(analysis_state, reported_as: str, comment: str): if not analysis_state or not analysis_state.get("filename"): return gr.update(visible=True, value='Please analyze a file first.') meta = { "filename": analysis_state.get("filename"), "reported_as": (reported_as or "unsure").lower(), "comment": (comment or "").strip()[:500], "predicted_verdict": analysis_state.get("predicted_verdict"), "predicted_probability": analysis_state.get("predicted_probability"), "source_hint": "hf-space", } try: with _requests.Session() as s: r = s.post( f"{API_BASE.rstrip('/')}/v1/reports", data={"report": json.dumps(meta)}, timeout=10, ) if r.status_code >= 300: try: detail = r.json().get("detail", r.text[:200]) except Exception: detail = r.text[:200] return gr.update(visible=True, value=f'Report failed: {detail}') except Exception as e: return gr.update(visible=True, value=f'Report failed: {e}') return gr.update( visible=True, value='✅ Thanks! Report submitted.', ) # ============================================================ # Gradio UI # ============================================================ def build_ui(): theme = gr.themes.Base( primary_hue="orange", secondary_hue="blue", neutral_hue="slate", font=gr.themes.GoogleFont("Inter"), ).set( body_background_fill="#0f0f23", block_background_fill="#1a1a2e", block_border_color="#333", input_background_fill="#16213e", button_primary_background_fill="#ffa502", button_primary_text_color="black", ) custom_css = """ .gradio-container { margin: 0 auto !important; } footer { display: none !important; } .gr-button-primary { border-radius: 8px !important; font-weight: 600 !important; } .gr-input, .gr-box { border-color: #333 !important; } .gr-panel { border-color: #333 !important; } h1, h2, h3 { font-family: 'Inter', sans-serif !important; } .demo-nav { display: flex; justify-content: space-between; align-items: center; padding: 12px 20px; border-bottom: 1px solid #333; margin: -16px -16px 16px; } .demo-nav a { color: #8b949e; text-decoration: none; font-size: 13px; } .demo-nav a:hover { color: #ffa502; } .demo-nav .brand { color: #ffa502; font-weight: 700; font-size: 16px; letter-spacing: 2px; text-transform: uppercase; } """ with gr.Blocks(theme=theme, css=custom_css, title="ArtifactNet — AI Music Forensic Detector") as demo: gr.HTML(""" """) gr.HTML(f"""AI-Generated Music Detection — ONNX Runtime CPU
Help us improve — anonymous feedback.
""" ) report_reported_as = gr.Radio( choices=[ ("It should be AI", "ai"), ("It should be Real / Human", "real"), ("Unsure / Mixed", "unsure"), ], label="What do you think it actually is?", value="ai", ) report_comment = gr.Textbox( label="Optional comment (≤500 chars)", placeholder="Any context we should know?", max_lines=3, lines=2, ) report_submit_btn = gr.Button("🚩 Submit report", variant="secondary", size="sm") report_status = gr.HTML(value="", visible=False) with gr.Row(): spec_output = gr.Plot(label="Spectral Analysis") with gr.Row(): with gr.Column(scale=2): timeline_output = gr.Plot(label="P(AI) Timeline") with gr.Column(scale=1): radar_output = gr.Plot(label="Forensic Features") with gr.Row(): bars_output = gr.Plot(label="Feature Strength Analysis") forensic_explanation_output = gr.HTML(visible=False) with gr.Row(): json_output = gr.File(label="Result JSON", visible=True) with gr.Accordion("About ArtifactNet", open=False): gr.HTML(f"""ArtifactNet is a neural forensic detector for AI-generated music. It uses HPSS and 7-channel forensic features to detect generation artifacts.
Research project — interpret alongside other evidence. See Disclaimer.