# Source: Hugging Face Space "AnamikaP/pyAnnote_Ft_Segmentation" (Space status: Sleeping)
# File size: 1,847 Bytes

import os
import torch
import matplotlib.pyplot as plt
from pyannote.metrics.diarization import DiarizationErrorRate

# THE ULTIMATE BYPASS (Fixes PyTorch 2.6 security errors)
# torch.load is replaced process-wide by a wrapper that always loads with
# weights_only=False. This must run before the pyannote imports below.
# SECURITY NOTE: weights_only=False allows full pickle deserialization, which
# can execute arbitrary code -- only load checkpoints from sources you trust.
import torch.serialization
original_load = torch.load


def patched_load(*args, **kwargs):
    """Call the original ``torch.load`` with ``weights_only`` forced to False.

    All positional and keyword arguments are forwarded unchanged, except that
    any ``weights_only`` value supplied by the caller is overridden.
    """
    forced_kwargs = dict(kwargs, weights_only=False)
    return original_load(*args, **forced_kwargs)


torch.load = patched_load

# IMPORTS
from pyannote.core import notebook
from pyannote.audio import Pipeline
from pyannote.database.util import load_rttm

# Input audio clip and its reference annotation (RTTM format).
AUDIO_PATH = r"dataset/audio/clip_03.wav"
RTTM_PATH = r"dataset/rttm/clip_03.rttm"

# Hugging Face access token, read from the HF_TOKEN environment variable so
# the secret never has to be written into the source. May be None if unset.
hf_token = os.getenv("HF_TOKEN")

# INITIALIZE PIPELINE
print("Initializing AI Pipeline...")
pipeline = Pipeline.from_pretrained(
    "pyannote/speaker-diarization-3.1",
    # Pass the token VALUE; the previous code passed the literal string
    # "hf_token", which can never authenticate.
    use_auth_token=hf_token,
)
# NOTE(review): pyannote.audio 3.x appears to signal a failed or unauthorized
# download by returning None instead of raising -- fail here with a clear
# message rather than with "'NoneType' object is not callable" later on.
if pipeline is None:
    raise RuntimeError(
        "Could not load 'pyannote/speaker-diarization-3.1'. Check that the "
        "HF_TOKEN environment variable holds a valid Hugging Face token and "
        "that the model's user conditions have been accepted."
    )

# --- LOAD GROUND TRUTH ---
# Loaded before inference so that a missing or empty reference file fails
# fast, instead of after the (much slower) diarization run.
gt_dict = load_rttm(RTTM_PATH)
if not gt_dict:
    raise ValueError(f"No annotations found in RTTM file: {RTTM_PATH}")
# The loaded mapping is keyed by URI; the first entry is used as the reference.
uri = next(iter(gt_dict))
ground_truth = gt_dict[uri]

# --- RUN DIARIZATION ---
print("AI is analyzing the audio...")
prediction = pipeline(AUDIO_PATH)

# --- FIXED: CALCULATE DER USING REPORT ---
metric = DiarizationErrorRate()
# Evaluate this one file; the result is read back through metric.report().
# (The former `notebook=True` argument is not a parameter of the metric call
# and had no effect, so it is dropped.)
metric(ground_truth, prediction)
# display=False: the report is printed exactly once, inside the banner below.
# With display=True it was printed here as well, i.e. twice in total.
report = metric.report(display=False)

print("\n" + "="*50)
print("FINAL EVALUATION REPORT")
print(report)
print("="*50 + "\n")

## --- VISUALIZATION (UNCHANGED) ---
# Two stacked timelines sharing one time axis: reference on top,
# model output underneath.
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(15, 8), sharex=True)

panels = (
    (ax1, ground_truth, "REFERENCE (Ground Truth)"),
    (ax2, prediction, "HYPOTHESIS (Model Prediction)"),
)
for panel_ax, annotation, panel_title in panels:
    # Make the panel current before drawing, then draw and title it.
    plt.sca(panel_ax)
    notebook.plot_annotation(annotation, ax=panel_ax)
    panel_ax.set_title(panel_title, fontsize=14, fontweight='bold')

# Called after the loop, as in the original order: the x label is set once
# the bottom panel has been made current.
plt.xlabel("Time (seconds)", fontsize=12)
plt.tight_layout()

print("Diarization complete! Displaying plot...")
plt.show()