Spaces:

vrajshroff
/

SeeHowMuchYouTalk

Build error

App Files Files Community

vrajshroff committed on Dec 25, 2024

Commit

d4cae71

verified ·

1 Parent(s): bdbee05

Create app.py

Browse files

Files changed (1) hide show

app.py +65 -0

app.py ADDED Viewed

	@@ -0,0 +1,65 @@

+import os
+import tempfile
+import wave
+
+import gradio as gr
+import soundfile as sf
+import torch
+from pyannote.audio import Pipeline
+pipeline = Pipeline.from_pretrained(
+  "pyannote/speaker-diarization-3.0",
+  use_auth_token=os.getenv("HF_AUTH_TOKEN")
+pipeline.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
+def process_audio(audio_file):
+    diarization = pipeline(audio_file)
+    with open("audio.rttm", "w") as rttm:
+        diarization.write_rttm(rttm)
+    speaker_durations = {}
+    first_speaker = None
+    with open("audio.rttm", "r") as file:
+        for line in file:
+            parts = line.strip().split()
+            speaker = parts[7]
+            start_time = float(parts[3])
+            duration = float(parts[4])
+            if first_speaker is None:
+                first_speaker = speaker
+            if speaker not in speaker_durations:
+                speaker_durations[speaker] = 0
+            speaker_durations[speaker] += duration
+    total_duration = sum(speaker_durations.values())
+    first_speaker_duration = speaker_durations.get(first_speaker, 0)
+    percentage_first_speaker = (first_speaker_duration / total_duration) * 100 if total_duration > 0 else 0
+    return percentage_first_speaker
+def record_and_process(audio):
+    if audio is None:
+        return "No audio was recorded. Please try again."
+    sample_rate, audio_data = audio
+    file_path = "audio.wav"
+    sf.write(file_path, audio_data, sample_rate)
+    percentage = process_audio(file_path)
+    return f"Percentage of time spoken by the first speaker: {percentage:.2f}%"
+interface = gr.Interface(
+    fn=record_and_process,
+    inputs=gr.Audio(type="numpy"),
+    outputs="text",
+    title="See How Much You Talk in a Conversation",
+    description=(
+    "Make sure you are the first person to speak!<br>"
+    "You can also use a sample audio file for testing: "
+    "<a href='https://www.uclass.psychol.ucl.ac.uk/Release2/Conversation/AudioOnly/wav/M_0025_11y10m_1.wav' "
+    "target='_blank'>sample audio</a>.<br>"
+    "<u>Your voice never leaves your device.</u>"
+    ),
+    allow_flagging="never"
+)
+interface.launch()