File size: 3,224 Bytes
6997ae2
 
 
 
 
 
c085ee2
6997ae2
c085ee2
 
 
 
 
 
 
 
 
 
6997ae2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c085ee2
6997ae2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c085ee2
 
 
 
 
 
6997ae2
 
c085ee2
6997ae2
 
 
 
 
 
 
 
 
 
c085ee2
 
6997ae2
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
import gradio as gr
import pandas as pd
import requests
import io
import zipfile
from pydub import AudioSegment
import os

def process_csv(file_input):
    """Measure the duration of each audio file linked from a CSV and zip the reports.

    The CSV must contain the columns ``Fullname`` (used as the recorder id)
    and ``Audio_Link`` (URL of the audio file). Rows whose ``Audio_Link`` is
    empty are skipped. Every other link is downloaded and decoded with pydub
    to obtain its length.

    Args:
        file_input: Either a path to the CSV file (``str``) or the raw CSV
            content (``bytes``).

    Returns:
        The path of the ZIP archive that was written, ``"results.zip"``
        (relative to the current working directory). The archive contains:

        * ``audio_durations.csv`` - one row per successfully measured file.
        * ``total_durations_per_recorder.csv`` - summed minutes per recorder.
        * ``audio_errors.csv`` - only present when at least one link failed.

    Raises:
        ValueError: If ``file_input`` is neither ``str`` nor ``bytes``, or if
            the CSV lacks one of the required columns.
    """
    # file_input can be a file path (str) or raw bytes (bytes)
    if isinstance(file_input, str):
        # Path from type="filepath"
        df = pd.read_csv(file_input)
    elif isinstance(file_input, bytes):
        # Raw bytes from type="binary"
        df = pd.read_csv(io.BytesIO(file_input))
    else:
        raise ValueError("Unsupported file input type")

    # Fail early with a readable message instead of silently treating a
    # missing column as "every row is empty".
    missing = [col for col in ("Fullname", "Audio_Link") if col not in df.columns]
    if missing:
        raise ValueError(
            f"CSV is missing required column(s): {', '.join(missing)}"
        )

    results = []
    errors = []

    # Process each audio link
    for recorder_id, audio_url in zip(df["Fullname"], df["Audio_Link"]):
        if pd.isna(audio_url):
            continue

        try:
            # No stream=True: the whole body is needed anyway, and a plain
            # request releases the connection even for non-200 responses.
            response = requests.get(audio_url, timeout=30)
            if response.status_code != 200:
                errors.append((recorder_id, audio_url, "Download failed"))
                continue

            audio = AudioSegment.from_file(io.BytesIO(response.content))
            duration_sec = len(audio) / 1000.0  # len() of a segment is in ms
            duration_min = duration_sec / 60.0

            results.append({
                "recorder_id": recorder_id,
                "audio_url": audio_url,
                "duration_seconds": round(duration_sec, 2),
                "duration_minutes": round(duration_min, 2),
            })
        except Exception as e:
            # Deliberately broad: one bad link (network error, undecodable
            # audio, ...) must be logged and must not abort the whole batch.
            errors.append((recorder_id, audio_url, str(e)))

    # Explicit columns keep the frame well-formed when nothing succeeded;
    # without them the groupby below raises KeyError on an empty result list.
    output_df = pd.DataFrame(
        results,
        columns=["recorder_id", "audio_url", "duration_seconds", "duration_minutes"],
    )

    totals_df = (
        output_df.groupby("recorder_id")["duration_minutes"]
        .sum()
        .reset_index()
        .rename(columns={"duration_minutes": "total_minutes"})
    )

    # Create ZIP with results. Members are written straight into the archive,
    # so no intermediate CSV files are left behind if something fails.
    # NOTE(review): the archive path is fixed, so concurrent calls in the same
    # working directory overwrite each other's output.
    zip_filename = "results.zip"
    with zipfile.ZipFile(zip_filename, "w") as zipf:
        zipf.writestr("audio_durations.csv", output_df.to_csv(index=False))
        zipf.writestr(
            "total_durations_per_recorder.csv", totals_df.to_csv(index=False)
        )
        if errors:
            error_df = pd.DataFrame(
                errors, columns=["recorder_id", "audio_url", "error"]
            )
            zipf.writestr("audio_errors.csv", error_df.to_csv(index=False))

    return zip_filename


# Gradio UI
title = "Audio Duration Calculator"

# Text handed to gr.Interface as its description. The leading and trailing
# newlines are part of the value.
description = (
    "\n"
    "Upload your CSV with 'Fullname' and 'Audio_Link' columns.\n"
    "The app will calculate audio durations, total durations per recorder, and create error logs if needed.\n"
    "You'll get a downloadable ZIP file.\n"
)

# Input and output components, built up front so the Interface call stays short.
csv_upload = gr.File(label="Upload CSV", type="filepath")  # can switch to 'binary' if preferred
zip_download = gr.File(label="Download Results ZIP", type="filepath")

# Wire process_csv between the upload component and the download component.
demo = gr.Interface(
    fn=process_csv,
    inputs=csv_upload,
    outputs=zip_download,
    title=title,
    description=description,
)

# Start the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()