Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import pandas as pd | |
| import requests | |
| import io | |
| import zipfile | |
| from pydub import AudioSegment | |
| import os | |
def _read_input_csv(file_input):
    """Load the uploaded CSV from a file path (str) or raw bytes."""
    if isinstance(file_input, str):
        # Path from type="filepath"
        return pd.read_csv(file_input)
    if isinstance(file_input, bytes):
        # Raw bytes from type="binary"
        return pd.read_csv(io.BytesIO(file_input))
    raise ValueError("Unsupported file input type")


def process_csv(file_input):
    """Measure the duration of every audio link in a CSV and zip the reports.

    Each row with a non-missing ``Audio_Link`` is downloaded and decoded;
    its length is recorded in seconds and minutes (both rounded to two
    decimals). Rows whose download or decoding fails are collected in an
    error report instead of aborting the whole run.

    Args:
        file_input: Path to the CSV file (``str``) or its raw content
            (``bytes``). The CSV must contain an ``Audio_Link`` column;
            ``Fullname`` is read as the recorder id when present.

    Returns:
        Path (``str``) to a ``results.zip`` archive inside a fresh temporary
        directory. The archive contains ``audio_durations.csv``,
        ``total_durations_per_recorder.csv`` and, only when at least one row
        failed, ``audio_errors.csv``.

    Raises:
        ValueError: If ``file_input`` is neither ``str`` nor ``bytes``, or if
            the CSV has no ``Audio_Link`` column.
    """
    # Function-scope import keeps this block self-contained.
    import tempfile

    df = _read_input_csv(file_input)
    if "Audio_Link" not in df.columns:
        # Without this check every row would be skipped silently.
        raise ValueError("CSV must contain an 'Audio_Link' column")

    results = []
    errors = []

    # Process each audio link
    for _, row in df.iterrows():
        recorder_id = row.get("Fullname")
        audio_url = row.get("Audio_Link")
        if pd.isna(audio_url):
            continue
        try:
            # No stream=True: the whole body is read via .content anyway.
            response = requests.get(audio_url, timeout=30)
            if response.status_code != 200:
                errors.append((recorder_id, audio_url, "Download failed"))
                continue
            audio = AudioSegment.from_file(io.BytesIO(response.content))
            # len() of an AudioSegment is used as milliseconds here.
            duration_sec = len(audio) / 1000.0
        except Exception as exc:
            # Per-row boundary: one bad link must not abort the batch, so
            # the failure is recorded in the error report and we move on.
            errors.append((recorder_id, audio_url, str(exc)))
            continue
        duration_min = duration_sec / 60.0
        results.append({
            "recorder_id": recorder_id,
            "audio_url": audio_url,
            "duration_seconds": round(duration_sec, 2),
            "duration_minutes": round(duration_min, 2),
        })

    # Explicit columns keep the header (and the groupby key) present even
    # when no row succeeded.
    output_df = pd.DataFrame(
        results,
        columns=[
            "recorder_id",
            "audio_url",
            "duration_seconds",
            "duration_minutes",
        ],
    )

    if output_df.empty:
        totals_df = pd.DataFrame(columns=["recorder_id", "total_minutes"])
    else:
        # Totals are the sum of the already-rounded per-file minutes.
        totals_df = (
            output_df.groupby("recorder_id")["duration_minutes"]
            .sum()
            .reset_index()
            .rename(columns={"duration_minutes": "total_minutes"})
        )

    # A fresh directory per call so concurrent requests cannot overwrite
    # each other's archive; the download name stays "results.zip".
    zip_filename = os.path.join(
        tempfile.mkdtemp(prefix="audio_durations_"), "results.zip"
    )

    # Write the CSV text straight into the archive: no intermediate files
    # to clean up, and nothing left behind if zipping fails midway.
    with zipfile.ZipFile(zip_filename, "w") as zipf:
        zipf.writestr("audio_durations.csv", output_df.to_csv(index=False))
        zipf.writestr(
            "total_durations_per_recorder.csv", totals_df.to_csv(index=False)
        )
        if errors:
            error_df = pd.DataFrame(
                errors, columns=["recorder_id", "audio_url", "error"]
            )
            zipf.writestr("audio_errors.csv", error_df.to_csv(index=False))

    return zip_filename
# Gradio UI: page text first, then the two file widgets, then the interface
# that wires them to process_csv.
title = "Audio Duration Calculator"
description = """
Upload your CSV with 'Fullname' and 'Audio_Link' columns.
The app will calculate audio durations, total durations per recorder, and create error logs if needed.
You'll get a downloadable ZIP file.
"""

# Upload widget; type="filepath" hands process_csv a path string
# (process_csv also accepts raw bytes, so 'binary' works if preferred).
csv_upload = gr.File(label="Upload CSV", type="filepath")

# Download widget for the ZIP path returned by process_csv.
zip_download = gr.File(label="Download Results ZIP", type="filepath")

demo = gr.Interface(
    fn=process_csv,
    inputs=csv_upload,
    outputs=zip_download,
    title=title,
    description=description,
)

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()