Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import pandas as pd | |
| import numpy as np | |
| import os | |
| from datetime import datetime | |
| import torchaudio | |
| import librosa | |
| import tensorflow as tf | |
| from huggingface_hub import hf_hub_download, HfApi | |
| import tempfile | |
| import gc | |
| import uuid | |
| import shutil | |
| import json | |
# Enhanced configuration with multiple models per species.
# Every entry lives in the same Hub repository; only the species folder and
# the model version vary, so the table is generated from that pattern.
_HUB_REPO_ID = "RonaldCeballos/SpeciesClassifiers"


def _species_models(folder):
    """Return the Base/M1/M2 file layout for one species folder in the repo."""
    root = f"models/{folder}"
    # M1 and M2 read the same class-label file.
    shared_classes = f"{root}/class/classes.npy"
    return {
        "Base": {
            "repo_id": _HUB_REPO_ID,
            "model_file": f"{root}/base/model_base.h5",
            "classes_file": f"{root}/base/classes.npy",
        },
        "M1": {
            "repo_id": _HUB_REPO_ID,
            "model_file": f"{root}/models/model_M1.h5",
            "classes_file": shared_classes,
        },
        "M2": {
            "repo_id": _HUB_REPO_ID,
            "model_file": f"{root}/models/model_M2.h5",
            "classes_file": shared_classes,
        },
    }


# species category -> model version -> {"repo_id", "model_file", "classes_file"}
MODEL_CONFIG = {
    "Amphibians": _species_models("amphibia"),
    "Mammals": _species_models("mammals"),
    "Insects": _species_models("insects"),
}
# Cache of loaded (model, classes) pairs, keyed by "<species_type>_<model_version>".
model_cache: dict = {}
# Path and original file name of the audio currently being analyzed.
# NOTE(review): these are module-level globals, so they are shared by every
# request handled by this process.
current_audio_path = current_audio_name = None
def load_model_from_hub(species_type, model_version):
    """Load a model and its class labels from the Hugging Face Hub.

    Results are memoized in ``model_cache`` under the key
    ``"<species_type>_<model_version>"``, so each pair is downloaded and
    deserialized at most once per process.

    Args:
        species_type: Top-level key of ``MODEL_CONFIG`` (e.g. "Amphibians").
        model_version: Second-level key of ``MODEL_CONFIG`` (e.g. "Base").

    Returns:
        A ``(model, classes)`` tuple, or ``(None, None)`` when the download or
        the loading fails for any reason (the error is printed).
    """
    cache_key = f"{species_type}_{model_version}"
    if cache_key in model_cache:
        return model_cache[cache_key]
    try:
        config = MODEL_CONFIG[species_type][model_version]
        print(f"Downloading {species_type} - {model_version}...")
        # Use ONE stable download directory. The previous code passed a fresh
        # tempfile.mkdtemp() to every download, which left a new directory on
        # disk per call and prevented any reuse of already-downloaded files.
        download_dir = os.path.join(tempfile.gettempdir(), "species_classifier_hub_cache")
        os.makedirs(download_dir, exist_ok=True)
        # Download files
        model_path = hf_hub_download(
            repo_id=config["repo_id"],
            filename=config["model_file"],
            cache_dir=download_dir,
        )
        classes_path = hf_hub_download(
            repo_id=config["repo_id"],
            filename=config["classes_file"],
            cache_dir=download_dir,
        )
        # Load model and classes (compile=False: the model is used for inference only here)
        model = tf.keras.models.load_model(model_path, compile=False)
        classes = np.load(classes_path)
        # Cache the model
        model_cache[cache_key] = (model, classes)
        gc.collect()
        print(f"Model {species_type} - {model_version} loaded successfully")
        return model, classes
    except Exception as e:
        # Boundary handler: callers test for (None, None) instead of catching.
        print(f"Error loading {species_type} - {model_version}: {str(e)}")
        return None, None
def predict_with_model(spec, model, classes):
    """Predict the species for one spectrogram.

    Args:
        spec: 2-D spectrogram array. It is resized to (1025, 313) when it has
            any other shape.
        model: Object exposing ``predict(X, verbose=0)``; called with a
            float32 array of shape (1, 1025, 313, 1).
        classes: Indexable collection of class labels, indexed by the argmax
            of the model output.

    Returns:
        ``(label, probability)`` where ``label`` is a ``str`` and
        ``probability`` is the model output for that class as a ``float``.
    """
    # Ensure correct dimensions (1025, 313)
    if spec.shape != (1025, 313):
        spec = resize_spectrogram(spec, (1025, 313))
    # Add channel and batch axes: (H, W) -> (1, H, W, 1)
    arr = np.expand_dims(spec[..., np.newaxis], axis=0).astype('float32')
    # Scale by the peak value. A peak of exactly 0 would turn the whole input
    # into NaN (0/0), so the scaling is skipped in that case.
    peak = np.max(arr)
    X = arr / peak if peak != 0 else arr
    # Predict
    pred = model.predict(X, verbose=0)
    pred_class_idx = np.argmax(pred)
    pred_class = str(classes[pred_class_idx])
    prob = float(pred[0][pred_class_idx])
    return pred_class, prob
def extract_chunks(audio_clean, sr, time=5):
    """Split a 1-D audio signal into fixed-length chunks.

    The last chunk is completed with circular padding: samples are taken from
    the start of the signal, wrapping around as many times as needed. The
    previous implementation padded with ``audio_clean[:missing]`` only, which
    produced a too-short chunk whenever the signal was shorter than the
    missing length (e.g. a 1 s clip with 5 s chunks).

    Args:
        audio_clean: 1-D array of audio samples.
        sr: Sample rate in samples per second.
        time: Chunk duration in seconds.

    Returns:
        Array of shape ``(n_chunks, int(sr * time))``; an empty array when
        ``audio_clean`` is empty.

    Raises:
        ValueError: If ``sr * time`` does not give a positive chunk length.
    """
    audio_clean = np.asarray(audio_clean)
    n_samples = int(sr * time)
    if n_samples <= 0:
        raise ValueError("sr * time must be a positive number of samples")
    total = len(audio_clean)
    chunks = []
    for start in range(0, total, n_samples):
        end = start + n_samples
        if end <= total:
            chunk = audio_clean[start:end]
        else:
            # Circular padding: indices past the end wrap back to the start.
            chunk = np.take(audio_clean, np.arange(start, end), mode="wrap")
        chunks.append(chunk)
    return np.array(chunks)
def create_spectrogram(array_audio, n_fft=2048):
    """Compute a dB-scaled magnitude spectrogram of an audio signal.

    Args:
        array_audio: Audio samples as a NumPy array, or any object exposing
            ``.numpy()`` that returns one.
        n_fft: FFT window size passed to ``librosa.stft``. It is now honoured
            for both input kinds; previously the non-ndarray branch ignored
            it and always used librosa's default.

    Returns:
        The STFT magnitude converted with ``librosa.amplitude_to_db`` using
        the maximum magnitude as reference.
    """
    if not isinstance(array_audio, np.ndarray):
        array_audio = array_audio.numpy()
    magnitude = np.abs(librosa.stft(array_audio, n_fft=n_fft))
    return librosa.amplitude_to_db(magnitude, ref=np.max)
def resize_spectrogram(spec, target_shape):
    """Rescale a 2-D spectrogram to ``target_shape`` with linear interpolation.

    Args:
        spec: 2-D array to resize.
        target_shape: ``(rows, cols)`` of the desired output.

    Returns:
        The array produced by ``scipy.ndimage.zoom`` with ``order=1``.
    """
    from scipy.ndimage import zoom

    row_scale = target_shape[0] / spec.shape[0]
    col_scale = target_shape[1] / spec.shape[1]
    return zoom(spec, (row_scale, col_scale), order=1)
def predict_species_all_chunks(species_type, model_version, audio_file):
    """Classify every 5-second segment of an audio file.

    Also records ``audio_file`` and its base name in the module globals
    ``current_audio_path`` / ``current_audio_name`` for the feedback flow.

    Args:
        species_type: Species category key of ``MODEL_CONFIG``.
        model_version: Model version key of ``MODEL_CONFIG``.
        audio_file: Path of the audio file, or ``None``.

    Returns:
        A DataFrame with columns ``Segment``, ``Time``, ``Species`` and
        ``Confidence`` (one row per scored segment), or a single-column
        ``Info`` / ``Error`` DataFrame when nothing could be analyzed.
    """
    global current_audio_path, current_audio_name
    if audio_file is None:
        return pd.DataFrame({"Info": ["Please upload an audio file"]})
    segment_seconds = 5
    try:
        # Store the current audio path and name for feedback
        current_audio_path = audio_file
        current_audio_name = os.path.basename(audio_file)
        # Load model
        model, classes = load_model_from_hub(species_type, model_version)
        if model is None or classes is None:
            return pd.DataFrame({"Error": [f"Could not load {species_type} - {model_version} model"]})
        # Process audio
        wav, sr = torchaudio.load(audio_file)
        wav = wav.mean(dim=0)  # Convert to mono
        chunks = extract_chunks(wav.numpy(), sr, time=segment_seconds)
        results = []
        for i, chunk in enumerate(chunks):
            spectrogram = create_spectrogram(chunk)
            # A constant spectrogram (e.g. a silent segment) has zero spread;
            # standardizing it would give NaN and a meaningless "nan%" row,
            # so such segments are skipped instead of being scored.
            spread = np.std(spectrogram)
            if not np.isfinite(spread) or spread == 0:
                continue
            spectrogram = (spectrogram - np.mean(spectrogram)) / spread
            species, confidence = predict_with_model(spectrogram, model, classes)
            time_start = i * segment_seconds
            time_end = (i + 1) * segment_seconds
            results.append({
                'Segment': f'{i+1}',
                'Time': f'{time_start}s - {time_end}s',
                'Species': species,
                'Confidence': f'{confidence:.1%}'
            })
        # Clean memory (the model itself stays referenced by the model cache)
        gc.collect()
        if not results:
            return pd.DataFrame({"Info": ["No valid segments detected in the audio"]})
        return pd.DataFrame(results)
    except Exception as e:
        # UI boundary: report the failure in the table instead of raising.
        print(f"Prediction error: {str(e)}")
        return pd.DataFrame({"Error": [f"Error during analysis: {str(e)}"]})
def predict_species_final(species_type, model_version, audio_file):
    """Classify every 5-second segment and add a confidence-weighted verdict.

    Each scored segment adds its confidence to the total of the species it
    predicted. The species with the highest total wins; its reported
    confidence is that total divided by the number of scored segments.

    Also records ``audio_file`` and its base name in the module globals
    ``current_audio_path`` / ``current_audio_name`` for the feedback flow.

    Args:
        species_type: Species category key of ``MODEL_CONFIG``.
        model_version: Model version key of ``MODEL_CONFIG``.
        audio_file: Path of the audio file, or ``None``.

    Returns:
        A DataFrame with columns ``Segment``, ``Time``, ``Species`` and
        ``Confidence``: one row per scored segment plus a last ``FINAL`` row,
        or a single-column ``Info`` / ``Error`` DataFrame when nothing could
        be analyzed.
    """
    global current_audio_path, current_audio_name
    if audio_file is None:
        return pd.DataFrame({"Info": ["Please upload an audio file"]})
    segment_seconds = 5
    try:
        current_audio_path = audio_file
        current_audio_name = os.path.basename(audio_file)
        # Load model
        model, classes = load_model_from_hub(species_type, model_version)
        if model is None or classes is None:
            return pd.DataFrame({"Error": [f"Could not load {species_type} - {model_version} model"]})
        # Process audio
        wav, sr = torchaudio.load(audio_file)
        wav = wav.mean(dim=0)  # Convert to mono
        chunks = extract_chunks(wav.numpy(), sr, time=segment_seconds)
        results = []
        species_votes = {}
        for i, chunk in enumerate(chunks):
            spectrogram = create_spectrogram(chunk)
            # A constant spectrogram (e.g. a silent segment) has zero spread;
            # standardizing it would give NaN, which would also poison the
            # vote totals, so such segments are skipped.
            spread = np.std(spectrogram)
            if not np.isfinite(spread) or spread == 0:
                continue
            spectrogram = (spectrogram - np.mean(spectrogram)) / spread
            species, confidence = predict_with_model(spectrogram, model, classes)
            # Confidence-weighted vote for the final prediction
            species_votes[species] = species_votes.get(species, 0.0) + confidence
            time_start = i * segment_seconds
            time_end = (i + 1) * segment_seconds
            results.append({
                'Segment': f'{i+1}',
                'Time': f'{time_start}s - {time_end}s',
                'Species': species,
                'Confidence': f'{confidence:.1%}'
            })
        # Clean memory (the model itself stays referenced by the model cache)
        gc.collect()
        if not results:
            return pd.DataFrame({"Info": ["No valid segments detected in the audio"]})
        # Determine final prediction
        final_species = max(species_votes, key=species_votes.get)
        # Average over scored segments; equals len(chunks) when none was skipped.
        final_confidence = species_votes[final_species] / len(results)
        final_row = pd.DataFrame({
            'Segment': ['FINAL'],
            'Time': ['Full Audio'],
            'Species': [final_species],
            'Confidence': [f'{final_confidence:.1%}']
        })
        return pd.concat([pd.DataFrame(results), final_row], ignore_index=True)
    except Exception as e:
        # UI boundary: report the failure in the table instead of raising.
        print(f"Prediction error: {str(e)}")
        return pd.DataFrame({"Error": [f"Error during analysis: {str(e)}"]})
def save_feedback_to_dataset(audio_file_path, original_audio_name, feedback_text, consent_given, species_type, model_version, results_df):
    """Upload an audio file and its feedback metadata to a Hugging Face dataset.

    The audio is uploaded to ``audios/`` and a JSON metadata file to
    ``metadata/`` of the ``RonaldCeballos/Audios-Feedback`` dataset repo. Both
    are staged in a private temporary directory that is removed on every exit
    path (previously a shared relative ``temp_uploads`` directory was used and
    cleanup was duplicated between the success and error paths).

    Args:
        audio_file_path: Path of the audio file to upload.
        original_audio_name: Original file name of the audio.
        feedback_text: Free-text feedback.
        consent_given: Must be truthy, otherwise nothing is uploaded.
        species_type: Species category used for the analysis.
        model_version: Model version used for the analysis.
        results_df: Analysis results (``to_dict()`` is stored), or ``None``.

    Returns:
        A status message for the UI; errors are reported in the message
        rather than raised.
    """
    if not consent_given:
        return "β You must accept the consent to submit feedback."
    try:
        # Get HF token from environment (set in your Space secrets)
        hf_token = os.getenv('HF_TOKEN')
        if not hf_token:
            return "β HF_TOKEN not found. Please set it in Space secrets."
        api = HfApi(token=hf_token)
        repo_id = "RonaldCeballos/Audios-Feedback"  # Your private dataset
        # Keep only the final path component so a crafted upload name cannot
        # inject directories into the local or in-repo paths built below.
        safe_name = os.path.basename(original_audio_name)
        original_base, extension = os.path.splitext(safe_name)
        extension = extension or '.wav'
        unique_id = uuid.uuid4().hex[:8]  # Short unique ID
        audio_filename = f"{original_base}_{unique_id}{extension}"
        metadata_filename = f"{original_base}_{unique_id}.json"
        # Prepare metadata
        metadata = {
            'timestamp': datetime.now().isoformat(),
            'original_audio_name': original_audio_name,
            'audio_file': audio_filename,
            'feedback': feedback_text,
            'consent_given': consent_given,
            'species_type': species_type,
            'model_version': model_version,
            'analysis_results': results_df.to_dict() if results_df is not None else {}
        }
        # The context manager deletes the staged files whether or not the
        # uploads succeed.
        with tempfile.TemporaryDirectory() as temp_dir:
            new_audio_path = os.path.join(temp_dir, audio_filename)
            metadata_path = os.path.join(temp_dir, metadata_filename)
            shutil.copy(audio_file_path, new_audio_path)
            with open(metadata_path, 'w', encoding='utf-8') as f:
                json.dump(metadata, f, ensure_ascii=False, indent=2)
            # Upload files to dataset
            api.upload_file(
                path_or_fileobj=new_audio_path,
                path_in_repo=f"audios/{audio_filename}",
                repo_id=repo_id,
                repo_type="dataset",
                commit_message=f"Add audio feedback: {audio_filename}"
            )
            api.upload_file(
                path_or_fileobj=metadata_path,
                path_in_repo=f"metadata/{metadata_filename}",
                repo_id=repo_id,
                repo_type="dataset",
                commit_message=f"Add metadata for: {audio_filename}"
            )
        return f"β Audio '{original_audio_name}' and feedback saved successfully for model improvement!"
    except Exception as e:
        # UI boundary: surface the failure as a status message.
        print(f"Error saving to dataset: {e}")
        return f"β Error saving feedback: {str(e)}"
def submit_feedback(feedback_text, consent_checkbox, species_type, model_version, results_df):
    """Validate a feedback submission and forward it to the dataset uploader.

    Reads the module globals ``current_audio_path`` / ``current_audio_name``
    to find the audio the feedback refers to.

    Returns:
        A status message: a prompt when the comment is empty or blank, an
        error when no audio is recorded, otherwise whatever
        ``save_feedback_to_dataset`` returns.
    """
    global current_audio_path, current_audio_name
    has_comment = bool(feedback_text) and bool(feedback_text.strip())
    if not has_comment:
        return "π Please write your comment"
    if current_audio_path is None:
        return "β No audio file available for feedback"
    return save_feedback_to_dataset(
        audio_file_path=current_audio_path,
        original_audio_name=current_audio_name,
        feedback_text=feedback_text,
        consent_given=consent_checkbox,
        species_type=species_type,
        model_version=model_version,
        results_df=results_df,
    )
def clear_interface():
    """Forget the current audio and return reset values for the UI.

    Returns:
        A 6-tuple: no audio, an empty results table, the default species
        category, the default model version, an empty feedback text and an
        unchecked consent flag.
    """
    global current_audio_path, current_audio_name
    current_audio_path = current_audio_name = None
    gc.collect()
    reset_values = (None, pd.DataFrame(), "Amphibians", "Base", "", False)
    return reset_values
# Gradio Interface
# Builds the whole UI inside one Blocks context bound to `demo`: a
# configuration column (species, model version, audio upload, action buttons),
# a results column (results table and feedback form), and the event wiring.
# NOTE(review): the nesting of the `with` blocks below was reconstructed from
# a flattened listing — confirm that the instructions text and the feedback
# accordion sit in the intended columns.
# NOTE(review): several labels start with mis-encoded characters (presumably
# emoji); they are kept exactly as found.
with gr.Blocks(
    title="Species Audio Classifier",
    theme=gr.themes.Soft(),
    css="""
.gradio-container { max-width: 1200px; margin: auto; }
.consent-text { font-size: 0.9em; color: #666; }
.final-prediction { background-color: #e8f5e8 !important; font-weight: bold; }
"""
) as demo:
    # Store current results for feedback
    current_results = gr.State(value=pd.DataFrame())
    gr.Markdown("""
## Species Audio Classifier
**Upload an audio file to identify species using AI models**
*Based on your notebook implementation - Models are loaded from: [RonaldCeballos/SpeciesClassifiers](https://huggingface.co/RonaldCeballos/SpeciesClassifiers)*
π **How it works:**
- Audio is split into 5-second segments
- Each segment is converted to a spectrogram
- AI model predicts species for each segment
- Final prediction is based on voting across all segments
""")
    with gr.Row():
        # Left column: model selection, audio upload and action buttons.
        with gr.Column(scale=1):
            gr.Markdown("### βοΈ Configuration")
            species_selector = gr.Dropdown(
                choices=list(MODEL_CONFIG.keys()),
                label="π― Species Category",
                value="Amphibians",
                info="Select the type of species to identify"
            )
            # NOTE(review): the versions are hard-coded here rather than read
            # from MODEL_CONFIG; keep both in sync.
            model_selector = gr.Dropdown(
                choices=["Base", "M1", "M2"],
                label="π§ Model Version",
                value="Base",
                info="Choose the model version to use"
            )
            # type="filepath": the handlers receive a path string, not samples.
            audio_input = gr.Audio(
                label="Upload Audio File",
                type="filepath",
                sources=["upload"],
                waveform_options={"show_controls": True}
            )
            with gr.Row():
                predict_btn = gr.Button("π Analyze Audio", variant="primary")
                clear_btn = gr.Button("π Clear", variant="secondary")
            gr.Markdown("""
### π‘ Instructions:
1. Select species category
2. Choose model version
3. Upload audio file (WAV, MP3, etc.)
4. Click "Analyze Audio"
5. Review results by 5-second segments
6. Final prediction shown at the bottom
""")
        # Right column: results table and feedback form.
        with gr.Column(scale=2):
            gr.Markdown("### π Results")
            # NOTE(review): the first header is "Chunks" while the prediction
            # functions return a column named "Segment" — confirm which name
            # is intended.
            results_display = gr.Dataframe(
                label="π§ Analyzed Chunks",
                headers=["Chunks", "Time", "Species", "Confidence"],
                wrap=True,
                max_height=500,
                datatype=["str", "str", "str", "str"]
            )
            with gr.Accordion("π¬ Submit Feedback for Model Improvement", open=False):
                gr.Markdown("""
**Help us improve!** Submit your audio and feedback to our private dataset for model training.
*Using the same approach as your notebook implementation*
""")
                consent_checkbox = gr.Checkbox(
                    label="I consent to share this audio and my feedback for research and model improvement purposes",
                    info="Your data will be stored in a private Hugging Face dataset"
                )
                feedback_input = gr.Textbox(
                    label="Your Feedback",
                    placeholder="Example: Species X was misidentified as Y...",
                    lines=3
                )
                feedback_btn = gr.Button("π€ Submit Feedback & Audio", variant="primary")
                feedback_status = gr.Textbox(
                    label="Submission Status",
                    interactive=False
                )
    # Event handlers
    # Run the prediction, then copy the displayed table into `current_results`
    # so the feedback handler can attach it to the submission.
    predict_btn.click(
        fn=predict_species_final,  # Using the enhanced version with voting
        inputs=[species_selector, model_selector, audio_input],
        outputs=results_display
    ).then(
        fn=lambda results: results,
        inputs=[results_display],
        outputs=[current_results]
    )
    feedback_btn.click(
        fn=submit_feedback,
        inputs=[feedback_input, consent_checkbox, species_selector, model_selector, current_results],
        outputs=feedback_status
    )
    # The six outputs match, in order, the 6-tuple returned by clear_interface.
    clear_btn.click(
        fn=clear_interface,
        inputs=None,
        outputs=[audio_input, results_display, species_selector, model_selector, feedback_input, consent_checkbox]
    )
# Configuration for Spaces: listen on every interface, port 7860.
if __name__ == "__main__":
    launch_options = {"server_name": "0.0.0.0", "server_port": 7860}
    demo.launch(**launch_options)