# app.py
"""Gradio app that predicts the raga of an uploaded audio clip.

The script loads a trained Keras model together with its fitted scaler and
label encoder, extracts summary audio features with librosa, pairs them with
the text embedding of an empty description, and shows the predicted raga and
its description from ``raga_metadata.csv``.
"""
import functools
import os

import gradio as gr
import joblib
import librosa
import numpy as np
import pandas as pd
import tensorflow as tf
from keras.models import load_model
from transformers import AutoTokenizer, TFAutoModel

# ====================
# 1. Load Model and Assets
# ====================
SAMPLE_RATE = 22050  # Rate every clip is resampled to before feature extraction.
N_MFCC = 18  # Number of MFCC coefficients averaged into features.
MAX_DESCRIPTION_TOKENS = 64  # Truncation length for description text.

model = load_model("raga_predictor_model.keras")
# NOTE(review): joblib files are pickles; only load artifacts you produced.
scaler = joblib.load("scaler.pkl")
encoder = joblib.load("label_encoder.pkl")

# Load tokenizer and BERT model directly from Hugging Face
tokenizer = AutoTokenizer.from_pretrained("ai4bharat/IndicBERTv2-MLM-only")
bert_model = TFAutoModel.from_pretrained("ai4bharat/IndicBERTv2-MLM-only")

# Load metadata: maps each raga name to its human-readable description.
meta = pd.read_csv("raga_metadata.csv")
raga_descriptions = dict(zip(meta['raga'], meta['description']))


# ====================
# 2. Define Utility Functions
# ====================
def extract_features(file_path):
    """Return a one-row DataFrame of averaged audio features for a file.

    The clip is loaded at ``SAMPLE_RATE`` and reduced to the mean chroma STFT,
    the mean spectral centroid, and the per-coefficient mean of ``N_MFCC``
    MFCCs (columns ``mfcc1`` .. ``mfcc18``), in that column order.

    Args:
        file_path: Path of the audio file to analyse.

    Returns:
        A ``pandas.DataFrame`` with a single row of features.
    """
    y, sr = librosa.load(file_path, sr=SAMPLE_RATE)
    features = {
        "chroma_stft": np.mean(librosa.feature.chroma_stft(y=y, sr=sr)),
        "spec_cent": np.mean(librosa.feature.spectral_centroid(y=y, sr=sr)),
    }
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=N_MFCC)
    # Each row of ``mfccs`` is one coefficient over time; column names are 1-based.
    for index, coefficient_track in enumerate(mfccs, start=1):
        features[f"mfcc{index}"] = np.mean(coefficient_track)
    return pd.DataFrame([features])


def tokenize_description(description_text):
    """Embed description text with the loaded tokenizer and BERT model.

    Args:
        description_text: A string or list of strings to embed.

    Returns:
        The model's first output taken at token position 0 for each input
        (presumably the [CLS] embedding for this BERT-style model).
    """
    desc_tok = tokenizer(
        description_text,
        padding=True,
        truncation=True,
        max_length=MAX_DESCRIPTION_TOKENS,
        return_tensors="tf",
    )
    model_outputs = bert_model(
        desc_tok['input_ids'],
        attention_mask=desc_tok['attention_mask'],
    )
    return model_outputs[0][:, 0, :]


@functools.lru_cache(maxsize=1)
def _empty_description_embedding():
    """Return the embedding of the empty placeholder description.

    The app always feeds the same empty description to the model, so the
    BERT forward pass is done once and reused instead of on every request.
    """
    return tokenize_description([""])


def _resolve_audio_path(audio_file):
    """Return the filesystem path for a Gradio audio input, or None.

    Accepts a path (``type="filepath"``) as well as a file-like object that
    exposes ``.name`` (the legacy ``type="file"`` behaviour).
    """
    if audio_file is None:
        return None
    if isinstance(audio_file, (str, os.PathLike)):
        return os.fspath(audio_file)
    return getattr(audio_file, "name", None)


def predict_raga(audio_file):
    """Predict the raga of an uploaded clip and format the result as text.

    Args:
        audio_file: The value supplied by the Gradio audio component: a file
            path, a file-like object with a ``name`` attribute, or ``None``
            when nothing was uploaded.

    Returns:
        A message containing the predicted raga and its description, or a
        prompt to upload a file when no audio was provided.
    """
    audio_path = _resolve_audio_path(audio_file)
    if not audio_path:
        return "Please upload an audio file to get a prediction."

    # Extract features and scale them with the fitted scaler.
    audio_df = extract_features(audio_path)
    audio_scaled = scaler.transform(audio_df)
    # Reshape to (batch=1, steps=1, n_features) for the model's audio input.
    audio_lstm_input = audio_scaled.reshape((1, 1, audio_scaled.shape[1]))

    # No description is collected from the user; use the cached dummy embedding.
    desc_embed = _empty_description_embedding()

    # Predict and map the highest-scoring class index back to its label.
    pred = model.predict([audio_lstm_input, desc_embed])
    raga_pred = encoder.inverse_transform([np.argmax(pred)])[0]

    # Get description
    raga_description = raga_descriptions.get(raga_pred, "No description available.")
    return f"🎵 Predicted Raga: {raga_pred}\n\n📝 Description:\n{raga_description}"


# ====================
# 3. Gradio Interface
# ====================
title = "🎶 Raga Prediction App"
description = "Upload an Indian classical music clip, and I will predict the Raga for you!"

interface = gr.Interface(
    fn=predict_raga,
    # "filepath" hands predict_raga a path string; "file" is no longer accepted.
    inputs=gr.Audio(type="filepath", label="Upload Audio File"),
    outputs="text",
    title=title,
    description=description,
)

interface.launch()