| | |
| | import gradio as gr |
| | import pandas as pd |
| | import numpy as np |
| | import librosa |
| | import joblib |
| | import tensorflow as tf |
| | from keras.models import load_model |
| | from transformers import AutoTokenizer, TFAutoModel |
| |
|
| | |
| | |
| | |
# Artefacts loaded once at import time: the Keras classifier, the feature
# scaler applied to the audio features, and the label encoder that maps a
# predicted class index back to a raga name.
# NOTE: joblib.load unpickles its input, so only ship artefact files you trust.
model = load_model("raga_predictor_model.keras")
scaler, encoder = (
    joblib.load(artifact_path)
    for artifact_path in ("scaler.pkl", "label_encoder.pkl")
)
| |
|
| | |
# The tokenizer and the TensorFlow encoder are both pulled from the same
# Hugging Face checkpoint, so the identifier is named once.
INDIC_BERT_CHECKPOINT = "ai4bharat/IndicBERTv2-MLM-only"
tokenizer = AutoTokenizer.from_pretrained(INDIC_BERT_CHECKPOINT)
bert_model = TFAutoModel.from_pretrained(INDIC_BERT_CHECKPOINT)
| |
|
| | |
# Lookup table from raga name to its description, built from the 'raga' and
# 'description' columns of the metadata CSV. If a raga appears more than
# once, the last row wins.
meta = pd.read_csv("raga_metadata.csv")
raga_descriptions = {
    raga_name: raga_text
    for raga_name, raga_text in zip(meta["raga"], meta["description"])
}
| |
|
| | |
| | |
| | |
def extract_features(file_path):
    """Summarise an audio file as a single-row DataFrame of averaged features.

    The clip is loaded with librosa at 22050 Hz. The resulting columns are,
    in order: ``chroma_stft``, ``spec_cent``, then ``mfcc1`` .. ``mfcc18``.
    Each value is ``np.mean`` of the corresponding librosa feature (the MFCC
    columns are averaged per coefficient row).

    Args:
        file_path: Path of the audio file, passed straight to ``librosa.load``.

    Returns:
        A ``pandas.DataFrame`` with exactly one row and 20 columns.
    """
    signal, sample_rate = librosa.load(file_path, sr=22050)

    chroma = librosa.feature.chroma_stft(y=signal, sr=sample_rate)
    centroid = librosa.feature.spectral_centroid(y=signal, sr=sample_rate)
    mfcc_matrix = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=18)

    # Insertion order defines the column order of the returned frame.
    row = {"chroma_stft": np.mean(chroma), "spec_cent": np.mean(centroid)}
    row.update(
        (f"mfcc{index}", np.mean(coefficients))
        for index, coefficients in enumerate(mfcc_matrix, start=1)
    )
    return pd.DataFrame([row])
| |
|
def tokenize_description(description_text):
    """Embed description text with the module-level tokenizer and BERT model.

    Args:
        description_text: Text (or list of texts) handed to the tokenizer.
            Inputs are padded and truncated to at most 64 tokens.

    Returns:
        The ``[:, 0, :]`` slice of the model's first output, i.e. the vector
        at the first token position for every input text (presumably the
        [CLS] embedding -- confirm against the checkpoint's tokenizer).
    """
    encoded = tokenizer(
        description_text,
        padding=True,
        truncation=True,
        max_length=64,
        return_tensors="tf",
    )
    outputs = bert_model(
        encoded["input_ids"],
        attention_mask=encoded["attention_mask"],
    )
    token_states = outputs[0]
    return token_states[:, 0, :]
| |
|
def predict_raga(audio_file):
    """Predict the raga of an uploaded clip and format the result for display.

    Args:
        audio_file: The uploaded audio. Either a filesystem path (``str`` or
            path-like object) or a file-like object that exposes its path via
            ``.name``. ``None`` means nothing was uploaded.

    Returns:
        A text block naming the predicted raga followed by its description
        (or "No description available." when the raga has no entry in
        ``raga_descriptions``). When ``audio_file`` is ``None`` a short
        prompt asking for an upload is returned instead.
    """
    # Submitting without an upload passes None; answer with a hint rather
    # than failing on `.name`.
    if audio_file is None:
        return "No audio file was provided. Please upload a clip and try again."

    # Accept a plain path as well as a temp-file wrapper carrying `.name`,
    # so the function works whichever form the audio component delivers.
    if isinstance(audio_file, str) or hasattr(audio_file, "__fspath__"):
        audio_path = audio_file
    else:
        audio_path = audio_file.name

    audio_df = extract_features(audio_path)
    audio_scaled = scaler.transform(audio_df)
    # Reshape the single feature row to (1, 1, n_features) -- presumably
    # (batch, timesteps, features) for the model's audio input.
    audio_lstm_input = audio_scaled.reshape((1, 1, audio_scaled.shape[1]))

    # No description is collected from the user, so the text input of the
    # model always receives the embedding of an empty string.
    description_text = ""
    desc_embed = tokenize_description([description_text])

    pred = model.predict([audio_lstm_input, desc_embed])
    # argmax runs over the flattened scores; for a one-row prediction that
    # is the winning class index.
    raga_pred = encoder.inverse_transform([np.argmax(pred)])[0]

    # Named differently from the module-level `description` (the UI blurb)
    # to avoid shadowing it.
    raga_description = raga_descriptions.get(raga_pred, "No description available.")

    return f"🎵 Predicted Raga: {raga_pred}\n\n📝 Description:\n{raga_description}"
| |
|
| | |
| | |
| | |
# Text shown at the top of the web page.
title = "🎶 Raga Prediction App"
description = "Upload an Indian classical music clip, and I will predict the Raga for you!"

# Single audio upload in, plain text out.
# NOTE(review): recent Gradio releases document only "numpy" and "filepath"
# for the Audio `type` argument -- confirm the pinned Gradio version still
# accepts "file" before upgrading.
audio_input = gr.Audio(type="file", label="Upload Audio File")

interface = gr.Interface(
    predict_raga,
    audio_input,
    "text",
    title=title,
    description=description,
)

# Starts the web server as soon as this module is executed.
interface.launch()
| |
|