|
|
import streamlit as st |
|
|
import numpy as np |
|
|
import librosa |
|
|
from tensorflow.keras.models import load_model |
|
|
import faiss |
|
|
import google.generativeai as genai |
|
|
from gtts import gTTS |
|
|
from io import BytesIO |
|
|
|
|
|
|
|
|
def extract_mfcc(audio_path, n_mfcc=13, target_sr=8000):
    """Load an audio file and return its mean MFCC feature vector.

    Args:
        audio_path: Path to the audio file on disk.
        n_mfcc: Number of MFCC coefficients to compute.
        target_sr: Sample rate the audio is resampled to before analysis.

    Returns:
        A 1-D numpy array of length ``n_mfcc`` (MFCCs averaged over time),
        or ``None`` when loading/analysis fails; the error is shown to the
        user via Streamlit rather than raised.
    """
    try:
        signal, sample_rate = librosa.load(audio_path, sr=target_sr)
        coeffs = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=n_mfcc)
        # Averaging over the time axis yields a fixed-size feature vector
        # regardless of clip duration (same as np.mean(coeffs.T, axis=0)).
        return coeffs.mean(axis=1)
    except Exception as e:
        st.error(f"Error loading audio file: {e}")
        return None
|
|
|
|
|
|
|
|
ml_model = load_model("model (2).h5") |
|
|
|
|
|
|
|
|
def evaluate_recording(model, audio_path):
    """Run the Keras classifier on one recording.

    Args:
        model: Compiled Keras model expecting input of shape (1, 1, n_mfcc).
        audio_path: Path to the audio file to evaluate.

    Returns:
        The model's prediction array, or ``None`` when feature extraction
        failed (the error has already been reported to the user).
    """
    mfcc = extract_mfcc(audio_path)
    # Bug fix: extract_mfcc returns None on load failure; the original code
    # crashed with AttributeError on None.reshape(...).
    if mfcc is None:
        return None
    return model.predict(mfcc.reshape(1, 1, -1))
|
|
|
|
|
|
|
|
def create_faiss_index(embeddings):
    """Build a flat (exact) L2 FAISS index over an embedding matrix.

    Args:
        embeddings: 2-D array of shape (n_vectors, dim).

    Returns:
        A populated ``faiss.IndexFlatL2`` ready for nearest-neighbour search.
    """
    n_dims = embeddings.shape[1]
    flat_index = faiss.IndexFlatL2(n_dims)
    # FAISS requires float32 input.
    vectors = embeddings.astype(np.float32)
    flat_index.add(vectors)
    return flat_index
|
|
|
|
|
|
|
|
def search_in_index(index, query_embedding, k=5):
    """Look up the k nearest stored vectors for each query embedding.

    Args:
        index: A populated FAISS index.
        query_embedding: 2-D array of query vectors (one per row).
        k: Number of neighbours to retrieve per query.

    Returns:
        A ``(indices, distances)`` tuple of arrays — note the order is the
        reverse of what ``index.search`` itself returns.
    """
    queries = query_embedding.astype(np.float32)
    dists, ids = index.search(queries, k)
    return ids, dists
|
|
|
|
|
|
|
|
def clean_text(raw_text):
    """Strip markdown asterisks and surrounding whitespace from *raw_text*."""
    without_markup = raw_text.replace('*', '')
    return without_markup.strip()
|
|
|
|
|
|
|
|
def generate_concise_feedback(indices, distances, descriptions):
    """Ask Gemini for concise Roman-Urdu feedback on the user's Azaan.

    Args:
        indices: FAISS result indices, shape (1, k) — rows of the reference set.
        distances: Matching L2 distances, shape (1, k); smaller means closer.
        descriptions: Human-written notes describing reference recitations.

    Returns:
        The model's feedback text with surrounding whitespace stripped.

    Requires the module-level ``gemini_model`` to be configured first.
    """
    # Bug fix: the original prompt ignored all three arguments, so the LLM
    # never saw the comparison results it was asked to report on. Summarize
    # the nearest matches and include them in the prompt.
    matches = []
    for idx, dist in zip(indices[0], distances[0]):
        # The reference set may be larger than the description list; wrap
        # the index so every match gets some description.
        desc = descriptions[int(idx) % len(descriptions)]
        matches.append(f"- match #{int(idx)} (distance {float(dist):.3f}): {desc}")
    comparison = "\n".join(matches)

    feedback_prompt = f"""
    User ki Azaan ko expert recitations ke saath compare karke, seedha aur aasaan Hyderabadi Urdu mai feedback dein. User ko unki recitation behtar karne ke liye mashwara dein aur unki overall performance ko 1 se 10 tak score karein.Dyhan se sirf Roman Urdu ka istemal karein.

    Nearest expert matches (smaller distance = closer):
    {comparison}
    """
    response = gemini_model.generate_content(feedback_prompt)
    return response.text.strip()
|
|
|
|
|
|
|
|
def text_to_audio(text):
    """Synthesize *text* to speech, returning an in-memory MP3 stream.

    The returned BytesIO is rewound to position 0, ready to hand straight
    to ``st.audio``.
    """
    # NOTE(review): 'en-IN' is a regional gTTS language code — confirm it is
    # still accepted by the installed gTTS version.
    buffer = BytesIO()
    speech = gTTS(text=text, lang='en-IN')
    speech.write_to_fp(buffer)
    buffer.seek(0)
    return buffer
|
|
|
|
|
|
|
|
# SECURITY FIX: a hard-coded live Google API key was previously rendered in
# the sidebar here, exposing the credential to every visitor. It has been
# removed; users supply their own key below. The leaked key should be revoked.
st.title('Test Your Azaan Skills With AI')
st.write('Welcome! Upload your Azaan recitation to get instant feedback to improve.')

# Each user supplies their own Gemini key; masked like a password field.
api_key = st.sidebar.text_input("Enter your Gemini API key", type="password")
|
|
|
|
|
|
|
|
if api_key:
    # Configure the Gemini client with the user-supplied key and create the
    # model instance that generate_concise_feedback reads as a global.
    genai.configure(api_key=api_key)
    gemini_model = genai.GenerativeModel('gemini-pro')

    st.write('Please upload your Azaan recitation below:')
    audio_file = st.file_uploader("Click here to upload your audio file", help="Upload your audio file of Azaan recitation.")

    if audio_file is not None:
        with st.spinner('Analyzing your recitation...'):
            # Persist the upload to the working directory so librosa can read
            # it from a real file path. NOTE(review): the file is never
            # deleted afterwards, and using the raw upload name could collide
            # between users — consider a tempfile instead.
            audio_path = audio_file.name
            with open(audio_path, "wb") as f:
                f.write(audio_file.getbuffer())

            # Classifier output used as the FAISS query embedding.
            embedding = evaluate_recording(ml_model, audio_path)

            # NOTE(review): the "expert" reference set is random placeholder
            # data (10 vectors of dim 13), regenerated on every run — replace
            # with real expert-recitation embeddings. Also confirm the model's
            # output dimension matches 13, or index.search will fail.
            reference_embeddings = np.random.rand(10, 13)
            index = create_faiss_index(reference_embeddings)
            indices, distances = search_in_index(index, embedding, k=3)

            # Human-written notes passed to the LLM alongside the matches.
            descriptions = ["very good melody, but a bit fast", "perfect clear words", "slight timing issues"]
            feedback = generate_concise_feedback(indices, distances, descriptions)
            cleaned_feedback = clean_text(feedback)

            st.markdown('## Your Azaan Feedback')
            st.write(cleaned_feedback)

            # Also read the feedback aloud as an in-memory MP3.
            audio_feedback = text_to_audio(cleaned_feedback)
            st.audio(audio_feedback, format="audio/mp3")

        st.success("Thank you for using our service! Please come back to track your progress.")
else:
    # No key yet — prompt for one instead of running the pipeline.
    st.sidebar.warning("Please enter your Gemini API key to use this service.")
|
|
|