File size: 4,486 Bytes
764b11e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58c70a1
764b11e
 
 
 
 
 
 
1bc8e15
764b11e
 
 
 
 
 
6d4368c
764b11e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import os
import tempfile
from io import BytesIO

import faiss
import google.generativeai as genai
import librosa
import numpy as np
import streamlit as st
from gtts import gTTS
from tensorflow.keras.models import load_model

# Function to extract MFCCs
def extract_mfcc(audio_path, n_mfcc=13, target_sr=8000):
    """Compute the time-averaged MFCC vector for an audio file.

    Args:
        audio_path: Path of the audio file to load.
        n_mfcc: Number of MFCC coefficients to extract.
        target_sr: Sample rate the audio is resampled to (kept low to
            speed up loading and feature extraction).

    Returns:
        A 1-D numpy array of length ``n_mfcc`` (mean over all frames),
        or ``None`` if the file could not be processed (an error is shown
        in the Streamlit UI instead of raising).
    """
    try:
        signal, sample_rate = librosa.load(audio_path, sr=target_sr)
        coefficients = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=n_mfcc)
        return coefficients.T.mean(axis=0)
    except Exception as load_error:
        st.error(f"Error loading audio file: {load_error}")
        return None

# Load the machine learning model
# NOTE(review): loaded eagerly at import time from a hard-coded relative path;
# the file "model (2).h5" must sit next to this script or load_model raises.
# Presumably a Keras model whose input is (batch, 1, n_mfcc) given how
# evaluate_recording reshapes its features — TODO confirm against the model.
ml_model = load_model("model (2).h5")

# Function to evaluate recordings
def evaluate_recording(model, audio_path):
    """Run the model on a recording's MFCC features.

    Args:
        model: A loaded Keras model expecting input shaped (1, 1, n_mfcc).
        audio_path: Path of the audio file to evaluate.

    Returns:
        The model's prediction array, or ``None`` when the audio could not
        be loaded.

    Bug fix: ``extract_mfcc`` returns ``None`` on load failure; the original
    called ``.reshape`` on it unconditionally, raising ``AttributeError``.
    """
    mfcc = extract_mfcc(audio_path)
    if mfcc is None:  # load failed; extract_mfcc already surfaced the error
        return None
    return model.predict(mfcc.reshape(1, 1, -1))

# Function to create a FAISS Index
def create_faiss_index(embeddings):
    """Build a flat (exact) L2 FAISS index over the given embeddings.

    Args:
        embeddings: 2-D array of shape (n_vectors, dim); cast to float32
            because FAISS only accepts 32-bit floats.

    Returns:
        A populated ``faiss.IndexFlatL2`` ready for nearest-neighbor search.
    """
    n_dims = embeddings.shape[1]
    flat_index = faiss.IndexFlatL2(n_dims)
    flat_index.add(embeddings.astype(np.float32))
    return flat_index

# Function to search in the index
def search_in_index(index, query_embedding, k=5):
    """Find the k nearest neighbors of a query embedding.

    Args:
        index: A FAISS index (or any object exposing ``search``).
        query_embedding: 2-D query array; cast to float32 for FAISS.
        k: Number of neighbors to retrieve.

    Returns:
        ``(indices, distances)`` — note the order is swapped relative to
        FAISS's native ``(distances, indices)`` return.
    """
    query = query_embedding.astype(np.float32)
    distances, indices = index.search(query, k)
    return indices, distances

# Function to clean text, removing unwanted characters
def clean_text(raw_text):
    """Strip markdown asterisks and surrounding whitespace from *raw_text*."""
    return raw_text.replace('*', '').strip()

# Function to generate concise feedback using Gemini API
def generate_concise_feedback(indices, distances, descriptions):
    """Ask Gemini for Roman-Urdu feedback grounded in the FAISS matches.

    Args:
        indices: 2-D array of nearest-neighbor indices from the FAISS search.
        distances: 2-D array of the corresponding L2 distances.
        descriptions: Human-readable notes about the reference recitations.

    Returns:
        The model's feedback text, stripped of surrounding whitespace.

    Bug fix: the original ignored all three parameters and sent only a static
    prompt, so the feedback could never reflect the actual comparison. The
    match details are now appended to the instruction prompt (which is kept
    verbatim). Relies on the module-level ``gemini_model``.
    """
    feedback_prompt = """
    User ki Azaan ko expert recitations ke saath compare karke, seedha aur aasaan Hyderabadi Urdu mai feedback dein. User ko unki recitation behtar karne ke liye mashwara dein aur unki overall performance ko 1 se 10 tak score karein.Dyhan se sirf Roman Urdu ka istemal karein.
    """
    # Pair each matched index with its distance; guard against indices that
    # fall outside the descriptions list (references may outnumber them).
    match_lines = []
    for idx, dist in zip(np.ravel(indices), np.ravel(distances)):
        label = descriptions[idx] if 0 <= idx < len(descriptions) else "expert recitation"
        match_lines.append(f"- {label} (L2 distance: {dist:.3f})")
    prompt = feedback_prompt + "\nClosest expert matches:\n" + "\n".join(match_lines)
    response = gemini_model.generate_content(prompt)
    return response.text.strip()

# Function to convert feedback to audio
def text_to_audio(text):
    """Synthesize *text* to speech and return it as an in-memory MP3 buffer.

    Uses the 'en-IN' gTTS voice; the returned BytesIO is rewound so callers
    (e.g. ``st.audio``) can read it from the start.
    """
    buffer = BytesIO()
    gTTS(text=text, lang='en-IN').write_to_fp(buffer)
    buffer.seek(0)  # rewind so the player reads from the beginning
    return buffer

# Streamlit app setup
# SECURITY FIX: the original wrote a hard-coded Gemini API key into the
# sidebar (st.sidebar.write("AIzaSy...")). Credentials must never be shipped
# in source or shown in the UI — that key should be considered compromised
# and revoked; the user supplies their own key below.
st.title('Test Your Azaan Skills With AI')
st.write('Welcome! Upload your Azaan recitation to get instant feedback to improve.')

# Sidebar for API key input
api_key = st.sidebar.text_input("Enter your Gemini API key", type="password")

# Proceed only if the API key is entered
if api_key:
    # Configure Gemini with the user-provided API key
    genai.configure(api_key=api_key)
    gemini_model = genai.GenerativeModel('gemini-pro')

    st.write('Please upload your Azaan recitation below:')
    audio_file = st.file_uploader("Click here to upload your audio file", help="Upload your audio file of Azaan recitation.")

    if audio_file is not None:
        with st.spinner('Analyzing your recitation...'):
            # Persist the upload to a temp file instead of trusting the
            # user-supplied filename (avoids path traversal / collisions
            # when writing into the working directory).
            suffix = os.path.splitext(audio_file.name)[1] or ".wav"
            with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
                tmp.write(audio_file.getbuffer())
                audio_path = tmp.name

            try:
                embedding = evaluate_recording(ml_model, audio_path)
            finally:
                os.unlink(audio_path)  # always remove the temp file

            if embedding is None:
                st.error("Could not analyze the recording. Please upload a valid audio file.")
            else:
                # Simulate reference embeddings for demonstration purposes
                reference_embeddings = np.random.rand(10, 13)  # Replace with actual data in production
                index = create_faiss_index(reference_embeddings)
                indices, distances = search_in_index(index, embedding, k=3)

                descriptions = ["very good melody, but a bit fast", "perfect clear words", "slight timing issues"]
                feedback = generate_concise_feedback(indices, distances, descriptions)
                cleaned_feedback = clean_text(feedback)  # Clean the text

                st.markdown('## Your Azaan Feedback')
                st.write(cleaned_feedback)  # Display cleaned text

                # Convert cleaned feedback to audio and play it
                audio_feedback = text_to_audio(cleaned_feedback)

                # Play audio feedback in Streamlit
                st.audio(audio_feedback, format="audio/mp3")

                st.success("Thank you for using our service! Please come back to track your progress.")
else:
    st.sidebar.warning("Please enter your Gemini API key to use this service.")