Rename gemi2.py to app.py
Browse files- gemi2.py → app.py +107 -106
gemi2.py → app.py
RENAMED
|
@@ -1,106 +1,107 @@
|
|
| 1 |
-
import streamlit as st
|
| 2 |
-
import numpy as np
|
| 3 |
-
import librosa
|
| 4 |
-
from tensorflow.keras.models import load_model
|
| 5 |
-
import faiss
|
| 6 |
-
import google.generativeai as genai
|
| 7 |
-
from gtts import gTTS
|
| 8 |
-
from io import BytesIO
|
| 9 |
-
|
| 10 |
-
# Function to extract MFCCs
|
| 11 |
-
def extract_mfcc(audio_path, n_mfcc=13, target_sr=8000):
|
| 12 |
-
try:
|
| 13 |
-
audio, sr = librosa.load(audio_path, sr=target_sr) # Load audio with lower sample rate
|
| 14 |
-
mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=n_mfcc)
|
| 15 |
-
return np.mean(mfccs.T, axis=0)
|
| 16 |
-
except Exception as e:
|
| 17 |
-
st.error(f"Error loading audio file: {e}")
|
| 18 |
-
return None
|
| 19 |
-
|
| 20 |
-
# Load the machine learning model
|
| 21 |
-
ml_model = load_model(r"C:\Users\USER\Downloads\model (2).h5")
|
| 22 |
-
|
| 23 |
-
# Function to evaluate recordings
|
| 24 |
-
def evaluate_recording(model, audio_path):
|
| 25 |
-
mfcc = extract_mfcc(audio_path).reshape(1, 1, -1)
|
| 26 |
-
return model.predict(mfcc)
|
| 27 |
-
|
| 28 |
-
# Function to create a FAISS Index
|
| 29 |
-
def create_faiss_index(embeddings):
|
| 30 |
-
dimension = embeddings.shape[1]
|
| 31 |
-
index = faiss.IndexFlatL2(dimension)
|
| 32 |
-
index.add(embeddings.astype(np.float32))
|
| 33 |
-
return index
|
| 34 |
-
|
| 35 |
-
# Function to search in the index
|
| 36 |
-
def search_in_index(index, query_embedding, k=5):
|
| 37 |
-
distances, indices = index.search(query_embedding.astype(np.float32), k)
|
| 38 |
-
return indices, distances
|
| 39 |
-
|
| 40 |
-
# Function to clean text, removing unwanted characters
|
| 41 |
-
def clean_text(raw_text):
|
| 42 |
-
clean_text = raw_text.replace('*', '') # Example: remove asterisks
|
| 43 |
-
return clean_text.strip()
|
| 44 |
-
|
| 45 |
-
# Function to generate concise feedback using Gemini API
|
| 46 |
-
def generate_concise_feedback(indices, distances, descriptions):
|
| 47 |
-
feedback_prompt = """
|
| 48 |
-
User ki Azaan ko expert recitations ke saath compare karke, seedha aur aasaan Hyderabadi Urdu mai feedback dein. User ko unki recitation behtar karne ke liye mashwara dein aur unki overall performance ko 1 se 10 tak score karein. Roman Urdu ka istemal karein.
|
| 49 |
-
"""
|
| 50 |
-
response = gemini_model.generate_content(feedback_prompt)
|
| 51 |
-
feedback_text = response.text.strip()
|
| 52 |
-
return feedback_text
|
| 53 |
-
|
| 54 |
-
# Function to convert feedback to audio
|
| 55 |
-
def text_to_audio(text):
|
| 56 |
-
tts = gTTS(text=text, lang='en')
|
| 57 |
-
audio_file = BytesIO() # Create an in-memory file
|
| 58 |
-
tts.write_to_fp(audio_file) # Write TTS output to the in-memory file
|
| 59 |
-
audio_file.seek(0) # Move the cursor to the beginning of the file
|
| 60 |
-
return audio_file
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
st.
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
if
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
st.
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
|
|
|
|
|
| 1 |
+
import os
from io import BytesIO

import faiss
import google.generativeai as genai
import librosa
import numpy as np
import streamlit as st
from gtts import gTTS
from tensorflow.keras.models import load_model
|
| 9 |
+
|
| 10 |
+
# Function to extract MFCCs
|
| 11 |
+
def extract_mfcc(audio_path, n_mfcc=13, target_sr=8000):
|
| 12 |
+
try:
|
| 13 |
+
audio, sr = librosa.load(audio_path, sr=target_sr) # Load audio with lower sample rate
|
| 14 |
+
mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=n_mfcc)
|
| 15 |
+
return np.mean(mfccs.T, axis=0)
|
| 16 |
+
except Exception as e:
|
| 17 |
+
st.error(f"Error loading audio file: {e}")
|
| 18 |
+
return None
|
| 19 |
+
|
| 20 |
+
# Load the machine learning model
|
| 21 |
+
ml_model = load_model(r"C:\Users\USER\Downloads\model (2).h5")
|
| 22 |
+
|
| 23 |
+
# Function to evaluate recordings
|
| 24 |
+
def evaluate_recording(model, audio_path):
|
| 25 |
+
mfcc = extract_mfcc(audio_path).reshape(1, 1, -1)
|
| 26 |
+
return model.predict(mfcc)
|
| 27 |
+
|
| 28 |
+
# Function to create a FAISS Index
|
| 29 |
+
def create_faiss_index(embeddings):
|
| 30 |
+
dimension = embeddings.shape[1]
|
| 31 |
+
index = faiss.IndexFlatL2(dimension)
|
| 32 |
+
index.add(embeddings.astype(np.float32))
|
| 33 |
+
return index
|
| 34 |
+
|
| 35 |
+
# Function to search in the index
|
| 36 |
+
def search_in_index(index, query_embedding, k=5):
|
| 37 |
+
distances, indices = index.search(query_embedding.astype(np.float32), k)
|
| 38 |
+
return indices, distances
|
| 39 |
+
|
| 40 |
+
# Function to clean text, removing unwanted characters
|
| 41 |
+
def clean_text(raw_text):
|
| 42 |
+
clean_text = raw_text.replace('*', '') # Example: remove asterisks
|
| 43 |
+
return clean_text.strip()
|
| 44 |
+
|
| 45 |
+
# Function to generate concise feedback using Gemini API
|
| 46 |
+
def generate_concise_feedback(indices, distances, descriptions):
|
| 47 |
+
feedback_prompt = """
|
| 48 |
+
User ki Azaan ko expert recitations ke saath compare karke, seedha aur aasaan Hyderabadi Urdu mai feedback dein. User ko unki recitation behtar karne ke liye mashwara dein aur unki overall performance ko 1 se 10 tak score karein. Roman Urdu ka istemal karein.
|
| 49 |
+
"""
|
| 50 |
+
response = gemini_model.generate_content(feedback_prompt)
|
| 51 |
+
feedback_text = response.text.strip()
|
| 52 |
+
return feedback_text
|
| 53 |
+
|
| 54 |
+
# Function to convert feedback to audio
|
| 55 |
+
def text_to_audio(text):
|
| 56 |
+
tts = gTTS(text=text, lang='en')
|
| 57 |
+
audio_file = BytesIO() # Create an in-memory file
|
| 58 |
+
tts.write_to_fp(audio_file) # Write TTS output to the in-memory file
|
| 59 |
+
audio_file.seek(0) # Move the cursor to the beginning of the file
|
| 60 |
+
return audio_file
|
| 61 |
+
|
| 62 |
+
st.sidebar.write("AIzaSyC710KECl8MZRTlmNHNgWnHsvmWOCX3ydk")
|
| 63 |
+
# Streamlit app setup
|
| 64 |
+
st.title('Test Your Azaan Skills With AI')
|
| 65 |
+
st.write('Welcome! Upload your Azaan recitation to get instant feedback to improve.')
|
| 66 |
+
|
| 67 |
+
# Sidebar for API key input
|
| 68 |
+
api_key = st.sidebar.text_input("Enter your Gemini API key", type="password")
|
| 69 |
+
|
| 70 |
+
# Proceed only if the API key is entered
|
| 71 |
+
if api_key:
|
| 72 |
+
# Configure Gemini with the user-provided API key
|
| 73 |
+
genai.configure(api_key=api_key)
|
| 74 |
+
gemini_model = genai.GenerativeModel('gemini-pro')
|
| 75 |
+
|
| 76 |
+
st.write('Please upload your Azaan recitation below:')
|
| 77 |
+
audio_file = st.file_uploader("Click here to upload your audio file", help="Upload your audio file of Azaan recitation.")
|
| 78 |
+
|
| 79 |
+
if audio_file is not None:
|
| 80 |
+
with st.spinner('Analyzing your recitation...'):
|
| 81 |
+
audio_path = audio_file.name
|
| 82 |
+
with open(audio_path, "wb") as f:
|
| 83 |
+
f.write(audio_file.getbuffer())
|
| 84 |
+
|
| 85 |
+
embedding = evaluate_recording(ml_model, audio_path)
|
| 86 |
+
|
| 87 |
+
# Simulate reference embeddings for demonstration purposes
|
| 88 |
+
reference_embeddings = np.random.rand(10, 13) # Replace with actual data in production
|
| 89 |
+
index = create_faiss_index(reference_embeddings)
|
| 90 |
+
indices, distances = search_in_index(index, embedding, k=3)
|
| 91 |
+
|
| 92 |
+
descriptions = ["very good melody, but a bit fast", "perfect clear words", "slight timing issues"]
|
| 93 |
+
feedback = generate_concise_feedback(indices, distances, descriptions)
|
| 94 |
+
cleaned_feedback = clean_text(feedback) # Clean the text
|
| 95 |
+
|
| 96 |
+
st.markdown('## Your Azaan Feedback')
|
| 97 |
+
st.write(cleaned_feedback) # Display cleaned text
|
| 98 |
+
|
| 99 |
+
# Convert cleaned feedback to audio and play it
|
| 100 |
+
audio_feedback = text_to_audio(cleaned_feedback)
|
| 101 |
+
|
| 102 |
+
# Play audio feedback in Streamlit
|
| 103 |
+
st.audio(audio_feedback, format="audio/mp3")
|
| 104 |
+
|
| 105 |
+
st.success("Thank you for using our service! Please come back to track your progress.")
|
| 106 |
+
else:
|
| 107 |
+
st.sidebar.warning("Please enter your Gemini API key to use this service.")
|