Spaces:
Build error
Build error
Upload appg.py
Browse files
appg.py
ADDED
|
@@ -0,0 +1,241 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import whisper
|
| 3 |
+
import librosa
|
| 4 |
+
import numpy as np
|
| 5 |
+
from pydub import AudioSegment
|
| 6 |
+
from pydub.effects import normalize, speedup
|
| 7 |
+
from pydub.silence import split_on_silence
|
| 8 |
+
import tempfile
|
| 9 |
+
import os
|
| 10 |
+
from gtts import gTTS
|
| 11 |
+
import io
|
| 12 |
+
from audio_recorder_streamlit import audio_recorder
|
| 13 |
+
import torch
|
| 14 |
+
from auralis import TTS, TTSRequest
|
| 15 |
+
import random
|
| 16 |
+
import time
|
| 17 |
+
|
| 18 |
+
# Streamlit Page Config and CSS omitted for brevity β use your existing styles
|
| 19 |
+
|
| 20 |
+
# Load Whisper model once
|
| 21 |
+
@st.cache_resource
def load_whisper_model(model_name: str = "base"):
    """Load and cache an OpenAI Whisper model.

    Args:
        model_name: Whisper checkpoint name (default "base", matching the
            original hard-coded value; parameterized for reuse).

    Returns:
        The loaded Whisper model (cached once per Streamlit session/resource).
    """
    return whisper.load_model(model_name)
|
| 24 |
+
|
| 25 |
+
# Load Hugging Face XTTS2 voice cloning model once
|
| 26 |
+
@st.cache_resource
def load_xtts_model(repo_id: str = "AstraMindAI/xtts2-gpt"):
    """Load and cache the Auralis XTTS2 voice-cloning model.

    Args:
        repo_id: Hugging Face repo id (default matches the original
            hard-coded value; parameterized for reuse).

    Returns:
        The loaded TTS model (cached once via st.cache_resource).
    """
    return TTS().from_pretrained(repo_id)
|
| 29 |
+
|
| 30 |
+
# Instantiate the cached models at import time so the first user interaction
# does not pay the model-load cost inside a callback. Both names are read by
# transcribe_audio() and generate_cloned_voice_xtts() below.
whisper_model = load_whisper_model()
xtts_model = load_xtts_model()
|
| 32 |
+
|
| 33 |
+
def create_tts(text, lang='en'):
    """Synthesize *text* with gTTS and return an in-memory MP3 buffer.

    Args:
        text: the text to speak.
        lang: gTTS language code (default 'en', preserving the original
            hard-coded behavior; parameterized for reuse).

    Returns:
        io.BytesIO positioned at the start of the MP3 data, ready for st.audio.
    """
    tts = gTTS(text, lang=lang, slow=False)
    audio_buffer = io.BytesIO()
    tts.write_to_fp(audio_buffer)
    audio_buffer.seek(0)  # rewind so the consumer reads from the beginning
    return audio_buffer
|
| 40 |
+
|
| 41 |
+
def preprocess_audio(file_obj):
    """Normalize a recording, strip silence, and resample to 16 kHz mono WAV.

    Args:
        file_obj: path or file-like object readable by pydub.

    Returns:
        Path to a new temporary WAV file; the CALLER is responsible for
        deleting it when done.
    """
    # BUG FIX: deliberately NOT decorated with @st.cache_data. The function
    # returns a temp-file path that the caller later os.unlink()s, so a cache
    # hit would hand back a dangling path to a deleted file; file-like
    # arguments are also not reliably hashable for the cache key.
    audio = AudioSegment.from_file(file_obj)
    audio = normalize(audio)
    audio = audio.strip_silence(silence_thresh=-40, silence_len=500)
    # Reserve a temp path, then close our handle so ffmpeg (via pydub) can
    # reopen it by name — required on Windows, harmless elsewhere.
    tmp = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
    tmp.close()
    audio.export(tmp.name, format='wav', parameters=['-ar', '16000', '-ac', '1'])
    return tmp.name
|
| 50 |
+
|
| 51 |
+
def transcribe_audio(audio_path):
    """Transcribe a WAV file with Whisper.

    Args:
        audio_path: path to an audio file readable by Whisper.

    Returns:
        (text, segments, avg_confidence) where avg_confidence is in [0, 1].
    """
    result = whisper_model.transcribe(audio_path, word_timestamps=True)
    text = result["text"]
    segments = result["segments"]
    # BUG FIX: openai-whisper segments carry 'avg_logprob', not 'confidence',
    # so the original seg.get('confidence', 0.5) ALWAYS fell back to 0.5.
    # Derive a pseudo-confidence from the mean token log-probability instead,
    # keeping 'confidence' as a preferred key in case a fork provides it.
    confidences = []
    for seg in segments:
        if 'confidence' in seg:
            confidences.append(seg['confidence'])
        elif 'avg_logprob' in seg:
            # exp(avg logprob) maps mean token probability into (0, 1].
            confidences.append(float(min(1.0, max(0.0, np.exp(seg['avg_logprob'])))))
        else:
            confidences.append(0.5)
    avg_confidence = sum(confidences) / len(confidences) if confidences else 0.5
    return text, segments, avg_confidence
|
| 58 |
+
|
| 59 |
+
def analyze_prosody(audio_path, transcript, segments, confidence):
    """Extract pitch, pace, and pause statistics from the recording.

    Args:
        audio_path: path to a WAV file (loaded at 16 kHz mono).
        transcript: transcript text, used only for the word count.
        segments: Whisper segments (unused here; kept for interface stability).
        confidence: average transcription confidence, passed through.

    Returns:
        dict with 'pitch_mean' (Hz), 'pace_wpm', 'pause_ratio' in [0, 1],
        and 'confidence'.
    """
    y, sr = librosa.load(audio_path, sr=16000)
    total_duration = librosa.get_duration(y=y, sr=sr)

    # Mean voiced pitch over frames whose magnitude beats the median;
    # fall back to 150 Hz when nothing voiced is detected.
    pitches, magnitudes = librosa.piptrack(y=y, sr=sr, fmin=75, fmax=300)
    pitch_values = pitches[magnitudes > np.median(magnitudes)]
    voiced = pitch_values[pitch_values > 0]
    pitch_mean = float(np.mean(voiced)) if voiced.size > 0 else 150

    words = len(transcript.split())
    pace_wpm = (words / total_duration) * 60 if total_duration > 0 else 0

    # BUG FIX: the original computed sum/len(y)/sr — dividing the voiced
    # FRACTION by the sample rate a second time, which drove pause_ratio to
    # ~1.0 for any input. The sample counts already cancel without sr.
    intervals = librosa.effects.split(y, top_db=20)
    voiced_samples = sum(end - start for start, end in intervals)
    pause_ratio = 1 - (voiced_samples / len(y)) if len(y) > 0 else 0.0

    return {
        'pitch_mean': pitch_mean,
        'pace_wpm': pace_wpm,
        'pause_ratio': pause_ratio,
        'confidence': confidence
    }
|
| 79 |
+
|
| 80 |
+
def pronunciation_feedback(transcript, segments, prosody):
    """Heuristic pronunciation score in [0, 100].

    Combines transcription confidence, distance from a 120-WPM target pace,
    and a penalty for uneven segment durations. `transcript` is accepted for
    interface stability but not used in the computation.
    """
    durations = [s['end'] - s['start'] for s in segments]
    duration_variance = np.var(durations)
    # Penalize pace proportionally to its distance from the 120-WPM sweet spot.
    pace_penalty = abs(prosody['pace_wpm'] - 120) / 120
    raw = prosody['confidence'] * 100 * (1 - pace_penalty)
    raw = raw - duration_variance * 10
    return max(0, min(100, raw))
|
| 85 |
+
|
| 86 |
+
def calculate_score(prosody, pronun_score, transcript):
    """Blend prosody, pronunciation, and content heuristics into a 0-100 score.

    Weights: prosody 50%, pronunciation 30%, content 20%; capped at 100.
    """
    pitch = prosody['pitch_mean']
    pace = prosody['pace_wpm']
    sub_scores = [
        min(100, max(0, (pitch - 100) / 50 * 100)),  # full marks at >= 150 Hz
        100 if 100 < pace < 150 else 70,             # pace sweet spot
        100 * (1 - prosody['pause_ratio']),          # fewer pauses -> higher
        prosody['confidence'] * 100,                 # ASR confidence
    ]
    prosody_total = (sub_scores[0] + sub_scores[1] + sub_scores[2] + sub_scores[3]) / 4

    word_count = len(transcript.split())
    content_score = min(100, word_count * 0.5 + (prosody['confidence'] * 50))

    weighted = (prosody_total * 0.5) + (pronun_score * 0.3) + (content_score * 0.2)
    return min(100, weighted)
|
| 97 |
+
|
| 98 |
+
def generate_voice_feedback(score, prosody, pronun_score):
    """Compose a spoken-feedback paragraph from the analysis results.

    Args:
        score: overall 0-100 score from calculate_score.
        prosody: dict with 'pace_wpm' and 'pause_ratio' (read here).
        pronun_score: 0-100 pronunciation score.

    Returns:
        A single string of sentences suitable for TTS playback.
    """
    pace = prosody['pace_wpm']
    pauses = prosody['pause_ratio']
    # (removed the original's unused local read of prosody['pitch_mean'])

    # Opening line keyed to the overall score band.
    if score > 90:
        opening = "Excellent work! Your speech was outstanding."
    elif score > 80:
        opening = "Great job! You have strong communication skills."
    elif score > 60:
        opening = "Good effort! You're making solid progress."
    else:
        opening = "Nice try! Keep practicing to improve."

    feedback_parts = [opening]

    # Pace advice: 100-160 WPM is treated as the acceptable band.
    if pace < 100:
        feedback_parts.append(f"Your pace was {pace:.0f} words per minute. Try speaking faster, aiming for 120 to 140 words per minute.")
    elif pace > 160:
        feedback_parts.append(f"You spoke at {pace:.0f} words per minute, which is quite fast. Slow down to around 140 words per minute for better clarity.")
    else:
        feedback_parts.append(f"Your pace of {pace:.0f} words per minute is excellent.")

    # Pause advice: 5-20% silence is treated as balanced.
    if pauses > 0.20:
        feedback_parts.append(f"You paused {pauses:.0%} of the time. Try reducing pauses to 10 to 15 percent for smoother flow.")
    elif pauses < 0.05:
        feedback_parts.append("Consider adding brief pauses between ideas for better comprehension.")
    else:
        feedback_parts.append("Your use of pauses is well balanced.")

    if pronun_score < 80:
        feedback_parts.append("Work on clearer pronunciation by practicing tongue twisters and speaking more slowly.")
    else:
        feedback_parts.append("Your pronunciation is clear and articulate.")

    feedback_parts.append("I've prepared an enhanced version of your speech with optimized pacing. Keep practicing!")
    return " ".join(feedback_parts)
|
| 135 |
+
|
| 136 |
+
def generate_cloned_voice_xtts(audio_path, cleaned_text):
    """Re-synthesize *cleaned_text* in the speaker's own voice via XTTS2.

    Args:
        audio_path: path to the reference recording used for voice cloning.
        cleaned_text: the text to speak (e.g. transcript with fillers removed).

    Returns:
        Path to a temporary WAV file with the cloned speech; the caller is
        responsible for deleting it.
    """
    request = TTSRequest(
        text=cleaned_text,
        speaker_files=[audio_path],
        language="en"
    )
    out = xtts_model.generate_speech(request)
    # BUG FIX: tempfile.mktemp is deprecated and racy (the name can be taken
    # between creation and use); reserve the file atomically instead.
    tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    tmp.close()
    out.save(tmp.name)
    return tmp.name
|
| 146 |
+
|
| 147 |
+
|
| 148 |
+
# App title banner.
_HEADER_HTML = (
    '<div class="main-header"><h1><span class="status-indicator"></span>'
    'π€ FLUENTRA AI</h1><h3>Your Voice-Activated Speech Coach</h3></div>'
)
st.markdown(_HEADER_HTML, unsafe_allow_html=True)

# One-time spoken greeting per browser session.
if not st.session_state.get('greeted', False):
    greeting_text = (
        "Hello! I am Fluentra, your personal speech coach. Click the microphone "
        "button and speak for 20 to 60 seconds. I will analyze your speech and "
        "help you improve."
    )
    st.markdown(f'<div class="voice-message">π {greeting_text}</div>', unsafe_allow_html=True)
    st.audio(create_tts(greeting_text), format="audio/mp3")
    st.session_state['greeted'] = True
|
| 156 |
+
|
| 157 |
+
st.markdown("---")
st.subheader("ποΈ Ready to Record")

# Capture microphone audio in the browser; `audio_bytes` is None until a
# take exists, then holds raw WAV bytes for the analysis section below.
_recorder_options = dict(
    text="Click to Start Recording",
    recording_color="#00f7ff",
    neutral_color="#4a5568",
    icon_size="3x",
    pause_threshold=2.0,
)
audio_bytes = audio_recorder(**_recorder_options)
|
| 166 |
+
|
| 167 |
+
if audio_bytes:
    # ---- Main analysis pipeline: runs once per captured recording. ----
    st.success("β Recording captured!")
    st.audio(audio_bytes, format="audio/wav")

    # Persist the raw recording so the disk-based helpers can read it.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as tmp:
        tmp.write(audio_bytes)
        recorded_path = tmp.name

    processing_msg = "Processing your speech. Please wait."
    st.markdown(f'<div class="voice-message">π {processing_msg}</div>', unsafe_allow_html=True)
    processing_audio = create_tts(processing_msg)
    st.audio(processing_audio, format="audio/mp3", autoplay=True)

    audio_path = None
    try:
        with st.spinner("π§ Analyzing..."):
            # Clean audio -> transcribe -> prosody -> scores -> feedback.
            audio_path = preprocess_audio(recorded_path)
            transcript, segments, confidence = transcribe_audio(audio_path)
            prosody = analyze_prosody(audio_path, transcript, segments, confidence)
            pronun_score = pronunciation_feedback(transcript, segments, prosody)
            score = calculate_score(prosody, pronun_score, transcript)

            feedback_text = generate_voice_feedback(score, prosody, pronun_score)
            feedback_audio = create_tts(feedback_text)

            # Drop filler words before re-synthesizing in the user's voice.
            # NOTE(review): "you know" is two tokens, so this per-word test can
            # never match it — confirm whether multi-word fillers are intended.
            fillers = {"um", "uh", "like", "you know", "er", "ah", "so", "well"}
            cleaned_text = " ".join(w for w in transcript.split() if w.lower() not in fillers)
            cloned_voice_path = generate_cloned_voice_xtts(audio_path, cleaned_text)

        st.session_state['analysis_count'] = st.session_state.get('analysis_count', 0) + 1

        st.markdown("---")
        st.subheader("π¬ Fluentra's Feedback")
        st.markdown(f'<div class="voice-message">π {feedback_text}</div>', unsafe_allow_html=True)
        st.audio(feedback_audio, format="audio/mp3")

        st.markdown("---")
        st.subheader("π Analysis Results")
        col1, col2, col3, col4 = st.columns(4)
        with col1:
            st.markdown(f'<div class="metric-card"><h3>Overall Score</h3><h1>{score:.1f}/100</h1></div>', unsafe_allow_html=True)
        with col2:
            st.markdown(f'<div class="metric-card"><h3>Pace</h3><h1>{prosody["pace_wpm"]:.0f} WPM</h1></div>', unsafe_allow_html=True)
        with col3:
            st.markdown(f'<div class="metric-card"><h3>Pitch</h3><h1>{prosody["pitch_mean"]:.0f} Hz</h1></div>', unsafe_allow_html=True)
        with col4:
            st.markdown(f'<div class="metric-card"><h3>Confidence</h3><h1>{confidence:.0%}</h1></div>', unsafe_allow_html=True)

        st.markdown("---")
        st.subheader("β¨ Your Enhanced Voice")
        enhanced_msg = "Here is your speech with fillers removed and pace optimized."
        st.markdown(f'<div class="voice-message">π {enhanced_msg}</div>', unsafe_allow_html=True)
        st.audio(cloned_voice_path, format="audio/wav")

        st.markdown("---")
        with st.expander("π View Transcription"):
            st.info(transcript)

        count = st.session_state['analysis_count']
        if count == 1:
            closing = "Great start! Feel free to record again to track your improvement."
        else:
            # BUG FIX: the original hard-coded "th" and produced "2th"/"3th";
            # build the correct English ordinal suffix (11th-13th special-cased).
            if 11 <= count % 100 <= 13:
                suffix = "th"
            else:
                suffix = {1: "st", 2: "nd", 3: "rd"}.get(count % 10, "th")
            closing = f"This is your {count}{suffix} analysis. You're making progress!"

        st.markdown(f'<div class="voice-message">π {closing}</div>', unsafe_allow_html=True)
        closing_audio = create_tts(closing)
        st.audio(closing_audio, format="audio/mp3")
    finally:
        # BUG FIX: clean up scratch files even when a pipeline step raises
        # (the original only unlinked on the success path).
        if audio_path is not None and os.path.exists(audio_path):
            os.unlink(audio_path)
        if os.path.exists(recorded_path):
            os.unlink(recorded_path)
|
| 233 |
+
|
| 234 |
+
# Footer
st.markdown("---")
_FOOTER_HTML = """
<div style='text-align: center; color: #00f7ff; padding: 2rem;'>
<p>π€ <strong>FLUENTRA AI</strong> - Voice-Activated Speech Coach</p>
<p>Powered by Whisper AI, Librosa & Google TTS | Β© 2025</p>
</div>
"""
st.markdown(_FOOTER_HTML, unsafe_allow_html=True)
|