File size: 4,321 Bytes
ce69e96
595953f
59892e7
ce69e96
7d14fa6
595953f
8641ba3
be7422d
 
59892e7
3dceea0
ce69e96
b1f9f9d
be7422d
 
 
 
e7ec41b
be7422d
 
 
415df60
2540048
5a1addb
788e261
 
 
 
 
 
5a1addb
788e261
 
 
5a1addb
be7422d
e7ec41b
be7422d
 
e7ec41b
 
 
 
 
 
 
 
 
2540048
e7ec41b
 
 
b7abd3a
e7ec41b
b7abd3a
 
e7ec41b
 
 
 
 
 
 
 
b7abd3a
0fdea7b
ce69e96
 
244ae39
be7422d
5a1addb
d2a7428
244ae39
788e261
b7abd3a
e7ec41b
1c0cc8c
d2a7428
415df60
 
 
7d55a5c
415df60
7d55a5c
ce69e96
 
 
 
 
 
 
 
 
d2a7428
244ae39
ce69e96
 
 
244ae39
ce69e96
3dceea0
 
 
ce69e96
244ae39
d2a7428
244ae39
3dceea0
0fdea7b
 
 
 
 
ce69e96
e7ec41b
ce69e96
3dceea0
2540048
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import json
import os
import shutil
import subprocess
import tempfile
import zipfile

import requests
import sounddevice as sd
import streamlit as st
import vosk
from groq import Groq
from gtts import gTTS

# Initialize Groq client.
# SECURITY FIX: the API key was hard-coded in source (and therefore leaked in
# version control). Read it from the environment instead — set GROQ_API_KEY
# in the shell or via Streamlit secrets before launching the app.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")
if not GROQ_API_KEY:
    st.error("GROQ_API_KEY environment variable is not set.")
client = Groq(api_key=GROQ_API_KEY)

# Function to download and extract the Vosk model
def download_and_extract_model(model_url, model_path):
    """Download the Vosk model zip from *model_url* and install it in *model_path*.

    Vosk release zips contain a single top-level directory (e.g.
    ``vosk-model-small-en-us-0.15/``); its contents are flattened into
    *model_path* so that ``model_path/am/final.mdl`` etc. exist directly,
    which is the layout ``validate_model_files()`` checks for.

    Returns:
        True on success; False (after showing a Streamlit error) on failure.
    """
    try:
        st.write("Downloading Vosk model. Please wait...")
        response = requests.get(model_url, stream=True)
        if response.status_code != 200:
            raise Exception(f"Failed to download model: HTTP {response.status_code}")

        with tempfile.TemporaryDirectory() as tmp_dir:
            zip_path = os.path.join(tmp_dir, "model.zip")
            with open(zip_path, "wb") as file:
                # 64 KiB chunks: the old 1 KiB chunks made a ~40 MB download slow.
                for chunk in response.iter_content(chunk_size=64 * 1024):
                    if chunk:
                        file.write(chunk)

            st.write("Extracting Vosk model...")
            extract_dir = os.path.join(tmp_dir, "extracted")
            with zipfile.ZipFile(zip_path, "r") as zip_ref:
                zip_ref.extractall(extract_dir)

            # BUG FIX: extracting straight into model_path left the files at
            # model_path/<zip-top-folder>/am/..., so validation always failed
            # and the model was re-downloaded forever. If the archive has a
            # single top-level directory, flatten its contents into model_path.
            entries = os.listdir(extract_dir)
            if len(entries) == 1 and os.path.isdir(os.path.join(extract_dir, entries[0])):
                src_dir = os.path.join(extract_dir, entries[0])
            else:
                src_dir = extract_dir
            os.makedirs(model_path, exist_ok=True)
            for name in os.listdir(src_dir):
                dst = os.path.join(model_path, name)
                # Clear any stale leftovers from a previous partial install.
                if os.path.isdir(dst):
                    shutil.rmtree(dst)
                elif os.path.exists(dst):
                    os.remove(dst)
                shutil.move(os.path.join(src_dir, name), dst)

        st.write("Vosk model setup completed.")
        return True
    except Exception as e:
        st.error(f"Failed to download or extract the Vosk model: {e}")
        return False

# Function to validate the Vosk model structure
def validate_model_files(model_path):
    """Check that the key Vosk model files exist under *model_path*.

    Returns True when every required file is present; otherwise reports the
    missing paths via Streamlit and returns False.
    """
    required_files = (
        "am/final.mdl",
        "graph/phones/align_lexicon.int",
    )
    missing_files = []
    for relative_path in required_files:
        if not os.path.exists(os.path.join(model_path, relative_path)):
            missing_files.append(relative_path)
    if missing_files:
        st.write(f"Missing required files: {', '.join(missing_files)}")
        return False
    return True

# Function to ensure the Vosk model is ready
def ensure_vosk_model(model_path, model_url):
    """Validate the local Vosk model, re-downloading it on failure.

    Makes up to three validation attempts; each failed validation triggers a
    fresh download. Returns True once validation succeeds, False when a
    download fails or all attempts are exhausted.
    """
    max_attempts = 3
    attempt = 0
    while attempt < max_attempts:
        st.write(f"Validating Vosk model (Attempt {attempt + 1}/3)...")
        if validate_model_files(model_path):
            return True
        st.error("Validation failed. Re-downloading the model...")
        if not download_and_extract_model(model_url, model_path):
            return False
        attempt += 1
    st.error("Failed to download or validate the Vosk model after multiple attempts.")
    return False

# Function to capture audio and process speech
def capture_audio():
    """Record 5 seconds of microphone audio and transcribe it with Vosk.

    Downloads and validates the Vosk model on first use.

    Returns:
        The recognized text, a fallback message when nothing was recognized,
        or a string starting with "Error" describing the failure (the caller
        branches on that prefix).
    """
    try:
        model_path = "model"
        model_url = "https://alphacephei.com/vosk/models/vosk-model-small-en-us-0.15.zip"

        # Ensure Vosk model is valid
        if not os.path.exists(model_path):
            os.makedirs(model_path, exist_ok=True)
        if not ensure_vosk_model(model_path, model_url):
            return "Error: Unable to download or validate the Vosk model."

        # Initialize Vosk model
        try:
            model = vosk.Model(model_path)
        except Exception as e:
            return f"Error: Failed to initialize the Vosk model. Details: {e}"

        # Record audio (mono 16-bit PCM, the format KaldiRecognizer expects).
        samplerate = 16000
        duration = 5  # seconds
        st.write("Recording... Speak now!")
        audio = sd.rec(int(samplerate * duration), samplerate=samplerate, channels=1, dtype="int16")
        sd.wait()

        # Speech-to-text using Vosk.
        # BUG FIX 1: the recognizer output was parsed with eval(), which
        # executes arbitrary code; vosk returns JSON, so use json.loads().
        # BUG FIX 2: when AcceptWaveform() returns False the partial
        # transcription was discarded — FinalResult() recovers it.
        rec = vosk.KaldiRecognizer(model, samplerate)
        if rec.AcceptWaveform(audio.tobytes()):
            result = json.loads(rec.Result())
        else:
            result = json.loads(rec.FinalResult())
        return result.get("text") or "Unable to recognize speech."
    except Exception as e:
        return f"Error capturing audio: {str(e)}"

# Text-to-speech helper.
# BUG FIX: speak_text() was called below but never defined anywhere in the
# file, so every successful recognition crashed with a NameError. Implement
# it with the already-imported gTTS and play the clip in the Streamlit page.
def speak_text(text):
    """Synthesize *text* with gTTS and render an audio player for it."""
    try:
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
            gTTS(text=text, lang="en").save(tmp.name)
        st.audio(tmp.name, format="audio/mp3")
    except Exception as e:
        # Best-effort: a TTS failure should not hide the corrected text above.
        st.error(f"Text-to-speech failed: {e}")

# Streamlit interface
st.title("Spoken English Practice")
if st.button("Speak Now"):
    user_input = capture_audio()
    if user_input.startswith("Error"):
        st.error(user_input)
    else:
        st.write(f"You said: {user_input}")
        # Ask the Groq LLM for a grammar-corrected version of the utterance.
        corrected = client.chat.completions.create(
            messages=[{"role": "user", "content": f"Correct the grammar: {user_input}"}],
            model="llama-3.3-70b-versatile",
            stream=False
        ).choices[0].message.content.strip()
        st.write(f"Corrected: {corrected}")
        # Speak the corrected text
        speak_text(corrected)

st.write("Ready to practice? Click 'Speak Now'!")