import gradio as gr
from gtts import gTTS
import tempfile
import difflib
import pandas as pd
from Levenshtein import distance as lev_distance
import whisper
import string

# Load the Whisper model once at import time so that every transcription
# request reuses the same instance instead of reloading it per call.
# The "large-v3" checkpoint is loaded here; an earlier revision of this line
# loaded "small" instead. Change the checkpoint name below to switch models.
model = whisper.load_model("large-v3")

def play_text(text):
    """Synthesize Hindi speech for *text* and return the path of the MP3 file.

    Args:
        text: The text to speak. It is sent to gTTS with ``lang='hi'``.

    Returns:
        The filesystem path of a newly created ``.mp3`` temporary file, or
        ``None`` when *text* is empty or only whitespace (nothing to speak).
        The file is created with ``delete=False`` and is not removed here.
    """
    # Nothing to synthesize: return None rather than passing empty text on.
    if not text or not text.strip():
        return None

    tts = gTTS(text=text, lang='hi', slow=False)

    # Reserve a unique file name, then close our handle before gTTS writes to
    # the path. The original left the handle open for the life of the process.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_file:
        audio_path = temp_file.name

    tts.save(audio_path)
    return audio_path

def get_error_type(asr_word, correct_word):
    """Label the kind of mismatch between a recognised word and the expected one.

    Args:
        asr_word: The word taken from the transcription (may be empty).
        correct_word: The word taken from the reference text (may be empty).

    Returns:
        A label string. The checks run in this order:
        1. empty ``asr_word``      -> "Missing word"
        2. empty ``correct_word``  -> the Hindi "extra word" label
        3. Levenshtein distance of at most 2 -> the pronunciation-error label
        4. the words share at least one character -> "Phonetic/Matra error"
        5. otherwise -> "Substitution/Distorted"
    """
    if not asr_word:
        return "Missing word"
    if not correct_word:
        return "अतिरिक्त शब्द"

    edit_cost = lev_distance(asr_word, correct_word)
    if edit_cost <= 2:
        return "उच्चारण दोष (Pronunciation Errors) "

    # Any character in common is treated as a partial (phonetic/matra) match.
    has_common_char = not set(asr_word).isdisjoint(set(correct_word))
    if has_common_char:
        return "Phonetic/Matra error"
    return "Substitution/Distorted"

# Translation table applied to both texts before they are split into words.
# ASCII punctuation is deleted, except the comma, which becomes a space so that
# "a,b" still yields two words. The Devanagari danda (U+0964) and double danda
# (U+0965) are not in string.punctuation, so they are mapped explicitly;
# without this a sentence-final word never matches its transcription.
_PUNCT_TABLE = str.maketrans(
    {
        **dict.fromkeys(string.punctuation),
        ",": " ",
        "\u0964": " ",
        "\u0965": " ",
    }
)


def _split_words(text):
    """Return the whitespace-separated words of *text* with punctuation removed."""
    return (text or "").translate(_PUNCT_TABLE).split()


def compare_hindi_sentences(expected, transcribed):
    """List the word-level differences between a reference and a transcription.

    Both texts are normalised with the same punctuation table and aligned with
    ``difflib.SequenceMatcher`` (transcription as sequence A, reference as B).

    Args:
        expected: The reference text the speaker was asked to read.
        transcribed: The text produced by speech recognition.

    Returns:
        A list of ``(asr_word, correct_word, error_type)`` tuples, one per
        differing word. ``asr_word`` is ``""`` for a word missing from the
        transcription and ``correct_word`` is ``""`` for an extra word.
    """
    expected_words = _split_words(expected)
    transcribed_words = _split_words(transcribed)
    matcher = difflib.SequenceMatcher(None, transcribed_words, expected_words)

    errors = []
    for opcode, i1, i2, j1, j2 in matcher.get_opcodes():
        if opcode == "equal":
            continue
        if opcode == "replace":
            asr_chunk = transcribed_words[i1:i2]
            ref_chunk = expected_words[j1:j2]
            # The two chunks may differ in length; pad the shorter with "" so
            # leftover words are classified as missing/extra by get_error_type.
            for k in range(max(len(asr_chunk), len(ref_chunk))):
                asr_word = asr_chunk[k] if k < len(asr_chunk) else ""
                correct_word = ref_chunk[k] if k < len(ref_chunk) else ""
                errors.append(
                    (asr_word, correct_word, get_error_type(asr_word, correct_word))
                )
        elif opcode == "insert":
            # Present in the reference only: the speaker skipped these words.
            errors.extend(
                ("", word, "Missing word") for word in expected_words[j1:j2]
            )
        elif opcode == "delete":
            # Present in the transcription only: the speaker added these words.
            errors.extend(
                (word, "", "Extra word") for word in transcribed_words[i1:i2]
            )
    return errors


def calculate_accuracy(expected, transcribed):
    """Return the percentage of reference words matched by the transcription.

    Both texts go through the same normalisation as ``compare_hindi_sentences``
    so that punctuation attached to a reference word cannot prevent a match.

    Args:
        expected: The reference text.
        transcribed: The recognised text.

    Returns:
        Matched reference words divided by total reference words, times 100,
        rounded to two decimals. Returns 0 when the reference has no words.
    """
    expected_words = _split_words(expected)
    transcribed_words = _split_words(transcribed)

    total = len(expected_words)
    if total == 0:
        return 0

    matcher = difflib.SequenceMatcher(None, transcribed_words, expected_words)
    correct = sum(
        j2 - j1
        for tag, _i1, _i2, j1, j2 in matcher.get_opcodes()
        if tag == "equal"
    )
    return round((correct / total) * 100, 2)

def transcribe_audio(audio_path, original_text):
    """Transcribe a recording and score it against the reference text.

    Args:
        audio_path: Filesystem path of the recording, or a falsy value when
            nothing was uploaded or recorded.
        original_text: The reference text the speaker was asked to read.

    Returns:
        A ``(result_dict, df_errors)`` pair. On success ``result_dict`` holds
        the transcription, the speaking speed in words per second and the
        reading accuracy in percent, and ``df_errors`` has one row per word
        error. On failure ``result_dict`` is ``{"error": <message>}`` and the
        DataFrame is empty with the same three columns.
    """
    error_columns = ["बिगड़ा हुआ शब्द", "संभावित सही शब्द", "गलती का प्रकार"]

    # Report a missing recording explicitly instead of letting the transcriber
    # fail on it and surfacing whatever low-level message it raises.
    if not audio_path:
        return (
            {"error": "No audio provided. Please upload or record audio first."},
            pd.DataFrame(columns=error_columns),
        )

    try:
        # Use Whisper for transcription
        result = model.transcribe(audio_path, language='hi')
        transcription = result['text'].strip()

        # Error analysis
        errors = compare_hindi_sentences(original_text, transcription)
        df_errors = pd.DataFrame(errors, columns=error_columns)

        # Speaking speed: word count divided by the end timestamp of the last
        # segment; falls back to 1.0 second when no segments were returned.
        transcribed_words = transcription.split()
        duration = result['segments'][-1]['end'] if result.get('segments') else 1.0
        speed = round(len(transcribed_words) / duration, 2) if duration > 0 else 0

        # Accuracy
        accuracy = calculate_accuracy(original_text, transcription)

        result_dict = {
            "📝 Transcribed Text": transcription,
            "⏱️ Speaking Speed (words/sec)": speed,
            "✅ Reading Accuracy (%)": accuracy,
        }
        return result_dict, df_errors
    except Exception as e:
        # UI boundary: turn any failure into a displayable error payload.
        return {"error": str(e)}, pd.DataFrame(columns=error_columns)

# Build the Gradio interface. Components are created in the order they should
# appear on the page, so the statement order below is significant.
with gr.Blocks() as app:
    gr.Markdown("## 🗣️ Hindi Reading & Pronunciation Practice App (OpenAI Whisper)")
    # Top row: reference text input, a button to hear it, and the audio player.
    with gr.Row():
        input_text = gr.Textbox(label="Paste Hindi Text Here", placeholder="यहाँ हिंदी टेक्स्ट लिखें...")
        play_button = gr.Button("🔊 Listen to Text")
        audio_output = gr.Audio(label="Text-to-Speech Output", type="filepath")
    # play_text returns a file path, which is fed to the filepath-typed player.
    play_button.click(play_text, inputs=input_text, outputs=audio_output)

    gr.Markdown("### 🎤 Now upload or record yourself reading the text aloud below:")
    # The recording is handed to the callback as a file path.
    audio_input = gr.Audio(type="filepath", label="Upload or Record Your Voice")
    submit_button = gr.Button("✅ Submit Recording for Checking")
    # transcribe_audio returns (dict, DataFrame); they map onto these two outputs.
    output = gr.JSON(label="Results")
    error_table = gr.Dataframe(label="गलती तालिका (Error Table)")
    submit_button.click(
        transcribe_audio,
        inputs=[audio_input, input_text],
        outputs=[output, error_table]
    )

# Start the app as soon as this module is executed.
app.launch()