import gradio as gr
import json
import re
from collections import Counter

# Lowercase English function words that carry little topical meaning.
# extract_keywords and summarize_text skip tokens found in this set.
STOPWORDS = {
    "the", "a", "an", "and", "or", "but", "in", "on", "at", "to", "for",
    "of", "with", "is", "are", "was", "were", "be", "been", "by", "from",
    "that", "this", "it", "as", "your", "you", "we", "they", "he", "she"
}

# Lowercase lexicon of words that analyze_sentiment counts as positive
# (or as negative when the preceding token is a negation).
POSITIVE_WORDS = {
    "good", "great", "excellent", "amazing", "wonderful", "fantastic", "love",
    "loved", "like", "happy", "best", "awesome", "nice", "perfect", "beautiful",
    "brilliant", "positive", "pleased", "delighted", "enjoy", "enjoyed", "superb",
    "outstanding", "favorite", "recommend", "helpful", "impressive", "win", "won"
}

# Lowercase lexicon of words that analyze_sentiment counts as negative
# (or as positive when the preceding token is a negation).
NEGATIVE_WORDS = {
    "bad", "terrible", "awful", "horrible", "hate", "hated", "dislike", "poor",
    "worst", "ugly", "disappointing", "disappointed", "sad", "angry", "broken",
    "negative", "useless", "boring", "annoying", "fail", "failed", "wrong",
    "slow", "difficult", "confusing", "lacking", "problem", "issue", "bug"
}

# Tokens that flip the polarity of a sentiment word when they appear as the
# token directly before it (see analyze_sentiment). Membership is tested on
# whole tokens, so an entry only matches a token equal to it.
NEGATIONS = {"not", "no", "never", "n't", "without", "hardly", "barely", "neither", "nor"}

# Per-language sets of frequent lowercase words used by detect_language, which
# scores a text by how many of its tokens fall in each set.
# Insertion order matters: detect_language picks the winner with max(), so on a
# tie the language listed first here wins.
# Some entries are spelled without diacritics (e.g. "fur", "nao"); they only
# match tokens written that way, since tokens are not accent-folded.
LANGUAGE_STOPWORDS = {
    "English": {"the", "and", "is", "in", "to", "of", "that", "it", "for", "with", "was", "on", "are", "you", "this"},
    "Spanish": {"el", "la", "de", "que", "y", "los", "en", "un", "una", "es", "por", "con", "para", "del", "las"},
    "French": {"le", "la", "les", "de", "et", "un", "une", "des", "est", "que", "en", "dans", "pour", "qui", "avec"},
    "German": {"der", "die", "das", "und", "ist", "den", "ein", "eine", "zu", "mit", "auf", "fur", "nicht", "von", "im"},
    "Italian": {"il", "la", "di", "che", "un", "una", "per", "con", "non", "sono", "del", "della", "gli", "le", "ed"},
    "Portuguese": {"o", "a", "de", "que", "do", "da", "em", "um", "uma", "para", "com", "nao", "os", "as", "se"},
}

# Maps a lowercase misspelling to its suggested correction.
# check_spelling looks up each punctuation-stripped, lowercased word here;
# words not present as keys are never flagged.
COMMON_MISSPELLINGS = {
    "teh": "the", "recieve": "receive", "seperate": "separate", "definately": "definitely",
    "occured": "occurred", "untill": "until", "wich": "which", "thier": "their",
    "alot": "a lot", "becuase": "because", "wierd": "weird", "accomodate": "accommodate",
    "neccessary": "necessary", "occassion": "occasion", "tommorow": "tomorrow", "grammer": "grammar",
    "beleive": "believe", "calender": "calendar", "concious": "conscious", "embarass": "embarrass",
    "existance": "existence", "goverment": "government", "independant": "independent",
    "occurence": "occurrence", "priviledge": "privilege", "publically": "publicly",
    "recomend": "recommend", "refered": "referred", "succesful": "successful", "truely": "truly",
    "writting": "writing", "adress": "address", "arguement": "argument", "commitee": "committee",
    "enviroment": "environment", "febuary": "february", "foriegn": "foreign", "gaurd": "guard",
    "harrass": "harass", "liason": "liaison", "maintainance": "maintenance", "mispell": "misspell",
    "noticable": "noticeable", "persistant": "persistent", "posession": "possession",
    "questionaire": "questionnaire", "rythm": "rhythm", "supercede": "supersede",
    "threshhold": "threshold", "tendancy": "tendency", "vaccuum": "vacuum",
}

def _tokenize(text: str):
    return [w.strip(".,!?;:\"'()[]").lower() for w in text.split()]

def _split_sentences(text: str):
    return [s.strip() for s in re.split(r"(?<=[.!?])\s+", text.strip()) if s.strip()]

def analyze_text(text: str) -> str:
    """Analyze text and return basic size statistics.

    Words are whitespace-separated chunks. Sentences are counted as runs of
    terminal punctuation ('.', '!', '?'), so "..." or "?!" ends one sentence
    rather than several. Non-empty text with no terminator counts as a single
    sentence; empty text has zero sentences.

    Args:
        text: The input text to analyze

    Returns:
        JSON string with character, word and sentence counts plus the average
        word length (characters per word) and average sentence length (words
        per sentence)
    """
    words = text.split()
    chars = len(text)
    # Sum word lengths so that tabs and newlines are excluded as well as
    # plain spaces; multi-line input would otherwise inflate this figure.
    chars_no_spaces = sum(len(word) for word in words)

    # One sentence per run of terminators. Note this is still a heuristic:
    # a decimal point such as the one in "3.14" also counts.
    terminator_runs = len(re.findall(r"[.!?]+", text))
    sentences = max(terminator_runs, 1) if words else 0

    avg_word_length = round(chars_no_spaces / len(words), 2) if words else 0
    # max(..., 1) only guards the division for empty text.
    avg_sentence_length = round(len(words) / max(sentences, 1), 2)

    return json.dumps({
        "total_characters": chars,
        "characters_without_spaces": chars_no_spaces,
        "total_words": len(words),
        "total_sentences": sentences,
        "average_word_length": avg_word_length,
        "average_sentence_length": avg_sentence_length
    }, indent=2)

def extract_keywords(text: str, count: int = 5) -> str:
    """Extract keywords (most common non-stopword words) from text.

    Words are normalized with the shared tokenizer (lowercased, surrounding
    punctuation stripped) before stopwords are removed, so "The," is
    recognized as the stopword "the". Tokens that consist only of punctuation
    are discarded.

    Args:
        text: The input text
        count: Number of keywords to return (default 5). Non-integer numeric
            values are truncated; negative values are treated as 0.

    Returns:
        JSON string with keywords and frequencies, most frequent first
    """
    # UI sliders and remote callers may deliver the count as a float, which
    # Counter.most_common cannot use as a limit.
    limit = max(int(count), 0)

    # Normalize first, then filter: filtering raw chunks lets punctuated
    # stopwords slip through and leaves empty strings in the counts.
    candidates = [w for w in _tokenize(text) if w and w not in STOPWORDS]
    top_words = Counter(candidates).most_common(limit)

    return json.dumps({
        "keywords": [{"word": w, "frequency": f} for w, f in top_words]
    }, indent=2)

def _count_syllables(word: str) -> int:
    """Estimate the syllables in one word by counting its vowel groups."""
    letters = re.sub(r"[^a-z]", "", word.lower())
    if not letters:
        return 0
    groups = len(re.findall(r"[aeiouy]+", letters))
    # A trailing "e" is usually silent ("make", "time"), except in "-le"
    # endings such as "table" where it carries a syllable.
    if groups > 1 and letters.endswith("e") and not letters.endswith("le"):
        groups -= 1
    return max(groups, 1)


def check_reading_level(text: str) -> str:
    """Estimate reading difficulty with the Flesch-Kincaid grade formula.

    grade = 0.39 * (words / sentences) + 11.8 * (syllables / words) - 15.59,
    clamped at 0. Syllables are estimated per word from vowel groups, and
    sentences are counted as runs of '.', '!' or '?' (at least one). Both are
    heuristics, so the result is an approximation.

    Args:
        text: The input text

    Returns:
        JSON string with the numeric grade level and a school-level label, or
        an "error" entry when the text contains no words
    """
    tokens = text.split()
    if not tokens:
        return json.dumps({"error": "No text to analyze"}, indent=2)

    word_count = len(tokens)
    # Count runs of terminators so "..." or "?!" ends a single sentence.
    sentence_count = max(len(re.findall(r"[.!?]+", text)), 1)
    # Counting every vowel letter would treat "queen" as three syllables and
    # push almost any text to college level; count vowel groups instead.
    syllable_count = sum(_count_syllables(token) for token in tokens)

    grade = max(
        0.0,
        0.39 * (word_count / sentence_count)
        + 11.8 * (syllable_count / word_count)
        - 15.59,
    )

    if grade < 6:
        level = "Elementary School"
    elif grade < 9:
        level = "Middle School"
    elif grade < 13:
        level = "High School"
    else:
        level = "College/Academic"

    return json.dumps({
        "grade_level": round(grade, 1),
        "reading_level": level
    }, indent=2)

def analyze_sentiment(text: str) -> str:
    """Detect the emotional tone of text as positive, negative, or neutral.

    Uses a keyword lexicon with simple negation handling: a sentiment word
    whose immediately preceding token is a negation (a NEGATIONS entry, or a
    contraction ending in "n't" such as "don't" or "isn't") is counted with
    the opposite polarity.

    Args:
        text: The input text to analyze

    Returns:
        JSON string with sentiment label, score in [-1, 1], and matched word
        counts, or an "error" entry when the text contains no words
    """
    words = _tokenize(text)
    if not words:
        return json.dumps({"error": "No text to analyze"}, indent=2)

    positive = negative = 0
    # Pair every token with its predecessor ("" for the first token).
    for previous, word in zip([""] + words, words):
        # Tokens are whole words, so the "n't" entry in NEGATIONS never
        # matches a contraction; detect the suffix explicitly (straight or
        # typographic apostrophe).
        negated = previous in NEGATIONS or previous.endswith(("n't", "n’t"))
        if word in POSITIVE_WORDS:
            if negated:
                negative += 1
            else:
                positive += 1
        elif word in NEGATIVE_WORDS:
            if negated:
                positive += 1
            else:
                negative += 1

    total = positive + negative
    score = round((positive - negative) / total, 2) if total else 0.0

    if score > 0.1:
        label = "positive"
    elif score < -0.1:
        label = "negative"
    else:
        label = "neutral"

    return json.dumps({
        "sentiment": label,
        "score": score,
        "positive_matches": positive,
        "negative_matches": negative
    }, indent=2)

def detect_language(text: str) -> str:
    """Guess which supported language the text is written in.

    Each language is scored by the number of tokens that appear in its
    stopword set. The highest-scoring language wins; on a tie the language
    listed first in LANGUAGE_STOPWORDS is chosen. Confidence is the winning
    score divided by the total token count.

    Args:
        text: The input text

    Returns:
        JSON string with the detected language ("Unknown" when nothing
        matched), confidence, and per-language scores, or an "error" entry
        when the text yields no tokens
    """
    tokens = _tokenize(text)
    if len(tokens) == 0:
        return json.dumps({"error": "No text to analyze"}, indent=2)

    # Tally stopword hits per language, keeping LANGUAGE_STOPWORDS order.
    scores = {}
    for lang, vocabulary in LANGUAGE_STOPWORDS.items():
        hits = 0
        for token in tokens:
            if token in vocabulary:
                hits += 1
        scores[lang] = hits

    best = max(scores, key=lambda lang: scores[lang])
    top_hits = scores[best]
    confidence = round(top_hits / len(tokens), 2)
    language = "Unknown" if top_hits <= 0 else best

    result = {
        "language": language,
        "confidence": confidence,
        "scores": scores
    }
    return json.dumps(result, indent=2)

def summarize_text(text: str, sentence_count: int = 2) -> str:
    """Create a short extractive summary by selecting the most important sentences.

    Sentences are ranked by the average document-wide frequency of their
    non-stopword terms; the top ones are returned in their original order.
    If the text has no more sentences than requested, it is returned whole.

    Args:
        text: The input text
        sentence_count: Number of sentences to keep in the summary (default 2).
            Non-integer numeric values are truncated; negative values are
            treated as 0.

    Returns:
        JSON string with the summary and original sentence count
    """
    # UI sliders and remote callers may send a float, which cannot be used as
    # a slice bound; a negative bound would silently slice from the end.
    keep = max(int(sentence_count), 0)

    sentences = _split_sentences(text)
    if len(sentences) <= keep:
        return json.dumps({
            "summary": text.strip(),
            "original_sentences": len(sentences)
        }, indent=2)

    freq = Counter(w for w in _tokenize(text) if w and w not in STOPWORDS)

    def sentence_score(sentence: str) -> float:
        # Average over content terms only: stopwords have no frequency entry,
        # so including them in the denominator would penalize a sentence just
        # for containing ordinary function words.
        terms = [w for w in _tokenize(sentence) if w and w not in STOPWORDS]
        return sum(freq[w] for w in terms) / len(terms) if terms else 0.0

    scores = [sentence_score(s) for s in sentences]
    # sorted() is stable, so equally scored sentences keep document order.
    ranked = sorted(range(len(sentences)), key=scores.__getitem__, reverse=True)
    chosen = sorted(ranked[:keep])
    summary = " ".join(sentences[i] for i in chosen)

    return json.dumps({
        "summary": summary,
        "original_sentences": len(sentences)
    }, indent=2)

def check_spelling(text: str) -> str:
    """Report words that match a known list of common English misspellings.

    Every whitespace-separated word is lowercased and stripped of surrounding
    punctuation, then looked up in COMMON_MISSPELLINGS. Each misspelling is
    reported once, in order of first appearance.

    Args:
        text: The input text

    Returns:
        JSON string with the count and list of misspelled words with suggestions
    """
    edge_punctuation = ".,!?;:\"'()[]"
    normalized = (raw.strip(edge_punctuation).lower() for raw in text.split())

    # dict.fromkeys de-duplicates while preserving first-seen order.
    unique_hits = dict.fromkeys(
        token for token in normalized if token in COMMON_MISSPELLINGS
    )
    report = [
        {"word": token, "suggestion": COMMON_MISSPELLINGS[token]}
        for token in unique_hits
    ]

    return json.dumps({
        "misspelled_count": len(report),
        "misspelled_words": report
    }, indent=2)

def readability_tips(text: str) -> str:
    """Produce a list of suggestions for making the text easier to read.

    Five heuristic checks run in order: sentences over 25 words, words of 13
    or more characters, a high share of words ending in "ly", likely passive
    constructions (a form of "to be" followed by a word ending in "ed"), and
    common filler words. If nothing is flagged, a single all-clear message is
    returned.

    Args:
        text: The input text

    Returns:
        JSON string with a list of actionable writing tips, or an "error"
        entry when the text contains no words
    """
    raw_words = text.split()
    if len(raw_words) == 0:
        return json.dumps({"error": "No text to analyze"}, indent=2)

    edge_punctuation = ".,!?;:\"'()[]"
    bare_words = [w.strip(edge_punctuation) for w in raw_words]
    lowered_words = [w.lower() for w in bare_words]
    advice = []

    # Long sentences
    n_long_sentences = sum(1 for s in _split_sentences(text) if len(s.split()) > 25)
    if n_long_sentences > 0:
        advice.append(f"{n_long_sentences} sentence(s) exceed 25 words; consider splitting them for clarity.")

    # Long words
    n_long_words = sum(1 for w in bare_words if len(w) >= 13)
    if n_long_words > 0:
        advice.append(f"{n_long_words} long word(s) (13+ characters); simpler synonyms may read more easily.")

    # -ly words: tolerated up to one per 20 words, and always at least one
    n_adverbs = sum(1 for w in lowered_words if w.endswith("ly"))
    adverb_allowance = max(1, len(raw_words) // 20)
    if n_adverbs > adverb_allowance:
        advice.append(f"Frequent -ly adverbs ({n_adverbs}); trimming some tightens the prose.")

    # Passive voice
    passive_hits = re.findall(r"\b(?:was|were|been|be|is|are)\s+\w+ed\b", text.lower())
    n_passive = len(passive_hits)
    if n_passive > 0:
        advice.append(f"{n_passive} possible passive-voice construction(s); active voice is usually clearer.")

    # Filler words
    filler_vocabulary = {"very", "really", "just", "actually", "basically", "literally", "quite"}
    n_fillers = sum(1 for w in lowered_words if w in filler_vocabulary)
    if n_fillers > 0:
        advice.append(f"{n_fillers} filler word(s) detected (e.g. very, really, just); removing them strengthens writing.")

    if len(advice) == 0:
        advice.append("No major readability issues detected. Nice and clear!")

    return json.dumps({"tips": advice}, indent=2)

# Build the web UI: one tab per tool. Every tab follows the same pattern —
# input component(s), an output Textbox, and a Button whose click handler is
# registered as click(function, inputs, output).
with gr.Blocks(title="Text Processor") as demo:
    gr.Markdown("# Text Processing Tools")
    gr.Markdown("Analyze statistics, extract keywords, gauge sentiment, detect language, summarize, spell-check, and get readability tips.")

    # Character / word / sentence statistics -> analyze_text
    with gr.Tab("Analyze Text"):
        text_input1 = gr.Textbox(label="Enter text", lines=8, placeholder="Paste your text here...")
        analysis_output = gr.Textbox(label="Analysis Results", lines=8)
        gr.Button("Analyze", size="lg").click(analyze_text, text_input1, analysis_output)

    # Most frequent words -> extract_keywords; the slider supplies `count`
    with gr.Tab("Extract Keywords"):
        text_input2 = gr.Textbox(label="Enter text", lines=8)
        count_input = gr.Slider(1, 20, value=5, step=1, label="Number of keywords")
        keywords_output = gr.Textbox(label="Keywords", lines=8)
        gr.Button("Extract", size="lg").click(extract_keywords, [text_input2, count_input], keywords_output)

    # Grade-level estimate -> check_reading_level
    with gr.Tab("Reading Level"):
        text_input3 = gr.Textbox(label="Enter text", lines=8)
        level_output = gr.Textbox(label="Reading Level Analysis", lines=5)
        gr.Button("Check Level", size="lg").click(check_reading_level, text_input3, level_output)

    # Lexicon-based tone -> analyze_sentiment
    with gr.Tab("Sentiment"):
        text_input4 = gr.Textbox(label="Enter text", lines=8)
        sentiment_output = gr.Textbox(label="Sentiment Analysis", lines=6)
        gr.Button("Analyze Sentiment", size="lg").click(analyze_sentiment, text_input4, sentiment_output)

    # Stopword-based language guess -> detect_language
    with gr.Tab("Language"):
        text_input5 = gr.Textbox(label="Enter text", lines=8)
        language_output = gr.Textbox(label="Detected Language", lines=8)
        gr.Button("Detect Language", size="lg").click(detect_language, text_input5, language_output)

    # Extractive summary -> summarize_text; the slider supplies `sentence_count`
    with gr.Tab("Summarize"):
        text_input6 = gr.Textbox(label="Enter text", lines=8)
        sentence_slider = gr.Slider(1, 10, value=2, step=1, label="Summary sentences")
        summary_output = gr.Textbox(label="Summary", lines=6)
        gr.Button("Summarize", size="lg").click(summarize_text, [text_input6, sentence_slider], summary_output)

    # Known-misspelling lookup -> check_spelling
    with gr.Tab("Spell Check"):
        text_input7 = gr.Textbox(label="Enter text", lines=8)
        spelling_output = gr.Textbox(label="Spelling Results", lines=8)
        gr.Button("Check Spelling", size="lg").click(check_spelling, text_input7, spelling_output)

    # Heuristic writing advice -> readability_tips
    with gr.Tab("Readability Tips"):
        text_input8 = gr.Textbox(label="Enter text", lines=8)
        tips_output = gr.Textbox(label="Readability Tips", lines=8)
        gr.Button("Get Tips", size="lg").click(readability_tips, text_input8, tips_output)

# Start the app only when run as a script, so importing this module does not
# launch a server.
if __name__ == "__main__":
    # NOTE(review): mcp_server=True is meant to make Gradio expose the
    # registered functions as MCP tools alongside the web UI — confirm the
    # installed Gradio version supports this flag.
    demo.launch(mcp_server=True)