Spaces:

miesnerjacob
/

text-processor-mcp

Sleeping

File size: 11,752 Bytes

from mcp.server.fastmcp import FastMCP
import json
import re
from collections import Counter

# Server object created under the name "text-processor". The functions in this
# file are decorated with @mcp.tool(), and mcp.run() is invoked when the file is
# executed as a script.
mcp = FastMCP("text-processor")

# Common English function words that are ignored when ranking keywords and
# when scoring sentences for summaries.
STOPWORDS = {
    "the", "a", "an", "and", "or", "but", "in", "on", "at", "to", "for",
    "of", "with", "is", "are", "was", "were", "be", "been", "by", "from",
    "that", "this", "it", "as", "your", "you", "we", "they", "he", "she"
}

# Lexicon of lowercase words counted as positive sentiment.
POSITIVE_WORDS = {
    "good", "great", "excellent", "amazing", "wonderful", "fantastic", "love",
    "loved", "like", "happy", "best", "awesome", "nice", "perfect", "beautiful",
    "brilliant", "positive", "pleased", "delighted", "enjoy", "enjoyed", "superb",
    "outstanding", "favorite", "recommend", "helpful", "impressive", "win", "won"
}

# Lexicon of lowercase words counted as negative sentiment.
NEGATIVE_WORDS = {
    "bad", "terrible", "awful", "horrible", "hate", "hated", "dislike", "poor",
    "worst", "ugly", "disappointing", "disappointed", "sad", "angry", "broken",
    "negative", "useless", "boring", "annoying", "fail", "failed", "wrong",
    "slow", "difficult", "confusing", "lacking", "problem", "issue", "bug"
}

# Tokens that flip the polarity of the sentiment word that directly follows.
# NOTE: "n't" only matches when it appears as a standalone token; whitespace
# tokenization keeps contractions such as "don't" in one piece.
NEGATIONS = {"not", "no", "never", "n't", "without", "hardly", "barely", "neither", "nor"}

# Per-language sets of frequent function words used for language detection.
# Words are lowercase. Where the correct spelling carries a diacritic, both the
# accented form (as it appears in real text) and the ASCII transliteration are
# listed so that either spelling is recognised.
LANGUAGE_STOPWORDS = {
    "English": {"the", "and", "is", "in", "to", "of", "that", "it", "for", "with", "was", "on", "are", "you", "this"},
    "Spanish": {"el", "la", "de", "que", "y", "los", "en", "un", "una", "es", "por", "con", "para", "del", "las"},
    "French": {"le", "la", "les", "de", "et", "un", "une", "des", "est", "que", "en", "dans", "pour", "qui", "avec"},
    "German": {"der", "die", "das", "und", "ist", "den", "ein", "eine", "zu", "mit", "auf", "fur", "für", "nicht", "von", "im"},
    "Italian": {"il", "la", "di", "che", "un", "una", "per", "con", "non", "sono", "del", "della", "gli", "le", "ed"},
    "Portuguese": {"o", "a", "de", "que", "do", "da", "em", "um", "uma", "para", "com", "nao", "não", "os", "as", "se"},
}

# Frequent English misspellings (lowercase) mapped to their corrections.
COMMON_MISSPELLINGS = {
    "teh": "the", "recieve": "receive", "seperate": "separate", "definately": "definitely",
    "occured": "occurred", "untill": "until", "wich": "which", "thier": "their",
    "alot": "a lot", "becuase": "because", "wierd": "weird", "accomodate": "accommodate",
    "neccessary": "necessary", "occassion": "occasion", "tommorow": "tomorrow", "grammer": "grammar",
    "beleive": "believe", "calender": "calendar", "concious": "conscious", "embarass": "embarrass",
    "existance": "existence", "goverment": "government", "independant": "independent",
    "occurence": "occurrence", "priviledge": "privilege", "publically": "publicly",
    "recomend": "recommend", "refered": "referred", "succesful": "successful", "truely": "truly",
    "writting": "writing", "adress": "address", "arguement": "argument", "commitee": "committee",
    "enviroment": "environment", "febuary": "february", "foriegn": "foreign", "gaurd": "guard",
    "harrass": "harass", "liason": "liaison", "maintainance": "maintenance", "mispell": "misspell",
    "noticable": "noticeable", "persistant": "persistent", "posession": "possession",
    "questionaire": "questionnaire", "rythm": "rhythm", "supercede": "supersede",
    "threshhold": "threshold", "tendancy": "tendency", "vaccuum": "vacuum",
}

def _tokenize(text: str):
    """Split text into lowercase word tokens with edge punctuation removed.

    The text is split on whitespace; each chunk has leading/trailing
    punctuation (``.,!?;:"'()[]``) stripped and is lowercased. Punctuation in
    the middle of a chunk (e.g. the apostrophe in "don't") is preserved.

    Args:
        text: The input text

    Returns:
        List of non-empty tokens in their original order
    """
    edge_punctuation = ".,!?;:\"'()[]"
    tokens = []
    for chunk in text.split():
        token = chunk.strip(edge_punctuation).lower()
        # Chunks made only of punctuation (e.g. "...") strip down to "" and
        # would otherwise be counted as words by callers.
        if token:
            tokens.append(token)
    return tokens

def _split_sentences(text: str):
    """Break text into sentences at whitespace that follows '.', '!' or '?'.

    Args:
        text: The input text

    Returns:
        List of trimmed, non-empty sentence strings in original order
    """
    pieces = re.split(r"(?<=[.!?])\s+", text.strip())
    sentences = []
    for piece in pieces:
        trimmed = piece.strip()
        if trimmed:
            sentences.append(trimmed)
    return sentences

@mcp.tool()
def analyze_text(text: str) -> str:
    """Analyze text and return statistics.

    Words are whitespace-separated chunks. A sentence is counted for each run
    of '.', '!' or '?' (optionally followed by closing quotes/brackets) that is
    followed by whitespace or the end of the text, so "..." counts once and the
    dot in "3.14" does not count. Non-empty text without any such terminator
    counts as one sentence; empty text counts as zero.

    Args:
        text: The input text to analyze

    Returns:
        JSON string with analysis results
    """
    words = text.split()
    chars = len(text)
    # Summing word lengths excludes every kind of whitespace (tabs, newlines),
    # not just the space character, keeping this consistent with how `words`
    # is split.
    chars_no_spaces = sum(len(word) for word in words)

    sentence_ends = re.findall(r"[.!?]+[\"')\]]*(?=\s|$)", text)
    if sentence_ends:
        sentences = len(sentence_ends)
    else:
        sentences = 1 if words else 0

    avg_word_length = round(chars_no_spaces / len(words), 2) if words else 0
    # max(..., 1) only guards the division for empty text.
    avg_sentence_length = round(len(words) / max(sentences, 1), 2)

    # Strip edge punctuation before comparing so "cat" and "cat." are one word.
    normalized = (word.strip(".,!?;:\"'()[]").lower() for word in words)
    unique_words = len({word for word in normalized if word})

    return json.dumps({
        "total_characters": chars,
        "characters_without_spaces": chars_no_spaces,
        "total_words": len(words),
        "total_sentences": sentences,
        "average_word_length": avg_word_length,
        "average_sentence_length": avg_sentence_length,
        "unique_words": unique_words
    })

@mcp.tool()
def extract_keywords(text: str, count: int = 5) -> str:
    """Extract keywords (most common words) from text.

    Words are normalized with the shared tokenizer (lowercased, edge
    punctuation stripped) before stopwords are removed, so a stopword followed
    by punctuation (e.g. "the,") is filtered out as well.

    Args:
        text: The input text
        count: Number of keywords to return (default 5)

    Returns:
        JSON string with keywords and frequencies
    """
    # Normalize first, then filter: checking the stopword list against the raw
    # chunk would let "the," or "and." slip through. The `w` check also drops
    # any empty token left over from punctuation-only chunks.
    candidates = [w for w in _tokenize(text) if w and w not in STOPWORDS]

    top_words = Counter(candidates).most_common(count)

    return json.dumps({
        "keywords": [{"word": w, "frequency": f} for w, f in top_words]
    })

@mcp.tool()
def check_reading_level(text: str) -> str:
    """Estimate reading difficulty level.

    Applies the Flesch-Kincaid grade formula
    ``0.39 * words/sentences + 11.8 * syllables/words - 15.59``.
    Syllables are approximated as the number of vowel groups (a, e, i, o, u, y;
    case-insensitive) in each word, with a minimum of one for any word that
    contains a letter. Sentences are counted as runs of '.', '!' or '?' that
    are followed by whitespace or the end of the text (at least one).

    Args:
        text: The input text

    Returns:
        JSON string with reading level estimate, or an "error" entry when the
        text contains no words
    """
    words = text.split()
    if not words:
        return json.dumps({"error": "No text to analyze"})

    # Counting terminator runs keeps "..." from registering as three sentences
    # and ignores the dot inside numbers such as "3.14".
    sentence_ends = re.findall(r"[.!?]+[\"')\]]*(?=\s|$)", text)
    sentences = max(len(sentence_ends), 1)

    syllables = 0
    for word in words:
        vowel_groups = re.findall(r"[aeiouy]+", word.lower())
        if vowel_groups:
            # Adjacent vowels ("eau", "io") are treated as one syllable.
            syllables += len(vowel_groups)
        elif any(ch.isalpha() for ch in word):
            # Vowel-less words ("hmm", "TV") still take one beat to read.
            syllables += 1

    word_count = len(words)
    grade = (0.39 * (word_count / sentences)) + (11.8 * (syllables / word_count)) - 15.59
    grade = max(0, round(grade, 1))

    if grade < 6:
        level = "Elementary School"
    elif grade < 9:
        level = "Middle School"
    elif grade < 13:
        level = "High School"
    else:
        level = "College/Academic"

    return json.dumps({
        "grade_level": grade,
        "reading_level": level
    })

@mcp.tool()
def reverse_text(text: str) -> str:
    """Return the characters of the input in reverse order.

    Args:
        text: The input text

    Returns:
        The reversed text
    """
    return "".join(reversed(text))

@mcp.tool()
def analyze_sentiment(text: str) -> str:
    """Detect the emotional tone of text as positive, negative, or neutral.

    Uses a keyword lexicon with simple negation handling: a sentiment word is
    counted with the opposite polarity when the token directly before it is a
    negation word or a contraction ending in "n't" (e.g. "don't", "isn't").

    Args:
        text: The input text to analyze

    Returns:
        JSON string with sentiment label, score, and matched word counts, or
        an "error" entry when the text contains no words
    """
    # Drop empty tokens so punctuation-only input is reported as "no text" and
    # cannot sit between a negation and the word it modifies.
    words = [w for w in _tokenize(text) if w]
    if not words:
        return json.dumps({"error": "No text to analyze"})

    positive = negative = 0
    previous = ""
    for word in words:
        # The tokenizer keeps contractions whole, so "n't" never appears as its
        # own token; check the suffix instead (straight or curly apostrophe).
        negated = previous in NEGATIONS or previous.endswith(("n't", "n\u2019t"))
        if word in POSITIVE_WORDS:
            if negated:
                negative += 1
            else:
                positive += 1
        elif word in NEGATIVE_WORDS:
            if negated:
                positive += 1
            else:
                negative += 1
        previous = word

    total = positive + negative
    # Score ranges from -1.0 (all negative) to 1.0 (all positive).
    score = round((positive - negative) / total, 2) if total else 0.0

    if score > 0.1:
        label = "positive"
    elif score < -0.1:
        label = "negative"
    else:
        label = "neutral"

    return json.dumps({
        "sentiment": label,
        "score": score,
        "positive_matches": positive,
        "negative_matches": negative
    })

@mcp.tool()
def detect_language(text: str) -> str:
    """Guess which language the text is written in.

    Counts, for each supported language, how many tokens of the text appear in
    that language's stopword set and picks the language with the most hits.
    Confidence is the winning hit count divided by the number of tokens.

    Args:
        text: The input text

    Returns:
        JSON string with the detected language, confidence, and per-language scores
    """
    tokens = _tokenize(text)
    if len(tokens) == 0:
        return json.dumps({"error": "No text to analyze"})

    hits = {}
    for language, vocabulary in LANGUAGE_STOPWORDS.items():
        matched = 0
        for token in tokens:
            if token in vocabulary:
                matched += 1
        hits[language] = matched

    # On a tie, max() keeps the language that was inserted first.
    top_language = max(hits, key=hits.get)
    top_hits = hits[top_language]

    result = {
        "language": top_language if top_hits > 0 else "Unknown",
        "confidence": round(top_hits / len(tokens), 2),
        "scores": hits,
    }
    return json.dumps(result)

@mcp.tool()
def summarize_text(text: str, sentence_count: int = 2) -> str:
    """Create a short extractive summary by selecting the most important sentences.

    Each sentence is scored as the summed document-wide frequency of its
    non-stopword terms divided by the sentence's total token count (stopwords
    add nothing to the sum but do count toward the length). The top-scoring
    sentences are returned in their original order.

    Args:
        text: The input text
        sentence_count: Number of sentences to keep in the summary (default 2);
            values below zero are treated as zero

    Returns:
        JSON string with the summary and original sentence count
    """
    sentences = _split_sentences(text)
    # A negative count would make the slice below drop sentences from the END
    # of the ranking (returning nearly everything) instead of returning none.
    keep = max(sentence_count, 0)
    if len(sentences) <= keep:
        return json.dumps({
            "summary": text.strip(),
            "original_sentences": len(sentences)
        })

    freq = Counter(w for w in _tokenize(text) if w and w not in STOPWORDS)

    def sentence_score(sentence: str) -> float:
        tokens = [w for w in _tokenize(sentence) if w]
        # Stopwords are absent from `freq`, so they contribute 0 to the sum.
        return sum(freq[w] for w in tokens) / len(tokens) if tokens else 0

    ranked = sorted(range(len(sentences)), key=lambda i: sentence_score(sentences[i]), reverse=True)
    # Re-sort the winning indices so the summary reads in document order.
    chosen = sorted(ranked[:keep])
    summary = " ".join(sentences[i] for i in chosen)

    return json.dumps({
        "summary": summary,
        "original_sentences": len(sentences)
    })

@mcp.tool()
def check_spelling(text: str) -> str:
    """Report known misspellings found in the text along with their fixes.

    Every whitespace-separated word is stripped of edge punctuation,
    lowercased, and looked up in the table of frequent English misspellings.
    Each misspelling is reported once, in order of first appearance.

    Args:
        text: The input text

    Returns:
        JSON string with the count and list of misspelled words with suggestions
    """
    edge_punctuation = ".,!?;:\"'()[]"

    # Insertion-ordered mapping doubles as the "already reported" record.
    corrections = {}
    for chunk in text.split():
        candidate = chunk.strip(edge_punctuation).lower()
        if candidate in corrections:
            continue
        if candidate in COMMON_MISSPELLINGS:
            corrections[candidate] = COMMON_MISSPELLINGS[candidate]

    flagged = [
        {"word": wrong, "suggestion": right}
        for wrong, right in corrections.items()
    ]
    return json.dumps({
        "misspelled_count": len(flagged),
        "misspelled_words": flagged
    })

@mcp.tool()
def readability_tips(text: str) -> str:
    """Produce writing advice aimed at making the text easier to read.

    Checks for sentences over 25 words, words of 13 or more characters, a high
    share of words ending in "ly", "to be" forms followed by an "-ed" word
    (possible passive voice), and a fixed list of filler words.

    Args:
        text: The input text

    Returns:
        JSON string with a list of actionable writing tips
    """
    raw_words = text.split()
    if len(raw_words) == 0:
        return json.dumps({"error": "No text to analyze"})

    # Normalise each word once and reuse the results for every check below.
    edge_punctuation = ".,!?;:\"'()[]"
    trimmed = [chunk.strip(edge_punctuation) for chunk in raw_words]
    lowered = [word.lower() for word in trimmed]

    advice = []

    overlong = sum(1 for sentence in _split_sentences(text) if len(sentence.split()) > 25)
    if overlong:
        advice.append(f"{overlong} sentence(s) exceed 25 words; consider splitting them for clarity.")

    lengthy = sum(1 for word in trimmed if len(word) >= 13)
    if lengthy:
        advice.append(f"{lengthy} long word(s) (13+ characters); simpler synonyms may read more easily.")

    # Allow roughly one -ly word per 20 words (and always at least one).
    ly_total = sum(1 for word in lowered if word.endswith("ly"))
    ly_allowance = max(1, len(raw_words) // 20)
    if ly_total > ly_allowance:
        advice.append(f"Frequent -ly adverbs ({ly_total}); trimming some tightens the prose.")

    passive_matches = re.findall(r"\b(?:was|were|been|be|is|are)\s+\w+ed\b", text.lower())
    passive_total = len(passive_matches)
    if passive_total:
        advice.append(f"{passive_total} possible passive-voice construction(s); active voice is usually clearer.")

    filler_vocab = {"very", "really", "just", "actually", "basically", "literally", "quite"}
    filler_total = sum(1 for word in lowered if word in filler_vocab)
    if filler_total:
        advice.append(f"{filler_total} filler word(s) detected (e.g. very, really, just); removing them strengthens writing.")

    if len(advice) == 0:
        advice.append("No major readability issues detected. Nice and clear!")

    return json.dumps({"tips": advice})

# Start the server only when this file is executed directly, not when it is
# imported as a module.
if __name__ == "__main__":
    mcp.run()