Spaces:
Sleeping
Sleeping
| from mcp.server.fastmcp import FastMCP | |
| import json | |
| import re | |
| from collections import Counter | |
# FastMCP server instance named "text-processor"; its run() method is invoked
# from the `if __name__ == "__main__"` guard at the bottom of this file.
mcp = FastMCP("text-processor")
# Common English function words. extract_keywords and summarize_text skip
# these so they do not dominate the word-frequency counts.
STOPWORDS = {
    "the", "a", "an", "and", "or", "but", "in", "on", "at", "to", "for",
    "of", "with", "is", "are", "was", "were", "be", "been", "by", "from",
    "that", "this", "it", "as", "your", "you", "we", "they", "he", "she"
}
# Sentiment lexicon consulted by analyze_sentiment: a token found here counts
# as one positive match (or one negative match when it is negated).
POSITIVE_WORDS = {
    "good", "great", "excellent", "amazing", "wonderful", "fantastic", "love",
    "loved", "like", "happy", "best", "awesome", "nice", "perfect", "beautiful",
    "brilliant", "positive", "pleased", "delighted", "enjoy", "enjoyed", "superb",
    "outstanding", "favorite", "recommend", "helpful", "impressive", "win", "won"
}
# Counterpart lexicon: a token found here counts as one negative match (or one
# positive match when it is negated).
NEGATIVE_WORDS = {
    "bad", "terrible", "awful", "horrible", "hate", "hated", "dislike", "poor",
    "worst", "ugly", "disappointing", "disappointed", "sad", "angry", "broken",
    "negative", "useless", "boring", "annoying", "fail", "failed", "wrong",
    "slow", "difficult", "confusing", "lacking", "problem", "issue", "bug"
}
# Tokens that flip the polarity of the sentiment word immediately after them.
# NOTE(review): "n't" looks unreachable as written — _tokenize splits on
# whitespace only, so contractions arrive whole (e.g. "don't") — confirm.
NEGATIONS = {"not", "no", "never", "n't", "without", "hardly", "barely", "neither", "nor"}
# Frequent function words per language; detect_language scores a text by how
# many of its tokens fall into each set. Several words (e.g. "la", "de", "que",
# "un") belong to more than one language, so scores can overlap.
# Entries such as "fur" and "nao" are spelled without diacritics.
LANGUAGE_STOPWORDS = {
    "English": {"the", "and", "is", "in", "to", "of", "that", "it", "for", "with", "was", "on", "are", "you", "this"},
    "Spanish": {"el", "la", "de", "que", "y", "los", "en", "un", "una", "es", "por", "con", "para", "del", "las"},
    "French": {"le", "la", "les", "de", "et", "un", "une", "des", "est", "que", "en", "dans", "pour", "qui", "avec"},
    "German": {"der", "die", "das", "und", "ist", "den", "ein", "eine", "zu", "mit", "auf", "fur", "nicht", "von", "im"},
    "Italian": {"il", "la", "di", "che", "un", "una", "per", "con", "non", "sono", "del", "della", "gli", "le", "ed"},
    "Portuguese": {"o", "a", "de", "que", "do", "da", "em", "um", "uma", "para", "com", "nao", "os", "as", "se"},
}
# Lookup table used by check_spelling: lowercase misspelling -> suggested
# correction. A suggestion may be more than one word ("alot" -> "a lot").
COMMON_MISSPELLINGS = {
    "teh": "the", "recieve": "receive", "seperate": "separate", "definately": "definitely",
    "occured": "occurred", "untill": "until", "wich": "which", "thier": "their",
    "alot": "a lot", "becuase": "because", "wierd": "weird", "accomodate": "accommodate",
    "neccessary": "necessary", "occassion": "occasion", "tommorow": "tomorrow", "grammer": "grammar",
    "beleive": "believe", "calender": "calendar", "concious": "conscious", "embarass": "embarrass",
    "existance": "existence", "goverment": "government", "independant": "independent",
    "occurence": "occurrence", "priviledge": "privilege", "publically": "publicly",
    "recomend": "recommend", "refered": "referred", "succesful": "successful", "truely": "truly",
    "writting": "writing", "adress": "address", "arguement": "argument", "commitee": "committee",
    "enviroment": "environment", "febuary": "february", "foriegn": "foreign", "gaurd": "guard",
    "harrass": "harass", "liason": "liaison", "maintainance": "maintenance", "mispell": "misspell",
    "noticable": "noticeable", "persistant": "persistent", "posession": "possession",
    "questionaire": "questionnaire", "rythm": "rhythm", "supercede": "supersede",
    "threshhold": "threshold", "tendancy": "tendency", "vaccuum": "vacuum",
}
def _tokenize(text: str) -> list[str]:
    """Split text into lowercase word tokens with edge punctuation removed.

    Words are separated on whitespace. The characters ``.,!?;:"'()[]`` are
    stripped from both ends of each word and the result is lowercased;
    punctuation inside a word (such as the apostrophe in "don't") is kept.

    Args:
        text: The input text

    Returns:
        List of non-empty tokens in their original order
    """
    stripped = (raw.strip(".,!?;:\"'()[]").lower() for raw in text.split())
    # A word made only of punctuation ("...", a lone quote) strips down to "".
    # Such empty tokens inflate token counts and defeat the callers'
    # `if not words` guards, so they are dropped here.
    return [token for token in stripped if token]
def _split_sentences(text: str):
    """Break text into sentences.

    A boundary is any run of whitespace that directly follows ".", "!" or
    "?". The terminating punctuation stays attached to its sentence.

    Args:
        text: The input text

    Returns:
        List of non-empty, whitespace-trimmed sentences in original order
    """
    pieces = re.split(r"(?<=[.!?])\s+", text.strip())
    sentences = []
    for piece in pieces:
        trimmed = piece.strip()
        if trimmed:
            sentences.append(trimmed)
    return sentences
def analyze_text(text: str) -> str:
    """Analyze text and return statistics.

    Args:
        text: The input text to analyze

    Returns:
        JSON string with these keys:
        total_characters, characters_without_spaces (all whitespace
        excluded), total_words, total_sentences (0 for empty text),
        average_word_length, average_sentence_length (words per sentence)
        and unique_words (case-insensitive, edge punctuation ignored)
    """
    words = text.split()
    word_count = len(words)

    # Joining the whitespace-split words removes tabs and newlines as well as
    # plain spaces, so the figure is truly "characters without whitespace".
    chars_no_spaces = len("".join(words))

    # Same boundary rule as _split_sentences: a sentence ends at ./!/? that is
    # followed by whitespace. Counting raw punctuation characters instead
    # would treat "..." as three sentences and "3.14" as one.
    chunks = re.split(r"(?<=[.!?])\s+", text.strip())
    sentence_count = sum(1 for chunk in chunks if chunk.strip())

    # Normalise before de-duplicating so "Cat", "cat" and "cat." are one word.
    distinct = {word.strip(".,!?;:\"'()[]").lower() for word in words}
    distinct.discard("")

    avg_word_length = round(chars_no_spaces / word_count, 2) if words else 0
    # max(..., 1) only guards the division for empty input.
    avg_sentence_length = round(word_count / max(sentence_count, 1), 2)

    return json.dumps({
        "total_characters": len(text),
        "characters_without_spaces": chars_no_spaces,
        "total_words": word_count,
        "total_sentences": sentence_count,
        "average_word_length": avg_word_length,
        "average_sentence_length": avg_sentence_length,
        "unique_words": len(distinct)
    })
def extract_keywords(text: str, count: int = 5) -> str:
    """Extract keywords (most common words) from text.

    Words are lowercased and stripped of edge punctuation before stopwords
    are removed, so "The," and "the" are both recognised as stopwords.

    Args:
        text: The input text
        count: Number of keywords to return (default 5)

    Returns:
        JSON string with keywords and frequencies, most frequent first
    """
    # Normalise first, then filter. Filtering the raw word let "the," slip
    # past the stopword check and come back as the keyword "the".
    # The `token and` guard drops punctuation-only words that strip to "".
    candidates = [
        token for token in _tokenize(text)
        if token and token not in STOPWORDS
    ]
    top_words = Counter(candidates).most_common(count)
    return json.dumps({
        "keywords": [{"word": w, "frequency": f} for w, f in top_words]
    })
def _estimate_syllables(word: str) -> int:
    """Estimate the syllable count of one English word from its vowel groups."""
    letters = re.sub(r"[^a-z]", "", word.lower())
    if not letters:
        # Numbers and punctuation-only words contribute no syllables.
        return 0
    groups = len(re.findall(r"[aeiouy]+", letters))
    # A trailing "e" is usually silent ("make", "time"), except in "-le"
    # endings ("table"), where it forms its own syllable.
    if groups > 1 and letters.endswith("e") and not letters.endswith("le"):
        groups -= 1
    return max(groups, 1)


def check_reading_level(text: str) -> str:
    """Estimate reading difficulty level.

    Applies the Flesch-Kincaid grade formula
    ``0.39 * words/sentences + 11.8 * syllables/words - 15.59``.
    Syllables are estimated per word from groups of consecutive vowels
    (case-insensitive), which is a heuristic for English text.

    Args:
        text: The input text

    Returns:
        JSON string with "grade_level" (never below 0) and "reading_level",
        or {"error": ...} when the text contains no words
    """
    words = text.split()
    if not words:
        return json.dumps({"error": "No text to analyze"})

    # A sentence ends at ./!/? followed by whitespace, so "..." and decimals
    # such as "3.14" do not inflate the count. With at least one word present
    # the split always yields at least one chunk.
    sentence_count = len(re.split(r"(?<=[.!?])\s+", text.strip()))
    syllables = sum(_estimate_syllables(word) for word in words)

    grade = (0.39 * (len(words) / sentence_count)) + (11.8 * (syllables / len(words))) - 15.59
    # Very simple text scores below zero; clamp so the grade stays meaningful.
    grade = max(0, round(grade, 1))

    if grade < 6:
        level = "Elementary School"
    elif grade < 9:
        level = "Middle School"
    elif grade < 13:
        level = "High School"
    else:
        level = "College/Academic"
    return json.dumps({
        "grade_level": grade,
        "reading_level": level
    })
def reverse_text(text: str) -> str:
    """Return the characters of a string in reverse order.

    Args:
        text: The input text

    Returns:
        The reversed text
    """
    backwards = reversed(text)
    return "".join(backwards)
def analyze_sentiment(text: str) -> str:
    """Detect the emotional tone of text as positive, negative, or neutral.

    Uses a keyword lexicon with simple negation handling: a sentiment word
    directly preceded by a negation ("not", "never", ...) or by a contraction
    ending in "n't" ("don't", "isn't") counts toward the opposite polarity.

    Args:
        text: The input text to analyze

    Returns:
        JSON string with sentiment label, score in [-1, 1], and matched word
        counts, or {"error": ...} when the text contains no words
    """
    # Drop empty tokens (punctuation-only words) so they neither hide an
    # adjacent negation nor make a wordless text look non-empty.
    words = [token for token in _tokenize(text) if token]
    if not words:
        return json.dumps({"error": "No text to analyze"})

    def is_negation(token: str) -> bool:
        # Tokens are whitespace-split, so "n't" never appears on its own;
        # contractions must be recognised by their suffix (straight or curly
        # apostrophe).
        return token in NEGATIONS or token.endswith(("n't", "n\u2019t"))

    positive = negative = 0
    previous = ""
    for word in words:
        if word in POSITIVE_WORDS:
            polarity = 1
        elif word in NEGATIVE_WORDS:
            polarity = -1
        else:
            polarity = 0
        if polarity and is_negation(previous):
            polarity = -polarity
        if polarity > 0:
            positive += 1
        elif polarity < 0:
            negative += 1
        previous = word

    total = positive + negative
    score = round((positive - negative) / total, 2) if total else 0.0
    if score > 0.1:
        label = "positive"
    elif score < -0.1:
        label = "negative"
    else:
        label = "neutral"
    return json.dumps({
        "sentiment": label,
        "score": score,
        "positive_matches": positive,
        "negative_matches": negative
    })
def detect_language(text: str) -> str:
    """Identify the most likely language of the text.

    Compares the text against common stopwords for several European
    languages. Tokens are accent-folded first ("für" -> "fur",
    "não" -> "nao") because the stopword lists hold unaccented spellings.

    Args:
        text: The input text

    Returns:
        JSON string with the detected language ("Unknown" when no stopword
        matched), confidence (share of tokens matching the best language),
        and per-language scores, or {"error": ...} when there are no words
    """
    # Function-scope import: only this function needs unicodedata.
    import unicodedata

    def fold_accents(token: str) -> str:
        # NFKD splits an accented letter into base letter + combining mark;
        # dropping the marks leaves the unaccented spelling.
        decomposed = unicodedata.normalize("NFKD", token)
        return "".join(ch for ch in decomposed if not unicodedata.combining(ch))

    folded = (fold_accents(token) for token in _tokenize(text))
    # Empty tokens (punctuation-only words) would dilute the confidence ratio.
    words = [token for token in folded if token]
    if not words:
        return json.dumps({"error": "No text to analyze"})

    scores = {
        lang: sum(1 for w in words if w in stopwords)
        for lang, stopwords in LANGUAGE_STOPWORDS.items()
    }
    # On a tie, max() keeps the first language in LANGUAGE_STOPWORDS order.
    best = max(scores, key=scores.get)
    confidence = round(scores[best] / len(words), 2)
    return json.dumps({
        "language": best if scores[best] > 0 else "Unknown",
        "confidence": confidence,
        "scores": scores
    })
def summarize_text(text: str, sentence_count: int = 2) -> str:
    """Create a short extractive summary by selecting the most important sentences.

    Sentences are ranked by the average frequency of their non-stopword
    terms; the winners are returned in their original order.

    Args:
        text: The input text
        sentence_count: Number of sentences to keep in the summary
            (default 2); values below 0 are treated as 0

    Returns:
        JSON string with the summary and original sentence count
    """
    sentences = _split_sentences(text)
    # A negative count would otherwise slice from the end (ranked[:-1]) and
    # return almost every sentence instead of none.
    keep = max(sentence_count, 0)
    if len(sentences) <= keep:
        return json.dumps({
            "summary": text.strip(),
            "original_sentences": len(sentences)
        })

    def content_terms(fragment: str) -> list:
        # Non-empty, non-stopword tokens of a piece of text.
        return [w for w in _tokenize(fragment) if w and w not in STOPWORDS]

    freq = Counter(content_terms(text))

    def sentence_score(sentence: str) -> float:
        # Average over content terms only, as documented; dividing by every
        # token would penalise sentences merely for containing stopwords.
        terms = content_terms(sentence)
        return sum(freq[w] for w in terms) / len(terms) if terms else 0.0

    scores = [sentence_score(sentence) for sentence in sentences]
    # sorted() is stable, so equally scored sentences keep document order.
    ranked = sorted(range(len(sentences)), key=scores.__getitem__, reverse=True)
    chosen = sorted(ranked[:keep])
    summary = " ".join(sentences[i] for i in chosen)
    return json.dumps({
        "summary": summary,
        "original_sentences": len(sentences)
    })
def check_spelling(text: str) -> str:
    """Report known misspellings found in the text, with suggested fixes.

    Each whitespace-separated word is stripped of edge punctuation,
    lowercased, and looked up in the table of frequent English misspellings.
    A misspelling is reported once, at its first occurrence.

    Args:
        text: The input text

    Returns:
        JSON string with the count and list of misspelled words with suggestions
    """
    # dict keeps insertion order, so it doubles as the "already seen" set
    # while preserving first-occurrence order.
    suggestions = {}
    for raw in text.split():
        candidate = raw.strip(".,!?;:\"'()[]").lower()
        if candidate in suggestions:
            continue
        if candidate in COMMON_MISSPELLINGS:
            suggestions[candidate] = COMMON_MISSPELLINGS[candidate]
    report = [
        {"word": wrong, "suggestion": right}
        for wrong, right in suggestions.items()
    ]
    return json.dumps({
        "misspelled_count": len(report),
        "misspelled_words": report
    })
def readability_tips(text: str) -> str:
    """Suggest concrete improvements for clarity and readability.

    Checks, in order: sentences longer than 25 words, words of 13 or more
    characters, a high share of words ending in "ly", "be"-verb + "-ed"
    patterns (possible passive voice), and filler words.

    Args:
        text: The input text

    Returns:
        JSON string with a list of actionable writing tips, or
        {"error": ...} when the text contains no words
    """
    words = text.split()
    if not words:
        return json.dumps({"error": "No text to analyze"})

    sentences = _split_sentences(text)
    # Strip edge punctuation once; every word-level check below reuses it.
    bare = [w.strip(".,!?;:\"'()[]") for w in words]
    bare_lower = [b.lower() for b in bare]
    fillers = {"very", "really", "just", "actually", "basically", "literally", "quite"}

    long_sentences = [s for s in sentences if len(s.split()) > 25]
    long_words = [w for w, b in zip(words, bare) if len(b) >= 13]
    adverbs = [w for w, b in zip(words, bare_lower) if b.endswith("ly")]
    passive = len(re.findall(r"\b(?:was|were|been|be|is|are)\s+\w+ed\b", text.lower()))
    filler_hits = [w for w, b in zip(words, bare_lower) if b in fillers]

    # Adverbs are flagged only above roughly one per twenty words (minimum 1).
    adverb_limit = max(1, len(words) // 20)

    # (triggered?, message) pairs in the order the tips are reported.
    checks = [
        (bool(long_sentences),
         f"{len(long_sentences)} sentence(s) exceed 25 words; consider splitting them for clarity."),
        (bool(long_words),
         f"{len(long_words)} long word(s) (13+ characters); simpler synonyms may read more easily."),
        (len(adverbs) > adverb_limit,
         f"Frequent -ly adverbs ({len(adverbs)}); trimming some tightens the prose."),
        (bool(passive),
         f"{passive} possible passive-voice construction(s); active voice is usually clearer."),
        (bool(filler_hits),
         f"{len(filler_hits)} filler word(s) detected (e.g. very, really, just); removing them strengthens writing."),
    ]
    tips = [message for triggered, message in checks if triggered]
    if len(tips) == 0:
        tips = ["No major readability issues detected. Nice and clear!"]
    return json.dumps({"tips": tips})
# Start the MCP server only when this file is executed as a script.
# NOTE(review): no function in the visible source is registered on `mcp`
# (there are no @mcp.tool() decorators or equivalent calls) — confirm the
# tools are registered before relying on this entry point.
if __name__ == "__main__":
    mcp.run()