"""Text-processing tools served through a Gradio web UI.

Each public function takes plain text and returns a JSON string, so it can be
wired directly to a Gradio ``Textbox`` output. The app is launched with
``mcp_server=True``; the function docstrings keep the ``Args:`` / ``Returns:``
layout so they stay usable as tool descriptions.
"""

import json
import re
import unicodedata
from collections import Counter

import gradio as gr

STOPWORDS = {
    "the", "a", "an", "and", "or", "but", "in", "on", "at", "to", "for", "of",
    "with", "is", "are", "was", "were", "be", "been", "by", "from", "that",
    "this", "it", "as", "your", "you", "we", "they", "he", "she",
}

POSITIVE_WORDS = {
    "good", "great", "excellent", "amazing", "wonderful", "fantastic", "love",
    "loved", "like", "happy", "best", "awesome", "nice", "perfect",
    "beautiful", "brilliant", "positive", "pleased", "delighted", "enjoy",
    "enjoyed", "superb", "outstanding", "favorite", "recommend", "helpful",
    "impressive", "win", "won",
}

NEGATIVE_WORDS = {
    "bad", "terrible", "awful", "horrible", "hate", "hated", "dislike", "poor",
    "worst", "ugly", "disappointing", "disappointed", "sad", "angry", "broken",
    "negative", "useless", "boring", "annoying", "fail", "failed", "wrong",
    "slow", "difficult", "confusing", "lacking", "problem", "issue", "bug",
}

NEGATIONS = {"not", "no", "never", "n't", "without", "hardly", "barely", "neither", "nor"}

# Stopwords are stored accent-folded ("fur", "nao"); detect_language folds the
# input tokens the same way before comparing.
LANGUAGE_STOPWORDS = {
    "English": {"the", "and", "is", "in", "to", "of", "that", "it", "for", "with",
                "was", "on", "are", "you", "this"},
    "Spanish": {"el", "la", "de", "que", "y", "los", "en", "un", "una", "es",
                "por", "con", "para", "del", "las"},
    "French": {"le", "la", "les", "de", "et", "un", "une", "des", "est", "que",
               "en", "dans", "pour", "qui", "avec"},
    "German": {"der", "die", "das", "und", "ist", "den", "ein", "eine", "zu",
               "mit", "auf", "fur", "nicht", "von", "im"},
    "Italian": {"il", "la", "di", "che", "un", "una", "per", "con", "non",
                "sono", "del", "della", "gli", "le", "ed"},
    "Portuguese": {"o", "a", "de", "que", "do", "da", "em", "um", "uma", "para",
                   "com", "nao", "os", "as", "se"},
}

COMMON_MISSPELLINGS = {
    "teh": "the",
    "recieve": "receive",
    "seperate": "separate",
    "definately": "definitely",
    "occured": "occurred",
    "untill": "until",
    "wich": "which",
    "thier": "their",
    "alot": "a lot",
    "becuase": "because",
    "wierd": "weird",
    "accomodate": "accommodate",
    "neccessary": "necessary",
    "occassion": "occasion",
    "tommorow": "tomorrow",
    "grammer": "grammar",
    "beleive": "believe",
    "calender": "calendar",
    "concious": "conscious",
    "embarass": "embarrass",
    "existance": "existence",
    "goverment": "government",
    "independant": "independent",
    "occurence": "occurrence",
    "priviledge": "privilege",
    "publically": "publicly",
    "recomend": "recommend",
    "refered": "referred",
    "succesful": "successful",
    "truely": "truly",
    "writting": "writing",
    "adress": "address",
    "arguement": "argument",
    "commitee": "committee",
    "enviroment": "environment",
    "febuary": "February",
    "foriegn": "foreign",
    "gaurd": "guard",
    "harrass": "harass",
    "liason": "liaison",
    "maintainance": "maintenance",
    "mispell": "misspell",
    "noticable": "noticeable",
    "persistant": "persistent",
    "posession": "possession",
    "questionaire": "questionnaire",
    "rythm": "rhythm",
    "supercede": "supersede",
    "threshhold": "threshold",
    "tendancy": "tendency",
    "vaccuum": "vacuum",
}

FILLER_WORDS = {"very", "really", "just", "actually", "basically", "literally", "quite"}

# Characters trimmed from both ends of a whitespace-delimited token.
_PUNCTUATION = ".,!?;:\"'()[]"

_SENTENCE_BOUNDARY_RE = re.compile(r"(?<=[.!?])\s+")
_VOWEL_GROUP_RE = re.compile(r"[aeiouy]+")
_PASSIVE_RE = re.compile(r"\b(?:was|were|been|be|is|are)\s+\w+ed\b")


def _to_json(payload: dict) -> str:
    """Serialize a result dict as indented JSON, keeping non-ASCII text readable."""
    return json.dumps(payload, indent=2, ensure_ascii=False)


def _tokenize(text: str) -> list[str]:
    """Split on whitespace, trim edge punctuation, lowercase, drop empty tokens."""
    cleaned = (w.strip(_PUNCTUATION).lower() for w in text.split())
    # A token made only of punctuation (e.g. "...") strips down to "" and is
    # not a word, so it must not reach the counters or the empty-input checks.
    return [w for w in cleaned if w]


def _split_sentences(text: str) -> list[str]:
    """Split text after '.', '!' or '?' followed by whitespace; drop empty pieces."""
    pieces = (s.strip() for s in _SENTENCE_BOUNDARY_RE.split(text.strip()))
    return [s for s in pieces if s]


def _fold_accents(word: str) -> str:
    """Remove combining diacritics so that e.g. "für" compares equal to "fur"."""
    decomposed = unicodedata.normalize("NFKD", word)
    return "".join(c for c in decomposed if not unicodedata.combining(c))


def _count_syllables(word: str) -> int:
    """Estimate the syllables in a word by counting groups of adjacent vowels."""
    word = word.lower()
    groups = len(_VOWEL_GROUP_RE.findall(word))
    # A trailing "e" is usually silent ("make"), except in "-le" ("table").
    if groups > 1 and word.endswith("e") and not word.endswith("le"):
        groups -= 1
    # Every token counts as at least one syllable (numbers, abbreviations).
    return max(groups, 1)


def _is_negation(word: str) -> bool:
    """Return True for a negation word or a contraction ending in n't."""
    # Tokens are whitespace-split, so "don't" arrives whole; the bare "n't"
    # entry in NEGATIONS would never match without the suffix test.
    return word in NEGATIONS or word.endswith(("n't", "n\u2019t"))


def analyze_text(text: str) -> str:
    """Analyze text and return statistics.

    Args:
        text: The input text to analyze

    Returns:
        JSON string with analysis results
    """
    words = text.split()
    # str.split() discards every kind of whitespace, so the summed word
    # lengths exclude tabs and newlines as well as spaces.
    chars_no_spaces = sum(len(w) for w in words)
    # Counting split sentences (rather than raw punctuation marks) keeps
    # "..." and decimals such as "3.14" from inflating the total.
    sentence_count = len(_split_sentences(text))

    avg_word_length = round(chars_no_spaces / len(words), 2) if words else 0
    avg_sentence_length = round(len(words) / max(sentence_count, 1), 2)

    return _to_json({
        "total_characters": len(text),
        "characters_without_spaces": chars_no_spaces,
        "total_words": len(words),
        "total_sentences": sentence_count,
        "average_word_length": avg_word_length,
        "average_sentence_length": avg_sentence_length,
    })


def extract_keywords(text: str, count: int = 5) -> str:
    """Extract keywords (most common words) from text.

    Args:
        text: The input text
        count: Number of keywords to return (default 5)

    Returns:
        JSON string with keywords and frequencies
    """
    # UI sliders and JSON clients may deliver the count as a float.
    limit = max(0, int(count))
    # Stopwords are filtered after punctuation is stripped, so "the," is
    # recognised as "the".
    candidates = [w for w in _tokenize(text) if w not in STOPWORDS]
    top_words = Counter(candidates).most_common(limit)

    return _to_json({
        "keywords": [{"word": w, "frequency": f} for w, f in top_words],
    })


def check_reading_level(text: str) -> str:
    """Estimate reading difficulty level.

    Applies the Flesch-Kincaid grade formula using a vowel-group syllable
    estimate.

    Args:
        text: The input text

    Returns:
        JSON string with reading level estimate
    """
    words = _tokenize(text)
    if not words:
        return _to_json({"error": "No text to analyze"})

    sentence_count = max(len(_split_sentences(text)), 1)
    syllables = sum(_count_syllables(w) for w in words)

    grade = max(
        0,
        0.39 * (len(words) / sentence_count) + 11.8 * (syllables / len(words)) - 15.59,
    )

    if grade < 6:
        level = "Elementary School"
    elif grade < 9:
        level = "Middle School"
    elif grade < 13:
        level = "High School"
    else:
        level = "College/Academic"

    return _to_json({
        "grade_level": round(grade, 1),
        "reading_level": level,
    })


def analyze_sentiment(text: str) -> str:
    """Detect the emotional tone of text as positive, negative, or neutral.

    Uses a keyword lexicon with simple negation handling: a sentiment word
    directly preceded by a negation counts for the opposite polarity.

    Args:
        text: The input text to analyze

    Returns:
        JSON string with sentiment label, score, and matched word counts
    """
    words = _tokenize(text)
    if not words:
        return _to_json({"error": "No text to analyze"})

    positive = negative = 0
    previous = ""
    for word in words:
        negated = _is_negation(previous)
        if word in POSITIVE_WORDS:
            if negated:
                negative += 1
            else:
                positive += 1
        elif word in NEGATIVE_WORDS:
            if negated:
                positive += 1
            else:
                negative += 1
        previous = word

    total = positive + negative
    score = round((positive - negative) / total, 2) if total else 0.0

    if score > 0.1:
        label = "positive"
    elif score < -0.1:
        label = "negative"
    else:
        label = "neutral"

    return _to_json({
        "sentiment": label,
        "score": score,
        "positive_matches": positive,
        "negative_matches": negative,
    })


def detect_language(text: str) -> str:
    """Identify the most likely language of the text.

    Compares the text against common stopwords for several European languages.

    Args:
        text: The input text

    Returns:
        JSON string with the detected language, confidence, and per-language scores
    """
    words = [_fold_accents(w) for w in _tokenize(text)]
    if not words:
        return _to_json({"error": "No text to analyze"})

    scores = {
        lang: sum(1 for w in words if w in stopwords)
        for lang, stopwords in LANGUAGE_STOPWORDS.items()
    }
    # On a tie max() keeps the first language in LANGUAGE_STOPWORDS order.
    best = max(scores, key=scores.get)
    confidence = round(scores[best] / len(words), 2)

    return _to_json({
        "language": best if scores[best] > 0 else "Unknown",
        "confidence": confidence,
        "scores": scores,
    })


def summarize_text(text: str, sentence_count: int = 2) -> str:
    """Create a short extractive summary by selecting the most important sentences.

    Sentences are ranked by the average frequency of their non-stopword terms.

    Args:
        text: The input text
        sentence_count: Number of sentences to keep in the summary (default 2)

    Returns:
        JSON string with the summary and original sentence count
    """
    # Sliders may deliver a float, and a negative value would otherwise slice
    # from the end of the ranking instead of selecting nothing.
    limit = max(0, int(sentence_count))
    sentences = _split_sentences(text)
    if len(sentences) <= limit:
        return _to_json({
            "summary": text.strip(),
            "original_sentences": len(sentences),
        })

    freq = Counter(w for w in _tokenize(text) if w not in STOPWORDS)

    def sentence_score(sentence: str) -> float:
        # Average over content words only, so a sentence is not penalised
        # for the stopwords it happens to contain.
        terms = [w for w in _tokenize(sentence) if w not in STOPWORDS]
        return sum(freq[w] for w in terms) / len(terms) if terms else 0.0

    scores = [sentence_score(s) for s in sentences]
    ranked = sorted(range(len(sentences)), key=scores.__getitem__, reverse=True)
    # Restore document order so the summary reads naturally.
    chosen = sorted(ranked[:limit])

    return _to_json({
        "summary": " ".join(sentences[i] for i in chosen),
        "original_sentences": len(sentences),
    })


def check_spelling(text: str) -> str:
    """Identify commonly misspelled words and suggest corrections.

    Checks each word against a dictionary of frequent English misspellings.
    Each misspelling is reported once, in order of first appearance.

    Args:
        text: The input text

    Returns:
        JSON string with the count and list of misspelled words with suggestions
    """
    found = []
    seen = set()
    for word in _tokenize(text):
        if word in COMMON_MISSPELLINGS and word not in seen:
            seen.add(word)
            found.append({"word": word, "suggestion": COMMON_MISSPELLINGS[word]})

    return _to_json({
        "misspelled_count": len(found),
        "misspelled_words": found,
    })


def readability_tips(text: str) -> str:
    """Suggest concrete improvements for clarity and readability.

    Flags long sentences, long words, excessive adverbs, passive voice, and filler words.

    Args:
        text: The input text

    Returns:
        JSON string with a list of actionable writing tips
    """
    words = text.split()
    if not words:
        return _to_json({"error": "No text to analyze"})

    sentences = _split_sentences(text)
    stripped = [w.strip(_PUNCTUATION) for w in words]
    lowered = [w.lower() for w in stripped]
    tips = []

    long_sentences = [s for s in sentences if len(s.split()) > 25]
    if long_sentences:
        tips.append(
            f"{len(long_sentences)} sentence(s) exceed 25 words; "
            "consider splitting them for clarity."
        )

    long_words = [w for w in stripped if len(w) >= 13]
    if long_words:
        tips.append(
            f"{len(long_words)} long word(s) (13+ characters); "
            "simpler synonyms may read more easily."
        )

    # Heuristic: any word ending in "ly" is treated as an adverb. The tip is
    # shown only above roughly one per twenty words (and never for just one).
    adverbs = [w for w in lowered if w.endswith("ly")]
    if len(adverbs) > max(1, len(words) // 20):
        tips.append(f"Frequent -ly adverbs ({len(adverbs)}); trimming some tightens the prose.")

    # Heuristic: a form of "to be" followed by a word ending in "-ed".
    passive = len(_PASSIVE_RE.findall(text.lower()))
    if passive:
        tips.append(
            f"{passive} possible passive-voice construction(s); "
            "active voice is usually clearer."
        )

    filler_hits = [w for w in lowered if w in FILLER_WORDS]
    if filler_hits:
        tips.append(
            f"{len(filler_hits)} filler word(s) detected (e.g. very, really, just); "
            "removing them strengthens writing."
        )

    if not tips:
        tips.append("No major readability issues detected. Nice and clear!")

    return _to_json({"tips": tips})


# Create web UI
with gr.Blocks(title="Text Processor") as demo:
    gr.Markdown("# Text Processing Tools")
    gr.Markdown(
        "Analyze statistics, extract keywords, gauge sentiment, detect language, "
        "summarize, spell-check, and get readability tips."
    )

    with gr.Tab("Analyze Text"):
        text_input1 = gr.Textbox(label="Enter text", lines=8, placeholder="Paste your text here...")
        analysis_output = gr.Textbox(label="Analysis Results", lines=8)
        gr.Button("Analyze", size="lg").click(analyze_text, text_input1, analysis_output)

    with gr.Tab("Extract Keywords"):
        text_input2 = gr.Textbox(label="Enter text", lines=8)
        count_input = gr.Slider(1, 20, value=5, step=1, label="Number of keywords")
        keywords_output = gr.Textbox(label="Keywords", lines=8)
        gr.Button("Extract", size="lg").click(
            extract_keywords, [text_input2, count_input], keywords_output
        )

    with gr.Tab("Reading Level"):
        text_input3 = gr.Textbox(label="Enter text", lines=8)
        level_output = gr.Textbox(label="Reading Level Analysis", lines=5)
        gr.Button("Check Level", size="lg").click(check_reading_level, text_input3, level_output)

    with gr.Tab("Sentiment"):
        text_input4 = gr.Textbox(label="Enter text", lines=8)
        sentiment_output = gr.Textbox(label="Sentiment Analysis", lines=6)
        gr.Button("Analyze Sentiment", size="lg").click(
            analyze_sentiment, text_input4, sentiment_output
        )

    with gr.Tab("Language"):
        text_input5 = gr.Textbox(label="Enter text", lines=8)
        language_output = gr.Textbox(label="Detected Language", lines=8)
        gr.Button("Detect Language", size="lg").click(
            detect_language, text_input5, language_output
        )

    with gr.Tab("Summarize"):
        text_input6 = gr.Textbox(label="Enter text", lines=8)
        sentence_slider = gr.Slider(1, 10, value=2, step=1, label="Summary sentences")
        summary_output = gr.Textbox(label="Summary", lines=6)
        gr.Button("Summarize", size="lg").click(
            summarize_text, [text_input6, sentence_slider], summary_output
        )

    with gr.Tab("Spell Check"):
        text_input7 = gr.Textbox(label="Enter text", lines=8)
        spelling_output = gr.Textbox(label="Spelling Results", lines=8)
        gr.Button("Check Spelling", size="lg").click(check_spelling, text_input7, spelling_output)

    with gr.Tab("Readability Tips"):
        text_input8 = gr.Textbox(label="Enter text", lines=8)
        tips_output = gr.Textbox(label="Readability Tips", lines=8)
        gr.Button("Get Tips", size="lg").click(readability_tips, text_input8, tips_output)


if __name__ == "__main__":
    demo.launch(mcp_server=True)