"""Smart Text Toolbox: a small Gradio demo bundling four NLP utilities.

Tabs: language detection (langdetect), abstractive summarization and
sentiment analysis (Hugging Face ``transformers`` pipelines), and keyword
extraction (YAKE). Every handler returns a plain string so that both results
and user-facing error messages can be shown in a ``gr.Textbox``.
"""

import gradio as gr
from transformers import pipeline
from langdetect import DetectorFactory, LangDetectException, detect_langs
import yake

# langdetect is non-deterministic by default; fixing the seed makes repeated
# detections on the same text return the same ranking and probabilities.
DetectorFactory.seed = 0

# Model checkpoints used by the transformer-backed tabs.
SUMMARIZATION_MODEL = "sshleifer/distilbart-cnn-12-6"
SENTIMENT_MODEL = "distilbert-base-uncased-finetuned-sst-2-english"

# Rough words -> tokens conversion factor used by the summary length heuristic.
TOKENS_PER_WORD = 1.3
# Lower bound (in tokens) for the summary length budget.
MIN_SUMMARY_TOKENS = 64
# Classifier scores below this are reported as NEUTRAL.
NEUTRAL_CONFIDENCE_THRESHOLD = 0.60

# --- Lazy global pipelines to avoid reloading ---
_pipes = {}


def get_pipe(task, model=None):
    """Return a cached ``transformers`` pipeline, creating it on first use.

    Args:
        task: Pipeline task name, e.g. ``"summarization"``.
        model: Optional model identifier; ``None`` lets ``transformers`` pick
            its default model for the task.

    Returns:
        The pipeline object cached under ``(task, model or "")``.
    """
    key = (task, model or "")
    if key not in _pipes:
        # ``model=None`` is the pipeline default, so a single call covers both
        # the "explicit model" and the "task default" cases.
        _pipes[key] = pipeline(task, model=model)
    return _pipes[key]


# --- Utilities ---
def safe_text(txt: str) -> str:
    """Return ``txt`` stripped of surrounding whitespace; ``None`` becomes ``""``."""
    return (txt or "").strip()


def detect_language(text: str):
    """Report the top-3 language candidates for ``text``.

    Args:
        text: Input text; must contain at least 3 whitespace-separated words.

    Returns:
        A string such as ``"EN — 0.99 / FR — 0.01"``, or a user-facing error
        message when the input is too short or detection fails.
    """
    text = safe_text(text)
    if not text or len(text.split()) < 3:
        return "❌ Please provide a longer text (at least 3 words)."
    try:
        candidates = detect_langs(text)
        results = [
            f"{str(cand.lang).upper()} — {cand.prob:.2f}" for cand in candidates[:3]
        ]
        return " / ".join(results)
    except Exception as e:  # UI boundary: show the failure instead of crashing
        return f"⚠️ Could not detect language: {e}"


def summarize_text(
    text: str,
    target_ratio: float = 0.25,
    min_words: int = 30,
    max_words: int = 160,
):
    """Summarize ``text`` with a DistilBART summarization pipeline.

    The summary length budget is derived from the input length: the word
    count is scaled to an approximate token count, multiplied by
    ``target_ratio``, floored at ``MIN_SUMMARY_TOKENS`` and capped at
    ``max_words`` converted to tokens. ``min_length`` is half of that budget.

    Args:
        text: Input text; must contain at least 50 words.
        target_ratio: Desired summary/input length ratio.
        min_words: Accepted for interface compatibility.
            NOTE(review): this value is not used by the length heuristic
            below — confirm whether it should floor the summary length.
        max_words: Upper bound on the summary length, in words.

    Returns:
        The summary text, or a user-facing error message.
    """
    text = safe_text(text)
    if not text or len(text.split()) < 50:
        return "❌ Please paste a longer text (50+ words) to summarize."

    # Heuristic: map words to token-ish lengths
    n_words = len(text.split())
    approx_tokens = int(n_words * TOKENS_PER_WORD)
    max_new_tokens = max(int(approx_tokens * target_ratio), MIN_SUMMARY_TOKENS)
    max_new_tokens = min(max_new_tokens, int(max_words * TOKENS_PER_WORD))
    min_length = int(max_new_tokens * 0.5)

    summarizer = get_pipe("summarization", model=SUMMARIZATION_MODEL)
    try:
        out = summarizer(
            text,
            max_length=max_new_tokens,
            min_length=min_length,
            do_sample=False,
            truncation=True,
        )[0]["summary_text"]
        return out
    except Exception as e:  # UI boundary: show the failure instead of crashing
        return f"⚠️ Summarization error: {e}"


def _resolve_keyword_language(text: str, lang_hint: str) -> str:
    """Pick the language code handed to YAKE.

    An explicit hint is used as-is. For ``"auto"`` (or an empty hint) the
    language is detected from ``text``; if detection fails, ``"en"`` is used.
    """
    if lang_hint and lang_hint != "auto":
        return lang_hint
    try:
        return detect_langs(text)[0].lang
    except (LangDetectException, IndexError):
        return "en"


def extract_keywords(text: str, top_k: int = 10, lang_hint: str = "auto"):
    """Extract single-word keywords from ``text`` with YAKE.

    Args:
        text: Input text; must contain at least 20 words.
        top_k: Number of keywords to return. Coerced to an integer >= 1
            because UI sliders may deliver the value as a float.
        lang_hint: Language code for stopword selection, or ``"auto"`` to
            detect the language from the text.

    Returns:
        One ``"term — score: 0.1234"`` line per keyword, ordered by ascending
        score, or a user-facing error message.
    """
    text = safe_text(text)
    if not text or len(text.split()) < 20:
        return "❌ Please provide at least 20 words for keyword extraction."

    try:
        language = _resolve_keyword_language(text, lang_hint)
        kw_extractor = yake.KeywordExtractor(
            lan=language, n=1, top=max(1, int(top_k))
        )
        keywords = kw_extractor.extract_keywords(text)
        keywords_sorted = sorted(keywords, key=lambda x: x[1])
        lines = [f"{term} — score: {score:.4f}" for term, score in keywords_sorted]
        return "\n".join(lines)
    except Exception as e:  # UI boundary: show the failure instead of crashing
        return f"⚠️ Keyword extraction error: {e}"


def analyze_sentiment(text: str):
    """Classify the sentiment of ``text`` and decorate it with emojis.

    Predictions whose score is below ``NEUTRAL_CONFIDENCE_THRESHOLD`` are
    relabelled ``NEUTRAL``. The input is truncated to the model's maximum
    sequence length so long texts are classified instead of raising.

    Args:
        text: Input text; must be non-empty after stripping.

    Returns:
        A string such as ``"😊🌟🎉 (POSITIVE, confidence: 0.99)"``, or a
        user-facing error message.
    """
    text = safe_text(text)
    if not text:
        return "❌ Please enter some text."

    clf = get_pipe("sentiment-analysis", model=SENTIMENT_MODEL)
    try:
        res = clf(text, truncation=True)[0]
        label = res["label"].upper()
        score = float(res["score"])
        emoji_map = {
            "POSITIVE": "😊🌟🎉",
            "NEGATIVE": "😞💔👎",
            "NEUTRAL": "😐🤔",
        }
        if score < NEUTRAL_CONFIDENCE_THRESHOLD:
            label = "NEUTRAL"
        return f"{emoji_map.get(label, '🤷‍♂️')} ({label}, confidence: {score:.2f})"
    except Exception as e:  # UI boundary: show the failure instead of crashing
        return f"⚠️ Sentiment error: {e}"


# --- UI: one tab per tool, each wiring a button to its handler ---
with gr.Blocks(title="Smart Text Toolbox") as demo:
    # Title and description are on separate lines so that only the title is
    # rendered as a heading.
    gr.Markdown(
        "# Smart Text Toolbox\n"
        "A multi-tool NLP demo for education and research. Runs on CPU."
    )

    with gr.Tab("Language Detection"):
        ld_in = gr.Textbox(
            label="Input text",
            lines=6,
            placeholder="Paste a paragraph in any language...",
        )
        ld_btn = gr.Button("Detect Language")
        ld_out = gr.Textbox(label="Detected languages (top-3)", lines=2)
        ld_btn.click(detect_language, inputs=ld_in, outputs=ld_out)

    with gr.Tab("Summarization"):
        sm_in = gr.Textbox(
            label="Input text (50+ words)",
            lines=10,
            placeholder="Paste a long article or paragraph...",
        )
        with gr.Row():
            sm_ratio = gr.Slider(
                0.1, 0.6, value=0.25, step=0.05, label="Compression ratio target"
            )
        sm_btn = gr.Button("Summarize")
        sm_out = gr.Textbox(label="Summary", lines=10)
        sm_btn.click(summarize_text, inputs=[sm_in, sm_ratio], outputs=sm_out)

    with gr.Tab("Keyword Extraction"):
        kw_in = gr.Textbox(
            label="Input text (20+ words)",
            lines=8,
            placeholder="Paste a paragraph...",
        )
        with gr.Row():
            kw_topk = gr.Slider(5, 20, value=10, step=1, label="Top-K keywords")
            kw_lang = gr.Dropdown(
                label="Language (hint)",
                choices=[
                    "auto", "en", "it", "es", "fr", "de", "pt", "nl", "sv", "no",
                    "da", "fi", "pl", "cs", "sk", "sl", "hr", "ro", "hu", "tr",
                ],
                value="auto",
            )
        kw_btn = gr.Button("Extract Keywords")
        kw_out = gr.Textbox(label="Keywords", lines=10)
        kw_btn.click(
            extract_keywords, inputs=[kw_in, kw_topk, kw_lang], outputs=kw_out
        )

    with gr.Tab("Sentiment Analysis"):
        st_in = gr.Textbox(
            label="Input text", lines=4, placeholder="Type a sentence..."
        )
        st_btn = gr.Button("Analyze Sentiment")
        st_out = gr.Textbox(label="Sentiment", lines=2)
        st_btn.click(analyze_sentiment, inputs=st_in, outputs=st_out)

if __name__ == "__main__":
    demo.launch()