Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from transformers import pipeline | |
| from langdetect import detect_langs | |
| import yake | |
# --- Pipelines are built lazily and kept in a module-level cache ---
_pipes = {}


def get_pipe(task, model=None):
    """Return the pipeline for ``task``/``model``, building it on first request.

    Built pipelines are stored in the module-level ``_pipes`` dict under the
    key ``(task, model or "")`` and handed back on every later call with the
    same key, so ``model=None`` and ``model=""`` share one cache slot.

    Args:
        task: Task name forwarded to ``pipeline``.
        model: Optional model identifier; when ``None`` the ``model`` keyword
            is not passed to ``pipeline`` at all.

    Returns:
        The cached (or newly created) pipeline object.
    """
    cache_key = (task, model or "")
    try:
        return _pipes[cache_key]
    except KeyError:
        pass

    # Only forward ``model`` when the caller actually supplied one.
    extra_kwargs = {} if model is None else {"model": model}
    built = pipeline(task, **extra_kwargs)
    _pipes[cache_key] = built
    return built
# --- Utilities ---
def safe_text(txt: str) -> str:
    """Return ``txt`` with surrounding whitespace removed.

    Any falsy input (``None``, empty string) yields ``""`` so callers can
    treat missing and blank text the same way.
    """
    if not txt:
        return ""
    return txt.strip()
def detect_language(text: str):
    """Report up to three language candidates for ``text``.

    Args:
        text: Input text; after trimming it must contain at least three
            whitespace-separated words.

    Returns:
        A single string with up to three ``LANG``/probability entries joined
        by ``" / "``, or a user-facing message when the text is too short or
        ``detect_langs`` raises.
    """
    cleaned = safe_text(text)
    # An empty string splits into zero words, so this also covers blank input.
    if len(cleaned.split()) < 3:
        return "β Please provide a longer text (at least 3 words)."

    try:
        candidates = detect_langs(cleaned)
        formatted = []
        for candidate in candidates[:3]:
            formatted.append(f"{str(candidate.lang).upper()} β {candidate.prob:.2f}")
        return " / ".join(formatted)
    except Exception as exc:
        return f"β οΈ Could not detect language: {exc}"
def summarize_text(text: str, target_ratio: float = 0.25, min_words: int = 30, max_words: int = 160):
    """Summarize ``text`` with the ``sshleifer/distilbart-cnn-12-6`` pipeline.

    Length limits are passed to the pipeline in tokens, estimated from word
    counts with a rough 1.3 tokens-per-word factor.

    Args:
        text: Text to summarize; after trimming it must contain at least 50
            whitespace-separated words.
        target_ratio: Desired summary length as a fraction of the input length.
        min_words: Approximate lower bound on the summary length, in words.
        max_words: Approximate upper bound on the summary length, in words.

    Returns:
        The summary text, or a user-facing message when the input is too
        short or the pipeline call raises.
    """
    tokens_per_word = 1.3  # heuristic words -> tokens conversion

    text = safe_text(text)
    n_words = len(text.split())
    # An empty string has zero words, so this also rejects blank input.
    if n_words < 50:
        return "β Please paste a longer text (50+ words) to summarize."

    # Upper bound: the requested fraction of the input, never below 64 tokens
    # and never above the max_words budget.
    approx_tokens = int(n_words * tokens_per_word)
    max_length = max(int(approx_tokens * target_ratio), 64)
    max_length = min(max_length, int(max_words * tokens_per_word))

    # Lower bound: half of the upper bound, raised to the caller's min_words
    # floor, but clamped so it can never exceed the upper bound.
    min_length = max(int(max_length * 0.5), int(min_words * tokens_per_word))
    min_length = min(min_length, max_length)

    summarizer = get_pipe("summarization", model="sshleifer/distilbart-cnn-12-6")
    try:
        result = summarizer(
            text,
            max_length=max_length,
            min_length=min_length,
            do_sample=False,
            truncation=True,
        )
        return result[0]["summary_text"]
    except Exception as e:
        return f"β οΈ Summarization error: {e}"
def extract_keywords(text: str, top_k: int = 10, lang_hint: str = "auto"):
    """Extract the ``top_k`` single-word keywords from ``text`` using YAKE.

    Args:
        text: Input text; after trimming it must contain at least 20
            whitespace-separated words.
        top_k: Number of keywords to return. Coerced to ``int`` because UI
            sliders may deliver the value as a float.
        lang_hint: Language code passed to YAKE. With ``"auto"`` (or an empty
            value) the top ``detect_langs`` candidate for ``text`` is used,
            falling back to ``"en"`` when detection fails.

    Returns:
        One ``term`` / ``score`` line per keyword, ordered by ascending score,
        or a user-facing message when the input is too short or extraction
        raises.
    """
    # Local import: the file's top-level imports only bring in detect_langs.
    from langdetect.lang_detect_exception import LangDetectException

    text = safe_text(text)
    # An empty string has zero words, so this also rejects blank input.
    if len(text.split()) < 20:
        return "β Please provide at least 20 words for keyword extraction."

    language = lang_hint
    if not language or language == "auto":
        # "auto" should follow the text's language rather than assume English.
        try:
            language = detect_langs(text)[0].lang
        except (LangDetectException, IndexError):
            language = "en"

    try:
        kw_extractor = yake.KeywordExtractor(lan=language, n=1, top=int(top_k))
        keywords = kw_extractor.extract_keywords(text)
        keywords_sorted = sorted(keywords, key=lambda pair: pair[1])
        lines = [f"{term} β score: {score:.4f}" for term, score in keywords_sorted]
        return "\n".join(lines)
    except Exception as e:
        return f"β οΈ Keyword extraction error: {e}"
def analyze_sentiment(text: str):
    """Classify the sentiment of ``text`` and format it for display.

    Uses the ``distilbert-base-uncased-finetuned-sst-2-english`` pipeline.
    Predictions with a score below 0.60 are reported as ``NEUTRAL`` instead
    of the label the model returned.

    Args:
        text: Text to classify; blank input is rejected.

    Returns:
        A string of the form ``"<emoji> (<LABEL>, confidence: <score>)"``, or
        a user-facing message when the input is blank or the pipeline raises.
    """
    text = safe_text(text)
    if not text:
        return "β Please enter some text."

    clf = get_pipe("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
    try:
        # Ask the tokenizer to truncate so inputs longer than the model's
        # sequence limit are classified instead of raising.
        res = clf(text, truncation=True)[0]
        label = res["label"].upper()
        score = float(res["score"])
        emoji_map = {
            "POSITIVE": "πππ",
            "NEGATIVE": "πππ",
            "NEUTRAL": "ππ€",
        }
        # Low-confidence predictions are presented as neutral.
        if score < 0.60:
            label = "NEUTRAL"
        return f"{emoji_map.get(label, 'π€·ββοΈ')} ({label}, confidence: {score:.2f})"
    except Exception as e:
        return f"β οΈ Sentiment error: {e}"
# --- Gradio UI: one tab per tool, each wiring a button to its handler ---
# NOTE(review): the nesting below was reconstructed; it is assumed that only
# the slider/dropdown controls sit inside each gr.Row() and that the buttons
# and output boxes are direct children of the tab — confirm against the
# deployed layout.
with gr.Blocks(title="Smart Text Toolbox") as demo:
    gr.Markdown(
        """
# Smart Text Toolbox
A multi-tool NLP demo for education and research. Runs on CPU.
"""
    )
    # Tab 1: language detection -> detect_language(text)
    with gr.Tab("Language Detection"):
        ld_in = gr.Textbox(label="Input text", lines=6, placeholder="Paste a paragraph in any language...")
        ld_btn = gr.Button("Detect Language")
        ld_out = gr.Textbox(label="Detected languages (top-3)", lines=2)
        ld_btn.click(detect_language, inputs=ld_in, outputs=ld_out)
    # Tab 2: summarization -> summarize_text(text, target_ratio)
    with gr.Tab("Summarization"):
        sm_in = gr.Textbox(label="Input text (50+ words)", lines=10, placeholder="Paste a long article or paragraph...")
        with gr.Row():
            sm_ratio = gr.Slider(0.1, 0.6, value=0.25, step=0.05, label="Compression ratio target")
        sm_btn = gr.Button("Summarize")
        sm_out = gr.Textbox(label="Summary", lines=10)
        # Only text and ratio are passed; min_words/max_words keep their defaults.
        sm_btn.click(summarize_text, inputs=[sm_in, sm_ratio], outputs=sm_out)
    # Tab 3: keyword extraction -> extract_keywords(text, top_k, lang_hint)
    with gr.Tab("Keyword Extraction"):
        kw_in = gr.Textbox(label="Input text (20+ words)", lines=8, placeholder="Paste a paragraph...")
        with gr.Row():
            kw_topk = gr.Slider(5, 20, value=10, step=1, label="Top-K keywords")
            kw_lang = gr.Dropdown(
                label="Language (hint)",
                choices=["auto","en","it","es","fr","de","pt","nl","sv","no","da","fi","pl","cs","sk","sl","hr","ro","hu","tr"],
                value="auto"
            )
        kw_btn = gr.Button("Extract Keywords")
        kw_out = gr.Textbox(label="Keywords", lines=10)
        kw_btn.click(extract_keywords, inputs=[kw_in, kw_topk, kw_lang], outputs=kw_out)
    # Tab 4: sentiment analysis -> analyze_sentiment(text)
    with gr.Tab("Sentiment Analysis"):
        st_in = gr.Textbox(label="Input text", lines=4, placeholder="Type a sentence...")
        st_btn = gr.Button("Analyze Sentiment")
        st_out = gr.Textbox(label="Sentiment", lines=2)
        st_btn.click(analyze_sentiment, inputs=st_in, outputs=st_out)

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()