Spaces:

maldons77
/

smart-text-toolbox

Sleeping

App Files Files Community

maldons77 committed on Aug 14, 2025

Commit

422c6f8

verified ·

1 Parent(s): 447bb1a

Upload 5 files

Browse files

Files changed (5) hide show

LICENSE +22 -0
README.md +33 -0
app.py +133 -0
requirements.txt +5 -0
runtime.txt +1 -0

LICENSE ADDED Viewed

	@@ -0,0 +1,22 @@

+MIT License
+Copyright (c) 2025 Eric Maldon
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

README.md ADDED Viewed

	@@ -0,0 +1,33 @@

+---
+title: Smart Text Toolbox
+emoji: 🧰
+colorFrom: indigo
+colorTo: green
+sdk: gradio
+app_file: app.py
+pinned: false
+---
+# Smart Text Toolbox
+## Overview
+A CPU-friendly, education-focused NLP toolbox built with Gradio and 🤗 Transformers. It bundles four common text tasks into a single, simple interface:
+- **Language Detection** (auto-detects top-3 languages)
+- **Summarization** (with adjustable compression ratio)
+- **Keyword Extraction** (YAKE-based, with language hint)
+- **Sentiment Analysis** (with emoji feedback)
+## Why this project?
+A clean, student-friendly example that demonstrates multiple NLP tasks without GPU dependencies. Perfect for learning and sharing safe, reproducible demos.
+## How to Run Locally
+```bash
+pip install -r requirements.txt
+python app.py
+```
+## Acceptable Use
+This project is strictly for legitimate, non-harmful, and responsible AI use cases (education, research, prototyping).
+Do **not** use it to generate or support illegal, harmful, or unethical content.
+Please follow the model licenses and the Hugging Face Acceptable Use Policy.

app.py ADDED Viewed

	@@ -0,0 +1,133 @@

+import gradio as gr
+from transformers import pipeline
+from langdetect import detect_langs
+import yake
+# --- Lazy global pipelines to avoid reloading ---
+_pipes = {}
+def get_pipe(task, model=None):
+    key = (task, model or "")
+    if key not in _pipes:
+        if model is None:
+            _pipes[key] = pipeline(task)
+        else:
+            _pipes[key] = pipeline(task, model=model)
+    return _pipes[key]
+# --- Utilities ---
+def safe_text(txt: str) -> str:
+    return (txt or "").strip()
+def detect_language(text: str):
+    text = safe_text(text)
+    if not text or len(text.split()) < 3:
+        return "❌ Please provide a longer text (at least 3 words)."
+    try:
+        langs = detect_langs(text)
+        results = [f"{str(l.lang).upper()} — {l.prob:.2f}" for l in langs[:3]]
+        return " / ".join(results)
+    except Exception as e:
+        return f"⚠️ Could not detect language: {e}"
+def summarize_text(text: str, target_ratio: float = 0.25, min_words: int = 30, max_words: int = 160):
+    text = safe_text(text)
+    if not text or len(text.split()) < 50:
+        return "❌ Please paste a longer text (50+ words) to summarize."
+    # Heuristic: map words to token-ish lengths
+    n_words = len(text.split())
+    approx_tokens = int(n_words * 1.3)
+    max_new_tokens = max(int(approx_tokens * target_ratio), 64)
+    max_new_tokens = min(max_new_tokens, int(max_words * 1.3))
+    min_length = int(max_new_tokens * 0.5)
+    summarizer = get_pipe("summarization", model="sshleifer/distilbart-cnn-12-6")
+    try:
+        out = summarizer(
+            text,
+            max_length=max_new_tokens,
+            min_length=min_length,
+            do_sample=False,
+            truncation=True,
+        )[0]["summary_text"]
+        return out
+    except Exception as e:
+        return f"⚠️ Summarization error: {e}"
+def extract_keywords(text: str, top_k: int = 10, lang_hint: str = "auto"):
+    text = safe_text(text)
+    if not text or len(text.split()) < 20:
+        return "❌ Please provide at least 20 words for keyword extraction."
+    language = None if lang_hint == "auto" else lang_hint
+    try:
+        kw_extractor = yake.KeywordExtractor(lan=language or "en", n=1, top=top_k)
+        keywords = kw_extractor.extract_keywords(text)
+        keywords_sorted = sorted(keywords, key=lambda x: x[1])
+        lines = [f"{term}  —  score: {score:.4f}" for term, score in keywords_sorted]
+        return "\n".join(lines)
+    except Exception as e:
+        return f"⚠️ Keyword extraction error: {e}"
+def analyze_sentiment(text: str):
+    text = safe_text(text)
+    if not text:
+        return "❌ Please enter some text."
+    clf = get_pipe("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
+    try:
+        res = clf(text)[0]
+        label = res["label"].upper()
+        score = float(res["score"])
+        emoji_map = {
+            "POSITIVE": "😊🌟🎉",
+            "NEGATIVE": "😞💔👎",
+            "NEUTRAL":  "😐🤔",
+        }
+        if score < 0.60:
+            label = "NEUTRAL"
+        return f"{emoji_map.get(label, '🤷‍♂️')}  ({label}, confidence: {score:.2f})"
+    except Exception as e:
+        return f"⚠️ Sentiment error: {e}"
+with gr.Blocks(title="Smart Text Toolbox") as demo:  # UI definition: one tab per tool, each wired to a function above
+    gr.Markdown(  # Page heading and one-line description
+        """
+        # Smart Text Toolbox
+        A multi-tool NLP demo for education and research. Runs on CPU.
+        """
+    )
+    with gr.Tab("Language Detection"):  # Tab 1: text in, top-3 language guesses out
+        ld_in = gr.Textbox(label="Input text", lines=6, placeholder="Paste a paragraph in any language...")
+        ld_btn = gr.Button("Detect Language")
+        ld_out = gr.Textbox(label="Detected languages (top-3)", lines=2)
+        ld_btn.click(detect_language, inputs=ld_in, outputs=ld_out)
+    with gr.Tab("Summarization"):  # Tab 2: text plus ratio slider in, summary out
+        sm_in = gr.Textbox(label="Input text (50+ words)", lines=10, placeholder="Paste a long article or paragraph...")
+        with gr.Row():
+            sm_ratio = gr.Slider(0.1, 0.6, value=0.25, step=0.05, label="Compression ratio target")  # passed as target_ratio
+        sm_btn = gr.Button("Summarize")
+        sm_out = gr.Textbox(label="Summary", lines=10)
+        sm_btn.click(summarize_text, inputs=[sm_in, sm_ratio], outputs=sm_out)  # min_words/max_words keep their defaults
+    with gr.Tab("Keyword Extraction"):  # Tab 3: text, top-K and language hint in, keyword list out
+        kw_in = gr.Textbox(label="Input text (20+ words)", lines=8, placeholder="Paste a paragraph...")
+        with gr.Row():
+            kw_topk = gr.Slider(5, 20, value=10, step=1, label="Top-K keywords")  # passed as top_k
+            kw_lang = gr.Dropdown(  # passed as lang_hint
+                label="Language (hint)",
+                choices=["auto","en","it","es","fr","de","pt","nl","sv","no","da","fi","pl","cs","sk","sl","hr","ro","hu","tr"],
+                value="auto"
+            )
+        kw_btn = gr.Button("Extract Keywords")
+        kw_out = gr.Textbox(label="Keywords", lines=10)
+        kw_btn.click(extract_keywords, inputs=[kw_in, kw_topk, kw_lang], outputs=kw_out)
+    with gr.Tab("Sentiment Analysis"):  # Tab 4: text in, emoji-decorated label out
+        st_in = gr.Textbox(label="Input text", lines=4, placeholder="Type a sentence...")
+        st_btn = gr.Button("Analyze Sentiment")
+        st_out = gr.Textbox(label="Sentiment", lines=2)
+        st_btn.click(analyze_sentiment, inputs=st_in, outputs=st_out)
+if __name__ == "__main__":  # Start the app only when run as a script (e.g. `python app.py`)
+    demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+gradio>=4.36.1
+transformers>=4.41.0
+torch
+langdetect
+yake

runtime.txt ADDED Viewed

	@@ -0,0 +1 @@


+python-3.10