maldons77 committed on
Commit
422c6f8
·
verified ·
1 Parent(s): 447bb1a

Upload 5 files

Browse files
Files changed (5) hide show
  1. LICENSE +22 -0
  2. README.md +33 -0
  3. app.py +133 -0
  4. requirements.txt +5 -0
  5. runtime.txt +1 -0
LICENSE ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ MIT License
3
+
4
+ Copyright (c) 2025 Eric Maldon
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ of this software and associated documentation files (the "Software"), to deal
8
+ in the Software without restriction, including without limitation the rights
9
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ copies of the Software, and to permit persons to whom the Software is
11
+ furnished to do so, subject to the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be included in all
14
+ copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
+ SOFTWARE.
README.md ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ ---
3
+ title: Smart Text Toolbox
4
+ emoji: 🧰
5
+ colorFrom: indigo
6
+ colorTo: green
7
+ sdk: gradio
8
+ app_file: app.py
9
+ pinned: false
10
+ ---
11
+
12
+ # Smart Text Toolbox
13
+
14
+ ## Overview
15
+ A CPU-friendly, education-focused NLP toolbox built with Gradio and 🤗 Transformers. It bundles four common text tasks into a single, simple interface:
16
+ - **Language Detection** (auto-detects top-3 languages)
17
+ - **Summarization** (with adjustable compression ratio)
18
+ - **Keyword Extraction** (YAKE-based, with language hint)
19
+ - **Sentiment Analysis** (with emoji feedback)
20
+
21
+ ## Why this project?
22
+ A clean, student-friendly example that demonstrates multiple NLP tasks without GPU dependencies. Perfect for learning and sharing safe, reproducible demos.
23
+
24
+ ## How to Run Locally
25
+ ```bash
26
+ pip install -r requirements.txt
27
+ python app.py
28
+ ```
29
+
30
+ ## Acceptable Use
31
+ This project is strictly for legitimate, non-harmful, and responsible AI use cases (education, research, prototyping).
32
+ Do **not** use it to generate or support illegal, harmful, or unethical content.
33
+ Please follow the model licenses and the Hugging Face Acceptable Use Policy.
app.py ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import pipeline
3
+ from langdetect import detect_langs
4
+ import yake
5
+
6
+ # --- Lazy global pipelines to avoid reloading ---
7
+ _pipes = {}
8
+
9
def get_pipe(task, model=None):
    """Return a cached 🤗 Transformers pipeline for *task* (and optional *model*).

    Pipelines are constructed at most once per (task, model) pair and memoized
    in the module-level ``_pipes`` dict, so repeated UI callbacks never reload
    model weights.
    """
    cache_key = (task, model or "")
    if cache_key in _pipes:
        return _pipes[cache_key]
    # First request for this pair: build and remember the pipeline.
    pipe = pipeline(task) if model is None else pipeline(task, model=model)
    _pipes[cache_key] = pipe
    return pipe
17
+
18
+ # --- Utilities ---
19
def safe_text(txt: str) -> str:
    """Coerce *txt* to a clean string: None/empty -> "", else stripped text."""
    if not txt:
        return ""
    return txt.strip()
21
+
22
def detect_language(text: str):
    """Guess up to three candidate languages for *text* with probabilities.

    Returns a single display string ("EN β€” 0.98 / ..."); inputs shorter than
    3 words are rejected with an error message instead of raising.
    """
    cleaned = (text or "").strip()
    if len(cleaned.split()) < 3:
        return "❌ Please provide a longer text (at least 3 words)."
    try:
        candidates = detect_langs(cleaned)
        top = [f"{str(c.lang).upper()} β€” {c.prob:.2f}" for c in candidates[:3]]
        return " / ".join(top)
    except Exception as exc:
        return f"⚠️ Could not detect language: {exc}"
32
+
33
def summarize_text(text: str, target_ratio: float = 0.25, min_words: int = 30, max_words: int = 160):
    """Summarize *text* with distilbart-cnn at roughly ``target_ratio`` compression.

    Parameters:
        text: input passage; must contain at least 50 words.
        target_ratio: desired summary/source token-length ratio.
        min_words: soft lower bound on summary length, in words.
        max_words: soft upper bound on summary length, in words.

    Returns the summary string, or a user-facing error message (never raises).
    """
    text = (text or "").strip()
    if not text or len(text.split()) < 50:
        return "❌ Please paste a longer text (50+ words) to summarize."

    # Heuristic: English text averages ~1.3 model tokens per word.
    n_words = len(text.split())
    approx_tokens = int(n_words * 1.3)
    max_length = max(int(approx_tokens * target_ratio), 64)
    max_length = min(max_length, int(max_words * 1.3))
    # Bug fix: the min_words parameter was previously accepted but ignored.
    # Honor it as a floor on min_length, clamped strictly below max_length so
    # the generation constraints stay satisfiable.
    min_length = max(int(max_length * 0.5), int(min_words * 1.3))
    min_length = min(min_length, max_length - 1)

    summarizer = get_pipe("summarization", model="sshleifer/distilbart-cnn-12-6")
    try:
        result = summarizer(
            text,
            max_length=max_length,
            min_length=min_length,
            do_sample=False,
            truncation=True,  # long inputs are truncated to the model window
        )
        return result[0]["summary_text"]
    except Exception as e:
        return f"⚠️ Summarization error: {e}"
56
+
57
def extract_keywords(text: str, top_k: int = 10, lang_hint: str = "auto"):
    """Extract the top-*top_k* unigram keywords from *text* via YAKE.

    Lower YAKE scores mean more relevant terms, so output is sorted best
    first, one keyword per line.  ``lang_hint="auto"`` falls back to English.
    """
    cleaned = (text or "").strip()
    if len(cleaned.split()) < 20:
        return "❌ Please provide at least 20 words for keyword extraction."
    # "auto" (or an empty hint) defaults to English for YAKE.
    lang = lang_hint if lang_hint and lang_hint != "auto" else "en"
    try:
        extractor = yake.KeywordExtractor(lan=lang, n=1, top=top_k)
        ranked = sorted(extractor.extract_keywords(cleaned), key=lambda kw: kw[1])
        return "\n".join(f"{term} β€” score: {score:.4f}" for term, score in ranked)
    except Exception as exc:
        return f"⚠️ Keyword extraction error: {exc}"
70
+
71
def analyze_sentiment(text: str):
    """Classify *text* with SST-2 DistilBERT and render the result with emoji.

    Predictions below 0.60 confidence are relabeled NEUTRAL; errors from the
    classifier are returned as a warning string rather than raised.
    """
    cleaned = (text or "").strip()
    if not cleaned:
        return "❌ Please enter some text."
    classifier = get_pipe("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
    try:
        prediction = classifier(cleaned)[0]
        label = prediction["label"].upper()
        score = float(prediction["score"])
        if score < 0.60:
            # Low-confidence calls are reported as neutral.
            label = "NEUTRAL"
        faces = {
            "POSITIVE": "πŸ˜ŠπŸŒŸπŸŽ‰",
            "NEGATIVE": "πŸ˜žπŸ’”πŸ‘Ž",
            "NEUTRAL": "πŸ˜πŸ€”",
        }
        return f"{faces.get(label, 'πŸ€·β€β™‚οΈ')} ({label}, confidence: {score:.2f})"
    except Exception as exc:
        return f"⚠️ Sentiment error: {exc}"
90
+
91
# --- Gradio UI: four tabs, each wired to one of the NLP helpers above. ---
# NOTE(review): original indentation was lost in this paste; nesting (in
# particular which widgets sit inside each gr.Row) is reconstructed from the
# with-statements — verify against the deployed Space.
with gr.Blocks(title="Smart Text Toolbox") as demo:
    gr.Markdown(
        """
        # Smart Text Toolbox
        A multi-tool NLP demo for education and research. Runs on CPU.
        """
    )

    # Tab 1: top-3 language guesses (langdetect) for free-form text.
    with gr.Tab("Language Detection"):
        ld_in = gr.Textbox(label="Input text", lines=6, placeholder="Paste a paragraph in any language...")
        ld_btn = gr.Button("Detect Language")
        ld_out = gr.Textbox(label="Detected languages (top-3)", lines=2)
        ld_btn.click(detect_language, inputs=ld_in, outputs=ld_out)

    # Tab 2: abstractive summarization; slider sets the target compression
    # ratio passed through to summarize_text.
    with gr.Tab("Summarization"):
        sm_in = gr.Textbox(label="Input text (50+ words)", lines=10, placeholder="Paste a long article or paragraph...")
        with gr.Row():
            sm_ratio = gr.Slider(0.1, 0.6, value=0.25, step=0.05, label="Compression ratio target")
        sm_btn = gr.Button("Summarize")
        sm_out = gr.Textbox(label="Summary", lines=10)
        sm_btn.click(summarize_text, inputs=[sm_in, sm_ratio], outputs=sm_out)

    # Tab 3: YAKE keyword extraction with a top-K slider and a language hint.
    with gr.Tab("Keyword Extraction"):
        kw_in = gr.Textbox(label="Input text (20+ words)", lines=8, placeholder="Paste a paragraph...")
        with gr.Row():
            kw_topk = gr.Slider(5, 20, value=10, step=1, label="Top-K keywords")
            kw_lang = gr.Dropdown(
                label="Language (hint)",
                choices=["auto","en","it","es","fr","de","pt","nl","sv","no","da","fi","pl","cs","sk","sl","hr","ro","hu","tr"],
                value="auto"
            )
        kw_btn = gr.Button("Extract Keywords")
        kw_out = gr.Textbox(label="Keywords", lines=10)
        kw_btn.click(extract_keywords, inputs=[kw_in, kw_topk, kw_lang], outputs=kw_out)

    # Tab 4: SST-2 sentiment with emoji rendering.
    with gr.Tab("Sentiment Analysis"):
        st_in = gr.Textbox(label="Input text", lines=4, placeholder="Type a sentence...")
        st_btn = gr.Button("Analyze Sentiment")
        st_out = gr.Textbox(label="Sentiment", lines=2)
        st_btn.click(analyze_sentiment, inputs=st_in, outputs=st_out)

# Launch the app only when run as a script (Spaces imports this module too).
if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ gradio>=4.36.1
2
+ transformers>=4.41.0
3
+ torch
4
+ langdetect
5
+ yake
runtime.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ python-3.10