npaleti2002 commited on
Commit
2e13d88
·
verified ·
1 Parent(s): 0c5deae

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +171 -0
app.py ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import pipeline
3
+ from functools import lru_cache
4
+ import math
5
+
6
# Fallback candidate topics for zero-shot classification; used when the user
# leaves the labels textbox empty, and to pre-fill that textbox in the UI.
DEFAULT_LABELS = [
    "finance", "sports", "tech", "politics", "health", "entertainment",
    "science", "business", "travel", "education"
]
10
+
11
@lru_cache(maxsize=1)
def get_pipes():
    """Build the three Hugging Face pipelines, memoized for the process lifetime.

    Returns:
        tuple: (summarizer, zero-shot classifier, sentiment analyzer), loaded
        lazily on first call so app startup stays fast.
    """
    return (
        pipeline("summarization", model="sshleifer/distilbart-cnn-12-6"),
        pipeline("zero-shot-classification", model="valhalla/distilbart-mnli-12-1"),
        pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english"),
    )
26
+
27
+
28
+ def chunk_text(text: str, max_chars: int = 1600):
29
+ """Naive chunker to keep inputs within summarizer limits.
30
+ Splits on sentences by '. ' and groups into ~max_chars chunks.
31
+ """
32
+ sentences = [s.strip() for s in text.replace("\n", " ").split(". ") if s.strip()]
33
+ chunks, buf = [], ""
34
+ for s in sentences:
35
+ add = (s + (". " if not s.endswith(".") else " "))
36
+ if len(buf) + len(add) <= max_chars:
37
+ buf += add
38
+ else:
39
+ if buf:
40
+ chunks.append(buf.strip())
41
+ buf = add
42
+ if buf:
43
+ chunks.append(buf.strip())
44
+ # Fallback if text had no periods
45
+ if not chunks:
46
+ for i in range(0, len(text), max_chars):
47
+ chunks.append(text[i:i+max_chars])
48
+ return chunks
49
+
50
+
51
def summarize_long(text: str, target_words: int = 120):
    """Summarize arbitrarily long text.

    Chunks the input, summarizes each chunk, then — when several partial
    summaries together overshoot the word target — fuses them with a second
    summarization pass.

    Args:
        text: Text to summarize.
        target_words: Rough desired summary length in words.

    Returns:
        str: The summary text, stripped of surrounding whitespace.
    """
    summarizer = get_pipes()[0]
    # Map the rough word target onto token-length bounds for the model.
    max_len = min(256, max(64, int(target_words * 1.6)))
    min_len = max(20, int(max_len * 0.4))

    def _summ(chunk):
        # Single deterministic summarization call with the shared length bounds.
        return summarizer(chunk, max_length=max_len, min_length=min_len, do_sample=False)[0]["summary_text"]

    pieces = []
    for chunk in chunk_text(text, max_chars=1600):
        try:
            pieces.append(_summ(chunk))
        except Exception:
            # If the model complains about length, retry on a smaller window.
            pieces.append(_summ(chunk[:1200]))

    fused = " ".join(pieces)
    # Second pass only when multiple pieces still exceed the word target.
    if len(pieces) > 1 and len(fused.split()) > target_words:
        return _summ(fused).strip()
    return fused.strip()
71
+
72
+
73
def classify_topics(text: str, labels: list[str]):
    """Zero-shot classify *text* against *labels* (multi-label).

    Args:
        text: Text to classify.
        labels: Candidate topic labels.

    Returns:
        tuple: (all (label, score) pairs sorted by score descending,
        the top three of those pairs).
    """
    zshot = get_pipes()[1]
    res = zshot(text, candidate_labels=labels, multi_label=True)
    ranked = sorted(zip(res["labels"], res["scores"]), key=lambda pair: pair[1], reverse=True)
    return ranked, ranked[:3]
80
+
81
+
82
def analyze_sentiment(text: str):
    """Run sentiment analysis on the first 2000 characters of *text*.

    Truncation keeps latency low; the model emits 'POSITIVE' or 'NEGATIVE'.

    Returns:
        tuple[str, float]: (label, confidence score).
    """
    sentiment_pipe = get_pipes()[2]
    result = sentiment_pipe(text[:2000])[0]
    return result["label"], float(result["score"])
86
+
87
+
88
def analyze(text, labels_csv, summary_words):
    """Gradio callback: summarize, topic-classify, and sentiment-score *text*.

    Args:
        text: Raw user input; may be None or blank.
        labels_csv: Comma-separated candidate labels; falls back to
            DEFAULT_LABELS when empty.
        summary_words: Target summary length in words (slider value).

    Returns:
        tuple: (summary markdown, [[label, score], ...] table rows,
        "label (score), ..." string for the top topics, sentiment label,
        sentiment confidence score).
    """
    text = (text or "").strip()
    if not text:
        # Blank input: empty placeholder values for every output component.
        return "", [], "", "", 0.0

    # CSV -> label list; fall back to defaults when nothing usable remains.
    raw = (labels_csv or "").strip()
    labels = [part.strip() for part in raw.split(",") if part.strip()] or DEFAULT_LABELS

    summary = summarize_long(text, target_words=int(summary_words))
    pairs, top3 = classify_topics(text, labels)
    sent_label, sent_score = analyze_sentiment(text)

    # Friendly "label (score)" listing for the top topics.
    top_str = ", ".join(f"{lab} ({score:.2f})" for lab, score in top3) if top3 else ""
    # Dataframe expects list-of-lists rows.
    table_rows = [[lab, round(score, 4)] for lab, score in pairs]

    return summary, table_rows, top_str, sent_label, sent_score
114
+
115
+
116
# ---- Gradio UI: two-column layout (inputs left, tabbed results right) ----
with gr.Blocks(title="Text Insight Stack", css="""
:root{--radius:16px}
.header {font-size: 28px; font-weight: 800;}
.subtle {opacity:.8}
.card {border:1px solid #e5e7eb; border-radius: var(--radius); padding:16px}
""") as demo:
    # Page header rendered as raw HTML inside Markdown.
    gr.Markdown("""
<div class="header">🧠 Text Insight Stack</div>
<div class="subtle">Summarize • Topic Classify • Sentiment — powered by three open models on Hugging Face</div>
""")

    with gr.Row():
        # Left column: text input, label CSV, summary-length slider, run button.
        with gr.Column(scale=5):
            txt = gr.Textbox(
                label="Paste text",
                placeholder="Paste any article, JD, email, or paragraph...",
                lines=12,
                elem_classes=["card"],
            )
            labels = gr.Textbox(
                label="Candidate topic labels (comma‑separated)",
                value=", ".join(DEFAULT_LABELS),
                elem_classes=["card"],
            )
            words = gr.Slider(
                minimum=40, maximum=200, value=120, step=10,
                label="Target summary length (words)",
                elem_classes=["card"],
            )
            run = gr.Button("Analyze", variant="primary")

        # Right column: one tab per analysis output.
        with gr.Column(scale=5):
            with gr.Tab("Summary"):
                out_summary = gr.Markdown()
            with gr.Tab("Topics"):
                out_table = gr.Dataframe(headers=["label", "score"], datatype=["str", "number"], interactive=False)
                out_top = gr.Markdown()
            with gr.Tab("Sentiment"):
                out_sent_label = gr.Label(num_top_classes=2)
                out_sent_score = gr.Number(label="Confidence score")

    # One-click example that fills the text box.
    ex1 = gr.Examples(
        label="Try an example",
        examples=[[
            "Open-source models are transforming AI by enabling broad access to powerful capabilities. However, organizations must balance innovation with governance, ensuring that safety and compliance keep pace with deployment. This article explores how companies can adopt a pragmatic approach to evaluation, monitoring, and human oversight while still benefiting from the speed of open development."]],
        inputs=[txt]
    )

    # Wire the button to analyze(); output order matches its return tuple.
    run.click(
        analyze,
        inputs=[txt, labels, words],
        outputs=[out_summary, out_table, out_top, out_sent_label, out_sent_score]
    )

if __name__ == "__main__":
    demo.launch()