TextSense / app.py
npaleti2002's picture
Update app.py
3931bcf verified
import gradio as gr
from transformers import pipeline
from functools import lru_cache
# Fallback candidate topics for zero-shot classification, used whenever the
# user leaves the label textbox empty (see analyze()).
DEFAULT_LABELS = [
    "finance", "sports", "tech", "politics", "health", "entertainment",
    "science", "business", "travel", "education"
]
@lru_cache(maxsize=1)
def get_pipes():
    """Lazily build the three Hugging Face pipelines exactly once.

    Returns a (summarizer, zero_shot, sentiment) tuple; lru_cache(maxsize=1)
    means the (slow) model downloads happen only on the first call.
    """
    sentiment_model = "cardiffnlp/twitter-roberta-base-sentiment-latest"
    return (
        pipeline(
            "summarization",
            model="sshleifer/distilbart-cnn-12-6"
        ),
        pipeline(
            "zero-shot-classification",
            model="valhalla/distilbart-mnli-12-1"
        ),
        # 3-class sentiment: NEGATIVE / NEUTRAL / POSITIVE
        pipeline(
            "sentiment-analysis",
            model=sentiment_model,
            tokenizer=sentiment_model
        ),
    )
def chunk_text(text: str, max_chars: int = 1600):
    """Naive chunker to keep inputs within summarizer limits.

    Splits on sentences by '. ' and groups them into chunks of at most
    ~max_chars characters. A single sentence longer than max_chars is
    hard-split into max_chars slices (the original code emitted it as one
    oversized chunk, which defeats the purpose of chunking).

    Returns a list of non-empty chunk strings; [] for empty/whitespace input.
    """
    sentences = [s.strip() for s in text.replace("\n", " ").split(". ") if s.strip()]
    chunks, buf = [], ""
    for s in sentences:
        # Re-attach the sentence terminator lost by split(". ").
        piece = s + (". " if not s.endswith(".") else " ")
        if len(buf) + len(piece) <= max_chars:
            buf += piece
            continue
        # Current buffer is full: flush it before starting a new one.
        if buf:
            chunks.append(buf.strip())
            buf = ""
        if len(piece) <= max_chars:
            buf = piece
        else:
            # Pathological sentence longer than the window: hard-split it so
            # no chunk can exceed max_chars.
            for i in range(0, len(piece), max_chars):
                part = piece[i:i + max_chars].strip()
                if part:
                    chunks.append(part)
    if buf:
        chunks.append(buf.strip())
    # Fallback if text had no sentences at all (e.g. whitespace-only input).
    if not chunks:
        chunks = [text[i:i + max_chars] for i in range(0, len(text), max_chars)]
    return chunks
def summarize_long(text: str, target_words: int = 120):
    """Summarize long text: summarize each chunk, then fuse if still too long.

    target_words is a rough word budget mapped onto the model's token limits.
    """
    summarizer = get_pipes()[0]
    # Map the rough word target onto min/max token lengths for the model.
    max_len = min(256, max(64, int(target_words * 1.6)))
    min_len = max(20, int(max_len * 0.4))

    def _summarize(snippet):
        # One deterministic summarization pass.
        result = summarizer(snippet, max_length=max_len, min_length=min_len, do_sample=False)
        return result[0]["summary_text"]

    pieces = []
    for chunk in chunk_text(text, max_chars=1600):
        try:
            pieces.append(_summarize(chunk))
        except Exception:
            # If the model complains about length, retry on a smaller window.
            pieces.append(_summarize(chunk[:1200]))

    fused = " ".join(pieces)
    # Multiple chunk summaries that together overshoot the budget get a
    # second fusing pass through the summarizer.
    if len(pieces) > 1 and len(fused.split()) > target_words:
        return _summarize(fused).strip()
    return fused.strip()
def classify_topics(text: str, labels: list[str]):
    """Multi-label zero-shot classification.

    Returns (ranked, top3): all (label, score) pairs sorted by score
    descending, plus the first three of them.
    """
    zshot = get_pipes()[1]
    result = zshot(text, candidate_labels=labels, multi_label=True)
    ranked = sorted(
        zip(result["labels"], result["scores"]),
        key=lambda pair: pair[1],
        reverse=True,
    )
    return ranked, ranked[:3]
def analyze_sentiment(text: str):
    """3-class sentiment with chunk-aware averaging for long inputs.

    Returns (label, score): the winning class name (NEGATIVE / NEUTRAL /
    POSITIVE) and its mean probability across the sampled chunks.
    """
    _, _, sentiment = get_pipes()
    # Smaller chunks for sentiment; keep only the first few for speed.
    s_chunks = chunk_text(text, max_chars=300) or [text[:300]]
    s_chunks = s_chunks[:8]
    agg = {"NEGATIVE": 0.0, "NEUTRAL": 0.0, "POSITIVE": 0.0}
    for ch in s_chunks:
        # top_k=None returns the scores for every class; it replaces the
        # deprecated return_all_scores=True argument.
        scores = sentiment(ch, top_k=None)
        # Older transformers versions nest the per-class list one level
        # deeper for single-string inputs; normalize both shapes.
        if scores and isinstance(scores[0], list):
            scores = scores[0]
        for s in scores:
            label = s["label"].upper()
            if label in agg:  # ignore unexpected labels instead of raising KeyError
                agg[label] += float(s["score"])
    n = float(len(s_chunks))
    for k in agg:
        agg[k] /= n
    label = max(agg, key=agg.get)
    score = agg[label]
    return label, score
def analyze(text, labels_csv, summary_words):
    """Run summary, topic classification and sentiment on one input.

    Returns (summary, table_rows, top_topics_str, sentiment_label,
    sentiment_score) in the order the Gradio outputs expect.
    """
    cleaned = (text or "").strip()
    if not cleaned:
        # Blank outputs for every UI component when there is no input.
        return "", [], "", "", 0.0

    # CSV textbox -> label list, falling back to the defaults.
    raw = (labels_csv or "").strip()
    labels = [part.strip() for part in raw.split(",") if part.strip()]
    if not labels:
        labels = DEFAULT_LABELS

    summary = summarize_long(cleaned, target_words=int(summary_words))
    pairs, top3 = classify_topics(cleaned, labels)
    sent_label, sent_score = analyze_sentiment(cleaned)

    # Friendly "label (score)" string for the top topics.
    top_str = ", ".join(f"{lab} ({score:.2f})" for lab, score in top3)
    # Dataframe wants list-of-rows.
    table_rows = [[lab, round(score, 4)] for lab, score in pairs]
    return summary, table_rows, top_str, sent_label, sent_score
# ---------------------------------------------------------------------------
# UI: a Blocks layout with inputs on the left and tabbed results on the right.
# ---------------------------------------------------------------------------
with gr.Blocks(title="TriScope — Text Insight Stack", css="""
:root{--radius:16px}
.header {font-size: 28px; font-weight: 800;}
.subtle {opacity:.8}
.card {border:1px solid #e5e7eb; border-radius: var(--radius); padding:16px}
""") as demo:
    gr.Markdown("""
<div class="header">🧠 TriScope — Text Insight Stack</div>
<div class="subtle">Summarize • Topic Classify • Sentiment — powered by three open models on Hugging Face</div>
""")
    with gr.Row():
        # Left column: all inputs plus the run button.
        with gr.Column(scale=5):
            txt = gr.Textbox(
                label="Paste text",
                placeholder="Paste any article, JD, email, or paragraph...",
                lines=12,
                elem_classes=["card"],
            )
            labels = gr.Textbox(
                label="Candidate topic labels (comma-separated)",
                value=", ".join(DEFAULT_LABELS),
                elem_classes=["card"],
            )
            words = gr.Slider(
                minimum=40, maximum=200, value=120, step=10,
                label="Target summary length (words)",
                elem_classes=["card"],
            )
            run = gr.Button("Analyze", variant="primary")
        # Right column: one tab per analysis result.
        with gr.Column(scale=5):
            with gr.Tab("Summary"):
                out_summary = gr.Markdown()
            with gr.Tab("Topics"):
                out_table = gr.Dataframe(headers=["label", "score"], datatype=["str", "number"], interactive=False)
                out_top = gr.Markdown()
            with gr.Tab("Sentiment"):
                # Show 3 classes
                out_sent_label = gr.Label(num_top_classes=3)
                out_sent_score = gr.Number(label="Confidence score")
    # One canned example to let users try the app with a single click.
    gr.Examples(
        label="Try an example",
        examples=[[
            "Open-source models are transforming AI by enabling broad access to powerful capabilities. However, organizations must balance innovation with governance, ensuring that safety and compliance keep pace with deployment. This article explores how companies can adopt a pragmatic approach to evaluation, monitoring, and human oversight while still benefiting from the speed of open development."
        ]],
        inputs=[txt]
    )
    # Wire the button: outputs must match analyze()'s return order.
    run.click(
        analyze,
        inputs=[txt, labels, words],
        outputs=[out_summary, out_table, out_top, out_sent_label, out_sent_score]
    )
if __name__ == "__main__":
    # Helpful for Spaces; enables logs and proper binding
    # (0.0.0.0:7860 is the address/port Hugging Face Spaces expects).
    demo.launch(server_name="0.0.0.0", server_port=7860, debug=True)