"""TriScope — summarize, topic-classify, and sentiment-score text.

A Gradio app wiring three Hugging Face pipelines:
summarization (distilbart-cnn), zero-shot topic classification
(distilbart-mnli), and 3-class sentiment (cardiffnlp twitter-roberta).
"""

import gradio as gr
from transformers import pipeline
from functools import lru_cache

# Default candidate labels for zero-shot topic classification.
DEFAULT_LABELS = [
    "finance", "sports", "tech", "politics", "health",
    "entertainment", "science", "business", "travel", "education",
]


@lru_cache(maxsize=1)
def get_pipes():
    """Build and cache the three inference pipelines (loaded once per process).

    Returns:
        (summarizer, zero_shot_classifier, sentiment) pipeline tuple.
    """
    summarizer = pipeline(
        "summarization", model="sshleifer/distilbart-cnn-12-6"
    )
    zshot = pipeline(
        "zero-shot-classification", model="valhalla/distilbart-mnli-12-1"
    )
    # 3-class sentiment: NEGATIVE / NEUTRAL / POSITIVE
    sentiment = pipeline(
        "sentiment-analysis",
        model="cardiffnlp/twitter-roberta-base-sentiment-latest",
        tokenizer="cardiffnlp/twitter-roberta-base-sentiment-latest",
    )
    return summarizer, zshot, sentiment


def chunk_text(text: str, max_chars: int = 1600) -> list[str]:
    """Naive chunker to keep inputs within summarizer limits.

    Splits on sentences by '. ' and groups into ~max_chars chunks.
    A single sentence longer than max_chars is hard-split so no chunk
    ever exceeds the limit (previously such a sentence became one
    oversized chunk).
    """
    sentences = [s.strip() for s in text.replace("\n", " ").split(". ") if s.strip()]
    chunks: list[str] = []
    buf = ""
    for s in sentences:
        add = s + (". " if not s.endswith(".") else " ")
        if len(add) > max_chars:
            # FIX: hard-split an oversized sentence instead of emitting it whole.
            if buf:
                chunks.append(buf.strip())
                buf = ""
            for i in range(0, len(add), max_chars):
                piece = add[i:i + max_chars].strip()
                if piece:
                    chunks.append(piece)
            continue
        if len(buf) + len(add) <= max_chars:
            buf += add
        else:
            if buf:
                chunks.append(buf.strip())
            buf = add
    if buf:
        chunks.append(buf.strip())
    # Fallback if text had no '. ' separators at all.
    if not chunks:
        chunks = [text[i:i + max_chars] for i in range(0, len(text), max_chars)]
    return chunks


def summarize_long(text: str, target_words: int = 120) -> str:
    """Summarize arbitrarily long text by chunking, then fusing chunk summaries.

    Args:
        text: Raw input text.
        target_words: Rough desired summary length in words.

    Returns:
        A single summary string.
    """
    summarizer, _, _ = get_pipes()
    # Map a rough word target onto token lengths (~1.6 tokens/word heuristic).
    max_len = min(256, max(64, int(target_words * 1.6)))
    min_len = max(20, int(max_len * 0.4))
    pieces = []
    for ch in chunk_text(text, max_chars=1600):
        try:
            # truncation=True prevents hard token-overflow failures.
            out = summarizer(
                ch, max_length=max_len, min_length=min_len,
                do_sample=False, truncation=True,
            )
        except Exception:
            # Best-effort retry on a smaller window if the model still complains.
            out = summarizer(
                ch[:1200], max_length=max_len, min_length=min_len,
                do_sample=False, truncation=True,
            )
        pieces.append(out[0]["summary_text"])
    # If multiple pieces produced an over-long draft, do a second fusion pass.
    fused = " ".join(pieces)
    if len(pieces) > 1 and len(fused.split()) > target_words:
        out = summarizer(
            fused, max_length=max_len, min_length=min_len,
            do_sample=False, truncation=True,
        )
        return out[0]["summary_text"].strip()
    return fused.strip()


def classify_topics(text: str, labels: list[str]):
    """Zero-shot multi-label topic scoring.

    Returns:
        (pairs, top3) where pairs is [(label, score), ...] sorted by score
        descending and top3 is the first three of those.
    """
    _, zshot, _ = get_pipes()
    res = zshot(text, candidate_labels=labels, multi_label=True)
    pairs = sorted(
        zip(res["labels"], res["scores"]), key=lambda p: p[1], reverse=True
    )
    return pairs, pairs[:3]


def analyze_sentiment(text: str, *, return_distribution: bool = False):
    """3-class sentiment with chunk-aware averaging for long inputs.

    Args:
        text: Input text.
        return_distribution: When True, also return the averaged per-class
            score dict (backward-compatible extension; default keeps the
            original 2-tuple return).

    Returns:
        (label, score) or (label, score, distribution).
    """
    _, _, sentiment = get_pipes()
    # Smaller chunks for sentiment; keep only the first few for speed.
    s_chunks = chunk_text(text, max_chars=300) or [text[:300]]
    s_chunks = s_chunks[:8]
    agg = {"NEGATIVE": 0.0, "NEUTRAL": 0.0, "POSITIVE": 0.0}
    for ch in s_chunks:
        scores = sentiment(ch, return_all_scores=True)[0]
        for s in scores:
            key = s["label"].upper()
            # FIX: tolerate unexpected label strings instead of raising KeyError.
            agg[key] = agg.get(key, 0.0) + float(s["score"])
    n = float(len(s_chunks))
    for k in agg:
        agg[k] /= n
    label = max(agg, key=agg.get)
    if return_distribution:
        return label, agg[label], agg
    return label, agg[label]


def analyze(text, labels_csv, summary_words):
    """Gradio callback: run all three analyses over the input text.

    Returns:
        (summary_markdown, topic_table_rows, top_topics_string,
         sentiment_distribution_for_Label, sentiment_confidence).
    """
    text = (text or "").strip()
    if not text:
        return (
            "",    # summary
            [],    # table rows
            "",    # top topics string
            None,  # clears the Label widget
            0.0,   # sentiment score
        )
    # Prepare labels (CSV -> list); fall back to defaults when empty.
    labels_csv = (labels_csv or "").strip()
    labels = [l.strip() for l in labels_csv.split(",") if l.strip()] or DEFAULT_LABELS

    summary = summarize_long(text, target_words=int(summary_words))
    pairs, top3 = classify_topics(text, labels)
    # FIX: fetch the full distribution so gr.Label(num_top_classes=3)
    # can actually render all three sentiment classes.
    sent_label, sent_score, sent_dist = analyze_sentiment(
        text, return_distribution=True
    )

    # Friendly top-topics string and Dataframe rows (list[list]).
    top_str = ", ".join(f"{lab} ({score:.2f})" for lab, score in top3)
    table_rows = [[lab, round(score, 4)] for lab, score in pairs]
    return summary, table_rows, top_str, sent_dist, sent_score


with gr.Blocks(
    title="TriScope — Text Insight Stack",
    css="""
:root{--radius:16px}
.header {font-size: 28px; font-weight: 800;}
.subtle {opacity:.8}
.card {border:1px solid #e5e7eb; border-radius: var(--radius); padding:16px}
""",
) as demo:
    gr.Markdown(
        """
<div class="header">🧠 TriScope — Text Insight Stack</div>
<div class="subtle">Summarize • Topic Classify • Sentiment — powered by three open models on Hugging Face</div>
"""
    )
    with gr.Row():
        with gr.Column(scale=5):
            txt = gr.Textbox(
                label="Paste text",
                placeholder="Paste any article, JD, email, or paragraph...",
                lines=12,
                elem_classes=["card"],
            )
            labels = gr.Textbox(
                label="Candidate topic labels (comma-separated)",
                value=", ".join(DEFAULT_LABELS),
                elem_classes=["card"],
            )
            words = gr.Slider(
                minimum=40,
                maximum=200,
                value=120,
                step=10,
                label="Target summary length (words)",
                elem_classes=["card"],
            )
            run = gr.Button("Analyze", variant="primary")
        with gr.Column(scale=5):
            with gr.Tab("Summary"):
                out_summary = gr.Markdown()
            with gr.Tab("Topics"):
                out_table = gr.Dataframe(
                    headers=["label", "score"],
                    datatype=["str", "number"],
                    interactive=False,
                )
                out_top = gr.Markdown()
            with gr.Tab("Sentiment"):
                # Shows the full 3-class distribution returned by `analyze`.
                out_sent_label = gr.Label(num_top_classes=3)
                out_sent_score = gr.Number(label="Confidence score")

    gr.Examples(
        label="Try an example",
        examples=[[
            "Open-source models are transforming AI by enabling broad access to powerful capabilities. However, organizations must balance innovation with governance, ensuring that safety and compliance keep pace with deployment. This article explores how companies can adopt a pragmatic approach to evaluation, monitoring, and human oversight while still benefiting from the speed of open development."
        ]],
        inputs=[txt],
    )

    run.click(
        analyze,
        inputs=[txt, labels, words],
        outputs=[out_summary, out_table, out_top, out_sent_label, out_sent_score],
    )

if __name__ == "__main__":
    # Helpful for Spaces; enables logs and proper binding
    demo.launch(server_name="0.0.0.0", server_port=7860, debug=True)