Spaces:

DelaliScratchwerk
/

TextPeriod_Summarization

Sleeping

App Files Files Community

DelaliScratchwerk committed on Oct 1, 2025

Commit

e8485c8

verified ·

1 Parent(s): 5e572ba

Update app.py

Browse files

Files changed (1) hide show

app.py +57 -20

app.py CHANGED Viewed

@@ -1,42 +1,79 @@
 import json, numpy as np, gradio as gr
 from setfit import SetFitModel
 from huggingface_hub import hf_hub_download
 MODEL_ID = "DelaliScratchwerk/text-period-setfit"
-# Try to load labels from the model repo; if missing, use local labels.json
 try:
     labels_path = hf_hub_download(MODEL_ID, "labels.json")
     LABELS = json.load(open(labels_path))
 except Exception:
     LABELS = json.load(open("labels.json"))
 model = SetFitModel.from_pretrained(MODEL_ID)
 def predict(txt: str):
     if not txt.strip():
-        return "—", {"error": "Please paste some text."}
     probs = np.asarray(model.predict_proba([txt])[0], dtype=float).ravel()
     if probs.size != len(LABELS):
-        return "—", {"error": f"label mismatch: model has {probs.size}, labels.json has {len(LABELS)}"}
     order = np.argsort(probs)[::-1]
-    top = LABELS[int(order[0])]
-    return top, {LABELS[int(i)]: float(probs[int(i)]) for i in order}
# Input and output widgets, built in the same order as the original inline
# arguments (input first, then the two outputs).
text_input = gr.Textbox(lines=8, label="Paste text")
result_outputs = [gr.Label(label="Predicted Period"), gr.JSON(label="Scores")]

# Sample sentences passed to the interface as its examples.
example_texts = [
    "Schools went remote during the pandemic; everyone wore N95s and used Zoom.",
    "Sputnik launched and kicked off the space race.",
    "MySpace was the most popular social network for a while.",
    "TikTok creators exploded in popularity.",
]

# Single-function UI: predict() maps the textbox to (label, scores).
demo = gr.Interface(
    fn=predict,
    inputs=text_input,
    outputs=result_outputs,
    title="Text → Time Period (SetFit)",
    examples=example_texts,
    cache_examples=False,
    allow_flagging="never",
)
 # Launch the app only when this file is executed directly, not on import.
 if __name__ == "__main__":
     demo.launch()

 import json, numpy as np, gradio as gr
 from setfit import SetFitModel
 from huggingface_hub import hf_hub_download
+from evidence import extract_evidence
 MODEL_ID = "DelaliScratchwerk/text-period-setfit"
+# Load labels: try from model repo, else local labels.json in the Space
 try:
     labels_path = hf_hub_download(MODEL_ID, "labels.json")
     LABELS = json.load(open(labels_path))
 except Exception:
     LABELS = json.load(open("labels.json"))
+# thresholds – tweak later with validation
+TOP_K = 3
+UNCERTAINTY_THRESHOLD = 0.42   # if top1 prob below this → "uncertain"
+MARGIN_THRESHOLD = 0.08        # or if (top1 - top2) < this → "uncertain"
 model = SetFitModel.from_pretrained(MODEL_ID)
def format_evidence(ev):
    """Render extracted time clues as a Markdown snippet.

    Args:
        ev: Mapping that may contain ``"years"`` (a sequence of years) and
            ``"keyword_hits"`` (a mapping of bucket name to a sequence of
            keywords).  ``None`` or an empty mapping means "nothing found".

    Returns:
        One Markdown paragraph per clue group, separated by blank lines, or
        an italic placeholder when there is nothing to report.
    """
    ev = ev or {}
    parts = []

    years = ev.get("years")
    if years:
        # str() each item so non-string years (e.g. ints) do not break join.
        parts.append("**Years found:** " + ", ".join(str(y) for y in years))

    for bucket, keywords in (ev.get("keyword_hits") or {}).items():
        parts.append(f"**{bucket}:** " + ", ".join(str(k) for k in keywords))

    return "\n\n".join(parts) if parts else "_No explicit time clues found._"
 def predict(txt: str):
     if not txt.strip():
+        return "—", "Paste some text.", {}
     probs = np.asarray(model.predict_proba([txt])[0], dtype=float).ravel()
     if probs.size != len(LABELS):
+        return "—", f"Label mismatch: model has {probs.size} classes, labels.json has {len(LABELS)}", {}
     order = np.argsort(probs)[::-1]
+    top1, top2 = probs[order[0]], probs[order[1]] if probs.size > 1 else 0.0
+    ev = extract_evidence(txt)
+    # uncertain mode
+    if top1 < UNCERTAINTY_THRESHOLD or (top1 - top2) < MARGIN_THRESHOLD:
+        topk = [{ "label": LABELS[i], "score": float(probs[i]) } for i in order[:TOP_K]]
+        md = "**Uncertain** — here are the top candidates:\n" + "\n".join(
+            [f"- **{d['label']}**: {d['score']:.3f}" for d in topk]
+        )
+        return "uncertain", md + "\n\n" + format_evidence(ev), {LABELS[i]: float(probs[i]) for i in order}
+    # confident
+    best = LABELS[order[0]]
+    md = f"**Reasoning hints**\n\n" + format_evidence(ev)
+    return best, md, {LABELS[i]: float(probs[i]) for i in order}
# Sample sentences wired to the textbox through gr.Examples.
example_texts = [
    "Schools went remote during the pandemic; everyone wore N95s and used Zoom.",
    "Sputnik launched and kicked off the space race.",
    "MySpace was the most popular social network for a while.",
    "Creators blew up on TikTok; companies rolled out ChatGPT-powered tools.",
]

# Components are created in the same order as before, since creation order
# inside the Blocks context determines the layout.
with gr.Blocks(title="Text → Time Period (SetFit)") as demo:
    gr.Markdown("# Text → Time Period (SetFit)")

    with gr.Row():
        text = gr.Textbox(label="Paste text", lines=8)
        with gr.Column():
            pred = gr.Label(label="Predicted")
            reason = gr.Markdown(label="Evidence")

    scores = gr.JSON(label="Scores")
    btn = gr.Button("Submit", variant="primary")

    # predict() returns (label, markdown, scores), one value per output.
    btn.click(fn=predict, inputs=text, outputs=[pred, reason, scores])

    gr.Examples(examples=example_texts, inputs=text)
 # Launch the app only when this file is executed directly, not on import.
 if __name__ == "__main__":
     demo.launch()