Spaces:

oracat
/

PaperClassifierArxiv

Sleeping

oracat committed on Apr 17, 2023

Commit

8dcbe0a

1 Parent(s): 429523a

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -14,13 +14,40 @@ def prepare_model():
     return (tokenizer, model)
 def process(text):
     """
     Translate incoming text to tokens and classify it
     """
     pipe = pipeline("text-classification", model=model, tokenizer=tokenizer)
     result = pipe(text)[0]
-    return result["label"]
 tokenizer, model = prepare_model()
@@ -105,4 +132,4 @@ text = "\n".join([title, abstract])
 ## Output
 if len(text.strip()) > 0:
-    st.markdown(f"<h4>Predicted class: {process(text)}</h4>", unsafe_allow_html=True)

     return (tokenizer, model)
def top_pct(preds, threshold=0.95):
    """
    Output top predictions and their scores

    Sorts the predictions by descending score and keeps the shortest leading
    run whose cumulative score reaches ``threshold``. If the threshold is
    never reached, every prediction is kept; an empty input yields an empty
    list. The input sequence itself is not modified.

    :param preds: iterable of dicts, each holding a numeric ``"score"`` entry
    :param threshold: cumulative score at which to stop collecting
    :return: new list with the kept prediction dicts, highest score first
    """
    ranked = sorted(preds, key=lambda x: -x["score"])
    top = []
    cum_score = 0
    # Collect items directly instead of slicing by the loop index afterwards,
    # so that an empty input does not reference an unbound loop variable.
    for item in ranked:
        top.append(item)
        cum_score += item["score"]
        if cum_score >= threshold:
            break
    return top
def format_predictions(preds) -> str:
    """
    Prepare predictions and their scores for printing to the user

    Each prediction becomes one numbered Markdown line (numbering starts at 1)
    with the label in bold and the score, rounded to two decimals, in italics.
    Every line ends with a newline; an empty input gives an empty string.

    :param preds: iterable of dicts with ``"label"`` and ``"score"`` entries
    :return: the formatted lines concatenated into one string
    """
    # Build all lines in one join rather than growing a string with "+=".
    return "".join(
        f"{rank}. **{item['label']}** *(score {item['score']:.2f})*\n"
        for rank, item in enumerate(preds, start=1)
    )
 def process(text):
     """
     Translate incoming text to tokens and classify it
     """
     pipe = pipeline("text-classification", model=model, tokenizer=tokenizer)
     result = pipe(text)[0]
+    return format_predictions(top_pct(result))
 tokenizer, model = prepare_model()
 ## Output
 if len(text.strip()) > 0:
+    st.markdown(f"{process(text)}", unsafe_allow_html=True)