Spaces:

MurDanya
/

ml-course-article-classifier

Sleeping

App Files Community

MurDanya committed on Apr 6, 2025

Commit

f0de0e1

verified ·

1 Parent(s): f21e94b

Update app.py

Browse files

Files changed (1) hide show

app.py +17 -13

app.py CHANGED Viewed

@@ -1,18 +1,23 @@
-# app.py
 import streamlit as st
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import torch
 import numpy as np
 import json
 @st.cache_resource
 def load_model():
-    model = AutoModelForSequenceClassification.from_pretrained("MurDanya/ml-course-article-classifier-scibert")
-    tokenizer = AutoTokenizer.from_pretrained("MurDanya/ml-course-article-classifier-scibert")
-    with open("labels.json") as f:
-        id2label = json.load(f)
-    id2label = {int(idx): label for idx, label in id2label.items()}
-    return tokenizer, model, id2label
 def get_top95(labels, probs):
     sorted_indices = torch.argsort(probs, descending=True)
@@ -20,7 +25,7 @@ def get_top95(labels, probs):
     sorted_labels = [labels[i.item()] for i in sorted_indices]
     cumulative = torch.cumsum(sorted_probs, dim=0)
-    cutoff = torch.where(cumulative >= 0.95)[0]
     last_idx = cutoff[0].item() + 1 if len(cutoff) > 0 else len(sorted_probs)
     return list(zip(sorted_labels[:last_idx], sorted_probs[:last_idx].tolist()))
@@ -37,9 +42,9 @@ if st.button("Classify"):
     if not title and not abstract:
         st.warning("Please enter at least the title.")
     else:
-        tokenizer, model, id2label = load_model()
-        text = title + ". " + abstract if abstract else title
         inputs = tokenizer(text, return_tensors="pt", truncation=True)
         with torch.no_grad():
             outputs = model(**inputs)
@@ -47,6 +52,5 @@ if st.button("Classify"):
         top_labels = get_top95(id2label, probs)
-        st.subheader("📚 Top topics (95% confidence)")
         for label, prob in top_labels:
-            st.markdown(f"- **{label}**: {prob:.3f}")

 import streamlit as st
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
+from huggingface_hub import hf_hub_download
 import torch
 import numpy as np
 import json
 @st.cache_resource
 def load_model():
+    repo_id = "MurDanya/ml-course-article-classifier-distilbert"
+    model = AutoModelForSequenceClassification.from_pretrained(repo_id)
+    tokenizer = AutoTokenizer.from_pretrained(repo_id)
+    file_path = hf_hub_download(repo_id, "labels.json")
+    with open(file_path) as f:
+        labels = json.load(f)
+    id2label = {int(idx): label for idx, label in labels['id2label'].items()}
+    categories = labels['categories']
+    return tokenizer, model, id2label, categories
 def get_top95(labels, probs):
     sorted_indices = torch.argsort(probs, descending=True)
     sorted_labels = [labels[i.item()] for i in sorted_indices]
     cumulative = torch.cumsum(sorted_probs, dim=0)
+    cutoff = torch.where(cumulative >= 0.8)[0]
     last_idx = cutoff[0].item() + 1 if len(cutoff) > 0 else len(sorted_probs)
     return list(zip(sorted_labels[:last_idx], sorted_probs[:last_idx].tolist()))
     if not title and not abstract:
         st.warning("Please enter at least the title.")
     else:
+        tokenizer, model, id2label, categories = load_model()
+        text = title + " - " + abstract if abstract else title
         inputs = tokenizer(text, return_tensors="pt", truncation=True)
         with torch.no_grad():
             outputs = model(**inputs)
         top_labels = get_top95(id2label, probs)
         for label, prob in top_labels:
+            print(f"- **{categories[label]} ({label})**: {prob * 100:.1f}%")