grano1 committed on
Commit
0c58d90
·
verified ·
1 Parent(s): c4db2e2

Upload README.md with huggingface_hub

Browse files
Files changed (1) hide show
  1. README.md +66 -18
README.md CHANGED
@@ -78,32 +78,80 @@ For **26 parent subjects**, F1-score improves to **0.934** with full metadata.
78
  ## 🔍 Example Usage
79
 
80
  ```python
81
- from transformers import AutoTokenizer, AutoModelForSequenceClassification
82
- import torch
83
- import json
 
 
84
 
85
  # Load model and tokenizer
86
- model_name = "your-hf-username/open-asjc-multilabel"
87
- tokenizer = AutoTokenizer.from_pretrained(model_name)
88
- model = AutoModelForSequenceClassification.from_pretrained(model_name)
 
 
89
 
90
- # Load sample input
91
- with open("small_example.json") as f:
92
- data = json.load(f)
93
 
94
- text = data["title"] + " " + data.get("abstract", "")
95
- inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
96
 
97
- # Predict
98
- with torch.no_grad():
99
- outputs = model(**inputs)
100
- probs = torch.sigmoid(outputs.logits).cpu().numpy()[0]
101
 
102
- # Apply threshold
103
- threshold = 0.3
104
- predicted_labels = [label for label, prob in zip(model.config.id2label.values(), probs) if prob >= threshold]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
  ```
106
 
 
107
 
108
  ## 📖 Citation
109
  If you use this work, please cite:
 
78
  ## 🔍 Example Usage
79
 
80
  ```python
81
+ # Load packages
82
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer
83
+ import pandas as pd
84
+ import json # for json files
85
+ import torch # for tensor computation and deep learning
86
 
87
  # Load model and tokenizer
88
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
89
+ model = AutoModelForSequenceClassification.from_pretrained("asjc-classification/scibert_multilabel_asjc_classifier")
90
+ model.to(device)
91
+ model.eval()
92
+ tokenizer = AutoTokenizer.from_pretrained("asjc-classification/scibert_multilabel_asjc_classifier", do_lower_case=True)
93
 
94
+ # Load the JSON file
95
+ with open("small_example.json", "r") as file:
96
+ data = json.load(file)
97
 
98
+ # Access the "all" array
99
+ all_articles = data["all"]
100
 
101
+ # Load the categories from the CSV file
102
+ path = 'Categories.csv'
 
 
103
 
104
+ # Read the CSV file into a pandas DataFrame
105
+ df_categories = pd.read_csv(path, delimiter=';')
106
+
107
+ # Extract the 'SUBJECT TERM' column as a list of class names
108
+ classes = df_categories['SUBJECT TERM'].tolist()
109
+
110
+ # Create mappings from class names to integer IDs and vice versa
111
+ class2id = {class_: id for id, class_ in enumerate(classes)}
112
+ id2class = {id: class_ for class_, id in class2id.items()}
113
+
114
+ # Iterate over each example in the "all" array
115
+ for example_data in all_articles:
116
+ # Extract the text and labels
117
+ example_article = example_data["string"]
118
+ true_labels_example = json.loads(example_data["subject"]) # Load the labels as a list
119
+
120
+ # Tokenize the article metadata
121
+ inputs = tokenizer(example_article, return_tensors='pt', truncation=True, padding=True, max_length=512)
122
+
123
+ # Move inputs to the correct device (CPU or GPU)
124
+ inputs = {key: value.to(device) for key, value in inputs.items()}
125
+
126
+ # Make predictions with the model
127
+ model.eval() # Set the model to evaluation mode
128
+ with torch.no_grad(): # No gradient computation
129
+ outs = model(**inputs)
130
+ logits = outs[0] # Raw predictions (logits)
131
+ pred_probs = torch.sigmoid(logits) # Convert to probabilities using Sigmoid
132
+
133
+ # Convert probabilities to NumPy array
134
+ pred_probs = pred_probs.cpu().numpy().flatten()
135
+
136
+ # Create a DataFrame with probabilities and label names
137
+ df_probs = pd.DataFrame([pred_probs], columns=classes)
138
+
139
+ # Sort probabilities in descending order and select the top 5 labels
140
+ top_5_predictions = df_probs.iloc[0].sort_values(ascending=False).head(5)
141
+
142
+ print(f"\n Text: {example_article}")
143
+
144
+ print(f"\n🔹 **Top 5 predicted labels for the example:**")
145
+ for label, prob in top_5_predictions.items():
146
+ print(f" - {label}: {prob:.4f}")
147
+
148
+ # Display the actual labels of the example (True Labels)
149
+ print("\n✅ **Actual labels for the example:**")
150
+ for label in true_labels_example:
151
+ print(f" - {label}")
152
  ```
153
 
154
+ ---
155
 
156
  ## 📖 Citation
157
  If you use this work, please cite: