grano1 committed on
Commit
3b4fe0f
·
verified ·
1 Parent(s): 368d3da

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. README.md +0 -74
README.md CHANGED
@@ -84,77 +84,6 @@ For **26 parent subjects**, F1-score improves to **0.934** with full metadata.
84
  ## 🔍 Example Usage
85
 
86
  ```python
87
- # # Load packages
88
- # from transformers import AutoModelForSequenceClassification, AutoTokenizer
89
- # import pandas as pd
90
- # import json # for json files
91
- # import torch # for tensor computation and deep learning
92
-
93
- # # Load model and tokenizer
94
- # device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
95
- # model = AutoModelForSequenceClassification.from_pretrained("asjc-classification/scibert_multilabel_asjc_classifier")
96
- # model.to(device)
97
- # model.eval()
98
- # tokenizer = AutoTokenizer.from_pretrained("asjc-classification/scibert_multilabel_asjc_classifier", do_lower_case=True)
99
-
100
- # # Load the JSON file
101
- # with open("small_example.json", "r") as file:
102
- # data = json.load(file)
103
-
104
- # # Access the "all" array
105
- # all_articles = data["all"]
106
-
107
- # # Load the categories from the CSV file
108
- # path = 'Categories.csv'
109
-
110
- # # Read the CSV file into a pandas DataFrame
111
- # df_categories = pd.read_csv(path, delimiter=';')
112
-
113
- # # Extract the 'SUBJECT TERM' column as a list of class names
114
- # classes = df_categories['SUBJECT TERM'].tolist()
115
-
116
- # # Create mappings from class names to integer IDs and vice versa
117
- # class2id = {class_: id for id, class_ in enumerate(classes)}
118
- # id2class = {id: class_ for class_, id in class2id.items()}
119
-
120
- # # Iterate over each example in the "all" array
121
- # for example_data in all_articles:
122
- # # Extract the text and labels
123
- # example_article = example_data["string"]
124
- # true_labels_example = json.loads(example_data["subject"]) # Load the labels as a list
125
-
126
- # # Tokenize the article metadata
127
- # inputs = tokenizer(example_article, return_tensors='pt', truncation=True, padding=True, max_length=512)
128
-
129
- # # Move inputs to the correct device (CPU or GPU)
130
- # inputs = {key: value.to(device) for key, value in inputs.items()}
131
-
132
- # # Make predictions with the model
133
- # with torch.no_grad(): # No gradient computation
134
- # outs = model(**inputs)
135
- # logits = outs[0] # Raw predictions (logits)
136
- # pred_probs = torch.sigmoid(logits) # Convert to probabilities using Sigmoid
137
-
138
- # # Convert probabilities to NumPy array
139
- # pred_probs = pred_probs.cpu().numpy().flatten()
140
-
141
- # # Create a DataFrame with probabilities and label names
142
- # df_probs = pd.DataFrame([pred_probs], columns=classes)
143
-
144
- # # Sort by highest probabilities and output the top 5 labels
145
- # top_5_predictions = df_probs.iloc[0].sort_values(ascending=False).head(5)
146
-
147
- # print(f"\n Text: {example_article}")
148
-
149
- # print(f"\n🔹 **Top 5 predicted labels for the example:**")
150
- # for label, prob in top_5_predictions.items():
151
- # print(f" - {label}: {prob:.4f}")
152
-
153
- # # Display the actual labels of the example (True Labels)
154
- # print("\n✅ **Actual labels for the example:**")
155
- # for label in true_labels_example:
156
- # print(f" - {label}")
157
- # ```
158
  from transformers import pipeline
159
  from custom_pipeline import ASJCMultiLabelPipeline
160
 
@@ -172,9 +101,6 @@ text = (
172
 
173
  result = pipe(text)
174
  print(result)
175
-
176
-
177
-
178
  ---
179
 
180
  ## 📖 Citation
 
84
  ## 🔍 Example Usage
85
 
86
  ```python
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  from transformers import pipeline
88
  from custom_pipeline import ASJCMultiLabelPipeline
89
 
 
101
 
102
  result = pipe(text)
103
  print(result)
 
 
 
104
  ---
105
 
106
  ## 📖 Citation