Upload folder using huggingface_hub
README.md CHANGED
@@ -84,77 +84,6 @@ For **26 parent subjects**, F1-score improves to **0.934** with full metadata.
 ## 🔍 Example Usage
 
 ```python
-# # Load packages
-# from transformers import AutoModelForSequenceClassification, AutoTokenizer
-# import pandas as pd
-# import json # for json files
-# import torch # for tensor computation and deep learning
-
-# # Load model and tokenizer
-# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-# model = AutoModelForSequenceClassification.from_pretrained("asjc-classification/scibert_multilabel_asjc_classifier")
-# model.to(device)
-# model.eval()
-# tokenizer = AutoTokenizer.from_pretrained("asjc-classification/scibert_multilabel_asjc_classifier", do_lower_case=True)
-
-# # Load the JSON file
-# with open("small_example.json", "r") as file:
-#     data = json.load(file)
-
-# # Access the "all" array
-# all_articles = data["all"]
-
-# # Load the categories from the CSV file
-# path = 'Categories.csv'
-
-# # Read the CSV file into a pandas DataFrame
-# df_categories = pd.read_csv(path, delimiter=';')
-
-# # Extract the 'SUBJECT TERM' column as a list of class names
-# classes = df_categories['SUBJECT TERM'].tolist()
-
-# # Create mappings from class names to integer IDs and vice versa
-# class2id = {class_: id for id, class_ in enumerate(classes)}
-# id2class = {id: class_ for class_, id in class2id.items()}
-
-# # Iterate over each example in the "all" array
-# for example_data in all_articles:
-#     # Extract the text and labels
-#     example_article = example_data["string"]
-#     true_labels_example = json.loads(example_data["subject"]) # Load the labels as a list
-
-#     # Tokenize the article metadata
-#     inputs = tokenizer(example_article, return_tensors='pt', truncation=True, padding=True, max_length=512)
-
-#     # Move inputs to the correct device (CPU or GPU)
-#     inputs = {key: value.to(device) for key, value in inputs.items()}
-
-#     # Make predictions with the model
-#     with torch.no_grad(): # No gradient computation
-#         outs = model(**inputs)
-#         logits = outs[0] # Raw predictions (logits)
-#         pred_probs = torch.sigmoid(logits) # Convert to probabilities using Sigmoid
-
-#     # Convert probabilities to NumPy array
-#     pred_probs = pred_probs.cpu().numpy().flatten()
-
-#     # Create a DataFrame with probabilities and label names
-#     df_probs = pd.DataFrame([pred_probs], columns=classes)
-
-#     # Sort by highest probabilities and output the top 5 labels
-#     top_5_predictions = df_probs.iloc[0].sort_values(ascending=False).head(5)
-
-#     print(f"\n Text: {example_article}")
-
-#     print(f"\n🔹 **Top 5 predicted labels for the example:**")
-#     for label, prob in top_5_predictions.items():
-#         print(f" - {label}: {prob:.4f}")
-
-#     # Display the actual labels of the example (True Labels)
-#     print("\n✅ **Actual labels for the example:**")
-#     for label in true_labels_example:
-#         print(f" - {label}")
-# ```
 from transformers import pipeline
 from custom_pipeline import ASJCMultiLabelPipeline
 
@@ -172,9 +101,6 @@ text = (
 
 result = pipe(text)
 print(result)
-
-
-
 ---
 
 ## 📖 Citation
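Note: the unchanged README lines between the two hunks (new lines 90–100), where `pipe` is actually constructed, are elided by the diff. Below is a minimal sketch of what that construction plausibly looks like, assuming `ASJCMultiLabelPipeline` subclasses `transformers.Pipeline` and is wired in through the `pipeline_class` argument of `transformers.pipeline()`; the task name and example text are illustrative assumptions, not taken from the diff.

```python
# Sketch (assumption): how the custom pipeline shown in the context lines
# might be wired up. The actual construction sits in the unchanged README
# lines between the two hunks and may differ.
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
from custom_pipeline import ASJCMultiLabelPipeline

model_id = "asjc-classification/scibert_multilabel_asjc_classifier"
model = AutoModelForSequenceClassification.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)

# pipeline_class makes transformers.pipeline() return the custom subclass,
# which presumably replaces softmax post-processing with per-label sigmoid
# scores for multi-label output.
pipe = pipeline(
    "text-classification",  # task name assumed for illustration
    model=model,
    tokenizer=tokenizer,
    pipeline_class=ASJCMultiLabelPipeline,
)

text = "Hypothetical article title and abstract about machine learning."  # placeholder
result = pipe(text)
print(result)
```

Routing through `pipeline_class` keeps the standard `pipeline()` ergonomics while letting the subclass apply the per-label sigmoid that the removed manual example computed by hand.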
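For reference, the block removed above was entirely commented out and its indentation had been lost. The following is a runnable reconstruction of the same manual route (without the custom pipeline); it assumes the `small_example.json` and `Categories.csv` files the original example referenced are available locally.

```python
# Reconstruction of the removed manual-inference example (assumes the
# small_example.json and Categories.csv files referenced by the original).
import json

import pandas as pd
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

model_id = "asjc-classification/scibert_multilabel_asjc_classifier"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = AutoModelForSequenceClassification.from_pretrained(model_id).to(device).eval()
tokenizer = AutoTokenizer.from_pretrained(model_id, do_lower_case=True)

# Class names come from the 'SUBJECT TERM' column of the categories file
classes = pd.read_csv("Categories.csv", delimiter=";")["SUBJECT TERM"].tolist()

with open("small_example.json", "r") as file:
    all_articles = json.load(file)["all"]

for example in all_articles:
    text = example["string"]
    true_labels = json.loads(example["subject"])  # labels stored as a JSON string

    inputs = tokenizer(text, return_tensors="pt", truncation=True,
                       padding=True, max_length=512).to(device)
    with torch.no_grad():
        logits = model(**inputs).logits
    # Multi-label head: independent sigmoid per class, not softmax
    probs = torch.sigmoid(logits).cpu().numpy().flatten()

    # Rank classes by probability and show the five most likely subjects
    top_5 = pd.Series(probs, index=classes).sort_values(ascending=False).head(5)
    print(f"\nText: {text}")
    print("\n🔹 Top 5 predicted labels:")
    for label, prob in top_5.items():
        print(f" - {label}: {prob:.4f}")
    print("\n✅ Actual labels:")
    for label in true_labels:
        print(f" - {label}")
```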