KevSun
/

Personality_LM

@@ -25,39 +25,109 @@ The project of predicting human cognition and emotion, and training details are
 The following provides the code to implement the task of detecting personality from an input text.
 ```python
-#import packages
 from transformers import AutoModelForSequenceClassification, AutoTokenizer
 import torch
 model = AutoModelForSequenceClassification.from_pretrained("KevSun/Personality_LM")
 tokenizer = AutoTokenizer.from_pretrained("KevSun/Personality_LM")
-# Example new text input
-#new_text = "I really enjoy working on complex problems and collaborating with others."
-file_path = 'path/to/your/textfile.txt'
-with open(file_path, 'r', encoding='utf-8') as file:
-    new_text = file.read()
 # Encode the text using the same tokenizer used during training
 encoded_input = tokenizer(new_text, return_tensors='pt', padding=True, truncation=True, max_length=64)
-# Move the model to the correct device (CPU in this case, or GPU if available)
-#model.eval()  # Set the model to evaluation mode
 # Perform the prediction
 with torch.no_grad():
     outputs = model(**encoded_input)
-# Get the predictions (the output here depends on whether you are doing regression or classification)
 predictions = outputs.logits.squeeze()
-# Assuming the model is a regression model and outputs raw scores
-predicted_scores = predictions.numpy()  # Convert to numpy array if necessary
 trait_names = ["Agreeableness", "Openness", "Conscientiousness", "Extraversion", "Neuroticism"]
 # Print the predicted personality traits scores
 for trait, score in zip(trait_names, predicted_scores):
     print(f"{trait}: {score:.4f}")
-##"output": "agreeableness: 0.46; openness: 0.27; conscientiousness: 0.31; extraversion: 0.1; neuroticism: 0.84"
 ```

 The following provides the code to implement the task of detecting personality from an input text.
 ```python
+# install these packages before importing them (transformers, PyTorch)
 from transformers import AutoModelForSequenceClassification, AutoTokenizer
 import torch
 model = AutoModelForSequenceClassification.from_pretrained("KevSun/Personality_LM")
 tokenizer = AutoTokenizer.from_pretrained("KevSun/Personality_LM")
+# Choose between direct text input or file input
+use_file = False  # Set to True if you want to read from a file
+if use_file:
+    file_path = 'path/to/your/textfile.txt'  # Replace with your file path
+    with open(file_path, 'r', encoding='utf-8') as file:
+        new_text = file.read()  # whole file is read; the tokenizer call below truncates it to max_length=64 tokens
+else:
+    new_text = "I really enjoy working on complex problems and collaborating with others."
 # Encode the text using the same tokenizer used during training
 encoded_input = tokenizer(new_text, return_tensors='pt', padding=True, truncation=True, max_length=64)
+model.eval()  # Set the model to evaluation mode
 # Perform the prediction
 with torch.no_grad():
     outputs = model(**encoded_input)
+# Get the predictions (squeeze() drops size-1 dimensions from the logits)
 predictions = outputs.logits.squeeze()
+# Convert the tensor to a NumPy array
+predicted_scores = predictions.numpy()  # paired with trait_names by position in the loop below
 trait_names = ["Agreeableness", "Openness", "Conscientiousness", "Extraversion", "Neuroticism"]
 # Print the predicted personality traits scores
 for trait, score in zip(trait_names, predicted_scores):
     print(f"{trait}: {score:.4f}")
+# Example output:
+#Agreeableness: 0.3965
+#Openness: 0.6714
+#Conscientiousness: 0.3283
+#Extraversion: 0.0026
+#Neuroticism: 0.4645
+```
+**Alternatively**, you can save the following script (e.g. as `script_name.py`) and run inference from the **bash** terminal.
+```python
+from transformers import AutoModelForSequenceClassification, AutoTokenizer
+import torch
+import argparse
+def load_model_and_tokenizer(model_name):  # model_name is passed unchanged to both from_pretrained() calls
+    model = AutoModelForSequenceClassification.from_pretrained(model_name)
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    return model, tokenizer  # returned as a (model, tokenizer) pair, in that order
+def process_input(input_text, tokenizer, max_length=64):  # tokenizes input_text into PyTorch tensors ('pt')
+    return tokenizer(input_text, return_tensors='pt', padding=True, truncation=True, max_length=max_length)  # truncation=True caps the encoding at max_length tokens, so longer text is cut off
+def predict_personality(model, encoded_input):  # runs one forward pass and returns the squeezed logits
+    model.eval()  # Set the model to evaluation mode
+    with torch.no_grad():  # inference only, so gradient tracking is disabled
+        outputs = model(**encoded_input)
+    return outputs.logits.squeeze()  # squeeze() drops size-1 dimensions from the logits tensor
+def print_predictions(predictions, trait_names):  # prints one "trait: score" line per (trait, score) pair
+    for trait, score in zip(trait_names, predictions):  # zip pairs by position and stops at the shorter sequence
+        print(f"{trait}: {score:.4f}")  # score is formatted to four decimal places
+def main():
+    parser = argparse.ArgumentParser(description="Predict personality traits from text.")
+    parser.add_argument("--input", type=str, required=True, help="Input text or path to text file")
+    parser.add_argument("--model", type=str, default="KevSun/Personality_LM", help="Model name or path")
+    args = parser.parse_args()
+    model, tokenizer = load_model_and_tokenizer(args.model)
+    # Check if input is a file path or direct text
+    if args.input.endswith('.txt'):
+        with open(args.input, 'r', encoding='utf-8') as file:
+            input_text = file.read()
+    else:
+        input_text = args.input
+    encoded_input = process_input(input_text, tokenizer)
+    predictions = predict_personality(model, encoded_input)
+    trait_names = ["Agreeableness", "Openness", "Conscientiousness", "Extraversion", "Neuroticism"]
+    print_predictions(predictions.numpy(), trait_names)
+if __name__ == "__main__":
+    main()
+```
+```bash
+python script_name.py --input "Your text here"
+```
+or
+```bash
+python script_name.py --input path/to/your/textfile.txt
 ```