KevSun
/

Engessay_grading_ML

@@ -25,7 +25,6 @@ tokenizer = AutoTokenizer.from_pretrained("Kevintu/Engessay_grading_ML")
 new_text = "The English Language Learner Insight, Proficiency and Skills Evaluation (ELLIPSE) Corpus is a freely available corpus of ~6,500 ELL writing samples that have been scored for overall holistic language proficiency as well as analytic proficiency scores related to cohesion, syntax, vocabulary, phraseology, grammar, and conventions. In addition, the ELLIPSE corpus provides individual and demographic information for the ELL writers in the corpus including economic status, gender, grade level (8-12), and race/ethnicity. The corpus provides language proficiency scores for individual writers and was developed to advance research in corpus and NLP approaches to assess overall and more fine-grained features of proficiency."
 # Define the path to your text file
 #file_path = 'path/to/yourfile.txt'
@@ -33,22 +32,17 @@ new_text = "The English Language Learner Insight, Proficiency and Skills Evaluat
 #with open(file_path, 'r', encoding='utf-8') as file:
 #    new_text = file.read()
 encoded_input = tokenizer(new_text, return_tensors='pt', padding=True, truncation=True, max_length=64)
 model.eval()
 # Perform the prediction
 with torch.no_grad():
     outputs = model(**encoded_input)
-# Get the predictions (the output here depends on whether you are doing regression or classification)
 predictions = outputs.logits.squeeze()
 predicted_scores = predictions.numpy()
 item_names = ["cohesion", "syntax", "vocabulary", "phraseology", "grammar",  "conventions"]
 for item, score in zip(item_names, predicted_scores):
     print(f"{item}: {score:.4f}")
@@ -68,35 +62,26 @@ for item, score in zip(item_names, predicted_scores):
 from transformers import AutoModelForSequenceClassification, AutoTokenizer
 import torch
 # Load the grading model and its matching tokenizer by repository name.
 model = AutoModelForSequenceClassification.from_pretrained("Kevintu/Engessay_grading_ML")
 tokenizer = AutoTokenizer.from_pretrained("Kevintu/Engessay_grading_ML")
 # Text to be graded.
 new_text = "The English Language Learner Insight, Proficiency and Skills Evaluation (ELLIPSE) Corpus is a freely available corpus of ~6,500 ELL writing samples that have been scored for overall holistic language proficiency as well as analytic proficiency scores related to cohesion, syntax, vocabulary, phraseology, grammar, and conventions. In addition, the ELLIPSE corpus provides individual and demographic information for the ELL writers in the corpus including economic status, gender, grade level (8-12), and race/ethnicity. The corpus provides language proficiency scores for individual writers and was developed to advance research in corpus and NLP approaches to assess overall and more fine-grained features of proficiency."
 # Tokenize into PyTorch tensors; truncation=True with max_length=64 means
 # only the first 64 tokens of the text are passed to the model.
 encoded_input = tokenizer(new_text, return_tensors='pt', padding=True, truncation=True, max_length=64)
 # Put the model in evaluation mode before predicting.
 model.eval()
 # Run the forward pass without tracking gradients.
 with torch.no_grad():
     outputs = model(**encoded_input)
 # squeeze() removes size-1 dimensions (here the batch of one text), leaving
 # one value per trait -- assumes the model emits six logits; TODO confirm.
 predictions = outputs.logits.squeeze()
 predicted_scores = predictions.numpy()  # Convert to numpy array
 item_names = ["cohesion", "syntax", "vocabulary", "phraseology", "grammar", "conventions"]
 # Scale predictions from 1 to 10: the linear map sends a raw score of 1.0
 # to 1.0 and a raw score of 5.0 to 10.0.
 scaled_scores = 2.25 * predicted_scores - 1.25
 # The loop variable is `item`; the f-string must use that same name
 # (the previous `{trait}` was undefined and raised NameError).
 for item, score in zip(item_names, scaled_scores):
     print(f"{item}: {score:.4f}")
 ##"output" (values between 1-10)
 #cohesion: 6.7147
 #syntax: 6.9354
 #vocabulary: 7.5814

 # Example: print the model's raw (unscaled) score for each writing trait.
 # NOTE(review): assumes `model`, `tokenizer` and `torch` were already created/imported
 # earlier in the same script -- confirm against the surrounding README.
 new_text = "The English Language Learner Insight, Proficiency and Skills Evaluation (ELLIPSE) Corpus is a freely available corpus of ~6,500 ELL writing samples that have been scored for overall holistic language proficiency as well as analytic proficiency scores related to cohesion, syntax, vocabulary, phraseology, grammar, and conventions. In addition, the ELLIPSE corpus provides individual and demographic information for the ELL writers in the corpus including economic status, gender, grade level (8-12), and race/ethnicity. The corpus provides language proficiency scores for individual writers and was developed to advance research in corpus and NLP approaches to assess overall and more fine-grained features of proficiency."
 # Alternative input: uncomment the lines below to grade the contents of a
 # text file instead of the inline string above.
 # Define the path to your text file
 #file_path = 'path/to/yourfile.txt'
 #with open(file_path, 'r', encoding='utf-8') as file:
 #    new_text = file.read()
 # Tokenize into PyTorch tensors; truncation=True with max_length=64 means
 # only the first 64 tokens of the text are passed to the model.
 encoded_input = tokenizer(new_text, return_tensors='pt', padding=True, truncation=True, max_length=64)
 # Put the model in evaluation mode before predicting.
 model.eval()
 # Perform the prediction (no gradient tracking is needed for inference)
 with torch.no_grad():
     outputs = model(**encoded_input)
 # squeeze() removes size-1 dimensions (here the batch of one text);
 # presumably this leaves one value per trait -- TODO confirm output size.
 predictions = outputs.logits.squeeze()
 predicted_scores = predictions.numpy()
 # Trait labels, paired positionally with the predicted values below.
 item_names = ["cohesion", "syntax", "vocabulary", "phraseology", "grammar",  "conventions"]
 # Print each trait with its score, formatted to four decimal places.
 for item, score in zip(item_names, predicted_scores):
     print(f"{item}: {score:.4f}")
 from transformers import AutoModelForSequenceClassification, AutoTokenizer
 import torch
 # Load the grading model and its matching tokenizer by repository name.
 model = AutoModelForSequenceClassification.from_pretrained("Kevintu/Engessay_grading_ML")
 tokenizer = AutoTokenizer.from_pretrained("Kevintu/Engessay_grading_ML")
 # Text to be graded.
 new_text = "The English Language Learner Insight, Proficiency and Skills Evaluation (ELLIPSE) Corpus is a freely available corpus of ~6,500 ELL writing samples that have been scored for overall holistic language proficiency as well as analytic proficiency scores related to cohesion, syntax, vocabulary, phraseology, grammar, and conventions. In addition, the ELLIPSE corpus provides individual and demographic information for the ELL writers in the corpus including economic status, gender, grade level (8-12), and race/ethnicity. The corpus provides language proficiency scores for individual writers and was developed to advance research in corpus and NLP approaches to assess overall and more fine-grained features of proficiency."
 # Tokenize into PyTorch tensors; truncation=True with max_length=64 means
 # only the first 64 tokens of the text are passed to the model.
 encoded_input = tokenizer(new_text, return_tensors='pt', padding=True, truncation=True, max_length=64)
 # Put the model in evaluation mode before predicting.
 model.eval()
 # Run the forward pass without tracking gradients.
 with torch.no_grad():
     outputs = model(**encoded_input)
 # squeeze() removes size-1 dimensions (here the batch of one text), leaving
 # one value per trait -- assumes the model emits six logits; TODO confirm.
 predictions = outputs.logits.squeeze()
 predicted_scores = predictions.numpy()  # Convert to numpy array
 item_names = ["cohesion", "syntax", "vocabulary", "phraseology", "grammar", "conventions"]
 # Scale predictions from 1 to 10: the linear map sends a raw score of 1.0
 # to 1.0 and a raw score of 5.0 to 10.0.
 scaled_scores = 2.25 * predicted_scores - 1.25
 # The loop variable is `item`; the f-string must use that same name
 # (the previous `{trait}` was undefined and raised NameError).
 for item, score in zip(item_names, scaled_scores):
     print(f"{item}: {score:.4f}")
 ##"output" (values between 1-10)
 #cohesion: 6.7147
 #syntax: 6.9354
 #vocabulary: 7.5814