kevintu committed on
Commit
392ee6b
·
verified ·
1 Parent(s): e7c7b50

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +0 -15
README.md CHANGED
@@ -25,7 +25,6 @@ tokenizer = AutoTokenizer.from_pretrained("Kevintu/Engessay_grading_ML")
25
 
26
  new_text = "The English Language Learner Insight, Proficiency and Skills Evaluation (ELLIPSE) Corpus is a freely available corpus of ~6,500 ELL writing samples that have been scored for overall holistic language proficiency as well as analytic proficiency scores related to cohesion, syntax, vocabulary, phraseology, grammar, and conventions. In addition, the ELLIPSE corpus provides individual and demographic information for the ELL writers in the corpus including economic status, gender, grade level (8-12), and race/ethnicity. The corpus provides language proficiency scores for individual writers and was developed to advance research in corpus and NLP approaches to assess overall and more fine-grained features of proficiency."
27
 
28
-
29
  # Define the path to your text file
30
  #file_path = 'path/to/yourfile.txt'
31
 
@@ -33,22 +32,17 @@ new_text = "The English Language Learner Insight, Proficiency and Skills Evaluat
33
  #with open(file_path, 'r', encoding='utf-8') as file:
34
  # new_text = file.read()
35
 
36
-
37
  encoded_input = tokenizer(new_text, return_tensors='pt', padding=True, truncation=True, max_length=64)
38
-
39
  model.eval()
40
 
41
  # Perform the prediction
42
  with torch.no_grad():
43
  outputs = model(**encoded_input)
44
 
45
- # Get the predictions (the output here depends on whether you are doing regression or classification)
46
  predictions = outputs.logits.squeeze()
47
 
48
-
49
  predicted_scores = predictions.numpy()
50
  item_names = ["cohesion", "syntax", "vocabulary", "phraseology", "grammar", "conventions"]
51
-
52
  for item, score in zip(item_names, predicted_scores):
53
  print(f"{item}: {score:.4f}")
54
 
@@ -68,35 +62,26 @@ for item, score in zip(item_names, predicted_scores):
68
  from transformers import AutoModelForSequenceClassification, AutoTokenizer
69
  import torch
70
 
71
-
72
  model = AutoModelForSequenceClassification.from_pretrained("Kevintu/Engessay_grading_ML")
73
  tokenizer = AutoTokenizer.from_pretrained("Kevintu/Engessay_grading_ML")
74
 
75
-
76
  new_text = "The English Language Learner Insight, Proficiency and Skills Evaluation (ELLIPSE) Corpus is a freely available corpus of ~6,500 ELL writing samples that have been scored for overall holistic language proficiency as well as analytic proficiency scores related to cohesion, syntax, vocabulary, phraseology, grammar, and conventions. In addition, the ELLIPSE corpus provides individual and demographic information for the ELL writers in the corpus including economic status, gender, grade level (8-12), and race/ethnicity. The corpus provides language proficiency scores for individual writers and was developed to advance research in corpus and NLP approaches to assess overall and more fine-grained features of proficiency."
77
-
78
  encoded_input = tokenizer(new_text, return_tensors='pt', padding=True, truncation=True, max_length=64)
79
 
80
-
81
  model.eval()
82
  with torch.no_grad():
83
  outputs = model(**encoded_input)
84
 
85
-
86
  predictions = outputs.logits.squeeze()
87
-
88
  predicted_scores = predictions.numpy() # Convert to numpy array
89
  item_names = ["cohesion", "syntax", "vocabulary", "phraseology", "grammar", "conventions"]
90
 
91
  # Scale predictions from 1 to 10
92
  scaled_scores = 2.25 * predicted_scores - 1.25
93
-
94
-
95
  for item, score in zip(item_names, scaled_scores):
96
  print(f"{item}: {score:.4f}")
97
 
98
  ##"output" (values between 1-10)
99
-
100
  #cohesion: 6.7147
101
  #syntax: 6.9354
102
  #vocabulary: 7.5814
 
25
 
26
  new_text = "The English Language Learner Insight, Proficiency and Skills Evaluation (ELLIPSE) Corpus is a freely available corpus of ~6,500 ELL writing samples that have been scored for overall holistic language proficiency as well as analytic proficiency scores related to cohesion, syntax, vocabulary, phraseology, grammar, and conventions. In addition, the ELLIPSE corpus provides individual and demographic information for the ELL writers in the corpus including economic status, gender, grade level (8-12), and race/ethnicity. The corpus provides language proficiency scores for individual writers and was developed to advance research in corpus and NLP approaches to assess overall and more fine-grained features of proficiency."
27
 
 
28
  # Define the path to your text file
29
  #file_path = 'path/to/yourfile.txt'
30
 
 
32
  #with open(file_path, 'r', encoding='utf-8') as file:
33
  # new_text = file.read()
34
 
 
35
  encoded_input = tokenizer(new_text, return_tensors='pt', padding=True, truncation=True, max_length=64)
 
36
  model.eval()
37
 
38
  # Perform the prediction
39
  with torch.no_grad():
40
  outputs = model(**encoded_input)
41
 
 
42
  predictions = outputs.logits.squeeze()
43
 
 
44
  predicted_scores = predictions.numpy()
45
  item_names = ["cohesion", "syntax", "vocabulary", "phraseology", "grammar", "conventions"]
 
46
  for item, score in zip(item_names, predicted_scores):
47
  print(f"{item}: {score:.4f}")
48
 
 
62
  from transformers import AutoModelForSequenceClassification, AutoTokenizer
63
  import torch
64
 
 
65
  model = AutoModelForSequenceClassification.from_pretrained("Kevintu/Engessay_grading_ML")
66
  tokenizer = AutoTokenizer.from_pretrained("Kevintu/Engessay_grading_ML")
67
 
 
68
  new_text = "The English Language Learner Insight, Proficiency and Skills Evaluation (ELLIPSE) Corpus is a freely available corpus of ~6,500 ELL writing samples that have been scored for overall holistic language proficiency as well as analytic proficiency scores related to cohesion, syntax, vocabulary, phraseology, grammar, and conventions. In addition, the ELLIPSE corpus provides individual and demographic information for the ELL writers in the corpus including economic status, gender, grade level (8-12), and race/ethnicity. The corpus provides language proficiency scores for individual writers and was developed to advance research in corpus and NLP approaches to assess overall and more fine-grained features of proficiency."
 
69
  encoded_input = tokenizer(new_text, return_tensors='pt', padding=True, truncation=True, max_length=64)
70
 
 
71
  model.eval()
72
  with torch.no_grad():
73
  outputs = model(**encoded_input)
74
 
 
75
  predictions = outputs.logits.squeeze()
 
76
  predicted_scores = predictions.numpy() # Convert to numpy array
77
  item_names = ["cohesion", "syntax", "vocabulary", "phraseology", "grammar", "conventions"]
78
 
79
  # Scale predictions from 1 to 10
80
  scaled_scores = 2.25 * predicted_scores - 1.25
 
 
81
  for item, score in zip(item_names, scaled_scores):
82
  print(f"{item}: {score:.4f}")
83
 
84
  ##"output" (values between 1-10)
 
85
  #cohesion: 6.7147
86
  #syntax: 6.9354
87
  #vocabulary: 7.5814