Ylemnox committed on
Commit
cf47052
Β·
verified Β·
1 Parent(s): 6544fd6

Upload cli.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. cli.py +242 -0
cli.py ADDED
@@ -0,0 +1,242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Simple TOEFL Independent Speaking Judge - Command Line Interface
4
+ Usage: python toefl_judge_cli.py
5
+ """
6
+
7
+ from mlx_lm import load, generate
8
+ import re
9
+ import argparse
10
+ import sys
11
+
12
class SimpleTOEFLJudge:
    """Scores TOEFL Independent Speaking responses on the official 0-4 rubric.

    Wraps a fine-tuned MLX language model (base model + LoRA adapter) and
    exposes single-shot evaluation plus an interactive REPL.
    """

    def __init__(self, model_path="mlx-community/Llama-3.2-3B-Instruct-4bit", adapter_path="toefl_judge_adapter"):
        """Initialize the TOEFL judge with model paths.

        Args:
            model_path: HF repo id or local path of the base model.
            adapter_path: Path to the fine-tuned LoRA adapter weights.
        """
        self.model_path = model_path
        self.adapter_path = adapter_path
        self.model = None
        self.tokenizer = None

        print("πŸŽ“ TOEFL Independent Speaking Judge")
        print("="*50)
        print("Loading model... (this may take a moment)")

        self.load_model()

    def load_model(self):
        """Load the fine-tuned model; exits the process on failure.

        A judge without a model is useless, so any load error is fatal
        (sys.exit(1)) rather than leaving a half-initialized object around.
        """
        try:
            self.model, self.tokenizer = load(
                path_or_hf_repo=self.model_path,
                adapter_path=self.adapter_path
            )
            print("βœ… Model loaded successfully!")
            # NOTE(review): metrics below are hard-coded claims about the
            # adapter's eval run — confirm they match the shipped adapter.
            print("πŸ“Š Model Performance: 86.1% accuracy, 0.943 correlation")
            print("="*50)
        except Exception as e:
            print(f"❌ Error loading model: {e}")
            print("Please check that your model and adapter paths are correct.")
            sys.exit(1)

    def create_prompt(self, question, answer):
        """Build the Llama-3 chat-formatted evaluation prompt.

        Args:
            question: The TOEFL speaking prompt shown to the student.
            answer: The student's transcribed spoken response.

        Returns:
            A single prompt string using Llama-3 special tokens
            (<|begin_of_text|>, header ids, <|eot_id|>).
        """
        system_prompt = """You are an expert TOEFL iBT Independent Speaking evaluator. Your task is to score student responses on a scale of 0-4 based on the official TOEFL Independent Speaking rubric.

Scoring Criteria:
- Score 4: Fulfills task demands with sustained, coherent discourse. Well-paced delivery, effective grammar/vocabulary, well-developed and coherent ideas.
- Score 3: Addresses task appropriately but may lack full development. Generally clear speech with some fluency, fairly effective language use, mostly coherent with some limitations.
- Score 2: Addresses task but limited development. Intelligible speech requiring listener effort, limited grammar/vocabulary range, basic ideas with limited elaboration.
- Score 1: Very limited content/coherence, largely unintelligible speech, severely limited language control, lacks substance beyond basic ideas.
- Score 0: No attempt or unrelated to topic.

Provide your score and a brief explanation focusing on delivery, language use, and topic development."""

        prompt = f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
{system_prompt}<|eot_id|><|start_header_id|>user<|end_header_id|>
Please evaluate this TOEFL Independent Speaking response:

Question: {question}

Student Response: {answer}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
"""
        return prompt

    def extract_score(self, response):
        """Extract the numerical 0-4 score from the model's response text.

        The match is case-insensitive ("Score: 3", "SCORE: 3", "score: 3")
        and restricted to a single valid digit 0-4, so hallucinated values
        like "Score: 10" are rejected instead of being reported as-is.

        Args:
            response: Raw generated text from the model.

        Returns:
            int score in [0, 4], or None when no valid score is found.
        """
        score_match = re.search(r'Score:\s*([0-4])\b', response, re.IGNORECASE)
        return int(score_match.group(1)) if score_match else None

    def evaluate(self, question, answer):
        """Evaluate a TOEFL response with the loaded model.

        Args:
            question: The speaking prompt.
            answer: The student's transcribed response.

        Returns:
            dict with keys 'score' (int 0-4 or None), 'response' (model text
            or error message) and 'success' (bool). Never raises — failures
            are reported through the dict so callers can render them.
        """
        print("πŸ€” Evaluating response...")

        try:
            prompt = self.create_prompt(question, answer)

            response = generate(
                self.model,
                self.tokenizer,
                prompt=prompt,
                max_tokens=400
            )

            score = self.extract_score(response)

            return {
                'score': score,
                'response': response,
                'success': True
            }

        except Exception as e:
            return {
                'score': None,
                'response': f"Error: {e}",
                'success': False
            }

    def print_result(self, question, answer, result):
        """Print evaluation results in a nice format.

        Args:
            question: The speaking prompt that was evaluated.
            answer: The student response that was evaluated.
            result: dict produced by evaluate().
        """
        print("\n" + "="*60)
        print("πŸ“ EVALUATION RESULTS")
        print("="*60)

        print(f"\nπŸ’¬ Question:")
        print(f"{question}")

        print(f"\nπŸ—£οΈ Student Response:")
        print(f"{answer}")

        if result['success'] and result['score'] is not None:
            score = result['score']

            # Score with emoji; extract_score guarantees 0-4, so the final
            # else branch only ever fires for a genuine score of 0.
            if score == 4:
                print(f"\n🌟 SCORE: {score}/4 (Excellent)")
            elif score == 3:
                print(f"\nβœ… SCORE: {score}/4 (Good)")
            elif score == 2:
                print(f"\n⚠️ SCORE: {score}/4 (Limited)")
            elif score == 1:
                print(f"\n❌ SCORE: {score}/4 (Very Limited)")
            else:
                print(f"\nπŸ’€ SCORE: {score}/4 (No Response)")

            print(f"\nπŸ“‹ Detailed Feedback:")
            print(f"{result['response']}")

        else:
            print(f"\n❌ Evaluation failed:")
            print(f"{result['response']}")

        print("\n" + "="*60)

    def interactive_mode(self):
        """Run a question/answer evaluation loop on stdin until the user quits."""
        print("\n🎯 Interactive Mode")
        print("Enter 'quit' or 'exit' to stop, 'help' for assistance")
        print("-" * 50)

        while True:
            try:
                # Get question
                print("\nπŸ“ Enter the TOEFL Speaking question:")
                question = input("> ").strip()

                if question.lower() in ['quit', 'exit', 'q']:
                    print("πŸ‘‹ Goodbye!")
                    break
                elif question.lower() == 'help':
                    self.show_help()
                    continue
                elif not question:
                    print("⚠️ Please enter a question.")
                    continue

                # Get student response
                print("\nπŸ—£οΈ Enter the student's response:")
                answer = input("> ").strip()

                if not answer:
                    print("⚠️ Please enter a response.")
                    continue

                # Evaluate
                result = self.evaluate(question, answer)
                self.print_result(question, answer, result)

                # Ask if they want to continue
                print("\nπŸ”„ Evaluate another response? (y/n)")
                continue_choice = input("> ").strip().lower()
                if continue_choice in ['n', 'no']:
                    print("πŸ‘‹ Goodbye!")
                    break

            except KeyboardInterrupt:
                # Ctrl-C exits cleanly instead of dumping a traceback.
                print("\nπŸ‘‹ Goodbye!")
                break
            except Exception as e:
                # Keep the REPL alive on any per-iteration failure.
                print(f"❌ Error: {e}")

    def show_help(self):
        """Show help information: the rubric summary and usage tips."""
        print("\nπŸ“š HELP - TOEFL Independent Speaking Judge")
        print("="*50)
        print("This tool evaluates TOEFL Independent Speaking responses on a 0-4 scale.")
        print("\nScoring Rubric:")
        print("🌟 Score 4: Excellent - Sustained, coherent discourse")
        print("βœ… Score 3: Good - Mostly coherent with minor limitations")
        print("⚠️ Score 2: Limited - Basic ideas with limited development")
        print("❌ Score 1: Very Limited - Major language/content issues")
        print("πŸ’€ Score 0: No attempt or unrelated to topic")
        print("\nTips:")
        print("β€’ Enter the exact question as given in TOEFL")
        print("β€’ Provide the student's transcribed spoken response")
        print("β€’ The model evaluates based on language use, delivery, and topic development")
        print("β€’ Type 'quit' or 'exit' to stop")
        print("="*50)
199
def main():
    """Entry point: parse CLI flags, then run one-shot or interactive mode.

    With both --question and --answer, a single evaluation is printed;
    otherwise the judge drops into the interactive loop.
    """
    parser = argparse.ArgumentParser(description='TOEFL Independent Speaking Judge')
    parser.add_argument('--model', default='mlx-community/Llama-3.2-3B-Instruct-4bit',
                        help='Model path')
    parser.add_argument('--adapter', default='toefl_judge_adapter',
                        help='Adapter path')
    parser.add_argument('--question', '-q', help='TOEFL question')
    parser.add_argument('--answer', '-a', help='Student response')
    parser.add_argument('--interactive', '-i', action='store_true',
                        help='Run in interactive mode')
    args = parser.parse_args()

    # Model loading happens in the constructor (fatal on failure).
    judge = SimpleTOEFLJudge(model_path=args.model, adapter_path=args.adapter)

    one_shot = bool(args.question) and bool(args.answer)
    if not one_shot:
        # Interactive mode is the default when either input is missing.
        judge.interactive_mode()
        return

    outcome = judge.evaluate(args.question, args.answer)
    judge.print_result(args.question, args.answer, outcome)
225
# Sample questions for testing
# Representative TOEFL Independent Speaking prompts; the first three are
# displayed at startup so users can try the judge without writing their own.
SAMPLE_QUESTIONS = [
    "Do you agree or disagree with the following statement: it is better to work in teams than to work alone? Use specific examples to support your answer.",
    "Some people prefer to live in small towns, while others prefer big cities. Which do you prefer and why?",
    "Do you think students should be required to wear uniforms in school? Explain your opinion with specific reasons.",
    "Would you rather have a job that pays well but is stressful, or a job that pays less but is enjoyable? Explain your choice.",
    "Some people believe that technology has made our lives easier, while others think it has made life more complicated. What is your opinion?"
]
234
+ if __name__ == "__main__":
235
+ print("πŸš€ Quick Test Mode")
236
+ print("Want to try a sample question? Here are some examples:")
237
+ for i, q in enumerate(SAMPLE_QUESTIONS[:3], 1):
238
+ print(f"{i}. {q}")
239
+ print("\nStarting TOEFL Judge...")
240
+ print("-" * 70)
241
+
242
+ main()